# -*- coding: iso-8859-1 -*-
"""
    MoinMoin - MoinRemoteSupport library

    @copyright: 2011, 2012, 2013, 2014 by Paul Boddie <paul@boddie.org.uk>
    @license: GNU GPL (v2 or later), see COPYING.txt for details.
"""

from ContentTypeSupport import getContentTypeAndEncoding
from MoinMoin.action import cache
from MoinMoin import caching, log
from email.parser import Parser
from email.mime.multipart import MIMEMultipart
from urllib import splithost, splitpasswd, splitport, splituser, unquote_plus
from urlparse import urlsplit
import urllib2, time
import imaplib

logging = log.getLogger(__name__)

def getCachedResource(request, url, arena, scope, max_cache_age, reader=None):

    """
    Using the given 'request', return the resource data for the given 'url',
    accessing a cache entry with the given 'arena' and 'scope' where the data
    has already been downloaded. The 'max_cache_age' indicates the length in
    seconds that a cache entry remains valid.

    If the optional 'reader' object is given, it will be used to access the
    'url' and write the downloaded data to a cache entry. Otherwise, a standard
    URL reader will be used. A reader is called as reader(url, cache_entry) and
    signals a failed download by raising IOError.

    If the resource cannot be downloaded and cached, None is returned.
    Otherwise, the form of the data is as follows:

    url <newline>
    [ content-type-header ] <newline>
    [ other-header <newline> ]*
    <newline>
    content-body

    Exceptions other than IOError raised by the reader are propagated to the
    caller once the incomplete cache entry has been discarded.
    """

    reader = reader or urlreader

    # See if the URL is cached.

    cache_key = cache.key(request, content=url)
    cache_entry = caching.CacheEntry(request, arena, cache_key, scope=scope)

    # If no entry exists, or if the entry is older than the specified age,
    # create one with the response from the URL.

    now = time.time()
    mtime = cache_entry.mtime()

    # NOTE: The URL could be checked and the 'If-Modified-Since' header
    # NOTE: (see MoinMoin.action.pollsistersites) could be checked.

    if not cache_entry.exists() or now - mtime >= max_cache_age:

        # Access the remote data source.

        cache_entry.open(mode="w")
        downloaded = False

        try:
            try:
                # Read from the source and write to the cache.

                reader(url, cache_entry)
                downloaded = True

            # A failed download is reported by returning None below.

            except IOError:
                pass

        finally:
            cache_entry.close()

            # Discard the entry only after it has been closed, so that
            # nothing written during a failed attempt can remain behind as
            # an apparently valid entry.

            if not downloaded and cache_entry.exists():
                cache_entry.remove()

        if not downloaded:
            return None

    # Open the cache entry and read it.

    cache_entry.open()
    try:
        return cache_entry.read()
    finally:
        cache_entry.close()

def urlreader(url, cache_entry):

    """
    Retrieve data from the given 'url', writing it to the 'cache_entry'.

    The URL and the response headers are written first (see
    writeCacheHeaders), followed by the complete response body.
    """

    f = urllib2.urlopen(url)
    try:
        writeCacheHeaders(url, f.headers, cache_entry)
        cache_entry.write(f.read())
    finally:
        f.close()

def imapreader(url, cache_entry):

    """
    Retrieve data associated with the given 'url' using the IMAP protocol
    specifically, writing it to the 'cache_entry'.

    The 'url' provides the credentials, host, optional port and folder path.
    The "imaps" scheme selects an SSL connection; any other scheme selects a
    plain connection. All messages found in the selected folder are attached to
    a single multipart message which is written after the cache headers.

    IOError is raised if the credentials or folder details are missing, or if a
    folder cannot be selected or searched.
    """

    # NOTE: Should use something like pykolab.imap_utf7.encode here.

    enc = lambda s: s.encode("utf-7")

    # The URL maps to credentials and folder details.

    scheme, netloc, path, query, fragment = urlsplit(url)
    credentials, location = splituser(netloc)

    # Without a username and password, a login cannot be attempted.

    username, password = None, None

    if credentials is not None:
        username, password = splitpasswd(credentials)

    if username is None or password is None:
        logging.warning("Incomplete credentials for IMAP server: %s" % location)
        raise IOError("Incomplete credentials for IMAP server: %s" % location)

    username, password = map(unquote_plus, (username, password))
    host, port = splitport(location)
    folders = map(unquote_plus, path.split("/")[1:])

    # Searching requires a selected folder.

    if not folders:
        logging.warning("No folder specified for IMAP server: %s" % location)
        raise IOError("No folder specified for IMAP server: %s" % location)

    # Connect and log in to the IMAP server.

    if scheme == "imaps":
        cls = imaplib.IMAP4_SSL
    else:
        cls = imaplib.IMAP4

    if port is None:
        i = cls(host)
    else:
        i = cls(host, int(port))

    try:
        # Log in within the protected region so that a failed login still
        # results in the connection being shut down.

        i.login(username, password)

        # Descend to the desired folder.
        # NOTE: Each path component is selected in turn as a folder name in its
        # NOTE: own right; the components are not joined using the server's
        # NOTE: hierarchy delimiter.

        for folder in folders:
            code, response = i.select(enc(folder), readonly=True)
            if code != "OK":
                logging.warning("Could not enter folder: %s" % folder)
                raise IOError("Could not enter folder: %s" % folder)

        # Search for all messages.
        # NOTE: This could also be parameterised.

        code, response = i.search(None, "(ALL)")

        if code != "OK":
            logging.warning("Could not search folder: %s" % folder)
            raise IOError("Could not search folder: %s" % folder)

        # For each result, obtain the full message, but embed it in a larger
        # multipart message.

        message = MIMEMultipart()

        writeCacheHeaders(url, message, cache_entry)

        # The search result is a single string of space-separated message
        # numbers, which is empty (or absent) when nothing was found.

        numbers = []

        if response and response[0]:
            numbers = response[0].split()

        for n in numbers:

            # NOTE: RFC822.PEEK is not among the fetch items of RFC 3501 (it is
            # NOTE: described as obsolete syntax in RFC 2062); BODY.PEEK[] is
            # NOTE: the current equivalent should a server reject this item.

            code, response = i.fetch(n, "(RFC822.PEEK)")

            if code == "OK" and response:

                # Write the message payload into the cache entry for later
                # processing.

                for data in response:

                    # Message content is provided as (envelope, body) pairs;
                    # anything else in the response is ignored.

                    if isinstance(data, tuple) and len(data) == 2:
                        envelope, body = data
                        message.attach(Parser().parsestr(body))
            else:
                # The message number is a string as obtained from the search
                # response.

                logging.warning("Could not obtain message %s from folder %s" % (n, folder))

        cache_entry.write(message.as_string())

    finally:
        i.logout()
        del i

def writeCacheHeaders(url, headers, cache_entry):

    """
    For the given 'url', write it and the given 'headers' to the given
    'cache_entry'.

    The 'url' is written on the first line, the content type (or an empty line
    if there is none) on the second, and then every other header as a
    "key: value" line, followed by a blank line terminating the headers.

    NOTE: The 'url' is written as given, including any credentials it contains.
    """

    cache_entry.write(url + "\n")
    cache_entry.write((headers.get("content-type") or "") + "\n")
    for key, value in headers.items():
        if key.lower() != "content-type":
            cache_entry.write("%s: %s\n" % (key, value))
    cache_entry.write("\n")

def getCachedResourceMetadata(f):

    """
    Return a metadata dictionary for the given resource file-like object 'f'.

    The result is a tuple of the form (url, content_type, encoding, metadata)
    where 'url' is the first line exactly as read (including any line ending),
    'content_type' and 'encoding' are obtained from the second line, and
    'metadata' maps the remaining header names to their values.

    Reading stops after the blank line terminating the headers, leaving 'f'
    positioned at the start of the content body.
    """

    url = f.readline()
    content_type, encoding = getContentTypeAndEncoding(f.readline())

    metadata = {}
    key = None
    line = f.readline()

    while line.strip():

        # A line starting with whitespace continues the previous header's
        # value, as happens where a folded header value was written out.

        if line[0] in " \t" and key is not None:
            metadata[key] = (metadata[key] + " " + line.strip()).strip()

        elif ":" in line:
            key, value = [v.strip() for v in line.split(":", 1)]
            metadata[key] = value

        # Any other line is not a header and is ignored.

        line = f.readline()

    return url, content_type, encoding, metadata

# vim: tabstop=4 expandtab shiftwidth=4