# Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org

"""Classes to generate plain text from a message object tree."""

__all__ = ['Generator', 'DecodedGenerator']

import re
import sys
import time
import random
import warnings

from cStringIO import StringIO
from emailfix.header import Header

UNDERSCORE = '_'
NL = '\n'

# Matches "From " at the start of any line (used for From_ mangling).
fcre = re.compile(r'^From ', re.MULTILINE)
# Matches a newline that is not already preceded by a carriage return, so
# that substituting the configured line separator never doubles up a '\r'.
nlre = re.compile(r'(?<!\r)\n', re.MULTILINE)


def _is8bitstring(s):
    """Return True if s is a byte string that is not decodable as us-ascii.

    Anything that is not a `str' instance (e.g. a unicode string or a Header
    instance) yields False.
    """
    if isinstance(s, str):
        try:
            unicode(s, 'us-ascii')
        except UnicodeError:
            return True
    return False



class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when True (the default), escapes
        From_ lines in the body of the message by putting a `>' in front of
        them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header will split as
        defined in the Header class.  Set maxheaderlen to zero to disable
        header wrapping.  The default is 78, as recommended (but not required)
        by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self._maxheaderlen = maxheaderlen
        # flatten() overrides this per call; give it a default so that the
        # protected writers do not fail with AttributeError when they are
        # invoked on an instance that has not been through flatten() yet.
        self._NL = NL

    def write(self, s):
        """Write the string s to the underlying output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False, linesep=NL):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is False to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.

        linesep is the string used to terminate the lines this generator
        writes (headers, boundaries, body line endings).  It defaults to a
        bare newline.
        """
        self._NL = linesep
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                ufrom = 'From nobody ' + time.ctime(time.time())
            self.write(ufrom + self._NL)
        self._write(msg)

    def clone(self, fp):
        """Clone this generator with the exact same options."""
        return self.__class__(fp, self._mangle_from_, self._maxheaderlen)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO.  Then we write the
        # headers and the StringIO contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        # Write every header of msg as "Name: value" followed by the line
        # separator, then the blank line that ends the header block.
        for h, v in msg.items():
            self.write('%s: ' % h)
            if isinstance(v, Header):
                # Header instances know what to do.  This has to be checked
                # before the no-wrapping case: a Header is not a string, so
                # handing it to nlre.sub() raises TypeError (which used to
                # happen for every Header-valued header under
                # multipart/signed, where maxheaderlen is forced to 0).
                # NOTE(review): encode() presumably still folds according to
                # the Header's own line length settings -- confirm against
                # emailfix.header.
                self.write(v.encode(linesep=self._NL) + self._NL)
            elif _is8bitstring(v):
                # If we have raw 8bit data in a byte string, we have no idea
                # what the encoding is.  There is no safe way to split this
                # string.  If it's ascii-subset, then we could do a normal
                # ascii split, but if it's multibyte then we could break the
                # string.  There's no way to know so the least harm seems to
                # be to not split the string and risk it being too long.
                self.write(v + self._NL)
            elif self._maxheaderlen == 0:
                # Explicit no-wrapping: only normalize embedded newlines to
                # the configured line separator.
                self.write(nlre.sub(self._NL, v) + self._NL)
            else:
                # Header's got lots of smarts, so use it.  Note that this is
                # fundamentally broken though because we lose idempotency when
                # the header string is continued with tabs.  It will now be
                # continued with spaces.  This was reversedly broken before we
                # fixed bug 1974.  Either way, we lose.
                self.write(Header(
                    v, maxlinelen=self._maxheaderlen, header_name=h).encode(
                        linesep=self._NL) + self._NL)
        # A blank line always separates headers from body
        self.write(self._NL)

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        # Write a string payload, optionally mangling From_ lines and always
        # converting bare newlines to the configured line separator.
        payload = msg.get_payload()
        if payload is None:
            return
        if not isinstance(payload, basestring):
            raise TypeError('string payload expected: %s' % type(payload))
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self.write(nlre.sub(self._NL, payload))

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            subparts = []
        elif isinstance(subparts, basestring):
            # e.g. a non-strict parse of a message with no starting boundary.
            self.write(subparts)
            return
        elif not isinstance(subparts, list):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False, linesep=self._NL)
            msgtexts.append(s.getvalue())
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary()
        if not boundary:
            # Create a boundary that doesn't appear in any of the
            # message texts.
            alltext = self._NL.join(msgtexts)
            boundary = _make_boundary(alltext)
            msg.set_boundary(boundary)
        # If there's a preamble, write it out, with a trailing CRLF
        if msg.preamble is not None:
            preamble = msg.preamble
            if self._mangle_from_:
                preamble = fcre.sub('>From ', preamble)
            # Use the configured line separator inside the preamble too, the
            # same way _handle_text() does for text payloads.
            self.write(nlre.sub(self._NL, preamble) + self._NL)
        # dash-boundary transport-padding CRLF
        self.write('--' + boundary + self._NL)
        # body-part
        if msgtexts:
            self.write(msgtexts.pop(0))
        # *encapsulation
        # --> delimiter transport-padding
        # --> CRLF body-part
        for body_part in msgtexts:
            # delimiter transport-padding CRLF
            self.write(self._NL + '--' + boundary + self._NL)
            # body-part
            self.write(body_part)
        # close-delimiter transport-padding
        self.write(self._NL + '--' + boundary + '--')
        if msg.epilogue is not None:
            self.write(self._NL)
            epilogue = msg.epilogue
            if self._mangle_from_:
                epilogue = fcre.sub('>From ', epilogue)
            # Same line separator normalization as for the preamble.
            self.write(nlre.sub(self._NL, epilogue))

    def _handle_multipart_signed(self, msg):
        # The contents of signed parts has to stay unmodified in order to keep
        # the signature intact per RFC1847 2.1, so we disable header wrapping.
        # RDM: This isn't enough to completely preserve the part, but it helps.
        old_maxheaderlen = self._maxheaderlen
        try:
            self._maxheaderlen = 0
            self._handle_multipart(msg)
        finally:
            self._maxheaderlen = old_maxheaderlen

    def _handle_message_delivery_status(self, msg):
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False, linesep=self._NL)
            text = s.getvalue()
            lines = text.split(self._NL)
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == '':
                blocks.append(self._NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self.write(self._NL.join(blocks))

    def _handle_message(self, msg):
        s = StringIO()
        g = self.clone(s)
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        # Except, it turns out, when it's a string instead, which happens when
        # and only when HeaderParser is used on a message of mime type
        # message/rfc822.  Such messages are generated by, for example,
        # Groupwise when forwarding unadorned messages.  (Issue 7970.)  So
        # in that case we just emit the string body.
        payload = msg.get_payload()
        if isinstance(payload, list):
            g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL)
            payload = s.getvalue()
        self.write(payload)



_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'

class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Like the Generator base class, except that non-text parts are substituted
    with a format string representing the part.
    """
    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
        """Like Generator.__init__() except that an additional optional
        argument is allowed.

        Walks through all subparts of a message.  If the subpart is of main
        type `text', then it prints the decoded payload of the subpart.

        Otherwise, fmt is a format string that is used instead of the message
        payload.  fmt is expanded with the following keywords (in
        %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        The default value for fmt is None, meaning

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        if fmt is None:
            self._fmt = _FMT
        else:
            self._fmt = fmt

    def _dispatch(self, msg):
        # NOTE(review): `print >> self' terminates each item with '\n'
        # regardless of the linesep passed to flatten() -- confirm whether
        # that is intended before changing it.
        for part in msg.walk():
            maintype = part.get_content_maintype()
            if maintype == 'text':
                print >> self, part.get_payload(decode=True)
            elif maintype == 'multipart':
                # Just skip this
                pass
            else:
                print >> self, self._fmt % {
                    'type'       : part.get_content_type(),
                    'maintype'   : part.get_content_maintype(),
                    'subtype'    : part.get_content_subtype(),
                    'filename'   : part.get_filename('[no filename]'),
                    'description': part.get('Content-Description',
                                            '[no description]'),
                    'encoding'   : part.get('Content-Transfer-Encoding',
                                            '[no encoding]'),
                    }



# Helper
# Zero-pad the random boundary token to the number of digits in sys.maxint-1.
_width = len(repr(sys.maxint-1))
_fmt = '%%0%dd' % _width

def _make_boundary(text=None):
    """Return a random MIME boundary string.

    If text is given, the returned boundary is guaranteed not to occur in it
    as a delimiter line (`--boundary') or close-delimiter line
    (`--boundary--'); a `.N' suffix with increasing N is appended to the
    random base until that holds.
    """
    # Craft a random boundary.  If text is given, ensure that the chosen
    # boundary doesn't appear in the text.
    token = random.randrange(sys.maxint)
    boundary = ('=' * 15) + (_fmt % token) + '=='
    if text is None:
        return boundary
    b = boundary
    counter = 0
    while True:
        cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
        if not cre.search(text):
            break
        b = boundary + '.' + str(counter)
        counter += 1
    return b