MoinMessage (file emailfix/generator.py at 49da801a72e4)

     # Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org

"""Classes to generate plain text from a message object tree."""

__all__ = ['Generator', 'DecodedGenerator']

import re
import sys
import time
import random
import warnings

from cStringIO import StringIO
from emailfix.header import Header

UNDERSCORE = '_'
NL = '\n'

# Matches "From " at the start of any line (used for From_ mangling).
fcre = re.compile(r'^From ', re.MULTILINE)
# Matches a bare LF, i.e. a newline that is not already preceded by CR.
nlre = re.compile(r'(?<!\r)\n', re.MULTILINE)


def _is8bitstring(s):
    """Return True if s is a byte string that cannot be decoded as us-ascii.

    Anything that is not a `str' instance (e.g. unicode objects or Header
    instances) yields False.
    """
    if isinstance(s, str):
        try:
            unicode(s, 'us-ascii')
        except UnicodeError:
            return True
    return False


class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when True (the default), escapes
        From_ lines in the body of the message by putting a `>' in front of
        them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header will split as
        defined in the Header class.  Set maxheaderlen to zero to disable
        header wrapping.  The default is 78, as recommended (but not required)
        by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self._maxheaderlen = maxheaderlen
        # flatten() overrides this with the requested line separator.  Having
        # a default here keeps the protected _write*/_handle_* methods usable
        # on an instance on which flatten() has not been called yet.
        self._NL = NL

    def write(self, s):
        """Write the string s to the current output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False, linesep=NL):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is False to inhibit the printing of any From_ delimiter.

        linesep is the line separator written after header lines and
        boundaries, and substituted for bare newlines in text payloads.  It
        defaults to '\\n'.

        Note that for subobjects, no From_ line is printed.
        """
        self._NL = linesep
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                ufrom = 'From nobody ' + time.ctime(time.time())
            self.write(ufrom + self._NL)
        self._write(msg)

    def clone(self, fp):
        """Clone this generator with the exact same options."""
        return self.__class__(fp, self._mangle_from_, self._maxheaderlen)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        """Write msg (headers, then body) to the output file object."""
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO.  Then we write the
        # headers and the StringIO contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Call the body handler that matches the content type of msg."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg, followed by the blank separator line.

        Header values may be plain strings or Header instances.  When header
        wrapping is disabled (maxheaderlen == 0) string values are written
        unwrapped; Header instances are always asked to encode themselves.
        """
        for h, v in msg.items():
            self.write('%s: ' % h)
            if self._maxheaderlen == 0:
                # Explicit no-wrapping
                if isinstance(v, Header):
                    # A Header instance is not a string, so it must not be
                    # handed to nlre.sub() (that raises TypeError).  Let the
                    # instance render itself instead.
                    self.write(v.encode(linesep=self._NL) + self._NL)
                elif _is8bitstring(v):
                    self.write(v + self._NL)
                else:
                    self.write(nlre.sub(self._NL, v) + self._NL)
            elif isinstance(v, Header):
                # Header instances know what to do
                self.write(v.encode(linesep=self._NL) + self._NL)
            elif _is8bitstring(v):
                # If we have raw 8bit data in a byte string, we have no idea
                # what the encoding is.  There is no safe way to split this
                # string.  If it's ascii-subset, then we could do a normal
                # ascii split, but if it's multibyte then we could break the
                # string.  There's no way to know so the least harm seems to
                # be to not split the string and risk it being too long.
                self.write(v + self._NL)
            else:
                # Header's got lots of smarts, so use it.  Note that this is
                # fundamentally broken though because we lose idempotency when
                # the header string is continued with tabs.  It will now be
                # continued with spaces.  This was reversedly broken before we
                # fixed bug 1974.  Either way, we lose.
                self.write(Header(
                    v, maxlinelen=self._maxheaderlen, header_name=h).encode(
                    linesep=self._NL) + self._NL)
        # A blank line always separates headers from body
        self.write(self._NL)

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write the string payload of msg.

        Nothing is written when the payload is None.  From_ lines are escaped
        when mangle_from_ is set, and bare newlines are replaced with the
        current line separator.

        Raises TypeError if the payload is neither None nor a string.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        if not isinstance(payload, basestring):
            raise TypeError('string payload expected: %s' % type(payload))
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self.write(nlre.sub(self._NL, payload))

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        """Write a multipart body: preamble, delimited subparts, epilogue.

        If msg has no boundary yet, one is generated that does not clash with
        the flattened subparts and is stored on msg via set_boundary().
        """
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            subparts = []
        elif isinstance(subparts, basestring):
            # e.g. a non-strict parse of a message with no starting boundary.
            self.write(subparts)
            return
        elif not isinstance(subparts, list):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False, linesep=self._NL)
            msgtexts.append(s.getvalue())
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary()
        if not boundary:
            # Create a boundary that doesn't appear in any of the
            # message texts.
            alltext = self._NL.join(msgtexts)
            boundary = _make_boundary(alltext)
            msg.set_boundary(boundary)
        # If there's a preamble, write it out, with a trailing CRLF
        if msg.preamble is not None:
            if self._mangle_from_:
                preamble = fcre.sub('>From ', msg.preamble)
            else:
                preamble = msg.preamble
            self.write(preamble + self._NL)
        # dash-boundary transport-padding CRLF
        self.write('--' + boundary + self._NL)
        # body-part
        if msgtexts:
            self.write(msgtexts.pop(0))
        # *encapsulation
        # --> delimiter transport-padding
        # --> CRLF body-part
        for body_part in msgtexts:
            # delimiter transport-padding CRLF
            self.write(self._NL + '--' + boundary + self._NL)
            # body-part
            self.write(body_part)
        # close-delimiter transport-padding
        self.write(self._NL + '--' + boundary + '--')
        if msg.epilogue is not None:
            self.write(self._NL)
            if self._mangle_from_:
                epilogue = fcre.sub('>From ', msg.epilogue)
            else:
                epilogue = msg.epilogue
            self.write(epilogue)

    def _handle_multipart_signed(self, msg):
        """Write a multipart/signed body with header wrapping disabled."""
        # The contents of signed parts has to stay unmodified in order to keep
        # the signature intact per RFC1847 2.1, so we disable header wrapping.
        # RDM: This isn't enough to completely preserve the part, but it helps.
        old_maxheaderlen = self._maxheaderlen
        try:
            self._maxheaderlen = 0
            self._handle_multipart(msg)
        finally:
            self._maxheaderlen = old_maxheaderlen

    def _handle_message_delivery_status(self, msg):
        """Write the header blocks of a message/delivery-status payload."""
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False, linesep=self._NL)
            text = s.getvalue()
            lines = text.split(self._NL)
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == '':
                blocks.append(self._NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self.write(self._NL.join(blocks))

    def _handle_message(self, msg):
        """Write the payload of a message/* part."""
        s = StringIO()
        g = self.clone(s)
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        # Except, it turns out, when it's a string instead, which happens when
        # and only when HeaderParser is used on a message of mime type
        # message/rfc822.  Such messages are generated by, for example,
        # Groupwise when forwarding unadorned messages.  (Issue 7970.)  So
        # in that case we just emit the string body.
        payload = msg.get_payload()
        if isinstance(payload, list):
            g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL)
            payload = s.getvalue()
        self.write(payload)


_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'


class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Like the Generator base class, except that non-text parts are substituted
    with a format string representing the part.
    """
    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
        """Like Generator.__init__() except that an additional optional
        argument is allowed.

        Walks through all subparts of a message.  If the subpart is of main
        type `text', then it prints the decoded payload of the subpart.

        Otherwise, fmt is a format string that is used instead of the message
        payload.  fmt is expanded with the following keywords (in
        %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        The default value for fmt is None, meaning

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        if fmt is None:
            self._fmt = _FMT
        else:
            self._fmt = fmt

    def _dispatch(self, msg):
        """Print decoded text parts and a placeholder for non-text parts."""
        for part in msg.walk():
            maintype = part.get_content_maintype()
            if maintype == 'text':
                print >> self, part.get_payload(decode=True)
            elif maintype == 'multipart':
                # Just skip this
                pass
            else:
                print >> self, self._fmt % {
                    'type'       : part.get_content_type(),
                    'maintype'   : part.get_content_maintype(),
                    'subtype'    : part.get_content_subtype(),
                    'filename'   : part.get_filename('[no filename]'),
                    'description': part.get('Content-Description',
                                            '[no description]'),
                    'encoding'   : part.get('Content-Transfer-Encoding',
                                            '[no encoding]'),
                    }


# Helper
# Zero-padded decimal format wide enough for any value below sys.maxint.
_width = len(repr(sys.maxint-1))
_fmt = '%%0%dd' % _width

def _make_boundary(text=None):
    """Return a randomly generated MIME boundary string.

    When text is given, the returned boundary is guaranteed not to occur in
    text as a delimiter line (`--boundary') or a close-delimiter line
    (`--boundary--'); numeric suffixes `.0', `.1', ... are appended to the
    random base until a free one is found.
    """
    base = ('=' * 15) + (_fmt % random.randrange(sys.maxint)) + '=='
    if text is None:
        return base
    candidate = base
    suffix = 0
    # Keep trying candidates for as long as one shows up in text on a line
    # of its own, either as a delimiter or as a close-delimiter.
    while re.search('^--' + re.escape(candidate) + '(--)?$', text,
                    re.MULTILINE):
        candidate = '%s.%d' % (base, suffix)
        suffix += 1
    return candidate