1.1 --- a/GPGUtils.py Sat Apr 11 17:07:59 2015 +0200
1.2 +++ b/GPGUtils.py Sun Apr 12 19:35:41 2015 +0200
1.3 @@ -3,7 +3,7 @@
1.4 """
1.5 GPG utilities derived from the MoinMessage library.
1.6
1.7 -Copyright (C) 2012, 2013, 2014 Paul Boddie <paul@boddie.org.uk>
1.8 +Copyright (C) 2012, 2013, 2014, 2015 Paul Boddie <paul@boddie.org.uk>
1.9
1.10 This program is free software; you can redistribute it and/or modify it under
1.11 the terms of the GNU General Public License as published by the Free Software
1.12 @@ -19,8 +19,12 @@
1.13 this program. If not, see <http://www.gnu.org/licenses/>.
1.14 """
1.15
1.16 +# NOTE: Nasty fix-up for the email module, thanks to the Python 2 abandonment
1.17 +# NOTE: policy of the core developers.
1.18 +
1.19 +from emailfix.generator import Generator
1.20 +
1.21 from email.encoders import encode_noop
1.22 -from email.generator import Generator
1.23 from email.mime.multipart import MIMEMultipart
1.24 from email.mime.application import MIMEApplication
1.25 from email.mime.base import MIMEBase
1.26 @@ -347,7 +351,7 @@
1.27
1.28 out = StringIO()
1.29 generator = Generator(out, False, 0) # disable reformatting measures
1.30 - generator.flatten(message)
1.31 + generator.flatten(message, linesep="\r\n")
1.32 return out.getvalue()
1.33
1.34 # Message decoding functions.
2.1 --- a/docs/COPYING.txt Sat Apr 11 17:07:59 2015 +0200
2.2 +++ b/docs/COPYING.txt Sun Apr 12 19:35:41 2015 +0200
2.3 @@ -24,3 +24,30 @@
2.4 License along with this library; see the file LICENCE.txt
2.5 If not, write to the Free Software Foundation, Inc.,
2.6 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
2.7 +
2.8 +Additional Notices
2.9 +------------------
2.10 +
2.11 +The following additional copyright notices (taken from the Python
2.12 +distribution's LICENSE file) apply to modules originating from the Python
2.13 +standard library:
2.14 +
2.15 +Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
2.16 +Python Software Foundation. All rights reserved.
2.17 +
2.18 +Copyright (c) 2000 BeOpen.com.
2.19 +All rights reserved.
2.20 +
2.21 +Copyright (c) 1995-2001 Corporation for National Research Initiatives.
2.22 +All rights reserved.
2.23 +
2.24 +Copyright (c) 1991-1995 Stichting Mathematisch Centrum.
2.25 +All rights reserved.
2.26 +
2.27 +See LICENCE-python.txt for the original licensing conditions applying
2.28 +specifically to modules originating from the Python standard library.
2.29 +
2.30 +Note that these modules have been modified and are made available under the
2.31 +terms of the GNU General Public License as stated above. The inclusion of
2.32 +these additional notices and conditions is done merely to satisfy the request
2.33 +in those conditions that certain notices be preserved in derived works.
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
3.2 +++ b/docs/LICENCE-python.txt Sun Apr 12 19:35:41 2015 +0200
3.3 @@ -0,0 +1,193 @@
3.4 +PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
3.5 +--------------------------------------------
3.6 +
3.7 +1. This LICENSE AGREEMENT is between the Python Software Foundation
3.8 +("PSF"), and the Individual or Organization ("Licensee") accessing and
3.9 +otherwise using this software ("Python") in source or binary form and
3.10 +its associated documentation.
3.11 +
3.12 +2. Subject to the terms and conditions of this License Agreement, PSF
3.13 +hereby grants Licensee a nonexclusive, royalty-free, world-wide
3.14 +license to reproduce, analyze, test, perform and/or display publicly,
3.15 +prepare derivative works, distribute, and otherwise use Python
3.16 +alone or in any derivative version, provided, however, that PSF's
3.17 +License Agreement and PSF's notice of copyright, i.e., "Copyright (c)
3.18 +2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 Python Software Foundation;
3.19 +All Rights Reserved" are retained in Python alone or in any derivative
3.20 +version prepared by Licensee.
3.21 +
3.22 +3. In the event Licensee prepares a derivative work that is based on
3.23 +or incorporates Python or any part thereof, and wants to make
3.24 +the derivative work available to others as provided herein, then
3.25 +Licensee hereby agrees to include in any such work a brief summary of
3.26 +the changes made to Python.
3.27 +
3.28 +4. PSF is making Python available to Licensee on an "AS IS"
3.29 +basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
3.30 +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
3.31 +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
3.32 +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
3.33 +INFRINGE ANY THIRD PARTY RIGHTS.
3.34 +
3.35 +5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
3.36 +FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
3.37 +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
3.38 +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
3.39 +
3.40 +6. This License Agreement will automatically terminate upon a material
3.41 +breach of its terms and conditions.
3.42 +
3.43 +7. Nothing in this License Agreement shall be deemed to create any
3.44 +relationship of agency, partnership, or joint venture between PSF and
3.45 +Licensee. This License Agreement does not grant permission to use PSF
3.46 +trademarks or trade name in a trademark sense to endorse or promote
3.47 +products or services of Licensee, or any third party.
3.48 +
3.49 +8. By copying, installing or otherwise using Python, Licensee
3.50 +agrees to be bound by the terms and conditions of this License
3.51 +Agreement.
3.52 +
3.53 +
3.54 +BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0
3.55 +-------------------------------------------
3.56 +
3.57 +BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1
3.58 +
3.59 +1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an
3.60 +office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the
3.61 +Individual or Organization ("Licensee") accessing and otherwise using
3.62 +this software in source or binary form and its associated
3.63 +documentation ("the Software").
3.64 +
3.65 +2. Subject to the terms and conditions of this BeOpen Python License
3.66 +Agreement, BeOpen hereby grants Licensee a non-exclusive,
3.67 +royalty-free, world-wide license to reproduce, analyze, test, perform
3.68 +and/or display publicly, prepare derivative works, distribute, and
3.69 +otherwise use the Software alone or in any derivative version,
3.70 +provided, however, that the BeOpen Python License is retained in the
3.71 +Software, alone or in any derivative version prepared by Licensee.
3.72 +
3.73 +3. BeOpen is making the Software available to Licensee on an "AS IS"
3.74 +basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
3.75 +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND
3.76 +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
3.77 +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT
3.78 +INFRINGE ANY THIRD PARTY RIGHTS.
3.79 +
3.80 +4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE
3.81 +SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS
3.82 +AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY
3.83 +DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
3.84 +
3.85 +5. This License Agreement will automatically terminate upon a material
3.86 +breach of its terms and conditions.
3.87 +
3.88 +6. This License Agreement shall be governed by and interpreted in all
3.89 +respects by the law of the State of California, excluding conflict of
3.90 +law provisions. Nothing in this License Agreement shall be deemed to
3.91 +create any relationship of agency, partnership, or joint venture
3.92 +between BeOpen and Licensee. This License Agreement does not grant
3.93 +permission to use BeOpen trademarks or trade names in a trademark
3.94 +sense to endorse or promote products or services of Licensee, or any
3.95 +third party. As an exception, the "BeOpen Python" logos available at
3.96 +http://www.pythonlabs.com/logos.html may be used according to the
3.97 +permissions granted on that web page.
3.98 +
3.99 +7. By copying, installing or otherwise using the software, Licensee
3.100 +agrees to be bound by the terms and conditions of this License
3.101 +Agreement.
3.102 +
3.103 +
3.104 +CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1
3.105 +---------------------------------------
3.106 +
3.107 +1. This LICENSE AGREEMENT is between the Corporation for National
3.108 +Research Initiatives, having an office at 1895 Preston White Drive,
3.109 +Reston, VA 20191 ("CNRI"), and the Individual or Organization
3.110 +("Licensee") accessing and otherwise using Python 1.6.1 software in
3.111 +source or binary form and its associated documentation.
3.112 +
3.113 +2. Subject to the terms and conditions of this License Agreement, CNRI
3.114 +hereby grants Licensee a nonexclusive, royalty-free, world-wide
3.115 +license to reproduce, analyze, test, perform and/or display publicly,
3.116 +prepare derivative works, distribute, and otherwise use Python 1.6.1
3.117 +alone or in any derivative version, provided, however, that CNRI's
3.118 +License Agreement and CNRI's notice of copyright, i.e., "Copyright (c)
3.119 +1995-2001 Corporation for National Research Initiatives; All Rights
3.120 +Reserved" are retained in Python 1.6.1 alone or in any derivative
3.121 +version prepared by Licensee. Alternately, in lieu of CNRI's License
3.122 +Agreement, Licensee may substitute the following text (omitting the
3.123 +quotes): "Python 1.6.1 is made available subject to the terms and
3.124 +conditions in CNRI's License Agreement. This Agreement together with
3.125 +Python 1.6.1 may be located on the Internet using the following
3.126 +unique, persistent identifier (known as a handle): 1895.22/1013. This
3.127 +Agreement may also be obtained from a proxy server on the Internet
3.128 +using the following URL: http://hdl.handle.net/1895.22/1013".
3.129 +
3.130 +3. In the event Licensee prepares a derivative work that is based on
3.131 +or incorporates Python 1.6.1 or any part thereof, and wants to make
3.132 +the derivative work available to others as provided herein, then
3.133 +Licensee hereby agrees to include in any such work a brief summary of
3.134 +the changes made to Python 1.6.1.
3.135 +
3.136 +4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS"
3.137 +basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
3.138 +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND
3.139 +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
3.140 +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT
3.141 +INFRINGE ANY THIRD PARTY RIGHTS.
3.142 +
3.143 +5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
3.144 +1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
3.145 +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1,
3.146 +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
3.147 +
3.148 +6. This License Agreement will automatically terminate upon a material
3.149 +breach of its terms and conditions.
3.150 +
3.151 +7. This License Agreement shall be governed by the federal
3.152 +intellectual property law of the United States, including without
3.153 +limitation the federal copyright law, and, to the extent such
3.154 +U.S. federal law does not apply, by the law of the Commonwealth of
3.155 +Virginia, excluding Virginia's conflict of law provisions.
3.156 +Notwithstanding the foregoing, with regard to derivative works based
3.157 +on Python 1.6.1 that incorporate non-separable material that was
3.158 +previously distributed under the GNU General Public License (GPL), the
3.159 +law of the Commonwealth of Virginia shall govern this License
3.160 +Agreement only as to issues arising under or with respect to
3.161 +Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this
3.162 +License Agreement shall be deemed to create any relationship of
3.163 +agency, partnership, or joint venture between CNRI and Licensee. This
3.164 +License Agreement does not grant permission to use CNRI trademarks or
3.165 +trade name in a trademark sense to endorse or promote products or
3.166 +services of Licensee, or any third party.
3.167 +
3.168 +8. By clicking on the "ACCEPT" button where indicated, or by copying,
3.169 +installing or otherwise using Python 1.6.1, Licensee agrees to be
3.170 +bound by the terms and conditions of this License Agreement.
3.171 +
3.172 + ACCEPT
3.173 +
3.174 +
3.175 +CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2
3.176 +--------------------------------------------------
3.177 +
3.178 +Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam,
3.179 +The Netherlands. All rights reserved.
3.180 +
3.181 +Permission to use, copy, modify, and distribute this software and its
3.182 +documentation for any purpose and without fee is hereby granted,
3.183 +provided that the above copyright notice appear in all copies and that
3.184 +both that copyright notice and this permission notice appear in
3.185 +supporting documentation, and that the name of Stichting Mathematisch
3.186 +Centrum or CWI not be used in advertising or publicity pertaining to
3.187 +distribution of the software without specific, written prior
3.188 +permission.
3.189 +
3.190 +STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
3.191 +THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
3.192 +FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
3.193 +FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
3.194 +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
3.195 +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
3.196 +OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
5.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
5.2 +++ b/emailfix/generator.py Sun Apr 12 19:35:41 2015 +0200
5.3 @@ -0,0 +1,378 @@
5.4 +# Copyright (C) 2001-2010 Python Software Foundation
5.5 +# Contact: email-sig@python.org
5.6 +
5.7 +"""Classes to generate plain text from a message object tree."""
5.8 +
5.9 +__all__ = ['Generator', 'DecodedGenerator']
5.10 +
5.11 +import re
5.12 +import sys
5.13 +import time
5.14 +import random
5.15 +import warnings
5.16 +
5.17 +from cStringIO import StringIO
5.18 +from emailfix.header import Header
5.19 +
5.20 +UNDERSCORE = '_'
5.21 +NL = '\n'
5.22 +
5.23 +fcre = re.compile(r'^From ', re.MULTILINE)
5.24 +nlre = re.compile(r'(?<!\r)\n', re.MULTILINE)
5.25 +
5.26 +def _is8bitstring(s):
5.27 + if isinstance(s, str):
5.28 + try:
5.29 + unicode(s, 'us-ascii')
5.30 + except UnicodeError:
5.31 + return True
5.32 + return False
5.33 +
5.34 +
5.35 +
5.36 +class Generator:
5.37 + """Generates output from a Message object tree.
5.38 +
5.39 + This basic generator writes the message to the given file object as plain
5.40 + text.
5.41 + """
5.42 + #
5.43 + # Public interface
5.44 + #
5.45 +
5.46 + def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
5.47 + """Create the generator for message flattening.
5.48 +
5.49 + outfp is the output file-like object for writing the message to. It
5.50 + must have a write() method.
5.51 +
5.52 + Optional mangle_from_ is a flag that, when True (the default), escapes
5.53 + From_ lines in the body of the message by putting a `>' in front of
5.54 + them.
5.55 +
5.56 + Optional maxheaderlen specifies the longest length for a non-continued
5.57 + header. When a header line is longer (in characters, with tabs
5.58 + expanded to 8 spaces) than maxheaderlen, the header will split as
5.59 + defined in the Header class. Set maxheaderlen to zero to disable
5.60 + header wrapping. The default is 78, as recommended (but not required)
5.61 + by RFC 2822.
5.62 + """
5.63 + self._fp = outfp
5.64 + self._mangle_from_ = mangle_from_
5.65 + self._maxheaderlen = maxheaderlen
5.66 +
5.67 + def write(self, s):
5.68 + # Just delegate to the file object
5.69 + self._fp.write(s)
5.70 +
5.71 + def flatten(self, msg, unixfrom=False, linesep=NL):
5.72 + """Print the message object tree rooted at msg to the output file
5.73 + specified when the Generator instance was created.
5.74 +
5.75 + unixfrom is a flag that forces the printing of a Unix From_ delimiter
5.76 + before the first object in the message tree. If the original message
5.77 + has no From_ delimiter, a `standard' one is crafted. By default, this
5.78 + is False to inhibit the printing of any From_ delimiter.
5.79 +
5.80 + Note that for subobjects, no From_ line is printed.
5.81 + """
5.82 + self._NL = linesep
5.83 + if unixfrom:
5.84 + ufrom = msg.get_unixfrom()
5.85 + if not ufrom:
5.86 + ufrom = 'From nobody ' + time.ctime(time.time())
5.87 + self.write(ufrom + self._NL)
5.88 + self._write(msg)
5.89 +
5.90 + def clone(self, fp):
5.91 + """Clone this generator with the exact same options."""
5.92 + return self.__class__(fp, self._mangle_from_, self._maxheaderlen)
5.93 +
5.94 + #
5.95 + # Protected interface - undocumented ;/
5.96 + #
5.97 +
5.98 + def _write(self, msg):
5.99 + # We can't write the headers yet because of the following scenario:
5.100 + # say a multipart message includes the boundary string somewhere in
5.101 + # its body. We'd have to calculate the new boundary /before/ we write
5.102 + # the headers so that we can write the correct Content-Type:
5.103 + # parameter.
5.104 + #
5.105 + # The way we do this, so as to make the _handle_*() methods simpler,
5.106 + # is to cache any subpart writes into a StringIO. Then we write the
5.107 + # headers and the StringIO contents. That way, subpart handlers can
5.108 + # Do The Right Thing, and can still modify the Content-Type: header if
5.109 + # necessary.
5.110 + oldfp = self._fp
5.111 + try:
5.112 + self._fp = sfp = StringIO()
5.113 + self._dispatch(msg)
5.114 + finally:
5.115 + self._fp = oldfp
5.116 + # Write the headers. First we see if the message object wants to
5.117 + # handle that itself. If not, we'll do it generically.
5.118 + meth = getattr(msg, '_write_headers', None)
5.119 + if meth is None:
5.120 + self._write_headers(msg)
5.121 + else:
5.122 + meth(self)
5.123 + self._fp.write(sfp.getvalue())
5.124 +
5.125 + def _dispatch(self, msg):
5.126 + # Get the Content-Type: for the message, then try to dispatch to
5.127 + # self._handle_<maintype>_<subtype>(). If there's no handler for the
5.128 + # full MIME type, then dispatch to self._handle_<maintype>(). If
5.129 + # that's missing too, then dispatch to self._writeBody().
5.130 + main = msg.get_content_maintype()
5.131 + sub = msg.get_content_subtype()
5.132 + specific = UNDERSCORE.join((main, sub)).replace('-', '_')
5.133 + meth = getattr(self, '_handle_' + specific, None)
5.134 + if meth is None:
5.135 + generic = main.replace('-', '_')
5.136 + meth = getattr(self, '_handle_' + generic, None)
5.137 + if meth is None:
5.138 + meth = self._writeBody
5.139 + meth(msg)
5.140 +
5.141 + #
5.142 + # Default handlers
5.143 + #
5.144 +
5.145 + def _write_headers(self, msg):
5.146 + for h, v in msg.items():
5.147 + self.write('%s: ' % h)
5.148 + if self._maxheaderlen == 0:
5.149 + # Explicit no-wrapping
5.150 + if _is8bitstring(v):
5.151 + self.write(v + self._NL)
5.152 + else:
5.153 + self.write(nlre.sub(self._NL, v) + self._NL)
5.154 + elif isinstance(v, Header):
5.155 + # Header instances know what to do
5.156 + self.write(v.encode(linesep=self._NL) + self._NL)
5.157 + elif _is8bitstring(v):
5.158 + # If we have raw 8bit data in a byte string, we have no idea
5.159 + # what the encoding is. There is no safe way to split this
5.160 + # string. If it's ascii-subset, then we could do a normal
5.161 + # ascii split, but if it's multibyte then we could break the
5.162 + # string. There's no way to know so the least harm seems to
5.163 + # be to not split the string and risk it being too long.
5.164 + self.write(v + self._NL)
5.165 + else:
5.166 + # Header's got lots of smarts, so use it. Note that this is
5.167 + # fundamentally broken though because we lose idempotency when
5.168 + # the header string is continued with tabs. It will now be
5.169 + # continued with spaces. This was reversedly broken before we
5.170 + # fixed bug 1974. Either way, we lose.
5.171 + self.write(Header(
5.172 + v, maxlinelen=self._maxheaderlen, header_name=h).encode(
5.173 + linesep=self._NL) + self._NL)
5.174 + # A blank line always separates headers from body
5.175 + self.write(self._NL)
5.176 +
5.177 + #
5.178 + # Handlers for writing types and subtypes
5.179 + #
5.180 +
5.181 + def _handle_text(self, msg):
5.182 + payload = msg.get_payload()
5.183 + if payload is None:
5.184 + return
5.185 + if not isinstance(payload, basestring):
5.186 + raise TypeError('string payload expected: %s' % type(payload))
5.187 + if self._mangle_from_:
5.188 + payload = fcre.sub('>From ', payload)
5.189 + self.write(nlre.sub(self._NL, payload))
5.190 +
5.191 + # Default body handler
5.192 + _writeBody = _handle_text
5.193 +
5.194 + def _handle_multipart(self, msg):
5.195 + # The trick here is to write out each part separately, merge them all
5.196 + # together, and then make sure that the boundary we've chosen isn't
5.197 + # present in the payload.
5.198 + msgtexts = []
5.199 + subparts = msg.get_payload()
5.200 + if subparts is None:
5.201 + subparts = []
5.202 + elif isinstance(subparts, basestring):
5.203 + # e.g. a non-strict parse of a message with no starting boundary.
5.204 + self.write(subparts)
5.205 + return
5.206 + elif not isinstance(subparts, list):
5.207 + # Scalar payload
5.208 + subparts = [subparts]
5.209 + for part in subparts:
5.210 + s = StringIO()
5.211 + g = self.clone(s)
5.212 + g.flatten(part, unixfrom=False, linesep=self._NL)
5.213 + msgtexts.append(s.getvalue())
5.214 + # BAW: What about boundaries that are wrapped in double-quotes?
5.215 + boundary = msg.get_boundary()
5.216 + if not boundary:
5.217 + # Create a boundary that doesn't appear in any of the
5.218 + # message texts.
5.219 + alltext = self._NL.join(msgtexts)
5.220 + boundary = _make_boundary(alltext)
5.221 + msg.set_boundary(boundary)
5.222 + # If there's a preamble, write it out, with a trailing CRLF
5.223 + if msg.preamble is not None:
5.224 + if self._mangle_from_:
5.225 + preamble = fcre.sub('>From ', msg.preamble)
5.226 + else:
5.227 + preamble = msg.preamble
5.228 + self.write(preamble + self._NL)
5.229 + # dash-boundary transport-padding CRLF
5.230 + self.write('--' + boundary + self._NL)
5.231 + # body-part
5.232 + if msgtexts:
5.233 + self.write(msgtexts.pop(0))
5.234 + # *encapsulation
5.235 + # --> delimiter transport-padding
5.236 + # --> CRLF body-part
5.237 + for body_part in msgtexts:
5.238 + # delimiter transport-padding CRLF
5.239 + self.write(self._NL + '--' + boundary + self._NL)
5.240 + # body-part
5.241 + self.write(body_part)
5.242 + # close-delimiter transport-padding
5.243 + self.write(self._NL + '--' + boundary + '--')
5.244 + if msg.epilogue is not None:
5.245 + self.write(self._NL)
5.246 + if self._mangle_from_:
5.247 + epilogue = fcre.sub('>From ', msg.epilogue)
5.248 + else:
5.249 + epilogue = msg.epilogue
5.250 + self.write(epilogue)
5.251 +
5.252 + def _handle_multipart_signed(self, msg):
5.253 + # The contents of signed parts has to stay unmodified in order to keep
5.254 + # the signature intact per RFC1847 2.1, so we disable header wrapping.
5.255 + # RDM: This isn't enough to completely preserve the part, but it helps.
5.256 + old_maxheaderlen = self._maxheaderlen
5.257 + try:
5.258 + self._maxheaderlen = 0
5.259 + self._handle_multipart(msg)
5.260 + finally:
5.261 + self._maxheaderlen = old_maxheaderlen
5.262 +
5.263 + def _handle_message_delivery_status(self, msg):
5.264 + # We can't just write the headers directly to self's file object
5.265 + # because this will leave an extra newline between the last header
5.266 + # block and the boundary. Sigh.
5.267 + blocks = []
5.268 + for part in msg.get_payload():
5.269 + s = StringIO()
5.270 + g = self.clone(s)
5.271 + g.flatten(part, unixfrom=False, linesep=self._NL)
5.272 + text = s.getvalue()
5.273 + lines = text.split(self._NL)
5.274 + # Strip off the unnecessary trailing empty line
5.275 + if lines and lines[-1] == '':
5.276 + blocks.append(self._NL.join(lines[:-1]))
5.277 + else:
5.278 + blocks.append(text)
5.279 + # Now join all the blocks with an empty line. This has the lovely
5.280 + # effect of separating each block with an empty line, but not adding
5.281 + # an extra one after the last one.
5.282 + self.write(self._NL.join(blocks))
5.283 +
5.284 + def _handle_message(self, msg):
5.285 + s = StringIO()
5.286 + g = self.clone(s)
5.287 + # The payload of a message/rfc822 part should be a multipart sequence
5.288 + # of length 1. The zeroth element of the list should be the Message
5.289 + # object for the subpart. Extract that object, stringify it, and
5.290 + # write it out.
5.291 + # Except, it turns out, when it's a string instead, which happens when
5.292 + # and only when HeaderParser is used on a message of mime type
5.293 + # message/rfc822. Such messages are generated by, for example,
5.294 + # Groupwise when forwarding unadorned messages. (Issue 7970.) So
5.295 + # in that case we just emit the string body.
5.296 + payload = msg.get_payload()
5.297 + if isinstance(payload, list):
5.298 + g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL)
5.299 + payload = s.getvalue()
5.300 + self.write(payload)
5.301 +
5.302 +
5.303 +
5.304 +_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'
5.305 +
5.306 +class DecodedGenerator(Generator):
5.307 + """Generates a text representation of a message.
5.308 +
5.309 + Like the Generator base class, except that non-text parts are substituted
5.310 + with a format string representing the part.
5.311 + """
5.312 + def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
5.313 + """Like Generator.__init__() except that an additional optional
5.314 + argument is allowed.
5.315 +
5.316 + Walks through all subparts of a message. If the subpart is of main
5.317 + type `text', then it prints the decoded payload of the subpart.
5.318 +
5.319 + Otherwise, fmt is a format string that is used instead of the message
5.320 + payload. fmt is expanded with the following keywords (in
5.321 + %(keyword)s format):
5.322 +
5.323 + type : Full MIME type of the non-text part
5.324 + maintype : Main MIME type of the non-text part
5.325 + subtype : Sub-MIME type of the non-text part
5.326 + filename : Filename of the non-text part
5.327 + description: Description associated with the non-text part
5.328 + encoding : Content transfer encoding of the non-text part
5.329 +
5.330 + The default value for fmt is None, meaning
5.331 +
5.332 + [Non-text (%(type)s) part of message omitted, filename %(filename)s]
5.333 + """
5.334 + Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
5.335 + if fmt is None:
5.336 + self._fmt = _FMT
5.337 + else:
5.338 + self._fmt = fmt
5.339 +
5.340 + def _dispatch(self, msg):
5.341 + for part in msg.walk():
5.342 + maintype = part.get_content_maintype()
5.343 + if maintype == 'text':
5.344 + print >> self, part.get_payload(decode=True)
5.345 + elif maintype == 'multipart':
5.346 + # Just skip this
5.347 + pass
5.348 + else:
5.349 + print >> self, self._fmt % {
5.350 + 'type' : part.get_content_type(),
5.351 + 'maintype' : part.get_content_maintype(),
5.352 + 'subtype' : part.get_content_subtype(),
5.353 + 'filename' : part.get_filename('[no filename]'),
5.354 + 'description': part.get('Content-Description',
5.355 + '[no description]'),
5.356 + 'encoding' : part.get('Content-Transfer-Encoding',
5.357 + '[no encoding]'),
5.358 + }
5.359 +
5.360 +
5.361 +
5.362 +# Helper
5.363 +_width = len(repr(sys.maxint-1))
5.364 +_fmt = '%%0%dd' % _width
5.365 +
5.366 +def _make_boundary(text=None):
5.367 + # Craft a random boundary. If text is given, ensure that the chosen
5.368 + # boundary doesn't appear in the text.
5.369 + token = random.randrange(sys.maxint)
5.370 + boundary = ('=' * 15) + (_fmt % token) + '=='
5.371 + if text is None:
5.372 + return boundary
5.373 + b = boundary
5.374 + counter = 0
5.375 + while True:
5.376 + cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
5.377 + if not cre.search(text):
5.378 + break
5.379 + b = boundary + '.' + str(counter)
5.380 + counter += 1
5.381 + return b
6.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
6.2 +++ b/emailfix/header.py Sun Apr 12 19:35:41 2015 +0200
6.3 @@ -0,0 +1,515 @@
6.4 +# Copyright (C) 2002-2006 Python Software Foundation
6.5 +# Author: Ben Gertzfield, Barry Warsaw
6.6 +# Contact: email-sig@python.org
6.7 +
6.8 +"""Header encoding and decoding functionality."""
6.9 +
6.10 +__all__ = [
6.11 + 'Header',
6.12 + 'decode_header',
6.13 + 'make_header',
6.14 + ]
6.15 +
6.16 +import re
6.17 +import binascii
6.18 +
6.19 +import email.quoprimime
6.20 +import email.base64mime
6.21 +
6.22 +from email.errors import HeaderParseError
6.23 +from email.charset import Charset
6.24 +
6.25 +NL = '\n'
6.26 +SPACE = ' '
6.27 +USPACE = u' '
6.28 +SPACE8 = ' ' * 8
6.29 +UEMPTYSTRING = u''
6.30 +
6.31 +MAXLINELEN = 76
6.32 +
6.33 +USASCII = Charset('us-ascii')
6.34 +UTF8 = Charset('utf-8')
6.35 +
6.36 +# Match encoded-word strings in the form =?charset?q?Hello_World?=
6.37 +ecre = re.compile(r'''
6.38 + =\? # literal =?
6.39 + (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
6.40 + \? # literal ?
6.41 + (?P<encoding>[qb]) # either a "q" or a "b", case insensitive
6.42 + \? # literal ?
6.43 + (?P<encoded>.*?) # non-greedy up to the next ?= is the encoded string
6.44 + \?= # literal ?=
6.45 + (?=[ \t]|$) # whitespace or the end of the string
6.46 + ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE)
6.47 +
6.48 +# Field name regexp, including trailing colon, but not separating whitespace,
6.49 +# according to RFC 2822. Character range is from exclamation mark to tilde.
6.50 +# For use with .match()
6.51 +fcre = re.compile(r'[\041-\176]+:$')
6.52 +
6.53 +# Find a header embedded in a putative header value. Used to check for
6.54 +# header injection attack.
6.55 +_embeded_header = re.compile(r'\n[^ \t]+:')
6.56 +
6.57 +
6.58 +
6.59 +# Helpers
6.60 +_max_append = email.quoprimime._max_append
6.61 +
6.62 +
6.63 +
6.64 +def decode_header(header):
6.65 + """Decode a message header value without converting charset.
6.66 +
6.67 + Returns a list of (decoded_string, charset) pairs containing each of the
6.68 + decoded parts of the header. Charset is None for non-encoded parts of the
6.69 + header, otherwise a lower-case string containing the name of the character
6.70 + set specified in the encoded string.
6.71 +
6.72 + An email.errors.HeaderParseError may be raised when certain decoding errors
6.73 + occur (e.g. a base64 decoding exception).
6.74 + """
6.75 + # If no encoding, just return the header
6.76 + header = str(header)
6.77 + if not ecre.search(header):
6.78 + return [(header, None)]
6.79 + decoded = []
6.80 + dec = ''
6.81 + for line in header.splitlines():
6.82 + # This line might not have an encoding in it
6.83 + if not ecre.search(line):
6.84 + decoded.append((line, None))
6.85 + continue
6.86 + parts = ecre.split(line)
6.87 + while parts:
6.88 + unenc = parts.pop(0).strip()
6.89 + if unenc:
6.90 + # Should we continue a long line?
6.91 + if decoded and decoded[-1][1] is None:
6.92 + decoded[-1] = (decoded[-1][0] + SPACE + unenc, None)
6.93 + else:
6.94 + decoded.append((unenc, None))
6.95 + if parts:
6.96 + charset, encoding = [s.lower() for s in parts[0:2]]
6.97 + encoded = parts[2]
6.98 + dec = None
6.99 + if encoding == 'q':
6.100 + dec = email.quoprimime.header_decode(encoded)
6.101 + elif encoding == 'b':
6.102 + paderr = len(encoded) % 4 # Postel's law: add missing padding
6.103 + if paderr:
6.104 + encoded += '==='[:4 - paderr]
6.105 + try:
6.106 + dec = email.base64mime.decode(encoded)
6.107 + except binascii.Error:
6.108 + # Turn this into a higher level exception. BAW: Right
6.109 + # now we throw the lower level exception away but
6.110 + # when/if we get exception chaining, we'll preserve it.
6.111 + raise HeaderParseError
6.112 + if dec is None:
6.113 + dec = encoded
6.114 +
6.115 + if decoded and decoded[-1][1] == charset:
6.116 + decoded[-1] = (decoded[-1][0] + dec, decoded[-1][1])
6.117 + else:
6.118 + decoded.append((dec, charset))
6.119 + del parts[0:3]
6.120 + return decoded
6.121 +
6.122 +
6.123 +
def make_header(decoded_seq, maxlinelen=None, header_name=None,
                continuation_ws=' '):
    """Build a Header from (decoded_string, charset) pairs.

    decoded_seq is a sequence of pairs in the format produced by
    decode_header(): the string and either None, a charset name, or a
    Charset instance.

    maxlinelen, header_name and continuation_ws are handed to the Header
    constructor unchanged.  Returns the populated Header instance.
    """
    result = Header(maxlinelen=maxlinelen, header_name=header_name,
                    continuation_ws=continuation_ws)
    for text, cs in decoded_seq:
        if cs is None or isinstance(cs, Charset):
            # None and ready-made Charset objects go to append() as they are.
            result.append(text, cs)
        else:
            # Anything else is wrapped in a Charset first.
            result.append(text, Charset(cs))
    return result
6.144 +
6.145 +
6.146 +
class Header:
    """A header value held as a list of (string, charset) chunks.

    Chunks are added with append() and turned into a folded, encoded string
    by encode().
    """

    def __init__(self, s=None, charset=None,
                 maxlinelen=None, header_name=None,
                 continuation_ws=' ', errors='strict'):
        """Create a MIME-compliant header that can contain many character sets.

        Optional s is the initial header value.  If None, the initial header
        value is not set.  You can later append to the header with .append()
        method calls.  s may be a byte string or a Unicode string, but see the
        .append() documentation for semantics.

        Optional charset serves two purposes: it has the same meaning as the
        charset argument to the .append() method.  It also sets the default
        character set for all subsequent .append() calls that omit the charset
        argument.  If charset is not provided in the constructor, the us-ascii
        charset is used both as s's initial charset and as the default for
        subsequent .append() calls.

        The maximum line length can be specified explicitly via maxlinelen.
        For splitting the first line to a shorter value (to account for the
        field header which isn't included in s, e.g. `Subject') pass in the
        name of the field in header_name.  The default maxlinelen is 76.

        continuation_ws must be RFC 2822 compliant folding whitespace (usually
        either a space or a hard tab) which will be prepended to continuation
        lines.

        errors is passed through to the .append() call.
        """
        if charset is None:
            charset = USASCII
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        self._charset = charset
        self._continuation_ws = continuation_ws
        # Width of the continuation whitespace with each tab counted as
        # SPACE8.
        cws_expanded_len = len(continuation_ws.replace('\t', SPACE8))
        # BAW: I believe `chunks' and `maxlinelen' should be non-public.
        self._chunks = []
        if s is not None:
            self.append(s, charset, errors)
        if maxlinelen is None:
            maxlinelen = MAXLINELEN
        if header_name is None:
            # We don't know anything about the field header so the first line
            # is the same length as subsequent lines.
            self._firstlinelen = maxlinelen
        else:
            # The first line should be shorter to take into account the field
            # header.  Also subtract off 2 extra for the colon and space.
            self._firstlinelen = maxlinelen - len(header_name) - 2
        # Second and subsequent lines should subtract off the length in
        # columns of the continuation whitespace prefix.
        self._maxlinelen = maxlinelen - cws_expanded_len

    def __str__(self):
        """A synonym for self.encode()."""
        return self.encode()

    def __unicode__(self):
        """Helper for the built-in unicode function."""
        uchunks = []
        lastcs = None
        for s, charset in self._chunks:
            # We must preserve spaces between encoded and non-encoded word
            # boundaries, which means for us we need to add a space when we go
            # from a charset to None/us-ascii, or from None/us-ascii to a
            # charset.  Only do this for the second and subsequent chunks.
            nextcs = charset
            if uchunks:
                if lastcs not in (None, 'us-ascii'):
                    if nextcs in (None, 'us-ascii'):
                        uchunks.append(USPACE)
                        nextcs = None
                elif nextcs not in (None, 'us-ascii'):
                    uchunks.append(USPACE)
            lastcs = nextcs
            uchunks.append(unicode(s, str(charset)))
        return UEMPTYSTRING.join(uchunks)

    # Rich comparison operators for equality only.  BAW: does it make sense to
    # have or explicitly disable <, <=, >, >= operators?
    def __eq__(self, other):
        # other may be a Header or a string.  Both are fine so coerce
        # ourselves to a string, swap the args and do another comparison.
        return other == self.encode()

    def __ne__(self, other):
        # Defined as the negation of __eq__.
        return not self == other

    def append(self, s, charset=None, errors='strict'):
        """Append a string to the MIME header.

        Optional charset, if given, should be a Charset instance or the name
        of a character set (which will be converted to a Charset instance).  A
        value of None (the default) means that the charset given in the
        constructor is used.

        s may be a byte string or a Unicode string.  If it is a byte string
        (i.e. isinstance(s, str) is true), then charset is the encoding of
        that byte string, and a UnicodeError will be raised if the string
        cannot be decoded with that charset.  If s is a Unicode string, then
        charset is a hint specifying the character set of the characters in
        the string.  In this case, when producing an RFC 2822 compliant header
        using RFC 2047 rules, the Unicode string will be encoded using the
        following charsets in order: us-ascii, the charset hint, utf-8.  The
        first character set not to provoke a UnicodeError is used.

        Optional `errors' is passed as the third argument to any unicode() or
        ustr.encode() call.
        """
        if charset is None:
            charset = self._charset
        elif not isinstance(charset, Charset):
            charset = Charset(charset)
        # If the charset is our faux 8bit charset, leave the string unchanged
        if charset != '8bit':
            # We need to test that the string can be converted to unicode and
            # back to a byte string, given the input and output codecs of the
            # charset.
            if isinstance(s, str):
                # Possibly raise UnicodeError if the byte string can't be
                # converted to a unicode with the input codec of the charset.
                incodec = charset.input_codec or 'us-ascii'
                ustr = unicode(s, incodec, errors)
                # Now make sure that the unicode could be converted back to a
                # byte string with the output codec, which may be different
                # than the input codec.  Still, use the original byte string.
                outcodec = charset.output_codec or 'us-ascii'
                ustr.encode(outcodec, errors)
            elif isinstance(s, unicode):
                # Now we have to be sure the unicode string can be converted
                # to a byte string with a reasonable output codec.  We want to
                # use the byte string in the chunk.  Note that the loop
                # rebinds charset, so the chunk is stored with whichever
                # charset succeeded.
                for charset in USASCII, charset, UTF8:
                    try:
                        outcodec = charset.output_codec or 'us-ascii'
                        s = s.encode(outcodec, errors)
                        break
                    except UnicodeError:
                        pass
                else:
                    assert False, 'utf-8 conversion failed'
        self._chunks.append((s, charset))

    def _split(self, s, charset, maxlinelen, splitchars):
        # Split up a header safely for use with encode_chunks.  Returns a
        # list of (string, charset) pairs.  maxlinelen applies to the first
        # piece only; the recursive call for the remainder uses
        # self._maxlinelen.
        splittable = charset.to_splittable(s)
        encoded = charset.from_splittable(splittable, True)
        elen = charset.encoded_header_len(encoded)
        # If the line's encoded length fits, just return it
        if elen <= maxlinelen:
            return [(encoded, charset)]
        # If we have undetermined raw 8bit characters sitting in a byte
        # string, we really don't know what the right thing to do is.  We
        # can't really split it because it might be multibyte data which we
        # could break if we split it between pairs.  The least harm seems to
        # be to not split the header at all, but that means they could go out
        # longer than maxlinelen.
        if charset == '8bit':
            return [(s, charset)]
        # BAW: I'm not sure what the right test here is.  What we're trying to
        # do is be faithful to RFC 2822's recommendation that ($2.2.3):
        #
        # "Note: Though structured field bodies are defined in such a way that
        # folding can take place between many of the lexical tokens (and even
        # within some of the lexical tokens), folding SHOULD be limited to
        # placing the CRLF at higher-level syntactic breaks."
        #
        # For now, I can only imagine doing this when the charset is us-ascii,
        # although it's possible that other charsets may also benefit from the
        # higher-level syntactic breaks.
        elif charset == 'us-ascii':
            return self._split_ascii(s, charset, maxlinelen, splitchars)
        # BAW: should we use encoded?
        elif elen == len(s):
            # We can split on _maxlinelen boundaries because we know that the
            # encoding won't change the size of the string
            splitpnt = maxlinelen
            first = charset.from_splittable(splittable[:splitpnt], False)
            last = charset.from_splittable(splittable[splitpnt:], False)
        else:
            # Binary search for split point
            first, last = _binsplit(splittable, charset, maxlinelen)
        # first is of the proper length so just wrap it in the appropriate
        # chrome.  last must be recursively split.
        fsplittable = charset.to_splittable(first)
        fencoded = charset.from_splittable(fsplittable, True)
        chunk = [(fencoded, charset)]
        return chunk + self._split(last, charset, self._maxlinelen, splitchars)

    def _split_ascii(self, s, charset, firstlen, splitchars):
        # Fold s with the module-level _split_ascii() and pair every
        # resulting line with charset.
        # NOTE(review): self._NL is assigned only in encode(), not in
        # __init__, so calling this before encode() has run raises
        # AttributeError.
        chunks = _split_ascii(s, firstlen, self._maxlinelen,
                              self._continuation_ws, splitchars, self._NL)
        return zip(chunks, [charset]*len(chunks))

    def _encode_chunks(self, newchunks, maxlinelen):
        # MIME-encode a header with many different charsets and/or encodings.
        #
        # Given a list of pairs (string, charset), return a MIME-encoded
        # string suitable for use in a header field.  Each pair may have
        # different charsets and/or encodings, and the resulting header will
        # accurately reflect each setting.
        #
        # Each encoding can be email.utils.QP (quoted-printable, for
        # ASCII-like character sets like iso-8859-1), email.utils.BASE64
        # (Base64, for non-ASCII like character sets like KOI8-R and
        # iso-2022-jp), or None (no encoding).
        #
        # Each pair will be represented on a separate line; the resulting
        # string will be in the format:
        #
        # =?charset1?q?Mar=EDa_Gonz=E1lez_Alonso?=\n
        #  =?charset2?b?SvxyZ2VuIEL2aW5n?="
        #
        # The lines are joined with self._NL (set by encode()) followed by
        # self._continuation_ws.
        chunks = []
        for header, charset in newchunks:
            if not header:
                continue
            if charset is None or charset.header_encoding is None:
                s = header
            else:
                s = charset.header_encode(header)
            # Don't add more folding whitespace than necessary
            if chunks and chunks[-1].endswith(' '):
                extra = ''
            else:
                extra = ' '
            _max_append(chunks, s, maxlinelen, extra)
        joiner = self._NL + self._continuation_ws
        return joiner.join(chunks)

    def encode(self, splitchars=';, ', linesep=NL):
        """Encode a message header into an RFC-compliant format.

        There are many issues involved in converting a given string for use in
        an email header.  Only certain character sets are readable in most
        email clients, and as header strings can only contain a subset of
        7-bit ASCII, care must be taken to properly convert and encode (with
        Base64 or quoted-printable) header strings.  In addition, there is a
        75-character length limit on any given encoded header field, so
        line-wrapping must be performed, even with double-byte character sets.

        This method will do its best to convert the string to the correct
        character set used in email, and encode and line wrap it safely with
        the appropriate scheme for that character set.

        If the given charset is not known or an error occurs during
        conversion, this function will return the header untouched.

        Optional splitchars is a string containing characters to split long
        ASCII lines on, in rough support of RFC 2822's `highest level
        syntactic breaks'.  This doesn't affect RFC 2047 encoded lines.

        Optional linesep is the line separator used when splitting and
        joining folded lines.  It is stored on the instance as self._NL for
        the duration of (and after) the call.

        Raises HeaderParseError if the encoded value appears to contain an
        embedded header.
        """
        # Remember the separator for _split_ascii() and _encode_chunks().
        self._NL = linesep
        newchunks = []
        maxlinelen = self._firstlinelen
        lastlen = 0
        for s, charset in self._chunks:
            # The first bit of the next chunk should be just long enough to
            # fill the next line.  Don't forget the space separating the
            # encoded words.
            targetlen = maxlinelen - lastlen - 1
            if targetlen < charset.encoded_header_len(''):
                # Stick it on the next line
                targetlen = maxlinelen
            newchunks += self._split(s, charset, targetlen, splitchars)
            lastchunk, lastcharset = newchunks[-1]
            lastlen = lastcharset.encoded_header_len(lastchunk)
        value = self._encode_chunks(newchunks, maxlinelen)
        # Refuse values in which a newline is followed by something that
        # looks like a new header field (header injection check).
        if _embeded_header.search(value):
            raise HeaderParseError("header value appears to contain "
                "an embedded header: {!r}".format(value))
        return value
6.419 +
6.420 +
6.421 +
def _split_ascii(s, firstlen, restlen, continuation_ws, splitchars, linesep):
    """Fold an ASCII header string into a list of lines.

    s is split on linesep and each resulting line is folded separately.

    firstlen is the length limit for the first output line and restlen the
    limit for every later one.  continuation_ws is only measured here (tabs
    counted as SPACE8) to account for the prefix that the caller adds to
    continuation lines.  splitchars lists the characters to break on, tried
    in order; the first one that occurs in a line is used for that line.

    Returns the list of folded lines, without continuation whitespace or
    line separators.  A line that contains none of splitchars is returned
    unchanged even if it is too long.
    """
    lines = []
    maxlen = firstlen
    for line in s.split(linesep):
        # Ignore any leading whitespace (i.e. continuation whitespace) already
        # on the line, since we'll be adding our own.
        line = line.lstrip()
        if len(line) < maxlen:
            lines.append(line)
            maxlen = restlen
            continue
        # Attempt to split the line at the highest-level syntactic break
        # possible.  Note that we don't have a lot of smarts about field
        # syntax; we just try to break on semi-colons, then commas, then
        # whitespace.
        for ch in splitchars:
            if ch in line:
                break
        else:
            # There's nothing useful to split the line on, not even spaces, so
            # just append this line unchanged
            lines.append(line)
            maxlen = restlen
            continue
        # Now split the line on the character plus trailing whitespace.  The
        # character is escaped so that a split character which happens to be
        # a regular expression metacharacter is matched literally, in line
        # with the plain `ch in line` test above.
        cre = re.compile(r'%s\s*' % re.escape(ch))
        if ch in ';,':
            eol = ch
        else:
            eol = ''
        joiner = eol + ' '
        joinlen = len(joiner)
        wslen = len(continuation_ws.replace('\t', SPACE8))
        this = []
        linelen = 0
        for part in cre.split(line):
            curlen = linelen + max(0, len(this)-1) * joinlen
            partlen = len(part)
            onfirstline = not lines
            # We don't want to split after the field name, if we're on the
            # first line and the field name is present in the header string.
            if (ch == ' ' and onfirstline and
                    len(this) == 1 and fcre.match(this[0])):
                this.append(part)
                linelen += partlen
            elif curlen + partlen > maxlen:
                if this:
                    lines.append(joiner.join(this) + eol)
                # If this part is longer than maxlen and we aren't already
                # splitting on whitespace, try to recursively split this line
                # on whitespace.
                if partlen > maxlen and ch != ' ':
                    # Pass our own linesep down.  This is a module-level
                    # function, so there is no `self` to read a separator
                    # from; referring to one here raised NameError.
                    subl = _split_ascii(part, maxlen, restlen,
                                        continuation_ws, ' ', linesep)
                    lines.extend(subl[:-1])
                    this = [subl[-1]]
                else:
                    this = [part]
                linelen = wslen + len(this[-1])
                maxlen = restlen
            else:
                this.append(part)
                linelen += partlen
        # Put any left over parts on a line by themselves
        if this:
            lines.append(joiner.join(this))
    return lines
6.489 +
6.490 +
6.491 +
6.492 +def _binsplit(splittable, charset, maxlinelen):
6.493 + i = 0
6.494 + j = len(splittable)
6.495 + while i < j:
6.496 + # Invariants:
6.497 + # 1. splittable[:k] fits for all k <= i (note that we *assume*,
6.498 + # at the start, that splittable[:0] fits).
6.499 + # 2. splittable[:k] does not fit for any k > j (at the start,
6.500 + # this means we shouldn't look at any k > len(splittable)).
6.501 + # 3. We don't know about splittable[:k] for k in i+1..j.
6.502 + # 4. We want to set i to the largest k that fits, with i <= k <= j.
6.503 + #
6.504 + m = (i+j+1) >> 1 # ceiling((i+j)/2); i < m <= j
6.505 + chunk = charset.from_splittable(splittable[:m], True)
6.506 + chunklen = charset.encoded_header_len(chunk)
6.507 + if chunklen <= maxlinelen:
6.508 + # m is acceptable, so is a new lower bound.
6.509 + i = m
6.510 + else:
6.511 + # m is not acceptable, so final i must be < m.
6.512 + j = m - 1
6.513 + # i == j. Invariant #1 implies that splittable[:i] fits, and
6.514 + # invariant #2 implies that splittable[:i+1] does not fit, so i
6.515 + # is what we're looking for.
6.516 + first = charset.from_splittable(splittable[:i], False)
6.517 + last = charset.from_splittable(splittable[i:], False)
6.518 + return first, last