Lib/email/generator.py

   1 # Copyright (C) 2001-2009 Python Software Foundation
   2 # Contact: email-sig@python.org
   3
   4 """Classes to generate plain text from a message object tree."""
   5
   6 __all__ = ['Generator', 'DecodedGenerator']
   7
   8 import re
   9 import sys
  10 import time
  11 import random
  12 import warnings
  13
  14 from cStringIO import StringIO
  15 from email.header import Header
  16
# Joiner used to build '_handle_<maintype>_<subtype>' method names.
UNDERSCORE = '_'
# Line separator used when joining generated message text.
NL = '\n'

# Matches 'From ' at the start of any line; used for From_ mangling.
fcre = re.compile(r'^From ', re.MULTILINE)
  21
  22 def _is8bitstring(s):
  23     if isinstance(s, str):
  24         try:
  25             unicode(s, 'us-ascii')
  26         except UnicodeError:
  27             return True
  28     return False
  29
  30
  31 \f
  32 class Generator:
  33     """Generates output from a Message object tree.
  34
  35     This basic generator writes the message to the given file object as plain
  36     text.
  37     """
  38     #
  39     # Public interface
  40     #
  41
  42     def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
  43         """Create the generator for message flattening.
  44
  45         outfp is the output file-like object for writing the message to.  It
  46         must have a write() method.
  47
  48         Optional mangle_from_ is a flag that, when True (the default), escapes
  49         From_ lines in the body of the message by putting a `>' in front of
  50         them.
  51
  52         Optional maxheaderlen specifies the longest length for a non-continued
  53         header.  When a header line is longer (in characters, with tabs
  54         expanded to 8 spaces) than maxheaderlen, the header will split as
  55         defined in the Header class.  Set maxheaderlen to zero to disable
  56         header wrapping.  The default is 78, as recommended (but not required)
  57         by RFC 2822.
  58         """
  59         self._fp = outfp
  60         self._mangle_from_ = mangle_from_
  61         self._maxheaderlen = maxheaderlen
  62
  63     def write(self, s):
  64         # Just delegate to the file object
  65         self._fp.write(s)
  66
  67     def flatten(self, msg, unixfrom=False):
  68         """Print the message object tree rooted at msg to the output file
  69         specified when the Generator instance was created.
  70
  71         unixfrom is a flag that forces the printing of a Unix From_ delimiter
  72         before the first object in the message tree.  If the original message
  73         has no From_ delimiter, a `standard' one is crafted.  By default, this
  74         is False to inhibit the printing of any From_ delimiter.
  75
  76         Note that for subobjects, no From_ line is printed.
  77         """
  78         if unixfrom:
  79             ufrom = msg.get_unixfrom()
  80             if not ufrom:
  81                 ufrom = 'From nobody ' + time.ctime(time.time())
  82             print >> self._fp, ufrom
  83         self._write(msg)
  84
  85     def clone(self, fp):
  86         """Clone this generator with the exact same options."""
  87         return self.__class__(fp, self._mangle_from_, self._maxheaderlen)
  88
  89     #
  90     # Protected interface - undocumented ;/
  91     #
  92
  93     def _write(self, msg):
  94         # We can't write the headers yet because of the following scenario:
  95         # say a multipart message includes the boundary string somewhere in
  96         # its body.  We'd have to calculate the new boundary /before/ we write
  97         # the headers so that we can write the correct Content-Type:
  98         # parameter.
  99         #
 100         # The way we do this, so as to make the _handle_*() methods simpler,
 101         # is to cache any subpart writes into a StringIO.  The we write the
 102         # headers and the StringIO contents.  That way, subpart handlers can
 103         # Do The Right Thing, and can still modify the Content-Type: header if
 104         # necessary.
 105         oldfp = self._fp
 106         try:
 107             self._fp = sfp = StringIO()
 108             self._dispatch(msg)
 109         finally:
 110             self._fp = oldfp
 111         # Write the headers.  First we see if the message object wants to
 112         # handle that itself.  If not, we'll do it generically.
 113         meth = getattr(msg, '_write_headers', None)
 114         if meth is None:
 115             self._write_headers(msg)
 116         else:
 117             meth(self)
 118         self._fp.write(sfp.getvalue())
 119
 120     def _dispatch(self, msg):
 121         # Get the Content-Type: for the message, then try to dispatch to
 122         # self._handle_<maintype>_<subtype>().  If there's no handler for the
 123         # full MIME type, then dispatch to self._handle_<maintype>().  If
 124         # that's missing too, then dispatch to self._writeBody().
 125         main = msg.get_content_maintype()
 126         sub = msg.get_content_subtype()
 127         specific = UNDERSCORE.join((main, sub)).replace('-', '_')
 128         meth = getattr(self, '_handle_' + specific, None)
 129         if meth is None:
 130             generic = main.replace('-', '_')
 131             meth = getattr(self, '_handle_' + generic, None)
 132             if meth is None:
 133                 meth = self._writeBody
 134         meth(msg)
 135
 136     #
 137     # Default handlers
 138     #
 139
 140     def _write_headers(self, msg):
 141         for h, v in msg.items():
 142             print >> self._fp, '%s:' % h,
 143             if self._maxheaderlen == 0:
 144                 # Explicit no-wrapping
 145                 print >> self._fp, v
 146             elif isinstance(v, Header):
 147                 # Header instances know what to do
 148                 print >> self._fp, v.encode()
 149             elif _is8bitstring(v):
 150                 # If we have raw 8bit data in a byte string, we have no idea
 151                 # what the encoding is.  There is no safe way to split this
 152                 # string.  If it's ascii-subset, then we could do a normal
 153                 # ascii split, but if it's multibyte then we could break the
 154                 # string.  There's no way to know so the least harm seems to
 155                 # be to not split the string and risk it being too long.
 156                 print >> self._fp, v
 157             else:
 158                 # Header's got lots of smarts, so use it.  Note that this is
 159                 # fundamentally broken though because we lose idempotency when
 160                 # the header string is continued with tabs.  It will now be
 161                 # continued with spaces.  This was reversedly broken before we
 162                 # fixed bug 1974.  Either way, we lose.
 163                 print >> self._fp, Header(
 164                     v, maxlinelen=self._maxheaderlen, header_name=h).encode()
 165         # A blank line always separates headers from body
 166         print >> self._fp
 167
 168     #
 169     # Handlers for writing types and subtypes
 170     #
 171
 172     def _handle_text(self, msg):
 173         payload = msg.get_payload()
 174         if payload is None:
 175             return
 176         if not isinstance(payload, basestring):
 177             raise TypeError('string payload expected: %s' % type(payload))
 178         if self._mangle_from_:
 179             payload = fcre.sub('>From ', payload)
 180         self._fp.write(payload)
 181
 182     # Default body handler
 183     _writeBody = _handle_text
 184
 185     def _handle_multipart(self, msg):
 186         # The trick here is to write out each part separately, merge them all
 187         # together, and then make sure that the boundary we've chosen isn't
 188         # present in the payload.
 189         msgtexts = []
 190         subparts = msg.get_payload()
 191         if subparts is None:
 192             subparts = []
 193         elif isinstance(subparts, basestring):
 194             # e.g. a non-strict parse of a message with no starting boundary.
 195             self._fp.write(subparts)
 196             return
 197         elif not isinstance(subparts, list):
 198             # Scalar payload
 199             subparts = [subparts]
 200         for part in subparts:
 201             s = StringIO()
 202             g = self.clone(s)
 203             g.flatten(part, unixfrom=False)
 204             msgtexts.append(s.getvalue())
 205         # Now make sure the boundary we've selected doesn't appear in any of
 206         # the message texts.
 207         alltext = NL.join(msgtexts)
 208         # BAW: What about boundaries that are wrapped in double-quotes?
 209         boundary = msg.get_boundary(failobj=_make_boundary(alltext))
 210         # If we had to calculate a new boundary because the body text
 211         # contained that string, set the new boundary.  We don't do it
 212         # unconditionally because, while set_boundary() preserves order, it
 213         # doesn't preserve newlines/continuations in headers.  This is no big
 214         # deal in practice, but turns out to be inconvenient for the unittest
 215         # suite.
 216         if msg.get_boundary() != boundary:
 217             msg.set_boundary(boundary)
 218         # If there's a preamble, write it out, with a trailing CRLF
 219         if msg.preamble is not None:
 220             print >> self._fp, msg.preamble
 221         # dash-boundary transport-padding CRLF
 222         print >> self._fp, '--' + boundary
 223         # body-part
 224         if msgtexts:
 225             self._fp.write(msgtexts.pop(0))
 226         # *encapsulation
 227         # --> delimiter transport-padding
 228         # --> CRLF body-part
 229         for body_part in msgtexts:
 230             # delimiter transport-padding CRLF
 231             print >> self._fp, '\n--' + boundary
 232             # body-part
 233             self._fp.write(body_part)
 234         # close-delimiter transport-padding
 235         self._fp.write('\n--' + boundary + '--')
 236         if msg.epilogue is not None:
 237             print >> self._fp
 238             self._fp.write(msg.epilogue)
 239
 240     def _handle_message_delivery_status(self, msg):
 241         # We can't just write the headers directly to self's file object
 242         # because this will leave an extra newline between the last header
 243         # block and the boundary.  Sigh.
 244         blocks = []
 245         for part in msg.get_payload():
 246             s = StringIO()
 247             g = self.clone(s)
 248             g.flatten(part, unixfrom=False)
 249             text = s.getvalue()
 250             lines = text.split('\n')
 251             # Strip off the unnecessary trailing empty line
 252             if lines and lines[-1] == '':
 253                 blocks.append(NL.join(lines[:-1]))
 254             else:
 255                 blocks.append(text)
 256         # Now join all the blocks with an empty line.  This has the lovely
 257         # effect of separating each block with an empty line, but not adding
 258         # an extra one after the last one.
 259         self._fp.write(NL.join(blocks))
 260
 261     def _handle_message(self, msg):
 262         s = StringIO()
 263         g = self.clone(s)
 264         # The payload of a message/rfc822 part should be a multipart sequence
 265         # of length 1.  The zeroth element of the list should be the Message
 266         # object for the subpart.  Extract that object, stringify it, and
 267         # write it out.
 268         g.flatten(msg.get_payload(0), unixfrom=False)
 269         self._fp.write(s.getvalue())
 270
 271
 272 \f
 273 _FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'
 274
 275 class DecodedGenerator(Generator):
 276     """Generator a text representation of a message.
 277
 278     Like the Generator base class, except that non-text parts are substituted
 279     with a format string representing the part.
 280     """
 281     def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
 282         """Like Generator.__init__() except that an additional optional
 283         argument is allowed.
 284
 285         Walks through all subparts of a message.  If the subpart is of main
 286         type `text', then it prints the decoded payload of the subpart.
 287
 288         Otherwise, fmt is a format string that is used instead of the message
 289         payload.  fmt is expanded with the following keywords (in
 290         %(keyword)s format):
 291
 292         type       : Full MIME type of the non-text part
 293         maintype   : Main MIME type of the non-text part
 294         subtype    : Sub-MIME type of the non-text part
 295         filename   : Filename of the non-text part
 296         description: Description associated with the non-text part
 297         encoding   : Content transfer encoding of the non-text part
 298
 299         The default value for fmt is None, meaning
 300
 301         [Non-text (%(type)s) part of message omitted, filename %(filename)s]
 302         """
 303         Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
 304         if fmt is None:
 305             self._fmt = _FMT
 306         else:
 307             self._fmt = fmt
 308
 309     def _dispatch(self, msg):
 310         for part in msg.walk():
 311             maintype = part.get_content_maintype()
 312             if maintype == 'text':
 313                 print >> self, part.get_payload(decode=True)
 314             elif maintype == 'multipart':
 315                 # Just skip this
 316                 pass
 317             else:
 318                 print >> self, self._fmt % {
 319                     'type'       : part.get_content_type(),
 320                     'maintype'   : part.get_content_maintype(),
 321                     'subtype'    : part.get_content_subtype(),
 322                     'filename'   : part.get_filename('[no filename]'),
 323                     'description': part.get('Content-Description',
 324                                             '[no description]'),
 325                     'encoding'   : part.get('Content-Transfer-Encoding',
 326                                             '[no encoding]'),
 327                     }
 328
 329
 330 \f
# Helper
# Length of repr(sys.maxint - 1); used as the zero-padded field width of the
# random token embedded in generated boundaries.
_width = len(repr(sys.maxint-1))
# '%0<_width>d' format applied to the token in _make_boundary().
_fmt = '%%0%dd' % _width
 334
 335 def _make_boundary(text=None):
 336     # Craft a random boundary.  If text is given, ensure that the chosen
 337     # boundary doesn't appear in the text.
 338     token = random.randrange(sys.maxint)
 339     boundary = ('=' * 15) + (_fmt % token) + '=='
 340     if text is None:
 341         return boundary
 342     b = boundary
 343     counter = 0
 344     while True:
 345         cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
 346         if not cre.search(text):
 347             break
 348         b = boundary + '.' + str(counter)
 349         counter += 1
 350     return b