1 # Copyright (C) 2001-2009 Python Software Foundation
2 # Contact: email-sig@python.org
4 """Classes to generate plain text from a message object tree."""
6 __all__
= ['Generator', 'DecodedGenerator']
14 from cStringIO
import StringIO
15 from email
.header
import Header
20 fcre
= re
.compile(r
'^From ', re
.MULTILINE
)
23 if isinstance(s
, str):
25 unicode(s
, 'us-ascii')
33 """Generates output from a Message object tree.
35 This basic generator writes the message to the given file object as plain
42 def __init__(self
, outfp
, mangle_from_
=True, maxheaderlen
=78):
43 """Create the generator for message flattening.
45 outfp is the output file-like object for writing the message to. It
46 must have a write() method.
48 Optional mangle_from_ is a flag that, when True (the default), escapes
49 From_ lines in the body of the message by putting a `>' in front of
52 Optional maxheaderlen specifies the longest length for a non-continued
53 header. When a header line is longer (in characters, with tabs
54 expanded to 8 spaces) than maxheaderlen, the header will be split as
55 defined in the Header class. Set maxheaderlen to zero to disable
56 header wrapping. The default is 78, as recommended (but not required)
60 self
._mangle
_from
_ = mangle_from_
61 self
._maxheaderlen
= maxheaderlen
64 # Just delegate to the file object
67 def flatten(self
, msg
, unixfrom
=False):
68 """Print the message object tree rooted at msg to the output file
69 specified when the Generator instance was created.
71 unixfrom is a flag that forces the printing of a Unix From_ delimiter
72 before the first object in the message tree. If the original message
73 has no From_ delimiter, a `standard' one is crafted. By default, this
74 is False to inhibit the printing of any From_ delimiter.
76 Note that for subobjects, no From_ line is printed.
79 ufrom
= msg
.get_unixfrom()
81 ufrom
= 'From nobody ' + time
.ctime(time
.time())
82 print >> self
._fp
, ufrom
86 """Clone this generator with the exact same options."""
87 return self
.__class
__(fp
, self
._mangle
_from
_, self
._maxheaderlen
)
90 # Protected interface - undocumented ;/
93 def _write(self
, msg
):
94 # We can't write the headers yet because of the following scenario:
95 # say a multipart message includes the boundary string somewhere in
96 # its body. We'd have to calculate the new boundary /before/ we write
97 # the headers so that we can write the correct Content-Type:
100 # The way we do this, so as to make the _handle_*() methods simpler,
101 # is to cache any subpart writes into a StringIO. Then we write the
102 # headers and the StringIO contents. That way, subpart handlers can
103 # Do The Right Thing, and can still modify the Content-Type: header if
107 self
._fp
= sfp
= StringIO()
111 # Write the headers. First we see if the message object wants to
112 # handle that itself. If not, we'll do it generically.
113 meth
= getattr(msg
, '_write_headers', None)
115 self
._write
_headers
(msg
)
118 self
._fp
.write(sfp
.getvalue())
120 def _dispatch(self
, msg
):
121 # Get the Content-Type: for the message, then try to dispatch to
122 # self._handle_<maintype>_<subtype>(). If there's no handler for the
123 # full MIME type, then dispatch to self._handle_<maintype>(). If
124 # that's missing too, then dispatch to self._writeBody().
125 main
= msg
.get_content_maintype()
126 sub
= msg
.get_content_subtype()
127 specific
= UNDERSCORE
.join((main
, sub
)).replace('-', '_')
128 meth
= getattr(self
, '_handle_' + specific
, None)
130 generic
= main
.replace('-', '_')
131 meth
= getattr(self
, '_handle_' + generic
, None)
133 meth
= self
._writeBody
140 def _write_headers(self
, msg
):
141 for h
, v
in msg
.items():
142 print >> self
._fp
, '%s:' % h
,
143 if self
._maxheaderlen
== 0:
144 # Explicit no-wrapping
146 elif isinstance(v
, Header
):
147 # Header instances know what to do
148 print >> self
._fp
, v
.encode()
149 elif _is8bitstring(v
):
150 # If we have raw 8bit data in a byte string, we have no idea
151 # what the encoding is. There is no safe way to split this
152 # string. If it's ascii-subset, then we could do a normal
153 # ascii split, but if it's multibyte then we could break the
154 # string. There's no way to know so the least harm seems to
155 # be to not split the string and risk it being too long.
158 # Header's got lots of smarts, so use it. Note that this is
159 # fundamentally broken though because we lose idempotency when
160 # the header string is continued with tabs. It will now be
161 # continued with spaces. This was reversedly broken before we
162 # fixed bug 1974. Either way, we lose.
163 print >> self
._fp
, Header(
164 v
, maxlinelen
=self
._maxheaderlen
, header_name
=h
).encode()
165 # A blank line always separates headers from body
169 # Handlers for writing types and subtypes
172 def _handle_text(self
, msg
):
173 payload
= msg
.get_payload()
176 if not isinstance(payload
, basestring
):
177 raise TypeError('string payload expected: %s' % type(payload
))
178 if self
._mangle
_from
_:
179 payload
= fcre
.sub('>From ', payload
)
180 self
._fp
.write(payload
)
182 # Default body handler
183 _writeBody
= _handle_text
185 def _handle_multipart(self
, msg
):
186 # The trick here is to write out each part separately, merge them all
187 # together, and then make sure that the boundary we've chosen isn't
188 # present in the payload.
190 subparts
= msg
.get_payload()
193 elif isinstance(subparts
, basestring
):
194 # e.g. a non-strict parse of a message with no starting boundary.
195 self
._fp
.write(subparts
)
197 elif not isinstance(subparts
, list):
199 subparts
= [subparts
]
200 for part
in subparts
:
203 g
.flatten(part
, unixfrom
=False)
204 msgtexts
.append(s
.getvalue())
205 # Now make sure the boundary we've selected doesn't appear in any of
207 alltext
= NL
.join(msgtexts
)
208 # BAW: What about boundaries that are wrapped in double-quotes?
209 boundary
= msg
.get_boundary(failobj
=_make_boundary(alltext
))
210 # If we had to calculate a new boundary because the body text
211 # contained that string, set the new boundary. We don't do it
212 # unconditionally because, while set_boundary() preserves order, it
213 # doesn't preserve newlines/continuations in headers. This is no big
214 # deal in practice, but turns out to be inconvenient for the unittest
216 if msg
.get_boundary() != boundary
:
217 msg
.set_boundary(boundary
)
218 # If there's a preamble, write it out, with a trailing CRLF
219 if msg
.preamble
is not None:
220 print >> self
._fp
, msg
.preamble
221 # dash-boundary transport-padding CRLF
222 print >> self
._fp
, '--' + boundary
225 self
._fp
.write(msgtexts
.pop(0))
227 # --> delimiter transport-padding
229 for body_part
in msgtexts
:
230 # delimiter transport-padding CRLF
231 print >> self
._fp
, '\n--' + boundary
233 self
._fp
.write(body_part
)
234 # close-delimiter transport-padding
235 self
._fp
.write('\n--' + boundary
+ '--')
236 if msg
.epilogue
is not None:
238 self
._fp
.write(msg
.epilogue
)
240 def _handle_message_delivery_status(self
, msg
):
241 # We can't just write the headers directly to self's file object
242 # because this will leave an extra newline between the last header
243 # block and the boundary. Sigh.
245 for part
in msg
.get_payload():
248 g
.flatten(part
, unixfrom
=False)
250 lines
= text
.split('\n')
251 # Strip off the unnecessary trailing empty line
252 if lines
and lines
[-1] == '':
253 blocks
.append(NL
.join(lines
[:-1]))
256 # Now join all the blocks with an empty line. This has the lovely
257 # effect of separating each block with an empty line, but not adding
258 # an extra one after the last one.
259 self
._fp
.write(NL
.join(blocks
))
261 def _handle_message(self
, msg
):
264 # The payload of a message/rfc822 part should be a multipart sequence
265 # of length 1. The zeroth element of the list should be the Message
266 # object for the subpart. Extract that object, stringify it, and
268 g
.flatten(msg
.get_payload(0), unixfrom
=False)
269 self
._fp
.write(s
.getvalue())
273 _FMT
= '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'
275 class DecodedGenerator(Generator
):
276 """Generates a text representation of a message.
278 Like the Generator base class, except that non-text parts are substituted
279 with a format string representing the part.
281 def __init__(self
, outfp
, mangle_from_
=True, maxheaderlen
=78, fmt
=None):
282 """Like Generator.__init__() except that an additional optional
285 Walks through all subparts of a message. If the subpart is of main
286 type `text', then it prints the decoded payload of the subpart.
288 Otherwise, fmt is a format string that is used instead of the message
289 payload. fmt is expanded with the following keywords (in
292 type : Full MIME type of the non-text part
293 maintype : Main MIME type of the non-text part
294 subtype : Sub-MIME type of the non-text part
295 filename : Filename of the non-text part
296 description: Description associated with the non-text part
297 encoding : Content transfer encoding of the non-text part
299 The default value for fmt is None, meaning
301 [Non-text (%(type)s) part of message omitted, filename %(filename)s]
303 Generator
.__init
__(self
, outfp
, mangle_from_
, maxheaderlen
)
309 def _dispatch(self
, msg
):
310 for part
in msg
.walk():
311 maintype
= part
.get_content_maintype()
312 if maintype
== 'text':
313 print >> self
, part
.get_payload(decode
=True)
314 elif maintype
== 'multipart':
318 print >> self
, self
._fmt
% {
319 'type' : part
.get_content_type(),
320 'maintype' : part
.get_content_maintype(),
321 'subtype' : part
.get_content_subtype(),
322 'filename' : part
.get_filename('[no filename]'),
323 'description': part
.get('Content-Description',
325 'encoding' : part
.get('Content-Transfer-Encoding',
332 _width
= len(repr(sys
.maxint
-1))
333 _fmt
= '%%0%dd' % _width
335 def _make_boundary(text
=None):
336 # Craft a random boundary. If text is given, ensure that the chosen
337 # boundary doesn't appear in the text.
338 token
= random
.randrange(sys
.maxint
)
339 boundary
= ('=' * 15) + (_fmt
% token
) + '=='
345 cre
= re
.compile('^--' + re
.escape(b
) + '(--)?$', re
.MULTILINE
)
346 if not cre
.search(text
):
348 b
= boundary
+ '.' + str(counter
)