1 # Copyright (C) 2001-2007 Python Software Foundation
3 # Contact: email-sig@python.org
5 """Miscellaneous utilities."""
8 'collapse_rfc2231_value',
30 from io
import StringIO
32 from email
._parseaddr
import quote
33 from email
._parseaddr
import AddressList
as _AddressList
34 from email
._parseaddr
import mktime_tz
36 # We need workarounds for bugs in these methods in older Pythons (see below)
37 from email
._parseaddr
import parsedate
as _parsedate
38 from email
._parseaddr
import parsedate_tz
as _parsedate_tz
40 from quopri
import decodestring
as _qdecode
42 # Intrapackage imports
43 from email
.encoders
import _bencode
, _qencode
51 specialsre
= re
.compile(r
'[][\\()<>@,:;".]')
52 escapesre
= re
.compile(r
'[][\\()"]')
59 """The inverse of parseaddr(), this takes a 2-tuple of the form
60 (realname, email_address) and returns the string value suitable
61 for an RFC 2822 From, To or Cc header.
63 If the first element of pair is false, then the second element is
69 if specialsre
.search(name
):
71 name
= escapesre
.sub(r
'\\\g<0>', name
)
72 return '%s%s%s <%s>' % (quotes
, name
, quotes
, address
)
77 def getaddresses(fieldvalues
):
78 """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
79 all
= COMMASPACE
.join(fieldvalues
)
85 ecre
= re
.compile(r
'''
87 (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
89 (?P<encoding>[qb]) # either a "q" or a "b", case insensitive
91 (?P<atom>.*?) # non-greedy up to the next ?= is the atom
93 ''', re
.VERBOSE | re
.IGNORECASE
)
97 def formatdate(timeval
=None, localtime
=False, usegmt
=False):
98 """Returns a date string as specified by RFC 2822, e.g.:
100 Fri, 09 Nov 2001 01:08:47 -0000
102 Optional timeval if given is a floating point time value as accepted by
103 gmtime() and localtime(), otherwise the current time is used.
105 Optional localtime is a flag that when True, interprets timeval, and
106 returns a date relative to the local timezone instead of UTC, properly
107 taking daylight savings time into account.
109 Optional argument usegmt means that the timezone is written out as
110 an ascii string, not numeric one (so "GMT" instead of "+0000"). This
111 is needed for HTTP, and is only used when localtime==False.
113 # Note: we cannot use strftime() because that honors the locale and RFC
114 # 2822 requires that day and month names be the English abbreviations.
116 timeval
= time
.time()
118 now
= time
.localtime(timeval
)
119 # Calculate timezone offset, based on whether the local zone has
120 # daylight savings time, and whether DST is in effect.
121 if time
.daylight
and now
[-1]:
122 offset
= time
.altzone
124 offset
= time
.timezone
125 hours
, minutes
= divmod(abs(offset
), 3600)
126 # Remember offset is in seconds west of UTC, but the timezone is in
127 # minutes east of UTC, so the signs differ.
132 zone
= '%s%02d%02d' % (sign
, hours
, minutes
// 60)
134 now
= time
.gmtime(timeval
)
135 # Timezone offset is always -0000
140 return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
141 ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][now
[6]],
143 ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
144 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now
[1] - 1],
145 now
[0], now
[3], now
[4], now
[5],
150 def make_msgid(idstring
=None):
151 """Returns a string suitable for RFC 2822 compliant Message-ID, e.g:
153 <20020201195627.33539.96671@nightshade.la.mastaler.com>
155 Optional idstring if given is a string used to strengthen the
156 uniqueness of the message id.
158 timeval
= time
.time()
159 utcdate
= time
.strftime('%Y%m%d%H%M%S', time
.gmtime(timeval
))
161 randint
= random
.randrange(100000)
165 idstring
= '.' + idstring
166 idhost
= socket
.getfqdn()
167 msgid
= '<%s.%s.%s%s@%s>' % (utcdate
, pid
, randint
, idstring
, idhost
)
172 # These functions are in the standalone mimelib version only because they've
173 # subsequently been fixed in the latest Python versions. We use this to work
174 # around broken older Pythons.
178 return _parsedate(data
)
181 def parsedate_tz(data
):
184 return _parsedate_tz(data
)
188 addrs
= _AddressList(addr
).addresslist
194 # rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
196 """Remove quotes from a string."""
198 if str.startswith('"') and str.endswith('"'):
199 return str[1:-1].replace('\\\\', '\\').replace('\\"', '"')
200 if str.startswith('<') and str.endswith('>'):
206 # RFC2231-related functions - parameter encoding and decoding
207 def decode_rfc2231(s
):
208 """Decode string according to RFC 2231"""
209 parts
= s
.split(TICK
, 2)
215 def encode_rfc2231(s
, charset
=None, language
=None):
216 """Encode string according to RFC 2231.
218 If neither charset nor language is given, then s is returned as-is. If
219 charset is given but not language, the string is encoded using the empty
222 s
= urllib
.parse
.quote(s
, safe
='', encoding
=charset
or 'ascii')
223 if charset
is None and language
is None:
227 return "%s'%s'%s" % (charset
, language
, s
)
230 rfc2231_continuation
= re
.compile(r
'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$',
233 def decode_params(params
):
234 """Decode parameters list according to RFC 2231.
236 params is a sequence of 2-tuples containing (param name, string value).
238 # Copy params so we don't mess with the original
241 # Map parameter's name to a list of continuations. The values are a
242 # 3-tuple of the continuation number, the string value, and a flag
243 # specifying whether a particular segment is %-encoded.
245 name
, value
= params
.pop(0)
246 new_params
.append((name
, value
))
248 name
, value
= params
.pop(0)
249 if name
.endswith('*'):
253 value
= unquote(value
)
254 mo
= rfc2231_continuation
.match(name
)
256 name
, num
= mo
.group('name', 'num')
259 rfc2231_params
.setdefault(name
, []).append((num
, value
, encoded
))
261 new_params
.append((name
, '"%s"' % quote(value
)))
263 for name
, continuations
in rfc2231_params
.items():
268 # And now append all values in numerical order, converting
269 # %-encodings for the encoded segments. If any of the
270 # continuation names ends in a *, then the entire string, after
271 # decoding segments and concatenating, must have the charset and
272 # language specifiers at the beginning of the string.
273 for num
, s
, encoded
in continuations
:
275 # Decode as "latin-1", so the characters in s directly
276 # represent the percent-encoded octet values.
277 # collapse_rfc2231_value treats this as an octet sequence.
278 s
= urllib
.parse
.unquote(s
, encoding
="latin-1")
281 value
= quote(EMPTYSTRING
.join(value
))
283 charset
, language
, value
= decode_rfc2231(value
)
284 new_params
.append((name
, (charset
, language
, '"%s"' % value
)))
286 new_params
.append((name
, '"%s"' % value
))
289 def collapse_rfc2231_value(value
, errors
='replace',
290 fallback_charset
='us-ascii'):
291 if not isinstance(value
, tuple) or len(value
) != 3:
292 return unquote(value
)
293 # While value comes to us as a unicode string, we need it to be a bytes
294 # object. We do not want bytes() normal utf-8 decoder, we want a straight
295 # interpretation of the string as character bytes.
296 charset
, language
, text
= value
297 rawbytes
= bytes(text
, 'raw-unicode-escape')
299 return str(rawbytes
, charset
, errors
)
301 # charset is not a known codec.