Bump version
[pytest.git] / Lib / email / utils.py
blob250eb19d930922a76b6a47fba897ddee25c53ef2
1 # Copyright (C) 2001-2006 Python Software Foundation
2 # Author: Barry Warsaw
3 # Contact: email-sig@python.org
5 """Miscellaneous utilities."""
# Public names exported by ``from email.utils import *``.
__all__ = [
    'collapse_rfc2231_value',
    'decode_params',
    'decode_rfc2231',
    'encode_rfc2231',
    'formataddr',
    'formatdate',
    'getaddresses',
    'make_msgid',
    'parseaddr',
    'parsedate',
    'parsedate_tz',
    'unquote',
    ]
22 import os
23 import re
24 import time
25 import base64
26 import random
27 import socket
28 import warnings
29 from cStringIO import StringIO
31 from email._parseaddr import quote
32 from email._parseaddr import AddressList as _AddressList
33 from email._parseaddr import mktime_tz
# We need workarounds for bugs in these methods in older Pythons (see below)
36 from email._parseaddr import parsedate as _parsedate
37 from email._parseaddr import parsedate_tz as _parsedate_tz
39 from quopri import decodestring as _qdecode
41 # Intrapackage imports
42 from email.encoders import _bencode, _qencode
# String constants shared by the helpers in this module.
COMMASPACE = ', '
EMPTYSTRING = ''
UEMPTYSTRING = u''
CRLF = '\r\n'

# Characters whose presence forces a display name to be double-quoted.
specialsre = re.compile(r'[][\\()<>@,:;".]')
# Characters that get a backslash put in front of them in a display name.
escapesre = re.compile(r'[][\\()"]')
54 # Helpers
def _identity(s):
    """Return *s* unchanged."""
    return s
def _bdecode(s):
    """Decode the base64 string *s* and return the decoded data.

    A false value (None or the empty string) is returned unchanged.

    This function used to drop a trailing newline from the decoded data
    whenever the encoded input did not itself end with a newline.  That
    newline is part of the payload, so dropping it silently corrupted any
    content whose last character was a line feed.  The decoded data is now
    returned exactly as base64.decodestring() produces it.
    """
    if not s:
        return s
    return base64.decodestring(s)
def fix_eols(s):
    """Return *s* with every bare LF and every bare CR replaced by CRLF."""
    # First pass: line feeds that are not already preceded by a CR.
    lf_fixed = re.sub(r'(?<!\r)\n', CRLF, s)
    # Second pass: carriage returns that are not followed by a line feed.
    return re.sub(r'\r(?!\n)', CRLF, lf_fixed)
def formataddr(pair):
    """Build an RFC 2822 From, To or Cc header value from an address pair.

    This is the inverse of parseaddr().  pair is a 2-tuple of the form
    (realname, email_address).  When realname is false, email_address is
    returned unmodified.  Otherwise the result is 'realname <address>':
    characters matched by escapesre are backslash-escaped, and the name is
    wrapped in double quotes if it contains a character matched by
    specialsre.
    """
    realname, address = pair
    if not realname:
        return address
    # The quoting decision is made on the name as given, before escaping.
    if specialsre.search(realname):
        quotes = '"'
    else:
        quotes = ''
    escaped = escapesre.sub(r'\\\g<0>', realname)
    return '%s%s%s <%s>' % (quotes, escaped, quotes, address)
def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) for each fieldvalue.

    The field values are joined with ', ' and parsed as one address list.
    """
    joined = COMMASPACE.join(fieldvalues)
    return _AddressList(joined).addresslist
# Matches one encoded word of the form =?charset?encoding?atom?=
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
  \?=                   # literal ?=
  ''', re.IGNORECASE | re.VERBOSE)
def formatdate(timeval=None, localtime=False, usegmt=False):
    """Return a date string as specified by RFC 2822, e.g.:

    Fri, 09 Nov 2001 01:08:47 -0000

    timeval, if given, is a floating point time value as accepted by
    time.gmtime() and time.localtime(); when it is None the current time
    is used.

    localtime is a flag that, when true, interprets timeval and returns a
    date relative to the local timezone instead of UTC, taking daylight
    savings time into account.

    usegmt means that the timezone is written out as the ascii string
    "GMT" rather than the numeric "-0000".  This is needed for HTTP, and
    is only used when localtime is false.
    """
    # strftime() is deliberately not used: it honors the locale, and RFC
    # 2822 requires that day and month names be the English abbreviations.
    day_names = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
    month_names = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    if timeval is None:
        timeval = time.time()
    if not localtime:
        now = time.gmtime(timeval)
        # In UTC the zone is fixed; only its spelling depends on usegmt.
        if usegmt:
            zone = 'GMT'
        else:
            zone = '-0000'
    else:
        now = time.localtime(timeval)
        # Use the DST offset only if the local zone has daylight savings
        # time and it is in effect for this time value.
        if time.daylight and now[-1]:
            offset = time.altzone
        else:
            offset = time.timezone
        # offset is in seconds west of UTC, while the zone is written as
        # hours and minutes east of UTC, so the signs differ.
        if offset > 0:
            sign = '-'
        else:
            sign = '+'
        hours, leftover = divmod(abs(offset), 3600)
        zone = '%s%02d%02d' % (sign, hours, leftover // 60)
    year, month, day, hour, minute, second, weekday = now[:7]
    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
        day_names[weekday], day, month_names[month - 1],
        year, hour, minute, second, zone)
def make_msgid(idstring=None):
    """Return a string suitable for an RFC 2822 compliant Message-ID, e.g:

    <20020201195627.33539.96671@nightshade.la.mastaler.com>

    The id is assembled from the current UTC timestamp, the process id, a
    random integer below 100000 and the host name from socket.getfqdn().
    idstring, if given, is a string appended after a dot in front of the
    '@' to strengthen the uniqueness of the message id.
    """
    stamp = time.strftime('%Y%m%d%H%M%S', time.gmtime(time.time()))
    pid = os.getpid()
    salt = random.randrange(100000)
    if idstring is None:
        suffix = ''
    else:
        suffix = '.' + idstring
    return '<%s.%s.%s%s@%s>' % (stamp, pid, salt, suffix, socket.getfqdn())
# These functions are in the standalone mimelib version only because they've
# subsequently been fixed in the latest Python versions.  We use this to work
# around broken older Pythons.
def parsedate(data):
    """Parse the date string *data* with _parsedate().

    None is returned without calling the parser when data is false.
    """
    if data:
        return _parsedate(data)
    return None
def parsedate_tz(data):
    """Parse the date string *data* with _parsedate_tz().

    None is returned without calling the parser when data is false.
    """
    if data:
        return _parsedate_tz(data)
    return None
def parseaddr(addr):
    """Return the first (realname, email_address) pair parsed from *addr*.

    The 2-tuple ('', '') is returned when the parsed address list is empty.
    """
    parsed = _AddressList(addr).addresslist
    if parsed:
        return parsed[0]
    return '', ''
218 # rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
def unquote(str):
    """Remove quotes from a string.

    A string wrapped in double quotes loses them, and the backslash escapes
    for backslash and double quote inside it are undone.  A string wrapped
    in angle brackets only loses the brackets.  Anything else, including
    any string shorter than two characters, is returned unchanged.
    """
    if len(str) <= 1:
        return str
    if str.startswith('"') and str.endswith('"'):
        inner = str[1:-1]
        # Escaped backslashes are collapsed first, then escaped quotes.
        inner = inner.replace('\\\\', '\\')
        return inner.replace('\\"', '"')
    if str.startswith('<') and str.endswith('>'):
        return str[1:-1]
    return str
230 # RFC2231-related functions - parameter encoding and decoding
def decode_rfc2231(s):
    """Decode string according to RFC 2231.

    Returns a 3-tuple (charset, language, value).  s is expected to look
    like charset'language'percent-encoded-value; the value part is
    percent-decoded.  When s holds fewer than two apostrophes there is no
    charset/language prefix to split off, so the whole string is
    percent-decoded and charset and language are both None.
    """
    try:
        from urllib import unquote as _url_unquote
    except ImportError:
        # Python 3 moved the function into urllib.parse.
        from urllib.parse import unquote as _url_unquote
    parts = s.split("'", 2)
    # A single apostrophe yields two parts; unpacking those into three names
    # would raise ValueError, so treat that case as "no prefix" as well.
    if len(parts) < 3:
        return None, None, _url_unquote(s)
    charset, language, value = parts
    return charset, language, _url_unquote(value)
def encode_rfc2231(s, charset=None, language=None):
    """Encode string according to RFC 2231.

    s is always percent-encoded (no character is treated as safe).  If
    neither charset nor language is given, the percent-encoded string is
    returned on its own.  Otherwise the result has the form
    charset'language'encoded-value, and whichever of charset or language
    was not given is written as the empty string.
    """
    try:
        from urllib import quote as _url_quote
    except ImportError:
        # Python 3 moved the function into urllib.parse.
        from urllib.parse import quote as _url_quote
    s = _url_quote(s, safe='')
    if charset is None and language is None:
        return s
    # Formatting None with %s would put the literal text "None" in the
    # result, so a missing field is emitted as an empty string instead.
    if charset is None:
        charset = ''
    if language is None:
        language = ''
    return "%s'%s'%s" % (charset, language, s)
# Matches a parameter name of the form name*, name*N or name*N*, capturing
# the base name and the optional segment number N.
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')

def decode_params(params):
    """Decode parameters list according to RFC 2231.

    params is a sequence of 2-tuples.  The first pair is copied to the
    result as-is.  Every later (name, value) pair has its value unquoted;
    if the name does not match rfc2231_continuation the pair is emitted as
    (name, '"value"') with the value re-quoted.  Pairs whose names do match
    are grouped by base name, their values joined in segment-number order
    and decoded with decode_rfc2231(), and emitted after the ordinary
    parameters as (name, (charset, language, '"value"')).

    An empty params sequence yields an empty list.
    """
    # Without this guard params[0] below raises IndexError on empty input.
    if not params:
        return []
    first_name, first_value = params[0]
    new_params = [(first_name, first_value)]
    # Maps a parameter's base name to its list of (num, value) segments.
    rfc2231_params = {}
    for name, value in params[1:]:
        value = unquote(value)
        mo = rfc2231_continuation.match(name)
        if mo is None:
            new_params.append((name, '"%s"' % quote(value)))
            continue
        name, num = mo.group('name', 'num')
        if num is not None:
            num = int(num)
        rfc2231_params.setdefault(name, []).append((num, value))
    for name, continuations in rfc2231_params.items():
        # Put the segments in number order before gluing them together.
        continuations.sort()
        joined = EMPTYSTRING.join([segment for _, segment in continuations])
        charset, language, value = decode_rfc2231(joined)
        new_params.append(
            (name, (charset, language, '"%s"' % quote(value))))
    return new_params
def collapse_rfc2231_value(value, errors='replace',
                           fallback_charset='us-ascii'):
    """Collapse a parameter value into a single unquoted string.

    A value that is not a tuple is simply unquoted and returned.  For a
    tuple, the first item is taken as the charset and the third item as the
    text: the text is unquoted and decoded to unicode with that charset
    ('us-ascii' when the charset is false) using the errors handler.  If
    the charset lookup fails, the text is decoded with fallback_charset
    instead.
    """
    if not isinstance(value, tuple):
        return unquote(value)
    rawval = unquote(value[2])
    charset = value[0] or 'us-ascii'
    try:
        return unicode(rawval, charset, errors)
    except LookupError:
        # XXX charset is unknown to Python.
        return unicode(rawval, fallback_charset, errors)