Warn against replacing PyNumber_Add with PyNumber_InPlaceAdd in sum
[python.git] / Lib / email / utils.py
blob50010e69b06315c1c001362439756733957eda51
1 # Copyright (C) 2001-2006 Python Software Foundation
2 # Author: Barry Warsaw
3 # Contact: email-sig@python.org
5 """Miscellaneous utilities."""
# Names exported by "from email.utils import *".
__all__ = [
    'collapse_rfc2231_value',
    'decode_params',
    'decode_rfc2231',
    'encode_rfc2231',
    'formataddr',
    'formatdate',
    'getaddresses',
    'make_msgid',
    'parseaddr',
    'parsedate',
    'parsedate_tz',
    'unquote',
    ]
22 import os
23 import re
24 import time
25 import base64
26 import random
27 import socket
28 import urllib
29 import warnings
31 from email._parseaddr import quote
32 from email._parseaddr import AddressList as _AddressList
33 from email._parseaddr import mktime_tz
35 # We need workarounds for bugs in these methods in older Pythons (see below)
36 from email._parseaddr import parsedate as _parsedate
37 from email._parseaddr import parsedate_tz as _parsedate_tz
39 from quopri import decodestring as _qdecode
41 # Intrapackage imports
42 from email.encoders import _bencode, _qencode
# String constants shared by the helpers in this module.
COMMASPACE = ', '
EMPTYSTRING = ''
UEMPTYSTRING = u''
CRLF = '\r\n'
TICK = "'"

# formataddr() wraps the real name in double quotes when this matches.
specialsre = re.compile(r'[][\\()<>@,:;".]')
# formataddr() backslash-escapes every character this matches.
escapesre = re.compile(r'[][\\()"]')
55 # Helpers
def _identity(s):
    """Return s unchanged."""
    return s
61 def _bdecode(s):
62 # We can't quite use base64.encodestring() since it tacks on a "courtesy
63 # newline". Blech!
64 if not s:
65 return s
66 value = base64.decodestring(s)
67 if not s.endswith('\n') and value.endswith('\n'):
68 return value[:-1]
69 return value
def fix_eols(s):
    """Return s with every line ending normalized to CRLF."""
    # Single left-to-right pass: an existing CR LF pair is tried first so it
    # is kept as one line ending, while a newline with no preceding carriage
    # return, or a carriage return with no following newline, is upgraded.
    return re.sub(r'\r\n|\r|\n', CRLF, s)
def formataddr(pair):
    """Build an address header value from a (realname, email_address) pair.

    This is the inverse of parseaddr(); the returned string is suitable for
    an RFC 2822 From, To or Cc header.

    When the first element of pair is false, the second element is returned
    unmodified.
    """
    realname, email_address = pair
    if not realname:
        return email_address
    # The quoting decision is made on the name as given, before escaping.
    if specialsre.search(realname):
        wrapper = '"'
    else:
        wrapper = ''
    escaped = escapesre.sub(r'\\\g<0>', realname)
    return '%s%s%s <%s>' % (wrapper, escaped, wrapper, email_address)
def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
    # Join all field values into one comma-separated string and parse it in
    # a single pass.
    joined = COMMASPACE.join(fieldvalues)
    return _AddressList(joined).addresslist
# Matches one "=?charset?encoding?atom?=" sequence anywhere in a string.
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE)
def formatdate(timeval=None, localtime=False, usegmt=False):
    """Return a date string as specified by RFC 2822, e.g.:

    Fri, 09 Nov 2001 01:08:47 -0000

    timeval, if given, is a floating point time value as accepted by
    gmtime() and localtime(); when omitted, the current time is used.

    localtime is a flag; when true, timeval is interpreted and the date is
    returned relative to the local timezone instead of UTC, taking daylight
    savings time into account.

    usegmt means that the timezone is written out as an ascii string rather
    than a numeric one (so "GMT" instead of "-0000").  This is needed for
    HTTP, and is only used when localtime is false.
    """
    # strftime() is deliberately avoided: it honors the locale, while RFC
    # 2822 requires the English day and month abbreviations.
    day_names = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
    month_names = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    if timeval is None:
        timeval = time.time()
    if not localtime:
        stamp = time.gmtime(timeval)
        if usegmt:
            zone = 'GMT'
        else:
            zone = '-0000'
    else:
        stamp = time.localtime(timeval)
    year, month, day, hour, minute, second, weekday, _yday, isdst = stamp
    if localtime:
        # Use the DST offset only when the local zone defines DST and the
        # broken-down time says it is in effect.
        if time.daylight and isdst:
            west_seconds = time.altzone
        else:
            west_seconds = time.timezone
        whole_hours, leftover_seconds = divmod(abs(west_seconds), 3600)
        # The offset is in seconds west of UTC, but the printed zone is
        # east of UTC, so the signs differ.
        if west_seconds > 0:
            sign = '-'
        else:
            sign = '+'
        zone = '%s%02d%02d' % (sign, whole_hours, leftover_seconds // 60)
    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
        day_names[weekday],
        day,
        month_names[month - 1],
        year, hour, minute, second,
        zone)
def make_msgid(idstring=None):
    """Return a string suitable for an RFC 2822 compliant Message-ID, e.g:

    <20020201195627.33539.96671@nightshade.la.mastaler.com>

    Optional idstring, if given, is a string used to strengthen the
    uniqueness of the message id.
    """
    # The local part is: UTC timestamp, process id, a random number below
    # 100000, then the optional caller-supplied string.
    stamp = time.strftime('%Y%m%d%H%M%S', time.gmtime(time.time()))
    process_id = os.getpid()
    nonce = random.randrange(100000)
    suffix = ''
    if idstring is not None:
        suffix = '.' + idstring
    hostname = socket.getfqdn()
    return '<%s.%s.%s%s@%s>' % (stamp, process_id, nonce, suffix, hostname)
197 # These functions are in the standalone mimelib version only because they've
198 # subsequently been fixed in the latest Python versions. We use this to worm
199 # around broken older Pythons.
def parsedate(data):
    """Parse data with _parsedate(); a false data yields None instead."""
    if data:
        return _parsedate(data)
    return None
def parsedate_tz(data):
    """Parse data with _parsedate_tz(); a false data yields None instead."""
    if data:
        return _parsedate_tz(data)
    return None
def parseaddr(addr):
    """Return the first address parsed from addr.

    The result is the first entry of the address list built from addr, or
    the pair ('', '') when that list is empty.
    """
    parsed = _AddressList(addr).addresslist
    if parsed:
        return parsed[0]
    return '', ''
219 # rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
def unquote(str):
    """Remove quotes from a string.

    A string longer than one character that is wrapped in double quotes has
    the quotes removed and its backslash escapes undone; one wrapped in
    angle brackets just has the brackets removed.  Anything else is
    returned unchanged.
    """
    if len(str) <= 1:
        return str
    if str.startswith('"') and str.endswith('"'):
        inner = str[1:-1]
        # Collapse doubled backslashes first, then unescape quotes.
        inner = inner.replace('\\\\', '\\')
        return inner.replace('\\"', '"')
    if str.startswith('<') and str.endswith('>'):
        return str[1:-1]
    return str
231 # RFC2231-related functions - parameter encoding and decoding
def decode_rfc2231(s):
    """Decode string according to RFC 2231.

    s is split on the first two TICK characters.  When that yields three
    pieces they are returned as a list; otherwise (None, None, s) is
    returned.
    """
    pieces = s.split(TICK, 2)
    if len(pieces) > 2:
        return pieces
    return None, None, s
240 def encode_rfc2231(s, charset=None, language=None):
241 """Encode string according to RFC 2231.
243 If neither charset nor language is given, then s is returned as-is. If
244 charset is given but not language, the string is encoded using the empty
245 string for language.
247 import urllib
248 s = urllib.quote(s, safe='')
249 if charset is None and language is None:
250 return s
251 if language is None:
252 language = ''
253 return "%s'%s'%s" % (charset, language, s)
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')


def decode_params(params):
    """Decode parameters list according to RFC 2231.

    params is a sequence of 2-tuples containing (param name, string value).
    It is not modified.

    Returns a new list.  The first pair is passed through as-is.  Every
    other ordinary parameter is unquoted and re-quoted.  Parameters whose
    names match rfc2231_continuation are grouped by base name and joined in
    sorted order; when any segment name ends in '*', the joined value is
    split by decode_rfc2231() and returned as a
    (charset, language, quoted value) 3-tuple.  An empty params yields an
    empty list.
    """
    if not params:
        return []
    name, value = params[0]
    new_params = [(name, value)]
    # Map parameter's name to a list of continuations.  The values are a
    # 3-tuple of the continuation number, the string value, and a flag
    # specifying whether a particular segment is %-encoded.
    rfc2231_params = {}
    # Iterate over a slice instead of popping from the front of a copy,
    # which shifts the whole list on every step.
    for name, value in params[1:]:
        encoded = name.endswith('*')
        value = unquote(value)
        mo = rfc2231_continuation.match(name)
        if mo:
            name, num = mo.group('name', 'num')
            if num is not None:
                num = int(num)
            rfc2231_params.setdefault(name, []).append((num, value, encoded))
        else:
            new_params.append((name, '"%s"' % quote(value)))
    for name, continuations in rfc2231_params.items():
        segments = []
        extended = False
        # Sort by number
        continuations.sort()
        # And now append all values in numerical order, converting
        # %-encodings for the encoded segments.  If any of the
        # continuation names ends in a *, then the entire string, after
        # decoding segments and concatenating, must have the charset and
        # language specifiers at the beginning of the string.
        for num, s, encoded in continuations:
            if encoded:
                s = urllib.unquote(s)
                extended = True
            segments.append(s)
        value = quote(EMPTYSTRING.join(segments))
        if extended:
            charset, language, value = decode_rfc2231(value)
            new_params.append((name, (charset, language, '"%s"' % value)))
        else:
            new_params.append((name, '"%s"' % value))
    return new_params
def collapse_rfc2231_value(value, errors='replace',
                           fallback_charset='us-ascii'):
    """Collapse an RFC 2231 parameter value into a single string.

    A non-tuple value is simply unquoted.  A tuple is read as
    (charset, language, text): the text is unquoted and decoded to unicode
    with charset ('us-ascii' when charset is false), using errors as the
    decoding error policy.  If Python does not know the charset,
    fallback_charset is used instead.
    """
    if not isinstance(value, tuple):
        return unquote(value)
    rawval = unquote(value[2])
    charset = value[0]
    if not charset:
        charset = 'us-ascii'
    try:
        decoded = unicode(rawval, charset, errors)
    except LookupError:
        # XXX charset is unknown to Python.
        decoded = unicode(rawval, fallback_charset, errors)
    return decoded