Forgot to add a `versionadded` tag
[python.git] / Lib / email / _parseaddr.py
blob81913a382461ff9c767e621eddc219382b69347d
1 # Copyright (C) 2002-2007 Python Software Foundation
2 # Contact: email-sig@python.org
4 """Email address parsing code.
6 Lifted directly from rfc822.py. This should eventually be rewritten.
7 """
# Public names exported by ``from <module> import *``.
__all__ = [
    'mktime_tz',
    'parsedate',
    'parsedate_tz',
    'quote',
    ]
16 import time
# Frequently used string constants.
EMPTYSTRING = ''
SPACE = ' '
COMMASPACE = ', '
# Parse a date field
# Abbreviated month names come first, then the full names, so that
# (index % 12) + 1 is the month number for either spelling.
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']

_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

# Values are written the way a numeric zone is (+/-HHMM as an integer).
_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a 10-element time tuple.

    Returns (year, month, day, hour, minute, second, 0, 1, -1, tzoffset),
    or None when the string cannot be parsed.  The seventh and eighth
    items are fixed placeholders and the ninth (the DST flag) is always -1.
    tzoffset is the zone's offset from UTC in seconds (-0500 -> -18000),
    or None when the zone is missing or not recognized.

    Zone names are looked up in _timezones; of the military zones only
    'Z' is recognized.
    """
    data = data.split()
    if not data:
        # Empty or whitespace-only input: there is nothing to index into.
        return None
    # The FWS after the comma after the day-of-week is optional, so search and
    # adjust for this.
    if data[0].endswith(',') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    else:
        i = data[0].rfind(',')
        if i >= 0:
            data[0] = data[0][i+1:]
    if len(data) == 3:  # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued to the time ("19:12:08+0100"); split it off.
        s = data[3]
        i = s.find('+')
        if i > 0:
            data[3:] = [s[:i], s[i+1:]]
        else:
            data.append('')  # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    if not (dd and mm and yy and tm):
        # An empty field (e.g. from "-jan-" in an RFC 850 date) can never be
        # valid; bail out before the code below indexes into it.
        return None
    mm = mm.lower()
    if mm not in _monthnames:
        # Maybe the day and month are swapped ("Nov 20" instead of "20 Nov").
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    if mm > 12:
        # Full month names occupy the second half of the table.
        mm -= 12
    if dd[-1] == ',':
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # The time came before the year; swap them back.
        yy, tm = tm, yy
    if yy[-1] == ',':
        yy = yy[:-1]
    if not yy[0].isdigit():
        # A non-numeric "year" is presumably the zone; swap the two.
        yy, tz = tz, yy
    if tm[-1] == ',':
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            # Unknown zone name: leave tzoffset as None.
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ((tzoffset//100)*3600 + (tzoffset % 100)*60)
    # Daylight Saving Time flag is set to -1, since DST is unknown.
    return yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset
def parsedate(data):
    """Convert a time string to a 9-element time tuple.

    Delegates to parsedate_tz() and drops the trailing timezone offset.
    A non-tuple result from parsedate_tz() is handed back unchanged.
    """
    parsed = parsedate_tz(data)
    if not isinstance(parsed, tuple):
        return parsed
    return parsed[:9]
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    data[9] is the zone offset in seconds, or None.  With no zone
    information the fields are interpreted as local time via time.mktime();
    otherwise they are interpreted as UTC and the offset is subtracted.
    The result is a float in both cases.
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(tuple(data[:8]) + (-1,))
    # calendar.timegm() reads the fields as UTC directly.  Going through
    # time.mktime() and subtracting time.timezone instead depends on the
    # host's local-zone rules for that date and can be off, or fail.
    import calendar
    return float(calendar.timegm(data) - data[9])
def quote(str):
    """Escape a string for use inside a quoted string.

    Every backslash becomes a double backslash and every double quote
    becomes backslash + double quote.  No surrounding quote characters
    are added.
    """
    # Backslashes go first so the ones inserted for quotes are not doubled.
    escaped = str.replace('\\', '\\\\')
    return escaped.replace('"', '\\"')
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of RFC 2822 in
    front of you.

    The parser is a cursor (self.pos) walking over self.field; the get*()
    methods consume text at the cursor and advance it.  Comments met along
    the way are collected in self.commentlist.

    Note: this class interface is deprecated and may be removed in the future.
    Use rfc822.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.FWS = self.LWS + self.CR
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Skip whitespace and comments up to the start of the next token.

        Comments that are skipped are appended to self.commentlist.
        """
        # Built once per call rather than once per character.
        skippable = self.LWS + '\n\r'
        while self.pos < len(self.field):
            ch = self.field[self.pos]
            if ch in skippable:
                self.pos += 1
            elif ch == '(':
                self.commentlist.append(self.getcomment())
            else:
                break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses, each one a
        (name, address) pair.  A getaddress() call that yields nothing is
        recorded as ('', '').
        """
        result = []
        while self.pos < len(self.field):
            ad = self.getaddress()
            if ad:
                result += ad
            else:
                result.append(('', ''))
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (name, address) pairs: empty when nothing usable
        was found, one pair for a single mailbox, several for a group.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # Take a snapshot, not an alias: getphraselist() appends to
        # self.commentlist, and the addr-spec branch below re-parses from
        # oldpos.  With an alias the comments seen by getphraselist() would
        # be collected a second time.
        oldcl = self.commentlist[:]
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos += 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Step over a stray special so the caller's loop advances.
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None if the cursor is not on a `<'.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = False
        self.pos += 1
        self.gotonext()
        adlist = ''
        while self.pos < len(self.field):
            if expectroute:
                # The route domain is parsed only to be skipped.
                self.getdomain()
                expectroute = False
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = True
            elif self.field[self.pos] == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                # Step over the character that ended the addr-spec.
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec.

        Returns the local part alone when no `@' follows it, and the empty
        string when an `@' is present but no valid domain follows.
        """
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            ch = self.field[self.pos]
            if ch == '.':
                aslist.append('.')
                self.pos += 1
            elif ch == '"':
                aslist.append('"%s"' % self.getquote())
            elif ch in self.atomends:
                break
            else:
                aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return ''.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        domain = self.getdomain()
        if not domain:
            # Invalid domain: return an empty address rather than a bare
            # local part, so callers can tell that parsing failed.
            return ''
        return ''.join(aslist) + domain

    def getdomain(self):
        """Get the complete domain name from an address.

        Returns the empty string if a second `@' is met inside the domain.
        """
        sdlist = []
        while self.pos < len(self.field):
            ch = self.field[self.pos]
            if ch in self.LWS:
                self.pos += 1
            elif ch == '(':
                self.commentlist.append(self.getcomment())
            elif ch == '[':
                sdlist.append(self.getdomainliteral())
            elif ch == '.':
                self.pos += 1
                sdlist.append('.')
            elif ch == '@':
                # Don't accept domains with two `@', such as
                # a@malicious.org@important.com: reject the whole domain
                # instead of silently truncating it.
                return ''
            elif ch in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments=True):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        # True right after a backslash: the next character is taken as-is.
        escaped = False
        self.pos += 1
        while self.pos < len(self.field):
            ch = self.field[self.pos]
            if escaped:
                slist.append(ch)
                escaped = False
            elif ch in endchars:
                self.pos += 1
                break
            elif allowcomments and ch == '(':
                slist.append(self.getcomment())
                continue        # have already advanced pos from getcomment
            elif ch == '\\':
                escaped = True
            else:
                slist.append(ch)
            self.pos += 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', False)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', True)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', False)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            else:
                atomlist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
        runs of continuous whitespace into one space.
        """
        plist = []

        while self.pos < len(self.field):
            ch = self.field[self.pos]
            if ch in self.FWS:
                self.pos += 1
            elif ch == '"':
                plist.append(self.getquote())
            elif ch == '(':
                self.commentlist.append(self.getcomment())
            elif ch in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses."""

    def __init__(self, field):
        AddrlistClass.__init__(self, field)
        # A false field (None or '') gives an empty list without parsing.
        self.addresslist = self.getaddrlist() if field else []

    def __len__(self):
        return len(self.addresslist)

    def __add__(self, other):
        # Set union: everything in self, then what only other has.
        union = AddressList(None)
        extras = [addr for addr in other.addresslist
                  if addr not in self.addresslist]
        union.addresslist = self.addresslist + extras
        return union

    def __iadd__(self, other):
        # Set union, in-place
        for addr in other.addresslist:
            if addr not in self.addresslist:
                self.addresslist.append(addr)
        return self

    def __sub__(self, other):
        # Set difference: what self has and other does not.
        difference = AddressList(None)
        difference.addresslist = [addr for addr in self.addresslist
                                  if addr not in other.addresslist]
        return difference

    def __isub__(self, other):
        # Set difference, in-place
        for addr in other.addresslist:
            if addr in self.addresslist:
                self.addresslist.remove(addr)
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]