# Copyright (C) 2002-2007 Python Software Foundation
# Contact: email-sig@python.org

"""Email address parsing code.

Lifted directly from rfc822.py. This should eventually be rewritten.
"""
# Month names accepted in date headers, all lower-case: the twelve
# three-letter abbreviations first, then the twelve full names in the same
# calendar order.  parsedate_tz() tests membership in this table and then
# computes ``_monthnames.index(mm) + 1``, so it must stay an ordered list
# ('may' appears in both halves; index() finds the first one).
# NOTE(review): a full name therefore yields 13..24; presumably the caller
# folds that back into 1..12 -- confirm in parsedate_tz().
_monthnames = [
    'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
    'aug', 'sep', 'oct', 'nov', 'dec',
    'january', 'february', 'march', 'april', 'may', 'june', 'july',
    'august', 'september', 'october', 'november', 'december',
]
# Lower-case three-letter day-of-week abbreviations.  parsedate_tz() compares
# the lower-cased first token of a date string against this table to decide
# whether the string starts with a day name.
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
# The timezone table does not include the military time zones defined
# in RFC822, other than Z. According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones. RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.
#
# Each value is a signed offset written as an HHMM integer (-500 stands for
# -05:00); parsedate_tz() looks a zone name up here and converts the result
# to seconds (-0500 -> -18000).
_timezones = {
    'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
    'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
    'EST': -500, 'EDT': -400,  # Eastern
    'CST': -600, 'CDT': -500,  # Central
    'MST': -700, 'MDT': -600,  # Mountain
    'PST': -800, 'PDT': -700,  # Pacific
}
45 def parsedate_tz(data
):
46 """Convert a date string to a time tuple.
48 Accounts for military timezones.
51 # The FWS after the comma after the day-of-week is optional, so search and
53 if data
[0].endswith(',') or data
[0].lower() in _daynames
:
54 # There's a dayname here. Skip it
57 i
= data
[0].rfind(',')
59 data
[0] = data
[0][i
+1:]
60 if len(data
) == 3: # RFC 850 date, deprecated
61 stuff
= data
[0].split('-')
63 data
= stuff
+ data
[1:]
68 data
[3:] = [s
[:i
], s
[i
+1:]]
70 data
.append('') # Dummy tz
74 [dd
, mm
, yy
, tm
, tz
] = data
76 if mm
not in _monthnames
:
77 dd
, mm
= mm
, dd
.lower()
78 if mm
not in _monthnames
:
80 mm
= _monthnames
.index(mm
) + 1
90 if not yy
[0].isdigit():
113 tzoffset
= _timezones
[tz
]
119 # Convert a timezone offset into seconds ; -0500 -> -18000
126 tzoffset
= tzsign
* ( (tzoffset
//100)*3600 + (tzoffset
% 100)*60)
127 # Daylight Saving Time flag is set to -1, since DST is unknown.
128 return yy
, mm
, dd
, thh
, tmm
, tss
, 0, 1, -1, tzoffset
132 """Convert a time string to a time tuple."""
133 t
= parsedate_tz(data
)
134 if isinstance(t
, tuple):
141 """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp."""
143 # No zone info, so localtime is better assumption than GMT
144 return time
.mktime(data
[:8] + (-1,))
146 t
= time
.mktime(data
[:8] + (0,))
147 return t
- data
[9] - time
.timezone
151 """Add quotes around a string."""
152 return str.replace('\\', '\\\\').replace('"', '\\"')
156 """Address parser class by Ben Escoto.
158 To understand what this class does, it helps to have a copy of RFC 2822 in
161 Note: this class interface is deprecated and may be removed in the future.
162 Use rfc822.AddressList instead.
165 def __init__(self
, field
):
166 """Initialize a new instance.
168 `field' is an unparsed address header field, containing
169 one or more addresses.
171 self
.specials
= '()<>@,:;.\"[]'
175 self
.FWS
= self
.LWS
+ self
.CR
176 self
.atomends
= self
.specials
+ self
.LWS
+ self
.CR
177 # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
178 # is obsolete syntax. RFC 2822 requires that we recognize obsolete
179 # syntax, so allow dots in phrases.
180 self
.phraseends
= self
.atomends
.replace('.', '')
182 self
.commentlist
= []
185 """Parse up to the start of the next address."""
186 while self
.pos
< len(self
.field
):
187 if self
.field
[self
.pos
] in self
.LWS
+ '\n\r':
189 elif self
.field
[self
.pos
] == '(':
190 self
.commentlist
.append(self
.getcomment())
194 def getaddrlist(self
):
195 """Parse all addresses.
197 Returns a list containing all of the addresses.
200 while self
.pos
< len(self
.field
):
201 ad
= self
.getaddress()
205 result
.append(('', ''))
208 def getaddress(self
):
209 """Parse the next address."""
210 self
.commentlist
= []
214 oldcl
= self
.commentlist
215 plist
= self
.getphraselist()
220 if self
.pos
>= len(self
.field
):
221 # Bad email address technically, no domain.
223 returnlist
= [(SPACE
.join(self
.commentlist
), plist
[0])]
225 elif self
.field
[self
.pos
] in '.@':
226 # email address is just an addrspec
227 # this isn't very efficient since we start over
229 self
.commentlist
= oldcl
230 addrspec
= self
.getaddrspec()
231 returnlist
= [(SPACE
.join(self
.commentlist
), addrspec
)]
233 elif self
.field
[self
.pos
] == ':':
237 fieldlen
= len(self
.field
)
239 while self
.pos
< len(self
.field
):
241 if self
.pos
< fieldlen
and self
.field
[self
.pos
] == ';':
244 returnlist
= returnlist
+ self
.getaddress()
246 elif self
.field
[self
.pos
] == '<':
247 # Address is a phrase then a route addr
248 routeaddr
= self
.getrouteaddr()
251 returnlist
= [(SPACE
.join(plist
) + ' (' +
252 ' '.join(self
.commentlist
) + ')', routeaddr
)]
254 returnlist
= [(SPACE
.join(plist
), routeaddr
)]
258 returnlist
= [(SPACE
.join(self
.commentlist
), plist
[0])]
259 elif self
.field
[self
.pos
] in self
.specials
:
263 if self
.pos
< len(self
.field
) and self
.field
[self
.pos
] == ',':
267 def getrouteaddr(self
):
268 """Parse a route address (Return-path value).
270 This method just skips all the route stuff and returns the addrspec.
272 if self
.field
[self
.pos
] != '<':
279 while self
.pos
< len(self
.field
):
283 elif self
.field
[self
.pos
] == '>':
286 elif self
.field
[self
.pos
] == '@':
289 elif self
.field
[self
.pos
] == ':':
292 adlist
= self
.getaddrspec()
299 def getaddrspec(self
):
300 """Parse an RFC 2822 addr-spec."""
304 while self
.pos
< len(self
.field
):
305 if self
.field
[self
.pos
] == '.':
308 elif self
.field
[self
.pos
] == '"':
309 aslist
.append('"%s"' % self
.getquote())
310 elif self
.field
[self
.pos
] in self
.atomends
:
313 aslist
.append(self
.getatom())
316 if self
.pos
>= len(self
.field
) or self
.field
[self
.pos
] != '@':
317 return EMPTYSTRING
.join(aslist
)
322 return EMPTYSTRING
.join(aslist
) + self
.getdomain()
325 """Get the complete domain name from an address."""
327 while self
.pos
< len(self
.field
):
328 if self
.field
[self
.pos
] in self
.LWS
:
330 elif self
.field
[self
.pos
] == '(':
331 self
.commentlist
.append(self
.getcomment())
332 elif self
.field
[self
.pos
] == '[':
333 sdlist
.append(self
.getdomainliteral())
334 elif self
.field
[self
.pos
] == '.':
337 elif self
.field
[self
.pos
] in self
.atomends
:
340 sdlist
.append(self
.getatom())
341 return EMPTYSTRING
.join(sdlist
)
343 def getdelimited(self
, beginchar
, endchars
, allowcomments
=True):
344 """Parse a header fragment delimited by special characters.
346 `beginchar' is the start character for the fragment.
347 If self is not looking at an instance of `beginchar' then
348 getdelimited returns the empty string.
350 `endchars' is a sequence of allowable end-delimiting characters.
351 Parsing stops when one of these is encountered.
353 If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
354 within the parsed fragment.
356 if self
.field
[self
.pos
] != beginchar
:
362 while self
.pos
< len(self
.field
):
364 slist
.append(self
.field
[self
.pos
])
366 elif self
.field
[self
.pos
] in endchars
:
369 elif allowcomments
and self
.field
[self
.pos
] == '(':
370 slist
.append(self
.getcomment())
371 continue # have already advanced pos from getcomment
372 elif self
.field
[self
.pos
] == '\\':
375 slist
.append(self
.field
[self
.pos
])
378 return EMPTYSTRING
.join(slist
)
381 """Get a quote-delimited fragment from self's field."""
382 return self
.getdelimited('"', '"\r', False)
def getcomment(self):
    """Get a parenthesis-delimited fragment from self's field.

    Delegates to self.getdelimited() with '(' as the opening character and
    ')' or a carriage return as the end characters.  The third argument is
    True, so comments nested inside the fragment are allowed.

    Returns whatever getdelimited() returns for that fragment.
    """
    return self.getdelimited('(', ')\r', True)
def getdomainliteral(self):
    """Parse an RFC 2822 domain-literal.

    Delegates to self.getdelimited() with '[' as the opening character and
    ']' or a carriage return as the end characters; embedded comments are
    not allowed (third argument False).

    Returns the fragment wrapped in square brackets, e.g. '[127.0.0.1]'.
    """
    return '[%s]' % self.getdelimited('[', ']\r', False)
392 def getatom(self
, atomends
=None):
393 """Parse an RFC 2822 atom.
395 Optional atomends specifies a different set of end token delimiters
396 (the default is to use self.atomends). This is used e.g. in
397 getphraselist() since phrase endings must not include the `.' (which
398 is legal in phrases)."""
401 atomends
= self
.atomends
403 while self
.pos
< len(self
.field
):
404 if self
.field
[self
.pos
] in atomends
:
407 atomlist
.append(self
.field
[self
.pos
])
410 return EMPTYSTRING
.join(atomlist
)
412 def getphraselist(self
):
413 """Parse a sequence of RFC 2822 phrases.
415 A phrase is a sequence of words, which are in turn either RFC 2822
416 atoms or quoted-strings. Phrases are canonicalized by squeezing all
417 runs of continuous whitespace into one space.
421 while self
.pos
< len(self
.field
):
422 if self
.field
[self
.pos
] in self
.FWS
:
424 elif self
.field
[self
.pos
] == '"':
425 plist
.append(self
.getquote())
426 elif self
.field
[self
.pos
] == '(':
427 self
.commentlist
.append(self
.getcomment())
428 elif self
.field
[self
.pos
] in self
.phraseends
:
431 plist
.append(self
.getatom(self
.phraseends
))
435 class AddressList(AddrlistClass
):
436 """An AddressList encapsulates a list of parsed RFC 2822 addresses."""
437 def __init__(self
, field
):
438 AddrlistClass
.__init
__(self
, field
)
440 self
.addresslist
= self
.getaddrlist()
442 self
.addresslist
= []
445 return len(self
.addresslist
)
447 def __add__(self
, other
):
449 newaddr
= AddressList(None)
450 newaddr
.addresslist
= self
.addresslist
[:]
451 for x
in other
.addresslist
:
452 if not x
in self
.addresslist
:
453 newaddr
.addresslist
.append(x
)
456 def __iadd__(self
, other
):
457 # Set union, in-place
458 for x
in other
.addresslist
:
459 if not x
in self
.addresslist
:
460 self
.addresslist
.append(x
)
463 def __sub__(self
, other
):
465 newaddr
= AddressList(None)
466 for x
in self
.addresslist
:
467 if not x
in other
.addresslist
:
468 newaddr
.addresslist
.append(x
)
471 def __isub__(self
, other
):
472 # Set difference, in-place
473 for x
in other
.addresslist
:
474 if x
in self
.addresslist
:
475 self
.addresslist
.remove(x
)
def __getitem__(self, index):
    """Return the entry of self.addresslist selected by `index`.

    `index` is passed straight to the underlying list, so an integer gives
    one entry and a slice gives a list of entries.
    """
    # Make indexing, slices, and 'in' work
    return self.addresslist[index]