Catch situations where currentframe() returns None. See SF patch #1447410, this is...
[python.git] / Lib / rfc822.py
blob871a049c2192074a9c6a45d010a6fb568e78c657
1 """RFC 2822 message manipulation.
3 Note: This is only a very rough sketch of a full RFC-822 parser; in particular
4 the tokenizing of addresses does not adhere to all the quoting rules.
6 Note: RFC 2822 is a long awaited update to RFC 822. This module should
7 conform to RFC 2822, and is thus mis-named (it's not worth renaming it). Some
8 effort at RFC 2822 updates has been made, but a thorough audit has not been
9 performed. Consider any RFC 2822 non-conformance to be a bug.
11 RFC 2822: http://www.faqs.org/rfcs/rfc2822.html
12 RFC 822 : http://www.faqs.org/rfcs/rfc822.html (obsolete)
14 Directions for use:
16 To create a Message object: first open a file, e.g.:
18 fp = open(file, 'r')
20 You can use any other legal way of getting an open file object, e.g. use
21 sys.stdin or call os.popen(). Then pass the open file object to the Message()
22 constructor:
24 m = Message(fp)
26 This class can work with any input object that supports a readline method. If
27 the input object has seek and tell capability, the rewindbody method will
28 work; also illegal lines will be pushed back onto the input stream. If the
29 input object lacks seek but has an `unread' method that can push back a line
30 of input, Message will use that to push back illegal lines. Thus this class
31 can be used to parse messages coming from a buffered stream.
33 The optional `seekable' argument is provided as a workaround for certain stdio
34 libraries in which tell() discards buffered data before discovering that the
35 lseek() system call doesn't work. For maximum portability, you should set the
36 seekable argument to zero to prevent that initial tell() when passing in
37 an unseekable object such as a file object created from a socket object. If
38 it is 1 on entry -- which it is by default -- the tell() method of the open
39 file object is called once; if this raises an exception, seekable is reset to
40 0. For other nonzero values of seekable, this test is not made.
42 To get the text of a particular header there are several methods:
44 str = m.getheader(name)
45 str = m.getrawheader(name)
47 where name is the name of the header, e.g. 'Subject'. The difference is that
48 getheader() strips the leading and trailing whitespace, while getrawheader()
49 doesn't. Both functions retain embedded whitespace (including newlines)
50 exactly as they are specified in the header, and leave the case of the text
51 unchanged.
53 For addresses and address lists there are functions
55 realname, mailaddress = m.getaddr(name)
56 list = m.getaddrlist(name)
58 where the latter returns a list of (realname, mailaddr) tuples.
60 There is also a method
62 time = m.getdate(name)
64 which parses a Date-like field and returns a time-compatible tuple,
65 i.e. a tuple such as returned by time.localtime() or accepted by
66 time.mktime().
68 See the class definition for lower level access methods.
70 There are also some utility functions here.
71 """
72 # Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
74 import time
76 __all__ = ["Message","AddressList","parsedate","parsedate_tz","mktime_tz"]
78 _blanklines = ('\r\n', '\n') # Optimization for islast()
81 class Message:
82 """Represents a single RFC 2822-compliant message."""
84 def __init__(self, fp, seekable = 1):
85 """Initialize the class instance and read the headers."""
86 if seekable == 1:
87 # Exercise tell() to make sure it works
88 # (and then assume seek() works, too)
89 try:
90 fp.tell()
91 except (AttributeError, IOError):
92 seekable = 0
93 self.fp = fp
94 self.seekable = seekable
95 self.startofheaders = None
96 self.startofbody = None
98 if self.seekable:
99 try:
100 self.startofheaders = self.fp.tell()
101 except IOError:
102 self.seekable = 0
104 self.readheaders()
106 if self.seekable:
107 try:
108 self.startofbody = self.fp.tell()
109 except IOError:
110 self.seekable = 0
112 def rewindbody(self):
113 """Rewind the file to the start of the body (if seekable)."""
114 if not self.seekable:
115 raise IOError, "unseekable file"
116 self.fp.seek(self.startofbody)
118 def readheaders(self):
119 """Read header lines.
121 Read header lines up to the entirely blank line that terminates them.
122 The (normally blank) line that ends the headers is skipped, but not
123 included in the returned list. If a non-header line ends the headers,
124 (which is an error), an attempt is made to backspace over it; it is
125 never included in the returned list.
127 The variable self.status is set to the empty string if all went well,
128 otherwise it is an error message. The variable self.headers is a
129 completely uninterpreted list of lines contained in the header (so
130 printing them will reproduce the header exactly as it appears in the
131 file).
133 self.dict = {}
134 self.unixfrom = ''
135 self.headers = lst = []
136 self.status = ''
137 headerseen = ""
138 firstline = 1
139 startofline = unread = tell = None
140 if hasattr(self.fp, 'unread'):
141 unread = self.fp.unread
142 elif self.seekable:
143 tell = self.fp.tell
144 while 1:
145 if tell:
146 try:
147 startofline = tell()
148 except IOError:
149 startofline = tell = None
150 self.seekable = 0
151 line = self.fp.readline()
152 if not line:
153 self.status = 'EOF in headers'
154 break
155 # Skip unix From name time lines
156 if firstline and line.startswith('From '):
157 self.unixfrom = self.unixfrom + line
158 continue
159 firstline = 0
160 if headerseen and line[0] in ' \t':
161 # It's a continuation line.
162 lst.append(line)
163 x = (self.dict[headerseen] + "\n " + line.strip())
164 self.dict[headerseen] = x.strip()
165 continue
166 elif self.iscomment(line):
167 # It's a comment. Ignore it.
168 continue
169 elif self.islast(line):
170 # Note! No pushback here! The delimiter line gets eaten.
171 break
172 headerseen = self.isheader(line)
173 if headerseen:
174 # It's a legal header line, save it.
175 lst.append(line)
176 self.dict[headerseen] = line[len(headerseen)+1:].strip()
177 continue
178 else:
179 # It's not a header line; throw it back and stop here.
180 if not self.dict:
181 self.status = 'No headers'
182 else:
183 self.status = 'Non-header line where header expected'
184 # Try to undo the read.
185 if unread:
186 unread(line)
187 elif tell:
188 self.fp.seek(startofline)
189 else:
190 self.status = self.status + '; bad seek'
191 break
193 def isheader(self, line):
194 """Determine whether a given line is a legal header.
196 This method should return the header name, suitably canonicalized.
197 You may override this method in order to use Message parsing on tagged
198 data in RFC 2822-like formats with special header formats.
200 i = line.find(':')
201 if i > 0:
202 return line[:i].lower()
203 return None
205 def islast(self, line):
206 """Determine whether a line is a legal end of RFC 2822 headers.
208 You may override this method if your application wants to bend the
209 rules, e.g. to strip trailing whitespace, or to recognize MH template
210 separators ('--------'). For convenience (e.g. for code reading from
211 sockets) a line consisting of \r\n also matches.
213 return line in _blanklines
215 def iscomment(self, line):
216 """Determine whether a line should be skipped entirely.
218 You may override this method in order to use Message parsing on tagged
219 data in RFC 2822-like formats that support embedded comments or
220 free-text data.
222 return False
224 def getallmatchingheaders(self, name):
225 """Find all header lines matching a given header name.
227 Look through the list of headers and find all lines matching a given
228 header name (and their continuation lines). A list of the lines is
229 returned, without interpretation. If the header does not occur, an
230 empty list is returned. If the header occurs multiple times, all
231 occurrences are returned. Case is not important in the header name.
233 name = name.lower() + ':'
234 n = len(name)
235 lst = []
236 hit = 0
237 for line in self.headers:
238 if line[:n].lower() == name:
239 hit = 1
240 elif not line[:1].isspace():
241 hit = 0
242 if hit:
243 lst.append(line)
244 return lst
246 def getfirstmatchingheader(self, name):
247 """Get the first header line matching name.
249 This is similar to getallmatchingheaders, but it returns only the
250 first matching header (and its continuation lines).
252 name = name.lower() + ':'
253 n = len(name)
254 lst = []
255 hit = 0
256 for line in self.headers:
257 if hit:
258 if not line[:1].isspace():
259 break
260 elif line[:n].lower() == name:
261 hit = 1
262 if hit:
263 lst.append(line)
264 return lst
266 def getrawheader(self, name):
267 """A higher-level interface to getfirstmatchingheader().
269 Return a string containing the literal text of the header but with the
270 keyword stripped. All leading, trailing and embedded whitespace is
271 kept in the string, however. Return None if the header does not
272 occur.
275 lst = self.getfirstmatchingheader(name)
276 if not lst:
277 return None
278 lst[0] = lst[0][len(name) + 1:]
279 return ''.join(lst)
281 def getheader(self, name, default=None):
282 """Get the header value for a name.
284 This is the normal interface: it returns a stripped version of the
285 header value for a given header name, or None if it doesn't exist.
286 This uses the dictionary version which finds the *last* such header.
288 return self.dict.get(name.lower(), default)
289 get = getheader
291 def getheaders(self, name):
292 """Get all values for a header.
294 This returns a list of values for headers given more than once; each
295 value in the result list is stripped in the same way as the result of
296 getheader(). If the header is not given, return an empty list.
298 result = []
299 current = ''
300 have_header = 0
301 for s in self.getallmatchingheaders(name):
302 if s[0].isspace():
303 if current:
304 current = "%s\n %s" % (current, s.strip())
305 else:
306 current = s.strip()
307 else:
308 if have_header:
309 result.append(current)
310 current = s[s.find(":") + 1:].strip()
311 have_header = 1
312 if have_header:
313 result.append(current)
314 return result
316 def getaddr(self, name):
317 """Get a single address from a header, as a tuple.
319 An example return value:
320 ('Guido van Rossum', 'guido@cwi.nl')
322 # New, by Ben Escoto
323 alist = self.getaddrlist(name)
324 if alist:
325 return alist[0]
326 else:
327 return (None, None)
329 def getaddrlist(self, name):
330 """Get a list of addresses from a header.
332 Retrieves a list of addresses from a header, where each address is a
333 tuple as returned by getaddr(). Scans all named headers, so it works
334 properly with multiple To: or Cc: headers for example.
336 raw = []
337 for h in self.getallmatchingheaders(name):
338 if h[0] in ' \t':
339 raw.append(h)
340 else:
341 if raw:
342 raw.append(', ')
343 i = h.find(':')
344 if i > 0:
345 addr = h[i+1:]
346 raw.append(addr)
347 alladdrs = ''.join(raw)
348 a = AddressList(alladdrs)
349 return a.addresslist
351 def getdate(self, name):
352 """Retrieve a date field from a header.
354 Retrieves a date field from the named header, returning a tuple
355 compatible with time.mktime().
357 try:
358 data = self[name]
359 except KeyError:
360 return None
361 return parsedate(data)
363 def getdate_tz(self, name):
364 """Retrieve a date field from a header as a 10-tuple.
366 The first 9 elements make up a tuple compatible with time.mktime(),
367 and the 10th is the offset of the poster's time zone from GMT/UTC.
369 try:
370 data = self[name]
371 except KeyError:
372 return None
373 return parsedate_tz(data)
376 # Access as a dictionary (only finds *last* header of each type):
378 def __len__(self):
379 """Get the number of headers in a message."""
380 return len(self.dict)
382 def __getitem__(self, name):
383 """Get a specific header, as from a dictionary."""
384 return self.dict[name.lower()]
386 def __setitem__(self, name, value):
387 """Set the value of a header.
389 Note: This is not a perfect inversion of __getitem__, because any
390 changed headers get stuck at the end of the raw-headers list rather
391 than where the altered header was.
393 del self[name] # Won't fail if it doesn't exist
394 self.dict[name.lower()] = value
395 text = name + ": " + value
396 for line in text.split("\n"):
397 self.headers.append(line + "\n")
399 def __delitem__(self, name):
400 """Delete all occurrences of a specific header, if it is present."""
401 name = name.lower()
402 if not name in self.dict:
403 return
404 del self.dict[name]
405 name = name + ':'
406 n = len(name)
407 lst = []
408 hit = 0
409 for i in range(len(self.headers)):
410 line = self.headers[i]
411 if line[:n].lower() == name:
412 hit = 1
413 elif not line[:1].isspace():
414 hit = 0
415 if hit:
416 lst.append(i)
417 for i in reversed(lst):
418 del self.headers[i]
420 def setdefault(self, name, default=""):
421 lowername = name.lower()
422 if lowername in self.dict:
423 return self.dict[lowername]
424 else:
425 text = name + ": " + default
426 for line in text.split("\n"):
427 self.headers.append(line + "\n")
428 self.dict[lowername] = default
429 return default
431 def has_key(self, name):
432 """Determine whether a message contains the named header."""
433 return name.lower() in self.dict
435 def __contains__(self, name):
436 """Determine whether a message contains the named header."""
437 return name.lower() in self.dict
439 def __iter__(self):
440 return iter(self.dict)
442 def keys(self):
443 """Get all of a message's header field names."""
444 return self.dict.keys()
446 def values(self):
447 """Get all of a message's header field values."""
448 return self.dict.values()
450 def items(self):
451 """Get all of a message's headers.
453 Returns a list of name, value tuples.
455 return self.dict.items()
457 def __str__(self):
458 return ''.join(self.headers)
461 # Utility functions
462 # -----------------
464 # XXX Should fix unquote() and quote() to be really conformant.
465 # XXX The inverses of the parse functions may also be useful.
def unquote(s):
    """Strip one level of surrounding double quotes or angle brackets.

    For a double-quoted string the escapes \\\\ and \\" inside it are also
    undone.  Strings shorter than two characters, or not delimited by a
    matching pair, are returned unchanged.
    """
    if len(s) <= 1:
        return s
    inner = s[1:-1]
    if s.startswith('"') and s.endswith('"'):
        unescaped = inner.replace('\\\\', '\\')
        return unescaped.replace('\\"', '"')
    if s.startswith('<') and s.endswith('>'):
        return inner
    return s
def quote(s):
    """Escape backslashes and double quotes in a string.

    Each backslash becomes two backslashes and each double quote gets a
    backslash in front of it.  No surrounding quotes are added.
    """
    escaped = s.replace('\\', '\\\\')
    return escaped.replace('"', '\\"')
def parseaddr(address):
    """Parse an address into a (realname, mailaddr) tuple.

    Only the first address found is returned; (None, None) is returned when
    nothing could be parsed.
    """
    parsed = AddressList(address).addresslist
    if parsed:
        return parsed[0]
    return (None, None)
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of
    RFC 2822 in front of you.

    http://www.faqs.org/rfcs/rfc2822.html

    Note: this class interface is deprecated and may be removed in the future.
    Use rfc822.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing one or more
        addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address.

        Skips whitespace and line breaks; any comments met on the way are
        appended to self.commentlist.
        """
        skippable = self.LWS + '\n\r'
        while self.pos < len(self.field):
            if self.field[self.pos] in skippable:
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else: break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses.  Parsing stops at the
        first position where getaddress() yields nothing.
        """
        result = []
        ad = self.getaddress()
        while ad:
            result += ad
            ad = self.getaddress()
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (realname, mailaddr) tuples: one entry for a plain
        address, several for a group, or an empty list if nothing was found.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # Snapshot (not alias) the comments seen so far.  getphraselist()
        # appends to self.commentlist, and the addr-spec branch below
        # re-parses the same text, so restoring an alias would leave every
        # comment in that region recorded twice.
        oldcl = self.commentlist[:]
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos += 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' + \
                         ' '.join(self.commentlist) + ')', routeaddr)]
            else: returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Step over a stray special so that parsing can make progress.
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None if self is not looking at a `<'.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = 0
        self.pos += 1
        self.gotonext()
        adlist = ""
        while self.pos < len(self.field):
            if expectroute:
                self.getdomain()
                expectroute = 0
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = 1
            elif self.field[self.pos] == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                # Step past the character that ended the addr-spec
                # (normally the closing `>').
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec."""
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos += 1
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % self.getquote())
            elif self.field[self.pos] in self.atomends:
                break
            else: aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            # No domain part: return the local part on its own.
            return ''.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        return ''.join(aslist) + self.getdomain()

    def getdomain(self):
        """Get the complete domain name from an address."""
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] in self.atomends:
                break
            else: sdlist.append(self.getatom())
        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments = 1):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.  If self is not
        looking at an instance of `beginchar' then getdelimited returns the
        empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        quote = 0
        self.pos += 1
        while self.pos < len(self.field):
            if quote == 1:
                # Previous character was a backslash: take this one verbatim.
                slist.append(self.field[self.pos])
                quote = 0
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                # getcomment() has already advanced past the nested comment;
                # advancing again would skip a character (possibly our own
                # closing delimiter).
                continue
            elif self.field[self.pos] == '\\':
                quote = 1
            else:
                slist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', 0)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', 1)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', 0)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            else: atomlist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
        runs of continuous whitespace into one space.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses.

    The parsed (realname, mailaddr) tuples are kept in self.addresslist.
    The +, -, += and -= operators treat two lists as sets of such tuples.
    """

    def __init__(self, field):
        AddrlistClass.__init__(self, field)
        # An empty or None field gives an empty list without any parsing.
        self.addresslist = []
        if field:
            self.addresslist = self.getaddrlist()

    def __len__(self):
        return len(self.addresslist)

    def __str__(self):
        formatted = [dump_address_pair(pair) for pair in self.addresslist]
        return ", ".join(formatted)

    def __add__(self, other):
        # Set union
        union = AddressList(None)
        extras = [x for x in other.addresslist
                  if x not in self.addresslist]
        union.addresslist = self.addresslist[:] + extras
        return union

    def __iadd__(self, other):
        # Set union, in-place
        mine = self.addresslist
        for candidate in other.addresslist:
            if candidate not in mine:
                mine.append(candidate)
        return self

    def __sub__(self, other):
        # Set difference
        difference = AddressList(None)
        difference.addresslist = [x for x in self.addresslist
                                  if x not in other.addresslist]
        return difference

    def __isub__(self, other):
        # Set difference, in-place
        mine = self.addresslist
        for candidate in other.addresslist:
            if candidate in mine:
                mine.remove(candidate)
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
def dump_address_pair(pair):
    """Dump a (name, address) pair in a canonicalized form.

    With a non-empty name the result is '"name" <address>'; otherwise it is
    just the address.
    """
    name, addr = pair[0], pair[1]
    if not name:
        return addr
    return '"' + name + '" <' + addr + '>'
822 # Parse a date field
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a time tuple.

    Returns a 10-tuple (year, month, day, hour, minute, second, 0, 1, 0,
    tzoffset), or None if the string cannot be parsed.  tzoffset is the zone
    offset from UTC in seconds, or None when no zone could be determined.
    Zone names are looked up in _timezones; of the military zones only `Z'
    is recognized.
    """
    if not data:
        return None
    data = data.split()
    if not data:
        # Whitespace-only input leaves nothing to parse.
        return None
    if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued onto the time, e.g. "13:32:02-0500".
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            # Keep the sign with the zone so that int() sees it.
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    mm = mm.lower()
    if mm not in _monthnames:
        # Perhaps the order is "month day" rather than "day month".
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm)+1
    # Full month names occupy the second half of the table.
    if mm > 12: mm = mm - 12
    if dd[-1] == ',':
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # Year and time appear in the opposite order.
        yy, tm = tm, yy
    if yy[-1] == ',':
        yy = yy[:-1]
        if not yy:
            # The token was a lone comma.
            return None
    if not yy[0].isdigit():
        yy, tz = tz, yy
    if tm[-1] == ',':
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
    return (yy, mm, dd, thh, tmm, tss, 0, 1, 0, tzoffset)
def parsedate(data):
    """Convert a time string to a time tuple.

    Returns the first nine items of the parsedate_tz() result (i.e. without
    the zone offset), or None if the string could not be parsed.
    """
    parsed = parsedate_tz(data)
    if parsed is not None:
        return parsed[:9]
    return None
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    data[9] is the zone offset in seconds, or None.  With no offset the
    fields are interpreted as local time; otherwise they are interpreted in
    the given zone.  The result is a float number of seconds since the epoch.
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(data[:8] + (-1,))
    else:
        # Treat the fields as UTC and then remove the message's own offset.
        # calendar.timegm() does not consult the local time zone, unlike
        # time.mktime() followed by a time.timezone correction.
        import calendar
        return float(calendar.timegm(data[:8] + (0,)) - data[9])
def formatdate(timeval=None):
    """Returns time format preferred for Internet standards.

    Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123

    `timeval' is a number of seconds since the epoch; the current time is
    used when it is None.

    According to RFC 1123, day and month names must always be in
    English.  If not for that, this code could use strftime().  It
    can't because strftime() honors the locale and could generate
    non-English names.
    """
    if timeval is None:
        timeval = time.time()
    year, month, day, hour, minute, second, weekday = \
        time.gmtime(timeval)[:7]
    day_name = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")[weekday]
    month_name = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")[month - 1]
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
        day_name, day, month_name, year, hour, minute, second)
963 # When used as script, run a small test program.
964 # The first command line argument must be a filename containing one
965 # message in RFC-822 format.
if __name__ == '__main__':
    import sys, os
    # Only consult $HOME when no file was named on the command line, so an
    # unset HOME does not matter in that case.
    if sys.argv[1:]:
        path = sys.argv[1]
    else:
        path = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
    f = open(path, 'r')
    try:
        m = Message(f)
        # Each line is printed as one pre-formatted string so that the
        # output is the same whether print is a statement or a function.
        print('From: %s' % (m.getaddr('from'),))
        print('To: %s' % (m.getaddrlist('to'),))
        print('Subject: %s' % (m.getheader('subject'),))
        print('Date: %s' % (m.getheader('date'),))
        date = m.getdate_tz('date')
        if date:
            tz = date[-1]
            parts = ['ParsedDate:',
                     time.asctime(time.localtime(mktime_tz(date)))]
            if tz is not None:
                # Split the magnitude and add the sign afterwards; divmod()
                # on a negative offset floors and would give wrong minutes
                # (e.g. -0630 for -0530).
                if tz < 0:
                    sign = '-'
                else:
                    sign = '+'
                hhmm, ss = divmod(abs(tz), 60)
                hh, mm = divmod(hhmm, 60)
                parts.append('%s%02d%02d' % (sign, hh, mm))
                if ss:
                    parts.append('.%02d' % ss)
            print(' '.join(parts))
        else:
            # Date header missing or unparseable.
            print('ParsedDate: None')
        m.rewindbody()
        n = 0
        while f.readline():
            n += 1
        print('Lines: %d' % n)
        print('-'*70)
        print('len = %d' % len(m))
        if 'Date' in m: print('Date = %s' % (m['Date'],))
        if 'X-Nonsense' in m: pass
        print('keys = %s' % (m.keys(),))
        print('values = %s' % (m.values(),))
        print('items = %s' % (m.items(),))
    finally:
        f.close()
1001 print 'items =', m.items()