Updates of recent changes to logging.
[python.git] / Lib / rfc822.py
blob14cc7297f1dc0f829dd4f63de9051096defdd7e1
1 """RFC 2822 message manipulation.
3 Note: This is only a very rough sketch of a full RFC-822 parser; in particular
4 the tokenizing of addresses does not adhere to all the quoting rules.
6 Note: RFC 2822 is a long awaited update to RFC 822. This module should
7 conform to RFC 2822, and is thus mis-named (it's not worth renaming it). Some
8 effort at RFC 2822 updates has been made, but a thorough audit has not been
9 performed. Consider any RFC 2822 non-conformance to be a bug.
11 RFC 2822: http://www.faqs.org/rfcs/rfc2822.html
12 RFC 822 : http://www.faqs.org/rfcs/rfc822.html (obsolete)
14 Directions for use:
16 To create a Message object: first open a file, e.g.:
18 fp = open(file, 'r')
20 You can use any other legal way of getting an open file object, e.g. use
21 sys.stdin or call os.popen(). Then pass the open file object to the Message()
22 constructor:
24 m = Message(fp)
26 This class can work with any input object that supports a readline method. If
27 the input object has seek and tell capability, the rewindbody method will
28 work; also illegal lines will be pushed back onto the input stream. If the
29 input object lacks seek but has an `unread' method that can push back a line
30 of input, Message will use that to push back illegal lines. Thus this class
31 can be used to parse messages coming from a buffered stream.
33 The optional `seekable' argument is provided as a workaround for certain stdio
34 libraries in which tell() discards buffered data before discovering that the
35 lseek() system call doesn't work. For maximum portability, you should set the
36 seekable argument to zero to prevent that initial tell() call when passing in
37 an unseekable object such as a file object created from a socket object. If
38 it is 1 on entry -- which it is by default -- the tell() method of the open
39 file object is called once; if this raises an exception, seekable is reset to
40 0. For other nonzero values of seekable, this test is not made.
42 To get the text of a particular header there are several methods:
44 str = m.getheader(name)
45 str = m.getrawheader(name)
47 where name is the name of the header, e.g. 'Subject'. The difference is that
48 getheader() strips the leading and trailing whitespace, while getrawheader()
49 doesn't. Both functions retain embedded whitespace (including newlines)
50 exactly as they are specified in the header, and leave the case of the text
51 unchanged.
53 For addresses and address lists there are functions
55 realname, mailaddress = m.getaddr(name)
56 list = m.getaddrlist(name)
58 where the latter returns a list of (realname, mailaddr) tuples.
60 There is also a method
62 time = m.getdate(name)
64 which parses a Date-like field and returns a time-compatible tuple,
65 i.e. a tuple such as returned by time.localtime() or accepted by
66 time.mktime().
68 See the class definition for lower level access methods.
70 There are also some utility functions here.
71 """
72 # Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
74 import time
# Public names exported by ``from rfc822 import *``.  Helper functions such
# as parseaddr(), quote(), unquote() and formatdate() are deliberately not
# listed here and must be imported by name.
__all__ = ["Message","AddressList","parsedate","parsedate_tz","mktime_tz"]
_blanklines = ('\r\n', '\n')     # Optimization for islast()


class Message:
    """Represents a single RFC 2822-compliant message.

    The headers are read from a file-like object when the instance is
    created.  Afterwards they are available both as the raw list of lines
    (self.headers) and as a dictionary keyed by lower-cased header name
    (self.dict); the dictionary only remembers the *last* occurrence of a
    repeated header.
    """

    def __init__(self, fp, seekable=1):
        """Initialize the class instance and read the headers.

        fp is any object with a readline() method.  seekable tells whether
        fp supports tell()/seek(); when it is exactly 1 this is probed once
        with fp.tell() and reset to 0 if that fails.
        """
        if seekable == 1:
            # Exercise tell() to make sure it works
            # (and then assume seek() works, too)
            try:
                fp.tell()
            except (AttributeError, IOError):
                seekable = 0
        self.fp = fp
        self.seekable = seekable
        self.startofheaders = None
        self.startofbody = None

        if self.seekable:
            try:
                self.startofheaders = self.fp.tell()
            except IOError:
                self.seekable = 0

        self.readheaders()

        if self.seekable:
            try:
                self.startofbody = self.fp.tell()
            except IOError:
                self.seekable = 0

    def rewindbody(self):
        """Rewind the file to the start of the body (if seekable).

        Raises IOError if the underlying file is not seekable.
        """
        if not self.seekable:
            # Call form of raise: valid in both Python 2 and Python 3.
            raise IOError("unseekable file")
        self.fp.seek(self.startofbody)

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that terminates them.
        The (normally blank) line that ends the headers is consumed but not
        stored.  If a non-header line ends the headers (which is an error),
        an attempt is made to push it back onto the input; it is never
        stored either.

        The variable self.status is set to the empty string if all went well,
        otherwise it is an error message.  The variable self.headers is a
        completely uninterpreted list of lines contained in the header (so
        printing them will reproduce the header exactly as it appears in the
        file).  self.dict maps lower-cased header names to stripped values
        and self.unixfrom collects any leading Unix "From " lines.
        """
        self.dict = {}
        self.unixfrom = ''
        self.headers = lst = []
        self.status = ''
        headerseen = ""
        firstline = 1
        startofline = unread = tell = None
        # Prefer an explicit unread() hook; fall back to tell()/seek().
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while 1:
            if tell:
                try:
                    startofline = tell()
                except IOError:
                    startofline = tell = None
                    self.seekable = 0
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line.startswith('From '):
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # It's a continuation line.
                lst.append(line)
                x = (self.dict[headerseen] + "\n " + line.strip())
                self.dict[headerseen] = x.strip()
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                lst.append(line)
                self.dict[headerseen] = line[len(headerseen)+1:].strip()
                continue
            else:
                # It's not a header line; throw it back and stop here.
                if not self.dict:
                    self.status = 'No headers'
                else:
                    self.status = 'Non-header line where header expected'
                # Try to undo the read.
                if unread:
                    unread(line)
                elif tell:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break

    def isheader(self, line):
        """Determine whether a given line is a legal header.

        This method should return the header name, suitably canonicalized
        (here: lower-cased), or None if the line is not a header.
        You may override this method in order to use Message parsing on tagged
        data in RFC 2822-like formats with special header formats.
        """
        i = line.find(':')
        if i > 0:
            return line[:i].lower()
        return None

    def islast(self, line):
        """Determine whether a line is a legal end of RFC 2822 headers.

        You may override this method if your application wants to bend the
        rules, e.g. to strip trailing whitespace, or to recognize MH template
        separators ('--------').  For convenience (e.g. for code reading from
        sockets) a line consisting of \\r\\n also matches.
        """
        return line in _blanklines

    def iscomment(self, line):
        """Determine whether a line should be skipped entirely.

        You may override this method in order to use Message parsing on tagged
        data in RFC 2822-like formats that support embedded comments or
        free-text data.  The default implementation never skips a line.
        """
        return False

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines matching a given
        header name (and their continuation lines).  A list of the lines is
        returned, without interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple times, all
        occurrences are returned.  Case is not important in the header name.
        """
        name = name.lower() + ':'
        n = len(name)
        lst = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                # A new, different header ends the current match.
                hit = 0
            if hit:
                lst.append(line)
        return lst

    def getfirstmatchingheader(self, name):
        """Get the first header line matching name.

        This is similar to getallmatchingheaders, but it returns only the
        first matching header (and its continuation lines).
        """
        name = name.lower() + ':'
        n = len(name)
        lst = []
        hit = 0
        for line in self.headers:
            if hit:
                if not line[:1].isspace():
                    break
            elif line[:n].lower() == name:
                hit = 1
            if hit:
                lst.append(line)
        return lst

    def getrawheader(self, name):
        """A higher-level interface to getfirstmatchingheader().

        Return a string containing the literal text of the header but with the
        keyword stripped.  All leading, trailing and embedded whitespace is
        kept in the string, however.  Return None if the header does not
        occur.
        """
        lst = self.getfirstmatchingheader(name)
        if not lst:
            return None
        # Drop "Name:" from the first line only.
        lst[0] = lst[0][len(name) + 1:]
        return ''.join(lst)

    def getheader(self, name, default=None):
        """Get the header value for a name.

        This is the normal interface: it returns a stripped version of the
        header value for a given header name, or `default` (None unless
        given) if it doesn't exist.  This uses the dictionary version which
        finds the *last* such header.
        """
        return self.dict.get(name.lower(), default)
    get = getheader

    def getheaders(self, name):
        """Get all values for a header.

        This returns a list of values for headers given more than once; each
        value in the result list is stripped in the same way as the result of
        getheader().  If the header is not given, return an empty list.
        """
        result = []
        current = ''
        have_header = 0
        for s in self.getallmatchingheaders(name):
            if s[0].isspace():
                # Continuation line: fold it into the current value.
                if current:
                    current = "%s\n %s" % (current, s.strip())
                else:
                    current = s.strip()
            else:
                if have_header:
                    result.append(current)
                current = s[s.find(":") + 1:].strip()
                have_header = 1
        if have_header:
            result.append(current)
        return result

    def getaddr(self, name):
        """Get a single address from a header, as a tuple.

        An example return value:
        ('Guido van Rossum', 'guido@cwi.nl')

        Returns (None, None) when the header holds no address.
        """
        # New, by Ben Escoto
        alist = self.getaddrlist(name)
        if alist:
            return alist[0]
        else:
            return (None, None)

    def getaddrlist(self, name):
        """Get a list of addresses from a header.

        Retrieves a list of addresses from a header, where each address is a
        tuple as returned by getaddr().  Scans all named headers, so it works
        properly with multiple To: or Cc: headers for example.
        """
        raw = []
        for h in self.getallmatchingheaders(name):
            if h[0] in ' \t':
                raw.append(h)
            else:
                if raw:
                    # Separate the values of repeated headers.
                    raw.append(', ')
                i = h.find(':')
                if i > 0:
                    addr = h[i+1:]
                raw.append(addr)
        alladdrs = ''.join(raw)
        a = AddressList(alladdrs)
        return a.addresslist

    def getdate(self, name):
        """Retrieve a date field from a header.

        Retrieves a date field from the named header, returning a tuple
        compatible with time.mktime(), or None if the header is missing or
        cannot be parsed.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate(data)

    def getdate_tz(self, name):
        """Retrieve a date field from a header as a 10-tuple.

        The first 9 elements make up a tuple compatible with time.mktime(),
        and the 10th is the offset of the poster's time zone from GMT/UTC.
        Returns None if the header is missing or cannot be parsed.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate_tz(data)

    # Access as a dictionary (only finds *last* header of each type):

    def __len__(self):
        """Get the number of headers in a message."""
        return len(self.dict)

    def __getitem__(self, name):
        """Get a specific header, as from a dictionary."""
        return self.dict[name.lower()]

    def __setitem__(self, name, value):
        """Set the value of a header.

        Note: This is not a perfect inversion of __getitem__, because any
        changed headers get stuck at the end of the raw-headers list rather
        than where the altered header was.
        """
        del self[name] # Won't fail if it doesn't exist
        self.dict[name.lower()] = value
        text = name + ": " + value
        for line in text.split("\n"):
            self.headers.append(line + "\n")

    def __delitem__(self, name):
        """Delete all occurrences of a specific header, if it is present."""
        name = name.lower()
        if name not in self.dict:
            return
        del self.dict[name]
        name = name + ':'
        n = len(name)
        kept = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                hit = 0
            if not hit:
                kept.append(line)
        # Slice-assign so the existing list object is updated in place.
        self.headers[:] = kept

    def setdefault(self, name, default=""):
        """Return the header's value, adding it with `default` if missing."""
        lowername = name.lower()
        if lowername in self.dict:
            return self.dict[lowername]
        else:
            text = name + ": " + default
            for line in text.split("\n"):
                self.headers.append(line + "\n")
            self.dict[lowername] = default
            return default

    def has_key(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __contains__(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __iter__(self):
        """Iterate over the (lower-cased) header field names."""
        return iter(self.dict)

    def keys(self):
        """Get all of a message's header field names."""
        return self.dict.keys()

    def values(self):
        """Get all of a message's header field values."""
        return self.dict.values()

    def items(self):
        """Get all of a message's headers.

        Returns a list of name, value tuples.
        """
        return self.dict.items()

    def __str__(self):
        """Return the raw header lines joined back together."""
        return ''.join(self.headers)
461 # Utility functions
462 # -----------------
464 # XXX Should fix unquote() and quote() to be really conformant.
465 # XXX The inverses of the parse functions may also be useful.
def unquote(s):
    """Strip one layer of surrounding quotes or angle brackets from s.

    A string wrapped in double quotes has the quotes removed and the
    escapes for backslash and double quote undone; a string wrapped in
    <...> just loses the brackets.  Anything else (including strings
    shorter than two characters) is returned unchanged.
    """
    if len(s) <= 1:
        return s
    if s.startswith('"') and s.endswith('"'):
        inner = s[1:-1]
        inner = inner.replace('\\\\', '\\')
        return inner.replace('\\"', '"')
    if s.startswith('<') and s.endswith('>'):
        return s[1:-1]
    return s
def quote(s):
    """Escape backslashes and double quotes in s.

    Note that no surrounding quote characters are added; the result is
    only made safe for embedding inside a double-quoted string.
    """
    # Backslashes first, so the ones added for quotes are not doubled.
    escaped = s.replace('\\', '\\\\')
    return escaped.replace('"', '\\"')
def parseaddr(address):
    """Parse an address into a (realname, mailaddr) tuple.

    Only the first address found is returned; (None, None) is returned
    when nothing could be parsed.
    """
    parsed = AddressList(address).addresslist
    if parsed:
        return parsed[0]
    return (None, None)
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of
    RFC 2822 in front of you.

    http://www.faqs.org/rfcs/rfc2822.html

    The parser keeps a cursor (self.pos) into the header text (self.field);
    every get*() method consumes input by advancing that cursor.

    Note: this class interface is deprecated and may be removed in the future.
    Use rfc822.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing one or more
        addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address.

        Skips whitespace and collects any comments into self.commentlist.
        """
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else: break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses, as (realname,
        mailaddr) tuples.

        Parsing continues until the whole field has been consumed, so an
        empty or unparsable entry (e.g. the gap in "a@b, , c@d") is skipped
        instead of silently discarding every address that follows it.
        getaddress() always advances self.pos when input remains, which
        guarantees that this loop terminates.
        """
        result = []
        while self.pos < len(self.field):
            result += self.getaddress()
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (realname, mailaddr) tuples: empty if nothing
        could be parsed, one entry for a plain address, several for a group.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        oldcl = self.commentlist
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos += 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else: returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Step over a stray special so parsing can make progress.
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None if the cursor is not on a '<'.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = 0
        self.pos += 1
        self.gotonext()
        adlist = ""
        while self.pos < len(self.field):
            if expectroute:
                self.getdomain()
                expectroute = 0
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = 1
            elif self.field[self.pos] == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                # Step over the character that ended the addr-spec
                # (normally the closing '>').
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec."""
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos += 1
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % self.getquote())
            elif self.field[self.pos] in self.atomends:
                break
            else: aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            # No domain part: return just the local part.
            return ''.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        return ''.join(aslist) + self.getdomain()

    def getdomain(self):
        """Get the complete domain name from an address."""
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] in self.atomends:
                break
            else: sdlist.append(self.getatom())
        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments = 1):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.  If self is not
        looking at an instance of `beginchar' then getdelimited returns the
        empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        quote = 0
        self.pos += 1
        while self.pos < len(self.field):
            if quote == 1:
                # Previous character was a backslash: take this one verbatim.
                slist.append(self.field[self.pos])
                quote = 0
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                continue        # have already advanced pos from getcomment
            elif self.field[self.pos] == '\\':
                quote = 1
            else:
                slist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', 0)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', 1)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', 0)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            else: atomlist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
        runs of continuous whitespace into one space.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses.

    The parsed (realname, mailaddr) tuples live in self.addresslist; the
    arithmetic operators treat two lists as sets of such tuples.
    """

    def __init__(self, field):
        AddrlistClass.__init__(self, field)
        # An empty or None field yields an empty list without parsing.
        self.addresslist = self.getaddrlist() if field else []

    def __len__(self):
        return len(self.addresslist)

    def __str__(self):
        return ", ".join(map(dump_address_pair, self.addresslist))

    def __add__(self, other):
        # Set union
        union = AddressList(None)
        union.addresslist = self.addresslist[:]
        for pair in other.addresslist:
            if pair not in self.addresslist:
                union.addresslist.append(pair)
        return union

    def __iadd__(self, other):
        # Set union, in-place
        for pair in other.addresslist:
            if pair not in self.addresslist:
                self.addresslist.append(pair)
        return self

    def __sub__(self, other):
        # Set difference
        difference = AddressList(None)
        for pair in self.addresslist:
            if pair not in other.addresslist:
                difference.addresslist.append(pair)
        return difference

    def __isub__(self, other):
        # Set difference, in-place
        for pair in other.addresslist:
            if pair in self.addresslist:
                self.addresslist.remove(pair)
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
def dump_address_pair(pair):
    """Dump a (name, address) pair in a canonicalized form.

    With a non-empty name the result is '"name" <address>'; otherwise
    the bare address is returned.
    """
    if not pair[0]:
        return pair[1]
    return '"' + pair[0] + '" <' + pair[1] + '>'
823 # Parse a date field
# Abbreviated names first, full names second; parsedate_tz() folds an index
# above 12 back into the 1..12 range.
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

# Values are in +/-HHMM form, i.e. the same form as a numeric zone.
_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a time tuple.

    Returns a 10-tuple (year, month, day, hour, minute, second, 0, 1, 0,
    tzoffset) or None if the string cannot be parsed.  tzoffset is the
    zone's offset from UTC in seconds, or None when no recognizable zone
    was given.  The weekday, yearday and DST fields are placeholders.

    Named zones are limited to the entries of _timezones; of the military
    zones only 'Z' is recognized.
    """
    if not data:
        return None
    data = data.split()
    if not data:
        # Whitespace-only input: nothing to parse.
        return None
    if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    else:
        # no space after the "weekday,"?
        i = data[0].rfind(',')
        if i >= 0:
            data[0] = data[0][i+1:]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued to the time, e.g. "13:32:02+0100".
        s = data[3]
        i = s.find('+')
        if i > 0:
            data[3:] = [s[:i], s[i+1:]]
        else:
            i = s.find('-')
            if i > 0:
                # Keep the sign so the offset parses as negative.
                data[3:] = [s[:i], s[i:]]
            else:
                data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    mm = mm.lower()
    if mm not in _monthnames:
        # Perhaps day and month are swapped ("Nov 16" instead of "16 Nov").
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm)+1
    if mm > 12: mm = mm - 12
    if dd[-1] == ',':
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # Year and time are swapped.
        yy, tm = tm, yy
    if yy[-1] == ',':
        yy = yy[:-1]
    if not yy[0].isdigit():
        # Year and zone are swapped.
        yy, tz = tz, yy
    if tm[-1] == ',':
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
    return (yy, mm, dd, thh, tmm, tss, 0, 1, 0, tzoffset)
def parsedate(data):
    """Convert a time string to a time tuple.

    Same as parsedate_tz() but with the trailing timezone offset dropped,
    leaving a 9-tuple; None is passed through for unparsable input.
    """
    parsed = parsedate_tz(data)
    if parsed is not None:
        return parsed[:9]
    return None
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    data[9] is the zone offset in seconds east of UTC, or None.  With no
    zone information the fields are interpreted as local time via
    time.mktime(); otherwise the result does not depend on the local
    timezone of the machine at all.
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(data[:8] + (-1,))
    # Local import: the module does not otherwise need calendar.
    import calendar
    # timegm() treats the fields as UTC, so no detour through local time
    # (and its DST rules) is needed; float() keeps the return type that
    # time.mktime() used to give.
    return float(calendar.timegm(data[:9])) - data[9]
def formatdate(timeval=None):
    """Returns time format preferred for Internet standards.

    Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123

    timeval is a timestamp in seconds since the epoch; the current time
    is used when it is None.

    According to RFC 1123, day and month names must always be in
    English.  If not for that, this code could use strftime().  It
    can't because strftime() honors the locale and could generate
    non-English names.
    """
    if timeval is None:
        timeval = time.time()
    year, month, day, hour, minute, second, weekday = time.gmtime(timeval)[:7]
    weekdays = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
    months = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
              "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
        weekdays[weekday], day, months[month - 1],
        year, hour, minute, second)
969 # When used as script, run a small test program.
970 # The first command line argument must be a filename containing one
971 # message in RFC-822 format.
if __name__ == '__main__':
    # Small self-test: parse one message file and dump what was found.
    # Every print() below takes a single pre-formatted string, which behaves
    # the same as a Python 2 print statement and as a print() call.
    import sys
    import os
    path = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
    if sys.argv[1:]:
        path = sys.argv[1]
    f = open(path, 'r')
    try:
        m = Message(f)
        print('From: %s' % (m.getaddr('from'),))
        print('To: %s' % (m.getaddrlist('to'),))
        print('Subject: %s' % (m.getheader('subject'),))
        print('Date: %s' % (m.getheader('date'),))
        date = m.getdate_tz('date')
        # getdate_tz() returns None for a missing or unparsable Date header,
        # so only look inside the tuple once we know we have one.
        if date:
            tz = date[-1]
            parts = ['ParsedDate:',
                     time.asctime(time.localtime(mktime_tz(date)))]
            if tz is not None:
                # Render the offset (seconds) as +HHMM, plus stray seconds.
                hhmm, ss = divmod(tz, 60)
                hh, mm = divmod(hhmm, 60)
                parts.append("%+03d%02d" % (hh, mm))
                if ss:
                    parts.append(".%02d" % ss)
            print(' '.join(parts))
        else:
            print('ParsedDate: %s' % (None,))
        m.rewindbody()
        n = 0
        while f.readline():
            n += 1
        print('Lines: %s' % (n,))
        print('-'*70)
        print('len = %s' % (len(m),))
        if 'Date' in m:
            print('Date = %s' % (m['Date'],))
        if 'X-Nonsense' in m:
            pass
        print('keys = %s' % (m.keys(),))
        print('values = %s' % (m.values(),))
        print('items = %s' % (m.items(),))
    finally:
        # Always release the file handle, even if parsing fails.
        f.close()