1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
/*
Package mail implements parsing of mail messages.

For the most part, this package follows the syntax as specified by RFC 5322.
Notable divergences:
	* Obsolete address formats are not parsed, including addresses with
	  embedded route information.
	* Group addresses are not parsed.
	* The full range of spacing (the CFWS syntax element) is not supported,
	  such as breaking addresses across lines.
*/
33 var debug
= debugT(false)
37 func (d debugT
) Printf(format
string, args
...interface{}) {
39 log
.Printf(format
, args
...)
43 // A Message represents a parsed mail message.
49 // ReadMessage reads a message from r.
50 // The headers are parsed, and the body of the message will be available
51 // for reading from r.
52 func ReadMessage(r io
.Reader
) (msg
*Message
, err error
) {
53 tp
:= textproto
.NewReader(bufio
.NewReader(r
))
55 hdr
, err
:= tp
.ReadMIMEHeader()
// dateLayouts holds layouts suitable for passing to time.Parse.
// These are tried in order.
var dateLayouts []string

// init populates dateLayouts with every combination of the optional and
// alternative date-time components accepted by parseDate.
func init() {
	// Generate layouts based on RFC 5322, section 3.3.

	dows := [...]string{"", "Mon, "}   // day-of-week
	days := [...]string{"2", "02"}     // day = 1*2DIGIT
	years := [...]string{"2006", "06"} // year = 4*DIGIT / 2*DIGIT
	seconds := [...]string{":05", ""}  // second
	// "-0700 (MST)" is not in RFC 5322, but is common.
	zones := [...]string{"-0700", "MST", "-0700 (MST)"} // zone = (("+" / "-") 4DIGIT) / "GMT" / ...

	// The number of combinations is known up front, so allocate once.
	dateLayouts = make([]string, 0, len(dows)*len(days)*len(years)*len(seconds)*len(zones))
	for _, dow := range dows {
		for _, day := range days {
			for _, year := range years {
				for _, second := range seconds {
					for _, zone := range zones {
						s := dow + day + " Jan " + year + " 15:04" + second + " " + zone
						dateLayouts = append(dateLayouts, s)
					}
				}
			}
		}
	}
}

// parseDate parses date against each layout in dateLayouts, in order, and
// returns the time produced by the first layout that matches. If no layout
// matches, it returns the zero time.Time and a non-nil error.
func parseDate(date string) (time.Time, error) {
	for _, layout := range dateLayouts {
		t, err := time.Parse(layout, date)
		if err == nil {
			return t, nil
		}
	}
	return time.Time{}, errors.New("mail: header could not be parsed")
}
// A Header represents the key-value pairs in a mail message header.
type Header map[string][]string

// Get gets the first value associated with the given key.
// If there are no values associated with the key, Get returns "".
//
// The lookup is delegated to textproto.MIMEHeader.Get, which
// canonicalizes key before indexing the map.
func (h Header) Get(key string) string {
	return textproto.MIMEHeader(h).Get(key)
}
// ErrHeaderNotPresent is the error returned when a requested header
// field is not in the message.
var ErrHeaderNotPresent = errors.New("mail: header not in message")
115 // Date parses the Date header field.
116 func (h Header
) Date() (time
.Time
, error
) {
119 return time
.Time
{}, ErrHeaderNotPresent
121 return parseDate(hdr
)
124 // AddressList parses the named header field as a list of addresses.
125 func (h Header
) AddressList(key
string) ([]*Address
, error
) {
128 return nil, ErrHeaderNotPresent
130 return ParseAddressList(hdr
)
// Address represents a single mail address.
// An address such as "Barry Gibbs <bg@example.com>" is represented
// as Address{Name: "Barry Gibbs", Address: "bg@example.com"}.
type Address struct {
	Name    string // Proper name; may be empty.
	Address string // user@domain
}
141 // Parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>"
142 func ParseAddress(address
string) (*Address
, error
) {
143 return newAddrParser(address
).parseAddress()
146 // ParseAddressList parses the given string as a list of addresses.
147 func ParseAddressList(list
string) ([]*Address
, error
) {
148 return newAddrParser(list
).parseAddressList()
151 // String formats the address as a valid RFC 5322 address.
152 // If the address's name contains non-ASCII characters
153 // the name will be rendered according to RFC 2047.
154 func (a
*Address
) String() string {
155 s
:= "<" + a
.Address
+ ">"
159 // If every character is printable ASCII, quoting is simple.
161 for i
:= 0; i
< len(a
.Name
); i
++ {
162 // isWSP here should actually be isFWS,
163 // but we don't support folding yet.
164 if !isVchar(a
.Name
[i
]) && !isWSP(a
.Name
[i
]) {
170 b
:= bytes
.NewBufferString(`"`)
171 for i
:= 0; i
< len(a
.Name
); i
++ {
172 if !isQtext(a
.Name
[i
]) && !isWSP(a
.Name
[i
]) {
175 b
.WriteByte(a
.Name
[i
])
182 // UTF-8 "Q" encoding
183 b
:= bytes
.NewBufferString("=?utf-8?q?")
184 for i
:= 0; i
< len(a
.Name
); i
++ {
185 switch c
:= a
.Name
[i
]; {
188 case isVchar(c
) && c
!= '=' && c
!= '?' && c
!= '_':
191 fmt
.Fprintf(b
, "=%02X", c
)
199 type addrParser
[]byte
201 func newAddrParser(s
string) *addrParser
{
206 func (p
*addrParser
) parseAddressList() ([]*Address
, error
) {
210 addr
, err
:= p
.parseAddress()
214 list
= append(list
, addr
)
221 return nil, errors
.New("mail: expected comma")
227 // parseAddress parses a single RFC 5322 address at the start of p.
228 func (p
*addrParser
) parseAddress() (addr
*Address
, err error
) {
229 debug
.Printf("parseAddress: %q", *p
)
232 return nil, errors
.New("mail: no address")
235 // address = name-addr / addr-spec
236 // TODO(dsymonds): Support parsing group address.
238 // addr-spec has a more restricted grammar than name-addr,
239 // so try parsing it first, and fallback to name-addr.
240 // TODO(dsymonds): Is this really correct?
241 spec
, err
:= p
.consumeAddrSpec()
247 debug
.Printf("parseAddress: not an addr-spec: %v", err
)
248 debug
.Printf("parseAddress: state is now %q", *p
)
251 var displayName
string
253 displayName
, err
= p
.consumePhrase()
258 debug
.Printf("parseAddress: displayName=%q", displayName
)
260 // angle-addr = "<" addr-spec ">"
263 return nil, errors
.New("mail: no angle-addr")
265 spec
, err
= p
.consumeAddrSpec()
270 return nil, errors
.New("mail: unclosed angle-addr")
272 debug
.Printf("parseAddress: spec=%q", spec
)
280 // consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p.
281 func (p
*addrParser
) consumeAddrSpec() (spec
string, err error
) {
282 debug
.Printf("consumeAddrSpec: %q", *p
)
291 // local-part = dot-atom / quoted-string
295 return "", errors
.New("mail: no addr-spec")
299 debug
.Printf("consumeAddrSpec: parsing quoted-string")
300 localPart
, err
= p
.consumeQuotedString()
303 debug
.Printf("consumeAddrSpec: parsing dot-atom")
304 localPart
, err
= p
.consumeAtom(true)
307 debug
.Printf("consumeAddrSpec: failed: %v", err
)
312 return "", errors
.New("mail: missing @ in addr-spec")
315 // domain = dot-atom / domain-literal
319 return "", errors
.New("mail: no domain in addr-spec")
321 // TODO(dsymonds): Handle domain-literal
322 domain
, err
= p
.consumeAtom(true)
327 return localPart
+ "@" + domain
, nil
330 // consumePhrase parses the RFC 5322 phrase at the start of p.
331 func (p
*addrParser
) consumePhrase() (phrase
string, err error
) {
332 debug
.Printf("consumePhrase: [%s]", *p
)
336 // word = atom / quoted-string
340 return "", errors
.New("mail: missing phrase")
344 word
, err
= p
.consumeQuotedString()
347 // We actually parse dot-atom here to be more permissive
348 // than what RFC 5322 specifies.
349 word
, err
= p
.consumeAtom(true)
352 // RFC 2047 encoded-word starts with =?, ends with ?=, and has two other ?s.
353 if err
== nil && strings
.HasPrefix(word
, "=?") && strings
.HasSuffix(word
, "?=") && strings
.Count(word
, "?") == 4 {
354 word
, err
= decodeRFC2047Word(word
)
360 debug
.Printf("consumePhrase: consumed %q", word
)
361 words
= append(words
, word
)
363 // Ignore any error if we got at least one word.
364 if err
!= nil && len(words
) == 0 {
365 debug
.Printf("consumePhrase: hit err: %v", err
)
366 return "", fmt
.Errorf("mail: missing word in phrase: %v", err
)
368 phrase
= strings
.Join(words
, " ")
372 // consumeQuotedString parses the quoted string at the start of p.
373 func (p
*addrParser
) consumeQuotedString() (qs
string, err error
) {
374 // Assume first byte is '"'.
376 qsb
:= make([]byte, 0, 10)
380 return "", errors
.New("mail: unclosed quoted-string")
382 switch c
:= (*p
)[i
]; {
387 return "", errors
.New("mail: unclosed quoted-string")
389 qsb
= append(qsb
, (*p
)[i
+1])
391 case isQtext(c
), c
== ' ' || c
== '\t':
392 // qtext (printable US-ASCII excluding " and \), or
393 // FWS (almost; we're ignoring CRLF)
397 return "", fmt
.Errorf("mail: bad character in quoted-string: %q", c
)
401 return string(qsb
), nil
404 // consumeAtom parses an RFC 5322 atom at the start of p.
405 // If dot is true, consumeAtom parses an RFC 5322 dot-atom instead.
406 func (p
*addrParser
) consumeAtom(dot
bool) (atom
string, err error
) {
407 if !isAtext(p
.peek(), false) {
408 return "", errors
.New("mail: invalid string")
411 for ; i
< p
.len() && isAtext((*p
)[i
], dot
); i
++ {
413 atom
, *p
= string((*p
)[:i
]), (*p
)[i
:]
417 func (p
*addrParser
) consume(c
byte) bool {
418 if p
.empty() || p
.peek() != c
{
425 // skipSpace skips the leading space and tab characters.
426 func (p
*addrParser
) skipSpace() {
427 *p
= bytes
.TrimLeft(*p
, " \t")
430 func (p
*addrParser
) peek() byte {
434 func (p
*addrParser
) empty() bool {
438 func (p
*addrParser
) len() int {
442 func decodeRFC2047Word(s
string) (string, error
) {
443 fields
:= strings
.Split(s
, "?")
444 if len(fields
) != 5 || fields
[0] != "=" || fields
[4] != "=" {
445 return "", errors
.New("address not RFC 2047 encoded")
447 charset
, enc
:= strings
.ToLower(fields
[1]), strings
.ToLower(fields
[2])
448 if charset
!= "iso-8859-1" && charset
!= "utf-8" {
449 return "", fmt
.Errorf("charset not supported: %q", charset
)
452 in
:= bytes
.NewBufferString(fields
[3])
456 r
= base64
.NewDecoder(base64
.StdEncoding
, in
)
460 return "", fmt
.Errorf("RFC 2047 encoding not supported: %q", enc
)
463 dec
, err
:= ioutil
.ReadAll(r
)
470 b
:= new(bytes
.Buffer
)
471 for _
, c
:= range dec
{
474 return b
.String(), nil
476 return string(dec
), nil
481 type qDecoder
struct {
486 func (qd qDecoder
) Read(p
[]byte) (n
int, err error
) {
487 // This method writes at most one byte into p.
491 if _
, err
:= qd
.r
.Read(qd
.scratch
[:1]); err
!= nil {
494 switch c
:= qd
.scratch
[0]; {
496 if _
, err
:= io
.ReadFull(qd
.r
, qd
.scratch
[:2]); err
!= nil {
499 x
, err
:= strconv
.ParseInt(string(qd
.scratch
[:2]), 16, 64)
501 return 0, fmt
.Errorf("mail: invalid RFC 2047 encoding: %q", qd
.scratch
[:2])
// atextChars lists the characters permitted in an RFC 5322 atext:
// letters, digits and the listed printable specials.
//
// NOTE(review): the digit line was missing from the reviewed listing and is
// restored here per the RFC 5322 atext definition (ALPHA / DIGIT / specials)
// — confirm against the canonical source.
var atextChars = []byte("ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
	"abcdefghijklmnopqrstuvwxyz" +
	"0123456789" +
	"!#$%&'*+-/=?^_`{|}~")

// isAtext returns true if c is an RFC 5322 atext character.
// If dot is true, period is included.
func isAtext(c byte, dot bool) bool {
	if dot && c == '.' {
		return true
	}
	return bytes.IndexByte(atextChars, c) >= 0
}
// isQtext returns true if c is an RFC 5322 qtext character.
func isQtext(c byte) bool {
	// Printable US-ASCII, excluding backslash or quote.
	if c == '\\' || c == '"' {
		return false
	}
	return '!' <= c && c <= '~'
}
// isVchar returns true if c is an RFC 5322 VCHAR character.
func isVchar(c byte) bool {
	// Visible (printing) characters.
	return '!' <= c && c <= '~'
}
// isWSP returns true if c is a WSP (white space).
// WSP is a space or horizontal tab (RFC5234 Appendix B).
func isWSP(c byte) bool {
	return c == ' ' || c == '\t'
}