1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
// Package mail implements parsing of mail messages.
//
// For the most part, this package follows the syntax as specified by RFC 5322 and
// extended by RFC 6532.
// Notable divergences:
//   - Obsolete address formats are not parsed, including addresses with
//     embedded route information.
//   - The full range of spacing (the CFWS syntax element) is not supported,
//     such as breaking addresses across lines.
//   - No unicode normalization is performed.
//   - The special characters ()[]:;@\, are allowed to appear unquoted in names.

34 var debug
= debugT(false)
38 func (d debugT
) Printf(format
string, args
...any
) {
40 log
.Printf(format
, args
...)
44 // A Message represents a parsed mail message.
50 // ReadMessage reads a message from r.
51 // The headers are parsed, and the body of the message will be available
52 // for reading from msg.Body.
53 func ReadMessage(r io
.Reader
) (msg
*Message
, err error
) {
54 tp
:= textproto
.NewReader(bufio
.NewReader(r
))
56 hdr
, err
:= tp
.ReadMIMEHeader()
// Layouts suitable for passing to time.Parse.
// These are tried in order.
var (
	dateLayoutsBuildOnce sync.Once
	dateLayouts          []string
)

// buildDateLayouts appends to dateLayouts every combination of the optional
// and alternative date components below, in nested-loop order (the zone
// varies fastest, the day-of-week slowest).
func buildDateLayouts() {
	// Generate layouts based on RFC 5322, section 3.3.

	dows := [...]string{"", "Mon, "}   // day-of-week
	days := [...]string{"2", "02"}     // day = 1*2DIGIT
	years := [...]string{"2006", "06"} // year = 4*DIGIT / 2*DIGIT
	seconds := [...]string{":05", ""}  // second
	// "-0700 (MST)" is not in RFC 5322, but is common.
	zones := [...]string{"-0700", "MST"} // zone = (("+" / "-") 4DIGIT) / "GMT" / ...

	for _, dow := range dows {
		for _, day := range days {
			for _, year := range years {
				for _, second := range seconds {
					for _, zone := range zones {
						s := dow + day + " Jan " + year + " 15:04" + second + " " + zone
						dateLayouts = append(dateLayouts, s)
					}
				}
			}
		}
	}
}
98 // ParseDate parses an RFC 5322 date string.
99 func ParseDate(date
string) (time
.Time
, error
) {
100 dateLayoutsBuildOnce
.Do(buildDateLayouts
)
101 // CR and LF must match and are tolerated anywhere in the date field.
102 date
= strings
.ReplaceAll(date
, "\r\n", "")
103 if strings
.Contains(date
, "\r") {
104 return time
.Time
{}, errors
.New("mail: header has a CR without LF")
106 // Re-using some addrParser methods which support obsolete text, i.e. non-printable ASCII
107 p
:= addrParser
{date
, nil}
110 // RFC 5322: zone = (FWS ( "+" / "-" ) 4DIGIT) / obs-zone
111 // zone length is always 5 chars unless obsolete (obs-zone)
112 if ind
:= strings
.IndexAny(p
.s
, "+-"); ind
!= -1 && len(p
.s
) >= ind
+5 {
116 ind
:= strings
.Index(p
.s
, "T")
118 // In this case we have the following date formats:
119 // * Thu, 20 Nov 1997 09:55:06 MDT
120 // * Thu, 20 Nov 1997 09:55:06 MDT (MDT)
121 // * Thu, 20 Nov 1997 09:55:06 MDT (This comment)
122 ind
= strings
.Index(p
.s
[1:], "T")
128 if ind
!= -1 && len(p
.s
) >= ind
+5 {
129 // The last letter T of the obsolete time zone is checked when no standard time zone is found.
130 // If T is misplaced, the date to parse is garbage.
136 return time
.Time
{}, errors
.New("mail: misformatted parenthetical comment")
138 for _
, layout
:= range dateLayouts
{
139 t
, err
:= time
.Parse(layout
, date
)
144 return time
.Time
{}, errors
.New("mail: header could not be parsed")
// A Header represents the key-value pairs in a mail message header.
type Header map[string][]string

// Get gets the first value associated with the given key.
// It is case insensitive; CanonicalMIMEHeaderKey is used
// to canonicalize the provided key.
// If there are no values associated with the key, Get returns "".
// To access multiple values of a key, or to use non-canonical keys,
// access the map directly.
func (h Header) Get(key string) string {
	// Header and textproto.MIMEHeader share the same underlying map type,
	// so the lookup is delegated through a plain conversion.
	return textproto.MIMEHeader(h).Get(key)
}
// ErrHeaderNotPresent is the error returned by Header.Date and
// Header.AddressList in place of a parsed result.
var ErrHeaderNotPresent = errors.New("mail: header not in message")
162 // Date parses the Date header field.
163 func (h Header
) Date() (time
.Time
, error
) {
166 return time
.Time
{}, ErrHeaderNotPresent
168 return ParseDate(hdr
)
171 // AddressList parses the named header field as a list of addresses.
172 func (h Header
) AddressList(key
string) ([]*Address
, error
) {
175 return nil, ErrHeaderNotPresent
177 return ParseAddressList(hdr
)
// Address represents a single mail address.
// An address such as "Barry Gibbs <bg@example.com>" is represented
// as Address{Name: "Barry Gibbs", Address: "bg@example.com"}.
type Address struct {
	Name    string // Proper name; may be empty.
	Address string // user@domain
}
188 // ParseAddress parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>"
189 func ParseAddress(address
string) (*Address
, error
) {
190 return (&addrParser
{s
: address
}).parseSingleAddress()
193 // ParseAddressList parses the given string as a list of addresses.
194 func ParseAddressList(list
string) ([]*Address
, error
) {
195 return (&addrParser
{s
: list
}).parseAddressList()
// An AddressParser is an RFC 5322 address parser.
type AddressParser struct {
	// WordDecoder optionally specifies a decoder for RFC 2047 encoded-words.
	WordDecoder *mime.WordDecoder
}
204 // Parse parses a single RFC 5322 address of the
205 // form "Gogh Fir <gf@example.com>" or "foo@example.com".
206 func (p
*AddressParser
) Parse(address
string) (*Address
, error
) {
207 return (&addrParser
{s
: address
, dec
: p
.WordDecoder
}).parseSingleAddress()
210 // ParseList parses the given string as a list of comma-separated addresses
211 // of the form "Gogh Fir <gf@example.com>" or "foo@example.com".
212 func (p
*AddressParser
) ParseList(list
string) ([]*Address
, error
) {
213 return (&addrParser
{s
: list
, dec
: p
.WordDecoder
}).parseAddressList()
216 // String formats the address as a valid RFC 5322 address.
217 // If the address's name contains non-ASCII characters
218 // the name will be rendered according to RFC 2047.
219 func (a
*Address
) String() string {
220 // Format address local@domain
221 at
:= strings
.LastIndex(a
.Address
, "@")
222 var local
, domain
string
224 // This is a malformed address ("@" is required in addr-spec);
225 // treat the whole address as local-part.
228 local
, domain
= a
.Address
[:at
], a
.Address
[at
+1:]
231 // Add quotes if needed
233 for i
, r
:= range local
{
234 if isAtext(r
, false, false) {
238 // Dots are okay if they are surrounded by atext.
239 // We only need to check that the previous byte is
240 // not a dot, and this isn't the end of the string.
241 if i
> 0 && local
[i
-1] != '.' && i
< len(local
)-1 {
249 local
= quoteString(local
)
253 s
:= "<" + local
+ "@" + domain
+ ">"
259 // If every character is printable ASCII, quoting is simple.
261 for _
, r
:= range a
.Name
{
262 // isWSP here should actually be isFWS,
263 // but we don't support folding yet.
264 if !isVchar(r
) && !isWSP(r
) ||
isMultibyte(r
) {
270 return quoteString(a
.Name
) + " " + s
273 // Text in an encoded-word in a display-name must not contain certain
274 // characters like quotes or parentheses (see RFC 2047 section 5.3).
275 // When this is the case encode the name using base64 encoding.
276 if strings
.ContainsAny(a
.Name
, "\"#$%&'(),.:;<>@[]^`{|}~") {
277 return mime
.BEncoding
.Encode("utf-8", a
.Name
) + " " + s
279 return mime
.QEncoding
.Encode("utf-8", a
.Name
) + " " + s
282 type addrParser
struct {
284 dec
*mime
.WordDecoder
// may be nil
287 func (p
*addrParser
) parseAddressList() ([]*Address
, error
) {
292 // allow skipping empty entries (RFC5322 obs-addr-list)
297 addrs
, err
:= p
.parseAddress(true)
301 list
= append(list
, addrs
...)
304 return nil, errors
.New("mail: misformatted parenthetical comment")
310 return nil, errors
.New("mail: expected comma")
313 // Skip empty entries for obs-addr-list.
324 func (p
*addrParser
) parseSingleAddress() (*Address
, error
) {
325 addrs
, err
:= p
.parseAddress(true)
330 return nil, errors
.New("mail: misformatted parenthetical comment")
333 return nil, fmt
.Errorf("mail: expected single address, got %q", p
.s
)
336 return nil, errors
.New("mail: empty group")
339 return nil, errors
.New("mail: group with multiple addresses")
344 // parseAddress parses a single RFC 5322 address at the start of p.
345 func (p
*addrParser
) parseAddress(handleGroup
bool) ([]*Address
, error
) {
346 debug
.Printf("parseAddress: %q", p
.s
)
349 return nil, errors
.New("mail: no address")
352 // address = mailbox / group
353 // mailbox = name-addr / addr-spec
354 // group = display-name ":" [group-list] ";" [CFWS]
356 // addr-spec has a more restricted grammar than name-addr,
357 // so try parsing it first, and fallback to name-addr.
358 // TODO(dsymonds): Is this really correct?
359 spec
, err
:= p
.consumeAddrSpec()
361 var displayName
string
363 if !p
.empty() && p
.peek() == '(' {
364 displayName
, err
= p
.consumeDisplayNameComment()
375 debug
.Printf("parseAddress: not an addr-spec: %v", err
)
376 debug
.Printf("parseAddress: state is now %q", p
.s
)
379 var displayName
string
381 displayName
, err
= p
.consumePhrase()
386 debug
.Printf("parseAddress: displayName=%q", displayName
)
391 return p
.consumeGroupList()
394 // angle-addr = "<" addr-spec ">"
397 for _
, r
:= range displayName
{
398 if !isAtext(r
, true, false) {
404 // The input is like "foo.bar"; it's possible the input
405 // meant to be "foo.bar@domain", or "foo.bar <...>".
406 return nil, errors
.New("mail: missing '@' or angle-addr")
408 // The input is like "Full Name", which couldn't possibly be a
409 // valid email address if followed by "@domain"; the input
410 // likely meant to be "Full Name <...>".
411 return nil, errors
.New("mail: no angle-addr")
413 spec
, err
= p
.consumeAddrSpec()
418 return nil, errors
.New("mail: unclosed angle-addr")
420 debug
.Printf("parseAddress: spec=%q", spec
)
428 func (p
*addrParser
) consumeGroupList() ([]*Address
, error
) {
430 // handle empty group.
439 // embedded groups not allowed.
440 addrs
, err
:= p
.parseAddress(false)
444 group
= append(group
, addrs
...)
447 return nil, errors
.New("mail: misformatted parenthetical comment")
454 return nil, errors
.New("mail: expected comma")
460 // consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p.
461 func (p
*addrParser
) consumeAddrSpec() (spec
string, err error
) {
462 debug
.Printf("consumeAddrSpec: %q", p
.s
)
471 // local-part = dot-atom / quoted-string
475 return "", errors
.New("mail: no addr-spec")
479 debug
.Printf("consumeAddrSpec: parsing quoted-string")
480 localPart
, err
= p
.consumeQuotedString()
482 err
= errors
.New("mail: empty quoted string in addr-spec")
486 debug
.Printf("consumeAddrSpec: parsing dot-atom")
487 localPart
, err
= p
.consumeAtom(true, false)
490 debug
.Printf("consumeAddrSpec: failed: %v", err
)
495 return "", errors
.New("mail: missing @ in addr-spec")
498 // domain = dot-atom / domain-literal
502 return "", errors
.New("mail: no domain in addr-spec")
504 // TODO(dsymonds): Handle domain-literal
505 domain
, err
= p
.consumeAtom(true, false)
510 return localPart
+ "@" + domain
, nil
513 // consumePhrase parses the RFC 5322 phrase at the start of p.
514 func (p
*addrParser
) consumePhrase() (phrase
string, err error
) {
515 debug
.Printf("consumePhrase: [%s]", p
.s
)
518 var isPrevEncoded
bool
520 // word = atom / quoted-string
529 word
, err
= p
.consumeQuotedString()
532 // We actually parse dot-atom here to be more permissive
533 // than what RFC 5322 specifies.
534 word
, err
= p
.consumeAtom(true, true)
536 word
, isEncoded
, err
= p
.decodeRFC2047Word(word
)
543 debug
.Printf("consumePhrase: consumed %q", word
)
544 if isPrevEncoded
&& isEncoded
{
545 words
[len(words
)-1] += word
547 words
= append(words
, word
)
549 isPrevEncoded
= isEncoded
551 // Ignore any error if we got at least one word.
552 if err
!= nil && len(words
) == 0 {
553 debug
.Printf("consumePhrase: hit err: %v", err
)
554 return "", fmt
.Errorf("mail: missing word in phrase: %v", err
)
556 phrase
= strings
.Join(words
, " ")
560 // consumeQuotedString parses the quoted string at the start of p.
561 func (p
*addrParser
) consumeQuotedString() (qs
string, err error
) {
562 // Assume first byte is '"'.
564 qsb
:= make([]rune
, 0, 10)
570 r
, size
:= utf8
.DecodeRuneInString(p
.s
[i
:])
574 return "", errors
.New("mail: unclosed quoted-string")
576 case size
== 1 && r
== utf8
.RuneError
:
577 return "", fmt
.Errorf("mail: invalid utf-8 in quoted-string: %q", p
.s
)
580 // quoted-pair = ("\" (VCHAR / WSP))
582 if !isVchar(r
) && !isWSP(r
) {
583 return "", fmt
.Errorf("mail: bad character in quoted-string: %q", r
)
589 case isQtext(r
) ||
isWSP(r
):
590 // qtext (printable US-ASCII excluding " and \), or
591 // FWS (almost; we're ignoring CRLF)
601 return "", fmt
.Errorf("mail: bad character in quoted-string: %q", r
)
608 return string(qsb
), nil
611 // consumeAtom parses an RFC 5322 atom at the start of p.
612 // If dot is true, consumeAtom parses an RFC 5322 dot-atom instead.
613 // If permissive is true, consumeAtom will not fail on:
614 // - leading/trailing/double dots in the atom (see golang.org/issue/4938)
615 // - special characters (RFC 5322 3.2.3) except '<', '>', ':' and '"' (see golang.org/issue/21018)
616 func (p
*addrParser
) consumeAtom(dot
bool, permissive
bool) (atom
string, err error
) {
621 r
, size
:= utf8
.DecodeRuneInString(p
.s
[i
:])
623 case size
== 1 && r
== utf8
.RuneError
:
624 return "", fmt
.Errorf("mail: invalid utf-8 in address: %q", p
.s
)
626 case size
== 0 ||
!isAtext(r
, dot
, permissive
):
636 return "", errors
.New("mail: invalid string")
638 atom
, p
.s
= p
.s
[:i
], p
.s
[i
:]
640 if strings
.HasPrefix(atom
, ".") {
641 return "", errors
.New("mail: leading dot in atom")
643 if strings
.Contains(atom
, "..") {
644 return "", errors
.New("mail: double dot in atom")
646 if strings
.HasSuffix(atom
, ".") {
647 return "", errors
.New("mail: trailing dot in atom")
653 func (p
*addrParser
) consumeDisplayNameComment() (string, error
) {
655 return "", errors
.New("mail: comment does not start with (")
657 comment
, ok
:= p
.consumeComment()
659 return "", errors
.New("mail: misformatted parenthetical comment")
662 // TODO(stapelberg): parse quoted-string within comment
663 words
:= strings
.FieldsFunc(comment
, func(r rune
) bool { return r
== ' ' || r
== '\t' })
664 for idx
, word
:= range words
{
665 decoded
, isEncoded
, err
:= p
.decodeRFC2047Word(word
)
674 return strings
.Join(words
, " "), nil
677 func (p
*addrParser
) consume(c
byte) bool {
678 if p
.empty() || p
.peek() != c
{
685 // skipSpace skips the leading space and tab characters.
686 func (p
*addrParser
) skipSpace() {
687 p
.s
= strings
.TrimLeft(p
.s
, " \t")
690 func (p
*addrParser
) peek() byte {
694 func (p
*addrParser
) empty() bool {
698 func (p
*addrParser
) len() int {
702 // skipCFWS skips CFWS as defined in RFC5322.
703 func (p
*addrParser
) skipCFWS() bool {
711 if _
, ok
:= p
.consumeComment(); !ok
{
721 func (p
*addrParser
) consumeComment() (string, bool) {
722 // '(' already consumed.
727 if p
.empty() || depth
== 0 {
731 if p
.peek() == '\\' && p
.len() > 1 {
733 } else if p
.peek() == '(' {
735 } else if p
.peek() == ')' {
744 return comment
, depth
== 0
747 func (p
*addrParser
) decodeRFC2047Word(s
string) (word
string, isEncoded
bool, err error
) {
749 word
, err
= p
.dec
.Decode(s
)
751 word
, err
= rfc2047Decoder
.Decode(s
)
755 return word
, true, nil
758 if _
, ok
:= err
.(charsetError
); ok
{
762 // Ignore invalid RFC 2047 encoded-word errors.
766 var rfc2047Decoder
= mime
.WordDecoder
{
767 CharsetReader
: func(charset
string, input io
.Reader
) (io
.Reader
, error
) {
768 return nil, charsetError(charset
)
// charsetError is an error whose underlying string is the name of a
// charset that is not supported.
type charsetError string

// Error implements the error interface, quoting the charset name.
func (e charsetError) Error() string {
	return fmt.Sprintf("charset not supported: %q", string(e))
}
778 // isAtext reports whether r is an RFC 5322 atext character.
779 // If dot is true, period is included.
780 // If permissive is true, RFC 5322 3.2.3 specials is included,
781 // except '<', '>', ':' and '"'.
782 func isAtext(r rune
, dot
, permissive
bool) bool {
787 // RFC 5322 3.2.3. specials
788 case '(', ')', '[', ']', ';', '@', '\\', ',':
791 case '<', '>', '"', ':':
797 // isQtext reports whether r is an RFC 5322 qtext character.
798 func isQtext(r rune
) bool {
799 // Printable US-ASCII, excluding backslash or quote.
800 if r
== '\\' || r
== '"' {
806 // quoteString renders a string as an RFC 5322 quoted-string.
807 func quoteString(s
string) string {
808 var buf strings
.Builder
810 for _
, r
:= range s
{
811 if isQtext(r
) ||
isWSP(r
) {
813 } else if isVchar(r
) {
822 // isVchar reports whether r is an RFC 5322 VCHAR character.
823 func isVchar(r rune
) bool {
824 // Visible (printing) characters.
825 return '!' <= r
&& r
<= '~' ||
isMultibyte(r
)
// isMultibyte reports whether r is a multi-byte UTF-8 character
// as supported by RFC 6532, i.e. whether r is at or above utf8.RuneSelf.
func isMultibyte(r rune) bool {
	return r >= utf8.RuneSelf
}
// isWSP reports whether r is a WSP (white space).
// WSP is a space or horizontal tab (RFC 5234 Appendix B).
func isWSP(r rune) bool {
	return r == ' ' || r == '\t'
}