[to-be-committed,RISC-V,V4] movmem for RISCV with V extension
[official-gcc.git] / libgo / go / net / mail / message.go
blob985b6fcae271bc9eba8993e87b290f2723007422
1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 /*
6 Package mail implements parsing of mail messages.
8 For the most part, this package follows the syntax as specified by RFC 5322 and
9 extended by RFC 6532.
10 Notable divergences:
11 * Obsolete address formats are not parsed, including addresses with
12 embedded route information.
13 * The full range of spacing (the CFWS syntax element) is not supported,
14 such as breaking addresses across lines.
15 * No unicode normalization is performed.
16 * The special characters ()[]:;@\, are allowed to appear unquoted in names.
18 package mail
20 import (
21 "bufio"
22 "errors"
23 "fmt"
24 "io"
25 "log"
26 "mime"
27 "net/textproto"
28 "strings"
29 "sync"
30 "time"
31 "unicode/utf8"
// debug gates the parser's trace output. It is false here, which turns
// every debug.Printf call into a no-op.
var debug = debugT(false)

// debugT is a boolean switch whose Printf method logs only when it is true.
type debugT bool

// Printf forwards format and args to log.Printf when d is true and does
// nothing otherwise.
func (d debugT) Printf(format string, args ...any) {
	if !d {
		return
	}
	log.Printf(format, args...)
}
44 // A Message represents a parsed mail message.
45 type Message struct {
46 Header Header
47 Body io.Reader
50 // ReadMessage reads a message from r.
51 // The headers are parsed, and the body of the message will be available
52 // for reading from msg.Body.
53 func ReadMessage(r io.Reader) (msg *Message, err error) {
54 tp := textproto.NewReader(bufio.NewReader(r))
56 hdr, err := tp.ReadMIMEHeader()
57 if err != nil {
58 return nil, err
61 return &Message{
62 Header: Header(hdr),
63 Body: tp.R,
64 }, nil
// Layouts suitable for passing to time.Parse.
// These are tried in order. dateLayouts is filled by buildDateLayouts,
// which ParseDate runs through dateLayoutsBuildOnce.
var (
	dateLayoutsBuildOnce sync.Once
	dateLayouts          []string
)

// buildDateLayouts appends to dateLayouts every combination of the
// optional date-time components described in RFC 5322, section 3.3.
// The nesting order below fixes the order in which layouts are tried.
func buildDateLayouts() {
	weekdayOpts := []string{"", "Mon, "}  // day-of-week is optional
	dayOpts := []string{"2", "02"}        // day = 1*2DIGIT
	yearOpts := []string{"2006", "06"}    // year = 4*DIGIT / 2*DIGIT
	secondOpts := []string{":05", ""}     // seconds are optional
	zoneOpts := []string{"-0700", "MST"}  // numeric offset or zone abbreviation

	for _, weekday := range weekdayOpts {
		for _, day := range dayOpts {
			for _, year := range yearOpts {
				for _, second := range secondOpts {
					for _, zone := range zoneOpts {
						layout := weekday + day + " Jan " + year + " 15:04" + second + " " + zone
						dateLayouts = append(dateLayouts, layout)
					}
				}
			}
		}
	}
}
98 // ParseDate parses an RFC 5322 date string.
99 func ParseDate(date string) (time.Time, error) {
100 dateLayoutsBuildOnce.Do(buildDateLayouts)
101 // CR and LF must match and are tolerated anywhere in the date field.
102 date = strings.ReplaceAll(date, "\r\n", "")
103 if strings.Contains(date, "\r") {
104 return time.Time{}, errors.New("mail: header has a CR without LF")
106 // Re-using some addrParser methods which support obsolete text, i.e. non-printable ASCII
107 p := addrParser{date, nil}
108 p.skipSpace()
110 // RFC 5322: zone = (FWS ( "+" / "-" ) 4DIGIT) / obs-zone
111 // zone length is always 5 chars unless obsolete (obs-zone)
112 if ind := strings.IndexAny(p.s, "+-"); ind != -1 && len(p.s) >= ind+5 {
113 date = p.s[:ind+5]
114 p.s = p.s[ind+5:]
115 } else {
116 ind := strings.Index(p.s, "T")
117 if ind == 0 {
118 // In this case we have the following date formats:
119 // * Thu, 20 Nov 1997 09:55:06 MDT
120 // * Thu, 20 Nov 1997 09:55:06 MDT (MDT)
121 // * Thu, 20 Nov 1997 09:55:06 MDT (This comment)
122 ind = strings.Index(p.s[1:], "T")
123 if ind != -1 {
124 ind++
128 if ind != -1 && len(p.s) >= ind+5 {
129 // The last letter T of the obsolete time zone is checked when no standard time zone is found.
130 // If T is misplaced, the date to parse is garbage.
131 date = p.s[:ind+1]
132 p.s = p.s[ind+1:]
135 if !p.skipCFWS() {
136 return time.Time{}, errors.New("mail: misformatted parenthetical comment")
138 for _, layout := range dateLayouts {
139 t, err := time.Parse(layout, date)
140 if err == nil {
141 return t, nil
144 return time.Time{}, errors.New("mail: header could not be parsed")
// A Header represents the key-value pairs in a mail message header.
// Each key maps to the list of values recorded for that field.
type Header map[string][]string
150 // Get gets the first value associated with the given key.
151 // It is case insensitive; CanonicalMIMEHeaderKey is used
152 // to canonicalize the provided key.
153 // If there are no values associated with the key, Get returns "".
154 // To access multiple values of a key, or to use non-canonical keys,
155 // access the map directly.
156 func (h Header) Get(key string) string {
157 return textproto.MIMEHeader(h).Get(key)
// ErrHeaderNotPresent is the error returned by Header.Date and
// Header.AddressList when the requested header field has no value.
var ErrHeaderNotPresent = errors.New("mail: header not in message")
162 // Date parses the Date header field.
163 func (h Header) Date() (time.Time, error) {
164 hdr := h.Get("Date")
165 if hdr == "" {
166 return time.Time{}, ErrHeaderNotPresent
168 return ParseDate(hdr)
171 // AddressList parses the named header field as a list of addresses.
172 func (h Header) AddressList(key string) ([]*Address, error) {
173 hdr := h.Get(key)
174 if hdr == "" {
175 return nil, ErrHeaderNotPresent
177 return ParseAddressList(hdr)
// Address represents a single mail address.
// An address such as "Barry Gibbs <bg@example.com>" is represented
// as Address{Name: "Barry Gibbs", Address: "bg@example.com"}.
type Address struct {
	Name    string // Proper name; may be empty.
	Address string // user@domain
}
188 // ParseAddress parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>"
189 func ParseAddress(address string) (*Address, error) {
190 return (&addrParser{s: address}).parseSingleAddress()
193 // ParseAddressList parses the given string as a list of addresses.
194 func ParseAddressList(list string) ([]*Address, error) {
195 return (&addrParser{s: list}).parseAddressList()
// An AddressParser is an RFC 5322 address parser.
type AddressParser struct {
	// WordDecoder optionally specifies a decoder for RFC 2047 encoded-words.
	// When it is nil, the package's built-in decoder is used instead.
	WordDecoder *mime.WordDecoder
}
204 // Parse parses a single RFC 5322 address of the
205 // form "Gogh Fir <gf@example.com>" or "foo@example.com".
206 func (p *AddressParser) Parse(address string) (*Address, error) {
207 return (&addrParser{s: address, dec: p.WordDecoder}).parseSingleAddress()
210 // ParseList parses the given string as a list of comma-separated addresses
211 // of the form "Gogh Fir <gf@example.com>" or "foo@example.com".
212 func (p *AddressParser) ParseList(list string) ([]*Address, error) {
213 return (&addrParser{s: list, dec: p.WordDecoder}).parseAddressList()
216 // String formats the address as a valid RFC 5322 address.
217 // If the address's name contains non-ASCII characters
218 // the name will be rendered according to RFC 2047.
219 func (a *Address) String() string {
220 // Format address local@domain
221 at := strings.LastIndex(a.Address, "@")
222 var local, domain string
223 if at < 0 {
224 // This is a malformed address ("@" is required in addr-spec);
225 // treat the whole address as local-part.
226 local = a.Address
227 } else {
228 local, domain = a.Address[:at], a.Address[at+1:]
231 // Add quotes if needed
232 quoteLocal := false
233 for i, r := range local {
234 if isAtext(r, false, false) {
235 continue
237 if r == '.' {
238 // Dots are okay if they are surrounded by atext.
239 // We only need to check that the previous byte is
240 // not a dot, and this isn't the end of the string.
241 if i > 0 && local[i-1] != '.' && i < len(local)-1 {
242 continue
245 quoteLocal = true
246 break
248 if quoteLocal {
249 local = quoteString(local)
253 s := "<" + local + "@" + domain + ">"
255 if a.Name == "" {
256 return s
259 // If every character is printable ASCII, quoting is simple.
260 allPrintable := true
261 for _, r := range a.Name {
262 // isWSP here should actually be isFWS,
263 // but we don't support folding yet.
264 if !isVchar(r) && !isWSP(r) || isMultibyte(r) {
265 allPrintable = false
266 break
269 if allPrintable {
270 return quoteString(a.Name) + " " + s
273 // Text in an encoded-word in a display-name must not contain certain
274 // characters like quotes or parentheses (see RFC 2047 section 5.3).
275 // When this is the case encode the name using base64 encoding.
276 if strings.ContainsAny(a.Name, "\"#$%&'(),.:;<>@[]^`{|}~") {
277 return mime.BEncoding.Encode("utf-8", a.Name) + " " + s
279 return mime.QEncoding.Encode("utf-8", a.Name) + " " + s
// addrParser holds the state of one address (or date) parse.
// Parsing methods consume from the front of s as they go.
type addrParser struct {
	s   string            // input that has not been consumed yet
	dec *mime.WordDecoder // RFC 2047 decoder to use; may be nil
}
287 func (p *addrParser) parseAddressList() ([]*Address, error) {
288 var list []*Address
289 for {
290 p.skipSpace()
292 // allow skipping empty entries (RFC5322 obs-addr-list)
293 if p.consume(',') {
294 continue
297 addrs, err := p.parseAddress(true)
298 if err != nil {
299 return nil, err
301 list = append(list, addrs...)
303 if !p.skipCFWS() {
304 return nil, errors.New("mail: misformatted parenthetical comment")
306 if p.empty() {
307 break
309 if p.peek() != ',' {
310 return nil, errors.New("mail: expected comma")
313 // Skip empty entries for obs-addr-list.
314 for p.consume(',') {
315 p.skipSpace()
317 if p.empty() {
318 break
321 return list, nil
324 func (p *addrParser) parseSingleAddress() (*Address, error) {
325 addrs, err := p.parseAddress(true)
326 if err != nil {
327 return nil, err
329 if !p.skipCFWS() {
330 return nil, errors.New("mail: misformatted parenthetical comment")
332 if !p.empty() {
333 return nil, fmt.Errorf("mail: expected single address, got %q", p.s)
335 if len(addrs) == 0 {
336 return nil, errors.New("mail: empty group")
338 if len(addrs) > 1 {
339 return nil, errors.New("mail: group with multiple addresses")
341 return addrs[0], nil
344 // parseAddress parses a single RFC 5322 address at the start of p.
345 func (p *addrParser) parseAddress(handleGroup bool) ([]*Address, error) {
346 debug.Printf("parseAddress: %q", p.s)
347 p.skipSpace()
348 if p.empty() {
349 return nil, errors.New("mail: no address")
352 // address = mailbox / group
353 // mailbox = name-addr / addr-spec
354 // group = display-name ":" [group-list] ";" [CFWS]
356 // addr-spec has a more restricted grammar than name-addr,
357 // so try parsing it first, and fallback to name-addr.
358 // TODO(dsymonds): Is this really correct?
359 spec, err := p.consumeAddrSpec()
360 if err == nil {
361 var displayName string
362 p.skipSpace()
363 if !p.empty() && p.peek() == '(' {
364 displayName, err = p.consumeDisplayNameComment()
365 if err != nil {
366 return nil, err
370 return []*Address{{
371 Name: displayName,
372 Address: spec,
373 }}, err
375 debug.Printf("parseAddress: not an addr-spec: %v", err)
376 debug.Printf("parseAddress: state is now %q", p.s)
378 // display-name
379 var displayName string
380 if p.peek() != '<' {
381 displayName, err = p.consumePhrase()
382 if err != nil {
383 return nil, err
386 debug.Printf("parseAddress: displayName=%q", displayName)
388 p.skipSpace()
389 if handleGroup {
390 if p.consume(':') {
391 return p.consumeGroupList()
394 // angle-addr = "<" addr-spec ">"
395 if !p.consume('<') {
396 atext := true
397 for _, r := range displayName {
398 if !isAtext(r, true, false) {
399 atext = false
400 break
403 if atext {
404 // The input is like "foo.bar"; it's possible the input
405 // meant to be "foo.bar@domain", or "foo.bar <...>".
406 return nil, errors.New("mail: missing '@' or angle-addr")
408 // The input is like "Full Name", which couldn't possibly be a
409 // valid email address if followed by "@domain"; the input
410 // likely meant to be "Full Name <...>".
411 return nil, errors.New("mail: no angle-addr")
413 spec, err = p.consumeAddrSpec()
414 if err != nil {
415 return nil, err
417 if !p.consume('>') {
418 return nil, errors.New("mail: unclosed angle-addr")
420 debug.Printf("parseAddress: spec=%q", spec)
422 return []*Address{{
423 Name: displayName,
424 Address: spec,
425 }}, nil
428 func (p *addrParser) consumeGroupList() ([]*Address, error) {
429 var group []*Address
430 // handle empty group.
431 p.skipSpace()
432 if p.consume(';') {
433 p.skipCFWS()
434 return group, nil
437 for {
438 p.skipSpace()
439 // embedded groups not allowed.
440 addrs, err := p.parseAddress(false)
441 if err != nil {
442 return nil, err
444 group = append(group, addrs...)
446 if !p.skipCFWS() {
447 return nil, errors.New("mail: misformatted parenthetical comment")
449 if p.consume(';') {
450 p.skipCFWS()
451 break
453 if !p.consume(',') {
454 return nil, errors.New("mail: expected comma")
457 return group, nil
460 // consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p.
461 func (p *addrParser) consumeAddrSpec() (spec string, err error) {
462 debug.Printf("consumeAddrSpec: %q", p.s)
464 orig := *p
465 defer func() {
466 if err != nil {
467 *p = orig
471 // local-part = dot-atom / quoted-string
472 var localPart string
473 p.skipSpace()
474 if p.empty() {
475 return "", errors.New("mail: no addr-spec")
477 if p.peek() == '"' {
478 // quoted-string
479 debug.Printf("consumeAddrSpec: parsing quoted-string")
480 localPart, err = p.consumeQuotedString()
481 if localPart == "" {
482 err = errors.New("mail: empty quoted string in addr-spec")
484 } else {
485 // dot-atom
486 debug.Printf("consumeAddrSpec: parsing dot-atom")
487 localPart, err = p.consumeAtom(true, false)
489 if err != nil {
490 debug.Printf("consumeAddrSpec: failed: %v", err)
491 return "", err
494 if !p.consume('@') {
495 return "", errors.New("mail: missing @ in addr-spec")
498 // domain = dot-atom / domain-literal
499 var domain string
500 p.skipSpace()
501 if p.empty() {
502 return "", errors.New("mail: no domain in addr-spec")
504 // TODO(dsymonds): Handle domain-literal
505 domain, err = p.consumeAtom(true, false)
506 if err != nil {
507 return "", err
510 return localPart + "@" + domain, nil
513 // consumePhrase parses the RFC 5322 phrase at the start of p.
514 func (p *addrParser) consumePhrase() (phrase string, err error) {
515 debug.Printf("consumePhrase: [%s]", p.s)
516 // phrase = 1*word
517 var words []string
518 var isPrevEncoded bool
519 for {
520 // word = atom / quoted-string
521 var word string
522 p.skipSpace()
523 if p.empty() {
524 break
526 isEncoded := false
527 if p.peek() == '"' {
528 // quoted-string
529 word, err = p.consumeQuotedString()
530 } else {
531 // atom
532 // We actually parse dot-atom here to be more permissive
533 // than what RFC 5322 specifies.
534 word, err = p.consumeAtom(true, true)
535 if err == nil {
536 word, isEncoded, err = p.decodeRFC2047Word(word)
540 if err != nil {
541 break
543 debug.Printf("consumePhrase: consumed %q", word)
544 if isPrevEncoded && isEncoded {
545 words[len(words)-1] += word
546 } else {
547 words = append(words, word)
549 isPrevEncoded = isEncoded
551 // Ignore any error if we got at least one word.
552 if err != nil && len(words) == 0 {
553 debug.Printf("consumePhrase: hit err: %v", err)
554 return "", fmt.Errorf("mail: missing word in phrase: %v", err)
556 phrase = strings.Join(words, " ")
557 return phrase, nil
560 // consumeQuotedString parses the quoted string at the start of p.
561 func (p *addrParser) consumeQuotedString() (qs string, err error) {
562 // Assume first byte is '"'.
563 i := 1
564 qsb := make([]rune, 0, 10)
566 escaped := false
568 Loop:
569 for {
570 r, size := utf8.DecodeRuneInString(p.s[i:])
572 switch {
573 case size == 0:
574 return "", errors.New("mail: unclosed quoted-string")
576 case size == 1 && r == utf8.RuneError:
577 return "", fmt.Errorf("mail: invalid utf-8 in quoted-string: %q", p.s)
579 case escaped:
580 // quoted-pair = ("\" (VCHAR / WSP))
582 if !isVchar(r) && !isWSP(r) {
583 return "", fmt.Errorf("mail: bad character in quoted-string: %q", r)
586 qsb = append(qsb, r)
587 escaped = false
589 case isQtext(r) || isWSP(r):
590 // qtext (printable US-ASCII excluding " and \), or
591 // FWS (almost; we're ignoring CRLF)
592 qsb = append(qsb, r)
594 case r == '"':
595 break Loop
597 case r == '\\':
598 escaped = true
600 default:
601 return "", fmt.Errorf("mail: bad character in quoted-string: %q", r)
605 i += size
607 p.s = p.s[i+1:]
608 return string(qsb), nil
611 // consumeAtom parses an RFC 5322 atom at the start of p.
612 // If dot is true, consumeAtom parses an RFC 5322 dot-atom instead.
613 // If permissive is true, consumeAtom will not fail on:
614 // - leading/trailing/double dots in the atom (see golang.org/issue/4938)
615 // - special characters (RFC 5322 3.2.3) except '<', '>', ':' and '"' (see golang.org/issue/21018)
616 func (p *addrParser) consumeAtom(dot bool, permissive bool) (atom string, err error) {
617 i := 0
619 Loop:
620 for {
621 r, size := utf8.DecodeRuneInString(p.s[i:])
622 switch {
623 case size == 1 && r == utf8.RuneError:
624 return "", fmt.Errorf("mail: invalid utf-8 in address: %q", p.s)
626 case size == 0 || !isAtext(r, dot, permissive):
627 break Loop
629 default:
630 i += size
635 if i == 0 {
636 return "", errors.New("mail: invalid string")
638 atom, p.s = p.s[:i], p.s[i:]
639 if !permissive {
640 if strings.HasPrefix(atom, ".") {
641 return "", errors.New("mail: leading dot in atom")
643 if strings.Contains(atom, "..") {
644 return "", errors.New("mail: double dot in atom")
646 if strings.HasSuffix(atom, ".") {
647 return "", errors.New("mail: trailing dot in atom")
650 return atom, nil
653 func (p *addrParser) consumeDisplayNameComment() (string, error) {
654 if !p.consume('(') {
655 return "", errors.New("mail: comment does not start with (")
657 comment, ok := p.consumeComment()
658 if !ok {
659 return "", errors.New("mail: misformatted parenthetical comment")
662 // TODO(stapelberg): parse quoted-string within comment
663 words := strings.FieldsFunc(comment, func(r rune) bool { return r == ' ' || r == '\t' })
664 for idx, word := range words {
665 decoded, isEncoded, err := p.decodeRFC2047Word(word)
666 if err != nil {
667 return "", err
669 if isEncoded {
670 words[idx] = decoded
674 return strings.Join(words, " "), nil
677 func (p *addrParser) consume(c byte) bool {
678 if p.empty() || p.peek() != c {
679 return false
681 p.s = p.s[1:]
682 return true
685 // skipSpace skips the leading space and tab characters.
686 func (p *addrParser) skipSpace() {
687 p.s = strings.TrimLeft(p.s, " \t")
690 func (p *addrParser) peek() byte {
691 return p.s[0]
694 func (p *addrParser) empty() bool {
695 return p.len() == 0
698 func (p *addrParser) len() int {
699 return len(p.s)
702 // skipCFWS skips CFWS as defined in RFC5322.
703 func (p *addrParser) skipCFWS() bool {
704 p.skipSpace()
706 for {
707 if !p.consume('(') {
708 break
711 if _, ok := p.consumeComment(); !ok {
712 return false
715 p.skipSpace()
718 return true
721 func (p *addrParser) consumeComment() (string, bool) {
722 // '(' already consumed.
723 depth := 1
725 var comment string
726 for {
727 if p.empty() || depth == 0 {
728 break
731 if p.peek() == '\\' && p.len() > 1 {
732 p.s = p.s[1:]
733 } else if p.peek() == '(' {
734 depth++
735 } else if p.peek() == ')' {
736 depth--
738 if depth > 0 {
739 comment += p.s[:1]
741 p.s = p.s[1:]
744 return comment, depth == 0
747 func (p *addrParser) decodeRFC2047Word(s string) (word string, isEncoded bool, err error) {
748 if p.dec != nil {
749 word, err = p.dec.Decode(s)
750 } else {
751 word, err = rfc2047Decoder.Decode(s)
754 if err == nil {
755 return word, true, nil
758 if _, ok := err.(charsetError); ok {
759 return s, true, err
762 // Ignore invalid RFC 2047 encoded-word errors.
763 return s, false, nil
766 var rfc2047Decoder = mime.WordDecoder{
767 CharsetReader: func(charset string, input io.Reader) (io.Reader, error) {
768 return nil, charsetError(charset)
// charsetError is the error for a charset the decoder cannot handle;
// its value is the charset name.
type charsetError string

// Error implements the error interface, quoting the charset name.
func (e charsetError) Error() string {
	name := string(e)
	return fmt.Sprintf("charset not supported: %q", name)
}
778 // isAtext reports whether r is an RFC 5322 atext character.
779 // If dot is true, period is included.
780 // If permissive is true, RFC 5322 3.2.3 specials is included,
781 // except '<', '>', ':' and '"'.
782 func isAtext(r rune, dot, permissive bool) bool {
783 switch r {
784 case '.':
785 return dot
787 // RFC 5322 3.2.3. specials
788 case '(', ')', '[', ']', ';', '@', '\\', ',':
789 return permissive
791 case '<', '>', '"', ':':
792 return false
794 return isVchar(r)
797 // isQtext reports whether r is an RFC 5322 qtext character.
798 func isQtext(r rune) bool {
799 // Printable US-ASCII, excluding backslash or quote.
800 if r == '\\' || r == '"' {
801 return false
803 return isVchar(r)
806 // quoteString renders a string as an RFC 5322 quoted-string.
807 func quoteString(s string) string {
808 var buf strings.Builder
809 buf.WriteByte('"')
810 for _, r := range s {
811 if isQtext(r) || isWSP(r) {
812 buf.WriteRune(r)
813 } else if isVchar(r) {
814 buf.WriteByte('\\')
815 buf.WriteRune(r)
818 buf.WriteByte('"')
819 return buf.String()
822 // isVchar reports whether r is an RFC 5322 VCHAR character.
823 func isVchar(r rune) bool {
824 // Visible (printing) characters.
825 return '!' <= r && r <= '~' || isMultibyte(r)
// isMultibyte reports whether r is a multi-byte UTF-8 character
// as supported by RFC 6532, i.e. whether it is at or above utf8.RuneSelf.
func isMultibyte(r rune) bool {
	return utf8.RuneSelf <= r
}
// isWSP reports whether r is a WSP (white space).
// WSP is a space or horizontal tab (RFC 5234 Appendix B).
func isWSP(r rune) bool {
	switch r {
	case ' ', '\t':
		return true
	}
	return false
}