libgo: Update to Go 1.3 release.
[official-gcc.git] / libgo / go / net / mail / message.go
blobba0778caa73af448b960f71557b239f9f2debfd4
1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 /*
6 Package mail implements parsing of mail messages.
8 For the most part, this package follows the syntax as specified by RFC 5322.
9 Notable divergences:
10 * Obsolete address formats are not parsed, including addresses with
11 embedded route information.
12 * Group addresses are not parsed.
13 * The full range of spacing (the CFWS syntax element) is not supported,
14 such as breaking addresses across lines.
16 package mail
18 import (
19 "bufio"
20 "bytes"
21 "encoding/base64"
22 "errors"
23 "fmt"
24 "io"
25 "io/ioutil"
26 "log"
27 "net/textproto"
28 "strconv"
29 "strings"
30 "time"
// debug gates the parser's trace output; it is disabled by default.
var debug = debugT(false)

// debugT is a boolean switch for diagnostic logging.
type debugT bool

// Printf passes format and args to log.Printf when d is true and is a
// no-op otherwise.
func (d debugT) Printf(format string, args ...interface{}) {
	if !d {
		return
	}
	log.Printf(format, args...)
}
43 // A Message represents a parsed mail message.
44 type Message struct {
45 Header Header
46 Body io.Reader
49 // ReadMessage reads a message from r.
50 // The headers are parsed, and the body of the message will be available
51 // for reading from r.
52 func ReadMessage(r io.Reader) (msg *Message, err error) {
53 tp := textproto.NewReader(bufio.NewReader(r))
55 hdr, err := tp.ReadMIMEHeader()
56 if err != nil {
57 return nil, err
60 return &Message{
61 Header: Header(hdr),
62 Body: tp.R,
63 }, nil
// dateLayouts holds layouts suitable for passing to time.Parse.
// They are tried in slice order.
var dateLayouts []string

func init() {
	// Enumerate the layouts described by RFC 5322, section 3.3.
	var (
		weekdayForms = []string{"", "Mon, "} // optional day-of-week
		dayForms     = []string{"2", "02"}   // day = 1*2DIGIT
		yearForms    = []string{"2006", "06"} // year = 4*DIGIT / 2*DIGIT
		secondForms  = []string{":05", ""}   // optional seconds
		// "-0700 (MST)" is not in RFC 5322, but is common.
		zoneForms = []string{"-0700", "MST", "-0700 (MST)"} // zone = (("+" / "-") 4DIGIT) / "GMT" / ...
	)

	for _, wd := range weekdayForms {
		for _, dd := range dayForms {
			for _, yy := range yearForms {
				// Everything up to the minutes is fixed for this combination.
				prefix := wd + dd + " Jan " + yy + " 15:04"
				for _, ss := range secondForms {
					for _, tz := range zoneForms {
						dateLayouts = append(dateLayouts, prefix+ss+" "+tz)
					}
				}
			}
		}
	}
}
94 func parseDate(date string) (time.Time, error) {
95 for _, layout := range dateLayouts {
96 t, err := time.Parse(layout, date)
97 if err == nil {
98 return t, nil
101 return time.Time{}, errors.New("mail: header could not be parsed")
// A Header represents the key-value pairs in a mail message header.
type Header map[string][]string

// Get returns the first value stored under key, or "" when the key has
// no values. The lookup is delegated to textproto.MIMEHeader.Get.
func (h Header) Get(key string) string {
	fields := textproto.MIMEHeader(h)
	return fields.Get(key)
}
113 var ErrHeaderNotPresent = errors.New("mail: header not in message")
115 // Date parses the Date header field.
116 func (h Header) Date() (time.Time, error) {
117 hdr := h.Get("Date")
118 if hdr == "" {
119 return time.Time{}, ErrHeaderNotPresent
121 return parseDate(hdr)
124 // AddressList parses the named header field as a list of addresses.
125 func (h Header) AddressList(key string) ([]*Address, error) {
126 hdr := h.Get(key)
127 if hdr == "" {
128 return nil, ErrHeaderNotPresent
130 return ParseAddressList(hdr)
133 // Address represents a single mail address.
134 // An address such as "Barry Gibbs <bg@example.com>" is represented
135 // as Address{Name: "Barry Gibbs", Address: "bg@example.com"}.
136 type Address struct {
137 Name string // Proper name; may be empty.
138 Address string // user@domain
141 // Parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>"
142 func ParseAddress(address string) (*Address, error) {
143 return newAddrParser(address).parseAddress()
146 // ParseAddressList parses the given string as a list of addresses.
147 func ParseAddressList(list string) ([]*Address, error) {
148 return newAddrParser(list).parseAddressList()
151 // String formats the address as a valid RFC 5322 address.
152 // If the address's name contains non-ASCII characters
153 // the name will be rendered according to RFC 2047.
154 func (a *Address) String() string {
155 s := "<" + a.Address + ">"
156 if a.Name == "" {
157 return s
159 // If every character is printable ASCII, quoting is simple.
160 allPrintable := true
161 for i := 0; i < len(a.Name); i++ {
162 // isWSP here should actually be isFWS,
163 // but we don't support folding yet.
164 if !isVchar(a.Name[i]) && !isWSP(a.Name[i]) {
165 allPrintable = false
166 break
169 if allPrintable {
170 b := bytes.NewBufferString(`"`)
171 for i := 0; i < len(a.Name); i++ {
172 if !isQtext(a.Name[i]) && !isWSP(a.Name[i]) {
173 b.WriteByte('\\')
175 b.WriteByte(a.Name[i])
177 b.WriteString(`" `)
178 b.WriteString(s)
179 return b.String()
182 // UTF-8 "Q" encoding
183 b := bytes.NewBufferString("=?utf-8?q?")
184 for i := 0; i < len(a.Name); i++ {
185 switch c := a.Name[i]; {
186 case c == ' ':
187 b.WriteByte('_')
188 case isVchar(c) && c != '=' && c != '?' && c != '_':
189 b.WriteByte(c)
190 default:
191 fmt.Fprintf(b, "=%02X", c)
194 b.WriteString("?= ")
195 b.WriteString(s)
196 return b.String()
// addrParser is the remaining, not yet consumed input of an address parse.
type addrParser []byte

// newAddrParser returns a parser whose input is the bytes of s.
func newAddrParser(s string) *addrParser {
	p := new(addrParser)
	*p = addrParser(s)
	return p
}
206 func (p *addrParser) parseAddressList() ([]*Address, error) {
207 var list []*Address
208 for {
209 p.skipSpace()
210 addr, err := p.parseAddress()
211 if err != nil {
212 return nil, err
214 list = append(list, addr)
216 p.skipSpace()
217 if p.empty() {
218 break
220 if !p.consume(',') {
221 return nil, errors.New("mail: expected comma")
224 return list, nil
227 // parseAddress parses a single RFC 5322 address at the start of p.
228 func (p *addrParser) parseAddress() (addr *Address, err error) {
229 debug.Printf("parseAddress: %q", *p)
230 p.skipSpace()
231 if p.empty() {
232 return nil, errors.New("mail: no address")
235 // address = name-addr / addr-spec
236 // TODO(dsymonds): Support parsing group address.
238 // addr-spec has a more restricted grammar than name-addr,
239 // so try parsing it first, and fallback to name-addr.
240 // TODO(dsymonds): Is this really correct?
241 spec, err := p.consumeAddrSpec()
242 if err == nil {
243 return &Address{
244 Address: spec,
245 }, err
247 debug.Printf("parseAddress: not an addr-spec: %v", err)
248 debug.Printf("parseAddress: state is now %q", *p)
250 // display-name
251 var displayName string
252 if p.peek() != '<' {
253 displayName, err = p.consumePhrase()
254 if err != nil {
255 return nil, err
258 debug.Printf("parseAddress: displayName=%q", displayName)
260 // angle-addr = "<" addr-spec ">"
261 p.skipSpace()
262 if !p.consume('<') {
263 return nil, errors.New("mail: no angle-addr")
265 spec, err = p.consumeAddrSpec()
266 if err != nil {
267 return nil, err
269 if !p.consume('>') {
270 return nil, errors.New("mail: unclosed angle-addr")
272 debug.Printf("parseAddress: spec=%q", spec)
274 return &Address{
275 Name: displayName,
276 Address: spec,
277 }, nil
280 // consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p.
281 func (p *addrParser) consumeAddrSpec() (spec string, err error) {
282 debug.Printf("consumeAddrSpec: %q", *p)
284 orig := *p
285 defer func() {
286 if err != nil {
287 *p = orig
291 // local-part = dot-atom / quoted-string
292 var localPart string
293 p.skipSpace()
294 if p.empty() {
295 return "", errors.New("mail: no addr-spec")
297 if p.peek() == '"' {
298 // quoted-string
299 debug.Printf("consumeAddrSpec: parsing quoted-string")
300 localPart, err = p.consumeQuotedString()
301 } else {
302 // dot-atom
303 debug.Printf("consumeAddrSpec: parsing dot-atom")
304 localPart, err = p.consumeAtom(true)
306 if err != nil {
307 debug.Printf("consumeAddrSpec: failed: %v", err)
308 return "", err
311 if !p.consume('@') {
312 return "", errors.New("mail: missing @ in addr-spec")
315 // domain = dot-atom / domain-literal
316 var domain string
317 p.skipSpace()
318 if p.empty() {
319 return "", errors.New("mail: no domain in addr-spec")
321 // TODO(dsymonds): Handle domain-literal
322 domain, err = p.consumeAtom(true)
323 if err != nil {
324 return "", err
327 return localPart + "@" + domain, nil
330 // consumePhrase parses the RFC 5322 phrase at the start of p.
331 func (p *addrParser) consumePhrase() (phrase string, err error) {
332 debug.Printf("consumePhrase: [%s]", *p)
333 // phrase = 1*word
334 var words []string
335 for {
336 // word = atom / quoted-string
337 var word string
338 p.skipSpace()
339 if p.empty() {
340 return "", errors.New("mail: missing phrase")
342 if p.peek() == '"' {
343 // quoted-string
344 word, err = p.consumeQuotedString()
345 } else {
346 // atom
347 // We actually parse dot-atom here to be more permissive
348 // than what RFC 5322 specifies.
349 word, err = p.consumeAtom(true)
352 // RFC 2047 encoded-word starts with =?, ends with ?=, and has two other ?s.
353 if err == nil && strings.HasPrefix(word, "=?") && strings.HasSuffix(word, "?=") && strings.Count(word, "?") == 4 {
354 word, err = decodeRFC2047Word(word)
357 if err != nil {
358 break
360 debug.Printf("consumePhrase: consumed %q", word)
361 words = append(words, word)
363 // Ignore any error if we got at least one word.
364 if err != nil && len(words) == 0 {
365 debug.Printf("consumePhrase: hit err: %v", err)
366 return "", fmt.Errorf("mail: missing word in phrase: %v", err)
368 phrase = strings.Join(words, " ")
369 return phrase, nil
372 // consumeQuotedString parses the quoted string at the start of p.
373 func (p *addrParser) consumeQuotedString() (qs string, err error) {
374 // Assume first byte is '"'.
375 i := 1
376 qsb := make([]byte, 0, 10)
377 Loop:
378 for {
379 if i >= p.len() {
380 return "", errors.New("mail: unclosed quoted-string")
382 switch c := (*p)[i]; {
383 case c == '"':
384 break Loop
385 case c == '\\':
386 if i+1 == p.len() {
387 return "", errors.New("mail: unclosed quoted-string")
389 qsb = append(qsb, (*p)[i+1])
390 i += 2
391 case isQtext(c), c == ' ' || c == '\t':
392 // qtext (printable US-ASCII excluding " and \), or
393 // FWS (almost; we're ignoring CRLF)
394 qsb = append(qsb, c)
396 default:
397 return "", fmt.Errorf("mail: bad character in quoted-string: %q", c)
400 *p = (*p)[i+1:]
401 return string(qsb), nil
404 // consumeAtom parses an RFC 5322 atom at the start of p.
405 // If dot is true, consumeAtom parses an RFC 5322 dot-atom instead.
406 func (p *addrParser) consumeAtom(dot bool) (atom string, err error) {
407 if !isAtext(p.peek(), false) {
408 return "", errors.New("mail: invalid string")
410 i := 1
411 for ; i < p.len() && isAtext((*p)[i], dot); i++ {
413 atom, *p = string((*p)[:i]), (*p)[i:]
414 return atom, nil
417 func (p *addrParser) consume(c byte) bool {
418 if p.empty() || p.peek() != c {
419 return false
421 *p = (*p)[1:]
422 return true
425 // skipSpace skips the leading space and tab characters.
426 func (p *addrParser) skipSpace() {
427 *p = bytes.TrimLeft(*p, " \t")
430 func (p *addrParser) peek() byte {
431 return (*p)[0]
434 func (p *addrParser) empty() bool {
435 return p.len() == 0
438 func (p *addrParser) len() int {
439 return len(*p)
442 func decodeRFC2047Word(s string) (string, error) {
443 fields := strings.Split(s, "?")
444 if len(fields) != 5 || fields[0] != "=" || fields[4] != "=" {
445 return "", errors.New("address not RFC 2047 encoded")
447 charset, enc := strings.ToLower(fields[1]), strings.ToLower(fields[2])
448 if charset != "iso-8859-1" && charset != "utf-8" {
449 return "", fmt.Errorf("charset not supported: %q", charset)
452 in := bytes.NewBufferString(fields[3])
453 var r io.Reader
454 switch enc {
455 case "b":
456 r = base64.NewDecoder(base64.StdEncoding, in)
457 case "q":
458 r = qDecoder{r: in}
459 default:
460 return "", fmt.Errorf("RFC 2047 encoding not supported: %q", enc)
463 dec, err := ioutil.ReadAll(r)
464 if err != nil {
465 return "", err
468 switch charset {
469 case "iso-8859-1":
470 b := new(bytes.Buffer)
471 for _, c := range dec {
472 b.WriteRune(rune(c))
474 return b.String(), nil
475 case "utf-8":
476 return string(dec), nil
478 panic("unreachable")
// qDecoder is an io.Reader that decodes RFC 2047 "Q"-encoded text read
// from r: "=XX" becomes the byte with hexadecimal value XX, "_" becomes a
// space, and every other byte is passed through unchanged.
type qDecoder struct {
	r       io.Reader
	scratch [2]byte // staging area for bytes pulled from r
}

// Read decodes at most one byte into p per call.
// It returns an error for an "=" escape that is not followed by exactly
// two hexadecimal digits, and io.ErrUnexpectedEOF when the input ends in
// the middle of an escape.
func (qd qDecoder) Read(p []byte) (n int, err error) {
	// This method writes at most one byte into p.
	if len(p) == 0 {
		return 0, nil
	}
	// ReadFull guarantees a byte was actually delivered before scratch[0]
	// is inspected; a bare Read may legally return (0, nil).
	if _, err := io.ReadFull(qd.r, qd.scratch[:1]); err != nil {
		return 0, err
	}
	switch c := qd.scratch[0]; c {
	case '=':
		if _, err := io.ReadFull(qd.r, qd.scratch[:2]); err != nil {
			// A lone "=" at the end is a truncated escape, not a clean
			// end of input; do not let it pass as io.EOF.
			if err == io.EOF {
				err = io.ErrUnexpectedEOF
			}
			return 0, err
		}
		// ParseUint with an 8-bit size rejects signs ("=-1", "=+A"),
		// which ParseInt would have accepted as valid escapes.
		x, err := strconv.ParseUint(string(qd.scratch[:2]), 16, 8)
		if err != nil {
			return 0, fmt.Errorf("mail: invalid RFC 2047 encoding: %q", qd.scratch[:2])
		}
		p[0] = byte(x)
	case '_':
		p[0] = ' '
	default:
		p[0] = c
	}
	return 1, nil
}
// atextChars lists every byte that RFC 5322 classifies as atext.
var atextChars = []byte("ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
	"abcdefghijklmnopqrstuvwxyz" +
	"0123456789" +
	"!#$%&'*+-/=?^_`{|}~")

// isAtext returns true if c is an RFC 5322 atext character.
// If dot is true, period is included.
func isAtext(c byte, dot bool) bool {
	return (dot && c == '.') || bytes.IndexByte(atextChars, c) != -1
}
// isQtext returns true if c is an RFC 5322 qtext character:
// printable US-ASCII other than backslash and double quote.
func isQtext(c byte) bool {
	switch c {
	case '\\', '"':
		return false
	}
	return c >= '!' && c <= '~'
}
// isVchar returns true if c is an RFC 5322 VCHAR character,
// i.e. a visible (printing) US-ASCII character.
func isVchar(c byte) bool {
	// VCHAR spans 0x21 ('!') through 0x7e ('~').
	return c > ' ' && c < 0x7f
}
// isWSP returns true if c is a WSP (white space).
// WSP is a space or horizontal tab (RFC5234 Appendix B).
func isWSP(c byte) bool {
	switch c {
	case ' ', '\t':
		return true
	}
	return false
}