// Provenance: official-gcc.git, libgo/go/net/textproto/reader.go
// Commit subject: "libgo: update to Go 1.11"
// Blob: feb464b2f284ec1b8db79319084f9ff71b13deaa
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package textproto

import (
	"bufio"
	"bytes"
	"io"
	"io/ioutil"
	"strconv"
	"strings"
)

16 // A Reader implements convenience methods for reading requests
17 // or responses from a text protocol network connection.
18 type Reader struct {
19 R *bufio.Reader
20 dot *dotReader
21 buf []byte // a re-usable buffer for readContinuedLineSlice
24 // NewReader returns a new Reader reading from r.
26 // To avoid denial of service attacks, the provided bufio.Reader
27 // should be reading from an io.LimitReader or similar Reader to bound
28 // the size of responses.
29 func NewReader(r *bufio.Reader) *Reader {
30 return &Reader{R: r}
33 // ReadLine reads a single line from r,
34 // eliding the final \n or \r\n from the returned string.
35 func (r *Reader) ReadLine() (string, error) {
36 line, err := r.readLineSlice()
37 return string(line), err
40 // ReadLineBytes is like ReadLine but returns a []byte instead of a string.
41 func (r *Reader) ReadLineBytes() ([]byte, error) {
42 line, err := r.readLineSlice()
43 if line != nil {
44 buf := make([]byte, len(line))
45 copy(buf, line)
46 line = buf
48 return line, err
51 func (r *Reader) readLineSlice() ([]byte, error) {
52 r.closeDot()
53 var line []byte
54 for {
55 l, more, err := r.R.ReadLine()
56 if err != nil {
57 return nil, err
59 // Avoid the copy if the first call produced a full line.
60 if line == nil && !more {
61 return l, nil
63 line = append(line, l...)
64 if !more {
65 break
68 return line, nil
71 // ReadContinuedLine reads a possibly continued line from r,
72 // eliding the final trailing ASCII white space.
73 // Lines after the first are considered continuations if they
74 // begin with a space or tab character. In the returned data,
75 // continuation lines are separated from the previous line
76 // only by a single space: the newline and leading white space
77 // are removed.
79 // For example, consider this input:
81 // Line 1
82 // continued...
83 // Line 2
85 // The first call to ReadContinuedLine will return "Line 1 continued..."
86 // and the second will return "Line 2".
88 // A line consisting of only white space is never continued.
90 func (r *Reader) ReadContinuedLine() (string, error) {
91 line, err := r.readContinuedLineSlice()
92 return string(line), err
// trim returns s with leading and trailing spaces and tabs removed.
// It does not assume Unicode or UTF-8.
// The result is a sub-slice of s; no bytes are copied.
func trim(s []byte) []byte {
	// Advance past leading blanks.
	i := 0
	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
		i++
	}
	// Back up over trailing blanks, never crossing i so that an
	// all-blank input yields an empty slice.
	n := len(s)
	for n > i && (s[n-1] == ' ' || s[n-1] == '\t') {
		n--
	}
	return s[i:n]
}
109 // ReadContinuedLineBytes is like ReadContinuedLine but
110 // returns a []byte instead of a string.
111 func (r *Reader) ReadContinuedLineBytes() ([]byte, error) {
112 line, err := r.readContinuedLineSlice()
113 if line != nil {
114 buf := make([]byte, len(line))
115 copy(buf, line)
116 line = buf
118 return line, err
121 func (r *Reader) readContinuedLineSlice() ([]byte, error) {
122 // Read the first line.
123 line, err := r.readLineSlice()
124 if err != nil {
125 return nil, err
127 if len(line) == 0 { // blank line - no continuation
128 return line, nil
131 // Optimistically assume that we have started to buffer the next line
132 // and it starts with an ASCII letter (the next header key), so we can
133 // avoid copying that buffered data around in memory and skipping over
134 // non-existent whitespace.
135 if r.R.Buffered() > 1 {
136 peek, err := r.R.Peek(1)
137 if err == nil && isASCIILetter(peek[0]) {
138 return trim(line), nil
142 // ReadByte or the next readLineSlice will flush the read buffer;
143 // copy the slice into buf.
144 r.buf = append(r.buf[:0], trim(line)...)
146 // Read continuation lines.
147 for r.skipSpace() > 0 {
148 line, err := r.readLineSlice()
149 if err != nil {
150 break
152 r.buf = append(r.buf, ' ')
153 r.buf = append(r.buf, trim(line)...)
155 return r.buf, nil
158 // skipSpace skips R over all spaces and returns the number of bytes skipped.
159 func (r *Reader) skipSpace() int {
160 n := 0
161 for {
162 c, err := r.R.ReadByte()
163 if err != nil {
164 // Bufio will keep err until next read.
165 break
167 if c != ' ' && c != '\t' {
168 r.R.UnreadByte()
169 break
173 return n
176 func (r *Reader) readCodeLine(expectCode int) (code int, continued bool, message string, err error) {
177 line, err := r.ReadLine()
178 if err != nil {
179 return
181 return parseCodeLine(line, expectCode)
184 func parseCodeLine(line string, expectCode int) (code int, continued bool, message string, err error) {
185 if len(line) < 4 || line[3] != ' ' && line[3] != '-' {
186 err = ProtocolError("short response: " + line)
187 return
189 continued = line[3] == '-'
190 code, err = strconv.Atoi(line[0:3])
191 if err != nil || code < 100 {
192 err = ProtocolError("invalid response code: " + line)
193 return
195 message = line[4:]
196 if 1 <= expectCode && expectCode < 10 && code/100 != expectCode ||
197 10 <= expectCode && expectCode < 100 && code/10 != expectCode ||
198 100 <= expectCode && expectCode < 1000 && code != expectCode {
199 err = &Error{code, message}
201 return
204 // ReadCodeLine reads a response code line of the form
205 // code message
206 // where code is a three-digit status code and the message
207 // extends to the rest of the line. An example of such a line is:
208 // 220 plan9.bell-labs.com ESMTP
210 // If the prefix of the status does not match the digits in expectCode,
211 // ReadCodeLine returns with err set to &Error{code, message}.
212 // For example, if expectCode is 31, an error will be returned if
213 // the status is not in the range [310,319].
215 // If the response is multi-line, ReadCodeLine returns an error.
217 // An expectCode <= 0 disables the check of the status code.
219 func (r *Reader) ReadCodeLine(expectCode int) (code int, message string, err error) {
220 code, continued, message, err := r.readCodeLine(expectCode)
221 if err == nil && continued {
222 err = ProtocolError("unexpected multi-line response: " + message)
224 return
227 // ReadResponse reads a multi-line response of the form:
229 // code-message line 1
230 // code-message line 2
231 // ...
232 // code message line n
234 // where code is a three-digit status code. The first line starts with the
235 // code and a hyphen. The response is terminated by a line that starts
236 // with the same code followed by a space. Each line in message is
237 // separated by a newline (\n).
239 // See page 36 of RFC 959 (https://www.ietf.org/rfc/rfc959.txt) for
240 // details of another form of response accepted:
242 // code-message line 1
243 // message line 2
244 // ...
245 // code message line n
247 // If the prefix of the status does not match the digits in expectCode,
248 // ReadResponse returns with err set to &Error{code, message}.
249 // For example, if expectCode is 31, an error will be returned if
250 // the status is not in the range [310,319].
252 // An expectCode <= 0 disables the check of the status code.
254 func (r *Reader) ReadResponse(expectCode int) (code int, message string, err error) {
255 code, continued, message, err := r.readCodeLine(expectCode)
256 multi := continued
257 for continued {
258 line, err := r.ReadLine()
259 if err != nil {
260 return 0, "", err
263 var code2 int
264 var moreMessage string
265 code2, continued, moreMessage, err = parseCodeLine(line, 0)
266 if err != nil || code2 != code {
267 message += "\n" + strings.TrimRight(line, "\r\n")
268 continued = true
269 continue
271 message += "\n" + moreMessage
273 if err != nil && multi && message != "" {
274 // replace one line error message with all lines (full message)
275 err = &Error{code, message}
277 return
280 // DotReader returns a new Reader that satisfies Reads using the
281 // decoded text of a dot-encoded block read from r.
282 // The returned Reader is only valid until the next call
283 // to a method on r.
285 // Dot encoding is a common framing used for data blocks
286 // in text protocols such as SMTP. The data consists of a sequence
287 // of lines, each of which ends in "\r\n". The sequence itself
288 // ends at a line containing just a dot: ".\r\n". Lines beginning
289 // with a dot are escaped with an additional dot to avoid
290 // looking like the end of the sequence.
292 // The decoded form returned by the Reader's Read method
293 // rewrites the "\r\n" line endings into the simpler "\n",
294 // removes leading dot escapes if present, and stops with error io.EOF
295 // after consuming (and discarding) the end-of-sequence line.
296 func (r *Reader) DotReader() io.Reader {
297 r.closeDot()
298 r.dot = &dotReader{r: r}
299 return r.dot
302 type dotReader struct {
303 r *Reader
304 state int
307 // Read satisfies reads by decoding dot-encoded data read from d.r.
308 func (d *dotReader) Read(b []byte) (n int, err error) {
309 // Run data through a simple state machine to
310 // elide leading dots, rewrite trailing \r\n into \n,
311 // and detect ending .\r\n line.
312 const (
313 stateBeginLine = iota // beginning of line; initial state; must be zero
314 stateDot // read . at beginning of line
315 stateDotCR // read .\r at beginning of line
316 stateCR // read \r (possibly at end of line)
317 stateData // reading data in middle of line
318 stateEOF // reached .\r\n end marker line
320 br := d.r.R
321 for n < len(b) && d.state != stateEOF {
322 var c byte
323 c, err = br.ReadByte()
324 if err != nil {
325 if err == io.EOF {
326 err = io.ErrUnexpectedEOF
328 break
330 switch d.state {
331 case stateBeginLine:
332 if c == '.' {
333 d.state = stateDot
334 continue
336 if c == '\r' {
337 d.state = stateCR
338 continue
340 d.state = stateData
342 case stateDot:
343 if c == '\r' {
344 d.state = stateDotCR
345 continue
347 if c == '\n' {
348 d.state = stateEOF
349 continue
351 d.state = stateData
353 case stateDotCR:
354 if c == '\n' {
355 d.state = stateEOF
356 continue
358 // Not part of .\r\n.
359 // Consume leading dot and emit saved \r.
360 br.UnreadByte()
361 c = '\r'
362 d.state = stateData
364 case stateCR:
365 if c == '\n' {
366 d.state = stateBeginLine
367 break
369 // Not part of \r\n. Emit saved \r
370 br.UnreadByte()
371 c = '\r'
372 d.state = stateData
374 case stateData:
375 if c == '\r' {
376 d.state = stateCR
377 continue
379 if c == '\n' {
380 d.state = stateBeginLine
383 b[n] = c
386 if err == nil && d.state == stateEOF {
387 err = io.EOF
389 if err != nil && d.r.dot == d {
390 d.r.dot = nil
392 return
395 // closeDot drains the current DotReader if any,
396 // making sure that it reads until the ending dot line.
397 func (r *Reader) closeDot() {
398 if r.dot == nil {
399 return
401 buf := make([]byte, 128)
402 for r.dot != nil {
403 // When Read reaches EOF or an error,
404 // it will set r.dot == nil.
405 r.dot.Read(buf)
409 // ReadDotBytes reads a dot-encoding and returns the decoded data.
411 // See the documentation for the DotReader method for details about dot-encoding.
412 func (r *Reader) ReadDotBytes() ([]byte, error) {
413 return ioutil.ReadAll(r.DotReader())
416 // ReadDotLines reads a dot-encoding and returns a slice
417 // containing the decoded lines, with the final \r\n or \n elided from each.
419 // See the documentation for the DotReader method for details about dot-encoding.
420 func (r *Reader) ReadDotLines() ([]string, error) {
421 // We could use ReadDotBytes and then Split it,
422 // but reading a line at a time avoids needing a
423 // large contiguous block of memory and is simpler.
424 var v []string
425 var err error
426 for {
427 var line string
428 line, err = r.ReadLine()
429 if err != nil {
430 if err == io.EOF {
431 err = io.ErrUnexpectedEOF
433 break
436 // Dot by itself marks end; otherwise cut one dot.
437 if len(line) > 0 && line[0] == '.' {
438 if len(line) == 1 {
439 break
441 line = line[1:]
443 v = append(v, line)
445 return v, err
448 // ReadMIMEHeader reads a MIME-style header from r.
449 // The header is a sequence of possibly continued Key: Value lines
450 // ending in a blank line.
451 // The returned map m maps CanonicalMIMEHeaderKey(key) to a
452 // sequence of values in the same order encountered in the input.
454 // For example, consider this input:
456 // My-Key: Value 1
457 // Long-Key: Even
458 // Longer Value
459 // My-Key: Value 2
461 // Given that input, ReadMIMEHeader returns the map:
463 // map[string][]string{
464 // "My-Key": {"Value 1", "Value 2"},
465 // "Long-Key": {"Even Longer Value"},
466 // }
468 func (r *Reader) ReadMIMEHeader() (MIMEHeader, error) {
469 // Avoid lots of small slice allocations later by allocating one
470 // large one ahead of time which we'll cut up into smaller
471 // slices. If this isn't big enough later, we allocate small ones.
472 var strs []string
473 hint := r.upcomingHeaderNewlines()
474 if hint > 0 {
475 strs = make([]string, hint)
478 m := make(MIMEHeader, hint)
480 // The first line cannot start with a leading space.
481 if buf, err := r.R.Peek(1); err == nil && (buf[0] == ' ' || buf[0] == '\t') {
482 line, err := r.readLineSlice()
483 if err != nil {
484 return m, err
486 return m, ProtocolError("malformed MIME header initial line: " + string(line))
489 for {
490 kv, err := r.readContinuedLineSlice()
491 if len(kv) == 0 {
492 return m, err
495 // Key ends at first colon; should not have trailing spaces
496 // but they appear in the wild, violating specs, so we remove
497 // them if present.
498 i := bytes.IndexByte(kv, ':')
499 if i < 0 {
500 return m, ProtocolError("malformed MIME header line: " + string(kv))
502 endKey := i
503 for endKey > 0 && kv[endKey-1] == ' ' {
504 endKey--
506 key := canonicalMIMEHeaderKey(kv[:endKey])
508 // As per RFC 7230 field-name is a token, tokens consist of one or more chars.
509 // We could return a ProtocolError here, but better to be liberal in what we
510 // accept, so if we get an empty key, skip it.
511 if key == "" {
512 continue
515 // Skip initial spaces in value.
516 i++ // skip colon
517 for i < len(kv) && (kv[i] == ' ' || kv[i] == '\t') {
520 value := string(kv[i:])
522 vv := m[key]
523 if vv == nil && len(strs) > 0 {
524 // More than likely this will be a single-element key.
525 // Most headers aren't multi-valued.
526 // Set the capacity on strs[0] to 1, so any future append
527 // won't extend the slice into the other strings.
528 vv, strs = strs[:1:1], strs[1:]
529 vv[0] = value
530 m[key] = vv
531 } else {
532 m[key] = append(vv, value)
535 if err != nil {
536 return m, err
541 // upcomingHeaderNewlines returns an approximation of the number of newlines
542 // that will be in this header. If it gets confused, it returns 0.
543 func (r *Reader) upcomingHeaderNewlines() (n int) {
544 // Try to determine the 'hint' size.
545 r.R.Peek(1) // force a buffer load if empty
546 s := r.R.Buffered()
547 if s == 0 {
548 return
550 peek, _ := r.R.Peek(s)
551 for len(peek) > 0 {
552 i := bytes.IndexByte(peek, '\n')
553 if i < 3 {
554 // Not present (-1) or found within the next few bytes,
555 // implying we're at the end ("\r\n\r\n" or "\n\n")
556 return
559 peek = peek[i+1:]
561 return
564 // CanonicalMIMEHeaderKey returns the canonical format of the
565 // MIME header key s. The canonicalization converts the first
566 // letter and any letter following a hyphen to upper case;
567 // the rest are converted to lowercase. For example, the
568 // canonical key for "accept-encoding" is "Accept-Encoding".
569 // MIME header keys are assumed to be ASCII only.
570 // If s contains a space or invalid header field bytes, it is
571 // returned without modifications.
572 func CanonicalMIMEHeaderKey(s string) string {
573 // Quick check for canonical encoding.
574 upper := true
575 for i := 0; i < len(s); i++ {
576 c := s[i]
577 if !validHeaderFieldByte(c) {
578 return s
580 if upper && 'a' <= c && c <= 'z' {
581 return canonicalMIMEHeaderKey([]byte(s))
583 if !upper && 'A' <= c && c <= 'Z' {
584 return canonicalMIMEHeaderKey([]byte(s))
586 upper = c == '-'
588 return s
// toLower is the distance between an ASCII upper-case letter and its
// lower-case counterpart; add it to lower-case, subtract it to upper-case.
const toLower = 'a' - 'A'
593 // validHeaderFieldByte reports whether b is a valid byte in a header
594 // field name. RFC 7230 says:
595 // header-field = field-name ":" OWS field-value OWS
596 // field-name = token
597 // tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
598 // "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
599 // token = 1*tchar
600 func validHeaderFieldByte(b byte) bool {
601 return int(b) < len(isTokenTable) && isTokenTable[b]
604 // canonicalMIMEHeaderKey is like CanonicalMIMEHeaderKey but is
605 // allowed to mutate the provided byte slice before returning the
606 // string.
608 // For invalid inputs (if a contains spaces or non-token bytes), a
609 // is unchanged and a string copy is returned.
610 func canonicalMIMEHeaderKey(a []byte) string {
611 // See if a looks like a header key. If not, return it unchanged.
612 for _, c := range a {
613 if validHeaderFieldByte(c) {
614 continue
616 // Don't canonicalize.
617 return string(a)
620 upper := true
621 for i, c := range a {
622 // Canonicalize: first letter upper case
623 // and upper case after each dash.
624 // (Host, User-Agent, If-Modified-Since).
625 // MIME headers are ASCII only, so no Unicode issues.
626 if upper && 'a' <= c && c <= 'z' {
627 c -= toLower
628 } else if !upper && 'A' <= c && c <= 'Z' {
629 c += toLower
631 a[i] = c
632 upper = c == '-' // for next time
634 // The compiler recognizes m[string(byteSlice)] as a special
635 // case, so a copy of a's bytes into a new string does not
636 // happen in this map lookup:
637 if v := commonHeader[string(a)]; v != "" {
638 return v
640 return string(a)
// commonHeader interns common header strings.
// It maps each canonical key to itself so that a lookup by a freshly
// canonicalized byte slice can return the shared string instead of
// allocating a new one.
var commonHeader = make(map[string]string)

// init fills commonHeader with the frequently seen canonical keys.
func init() {
	for _, v := range []string{
		"Accept",
		"Accept-Charset",
		"Accept-Encoding",
		"Accept-Language",
		"Accept-Ranges",
		"Cache-Control",
		"Cc",
		"Connection",
		"Content-Id",
		"Content-Language",
		"Content-Length",
		"Content-Transfer-Encoding",
		"Content-Type",
		"Cookie",
		"Date",
		"Dkim-Signature",
		"Etag",
		"Expires",
		"From",
		"Host",
		"If-Modified-Since",
		"If-None-Match",
		"In-Reply-To",
		"Last-Modified",
		"Location",
		"Message-Id",
		"Mime-Version",
		"Pragma",
		"Received",
		"Return-Path",
		"Server",
		"Set-Cookie",
		"Subject",
		"To",
		"User-Agent",
		"Via",
		"X-Forwarded-For",
		"X-Imforwards",
		"X-Powered-By",
	} {
		commonHeader[v] = v
	}
}
// isTokenTable is a copy of net/http/lex.go's isTokenTable.
// See https://httpwg.github.io/specs/rfc7230.html#rule.token.separators
//
// It is indexed by byte value; an entry is true when that byte is an
// RFC 7230 tchar. Indexes at or beyond len(isTokenTable) must be range
// checked by the caller before indexing.
var isTokenTable = [127]bool{
	'!':  true,
	'#':  true,
	'$':  true,
	'%':  true,
	'&':  true,
	'\'': true,
	'*':  true,
	'+':  true,
	'-':  true,
	'.':  true,
	'0':  true,
	'1':  true,
	'2':  true,
	'3':  true,
	'4':  true,
	'5':  true,
	'6':  true,
	'7':  true,
	'8':  true,
	'9':  true,
	'A':  true,
	'B':  true,
	'C':  true,
	'D':  true,
	'E':  true,
	'F':  true,
	'G':  true,
	'H':  true,
	'I':  true,
	'J':  true,
	'K':  true,
	'L':  true,
	'M':  true,
	'N':  true,
	'O':  true,
	'P':  true,
	'Q':  true,
	'R':  true,
	'S':  true,
	'T':  true,
	'U':  true,
	'V':  true,
	'W':  true,
	'X':  true,
	'Y':  true,
	'Z':  true,
	'^':  true,
	'_':  true,
	'`':  true,
	'a':  true,
	'b':  true,
	'c':  true,
	'd':  true,
	'e':  true,
	'f':  true,
	'g':  true,
	'h':  true,
	'i':  true,
	'j':  true,
	'k':  true,
	'l':  true,
	'm':  true,
	'n':  true,
	'o':  true,
	'p':  true,
	'q':  true,
	'r':  true,
	's':  true,
	't':  true,
	'u':  true,
	'v':  true,
	'w':  true,
	'x':  true,
	'y':  true,
	'z':  true,
	'|':  true,
	'~':  true,
}