Daily bump.
[official-gcc.git] / libgo / go / net / textproto / reader.go
blob157c59b17accfb2a48ca5ace16bf1d214ebe388c
1 // Copyright 2010 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 package textproto
7 import (
8 "bufio"
9 "bytes"
10 "fmt"
11 "io"
12 "strconv"
13 "strings"
14 "sync"
17 // A Reader implements convenience methods for reading requests
18 // or responses from a text protocol network connection.
19 type Reader struct {
20 R *bufio.Reader
21 dot *dotReader
22 buf []byte // a re-usable buffer for readContinuedLineSlice
25 // NewReader returns a new Reader reading from r.
27 // To avoid denial of service attacks, the provided bufio.Reader
28 // should be reading from an io.LimitReader or similar Reader to bound
29 // the size of responses.
30 func NewReader(r *bufio.Reader) *Reader {
31 commonHeaderOnce.Do(initCommonHeader)
32 return &Reader{R: r}
35 // ReadLine reads a single line from r,
36 // eliding the final \n or \r\n from the returned string.
37 func (r *Reader) ReadLine() (string, error) {
38 line, err := r.readLineSlice()
39 return string(line), err
42 // ReadLineBytes is like ReadLine but returns a []byte instead of a string.
43 func (r *Reader) ReadLineBytes() ([]byte, error) {
44 line, err := r.readLineSlice()
45 if line != nil {
46 buf := make([]byte, len(line))
47 copy(buf, line)
48 line = buf
50 return line, err
53 func (r *Reader) readLineSlice() ([]byte, error) {
54 r.closeDot()
55 var line []byte
56 for {
57 l, more, err := r.R.ReadLine()
58 if err != nil {
59 return nil, err
61 // Avoid the copy if the first call produced a full line.
62 if line == nil && !more {
63 return l, nil
65 line = append(line, l...)
66 if !more {
67 break
70 return line, nil
73 // ReadContinuedLine reads a possibly continued line from r,
74 // eliding the final trailing ASCII white space.
75 // Lines after the first are considered continuations if they
76 // begin with a space or tab character. In the returned data,
77 // continuation lines are separated from the previous line
78 // only by a single space: the newline and leading white space
79 // are removed.
81 // For example, consider this input:
83 // Line 1
84 // continued...
85 // Line 2
87 // The first call to ReadContinuedLine will return "Line 1 continued..."
88 // and the second will return "Line 2".
90 // Empty lines are never continued.
92 func (r *Reader) ReadContinuedLine() (string, error) {
93 line, err := r.readContinuedLineSlice(noValidation)
94 return string(line), err
// trim returns s with leading and trailing spaces and tabs removed.
// It does not assume Unicode or UTF-8.
//
// The result is a sub-slice of s; no bytes are copied.
func trim(s []byte) []byte {
	// Advance i past leading blanks.
	i := 0
	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
		i++
	}
	// Pull n back over trailing blanks, never crossing i.
	n := len(s)
	for n > i && (s[n-1] == ' ' || s[n-1] == '\t') {
		n--
	}
	return s[i:n]
}
111 // ReadContinuedLineBytes is like ReadContinuedLine but
112 // returns a []byte instead of a string.
113 func (r *Reader) ReadContinuedLineBytes() ([]byte, error) {
114 line, err := r.readContinuedLineSlice(noValidation)
115 if line != nil {
116 buf := make([]byte, len(line))
117 copy(buf, line)
118 line = buf
120 return line, err
123 // readContinuedLineSlice reads continued lines from the reader buffer,
124 // returning a byte slice with all lines. The validateFirstLine function
125 // is run on the first read line, and if it returns an error then this
126 // error is returned from readContinuedLineSlice.
127 func (r *Reader) readContinuedLineSlice(validateFirstLine func([]byte) error) ([]byte, error) {
128 if validateFirstLine == nil {
129 return nil, fmt.Errorf("missing validateFirstLine func")
132 // Read the first line.
133 line, err := r.readLineSlice()
134 if err != nil {
135 return nil, err
137 if len(line) == 0 { // blank line - no continuation
138 return line, nil
141 if err := validateFirstLine(line); err != nil {
142 return nil, err
145 // Optimistically assume that we have started to buffer the next line
146 // and it starts with an ASCII letter (the next header key), or a blank
147 // line, so we can avoid copying that buffered data around in memory
148 // and skipping over non-existent whitespace.
149 if r.R.Buffered() > 1 {
150 peek, _ := r.R.Peek(2)
151 if len(peek) > 0 && (isASCIILetter(peek[0]) || peek[0] == '\n') ||
152 len(peek) == 2 && peek[0] == '\r' && peek[1] == '\n' {
153 return trim(line), nil
157 // ReadByte or the next readLineSlice will flush the read buffer;
158 // copy the slice into buf.
159 r.buf = append(r.buf[:0], trim(line)...)
161 // Read continuation lines.
162 for r.skipSpace() > 0 {
163 line, err := r.readLineSlice()
164 if err != nil {
165 break
167 r.buf = append(r.buf, ' ')
168 r.buf = append(r.buf, trim(line)...)
170 return r.buf, nil
173 // skipSpace skips R over all spaces and returns the number of bytes skipped.
174 func (r *Reader) skipSpace() int {
175 n := 0
176 for {
177 c, err := r.R.ReadByte()
178 if err != nil {
179 // Bufio will keep err until next read.
180 break
182 if c != ' ' && c != '\t' {
183 r.R.UnreadByte()
184 break
188 return n
191 func (r *Reader) readCodeLine(expectCode int) (code int, continued bool, message string, err error) {
192 line, err := r.ReadLine()
193 if err != nil {
194 return
196 return parseCodeLine(line, expectCode)
199 func parseCodeLine(line string, expectCode int) (code int, continued bool, message string, err error) {
200 if len(line) < 4 || line[3] != ' ' && line[3] != '-' {
201 err = ProtocolError("short response: " + line)
202 return
204 continued = line[3] == '-'
205 code, err = strconv.Atoi(line[0:3])
206 if err != nil || code < 100 {
207 err = ProtocolError("invalid response code: " + line)
208 return
210 message = line[4:]
211 if 1 <= expectCode && expectCode < 10 && code/100 != expectCode ||
212 10 <= expectCode && expectCode < 100 && code/10 != expectCode ||
213 100 <= expectCode && expectCode < 1000 && code != expectCode {
214 err = &Error{code, message}
216 return
219 // ReadCodeLine reads a response code line of the form
220 // code message
221 // where code is a three-digit status code and the message
222 // extends to the rest of the line. An example of such a line is:
223 // 220 plan9.bell-labs.com ESMTP
225 // If the prefix of the status does not match the digits in expectCode,
226 // ReadCodeLine returns with err set to &Error{code, message}.
227 // For example, if expectCode is 31, an error will be returned if
228 // the status is not in the range [310,319].
230 // If the response is multi-line, ReadCodeLine returns an error.
232 // An expectCode <= 0 disables the check of the status code.
234 func (r *Reader) ReadCodeLine(expectCode int) (code int, message string, err error) {
235 code, continued, message, err := r.readCodeLine(expectCode)
236 if err == nil && continued {
237 err = ProtocolError("unexpected multi-line response: " + message)
239 return
242 // ReadResponse reads a multi-line response of the form:
244 // code-message line 1
245 // code-message line 2
246 // ...
247 // code message line n
249 // where code is a three-digit status code. The first line starts with the
250 // code and a hyphen. The response is terminated by a line that starts
251 // with the same code followed by a space. Each line in message is
252 // separated by a newline (\n).
254 // See page 36 of RFC 959 (https://www.ietf.org/rfc/rfc959.txt) for
255 // details of another form of response accepted:
257 // code-message line 1
258 // message line 2
259 // ...
260 // code message line n
262 // If the prefix of the status does not match the digits in expectCode,
263 // ReadResponse returns with err set to &Error{code, message}.
264 // For example, if expectCode is 31, an error will be returned if
265 // the status is not in the range [310,319].
267 // An expectCode <= 0 disables the check of the status code.
269 func (r *Reader) ReadResponse(expectCode int) (code int, message string, err error) {
270 code, continued, message, err := r.readCodeLine(expectCode)
271 multi := continued
272 for continued {
273 line, err := r.ReadLine()
274 if err != nil {
275 return 0, "", err
278 var code2 int
279 var moreMessage string
280 code2, continued, moreMessage, err = parseCodeLine(line, 0)
281 if err != nil || code2 != code {
282 message += "\n" + strings.TrimRight(line, "\r\n")
283 continued = true
284 continue
286 message += "\n" + moreMessage
288 if err != nil && multi && message != "" {
289 // replace one line error message with all lines (full message)
290 err = &Error{code, message}
292 return
295 // DotReader returns a new Reader that satisfies Reads using the
296 // decoded text of a dot-encoded block read from r.
297 // The returned Reader is only valid until the next call
298 // to a method on r.
300 // Dot encoding is a common framing used for data blocks
301 // in text protocols such as SMTP. The data consists of a sequence
302 // of lines, each of which ends in "\r\n". The sequence itself
303 // ends at a line containing just a dot: ".\r\n". Lines beginning
304 // with a dot are escaped with an additional dot to avoid
305 // looking like the end of the sequence.
307 // The decoded form returned by the Reader's Read method
308 // rewrites the "\r\n" line endings into the simpler "\n",
309 // removes leading dot escapes if present, and stops with error io.EOF
310 // after consuming (and discarding) the end-of-sequence line.
311 func (r *Reader) DotReader() io.Reader {
312 r.closeDot()
313 r.dot = &dotReader{r: r}
314 return r.dot
317 type dotReader struct {
318 r *Reader
319 state int
322 // Read satisfies reads by decoding dot-encoded data read from d.r.
323 func (d *dotReader) Read(b []byte) (n int, err error) {
324 // Run data through a simple state machine to
325 // elide leading dots, rewrite trailing \r\n into \n,
326 // and detect ending .\r\n line.
327 const (
328 stateBeginLine = iota // beginning of line; initial state; must be zero
329 stateDot // read . at beginning of line
330 stateDotCR // read .\r at beginning of line
331 stateCR // read \r (possibly at end of line)
332 stateData // reading data in middle of line
333 stateEOF // reached .\r\n end marker line
335 br := d.r.R
336 for n < len(b) && d.state != stateEOF {
337 var c byte
338 c, err = br.ReadByte()
339 if err != nil {
340 if err == io.EOF {
341 err = io.ErrUnexpectedEOF
343 break
345 switch d.state {
346 case stateBeginLine:
347 if c == '.' {
348 d.state = stateDot
349 continue
351 if c == '\r' {
352 d.state = stateCR
353 continue
355 d.state = stateData
357 case stateDot:
358 if c == '\r' {
359 d.state = stateDotCR
360 continue
362 if c == '\n' {
363 d.state = stateEOF
364 continue
366 d.state = stateData
368 case stateDotCR:
369 if c == '\n' {
370 d.state = stateEOF
371 continue
373 // Not part of .\r\n.
374 // Consume leading dot and emit saved \r.
375 br.UnreadByte()
376 c = '\r'
377 d.state = stateData
379 case stateCR:
380 if c == '\n' {
381 d.state = stateBeginLine
382 break
384 // Not part of \r\n. Emit saved \r
385 br.UnreadByte()
386 c = '\r'
387 d.state = stateData
389 case stateData:
390 if c == '\r' {
391 d.state = stateCR
392 continue
394 if c == '\n' {
395 d.state = stateBeginLine
398 b[n] = c
401 if err == nil && d.state == stateEOF {
402 err = io.EOF
404 if err != nil && d.r.dot == d {
405 d.r.dot = nil
407 return
410 // closeDot drains the current DotReader if any,
411 // making sure that it reads until the ending dot line.
412 func (r *Reader) closeDot() {
413 if r.dot == nil {
414 return
416 buf := make([]byte, 128)
417 for r.dot != nil {
418 // When Read reaches EOF or an error,
419 // it will set r.dot == nil.
420 r.dot.Read(buf)
424 // ReadDotBytes reads a dot-encoding and returns the decoded data.
426 // See the documentation for the DotReader method for details about dot-encoding.
427 func (r *Reader) ReadDotBytes() ([]byte, error) {
428 return io.ReadAll(r.DotReader())
431 // ReadDotLines reads a dot-encoding and returns a slice
432 // containing the decoded lines, with the final \r\n or \n elided from each.
434 // See the documentation for the DotReader method for details about dot-encoding.
435 func (r *Reader) ReadDotLines() ([]string, error) {
436 // We could use ReadDotBytes and then Split it,
437 // but reading a line at a time avoids needing a
438 // large contiguous block of memory and is simpler.
439 var v []string
440 var err error
441 for {
442 var line string
443 line, err = r.ReadLine()
444 if err != nil {
445 if err == io.EOF {
446 err = io.ErrUnexpectedEOF
448 break
451 // Dot by itself marks end; otherwise cut one dot.
452 if len(line) > 0 && line[0] == '.' {
453 if len(line) == 1 {
454 break
456 line = line[1:]
458 v = append(v, line)
460 return v, err
// colon is the separator between a header key and its value.
var colon = []byte(":")
465 // ReadMIMEHeader reads a MIME-style header from r.
466 // The header is a sequence of possibly continued Key: Value lines
467 // ending in a blank line.
468 // The returned map m maps CanonicalMIMEHeaderKey(key) to a
469 // sequence of values in the same order encountered in the input.
471 // For example, consider this input:
473 // My-Key: Value 1
474 // Long-Key: Even
475 // Longer Value
476 // My-Key: Value 2
478 // Given that input, ReadMIMEHeader returns the map:
480 // map[string][]string{
481 // "My-Key": {"Value 1", "Value 2"},
482 // "Long-Key": {"Even Longer Value"},
483 // }
485 func (r *Reader) ReadMIMEHeader() (MIMEHeader, error) {
486 // Avoid lots of small slice allocations later by allocating one
487 // large one ahead of time which we'll cut up into smaller
488 // slices. If this isn't big enough later, we allocate small ones.
489 var strs []string
490 hint := r.upcomingHeaderNewlines()
491 if hint > 0 {
492 strs = make([]string, hint)
495 m := make(MIMEHeader, hint)
497 // The first line cannot start with a leading space.
498 if buf, err := r.R.Peek(1); err == nil && (buf[0] == ' ' || buf[0] == '\t') {
499 line, err := r.readLineSlice()
500 if err != nil {
501 return m, err
503 return m, ProtocolError("malformed MIME header initial line: " + string(line))
506 for {
507 kv, err := r.readContinuedLineSlice(mustHaveFieldNameColon)
508 if len(kv) == 0 {
509 return m, err
512 // Key ends at first colon.
513 k, v, ok := bytes.Cut(kv, colon)
514 if !ok {
515 return m, ProtocolError("malformed MIME header line: " + string(kv))
517 key := canonicalMIMEHeaderKey(k)
519 // As per RFC 7230 field-name is a token, tokens consist of one or more chars.
520 // We could return a ProtocolError here, but better to be liberal in what we
521 // accept, so if we get an empty key, skip it.
522 if key == "" {
523 continue
526 // Skip initial spaces in value.
527 value := strings.TrimLeft(string(v), " \t")
529 vv := m[key]
530 if vv == nil && len(strs) > 0 {
531 // More than likely this will be a single-element key.
532 // Most headers aren't multi-valued.
533 // Set the capacity on strs[0] to 1, so any future append
534 // won't extend the slice into the other strings.
535 vv, strs = strs[:1:1], strs[1:]
536 vv[0] = value
537 m[key] = vv
538 } else {
539 m[key] = append(vv, value)
542 if err != nil {
543 return m, err
// noValidation is a no-op validation func for readContinuedLineSlice
// that permits any lines.
func noValidation(_ []byte) error { return nil }
552 // mustHaveFieldNameColon ensures that, per RFC 7230, the
553 // field-name is on a single line, so the first line must
554 // contain a colon.
555 func mustHaveFieldNameColon(line []byte) error {
556 if bytes.IndexByte(line, ':') < 0 {
557 return ProtocolError(fmt.Sprintf("malformed MIME header: missing colon: %q", line))
559 return nil
// nl is the newline byte sequence counted by upcomingHeaderNewlines.
var nl = []byte("\n")
564 // upcomingHeaderNewlines returns an approximation of the number of newlines
565 // that will be in this header. If it gets confused, it returns 0.
566 func (r *Reader) upcomingHeaderNewlines() (n int) {
567 // Try to determine the 'hint' size.
568 r.R.Peek(1) // force a buffer load if empty
569 s := r.R.Buffered()
570 if s == 0 {
571 return
573 peek, _ := r.R.Peek(s)
574 return bytes.Count(peek, nl)
577 // CanonicalMIMEHeaderKey returns the canonical format of the
578 // MIME header key s. The canonicalization converts the first
579 // letter and any letter following a hyphen to upper case;
580 // the rest are converted to lowercase. For example, the
581 // canonical key for "accept-encoding" is "Accept-Encoding".
582 // MIME header keys are assumed to be ASCII only.
583 // If s contains a space or invalid header field bytes, it is
584 // returned without modifications.
585 func CanonicalMIMEHeaderKey(s string) string {
586 commonHeaderOnce.Do(initCommonHeader)
588 // Quick check for canonical encoding.
589 upper := true
590 for i := 0; i < len(s); i++ {
591 c := s[i]
592 if !validHeaderFieldByte(c) {
593 return s
595 if upper && 'a' <= c && c <= 'z' {
596 return canonicalMIMEHeaderKey([]byte(s))
598 if !upper && 'A' <= c && c <= 'Z' {
599 return canonicalMIMEHeaderKey([]byte(s))
601 upper = c == '-'
603 return s
// toLower is the offset between an ASCII upper-case letter and its
// lower-case counterpart.
const toLower = 'a' - 'A'
608 // validHeaderFieldByte reports whether b is a valid byte in a header
609 // field name. RFC 7230 says:
610 // header-field = field-name ":" OWS field-value OWS
611 // field-name = token
612 // tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
613 // "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
614 // token = 1*tchar
615 func validHeaderFieldByte(b byte) bool {
616 return int(b) < len(isTokenTable) && isTokenTable[b]
619 // canonicalMIMEHeaderKey is like CanonicalMIMEHeaderKey but is
620 // allowed to mutate the provided byte slice before returning the
621 // string.
623 // For invalid inputs (if a contains spaces or non-token bytes), a
624 // is unchanged and a string copy is returned.
625 func canonicalMIMEHeaderKey(a []byte) string {
626 // See if a looks like a header key. If not, return it unchanged.
627 for _, c := range a {
628 if validHeaderFieldByte(c) {
629 continue
631 // Don't canonicalize.
632 return string(a)
635 upper := true
636 for i, c := range a {
637 // Canonicalize: first letter upper case
638 // and upper case after each dash.
639 // (Host, User-Agent, If-Modified-Since).
640 // MIME headers are ASCII only, so no Unicode issues.
641 if upper && 'a' <= c && c <= 'z' {
642 c -= toLower
643 } else if !upper && 'A' <= c && c <= 'Z' {
644 c += toLower
646 a[i] = c
647 upper = c == '-' // for next time
649 // The compiler recognizes m[string(byteSlice)] as a special
650 // case, so a copy of a's bytes into a new string does not
651 // happen in this map lookup:
652 if v := commonHeader[string(a)]; v != "" {
653 return v
655 return string(a)
// commonHeader interns common header strings.
// It is nil until initCommonHeader has run.
var commonHeader map[string]string

// commonHeaderOnce guards the one-time call to initCommonHeader.
var commonHeaderOnce sync.Once
663 func initCommonHeader() {
664 commonHeader = make(map[string]string)
665 for _, v := range []string{
666 "Accept",
667 "Accept-Charset",
668 "Accept-Encoding",
669 "Accept-Language",
670 "Accept-Ranges",
671 "Cache-Control",
672 "Cc",
673 "Connection",
674 "Content-Id",
675 "Content-Language",
676 "Content-Length",
677 "Content-Transfer-Encoding",
678 "Content-Type",
679 "Cookie",
680 "Date",
681 "Dkim-Signature",
682 "Etag",
683 "Expires",
684 "From",
685 "Host",
686 "If-Modified-Since",
687 "If-None-Match",
688 "In-Reply-To",
689 "Last-Modified",
690 "Location",
691 "Message-Id",
692 "Mime-Version",
693 "Pragma",
694 "Received",
695 "Return-Path",
696 "Server",
697 "Set-Cookie",
698 "Subject",
699 "To",
700 "User-Agent",
701 "Via",
702 "X-Forwarded-For",
703 "X-Imforwards",
704 "X-Powered-By",
706 commonHeader[v] = v
// isTokenTable is a copy of net/http/lex.go's isTokenTable.
// See https://httpwg.github.io/specs/rfc7230.html#rule.token.separators
//
// It is indexed by byte value; an entry is true when that byte is
// allowed in a token. Indexes not listed are false.
var isTokenTable = [127]bool{
	'!':  true,
	'#':  true,
	'$':  true,
	'%':  true,
	'&':  true,
	'\'': true,
	'*':  true,
	'+':  true,
	'-':  true,
	'.':  true,
	'0':  true,
	'1':  true,
	'2':  true,
	'3':  true,
	'4':  true,
	'5':  true,
	'6':  true,
	'7':  true,
	'8':  true,
	'9':  true,
	'A':  true,
	'B':  true,
	'C':  true,
	'D':  true,
	'E':  true,
	'F':  true,
	'G':  true,
	'H':  true,
	'I':  true,
	'J':  true,
	'K':  true,
	'L':  true,
	'M':  true,
	'N':  true,
	'O':  true,
	'P':  true,
	'Q':  true,
	'R':  true,
	'S':  true,
	'T':  true,
	'U':  true,
	'V':  true,
	'W':  true,
	'X':  true,
	'Y':  true,
	'Z':  true,
	'^':  true,
	'_':  true,
	'`':  true,
	'a':  true,
	'b':  true,
	'c':  true,
	'd':  true,
	'e':  true,
	'f':  true,
	'g':  true,
	'h':  true,
	'i':  true,
	'j':  true,
	'k':  true,
	'l':  true,
	'm':  true,
	'n':  true,
	'o':  true,
	'p':  true,
	'q':  true,
	'r':  true,
	's':  true,
	't':  true,
	'u':  true,
	'v':  true,
	'w':  true,
	'x':  true,
	'y':  true,
	'z':  true,
	'|':  true,
	'~':  true,
}