1 // Copyright 2010 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
17 // A Reader implements convenience methods for reading requests
18 // or responses from a text protocol network connection.
22 buf
[]byte // a re-usable buffer for readContinuedLineSlice
25 // NewReader returns a new Reader reading from r.
27 // To avoid denial of service attacks, the provided bufio.Reader
28 // should be reading from an io.LimitReader or similar Reader to bound
29 // the size of responses.
30 func NewReader(r
*bufio
.Reader
) *Reader
{
31 commonHeaderOnce
.Do(initCommonHeader
)
35 // ReadLine reads a single line from r,
36 // eliding the final \n or \r\n from the returned string.
37 func (r
*Reader
) ReadLine() (string, error
) {
38 line
, err
:= r
.readLineSlice()
39 return string(line
), err
42 // ReadLineBytes is like ReadLine but returns a []byte instead of a string.
43 func (r
*Reader
) ReadLineBytes() ([]byte, error
) {
44 line
, err
:= r
.readLineSlice()
46 buf
:= make([]byte, len(line
))
53 func (r
*Reader
) readLineSlice() ([]byte, error
) {
57 l
, more
, err
:= r
.R
.ReadLine()
61 // Avoid the copy if the first call produced a full line.
62 if line
== nil && !more
{
65 line
= append(line
, l
...)
73 // ReadContinuedLine reads a possibly continued line from r,
74 // eliding the final trailing ASCII white space.
75 // Lines after the first are considered continuations if they
76 // begin with a space or tab character. In the returned data,
77 // continuation lines are separated from the previous line
78 // only by a single space: the newline and leading white space
81 // For example, consider this input:
87 // The first call to ReadContinuedLine will return "Line 1 continued..."
88 // and the second will return "Line 2".
90 // Empty lines are never continued.
92 func (r
*Reader
) ReadContinuedLine() (string, error
) {
93 line
, err
:= r
.readContinuedLineSlice(noValidation
)
94 return string(line
), err
97 // trim returns s with leading and trailing spaces and tabs removed.
98 // It does not assume Unicode or UTF-8.
99 func trim(s
[]byte) []byte {
101 for i
< len(s
) && (s
[i
] == ' ' || s
[i
] == '\t') {
105 for n
> i
&& (s
[n
-1] == ' ' || s
[n
-1] == '\t') {
111 // ReadContinuedLineBytes is like ReadContinuedLine but
112 // returns a []byte instead of a string.
113 func (r
*Reader
) ReadContinuedLineBytes() ([]byte, error
) {
114 line
, err
:= r
.readContinuedLineSlice(noValidation
)
116 buf
:= make([]byte, len(line
))
123 // readContinuedLineSlice reads continued lines from the reader buffer,
124 // returning a byte slice with all lines. The validateFirstLine function
125 // is run on the first read line, and if it returns an error then this
126 // error is returned from readContinuedLineSlice.
127 func (r
*Reader
) readContinuedLineSlice(validateFirstLine
func([]byte) error
) ([]byte, error
) {
128 if validateFirstLine
== nil {
129 return nil, fmt
.Errorf("missing validateFirstLine func")
132 // Read the first line.
133 line
, err
:= r
.readLineSlice()
137 if len(line
) == 0 { // blank line - no continuation
141 if err
:= validateFirstLine(line
); err
!= nil {
145 // Optimistically assume that we have started to buffer the next line
146 // and it starts with an ASCII letter (the next header key), or a blank
147 // line, so we can avoid copying that buffered data around in memory
148 // and skipping over non-existent whitespace.
149 if r
.R
.Buffered() > 1 {
150 peek
, _
:= r
.R
.Peek(2)
151 if len(peek
) > 0 && (isASCIILetter(peek
[0]) || peek
[0] == '\n') ||
152 len(peek
) == 2 && peek
[0] == '\r' && peek
[1] == '\n' {
153 return trim(line
), nil
157 // ReadByte or the next readLineSlice will flush the read buffer;
158 // copy the slice into buf.
159 r
.buf
= append(r
.buf
[:0], trim(line
)...)
161 // Read continuation lines.
162 for r
.skipSpace() > 0 {
163 line
, err
:= r
.readLineSlice()
167 r
.buf
= append(r
.buf
, ' ')
168 r
.buf
= append(r
.buf
, trim(line
)...)
173 // skipSpace skips R over all spaces and returns the number of bytes skipped.
174 func (r
*Reader
) skipSpace() int {
177 c
, err
:= r
.R
.ReadByte()
179 // Bufio will keep err until next read.
182 if c
!= ' ' && c
!= '\t' {
191 func (r
*Reader
) readCodeLine(expectCode
int) (code
int, continued
bool, message
string, err error
) {
192 line
, err
:= r
.ReadLine()
196 return parseCodeLine(line
, expectCode
)
199 func parseCodeLine(line
string, expectCode
int) (code
int, continued
bool, message
string, err error
) {
200 if len(line
) < 4 || line
[3] != ' ' && line
[3] != '-' {
201 err
= ProtocolError("short response: " + line
)
204 continued
= line
[3] == '-'
205 code
, err
= strconv
.Atoi(line
[0:3])
206 if err
!= nil || code
< 100 {
207 err
= ProtocolError("invalid response code: " + line
)
211 if 1 <= expectCode
&& expectCode
< 10 && code
/100 != expectCode ||
212 10 <= expectCode
&& expectCode
< 100 && code
/10 != expectCode ||
213 100 <= expectCode
&& expectCode
< 1000 && code
!= expectCode
{
214 err
= &Error
{code
, message
}
219 // ReadCodeLine reads a response code line of the form "code message",
221 // where code is a three-digit status code and the message
222 // extends to the rest of the line. An example of such a line is:
223 // 220 plan9.bell-labs.com ESMTP
225 // If the prefix of the status does not match the digits in expectCode,
226 // ReadCodeLine returns with err set to &Error{code, message}.
227 // For example, if expectCode is 31, an error will be returned if
228 // the status is not in the range [310,319].
230 // If the response is multi-line, ReadCodeLine returns an error.
232 // An expectCode <= 0 disables the check of the status code.
234 func (r
*Reader
) ReadCodeLine(expectCode
int) (code
int, message
string, err error
) {
235 code
, continued
, message
, err
:= r
.readCodeLine(expectCode
)
236 if err
== nil && continued
{
237 err
= ProtocolError("unexpected multi-line response: " + message
)
242 // ReadResponse reads a multi-line response of the form:
244 // code-message line 1
245 // code-message line 2
247 // code message line n
249 // where code is a three-digit status code. The first line starts with the
250 // code and a hyphen. The response is terminated by a line that starts
251 // with the same code followed by a space. Each line in message is
252 // separated by a newline (\n).
254 // See page 36 of RFC 959 (https://www.ietf.org/rfc/rfc959.txt) for
255 // details of another form of response accepted:
257 // code-message line 1
260 // code message line n
262 // If the prefix of the status does not match the digits in expectCode,
263 // ReadResponse returns with err set to &Error{code, message}.
264 // For example, if expectCode is 31, an error will be returned if
265 // the status is not in the range [310,319].
267 // An expectCode <= 0 disables the check of the status code.
269 func (r
*Reader
) ReadResponse(expectCode
int) (code
int, message
string, err error
) {
270 code
, continued
, message
, err
:= r
.readCodeLine(expectCode
)
273 line
, err
:= r
.ReadLine()
279 var moreMessage
string
280 code2
, continued
, moreMessage
, err
= parseCodeLine(line
, 0)
281 if err
!= nil || code2
!= code
{
282 message
+= "\n" + strings
.TrimRight(line
, "\r\n")
286 message
+= "\n" + moreMessage
288 if err
!= nil && multi
&& message
!= "" {
289 // replace one line error message with all lines (full message)
290 err
= &Error
{code
, message
}
295 // DotReader returns a new Reader that satisfies Reads using the
296 // decoded text of a dot-encoded block read from r.
297 // The returned Reader is only valid until the next call to a method on r.
300 // Dot encoding is a common framing used for data blocks
301 // in text protocols such as SMTP. The data consists of a sequence
302 // of lines, each of which ends in "\r\n". The sequence itself
303 // ends at a line containing just a dot: ".\r\n". Lines beginning
304 // with a dot are escaped with an additional dot to avoid
305 // looking like the end of the sequence.
307 // The decoded form returned by the Reader's Read method
308 // rewrites the "\r\n" line endings into the simpler "\n",
309 // removes leading dot escapes if present, and stops with error io.EOF
310 // after consuming (and discarding) the end-of-sequence line.
311 func (r
*Reader
) DotReader() io
.Reader
{
313 r
.dot
= &dotReader
{r
: r
}
317 type dotReader
struct {
322 // Read satisfies reads by decoding dot-encoded data read from d.r.
323 func (d
*dotReader
) Read(b
[]byte) (n
int, err error
) {
324 // Run data through a simple state machine to
325 // elide leading dots, rewrite trailing \r\n into \n,
326 // and detect ending .\r\n line.
328 stateBeginLine
= iota // beginning of line; initial state; must be zero
329 stateDot
// read . at beginning of line
330 stateDotCR
// read .\r at beginning of line
331 stateCR
// read \r (possibly at end of line)
332 stateData
// reading data in middle of line
333 stateEOF
// reached .\r\n end marker line
336 for n
< len(b
) && d
.state
!= stateEOF
{
338 c
, err
= br
.ReadByte()
341 err
= io
.ErrUnexpectedEOF
373 // Not part of .\r\n.
374 // Consume leading dot and emit saved \r.
381 d
.state
= stateBeginLine
384 // Not part of \r\n. Emit saved \r
395 d
.state
= stateBeginLine
401 if err
== nil && d
.state
== stateEOF
{
404 if err
!= nil && d
.r
.dot
== d
{
410 // closeDot drains the current DotReader if any,
411 // making sure that it reads until the ending dot line.
412 func (r
*Reader
) closeDot() {
416 buf
:= make([]byte, 128)
418 // When Read reaches EOF or an error,
419 // it will set r.dot == nil.
424 // ReadDotBytes reads a dot-encoding and returns the decoded data.
426 // See the documentation for the DotReader method for details about dot-encoding.
427 func (r
*Reader
) ReadDotBytes() ([]byte, error
) {
428 return io
.ReadAll(r
.DotReader())
431 // ReadDotLines reads a dot-encoding and returns a slice
432 // containing the decoded lines, with the final \r\n or \n elided from each.
434 // See the documentation for the DotReader method for details about dot-encoding.
435 func (r
*Reader
) ReadDotLines() ([]string, error
) {
436 // We could use ReadDotBytes and then Split it,
437 // but reading a line at a time avoids needing a
438 // large contiguous block of memory and is simpler.
443 line
, err
= r
.ReadLine()
446 err
= io
.ErrUnexpectedEOF
451 // Dot by itself marks end; otherwise cut one dot.
452 if len(line
) > 0 && line
[0] == '.' {
// colon is the single-byte separator between a header key and its value.
var colon = []byte{':'}
465 // ReadMIMEHeader reads a MIME-style header from r.
466 // The header is a sequence of possibly continued Key: Value lines
467 // ending in a blank line.
468 // The returned map m maps CanonicalMIMEHeaderKey(key) to a
469 // sequence of values in the same order encountered in the input.
471 // For example, consider this input:
478 // Given that input, ReadMIMEHeader returns the map:
480 // map[string][]string{
481 // "My-Key": {"Value 1", "Value 2"},
482 // "Long-Key": {"Even Longer Value"},
485 func (r
*Reader
) ReadMIMEHeader() (MIMEHeader
, error
) {
486 // Avoid lots of small slice allocations later by allocating one
487 // large one ahead of time which we'll cut up into smaller
488 // slices. If this isn't big enough later, we allocate small ones.
490 hint
:= r
.upcomingHeaderNewlines()
492 strs
= make([]string, hint
)
495 m
:= make(MIMEHeader
, hint
)
497 // The first line cannot start with a leading space.
498 if buf
, err
:= r
.R
.Peek(1); err
== nil && (buf
[0] == ' ' || buf
[0] == '\t') {
499 line
, err
:= r
.readLineSlice()
503 return m
, ProtocolError("malformed MIME header initial line: " + string(line
))
507 kv
, err
:= r
.readContinuedLineSlice(mustHaveFieldNameColon
)
512 // Key ends at first colon.
513 k
, v
, ok
:= bytes
.Cut(kv
, colon
)
515 return m
, ProtocolError("malformed MIME header line: " + string(kv
))
517 key
:= canonicalMIMEHeaderKey(k
)
519 // As per RFC 7230 field-name is a token, tokens consist of one or more chars.
520 // We could return a ProtocolError here, but better to be liberal in what we
521 // accept, so if we get an empty key, skip it.
526 // Skip initial spaces in value.
527 value
:= strings
.TrimLeft(string(v
), " \t")
530 if vv
== nil && len(strs
) > 0 {
531 // More than likely this will be a single-element key.
532 // Most headers aren't multi-valued.
533 // Set the capacity on strs[0] to 1, so any future append
534 // won't extend the slice into the other strings.
535 vv
, strs
= strs
[:1:1], strs
[1:]
539 m
[key
] = append(vv
, value
)
// noValidation is the validation func to hand to readContinuedLineSlice
// when every first line is acceptable: it never reports an error.
func noValidation(_ []byte) error {
	return nil
}
552 // mustHaveFieldNameColon ensures that, per RFC 7230, the
553 // field-name is on a single line, so the first line must contain a colon.
555 func mustHaveFieldNameColon(line
[]byte) error
{
556 if bytes
.IndexByte(line
, ':') < 0 {
557 return ProtocolError(fmt
.Sprintf("malformed MIME header: missing colon: %q", line
))
// nl is a lone newline byte, used when counting line breaks in buffered data.
var nl = []byte{'\n'}
564 // upcomingHeaderNewlines returns an approximation of the number of newlines
565 // that will be in this header. If it gets confused, it returns 0.
566 func (r
*Reader
) upcomingHeaderNewlines() (n
int) {
567 // Try to determine the 'hint' size.
568 r
.R
.Peek(1) // force a buffer load if empty
573 peek
, _
:= r
.R
.Peek(s
)
574 return bytes
.Count(peek
, nl
)
577 // CanonicalMIMEHeaderKey returns the canonical format of the
578 // MIME header key s. The canonicalization converts the first
579 // letter and any letter following a hyphen to upper case;
580 // the rest are converted to lowercase. For example, the
581 // canonical key for "accept-encoding" is "Accept-Encoding".
582 // MIME header keys are assumed to be ASCII only.
583 // If s contains a space or invalid header field bytes, it is
584 // returned without modifications.
585 func CanonicalMIMEHeaderKey(s
string) string {
586 commonHeaderOnce
.Do(initCommonHeader
)
588 // Quick check for canonical encoding.
590 for i
:= 0; i
< len(s
); i
++ {
592 if !validHeaderFieldByte(c
) {
595 if upper
&& 'a' <= c
&& c
<= 'z' {
596 return canonicalMIMEHeaderKey([]byte(s
))
598 if !upper
&& 'A' <= c
&& c
<= 'Z' {
599 return canonicalMIMEHeaderKey([]byte(s
))
// toLower is the distance between an upper-case ASCII letter and its
// lower-case counterpart: adding it to 'A'..'Z' yields 'a'..'z'.
const (
	toLower = 'a' - 'A'
)
608 // validHeaderFieldByte reports whether b is a valid byte in a header
609 // field name. RFC 7230 says:
610 // header-field = field-name ":" OWS field-value OWS
611 // field-name = token
612 // tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
613 // "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
615 func validHeaderFieldByte(b
byte) bool {
616 return int(b
) < len(isTokenTable
) && isTokenTable
[b
]
619 // canonicalMIMEHeaderKey is like CanonicalMIMEHeaderKey but is
620 // allowed to mutate the provided byte slice before returning the string.
623 // For invalid inputs (if a contains spaces or non-token bytes), a
624 // is unchanged and a string copy is returned.
625 func canonicalMIMEHeaderKey(a
[]byte) string {
626 // See if a looks like a header key. If not, return it unchanged.
627 for _
, c
:= range a
{
628 if validHeaderFieldByte(c
) {
631 // Don't canonicalize.
636 for i
, c
:= range a
{
637 // Canonicalize: first letter upper case
638 // and upper case after each dash.
639 // (Host, User-Agent, If-Modified-Since).
640 // MIME headers are ASCII only, so no Unicode issues.
641 if upper
&& 'a' <= c
&& c
<= 'z' {
643 } else if !upper
&& 'A' <= c
&& c
<= 'Z' {
647 upper
= c
== '-' // for next time
649 // The compiler recognizes m[string(byteSlice)] as a special
650 // case, so a copy of a's bytes into a new string does not
651 // happen in this map lookup:
652 if v
:= commonHeader
[string(a
)]; v
!= "" {
var (
	// commonHeader interns common header strings.
	commonHeader map[string]string

	// commonHeaderOnce guards the one-time setup of commonHeader.
	commonHeaderOnce sync.Once
)
663 func initCommonHeader() {
664 commonHeader
= make(map[string]string)
665 for _
, v
:= range []string{
677 "Content-Transfer-Encoding",
710 // isTokenTable is a copy of net/http/lex.go's isTokenTable.
711 // See https://httpwg.github.io/specs/rfc7230.html#rule.token.separators
712 var isTokenTable
= [127]bool{