1 // Copyright 2010 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
16 // BUG(rsc): To let callers manage exposure to denial of service
17 // attacks, Reader should allow them to set and reset a limit on
18 // the number of bytes read from the connection.
20 // A Reader implements convenience methods for reading requests
21 // or responses from a text protocol network connection.
25 buf
[]byte // a re-usable buffer for readContinuedLineSlice
28 // NewReader returns a new Reader reading from r.
29 func NewReader(r
*bufio
.Reader
) *Reader
{
33 // ReadLine reads a single line from r,
34 // eliding the final \n or \r\n from the returned string.
35 func (r
*Reader
) ReadLine() (string, error
) {
36 line
, err
:= r
.readLineSlice()
37 return string(line
), err
40 // ReadLineBytes is like ReadLine but returns a []byte instead of a string.
41 func (r
*Reader
) ReadLineBytes() ([]byte, error
) {
42 line
, err
:= r
.readLineSlice()
44 buf
:= make([]byte, len(line
))
51 func (r
*Reader
) readLineSlice() ([]byte, error
) {
55 l
, more
, err
:= r
.R
.ReadLine()
59 // Avoid the copy if the first call produced a full line.
60 if line
== nil && !more
{
63 line
= append(line
, l
...)
71 // ReadContinuedLine reads a possibly continued line from r,
72 // eliding the final trailing ASCII white space.
73 // Lines after the first are considered continuations if they
74 // begin with a space or tab character. In the returned data,
75 // continuation lines are separated from the previous line
76 // only by a single space: the newline and leading white space
79 // For example, consider this input:
85 // The first call to ReadContinuedLine will return "Line 1 continued..."
86 // and the second will return "Line 2".
88 // A line consisting of only white space is never continued.
90 func (r
*Reader
) ReadContinuedLine() (string, error
) {
91 line
, err
:= r
.readContinuedLineSlice()
92 return string(line
), err
// trim returns s with leading and trailing spaces and tabs removed.
// It does not assume Unicode or UTF-8: only the bytes ' ' and '\t'
// are stripped. The result is a sub-slice of s, not a copy.
func trim(s []byte) []byte {
	// Advance i past the leading blanks.
	i := 0
	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
		i++
	}
	// Pull n back over the trailing blanks. It never crosses i, so an
	// all-blank input yields an empty slice instead of an invalid range.
	n := len(s)
	for n > i && (s[n-1] == ' ' || s[n-1] == '\t') {
		n--
	}
	return s[i:n]
}
109 // ReadContinuedLineBytes is like ReadContinuedLine but
110 // returns a []byte instead of a string.
111 func (r
*Reader
) ReadContinuedLineBytes() ([]byte, error
) {
112 line
, err
:= r
.readContinuedLineSlice()
114 buf
:= make([]byte, len(line
))
121 func (r
*Reader
) readContinuedLineSlice() ([]byte, error
) {
122 // Read the first line.
123 line
, err
:= r
.readLineSlice()
127 if len(line
) == 0 { // blank line - no continuation
131 // Optimistically assume that we have started to buffer the next line
132 // and it starts with an ASCII letter (the next header key), so we can
133 // avoid copying that buffered data around in memory and skipping over
134 // non-existent whitespace.
135 if r
.R
.Buffered() > 1 {
136 peek
, err
:= r
.R
.Peek(1)
137 if err
== nil && isASCIILetter(peek
[0]) {
138 return trim(line
), nil
142 // ReadByte or the next readLineSlice will flush the read buffer;
143 // copy the slice into buf.
144 r
.buf
= append(r
.buf
[:0], trim(line
)...)
146 // Read continuation lines.
147 for r
.skipSpace() > 0 {
148 line
, err
:= r
.readLineSlice()
152 r
.buf
= append(r
.buf
, ' ')
153 r
.buf
= append(r
.buf
, line
...)
158 // skipSpace skips R over all spaces and returns the number of bytes skipped.
159 func (r
*Reader
) skipSpace() int {
162 c
, err
:= r
.R
.ReadByte()
164 // Bufio will keep err until next read.
167 if c
!= ' ' && c
!= '\t' {
176 func (r
*Reader
) readCodeLine(expectCode
int) (code
int, continued
bool, message
string, err error
) {
177 line
, err
:= r
.ReadLine()
181 return parseCodeLine(line
, expectCode
)
184 func parseCodeLine(line
string, expectCode
int) (code
int, continued
bool, message
string, err error
) {
185 if len(line
) < 4 || line
[3] != ' ' && line
[3] != '-' {
186 err
= ProtocolError("short response: " + line
)
189 continued
= line
[3] == '-'
190 code
, err
= strconv
.Atoi(line
[0:3])
191 if err
!= nil || code
< 100 {
192 err
= ProtocolError("invalid response code: " + line
)
196 if 1 <= expectCode
&& expectCode
< 10 && code
/100 != expectCode ||
197 10 <= expectCode
&& expectCode
< 100 && code
/10 != expectCode ||
198 100 <= expectCode
&& expectCode
< 1000 && code
!= expectCode
{
199 err
= &Error
{code
, message
}
204 // ReadCodeLine reads a response code line of the form
206 // where code is a three-digit status code and the message
207 // extends to the rest of the line. An example of such a line is:
208 // 220 plan9.bell-labs.com ESMTP
210 // If the prefix of the status does not match the digits in expectCode,
211 // ReadCodeLine returns with err set to &Error{code, message}.
212 // For example, if expectCode is 31, an error will be returned if
213 // the status is not in the range [310,319].
215 // If the response is multi-line, ReadCodeLine returns an error.
217 // An expectCode <= 0 disables the check of the status code.
219 func (r
*Reader
) ReadCodeLine(expectCode
int) (code
int, message
string, err error
) {
220 code
, continued
, message
, err
:= r
.readCodeLine(expectCode
)
221 if err
== nil && continued
{
222 err
= ProtocolError("unexpected multi-line response: " + message
)
227 // ReadResponse reads a multi-line response of the form:
229 // code-message line 1
230 // code-message line 2
232 // code message line n
234 // where code is a three-digit status code. The first line starts with the
235 // code and a hyphen. The response is terminated by a line that starts
236 // with the same code followed by a space. Each line in message is
237 // separated by a newline (\n).
239 // See page 36 of RFC 959 (http://www.ietf.org/rfc/rfc959.txt) for details.
242 // If the prefix of the status does not match the digits in expectCode,
243 // ReadResponse returns with err set to &Error{code, message}.
244 // For example, if expectCode is 31, an error will be returned if
245 // the status is not in the range [310,319].
247 // An expectCode <= 0 disables the check of the status code.
249 func (r
*Reader
) ReadResponse(expectCode
int) (code
int, message
string, err error
) {
250 code
, continued
, message
, err
:= r
.readCodeLine(expectCode
)
251 for err
== nil && continued
{
252 line
, err
:= r
.ReadLine()
258 var moreMessage
string
259 code2
, continued
, moreMessage
, err
= parseCodeLine(line
, expectCode
)
260 if err
!= nil || code2
!= code
{
261 message
+= "\n" + strings
.TrimRight(line
, "\r\n")
265 message
+= "\n" + moreMessage
270 // DotReader returns a new Reader that satisfies Reads using the
271 // decoded text of a dot-encoded block read from r.
272 // The returned Reader is only valid until the next call to a method on r.
275 // Dot encoding is a common framing used for data blocks
276 // in text protocols such as SMTP. The data consists of a sequence
277 // of lines, each of which ends in "\r\n". The sequence itself
278 // ends at a line containing just a dot: ".\r\n". Lines beginning
279 // with a dot are escaped with an additional dot to avoid
280 // looking like the end of the sequence.
282 // The decoded form returned by the Reader's Read method
283 // rewrites the "\r\n" line endings into the simpler "\n",
284 // removes leading dot escapes if present, and stops with error io.EOF
285 // after consuming (and discarding) the end-of-sequence line.
286 func (r
*Reader
) DotReader() io
.Reader
{
288 r
.dot
= &dotReader
{r
: r
}
292 type dotReader
struct {
297 // Read satisfies reads by decoding dot-encoded data read from d.r.
298 func (d
*dotReader
) Read(b
[]byte) (n
int, err error
) {
299 // Run data through a simple state machine to
300 // elide leading dots, rewrite trailing \r\n into \n,
301 // and detect ending .\r\n line.
303 stateBeginLine
= iota // beginning of line; initial state; must be zero
304 stateDot
// read . at beginning of line
305 stateDotCR
// read .\r at beginning of line
306 stateCR
// read \r (possibly at end of line)
307 stateData
// reading data in middle of line
308 stateEOF
// reached .\r\n end marker line
311 for n
< len(b
) && d
.state
!= stateEOF
{
313 c
, err
= br
.ReadByte()
316 err
= io
.ErrUnexpectedEOF
348 // Not part of .\r\n.
349 // Consume leading dot and emit saved \r.
356 d
.state
= stateBeginLine
359 // Not part of \r\n. Emit saved \r
370 d
.state
= stateBeginLine
376 if err
== nil && d
.state
== stateEOF
{
379 if err
!= nil && d
.r
.dot
== d
{
385 // closeDot drains the current DotReader if any,
386 // making sure that it reads until the ending dot line.
387 func (r
*Reader
) closeDot() {
391 buf
:= make([]byte, 128)
393 // When Read reaches EOF or an error,
394 // it will set r.dot == nil.
399 // ReadDotBytes reads a dot-encoding and returns the decoded data.
401 // See the documentation for the DotReader method for details about dot-encoding.
402 func (r
*Reader
) ReadDotBytes() ([]byte, error
) {
403 return ioutil
.ReadAll(r
.DotReader())
406 // ReadDotLines reads a dot-encoding and returns a slice
407 // containing the decoded lines, with the final \r\n or \n elided from each.
409 // See the documentation for the DotReader method for details about dot-encoding.
410 func (r
*Reader
) ReadDotLines() ([]string, error
) {
411 // We could use ReadDotBytes and then Split it,
412 // but reading a line at a time avoids needing a
413 // large contiguous block of memory and is simpler.
418 line
, err
= r
.ReadLine()
421 err
= io
.ErrUnexpectedEOF
426 // Dot by itself marks end; otherwise cut one dot.
427 if len(line
) > 0 && line
[0] == '.' {
438 // ReadMIMEHeader reads a MIME-style header from r.
439 // The header is a sequence of possibly continued Key: Value lines
440 // ending in a blank line.
441 // The returned map m maps CanonicalMIMEHeaderKey(key) to a
442 // sequence of values in the same order encountered in the input.
444 // For example, consider this input:
451 // Given that input, ReadMIMEHeader returns the map:
453 // map[string][]string{
454 // "My-Key": {"Value 1", "Value 2"},
455 // "Long-Key": {"Even Longer Value"},
458 func (r
*Reader
) ReadMIMEHeader() (MIMEHeader
, error
) {
459 // Avoid lots of small slice allocations later by allocating one
460 // large one ahead of time which we'll cut up into smaller
461 // slices. If this isn't big enough later, we allocate small ones.
463 hint
:= r
.upcomingHeaderNewlines()
465 strs
= make([]string, hint
)
468 m
:= make(MIMEHeader
, hint
)
470 kv
, err
:= r
.readContinuedLineSlice()
475 // Key ends at first colon; should not have spaces but
476 // they appear in the wild, violating specs, so we
477 // remove them if present.
478 i
:= bytes
.IndexByte(kv
, ':')
480 return m
, ProtocolError("malformed MIME header line: " + string(kv
))
483 for endKey
> 0 && kv
[endKey
-1] == ' ' {
486 key
:= canonicalMIMEHeaderKey(kv
[:endKey
])
488 // Skip initial spaces in value.
490 for i
< len(kv
) && (kv
[i
] == ' ' || kv
[i
] == '\t') {
493 value
:= string(kv
[i
:])
496 if vv
== nil && len(strs
) > 0 {
497 // More than likely this will be a single-element key.
498 // Most headers aren't multi-valued.
499 // Set the capacity on strs[0] to 1, so any future append
500 // won't extend the slice into the other strings.
501 vv
, strs
= strs
[:1:1], strs
[1:]
505 m
[key
] = append(vv
, value
)
514 // upcomingHeaderNewlines returns an approximation of the number of newlines
515 // that will be in this header. If it gets confused, it returns 0.
516 func (r
*Reader
) upcomingHeaderNewlines() (n
int) {
517 // Try to determine the 'hint' size.
518 r
.R
.Peek(1) // force a buffer load if empty
523 peek
, _
:= r
.R
.Peek(s
)
525 i
:= bytes
.IndexByte(peek
, '\n')
527 // Not present (-1) or found within the next few bytes,
528 // implying we're at the end ("\r\n\r\n" or "\n\n")
537 // CanonicalMIMEHeaderKey returns the canonical format of the
538 // MIME header key s. The canonicalization converts the first
539 // letter and any letter following a hyphen to upper case;
540 // the rest are converted to lowercase. For example, the
541 // canonical key for "accept-encoding" is "Accept-Encoding".
542 // MIME header keys are assumed to be ASCII only.
543 func CanonicalMIMEHeaderKey(s
string) string {
544 // Quick check for canonical encoding.
546 for i
:= 0; i
< len(s
); i
++ {
548 if upper
&& 'a' <= c
&& c
<= 'z' {
549 return canonicalMIMEHeaderKey([]byte(s
))
551 if !upper
&& 'A' <= c
&& c
<= 'Z' {
552 return canonicalMIMEHeaderKey([]byte(s
))
559 const toLower
= 'a' - 'A'
561 // canonicalMIMEHeaderKey is like CanonicalMIMEHeaderKey but is
562 // allowed to mutate the provided byte slice before returning the string.
564 func canonicalMIMEHeaderKey(a
[]byte) string {
565 // Look for it in commonHeaders, so that we can avoid an
566 // allocation by sharing the strings among all users
567 // of textproto. If we don't find it, a has been canonicalized
568 // so just return string(a).
571 hi
:= len(commonHeaders
)
572 for i
:= 0; i
< len(a
); i
++ {
573 // Canonicalize: first letter upper case
574 // and upper case after each dash.
575 // (Host, User-Agent, If-Modified-Since).
576 // MIME headers are ASCII only, so no Unicode issues.
583 if upper
&& 'a' <= c
&& c
<= 'z' {
585 } else if !upper
&& 'A' <= c
&& c
<= 'Z' {
589 upper
= c
== '-' // for next time
592 for lo
< hi
&& (len(commonHeaders
[lo
]) <= i || commonHeaders
[lo
][i
] < c
) {
595 for hi
> lo
&& commonHeaders
[hi
-1][i
] > c
{
600 if lo
< hi
&& len(commonHeaders
[lo
]) == len(a
) {
601 return commonHeaders
[lo
]
606 var commonHeaders
= []string{
618 "Content-Transfer-Encoding",