1 // Copyright 2010 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
17 // BUG(rsc): To let callers manage exposure to denial of service
18 // attacks, Reader should allow them to set and reset a limit on
19 // the number of bytes read from the connection.
21 // A Reader implements convenience methods for reading requests
22 // or responses from a text protocol network connection.
28 // NewReader returns a new Reader reading from r.
// The caller supplies an already-buffered *bufio.Reader. As the BUG(rsc)
// note in this file records, no limit on the number of bytes read from
// the connection can be set on the returned Reader.
29 func NewReader(r
*bufio
.Reader
) *Reader
{
33 // ReadLine reads a single line from r,
34 // eliding the final \n or \r\n from the returned string.
// It delegates to ReadLineBytes and converts the resulting bytes to a
// string; the error from ReadLineBytes is returned unchanged.
35 func (r
*Reader
) ReadLine() (string, os
.Error
) {
36 line
, err
:= r
.ReadLineBytes()
37 return string(line
), err
40 // ReadLineBytes is like ReadLine but returns a []byte instead of a string.
// The raw line comes from r.R.ReadBytes('\n'); the code then tests
// line[n-1] for '\n' and, after that, for '\r'.
41 func (r
*Reader
) ReadLineBytes() ([]byte, os
.Error
) {
43 line
, err
:= r
.R
.ReadBytes('\n')
// Both tests are guarded by n > 0, so line[n-1] is never read at n == 0.
45 if n
> 0 && line
[n
-1] == '\n' {
47 if n
> 0 && line
[n
-1] == '\r' {
// space is a one-byte slice containing a single ASCII space. It is the
// separator that ReadContinuedLineBytes appends (via bytes.Add) between a
// line and its continuation.
54 var space
= []byte{' '}
56 // ReadContinuedLine reads a possibly continued line from r,
57 // eliding the final trailing ASCII white space.
58 // Lines after the first are considered continuations if they
59 // begin with a space or tab character. In the returned data,
60 // continuation lines are separated from the previous line
61 // only by a single space: the newline and leading white space
// are removed.
//
64 // For example, consider this input:
//
//	Line 1
//	  continued...
//	Line 2
//
70 // The first call to ReadContinuedLine will return "Line 1 continued..."
71 // and the second will return "Line 2".
//
73 // A line consisting of only white space is never continued.
//
// The work is done by ReadContinuedLineBytes; this method only converts
// its result to a string and passes the error through.
75 func (r
*Reader
) ReadContinuedLine() (string, os
.Error
) {
76 line
, err
:= r
.ReadContinuedLineBytes()
77 return string(line
), err
80 // trim returns s with leading and trailing spaces and tabs removed.
81 // It does not assume Unicode or UTF-8.
// Only the bytes ' ' and '\t' are compared. One loop runs while
// i < len(s) and s[i] is a space or tab; a second runs while n > i and
// s[n-1] is a space or tab.
82 func trim(s
[]byte) []byte {
84 for i
< len(s
) && (s
[i
] == ' ' || s
[i
] == '\t') {
// The n > i bound keeps the trailing scan from crossing the leading one.
88 for n
> i
&& (s
[n
-1] == ' ' || s
[n
-1] == '\t') {
94 // ReadContinuedLineBytes is like ReadContinuedLine but
95 // returns a []byte instead of a string.
// The first line comes from ReadLineBytes. Whether a continuation follows
// is decided by reading one byte with r.R.ReadByte and comparing it with
// ' ' and '\t'. Each continuation is read with ReadLineBytes and joined to
// the accumulated line with bytes.Add, using the package-level space
// slice as the separator.
96 func (r
*Reader
) ReadContinuedLineBytes() ([]byte, os
.Error
) {
97 // Read the first line.
98 line
, err
:= r
.ReadLineBytes()
102 if len(line
) == 0 { // blank line - no continuation
107 // Look for a continuation line.
108 c
, err
:= r
.R
.ReadByte()
110 // Delay err until we read the byte next time.
113 if c
!= ' ' && c
!= '\t' {
114 // Not a continuation.
119 // Read continuation lines.
121 // Consume leading spaces; one already gone.
123 c
, err
= r
.R
.ReadByte()
127 if c
!= ' ' && c
!= '\t' {
133 cont
, err
= r
.ReadLineBytes()
// Join with exactly one space, then the continuation text.
135 line
= bytes
.Add(line
, space
)
136 line
= bytes
.Add(line
, cont
)
141 // Check for leading space on next line.
142 if c
, err
= r
.R
.ReadByte(); err
!= nil {
145 if c
!= ' ' && c
!= '\t' {
151 // Delay error until next call.
// readCodeLine reads one line with ReadLine and interprets it as a status
// line: a code in line[0:3] (parsed with strconv.Atoi), a separator at
// line[3], and the text that follows.
//
// The separator must be ' ' or '-'; continued is set to line[3] == '-'.
// A line shorter than 4 bytes, or with any other separator, produces
// ProtocolError("short response: " + line). A code that strconv.Atoi
// rejects, or that is below 100, produces
// ProtocolError("invalid response code: " + line).
//
// expectCode is matched as a prefix of code: values 1-9 are compared with
// code/100, values 10-99 with code/10, and values 100-999 with code
// itself. On a mismatch err is set to &Error{code, message}. Any other
// expectCode (<= 0 or >= 1000) matches none of the three clauses, so no
// check is made.
158 func (r
*Reader
) readCodeLine(expectCode
int) (code
int, continued
bool, message
string, err os
.Error
) {
159 line
, err
:= r
.ReadLine()
// && binds tighter than ||, so the length test guards both line[3] reads.
163 if len(line
) < 4 || line
[3] != ' ' && line
[3] != '-' {
164 err
= ProtocolError("short response: " + line
)
167 continued
= line
[3] == '-'
168 code
, err
= strconv
.Atoi(line
[0:3])
169 if err
!= nil || code
< 100 {
170 err
= ProtocolError("invalid response code: " + line
)
// One clause per possible number of digits in expectCode.
174 if 1 <= expectCode
&& expectCode
< 10 && code
/100 != expectCode ||
175 10 <= expectCode
&& expectCode
< 100 && code
/10 != expectCode ||
176 100 <= expectCode
&& expectCode
< 1000 && code
!= expectCode
{
177 err
= &Error
{code
, message
}
182 // ReadCodeLine reads a response code line of the form
//	code message
184 // where code is a 3-digit status code and the message
185 // extends to the rest of the line. An example of such a line is:
186 // 220 plan9.bell-labs.com ESMTP
//
188 // If the prefix of the status does not match the digits in expectCode,
189 // ReadCodeLine returns with err set to &Error{code, message}.
190 // For example, if expectCode is 31, an error will be returned if
191 // the status is not in the range [310,319].
//
193 // If the response is multi-line, ReadCodeLine returns an error.
// That error is a ProtocolError whose text starts with
// "unexpected multi-line response: ".
//
195 // An expectCode <= 0 disables the check of the status code.
//
// Parsing and the expectCode check are done by readCodeLine.
197 func (r
*Reader
) ReadCodeLine(expectCode
int) (code
int, message
string, err os
.Error
) {
198 code
, continued
, message
, err
:= r
.readCodeLine(expectCode
)
// A line that parsed cleanly but used '-' as separator is a continuation.
199 if err
== nil && continued
{
200 err
= ProtocolError("unexpected multi-line response: " + message
)
205 // ReadResponse reads a multi-line response of the form
206 // code-message line 1
207 // code-message line 2
//	...
209 // code message line n
210 // where code is a 3-digit status code. Each line should have the same code.
211 // The response is terminated by a line that uses a space between the code and
212 // the message line rather than a dash. Each line in message is separated by
// a newline (\n).
//
215 // If the prefix of the status does not match the digits in expectCode,
216 // ReadResponse returns with err set to &Error{code, message}.
217 // For example, if expectCode is 31, an error will be returned if
218 // the status is not in the range [310,319].
//
220 // An expectCode <= 0 disables the check of the status code.
//
// Each line is parsed by readCodeLine, which also applies the expectCode
// check to every line.
222 func (r
*Reader
) ReadResponse(expectCode
int) (code
int, message
string, err os
.Error
) {
223 code
, continued
, message
, err
:= r
.readCodeLine(expectCode
)
// Keep reading while the previous line parsed cleanly and ended in '-'.
224 for err
== nil && continued
{
226 var moreMessage
string
227 code2
, continued
, moreMessage
, err
= r
.readCodeLine(expectCode
)
// Reports both codes when a continuation line's code (code2) disagrees
// with the first line's code.
229 err
= ProtocolError("status code mismatch: " + strconv
.Itoa(code
) + ", " + strconv
.Itoa(code2
))
// Message lines are joined with a single "\n".
231 message
+= "\n" + moreMessage
236 // DotReader returns a new Reader that satisfies Reads using the
237 // decoded text of a dot-encoded block read from r.
238 // The returned Reader is only valid until the next call
// to a method on r (NOTE(review): inferred from closeDot, which drains the
// current DotReader; confirm which methods call it).
//
241 // Dot encoding is a common framing used for data blocks
242 // in text protocols like SMTP. The data consists of a sequence
243 // of lines, each of which ends in "\r\n". The sequence itself
244 // ends at a line containing just a dot: ".\r\n". Lines beginning
245 // with a dot are escaped with an additional dot to avoid
246 // looking like the end of the sequence.
//
248 // The decoded form returned by the Reader's Read method
249 // rewrites the "\r\n" line endings into the simpler "\n",
250 // removes leading dot escapes if present, and stops with error os.EOF
251 // after consuming (and discarding) the end-of-sequence line.
252 func (r
*Reader
) DotReader() io
.Reader
{
// Record the new reader as r's current dot reader; dotReader.Read and
// closeDot both consult r.dot.
254 r
.dot
= &dotReader
{r
: r
}
// dotReader is the concrete type that (*Reader).DotReader stores in r.dot.
// Its Read method uses two fields: r, the parent Reader set by DotReader,
// and state, the current state of the dot-decoding state machine.
258 type dotReader
struct {
263 // Read satisfies reads by decoding dot-encoded data read from d.r.
// It returns the number of decoded bytes stored in b and an error. The
// decoding state is kept in d.state, so a dot-encoded block can be
// consumed across several Read calls.
264 func (d
*dotReader
) Read(b
[]byte) (n
int, err os
.Error
) {
265 // Run data through a simple state machine to
266 // elide leading dots, rewrite trailing \r\n into \n,
267 // and detect ending .\r\n line.
// stateBeginLine must be zero because DotReader builds the dotReader
// without setting state, so a fresh reader starts at beginning of line.
269 stateBeginLine
= iota // beginning of line; initial state; must be zero
270 stateDot
// read . at beginning of line
271 stateDotCR
// read .\r at beginning of line
272 stateCR
// read \r (possibly at end of line)
273 stateData
// reading data in middle of line
274 stateEOF
// reached .\r\n end marker line
// Stop when b is full or the end marker has been seen.
277 for n
< len(b
) && d
.state
!= stateEOF
{
279 c
, err
= br
.ReadByte()
// A read failure before the .\r\n terminator is reported as
// io.ErrUnexpectedEOF (NOTE(review): presumably only when err is os.EOF;
// confirm the guarding condition).
282 err
= io
.ErrUnexpectedEOF
314 // Not part of .\r\n.
315 // Consume leading dot and emit saved \r.
322 d
.state
= stateBeginLine
325 // Not part of \r\n. Emit saved \r
336 d
.state
= stateBeginLine
// Terminator consumed without error: per the DotReader documentation the
// caller is to see os.EOF at this point.
342 if err
== nil && d
.state
== stateEOF
{
// On any error, act only if this reader is still the parent's current dot
// reader; closeDot's comment says Read sets r.dot to nil in this case.
345 if err
!= nil && d
.r
.dot
== d
{
351 // closeDot drains the current DotReader if any,
352 // making sure that it reads until the ending dot line.
// A 128-byte buffer is allocated for this.
353 func (r
*Reader
) closeDot() {
357 buf
:= make([]byte, 128)
359 // When Read reaches EOF or an error,
360 // it will set r.dot == nil.
365 // ReadDotBytes reads a dot-encoding and returns the decoded data.
//
367 // See the documentation for the DotReader method for details about dot-encoding.
//
// The whole block is collected with ioutil.ReadAll over r.DotReader(), and
// the result of ReadAll is returned as is.
368 func (r
*Reader
) ReadDotBytes() ([]byte, os
.Error
) {
369 return ioutil
.ReadAll(r
.DotReader())
372 // ReadDotLines reads a dot-encoding and returns a slice
373 // containing the decoded lines, with the final \r\n or \n elided from each.
//
375 // See the documentation for the DotReader method for details about dot-encoding.
//
// Each line is read with r.ReadLine and the lines are accumulated in a
// vector.StringVector.
376 func (r
*Reader
) ReadDotLines() ([]string, os
.Error
) {
377 // We could use ReadDotBytes and then Split it,
378 // but reading a line at a time avoids needing a
379 // large contiguous block of memory and is simpler.
380 var v vector
.StringVector
384 line
, err
= r
.ReadLine()
// Input that ends before the terminating dot line is reported as
// io.ErrUnexpectedEOF (NOTE(review): presumably only when err is os.EOF;
// confirm the guarding condition).
387 err
= io
.ErrUnexpectedEOF
392 // Dot by itself marks end; otherwise cut one dot.
393 if len(line
) > 0 && line
[0] == '.' {
404 // ReadMIMEHeader reads a MIME-style header from r.
405 // The header is a sequence of possibly continued Key: Value lines
406 // ending in a blank line.
407 // The returned map m maps CanonicalHeaderKey(key) to a
408 // sequence of values in the same order encountered in the input.
//
410 // For example, consider this input:
//
//	My-Key: Value 1
//	Long-Key: Even
//	       Longer Value
//	My-Key: Value 2
//
417 // Given that input, ReadMIMEHeader returns the map:
//
419 // map[string][]string{
420 // "My-Key": []string{"Value 1", "Value 2"},
421 // "Long-Key": []string{"Even Longer Value"},
//	}
//
// A line with no colon, or with a space before the colon, stops parsing:
// the entries collected so far are returned together with
// ProtocolError("malformed MIME header line: ...").
424 func (r
*Reader
) ReadMIMEHeader() (map[string][]string, os
.Error
) {
425 m
:= make(map[string][]string)
// Each header line, with its continuations already folded in, comes from
// ReadContinuedLineBytes.
427 kv
, err
:= r
.ReadContinuedLineBytes()
432 // Key ends at first colon; must not have spaces.
// NOTE(review): only ' ' is rejected here; a '\t' inside the key passes
// this check. Confirm whether that is intended.
433 i
:= bytes
.IndexByte(kv
, ':')
434 if i
< 0 || bytes
.IndexByte(kv
[0:i
], ' ') >= 0 {
435 return m
, ProtocolError("malformed MIME header line: " + string(kv
))
437 key
:= CanonicalHeaderKey(string(kv
[0:i
]))
439 // Skip initial spaces in value.
// Both ' ' and '\t' are skipped.
441 for i
< len(kv
) && (kv
[i
] == ' ' || kv
[i
] == '\t') {
444 value
:= string(kv
[i
:])
// View the existing values for key as a StringVector.
446 v
:= vector
.StringVector(m
[key
])
457 // CanonicalHeaderKey returns the canonical format of the
458 // MIME header key s. The canonicalization converts the first
459 // letter and any letter following a hyphen to upper case;
460 // the rest are converted to lowercase. For example, the
461 // canonical key for "accept-encoding" is "Accept-Encoding".
462 func CanonicalHeaderKey(s
string) string {
463 // Quick check for canonical encoding.
465 for i
:= 0; i
< len(s
); i
++ {
467 if needUpper
&& 'a' <= c
&& c
<= 'z' {
470 if !needUpper
&& 'A' <= c
&& c
<= 'Z' {
478 // Canonicalize: first letter upper case
479 // and upper case after each dash.
480 // (Host, User-Agent, If-Modified-Since).
481 // MIME headers are ASCII only, so no Unicode issues.
484 for i
, v
:= range a
{
485 if upper
&& 'a' <= v
&& v
<= 'z' {
488 if !upper
&& 'A' <= v
&& v
<= 'Z' {