Merge from mainline (167278:168000).
[official-gcc/graphite-test-results.git] / libgo / go / net / textproto / reader.go
blobaad25539d4e70aa31dc5f5fdc5d97f3a8db7fb4a
1 // Copyright 2010 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 package textproto
7 import (
8 "bufio"
9 "bytes"
10 "container/vector"
11 "io"
12 "io/ioutil"
13 "os"
14 "strconv"
17 // BUG(rsc): To let callers manage exposure to denial of service
18 // attacks, Reader should allow them to set and reset a limit on
19 // the number of bytes read from the connection.
21 // A Reader implements convenience methods for reading requests
22 // or responses from a text protocol network connection.
23 type Reader struct {
24 R *bufio.Reader
25 dot *dotReader
28 // NewReader returns a new Reader reading from r.
29 func NewReader(r *bufio.Reader) *Reader {
30 return &Reader{R: r}
33 // ReadLine reads a single line from r,
34 // eliding the final \n or \r\n from the returned string.
35 func (r *Reader) ReadLine() (string, os.Error) {
36 line, err := r.ReadLineBytes()
37 return string(line), err
40 // ReadLineBytes is like ReadLine but returns a []byte instead of a string.
41 func (r *Reader) ReadLineBytes() ([]byte, os.Error) {
42 r.closeDot()
43 line, err := r.R.ReadBytes('\n')
44 n := len(line)
45 if n > 0 && line[n-1] == '\n' {
46 n--
47 if n > 0 && line[n-1] == '\r' {
48 n--
51 return line[0:n], err
// space is the single-byte separator placed between a line
// and each of its continuation lines.
var space = []byte(" ")
56 // ReadContinuedLine reads a possibly continued line from r,
57 // eliding the final trailing ASCII white space.
58 // Lines after the first are considered continuations if they
59 // begin with a space or tab character. In the returned data,
60 // continuation lines are separated from the previous line
61 // only by a single space: the newline and leading white space
62 // are removed.
64 // For example, consider this input:
66 // Line 1
67 // continued...
68 // Line 2
70 // The first call to ReadContinuedLine will return "Line 1 continued..."
71 // and the second will return "Line 2".
73 // A line consisting of only white space is never continued.
75 func (r *Reader) ReadContinuedLine() (string, os.Error) {
76 line, err := r.ReadContinuedLineBytes()
77 return string(line), err
// trim returns the sub-slice of s that remains once leading and
// trailing spaces and tabs are stripped. It works byte by byte and
// makes no assumption about Unicode or UTF-8. The result shares
// storage with s.
func trim(s []byte) []byte {
	lo, hi := 0, len(s)

	// Advance past leading blanks.
	for ; lo < hi; lo++ {
		if c := s[lo]; c != ' ' && c != '\t' {
			break
		}
	}

	// Back up over trailing blanks, never crossing lo.
	for ; hi > lo; hi-- {
		if c := s[hi-1]; c != ' ' && c != '\t' {
			break
		}
	}
	return s[lo:hi]
}
94 // ReadContinuedLineBytes is like ReadContinuedLine but
95 // returns a []byte instead of a string.
96 func (r *Reader) ReadContinuedLineBytes() ([]byte, os.Error) {
97 // Read the first line.
98 line, err := r.ReadLineBytes()
99 if err != nil {
100 return line, err
102 if len(line) == 0 { // blank line - no continuation
103 return line, nil
105 line = trim(line)
107 // Look for a continuation line.
108 c, err := r.R.ReadByte()
109 if err != nil {
110 // Delay err until we read the byte next time.
111 return line, nil
113 if c != ' ' && c != '\t' {
114 // Not a continuation.
115 r.R.UnreadByte()
116 return line, nil
119 // Read continuation lines.
120 for {
121 // Consume leading spaces; one already gone.
122 for {
123 c, err = r.R.ReadByte()
124 if err != nil {
125 break
127 if c != ' ' && c != '\t' {
128 r.R.UnreadByte()
129 break
132 var cont []byte
133 cont, err = r.ReadLineBytes()
134 cont = trim(cont)
135 line = bytes.Add(line, space)
136 line = bytes.Add(line, cont)
137 if err != nil {
138 break
141 // Check for leading space on next line.
142 if c, err = r.R.ReadByte(); err != nil {
143 break
145 if c != ' ' && c != '\t' {
146 r.R.UnreadByte()
147 break
151 // Delay error until next call.
152 if len(line) > 0 {
153 err = nil
155 return line, err
158 func (r *Reader) readCodeLine(expectCode int) (code int, continued bool, message string, err os.Error) {
159 line, err := r.ReadLine()
160 if err != nil {
161 return
163 if len(line) < 4 || line[3] != ' ' && line[3] != '-' {
164 err = ProtocolError("short response: " + line)
165 return
167 continued = line[3] == '-'
168 code, err = strconv.Atoi(line[0:3])
169 if err != nil || code < 100 {
170 err = ProtocolError("invalid response code: " + line)
171 return
173 message = line[4:]
174 if 1 <= expectCode && expectCode < 10 && code/100 != expectCode ||
175 10 <= expectCode && expectCode < 100 && code/10 != expectCode ||
176 100 <= expectCode && expectCode < 1000 && code != expectCode {
177 err = &Error{code, message}
179 return
182 // ReadCodeLine reads a response code line of the form
183 // code message
184 // where code is a 3-digit status code and the message
185 // extends to the rest of the line. An example of such a line is:
186 // 220 plan9.bell-labs.com ESMTP
188 // If the prefix of the status does not match the digits in expectCode,
189 // ReadCodeLine returns with err set to &Error{code, message}.
190 // For example, if expectCode is 31, an error will be returned if
191 // the status is not in the range [310,319].
193 // If the response is multi-line, ReadCodeLine returns an error.
195 // An expectCode <= 0 disables the check of the status code.
197 func (r *Reader) ReadCodeLine(expectCode int) (code int, message string, err os.Error) {
198 code, continued, message, err := r.readCodeLine(expectCode)
199 if err == nil && continued {
200 err = ProtocolError("unexpected multi-line response: " + message)
202 return
205 // ReadResponse reads a multi-line response of the form
206 // code-message line 1
207 // code-message line 2
208 // ...
209 // code message line n
210 // where code is a 3-digit status code. Each line should have the same code.
211 // The response is terminated by a line that uses a space between the code and
212 // the message line rather than a dash. Each line in message is separated by
213 // a newline (\n).
215 // If the prefix of the status does not match the digits in expectCode,
216 // ReadResponse returns with err set to &Error{code, message}.
217 // For example, if expectCode is 31, an error will be returned if
218 // the status is not in the range [310,319].
220 // An expectCode <= 0 disables the check of the status code.
222 func (r *Reader) ReadResponse(expectCode int) (code int, message string, err os.Error) {
223 code, continued, message, err := r.readCodeLine(expectCode)
224 for err == nil && continued {
225 var code2 int
226 var moreMessage string
227 code2, continued, moreMessage, err = r.readCodeLine(expectCode)
228 if code != code2 {
229 err = ProtocolError("status code mismatch: " + strconv.Itoa(code) + ", " + strconv.Itoa(code2))
231 message += "\n" + moreMessage
233 return
236 // DotReader returns a new Reader that satisfies Reads using the
237 // decoded text of a dot-encoded block read from r.
238 // The returned Reader is only valid until the next call
239 // to a method on r.
241 // Dot encoding is a common framing used for data blocks
242 // in text protcols like SMTP. The data consists of a sequence
243 // of lines, each of which ends in "\r\n". The sequence itself
244 // ends at a line containing just a dot: ".\r\n". Lines beginning
245 // with a dot are escaped with an additional dot to avoid
246 // looking like the end of the sequence.
248 // The decoded form returned by the Reader's Read method
249 // rewrites the "\r\n" line endings into the simpler "\n",
250 // removes leading dot escapes if present, and stops with error os.EOF
251 // after consuming (and discarding) the end-of-sequence line.
252 func (r *Reader) DotReader() io.Reader {
253 r.closeDot()
254 r.dot = &dotReader{r: r}
255 return r.dot
258 type dotReader struct {
259 r *Reader
260 state int
263 // Read satisfies reads by decoding dot-encoded data read from d.r.
264 func (d *dotReader) Read(b []byte) (n int, err os.Error) {
265 // Run data through a simple state machine to
266 // elide leading dots, rewrite trailing \r\n into \n,
267 // and detect ending .\r\n line.
268 const (
269 stateBeginLine = iota // beginning of line; initial state; must be zero
270 stateDot // read . at beginning of line
271 stateDotCR // read .\r at beginning of line
272 stateCR // read \r (possibly at end of line)
273 stateData // reading data in middle of line
274 stateEOF // reached .\r\n end marker line
276 br := d.r.R
277 for n < len(b) && d.state != stateEOF {
278 var c byte
279 c, err = br.ReadByte()
280 if err != nil {
281 if err == os.EOF {
282 err = io.ErrUnexpectedEOF
284 break
286 switch d.state {
287 case stateBeginLine:
288 if c == '.' {
289 d.state = stateDot
290 continue
292 if c == '\r' {
293 d.state = stateCR
294 continue
296 d.state = stateData
298 case stateDot:
299 if c == '\r' {
300 d.state = stateDotCR
301 continue
303 if c == '\n' {
304 d.state = stateEOF
305 continue
307 d.state = stateData
309 case stateDotCR:
310 if c == '\n' {
311 d.state = stateEOF
312 continue
314 // Not part of .\r\n.
315 // Consume leading dot and emit saved \r.
316 br.UnreadByte()
317 c = '\r'
318 d.state = stateData
320 case stateCR:
321 if c == '\n' {
322 d.state = stateBeginLine
323 break
325 // Not part of \r\n. Emit saved \r
326 br.UnreadByte()
327 c = '\r'
328 d.state = stateData
330 case stateData:
331 if c == '\r' {
332 d.state = stateCR
333 continue
335 if c == '\n' {
336 d.state = stateBeginLine
339 b[n] = c
342 if err == nil && d.state == stateEOF {
343 err = os.EOF
345 if err != nil && d.r.dot == d {
346 d.r.dot = nil
348 return
351 // closeDot drains the current DotReader if any,
352 // making sure that it reads until the ending dot line.
353 func (r *Reader) closeDot() {
354 if r.dot == nil {
355 return
357 buf := make([]byte, 128)
358 for r.dot != nil {
359 // When Read reaches EOF or an error,
360 // it will set r.dot == nil.
361 r.dot.Read(buf)
365 // ReadDotBytes reads a dot-encoding and returns the decoded data.
367 // See the documentation for the DotReader method for details about dot-encoding.
368 func (r *Reader) ReadDotBytes() ([]byte, os.Error) {
369 return ioutil.ReadAll(r.DotReader())
372 // ReadDotLines reads a dot-encoding and returns a slice
373 // containing the decoded lines, with the final \r\n or \n elided from each.
375 // See the documentation for the DotReader method for details about dot-encoding.
376 func (r *Reader) ReadDotLines() ([]string, os.Error) {
377 // We could use ReadDotBytes and then Split it,
378 // but reading a line at a time avoids needing a
379 // large contiguous block of memory and is simpler.
380 var v vector.StringVector
381 var err os.Error
382 for {
383 var line string
384 line, err = r.ReadLine()
385 if err != nil {
386 if err == os.EOF {
387 err = io.ErrUnexpectedEOF
389 break
392 // Dot by itself marks end; otherwise cut one dot.
393 if len(line) > 0 && line[0] == '.' {
394 if len(line) == 1 {
395 break
397 line = line[1:]
399 v.Push(line)
401 return v, err
404 // ReadMIMEHeader reads a MIME-style header from r.
405 // The header is a sequence of possibly continued Key: Value lines
406 // ending in a blank line.
407 // The returned map m maps CanonicalHeaderKey(key) to a
408 // sequence of values in the same order encountered in the input.
410 // For example, consider this input:
412 // My-Key: Value 1
413 // Long-Key: Even
414 // Longer Value
415 // My-Key: Value 2
417 // Given that input, ReadMIMEHeader returns the map:
419 // map[string][]string{
420 // "My-Key": []string{"Value 1", "Value 2"},
421 // "Long-Key": []string{"Even Longer Value"},
422 // }
424 func (r *Reader) ReadMIMEHeader() (map[string][]string, os.Error) {
425 m := make(map[string][]string)
426 for {
427 kv, err := r.ReadContinuedLineBytes()
428 if len(kv) == 0 {
429 return m, err
432 // Key ends at first colon; must not have spaces.
433 i := bytes.IndexByte(kv, ':')
434 if i < 0 || bytes.IndexByte(kv[0:i], ' ') >= 0 {
435 return m, ProtocolError("malformed MIME header line: " + string(kv))
437 key := CanonicalHeaderKey(string(kv[0:i]))
439 // Skip initial spaces in value.
440 i++ // skip colon
441 for i < len(kv) && (kv[i] == ' ' || kv[i] == '\t') {
444 value := string(kv[i:])
446 v := vector.StringVector(m[key])
447 v.Push(value)
448 m[key] = v
450 if err != nil {
451 return m, err
454 panic("unreachable")
// CanonicalHeaderKey returns the canonical form of the MIME header
// key s. In canonical form the first letter and every letter that
// follows a hyphen are upper case, and all other letters are lower
// case. For example, the canonical key for "accept-encoding" is
// "Accept-Encoding".
func CanonicalHeaderKey(s string) string {
	// Fast path: look for a letter whose case is wrong for its position.
	// If there is none, s is already canonical and is returned unchanged.
	canonical := true
	startOfWord := true
	for i := 0; canonical && i < len(s); i++ {
		ch := s[i]
		switch {
		case startOfWord && 'a' <= ch && ch <= 'z':
			canonical = false
		case !startOfWord && 'A' <= ch && ch <= 'Z':
			canonical = false
		}
		startOfWord = ch == '-'
	}
	if canonical {
		return s
	}

	// Slow path: rewrite a copy, upper-casing the first letter and each
	// letter after a dash (Host, User-Agent, If-Modified-Since) and
	// lower-casing the rest. MIME headers are ASCII only, so working
	// on bytes raises no Unicode issues.
	out := []byte(s)
	startOfWord = true
	for i := 0; i < len(out); i++ {
		ch := out[i]
		switch {
		case startOfWord && 'a' <= ch && ch <= 'z':
			out[i] = ch - 'a' + 'A'
		case !startOfWord && 'A' <= ch && ch <= 'Z':
			out[i] = ch - 'A' + 'a'
		}
		startOfWord = ch == '-'
	}
	return string(out)
}