libgo/go/net/textproto/reader.go

   1 // Copyright 2010 The Go Authors.  All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package textproto
   6
   7 import (
   8         "bufio"
   9         "bytes"
  10         "container/vector"
  11         "io"
  12         "io/ioutil"
  13         "os"
  14         "strconv"
  15 )
  16
  17 // BUG(rsc): To let callers manage exposure to denial of service
  18 // attacks, Reader should allow them to set and reset a limit on
  19 // the number of bytes read from the connection.
  20
// A Reader implements convenience methods for reading requests
// or responses from a text protocol network connection.
type Reader struct {
	R   *bufio.Reader // buffered input that the Reader's methods consume
	dot *dotReader    // DotReader still in use, if any; nil otherwise
}
  27
  28 // NewReader returns a new Reader reading from r.
  29 func NewReader(r *bufio.Reader) *Reader {
  30         return &Reader{R: r}
  31 }
  32
  33 // ReadLine reads a single line from r,
  34 // eliding the final \n or \r\n from the returned string.
  35 func (r *Reader) ReadLine() (string, os.Error) {
  36         line, err := r.ReadLineBytes()
  37         return string(line), err
  38 }
  39
  40 // ReadLineBytes is like ReadLine but returns a []byte instead of a string.
  41 func (r *Reader) ReadLineBytes() ([]byte, os.Error) {
  42         r.closeDot()
  43         line, err := r.R.ReadBytes('\n')
  44         n := len(line)
  45         if n > 0 && line[n-1] == '\n' {
  46                 n--
  47                 if n > 0 && line[n-1] == '\r' {
  48                         n--
  49                 }
  50         }
  51         return line[0:n], err
  52 }
  53
  54 var space = []byte{' '}
  55
  56 // ReadContinuedLine reads a possibly continued line from r,
  57 // eliding the final trailing ASCII white space.
  58 // Lines after the first are considered continuations if they
  59 // begin with a space or tab character.  In the returned data,
  60 // continuation lines are separated from the previous line
  61 // only by a single space: the newline and leading white space
  62 // are removed.
  63 //
  64 // For example, consider this input:
  65 //
  66 //      Line 1
  67 //        continued...
  68 //      Line 2
  69 //
  70 // The first call to ReadContinuedLine will return "Line 1 continued..."
  71 // and the second will return "Line 2".
  72 //
  73 // A line consisting of only white space is never continued.
  74 //
  75 func (r *Reader) ReadContinuedLine() (string, os.Error) {
  76         line, err := r.ReadContinuedLineBytes()
  77         return string(line), err
  78 }
  79
  80 // trim returns s with leading and trailing spaces and tabs removed.
  81 // It does not assume Unicode or UTF-8.
  82 func trim(s []byte) []byte {
  83         i := 0
  84         for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
  85                 i++
  86         }
  87         n := len(s)
  88         for n > i && (s[n-1] == ' ' || s[n-1] == '\t') {
  89                 n--
  90         }
  91         return s[i:n]
  92 }
  93
  94 // ReadContinuedLineBytes is like ReadContinuedLine but
  95 // returns a []byte instead of a string.
  96 func (r *Reader) ReadContinuedLineBytes() ([]byte, os.Error) {
  97         // Read the first line.
  98         line, err := r.ReadLineBytes()
  99         if err != nil {
 100                 return line, err
 101         }
 102         if len(line) == 0 { // blank line - no continuation
 103                 return line, nil
 104         }
 105         line = trim(line)
 106
 107         // Look for a continuation line.
 108         c, err := r.R.ReadByte()
 109         if err != nil {
 110                 // Delay err until we read the byte next time.
 111                 return line, nil
 112         }
 113         if c != ' ' && c != '\t' {
 114                 // Not a continuation.
 115                 r.R.UnreadByte()
 116                 return line, nil
 117         }
 118
 119         // Read continuation lines.
 120         for {
 121                 // Consume leading spaces; one already gone.
 122                 for {
 123                         c, err = r.R.ReadByte()
 124                         if err != nil {
 125                                 break
 126                         }
 127                         if c != ' ' && c != '\t' {
 128                                 r.R.UnreadByte()
 129                                 break
 130                         }
 131                 }
 132                 var cont []byte
 133                 cont, err = r.ReadLineBytes()
 134                 cont = trim(cont)
 135                 line = bytes.Add(line, space)
 136                 line = bytes.Add(line, cont)
 137                 if err != nil {
 138                         break
 139                 }
 140
 141                 // Check for leading space on next line.
 142                 if c, err = r.R.ReadByte(); err != nil {
 143                         break
 144                 }
 145                 if c != ' ' && c != '\t' {
 146                         r.R.UnreadByte()
 147                         break
 148                 }
 149         }
 150
 151         // Delay error until next call.
 152         if len(line) > 0 {
 153                 err = nil
 154         }
 155         return line, err
 156 }
 157
 158 func (r *Reader) readCodeLine(expectCode int) (code int, continued bool, message string, err os.Error) {
 159         line, err := r.ReadLine()
 160         if err != nil {
 161                 return
 162         }
 163         if len(line) < 4 || line[3] != ' ' && line[3] != '-' {
 164                 err = ProtocolError("short response: " + line)
 165                 return
 166         }
 167         continued = line[3] == '-'
 168         code, err = strconv.Atoi(line[0:3])
 169         if err != nil || code < 100 {
 170                 err = ProtocolError("invalid response code: " + line)
 171                 return
 172         }
 173         message = line[4:]
 174         if 1 <= expectCode && expectCode < 10 && code/100 != expectCode ||
 175                 10 <= expectCode && expectCode < 100 && code/10 != expectCode ||
 176                 100 <= expectCode && expectCode < 1000 && code != expectCode {
 177                 err = &Error{code, message}
 178         }
 179         return
 180 }
 181
 182 // ReadCodeLine reads a response code line of the form
 183 //      code message
 184 // where code is a 3-digit status code and the message
 185 // extends to the rest of the line.  An example of such a line is:
 186 //      220 plan9.bell-labs.com ESMTP
 187 //
 188 // If the prefix of the status does not match the digits in expectCode,
 189 // ReadCodeLine returns with err set to &Error{code, message}.
 190 // For example, if expectCode is 31, an error will be returned if
 191 // the status is not in the range [310,319].
 192 //
 193 // If the response is multi-line, ReadCodeLine returns an error.
 194 //
 195 // An expectCode <= 0 disables the check of the status code.
 196 //
 197 func (r *Reader) ReadCodeLine(expectCode int) (code int, message string, err os.Error) {
 198         code, continued, message, err := r.readCodeLine(expectCode)
 199         if err == nil && continued {
 200                 err = ProtocolError("unexpected multi-line response: " + message)
 201         }
 202         return
 203 }
 204
 205 // ReadResponse reads a multi-line response of the form
 206 //      code-message line 1
 207 //      code-message line 2
 208 //      ...
 209 //      code message line n
 210 // where code is a 3-digit status code. Each line should have the same code.
 211 // The response is terminated by a line that uses a space between the code and
 212 // the message line rather than a dash. Each line in message is separated by
 213 // a newline (\n).
 214 //
 215 // If the prefix of the status does not match the digits in expectCode,
 216 // ReadResponse returns with err set to &Error{code, message}.
 217 // For example, if expectCode is 31, an error will be returned if
 218 // the status is not in the range [310,319].
 219 //
 220 // An expectCode <= 0 disables the check of the status code.
 221 //
 222 func (r *Reader) ReadResponse(expectCode int) (code int, message string, err os.Error) {
 223         code, continued, message, err := r.readCodeLine(expectCode)
 224         for err == nil && continued {
 225                 var code2 int
 226                 var moreMessage string
 227                 code2, continued, moreMessage, err = r.readCodeLine(expectCode)
 228                 if code != code2 {
 229                         err = ProtocolError("status code mismatch: " + strconv.Itoa(code) + ", " + strconv.Itoa(code2))
 230                 }
 231                 message += "\n" + moreMessage
 232         }
 233         return
 234 }
 235
 236 // DotReader returns a new Reader that satisfies Reads using the
 237 // decoded text of a dot-encoded block read from r.
 238 // The returned Reader is only valid until the next call
 239 // to a method on r.
 240 //
 241 // Dot encoding is a common framing used for data blocks
 242 // in text protcols like SMTP.  The data consists of a sequence
 243 // of lines, each of which ends in "\r\n".  The sequence itself
 244 // ends at a line containing just a dot: ".\r\n".  Lines beginning
 245 // with a dot are escaped with an additional dot to avoid
 246 // looking like the end of the sequence.
 247 //
 248 // The decoded form returned by the Reader's Read method
 249 // rewrites the "\r\n" line endings into the simpler "\n",
 250 // removes leading dot escapes if present, and stops with error os.EOF
 251 // after consuming (and discarding) the end-of-sequence line.
 252 func (r *Reader) DotReader() io.Reader {
 253         r.closeDot()
 254         r.dot = &dotReader{r: r}
 255         return r.dot
 256 }
 257
// A dotReader is the io.Reader that Reader.DotReader hands out.
type dotReader struct {
	r     *Reader // Reader whose buffered input is decoded
	state int     // decoder state kept between calls to Read; zero is beginning of line
}
 262
 263 // Read satisfies reads by decoding dot-encoded data read from d.r.
 264 func (d *dotReader) Read(b []byte) (n int, err os.Error) {
 265         // Run data through a simple state machine to
 266         // elide leading dots, rewrite trailing \r\n into \n,
 267         // and detect ending .\r\n line.
 268         const (
 269                 stateBeginLine = iota // beginning of line; initial state; must be zero
 270                 stateDot              // read . at beginning of line
 271                 stateDotCR            // read .\r at beginning of line
 272                 stateCR               // read \r (possibly at end of line)
 273                 stateData             // reading data in middle of line
 274                 stateEOF              // reached .\r\n end marker line
 275         )
 276         br := d.r.R
 277         for n < len(b) && d.state != stateEOF {
 278                 var c byte
 279                 c, err = br.ReadByte()
 280                 if err != nil {
 281                         if err == os.EOF {
 282                                 err = io.ErrUnexpectedEOF
 283                         }
 284                         break
 285                 }
 286                 switch d.state {
 287                 case stateBeginLine:
 288                         if c == '.' {
 289                                 d.state = stateDot
 290                                 continue
 291                         }
 292                         if c == '\r' {
 293                                 d.state = stateCR
 294                                 continue
 295                         }
 296                         d.state = stateData
 297
 298                 case stateDot:
 299                         if c == '\r' {
 300                                 d.state = stateDotCR
 301                                 continue
 302                         }
 303                         if c == '\n' {
 304                                 d.state = stateEOF
 305                                 continue
 306                         }
 307                         d.state = stateData
 308
 309                 case stateDotCR:
 310                         if c == '\n' {
 311                                 d.state = stateEOF
 312                                 continue
 313                         }
 314                         // Not part of .\r\n.
 315                         // Consume leading dot and emit saved \r.
 316                         br.UnreadByte()
 317                         c = '\r'
 318                         d.state = stateData
 319
 320                 case stateCR:
 321                         if c == '\n' {
 322                                 d.state = stateBeginLine
 323                                 break
 324                         }
 325                         // Not part of \r\n.  Emit saved \r
 326                         br.UnreadByte()
 327                         c = '\r'
 328                         d.state = stateData
 329
 330                 case stateData:
 331                         if c == '\r' {
 332                                 d.state = stateCR
 333                                 continue
 334                         }
 335                         if c == '\n' {
 336                                 d.state = stateBeginLine
 337                         }
 338                 }
 339                 b[n] = c
 340                 n++
 341         }
 342         if err == nil && d.state == stateEOF {
 343                 err = os.EOF
 344         }
 345         if err != nil && d.r.dot == d {
 346                 d.r.dot = nil
 347         }
 348         return
 349 }
 350
 351 // closeDot drains the current DotReader if any,
 352 // making sure that it reads until the ending dot line.
 353 func (r *Reader) closeDot() {
 354         if r.dot == nil {
 355                 return
 356         }
 357         buf := make([]byte, 128)
 358         for r.dot != nil {
 359                 // When Read reaches EOF or an error,
 360                 // it will set r.dot == nil.
 361                 r.dot.Read(buf)
 362         }
 363 }
 364
 365 // ReadDotBytes reads a dot-encoding and returns the decoded data.
 366 //
 367 // See the documentation for the DotReader method for details about dot-encoding.
 368 func (r *Reader) ReadDotBytes() ([]byte, os.Error) {
 369         return ioutil.ReadAll(r.DotReader())
 370 }
 371
 372 // ReadDotLines reads a dot-encoding and returns a slice
 373 // containing the decoded lines, with the final \r\n or \n elided from each.
 374 //
 375 // See the documentation for the DotReader method for details about dot-encoding.
 376 func (r *Reader) ReadDotLines() ([]string, os.Error) {
 377         // We could use ReadDotBytes and then Split it,
 378         // but reading a line at a time avoids needing a
 379         // large contiguous block of memory and is simpler.
 380         var v vector.StringVector
 381         var err os.Error
 382         for {
 383                 var line string
 384                 line, err = r.ReadLine()
 385                 if err != nil {
 386                         if err == os.EOF {
 387                                 err = io.ErrUnexpectedEOF
 388                         }
 389                         break
 390                 }
 391
 392                 // Dot by itself marks end; otherwise cut one dot.
 393                 if len(line) > 0 && line[0] == '.' {
 394                         if len(line) == 1 {
 395                                 break
 396                         }
 397                         line = line[1:]
 398                 }
 399                 v.Push(line)
 400         }
 401         return v, err
 402 }
 403
 404 // ReadMIMEHeader reads a MIME-style header from r.
 405 // The header is a sequence of possibly continued Key: Value lines
 406 // ending in a blank line.
 407 // The returned map m maps CanonicalHeaderKey(key) to a
 408 // sequence of values in the same order encountered in the input.
 409 //
 410 // For example, consider this input:
 411 //
 412 //      My-Key: Value 1
 413 //      Long-Key: Even
 414 //             Longer Value
 415 //      My-Key: Value 2
 416 //
 417 // Given that input, ReadMIMEHeader returns the map:
 418 //
 419 //      map[string][]string{
 420 //              "My-Key": []string{"Value 1", "Value 2"},
 421 //              "Long-Key": []string{"Even Longer Value"},
 422 //      }
 423 //
 424 func (r *Reader) ReadMIMEHeader() (map[string][]string, os.Error) {
 425         m := make(map[string][]string)
 426         for {
 427                 kv, err := r.ReadContinuedLineBytes()
 428                 if len(kv) == 0 {
 429                         return m, err
 430                 }
 431
 432                 // Key ends at first colon; must not have spaces.
 433                 i := bytes.IndexByte(kv, ':')
 434                 if i < 0 || bytes.IndexByte(kv[0:i], ' ') >= 0 {
 435                         return m, ProtocolError("malformed MIME header line: " + string(kv))
 436                 }
 437                 key := CanonicalHeaderKey(string(kv[0:i]))
 438
 439                 // Skip initial spaces in value.
 440                 i++ // skip colon
 441                 for i < len(kv) && (kv[i] == ' ' || kv[i] == '\t') {
 442                         i++
 443                 }
 444                 value := string(kv[i:])
 445
 446                 v := vector.StringVector(m[key])
 447                 v.Push(value)
 448                 m[key] = v
 449
 450                 if err != nil {
 451                         return m, err
 452                 }
 453         }
 454         panic("unreachable")
 455 }
 456
 457 // CanonicalHeaderKey returns the canonical format of the
 458 // MIME header key s.  The canonicalization converts the first
 459 // letter and any letter following a hyphen to upper case;
 460 // the rest are converted to lowercase.  For example, the
 461 // canonical key for "accept-encoding" is "Accept-Encoding".
 462 func CanonicalHeaderKey(s string) string {
 463         // Quick check for canonical encoding.
 464         needUpper := true
 465         for i := 0; i < len(s); i++ {
 466                 c := s[i]
 467                 if needUpper && 'a' <= c && c <= 'z' {
 468                         goto MustRewrite
 469                 }
 470                 if !needUpper && 'A' <= c && c <= 'Z' {
 471                         goto MustRewrite
 472                 }
 473                 needUpper = c == '-'
 474         }
 475         return s
 476
 477 MustRewrite:
 478         // Canonicalize: first letter upper case
 479         // and upper case after each dash.
 480         // (Host, User-Agent, If-Modified-Since).
 481         // MIME headers are ASCII only, so no Unicode issues.
 482         a := []byte(s)
 483         upper := true
 484         for i, v := range a {
 485                 if upper && 'a' <= v && v <= 'z' {
 486                         a[i] = v + 'A' - 'a'
 487                 }
 488                 if !upper && 'A' <= v && v <= 'Z' {
 489                         a[i] = v + 'a' - 'A'
 490                 }
 491                 upper = v == '-'
 492         }
 493         return string(a)
 494 }