libgo/go/net/textproto/reader.go

   1 // Copyright 2010 The Go Authors.  All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package textproto
   6
   7 import (
   8         "bufio"
   9         "bytes"
  10         "io"
  11         "io/ioutil"
  12         "strconv"
  13         "strings"
  14 )
  15
  16 // BUG(rsc): To let callers manage exposure to denial of service
  17 // attacks, Reader should allow them to set and reset a limit on
  18 // the number of bytes read from the connection.
  19
// A Reader implements convenience methods for reading requests
// or responses from a text protocol network connection.
type Reader struct {
	// R is the buffered reader that all of Reader's methods consume from.
	R *bufio.Reader
	// dot is the dotReader handed out by the latest DotReader call; its
	// Read method resets this field to nil once it returns an error
	// (including io.EOF). closeDot drains it before any other read.
	dot *dotReader
	buf []byte // a re-usable buffer for readContinuedLineSlice
}
  27
  28 // NewReader returns a new Reader reading from r.
  29 func NewReader(r *bufio.Reader) *Reader {
  30         return &Reader{R: r}
  31 }
  32
  33 // ReadLine reads a single line from r,
  34 // eliding the final \n or \r\n from the returned string.
  35 func (r *Reader) ReadLine() (string, error) {
  36         line, err := r.readLineSlice()
  37         return string(line), err
  38 }
  39
  40 // ReadLineBytes is like ReadLine but returns a []byte instead of a string.
  41 func (r *Reader) ReadLineBytes() ([]byte, error) {
  42         line, err := r.readLineSlice()
  43         if line != nil {
  44                 buf := make([]byte, len(line))
  45                 copy(buf, line)
  46                 line = buf
  47         }
  48         return line, err
  49 }
  50
  51 func (r *Reader) readLineSlice() ([]byte, error) {
  52         r.closeDot()
  53         var line []byte
  54         for {
  55                 l, more, err := r.R.ReadLine()
  56                 if err != nil {
  57                         return nil, err
  58                 }
  59                 // Avoid the copy if the first call produced a full line.
  60                 if line == nil && !more {
  61                         return l, nil
  62                 }
  63                 line = append(line, l...)
  64                 if !more {
  65                         break
  66                 }
  67         }
  68         return line, nil
  69 }
  70
  71 // ReadContinuedLine reads a possibly continued line from r,
  72 // eliding the final trailing ASCII white space.
  73 // Lines after the first are considered continuations if they
  74 // begin with a space or tab character.  In the returned data,
  75 // continuation lines are separated from the previous line
  76 // only by a single space: the newline and leading white space
  77 // are removed.
  78 //
  79 // For example, consider this input:
  80 //
  81 //      Line 1
  82 //        continued...
  83 //      Line 2
  84 //
  85 // The first call to ReadContinuedLine will return "Line 1 continued..."
  86 // and the second will return "Line 2".
  87 //
  88 // A line consisting of only white space is never continued.
  89 //
  90 func (r *Reader) ReadContinuedLine() (string, error) {
  91         line, err := r.readContinuedLineSlice()
  92         return string(line), err
  93 }
  94
  95 // trim returns s with leading and trailing spaces and tabs removed.
  96 // It does not assume Unicode or UTF-8.
  97 func trim(s []byte) []byte {
  98         i := 0
  99         for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
 100                 i++
 101         }
 102         n := len(s)
 103         for n > i && (s[n-1] == ' ' || s[n-1] == '\t') {
 104                 n--
 105         }
 106         return s[i:n]
 107 }
 108
 109 // ReadContinuedLineBytes is like ReadContinuedLine but
 110 // returns a []byte instead of a string.
 111 func (r *Reader) ReadContinuedLineBytes() ([]byte, error) {
 112         line, err := r.readContinuedLineSlice()
 113         if line != nil {
 114                 buf := make([]byte, len(line))
 115                 copy(buf, line)
 116                 line = buf
 117         }
 118         return line, err
 119 }
 120
 121 func (r *Reader) readContinuedLineSlice() ([]byte, error) {
 122         // Read the first line.
 123         line, err := r.readLineSlice()
 124         if err != nil {
 125                 return nil, err
 126         }
 127         if len(line) == 0 { // blank line - no continuation
 128                 return line, nil
 129         }
 130
 131         // Optimistically assume that we have started to buffer the next line
 132         // and it starts with an ASCII letter (the next header key), so we can
 133         // avoid copying that buffered data around in memory and skipping over
 134         // non-existent whitespace.
 135         if r.R.Buffered() > 1 {
 136                 peek, err := r.R.Peek(1)
 137                 if err == nil && isASCIILetter(peek[0]) {
 138                         return trim(line), nil
 139                 }
 140         }
 141
 142         // ReadByte or the next readLineSlice will flush the read buffer;
 143         // copy the slice into buf.
 144         r.buf = append(r.buf[:0], trim(line)...)
 145
 146         // Read continuation lines.
 147         for r.skipSpace() > 0 {
 148                 line, err := r.readLineSlice()
 149                 if err != nil {
 150                         break
 151                 }
 152                 r.buf = append(r.buf, ' ')
 153                 r.buf = append(r.buf, line...)
 154         }
 155         return r.buf, nil
 156 }
 157
 158 // skipSpace skips R over all spaces and returns the number of bytes skipped.
 159 func (r *Reader) skipSpace() int {
 160         n := 0
 161         for {
 162                 c, err := r.R.ReadByte()
 163                 if err != nil {
 164                         // Bufio will keep err until next read.
 165                         break
 166                 }
 167                 if c != ' ' && c != '\t' {
 168                         r.R.UnreadByte()
 169                         break
 170                 }
 171                 n++
 172         }
 173         return n
 174 }
 175
 176 func (r *Reader) readCodeLine(expectCode int) (code int, continued bool, message string, err error) {
 177         line, err := r.ReadLine()
 178         if err != nil {
 179                 return
 180         }
 181         return parseCodeLine(line, expectCode)
 182 }
 183
 184 func parseCodeLine(line string, expectCode int) (code int, continued bool, message string, err error) {
 185         if len(line) < 4 || line[3] != ' ' && line[3] != '-' {
 186                 err = ProtocolError("short response: " + line)
 187                 return
 188         }
 189         continued = line[3] == '-'
 190         code, err = strconv.Atoi(line[0:3])
 191         if err != nil || code < 100 {
 192                 err = ProtocolError("invalid response code: " + line)
 193                 return
 194         }
 195         message = line[4:]
 196         if 1 <= expectCode && expectCode < 10 && code/100 != expectCode ||
 197                 10 <= expectCode && expectCode < 100 && code/10 != expectCode ||
 198                 100 <= expectCode && expectCode < 1000 && code != expectCode {
 199                 err = &Error{code, message}
 200         }
 201         return
 202 }
 203
 204 // ReadCodeLine reads a response code line of the form
 205 //      code message
 206 // where code is a three-digit status code and the message
 207 // extends to the rest of the line.  An example of such a line is:
 208 //      220 plan9.bell-labs.com ESMTP
 209 //
 210 // If the prefix of the status does not match the digits in expectCode,
 211 // ReadCodeLine returns with err set to &Error{code, message}.
 212 // For example, if expectCode is 31, an error will be returned if
 213 // the status is not in the range [310,319].
 214 //
 215 // If the response is multi-line, ReadCodeLine returns an error.
 216 //
 217 // An expectCode <= 0 disables the check of the status code.
 218 //
 219 func (r *Reader) ReadCodeLine(expectCode int) (code int, message string, err error) {
 220         code, continued, message, err := r.readCodeLine(expectCode)
 221         if err == nil && continued {
 222                 err = ProtocolError("unexpected multi-line response: " + message)
 223         }
 224         return
 225 }
 226
 227 // ReadResponse reads a multi-line response of the form:
 228 //
 229 //      code-message line 1
 230 //      code-message line 2
 231 //      ...
 232 //      code message line n
 233 //
 234 // where code is a three-digit status code. The first line starts with the
 235 // code and a hyphen. The response is terminated by a line that starts
 236 // with the same code followed by a space. Each line in message is
 237 // separated by a newline (\n).
 238 //
 239 // See page 36 of RFC 959 (http://www.ietf.org/rfc/rfc959.txt) for
 240 // details.
 241 //
 242 // If the prefix of the status does not match the digits in expectCode,
 243 // ReadResponse returns with err set to &Error{code, message}.
 244 // For example, if expectCode is 31, an error will be returned if
 245 // the status is not in the range [310,319].
 246 //
 247 // An expectCode <= 0 disables the check of the status code.
 248 //
 249 func (r *Reader) ReadResponse(expectCode int) (code int, message string, err error) {
 250         code, continued, message, err := r.readCodeLine(expectCode)
 251         for err == nil && continued {
 252                 line, err := r.ReadLine()
 253                 if err != nil {
 254                         return 0, "", err
 255                 }
 256
 257                 var code2 int
 258                 var moreMessage string
 259                 code2, continued, moreMessage, err = parseCodeLine(line, expectCode)
 260                 if err != nil || code2 != code {
 261                         message += "\n" + strings.TrimRight(line, "\r\n")
 262                         continued = true
 263                         continue
 264                 }
 265                 message += "\n" + moreMessage
 266         }
 267         return
 268 }
 269
 270 // DotReader returns a new Reader that satisfies Reads using the
 271 // decoded text of a dot-encoded block read from r.
 272 // The returned Reader is only valid until the next call
 273 // to a method on r.
 274 //
 275 // Dot encoding is a common framing used for data blocks
 276 // in text protocols such as SMTP.  The data consists of a sequence
 277 // of lines, each of which ends in "\r\n".  The sequence itself
 278 // ends at a line containing just a dot: ".\r\n".  Lines beginning
 279 // with a dot are escaped with an additional dot to avoid
 280 // looking like the end of the sequence.
 281 //
 282 // The decoded form returned by the Reader's Read method
 283 // rewrites the "\r\n" line endings into the simpler "\n",
 284 // removes leading dot escapes if present, and stops with error io.EOF
 285 // after consuming (and discarding) the end-of-sequence line.
 286 func (r *Reader) DotReader() io.Reader {
 287         r.closeDot()
 288         r.dot = &dotReader{r: r}
 289         return r.dot
 290 }
 291
// dotReader is the io.Reader implementation returned by
// Reader.DotReader. It decodes a dot-encoded block on the fly.
type dotReader struct {
	r     *Reader // parent Reader; encoded bytes are read from r.R
	state int     // current state of the decoder in Read; zero is beginning of line
}
 296
// Read satisfies reads by decoding dot-encoded data read from d.r.
// It fills b with decoded bytes until b is full, the end marker line
// has been consumed, or the underlying reader fails. It returns
// io.EOF once the end marker has been seen, and io.ErrUnexpectedEOF
// if the underlying reader hits io.EOF first. On any non-nil error
// the dotReader unregisters itself from its parent Reader.
func (d *dotReader) Read(b []byte) (n int, err error) {
	// Run data through a simple state machine to
	// elide leading dots, rewrite trailing \r\n into \n,
	// and detect ending .\r\n line.
	const (
		stateBeginLine = iota // beginning of line; initial state; must be zero
		stateDot              // read . at beginning of line
		stateDotCR            // read .\r at beginning of line
		stateCR               // read \r (possibly at end of line)
		stateData             // reading data in middle of line
		stateEOF              // reached .\r\n end marker line
	)
	br := d.r.R
	for n < len(b) && d.state != stateEOF {
		var c byte
		c, err = br.ReadByte()
		if err != nil {
			// Running out of input before the end marker means the
			// block was truncated.
			if err == io.EOF {
				err = io.ErrUnexpectedEOF
			}
			break
		}
		// Within the switch, "continue" swallows c (nothing is
		// emitted), while falling out of the switch emits c below.
		switch d.state {
		case stateBeginLine:
			if c == '.' {
				// Possible escape dot or end marker; hold it back.
				d.state = stateDot
				continue
			}
			if c == '\r' {
				d.state = stateCR
				continue
			}
			// Any other byte (including a bare \n) is emitted and
			// moves the machine to stateData.
			d.state = stateData

		case stateDot:
			if c == '\r' {
				d.state = stateDotCR
				continue
			}
			if c == '\n' {
				// ".\n" is accepted as an end marker as well.
				d.state = stateEOF
				continue
			}
			// The leading dot was an escape: drop it, emit c.
			d.state = stateData

		case stateDotCR:
			if c == '\n' {
				d.state = stateEOF
				continue
			}
			// Not part of .\r\n.
			// Consume leading dot and emit saved \r.
			br.UnreadByte()
			c = '\r'
			d.state = stateData

		case stateCR:
			if c == '\n' {
				d.state = stateBeginLine
				// This break leaves the switch only, so the \n is
				// emitted below in place of the \r\n pair.
				break
			}
			// Not part of \r\n.  Emit saved \r
			br.UnreadByte()
			c = '\r'
			d.state = stateData

		case stateData:
			if c == '\r' {
				// Hold the \r back until the next byte is known.
				d.state = stateCR
				continue
			}
			if c == '\n' {
				d.state = stateBeginLine
			}
		}
		b[n] = c
		n++
	}
	if err == nil && d.state == stateEOF {
		err = io.EOF
	}
	// Once finished (EOF or failure), detach from the parent so that
	// closeDot stops draining; the identity check leaves a newer
	// dotReader registered on the parent untouched.
	if err != nil && d.r.dot == d {
		d.r.dot = nil
	}
	return
}
 384
 385 // closeDot drains the current DotReader if any,
 386 // making sure that it reads until the ending dot line.
 387 func (r *Reader) closeDot() {
 388         if r.dot == nil {
 389                 return
 390         }
 391         buf := make([]byte, 128)
 392         for r.dot != nil {
 393                 // When Read reaches EOF or an error,
 394                 // it will set r.dot == nil.
 395                 r.dot.Read(buf)
 396         }
 397 }
 398
 399 // ReadDotBytes reads a dot-encoding and returns the decoded data.
 400 //
 401 // See the documentation for the DotReader method for details about dot-encoding.
 402 func (r *Reader) ReadDotBytes() ([]byte, error) {
 403         return ioutil.ReadAll(r.DotReader())
 404 }
 405
 406 // ReadDotLines reads a dot-encoding and returns a slice
 407 // containing the decoded lines, with the final \r\n or \n elided from each.
 408 //
 409 // See the documentation for the DotReader method for details about dot-encoding.
 410 func (r *Reader) ReadDotLines() ([]string, error) {
 411         // We could use ReadDotBytes and then Split it,
 412         // but reading a line at a time avoids needing a
 413         // large contiguous block of memory and is simpler.
 414         var v []string
 415         var err error
 416         for {
 417                 var line string
 418                 line, err = r.ReadLine()
 419                 if err != nil {
 420                         if err == io.EOF {
 421                                 err = io.ErrUnexpectedEOF
 422                         }
 423                         break
 424                 }
 425
 426                 // Dot by itself marks end; otherwise cut one dot.
 427                 if len(line) > 0 && line[0] == '.' {
 428                         if len(line) == 1 {
 429                                 break
 430                         }
 431                         line = line[1:]
 432                 }
 433                 v = append(v, line)
 434         }
 435         return v, err
 436 }
 437
 438 // ReadMIMEHeader reads a MIME-style header from r.
 439 // The header is a sequence of possibly continued Key: Value lines
 440 // ending in a blank line.
 441 // The returned map m maps CanonicalMIMEHeaderKey(key) to a
 442 // sequence of values in the same order encountered in the input.
 443 //
 444 // For example, consider this input:
 445 //
 446 //      My-Key: Value 1
 447 //      Long-Key: Even
 448 //             Longer Value
 449 //      My-Key: Value 2
 450 //
 451 // Given that input, ReadMIMEHeader returns the map:
 452 //
 453 //      map[string][]string{
 454 //              "My-Key": {"Value 1", "Value 2"},
 455 //              "Long-Key": {"Even Longer Value"},
 456 //      }
 457 //
 458 func (r *Reader) ReadMIMEHeader() (MIMEHeader, error) {
 459         // Avoid lots of small slice allocations later by allocating one
 460         // large one ahead of time which we'll cut up into smaller
 461         // slices. If this isn't big enough later, we allocate small ones.
 462         var strs []string
 463         hint := r.upcomingHeaderNewlines()
 464         if hint > 0 {
 465                 strs = make([]string, hint)
 466         }
 467
 468         m := make(MIMEHeader, hint)
 469         for {
 470                 kv, err := r.readContinuedLineSlice()
 471                 if len(kv) == 0 {
 472                         return m, err
 473                 }
 474
 475                 // Key ends at first colon; should not have spaces but
 476                 // they appear in the wild, violating specs, so we
 477                 // remove them if present.
 478                 i := bytes.IndexByte(kv, ':')
 479                 if i < 0 {
 480                         return m, ProtocolError("malformed MIME header line: " + string(kv))
 481                 }
 482                 endKey := i
 483                 for endKey > 0 && kv[endKey-1] == ' ' {
 484                         endKey--
 485                 }
 486                 key := canonicalMIMEHeaderKey(kv[:endKey])
 487
 488                 // Skip initial spaces in value.
 489                 i++ // skip colon
 490                 for i < len(kv) && (kv[i] == ' ' || kv[i] == '\t') {
 491                         i++
 492                 }
 493                 value := string(kv[i:])
 494
 495                 vv := m[key]
 496                 if vv == nil && len(strs) > 0 {
 497                         // More than likely this will be a single-element key.
 498                         // Most headers aren't multi-valued.
 499                         // Set the capacity on strs[0] to 1, so any future append
 500                         // won't extend the slice into the other strings.
 501                         vv, strs = strs[:1:1], strs[1:]
 502                         vv[0] = value
 503                         m[key] = vv
 504                 } else {
 505                         m[key] = append(vv, value)
 506                 }
 507
 508                 if err != nil {
 509                         return m, err
 510                 }
 511         }
 512 }
 513
 514 // upcomingHeaderNewlines returns an approximation of the number of newlines
 515 // that will be in this header. If it gets confused, it returns 0.
 516 func (r *Reader) upcomingHeaderNewlines() (n int) {
 517         // Try to determine the 'hint' size.
 518         r.R.Peek(1) // force a buffer load if empty
 519         s := r.R.Buffered()
 520         if s == 0 {
 521                 return
 522         }
 523         peek, _ := r.R.Peek(s)
 524         for len(peek) > 0 {
 525                 i := bytes.IndexByte(peek, '\n')
 526                 if i < 3 {
 527                         // Not present (-1) or found within the next few bytes,
 528                         // implying we're at the end ("\r\n\r\n" or "\n\n")
 529                         return
 530                 }
 531                 n++
 532                 peek = peek[i+1:]
 533         }
 534         return
 535 }
 536
 537 // CanonicalMIMEHeaderKey returns the canonical format of the
 538 // MIME header key s.  The canonicalization converts the first
 539 // letter and any letter following a hyphen to upper case;
 540 // the rest are converted to lowercase.  For example, the
 541 // canonical key for "accept-encoding" is "Accept-Encoding".
 542 // MIME header keys are assumed to be ASCII only.
 543 func CanonicalMIMEHeaderKey(s string) string {
 544         // Quick check for canonical encoding.
 545         upper := true
 546         for i := 0; i < len(s); i++ {
 547                 c := s[i]
 548                 if upper && 'a' <= c && c <= 'z' {
 549                         return canonicalMIMEHeaderKey([]byte(s))
 550                 }
 551                 if !upper && 'A' <= c && c <= 'Z' {
 552                         return canonicalMIMEHeaderKey([]byte(s))
 553                 }
 554                 upper = c == '-'
 555         }
 556         return s
 557 }
 558
 559 const toLower = 'a' - 'A'
 560
 561 // canonicalMIMEHeaderKey is like CanonicalMIMEHeaderKey but is
 562 // allowed to mutate the provided byte slice before returning the
 563 // string.
 564 func canonicalMIMEHeaderKey(a []byte) string {
 565         // Look for it in commonHeaders , so that we can avoid an
 566         // allocation by sharing the strings among all users
 567         // of textproto. If we don't find it, a has been canonicalized
 568         // so just return string(a).
 569         upper := true
 570         lo := 0
 571         hi := len(commonHeaders)
 572         for i := 0; i < len(a); i++ {
 573                 // Canonicalize: first letter upper case
 574                 // and upper case after each dash.
 575                 // (Host, User-Agent, If-Modified-Since).
 576                 // MIME headers are ASCII only, so no Unicode issues.
 577                 if a[i] == ' ' {
 578                         a[i] = '-'
 579                         upper = true
 580                         continue
 581                 }
 582                 c := a[i]
 583                 if upper && 'a' <= c && c <= 'z' {
 584                         c -= toLower
 585                 } else if !upper && 'A' <= c && c <= 'Z' {
 586                         c += toLower
 587                 }
 588                 a[i] = c
 589                 upper = c == '-' // for next time
 590
 591                 if lo < hi {
 592                         for lo < hi && (len(commonHeaders[lo]) <= i || commonHeaders[lo][i] < c) {
 593                                 lo++
 594                         }
 595                         for hi > lo && commonHeaders[hi-1][i] > c {
 596                                 hi--
 597                         }
 598                 }
 599         }
 600         if lo < hi && len(commonHeaders[lo]) == len(a) {
 601                 return commonHeaders[lo]
 602         }
 603         return string(a)
 604 }
 605
 606 var commonHeaders = []string{
 607         "Accept",
 608         "Accept-Charset",
 609         "Accept-Encoding",
 610         "Accept-Language",
 611         "Accept-Ranges",
 612         "Cache-Control",
 613         "Cc",
 614         "Connection",
 615         "Content-Id",
 616         "Content-Language",
 617         "Content-Length",
 618         "Content-Transfer-Encoding",
 619         "Content-Type",
 620         "Cookie",
 621         "Date",
 622         "Dkim-Signature",
 623         "Etag",
 624         "Expires",
 625         "From",
 626         "Host",
 627         "If-Modified-Since",
 628         "If-None-Match",
 629         "In-Reply-To",
 630         "Last-Modified",
 631         "Location",
 632         "Message-Id",
 633         "Mime-Version",
 634         "Pragma",
 635         "Received",
 636         "Return-Path",
 637         "Server",
 638         "Set-Cookie",
 639         "Subject",
 640         "To",
 641         "User-Agent",
 642         "Via",
 643         "X-Forwarded-For",
 644         "X-Imforwards",
 645         "X-Powered-By",
 646 }