libgo/go/mime/multipart/multipart.go

   1 // Copyright 2010 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4 //
   5
   6 /*
   7 Package multipart implements MIME multipart parsing, as defined in RFC
   8 2046.
   9
  10 The implementation is sufficient for HTTP (RFC 2388) and the multipart
  11 bodies generated by popular browsers.
  12 */
  13 package multipart
  14
  15 import (
  16         "bufio"
  17         "bytes"
  18         "fmt"
  19         "io"
  20         "io/ioutil"
  21         "mime"
  22         "net/textproto"
  23 )
  24
  25 var emptyParams = make(map[string]string)
  26
  27 // A Part represents a single part in a multipart body.
  28 type Part struct {
  29         // The headers of the body, if any, with the keys canonicalized
  30         // in the same fashion that the Go http.Request headers are.
  31         // For example, "foo-bar" changes case to "Foo-Bar"
  32         //
  33         // As a special case, if the "Content-Transfer-Encoding" header
  34         // has a value of "quoted-printable", that header is instead
  35         // hidden from this map and the body is transparently decoded
  36         // during Read calls.
  37         Header textproto.MIMEHeader
  38
  39         buffer    *bytes.Buffer
  40         mr        *Reader
  41         bytesRead int
  42
  43         disposition       string
  44         dispositionParams map[string]string
  45
  46         // r is either a reader directly reading from mr, or it's a
  47         // wrapper around such a reader, decoding the
  48         // Content-Transfer-Encoding
  49         r io.Reader
  50 }
  51
  52 // FormName returns the name parameter if p has a Content-Disposition
  53 // of type "form-data".  Otherwise it returns the empty string.
  54 func (p *Part) FormName() string {
  55         // See http://tools.ietf.org/html/rfc2183 section 2 for EBNF
  56         // of Content-Disposition value format.
  57         if p.dispositionParams == nil {
  58                 p.parseContentDisposition()
  59         }
  60         if p.disposition != "form-data" {
  61                 return ""
  62         }
  63         return p.dispositionParams["name"]
  64 }
  65
  66 // FileName returns the filename parameter of the Part's
  67 // Content-Disposition header.
  68 func (p *Part) FileName() string {
  69         if p.dispositionParams == nil {
  70                 p.parseContentDisposition()
  71         }
  72         return p.dispositionParams["filename"]
  73 }
  74
  75 func (p *Part) parseContentDisposition() {
  76         v := p.Header.Get("Content-Disposition")
  77         var err error
  78         p.disposition, p.dispositionParams, err = mime.ParseMediaType(v)
  79         if err != nil {
  80                 p.dispositionParams = emptyParams
  81         }
  82 }
  83
  84 // NewReader creates a new multipart Reader reading from r using the
  85 // given MIME boundary.
  86 //
  87 // The boundary is usually obtained from the "boundary" parameter of
  88 // the message's "Content-Type" header. Use mime.ParseMediaType to
  89 // parse such headers.
  90 func NewReader(r io.Reader, boundary string) *Reader {
  91         b := []byte("\r\n--" + boundary + "--")
  92         return &Reader{
  93                 bufReader: bufio.NewReader(r),
  94
  95                 nl:               b[:2],
  96                 nlDashBoundary:   b[:len(b)-2],
  97                 dashBoundaryDash: b[2:],
  98                 dashBoundary:     b[2 : len(b)-2],
  99         }
 100 }
 101
 102 func newPart(mr *Reader) (*Part, error) {
 103         bp := &Part{
 104                 Header: make(map[string][]string),
 105                 mr:     mr,
 106                 buffer: new(bytes.Buffer),
 107         }
 108         if err := bp.populateHeaders(); err != nil {
 109                 return nil, err
 110         }
 111         bp.r = partReader{bp}
 112         const cte = "Content-Transfer-Encoding"
 113         if bp.Header.Get(cte) == "quoted-printable" {
 114                 bp.Header.Del(cte)
 115                 bp.r = newQuotedPrintableReader(bp.r)
 116         }
 117         return bp, nil
 118 }
 119
 120 func (bp *Part) populateHeaders() error {
 121         r := textproto.NewReader(bp.mr.bufReader)
 122         header, err := r.ReadMIMEHeader()
 123         if err == nil {
 124                 bp.Header = header
 125         }
 126         return err
 127 }
 128
 129 // Read reads the body of a part, after its headers and before the
 130 // next part (if any) begins.
 131 func (p *Part) Read(d []byte) (n int, err error) {
 132         return p.r.Read(d)
 133 }
 134
 135 // partReader implements io.Reader by reading raw bytes directly from the
 136 // wrapped *Part, without doing any Transfer-Encoding decoding.
 137 type partReader struct {
 138         p *Part
 139 }
 140
 141 func (pr partReader) Read(d []byte) (n int, err error) {
 142         p := pr.p
 143         defer func() {
 144                 p.bytesRead += n
 145         }()
 146         if p.buffer.Len() >= len(d) {
 147                 // Internal buffer of unconsumed data is large enough for
 148                 // the read request.  No need to parse more at the moment.
 149                 return p.buffer.Read(d)
 150         }
 151         peek, err := p.mr.bufReader.Peek(4096) // TODO(bradfitz): add buffer size accessor
 152
 153         // Look for an immediate empty part without a leading \r\n
 154         // before the boundary separator.  Some MIME code makes empty
 155         // parts like this. Most browsers, however, write the \r\n
 156         // before the subsequent boundary even for empty parts and
 157         // won't hit this path.
 158         if p.bytesRead == 0 && p.mr.peekBufferIsEmptyPart(peek) {
 159                 return 0, io.EOF
 160         }
 161         unexpectedEOF := err == io.EOF
 162         if err != nil && !unexpectedEOF {
 163                 return 0, fmt.Errorf("multipart: Part Read: %v", err)
 164         }
 165         if peek == nil {
 166                 panic("nil peek buf")
 167         }
 168
 169         // Search the peek buffer for "\r\n--boundary". If found,
 170         // consume everything up to the boundary. If not, consume only
 171         // as much of the peek buffer as cannot hold the boundary
 172         // string.
 173         nCopy := 0
 174         foundBoundary := false
 175         if idx := bytes.Index(peek, p.mr.nlDashBoundary); idx != -1 {
 176                 nCopy = idx
 177                 foundBoundary = true
 178         } else if safeCount := len(peek) - len(p.mr.nlDashBoundary); safeCount > 0 {
 179                 nCopy = safeCount
 180         } else if unexpectedEOF {
 181                 // If we've run out of peek buffer and the boundary
 182                 // wasn't found (and can't possibly fit), we must have
 183                 // hit the end of the file unexpectedly.
 184                 return 0, io.ErrUnexpectedEOF
 185         }
 186         if nCopy > 0 {
 187                 if _, err := io.CopyN(p.buffer, p.mr.bufReader, int64(nCopy)); err != nil {
 188                         return 0, err
 189                 }
 190         }
 191         n, err = p.buffer.Read(d)
 192         if err == io.EOF && !foundBoundary {
 193                 // If the boundary hasn't been reached there's more to
 194                 // read, so don't pass through an EOF from the buffer
 195                 err = nil
 196         }
 197         return
 198 }
 199
 200 func (p *Part) Close() error {
 201         io.Copy(ioutil.Discard, p)
 202         return nil
 203 }
 204
 205 // Reader is an iterator over parts in a MIME multipart body.
 206 // Reader's underlying parser consumes its input as needed.  Seeking
 207 // isn't supported.
 208 type Reader struct {
 209         bufReader *bufio.Reader
 210
 211         currentPart *Part
 212         partsRead   int
 213
 214         nl               []byte // "\r\n" or "\n" (set after seeing first boundary line)
 215         nlDashBoundary   []byte // nl + "--boundary"
 216         dashBoundaryDash []byte // "--boundary--"
 217         dashBoundary     []byte // "--boundary"
 218 }
 219
 220 // NextPart returns the next part in the multipart or an error.
 221 // When there are no more parts, the error io.EOF is returned.
 222 func (r *Reader) NextPart() (*Part, error) {
 223         if r.currentPart != nil {
 224                 r.currentPart.Close()
 225         }
 226
 227         expectNewPart := false
 228         for {
 229                 line, err := r.bufReader.ReadSlice('\n')
 230                 if err == io.EOF && r.isFinalBoundary(line) {
 231                         // If the buffer ends in "--boundary--" without the
 232                         // trailing "\r\n", ReadSlice will return an error
 233                         // (since it's missing the '\n'), but this is a valid
 234                         // multipart EOF so we need to return io.EOF instead of
 235                         // a fmt-wrapped one.
 236                         return nil, io.EOF
 237                 }
 238                 if err != nil {
 239                         return nil, fmt.Errorf("multipart: NextPart: %v", err)
 240                 }
 241
 242                 if r.isBoundaryDelimiterLine(line) {
 243                         r.partsRead++
 244                         bp, err := newPart(r)
 245                         if err != nil {
 246                                 return nil, err
 247                         }
 248                         r.currentPart = bp
 249                         return bp, nil
 250                 }
 251
 252                 if r.isFinalBoundary(line) {
 253                         // Expected EOF
 254                         return nil, io.EOF
 255                 }
 256
 257                 if expectNewPart {
 258                         return nil, fmt.Errorf("multipart: expecting a new Part; got line %q", string(line))
 259                 }
 260
 261                 if r.partsRead == 0 {
 262                         // skip line
 263                         continue
 264                 }
 265
 266                 // Consume the "\n" or "\r\n" separator between the
 267                 // body of the previous part and the boundary line we
 268                 // now expect will follow. (either a new part or the
 269                 // end boundary)
 270                 if bytes.Equal(line, r.nl) {
 271                         expectNewPart = true
 272                         continue
 273                 }
 274
 275                 return nil, fmt.Errorf("multipart: unexpected line in Next(): %q", line)
 276         }
 277 }
 278
 279 // isFinalBoundary reports whether line is the final boundary line
 280 // indicating that all parts are over.
 281 // It matches `^--boundary--[ \t]*(\r\n)?$`
 282 func (mr *Reader) isFinalBoundary(line []byte) bool {
 283         if !bytes.HasPrefix(line, mr.dashBoundaryDash) {
 284                 return false
 285         }
 286         rest := line[len(mr.dashBoundaryDash):]
 287         rest = skipLWSPChar(rest)
 288         return len(rest) == 0 || bytes.Equal(rest, mr.nl)
 289 }
 290
 291 func (mr *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) {
 292         // http://tools.ietf.org/html/rfc2046#section-5.1
 293         //   The boundary delimiter line is then defined as a line
 294         //   consisting entirely of two hyphen characters ("-",
 295         //   decimal value 45) followed by the boundary parameter
 296         //   value from the Content-Type header field, optional linear
 297         //   whitespace, and a terminating CRLF.
 298         if !bytes.HasPrefix(line, mr.dashBoundary) {
 299                 return false
 300         }
 301         rest := line[len(mr.dashBoundary):]
 302         rest = skipLWSPChar(rest)
 303
 304         // On the first part, see our lines are ending in \n instead of \r\n
 305         // and switch into that mode if so.  This is a violation of the spec,
 306         // but occurs in practice.
 307         if mr.partsRead == 0 && len(rest) == 1 && rest[0] == '\n' {
 308                 mr.nl = mr.nl[1:]
 309                 mr.nlDashBoundary = mr.nlDashBoundary[1:]
 310         }
 311         return bytes.Equal(rest, mr.nl)
 312 }
 313
 314 // peekBufferIsEmptyPart reports whether the provided peek-ahead
 315 // buffer represents an empty part. It is called only if we've not
 316 // already read any bytes in this part and checks for the case of MIME
 317 // software not writing the \r\n on empty parts. Some does, some
 318 // doesn't.
 319 //
 320 // This checks that what follows the "--boundary" is actually the end
 321 // ("--boundary--" with optional whitespace) or optional whitespace
 322 // and then a newline, so we don't catch "--boundaryFAKE", in which
 323 // case the whole line is part of the data.
 324 func (mr *Reader) peekBufferIsEmptyPart(peek []byte) bool {
 325         // End of parts case.
 326         // Test whether peek matches `^--boundary--[ \t]*(?:\r\n|$)`
 327         if bytes.HasPrefix(peek, mr.dashBoundaryDash) {
 328                 rest := peek[len(mr.dashBoundaryDash):]
 329                 rest = skipLWSPChar(rest)
 330                 return bytes.HasPrefix(rest, mr.nl) || len(rest) == 0
 331         }
 332         if !bytes.HasPrefix(peek, mr.dashBoundary) {
 333                 return false
 334         }
 335         // Test whether rest matches `^[ \t]*\r\n`)
 336         rest := peek[len(mr.dashBoundary):]
 337         rest = skipLWSPChar(rest)
 338         return bytes.HasPrefix(rest, mr.nl)
 339 }
 340
 341 // skipLWSPChar returns b with leading spaces and tabs removed.
 342 // RFC 822 defines:
 343 //    LWSP-char = SPACE / HTAB
 344 func skipLWSPChar(b []byte) []byte {
 345         for len(b) > 0 && (b[0] == ' ' || b[0] == '\t') {
 346                 b = b[1:]
 347         }
 348         return b
 349 }