1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // HTTP Request reading and parsing.
7 // The http package implements parsing of HTTP requests, replies,
8 // and URLs and provides an extensible HTTP server and a basic
27 maxLineLength
= 4096 // assumed <= bufio.defaultBufSize
30 chunkSize
= 4 << 10 // 4 KB chunks
33 // HTTP request parsing errors.
34 type ProtocolError
struct {
39 ErrLineTooLong
= &ProtocolError
{"header line too long"}
40 ErrHeaderTooLong
= &ProtocolError
{"header too long"}
41 ErrShortBody
= &ProtocolError
{"entity body too short"}
42 ErrNotSupported
= &ProtocolError
{"feature not supported"}
43 ErrUnexpectedTrailer
= &ProtocolError
{"trailer header without chunked transfer encoding"}
44 ErrMissingContentLength
= &ProtocolError
{"missing ContentLength in HEAD response"}
45 ErrNotMultipart
= &ProtocolError
{"request Content-Type isn't multipart/form-data"}
46 ErrMissingBoundary
= &ProtocolError
{"no multipart boundary param Content-Type"}
49 type badStringError
struct {
54 func (e
*badStringError
) String() string { return fmt
.Sprintf("%s %q", e
.what
, e
.str
) }
56 var reqExcludeHeader
= map[string]bool{
60 "Content-Length": true,
61 "Transfer-Encoding": true,
65 // A Request represents a parsed HTTP request header.
67 Method
string // GET, POST, PUT, etc.
68 RawURL
string // The raw URL given in the request.
69 URL
*URL
// Parsed URL.
70 Proto
string // "HTTP/1.0"
74 // A header maps request lines to their values.
77 // accept-encoding: gzip, deflate
78 // Accept-Language: en-us
79 // Connection: keep-alive
83 // Header = map[string]string{
84 // "Accept-Encoding": "gzip, deflate",
85 // "Accept-Language": "en-us",
86 // "Connection": "keep-alive",
89 // HTTP defines that header names are case-insensitive.
90 // The request parser implements this by canonicalizing the
91 // name, making the first character and any characters
92 // following a hyphen uppercase and the rest lowercase.
93 Header
map[string]string
98 // ContentLength records the length of the associated content.
99 // The value -1 indicates that the length is unknown.
100 // Values >= 0 indicate that the given number of bytes may be read from Body.
103 // TransferEncoding lists the transfer encodings from outermost to innermost.
104 // An empty list denotes the "identity" encoding.
105 TransferEncoding
[]string
107 // Whether to close the connection after replying to this request.
110 // The host on which the URL is sought.
111 // Per RFC 2616, this is either the value of the Host: header
112 // or the host name given in the URL itself.
115 // The referring URL, if sent in the request.
117 // Referer is misspelled as in the request itself,
118 // a mistake from the earliest days of HTTP.
119 // This value can also be fetched from the Header map
120 // as Header["Referer"]; the benefit of making it
121 // available as a structure field is that the compiler
122 // can diagnose programs that use the alternate
123 // (correct English) spelling req.Referrer but cannot
124 // diagnose programs that use Header["Referrer"].
127 // The User-Agent: header string, if sent in the request.
130 // The parsed form. Only available after ParseForm is called.
131 Form
map[string][]string
133 // Trailer maps trailer keys to values. Like for Header, if the
134 // request has multiple trailer lines with the same key, they will be
135 // concatenated, delimited by commas.
136 Trailer
map[string]string
139 // ProtoAtLeast returns whether the HTTP protocol used
140 // in the request is at least major.minor.
141 func (r
*Request
) ProtoAtLeast(major
, minor
int) bool {
142 return r
.ProtoMajor
> major ||
143 r
.ProtoMajor
== major
&& r
.ProtoMinor
>= minor
146 // MultipartReader returns a MIME multipart reader if this is a
147 // multipart/form-data POST request, else returns nil and an error.
148 func (r
*Request
) MultipartReader() (multipart
.Reader
, os
.Error
) {
149 v
, ok
:= r
.Header
["Content-Type"]
151 return nil, ErrNotMultipart
153 d
, params
:= mime
.ParseMediaType(v
)
154 if d
!= "multipart/form-data" {
155 return nil, ErrNotMultipart
157 boundary
, ok
:= params
["boundary"]
159 return nil, ErrMissingBoundary
161 return multipart
.NewReader(r
.Body
, boundary
), nil
164 // Return value if nonempty, def otherwise.
165 func valueOrDefault(value
, def
string) string {
172 const defaultUserAgent
= "Go http package"
174 // Write writes an HTTP/1.1 request -- header and body -- in wire format.
175 // This method consults the following fields of req:
177 // RawURL, if non-empty, or else URL
178 // Method (defaults to "GET")
179 // UserAgent (defaults to defaultUserAgent)
184 // If Body is present, Write forces "Transfer-Encoding: chunked" as a header
185 // and then closes Body when finished sending it.
186 func (req
*Request
) Write(w io
.Writer
) os
.Error
{
194 uri
= valueOrDefault(urlEscape(req
.URL
.Path
, encodePath
), "/")
195 if req
.URL
.RawQuery
!= "" {
196 uri
+= "?" + req
.URL
.RawQuery
200 fmt
.Fprintf(w
, "%s %s HTTP/1.1\r\n", valueOrDefault(req
.Method
, "GET"), uri
)
203 fmt
.Fprintf(w
, "Host: %s\r\n", host
)
204 fmt
.Fprintf(w
, "User-Agent: %s\r\n", valueOrDefault(req
.UserAgent
, defaultUserAgent
))
205 if req
.Referer
!= "" {
206 fmt
.Fprintf(w
, "Referer: %s\r\n", req
.Referer
)
209 // Process Body,ContentLength,Close,Trailer
210 tw
, err
:= newTransferWriter(req
)
214 err
= tw
.WriteHeader(w
)
219 // TODO: split long values? (If so, should share code with Conn.Write)
220 // TODO: if Header includes values for Host, User-Agent, or Referer, this
221 // may conflict with the User-Agent or Referer headers we add manually.
222 // One solution would be to remove the Host, UserAgent, and Referer fields
223 // from Request, and introduce Request methods along the lines of
224 // Response.{GetHeader,AddHeader} and string constants for "Host",
225 // "User-Agent" and "Referer".
226 err
= writeSortedKeyValue(w
, req
.Header
, reqExcludeHeader
)
231 io
.WriteString(w
, "\r\n")
233 // Write body and trailer
234 err
= tw
.WriteBody(w
)
242 // Read a line of bytes (up to \n) from b.
243 // Give up if the line exceeds maxLineLength.
244 // The returned bytes are a pointer into storage in
245 // the bufio, so they are only valid until the next bufio read.
246 func readLineBytes(b
*bufio
.Reader
) (p
[]byte, err os
.Error
) {
247 if p
, err
= b
.ReadSlice('\n'); err
!= nil {
248 // We always know when EOF is coming.
249 // If the caller asked for a line, there should be a line.
251 err
= io
.ErrUnexpectedEOF
252 } else if err
== bufio
.ErrBufferFull
{
257 if len(p
) >= maxLineLength
{
258 return nil, ErrLineTooLong
261 // Chop off trailing white space.
263 for i
= len(p
); i
> 0; i
-- {
264 if c
:= p
[i
-1]; c
!= ' ' && c
!= '\r' && c
!= '\t' && c
!= '\n' {
271 // readLineBytes, but convert the bytes into a string.
272 func readLine(b
*bufio
.Reader
) (s
string, err os
.Error
) {
273 p
, e
:= readLineBytes(b
)
277 return string(p
), nil
280 var colon
= []byte{':'}
282 // Read a key/value pair from b.
283 // A key/value has the form Key: Value\r\n
284 // and the Value can continue on multiple lines if each continuation line
285 // starts with a space.
286 func readKeyValue(b
*bufio
.Reader
) (key
, value
string, err os
.Error
) {
287 line
, e
:= readLineBytes(b
)
295 // Scan first line for colon.
296 i
:= bytes
.Index(line
, colon
)
301 key
= string(line
[0:i
])
302 if strings
.Contains(key
, " ") {
303 // Key field has space - no good.
307 // Skip initial space before value.
308 for i
++; i
< len(line
); i
++ {
313 value
= string(line
[i
:])
315 // Look for extension lines, which must begin with space.
325 // Eat leading space.
327 if c
, e
= b
.ReadByte(); e
!= nil {
329 e
= io
.ErrUnexpectedEOF
336 // Read the rest of the line and add to value.
337 if line
, e
= readLineBytes(b
); e
!= nil {
340 value
+= " " + string(line
)
342 if len(value
) >= maxValueLength
{
343 return "", "", &badStringError
{"value too long for key", key
}
346 return key
, value
, nil
349 return "", "", &badStringError
{"malformed header line", string(line
)}
352 // Convert decimal at s[i:len(s)] to integer,
353 // returning value, string position where the digits stopped,
354 // and whether there was a valid number (digits, not too big).
355 func atoi(s
string, i
int) (n
, i1
int, ok
bool) {
357 if i
>= len(s
) || s
[i
] < '0' || s
[i
] > '9' {
361 for ; i
< len(s
) && '0' <= s
[i
] && s
[i
] <= '9'; i
++ {
362 n
= n
*10 + int(s
[i
]-'0')
370 // Parse HTTP version: "HTTP/1.2" -> (1, 2, true).
371 func parseHTTPVersion(vers
string) (int, int, bool) {
372 if len(vers
) < 5 || vers
[0:5] != "HTTP/" {
375 major
, i
, ok
:= atoi(vers
, 5)
376 if !ok || i
>= len(vers
) || vers
[i
] != '.' {
380 minor
, i
, ok
= atoi(vers
, i
+1)
381 if !ok || i
!= len(vers
) {
384 return major
, minor
, true
387 // CanonicalHeaderKey returns the canonical format of the
388 // HTTP header key s. The canonicalization converts the first
389 // letter and any letter following a hyphen to upper case;
390 // the rest are converted to lowercase. For example, the
391 // canonical key for "accept-encoding" is "Accept-Encoding".
392 func CanonicalHeaderKey(s
string) string {
393 // canonicalize: first letter upper case
394 // and upper case after each dash.
395 // (Host, User-Agent, If-Modified-Since).
396 // HTTP headers are ASCII only, so no Unicode issues.
399 for i
:= 0; i
< len(s
); i
++ {
401 if upper
&& 'a' <= v
&& v
<= 'z' {
407 if !upper
&& 'A' <= v
&& v
<= 'Z' {
424 type chunkedReader
struct {
426 n
uint64 // unread bytes in chunk
430 func newChunkedReader(r
*bufio
.Reader
) *chunkedReader
{
431 return &chunkedReader
{r
: r
}
434 func (cr
*chunkedReader
) beginChunk() {
437 line
, cr
.err
= readLine(cr
.r
)
441 cr
.n
, cr
.err
= strconv
.Btoui64(line
, 16)
448 line
, cr
.err
= readLine(cr
.r
)
460 func (cr
*chunkedReader
) Read(b
[]uint8) (n
int, err os
.Error
) {
470 if uint64(len(b
)) > cr
.n
{
473 n
, cr
.err
= cr
.r
.Read(b
)
475 if cr
.n
== 0 && cr
.err
== nil {
476 // end of chunk (CRLF)
478 if _
, cr
.err
= io
.ReadFull(cr
.r
, b
); cr
.err
== nil {
479 if b
[0] != '\r' || b
[1] != '\n' {
480 cr
.err
= os
.NewError("malformed chunked encoding")
487 // ReadRequest reads and parses a request from b.
488 func ReadRequest(b
*bufio
.Reader
) (req
*Request
, err os
.Error
) {
491 // First line: GET /index.html HTTP/1.0
493 if s
, err
= readLine(b
); err
!= nil {
498 if f
= strings
.Split(s
, " ", 3); len(f
) < 3 {
499 return nil, &badStringError
{"malformed HTTP request", s
}
501 req
.Method
, req
.RawURL
, req
.Proto
= f
[0], f
[1], f
[2]
503 if req
.ProtoMajor
, req
.ProtoMinor
, ok
= parseHTTPVersion(req
.Proto
); !ok
{
504 return nil, &badStringError
{"malformed HTTP version", req
.Proto
}
507 if req
.URL
, err
= ParseURL(req
.RawURL
); err
!= nil {
511 // Subsequent lines: Key: value.
513 req
.Header
= make(map[string]string)
515 var key
, value
string
516 if key
, value
, err
= readKeyValue(b
); err
!= nil {
522 if nheader
++; nheader
>= maxHeaderLines
{
523 return nil, ErrHeaderTooLong
526 key
= CanonicalHeaderKey(key
)
528 // RFC 2616 says that if you send the same header key
529 // multiple times, it has to be semantically equivalent
530 // to concatenating the values separated by commas.
531 oldvalue
, present
:= req
.Header
[key
]
533 req
.Header
[key
] = oldvalue
+ "," + value
535 req
.Header
[key
] = value
539 // RFC2616: Must treat
540 // GET /index.html HTTP/1.1
541 // Host: www.google.com
543 // GET http://www.google.com/index.html HTTP/1.1
544 // Host: doesntmatter
545 // the same. In the second case, any Host line is ignored.
546 req
.Host
= req
.URL
.Host
548 req
.Host
= req
.Header
["Host"]
550 req
.Header
["Host"] = "", false
552 fixPragmaCacheControl(req
.Header
)
554 // Pull out useful fields as a convenience to clients.
555 req
.Referer
= req
.Header
["Referer"]
556 req
.Header
["Referer"] = "", false
558 req
.UserAgent
= req
.Header
["User-Agent"]
559 req
.Header
["User-Agent"] = "", false
561 // TODO: Parse specific header values:
575 // If-Unmodified-Since
577 // Proxy-Authorization
579 // TE (transfer-codings)
587 err
= readTransfer(req
, b
)
595 // ParseQuery parses the URL-encoded query string and returns
596 // a map listing the values specified for each key.
597 // ParseQuery always returns a non-nil map containing all the
598 // valid query parameters found; err describes the first decoding error
599 // encountered, if any.
600 func ParseQuery(query
string) (m
map[string][]string, err os
.Error
) {
601 m
= make(map[string][]string)
602 err
= parseQuery(m
, query
)
606 func parseQuery(m
map[string][]string, query
string) (err os
.Error
) {
607 for _
, kv
:= range strings
.Split(query
, "&", -1) {
611 kvPair
:= strings
.Split(kv
, "=", 2)
613 var key
, value
string
615 key
, e
= URLUnescape(kvPair
[0])
616 if e
== nil && len(kvPair
) > 1 {
617 value
, e
= URLUnescape(kvPair
[1])
623 vec
:= vector
.StringVector(m
[key
])
630 // ParseForm parses the request body as a form for POST requests, or the raw query for GET requests.
632 func (r
*Request
) ParseForm() (err os
.Error
) {
637 r
.Form
= make(map[string][]string)
639 err
= parseQuery(r
.Form
, r
.URL
.RawQuery
)
641 if r
.Method
== "POST" {
643 return os
.ErrorString("missing form body")
645 ct
:= r
.Header
["Content-Type"]
646 switch strings
.Split(ct
, ";", 2)[0] {
647 case "text/plain", "application/x-www-form-urlencoded", "":
648 b
, e
:= ioutil
.ReadAll(r
.Body
)
655 e
= parseQuery(r
.Form
, string(b
))
659 // TODO(dsymonds): Handle multipart/form-data
661 return &badStringError
{"unknown Content-Type", ct
}
667 // FormValue returns the first value for the named component of the query.
668 // FormValue calls ParseForm if necessary.
669 func (r
*Request
) FormValue(key
string) string {
673 if vs
:= r
.Form
[key
]; len(vs
) > 0 {
679 func (r
*Request
) expectsContinue() bool {
680 expectation
, ok
:= r
.Header
["Expect"]
681 return ok
&& strings
.ToLower(expectation
) == "100-continue"
684 func (r
*Request
) wantsHttp10KeepAlive() bool {
685 if r
.ProtoMajor
!= 1 || r
.ProtoMinor
!= 0 {
688 value
, exists
:= r
.Header
["Connection"]
692 return strings
.Contains(strings
.ToLower(value
), "keep-alive")