Merge from mainline (167278:168000).
[official-gcc/graphite-test-results.git] / libgo / go / http / request.go
blobb88689988d846e708ff6a47041c468513c1b7e47
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// HTTP Request reading and parsing.

// The http package implements parsing of HTTP requests, replies,
// and URLs and provides an extensible HTTP server and a basic
// HTTP client.
10 package http
12 import (
13 "bufio"
14 "bytes"
15 "container/vector"
16 "fmt"
17 "io"
18 "io/ioutil"
19 "mime"
20 "mime/multipart"
21 "os"
22 "strconv"
23 "strings"
// Limits applied while reading a request.
const (
	maxLineLength  = 4096 // longest accepted line; assumed <= bufio.defaultBufSize
	maxValueLength = 4096 // longest accepted header value, continuation lines included
	maxHeaderLines = 1024 // upper bound on header lines in one request
	chunkSize      = 4 << 10 // 4 KB chunks
)
33 // HTTP request parsing errors.
34 type ProtocolError struct {
35 os.ErrorString
38 var (
39 ErrLineTooLong = &ProtocolError{"header line too long"}
40 ErrHeaderTooLong = &ProtocolError{"header too long"}
41 ErrShortBody = &ProtocolError{"entity body too short"}
42 ErrNotSupported = &ProtocolError{"feature not supported"}
43 ErrUnexpectedTrailer = &ProtocolError{"trailer header without chunked transfer encoding"}
44 ErrMissingContentLength = &ProtocolError{"missing ContentLength in HEAD response"}
45 ErrNotMultipart = &ProtocolError{"request Content-Type isn't multipart/form-data"}
46 ErrMissingBoundary = &ProtocolError{"no multipart boundary param Content-Type"}
// badStringError reports a piece of input that could not be parsed.
type badStringError struct {
	what string // description of the problem
	str  string // the offending text
}

// String formats the error as the description followed by the
// offending text in quoted form.
func (e *badStringError) String() string { return fmt.Sprintf("%s %q", e.what, e.str) }
// reqExcludeHeader is the exclusion set that Request.Write passes to
// writeSortedKeyValue when it writes out the Header map.
var reqExcludeHeader = map[string]bool{
	"Host":              true,
	"User-Agent":        true,
	"Referer":           true,
	"Content-Length":    true,
	"Transfer-Encoding": true,
	"Trailer":           true,
}
65 // A Request represents a parsed HTTP request header.
66 type Request struct {
67 Method string // GET, POST, PUT, etc.
68 RawURL string // The raw URL given in the request.
69 URL *URL // Parsed URL.
70 Proto string // "HTTP/1.0"
71 ProtoMajor int // 1
72 ProtoMinor int // 0
74 // A header maps request lines to their values.
75 // If the header says
77 // accept-encoding: gzip, deflate
78 // Accept-Language: en-us
79 // Connection: keep-alive
81 // then
83 // Header = map[string]string{
84 // "Accept-Encoding": "gzip, deflate",
85 // "Accept-Language": "en-us",
86 // "Connection": "keep-alive",
87 // }
89 // HTTP defines that header names are case-insensitive.
90 // The request parser implements this by canonicalizing the
91 // name, making the first character and any characters
92 // following a hyphen uppercase and the rest lowercase.
93 Header map[string]string
95 // The message body.
96 Body io.ReadCloser
98 // ContentLength records the length of the associated content.
99 // The value -1 indicates that the length is unknown.
100 // Values >= 0 indicate that the given number of bytes may be read from Body.
101 ContentLength int64
103 // TransferEncoding lists the transfer encodings from outermost to innermost.
104 // An empty list denotes the "identity" encoding.
105 TransferEncoding []string
107 // Whether to close the connection after replying to this request.
108 Close bool
110 // The host on which the URL is sought.
111 // Per RFC 2616, this is either the value of the Host: header
112 // or the host name given in the URL itself.
113 Host string
115 // The referring URL, if sent in the request.
117 // Referer is misspelled as in the request itself,
118 // a mistake from the earliest days of HTTP.
119 // This value can also be fetched from the Header map
120 // as Header["Referer"]; the benefit of making it
121 // available as a structure field is that the compiler
122 // can diagnose programs that use the alternate
123 // (correct English) spelling req.Referrer but cannot
124 // diagnose programs that use Header["Referrer"].
125 Referer string
127 // The User-Agent: header string, if sent in the request.
128 UserAgent string
130 // The parsed form. Only available after ParseForm is called.
131 Form map[string][]string
133 // Trailer maps trailer keys to values. Like for Header, if the
134 // response has multiple trailer lines with the same key, they will be
135 // concatenated, delimited by commas.
136 Trailer map[string]string
139 // ProtoAtLeast returns whether the HTTP protocol used
140 // in the request is at least major.minor.
141 func (r *Request) ProtoAtLeast(major, minor int) bool {
142 return r.ProtoMajor > major ||
143 r.ProtoMajor == major && r.ProtoMinor >= minor
146 // MultipartReader returns a MIME multipart reader if this is a
147 // multipart/form-data POST request, else returns nil and an error.
148 func (r *Request) MultipartReader() (multipart.Reader, os.Error) {
149 v, ok := r.Header["Content-Type"]
150 if !ok {
151 return nil, ErrNotMultipart
153 d, params := mime.ParseMediaType(v)
154 if d != "multipart/form-data" {
155 return nil, ErrNotMultipart
157 boundary, ok := params["boundary"]
158 if !ok {
159 return nil, ErrMissingBoundary
161 return multipart.NewReader(r.Body, boundary), nil
// valueOrDefault returns value if it is nonempty, def otherwise.
func valueOrDefault(value, def string) string {
	if value != "" {
		return value
	}
	return def
}
// defaultUserAgent is the User-Agent value Request.Write sends when
// the request's UserAgent field is empty.
const defaultUserAgent = "Go http package"
174 // Write writes an HTTP/1.1 request -- header and body -- in wire format.
175 // This method consults the following fields of req:
176 // Host
177 // RawURL, if non-empty, or else URL
178 // Method (defaults to "GET")
179 // UserAgent (defaults to defaultUserAgent)
180 // Referer
181 // Header
182 // Body
184 // If Body is present, Write forces "Transfer-Encoding: chunked" as a header
185 // and then closes Body when finished sending it.
186 func (req *Request) Write(w io.Writer) os.Error {
187 host := req.Host
188 if host == "" {
189 host = req.URL.Host
192 uri := req.RawURL
193 if uri == "" {
194 uri = valueOrDefault(urlEscape(req.URL.Path, encodePath), "/")
195 if req.URL.RawQuery != "" {
196 uri += "?" + req.URL.RawQuery
200 fmt.Fprintf(w, "%s %s HTTP/1.1\r\n", valueOrDefault(req.Method, "GET"), uri)
202 // Header lines
203 fmt.Fprintf(w, "Host: %s\r\n", host)
204 fmt.Fprintf(w, "User-Agent: %s\r\n", valueOrDefault(req.UserAgent, defaultUserAgent))
205 if req.Referer != "" {
206 fmt.Fprintf(w, "Referer: %s\r\n", req.Referer)
209 // Process Body,ContentLength,Close,Trailer
210 tw, err := newTransferWriter(req)
211 if err != nil {
212 return err
214 err = tw.WriteHeader(w)
215 if err != nil {
216 return err
219 // TODO: split long values? (If so, should share code with Conn.Write)
220 // TODO: if Header includes values for Host, User-Agent, or Referer, this
221 // may conflict with the User-Agent or Referer headers we add manually.
222 // One solution would be to remove the Host, UserAgent, and Referer fields
223 // from Request, and introduce Request methods along the lines of
224 // Response.{GetHeader,AddHeader} and string constants for "Host",
225 // "User-Agent" and "Referer".
226 err = writeSortedKeyValue(w, req.Header, reqExcludeHeader)
227 if err != nil {
228 return err
231 io.WriteString(w, "\r\n")
233 // Write body and trailer
234 err = tw.WriteBody(w)
235 if err != nil {
236 return err
239 return nil
242 // Read a line of bytes (up to \n) from b.
243 // Give up if the line exceeds maxLineLength.
244 // The returned bytes are a pointer into storage in
245 // the bufio, so they are only valid until the next bufio read.
246 func readLineBytes(b *bufio.Reader) (p []byte, err os.Error) {
247 if p, err = b.ReadSlice('\n'); err != nil {
248 // We always know when EOF is coming.
249 // If the caller asked for a line, there should be a line.
250 if err == os.EOF {
251 err = io.ErrUnexpectedEOF
252 } else if err == bufio.ErrBufferFull {
253 err = ErrLineTooLong
255 return nil, err
257 if len(p) >= maxLineLength {
258 return nil, ErrLineTooLong
261 // Chop off trailing white space.
262 var i int
263 for i = len(p); i > 0; i-- {
264 if c := p[i-1]; c != ' ' && c != '\r' && c != '\t' && c != '\n' {
265 break
268 return p[0:i], nil
271 // readLineBytes, but convert the bytes into a string.
272 func readLine(b *bufio.Reader) (s string, err os.Error) {
273 p, e := readLineBytes(b)
274 if e != nil {
275 return "", e
277 return string(p), nil
// colon is the separator readKeyValue searches for between a header
// key and its value.
var colon = []byte{':'}
282 // Read a key/value pair from b.
283 // A key/value has the form Key: Value\r\n
284 // and the Value can continue on multiple lines if each continuation line
285 // starts with a space.
286 func readKeyValue(b *bufio.Reader) (key, value string, err os.Error) {
287 line, e := readLineBytes(b)
288 if e != nil {
289 return "", "", e
291 if len(line) == 0 {
292 return "", "", nil
295 // Scan first line for colon.
296 i := bytes.Index(line, colon)
297 if i < 0 {
298 goto Malformed
301 key = string(line[0:i])
302 if strings.Contains(key, " ") {
303 // Key field has space - no good.
304 goto Malformed
307 // Skip initial space before value.
308 for i++; i < len(line); i++ {
309 if line[i] != ' ' {
310 break
313 value = string(line[i:])
315 // Look for extension lines, which must begin with space.
316 for {
317 c, e := b.ReadByte()
318 if c != ' ' {
319 if e != os.EOF {
320 b.UnreadByte()
322 break
325 // Eat leading space.
326 for c == ' ' {
327 if c, e = b.ReadByte(); e != nil {
328 if e == os.EOF {
329 e = io.ErrUnexpectedEOF
331 return "", "", e
334 b.UnreadByte()
336 // Read the rest of the line and add to value.
337 if line, e = readLineBytes(b); e != nil {
338 return "", "", e
340 value += " " + string(line)
342 if len(value) >= maxValueLength {
343 return "", "", &badStringError{"value too long for key", key}
346 return key, value, nil
348 Malformed:
349 return "", "", &badStringError{"malformed header line", string(line)}
// Convert decimal at s[i:len(s)] to integer,
// returning value, string position where the digits stopped,
// and whether there was a valid number (digits, not too big).
// A value is too big once it exceeds 1000000. On failure the
// result is 0, 0, false.
func atoi(s string, i int) (n, i1 int, ok bool) {
	const Big = 1000000
	if i >= len(s) || s[i] < '0' || s[i] > '9' {
		return 0, 0, false
	}
	n = 0
	for ; i < len(s) && '0' <= s[i] && s[i] <= '9'; i++ {
		n = n*10 + int(s[i]-'0')
		if n > Big {
			return 0, 0, false
		}
	}
	return n, i, true
}
370 // Parse HTTP version: "HTTP/1.2" -> (1, 2, true).
371 func parseHTTPVersion(vers string) (int, int, bool) {
372 if len(vers) < 5 || vers[0:5] != "HTTP/" {
373 return 0, 0, false
375 major, i, ok := atoi(vers, 5)
376 if !ok || i >= len(vers) || vers[i] != '.' {
377 return 0, 0, false
379 var minor int
380 minor, i, ok = atoi(vers, i+1)
381 if !ok || i != len(vers) {
382 return 0, 0, false
384 return major, minor, true
// CanonicalHeaderKey returns the canonical format of the
// HTTP header key s.  The canonicalization converts the first
// letter and any letter following a hyphen to upper case;
// the rest are converted to lowercase.  For example, the
// canonical key for "accept-encoding" is "Accept-Encoding".
func CanonicalHeaderKey(s string) string {
	// canonicalize: first letter upper case
	// and upper case after each dash.
	// (Host, User-Agent, If-Modified-Since).
	// HTTP headers are ASCII only, so no Unicode issues.
	//
	// a stays nil until a byte actually has to change, so a key that
	// is already canonical is returned without copying.
	var a []byte
	upper := true
	for i := 0; i < len(s); i++ {
		v := s[i]
		if upper && 'a' <= v && v <= 'z' {
			if a == nil {
				a = []byte(s)
			}
			a[i] = v + 'A' - 'a'
		}
		if !upper && 'A' <= v && v <= 'Z' {
			if a == nil {
				a = []byte(s)
			}
			a[i] = v + 'a' - 'A'
		}
		upper = false
		if v == '-' {
			upper = true
		}
	}
	if a != nil {
		return string(a)
	}
	return s
}
424 type chunkedReader struct {
425 r *bufio.Reader
426 n uint64 // unread bytes in chunk
427 err os.Error
430 func newChunkedReader(r *bufio.Reader) *chunkedReader {
431 return &chunkedReader{r: r}
434 func (cr *chunkedReader) beginChunk() {
435 // chunk-size CRLF
436 var line string
437 line, cr.err = readLine(cr.r)
438 if cr.err != nil {
439 return
441 cr.n, cr.err = strconv.Btoui64(line, 16)
442 if cr.err != nil {
443 return
445 if cr.n == 0 {
446 // trailer CRLF
447 for {
448 line, cr.err = readLine(cr.r)
449 if cr.err != nil {
450 return
452 if line == "" {
453 break
456 cr.err = os.EOF
460 func (cr *chunkedReader) Read(b []uint8) (n int, err os.Error) {
461 if cr.err != nil {
462 return 0, cr.err
464 if cr.n == 0 {
465 cr.beginChunk()
466 if cr.err != nil {
467 return 0, cr.err
470 if uint64(len(b)) > cr.n {
471 b = b[0:cr.n]
473 n, cr.err = cr.r.Read(b)
474 cr.n -= uint64(n)
475 if cr.n == 0 && cr.err == nil {
476 // end of chunk (CRLF)
477 b := make([]byte, 2)
478 if _, cr.err = io.ReadFull(cr.r, b); cr.err == nil {
479 if b[0] != '\r' || b[1] != '\n' {
480 cr.err = os.NewError("malformed chunked encoding")
484 return n, cr.err
487 // ReadRequest reads and parses a request from b.
488 func ReadRequest(b *bufio.Reader) (req *Request, err os.Error) {
489 req = new(Request)
491 // First line: GET /index.html HTTP/1.0
492 var s string
493 if s, err = readLine(b); err != nil {
494 return nil, err
497 var f []string
498 if f = strings.Split(s, " ", 3); len(f) < 3 {
499 return nil, &badStringError{"malformed HTTP request", s}
501 req.Method, req.RawURL, req.Proto = f[0], f[1], f[2]
502 var ok bool
503 if req.ProtoMajor, req.ProtoMinor, ok = parseHTTPVersion(req.Proto); !ok {
504 return nil, &badStringError{"malformed HTTP version", req.Proto}
507 if req.URL, err = ParseURL(req.RawURL); err != nil {
508 return nil, err
511 // Subsequent lines: Key: value.
512 nheader := 0
513 req.Header = make(map[string]string)
514 for {
515 var key, value string
516 if key, value, err = readKeyValue(b); err != nil {
517 return nil, err
519 if key == "" {
520 break
522 if nheader++; nheader >= maxHeaderLines {
523 return nil, ErrHeaderTooLong
526 key = CanonicalHeaderKey(key)
528 // RFC 2616 says that if you send the same header key
529 // multiple times, it has to be semantically equivalent
530 // to concatenating the values separated by commas.
531 oldvalue, present := req.Header[key]
532 if present {
533 req.Header[key] = oldvalue + "," + value
534 } else {
535 req.Header[key] = value
539 // RFC2616: Must treat
540 // GET /index.html HTTP/1.1
541 // Host: www.google.com
542 // and
543 // GET http://www.google.com/index.html HTTP/1.1
544 // Host: doesntmatter
545 // the same. In the second case, any Host line is ignored.
546 req.Host = req.URL.Host
547 if req.Host == "" {
548 req.Host = req.Header["Host"]
550 req.Header["Host"] = "", false
552 fixPragmaCacheControl(req.Header)
554 // Pull out useful fields as a convenience to clients.
555 req.Referer = req.Header["Referer"]
556 req.Header["Referer"] = "", false
558 req.UserAgent = req.Header["User-Agent"]
559 req.Header["User-Agent"] = "", false
561 // TODO: Parse specific header values:
562 // Accept
563 // Accept-Encoding
564 // Accept-Language
565 // Authorization
566 // Cache-Control
567 // Connection
568 // Date
569 // Expect
570 // From
571 // If-Match
572 // If-Modified-Since
573 // If-None-Match
574 // If-Range
575 // If-Unmodified-Since
576 // Max-Forwards
577 // Proxy-Authorization
578 // Referer [sic]
579 // TE (transfer-codings)
580 // Trailer
581 // Transfer-Encoding
582 // Upgrade
583 // User-Agent
584 // Via
585 // Warning
587 err = readTransfer(req, b)
588 if err != nil {
589 return nil, err
592 return req, nil
595 // ParseQuery parses the URL-encoded query string and returns
596 // a map listing the values specified for each key.
597 // ParseQuery always returns a non-nil map containing all the
598 // valid query parameters found; err describes the first decoding error
599 // encountered, if any.
600 func ParseQuery(query string) (m map[string][]string, err os.Error) {
601 m = make(map[string][]string)
602 err = parseQuery(m, query)
603 return
606 func parseQuery(m map[string][]string, query string) (err os.Error) {
607 for _, kv := range strings.Split(query, "&", -1) {
608 if len(kv) == 0 {
609 continue
611 kvPair := strings.Split(kv, "=", 2)
613 var key, value string
614 var e os.Error
615 key, e = URLUnescape(kvPair[0])
616 if e == nil && len(kvPair) > 1 {
617 value, e = URLUnescape(kvPair[1])
619 if e != nil {
620 err = e
621 continue
623 vec := vector.StringVector(m[key])
624 vec.Push(value)
625 m[key] = vec
627 return err
630 // ParseForm parses the request body as a form for POST requests, or the raw query for GET requests.
631 // It is idempotent.
632 func (r *Request) ParseForm() (err os.Error) {
633 if r.Form != nil {
634 return
637 r.Form = make(map[string][]string)
638 if r.URL != nil {
639 err = parseQuery(r.Form, r.URL.RawQuery)
641 if r.Method == "POST" {
642 if r.Body == nil {
643 return os.ErrorString("missing form body")
645 ct := r.Header["Content-Type"]
646 switch strings.Split(ct, ";", 2)[0] {
647 case "text/plain", "application/x-www-form-urlencoded", "":
648 b, e := ioutil.ReadAll(r.Body)
649 if e != nil {
650 if err == nil {
651 err = e
653 break
655 e = parseQuery(r.Form, string(b))
656 if err == nil {
657 err = e
659 // TODO(dsymonds): Handle multipart/form-data
660 default:
661 return &badStringError{"unknown Content-Type", ct}
664 return err
667 // FormValue returns the first value for the named component of the query.
668 // FormValue calls ParseForm if necessary.
669 func (r *Request) FormValue(key string) string {
670 if r.Form == nil {
671 r.ParseForm()
673 if vs := r.Form[key]; len(vs) > 0 {
674 return vs[0]
676 return ""
679 func (r *Request) expectsContinue() bool {
680 expectation, ok := r.Header["Expect"]
681 return ok && strings.ToLower(expectation) == "100-continue"
684 func (r *Request) wantsHttp10KeepAlive() bool {
685 if r.ProtoMajor != 1 || r.ProtoMinor != 0 {
686 return false
688 value, exists := r.Header["Connection"]
689 if !exists {
690 return false
692 return strings.Contains(strings.ToLower(value), "keep-alive")