1 """HTTP library functions.
3 This module contains functions for building an HTTP application
4 framework: any one, not just one whose name starts with "Ch". ;) If you
5 reference any modules from some popular framework inside *this* module,
6 FuManChu will personally hang you up by your thumbs and submit you
10 from binascii
import b2a_base64
11 from cherrypy
._cpcompat
import BaseHTTPRequestHandler
, HTTPDate
, ntob
, ntou
, reversed, sorted
12 from cherrypy
._cpcompat
import basestring
, iteritems
, unicodestr
, unquote_qs
# Work on a private copy of the handler's status table so the overrides
# below never leak back into BaseHTTPRequestHandler.responses.
response_codes = BaseHTTPRequestHandler.responses.copy()

# From http://www.cherrypy.org/ticket/361
# Each entry maps a status code to a (reason-phrase, message) pair.
response_codes.update({
    500: ('Internal Server Error',
          'The server encountered an unexpected condition '
          'which prevented it from fulfilling the request.'),
    503: ('Service Unavailable',
          'The server is currently unable to handle the '
          'request due to a temporary overloading or '
          'maintenance of the server.'),
})
30 """Return the given path \*atoms, joined into a single URL.
32 This will correctly join a SCRIPT_NAME and PATH_INFO into the
33 original URL, even if either atom is blank.
35 url
= "/".join([x
for x
in atoms
if x
])
37 url
= url
.replace("//", "/")
38 # Special-case the final url of "", and return "/" instead.
def protocol_from_http(protocol_str):
    """Return a protocol tuple from the given 'HTTP/x.y' string.

    The version is read starting at index 5, i.e. just past a
    five-character prefix such as 'HTTP/'.  Both the major and the minor
    number may span several digits, so 'HTTP/1.1' yields (1, 1) and
    'HTTP/2.13' yields (2, 13).  Anything after the minor number is
    ignored.

    Raises ValueError if no 'digits.digits' version is found at that
    position.
    """
    # Imported here so the function is self-contained regardless of the
    # order of module-level imports.
    import re

    # Reading single characters at fixed offsets would misparse versions
    # with more than one digit; match whole digit runs instead.
    match = re.match(r"([0-9]+)\.([0-9]+)", protocol_str[5:])
    if match is None:
        raise ValueError("Malformed HTTP version: %r" % (protocol_str,))
    return int(match.group(1)), int(match.group(2))
45 def get_ranges(headervalue
, content_length
):
46 """Return a list of (start, stop) indices from a Range header, or None.
48 Each (start, stop) tuple will be composed of two ints, which are suitable
49 for use in a slicing operation. That is, the header "Range: bytes=3-6",
50 if applied against a Python string, is requesting resource[3:7]. This
51 function will return the list [(3, 7)].
53 If this function returns an empty list, you should return HTTP 416.
60 bytesunit
, byteranges
= headervalue
.split("=", 1)
61 for brange
in byteranges
.split(","):
62 start
, stop
= [x
.strip() for x
in brange
.split("-", 1)]
65 stop
= content_length
- 1
66 start
, stop
= int(start
), int(stop
)
67 if start
>= content_length
:
68 # From rfc 2616 sec 14.16:
69 # "If the server receives a request (other than one
70 # including an If-Range request-header field) with an
71 # unsatisfiable Range request-header field (that is,
72 # all of whose byte-range-spec values have a first-byte-pos
73 # value greater than the current length of the selected
74 # resource), it SHOULD return a response code of 416
75 # (Requested range not satisfiable)."
78 # From rfc 2616 sec 14.16:
79 # "If the server ignores a byte-range-spec because it
80 # is syntactically invalid, the server SHOULD treat
81 # the request as if the invalid Range header field
82 # did not exist. (Normally, this means return a 200
83 # response containing the full entity)."
85 result
.append((start
, stop
+ 1))
88 # See rfc quote above.
90 # Negative subscript (last N bytes)
91 result
.append((content_length
- int(stop
), content_length
))
96 class HeaderElement(object):
97 """An element (with parameters) from an HTTP header's element list."""
99 def __init__(self
, value
, params
=None):
105 def __cmp__(self
, other
):
106 return cmp(self
.value
, other
.value
)
109 p
= [";%s=%s" % (k
, v
) for k
, v
in iteritems(self
.params
)]
110 return "%s%s" % (self
.value
, "".join(p
))
def __unicode__(self):
    """Return the ``__str__`` rendering of this element, converted by ``ntou``."""
    rendered = self.__str__()
    return ntou(rendered)
115 def parse(elementstr
):
116 """Transform 'token;key=val' to ('token', {'key': 'val'})."""
117 # Split the element into a value and parameters. The 'value' may
118 # be of the form, "token=token", but we don't split that here.
119 atoms
= [x
.strip() for x
in elementstr
.split(";") if x
.strip()]
123 initial_value
= atoms
.pop(0).strip()
126 atom
= [x
.strip() for x
in atom
.split("=", 1) if x
.strip()]
133 return initial_value
, params
134 parse
= staticmethod(parse
)
def from_str(cls, elementstr):
    """Build an instance of ``cls`` from a 'token;key=val' string.

    The string is handed to ``cls.parse``; the resulting value and
    parameter dict become the two constructor arguments.
    """
    parsed = cls.parse(elementstr)
    # Unpack explicitly so a parse() result that is not a pair still
    # fails right here.
    initial_value, parameters = parsed
    return cls(initial_value, parameters)
from_str = classmethod(from_str)
# Matches the ';q=' delimiter (spaces allowed after ';' and after 'q').
# AcceptElement.from_str splits on its first occurrence to separate the
# media-range from the accept-params.
q_separator = re.compile(r'; *q *=')
145 class AcceptElement(HeaderElement
):
146 """An element (with parameters) from an Accept* header's element list.
148 AcceptElement objects are comparable; the more-preferred object will be
149 "less than" the less-preferred object. They are also therefore sortable;
150 if you sort a list of AcceptElement objects, they will be listed in
151 priority order; the most preferred value will be first. Yes, it should
152 have been the other way around, but it's too late to fix now.
155 def from_str(cls
, elementstr
):
157 # The first "q" parameter (if any) separates the initial
158 # media-range parameter(s) (if any) from the accept-params.
159 atoms
= q_separator
.split(elementstr
, 1)
160 media_range
= atoms
.pop(0).strip()
162 # The qvalue for an Accept header can have extensions. The other
163 # headers cannot, but it's easier to parse them as if they did.
164 qvalue
= HeaderElement
.from_str(atoms
[0].strip())
166 media_type
, params
= cls
.parse(media_range
)
167 if qvalue
is not None:
169 return cls(media_type
, params
)
170 from_str
= classmethod(from_str
)
173 val
= self
.params
.get("q", "1")
174 if isinstance(val
, HeaderElement
):
177 qvalue
= property(qvalue
, doc
="The qvalue, or priority, of this value.")
179 def __cmp__(self
, other
):
180 diff
= cmp(self
.qvalue
, other
.qvalue
)
182 diff
= cmp(str(self
), str(other
))
186 def header_elements(fieldname
, fieldvalue
):
187 """Return a sorted HeaderElement list from a comma-separated header string."""
192 for element
in fieldvalue
.split(","):
193 if fieldname
.startswith("Accept") or fieldname
== 'TE':
194 hv
= AcceptElement
.from_str(element
)
196 hv
= HeaderElement
.from_str(element
)
199 return list(reversed(sorted(result
)))
201 def decode_TEXT(value
):
202 r
"""Decode :rfc:`2047` TEXT (e.g. "=?utf-8?q?f=C3=BCr?=" -> u"f\xfcr")."""
203 from email
.Header
import decode_header
204 atoms
= decode_header(value
)
206 for atom
, charset
in atoms
:
207 if charset
is not None:
208 atom
= atom
.decode(charset
)
212 def valid_status(status
):
213 """Return legal HTTP status Code, Reason-phrase and Message.
215 The status arg must be an int, or a str that begins with an int.
217 If status is an int, or a str and no reason-phrase is supplied,
218 a default reason-phrase will be provided.
225 parts
= status
.split(" ", 1)
227 # No reason supplied.
232 reason
= reason
.strip()
237 raise ValueError("Illegal response status from server "
238 "(%s is non-numeric)." % repr(code
))
240 if code
< 100 or code
> 599:
241 raise ValueError("Illegal response status from server "
242 "(%s is out of range)." % repr(code
))
244 if code
not in response_codes
:
245 # code is unknown but not illegal
246 default_reason
, message
= "", ""
248 default_reason
, message
= response_codes
[code
]
251 reason
= default_reason
253 return code
, reason
, message
256 def _parse_qs(qs
, keep_blank_values
=0, strict_parsing
=0, encoding
='utf-8'):
257 """Parse a query given as a string argument.
261 qs: URL-encoded query string to be parsed
263 keep_blank_values: flag indicating whether blank values in
264 URL encoded queries should be treated as blank strings. A
265 true value indicates that blanks should be retained as blank
266 strings. The default false value indicates that blank values
267 are to be ignored and treated as if they were not included.
269 strict_parsing: flag indicating what to do with parsing errors. If
270 false (the default), errors are silently ignored. If true,
271 errors raise a ValueError exception.
273 Returns a dict, as G-d intended.
275 pairs
= [s2
for s1
in qs
.split('&') for s2
in s1
.split(';')]
277 for name_value
in pairs
:
278 if not name_value
and not strict_parsing
:
280 nv
= name_value
.split('=', 1)
283 raise ValueError("bad query field: %r" % (name_value
,))
284 # Handle case of a control-name with no equal sign
285 if keep_blank_values
:
289 if len(nv
[1]) or keep_blank_values
:
290 name
= unquote_qs(nv
[0], encoding
)
291 value
= unquote_qs(nv
[1], encoding
)
293 if not isinstance(d
[name
], list):
295 d
[name
].append(value
)
# Matches a query string that starts with 'digits,digits'.
# parse_query_string treats such a string as server-side image map
# coordinates and maps them to 'x' and 'y'.
image_map_pattern = re.compile(r"[0-9]+,[0-9]+")
303 def parse_query_string(query_string
, keep_blank_values
=True, encoding
='utf-8'):
304 """Build a params dictionary from a query_string.
306 Duplicate key/value pairs in the provided query_string will be
307 returned as {'key': [val1, val2, ...]}. Single key/values will
308 be returned as strings: {'key': 'value'}.
310 if image_map_pattern
.match(query_string
):
311 # Server-side image map. Map the coords to 'x' and 'y'
312 # (like CGI::Request does).
313 pm
= query_string
.split(",")
314 pm
= {'x': int(pm
[0]), 'y': int(pm
[1])}
316 pm
= _parse_qs(query_string
, keep_blank_values
, encoding
=encoding
)
320 class CaseInsensitiveDict(dict):
321 """A case-insensitive dict subclass.
323 Each key is changed on entry to str(key).title().
326 def __getitem__(self
, key
):
327 return dict.__getitem
__(self
, str(key
).title())
329 def __setitem__(self
, key
, value
):
330 dict.__setitem
__(self
, str(key
).title(), value
)
332 def __delitem__(self
, key
):
333 dict.__delitem
__(self
, str(key
).title())
335 def __contains__(self
, key
):
336 return dict.__contains
__(self
, str(key
).title())
338 def get(self
, key
, default
=None):
339 return dict.get(self
, str(key
).title(), default
)
341 def has_key(self
, key
):
342 return dict.has_key(self
, str(key
).title())
346 self
[str(k
).title()] = E
[k
]
348 def fromkeys(cls
, seq
, value
=None):
351 newdict
[str(k
).title()] = value
353 fromkeys
= classmethod(fromkeys
)
355 def setdefault(self
, key
, x
=None):
356 key
= str(key
).title()
363 def pop(self
, key
, default
):
364 return dict.pop(self
, str(key
).title(), default
)
367 # TEXT = <any OCTET except CTLs, but including LWS>
369 # A CRLF is allowed in the definition of TEXT only as part of a header
370 # field continuation. It is expected that the folding LWS will be
371 # replaced with a single SP before interpretation of the TEXT value."
# Identity table covering every character code 0-255, for use as the
# first argument to translate().
# range() is used rather than the Python-2-only xrange(); for ranges this
# small the resulting strings are identical.
header_translate_table = ''.join([chr(i) for i in range(256)])
# Characters to strip from header text: codes 0-31 plus DEL (127).
header_translate_deletechars = ''.join([chr(i) for i in range(32)]) + chr(127)
376 class HeaderMap(CaseInsensitiveDict
):
377 """A dict subclass for HTTP request and response headers.
379 Each key is changed on entry to str(key).title(). This allows headers
380 to be case-insensitive and avoid duplicates.
382 Values are header values (decoded according to :rfc:`2047` if necessary).
386 encodings
= ["ISO-8859-1"]
388 # Someday, when http-bis is done, this will probably get dropped
389 # since few servers, clients, or intermediaries do it. But until then,
390 # we're going to obey the spec as is.
391 # "Words of *TEXT MAY contain characters from character sets other than
392 # ISO-8859-1 only when encoded according to the rules of RFC 2047."
def elements(self, key):
    """Return the header_elements() result for the header named ``key``.

    The name is normalised with ``str(key).title()`` before both the
    lookup and the call to ``header_elements``.
    """
    canonical = str(key).title()
    raw_value = self.get(canonical)
    return header_elements(canonical, raw_value)
def values(self, key):
    """Return the ``value`` of each element of the given header.

    Order follows ``self.elements(key)``.
    """
    collected = []
    for element in self.elements(key):
        collected.append(element.value)
    return collected
406 """Transform self into a list of (name, value) tuples."""
408 for k
, v
in self
.items():
409 if isinstance(k
, unicodestr
):
412 if not isinstance(v
, basestring
):
415 if isinstance(v
, unicodestr
):
418 # See header_translate_* constants above.
419 # Replace only if you really know what you're doing.
420 k
= k
.translate(header_translate_table
, header_translate_deletechars
)
421 v
= v
.translate(header_translate_table
, header_translate_deletechars
)
423 header_list
.append((k
, v
))
427 """Return the given header name or value, encoded for HTTP output."""
428 for enc
in self
.encodings
:
431 except UnicodeEncodeError:
434 if self
.protocol
== (1, 1) and self
.use_rfc_2047
:
435 # Encode RFC-2047 TEXT
436 # (e.g. u"\u8200" -> "=?utf-8?b?6IiA?=").
437 # We do our own here instead of using the email module
438 # because we never want to fold lines--folding has
439 # been deprecated by the HTTP working group.
440 v
= b2a_base64(v
.encode('utf-8'))
441 return (ntob('=?utf-8?b?') + v
.strip(ntob('\n')) + ntob('?='))
443 raise ValueError("Could not encode header part %r using "
444 "any of the encodings %r." %
449 """An internet address.
452 Should be the client's host name. If not available (because no DNS
453 lookup is performed), the IP address should be used instead.
461 def __init__(self
, ip
, port
, name
=None):
469 return "httputil.Host(%r, %r, %r)" % (self
.ip
, self
.port
, self
.name
)