Removed spurious static_path.
[smonitor.git] / monitor / cherrypy / lib / httputil.py
blobe00587513a226c8b0bfee2e9a3f2c5a163a74065
1 """HTTP library functions.
3 This module contains functions for building an HTTP application
4 framework: any one, not just one whose name starts with "Ch". ;) If you
5 reference any modules from some popular framework inside *this* module,
6 FuManChu will personally hang you up by your thumbs and submit you
7 to a public caning.
8 """
10 from binascii import b2a_base64
11 from cherrypy._cpcompat import BaseHTTPRequestHandler, HTTPDate, ntob, ntou, reversed, sorted
12 from cherrypy._cpcompat import basestring, iteritems, unicodestr, unquote_qs
# Start from a private copy of the standard library's status table so that
# the overrides below never leak into BaseHTTPRequestHandler itself.
response_codes = BaseHTTPRequestHandler.responses.copy()

# From http://www.cherrypy.org/ticket/361
# Each entry maps a status code to a (reason-phrase, long message) pair.
response_codes.update({
    500: ('Internal Server Error',
          'The server encountered an unexpected condition '
          'which prevented it from fulfilling the request.'),
    503: ('Service Unavailable',
          'The server is currently unable to handle the '
          'request due to a temporary overloading or '
          'maintenance of the server.'),
})
24 import re
25 import urllib
def urljoin(*atoms):
    """Join the given path atoms into a single URL path.

    Empty atoms are dropped, the rest are joined with "/", and every run
    of consecutive slashes is collapsed to a single one. This makes it
    safe to join a SCRIPT_NAME and a PATH_INFO even if either is blank.

    Returns "/" when the joined result would otherwise be empty.
    """
    url = "/".join([atom for atom in atoms if atom])
    # Keep collapsing until a pass changes nothing: one replace() call
    # only halves a run of slashes, it does not fully flatten it.
    while True:
        collapsed = url.replace("//", "/")
        if collapsed == url:
            break
        url = collapsed
    if not url:
        # Special-case the final url of "", and return "/" instead.
        return "/"
    return url
def protocol_from_http(protocol_str):
    """Return a (major, minor) int tuple from the given 'HTTP/x.y' string.

    The text after the five-character "HTTP/" prefix is split on the first
    "." so that multi-digit version numbers (for example "HTTP/1.10",
    which the HTTP-Version grammar permits) are parsed in full instead of
    being truncated to their first digit.

    Raises ValueError if the version part is not of the form
    "<digits>.<digits>".
    """
    # Everything after the "HTTP/" prefix is "<major>.<minor>".
    major, minor = protocol_str[5:].split(".", 1)
    return int(major), int(minor)
def get_ranges(headervalue, content_length):
    """Return a list of (start, stop) indices from a Range header, or None.

    Each (start, stop) tuple will be composed of two ints, which are suitable
    for use in a slicing operation. That is, the header "Range: bytes=3-6",
    if applied against a Python string, is requesting resource[3:7]. This
    function will return the list [(3, 7)].

    None is returned when there is no header, or when the header should be
    ignored: it is malformed, uses a unit other than "bytes", or contains a
    syntactically invalid byte-range-spec. Callers should then serve the
    full entity.

    If this function returns an empty list, you should return HTTP 416.
    """

    if not headervalue:
        return None

    try:
        bytesunit, byteranges = headervalue.split("=", 1)
    except ValueError:
        # No "=" present: not a range-spec at all, so ignore the header.
        return None

    # Only the "bytes" unit is understood; ranges in any other unit
    # are ignored rather than misread as byte offsets.
    if bytesunit.strip().lower() != "bytes":
        return None

    result = []
    for brange in byteranges.split(","):
        try:
            start, stop = [x.strip() for x in brange.split("-", 1)]
        except ValueError:
            # A spec without "-" is syntactically invalid.
            return None

        if start:
            if not stop:
                stop = content_length - 1
            try:
                start, stop = int(start), int(stop)
            except ValueError:
                # Non-numeric positions: syntactically invalid.
                return None
            if start >= content_length:
                # From rfc 2616 sec 14.16:
                # "If the server receives a request (other than one
                # including an If-Range request-header field) with an
                # unsatisfiable Range request-header field (that is,
                # all of whose byte-range-spec values have a first-byte-pos
                # value greater than the current length of the selected
                # resource), it SHOULD return a response code of 416
                # (Requested range not satisfiable)."
                continue
            if stop < start:
                # From rfc 2616 sec 14.16:
                # "If the server ignores a byte-range-spec because it
                # is syntactically invalid, the server SHOULD treat
                # the request as if the invalid Range header field
                # did not exist. (Normally, this means return a 200
                # response containing the full entity)."
                return None
            # From rfc 2616 sec 14.35.1: a last-byte-pos at or beyond the
            # end of the entity is taken to be one less than its length.
            stop = min(stop, content_length - 1)
            result.append((start, stop + 1))
        else:
            if not stop:
                # See rfc quote above.
                return None
            try:
                suffix_length = int(stop)
            except ValueError:
                return None
            if suffix_length < 0:
                # Something like "--5" is not a valid suffix-length.
                return None
            # Negative subscript (last N bytes). From rfc 2616 sec 14.35.1:
            # if the entity is shorter than the suffix-length, the entire
            # entity-body is used, so never let the start go below zero.
            result.append((max(content_length - suffix_length, 0),
                           content_length))

    return result
class HeaderElement(object):
    """An element (with parameters) from an HTTP header's element list.

    ``value`` holds the element's initial token and ``params`` is a dict
    of its ";key=val" parameters (empty when there are none).
    """

    def __init__(self, value, params=None):
        self.value = value
        # Use a fresh dict per instance rather than a shared default.
        if params is None:
            params = {}
        self.params = params

    def __cmp__(self, other):
        # Elements order by their value alone; params are not compared.
        return cmp(self.value, other.value)

    def __str__(self):
        p = [";%s=%s" % (k, v) for k, v in iteritems(self.params)]
        return "%s%s" % (self.value, "".join(p))

    def __unicode__(self):
        return ntou(self.__str__())

    def parse(elementstr):
        """Transform 'token;key=val' to ('token', {'key': 'val'}).

        A parameter without "=" (or with nothing after it) gets the empty
        string as its value. A parameter consisting of a bare "=" carries
        neither a key nor a value and is skipped.
        """
        # Split the element into a value and parameters. The 'value' may
        # be of the form, "token=token", but we don't split that here.
        atoms = [x.strip() for x in elementstr.split(";") if x.strip()]
        if not atoms:
            initial_value = ''
        else:
            initial_value = atoms.pop(0).strip()
        params = {}
        for atom in atoms:
            atom = [x.strip() for x in atom.split("=", 1) if x.strip()]
            if not atom:
                # The atom was just "=": nothing usable is left, and
                # popping from the empty list would raise IndexError.
                continue
            key = atom.pop(0)
            if atom:
                val = atom[0]
            else:
                val = ""
            params[key] = val
        return initial_value, params
    parse = staticmethod(parse)

    def from_str(cls, elementstr):
        """Construct an instance from a string of the form 'token;key=val'."""
        ival, params = cls.parse(elementstr)
        return cls(ival, params)
    from_str = classmethod(from_str)
# Matches the ";q=" that introduces the accept-params of an element.
q_separator = re.compile(r'; *q *=')

class AcceptElement(HeaderElement):
    """An element (with parameters) from an Accept* header's element list.

    AcceptElement objects compare by qvalue first and by their string
    form second, so a list of them can be sorted by preference.
    """

    def from_str(cls, elementstr):
        """Construct an instance from 'media-range;q=qvalue;ext' text."""
        # The first "q" parameter (if any) separates the initial
        # media-range parameter(s) (if any) from the accept-params.
        parts = q_separator.split(elementstr, 1)

        q_element = None
        if len(parts) > 1:
            # The qvalue for an Accept header can have extensions. The other
            # headers cannot, but it's easier to parse them as if they did.
            q_element = HeaderElement.from_str(parts[1].strip())

        media_type, params = cls.parse(parts[0].strip())
        if q_element is not None:
            params["q"] = q_element
        return cls(media_type, params)
    from_str = classmethod(from_str)

    def qvalue(self):
        # "q" is either absent (defaults to "1"), a plain string, or a
        # HeaderElement whose value holds the number.
        q = self.params.get("q", "1")
        if isinstance(q, HeaderElement):
            q = q.value
        return float(q)
    qvalue = property(qvalue, doc="The qvalue, or priority, of this value.")

    def __cmp__(self, other):
        # Fall back to comparing the string forms only when qvalues tie.
        return cmp(self.qvalue, other.qvalue) or cmp(str(self), str(other))
def header_elements(fieldname, fieldvalue):
    """Return a sorted HeaderElement list from a comma-separated header string.

    Headers whose name starts with "Accept", and the "TE" header, are
    parsed into AcceptElement objects; all others into HeaderElement
    objects. The sorted list is reversed before being returned. An empty
    or missing fieldvalue yields an empty list.
    """
    if not fieldvalue:
        return []

    # The element class depends only on the header name, so pick it once.
    if fieldname.startswith("Accept") or fieldname == 'TE':
        element_class = AcceptElement
    else:
        element_class = HeaderElement

    parsed = [element_class.from_str(item) for item in fieldvalue.split(",")]
    return list(reversed(sorted(parsed)))
def decode_TEXT(value):
    r"""Decode :rfc:`2047` TEXT (e.g. "=?utf-8?q?f=C3=BCr?=" -> u"f\xfcr").

    Each atom returned by the email package's decode_header() that carries
    a charset is decoded with that charset; atoms without a charset are
    used as they are. The pieces are concatenated and returned.
    """
    try:
        from email.header import decode_header
    except ImportError:
        # Older Pythons only provide the capitalised module name.
        from email.Header import decode_header

    decoded_atoms = []
    for atom, charset in decode_header(value):
        if charset is not None:
            atom = atom.decode(charset)
        decoded_atoms.append(atom)
    # Join once instead of growing a string with repeated concatenation.
    return "".join(decoded_atoms)
def valid_status(status):
    """Return legal HTTP status Code, Reason-phrase and Message.

    The status arg must be an int, or a str that begins with an int.
    A false value (None, 0, "") is treated as 200.

    If status is an int, or a str and no reason-phrase is supplied,
    a default reason-phrase will be provided.

    Raises ValueError if the code is not numeric or lies outside 100-599.
    """
    pieces = str(status or 200).split(" ", 1)

    code = pieces[0]
    reason = None
    if len(pieces) > 1:
        # A reason-phrase was supplied after the code.
        reason = pieces[1].strip()

    try:
        code = int(code)
    except ValueError:
        raise ValueError("Illegal response status from server "
                         "(%s is non-numeric)." % repr(code))

    if not (100 <= code <= 599):
        raise ValueError("Illegal response status from server "
                         "(%s is out of range)." % repr(code))

    # A code missing from the table is unknown but not illegal.
    default_reason, message = response_codes.get(code, ("", ""))

    if reason is None:
        reason = default_reason

    return code, reason, message
def _parse_qs(qs, keep_blank_values=0, strict_parsing=0, encoding='utf-8'):
    """Parse a query given as a string argument.

    Arguments:

    qs: URL-encoded query string to be parsed

    keep_blank_values: flag indicating whether blank values in
        URL encoded queries should be treated as blank strings. A
        true value indicates that blanks should be retained as blank
        strings. The default false value indicates that blank values
        are to be ignored and treated as if they were not included.

    strict_parsing: flag indicating what to do with parsing errors. If
        false (the default), errors are silently ignored. If true,
        errors raise a ValueError exception.

    encoding: passed through to unquote_qs for each name and value.

    Returns a dict. A name seen once maps to its value; a name seen
    several times maps to a list of its values in order of appearance.
    """
    parsed = {}
    # Fields may be separated by either "&" or ";".
    for chunk in qs.split('&'):
        for field in chunk.split(';'):
            if not (field or strict_parsing):
                continue

            parts = field.split('=', 1)
            if len(parts) != 2:
                if strict_parsing:
                    raise ValueError("bad query field: %r" % (field,))
                # Handle case of a control-name with no equal sign
                if not keep_blank_values:
                    continue
                parts.append('')

            raw_name, raw_value = parts
            if not (raw_value or keep_blank_values):
                continue

            name = unquote_qs(raw_name, encoding)
            value = unquote_qs(raw_value, encoding)

            if name not in parsed:
                parsed[name] = value
                continue

            # Repeated name: promote the entry to a list on first repeat.
            existing = parsed[name]
            if isinstance(existing, list):
                existing.append(value)
            else:
                parsed[name] = [existing, value]
    return parsed
image_map_pattern = re.compile(r"[0-9]+,[0-9]+")

def parse_query_string(query_string, keep_blank_values=True, encoding='utf-8'):
    """Build a params dictionary from a query_string.

    Duplicate key/value pairs in the provided query_string will be
    returned as {'key': [val1, val2, ...]}. Single key/values will
    be returned as strings: {'key': 'value'}.

    A query string that consists solely of "<digits>,<digits>" is treated
    as server-side image map coordinates and returned as
    {'x': int, 'y': int}.
    """
    # match() only anchors at the start, so also require that the match
    # covers the entire string. Otherwise something like "1,2&a=b" would
    # be taken for image map coordinates and fail in int().
    coords = image_map_pattern.match(query_string)
    if coords is not None and coords.end() == len(query_string):
        # Server-side image map. Map the coords to 'x' and 'y'
        # (like CGI::Request does).
        x, y = query_string.split(",")
        pm = {'x': int(x), 'y': int(y)}
    else:
        pm = _parse_qs(query_string, keep_blank_values, encoding=encoding)
    return pm
class CaseInsensitiveDict(dict):
    """A case-insensitive dict subclass.

    Each key is changed on entry to str(key).title().
    """

    # Unique marker that lets pop() tell "no default supplied" apart from
    # an explicit default of None.
    _no_default = object()

    def __getitem__(self, key):
        return dict.__getitem__(self, str(key).title())

    def __setitem__(self, key, value):
        dict.__setitem__(self, str(key).title(), value)

    def __delitem__(self, key):
        dict.__delitem__(self, str(key).title())

    def __contains__(self, key):
        return dict.__contains__(self, str(key).title())

    def get(self, key, default=None):
        return dict.get(self, str(key).title(), default)

    def has_key(self, key):
        # Goes through __contains__ because dict.has_key does not exist
        # on every Python version.
        return dict.__contains__(self, str(key).title())

    def update(self, E=(), **F):
        """Update from a mapping or an iterable of pairs, plus keywords.

        Every key is normalised by __setitem__, exactly as for direct
        assignment.
        """
        if hasattr(E, "keys"):
            for k in E.keys():
                self[k] = E[k]
        else:
            for k, v in E:
                self[k] = v
        for k in F:
            self[k] = F[k]

    def fromkeys(cls, seq, value=None):
        newdict = cls()
        for k in seq:
            newdict[str(k).title()] = value
        return newdict
    fromkeys = classmethod(fromkeys)

    def setdefault(self, key, x=None):
        key = str(key).title()
        try:
            return self[key]
        except KeyError:
            self[key] = x
            return x

    def pop(self, key, default=_no_default):
        """Remove the key and return its value.

        If the key is missing, return default when one was given and
        raise KeyError otherwise, as dict.pop does.
        """
        key = str(key).title()
        if default is CaseInsensitiveDict._no_default:
            return dict.pop(self, key)
        return dict.pop(self, key, default)
# From RFC 2616, section 2.2:
#
# "TEXT = <any OCTET except CTLs, but including LWS>
#
# A CRLF is allowed in the definition of TEXT only as part of a header
# field continuation. It is expected that the folding LWS will be
# replaced with a single SP before interpretation of the TEXT value."
#
# The table is an identity mapping over all 256 character codes; the
# delete set holds the control characters (codes 0-31 and 127).
# range() is used rather than xrange() because xrange() is not defined
# on Python 3; on Python 2 the resulting tables are identical.
header_translate_table = ''.join([chr(i) for i in range(256)])
header_translate_deletechars = ''.join([chr(i) for i in range(32)]) + chr(127)
class HeaderMap(CaseInsensitiveDict):
    """A dict subclass for HTTP request and response headers.

    Each key is changed on entry to str(key).title(). This allows headers
    to be case-insensitive and avoid duplicates.

    Values are header values (decoded according to :rfc:`2047` if necessary).
    """

    protocol = (1, 1)
    encodings = ["ISO-8859-1"]

    # Someday, when http-bis is done, this will probably get dropped
    # since few servers, clients, or intermediaries do it. But until then,
    # we're going to obey the spec as is.
    # "Words of *TEXT MAY contain characters from character sets other than
    # ISO-8859-1 only when encoded according to the rules of RFC 2047."
    use_rfc_2047 = True

    def elements(self, key):
        """Return a sorted list of HeaderElements for the given header."""
        name = str(key).title()
        return header_elements(name, self.get(name))

    def values(self, key):
        """Return a sorted list of HeaderElement.value for the given header."""
        found = []
        for element in self.elements(key):
            found.append(element.value)
        return found

    def output(self):
        """Transform self into a list of (name, value) tuples."""
        pairs = []
        for name, value in self.items():
            if isinstance(name, unicodestr):
                name = self.encode(name)

            # Non-string values are stringified before any encoding.
            if not isinstance(value, basestring):
                value = str(value)

            if isinstance(value, unicodestr):
                value = self.encode(value)

            # See header_translate_* constants above.
            # Replace only if you really know what you're doing.
            name = name.translate(header_translate_table,
                                  header_translate_deletechars)
            value = value.translate(header_translate_table,
                                    header_translate_deletechars)

            pairs.append((name, value))
        return pairs

    def encode(self, v):
        """Return the given header name or value, encoded for HTTP output."""
        # Try each configured encoding in order; the first that works wins.
        for charset in self.encodings:
            try:
                encoded = v.encode(charset)
            except UnicodeEncodeError:
                pass
            else:
                return encoded

        if self.protocol != (1, 1) or not self.use_rfc_2047:
            raise ValueError("Could not encode header part %r using "
                             "any of the encodings %r." %
                             (v, self.encodings))

        # Encode RFC-2047 TEXT
        # (e.g. u"\u8200" -> "=?utf-8?b?6IiA?=").
        # We do our own here instead of using the email module
        # because we never want to fold lines--folding has
        # been deprecated by the HTTP working group.
        payload = b2a_base64(v.encode('utf-8')).strip(ntob('\n'))
        return ntob('=?utf-8?b?') + payload + ntob('?=')
class Host(object):
    """An internet address.

    name
        Should be the client's host name. If not available (because no DNS
        lookup is performed), the IP address should be used instead.
    """

    # Class-level defaults; __init__ always overrides them per instance.
    ip = "0.0.0.0"
    port = 80
    name = "unknown.tld"

    def __init__(self, ip, port, name=None):
        self.ip = ip
        self.port = port
        # Without an explicit host name, the IP address stands in for it.
        if name is not None:
            self.name = name
        else:
            self.name = ip

    def __repr__(self):
        fields = (self.ip, self.port, self.name)
        return "httputil.Host(%r, %r, %r)" % fields