monitor/cherrypy/lib/httputil.py

   1 """HTTP library functions.
   2
   3 This module contains functions for building an HTTP application
   4 framework: any one, not just one whose name starts with "Ch". ;) If you
   5 reference any modules from some popular framework inside *this* module,
   6 FuManChu will personally hang you up by your thumbs and submit you
   7 to a public caning.
   8 """
   9
  10 from binascii import b2a_base64
  11 from cherrypy._cpcompat import BaseHTTPRequestHandler, HTTPDate, ntob, ntou, reversed, sorted
  12 from cherrypy._cpcompat import basestring, iteritems, unicodestr, unquote_qs
  13 response_codes = BaseHTTPRequestHandler.responses.copy()
  14
  15 # From http://www.cherrypy.org/ticket/361
  16 response_codes[500] = ('Internal Server Error',
  17                       'The server encountered an unexpected condition '
  18                       'which prevented it from fulfilling the request.')
  19 response_codes[503] = ('Service Unavailable',
  20                       'The server is currently unable to handle the '
  21                       'request due to a temporary overloading or '
  22                       'maintenance of the server.')
  23
  24 import re
  25 import urllib
  26
  27
  28
  29 def urljoin(*atoms):
  30     """Return the given path \*atoms, joined into a single URL.
  31
  32     This will correctly join a SCRIPT_NAME and PATH_INFO into the
  33     original URL, even if either atom is blank.
  34     """
  35     url = "/".join([x for x in atoms if x])
  36     while "//" in url:
  37         url = url.replace("//", "/")
  38     # Special-case the final url of "", and return "/" instead.
  39     return url or "/"
  40
  41 def protocol_from_http(protocol_str):
  42     """Return a protocol tuple from the given 'HTTP/x.y' string."""
  43     return int(protocol_str[5]), int(protocol_str[7])
  44
  45 def get_ranges(headervalue, content_length):
  46     """Return a list of (start, stop) indices from a Range header, or None.
  47
  48     Each (start, stop) tuple will be composed of two ints, which are suitable
  49     for use in a slicing operation. That is, the header "Range: bytes=3-6",
  50     if applied against a Python string, is requesting resource[3:7]. This
  51     function will return the list [(3, 7)].
  52
  53     If this function returns an empty list, you should return HTTP 416.
  54     """
  55
  56     if not headervalue:
  57         return None
  58
  59     result = []
  60     bytesunit, byteranges = headervalue.split("=", 1)
  61     for brange in byteranges.split(","):
  62         start, stop = [x.strip() for x in brange.split("-", 1)]
  63         if start:
  64             if not stop:
  65                 stop = content_length - 1
  66             start, stop = int(start), int(stop)
  67             if start >= content_length:
  68                 # From rfc 2616 sec 14.16:
  69                 # "If the server receives a request (other than one
  70                 # including an If-Range request-header field) with an
  71                 # unsatisfiable Range request-header field (that is,
  72                 # all of whose byte-range-spec values have a first-byte-pos
  73                 # value greater than the current length of the selected
  74                 # resource), it SHOULD return a response code of 416
  75                 # (Requested range not satisfiable)."
  76                 continue
  77             if stop < start:
  78                 # From rfc 2616 sec 14.16:
  79                 # "If the server ignores a byte-range-spec because it
  80                 # is syntactically invalid, the server SHOULD treat
  81                 # the request as if the invalid Range header field
  82                 # did not exist. (Normally, this means return a 200
  83                 # response containing the full entity)."
  84                 return None
  85             result.append((start, stop + 1))
  86         else:
  87             if not stop:
  88                 # See rfc quote above.
  89                 return None
  90             # Negative subscript (last N bytes)
  91             result.append((content_length - int(stop), content_length))
  92
  93     return result
  94
  95
  96 class HeaderElement(object):
  97     """An element (with parameters) from an HTTP header's element list."""
  98
  99     def __init__(self, value, params=None):
 100         self.value = value
 101         if params is None:
 102             params = {}
 103         self.params = params
 104
 105     def __cmp__(self, other):
 106         return cmp(self.value, other.value)
 107
 108     def __str__(self):
 109         p = [";%s=%s" % (k, v) for k, v in iteritems(self.params)]
 110         return "%s%s" % (self.value, "".join(p))
 111
 112     def __unicode__(self):
 113         return ntou(self.__str__())
 114
 115     def parse(elementstr):
 116         """Transform 'token;key=val' to ('token', {'key': 'val'})."""
 117         # Split the element into a value and parameters. The 'value' may
 118         # be of the form, "token=token", but we don't split that here.
 119         atoms = [x.strip() for x in elementstr.split(";") if x.strip()]
 120         if not atoms:
 121             initial_value = ''
 122         else:
 123             initial_value = atoms.pop(0).strip()
 124         params = {}
 125         for atom in atoms:
 126             atom = [x.strip() for x in atom.split("=", 1) if x.strip()]
 127             key = atom.pop(0)
 128             if atom:
 129                 val = atom[0]
 130             else:
 131                 val = ""
 132             params[key] = val
 133         return initial_value, params
 134     parse = staticmethod(parse)
 135
 136     def from_str(cls, elementstr):
 137         """Construct an instance from a string of the form 'token;key=val'."""
 138         ival, params = cls.parse(elementstr)
 139         return cls(ival, params)
 140     from_str = classmethod(from_str)
 141
 142
 143 q_separator = re.compile(r'; *q *=')
 144
 145 class AcceptElement(HeaderElement):
 146     """An element (with parameters) from an Accept* header's element list.
 147
 148     AcceptElement objects are comparable; the more-preferred object will be
 149     "less than" the less-preferred object. They are also therefore sortable;
 150     if you sort a list of AcceptElement objects, they will be listed in
 151     priority order; the most preferred value will be first. Yes, it should
 152     have been the other way around, but it's too late to fix now.
 153     """
 154
 155     def from_str(cls, elementstr):
 156         qvalue = None
 157         # The first "q" parameter (if any) separates the initial
 158         # media-range parameter(s) (if any) from the accept-params.
 159         atoms = q_separator.split(elementstr, 1)
 160         media_range = atoms.pop(0).strip()
 161         if atoms:
 162             # The qvalue for an Accept header can have extensions. The other
 163             # headers cannot, but it's easier to parse them as if they did.
 164             qvalue = HeaderElement.from_str(atoms[0].strip())
 165
 166         media_type, params = cls.parse(media_range)
 167         if qvalue is not None:
 168             params["q"] = qvalue
 169         return cls(media_type, params)
 170     from_str = classmethod(from_str)
 171
 172     def qvalue(self):
 173         val = self.params.get("q", "1")
 174         if isinstance(val, HeaderElement):
 175             val = val.value
 176         return float(val)
 177     qvalue = property(qvalue, doc="The qvalue, or priority, of this value.")
 178
 179     def __cmp__(self, other):
 180         diff = cmp(self.qvalue, other.qvalue)
 181         if diff == 0:
 182             diff = cmp(str(self), str(other))
 183         return diff
 184
 185
 186 def header_elements(fieldname, fieldvalue):
 187     """Return a sorted HeaderElement list from a comma-separated header string."""
 188     if not fieldvalue:
 189         return []
 190
 191     result = []
 192     for element in fieldvalue.split(","):
 193         if fieldname.startswith("Accept") or fieldname == 'TE':
 194             hv = AcceptElement.from_str(element)
 195         else:
 196             hv = HeaderElement.from_str(element)
 197         result.append(hv)
 198
 199     return list(reversed(sorted(result)))
 200
 201 def decode_TEXT(value):
 202     r"""Decode :rfc:`2047` TEXT (e.g. "=?utf-8?q?f=C3=BCr?=" -> u"f\xfcr")."""
 203     from email.Header import decode_header
 204     atoms = decode_header(value)
 205     decodedvalue = ""
 206     for atom, charset in atoms:
 207         if charset is not None:
 208             atom = atom.decode(charset)
 209         decodedvalue += atom
 210     return decodedvalue
 211
 212 def valid_status(status):
 213     """Return legal HTTP status Code, Reason-phrase and Message.
 214
 215     The status arg must be an int, or a str that begins with an int.
 216
 217     If status is an int, or a str and no reason-phrase is supplied,
 218     a default reason-phrase will be provided.
 219     """
 220
 221     if not status:
 222         status = 200
 223
 224     status = str(status)
 225     parts = status.split(" ", 1)
 226     if len(parts) == 1:
 227         # No reason supplied.
 228         code, = parts
 229         reason = None
 230     else:
 231         code, reason = parts
 232         reason = reason.strip()
 233
 234     try:
 235         code = int(code)
 236     except ValueError:
 237         raise ValueError("Illegal response status from server "
 238                          "(%s is non-numeric)." % repr(code))
 239
 240     if code < 100 or code > 599:
 241         raise ValueError("Illegal response status from server "
 242                          "(%s is out of range)." % repr(code))
 243
 244     if code not in response_codes:
 245         # code is unknown but not illegal
 246         default_reason, message = "", ""
 247     else:
 248         default_reason, message = response_codes[code]
 249
 250     if reason is None:
 251         reason = default_reason
 252
 253     return code, reason, message
 254
 255
 256 def _parse_qs(qs, keep_blank_values=0, strict_parsing=0, encoding='utf-8'):
 257     """Parse a query given as a string argument.
 258
 259     Arguments:
 260
 261     qs: URL-encoded query string to be parsed
 262
 263     keep_blank_values: flag indicating whether blank values in
 264         URL encoded queries should be treated as blank strings.  A
 265         true value indicates that blanks should be retained as blank
 266         strings.  The default false value indicates that blank values
 267         are to be ignored and treated as if they were  not included.
 268
 269     strict_parsing: flag indicating what to do with parsing errors. If
 270         false (the default), errors are silently ignored. If true,
 271         errors raise a ValueError exception.
 272
 273     Returns a dict, as G-d intended.
 274     """
 275     pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
 276     d = {}
 277     for name_value in pairs:
 278         if not name_value and not strict_parsing:
 279             continue
 280         nv = name_value.split('=', 1)
 281         if len(nv) != 2:
 282             if strict_parsing:
 283                 raise ValueError("bad query field: %r" % (name_value,))
 284             # Handle case of a control-name with no equal sign
 285             if keep_blank_values:
 286                 nv.append('')
 287             else:
 288                 continue
 289         if len(nv[1]) or keep_blank_values:
 290             name = unquote_qs(nv[0], encoding)
 291             value = unquote_qs(nv[1], encoding)
 292             if name in d:
 293                 if not isinstance(d[name], list):
 294                     d[name] = [d[name]]
 295                 d[name].append(value)
 296             else:
 297                 d[name] = value
 298     return d
 299
 300
 301 image_map_pattern = re.compile(r"[0-9]+,[0-9]+")
 302
 303 def parse_query_string(query_string, keep_blank_values=True, encoding='utf-8'):
 304     """Build a params dictionary from a query_string.
 305
 306     Duplicate key/value pairs in the provided query_string will be
 307     returned as {'key': [val1, val2, ...]}. Single key/values will
 308     be returned as strings: {'key': 'value'}.
 309     """
 310     if image_map_pattern.match(query_string):
 311         # Server-side image map. Map the coords to 'x' and 'y'
 312         # (like CGI::Request does).
 313         pm = query_string.split(",")
 314         pm = {'x': int(pm[0]), 'y': int(pm[1])}
 315     else:
 316         pm = _parse_qs(query_string, keep_blank_values, encoding=encoding)
 317     return pm
 318
 319
 320 class CaseInsensitiveDict(dict):
 321     """A case-insensitive dict subclass.
 322
 323     Each key is changed on entry to str(key).title().
 324     """
 325
 326     def __getitem__(self, key):
 327         return dict.__getitem__(self, str(key).title())
 328
 329     def __setitem__(self, key, value):
 330         dict.__setitem__(self, str(key).title(), value)
 331
 332     def __delitem__(self, key):
 333         dict.__delitem__(self, str(key).title())
 334
 335     def __contains__(self, key):
 336         return dict.__contains__(self, str(key).title())
 337
 338     def get(self, key, default=None):
 339         return dict.get(self, str(key).title(), default)
 340
 341     def has_key(self, key):
 342         return dict.has_key(self, str(key).title())
 343
 344     def update(self, E):
 345         for k in E.keys():
 346             self[str(k).title()] = E[k]
 347
 348     def fromkeys(cls, seq, value=None):
 349         newdict = cls()
 350         for k in seq:
 351             newdict[str(k).title()] = value
 352         return newdict
 353     fromkeys = classmethod(fromkeys)
 354
 355     def setdefault(self, key, x=None):
 356         key = str(key).title()
 357         try:
 358             return self[key]
 359         except KeyError:
 360             self[key] = x
 361             return x
 362
 363     def pop(self, key, default):
 364         return dict.pop(self, str(key).title(), default)
 365
 366
# From RFC 2616, section 2.2:
#
#   TEXT = <any OCTET except CTLs, but including LWS>
#
# "A CRLF is allowed in the definition of TEXT only as part of a header
# field continuation. It is expected that the folding LWS will be
# replaced with a single SP before interpretation of the TEXT value."
 372 header_translate_table = ''.join([chr(i) for i in xrange(256)])
 373 header_translate_deletechars = ''.join([chr(i) for i in xrange(32)]) + chr(127)
 374
 375
 376 class HeaderMap(CaseInsensitiveDict):
 377     """A dict subclass for HTTP request and response headers.
 378
 379     Each key is changed on entry to str(key).title(). This allows headers
 380     to be case-insensitive and avoid duplicates.
 381
 382     Values are header values (decoded according to :rfc:`2047` if necessary).
 383     """
 384
 385     protocol=(1, 1)
 386     encodings = ["ISO-8859-1"]
 387
 388     # Someday, when http-bis is done, this will probably get dropped
 389     # since few servers, clients, or intermediaries do it. But until then,
 390     # we're going to obey the spec as is.
 391     # "Words of *TEXT MAY contain characters from character sets other than
 392     # ISO-8859-1 only when encoded according to the rules of RFC 2047."
 393     use_rfc_2047 = True
 394
 395     def elements(self, key):
 396         """Return a sorted list of HeaderElements for the given header."""
 397         key = str(key).title()
 398         value = self.get(key)
 399         return header_elements(key, value)
 400
 401     def values(self, key):
 402         """Return a sorted list of HeaderElement.value for the given header."""
 403         return [e.value for e in self.elements(key)]
 404
 405     def output(self):
 406         """Transform self into a list of (name, value) tuples."""
 407         header_list = []
 408         for k, v in self.items():
 409             if isinstance(k, unicodestr):
 410                 k = self.encode(k)
 411
 412             if not isinstance(v, basestring):
 413                 v = str(v)
 414
 415             if isinstance(v, unicodestr):
 416                 v = self.encode(v)
 417
 418             # See header_translate_* constants above.
 419             # Replace only if you really know what you're doing.
 420             k = k.translate(header_translate_table, header_translate_deletechars)
 421             v = v.translate(header_translate_table, header_translate_deletechars)
 422
 423             header_list.append((k, v))
 424         return header_list
 425
 426     def encode(self, v):
 427         """Return the given header name or value, encoded for HTTP output."""
 428         for enc in self.encodings:
 429             try:
 430                 return v.encode(enc)
 431             except UnicodeEncodeError:
 432                 continue
 433
 434         if self.protocol == (1, 1) and self.use_rfc_2047:
 435             # Encode RFC-2047 TEXT
 436             # (e.g. u"\u8200" -> "=?utf-8?b?6IiA?=").
 437             # We do our own here instead of using the email module
 438             # because we never want to fold lines--folding has
 439             # been deprecated by the HTTP working group.
 440             v = b2a_base64(v.encode('utf-8'))
 441             return (ntob('=?utf-8?b?') + v.strip(ntob('\n')) + ntob('?='))
 442
 443         raise ValueError("Could not encode header part %r using "
 444                          "any of the encodings %r." %
 445                          (v, self.encodings))
 446
 447
 448 class Host(object):
 449     """An internet address.
 450
 451     name
 452         Should be the client's host name. If not available (because no DNS
 453         lookup is performed), the IP address should be used instead.
 454
 455     """
 456
 457     ip = "0.0.0.0"
 458     port = 80
 459     name = "unknown.tld"
 460
 461     def __init__(self, ip, port, name=None):
 462         self.ip = ip
 463         self.port = port
 464         if name is None:
 465             name = ip
 466         self.name = name
 467
 468     def __repr__(self):
 469         return "httputil.Host(%r, %r, %r)" % (self.ip, self.port, self.name)