Lib/httplib.py

   1 """HTTP/1.1 client library
   2
   3 <intro stuff goes here>
   4 <other stuff, too>
   5
   6 HTTPConnection goes through a number of "states", which define when a client
   7 may legally make another request or fetch the response for a particular
   8 request. This diagram details these state transitions:
   9
  10     (null)
  11       |
  12       | HTTPConnection()
  13       v
  14     Idle
  15       |
  16       | putrequest()
  17       v
  18     Request-started
  19       |
  20       | ( putheader() )*  endheaders()
  21       v
  22     Request-sent
  23       |
  24       | response = getresponse()
  25       v
  26     Unread-response   [Response-headers-read]
  27       |\____________________
  28       |                     |
  29       | response.read()     | putrequest()
  30       v                     v
  31     Idle                  Req-started-unread-response
  32                      ______/|
  33                    /        |
  34    response.read() |        | ( putheader() )*  endheaders()
  35                    v        v
  36        Request-started    Req-sent-unread-response
  37                             |
  38                             | response.read()
  39                             v
  40                           Request-sent
  41
  42 This diagram presents the following rules:
  43   -- a second request may not be started until {response-headers-read}
  44   -- a response [object] cannot be retrieved until {request-sent}
  45   -- there is no differentiation between an unread response body and a
  46      partially read response body
  47
  48 Note: this enforcement is applied by the HTTPConnection class. The
  49       HTTPResponse class does not enforce this state machine, which
  50       implies sophisticated clients may accelerate the request/response
  51       pipeline. Caution should be taken, though: accelerating the states
  52       beyond the above pattern may imply knowledge of the server's
  53       connection-close behavior for certain requests. For example, it
  54       is impossible to tell whether the server will close the connection
  55       UNTIL the response headers have been read; this means that further
  56       requests cannot be placed into the pipeline until it is known that
  57       the server will NOT be closing the connection.
  58
  59 Logical State                  __state            __response
  60 -------------                  -------            ----------
  61 Idle                           _CS_IDLE           None
  62 Request-started                _CS_REQ_STARTED    None
  63 Request-sent                   _CS_REQ_SENT       None
  64 Unread-response                _CS_IDLE           <response_class>
  65 Req-started-unread-response    _CS_REQ_STARTED    <response_class>
  66 Req-sent-unread-response       _CS_REQ_SENT       <response_class>
  67 """
  68
  69 import socket
  70 from sys import py3kwarning
  71 from urlparse import urlsplit
  72 import warnings
  73 with warnings.catch_warnings():
  74     if py3kwarning:
  75         warnings.filterwarnings("ignore", ".*mimetools has been removed",
  76                                 DeprecationWarning)
  77     import mimetools
  78
  79 try:
  80     from cStringIO import StringIO
  81 except ImportError:
  82     from StringIO import StringIO
  83
# Public names of this module (what "from httplib import *" provides).
__all__ = ["HTTP", "HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "error", "responses"]

# Default TCP ports; HTTP_PORT is HTTPConnection's default_port.
HTTP_PORT = 80
HTTPS_PORT = 443

# Placeholder stored in HTTPResponse attributes (version, status, length,
# ...) until the response has actually been parsed.
_UNKNOWN = 'UNKNOWN'

# connection states
# (values of HTTPConnection's private __state; see the state table in the
# module docstring)
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'
 100
# status codes
# Symbolic names for the numeric HTTP status codes, grouped by class
# (1xx informational ... 5xx server error).
# informational
CONTINUE = 100
SWITCHING_PROTOCOLS = 101
PROCESSING = 102

# successful
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207
IM_USED = 226

# redirection
MULTIPLE_CHOICES = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307

# client error
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
METHOD_NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTHENTICATION_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
UNPROCESSABLE_ENTITY = 422
LOCKED = 423
FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426

# server error
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510
 160
# Mapping status codes to official W3C names
# Keys are the integer status codes, values the reason phrases.
# NOTE: not every constant defined above has an entry here (e.g. 102, 207,
# 226, 422-426, 507 and 510 are absent), so look codes up with .get().
responses = {
    100: 'Continue',
    101: 'Switching Protocols',

    200: 'OK',
    201: 'Created',
    202: 'Accepted',
    203: 'Non-Authoritative Information',
    204: 'No Content',
    205: 'Reset Content',
    206: 'Partial Content',

    300: 'Multiple Choices',
    301: 'Moved Permanently',
    302: 'Found',
    303: 'See Other',
    304: 'Not Modified',
    305: 'Use Proxy',
    306: '(Unused)',
    307: 'Temporary Redirect',

    400: 'Bad Request',
    401: 'Unauthorized',
    402: 'Payment Required',
    403: 'Forbidden',
    404: 'Not Found',
    405: 'Method Not Allowed',
    406: 'Not Acceptable',
    407: 'Proxy Authentication Required',
    408: 'Request Timeout',
    409: 'Conflict',
    410: 'Gone',
    411: 'Length Required',
    412: 'Precondition Failed',
    413: 'Request Entity Too Large',
    414: 'Request-URI Too Long',
    415: 'Unsupported Media Type',
    416: 'Requested Range Not Satisfiable',
    417: 'Expectation Failed',

    500: 'Internal Server Error',
    501: 'Not Implemented',
    502: 'Bad Gateway',
    503: 'Service Unavailable',
    504: 'Gateway Timeout',
    505: 'HTTP Version Not Supported',
}

# maximal amount of data to read at one time in _safe_read
# (1048576 bytes == 1 MiB per fp.read() call)
MAXAMOUNT = 1048576
 212
 213 class HTTPMessage(mimetools.Message):
 214
 215     def addheader(self, key, value):
 216         """Add header for field key handling repeats."""
 217         prev = self.dict.get(key)
 218         if prev is None:
 219             self.dict[key] = value
 220         else:
 221             combined = ", ".join((prev, value))
 222             self.dict[key] = combined
 223
 224     def addcontinue(self, key, more):
 225         """Add more field data from a continuation line."""
 226         prev = self.dict[key]
 227         self.dict[key] = prev + "\n " + more
 228
 229     def readheaders(self):
 230         """Read header lines.
 231
 232         Read header lines up to the entirely blank line that terminates them.
 233         The (normally blank) line that ends the headers is skipped, but not
 234         included in the returned list.  If a non-header line ends the headers,
 235         (which is an error), an attempt is made to backspace over it; it is
 236         never included in the returned list.
 237
 238         The variable self.status is set to the empty string if all went well,
 239         otherwise it is an error message.  The variable self.headers is a
 240         completely uninterpreted list of lines contained in the header (so
 241         printing them will reproduce the header exactly as it appears in the
 242         file).
 243
 244         If multiple header fields with the same name occur, they are combined
 245         according to the rules in RFC 2616 sec 4.2:
 246
 247         Appending each subsequent field-value to the first, each separated
 248         by a comma. The order in which header fields with the same field-name
 249         are received is significant to the interpretation of the combined
 250         field value.
 251         """
 252         # XXX The implementation overrides the readheaders() method of
 253         # rfc822.Message.  The base class design isn't amenable to
 254         # customized behavior here so the method here is a copy of the
 255         # base class code with a few small changes.
 256
 257         self.dict = {}
 258         self.unixfrom = ''
 259         self.headers = hlist = []
 260         self.status = ''
 261         headerseen = ""
 262         firstline = 1
 263         startofline = unread = tell = None
 264         if hasattr(self.fp, 'unread'):
 265             unread = self.fp.unread
 266         elif self.seekable:
 267             tell = self.fp.tell
 268         while True:
 269             if tell:
 270                 try:
 271                     startofline = tell()
 272                 except IOError:
 273                     startofline = tell = None
 274                     self.seekable = 0
 275             line = self.fp.readline()
 276             if not line:
 277                 self.status = 'EOF in headers'
 278                 break
 279             # Skip unix From name time lines
 280             if firstline and line.startswith('From '):
 281                 self.unixfrom = self.unixfrom + line
 282                 continue
 283             firstline = 0
 284             if headerseen and line[0] in ' \t':
 285                 # XXX Not sure if continuation lines are handled properly
 286                 # for http and/or for repeating headers
 287                 # It's a continuation line.
 288                 hlist.append(line)
 289                 self.addcontinue(headerseen, line.strip())
 290                 continue
 291             elif self.iscomment(line):
 292                 # It's a comment.  Ignore it.
 293                 continue
 294             elif self.islast(line):
 295                 # Note! No pushback here!  The delimiter line gets eaten.
 296                 break
 297             headerseen = self.isheader(line)
 298             if headerseen:
 299                 # It's a legal header line, save it.
 300                 hlist.append(line)
 301                 self.addheader(headerseen, line[len(headerseen)+1:].strip())
 302                 continue
 303             else:
 304                 # It's not a header line; throw it back and stop here.
 305                 if not self.dict:
 306                     self.status = 'No headers'
 307                 else:
 308                     self.status = 'Non-header line where header expected'
 309                 # Try to undo the read.
 310                 if unread:
 311                     unread(line)
 312                 elif tell:
 313                     self.fp.seek(startofline)
 314                 else:
 315                     self.status = self.status + '; bad seek'
 316                 break
 317
 318 class HTTPResponse:
 319
 320     # strict: If true, raise BadStatusLine if the status line can't be
 321     # parsed as a valid HTTP/1.0 or 1.1 status line.  By default it is
 322     # false because it prevents clients from talking to HTTP/0.9
 323     # servers.  Note that a response with a sufficiently corrupted
 324     # status line will look like an HTTP/0.9 response.
 325
 326     # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
 327
 328     def __init__(self, sock, debuglevel=0, strict=0, method=None, buffering=False):
 329         if buffering:
 330             # The caller won't be using any sock.recv() calls, so buffering
 331             # is fine and recommendef for performance
 332             self.fp = sock.makefile('rb')
 333         else:
 334             # The buffer size is specified as zero, because the headers of
 335             # the response are read with readline().  If the reads were
 336             # buffered the readline() calls could consume some of the
 337             # response, which make be read via a recv() on the underlying
 338             # socket.
 339             self.fp = sock.makefile('rb', 0)
 340         self.debuglevel = debuglevel
 341         self.strict = strict
 342         self._method = method
 343
 344         self.msg = None
 345
 346         # from the Status-Line of the response
 347         self.version = _UNKNOWN # HTTP-Version
 348         self.status = _UNKNOWN  # Status-Code
 349         self.reason = _UNKNOWN  # Reason-Phrase
 350
 351         self.chunked = _UNKNOWN         # is "chunked" being used?
 352         self.chunk_left = _UNKNOWN      # bytes left to read in current chunk
 353         self.length = _UNKNOWN          # number of bytes left in response
 354         self.will_close = _UNKNOWN      # conn will close at end of response
 355
 356     def _read_status(self):
 357         # Initialize with Simple-Response defaults
 358         line = self.fp.readline()
 359         if self.debuglevel > 0:
 360             print "reply:", repr(line)
 361         if not line:
 362             # Presumably, the server closed the connection before
 363             # sending a valid response.
 364             raise BadStatusLine(line)
 365         try:
 366             [version, status, reason] = line.split(None, 2)
 367         except ValueError:
 368             try:
 369                 [version, status] = line.split(None, 1)
 370                 reason = ""
 371             except ValueError:
 372                 # empty version will cause next test to fail and status
 373                 # will be treated as 0.9 response.
 374                 version = ""
 375         if not version.startswith('HTTP/'):
 376             if self.strict:
 377                 self.close()
 378                 raise BadStatusLine(line)
 379             else:
 380                 # assume it's a Simple-Response from an 0.9 server
 381                 self.fp = LineAndFileWrapper(line, self.fp)
 382                 return "HTTP/0.9", 200, ""
 383
 384         # The status code is a three-digit number
 385         try:
 386             status = int(status)
 387             if status < 100 or status > 999:
 388                 raise BadStatusLine(line)
 389         except ValueError:
 390             raise BadStatusLine(line)
 391         return version, status, reason
 392
 393     def begin(self):
 394         if self.msg is not None:
 395             # we've already started reading the response
 396             return
 397
 398         # read until we get a non-100 response
 399         while True:
 400             version, status, reason = self._read_status()
 401             if status != CONTINUE:
 402                 break
 403             # skip the header from the 100 response
 404             while True:
 405                 skip = self.fp.readline().strip()
 406                 if not skip:
 407                     break
 408                 if self.debuglevel > 0:
 409                     print "header:", skip
 410
 411         self.status = status
 412         self.reason = reason.strip()
 413         if version == 'HTTP/1.0':
 414             self.version = 10
 415         elif version.startswith('HTTP/1.'):
 416             self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
 417         elif version == 'HTTP/0.9':
 418             self.version = 9
 419         else:
 420             raise UnknownProtocol(version)
 421
 422         if self.version == 9:
 423             self.length = None
 424             self.chunked = 0
 425             self.will_close = 1
 426             self.msg = HTTPMessage(StringIO())
 427             return
 428
 429         self.msg = HTTPMessage(self.fp, 0)
 430         if self.debuglevel > 0:
 431             for hdr in self.msg.headers:
 432                 print "header:", hdr,
 433
 434         # don't let the msg keep an fp
 435         self.msg.fp = None
 436
 437         # are we using the chunked-style of transfer encoding?
 438         tr_enc = self.msg.getheader('transfer-encoding')
 439         if tr_enc and tr_enc.lower() == "chunked":
 440             self.chunked = 1
 441             self.chunk_left = None
 442         else:
 443             self.chunked = 0
 444
 445         # will the connection close at the end of the response?
 446         self.will_close = self._check_close()
 447
 448         # do we have a Content-Length?
 449         # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
 450         length = self.msg.getheader('content-length')
 451         if length and not self.chunked:
 452             try:
 453                 self.length = int(length)
 454             except ValueError:
 455                 self.length = None
 456             else:
 457                 if self.length < 0:  # ignore nonsensical negative lengths
 458                     self.length = None
 459         else:
 460             self.length = None
 461
 462         # does the body have a fixed length? (of zero)
 463         if (status == NO_CONTENT or status == NOT_MODIFIED or
 464             100 <= status < 200 or      # 1xx codes
 465             self._method == 'HEAD'):
 466             self.length = 0
 467
 468         # if the connection remains open, and we aren't using chunked, and
 469         # a content-length was not provided, then assume that the connection
 470         # WILL close.
 471         if not self.will_close and \
 472            not self.chunked and \
 473            self.length is None:
 474             self.will_close = 1
 475
 476     def _check_close(self):
 477         conn = self.msg.getheader('connection')
 478         if self.version == 11:
 479             # An HTTP/1.1 proxy is assumed to stay open unless
 480             # explicitly closed.
 481             conn = self.msg.getheader('connection')
 482             if conn and "close" in conn.lower():
 483                 return True
 484             return False
 485
 486         # Some HTTP/1.0 implementations have support for persistent
 487         # connections, using rules different than HTTP/1.1.
 488
 489         # For older HTTP, Keep-Alive indicates persistent connection.
 490         if self.msg.getheader('keep-alive'):
 491             return False
 492
 493         # At least Akamai returns a "Connection: Keep-Alive" header,
 494         # which was supposed to be sent by the client.
 495         if conn and "keep-alive" in conn.lower():
 496             return False
 497
 498         # Proxy-Connection is a netscape hack.
 499         pconn = self.msg.getheader('proxy-connection')
 500         if pconn and "keep-alive" in pconn.lower():
 501             return False
 502
 503         # otherwise, assume it will close
 504         return True
 505
 506     def close(self):
 507         if self.fp:
 508             self.fp.close()
 509             self.fp = None
 510
 511     def isclosed(self):
 512         # NOTE: it is possible that we will not ever call self.close(). This
 513         #       case occurs when will_close is TRUE, length is None, and we
 514         #       read up to the last byte, but NOT past it.
 515         #
 516         # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
 517         #          called, meaning self.isclosed() is meaningful.
 518         return self.fp is None
 519
 520     # XXX It would be nice to have readline and __iter__ for this, too.
 521
 522     def read(self, amt=None):
 523         if self.fp is None:
 524             return ''
 525
 526         if self.chunked:
 527             return self._read_chunked(amt)
 528
 529         if amt is None:
 530             # unbounded read
 531             if self.length is None:
 532                 s = self.fp.read()
 533             else:
 534                 s = self._safe_read(self.length)
 535                 self.length = 0
 536             self.close()        # we read everything
 537             return s
 538
 539         if self.length is not None:
 540             if amt > self.length:
 541                 # clip the read to the "end of response"
 542                 amt = self.length
 543
 544         # we do not use _safe_read() here because this may be a .will_close
 545         # connection, and the user is reading more bytes than will be provided
 546         # (for example, reading in 1k chunks)
 547         s = self.fp.read(amt)
 548         if self.length is not None:
 549             self.length -= len(s)
 550             if not self.length:
 551                 self.close()
 552         return s
 553
 554     def _read_chunked(self, amt):
 555         assert self.chunked != _UNKNOWN
 556         chunk_left = self.chunk_left
 557         value = ''
 558
 559         # XXX This accumulates chunks by repeated string concatenation,
 560         # which is not efficient as the number or size of chunks gets big.
 561         while True:
 562             if chunk_left is None:
 563                 line = self.fp.readline()
 564                 i = line.find(';')
 565                 if i >= 0:
 566                     line = line[:i] # strip chunk-extensions
 567                 try:
 568                     chunk_left = int(line, 16)
 569                 except ValueError:
 570                     # close the connection as protocol synchronisation is
 571                     # probably lost
 572                     self.close()
 573                     raise IncompleteRead(value)
 574                 if chunk_left == 0:
 575                     break
 576             if amt is None:
 577                 value += self._safe_read(chunk_left)
 578             elif amt < chunk_left:
 579                 value += self._safe_read(amt)
 580                 self.chunk_left = chunk_left - amt
 581                 return value
 582             elif amt == chunk_left:
 583                 value += self._safe_read(amt)
 584                 self._safe_read(2)  # toss the CRLF at the end of the chunk
 585                 self.chunk_left = None
 586                 return value
 587             else:
 588                 value += self._safe_read(chunk_left)
 589                 amt -= chunk_left
 590
 591             # we read the whole chunk, get another
 592             self._safe_read(2)      # toss the CRLF at the end of the chunk
 593             chunk_left = None
 594
 595         # read and discard trailer up to the CRLF terminator
 596         ### note: we shouldn't have any trailers!
 597         while True:
 598             line = self.fp.readline()
 599             if not line:
 600                 # a vanishingly small number of sites EOF without
 601                 # sending the trailer
 602                 break
 603             if line == '\r\n':
 604                 break
 605
 606         # we read everything; close the "file"
 607         self.close()
 608
 609         return value
 610
 611     def _safe_read(self, amt):
 612         """Read the number of bytes requested, compensating for partial reads.
 613
 614         Normally, we have a blocking socket, but a read() can be interrupted
 615         by a signal (resulting in a partial read).
 616
 617         Note that we cannot distinguish between EOF and an interrupt when zero
 618         bytes have been read. IncompleteRead() will be raised in this
 619         situation.
 620
 621         This function should be used when <amt> bytes "should" be present for
 622         reading. If the bytes are truly not available (due to EOF), then the
 623         IncompleteRead exception can be used to detect the problem.
 624         """
 625         s = []
 626         while amt > 0:
 627             chunk = self.fp.read(min(amt, MAXAMOUNT))
 628             if not chunk:
 629                 raise IncompleteRead(''.join(s), amt)
 630             s.append(chunk)
 631             amt -= len(chunk)
 632         return ''.join(s)
 633
 634     def getheader(self, name, default=None):
 635         if self.msg is None:
 636             raise ResponseNotReady()
 637         return self.msg.getheader(name, default)
 638
 639     def getheaders(self):
 640         """Return list of (header, value) tuples."""
 641         if self.msg is None:
 642             raise ResponseNotReady()
 643         return self.msg.items()
 644
 645
 646 class HTTPConnection:
 647
 648     _http_vsn = 11
 649     _http_vsn_str = 'HTTP/1.1'
 650
 651     response_class = HTTPResponse
 652     default_port = HTTP_PORT
 653     auto_open = 1
 654     debuglevel = 0
 655     strict = 0
 656
 657     def __init__(self, host, port=None, strict=None,
 658                  timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
 659         self.timeout = timeout
 660         self.sock = None
 661         self._buffer = []
 662         self.__response = None
 663         self.__state = _CS_IDLE
 664         self._method = None
 665
 666         self._set_hostport(host, port)
 667         if strict is not None:
 668             self.strict = strict
 669
 670     def _set_hostport(self, host, port):
 671         if port is None:
 672             i = host.rfind(':')
 673             j = host.rfind(']')         # ipv6 addresses have [...]
 674             if i > j:
 675                 try:
 676                     port = int(host[i+1:])
 677                 except ValueError:
 678                     raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
 679                 host = host[:i]
 680             else:
 681                 port = self.default_port
 682             if host and host[0] == '[' and host[-1] == ']':
 683                 host = host[1:-1]
 684         self.host = host
 685         self.port = port
 686
    def set_debuglevel(self, level):
        """Set the debug level; send() prints what it sends when it is > 0."""
        self.debuglevel = level
 689
 690     def connect(self):
 691         """Connect to the host and port specified in __init__."""
 692         self.sock = socket.create_connection((self.host,self.port),
 693                                              self.timeout)
 694
    def close(self):
        """Close the connection to the HTTP server.

        Also closes and forgets any response still attached to this
        connection and resets the state machine to idle.
        """
        if self.sock:
            self.sock.close()   # close it manually... there may be other refs
            self.sock = None
        if self.__response:
            self.__response.close()
            self.__response = None
        # a new request may be started from scratch after this
        self.__state = _CS_IDLE
 704
 705     def send(self, str):
 706         """Send `str' to the server."""
 707         if self.sock is None:
 708             if self.auto_open:
 709                 self.connect()
 710             else:
 711                 raise NotConnected()
 712
 713         # send the data to the server. if we get a broken pipe, then close
 714         # the socket. we want to reconnect when somebody tries to send again.
 715         #
 716         # NOTE: we DO propagate the error, though, because we cannot simply
 717         #       ignore the error... the caller will know if they can retry.
 718         if self.debuglevel > 0:
 719             print "send:", repr(str)
 720         try:
 721             blocksize=8192
 722             if hasattr(str,'read') :
 723                 if self.debuglevel > 0: print "sendIng a read()able"
 724                 data=str.read(blocksize)
 725                 while data:
 726                     self.sock.sendall(data)
 727                     data=str.read(blocksize)
 728             else:
 729                 self.sock.sendall(str)
 730         except socket.error, v:
 731             if v[0] == 32:      # Broken pipe
 732                 self.close()
 733             raise
 734
    def _output(self, s):
        """Add a line of output to the current request buffer.

        Assumes that the line does *not* end with \\r\\n.
        Nothing is sent here; _send_output() joins the buffered lines with
        \\r\\n and transmits them.
        """
        self._buffer.append(s)
 741
 742     def _send_output(self, message_body=None):
 743         """Send the currently buffered request and clear the buffer.
 744
 745         Appends an extra \\r\\n to the buffer.
 746         A message_body may be specified, to be appended to the request.
 747         """
 748         self._buffer.extend(("", ""))
 749         msg = "\r\n".join(self._buffer)
 750         del self._buffer[:]
 751         # If msg and message_body are sent in a single send() call,
 752         # it will avoid performance problems caused by the interaction
 753         # between delayed ack and the Nagle algorithim.
 754         if isinstance(message_body, str):
 755             msg += message_body
 756             message_body = None
 757         self.send(msg)
 758         if message_body is not None:
 759             #message_body was not a string (i.e. it is a file) and
 760             #we must run the risk of Nagle
 761             self.send(message_body)
 762
 763     def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
 764         """Send a request to the server.
 765
 766         `method' specifies an HTTP request method, e.g. 'GET'.
 767         `url' specifies the object being requested, e.g. '/index.html'.
 768         `skip_host' if True does not add automatically a 'Host:' header
 769         `skip_accept_encoding' if True does not add automatically an
 770            'Accept-Encoding:' header
 771         """
 772
 773         # if a prior response has been completed, then forget about it.
 774         if self.__response and self.__response.isclosed():
 775             self.__response = None
 776
 777
 778         # in certain cases, we cannot issue another request on this connection.
 779         # this occurs when:
 780         #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
 781         #   2) a response to a previous request has signalled that it is going
 782         #      to close the connection upon completion.
 783         #   3) the headers for the previous response have not been read, thus
 784         #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
 785         #
 786         # if there is no prior response, then we can request at will.
 787         #
 788         # if point (2) is true, then we will have passed the socket to the
 789         # response (effectively meaning, "there is no prior response"), and
 790         # will open a new one when a new request is made.
 791         #
 792         # Note: if a prior response exists, then we *can* start a new request.
 793         #       We are not allowed to begin fetching the response to this new
 794         #       request, however, until that prior response is complete.
 795         #
 796         if self.__state == _CS_IDLE:
 797             self.__state = _CS_REQ_STARTED
 798         else:
 799             raise CannotSendRequest()
 800
 801         # Save the method we use, we need it later in the response phase
 802         self._method = method
 803         if not url:
 804             url = '/'
 805         str = '%s %s %s' % (method, url, self._http_vsn_str)
 806
 807         self._output(str)
 808
 809         if self._http_vsn == 11:
 810             # Issue some standard headers for better HTTP/1.1 compliance
 811
 812             if not skip_host:
 813                 # this header is issued *only* for HTTP/1.1
 814                 # connections. more specifically, this means it is
 815                 # only issued when the client uses the new
 816                 # HTTPConnection() class. backwards-compat clients
 817                 # will be using HTTP/1.0 and those clients may be
 818                 # issuing this header themselves. we should NOT issue
 819                 # it twice; some web servers (such as Apache) barf
 820                 # when they see two Host: headers
 821
                # If we need a non-standard port, include it in the
                # header.  If the request is going through a proxy,
                # put the host of the actual URL, not the host of the
                # proxy.
 826
 827                 netloc = ''
 828                 if url.startswith('http'):
 829                     nil, netloc, nil, nil, nil = urlsplit(url)
 830
 831                 if netloc:
 832                     try:
 833                         netloc_enc = netloc.encode("ascii")
 834                     except UnicodeEncodeError:
 835                         netloc_enc = netloc.encode("idna")
 836                     self.putheader('Host', netloc_enc)
 837                 else:
 838                     try:
 839                         host_enc = self.host.encode("ascii")
 840                     except UnicodeEncodeError:
 841                         host_enc = self.host.encode("idna")
 842                     if self.port == self.default_port:
 843                         self.putheader('Host', host_enc)
 844                     else:
 845                         self.putheader('Host', "%s:%s" % (host_enc, self.port))
 846
 847             # note: we are assuming that clients will not attempt to set these
 848             #       headers since *this* library must deal with the
 849             #       consequences. this also means that when the supporting
 850             #       libraries are updated to recognize other forms, then this
 851             #       code should be changed (removed or updated).
 852
 853             # we only want a Content-Encoding of "identity" since we don't
 854             # support encodings such as x-gzip or x-deflate.
 855             if not skip_accept_encoding:
 856                 self.putheader('Accept-Encoding', 'identity')
 857
 858             # we can accept "chunked" Transfer-Encodings, but no others
 859             # NOTE: no TE header implies *only* "chunked"
 860             #self.putheader('TE', 'chunked')
 861
 862             # if TE is supplied in the header, then it must appear in a
 863             # Connection header.
 864             #self.putheader('Connection', 'TE')
 865
 866         else:
 867             # For HTTP/1.0, the server will assume "not chunked"
 868             pass
 869
 870     def putheader(self, header, *values):
 871         """Send a request header line to the server.
 872
 873         For example: h.putheader('Accept', 'text/html')
 874         """
 875         if self.__state != _CS_REQ_STARTED:
 876             raise CannotSendHeader()
 877
 878         str = '%s: %s' % (header, '\r\n\t'.join(values))
 879         self._output(str)
 880
 881     def endheaders(self, message_body=None):
 882         """Indicate that the last header line has been sent to the server.
 883
 884         This method sends the request to the server.  The optional
 885         message_body argument can be used to pass message body
 886         associated with the request.  The message body will be sent in
 887         the same packet as the message headers if possible.  The
 888         message_body should be a string.
 889         """
 890         if self.__state == _CS_REQ_STARTED:
 891             self.__state = _CS_REQ_SENT
 892         else:
 893             raise CannotSendHeader()
 894         self._send_output(message_body)
 895
 896     def request(self, method, url, body=None, headers={}):
 897         """Send a complete request to the server."""
 898
 899         try:
 900             self._send_request(method, url, body, headers)
 901         except socket.error, v:
 902             # trap 'Broken pipe' if we're allowed to automatically reconnect
 903             if v[0] != 32 or not self.auto_open:
 904                 raise
 905             # try one more time
 906             self._send_request(method, url, body, headers)
 907
 908     def _set_content_length(self, body):
 909         # Set the content-length based on the body.
 910         thelen = None
 911         try:
 912             thelen = str(len(body))
 913         except TypeError, te:
 914             # If this is a file-like object, try to
 915             # fstat its file descriptor
 916             import os
 917             try:
 918                 thelen = str(os.fstat(body.fileno()).st_size)
 919             except (AttributeError, OSError):
 920                 # Don't send a length if this failed
 921                 if self.debuglevel > 0: print "Cannot stat!!"
 922
 923         if thelen is not None:
 924             self.putheader('Content-Length', thelen)
 925
 926     def _send_request(self, method, url, body, headers):
 927         # honour explicitly requested Host: and Accept-Encoding headers
 928         header_names = dict.fromkeys([k.lower() for k in headers])
 929         skips = {}
 930         if 'host' in header_names:
 931             skips['skip_host'] = 1
 932         if 'accept-encoding' in header_names:
 933             skips['skip_accept_encoding'] = 1
 934
 935         self.putrequest(method, url, **skips)
 936
 937         if body and ('content-length' not in header_names):
 938             self._set_content_length(body)
 939         for hdr, value in headers.iteritems():
 940             self.putheader(hdr, value)
 941         self.endheaders(body)
 942
 943     def getresponse(self, buffering=False):
 944         "Get the response from the server."
 945
 946         # if a prior response has been completed, then forget about it.
 947         if self.__response and self.__response.isclosed():
 948             self.__response = None
 949
 950         #
 951         # if a prior response exists, then it must be completed (otherwise, we
 952         # cannot read this response's header to determine the connection-close
 953         # behavior)
 954         #
 955         # note: if a prior response existed, but was connection-close, then the
 956         # socket and response were made independent of this HTTPConnection
 957         # object since a new request requires that we open a whole new
 958         # connection
 959         #
 960         # this means the prior response had one of two states:
 961         #   1) will_close: this connection was reset and the prior socket and
 962         #                  response operate independently
 963         #   2) persistent: the response was retained and we await its
 964         #                  isclosed() status to become true.
 965         #
 966         if self.__state != _CS_REQ_SENT or self.__response:
 967             raise ResponseNotReady()
 968
 969         args = (self.sock,)
 970         kwds = {"strict":self.strict, "method":self._method}
 971         if self.debuglevel > 0:
 972             args += (self.debuglevel,)
 973         if buffering:
 974             #only add this keyword if non-default, for compatibility with
 975             #other response_classes.
 976             kwds["buffering"] = True;
 977         response = self.response_class(*args, **kwds)
 978
 979         response.begin()
 980         assert response.will_close != _UNKNOWN
 981         self.__state = _CS_IDLE
 982
 983         if response.will_close:
 984             # this effectively passes the connection to the response
 985             self.close()
 986         else:
 987             # remember this, so we can tell when it is complete
 988             self.__response = response
 989
 990         return response
 991
 992
 993 class HTTP:
 994     "Compatibility class with httplib.py from 1.5."
 995
 996     _http_vsn = 10
 997     _http_vsn_str = 'HTTP/1.0'
 998
 999     debuglevel = 0
1000
1001     _connection_class = HTTPConnection
1002
1003     def __init__(self, host='', port=None, strict=None):
1004         "Provide a default host, since the superclass requires one."
1005
1006         # some joker passed 0 explicitly, meaning default port
1007         if port == 0:
1008             port = None
1009
1010         # Note that we may pass an empty string as the host; this will throw
1011         # an error when we attempt to connect. Presumably, the client code
1012         # will call connect before then, with a proper host.
1013         self._setup(self._connection_class(host, port, strict))
1014
1015     def _setup(self, conn):
1016         self._conn = conn
1017
1018         # set up delegation to flesh out interface
1019         self.send = conn.send
1020         self.putrequest = conn.putrequest
1021         self.putheader = conn.putheader
1022         self.endheaders = conn.endheaders
1023         self.set_debuglevel = conn.set_debuglevel
1024
1025         conn._http_vsn = self._http_vsn
1026         conn._http_vsn_str = self._http_vsn_str
1027
1028         self.file = None
1029
1030     def connect(self, host=None, port=None):
1031         "Accept arguments to set the host/port, since the superclass doesn't."
1032
1033         if host is not None:
1034             self._conn._set_hostport(host, port)
1035         self._conn.connect()
1036
1037     def getfile(self):
1038         "Provide a getfile, since the superclass' does not use this concept."
1039         return self.file
1040
1041     def getreply(self, buffering=False):
1042         """Compat definition since superclass does not define it.
1043
1044         Returns a tuple consisting of:
1045         - server status code (e.g. '200' if all goes well)
1046         - server "reason" corresponding to status code
1047         - any RFC822 headers in the response from the server
1048         """
1049         try:
1050             if not buffering:
1051                 response = self._conn.getresponse()
1052             else:
1053                 #only add this keyword if non-default for compatibility
1054                 #with other connection classes
1055                 response = self._conn.getresponse(buffering)
1056         except BadStatusLine, e:
1057             ### hmm. if getresponse() ever closes the socket on a bad request,
1058             ### then we are going to have problems with self.sock
1059
1060             ### should we keep this behavior? do people use it?
1061             # keep the socket open (as a file), and return it
1062             self.file = self._conn.sock.makefile('rb', 0)
1063
1064             # close our socket -- we want to restart after any protocol error
1065             self.close()
1066
1067             self.headers = None
1068             return -1, e.line, None
1069
1070         self.headers = response.msg
1071         self.file = response.fp
1072         return response.status, response.reason, response.msg
1073
1074     def close(self):
1075         self._conn.close()
1076
1077         # note that self.file == response.fp, which gets closed by the
1078         # superclass. just clear the object ref here.
1079         ### hmm. messy. if status==-1, then self.file is owned by us.
1080         ### well... we aren't explicitly closing, but losing this ref will
1081         ### do it
1082         self.file = None
1083
try:
    import ssl
except ImportError:
    # Without the ssl module none of the HTTPS names below are defined.
    pass
else:
    class HTTPSConnection(HTTPConnection):
        "HTTPConnection variant whose socket is wrapped with SSL."

        default_port = HTTPS_PORT

        def __init__(self, host, port=None, key_file=None, cert_file=None,
                     strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
            HTTPConnection.__init__(self, host, port, strict, timeout)
            # remembered for connect(), which hands them to ssl.wrap_socket()
            self.key_file = key_file
            self.cert_file = cert_file

        def connect(self):
            "Open a TCP connection to (host, port) and wrap it with SSL."

            plain_sock = socket.create_connection((self.host, self.port),
                                                  self.timeout)
            self.sock = ssl.wrap_socket(plain_sock,
                                        self.key_file, self.cert_file)

    __all__.append("HTTPSConnection")

    class HTTPS(HTTP):
        """Compatibility with 1.5 httplib interface

        Python 1.5.2 did not have an HTTPS class, but it defined an
        interface for sending http requests that is also useful for
        https.
        """

        _connection_class = HTTPSConnection

        def __init__(self, host='', port=None, key_file=None, cert_file=None,
                     strict=None):
            # provide a default host, pass the X509 cert info

            # urf. compensate for bad input.
            if port == 0:
                port = None
            connection = self._connection_class(host, port, key_file,
                                                cert_file, strict)
            self._setup(connection)

            # we never actually use these for anything, but we keep them
            # here for compatibility with post-1.5.2 CVS.
            self.key_file = key_file
            self.cert_file = cert_file


    def FakeSocket(sock, sslobj):
        # Deprecated shim: warns and hands back sslobj unchanged.
        warnings.warn("FakeSocket is deprecated, and won't be in 3.x.  "
                      "Use the result of ssl.wrap_socket() directly instead.",
                      DeprecationWarning, stacklevel=2)
        return sslobj
1139
1140
class HTTPException(Exception):
    """Common base class of the exceptions defined by this module."""
    # Subclasses that define an __init__ must call Exception.__init__
    # or define self.args.  Otherwise, str() will fail.

class NotConnected(HTTPException):
    pass

class InvalidURL(HTTPException):
    pass

class UnknownProtocol(HTTPException):
    """Carries the offending protocol version in .version."""
    def __init__(self, version):
        self.args = (version,)
        self.version = version

class UnknownTransferEncoding(HTTPException):
    pass

class UnimplementedFileMode(HTTPException):
    pass

class IncompleteRead(HTTPException):
    """Carries the data read so far (.partial) and, optionally, the
    amount that was still expected (.expected)."""
    def __init__(self, partial, expected=None):
        self.args = (partial,)
        self.partial = partial
        self.expected = expected
    def __repr__(self):
        if self.expected is None:
            suffix = ''
        else:
            suffix = ', %i more expected' % self.expected
        return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), suffix)
    def __str__(self):
        return repr(self)

class ImproperConnectionState(HTTPException):
    """Base class for errors about using a connection in the wrong state."""

class CannotSendRequest(ImproperConnectionState):
    pass

class CannotSendHeader(ImproperConnectionState):
    pass

class ResponseNotReady(ImproperConnectionState):
    pass

class BadStatusLine(HTTPException):
    """Carries the offending line in .line."""
    def __init__(self, line):
        self.args = (line,)
        self.line = line

# for backwards compatibility
error = HTTPException
1196
class LineAndFileWrapper:
    """A limited file-like object for HTTP/0.9 responses.

    The wrapper first serves the bytes of an already-read line and then
    continues with the underlying file.  Attributes that are not defined
    here are looked up on the underlying file.
    """

    # The status-line parsing code calls readline(), which normally
    # get the HTTP status line.  For a 0.9 response, however, this is
    # actually the first line of the body!  Clients need to get a
    # readable file object that contains that line.

    def __init__(self, line, file):
        self._line = line
        self._file = file
        self._line_consumed = 0
        self._line_offset = 0          # index of the first unread byte of line
        self._line_left = len(line)    # number of unread bytes of line

    def __getattr__(self, attr):
        # only reached for names not found on the wrapper itself
        return getattr(self._file, attr)

    def _done(self):
        # called when the last byte is read from the line.  After the
        # call, all read methods are delegated to the underlying file
        # object.
        self._line_consumed = 1
        self.read = self._file.read
        self.readline = self._file.readline
        self.readlines = self._file.readlines

    def read(self, amt=None):
        """Read up to amt bytes, starting with the buffered line.

        If amt is None or negative, the rest of the buffered line plus
        everything the underlying file still has is returned.
        """
        if self._line_consumed:
            return self._file.read(amt)
        assert self._line_left
        # A negative amount means "read everything", as for regular file
        # objects; it must not reach the slicing branch below, where it
        # would corrupt the offset bookkeeping.
        read_all = amt is None or amt < 0
        if read_all or amt > self._line_left:
            s = self._line[self._line_offset:]
            self._done()
            if read_all:
                return s + self._file.read()
            else:
                return s + self._file.read(amt - len(s))
        else:
            assert amt <= self._line_left
            i = self._line_offset
            j = i + amt
            s = self._line[i:j]
            self._line_offset = j
            self._line_left -= amt
            if self._line_left == 0:
                self._done()
            return s

    def readline(self):
        """Return the unread rest of the buffered line, then file lines."""
        if self._line_consumed:
            return self._file.readline()
        assert self._line_left
        s = self._line[self._line_offset:]
        self._done()
        return s

    def readlines(self, size=None):
        """Return the rest of the buffered line followed by the lines
        of the underlying file (size is passed on to it unchanged)."""
        if self._line_consumed:
            return self._file.readlines(size)
        assert self._line_left
        L = [self._line[self._line_offset:]]
        self._done()
        if size is None:
            return L + self._file.readlines()
        else:
            return L + self._file.readlines(size)
1264
1265 def test():
1266     """Test this module.
1267
1268     A hodge podge of tests collected here, because they have too many
1269     external dependencies for the regular test suite.
1270     """
1271
1272     import sys
1273     import getopt
1274     opts, args = getopt.getopt(sys.argv[1:], 'd')
1275     dl = 0
1276     for o, a in opts:
1277         if o == '-d': dl = dl + 1
1278     host = 'www.python.org'
1279     selector = '/'
1280     if args[0:]: host = args[0]
1281     if args[1:]: selector = args[1]
1282     h = HTTP()
1283     h.set_debuglevel(dl)
1284     h.connect(host)
1285     h.putrequest('GET', selector)
1286     h.endheaders()
1287     status, reason, headers = h.getreply()
1288     print 'status =', status
1289     print 'reason =', reason
1290     print "read", len(h.getfile().read())
1291     print
1292     if headers:
1293         for header in headers.headers: print header.strip()
1294     print
1295
1296     # minimal test that code to extract host from url works
1297     class HTTP11(HTTP):
1298         _http_vsn = 11
1299         _http_vsn_str = 'HTTP/1.1'
1300
1301     h = HTTP11('www.python.org')
1302     h.putrequest('GET', 'http://www.python.org/~jeremy/')
1303     h.endheaders()
1304     h.getreply()
1305     h.close()
1306
1307     try:
1308         import ssl
1309     except ImportError:
1310         pass
1311     else:
1312
1313         for host, selector in (('sourceforge.net', '/projects/python'),
1314                                ):
1315             print "https://%s%s" % (host, selector)
1316             hs = HTTPS()
1317             hs.set_debuglevel(dl)
1318             hs.connect(host)
1319             hs.putrequest('GET', selector)
1320             hs.endheaders()
1321             status, reason, headers = hs.getreply()
1322             print 'status =', status
1323             print 'reason =', reason
1324             print "read", len(hs.getfile().read())
1325             print
1326             if headers:
1327                 for header in headers.headers: print header.strip()
1328             print
1329
# Run the ad-hoc, network-dependent checks when executed as a script.
if __name__ == '__main__':
    test()