Lib/httplib.py

   1 """HTTP/1.1 client library
   2
   3 <intro stuff goes here>
   4 <other stuff, too>
   5
   6 HTTPConnection goes through a number of "states", which define when a client
   7 may legally make another request or fetch the response for a particular
   8 request. This diagram details these state transitions:
   9
  10     (null)
  11       |
  12       | HTTPConnection()
  13       v
  14     Idle
  15       |
  16       | putrequest()
  17       v
  18     Request-started
  19       |
  20       | ( putheader() )*  endheaders()
  21       v
  22     Request-sent
  23       |
  24       | response = getresponse()
  25       v
  26     Unread-response   [Response-headers-read]
  27       |\____________________
  28       |                     |
  29       | response.read()     | putrequest()
  30       v                     v
  31     Idle                  Req-started-unread-response
  32                      ______/|
  33                    /        |
  34    response.read() |        | ( putheader() )*  endheaders()
  35                    v        v
  36        Request-started    Req-sent-unread-response
  37                             |
  38                             | response.read()
  39                             v
  40                           Request-sent
  41
  42 This diagram presents the following rules:
  43   -- a second request may not be started until {response-headers-read}
  44   -- a response [object] cannot be retrieved until {request-sent}
  45   -- there is no differentiation between an unread response body and a
  46      partially read response body
  47
  48 Note: this enforcement is applied by the HTTPConnection class. The
  49       HTTPResponse class does not enforce this state machine, which
  50       implies sophisticated clients may accelerate the request/response
  51       pipeline. Caution should be taken, though: accelerating the states
  52       beyond the above pattern may imply knowledge of the server's
  53       connection-close behavior for certain requests. For example, it
  54       is impossible to tell whether the server will close the connection
  55       UNTIL the response headers have been read; this means that further
  56       requests cannot be placed into the pipeline until it is known that
  57       the server will NOT be closing the connection.
  58
  59 Logical State                  __state            __response
  60 -------------                  -------            ----------
  61 Idle                           _CS_IDLE           None
  62 Request-started                _CS_REQ_STARTED    None
  63 Request-sent                   _CS_REQ_SENT       None
  64 Unread-response                _CS_IDLE           <response_class>
  65 Req-started-unread-response    _CS_REQ_STARTED    <response_class>
  66 Req-sent-unread-response       _CS_REQ_SENT       <response_class>
  67 """
  68
  69 import socket
  70 from sys import py3kwarning
  71 from urlparse import urlsplit
  72 import warnings
  73 with warnings.catch_warnings():
  74     if py3kwarning:
  75         warnings.filterwarnings("ignore", ".*mimetools has been removed",
  76                                 DeprecationWarning)
  77     import mimetools
  78
  79 try:
  80     from cStringIO import StringIO
  81 except ImportError:
  82     from StringIO import StringIO
  83
# Public names exported by "from httplib import *".
__all__ = ["HTTP", "HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "error", "responses"]

# Default TCP ports.  HTTPConnection.default_port is set to HTTP_PORT.
HTTP_PORT = 80
HTTPS_PORT = 443

# Placeholder stored in HTTPResponse attributes (version, status, length,
# ...) until the corresponding part of the response has been parsed.
_UNKNOWN = 'UNKNOWN'

# connection states
# Values taken by HTTPConnection.__state; the table at the end of the
# module docstring shows how they combine with __response.
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'
 100
# status codes
# Symbolic names for HTTP status codes, grouped by their first digit.
# informational
CONTINUE = 100
SWITCHING_PROTOCOLS = 101
PROCESSING = 102

# successful
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207
IM_USED = 226

# redirection
MULTIPLE_CHOICES = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307

# client error
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
METHOD_NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTHENTICATION_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
UNPROCESSABLE_ENTITY = 422
LOCKED = 423
FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426

# server error
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510
 160
 161 # Mapping status codes to official W3C names
 162 responses = {
 163     100: 'Continue',
 164     101: 'Switching Protocols',
 165
 166     200: 'OK',
 167     201: 'Created',
 168     202: 'Accepted',
 169     203: 'Non-Authoritative Information',
 170     204: 'No Content',
 171     205: 'Reset Content',
 172     206: 'Partial Content',
 173
 174     300: 'Multiple Choices',
 175     301: 'Moved Permanently',
 176     302: 'Found',
 177     303: 'See Other',
 178     304: 'Not Modified',
 179     305: 'Use Proxy',
 180     306: '(Unused)',
 181     307: 'Temporary Redirect',
 182
 183     400: 'Bad Request',
 184     401: 'Unauthorized',
 185     402: 'Payment Required',
 186     403: 'Forbidden',
 187     404: 'Not Found',
 188     405: 'Method Not Allowed',
 189     406: 'Not Acceptable',
 190     407: 'Proxy Authentication Required',
 191     408: 'Request Timeout',
 192     409: 'Conflict',
 193     410: 'Gone',
 194     411: 'Length Required',
 195     412: 'Precondition Failed',
 196     413: 'Request Entity Too Large',
 197     414: 'Request-URI Too Long',
 198     415: 'Unsupported Media Type',
 199     416: 'Requested Range Not Satisfiable',
 200     417: 'Expectation Failed',
 201
 202     500: 'Internal Server Error',
 203     501: 'Not Implemented',
 204     502: 'Bad Gateway',
 205     503: 'Service Unavailable',
 206     504: 'Gateway Timeout',
 207     505: 'HTTP Version Not Supported',
 208 }
 209
# maximal amount of data to read at one time in _safe_read
# (an upper bound, in bytes, on each individual fp.read() call it issues)
MAXAMOUNT = 1048576
 212
 213 class HTTPMessage(mimetools.Message):
 214
 215     def addheader(self, key, value):
 216         """Add header for field key handling repeats."""
 217         prev = self.dict.get(key)
 218         if prev is None:
 219             self.dict[key] = value
 220         else:
 221             combined = ", ".join((prev, value))
 222             self.dict[key] = combined
 223
 224     def addcontinue(self, key, more):
 225         """Add more field data from a continuation line."""
 226         prev = self.dict[key]
 227         self.dict[key] = prev + "\n " + more
 228
 229     def readheaders(self):
 230         """Read header lines.
 231
 232         Read header lines up to the entirely blank line that terminates them.
 233         The (normally blank) line that ends the headers is skipped, but not
 234         included in the returned list.  If a non-header line ends the headers,
 235         (which is an error), an attempt is made to backspace over it; it is
 236         never included in the returned list.
 237
 238         The variable self.status is set to the empty string if all went well,
 239         otherwise it is an error message.  The variable self.headers is a
 240         completely uninterpreted list of lines contained in the header (so
 241         printing them will reproduce the header exactly as it appears in the
 242         file).
 243
 244         If multiple header fields with the same name occur, they are combined
 245         according to the rules in RFC 2616 sec 4.2:
 246
 247         Appending each subsequent field-value to the first, each separated
 248         by a comma. The order in which header fields with the same field-name
 249         are received is significant to the interpretation of the combined
 250         field value.
 251         """
 252         # XXX The implementation overrides the readheaders() method of
 253         # rfc822.Message.  The base class design isn't amenable to
 254         # customized behavior here so the method here is a copy of the
 255         # base class code with a few small changes.
 256
 257         self.dict = {}
 258         self.unixfrom = ''
 259         self.headers = hlist = []
 260         self.status = ''
 261         headerseen = ""
 262         firstline = 1
 263         startofline = unread = tell = None
 264         if hasattr(self.fp, 'unread'):
 265             unread = self.fp.unread
 266         elif self.seekable:
 267             tell = self.fp.tell
 268         while True:
 269             if tell:
 270                 try:
 271                     startofline = tell()
 272                 except IOError:
 273                     startofline = tell = None
 274                     self.seekable = 0
 275             line = self.fp.readline()
 276             if not line:
 277                 self.status = 'EOF in headers'
 278                 break
 279             # Skip unix From name time lines
 280             if firstline and line.startswith('From '):
 281                 self.unixfrom = self.unixfrom + line
 282                 continue
 283             firstline = 0
 284             if headerseen and line[0] in ' \t':
 285                 # XXX Not sure if continuation lines are handled properly
 286                 # for http and/or for repeating headers
 287                 # It's a continuation line.
 288                 hlist.append(line)
 289                 self.addcontinue(headerseen, line.strip())
 290                 continue
 291             elif self.iscomment(line):
 292                 # It's a comment.  Ignore it.
 293                 continue
 294             elif self.islast(line):
 295                 # Note! No pushback here!  The delimiter line gets eaten.
 296                 break
 297             headerseen = self.isheader(line)
 298             if headerseen:
 299                 # It's a legal header line, save it.
 300                 hlist.append(line)
 301                 self.addheader(headerseen, line[len(headerseen)+1:].strip())
 302                 continue
 303             else:
 304                 # It's not a header line; throw it back and stop here.
 305                 if not self.dict:
 306                     self.status = 'No headers'
 307                 else:
 308                     self.status = 'Non-header line where header expected'
 309                 # Try to undo the read.
 310                 if unread:
 311                     unread(line)
 312                 elif tell:
 313                     self.fp.seek(startofline)
 314                 else:
 315                     self.status = self.status + '; bad seek'
 316                 break
 317
 318 class HTTPResponse:
 319
 320     # strict: If true, raise BadStatusLine if the status line can't be
 321     # parsed as a valid HTTP/1.0 or 1.1 status line.  By default it is
 322     # false because it prevents clients from talking to HTTP/0.9
 323     # servers.  Note that a response with a sufficiently corrupted
 324     # status line will look like an HTTP/0.9 response.
 325
 326     # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
 327
 328     def __init__(self, sock, debuglevel=0, strict=0, method=None, buffering=False):
 329         if buffering:
 330             # The caller won't be using any sock.recv() calls, so buffering
 331             # is fine and recommended for performance.
 332             self.fp = sock.makefile('rb')
 333         else:
 334             # The buffer size is specified as zero, because the headers of
 335             # the response are read with readline().  If the reads were
 336             # buffered the readline() calls could consume some of the
 337             # response, which make be read via a recv() on the underlying
 338             # socket.
 339             self.fp = sock.makefile('rb', 0)
 340         self.debuglevel = debuglevel
 341         self.strict = strict
 342         self._method = method
 343
 344         self.msg = None
 345
 346         # from the Status-Line of the response
 347         self.version = _UNKNOWN # HTTP-Version
 348         self.status = _UNKNOWN  # Status-Code
 349         self.reason = _UNKNOWN  # Reason-Phrase
 350
 351         self.chunked = _UNKNOWN         # is "chunked" being used?
 352         self.chunk_left = _UNKNOWN      # bytes left to read in current chunk
 353         self.length = _UNKNOWN          # number of bytes left in response
 354         self.will_close = _UNKNOWN      # conn will close at end of response
 355
 356     def _read_status(self):
 357         # Initialize with Simple-Response defaults
 358         line = self.fp.readline()
 359         if self.debuglevel > 0:
 360             print "reply:", repr(line)
 361         if not line:
 362             # Presumably, the server closed the connection before
 363             # sending a valid response.
 364             raise BadStatusLine(line)
 365         try:
 366             [version, status, reason] = line.split(None, 2)
 367         except ValueError:
 368             try:
 369                 [version, status] = line.split(None, 1)
 370                 reason = ""
 371             except ValueError:
 372                 # empty version will cause next test to fail and status
 373                 # will be treated as 0.9 response.
 374                 version = ""
 375         if not version.startswith('HTTP/'):
 376             if self.strict:
 377                 self.close()
 378                 raise BadStatusLine(line)
 379             else:
 380                 # assume it's a Simple-Response from an 0.9 server
 381                 self.fp = LineAndFileWrapper(line, self.fp)
 382                 return "HTTP/0.9", 200, ""
 383
 384         # The status code is a three-digit number
 385         try:
 386             status = int(status)
 387             if status < 100 or status > 999:
 388                 raise BadStatusLine(line)
 389         except ValueError:
 390             raise BadStatusLine(line)
 391         return version, status, reason
 392
 393     def begin(self):
 394         if self.msg is not None:
 395             # we've already started reading the response
 396             return
 397
 398         # read until we get a non-100 response
 399         while True:
 400             version, status, reason = self._read_status()
 401             if status != CONTINUE:
 402                 break
 403             # skip the header from the 100 response
 404             while True:
 405                 skip = self.fp.readline().strip()
 406                 if not skip:
 407                     break
 408                 if self.debuglevel > 0:
 409                     print "header:", skip
 410
 411         self.status = status
 412         self.reason = reason.strip()
 413         if version == 'HTTP/1.0':
 414             self.version = 10
 415         elif version.startswith('HTTP/1.'):
 416             self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
 417         elif version == 'HTTP/0.9':
 418             self.version = 9
 419         else:
 420             raise UnknownProtocol(version)
 421
 422         if self.version == 9:
 423             self.length = None
 424             self.chunked = 0
 425             self.will_close = 1
 426             self.msg = HTTPMessage(StringIO())
 427             return
 428
 429         self.msg = HTTPMessage(self.fp, 0)
 430         if self.debuglevel > 0:
 431             for hdr in self.msg.headers:
 432                 print "header:", hdr,
 433
 434         # don't let the msg keep an fp
 435         self.msg.fp = None
 436
 437         # are we using the chunked-style of transfer encoding?
 438         tr_enc = self.msg.getheader('transfer-encoding')
 439         if tr_enc and tr_enc.lower() == "chunked":
 440             self.chunked = 1
 441             self.chunk_left = None
 442         else:
 443             self.chunked = 0
 444
 445         # will the connection close at the end of the response?
 446         self.will_close = self._check_close()
 447
 448         # do we have a Content-Length?
 449         # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
 450         length = self.msg.getheader('content-length')
 451         if length and not self.chunked:
 452             try:
 453                 self.length = int(length)
 454             except ValueError:
 455                 self.length = None
 456             else:
 457                 if self.length < 0:  # ignore nonsensical negative lengths
 458                     self.length = None
 459         else:
 460             self.length = None
 461
 462         # does the body have a fixed length? (of zero)
 463         if (status == NO_CONTENT or status == NOT_MODIFIED or
 464             100 <= status < 200 or      # 1xx codes
 465             self._method == 'HEAD'):
 466             self.length = 0
 467
 468         # if the connection remains open, and we aren't using chunked, and
 469         # a content-length was not provided, then assume that the connection
 470         # WILL close.
 471         if not self.will_close and \
 472            not self.chunked and \
 473            self.length is None:
 474             self.will_close = 1
 475
 476     def _check_close(self):
 477         conn = self.msg.getheader('connection')
 478         if self.version == 11:
 479             # An HTTP/1.1 proxy is assumed to stay open unless
 480             # explicitly closed.
 481             conn = self.msg.getheader('connection')
 482             if conn and "close" in conn.lower():
 483                 return True
 484             return False
 485
 486         # Some HTTP/1.0 implementations have support for persistent
 487         # connections, using rules different than HTTP/1.1.
 488
 489         # For older HTTP, Keep-Alive indicates persistent connection.
 490         if self.msg.getheader('keep-alive'):
 491             return False
 492
 493         # At least Akamai returns a "Connection: Keep-Alive" header,
 494         # which was supposed to be sent by the client.
 495         if conn and "keep-alive" in conn.lower():
 496             return False
 497
 498         # Proxy-Connection is a netscape hack.
 499         pconn = self.msg.getheader('proxy-connection')
 500         if pconn and "keep-alive" in pconn.lower():
 501             return False
 502
 503         # otherwise, assume it will close
 504         return True
 505
 506     def close(self):
 507         if self.fp:
 508             self.fp.close()
 509             self.fp = None
 510
 511     def isclosed(self):
 512         # NOTE: it is possible that we will not ever call self.close(). This
 513         #       case occurs when will_close is TRUE, length is None, and we
 514         #       read up to the last byte, but NOT past it.
 515         #
 516         # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
 517         #          called, meaning self.isclosed() is meaningful.
 518         return self.fp is None
 519
 520     # XXX It would be nice to have readline and __iter__ for this, too.
 521
 522     def read(self, amt=None):
 523         if self.fp is None:
 524             return ''
 525
 526         if self.chunked:
 527             return self._read_chunked(amt)
 528
 529         if amt is None:
 530             # unbounded read
 531             if self.length is None:
 532                 s = self.fp.read()
 533             else:
 534                 s = self._safe_read(self.length)
 535                 self.length = 0
 536             self.close()        # we read everything
 537             return s
 538
 539         if self.length is not None:
 540             if amt > self.length:
 541                 # clip the read to the "end of response"
 542                 amt = self.length
 543
 544         # we do not use _safe_read() here because this may be a .will_close
 545         # connection, and the user is reading more bytes than will be provided
 546         # (for example, reading in 1k chunks)
 547         s = self.fp.read(amt)
 548         if self.length is not None:
 549             self.length -= len(s)
 550             if not self.length:
 551                 self.close()
 552         return s
 553
 554     def _read_chunked(self, amt):
 555         assert self.chunked != _UNKNOWN
 556         chunk_left = self.chunk_left
 557         value = []
 558         while True:
 559             if chunk_left is None:
 560                 line = self.fp.readline()
 561                 i = line.find(';')
 562                 if i >= 0:
 563                     line = line[:i] # strip chunk-extensions
 564                 try:
 565                     chunk_left = int(line, 16)
 566                 except ValueError:
 567                     # close the connection as protocol synchronisation is
 568                     # probably lost
 569                     self.close()
 570                     raise IncompleteRead(''.join(value))
 571                 if chunk_left == 0:
 572                     break
 573             if amt is None:
 574                 value.append(self._safe_read(chunk_left))
 575             elif amt < chunk_left:
 576                 value.append(self._safe_read(amt))
 577                 self.chunk_left = chunk_left - amt
 578                 return ''.join(value)
 579             elif amt == chunk_left:
 580                 value.append(self._safe_read(amt))
 581                 self._safe_read(2)  # toss the CRLF at the end of the chunk
 582                 self.chunk_left = None
 583                 return ''.join(value)
 584             else:
 585                 value.append(self._safe_read(chunk_left))
 586                 amt -= chunk_left
 587
 588             # we read the whole chunk, get another
 589             self._safe_read(2)      # toss the CRLF at the end of the chunk
 590             chunk_left = None
 591
 592         # read and discard trailer up to the CRLF terminator
 593         ### note: we shouldn't have any trailers!
 594         while True:
 595             line = self.fp.readline()
 596             if not line:
 597                 # a vanishingly small number of sites EOF without
 598                 # sending the trailer
 599                 break
 600             if line == '\r\n':
 601                 break
 602
 603         # we read everything; close the "file"
 604         self.close()
 605
 606         return ''.join(value)
 607
 608     def _safe_read(self, amt):
 609         """Read the number of bytes requested, compensating for partial reads.
 610
 611         Normally, we have a blocking socket, but a read() can be interrupted
 612         by a signal (resulting in a partial read).
 613
 614         Note that we cannot distinguish between EOF and an interrupt when zero
 615         bytes have been read. IncompleteRead() will be raised in this
 616         situation.
 617
 618         This function should be used when <amt> bytes "should" be present for
 619         reading. If the bytes are truly not available (due to EOF), then the
 620         IncompleteRead exception can be used to detect the problem.
 621         """
 622         # NOTE(gps): As of svn r74426 socket._fileobject.read(x) will never
 623         # return less than x bytes unless EOF is encountered.  It now handles
 624         # signal interruptions (socket.error EINTR) internally.  This code
 625         # never caught that exception anyways.  It seems largely pointless.
 626         # self.fp.read(amt) will work fine.
 627         s = []
 628         while amt > 0:
 629             chunk = self.fp.read(min(amt, MAXAMOUNT))
 630             if not chunk:
 631                 raise IncompleteRead(''.join(s), amt)
 632             s.append(chunk)
 633             amt -= len(chunk)
 634         return ''.join(s)
 635
 636     def getheader(self, name, default=None):
 637         if self.msg is None:
 638             raise ResponseNotReady()
 639         return self.msg.getheader(name, default)
 640
 641     def getheaders(self):
 642         """Return list of (header, value) tuples."""
 643         if self.msg is None:
 644             raise ResponseNotReady()
 645         return self.msg.items()
 646
 647
 648 class HTTPConnection:
 649
 650     _http_vsn = 11
 651     _http_vsn_str = 'HTTP/1.1'
 652
 653     response_class = HTTPResponse
 654     default_port = HTTP_PORT
 655     auto_open = 1
 656     debuglevel = 0
 657     strict = 0
 658
 659     def __init__(self, host, port=None, strict=None,
 660                  timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
 661         self.timeout = timeout
 662         self.sock = None
 663         self._buffer = []
 664         self.__response = None
 665         self.__state = _CS_IDLE
 666         self._method = None
 667         self._tunnel_host = None
 668         self._tunnel_port = None
 669
 670         self._set_hostport(host, port)
 671         if strict is not None:
 672             self.strict = strict
 673
 674     def set_tunnel(self, host, port=None):
 675         """ Sets up the host and the port for the HTTP CONNECT Tunnelling."""
 676         self._tunnel_host = host
 677         self._tunnel_port = port
 678
 679     def _set_hostport(self, host, port):
 680         if port is None:
 681             i = host.rfind(':')
 682             j = host.rfind(']')         # ipv6 addresses have [...]
 683             if i > j:
 684                 try:
 685                     port = int(host[i+1:])
 686                 except ValueError:
 687                     raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
 688                 host = host[:i]
 689             else:
 690                 port = self.default_port
 691             if host and host[0] == '[' and host[-1] == ']':
 692                 host = host[1:-1]
 693         self.host = host
 694         self.port = port
 695
 696     def set_debuglevel(self, level):
 697         self.debuglevel = level
 698
 699     def _tunnel(self):
 700         self._set_hostport(self._tunnel_host, self._tunnel_port)
 701         self.send("CONNECT %s:%d HTTP/1.0\r\n\r\n" % (self.host, self.port))
 702         response = self.response_class(self.sock, strict = self.strict,
 703                                        method = self._method)
 704         (version, code, message) = response._read_status()
 705
 706         if code != 200:
 707             self.close()
 708             raise socket.error, "Tunnel connection failed: %d %s" % (code,
 709                                                                      message.strip())
 710         while True:
 711             line = response.fp.readline()
 712             if line == '\r\n': break
 713
 714
 715     def connect(self):
 716         """Connect to the host and port specified in __init__."""
 717         self.sock = socket.create_connection((self.host,self.port),
 718                                              self.timeout)
 719
 720         if self._tunnel_host:
 721             self._tunnel()
 722
 723     def close(self):
 724         """Close the connection to the HTTP server."""
 725         if self.sock:
 726             self.sock.close()   # close it manually... there may be other refs
 727             self.sock = None
 728         if self.__response:
 729             self.__response.close()
 730             self.__response = None
 731         self.__state = _CS_IDLE
 732
 733     def send(self, str):
 734         """Send `str' to the server."""
 735         if self.sock is None:
 736             if self.auto_open:
 737                 self.connect()
 738             else:
 739                 raise NotConnected()
 740
 741         # send the data to the server. if we get a broken pipe, then close
 742         # the socket. we want to reconnect when somebody tries to send again.
 743         #
 744         # NOTE: we DO propagate the error, though, because we cannot simply
 745         #       ignore the error... the caller will know if they can retry.
 746         if self.debuglevel > 0:
 747             print "send:", repr(str)
 748         try:
 749             blocksize=8192
 750             if hasattr(str,'read') :
 751                 if self.debuglevel > 0: print "sendIng a read()able"
 752                 data=str.read(blocksize)
 753                 while data:
 754                     self.sock.sendall(data)
 755                     data=str.read(blocksize)
 756             else:
 757                 self.sock.sendall(str)
 758         except socket.error, v:
 759             if v[0] == 32:      # Broken pipe
 760                 self.close()
 761             raise
 762
 763     def _output(self, s):
 764         """Add a line of output to the current request buffer.
 765
 766         Assumes that the line does *not* end with \\r\\n.
 767         """
 768         self._buffer.append(s)
 769
 770     def _send_output(self, message_body=None):
 771         """Send the currently buffered request and clear the buffer.
 772
 773         Appends an extra \\r\\n to the buffer.
 774         A message_body may be specified, to be appended to the request.
 775         """
 776         self._buffer.extend(("", ""))
 777         msg = "\r\n".join(self._buffer)
 778         del self._buffer[:]
 779         # If msg and message_body are sent in a single send() call,
 780         # it will avoid performance problems caused by the interaction
 781         # between delayed ack and the Nagle algorithim.
 782         if isinstance(message_body, str):
 783             msg += message_body
 784             message_body = None
 785         self.send(msg)
 786         if message_body is not None:
 787             #message_body was not a string (i.e. it is a file) and
 788             #we must run the risk of Nagle
 789             self.send(message_body)
 790
 791     def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
 792         """Send a request to the server.
 793
 794         `method' specifies an HTTP request method, e.g. 'GET'.
 795         `url' specifies the object being requested, e.g. '/index.html'.
 796         `skip_host' if True does not add automatically a 'Host:' header
 797         `skip_accept_encoding' if True does not add automatically an
 798            'Accept-Encoding:' header
 799         """
 800
 801         # if a prior response has been completed, then forget about it.
 802         if self.__response and self.__response.isclosed():
 803             self.__response = None
 804
 805
 806         # in certain cases, we cannot issue another request on this connection.
 807         # this occurs when:
 808         #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
 809         #   2) a response to a previous request has signalled that it is going
 810         #      to close the connection upon completion.
 811         #   3) the headers for the previous response have not been read, thus
 812         #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
 813         #
 814         # if there is no prior response, then we can request at will.
 815         #
 816         # if point (2) is true, then we will have passed the socket to the
 817         # response (effectively meaning, "there is no prior response"), and
 818         # will open a new one when a new request is made.
 819         #
 820         # Note: if a prior response exists, then we *can* start a new request.
 821         #       We are not allowed to begin fetching the response to this new
 822         #       request, however, until that prior response is complete.
 823         #
 824         if self.__state == _CS_IDLE:
 825             self.__state = _CS_REQ_STARTED
 826         else:
 827             raise CannotSendRequest()
 828
 829         # Save the method we use, we need it later in the response phase
 830         self._method = method
 831         if not url:
 832             url = '/'
 833         str = '%s %s %s' % (method, url, self._http_vsn_str)
 834
 835         self._output(str)
 836
 837         if self._http_vsn == 11:
 838             # Issue some standard headers for better HTTP/1.1 compliance
 839
 840             if not skip_host:
 841                 # this header is issued *only* for HTTP/1.1
 842                 # connections. more specifically, this means it is
 843                 # only issued when the client uses the new
 844                 # HTTPConnection() class. backwards-compat clients
 845                 # will be using HTTP/1.0 and those clients may be
 846                 # issuing this header themselves. we should NOT issue
 847                 # it twice; some web servers (such as Apache) barf
 848                 # when they see two Host: headers
 849
 850                 # If we need a non-standard port,include it in the
 851                 # header.  If the request is going through a proxy,
 852                 # but the host of the actual URL, not the host of the
 853                 # proxy.
 854
 855                 netloc = ''
 856                 if url.startswith('http'):
 857                     nil, netloc, nil, nil, nil = urlsplit(url)
 858
 859                 if netloc:
 860                     try:
 861                         netloc_enc = netloc.encode("ascii")
 862                     except UnicodeEncodeError:
 863                         netloc_enc = netloc.encode("idna")
 864                     self.putheader('Host', netloc_enc)
 865                 else:
 866                     try:
 867                         host_enc = self.host.encode("ascii")
 868                     except UnicodeEncodeError:
 869                         host_enc = self.host.encode("idna")
 870                     if self.port == self.default_port:
 871                         self.putheader('Host', host_enc)
 872                     else:
 873                         self.putheader('Host', "%s:%s" % (host_enc, self.port))
 874
 875             # note: we are assuming that clients will not attempt to set these
 876             #       headers since *this* library must deal with the
 877             #       consequences. this also means that when the supporting
 878             #       libraries are updated to recognize other forms, then this
 879             #       code should be changed (removed or updated).
 880
 881             # we only want a Content-Encoding of "identity" since we don't
 882             # support encodings such as x-gzip or x-deflate.
 883             if not skip_accept_encoding:
 884                 self.putheader('Accept-Encoding', 'identity')
 885
 886             # we can accept "chunked" Transfer-Encodings, but no others
 887             # NOTE: no TE header implies *only* "chunked"
 888             #self.putheader('TE', 'chunked')
 889
 890             # if TE is supplied in the header, then it must appear in a
 891             # Connection header.
 892             #self.putheader('Connection', 'TE')
 893
 894         else:
 895             # For HTTP/1.0, the server will assume "not chunked"
 896             pass
 897
 898     def putheader(self, header, *values):
 899         """Send a request header line to the server.
 900
 901         For example: h.putheader('Accept', 'text/html')
 902         """
 903         if self.__state != _CS_REQ_STARTED:
 904             raise CannotSendHeader()
 905
 906         str = '%s: %s' % (header, '\r\n\t'.join(values))
 907         self._output(str)
 908
 909     def endheaders(self, message_body=None):
 910         """Indicate that the last header line has been sent to the server.
 911
 912         This method sends the request to the server.  The optional
 913         message_body argument can be used to pass message body
 914         associated with the request.  The message body will be sent in
 915         the same packet as the message headers if possible.  The
 916         message_body should be a string.
 917         """
 918         if self.__state == _CS_REQ_STARTED:
 919             self.__state = _CS_REQ_SENT
 920         else:
 921             raise CannotSendHeader()
 922         self._send_output(message_body)
 923
 924     def request(self, method, url, body=None, headers={}):
 925         """Send a complete request to the server."""
 926
 927         try:
 928             self._send_request(method, url, body, headers)
 929         except socket.error, v:
 930             # trap 'Broken pipe' if we're allowed to automatically reconnect
 931             if v[0] != 32 or not self.auto_open:
 932                 raise
 933             # try one more time
 934             self._send_request(method, url, body, headers)
 935
 936     def _set_content_length(self, body):
 937         # Set the content-length based on the body.
 938         thelen = None
 939         try:
 940             thelen = str(len(body))
 941         except TypeError, te:
 942             # If this is a file-like object, try to
 943             # fstat its file descriptor
 944             import os
 945             try:
 946                 thelen = str(os.fstat(body.fileno()).st_size)
 947             except (AttributeError, OSError):
 948                 # Don't send a length if this failed
 949                 if self.debuglevel > 0: print "Cannot stat!!"
 950
 951         if thelen is not None:
 952             self.putheader('Content-Length', thelen)
 953
 954     def _send_request(self, method, url, body, headers):
 955         # honour explicitly requested Host: and Accept-Encoding headers
 956         header_names = dict.fromkeys([k.lower() for k in headers])
 957         skips = {}
 958         if 'host' in header_names:
 959             skips['skip_host'] = 1
 960         if 'accept-encoding' in header_names:
 961             skips['skip_accept_encoding'] = 1
 962
 963         self.putrequest(method, url, **skips)
 964
 965         if body and ('content-length' not in header_names):
 966             self._set_content_length(body)
 967         for hdr, value in headers.iteritems():
 968             self.putheader(hdr, value)
 969         self.endheaders(body)
 970
 971     def getresponse(self, buffering=False):
 972         "Get the response from the server."
 973
 974         # if a prior response has been completed, then forget about it.
 975         if self.__response and self.__response.isclosed():
 976             self.__response = None
 977
 978         #
 979         # if a prior response exists, then it must be completed (otherwise, we
 980         # cannot read this response's header to determine the connection-close
 981         # behavior)
 982         #
 983         # note: if a prior response existed, but was connection-close, then the
 984         # socket and response were made independent of this HTTPConnection
 985         # object since a new request requires that we open a whole new
 986         # connection
 987         #
 988         # this means the prior response had one of two states:
 989         #   1) will_close: this connection was reset and the prior socket and
 990         #                  response operate independently
 991         #   2) persistent: the response was retained and we await its
 992         #                  isclosed() status to become true.
 993         #
 994         if self.__state != _CS_REQ_SENT or self.__response:
 995             raise ResponseNotReady()
 996
 997         args = (self.sock,)
 998         kwds = {"strict":self.strict, "method":self._method}
 999         if self.debuglevel > 0:
1000             args += (self.debuglevel,)
1001         if buffering:
1002             #only add this keyword if non-default, for compatibility with
1003             #other response_classes.
1004             kwds["buffering"] = True;
1005         response = self.response_class(*args, **kwds)
1006
1007         response.begin()
1008         assert response.will_close != _UNKNOWN
1009         self.__state = _CS_IDLE
1010
1011         if response.will_close:
1012             # this effectively passes the connection to the response
1013             self.close()
1014         else:
1015             # remember this, so we can tell when it is complete
1016             self.__response = response
1017
1018         return response
1019
1020
class HTTP:
    """Compatibility class with httplib.py from 1.5.

    Wraps a connection object (an instance of `_connection_class') and
    offers the older interface on top of it: send(), putrequest(),
    putheader(), endheaders() and set_debuglevel() are delegated to the
    connection, while getreply() and getfile() adapt its response object
    to the old (status, reason, headers) / file protocol.
    """

    _http_vsn = 10
    _http_vsn_str = 'HTTP/1.0'

    debuglevel = 0

    _connection_class = HTTPConnection

    def __init__(self, host='', port=None, strict=None):
        "Provide a default host, since the superclass requires one."

        # some joker passed 0 explicitly, meaning default port
        if port == 0:
            port = None

        # Note that we may pass an empty string as the host; this will throw
        # an error when we attempt to connect. Presumably, the client code
        # will call connect before then, with a proper host.
        self._setup(self._connection_class(host, port, strict))

    def _setup(self, conn):
        "Adopt `conn' as the wrapped connection and wire up delegation."
        self._conn = conn

        # set up delegation to flesh out interface
        for name in ('send', 'putrequest', 'putheader', 'endheaders',
                     'set_debuglevel'):
            setattr(self, name, getattr(conn, name))

        # make the wrapped connection speak this class's HTTP version
        conn._http_vsn = self._http_vsn
        conn._http_vsn_str = self._http_vsn_str

        self.file = None

    def connect(self, host=None, port=None):
        "Accept arguments to set the host/port, since the superclass doesn't."

        if host is not None:
            self._conn._set_hostport(host, port)
        self._conn.connect()

    def getfile(self):
        "Provide a getfile, since the superclass' does not use this concept."
        return self.file

    def getreply(self, buffering=False):
        """Compat definition since superclass does not define it.

        Returns a tuple consisting of:
        - server status code (e.g. '200' if all goes well)
        - server "reason" corresponding to status code
        - any RFC822 headers in the response from the server

        If the connection raises BadStatusLine, the tuple is
        (-1, <the offending line>, None); the connection is closed, and
        getfile() returns a file made from the socket so that the caller
        can still read what the server sent.
        """
        try:
            if not buffering:
                response = self._conn.getresponse()
            else:
                #only add this keyword if non-default for compatibility
                #with other connection classes
                response = self._conn.getresponse(buffering)
        except BadStatusLine as e:
            ### hmm. if getresponse() ever closes the socket on a bad request,
            ### then we are going to have problems with self.sock

            ### should we keep this behavior? do people use it?
            # keep the socket open (as a file), and return it
            raw_file = self._conn.sock.makefile('rb', 0)

            # close our socket -- we want to restart after any protocol error
            self.close()

            # close() resets self.file to None, so the file has to be
            # installed *after* it; assigning it beforehand (as this code
            # used to) left getfile() returning None.
            self.file = raw_file
            self.headers = None
            return -1, e.line, None

        self.headers = response.msg
        self.file = response.fp
        return response.status, response.reason, response.msg

    def close(self):
        "Close the wrapped connection and drop our reference to the file."
        self._conn.close()

        # note that self.file == response.fp, which gets closed by the
        # superclass. just clear the object ref here.
        ### hmm. messy. if status==-1, then self.file is owned by us.
        ### well... we aren't explicitly closing, but losing this ref will
        ### do it
        self.file = None
1111
1112 try:
1113     import ssl
1114 except ImportError:
1115     pass
1116 else:
1117     class HTTPSConnection(HTTPConnection):
1118         "This class allows communication via SSL."
1119
1120         default_port = HTTPS_PORT
1121
1122         def __init__(self, host, port=None, key_file=None, cert_file=None,
1123                      strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
1124             HTTPConnection.__init__(self, host, port, strict, timeout)
1125             self.key_file = key_file
1126             self.cert_file = cert_file
1127
1128         def connect(self):
1129             "Connect to a host on a given (SSL) port."
1130
1131             sock = socket.create_connection((self.host, self.port), self.timeout)
1132             if self._tunnel_host:
1133                 self.sock = sock
1134                 self._tunnel()
1135             self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file)
1136
    # export HTTPSConnection only when the ssl import above succeeded
    __all__.append("HTTPSConnection")
1138
1139     class HTTPS(HTTP):
1140         """Compatibility with 1.5 httplib interface
1141
1142         Python 1.5.2 did not have an HTTPS class, but it defined an
1143         interface for sending http requests that is also useful for
1144         https.
1145         """
1146
1147         _connection_class = HTTPSConnection
1148
1149         def __init__(self, host='', port=None, key_file=None, cert_file=None,
1150                      strict=None):
1151             # provide a default host, pass the X509 cert info
1152
1153             # urf. compensate for bad input.
1154             if port == 0:
1155                 port = None
1156             self._setup(self._connection_class(host, port, key_file,
1157                                                cert_file, strict))
1158
1159             # we never actually use these for anything, but we keep them
1160             # here for compatibility with post-1.5.2 CVS.
1161             self.key_file = key_file
1162             self.cert_file = cert_file
1163
1164
1165     def FakeSocket (sock, sslobj):
1166         warnings.warn("FakeSocket is deprecated, and won't be in 3.x.  " +
1167                       "Use the result of ssl.wrap_socket() directly instead.",
1168                       DeprecationWarning, stacklevel=2)
1169         return sslobj
1170
1171
class HTTPException(Exception):
    """Base class of the exceptions defined in this module."""
    # Subclasses that define an __init__ must call Exception.__init__
    # or define self.args.  Otherwise, str() will fail.
    pass
1176
class NotConnected(HTTPException):
    # NOTE(review): not raised in this part of the file; presumably signals
    # an operation attempted without an open connection -- confirm at the
    # raise site.
    pass
1179
class InvalidURL(HTTPException):
    # NOTE(review): not raised in this part of the file; presumably reports
    # a host/port specification that could not be parsed -- confirm at the
    # raise site.
    pass
1182
class UnknownProtocol(HTTPException):
    """Exception that records the offending protocol `version'.

    The value is kept both as the sole element of `args' and as the
    `version' attribute.
    """

    def __init__(self, version):
        self.version = version
        self.args = (version,)
1187
class UnknownTransferEncoding(HTTPException):
    # NOTE(review): not raised in this part of the file; presumably reports
    # a Transfer-Encoding this library cannot handle -- confirm at the
    # raise site.
    pass
1190
class UnimplementedFileMode(HTTPException):
    # NOTE(review): not raised in this part of the file; presumably reports
    # a file mode that is not supported -- confirm at the raise site.
    pass
1193
class IncompleteRead(HTTPException):
    """Exception describing a read that ended early.

    `partial' is the data that was read (its length is reported as
    "bytes read"); `expected', when given, is the number of further bytes
    that were still expected.
    """

    def __init__(self, partial, expected=None):
        self.args = (partial,)
        self.partial = partial
        self.expected = expected

    def __repr__(self):
        if self.expected is None:
            suffix = ''
        else:
            suffix = ', %i more expected' % self.expected
        return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), suffix)

    def __str__(self):
        # the string form is deliberately the same as the repr
        return repr(self)
1207
class ImproperConnectionState(HTTPException):
    # Common base of the errors raised when a connection method is called
    # in the wrong state: CannotSendRequest, CannotSendHeader and
    # ResponseNotReady.
    pass
1210
class CannotSendRequest(ImproperConnectionState):
    # Raised by putrequest() when the connection is not in the idle state.
    pass
1213
class CannotSendHeader(ImproperConnectionState):
    # Raised by putheader() and endheaders() when no request is currently
    # in the "request started" state.
    pass
1216
class ResponseNotReady(ImproperConnectionState):
    # Raised by getresponse() when no request has been completely sent or
    # when an earlier response on the connection is still pending.
    pass
1219
class BadStatusLine(HTTPException):
    """Exception carrying an unusable status line.

    `line' holds the line exactly as it was passed in.  `args' holds the
    same value, except that an empty line is replaced by its repr() so
    that str() of the exception is never blank.
    """

    def __init__(self, line):
        # An empty line would give an empty, unhelpful message; show its
        # repr() in args instead.  The `line' attribute itself is left
        # untouched so callers that inspect it see what they always did.
        self.args = (line or repr(line),)
        self.line = line
1224
# for backwards compatibility: `error' is another name for HTTPException,
# so "except httplib.error" catches every exception derived from it
error = HTTPException
1227
class LineAndFileWrapper:
    """A limited file-like object for HTTP/0.9 responses.

    Reads return the remembered `line' first and then continue with the
    wrapped `file'.  Any attribute not defined here is looked up on the
    wrapped file.
    """

    # The status-line parsing code calls readline(), which normally
    # get the HTTP status line.  For a 0.9 response, however, this is
    # actually the first line of the body!  Clients need to get a
    # readable file object that contains that line.

    def __init__(self, line, file):
        self._line = line
        self._file = file
        self._line_offset = 0
        self._line_left = len(line)
        # An empty line has nothing to replay, so it counts as consumed
        # from the start and every read goes straight to the file.
        if self._line_left:
            self._line_consumed = 0
        else:
            self._line_consumed = 1

    def __getattr__(self, attr):
        # only called for attributes this wrapper does not have itself
        return getattr(self._file, attr)

    def _done(self):
        # called when the last byte is read from the line.  After the
        # call, all read methods are delegated to the underlying file
        # object.
        self._line_consumed = 1
        self.read = self._file.read
        self.readline = self._file.readline
        self.readlines = self._file.readlines

    def read(self, amt=None):
        """Read up to `amt' bytes; None or a negative value reads it all."""
        read_all = amt is None or amt < 0

        if self._line_consumed:
            # Reached when the line was empty, or through a bound method
            # that was obtained before _done() rebound self.read.  The
            # wrapped file may not accept None as a size, so leave the
            # argument out instead of passing it through.
            if read_all:
                return self._file.read()
            return self._file.read(amt)

        if read_all or amt > self._line_left:
            s = self._line[self._line_offset:]
            self._done()
            if read_all:
                return s + self._file.read()
            else:
                return s + self._file.read(amt - len(s))
        else:
            i = self._line_offset
            j = i + amt
            s = self._line[i:j]
            self._line_offset = j
            self._line_left -= amt
            if self._line_left == 0:
                self._done()
            return s

    def readline(self):
        """Return the rest of the remembered line, then lines of the file."""
        if self._line_consumed:
            return self._file.readline()
        s = self._line[self._line_offset:]
        self._done()
        return s

    def readlines(self, size=None):
        """Return the rest of the remembered line plus the file's lines."""
        if self._line_consumed:
            # as in read(): never hand None to the wrapped file
            if size is None:
                return self._file.readlines()
            return self._file.readlines(size)
        L = [self._line[self._line_offset:]]
        self._done()
        if size is None:
            return L + self._file.readlines()
        else:
            return L + self._file.readlines(size)
1295
1296 def test():
1297     """Test this module.
1298
1299     A hodge podge of tests collected here, because they have too many
1300     external dependencies for the regular test suite.
1301     """
1302
1303     import sys
1304     import getopt
1305     opts, args = getopt.getopt(sys.argv[1:], 'd')
1306     dl = 0
1307     for o, a in opts:
1308         if o == '-d': dl = dl + 1
1309     host = 'www.python.org'
1310     selector = '/'
1311     if args[0:]: host = args[0]
1312     if args[1:]: selector = args[1]
1313     h = HTTP()
1314     h.set_debuglevel(dl)
1315     h.connect(host)
1316     h.putrequest('GET', selector)
1317     h.endheaders()
1318     status, reason, headers = h.getreply()
1319     print 'status =', status
1320     print 'reason =', reason
1321     print "read", len(h.getfile().read())
1322     print
1323     if headers:
1324         for header in headers.headers: print header.strip()
1325     print
1326
1327     # minimal test that code to extract host from url works
1328     class HTTP11(HTTP):
1329         _http_vsn = 11
1330         _http_vsn_str = 'HTTP/1.1'
1331
1332     h = HTTP11('www.python.org')
1333     h.putrequest('GET', 'http://www.python.org/~jeremy/')
1334     h.endheaders()
1335     h.getreply()
1336     h.close()
1337
1338     try:
1339         import ssl
1340     except ImportError:
1341         pass
1342     else:
1343
1344         for host, selector in (('sourceforge.net', '/projects/python'),
1345                                ):
1346             print "https://%s%s" % (host, selector)
1347             hs = HTTPS()
1348             hs.set_debuglevel(dl)
1349             hs.connect(host)
1350             hs.putrequest('GET', selector)
1351             hs.endheaders()
1352             status, reason, headers = hs.getreply()
1353             print 'status =', status
1354             print 'reason =', reason
1355             print "read", len(hs.getfile().read())
1356             print
1357             if headers:
1358                 for header in headers.headers: print header.strip()
1359             print
1360
# run the ad-hoc network tests when the module is executed as a script
if __name__ == '__main__':
    test()