Lib/httplib.py

   1 """HTTP/1.1 client library
   2
   3 <intro stuff goes here>
   4 <other stuff, too>
   5
   6 HTTPConnection goes through a number of "states", which define when a client
   7 may legally make another request or fetch the response for a particular
   8 request. This diagram details these state transitions:
   9
  10     (null)
  11       |
  12       | HTTPConnection()
  13       v
  14     Idle
  15       |
  16       | putrequest()
  17       v
  18     Request-started
  19       |
  20       | ( putheader() )*  endheaders()
  21       v
  22     Request-sent
  23       |
  24       | response = getresponse()
  25       v
  26     Unread-response   [Response-headers-read]
  27       |\____________________
  28       |                     |
  29       | response.read()     | putrequest()
  30       v                     v
  31     Idle                  Req-started-unread-response
  32                      ______/|
  33                    /        |
  34    response.read() |        | ( putheader() )*  endheaders()
  35                    v        v
  36        Request-started    Req-sent-unread-response
  37                             |
  38                             | response.read()
  39                             v
  40                           Request-sent
  41
  42 This diagram presents the following rules:
  43   -- a second request may not be started until {response-headers-read}
  44   -- a response [object] cannot be retrieved until {request-sent}
  45   -- there is no differentiation between an unread response body and a
  46      partially read response body
  47
  48 Note: this enforcement is applied by the HTTPConnection class. The
  49       HTTPResponse class does not enforce this state machine, which
  50       implies sophisticated clients may accelerate the request/response
  51       pipeline. Caution should be taken, though: accelerating the states
  52       beyond the above pattern may imply knowledge of the server's
  53       connection-close behavior for certain requests. For example, it
  54       is impossible to tell whether the server will close the connection
  55       UNTIL the response headers have been read; this means that further
  56       requests cannot be placed into the pipeline until it is known that
  57       the server will NOT be closing the connection.
  58
  59 Logical State                  __state            __response
  60 -------------                  -------            ----------
  61 Idle                           _CS_IDLE           None
  62 Request-started                _CS_REQ_STARTED    None
  63 Request-sent                   _CS_REQ_SENT       None
  64 Unread-response                _CS_IDLE           <response_class>
  65 Req-started-unread-response    _CS_REQ_STARTED    <response_class>
  66 Req-sent-unread-response       _CS_REQ_SENT       <response_class>
  67 """
  68
  69 from array import array
  70 import socket
  71 from sys import py3kwarning
  72 from urlparse import urlsplit
  73 import warnings
  74 with warnings.catch_warnings():
  75     if py3kwarning:
  76         warnings.filterwarnings("ignore", ".*mimetools has been removed",
  77                                 DeprecationWarning)
  78     import mimetools
  79
  80 try:
  81     from cStringIO import StringIO
  82 except ImportError:
  83     from StringIO import StringIO
  84
# Public names exported by "from httplib import *".
__all__ = ["HTTP", "HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "error", "responses"]

# Default TCP ports.  HTTP_PORT is used as HTTPConnection.default_port.
HTTP_PORT = 80
HTTPS_PORT = 443

# Placeholder stored in HTTPResponse attributes (version, status, reason,
# chunked, length, ...) until the response has actually been parsed.
_UNKNOWN = 'UNKNOWN'

# connection states
# These are the values of HTTPConnection.__state listed in the state table
# of the module docstring.
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'

# status codes
# Symbolic names for numeric HTTP status codes, grouped by class (1xx-5xx).
# informational
CONTINUE = 100
SWITCHING_PROTOCOLS = 101
PROCESSING = 102

# successful
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207
IM_USED = 226

# redirection
MULTIPLE_CHOICES = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307

# client error
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
METHOD_NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTHENTICATION_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
UNPROCESSABLE_ENTITY = 422
LOCKED = 423
FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426

# server error
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510
 161
 162 # Mapping status codes to official W3C names
 163 responses = {
 164     100: 'Continue',
 165     101: 'Switching Protocols',
 166
 167     200: 'OK',
 168     201: 'Created',
 169     202: 'Accepted',
 170     203: 'Non-Authoritative Information',
 171     204: 'No Content',
 172     205: 'Reset Content',
 173     206: 'Partial Content',
 174
 175     300: 'Multiple Choices',
 176     301: 'Moved Permanently',
 177     302: 'Found',
 178     303: 'See Other',
 179     304: 'Not Modified',
 180     305: 'Use Proxy',
 181     306: '(Unused)',
 182     307: 'Temporary Redirect',
 183
 184     400: 'Bad Request',
 185     401: 'Unauthorized',
 186     402: 'Payment Required',
 187     403: 'Forbidden',
 188     404: 'Not Found',
 189     405: 'Method Not Allowed',
 190     406: 'Not Acceptable',
 191     407: 'Proxy Authentication Required',
 192     408: 'Request Timeout',
 193     409: 'Conflict',
 194     410: 'Gone',
 195     411: 'Length Required',
 196     412: 'Precondition Failed',
 197     413: 'Request Entity Too Large',
 198     414: 'Request-URI Too Long',
 199     415: 'Unsupported Media Type',
 200     416: 'Requested Range Not Satisfiable',
 201     417: 'Expectation Failed',
 202
 203     500: 'Internal Server Error',
 204     501: 'Not Implemented',
 205     502: 'Bad Gateway',
 206     503: 'Service Unavailable',
 207     504: 'Gateway Timeout',
 208     505: 'HTTP Version Not Supported',
 209 }
 210
 211 # maximal amount of data to read at one time in _safe_read
 212 MAXAMOUNT = 1048576
 213
 214 class HTTPMessage(mimetools.Message):
 215
 216     def addheader(self, key, value):
 217         """Add header for field key handling repeats."""
 218         prev = self.dict.get(key)
 219         if prev is None:
 220             self.dict[key] = value
 221         else:
 222             combined = ", ".join((prev, value))
 223             self.dict[key] = combined
 224
 225     def addcontinue(self, key, more):
 226         """Add more field data from a continuation line."""
 227         prev = self.dict[key]
 228         self.dict[key] = prev + "\n " + more
 229
 230     def readheaders(self):
 231         """Read header lines.
 232
 233         Read header lines up to the entirely blank line that terminates them.
 234         The (normally blank) line that ends the headers is skipped, but not
 235         included in the returned list.  If a non-header line ends the headers,
 236         (which is an error), an attempt is made to backspace over it; it is
 237         never included in the returned list.
 238
 239         The variable self.status is set to the empty string if all went well,
 240         otherwise it is an error message.  The variable self.headers is a
 241         completely uninterpreted list of lines contained in the header (so
 242         printing them will reproduce the header exactly as it appears in the
 243         file).
 244
 245         If multiple header fields with the same name occur, they are combined
 246         according to the rules in RFC 2616 sec 4.2:
 247
 248         Appending each subsequent field-value to the first, each separated
 249         by a comma. The order in which header fields with the same field-name
 250         are received is significant to the interpretation of the combined
 251         field value.
 252         """
 253         # XXX The implementation overrides the readheaders() method of
 254         # rfc822.Message.  The base class design isn't amenable to
 255         # customized behavior here so the method here is a copy of the
 256         # base class code with a few small changes.
 257
 258         self.dict = {}
 259         self.unixfrom = ''
 260         self.headers = hlist = []
 261         self.status = ''
 262         headerseen = ""
 263         firstline = 1
 264         startofline = unread = tell = None
 265         if hasattr(self.fp, 'unread'):
 266             unread = self.fp.unread
 267         elif self.seekable:
 268             tell = self.fp.tell
 269         while True:
 270             if tell:
 271                 try:
 272                     startofline = tell()
 273                 except IOError:
 274                     startofline = tell = None
 275                     self.seekable = 0
 276             line = self.fp.readline()
 277             if not line:
 278                 self.status = 'EOF in headers'
 279                 break
 280             # Skip unix From name time lines
 281             if firstline and line.startswith('From '):
 282                 self.unixfrom = self.unixfrom + line
 283                 continue
 284             firstline = 0
 285             if headerseen and line[0] in ' \t':
 286                 # XXX Not sure if continuation lines are handled properly
 287                 # for http and/or for repeating headers
 288                 # It's a continuation line.
 289                 hlist.append(line)
 290                 self.addcontinue(headerseen, line.strip())
 291                 continue
 292             elif self.iscomment(line):
 293                 # It's a comment.  Ignore it.
 294                 continue
 295             elif self.islast(line):
 296                 # Note! No pushback here!  The delimiter line gets eaten.
 297                 break
 298             headerseen = self.isheader(line)
 299             if headerseen:
 300                 # It's a legal header line, save it.
 301                 hlist.append(line)
 302                 self.addheader(headerseen, line[len(headerseen)+1:].strip())
 303                 continue
 304             else:
 305                 # It's not a header line; throw it back and stop here.
 306                 if not self.dict:
 307                     self.status = 'No headers'
 308                 else:
 309                     self.status = 'Non-header line where header expected'
 310                 # Try to undo the read.
 311                 if unread:
 312                     unread(line)
 313                 elif tell:
 314                     self.fp.seek(startofline)
 315                 else:
 316                     self.status = self.status + '; bad seek'
 317                 break
 318
 319 class HTTPResponse:
 320
 321     # strict: If true, raise BadStatusLine if the status line can't be
 322     # parsed as a valid HTTP/1.0 or 1.1 status line.  By default it is
 323     # false because it prevents clients from talking to HTTP/0.9
 324     # servers.  Note that a response with a sufficiently corrupted
 325     # status line will look like an HTTP/0.9 response.
 326
 327     # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
 328
 329     def __init__(self, sock, debuglevel=0, strict=0, method=None, buffering=False):
 330         if buffering:
 331             # The caller won't be using any sock.recv() calls, so buffering
 332             # is fine and recommended for performance.
 333             self.fp = sock.makefile('rb')
 334         else:
 335             # The buffer size is specified as zero, because the headers of
 336             # the response are read with readline().  If the reads were
 337             # buffered the readline() calls could consume some of the
 338             # response, which make be read via a recv() on the underlying
 339             # socket.
 340             self.fp = sock.makefile('rb', 0)
 341         self.debuglevel = debuglevel
 342         self.strict = strict
 343         self._method = method
 344
 345         self.msg = None
 346
 347         # from the Status-Line of the response
 348         self.version = _UNKNOWN # HTTP-Version
 349         self.status = _UNKNOWN  # Status-Code
 350         self.reason = _UNKNOWN  # Reason-Phrase
 351
 352         self.chunked = _UNKNOWN         # is "chunked" being used?
 353         self.chunk_left = _UNKNOWN      # bytes left to read in current chunk
 354         self.length = _UNKNOWN          # number of bytes left in response
 355         self.will_close = _UNKNOWN      # conn will close at end of response
 356
 357     def _read_status(self):
 358         # Initialize with Simple-Response defaults
 359         line = self.fp.readline()
 360         if self.debuglevel > 0:
 361             print "reply:", repr(line)
 362         if not line:
 363             # Presumably, the server closed the connection before
 364             # sending a valid response.
 365             raise BadStatusLine(line)
 366         try:
 367             [version, status, reason] = line.split(None, 2)
 368         except ValueError:
 369             try:
 370                 [version, status] = line.split(None, 1)
 371                 reason = ""
 372             except ValueError:
 373                 # empty version will cause next test to fail and status
 374                 # will be treated as 0.9 response.
 375                 version = ""
 376         if not version.startswith('HTTP/'):
 377             if self.strict:
 378                 self.close()
 379                 raise BadStatusLine(line)
 380             else:
 381                 # assume it's a Simple-Response from an 0.9 server
 382                 self.fp = LineAndFileWrapper(line, self.fp)
 383                 return "HTTP/0.9", 200, ""
 384
 385         # The status code is a three-digit number
 386         try:
 387             status = int(status)
 388             if status < 100 or status > 999:
 389                 raise BadStatusLine(line)
 390         except ValueError:
 391             raise BadStatusLine(line)
 392         return version, status, reason
 393
 394     def begin(self):
 395         if self.msg is not None:
 396             # we've already started reading the response
 397             return
 398
 399         # read until we get a non-100 response
 400         while True:
 401             version, status, reason = self._read_status()
 402             if status != CONTINUE:
 403                 break
 404             # skip the header from the 100 response
 405             while True:
 406                 skip = self.fp.readline().strip()
 407                 if not skip:
 408                     break
 409                 if self.debuglevel > 0:
 410                     print "header:", skip
 411
 412         self.status = status
 413         self.reason = reason.strip()
 414         if version == 'HTTP/1.0':
 415             self.version = 10
 416         elif version.startswith('HTTP/1.'):
 417             self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
 418         elif version == 'HTTP/0.9':
 419             self.version = 9
 420         else:
 421             raise UnknownProtocol(version)
 422
 423         if self.version == 9:
 424             self.length = None
 425             self.chunked = 0
 426             self.will_close = 1
 427             self.msg = HTTPMessage(StringIO())
 428             return
 429
 430         self.msg = HTTPMessage(self.fp, 0)
 431         if self.debuglevel > 0:
 432             for hdr in self.msg.headers:
 433                 print "header:", hdr,
 434
 435         # don't let the msg keep an fp
 436         self.msg.fp = None
 437
 438         # are we using the chunked-style of transfer encoding?
 439         tr_enc = self.msg.getheader('transfer-encoding')
 440         if tr_enc and tr_enc.lower() == "chunked":
 441             self.chunked = 1
 442             self.chunk_left = None
 443         else:
 444             self.chunked = 0
 445
 446         # will the connection close at the end of the response?
 447         self.will_close = self._check_close()
 448
 449         # do we have a Content-Length?
 450         # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
 451         length = self.msg.getheader('content-length')
 452         if length and not self.chunked:
 453             try:
 454                 self.length = int(length)
 455             except ValueError:
 456                 self.length = None
 457             else:
 458                 if self.length < 0:  # ignore nonsensical negative lengths
 459                     self.length = None
 460         else:
 461             self.length = None
 462
 463         # does the body have a fixed length? (of zero)
 464         if (status == NO_CONTENT or status == NOT_MODIFIED or
 465             100 <= status < 200 or      # 1xx codes
 466             self._method == 'HEAD'):
 467             self.length = 0
 468
 469         # if the connection remains open, and we aren't using chunked, and
 470         # a content-length was not provided, then assume that the connection
 471         # WILL close.
 472         if not self.will_close and \
 473            not self.chunked and \
 474            self.length is None:
 475             self.will_close = 1
 476
 477     def _check_close(self):
 478         conn = self.msg.getheader('connection')
 479         if self.version == 11:
 480             # An HTTP/1.1 proxy is assumed to stay open unless
 481             # explicitly closed.
 482             conn = self.msg.getheader('connection')
 483             if conn and "close" in conn.lower():
 484                 return True
 485             return False
 486
 487         # Some HTTP/1.0 implementations have support for persistent
 488         # connections, using rules different than HTTP/1.1.
 489
 490         # For older HTTP, Keep-Alive indicates persistent connection.
 491         if self.msg.getheader('keep-alive'):
 492             return False
 493
 494         # At least Akamai returns a "Connection: Keep-Alive" header,
 495         # which was supposed to be sent by the client.
 496         if conn and "keep-alive" in conn.lower():
 497             return False
 498
 499         # Proxy-Connection is a netscape hack.
 500         pconn = self.msg.getheader('proxy-connection')
 501         if pconn and "keep-alive" in pconn.lower():
 502             return False
 503
 504         # otherwise, assume it will close
 505         return True
 506
 507     def close(self):
 508         if self.fp:
 509             self.fp.close()
 510             self.fp = None
 511
 512     def isclosed(self):
 513         # NOTE: it is possible that we will not ever call self.close(). This
 514         #       case occurs when will_close is TRUE, length is None, and we
 515         #       read up to the last byte, but NOT past it.
 516         #
 517         # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
 518         #          called, meaning self.isclosed() is meaningful.
 519         return self.fp is None
 520
 521     # XXX It would be nice to have readline and __iter__ for this, too.
 522
 523     def read(self, amt=None):
 524         if self.fp is None:
 525             return ''
 526
 527         if self.chunked:
 528             return self._read_chunked(amt)
 529
 530         if amt is None:
 531             # unbounded read
 532             if self.length is None:
 533                 s = self.fp.read()
 534             else:
 535                 s = self._safe_read(self.length)
 536                 self.length = 0
 537             self.close()        # we read everything
 538             return s
 539
 540         if self.length is not None:
 541             if amt > self.length:
 542                 # clip the read to the "end of response"
 543                 amt = self.length
 544
 545         # we do not use _safe_read() here because this may be a .will_close
 546         # connection, and the user is reading more bytes than will be provided
 547         # (for example, reading in 1k chunks)
 548         s = self.fp.read(amt)
 549         if self.length is not None:
 550             self.length -= len(s)
 551             if not self.length:
 552                 self.close()
 553         return s
 554
 555     def _read_chunked(self, amt):
 556         assert self.chunked != _UNKNOWN
 557         chunk_left = self.chunk_left
 558         value = []
 559         while True:
 560             if chunk_left is None:
 561                 line = self.fp.readline()
 562                 i = line.find(';')
 563                 if i >= 0:
 564                     line = line[:i] # strip chunk-extensions
 565                 try:
 566                     chunk_left = int(line, 16)
 567                 except ValueError:
 568                     # close the connection as protocol synchronisation is
 569                     # probably lost
 570                     self.close()
 571                     raise IncompleteRead(''.join(value))
 572                 if chunk_left == 0:
 573                     break
 574             if amt is None:
 575                 value.append(self._safe_read(chunk_left))
 576             elif amt < chunk_left:
 577                 value.append(self._safe_read(amt))
 578                 self.chunk_left = chunk_left - amt
 579                 return ''.join(value)
 580             elif amt == chunk_left:
 581                 value.append(self._safe_read(amt))
 582                 self._safe_read(2)  # toss the CRLF at the end of the chunk
 583                 self.chunk_left = None
 584                 return ''.join(value)
 585             else:
 586                 value.append(self._safe_read(chunk_left))
 587                 amt -= chunk_left
 588
 589             # we read the whole chunk, get another
 590             self._safe_read(2)      # toss the CRLF at the end of the chunk
 591             chunk_left = None
 592
 593         # read and discard trailer up to the CRLF terminator
 594         ### note: we shouldn't have any trailers!
 595         while True:
 596             line = self.fp.readline()
 597             if not line:
 598                 # a vanishingly small number of sites EOF without
 599                 # sending the trailer
 600                 break
 601             if line == '\r\n':
 602                 break
 603
 604         # we read everything; close the "file"
 605         self.close()
 606
 607         return ''.join(value)
 608
 609     def _safe_read(self, amt):
 610         """Read the number of bytes requested, compensating for partial reads.
 611
 612         Normally, we have a blocking socket, but a read() can be interrupted
 613         by a signal (resulting in a partial read).
 614
 615         Note that we cannot distinguish between EOF and an interrupt when zero
 616         bytes have been read. IncompleteRead() will be raised in this
 617         situation.
 618
 619         This function should be used when <amt> bytes "should" be present for
 620         reading. If the bytes are truly not available (due to EOF), then the
 621         IncompleteRead exception can be used to detect the problem.
 622         """
 623         # NOTE(gps): As of svn r74426 socket._fileobject.read(x) will never
 624         # return less than x bytes unless EOF is encountered.  It now handles
 625         # signal interruptions (socket.error EINTR) internally.  This code
 626         # never caught that exception anyways.  It seems largely pointless.
 627         # self.fp.read(amt) will work fine.
 628         s = []
 629         while amt > 0:
 630             chunk = self.fp.read(min(amt, MAXAMOUNT))
 631             if not chunk:
 632                 raise IncompleteRead(''.join(s), amt)
 633             s.append(chunk)
 634             amt -= len(chunk)
 635         return ''.join(s)
 636
 637     def getheader(self, name, default=None):
 638         if self.msg is None:
 639             raise ResponseNotReady()
 640         return self.msg.getheader(name, default)
 641
 642     def getheaders(self):
 643         """Return list of (header, value) tuples."""
 644         if self.msg is None:
 645             raise ResponseNotReady()
 646         return self.msg.items()
 647
 648
 649 class HTTPConnection:
 650
    # HTTP version spoken by this connection: putrequest() puts
    # _http_vsn_str into the request line and tests _http_vsn before adding
    # its HTTP/1.1-specific headers.
    _http_vsn = 11
    _http_vsn_str = 'HTTP/1.1'

    # Class-level defaults; instances or subclasses may override them.
    response_class = HTTPResponse   # class used to read replies (see _tunnel)
    default_port = HTTP_PORT        # used by _set_hostport() when no port is given
    auto_open = 1                   # if true, send() connects on demand
    debuglevel = 0                  # > 0 makes send() print what it transmits
    strict = 0                      # handed to response_class as its strict flag
 659
 660     def __init__(self, host, port=None, strict=None,
 661                  timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
 662         self.timeout = timeout
 663         self.sock = None
 664         self._buffer = []
 665         self.__response = None
 666         self.__state = _CS_IDLE
 667         self._method = None
 668         self._tunnel_host = None
 669         self._tunnel_port = None
 670         self._tunnel_headers = {}
 671
 672         self._set_hostport(host, port)
 673         if strict is not None:
 674             self.strict = strict
 675
 676     def set_tunnel(self, host, port=None, headers=None):
 677         """ Sets up the host and the port for the HTTP CONNECT Tunnelling.
 678
 679         The headers argument should be a mapping of extra HTTP headers
 680         to send with the CONNECT request.
 681         """
 682         self._tunnel_host = host
 683         self._tunnel_port = port
 684         if headers:
 685             self._tunnel_headers = headers
 686         else:
 687             self._tunnel_headers.clear()
 688
 689     def _set_hostport(self, host, port):
 690         if port is None:
 691             i = host.rfind(':')
 692             j = host.rfind(']')         # ipv6 addresses have [...]
 693             if i > j:
 694                 try:
 695                     port = int(host[i+1:])
 696                 except ValueError:
 697                     raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
 698                 host = host[:i]
 699             else:
 700                 port = self.default_port
 701             if host and host[0] == '[' and host[-1] == ']':
 702                 host = host[1:-1]
 703         self.host = host
 704         self.port = port
 705
    def set_debuglevel(self, level):
        """Set the debugging level.

        A level greater than 0 makes send() print the data it transmits.
        """
        self.debuglevel = level
 708
 709     def _tunnel(self):
 710         self._set_hostport(self._tunnel_host, self._tunnel_port)
 711         self.send("CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port))
 712         for header, value in self._tunnel_headers.iteritems():
 713             self.send("%s: %s\r\n" % (header, value))
 714         self.send("\r\n")
 715         response = self.response_class(self.sock, strict = self.strict,
 716                                        method = self._method)
 717         (version, code, message) = response._read_status()
 718
 719         if code != 200:
 720             self.close()
 721             raise socket.error("Tunnel connection failed: %d %s" % (code,
 722                                                                     message.strip()))
 723         while True:
 724             line = response.fp.readline()
 725             if line == '\r\n': break
 726
 727
 728     def connect(self):
 729         """Connect to the host and port specified in __init__."""
 730         self.sock = socket.create_connection((self.host,self.port),
 731                                              self.timeout)
 732
 733         if self._tunnel_host:
 734             self._tunnel()
 735
 736     def close(self):
 737         """Close the connection to the HTTP server."""
 738         if self.sock:
 739             self.sock.close()   # close it manually... there may be other refs
 740             self.sock = None
 741         if self.__response:
 742             self.__response.close()
 743             self.__response = None
 744         self.__state = _CS_IDLE
 745
 746     def send(self, str):
 747         """Send `str' to the server."""
 748         if self.sock is None:
 749             if self.auto_open:
 750                 self.connect()
 751             else:
 752                 raise NotConnected()
 753
 754         # send the data to the server. if we get a broken pipe, then close
 755         # the socket. we want to reconnect when somebody tries to send again.
 756         #
 757         # NOTE: we DO propagate the error, though, because we cannot simply
 758         #       ignore the error... the caller will know if they can retry.
 759         if self.debuglevel > 0:
 760             print "send:", repr(str)
 761         try:
 762             blocksize=8192
 763             if hasattr(str,'read') and not isinstance(str, array):
 764                 if self.debuglevel > 0: print "sendIng a read()able"
 765                 data=str.read(blocksize)
 766                 while data:
 767                     self.sock.sendall(data)
 768                     data=str.read(blocksize)
 769             else:
 770                 self.sock.sendall(str)
 771         except socket.error, v:
 772             if v.args[0] == 32:      # Broken pipe
 773                 self.close()
 774             raise
 775
    def _output(self, s):
        """Add a line of output to the current request buffer.

        Assumes that the line does *not* end with \\r\\n.
        """
        # Nothing is transmitted here; _send_output() later joins the
        # buffered lines with "\r\n" and sends them.
        self._buffer.append(s)
 782
 783     def _send_output(self, message_body=None):
 784         """Send the currently buffered request and clear the buffer.
 785
 786         Appends an extra \\r\\n to the buffer.
 787         A message_body may be specified, to be appended to the request.
 788         """
 789         self._buffer.extend(("", ""))
 790         msg = "\r\n".join(self._buffer)
 791         del self._buffer[:]
 792         # If msg and message_body are sent in a single send() call,
 793         # it will avoid performance problems caused by the interaction
 794         # between delayed ack and the Nagle algorithim.
 795         if isinstance(message_body, str):
 796             msg += message_body
 797             message_body = None
 798         self.send(msg)
 799         if message_body is not None:
 800             #message_body was not a string (i.e. it is a file) and
 801             #we must run the risk of Nagle
 802             self.send(message_body)
 803
 804     def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
 805         """Send a request to the server.
 806
 807         `method' specifies an HTTP request method, e.g. 'GET'.
 808         `url' specifies the object being requested, e.g. '/index.html'.
 809         `skip_host' if True does not add automatically a 'Host:' header
 810         `skip_accept_encoding' if True does not add automatically an
 811            'Accept-Encoding:' header
 812         """
 813
 814         # if a prior response has been completed, then forget about it.
 815         if self.__response and self.__response.isclosed():
 816             self.__response = None
 817
 818
 819         # in certain cases, we cannot issue another request on this connection.
 820         # this occurs when:
 821         #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
 822         #   2) a response to a previous request has signalled that it is going
 823         #      to close the connection upon completion.
 824         #   3) the headers for the previous response have not been read, thus
 825         #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
 826         #
 827         # if there is no prior response, then we can request at will.
 828         #
 829         # if point (2) is true, then we will have passed the socket to the
 830         # response (effectively meaning, "there is no prior response"), and
 831         # will open a new one when a new request is made.
 832         #
 833         # Note: if a prior response exists, then we *can* start a new request.
 834         #       We are not allowed to begin fetching the response to this new
 835         #       request, however, until that prior response is complete.
 836         #
 837         if self.__state == _CS_IDLE:
 838             self.__state = _CS_REQ_STARTED
 839         else:
 840             raise CannotSendRequest()
 841
 842         # Save the method we use, we need it later in the response phase
 843         self._method = method
 844         if not url:
 845             url = '/'
 846         str = '%s %s %s' % (method, url, self._http_vsn_str)
 847
 848         self._output(str)
 849
 850         if self._http_vsn == 11:
 851             # Issue some standard headers for better HTTP/1.1 compliance
 852
 853             if not skip_host:
 854                 # this header is issued *only* for HTTP/1.1
 855                 # connections. more specifically, this means it is
 856                 # only issued when the client uses the new
 857                 # HTTPConnection() class. backwards-compat clients
 858                 # will be using HTTP/1.0 and those clients may be
 859                 # issuing this header themselves. we should NOT issue
 860                 # it twice; some web servers (such as Apache) barf
 861                 # when they see two Host: headers
 862
 863                 # If we need a non-standard port,include it in the
 864                 # header.  If the request is going through a proxy,
 865                 # but the host of the actual URL, not the host of the
 866                 # proxy.
 867
 868                 netloc = ''
 869                 if url.startswith('http'):
 870                     nil, netloc, nil, nil, nil = urlsplit(url)
 871
 872                 if netloc:
 873                     try:
 874                         netloc_enc = netloc.encode("ascii")
 875                     except UnicodeEncodeError:
 876                         netloc_enc = netloc.encode("idna")
 877                     self.putheader('Host', netloc_enc)
 878                 else:
 879                     try:
 880                         host_enc = self.host.encode("ascii")
 881                     except UnicodeEncodeError:
 882                         host_enc = self.host.encode("idna")
 883                     if self.port == self.default_port:
 884                         self.putheader('Host', host_enc)
 885                     else:
 886                         self.putheader('Host', "%s:%s" % (host_enc, self.port))
 887
 888             # note: we are assuming that clients will not attempt to set these
 889             #       headers since *this* library must deal with the
 890             #       consequences. this also means that when the supporting
 891             #       libraries are updated to recognize other forms, then this
 892             #       code should be changed (removed or updated).
 893
 894             # we only want a Content-Encoding of "identity" since we don't
 895             # support encodings such as x-gzip or x-deflate.
 896             if not skip_accept_encoding:
 897                 self.putheader('Accept-Encoding', 'identity')
 898
 899             # we can accept "chunked" Transfer-Encodings, but no others
 900             # NOTE: no TE header implies *only* "chunked"
 901             #self.putheader('TE', 'chunked')
 902
 903             # if TE is supplied in the header, then it must appear in a
 904             # Connection header.
 905             #self.putheader('Connection', 'TE')
 906
 907         else:
 908             # For HTTP/1.0, the server will assume "not chunked"
 909             pass
 910
 911     def putheader(self, header, *values):
 912         """Send a request header line to the server.
 913
 914         For example: h.putheader('Accept', 'text/html')
 915         """
 916         if self.__state != _CS_REQ_STARTED:
 917             raise CannotSendHeader()
 918
 919         str = '%s: %s' % (header, '\r\n\t'.join(values))
 920         self._output(str)
 921
 922     def endheaders(self, message_body=None):
 923         """Indicate that the last header line has been sent to the server.
 924
 925         This method sends the request to the server.  The optional
 926         message_body argument can be used to pass message body
 927         associated with the request.  The message body will be sent in
 928         the same packet as the message headers if possible.  The
 929         message_body should be a string.
 930         """
 931         if self.__state == _CS_REQ_STARTED:
 932             self.__state = _CS_REQ_SENT
 933         else:
 934             raise CannotSendHeader()
 935         self._send_output(message_body)
 936
 937     def request(self, method, url, body=None, headers={}):
 938         """Send a complete request to the server."""
 939
 940         try:
 941             self._send_request(method, url, body, headers)
 942         except socket.error, v:
 943             # trap 'Broken pipe' if we're allowed to automatically reconnect
 944             if v.args[0] != 32 or not self.auto_open:
 945                 raise
 946             # try one more time
 947             self._send_request(method, url, body, headers)
 948
 949     def _set_content_length(self, body):
 950         # Set the content-length based on the body.
 951         thelen = None
 952         try:
 953             thelen = str(len(body))
 954         except TypeError, te:
 955             # If this is a file-like object, try to
 956             # fstat its file descriptor
 957             import os
 958             try:
 959                 thelen = str(os.fstat(body.fileno()).st_size)
 960             except (AttributeError, OSError):
 961                 # Don't send a length if this failed
 962                 if self.debuglevel > 0: print "Cannot stat!!"
 963
 964         if thelen is not None:
 965             self.putheader('Content-Length', thelen)
 966
 967     def _send_request(self, method, url, body, headers):
 968         # honour explicitly requested Host: and Accept-Encoding headers
 969         header_names = dict.fromkeys([k.lower() for k in headers])
 970         skips = {}
 971         if 'host' in header_names:
 972             skips['skip_host'] = 1
 973         if 'accept-encoding' in header_names:
 974             skips['skip_accept_encoding'] = 1
 975
 976         self.putrequest(method, url, **skips)
 977
 978         if body and ('content-length' not in header_names):
 979             self._set_content_length(body)
 980         for hdr, value in headers.iteritems():
 981             self.putheader(hdr, value)
 982         self.endheaders(body)
 983
 984     def getresponse(self, buffering=False):
 985         "Get the response from the server."
 986
 987         # if a prior response has been completed, then forget about it.
 988         if self.__response and self.__response.isclosed():
 989             self.__response = None
 990
 991         #
 992         # if a prior response exists, then it must be completed (otherwise, we
 993         # cannot read this response's header to determine the connection-close
 994         # behavior)
 995         #
 996         # note: if a prior response existed, but was connection-close, then the
 997         # socket and response were made independent of this HTTPConnection
 998         # object since a new request requires that we open a whole new
 999         # connection
1000         #
1001         # this means the prior response had one of two states:
1002         #   1) will_close: this connection was reset and the prior socket and
1003         #                  response operate independently
1004         #   2) persistent: the response was retained and we await its
1005         #                  isclosed() status to become true.
1006         #
1007         if self.__state != _CS_REQ_SENT or self.__response:
1008             raise ResponseNotReady()
1009
1010         args = (self.sock,)
1011         kwds = {"strict":self.strict, "method":self._method}
1012         if self.debuglevel > 0:
1013             args += (self.debuglevel,)
1014         if buffering:
1015             #only add this keyword if non-default, for compatibility with
1016             #other response_classes.
1017             kwds["buffering"] = True;
1018         response = self.response_class(*args, **kwds)
1019
1020         response.begin()
1021         assert response.will_close != _UNKNOWN
1022         self.__state = _CS_IDLE
1023
1024         if response.will_close:
1025             # this effectively passes the connection to the response
1026             self.close()
1027         else:
1028             # remember this, so we can tell when it is complete
1029             self.__response = response
1030
1031         return response
1032
1033
class HTTP:
    """Compatibility class with httplib.py from 1.5.

    Wraps an instance of _connection_class, switches it to HTTP/1.0 and
    exposes the old putrequest/putheader/endheaders/getreply/getfile
    interface on top of it.
    """

    _http_vsn = 10
    _http_vsn_str = 'HTTP/1.0'

    debuglevel = 0

    _connection_class = HTTPConnection

    def __init__(self, host='', port=None, strict=None):
        "Provide a default host, since the superclass requires one."

        # some joker passed 0 explicitly, meaning default port
        if port == 0:
            port = None

        # Note that we may pass an empty string as the host; this will throw
        # an error when we attempt to connect. Presumably, the client code
        # will call connect before then, with a proper host.
        self._setup(self._connection_class(host, port, strict))

    def _setup(self, conn):
        "Adopt conn as the underlying connection and delegate to it."
        self._conn = conn

        # set up delegation to flesh out interface
        self.send = conn.send
        self.putrequest = conn.putrequest
        self.putheader = conn.putheader
        self.endheaders = conn.endheaders
        self.set_debuglevel = conn.set_debuglevel

        # make the wrapped connection speak this class's protocol version
        conn._http_vsn = self._http_vsn
        conn._http_vsn_str = self._http_vsn_str

        self.file = None

    def connect(self, host=None, port=None):
        "Accept arguments to set the host/port, since the superclass doesn't."

        if host is not None:
            self._conn._set_hostport(host, port)
        self._conn.connect()

    def getfile(self):
        "Provide a getfile, since the superclass' does not use this concept."
        return self.file

    def getreply(self, buffering=False):
        """Compat definition since superclass does not define it.

        Returns a tuple consisting of:
        - server status code (e.g. '200' if all goes well)
        - server "reason" corresponding to status code
        - any RFC822 headers in the response from the server

        If the server sends a bad status line the tuple is
        (-1, <the offending line>, None) and getfile() returns a file
        object from which the rest of the server's data can be read.
        """
        try:
            if not buffering:
                response = self._conn.getresponse()
            else:
                #only add this keyword if non-default for compatibility
                #with other connection classes
                response = self._conn.getresponse(buffering)
        except BadStatusLine as e:
            ### hmm. if getresponse() ever closes the socket on a bad request,
            ### then we are going to have problems with self.sock

            ### should we keep this behavior? do people use it?
            # keep the socket open (as a file), and return it
            fp = self._conn.sock.makefile('rb', 0)

            # close our socket -- we want to restart after any protocol error.
            # close() also resets self.file to None, so the kept-open file
            # must be stored only *after* closing; otherwise getfile()
            # would never be able to return it.
            self.close()
            self.file = fp

            self.headers = None
            return -1, e.line, None

        self.headers = response.msg
        self.file = response.fp
        return response.status, response.reason, response.msg

    def close(self):
        "Close the underlying connection and drop the reference to the file."
        self._conn.close()

        # note that self.file == response.fp, which gets closed by the
        # superclass. just clear the object ref here.
        ### hmm. messy. if status==-1, then self.file is owned by us.
        ### well... we aren't explicitly closing, but losing this ref will
        ### do it
        self.file = None
1124
try:
    import ssl
except ImportError:
    # without the ssl module none of the HTTPS names below are defined
    pass
else:
    class HTTPSConnection(HTTPConnection):
        "This class allows communication via SSL."

        default_port = HTTPS_PORT

        def __init__(self, host, port=None, key_file=None, cert_file=None,
                     strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
            HTTPConnection.__init__(self, host, port, strict, timeout)
            self.key_file, self.cert_file = key_file, cert_file

        def connect(self):
            "Connect to a host on a given (SSL) port."

            address = (self.host, self.port)
            plain = socket.create_connection(address, self.timeout)
            if self._tunnel_host:
                # the plain socket is published as self.sock before
                # _tunnel() runs (presumably _tunnel() talks over it --
                # its definition is not in this part of the file)
                self.sock = plain
                self._tunnel()
            # from here on all traffic goes through the SSL wrapper
            self.sock = ssl.wrap_socket(plain, self.key_file, self.cert_file)

    __all__.append("HTTPSConnection")

    class HTTPS(HTTP):
        """Compatibility with 1.5 httplib interface

        Python 1.5.2 did not have an HTTPS class, but it defined an
        interface for sending http requests that is also useful for
        https.
        """

        _connection_class = HTTPSConnection

        def __init__(self, host='', port=None, key_file=None, cert_file=None,
                     strict=None):
            # provide a default host, pass the X509 cert info

            # urf. compensate for bad input.
            if port == 0:
                port = None
            conn = self._connection_class(host, port, key_file,
                                          cert_file, strict)
            self._setup(conn)

            # we never actually use these for anything, but we keep them
            # here for compatibility with post-1.5.2 CVS.
            self.key_file, self.cert_file = key_file, cert_file


    def FakeSocket(sock, sslobj):
        "Deprecated shim: warn and hand back sslobj unchanged."
        message = ("FakeSocket is deprecated, and won't be in 3.x.  "
                   "Use the result of ssl.wrap_socket() directly instead.")
        warnings.warn(message, DeprecationWarning, stacklevel=2)
        return sslobj
1183
1184
class HTTPException(Exception):
    """Common base class of the exception classes defined below.

    Subclasses that define an __init__ must call Exception.__init__
    or define self.args.  Otherwise, str() will fail.
    """
1189
class NotConnected(HTTPException):
    """HTTPException subclass with no state or behaviour of its own."""
    # NOTE(review): presumably raised when data is sent on a connection
    # that is not open -- the raise site is not in this part of the file,
    # confirm.
1192
class InvalidURL(HTTPException):
    """HTTPException subclass with no state or behaviour of its own."""
    # NOTE(review): presumably raised for a malformed host/port -- the
    # raise site is not in this part of the file, confirm.
1195
class UnknownProtocol(HTTPException):
    """HTTPException subclass that records a protocol version.

    The version passed to the constructor is available both as the only
    element of args and as the `version' attribute.
    """
    # NOTE(review): presumably raised for an unsupported HTTP version in
    # a status line -- raise site not visible here, confirm.
    def __init__(self, version):
        self.version = version
        self.args = (version,)
1200
class UnknownTransferEncoding(HTTPException):
    """HTTPException subclass with no state or behaviour of its own."""
    # NOTE(review): no raise site is visible in this part of the file;
    # judging by the name it concerns an unsupported Transfer-Encoding
    # -- confirm.
1203
class UnimplementedFileMode(HTTPException):
    """HTTPException subclass with no state or behaviour of its own."""
    # NOTE(review): no raise site is visible in this part of the file;
    # judging by the name it concerns an unsupported file mode -- confirm.
1206
class IncompleteRead(HTTPException):
    """HTTPException subclass describing a read that came up short.

    `partial' is the data that was read (also the only element of args);
    `expected', if not None, is the number of bytes still outstanding.
    """
    def __init__(self, partial, expected=None):
        self.partial = partial
        self.expected = expected
        self.args = (partial,)
    def __repr__(self):
        if self.expected is None:
            suffix = ''
        else:
            suffix = ', %i more expected' % self.expected
        return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), suffix)
    def __str__(self):
        # str() and repr() deliberately give the same text
        return repr(self)
1220
class ImproperConnectionState(HTTPException):
    """Base class of the errors raised when a connection method is called
    while the connection is in the wrong state for it."""
1223
class CannotSendRequest(ImproperConnectionState):
    """Raised by putrequest() when the connection is not idle."""
1226
class CannotSendHeader(ImproperConnectionState):
    """Raised by putheader() and endheaders() when no request is
    currently in the request-started state."""
1229
class ResponseNotReady(ImproperConnectionState):
    """Raised by getresponse() when no request has been sent or a
    previous response has not been completed yet."""
1232
class BadStatusLine(HTTPException):
    """HTTPException subclass carrying an unusable status line.

    `line' holds the offending line exactly as passed in.  The exception
    message (args) is that line too, except that an empty line is shown
    as its repr() so that str() of the exception is never blank.
    """
    def __init__(self, line):
        # An empty line would give an empty, uninformative error message,
        # so use "''" as the message in that case.  The `line' attribute
        # itself is left untouched for callers that inspect it.
        if line:
            self.args = line,
        else:
            self.args = repr(line),
        self.line = line
1237
# for backwards compatibility: `error' is kept as an alias of
# HTTPException, so catching `error' catches every exception class
# derived from HTTPException.
error = HTTPException
1240
class LineAndFileWrapper:
    """A limited file-like object for HTTP/0.9 responses.

    It serves the already-read `line' first and then continues with the
    wrapped `file'.  Attributes that are not defined here are looked up
    on the wrapped file.
    """

    # The status-line parsing code calls readline(), which normally
    # get the HTTP status line.  For a 0.9 response, however, this is
    # actually the first line of the body!  Clients need to get a
    # readable file object that contains that line.

    def __init__(self, line, file):
        self._line = line
        self._file = file
        self._line_consumed = 0
        self._line_offset = 0        # index of the first unread character
        self._line_left = len(line)  # number of unread characters

    def __getattr__(self, attr):
        # only called for names not found normally: delegate to the file
        return getattr(self._file, attr)

    def _done(self):
        # called when the last byte is read from the line.  After the
        # call, all read methods are delegated to the underlying file
        # object.
        self._line_consumed = 1
        self.read = self._file.read
        self.readline = self._file.readline
        self.readlines = self._file.readlines

    def read(self, amt=None):
        """Read up to amt characters; None or a negative amt reads it all.

        The data comes from the buffered line first and then, if more is
        wanted, from the wrapped file.
        """
        if self._line_consumed:
            return self._file.read(amt)
        assert self._line_left
        # A negative amt means "read everything", as it does for ordinary
        # file objects.  It must not reach the slicing branch below, where
        # it would act as a negative slice end and corrupt the offsets.
        read_all = amt is None or amt < 0
        if read_all or amt > self._line_left:
            s = self._line[self._line_offset:]
            self._done()
            if read_all:
                return s + self._file.read()
            else:
                return s + self._file.read(amt - len(s))
        else:
            assert amt <= self._line_left
            i = self._line_offset
            j = i + amt
            s = self._line[i:j]
            self._line_offset = j
            self._line_left -= amt
            if self._line_left == 0:
                self._done()
            return s

    def readline(self):
        "Return the unread rest of the buffered line, then the file's lines."
        if self._line_consumed:
            return self._file.readline()
        assert self._line_left
        s = self._line[self._line_offset:]
        self._done()
        return s

    def readlines(self, size=None):
        """Return the rest of the buffered line plus the file's lines.

        size, if given, is passed unchanged to the file's readlines().
        """
        if self._line_consumed:
            return self._file.readlines(size)
        assert self._line_left
        L = [self._line[self._line_offset:]]
        self._done()
        if size is None:
            return L + self._file.readlines()
        else:
            return L + self._file.readlines(size)
1308
1309 def test():
1310     """Test this module.
1311
1312     A hodge podge of tests collected here, because they have too many
1313     external dependencies for the regular test suite.
1314     """
1315
1316     import sys
1317     import getopt
1318     opts, args = getopt.getopt(sys.argv[1:], 'd')
1319     dl = 0
1320     for o, a in opts:
1321         if o == '-d': dl = dl + 1
1322     host = 'www.python.org'
1323     selector = '/'
1324     if args[0:]: host = args[0]
1325     if args[1:]: selector = args[1]
1326     h = HTTP()
1327     h.set_debuglevel(dl)
1328     h.connect(host)
1329     h.putrequest('GET', selector)
1330     h.endheaders()
1331     status, reason, headers = h.getreply()
1332     print 'status =', status
1333     print 'reason =', reason
1334     print "read", len(h.getfile().read())
1335     print
1336     if headers:
1337         for header in headers.headers: print header.strip()
1338     print
1339
1340     # minimal test that code to extract host from url works
1341     class HTTP11(HTTP):
1342         _http_vsn = 11
1343         _http_vsn_str = 'HTTP/1.1'
1344
1345     h = HTTP11('www.python.org')
1346     h.putrequest('GET', 'http://www.python.org/~jeremy/')
1347     h.endheaders()
1348     h.getreply()
1349     h.close()
1350
1351     try:
1352         import ssl
1353     except ImportError:
1354         pass
1355     else:
1356
1357         for host, selector in (('sourceforge.net', '/projects/python'),
1358                                ):
1359             print "https://%s%s" % (host, selector)
1360             hs = HTTPS()
1361             hs.set_debuglevel(dl)
1362             hs.connect(host)
1363             hs.putrequest('GET', selector)
1364             hs.endheaders()
1365             status, reason, headers = hs.getreply()
1366             print 'status =', status
1367             print 'reason =', reason
1368             print "read", len(hs.getfile().read())
1369             print
1370             if headers:
1371                 for header in headers.headers: print header.strip()
1372             print
1373
# Run the ad-hoc network tests when the module is executed as a script.
if __name__ == '__main__':
    test()