Lib/httplib.py

   1 """HTTP/1.1 client library
   2
   3 <intro stuff goes here>
   4 <other stuff, too>
   5
   6 HTTPConnection goes through a number of "states", which define when a client
   7 may legally make another request or fetch the response for a particular
   8 request. This diagram details these state transitions:
   9
  10     (null)
  11       |
  12       | HTTPConnection()
  13       v
  14     Idle
  15       |
  16       | putrequest()
  17       v
  18     Request-started
  19       |
  20       | ( putheader() )*  endheaders()
  21       v
  22     Request-sent
  23       |
  24       | response = getresponse()
  25       v
  26     Unread-response   [Response-headers-read]
  27       |\____________________
  28       |                     |
  29       | response.read()     | putrequest()
  30       v                     v
  31     Idle                  Req-started-unread-response
  32                      ______/|
  33                    /        |
  34    response.read() |        | ( putheader() )*  endheaders()
  35                    v        v
  36        Request-started    Req-sent-unread-response
  37                             |
  38                             | response.read()
  39                             v
  40                           Request-sent
  41
  42 This diagram presents the following rules:
  43   -- a second request may not be started until {response-headers-read}
  44   -- a response [object] cannot be retrieved until {request-sent}
  45   -- there is no differentiation between an unread response body and a
  46      partially read response body
  47
  48 Note: this enforcement is applied by the HTTPConnection class. The
  49       HTTPResponse class does not enforce this state machine, which
  50       implies sophisticated clients may accelerate the request/response
  51       pipeline. Caution should be taken, though: accelerating the states
  52       beyond the above pattern may imply knowledge of the server's
  53       connection-close behavior for certain requests. For example, it
  54       is impossible to tell whether the server will close the connection
  55       UNTIL the response headers have been read; this means that further
  56       requests cannot be placed into the pipeline until it is known that
  57       the server will NOT be closing the connection.
  58
  59 Logical State                  __state            __response
  60 -------------                  -------            ----------
  61 Idle                           _CS_IDLE           None
  62 Request-started                _CS_REQ_STARTED    None
  63 Request-sent                   _CS_REQ_SENT       None
  64 Unread-response                _CS_IDLE           <response_class>
  65 Req-started-unread-response    _CS_REQ_STARTED    <response_class>
  66 Req-sent-unread-response       _CS_REQ_SENT       <response_class>
  67 """
  68
  69 import socket
  70 from sys import py3kwarning
  71 from urlparse import urlsplit
  72 import warnings
  73 with warnings.catch_warnings():
  74     if py3kwarning:
  75         warnings.filterwarnings("ignore", ".*mimetools has been removed",
  76                                 DeprecationWarning)
  77     import mimetools
  78
  79 try:
  80     from cStringIO import StringIO
  81 except ImportError:
  82     from StringIO import StringIO
  83
# Names exported by "from httplib import *".
__all__ = ["HTTP", "HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "error", "responses"]
  90
  91 HTTP_PORT = 80
  92 HTTPS_PORT = 443
  93
  94 _UNKNOWN = 'UNKNOWN'
  95
  96 # connection states
  97 _CS_IDLE = 'Idle'
  98 _CS_REQ_STARTED = 'Request-started'
  99 _CS_REQ_SENT = 'Request-sent'
 100
 101 # status codes
 102 # informational
 103 CONTINUE = 100
 104 SWITCHING_PROTOCOLS = 101
 105 PROCESSING = 102
 106
 107 # successful
 108 OK = 200
 109 CREATED = 201
 110 ACCEPTED = 202
 111 NON_AUTHORITATIVE_INFORMATION = 203
 112 NO_CONTENT = 204
 113 RESET_CONTENT = 205
 114 PARTIAL_CONTENT = 206
 115 MULTI_STATUS = 207
 116 IM_USED = 226
 117
 118 # redirection
 119 MULTIPLE_CHOICES = 300
 120 MOVED_PERMANENTLY = 301
 121 FOUND = 302
 122 SEE_OTHER = 303
 123 NOT_MODIFIED = 304
 124 USE_PROXY = 305
 125 TEMPORARY_REDIRECT = 307
 126
 127 # client error
 128 BAD_REQUEST = 400
 129 UNAUTHORIZED = 401
 130 PAYMENT_REQUIRED = 402
 131 FORBIDDEN = 403
 132 NOT_FOUND = 404
 133 METHOD_NOT_ALLOWED = 405
 134 NOT_ACCEPTABLE = 406
 135 PROXY_AUTHENTICATION_REQUIRED = 407
 136 REQUEST_TIMEOUT = 408
 137 CONFLICT = 409
 138 GONE = 410
 139 LENGTH_REQUIRED = 411
 140 PRECONDITION_FAILED = 412
 141 REQUEST_ENTITY_TOO_LARGE = 413
 142 REQUEST_URI_TOO_LONG = 414
 143 UNSUPPORTED_MEDIA_TYPE = 415
 144 REQUESTED_RANGE_NOT_SATISFIABLE = 416
 145 EXPECTATION_FAILED = 417
 146 UNPROCESSABLE_ENTITY = 422
 147 LOCKED = 423
 148 FAILED_DEPENDENCY = 424
 149 UPGRADE_REQUIRED = 426
 150
 151 # server error
 152 INTERNAL_SERVER_ERROR = 500
 153 NOT_IMPLEMENTED = 501
 154 BAD_GATEWAY = 502
 155 SERVICE_UNAVAILABLE = 503
 156 GATEWAY_TIMEOUT = 504
 157 HTTP_VERSION_NOT_SUPPORTED = 505
 158 INSUFFICIENT_STORAGE = 507
 159 NOT_EXTENDED = 510
 160
 161 # Mapping status codes to official W3C names
 162 responses = {
 163     100: 'Continue',
 164     101: 'Switching Protocols',
 165
 166     200: 'OK',
 167     201: 'Created',
 168     202: 'Accepted',
 169     203: 'Non-Authoritative Information',
 170     204: 'No Content',
 171     205: 'Reset Content',
 172     206: 'Partial Content',
 173
 174     300: 'Multiple Choices',
 175     301: 'Moved Permanently',
 176     302: 'Found',
 177     303: 'See Other',
 178     304: 'Not Modified',
 179     305: 'Use Proxy',
 180     306: '(Unused)',
 181     307: 'Temporary Redirect',
 182
 183     400: 'Bad Request',
 184     401: 'Unauthorized',
 185     402: 'Payment Required',
 186     403: 'Forbidden',
 187     404: 'Not Found',
 188     405: 'Method Not Allowed',
 189     406: 'Not Acceptable',
 190     407: 'Proxy Authentication Required',
 191     408: 'Request Timeout',
 192     409: 'Conflict',
 193     410: 'Gone',
 194     411: 'Length Required',
 195     412: 'Precondition Failed',
 196     413: 'Request Entity Too Large',
 197     414: 'Request-URI Too Long',
 198     415: 'Unsupported Media Type',
 199     416: 'Requested Range Not Satisfiable',
 200     417: 'Expectation Failed',
 201
 202     500: 'Internal Server Error',
 203     501: 'Not Implemented',
 204     502: 'Bad Gateway',
 205     503: 'Service Unavailable',
 206     504: 'Gateway Timeout',
 207     505: 'HTTP Version Not Supported',
 208 }
 209
 210 # maximal amount of data to read at one time in _safe_read
 211 MAXAMOUNT = 1048576
 212
 213 class HTTPMessage(mimetools.Message):
 214
 215     def addheader(self, key, value):
 216         """Add header for field key handling repeats."""
 217         prev = self.dict.get(key)
 218         if prev is None:
 219             self.dict[key] = value
 220         else:
 221             combined = ", ".join((prev, value))
 222             self.dict[key] = combined
 223
 224     def addcontinue(self, key, more):
 225         """Add more field data from a continuation line."""
 226         prev = self.dict[key]
 227         self.dict[key] = prev + "\n " + more
 228
 229     def readheaders(self):
 230         """Read header lines.
 231
 232         Read header lines up to the entirely blank line that terminates them.
 233         The (normally blank) line that ends the headers is skipped, but not
 234         included in the returned list.  If a non-header line ends the headers,
 235         (which is an error), an attempt is made to backspace over it; it is
 236         never included in the returned list.
 237
 238         The variable self.status is set to the empty string if all went well,
 239         otherwise it is an error message.  The variable self.headers is a
 240         completely uninterpreted list of lines contained in the header (so
 241         printing them will reproduce the header exactly as it appears in the
 242         file).
 243
 244         If multiple header fields with the same name occur, they are combined
 245         according to the rules in RFC 2616 sec 4.2:
 246
 247         Appending each subsequent field-value to the first, each separated
 248         by a comma. The order in which header fields with the same field-name
 249         are received is significant to the interpretation of the combined
 250         field value.
 251         """
 252         # XXX The implementation overrides the readheaders() method of
 253         # rfc822.Message.  The base class design isn't amenable to
 254         # customized behavior here so the method here is a copy of the
 255         # base class code with a few small changes.
 256
 257         self.dict = {}
 258         self.unixfrom = ''
 259         self.headers = hlist = []
 260         self.status = ''
 261         headerseen = ""
 262         firstline = 1
 263         startofline = unread = tell = None
 264         if hasattr(self.fp, 'unread'):
 265             unread = self.fp.unread
 266         elif self.seekable:
 267             tell = self.fp.tell
 268         while True:
 269             if tell:
 270                 try:
 271                     startofline = tell()
 272                 except IOError:
 273                     startofline = tell = None
 274                     self.seekable = 0
 275             line = self.fp.readline()
 276             if not line:
 277                 self.status = 'EOF in headers'
 278                 break
 279             # Skip unix From name time lines
 280             if firstline and line.startswith('From '):
 281                 self.unixfrom = self.unixfrom + line
 282                 continue
 283             firstline = 0
 284             if headerseen and line[0] in ' \t':
 285                 # XXX Not sure if continuation lines are handled properly
 286                 # for http and/or for repeating headers
 287                 # It's a continuation line.
 288                 hlist.append(line)
 289                 self.addcontinue(headerseen, line.strip())
 290                 continue
 291             elif self.iscomment(line):
 292                 # It's a comment.  Ignore it.
 293                 continue
 294             elif self.islast(line):
 295                 # Note! No pushback here!  The delimiter line gets eaten.
 296                 break
 297             headerseen = self.isheader(line)
 298             if headerseen:
 299                 # It's a legal header line, save it.
 300                 hlist.append(line)
 301                 self.addheader(headerseen, line[len(headerseen)+1:].strip())
 302                 continue
 303             else:
 304                 # It's not a header line; throw it back and stop here.
 305                 if not self.dict:
 306                     self.status = 'No headers'
 307                 else:
 308                     self.status = 'Non-header line where header expected'
 309                 # Try to undo the read.
 310                 if unread:
 311                     unread(line)
 312                 elif tell:
 313                     self.fp.seek(startofline)
 314                 else:
 315                     self.status = self.status + '; bad seek'
 316                 break
 317
 318 class HTTPResponse:
 319
 320     # strict: If true, raise BadStatusLine if the status line can't be
 321     # parsed as a valid HTTP/1.0 or 1.1 status line.  By default it is
 322     # false because it prevents clients from talking to HTTP/0.9
 323     # servers.  Note that a response with a sufficiently corrupted
 324     # status line will look like an HTTP/0.9 response.
 325
 326     # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
 327
 328     def __init__(self, sock, debuglevel=0, strict=0, method=None, buffering=False):
 329         if buffering:
 330             # The caller won't be using any sock.recv() calls, so buffering
 331             # is fine and recommendef for performance
 332             self.fp = sock.makefile('rb')
 333         else:
 334             # The buffer size is specified as zero, because the headers of
 335             # the response are read with readline().  If the reads were
 336             # buffered the readline() calls could consume some of the
 337             # response, which make be read via a recv() on the underlying
 338             # socket.
 339             self.fp = sock.makefile('rb', 0)
 340         self.debuglevel = debuglevel
 341         self.strict = strict
 342         self._method = method
 343
 344         self.msg = None
 345
 346         # from the Status-Line of the response
 347         self.version = _UNKNOWN # HTTP-Version
 348         self.status = _UNKNOWN  # Status-Code
 349         self.reason = _UNKNOWN  # Reason-Phrase
 350
 351         self.chunked = _UNKNOWN         # is "chunked" being used?
 352         self.chunk_left = _UNKNOWN      # bytes left to read in current chunk
 353         self.length = _UNKNOWN          # number of bytes left in response
 354         self.will_close = _UNKNOWN      # conn will close at end of response
 355
 356     def _read_status(self):
 357         # Initialize with Simple-Response defaults
 358         line = self.fp.readline()
 359         if self.debuglevel > 0:
 360             print "reply:", repr(line)
 361         if not line:
 362             # Presumably, the server closed the connection before
 363             # sending a valid response.
 364             raise BadStatusLine(line)
 365         try:
 366             [version, status, reason] = line.split(None, 2)
 367         except ValueError:
 368             try:
 369                 [version, status] = line.split(None, 1)
 370                 reason = ""
 371             except ValueError:
 372                 # empty version will cause next test to fail and status
 373                 # will be treated as 0.9 response.
 374                 version = ""
 375         if not version.startswith('HTTP/'):
 376             if self.strict:
 377                 self.close()
 378                 raise BadStatusLine(line)
 379             else:
 380                 # assume it's a Simple-Response from an 0.9 server
 381                 self.fp = LineAndFileWrapper(line, self.fp)
 382                 return "HTTP/0.9", 200, ""
 383
 384         # The status code is a three-digit number
 385         try:
 386             status = int(status)
 387             if status < 100 or status > 999:
 388                 raise BadStatusLine(line)
 389         except ValueError:
 390             raise BadStatusLine(line)
 391         return version, status, reason
 392
 393     def begin(self):
 394         if self.msg is not None:
 395             # we've already started reading the response
 396             return
 397
 398         # read until we get a non-100 response
 399         while True:
 400             version, status, reason = self._read_status()
 401             if status != CONTINUE:
 402                 break
 403             # skip the header from the 100 response
 404             while True:
 405                 skip = self.fp.readline().strip()
 406                 if not skip:
 407                     break
 408                 if self.debuglevel > 0:
 409                     print "header:", skip
 410
 411         self.status = status
 412         self.reason = reason.strip()
 413         if version == 'HTTP/1.0':
 414             self.version = 10
 415         elif version.startswith('HTTP/1.'):
 416             self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
 417         elif version == 'HTTP/0.9':
 418             self.version = 9
 419         else:
 420             raise UnknownProtocol(version)
 421
 422         if self.version == 9:
 423             self.length = None
 424             self.chunked = 0
 425             self.will_close = 1
 426             self.msg = HTTPMessage(StringIO())
 427             return
 428
 429         self.msg = HTTPMessage(self.fp, 0)
 430         if self.debuglevel > 0:
 431             for hdr in self.msg.headers:
 432                 print "header:", hdr,
 433
 434         # don't let the msg keep an fp
 435         self.msg.fp = None
 436
 437         # are we using the chunked-style of transfer encoding?
 438         tr_enc = self.msg.getheader('transfer-encoding')
 439         if tr_enc and tr_enc.lower() == "chunked":
 440             self.chunked = 1
 441             self.chunk_left = None
 442         else:
 443             self.chunked = 0
 444
 445         # will the connection close at the end of the response?
 446         self.will_close = self._check_close()
 447
 448         # do we have a Content-Length?
 449         # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
 450         length = self.msg.getheader('content-length')
 451         if length and not self.chunked:
 452             try:
 453                 self.length = int(length)
 454             except ValueError:
 455                 self.length = None
 456             else:
 457                 if self.length < 0:  # ignore nonsensical negative lengths
 458                     self.length = None
 459         else:
 460             self.length = None
 461
 462         # does the body have a fixed length? (of zero)
 463         if (status == NO_CONTENT or status == NOT_MODIFIED or
 464             100 <= status < 200 or      # 1xx codes
 465             self._method == 'HEAD'):
 466             self.length = 0
 467
 468         # if the connection remains open, and we aren't using chunked, and
 469         # a content-length was not provided, then assume that the connection
 470         # WILL close.
 471         if not self.will_close and \
 472            not self.chunked and \
 473            self.length is None:
 474             self.will_close = 1
 475
 476     def _check_close(self):
 477         conn = self.msg.getheader('connection')
 478         if self.version == 11:
 479             # An HTTP/1.1 proxy is assumed to stay open unless
 480             # explicitly closed.
 481             conn = self.msg.getheader('connection')
 482             if conn and "close" in conn.lower():
 483                 return True
 484             return False
 485
 486         # Some HTTP/1.0 implementations have support for persistent
 487         # connections, using rules different than HTTP/1.1.
 488
 489         # For older HTTP, Keep-Alive indicates persistent connection.
 490         if self.msg.getheader('keep-alive'):
 491             return False
 492
 493         # At least Akamai returns a "Connection: Keep-Alive" header,
 494         # which was supposed to be sent by the client.
 495         if conn and "keep-alive" in conn.lower():
 496             return False
 497
 498         # Proxy-Connection is a netscape hack.
 499         pconn = self.msg.getheader('proxy-connection')
 500         if pconn and "keep-alive" in pconn.lower():
 501             return False
 502
 503         # otherwise, assume it will close
 504         return True
 505
 506     def close(self):
 507         if self.fp:
 508             self.fp.close()
 509             self.fp = None
 510
 511     def isclosed(self):
 512         # NOTE: it is possible that we will not ever call self.close(). This
 513         #       case occurs when will_close is TRUE, length is None, and we
 514         #       read up to the last byte, but NOT past it.
 515         #
 516         # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
 517         #          called, meaning self.isclosed() is meaningful.
 518         return self.fp is None
 519
 520     # XXX It would be nice to have readline and __iter__ for this, too.
 521
 522     def read(self, amt=None):
 523         if self.fp is None:
 524             return ''
 525
 526         if self.chunked:
 527             return self._read_chunked(amt)
 528
 529         if amt is None:
 530             # unbounded read
 531             if self.length is None:
 532                 s = self.fp.read()
 533             else:
 534                 s = self._safe_read(self.length)
 535                 self.length = 0
 536             self.close()        # we read everything
 537             return s
 538
 539         if self.length is not None:
 540             if amt > self.length:
 541                 # clip the read to the "end of response"
 542                 amt = self.length
 543
 544         # we do not use _safe_read() here because this may be a .will_close
 545         # connection, and the user is reading more bytes than will be provided
 546         # (for example, reading in 1k chunks)
 547         s = self.fp.read(amt)
 548         if self.length is not None:
 549             self.length -= len(s)
 550             if not self.length:
 551                 self.close()
 552         return s
 553
 554     def _read_chunked(self, amt):
 555         assert self.chunked != _UNKNOWN
 556         chunk_left = self.chunk_left
 557         value = ''
 558
 559         # XXX This accumulates chunks by repeated string concatenation,
 560         # which is not efficient as the number or size of chunks gets big.
 561         while True:
 562             if chunk_left is None:
 563                 line = self.fp.readline()
 564                 i = line.find(';')
 565                 if i >= 0:
 566                     line = line[:i] # strip chunk-extensions
 567                 try:
 568                     chunk_left = int(line, 16)
 569                 except ValueError:
 570                     # close the connection as protocol synchronisation is
 571                     # probably lost
 572                     self.close()
 573                     raise IncompleteRead(value)
 574                 if chunk_left == 0:
 575                     break
 576             if amt is None:
 577                 value += self._safe_read(chunk_left)
 578             elif amt < chunk_left:
 579                 value += self._safe_read(amt)
 580                 self.chunk_left = chunk_left - amt
 581                 return value
 582             elif amt == chunk_left:
 583                 value += self._safe_read(amt)
 584                 self._safe_read(2)  # toss the CRLF at the end of the chunk
 585                 self.chunk_left = None
 586                 return value
 587             else:
 588                 value += self._safe_read(chunk_left)
 589                 amt -= chunk_left
 590
 591             # we read the whole chunk, get another
 592             self._safe_read(2)      # toss the CRLF at the end of the chunk
 593             chunk_left = None
 594
 595         # read and discard trailer up to the CRLF terminator
 596         ### note: we shouldn't have any trailers!
 597         while True:
 598             line = self.fp.readline()
 599             if not line:
 600                 # a vanishingly small number of sites EOF without
 601                 # sending the trailer
 602                 break
 603             if line == '\r\n':
 604                 break
 605
 606         # we read everything; close the "file"
 607         self.close()
 608
 609         return value
 610
 611     def _safe_read(self, amt):
 612         """Read the number of bytes requested, compensating for partial reads.
 613
 614         Normally, we have a blocking socket, but a read() can be interrupted
 615         by a signal (resulting in a partial read).
 616
 617         Note that we cannot distinguish between EOF and an interrupt when zero
 618         bytes have been read. IncompleteRead() will be raised in this
 619         situation.
 620
 621         This function should be used when <amt> bytes "should" be present for
 622         reading. If the bytes are truly not available (due to EOF), then the
 623         IncompleteRead exception can be used to detect the problem.
 624         """
 625         s = []
 626         while amt > 0:
 627             chunk = self.fp.read(min(amt, MAXAMOUNT))
 628             if not chunk:
 629                 raise IncompleteRead(''.join(s), amt)
 630             s.append(chunk)
 631             amt -= len(chunk)
 632         return ''.join(s)
 633
 634     def getheader(self, name, default=None):
 635         if self.msg is None:
 636             raise ResponseNotReady()
 637         return self.msg.getheader(name, default)
 638
 639     def getheaders(self):
 640         """Return list of (header, value) tuples."""
 641         if self.msg is None:
 642             raise ResponseNotReady()
 643         return self.msg.items()
 644
 645
class HTTPConnection:

    # Protocol version, as a number and as written on the request line
    # by putrequest().
    _http_vsn = 11
    _http_vsn_str = 'HTTP/1.1'

    # Class that _tunnel() instantiates to read what the server sends back.
    response_class = HTTPResponse
    # Port that _set_hostport() falls back to when none is given.
    default_port = HTTP_PORT
    # When true, send() calls connect() if there is no socket yet;
    # otherwise it raises NotConnected.
    auto_open = 1
    # Values above zero make send() print what it transmits.
    debuglevel = 0
    # Class-wide default; __init__ overrides it per instance when its
    # strict argument is not None.
    strict = 0
 656
 657     def __init__(self, host, port=None, strict=None,
 658                  timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
 659         self.timeout = timeout
 660         self.sock = None
 661         self._buffer = []
 662         self.__response = None
 663         self.__state = _CS_IDLE
 664         self._method = None
 665         self._tunnel_host = None
 666         self._tunnel_port = None
 667
 668         self._set_hostport(host, port)
 669         if strict is not None:
 670             self.strict = strict
 671
 672     def set_tunnel(self, host, port=None):
 673         """ Sets up the host and the port for the HTTP CONNECT Tunnelling."""
 674         self._tunnel_host = host
 675         self._tunnel_port = port
 676
 677     def _set_hostport(self, host, port):
 678         if port is None:
 679             i = host.rfind(':')
 680             j = host.rfind(']')         # ipv6 addresses have [...]
 681             if i > j:
 682                 try:
 683                     port = int(host[i+1:])
 684                 except ValueError:
 685                     raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
 686                 host = host[:i]
 687             else:
 688                 port = self.default_port
 689             if host and host[0] == '[' and host[-1] == ']':
 690                 host = host[1:-1]
 691         self.host = host
 692         self.port = port
 693
    def set_debuglevel(self, level):
        """Set the debugging level; send() prints what it transmits when > 0."""
        self.debuglevel = level
 696
 697     def _tunnel(self):
 698         self._set_hostport(self._tunnel_host, self._tunnel_port)
 699         self.send("CONNECT %s:%d HTTP/1.0\r\n\r\n" % (self.host, self.port))
 700         response = self.response_class(self.sock, strict = self.strict,
 701                                        method = self._method)
 702         (version, code, message) = response._read_status()
 703
 704         if code != 200:
 705             self.close()
 706             raise socket.error, "Tunnel connection failed: %d %s" % (code,
 707                                                                      message.strip())
 708         while True:
 709             line = response.fp.readline()
 710             if line == '\r\n': break
 711
 712
 713     def connect(self):
 714         """Connect to the host and port specified in __init__."""
 715         self.sock = socket.create_connection((self.host,self.port),
 716                                              self.timeout)
 717
 718         if self._tunnel_host:
 719             self._tunnel()
 720
 721     def close(self):
 722         """Close the connection to the HTTP server."""
 723         if self.sock:
 724             self.sock.close()   # close it manually... there may be other refs
 725             self.sock = None
 726         if self.__response:
 727             self.__response.close()
 728             self.__response = None
 729         self.__state = _CS_IDLE
 730
 731     def send(self, str):
 732         """Send `str' to the server."""
 733         if self.sock is None:
 734             if self.auto_open:
 735                 self.connect()
 736             else:
 737                 raise NotConnected()
 738
 739         # send the data to the server. if we get a broken pipe, then close
 740         # the socket. we want to reconnect when somebody tries to send again.
 741         #
 742         # NOTE: we DO propagate the error, though, because we cannot simply
 743         #       ignore the error... the caller will know if they can retry.
 744         if self.debuglevel > 0:
 745             print "send:", repr(str)
 746         try:
 747             blocksize=8192
 748             if hasattr(str,'read') :
 749                 if self.debuglevel > 0: print "sendIng a read()able"
 750                 data=str.read(blocksize)
 751                 while data:
 752                     self.sock.sendall(data)
 753                     data=str.read(blocksize)
 754             else:
 755                 self.sock.sendall(str)
 756         except socket.error, v:
 757             if v[0] == 32:      # Broken pipe
 758                 self.close()
 759             raise
 760
    def _output(self, s):
        """Add a line of output to the current request buffer.

        Assumes that the line does *not* end with \\r\\n.
        """
        # Lines are only queued here; _send_output() joins them with CRLF
        # and writes them to the server.
        self._buffer.append(s)
 767
 768     def _send_output(self, message_body=None):
 769         """Send the currently buffered request and clear the buffer.
 770
 771         Appends an extra \\r\\n to the buffer.
 772         A message_body may be specified, to be appended to the request.
 773         """
 774         self._buffer.extend(("", ""))
 775         msg = "\r\n".join(self._buffer)
 776         del self._buffer[:]
 777         # If msg and message_body are sent in a single send() call,
 778         # it will avoid performance problems caused by the interaction
 779         # between delayed ack and the Nagle algorithim.
 780         if isinstance(message_body, str):
 781             msg += message_body
 782             message_body = None
 783         self.send(msg)
 784         if message_body is not None:
 785             #message_body was not a string (i.e. it is a file) and
 786             #we must run the risk of Nagle
 787             self.send(message_body)
 788
 789     def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
 790         """Send a request to the server.
 791
 792         `method' specifies an HTTP request method, e.g. 'GET'.
 793         `url' specifies the object being requested, e.g. '/index.html'.
 794         `skip_host' if True does not add automatically a 'Host:' header
 795         `skip_accept_encoding' if True does not add automatically an
 796            'Accept-Encoding:' header
 797         """
 798
 799         # if a prior response has been completed, then forget about it.
 800         if self.__response and self.__response.isclosed():
 801             self.__response = None
 802
 803
 804         # in certain cases, we cannot issue another request on this connection.
 805         # this occurs when:
 806         #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
 807         #   2) a response to a previous request has signalled that it is going
 808         #      to close the connection upon completion.
 809         #   3) the headers for the previous response have not been read, thus
 810         #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
 811         #
 812         # if there is no prior response, then we can request at will.
 813         #
 814         # if point (2) is true, then we will have passed the socket to the
 815         # response (effectively meaning, "there is no prior response"), and
 816         # will open a new one when a new request is made.
 817         #
 818         # Note: if a prior response exists, then we *can* start a new request.
 819         #       We are not allowed to begin fetching the response to this new
 820         #       request, however, until that prior response is complete.
 821         #
 822         if self.__state == _CS_IDLE:
 823             self.__state = _CS_REQ_STARTED
 824         else:
 825             raise CannotSendRequest()
 826
 827         # Save the method we use, we need it later in the response phase
 828         self._method = method
 829         if not url:
 830             url = '/'
 831         str = '%s %s %s' % (method, url, self._http_vsn_str)
 832
 833         self._output(str)
 834
 835         if self._http_vsn == 11:
 836             # Issue some standard headers for better HTTP/1.1 compliance
 837
 838             if not skip_host:
 839                 # this header is issued *only* for HTTP/1.1
 840                 # connections. more specifically, this means it is
 841                 # only issued when the client uses the new
 842                 # HTTPConnection() class. backwards-compat clients
 843                 # will be using HTTP/1.0 and those clients may be
 844                 # issuing this header themselves. we should NOT issue
 845                 # it twice; some web servers (such as Apache) barf
 846                 # when they see two Host: headers
 847
 848                 # If we need a non-standard port,include it in the
 849                 # header.  If the request is going through a proxy,
 850                 # but the host of the actual URL, not the host of the
 851                 # proxy.
 852
 853                 netloc = ''
 854                 if url.startswith('http'):
 855                     nil, netloc, nil, nil, nil = urlsplit(url)
 856
 857                 if netloc:
 858                     try:
 859                         netloc_enc = netloc.encode("ascii")
 860                     except UnicodeEncodeError:
 861                         netloc_enc = netloc.encode("idna")
 862                     self.putheader('Host', netloc_enc)
 863                 else:
 864                     try:
 865                         host_enc = self.host.encode("ascii")
 866                     except UnicodeEncodeError:
 867                         host_enc = self.host.encode("idna")
 868                     if self.port == self.default_port:
 869                         self.putheader('Host', host_enc)
 870                     else:
 871                         self.putheader('Host', "%s:%s" % (host_enc, self.port))
 872
 873             # note: we are assuming that clients will not attempt to set these
 874             #       headers since *this* library must deal with the
 875             #       consequences. this also means that when the supporting
 876             #       libraries are updated to recognize other forms, then this
 877             #       code should be changed (removed or updated).
 878
 879             # we only want a Content-Encoding of "identity" since we don't
 880             # support encodings such as x-gzip or x-deflate.
 881             if not skip_accept_encoding:
 882                 self.putheader('Accept-Encoding', 'identity')
 883
 884             # we can accept "chunked" Transfer-Encodings, but no others
 885             # NOTE: no TE header implies *only* "chunked"
 886             #self.putheader('TE', 'chunked')
 887
 888             # if TE is supplied in the header, then it must appear in a
 889             # Connection header.
 890             #self.putheader('Connection', 'TE')
 891
 892         else:
 893             # For HTTP/1.0, the server will assume "not chunked"
 894             pass
 895
 896     def putheader(self, header, *values):
 897         """Send a request header line to the server.
 898
 899         For example: h.putheader('Accept', 'text/html')
 900         """
 901         if self.__state != _CS_REQ_STARTED:
 902             raise CannotSendHeader()
 903
 904         str = '%s: %s' % (header, '\r\n\t'.join(values))
 905         self._output(str)
 906
 907     def endheaders(self, message_body=None):
 908         """Indicate that the last header line has been sent to the server.
 909
 910         This method sends the request to the server.  The optional
 911         message_body argument can be used to pass message body
 912         associated with the request.  The message body will be sent in
 913         the same packet as the message headers if possible.  The
 914         message_body should be a string.
 915         """
 916         if self.__state == _CS_REQ_STARTED:
 917             self.__state = _CS_REQ_SENT
 918         else:
 919             raise CannotSendHeader()
 920         self._send_output(message_body)
 921
 922     def request(self, method, url, body=None, headers={}):
 923         """Send a complete request to the server."""
 924
 925         try:
 926             self._send_request(method, url, body, headers)
 927         except socket.error, v:
 928             # trap 'Broken pipe' if we're allowed to automatically reconnect
 929             if v[0] != 32 or not self.auto_open:
 930                 raise
 931             # try one more time
 932             self._send_request(method, url, body, headers)
 933
 934     def _set_content_length(self, body):
 935         # Set the content-length based on the body.
 936         thelen = None
 937         try:
 938             thelen = str(len(body))
 939         except TypeError, te:
 940             # If this is a file-like object, try to
 941             # fstat its file descriptor
 942             import os
 943             try:
 944                 thelen = str(os.fstat(body.fileno()).st_size)
 945             except (AttributeError, OSError):
 946                 # Don't send a length if this failed
 947                 if self.debuglevel > 0: print "Cannot stat!!"
 948
 949         if thelen is not None:
 950             self.putheader('Content-Length', thelen)
 951
 952     def _send_request(self, method, url, body, headers):
 953         # honour explicitly requested Host: and Accept-Encoding headers
 954         header_names = dict.fromkeys([k.lower() for k in headers])
 955         skips = {}
 956         if 'host' in header_names:
 957             skips['skip_host'] = 1
 958         if 'accept-encoding' in header_names:
 959             skips['skip_accept_encoding'] = 1
 960
 961         self.putrequest(method, url, **skips)
 962
 963         if body and ('content-length' not in header_names):
 964             self._set_content_length(body)
 965         for hdr, value in headers.iteritems():
 966             self.putheader(hdr, value)
 967         self.endheaders(body)
 968
 969     def getresponse(self, buffering=False):
 970         "Get the response from the server."
 971
 972         # if a prior response has been completed, then forget about it.
 973         if self.__response and self.__response.isclosed():
 974             self.__response = None
 975
 976         #
 977         # if a prior response exists, then it must be completed (otherwise, we
 978         # cannot read this response's header to determine the connection-close
 979         # behavior)
 980         #
 981         # note: if a prior response existed, but was connection-close, then the
 982         # socket and response were made independent of this HTTPConnection
 983         # object since a new request requires that we open a whole new
 984         # connection
 985         #
 986         # this means the prior response had one of two states:
 987         #   1) will_close: this connection was reset and the prior socket and
 988         #                  response operate independently
 989         #   2) persistent: the response was retained and we await its
 990         #                  isclosed() status to become true.
 991         #
 992         if self.__state != _CS_REQ_SENT or self.__response:
 993             raise ResponseNotReady()
 994
 995         args = (self.sock,)
 996         kwds = {"strict":self.strict, "method":self._method}
 997         if self.debuglevel > 0:
 998             args += (self.debuglevel,)
 999         if buffering:
1000             #only add this keyword if non-default, for compatibility with
1001             #other response_classes.
1002             kwds["buffering"] = True;
1003         response = self.response_class(*args, **kwds)
1004
1005         response.begin()
1006         assert response.will_close != _UNKNOWN
1007         self.__state = _CS_IDLE
1008
1009         if response.will_close:
1010             # this effectively passes the connection to the response
1011             self.close()
1012         else:
1013             # remember this, so we can tell when it is complete
1014             self.__response = response
1015
1016         return response
1017
1018
class HTTP:
    """Compatibility class with httplib.py from 1.5.

    Wraps an instance of _connection_class, configures it with this
    class's _http_vsn / _http_vsn_str (HTTP/1.0 by default) and
    delegates the request-sending methods to it.
    """

    _http_vsn = 10
    _http_vsn_str = 'HTTP/1.0'

    debuglevel = 0

    _connection_class = HTTPConnection

    def __init__(self, host='', port=None, strict=None):
        "Provide a default host, since the superclass requires one."

        # some joker passed 0 explicitly, meaning default port
        if port == 0:
            port = None

        # Note that we may pass an empty string as the host; this will throw
        # an error when we attempt to connect. Presumably, the client code
        # will call connect before then, with a proper host.
        self._setup(self._connection_class(host, port, strict))

    def _setup(self, conn):
        "Adopt `conn' and expose its request-sending methods on this object."
        self._conn = conn

        # set up delegation to flesh out interface
        self.send = conn.send
        self.putrequest = conn.putrequest
        self.putheader = conn.putheader
        self.endheaders = conn.endheaders
        self.set_debuglevel = conn.set_debuglevel

        # make the wrapped connection speak this class's HTTP version
        conn._http_vsn = self._http_vsn
        conn._http_vsn_str = self._http_vsn_str

        self.file = None

    def connect(self, host=None, port=None):
        "Accept arguments to set the host/port, since the superclass doesn't."

        if host is not None:
            self._conn._set_hostport(host, port)
        self._conn.connect()

    def getfile(self):
        "Provide a getfile, since the superclass' does not use this concept."
        return self.file

    def getreply(self, buffering=False):
        """Compat definition since superclass does not define it.

        Returns a tuple consisting of:
        - server status code (e.g. '200' if all goes well)
        - server "reason" corresponding to status code
        - any RFC822 headers in the response from the server

        If the status line is rejected with BadStatusLine, the tuple is
        (-1, <offending line>, None), the connection is closed and a file
        object made from the socket is left available through getfile().
        """
        try:
            if not buffering:
                response = self._conn.getresponse()
            else:
                #only add this keyword if non-default for compatibility
                #with other connection classes
                response = self._conn.getresponse(buffering)
        except BadStatusLine as e:
            ### hmm. if getresponse() ever closes the socket on a bad request,
            ### then we are going to have problems with self.sock

            ### should we keep this behavior? do people use it?
            # keep the socket open (as a file), and return it
            leftover = self._conn.sock.makefile('rb', 0)

            # close our socket -- we want to restart after any protocol error
            self.close()

            # close() resets self.file to None, so the file is attached only
            # afterwards; otherwise getfile() could never return it.
            self.file = leftover

            self.headers = None
            return -1, e.line, None

        self.headers = response.msg
        self.file = response.fp
        return response.status, response.reason, response.msg

    def close(self):
        "Close the wrapped connection and drop the reference to the file."
        self._conn.close()

        # note that self.file == response.fp, which gets closed by the
        # superclass. just clear the object ref here.
        ### hmm. messy. if status==-1, then self.file is owned by us.
        ### well... we aren't explicitly closing, but losing this ref will
        ### do it
        self.file = None
1109
1110 try:
1111     import ssl
1112 except ImportError:
1113     pass
1114 else:
1115     class HTTPSConnection(HTTPConnection):
1116         "This class allows communication via SSL."
1117
1118         default_port = HTTPS_PORT
1119
1120         def __init__(self, host, port=None, key_file=None, cert_file=None,
1121                      strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
1122             HTTPConnection.__init__(self, host, port, strict, timeout)
1123             self.key_file = key_file
1124             self.cert_file = cert_file
1125
1126         def connect(self):
1127             "Connect to a host on a given (SSL) port."
1128
1129             sock = socket.create_connection((self.host, self.port), self.timeout)
1130             if self._tunnel_host:
1131                 self.sock = sock
1132                 self._tunnel()
1133             self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file)
1134
1135     __all__.append("HTTPSConnection")
1136
1137     class HTTPS(HTTP):
1138         """Compatibility with 1.5 httplib interface
1139
1140         Python 1.5.2 did not have an HTTPS class, but it defined an
1141         interface for sending http requests that is also useful for
1142         https.
1143         """
1144
1145         _connection_class = HTTPSConnection
1146
1147         def __init__(self, host='', port=None, key_file=None, cert_file=None,
1148                      strict=None):
1149             # provide a default host, pass the X509 cert info
1150
1151             # urf. compensate for bad input.
1152             if port == 0:
1153                 port = None
1154             self._setup(self._connection_class(host, port, key_file,
1155                                                cert_file, strict))
1156
1157             # we never actually use these for anything, but we keep them
1158             # here for compatibility with post-1.5.2 CVS.
1159             self.key_file = key_file
1160             self.cert_file = cert_file
1161
1162
1163     def FakeSocket (sock, sslobj):
1164         warnings.warn("FakeSocket is deprecated, and won't be in 3.x.  " +
1165                       "Use the result of ssl.wrap_socket() directly instead.",
1166                       DeprecationWarning, stacklevel=2)
1167         return sslobj
1168
1169
class HTTPException(Exception):
    """Base class of the HTTP exception classes defined below.

    Subclasses that define an __init__ must call Exception.__init__
    or define self.args.  Otherwise, str() will fail.
    """
1174
class NotConnected(HTTPException):
    """HTTPException subclass for the "not connected" condition.

    NOTE(review): its raise sites are outside this part of the file;
    confirm the exact trigger there.
    """
1177
class InvalidURL(HTTPException):
    """HTTPException subclass for an unusable URL.

    NOTE(review): its raise sites are outside this part of the file;
    confirm the exact trigger there.
    """
1180
class UnknownProtocol(HTTPException):
    """HTTPException that records the `version' it was created with.

    The value is available as .version and as the single element of
    .args.
    """

    def __init__(self, version):
        self.version = version
        self.args = (version,)
1185
class UnknownTransferEncoding(HTTPException):
    """HTTPException subclass for an unrecognized transfer encoding.

    NOTE(review): its raise sites are outside this part of the file;
    confirm the exact trigger there.
    """
1188
class UnimplementedFileMode(HTTPException):
    """HTTPException subclass for an unsupported file mode.

    NOTE(review): its raise sites are outside this part of the file;
    confirm the exact trigger there.
    """
1191
class IncompleteRead(HTTPException):
    """HTTPException carrying the data read before a read fell short.

    .partial holds the data that was read (also the single element of
    .args); .expected, when not None, is the number of bytes that were
    still expected.
    """

    def __init__(self, partial, expected=None):
        self.args = (partial,)
        self.partial = partial
        self.expected = expected

    def __repr__(self):
        remainder = ''
        if self.expected is not None:
            remainder = ', %i more expected' % self.expected
        return 'IncompleteRead(%i bytes read%s)' % (len(self.partial),
                                                    remainder)

    def __str__(self):
        # str() and repr() give the same text
        return repr(self)
1205
class ImproperConnectionState(HTTPException):
    """Base class of CannotSendRequest, CannotSendHeader and
    ResponseNotReady."""
1208
class CannotSendRequest(ImproperConnectionState):
    """Raised by putrequest() when the connection is not idle."""
1211
class CannotSendHeader(ImproperConnectionState):
    """Raised by putheader() and endheaders() when no request has been
    started."""
1214
class ResponseNotReady(ImproperConnectionState):
    """Raised by getresponse() when no request has been sent or an
    earlier response is still pending."""
1217
class BadStatusLine(HTTPException):
    """HTTPException carrying a rejected status line.

    The line is available as .line and as the single element of .args.
    An empty line is stored as its repr() so that the exception message
    is not blank.
    """

    def __init__(self, line):
        if not line:
            # e.g. '' becomes "''": a visible message instead of nothing
            line = repr(line)
        self.args = (line,)
        self.line = line
1222
# for backwards compatibility: `error' is another name for HTTPException
error = HTTPException
1225
class LineAndFileWrapper:
    """A limited file-like object for HTTP/0.9 responses.

    Presents `line' followed by the contents of `file' as one readable
    stream.  Once the line has been handed out completely, read(),
    readline() and readlines() are rebound to the underlying file's
    methods; other attributes are always looked up on the file.
    """

    # The status-line parsing code calls readline(), which normally
    # gets the HTTP status line.  For a 0.9 response, however, this is
    # actually the first line of the body!  Clients need to get a
    # readable file object that contains that line.

    def __init__(self, line, file):
        self._line = line
        self._file = file
        self._line_consumed = 0
        self._line_offset = 0           # index of the first unread char
        self._line_left = len(line)     # number of unread chars in line

    def __getattr__(self, attr):
        # only reached for names not found on the wrapper itself
        return getattr(self._file, attr)

    def _done(self):
        # called when the last byte is read from the line.  After the
        # call, all read methods are delegated to the underlying file
        # object.
        self._line_consumed = 1
        self.read = self._file.read
        self.readline = self._file.readline
        self.readlines = self._file.readlines

    def read(self, amt=None):
        """Read up to `amt' characters; everything if `amt' is None or
        negative."""
        if self._line_consumed:
            return self._file.read(amt)
        assert self._line_left
        if amt is not None and amt < 0:
            # A negative size conventionally means "read everything";
            # used as a slice bound it would corrupt the bookkeeping.
            amt = None
        if amt is None or amt > self._line_left:
            s = self._line[self._line_offset:]
            self._done()
            if amt is None:
                return s + self._file.read()
            else:
                return s + self._file.read(amt - len(s))
        else:
            assert amt <= self._line_left
            i = self._line_offset
            j = i + amt
            s = self._line[i:j]
            self._line_offset = j
            self._line_left -= amt
            if self._line_left == 0:
                self._done()
            return s

    def readline(self):
        """Return the unread rest of the stored line, then file lines."""
        if self._line_consumed:
            return self._file.readline()
        assert self._line_left
        s = self._line[self._line_offset:]
        self._done()
        return s

    def readlines(self, size=None):
        """Return the unread rest of the stored line followed by the
        lines of the underlying file."""
        if self._line_consumed:
            return self._file.readlines(size)
        assert self._line_left
        L = [self._line[self._line_offset:]]
        self._done()
        if size is None:
            return L + self._file.readlines()
        else:
            return L + self._file.readlines(size)
1293
def test():
    """Test this module.

    A hodge podge of tests collected here, because they have too many
    external dependencies for the regular test suite.

    Command line: [-d]... [host [selector]]; every -d raises the debug
    level by one.
    """

    import sys
    import getopt

    def show_reply(conn):
        # Fetch the reply on `conn' and print status, reason, body size
        # and the response headers.
        status, reason, headers = conn.getreply()
        print('status = %s' % (status,))
        print('reason = %s' % (reason,))
        print('read %s' % (len(conn.getfile().read()),))
        print('')
        if headers:
            for header in headers.headers:
                print(header.strip())
        print('')

    opts, args = getopt.getopt(sys.argv[1:], 'd')
    dl = len([o for o, a in opts if o == '-d'])

    host = 'www.python.org'
    selector = '/'
    if len(args) > 0:
        host = args[0]
    if len(args) > 1:
        selector = args[1]

    h = HTTP()
    h.set_debuglevel(dl)
    h.connect(host)
    h.putrequest('GET', selector)
    h.endheaders()
    show_reply(h)

    # minimal test that code to extract host from url works
    class HTTP11(HTTP):
        _http_vsn = 11
        _http_vsn_str = 'HTTP/1.1'

    h = HTTP11('www.python.org')
    h.putrequest('GET', 'http://www.python.org/~jeremy/')
    h.endheaders()
    h.getreply()
    h.close()

    # the https part only runs when the ssl module is available
    try:
        import ssl
    except ImportError:
        return

    for host, selector in (('sourceforge.net', '/projects/python'),
                           ):
        print("https://%s%s" % (host, selector))
        hs = HTTPS()
        hs.set_debuglevel(dl)
        hs.connect(host)
        hs.putrequest('GET', selector)
        hs.endheaders()
        show_reply(hs)
1358
# Run the network-dependent test() when the module is executed as a script.
if __name__ == '__main__':
    test()