Lib/httplib.py

   1 """HTTP/1.1 client library
   2
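This module implements the client side of HTTP.  An HTTPConnection object
manages one connection to a server and sends requests over it; an
HTTPResponse object parses the status line and headers of a reply and reads
its body.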
   5
   6 HTTPConnection goes through a number of "states", which define when a client
   7 may legally make another request or fetch the response for a particular
   8 request. This diagram details these state transitions:
   9
  10     (null)
  11       |
  12       | HTTPConnection()
  13       v
  14     Idle
  15       |
  16       | putrequest()
  17       v
  18     Request-started
  19       |
  20       | ( putheader() )*  endheaders()
  21       v
  22     Request-sent
  23       |
  24       | response = getresponse()
  25       v
  26     Unread-response   [Response-headers-read]
  27       |\____________________
  28       |                     |
  29       | response.read()     | putrequest()
  30       v                     v
  31     Idle                  Req-started-unread-response
  32                      ______/|
  33                    /        |
  34    response.read() |        | ( putheader() )*  endheaders()
  35                    v        v
  36        Request-started    Req-sent-unread-response
  37                             |
  38                             | response.read()
  39                             v
  40                           Request-sent
  41
  42 This diagram presents the following rules:
  43   -- a second request may not be started until {response-headers-read}
  44   -- a response [object] cannot be retrieved until {request-sent}
  45   -- there is no differentiation between an unread response body and a
  46      partially read response body
  47
  48 Note: this enforcement is applied by the HTTPConnection class. The
  49       HTTPResponse class does not enforce this state machine, which
  50       implies sophisticated clients may accelerate the request/response
  51       pipeline. Caution should be taken, though: accelerating the states
  52       beyond the above pattern may imply knowledge of the server's
  53       connection-close behavior for certain requests. For example, it
  54       is impossible to tell whether the server will close the connection
  55       UNTIL the response headers have been read; this means that further
  56       requests cannot be placed into the pipeline until it is known that
  57       the server will NOT be closing the connection.
  58
  59 Logical State                  __state            __response
  60 -------------                  -------            ----------
  61 Idle                           _CS_IDLE           None
  62 Request-started                _CS_REQ_STARTED    None
  63 Request-sent                   _CS_REQ_SENT       None
  64 Unread-response                _CS_IDLE           <response_class>
  65 Req-started-unread-response    _CS_REQ_STARTED    <response_class>
  66 Req-sent-unread-response       _CS_REQ_SENT       <response_class>
  67 """
  68
  69 import errno
  70 import mimetools
  71 import socket
  72 from urlparse import urlsplit
  73 import warnings
  74
  75 try:
  76     from cStringIO import StringIO
  77 except ImportError:
  78     from StringIO import StringIO
  79
# Names exported by "from httplib import *".
__all__ = ["HTTP", "HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "error", "responses"]
  86
  87 HTTP_PORT = 80
  88 HTTPS_PORT = 443
  89
  90 _UNKNOWN = 'UNKNOWN'
  91
  92 # connection states
  93 _CS_IDLE = 'Idle'
  94 _CS_REQ_STARTED = 'Request-started'
  95 _CS_REQ_SENT = 'Request-sent'
  96
  97 # status codes
  98 # informational
  99 CONTINUE = 100
 100 SWITCHING_PROTOCOLS = 101
 101 PROCESSING = 102
 102
 103 # successful
 104 OK = 200
 105 CREATED = 201
 106 ACCEPTED = 202
 107 NON_AUTHORITATIVE_INFORMATION = 203
 108 NO_CONTENT = 204
 109 RESET_CONTENT = 205
 110 PARTIAL_CONTENT = 206
 111 MULTI_STATUS = 207
 112 IM_USED = 226
 113
 114 # redirection
 115 MULTIPLE_CHOICES = 300
 116 MOVED_PERMANENTLY = 301
 117 FOUND = 302
 118 SEE_OTHER = 303
 119 NOT_MODIFIED = 304
 120 USE_PROXY = 305
 121 TEMPORARY_REDIRECT = 307
 122
 123 # client error
 124 BAD_REQUEST = 400
 125 UNAUTHORIZED = 401
 126 PAYMENT_REQUIRED = 402
 127 FORBIDDEN = 403
 128 NOT_FOUND = 404
 129 METHOD_NOT_ALLOWED = 405
 130 NOT_ACCEPTABLE = 406
 131 PROXY_AUTHENTICATION_REQUIRED = 407
 132 REQUEST_TIMEOUT = 408
 133 CONFLICT = 409
 134 GONE = 410
 135 LENGTH_REQUIRED = 411
 136 PRECONDITION_FAILED = 412
 137 REQUEST_ENTITY_TOO_LARGE = 413
 138 REQUEST_URI_TOO_LONG = 414
 139 UNSUPPORTED_MEDIA_TYPE = 415
 140 REQUESTED_RANGE_NOT_SATISFIABLE = 416
 141 EXPECTATION_FAILED = 417
 142 UNPROCESSABLE_ENTITY = 422
 143 LOCKED = 423
 144 FAILED_DEPENDENCY = 424
 145 UPGRADE_REQUIRED = 426
 146
 147 # server error
 148 INTERNAL_SERVER_ERROR = 500
 149 NOT_IMPLEMENTED = 501
 150 BAD_GATEWAY = 502
 151 SERVICE_UNAVAILABLE = 503
 152 GATEWAY_TIMEOUT = 504
 153 HTTP_VERSION_NOT_SUPPORTED = 505
 154 INSUFFICIENT_STORAGE = 507
 155 NOT_EXTENDED = 510
 156
 157 # Mapping status codes to official W3C names
 158 responses = {
 159     100: 'Continue',
 160     101: 'Switching Protocols',
 161
 162     200: 'OK',
 163     201: 'Created',
 164     202: 'Accepted',
 165     203: 'Non-Authoritative Information',
 166     204: 'No Content',
 167     205: 'Reset Content',
 168     206: 'Partial Content',
 169
 170     300: 'Multiple Choices',
 171     301: 'Moved Permanently',
 172     302: 'Found',
 173     303: 'See Other',
 174     304: 'Not Modified',
 175     305: 'Use Proxy',
 176     306: '(Unused)',
 177     307: 'Temporary Redirect',
 178
 179     400: 'Bad Request',
 180     401: 'Unauthorized',
 181     402: 'Payment Required',
 182     403: 'Forbidden',
 183     404: 'Not Found',
 184     405: 'Method Not Allowed',
 185     406: 'Not Acceptable',
 186     407: 'Proxy Authentication Required',
 187     408: 'Request Timeout',
 188     409: 'Conflict',
 189     410: 'Gone',
 190     411: 'Length Required',
 191     412: 'Precondition Failed',
 192     413: 'Request Entity Too Large',
 193     414: 'Request-URI Too Long',
 194     415: 'Unsupported Media Type',
 195     416: 'Requested Range Not Satisfiable',
 196     417: 'Expectation Failed',
 197
 198     500: 'Internal Server Error',
 199     501: 'Not Implemented',
 200     502: 'Bad Gateway',
 201     503: 'Service Unavailable',
 202     504: 'Gateway Timeout',
 203     505: 'HTTP Version Not Supported',
 204 }
 205
 206 # maximal amount of data to read at one time in _safe_read
 207 MAXAMOUNT = 1048576
 208
 209 class HTTPMessage(mimetools.Message):
 210
 211     def addheader(self, key, value):
 212         """Add header for field key handling repeats."""
 213         prev = self.dict.get(key)
 214         if prev is None:
 215             self.dict[key] = value
 216         else:
 217             combined = ", ".join((prev, value))
 218             self.dict[key] = combined
 219
 220     def addcontinue(self, key, more):
 221         """Add more field data from a continuation line."""
 222         prev = self.dict[key]
 223         self.dict[key] = prev + "\n " + more
 224
 225     def readheaders(self):
 226         """Read header lines.
 227
 228         Read header lines up to the entirely blank line that terminates them.
 229         The (normally blank) line that ends the headers is skipped, but not
 230         included in the returned list.  If a non-header line ends the headers,
 231         (which is an error), an attempt is made to backspace over it; it is
 232         never included in the returned list.
 233
 234         The variable self.status is set to the empty string if all went well,
 235         otherwise it is an error message.  The variable self.headers is a
 236         completely uninterpreted list of lines contained in the header (so
 237         printing them will reproduce the header exactly as it appears in the
 238         file).
 239
 240         If multiple header fields with the same name occur, they are combined
 241         according to the rules in RFC 2616 sec 4.2:
 242
 243         Appending each subsequent field-value to the first, each separated
 244         by a comma. The order in which header fields with the same field-name
 245         are received is significant to the interpretation of the combined
 246         field value.
 247         """
 248         # XXX The implementation overrides the readheaders() method of
 249         # rfc822.Message.  The base class design isn't amenable to
 250         # customized behavior here so the method here is a copy of the
 251         # base class code with a few small changes.
 252
 253         self.dict = {}
 254         self.unixfrom = ''
 255         self.headers = hlist = []
 256         self.status = ''
 257         headerseen = ""
 258         firstline = 1
 259         startofline = unread = tell = None
 260         if hasattr(self.fp, 'unread'):
 261             unread = self.fp.unread
 262         elif self.seekable:
 263             tell = self.fp.tell
 264         while True:
 265             if tell:
 266                 try:
 267                     startofline = tell()
 268                 except IOError:
 269                     startofline = tell = None
 270                     self.seekable = 0
 271             line = self.fp.readline()
 272             if not line:
 273                 self.status = 'EOF in headers'
 274                 break
 275             # Skip unix From name time lines
 276             if firstline and line.startswith('From '):
 277                 self.unixfrom = self.unixfrom + line
 278                 continue
 279             firstline = 0
 280             if headerseen and line[0] in ' \t':
 281                 # XXX Not sure if continuation lines are handled properly
 282                 # for http and/or for repeating headers
 283                 # It's a continuation line.
 284                 hlist.append(line)
 285                 self.addcontinue(headerseen, line.strip())
 286                 continue
 287             elif self.iscomment(line):
 288                 # It's a comment.  Ignore it.
 289                 continue
 290             elif self.islast(line):
 291                 # Note! No pushback here!  The delimiter line gets eaten.
 292                 break
 293             headerseen = self.isheader(line)
 294             if headerseen:
 295                 # It's a legal header line, save it.
 296                 hlist.append(line)
 297                 self.addheader(headerseen, line[len(headerseen)+1:].strip())
 298                 continue
 299             else:
 300                 # It's not a header line; throw it back and stop here.
 301                 if not self.dict:
 302                     self.status = 'No headers'
 303                 else:
 304                     self.status = 'Non-header line where header expected'
 305                 # Try to undo the read.
 306                 if unread:
 307                     unread(line)
 308                 elif tell:
 309                     self.fp.seek(startofline)
 310                 else:
 311                     self.status = self.status + '; bad seek'
 312                 break
 313
 314 class HTTPResponse:
 315
 316     # strict: If true, raise BadStatusLine if the status line can't be
 317     # parsed as a valid HTTP/1.0 or 1.1 status line.  By default it is
 318     # false because it prevents clients from talking to HTTP/0.9
 319     # servers.  Note that a response with a sufficiently corrupted
 320     # status line will look like an HTTP/0.9 response.
 321
 322     # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
 323
 324     def __init__(self, sock, debuglevel=0, strict=0, method=None):
 325         self.fp = sock.makefile('rb', 0)
 326         self.debuglevel = debuglevel
 327         self.strict = strict
 328         self._method = method
 329
 330         self.msg = None
 331
 332         # from the Status-Line of the response
 333         self.version = _UNKNOWN # HTTP-Version
 334         self.status = _UNKNOWN  # Status-Code
 335         self.reason = _UNKNOWN  # Reason-Phrase
 336
 337         self.chunked = _UNKNOWN         # is "chunked" being used?
 338         self.chunk_left = _UNKNOWN      # bytes left to read in current chunk
 339         self.length = _UNKNOWN          # number of bytes left in response
 340         self.will_close = _UNKNOWN      # conn will close at end of response
 341
 342     def _read_status(self):
 343         # Initialize with Simple-Response defaults
 344         line = self.fp.readline()
 345         if self.debuglevel > 0:
 346             print "reply:", repr(line)
 347         if not line:
 348             # Presumably, the server closed the connection before
 349             # sending a valid response.
 350             raise BadStatusLine(line)
 351         try:
 352             [version, status, reason] = line.split(None, 2)
 353         except ValueError:
 354             try:
 355                 [version, status] = line.split(None, 1)
 356                 reason = ""
 357             except ValueError:
 358                 # empty version will cause next test to fail and status
 359                 # will be treated as 0.9 response.
 360                 version = ""
 361         if not version.startswith('HTTP/'):
 362             if self.strict:
 363                 self.close()
 364                 raise BadStatusLine(line)
 365             else:
 366                 # assume it's a Simple-Response from an 0.9 server
 367                 self.fp = LineAndFileWrapper(line, self.fp)
 368                 return "HTTP/0.9", 200, ""
 369
 370         # The status code is a three-digit number
 371         try:
 372             status = int(status)
 373             if status < 100 or status > 999:
 374                 raise BadStatusLine(line)
 375         except ValueError:
 376             raise BadStatusLine(line)
 377         return version, status, reason
 378
 379     def begin(self):
 380         if self.msg is not None:
 381             # we've already started reading the response
 382             return
 383
 384         # read until we get a non-100 response
 385         while True:
 386             version, status, reason = self._read_status()
 387             if status != CONTINUE:
 388                 break
 389             # skip the header from the 100 response
 390             while True:
 391                 skip = self.fp.readline().strip()
 392                 if not skip:
 393                     break
 394                 if self.debuglevel > 0:
 395                     print "header:", skip
 396
 397         self.status = status
 398         self.reason = reason.strip()
 399         if version == 'HTTP/1.0':
 400             self.version = 10
 401         elif version.startswith('HTTP/1.'):
 402             self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
 403         elif version == 'HTTP/0.9':
 404             self.version = 9
 405         else:
 406             raise UnknownProtocol(version)
 407
 408         if self.version == 9:
 409             self.length = None
 410             self.chunked = 0
 411             self.will_close = 1
 412             self.msg = HTTPMessage(StringIO())
 413             return
 414
 415         self.msg = HTTPMessage(self.fp, 0)
 416         if self.debuglevel > 0:
 417             for hdr in self.msg.headers:
 418                 print "header:", hdr,
 419
 420         # don't let the msg keep an fp
 421         self.msg.fp = None
 422
 423         # are we using the chunked-style of transfer encoding?
 424         tr_enc = self.msg.getheader('transfer-encoding')
 425         if tr_enc and tr_enc.lower() == "chunked":
 426             self.chunked = 1
 427             self.chunk_left = None
 428         else:
 429             self.chunked = 0
 430
 431         # will the connection close at the end of the response?
 432         self.will_close = self._check_close()
 433
 434         # do we have a Content-Length?
 435         # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
 436         length = self.msg.getheader('content-length')
 437         if length and not self.chunked:
 438             try:
 439                 self.length = int(length)
 440             except ValueError:
 441                 self.length = None
 442         else:
 443             self.length = None
 444
 445         # does the body have a fixed length? (of zero)
 446         if (status == NO_CONTENT or status == NOT_MODIFIED or
 447             100 <= status < 200 or      # 1xx codes
 448             self._method == 'HEAD'):
 449             self.length = 0
 450
 451         # if the connection remains open, and we aren't using chunked, and
 452         # a content-length was not provided, then assume that the connection
 453         # WILL close.
 454         if not self.will_close and \
 455            not self.chunked and \
 456            self.length is None:
 457             self.will_close = 1
 458
 459     def _check_close(self):
 460         conn = self.msg.getheader('connection')
 461         if self.version == 11:
 462             # An HTTP/1.1 proxy is assumed to stay open unless
 463             # explicitly closed.
 464             conn = self.msg.getheader('connection')
 465             if conn and "close" in conn.lower():
 466                 return True
 467             return False
 468
 469         # Some HTTP/1.0 implementations have support for persistent
 470         # connections, using rules different than HTTP/1.1.
 471
 472         # For older HTTP, Keep-Alive indicates persistent connection.
 473         if self.msg.getheader('keep-alive'):
 474             return False
 475
 476         # At least Akamai returns a "Connection: Keep-Alive" header,
 477         # which was supposed to be sent by the client.
 478         if conn and "keep-alive" in conn.lower():
 479             return False
 480
 481         # Proxy-Connection is a netscape hack.
 482         pconn = self.msg.getheader('proxy-connection')
 483         if pconn and "keep-alive" in pconn.lower():
 484             return False
 485
 486         # otherwise, assume it will close
 487         return True
 488
 489     def close(self):
 490         if self.fp:
 491             self.fp.close()
 492             self.fp = None
 493
 494     def isclosed(self):
 495         # NOTE: it is possible that we will not ever call self.close(). This
 496         #       case occurs when will_close is TRUE, length is None, and we
 497         #       read up to the last byte, but NOT past it.
 498         #
 499         # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
 500         #          called, meaning self.isclosed() is meaningful.
 501         return self.fp is None
 502
 503     # XXX It would be nice to have readline and __iter__ for this, too.
 504
 505     def read(self, amt=None):
 506         if self.fp is None:
 507             return ''
 508
 509         if self.chunked:
 510             return self._read_chunked(amt)
 511
 512         if amt is None:
 513             # unbounded read
 514             if self.length is None:
 515                 s = self.fp.read()
 516             else:
 517                 s = self._safe_read(self.length)
 518                 self.length = 0
 519             self.close()        # we read everything
 520             return s
 521
 522         if self.length is not None:
 523             if amt > self.length:
 524                 # clip the read to the "end of response"
 525                 amt = self.length
 526
 527         # we do not use _safe_read() here because this may be a .will_close
 528         # connection, and the user is reading more bytes than will be provided
 529         # (for example, reading in 1k chunks)
 530         s = self.fp.read(amt)
 531         if self.length is not None:
 532             self.length -= len(s)
 533             if not self.length:
 534                 self.close()
 535         return s
 536
 537     def _read_chunked(self, amt):
 538         assert self.chunked != _UNKNOWN
 539         chunk_left = self.chunk_left
 540         value = ''
 541
 542         # XXX This accumulates chunks by repeated string concatenation,
 543         # which is not efficient as the number or size of chunks gets big.
 544         while True:
 545             if chunk_left is None:
 546                 line = self.fp.readline()
 547                 i = line.find(';')
 548                 if i >= 0:
 549                     line = line[:i] # strip chunk-extensions
 550                 chunk_left = int(line, 16)
 551                 if chunk_left == 0:
 552                     break
 553             if amt is None:
 554                 value += self._safe_read(chunk_left)
 555             elif amt < chunk_left:
 556                 value += self._safe_read(amt)
 557                 self.chunk_left = chunk_left - amt
 558                 return value
 559             elif amt == chunk_left:
 560                 value += self._safe_read(amt)
 561                 self._safe_read(2)  # toss the CRLF at the end of the chunk
 562                 self.chunk_left = None
 563                 return value
 564             else:
 565                 value += self._safe_read(chunk_left)
 566                 amt -= chunk_left
 567
 568             # we read the whole chunk, get another
 569             self._safe_read(2)      # toss the CRLF at the end of the chunk
 570             chunk_left = None
 571
 572         # read and discard trailer up to the CRLF terminator
 573         ### note: we shouldn't have any trailers!
 574         while True:
 575             line = self.fp.readline()
 576             if line == '\r\n':
 577                 break
 578
 579         # we read everything; close the "file"
 580         self.close()
 581
 582         return value
 583
 584     def _safe_read(self, amt):
 585         """Read the number of bytes requested, compensating for partial reads.
 586
 587         Normally, we have a blocking socket, but a read() can be interrupted
 588         by a signal (resulting in a partial read).
 589
 590         Note that we cannot distinguish between EOF and an interrupt when zero
 591         bytes have been read. IncompleteRead() will be raised in this
 592         situation.
 593
 594         This function should be used when <amt> bytes "should" be present for
 595         reading. If the bytes are truly not available (due to EOF), then the
 596         IncompleteRead exception can be used to detect the problem.
 597         """
 598         s = []
 599         while amt > 0:
 600             chunk = self.fp.read(min(amt, MAXAMOUNT))
 601             if not chunk:
 602                 raise IncompleteRead(s)
 603             s.append(chunk)
 604             amt -= len(chunk)
 605         return ''.join(s)
 606
 607     def getheader(self, name, default=None):
 608         if self.msg is None:
 609             raise ResponseNotReady()
 610         return self.msg.getheader(name, default)
 611
 612     def getheaders(self):
 613         """Return list of (header, value) tuples."""
 614         if self.msg is None:
 615             raise ResponseNotReady()
 616         return self.msg.items()
 617
 618
 619 class HTTPConnection:
 620
 621     _http_vsn = 11
 622     _http_vsn_str = 'HTTP/1.1'
 623
 624     response_class = HTTPResponse
 625     default_port = HTTP_PORT
 626     auto_open = 1
 627     debuglevel = 0
 628     strict = 0
 629
 630     def __init__(self, host, port=None, strict=None, timeout=None):
 631         self.timeout = timeout
 632         self.sock = None
 633         self._buffer = []
 634         self.__response = None
 635         self.__state = _CS_IDLE
 636         self._method = None
 637
 638         self._set_hostport(host, port)
 639         if strict is not None:
 640             self.strict = strict
 641
 642     def _set_hostport(self, host, port):
 643         if port is None:
 644             i = host.rfind(':')
 645             j = host.rfind(']')         # ipv6 addresses have [...]
 646             if i > j:
 647                 try:
 648                     port = int(host[i+1:])
 649                 except ValueError:
 650                     raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
 651                 host = host[:i]
 652             else:
 653                 port = self.default_port
 654             if host and host[0] == '[' and host[-1] == ']':
 655                 host = host[1:-1]
 656         self.host = host
 657         self.port = port
 658
 659     def set_debuglevel(self, level):
 660         self.debuglevel = level
 661
 662     def connect(self):
 663         """Connect to the host and port specified in __init__."""
 664         self.sock = socket.create_connection((self.host,self.port),
 665                                              self.timeout)
 666
 667     def close(self):
 668         """Close the connection to the HTTP server."""
 669         if self.sock:
 670             self.sock.close()   # close it manually... there may be other refs
 671             self.sock = None
 672         if self.__response:
 673             self.__response.close()
 674             self.__response = None
 675         self.__state = _CS_IDLE
 676
 677     def send(self, str):
 678         """Send `str' to the server."""
 679         if self.sock is None:
 680             if self.auto_open:
 681                 self.connect()
 682             else:
 683                 raise NotConnected()
 684
 685         # send the data to the server. if we get a broken pipe, then close
 686         # the socket. we want to reconnect when somebody tries to send again.
 687         #
 688         # NOTE: we DO propagate the error, though, because we cannot simply
 689         #       ignore the error... the caller will know if they can retry.
 690         if self.debuglevel > 0:
 691             print "send:", repr(str)
 692         try:
 693             blocksize=8192
 694             if hasattr(str,'read') :
 695                 if self.debuglevel > 0: print "sendIng a read()able"
 696                 data=str.read(blocksize)
 697                 while data:
 698                     self.sock.sendall(data)
 699                     data=str.read(blocksize)
 700             else:
 701                 self.sock.sendall(str)
 702         except socket.error, v:
 703             if v[0] == 32:      # Broken pipe
 704                 self.close()
 705             raise
 706
 707     def _output(self, s):
 708         """Add a line of output to the current request buffer.
 709
 710         Assumes that the line does *not* end with \\r\\n.
 711         """
 712         self._buffer.append(s)
 713
 714     def _send_output(self):
 715         """Send the currently buffered request and clear the buffer.
 716
 717         Appends an extra \\r\\n to the buffer.
 718         """
 719         self._buffer.extend(("", ""))
 720         msg = "\r\n".join(self._buffer)
 721         del self._buffer[:]
 722         self.send(msg)
 723
 724     def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
 725         """Send a request to the server.
 726
 727         `method' specifies an HTTP request method, e.g. 'GET'.
 728         `url' specifies the object being requested, e.g. '/index.html'.
 729         `skip_host' if True does not add automatically a 'Host:' header
 730         `skip_accept_encoding' if True does not add automatically an
 731            'Accept-Encoding:' header
 732         """
 733
 734         # if a prior response has been completed, then forget about it.
 735         if self.__response and self.__response.isclosed():
 736             self.__response = None
 737
 738
 739         # in certain cases, we cannot issue another request on this connection.
 740         # this occurs when:
 741         #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
 742         #   2) a response to a previous request has signalled that it is going
 743         #      to close the connection upon completion.
 744         #   3) the headers for the previous response have not been read, thus
 745         #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
 746         #
 747         # if there is no prior response, then we can request at will.
 748         #
 749         # if point (2) is true, then we will have passed the socket to the
 750         # response (effectively meaning, "there is no prior response"), and
 751         # will open a new one when a new request is made.
 752         #
 753         # Note: if a prior response exists, then we *can* start a new request.
 754         #       We are not allowed to begin fetching the response to this new
 755         #       request, however, until that prior response is complete.
 756         #
 757         if self.__state == _CS_IDLE:
 758             self.__state = _CS_REQ_STARTED
 759         else:
 760             raise CannotSendRequest()
 761
 762         # Save the method we use, we need it later in the response phase
 763         self._method = method
 764         if not url:
 765             url = '/'
 766         str = '%s %s %s' % (method, url, self._http_vsn_str)
 767
 768         self._output(str)
 769
 770         if self._http_vsn == 11:
 771             # Issue some standard headers for better HTTP/1.1 compliance
 772
 773             if not skip_host:
 774                 # this header is issued *only* for HTTP/1.1
 775                 # connections. more specifically, this means it is
 776                 # only issued when the client uses the new
 777                 # HTTPConnection() class. backwards-compat clients
 778                 # will be using HTTP/1.0 and those clients may be
 779                 # issuing this header themselves. we should NOT issue
 780                 # it twice; some web servers (such as Apache) barf
 781                 # when they see two Host: headers
 782
 783                 # If we need a non-standard port,include it in the
 784                 # header.  If the request is going through a proxy,
 785                 # but the host of the actual URL, not the host of the
 786                 # proxy.
 787
 788                 netloc = ''
 789                 if url.startswith('http'):
 790                     nil, netloc, nil, nil, nil = urlsplit(url)
 791
 792                 if netloc:
 793                     try:
 794                         netloc_enc = netloc.encode("ascii")
 795                     except UnicodeEncodeError:
 796                         netloc_enc = netloc.encode("idna")
 797                     self.putheader('Host', netloc_enc)
 798                 else:
 799                     try:
 800                         host_enc = self.host.encode("ascii")
 801                     except UnicodeEncodeError:
 802                         host_enc = self.host.encode("idna")
 803                     if self.port == HTTP_PORT:
 804                         self.putheader('Host', host_enc)
 805                     else:
 806                         self.putheader('Host', "%s:%s" % (host_enc, self.port))
 807
 808             # note: we are assuming that clients will not attempt to set these
 809             #       headers since *this* library must deal with the
 810             #       consequences. this also means that when the supporting
 811             #       libraries are updated to recognize other forms, then this
 812             #       code should be changed (removed or updated).
 813
 814             # we only want a Content-Encoding of "identity" since we don't
 815             # support encodings such as x-gzip or x-deflate.
 816             if not skip_accept_encoding:
 817                 self.putheader('Accept-Encoding', 'identity')
 818
 819             # we can accept "chunked" Transfer-Encodings, but no others
 820             # NOTE: no TE header implies *only* "chunked"
 821             #self.putheader('TE', 'chunked')
 822
 823             # if TE is supplied in the header, then it must appear in a
 824             # Connection header.
 825             #self.putheader('Connection', 'TE')
 826
 827         else:
 828             # For HTTP/1.0, the server will assume "not chunked"
 829             pass
 830
 831     def putheader(self, header, value):
 832         """Send a request header line to the server.
 833
 834         For example: h.putheader('Accept', 'text/html')
 835         """
 836         if self.__state != _CS_REQ_STARTED:
 837             raise CannotSendHeader()
 838
 839         str = '%s: %s' % (header, value)
 840         self._output(str)
 841
 842     def endheaders(self):
 843         """Indicate that the last header line has been sent to the server."""
 844
 845         if self.__state == _CS_REQ_STARTED:
 846             self.__state = _CS_REQ_SENT
 847         else:
 848             raise CannotSendHeader()
 849
 850         self._send_output()
 851
 852     def request(self, method, url, body=None, headers={}):
 853         """Send a complete request to the server."""
 854
 855         try:
 856             self._send_request(method, url, body, headers)
 857         except socket.error, v:
 858             # trap 'Broken pipe' if we're allowed to automatically reconnect
 859             if v[0] != 32 or not self.auto_open:
 860                 raise
 861             # try one more time
 862             self._send_request(method, url, body, headers)
 863
 864     def _send_request(self, method, url, body, headers):
 865         # honour explicitly requested Host: and Accept-Encoding headers
 866         header_names = dict.fromkeys([k.lower() for k in headers])
 867         skips = {}
 868         if 'host' in header_names:
 869             skips['skip_host'] = 1
 870         if 'accept-encoding' in header_names:
 871             skips['skip_accept_encoding'] = 1
 872
 873         self.putrequest(method, url, **skips)
 874
 875         if body and ('content-length' not in header_names):
 876             thelen=None
 877             try:
 878                 thelen=str(len(body))
 879             except TypeError, te:
 880                 # If this is a file-like object, try to
 881                 # fstat its file descriptor
 882                 import os
 883                 try:
 884                     thelen = str(os.fstat(body.fileno()).st_size)
 885                 except (AttributeError, OSError):
 886                     # Don't send a length if this failed
 887                     if self.debuglevel > 0: print "Cannot stat!!"
 888
 889             if thelen is not None:
 890                 self.putheader('Content-Length',thelen)
 891         for hdr, value in headers.iteritems():
 892             self.putheader(hdr, value)
 893         self.endheaders()
 894
 895         if body:
 896             self.send(body)
 897
 898     def getresponse(self):
 899         "Get the response from the server."
 900
 901         # if a prior response has been completed, then forget about it.
 902         if self.__response and self.__response.isclosed():
 903             self.__response = None
 904
 905         #
 906         # if a prior response exists, then it must be completed (otherwise, we
 907         # cannot read this response's header to determine the connection-close
 908         # behavior)
 909         #
 910         # note: if a prior response existed, but was connection-close, then the
 911         # socket and response were made independent of this HTTPConnection
 912         # object since a new request requires that we open a whole new
 913         # connection
 914         #
 915         # this means the prior response had one of two states:
 916         #   1) will_close: this connection was reset and the prior socket and
 917         #                  response operate independently
 918         #   2) persistent: the response was retained and we await its
 919         #                  isclosed() status to become true.
 920         #
 921         if self.__state != _CS_REQ_SENT or self.__response:
 922             raise ResponseNotReady()
 923
 924         if self.debuglevel > 0:
 925             response = self.response_class(self.sock, self.debuglevel,
 926                                            strict=self.strict,
 927                                            method=self._method)
 928         else:
 929             response = self.response_class(self.sock, strict=self.strict,
 930                                            method=self._method)
 931
 932         response.begin()
 933         assert response.will_close != _UNKNOWN
 934         self.__state = _CS_IDLE
 935
 936         if response.will_close:
 937             # this effectively passes the connection to the response
 938             self.close()
 939         else:
 940             # remember this, so we can tell when it is complete
 941             self.__response = response
 942
 943         return response
 944
 945
 946 class HTTP:
 947     "Compatibility class with httplib.py from 1.5."
 948
 949     _http_vsn = 10
 950     _http_vsn_str = 'HTTP/1.0'
 951
 952     debuglevel = 0
 953
 954     _connection_class = HTTPConnection
 955
 956     def __init__(self, host='', port=None, strict=None):
 957         "Provide a default host, since the superclass requires one."
 958
 959         # some joker passed 0 explicitly, meaning default port
 960         if port == 0:
 961             port = None
 962
 963         # Note that we may pass an empty string as the host; this will throw
 964         # an error when we attempt to connect. Presumably, the client code
 965         # will call connect before then, with a proper host.
 966         self._setup(self._connection_class(host, port, strict))
 967
 968     def _setup(self, conn):
 969         self._conn = conn
 970
 971         # set up delegation to flesh out interface
 972         self.send = conn.send
 973         self.putrequest = conn.putrequest
 974         self.endheaders = conn.endheaders
 975         self.set_debuglevel = conn.set_debuglevel
 976
 977         conn._http_vsn = self._http_vsn
 978         conn._http_vsn_str = self._http_vsn_str
 979
 980         self.file = None
 981
 982     def connect(self, host=None, port=None):
 983         "Accept arguments to set the host/port, since the superclass doesn't."
 984
 985         if host is not None:
 986             self._conn._set_hostport(host, port)
 987         self._conn.connect()
 988
 989     def getfile(self):
 990         "Provide a getfile, since the superclass' does not use this concept."
 991         return self.file
 992
 993     def putheader(self, header, *values):
 994         "The superclass allows only one value argument."
 995         self._conn.putheader(header, '\r\n\t'.join(values))
 996
 997     def getreply(self):
 998         """Compat definition since superclass does not define it.
 999
1000         Returns a tuple consisting of:
1001         - server status code (e.g. '200' if all goes well)
1002         - server "reason" corresponding to status code
1003         - any RFC822 headers in the response from the server
1004         """
1005         try:
1006             response = self._conn.getresponse()
1007         except BadStatusLine, e:
1008             ### hmm. if getresponse() ever closes the socket on a bad request,
1009             ### then we are going to have problems with self.sock
1010
1011             ### should we keep this behavior? do people use it?
1012             # keep the socket open (as a file), and return it
1013             self.file = self._conn.sock.makefile('rb', 0)
1014
1015             # close our socket -- we want to restart after any protocol error
1016             self.close()
1017
1018             self.headers = None
1019             return -1, e.line, None
1020
1021         self.headers = response.msg
1022         self.file = response.fp
1023         return response.status, response.reason, response.msg
1024
1025     def close(self):
1026         self._conn.close()
1027
1028         # note that self.file == response.fp, which gets closed by the
1029         # superclass. just clear the object ref here.
1030         ### hmm. messy. if status==-1, then self.file is owned by us.
1031         ### well... we aren't explicitly closing, but losing this ref will
1032         ### do it
1033         self.file = None
1034
1035 try:
1036     import ssl
1037 except ImportError:
1038     pass
1039 else:
1040     class HTTPSConnection(HTTPConnection):
1041         "This class allows communication via SSL."
1042
1043         default_port = HTTPS_PORT
1044
1045         def __init__(self, host, port=None, key_file=None, cert_file=None,
1046                      strict=None, timeout=None):
1047             HTTPConnection.__init__(self, host, port, strict, timeout)
1048             self.key_file = key_file
1049             self.cert_file = cert_file
1050
1051         def connect(self):
1052             "Connect to a host on a given (SSL) port."
1053
1054             sock = socket.create_connection((self.host, self.port), self.timeout)
1055             self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file)
1056
1057     __all__.append("HTTPSConnection")
1058
1059     class HTTPS(HTTP):
1060         """Compatibility with 1.5 httplib interface
1061
1062         Python 1.5.2 did not have an HTTPS class, but it defined an
1063         interface for sending http requests that is also useful for
1064         https.
1065         """
1066
1067         _connection_class = HTTPSConnection
1068
1069         def __init__(self, host='', port=None, key_file=None, cert_file=None,
1070                      strict=None):
1071             # provide a default host, pass the X509 cert info
1072
1073             # urf. compensate for bad input.
1074             if port == 0:
1075                 port = None
1076             self._setup(self._connection_class(host, port, key_file,
1077                                                cert_file, strict))
1078
1079             # we never actually use these for anything, but we keep them
1080             # here for compatibility with post-1.5.2 CVS.
1081             self.key_file = key_file
1082             self.cert_file = cert_file
1083
1084
1085     def FakeSocket (sock, sslobj):
1086         warnings.warn("FakeSocket is deprecated, and won't be in 3.x.  " +
1087                       "Use the result of ssl.wrap_socket() directly instead.",
1088                       DeprecationWarning, stacklevel=2)
1089         return sslobj
1090
1091
class HTTPException(Exception):
    """Common base class of the exception classes defined in this module.

    Subclasses that define an __init__ must call Exception.__init__
    or define self.args.  Otherwise, str() will fail.
    """
1096
class NotConnected(HTTPException):
    """HTTPException subclass with no extra state.

    NOTE(review): judging by the name it reports use of a connection that
    is not open; confirm against the raise sites.
    """
1099
class InvalidURL(HTTPException):
    """HTTPException subclass with no extra state.

    NOTE(review): judging by the name it reports a URL or host/port that
    could not be interpreted; confirm against the raise sites.
    """
1102
class UnknownProtocol(HTTPException):
    """HTTPException that records a protocol version.

    The version passed to the constructor is available both as the
    `version` attribute and as the single element of `args`.
    """

    def __init__(self, version):
        self.version = version
        # args must be defined explicitly so that str() works
        self.args = (version,)
1107
class UnknownTransferEncoding(HTTPException):
    """HTTPException subclass with no extra state.

    NOTE(review): judging by the name it reports a Transfer-Encoding the
    library does not handle; confirm against the raise sites.
    """
1110
class UnimplementedFileMode(HTTPException):
    """HTTPException subclass with no extra state.

    NOTE(review): judging by the name it reports an unsupported file mode;
    confirm against the raise sites.
    """
1113
class IncompleteRead(HTTPException):
    """HTTPException that records the data obtained before a read failed.

    The value passed to the constructor is available both as the
    `partial` attribute and as the single element of `args`.
    """

    def __init__(self, partial):
        self.partial = partial
        # args must be defined explicitly so that str() works
        self.args = (partial,)
1118
class ImproperConnectionState(HTTPException):
    """Base class for errors about calling a method in the wrong state.

    CannotSendRequest, CannotSendHeader and ResponseNotReady derive from
    this class.
    """
1121
class CannotSendRequest(ImproperConnectionState):
    """ImproperConnectionState subclass with no extra state.

    NOTE(review): judging by the name it reports that a new request may
    not be started in the current state; confirm against the raise sites.
    """
1124
class CannotSendHeader(ImproperConnectionState):
    """Raised by putheader() and endheaders() when the connection is not
    in the request-started state."""
1127
class ResponseNotReady(ImproperConnectionState):
    """Raised by getresponse() when no request has been completely sent or
    an earlier response is still outstanding."""
1130
class BadStatusLine(HTTPException):
    """HTTPException that records an unusable status line.

    The line passed to the constructor is available both as the `line`
    attribute (HTTP.getreply() returns it as the reason) and as the single
    element of `args`.
    """

    def __init__(self, line):
        self.line = line
        # args must be defined explicitly so that str() works
        self.args = (line,)
1135
# for backwards compatibility: `error` is a second name for the
# HTTPException class itself, not a separate exception type.
error = HTTPException
1138
class LineAndFileWrapper:
    """A limited file-like object for HTTP/0.9 responses.

    The wrapper hands out the already-read `line` first and then continues
    with the data of the underlying `file`.  Once the line has been used
    up, read(), readline() and readlines() are rebound to the file's own
    methods; every other attribute is always delegated to the file.
    """

    # The status-line parsing code calls readline(), which normally
    # get the HTTP status line.  For a 0.9 response, however, this is
    # actually the first line of the body!  Clients need to get a
    # readable file object that contains that line.

    def __init__(self, line, file):
        self._line = line
        self._file = file
        self._line_consumed = 0
        self._line_offset = 0
        self._line_left = len(line)
        if not self._line_left:
            # nothing is buffered, so behave like the plain file from the
            # start instead of tripping the "line left" assertions below
            self._done()

    def __getattr__(self, attr):
        # Only called for attributes not found the normal way.  If _file
        # itself is missing (instance not initialised yet), looking it up
        # here would recurse without end, so report it as missing instead.
        if attr == '_file':
            raise AttributeError(attr)
        return getattr(self._file, attr)

    def _done(self):
        # called when the last byte is read from the line.  After the
        # call, all read methods are delegated to the underlying file
        # object.
        self._line_consumed = 1
        self.read = self._file.read
        self.readline = self._file.readline
        self.readlines = self._file.readlines

    def read(self, amt=None):
        """Read up to amt bytes; None or a negative amt reads everything."""
        if self._line_consumed:
            # reached only through a method reference taken before the
            # line was used up; not every file accepts None as a size
            if amt is None:
                return self._file.read()
            return self._file.read(amt)
        assert self._line_left
        # a negative size means "read it all", as it does for file objects
        read_all = amt is None or amt < 0
        if read_all or amt > self._line_left:
            s = self._line[self._line_offset:]
            self._done()
            if read_all:
                return s + self._file.read()
            else:
                return s + self._file.read(amt - len(s))
        else:
            # the request can be satisfied from the buffered line alone
            i = self._line_offset
            j = i + amt
            s = self._line[i:j]
            self._line_offset = j
            self._line_left -= amt
            if self._line_left == 0:
                self._done()
            return s

    def readline(self):
        """Return the rest of the buffered line, then lines of the file."""
        if self._line_consumed:
            return self._file.readline()
        assert self._line_left
        s = self._line[self._line_offset:]
        self._done()
        return s

    def readlines(self, size=None):
        """Return the rest of the buffered line plus the file's lines."""
        if self._line_consumed:
            if size is None:
                return self._file.readlines()
            return self._file.readlines(size)
        assert self._line_left
        L = [self._line[self._line_offset:]]
        self._done()
        if size is None:
            return L + self._file.readlines()
        else:
            return L + self._file.readlines(size)
1206
def test():
    """Test this module.

    A hodge podge of tests collected here, because they have too many
    external dependencies for the regular test suite.

    Command line: [-d]... [host [selector]].  Each -d raises the debug
    level by one; host defaults to www.python.org and selector to '/'.
    """

    import sys
    import getopt

    opts, args = getopt.getopt(sys.argv[1:], 'd')
    dl = len([flag for flag, _value in opts if flag == '-d'])

    def fetch_and_report(conn, host, selector):
        # GET selector from host through the 1.5-style interface and print
        # the status, reason, body length and response headers.
        conn.set_debuglevel(dl)
        conn.connect(host)
        conn.putrequest('GET', selector)
        conn.endheaders()
        status, reason, headers = conn.getreply()
        print('status = %s' % (status,))
        print('reason = %s' % (reason,))
        print('read %s' % (len(conn.getfile().read()),))
        print('')
        if headers:
            for header in headers.headers:
                print(header.strip())
        print('')

    host = 'www.python.org'
    selector = '/'
    if len(args) > 0:
        host = args[0]
    if len(args) > 1:
        selector = args[1]
    fetch_and_report(HTTP(), host, selector)

    # minimal test that code to extract host from url works
    class HTTP11(HTTP):
        _http_vsn = 11
        _http_vsn_str = 'HTTP/1.1'

    h = HTTP11('www.python.org')
    h.putrequest('GET', 'http://www.python.org/~jeremy/')
    h.endheaders()
    h.getreply()
    h.close()

    try:
        import ssl
    except ImportError:
        # no SSL support available: skip the https part
        return

    for host, selector in (('sourceforge.net', '/projects/python'),
                           ):
        print("https://%s%s" % (host, selector))
        fetch_and_report(HTTPS(), host, selector)
1271
# Run the network-dependent self test when the module is executed as a
# script (optional arguments: [-d]... [host [selector]]).
if __name__ == '__main__':
    test()