Lib/httplib.py

   1 """HTTP/1.1 client library
   2
   3 <intro stuff goes here>
   4 <other stuff, too>
   5
   6 HTTPConnection goes through a number of "states", which define when a client
   7 may legally make another request or fetch the response for a particular
   8 request. This diagram details these state transitions:
   9
  10     (null)
  11       |
  12       | HTTPConnection()
  13       v
  14     Idle
  15       |
  16       | putrequest()
  17       v
  18     Request-started
  19       |
  20       | ( putheader() )*  endheaders()
  21       v
  22     Request-sent
  23       |
  24       | response = getresponse()
  25       v
  26     Unread-response   [Response-headers-read]
  27       |\____________________
  28       |                     |
  29       | response.read()     | putrequest()
  30       v                     v
  31     Idle                  Req-started-unread-response
  32                      ______/|
  33                    /        |
  34    response.read() |        | ( putheader() )*  endheaders()
  35                    v        v
  36        Request-started    Req-sent-unread-response
  37                             |
  38                             | response.read()
  39                             v
  40                           Request-sent
  41
  42 This diagram presents the following rules:
  43   -- a second request may not be started until {response-headers-read}
  44   -- a response [object] cannot be retrieved until {request-sent}
  45   -- there is no differentiation between an unread response body and a
  46      partially read response body
  47
  48 Note: this enforcement is applied by the HTTPConnection class. The
  49       HTTPResponse class does not enforce this state machine, which
  50       implies sophisticated clients may accelerate the request/response
  51       pipeline. Caution should be taken, though: accelerating the states
  52       beyond the above pattern may imply knowledge of the server's
  53       connection-close behavior for certain requests. For example, it
  54       is impossible to tell whether the server will close the connection
  55       UNTIL the response headers have been read; this means that further
  56       requests cannot be placed into the pipeline until it is known that
  57       the server will NOT be closing the connection.
  58
  59 Logical State                  __state            __response
  60 -------------                  -------            ----------
  61 Idle                           _CS_IDLE           None
  62 Request-started                _CS_REQ_STARTED    None
  63 Request-sent                   _CS_REQ_SENT       None
  64 Unread-response                _CS_IDLE           <response_class>
  65 Req-started-unread-response    _CS_REQ_STARTED    <response_class>
  66 Req-sent-unread-response       _CS_REQ_SENT       <response_class>
  67 """
  68
  69 from array import array
  70 import socket
  71 from sys import py3kwarning
  72 from urlparse import urlsplit
  73 import warnings
  74 with warnings.catch_warnings():
  75     if py3kwarning:
  76         warnings.filterwarnings("ignore", ".*mimetools has been removed",
  77                                 DeprecationWarning)
  78     import mimetools
  79
  80 try:
  81     from cStringIO import StringIO
  82 except ImportError:
  83     from StringIO import StringIO
  84
  85 __all__ = ["HTTP", "HTTPResponse", "HTTPConnection",
  86            "HTTPException", "NotConnected", "UnknownProtocol",
  87            "UnknownTransferEncoding", "UnimplementedFileMode",
  88            "IncompleteRead", "InvalidURL", "ImproperConnectionState",
  89            "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
  90            "BadStatusLine", "error", "responses"]
  91
# Default TCP ports.  HTTP_PORT is the default_port of HTTPConnection and
# is used by _set_hostport() when the host string carries no port.
HTTP_PORT = 80
HTTPS_PORT = 443

# Placeholder stored in HTTPResponse attributes (version, status, reason,
# chunked, chunk_left, length, will_close) until begin() has parsed the
# status line and headers.
_UNKNOWN = 'UNKNOWN'

# connection states
# These are the values held in HTTPConnection.__state; together with
# HTTPConnection.__response they encode the logical states tabulated in
# the module docstring.
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'
 101
# status codes
# Symbolic names for numeric HTTP status codes, grouped by class
# (1xx .. 5xx).  Within this module HTTPResponse.begin() compares against
# CONTINUE, NO_CONTENT and NOT_MODIFIED; the remaining names are provided
# for callers.
# informational
CONTINUE = 100
SWITCHING_PROTOCOLS = 101
PROCESSING = 102

# successful
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207
IM_USED = 226

# redirection
MULTIPLE_CHOICES = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307

# client error
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
METHOD_NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTHENTICATION_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
UNPROCESSABLE_ENTITY = 422
LOCKED = 423
FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426

# server error
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510
 161
 162 # Mapping status codes to official W3C names
 163 responses = {
 164     100: 'Continue',
 165     101: 'Switching Protocols',
 166
 167     200: 'OK',
 168     201: 'Created',
 169     202: 'Accepted',
 170     203: 'Non-Authoritative Information',
 171     204: 'No Content',
 172     205: 'Reset Content',
 173     206: 'Partial Content',
 174
 175     300: 'Multiple Choices',
 176     301: 'Moved Permanently',
 177     302: 'Found',
 178     303: 'See Other',
 179     304: 'Not Modified',
 180     305: 'Use Proxy',
 181     306: '(Unused)',
 182     307: 'Temporary Redirect',
 183
 184     400: 'Bad Request',
 185     401: 'Unauthorized',
 186     402: 'Payment Required',
 187     403: 'Forbidden',
 188     404: 'Not Found',
 189     405: 'Method Not Allowed',
 190     406: 'Not Acceptable',
 191     407: 'Proxy Authentication Required',
 192     408: 'Request Timeout',
 193     409: 'Conflict',
 194     410: 'Gone',
 195     411: 'Length Required',
 196     412: 'Precondition Failed',
 197     413: 'Request Entity Too Large',
 198     414: 'Request-URI Too Long',
 199     415: 'Unsupported Media Type',
 200     416: 'Requested Range Not Satisfiable',
 201     417: 'Expectation Failed',
 202
 203     500: 'Internal Server Error',
 204     501: 'Not Implemented',
 205     502: 'Bad Gateway',
 206     503: 'Service Unavailable',
 207     504: 'Gateway Timeout',
 208     505: 'HTTP Version Not Supported',
 209 }
 210
# maximal amount of data to read at one time in _safe_read
# (HTTPResponse._safe_read() asks the socket file for at most this many
# bytes per read() call and loops until the requested total is reached).
MAXAMOUNT = 1048576
 213
 214 class HTTPMessage(mimetools.Message):
 215
 216     def addheader(self, key, value):
 217         """Add header for field key handling repeats."""
 218         prev = self.dict.get(key)
 219         if prev is None:
 220             self.dict[key] = value
 221         else:
 222             combined = ", ".join((prev, value))
 223             self.dict[key] = combined
 224
 225     def addcontinue(self, key, more):
 226         """Add more field data from a continuation line."""
 227         prev = self.dict[key]
 228         self.dict[key] = prev + "\n " + more
 229
 230     def readheaders(self):
 231         """Read header lines.
 232
 233         Read header lines up to the entirely blank line that terminates them.
 234         The (normally blank) line that ends the headers is skipped, but not
 235         included in the returned list.  If a non-header line ends the headers,
 236         (which is an error), an attempt is made to backspace over it; it is
 237         never included in the returned list.
 238
 239         The variable self.status is set to the empty string if all went well,
 240         otherwise it is an error message.  The variable self.headers is a
 241         completely uninterpreted list of lines contained in the header (so
 242         printing them will reproduce the header exactly as it appears in the
 243         file).
 244
 245         If multiple header fields with the same name occur, they are combined
 246         according to the rules in RFC 2616 sec 4.2:
 247
 248         Appending each subsequent field-value to the first, each separated
 249         by a comma. The order in which header fields with the same field-name
 250         are received is significant to the interpretation of the combined
 251         field value.
 252         """
 253         # XXX The implementation overrides the readheaders() method of
 254         # rfc822.Message.  The base class design isn't amenable to
 255         # customized behavior here so the method here is a copy of the
 256         # base class code with a few small changes.
 257
 258         self.dict = {}
 259         self.unixfrom = ''
 260         self.headers = hlist = []
 261         self.status = ''
 262         headerseen = ""
 263         firstline = 1
 264         startofline = unread = tell = None
 265         if hasattr(self.fp, 'unread'):
 266             unread = self.fp.unread
 267         elif self.seekable:
 268             tell = self.fp.tell
 269         while True:
 270             if tell:
 271                 try:
 272                     startofline = tell()
 273                 except IOError:
 274                     startofline = tell = None
 275                     self.seekable = 0
 276             line = self.fp.readline()
 277             if not line:
 278                 self.status = 'EOF in headers'
 279                 break
 280             # Skip unix From name time lines
 281             if firstline and line.startswith('From '):
 282                 self.unixfrom = self.unixfrom + line
 283                 continue
 284             firstline = 0
 285             if headerseen and line[0] in ' \t':
 286                 # XXX Not sure if continuation lines are handled properly
 287                 # for http and/or for repeating headers
 288                 # It's a continuation line.
 289                 hlist.append(line)
 290                 self.addcontinue(headerseen, line.strip())
 291                 continue
 292             elif self.iscomment(line):
 293                 # It's a comment.  Ignore it.
 294                 continue
 295             elif self.islast(line):
 296                 # Note! No pushback here!  The delimiter line gets eaten.
 297                 break
 298             headerseen = self.isheader(line)
 299             if headerseen:
 300                 # It's a legal header line, save it.
 301                 hlist.append(line)
 302                 self.addheader(headerseen, line[len(headerseen)+1:].strip())
 303                 continue
 304             else:
 305                 # It's not a header line; throw it back and stop here.
 306                 if not self.dict:
 307                     self.status = 'No headers'
 308                 else:
 309                     self.status = 'Non-header line where header expected'
 310                 # Try to undo the read.
 311                 if unread:
 312                     unread(line)
 313                 elif tell:
 314                     self.fp.seek(startofline)
 315                 else:
 316                     self.status = self.status + '; bad seek'
 317                 break
 318
 319 class HTTPResponse:
 320
 321     # strict: If true, raise BadStatusLine if the status line can't be
 322     # parsed as a valid HTTP/1.0 or 1.1 status line.  By default it is
 323     # false because it prevents clients from talking to HTTP/0.9
 324     # servers.  Note that a response with a sufficiently corrupted
 325     # status line will look like an HTTP/0.9 response.
 326
 327     # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
 328
 329     def __init__(self, sock, debuglevel=0, strict=0, method=None, buffering=False):
 330         if buffering:
 331             # The caller won't be using any sock.recv() calls, so buffering
 332             # is fine and recommended for performance.
 333             self.fp = sock.makefile('rb')
 334         else:
 335             # The buffer size is specified as zero, because the headers of
 336             # the response are read with readline().  If the reads were
 337             # buffered the readline() calls could consume some of the
 338             # response, which make be read via a recv() on the underlying
 339             # socket.
 340             self.fp = sock.makefile('rb', 0)
 341         self.debuglevel = debuglevel
 342         self.strict = strict
 343         self._method = method
 344
 345         self.msg = None
 346
 347         # from the Status-Line of the response
 348         self.version = _UNKNOWN # HTTP-Version
 349         self.status = _UNKNOWN  # Status-Code
 350         self.reason = _UNKNOWN  # Reason-Phrase
 351
 352         self.chunked = _UNKNOWN         # is "chunked" being used?
 353         self.chunk_left = _UNKNOWN      # bytes left to read in current chunk
 354         self.length = _UNKNOWN          # number of bytes left in response
 355         self.will_close = _UNKNOWN      # conn will close at end of response
 356
 357     def _read_status(self):
 358         # Initialize with Simple-Response defaults
 359         line = self.fp.readline()
 360         if self.debuglevel > 0:
 361             print "reply:", repr(line)
 362         if not line:
 363             # Presumably, the server closed the connection before
 364             # sending a valid response.
 365             raise BadStatusLine(line)
 366         try:
 367             [version, status, reason] = line.split(None, 2)
 368         except ValueError:
 369             try:
 370                 [version, status] = line.split(None, 1)
 371                 reason = ""
 372             except ValueError:
 373                 # empty version will cause next test to fail and status
 374                 # will be treated as 0.9 response.
 375                 version = ""
 376         if not version.startswith('HTTP/'):
 377             if self.strict:
 378                 self.close()
 379                 raise BadStatusLine(line)
 380             else:
 381                 # assume it's a Simple-Response from an 0.9 server
 382                 self.fp = LineAndFileWrapper(line, self.fp)
 383                 return "HTTP/0.9", 200, ""
 384
 385         # The status code is a three-digit number
 386         try:
 387             status = int(status)
 388             if status < 100 or status > 999:
 389                 raise BadStatusLine(line)
 390         except ValueError:
 391             raise BadStatusLine(line)
 392         return version, status, reason
 393
 394     def begin(self):
 395         if self.msg is not None:
 396             # we've already started reading the response
 397             return
 398
 399         # read until we get a non-100 response
 400         while True:
 401             version, status, reason = self._read_status()
 402             if status != CONTINUE:
 403                 break
 404             # skip the header from the 100 response
 405             while True:
 406                 skip = self.fp.readline().strip()
 407                 if not skip:
 408                     break
 409                 if self.debuglevel > 0:
 410                     print "header:", skip
 411
 412         self.status = status
 413         self.reason = reason.strip()
 414         if version == 'HTTP/1.0':
 415             self.version = 10
 416         elif version.startswith('HTTP/1.'):
 417             self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
 418         elif version == 'HTTP/0.9':
 419             self.version = 9
 420         else:
 421             raise UnknownProtocol(version)
 422
 423         if self.version == 9:
 424             self.length = None
 425             self.chunked = 0
 426             self.will_close = 1
 427             self.msg = HTTPMessage(StringIO())
 428             return
 429
 430         self.msg = HTTPMessage(self.fp, 0)
 431         if self.debuglevel > 0:
 432             for hdr in self.msg.headers:
 433                 print "header:", hdr,
 434
 435         # don't let the msg keep an fp
 436         self.msg.fp = None
 437
 438         # are we using the chunked-style of transfer encoding?
 439         tr_enc = self.msg.getheader('transfer-encoding')
 440         if tr_enc and tr_enc.lower() == "chunked":
 441             self.chunked = 1
 442             self.chunk_left = None
 443         else:
 444             self.chunked = 0
 445
 446         # will the connection close at the end of the response?
 447         self.will_close = self._check_close()
 448
 449         # do we have a Content-Length?
 450         # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
 451         length = self.msg.getheader('content-length')
 452         if length and not self.chunked:
 453             try:
 454                 self.length = int(length)
 455             except ValueError:
 456                 self.length = None
 457             else:
 458                 if self.length < 0:  # ignore nonsensical negative lengths
 459                     self.length = None
 460         else:
 461             self.length = None
 462
 463         # does the body have a fixed length? (of zero)
 464         if (status == NO_CONTENT or status == NOT_MODIFIED or
 465             100 <= status < 200 or      # 1xx codes
 466             self._method == 'HEAD'):
 467             self.length = 0
 468
 469         # if the connection remains open, and we aren't using chunked, and
 470         # a content-length was not provided, then assume that the connection
 471         # WILL close.
 472         if not self.will_close and \
 473            not self.chunked and \
 474            self.length is None:
 475             self.will_close = 1
 476
 477     def _check_close(self):
 478         conn = self.msg.getheader('connection')
 479         if self.version == 11:
 480             # An HTTP/1.1 proxy is assumed to stay open unless
 481             # explicitly closed.
 482             conn = self.msg.getheader('connection')
 483             if conn and "close" in conn.lower():
 484                 return True
 485             return False
 486
 487         # Some HTTP/1.0 implementations have support for persistent
 488         # connections, using rules different than HTTP/1.1.
 489
 490         # For older HTTP, Keep-Alive indicates persistent connection.
 491         if self.msg.getheader('keep-alive'):
 492             return False
 493
 494         # At least Akamai returns a "Connection: Keep-Alive" header,
 495         # which was supposed to be sent by the client.
 496         if conn and "keep-alive" in conn.lower():
 497             return False
 498
 499         # Proxy-Connection is a netscape hack.
 500         pconn = self.msg.getheader('proxy-connection')
 501         if pconn and "keep-alive" in pconn.lower():
 502             return False
 503
 504         # otherwise, assume it will close
 505         return True
 506
 507     def close(self):
 508         if self.fp:
 509             self.fp.close()
 510             self.fp = None
 511
 512     def isclosed(self):
 513         # NOTE: it is possible that we will not ever call self.close(). This
 514         #       case occurs when will_close is TRUE, length is None, and we
 515         #       read up to the last byte, but NOT past it.
 516         #
 517         # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
 518         #          called, meaning self.isclosed() is meaningful.
 519         return self.fp is None
 520
 521     # XXX It would be nice to have readline and __iter__ for this, too.
 522
 523     def read(self, amt=None):
 524         if self.fp is None:
 525             return ''
 526
 527         if self.chunked:
 528             return self._read_chunked(amt)
 529
 530         if amt is None:
 531             # unbounded read
 532             if self.length is None:
 533                 s = self.fp.read()
 534             else:
 535                 s = self._safe_read(self.length)
 536                 self.length = 0
 537             self.close()        # we read everything
 538             return s
 539
 540         if self.length is not None:
 541             if amt > self.length:
 542                 # clip the read to the "end of response"
 543                 amt = self.length
 544
 545         # we do not use _safe_read() here because this may be a .will_close
 546         # connection, and the user is reading more bytes than will be provided
 547         # (for example, reading in 1k chunks)
 548         s = self.fp.read(amt)
 549         if self.length is not None:
 550             self.length -= len(s)
 551             if not self.length:
 552                 self.close()
 553         return s
 554
 555     def _read_chunked(self, amt):
 556         assert self.chunked != _UNKNOWN
 557         chunk_left = self.chunk_left
 558         value = []
 559         while True:
 560             if chunk_left is None:
 561                 line = self.fp.readline()
 562                 i = line.find(';')
 563                 if i >= 0:
 564                     line = line[:i] # strip chunk-extensions
 565                 try:
 566                     chunk_left = int(line, 16)
 567                 except ValueError:
 568                     # close the connection as protocol synchronisation is
 569                     # probably lost
 570                     self.close()
 571                     raise IncompleteRead(''.join(value))
 572                 if chunk_left == 0:
 573                     break
 574             if amt is None:
 575                 value.append(self._safe_read(chunk_left))
 576             elif amt < chunk_left:
 577                 value.append(self._safe_read(amt))
 578                 self.chunk_left = chunk_left - amt
 579                 return ''.join(value)
 580             elif amt == chunk_left:
 581                 value.append(self._safe_read(amt))
 582                 self._safe_read(2)  # toss the CRLF at the end of the chunk
 583                 self.chunk_left = None
 584                 return ''.join(value)
 585             else:
 586                 value.append(self._safe_read(chunk_left))
 587                 amt -= chunk_left
 588
 589             # we read the whole chunk, get another
 590             self._safe_read(2)      # toss the CRLF at the end of the chunk
 591             chunk_left = None
 592
 593         # read and discard trailer up to the CRLF terminator
 594         ### note: we shouldn't have any trailers!
 595         while True:
 596             line = self.fp.readline()
 597             if not line:
 598                 # a vanishingly small number of sites EOF without
 599                 # sending the trailer
 600                 break
 601             if line == '\r\n':
 602                 break
 603
 604         # we read everything; close the "file"
 605         self.close()
 606
 607         return ''.join(value)
 608
 609     def _safe_read(self, amt):
 610         """Read the number of bytes requested, compensating for partial reads.
 611
 612         Normally, we have a blocking socket, but a read() can be interrupted
 613         by a signal (resulting in a partial read).
 614
 615         Note that we cannot distinguish between EOF and an interrupt when zero
 616         bytes have been read. IncompleteRead() will be raised in this
 617         situation.
 618
 619         This function should be used when <amt> bytes "should" be present for
 620         reading. If the bytes are truly not available (due to EOF), then the
 621         IncompleteRead exception can be used to detect the problem.
 622         """
 623         # NOTE(gps): As of svn r74426 socket._fileobject.read(x) will never
 624         # return less than x bytes unless EOF is encountered.  It now handles
 625         # signal interruptions (socket.error EINTR) internally.  This code
 626         # never caught that exception anyways.  It seems largely pointless.
 627         # self.fp.read(amt) will work fine.
 628         s = []
 629         while amt > 0:
 630             chunk = self.fp.read(min(amt, MAXAMOUNT))
 631             if not chunk:
 632                 raise IncompleteRead(''.join(s), amt)
 633             s.append(chunk)
 634             amt -= len(chunk)
 635         return ''.join(s)
 636
 637     def getheader(self, name, default=None):
 638         if self.msg is None:
 639             raise ResponseNotReady()
 640         return self.msg.getheader(name, default)
 641
 642     def getheaders(self):
 643         """Return list of (header, value) tuples."""
 644         if self.msg is None:
 645             raise ResponseNotReady()
 646         return self.msg.items()
 647
 648
 649 class HTTPConnection:
 650
 651     _http_vsn = 11
 652     _http_vsn_str = 'HTTP/1.1'
 653
 654     response_class = HTTPResponse
 655     default_port = HTTP_PORT
 656     auto_open = 1
 657     debuglevel = 0
 658     strict = 0
 659
 660     def __init__(self, host, port=None, strict=None,
 661                  timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
 662         self.timeout = timeout
 663         self.source_address = source_address
 664         self.sock = None
 665         self._buffer = []
 666         self.__response = None
 667         self.__state = _CS_IDLE
 668         self._method = None
 669         self._tunnel_host = None
 670         self._tunnel_port = None
 671         self._tunnel_headers = {}
 672
 673         self._set_hostport(host, port)
 674         if strict is not None:
 675             self.strict = strict
 676
 677     def set_tunnel(self, host, port=None, headers=None):
 678         """ Sets up the host and the port for the HTTP CONNECT Tunnelling.
 679
 680         The headers argument should be a mapping of extra HTTP headers
 681         to send with the CONNECT request.
 682         """
 683         self._tunnel_host = host
 684         self._tunnel_port = port
 685         if headers:
 686             self._tunnel_headers = headers
 687         else:
 688             self._tunnel_headers.clear()
 689
 690     def _set_hostport(self, host, port):
 691         if port is None:
 692             i = host.rfind(':')
 693             j = host.rfind(']')         # ipv6 addresses have [...]
 694             if i > j:
 695                 try:
 696                     port = int(host[i+1:])
 697                 except ValueError:
 698                     raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
 699                 host = host[:i]
 700             else:
 701                 port = self.default_port
 702             if host and host[0] == '[' and host[-1] == ']':
 703                 host = host[1:-1]
 704         self.host = host
 705         self.port = port
 706
    def set_debuglevel(self, level):
        """Set the debug level; send() prints what it sends when it is > 0."""
        self.debuglevel = level
 709
 710     def _tunnel(self):
 711         self._set_hostport(self._tunnel_host, self._tunnel_port)
 712         self.send("CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port))
 713         for header, value in self._tunnel_headers.iteritems():
 714             self.send("%s: %s\r\n" % (header, value))
 715         self.send("\r\n")
 716         response = self.response_class(self.sock, strict = self.strict,
 717                                        method = self._method)
 718         (version, code, message) = response._read_status()
 719
 720         if code != 200:
 721             self.close()
 722             raise socket.error("Tunnel connection failed: %d %s" % (code,
 723                                                                     message.strip()))
 724         while True:
 725             line = response.fp.readline()
 726             if line == '\r\n': break
 727
 728
 729     def connect(self):
 730         """Connect to the host and port specified in __init__."""
 731         self.sock = socket.create_connection((self.host,self.port),
 732                                              self.timeout, self.source_address)
 733
 734         if self._tunnel_host:
 735             self._tunnel()
 736
 737     def close(self):
 738         """Close the connection to the HTTP server."""
 739         if self.sock:
 740             self.sock.close()   # close it manually... there may be other refs
 741             self.sock = None
 742         if self.__response:
 743             self.__response.close()
 744             self.__response = None
 745         self.__state = _CS_IDLE
 746
 747     def send(self, str):
 748         """Send `str' to the server."""
 749         if self.sock is None:
 750             if self.auto_open:
 751                 self.connect()
 752             else:
 753                 raise NotConnected()
 754
 755         # send the data to the server. if we get a broken pipe, then close
 756         # the socket. we want to reconnect when somebody tries to send again.
 757         #
 758         # NOTE: we DO propagate the error, though, because we cannot simply
 759         #       ignore the error... the caller will know if they can retry.
 760         if self.debuglevel > 0:
 761             print "send:", repr(str)
 762         try:
 763             blocksize=8192
 764             if hasattr(str,'read') and not isinstance(str, array):
 765                 if self.debuglevel > 0: print "sendIng a read()able"
 766                 data=str.read(blocksize)
 767                 while data:
 768                     self.sock.sendall(data)
 769                     data=str.read(blocksize)
 770             else:
 771                 self.sock.sendall(str)
 772         except socket.error, v:
 773             if v.args[0] == 32:      # Broken pipe
 774                 self.close()
 775             raise
 776
    def _output(self, s):
        """Add a line of output to the current request buffer.

        Assumes that the line does *not* end with \\r\\n.
        """
        # Lines are only collected here; _send_output() joins them with
        # "\r\n" and transmits them.
        self._buffer.append(s)
 783
 784     def _send_output(self, message_body=None):
 785         """Send the currently buffered request and clear the buffer.
 786
 787         Appends an extra \\r\\n to the buffer.
 788         A message_body may be specified, to be appended to the request.
 789         """
 790         self._buffer.extend(("", ""))
 791         msg = "\r\n".join(self._buffer)
 792         del self._buffer[:]
 793         # If msg and message_body are sent in a single send() call,
 794         # it will avoid performance problems caused by the interaction
 795         # between delayed ack and the Nagle algorithim.
 796         if isinstance(message_body, str):
 797             msg += message_body
 798             message_body = None
 799         self.send(msg)
 800         if message_body is not None:
 801             #message_body was not a string (i.e. it is a file) and
 802             #we must run the risk of Nagle
 803             self.send(message_body)
 804
 805     def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
 806         """Send a request to the server.
 807
 808         `method' specifies an HTTP request method, e.g. 'GET'.
 809         `url' specifies the object being requested, e.g. '/index.html'.
 810         `skip_host' if True does not add automatically a 'Host:' header
 811         `skip_accept_encoding' if True does not add automatically an
 812            'Accept-Encoding:' header
 813         """
 814
 815         # if a prior response has been completed, then forget about it.
 816         if self.__response and self.__response.isclosed():
 817             self.__response = None
 818
 819
 820         # in certain cases, we cannot issue another request on this connection.
 821         # this occurs when:
 822         #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
 823         #   2) a response to a previous request has signalled that it is going
 824         #      to close the connection upon completion.
 825         #   3) the headers for the previous response have not been read, thus
 826         #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
 827         #
 828         # if there is no prior response, then we can request at will.
 829         #
 830         # if point (2) is true, then we will have passed the socket to the
 831         # response (effectively meaning, "there is no prior response"), and
 832         # will open a new one when a new request is made.
 833         #
 834         # Note: if a prior response exists, then we *can* start a new request.
 835         #       We are not allowed to begin fetching the response to this new
 836         #       request, however, until that prior response is complete.
 837         #
 838         if self.__state == _CS_IDLE:
 839             self.__state = _CS_REQ_STARTED
 840         else:
 841             raise CannotSendRequest()
 842
 843         # Save the method we use, we need it later in the response phase
 844         self._method = method
 845         if not url:
 846             url = '/'
 847         str = '%s %s %s' % (method, url, self._http_vsn_str)
 848
 849         self._output(str)
 850
 851         if self._http_vsn == 11:
 852             # Issue some standard headers for better HTTP/1.1 compliance
 853
 854             if not skip_host:
 855                 # this header is issued *only* for HTTP/1.1
 856                 # connections. more specifically, this means it is
 857                 # only issued when the client uses the new
 858                 # HTTPConnection() class. backwards-compat clients
 859                 # will be using HTTP/1.0 and those clients may be
 860                 # issuing this header themselves. we should NOT issue
 861                 # it twice; some web servers (such as Apache) barf
 862                 # when they see two Host: headers
 863
 864                 # If we need a non-standard port,include it in the
 865                 # header.  If the request is going through a proxy,
 866                 # but the host of the actual URL, not the host of the
 867                 # proxy.
 868
 869                 netloc = ''
 870                 if url.startswith('http'):
 871                     nil, netloc, nil, nil, nil = urlsplit(url)
 872
 873                 if netloc:
 874                     try:
 875                         netloc_enc = netloc.encode("ascii")
 876                     except UnicodeEncodeError:
 877                         netloc_enc = netloc.encode("idna")
 878                     self.putheader('Host', netloc_enc)
 879                 else:
 880                     try:
 881                         host_enc = self.host.encode("ascii")
 882                     except UnicodeEncodeError:
 883                         host_enc = self.host.encode("idna")
 884                     if self.port == self.default_port:
 885                         self.putheader('Host', host_enc)
 886                     else:
 887                         self.putheader('Host', "%s:%s" % (host_enc, self.port))
 888
 889             # note: we are assuming that clients will not attempt to set these
 890             #       headers since *this* library must deal with the
 891             #       consequences. this also means that when the supporting
 892             #       libraries are updated to recognize other forms, then this
 893             #       code should be changed (removed or updated).
 894
 895             # we only want a Content-Encoding of "identity" since we don't
 896             # support encodings such as x-gzip or x-deflate.
 897             if not skip_accept_encoding:
 898                 self.putheader('Accept-Encoding', 'identity')
 899
 900             # we can accept "chunked" Transfer-Encodings, but no others
 901             # NOTE: no TE header implies *only* "chunked"
 902             #self.putheader('TE', 'chunked')
 903
 904             # if TE is supplied in the header, then it must appear in a
 905             # Connection header.
 906             #self.putheader('Connection', 'TE')
 907
 908         else:
 909             # For HTTP/1.0, the server will assume "not chunked"
 910             pass
 911
 912     def putheader(self, header, *values):
 913         """Send a request header line to the server.
 914
 915         For example: h.putheader('Accept', 'text/html')
 916         """
 917         if self.__state != _CS_REQ_STARTED:
 918             raise CannotSendHeader()
 919
 920         str = '%s: %s' % (header, '\r\n\t'.join(values))
 921         self._output(str)
 922
 923     def endheaders(self, message_body=None):
 924         """Indicate that the last header line has been sent to the server.
 925
 926         This method sends the request to the server.  The optional
 927         message_body argument can be used to pass message body
 928         associated with the request.  The message body will be sent in
 929         the same packet as the message headers if possible.  The
 930         message_body should be a string.
 931         """
 932         if self.__state == _CS_REQ_STARTED:
 933             self.__state = _CS_REQ_SENT
 934         else:
 935             raise CannotSendHeader()
 936         self._send_output(message_body)
 937
 938     def request(self, method, url, body=None, headers={}):
 939         """Send a complete request to the server."""
 940
 941         try:
 942             self._send_request(method, url, body, headers)
 943         except socket.error, v:
 944             # trap 'Broken pipe' if we're allowed to automatically reconnect
 945             if v.args[0] != 32 or not self.auto_open:
 946                 raise
 947             # try one more time
 948             self._send_request(method, url, body, headers)
 949
 950     def _set_content_length(self, body):
 951         # Set the content-length based on the body.
 952         thelen = None
 953         try:
 954             thelen = str(len(body))
 955         except TypeError, te:
 956             # If this is a file-like object, try to
 957             # fstat its file descriptor
 958             import os
 959             try:
 960                 thelen = str(os.fstat(body.fileno()).st_size)
 961             except (AttributeError, OSError):
 962                 # Don't send a length if this failed
 963                 if self.debuglevel > 0: print "Cannot stat!!"
 964
 965         if thelen is not None:
 966             self.putheader('Content-Length', thelen)
 967
 968     def _send_request(self, method, url, body, headers):
 969         # honour explicitly requested Host: and Accept-Encoding headers
 970         header_names = dict.fromkeys([k.lower() for k in headers])
 971         skips = {}
 972         if 'host' in header_names:
 973             skips['skip_host'] = 1
 974         if 'accept-encoding' in header_names:
 975             skips['skip_accept_encoding'] = 1
 976
 977         self.putrequest(method, url, **skips)
 978
 979         if body and ('content-length' not in header_names):
 980             self._set_content_length(body)
 981         for hdr, value in headers.iteritems():
 982             self.putheader(hdr, value)
 983         self.endheaders(body)
 984
 985     def getresponse(self, buffering=False):
 986         "Get the response from the server."
 987
 988         # if a prior response has been completed, then forget about it.
 989         if self.__response and self.__response.isclosed():
 990             self.__response = None
 991
 992         #
 993         # if a prior response exists, then it must be completed (otherwise, we
 994         # cannot read this response's header to determine the connection-close
 995         # behavior)
 996         #
 997         # note: if a prior response existed, but was connection-close, then the
 998         # socket and response were made independent of this HTTPConnection
 999         # object since a new request requires that we open a whole new
1000         # connection
1001         #
1002         # this means the prior response had one of two states:
1003         #   1) will_close: this connection was reset and the prior socket and
1004         #                  response operate independently
1005         #   2) persistent: the response was retained and we await its
1006         #                  isclosed() status to become true.
1007         #
1008         if self.__state != _CS_REQ_SENT or self.__response:
1009             raise ResponseNotReady()
1010
1011         args = (self.sock,)
1012         kwds = {"strict":self.strict, "method":self._method}
1013         if self.debuglevel > 0:
1014             args += (self.debuglevel,)
1015         if buffering:
1016             #only add this keyword if non-default, for compatibility with
1017             #other response_classes.
1018             kwds["buffering"] = True;
1019         response = self.response_class(*args, **kwds)
1020
1021         response.begin()
1022         assert response.will_close != _UNKNOWN
1023         self.__state = _CS_IDLE
1024
1025         if response.will_close:
1026             # this effectively passes the connection to the response
1027             self.close()
1028         else:
1029             # remember this, so we can tell when it is complete
1030             self.__response = response
1031
1032         return response
1033
1034
class HTTP:
    """Compatibility class with httplib.py from 1.5.

    Wraps a connection object created from `_connection_class' and makes
    it speak HTTP/1.0.  send, putrequest, putheader, endheaders and
    set_debuglevel are delegated directly to that connection.
    """

    _http_vsn = 10
    _http_vsn_str = 'HTTP/1.0'

    debuglevel = 0

    _connection_class = HTTPConnection

    def __init__(self, host='', port=None, strict=None):
        "Provide a default host, since the superclass requires one."

        # some joker passed 0 explicitly, meaning default port
        if port == 0:
            port = None

        # Note that we may pass an empty string as the host; this will throw
        # an error when we attempt to connect. Presumably, the client code
        # will call connect before then, with a proper host.
        self._setup(self._connection_class(host, port, strict))

    def _setup(self, conn):
        "Adopt `conn' as the underlying connection and wire up delegation."
        self._conn = conn

        # set up delegation to flesh out interface
        self.send = conn.send
        self.putrequest = conn.putrequest
        self.putheader = conn.putheader
        self.endheaders = conn.endheaders
        self.set_debuglevel = conn.set_debuglevel

        # force the wrapped connection down to this class's HTTP version
        conn._http_vsn = self._http_vsn
        conn._http_vsn_str = self._http_vsn_str

        self.file = None

    def connect(self, host=None, port=None):
        "Accept arguments to set the host/port, since the superclass doesn't."

        if host is not None:
            self._conn._set_hostport(host, port)
        self._conn.connect()

    def getfile(self):
        "Provide a getfile, since the superclass' does not use this concept."
        return self.file

    def getreply(self, buffering=False):
        """Compat definition since superclass does not define it.

        Returns a tuple consisting of:
        - server status code (e.g. '200' if all goes well)
        - server "reason" corresponding to status code
        - any RFC822 headers in the response from the server

        If the status line cannot be parsed (BadStatusLine), the tuple is
        (-1, <the offending line>, None); the connection is closed and
        getfile() returns a file object made from the socket so the raw
        data can still be read.
        """
        try:
            if not buffering:
                response = self._conn.getresponse()
            else:
                #only add this keyword if non-default for compatibility
                #with other connection classes
                response = self._conn.getresponse(buffering)
        except BadStatusLine as e:
            ### hmm. if getresponse() ever closes the socket on a bad request,
            ### then we are going to have problems with self.sock

            ### should we keep this behavior? do people use it?
            # keep the socket open (as a file), and return it
            fallback = self._conn.sock.makefile('rb', 0)

            # close our socket -- we want to restart after any protocol error
            self.close()

            # close() resets self.file, so the fallback file has to be
            # installed afterwards or getfile() would only return None.
            self.file = fallback

            self.headers = None
            return -1, e.line, None

        self.headers = response.msg
        self.file = response.fp
        return response.status, response.reason, response.msg

    def close(self):
        "Close the underlying connection and drop the reference to the file."
        self._conn.close()

        # note that self.file == response.fp, which gets closed by the
        # superclass. just clear the object ref here.
        ### hmm. messy. if status==-1, then self.file is owned by us.
        ### well... we aren't explicitly closing, but losing this ref will
        ### do it
        self.file = None
1125
1126 try:
1127     import ssl
1128 except ImportError:
1129     pass
1130 else:
1131     class HTTPSConnection(HTTPConnection):
1132         "This class allows communication via SSL."
1133
1134         default_port = HTTPS_PORT
1135
1136         def __init__(self, host, port=None, key_file=None, cert_file=None,
1137                      strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
1138                      source_address=None):
1139             HTTPConnection.__init__(self, host, port, strict, timeout,
1140                                     source_address)
1141             self.key_file = key_file
1142             self.cert_file = cert_file
1143
1144         def connect(self):
1145             "Connect to a host on a given (SSL) port."
1146
1147             sock = socket.create_connection((self.host, self.port),
1148                                             self.timeout, self.source_address)
1149             if self._tunnel_host:
1150                 self.sock = sock
1151                 self._tunnel()
1152             self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file)
1153
1154     __all__.append("HTTPSConnection")
1155
1156     class HTTPS(HTTP):
1157         """Compatibility with 1.5 httplib interface
1158
1159         Python 1.5.2 did not have an HTTPS class, but it defined an
1160         interface for sending http requests that is also useful for
1161         https.
1162         """
1163
1164         _connection_class = HTTPSConnection
1165
1166         def __init__(self, host='', port=None, key_file=None, cert_file=None,
1167                      strict=None):
1168             # provide a default host, pass the X509 cert info
1169
1170             # urf. compensate for bad input.
1171             if port == 0:
1172                 port = None
1173             self._setup(self._connection_class(host, port, key_file,
1174                                                cert_file, strict))
1175
1176             # we never actually use these for anything, but we keep them
1177             # here for compatibility with post-1.5.2 CVS.
1178             self.key_file = key_file
1179             self.cert_file = cert_file
1180
1181
1182     def FakeSocket (sock, sslobj):
1183         warnings.warn("FakeSocket is deprecated, and won't be in 3.x.  " +
1184                       "Use the result of ssl.wrap_socket() directly instead.",
1185                       DeprecationWarning, stacklevel=2)
1186         return sslobj
1187
1188
class HTTPException(Exception):
    """Base class of the exceptions defined by this module.

    Subclasses that define an __init__ must call Exception.__init__
    or define self.args.  Otherwise, str() will fail.
    """
1193
class NotConnected(HTTPException):
    """HTTPException subclass; carries no extra state.

    NOTE(review): presumably signals use of a connection that has no
    open socket -- the raise sites are outside this section, confirm.
    """
1196
class InvalidURL(HTTPException):
    """HTTPException subclass; carries no extra state.

    NOTE(review): presumably signals a malformed host/port -- the raise
    sites are outside this section, confirm.
    """
1199
class UnknownProtocol(HTTPException):
    """HTTPException subclass that records a protocol `version'.

    The value is available both as the `version' attribute and as the
    single element of `args'.
    """

    def __init__(self, version):
        self.version = version
        # args is set by hand because Exception.__init__ is not called.
        self.args = (version,)
1204
class UnknownTransferEncoding(HTTPException):
    """HTTPException subclass; carries no extra state.

    NOTE(review): the raise sites are outside this section.
    """
1207
class UnimplementedFileMode(HTTPException):
    """HTTPException subclass; carries no extra state.

    NOTE(review): the raise sites are outside this section.
    """
1210
class IncompleteRead(HTTPException):
    """HTTPException subclass describing a read that came up short.

    `partial' holds whatever was read (also exposed as args[0]);
    `expected', when not None, is the number of bytes still missing.
    Both repr() and str() give a summary of the sizes rather than the
    data itself.
    """

    def __init__(self, partial, expected=None):
        # args is set by hand because Exception.__init__ is not called.
        self.args = (partial,)
        self.partial = partial
        self.expected = expected

    def __repr__(self):
        suffix = ''
        if self.expected is not None:
            suffix = ', %i more expected' % self.expected
        return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), suffix)

    def __str__(self):
        return repr(self)
1224
class ImproperConnectionState(HTTPException):
    """Common base of CannotSendRequest, CannotSendHeader and
    ResponseNotReady."""
1227
class CannotSendRequest(ImproperConnectionState):
    """Raised by putrequest() when the connection is not idle."""
1230
class CannotSendHeader(ImproperConnectionState):
    """Raised by putheader() and endheaders() when no request is in the
    'started' state."""
1233
class ResponseNotReady(ImproperConnectionState):
    """Raised by getresponse() when no request has been fully sent, or
    when an earlier response is still pending."""
1236
class BadStatusLine(HTTPException):
    """HTTPException subclass that records the offending status `line'.

    The text is available both as the `line' attribute and as the single
    element of `args'.
    """

    def __init__(self, line):
        self.line = line
        # args is set by hand because Exception.__init__ is not called.
        self.args = (line,)
1241
# for backwards compatibility: `error' is an alias of HTTPException, so
# catching one catches the other.
error = HTTPException
1244
class LineAndFileWrapper:
    """A limited file-like object for HTTP/0.9 responses.

    The status-line parsing code calls readline(), which normally
    gets the HTTP status line.  For a 0.9 response, however, this is
    actually the first line of the body!  Clients need to get a
    readable file object that contains that line.

    The wrapper serves the saved `line' first and then continues with
    `file'.  Once the line is used up, read/readline/readlines are
    rebound to the file's own methods; every other attribute is always
    looked up on the file.
    """

    def __init__(self, line, file):
        self._line = line
        self._file = file
        self._line_consumed = False
        self._line_offset = 0          # index of the first unread char
        self._line_left = len(line)    # number of unread chars

    def __getattr__(self, attr):
        # Only reached for names not found on the wrapper itself.
        return getattr(self._file, attr)

    def _done(self):
        # called when the last byte is read from the line.  After the
        # call, all read methods are delegated to the underlying file
        # object.
        self._line_consumed = True
        self.read = self._file.read
        self.readline = self._file.readline
        self.readlines = self._file.readlines

    def read(self, amt=None):
        """Read up to `amt' characters; None or a negative value reads
        everything that is left (rest of the line plus the whole file)."""
        if self._line_consumed:
            return self._file.read(amt)
        assert self._line_left
        if amt is not None and amt < 0:
            # Follow the file convention that a negative size means
            # "read it all"; slicing with it would corrupt the offset
            # bookkeeping below.
            amt = None
        if amt is None or amt > self._line_left:
            s = self._line[self._line_offset:]
            self._done()
            if amt is None:
                return s + self._file.read()
            return s + self._file.read(amt - len(s))

        # The request can be satisfied from the saved line alone.
        i = self._line_offset
        j = i + amt
        s = self._line[i:j]
        self._line_offset = j
        self._line_left -= amt
        if self._line_left == 0:
            self._done()
        return s

    def readline(self):
        """Return the unread rest of the saved line, or the file's next
        line once the saved line has been consumed."""
        if self._line_consumed:
            return self._file.readline()
        assert self._line_left
        s = self._line[self._line_offset:]
        self._done()
        return s

    def readlines(self, size=None):
        """Return the unread rest of the saved line followed by the
        file's readlines() result (`size' is passed on when given)."""
        if self._line_consumed:
            return self._file.readlines(size)
        assert self._line_left
        L = [self._line[self._line_offset:]]
        self._done()
        if size is None:
            return L + self._file.readlines()
        return L + self._file.readlines(size)
1312
def test():
    """Test this module.

    A hodge podge of tests collected here, because they have too many
    external dependencies for the regular test suite.

    Command line: [-d]... [host [selector]].  Each -d raises the debug
    level by one; host defaults to 'www.python.org', selector to '/'.
    """

    import sys
    import getopt

    opts, args = getopt.getopt(sys.argv[1:], 'd')
    dl = sum(1 for o, a in opts if o == '-d')
    host = args[0] if args else 'www.python.org'
    selector = args[1] if len(args) > 1 else '/'

    def fetch_and_report(client, host, selector):
        # Issue one GET through a compatibility object and dump the
        # status, reason, body size and raw header lines.
        client.set_debuglevel(dl)
        client.connect(host)
        client.putrequest('GET', selector)
        client.endheaders()
        status, reason, headers = client.getreply()
        print('status = %s' % (status,))
        print('reason = %s' % (reason,))
        print("read %d" % len(client.getfile().read()))
        print("")
        if headers:
            for header in headers.headers:
                print(header.strip())
        print("")

    fetch_and_report(HTTP(), host, selector)

    # minimal test that code to extract host from url works
    class HTTP11(HTTP):
        _http_vsn = 11
        _http_vsn_str = 'HTTP/1.1'

    h = HTTP11('www.python.org')
    h.putrequest('GET', 'http://www.python.org/~jeremy/')
    h.endheaders()
    h.getreply()
    h.close()

    try:
        import ssl
    except ImportError:
        # no SSL support: skip the https checks
        return

    for host, selector in (('sourceforge.net', '/projects/python'),
                           ):
        print("https://%s%s" % (host, selector))
        fetch_and_report(HTTPS(), host, selector)
1377
# Run the ad-hoc tests in test() when this file is executed as a script.
if __name__ == '__main__':
    test()