1 """HTTP/1.1 client library
3 <intro stuff goes here>
6 HTTPConnection goes through a number of "states", which define when a client
7 may legally make another request or fetch the response for a particular
8 request. This diagram details these state transitions:
20 | ( putheader() )* endheaders()
24 | response = getresponse()
26 Unread-response [Response-headers-read]
27 |\____________________
29 | response.read() | putrequest()
31 Idle Req-started-unread-response
34 response.read() | | ( putheader() )* endheaders()
36 Request-started Req-sent-unread-response
42 This diagram presents the following rules:
43 -- a second request may not be started until {response-headers-read}
44 -- a response [object] cannot be retrieved until {request-sent}
45 -- there is no differentiation between an unread response body and a
46 partially read response body
48 Note: this enforcement is applied by the HTTPConnection class. The
49 HTTPResponse class does not enforce this state machine, which
50 implies sophisticated clients may accelerate the request/response
51 pipeline. Caution should be taken, though: accelerating the states
52 beyond the above pattern may imply knowledge of the server's
53 connection-close behavior for certain requests. For example, it
54 is impossible to tell whether the server will close the connection
55 UNTIL the response headers have been read; this means that further
56 requests cannot be placed into the pipeline until it is known that
57 the server will NOT be closing the connection.
59 Logical State __state __response
60 ------------- ------- ----------
62 Request-started _CS_REQ_STARTED None
63 Request-sent _CS_REQ_SENT None
64 Unread-response _CS_IDLE <response_class>
65 Req-started-unread-response _CS_REQ_STARTED <response_class>
66 Req-sent-unread-response _CS_REQ_SENT <response_class>
70 from sys
import py3kwarning
71 from urlparse
import urlsplit
73 with warnings
.catch_warnings():
75 warnings
.filterwarnings("ignore", ".*mimetools has been removed",
80 from cStringIO
import StringIO
82 from StringIO
import StringIO
84 __all__
= ["HTTP", "HTTPResponse", "HTTPConnection",
85 "HTTPException", "NotConnected", "UnknownProtocol",
86 "UnknownTransferEncoding", "UnimplementedFileMode",
87 "IncompleteRead", "InvalidURL", "ImproperConnectionState",
88 "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
89 "BadStatusLine", "error", "responses"]
98 _CS_REQ_STARTED
= 'Request-started'
99 _CS_REQ_SENT
= 'Request-sent'
104 SWITCHING_PROTOCOLS
= 101
111 NON_AUTHORITATIVE_INFORMATION
= 203
114 PARTIAL_CONTENT
= 206
119 MULTIPLE_CHOICES
= 300
120 MOVED_PERMANENTLY
= 301
125 TEMPORARY_REDIRECT
= 307
130 PAYMENT_REQUIRED
= 402
133 METHOD_NOT_ALLOWED
= 405
135 PROXY_AUTHENTICATION_REQUIRED
= 407
136 REQUEST_TIMEOUT
= 408
139 LENGTH_REQUIRED
= 411
140 PRECONDITION_FAILED
= 412
141 REQUEST_ENTITY_TOO_LARGE
= 413
142 REQUEST_URI_TOO_LONG
= 414
143 UNSUPPORTED_MEDIA_TYPE
= 415
144 REQUESTED_RANGE_NOT_SATISFIABLE
= 416
145 EXPECTATION_FAILED
= 417
146 UNPROCESSABLE_ENTITY
= 422
148 FAILED_DEPENDENCY
= 424
149 UPGRADE_REQUIRED
= 426
152 INTERNAL_SERVER_ERROR
= 500
153 NOT_IMPLEMENTED
= 501
155 SERVICE_UNAVAILABLE
= 503
156 GATEWAY_TIMEOUT
= 504
157 HTTP_VERSION_NOT_SUPPORTED
= 505
158 INSUFFICIENT_STORAGE
= 507
161 # Mapping status codes to official W3C names
164 101: 'Switching Protocols',
169 203: 'Non-Authoritative Information',
171 205: 'Reset Content',
172 206: 'Partial Content',
174 300: 'Multiple Choices',
175 301: 'Moved Permanently',
181 307: 'Temporary Redirect',
185 402: 'Payment Required',
188 405: 'Method Not Allowed',
189 406: 'Not Acceptable',
190 407: 'Proxy Authentication Required',
191 408: 'Request Timeout',
194 411: 'Length Required',
195 412: 'Precondition Failed',
196 413: 'Request Entity Too Large',
197 414: 'Request-URI Too Long',
198 415: 'Unsupported Media Type',
199 416: 'Requested Range Not Satisfiable',
200 417: 'Expectation Failed',
202 500: 'Internal Server Error',
203 501: 'Not Implemented',
205 503: 'Service Unavailable',
206 504: 'Gateway Timeout',
207 505: 'HTTP Version Not Supported',
210 # maximal amount of data to read at one time in _safe_read
213 class HTTPMessage(mimetools
.Message
):
def addheader(self, key, value):
    """Store a header value for key, merging with any earlier occurrence.

    The first value seen for key is stored unchanged.  When key already
    has a stored value, the new value is appended to it, separated by
    ", " (the RFC 2616 sec 4.2 rule for combining repeated fields).
    """
    earlier = self.dict.get(key)
    # dict.get() yielding None is taken to mean "nothing stored yet".
    self.dict[key] = value if earlier is None else ", ".join((earlier, value))
def addcontinue(self, key, more):
    """Append the text of a continuation line to the value stored for key.

    The existing value and the new text are separated by a newline
    followed by one space.  key must already be present in self.dict;
    otherwise the lookup raises KeyError.
    """
    self.dict[key] = "\n ".join((self.dict[key], more))
229 def readheaders(self
):
230 """Read header lines.
232 Read header lines up to the entirely blank line that terminates them.
233 The (normally blank) line that ends the headers is skipped, but not
234 included in the returned list. If a non-header line ends the headers,
235 (which is an error), an attempt is made to backspace over it; it is
236 never included in the returned list.
238 The variable self.status is set to the empty string if all went well,
239 otherwise it is an error message. The variable self.headers is a
240 completely uninterpreted list of lines contained in the header (so
241 printing them will reproduce the header exactly as it appears in the
244 If multiple header fields with the same name occur, they are combined
245 according to the rules in RFC 2616 sec 4.2:
247 Appending each subsequent field-value to the first, each separated
248 by a comma. The order in which header fields with the same field-name
249 are received is significant to the interpretation of the combined
252 # XXX The implementation overrides the readheaders() method of
253 # rfc822.Message. The base class design isn't amenable to
254 # customized behavior here so the method here is a copy of the
255 # base class code with a few small changes.
259 self
.headers
= hlist
= []
263 startofline
= unread
= tell
= None
264 if hasattr(self
.fp
, 'unread'):
265 unread
= self
.fp
.unread
273 startofline
= tell
= None
275 line
= self
.fp
.readline()
277 self
.status
= 'EOF in headers'
279 # Skip unix From name time lines
280 if firstline
and line
.startswith('From '):
281 self
.unixfrom
= self
.unixfrom
+ line
284 if headerseen
and line
[0] in ' \t':
285 # XXX Not sure if continuation lines are handled properly
286 # for http and/or for repeating headers
287 # It's a continuation line.
289 self
.addcontinue(headerseen
, line
.strip())
291 elif self
.iscomment(line
):
292 # It's a comment. Ignore it.
294 elif self
.islast(line
):
295 # Note! No pushback here! The delimiter line gets eaten.
297 headerseen
= self
.isheader(line
)
299 # It's a legal header line, save it.
301 self
.addheader(headerseen
, line
[len(headerseen
)+1:].strip())
304 # It's not a header line; throw it back and stop here.
306 self
.status
= 'No headers'
308 self
.status
= 'Non-header line where header expected'
309 # Try to undo the read.
313 self
.fp
.seek(startofline
)
315 self
.status
= self
.status
+ '; bad seek'
320 # strict: If true, raise BadStatusLine if the status line can't be
321 # parsed as a valid HTTP/1.0 or 1.1 status line. By default it is
322 # false because it prevents clients from talking to HTTP/0.9
323 # servers. Note that a response with a sufficiently corrupted
324 # status line will look like an HTTP/0.9 response.
326 # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
328 def __init__(self
, sock
, debuglevel
=0, strict
=0, method
=None, buffering
=False):
330 # The caller won't be using any sock.recv() calls, so buffering
331 # is fine and recommended for performance.
332 self
.fp
= sock
.makefile('rb')
334 # The buffer size is specified as zero, because the headers of
335 # the response are read with readline(). If the reads were
336 # buffered the readline() calls could consume some of the
337 # response, which may be read via a recv() on the underlying
339 self
.fp
= sock
.makefile('rb', 0)
340 self
.debuglevel
= debuglevel
342 self
._method
= method
346 # from the Status-Line of the response
347 self
.version
= _UNKNOWN
# HTTP-Version
348 self
.status
= _UNKNOWN
# Status-Code
349 self
.reason
= _UNKNOWN
# Reason-Phrase
351 self
.chunked
= _UNKNOWN
# is "chunked" being used?
352 self
.chunk_left
= _UNKNOWN
# bytes left to read in current chunk
353 self
.length
= _UNKNOWN
# number of bytes left in response
354 self
.will_close
= _UNKNOWN
# conn will close at end of response
356 def _read_status(self
):
357 # Initialize with Simple-Response defaults
358 line
= self
.fp
.readline()
359 if self
.debuglevel
> 0:
360 print "reply:", repr(line
)
362 # Presumably, the server closed the connection before
363 # sending a valid response.
364 raise BadStatusLine(line
)
366 [version
, status
, reason
] = line
.split(None, 2)
369 [version
, status
] = line
.split(None, 1)
372 # empty version will cause next test to fail and status
373 # will be treated as 0.9 response.
375 if not version
.startswith('HTTP/'):
378 raise BadStatusLine(line
)
380 # assume it's a Simple-Response from an 0.9 server
381 self
.fp
= LineAndFileWrapper(line
, self
.fp
)
382 return "HTTP/0.9", 200, ""
384 # The status code is a three-digit number
387 if status
< 100 or status
> 999:
388 raise BadStatusLine(line
)
390 raise BadStatusLine(line
)
391 return version
, status
, reason
394 if self
.msg
is not None:
395 # we've already started reading the response
398 # read until we get a non-100 response
400 version
, status
, reason
= self
._read
_status
()
401 if status
!= CONTINUE
:
403 # skip the header from the 100 response
405 skip
= self
.fp
.readline().strip()
408 if self
.debuglevel
> 0:
409 print "header:", skip
412 self
.reason
= reason
.strip()
413 if version
== 'HTTP/1.0':
415 elif version
.startswith('HTTP/1.'):
416 self
.version
= 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
417 elif version
== 'HTTP/0.9':
420 raise UnknownProtocol(version
)
422 if self
.version
== 9:
426 self
.msg
= HTTPMessage(StringIO())
429 self
.msg
= HTTPMessage(self
.fp
, 0)
430 if self
.debuglevel
> 0:
431 for hdr
in self
.msg
.headers
:
432 print "header:", hdr
,
434 # don't let the msg keep an fp
437 # are we using the chunked-style of transfer encoding?
438 tr_enc
= self
.msg
.getheader('transfer-encoding')
439 if tr_enc
and tr_enc
.lower() == "chunked":
441 self
.chunk_left
= None
445 # will the connection close at the end of the response?
446 self
.will_close
= self
._check
_close
()
448 # do we have a Content-Length?
449 # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
450 length
= self
.msg
.getheader('content-length')
451 if length
and not self
.chunked
:
453 self
.length
= int(length
)
457 if self
.length
< 0: # ignore nonsensical negative lengths
462 # does the body have a fixed length? (of zero)
463 if (status
== NO_CONTENT
or status
== NOT_MODIFIED
or
464 100 <= status
< 200 or # 1xx codes
465 self
._method
== 'HEAD'):
468 # if the connection remains open, and we aren't using chunked, and
469 # a content-length was not provided, then assume that the connection
471 if not self
.will_close
and \
472 not self
.chunked
and \
476 def _check_close(self
):
477 conn
= self
.msg
.getheader('connection')
478 if self
.version
== 11:
479 # An HTTP/1.1 proxy is assumed to stay open unless
481 conn
= self
.msg
.getheader('connection')
482 if conn
and "close" in conn
.lower():
486 # Some HTTP/1.0 implementations have support for persistent
487 # connections, using rules different than HTTP/1.1.
489 # For older HTTP, Keep-Alive indicates persistent connection.
490 if self
.msg
.getheader('keep-alive'):
493 # At least Akamai returns a "Connection: Keep-Alive" header,
494 # which was supposed to be sent by the client.
495 if conn
and "keep-alive" in conn
.lower():
498 # Proxy-Connection is a netscape hack.
499 pconn
= self
.msg
.getheader('proxy-connection')
500 if pconn
and "keep-alive" in pconn
.lower():
503 # otherwise, assume it will close
512 # NOTE: it is possible that we will not ever call self.close(). This
513 # case occurs when will_close is TRUE, length is None, and we
514 # read up to the last byte, but NOT past it.
516 # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
517 # called, meaning self.isclosed() is meaningful.
518 return self
.fp
is None
520 # XXX It would be nice to have readline and __iter__ for this, too.
522 def read(self
, amt
=None):
527 return self
._read
_chunked
(amt
)
531 if self
.length
is None:
534 s
= self
._safe
_read
(self
.length
)
536 self
.close() # we read everything
539 if self
.length
is not None:
540 if amt
> self
.length
:
541 # clip the read to the "end of response"
544 # we do not use _safe_read() here because this may be a .will_close
545 # connection, and the user is reading more bytes than will be provided
546 # (for example, reading in 1k chunks)
547 s
= self
.fp
.read(amt
)
548 if self
.length
is not None:
549 self
.length
-= len(s
)
554 def _read_chunked(self
, amt
):
555 assert self
.chunked
!= _UNKNOWN
556 chunk_left
= self
.chunk_left
559 if chunk_left
is None:
560 line
= self
.fp
.readline()
563 line
= line
[:i
] # strip chunk-extensions
565 chunk_left
= int(line
, 16)
567 # close the connection as protocol synchronisation is
570 raise IncompleteRead(''.join(value
))
574 value
.append(self
._safe
_read
(chunk_left
))
575 elif amt
< chunk_left
:
576 value
.append(self
._safe
_read
(amt
))
577 self
.chunk_left
= chunk_left
- amt
578 return ''.join(value
)
579 elif amt
== chunk_left
:
580 value
.append(self
._safe
_read
(amt
))
581 self
._safe
_read
(2) # toss the CRLF at the end of the chunk
582 self
.chunk_left
= None
583 return ''.join(value
)
585 value
.append(self
._safe
_read
(chunk_left
))
588 # we read the whole chunk, get another
589 self
._safe
_read
(2) # toss the CRLF at the end of the chunk
592 # read and discard trailer up to the CRLF terminator
593 ### note: we shouldn't have any trailers!
595 line
= self
.fp
.readline()
597 # a vanishingly small number of sites EOF without
598 # sending the trailer
603 # we read everything; close the "file"
606 return ''.join(value
)
608 def _safe_read(self
, amt
):
609 """Read the number of bytes requested, compensating for partial reads.
611 Normally, we have a blocking socket, but a read() can be interrupted
612 by a signal (resulting in a partial read).
614 Note that we cannot distinguish between EOF and an interrupt when zero
615 bytes have been read. IncompleteRead() will be raised in this
618 This function should be used when <amt> bytes "should" be present for
619 reading. If the bytes are truly not available (due to EOF), then the
620 IncompleteRead exception can be used to detect the problem.
622 # NOTE(gps): As of svn r74426 socket._fileobject.read(x) will never
623 # return less than x bytes unless EOF is encountered. It now handles
624 # signal interruptions (socket.error EINTR) internally. This code
625 # never caught that exception anyways. It seems largely pointless.
626 # self.fp.read(amt) will work fine.
629 chunk
= self
.fp
.read(min(amt
, MAXAMOUNT
))
631 raise IncompleteRead(''.join(s
), amt
)
def getheader(self, name, default=None):
    """Look up one response header by name.

    Delegates to self.msg.getheader(name, default) and returns its
    result.  Raises ResponseNotReady when self.msg is None, i.e. there
    is no header message to consult yet.
    """
    message = self.msg
    if message is None:
        raise ResponseNotReady()
    return message.getheader(name, default)
def getheaders(self):
    """Return list of (header, value) tuples.

    The list is whatever self.msg.items() produces.  Raises
    ResponseNotReady when self.msg is None.
    """
    message = self.msg
    if message is None:
        raise ResponseNotReady()
    return message.items()
648 class HTTPConnection
:
651 _http_vsn_str
= 'HTTP/1.1'
653 response_class
= HTTPResponse
654 default_port
= HTTP_PORT
659 def __init__(self
, host
, port
=None, strict
=None,
660 timeout
=socket
._GLOBAL
_DEFAULT
_TIMEOUT
):
661 self
.timeout
= timeout
664 self
.__response
= None
665 self
.__state
= _CS_IDLE
667 self
._tunnel
_host
= None
668 self
._tunnel
_port
= None
670 self
._set
_hostport
(host
, port
)
671 if strict
is not None:
def set_tunnel(self, host, port=None):
    """Record the host and port to use for HTTP CONNECT tunnelling.

    The values are only stored here, as self._tunnel_host and
    self._tunnel_port; nothing is sent by this call.
    """
    self._tunnel_host, self._tunnel_port = host, port
679 def _set_hostport(self
, host
, port
):
682 j
= host
.rfind(']') # ipv6 addresses have [...]
685 port
= int(host
[i
+1:])
687 raise InvalidURL("nonnumeric port: '%s'" % host
[i
+1:])
690 port
= self
.default_port
691 if host
and host
[0] == '[' and host
[-1] == ']':
def set_debuglevel(self, level):
    """Set the debug output level.

    The value is stored as self.debuglevel; code in this module prints
    diagnostic output when that attribute is greater than zero.
    """
    self.debuglevel = level
700 self
._set
_hostport
(self
._tunnel
_host
, self
._tunnel
_port
)
701 self
.send("CONNECT %s:%d HTTP/1.0\r\n\r\n" % (self
.host
, self
.port
))
702 response
= self
.response_class(self
.sock
, strict
= self
.strict
,
703 method
= self
._method
)
704 (version
, code
, message
) = response
._read
_status
()
708 raise socket
.error
, "Tunnel connection failed: %d %s" % (code
,
711 line
= response
.fp
.readline()
712 if line
== '\r\n': break
716 """Connect to the host and port specified in __init__."""
717 self
.sock
= socket
.create_connection((self
.host
,self
.port
),
720 if self
._tunnel
_host
:
724 """Close the connection to the HTTP server."""
726 self
.sock
.close() # close it manually... there may be other refs
729 self
.__response
.close()
730 self
.__response
= None
731 self
.__state
= _CS_IDLE
734 """Send `str' to the server."""
735 if self
.sock
is None:
741 # send the data to the server. if we get a broken pipe, then close
742 # the socket. we want to reconnect when somebody tries to send again.
744 # NOTE: we DO propagate the error, though, because we cannot simply
745 # ignore the error... the caller will know if they can retry.
746 if self
.debuglevel
> 0:
747 print "send:", repr(str)
750 if hasattr(str,'read') :
751 if self
.debuglevel
> 0: print "sendIng a read()able"
752 data
=str.read(blocksize
)
754 self
.sock
.sendall(data
)
755 data
=str.read(blocksize
)
757 self
.sock
.sendall(str)
758 except socket
.error
, v
:
759 if v
[0] == 32: # Broken pipe
def _output(self, s):
    """Queue one line of the request in self._buffer.

    Nothing is transmitted here; the line is only appended to the
    buffer.  The caller passes the line without a trailing \\r\\n.
    """
    self._buffer.append(s)
770 def _send_output(self
, message_body
=None):
771 """Send the currently buffered request and clear the buffer.
773 Appends an extra \\r\\n to the buffer.
774 A message_body may be specified, to be appended to the request.
776 self
._buffer
.extend(("", ""))
777 msg
= "\r\n".join(self
._buffer
)
779 # If msg and message_body are sent in a single send() call,
780 # it will avoid performance problems caused by the interaction
781 # between delayed ack and the Nagle algorithm.
782 if isinstance(message_body
, str):
786 if message_body
is not None:
787 #message_body was not a string (i.e. it is a file) and
788 #we must run the risk of Nagle
789 self
.send(message_body
)
791 def putrequest(self
, method
, url
, skip_host
=0, skip_accept_encoding
=0):
792 """Send a request to the server.
794 `method' specifies an HTTP request method, e.g. 'GET'.
795 `url' specifies the object being requested, e.g. '/index.html'.
796 `skip_host' if True does not add automatically a 'Host:' header
797 `skip_accept_encoding' if True does not add automatically an
798 'Accept-Encoding:' header
801 # if a prior response has been completed, then forget about it.
802 if self
.__response
and self
.__response
.isclosed():
803 self
.__response
= None
806 # in certain cases, we cannot issue another request on this connection.
808 # 1) we are in the process of sending a request. (_CS_REQ_STARTED)
809 # 2) a response to a previous request has signalled that it is going
810 # to close the connection upon completion.
811 # 3) the headers for the previous response have not been read, thus
812 # we cannot determine whether point (2) is true. (_CS_REQ_SENT)
814 # if there is no prior response, then we can request at will.
816 # if point (2) is true, then we will have passed the socket to the
817 # response (effectively meaning, "there is no prior response"), and
818 # will open a new one when a new request is made.
820 # Note: if a prior response exists, then we *can* start a new request.
821 # We are not allowed to begin fetching the response to this new
822 # request, however, until that prior response is complete.
824 if self
.__state
== _CS_IDLE
:
825 self
.__state
= _CS_REQ_STARTED
827 raise CannotSendRequest()
829 # Save the method we use, we need it later in the response phase
830 self
._method
= method
833 str = '%s %s %s' % (method
, url
, self
._http
_vsn
_str
)
837 if self
._http
_vsn
== 11:
838 # Issue some standard headers for better HTTP/1.1 compliance
841 # this header is issued *only* for HTTP/1.1
842 # connections. more specifically, this means it is
843 # only issued when the client uses the new
844 # HTTPConnection() class. backwards-compat clients
845 # will be using HTTP/1.0 and those clients may be
846 # issuing this header themselves. we should NOT issue
847 # it twice; some web servers (such as Apache) barf
848 # when they see two Host: headers
850 # If we need a non-standard port,include it in the
851 # header. If the request is going through a proxy,
852 # but the host of the actual URL, not the host of the
856 if url
.startswith('http'):
857 nil
, netloc
, nil
, nil
, nil
= urlsplit(url
)
861 netloc_enc
= netloc
.encode("ascii")
862 except UnicodeEncodeError:
863 netloc_enc
= netloc
.encode("idna")
864 self
.putheader('Host', netloc_enc
)
867 host_enc
= self
.host
.encode("ascii")
868 except UnicodeEncodeError:
869 host_enc
= self
.host
.encode("idna")
870 if self
.port
== self
.default_port
:
871 self
.putheader('Host', host_enc
)
873 self
.putheader('Host', "%s:%s" % (host_enc
, self
.port
))
875 # note: we are assuming that clients will not attempt to set these
876 # headers since *this* library must deal with the
877 # consequences. this also means that when the supporting
878 # libraries are updated to recognize other forms, then this
879 # code should be changed (removed or updated).
881 # we only want a Content-Encoding of "identity" since we don't
882 # support encodings such as x-gzip or x-deflate.
883 if not skip_accept_encoding
:
884 self
.putheader('Accept-Encoding', 'identity')
886 # we can accept "chunked" Transfer-Encodings, but no others
887 # NOTE: no TE header implies *only* "chunked"
888 #self.putheader('TE', 'chunked')
890 # if TE is supplied in the header, then it must appear in a
892 #self.putheader('Connection', 'TE')
895 # For HTTP/1.0, the server will assume "not chunked"
def putheader(self, header, *values):
    """Send a request header line to the server.

    For example: h.putheader('Accept', 'text/html')

    Several values may be passed; they are emitted as a single header
    whose value is folded over continuation lines (joined with CRLF
    followed by a TAB).  Values that are not strings, such as an integer
    Content-Length, are formatted with '%s' before joining.

    The line is handed to self._output(), i.e. queued in the request
    buffer.  Raises CannotSendHeader unless the connection is in the
    request-started state (set by putrequest(), left by endheaders()).
    """
    if self.__state != _CS_REQ_STARTED:
        raise CannotSendHeader()

    # '%s' leaves string values untouched and turns anything else into
    # text, so join() no longer fails on numeric values.
    folded = '\r\n\t'.join(['%s' % (v,) for v in values])
    # The local is deliberately not called 'str', to avoid shadowing
    # the builtin.
    line = '%s: %s' % (header, folded)
    self._output(line)
def endheaders(self, message_body=None):
    """Mark the header section as finished and send the request.

    The connection state moves from request-started to request-sent,
    then the buffered request is passed to self._send_output() together
    with the optional message_body, so the body can go out with the
    headers when possible.  message_body should be a string.

    Raises CannotSendHeader if no request is currently in the
    request-started state.
    """
    if self.__state != _CS_REQ_STARTED:
        raise CannotSendHeader()
    # The state is advanced before anything is written, as in the
    # original ordering.
    self.__state = _CS_REQ_SENT
    self._send_output(message_body)
924 def request(self
, method
, url
, body
=None, headers
={}):
925 """Send a complete request to the server."""
928 self
._send
_request
(method
, url
, body
, headers
)
929 except socket
.error
, v
:
930 # trap 'Broken pipe' if we're allowed to automatically reconnect
931 if v
[0] != 32 or not self
.auto_open
:
934 self
._send
_request
(method
, url
, body
, headers
)
936 def _set_content_length(self
, body
):
937 # Set the content-length based on the body.
940 thelen
= str(len(body
))
941 except TypeError, te
:
942 # If this is a file-like object, try to
943 # fstat its file descriptor
946 thelen
= str(os
.fstat(body
.fileno()).st_size
)
947 except (AttributeError, OSError):
948 # Don't send a length if this failed
949 if self
.debuglevel
> 0: print "Cannot stat!!"
951 if thelen
is not None:
952 self
.putheader('Content-Length', thelen
)
954 def _send_request(self
, method
, url
, body
, headers
):
955 # honour explicitly requested Host: and Accept-Encoding headers
956 header_names
= dict.fromkeys([k
.lower() for k
in headers
])
958 if 'host' in header_names
:
959 skips
['skip_host'] = 1
960 if 'accept-encoding' in header_names
:
961 skips
['skip_accept_encoding'] = 1
963 self
.putrequest(method
, url
, **skips
)
965 if body
and ('content-length' not in header_names
):
966 self
._set
_content
_length
(body
)
967 for hdr
, value
in headers
.iteritems():
968 self
.putheader(hdr
, value
)
969 self
.endheaders(body
)
971 def getresponse(self
, buffering
=False):
972 "Get the response from the server."
974 # if a prior response has been completed, then forget about it.
975 if self
.__response
and self
.__response
.isclosed():
976 self
.__response
= None
979 # if a prior response exists, then it must be completed (otherwise, we
980 # cannot read this response's header to determine the connection-close
983 # note: if a prior response existed, but was connection-close, then the
984 # socket and response were made independent of this HTTPConnection
985 # object since a new request requires that we open a whole new
988 # this means the prior response had one of two states:
989 # 1) will_close: this connection was reset and the prior socket and
990 # response operate independently
991 # 2) persistent: the response was retained and we await its
992 # isclosed() status to become true.
994 if self
.__state
!= _CS_REQ_SENT
or self
.__response
:
995 raise ResponseNotReady()
998 kwds
= {"strict":self
.strict
, "method":self
._method
}
999 if self
.debuglevel
> 0:
1000 args
+= (self
.debuglevel
,)
1002 #only add this keyword if non-default, for compatibility with
1003 #other response_classes.
1004 kwds
["buffering"] = True;
1005 response
= self
.response_class(*args
, **kwds
)
1008 assert response
.will_close
!= _UNKNOWN
1009 self
.__state
= _CS_IDLE
1011 if response
.will_close
:
1012 # this effectively passes the connection to the response
1015 # remember this, so we can tell when it is complete
1016 self
.__response
= response
1022 "Compatibility class with httplib.py from 1.5."
1025 _http_vsn_str
= 'HTTP/1.0'
1029 _connection_class
= HTTPConnection
1031 def __init__(self
, host
='', port
=None, strict
=None):
1032 "Provide a default host, since the superclass requires one."
1034 # some joker passed 0 explicitly, meaning default port
1038 # Note that we may pass an empty string as the host; this will throw
1039 # an error when we attempt to connect. Presumably, the client code
1040 # will call connect before then, with a proper host.
1041 self
._setup
(self
._connection
_class
(host
, port
, strict
))
1043 def _setup(self
, conn
):
1046 # set up delegation to flesh out interface
1047 self
.send
= conn
.send
1048 self
.putrequest
= conn
.putrequest
1049 self
.putheader
= conn
.putheader
1050 self
.endheaders
= conn
.endheaders
1051 self
.set_debuglevel
= conn
.set_debuglevel
1053 conn
._http
_vsn
= self
._http
_vsn
1054 conn
._http
_vsn
_str
= self
._http
_vsn
_str
def connect(self, host=None, port=None):
    """Connect the wrapped connection, optionally changing its target.

    When host is not None, host and port are first passed to the wrapped
    connection's _set_hostport(); its connect() is then called in either
    case.
    """
    conn = self._conn
    if host is not None:
        conn._set_hostport(host, port)
    conn.connect()
1066 "Provide a getfile, since the superclass' does not use this concept."
1069 def getreply(self
, buffering
=False):
1070 """Compat definition since superclass does not define it.
1072 Returns a tuple consisting of:
1073 - server status code (e.g. '200' if all goes well)
1074 - server "reason" corresponding to status code
1075 - any RFC822 headers in the response from the server
1079 response
= self
._conn
.getresponse()
1081 #only add this keyword if non-default for compatibility
1082 #with other connection classes
1083 response
= self
._conn
.getresponse(buffering
)
1084 except BadStatusLine
, e
:
1085 ### hmm. if getresponse() ever closes the socket on a bad request,
1086 ### then we are going to have problems with self.sock
1088 ### should we keep this behavior? do people use it?
1089 # keep the socket open (as a file), and return it
1090 self
.file = self
._conn
.sock
.makefile('rb', 0)
1092 # close our socket -- we want to restart after any protocol error
1096 return -1, e
.line
, None
1098 self
.headers
= response
.msg
1099 self
.file = response
.fp
1100 return response
.status
, response
.reason
, response
.msg
1105 # note that self.file == response.fp, which gets closed by the
1106 # superclass. just clear the object ref here.
1107 ### hmm. messy. if status==-1, then self.file is owned by us.
1108 ### well... we aren't explicitly closing, but losing this ref will
1117 class HTTPSConnection(HTTPConnection
):
1118 "This class allows communication via SSL."
1120 default_port
= HTTPS_PORT
def __init__(self, host, port=None, key_file=None, cert_file=None,
             strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
    """Initialise the connection and remember the key/certificate files.

    host, port, strict and timeout are forwarded unchanged to
    HTTPConnection.__init__; key_file and cert_file are stored on the
    instance under the same names.
    """
    HTTPConnection.__init__(self, host, port, strict, timeout)
    self.key_file, self.cert_file = key_file, cert_file
1129 "Connect to a host on a given (SSL) port."
1131 sock
= socket
.create_connection((self
.host
, self
.port
), self
.timeout
)
1132 if self
._tunnel
_host
:
1135 self
.sock
= ssl
.wrap_socket(sock
, self
.key_file
, self
.cert_file
)
1137 __all__
.append("HTTPSConnection")
1140 """Compatibility with 1.5 httplib interface
1142 Python 1.5.2 did not have an HTTPS class, but it defined an
1143 interface for sending http requests that is also useful for
1147 _connection_class
= HTTPSConnection
1149 def __init__(self
, host
='', port
=None, key_file
=None, cert_file
=None,
1151 # provide a default host, pass the X509 cert info
1153 # urf. compensate for bad input.
1156 self
._setup
(self
._connection
_class
(host
, port
, key_file
,
1159 # we never actually use these for anything, but we keep them
1160 # here for compatibility with post-1.5.2 CVS.
1161 self
.key_file
= key_file
1162 self
.cert_file
= cert_file
1165 def FakeSocket (sock
, sslobj
):
1166 warnings
.warn("FakeSocket is deprecated, and won't be in 3.x. " +
1167 "Use the result of ssl.wrap_socket() directly instead.",
1168 DeprecationWarning, stacklevel
=2)
1172 class HTTPException(Exception):
1173 # Subclasses that define an __init__ must call Exception.__init__
1174 # or define self.args. Otherwise, str() will fail.
1177 class NotConnected(HTTPException
):
1180 class InvalidURL(HTTPException
):
class UnknownProtocol(HTTPException):
    """HTTPException subclass that carries a protocol version value."""

    def __init__(self, version):
        # args is assigned directly as a one-element tuple;
        # Exception.__init__ is not called.
        self.args = (version,)
        self.version = version
1188 class UnknownTransferEncoding(HTTPException
):
1191 class UnimplementedFileMode(HTTPException
):
1194 class IncompleteRead(HTTPException
):
def __init__(self, partial, expected=None):
    """Record the data read so far and the expected remainder, if given.

    partial is stored both as the single element of self.args and as
    self.partial; expected is stored as self.expected and may be None.
    """
    self.args = (partial,)
    self.partial = partial
    self.expected = expected
1200 if self
.expected
is not None:
1201 e
= ', %i more expected' % self
.expected
1204 return 'IncompleteRead(%i bytes read%s)' % (len(self
.partial
), e
)
1208 class ImproperConnectionState(HTTPException
):
1211 class CannotSendRequest(ImproperConnectionState
):
1214 class CannotSendHeader(ImproperConnectionState
):
1217 class ResponseNotReady(ImproperConnectionState
):
1220 class BadStatusLine(HTTPException
):
1221 def __init__(self
, line
):
1225 # for backwards compatibility
1226 error
= HTTPException
1228 class LineAndFileWrapper
:
1229 """A limited file-like object for HTTP/0.9 responses."""
1231 # The status-line parsing code calls readline(), which normally
1232 # gets the HTTP status line. For a 0.9 response, however, this is
1233 # actually the first line of the body! Clients need to get a
1234 # readable file object that contains that line.
1236 def __init__(self
, line
, file):
1239 self
._line
_consumed
= 0
1240 self
._line
_offset
= 0
1241 self
._line
_left
= len(line
)
1243 def __getattr__(self
, attr
):
1244 return getattr(self
._file
, attr
)
1247 # called when the last byte is read from the line. After the
1248 # call, all read methods are delegated to the underlying file
1250 self
._line
_consumed
= 1
1251 self
.read
= self
._file
.read
1252 self
.readline
= self
._file
.readline
1253 self
.readlines
= self
._file
.readlines
1255 def read(self
, amt
=None):
1256 if self
._line
_consumed
:
1257 return self
._file
.read(amt
)
1258 assert self
._line
_left
1259 if amt
is None or amt
> self
._line
_left
:
1260 s
= self
._line
[self
._line
_offset
:]
1263 return s
+ self
._file
.read()
1265 return s
+ self
._file
.read(amt
- len(s
))
1267 assert amt
<= self
._line
_left
1268 i
= self
._line
_offset
1271 self
._line
_offset
= j
1272 self
._line
_left
-= amt
1273 if self
._line
_left
== 0:
1278 if self
._line
_consumed
:
1279 return self
._file
.readline()
1280 assert self
._line
_left
1281 s
= self
._line
[self
._line
_offset
:]
1285 def readlines(self
, size
=None):
1286 if self
._line
_consumed
:
1287 return self
._file
.readlines(size
)
1288 assert self
._line
_left
1289 L
= [self
._line
[self
._line
_offset
:]]
1292 return L
+ self
._file
.readlines()
1294 return L
+ self
._file
.readlines(size
)
1297 """Test this module.
1299 A hodge podge of tests collected here, because they have too many
1300 external dependencies for the regular test suite.
1305 opts
, args
= getopt
.getopt(sys
.argv
[1:], 'd')
1308 if o
== '-d': dl
= dl
+ 1
1309 host
= 'www.python.org'
1311 if args
[0:]: host
= args
[0]
1312 if args
[1:]: selector
= args
[1]
1314 h
.set_debuglevel(dl
)
1316 h
.putrequest('GET', selector
)
1318 status
, reason
, headers
= h
.getreply()
1319 print 'status =', status
1320 print 'reason =', reason
1321 print "read", len(h
.getfile().read())
1324 for header
in headers
.headers
: print header
.strip()
1327 # minimal test that code to extract host from url works
1330 _http_vsn_str
= 'HTTP/1.1'
1332 h
= HTTP11('www.python.org')
1333 h
.putrequest('GET', 'http://www.python.org/~jeremy/')
1344 for host
, selector
in (('sourceforge.net', '/projects/python'),
1346 print "https://%s%s" % (host
, selector
)
1348 hs
.set_debuglevel(dl
)
1350 hs
.putrequest('GET', selector
)
1352 status
, reason
, headers
= hs
.getreply()
1353 print 'status =', status
1354 print 'reason =', reason
1355 print "read", len(hs
.getfile().read())
1358 for header
in headers
.headers
: print header
.strip()
1361 if __name__
== '__main__':