1 """HTTP/1.1 client library
3 <intro stuff goes here>
6 HTTPConnection goes through a number of "states", which define when a client
7 may legally make another request or fetch the response for a particular
8 request. This diagram details these state transitions:
20 | ( putheader() )* endheaders()
24 | response = getresponse()
26 Unread-response [Response-headers-read]
27 |\____________________
29 | response.read() | putrequest()
31 Idle Req-started-unread-response
34 response.read() | | ( putheader() )* endheaders()
36 Request-started Req-sent-unread-response
42 This diagram presents the following rules:
43 -- a second request may not be started until {response-headers-read}
44 -- a response [object] cannot be retrieved until {request-sent}
45 -- there is no differentiation between an unread response body and a
46 partially read response body
48 Note: this enforcement is applied by the HTTPConnection class. The
49 HTTPResponse class does not enforce this state machine, which
50 implies sophisticated clients may accelerate the request/response
51 pipeline. Caution should be taken, though: accelerating the states
52 beyond the above pattern may imply knowledge of the server's
53 connection-close behavior for certain requests. For example, it
54 is impossible to tell whether the server will close the connection
55 UNTIL the response headers have been read; this means that further
56 requests cannot be placed into the pipeline until it is known that
57 the server will NOT be closing the connection.
59 Logical State __state __response
60 ------------- ------- ----------
62 Request-started _CS_REQ_STARTED None
63 Request-sent _CS_REQ_SENT None
64 Unread-response _CS_IDLE <response_class>
65 Req-started-unread-response _CS_REQ_STARTED <response_class>
66 Req-sent-unread-response _CS_REQ_SENT <response_class>
69 from array
import array
71 from sys
import py3kwarning
72 from urlparse
import urlsplit
74 with warnings
.catch_warnings():
76 warnings
.filterwarnings("ignore", ".*mimetools has been removed",
81 from cStringIO
import StringIO
83 from StringIO
import StringIO
85 __all__
= ["HTTP", "HTTPResponse", "HTTPConnection",
86 "HTTPException", "NotConnected", "UnknownProtocol",
87 "UnknownTransferEncoding", "UnimplementedFileMode",
88 "IncompleteRead", "InvalidURL", "ImproperConnectionState",
89 "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
90 "BadStatusLine", "error", "responses"]
99 _CS_REQ_STARTED
= 'Request-started'
100 _CS_REQ_SENT
= 'Request-sent'
105 SWITCHING_PROTOCOLS
= 101
112 NON_AUTHORITATIVE_INFORMATION
= 203
115 PARTIAL_CONTENT
= 206
120 MULTIPLE_CHOICES
= 300
121 MOVED_PERMANENTLY
= 301
126 TEMPORARY_REDIRECT
= 307
131 PAYMENT_REQUIRED
= 402
134 METHOD_NOT_ALLOWED
= 405
136 PROXY_AUTHENTICATION_REQUIRED
= 407
137 REQUEST_TIMEOUT
= 408
140 LENGTH_REQUIRED
= 411
141 PRECONDITION_FAILED
= 412
142 REQUEST_ENTITY_TOO_LARGE
= 413
143 REQUEST_URI_TOO_LONG
= 414
144 UNSUPPORTED_MEDIA_TYPE
= 415
145 REQUESTED_RANGE_NOT_SATISFIABLE
= 416
146 EXPECTATION_FAILED
= 417
147 UNPROCESSABLE_ENTITY
= 422
149 FAILED_DEPENDENCY
= 424
150 UPGRADE_REQUIRED
= 426
153 INTERNAL_SERVER_ERROR
= 500
154 NOT_IMPLEMENTED
= 501
156 SERVICE_UNAVAILABLE
= 503
157 GATEWAY_TIMEOUT
= 504
158 HTTP_VERSION_NOT_SUPPORTED
= 505
159 INSUFFICIENT_STORAGE
= 507
162 # Mapping status codes to official W3C names
165 101: 'Switching Protocols',
170 203: 'Non-Authoritative Information',
172 205: 'Reset Content',
173 206: 'Partial Content',
175 300: 'Multiple Choices',
176 301: 'Moved Permanently',
182 307: 'Temporary Redirect',
186 402: 'Payment Required',
189 405: 'Method Not Allowed',
190 406: 'Not Acceptable',
191 407: 'Proxy Authentication Required',
192 408: 'Request Timeout',
195 411: 'Length Required',
196 412: 'Precondition Failed',
197 413: 'Request Entity Too Large',
198 414: 'Request-URI Too Long',
199 415: 'Unsupported Media Type',
200 416: 'Requested Range Not Satisfiable',
201 417: 'Expectation Failed',
203 500: 'Internal Server Error',
204 501: 'Not Implemented',
206 503: 'Service Unavailable',
207 504: 'Gateway Timeout',
208 505: 'HTTP Version Not Supported',
211 # maximal amount of data to read at one time in _safe_read
214 class HTTPMessage(mimetools
.Message
):
216 def addheader(self
, key
, value
):
217 """Add header for field key handling repeats."""
218 prev
= self
.dict.get(key
)
220 self
.dict[key
] = value
222 combined
= ", ".join((prev
, value
))
223 self
.dict[key
] = combined
def addcontinue(self, key, more):
    """Append a continuation line's text to an existing header value.

    The text in ``more`` is joined onto the value already stored under
    ``key`` in ``self.dict``, separated by a newline and a single space.
    A KeyError propagates if nothing is stored under ``key`` yet.
    """
    fields = self.dict
    fields[key] = "\n ".join((fields[key], more))
230 def readheaders(self
):
231 """Read header lines.
233 Read header lines up to the entirely blank line that terminates them.
234 The (normally blank) line that ends the headers is skipped, but not
235 included in the returned list. If a non-header line ends the headers,
236 (which is an error), an attempt is made to backspace over it; it is
237 never included in the returned list.
239 The variable self.status is set to the empty string if all went well,
240 otherwise it is an error message. The variable self.headers is a
241 completely uninterpreted list of lines contained in the header (so
242 printing them will reproduce the header exactly as it appears in the
245 If multiple header fields with the same name occur, they are combined
246 according to the rules in RFC 2616 sec 4.2:
248 Appending each subsequent field-value to the first, each separated
249 by a comma. The order in which header fields with the same field-name
250 are received is significant to the interpretation of the combined
253 # XXX The implementation overrides the readheaders() method of
254 # rfc822.Message. The base class design isn't amenable to
255 # customized behavior here so the method here is a copy of the
256 # base class code with a few small changes.
260 self
.headers
= hlist
= []
264 startofline
= unread
= tell
= None
265 if hasattr(self
.fp
, 'unread'):
266 unread
= self
.fp
.unread
274 startofline
= tell
= None
276 line
= self
.fp
.readline()
278 self
.status
= 'EOF in headers'
280 # Skip unix From name time lines
281 if firstline
and line
.startswith('From '):
282 self
.unixfrom
= self
.unixfrom
+ line
285 if headerseen
and line
[0] in ' \t':
286 # XXX Not sure if continuation lines are handled properly
287 # for http and/or for repeating headers
288 # It's a continuation line.
290 self
.addcontinue(headerseen
, line
.strip())
292 elif self
.iscomment(line
):
293 # It's a comment. Ignore it.
295 elif self
.islast(line
):
296 # Note! No pushback here! The delimiter line gets eaten.
298 headerseen
= self
.isheader(line
)
300 # It's a legal header line, save it.
302 self
.addheader(headerseen
, line
[len(headerseen
)+1:].strip())
305 # It's not a header line; throw it back and stop here.
307 self
.status
= 'No headers'
309 self
.status
= 'Non-header line where header expected'
310 # Try to undo the read.
314 self
.fp
.seek(startofline
)
316 self
.status
= self
.status
+ '; bad seek'
321 # strict: If true, raise BadStatusLine if the status line can't be
322 # parsed as a valid HTTP/1.0 or 1.1 status line. By default it is
323 # false because it prevents clients from talking to HTTP/0.9
324 # servers. Note that a response with a sufficiently corrupted
325 # status line will look like an HTTP/0.9 response.
327 # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
329 def __init__(self
, sock
, debuglevel
=0, strict
=0, method
=None, buffering
=False):
331 # The caller won't be using any sock.recv() calls, so buffering
332 # is fine and recommended for performance.
333 self
.fp
= sock
.makefile('rb')
335 # The buffer size is specified as zero, because the headers of
336 # the response are read with readline(). If the reads were
337 # buffered the readline() calls could consume some of the
338 # response, which may be read via a recv() on the underlying
340 self
.fp
= sock
.makefile('rb', 0)
341 self
.debuglevel
= debuglevel
343 self
._method
= method
347 # from the Status-Line of the response
348 self
.version
= _UNKNOWN
# HTTP-Version
349 self
.status
= _UNKNOWN
# Status-Code
350 self
.reason
= _UNKNOWN
# Reason-Phrase
352 self
.chunked
= _UNKNOWN
# is "chunked" being used?
353 self
.chunk_left
= _UNKNOWN
# bytes left to read in current chunk
354 self
.length
= _UNKNOWN
# number of bytes left in response
355 self
.will_close
= _UNKNOWN
# conn will close at end of response
357 def _read_status(self
):
358 # Initialize with Simple-Response defaults
359 line
= self
.fp
.readline()
360 if self
.debuglevel
> 0:
361 print "reply:", repr(line
)
363 # Presumably, the server closed the connection before
364 # sending a valid response.
365 raise BadStatusLine(line
)
367 [version
, status
, reason
] = line
.split(None, 2)
370 [version
, status
] = line
.split(None, 1)
373 # empty version will cause next test to fail and status
374 # will be treated as 0.9 response.
376 if not version
.startswith('HTTP/'):
379 raise BadStatusLine(line
)
381 # assume it's a Simple-Response from an 0.9 server
382 self
.fp
= LineAndFileWrapper(line
, self
.fp
)
383 return "HTTP/0.9", 200, ""
385 # The status code is a three-digit number
388 if status
< 100 or status
> 999:
389 raise BadStatusLine(line
)
391 raise BadStatusLine(line
)
392 return version
, status
, reason
395 if self
.msg
is not None:
396 # we've already started reading the response
399 # read until we get a non-100 response
401 version
, status
, reason
= self
._read
_status
()
402 if status
!= CONTINUE
:
404 # skip the header from the 100 response
406 skip
= self
.fp
.readline().strip()
409 if self
.debuglevel
> 0:
410 print "header:", skip
413 self
.reason
= reason
.strip()
414 if version
== 'HTTP/1.0':
416 elif version
.startswith('HTTP/1.'):
417 self
.version
= 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
418 elif version
== 'HTTP/0.9':
421 raise UnknownProtocol(version
)
423 if self
.version
== 9:
427 self
.msg
= HTTPMessage(StringIO())
430 self
.msg
= HTTPMessage(self
.fp
, 0)
431 if self
.debuglevel
> 0:
432 for hdr
in self
.msg
.headers
:
433 print "header:", hdr
,
435 # don't let the msg keep an fp
438 # are we using the chunked-style of transfer encoding?
439 tr_enc
= self
.msg
.getheader('transfer-encoding')
440 if tr_enc
and tr_enc
.lower() == "chunked":
442 self
.chunk_left
= None
446 # will the connection close at the end of the response?
447 self
.will_close
= self
._check
_close
()
449 # do we have a Content-Length?
450 # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
451 length
= self
.msg
.getheader('content-length')
452 if length
and not self
.chunked
:
454 self
.length
= int(length
)
458 if self
.length
< 0: # ignore nonsensical negative lengths
463 # does the body have a fixed length? (of zero)
464 if (status
== NO_CONTENT
or status
== NOT_MODIFIED
or
465 100 <= status
< 200 or # 1xx codes
466 self
._method
== 'HEAD'):
469 # if the connection remains open, and we aren't using chunked, and
470 # a content-length was not provided, then assume that the connection
472 if not self
.will_close
and \
473 not self
.chunked
and \
477 def _check_close(self
):
478 conn
= self
.msg
.getheader('connection')
479 if self
.version
== 11:
480 # An HTTP/1.1 proxy is assumed to stay open unless
482 conn
= self
.msg
.getheader('connection')
483 if conn
and "close" in conn
.lower():
487 # Some HTTP/1.0 implementations have support for persistent
488 # connections, using rules different than HTTP/1.1.
490 # For older HTTP, Keep-Alive indicates persistent connection.
491 if self
.msg
.getheader('keep-alive'):
494 # At least Akamai returns a "Connection: Keep-Alive" header,
495 # which was supposed to be sent by the client.
496 if conn
and "keep-alive" in conn
.lower():
499 # Proxy-Connection is a netscape hack.
500 pconn
= self
.msg
.getheader('proxy-connection')
501 if pconn
and "keep-alive" in pconn
.lower():
504 # otherwise, assume it will close
513 # NOTE: it is possible that we will not ever call self.close(). This
514 # case occurs when will_close is TRUE, length is None, and we
515 # read up to the last byte, but NOT past it.
517 # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
518 # called, meaning self.isclosed() is meaningful.
519 return self
.fp
is None
521 # XXX It would be nice to have readline and __iter__ for this, too.
523 def read(self
, amt
=None):
528 return self
._read
_chunked
(amt
)
532 if self
.length
is None:
535 s
= self
._safe
_read
(self
.length
)
537 self
.close() # we read everything
540 if self
.length
is not None:
541 if amt
> self
.length
:
542 # clip the read to the "end of response"
545 # we do not use _safe_read() here because this may be a .will_close
546 # connection, and the user is reading more bytes than will be provided
547 # (for example, reading in 1k chunks)
548 s
= self
.fp
.read(amt
)
549 if self
.length
is not None:
550 self
.length
-= len(s
)
555 def _read_chunked(self
, amt
):
556 assert self
.chunked
!= _UNKNOWN
557 chunk_left
= self
.chunk_left
560 if chunk_left
is None:
561 line
= self
.fp
.readline()
564 line
= line
[:i
] # strip chunk-extensions
566 chunk_left
= int(line
, 16)
568 # close the connection as protocol synchronisation is
571 raise IncompleteRead(''.join(value
))
575 value
.append(self
._safe
_read
(chunk_left
))
576 elif amt
< chunk_left
:
577 value
.append(self
._safe
_read
(amt
))
578 self
.chunk_left
= chunk_left
- amt
579 return ''.join(value
)
580 elif amt
== chunk_left
:
581 value
.append(self
._safe
_read
(amt
))
582 self
._safe
_read
(2) # toss the CRLF at the end of the chunk
583 self
.chunk_left
= None
584 return ''.join(value
)
586 value
.append(self
._safe
_read
(chunk_left
))
589 # we read the whole chunk, get another
590 self
._safe
_read
(2) # toss the CRLF at the end of the chunk
593 # read and discard trailer up to the CRLF terminator
594 ### note: we shouldn't have any trailers!
596 line
= self
.fp
.readline()
598 # a vanishingly small number of sites EOF without
599 # sending the trailer
604 # we read everything; close the "file"
607 return ''.join(value
)
609 def _safe_read(self
, amt
):
610 """Read the number of bytes requested, compensating for partial reads.
612 Normally, we have a blocking socket, but a read() can be interrupted
613 by a signal (resulting in a partial read).
615 Note that we cannot distinguish between EOF and an interrupt when zero
616 bytes have been read. IncompleteRead() will be raised in this
619 This function should be used when <amt> bytes "should" be present for
620 reading. If the bytes are truly not available (due to EOF), then the
621 IncompleteRead exception can be used to detect the problem.
623 # NOTE(gps): As of svn r74426 socket._fileobject.read(x) will never
624 # return less than x bytes unless EOF is encountered. It now handles
625 # signal interruptions (socket.error EINTR) internally. This code
626 # never caught that exception anyways. It seems largely pointless.
627 # self.fp.read(amt) will work fine.
630 chunk
= self
.fp
.read(min(amt
, MAXAMOUNT
))
632 raise IncompleteRead(''.join(s
), amt
)
637 def getheader(self
, name
, default
=None):
639 raise ResponseNotReady()
640 return self
.msg
.getheader(name
, default
)
642 def getheaders(self
):
643 """Return list of (header, value) tuples."""
645 raise ResponseNotReady()
646 return self
.msg
.items()
649 class HTTPConnection
:
652 _http_vsn_str
= 'HTTP/1.1'
654 response_class
= HTTPResponse
655 default_port
= HTTP_PORT
660 def __init__(self
, host
, port
=None, strict
=None,
661 timeout
=socket
._GLOBAL
_DEFAULT
_TIMEOUT
, source_address
=None):
662 self
.timeout
= timeout
663 self
.source_address
= source_address
666 self
.__response
= None
667 self
.__state
= _CS_IDLE
669 self
._tunnel
_host
= None
670 self
._tunnel
_port
= None
671 self
._tunnel
_headers
= {}
673 self
._set
_hostport
(host
, port
)
674 if strict
is not None:
677 def set_tunnel(self
, host
, port
=None, headers
=None):
678 """ Sets up the host and the port for the HTTP CONNECT Tunnelling.
680 The headers argument should be a mapping of extra HTTP headers
681 to send with the CONNECT request.
683 self
._tunnel
_host
= host
684 self
._tunnel
_port
= port
686 self
._tunnel
_headers
= headers
688 self
._tunnel
_headers
.clear()
690 def _set_hostport(self
, host
, port
):
693 j
= host
.rfind(']') # ipv6 addresses have [...]
696 port
= int(host
[i
+1:])
698 raise InvalidURL("nonnumeric port: '%s'" % host
[i
+1:])
701 port
= self
.default_port
702 if host
and host
[0] == '[' and host
[-1] == ']':
def set_debuglevel(self, level):
    """Store ``level`` as this object's ``debuglevel`` attribute.

    Other code in this file prints diagnostic output when ``debuglevel``
    is greater than zero.
    """
    self.debuglevel = level
711 self
._set
_hostport
(self
._tunnel
_host
, self
._tunnel
_port
)
712 self
.send("CONNECT %s:%d HTTP/1.0\r\n" % (self
.host
, self
.port
))
713 for header
, value
in self
._tunnel
_headers
.iteritems():
714 self
.send("%s: %s\r\n" % (header
, value
))
716 response
= self
.response_class(self
.sock
, strict
= self
.strict
,
717 method
= self
._method
)
718 (version
, code
, message
) = response
._read
_status
()
722 raise socket
.error("Tunnel connection failed: %d %s" % (code
,
725 line
= response
.fp
.readline()
726 if line
== '\r\n': break
730 """Connect to the host and port specified in __init__."""
731 self
.sock
= socket
.create_connection((self
.host
,self
.port
),
732 self
.timeout
, self
.source_address
)
734 if self
._tunnel
_host
:
738 """Close the connection to the HTTP server."""
740 self
.sock
.close() # close it manually... there may be other refs
743 self
.__response
.close()
744 self
.__response
= None
745 self
.__state
= _CS_IDLE
748 """Send `str' to the server."""
749 if self
.sock
is None:
755 # send the data to the server. if we get a broken pipe, then close
756 # the socket. we want to reconnect when somebody tries to send again.
758 # NOTE: we DO propagate the error, though, because we cannot simply
759 # ignore the error... the caller will know if they can retry.
760 if self
.debuglevel
> 0:
761 print "send:", repr(str)
764 if hasattr(str,'read') and not isinstance(str, array
):
765 if self
.debuglevel
> 0: print "sendIng a read()able"
766 data
=str.read(blocksize
)
768 self
.sock
.sendall(data
)
769 data
=str.read(blocksize
)
771 self
.sock
.sendall(str)
772 except socket
.error
, v
:
773 if v
.args
[0] == 32: # Broken pipe
777 def _output(self
, s
):
778 """Add a line of output to the current request buffer.
780 Assumes that the line does *not* end with \\r\\n.
782 self
._buffer
.append(s
)
784 def _send_output(self
, message_body
=None):
785 """Send the currently buffered request and clear the buffer.
787 Appends an extra \\r\\n to the buffer.
788 A message_body may be specified, to be appended to the request.
790 self
._buffer
.extend(("", ""))
791 msg
= "\r\n".join(self
._buffer
)
793 # If msg and message_body are sent in a single send() call,
794 # it will avoid performance problems caused by the interaction
795 # between delayed ack and the Nagle algorithm.
796 if isinstance(message_body
, str):
800 if message_body
is not None:
801 #message_body was not a string (i.e. it is a file) and
802 #we must run the risk of Nagle
803 self
.send(message_body
)
805 def putrequest(self
, method
, url
, skip_host
=0, skip_accept_encoding
=0):
806 """Send a request to the server.
808 `method' specifies an HTTP request method, e.g. 'GET'.
809 `url' specifies the object being requested, e.g. '/index.html'.
810 `skip_host' if True does not add automatically a 'Host:' header
811 `skip_accept_encoding' if True does not add automatically an
812 'Accept-Encoding:' header
815 # if a prior response has been completed, then forget about it.
816 if self
.__response
and self
.__response
.isclosed():
817 self
.__response
= None
820 # in certain cases, we cannot issue another request on this connection.
822 # 1) we are in the process of sending a request. (_CS_REQ_STARTED)
823 # 2) a response to a previous request has signalled that it is going
824 # to close the connection upon completion.
825 # 3) the headers for the previous response have not been read, thus
826 # we cannot determine whether point (2) is true. (_CS_REQ_SENT)
828 # if there is no prior response, then we can request at will.
830 # if point (2) is true, then we will have passed the socket to the
831 # response (effectively meaning, "there is no prior response"), and
832 # will open a new one when a new request is made.
834 # Note: if a prior response exists, then we *can* start a new request.
835 # We are not allowed to begin fetching the response to this new
836 # request, however, until that prior response is complete.
838 if self
.__state
== _CS_IDLE
:
839 self
.__state
= _CS_REQ_STARTED
841 raise CannotSendRequest()
843 # Save the method we use, we need it later in the response phase
844 self
._method
= method
847 str = '%s %s %s' % (method
, url
, self
._http
_vsn
_str
)
851 if self
._http
_vsn
== 11:
852 # Issue some standard headers for better HTTP/1.1 compliance
855 # this header is issued *only* for HTTP/1.1
856 # connections. more specifically, this means it is
857 # only issued when the client uses the new
858 # HTTPConnection() class. backwards-compat clients
859 # will be using HTTP/1.0 and those clients may be
860 # issuing this header themselves. we should NOT issue
861 # it twice; some web servers (such as Apache) barf
862 # when they see two Host: headers
864 # If we need a non-standard port,include it in the
865 # header. If the request is going through a proxy,
866 # but the host of the actual URL, not the host of the
870 if url
.startswith('http'):
871 nil
, netloc
, nil
, nil
, nil
= urlsplit(url
)
875 netloc_enc
= netloc
.encode("ascii")
876 except UnicodeEncodeError:
877 netloc_enc
= netloc
.encode("idna")
878 self
.putheader('Host', netloc_enc
)
881 host_enc
= self
.host
.encode("ascii")
882 except UnicodeEncodeError:
883 host_enc
= self
.host
.encode("idna")
884 if self
.port
== self
.default_port
:
885 self
.putheader('Host', host_enc
)
887 self
.putheader('Host', "%s:%s" % (host_enc
, self
.port
))
889 # note: we are assuming that clients will not attempt to set these
890 # headers since *this* library must deal with the
891 # consequences. this also means that when the supporting
892 # libraries are updated to recognize other forms, then this
893 # code should be changed (removed or updated).
895 # we only want a Content-Encoding of "identity" since we don't
896 # support encodings such as x-gzip or x-deflate.
897 if not skip_accept_encoding
:
898 self
.putheader('Accept-Encoding', 'identity')
900 # we can accept "chunked" Transfer-Encodings, but no others
901 # NOTE: no TE header implies *only* "chunked"
902 #self.putheader('TE', 'chunked')
904 # if TE is supplied in the header, then it must appear in a
906 #self.putheader('Connection', 'TE')
909 # For HTTP/1.0, the server will assume "not chunked"
912 def putheader(self
, header
, *values
):
913 """Send a request header line to the server.
915 For example: h.putheader('Accept', 'text/html')
917 if self
.__state
!= _CS_REQ_STARTED
:
918 raise CannotSendHeader()
920 str = '%s: %s' % (header
, '\r\n\t'.join(values
))
923 def endheaders(self
, message_body
=None):
924 """Indicate that the last header line has been sent to the server.
926 This method sends the request to the server. The optional
927 message_body argument can be used to pass message body
928 associated with the request. The message body will be sent in
929 the same packet as the message headers if possible. The
930 message_body should be a string.
932 if self
.__state
== _CS_REQ_STARTED
:
933 self
.__state
= _CS_REQ_SENT
935 raise CannotSendHeader()
936 self
._send
_output
(message_body
)
938 def request(self
, method
, url
, body
=None, headers
={}):
939 """Send a complete request to the server."""
942 self
._send
_request
(method
, url
, body
, headers
)
943 except socket
.error
, v
:
944 # trap 'Broken pipe' if we're allowed to automatically reconnect
945 if v
.args
[0] != 32 or not self
.auto_open
:
948 self
._send
_request
(method
, url
, body
, headers
)
950 def _set_content_length(self
, body
):
951 # Set the content-length based on the body.
954 thelen
= str(len(body
))
955 except TypeError, te
:
956 # If this is a file-like object, try to
957 # fstat its file descriptor
960 thelen
= str(os
.fstat(body
.fileno()).st_size
)
961 except (AttributeError, OSError):
962 # Don't send a length if this failed
963 if self
.debuglevel
> 0: print "Cannot stat!!"
965 if thelen
is not None:
966 self
.putheader('Content-Length', thelen
)
968 def _send_request(self
, method
, url
, body
, headers
):
969 # honour explicitly requested Host: and Accept-Encoding headers
970 header_names
= dict.fromkeys([k
.lower() for k
in headers
])
972 if 'host' in header_names
:
973 skips
['skip_host'] = 1
974 if 'accept-encoding' in header_names
:
975 skips
['skip_accept_encoding'] = 1
977 self
.putrequest(method
, url
, **skips
)
979 if body
and ('content-length' not in header_names
):
980 self
._set
_content
_length
(body
)
981 for hdr
, value
in headers
.iteritems():
982 self
.putheader(hdr
, value
)
983 self
.endheaders(body
)
985 def getresponse(self
, buffering
=False):
986 "Get the response from the server."
988 # if a prior response has been completed, then forget about it.
989 if self
.__response
and self
.__response
.isclosed():
990 self
.__response
= None
993 # if a prior response exists, then it must be completed (otherwise, we
994 # cannot read this response's header to determine the connection-close
997 # note: if a prior response existed, but was connection-close, then the
998 # socket and response were made independent of this HTTPConnection
999 # object since a new request requires that we open a whole new
1002 # this means the prior response had one of two states:
1003 # 1) will_close: this connection was reset and the prior socket and
1004 # response operate independently
1005 # 2) persistent: the response was retained and we await its
1006 # isclosed() status to become true.
1008 if self
.__state
!= _CS_REQ_SENT
or self
.__response
:
1009 raise ResponseNotReady()
1012 kwds
= {"strict":self
.strict
, "method":self
._method
}
1013 if self
.debuglevel
> 0:
1014 args
+= (self
.debuglevel
,)
1016 #only add this keyword if non-default, for compatibility with
1017 #other response_classes.
1018 kwds
["buffering"] = True;
1019 response
= self
.response_class(*args
, **kwds
)
1022 assert response
.will_close
!= _UNKNOWN
1023 self
.__state
= _CS_IDLE
1025 if response
.will_close
:
1026 # this effectively passes the connection to the response
1029 # remember this, so we can tell when it is complete
1030 self
.__response
= response
1036 "Compatibility class with httplib.py from 1.5."
1039 _http_vsn_str
= 'HTTP/1.0'
1043 _connection_class
= HTTPConnection
1045 def __init__(self
, host
='', port
=None, strict
=None):
1046 "Provide a default host, since the superclass requires one."
1048 # some joker passed 0 explicitly, meaning default port
1052 # Note that we may pass an empty string as the host; this will throw
1053 # an error when we attempt to connect. Presumably, the client code
1054 # will call connect before then, with a proper host.
1055 self
._setup
(self
._connection
_class
(host
, port
, strict
))
1057 def _setup(self
, conn
):
1060 # set up delegation to flesh out interface
1061 self
.send
= conn
.send
1062 self
.putrequest
= conn
.putrequest
1063 self
.putheader
= conn
.putheader
1064 self
.endheaders
= conn
.endheaders
1065 self
.set_debuglevel
= conn
.set_debuglevel
1067 conn
._http
_vsn
= self
._http
_vsn
1068 conn
._http
_vsn
_str
= self
._http
_vsn
_str
def connect(self, host=None, port=None):
    """Open the wrapped connection, optionally retargeting it first.

    When ``host`` is given, the host/port of the underlying connection
    object (``self._conn``) are reset through its ``_set_hostport`` before
    its ``connect`` method is called.
    """
    conn = self._conn
    if host is not None:
        conn._set_hostport(host, port)
    conn.connect()
1080 "Provide a getfile, since the superclass' does not use this concept."
1083 def getreply(self
, buffering
=False):
1084 """Compat definition since superclass does not define it.
1086 Returns a tuple consisting of:
1087 - server status code (e.g. '200' if all goes well)
1088 - server "reason" corresponding to status code
1089 - any RFC822 headers in the response from the server
1093 response
= self
._conn
.getresponse()
1095 #only add this keyword if non-default for compatibility
1096 #with other connection classes
1097 response
= self
._conn
.getresponse(buffering
)
1098 except BadStatusLine
, e
:
1099 ### hmm. if getresponse() ever closes the socket on a bad request,
1100 ### then we are going to have problems with self.sock
1102 ### should we keep this behavior? do people use it?
1103 # keep the socket open (as a file), and return it
1104 self
.file = self
._conn
.sock
.makefile('rb', 0)
1106 # close our socket -- we want to restart after any protocol error
1110 return -1, e
.line
, None
1112 self
.headers
= response
.msg
1113 self
.file = response
.fp
1114 return response
.status
, response
.reason
, response
.msg
1119 # note that self.file == response.fp, which gets closed by the
1120 # superclass. just clear the object ref here.
1121 ### hmm. messy. if status==-1, then self.file is owned by us.
1122 ### well... we aren't explicitly closing, but losing this ref will
1131 class HTTPSConnection(HTTPConnection
):
1132 "This class allows communication via SSL."
1134 default_port
= HTTPS_PORT
1136 def __init__(self
, host
, port
=None, key_file
=None, cert_file
=None,
1137 strict
=None, timeout
=socket
._GLOBAL
_DEFAULT
_TIMEOUT
,
1138 source_address
=None):
1139 HTTPConnection
.__init
__(self
, host
, port
, strict
, timeout
,
1141 self
.key_file
= key_file
1142 self
.cert_file
= cert_file
1145 "Connect to a host on a given (SSL) port."
1147 sock
= socket
.create_connection((self
.host
, self
.port
),
1148 self
.timeout
, self
.source_address
)
1149 if self
._tunnel
_host
:
1152 self
.sock
= ssl
.wrap_socket(sock
, self
.key_file
, self
.cert_file
)
1154 __all__
.append("HTTPSConnection")
1157 """Compatibility with 1.5 httplib interface
1159 Python 1.5.2 did not have an HTTPS class, but it defined an
1160 interface for sending http requests that is also useful for
1164 _connection_class
= HTTPSConnection
1166 def __init__(self
, host
='', port
=None, key_file
=None, cert_file
=None,
1168 # provide a default host, pass the X509 cert info
1170 # urf. compensate for bad input.
1173 self
._setup
(self
._connection
_class
(host
, port
, key_file
,
1176 # we never actually use these for anything, but we keep them
1177 # here for compatibility with post-1.5.2 CVS.
1178 self
.key_file
= key_file
1179 self
.cert_file
= cert_file
1182 def FakeSocket (sock
, sslobj
):
1183 warnings
.warn("FakeSocket is deprecated, and won't be in 3.x. " +
1184 "Use the result of ssl.wrap_socket() directly instead.",
1185 DeprecationWarning, stacklevel
=2)
1189 class HTTPException(Exception):
1190 # Subclasses that define an __init__ must call Exception.__init__
1191 # or define self.args. Otherwise, str() will fail.
1194 class NotConnected(HTTPException
):
1197 class InvalidURL(HTTPException
):
class UnknownProtocol(HTTPException):
    """Raised for an HTTP version string this module does not handle.

    The offending version string is kept both in ``args`` (as a 1-tuple)
    and in the ``version`` attribute.
    """

    def __init__(self, version):
        self.version = version
        self.args = (version,)
1205 class UnknownTransferEncoding(HTTPException
):
1208 class UnimplementedFileMode(HTTPException
):
1211 class IncompleteRead(HTTPException
):
1212 def __init__(self
, partial
, expected
=None):
1213 self
.args
= partial
,
1214 self
.partial
= partial
1215 self
.expected
= expected
1217 if self
.expected
is not None:
1218 e
= ', %i more expected' % self
.expected
1221 return 'IncompleteRead(%i bytes read%s)' % (len(self
.partial
), e
)
1225 class ImproperConnectionState(HTTPException
):
1228 class CannotSendRequest(ImproperConnectionState
):
1231 class CannotSendHeader(ImproperConnectionState
):
1234 class ResponseNotReady(ImproperConnectionState
):
1237 class BadStatusLine(HTTPException
):
1238 def __init__(self
, line
):
1242 # for backwards compatibility
1243 error
= HTTPException
1245 class LineAndFileWrapper
:
1246 """A limited file-like object for HTTP/0.9 responses."""
1248 # The status-line parsing code calls readline(), which normally
1249 # gets the HTTP status line. For a 0.9 response, however, this is
1250 # actually the first line of the body! Clients need to get a
1251 # readable file object that contains that line.
1253 def __init__(self
, line
, file):
1256 self
._line
_consumed
= 0
1257 self
._line
_offset
= 0
1258 self
._line
_left
= len(line
)
def __getattr__(self, attr):
    """Delegate attributes not found on the wrapper to the wrapped file."""
    wrapped = self._file
    return getattr(wrapped, attr)
1264 # called when the last byte is read from the line. After the
1265 # call, all read methods are delegated to the underlying file
1267 self
._line
_consumed
= 1
1268 self
.read
= self
._file
.read
1269 self
.readline
= self
._file
.readline
1270 self
.readlines
= self
._file
.readlines
1272 def read(self
, amt
=None):
1273 if self
._line
_consumed
:
1274 return self
._file
.read(amt
)
1275 assert self
._line
_left
1276 if amt
is None or amt
> self
._line
_left
:
1277 s
= self
._line
[self
._line
_offset
:]
1280 return s
+ self
._file
.read()
1282 return s
+ self
._file
.read(amt
- len(s
))
1284 assert amt
<= self
._line
_left
1285 i
= self
._line
_offset
1288 self
._line
_offset
= j
1289 self
._line
_left
-= amt
1290 if self
._line
_left
== 0:
1295 if self
._line
_consumed
:
1296 return self
._file
.readline()
1297 assert self
._line
_left
1298 s
= self
._line
[self
._line
_offset
:]
1302 def readlines(self
, size
=None):
1303 if self
._line
_consumed
:
1304 return self
._file
.readlines(size
)
1305 assert self
._line
_left
1306 L
= [self
._line
[self
._line
_offset
:]]
1309 return L
+ self
._file
.readlines()
1311 return L
+ self
._file
.readlines(size
)
1314 """Test this module.
1316 A hodge podge of tests collected here, because they have too many
1317 external dependencies for the regular test suite.
1322 opts
, args
= getopt
.getopt(sys
.argv
[1:], 'd')
1325 if o
== '-d': dl
= dl
+ 1
1326 host
= 'www.python.org'
1328 if args
[0:]: host
= args
[0]
1329 if args
[1:]: selector
= args
[1]
1331 h
.set_debuglevel(dl
)
1333 h
.putrequest('GET', selector
)
1335 status
, reason
, headers
= h
.getreply()
1336 print 'status =', status
1337 print 'reason =', reason
1338 print "read", len(h
.getfile().read())
1341 for header
in headers
.headers
: print header
.strip()
1344 # minimal test that code to extract host from url works
1347 _http_vsn_str
= 'HTTP/1.1'
1349 h
= HTTP11('www.python.org')
1350 h
.putrequest('GET', 'http://www.python.org/~jeremy/')
1361 for host
, selector
in (('sourceforge.net', '/projects/python'),
1363 print "https://%s%s" % (host
, selector
)
1365 hs
.set_debuglevel(dl
)
1367 hs
.putrequest('GET', selector
)
1369 status
, reason
, headers
= hs
.getreply()
1370 print 'status =', status
1371 print 'reason =', reason
1372 print "read", len(hs
.getfile().read())
1375 for header
in headers
.headers
: print header
.strip()
1378 if __name__
== '__main__':