Issue #6644: Fix compile error on AIX.
[python.git] / Lib / httplib.py
blob6fc573398f5e63b9892c4830ea4625388ab6410d
1 """HTTP/1.1 client library
3 <intro stuff goes here>
4 <other stuff, too>
6 HTTPConnection goes through a number of "states", which define when a client
7 may legally make another request or fetch the response for a particular
8 request. This diagram details these state transitions:
    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |
      | response = getresponse()
      v
    Unread-response   [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent
42 This diagram presents the following rules:
43 -- a second request may not be started until {response-headers-read}
44 -- a response [object] cannot be retrieved until {request-sent}
45 -- there is no differentiation between an unread response body and a
46 partially read response body
48 Note: this enforcement is applied by the HTTPConnection class. The
49 HTTPResponse class does not enforce this state machine, which
50 implies sophisticated clients may accelerate the request/response
51 pipeline. Caution should be taken, though: accelerating the states
52 beyond the above pattern may imply knowledge of the server's
53 connection-close behavior for certain requests. For example, it
54 is impossible to tell whether the server will close the connection
55 UNTIL the response headers have been read; this means that further
56 requests cannot be placed into the pipeline until it is known that
57 the server will NOT be closing the connection.
Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
67 """
69 import socket
70 from sys import py3kwarning
71 from urlparse import urlsplit
72 import warnings
73 with warnings.catch_warnings():
74 if py3kwarning:
75 warnings.filterwarnings("ignore", ".*mimetools has been removed",
76 DeprecationWarning)
77 import mimetools
79 try:
80 from cStringIO import StringIO
81 except ImportError:
82 from StringIO import StringIO
84 __all__ = ["HTTP", "HTTPResponse", "HTTPConnection",
85 "HTTPException", "NotConnected", "UnknownProtocol",
86 "UnknownTransferEncoding", "UnimplementedFileMode",
87 "IncompleteRead", "InvalidURL", "ImproperConnectionState",
88 "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
89 "BadStatusLine", "error", "responses"]
# Default ports for plain and SSL-wrapped connections.
HTTP_PORT = 80
HTTPS_PORT = 443

# Initial value of HTTPResponse attributes that have not been filled in yet.
_UNKNOWN = 'UNKNOWN'

# connection states
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'

# status codes
# informational (1xx)
CONTINUE = 100
SWITCHING_PROTOCOLS = 101
PROCESSING = 102

# successful (2xx)
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207
IM_USED = 226

# redirection (3xx)
MULTIPLE_CHOICES = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307

# client error (4xx)
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
METHOD_NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTHENTICATION_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
UNPROCESSABLE_ENTITY = 422
LOCKED = 423
FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426

# server error (5xx)
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510

# Mapping status codes to official W3C names.  The keys are spelled with the
# constants above; 306 has no constant of its own, so it stays a literal.
responses = {
    CONTINUE: 'Continue',
    SWITCHING_PROTOCOLS: 'Switching Protocols',

    OK: 'OK',
    CREATED: 'Created',
    ACCEPTED: 'Accepted',
    NON_AUTHORITATIVE_INFORMATION: 'Non-Authoritative Information',
    NO_CONTENT: 'No Content',
    RESET_CONTENT: 'Reset Content',
    PARTIAL_CONTENT: 'Partial Content',

    MULTIPLE_CHOICES: 'Multiple Choices',
    MOVED_PERMANENTLY: 'Moved Permanently',
    FOUND: 'Found',
    SEE_OTHER: 'See Other',
    NOT_MODIFIED: 'Not Modified',
    USE_PROXY: 'Use Proxy',
    306: '(Unused)',
    TEMPORARY_REDIRECT: 'Temporary Redirect',

    BAD_REQUEST: 'Bad Request',
    UNAUTHORIZED: 'Unauthorized',
    PAYMENT_REQUIRED: 'Payment Required',
    FORBIDDEN: 'Forbidden',
    NOT_FOUND: 'Not Found',
    METHOD_NOT_ALLOWED: 'Method Not Allowed',
    NOT_ACCEPTABLE: 'Not Acceptable',
    PROXY_AUTHENTICATION_REQUIRED: 'Proxy Authentication Required',
    REQUEST_TIMEOUT: 'Request Timeout',
    CONFLICT: 'Conflict',
    GONE: 'Gone',
    LENGTH_REQUIRED: 'Length Required',
    PRECONDITION_FAILED: 'Precondition Failed',
    REQUEST_ENTITY_TOO_LARGE: 'Request Entity Too Large',
    REQUEST_URI_TOO_LONG: 'Request-URI Too Long',
    UNSUPPORTED_MEDIA_TYPE: 'Unsupported Media Type',
    REQUESTED_RANGE_NOT_SATISFIABLE: 'Requested Range Not Satisfiable',
    EXPECTATION_FAILED: 'Expectation Failed',

    INTERNAL_SERVER_ERROR: 'Internal Server Error',
    NOT_IMPLEMENTED: 'Not Implemented',
    BAD_GATEWAY: 'Bad Gateway',
    SERVICE_UNAVAILABLE: 'Service Unavailable',
    GATEWAY_TIMEOUT: 'Gateway Timeout',
    HTTP_VERSION_NOT_SUPPORTED: 'HTTP Version Not Supported',
}

# maximal amount of data to read at one time in _safe_read
MAXAMOUNT = 1048576
213 class HTTPMessage(mimetools.Message):
def addheader(self, key, value):
    """Store *value* under header *key*, folding repeated headers together.

    The first value seen for a key is stored unchanged.  Each later value
    for the same key is appended to the stored one, separated by ", ".
    """
    existing = self.dict.get(key)
    if existing is not None:
        value = existing + ", " + value
    self.dict[key] = value
def addcontinue(self, key, more):
    """Append continuation-line text *more* to the value stored for *key*.

    The existing value and the new text are joined by a newline followed
    by a single space.  *key* must already be present in self.dict.
    """
    self.dict[key] = "\n ".join((self.dict[key], more))
def readheaders(self):
    """Read header lines from self.fp into self.dict and self.headers.

    Lines are consumed up to and including the terminating line recognised
    by self.islast(); that line is not stored.  If a line that is neither
    a header, a continuation, a comment nor the terminator is met, reading
    stops and an attempt is made to push the line back (via fp.unread() or
    fp.seek()).

    On return:
      self.status   -- '' if all went well, otherwise an error message
      self.headers  -- the raw header and continuation lines, in order
      self.dict     -- header name -> value; repeated headers are combined
                       by self.addheader() (RFC 2616 sec 4.2: values are
                       joined with a comma, in the order received)
      self.unixfrom -- any leading 'From ' lines that were skipped
    """
    # XXX This overrides readheaders() of rfc822.Message.  The base class
    # design isn't amenable to customized behavior here, so this is a copy
    # of the base class code with a few small changes.

    self.dict = {}
    self.unixfrom = ''
    self.headers = raw_lines = []
    self.status = ''
    current = ""        # name of the last header seen, for continuations
    at_start = True     # still before the first non-'From ' line?
    line_start = push_back = get_pos = None
    if hasattr(self.fp, 'unread'):
        push_back = self.fp.unread
    elif self.seekable:
        get_pos = self.fp.tell

    while True:
        if get_pos:
            # Remember where this line begins so it can be un-read later.
            try:
                line_start = get_pos()
            except IOError:
                line_start = get_pos = None
                self.seekable = 0
        line = self.fp.readline()
        if not line:
            self.status = 'EOF in headers'
            break

        # Skip unix From name time lines
        if at_start and line.startswith('From '):
            self.unixfrom += line
            continue
        at_start = False

        if current and line[0] in ' \t':
            # XXX Not sure if continuation lines are handled properly
            # for http and/or for repeating headers
            raw_lines.append(line)
            self.addcontinue(current, line.strip())
            continue
        if self.iscomment(line):
            continue
        if self.islast(line):
            # Note! No pushback here!  The delimiter line gets eaten.
            break

        current = self.isheader(line)
        if current:
            # A legal header line: keep the raw text and the parsed value.
            raw_lines.append(line)
            self.addheader(current, line[len(current) + 1:].strip())
            continue

        # Not a header line; report it, throw it back and stop here.
        if self.dict:
            self.status = 'Non-header line where header expected'
        else:
            self.status = 'No headers'
        if push_back:
            push_back(line)
        elif get_pos:
            self.fp.seek(line_start)
        else:
            self.status += '; bad seek'
        break
318 class HTTPResponse:
320 # strict: If true, raise BadStatusLine if the status line can't be
321 # parsed as a valid HTTP/1.0 or 1.1 status line. By default it is
322 # false because it prevents clients from talking to HTTP/0.9
323 # servers. Note that a response with a sufficiently corrupted
324 # status line will look like an HTTP/0.9 response.
326 # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
def __init__(self, sock, debuglevel=0, strict=0, method=None, buffering=False):
    """Wrap *sock* in a file object; nothing is read until begin().

    sock       -- connected socket; only its makefile() method is used here
    debuglevel -- values above 0 make the parsing methods print what they read
    strict     -- if true, an unparsable status line raises BadStatusLine
                  instead of being treated as an HTTP/0.9 response
    method     -- the request method, kept so that begin() can recognise
                  responses to HEAD
    buffering  -- if true, read the socket through a buffered file object
    """
    if buffering:
        # The caller won't be using any sock.recv() calls, so buffering
        # is fine and recommended for performance.
        makefile_args = ('rb',)
    else:
        # The buffer size is specified as zero, because the headers of
        # the response are read with readline().  If the reads were
        # buffered the readline() calls could consume some of the
        # response, which may be read via a recv() on the underlying
        # socket.
        makefile_args = ('rb', 0)
    self.fp = sock.makefile(*makefile_args)

    self.debuglevel = debuglevel
    self.strict = strict
    self._method = method
    self.msg = None

    # from the Status-Line of the response
    self.version = _UNKNOWN     # HTTP-Version
    self.status = _UNKNOWN      # Status-Code
    self.reason = _UNKNOWN      # Reason-Phrase

    # body framing, determined by begin()
    self.chunked = _UNKNOWN     # is "chunked" being used?
    self.chunk_left = _UNKNOWN  # bytes left to read in current chunk
    self.length = _UNKNOWN      # number of bytes left in response
    self.will_close = _UNKNOWN  # conn will close at end of response
356 def _read_status(self):
357 # Initialize with Simple-Response defaults
358 line = self.fp.readline()
359 if self.debuglevel > 0:
360 print "reply:", repr(line)
361 if not line:
362 # Presumably, the server closed the connection before
363 # sending a valid response.
364 raise BadStatusLine(line)
365 try:
366 [version, status, reason] = line.split(None, 2)
367 except ValueError:
368 try:
369 [version, status] = line.split(None, 1)
370 reason = ""
371 except ValueError:
372 # empty version will cause next test to fail and status
373 # will be treated as 0.9 response.
374 version = ""
375 if not version.startswith('HTTP/'):
376 if self.strict:
377 self.close()
378 raise BadStatusLine(line)
379 else:
380 # assume it's a Simple-Response from an 0.9 server
381 self.fp = LineAndFileWrapper(line, self.fp)
382 return "HTTP/0.9", 200, ""
384 # The status code is a three-digit number
385 try:
386 status = int(status)
387 if status < 100 or status > 999:
388 raise BadStatusLine(line)
389 except ValueError:
390 raise BadStatusLine(line)
391 return version, status, reason
def begin(self):
    """Read the status line and headers and work out how the body is framed.

    Sets status, reason, version, msg, chunked, chunk_left, length and
    will_close.  Does nothing if the headers were already read.  Raises
    UnknownProtocol if the version is not HTTP/0.9 or HTTP/1.x.
    """
    if self.msg is not None:
        # we've already started reading the response
        return

    # read until we get a non-100 response
    while True:
        version, status, reason = self._read_status()
        if status != CONTINUE:
            break
        # skip the header from the 100 response
        while True:
            skip = self.fp.readline().strip()
            if not skip:
                break
            if self.debuglevel > 0:
                print("header: " + skip)

    self.status = status
    self.reason = reason.strip()
    if version == 'HTTP/1.0':
        self.version = 10
    elif version.startswith('HTTP/1.'):
        self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
    elif version == 'HTTP/0.9':
        self.version = 9
    else:
        raise UnknownProtocol(version)

    if self.version == 9:
        # A Simple-Response has no headers; the body runs to end of stream.
        self.length = None
        self.chunked = 0
        self.will_close = 1
        self.msg = HTTPMessage(StringIO())
        return

    self.msg = HTTPMessage(self.fp, 0)
    if self.debuglevel > 0:
        for hdr in self.msg.headers:
            # Python 2 print statement: the trailing comma suppresses the
            # extra newline.
            print("header: " + hdr),

    # don't let the msg keep an fp
    self.msg.fp = None

    # are we using the chunked-style of transfer encoding?
    tr_enc = self.msg.getheader('transfer-encoding')
    if tr_enc and tr_enc.lower() == "chunked":
        self.chunked = 1
        self.chunk_left = None
    else:
        self.chunked = 0

    # will the connection close at the end of the response?
    self.will_close = self._check_close()

    # do we have a Content-Length?
    # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
    self.length = None
    raw_length = self.msg.getheader('content-length')
    if raw_length and not self.chunked:
        try:
            parsed = int(raw_length)
        except ValueError:
            parsed = None
        # ignore nonsensical negative lengths
        if parsed is not None and parsed >= 0:
            self.length = parsed

    # does the body have a fixed length? (of zero)
    if (status in (NO_CONTENT, NOT_MODIFIED) or
            100 <= status < 200 or      # 1xx codes
            self._method == 'HEAD'):
        self.length = 0

    # if the connection remains open, and we aren't using chunked, and
    # a content-length was not provided, then assume that the connection
    # WILL close.
    if not (self.will_close or self.chunked) and self.length is None:
        self.will_close = 1
476 def _check_close(self):
477 conn = self.msg.getheader('connection')
478 if self.version == 11:
479 # An HTTP/1.1 proxy is assumed to stay open unless
480 # explicitly closed.
481 conn = self.msg.getheader('connection')
482 if conn and "close" in conn.lower():
483 return True
484 return False
486 # Some HTTP/1.0 implementations have support for persistent
487 # connections, using rules different than HTTP/1.1.
489 # For older HTTP, Keep-Alive indicates persistent connection.
490 if self.msg.getheader('keep-alive'):
491 return False
493 # At least Akamai returns a "Connection: Keep-Alive" header,
494 # which was supposed to be sent by the client.
495 if conn and "keep-alive" in conn.lower():
496 return False
498 # Proxy-Connection is a netscape hack.
499 pconn = self.msg.getheader('proxy-connection')
500 if pconn and "keep-alive" in pconn.lower():
501 return False
503 # otherwise, assume it will close
504 return True
def close(self):
    """Close the response's file object, if there is one, and drop it."""
    fp = self.fp
    if not fp:
        return
    fp.close()
    self.fp = None
def isclosed(self):
    """Return True once the response's file object has been dropped.

    NOTE: it is possible that self.close() is never called.  That happens
    when will_close is TRUE, length is None, and the caller reads up to
    the last byte but NOT past it.

    IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
    called, which is what makes the result meaningful.
    """
    return self.fp is None
520 # XXX It would be nice to have readline and __iter__ for this, too.
def read(self, amt=None):
    """Read and return up to *amt* bytes of the body (all of it if None).

    Returns '' once the response has been closed.  Chunked bodies are
    handed to _read_chunked().  The response closes itself as soon as the
    known body length has been consumed, or after an unbounded read.
    """
    if self.fp is None:
        return ''

    if self.chunked:
        return self._read_chunked(amt)

    remaining = self.length
    if amt is None:
        # unbounded read
        if remaining is None:
            data = self.fp.read()
        else:
            data = self._safe_read(remaining)
            self.length = 0
        self.close()        # we read everything
        return data

    # we do not use _safe_read() here because this may be a .will_close
    # connection, and the user is reading more bytes than will be provided
    # (for example, reading in 1k chunks)
    if remaining is None:
        return self.fp.read(amt)

    # clip the read to the "end of response"
    data = self.fp.read(min(amt, remaining))
    self.length = remaining - len(data)
    if not self.length:
        self.close()
    return data
def _read_chunked(self, amt):
    """Read up to *amt* bytes of a chunked body (the whole body if None).

    The size of a partly consumed chunk is kept in self.chunk_left between
    calls.  When the zero-size last chunk is reached, the trailer is read
    and discarded and the response is closed.

    Raises IncompleteRead, carrying the data gathered so far, if a chunk
    size line cannot be parsed; the response is closed first.
    """
    assert self.chunked != _UNKNOWN
    chunk_left = self.chunk_left
    # Pieces are collected in a list and joined once on the way out;
    # repeated string concatenation gets slow as the number or size of
    # chunks grows.
    parts = []

    while True:
        if chunk_left is None:
            line = self.fp.readline()
            i = line.find(';')
            if i >= 0:
                line = line[:i]     # strip chunk-extensions
            try:
                chunk_left = int(line, 16)
            except ValueError:
                # close the connection as protocol synchronisation is
                # probably lost
                self.close()
                raise IncompleteRead(''.join(parts))
            if chunk_left == 0:
                break
        if amt is None:
            parts.append(self._safe_read(chunk_left))
        elif amt < chunk_left:
            parts.append(self._safe_read(amt))
            self.chunk_left = chunk_left - amt
            return ''.join(parts)
        elif amt == chunk_left:
            parts.append(self._safe_read(amt))
            self._safe_read(2)  # toss the CRLF at the end of the chunk
            self.chunk_left = None
            return ''.join(parts)
        else:
            parts.append(self._safe_read(chunk_left))
            amt -= chunk_left

        # we read the whole chunk, get another
        self._safe_read(2)      # toss the CRLF at the end of the chunk
        chunk_left = None

    # read and discard trailer up to the CRLF terminator
    ### note: we shouldn't have any trailers!
    while True:
        line = self.fp.readline()
        if not line:
            # a vanishingly small number of sites EOF without
            # sending the trailer
            break
        if line == '\r\n':
            break

    # we read everything; close the "file"
    self.close()

    return ''.join(parts)
def _safe_read(self, amt):
    """Read exactly *amt* bytes, compensating for partial reads.

    Normally, we have a blocking socket, but a read() can be interrupted
    by a signal (resulting in a partial read), so reading is repeated, at
    most MAXAMOUNT bytes at a time, until *amt* bytes have arrived.

    EOF and an interrupt cannot be told apart when zero bytes come back;
    IncompleteRead, carrying the data read so far and the number of bytes
    still missing, is raised in that situation.

    Use this when *amt* bytes "should" be present; IncompleteRead then
    signals that they truly were not available.
    """
    pieces = []
    remaining = amt
    while remaining > 0:
        piece = self.fp.read(min(remaining, MAXAMOUNT))
        if not piece:
            raise IncompleteRead(''.join(pieces), remaining)
        pieces.append(piece)
        remaining -= len(piece)
    return ''.join(pieces)
def getheader(self, name, default=None):
    """Return the value of header *name*, or *default* if it is absent.

    Raises ResponseNotReady if the headers have not been read yet.
    """
    msg = self.msg
    if msg is None:
        raise ResponseNotReady()
    return msg.getheader(name, default)
def getheaders(self):
    """Return list of (header, value) tuples.

    Raises ResponseNotReady if the headers have not been read yet.
    """
    msg = self.msg
    if msg is None:
        raise ResponseNotReady()
    return msg.items()
646 class HTTPConnection:
648 _http_vsn = 11
649 _http_vsn_str = 'HTTP/1.1'
651 response_class = HTTPResponse
652 default_port = HTTP_PORT
653 auto_open = 1
654 debuglevel = 0
655 strict = 0
def __init__(self, host, port=None, strict=None,
             timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
    """Record where to connect; no network activity happens here.

    host    -- server name, optionally with a ":port" suffix
    port    -- port number; None means take it from *host*, or use
               default_port
    strict  -- if not None, overrides the class-level strict setting
    timeout -- passed to socket.create_connection() by connect()
    """
    # connection and request bookkeeping
    self.sock = None
    self.timeout = timeout
    self._buffer = []
    self._method = None
    self.__state = _CS_IDLE
    self.__response = None

    # CONNECT tunnelling is off until set_tunnel() is called
    self._tunnel_host = None
    self._tunnel_port = None

    self._set_hostport(host, port)
    if strict is not None:
        self.strict = strict
def set_tunnel(self, host, port=None):
    """Set the host and the port for HTTP CONNECT tunnelling.

    The values are only stored here; connect() issues the CONNECT request
    when a tunnel host has been set.
    """
    self._tunnel_host, self._tunnel_port = host, port
677 def _set_hostport(self, host, port):
678 if port is None:
679 i = host.rfind(':')
680 j = host.rfind(']') # ipv6 addresses have [...]
681 if i > j:
682 try:
683 port = int(host[i+1:])
684 except ValueError:
685 raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
686 host = host[:i]
687 else:
688 port = self.default_port
689 if host and host[0] == '[' and host[-1] == ']':
690 host = host[1:-1]
691 self.host = host
692 self.port = port
def set_debuglevel(self, level):
    """Set the debug output level.

    Values above 0 make send() and _set_content_length() print what they
    are doing; the level is also passed on to responses created by
    getresponse().
    """
    self.debuglevel = level
697 def _tunnel(self):
698 self._set_hostport(self._tunnel_host, self._tunnel_port)
699 self.send("CONNECT %s:%d HTTP/1.0\r\n\r\n" % (self.host, self.port))
700 response = self.response_class(self.sock, strict = self.strict,
701 method = self._method)
702 (version, code, message) = response._read_status()
704 if code != 200:
705 self.close()
706 raise socket.error, "Tunnel connection failed: %d %s" % (code,
707 message.strip())
708 while True:
709 line = response.fp.readline()
710 if line == '\r\n': break
def connect(self):
    """Connect to the host and port specified in __init__.

    If a tunnel host has been set, the CONNECT handshake is performed
    right after the socket is opened.
    """
    address = (self.host, self.port)
    self.sock = socket.create_connection(address, self.timeout)

    if self._tunnel_host:
        self._tunnel()
def close(self):
    """Close the connection to the HTTP server.

    Also closes the response still attached to this connection, if any,
    and puts the connection back into the idle state.
    """
    sock = self.sock
    if sock:
        sock.close()    # close it manually... there may be other refs
        self.sock = None

    pending = self.__response
    if pending:
        pending.close()
        self.__response = None

    self.__state = _CS_IDLE
def send(self, str):
    """Send `str' to the server.

    `str' is either a string, sent in one piece, or an object with a
    read() method, which is sent in blocks of 8192 bytes until read()
    returns nothing.  If there is no socket yet, one is opened first when
    auto_open is set; otherwise NotConnected is raised.
    """
    if self.sock is None:
        if not self.auto_open:
            raise NotConnected()
        self.connect()

    # send the data to the server. if we get a broken pipe, then close
    # the socket. we want to reconnect when somebody tries to send again.
    #
    # NOTE: we DO propagate the error, though, because we cannot simply
    #       ignore the error... the caller will know if they can retry.
    if self.debuglevel > 0:
        print("send: " + repr(str))
    try:
        if hasattr(str, 'read'):
            if self.debuglevel > 0:
                print("sendIng a read()able")
            blocksize = 8192
            while True:
                data = str.read(blocksize)
                if not data:
                    break
                self.sock.sendall(data)
        else:
            self.sock.sendall(str)
    except socket.error as v:
        if v.args[0] == 32:     # Broken pipe
            self.close()
        raise
761 def _output(self, s):
762 """Add a line of output to the current request buffer.
764 Assumes that the line does *not* end with \\r\\n.
766 self._buffer.append(s)
768 def _send_output(self, message_body=None):
769 """Send the currently buffered request and clear the buffer.
771 Appends an extra \\r\\n to the buffer.
772 A message_body may be specified, to be appended to the request.
774 self._buffer.extend(("", ""))
775 msg = "\r\n".join(self._buffer)
776 del self._buffer[:]
777 # If msg and message_body are sent in a single send() call,
778 # it will avoid performance problems caused by the interaction
779 # between delayed ack and the Nagle algorithim.
780 if isinstance(message_body, str):
781 msg += message_body
782 message_body = None
783 self.send(msg)
784 if message_body is not None:
785 #message_body was not a string (i.e. it is a file) and
786 #we must run the risk of Nagle
787 self.send(message_body)
def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
    """Start a new request by buffering its request line.

    `method' specifies an HTTP request method, e.g. 'GET'.
    `url' specifies the object being requested, e.g. '/index.html'.
    `skip_host' if True does not add automatically a 'Host:' header
    `skip_accept_encoding' if True does not add automatically an
       'Accept-Encoding:' header

    Raises CannotSendRequest unless the connection is idle.
    """
    # if a prior response has been completed, then forget about it.
    if self.__response and self.__response.isclosed():
        self.__response = None

    # in certain cases, we cannot issue another request on this connection.
    # this occurs when:
    #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
    #   2) a response to a previous request has signalled that it is going
    #      to close the connection upon completion.
    #   3) the headers for the previous response have not been read, thus
    #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
    #
    # if there is no prior response, then we can request at will.
    #
    # if point (2) is true, then we will have passed the socket to the
    # response (effectively meaning, "there is no prior response"), and
    # will open a new one when a new request is made.
    #
    # Note: if a prior response exists, then we *can* start a new request.
    #       We are not allowed to begin fetching the response to this new
    #       request, however, until that prior response is complete.
    if self.__state != _CS_IDLE:
        raise CannotSendRequest()
    self.__state = _CS_REQ_STARTED

    # Save the method we use, we need it later in the response phase
    self._method = method
    if not url:
        url = '/'
    request_line = '%s %s %s' % (method, url, self._http_vsn_str)
    self._output(request_line)

    if self._http_vsn != 11:
        # For HTTP/1.0, the server will assume "not chunked"
        return

    # Issue some standard headers for better HTTP/1.1 compliance

    if not skip_host:
        # this header is issued *only* for HTTP/1.1
        # connections. more specifically, this means it is
        # only issued when the client uses the new
        # HTTPConnection() class. backwards-compat clients
        # will be using HTTP/1.0 and those clients may be
        # issuing this header themselves. we should NOT issue
        # it twice; some web servers (such as Apache) barf
        # when they see two Host: headers

        # If the request is going through a proxy, use the host of the
        # actual URL, not the host of the proxy.
        netloc = ''
        if url.startswith('http'):
            netloc = urlsplit(url)[1]

        target = netloc or self.host
        try:
            host_value = target.encode("ascii")
        except UnicodeEncodeError:
            host_value = target.encode("idna")
        # If we need a non-standard port, include it in the header.
        if not netloc and self.port != self.default_port:
            host_value = "%s:%s" % (host_value, self.port)
        self.putheader('Host', host_value)

    # note: we are assuming that clients will not attempt to set these
    #       headers since *this* library must deal with the
    #       consequences. this also means that when the supporting
    #       libraries are updated to recognize other forms, then this
    #       code should be changed (removed or updated).

    # we only want a Content-Encoding of "identity" since we don't
    # support encodings such as x-gzip or x-deflate.
    if not skip_accept_encoding:
        self.putheader('Accept-Encoding', 'identity')

    # we can accept "chunked" Transfer-Encodings, but no others
    # NOTE: no TE header implies *only* "chunked"
    #self.putheader('TE', 'chunked')

    # if TE is supplied in the header, then it must appear in a
    # Connection header.
    #self.putheader('Connection', 'TE')
def putheader(self, header, *values):
    """Buffer one request header line.

    For example: h.putheader('Accept', 'text/html')

    Several values are written as continuation lines of one header.
    Raises CannotSendHeader unless a request has been started with
    putrequest() and its headers are not yet finished.
    """
    if self.__state != _CS_REQ_STARTED:
        raise CannotSendHeader()

    header_line = '%s: %s' % (header, '\r\n\t'.join(values))
    self._output(header_line)
def endheaders(self, message_body=None):
    """Indicate that the last header line has been sent to the server.

    This method sends the request to the server.  The optional
    message_body argument can be used to pass a message body associated
    with the request.  The message body will be sent in the same packet
    as the message headers if possible.  The message_body should be a
    string.

    Raises CannotSendHeader unless a request has been started with
    putrequest().
    """
    if self.__state != _CS_REQ_STARTED:
        raise CannotSendHeader()
    self.__state = _CS_REQ_SENT
    self._send_output(message_body)
def request(self, method, url, body=None, headers={}):
    """Send a complete request to the server.

    If sending fails with a broken pipe (errno 32) and auto_open is set,
    the request is sent one more time; any other socket error propagates.
    """
    try:
        self._send_request(method, url, body, headers)
    except socket.error as v:
        # trap 'Broken pipe' if we're allowed to automatically reconnect
        may_retry = v.args[0] == 32 and self.auto_open
        if not may_retry:
            raise
        # try one more time
        self._send_request(method, url, body, headers)
934 def _set_content_length(self, body):
935 # Set the content-length based on the body.
936 thelen = None
937 try:
938 thelen = str(len(body))
939 except TypeError, te:
940 # If this is a file-like object, try to
941 # fstat its file descriptor
942 import os
943 try:
944 thelen = str(os.fstat(body.fileno()).st_size)
945 except (AttributeError, OSError):
946 # Don't send a length if this failed
947 if self.debuglevel > 0: print "Cannot stat!!"
949 if thelen is not None:
950 self.putheader('Content-Length', thelen)
952 def _send_request(self, method, url, body, headers):
953 # honour explicitly requested Host: and Accept-Encoding headers
954 header_names = dict.fromkeys([k.lower() for k in headers])
955 skips = {}
956 if 'host' in header_names:
957 skips['skip_host'] = 1
958 if 'accept-encoding' in header_names:
959 skips['skip_accept_encoding'] = 1
961 self.putrequest(method, url, **skips)
963 if body and ('content-length' not in header_names):
964 self._set_content_length(body)
965 for hdr, value in headers.iteritems():
966 self.putheader(hdr, value)
967 self.endheaders(body)
def getresponse(self, buffering=False):
    """Get the response from the server.

    Creates a response_class instance on the connection's socket, reads
    its status line and headers, and returns it.  *buffering* is passed
    on to the response class only when it is true.

    Raises ResponseNotReady if no request has been fully sent, or if an
    earlier response on this connection has not been read to the end.
    """
    # if a prior response has been completed, then forget about it.
    if self.__response and self.__response.isclosed():
        self.__response = None

    # if a prior response exists, then it must be completed (otherwise, we
    # cannot read this response's header to determine the connection-close
    # behavior)
    #
    # note: if a prior response existed, but was connection-close, then the
    # socket and response were made independent of this HTTPConnection
    # object since a new request requires that we open a whole new
    # connection
    #
    # this means the prior response had one of two states:
    #   1) will_close: this connection was reset and the prior socket and
    #                  response operate independently
    #   2) persistent: the response was retained and we await its
    #                  isclosed() status to become true.
    if self.__state != _CS_REQ_SENT or self.__response:
        raise ResponseNotReady()

    args = (self.sock,)
    kwds = {"strict": self.strict, "method": self._method}
    if self.debuglevel > 0:
        args += (self.debuglevel,)
    if buffering:
        # only add this keyword if non-default, for compatibility with
        # other response_classes.
        kwds["buffering"] = True
    response = self.response_class(*args, **kwds)

    try:
        response.begin()
    except:
        # The response owns a file object created from the socket;
        # release it before passing the error on, whatever the error is.
        response.close()
        raise
    assert response.will_close != _UNKNOWN
    self.__state = _CS_IDLE

    if response.will_close:
        # this effectively passes the connection to the response
        self.close()
    else:
        # remember this, so we can tell when it is complete
        self.__response = response

    return response
1019 class HTTP:
1020 "Compatibility class with httplib.py from 1.5."
1022 _http_vsn = 10
1023 _http_vsn_str = 'HTTP/1.0'
1025 debuglevel = 0
1027 _connection_class = HTTPConnection
1029 def __init__(self, host='', port=None, strict=None):
1030 "Provide a default host, since the superclass requires one."
1032 # some joker passed 0 explicitly, meaning default port
1033 if port == 0:
1034 port = None
1036 # Note that we may pass an empty string as the host; this will throw
1037 # an error when we attempt to connect. Presumably, the client code
1038 # will call connect before then, with a proper host.
1039 self._setup(self._connection_class(host, port, strict))
1041 def _setup(self, conn):
1042 self._conn = conn
1044 # set up delegation to flesh out interface
1045 self.send = conn.send
1046 self.putrequest = conn.putrequest
1047 self.putheader = conn.putheader
1048 self.endheaders = conn.endheaders
1049 self.set_debuglevel = conn.set_debuglevel
1051 conn._http_vsn = self._http_vsn
1052 conn._http_vsn_str = self._http_vsn_str
1054 self.file = None
def connect(self, host=None, port=None):
    "Accept arguments to set the host/port, since the superclass doesn't."

    conn = self._conn
    if host is not None:
        conn._set_hostport(host, port)
    conn.connect()
1063 def getfile(self):
1064 "Provide a getfile, since the superclass' does not use this concept."
1065 return self.file
1067 def getreply(self, buffering=False):
1068 """Compat definition since superclass does not define it.
1070 Returns a tuple consisting of:
1071 - server status code (e.g. '200' if all goes well)
1072 - server "reason" corresponding to status code
1073 - any RFC822 headers in the response from the server
1075 try:
1076 if not buffering:
1077 response = self._conn.getresponse()
1078 else:
1079 #only add this keyword if non-default for compatibility
1080 #with other connection classes
1081 response = self._conn.getresponse(buffering)
1082 except BadStatusLine, e:
1083 ### hmm. if getresponse() ever closes the socket on a bad request,
1084 ### then we are going to have problems with self.sock
1086 ### should we keep this behavior? do people use it?
1087 # keep the socket open (as a file), and return it
1088 self.file = self._conn.sock.makefile('rb', 0)
1090 # close our socket -- we want to restart after any protocol error
1091 self.close()
1093 self.headers = None
1094 return -1, e.line, None
1096 self.headers = response.msg
1097 self.file = response.fp
1098 return response.status, response.reason, response.msg
1100 def close(self):
1101 self._conn.close()
1103 # note that self.file == response.fp, which gets closed by the
1104 # superclass. just clear the object ref here.
1105 ### hmm. messy. if status==-1, then self.file is owned by us.
1106 ### well... we aren't explicitly closing, but losing this ref will
1107 ### do it
1108 self.file = None
try:
    import ssl
except ImportError:
    # Without the ssl module none of the HTTPS names below are defined.
    pass
else:
    class HTTPSConnection(HTTPConnection):
        "HTTPConnection subclass whose socket is wrapped with SSL."

        default_port = HTTPS_PORT

        def __init__(self, host, port=None, key_file=None, cert_file=None,
                     strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
            HTTPConnection.__init__(self, host, port, strict, timeout)
            self.key_file = key_file
            self.cert_file = cert_file

        def connect(self):
            "Open a TCP connection to (host, port) and wrap it with SSL."

            address = (self.host, self.port)
            raw_sock = socket.create_connection(address, self.timeout)
            if self._tunnel_host:
                # The plain socket is published on self.sock before
                # _tunnel() runs.  NOTE(review): presumably _tunnel() talks
                # over self.sock -- it is defined outside this section.
                self.sock = raw_sock
                self._tunnel()
            self.sock = ssl.wrap_socket(raw_sock, self.key_file,
                                        self.cert_file)

    __all__.append("HTTPSConnection")

    class HTTPS(HTTP):
        """Compatibility with 1.5 httplib interface

        Python 1.5.2 did not have an HTTPS class, but it defined an
        interface for sending http requests that is also useful for
        https.
        """

        _connection_class = HTTPSConnection

        def __init__(self, host='', port=None, key_file=None, cert_file=None,
                     strict=None):
            # A default host is provided; the X509 key/cert file names are
            # handed to the connection.  Port 0 means "use the default port".
            if port == 0:
                port = None
            conn = self._connection_class(host, port, key_file,
                                          cert_file, strict)
            self._setup(conn)

            # we never actually use these for anything, but we keep them
            # here for compatibility with post-1.5.2 CVS.
            self.key_file = key_file
            self.cert_file = cert_file


    def FakeSocket (sock, sslobj):
        "Deprecated shim: issue a DeprecationWarning and return sslobj."
        message = ("FakeSocket is deprecated, and won't be in 3.x. " +
                   "Use the result of ssl.wrap_socket() directly instead.")
        warnings.warn(message, DeprecationWarning, stacklevel=2)
        return sslobj
class HTTPException(Exception):
    """Common base class of the HTTP exceptions defined below.

    Subclasses that define an __init__ must call Exception.__init__
    or define self.args.  Otherwise, str() will fail.
    """
class NotConnected(HTTPException):
    "HTTPException subclass that adds no state of its own."
    # NOTE(review): presumably raised when an operation needs an open
    # connection -- confirm at the raise sites, which are outside this section.
class InvalidURL(HTTPException):
    "HTTPException subclass that adds no state of its own."
    # NOTE(review): presumably raised for a malformed host/port or URL --
    # confirm at the raise sites, which are outside this section.
class UnknownProtocol(HTTPException):
    "HTTPException that records the protocol version it was created with."

    def __init__(self, version):
        # args is set explicitly because Exception.__init__ is not called;
        # without it str() on the exception would fail.
        self.version = version
        self.args = (version,)
class UnknownTransferEncoding(HTTPException):
    "HTTPException subclass that adds no state of its own."
    # NOTE(review): presumably about an unsupported Transfer-Encoding --
    # confirm at the raise sites, which are outside this section.
class UnimplementedFileMode(HTTPException):
    "HTTPException subclass that adds no state of its own."
    # NOTE(review): presumably about an unsupported makefile() mode --
    # confirm at the raise sites, which are outside this section.
class IncompleteRead(HTTPException):
    """HTTPException carrying the data that was read before the failure.

    partial  -- the data read so far (its length is reported by repr())
    expected -- optional count of additional bytes that were expected
    """

    def __init__(self, partial, expected=None):
        # args is assigned directly; Exception.__init__ is not called.
        self.args = (partial,)
        self.partial = partial
        self.expected = expected

    def __repr__(self):
        if self.expected is None:
            suffix = ''
        else:
            suffix = ', %i more expected' % self.expected
        return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), suffix)

    def __str__(self):
        # str() and repr() give the same summary
        return repr(self)
class ImproperConnectionState(HTTPException):
    """Base class for connection-state errors.

    CannotSendRequest, CannotSendHeader and ResponseNotReady derive from it.
    """
class CannotSendRequest(ImproperConnectionState):
    "ImproperConnectionState subclass that adds no state of its own."
    # NOTE(review): presumably raised when a request is started in the wrong
    # connection state -- confirm at the raise sites outside this section.
class CannotSendHeader(ImproperConnectionState):
    "ImproperConnectionState subclass that adds no state of its own."
    # NOTE(review): presumably raised when a header is sent in the wrong
    # connection state -- confirm at the raise sites outside this section.
class ResponseNotReady(ImproperConnectionState):
    """Raised when a response is requested at the wrong time.

    The connection raises it when it is not in the request-sent state or
    when an earlier response is still outstanding.
    """
class BadStatusLine(HTTPException):
    """HTTPException for a status line that could not be accepted.

    line -- the offending status line; also the sole element of args.
    """

    def __init__(self, line):
        # An empty line would make str() of the exception empty as well,
        # which is very hard to diagnose; show it as '' instead.
        if not line:
            line = repr(line)
        # args is assigned directly; Exception.__init__ is not called.
        self.args = line,
        self.line = line
# for backwards compatibility: ``error`` is an alias of HTTPException
error = HTTPException
class LineAndFileWrapper:
    """A limited file-like object for HTTP/0.9 responses.

    Serves the already-read first line, then continues with the wrapped
    file.  Attributes not defined here are looked up on the wrapped file.
    """

    # The status-line parsing code calls readline(), which normally
    # gets the HTTP status line.  For a 0.9 response, however, this is
    # actually the first line of the body!  Clients need to get a
    # readable file object that contains that line.

    def __init__(self, line, file):
        self._line = line
        self._file = file
        self._line_consumed = 0
        self._line_offset = 0           # index of the next unread char
        self._line_left = len(line)     # unread chars remaining in line

    def __getattr__(self, attr):
        return getattr(self._file, attr)

    def _done(self):
        # called when the last byte is read from the line.  After the
        # call, all read methods are delegated to the underlying file
        # object.
        self._line_consumed = 1
        self.read = self._file.read
        self.readline = self._file.readline
        self.readlines = self._file.readlines

    def read(self, amt=None):
        """Read up to amt items; None or a negative amt reads everything."""
        if self._line_consumed:
            return self._file.read(amt)
        assert self._line_left
        # A negative amt means "read it all", as it does for ordinary file
        # objects.  Letting it reach the slicing branch below would return a
        # truncated slice and corrupt the offset bookkeeping.
        read_all = amt is None or amt < 0
        if read_all or amt > self._line_left:
            s = self._line[self._line_offset:]
            self._done()
            if read_all:
                return s + self._file.read()
            else:
                return s + self._file.read(amt - len(s))
        else:
            assert amt <= self._line_left
            i = self._line_offset
            j = i + amt
            s = self._line[i:j]
            self._line_offset = j
            self._line_left -= amt
            if self._line_left == 0:
                self._done()
            return s

    def readline(self):
        """Return the unread rest of the stored line, then defer to the file."""
        if self._line_consumed:
            return self._file.readline()
        assert self._line_left
        s = self._line[self._line_offset:]
        self._done()
        return s

    def readlines(self, size=None):
        """Return the unread rest of the line followed by the file's lines."""
        if self._line_consumed:
            return self._file.readlines(size)
        assert self._line_left
        L = [self._line[self._line_offset:]]
        self._done()
        if size is None:
            return L + self._file.readlines()
        else:
            return L + self._file.readlines(size)
1294 def test():
1295 """Test this module.
1297 A hodge podge of tests collected here, because they have too many
1298 external dependencies for the regular test suite.
1301 import sys
1302 import getopt
1303 opts, args = getopt.getopt(sys.argv[1:], 'd')
1304 dl = 0
1305 for o, a in opts:
1306 if o == '-d': dl = dl + 1
1307 host = 'www.python.org'
1308 selector = '/'
1309 if args[0:]: host = args[0]
1310 if args[1:]: selector = args[1]
1311 h = HTTP()
1312 h.set_debuglevel(dl)
1313 h.connect(host)
1314 h.putrequest('GET', selector)
1315 h.endheaders()
1316 status, reason, headers = h.getreply()
1317 print 'status =', status
1318 print 'reason =', reason
1319 print "read", len(h.getfile().read())
1320 print
1321 if headers:
1322 for header in headers.headers: print header.strip()
1323 print
1325 # minimal test that code to extract host from url works
1326 class HTTP11(HTTP):
1327 _http_vsn = 11
1328 _http_vsn_str = 'HTTP/1.1'
1330 h = HTTP11('www.python.org')
1331 h.putrequest('GET', 'http://www.python.org/~jeremy/')
1332 h.endheaders()
1333 h.getreply()
1334 h.close()
1336 try:
1337 import ssl
1338 except ImportError:
1339 pass
1340 else:
1342 for host, selector in (('sourceforge.net', '/projects/python'),
1344 print "https://%s%s" % (host, selector)
1345 hs = HTTPS()
1346 hs.set_debuglevel(dl)
1347 hs.connect(host)
1348 hs.putrequest('GET', selector)
1349 hs.endheaders()
1350 status, reason, headers = hs.getreply()
1351 print 'status =', status
1352 print 'reason =', reason
1353 print "read", len(hs.getfile().read())
1354 print
1355 if headers:
1356 for header in headers.headers: print header.strip()
1357 print
# Run the ad-hoc tests in test() when this file is executed as a script.
if __name__ == '__main__':
    test()