Issue #6857: Fix Decimal formatting to be consistent with existing float
[python.git] / Lib / httplib.py
blob30cabcef2bad6fe87b88192b9a1ccba545c02dac
1 """HTTP/1.1 client library
3 <intro stuff goes here>
4 <other stuff, too>
6 HTTPConnection goes through a number of "states", which define when a client
7 may legally make another request or fetch the response for a particular
8 request. This diagram details these state transitions:
    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |
      | response = getresponse()
      v
    Unread-response   [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent
42 This diagram presents the following rules:
43 -- a second request may not be started until {response-headers-read}
44 -- a response [object] cannot be retrieved until {request-sent}
45 -- there is no differentiation between an unread response body and a
46 partially read response body
48 Note: this enforcement is applied by the HTTPConnection class. The
49 HTTPResponse class does not enforce this state machine, which
50 implies sophisticated clients may accelerate the request/response
51 pipeline. Caution should be taken, though: accelerating the states
52 beyond the above pattern may imply knowledge of the server's
53 connection-close behavior for certain requests. For example, it
54 is impossible to tell whether the server will close the connection
55 UNTIL the response headers have been read; this means that further
56 requests cannot be placed into the pipeline until it is known that
57 the server will NOT be closing the connection.
Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
67 """
69 import socket
70 from sys import py3kwarning
71 from urlparse import urlsplit
72 import warnings
73 with warnings.catch_warnings():
74 if py3kwarning:
75 warnings.filterwarnings("ignore", ".*mimetools has been removed",
76 DeprecationWarning)
77 import mimetools
79 try:
80 from cStringIO import StringIO
81 except ImportError:
82 from StringIO import StringIO
# Names exported by "from httplib import *".
__all__ = [
    "HTTP",
    "HTTPResponse",
    "HTTPConnection",
    "HTTPException",
    "NotConnected",
    "UnknownProtocol",
    "UnknownTransferEncoding",
    "UnimplementedFileMode",
    "IncompleteRead",
    "InvalidURL",
    "ImproperConnectionState",
    "CannotSendRequest",
    "CannotSendHeader",
    "ResponseNotReady",
    "BadStatusLine",
    "error",
    "responses",
]
# Default TCP ports for the two URL schemes.
HTTP_PORT = 80
HTTPS_PORT = 443

# Placeholder for HTTPResponse attributes that have not been parsed yet.
_UNKNOWN = 'UNKNOWN'

# connection states
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'

# status codes
# informational
CONTINUE = 100
SWITCHING_PROTOCOLS = 101
PROCESSING = 102

# successful
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207
IM_USED = 226

# redirection
MULTIPLE_CHOICES = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307

# client error
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
METHOD_NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTHENTICATION_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
UNPROCESSABLE_ENTITY = 422
LOCKED = 423
FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426

# server error
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510

# Mapping status codes to official W3C names.  Keys are spelled with the
# constants above; 306 has no constant, so it stays a literal.
responses = {
    CONTINUE: 'Continue',
    SWITCHING_PROTOCOLS: 'Switching Protocols',

    OK: 'OK',
    CREATED: 'Created',
    ACCEPTED: 'Accepted',
    NON_AUTHORITATIVE_INFORMATION: 'Non-Authoritative Information',
    NO_CONTENT: 'No Content',
    RESET_CONTENT: 'Reset Content',
    PARTIAL_CONTENT: 'Partial Content',

    MULTIPLE_CHOICES: 'Multiple Choices',
    MOVED_PERMANENTLY: 'Moved Permanently',
    FOUND: 'Found',
    SEE_OTHER: 'See Other',
    NOT_MODIFIED: 'Not Modified',
    USE_PROXY: 'Use Proxy',
    306: '(Unused)',
    TEMPORARY_REDIRECT: 'Temporary Redirect',

    BAD_REQUEST: 'Bad Request',
    UNAUTHORIZED: 'Unauthorized',
    PAYMENT_REQUIRED: 'Payment Required',
    FORBIDDEN: 'Forbidden',
    NOT_FOUND: 'Not Found',
    METHOD_NOT_ALLOWED: 'Method Not Allowed',
    NOT_ACCEPTABLE: 'Not Acceptable',
    PROXY_AUTHENTICATION_REQUIRED: 'Proxy Authentication Required',
    REQUEST_TIMEOUT: 'Request Timeout',
    CONFLICT: 'Conflict',
    GONE: 'Gone',
    LENGTH_REQUIRED: 'Length Required',
    PRECONDITION_FAILED: 'Precondition Failed',
    REQUEST_ENTITY_TOO_LARGE: 'Request Entity Too Large',
    REQUEST_URI_TOO_LONG: 'Request-URI Too Long',
    UNSUPPORTED_MEDIA_TYPE: 'Unsupported Media Type',
    REQUESTED_RANGE_NOT_SATISFIABLE: 'Requested Range Not Satisfiable',
    EXPECTATION_FAILED: 'Expectation Failed',

    INTERNAL_SERVER_ERROR: 'Internal Server Error',
    NOT_IMPLEMENTED: 'Not Implemented',
    BAD_GATEWAY: 'Bad Gateway',
    SERVICE_UNAVAILABLE: 'Service Unavailable',
    GATEWAY_TIMEOUT: 'Gateway Timeout',
    HTTP_VERSION_NOT_SUPPORTED: 'HTTP Version Not Supported',
}

# maximal amount of data to read at one time in _safe_read (1 MiB)
MAXAMOUNT = 1024 * 1024
213 class HTTPMessage(mimetools.Message):
215 def addheader(self, key, value):
216 """Add header for field key handling repeats."""
217 prev = self.dict.get(key)
218 if prev is None:
219 self.dict[key] = value
220 else:
221 combined = ", ".join((prev, value))
222 self.dict[key] = combined
224 def addcontinue(self, key, more):
225 """Add more field data from a continuation line."""
226 prev = self.dict[key]
227 self.dict[key] = prev + "\n " + more
229 def readheaders(self):
230 """Read header lines.
232 Read header lines up to the entirely blank line that terminates them.
233 The (normally blank) line that ends the headers is skipped, but not
234 included in the returned list. If a non-header line ends the headers,
235 (which is an error), an attempt is made to backspace over it; it is
236 never included in the returned list.
238 The variable self.status is set to the empty string if all went well,
239 otherwise it is an error message. The variable self.headers is a
240 completely uninterpreted list of lines contained in the header (so
241 printing them will reproduce the header exactly as it appears in the
242 file).
244 If multiple header fields with the same name occur, they are combined
245 according to the rules in RFC 2616 sec 4.2:
247 Appending each subsequent field-value to the first, each separated
248 by a comma. The order in which header fields with the same field-name
249 are received is significant to the interpretation of the combined
250 field value.
252 # XXX The implementation overrides the readheaders() method of
253 # rfc822.Message. The base class design isn't amenable to
254 # customized behavior here so the method here is a copy of the
255 # base class code with a few small changes.
257 self.dict = {}
258 self.unixfrom = ''
259 self.headers = hlist = []
260 self.status = ''
261 headerseen = ""
262 firstline = 1
263 startofline = unread = tell = None
264 if hasattr(self.fp, 'unread'):
265 unread = self.fp.unread
266 elif self.seekable:
267 tell = self.fp.tell
268 while True:
269 if tell:
270 try:
271 startofline = tell()
272 except IOError:
273 startofline = tell = None
274 self.seekable = 0
275 line = self.fp.readline()
276 if not line:
277 self.status = 'EOF in headers'
278 break
279 # Skip unix From name time lines
280 if firstline and line.startswith('From '):
281 self.unixfrom = self.unixfrom + line
282 continue
283 firstline = 0
284 if headerseen and line[0] in ' \t':
285 # XXX Not sure if continuation lines are handled properly
286 # for http and/or for repeating headers
287 # It's a continuation line.
288 hlist.append(line)
289 self.addcontinue(headerseen, line.strip())
290 continue
291 elif self.iscomment(line):
292 # It's a comment. Ignore it.
293 continue
294 elif self.islast(line):
295 # Note! No pushback here! The delimiter line gets eaten.
296 break
297 headerseen = self.isheader(line)
298 if headerseen:
299 # It's a legal header line, save it.
300 hlist.append(line)
301 self.addheader(headerseen, line[len(headerseen)+1:].strip())
302 continue
303 else:
304 # It's not a header line; throw it back and stop here.
305 if not self.dict:
306 self.status = 'No headers'
307 else:
308 self.status = 'Non-header line where header expected'
309 # Try to undo the read.
310 if unread:
311 unread(line)
312 elif tell:
313 self.fp.seek(startofline)
314 else:
315 self.status = self.status + '; bad seek'
316 break
318 class HTTPResponse:
320 # strict: If true, raise BadStatusLine if the status line can't be
321 # parsed as a valid HTTP/1.0 or 1.1 status line. By default it is
322 # false because it prevents clients from talking to HTTP/0.9
323 # servers. Note that a response with a sufficiently corrupted
324 # status line will look like an HTTP/0.9 response.
326 # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
328 def __init__(self, sock, debuglevel=0, strict=0, method=None, buffering=False):
329 if buffering:
330 # The caller won't be using any sock.recv() calls, so buffering
331 # is fine and recommended for performance.
332 self.fp = sock.makefile('rb')
333 else:
334 # The buffer size is specified as zero, because the headers of
335 # the response are read with readline(). If the reads were
336 # buffered the readline() calls could consume some of the
337 # response, which make be read via a recv() on the underlying
338 # socket.
339 self.fp = sock.makefile('rb', 0)
340 self.debuglevel = debuglevel
341 self.strict = strict
342 self._method = method
344 self.msg = None
346 # from the Status-Line of the response
347 self.version = _UNKNOWN # HTTP-Version
348 self.status = _UNKNOWN # Status-Code
349 self.reason = _UNKNOWN # Reason-Phrase
351 self.chunked = _UNKNOWN # is "chunked" being used?
352 self.chunk_left = _UNKNOWN # bytes left to read in current chunk
353 self.length = _UNKNOWN # number of bytes left in response
354 self.will_close = _UNKNOWN # conn will close at end of response
356 def _read_status(self):
357 # Initialize with Simple-Response defaults
358 line = self.fp.readline()
359 if self.debuglevel > 0:
360 print "reply:", repr(line)
361 if not line:
362 # Presumably, the server closed the connection before
363 # sending a valid response.
364 raise BadStatusLine(line)
365 try:
366 [version, status, reason] = line.split(None, 2)
367 except ValueError:
368 try:
369 [version, status] = line.split(None, 1)
370 reason = ""
371 except ValueError:
372 # empty version will cause next test to fail and status
373 # will be treated as 0.9 response.
374 version = ""
375 if not version.startswith('HTTP/'):
376 if self.strict:
377 self.close()
378 raise BadStatusLine(line)
379 else:
380 # assume it's a Simple-Response from an 0.9 server
381 self.fp = LineAndFileWrapper(line, self.fp)
382 return "HTTP/0.9", 200, ""
384 # The status code is a three-digit number
385 try:
386 status = int(status)
387 if status < 100 or status > 999:
388 raise BadStatusLine(line)
389 except ValueError:
390 raise BadStatusLine(line)
391 return version, status, reason
393 def begin(self):
394 if self.msg is not None:
395 # we've already started reading the response
396 return
398 # read until we get a non-100 response
399 while True:
400 version, status, reason = self._read_status()
401 if status != CONTINUE:
402 break
403 # skip the header from the 100 response
404 while True:
405 skip = self.fp.readline().strip()
406 if not skip:
407 break
408 if self.debuglevel > 0:
409 print "header:", skip
411 self.status = status
412 self.reason = reason.strip()
413 if version == 'HTTP/1.0':
414 self.version = 10
415 elif version.startswith('HTTP/1.'):
416 self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
417 elif version == 'HTTP/0.9':
418 self.version = 9
419 else:
420 raise UnknownProtocol(version)
422 if self.version == 9:
423 self.length = None
424 self.chunked = 0
425 self.will_close = 1
426 self.msg = HTTPMessage(StringIO())
427 return
429 self.msg = HTTPMessage(self.fp, 0)
430 if self.debuglevel > 0:
431 for hdr in self.msg.headers:
432 print "header:", hdr,
434 # don't let the msg keep an fp
435 self.msg.fp = None
437 # are we using the chunked-style of transfer encoding?
438 tr_enc = self.msg.getheader('transfer-encoding')
439 if tr_enc and tr_enc.lower() == "chunked":
440 self.chunked = 1
441 self.chunk_left = None
442 else:
443 self.chunked = 0
445 # will the connection close at the end of the response?
446 self.will_close = self._check_close()
448 # do we have a Content-Length?
449 # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
450 length = self.msg.getheader('content-length')
451 if length and not self.chunked:
452 try:
453 self.length = int(length)
454 except ValueError:
455 self.length = None
456 else:
457 if self.length < 0: # ignore nonsensical negative lengths
458 self.length = None
459 else:
460 self.length = None
462 # does the body have a fixed length? (of zero)
463 if (status == NO_CONTENT or status == NOT_MODIFIED or
464 100 <= status < 200 or # 1xx codes
465 self._method == 'HEAD'):
466 self.length = 0
468 # if the connection remains open, and we aren't using chunked, and
469 # a content-length was not provided, then assume that the connection
470 # WILL close.
471 if not self.will_close and \
472 not self.chunked and \
473 self.length is None:
474 self.will_close = 1
476 def _check_close(self):
477 conn = self.msg.getheader('connection')
478 if self.version == 11:
479 # An HTTP/1.1 proxy is assumed to stay open unless
480 # explicitly closed.
481 conn = self.msg.getheader('connection')
482 if conn and "close" in conn.lower():
483 return True
484 return False
486 # Some HTTP/1.0 implementations have support for persistent
487 # connections, using rules different than HTTP/1.1.
489 # For older HTTP, Keep-Alive indicates persistent connection.
490 if self.msg.getheader('keep-alive'):
491 return False
493 # At least Akamai returns a "Connection: Keep-Alive" header,
494 # which was supposed to be sent by the client.
495 if conn and "keep-alive" in conn.lower():
496 return False
498 # Proxy-Connection is a netscape hack.
499 pconn = self.msg.getheader('proxy-connection')
500 if pconn and "keep-alive" in pconn.lower():
501 return False
503 # otherwise, assume it will close
504 return True
506 def close(self):
507 if self.fp:
508 self.fp.close()
509 self.fp = None
511 def isclosed(self):
512 # NOTE: it is possible that we will not ever call self.close(). This
513 # case occurs when will_close is TRUE, length is None, and we
514 # read up to the last byte, but NOT past it.
516 # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
517 # called, meaning self.isclosed() is meaningful.
518 return self.fp is None
520 # XXX It would be nice to have readline and __iter__ for this, too.
522 def read(self, amt=None):
523 if self.fp is None:
524 return ''
526 if self.chunked:
527 return self._read_chunked(amt)
529 if amt is None:
530 # unbounded read
531 if self.length is None:
532 s = self.fp.read()
533 else:
534 s = self._safe_read(self.length)
535 self.length = 0
536 self.close() # we read everything
537 return s
539 if self.length is not None:
540 if amt > self.length:
541 # clip the read to the "end of response"
542 amt = self.length
544 # we do not use _safe_read() here because this may be a .will_close
545 # connection, and the user is reading more bytes than will be provided
546 # (for example, reading in 1k chunks)
547 s = self.fp.read(amt)
548 if self.length is not None:
549 self.length -= len(s)
550 if not self.length:
551 self.close()
552 return s
554 def _read_chunked(self, amt):
555 assert self.chunked != _UNKNOWN
556 chunk_left = self.chunk_left
557 value = []
558 while True:
559 if chunk_left is None:
560 line = self.fp.readline()
561 i = line.find(';')
562 if i >= 0:
563 line = line[:i] # strip chunk-extensions
564 try:
565 chunk_left = int(line, 16)
566 except ValueError:
567 # close the connection as protocol synchronisation is
568 # probably lost
569 self.close()
570 raise IncompleteRead(''.join(value))
571 if chunk_left == 0:
572 break
573 if amt is None:
574 value.append(self._safe_read(chunk_left))
575 elif amt < chunk_left:
576 value.append(self._safe_read(amt))
577 self.chunk_left = chunk_left - amt
578 return ''.join(value)
579 elif amt == chunk_left:
580 value.append(self._safe_read(amt))
581 self._safe_read(2) # toss the CRLF at the end of the chunk
582 self.chunk_left = None
583 return ''.join(value)
584 else:
585 value.append(self._safe_read(chunk_left))
586 amt -= chunk_left
588 # we read the whole chunk, get another
589 self._safe_read(2) # toss the CRLF at the end of the chunk
590 chunk_left = None
592 # read and discard trailer up to the CRLF terminator
593 ### note: we shouldn't have any trailers!
594 while True:
595 line = self.fp.readline()
596 if not line:
597 # a vanishingly small number of sites EOF without
598 # sending the trailer
599 break
600 if line == '\r\n':
601 break
603 # we read everything; close the "file"
604 self.close()
606 return ''.join(value)
608 def _safe_read(self, amt):
609 """Read the number of bytes requested, compensating for partial reads.
611 Normally, we have a blocking socket, but a read() can be interrupted
612 by a signal (resulting in a partial read).
614 Note that we cannot distinguish between EOF and an interrupt when zero
615 bytes have been read. IncompleteRead() will be raised in this
616 situation.
618 This function should be used when <amt> bytes "should" be present for
619 reading. If the bytes are truly not available (due to EOF), then the
620 IncompleteRead exception can be used to detect the problem.
622 # NOTE(gps): As of svn r74426 socket._fileobject.read(x) will never
623 # return less than x bytes unless EOF is encountered. It now handles
624 # signal interruptions (socket.error EINTR) internally. This code
625 # never caught that exception anyways. It seems largely pointless.
626 # self.fp.read(amt) will work fine.
627 s = []
628 while amt > 0:
629 chunk = self.fp.read(min(amt, MAXAMOUNT))
630 if not chunk:
631 raise IncompleteRead(''.join(s), amt)
632 s.append(chunk)
633 amt -= len(chunk)
634 return ''.join(s)
636 def getheader(self, name, default=None):
637 if self.msg is None:
638 raise ResponseNotReady()
639 return self.msg.getheader(name, default)
641 def getheaders(self):
642 """Return list of (header, value) tuples."""
643 if self.msg is None:
644 raise ResponseNotReady()
645 return self.msg.items()
648 class HTTPConnection:
650 _http_vsn = 11
651 _http_vsn_str = 'HTTP/1.1'
653 response_class = HTTPResponse
654 default_port = HTTP_PORT
655 auto_open = 1
656 debuglevel = 0
657 strict = 0
659 def __init__(self, host, port=None, strict=None,
660 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
661 self.timeout = timeout
662 self.sock = None
663 self._buffer = []
664 self.__response = None
665 self.__state = _CS_IDLE
666 self._method = None
667 self._tunnel_host = None
668 self._tunnel_port = None
670 self._set_hostport(host, port)
671 if strict is not None:
672 self.strict = strict
674 def set_tunnel(self, host, port=None):
675 """ Sets up the host and the port for the HTTP CONNECT Tunnelling."""
676 self._tunnel_host = host
677 self._tunnel_port = port
679 def _set_hostport(self, host, port):
680 if port is None:
681 i = host.rfind(':')
682 j = host.rfind(']') # ipv6 addresses have [...]
683 if i > j:
684 try:
685 port = int(host[i+1:])
686 except ValueError:
687 raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
688 host = host[:i]
689 else:
690 port = self.default_port
691 if host and host[0] == '[' and host[-1] == ']':
692 host = host[1:-1]
693 self.host = host
694 self.port = port
696 def set_debuglevel(self, level):
697 self.debuglevel = level
699 def _tunnel(self):
700 self._set_hostport(self._tunnel_host, self._tunnel_port)
701 self.send("CONNECT %s:%d HTTP/1.0\r\n\r\n" % (self.host, self.port))
702 response = self.response_class(self.sock, strict = self.strict,
703 method = self._method)
704 (version, code, message) = response._read_status()
706 if code != 200:
707 self.close()
708 raise socket.error, "Tunnel connection failed: %d %s" % (code,
709 message.strip())
710 while True:
711 line = response.fp.readline()
712 if line == '\r\n': break
715 def connect(self):
716 """Connect to the host and port specified in __init__."""
717 self.sock = socket.create_connection((self.host,self.port),
718 self.timeout)
720 if self._tunnel_host:
721 self._tunnel()
723 def close(self):
724 """Close the connection to the HTTP server."""
725 if self.sock:
726 self.sock.close() # close it manually... there may be other refs
727 self.sock = None
728 if self.__response:
729 self.__response.close()
730 self.__response = None
731 self.__state = _CS_IDLE
733 def send(self, str):
734 """Send `str' to the server."""
735 if self.sock is None:
736 if self.auto_open:
737 self.connect()
738 else:
739 raise NotConnected()
741 # send the data to the server. if we get a broken pipe, then close
742 # the socket. we want to reconnect when somebody tries to send again.
744 # NOTE: we DO propagate the error, though, because we cannot simply
745 # ignore the error... the caller will know if they can retry.
746 if self.debuglevel > 0:
747 print "send:", repr(str)
748 try:
749 blocksize=8192
750 if hasattr(str,'read') :
751 if self.debuglevel > 0: print "sendIng a read()able"
752 data=str.read(blocksize)
753 while data:
754 self.sock.sendall(data)
755 data=str.read(blocksize)
756 else:
757 self.sock.sendall(str)
758 except socket.error, v:
759 if v[0] == 32: # Broken pipe
760 self.close()
761 raise
763 def _output(self, s):
764 """Add a line of output to the current request buffer.
766 Assumes that the line does *not* end with \\r\\n.
768 self._buffer.append(s)
770 def _send_output(self, message_body=None):
771 """Send the currently buffered request and clear the buffer.
773 Appends an extra \\r\\n to the buffer.
774 A message_body may be specified, to be appended to the request.
776 self._buffer.extend(("", ""))
777 msg = "\r\n".join(self._buffer)
778 del self._buffer[:]
779 # If msg and message_body are sent in a single send() call,
780 # it will avoid performance problems caused by the interaction
781 # between delayed ack and the Nagle algorithim.
782 if isinstance(message_body, str):
783 msg += message_body
784 message_body = None
785 self.send(msg)
786 if message_body is not None:
787 #message_body was not a string (i.e. it is a file) and
788 #we must run the risk of Nagle
789 self.send(message_body)
791 def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
792 """Send a request to the server.
794 `method' specifies an HTTP request method, e.g. 'GET'.
795 `url' specifies the object being requested, e.g. '/index.html'.
796 `skip_host' if True does not add automatically a 'Host:' header
797 `skip_accept_encoding' if True does not add automatically an
798 'Accept-Encoding:' header
801 # if a prior response has been completed, then forget about it.
802 if self.__response and self.__response.isclosed():
803 self.__response = None
806 # in certain cases, we cannot issue another request on this connection.
807 # this occurs when:
808 # 1) we are in the process of sending a request. (_CS_REQ_STARTED)
809 # 2) a response to a previous request has signalled that it is going
810 # to close the connection upon completion.
811 # 3) the headers for the previous response have not been read, thus
812 # we cannot determine whether point (2) is true. (_CS_REQ_SENT)
814 # if there is no prior response, then we can request at will.
816 # if point (2) is true, then we will have passed the socket to the
817 # response (effectively meaning, "there is no prior response"), and
818 # will open a new one when a new request is made.
820 # Note: if a prior response exists, then we *can* start a new request.
821 # We are not allowed to begin fetching the response to this new
822 # request, however, until that prior response is complete.
824 if self.__state == _CS_IDLE:
825 self.__state = _CS_REQ_STARTED
826 else:
827 raise CannotSendRequest()
829 # Save the method we use, we need it later in the response phase
830 self._method = method
831 if not url:
832 url = '/'
833 str = '%s %s %s' % (method, url, self._http_vsn_str)
835 self._output(str)
837 if self._http_vsn == 11:
838 # Issue some standard headers for better HTTP/1.1 compliance
840 if not skip_host:
841 # this header is issued *only* for HTTP/1.1
842 # connections. more specifically, this means it is
843 # only issued when the client uses the new
844 # HTTPConnection() class. backwards-compat clients
845 # will be using HTTP/1.0 and those clients may be
846 # issuing this header themselves. we should NOT issue
847 # it twice; some web servers (such as Apache) barf
848 # when they see two Host: headers
850 # If we need a non-standard port,include it in the
851 # header. If the request is going through a proxy,
852 # but the host of the actual URL, not the host of the
853 # proxy.
855 netloc = ''
856 if url.startswith('http'):
857 nil, netloc, nil, nil, nil = urlsplit(url)
859 if netloc:
860 try:
861 netloc_enc = netloc.encode("ascii")
862 except UnicodeEncodeError:
863 netloc_enc = netloc.encode("idna")
864 self.putheader('Host', netloc_enc)
865 else:
866 try:
867 host_enc = self.host.encode("ascii")
868 except UnicodeEncodeError:
869 host_enc = self.host.encode("idna")
870 if self.port == self.default_port:
871 self.putheader('Host', host_enc)
872 else:
873 self.putheader('Host', "%s:%s" % (host_enc, self.port))
875 # note: we are assuming that clients will not attempt to set these
876 # headers since *this* library must deal with the
877 # consequences. this also means that when the supporting
878 # libraries are updated to recognize other forms, then this
879 # code should be changed (removed or updated).
881 # we only want a Content-Encoding of "identity" since we don't
882 # support encodings such as x-gzip or x-deflate.
883 if not skip_accept_encoding:
884 self.putheader('Accept-Encoding', 'identity')
886 # we can accept "chunked" Transfer-Encodings, but no others
887 # NOTE: no TE header implies *only* "chunked"
888 #self.putheader('TE', 'chunked')
890 # if TE is supplied in the header, then it must appear in a
891 # Connection header.
892 #self.putheader('Connection', 'TE')
894 else:
895 # For HTTP/1.0, the server will assume "not chunked"
896 pass
898 def putheader(self, header, *values):
899 """Send a request header line to the server.
901 For example: h.putheader('Accept', 'text/html')
903 if self.__state != _CS_REQ_STARTED:
904 raise CannotSendHeader()
906 str = '%s: %s' % (header, '\r\n\t'.join(values))
907 self._output(str)
909 def endheaders(self, message_body=None):
910 """Indicate that the last header line has been sent to the server.
912 This method sends the request to the server. The optional
913 message_body argument can be used to pass message body
914 associated with the request. The message body will be sent in
915 the same packet as the message headers if possible. The
916 message_body should be a string.
918 if self.__state == _CS_REQ_STARTED:
919 self.__state = _CS_REQ_SENT
920 else:
921 raise CannotSendHeader()
922 self._send_output(message_body)
924 def request(self, method, url, body=None, headers={}):
925 """Send a complete request to the server."""
927 try:
928 self._send_request(method, url, body, headers)
929 except socket.error, v:
930 # trap 'Broken pipe' if we're allowed to automatically reconnect
931 if v[0] != 32 or not self.auto_open:
932 raise
933 # try one more time
934 self._send_request(method, url, body, headers)
936 def _set_content_length(self, body):
937 # Set the content-length based on the body.
938 thelen = None
939 try:
940 thelen = str(len(body))
941 except TypeError, te:
942 # If this is a file-like object, try to
943 # fstat its file descriptor
944 import os
945 try:
946 thelen = str(os.fstat(body.fileno()).st_size)
947 except (AttributeError, OSError):
948 # Don't send a length if this failed
949 if self.debuglevel > 0: print "Cannot stat!!"
951 if thelen is not None:
952 self.putheader('Content-Length', thelen)
954 def _send_request(self, method, url, body, headers):
955 # honour explicitly requested Host: and Accept-Encoding headers
956 header_names = dict.fromkeys([k.lower() for k in headers])
957 skips = {}
958 if 'host' in header_names:
959 skips['skip_host'] = 1
960 if 'accept-encoding' in header_names:
961 skips['skip_accept_encoding'] = 1
963 self.putrequest(method, url, **skips)
965 if body and ('content-length' not in header_names):
966 self._set_content_length(body)
967 for hdr, value in headers.iteritems():
968 self.putheader(hdr, value)
969 self.endheaders(body)
971 def getresponse(self, buffering=False):
972 "Get the response from the server."
974 # if a prior response has been completed, then forget about it.
975 if self.__response and self.__response.isclosed():
976 self.__response = None
979 # if a prior response exists, then it must be completed (otherwise, we
980 # cannot read this response's header to determine the connection-close
981 # behavior)
983 # note: if a prior response existed, but was connection-close, then the
984 # socket and response were made independent of this HTTPConnection
985 # object since a new request requires that we open a whole new
986 # connection
988 # this means the prior response had one of two states:
989 # 1) will_close: this connection was reset and the prior socket and
990 # response operate independently
991 # 2) persistent: the response was retained and we await its
992 # isclosed() status to become true.
994 if self.__state != _CS_REQ_SENT or self.__response:
995 raise ResponseNotReady()
997 args = (self.sock,)
998 kwds = {"strict":self.strict, "method":self._method}
999 if self.debuglevel > 0:
1000 args += (self.debuglevel,)
1001 if buffering:
1002 #only add this keyword if non-default, for compatibility with
1003 #other response_classes.
1004 kwds["buffering"] = True;
1005 response = self.response_class(*args, **kwds)
1007 response.begin()
1008 assert response.will_close != _UNKNOWN
1009 self.__state = _CS_IDLE
1011 if response.will_close:
1012 # this effectively passes the connection to the response
1013 self.close()
1014 else:
1015 # remember this, so we can tell when it is complete
1016 self.__response = response
1018 return response
class HTTP:
    "Compatibility class with httplib.py from 1.5."

    _http_vsn = 10
    _http_vsn_str = 'HTTP/1.0'

    debuglevel = 0

    _connection_class = HTTPConnection

    def __init__(self, host='', port=None, strict=None):
        "Provide a default host, since the superclass requires one."

        # some joker passed 0 explicitly, meaning default port
        effective_port = None if port == 0 else port

        # Note that we may pass an empty string as the host; this will throw
        # an error when we attempt to connect. Presumably, the client code
        # will call connect before then, with a proper host.
        connection = self._connection_class(host, effective_port, strict)
        self._setup(connection)

    def _setup(self, conn):
        "Adopt conn and expose its request-building methods on this object."
        self._conn = conn

        # set up delegation to flesh out interface
        for delegated in ('send', 'putrequest', 'putheader', 'endheaders',
                          'set_debuglevel'):
            setattr(self, delegated, getattr(conn, delegated))

        # force the wrapped connection to this class's protocol version
        conn._http_vsn = self._http_vsn
        conn._http_vsn_str = self._http_vsn_str

        self.file = None

    def connect(self, host=None, port=None):
        "Accept arguments to set the host/port, since the superclass doesn't."

        if host is not None:
            self._conn._set_hostport(host, port)
        self._conn.connect()

    def getfile(self):
        "Provide a getfile, since the superclass' does not use this concept."
        return self.file

    def getreply(self, buffering=False):
        """Compat definition since superclass does not define it.

        Returns a tuple consisting of:
        - server status code (e.g. '200' if all goes well)
        - server "reason" corresponding to status code
        - any RFC822 headers in the response from the server

        When the status line is bad the tuple is (-1, <raw line>, None).
        """
        try:
            if buffering:
                #only add this keyword if non-default for compatibility
                #with other connection classes
                reply = self._conn.getresponse(buffering)
            else:
                reply = self._conn.getresponse()
        except BadStatusLine as err:
            ### hmm. if getresponse() ever closes the socket on a bad request,
            ### then we are going to have problems with self.sock

            ### should we keep this behavior? do people use it?
            # keep the socket open (as a file), and return it
            self.file = self._conn.sock.makefile('rb', 0)

            # close our socket -- we want to restart after any protocol error
            self.close()

            self.headers = None
            return -1, err.line, None

        self.headers = reply.msg
        self.file = reply.fp
        return reply.status, reply.reason, reply.msg

    def close(self):
        "Close the wrapped connection and drop the reference to the file."
        self._conn.close()

        # note that self.file == response.fp, which gets closed by the
        # superclass. just clear the object ref here.
        ### hmm. messy. if status==-1, then self.file is owned by us.
        ### well... we aren't explicitly closing, but losing this ref will
        ### do it
        self.file = None
try:
    import ssl
except ImportError:
    # No ssl module available: none of the HTTPS helpers get defined.
    pass
else:
    class HTTPSConnection(HTTPConnection):
        "This class allows communication via SSL."

        default_port = HTTPS_PORT

        def __init__(self, host, port=None, key_file=None, cert_file=None,
                     strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
            "Set up the plain connection and remember the key/cert files."
            HTTPConnection.__init__(self, host, port, strict, timeout)
            self.key_file = key_file
            self.cert_file = cert_file

        def connect(self):
            "Connect to a host on a given (SSL) port."

            address = (self.host, self.port)
            plain_sock = socket.create_connection(address, self.timeout)
            if self._tunnel_host:
                # _tunnel() runs with self.sock set to the unwrapped socket
                self.sock = plain_sock
                self._tunnel()
            self.sock = ssl.wrap_socket(plain_sock, self.key_file,
                                        self.cert_file)

    __all__.append("HTTPSConnection")

    class HTTPS(HTTP):
        """Compatibility with 1.5 httplib interface

        Python 1.5.2 did not have an HTTPS class, but it defined an
        interface for sending http requests that is also useful for
        https.
        """

        _connection_class = HTTPSConnection

        def __init__(self, host='', port=None, key_file=None, cert_file=None,
                     strict=None):
            # provide a default host, pass the X509 cert info

            # urf. compensate for bad input.
            port = None if port == 0 else port
            conn = self._connection_class(host, port, key_file,
                                          cert_file, strict)
            self._setup(conn)

            # we never actually use these for anything, but we keep them
            # here for compatibility with post-1.5.2 CVS.
            self.key_file = key_file
            self.cert_file = cert_file

    def FakeSocket(sock, sslobj):
        "Deprecated shim: emit a DeprecationWarning and return sslobj as is."
        message = ("FakeSocket is deprecated, and won't be in 3.x. "
                   "Use the result of ssl.wrap_socket() directly instead.")
        warnings.warn(message, DeprecationWarning, stacklevel=2)
        return sslobj
class HTTPException(Exception):
    """Root of the exception hierarchy defined below."""
    # Subclasses that define an __init__ must call Exception.__init__
    # or define self.args.  Otherwise, str() will fail.


class NotConnected(HTTPException):
    """HTTPException subclass; adds no behavior of its own."""


class InvalidURL(HTTPException):
    """HTTPException subclass; adds no behavior of its own."""


class UnknownProtocol(HTTPException):
    """HTTPException carrying the offending protocol version."""

    def __init__(self, version):
        self.version = version
        # args is set by hand because Exception.__init__ is not called
        self.args = (version,)


class UnknownTransferEncoding(HTTPException):
    """HTTPException subclass; adds no behavior of its own."""


class UnimplementedFileMode(HTTPException):
    """HTTPException subclass; adds no behavior of its own."""


class IncompleteRead(HTTPException):
    """HTTPException carrying the data read so far.

    partial  -- the data that was read
    expected -- how many more bytes were expected, or None
    """

    def __init__(self, partial, expected=None):
        self.partial = partial
        self.expected = expected
        # args is set by hand because Exception.__init__ is not called
        self.args = (partial,)

    def __repr__(self):
        suffix = ''
        if self.expected is not None:
            suffix = ', %i more expected' % self.expected
        return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), suffix)

    def __str__(self):
        return repr(self)


class ImproperConnectionState(HTTPException):
    """HTTPException subclass; base of the connection-state errors below."""


class CannotSendRequest(ImproperConnectionState):
    """ImproperConnectionState subclass; adds no behavior of its own."""


class CannotSendHeader(ImproperConnectionState):
    """ImproperConnectionState subclass; adds no behavior of its own."""


class ResponseNotReady(ImproperConnectionState):
    """ImproperConnectionState subclass; adds no behavior of its own."""


class BadStatusLine(HTTPException):
    """HTTPException carrying the status line that was rejected."""

    def __init__(self, line):
        self.line = line
        # args is set by hand because Exception.__init__ is not called
        self.args = (line,)


# for backwards compatibility
error = HTTPException
class LineAndFileWrapper:
    """A limited file-like object for HTTP/0.9 responses.

    Reads are served from the already-consumed first line until it is
    exhausted; after that read(), readline() and readlines() are rebound
    to the underlying file's methods.  Any other attribute is looked up
    on the underlying file.
    """

    # The status-line parsing code calls readline(), which normally
    # get the HTTP status line.  For a 0.9 response, however, this is
    # actually the first line of the body!  Clients need to get a
    # readable file object that contains that line.

    def __init__(self, line, file):
        """Wrap file so that line is returned before any of its data."""
        self._line = line
        self._file = file
        self._line_consumed = 0
        self._line_offset = 0
        self._line_left = len(line)
        if not line:
            # Nothing to replay: delegate right away, so the read methods
            # never run with an empty line (their asserts would fail).
            self._done()

    def __getattr__(self, attr):
        return getattr(self._file, attr)

    def _done(self):
        # called when the last byte is read from the line.  After the
        # call, all read methods are delegated to the underlying file
        # object.
        self._line_consumed = 1
        self.read = self._file.read
        self.readline = self._file.readline
        self.readlines = self._file.readlines

    def _rest(self):
        # Return whatever is left of the line and switch to the file.
        tail = self._line[self._line_offset:]
        self._done()
        return tail

    def _take(self, count):
        # Return the next count bytes of the line; the caller guarantees
        # 0 <= count <= self._line_left.
        start = self._line_offset
        stop = start + count
        self._line_offset = stop
        self._line_left -= count
        if self._line_left == 0:
            self._done()
        return self._line[start:stop]

    def read(self, amt=None):
        """Read up to amt bytes; None or a negative amt reads everything."""
        if self._line_consumed:
            return self._file.read(amt)
        assert self._line_left
        if amt is None or amt < 0:
            # A negative amt used to fall into the partial-read path and
            # corrupt the line offset; treat it as "read all" instead.
            return self._rest() + self._file.read()
        if amt > self._line_left:
            tail = self._rest()
            return tail + self._file.read(amt - len(tail))
        return self._take(amt)

    def readline(self, size=None):
        """Return the rest of the stored line, or at most size bytes of it.

        Once the stored line is used up, the call is passed on to the
        underlying file.
        """
        if self._line_consumed:
            if size is None:
                return self._file.readline()
            return self._file.readline(size)
        assert self._line_left
        if size is None or size < 0 or size >= self._line_left:
            return self._rest()
        return self._take(size)

    def readlines(self, size=None):
        """Return the rest of the stored line followed by the file's lines."""
        if self._line_consumed:
            return self._file.readlines(size)
        assert self._line_left
        first = [self._rest()]
        if size is None:
            return first + self._file.readlines()
        return first + self._file.readlines(size)
def test():
    """Test this module.

    A hodge podge of tests collected here, because they have too many
    external dependencies for the regular test suite.
    """

    import sys
    import getopt

    opts, args = getopt.getopt(sys.argv[1:], 'd')
    # every -d on the command line raises the debug level by one
    dl = sum(1 for flag, _ in opts if flag == '-d')

    host = args[0] if args else 'www.python.org'
    selector = args[1] if len(args) > 1 else '/'

    def fetch_and_report(client, target_host, path):
        # Issue a GET through client and print status, reason, body
        # length and headers, in that order.
        client.set_debuglevel(dl)
        client.connect(target_host)
        client.putrequest('GET', path)
        client.endheaders()
        status, reason, headers = client.getreply()
        print('status = %s' % (status,))
        print('reason = %s' % (reason,))
        print('read %s' % (len(client.getfile().read()),))
        print('')
        if headers:
            for header in headers.headers:
                print(header.strip())
        print('')

    fetch_and_report(HTTP(), host, selector)

    # minimal test that code to extract host from url works
    class HTTP11(HTTP):
        _http_vsn = 11
        _http_vsn_str = 'HTTP/1.1'

    h = HTTP11('www.python.org')
    h.putrequest('GET', 'http://www.python.org/~jeremy/')
    h.endheaders()
    h.getreply()
    h.close()

    try:
        import ssl
    except ImportError:
        # the HTTPS part of the test needs the ssl module
        return

    secure_targets = (('sourceforge.net', '/projects/python'),
                      )
    for secure_host, secure_path in secure_targets:
        print("https://%s%s" % (secure_host, secure_path))
        fetch_and_report(HTTPS(), secure_host, secure_path)


if __name__ == '__main__':
    test()