Issue #5768: Change to Unicode output logic and test case for same.
[python.git] / Lib / httplib.py
blob2e749eabb6d1b045ccdbdce42d0fdabade08a5ae
1 """HTTP/1.1 client library
3 <intro stuff goes here>
4 <other stuff, too>
6 HTTPConnection goes through a number of "states", which define when a client
7 may legally make another request or fetch the response for a particular
8 request. This diagram details these state transitions:
    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |
      | response = getresponse()
      v
    Unread-response   [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent
42 This diagram presents the following rules:
43 -- a second request may not be started until {response-headers-read}
44 -- a response [object] cannot be retrieved until {request-sent}
45 -- there is no differentiation between an unread response body and a
46 partially read response body
48 Note: this enforcement is applied by the HTTPConnection class. The
49 HTTPResponse class does not enforce this state machine, which
50 implies sophisticated clients may accelerate the request/response
51 pipeline. Caution should be taken, though: accelerating the states
52 beyond the above pattern may imply knowledge of the server's
53 connection-close behavior for certain requests. For example, it
54 is impossible to tell whether the server will close the connection
55 UNTIL the response headers have been read; this means that further
56 requests cannot be placed into the pipeline until it is known that
57 the server will NOT be closing the connection.
Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
67 """
import errno
import socket
import warnings
from sys import py3kwarning
from urlparse import urlsplit
73 with warnings.catch_warnings():
74 if py3kwarning:
75 warnings.filterwarnings("ignore", ".*mimetools has been removed",
76 DeprecationWarning)
77 import mimetools
79 try:
80 from cStringIO import StringIO
81 except ImportError:
82 from StringIO import StringIO
84 __all__ = ["HTTP", "HTTPResponse", "HTTPConnection",
85 "HTTPException", "NotConnected", "UnknownProtocol",
86 "UnknownTransferEncoding", "UnimplementedFileMode",
87 "IncompleteRead", "InvalidURL", "ImproperConnectionState",
88 "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
89 "BadStatusLine", "error", "responses"]
# Default TCP ports for plain and TLS-wrapped HTTP.
HTTP_PORT = 80
HTTPS_PORT = 443

# Placeholder for response attributes that have not been parsed yet.
_UNKNOWN = 'UNKNOWN'

# connection states
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'

# status codes
# informational
CONTINUE = 100
SWITCHING_PROTOCOLS = 101
PROCESSING = 102

# successful
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207
IM_USED = 226

# redirection
MULTIPLE_CHOICES = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307

# client error
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
METHOD_NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTHENTICATION_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
UNPROCESSABLE_ENTITY = 422
LOCKED = 423
FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426

# server error
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510

# Mapping status codes to their registered reason phrases.  Every status
# constant defined above has an entry, so responses[<CONSTANT>] never raises
# KeyError; 306 is kept for backward compatibility although it has no
# constant.
responses = {
    100: 'Continue',
    101: 'Switching Protocols',
    102: 'Processing',

    200: 'OK',
    201: 'Created',
    202: 'Accepted',
    203: 'Non-Authoritative Information',
    204: 'No Content',
    205: 'Reset Content',
    206: 'Partial Content',
    207: 'Multi-Status',
    226: 'IM Used',

    300: 'Multiple Choices',
    301: 'Moved Permanently',
    302: 'Found',
    303: 'See Other',
    304: 'Not Modified',
    305: 'Use Proxy',
    306: '(Unused)',
    307: 'Temporary Redirect',

    400: 'Bad Request',
    401: 'Unauthorized',
    402: 'Payment Required',
    403: 'Forbidden',
    404: 'Not Found',
    405: 'Method Not Allowed',
    406: 'Not Acceptable',
    407: 'Proxy Authentication Required',
    408: 'Request Timeout',
    409: 'Conflict',
    410: 'Gone',
    411: 'Length Required',
    412: 'Precondition Failed',
    413: 'Request Entity Too Large',
    414: 'Request-URI Too Long',
    415: 'Unsupported Media Type',
    416: 'Requested Range Not Satisfiable',
    417: 'Expectation Failed',
    422: 'Unprocessable Entity',
    423: 'Locked',
    424: 'Failed Dependency',
    426: 'Upgrade Required',

    500: 'Internal Server Error',
    501: 'Not Implemented',
    502: 'Bad Gateway',
    503: 'Service Unavailable',
    504: 'Gateway Timeout',
    505: 'HTTP Version Not Supported',
    507: 'Insufficient Storage',
    510: 'Not Extended',
}

# maximal amount of data to read at one time in _safe_read
MAXAMOUNT = 1048576
class HTTPMessage(mimetools.Message):
    """Header container for HTTP responses.

    Differs from the base message class in how repeated header fields are
    stored: their values are merged into one comma-separated value.
    """

    def addheader(self, key, value):
        """Store *value* under *key*, merging with any earlier value."""
        earlier = self.dict.get(key)
        self.dict[key] = value if earlier is None else ", ".join((earlier, value))

    def addcontinue(self, key, more):
        """Append the text of a continuation line to the field *key*."""
        self.dict[key] = self.dict[key] + "\n " + more

    def readheaders(self):
        """Read header lines from self.fp up to the terminating blank line.

        The (normally blank) line that ends the headers is consumed but is
        not stored.  If a non-header line ends the headers (which is an
        error), an attempt is made to push it back onto the input; it is
        never stored either.

        self.status is left as the empty string if all went well, otherwise
        it holds an error message.  self.headers receives the raw,
        uninterpreted header lines, so printing them reproduces the header
        exactly as it was read.

        Header fields that occur more than once are combined following
        RFC 2616 sec 4.2: each later field-value is appended to the first,
        separated by a comma, preserving the order of arrival.
        """
        # XXX This overrides rfc822.Message.readheaders().  The base class
        # design isn't amenable to customized behavior here, so this method
        # is a copy of the base class code with a few small changes.
        self.dict = {}
        self.unixfrom = ''
        self.headers = raw_lines = []
        self.status = ''
        current = ""
        at_start = 1
        line_start = pushback = where = None
        if hasattr(self.fp, 'unread'):
            pushback = self.fp.unread
        elif self.seekable:
            where = self.fp.tell

        while True:
            if where:
                # Remember where this line begins so the read can be undone.
                try:
                    line_start = where()
                except IOError:
                    line_start = where = None
                    self.seekable = 0

            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break

            # Skip unix From name time lines
            if at_start and line.startswith('From '):
                self.unixfrom = self.unixfrom + line
                continue
            at_start = 0

            if current and line[0] in ' \t':
                # Continuation of the previous header.
                # XXX Not sure if continuation lines are handled properly
                # for http and/or for repeating headers
                raw_lines.append(line)
                self.addcontinue(current, line.strip())
                continue
            if self.iscomment(line):
                # Comments are ignored.
                continue
            if self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break

            current = self.isheader(line)
            if current:
                # A legal header line: keep both the raw and parsed forms.
                raw_lines.append(line)
                self.addheader(current, line[len(current)+1:].strip())
                continue

            # Not a header line; record why and try to undo the read.
            if self.dict:
                self.status = 'Non-header line where header expected'
            else:
                self.status = 'No headers'
            if pushback:
                pushback(line)
            elif where:
                self.fp.seek(line_start)
            else:
                self.status = self.status + '; bad seek'
            break
class HTTPResponse:
    """One HTTP response read from a socket.

    The status line and headers are parsed by begin(); the body is then
    available through read().  Attributes that have not been determined yet
    hold the module-level _UNKNOWN placeholder.
    """

    # strict: If true, raise BadStatusLine if the status line can't be
    # parsed as a valid HTTP/1.0 or 1.1 status line.  By default it is
    # false because it prevents clients from talking to HTTP/0.9
    # servers.  Note that a response with a sufficiently corrupted
    # status line will look like an HTTP/0.9 response.

    # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.

    def __init__(self, sock, debuglevel=0, strict=0, method=None, buffering=False):
        """Wrap *sock* in a file object; nothing is read until begin().

        sock       -- connected socket the response arrives on
        debuglevel -- values > 0 print the status line and headers
        strict     -- see the class-level comment
        method     -- request method; 'HEAD' responses have no body
        buffering  -- if true, use a buffered file object for speed
        """
        if buffering:
            # The caller won't be using any sock.recv() calls, so buffering
            # is fine and recommended for performance.
            self.fp = sock.makefile('rb')
        else:
            # The buffer size is specified as zero, because the headers of
            # the response are read with readline().  If the reads were
            # buffered the readline() calls could consume some of the
            # response, which may be read via a recv() on the underlying
            # socket.
            self.fp = sock.makefile('rb', 0)
        self.debuglevel = debuglevel
        self.strict = strict
        self._method = method

        self.msg = None

        # from the Status-Line of the response
        self.version = _UNKNOWN     # HTTP-Version
        self.status = _UNKNOWN      # Status-Code
        self.reason = _UNKNOWN      # Reason-Phrase

        self.chunked = _UNKNOWN     # is "chunked" being used?
        self.chunk_left = _UNKNOWN  # bytes left to read in current chunk
        self.length = _UNKNOWN      # number of bytes left in response
        self.will_close = _UNKNOWN  # conn will close at end of response

    def _read_status(self):
        """Read the status line; return (version, status, reason).

        Raises BadStatusLine on an empty line, on a status code that is not
        a number in 100..999, or (in strict mode) on a line that does not
        start with 'HTTP/'.  In non-strict mode such a line is treated as
        the start of an HTTP/0.9 Simple-Response.
        """
        line = self.fp.readline()
        if self.debuglevel > 0:
            print("reply: %s" % repr(line))
        if not line:
            # Presumably, the server closed the connection before
            # sending a valid response.
            raise BadStatusLine(line)
        try:
            [version, status, reason] = line.split(None, 2)
        except ValueError:
            try:
                [version, status] = line.split(None, 1)
                reason = ""
            except ValueError:
                # empty version will cause next test to fail and status
                # will be treated as 0.9 response.
                version = ""
        if not version.startswith('HTTP/'):
            if self.strict:
                self.close()
                raise BadStatusLine(line)
            else:
                # assume it's a Simple-Response from an 0.9 server; the
                # line just read is body data, so put it back in front.
                self.fp = LineAndFileWrapper(line, self.fp)
                return "HTTP/0.9", 200, ""

        # The status code is a three-digit number
        try:
            status = int(status)
            if status < 100 or status > 999:
                raise BadStatusLine(line)
        except ValueError:
            raise BadStatusLine(line)
        return version, status, reason

    def begin(self):
        """Parse the status line and headers; set up body-reading state.

        Does nothing if the headers were already read.  Raises
        UnknownProtocol for an unsupported HTTP version, plus whatever
        _read_status() raises.
        """
        if self.msg is not None:
            # we've already started reading the response
            return

        # read until we get a non-100 response
        while True:
            version, status, reason = self._read_status()
            if status != CONTINUE:
                break
            # skip the header from the 100 response
            while True:
                skip = self.fp.readline().strip()
                if not skip:
                    break
                if self.debuglevel > 0:
                    print("header: %s" % skip)

        self.status = status
        self.reason = reason.strip()
        if version == 'HTTP/1.0':
            self.version = 10
        elif version.startswith('HTTP/1.'):
            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
        elif version == 'HTTP/0.9':
            self.version = 9
        else:
            raise UnknownProtocol(version)

        if self.version == 9:
            # Simple-Response: no headers, body runs until the peer closes.
            self.length = None
            self.chunked = 0
            self.will_close = 1
            self.msg = HTTPMessage(StringIO())
            return

        self.msg = HTTPMessage(self.fp, 0)
        if self.debuglevel > 0:
            for hdr in self.msg.headers:
                # Python 2 print statement: the trailing comma suppresses
                # the extra newline, since each raw header line already
                # carries its own line terminator.
                print("header: %s" % hdr),

        # don't let the msg keep an fp
        self.msg.fp = None

        # are we using the chunked-style of transfer encoding?
        tr_enc = self.msg.getheader('transfer-encoding')
        if tr_enc and tr_enc.lower() == "chunked":
            self.chunked = 1
            self.chunk_left = None
        else:
            self.chunked = 0

        # will the connection close at the end of the response?
        self.will_close = self._check_close()

        # do we have a Content-Length?
        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
        length = self.msg.getheader('content-length')
        if length and not self.chunked:
            try:
                self.length = int(length)
            except ValueError:
                self.length = None
            else:
                if self.length < 0:  # ignore nonsensical negative lengths
                    self.length = None
        else:
            self.length = None

        # does the body have a fixed length? (of zero)
        if (status == NO_CONTENT or status == NOT_MODIFIED or
            100 <= status < 200 or      # 1xx codes
            self._method == 'HEAD'):
            self.length = 0

        # if the connection remains open, and we aren't using chunked, and
        # a content-length was not provided, then assume that the connection
        # WILL close.
        if not self.will_close and \
           not self.chunked and \
           self.length is None:
            self.will_close = 1

    def _check_close(self):
        """Return True if the connection will close after this response."""
        conn = self.msg.getheader('connection')
        if self.version == 11:
            # An HTTP/1.1 proxy is assumed to stay open unless
            # explicitly closed.
            if conn and "close" in conn.lower():
                return True
            return False

        # Some HTTP/1.0 implementations have support for persistent
        # connections, using rules different than HTTP/1.1.

        # For older HTTP, Keep-Alive indicates persistent connection.
        if self.msg.getheader('keep-alive'):
            return False

        # At least Akamai returns a "Connection: Keep-Alive" header,
        # which was supposed to be sent by the client.
        if conn and "keep-alive" in conn.lower():
            return False

        # Proxy-Connection is a netscape hack.
        pconn = self.msg.getheader('proxy-connection')
        if pconn and "keep-alive" in pconn.lower():
            return False

        # otherwise, assume it will close
        return True

    def close(self):
        """Close the underlying file object, if still open."""
        if self.fp:
            self.fp.close()
            self.fp = None

    def isclosed(self):
        """Return True once the response's file object has been released."""
        # NOTE: it is possible that we will not ever call self.close(). This
        #       case occurs when will_close is TRUE, length is None, and we
        #       read up to the last byte, but NOT past it.
        #
        # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
        #          called, meaning self.isclosed() is meaningful.
        return self.fp is None

    # XXX It would be nice to have readline and __iter__ for this, too.

    def read(self, amt=None):
        """Read and return up to *amt* bytes of the body (all if None).

        Returns '' once the response is closed.  The response closes itself
        when the body has been fully consumed, and also when the peer hits
        EOF before the advertised Content-Length was delivered.
        """
        if self.fp is None:
            return ''

        if self.chunked:
            return self._read_chunked(amt)

        if amt is None:
            # unbounded read
            if self.length is None:
                s = self.fp.read()
            else:
                s = self._safe_read(self.length)
                self.length = 0
            self.close()        # we read everything
            return s

        if self.length is not None:
            if amt > self.length:
                # clip the read to the "end of response"
                amt = self.length

        # we do not use _safe_read() here because this may be a .will_close
        # connection, and the user is reading more bytes than will be provided
        # (for example, reading in 1k chunks)
        s = self.fp.read(amt)
        if not s and amt:
            # EOF although data was requested: the body ended early.
            # Ideally we would raise IncompleteRead if the content-length
            # wasn't satisfied, but that might break compatibility, so just
            # close so that isclosed() reports the truth.
            self.close()
        if self.length is not None:
            self.length -= len(s)
            if not self.length:
                self.close()
        return s

    def _read_chunked(self, amt):
        """Read up to *amt* bytes (all if None) of a chunked body.

        Raises IncompleteRead, carrying the data read so far, if a chunk
        size line cannot be parsed.
        """
        assert self.chunked != _UNKNOWN
        chunk_left = self.chunk_left
        # Collect the pieces in a list and join once; repeated string
        # concatenation is quadratic in the number of chunks.
        pieces = []

        while True:
            if chunk_left is None:
                line = self.fp.readline()
                i = line.find(';')
                if i >= 0:
                    line = line[:i]     # strip chunk-extensions
                try:
                    chunk_left = int(line, 16)
                except ValueError:
                    # close the connection as protocol synchronisation is
                    # probably lost
                    self.close()
                    raise IncompleteRead(''.join(pieces))
                if chunk_left == 0:
                    break
            if amt is None:
                pieces.append(self._safe_read(chunk_left))
            elif amt < chunk_left:
                pieces.append(self._safe_read(amt))
                self.chunk_left = chunk_left - amt
                return ''.join(pieces)
            elif amt == chunk_left:
                pieces.append(self._safe_read(amt))
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return ''.join(pieces)
            else:
                pieces.append(self._safe_read(chunk_left))
                amt -= chunk_left

            # we read the whole chunk, get another
            self._safe_read(2)      # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline()
            if not line:
                # a vanishingly small number of sites EOF without
                # sending the trailer
                break
            if line == '\r\n':
                break

        # we read everything; close the "file"
        self.close()

        return ''.join(pieces)

    def _safe_read(self, amt):
        """Read the number of bytes requested, compensating for partial reads.

        Normally, we have a blocking socket, but a read() can be interrupted
        by a signal (resulting in a partial read).

        Note that we cannot distinguish between EOF and an interrupt when zero
        bytes have been read. IncompleteRead() will be raised in this
        situation.

        This function should be used when <amt> bytes "should" be present for
        reading. If the bytes are truly not available (due to EOF), then the
        IncompleteRead exception can be used to detect the problem.
        """
        s = []
        while amt > 0:
            chunk = self.fp.read(min(amt, MAXAMOUNT))
            if not chunk:
                raise IncompleteRead(''.join(s), amt)
            s.append(chunk)
            amt -= len(chunk)
        return ''.join(s)

    def getheader(self, name, default=None):
        """Return header *name*, or *default*; needs begin() to have run."""
        if self.msg is None:
            raise ResponseNotReady()
        return self.msg.getheader(name, default)

    def getheaders(self):
        """Return list of (header, value) tuples."""
        if self.msg is None:
            raise ResponseNotReady()
        return self.msg.items()
class HTTPConnection:
    """One connection to an HTTP server.

    Enforces the request/response state machine described in the module
    docstring: putrequest(), putheader()*, endheaders(), getresponse().
    request() bundles the first three steps.
    """

    _http_vsn = 11
    _http_vsn_str = 'HTTP/1.1'

    response_class = HTTPResponse
    default_port = HTTP_PORT
    auto_open = 1       # connect automatically on the first send()
    debuglevel = 0
    strict = 0

    def __init__(self, host, port=None, strict=None,
                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Record the target; no network activity happens here.

        host    -- host name, optionally 'host:port' or '[ipv6]:port'
        port    -- port number; None takes it from *host* or the default
        strict  -- passed on to the response class when not None
        timeout -- socket timeout used by connect()
        """
        self.timeout = timeout
        self.sock = None
        self._buffer = []
        self.__response = None
        self.__state = _CS_IDLE
        self._method = None

        self._set_hostport(host, port)
        if strict is not None:
            self.strict = strict

    def _set_hostport(self, host, port):
        """Set self.host and self.port, parsing a port out of *host*.

        An empty port ("example.com:") selects the default port, like a
        missing one.  Raises InvalidURL for a non-numeric port.
        """
        if port is None:
            i = host.rfind(':')
            j = host.rfind(']')         # ipv6 addresses have [...]
            if i > j:
                port_text = host[i+1:]
                if port_text:
                    try:
                        port = int(port_text)
                    except ValueError:
                        raise InvalidURL("nonnumeric port: '%s'" % port_text)
                else:
                    # "http://foo.com:/" means the same as "http://foo.com/"
                    port = self.default_port
                host = host[:i]
            else:
                port = self.default_port
            if host and host[0] == '[' and host[-1] == ']':
                host = host[1:-1]
        self.host = host
        self.port = port

    def set_debuglevel(self, level):
        """Set the debug output level; values > 0 print traffic."""
        self.debuglevel = level

    def connect(self):
        """Connect to the host and port specified in __init__."""
        self.sock = socket.create_connection((self.host, self.port),
                                             self.timeout)

    def close(self):
        """Close the connection to the HTTP server."""
        if self.sock:
            self.sock.close()   # close it manually... there may be other refs
            self.sock = None
        if self.__response:
            self.__response.close()
            self.__response = None
        self.__state = _CS_IDLE

    def send(self, str):
        """Send `str' to the server.

        `str' is either a string or an object with a read() method, which
        is then sent in blocks.  Connects first if needed and auto_open is
        set, otherwise raises NotConnected.  Socket errors propagate; on a
        broken pipe the connection is closed first.
        """
        if self.sock is None:
            if self.auto_open:
                self.connect()
            else:
                raise NotConnected()

        # send the data to the server. if we get a broken pipe, then close
        # the socket. we want to reconnect when somebody tries to send again.
        #
        # NOTE: we DO propagate the error, though, because we cannot simply
        #       ignore the error... the caller will know if they can retry.
        if self.debuglevel > 0:
            print("send: %s" % repr(str))
        try:
            blocksize = 8192
            if hasattr(str, 'read'):
                if self.debuglevel > 0:
                    print("sendIng a read()able")
                data = str.read(blocksize)
                while data:
                    self.sock.sendall(data)
                    data = str.read(blocksize)
            else:
                self.sock.sendall(str)
        except socket.error as v:
            if v.args and v.args[0] == errno.EPIPE:     # Broken pipe
                self.close()
            raise

    def _output(self, s):
        """Add a line of output to the current request buffer.

        Assumes that the line does *not* end with \\r\\n.
        """
        self._buffer.append(s)

    def _send_output(self, message_body=None):
        """Send the currently buffered request and clear the buffer.

        Appends an extra \\r\\n to the buffer.
        A message_body may be specified, to be appended to the request.
        """
        self._buffer.extend(("", ""))
        msg = "\r\n".join(self._buffer)
        del self._buffer[:]
        # If msg and message_body are sent in a single send() call,
        # it will avoid performance problems caused by the interaction
        # between delayed ack and the Nagle algorithm.
        if isinstance(message_body, str):
            msg += message_body
            message_body = None
        self.send(msg)
        if message_body is not None:
            # message_body was not a string (i.e. it is a file) and
            # we must run the risk of Nagle
            self.send(message_body)

    def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
        """Send a request to the server.

        `method' specifies an HTTP request method, e.g. 'GET'.
        `url' specifies the object being requested, e.g. '/index.html'.
        `skip_host' if True does not add automatically a 'Host:' header
        `skip_accept_encoding' if True does not add automatically an
           'Accept-Encoding:' header

        Raises CannotSendRequest unless the connection is idle.
        """

        # if a prior response has been completed, then forget about it.
        if self.__response and self.__response.isclosed():
            self.__response = None

        # in certain cases, we cannot issue another request on this connection.
        # this occurs when:
        #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
        #   2) a response to a previous request has signalled that it is going
        #      to close the connection upon completion.
        #   3) the headers for the previous response have not been read, thus
        #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
        #
        # if there is no prior response, then we can request at will.
        #
        # if point (2) is true, then we will have passed the socket to the
        # response (effectively meaning, "there is no prior response"), and
        # will open a new one when a new request is made.
        #
        # Note: if a prior response exists, then we *can* start a new request.
        #       We are not allowed to begin fetching the response to this new
        #       request, however, until that prior response is complete.
        #
        if self.__state == _CS_IDLE:
            self.__state = _CS_REQ_STARTED
        else:
            raise CannotSendRequest()

        # Save the method we use, we need it later in the response phase
        self._method = method
        if not url:
            url = '/'
        request_line = '%s %s %s' % (method, url, self._http_vsn_str)

        self._output(request_line)

        if self._http_vsn == 11:
            # Issue some standard headers for better HTTP/1.1 compliance

            if not skip_host:
                # this header is issued *only* for HTTP/1.1
                # connections. more specifically, this means it is
                # only issued when the client uses the new
                # HTTPConnection() class. backwards-compat clients
                # will be using HTTP/1.0 and those clients may be
                # issuing this header themselves. we should NOT issue
                # it twice; some web servers (such as Apache) barf
                # when they see two Host: headers

                # If we need a non-standard port, include it in the
                # header.  If the request is going through a proxy,
                # use the host of the actual URL, not the host of the
                # proxy.

                netloc = ''
                if url.startswith('http'):
                    netloc = urlsplit(url)[1]

                if netloc:
                    try:
                        netloc_enc = netloc.encode("ascii")
                    except UnicodeEncodeError:
                        netloc_enc = netloc.encode("idna")
                    self.putheader('Host', netloc_enc)
                else:
                    try:
                        host_enc = self.host.encode("ascii")
                    except UnicodeEncodeError:
                        host_enc = self.host.encode("idna")
                    if self.port == self.default_port:
                        self.putheader('Host', host_enc)
                    else:
                        self.putheader('Host', "%s:%s" % (host_enc, self.port))

            # note: we are assuming that clients will not attempt to set these
            #       headers since *this* library must deal with the
            #       consequences. this also means that when the supporting
            #       libraries are updated to recognize other forms, then this
            #       code should be changed (removed or updated).

            # we only want a Content-Encoding of "identity" since we don't
            # support encodings such as x-gzip or x-deflate.
            if not skip_accept_encoding:
                self.putheader('Accept-Encoding', 'identity')

            # we can accept "chunked" Transfer-Encodings, but no others
            # NOTE: no TE header implies *only* "chunked"
            #self.putheader('TE', 'chunked')

            # if TE is supplied in the header, then it must appear in a
            # Connection header.
            #self.putheader('Connection', 'TE')

        else:
            # For HTTP/1.0, the server will assume "not chunked"
            pass

    def putheader(self, header, *values):
        """Send a request header line to the server.

        For example: h.putheader('Accept', 'text/html')

        Several values are emitted as one folded header.  Raises
        CannotSendHeader unless a request has been started.
        """
        if self.__state != _CS_REQ_STARTED:
            raise CannotSendHeader()

        header_line = '%s: %s' % (header, '\r\n\t'.join(values))
        self._output(header_line)

    def endheaders(self, message_body=None):
        """Indicate that the last header line has been sent to the server.

        This method sends the request to the server.  The optional
        message_body argument can be used to pass message body
        associated with the request.  The message body will be sent in
        the same packet as the message headers if possible.  The
        message_body should be a string.

        Raises CannotSendHeader unless a request has been started.
        """
        if self.__state == _CS_REQ_STARTED:
            self.__state = _CS_REQ_SENT
        else:
            raise CannotSendHeader()
        self._send_output(message_body)

    def request(self, method, url, body=None, headers={}):
        """Send a complete request to the server.

        *headers* is only read, never modified.  If the first attempt hits
        a broken pipe and auto_open is set, the request is sent once more
        on a fresh connection.
        """
        try:
            self._send_request(method, url, body, headers)
        except socket.error as v:
            # trap 'Broken pipe' if we're allowed to automatically reconnect
            if (not v.args or v.args[0] != errno.EPIPE
                    or not self.auto_open):
                raise
            # try one more time
            self._send_request(method, url, body, headers)

    def _set_content_length(self, body):
        """Emit a Content-Length header for *body* if its size is knowable."""
        thelen = None
        try:
            thelen = str(len(body))
        except TypeError:
            # If this is a file-like object, try to
            # fstat its file descriptor
            import os
            try:
                thelen = str(os.fstat(body.fileno()).st_size)
            except (AttributeError, OSError):
                # Don't send a length if this failed
                if self.debuglevel > 0:
                    print("Cannot stat!!")

        if thelen is not None:
            self.putheader('Content-Length', thelen)

    def _send_request(self, method, url, body, headers):
        """Emit request line, headers and body for request()."""
        # honour explicitly requested Host: and Accept-Encoding headers
        header_names = dict.fromkeys([k.lower() for k in headers])
        skips = {}
        if 'host' in header_names:
            skips['skip_host'] = 1
        if 'accept-encoding' in header_names:
            skips['skip_accept_encoding'] = 1

        self.putrequest(method, url, **skips)

        if body and ('content-length' not in header_names):
            self._set_content_length(body)
        for hdr, value in headers.items():
            self.putheader(hdr, value)
        self.endheaders(body)

    def getresponse(self, buffering=False):
        """Get the response from the server.

        Raises ResponseNotReady unless a request has been fully sent and
        any previous response on this connection has been completed.  If
        reading the status line or headers fails, the half-built response
        is closed before the error propagates.
        """

        # if a prior response has been completed, then forget about it.
        if self.__response and self.__response.isclosed():
            self.__response = None

        #
        # if a prior response exists, then it must be completed (otherwise, we
        # cannot read this response's header to determine the connection-close
        # behavior)
        #
        # note: if a prior response existed, but was connection-close, then the
        # socket and response were made independent of this HTTPConnection
        # object since a new request requires that we open a whole new
        # connection
        #
        # this means the prior response had one of two states:
        #   1) will_close: this connection was reset and the prior socket and
        #                  response operate independently
        #   2) persistent: the response was retained and we await its
        #                  isclosed() status to become true.
        #
        if self.__state != _CS_REQ_SENT or self.__response:
            raise ResponseNotReady()

        args = (self.sock,)
        kwds = {"strict": self.strict, "method": self._method}
        if self.debuglevel > 0:
            args += (self.debuglevel,)
        if buffering:
            # only add this keyword if non-default, for compatibility with
            # other response_classes.
            kwds["buffering"] = True
        response = self.response_class(*args, **kwds)

        try:
            response.begin()
            assert response.will_close != _UNKNOWN
            self.__state = _CS_IDLE

            if response.will_close:
                # this effectively passes the connection to the response
                self.close()
            else:
                # remember this, so we can tell when it is complete
                self.__response = response

            return response
        except BaseException:
            # Don't leak the response's file object; always re-raised.
            response.close()
            raise
993 class HTTP:
994 "Compatibility class with httplib.py from 1.5."
996 _http_vsn = 10
997 _http_vsn_str = 'HTTP/1.0'
999 debuglevel = 0
1001 _connection_class = HTTPConnection
1003 def __init__(self, host='', port=None, strict=None):
1004 "Provide a default host, since the superclass requires one."
1006 # some joker passed 0 explicitly, meaning default port
1007 if port == 0:
1008 port = None
1010 # Note that we may pass an empty string as the host; this will throw
1011 # an error when we attempt to connect. Presumably, the client code
1012 # will call connect before then, with a proper host.
1013 self._setup(self._connection_class(host, port, strict))
1015 def _setup(self, conn):
1016 self._conn = conn
1018 # set up delegation to flesh out interface
1019 self.send = conn.send
1020 self.putrequest = conn.putrequest
1021 self.putheader = conn.putheader
1022 self.endheaders = conn.endheaders
1023 self.set_debuglevel = conn.set_debuglevel
1025 conn._http_vsn = self._http_vsn
1026 conn._http_vsn_str = self._http_vsn_str
1028 self.file = None
1030 def connect(self, host=None, port=None):
1031 "Accept arguments to set the host/port, since the superclass doesn't."
1033 if host is not None:
1034 self._conn._set_hostport(host, port)
1035 self._conn.connect()
1037 def getfile(self):
1038 "Provide a getfile, since the superclass' does not use this concept."
1039 return self.file
def getreply(self, buffering=False):
    """Compat definition since superclass does not define it.

    Returns a tuple consisting of:
    - server status code (e.g. '200' if all goes well)
    - server "reason" corresponding to status code
    - any RFC822 headers in the response from the server

    If the wrapped connection raises BadStatusLine, the tuple is
    (-1, <offending line>, None); self.headers is set to None and
    self.file is a file object made from the socket, so that getfile()
    still gives access to the raw data.
    """
    try:
        if not buffering:
            response = self._conn.getresponse()
        else:
            # only add this keyword if non-default for compatibility
            # with other connection classes
            response = self._conn.getresponse(buffering)
    except BadStatusLine as e:
        ### hmm. if getresponse() ever closes the socket on a bad request,
        ### then we are going to have problems with self.sock

        ### should we keep this behavior? do people use it?
        # keep the socket open (as a file), and return it
        raw_file = self._conn.sock.makefile('rb', 0)

        # close our socket -- we want to restart after any protocol error
        self.close()

        # close() resets self.file to None, so the file has to be stored
        # only after closing; otherwise getfile() would return None.
        self.file = raw_file
        self.headers = None
        return -1, e.line, None

    self.headers = response.msg
    self.file = response.fp
    return response.status, response.reason, response.msg
def close(self):
    """Close the wrapped connection and drop the current file reference."""
    self._conn.close()

    # self.file normally aliases response.fp, which the wrapped connection
    # closes itself, so only the reference is cleared here.
    ### After a status of -1 the file is owned by this object instead; it
    ### is not closed explicitly -- losing the last reference is relied on.
    self.file = None
1084 try:
1085 import ssl
1086 except ImportError:
1087 pass
1088 else:
class HTTPSConnection(HTTPConnection):
    """HTTPConnection variant that communicates via SSL."""

    default_port = HTTPS_PORT

    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Initialise the base connection and remember the key/cert files."""
        HTTPConnection.__init__(self, host, port, strict, timeout)
        self.key_file, self.cert_file = key_file, cert_file

    def connect(self):
        """Connect to the host on its (SSL) port and store the wrapped socket."""
        address = (self.host, self.port)
        plain_sock = socket.create_connection(address, self.timeout)
        self.sock = ssl.wrap_socket(plain_sock, self.key_file,
                                    self.cert_file)

# Export the class only when it exists, i.e. when ssl could be imported.
__all__.append("HTTPSConnection")
class HTTPS(HTTP):
    """Compatibility with the 1.5 httplib interface, over HTTPSConnection.

    Python 1.5.2 had no HTTPS class, but the interface it defined for
    sending http requests is equally useful for https.
    """

    _connection_class = HTTPSConnection

    def __init__(self, host='', port=None, key_file=None, cert_file=None,
                 strict=None):
        """Build the SSL connection with a default host and the X509 files."""
        # Compensate for bad input: an explicit port of 0 means "default".
        chosen_port = None if port == 0 else port
        connection = self._connection_class(host, chosen_port, key_file,
                                            cert_file, strict)
        self._setup(connection)

        # Never used for anything by this class; kept only for
        # compatibility with post-1.5.2 CVS.
        self.key_file = key_file
        self.cert_file = cert_file
def FakeSocket(sock, sslobj):
    """Deprecated shim: emit a DeprecationWarning and return sslobj.

    sock is accepted but not used.
    """
    message = ("FakeSocket is deprecated, and won't be in 3.x. "
               "Use the result of ssl.wrap_socket() directly instead.")
    # stacklevel=2 attributes the warning to the caller of FakeSocket.
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return sslobj
class HTTPException(Exception):
    """Base class of the exceptions defined in this module.

    Subclasses that define an __init__ must call Exception.__init__ or
    set self.args themselves; otherwise str() will fail.
    """
class NotConnected(HTTPException):
    """Marker exception; behaves exactly like HTTPException."""
    # NOTE(review): the raise sites are outside this part of the file; by
    # name it presumably reports use of a connection that is not open.
class InvalidURL(HTTPException):
    """Marker exception; behaves exactly like HTTPException."""
    # NOTE(review): the raise sites are outside this part of the file; by
    # name it presumably reports a URL/host string that cannot be parsed.
class UnknownProtocol(HTTPException):
    """HTTPException that records the protocol version it was given.

    The version is available as self.version and as the only element of
    self.args (set here because Exception.__init__ is not called).
    """

    def __init__(self, version):
        self.version = version
        self.args = (version,)
class UnknownTransferEncoding(HTTPException):
    """Marker exception; behaves exactly like HTTPException."""
    # NOTE(review): the raise sites are outside this part of the file; by
    # name it presumably concerns an unsupported Transfer-Encoding.
class UnimplementedFileMode(HTTPException):
    """Marker exception; behaves exactly like HTTPException."""
    # NOTE(review): the raise sites are outside this part of the file; by
    # name it presumably concerns a file mode that is not supported.
class IncompleteRead(HTTPException):
    """HTTPException carrying the data that was read before a read ended.

    partial  -- the data obtained so far (also the only element of args)
    expected -- how many more bytes were expected, or None if not given
    """

    def __init__(self, partial, expected=None):
        self.partial = partial
        self.expected = expected
        self.args = (partial,)

    def __repr__(self):
        # Mention the outstanding byte count only when it was supplied.
        suffix = ''
        if self.expected is not None:
            suffix = ', %i more expected' % self.expected
        return 'IncompleteRead(%i bytes read%s)' % (len(self.partial),
                                                    suffix)

    def __str__(self):
        # str() and repr() deliberately give the same summary.
        return repr(self)
class ImproperConnectionState(HTTPException):
    """Common base of CannotSendRequest, CannotSendHeader and
    ResponseNotReady; adds nothing to HTTPException itself."""
    # NOTE(review): presumably tied to the connection state machine
    # described in the module docstring -- raise sites are not in view.
class CannotSendRequest(ImproperConnectionState):
    """Marker exception; behaves exactly like ImproperConnectionState."""
    # NOTE(review): the raise sites are outside this part of the file; by
    # name it presumably rejects starting a request in the wrong state.
class CannotSendHeader(ImproperConnectionState):
    """Marker exception; behaves exactly like ImproperConnectionState."""
    # NOTE(review): the raise sites are outside this part of the file; by
    # name it presumably rejects sending a header in the wrong state.
class ResponseNotReady(ImproperConnectionState):
    """Marker exception; behaves exactly like ImproperConnectionState."""
    # NOTE(review): the raise sites are outside this part of the file; by
    # name it presumably rejects fetching a response in the wrong state.
class BadStatusLine(HTTPException):
    """HTTPException carrying a status line that could not be used.

    The line is kept unchanged in self.line.  self.args holds the line
    too, except that an empty line is stored as its repr() so that str()
    of the exception (and therefore a traceback) shows '' instead of
    nothing at all.
    """

    def __init__(self, line):
        # Exception.__init__ is not called, so args must be set here.
        self.args = (line if line else repr(line)),
        self.line = line
# Alias kept for backwards compatibility: "error" is another name for
# HTTPException.
error = HTTPException
class LineAndFileWrapper:
    """A limited file-like object for HTTP/0.9 responses.

    The status-line parsing code calls readline(), which normally gets
    the HTTP status line.  For a 0.9 response, however, this is actually
    the first line of the body!  Clients need to get a readable file
    object that contains that line.

    The wrapper serves the saved line first and then hands all reading
    over to the underlying file.  Attributes it does not define itself
    are looked up on the underlying file.
    """

    def __init__(self, line, file):
        self._line = line
        self._file = file
        self._line_consumed = 0
        self._line_offset = 0
        self._line_left = len(line)
        if not self._line_left:
            # Nothing is buffered, so delegate straight away instead of
            # leaving a pending "line" of length zero behind.
            self._done()

    def __getattr__(self, attr):
        return getattr(self._file, attr)

    def _done(self):
        # called when the last byte is read from the line.  After the
        # call, all read methods are delegated to the underlying file
        # object.
        self._line_consumed = 1
        self.read = self._file.read
        self.readline = self._file.readline
        self.readlines = self._file.readlines

    def read(self, amt=None):
        """Read up to amt bytes; None or a negative amt reads everything."""
        if amt is not None and amt < 0:
            # Follow the usual file convention instead of corrupting the
            # offset bookkeeping with a negative slice bound.
            amt = None
        if self._line_consumed:
            # Only reachable through a bound method captured before the
            # line ran out.  Do not forward None: the underlying file is
            # called without an argument, as in the branch below.
            if amt is None:
                return self._file.read()
            return self._file.read(amt)
        if amt is None or amt > self._line_left:
            # The request extends past the saved line: return what is
            # left of it plus data from the file.
            s = self._line[self._line_offset:]
            self._done()
            if amt is None:
                return s + self._file.read()
            return s + self._file.read(amt - len(s))
        # The request is satisfied entirely from the saved line.
        i = self._line_offset
        j = i + amt
        s = self._line[i:j]
        self._line_offset = j
        self._line_left -= amt
        if self._line_left == 0:
            self._done()
        return s

    def readline(self):
        """Return the rest of the saved line, or the file's next line."""
        if self._line_consumed:
            return self._file.readline()
        s = self._line[self._line_offset:]
        self._done()
        return s

    def readlines(self, size=None):
        """Return the rest of the saved line followed by the file's lines."""
        if self._line_consumed:
            # See read(): never pass None through to the underlying file.
            if size is None:
                return self._file.readlines()
            return self._file.readlines(size)
        L = [self._line[self._line_offset:]]
        self._done()
        if size is None:
            return L + self._file.readlines()
        return L + self._file.readlines(size)
1265 def test():
1266 """Test this module.
1268 A hodge podge of tests collected here, because they have too many
1269 external dependencies for the regular test suite.
1272 import sys
1273 import getopt
1274 opts, args = getopt.getopt(sys.argv[1:], 'd')
1275 dl = 0
1276 for o, a in opts:
1277 if o == '-d': dl = dl + 1
1278 host = 'www.python.org'
1279 selector = '/'
1280 if args[0:]: host = args[0]
1281 if args[1:]: selector = args[1]
1282 h = HTTP()
1283 h.set_debuglevel(dl)
1284 h.connect(host)
1285 h.putrequest('GET', selector)
1286 h.endheaders()
1287 status, reason, headers = h.getreply()
1288 print 'status =', status
1289 print 'reason =', reason
1290 print "read", len(h.getfile().read())
1291 print
1292 if headers:
1293 for header in headers.headers: print header.strip()
1294 print
1296 # minimal test that code to extract host from url works
1297 class HTTP11(HTTP):
1298 _http_vsn = 11
1299 _http_vsn_str = 'HTTP/1.1'
1301 h = HTTP11('www.python.org')
1302 h.putrequest('GET', 'http://www.python.org/~jeremy/')
1303 h.endheaders()
1304 h.getreply()
1305 h.close()
1307 try:
1308 import ssl
1309 except ImportError:
1310 pass
1311 else:
1313 for host, selector in (('sourceforge.net', '/projects/python'),
1315 print "https://%s%s" % (host, selector)
1316 hs = HTTPS()
1317 hs.set_debuglevel(dl)
1318 hs.connect(host)
1319 hs.putrequest('GET', selector)
1320 hs.endheaders()
1321 status, reason, headers = hs.getreply()
1322 print 'status =', status
1323 print 'reason =', reason
1324 print "read", len(hs.getfile().read())
1325 print
1326 if headers:
1327 for header in headers.headers: print header.strip()
1328 print
# Run the ad-hoc network checks in test() when this file is executed as a
# script.
if __name__ == '__main__':
    test()