logging: fixed lack of use of encoding attribute specified on a stream.
[python.git] / Lib / httplib.py
blob62cd0c74974f908b79ab52fbeea0855f0aba5925
1 """HTTP/1.1 client library
3 <intro stuff goes here>
4 <other stuff, too>
HTTPConnection goes through a number of "states", which define when a client
may legally make another request or fetch the response for a particular
request. This diagram details these state transitions:

    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |
      | response = getresponse()
      v
    Unread-response   [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent

This diagram presents the following rules:
  -- a second request may not be started until {response-headers-read}
  -- a response [object] cannot be retrieved until {request-sent}
  -- there is no differentiation between an unread response body and a
     partially read response body

Note: this enforcement is applied by the HTTPConnection class. The
      HTTPResponse class does not enforce this state machine, which
      implies sophisticated clients may accelerate the request/response
      pipeline. Caution should be taken, though: accelerating the states
      beyond the above pattern may imply knowledge of the server's
      connection-close behavior for certain requests. For example, it
      is impossible to tell whether the server will close the connection
      UNTIL the response headers have been read; this means that further
      requests cannot be placed into the pipeline until it is known that
      the server will NOT be closing the connection.

Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
67 """
69 import socket
70 from urlparse import urlsplit
71 import warnings
72 from test.test_support import catch_warning
73 with catch_warning(record=False):
74 warnings.filterwarnings("ignore", ".*mimetools has been removed",
75 DeprecationWarning)
76 import mimetools
78 try:
79 from cStringIO import StringIO
80 except ImportError:
81 from StringIO import StringIO
# Public names exported by ``from httplib import *``.  HTTPSConnection is
# appended further down, but only when the ssl module can be imported.
__all__ = ["HTTP", "HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "error", "responses"]

# Default ports; used as the ``default_port`` of HTTPConnection and
# HTTPSConnection respectively.
HTTP_PORT = 80
HTTPS_PORT = 443

# Placeholder stored in HTTPResponse attributes until begin() has parsed
# the status line and headers.
_UNKNOWN = 'UNKNOWN'

# connection states
# (values of HTTPConnection.__state; see the table in the module docstring)
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'

# status codes
# informational
CONTINUE = 100
SWITCHING_PROTOCOLS = 101
PROCESSING = 102

# successful
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207
IM_USED = 226

# redirection
MULTIPLE_CHOICES = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307

# client error
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
METHOD_NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTHENTICATION_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
UNPROCESSABLE_ENTITY = 422
LOCKED = 423
FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426

# server error
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510

# Mapping status codes to official W3C names
# NOTE: several codes that have a constant above (102, 207, 226, 422, 423,
# 424, 426, 507, 510) have no entry here, so look-ups for them raise
# KeyError unless the caller uses .get().
responses = {
    100: 'Continue',
    101: 'Switching Protocols',

    200: 'OK',
    201: 'Created',
    202: 'Accepted',
    203: 'Non-Authoritative Information',
    204: 'No Content',
    205: 'Reset Content',
    206: 'Partial Content',

    300: 'Multiple Choices',
    301: 'Moved Permanently',
    302: 'Found',
    303: 'See Other',
    304: 'Not Modified',
    305: 'Use Proxy',
    306: '(Unused)',
    307: 'Temporary Redirect',

    400: 'Bad Request',
    401: 'Unauthorized',
    402: 'Payment Required',
    403: 'Forbidden',
    404: 'Not Found',
    405: 'Method Not Allowed',
    406: 'Not Acceptable',
    407: 'Proxy Authentication Required',
    408: 'Request Timeout',
    409: 'Conflict',
    410: 'Gone',
    411: 'Length Required',
    412: 'Precondition Failed',
    413: 'Request Entity Too Large',
    414: 'Request-URI Too Long',
    415: 'Unsupported Media Type',
    416: 'Requested Range Not Satisfiable',
    417: 'Expectation Failed',

    500: 'Internal Server Error',
    501: 'Not Implemented',
    502: 'Bad Gateway',
    503: 'Service Unavailable',
    504: 'Gateway Timeout',
    505: 'HTTP Version Not Supported',
}

# maximal amount of data to read at one time in _safe_read
MAXAMOUNT = 1048576
class HTTPMessage(mimetools.Message):
    """Header container for an HTTP response.

    Specialises mimetools.Message so that repeated header fields are merged
    into one comma-separated value instead of replacing one another.
    """

    def addheader(self, key, value):
        """Add header for field key handling repeats."""
        prev = self.dict.get(key)
        if prev is None:
            self.dict[key] = value
        else:
            # Repeated field: append to the value already stored.
            combined = ", ".join((prev, value))
            self.dict[key] = combined

    def addcontinue(self, key, more):
        """Add more field data from a continuation line."""
        # Raises KeyError if no header named `key` has been stored yet.
        prev = self.dict[key]
        self.dict[key] = prev + "\n " + more

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that terminates them.
        The (normally blank) line that ends the headers is skipped, but not
        included in the returned list. If a non-header line ends the headers,
        (which is an error), an attempt is made to backspace over it; it is
        never included in the returned list.

        The variable self.status is set to the empty string if all went well,
        otherwise it is an error message. The variable self.headers is a
        completely uninterpreted list of lines contained in the header (so
        printing them will reproduce the header exactly as it appears in the
        file).

        If multiple header fields with the same name occur, they are combined
        according to the rules in RFC 2616 sec 4.2:

        Appending each subsequent field-value to the first, each separated
        by a comma. The order in which header fields with the same field-name
        are received is significant to the interpretation of the combined
        field value.
        """
        # XXX The implementation overrides the readheaders() method of
        # rfc822.Message. The base class design isn't amenable to
        # customized behavior here so the method here is a copy of the
        # base class code with a few small changes.

        self.dict = {}
        self.unixfrom = ''
        self.headers = hlist = []
        self.status = ''
        headerseen = ""
        firstline = 1
        # Work out how (if at all) a line read too far can be pushed back:
        # prefer fp.unread(), otherwise remember the position before each
        # line so we can seek() back to it.
        startofline = unread = tell = None
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while True:
            if tell:
                try:
                    startofline = tell()
                except IOError:
                    # tell() failed: give up on seeking for the rest of
                    # the parse.
                    startofline = tell = None
                    self.seekable = 0
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line.startswith('From '):
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # XXX Not sure if continuation lines are handled properly
                # for http and/or for repeating headers
                # It's a continuation line.
                hlist.append(line)
                self.addcontinue(headerseen, line.strip())
                continue
            elif self.iscomment(line):
                # It's a comment. Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here! The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                hlist.append(line)
                # The value is whatever follows "<name>:" on the line.
                self.addheader(headerseen, line[len(headerseen)+1:].strip())
                continue
            else:
                # It's not a header line; throw it back and stop here.
                if not self.dict:
                    self.status = 'No headers'
                else:
                    self.status = 'Non-header line where header expected'
                # Try to undo the read.
                if unread:
                    unread(line)
                elif tell:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break
317 class HTTPResponse:
319 # strict: If true, raise BadStatusLine if the status line can't be
320 # parsed as a valid HTTP/1.0 or 1.1 status line. By default it is
321 # false because it prevents clients from talking to HTTP/0.9
322 # servers. Note that a response with a sufficiently corrupted
323 # status line will look like an HTTP/0.9 response.
325 # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
327 def __init__(self, sock, debuglevel=0, strict=0, method=None):
328 self.fp = sock.makefile('rb', 0)
329 self.debuglevel = debuglevel
330 self.strict = strict
331 self._method = method
333 self.msg = None
335 # from the Status-Line of the response
336 self.version = _UNKNOWN # HTTP-Version
337 self.status = _UNKNOWN # Status-Code
338 self.reason = _UNKNOWN # Reason-Phrase
340 self.chunked = _UNKNOWN # is "chunked" being used?
341 self.chunk_left = _UNKNOWN # bytes left to read in current chunk
342 self.length = _UNKNOWN # number of bytes left in response
343 self.will_close = _UNKNOWN # conn will close at end of response
345 def _read_status(self):
346 # Initialize with Simple-Response defaults
347 line = self.fp.readline()
348 if self.debuglevel > 0:
349 print "reply:", repr(line)
350 if not line:
351 # Presumably, the server closed the connection before
352 # sending a valid response.
353 raise BadStatusLine(line)
354 try:
355 [version, status, reason] = line.split(None, 2)
356 except ValueError:
357 try:
358 [version, status] = line.split(None, 1)
359 reason = ""
360 except ValueError:
361 # empty version will cause next test to fail and status
362 # will be treated as 0.9 response.
363 version = ""
364 if not version.startswith('HTTP/'):
365 if self.strict:
366 self.close()
367 raise BadStatusLine(line)
368 else:
369 # assume it's a Simple-Response from an 0.9 server
370 self.fp = LineAndFileWrapper(line, self.fp)
371 return "HTTP/0.9", 200, ""
373 # The status code is a three-digit number
374 try:
375 status = int(status)
376 if status < 100 or status > 999:
377 raise BadStatusLine(line)
378 except ValueError:
379 raise BadStatusLine(line)
380 return version, status, reason
382 def begin(self):
383 if self.msg is not None:
384 # we've already started reading the response
385 return
387 # read until we get a non-100 response
388 while True:
389 version, status, reason = self._read_status()
390 if status != CONTINUE:
391 break
392 # skip the header from the 100 response
393 while True:
394 skip = self.fp.readline().strip()
395 if not skip:
396 break
397 if self.debuglevel > 0:
398 print "header:", skip
400 self.status = status
401 self.reason = reason.strip()
402 if version == 'HTTP/1.0':
403 self.version = 10
404 elif version.startswith('HTTP/1.'):
405 self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
406 elif version == 'HTTP/0.9':
407 self.version = 9
408 else:
409 raise UnknownProtocol(version)
411 if self.version == 9:
412 self.length = None
413 self.chunked = 0
414 self.will_close = 1
415 self.msg = HTTPMessage(StringIO())
416 return
418 self.msg = HTTPMessage(self.fp, 0)
419 if self.debuglevel > 0:
420 for hdr in self.msg.headers:
421 print "header:", hdr,
423 # don't let the msg keep an fp
424 self.msg.fp = None
426 # are we using the chunked-style of transfer encoding?
427 tr_enc = self.msg.getheader('transfer-encoding')
428 if tr_enc and tr_enc.lower() == "chunked":
429 self.chunked = 1
430 self.chunk_left = None
431 else:
432 self.chunked = 0
434 # will the connection close at the end of the response?
435 self.will_close = self._check_close()
437 # do we have a Content-Length?
438 # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
439 length = self.msg.getheader('content-length')
440 if length and not self.chunked:
441 try:
442 self.length = int(length)
443 except ValueError:
444 self.length = None
445 else:
446 if self.length < 0: # ignore nonsensical negative lengths
447 self.length = None
448 else:
449 self.length = None
451 # does the body have a fixed length? (of zero)
452 if (status == NO_CONTENT or status == NOT_MODIFIED or
453 100 <= status < 200 or # 1xx codes
454 self._method == 'HEAD'):
455 self.length = 0
457 # if the connection remains open, and we aren't using chunked, and
458 # a content-length was not provided, then assume that the connection
459 # WILL close.
460 if not self.will_close and \
461 not self.chunked and \
462 self.length is None:
463 self.will_close = 1
465 def _check_close(self):
466 conn = self.msg.getheader('connection')
467 if self.version == 11:
468 # An HTTP/1.1 proxy is assumed to stay open unless
469 # explicitly closed.
470 conn = self.msg.getheader('connection')
471 if conn and "close" in conn.lower():
472 return True
473 return False
475 # Some HTTP/1.0 implementations have support for persistent
476 # connections, using rules different than HTTP/1.1.
478 # For older HTTP, Keep-Alive indicates persistent connection.
479 if self.msg.getheader('keep-alive'):
480 return False
482 # At least Akamai returns a "Connection: Keep-Alive" header,
483 # which was supposed to be sent by the client.
484 if conn and "keep-alive" in conn.lower():
485 return False
487 # Proxy-Connection is a netscape hack.
488 pconn = self.msg.getheader('proxy-connection')
489 if pconn and "keep-alive" in pconn.lower():
490 return False
492 # otherwise, assume it will close
493 return True
495 def close(self):
496 if self.fp:
497 self.fp.close()
498 self.fp = None
500 def isclosed(self):
501 # NOTE: it is possible that we will not ever call self.close(). This
502 # case occurs when will_close is TRUE, length is None, and we
503 # read up to the last byte, but NOT past it.
505 # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
506 # called, meaning self.isclosed() is meaningful.
507 return self.fp is None
509 # XXX It would be nice to have readline and __iter__ for this, too.
511 def read(self, amt=None):
512 if self.fp is None:
513 return ''
515 if self.chunked:
516 return self._read_chunked(amt)
518 if amt is None:
519 # unbounded read
520 if self.length is None:
521 s = self.fp.read()
522 else:
523 s = self._safe_read(self.length)
524 self.length = 0
525 self.close() # we read everything
526 return s
528 if self.length is not None:
529 if amt > self.length:
530 # clip the read to the "end of response"
531 amt = self.length
533 # we do not use _safe_read() here because this may be a .will_close
534 # connection, and the user is reading more bytes than will be provided
535 # (for example, reading in 1k chunks)
536 s = self.fp.read(amt)
537 if self.length is not None:
538 self.length -= len(s)
539 if not self.length:
540 self.close()
541 return s
543 def _read_chunked(self, amt):
544 assert self.chunked != _UNKNOWN
545 chunk_left = self.chunk_left
546 value = ''
548 # XXX This accumulates chunks by repeated string concatenation,
549 # which is not efficient as the number or size of chunks gets big.
550 while True:
551 if chunk_left is None:
552 line = self.fp.readline()
553 i = line.find(';')
554 if i >= 0:
555 line = line[:i] # strip chunk-extensions
556 try:
557 chunk_left = int(line, 16)
558 except ValueError:
559 # close the connection as protocol synchronisation is
560 # probably lost
561 self.close()
562 raise IncompleteRead(value)
563 if chunk_left == 0:
564 break
565 if amt is None:
566 value += self._safe_read(chunk_left)
567 elif amt < chunk_left:
568 value += self._safe_read(amt)
569 self.chunk_left = chunk_left - amt
570 return value
571 elif amt == chunk_left:
572 value += self._safe_read(amt)
573 self._safe_read(2) # toss the CRLF at the end of the chunk
574 self.chunk_left = None
575 return value
576 else:
577 value += self._safe_read(chunk_left)
578 amt -= chunk_left
580 # we read the whole chunk, get another
581 self._safe_read(2) # toss the CRLF at the end of the chunk
582 chunk_left = None
584 # read and discard trailer up to the CRLF terminator
585 ### note: we shouldn't have any trailers!
586 while True:
587 line = self.fp.readline()
588 if not line:
589 # a vanishingly small number of sites EOF without
590 # sending the trailer
591 break
592 if line == '\r\n':
593 break
595 # we read everything; close the "file"
596 self.close()
598 return value
600 def _safe_read(self, amt):
601 """Read the number of bytes requested, compensating for partial reads.
603 Normally, we have a blocking socket, but a read() can be interrupted
604 by a signal (resulting in a partial read).
606 Note that we cannot distinguish between EOF and an interrupt when zero
607 bytes have been read. IncompleteRead() will be raised in this
608 situation.
610 This function should be used when <amt> bytes "should" be present for
611 reading. If the bytes are truly not available (due to EOF), then the
612 IncompleteRead exception can be used to detect the problem.
614 s = []
615 while amt > 0:
616 chunk = self.fp.read(min(amt, MAXAMOUNT))
617 if not chunk:
618 raise IncompleteRead(s)
619 s.append(chunk)
620 amt -= len(chunk)
621 return ''.join(s)
623 def getheader(self, name, default=None):
624 if self.msg is None:
625 raise ResponseNotReady()
626 return self.msg.getheader(name, default)
628 def getheaders(self):
629 """Return list of (header, value) tuples."""
630 if self.msg is None:
631 raise ResponseNotReady()
632 return self.msg.items()
635 class HTTPConnection:
637 _http_vsn = 11
638 _http_vsn_str = 'HTTP/1.1'
640 response_class = HTTPResponse
641 default_port = HTTP_PORT
642 auto_open = 1
643 debuglevel = 0
644 strict = 0
646 def __init__(self, host, port=None, strict=None,
647 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
648 self.timeout = timeout
649 self.sock = None
650 self._buffer = []
651 self.__response = None
652 self.__state = _CS_IDLE
653 self._method = None
655 self._set_hostport(host, port)
656 if strict is not None:
657 self.strict = strict
659 def _set_hostport(self, host, port):
660 if port is None:
661 i = host.rfind(':')
662 j = host.rfind(']') # ipv6 addresses have [...]
663 if i > j:
664 try:
665 port = int(host[i+1:])
666 except ValueError:
667 raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
668 host = host[:i]
669 else:
670 port = self.default_port
671 if host and host[0] == '[' and host[-1] == ']':
672 host = host[1:-1]
673 self.host = host
674 self.port = port
676 def set_debuglevel(self, level):
677 self.debuglevel = level
679 def connect(self):
680 """Connect to the host and port specified in __init__."""
681 self.sock = socket.create_connection((self.host,self.port),
682 self.timeout)
684 def close(self):
685 """Close the connection to the HTTP server."""
686 if self.sock:
687 self.sock.close() # close it manually... there may be other refs
688 self.sock = None
689 if self.__response:
690 self.__response.close()
691 self.__response = None
692 self.__state = _CS_IDLE
694 def send(self, str):
695 """Send `str' to the server."""
696 if self.sock is None:
697 if self.auto_open:
698 self.connect()
699 else:
700 raise NotConnected()
702 # send the data to the server. if we get a broken pipe, then close
703 # the socket. we want to reconnect when somebody tries to send again.
705 # NOTE: we DO propagate the error, though, because we cannot simply
706 # ignore the error... the caller will know if they can retry.
707 if self.debuglevel > 0:
708 print "send:", repr(str)
709 try:
710 blocksize=8192
711 if hasattr(str,'read') :
712 if self.debuglevel > 0: print "sendIng a read()able"
713 data=str.read(blocksize)
714 while data:
715 self.sock.sendall(data)
716 data=str.read(blocksize)
717 else:
718 self.sock.sendall(str)
719 except socket.error, v:
720 if v[0] == 32: # Broken pipe
721 self.close()
722 raise
724 def _output(self, s):
725 """Add a line of output to the current request buffer.
727 Assumes that the line does *not* end with \\r\\n.
729 self._buffer.append(s)
731 def _send_output(self):
732 """Send the currently buffered request and clear the buffer.
734 Appends an extra \\r\\n to the buffer.
736 self._buffer.extend(("", ""))
737 msg = "\r\n".join(self._buffer)
738 del self._buffer[:]
739 self.send(msg)
741 def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
742 """Send a request to the server.
744 `method' specifies an HTTP request method, e.g. 'GET'.
745 `url' specifies the object being requested, e.g. '/index.html'.
746 `skip_host' if True does not add automatically a 'Host:' header
747 `skip_accept_encoding' if True does not add automatically an
748 'Accept-Encoding:' header
751 # if a prior response has been completed, then forget about it.
752 if self.__response and self.__response.isclosed():
753 self.__response = None
756 # in certain cases, we cannot issue another request on this connection.
757 # this occurs when:
758 # 1) we are in the process of sending a request. (_CS_REQ_STARTED)
759 # 2) a response to a previous request has signalled that it is going
760 # to close the connection upon completion.
761 # 3) the headers for the previous response have not been read, thus
762 # we cannot determine whether point (2) is true. (_CS_REQ_SENT)
764 # if there is no prior response, then we can request at will.
766 # if point (2) is true, then we will have passed the socket to the
767 # response (effectively meaning, "there is no prior response"), and
768 # will open a new one when a new request is made.
770 # Note: if a prior response exists, then we *can* start a new request.
771 # We are not allowed to begin fetching the response to this new
772 # request, however, until that prior response is complete.
774 if self.__state == _CS_IDLE:
775 self.__state = _CS_REQ_STARTED
776 else:
777 raise CannotSendRequest()
779 # Save the method we use, we need it later in the response phase
780 self._method = method
781 if not url:
782 url = '/'
783 str = '%s %s %s' % (method, url, self._http_vsn_str)
785 self._output(str)
787 if self._http_vsn == 11:
788 # Issue some standard headers for better HTTP/1.1 compliance
790 if not skip_host:
791 # this header is issued *only* for HTTP/1.1
792 # connections. more specifically, this means it is
793 # only issued when the client uses the new
794 # HTTPConnection() class. backwards-compat clients
795 # will be using HTTP/1.0 and those clients may be
796 # issuing this header themselves. we should NOT issue
797 # it twice; some web servers (such as Apache) barf
798 # when they see two Host: headers
800 # If we need a non-standard port,include it in the
801 # header. If the request is going through a proxy,
802 # but the host of the actual URL, not the host of the
803 # proxy.
805 netloc = ''
806 if url.startswith('http'):
807 nil, netloc, nil, nil, nil = urlsplit(url)
809 if netloc:
810 try:
811 netloc_enc = netloc.encode("ascii")
812 except UnicodeEncodeError:
813 netloc_enc = netloc.encode("idna")
814 self.putheader('Host', netloc_enc)
815 else:
816 try:
817 host_enc = self.host.encode("ascii")
818 except UnicodeEncodeError:
819 host_enc = self.host.encode("idna")
820 if self.port == self.default_port:
821 self.putheader('Host', host_enc)
822 else:
823 self.putheader('Host', "%s:%s" % (host_enc, self.port))
825 # note: we are assuming that clients will not attempt to set these
826 # headers since *this* library must deal with the
827 # consequences. this also means that when the supporting
828 # libraries are updated to recognize other forms, then this
829 # code should be changed (removed or updated).
831 # we only want a Content-Encoding of "identity" since we don't
832 # support encodings such as x-gzip or x-deflate.
833 if not skip_accept_encoding:
834 self.putheader('Accept-Encoding', 'identity')
836 # we can accept "chunked" Transfer-Encodings, but no others
837 # NOTE: no TE header implies *only* "chunked"
838 #self.putheader('TE', 'chunked')
840 # if TE is supplied in the header, then it must appear in a
841 # Connection header.
842 #self.putheader('Connection', 'TE')
844 else:
845 # For HTTP/1.0, the server will assume "not chunked"
846 pass
848 def putheader(self, header, value):
849 """Send a request header line to the server.
851 For example: h.putheader('Accept', 'text/html')
853 if self.__state != _CS_REQ_STARTED:
854 raise CannotSendHeader()
856 str = '%s: %s' % (header, value)
857 self._output(str)
859 def endheaders(self):
860 """Indicate that the last header line has been sent to the server."""
862 if self.__state == _CS_REQ_STARTED:
863 self.__state = _CS_REQ_SENT
864 else:
865 raise CannotSendHeader()
867 self._send_output()
869 def request(self, method, url, body=None, headers={}):
870 """Send a complete request to the server."""
872 try:
873 self._send_request(method, url, body, headers)
874 except socket.error, v:
875 # trap 'Broken pipe' if we're allowed to automatically reconnect
876 if v[0] != 32 or not self.auto_open:
877 raise
878 # try one more time
879 self._send_request(method, url, body, headers)
881 def _send_request(self, method, url, body, headers):
882 # honour explicitly requested Host: and Accept-Encoding headers
883 header_names = dict.fromkeys([k.lower() for k in headers])
884 skips = {}
885 if 'host' in header_names:
886 skips['skip_host'] = 1
887 if 'accept-encoding' in header_names:
888 skips['skip_accept_encoding'] = 1
890 self.putrequest(method, url, **skips)
892 if body and ('content-length' not in header_names):
893 thelen=None
894 try:
895 thelen=str(len(body))
896 except TypeError, te:
897 # If this is a file-like object, try to
898 # fstat its file descriptor
899 import os
900 try:
901 thelen = str(os.fstat(body.fileno()).st_size)
902 except (AttributeError, OSError):
903 # Don't send a length if this failed
904 if self.debuglevel > 0: print "Cannot stat!!"
906 if thelen is not None:
907 self.putheader('Content-Length',thelen)
908 for hdr, value in headers.iteritems():
909 self.putheader(hdr, value)
910 self.endheaders()
912 if body:
913 self.send(body)
915 def getresponse(self):
916 "Get the response from the server."
918 # if a prior response has been completed, then forget about it.
919 if self.__response and self.__response.isclosed():
920 self.__response = None
923 # if a prior response exists, then it must be completed (otherwise, we
924 # cannot read this response's header to determine the connection-close
925 # behavior)
927 # note: if a prior response existed, but was connection-close, then the
928 # socket and response were made independent of this HTTPConnection
929 # object since a new request requires that we open a whole new
930 # connection
932 # this means the prior response had one of two states:
933 # 1) will_close: this connection was reset and the prior socket and
934 # response operate independently
935 # 2) persistent: the response was retained and we await its
936 # isclosed() status to become true.
938 if self.__state != _CS_REQ_SENT or self.__response:
939 raise ResponseNotReady()
941 if self.debuglevel > 0:
942 response = self.response_class(self.sock, self.debuglevel,
943 strict=self.strict,
944 method=self._method)
945 else:
946 response = self.response_class(self.sock, strict=self.strict,
947 method=self._method)
949 response.begin()
950 assert response.will_close != _UNKNOWN
951 self.__state = _CS_IDLE
953 if response.will_close:
954 # this effectively passes the connection to the response
955 self.close()
956 else:
957 # remember this, so we can tell when it is complete
958 self.__response = response
960 return response
class HTTP:
    "Compatibility class with httplib.py from 1.5."

    _http_vsn = 10
    _http_vsn_str = 'HTTP/1.0'

    debuglevel = 0

    _connection_class = HTTPConnection

    def __init__(self, host='', port=None, strict=None):
        "Provide a default host, since the superclass requires one."

        # some joker passed 0 explicitly, meaning default port
        if port == 0:
            port = None

        # Note that we may pass an empty string as the host; this will throw
        # an error when we attempt to connect. Presumably, the client code
        # will call connect before then, with a proper host.
        self._setup(self._connection_class(host, port, strict))

    def _setup(self, conn):
        """Adopt `conn` and force it to speak this class's HTTP version."""
        self._conn = conn

        # set up delegation to flesh out interface
        self.send = conn.send
        self.putrequest = conn.putrequest
        self.endheaders = conn.endheaders
        self.set_debuglevel = conn.set_debuglevel

        conn._http_vsn = self._http_vsn
        conn._http_vsn_str = self._http_vsn_str

        self.file = None

    def connect(self, host=None, port=None):
        "Accept arguments to set the host/port, since the superclass doesn't."

        if host is not None:
            self._conn._set_hostport(host, port)
        self._conn.connect()

    def getfile(self):
        "Provide a getfile, since the superclass' does not use this concept."
        return self.file

    def putheader(self, header, *values):
        "The superclass allows only one value argument."
        # Several values become one header folded over continuation lines.
        self._conn.putheader(header, '\r\n\t'.join(values))

    def getreply(self):
        """Compat definition since superclass does not define it.

        Returns a tuple consisting of:
        - server status code (e.g. '200' if all goes well)
        - server "reason" corresponding to status code
        - any RFC822 headers in the response from the server

        If the status line cannot be parsed the tuple is
        (-1, <the offending line>, None) and getfile() returns a file
        object for whatever else the server sent.
        """
        try:
            response = self._conn.getresponse()
        except BadStatusLine as e:
            ### hmm. if getresponse() ever closes the socket on a bad request,
            ### then we are going to have problems with self.sock

            ### should we keep this behavior? do people use it?
            # keep the socket open (as a file), and return it
            sock = self._conn.sock
            if sock is not None:
                raw_file = sock.makefile('rb', 0)
            else:
                raw_file = None

            # close our socket -- we want to restart after any protocol error
            self.close()

            # close() resets self.file to None, so the fallback file has to
            # be installed afterwards or getfile() could never return it.
            self.file = raw_file
            self.headers = None
            return -1, e.line, None

        self.headers = response.msg
        self.file = response.fp
        return response.status, response.reason, response.msg

    def close(self):
        """Close the connection and drop the reference to the reply file."""
        self._conn.close()

        # note that self.file == response.fp, which gets closed by the
        # superclass. just clear the object ref here.
        ### hmm. messy. if status==-1, then self.file is owned by us.
        ### well... we aren't explicitly closing, but losing this ref will
        ### do it
        self.file = None
try:
    import ssl
except ImportError:
    # No SSL support in this build: the HTTPS classes are simply not defined.
    pass
else:
    class HTTPSConnection(HTTPConnection):
        "This class allows communication via SSL."

        default_port = HTTPS_PORT

        def __init__(self, host, port=None, key_file=None, cert_file=None,
                     strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
            # Keep the key/certificate paths for connect() to pass on.
            HTTPConnection.__init__(self, host, port, strict, timeout)
            self.key_file, self.cert_file = key_file, cert_file

        def connect(self):
            "Connect to a host on a given (SSL) port."

            address = (self.host, self.port)
            plain_sock = socket.create_connection(address, self.timeout)
            self.sock = ssl.wrap_socket(plain_sock,
                                        keyfile=self.key_file,
                                        certfile=self.cert_file)

    __all__.append("HTTPSConnection")

    class HTTPS(HTTP):
        """Compatibility with 1.5 httplib interface

        Python 1.5.2 did not have an HTTPS class, but it defined an
        interface for sending http requests that is also useful for
        https.
        """

        _connection_class = HTTPSConnection

        def __init__(self, host='', port=None, key_file=None, cert_file=None,
                     strict=None):
            # provide a default host, pass the X509 cert info

            # urf. compensate for bad input: a port of 0 means "default".
            effective_port = None if port == 0 else port
            connection = self._connection_class(host, effective_port,
                                                key_file, cert_file, strict)
            self._setup(connection)

            # we never actually use these for anything, but we keep them
            # here for compatibility with post-1.5.2 CVS.
            self.key_file, self.cert_file = key_file, cert_file


    def FakeSocket (sock, sslobj):
        # Deprecated shim: warns, ignores `sock`, hands back `sslobj`.
        message = ("FakeSocket is deprecated, and won't be in 3.x. " +
                   "Use the result of ssl.wrap_socket() directly instead.")
        warnings.warn(message, DeprecationWarning, stacklevel=2)
        return sslobj
class HTTPException(Exception):
    """Base class for the exceptions defined in this module.

    A subclass that defines its own __init__ has to call
    Exception.__init__ or assign self.args itself; otherwise str() on the
    instance will fail.
    """
class NotConnected(HTTPException):
    """HTTPException subtype with no behaviour of its own.

    It exists so that callers can catch this condition by type.
    """
class InvalidURL(HTTPException):
    """HTTPException subtype with no behaviour of its own.

    Defined so that this failure can be told apart from other
    HTTPExceptions by type.
    """
class UnknownProtocol(HTTPException):
    """HTTPException that carries the offending protocol version.

    The value passed in is available both as `version` and as the single
    element of `args`.
    """

    def __init__(self, version):
        # Exception.__init__ is not called, so args is set by hand to keep
        # str() working.
        self.version = version
        self.args = (version,)
class UnknownTransferEncoding(HTTPException):
    """Plain HTTPException subtype; adds a distinct type and nothing else."""
class UnimplementedFileMode(HTTPException):
    """Plain HTTPException subtype; adds a distinct type and nothing else."""
class IncompleteRead(HTTPException):
    """HTTPException carrying the data that was read before the failure.

    Attributes:
        partial:  the data received so far (also the sole element of args).
        expected: optional count of bytes that were still outstanding, or
                  None when the caller did not supply it.

    str() and repr() give a short summary with the byte counts rather than
    the payload itself, so a large partial body does not end up in
    tracebacks and log lines.
    """

    def __init__(self, partial, expected=None):
        # Exception.__init__ is not called, so args is set by hand.
        self.args = partial,
        self.partial = partial
        self.expected = expected

    def __repr__(self):
        if self.expected is not None:
            tail = ', %i more expected' % self.expected
        else:
            tail = ''
        return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), tail)

    def __str__(self):
        return repr(self)
class ImproperConnectionState(HTTPException):
    """HTTPException subtype with no behaviour of its own.

    It serves as the common parent of CannotSendRequest, CannotSendHeader
    and ResponseNotReady, so all three can be caught at once.
    """
class CannotSendRequest(ImproperConnectionState):
    """ImproperConnectionState subtype; adds a distinct type only."""
class CannotSendHeader(ImproperConnectionState):
    """ImproperConnectionState subtype; adds a distinct type only."""
class ResponseNotReady(ImproperConnectionState):
    """ImproperConnectionState subtype; adds a distinct type only."""
class BadStatusLine(HTTPException):
    """HTTPException that carries the status line it was raised for.

    The line is available both as `line` and as the single element of
    `args`.
    """

    def __init__(self, line):
        # Exception.__init__ is not called, so args is set by hand to keep
        # str() working.
        self.line = line
        self.args = (line,)
# Alias kept for backwards compatibility with code that refers to the
# base exception under the name `error`.
error = HTTPException
class LineAndFileWrapper:
    """A limited file-like object for HTTP/0.9 responses.

    The status-line parsing code calls readline(), which normally gets the
    HTTP status line.  For a 0.9 response, however, that is actually the
    first line of the body, so clients need a readable file object that
    still contains it.  This wrapper serves the saved `line` first and
    then hands over to the underlying `file`.

    Any attribute not defined here is looked up on the underlying file.
    """

    def __init__(self, line, file):
        self._line = line
        self._file = file
        self._line_consumed = 0
        self._line_offset = 0
        self._line_left = len(line)

    def __getattr__(self, attr):
        # Only reached for names not found on the wrapper itself.
        return getattr(self._file, attr)

    def _done(self):
        # Called when the last byte is read from the line.  After the
        # call, all read methods are delegated to the underlying file
        # object by shadowing the class methods with instance attributes.
        self._line_consumed = 1
        self.read = self._file.read
        self.readline = self._file.readline
        self.readlines = self._file.readlines

    def read(self, amt=None):
        """Return up to `amt` characters, saved line first.

        With `amt` omitted, None or negative, everything that is left
        (rest of the saved line plus the rest of the file) is returned,
        following the usual file-object convention.
        """
        # A missing or negative size means "read to the end".
        read_all = amt is None or amt < 0
        if self._line_consumed:
            # Reachable only through a bound method obtained before the
            # line ran out; do not forward None as a size argument.
            if amt is None:
                return self._file.read()
            return self._file.read(amt)
        assert self._line_left
        if read_all or amt > self._line_left:
            s = self._line[self._line_offset:]
            self._done()
            if read_all:
                return s + self._file.read()
            else:
                return s + self._file.read(amt - len(s))
        else:
            assert amt <= self._line_left
            i = self._line_offset
            j = i + amt
            s = self._line[i:j]
            self._line_offset = j
            self._line_left -= amt
            if self._line_left == 0:
                self._done()
            return s

    def readline(self):
        """Return the unread remainder of the saved line, then file lines."""
        if self._line_consumed:
            return self._file.readline()
        assert self._line_left
        s = self._line[self._line_offset:]
        self._done()
        return s

    def readlines(self, size=None):
        """Return the rest of the saved line followed by the file's lines.

        `size`, when given, is passed unchanged to the file's readlines().
        """
        if self._line_consumed:
            return self._file.readlines(size)
        assert self._line_left
        L = [self._line[self._line_offset:]]
        self._done()
        if size is None:
            return L + self._file.readlines()
        else:
            return L + self._file.readlines(size)
def test():
    """Test this module.

    A hodge podge of tests collected here, because they have too many
    external dependencies for the regular test suite.

    Command line: each -d option raises the debug level by one; the first
    positional argument overrides the host ('www.python.org') and the
    second overrides the selector ('/').
    """
    import sys
    import getopt

    def fetch_and_report(conn_class, host, selector, debuglevel):
        # Issue one GET through `conn_class` and print status, reason,
        # body length and headers.  Single-argument print(...) produces
        # the same output whether print is a statement or a function.
        conn = conn_class()
        conn.set_debuglevel(debuglevel)
        try:
            conn.connect(host)
            conn.putrequest('GET', selector)
            conn.endheaders()
            status, reason, headers = conn.getreply()
            print('status = %s' % (status,))
            print('reason = %s' % (reason,))
            print('read %s' % (len(conn.getfile().read()),))
            print('')
            if headers:
                for header in headers.headers:
                    print(header.strip())
            print('')
        finally:
            # Do not leave the socket open once the report is done.
            conn.close()

    opts, args = getopt.getopt(sys.argv[1:], 'd')
    dl = 0
    for o, a in opts:
        if o == '-d':
            dl = dl + 1
    host = 'www.python.org'
    selector = '/'
    if args[0:]:
        host = args[0]
    if args[1:]:
        selector = args[1]
    fetch_and_report(HTTP, host, selector, dl)

    # minimal test that code to extract host from url works
    class HTTP11(HTTP):
        _http_vsn = 11
        _http_vsn_str = 'HTTP/1.1'

    h = HTTP11('www.python.org')
    h.putrequest('GET', 'http://www.python.org/~jeremy/')
    h.endheaders()
    h.getreply()
    h.close()

    try:
        # Imported only to find out whether SSL support is available.
        import ssl
    except ImportError:
        pass
    else:
        for host, selector in (('sourceforge.net', '/projects/python'),
                               ):
            print("https://%s%s" % (host, selector))
            fetch_and_report(HTTPS, host, selector, dl)
# Run the ad-hoc network checks when the file is executed as a script.
if __name__ == "__main__":
    test()