1 """HTTP related handlers.
Note that some other HTTP handlers live in more specific modules: _auth.py, etc.
7 Copyright 2002-2006 John J Lee <jjl@pobox.com>
9 This code is free software; you can redistribute it and/or modify it
10 under the terms of the BSD or ZPL 2.1 licenses (see the file
11 COPYING.txt included with the distribution).
15 import copy
, time
, tempfile
, htmlentitydefs
, re
, logging
, socket
, \
16 urllib2
, urllib
, httplib
, sgmllib
17 from urllib2
import URLError
, HTTPError
, BaseHandler
18 from cStringIO
import StringIO
20 from _request
import Request
21 from _util
import isstringlike
22 from _response
import closeable_response
, response_seek_wrapper
23 from _html
import unescape
, unescape_charref
24 from _headersutil
import is_html
25 from _clientcookie
import CookieJar
, request_host
# Shortcut for logging debug messages through the shared "mechanize" logger.
debug = logging.getLogger("mechanize").debug

# monkeypatch urllib2.HTTPError to show URL
## def urllib2_str(self):
##     return 'HTTP Error %s: %s (%s)' % (
##         self.code, self.msg, self.geturl())
## urllib2.HTTPError.__str__ = urllib2_str

CHUNK = 1024  # size of chunks fed to HTML HEAD parser, in bytes
DEFAULT_ENCODING = 'latin-1'  # fallback charset when unescaping HTML entities
# This adds "refresh" to the list of redirectables and provides a redirection
# algorithm that doesn't go into a loop in the presence of cookies
# (Python 2.4 has this new algorithm, 2.3 doesn't).
class HTTPRedirectHandler(BaseHandler):
    """Follow 30x redirections (and "refresh" pseudo-redirections).

    NOTE(review): http_error_302 reads self.max_repeats and
    self.max_redirections, but their class-attribute definitions are not
    visible in this copy -- presumably defined where the two comment pairs
    below sit; confirm against upstream.
    """
    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce

    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop

    # Implementation notes:

    # To avoid the server sending us into an infinite loop, the request
    # object needs to track what URLs we have already seen.  Do this by
    # adding a handler-specific attribute to the Request object.  The value
    # of the dict is used to count the number of times the same URL has
    # been visited.  This is needed because visiting the same URL twice
    # does not necessarily imply a loop, thanks to state introduced by
    # cookies (NOTE(review): tail of this sentence truncated in this copy).

    # Always unhandled redirection codes:
    # 300 Multiple Choices: should not handle this here.
    # 304 Not Modified: no need to handle here: only of interest to caches
    #     that do conditional GETs
    # 305 Use Proxy: probably not worth dealing with here
    # 306 Unused: what was this for in the previous versions of protocol??

    def redirect_request(self, newurl, req, fp, code, msg, headers):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a redirection
        response is received.  If a redirection should take place, return a
        new Request to allow http_error_30x to perform the redirect;
        otherwise, return None to indicate that an HTTPError should be
        raised.
        """
        if code in (301, 302, 303, "refresh") or \
           (code == 307 and not req.has_data()):
            # Strictly (according to RFC 2616), 301 or 302 in response to
            # a POST MUST NOT cause a redirection without confirmation
            # from the user (of urllib2, in this case).  In practice,
            # essentially all clients do redirect in this case, so we do
            # the same.

            # XXX really refresh redirections should be visiting; tricky to
            # fix, so this will wait until post-stable release

            # NOTE(review): the construction of the new Request is
            # truncated in this copy -- only the keyword argument below
            # survives of the "new = Request(newurl, ...)" call.
            origin_req_host=req.get_origin_req_host(),
            # Remember the original request of the chain, so later handlers
            # (e.g. robots.txt processing) can identify redirected fetches.
            new._origin_req = getattr(req, "_origin_req", req)
        # NOTE(review): presumably "return new" and an else: branch were
        # here in the original; as copied, the raise is unconditional.
        raise HTTPError(req.get_full_url(), code, msg, headers, fp)

    def http_error_302(self, req, fp, code, msg, headers):
        """Perform the actual redirect, with redirect-loop detection."""
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        if headers.has_key('location'):
            newurl = headers.getheaders('location')[0]
        elif headers.has_key('uri'):
            newurl = headers.getheaders('uri')[0]
        # NOTE(review): no fallback branch visible for responses carrying
        # neither header; confirm against upstream.
        newurl = _rfc3986.clean_url(newurl, "latin-1")
        newurl = _rfc3986.urljoin(req.get_full_url(), newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(newurl, req, fp, code, msg, headers)

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            # carry the visit counts over to the new request
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                raise HTTPError(req.get_full_url(), code,
                                self.inf_msg + msg, headers, fp)
        # NOTE(review): presumably an else: branch in the original --
        # first redirect of a chain starts a fresh visit-count dict.
        visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # again (the new request may reuse it).
        return self.parent.open(new)

    # These redirect codes all share the same algorithm.
    http_error_301 = http_error_303 = http_error_307 = http_error_302
    http_error_refresh = http_error_302

    # Message prefixed to the 30x error message when a loop is detected.
    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"
# XXX would self.reset() work, instead of raising this exception?
class EndOfHeadError(Exception):
    """Signals that parsing has moved past the end of the document HEAD."""
    pass
class AbstractHeadParser:
    """Base for parsers that extract http-equiv data from an HTML HEAD.

    Subclasses mix this class with a concrete HTML/SGML parser.  Collected
    (http-equiv, content) pairs are appended to self.http_equiv.

    NOTE(review): __init__ (which presumably creates self.http_equiv) is
    not visible in this copy.
    """
    # only these elements are allowed in or before HEAD of document
    # NOTE(review): list appears truncated ("title", "base", ... missing?)
    head_elems = ("html", "head",
                  "script", "style", "meta", "link", "object")
    # entity-name -> codepoint mapping used when unescaping references
    _entitydefs = htmlentitydefs.name2codepoint
    # encoding used when decoding character references
    _encoding = DEFAULT_ENCODING

    def start_meta(self, attrs):
        # Collect (http-equiv, content) pairs from <meta> tags.
        http_equiv = content = None
        for key, value in attrs:
            if key == "http-equiv":
                http_equiv = self.unescape_attr_if_required(value)
            elif key == "content":
                content = self.unescape_attr_if_required(value)
        if http_equiv is not None and content is not None:
            self.http_equiv.append((http_equiv, content))
        # NOTE(review): the raise below appears to be the body of a missing
        # end-of-head handler (e.g. "def end_head(self):") whose def line
        # was truncated from this copy; it does not belong to start_meta.
        raise EndOfHeadError()

    def handle_entityref(self, name):
        # Named entity (&amp; etc.): decode it and pass through as data.
        self.handle_data(unescape(
            '&%s;' % name, self._entitydefs, self._encoding))

    def handle_charref(self, name):
        # Numeric character reference (&#NN;): decode and pass through.
        self.handle_data(unescape_charref(name, self._encoding))

    def unescape_attr(self, name):
        # Decode entity/character references in an attribute value.
        return unescape(name, self._entitydefs, self._encoding)

    def unescape_attrs(self, attrs):
        # Return attributes with values unescaped.
        # NOTE(review): initialisation of escaped_attrs and the return
        # statement are not visible in this copy.
        for key, val in attrs.items():
            escaped_attrs[key] = self.unescape_attr(val)

    def unknown_entityref(self, ref):
        # Unknown entity: keep the raw source text.
        self.handle_data("&%s;" % ref)

    def unknown_charref(self, ref):
        # Unknown character reference: keep the raw source text.
        self.handle_data("&#%s;" % ref)
class XHTMLCompatibleHeadParser(AbstractHeadParser,
                                HTMLParser.HTMLParser):
    """HEAD parser built on HTMLParser.HTMLParser (strict, XHTML-capable).

    NOTE(review): the "def __init__(self):" line is missing from this
    copy; the two base-class __init__ calls below are its body.
    """
        HTMLParser.HTMLParser.__init__(self)
        AbstractHeadParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # Stop parsing as soon as an element not allowed in/before HEAD
        # appears -- everything we care about has been seen by then.
        if tag not in self.head_elems:
            raise EndOfHeadError()
        # NOTE(review): the try: lines wrapping these lookups are missing
        # from this copy -- intent is to dispatch to a start_<tag> handler,
        # falling back to do_<tag>, then to nothing.
            method = getattr(self, 'start_' + tag)
        except AttributeError:
            method = getattr(self, 'do_' + tag)
        except AttributeError:

    def handle_endtag(self, tag):
        if tag not in self.head_elems:
            raise EndOfHeadError()
        # NOTE(review): try: and handler-invocation lines missing here too.
            method = getattr(self, 'end_' + tag)
        except AttributeError:

    def unescape(self, name):
        # Use the entitydefs passed into constructor, not
        # HTMLParser.HTMLParser's entitydefs.
        return self.unescape_attr(name)

    def unescape_attr_if_required(self, name):
        return name  # HTMLParser.HTMLParser already did it
class HeadParser(AbstractHeadParser, sgmllib.SGMLParser):
    """HEAD parser built on sgmllib.SGMLParser (tolerant of bad HTML)."""

    def _not_called(self):
        # Placeholder handler passed to handle_starttag; must never run.
        # NOTE(review): its body (presumably an assert) is missing from
        # this copy, as is the "def __init__(self):" line that the two
        # base-class __init__ calls below belong to.
        sgmllib.SGMLParser.__init__(self)
        AbstractHeadParser.__init__(self)

    def handle_starttag(self, tag, method, attrs):
        # Stop as soon as an element not allowed in/before HEAD appears.
        if tag not in self.head_elems:
            raise EndOfHeadError()
        # NOTE(review): the dispatch to the actual tag handler is missing
        # from this copy.

    def unknown_starttag(self, tag, attrs):
        # Route unknown tags through handle_starttag so the head_elems
        # check still applies; the method argument is never invoked.
        self.handle_starttag(tag, self._not_called, attrs)

    def handle_endtag(self, tag, method):
        if tag in self.head_elems:
            # NOTE(review): the original presumably invoked the handler
            # here and raised only in an else: branch; those lines are
            # missing from this copy, inverting the visible logic.
            raise EndOfHeadError()

    def unescape_attr_if_required(self, name):
        # sgmllib does not unescape attribute values for us, so do it here.
        return self.unescape_attr(name)
def parse_head(fileobj, parser):
    """Return a list of key, value pairs.

    Feeds CHUNK-sized pieces of fileobj to parser until the parser raises
    EndOfHeadError (or input runs out), then returns the collected
    http-equiv pairs from the parser.

    NOTE(review): the read loop (while/try/feed/break lines) is truncated
    in this copy; only the fragments below survive.
    """
        data = fileobj.read(CHUNK)
    except EndOfHeadError:
        if len(data) != CHUNK:
            # this should only happen if there is no HTML body, or if
            # (NOTE(review): remainder of this comment truncated)
    return parser.http_equiv
class HTTPEquivProcessor(BaseHandler):
    """Append META HTTP-EQUIV headers to regular HTTP headers."""

    handler_order = 300  # before handlers that look at HTTP headers

    def __init__(self, head_parser_class=HeadParser,
                 i_want_broken_xhtml_support=False,
                 # NOTE(review): rest of the parameter list / closing
                 # paren is truncated in this copy.
        self.head_parser_class = head_parser_class  # factory for HEAD parsers
        self._allow_xhtml = i_want_broken_xhtml_support

    def http_response(self, request, response):
        # If the response is HTML, parse its HEAD and merge any http-equiv
        # pseudo-headers into the real HTTP message headers.
        if not hasattr(response, "seek"):
            # we consume part of the body to find the HEAD, so the
            # response must be seekable for downstream consumers
            response = response_seek_wrapper(response)
        http_message = response.info()
        url = response.geturl()
        ct_hdrs = http_message.getheaders("content-type")
        if is_html(ct_hdrs, url, self._allow_xhtml):
            # NOTE(review): try: line and the response.seek(0) calls
            # around parsing are truncated in this copy.
                html_headers = parse_head(response, self.head_parser_class())
            except (HTMLParser.HTMLParseError,
                    sgmllib.SGMLParseError):
            # NOTE(review): except-body truncated (presumably pass).
            for hdr, val in html_headers:
                # add a header to the parsed HTTP message object
                http_message.dict[hdr.lower()] = val
                # re-add raw header lines so str() round-trips correctly
                text = hdr + ": " + val
                for line in text.split("\n"):
                    http_message.headers.append(line + "\n")
        # NOTE(review): "return response" presumably truncated.

    https_response = http_response
class HTTPCookieProcessor(BaseHandler):
    """Handle HTTP cookies.

    Stores cookies from responses in a CookieJar, and adds the
    appropriate Cookie headers to subsequent requests.

    Public attributes:

    cookiejar: CookieJar instance

    """
    def __init__(self, cookiejar=None):
        # Default to a fresh, empty CookieJar when none is supplied.
        if cookiejar is None:
            cookiejar = CookieJar()
        self.cookiejar = cookiejar

    def http_request(self, request):
        # Attach any applicable Cookie header to the outgoing request.
        self.cookiejar.add_cookie_header(request)
        # urllib2's processor protocol requires request processors to
        # return the (possibly modified) request for the handler chain.
        return request

    def http_response(self, request, response):
        # Remember any cookies set by this response.
        self.cookiejar.extract_cookies(response, request)
        # Response processors must likewise return the response.
        return response

    https_request = http_request
    https_response = http_response
class MechanizeRobotFileParser(robotparser.RobotFileParser):
    """RobotFileParser that fetches robots.txt through a mechanize opener.

    NOTE(review): this copy is heavily truncated; several statements below
    have lost their surrounding control flow (see inline notes).
    """

    def __init__(self, url='', opener=None):
        robotparser.RobotFileParser.__init__(self, url)
        self._opener = opener  # opener used to fetch robots.txt (may be None)

    def set_opener(self, opener=None):
        # NOTE(review): presumably guarded by "if opener is None:" --
        # default to a bare OpenerDirector when no opener is given.
        opener = _opener.OpenerDirector()
        self._opener = opener

    # NOTE(review): the "def read(self):" line is missing from this copy;
    # the statements below down to the status handling are its body.
        """Reads the robots.txt URL and feeds it to the parser."""
        if self._opener is None:
            # NOTE(review): body truncated (presumably self.set_opener()).
        # unverifiable/visit flags: robots.txt fetches are automatic,
        # not user-visible navigation
        req = Request(self.url, unverifiable=True, visit=False)
        # NOTE(review): try: line truncated before the open call.
            f = self._opener.open(req)
        except (IOError, socket.error, OSError), exc:
            robotparser._debug("ignoring error opening %r: %s" %
                               # NOTE(review): format arguments truncated
        lines.append(line.strip())  # NOTE(review): enclosing loop truncated
        # Classify the fetch result by HTTP status:
        if status == 401 or status == 403:
            # access to robots.txt denied: disallow everything
            self.disallow_all = True
            robotparser._debug("disallow all")
            # NOTE(review): elif branch header truncated (presumably
            # status >= 400: no usable robots.txt, allow everything)
            self.allow_all = True
            robotparser._debug("allow all")
        elif status == 200 and lines:
            robotparser._debug("parse lines")
            # NOTE(review): self.parse(lines) presumably truncated.
class RobotExclusionError(urllib2.HTTPError):
    """HTTPError raised when robots.txt forbids fetching a URL.

    The offending request is exposed as the .request attribute; the
    remaining positional args are passed straight to HTTPError.__init__
    (url, code, msg, hdrs, fp).
    """
    def __init__(self, request, *args):
        # Direct call instead of the long-deprecated apply() builtin;
        # semantics are identical.
        urllib2.HTTPError.__init__(self, *args)
        self.request = request
class HTTPRobotRulesProcessor(BaseHandler):
    """Refuse to send requests that robots.txt disallows."""
    # before redirections, after everything else
    # NOTE(review): the handler_order assignment itself is truncated.

    # Pick a response-message class for the synthesized error response.
    # NOTE(review): the try/except ImportError wrapping these imports is
    # truncated in this copy.
        from httplib import HTTPMessage
        from mimetools import Message
        http_response_class = Message
        http_response_class = HTTPMessage

    def __init__(self, rfp_class=MechanizeRobotFileParser):
        self.rfp_class = rfp_class  # factory for robots.txt parsers
        # NOTE(review): initialisation of self.rfp and self._host is
        # truncated from this copy; both are read in http_request.

    def http_request(self, request):
        scheme = request.get_type()
        if scheme not in ["http", "https"]:
            # robots exclusion only applies to HTTP
            # NOTE(review): "return request" truncated.
        if request.get_selector() == "/robots.txt":
            # /robots.txt is always OK to fetch
            # NOTE(review): "return request" truncated.
        host = request.get_host()

        # robots.txt requests don't need to be allowed by robots.txt :-)
        origin_req = getattr(request, "_origin_req", None)
        if (origin_req is not None and
            origin_req.get_selector() == "/robots.txt" and
            origin_req.get_host() == host
            # NOTE(review): closing paren and "return request" truncated.

        if host != self._host:
            # new host: fetch and cache its robots.txt rules
            self.rfp = self.rfp_class()
            # NOTE(review): try: line truncated before set_opener.
            self.rfp.set_opener(self.parent)
        except AttributeError:
            debug("%r instance does not support set_opener" %
                  # NOTE(review): format argument truncated
            self.rfp.set_url(scheme+"://"+host+"/robots.txt")
            # NOTE(review): self.rfp.read() and "self._host = host"
            # presumably truncated here.

        ua = request.get_header("User-agent", "")
        if self.rfp.can_fetch(ua, request.get_full_url()):
            # fetch allowed -- NOTE(review): "return request" truncated.
        # XXX This should really have raised URLError.  Too late now...
        msg = "request disallowed by robots.txt"
        raise RobotExclusionError(
            request.get_full_url(),
            # NOTE(review): code and msg arguments truncated here.
            self.http_response_class(StringIO()), StringIO(msg))

    https_request = http_request
class HTTPRefererProcessor(BaseHandler):
    """Add Referer header to requests.

    This only makes sense if you use each RefererProcessor for a single
    chain of requests only (so, for example, if you use a single
    HTTPRefererProcessor to fetch a series of URLs extracted from a single
    page, this will break).

    There's a proper implementation of this in mechanize.Browser.

    """
    def __init__(self):
        # URL of the most recently seen response; http_request reads this
        # attribute, so it must exist before the first request is processed.
        self.referer = None

    def http_request(self, request):
        # Attach the remembered referer, unless the caller set one already.
        if ((self.referer is not None) and
            not request.has_header("Referer")):
            request.add_unredirected_header("Referer", self.referer)
        # urllib2's processor protocol requires the request to be returned.
        return request

    def http_response(self, request, response):
        # Remember this page's URL as the referer for the next request.
        self.referer = response.geturl()
        return response

    https_request = http_request
    https_response = http_response
def clean_refresh_url(url):
    """Strip surrounding quotes from a Refresh-header URL, then clean it.

    e.g. Firefox 1.5 does (something like) this -- it quotes the URL.
    """
    if ((url.startswith('"') and url.endswith('"')) or
        (url.startswith("'") and url.endswith("'"))):
        # drop the matching surrounding quotes
        url = url[1:-1]
    return _rfc3986.clean_url(url, "latin-1")  # XXX encoding
def parse_refresh_header(refresh):
    """Parse an HTTP Refresh header value into a (pause, url) pair.

    url is None when the header carries no URL (refresh current page).
    Raises ValueError when the pause is not a number or the part after
    ";" is not a url=... specification.

    >>> parse_refresh_header("1; url=http://example.com/")
    (1.0, 'http://example.com/')
    >>> parse_refresh_header("1; url='http://example.com/'")
    (1.0, 'http://example.com/')
    >>> parse_refresh_header("1")
    (1.0, None)
    >>> parse_refresh_header("blah")
    Traceback (most recent call last):
    ValueError: invalid literal for float(): blah

    """
    ii = refresh.find(";")
    if ii != -1:
        # "pause; url=..." form
        pause, newurl_spec = float(refresh[:ii]), refresh[ii+1:]
        jj = newurl_spec.find("=")
        key = None
        if jj != -1:
            key, newurl = newurl_spec[:jj], newurl_spec[jj+1:]
            newurl = clean_refresh_url(newurl)
        if key is None or key.strip().lower() != "url":
            # the part after ";" was not a url=... specification
            raise ValueError()
    else:
        # bare pause with no URL: refresh the current page
        pause, newurl = float(refresh), None
    return pause, newurl
class HTTPRefreshProcessor(BaseHandler):
    """Perform HTTP Refresh redirections.

    Note that if a non-200 HTTP code has occurred (for example, a 30x
    redirect), this processor will do nothing.

    By default, only zero-time Refresh headers are redirected.  Use the
    max_time attribute / constructor argument to allow Refresh with longer
    pauses.  Use the honor_time attribute / constructor argument to control
    whether the requested pause is honoured (with a time.sleep()) or
    skipped in favour of immediate redirection.

    Public attributes:

    max_time: see above
    honor_time: see above
    """
    def __init__(self, max_time=0, honor_time=True):
        self.max_time = max_time      # longest pause (seconds) we follow
        self.honor_time = honor_time  # whether to actually sleep the pause

    def http_response(self, request, response):
        code, msg, hdrs = response.code, response.msg, response.info()

        if code == 200 and hdrs.has_key("refresh"):
            refresh = hdrs.getheaders("refresh")[0]
            # NOTE(review): the try:/except ValueError wrapping the parse
            # is truncated in this copy; the debug call below is the
            # except body.
                pause, newurl = parse_refresh_header(refresh)
                debug("bad Refresh header: %r" % refresh)
                # NOTE(review): "if newurl is None:" guard truncated -- a
                # Refresh without a URL reloads the current page.
                newurl = response.geturl()
            if (self.max_time is None) or (pause <= self.max_time):
                if pause > 1E-3 and self.honor_time:
                    # NOTE(review): time.sleep(pause) presumably truncated.
                # Turn the Refresh into a redirect by synthesizing a
                # Location header and routing through the error machinery.
                hdrs["location"] = newurl
                # hardcoded http is NOT a bug
                response = self.parent.error(
                    "http", request, response,
                    "refresh", msg, hdrs)
        # NOTE(review): "return response" presumably truncated.

    https_response = http_response
class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses.

    The purpose of this handler is to allow other response processors a
    look-in by removing the call to parent.error() from the HTTP handler.

    For non-200 error codes, this just passes the job on to the
    Handler.<proto>_error_<code> methods, via the OpenerDirector.error
    method.  Eventually, urllib2.HTTPDefaultErrorHandler will raise an
    HTTPError if no other handler handles the error.

    """
    handler_order = 1000  # after all other processors

    def http_response(self, request, response):
        code, msg, hdrs = response.code, response.msg, response.info()

        # Per the class docstring, only non-200 responses are routed into
        # the error machinery; successful responses pass straight through.
        if code != 200:
            # hardcoded http is NOT a bug
            response = self.parent.error(
                "http", request, response, code, msg, hdrs)

        return response

    https_response = http_response
class HTTPDefaultErrorHandler(BaseHandler):
    """Last-resort handler: turn any unhandled HTTP error into HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        # why these error methods took the code, msg, headers args in the first
        # place rather than a response object, I don't know, but to avoid
        # multiple wrapping, we're discarding them
        if isinstance(fp, urllib2.HTTPError):
            # already an HTTPError: re-raise it rather than wrapping again
            response = fp
        else:
            response = urllib2.HTTPError(
                req.get_full_url(), code, msg, hdrs, fp)
        # sanity-check that nothing was lost by discarding the args
        assert code == response.code
        assert msg == response.msg
        assert hdrs == response.hdrs
        raise response
class AbstractHTTPHandler(BaseHandler):
    """Shared machinery for the HTTP and HTTPS handlers.

    NOTE(review): several statements in this copy have lost their
    surrounding control flow to truncation; see inline notes.
    """

    def __init__(self, debuglevel=0):
        self._debuglevel = debuglevel  # forwarded to httplib connections

    def set_http_debuglevel(self, level):
        self._debuglevel = level

    def do_request_(self, request):
        # Pre-process an outgoing request: fill in default headers.
        host = request.get_host()
        # NOTE(review): "if not host:" guard truncated before this raise.
        raise URLError('no host given')

        if request.has_data():  # POST
            data = request.get_data()
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    # NOTE(review): the 'Content-type' header-name argument
                    # is truncated before this value.
                    'application/x-www-form-urlencoded')
            # NOTE(review): the Content-length default is presumably
            # truncated here.

        scheme, sel = urllib.splittype(request.get_selector())
        sel_host, sel_path = urllib.splithost(sel)
        if not request.has_header('Host'):
            # prefer the host embedded in the selector (absolute URL form)
            request.add_unredirected_header('Host', sel_host or host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)
        # NOTE(review): "return request" presumably truncated.

    def do_open(self, http_class, req):
        """Return an addinfourl object for the request, using http_class.

        http_class must implement the HTTPConnection API from httplib.
        The addinfourl return value is a file-like object.  It also
        has methods and attributes including:
            - info(): return a mimetools.Message object for the headers
            - geturl(): return the original request URL
            - code: HTTP status code
        """
        host = req.get_host()
        # NOTE(review): "if not host:" guard truncated before this raise.
        raise URLError('no host given')

        h = http_class(host)  # will parse host:port
        h.set_debuglevel(self._debuglevel)

        headers = dict(req.headers)
        headers.update(req.unredirected_hdrs)
        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        # NOTE(review): the "headers = dict(" title-casing wrapper is
        # truncated before this list comprehension.
            [(name.title(), val) for name, val in headers.items()])
        # NOTE(review): try: line truncated before the request call.
            h.request(req.get_method(), req.get_selector(), req.data, headers)
        except socket.error, err:  # XXX what error?
            # NOTE(review): except body (raise URLError(err)) and the
            # "r = h.getresponse()" call are truncated.

        # Pick apart the HTTPResponse object to get the addinfourl
        # object initialized properly.

        # Wrap the HTTPResponse object in socket's file object adapter
        # for Windows.  That adapter calls recv(), so delegate recv()
        # to read().  This weird wrapping allows the returned object to
        # have readline() and readlines() methods.

        # XXX It might be better to extract the read buffering code
        # out of socket._fileobject() and into a base class.
        # NOTE(review): "r.recv = r.read" presumably truncated.
        fp = socket._fileobject(r)

        resp = closeable_response(fp, r.msg, req.get_full_url(),
                                  # NOTE(review): status/reason arguments
                                  # and "return resp" are truncated.
class HTTPHandler(AbstractHTTPHandler):
    """Open http: URLs using httplib."""

    def http_open(self, req):
        # Delegate to the shared do_open machinery, using a plain
        # (non-SSL) connection class.
        connection_class = httplib.HTTPConnection
        return self.do_open(connection_class, req)

    # Request preprocessing (default headers etc.) is identical for
    # HTTP and HTTPS, so reuse the shared implementation directly.
    http_request = AbstractHTTPHandler.do_request_
# HTTPS support only exists when Python's httplib was built with SSL.
if hasattr(httplib, 'HTTPS'):

    class HTTPSConnectionFactory:
        """Factory producing HTTPSConnections with a fixed client key/cert."""
        def __init__(self, key_file, cert_file):
            self._key_file = key_file
            self._cert_file = cert_file
        def __call__(self, hostport):
            # NOTE(review): the hostport positional argument appears to be
            # truncated from this call in this copy.
            return httplib.HTTPSConnection(
                key_file=self._key_file, cert_file=self._cert_file)

    class HTTPSHandler(AbstractHTTPHandler):
        """Open https: URLs, with optional client-certificate support."""
        def __init__(self, client_cert_manager=None):
            AbstractHTTPHandler.__init__(self)
            # object exposing .find_key_cert(...) -> (key_file, cert_file);
            # NOTE(review): interface inferred from use below -- confirm.
            self.client_cert_manager = client_cert_manager

        def https_open(self, req):
            if self.client_cert_manager is not None:
                # look up the client certificate for this request
                key_file, cert_file = self.client_cert_manager.find_key_cert(
                    # NOTE(review): the argument (presumably the request
                    # URL) is truncated here.
                conn_factory = HTTPSConnectionFactory(key_file, cert_file)
            # NOTE(review): else: line truncated before the default below.
                conn_factory = httplib.HTTPSConnection
            return self.do_open(conn_factory, req)

        # shared request preprocessing, identical to plain HTTP
        https_request = AbstractHTTPHandler.do_request_