add release date
[python/dscho.git] / Lib / urllib2.py
blob40e442751e0786ef2c3860238074fe5195121c11
1 """An extensible library for opening URLs using a variety of protocols
3 The simplest way to use this module is to call the urlopen function,
4 which accepts a string containing a URL or a Request object (described
5 below). It opens the URL and returns the results as file-like
6 object; the returned object has some extra methods described below.
8 The OpenerDirector manages a collection of Handler objects that do
9 all the actual work. Each Handler implements a particular protocol or
10 option. The OpenerDirector is a composite object that invokes the
11 Handlers needed to open the requested URL. For example, the
12 HTTPHandler performs HTTP GET and POST requests and deals with
13 non-error returns. The HTTPRedirectHandler automatically deals with
14 HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
15 deals with digest authentication.
17 urlopen(url, data=None) -- Basic usage is the same as original
18 urllib. pass the url and optionally data to post to an HTTP URL, and
19 get a file-like object back. One difference is that you can also pass
20 a Request instance instead of URL. Raises a URLError (subclass of
21 IOError); for HTTP errors, raises an HTTPError, which can also be
22 treated as a valid response.
24 build_opener -- Function that creates a new OpenerDirector instance.
25 Will install the default handlers. Accepts one or more Handlers as
26 arguments, either instances or Handler classes that it will
instantiate. If one of the arguments is a subclass of a default
handler, that argument will be installed instead of the default.
30 install_opener -- Installs a new opener as the default opener.
32 objects of interest:
34 OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages
35 the Handler classes, while dealing with requests and responses.
37 Request -- An object that encapsulates the state of a request. The
38 state can be as simple as the URL. It can also include extra HTTP
39 headers, e.g. a User-Agent.
41 BaseHandler --
43 exceptions:
44 URLError -- A subclass of IOError, individual protocols have their own
45 specific subclass.
47 HTTPError -- Also a valid HTTP response, so you can treat an HTTP error
48 as an exceptional event or valid response.
50 internals:
51 BaseHandler and parent
52 _call_chain conventions
54 Example usage:
56 import urllib2
58 # set up authentication info
59 authinfo = urllib2.HTTPBasicAuthHandler()
60 authinfo.add_password(realm='PDQ Application',
61 uri='https://mahler:8092/site-updates.py',
62 user='klem',
63 passwd='geheim$parole')
65 proxy_support = urllib2.ProxyHandler({"http" : "http://ahad-haam:3128"})
67 # build a new opener that adds authentication and caching FTP handlers
68 opener = urllib2.build_opener(proxy_support, authinfo, urllib2.CacheFTPHandler)
70 # install it
71 urllib2.install_opener(opener)
73 f = urllib2.urlopen('http://www.python.org/')
76 """
78 # XXX issues:
79 # If an authentication error handler that tries to perform
80 # authentication for some reason but fails, how should the error be
81 # signalled? The client needs to know the HTTP error code. But if
# the handler knows what the problem was, e.g., that it didn't know
# the hash algorithm requested in the challenge, it would be good to
84 # pass that information along to the client, too.
85 # ftp errors aren't handled cleanly
86 # check digest against correct (i.e. non-apache) implementation
88 # Possible extensions:
89 # complex proxies XXX not sure what exactly was meant by this
90 # abstract factory for opener
92 import base64
93 import hashlib
94 import httplib
95 import mimetools
96 import os
97 import posixpath
98 import random
99 import re
100 import socket
101 import sys
102 import time
103 import urlparse
104 import bisect
106 try:
107 from cStringIO import StringIO
108 except ImportError:
109 from StringIO import StringIO
111 from urllib import (unwrap, unquote, splittype, splithost, quote,
112 addinfourl, splitport,
113 splitattr, ftpwrapper, splituser, splitpasswd, splitvalue)
115 # support for FileHandler, proxies via environment variables
116 from urllib import localhost, url2pathname, getproxies, proxy_bypass
# used in User-Agent header sent, e.g. "Python-urllib/2.6"
__version__ = sys.version[:3]

# Module-wide opener used by urlopen(); created lazily on first use and
# replaceable via install_opener().
_opener = None
def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
    """Open *url* (a URL string or a Request object) and return a
    file-like response.

    Uses the module-wide OpenerDirector, building the default one on
    first use; install_opener() can replace it.
    """
    global _opener
    opener = _opener
    if opener is None:
        opener = _opener = build_opener()
    return opener.open(url, data, timeout)
def install_opener(opener):
    """Make *opener* the OpenerDirector that urlopen() uses."""
    global _opener
    _opener = opener
132 # do these error classes make sense?
133 # make sure all of the IOError stuff is overridden. we just want to be
134 # subtypes.
class URLError(IOError):
    """Error raised while opening a URL.

    Subclasses IOError for backward compatibility but shares none of
    its implementation.  ``args`` is populated for parity with other
    EnvironmentError subclasses, though it holds only the single
    ``reason`` rather than the usual (errno, strerror) pair.
    """

    def __init__(self, reason):
        self.args = (reason,)
        self.reason = reason

    def __str__(self):
        return '<urlopen error %s>' % self.reason
class HTTPError(URLError, addinfourl):
    """An HTTP protocol error that doubles as a valid response object."""

    __super_init = addinfourl.__init__

    def __init__(self, url, code, msg, hdrs, fp):
        self.filename = url
        self.code = code
        self.msg = msg
        self.hdrs = hdrs
        self.fp = fp
        # The addinfourl base requires a usable file object.  Some
        # errors carry fp=None; in that case simply skip base-class
        # initialisation rather than crash.
        if fp is not None:
            self.__super_init(fp, hdrs, url, code)

    def __str__(self):
        return 'HTTP Error %s: %s' % (self.code, self.msg)
# copied from cookielib.py
_cut_port_re = re.compile(r":\d+$")

def request_host(request):
    """Return request-host, as defined by RFC 2965.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.
    """
    url = request.get_full_url()
    host = urlparse.urlparse(url)[1]
    if not host:
        # No authority in the URL itself; fall back to the Host header.
        host = request.get_header("Host", "")
    # strip any trailing :port so only the host name is compared
    return _cut_port_re.sub("", host, 1).lower()
class Request:
    """One URL request: the target URL, optional POST data, headers, and
    the origin information used by cookie handling (RFC 2965).

    The scheme (type), host, and selector are parsed lazily by the
    get_* accessors and cached; the name-mangled __r_* attributes hold
    the unparsed remainders and are materialised on demand through
    __getattr__.
    """

    def __init__(self, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.__original = unwrap(url)
        self.type = None        # URL scheme, filled lazily by get_type()
        # self.__r_type is what's left after doing the splittype
        self.host = None        # filled lazily by get_host()
        self.port = None
        self._tunnel_host = None    # real host when CONNECT-tunnelling https
        self.data = data
        self.headers = {}
        # NOTE: `headers` has a shared mutable default; it is only read
        # here, never mutated, so the sharing is harmless.
        for key, value in headers.items():
            self.add_header(key, value)
        self.unredirected_hdrs = {}
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable

    def __getattr__(self, attr):
        # XXX this is a fallback mechanism to guard against these
        # methods getting called in a non-standard order. this may be
        # too complicated and/or unnecessary.
        # XXX should the __r_XXX attributes be public?
        if attr[:12] == '_Request__r_':
            name = attr[12:]
            if hasattr(Request, 'get_' + name):
                # Run the accessor for its caching side effect, then
                # return the attribute it just created.
                getattr(self, 'get_' + name)()
                return getattr(self, attr)
        raise AttributeError, attr

    def get_method(self):
        """Return the HTTP method: POST when data is present, else GET."""
        if self.has_data():
            return "POST"
        else:
            return "GET"

    # XXX these helper methods are lame

    def add_data(self, data):
        self.data = data

    def has_data(self):
        return self.data is not None

    def get_data(self):
        return self.data

    def get_full_url(self):
        return self.__original

    def get_type(self):
        # Lazily split the scheme off the URL; caches both the scheme
        # and the remainder (__r_type).
        if self.type is None:
            self.type, self.__r_type = splittype(self.__original)
            if self.type is None:
                raise ValueError, "unknown url type: %s" % self.__original
        return self.type

    def get_host(self):
        # Lazily split the host out of the scheme-less remainder;
        # __r_host is the selector (path) left after the host.
        if self.host is None:
            self.host, self.__r_host = splithost(self.__r_type)
            if self.host:
                self.host = unquote(self.host)
        return self.host

    def get_selector(self):
        # __r_host is computed on demand via __getattr__ -> get_host().
        return self.__r_host

    def set_proxy(self, host, type):
        # For https, remember the real host so the connection can be
        # tunnelled with CONNECT; otherwise rewrite the request so the
        # full original URL becomes the selector sent to the proxy.
        if self.type == 'https' and not self._tunnel_host:
            self._tunnel_host = self.host
        else:
            self.type = type
            self.__r_host = self.__original

        self.host = host

    def has_proxy(self):
        # True once set_proxy() has made the selector the full URL.
        return self.__r_host == self.__original

    def get_origin_req_host(self):
        return self.origin_req_host

    def is_unverifiable(self):
        return self.unverifiable

    def add_header(self, key, val):
        # useful for something like authentication; keys are normalised
        # with str.capitalize() so later lookups must match that form
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        # Normal headers take precedence over unredirected ones.
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def header_items(self):
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return hdrs.items()
class OpenerDirector:
    """Manages a collection of handlers and routes a request through
    their <protocol>_request, <protocol>_open, <protocol>_response and
    <protocol>_error_<code> methods.
    """

    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', client_version)]
        # manage the individual handlers
        self.handlers = []          # every registered handler, kept sorted
        self.handle_open = {}       # kind -> [handlers with <proto>_open]
        self.handle_error = {}      # proto -> {code -> [handlers]}
        self.process_response = {}  # proto -> [response processors]
        self.process_request = {}   # proto -> [request processors]

    def add_handler(self, handler):
        """Register *handler*, indexing each of its <protocol>_<condition>
        methods (e.g. http_open, http_error_404, http_request)."""
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        added = False
        for meth in dir(handler):
            if meth in ["redirect_request", "do_open", "proxy_open"]:
                # oops, coincidental match
                continue

            # Split "protocol_condition" at the first underscore.
            i = meth.find("_")
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                # "http_error_404" -> kind 404 (int when numeric,
                # otherwise the string, e.g. "default").
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.get(protocol, {})
                self.handle_error[protocol] = lookup
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            # Keep each chain sorted by handler_order (BaseHandler.__lt__).
            handlers = lookup.setdefault(kind, [])
            if handlers:
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            # the handlers must work in a specific order, the order
            # is specified in a Handler attribute
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could.  Otherwise, they return the response.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)

            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Open a URL string or Request: run the request processors,
        the open chain, then the response processors."""
        # accept a URL or a Request object
        if isinstance(fullurl, basestring):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.add_data(data)

        req.timeout = timeout
        protocol = req.get_type()

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        # default_open handlers get first crack at every request.
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        # Then the protocol-specific chain, e.g. http_open.
        protocol = req.get_type()
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        # Finally, anything registered for unknown protocols.
        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the matching error chain; HTTP errors
        fall back to http_error_default when unhandled."""
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            dict = self.handle_error['http'] # https is not different than http
            proto = args[2]  # YUCK! (args[2] is the HTTP status code)
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            dict = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (dict, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            args = (dict, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
437 # XXX probably also want an abstract factory that knows when it makes
438 # sense to skip a superclass in favor of a subclass and when it might
439 # make sense to include both
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP, FTP and when applicable, HTTPS.

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.
    """
    import types

    def isclass(obj):
        return isinstance(obj, (types.ClassType, type))

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor]
    if hasattr(httplib, 'HTTPS'):
        default_classes.append(HTTPSHandler)

    # Drop every default that a caller-supplied handler (class or
    # instance) overrides.
    skip = set()
    for klass in default_classes:
        for check in handlers:
            if isclass(check):
                if issubclass(check, klass):
                    skip.add(klass)
            elif isinstance(check, klass):
                skip.add(klass)
    default_classes = [klass for klass in default_classes
                       if klass not in skip]

    for klass in default_classes:
        opener.add_handler(klass())

    # Caller-supplied handlers go in last; bare classes are instantiated.
    for h in handlers:
        if isclass(h):
            h = h()
        opener.add_handler(h)
    return opener
class BaseHandler:
    """Common plumbing shared by all handler classes."""

    # Handlers are kept sorted by this value; lower values run earlier.
    handler_order = 500

    def add_parent(self, parent):
        self.parent = parent

    def close(self):
        # Only exists for backwards compatibility
        pass

    def __lt__(self, other):
        try:
            other_order = other.handler_order
        except AttributeError:
            # Preserve the old behavior of having custom classes
            # inserted after default ones (works only for custom user
            # classes which are not aware of handler_order).
            return True
        return self.handler_order < other_order
class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses."""

    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        code, msg, hdrs = response.code, response.msg, response.info()

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        # Anything else is routed through the error chain, which may
        # itself produce a response.
        if code < 200 or code >= 300:
            response = self.parent.error(
                'http', request, response, code, msg, hdrs)

        return response

    https_response = http_response
class HTTPDefaultErrorHandler(BaseHandler):
    """Last-resort HTTP error handler: raise every error as HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
class HTTPRedirectHandler(BaseHandler):
    """Follows 301/302/303/307 redirects, with loop detection."""

    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST"):
            # Strictly (according to RFC 2616), 301 or 302 in response
            # to a POST MUST NOT cause a redirection without confirmation
            # from the user (of urllib2, in this case).  In practice,
            # essentially all clients do redirect in this case, so we
            # do the same.
            # be conciliant with URIs containing a space
            newurl = newurl.replace(' ', '%20')
            # The redirected request is a GET with no body, so drop the
            # entity headers that described the original POST data.
            newheaders = dict((k,v) for k,v in req.headers.items()
                              if k.lower() not in ("content-length", "content-type")
                             )
            return Request(newurl,
                           headers=newheaders,
                           origin_req_host=req.get_origin_req_host(),
                           unverifiable=True)
        else:
            raise HTTPError(req.get_full_url(), code, msg, headers, fp)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        if 'location' in headers:
            newurl = headers.getheaders('location')[0]
        elif 'uri' in headers:
            newurl = headers.getheaders('uri')[0]
        else:
            return

        # fix a possible malformed URL
        urlparts = urlparse.urlparse(newurl)
        if not urlparts.path:
            urlparts = list(urlparts)
            urlparts[2] = "/"
            newurl = urlparse.urlunparse(urlparts)

        # The target may be relative; resolve against the request URL.
        newurl = urlparse.urljoin(req.get_full_url(), newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                raise HTTPError(req.get_full_url(), code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new, timeout=req.timeout)

    http_error_301 = http_error_303 = http_error_307 = http_error_302

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"
def _parse_proxy(proxy):
    """Return (scheme, user, password, host/port) given a URL or an authority.

    If a URL is supplied, it must have an authority (host:port) component.
    According to RFC 3986, having an authority component means the URL must
    have two slashes after the scheme:

    >>> _parse_proxy('file:/ftp.example.com/')
    Traceback (most recent call last):
    ValueError: proxy URL with no authority: 'file:/ftp.example.com/'

    The first three items of the returned tuple may be None.

    Examples of authority parsing:

    >>> _parse_proxy('proxy.example.com')
    (None, None, None, 'proxy.example.com')
    >>> _parse_proxy('proxy.example.com:3128')
    (None, None, None, 'proxy.example.com:3128')

    The authority component may optionally include userinfo (assumed to be
    username:password):

    >>> _parse_proxy('joe:password@proxy.example.com')
    (None, 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('joe:password@proxy.example.com:3128')
    (None, 'joe', 'password', 'proxy.example.com:3128')

    Same examples, but with URLs instead:

    >>> _parse_proxy('http://proxy.example.com/')
    ('http', None, None, 'proxy.example.com')
    >>> _parse_proxy('http://proxy.example.com:3128/')
    ('http', None, None, 'proxy.example.com:3128')
    >>> _parse_proxy('http://joe:password@proxy.example.com/')
    ('http', 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
    ('http', 'joe', 'password', 'proxy.example.com:3128')

    Everything after the authority is ignored:

    >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
    ('ftp', 'joe', 'password', 'proxy.example.com')

    Test for no trailing '/' case:

    >>> _parse_proxy('http://joe:password@proxy.example.com')
    ('http', 'joe', 'password', 'proxy.example.com')

    """
    scheme, r_scheme = splittype(proxy)
    if not r_scheme.startswith("/"):
        # Bare authority such as "joe:pw@proxy.example.com:3128".
        scheme, authority = None, proxy
    else:
        # A full URL: RFC 3986 requires "//" before the authority.
        if not r_scheme.startswith("//"):
            raise ValueError("proxy URL with no authority: %r" % proxy)
        # We have an authority, so for RFC 3986-compliant URLs (by ss 3.2.
        # and 3.3.), path is empty or starts with '/'; everything from the
        # first '/' on is discarded.
        end = r_scheme.find("/", 2)
        authority = r_scheme[2:end] if end != -1 else r_scheme[2:]
    userinfo, hostport = splituser(authority)
    user = password = None
    if userinfo is not None:
        user, password = splitpasswd(userinfo)
    return scheme, user, password, hostport
class ProxyHandler(BaseHandler):
    """Rewrites requests to go through the proxies in a mapping of
    scheme -> proxy URL (defaults to the environment's proxy settings)."""

    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
        self.proxies = proxies
        # Grow one <scheme>_open method per configured scheme.  The
        # lambda's default arguments capture the *current* url/type on
        # each iteration, sidestepping the late-binding closure pitfall.
        for type, url in proxies.items():
            setattr(self, '%s_open' % type,
                    lambda r, proxy=url, type=type, meth=self.proxy_open: \
                    meth(r, proxy, type))

    def proxy_open(self, req, proxy, type):
        orig_type = req.get_type()
        proxy_type, user, password, hostport = _parse_proxy(proxy)

        # A bare "host:port" proxy spec has no scheme of its own; assume
        # the request's scheme.
        if proxy_type is None:
            proxy_type = orig_type

        # Honour the platform's no-proxy configuration for this host.
        if req.host and proxy_bypass(req.host):
            return None

        if user and password:
            user_pass = '%s:%s' % (unquote(user), unquote(password))
            creds = base64.b64encode(user_pass).strip()
            req.add_header('Proxy-authorization', 'Basic ' + creds)
        hostport = unquote(hostport)
        req.set_proxy(hostport, proxy_type)

        if orig_type == proxy_type or orig_type == 'https':
            # let other handlers take care of it
            return None
        else:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            # e.g. if we have a constructor arg proxies like so:
            # {'http': 'ftp://proxy.example.com'}, we may end up turning
            # a request for http://acme.example.com/a into one for
            # ftp://proxy.example.com/a
            return self.parent.open(req, timeout=req.timeout)
class HTTPPasswordMgr:
    """Stores username/password pairs keyed by realm and URI prefix."""

    def __init__(self):
        # realm -> {tuple of reduced URIs -> (user, password)}
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        # uri could be a single URI or a sequence
        if isinstance(uri, basestring):
            uri = [uri]
        if not realm in self.passwd:
            self.passwd[realm] = {}
        # Store under both the default-port-qualified and the verbatim
        # reduced forms so a later lookup matches either spelling.
        for default_port in True, False:
            reduced_uri = tuple(
                [self.reduce_uri(u, default_port) for u in uri])
            self.passwd[realm][reduced_uri] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return (user, password) for realm/authuri, or (None, None)."""
        domains = self.passwd.get(realm, {})
        for default_port in True, False:
            reduced_authuri = self.reduce_uri(authuri, default_port)
            for uris, authinfo in domains.iteritems():
                for uri in uris:
                    if self.is_suburi(uri, reduced_authuri):
                        return authinfo
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path."""
        # note HTTP URLs do not have a userinfo component
        parts = urlparse.urlsplit(uri)
        if parts[1]:
            # URI
            scheme = parts[0]
            authority = parts[1]
            path = parts[2] or '/'
        else:
            # host or host:port
            scheme = None
            authority = uri
            path = '/'
        host, port = splitport(authority)
        if default_port and port is None and scheme is not None:
            # Make the default port explicit so "example.com" and
            # "example.com:80" compare equal.
            dport = {"http": 80,
                     "https": 443,
                     }.get(scheme)
            if dport is not None:
                authority = "%s:%d" % (host, dport)
        return authority, path

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        # NOTE(review): commonprefix is character-based, not path-segment
        # based, so a base path of "/foo" also matches "/foobar" — it
        # looks like credentials can leak to sibling paths; confirm
        # whether this matters before relying on path scoping.
        common = posixpath.commonprefix((base[1], test[1]))
        if len(common) == len(base[1]):
            return True
        return False
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to a wildcard (None) realm."""

    def find_user_password(self, realm, authuri):
        # Try the exact realm first, then whatever was registered under
        # the catch-all realm None.
        for candidate_realm in (realm, None):
            user, password = HTTPPasswordMgr.find_user_password(
                self, candidate_realm, authuri)
            if user is not None:
                return user, password
        return user, password
class AbstractBasicAuthHandler:
    """Shared machinery for HTTP (401) and proxy (407) Basic auth.

    Subclasses supply ``auth_header`` (the request header to set) and
    the http_error_40x entry point that calls http_error_auth_reqed().
    """

    # XXX this allows for multiple auth-schemes, but will stupidly pick
    # the last one with a realm specified.

    # allow for double- and single-quoted realm values
    # (single quotes are a violation of the RFC, but appear in the wild)
    rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+'
                    'realm=(["\'])(.*?)\\2', re.I)

    # XXX could pre-emptively send auth info already accepted (RFC 2617,
    # end of section 2, and section 1.2 immediately after "credentials"
    # production).

    def __init__(self, password_mgr=None):
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password
        self.retried = 0

    def http_error_auth_reqed(self, authreq, host, req, headers):
        # host may be an authority (without userinfo) or a URL with an
        # authority
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)

        if self.retried > 5:
            # retry sending the username:password 5 times before failing.
            raise HTTPError(req.get_full_url(), 401, "basic auth failed",
                            headers, None)
        else:
            self.retried += 1

        if authreq:
            mo = AbstractBasicAuthHandler.rx.search(authreq)
            if mo:
                scheme, quote, realm = mo.groups()
                if scheme.lower() == 'basic':
                    response = self.retry_http_basic_auth(host, req, realm)
                    # Bug fix: reset the retry counter once a request
                    # actually succeeds.  Without this, five 401s spread
                    # over the lifetime of the handler -- even on
                    # different, ultimately successful requests -- made
                    # every later request fail with "basic auth failed".
                    if response and response.code != 401:
                        self.retried = 0
                    return response

    def retry_http_basic_auth(self, host, req, realm):
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is not None:
            raw = "%s:%s" % (user, pw)
            auth = 'Basic %s' % base64.b64encode(raw).strip()
            if req.headers.get(self.auth_header, None) == auth:
                # These exact credentials were already sent and
                # rejected; give up so we do not loop forever.
                return None
            req.add_unredirected_header(self.auth_header, auth)
            return self.parent.open(req, timeout=req.timeout)
        else:
            return None
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answers 401 www-authenticate challenges with Basic credentials."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        return self.http_error_auth_reqed('www-authenticate',
                                          req.get_full_url(), req, headers)
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answers 407 proxy-authenticate challenges with Basic credentials."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        # http_error_auth_reqed requires that the authority carry no
        # userinfo component; urllib2 does not (and should not, RFC 3986
        # s. 3.2.1) support URLs containing userinfo, so req.get_host()
        # can be passed straight through.
        return self.http_error_auth_reqed('proxy-authenticate',
                                          req.get_host(), req, headers)
def randombytes(n):
    """Return n random bytes."""
    # Bug fix / simplification: the old code opened /dev/urandom in text
    # mode and read it by hand, which only worked on platforms that have
    # that device file.  os.urandom (available since Python 2.4) uses the
    # OS randomness source portably and reads in binary mode.
    try:
        return os.urandom(n)
    except NotImplementedError:
        # No OS-level randomness source at all: fall back to the
        # (non-cryptographic) random module, as before.
        return "".join(chr(random.randrange(0, 256)) for i in range(n))
class AbstractDigestAuthHandler:
    """Shared implementation of HTTP Digest authentication (RFC 2617)
    for both server (401) and proxy (407) challenges."""

    # Digest authentication is specified in RFC 2617.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" supports is shaky

    def __init__(self, passwd=None):
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0
        # Bookkeeping for the nc (nonce-count) auth parameter.
        self.nonce_count = 0
        self.last_nonce = None

    def reset_retry_count(self):
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Handle a 401/407 by retrying with a Digest Authorization
        header, or raise HTTPError after too many failures."""
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.get_full_url(), 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)

    def retry_http_digest_auth(self, req, auth):
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(parse_http_list(challenge))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                # These exact credentials were already sent and
                # rejected; stop instead of looping forever.
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req, timeout=req.timeout)
            return resp

    def get_cnonce(self, nonce):
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        dig = hashlib.sha1("%s:%s:%s:%s" % (self.nonce_count, nonce, time.ctime(),
                                            randombytes(8))).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the Digest Authorization header value for *req* from
        the parsed challenge *chal*, or return None if we cannot."""
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.get_full_url())
        if user is None:
            return None

        # XXX not implemented yet
        if req.has_data():
            entdig = self.get_entity_digest(req.get_data(), chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.get_selector())
        if qop == 'auth':
            # Track uses of this server nonce; the nc value lets the
            # server detect replays.
            if nonce == self.last_nonce:
                self.nonce_count += 1
            else:
                self.nonce_count = 1
                self.last_nonce = nonce

            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
            respdig = KD(H(A1), noncebit)
        elif qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        else:
            # XXX handle auth-int.
            raise URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.get_selector(),
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return (H, KD) digest callables for *algorithm*, or
        (None, None) when the algorithm is unsupported."""
        # algorithm should be case-insensitive according to RFC2617
        algorithm = algorithm.upper()
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: hashlib.sha1(x).hexdigest()
        else:
            # XXX MD5-sess
            # Bug fix: H used to be left unbound here, so an unsupported
            # algorithm crashed the "return H, KD" below with
            # UnboundLocalError instead of reaching get_authorization's
            # "if H is None" guard.  Decline the challenge instead.
            return None, None
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        # XXX not implemented yet
        return None
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'
    handler_order = 490  # before Basic auth

    def http_error_401(self, req, fp, code, msg, headers):
        # The netloc part of the request URL names the host that issued
        # the challenge.
        netloc = urlparse.urlparse(req.get_full_url())[1]
        response = self.http_error_auth_reqed('www-authenticate',
                                              netloc, req, headers)
        self.reset_retry_count()
        return response
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Digest authentication handler for proxy challenges (HTTP 407)."""

    auth_header = 'Proxy-Authorization'
    handler_order = 490  # before Basic auth

    def http_error_407(self, req, fp, code, msg, headers):
        response = self.http_error_auth_reqed('proxy-authenticate',
                                              req.get_host(), req, headers)
        self.reset_retry_count()
        return response
class AbstractHTTPHandler(BaseHandler):
    """Shared machinery for HTTPHandler and HTTPSHandler: normalizes the
    outgoing request headers and performs the actual httplib exchange.
    """

    def __init__(self, debuglevel=0):
        # Passed straight through to the httplib connection's
        # set_debuglevel() in do_open().
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        self._debuglevel = level

    def do_request_(self, request):
        """Fill in headers the protocol requires before sending *request*.

        Adds default Content-type/Content-length for POST bodies, a Host
        header (using the proxied selector's host when applicable), and
        any opener-wide extra headers.  Returns the mutated request.
        """
        host = request.get_host()
        if not host:
            raise URLError('no host given')

        if request.has_data(): # POST
            data = request.get_data()
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if not request.has_header('Content-length'):
                request.add_unredirected_header(
                    'Content-length', '%d' % len(data))

        # When going through a proxy, the Host header must name the
        # origin server extracted from the full-URL selector, not the
        # proxy itself.
        sel_host = host
        if request.has_proxy():
            scheme, sel = splittype(request.get_selector())
            sel_host, sel_path = splithost(sel)

        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host)
        # Opener-wide headers (e.g. User-Agent) only apply when the
        # request does not already carry one of the same name.
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req):
        """Return an addinfourl object for the request, using http_class.

        http_class must implement the HTTPConnection API from httplib.
        The addinfourl return value is a file-like object.  It also
        has methods and attributes including:
            - info(): return a mimetools.Message object for the headers
            - geturl(): return the original request URL
            - code: HTTP status code
        """
        host = req.get_host()
        if not host:
            raise URLError('no host given')

        h = http_class(host, timeout=req.timeout) # will parse host:port
        h.set_debuglevel(self._debuglevel)

        # Unredirected headers take precedence over ordinary ones.
        headers = dict(req.headers)
        headers.update(req.unredirected_hdrs)
        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        # Normalize header capitalization (e.g. 'content-type' ->
        # 'Content-Type') so duplicates differing only in case collapse.
        headers = dict(
            (name.title(), val) for name, val in headers.items())

        if req._tunnel_host:
            # HTTPS through a proxy: establish a CONNECT tunnel first.
            tunnel_headers = {}
            proxy_auth_hdr = "Proxy-Authorization"
            if proxy_auth_hdr in headers:
                tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                # Proxy-Authorization should not be sent to origin
                # server.
                del headers[proxy_auth_hdr]
            h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

        try:
            h.request(req.get_method(), req.get_selector(), req.data, headers)
            try:
                r = h.getresponse(buffering=True)
            except TypeError: #buffering kw not supported
                r = h.getresponse()
        except socket.error, err: # XXX what error?
            raise URLError(err)

        # Pick apart the HTTPResponse object to get the addinfourl
        # object initialized properly.

        # Wrap the HTTPResponse object in socket's file object adapter
        # for Windows.  That adapter calls recv(), so delegate recv()
        # to read().  This weird wrapping allows the returned object to
        # have readline() and readlines() methods.

        # XXX It might be better to extract the read buffering code
        # out of socket._fileobject() and into a base class.

        r.recv = r.read
        fp = socket._fileobject(r, close=True)

        resp = addinfourl(fp, r.msg, req.get_full_url())
        resp.code = r.status
        resp.msg = r.reason
        return resp
class HTTPHandler(AbstractHTTPHandler):
    """Open plain http: URLs via the shared AbstractHTTPHandler logic."""

    def http_open(self, req):
        # All the real work happens in do_open(); this just picks the
        # connection class.
        return self.do_open(httplib.HTTPConnection, req)

    http_request = AbstractHTTPHandler.do_request_
# HTTPSHandler only exists when the interpreter was built with SSL
# support (httplib exposes the HTTPS machinery only in that case).
if hasattr(httplib, 'HTTPS'):
    class HTTPSHandler(AbstractHTTPHandler):
        # Open https: URLs with the shared AbstractHTTPHandler logic,
        # using an SSL-wrapped connection class.

        def https_open(self, req):
            return self.do_open(httplib.HTTPSConnection, req)

        https_request = AbstractHTTPHandler.do_request_
class HTTPCookieProcessor(BaseHandler):
    """Attach cookies to outgoing requests and harvest them from responses.

    Uses the supplied cookielib.CookieJar, or creates a fresh one when
    none is given.
    """

    def __init__(self, cookiejar=None):
        import cookielib
        self.cookiejar = cookielib.CookieJar() if cookiejar is None \
                         else cookiejar

    def http_request(self, request):
        # Add a Cookie header matching this request, if any apply.
        self.cookiejar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        # Record any Set-Cookie headers the server sent back.
        self.cookiejar.extract_cookies(response, request)
        return response

    https_request = http_request
    https_response = http_response
class UnknownHandler(BaseHandler):
    """Fallback handler: any scheme no other handler claimed is an error."""

    def unknown_open(self, req):
        scheme = req.get_type()
        raise URLError('unknown url type: %s' % scheme)
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    A value wrapped in double quotes has the quotes stripped.  If a key
    does repeat, the last occurrence wins.
    """
    parsed = {}
    for item in l:
        key, value = item.split('=', 1)
        # Unwrap a double-quoted value.
        if value[0] == '"' and value[-1] == '"':
            value = value[1:-1]
        parsed[key] = value
    return parsed
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma.  A non-quoted string could have quotes in the
    middle.  Neither commas nor quotes count if they are escaped.
    Only double-quotes count, not single-quotes.
    """
    items = []
    current = ''
    escaped = False
    quoted = False

    for ch in s:
        if escaped:
            # Previous character was a backslash inside quotes:
            # take this one literally.
            current += ch
            escaped = False
        elif quoted:
            if ch == '\\':
                escaped = True
            else:
                if ch == '"':
                    quoted = False
                current += ch
        elif ch == ',':
            # Unquoted comma ends the current element.
            items.append(current)
            current = ''
        else:
            if ch == '"':
                quoted = True
            current += ch

    # append last part
    if current:
        items.append(current)

    return [item.strip() for item in items]
class FileHandler(BaseHandler):
    """Open file:// URLs, falling back to FTP for non-local hosts."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        url = req.get_selector()
        # 'file://host/...' with a non-empty host: retry as FTP.
        if url[:2] == '//' and url[2:3] != '/':
            req.type = 'ftp'
            return self.parent.open(req)
        else:
            return self.open_local_file(req)

    # names for the localhost
    # Cached at class level, shared by all instances; resolved lazily.
    names = None
    def get_names(self):
        """Return the tuple of IP addresses that count as 'this machine'."""
        if FileHandler.names is None:
            try:
                FileHandler.names = tuple(
                    socket.gethostbyname_ex('localhost')[2] +
                    socket.gethostbyname_ex(socket.gethostname())[2])
            except socket.gaierror:
                # Name resolution failed; fall back to just localhost.
                FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl for a file on this host.

        Synthesizes Content-type/Content-length/Last-modified headers
        from the file's stat data.  Raises URLError when the file is
        missing or the URL's host is not this machine.
        """
        import email.utils
        import mimetypes
        host = req.get_host()
        filename = req.get_selector()
        localfile = url2pathname(filename)
        try:
            stats = os.stat(localfile)
            size = stats.st_size
            modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
            mtype = mimetypes.guess_type(filename)[0]
            headers = mimetools.Message(StringIO(
                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                (mtype or 'text/plain', size, modified)))
            # Accept an empty host, or a host (without explicit port)
            # that resolves to one of this machine's own addresses.
            if host:
                host, port = splitport(host)
            if not host or \
                (not port and socket.gethostbyname(host) in self.get_names()):
                if host:
                    origurl = 'file://' + host + filename
                else:
                    origurl = 'file://' + filename
                return addinfourl(open(localfile, 'rb'), headers, origurl)
        except OSError, msg:
            # urllib2 users shouldn't expect OSErrors coming from urlopen()
            raise URLError(msg)
        raise URLError('file not on local host')
class FTPHandler(BaseHandler):
    """Open ftp:// URLs via ftplib, returning an addinfourl."""

    def ftp_open(self, req):
        """Retrieve the file or directory listing named by *req*.

        Credentials may be embedded in the URL (user:pass@host); the
        transfer type defaults to binary for files and directory-list
        for paths ending in '/', overridable with a ';type=' attribute.
        Raises URLError on any FTP or resolution failure.
        """
        import ftplib
        import mimetypes
        host = req.get_host()
        if not host:
            raise URLError('ftp error: no host given')
        host, port = splitport(host)
        if port is None:
            port = ftplib.FTP_PORT
        else:
            port = int(port)

        # username/password handling
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')

        try:
            host = socket.gethostbyname(host)
        except socket.error, msg:
            raise URLError(msg)
        path, attrs = splitattr(req.get_selector())
        dirs = path.split('/')
        dirs = map(unquote, dirs)
        # Last component is the file; empty string means a directory.
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            # 'I' = binary (image) transfer, 'D' = directory listing.
            type = file and 'I' or 'D'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            fp, retrlen = fw.retrfile(file, type)
            headers = ""
            mtype = mimetypes.guess_type(req.get_full_url())[0]
            if mtype:
                headers += "Content-type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-length: %d\n" % retrlen
            sf = StringIO(headers)
            headers = mimetools.Message(sf)
            return addinfourl(fp, headers, req.get_full_url())
        except ftplib.all_errors, msg:
            # Re-raise with the original traceback attached.
            raise URLError, ('ftp error: %s' % msg), sys.exc_info()[2]

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        # Hook point: CacheFTPHandler overrides this to reuse connections.
        fw = ftpwrapper(user, passwd, host, port, dirs, timeout)
##        fw.ftp.set_debuglevel(1)
        return fw
class CacheFTPHandler(FTPHandler):
    """FTPHandler that keeps a bounded cache of live FTP connections.

    Connections are keyed by (user, host, port, path, timeout), expire
    after `delay` seconds, and at most `max_conns` are kept at once.
    """
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe

    def __init__(self):
        self.cache = {}      # key -> ftpwrapper connection
        self.timeout = {}    # key -> absolute expiry time
        self.soonest = 0     # earliest expiry time among cached entries
        self.delay = 60      # seconds a cached connection stays valid
        self.max_conns = 16  # upper bound on cached connections

    def setTimeout(self, t):
        """Set how long (in seconds) a cached connection remains usable."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the maximum number of simultaneously cached connections."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return a cached connection for this key, creating it if needed."""
        key = user, host, port, '/'.join(dirs), timeout
        if key in self.cache:
            self.timeout[key] = time.time() + self.delay
        else:
            self.cache[key] = ftpwrapper(user, passwd, host, port, dirs, timeout)
            self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        """Evict expired connections, then enforce the size limit."""
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in self.timeout.items():
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
            # Bug fix: min() raised ValueError when every entry expired
            # and the cache drained to empty.
            self.soonest = min(self.timeout.values()) if self.timeout else 0

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in self.timeout.items():
                if v == self.soonest:
                    # Bug fix: close the evicted connection instead of
                    # leaking its FTP socket (the expiry branch above
                    # already closed; this branch previously did not).
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self.soonest = min(self.timeout.values()) if self.timeout else 0