Lib/urllib.py

   1 """Open an arbitrary URL.
   2
   3 See the following document for more info on URLs:
   4 "Names and Addresses, URIs, URLs, URNs, URCs", at
   5 http://www.w3.org/pub/WWW/Addressing/Overview.html
   6
   7 See also the HTTP spec (from which the error codes are derived):
   8 "HTTP - Hypertext Transfer Protocol", at
   9 http://www.w3.org/pub/WWW/Protocols/
  10
  11 Related standards and specs:
  12 - RFC1808: the "relative URL" spec. (authoritative status)
  13 - RFC1738 - the "URL standard". (authoritative status)
  14 - RFC1630 - the "URI spec". (informational status)
  15
  16 The object returned by URLopener().open(file) will differ per
  17 protocol.  All you know is that is has methods read(), readline(),
  18 readlines(), fileno(), close() and info().  The read*(), fileno()
  19 and close() methods work like those of open files.
  20 The info() method returns a mimetools.Message object which can be
  21 used to query various info about the object, if available.
  22 (mimetools.Message objects are queried with the getheader() method.)
  23 """
  24
  25 import string
  26 import socket
  27 import os
  28 import time
  29 import sys
  30 from urlparse import urljoin as basejoin
  31
  32 __all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
  33            "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
  34            "urlencode", "url2pathname", "pathname2url", "splittag",
  35            "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
  36            "splittype", "splithost", "splituser", "splitpasswd", "splitport",
  37            "splitnport", "splitquery", "splitattr", "splitvalue",
  38            "splitgophertype", "getproxies"]
  39
  40 __version__ = '1.17'    # XXX This version is not always updated :-(
  41
  42 MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
  43
  44 # Helper for non-unix systems
  45 if os.name == 'mac':
  46     from macurl2path import url2pathname, pathname2url
  47 elif os.name == 'nt':
  48     from nturl2path import url2pathname, pathname2url
  49 elif os.name == 'riscos':
  50     from rourl2path import url2pathname, pathname2url
  51 else:
  52     def url2pathname(pathname):
  53         """OS-specific conversion from a relative URL of the 'file' scheme
  54         to a file system path; not recommended for general use."""
  55         return unquote(pathname)
  56
  57     def pathname2url(pathname):
  58         """OS-specific conversion from a file system path to a relative URL
  59         of the 'file' scheme; not recommended for general use."""
  60         return quote(pathname)
  61
  62 # This really consists of two pieces:
  63 # (1) a class which handles opening of all sorts of URLs
  64 #     (plus assorted utilities etc.)
  65 # (2) a set of functions for parsing URLs
  66 # XXX Should these be separated out into different modules?
  67
  68
  69 # Shortcut for basic usage
  70 _urlopener = None
  71 def urlopen(url, data=None, proxies=None):
  72     """urlopen(url [, data]) -> open file-like object"""
  73     global _urlopener
  74     if proxies is not None:
  75         opener = FancyURLopener(proxies=proxies)
  76     elif not _urlopener:
  77         opener = FancyURLopener()
  78         _urlopener = opener
  79     else:
  80         opener = _urlopener
  81     if data is None:
  82         return opener.open(url)
  83     else:
  84         return opener.open(url, data)
  85 def urlretrieve(url, filename=None, reporthook=None, data=None):
  86     global _urlopener
  87     if not _urlopener:
  88         _urlopener = FancyURLopener()
  89     return _urlopener.retrieve(url, filename, reporthook, data)
  90 def urlcleanup():
  91     if _urlopener:
  92         _urlopener.cleanup()
  93
  94 # exception raised when downloaded size does not match content-length
  95 class ContentTooShortError(IOError):
  96     def __init__(self, message, content):
  97         IOError.__init__(self, message)
  98         self.content = content
  99
 100 ftpcache = {}
 101 class URLopener:
 102     """Class to open URLs.
 103     This is a class rather than just a subroutine because we may need
 104     more than one set of global protocol-specific options.
 105     Note -- this is a base class for those who don't want the
 106     automatic handling of errors type 302 (relocated) and 401
 107     (authorization needed)."""
 108
 109     __tempfiles = None
 110
 111     version = "Python-urllib/%s" % __version__
 112
 113     # Constructor
 114     def __init__(self, proxies=None, **x509):
 115         if proxies is None:
 116             proxies = getproxies()
 117         assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
 118         self.proxies = proxies
 119         self.key_file = x509.get('key_file')
 120         self.cert_file = x509.get('cert_file')
 121         self.addheaders = [('User-Agent', self.version)]
 122         self.__tempfiles = []
 123         self.__unlink = os.unlink # See cleanup()
 124         self.tempcache = None
 125         # Undocumented feature: if you assign {} to tempcache,
 126         # it is used to cache files retrieved with
 127         # self.retrieve().  This is not enabled by default
 128         # since it does not work for changing documents (and I
 129         # haven't got the logic to check expiration headers
 130         # yet).
 131         self.ftpcache = ftpcache
 132         # Undocumented feature: you can use a different
 133         # ftp cache by assigning to the .ftpcache member;
 134         # in case you want logically independent URL openers
 135         # XXX This is not threadsafe.  Bah.
 136
 137     def __del__(self):
 138         self.close()
 139
 140     def close(self):
 141         self.cleanup()
 142
 143     def cleanup(self):
 144         # This code sometimes runs when the rest of this module
 145         # has already been deleted, so it can't use any globals
 146         # or import anything.
 147         if self.__tempfiles:
 148             for file in self.__tempfiles:
 149                 try:
 150                     self.__unlink(file)
 151                 except OSError:
 152                     pass
 153             del self.__tempfiles[:]
 154         if self.tempcache:
 155             self.tempcache.clear()
 156
 157     def addheader(self, *args):
 158         """Add a header to be used by the HTTP interface only
 159         e.g. u.addheader('Accept', 'sound/basic')"""
 160         self.addheaders.append(args)
 161
 162     # External interface
 163     def open(self, fullurl, data=None):
 164         """Use URLopener().open(file) instead of open(file, 'r')."""
 165         fullurl = unwrap(toBytes(fullurl))
 166         if self.tempcache and fullurl in self.tempcache:
 167             filename, headers = self.tempcache[fullurl]
 168             fp = open(filename, 'rb')
 169             return addinfourl(fp, headers, fullurl)
 170         urltype, url = splittype(fullurl)
 171         if not urltype:
 172             urltype = 'file'
 173         if urltype in self.proxies:
 174             proxy = self.proxies[urltype]
 175             urltype, proxyhost = splittype(proxy)
 176             host, selector = splithost(proxyhost)
 177             url = (host, fullurl) # Signal special case to open_*()
 178         else:
 179             proxy = None
 180         name = 'open_' + urltype
 181         self.type = urltype
 182         name = name.replace('-', '_')
 183         if not hasattr(self, name):
 184             if proxy:
 185                 return self.open_unknown_proxy(proxy, fullurl, data)
 186             else:
 187                 return self.open_unknown(fullurl, data)
 188         try:
 189             if data is None:
 190                 return getattr(self, name)(url)
 191             else:
 192                 return getattr(self, name)(url, data)
 193         except socket.error, msg:
 194             raise IOError, ('socket error', msg), sys.exc_info()[2]
 195
 196     def open_unknown(self, fullurl, data=None):
 197         """Overridable interface to open unknown URL type."""
 198         type, url = splittype(fullurl)
 199         raise IOError, ('url error', 'unknown url type', type)
 200
 201     def open_unknown_proxy(self, proxy, fullurl, data=None):
 202         """Overridable interface to open unknown URL type."""
 203         type, url = splittype(fullurl)
 204         raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
 205
 206     # External interface
 207     def retrieve(self, url, filename=None, reporthook=None, data=None):
 208         """retrieve(url) returns (filename, headers) for a local object
 209         or (tempfilename, headers) for a remote object."""
 210         url = unwrap(toBytes(url))
 211         if self.tempcache and url in self.tempcache:
 212             return self.tempcache[url]
 213         type, url1 = splittype(url)
 214         if filename is None and (not type or type == 'file'):
 215             try:
 216                 fp = self.open_local_file(url1)
 217                 hdrs = fp.info()
 218                 del fp
 219                 return url2pathname(splithost(url1)[1]), hdrs
 220             except IOError, msg:
 221                 pass
 222         fp = self.open(url, data)
 223         headers = fp.info()
 224         if filename:
 225             tfp = open(filename, 'wb')
 226         else:
 227             import tempfile
 228             garbage, path = splittype(url)
 229             garbage, path = splithost(path or "")
 230             path, garbage = splitquery(path or "")
 231             path, garbage = splitattr(path or "")
 232             suffix = os.path.splitext(path)[1]
 233             (fd, filename) = tempfile.mkstemp(suffix)
 234             self.__tempfiles.append(filename)
 235             tfp = os.fdopen(fd, 'wb')
 236         result = filename, headers
 237         if self.tempcache is not None:
 238             self.tempcache[url] = result
 239         bs = 1024*8
 240         size = -1
 241         read = 0
 242         blocknum = 0
 243         if reporthook:
 244             if "content-length" in headers:
 245                 size = int(headers["Content-Length"])
 246             reporthook(blocknum, bs, size)
 247         while 1:
 248             block = fp.read(bs)
 249             if block == "":
 250                 break
 251             read += len(block)
 252             tfp.write(block)
 253             blocknum += 1
 254             if reporthook:
 255                 reporthook(blocknum, bs, size)
 256         fp.close()
 257         tfp.close()
 258         del fp
 259         del tfp
 260
 261         # raise exception if actual size does not match content-length header
 262         if size >= 0 and read < size:
 263             raise ContentTooShortError("retrieval incomplete: got only %i out "
 264                                        "of %i bytes" % (read, size), result)
 265
 266         return result
 267
 268     # Each method named open_<type> knows how to open that type of URL
 269
 270     def open_http(self, url, data=None):
 271         """Use HTTP protocol."""
 272         import httplib
 273         user_passwd = None
 274         proxy_passwd= None
 275         if isinstance(url, str):
 276             host, selector = splithost(url)
 277             if host:
 278                 user_passwd, host = splituser(host)
 279                 host = unquote(host)
 280             realhost = host
 281         else:
 282             host, selector = url
 283             # check whether the proxy contains authorization information
 284             proxy_passwd, host = splituser(host)
 285             # now we proceed with the url we want to obtain
 286             urltype, rest = splittype(selector)
 287             url = rest
 288             user_passwd = None
 289             if urltype.lower() != 'http':
 290                 realhost = None
 291             else:
 292                 realhost, rest = splithost(rest)
 293                 if realhost:
 294                     user_passwd, realhost = splituser(realhost)
 295                 if user_passwd:
 296                     selector = "%s://%s%s" % (urltype, realhost, rest)
 297                 if proxy_bypass(realhost):
 298                     host = realhost
 299
 300             #print "proxy via http:", host, selector
 301         if not host: raise IOError, ('http error', 'no host given')
 302
 303         if proxy_passwd:
 304             import base64
 305             proxy_auth = base64.b64encode(proxy_passwd).strip()
 306         else:
 307             proxy_auth = None
 308
 309         if user_passwd:
 310             import base64
 311             auth = base64.b64encode(user_passwd).strip()
 312         else:
 313             auth = None
 314         h = httplib.HTTP(host)
 315         if data is not None:
 316             h.putrequest('POST', selector)
 317             h.putheader('Content-Type', 'application/x-www-form-urlencoded')
 318             h.putheader('Content-Length', '%d' % len(data))
 319         else:
 320             h.putrequest('GET', selector)
 321         if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
 322         if auth: h.putheader('Authorization', 'Basic %s' % auth)
 323         if realhost: h.putheader('Host', realhost)
 324         for args in self.addheaders: h.putheader(*args)
 325         h.endheaders()
 326         if data is not None:
 327             h.send(data)
 328         errcode, errmsg, headers = h.getreply()
 329         fp = h.getfile()
 330         if errcode == -1:
 331             if fp: fp.close()
 332             # something went wrong with the HTTP status line
 333             raise IOError, ('http protocol error', 0,
 334                             'got a bad status line', None)
 335         if errcode == 200:
 336             return addinfourl(fp, headers, "http:" + url)
 337         else:
 338             if data is None:
 339                 return self.http_error(url, fp, errcode, errmsg, headers)
 340             else:
 341                 return self.http_error(url, fp, errcode, errmsg, headers, data)
 342
 343     def http_error(self, url, fp, errcode, errmsg, headers, data=None):
 344         """Handle http errors.
 345         Derived class can override this, or provide specific handlers
 346         named http_error_DDD where DDD is the 3-digit error code."""
 347         # First check if there's a specific handler for this error
 348         name = 'http_error_%d' % errcode
 349         if hasattr(self, name):
 350             method = getattr(self, name)
 351             if data is None:
 352                 result = method(url, fp, errcode, errmsg, headers)
 353             else:
 354                 result = method(url, fp, errcode, errmsg, headers, data)
 355             if result: return result
 356         return self.http_error_default(url, fp, errcode, errmsg, headers)
 357
 358     def http_error_default(self, url, fp, errcode, errmsg, headers):
 359         """Default error handler: close the connection and raise IOError."""
 360         void = fp.read()
 361         fp.close()
 362         raise IOError, ('http error', errcode, errmsg, headers)
 363
 364     if hasattr(socket, "ssl"):
 365         def open_https(self, url, data=None):
 366             """Use HTTPS protocol."""
 367             import httplib
 368             user_passwd = None
 369             proxy_passwd = None
 370             if isinstance(url, str):
 371                 host, selector = splithost(url)
 372                 if host:
 373                     user_passwd, host = splituser(host)
 374                     host = unquote(host)
 375                 realhost = host
 376             else:
 377                 host, selector = url
 378                 # here, we determine, whether the proxy contains authorization information
 379                 proxy_passwd, host = splituser(host)
 380                 urltype, rest = splittype(selector)
 381                 url = rest
 382                 user_passwd = None
 383                 if urltype.lower() != 'https':
 384                     realhost = None
 385                 else:
 386                     realhost, rest = splithost(rest)
 387                     if realhost:
 388                         user_passwd, realhost = splituser(realhost)
 389                     if user_passwd:
 390                         selector = "%s://%s%s" % (urltype, realhost, rest)
 391                 #print "proxy via https:", host, selector
 392             if not host: raise IOError, ('https error', 'no host given')
 393             if proxy_passwd:
 394                 import base64
 395                 proxy_auth = base64.b64encode(proxy_passwd).strip()
 396             else:
 397                 proxy_auth = None
 398             if user_passwd:
 399                 import base64
 400                 auth = base64.b64encode(user_passwd).strip()
 401             else:
 402                 auth = None
 403             h = httplib.HTTPS(host, 0,
 404                               key_file=self.key_file,
 405                               cert_file=self.cert_file)
 406             if data is not None:
 407                 h.putrequest('POST', selector)
 408                 h.putheader('Content-Type',
 409                             'application/x-www-form-urlencoded')
 410                 h.putheader('Content-Length', '%d' % len(data))
 411             else:
 412                 h.putrequest('GET', selector)
 413             if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
 414             if auth: h.putheader('Authorization', 'Basic %s' % auth)
 415             if realhost: h.putheader('Host', realhost)
 416             for args in self.addheaders: h.putheader(*args)
 417             h.endheaders()
 418             if data is not None:
 419                 h.send(data)
 420             errcode, errmsg, headers = h.getreply()
 421             fp = h.getfile()
 422             if errcode == -1:
 423                 if fp: fp.close()
 424                 # something went wrong with the HTTP status line
 425                 raise IOError, ('http protocol error', 0,
 426                                 'got a bad status line', None)
 427             if errcode == 200:
 428                 return addinfourl(fp, headers, "https:" + url)
 429             else:
 430                 if data is None:
 431                     return self.http_error(url, fp, errcode, errmsg, headers)
 432                 else:
 433                     return self.http_error(url, fp, errcode, errmsg, headers,
 434                                            data)
 435
 436     def open_gopher(self, url):
 437         """Use Gopher protocol."""
 438         if not isinstance(url, str):
 439             raise IOError, ('gopher error', 'proxy support for gopher protocol currently not implemented')
 440         import gopherlib
 441         host, selector = splithost(url)
 442         if not host: raise IOError, ('gopher error', 'no host given')
 443         host = unquote(host)
 444         type, selector = splitgophertype(selector)
 445         selector, query = splitquery(selector)
 446         selector = unquote(selector)
 447         if query:
 448             query = unquote(query)
 449             fp = gopherlib.send_query(selector, query, host)
 450         else:
 451             fp = gopherlib.send_selector(selector, host)
 452         return addinfourl(fp, noheaders(), "gopher:" + url)
 453
 454     def open_file(self, url):
 455         """Use local file or FTP depending on form of URL."""
 456         if not isinstance(url, str):
 457             raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
 458         if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
 459             return self.open_ftp(url)
 460         else:
 461             return self.open_local_file(url)
 462
 463     def open_local_file(self, url):
 464         """Use local file."""
 465         import mimetypes, mimetools, email.utils
 466         try:
 467             from cStringIO import StringIO
 468         except ImportError:
 469             from StringIO import StringIO
 470         host, file = splithost(url)
 471         localname = url2pathname(file)
 472         try:
 473             stats = os.stat(localname)
 474         except OSError, e:
 475             raise IOError(e.errno, e.strerror, e.filename)
 476         size = stats.st_size
 477         modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
 478         mtype = mimetypes.guess_type(url)[0]
 479         headers = mimetools.Message(StringIO(
 480             'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
 481             (mtype or 'text/plain', size, modified)))
 482         if not host:
 483             urlfile = file
 484             if file[:1] == '/':
 485                 urlfile = 'file://' + file
 486             return addinfourl(open(localname, 'rb'),
 487                               headers, urlfile)
 488         host, port = splitport(host)
 489         if not port \
 490            and socket.gethostbyname(host) in (localhost(), thishost()):
 491             urlfile = file
 492             if file[:1] == '/':
 493                 urlfile = 'file://' + file
 494             return addinfourl(open(localname, 'rb'),
 495                               headers, urlfile)
 496         raise IOError, ('local file error', 'not on local host')
 497
 498     def open_ftp(self, url):
 499         """Use FTP protocol."""
 500         if not isinstance(url, str):
 501             raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
 502         import mimetypes, mimetools
 503         try:
 504             from cStringIO import StringIO
 505         except ImportError:
 506             from StringIO import StringIO
 507         host, path = splithost(url)
 508         if not host: raise IOError, ('ftp error', 'no host given')
 509         host, port = splitport(host)
 510         user, host = splituser(host)
 511         if user: user, passwd = splitpasswd(user)
 512         else: passwd = None
 513         host = unquote(host)
 514         user = unquote(user or '')
 515         passwd = unquote(passwd or '')
 516         host = socket.gethostbyname(host)
 517         if not port:
 518             import ftplib
 519             port = ftplib.FTP_PORT
 520         else:
 521             port = int(port)
 522         path, attrs = splitattr(path)
 523         path = unquote(path)
 524         dirs = path.split('/')
 525         dirs, file = dirs[:-1], dirs[-1]
 526         if dirs and not dirs[0]: dirs = dirs[1:]
 527         if dirs and not dirs[0]: dirs[0] = '/'
 528         key = user, host, port, '/'.join(dirs)
 529         # XXX thread unsafe!
 530         if len(self.ftpcache) > MAXFTPCACHE:
 531             # Prune the cache, rather arbitrarily
 532             for k in self.ftpcache.keys():
 533                 if k != key:
 534                     v = self.ftpcache[k]
 535                     del self.ftpcache[k]
 536                     v.close()
 537         try:
 538             if not key in self.ftpcache:
 539                 self.ftpcache[key] = \
 540                     ftpwrapper(user, passwd, host, port, dirs)
 541             if not file: type = 'D'
 542             else: type = 'I'
 543             for attr in attrs:
 544                 attr, value = splitvalue(attr)
 545                 if attr.lower() == 'type' and \
 546                    value in ('a', 'A', 'i', 'I', 'd', 'D'):
 547                     type = value.upper()
 548             (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
 549             mtype = mimetypes.guess_type("ftp:" + url)[0]
 550             headers = ""
 551             if mtype:
 552                 headers += "Content-Type: %s\n" % mtype
 553             if retrlen is not None and retrlen >= 0:
 554                 headers += "Content-Length: %d\n" % retrlen
 555             headers = mimetools.Message(StringIO(headers))
 556             return addinfourl(fp, headers, "ftp:" + url)
 557         except ftperrors(), msg:
 558             raise IOError, ('ftp error', msg), sys.exc_info()[2]
 559
 560     def open_data(self, url, data=None):
 561         """Use "data" URL."""
 562         if not isinstance(url, str):
 563             raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
 564         # ignore POSTed data
 565         #
 566         # syntax of data URLs:
 567         # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
 568         # mediatype := [ type "/" subtype ] *( ";" parameter )
 569         # data      := *urlchar
 570         # parameter := attribute "=" value
 571         import mimetools
 572         try:
 573             from cStringIO import StringIO
 574         except ImportError:
 575             from StringIO import StringIO
 576         try:
 577             [type, data] = url.split(',', 1)
 578         except ValueError:
 579             raise IOError, ('data error', 'bad data URL')
 580         if not type:
 581             type = 'text/plain;charset=US-ASCII'
 582         semi = type.rfind(';')
 583         if semi >= 0 and '=' not in type[semi:]:
 584             encoding = type[semi+1:]
 585             type = type[:semi]
 586         else:
 587             encoding = ''
 588         msg = []
 589         msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
 590                                             time.gmtime(time.time())))
 591         msg.append('Content-type: %s' % type)
 592         if encoding == 'base64':
 593             import base64
 594             data = base64.decodestring(data)
 595         else:
 596             data = unquote(data)
 597         msg.append('Content-Length: %d' % len(data))
 598         msg.append('')
 599         msg.append(data)
 600         msg = '\n'.join(msg)
 601         f = StringIO(msg)
 602         headers = mimetools.Message(f, 0)
 603         #f.fileno = None     # needed for addinfourl
 604         return addinfourl(f, headers, url)
 605
 606
 607 class FancyURLopener(URLopener):
 608     """Derived class with handlers for errors we can handle (perhaps)."""
 609
 610     def __init__(self, *args, **kwargs):
 611         URLopener.__init__(self, *args, **kwargs)
 612         self.auth_cache = {}
 613         self.tries = 0
 614         self.maxtries = 10
 615
 616     def http_error_default(self, url, fp, errcode, errmsg, headers):
 617         """Default error handling -- don't raise an exception."""
 618         return addinfourl(fp, headers, "http:" + url)
 619
 620     def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
 621         """Error 302 -- relocated (temporarily)."""
 622         self.tries += 1
 623         if self.maxtries and self.tries >= self.maxtries:
 624             if hasattr(self, "http_error_500"):
 625                 meth = self.http_error_500
 626             else:
 627                 meth = self.http_error_default
 628             self.tries = 0
 629             return meth(url, fp, 500,
 630                         "Internal Server Error: Redirect Recursion", headers)
 631         result = self.redirect_internal(url, fp, errcode, errmsg, headers,
 632                                         data)
 633         self.tries = 0
 634         return result
 635
 636     def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
 637         if 'location' in headers:
 638             newurl = headers['location']
 639         elif 'uri' in headers:
 640             newurl = headers['uri']
 641         else:
 642             return
 643         void = fp.read()
 644         fp.close()
 645         # In case the server sent a relative URL, join with original:
 646         newurl = basejoin(self.type + ":" + url, newurl)
 647         return self.open(newurl)
 648
 649     def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
 650         """Error 301 -- also relocated (permanently)."""
 651         return self.http_error_302(url, fp, errcode, errmsg, headers, data)
 652
 653     def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
 654         """Error 303 -- also relocated (essentially identical to 302)."""
 655         return self.http_error_302(url, fp, errcode, errmsg, headers, data)
 656
 657     def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
 658         """Error 307 -- relocated, but turn POST into error."""
 659         if data is None:
 660             return self.http_error_302(url, fp, errcode, errmsg, headers, data)
 661         else:
 662             return self.http_error_default(url, fp, errcode, errmsg, headers)
 663
 664     def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
 665         """Error 401 -- authentication required.
 666         This function supports Basic authentication only."""
 667         if not 'www-authenticate' in headers:
 668             URLopener.http_error_default(self, url, fp,
 669                                          errcode, errmsg, headers)
 670         stuff = headers['www-authenticate']
 671         import re
 672         match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
 673         if not match:
 674             URLopener.http_error_default(self, url, fp,
 675                                          errcode, errmsg, headers)
 676         scheme, realm = match.groups()
 677         if scheme.lower() != 'basic':
 678             URLopener.http_error_default(self, url, fp,
 679                                          errcode, errmsg, headers)
 680         name = 'retry_' + self.type + '_basic_auth'
 681         if data is None:
 682             return getattr(self,name)(url, realm)
 683         else:
 684             return getattr(self,name)(url, realm, data)
 685
 686     def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
 687         """Error 407 -- proxy authentication required.
 688         This function supports Basic authentication only."""
 689         if not 'proxy-authenticate' in headers:
 690             URLopener.http_error_default(self, url, fp,
 691                                          errcode, errmsg, headers)
 692         stuff = headers['proxy-authenticate']
 693         import re
 694         match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
 695         if not match:
 696             URLopener.http_error_default(self, url, fp,
 697                                          errcode, errmsg, headers)
 698         scheme, realm = match.groups()
 699         if scheme.lower() != 'basic':
 700             URLopener.http_error_default(self, url, fp,
 701                                          errcode, errmsg, headers)
 702         name = 'retry_proxy_' + self.type + '_basic_auth'
 703         if data is None:
 704             return getattr(self,name)(url, realm)
 705         else:
 706             return getattr(self,name)(url, realm, data)
 707
 708     def retry_proxy_http_basic_auth(self, url, realm, data=None):
 709         host, selector = splithost(url)
 710         newurl = 'http://' + host + selector
 711         proxy = self.proxies['http']
 712         urltype, proxyhost = splittype(proxy)
 713         proxyhost, proxyselector = splithost(proxyhost)
 714         i = proxyhost.find('@') + 1
 715         proxyhost = proxyhost[i:]
 716         user, passwd = self.get_user_passwd(proxyhost, realm, i)
 717         if not (user or passwd): return None
 718         proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
 719         self.proxies['http'] = 'http://' + proxyhost + proxyselector
 720         if data is None:
 721             return self.open(newurl)
 722         else:
 723             return self.open(newurl, data)
 724
 725     def retry_proxy_https_basic_auth(self, url, realm, data=None):
 726         host, selector = splithost(url)
 727         newurl = 'https://' + host + selector
 728         proxy = self.proxies['https']
 729         urltype, proxyhost = splittype(proxy)
 730         proxyhost, proxyselector = splithost(proxyhost)
 731         i = proxyhost.find('@') + 1
 732         proxyhost = proxyhost[i:]
 733         user, passwd = self.get_user_passwd(proxyhost, realm, i)
 734         if not (user or passwd): return None
 735         proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
 736         self.proxies['https'] = 'https://' + proxyhost + proxyselector
 737         if data is None:
 738             return self.open(newurl)
 739         else:
 740             return self.open(newurl, data)
 741
 742     def retry_http_basic_auth(self, url, realm, data=None):
 743         host, selector = splithost(url)
 744         i = host.find('@') + 1
 745         host = host[i:]
 746         user, passwd = self.get_user_passwd(host, realm, i)
 747         if not (user or passwd): return None
 748         host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
 749         newurl = 'http://' + host + selector
 750         if data is None:
 751             return self.open(newurl)
 752         else:
 753             return self.open(newurl, data)
 754
 755     def retry_https_basic_auth(self, url, realm, data=None):
 756         host, selector = splithost(url)
 757         i = host.find('@') + 1
 758         host = host[i:]
 759         user, passwd = self.get_user_passwd(host, realm, i)
 760         if not (user or passwd): return None
 761         host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
 762         newurl = 'https://' + host + selector
 763         if data is None:
 764             return self.open(newurl)
 765         else:
 766             return self.open(newurl, data)
 767
 768     def get_user_passwd(self, host, realm, clear_cache = 0):
 769         key = realm + '@' + host.lower()
 770         if key in self.auth_cache:
 771             if clear_cache:
 772                 del self.auth_cache[key]
 773             else:
 774                 return self.auth_cache[key]
 775         user, passwd = self.prompt_user_passwd(host, realm)
 776         if user or passwd: self.auth_cache[key] = (user, passwd)
 777         return user, passwd
 778
 779     def prompt_user_passwd(self, host, realm):
 780         """Override this in a GUI environment!"""
 781         import getpass
 782         try:
 783             user = raw_input("Enter username for %s at %s: " % (realm,
 784                                                                 host))
 785             passwd = getpass.getpass("Enter password for %s in %s at %s: " %
 786                 (user, realm, host))
 787             return user, passwd
 788         except KeyboardInterrupt:
 789             print
 790             return None, None
 791
 792
 793 # Utility functions
 794
 795 _localhost = None
 796 def localhost():
 797     """Return the IP address of the magic hostname 'localhost'."""
 798     global _localhost
 799     if _localhost is None:
 800         _localhost = socket.gethostbyname('localhost')
 801     return _localhost
 802
 803 _thishost = None
 804 def thishost():
 805     """Return the IP address of the current host."""
 806     global _thishost
 807     if _thishost is None:
 808         _thishost = socket.gethostbyname(socket.gethostname())
 809     return _thishost
 810
 811 _ftperrors = None
 812 def ftperrors():
 813     """Return the set of errors raised by the FTP class."""
 814     global _ftperrors
 815     if _ftperrors is None:
 816         import ftplib
 817         _ftperrors = ftplib.all_errors
 818     return _ftperrors
 819
 820 _noheaders = None
 821 def noheaders():
 822     """Return an empty mimetools.Message object."""
 823     global _noheaders
 824     if _noheaders is None:
 825         import mimetools
 826         try:
 827             from cStringIO import StringIO
 828         except ImportError:
 829             from StringIO import StringIO
 830         _noheaders = mimetools.Message(StringIO(), 0)
 831         _noheaders.fp.close()   # Recycle file descriptor
 832     return _noheaders
 833
 834
 835 # Utility classes
 836
 837 class ftpwrapper:
 838     """Class used by open_ftp() for cache of open FTP connections."""
 839
 840     def __init__(self, user, passwd, host, port, dirs):
 841         self.user = user
 842         self.passwd = passwd
 843         self.host = host
 844         self.port = port
 845         self.dirs = dirs
 846         self.init()
 847
 848     def init(self):
 849         import ftplib
 850         self.busy = 0
 851         self.ftp = ftplib.FTP()
 852         self.ftp.connect(self.host, self.port)
 853         self.ftp.login(self.user, self.passwd)
 854         for dir in self.dirs:
 855             self.ftp.cwd(dir)
 856
 857     def retrfile(self, file, type):
 858         import ftplib
 859         self.endtransfer()
 860         if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
 861         else: cmd = 'TYPE ' + type; isdir = 0
 862         try:
 863             self.ftp.voidcmd(cmd)
 864         except ftplib.all_errors:
 865             self.init()
 866             self.ftp.voidcmd(cmd)
 867         conn = None
 868         if file and not isdir:
 869             # Try to retrieve as a file
 870             try:
 871                 cmd = 'RETR ' + file
 872                 conn = self.ftp.ntransfercmd(cmd)
 873             except ftplib.error_perm, reason:
 874                 if str(reason)[:3] != '550':
 875                     raise IOError, ('ftp error', reason), sys.exc_info()[2]
 876         if not conn:
 877             # Set transfer mode to ASCII!
 878             self.ftp.voidcmd('TYPE A')
 879             # Try a directory listing
 880             if file: cmd = 'LIST ' + file
 881             else: cmd = 'LIST'
 882             conn = self.ftp.ntransfercmd(cmd)
 883         self.busy = 1
 884         # Pass back both a suitably decorated object and a retrieval length
 885         return (addclosehook(conn[0].makefile('rb'),
 886                              self.endtransfer), conn[1])
 887     def endtransfer(self):
 888         if not self.busy:
 889             return
 890         self.busy = 0
 891         try:
 892             self.ftp.voidresp()
 893         except ftperrors():
 894             pass
 895
 896     def close(self):
 897         self.endtransfer()
 898         try:
 899             self.ftp.close()
 900         except ftperrors():
 901             pass
 902
 903 class addbase:
 904     """Base class for addinfo and addclosehook."""
 905
 906     def __init__(self, fp):
 907         self.fp = fp
 908         self.read = self.fp.read
 909         self.readline = self.fp.readline
 910         if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
 911         if hasattr(self.fp, "fileno"):
 912             self.fileno = self.fp.fileno
 913         else:
 914             self.fileno = lambda: None
 915         if hasattr(self.fp, "__iter__"):
 916             self.__iter__ = self.fp.__iter__
 917             if hasattr(self.fp, "next"):
 918                 self.next = self.fp.next
 919
 920     def __repr__(self):
 921         return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
 922                                              id(self), self.fp)
 923
 924     def close(self):
 925         self.read = None
 926         self.readline = None
 927         self.readlines = None
 928         self.fileno = None
 929         if self.fp: self.fp.close()
 930         self.fp = None
 931
 932 class addclosehook(addbase):
 933     """Class to add a close hook to an open file."""
 934
 935     def __init__(self, fp, closehook, *hookargs):
 936         addbase.__init__(self, fp)
 937         self.closehook = closehook
 938         self.hookargs = hookargs
 939
 940     def close(self):
 941         addbase.close(self)
 942         if self.closehook:
 943             self.closehook(*self.hookargs)
 944             self.closehook = None
 945             self.hookargs = None
 946
 947 class addinfo(addbase):
 948     """class to add an info() method to an open file."""
 949
 950     def __init__(self, fp, headers):
 951         addbase.__init__(self, fp)
 952         self.headers = headers
 953
 954     def info(self):
 955         return self.headers
 956
 957 class addinfourl(addbase):
 958     """class to add info() and geturl() methods to an open file."""
 959
 960     def __init__(self, fp, headers, url):
 961         addbase.__init__(self, fp)
 962         self.headers = headers
 963         self.url = url
 964
 965     def info(self):
 966         return self.headers
 967
 968     def geturl(self):
 969         return self.url
 970
 971
 972 # Utilities to parse URLs (most of these return None for missing parts):
 973 # unwrap('<URL:type://host/path>') --> 'type://host/path'
 974 # splittype('type:opaquestring') --> 'type', 'opaquestring'
 975 # splithost('//host[:port]/path') --> 'host[:port]', '/path'
 976 # splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
 977 # splitpasswd('user:passwd') -> 'user', 'passwd'
 978 # splitport('host:port') --> 'host', 'port'
 979 # splitquery('/path?query') --> '/path', 'query'
 980 # splittag('/path#tag') --> '/path', 'tag'
 981 # splitattr('/path;attr1=value1;attr2=value2;...') ->
 982 #   '/path', ['attr1=value1', 'attr2=value2', ...]
 983 # splitvalue('attr=value') --> 'attr', 'value'
 984 # splitgophertype('/Xselector') --> 'X', 'selector'
 985 # unquote('abc%20def') -> 'abc def'
 986 # quote('abc def') -> 'abc%20def')
 987
 988 try:
 989     unicode
 990 except NameError:
 991     def _is_unicode(x):
 992         return 0
 993 else:
 994     def _is_unicode(x):
 995         return isinstance(x, unicode)
 996
 997 def toBytes(url):
 998     """toBytes(u"URL") --> 'URL'."""
 999     # Most URL schemes require ASCII. If that changes, the conversion
1000     # can be relaxed
1001     if _is_unicode(url):
1002         try:
1003             url = url.encode("ASCII")
1004         except UnicodeError:
1005             raise UnicodeError("URL " + repr(url) +
1006                                " contains non-ASCII characters")
1007     return url
1008
1009 def unwrap(url):
1010     """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
1011     url = url.strip()
1012     if url[:1] == '<' and url[-1:] == '>':
1013         url = url[1:-1].strip()
1014     if url[:4] == 'URL:': url = url[4:].strip()
1015     return url
1016
1017 _typeprog = None
1018 def splittype(url):
1019     """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
1020     global _typeprog
1021     if _typeprog is None:
1022         import re
1023         _typeprog = re.compile('^([^/:]+):')
1024
1025     match = _typeprog.match(url)
1026     if match:
1027         scheme = match.group(1)
1028         return scheme.lower(), url[len(scheme) + 1:]
1029     return None, url
1030
1031 _hostprog = None
1032 def splithost(url):
1033     """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
1034     global _hostprog
1035     if _hostprog is None:
1036         import re
1037         _hostprog = re.compile('^//([^/?]*)(.*)$')
1038
1039     match = _hostprog.match(url)
1040     if match: return match.group(1, 2)
1041     return None, url
1042
1043 _userprog = None
1044 def splituser(host):
1045     """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
1046     global _userprog
1047     if _userprog is None:
1048         import re
1049         _userprog = re.compile('^(.*)@(.*)$')
1050
1051     match = _userprog.match(host)
1052     if match: return map(unquote, match.group(1, 2))
1053     return None, host
1054
1055 _passwdprog = None
1056 def splitpasswd(user):
1057     """splitpasswd('user:passwd') -> 'user', 'passwd'."""
1058     global _passwdprog
1059     if _passwdprog is None:
1060         import re
1061         _passwdprog = re.compile('^([^:]*):(.*)$')
1062
1063     match = _passwdprog.match(user)
1064     if match: return match.group(1, 2)
1065     return user, None
1066
1067 # splittag('/path#tag') --> '/path', 'tag'
1068 _portprog = None
1069 def splitport(host):
1070     """splitport('host:port') --> 'host', 'port'."""
1071     global _portprog
1072     if _portprog is None:
1073         import re
1074         _portprog = re.compile('^(.*):([0-9]+)$')
1075
1076     match = _portprog.match(host)
1077     if match: return match.group(1, 2)
1078     return host, None
1079
1080 _nportprog = None
1081 def splitnport(host, defport=-1):
1082     """Split host and port, returning numeric port.
1083     Return given default port if no ':' found; defaults to -1.
1084     Return numerical port if a valid number are found after ':'.
1085     Return None if ':' but not a valid number."""
1086     global _nportprog
1087     if _nportprog is None:
1088         import re
1089         _nportprog = re.compile('^(.*):(.*)$')
1090
1091     match = _nportprog.match(host)
1092     if match:
1093         host, port = match.group(1, 2)
1094         try:
1095             if not port: raise ValueError, "no digits"
1096             nport = int(port)
1097         except ValueError:
1098             nport = None
1099         return host, nport
1100     return host, defport
1101
1102 _queryprog = None
1103 def splitquery(url):
1104     """splitquery('/path?query') --> '/path', 'query'."""
1105     global _queryprog
1106     if _queryprog is None:
1107         import re
1108         _queryprog = re.compile('^(.*)\?([^?]*)$')
1109
1110     match = _queryprog.match(url)
1111     if match: return match.group(1, 2)
1112     return url, None
1113
1114 _tagprog = None
1115 def splittag(url):
1116     """splittag('/path#tag') --> '/path', 'tag'."""
1117     global _tagprog
1118     if _tagprog is None:
1119         import re
1120         _tagprog = re.compile('^(.*)#([^#]*)$')
1121
1122     match = _tagprog.match(url)
1123     if match: return match.group(1, 2)
1124     return url, None
1125
1126 def splitattr(url):
1127     """splitattr('/path;attr1=value1;attr2=value2;...') ->
1128         '/path', ['attr1=value1', 'attr2=value2', ...]."""
1129     words = url.split(';')
1130     return words[0], words[1:]
1131
1132 _valueprog = None
1133 def splitvalue(attr):
1134     """splitvalue('attr=value') --> 'attr', 'value'."""
1135     global _valueprog
1136     if _valueprog is None:
1137         import re
1138         _valueprog = re.compile('^([^=]*)=(.*)$')
1139
1140     match = _valueprog.match(attr)
1141     if match: return match.group(1, 2)
1142     return attr, None
1143
1144 def splitgophertype(selector):
1145     """splitgophertype('/Xselector') --> 'X', 'selector'."""
1146     if selector[:1] == '/' and selector[1:2]:
1147         return selector[1], selector[2:]
1148     return None, selector
1149
1150 _hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
1151 _hextochr.update(('%02X' % i, chr(i)) for i in range(256))
1152
1153 def unquote(s):
1154     """unquote('abc%20def') -> 'abc def'."""
1155     res = s.split('%')
1156     for i in xrange(1, len(res)):
1157         item = res[i]
1158         try:
1159             res[i] = _hextochr[item[:2]] + item[2:]
1160         except KeyError:
1161             res[i] = '%' + item
1162         except UnicodeDecodeError:
1163             res[i] = unichr(int(item[:2], 16)) + item[2:]
1164     return "".join(res)
1165
1166 def unquote_plus(s):
1167     """unquote('%7e/abc+def') -> '~/abc def'"""
1168     s = s.replace('+', ' ')
1169     return unquote(s)
1170
1171 always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
1172                'abcdefghijklmnopqrstuvwxyz'
1173                '0123456789' '_.-')
1174 _safemaps = {}
1175
1176 def quote(s, safe = '/'):
1177     """quote('abc def') -> 'abc%20def'
1178
1179     Each part of a URL, e.g. the path info, the query, etc., has a
1180     different set of reserved characters that must be quoted.
1181
1182     RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
1183     the following reserved characters.
1184
1185     reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
1186                   "$" | ","
1187
1188     Each of these characters is reserved in some component of a URL,
1189     but not necessarily in all of them.
1190
1191     By default, the quote function is intended for quoting the path
1192     section of a URL.  Thus, it will not encode '/'.  This character
1193     is reserved, but in typical usage the quote function is being
1194     called on a path where the existing slash characters are used as
1195     reserved characters.
1196     """
1197     cachekey = (safe, always_safe)
1198     try:
1199         safe_map = _safemaps[cachekey]
1200     except KeyError:
1201         safe += always_safe
1202         safe_map = {}
1203         for i in range(256):
1204             c = chr(i)
1205             safe_map[c] = (c in safe) and c or ('%%%02X' % i)
1206         _safemaps[cachekey] = safe_map
1207     res = map(safe_map.__getitem__, s)
1208     return ''.join(res)
1209
1210 def quote_plus(s, safe = ''):
1211     """Quote the query fragment of a URL; replacing ' ' with '+'"""
1212     if ' ' in s:
1213         s = quote(s, safe + ' ')
1214         return s.replace(' ', '+')
1215     return quote(s, safe)
1216
1217 def urlencode(query,doseq=0):
1218     """Encode a sequence of two-element tuples or dictionary into a URL query string.
1219
1220     If any values in the query arg are sequences and doseq is true, each
1221     sequence element is converted to a separate parameter.
1222
1223     If the query arg is a sequence of two-element tuples, the order of the
1224     parameters in the output will match the order of parameters in the
1225     input.
1226     """
1227
1228     if hasattr(query,"items"):
1229         # mapping objects
1230         query = query.items()
1231     else:
1232         # it's a bother at times that strings and string-like objects are
1233         # sequences...
1234         try:
1235             # non-sequence items should not work with len()
1236             # non-empty strings will fail this
1237             if len(query) and not isinstance(query[0], tuple):
1238                 raise TypeError
1239             # zero-length sequences of all types will get here and succeed,
1240             # but that's a minor nit - since the original implementation
1241             # allowed empty dicts that type of behavior probably should be
1242             # preserved for consistency
1243         except TypeError:
1244             ty,va,tb = sys.exc_info()
1245             raise TypeError, "not a valid non-string sequence or mapping object", tb
1246
1247     l = []
1248     if not doseq:
1249         # preserve old behavior
1250         for k, v in query:
1251             k = quote_plus(str(k))
1252             v = quote_plus(str(v))
1253             l.append(k + '=' + v)
1254     else:
1255         for k, v in query:
1256             k = quote_plus(str(k))
1257             if isinstance(v, str):
1258                 v = quote_plus(v)
1259                 l.append(k + '=' + v)
1260             elif _is_unicode(v):
1261                 # is there a reasonable way to convert to ASCII?
1262                 # encode generates a string, but "replace" or "ignore"
1263                 # lose information and "strict" can raise UnicodeError
1264                 v = quote_plus(v.encode("ASCII","replace"))
1265                 l.append(k + '=' + v)
1266             else:
1267                 try:
1268                     # is this a sufficient test for sequence-ness?
1269                     x = len(v)
1270                 except TypeError:
1271                     # not a sequence
1272                     v = quote_plus(str(v))
1273                     l.append(k + '=' + v)
1274                 else:
1275                     # loop over the sequence
1276                     for elt in v:
1277                         l.append(k + '=' + quote_plus(str(elt)))
1278     return '&'.join(l)
1279
1280 # Proxy handling
1281 def getproxies_environment():
1282     """Return a dictionary of scheme -> proxy server URL mappings.
1283
1284     Scan the environment for variables named <scheme>_proxy;
1285     this seems to be the standard convention.  If you need a
1286     different way, you can pass a proxies dictionary to the
1287     [Fancy]URLopener constructor.
1288
1289     """
1290     proxies = {}
1291     for name, value in os.environ.items():
1292         name = name.lower()
1293         if value and name[-6:] == '_proxy':
1294             proxies[name[:-6]] = value
1295     return proxies
1296
1297 if sys.platform == 'darwin':
1298     def getproxies_internetconfig():
1299         """Return a dictionary of scheme -> proxy server URL mappings.
1300
1301         By convention the mac uses Internet Config to store
1302         proxies.  An HTTP proxy, for instance, is stored under
1303         the HttpProxy key.
1304
1305         """
1306         try:
1307             import ic
1308         except ImportError:
1309             return {}
1310
1311         try:
1312             config = ic.IC()
1313         except ic.error:
1314             return {}
1315         proxies = {}
1316         # HTTP:
1317         if 'UseHTTPProxy' in config and config['UseHTTPProxy']:
1318             try:
1319                 value = config['HTTPProxyHost']
1320             except ic.error:
1321                 pass
1322             else:
1323                 proxies['http'] = 'http://%s' % value
1324         # FTP: XXXX To be done.
1325         # Gopher: XXXX To be done.
1326         return proxies
1327
1328     def proxy_bypass(x):
1329         return 0
1330
1331     def getproxies():
1332         return getproxies_environment() or getproxies_internetconfig()
1333
1334 elif os.name == 'nt':
1335     def getproxies_registry():
1336         """Return a dictionary of scheme -> proxy server URL mappings.
1337
1338         Win32 uses the registry to store proxies.
1339
1340         """
1341         proxies = {}
1342         try:
1343             import _winreg
1344         except ImportError:
1345             # Std module, so should be around - but you never know!
1346             return proxies
1347         try:
1348             internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1349                 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1350             proxyEnable = _winreg.QueryValueEx(internetSettings,
1351                                                'ProxyEnable')[0]
1352             if proxyEnable:
1353                 # Returned as Unicode but problems if not converted to ASCII
1354                 proxyServer = str(_winreg.QueryValueEx(internetSettings,
1355                                                        'ProxyServer')[0])
1356                 if '=' in proxyServer:
1357                     # Per-protocol settings
1358                     for p in proxyServer.split(';'):
1359                         protocol, address = p.split('=', 1)
1360                         # See if address has a type:// prefix
1361                         import re
1362                         if not re.match('^([^/:]+)://', address):
1363                             address = '%s://%s' % (protocol, address)
1364                         proxies[protocol] = address
1365                 else:
1366                     # Use one setting for all protocols
1367                     if proxyServer[:5] == 'http:':
1368                         proxies['http'] = proxyServer
1369                     else:
1370                         proxies['http'] = 'http://%s' % proxyServer
1371                         proxies['ftp'] = 'ftp://%s' % proxyServer
1372             internetSettings.Close()
1373         except (WindowsError, ValueError, TypeError):
1374             # Either registry key not found etc, or the value in an
1375             # unexpected format.
1376             # proxies already set up to be empty so nothing to do
1377             pass
1378         return proxies
1379
1380     def getproxies():
1381         """Return a dictionary of scheme -> proxy server URL mappings.
1382
1383         Returns settings gathered from the environment, if specified,
1384         or the registry.
1385
1386         """
1387         return getproxies_environment() or getproxies_registry()
1388
1389     def proxy_bypass(host):
1390         try:
1391             import _winreg
1392             import re
1393         except ImportError:
1394             # Std modules, so should be around - but you never know!
1395             return 0
1396         try:
1397             internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1398                 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1399             proxyEnable = _winreg.QueryValueEx(internetSettings,
1400                                                'ProxyEnable')[0]
1401             proxyOverride = str(_winreg.QueryValueEx(internetSettings,
1402                                                      'ProxyOverride')[0])
1403             # ^^^^ Returned as Unicode but problems if not converted to ASCII
1404         except WindowsError:
1405             return 0
1406         if not proxyEnable or not proxyOverride:
1407             return 0
1408         # try to make a host list from name and IP address.
1409         rawHost, port = splitport(host)
1410         host = [rawHost]
1411         try:
1412             addr = socket.gethostbyname(rawHost)
1413             if addr != rawHost:
1414                 host.append(addr)
1415         except socket.error:
1416             pass
1417         try:
1418             fqdn = socket.getfqdn(rawHost)
1419             if fqdn != rawHost:
1420                 host.append(fqdn)
1421         except socket.error:
1422             pass
1423         # make a check value list from the registry entry: replace the
1424         # '<local>' string by the localhost entry and the corresponding
1425         # canonical entry.
1426         proxyOverride = proxyOverride.split(';')
1427         i = 0
1428         while i < len(proxyOverride):
1429             if proxyOverride[i] == '<local>':
1430                 proxyOverride[i:i+1] = ['localhost',
1431                                         '127.0.0.1',
1432                                         socket.gethostname(),
1433                                         socket.gethostbyname(
1434                                             socket.gethostname())]
1435             i += 1
1436         # print proxyOverride
1437         # now check if we match one of the registry values.
1438         for test in proxyOverride:
1439             test = test.replace(".", r"\.")     # mask dots
1440             test = test.replace("*", r".*")     # change glob sequence
1441             test = test.replace("?", r".")      # change glob char
1442             for val in host:
1443                 # print "%s <--> %s" %( test, val )
1444                 if re.match(test, val, re.I):
1445                     return 1
1446         return 0
1447
1448 else:
1449     # By default use environment variables
1450     getproxies = getproxies_environment
1451
1452     def proxy_bypass(host):
1453         return 0
1454
1455 # Test and time quote() and unquote()
1456 def test1():
1457     s = ''
1458     for i in range(256): s = s + chr(i)
1459     s = s*4
1460     t0 = time.time()
1461     qs = quote(s)
1462     uqs = unquote(qs)
1463     t1 = time.time()
1464     if uqs != s:
1465         print 'Wrong!'
1466     print repr(s)
1467     print repr(qs)
1468     print repr(uqs)
1469     print round(t1 - t0, 3), 'sec'
1470
1471
1472 def reporthook(blocknum, blocksize, totalsize):
1473     # Report during remote transfers
1474     print "Block number: %d, Block size: %d, Total size: %d" % (
1475         blocknum, blocksize, totalsize)
1476
1477 # Test program
1478 def test(args=[]):
1479     if not args:
1480         args = [
1481             '/etc/passwd',
1482             'file:/etc/passwd',
1483             'file://localhost/etc/passwd',
1484             'ftp://ftp.gnu.org/pub/README',
1485 ##          'gopher://gopher.micro.umn.edu/1/',
1486             'http://www.python.org/index.html',
1487             ]
1488         if hasattr(URLopener, "open_https"):
1489             args.append('https://synergy.as.cmu.edu/~geek/')
1490     try:
1491         for url in args:
1492             print '-'*10, url, '-'*10
1493             fn, h = urlretrieve(url, None, reporthook)
1494             print fn
1495             if h:
1496                 print '======'
1497                 for k in h.keys(): print k + ':', h[k]
1498                 print '======'
1499             fp = open(fn, 'rb')
1500             data = fp.read()
1501             del fp
1502             if '\r' in data:
1503                 table = string.maketrans("", "")
1504                 data = data.translate(table, "\r")
1505             print data
1506             fn, h = None, None
1507         print '-'*40
1508     finally:
1509         urlcleanup()
1510
1511 def main():
1512     import getopt, sys
1513     try:
1514         opts, args = getopt.getopt(sys.argv[1:], "th")
1515     except getopt.error, msg:
1516         print msg
1517         print "Use -h for help"
1518         return
1519     t = 0
1520     for o, a in opts:
1521         if o == '-t':
1522             t = t + 1
1523         if o == '-h':
1524             print "Usage: python urllib.py [-t] [url ...]"
1525             print "-t runs self-test;",
1526             print "otherwise, contents of urls are printed"
1527             return
1528     if t:
1529         if t > 1:
1530             test1()
1531         test(args)
1532     else:
1533         if not args:
1534             print "Use -h for help"
1535         for url in args:
1536             print urlopen(url).read(),
1537
1538 # Run test program when run as a script
1539 if __name__ == '__main__':
1540     main()