Lib/urllib.py

   1 """Open an arbitrary URL.
   2
   3 See the following document for more info on URLs:
   4 "Names and Addresses, URIs, URLs, URNs, URCs", at
   5 http://www.w3.org/pub/WWW/Addressing/Overview.html
   6
   7 See also the HTTP spec (from which the error codes are derived):
   8 "HTTP - Hypertext Transfer Protocol", at
   9 http://www.w3.org/pub/WWW/Protocols/
  10
  11 Related standards and specs:
  12 - RFC1808: the "relative URL" spec. (authoritative status)
  13 - RFC1738 - the "URL standard". (authoritative status)
  14 - RFC1630 - the "URI spec". (informational status)
  15
  16 The object returned by URLopener().open(file) will differ per
  17 protocol.  All you know is that is has methods read(), readline(),
  18 readlines(), fileno(), close() and info().  The read*(), fileno()
  19 and close() methods work like those of open files.
  20 The info() method returns a mimetools.Message object which can be
  21 used to query various info about the object, if available.
  22 (mimetools.Message objects are queried with the getheader() method.)
  23 """
  24
  25 import string
  26 import socket
  27 import os
  28 import time
  29 import sys
  30 from urlparse import urljoin as basejoin
  31
  32 __all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
  33            "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
  34            "urlencode", "url2pathname", "pathname2url", "splittag",
  35            "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
  36            "splittype", "splithost", "splituser", "splitpasswd", "splitport",
  37            "splitnport", "splitquery", "splitattr", "splitvalue",
  38            "splitgophertype", "getproxies"]
  39
  40 __version__ = '1.17'    # XXX This version is not always updated :-(
  41
  42 MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
  43
  44 # Helper for non-unix systems
  45 if os.name == 'mac':
  46     from macurl2path import url2pathname, pathname2url
  47 elif os.name == 'nt':
  48     from nturl2path import url2pathname, pathname2url
  49 elif os.name == 'riscos':
  50     from rourl2path import url2pathname, pathname2url
  51 else:
  52     def url2pathname(pathname):
  53         """OS-specific conversion from a relative URL of the 'file' scheme
  54         to a file system path; not recommended for general use."""
  55         return unquote(pathname)
  56
  57     def pathname2url(pathname):
  58         """OS-specific conversion from a file system path to a relative URL
  59         of the 'file' scheme; not recommended for general use."""
  60         return quote(pathname)
  61
  62 # This really consists of two pieces:
  63 # (1) a class which handles opening of all sorts of URLs
  64 #     (plus assorted utilities etc.)
  65 # (2) a set of functions for parsing URLs
  66 # XXX Should these be separated out into different modules?
  67
  68
  69 # Shortcut for basic usage
  70 _urlopener = None
  71 def urlopen(url, data=None, proxies=None):
  72     """urlopen(url [, data]) -> open file-like object"""
  73     global _urlopener
  74     if proxies is not None:
  75         opener = FancyURLopener(proxies=proxies)
  76     elif not _urlopener:
  77         opener = FancyURLopener()
  78         _urlopener = opener
  79     else:
  80         opener = _urlopener
  81     if data is None:
  82         return opener.open(url)
  83     else:
  84         return opener.open(url, data)
  85 def urlretrieve(url, filename=None, reporthook=None, data=None):
  86     global _urlopener
  87     if not _urlopener:
  88         _urlopener = FancyURLopener()
  89     return _urlopener.retrieve(url, filename, reporthook, data)
  90 def urlcleanup():
  91     if _urlopener:
  92         _urlopener.cleanup()
  93
  94 # exception raised when downloaded size does not match content-length
  95 class ContentTooShortError(IOError):
  96     def __init__(self, message, content):
  97         IOError.__init__(self, message)
  98         self.content = content
  99
 100 ftpcache = {}
 101 class URLopener:
 102     """Class to open URLs.
 103     This is a class rather than just a subroutine because we may need
 104     more than one set of global protocol-specific options.
 105     Note -- this is a base class for those who don't want the
 106     automatic handling of errors type 302 (relocated) and 401
 107     (authorization needed)."""
 108
 109     __tempfiles = None
 110
 111     version = "Python-urllib/%s" % __version__
 112
 113     # Constructor
 114     def __init__(self, proxies=None, **x509):
 115         if proxies is None:
 116             proxies = getproxies()
 117         assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
 118         self.proxies = proxies
 119         self.key_file = x509.get('key_file')
 120         self.cert_file = x509.get('cert_file')
 121         self.addheaders = [('User-agent', self.version)]
 122         self.__tempfiles = []
 123         self.__unlink = os.unlink # See cleanup()
 124         self.tempcache = None
 125         # Undocumented feature: if you assign {} to tempcache,
 126         # it is used to cache files retrieved with
 127         # self.retrieve().  This is not enabled by default
 128         # since it does not work for changing documents (and I
 129         # haven't got the logic to check expiration headers
 130         # yet).
 131         self.ftpcache = ftpcache
 132         # Undocumented feature: you can use a different
 133         # ftp cache by assigning to the .ftpcache member;
 134         # in case you want logically independent URL openers
 135         # XXX This is not threadsafe.  Bah.
 136
 137     def __del__(self):
 138         self.close()
 139
 140     def close(self):
 141         self.cleanup()
 142
 143     def cleanup(self):
 144         # This code sometimes runs when the rest of this module
 145         # has already been deleted, so it can't use any globals
 146         # or import anything.
 147         if self.__tempfiles:
 148             for file in self.__tempfiles:
 149                 try:
 150                     self.__unlink(file)
 151                 except OSError:
 152                     pass
 153             del self.__tempfiles[:]
 154         if self.tempcache:
 155             self.tempcache.clear()
 156
 157     def addheader(self, *args):
 158         """Add a header to be used by the HTTP interface only
 159         e.g. u.addheader('Accept', 'sound/basic')"""
 160         self.addheaders.append(args)
 161
 162     # External interface
 163     def open(self, fullurl, data=None):
 164         """Use URLopener().open(file) instead of open(file, 'r')."""
 165         fullurl = unwrap(toBytes(fullurl))
 166         if self.tempcache and fullurl in self.tempcache:
 167             filename, headers = self.tempcache[fullurl]
 168             fp = open(filename, 'rb')
 169             return addinfourl(fp, headers, fullurl)
 170         urltype, url = splittype(fullurl)
 171         if not urltype:
 172             urltype = 'file'
 173         if urltype in self.proxies:
 174             proxy = self.proxies[urltype]
 175             urltype, proxyhost = splittype(proxy)
 176             host, selector = splithost(proxyhost)
 177             url = (host, fullurl) # Signal special case to open_*()
 178         else:
 179             proxy = None
 180         name = 'open_' + urltype
 181         self.type = urltype
 182         name = name.replace('-', '_')
 183         if not hasattr(self, name):
 184             if proxy:
 185                 return self.open_unknown_proxy(proxy, fullurl, data)
 186             else:
 187                 return self.open_unknown(fullurl, data)
 188         try:
 189             if data is None:
 190                 return getattr(self, name)(url)
 191             else:
 192                 return getattr(self, name)(url, data)
 193         except socket.error, msg:
 194             raise IOError, ('socket error', msg), sys.exc_info()[2]
 195
 196     def open_unknown(self, fullurl, data=None):
 197         """Overridable interface to open unknown URL type."""
 198         type, url = splittype(fullurl)
 199         raise IOError, ('url error', 'unknown url type', type)
 200
 201     def open_unknown_proxy(self, proxy, fullurl, data=None):
 202         """Overridable interface to open unknown URL type."""
 203         type, url = splittype(fullurl)
 204         raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
 205
 206     # External interface
 207     def retrieve(self, url, filename=None, reporthook=None, data=None):
 208         """retrieve(url) returns (filename, headers) for a local object
 209         or (tempfilename, headers) for a remote object."""
 210         url = unwrap(toBytes(url))
 211         if self.tempcache and url in self.tempcache:
 212             return self.tempcache[url]
 213         type, url1 = splittype(url)
 214         if filename is None and (not type or type == 'file'):
 215             try:
 216                 fp = self.open_local_file(url1)
 217                 hdrs = fp.info()
 218                 del fp
 219                 return url2pathname(splithost(url1)[1]), hdrs
 220             except IOError, msg:
 221                 pass
 222         fp = self.open(url, data)
 223         headers = fp.info()
 224         if filename:
 225             tfp = open(filename, 'wb')
 226         else:
 227             import tempfile
 228             garbage, path = splittype(url)
 229             garbage, path = splithost(path or "")
 230             path, garbage = splitquery(path or "")
 231             path, garbage = splitattr(path or "")
 232             suffix = os.path.splitext(path)[1]
 233             (fd, filename) = tempfile.mkstemp(suffix)
 234             self.__tempfiles.append(filename)
 235             tfp = os.fdopen(fd, 'wb')
 236         result = filename, headers
 237         if self.tempcache is not None:
 238             self.tempcache[url] = result
 239         bs = 1024*8
 240         size = -1
 241         read = 0
 242         blocknum = 0
 243         if reporthook:
 244             if "content-length" in headers:
 245                 size = int(headers["Content-Length"])
 246             reporthook(blocknum, bs, size)
 247         while 1:
 248             block = fp.read(bs)
 249             if block == "":
 250                 break
 251             read += len(block)
 252             tfp.write(block)
 253             blocknum += 1
 254             if reporthook:
 255                 reporthook(blocknum, bs, size)
 256         fp.close()
 257         tfp.close()
 258         del fp
 259         del tfp
 260
 261         # raise exception if actual size does not match content-length header
 262         if size >= 0 and read < size:
 263             raise ContentTooShortError("retrieval incomplete: got only %i out "
 264                                        "of %i bytes" % (read, size), result)
 265
 266         return result
 267
 268     # Each method named open_<type> knows how to open that type of URL
 269
 270     def open_http(self, url, data=None):
 271         """Use HTTP protocol."""
 272         import httplib
 273         user_passwd = None
 274         proxy_passwd= None
 275         if isinstance(url, str):
 276             host, selector = splithost(url)
 277             if host:
 278                 user_passwd, host = splituser(host)
 279                 host = unquote(host)
 280             realhost = host
 281         else:
 282             host, selector = url
 283             # check whether the proxy contains authorization information
 284             proxy_passwd, host = splituser(host)
 285             # now we proceed with the url we want to obtain
 286             urltype, rest = splittype(selector)
 287             url = rest
 288             user_passwd = None
 289             if urltype.lower() != 'http':
 290                 realhost = None
 291             else:
 292                 realhost, rest = splithost(rest)
 293                 if realhost:
 294                     user_passwd, realhost = splituser(realhost)
 295                 if user_passwd:
 296                     selector = "%s://%s%s" % (urltype, realhost, rest)
 297                 if proxy_bypass(realhost):
 298                     host = realhost
 299
 300             #print "proxy via http:", host, selector
 301         if not host: raise IOError, ('http error', 'no host given')
 302
 303         if proxy_passwd:
 304             import base64
 305             proxy_auth = base64.encodestring(proxy_passwd).strip()
 306         else:
 307             proxy_auth = None
 308
 309         if user_passwd:
 310             import base64
 311             auth = base64.encodestring(user_passwd).strip()
 312         else:
 313             auth = None
 314         h = httplib.HTTP(host)
 315         if data is not None:
 316             h.putrequest('POST', selector)
 317             h.putheader('Content-type', 'application/x-www-form-urlencoded')
 318             h.putheader('Content-length', '%d' % len(data))
 319         else:
 320             h.putrequest('GET', selector)
 321         if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
 322         if auth: h.putheader('Authorization', 'Basic %s' % auth)
 323         if realhost: h.putheader('Host', realhost)
 324         for args in self.addheaders: h.putheader(*args)
 325         h.endheaders()
 326         if data is not None:
 327             h.send(data)
 328         errcode, errmsg, headers = h.getreply()
 329         fp = h.getfile()
 330         if errcode == 200:
 331             return addinfourl(fp, headers, "http:" + url)
 332         else:
 333             if data is None:
 334                 return self.http_error(url, fp, errcode, errmsg, headers)
 335             else:
 336                 return self.http_error(url, fp, errcode, errmsg, headers, data)
 337
 338     def http_error(self, url, fp, errcode, errmsg, headers, data=None):
 339         """Handle http errors.
 340         Derived class can override this, or provide specific handlers
 341         named http_error_DDD where DDD is the 3-digit error code."""
 342         # First check if there's a specific handler for this error
 343         name = 'http_error_%d' % errcode
 344         if hasattr(self, name):
 345             method = getattr(self, name)
 346             if data is None:
 347                 result = method(url, fp, errcode, errmsg, headers)
 348             else:
 349                 result = method(url, fp, errcode, errmsg, headers, data)
 350             if result: return result
 351         return self.http_error_default(url, fp, errcode, errmsg, headers)
 352
 353     def http_error_default(self, url, fp, errcode, errmsg, headers):
 354         """Default error handler: close the connection and raise IOError."""
 355         void = fp.read()
 356         fp.close()
 357         raise IOError, ('http error', errcode, errmsg, headers)
 358
 359     if hasattr(socket, "ssl"):
 360         def open_https(self, url, data=None):
 361             """Use HTTPS protocol."""
 362             import httplib
 363             user_passwd = None
 364             proxy_passwd = None
 365             if isinstance(url, str):
 366                 host, selector = splithost(url)
 367                 if host:
 368                     user_passwd, host = splituser(host)
 369                     host = unquote(host)
 370                 realhost = host
 371             else:
 372                 host, selector = url
 373                 # here, we determine, whether the proxy contains authorization information
 374                 proxy_passwd, host = splituser(host)
 375                 urltype, rest = splittype(selector)
 376                 url = rest
 377                 user_passwd = None
 378                 if urltype.lower() != 'https':
 379                     realhost = None
 380                 else:
 381                     realhost, rest = splithost(rest)
 382                     if realhost:
 383                         user_passwd, realhost = splituser(realhost)
 384                     if user_passwd:
 385                         selector = "%s://%s%s" % (urltype, realhost, rest)
 386                 #print "proxy via https:", host, selector
 387             if not host: raise IOError, ('https error', 'no host given')
 388             if proxy_passwd:
 389                 import base64
 390                 proxy_auth = base64.encodestring(proxy_passwd).strip()
 391             else:
 392                 proxy_auth = None
 393             if user_passwd:
 394                 import base64
 395                 auth = base64.encodestring(user_passwd).strip()
 396             else:
 397                 auth = None
 398             h = httplib.HTTPS(host, 0,
 399                               key_file=self.key_file,
 400                               cert_file=self.cert_file)
 401             if data is not None:
 402                 h.putrequest('POST', selector)
 403                 h.putheader('Content-type',
 404                             'application/x-www-form-urlencoded')
 405                 h.putheader('Content-length', '%d' % len(data))
 406             else:
 407                 h.putrequest('GET', selector)
 408             if proxy_auth: h.putheader('Proxy-Authorization: Basic %s' % proxy_auth)
 409             if auth: h.putheader('Authorization: Basic %s' % auth)
 410             if realhost: h.putheader('Host', realhost)
 411             for args in self.addheaders: h.putheader(*args)
 412             h.endheaders()
 413             if data is not None:
 414                 h.send(data)
 415             errcode, errmsg, headers = h.getreply()
 416             fp = h.getfile()
 417             if errcode == 200:
 418                 return addinfourl(fp, headers, "https:" + url)
 419             else:
 420                 if data is None:
 421                     return self.http_error(url, fp, errcode, errmsg, headers)
 422                 else:
 423                     return self.http_error(url, fp, errcode, errmsg, headers,
 424                                            data)
 425
 426     def open_gopher(self, url):
 427         """Use Gopher protocol."""
 428         if not isinstance(url, str):
 429             raise IOError, ('gopher error', 'proxy support for gopher protocol currently not implemented')
 430         import gopherlib
 431         host, selector = splithost(url)
 432         if not host: raise IOError, ('gopher error', 'no host given')
 433         host = unquote(host)
 434         type, selector = splitgophertype(selector)
 435         selector, query = splitquery(selector)
 436         selector = unquote(selector)
 437         if query:
 438             query = unquote(query)
 439             fp = gopherlib.send_query(selector, query, host)
 440         else:
 441             fp = gopherlib.send_selector(selector, host)
 442         return addinfourl(fp, noheaders(), "gopher:" + url)
 443
 444     def open_file(self, url):
 445         if not isinstance(url, str):
 446             raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
 447         """Use local file or FTP depending on form of URL."""
 448         if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
 449             return self.open_ftp(url)
 450         else:
 451             return self.open_local_file(url)
 452
 453     def open_local_file(self, url):
 454         """Use local file."""
 455         import mimetypes, mimetools, email.Utils
 456         try:
 457             from cStringIO import StringIO
 458         except ImportError:
 459             from StringIO import StringIO
 460         host, file = splithost(url)
 461         localname = url2pathname(file)
 462         try:
 463             stats = os.stat(localname)
 464         except OSError, e:
 465             raise IOError(e.errno, e.strerror, e.filename)
 466         size = stats.st_size
 467         modified = email.Utils.formatdate(stats.st_mtime, usegmt=True)
 468         mtype = mimetypes.guess_type(url)[0]
 469         headers = mimetools.Message(StringIO(
 470             'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
 471             (mtype or 'text/plain', size, modified)))
 472         if not host:
 473             urlfile = file
 474             if file[:1] == '/':
 475                 urlfile = 'file://' + file
 476             return addinfourl(open(localname, 'rb'),
 477                               headers, urlfile)
 478         host, port = splitport(host)
 479         if not port \
 480            and socket.gethostbyname(host) in (localhost(), thishost()):
 481             urlfile = file
 482             if file[:1] == '/':
 483                 urlfile = 'file://' + file
 484             return addinfourl(open(localname, 'rb'),
 485                               headers, urlfile)
 486         raise IOError, ('local file error', 'not on local host')
 487
 488     def open_ftp(self, url):
 489         """Use FTP protocol."""
 490         if not isinstance(url, str):
 491             raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
 492         import mimetypes, mimetools
 493         try:
 494             from cStringIO import StringIO
 495         except ImportError:
 496             from StringIO import StringIO
 497         host, path = splithost(url)
 498         if not host: raise IOError, ('ftp error', 'no host given')
 499         host, port = splitport(host)
 500         user, host = splituser(host)
 501         if user: user, passwd = splitpasswd(user)
 502         else: passwd = None
 503         host = unquote(host)
 504         user = unquote(user or '')
 505         passwd = unquote(passwd or '')
 506         host = socket.gethostbyname(host)
 507         if not port:
 508             import ftplib
 509             port = ftplib.FTP_PORT
 510         else:
 511             port = int(port)
 512         path, attrs = splitattr(path)
 513         path = unquote(path)
 514         dirs = path.split('/')
 515         dirs, file = dirs[:-1], dirs[-1]
 516         if dirs and not dirs[0]: dirs = dirs[1:]
 517         if dirs and not dirs[0]: dirs[0] = '/'
 518         key = user, host, port, '/'.join(dirs)
 519         # XXX thread unsafe!
 520         if len(self.ftpcache) > MAXFTPCACHE:
 521             # Prune the cache, rather arbitrarily
 522             for k in self.ftpcache.keys():
 523                 if k != key:
 524                     v = self.ftpcache[k]
 525                     del self.ftpcache[k]
 526                     v.close()
 527         try:
 528             if not key in self.ftpcache:
 529                 self.ftpcache[key] = \
 530                     ftpwrapper(user, passwd, host, port, dirs)
 531             if not file: type = 'D'
 532             else: type = 'I'
 533             for attr in attrs:
 534                 attr, value = splitvalue(attr)
 535                 if attr.lower() == 'type' and \
 536                    value in ('a', 'A', 'i', 'I', 'd', 'D'):
 537                     type = value.upper()
 538             (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
 539             mtype = mimetypes.guess_type("ftp:" + url)[0]
 540             headers = ""
 541             if mtype:
 542                 headers += "Content-Type: %s\n" % mtype
 543             if retrlen is not None and retrlen >= 0:
 544                 headers += "Content-Length: %d\n" % retrlen
 545             headers = mimetools.Message(StringIO(headers))
 546             return addinfourl(fp, headers, "ftp:" + url)
 547         except ftperrors(), msg:
 548             raise IOError, ('ftp error', msg), sys.exc_info()[2]
 549
 550     def open_data(self, url, data=None):
 551         """Use "data" URL."""
 552         if not isinstance(url, str):
 553             raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
 554         # ignore POSTed data
 555         #
 556         # syntax of data URLs:
 557         # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
 558         # mediatype := [ type "/" subtype ] *( ";" parameter )
 559         # data      := *urlchar
 560         # parameter := attribute "=" value
 561         import mimetools
 562         try:
 563             from cStringIO import StringIO
 564         except ImportError:
 565             from StringIO import StringIO
 566         try:
 567             [type, data] = url.split(',', 1)
 568         except ValueError:
 569             raise IOError, ('data error', 'bad data URL')
 570         if not type:
 571             type = 'text/plain;charset=US-ASCII'
 572         semi = type.rfind(';')
 573         if semi >= 0 and '=' not in type[semi:]:
 574             encoding = type[semi+1:]
 575             type = type[:semi]
 576         else:
 577             encoding = ''
 578         msg = []
 579         msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
 580                                             time.gmtime(time.time())))
 581         msg.append('Content-type: %s' % type)
 582         if encoding == 'base64':
 583             import base64
 584             data = base64.decodestring(data)
 585         else:
 586             data = unquote(data)
 587         msg.append('Content-length: %d' % len(data))
 588         msg.append('')
 589         msg.append(data)
 590         msg = '\n'.join(msg)
 591         f = StringIO(msg)
 592         headers = mimetools.Message(f, 0)
 593         #f.fileno = None     # needed for addinfourl
 594         return addinfourl(f, headers, url)
 595
 596
 597 class FancyURLopener(URLopener):
 598     """Derived class with handlers for errors we can handle (perhaps)."""
 599
 600     def __init__(self, *args, **kwargs):
 601         URLopener.__init__(self, *args, **kwargs)
 602         self.auth_cache = {}
 603         self.tries = 0
 604         self.maxtries = 10
 605
 606     def http_error_default(self, url, fp, errcode, errmsg, headers):
 607         """Default error handling -- don't raise an exception."""
 608         return addinfourl(fp, headers, "http:" + url)
 609
 610     def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
 611         """Error 302 -- relocated (temporarily)."""
 612         self.tries += 1
 613         if self.maxtries and self.tries >= self.maxtries:
 614             if hasattr(self, "http_error_500"):
 615                 meth = self.http_error_500
 616             else:
 617                 meth = self.http_error_default
 618             self.tries = 0
 619             return meth(url, fp, 500,
 620                         "Internal Server Error: Redirect Recursion", headers)
 621         result = self.redirect_internal(url, fp, errcode, errmsg, headers,
 622                                         data)
 623         self.tries = 0
 624         return result
 625
 626     def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
 627         if 'location' in headers:
 628             newurl = headers['location']
 629         elif 'uri' in headers:
 630             newurl = headers['uri']
 631         else:
 632             return
 633         void = fp.read()
 634         fp.close()
 635         # In case the server sent a relative URL, join with original:
 636         newurl = basejoin(self.type + ":" + url, newurl)
 637         return self.open(newurl)
 638
 639     def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
 640         """Error 301 -- also relocated (permanently)."""
 641         return self.http_error_302(url, fp, errcode, errmsg, headers, data)
 642
 643     def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
 644         """Error 303 -- also relocated (essentially identical to 302)."""
 645         return self.http_error_302(url, fp, errcode, errmsg, headers, data)
 646
 647     def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
 648         """Error 307 -- relocated, but turn POST into error."""
 649         if data is None:
 650             return self.http_error_302(url, fp, errcode, errmsg, headers, data)
 651         else:
 652             return self.http_error_default(url, fp, errcode, errmsg, headers)
 653
 654     def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
 655         """Error 401 -- authentication required.
 656         This function supports Basic authentication only."""
 657         if not 'www-authenticate' in headers:
 658             URLopener.http_error_default(self, url, fp,
 659                                          errcode, errmsg, headers)
 660         stuff = headers['www-authenticate']
 661         import re
 662         match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
 663         if not match:
 664             URLopener.http_error_default(self, url, fp,
 665                                          errcode, errmsg, headers)
 666         scheme, realm = match.groups()
 667         if scheme.lower() != 'basic':
 668             URLopener.http_error_default(self, url, fp,
 669                                          errcode, errmsg, headers)
 670         name = 'retry_' + self.type + '_basic_auth'
 671         if data is None:
 672             return getattr(self,name)(url, realm)
 673         else:
 674             return getattr(self,name)(url, realm, data)
 675
 676     def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
 677         """Error 407 -- proxy authentication required.
 678         This function supports Basic authentication only."""
 679         if not 'proxy-authenticate' in headers:
 680             URLopener.http_error_default(self, url, fp,
 681                                          errcode, errmsg, headers)
 682         stuff = headers['proxy-authenticate']
 683         import re
 684         match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
 685         if not match:
 686             URLopener.http_error_default(self, url, fp,
 687                                          errcode, errmsg, headers)
 688         scheme, realm = match.groups()
 689         if scheme.lower() != 'basic':
 690             URLopener.http_error_default(self, url, fp,
 691                                          errcode, errmsg, headers)
 692         name = 'retry_proxy_' + self.type + '_basic_auth'
 693         if data is None:
 694             return getattr(self,name)(url, realm)
 695         else:
 696             return getattr(self,name)(url, realm, data)
 697
 698     def retry_proxy_http_basic_auth(self, url, realm, data=None):
 699         host, selector = splithost(url)
 700         newurl = 'http://' + host + selector
 701         proxy = self.proxies['http']
 702         urltype, proxyhost = splittype(proxy)
 703         proxyhost, proxyselector = splithost(proxyhost)
 704         i = proxyhost.find('@') + 1
 705         proxyhost = proxyhost[i:]
 706         user, passwd = self.get_user_passwd(proxyhost, realm, i)
 707         if not (user or passwd): return None
 708         proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
 709         self.proxies['http'] = 'http://' + proxyhost + proxyselector
 710         if data is None:
 711             return self.open(newurl)
 712         else:
 713             return self.open(newurl, data)
 714
 715     def retry_proxy_https_basic_auth(self, url, realm, data=None):
 716         host, selector = splithost(url)
 717         newurl = 'https://' + host + selector
 718         proxy = self.proxies['https']
 719         urltype, proxyhost = splittype(proxy)
 720         proxyhost, proxyselector = splithost(proxyhost)
 721         i = proxyhost.find('@') + 1
 722         proxyhost = proxyhost[i:]
 723         user, passwd = self.get_user_passwd(proxyhost, realm, i)
 724         if not (user or passwd): return None
 725         proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
 726         self.proxies['https'] = 'https://' + proxyhost + proxyselector
 727         if data is None:
 728             return self.open(newurl)
 729         else:
 730             return self.open(newurl, data)
 731
 732     def retry_http_basic_auth(self, url, realm, data=None):
 733         host, selector = splithost(url)
 734         i = host.find('@') + 1
 735         host = host[i:]
 736         user, passwd = self.get_user_passwd(host, realm, i)
 737         if not (user or passwd): return None
 738         host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
 739         newurl = 'http://' + host + selector
 740         if data is None:
 741             return self.open(newurl)
 742         else:
 743             return self.open(newurl, data)
 744
 745     def retry_https_basic_auth(self, url, realm, data=None):
 746         host, selector = splithost(url)
 747         i = host.find('@') + 1
 748         host = host[i:]
 749         user, passwd = self.get_user_passwd(host, realm, i)
 750         if not (user or passwd): return None
 751         host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
 752         newurl = 'https://' + host + selector
 753         if data is None:
 754             return self.open(newurl)
 755         else:
 756             return self.open(newurl, data)
 757
 758     def get_user_passwd(self, host, realm, clear_cache = 0):
 759         key = realm + '@' + host.lower()
 760         if key in self.auth_cache:
 761             if clear_cache:
 762                 del self.auth_cache[key]
 763             else:
 764                 return self.auth_cache[key]
 765         user, passwd = self.prompt_user_passwd(host, realm)
 766         if user or passwd: self.auth_cache[key] = (user, passwd)
 767         return user, passwd
 768
 769     def prompt_user_passwd(self, host, realm):
 770         """Override this in a GUI environment!"""
 771         import getpass
 772         try:
 773             user = raw_input("Enter username for %s at %s: " % (realm,
 774                                                                 host))
 775             passwd = getpass.getpass("Enter password for %s in %s at %s: " %
 776                 (user, realm, host))
 777             return user, passwd
 778         except KeyboardInterrupt:
 779             print
 780             return None, None
 781
 782
 783 # Utility functions
 784
 785 _localhost = None
 786 def localhost():
 787     """Return the IP address of the magic hostname 'localhost'."""
 788     global _localhost
 789     if _localhost is None:
 790         _localhost = socket.gethostbyname('localhost')
 791     return _localhost
 792
 793 _thishost = None
 794 def thishost():
 795     """Return the IP address of the current host."""
 796     global _thishost
 797     if _thishost is None:
 798         _thishost = socket.gethostbyname(socket.gethostname())
 799     return _thishost
 800
 801 _ftperrors = None
 802 def ftperrors():
 803     """Return the set of errors raised by the FTP class."""
 804     global _ftperrors
 805     if _ftperrors is None:
 806         import ftplib
 807         _ftperrors = ftplib.all_errors
 808     return _ftperrors
 809
 810 _noheaders = None
 811 def noheaders():
 812     """Return an empty mimetools.Message object."""
 813     global _noheaders
 814     if _noheaders is None:
 815         import mimetools
 816         try:
 817             from cStringIO import StringIO
 818         except ImportError:
 819             from StringIO import StringIO
 820         _noheaders = mimetools.Message(StringIO(), 0)
 821         _noheaders.fp.close()   # Recycle file descriptor
 822     return _noheaders
 823
 824
 825 # Utility classes
 826
 827 class ftpwrapper:
 828     """Class used by open_ftp() for cache of open FTP connections."""
 829
 830     def __init__(self, user, passwd, host, port, dirs):
 831         self.user = user
 832         self.passwd = passwd
 833         self.host = host
 834         self.port = port
 835         self.dirs = dirs
 836         self.init()
 837
 838     def init(self):
 839         import ftplib
 840         self.busy = 0
 841         self.ftp = ftplib.FTP()
 842         self.ftp.connect(self.host, self.port)
 843         self.ftp.login(self.user, self.passwd)
 844         for dir in self.dirs:
 845             self.ftp.cwd(dir)
 846
 847     def retrfile(self, file, type):
 848         import ftplib
 849         self.endtransfer()
 850         if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
 851         else: cmd = 'TYPE ' + type; isdir = 0
 852         try:
 853             self.ftp.voidcmd(cmd)
 854         except ftplib.all_errors:
 855             self.init()
 856             self.ftp.voidcmd(cmd)
 857         conn = None
 858         if file and not isdir:
 859             # Use nlst to see if the file exists at all
 860             try:
 861                 self.ftp.nlst(file)
 862             except ftplib.error_perm, reason:
 863                 raise IOError, ('ftp error', reason), sys.exc_info()[2]
 864             # Restore the transfer mode!
 865             self.ftp.voidcmd(cmd)
 866             # Try to retrieve as a file
 867             try:
 868                 cmd = 'RETR ' + file
 869                 conn = self.ftp.ntransfercmd(cmd)
 870             except ftplib.error_perm, reason:
 871                 if str(reason)[:3] != '550':
 872                     raise IOError, ('ftp error', reason), sys.exc_info()[2]
 873         if not conn:
 874             # Set transfer mode to ASCII!
 875             self.ftp.voidcmd('TYPE A')
 876             # Try a directory listing
 877             if file: cmd = 'LIST ' + file
 878             else: cmd = 'LIST'
 879             conn = self.ftp.ntransfercmd(cmd)
 880         self.busy = 1
 881         # Pass back both a suitably decorated object and a retrieval length
 882         return (addclosehook(conn[0].makefile('rb'),
 883                              self.endtransfer), conn[1])
 884     def endtransfer(self):
 885         if not self.busy:
 886             return
 887         self.busy = 0
 888         try:
 889             self.ftp.voidresp()
 890         except ftperrors():
 891             pass
 892
 893     def close(self):
 894         self.endtransfer()
 895         try:
 896             self.ftp.close()
 897         except ftperrors():
 898             pass
 899
 900 class addbase:
 901     """Base class for addinfo and addclosehook."""
 902
 903     def __init__(self, fp):
 904         self.fp = fp
 905         self.read = self.fp.read
 906         self.readline = self.fp.readline
 907         if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
 908         if hasattr(self.fp, "fileno"):
 909             self.fileno = self.fp.fileno
 910         else:
 911             self.fileno = lambda: None
 912         if hasattr(self.fp, "__iter__"):
 913             self.__iter__ = self.fp.__iter__
 914             if hasattr(self.fp, "next"):
 915                 self.next = self.fp.next
 916
 917     def __repr__(self):
 918         return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
 919                                              id(self), self.fp)
 920
 921     def close(self):
 922         self.read = None
 923         self.readline = None
 924         self.readlines = None
 925         self.fileno = None
 926         if self.fp: self.fp.close()
 927         self.fp = None
 928
 929 class addclosehook(addbase):
 930     """Class to add a close hook to an open file."""
 931
 932     def __init__(self, fp, closehook, *hookargs):
 933         addbase.__init__(self, fp)
 934         self.closehook = closehook
 935         self.hookargs = hookargs
 936
 937     def close(self):
 938         addbase.close(self)
 939         if self.closehook:
 940             self.closehook(*self.hookargs)
 941             self.closehook = None
 942             self.hookargs = None
 943
 944 class addinfo(addbase):
 945     """class to add an info() method to an open file."""
 946
 947     def __init__(self, fp, headers):
 948         addbase.__init__(self, fp)
 949         self.headers = headers
 950
 951     def info(self):
 952         return self.headers
 953
 954 class addinfourl(addbase):
 955     """class to add info() and geturl() methods to an open file."""
 956
 957     def __init__(self, fp, headers, url):
 958         addbase.__init__(self, fp)
 959         self.headers = headers
 960         self.url = url
 961
 962     def info(self):
 963         return self.headers
 964
 965     def geturl(self):
 966         return self.url
 967
 968
 969 # Utilities to parse URLs (most of these return None for missing parts):
 970 # unwrap('<URL:type://host/path>') --> 'type://host/path'
 971 # splittype('type:opaquestring') --> 'type', 'opaquestring'
 972 # splithost('//host[:port]/path') --> 'host[:port]', '/path'
 973 # splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
 974 # splitpasswd('user:passwd') -> 'user', 'passwd'
 975 # splitport('host:port') --> 'host', 'port'
 976 # splitquery('/path?query') --> '/path', 'query'
 977 # splittag('/path#tag') --> '/path', 'tag'
 978 # splitattr('/path;attr1=value1;attr2=value2;...') ->
 979 #   '/path', ['attr1=value1', 'attr2=value2', ...]
 980 # splitvalue('attr=value') --> 'attr', 'value'
 981 # splitgophertype('/Xselector') --> 'X', 'selector'
 982 # unquote('abc%20def') -> 'abc def'
 983 # quote('abc def') -> 'abc%20def')
 984
 985 try:
 986     unicode
 987 except NameError:
 988     def _is_unicode(x):
 989         return 0
 990 else:
 991     def _is_unicode(x):
 992         return isinstance(x, unicode)
 993
 994 def toBytes(url):
 995     """toBytes(u"URL") --> 'URL'."""
 996     # Most URL schemes require ASCII. If that changes, the conversion
 997     # can be relaxed
 998     if _is_unicode(url):
 999         try:
1000             url = url.encode("ASCII")
1001         except UnicodeError:
1002             raise UnicodeError("URL " + repr(url) +
1003                                " contains non-ASCII characters")
1004     return url
1005
1006 def unwrap(url):
1007     """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
1008     url = url.strip()
1009     if url[:1] == '<' and url[-1:] == '>':
1010         url = url[1:-1].strip()
1011     if url[:4] == 'URL:': url = url[4:].strip()
1012     return url
1013
1014 _typeprog = None
1015 def splittype(url):
1016     """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
1017     global _typeprog
1018     if _typeprog is None:
1019         import re
1020         _typeprog = re.compile('^([^/:]+):')
1021
1022     match = _typeprog.match(url)
1023     if match:
1024         scheme = match.group(1)
1025         return scheme.lower(), url[len(scheme) + 1:]
1026     return None, url
1027
1028 _hostprog = None
1029 def splithost(url):
1030     """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
1031     global _hostprog
1032     if _hostprog is None:
1033         import re
1034         _hostprog = re.compile('^//([^/]*)(.*)$')
1035
1036     match = _hostprog.match(url)
1037     if match: return match.group(1, 2)
1038     return None, url
1039
1040 _userprog = None
1041 def splituser(host):
1042     """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
1043     global _userprog
1044     if _userprog is None:
1045         import re
1046         _userprog = re.compile('^(.*)@(.*)$')
1047
1048     match = _userprog.match(host)
1049     if match: return map(unquote, match.group(1, 2))
1050     return None, host
1051
1052 _passwdprog = None
1053 def splitpasswd(user):
1054     """splitpasswd('user:passwd') -> 'user', 'passwd'."""
1055     global _passwdprog
1056     if _passwdprog is None:
1057         import re
1058         _passwdprog = re.compile('^([^:]*):(.*)$')
1059
1060     match = _passwdprog.match(user)
1061     if match: return match.group(1, 2)
1062     return user, None
1063
1064 # splittag('/path#tag') --> '/path', 'tag'
1065 _portprog = None
1066 def splitport(host):
1067     """splitport('host:port') --> 'host', 'port'."""
1068     global _portprog
1069     if _portprog is None:
1070         import re
1071         _portprog = re.compile('^(.*):([0-9]+)$')
1072
1073     match = _portprog.match(host)
1074     if match: return match.group(1, 2)
1075     return host, None
1076
1077 _nportprog = None
1078 def splitnport(host, defport=-1):
1079     """Split host and port, returning numeric port.
1080     Return given default port if no ':' found; defaults to -1.
1081     Return numerical port if a valid number are found after ':'.
1082     Return None if ':' but not a valid number."""
1083     global _nportprog
1084     if _nportprog is None:
1085         import re
1086         _nportprog = re.compile('^(.*):(.*)$')
1087
1088     match = _nportprog.match(host)
1089     if match:
1090         host, port = match.group(1, 2)
1091         try:
1092             if not port: raise ValueError, "no digits"
1093             nport = int(port)
1094         except ValueError:
1095             nport = None
1096         return host, nport
1097     return host, defport
1098
1099 _queryprog = None
1100 def splitquery(url):
1101     """splitquery('/path?query') --> '/path', 'query'."""
1102     global _queryprog
1103     if _queryprog is None:
1104         import re
1105         _queryprog = re.compile('^(.*)\?([^?]*)$')
1106
1107     match = _queryprog.match(url)
1108     if match: return match.group(1, 2)
1109     return url, None
1110
1111 _tagprog = None
1112 def splittag(url):
1113     """splittag('/path#tag') --> '/path', 'tag'."""
1114     global _tagprog
1115     if _tagprog is None:
1116         import re
1117         _tagprog = re.compile('^(.*)#([^#]*)$')
1118
1119     match = _tagprog.match(url)
1120     if match: return match.group(1, 2)
1121     return url, None
1122
1123 def splitattr(url):
1124     """splitattr('/path;attr1=value1;attr2=value2;...') ->
1125         '/path', ['attr1=value1', 'attr2=value2', ...]."""
1126     words = url.split(';')
1127     return words[0], words[1:]
1128
1129 _valueprog = None
1130 def splitvalue(attr):
1131     """splitvalue('attr=value') --> 'attr', 'value'."""
1132     global _valueprog
1133     if _valueprog is None:
1134         import re
1135         _valueprog = re.compile('^([^=]*)=(.*)$')
1136
1137     match = _valueprog.match(attr)
1138     if match: return match.group(1, 2)
1139     return attr, None
1140
1141 def splitgophertype(selector):
1142     """splitgophertype('/Xselector') --> 'X', 'selector'."""
1143     if selector[:1] == '/' and selector[1:2]:
1144         return selector[1], selector[2:]
1145     return None, selector
1146
1147 _hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
1148 _hextochr.update(('%02X' % i, chr(i)) for i in range(256))
1149
1150 def unquote(s):
1151     """unquote('abc%20def') -> 'abc def'."""
1152     res = s.split('%')
1153     for i in xrange(1, len(res)):
1154         item = res[i]
1155         try:
1156             res[i] = _hextochr[item[:2]] + item[2:]
1157         except KeyError:
1158             res[i] = '%' + item
1159         except UnicodeDecodeError:
1160             res[i] = unichr(int(item[:2], 16)) + item[2:]
1161     return "".join(res)
1162
1163 def unquote_plus(s):
1164     """unquote('%7e/abc+def') -> '~/abc def'"""
1165     s = s.replace('+', ' ')
1166     return unquote(s)
1167
1168 always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
1169                'abcdefghijklmnopqrstuvwxyz'
1170                '0123456789' '_.-')
1171 _safemaps = {}
1172
1173 def quote(s, safe = '/'):
1174     """quote('abc def') -> 'abc%20def'
1175
1176     Each part of a URL, e.g. the path info, the query, etc., has a
1177     different set of reserved characters that must be quoted.
1178
1179     RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
1180     the following reserved characters.
1181
1182     reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
1183                   "$" | ","
1184
1185     Each of these characters is reserved in some component of a URL,
1186     but not necessarily in all of them.
1187
1188     By default, the quote function is intended for quoting the path
1189     section of a URL.  Thus, it will not encode '/'.  This character
1190     is reserved, but in typical usage the quote function is being
1191     called on a path where the existing slash characters are used as
1192     reserved characters.
1193     """
1194     cachekey = (safe, always_safe)
1195     try:
1196         safe_map = _safemaps[cachekey]
1197     except KeyError:
1198         safe += always_safe
1199         safe_map = {}
1200         for i in range(256):
1201             c = chr(i)
1202             safe_map[c] = (c in safe) and c or ('%%%02X' % i)
1203         _safemaps[cachekey] = safe_map
1204     res = map(safe_map.__getitem__, s)
1205     return ''.join(res)
1206
1207 def quote_plus(s, safe = ''):
1208     """Quote the query fragment of a URL; replacing ' ' with '+'"""
1209     if ' ' in s:
1210         s = quote(s, safe + ' ')
1211         return s.replace(' ', '+')
1212     return quote(s, safe)
1213
1214 def urlencode(query,doseq=0):
1215     """Encode a sequence of two-element tuples or dictionary into a URL query string.
1216
1217     If any values in the query arg are sequences and doseq is true, each
1218     sequence element is converted to a separate parameter.
1219
1220     If the query arg is a sequence of two-element tuples, the order of the
1221     parameters in the output will match the order of parameters in the
1222     input.
1223     """
1224
1225     if hasattr(query,"items"):
1226         # mapping objects
1227         query = query.items()
1228     else:
1229         # it's a bother at times that strings and string-like objects are
1230         # sequences...
1231         try:
1232             # non-sequence items should not work with len()
1233             # non-empty strings will fail this
1234             if len(query) and not isinstance(query[0], tuple):
1235                 raise TypeError
1236             # zero-length sequences of all types will get here and succeed,
1237             # but that's a minor nit - since the original implementation
1238             # allowed empty dicts that type of behavior probably should be
1239             # preserved for consistency
1240         except TypeError:
1241             ty,va,tb = sys.exc_info()
1242             raise TypeError, "not a valid non-string sequence or mapping object", tb
1243
1244     l = []
1245     if not doseq:
1246         # preserve old behavior
1247         for k, v in query:
1248             k = quote_plus(str(k))
1249             v = quote_plus(str(v))
1250             l.append(k + '=' + v)
1251     else:
1252         for k, v in query:
1253             k = quote_plus(str(k))
1254             if isinstance(v, str):
1255                 v = quote_plus(v)
1256                 l.append(k + '=' + v)
1257             elif _is_unicode(v):
1258                 # is there a reasonable way to convert to ASCII?
1259                 # encode generates a string, but "replace" or "ignore"
1260                 # lose information and "strict" can raise UnicodeError
1261                 v = quote_plus(v.encode("ASCII","replace"))
1262                 l.append(k + '=' + v)
1263             else:
1264                 try:
1265                     # is this a sufficient test for sequence-ness?
1266                     x = len(v)
1267                 except TypeError:
1268                     # not a sequence
1269                     v = quote_plus(str(v))
1270                     l.append(k + '=' + v)
1271                 else:
1272                     # loop over the sequence
1273                     for elt in v:
1274                         l.append(k + '=' + quote_plus(str(elt)))
1275     return '&'.join(l)
1276
1277 # Proxy handling
1278 def getproxies_environment():
1279     """Return a dictionary of scheme -> proxy server URL mappings.
1280
1281     Scan the environment for variables named <scheme>_proxy;
1282     this seems to be the standard convention.  If you need a
1283     different way, you can pass a proxies dictionary to the
1284     [Fancy]URLopener constructor.
1285
1286     """
1287     proxies = {}
1288     for name, value in os.environ.items():
1289         name = name.lower()
1290         if value and name[-6:] == '_proxy':
1291             proxies[name[:-6]] = value
1292     return proxies
1293
1294 if sys.platform == 'darwin':
1295     def getproxies_internetconfig():
1296         """Return a dictionary of scheme -> proxy server URL mappings.
1297
1298         By convention the mac uses Internet Config to store
1299         proxies.  An HTTP proxy, for instance, is stored under
1300         the HttpProxy key.
1301
1302         """
1303         try:
1304             import ic
1305         except ImportError:
1306             return {}
1307
1308         try:
1309             config = ic.IC()
1310         except ic.error:
1311             return {}
1312         proxies = {}
1313         # HTTP:
1314         if 'UseHTTPProxy' in config and config['UseHTTPProxy']:
1315             try:
1316                 value = config['HTTPProxyHost']
1317             except ic.error:
1318                 pass
1319             else:
1320                 proxies['http'] = 'http://%s' % value
1321         # FTP: XXXX To be done.
1322         # Gopher: XXXX To be done.
1323         return proxies
1324
1325     def proxy_bypass(x):
1326         return 0
1327
1328     def getproxies():
1329         return getproxies_environment() or getproxies_internetconfig()
1330
1331 elif os.name == 'nt':
1332     def getproxies_registry():
1333         """Return a dictionary of scheme -> proxy server URL mappings.
1334
1335         Win32 uses the registry to store proxies.
1336
1337         """
1338         proxies = {}
1339         try:
1340             import _winreg
1341         except ImportError:
1342             # Std module, so should be around - but you never know!
1343             return proxies
1344         try:
1345             internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1346                 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1347             proxyEnable = _winreg.QueryValueEx(internetSettings,
1348                                                'ProxyEnable')[0]
1349             if proxyEnable:
1350                 # Returned as Unicode but problems if not converted to ASCII
1351                 proxyServer = str(_winreg.QueryValueEx(internetSettings,
1352                                                        'ProxyServer')[0])
1353                 if '=' in proxyServer:
1354                     # Per-protocol settings
1355                     for p in proxyServer.split(';'):
1356                         protocol, address = p.split('=', 1)
1357                         # See if address has a type:// prefix
1358                         import re
1359                         if not re.match('^([^/:]+)://', address):
1360                             address = '%s://%s' % (protocol, address)
1361                         proxies[protocol] = address
1362                 else:
1363                     # Use one setting for all protocols
1364                     if proxyServer[:5] == 'http:':
1365                         proxies['http'] = proxyServer
1366                     else:
1367                         proxies['http'] = 'http://%s' % proxyServer
1368                         proxies['ftp'] = 'ftp://%s' % proxyServer
1369             internetSettings.Close()
1370         except (WindowsError, ValueError, TypeError):
1371             # Either registry key not found etc, or the value in an
1372             # unexpected format.
1373             # proxies already set up to be empty so nothing to do
1374             pass
1375         return proxies
1376
1377     def getproxies():
1378         """Return a dictionary of scheme -> proxy server URL mappings.
1379
1380         Returns settings gathered from the environment, if specified,
1381         or the registry.
1382
1383         """
1384         return getproxies_environment() or getproxies_registry()
1385
1386     def proxy_bypass(host):
1387         try:
1388             import _winreg
1389             import re
1390         except ImportError:
1391             # Std modules, so should be around - but you never know!
1392             return 0
1393         try:
1394             internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1395                 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1396             proxyEnable = _winreg.QueryValueEx(internetSettings,
1397                                                'ProxyEnable')[0]
1398             proxyOverride = str(_winreg.QueryValueEx(internetSettings,
1399                                                      'ProxyOverride')[0])
1400             # ^^^^ Returned as Unicode but problems if not converted to ASCII
1401         except WindowsError:
1402             return 0
1403         if not proxyEnable or not proxyOverride:
1404             return 0
1405         # try to make a host list from name and IP address.
1406         rawHost, port = splitport(host)
1407         host = [rawHost]
1408         try:
1409             addr = socket.gethostbyname(rawHost)
1410             if addr != rawHost:
1411                 host.append(addr)
1412         except socket.error:
1413             pass
1414         try:
1415             fqdn = socket.getfqdn(rawHost)
1416             if fqdn != rawHost:
1417                 host.append(fqdn)
1418         except socket.error:
1419             pass
1420         # make a check value list from the registry entry: replace the
1421         # '<local>' string by the localhost entry and the corresponding
1422         # canonical entry.
1423         proxyOverride = proxyOverride.split(';')
1424         i = 0
1425         while i < len(proxyOverride):
1426             if proxyOverride[i] == '<local>':
1427                 proxyOverride[i:i+1] = ['localhost',
1428                                         '127.0.0.1',
1429                                         socket.gethostname(),
1430                                         socket.gethostbyname(
1431                                             socket.gethostname())]
1432             i += 1
1433         # print proxyOverride
1434         # now check if we match one of the registry values.
1435         for test in proxyOverride:
1436             test = test.replace(".", r"\.")     # mask dots
1437             test = test.replace("*", r".*")     # change glob sequence
1438             test = test.replace("?", r".")      # change glob char
1439             for val in host:
1440                 # print "%s <--> %s" %( test, val )
1441                 if re.match(test, val, re.I):
1442                     return 1
1443         return 0
1444
1445 else:
1446     # By default use environment variables
1447     getproxies = getproxies_environment
1448
1449     def proxy_bypass(host):
1450         return 0
1451
1452 # Test and time quote() and unquote()
1453 def test1():
1454     s = ''
1455     for i in range(256): s = s + chr(i)
1456     s = s*4
1457     t0 = time.time()
1458     qs = quote(s)
1459     uqs = unquote(qs)
1460     t1 = time.time()
1461     if uqs != s:
1462         print 'Wrong!'
1463     print repr(s)
1464     print repr(qs)
1465     print repr(uqs)
1466     print round(t1 - t0, 3), 'sec'
1467
1468
1469 def reporthook(blocknum, blocksize, totalsize):
1470     # Report during remote transfers
1471     print "Block number: %d, Block size: %d, Total size: %d" % (
1472         blocknum, blocksize, totalsize)
1473
1474 # Test program
1475 def test(args=[]):
1476     if not args:
1477         args = [
1478             '/etc/passwd',
1479             'file:/etc/passwd',
1480             'file://localhost/etc/passwd',
1481             'ftp://ftp.python.org/pub/python/README',
1482 ##          'gopher://gopher.micro.umn.edu/1/',
1483             'http://www.python.org/index.html',
1484             ]
1485         if hasattr(URLopener, "open_https"):
1486             args.append('https://synergy.as.cmu.edu/~geek/')
1487     try:
1488         for url in args:
1489             print '-'*10, url, '-'*10
1490             fn, h = urlretrieve(url, None, reporthook)
1491             print fn
1492             if h:
1493                 print '======'
1494                 for k in h.keys(): print k + ':', h[k]
1495                 print '======'
1496             fp = open(fn, 'rb')
1497             data = fp.read()
1498             del fp
1499             if '\r' in data:
1500                 table = string.maketrans("", "")
1501                 data = data.translate(table, "\r")
1502             print data
1503             fn, h = None, None
1504         print '-'*40
1505     finally:
1506         urlcleanup()
1507
1508 def main():
1509     import getopt, sys
1510     try:
1511         opts, args = getopt.getopt(sys.argv[1:], "th")
1512     except getopt.error, msg:
1513         print msg
1514         print "Use -h for help"
1515         return
1516     t = 0
1517     for o, a in opts:
1518         if o == '-t':
1519             t = t + 1
1520         if o == '-h':
1521             print "Usage: python urllib.py [-t] [url ...]"
1522             print "-t runs self-test;",
1523             print "otherwise, contents of urls are printed"
1524             return
1525     if t:
1526         if t > 1:
1527             test1()
1528         test(args)
1529     else:
1530         if not args:
1531             print "Use -h for help"
1532         for url in args:
1533             print urlopen(url).read(),
1534
1535 # Run test program when run as a script
1536 if __name__ == '__main__':
1537     main()