Lib/urllib.py

   1 """Open an arbitrary URL.
   2
   3 See the following document for more info on URLs:
   4 "Names and Addresses, URIs, URLs, URNs, URCs", at
   5 http://www.w3.org/pub/WWW/Addressing/Overview.html
   6
   7 See also the HTTP spec (from which the error codes are derived):
   8 "HTTP - Hypertext Transfer Protocol", at
   9 http://www.w3.org/pub/WWW/Protocols/
  10
  11 Related standards and specs:
  12 - RFC1808: the "relative URL" spec. (authoritative status)
  13 - RFC1738 - the "URL standard". (authoritative status)
  14 - RFC1630 - the "URI spec". (informational status)
  15
  16 The object returned by URLopener().open(file) will differ per
  17 protocol.  All you know is that it has methods read(), readline(),
  18 readlines(), fileno(), close() and info().  The read*(), fileno()
  19 and close() methods work like those of open files.
  20 The info() method returns a mimetools.Message object which can be
  21 used to query various info about the object, if available.
  22 (mimetools.Message objects are queried with the getheader() method.)
  23 """
  24
  25 import string
  26 import socket
  27 import os
  28 import time
  29 import sys
  30 from urlparse import urljoin as basejoin
  31
  32 __all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
  33            "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
  34            "urlencode", "url2pathname", "pathname2url", "splittag",
  35            "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
  36            "splittype", "splithost", "splituser", "splitpasswd", "splitport",
  37            "splitnport", "splitquery", "splitattr", "splitvalue",
  38            "splitgophertype", "getproxies"]
  39
  40 __version__ = '1.17'    # XXX This version is not always updated :-(
  41
  42 MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
  43
  44 # Helper for non-unix systems
  45 if os.name == 'mac':
  46     from macurl2path import url2pathname, pathname2url
  47 elif os.name == 'nt':
  48     from nturl2path import url2pathname, pathname2url
  49 elif os.name == 'riscos':
  50     from rourl2path import url2pathname, pathname2url
  51 else:
  52     def url2pathname(pathname):
  53         """OS-specific conversion from a relative URL of the 'file' scheme
  54         to a file system path; not recommended for general use."""
  55         return unquote(pathname)
  56
  57     def pathname2url(pathname):
  58         """OS-specific conversion from a file system path to a relative URL
  59         of the 'file' scheme; not recommended for general use."""
  60         return quote(pathname)
  61
  62 # This really consists of two pieces:
  63 # (1) a class which handles opening of all sorts of URLs
  64 #     (plus assorted utilities etc.)
  65 # (2) a set of functions for parsing URLs
  66 # XXX Should these be separated out into different modules?
  67
  68
  69 # Shortcut for basic usage
  70 _urlopener = None
  71 def urlopen(url, data=None, proxies=None):
  72     """urlopen(url [, data]) -> open file-like object"""
  73     global _urlopener
  74     if proxies is not None:
  75         opener = FancyURLopener(proxies=proxies)
  76     elif not _urlopener:
  77         opener = FancyURLopener()
  78         _urlopener = opener
  79     else:
  80         opener = _urlopener
  81     if data is None:
  82         return opener.open(url)
  83     else:
  84         return opener.open(url, data)
  85 def urlretrieve(url, filename=None, reporthook=None, data=None):
  86     global _urlopener
  87     if not _urlopener:
  88         _urlopener = FancyURLopener()
  89     return _urlopener.retrieve(url, filename, reporthook, data)
  90 def urlcleanup():
  91     if _urlopener:
  92         _urlopener.cleanup()
  93
  94 # exception raised when downloaded size does not match content-length
  95 class ContentTooShortError(IOError):
  96     def __init__(self, message, content):
  97         IOError.__init__(self, message)
  98         self.content = content
  99
 100 ftpcache = {}
 101 class URLopener:
 102     """Class to open URLs.
 103     This is a class rather than just a subroutine because we may need
 104     more than one set of global protocol-specific options.
 105     Note -- this is a base class for those who don't want the
 106     automatic handling of errors type 302 (relocated) and 401
 107     (authorization needed)."""
 108
 109     __tempfiles = None
 110
 111     version = "Python-urllib/%s" % __version__
 112
 113     # Constructor
 114     def __init__(self, proxies=None, **x509):
 115         if proxies is None:
 116             proxies = getproxies()
 117         assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
 118         self.proxies = proxies
 119         self.key_file = x509.get('key_file')
 120         self.cert_file = x509.get('cert_file')
 121         self.addheaders = [('User-Agent', self.version)]
 122         self.__tempfiles = []
 123         self.__unlink = os.unlink # See cleanup()
 124         self.tempcache = None
 125         # Undocumented feature: if you assign {} to tempcache,
 126         # it is used to cache files retrieved with
 127         # self.retrieve().  This is not enabled by default
 128         # since it does not work for changing documents (and I
 129         # haven't got the logic to check expiration headers
 130         # yet).
 131         self.ftpcache = ftpcache
 132         # Undocumented feature: you can use a different
 133         # ftp cache by assigning to the .ftpcache member;
 134         # in case you want logically independent URL openers
 135         # XXX This is not threadsafe.  Bah.
 136
 137     def __del__(self):
 138         self.close()
 139
 140     def close(self):
 141         self.cleanup()
 142
 143     def cleanup(self):
 144         # This code sometimes runs when the rest of this module
 145         # has already been deleted, so it can't use any globals
 146         # or import anything.
 147         if self.__tempfiles:
 148             for file in self.__tempfiles:
 149                 try:
 150                     self.__unlink(file)
 151                 except OSError:
 152                     pass
 153             del self.__tempfiles[:]
 154         if self.tempcache:
 155             self.tempcache.clear()
 156
 157     def addheader(self, *args):
 158         """Add a header to be used by the HTTP interface only
 159         e.g. u.addheader('Accept', 'sound/basic')"""
 160         self.addheaders.append(args)
 161
 162     # External interface
 163     def open(self, fullurl, data=None):
 164         """Use URLopener().open(file) instead of open(file, 'r')."""
 165         fullurl = unwrap(toBytes(fullurl))
 166         if self.tempcache and fullurl in self.tempcache:
 167             filename, headers = self.tempcache[fullurl]
 168             fp = open(filename, 'rb')
 169             return addinfourl(fp, headers, fullurl)
 170         urltype, url = splittype(fullurl)
 171         if not urltype:
 172             urltype = 'file'
 173         if urltype in self.proxies:
 174             proxy = self.proxies[urltype]
 175             urltype, proxyhost = splittype(proxy)
 176             host, selector = splithost(proxyhost)
 177             url = (host, fullurl) # Signal special case to open_*()
 178         else:
 179             proxy = None
 180         name = 'open_' + urltype
 181         self.type = urltype
 182         name = name.replace('-', '_')
 183         if not hasattr(self, name):
 184             if proxy:
 185                 return self.open_unknown_proxy(proxy, fullurl, data)
 186             else:
 187                 return self.open_unknown(fullurl, data)
 188         try:
 189             if data is None:
 190                 return getattr(self, name)(url)
 191             else:
 192                 return getattr(self, name)(url, data)
 193         except socket.error, msg:
 194             raise IOError, ('socket error', msg), sys.exc_info()[2]
 195
 196     def open_unknown(self, fullurl, data=None):
 197         """Overridable interface to open unknown URL type."""
 198         type, url = splittype(fullurl)
 199         raise IOError, ('url error', 'unknown url type', type)
 200
 201     def open_unknown_proxy(self, proxy, fullurl, data=None):
 202         """Overridable interface to open unknown URL type."""
 203         type, url = splittype(fullurl)
 204         raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
 205
 206     # External interface
 207     def retrieve(self, url, filename=None, reporthook=None, data=None):
 208         """retrieve(url) returns (filename, headers) for a local object
 209         or (tempfilename, headers) for a remote object."""
 210         url = unwrap(toBytes(url))
 211         if self.tempcache and url in self.tempcache:
 212             return self.tempcache[url]
 213         type, url1 = splittype(url)
 214         if filename is None and (not type or type == 'file'):
 215             try:
 216                 fp = self.open_local_file(url1)
 217                 hdrs = fp.info()
 218                 del fp
 219                 return url2pathname(splithost(url1)[1]), hdrs
 220             except IOError, msg:
 221                 pass
 222         fp = self.open(url, data)
 223         headers = fp.info()
 224         if filename:
 225             tfp = open(filename, 'wb')
 226         else:
 227             import tempfile
 228             garbage, path = splittype(url)
 229             garbage, path = splithost(path or "")
 230             path, garbage = splitquery(path or "")
 231             path, garbage = splitattr(path or "")
 232             suffix = os.path.splitext(path)[1]
 233             (fd, filename) = tempfile.mkstemp(suffix)
 234             self.__tempfiles.append(filename)
 235             tfp = os.fdopen(fd, 'wb')
 236         result = filename, headers
 237         if self.tempcache is not None:
 238             self.tempcache[url] = result
 239         bs = 1024*8
 240         size = -1
 241         read = 0
 242         blocknum = 0
 243         if reporthook:
 244             if "content-length" in headers:
 245                 size = int(headers["Content-Length"])
 246             reporthook(blocknum, bs, size)
 247         while 1:
 248             block = fp.read(bs)
 249             if block == "":
 250                 break
 251             read += len(block)
 252             tfp.write(block)
 253             blocknum += 1
 254             if reporthook:
 255                 reporthook(blocknum, bs, size)
 256         fp.close()
 257         tfp.close()
 258         del fp
 259         del tfp
 260
 261         # raise exception if actual size does not match content-length header
 262         if size >= 0 and read < size:
 263             raise ContentTooShortError("retrieval incomplete: got only %i out "
 264                                        "of %i bytes" % (read, size), result)
 265
 266         return result
 267
 268     # Each method named open_<type> knows how to open that type of URL
 269
 270     def open_http(self, url, data=None):
 271         """Use HTTP protocol."""
 272         import httplib
 273         user_passwd = None
 274         proxy_passwd= None
 275         if isinstance(url, str):
 276             host, selector = splithost(url)
 277             if host:
 278                 user_passwd, host = splituser(host)
 279                 host = unquote(host)
 280             realhost = host
 281         else:
 282             host, selector = url
 283             # check whether the proxy contains authorization information
 284             proxy_passwd, host = splituser(host)
 285             # now we proceed with the url we want to obtain
 286             urltype, rest = splittype(selector)
 287             url = rest
 288             user_passwd = None
 289             if urltype.lower() != 'http':
 290                 realhost = None
 291             else:
 292                 realhost, rest = splithost(rest)
 293                 if realhost:
 294                     user_passwd, realhost = splituser(realhost)
 295                 if user_passwd:
 296                     selector = "%s://%s%s" % (urltype, realhost, rest)
 297                 if proxy_bypass(realhost):
 298                     host = realhost
 299
 300             #print "proxy via http:", host, selector
 301         if not host: raise IOError, ('http error', 'no host given')
 302
 303         if proxy_passwd:
 304             import base64
 305             proxy_auth = base64.b64encode(proxy_passwd).strip()
 306         else:
 307             proxy_auth = None
 308
 309         if user_passwd:
 310             import base64
 311             auth = base64.b64encode(user_passwd).strip()
 312         else:
 313             auth = None
 314         h = httplib.HTTP(host)
 315         if data is not None:
 316             h.putrequest('POST', selector)
 317             h.putheader('Content-Type', 'application/x-www-form-urlencoded')
 318             h.putheader('Content-Length', '%d' % len(data))
 319         else:
 320             h.putrequest('GET', selector)
 321         if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
 322         if auth: h.putheader('Authorization', 'Basic %s' % auth)
 323         if realhost: h.putheader('Host', realhost)
 324         for args in self.addheaders: h.putheader(*args)
 325         h.endheaders()
 326         if data is not None:
 327             h.send(data)
 328         errcode, errmsg, headers = h.getreply()
 329         fp = h.getfile()
 330         if errcode == 200:
 331             return addinfourl(fp, headers, "http:" + url)
 332         else:
 333             if data is None:
 334                 return self.http_error(url, fp, errcode, errmsg, headers)
 335             else:
 336                 return self.http_error(url, fp, errcode, errmsg, headers, data)
 337
 338     def http_error(self, url, fp, errcode, errmsg, headers, data=None):
 339         """Handle http errors.
 340         Derived class can override this, or provide specific handlers
 341         named http_error_DDD where DDD is the 3-digit error code."""
 342         # First check if there's a specific handler for this error
 343         name = 'http_error_%d' % errcode
 344         if hasattr(self, name):
 345             method = getattr(self, name)
 346             if data is None:
 347                 result = method(url, fp, errcode, errmsg, headers)
 348             else:
 349                 result = method(url, fp, errcode, errmsg, headers, data)
 350             if result: return result
 351         return self.http_error_default(url, fp, errcode, errmsg, headers)
 352
 353     def http_error_default(self, url, fp, errcode, errmsg, headers):
 354         """Default error handler: close the connection and raise IOError."""
 355         void = fp.read()
 356         fp.close()
 357         raise IOError, ('http error', errcode, errmsg, headers)
 358
 359     if hasattr(socket, "ssl"):
 360         def open_https(self, url, data=None):
 361             """Use HTTPS protocol."""
 362             import httplib
 363             user_passwd = None
 364             proxy_passwd = None
 365             if isinstance(url, str):
 366                 host, selector = splithost(url)
 367                 if host:
 368                     user_passwd, host = splituser(host)
 369                     host = unquote(host)
 370                 realhost = host
 371             else:
 372                 host, selector = url
 373                 # here, we determine, whether the proxy contains authorization information
 374                 proxy_passwd, host = splituser(host)
 375                 urltype, rest = splittype(selector)
 376                 url = rest
 377                 user_passwd = None
 378                 if urltype.lower() != 'https':
 379                     realhost = None
 380                 else:
 381                     realhost, rest = splithost(rest)
 382                     if realhost:
 383                         user_passwd, realhost = splituser(realhost)
 384                     if user_passwd:
 385                         selector = "%s://%s%s" % (urltype, realhost, rest)
 386                 #print "proxy via https:", host, selector
 387             if not host: raise IOError, ('https error', 'no host given')
 388             if proxy_passwd:
 389                 import base64
 390                 proxy_auth = base64.b64encode(proxy_passwd).strip()
 391             else:
 392                 proxy_auth = None
 393             if user_passwd:
 394                 import base64
 395                 auth = base64.b64encode(user_passwd).strip()
 396             else:
 397                 auth = None
 398             h = httplib.HTTPS(host, 0,
 399                               key_file=self.key_file,
 400                               cert_file=self.cert_file)
 401             if data is not None:
 402                 h.putrequest('POST', selector)
 403                 h.putheader('Content-Type',
 404                             'application/x-www-form-urlencoded')
 405                 h.putheader('Content-Length', '%d' % len(data))
 406             else:
 407                 h.putrequest('GET', selector)
 408             if proxy_auth: h.putheader('Proxy-Authorization: Basic %s' % proxy_auth)
 409             if auth: h.putheader('Authorization: Basic %s' % auth)
 410             if realhost: h.putheader('Host', realhost)
 411             for args in self.addheaders: h.putheader(*args)
 412             h.endheaders()
 413             if data is not None:
 414                 h.send(data)
 415             errcode, errmsg, headers = h.getreply()
 416             fp = h.getfile()
 417             if errcode == 200:
 418                 return addinfourl(fp, headers, "https:" + url)
 419             else:
 420                 if data is None:
 421                     return self.http_error(url, fp, errcode, errmsg, headers)
 422                 else:
 423                     return self.http_error(url, fp, errcode, errmsg, headers,
 424                                            data)
 425
 426     def open_gopher(self, url):
 427         """Use Gopher protocol."""
 428         if not isinstance(url, str):
 429             raise IOError, ('gopher error', 'proxy support for gopher protocol currently not implemented')
 430         import gopherlib
 431         host, selector = splithost(url)
 432         if not host: raise IOError, ('gopher error', 'no host given')
 433         host = unquote(host)
 434         type, selector = splitgophertype(selector)
 435         selector, query = splitquery(selector)
 436         selector = unquote(selector)
 437         if query:
 438             query = unquote(query)
 439             fp = gopherlib.send_query(selector, query, host)
 440         else:
 441             fp = gopherlib.send_selector(selector, host)
 442         return addinfourl(fp, noheaders(), "gopher:" + url)
 443
 444     def open_file(self, url):
 445         """Use local file or FTP depending on form of URL."""
 446         if not isinstance(url, str):
 447             raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
 448         if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
 449             return self.open_ftp(url)
 450         else:
 451             return self.open_local_file(url)
 452
 453     def open_local_file(self, url):
 454         """Use local file."""
 455         import mimetypes, mimetools, email.Utils
 456         try:
 457             from cStringIO import StringIO
 458         except ImportError:
 459             from StringIO import StringIO
 460         host, file = splithost(url)
 461         localname = url2pathname(file)
 462         try:
 463             stats = os.stat(localname)
 464         except OSError, e:
 465             raise IOError(e.errno, e.strerror, e.filename)
 466         size = stats.st_size
 467         modified = email.Utils.formatdate(stats.st_mtime, usegmt=True)
 468         mtype = mimetypes.guess_type(url)[0]
 469         headers = mimetools.Message(StringIO(
 470             'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
 471             (mtype or 'text/plain', size, modified)))
 472         if not host:
 473             urlfile = file
 474             if file[:1] == '/':
 475                 urlfile = 'file://' + file
 476             return addinfourl(open(localname, 'rb'),
 477                               headers, urlfile)
 478         host, port = splitport(host)
 479         if not port \
 480            and socket.gethostbyname(host) in (localhost(), thishost()):
 481             urlfile = file
 482             if file[:1] == '/':
 483                 urlfile = 'file://' + file
 484             return addinfourl(open(localname, 'rb'),
 485                               headers, urlfile)
 486         raise IOError, ('local file error', 'not on local host')
 487
 488     def open_ftp(self, url):
 489         """Use FTP protocol."""
 490         if not isinstance(url, str):
 491             raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
 492         import mimetypes, mimetools
 493         try:
 494             from cStringIO import StringIO
 495         except ImportError:
 496             from StringIO import StringIO
 497         host, path = splithost(url)
 498         if not host: raise IOError, ('ftp error', 'no host given')
 499         host, port = splitport(host)
 500         user, host = splituser(host)
 501         if user: user, passwd = splitpasswd(user)
 502         else: passwd = None
 503         host = unquote(host)
 504         user = unquote(user or '')
 505         passwd = unquote(passwd or '')
 506         host = socket.gethostbyname(host)
 507         if not port:
 508             import ftplib
 509             port = ftplib.FTP_PORT
 510         else:
 511             port = int(port)
 512         path, attrs = splitattr(path)
 513         path = unquote(path)
 514         dirs = path.split('/')
 515         dirs, file = dirs[:-1], dirs[-1]
 516         if dirs and not dirs[0]: dirs = dirs[1:]
 517         if dirs and not dirs[0]: dirs[0] = '/'
 518         key = user, host, port, '/'.join(dirs)
 519         # XXX thread unsafe!
 520         if len(self.ftpcache) > MAXFTPCACHE:
 521             # Prune the cache, rather arbitrarily
 522             for k in self.ftpcache.keys():
 523                 if k != key:
 524                     v = self.ftpcache[k]
 525                     del self.ftpcache[k]
 526                     v.close()
 527         try:
 528             if not key in self.ftpcache:
 529                 self.ftpcache[key] = \
 530                     ftpwrapper(user, passwd, host, port, dirs)
 531             if not file: type = 'D'
 532             else: type = 'I'
 533             for attr in attrs:
 534                 attr, value = splitvalue(attr)
 535                 if attr.lower() == 'type' and \
 536                    value in ('a', 'A', 'i', 'I', 'd', 'D'):
 537                     type = value.upper()
 538             (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
 539             mtype = mimetypes.guess_type("ftp:" + url)[0]
 540             headers = ""
 541             if mtype:
 542                 headers += "Content-Type: %s\n" % mtype
 543             if retrlen is not None and retrlen >= 0:
 544                 headers += "Content-Length: %d\n" % retrlen
 545             headers = mimetools.Message(StringIO(headers))
 546             return addinfourl(fp, headers, "ftp:" + url)
 547         except ftperrors(), msg:
 548             raise IOError, ('ftp error', msg), sys.exc_info()[2]
 549
 550     def open_data(self, url, data=None):
 551         """Use "data" URL."""
 552         if not isinstance(url, str):
 553             raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
 554         # ignore POSTed data
 555         #
 556         # syntax of data URLs:
 557         # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
 558         # mediatype := [ type "/" subtype ] *( ";" parameter )
 559         # data      := *urlchar
 560         # parameter := attribute "=" value
 561         import mimetools
 562         try:
 563             from cStringIO import StringIO
 564         except ImportError:
 565             from StringIO import StringIO
 566         try:
 567             [type, data] = url.split(',', 1)
 568         except ValueError:
 569             raise IOError, ('data error', 'bad data URL')
 570         if not type:
 571             type = 'text/plain;charset=US-ASCII'
 572         semi = type.rfind(';')
 573         if semi >= 0 and '=' not in type[semi:]:
 574             encoding = type[semi+1:]
 575             type = type[:semi]
 576         else:
 577             encoding = ''
 578         msg = []
 579         msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
 580                                             time.gmtime(time.time())))
 581         msg.append('Content-type: %s' % type)
 582         if encoding == 'base64':
 583             import base64
 584             data = base64.decodestring(data)
 585         else:
 586             data = unquote(data)
 587         msg.append('Content-Length: %d' % len(data))
 588         msg.append('')
 589         msg.append(data)
 590         msg = '\n'.join(msg)
 591         f = StringIO(msg)
 592         headers = mimetools.Message(f, 0)
 593         #f.fileno = None     # needed for addinfourl
 594         return addinfourl(f, headers, url)
 595
 596
 597 class FancyURLopener(URLopener):
 598     """Derived class with handlers for errors we can handle (perhaps)."""
 599
 600     def __init__(self, *args, **kwargs):
 601         URLopener.__init__(self, *args, **kwargs)
 602         self.auth_cache = {}
 603         self.tries = 0
 604         self.maxtries = 10
 605
 606     def http_error_default(self, url, fp, errcode, errmsg, headers):
 607         """Default error handling -- don't raise an exception."""
 608         return addinfourl(fp, headers, "http:" + url)
 609
 610     def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
 611         """Error 302 -- relocated (temporarily)."""
 612         self.tries += 1
 613         if self.maxtries and self.tries >= self.maxtries:
 614             if hasattr(self, "http_error_500"):
 615                 meth = self.http_error_500
 616             else:
 617                 meth = self.http_error_default
 618             self.tries = 0
 619             return meth(url, fp, 500,
 620                         "Internal Server Error: Redirect Recursion", headers)
 621         result = self.redirect_internal(url, fp, errcode, errmsg, headers,
 622                                         data)
 623         self.tries = 0
 624         return result
 625
 626     def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
 627         if 'location' in headers:
 628             newurl = headers['location']
 629         elif 'uri' in headers:
 630             newurl = headers['uri']
 631         else:
 632             return
 633         void = fp.read()
 634         fp.close()
 635         # In case the server sent a relative URL, join with original:
 636         newurl = basejoin(self.type + ":" + url, newurl)
 637         return self.open(newurl)
 638
 639     def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
 640         """Error 301 -- also relocated (permanently)."""
 641         return self.http_error_302(url, fp, errcode, errmsg, headers, data)
 642
 643     def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
 644         """Error 303 -- also relocated (essentially identical to 302)."""
 645         return self.http_error_302(url, fp, errcode, errmsg, headers, data)
 646
 647     def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
 648         """Error 307 -- relocated, but turn POST into error."""
 649         if data is None:
 650             return self.http_error_302(url, fp, errcode, errmsg, headers, data)
 651         else:
 652             return self.http_error_default(url, fp, errcode, errmsg, headers)
 653
 654     def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
 655         """Error 401 -- authentication required.
 656         This function supports Basic authentication only."""
 657         if not 'www-authenticate' in headers:
 658             URLopener.http_error_default(self, url, fp,
 659                                          errcode, errmsg, headers)
 660         stuff = headers['www-authenticate']
 661         import re
 662         match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
 663         if not match:
 664             URLopener.http_error_default(self, url, fp,
 665                                          errcode, errmsg, headers)
 666         scheme, realm = match.groups()
 667         if scheme.lower() != 'basic':
 668             URLopener.http_error_default(self, url, fp,
 669                                          errcode, errmsg, headers)
 670         name = 'retry_' + self.type + '_basic_auth'
 671         if data is None:
 672             return getattr(self,name)(url, realm)
 673         else:
 674             return getattr(self,name)(url, realm, data)
 675
 676     def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
 677         """Error 407 -- proxy authentication required.
 678         This function supports Basic authentication only."""
 679         if not 'proxy-authenticate' in headers:
 680             URLopener.http_error_default(self, url, fp,
 681                                          errcode, errmsg, headers)
 682         stuff = headers['proxy-authenticate']
 683         import re
 684         match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
 685         if not match:
 686             URLopener.http_error_default(self, url, fp,
 687                                          errcode, errmsg, headers)
 688         scheme, realm = match.groups()
 689         if scheme.lower() != 'basic':
 690             URLopener.http_error_default(self, url, fp,
 691                                          errcode, errmsg, headers)
 692         name = 'retry_proxy_' + self.type + '_basic_auth'
 693         if data is None:
 694             return getattr(self,name)(url, realm)
 695         else:
 696             return getattr(self,name)(url, realm, data)
 697
 698     def retry_proxy_http_basic_auth(self, url, realm, data=None):
 699         host, selector = splithost(url)
 700         newurl = 'http://' + host + selector
 701         proxy = self.proxies['http']
 702         urltype, proxyhost = splittype(proxy)
 703         proxyhost, proxyselector = splithost(proxyhost)
 704         i = proxyhost.find('@') + 1
 705         proxyhost = proxyhost[i:]
 706         user, passwd = self.get_user_passwd(proxyhost, realm, i)
 707         if not (user or passwd): return None
 708         proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
 709         self.proxies['http'] = 'http://' + proxyhost + proxyselector
 710         if data is None:
 711             return self.open(newurl)
 712         else:
 713             return self.open(newurl, data)
 714
 715     def retry_proxy_https_basic_auth(self, url, realm, data=None):
 716         host, selector = splithost(url)
 717         newurl = 'https://' + host + selector
 718         proxy = self.proxies['https']
 719         urltype, proxyhost = splittype(proxy)
 720         proxyhost, proxyselector = splithost(proxyhost)
 721         i = proxyhost.find('@') + 1
 722         proxyhost = proxyhost[i:]
 723         user, passwd = self.get_user_passwd(proxyhost, realm, i)
 724         if not (user or passwd): return None
 725         proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
 726         self.proxies['https'] = 'https://' + proxyhost + proxyselector
 727         if data is None:
 728             return self.open(newurl)
 729         else:
 730             return self.open(newurl, data)
 731
 732     def retry_http_basic_auth(self, url, realm, data=None):
 733         host, selector = splithost(url)
 734         i = host.find('@') + 1
 735         host = host[i:]
 736         user, passwd = self.get_user_passwd(host, realm, i)
 737         if not (user or passwd): return None
 738         host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
 739         newurl = 'http://' + host + selector
 740         if data is None:
 741             return self.open(newurl)
 742         else:
 743             return self.open(newurl, data)
 744
 745     def retry_https_basic_auth(self, url, realm, data=None):
 746         host, selector = splithost(url)
 747         i = host.find('@') + 1
 748         host = host[i:]
 749         user, passwd = self.get_user_passwd(host, realm, i)
 750         if not (user or passwd): return None
 751         host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
 752         newurl = 'https://' + host + selector
 753         if data is None:
 754             return self.open(newurl)
 755         else:
 756             return self.open(newurl, data)
 757
 758     def get_user_passwd(self, host, realm, clear_cache = 0):
 759         key = realm + '@' + host.lower()
 760         if key in self.auth_cache:
 761             if clear_cache:
 762                 del self.auth_cache[key]
 763             else:
 764                 return self.auth_cache[key]
 765         user, passwd = self.prompt_user_passwd(host, realm)
 766         if user or passwd: self.auth_cache[key] = (user, passwd)
 767         return user, passwd
 768
 769     def prompt_user_passwd(self, host, realm):
 770         """Override this in a GUI environment!"""
 771         import getpass
 772         try:
 773             user = raw_input("Enter username for %s at %s: " % (realm,
 774                                                                 host))
 775             passwd = getpass.getpass("Enter password for %s in %s at %s: " %
 776                 (user, realm, host))
 777             return user, passwd
 778         except KeyboardInterrupt:
 779             print
 780             return None, None
 781
 782
 783 # Utility functions
 784
 785 _localhost = None
 786 def localhost():
 787     """Return the IP address of the magic hostname 'localhost'."""
 788     global _localhost
 789     if _localhost is None:
 790         _localhost = socket.gethostbyname('localhost')
 791     return _localhost
 792
 793 _thishost = None
 794 def thishost():
 795     """Return the IP address of the current host."""
 796     global _thishost
 797     if _thishost is None:
 798         _thishost = socket.gethostbyname(socket.gethostname())
 799     return _thishost
 800
 801 _ftperrors = None
 802 def ftperrors():
 803     """Return the set of errors raised by the FTP class."""
 804     global _ftperrors
 805     if _ftperrors is None:
 806         import ftplib
 807         _ftperrors = ftplib.all_errors
 808     return _ftperrors
 809
 810 _noheaders = None
 811 def noheaders():
 812     """Return an empty mimetools.Message object."""
 813     global _noheaders
 814     if _noheaders is None:
 815         import mimetools
 816         try:
 817             from cStringIO import StringIO
 818         except ImportError:
 819             from StringIO import StringIO
 820         _noheaders = mimetools.Message(StringIO(), 0)
 821         _noheaders.fp.close()   # Recycle file descriptor
 822     return _noheaders
 823
 824
 825 # Utility classes
 826
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections."""

    def __init__(self, user, passwd, host, port, dirs):
        """Remember the login details and directory list, then connect."""
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.init()

    def init(self):
        """(Re)connect: open the FTP session, log in and cwd through dirs."""
        import ftplib
        # busy is set to 1 by retrfile() and cleared by endtransfer().
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port)
        self.ftp.login(self.user, self.passwd)
        for dir in self.dirs:
            self.ftp.cwd(dir)

    def retrfile(self, file, type):
        """Start retrieving file, or a directory listing, over this connection.

        type is appended to the FTP 'TYPE' command; 'd' or 'D' requests a
        listing instead of a file.  Returns a tuple of (file object whose
        close() also calls endtransfer(), second item of the
        ntransfercmd() result).  Raises IOError if RETR fails with a
        permanent error whose text does not start with '550'.
        """
        import ftplib
        # Finish any transfer still pending from an earlier call.
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The command failed; reconnect once and retry it.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm, reason:
                # A '550' reply is swallowed so the LIST fallback below runs;
                # anything else is re-raised as IOError with the traceback.
                if str(reason)[:3] != '550':
                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing
            if file: cmd = 'LIST ' + file
            else: cmd = 'LIST'
            conn = self.ftp.ntransfercmd(cmd)
        self.busy = 1
        # Pass back both a suitably decorated object and a retrieval length
        return (addclosehook(conn[0].makefile('rb'),
                             self.endtransfer), conn[1])
    def endtransfer(self):
        """Read the server's reply for a pending transfer, if there is one.

        Does nothing unless busy is set.  Errors listed by ftperrors()
        raised while reading the reply are ignored.
        """
        if not self.busy:
            return
        self.busy = 0
        try:
            self.ftp.voidresp()
        except ftperrors():
            pass

    def close(self):
        """End any pending transfer and close the FTP session.

        Errors listed by ftperrors() raised by the close are ignored.
        """
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
 892
 893 class addbase:
 894     """Base class for addinfo and addclosehook."""
 895
 896     def __init__(self, fp):
 897         self.fp = fp
 898         self.read = self.fp.read
 899         self.readline = self.fp.readline
 900         if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
 901         if hasattr(self.fp, "fileno"):
 902             self.fileno = self.fp.fileno
 903         else:
 904             self.fileno = lambda: None
 905         if hasattr(self.fp, "__iter__"):
 906             self.__iter__ = self.fp.__iter__
 907             if hasattr(self.fp, "next"):
 908                 self.next = self.fp.next
 909
 910     def __repr__(self):
 911         return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
 912                                              id(self), self.fp)
 913
 914     def close(self):
 915         self.read = None
 916         self.readline = None
 917         self.readlines = None
 918         self.fileno = None
 919         if self.fp: self.fp.close()
 920         self.fp = None
 921
 922 class addclosehook(addbase):
 923     """Class to add a close hook to an open file."""
 924
 925     def __init__(self, fp, closehook, *hookargs):
 926         addbase.__init__(self, fp)
 927         self.closehook = closehook
 928         self.hookargs = hookargs
 929
 930     def close(self):
 931         addbase.close(self)
 932         if self.closehook:
 933             self.closehook(*self.hookargs)
 934             self.closehook = None
 935             self.hookargs = None
 936
class addinfo(addbase):
    """class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        """Wrap fp and remember headers for info()."""
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers
 946
class addinfourl(addbase):
    """class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url):
        """Wrap fp and remember headers and url for the accessors."""
        addbase.__init__(self, fp)
        self.headers = headers
        self.url = url

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers

    def geturl(self):
        """Return the url given to the constructor."""
        return self.url
 960
 961
 962 # Utilities to parse URLs (most of these return None for missing parts):
 963 # unwrap('<URL:type://host/path>') --> 'type://host/path'
 964 # splittype('type:opaquestring') --> 'type', 'opaquestring'
 965 # splithost('//host[:port]/path') --> 'host[:port]', '/path'
 966 # splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
 967 # splitpasswd('user:passwd') -> 'user', 'passwd'
 968 # splitport('host:port') --> 'host', 'port'
 969 # splitquery('/path?query') --> '/path', 'query'
 970 # splittag('/path#tag') --> '/path', 'tag'
 971 # splitattr('/path;attr1=value1;attr2=value2;...') ->
 972 #   '/path', ['attr1=value1', 'attr2=value2', ...]
 973 # splitvalue('attr=value') --> 'attr', 'value'
 974 # splitgophertype('/Xselector') --> 'X', 'selector'
 975 # unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
 977
# Define _is_unicode() according to whether this interpreter has a
# unicode type at all.
try:
    unicode
except NameError:
    def _is_unicode(x):
        """Always false: there is no unicode type in this interpreter."""
        return 0
else:
    def _is_unicode(x):
        """Return true if x is an instance of unicode."""
        return isinstance(x, unicode)
 986
 987 def toBytes(url):
 988     """toBytes(u"URL") --> 'URL'."""
 989     # Most URL schemes require ASCII. If that changes, the conversion
 990     # can be relaxed
 991     if _is_unicode(url):
 992         try:
 993             url = url.encode("ASCII")
 994         except UnicodeError:
 995             raise UnicodeError("URL " + repr(url) +
 996                                " contains non-ASCII characters")
 997     return url
 998
 999 def unwrap(url):
1000     """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
1001     url = url.strip()
1002     if url[:1] == '<' and url[-1:] == '>':
1003         url = url[1:-1].strip()
1004     if url[:4] == 'URL:': url = url[4:].strip()
1005     return url
1006
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The type is returned lower-cased.  Returns (None, url) when url does
    not start with a 'type:' prefix free of '/' and ':'.
    """
    global _typeprog
    if _typeprog is None:
        # Compile on first use and keep the pattern for later calls.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if not found:
        return None, url
    # The match covers the scheme plus its ':', so the rest starts at end().
    return found.group(1).lower(), url[found.end():]
1020
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    The host ends at the first '/', '?' or '#'; everything after it,
    including any newlines, is returned as the second item.  Returns
    (None, url) when url does not start with '//'.
    """
    global _hostprog
    if _hostprog is None:
        import re
        # '#' must end the host as well, otherwise a fragment is taken
        # for part of the host name.  DOTALL lets the remainder contain
        # newlines instead of making the whole match fail.
        _hostprog = re.compile('^//([^/?#]*)(.*)$', re.DOTALL)

    match = _hostprog.match(url)
    if match: return match.group(1, 2)
    return None, url
1032
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    The split is made at the last '@' and both parts are passed through
    unquote().  Returns (None, host) when there is no '@'.
    """
    global _userprog
    if _userprog is None:
        # Compile on first use and keep the pattern for later calls.
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if found is None:
        return None, host
    return [unquote(part) for part in found.group(1, 2)]
1044
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split is made at the first ':'; the password may contain further
    colons and newlines.  Returns (user, None) when there is no ':'.
    """
    global _passwdprog
    if _passwdprog is None:
        import re
        # re.S so that a password containing a newline still matches;
        # without it such input was returned unsplit.
        _passwdprog = re.compile('^([^:]*):(.*)$', re.S)

    match = _passwdprog.match(user)
    if match: return match.group(1, 2)
    return user, None
1056
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits.  When there is no port,
    or the port after the final ':' is empty, the port is None; an empty
    port's trailing ':' is removed from the returned host.
    """
    global _portprog
    if _portprog is None:
        import re
        # Allow an empty port so that 'host:' is recognised and its
        # colon does not stay glued to the host name.
        _portprog = re.compile('^(.*):([0-9]*)$')

    match = _portprog.match(host)
    if match:
        host, port = match.groups()
        if port:
            return host, port
    return host, None
1069
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        # Compile on first use and keep the pattern for later calls.
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if found is None:
        return host, defport
    name, port = found.group(1, 2)
    # An empty or non-numeric port is reported as None.
    nport = None
    if port:
        try:
            nport = int(port)
        except ValueError:
            pass
    return name, nport
1091
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split is made at the last '?'.  Returns (url, None) when the
    pattern does not match, e.g. when there is no '?'.
    """
    global _queryprog
    if _queryprog is None:
        # Compile on first use and keep the pattern for later calls.
        import re
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    found = _queryprog.match(url)
    if found is None:
        return url, None
    return found.groups()
1103
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split is made at the last '#'.  Returns (url, None) when the
    pattern does not match, e.g. when there is no '#'.
    """
    global _tagprog
    if _tagprog is None:
        # Compile on first use and keep the pattern for later calls.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if found is None:
        return url, None
    return found.groups()
1115
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    pieces = url.split(';')
    # The first piece is the path; whatever remains are the attributes.
    path = pieces.pop(0)
    return path, pieces
1121
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split is made at the first '='.  Returns (attr, None) when the
    pattern does not match, e.g. when there is no '='.
    """
    global _valueprog
    if _valueprog is None:
        # Compile on first use and keep the pattern for later calls.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if found is None:
        return attr, None
    return found.groups()
1133
def splitgophertype(selector):
    """splitgophertype('/Xselector') --> 'X', 'selector'.

    Returns (None, selector) unless selector starts with '/' followed by
    at least one more character.
    """
    if selector[:1] != '/' or not selector[1:2]:
        return None, selector
    return selector[1], selector[2:]
1139
# Map every two-digit hex string, in any mix of upper and lower case,
# to its character.  Building it from digit pairs (rather than from the
# '%02x' and '%02X' formats) is what makes escapes like '%aB' decodable.
_hextochr = dict((hi + lo, chr(int(hi + lo, 16)))
                 for hi in '0123456789ABCDEFabcdef'
                 for lo in '0123456789ABCDEFabcdef')

def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Every '%' followed by two hex digits (upper, lower or mixed case) is
    replaced by the corresponding character.  A '%' not followed by two
    hex digits is left in place.
    """
    chunks = s.split('%')
    # Text before the first '%' is copied through unchanged.
    out = chunks[:1]
    for chunk in chunks[1:]:
        try:
            piece = _hextochr[chunk[:2]] + chunk[2:]
        except KeyError:
            # Not a valid escape: keep the '%' literally.
            piece = '%' + chunk
        except UnicodeDecodeError:
            # Joining a non-ASCII byte to unicode text failed; use the
            # unicode character with the same code instead.
            piece = unichr(int(chunk[:2], 16)) + chunk[2:]
        out.append(piece)
    return "".join(out)
1155
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'"""
    # Turn '+' into a space first, then decode the %xx escapes.
    return unquote(s.replace('+', ' '))
1160
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
# Cache of per-character translation tables, keyed by (safe, always_safe).
_safemaps = {}

def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.

    Characters in safe and in always_safe are copied unchanged; every
    other character is replaced by '%' and two upper-case hex digits.
    """
    cachekey = (safe, always_safe)
    safe_map = _safemaps.get(cachekey)
    if safe_map is None:
        # First use of this safe set: build its 256-entry table.
        unreserved = safe + always_safe
        safe_map = {}
        for code in range(256):
            char = chr(code)
            if char in unreserved:
                safe_map[char] = char
            else:
                safe_map[char] = '%%%02X' % code
        _safemaps[cachekey] = safe_map
    return ''.join([safe_map[char] for char in s])
1199
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' not in s:
        return quote(s, safe)
    # Keep spaces through quote() so they can be turned into '+' afterwards.
    return quote(s, safe + ' ').replace(' ', '+')
1206
1207 def urlencode(query,doseq=0):
1208     """Encode a sequence of two-element tuples or dictionary into a URL query string.
1209
1210     If any values in the query arg are sequences and doseq is true, each
1211     sequence element is converted to a separate parameter.
1212
1213     If the query arg is a sequence of two-element tuples, the order of the
1214     parameters in the output will match the order of parameters in the
1215     input.
1216     """
1217
1218     if hasattr(query,"items"):
1219         # mapping objects
1220         query = query.items()
1221     else:
1222         # it's a bother at times that strings and string-like objects are
1223         # sequences...
1224         try:
1225             # non-sequence items should not work with len()
1226             # non-empty strings will fail this
1227             if len(query) and not isinstance(query[0], tuple):
1228                 raise TypeError
1229             # zero-length sequences of all types will get here and succeed,
1230             # but that's a minor nit - since the original implementation
1231             # allowed empty dicts that type of behavior probably should be
1232             # preserved for consistency
1233         except TypeError:
1234             ty,va,tb = sys.exc_info()
1235             raise TypeError, "not a valid non-string sequence or mapping object", tb
1236
1237     l = []
1238     if not doseq:
1239         # preserve old behavior
1240         for k, v in query:
1241             k = quote_plus(str(k))
1242             v = quote_plus(str(v))
1243             l.append(k + '=' + v)
1244     else:
1245         for k, v in query:
1246             k = quote_plus(str(k))
1247             if isinstance(v, str):
1248                 v = quote_plus(v)
1249                 l.append(k + '=' + v)
1250             elif _is_unicode(v):
1251                 # is there a reasonable way to convert to ASCII?
1252                 # encode generates a string, but "replace" or "ignore"
1253                 # lose information and "strict" can raise UnicodeError
1254                 v = quote_plus(v.encode("ASCII","replace"))
1255                 l.append(k + '=' + v)
1256             else:
1257                 try:
1258                     # is this a sufficient test for sequence-ness?
1259                     x = len(v)
1260                 except TypeError:
1261                     # not a sequence
1262                     v = quote_plus(str(v))
1263                     l.append(k + '=' + v)
1264                 else:
1265                     # loop over the sequence
1266                     for elt in v:
1267                         l.append(k + '=' + quote_plus(str(elt)))
1268     return '&'.join(l)
1269
1270 # Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    Variable names are compared in lower case and variables with an
    empty value are ignored.

    """
    suffix = '_proxy'
    found = {}
    for variable, setting in os.environ.items():
        lowered = variable.lower()
        if setting and lowered.endswith(suffix):
            found[lowered[:-len(suffix)]] = setting
    return found
1286
1287 if sys.platform == 'darwin':
1288     def getproxies_internetconfig():
1289         """Return a dictionary of scheme -> proxy server URL mappings.
1290
1291         By convention the mac uses Internet Config to store
1292         proxies.  An HTTP proxy, for instance, is stored under
1293         the HttpProxy key.
1294
1295         """
1296         try:
1297             import ic
1298         except ImportError:
1299             return {}
1300
1301         try:
1302             config = ic.IC()
1303         except ic.error:
1304             return {}
1305         proxies = {}
1306         # HTTP:
1307         if 'UseHTTPProxy' in config and config['UseHTTPProxy']:
1308             try:
1309                 value = config['HTTPProxyHost']
1310             except ic.error:
1311                 pass
1312             else:
1313                 proxies['http'] = 'http://%s' % value
1314         # FTP: XXXX To be done.
1315         # Gopher: XXXX To be done.
1316         return proxies
1317
    def proxy_bypass(x):
        """Always return 0; the argument is not examined."""
        return 0
1320
1321     def getproxies():
1322         return getproxies_environment() or getproxies_internetconfig()
1323
1324 elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        The ProxyEnable and ProxyServer values under the current user's
        'Internet Settings' key are read.  An empty dictionary is returned
        when _winreg is unavailable or ProxyEnable is false; if reading or
        parsing fails part-way, whatever was collected so far is returned.

        """
        proxies = {}
        try:
            import _winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                       'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
                    # ('protocol=address' entries separated by ';')
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        import re
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
            # NOTE(review): Close() is skipped when an exception is raised
            # above - confirm whether the handle is released some other way.
            internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies
1369
1370     def getproxies():
1371         """Return a dictionary of scheme -> proxy server URL mappings.
1372
1373         Returns settings gathered from the environment, if specified,
1374         or the registry.
1375
1376         """
1377         return getproxies_environment() or getproxies_registry()
1378
    def proxy_bypass(host):
        """Return 1 if host matches the registry's ProxyOverride list, else 0.

        host may include a ':port' suffix, which is ignored.  The bare
        name, its resolved address and its fully qualified name are each
        compared against every ';'-separated ProxyOverride entry.
        Returns 0 when _winreg or re cannot be imported, when the
        registry values cannot be read, or when ProxyEnable or
        ProxyOverride is empty/false.
        """
        try:
            import _winreg
            import re
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
            # ^^^^ Returned as Unicode but problems if not converted to ASCII
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        i = 0
        while i < len(proxyOverride):
            if proxyOverride[i] == '<local>':
                # NOTE(review): the lookups below are not guarded against
                # socket.error, unlike the ones above - confirm intended.
                proxyOverride[i:i+1] = ['localhost',
                                        '127.0.0.1',
                                        socket.gethostname(),
                                        socket.gethostbyname(
                                            socket.gethostname())]
            i += 1
        # print proxyOverride
        # now check if we match one of the registry values.
        for test in proxyOverride:
            # Convert the glob-style entry into a regular expression.
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                # print "%s <--> %s" %( test, val )
                # NOTE(review): re.match anchors only at the start, so an
                # entry also matches names it is merely a prefix of.
                if re.match(test, val, re.I):
                    return 1
        return 0
1437
1438 else:
    # By default use environment variables
    getproxies = getproxies_environment

    def proxy_bypass(host):
        """Always return 0; the argument is not examined."""
        return 0
1444
1445 # Test and time quote() and unquote()
1446 def test1():
1447     s = ''
1448     for i in range(256): s = s + chr(i)
1449     s = s*4
1450     t0 = time.time()
1451     qs = quote(s)
1452     uqs = unquote(qs)
1453     t1 = time.time()
1454     if uqs != s:
1455         print 'Wrong!'
1456     print repr(s)
1457     print repr(qs)
1458     print repr(uqs)
1459     print round(t1 - t0, 3), 'sec'
1460
1461
1462 def reporthook(blocknum, blocksize, totalsize):
1463     # Report during remote transfers
1464     print "Block number: %d, Block size: %d, Total size: %d" % (
1465         blocknum, blocksize, totalsize)
1466
1467 # Test program
1468 def test(args=[]):
1469     if not args:
1470         args = [
1471             '/etc/passwd',
1472             'file:/etc/passwd',
1473             'file://localhost/etc/passwd',
1474             'ftp://ftp.python.org/pub/python/README',
1475 ##          'gopher://gopher.micro.umn.edu/1/',
1476             'http://www.python.org/index.html',
1477             ]
1478         if hasattr(URLopener, "open_https"):
1479             args.append('https://synergy.as.cmu.edu/~geek/')
1480     try:
1481         for url in args:
1482             print '-'*10, url, '-'*10
1483             fn, h = urlretrieve(url, None, reporthook)
1484             print fn
1485             if h:
1486                 print '======'
1487                 for k in h.keys(): print k + ':', h[k]
1488                 print '======'
1489             fp = open(fn, 'rb')
1490             data = fp.read()
1491             del fp
1492             if '\r' in data:
1493                 table = string.maketrans("", "")
1494                 data = data.translate(table, "\r")
1495             print data
1496             fn, h = None, None
1497         print '-'*40
1498     finally:
1499         urlcleanup()
1500
1501 def main():
1502     import getopt, sys
1503     try:
1504         opts, args = getopt.getopt(sys.argv[1:], "th")
1505     except getopt.error, msg:
1506         print msg
1507         print "Use -h for help"
1508         return
1509     t = 0
1510     for o, a in opts:
1511         if o == '-t':
1512             t = t + 1
1513         if o == '-h':
1514             print "Usage: python urllib.py [-t] [url ...]"
1515             print "-t runs self-test;",
1516             print "otherwise, contents of urls are printed"
1517             return
1518     if t:
1519         if t > 1:
1520             test1()
1521         test(args)
1522     else:
1523         if not args:
1524             print "Use -h for help"
1525         for url in args:
1526             print urlopen(url).read(),
1527
# Run the command-line driver main() when this file is executed as a
# script rather than imported.
if __name__ == '__main__':
    main()