1 """Open an arbitrary URL.
3 See the following document for more info on URLs:
4 "Names and Addresses, URIs, URLs, URNs, URCs", at
5 http://www.w3.org/pub/WWW/Addressing/Overview.html
7 See also the HTTP spec (from which the error codes are derived):
8 "HTTP - Hypertext Transfer Protocol", at
9 http://www.w3.org/pub/WWW/Protocols/
11 Related standards and specs:
12 - RFC1808: the "relative URL" spec. (authoritative status)
13 - RFC1738: the "URL standard". (authoritative status)
14 - RFC1630: the "URI spec". (informational status)
16 The object returned by URLopener().open(file) will differ per
17 protocol. All you know is that it has methods read(), readline(),
18 readlines(), fileno(), close() and info(). The read*(), fileno()
19 and close() methods work like those of open files.
20 The info() method returns a mimetools.Message object which can be
21 used to query various info about the object, if available.
22 (mimetools.Message objects are queried with the getheader() method.)
23 """
25 import string
26 import socket
27 import os
28 import time
29 import sys
30 from urlparse import urljoin as basejoin
31 import warnings
33 __all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
34 "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
35 "urlencode", "url2pathname", "pathname2url", "splittag",
36 "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
37 "splittype", "splithost", "splituser", "splitpasswd", "splitport",
38 "splitnport", "splitquery", "splitattr", "splitvalue",
39 "getproxies"]
41 __version__ = '1.17' # XXX This version is not always updated :-(
43 MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
45 # Helper for non-unix systems
46 if os.name == 'mac':
47 from macurl2path import url2pathname, pathname2url
48 elif os.name == 'nt':
49 from nturl2path import url2pathname, pathname2url
50 elif os.name == 'riscos':
51 from rourl2path import url2pathname, pathname2url
52 else:
53 def url2pathname(pathname):
54 """OS-specific conversion from a relative URL of the 'file' scheme
55 to a file system path; not recommended for general use."""
56 return unquote(pathname)
58 def pathname2url(pathname):
59 """OS-specific conversion from a file system path to a relative URL
60 of the 'file' scheme; not recommended for general use."""
61 return quote(pathname)
63 # This really consists of two pieces:
64 # (1) a class which handles opening of all sorts of URLs
65 # (plus assorted utilities etc.)
66 # (2) a set of functions for parsing URLs
67 # XXX Should these be separated out into different modules?
70 # Shortcut for basic usage
71 _urlopener = None
72 def urlopen(url, data=None, proxies=None):
73 """Create a file-like object for the specified URL to read from."""
74 from warnings import warnpy3k
75 warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
76 "favor of urllib2.urlopen()", stacklevel=2)
78 global _urlopener
79 if proxies is not None:
80 opener = FancyURLopener(proxies=proxies)
81 elif not _urlopener:
82 opener = FancyURLopener()
83 _urlopener = opener
84 else:
85 opener = _urlopener
86 if data is None:
87 return opener.open(url)
88 else:
89 return opener.open(url, data)
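# A minimal usage sketch (not executed here; the URL is a placeholder).
# The object returned by urlopen() supports the file-like interface
# described in the module docstring, plus info() and geturl():
#
#     f = urlopen('http://www.example.com/')
#     print f.geturl()                          # URL actually opened (after redirects)
#     print f.info().getheader('Content-Type')
#     body = f.read()
#     f.close()
#
# Passing a 'data' string switches the request to POST:
#
#     params = urlencode({'q': 'python'})
#     f = urlopen('http://www.example.com/search', params)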
90 def urlretrieve(url, filename=None, reporthook=None, data=None):
91 global _urlopener
92 if not _urlopener:
93 _urlopener = FancyURLopener()
94 return _urlopener.retrieve(url, filename, reporthook, data)
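# Sketch of urlretrieve() with a progress hook (URL and filename are
# placeholders); the hook signature matches the reporthook calls made in
# URLopener.retrieve() below, and totalsize is -1 when Content-Length is
# unknown:
#
#     def hook(blocknum, blocksize, totalsize):
#         print "read %d blocks (~%d bytes) of %d" % (
#             blocknum, blocknum * blocksize, totalsize)
#
#     filename, headers = urlretrieve('http://www.example.com/data.csv',
#                                     'data.csv', hook)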
95 def urlcleanup():
96 if _urlopener:
97 _urlopener.cleanup()
99 # check for SSL
100 try:
101 import ssl
102 except ImportError:
103 _have_ssl = False
104 else:
105 _have_ssl = True
107 # exception raised when downloaded size does not match content-length
108 class ContentTooShortError(IOError):
109 def __init__(self, message, content):
110 IOError.__init__(self, message)
111 self.content = content
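# Sketch of how a caller might handle a short read (placeholder URL); the
# exception's 'content' attribute carries the (filename, headers) pair for
# the partial download, which is left on disk:
#
#     try:
#         filename, headers = urlretrieve('http://www.example.com/big.iso')
#     except ContentTooShortError, e:
#         filename, headers = e.content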
113 ftpcache = {}
114 class URLopener:
115 """Class to open URLs.
116 This is a class rather than just a subroutine because we may need
117 more than one set of global protocol-specific options.
118 Note -- this is a base class for those who don't want the
119 automatic handling of errors of type 302 (relocated) and 401
120 (authorization needed)."""
122 __tempfiles = None
124 version = "Python-urllib/%s" % __version__
126 # Constructor
127 def __init__(self, proxies=None, **x509):
128 if proxies is None:
129 proxies = getproxies()
130 assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
131 self.proxies = proxies
132 self.key_file = x509.get('key_file')
133 self.cert_file = x509.get('cert_file')
134 self.addheaders = [('User-Agent', self.version)]
135 self.__tempfiles = []
136 self.__unlink = os.unlink # See cleanup()
137 self.tempcache = None
138 # Undocumented feature: if you assign {} to tempcache,
139 # it is used to cache files retrieved with
140 # self.retrieve(). This is not enabled by default
141 # since it does not work for changing documents (and I
142 # haven't got the logic to check expiration headers
143 # yet).
144 self.ftpcache = ftpcache
145 # Undocumented feature: you can use a different
146 # ftp cache by assigning to the .ftpcache member;
147 # in case you want logically independent URL openers
148 # XXX This is not threadsafe. Bah.
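# Sketch of constructing an opener with an explicit proxy mapping instead of
# the environment-derived default (proxy host and port are placeholders):
#
#     opener = URLopener(proxies={'http': 'http://proxy.example.com:8080/'})
#     f = opener.open('http://www.python.org/')
#
# Passing proxies={} disables proxying entirely.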
150 def __del__(self):
151 self.close()
153 def close(self):
154 self.cleanup()
156 def cleanup(self):
157 # This code sometimes runs when the rest of this module
158 # has already been deleted, so it can't use any globals
159 # or import anything.
160 if self.__tempfiles:
161 for file in self.__tempfiles:
162 try:
163 self.__unlink(file)
164 except OSError:
165 pass
166 del self.__tempfiles[:]
167 if self.tempcache:
168 self.tempcache.clear()
170 def addheader(self, *args):
171 """Add a header to be used by the HTTP interface only
172 e.g. u.addheader('Accept', 'sound/basic')"""
173 self.addheaders.append(args)
175 # External interface
176 def open(self, fullurl, data=None):
177 """Use URLopener().open(file) instead of open(file, 'r')."""
178 fullurl = unwrap(toBytes(fullurl))
179 # percent-encode the URL, working around lame server errors such as
180 # spaces within URL paths.
181 fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]")
182 if self.tempcache and fullurl in self.tempcache:
183 filename, headers = self.tempcache[fullurl]
184 fp = open(filename, 'rb')
185 return addinfourl(fp, headers, fullurl)
186 urltype, url = splittype(fullurl)
187 if not urltype:
188 urltype = 'file'
189 if urltype in self.proxies:
190 proxy = self.proxies[urltype]
191 urltype, proxyhost = splittype(proxy)
192 host, selector = splithost(proxyhost)
193 url = (host, fullurl) # Signal special case to open_*()
194 else:
195 proxy = None
196 name = 'open_' + urltype
197 self.type = urltype
198 name = name.replace('-', '_')
199 if not hasattr(self, name):
200 if proxy:
201 return self.open_unknown_proxy(proxy, fullurl, data)
202 else:
203 return self.open_unknown(fullurl, data)
204 try:
205 if data is None:
206 return getattr(self, name)(url)
207 else:
208 return getattr(self, name)(url, data)
209 except socket.error, msg:
210 raise IOError, ('socket error', msg), sys.exc_info()[2]
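# open() dispatches on the scheme: 'open_' + urltype, with '-' mapped to '_'.
# A hypothetical sketch of adding a new scheme in a subclass (class name,
# scheme, and path below are invented for illustration):
#
#     class ExampleOpener(URLopener):
#         def open_example(self, url):
#             # called for 'example:...' URLs; 'url' is everything after the
#             # scheme, e.g. '//host/thing'
#             return self.open_local_file('/tmp/example-cache.txt')
#
#     ExampleOpener().open('example://host/thing')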
212 def open_unknown(self, fullurl, data=None):
213 """Overridable interface to open unknown URL type."""
214 type, url = splittype(fullurl)
215 raise IOError, ('url error', 'unknown url type', type)
217 def open_unknown_proxy(self, proxy, fullurl, data=None):
218 """Overridable interface to open unknown URL type."""
219 type, url = splittype(fullurl)
220 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
222 # External interface
223 def retrieve(self, url, filename=None, reporthook=None, data=None):
224 """retrieve(url) returns (filename, headers) for a local object
225 or (tempfilename, headers) for a remote object."""
226 url = unwrap(toBytes(url))
227 if self.tempcache and url in self.tempcache:
228 return self.tempcache[url]
229 type, url1 = splittype(url)
230 if filename is None and (not type or type == 'file'):
231 try:
232 fp = self.open_local_file(url1)
233 hdrs = fp.info()
234 del fp
235 return url2pathname(splithost(url1)[1]), hdrs
236 except IOError, msg:
237 pass
238 fp = self.open(url, data)
239 try:
240 headers = fp.info()
241 if filename:
242 tfp = open(filename, 'wb')
243 else:
244 import tempfile
245 garbage, path = splittype(url)
246 garbage, path = splithost(path or "")
247 path, garbage = splitquery(path or "")
248 path, garbage = splitattr(path or "")
249 suffix = os.path.splitext(path)[1]
250 (fd, filename) = tempfile.mkstemp(suffix)
251 self.__tempfiles.append(filename)
252 tfp = os.fdopen(fd, 'wb')
253 try:
254 result = filename, headers
255 if self.tempcache is not None:
256 self.tempcache[url] = result
257 bs = 1024*8
258 size = -1
259 read = 0
260 blocknum = 0
261 if reporthook:
262 if "content-length" in headers:
263 size = int(headers["Content-Length"])
264 reporthook(blocknum, bs, size)
265 while 1:
266 block = fp.read(bs)
267 if block == "":
268 break
269 read += len(block)
270 tfp.write(block)
271 blocknum += 1
272 if reporthook:
273 reporthook(blocknum, bs, size)
274 finally:
275 tfp.close()
276 finally:
277 fp.close()
278 del fp
279 del tfp
281 # raise exception if actual size does not match content-length header
282 if size >= 0 and read < size:
283 raise ContentTooShortError("retrieval incomplete: got only %i out "
284 "of %i bytes" % (read, size), result)
286 return result
288 # Each method named open_<type> knows how to open that type of URL
290 def open_http(self, url, data=None):
291 """Use HTTP protocol."""
292 import httplib
293 user_passwd = None
294 proxy_passwd= None
295 if isinstance(url, str):
296 host, selector = splithost(url)
297 if host:
298 user_passwd, host = splituser(host)
299 host = unquote(host)
300 realhost = host
301 else:
302 host, selector = url
303 # check whether the proxy contains authorization information
304 proxy_passwd, host = splituser(host)
305 # now we proceed with the url we want to obtain
306 urltype, rest = splittype(selector)
307 url = rest
308 user_passwd = None
309 if urltype.lower() != 'http':
310 realhost = None
311 else:
312 realhost, rest = splithost(rest)
313 if realhost:
314 user_passwd, realhost = splituser(realhost)
315 if user_passwd:
316 selector = "%s://%s%s" % (urltype, realhost, rest)
317 if proxy_bypass(realhost):
318 host = realhost
320 #print "proxy via http:", host, selector
321 if not host: raise IOError, ('http error', 'no host given')
323 if proxy_passwd:
324 import base64
325 proxy_auth = base64.b64encode(proxy_passwd).strip()
326 else:
327 proxy_auth = None
329 if user_passwd:
330 import base64
331 auth = base64.b64encode(user_passwd).strip()
332 else:
333 auth = None
334 h = httplib.HTTP(host)
335 if data is not None:
336 h.putrequest('POST', selector)
337 h.putheader('Content-Type', 'application/x-www-form-urlencoded')
338 h.putheader('Content-Length', '%d' % len(data))
339 else:
340 h.putrequest('GET', selector)
341 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
342 if auth: h.putheader('Authorization', 'Basic %s' % auth)
343 if realhost: h.putheader('Host', realhost)
344 for args in self.addheaders: h.putheader(*args)
345 h.endheaders(data)
346 errcode, errmsg, headers = h.getreply()
347 fp = h.getfile()
348 if errcode == -1:
349 if fp: fp.close()
350 # something went wrong with the HTTP status line
351 raise IOError, ('http protocol error', 0,
352 'got a bad status line', None)
353 # According to RFC 2616, "2xx" code indicates that the client's
354 # request was successfully received, understood, and accepted.
355 if (200 <= errcode < 300):
356 return addinfourl(fp, headers, "http:" + url, errcode)
357 else:
358 if data is None:
359 return self.http_error(url, fp, errcode, errmsg, headers)
360 else:
361 return self.http_error(url, fp, errcode, errmsg, headers, data)
363 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
364 """Handle http errors.
365 Derived class can override this, or provide specific handlers
366 named http_error_DDD where DDD is the 3-digit error code."""
367 # First check if there's a specific handler for this error
368 name = 'http_error_%d' % errcode
369 if hasattr(self, name):
370 method = getattr(self, name)
371 if data is None:
372 result = method(url, fp, errcode, errmsg, headers)
373 else:
374 result = method(url, fp, errcode, errmsg, headers, data)
375 if result: return result
376 return self.http_error_default(url, fp, errcode, errmsg, headers)
378 def http_error_default(self, url, fp, errcode, errmsg, headers):
379 """Default error handler: close the connection and raise IOError."""
380 void = fp.read()
381 fp.close()
382 raise IOError, ('http error', errcode, errmsg, headers)
384 if _have_ssl:
385 def open_https(self, url, data=None):
386 """Use HTTPS protocol."""
388 import httplib
389 user_passwd = None
390 proxy_passwd = None
391 if isinstance(url, str):
392 host, selector = splithost(url)
393 if host:
394 user_passwd, host = splituser(host)
395 host = unquote(host)
396 realhost = host
397 else:
398 host, selector = url
399 # check whether the proxy contains authorization information
400 proxy_passwd, host = splituser(host)
401 urltype, rest = splittype(selector)
402 url = rest
403 user_passwd = None
404 if urltype.lower() != 'https':
405 realhost = None
406 else:
407 realhost, rest = splithost(rest)
408 if realhost:
409 user_passwd, realhost = splituser(realhost)
410 if user_passwd:
411 selector = "%s://%s%s" % (urltype, realhost, rest)
412 #print "proxy via https:", host, selector
413 if not host: raise IOError, ('https error', 'no host given')
414 if proxy_passwd:
415 import base64
416 proxy_auth = base64.b64encode(proxy_passwd).strip()
417 else:
418 proxy_auth = None
419 if user_passwd:
420 import base64
421 auth = base64.b64encode(user_passwd).strip()
422 else:
423 auth = None
424 h = httplib.HTTPS(host, 0,
425 key_file=self.key_file,
426 cert_file=self.cert_file)
427 if data is not None:
428 h.putrequest('POST', selector)
429 h.putheader('Content-Type',
430 'application/x-www-form-urlencoded')
431 h.putheader('Content-Length', '%d' % len(data))
432 else:
433 h.putrequest('GET', selector)
434 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
435 if auth: h.putheader('Authorization', 'Basic %s' % auth)
436 if realhost: h.putheader('Host', realhost)
437 for args in self.addheaders: h.putheader(*args)
438 h.endheaders(data)
439 errcode, errmsg, headers = h.getreply()
440 fp = h.getfile()
441 if errcode == -1:
442 if fp: fp.close()
443 # something went wrong with the HTTP status line
444 raise IOError, ('http protocol error', 0,
445 'got a bad status line', None)
446 # According to RFC 2616, "2xx" code indicates that the client's
447 # request was successfully received, understood, and accepted.
448 if (200 <= errcode < 300):
449 return addinfourl(fp, headers, "https:" + url, errcode)
450 else:
451 if data is None:
452 return self.http_error(url, fp, errcode, errmsg, headers)
453 else:
454 return self.http_error(url, fp, errcode, errmsg, headers,
455 data)
457 def open_file(self, url):
458 """Use local file or FTP depending on form of URL."""
459 if not isinstance(url, str):
460 raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
461 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
462 return self.open_ftp(url)
463 else:
464 return self.open_local_file(url)
466 def open_local_file(self, url):
467 """Use local file."""
468 import mimetypes, mimetools, email.utils
469 try:
470 from cStringIO import StringIO
471 except ImportError:
472 from StringIO import StringIO
473 host, file = splithost(url)
474 localname = url2pathname(file)
475 try:
476 stats = os.stat(localname)
477 except OSError, e:
478 raise IOError(e.errno, e.strerror, e.filename)
479 size = stats.st_size
480 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
481 mtype = mimetypes.guess_type(url)[0]
482 headers = mimetools.Message(StringIO(
483 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
484 (mtype or 'text/plain', size, modified)))
485 if not host:
486 urlfile = file
487 if file[:1] == '/':
488 urlfile = 'file://' + file
489 return addinfourl(open(localname, 'rb'),
490 headers, urlfile)
491 host, port = splitport(host)
492 if not port \
493 and socket.gethostbyname(host) in (localhost(), thishost()):
494 urlfile = file
495 if file[:1] == '/':
496 urlfile = 'file://' + file
497 return addinfourl(open(localname, 'rb'),
498 headers, urlfile)
499 raise IOError, ('local file error', 'not on local host')
501 def open_ftp(self, url):
502 """Use FTP protocol."""
503 if not isinstance(url, str):
504 raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
505 import mimetypes, mimetools
506 try:
507 from cStringIO import StringIO
508 except ImportError:
509 from StringIO import StringIO
510 host, path = splithost(url)
511 if not host: raise IOError, ('ftp error', 'no host given')
512 host, port = splitport(host)
513 user, host = splituser(host)
514 if user: user, passwd = splitpasswd(user)
515 else: passwd = None
516 host = unquote(host)
517 user = unquote(user or '')
518 passwd = unquote(passwd or '')
519 host = socket.gethostbyname(host)
520 if not port:
521 import ftplib
522 port = ftplib.FTP_PORT
523 else:
524 port = int(port)
525 path, attrs = splitattr(path)
526 path = unquote(path)
527 dirs = path.split('/')
528 dirs, file = dirs[:-1], dirs[-1]
529 if dirs and not dirs[0]: dirs = dirs[1:]
530 if dirs and not dirs[0]: dirs[0] = '/'
531 key = user, host, port, '/'.join(dirs)
532 # XXX thread unsafe!
533 if len(self.ftpcache) > MAXFTPCACHE:
534 # Prune the cache, rather arbitrarily
535 for k in self.ftpcache.keys():
536 if k != key:
537 v = self.ftpcache[k]
538 del self.ftpcache[k]
539 v.close()
540 try:
541 if not key in self.ftpcache:
542 self.ftpcache[key] = \
543 ftpwrapper(user, passwd, host, port, dirs)
544 if not file: type = 'D'
545 else: type = 'I'
546 for attr in attrs:
547 attr, value = splitvalue(attr)
548 if attr.lower() == 'type' and \
549 value in ('a', 'A', 'i', 'I', 'd', 'D'):
550 type = value.upper()
551 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
552 mtype = mimetypes.guess_type("ftp:" + url)[0]
553 headers = ""
554 if mtype:
555 headers += "Content-Type: %s\n" % mtype
556 if retrlen is not None and retrlen >= 0:
557 headers += "Content-Length: %d\n" % retrlen
558 headers = mimetools.Message(StringIO(headers))
559 return addinfourl(fp, headers, "ftp:" + url)
560 except ftperrors(), msg:
561 raise IOError, ('ftp error', msg), sys.exc_info()[2]
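# FTP URLs may carry a ';type=' attribute (parsed with splitattr() and
# splitvalue()) to force the transfer mode; a sketch with a placeholder host:
#
#     urlopen('ftp://ftp.example.com/pub/data.bin;type=i')   # binary ('I')
#     urlopen('ftp://ftp.example.com/pub/notes.txt;type=a')  # ASCII ('A')
#     urlopen('ftp://ftp.example.com/pub/;type=d')           # directory listing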
563 def open_data(self, url, data=None):
564 """Use "data" URL."""
565 if not isinstance(url, str):
566 raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
567 # ignore POSTed data
569 # syntax of data URLs:
570 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
571 # mediatype := [ type "/" subtype ] *( ";" parameter )
572 # data := *urlchar
573 # parameter := attribute "=" value
574 import mimetools
575 try:
576 from cStringIO import StringIO
577 except ImportError:
578 from StringIO import StringIO
579 try:
580 [type, data] = url.split(',', 1)
581 except ValueError:
582 raise IOError, ('data error', 'bad data URL')
583 if not type:
584 type = 'text/plain;charset=US-ASCII'
585 semi = type.rfind(';')
586 if semi >= 0 and '=' not in type[semi:]:
587 encoding = type[semi+1:]
588 type = type[:semi]
589 else:
590 encoding = ''
591 msg = []
592 msg.append('Date: %s' % time.strftime('%a, %d %b %Y %H:%M:%S GMT',
593 time.gmtime(time.time())))
594 msg.append('Content-type: %s' % type)
595 if encoding == 'base64':
596 import base64
597 data = base64.decodestring(data)
598 else:
599 data = unquote(data)
600 msg.append('Content-Length: %d' % len(data))
601 msg.append('')
602 msg.append(data)
603 msg = '\n'.join(msg)
604 f = StringIO(msg)
605 headers = mimetools.Message(f, 0)
606 #f.fileno = None # needed for addinfourl
607 return addinfourl(f, headers, url)
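# A data: URL sketch following the syntax described in the comment above
# (the payload is just base64 for the string 'Hello'):
#
#     f = urlopen('data:text/plain;base64,SGVsbG8=')
#     f.info().gettype()     # 'text/plain'
#     f.read()               # 'Hello'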
610 class FancyURLopener(URLopener):
611 """Derived class with handlers for errors we can handle (perhaps)."""
613 def __init__(self, *args, **kwargs):
614 URLopener.__init__(self, *args, **kwargs)
615 self.auth_cache = {}
616 self.tries = 0
617 self.maxtries = 10
619 def http_error_default(self, url, fp, errcode, errmsg, headers):
620 """Default error handling -- don't raise an exception."""
621 return addinfourl(fp, headers, "http:" + url, errcode)
623 def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
624 """Error 302 -- relocated (temporarily)."""
625 self.tries += 1
626 if self.maxtries and self.tries >= self.maxtries:
627 if hasattr(self, "http_error_500"):
628 meth = self.http_error_500
629 else:
630 meth = self.http_error_default
631 self.tries = 0
632 return meth(url, fp, 500,
633 "Internal Server Error: Redirect Recursion", headers)
634 result = self.redirect_internal(url, fp, errcode, errmsg, headers,
635 data)
636 self.tries = 0
637 return result
639 def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
640 if 'location' in headers:
641 newurl = headers['location']
642 elif 'uri' in headers:
643 newurl = headers['uri']
644 else:
645 return
646 void = fp.read()
647 fp.close()
648 # In case the server sent a relative URL, join with original:
649 newurl = basejoin(self.type + ":" + url, newurl)
650 return self.open(newurl)
652 def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
653 """Error 301 -- also relocated (permanently)."""
654 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
656 def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
657 """Error 303 -- also relocated (essentially identical to 302)."""
658 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
660 def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
661 """Error 307 -- relocated, but turn POST into error."""
662 if data is None:
663 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
664 else:
665 return self.http_error_default(url, fp, errcode, errmsg, headers)
667 def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
668 """Error 401 -- authentication required.
669 This function supports Basic authentication only."""
670 if not 'www-authenticate' in headers:
671 URLopener.http_error_default(self, url, fp,
672 errcode, errmsg, headers)
673 stuff = headers['www-authenticate']
674 import re
675 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
676 if not match:
677 URLopener.http_error_default(self, url, fp,
678 errcode, errmsg, headers)
679 scheme, realm = match.groups()
680 if scheme.lower() != 'basic':
681 URLopener.http_error_default(self, url, fp,
682 errcode, errmsg, headers)
683 name = 'retry_' + self.type + '_basic_auth'
684 if data is None:
685 return getattr(self,name)(url, realm)
686 else:
687 return getattr(self,name)(url, realm, data)
689 def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
690 """Error 407 -- proxy authentication required.
691 This function supports Basic authentication only."""
692 if not 'proxy-authenticate' in headers:
693 URLopener.http_error_default(self, url, fp,
694 errcode, errmsg, headers)
695 stuff = headers['proxy-authenticate']
696 import re
697 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
698 if not match:
699 URLopener.http_error_default(self, url, fp,
700 errcode, errmsg, headers)
701 scheme, realm = match.groups()
702 if scheme.lower() != 'basic':
703 URLopener.http_error_default(self, url, fp,
704 errcode, errmsg, headers)
705 name = 'retry_proxy_' + self.type + '_basic_auth'
706 if data is None:
707 return getattr(self,name)(url, realm)
708 else:
709 return getattr(self,name)(url, realm, data)
711 def retry_proxy_http_basic_auth(self, url, realm, data=None):
712 host, selector = splithost(url)
713 newurl = 'http://' + host + selector
714 proxy = self.proxies['http']
715 urltype, proxyhost = splittype(proxy)
716 proxyhost, proxyselector = splithost(proxyhost)
717 i = proxyhost.find('@') + 1
718 proxyhost = proxyhost[i:]
719 user, passwd = self.get_user_passwd(proxyhost, realm, i)
720 if not (user or passwd): return None
721 proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
722 self.proxies['http'] = 'http://' + proxyhost + proxyselector
723 if data is None:
724 return self.open(newurl)
725 else:
726 return self.open(newurl, data)
728 def retry_proxy_https_basic_auth(self, url, realm, data=None):
729 host, selector = splithost(url)
730 newurl = 'https://' + host + selector
731 proxy = self.proxies['https']
732 urltype, proxyhost = splittype(proxy)
733 proxyhost, proxyselector = splithost(proxyhost)
734 i = proxyhost.find('@') + 1
735 proxyhost = proxyhost[i:]
736 user, passwd = self.get_user_passwd(proxyhost, realm, i)
737 if not (user or passwd): return None
738 proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
739 self.proxies['https'] = 'https://' + proxyhost + proxyselector
740 if data is None:
741 return self.open(newurl)
742 else:
743 return self.open(newurl, data)
745 def retry_http_basic_auth(self, url, realm, data=None):
746 host, selector = splithost(url)
747 i = host.find('@') + 1
748 host = host[i:]
749 user, passwd = self.get_user_passwd(host, realm, i)
750 if not (user or passwd): return None
751 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
752 newurl = 'http://' + host + selector
753 if data is None:
754 return self.open(newurl)
755 else:
756 return self.open(newurl, data)
758 def retry_https_basic_auth(self, url, realm, data=None):
759 host, selector = splithost(url)
760 i = host.find('@') + 1
761 host = host[i:]
762 user, passwd = self.get_user_passwd(host, realm, i)
763 if not (user or passwd): return None
764 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
765 newurl = 'https://' + host + selector
766 if data is None:
767 return self.open(newurl)
768 else:
769 return self.open(newurl, data)
771 def get_user_passwd(self, host, realm, clear_cache = 0):
772 key = realm + '@' + host.lower()
773 if key in self.auth_cache:
774 if clear_cache:
775 del self.auth_cache[key]
776 else:
777 return self.auth_cache[key]
778 user, passwd = self.prompt_user_passwd(host, realm)
779 if user or passwd: self.auth_cache[key] = (user, passwd)
780 return user, passwd
782 def prompt_user_passwd(self, host, realm):
783 """Override this in a GUI environment!"""
784 import getpass
785 try:
786 user = raw_input("Enter username for %s at %s: " % (realm,
787 host))
788 passwd = getpass.getpass("Enter password for %s in %s at %s: " %
789 (user, realm, host))
790 return user, passwd
791 except KeyboardInterrupt:
792 print
793 return None, None
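# Sketch of overriding prompt_user_passwd() for non-interactive use, as the
# docstring above suggests (the credentials are obviously placeholders):
#
#     class ScriptedOpener(FancyURLopener):
#         def prompt_user_passwd(self, host, realm):
#             return 'user', 'secret'
#
#     f = ScriptedOpener().open('http://www.example.com/protected/')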
796 # Utility functions
798 _localhost = None
799 def localhost():
800 """Return the IP address of the magic hostname 'localhost'."""
801 global _localhost
802 if _localhost is None:
803 _localhost = socket.gethostbyname('localhost')
804 return _localhost
806 _thishost = None
807 def thishost():
808 """Return the IP address of the current host."""
809 global _thishost
810 if _thishost is None:
811 _thishost = socket.gethostbyname(socket.gethostname())
812 return _thishost
814 _ftperrors = None
815 def ftperrors():
816 """Return the set of errors raised by the FTP class."""
817 global _ftperrors
818 if _ftperrors is None:
819 import ftplib
820 _ftperrors = ftplib.all_errors
821 return _ftperrors
823 _noheaders = None
824 def noheaders():
825 """Return an empty mimetools.Message object."""
826 global _noheaders
827 if _noheaders is None:
828 import mimetools
829 try:
830 from cStringIO import StringIO
831 except ImportError:
832 from StringIO import StringIO
833 _noheaders = mimetools.Message(StringIO(), 0)
834 _noheaders.fp.close() # Recycle file descriptor
835 return _noheaders
838 # Utility classes
840 class ftpwrapper:
841 """Class used by open_ftp() for cache of open FTP connections."""
843 def __init__(self, user, passwd, host, port, dirs,
844 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
845 self.user = user
846 self.passwd = passwd
847 self.host = host
848 self.port = port
849 self.dirs = dirs
850 self.timeout = timeout
851 self.init()
853 def init(self):
854 import ftplib
855 self.busy = 0
856 self.ftp = ftplib.FTP()
857 self.ftp.connect(self.host, self.port, self.timeout)
858 self.ftp.login(self.user, self.passwd)
859 for dir in self.dirs:
860 self.ftp.cwd(dir)
862 def retrfile(self, file, type):
863 import ftplib
864 self.endtransfer()
865 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
866 else: cmd = 'TYPE ' + type; isdir = 0
867 try:
868 self.ftp.voidcmd(cmd)
869 except ftplib.all_errors:
870 self.init()
871 self.ftp.voidcmd(cmd)
872 conn = None
873 if file and not isdir:
874 # Try to retrieve as a file
875 try:
876 cmd = 'RETR ' + file
877 conn = self.ftp.ntransfercmd(cmd)
878 except ftplib.error_perm, reason:
879 if str(reason)[:3] != '550':
880 raise IOError, ('ftp error', reason), sys.exc_info()[2]
881 if not conn:
882 # Set transfer mode to ASCII!
883 self.ftp.voidcmd('TYPE A')
884 # Try a directory listing. Verify that directory exists.
885 if file:
886 pwd = self.ftp.pwd()
887 try:
888 try:
889 self.ftp.cwd(file)
890 except ftplib.error_perm, reason:
891 raise IOError, ('ftp error', reason), sys.exc_info()[2]
892 finally:
893 self.ftp.cwd(pwd)
894 cmd = 'LIST ' + file
895 else:
896 cmd = 'LIST'
897 conn = self.ftp.ntransfercmd(cmd)
898 self.busy = 1
899 # Pass back both a suitably decorated object and a retrieval length
900 return (addclosehook(conn[0].makefile('rb'),
901 self.endtransfer), conn[1])
902 def endtransfer(self):
903 if not self.busy:
904 return
905 self.busy = 0
906 try:
907 self.ftp.voidresp()
908 except ftperrors():
909 pass
911 def close(self):
912 self.endtransfer()
913 try:
914 self.ftp.close()
915 except ftperrors():
916 pass
918 class addbase:
919 """Base class for addinfo and addclosehook."""
921 def __init__(self, fp):
922 self.fp = fp
923 self.read = self.fp.read
924 self.readline = self.fp.readline
925 if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
926 if hasattr(self.fp, "fileno"):
927 self.fileno = self.fp.fileno
928 else:
929 self.fileno = lambda: None
930 if hasattr(self.fp, "__iter__"):
931 self.__iter__ = self.fp.__iter__
932 if hasattr(self.fp, "next"):
933 self.next = self.fp.next
935 def __repr__(self):
936 return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
937 id(self), self.fp)
939 def close(self):
940 self.read = None
941 self.readline = None
942 self.readlines = None
943 self.fileno = None
944 if self.fp: self.fp.close()
945 self.fp = None
947 class addclosehook(addbase):
948 """Class to add a close hook to an open file."""
950 def __init__(self, fp, closehook, *hookargs):
951 addbase.__init__(self, fp)
952 self.closehook = closehook
953 self.hookargs = hookargs
955 def close(self):
956 addbase.close(self)
957 if self.closehook:
958 self.closehook(*self.hookargs)
959 self.closehook = None
960 self.hookargs = None
962 class addinfo(addbase):
963 """class to add an info() method to an open file."""
965 def __init__(self, fp, headers):
966 addbase.__init__(self, fp)
967 self.headers = headers
969 def info(self):
970 return self.headers
972 class addinfourl(addbase):
973 """class to add info() and geturl() methods to an open file."""
975 def __init__(self, fp, headers, url, code=None):
976 addbase.__init__(self, fp)
977 self.headers = headers
978 self.url = url
979 self.code = code
981 def info(self):
982 return self.headers
984 def getcode(self):
985 return self.code
987 def geturl(self):
988 return self.url
991 # Utilities to parse URLs (most of these return None for missing parts):
992 # unwrap('<URL:type://host/path>') --> 'type://host/path'
993 # splittype('type:opaquestring') --> 'type', 'opaquestring'
994 # splithost('//host[:port]/path') --> 'host[:port]', '/path'
995 # splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
996 # splitpasswd('user:passwd') -> 'user', 'passwd'
997 # splitport('host:port') --> 'host', 'port'
998 # splitquery('/path?query') --> '/path', 'query'
999 # splittag('/path#tag') --> '/path', 'tag'
1000 # splitattr('/path;attr1=value1;attr2=value2;...') ->
1001 # '/path', ['attr1=value1', 'attr2=value2', ...]
1002 # splitvalue('attr=value') --> 'attr', 'value'
1003 # unquote('abc%20def') -> 'abc def'
1004 # quote('abc def') -> 'abc%20def'
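# A worked sketch of the helpers above on a sample URL:
#
#     url = 'http://user:pw@www.python.org:80/doc/index.html?q=x#top'
#     splittype(url)    # 'http', '//user:pw@www.python.org:80/doc/index.html?q=x#top'
#     splithost('//user:pw@www.python.org:80/doc/index.html?q=x#top')
#                       # 'user:pw@www.python.org:80', '/doc/index.html?q=x#top'
#     splituser('user:pw@www.python.org:80')   # 'user:pw', 'www.python.org:80'
#     splitpasswd('user:pw')                   # 'user', 'pw'
#     splitport('www.python.org:80')           # 'www.python.org', '80'
#     splitquery('/doc/index.html?q=x')        # '/doc/index.html', 'q=x'
#     splittag('/doc/index.html#top')          # '/doc/index.html', 'top'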
1006 try:
1007 unicode
1008 except NameError:
1009 def _is_unicode(x):
1010 return 0
1011 else:
1012 def _is_unicode(x):
1013 return isinstance(x, unicode)
1015 def toBytes(url):
1016 """toBytes(u"URL") --> 'URL'."""
1017 # Most URL schemes require ASCII. If that changes, the conversion
1018 # can be relaxed
1019 if _is_unicode(url):
1020 try:
1021 url = url.encode("ASCII")
1022 except UnicodeError:
1023 raise UnicodeError("URL " + repr(url) +
1024 " contains non-ASCII characters")
1025 return url
1027 def unwrap(url):
1028 """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
1029 url = url.strip()
1030 if url[:1] == '<' and url[-1:] == '>':
1031 url = url[1:-1].strip()
1032 if url[:4] == 'URL:': url = url[4:].strip()
1033 return url
1035 _typeprog = None
1036 def splittype(url):
1037 """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
1038 global _typeprog
1039 if _typeprog is None:
1040 import re
1041 _typeprog = re.compile('^([^/:]+):')
1043 match = _typeprog.match(url)
1044 if match:
1045 scheme = match.group(1)
1046 return scheme.lower(), url[len(scheme) + 1:]
1047 return None, url
1049 _hostprog = None
1050 def splithost(url):
1051 """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
1052 global _hostprog
1053 if _hostprog is None:
1054 import re
1055 _hostprog = re.compile('^//([^/?]*)(.*)$')
1057 match = _hostprog.match(url)
1058 if match: return match.group(1, 2)
1059 return None, url
1061 _userprog = None
1062 def splituser(host):
1063 """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
1064 global _userprog
1065 if _userprog is None:
1066 import re
1067 _userprog = re.compile('^(.*)@(.*)$')
1069 match = _userprog.match(host)
1070 if match: return map(unquote, match.group(1, 2))
1071 return None, host
1073 _passwdprog = None
1074 def splitpasswd(user):
1075 """splitpasswd('user:passwd') -> 'user', 'passwd'."""
1076 global _passwdprog
1077 if _passwdprog is None:
1078 import re
1079 _passwdprog = re.compile('^([^:]*):(.*)$',re.S)
1081 match = _passwdprog.match(user)
1082 if match: return match.group(1, 2)
1083 return user, None
1085 # splittag('/path#tag') --> '/path', 'tag'
1086 _portprog = None
1087 def splitport(host):
1088 """splitport('host:port') --> 'host', 'port'."""
1089 global _portprog
1090 if _portprog is None:
1091 import re
1092 _portprog = re.compile('^(.*):([0-9]+)$')
1094 match = _portprog.match(host)
1095 if match: return match.group(1, 2)
1096 return host, None
1098 _nportprog = None
1099 def splitnport(host, defport=-1):
1100 """Split host and port, returning numeric port.
1101 Return the given default port if no ':' is found; defaults to -1.
1102 Return the numerical port if a valid number is found after ':'.
1103 Return None if ':' is present but not followed by a valid number."""
1104 global _nportprog
1105 if _nportprog is None:
1106 import re
1107 _nportprog = re.compile('^(.*):(.*)$')
1109 match = _nportprog.match(host)
1110 if match:
1111 host, port = match.group(1, 2)
1112 try:
1113 if not port: raise ValueError, "no digits"
1114 nport = int(port)
1115 except ValueError:
1116 nport = None
1117 return host, nport
1118 return host, defport
1120 _queryprog = None
1121 def splitquery(url):
1122 """splitquery('/path?query') --> '/path', 'query'."""
1123 global _queryprog
1124 if _queryprog is None:
1125 import re
1126 _queryprog = re.compile('^(.*)\?([^?]*)$')
1128 match = _queryprog.match(url)
1129 if match: return match.group(1, 2)
1130 return url, None
1132 _tagprog = None
1133 def splittag(url):
1134 """splittag('/path#tag') --> '/path', 'tag'."""
1135 global _tagprog
1136 if _tagprog is None:
1137 import re
1138 _tagprog = re.compile('^(.*)#([^#]*)$')
1140 match = _tagprog.match(url)
1141 if match: return match.group(1, 2)
1142 return url, None
1144 def splitattr(url):
1145 """splitattr('/path;attr1=value1;attr2=value2;...') ->
1146 '/path', ['attr1=value1', 'attr2=value2', ...]."""
1147 words = url.split(';')
1148 return words[0], words[1:]
1150 _valueprog = None
1151 def splitvalue(attr):
1152 """splitvalue('attr=value') --> 'attr', 'value'."""
1153 global _valueprog
1154 if _valueprog is None:
1155 import re
1156 _valueprog = re.compile('^([^=]*)=(.*)$')
1158 match = _valueprog.match(attr)
1159 if match: return match.group(1, 2)
1160 return attr, None
1162 _hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
1163 _hextochr.update(('%02X' % i, chr(i)) for i in range(256))
1165 def unquote(s):
1166 """unquote('abc%20def') -> 'abc def'."""
1167 res = s.split('%')
1168 for i in xrange(1, len(res)):
1169 item = res[i]
1170 try:
1171 res[i] = _hextochr[item[:2]] + item[2:]
1172 except KeyError:
1173 res[i] = '%' + item
1174 except UnicodeDecodeError:
1175 res[i] = unichr(int(item[:2], 16)) + item[2:]
1176 return "".join(res)
1178 def unquote_plus(s):
1179 """unquote('%7e/abc+def') -> '~/abc def'"""
1180 s = s.replace('+', ' ')
1181 return unquote(s)
1183 always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
1184 'abcdefghijklmnopqrstuvwxyz'
1185 '0123456789' '_.-')
1186 _safemaps = {}
1188 def quote(s, safe = '/'):
1189 """quote('abc def') -> 'abc%20def'
1191 Each part of a URL, e.g. the path info, the query, etc., has a
1192 different set of reserved characters that must be quoted.
1194 RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
1195 the following reserved characters.
1197 reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
1198 "$" | ","
1200 Each of these characters is reserved in some component of a URL,
1201 but not necessarily in all of them.
1203 By default, the quote function is intended for quoting the path
1204 section of a URL. Thus, it will not encode '/'. This character
1205 is reserved, but in typical usage the quote function is being
1206 called on a path where the existing slash characters are used as
1207 reserved characters.
1208 """
1209 cachekey = (safe, always_safe)
1210 try:
1211 safe_map = _safemaps[cachekey]
1212 except KeyError:
1213 safe += always_safe
1214 safe_map = {}
1215 for i in range(256):
1216 c = chr(i)
1217 safe_map[c] = (c in safe) and c or ('%%%02X' % i)
1218 _safemaps[cachekey] = safe_map
1219 res = map(safe_map.__getitem__, s)
1220 return ''.join(res)
1222 def quote_plus(s, safe = ''):
1223 """Quote the query fragment of a URL; replacing ' ' with '+'"""
1224 if ' ' in s:
1225 s = quote(s, safe + ' ')
1226 return s.replace(' ', '+')
1227 return quote(s, safe)
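# Quoting sketches (the documented defaults: letters, digits, '_.-' and, for
# quote(), '/' pass through unchanged):
#
#     quote('/~user/file one')            # '/%7Euser/file%20one'
#     quote('/~user/file one', safe='')   # '%2F%7Euser%2Ffile%20one'
#     quote_plus('a query & more')        # 'a+query+%26+more'
#     unquote_plus('a+query+%26+more')    # 'a query & more'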
1229 def urlencode(query,doseq=0):
1230 """Encode a sequence of two-element tuples or dictionary into a URL query string.
1232 If any values in the query arg are sequences and doseq is true, each
1233 sequence element is converted to a separate parameter.
1235 If the query arg is a sequence of two-element tuples, the order of the
1236 parameters in the output will match the order of parameters in the
1237 input.
1239 """
1240 if hasattr(query,"items"):
1241 # mapping objects
1242 query = query.items()
1243 else:
1244 # it's a bother at times that strings and string-like objects are
1245 # sequences...
1246 try:
1247 # non-sequence items should not work with len()
1248 # non-empty strings will fail this
1249 if len(query) and not isinstance(query[0], tuple):
1250 raise TypeError
1251 # zero-length sequences of all types will get here and succeed,
1252 # but that's a minor nit - since the original implementation
1253 # allowed empty dicts that type of behavior probably should be
1254 # preserved for consistency
1255 except TypeError:
1256 ty,va,tb = sys.exc_info()
1257 raise TypeError, "not a valid non-string sequence or mapping object", tb
1259 l = []
1260 if not doseq:
1261 # preserve old behavior
1262 for k, v in query:
1263 k = quote_plus(str(k))
1264 v = quote_plus(str(v))
1265 l.append(k + '=' + v)
1266 else:
1267 for k, v in query:
1268 k = quote_plus(str(k))
1269 if isinstance(v, str):
1270 v = quote_plus(v)
1271 l.append(k + '=' + v)
1272 elif _is_unicode(v):
1273 # is there a reasonable way to convert to ASCII?
1274 # encode generates a string, but "replace" or "ignore"
1275 # lose information and "strict" can raise UnicodeError
1276 v = quote_plus(v.encode("ASCII","replace"))
1277 l.append(k + '=' + v)
1278 else:
1279 try:
1280 # is this a sufficient test for sequence-ness?
1281 x = len(v)
1282 except TypeError:
1283 # not a sequence
1284 v = quote_plus(str(v))
1285 l.append(k + '=' + v)
1286 else:
1287 # loop over the sequence
1288 for elt in v:
1289 l.append(k + '=' + quote_plus(str(elt)))
1290 return '&'.join(l)
1292 # Proxy handling
1293 def getproxies_environment():
1294 """Return a dictionary of scheme -> proxy server URL mappings.
1296 Scan the environment for variables named <scheme>_proxy;
1297 this seems to be the standard convention. If you need a
1298 different way, you can pass a proxies dictionary to the
1299 [Fancy]URLopener constructor.
1301 """
1302 proxies = {}
1303 for name, value in os.environ.items():
1304 name = name.lower()
1305 if value and name[-6:] == '_proxy':
1306 proxies[name[:-6]] = value
1307 return proxies
1309 def proxy_bypass_environment(host):
1310 """Test if proxies should not be used for a particular host.
1312 Checks the environment for a variable named no_proxy, which should
1313 be a list of DNS suffixes separated by commas, or '*' for all hosts.
1314 """
1315 no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
1316 # '*' is special case for always bypass
1317 if no_proxy == '*':
1318 return 1
1319 # strip port off host
1320 hostonly, port = splitport(host)
1321 # check if the host ends with any of the DNS suffixes
1322 for name in no_proxy.split(','):
1323 if name and (hostonly.endswith(name) or host.endswith(name)):
1324 return 1
1325 # otherwise, don't bypass
1326 return 0
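# Environment-driven configuration sketch (hosts and proxy are placeholders):
# any '<scheme>_proxy' variable feeds getproxies_environment(), and no_proxy
# lists DNS suffixes that bypass the proxy.
#
#     os.environ['http_proxy'] = 'http://proxy.example.com:8080/'
#     os.environ['no_proxy'] = 'localhost,.internal.example.com'
#     getproxies_environment()['http']                        # 'http://proxy.example.com:8080/'
#     proxy_bypass_environment('db.internal.example.com')     # 1 (suffix match)
#     proxy_bypass_environment('www.python.org')              # 0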
1329 if sys.platform == 'darwin':
1330 from _scproxy import _get_proxy_settings, _get_proxies
1332 def proxy_bypass_macosx_sysconf(host):
1333 """
1334 Return True iff this host shouldn't be accessed using a proxy
1336 This function uses the MacOSX framework SystemConfiguration
1337 to fetch the proxy information.
1338 """
1339 import re
1340 import socket
1341 from fnmatch import fnmatch
1343 hostonly, port = splitport(host)
1345 def ip2num(ipAddr):
1346 parts = ipAddr.split('.')
1347 parts = map(int, parts)
1348 if len(parts) != 4:
1349 parts = (parts + [0, 0, 0, 0])[:4]
1350 return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
1352 proxy_settings = _get_proxy_settings()
1354 # Check for simple host names:
1355 if '.' not in host:
1356 if proxy_settings['exclude_simple']:
1357 return True
1359 hostIP = None
1361 for value in proxy_settings.get('exceptions', ()):
1362 # Items in the list are strings like these: *.local, 169.254/16
1363 if not value: continue
1365 m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
1366 if m is not None:
1367 if hostIP is None:
1368 try:
1369 hostIP = socket.gethostbyname(hostonly)
1370 hostIP = ip2num(hostIP)
1371 except socket.error:
1372 continue
1374 base = ip2num(m.group(1))
1375 # the "/nn" prefix length may be omitted (e.g. "10.1.2.3"); infer it
1376 mask = 32 - (int(m.group(2)[1:]) if m.group(2) else 8 * (m.group(1).count('.') + 1))
1378 if (hostIP >> mask) == (base >> mask):
1379 return True
1381 elif fnmatch(host, value):
1382 return True
1384 return False
1387 def getproxies_macosx_sysconf():
1388 """Return a dictionary of scheme -> proxy server URL mappings.
1390 This function uses the MacOSX framework SystemConfiguration
1391 to fetch the proxy information.
1392 """
1393 return _get_proxies()
1397 def proxy_bypass(host):
1398 if getproxies_environment():
1399 return proxy_bypass_environment(host)
1400 else:
1401 return proxy_bypass_macosx_sysconf(host)
1403 def getproxies():
1404 return getproxies_environment() or getproxies_macosx_sysconf()
1406 elif os.name == 'nt':
1407 def getproxies_registry():
1408 """Return a dictionary of scheme -> proxy server URL mappings.
1410 Win32 uses the registry to store proxies.
1412 """
1413 proxies = {}
1414 try:
1415 import _winreg
1416 except ImportError:
1417 # Std module, so should be around - but you never know!
1418 return proxies
1419 try:
1420 internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1421 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1422 proxyEnable = _winreg.QueryValueEx(internetSettings,
1423 'ProxyEnable')[0]
1424 if proxyEnable:
1425 # Returned as Unicode but problems if not converted to ASCII
1426 proxyServer = str(_winreg.QueryValueEx(internetSettings,
1427 'ProxyServer')[0])
1428 if '=' in proxyServer:
1429 # Per-protocol settings
1430 for p in proxyServer.split(';'):
1431 protocol, address = p.split('=', 1)
1432 # See if address has a type:// prefix
1433 import re
1434 if not re.match('^([^/:]+)://', address):
1435 address = '%s://%s' % (protocol, address)
1436 proxies[protocol] = address
1437 else:
1438 # Use one setting for all protocols
1439 if proxyServer[:5] == 'http:':
1440 proxies['http'] = proxyServer
1441 else:
1442 proxies['http'] = 'http://%s' % proxyServer
1443 proxies['ftp'] = 'ftp://%s' % proxyServer
1444 internetSettings.Close()
1445 except (WindowsError, ValueError, TypeError):
1446 # Either registry key not found etc, or the value in an
1447 # unexpected format.
1448 # proxies already set up to be empty so nothing to do
1449 pass
1450 return proxies
1452 def getproxies():
1453 """Return a dictionary of scheme -> proxy server URL mappings.
1455 Returns settings gathered from the environment, if specified,
1456 or the registry.
1458 """
1459 return getproxies_environment() or getproxies_registry()
1461 def proxy_bypass_registry(host):
1462 try:
1463 import _winreg
1464 import re
1465 except ImportError:
1466 # Std modules, so should be around - but you never know!
1467 return 0
1468 try:
1469 internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1470 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1471 proxyEnable = _winreg.QueryValueEx(internetSettings,
1472 'ProxyEnable')[0]
1473 proxyOverride = str(_winreg.QueryValueEx(internetSettings,
1474 'ProxyOverride')[0])
1475 # ^^^^ Returned as Unicode but problems if not converted to ASCII
1476 except WindowsError:
1477 return 0
1478 if not proxyEnable or not proxyOverride:
1479 return 0
1480 # try to make a host list from name and IP address.
1481 rawHost, port = splitport(host)
1482 host = [rawHost]
1483 try:
1484 addr = socket.gethostbyname(rawHost)
1485 if addr != rawHost:
1486 host.append(addr)
1487 except socket.error:
1488 pass
1489 try:
1490 fqdn = socket.getfqdn(rawHost)
1491 if fqdn != rawHost:
1492 host.append(fqdn)
1493 except socket.error:
1494 pass
1495 # make a check value list from the registry entry: replace the
1496 # '<local>' string by the localhost entry and the corresponding
1497 # canonical entry.
1498 proxyOverride = proxyOverride.split(';')
1499 # now check if we match one of the registry values.
1500 for test in proxyOverride:
1501 if test == '<local>':
1502 if '.' not in rawHost:
1503 return 1
1504 test = test.replace(".", r"\.") # mask dots
1505 test = test.replace("*", r".*") # change glob sequence
1506 test = test.replace("?", r".") # change glob char
1507 for val in host:
1508 # print "%s <--> %s" %( test, val )
1509 if re.match(test, val, re.I):
1510 return 1
1511 return 0
1513 def proxy_bypass(host):
1514 """Return a dictionary of scheme -> proxy server URL mappings.
1516 Returns settings gathered from the environment, if specified,
1517 or the registry.
1520 if getproxies_environment():
1521 return proxy_bypass_environment(host)
1522 else:
1523 return proxy_bypass_registry(host)
1525 else:
1526 # By default use environment variables
1527 getproxies = getproxies_environment
1528 proxy_bypass = proxy_bypass_environment
1530 # Test and time quote() and unquote()
1531 def test1():
1532 s = ''
1533 for i in range(256): s = s + chr(i)
1534 s = s*4
1535 t0 = time.time()
1536 qs = quote(s)
1537 uqs = unquote(qs)
1538 t1 = time.time()
1539 if uqs != s:
1540 print 'Wrong!'
1541 print repr(s)
1542 print repr(qs)
1543 print repr(uqs)
1544 print round(t1 - t0, 3), 'sec'
1547 def reporthook(blocknum, blocksize, totalsize):
1548 # Report during remote transfers
1549 print "Block number: %d, Block size: %d, Total size: %d" % (
1550 blocknum, blocksize, totalsize)
1552 # Test program
1553 def test(args=[]):
1554 if not args:
1555 args = [
1556 '/etc/passwd',
1557 'file:/etc/passwd',
1558 'file://localhost/etc/passwd',
1559 'ftp://ftp.gnu.org/pub/README',
1560 'http://www.python.org/index.html',
1561 ]
1562 if hasattr(URLopener, "open_https"):
1563 args.append('https://synergy.as.cmu.edu/~geek/')
1564 try:
1565 for url in args:
1566 print '-'*10, url, '-'*10
1567 fn, h = urlretrieve(url, None, reporthook)
1568 print fn
1569 if h:
1570 print '======'
1571 for k in h.keys(): print k + ':', h[k]
1572 print '======'
1573 fp = open(fn, 'rb')
1574 data = fp.read()
1575 del fp
1576 if '\r' in data:
1577 table = string.maketrans("", "")
1578 data = data.translate(table, "\r")
1579 print data
1580 fn, h = None, None
1581 print '-'*40
1582 finally:
1583 urlcleanup()
1585 def main():
1586 import getopt, sys
1587 try:
1588 opts, args = getopt.getopt(sys.argv[1:], "th")
1589 except getopt.error, msg:
1590 print msg
1591 print "Use -h for help"
1592 return
1593 t = 0
1594 for o, a in opts:
1595 if o == '-t':
1596 t = t + 1
1597 if o == '-h':
1598 print "Usage: python urllib.py [-t] [url ...]"
1599 print "-t runs self-test;",
1600 print "otherwise, contents of urls are printed"
1601 return
1602 if t:
1603 if t > 1:
1604 test1()
1605 test(args)
1606 else:
1607 if not args:
1608 print "Use -h for help"
1609 for url in args:
1610 print urlopen(url).read(),
1612 # Run test program when run as a script
1613 if __name__ == '__main__':
1614 main()