Lib/urllib.py
1 """Open an arbitrary URL.
3 See the following document for more info on URLs:
4 "Names and Addresses, URIs, URLs, URNs, URCs", at
5 http://www.w3.org/pub/WWW/Addressing/Overview.html
7 See also the HTTP spec (from which the error codes are derived):
8 "HTTP - Hypertext Transfer Protocol", at
9 http://www.w3.org/pub/WWW/Protocols/
11 Related standards and specs:
12 - RFC1808: the "relative URL" spec. (authoritative status)
13 - RFC1738 - the "URL standard". (authoritative status)
14 - RFC1630 - the "URI spec". (informational status)
16 The object returned by URLopener().open(file) will differ per
17 protocol. All you know is that it has methods read(), readline(),
18 readlines(), fileno(), close() and info(). The read*(), fileno()
19 and close() methods work like those of open files.
20 The info() method returns a mimetools.Message object which can be
21 used to query various info about the object, if available.
22 (mimetools.Message objects are queried with the getheader() method.)
23 """
25 import string
26 import socket
27 import os
28 import time
29 import sys
30 from urlparse import urljoin as basejoin
31 import warnings
33 __all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
34 "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
35 "urlencode", "url2pathname", "pathname2url", "splittag",
36 "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
37 "splittype", "splithost", "splituser", "splitpasswd", "splitport",
38 "splitnport", "splitquery", "splitattr", "splitvalue",
39 "getproxies"]
41 __version__ = '1.17' # XXX This version is not always updated :-(
43 MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
45 # Helper for non-unix systems
46 if os.name == 'mac':
47 from macurl2path import url2pathname, pathname2url
48 elif os.name == 'nt':
49 from nturl2path import url2pathname, pathname2url
50 elif os.name == 'riscos':
51 from rourl2path import url2pathname, pathname2url
52 else:
53 def url2pathname(pathname):
54 """OS-specific conversion from a relative URL of the 'file' scheme
55 to a file system path; not recommended for general use."""
56 return unquote(pathname)
58 def pathname2url(pathname):
59 """OS-specific conversion from a file system path to a relative URL
60 of the 'file' scheme; not recommended for general use."""
61 return quote(pathname)
63 # This really consists of two pieces:
64 # (1) a class which handles opening of all sorts of URLs
65 # (plus assorted utilities etc.)
66 # (2) a set of functions for parsing URLs
67 # XXX Should these be separated out into different modules?
70 # Shortcut for basic usage
71 _urlopener = None
72 def urlopen(url, data=None, proxies=None):
73 """Create a file-like object for the specified URL to read from."""
74 from warnings import warnpy3k
75 warnings.warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
76 "favor of urllib2.urlopen()", stacklevel=2)
78 global _urlopener
79 if proxies is not None:
80 opener = FancyURLopener(proxies=proxies)
81 elif not _urlopener:
82 opener = FancyURLopener()
83 _urlopener = opener
84 else:
85 opener = _urlopener
86 if data is None:
87 return opener.open(url)
88 else:
89 return opener.open(url, data)
90 def urlretrieve(url, filename=None, reporthook=None, data=None):
91 global _urlopener
92 if not _urlopener:
93 _urlopener = FancyURLopener()
94 return _urlopener.retrieve(url, filename, reporthook, data)
95 def urlcleanup():
96 if _urlopener:
97 _urlopener.cleanup()
99 # check for SSL
100 try:
101 import ssl
102 except:
103 _have_ssl = False
104 else:
105 _have_ssl = True
107 # exception raised when downloaded size does not match content-length
108 class ContentTooShortError(IOError):
109 def __init__(self, message, content):
110 IOError.__init__(self, message)
111 self.content = content
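# Illustrative sketch of urlretrieve() with a progress hook and the exception
# defined above (URL and filename are placeholders; network access is assumed):
#
#     def hook(blocknum, blocksize, totalsize):
#         print "block %d, %d bytes each, total %d" % (blocknum, blocksize,
#                                                      totalsize)
#     try:
#         filename, headers = urlretrieve('http://www.python.org/',
#                                         'index.html', hook)
#     except ContentTooShortError, e:
#         # e.content is the (filename, headers) pair for the truncated download
#         print 'retrieval incomplete:', e
#     urlcleanup()   # remove temporary files created by urlretrieve()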
113 ftpcache = {}
114 class URLopener:
115 """Class to open URLs.
116 This is a class rather than just a subroutine because we may need
117 more than one set of global protocol-specific options.
118 Note -- this is a base class for those who don't want the
119 automatic handling of errors of type 302 (relocated) and 401
120 (authorization needed)."""
122 __tempfiles = None
124 version = "Python-urllib/%s" % __version__
126 # Constructor
127 def __init__(self, proxies=None, **x509):
128 if proxies is None:
129 proxies = getproxies()
130 assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
131 self.proxies = proxies
132 self.key_file = x509.get('key_file')
133 self.cert_file = x509.get('cert_file')
134 self.addheaders = [('User-Agent', self.version)]
135 self.__tempfiles = []
136 self.__unlink = os.unlink # See cleanup()
137 self.tempcache = None
138 # Undocumented feature: if you assign {} to tempcache,
139 # it is used to cache files retrieved with
140 # self.retrieve(). This is not enabled by default
141 # since it does not work for changing documents (and I
142 # haven't got the logic to check expiration headers
143 # yet).
144 self.ftpcache = ftpcache
145 # Undocumented feature: you can use a different
146 # ftp cache by assigning to the .ftpcache member;
147 # in case you want logically independent URL openers
148 # XXX This is not threadsafe. Bah.
150 def __del__(self):
151 self.close()
153 def close(self):
154 self.cleanup()
156 def cleanup(self):
157 # This code sometimes runs when the rest of this module
158 # has already been deleted, so it can't use any globals
159 # or import anything.
160 if self.__tempfiles:
161 for file in self.__tempfiles:
162 try:
163 self.__unlink(file)
164 except OSError:
165 pass
166 del self.__tempfiles[:]
167 if self.tempcache:
168 self.tempcache.clear()
170 def addheader(self, *args):
171 """Add a header to be used by the HTTP interface only
172 e.g. u.addheader('Accept', 'sound/basic')"""
173 self.addheaders.append(args)
175 # External interface
176 def open(self, fullurl, data=None):
177 """Use URLopener().open(file) instead of open(file, 'r')."""
178 fullurl = unwrap(toBytes(fullurl))
179 # percent-encode the url, fixing lame server errors caused by, e.g.,
180 # spaces within url paths.
181 fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]")
182 if self.tempcache and fullurl in self.tempcache:
183 filename, headers = self.tempcache[fullurl]
184 fp = open(filename, 'rb')
185 return addinfourl(fp, headers, fullurl)
186 urltype, url = splittype(fullurl)
187 if not urltype:
188 urltype = 'file'
189 if urltype in self.proxies:
190 proxy = self.proxies[urltype]
191 urltype, proxyhost = splittype(proxy)
192 host, selector = splithost(proxyhost)
193 url = (host, fullurl) # Signal special case to open_*()
194 else:
195 proxy = None
196 name = 'open_' + urltype
197 self.type = urltype
198 name = name.replace('-', '_')
199 if not hasattr(self, name):
200 if proxy:
201 return self.open_unknown_proxy(proxy, fullurl, data)
202 else:
203 return self.open_unknown(fullurl, data)
204 try:
205 if data is None:
206 return getattr(self, name)(url)
207 else:
208 return getattr(self, name)(url, data)
209 except socket.error, msg:
210 raise IOError, ('socket error', msg), sys.exc_info()[2]
212 def open_unknown(self, fullurl, data=None):
213 """Overridable interface to open unknown URL type."""
214 type, url = splittype(fullurl)
215 raise IOError, ('url error', 'unknown url type', type)
217 def open_unknown_proxy(self, proxy, fullurl, data=None):
218 """Overridable interface to open unknown URL type."""
219 type, url = splittype(fullurl)
220 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
222 # External interface
223 def retrieve(self, url, filename=None, reporthook=None, data=None):
224 """retrieve(url) returns (filename, headers) for a local object
225 or (tempfilename, headers) for a remote object."""
226 url = unwrap(toBytes(url))
227 if self.tempcache and url in self.tempcache:
228 return self.tempcache[url]
229 type, url1 = splittype(url)
230 if filename is None and (not type or type == 'file'):
231 try:
232 fp = self.open_local_file(url1)
233 hdrs = fp.info()
234 fp.close()
235 return url2pathname(splithost(url1)[1]), hdrs
236 except IOError, msg:
237 pass
238 fp = self.open(url, data)
239 try:
240 headers = fp.info()
241 if filename:
242 tfp = open(filename, 'wb')
243 else:
244 import tempfile
245 garbage, path = splittype(url)
246 garbage, path = splithost(path or "")
247 path, garbage = splitquery(path or "")
248 path, garbage = splitattr(path or "")
249 suffix = os.path.splitext(path)[1]
250 (fd, filename) = tempfile.mkstemp(suffix)
251 self.__tempfiles.append(filename)
252 tfp = os.fdopen(fd, 'wb')
253 try:
254 result = filename, headers
255 if self.tempcache is not None:
256 self.tempcache[url] = result
257 bs = 1024*8
258 size = -1
259 read = 0
260 blocknum = 0
261 if reporthook:
262 if "content-length" in headers:
263 size = int(headers["Content-Length"])
264 reporthook(blocknum, bs, size)
265 while 1:
266 block = fp.read(bs)
267 if block == "":
268 break
269 read += len(block)
270 tfp.write(block)
271 blocknum += 1
272 if reporthook:
273 reporthook(blocknum, bs, size)
274 finally:
275 tfp.close()
276 finally:
277 fp.close()
279 # raise exception if actual size does not match content-length header
280 if size >= 0 and read < size:
281 raise ContentTooShortError("retrieval incomplete: got only %i out "
282 "of %i bytes" % (read, size), result)
284 return result
286 # Each method named open_<type> knows how to open that type of URL
288 def open_http(self, url, data=None):
289 """Use HTTP protocol."""
290 import httplib
291 user_passwd = None
292 proxy_passwd= None
293 if isinstance(url, str):
294 host, selector = splithost(url)
295 if host:
296 user_passwd, host = splituser(host)
297 host = unquote(host)
298 realhost = host
299 else:
300 host, selector = url
301 # check whether the proxy contains authorization information
302 proxy_passwd, host = splituser(host)
303 # now we proceed with the url we want to obtain
304 urltype, rest = splittype(selector)
305 url = rest
306 user_passwd = None
307 if urltype.lower() != 'http':
308 realhost = None
309 else:
310 realhost, rest = splithost(rest)
311 if realhost:
312 user_passwd, realhost = splituser(realhost)
313 if user_passwd:
314 selector = "%s://%s%s" % (urltype, realhost, rest)
315 if proxy_bypass(realhost):
316 host = realhost
318 #print "proxy via http:", host, selector
319 if not host: raise IOError, ('http error', 'no host given')
321 if proxy_passwd:
322 import base64
323 proxy_auth = base64.b64encode(proxy_passwd).strip()
324 else:
325 proxy_auth = None
327 if user_passwd:
328 import base64
329 auth = base64.b64encode(user_passwd).strip()
330 else:
331 auth = None
332 h = httplib.HTTP(host)
333 if data is not None:
334 h.putrequest('POST', selector)
335 h.putheader('Content-Type', 'application/x-www-form-urlencoded')
336 h.putheader('Content-Length', '%d' % len(data))
337 else:
338 h.putrequest('GET', selector)
339 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
340 if auth: h.putheader('Authorization', 'Basic %s' % auth)
341 if realhost: h.putheader('Host', realhost)
342 for args in self.addheaders: h.putheader(*args)
343 h.endheaders(data)
344 errcode, errmsg, headers = h.getreply()
345 fp = h.getfile()
346 if errcode == -1:
347 if fp: fp.close()
348 # something went wrong with the HTTP status line
349 raise IOError, ('http protocol error', 0,
350 'got a bad status line', None)
351 # According to RFC 2616, "2xx" code indicates that the client's
352 # request was successfully received, understood, and accepted.
353 if (200 <= errcode < 300):
354 return addinfourl(fp, headers, "http:" + url, errcode)
355 else:
356 if data is None:
357 return self.http_error(url, fp, errcode, errmsg, headers)
358 else:
359 return self.http_error(url, fp, errcode, errmsg, headers, data)
361 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
362 """Handle http errors.
363 Derived class can override this, or provide specific handlers
364 named http_error_DDD where DDD is the 3-digit error code."""
365 # First check if there's a specific handler for this error
366 name = 'http_error_%d' % errcode
367 if hasattr(self, name):
368 method = getattr(self, name)
369 if data is None:
370 result = method(url, fp, errcode, errmsg, headers)
371 else:
372 result = method(url, fp, errcode, errmsg, headers, data)
373 if result: return result
374 return self.http_error_default(url, fp, errcode, errmsg, headers)
376 def http_error_default(self, url, fp, errcode, errmsg, headers):
377 """Default error handler: close the connection and raise IOError."""
378 void = fp.read()
379 fp.close()
380 raise IOError, ('http error', errcode, errmsg, headers)
382 if _have_ssl:
383 def open_https(self, url, data=None):
384 """Use HTTPS protocol."""
386 import httplib
387 user_passwd = None
388 proxy_passwd = None
389 if isinstance(url, str):
390 host, selector = splithost(url)
391 if host:
392 user_passwd, host = splituser(host)
393 host = unquote(host)
394 realhost = host
395 else:
396 host, selector = url
397 # check whether the proxy contains authorization information
398 proxy_passwd, host = splituser(host)
399 urltype, rest = splittype(selector)
400 url = rest
401 user_passwd = None
402 if urltype.lower() != 'https':
403 realhost = None
404 else:
405 realhost, rest = splithost(rest)
406 if realhost:
407 user_passwd, realhost = splituser(realhost)
408 if user_passwd:
409 selector = "%s://%s%s" % (urltype, realhost, rest)
410 #print "proxy via https:", host, selector
411 if not host: raise IOError, ('https error', 'no host given')
412 if proxy_passwd:
413 import base64
414 proxy_auth = base64.b64encode(proxy_passwd).strip()
415 else:
416 proxy_auth = None
417 if user_passwd:
418 import base64
419 auth = base64.b64encode(user_passwd).strip()
420 else:
421 auth = None
422 h = httplib.HTTPS(host, 0,
423 key_file=self.key_file,
424 cert_file=self.cert_file)
425 if data is not None:
426 h.putrequest('POST', selector)
427 h.putheader('Content-Type',
428 'application/x-www-form-urlencoded')
429 h.putheader('Content-Length', '%d' % len(data))
430 else:
431 h.putrequest('GET', selector)
432 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
433 if auth: h.putheader('Authorization', 'Basic %s' % auth)
434 if realhost: h.putheader('Host', realhost)
435 for args in self.addheaders: h.putheader(*args)
436 h.endheaders(data)
437 errcode, errmsg, headers = h.getreply()
438 fp = h.getfile()
439 if errcode == -1:
440 if fp: fp.close()
441 # something went wrong with the HTTP status line
442 raise IOError, ('http protocol error', 0,
443 'got a bad status line', None)
444 # According to RFC 2616, "2xx" code indicates that the client's
445 # request was successfully received, understood, and accepted.
446 if (200 <= errcode < 300):
447 return addinfourl(fp, headers, "https:" + url, errcode)
448 else:
449 if data is None:
450 return self.http_error(url, fp, errcode, errmsg, headers)
451 else:
452 return self.http_error(url, fp, errcode, errmsg, headers,
453 data)
455 def open_file(self, url):
456 """Use local file or FTP depending on form of URL."""
457 if not isinstance(url, str):
458 raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
459 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
460 return self.open_ftp(url)
461 else:
462 return self.open_local_file(url)
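# Routing examples for the test above (illustrative):
#   'file:/etc/passwd', 'file:///etc/passwd' and 'file://localhost/etc/passwd'
#   are handled by open_local_file(); 'file://otherhost/path' falls through
#   to open_ftp().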
464 def open_local_file(self, url):
465 """Use local file."""
466 import mimetypes, mimetools, email.utils
467 try:
468 from cStringIO import StringIO
469 except ImportError:
470 from StringIO import StringIO
471 host, file = splithost(url)
472 localname = url2pathname(file)
473 try:
474 stats = os.stat(localname)
475 except OSError, e:
476 raise IOError(e.errno, e.strerror, e.filename)
477 size = stats.st_size
478 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
479 mtype = mimetypes.guess_type(url)[0]
480 headers = mimetools.Message(StringIO(
481 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
482 (mtype or 'text/plain', size, modified)))
483 if not host:
484 urlfile = file
485 if file[:1] == '/':
486 urlfile = 'file://' + file
487 return addinfourl(open(localname, 'rb'),
488 headers, urlfile)
489 host, port = splitport(host)
490 if not port \
491 and socket.gethostbyname(host) in (localhost(), thishost()):
492 urlfile = file
493 if file[:1] == '/':
494 urlfile = 'file://' + file
495 return addinfourl(open(localname, 'rb'),
496 headers, urlfile)
497 raise IOError, ('local file error', 'not on local host')
499 def open_ftp(self, url):
500 """Use FTP protocol."""
501 if not isinstance(url, str):
502 raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
503 import mimetypes, mimetools
504 try:
505 from cStringIO import StringIO
506 except ImportError:
507 from StringIO import StringIO
508 host, path = splithost(url)
509 if not host: raise IOError, ('ftp error', 'no host given')
510 host, port = splitport(host)
511 user, host = splituser(host)
512 if user: user, passwd = splitpasswd(user)
513 else: passwd = None
514 host = unquote(host)
515 user = unquote(user or '')
516 passwd = unquote(passwd or '')
517 host = socket.gethostbyname(host)
518 if not port:
519 import ftplib
520 port = ftplib.FTP_PORT
521 else:
522 port = int(port)
523 path, attrs = splitattr(path)
524 path = unquote(path)
525 dirs = path.split('/')
526 dirs, file = dirs[:-1], dirs[-1]
527 if dirs and not dirs[0]: dirs = dirs[1:]
528 if dirs and not dirs[0]: dirs[0] = '/'
529 key = user, host, port, '/'.join(dirs)
530 # XXX thread unsafe!
531 if len(self.ftpcache) > MAXFTPCACHE:
532 # Prune the cache, rather arbitrarily
533 for k in self.ftpcache.keys():
534 if k != key:
535 v = self.ftpcache[k]
536 del self.ftpcache[k]
537 v.close()
538 try:
539 if not key in self.ftpcache:
540 self.ftpcache[key] = \
541 ftpwrapper(user, passwd, host, port, dirs)
542 if not file: type = 'D'
543 else: type = 'I'
544 for attr in attrs:
545 attr, value = splitvalue(attr)
546 if attr.lower() == 'type' and \
547 value in ('a', 'A', 'i', 'I', 'd', 'D'):
548 type = value.upper()
549 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
550 mtype = mimetypes.guess_type("ftp:" + url)[0]
551 headers = ""
552 if mtype:
553 headers += "Content-Type: %s\n" % mtype
554 if retrlen is not None and retrlen >= 0:
555 headers += "Content-Length: %d\n" % retrlen
556 headers = mimetools.Message(StringIO(headers))
557 return addinfourl(fp, headers, "ftp:" + url)
558 except ftperrors(), msg:
559 raise IOError, ('ftp error', msg), sys.exc_info()[2]
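# FTP URL forms accepted above (illustrative; hosts, credentials and paths are
# placeholders):
#   ftp://host/pub/README              anonymous login, binary ('I') retrieval
#   ftp://user:pass@host:2121/pub/     empty file part -> directory listing ('D')
#   ftp://host/notes.txt;type=a        ';type=a' attribute forces ASCII mode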
561 def open_data(self, url, data=None):
562 """Use "data" URL."""
563 if not isinstance(url, str):
564 raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
565 # ignore POSTed data
567 # syntax of data URLs:
568 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
569 # mediatype := [ type "/" subtype ] *( ";" parameter )
570 # data := *urlchar
571 # parameter := attribute "=" value
572 import mimetools
573 try:
574 from cStringIO import StringIO
575 except ImportError:
576 from StringIO import StringIO
577 try:
578 [type, data] = url.split(',', 1)
579 except ValueError:
580 raise IOError, ('data error', 'bad data URL')
581 if not type:
582 type = 'text/plain;charset=US-ASCII'
583 semi = type.rfind(';')
584 if semi >= 0 and '=' not in type[semi:]:
585 encoding = type[semi+1:]
586 type = type[:semi]
587 else:
588 encoding = ''
589 msg = []
590 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
591 time.gmtime(time.time())))
592 msg.append('Content-type: %s' % type)
593 if encoding == 'base64':
594 import base64
595 data = base64.decodestring(data)
596 else:
597 data = unquote(data)
598 msg.append('Content-Length: %d' % len(data))
599 msg.append('')
600 msg.append(data)
601 msg = '\n'.join(msg)
602 f = StringIO(msg)
603 headers = mimetools.Message(f, 0)
604 #f.fileno = None # needed for addinfourl
605 return addinfourl(f, headers, url)
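# Example data: URLs matching the syntax sketched above (illustrative):
#
#     u = URLopener()
#     f = u.open('data:,Hello%2C%20World%21')
#     f.read()    # -> 'Hello, World!'
#     f = u.open('data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==')
#     f.read()    # -> 'Hello, World!'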
608 class FancyURLopener(URLopener):
609 """Derived class with handlers for errors we can handle (perhaps)."""
611 def __init__(self, *args, **kwargs):
612 URLopener.__init__(self, *args, **kwargs)
613 self.auth_cache = {}
614 self.tries = 0
615 self.maxtries = 10
617 def http_error_default(self, url, fp, errcode, errmsg, headers):
618 """Default error handling -- don't raise an exception."""
619 return addinfourl(fp, headers, "http:" + url, errcode)
621 def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
622 """Error 302 -- relocated (temporarily)."""
623 self.tries += 1
624 if self.maxtries and self.tries >= self.maxtries:
625 if hasattr(self, "http_error_500"):
626 meth = self.http_error_500
627 else:
628 meth = self.http_error_default
629 self.tries = 0
630 return meth(url, fp, 500,
631 "Internal Server Error: Redirect Recursion", headers)
632 result = self.redirect_internal(url, fp, errcode, errmsg, headers,
633 data)
634 self.tries = 0
635 return result
637 def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
638 if 'location' in headers:
639 newurl = headers['location']
640 elif 'uri' in headers:
641 newurl = headers['uri']
642 else:
643 return
644 void = fp.read()
645 fp.close()
646 # In case the server sent a relative URL, join with original:
647 newurl = basejoin(self.type + ":" + url, newurl)
648 return self.open(newurl)
650 def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
651 """Error 301 -- also relocated (permanently)."""
652 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
654 def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
655 """Error 303 -- also relocated (essentially identical to 302)."""
656 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
658 def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
659 """Error 307 -- relocated, but turn POST into error."""
660 if data is None:
661 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
662 else:
663 return self.http_error_default(url, fp, errcode, errmsg, headers)
665 def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
666 """Error 401 -- authentication required.
667 This function supports Basic authentication only."""
668 if not 'www-authenticate' in headers:
669 URLopener.http_error_default(self, url, fp,
670 errcode, errmsg, headers)
671 stuff = headers['www-authenticate']
672 import re
673 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
674 if not match:
675 URLopener.http_error_default(self, url, fp,
676 errcode, errmsg, headers)
677 scheme, realm = match.groups()
678 if scheme.lower() != 'basic':
679 URLopener.http_error_default(self, url, fp,
680 errcode, errmsg, headers)
681 name = 'retry_' + self.type + '_basic_auth'
682 if data is None:
683 return getattr(self,name)(url, realm)
684 else:
685 return getattr(self,name)(url, realm, data)
687 def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
688 """Error 407 -- proxy authentication required.
689 This function supports Basic authentication only."""
690 if not 'proxy-authenticate' in headers:
691 URLopener.http_error_default(self, url, fp,
692 errcode, errmsg, headers)
693 stuff = headers['proxy-authenticate']
694 import re
695 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
696 if not match:
697 URLopener.http_error_default(self, url, fp,
698 errcode, errmsg, headers)
699 scheme, realm = match.groups()
700 if scheme.lower() != 'basic':
701 URLopener.http_error_default(self, url, fp,
702 errcode, errmsg, headers)
703 name = 'retry_proxy_' + self.type + '_basic_auth'
704 if data is None:
705 return getattr(self,name)(url, realm)
706 else:
707 return getattr(self,name)(url, realm, data)
709 def retry_proxy_http_basic_auth(self, url, realm, data=None):
710 host, selector = splithost(url)
711 newurl = 'http://' + host + selector
712 proxy = self.proxies['http']
713 urltype, proxyhost = splittype(proxy)
714 proxyhost, proxyselector = splithost(proxyhost)
715 i = proxyhost.find('@') + 1
716 proxyhost = proxyhost[i:]
717 user, passwd = self.get_user_passwd(proxyhost, realm, i)
718 if not (user or passwd): return None
719 proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
720 self.proxies['http'] = 'http://' + proxyhost + proxyselector
721 if data is None:
722 return self.open(newurl)
723 else:
724 return self.open(newurl, data)
726 def retry_proxy_https_basic_auth(self, url, realm, data=None):
727 host, selector = splithost(url)
728 newurl = 'https://' + host + selector
729 proxy = self.proxies['https']
730 urltype, proxyhost = splittype(proxy)
731 proxyhost, proxyselector = splithost(proxyhost)
732 i = proxyhost.find('@') + 1
733 proxyhost = proxyhost[i:]
734 user, passwd = self.get_user_passwd(proxyhost, realm, i)
735 if not (user or passwd): return None
736 proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
737 self.proxies['https'] = 'https://' + proxyhost + proxyselector
738 if data is None:
739 return self.open(newurl)
740 else:
741 return self.open(newurl, data)
743 def retry_http_basic_auth(self, url, realm, data=None):
744 host, selector = splithost(url)
745 i = host.find('@') + 1
746 host = host[i:]
747 user, passwd = self.get_user_passwd(host, realm, i)
748 if not (user or passwd): return None
749 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
750 newurl = 'http://' + host + selector
751 if data is None:
752 return self.open(newurl)
753 else:
754 return self.open(newurl, data)
756 def retry_https_basic_auth(self, url, realm, data=None):
757 host, selector = splithost(url)
758 i = host.find('@') + 1
759 host = host[i:]
760 user, passwd = self.get_user_passwd(host, realm, i)
761 if not (user or passwd): return None
762 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
763 newurl = 'https://' + host + selector
764 if data is None:
765 return self.open(newurl)
766 else:
767 return self.open(newurl, data)
769 def get_user_passwd(self, host, realm, clear_cache = 0):
770 key = realm + '@' + host.lower()
771 if key in self.auth_cache:
772 if clear_cache:
773 del self.auth_cache[key]
774 else:
775 return self.auth_cache[key]
776 user, passwd = self.prompt_user_passwd(host, realm)
777 if user or passwd: self.auth_cache[key] = (user, passwd)
778 return user, passwd
780 def prompt_user_passwd(self, host, realm):
781 """Override this in a GUI environment!"""
782 import getpass
783 try:
784 user = raw_input("Enter username for %s at %s: " % (realm,
785 host))
786 passwd = getpass.getpass("Enter password for %s in %s at %s: " %
787 (user, realm, host))
788 return user, passwd
789 except KeyboardInterrupt:
790 print
791 return None, None
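# Illustrative sketch of supplying credentials programmatically, as the
# prompt_user_passwd() docstring suggests (class name, URL and credentials
# are placeholders):
#
#     class MyOpener(FancyURLopener):
#         def prompt_user_passwd(self, host, realm):
#             return 'someuser', 'somepassword'
#
#     opener = MyOpener()
#     f = opener.open('http://protected.example.com/')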
794 # Utility functions
796 _localhost = None
797 def localhost():
798 """Return the IP address of the magic hostname 'localhost'."""
799 global _localhost
800 if _localhost is None:
801 _localhost = socket.gethostbyname('localhost')
802 return _localhost
804 _thishost = None
805 def thishost():
806 """Return the IP address of the current host."""
807 global _thishost
808 if _thishost is None:
809 _thishost = socket.gethostbyname(socket.gethostname())
810 return _thishost
812 _ftperrors = None
813 def ftperrors():
814 """Return the set of errors raised by the FTP class."""
815 global _ftperrors
816 if _ftperrors is None:
817 import ftplib
818 _ftperrors = ftplib.all_errors
819 return _ftperrors
821 _noheaders = None
822 def noheaders():
823 """Return an empty mimetools.Message object."""
824 global _noheaders
825 if _noheaders is None:
826 import mimetools
827 try:
828 from cStringIO import StringIO
829 except ImportError:
830 from StringIO import StringIO
831 _noheaders = mimetools.Message(StringIO(), 0)
832 _noheaders.fp.close() # Recycle file descriptor
833 return _noheaders
836 # Utility classes
838 class ftpwrapper:
839 """Class used by open_ftp() for cache of open FTP connections."""
841 def __init__(self, user, passwd, host, port, dirs,
842 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
843 self.user = user
844 self.passwd = passwd
845 self.host = host
846 self.port = port
847 self.dirs = dirs
848 self.timeout = timeout
849 self.init()
851 def init(self):
852 import ftplib
853 self.busy = 0
854 self.ftp = ftplib.FTP()
855 self.ftp.connect(self.host, self.port, self.timeout)
856 self.ftp.login(self.user, self.passwd)
857 for dir in self.dirs:
858 self.ftp.cwd(dir)
860 def retrfile(self, file, type):
861 import ftplib
862 self.endtransfer()
863 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
864 else: cmd = 'TYPE ' + type; isdir = 0
865 try:
866 self.ftp.voidcmd(cmd)
867 except ftplib.all_errors:
868 self.init()
869 self.ftp.voidcmd(cmd)
870 conn = None
871 if file and not isdir:
872 # Try to retrieve as a file
873 try:
874 cmd = 'RETR ' + file
875 conn = self.ftp.ntransfercmd(cmd)
876 except ftplib.error_perm, reason:
877 if str(reason)[:3] != '550':
878 raise IOError, ('ftp error', reason), sys.exc_info()[2]
879 if not conn:
880 # Set transfer mode to ASCII!
881 self.ftp.voidcmd('TYPE A')
882 # Try a directory listing. Verify that directory exists.
883 if file:
884 pwd = self.ftp.pwd()
885 try:
886 try:
887 self.ftp.cwd(file)
888 except ftplib.error_perm, reason:
889 raise IOError, ('ftp error', reason), sys.exc_info()[2]
890 finally:
891 self.ftp.cwd(pwd)
892 cmd = 'LIST ' + file
893 else:
894 cmd = 'LIST'
895 conn = self.ftp.ntransfercmd(cmd)
896 self.busy = 1
897 # Pass back both a suitably decorated object and a retrieval length
898 return (addclosehook(conn[0].makefile('rb'),
899 self.endtransfer), conn[1])
900 def endtransfer(self):
901 if not self.busy:
902 return
903 self.busy = 0
904 try:
905 self.ftp.voidresp()
906 except ftperrors():
907 pass
909 def close(self):
910 self.endtransfer()
911 try:
912 self.ftp.close()
913 except ftperrors():
914 pass
916 class addbase:
917 """Base class for addinfo and addclosehook."""
919 def __init__(self, fp):
920 self.fp = fp
921 self.read = self.fp.read
922 self.readline = self.fp.readline
923 if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
924 if hasattr(self.fp, "fileno"):
925 self.fileno = self.fp.fileno
926 else:
927 self.fileno = lambda: None
928 if hasattr(self.fp, "__iter__"):
929 self.__iter__ = self.fp.__iter__
930 if hasattr(self.fp, "next"):
931 self.next = self.fp.next
933 def __repr__(self):
934 return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
935 id(self), self.fp)
937 def close(self):
938 self.read = None
939 self.readline = None
940 self.readlines = None
941 self.fileno = None
942 if self.fp: self.fp.close()
943 self.fp = None
945 class addclosehook(addbase):
946 """Class to add a close hook to an open file."""
948 def __init__(self, fp, closehook, *hookargs):
949 addbase.__init__(self, fp)
950 self.closehook = closehook
951 self.hookargs = hookargs
953 def close(self):
954 addbase.close(self)
955 if self.closehook:
956 self.closehook(*self.hookargs)
957 self.closehook = None
958 self.hookargs = None
960 class addinfo(addbase):
961 """class to add an info() method to an open file."""
963 def __init__(self, fp, headers):
964 addbase.__init__(self, fp)
965 self.headers = headers
967 def info(self):
968 return self.headers
970 class addinfourl(addbase):
971 """class to add info() and geturl() methods to an open file."""
973 def __init__(self, fp, headers, url, code=None):
974 addbase.__init__(self, fp)
975 self.headers = headers
976 self.url = url
977 self.code = code
979 def info(self):
980 return self.headers
982 def getcode(self):
983 return self.code
985 def geturl(self):
986 return self.url
989 # Utilities to parse URLs (most of these return None for missing parts):
990 # unwrap('<URL:type://host/path>') --> 'type://host/path'
991 # splittype('type:opaquestring') --> 'type', 'opaquestring'
992 # splithost('//host[:port]/path') --> 'host[:port]', '/path'
993 # splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
994 # splitpasswd('user:passwd') -> 'user', 'passwd'
995 # splitport('host:port') --> 'host', 'port'
996 # splitquery('/path?query') --> '/path', 'query'
997 # splittag('/path#tag') --> '/path', 'tag'
998 # splitattr('/path;attr1=value1;attr2=value2;...') ->
999 # '/path', ['attr1=value1', 'attr2=value2', ...]
1000 # splitvalue('attr=value') --> 'attr', 'value'
1001 # unquote('abc%20def') -> 'abc def'
1002 # quote('abc def') -> 'abc%20def'
1004 try:
1005 unicode
1006 except NameError:
1007 def _is_unicode(x):
1008 return 0
1009 else:
1010 def _is_unicode(x):
1011 return isinstance(x, unicode)
1013 def toBytes(url):
1014 """toBytes(u"URL") --> 'URL'."""
1015 # Most URL schemes require ASCII. If that changes, the conversion
1016 # can be relaxed
1017 if _is_unicode(url):
1018 try:
1019 url = url.encode("ASCII")
1020 except UnicodeError:
1021 raise UnicodeError("URL " + repr(url) +
1022 " contains non-ASCII characters")
1023 return url
1025 def unwrap(url):
1026 """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
1027 url = url.strip()
1028 if url[:1] == '<' and url[-1:] == '>':
1029 url = url[1:-1].strip()
1030 if url[:4] == 'URL:': url = url[4:].strip()
1031 return url
1033 _typeprog = None
1034 def splittype(url):
1035 """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
1036 global _typeprog
1037 if _typeprog is None:
1038 import re
1039 _typeprog = re.compile('^([^/:]+):')
1041 match = _typeprog.match(url)
1042 if match:
1043 scheme = match.group(1)
1044 return scheme.lower(), url[len(scheme) + 1:]
1045 return None, url
1047 _hostprog = None
1048 def splithost(url):
1049 """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
1050 global _hostprog
1051 if _hostprog is None:
1052 import re
1053 _hostprog = re.compile('^//([^/?]*)(.*)$')
1055 match = _hostprog.match(url)
1056 if match: return match.group(1, 2)
1057 return None, url
1059 _userprog = None
1060 def splituser(host):
1061 """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
1062 global _userprog
1063 if _userprog is None:
1064 import re
1065 _userprog = re.compile('^(.*)@(.*)$')
1067 match = _userprog.match(host)
1068 if match: return map(unquote, match.group(1, 2))
1069 return None, host
1071 _passwdprog = None
1072 def splitpasswd(user):
1073 """splitpasswd('user:passwd') -> 'user', 'passwd'."""
1074 global _passwdprog
1075 if _passwdprog is None:
1076 import re
1077 _passwdprog = re.compile('^([^:]*):(.*)$',re.S)
1079 match = _passwdprog.match(user)
1080 if match: return match.group(1, 2)
1081 return user, None
1083 # splittag('/path#tag') --> '/path', 'tag'
1084 _portprog = None
1085 def splitport(host):
1086 """splitport('host:port') --> 'host', 'port'."""
1087 global _portprog
1088 if _portprog is None:
1089 import re
1090 _portprog = re.compile('^(.*):([0-9]+)$')
1092 match = _portprog.match(host)
1093 if match: return match.group(1, 2)
1094 return host, None
1096 _nportprog = None
1097 def splitnport(host, defport=-1):
1098 """Split host and port, returning numeric port.
1099 Return given default port if no ':' found; defaults to -1.
1100 Return numerical port if a valid number is found after ':'.
1101 Return None if ':' but not a valid number."""
1102 global _nportprog
1103 if _nportprog is None:
1104 import re
1105 _nportprog = re.compile('^(.*):(.*)$')
1107 match = _nportprog.match(host)
1108 if match:
1109 host, port = match.group(1, 2)
1110 try:
1111 if not port: raise ValueError, "no digits"
1112 nport = int(port)
1113 except ValueError:
1114 nport = None
1115 return host, nport
1116 return host, defport
1118 _queryprog = None
1119 def splitquery(url):
1120 """splitquery('/path?query') --> '/path', 'query'."""
1121 global _queryprog
1122 if _queryprog is None:
1123 import re
1124 _queryprog = re.compile('^(.*)\?([^?]*)$')
1126 match = _queryprog.match(url)
1127 if match: return match.group(1, 2)
1128 return url, None
1130 _tagprog = None
1131 def splittag(url):
1132 """splittag('/path#tag') --> '/path', 'tag'."""
1133 global _tagprog
1134 if _tagprog is None:
1135 import re
1136 _tagprog = re.compile('^(.*)#([^#]*)$')
1138 match = _tagprog.match(url)
1139 if match: return match.group(1, 2)
1140 return url, None
1142 def splitattr(url):
1143 """splitattr('/path;attr1=value1;attr2=value2;...') ->
1144 '/path', ['attr1=value1', 'attr2=value2', ...]."""
1145 words = url.split(';')
1146 return words[0], words[1:]
1148 _valueprog = None
1149 def splitvalue(attr):
1150 """splitvalue('attr=value') --> 'attr', 'value'."""
1151 global _valueprog
1152 if _valueprog is None:
1153 import re
1154 _valueprog = re.compile('^([^=]*)=(.*)$')
1156 match = _valueprog.match(attr)
1157 if match: return match.group(1, 2)
1158 return attr, None
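# Illustrative walk-through of how the split* helpers above take a URL apart
# (the URL is a made-up example; results follow the documented behaviour):
#
#     url = 'http://user:pwd@www.example.com:8080/pub/a.html;type=a?q=1#top'
#     scheme, rest = splittype(url)       # 'http', '//user:pwd@...#top'
#     netloc, path = splithost(rest)      # 'user:pwd@www.example.com:8080', '/pub/...'
#     userinfo, host = splituser(netloc)  # 'user:pwd', 'www.example.com:8080'
#     user, pwd = splitpasswd(userinfo)   # 'user', 'pwd'
#     host, port = splitport(host)        # 'www.example.com', '8080'
#     path, tag = splittag(path)          # '/pub/a.html;type=a?q=1', 'top'
#     path, query = splitquery(path)      # '/pub/a.html;type=a', 'q=1'
#     path, attrs = splitattr(path)       # '/pub/a.html', ['type=a']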
1160 _hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
1161 _hextochr.update(('%02X' % i, chr(i)) for i in range(256))
1163 def unquote(s):
1164 """unquote('abc%20def') -> 'abc def'."""
1165 res = s.split('%')
1166 for i in xrange(1, len(res)):
1167 item = res[i]
1168 try:
1169 res[i] = _hextochr[item[:2]] + item[2:]
1170 except KeyError:
1171 res[i] = '%' + item
1172 except UnicodeDecodeError:
1173 res[i] = unichr(int(item[:2], 16)) + item[2:]
1174 return "".join(res)
1176 def unquote_plus(s):
1177 """unquote('%7e/abc+def') -> '~/abc def'"""
1178 s = s.replace('+', ' ')
1179 return unquote(s)
1181 always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
1182 'abcdefghijklmnopqrstuvwxyz'
1183 '0123456789' '_.-')
1184 _safemaps = {}
1186 def quote(s, safe = '/'):
1187 """quote('abc def') -> 'abc%20def'
1189 Each part of a URL, e.g. the path info, the query, etc., has a
1190 different set of reserved characters that must be quoted.
1192 RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
1193 the following reserved characters.
1195 reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
1196 "$" | ","
1198 Each of these characters is reserved in some component of a URL,
1199 but not necessarily in all of them.
1201 By default, the quote function is intended for quoting the path
1202 section of a URL. Thus, it will not encode '/'. This character
1203 is reserved, but in typical usage the quote function is being
1204 called on a path where the existing slash characters are used as
1205 reserved characters.
1206 """
1207 cachekey = (safe, always_safe)
1208 try:
1209 safe_map = _safemaps[cachekey]
1210 except KeyError:
1211 safe += always_safe
1212 safe_map = {}
1213 for i in range(256):
1214 c = chr(i)
1215 safe_map[c] = (c in safe) and c or ('%%%02X' % i)
1216 _safemaps[cachekey] = safe_map
1217 res = map(safe_map.__getitem__, s)
1218 return ''.join(res)
1220 def quote_plus(s, safe = ''):
1221 """Quote the query fragment of a URL; replacing ' ' with '+'"""
1222 if ' ' in s:
1223 s = quote(s, safe + ' ')
1224 return s.replace(' ', '+')
1225 return quote(s, safe)
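# Behaviour of the quoting helpers above on small inputs (illustrative):
#
#     quote('abc def')                 # -> 'abc%20def'  ('/' is left alone)
#     quote('/~user/file', safe='')    # -> '%2F%7Euser%2Ffile'
#     quote_plus('abc def')            # -> 'abc+def'
#     unquote('abc%20def')             # -> 'abc def'
#     unquote_plus('abc+def')          # -> 'abc def'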
1227 def urlencode(query,doseq=0):
1228 """Encode a sequence of two-element tuples or dictionary into a URL query string.
1230 If any values in the query arg are sequences and doseq is true, each
1231 sequence element is converted to a separate parameter.
1233 If the query arg is a sequence of two-element tuples, the order of the
1234 parameters in the output will match the order of parameters in the
1235 input.
1236 """
1238 if hasattr(query,"items"):
1239 # mapping objects
1240 query = query.items()
1241 else:
1242 # it's a bother at times that strings and string-like objects are
1243 # sequences...
1244 try:
1245 # non-sequence items should not work with len()
1246 # non-empty strings will fail this
1247 if len(query) and not isinstance(query[0], tuple):
1248 raise TypeError
1249 # zero-length sequences of all types will get here and succeed,
1250 # but that's a minor nit - since the original implementation
1251 # allowed empty dicts that type of behavior probably should be
1252 # preserved for consistency
1253 except TypeError:
1254 ty,va,tb = sys.exc_info()
1255 raise TypeError, "not a valid non-string sequence or mapping object", tb
1257 l = []
1258 if not doseq:
1259 # preserve old behavior
1260 for k, v in query:
1261 k = quote_plus(str(k))
1262 v = quote_plus(str(v))
1263 l.append(k + '=' + v)
1264 else:
1265 for k, v in query:
1266 k = quote_plus(str(k))
1267 if isinstance(v, str):
1268 v = quote_plus(v)
1269 l.append(k + '=' + v)
1270 elif _is_unicode(v):
1271 # is there a reasonable way to convert to ASCII?
1272 # encode generates a string, but "replace" or "ignore"
1273 # lose information and "strict" can raise UnicodeError
1274 v = quote_plus(v.encode("ASCII","replace"))
1275 l.append(k + '=' + v)
1276 else:
1277 try:
1278 # is this a sufficient test for sequence-ness?
1279 x = len(v)
1280 except TypeError:
1281 # not a sequence
1282 v = quote_plus(str(v))
1283 l.append(k + '=' + v)
1284 else:
1285 # loop over the sequence
1286 for elt in v:
1287 l.append(k + '=' + quote_plus(str(elt)))
1288 return '&'.join(l)
1290 # Proxy handling
1291 def getproxies_environment():
1292 """Return a dictionary of scheme -> proxy server URL mappings.
1294 Scan the environment for variables named <scheme>_proxy;
1295 this seems to be the standard convention. If you need a
1296 different way, you can pass a proxies dictionary to the
1297 [Fancy]URLopener constructor.
1299 """
1300 proxies = {}
1301 for name, value in os.environ.items():
1302 name = name.lower()
1303 if value and name[-6:] == '_proxy':
1304 proxies[name[:-6]] = value
1305 return proxies
1307 def proxy_bypass_environment(host):
1308 """Test if proxies should not be used for a particular host.
1310 Checks the environment for a variable named no_proxy, which should
1311 be a list of DNS suffixes separated by commas, or '*' for all hosts.
1312 """
1313 no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
1314 # '*' is special case for always bypass
1315 if no_proxy == '*':
1316 return 1
1317 # strip port off host
1318 hostonly, port = splitport(host)
1319 # check if the host ends with any of the DNS suffixes
1320 for name in no_proxy.split(','):
1321 if name and (hostonly.endswith(name) or host.endswith(name)):
1322 return 1
1323 # otherwise, don't bypass
1324 return 0
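# Illustrative environment configuration consumed by the two helpers above
# (server names and ports are placeholders):
#
#     $ export http_proxy="http://proxy.example.com:3128/"
#     $ export no_proxy="localhost,.internal.example.com"
#
#     getproxies_environment()
#         # -> {'http': 'http://proxy.example.com:3128/'}
#     proxy_bypass_environment('db.internal.example.com:5432')   # -> 1
#     proxy_bypass_environment('www.python.org')                 # -> 0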
1327 if sys.platform == 'darwin':
1328 from _scproxy import _get_proxy_settings, _get_proxies
1330 def proxy_bypass_macosx_sysconf(host):
1331 """
1332 Return True iff this host shouldn't be accessed using a proxy
1334 This function uses the MacOSX framework SystemConfiguration
1335 to fetch the proxy information.
1336 """
1337 import re
1338 import socket
1339 from fnmatch import fnmatch
1341 hostonly, port = splitport(host)
1343 def ip2num(ipAddr):
1344 parts = ipAddr.split('.')
1345 parts = map(int, parts)
1346 if len(parts) != 4:
1347 parts = (parts + [0, 0, 0, 0])[:4]
1348 return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
1350 proxy_settings = _get_proxy_settings()
1352 # Check for simple host names:
1353 if '.' not in host:
1354 if proxy_settings['exclude_simple']:
1355 return True
1357 hostIP = None
1359 for value in proxy_settings.get('exceptions', ()):
1360 # Items in the list are strings like these: *.local, 169.254/16
1361 if not value: continue
1363 m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
1364 if m is not None:
1365 if hostIP is None:
1366 try:
1367 hostIP = socket.gethostbyname(hostonly)
1368 hostIP = ip2num(hostIP)
1369 except socket.error:
1370 continue
1372 base = ip2num(m.group(1))
1373 mask = int(m.group(2)[1:])
1374 mask = 32 - mask
1376 if (hostIP >> mask) == (base >> mask):
1377 return True
1379 elif fnmatch(host, value):
1380 return True
1382 return False
1385 def getproxies_macosx_sysconf():
1386 """Return a dictionary of scheme -> proxy server URL mappings.
1388 This function uses the MacOSX framework SystemConfiguration
1389 to fetch the proxy information.
1390 """
1391 return _get_proxies()
1395 def proxy_bypass(host):
1396 if getproxies_environment():
1397 return proxy_bypass_environment(host)
1398 else:
1399 return proxy_bypass_macosx_sysconf(host)
1401 def getproxies():
1402 return getproxies_environment() or getproxies_macosx_sysconf()
1404 elif os.name == 'nt':
1405 def getproxies_registry():
1406 """Return a dictionary of scheme -> proxy server URL mappings.
1408 Win32 uses the registry to store proxies.
1410 """
1411 proxies = {}
1412 try:
1413 import _winreg
1414 except ImportError:
1415 # Std module, so should be around - but you never know!
1416 return proxies
1417 try:
1418 internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1419 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1420 proxyEnable = _winreg.QueryValueEx(internetSettings,
1421 'ProxyEnable')[0]
1422 if proxyEnable:
1423 # Returned as Unicode but problems if not converted to ASCII
1424 proxyServer = str(_winreg.QueryValueEx(internetSettings,
1425 'ProxyServer')[0])
1426 if '=' in proxyServer:
1427 # Per-protocol settings
1428 for p in proxyServer.split(';'):
1429 protocol, address = p.split('=', 1)
1430 # See if address has a type:// prefix
1431 import re
1432 if not re.match('^([^/:]+)://', address):
1433 address = '%s://%s' % (protocol, address)
1434 proxies[protocol] = address
1435 else:
1436 # Use one setting for all protocols
1437 if proxyServer[:5] == 'http:':
1438 proxies['http'] = proxyServer
1439 else:
1440 proxies['http'] = 'http://%s' % proxyServer
1441 proxies['ftp'] = 'ftp://%s' % proxyServer
1442 internetSettings.Close()
1443 except (WindowsError, ValueError, TypeError):
1444 # Either registry key not found etc, or the value in an
1445 # unexpected format.
1446 # proxies already set up to be empty so nothing to do
1447 pass
1448 return proxies
1450 def getproxies():
1451 """Return a dictionary of scheme -> proxy server URL mappings.
1453 Returns settings gathered from the environment, if specified,
1454 or the registry.
1456 """
1457 return getproxies_environment() or getproxies_registry()
1459 def proxy_bypass_registry(host):
1460 try:
1461 import _winreg
1462 import re
1463 except ImportError:
1464 # Std modules, so should be around - but you never know!
1465 return 0
1466 try:
1467 internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1468 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1469 proxyEnable = _winreg.QueryValueEx(internetSettings,
1470 'ProxyEnable')[0]
1471 proxyOverride = str(_winreg.QueryValueEx(internetSettings,
1472 'ProxyOverride')[0])
1473 # ^^^^ Returned as Unicode but problems if not converted to ASCII
1474 except WindowsError:
1475 return 0
1476 if not proxyEnable or not proxyOverride:
1477 return 0
1478 # try to make a host list from name and IP address.
1479 rawHost, port = splitport(host)
1480 host = [rawHost]
1481 try:
1482 addr = socket.gethostbyname(rawHost)
1483 if addr != rawHost:
1484 host.append(addr)
1485 except socket.error:
1486 pass
1487 try:
1488 fqdn = socket.getfqdn(rawHost)
1489 if fqdn != rawHost:
1490 host.append(fqdn)
1491 except socket.error:
1492 pass
1493 # make a check value list from the registry entry: replace the
1494 # '<local>' string by the localhost entry and the corresponding
1495 # canonical entry.
1496 proxyOverride = proxyOverride.split(';')
1497 # now check if we match one of the registry values.
1498 for test in proxyOverride:
1499 if test == '<local>':
1500 if '.' not in rawHost:
1501 return 1
1502 test = test.replace(".", r"\.") # mask dots
1503 test = test.replace("*", r".*") # change glob sequence
1504 test = test.replace("?", r".") # change glob char
1505 for val in host:
1506 # print "%s <--> %s" %( test, val )
1507 if re.match(test, val, re.I):
1508 return 1
1509 return 0
1511 def proxy_bypass(host):
1512 """Return a dictionary of scheme -> proxy server URL mappings.
1514 Returns settings gathered from the environment, if specified,
1515 or the registry.
1518 if getproxies_environment():
1519 return proxy_bypass_environment(host)
1520 else:
1521 return proxy_bypass_registry(host)
1523 else:
1524 # By default use environment variables
1525 getproxies = getproxies_environment
1526 proxy_bypass = proxy_bypass_environment
1528 # Test and time quote() and unquote()
1529 def test1():
1530 s = ''
1531 for i in range(256): s = s + chr(i)
1532 s = s*4
1533 t0 = time.time()
1534 qs = quote(s)
1535 uqs = unquote(qs)
1536 t1 = time.time()
1537 if uqs != s:
1538 print 'Wrong!'
1539 print repr(s)
1540 print repr(qs)
1541 print repr(uqs)
1542 print round(t1 - t0, 3), 'sec'
1545 def reporthook(blocknum, blocksize, totalsize):
1546 # Report during remote transfers
1547 print "Block number: %d, Block size: %d, Total size: %d" % (
1548 blocknum, blocksize, totalsize)
1550 # Test program
1551 def test(args=[]):
1552 if not args:
1553 args = [
1554 '/etc/passwd',
1555 'file:/etc/passwd',
1556 'file://localhost/etc/passwd',
1557 'ftp://ftp.gnu.org/pub/README',
1558 'http://www.python.org/index.html',
1559 ]
1560 if hasattr(URLopener, "open_https"):
1561 args.append('https://synergy.as.cmu.edu/~geek/')
1562 try:
1563 for url in args:
1564 print '-'*10, url, '-'*10
1565 fn, h = urlretrieve(url, None, reporthook)
1566 print fn
1567 if h:
1568 print '======'
1569 for k in h.keys(): print k + ':', h[k]
1570 print '======'
1571 with open(fn, 'rb') as fp:
1572 data = fp.read()
1573 if '\r' in data:
1574 table = string.maketrans("", "")
1575 data = data.translate(table, "\r")
1576 print data
1577 fn, h = None, None
1578 print '-'*40
1579 finally:
1580 urlcleanup()
1582 def main():
1583 import getopt, sys
1584 try:
1585 opts, args = getopt.getopt(sys.argv[1:], "th")
1586 except getopt.error, msg:
1587 print msg
1588 print "Use -h for help"
1589 return
1590 t = 0
1591 for o, a in opts:
1592 if o == '-t':
1593 t = t + 1
1594 if o == '-h':
1595 print "Usage: python urllib.py [-t] [url ...]"
1596 print "-t runs self-test;",
1597 print "otherwise, contents of urls are printed"
1598 return
1599 if t:
1600 if t > 1:
1601 test1()
1602 test(args)
1603 else:
1604 if not args:
1605 print "Use -h for help"
1606 for url in args:
1607 print urlopen(url).read(),
1609 # Run test program when run as a script
1610 if __name__ == '__main__':
1611 main()