Remove Barry's love of deprecated syntax to silence warnings in the email
[python.git] / Lib / urllib.py
blob55a29f4fc8cd689b354f7087e219328836f51d67
1 """Open an arbitrary URL.
3 See the following document for more info on URLs:
4 "Names and Addresses, URIs, URLs, URNs, URCs", at
5 http://www.w3.org/pub/WWW/Addressing/Overview.html
7 See also the HTTP spec (from which the error codes are derived):
8 "HTTP - Hypertext Transfer Protocol", at
9 http://www.w3.org/pub/WWW/Protocols/
11 Related standards and specs:
12 - RFC1808: the "relative URL" spec. (authoritative status)
13 - RFC1738 - the "URL standard". (authoritative status)
14 - RFC1630 - the "URI spec". (informational status)
16 The object returned by URLopener().open(file) will differ per
17 protocol. All you know is that it has methods read(), readline(),
18 readlines(), fileno(), close() and info(). The read*(), fileno()
19 and close() methods work like those of open files.
20 The info() method returns a mimetools.Message object which can be
21 used to query various info about the object, if available.
22 (mimetools.Message objects are queried with the getheader() method.)
23 """
25 import string
26 import socket
27 import os
28 import time
29 import sys
30 from urlparse import urljoin as basejoin
31 import warnings
# Public names exported by "from urllib import *".
__all__ = [
    "urlopen", "URLopener", "FancyURLopener", "urlretrieve",
    "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
    "urlencode", "url2pathname", "pathname2url", "splittag",
    "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
    "splittype", "splithost", "splituser", "splitpasswd", "splitport",
    "splitnport", "splitquery", "splitattr", "splitvalue",
    "getproxies",
]

# XXX This version is not always updated :-(
__version__ = '1.17'

# Trim the ftp cache beyond this size
MAXFTPCACHE = 10
# Helper for non-unix systems: use the platform module's converters when
# one exists, otherwise fall back to plain URL (un)quoting.
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        return quote(pathname)

    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        return unquote(pathname)
63 # This really consists of two pieces:
64 # (1) a class which handles opening of all sorts of URLs
65 # (plus assorted utilities etc.)
66 # (2) a set of functions for parsing URLs
67 # XXX Should these be separated out into different modules?
# Shortcut for basic usage
# Module-wide opener shared by urlopen(), urlretrieve() and urlcleanup();
# created lazily on first use.
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """Create a file-like object for the specified URL to read from.

    url     -- the URL to open.
    data    -- optional request body; when given it is passed on to the
               opener's open() method.
    proxies -- optional proxy mapping.  When supplied, a fresh
               FancyURLopener is built for this call only and the shared
               module-level opener is left untouched.

    Returns whatever the opener's open() method returns.
    """
    global _urlopener
    # The module-level "import warnings" already provides warnpy3k, so no
    # function-local import is needed.
    warnings.warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
                      "favor of urllib2.urlopen()", stacklevel=2)

    if proxies is not None:
        opener = FancyURLopener(proxies=proxies)
    elif not _urlopener:
        opener = FancyURLopener()
        _urlopener = opener
    else:
        opener = _urlopener
    if data is None:
        return opener.open(url)
    else:
        return opener.open(url, data)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve url through the shared module-level opener.

    The opener is created on first use; all arguments are forwarded
    unchanged to its retrieve() method, whose result is returned.
    """
    global _urlopener
    opener = _urlopener
    if not opener:
        opener = _urlopener = FancyURLopener()
    return opener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Run cleanup() on the shared module-level opener, if one exists."""
    opener = _urlopener
    if opener:
        opener.cleanup()
# check for SSL
# _have_ssl gates the definition of URLopener.open_https below.
try:
    import ssl
except ImportError:
    # Only a missing ssl module should disable HTTPS support; a bare
    # "except:" would also swallow KeyboardInterrupt and SystemExit.
    _have_ssl = False
else:
    _have_ssl = True
# exception raised when downloaded size does not match content-length
class ContentTooShortError(IOError):
    """IOError carrying the partial result of an incomplete download.

    message -- text passed on to IOError.
    content -- stored unchanged on the instance as .content.
    """

    def __init__(self, message, content):
        super(ContentTooShortError, self).__init__(message)
        self.content = content
113 ftpcache = {}
114 class URLopener:
115 """Class to open URLs.
116 This is a class rather than just a subroutine because we may need
117 more than one set of global protocol-specific options.
118 Note -- this is a base class for those who don't want the
119 automatic handling of errors type 302 (relocated) and 401
120 (authorization needed)."""
122 __tempfiles = None
124 version = "Python-urllib/%s" % __version__
126 # Constructor
127 def __init__(self, proxies=None, **x509):
128 if proxies is None:
129 proxies = getproxies()
130 assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
131 self.proxies = proxies
132 self.key_file = x509.get('key_file')
133 self.cert_file = x509.get('cert_file')
134 self.addheaders = [('User-Agent', self.version)]
135 self.__tempfiles = []
136 self.__unlink = os.unlink # See cleanup()
137 self.tempcache = None
138 # Undocumented feature: if you assign {} to tempcache,
139 # it is used to cache files retrieved with
140 # self.retrieve(). This is not enabled by default
141 # since it does not work for changing documents (and I
142 # haven't got the logic to check expiration headers
143 # yet).
144 self.ftpcache = ftpcache
145 # Undocumented feature: you can use a different
146 # ftp cache by assigning to the .ftpcache member;
147 # in case you want logically independent URL openers
148 # XXX This is not threadsafe. Bah.
150 def __del__(self):
151 self.close()
153 def close(self):
154 self.cleanup()
156 def cleanup(self):
157 # This code sometimes runs when the rest of this module
158 # has already been deleted, so it can't use any globals
159 # or import anything.
160 if self.__tempfiles:
161 for file in self.__tempfiles:
162 try:
163 self.__unlink(file)
164 except OSError:
165 pass
166 del self.__tempfiles[:]
167 if self.tempcache:
168 self.tempcache.clear()
170 def addheader(self, *args):
171 """Add a header to be used by the HTTP interface only
172 e.g. u.addheader('Accept', 'sound/basic')"""
173 self.addheaders.append(args)
175 # External interface
176 def open(self, fullurl, data=None):
177 """Use URLopener().open(file) instead of open(file, 'r')."""
178 fullurl = unwrap(toBytes(fullurl))
179 if self.tempcache and fullurl in self.tempcache:
180 filename, headers = self.tempcache[fullurl]
181 fp = open(filename, 'rb')
182 return addinfourl(fp, headers, fullurl)
183 urltype, url = splittype(fullurl)
184 if not urltype:
185 urltype = 'file'
186 if urltype in self.proxies:
187 proxy = self.proxies[urltype]
188 urltype, proxyhost = splittype(proxy)
189 host, selector = splithost(proxyhost)
190 url = (host, fullurl) # Signal special case to open_*()
191 else:
192 proxy = None
193 name = 'open_' + urltype
194 self.type = urltype
195 name = name.replace('-', '_')
196 if not hasattr(self, name):
197 if proxy:
198 return self.open_unknown_proxy(proxy, fullurl, data)
199 else:
200 return self.open_unknown(fullurl, data)
201 try:
202 if data is None:
203 return getattr(self, name)(url)
204 else:
205 return getattr(self, name)(url, data)
206 except socket.error, msg:
207 raise IOError, ('socket error', msg), sys.exc_info()[2]
209 def open_unknown(self, fullurl, data=None):
210 """Overridable interface to open unknown URL type."""
211 type, url = splittype(fullurl)
212 raise IOError, ('url error', 'unknown url type', type)
214 def open_unknown_proxy(self, proxy, fullurl, data=None):
215 """Overridable interface to open unknown URL type."""
216 type, url = splittype(fullurl)
217 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
219 # External interface
220 def retrieve(self, url, filename=None, reporthook=None, data=None):
221 """retrieve(url) returns (filename, headers) for a local object
222 or (tempfilename, headers) for a remote object."""
223 url = unwrap(toBytes(url))
224 if self.tempcache and url in self.tempcache:
225 return self.tempcache[url]
226 type, url1 = splittype(url)
227 if filename is None and (not type or type == 'file'):
228 try:
229 fp = self.open_local_file(url1)
230 hdrs = fp.info()
231 del fp
232 return url2pathname(splithost(url1)[1]), hdrs
233 except IOError, msg:
234 pass
235 fp = self.open(url, data)
236 headers = fp.info()
237 if filename:
238 tfp = open(filename, 'wb')
239 else:
240 import tempfile
241 garbage, path = splittype(url)
242 garbage, path = splithost(path or "")
243 path, garbage = splitquery(path or "")
244 path, garbage = splitattr(path or "")
245 suffix = os.path.splitext(path)[1]
246 (fd, filename) = tempfile.mkstemp(suffix)
247 self.__tempfiles.append(filename)
248 tfp = os.fdopen(fd, 'wb')
249 result = filename, headers
250 if self.tempcache is not None:
251 self.tempcache[url] = result
252 bs = 1024*8
253 size = -1
254 read = 0
255 blocknum = 0
256 if reporthook:
257 if "content-length" in headers:
258 size = int(headers["Content-Length"])
259 reporthook(blocknum, bs, size)
260 while 1:
261 block = fp.read(bs)
262 if block == "":
263 break
264 read += len(block)
265 tfp.write(block)
266 blocknum += 1
267 if reporthook:
268 reporthook(blocknum, bs, size)
269 fp.close()
270 tfp.close()
271 del fp
272 del tfp
274 # raise exception if actual size does not match content-length header
275 if size >= 0 and read < size:
276 raise ContentTooShortError("retrieval incomplete: got only %i out "
277 "of %i bytes" % (read, size), result)
279 return result
281 # Each method named open_<type> knows how to open that type of URL
283 def open_http(self, url, data=None):
284 """Use HTTP protocol."""
285 import httplib
286 user_passwd = None
287 proxy_passwd= None
288 if isinstance(url, str):
289 host, selector = splithost(url)
290 if host:
291 user_passwd, host = splituser(host)
292 host = unquote(host)
293 realhost = host
294 else:
295 host, selector = url
296 # check whether the proxy contains authorization information
297 proxy_passwd, host = splituser(host)
298 # now we proceed with the url we want to obtain
299 urltype, rest = splittype(selector)
300 url = rest
301 user_passwd = None
302 if urltype.lower() != 'http':
303 realhost = None
304 else:
305 realhost, rest = splithost(rest)
306 if realhost:
307 user_passwd, realhost = splituser(realhost)
308 if user_passwd:
309 selector = "%s://%s%s" % (urltype, realhost, rest)
310 if proxy_bypass(realhost):
311 host = realhost
313 #print "proxy via http:", host, selector
314 if not host: raise IOError, ('http error', 'no host given')
316 if proxy_passwd:
317 import base64
318 proxy_auth = base64.b64encode(proxy_passwd).strip()
319 else:
320 proxy_auth = None
322 if user_passwd:
323 import base64
324 auth = base64.b64encode(user_passwd).strip()
325 else:
326 auth = None
327 h = httplib.HTTP(host)
328 if data is not None:
329 h.putrequest('POST', selector)
330 h.putheader('Content-Type', 'application/x-www-form-urlencoded')
331 h.putheader('Content-Length', '%d' % len(data))
332 else:
333 h.putrequest('GET', selector)
334 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
335 if auth: h.putheader('Authorization', 'Basic %s' % auth)
336 if realhost: h.putheader('Host', realhost)
337 for args in self.addheaders: h.putheader(*args)
338 h.endheaders()
339 if data is not None:
340 h.send(data)
341 errcode, errmsg, headers = h.getreply()
342 fp = h.getfile()
343 if errcode == -1:
344 if fp: fp.close()
345 # something went wrong with the HTTP status line
346 raise IOError, ('http protocol error', 0,
347 'got a bad status line', None)
348 # According to RFC 2616, "2xx" code indicates that the client's
349 # request was successfully received, understood, and accepted.
350 if (200 <= errcode < 300):
351 return addinfourl(fp, headers, "http:" + url, errcode)
352 else:
353 if data is None:
354 return self.http_error(url, fp, errcode, errmsg, headers)
355 else:
356 return self.http_error(url, fp, errcode, errmsg, headers, data)
358 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
359 """Handle http errors.
360 Derived class can override this, or provide specific handlers
361 named http_error_DDD where DDD is the 3-digit error code."""
362 # First check if there's a specific handler for this error
363 name = 'http_error_%d' % errcode
364 if hasattr(self, name):
365 method = getattr(self, name)
366 if data is None:
367 result = method(url, fp, errcode, errmsg, headers)
368 else:
369 result = method(url, fp, errcode, errmsg, headers, data)
370 if result: return result
371 return self.http_error_default(url, fp, errcode, errmsg, headers)
373 def http_error_default(self, url, fp, errcode, errmsg, headers):
374 """Default error handler: close the connection and raise IOError."""
375 void = fp.read()
376 fp.close()
377 raise IOError, ('http error', errcode, errmsg, headers)
379 if _have_ssl:
380 def open_https(self, url, data=None):
381 """Use HTTPS protocol."""
383 import httplib
384 user_passwd = None
385 proxy_passwd = None
386 if isinstance(url, str):
387 host, selector = splithost(url)
388 if host:
389 user_passwd, host = splituser(host)
390 host = unquote(host)
391 realhost = host
392 else:
393 host, selector = url
394 # here, we determine, whether the proxy contains authorization information
395 proxy_passwd, host = splituser(host)
396 urltype, rest = splittype(selector)
397 url = rest
398 user_passwd = None
399 if urltype.lower() != 'https':
400 realhost = None
401 else:
402 realhost, rest = splithost(rest)
403 if realhost:
404 user_passwd, realhost = splituser(realhost)
405 if user_passwd:
406 selector = "%s://%s%s" % (urltype, realhost, rest)
407 #print "proxy via https:", host, selector
408 if not host: raise IOError, ('https error', 'no host given')
409 if proxy_passwd:
410 import base64
411 proxy_auth = base64.b64encode(proxy_passwd).strip()
412 else:
413 proxy_auth = None
414 if user_passwd:
415 import base64
416 auth = base64.b64encode(user_passwd).strip()
417 else:
418 auth = None
419 h = httplib.HTTPS(host, 0,
420 key_file=self.key_file,
421 cert_file=self.cert_file)
422 if data is not None:
423 h.putrequest('POST', selector)
424 h.putheader('Content-Type',
425 'application/x-www-form-urlencoded')
426 h.putheader('Content-Length', '%d' % len(data))
427 else:
428 h.putrequest('GET', selector)
429 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
430 if auth: h.putheader('Authorization', 'Basic %s' % auth)
431 if realhost: h.putheader('Host', realhost)
432 for args in self.addheaders: h.putheader(*args)
433 h.endheaders()
434 if data is not None:
435 h.send(data)
436 errcode, errmsg, headers = h.getreply()
437 fp = h.getfile()
438 if errcode == -1:
439 if fp: fp.close()
440 # something went wrong with the HTTP status line
441 raise IOError, ('http protocol error', 0,
442 'got a bad status line', None)
443 # According to RFC 2616, "2xx" code indicates that the client's
444 # request was successfully received, understood, and accepted.
445 if (200 <= errcode < 300):
446 return addinfourl(fp, headers, "https:" + url, errcode)
447 else:
448 if data is None:
449 return self.http_error(url, fp, errcode, errmsg, headers)
450 else:
451 return self.http_error(url, fp, errcode, errmsg, headers,
452 data)
454 def open_file(self, url):
455 """Use local file or FTP depending on form of URL."""
456 if not isinstance(url, str):
457 raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
458 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
459 return self.open_ftp(url)
460 else:
461 return self.open_local_file(url)
463 def open_local_file(self, url):
464 """Use local file."""
465 import mimetypes, mimetools, email.utils
466 try:
467 from cStringIO import StringIO
468 except ImportError:
469 from StringIO import StringIO
470 host, file = splithost(url)
471 localname = url2pathname(file)
472 try:
473 stats = os.stat(localname)
474 except OSError, e:
475 raise IOError(e.errno, e.strerror, e.filename)
476 size = stats.st_size
477 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
478 mtype = mimetypes.guess_type(url)[0]
479 headers = mimetools.Message(StringIO(
480 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
481 (mtype or 'text/plain', size, modified)))
482 if not host:
483 urlfile = file
484 if file[:1] == '/':
485 urlfile = 'file://' + file
486 return addinfourl(open(localname, 'rb'),
487 headers, urlfile)
488 host, port = splitport(host)
489 if not port \
490 and socket.gethostbyname(host) in (localhost(), thishost()):
491 urlfile = file
492 if file[:1] == '/':
493 urlfile = 'file://' + file
494 return addinfourl(open(localname, 'rb'),
495 headers, urlfile)
496 raise IOError, ('local file error', 'not on local host')
498 def open_ftp(self, url):
499 """Use FTP protocol."""
500 if not isinstance(url, str):
501 raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
502 import mimetypes, mimetools
503 try:
504 from cStringIO import StringIO
505 except ImportError:
506 from StringIO import StringIO
507 host, path = splithost(url)
508 if not host: raise IOError, ('ftp error', 'no host given')
509 host, port = splitport(host)
510 user, host = splituser(host)
511 if user: user, passwd = splitpasswd(user)
512 else: passwd = None
513 host = unquote(host)
514 user = unquote(user or '')
515 passwd = unquote(passwd or '')
516 host = socket.gethostbyname(host)
517 if not port:
518 import ftplib
519 port = ftplib.FTP_PORT
520 else:
521 port = int(port)
522 path, attrs = splitattr(path)
523 path = unquote(path)
524 dirs = path.split('/')
525 dirs, file = dirs[:-1], dirs[-1]
526 if dirs and not dirs[0]: dirs = dirs[1:]
527 if dirs and not dirs[0]: dirs[0] = '/'
528 key = user, host, port, '/'.join(dirs)
529 # XXX thread unsafe!
530 if len(self.ftpcache) > MAXFTPCACHE:
531 # Prune the cache, rather arbitrarily
532 for k in self.ftpcache.keys():
533 if k != key:
534 v = self.ftpcache[k]
535 del self.ftpcache[k]
536 v.close()
537 try:
538 if not key in self.ftpcache:
539 self.ftpcache[key] = \
540 ftpwrapper(user, passwd, host, port, dirs)
541 if not file: type = 'D'
542 else: type = 'I'
543 for attr in attrs:
544 attr, value = splitvalue(attr)
545 if attr.lower() == 'type' and \
546 value in ('a', 'A', 'i', 'I', 'd', 'D'):
547 type = value.upper()
548 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
549 mtype = mimetypes.guess_type("ftp:" + url)[0]
550 headers = ""
551 if mtype:
552 headers += "Content-Type: %s\n" % mtype
553 if retrlen is not None and retrlen >= 0:
554 headers += "Content-Length: %d\n" % retrlen
555 headers = mimetools.Message(StringIO(headers))
556 return addinfourl(fp, headers, "ftp:" + url)
557 except ftperrors(), msg:
558 raise IOError, ('ftp error', msg), sys.exc_info()[2]
560 def open_data(self, url, data=None):
561 """Use "data" URL."""
562 if not isinstance(url, str):
563 raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
564 # ignore POSTed data
566 # syntax of data URLs:
567 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
568 # mediatype := [ type "/" subtype ] *( ";" parameter )
569 # data := *urlchar
570 # parameter := attribute "=" value
571 import mimetools
572 try:
573 from cStringIO import StringIO
574 except ImportError:
575 from StringIO import StringIO
576 try:
577 [type, data] = url.split(',', 1)
578 except ValueError:
579 raise IOError, ('data error', 'bad data URL')
580 if not type:
581 type = 'text/plain;charset=US-ASCII'
582 semi = type.rfind(';')
583 if semi >= 0 and '=' not in type[semi:]:
584 encoding = type[semi+1:]
585 type = type[:semi]
586 else:
587 encoding = ''
588 msg = []
589 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
590 time.gmtime(time.time())))
591 msg.append('Content-type: %s' % type)
592 if encoding == 'base64':
593 import base64
594 data = base64.decodestring(data)
595 else:
596 data = unquote(data)
597 msg.append('Content-Length: %d' % len(data))
598 msg.append('')
599 msg.append(data)
600 msg = '\n'.join(msg)
601 f = StringIO(msg)
602 headers = mimetools.Message(f, 0)
603 #f.fileno = None # needed for addinfourl
604 return addinfourl(f, headers, url)
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps)."""

    def __init__(self, *args, **kwargs):
        """Accepts the same arguments as URLopener."""
        URLopener.__init__(self, *args, **kwargs)
        # Maps "realm@host" to a (user, passwd) tuple.
        self.auth_cache = {}
        # Number of redirects followed for the request in progress, and
        # the limit at which a redirect loop is reported.
        self.tries = 0
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily).

        Follows the redirect unless maxtries redirects were already
        followed, in which case the response is reported as a 500
        "Redirect Recursion" error.
        """
        self.tries += 1
        try:
            if self.maxtries and self.tries >= self.maxtries:
                if hasattr(self, "http_error_500"):
                    meth = self.http_error_500
                else:
                    meth = self.http_error_default
                return meth(url, fp, 500,
                            "Internal Server Error: Redirect Recursion",
                            headers)
            return self.redirect_internal(url, fp, errcode, errmsg, headers,
                                          data)
        finally:
            # Reset even when following the redirect raised; otherwise the
            # stale count would make later, unrelated requests on this
            # opener report a bogus redirect loop.
            self.tries = 0

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Open the URL named by the Location (or URI) header.

        Returns None when neither header is present.
        """
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        void = fp.read()
        fp.close()
        # In case the server sent a relative URL, join with original:
        newurl = basejoin(self.type + ":" + url, newurl)
        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        # URLopener.http_error_default() always raises IOError, so each
        # call below ends this method.
        if 'www-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        # URLopener.http_error_default() always raises IOError, so each
        # call below ends this method.
        if 'proxy-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with credentials added to the http proxy.

        Returns None when no user name or password could be obtained.
        """
        host, selector = splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # i is non-zero when the proxy URL already carried credentials;
        # those just failed, so get_user_passwd() is told to drop them.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with credentials added to the https proxy.

        Returns None when no user name or password could be obtained.
        """
        host, selector = splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # See retry_proxy_http_basic_auth() for the meaning of i.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with user:passwd@ put in front of the host.

        Returns None when no user name or password could be obtained.
        """
        host, selector = splithost(url)
        # Non-zero i: the URL already had credentials, which were rejected.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with user:passwd@ put in front of the host.

        Returns None when no user name or password could be obtained.
        """
        host, selector = splithost(url)
        # Non-zero i: the URL already had credentials, which were rejected.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache = 0):
        """Return (user, passwd) for realm at host.

        A cached pair is returned unless clear_cache is true, in which
        case the cached entry is discarded and prompt_user_passwd() is
        asked again.  Only non-empty answers are cached.
        """
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = raw_input("Enter username for %s at %s: " % (realm,
                                                                host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            # Finish the interrupted prompt line before giving up.
            print
            return None, None
793 # Utility functions
# Cached result of localhost(); filled in on first call.
_localhost = None


def localhost():
    """Return the IP address of the magic hostname 'localhost'."""
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
# Cached result of thishost(); filled in on first call.
_thishost = None


def thishost():
    """Return the IP address of the current host."""
    global _thishost
    if _thishost is not None:
        return _thishost
    _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost
# Cached result of ftperrors(); filled in on first call.
_ftperrors = None


def ftperrors():
    """Return the set of errors raised by the FTP class."""
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    # Imported lazily so ftplib is only loaded when FTP is actually used.
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
# Cached result of noheaders(); filled in on first call.
_noheaders = None


def noheaders():
    """Return an empty mimetools.Message object."""
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    import mimetools
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO
    _noheaders = mimetools.Message(StringIO(), 0)
    _noheaders.fp.close()    # Recycle file descriptor
    return _noheaders
835 # Utility classes
837 class ftpwrapper:
838 """Class used by open_ftp() for cache of open FTP connections."""
840 def __init__(self, user, passwd, host, port, dirs,
841 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
842 self.user = user
843 self.passwd = passwd
844 self.host = host
845 self.port = port
846 self.dirs = dirs
847 self.timeout = timeout
848 self.init()
850 def init(self):
851 import ftplib
852 self.busy = 0
853 self.ftp = ftplib.FTP()
854 self.ftp.connect(self.host, self.port, self.timeout)
855 self.ftp.login(self.user, self.passwd)
856 for dir in self.dirs:
857 self.ftp.cwd(dir)
859 def retrfile(self, file, type):
860 import ftplib
861 self.endtransfer()
862 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
863 else: cmd = 'TYPE ' + type; isdir = 0
864 try:
865 self.ftp.voidcmd(cmd)
866 except ftplib.all_errors:
867 self.init()
868 self.ftp.voidcmd(cmd)
869 conn = None
870 if file and not isdir:
871 # Try to retrieve as a file
872 try:
873 cmd = 'RETR ' + file
874 conn = self.ftp.ntransfercmd(cmd)
875 except ftplib.error_perm, reason:
876 if str(reason)[:3] != '550':
877 raise IOError, ('ftp error', reason), sys.exc_info()[2]
878 if not conn:
879 # Set transfer mode to ASCII!
880 self.ftp.voidcmd('TYPE A')
881 # Try a directory listing. Verify that directory exists.
882 if file:
883 pwd = self.ftp.pwd()
884 try:
885 try:
886 self.ftp.cwd(file)
887 except ftplib.error_perm, reason:
888 raise IOError, ('ftp error', reason), sys.exc_info()[2]
889 finally:
890 self.ftp.cwd(pwd)
891 cmd = 'LIST ' + file
892 else:
893 cmd = 'LIST'
894 conn = self.ftp.ntransfercmd(cmd)
895 self.busy = 1
896 # Pass back both a suitably decorated object and a retrieval length
897 return (addclosehook(conn[0].makefile('rb'),
898 self.endtransfer), conn[1])
899 def endtransfer(self):
900 if not self.busy:
901 return
902 self.busy = 0
903 try:
904 self.ftp.voidresp()
905 except ftperrors():
906 pass
908 def close(self):
909 self.endtransfer()
910 try:
911 self.ftp.close()
912 except ftperrors():
913 pass
class addbase:
    """Base class for addinfo and addclosehook."""

    def __init__(self, fp):
        """Wrap fp and expose its reading methods on this object."""
        self.fp = fp
        self.read = fp.read
        self.readline = fp.readline
        if hasattr(fp, "readlines"):
            self.readlines = fp.readlines
        if hasattr(fp, "fileno"):
            self.fileno = fp.fileno
        else:
            # No descriptor available: report None.
            self.fileno = lambda: None
        if hasattr(fp, "__iter__"):
            self.__iter__ = fp.__iter__
            if hasattr(fp, "next"):
                self.next = fp.next

    def __repr__(self):
        cls_name = self.__class__.__name__
        return '<%s at %r whose fp = %r>' % (cls_name, id(self), self.fp)

    def close(self):
        """Drop the forwarded methods and close the wrapped file."""
        self.read = self.readline = self.readlines = self.fileno = None
        if self.fp:
            self.fp.close()
        self.fp = None
class addclosehook(addbase):
    """Class to add a close hook to an open file."""

    def __init__(self, fp, closehook, *hookargs):
        """Wrap fp; closehook(*hookargs) is called when close() runs."""
        addbase.__init__(self, fp)
        self.hookargs = hookargs
        self.closehook = closehook

    def close(self):
        """Close the wrapped file, then run the hook (at most once)."""
        addbase.close(self)
        hook = self.closehook
        if hook:
            hook(*self.hookargs)
            self.closehook = None
            self.hookargs = None
class addinfo(addbase):
    """class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        """Wrap fp and remember headers for info()."""
        self.headers = headers
        addbase.__init__(self, fp)

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers
class addinfourl(addbase):
    """class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url, code=None):
        """Wrap fp and remember headers, url and the optional code."""
        addbase.__init__(self, fp)
        self.code = code
        self.url = url
        self.headers = headers

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers

    def getcode(self):
        """Return the code given to the constructor (None by default)."""
        return self.code

    def geturl(self):
        """Return the URL given to the constructor."""
        return self.url
988 # Utilities to parse URLs (most of these return None for missing parts):
989 # unwrap('<URL:type://host/path>') --> 'type://host/path'
990 # splittype('type:opaquestring') --> 'type', 'opaquestring'
991 # splithost('//host[:port]/path') --> 'host[:port]', '/path'
992 # splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
993 # splitpasswd('user:passwd') -> 'user', 'passwd'
994 # splitport('host:port') --> 'host', 'port'
995 # splitquery('/path?query') --> '/path', 'query'
996 # splittag('/path#tag') --> '/path', 'tag'
997 # splitattr('/path;attr1=value1;attr2=value2;...') ->
998 # '/path', ['attr1=value1', 'attr2=value2', ...]
999 # splitvalue('attr=value') --> 'attr', 'value'
1000 # unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
# Default predicate: true only for unicode objects.  It is replaced below
# when the interpreter has no unicode type at all.
def _is_unicode(x):
    return isinstance(x, unicode)

try:
    unicode
except NameError:
    # No unicode builtin available: nothing can ever be a unicode string.
    def _is_unicode(x):
        return 0
def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    Non-unicode input is returned unchanged.  Unicode input is encoded
    as ASCII; UnicodeError is raised if that is not possible.
    """
    if not _is_unicode(url):
        return url
    # Most URL schemes require ASCII.  If that changes, the conversion
    # can be relaxed.
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Strips surrounding whitespace, an enclosing <...> pair and a leading
    'URL:' marker, in that order.
    """
    text = url.strip()
    if text.startswith('<') and text.endswith('>'):
        text = text[1:-1].strip()
    if text.startswith('URL:'):
        text = text[4:].strip()
    return text
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The scheme is returned in lower case; if no scheme is found the
    result is (None, url).
    """
    global _typeprog
    if _typeprog is None:
        # Compile lazily so importing the module does not pull in re.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if not found:
        return None, url
    # The match is anchored at 0, so end() is just past the colon.
    return found.group(1).lower(), url[found.end():]
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    Returns (None, url) when url does not start with '//'.
    """
    global _hostprog
    if _hostprog is None:
        # Compile lazily so importing the module does not pull in re.
        import re
        _hostprog = re.compile('^//([^/?]*)(.*)$')

    found = _hostprog.match(url)
    if not found:
        return None, url
    return found.groups()
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    Both parts are passed through unquote().  Returns (None, host) when
    there is no '@' in host.
    """
    global _userprog
    if _userprog is None:
        # Compile lazily so importing the module does not pull in re.
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if not found:
        return None, host
    return [unquote(part) for part in found.groups()]
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split happens at the first ':'; without one the result is
    (user, None).
    """
    global _passwdprog
    if _passwdprog is None:
        # Compile lazily so importing the module does not pull in re.
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$')

    found = _passwdprog.match(user)
    if not found:
        return user, None
    return found.groups()
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits.  If host does not end
    in ':' followed by at least one digit the result is (host, None).
    """
    global _portprog
    if _portprog is None:
        # Compile lazily so importing the module does not pull in re.
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if not found:
        return host, None
    return found.groups()
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.

    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number.
    """
    global _nportprog
    if _nportprog is None:
        # Compile lazily so importing the module does not pull in re.
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if not found:
        return host, defport
    host, port = found.groups()
    # An empty or non-numeric port is reported as None.
    nport = None
    if port:
        try:
            nport = int(port)
        except ValueError:
            pass
    return host, nport
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'; without one the result is
    (url, None).
    """
    global _queryprog
    if _queryprog is None:
        # Compile lazily so importing the module does not pull in re.
        import re
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    found = _queryprog.match(url)
    if not found:
        return url, None
    return found.groups()
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'; without one the result is
    (url, None).
    """
    global _tagprog
    if _tagprog is None:
        # Compile lazily so importing the module does not pull in re.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if not found:
        return url, None
    return found.groups()
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].

    The attribute list is empty when url contains no ';'.
    """
    pieces = url.split(';')
    path = pieces.pop(0)
    return path, pieces
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='; without one the result is
    (attr, None).
    """
    global _valueprog
    if _valueprog is None:
        # Compile lazily so importing the module does not pull in re.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if not found:
        return attr, None
    return found.groups()
# Lookup table from two hex digits (all-lowercase and all-uppercase
# spellings) to the corresponding single character.
_hextochr = dict((fmt % i, chr(i))
                 for fmt in ('%02x', '%02X')
                 for i in range(256))

def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    A '%' that is not followed by a two-digit escape found in the
    lookup table is left in the output unchanged.
    """
    pieces = s.split('%')
    # Everything before the first '%' is literal text.
    out = [pieces[0]]
    for chunk in pieces[1:]:
        try:
            out.append(_hextochr[chunk[:2]] + chunk[2:])
        except KeyError:
            # Not a recognised escape: put the '%' back untouched.
            out.append('%' + chunk)
        except UnicodeDecodeError:
            # Concatenating the decoded byte with the rest failed; fall
            # back to a unicode character for the escape.
            out.append(unichr(int(chunk[:2], 16)) + chunk[2:])
    return "".join(out)
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Like unquote(), but every '+' is first turned into a space.
    """
    return unquote(s.replace('+', ' '))
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
# Cache of per-(safe, always_safe) translation tables built by quote().
_safemaps = {}

def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.
    """
    cachekey = (safe, always_safe)
    safe_map = _safemaps.get(cachekey)
    if safe_map is None:
        # First use of this safe set: build the 256-entry table once.
        unreserved = safe + always_safe
        safe_map = {}
        for code in range(256):
            ch = chr(code)
            if ch in unreserved:
                safe_map[ch] = ch
            else:
                safe_map[ch] = '%%%02X' % code
        _safemaps[cachekey] = safe_map
    return ''.join([safe_map[ch] for ch in s])
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' not in s:
        return quote(s, safe)
    # Keep spaces through quote() so they can become '+' afterwards.
    return quote(s, safe + ' ').replace(' ', '+')
1226 def urlencode(query,doseq=0):
1227 """Encode a sequence of two-element tuples or dictionary into a URL query string.
1229 If any values in the query arg are sequences and doseq is true, each
1230 sequence element is converted to a separate parameter.
1232 If the query arg is a sequence of two-element tuples, the order of the
1233 parameters in the output will match the order of parameters in the
1234 input.
1237 if hasattr(query,"items"):
1238 # mapping objects
1239 query = query.items()
1240 else:
1241 # it's a bother at times that strings and string-like objects are
1242 # sequences...
1243 try:
1244 # non-sequence items should not work with len()
1245 # non-empty strings will fail this
1246 if len(query) and not isinstance(query[0], tuple):
1247 raise TypeError
1248 # zero-length sequences of all types will get here and succeed,
1249 # but that's a minor nit - since the original implementation
1250 # allowed empty dicts that type of behavior probably should be
1251 # preserved for consistency
1252 except TypeError:
1253 ty,va,tb = sys.exc_info()
1254 raise TypeError, "not a valid non-string sequence or mapping object", tb
1256 l = []
1257 if not doseq:
1258 # preserve old behavior
1259 for k, v in query:
1260 k = quote_plus(str(k))
1261 v = quote_plus(str(v))
1262 l.append(k + '=' + v)
1263 else:
1264 for k, v in query:
1265 k = quote_plus(str(k))
1266 if isinstance(v, str):
1267 v = quote_plus(v)
1268 l.append(k + '=' + v)
1269 elif _is_unicode(v):
1270 # is there a reasonable way to convert to ASCII?
1271 # encode generates a string, but "replace" or "ignore"
1272 # lose information and "strict" can raise UnicodeError
1273 v = quote_plus(v.encode("ASCII","replace"))
1274 l.append(k + '=' + v)
1275 else:
1276 try:
1277 # is this a sufficient test for sequence-ness?
1278 x = len(v)
1279 except TypeError:
1280 # not a sequence
1281 v = quote_plus(str(v))
1282 l.append(k + '=' + v)
1283 else:
1284 # loop over the sequence
1285 for elt in v:
1286 l.append(k + '=' + quote_plus(str(elt)))
1287 return '&'.join(l)
1289 # Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    Variable names are compared case-insensitively, empty values are
    ignored, and no_proxy is skipped (proxy_bypass_environment reads it).
    """
    suffix = '_proxy'
    proxies = {}
    for var, value in os.environ.items():
        lowered = var.lower()
        if lowered == 'no_proxy' or not value:
            continue
        if lowered.endswith(suffix):
            proxies[lowered[:-len(suffix)]] = value
    return proxies
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy, which should
    be a list of DNS suffixes separated by commas, or '*' for all hosts.
    Returns 1 if the proxy should be bypassed, 0 otherwise.
    """
    env = os.environ
    no_proxy = env.get('no_proxy', '') or env.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # Compare both with and without the port part of the host.
    hostonly, port = splitport(host)
    for suffix in no_proxy.split(','):
        if not suffix:
            continue
        if hostonly.endswith(suffix) or host.endswith(suffix):
            return 1
    # otherwise, don't bypass
    return 0
1329 if sys.platform == 'darwin':
def _CFSetup(sc):
    """Declare ctypes prototypes for the CoreFoundation calls used here.

    sc is the loaded SystemConfiguration library.  Without an explicit
    restype ctypes assumes a C int result, which truncates pointer
    results on 64-bit builds, so every function called through sc --
    including the CFArray accessors used to walk the proxy exceptions
    list -- gets its argument and result types declared.
    """
    from ctypes import c_int32, c_void_p, c_char_p, c_int, c_long
    prototypes = (
        # (function name, argtypes, restype)
        ('CFStringCreateWithCString', [c_void_p, c_char_p, c_int32], c_void_p),
        ('SCDynamicStoreCopyProxies', [c_void_p], c_void_p),
        ('CFDictionaryGetValue', [c_void_p, c_void_p], c_void_p),
        ('CFStringGetLength', [c_void_p], c_int32),
        ('CFStringGetCString', [c_void_p, c_char_p, c_int32, c_int32], c_int32),
        ('CFNumberGetValue', [c_void_p, c_int, c_void_p], c_int32),
        # CFIndex is a signed long; the array element comes back as a pointer.
        ('CFArrayGetCount', [c_void_p], c_long),
        ('CFArrayGetValueAtIndex', [c_void_p, c_long], c_void_p),
        ('CFRelease', [c_void_p], None),
    )
    for name, argtypes, restype in prototypes:
        func = getattr(sc, name)
        func.argtypes = argtypes
        func.restype = restype
def _CStringFromCFString(sc, value):
    """Copy the CFString value into a Python byte string via sc."""
    from ctypes import create_string_buffer
    # One extra byte beyond the reported length for the trailing NUL.
    size = sc.CFStringGetLength(value) + 1
    scratch = create_string_buffer(size)
    sc.CFStringGetCString(value, scratch, size, 0)
    return scratch.value
def _CFNumberToInt32(sc, cfnum):
    """Read the CFNumber cfnum through sc and return it as a Python int."""
    from ctypes import byref, c_int
    kCFNumberSInt32Type = 3
    result = c_int()
    # CFNumberGetValue stores the converted number into result.
    sc.CFNumberGetValue(cfnum, kCFNumberSInt32Type, byref(result))
    return result.value
def proxy_bypass_macosx_sysconf(host):
    """
    Return True iff this host shouldn't be accessed using a proxy

    This function uses the MacOSX framework SystemConfiguration
    to fetch the proxy information.

    host may carry a ':port' suffix; the port is ignored for the
    address lookup.  Exception entries are either shell-style host
    patterns (*.local) or dotted numeric prefixes with an optional
    '/bits' mask (169.254/16).
    """
    from ctypes import cdll
    from ctypes.util import find_library
    import re
    import socket
    from fnmatch import fnmatch

    def ip2num(ipAddr):
        # Pack up to four dotted decimal parts into one 32-bit number,
        # padding missing trailing parts with zeros.
        parts = [int(part) for part in ipAddr.split('.')]
        if len(parts) != 4:
            parts = (parts + [0, 0, 0, 0])[:4]
        return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]

    sc = cdll.LoadLibrary(find_library("SystemConfiguration"))
    if not sc:
        return False
    _CFSetup(sc)

    # Resolve by bare host name; gethostbyname() rejects 'host:port'.
    hostonly, port = splitport(host)
    hostIP = None
    proxyDict = None

    kSCPropNetProxiesExceptionsList = sc.CFStringCreateWithCString(0, "ExceptionsList", 0)
    kSCPropNetProxiesExcludeSimpleHostnames = sc.CFStringCreateWithCString(0,
        "ExcludeSimpleHostnames", 0)

    try:
        proxyDict = sc.SCDynamicStoreCopyProxies(None)
        if proxyDict is None:
            return False

        # Check for simple host names:
        if '.' not in host:
            exclude_simple = sc.CFDictionaryGetValue(proxyDict,
                kSCPropNetProxiesExcludeSimpleHostnames)
            if exclude_simple and _CFNumberToInt32(sc, exclude_simple):
                return True

        # Check the exceptions list:
        exceptions = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesExceptionsList)
        if exceptions:
            # Items in the list are strings like these: *.local, 169.254/16
            for index in xrange(sc.CFArrayGetCount(exceptions)):
                value = sc.CFArrayGetValueAtIndex(exceptions, index)
                if not value: continue
                value = _CStringFromCFString(sc, value)

                m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
                if m is not None:
                    if hostIP is None:
                        try:
                            hostIP = ip2num(socket.gethostbyname(hostonly))
                        except socket.error:
                            # Host cannot be resolved, so it cannot match
                            # an address-based entry; try the next one.
                            continue

                    base = ip2num(m.group(1))
                    mask = m.group(2)
                    if mask is None:
                        # No explicit '/bits': each dotted part given
                        # stands for eight significant bits.
                        mask = 8 * (m.group(1).count('.') + 1)
                    else:
                        mask = int(mask[1:])
                    mask = 32 - mask

                    if (hostIP >> mask) == (base >> mask):
                        return True

                elif fnmatch(host, value):
                    return True

        return False

    finally:
        # The dictionary came from a Copy call, so release it together
        # with the two key strings created above.
        if proxyDict is not None:
            sc.CFRelease(proxyDict)
        sc.CFRelease(kSCPropNetProxiesExceptionsList)
        sc.CFRelease(kSCPropNetProxiesExcludeSimpleHostnames)
def getproxies_macosx_sysconf():
    """Return a dictionary of scheme -> proxy server URL mappings.

    This function uses the MacOSX framework SystemConfiguration
    to fetch the proxy information.

    The schemes looked up are http, https, ftp and gopher.  Every
    enabled proxy is reported as an 'http://host[:port]' URL.
    """
    from ctypes import cdll
    from ctypes.util import find_library

    sc = cdll.LoadLibrary(find_library("SystemConfiguration"))
    if not sc:
        return {}
    _CFSetup(sc)

    proxies = {}
    proxyDict = sc.SCDynamicStoreCopyProxies(None)
    if not proxyDict:
        # No settings dictionary: nothing to query and nothing to release.
        return proxies

    # (scheme key in the result, key-name prefix in the settings dict)
    schemes = (("http", "HTTP"), ("https", "HTTPS"),
               ("ftp", "FTP"), ("gopher", "Gopher"))

    try:
        for scheme, prefix in schemes:
            keys = []
            try:
                # <prefix>Enable, <prefix>Proxy and <prefix>Port, e.g.
                # "HTTPEnable", "HTTPProxy", "HTTPPort".
                for suffix in ("Enable", "Proxy", "Port"):
                    keys.append(sc.CFStringCreateWithCString(0, prefix + suffix, 0))
                kEnable, kProxy, kPort = keys

                enabled = sc.CFDictionaryGetValue(proxyDict, kEnable)
                if enabled and _CFNumberToInt32(sc, enabled):
                    proxy = sc.CFDictionaryGetValue(proxyDict, kProxy)
                    port = sc.CFDictionaryGetValue(proxyDict, kPort)

                    if proxy:
                        proxy = _CStringFromCFString(sc, proxy)
                        if port:
                            port = _CFNumberToInt32(sc, port)
                            proxies[scheme] = "http://%s:%i" % (proxy, port)
                        else:
                            proxies[scheme] = "http://%s" % (proxy, )
            finally:
                # Release every key string created for this scheme, also
                # when one of the lookups above raised.
                for key in keys:
                    if key:
                        sc.CFRelease(key)
    finally:
        sc.CFRelease(proxyDict)

    return proxies
def proxy_bypass(host):
    """Report whether host should be reached without a proxy.

    The environment decides whenever it defines any proxies; otherwise
    the SystemConfiguration settings are consulted.
    """
    if getproxies_environment():
        return proxy_bypass_environment(host)
    return proxy_bypass_macosx_sysconf(host)
def getproxies():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Environment settings win; the SystemConfiguration settings are
    used only when the environment defines no proxies.
    """
    from_env = getproxies_environment()
    if from_env:
        return from_env
    return getproxies_macosx_sysconf()
1560 elif os.name == 'nt':
def getproxies_registry():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Win32 uses the registry to store proxies.

    An empty dictionary is returned when _winreg is unavailable, when
    the registry values cannot be read, or when proxies are disabled.
    """
    proxies = {}
    try:
        import _winreg
    except ImportError:
        # Std module, so should be around - but you never know!
        return proxies
    try:
        internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
            r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
        proxyEnable = _winreg.QueryValueEx(internetSettings,
                                           'ProxyEnable')[0]
        if proxyEnable:
            # Returned as Unicode but problems if not converted to ASCII
            proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                   'ProxyServer')[0])
            if '=' in proxyServer:
                # Per-protocol settings: "proto=address;proto=address"
                import re
                for entry in proxyServer.split(';'):
                    protocol, address = entry.split('=', 1)
                    # Add a type:// prefix when the address has none.
                    if not re.match('^([^/:]+)://', address):
                        address = '%s://%s' % (protocol, address)
                    proxies[protocol] = address
            elif proxyServer[:5] == 'http:':
                # One setting that is already a complete http URL.
                proxies['http'] = proxyServer
            else:
                # Use one bare host[:port] setting for both protocols.
                proxies['http'] = 'http://%s' % proxyServer
                proxies['ftp'] = 'ftp://%s' % proxyServer
        internetSettings.Close()
    except (WindowsError, ValueError, TypeError):
        # Either registry key not found etc, or the value in an
        # unexpected format.
        # proxies already set up to be empty so nothing to do
        pass
    return proxies
def getproxies():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Returns settings gathered from the environment, if specified,
    or the registry.
    """
    from_env = getproxies_environment()
    if from_env:
        return from_env
    return getproxies_registry()
def proxy_bypass_registry(host):
    """Return 1 if the registry's ProxyOverride list covers host, else 0.

    host may include a ':port' suffix.  0 is also returned when the
    registry cannot be read, proxies are disabled or the override list
    is empty.
    """
    try:
        import _winreg
        import re
    except ImportError:
        # Std modules, so should be around - but you never know!
        return 0
    try:
        internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
            r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
        proxyEnable = _winreg.QueryValueEx(internetSettings,
                                           'ProxyEnable')[0]
        # Returned as Unicode but problems if not converted to ASCII
        proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                 'ProxyOverride')[0])
    except WindowsError:
        return 0
    if not proxyEnable or not proxyOverride:
        return 0

    # Candidate spellings of the host: the name as given plus, when they
    # differ, its IP address and its fully qualified name.
    rawHost, port = splitport(host)
    host = [rawHost]
    try:
        addr = socket.gethostbyname(rawHost)
        if addr != rawHost:
            host.append(addr)
    except socket.error:
        pass
    try:
        fqdn = socket.getfqdn(rawHost)
        if fqdn != rawHost:
            host.append(fqdn)
    except socket.error:
        pass

    # Turn the registry entry into a list of patterns, replacing the
    # '<local>' marker by the local host's names and addresses.
    proxyOverride = proxyOverride.split(';')
    pos = 0
    while pos < len(proxyOverride):
        if proxyOverride[pos] == '<local>':
            proxyOverride[pos:pos+1] = ['localhost',
                                        '127.0.0.1',
                                        socket.gethostname(),
                                        socket.gethostbyname(
                                            socket.gethostname())]
        pos += 1

    # Translate each glob-style entry to a regular expression and try
    # it against every candidate.
    for pattern in proxyOverride:
        pattern = (pattern.replace(".", r"\.")    # mask dots
                          .replace("*", r".*")    # change glob sequence
                          .replace("?", r"."))    # change glob char
        for candidate in host:
            if re.match(pattern, candidate, re.I):
                return 1
    return 0
def proxy_bypass(host):
    """Report whether host should be reached without a proxy.

    The environment decides whenever it defines any proxies; otherwise
    the registry settings are consulted.
    """
    if getproxies_environment():
        return proxy_bypass_environment(host)
    return proxy_bypass_registry(host)
1686 else:
1687 # By default use environment variables
1688 getproxies = getproxies_environment
1689 proxy_bypass = proxy_bypass_environment
1691 # Test and time quote() and unquote()
1692 def test1():
1693 s = ''
1694 for i in range(256): s = s + chr(i)
1695 s = s*4
1696 t0 = time.time()
1697 qs = quote(s)
1698 uqs = unquote(qs)
1699 t1 = time.time()
1700 if uqs != s:
1701 print 'Wrong!'
1702 print repr(s)
1703 print repr(qs)
1704 print repr(uqs)
1705 print round(t1 - t0, 3), 'sec'
def reporthook(blocknum, blocksize, totalsize):
    """Print one progress line; passed to urlretrieve() by test()."""
    # Report during remote transfers
    report = "Block number: %d, Block size: %d, Total size: %d" % (
        blocknum, blocksize, totalsize)
    print(report)
1713 # Test program
1714 def test(args=[]):
1715 if not args:
1716 args = [
1717 '/etc/passwd',
1718 'file:/etc/passwd',
1719 'file://localhost/etc/passwd',
1720 'ftp://ftp.gnu.org/pub/README',
1721 'http://www.python.org/index.html',
1723 if hasattr(URLopener, "open_https"):
1724 args.append('https://synergy.as.cmu.edu/~geek/')
1725 try:
1726 for url in args:
1727 print '-'*10, url, '-'*10
1728 fn, h = urlretrieve(url, None, reporthook)
1729 print fn
1730 if h:
1731 print '======'
1732 for k in h.keys(): print k + ':', h[k]
1733 print '======'
1734 fp = open(fn, 'rb')
1735 data = fp.read()
1736 del fp
1737 if '\r' in data:
1738 table = string.maketrans("", "")
1739 data = data.translate(table, "\r")
1740 print data
1741 fn, h = None, None
1742 print '-'*40
1743 finally:
1744 urlcleanup()
1746 def main():
1747 import getopt, sys
1748 try:
1749 opts, args = getopt.getopt(sys.argv[1:], "th")
1750 except getopt.error, msg:
1751 print msg
1752 print "Use -h for help"
1753 return
1754 t = 0
1755 for o, a in opts:
1756 if o == '-t':
1757 t = t + 1
1758 if o == '-h':
1759 print "Usage: python urllib.py [-t] [url ...]"
1760 print "-t runs self-test;",
1761 print "otherwise, contents of urls are printed"
1762 return
1763 if t:
1764 if t > 1:
1765 test1()
1766 test(args)
1767 else:
1768 if not args:
1769 print "Use -h for help"
1770 for url in args:
1771 print urlopen(url).read(),
# Run the command-line driver (main) when this file is executed as a
# script rather than imported.
if __name__ == '__main__':
    main()