1 """Open an arbitrary URL.
3 See the following document for more info on URLs:
4 "Names and Addresses, URIs, URLs, URNs, URCs", at
5 http://www.w3.org/pub/WWW/Addressing/Overview.html
7 See also the HTTP spec (from which the error codes are derived):
8 "HTTP - Hypertext Transfer Protocol", at
9 http://www.w3.org/pub/WWW/Protocols/
11 Related standards and specs:
12 - RFC1808: the "relative URL" spec. (authoritative status)
13 - RFC1738 - the "URL standard". (authoritative status)
14 - RFC1630 - the "URI spec". (informational status)
16 The object returned by URLopener().open(file) will differ per
17 protocol. All you know is that it has methods read(), readline(),
18 readlines(), fileno(), close() and info(). The read*(), fileno()
19 and close() methods work like those of open files.
20 The info() method returns a mimetools.Message object which can be
21 used to query various info about the object, if available.
22 (mimetools.Message objects are queried with the getheader() method.)
30 from urlparse
import urljoin
as basejoin
33 __all__
= ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
34 "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
35 "urlencode", "url2pathname", "pathname2url", "splittag",
36 "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
37 "splittype", "splithost", "splituser", "splitpasswd", "splitport",
38 "splitnport", "splitquery", "splitattr", "splitvalue",
41 __version__
= '1.17' # XXX This version is not always updated :-(
43 MAXFTPCACHE
= 10 # Trim the ftp cache beyond this size
45 # Helper for non-unix systems
47 from macurl2path
import url2pathname
, pathname2url
49 from nturl2path
import url2pathname
, pathname2url
50 elif os
.name
== 'riscos':
51 from rourl2path
import url2pathname
, pathname2url
def url2pathname(pathname):
    """Convert a relative URL of the 'file' scheme to a file system path.

    OS-specific fallback (non-nt, non-riscos); not recommended for
    general use.
    """
    # On POSIX-like systems the URL path is the filesystem path,
    # modulo percent-escapes.
    path = unquote(pathname)
    return path
def pathname2url(pathname):
    """Convert a file system path to a relative URL of the 'file' scheme.

    OS-specific fallback (non-nt, non-riscos); not recommended for
    general use.
    """
    # Percent-escape characters that are not safe in a URL path.
    url = quote(pathname)
    return url
63 # This really consists of two pieces:
64 # (1) a class which handles opening of all sorts of URLs
65 # (plus assorted utilities etc.)
66 # (2) a set of functions for parsing URLs
67 # XXX Should these be separated out into different modules?
70 # Shortcut for basic usage
72 def urlopen(url
, data
=None, proxies
=None):
73 """Create a file-like object for the specified URL to read from."""
74 from warnings
import warnpy3k
75 warnings
.warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
76 "favor of urllib2.urlopen()", stacklevel
=2)
79 if proxies
is not None:
80 opener
= FancyURLopener(proxies
=proxies
)
82 opener
= FancyURLopener()
87 return opener
.open(url
)
89 return opener
.open(url
, data
)
90 def urlretrieve(url
, filename
=None, reporthook
=None, data
=None):
93 _urlopener
= FancyURLopener()
94 return _urlopener
.retrieve(url
, filename
, reporthook
, data
)
107 # exception raised when downloaded size does not match content-length
class ContentTooShortError(IOError):
    """Raised when a download delivers fewer bytes than Content-Length.

    The partially retrieved body is kept on ``content`` so callers can
    inspect or salvage it.
    """

    def __init__(self, message, content):
        # Stash the partial payload first; IOError only needs the message.
        self.content = content
        IOError.__init__(self, message)
115 """Class to open URLs.
116 This is a class rather than just a subroutine because we may need
117 more than one set of global protocol-specific options.
118 Note -- this is a base class for those who don't want the
119 automatic handling of errors type 302 (relocated) and 401
120 (authorization needed)."""
124 version
= "Python-urllib/%s" % __version__
127 def __init__(self
, proxies
=None, **x509
):
129 proxies
= getproxies()
130 assert hasattr(proxies
, 'has_key'), "proxies must be a mapping"
131 self
.proxies
= proxies
132 self
.key_file
= x509
.get('key_file')
133 self
.cert_file
= x509
.get('cert_file')
134 self
.addheaders
= [('User-Agent', self
.version
)]
135 self
.__tempfiles
= []
136 self
.__unlink
= os
.unlink
# See cleanup()
137 self
.tempcache
= None
138 # Undocumented feature: if you assign {} to tempcache,
139 # it is used to cache files retrieved with
140 # self.retrieve(). This is not enabled by default
141 # since it does not work for changing documents (and I
142 # haven't got the logic to check expiration headers
144 self
.ftpcache
= ftpcache
145 # Undocumented feature: you can use a different
146 # ftp cache by assigning to the .ftpcache member;
147 # in case you want logically independent URL openers
148 # XXX This is not threadsafe. Bah.
157 # This code sometimes runs when the rest of this module
158 # has already been deleted, so it can't use any globals
159 # or import anything.
161 for file in self
.__tempfiles
:
166 del self
.__tempfiles
[:]
168 self
.tempcache
.clear()
def addheader(self, *args):
    """Register an extra header for the HTTP interface only,
    e.g. u.addheader('Accept', 'sound/basic')."""
    # Headers are stored as (name, value, ...) tuples and emitted
    # verbatim by the open_http*/open_https machinery.
    header = args
    self.addheaders.append(header)
176 def open(self
, fullurl
, data
=None):
177 """Use URLopener().open(file) instead of open(file, 'r')."""
178 fullurl
= unwrap(toBytes(fullurl
))
179 if self
.tempcache
and fullurl
in self
.tempcache
:
180 filename
, headers
= self
.tempcache
[fullurl
]
181 fp
= open(filename
, 'rb')
182 return addinfourl(fp
, headers
, fullurl
)
183 urltype
, url
= splittype(fullurl
)
186 if urltype
in self
.proxies
:
187 proxy
= self
.proxies
[urltype
]
188 urltype
, proxyhost
= splittype(proxy
)
189 host
, selector
= splithost(proxyhost
)
190 url
= (host
, fullurl
) # Signal special case to open_*()
193 name
= 'open_' + urltype
195 name
= name
.replace('-', '_')
196 if not hasattr(self
, name
):
198 return self
.open_unknown_proxy(proxy
, fullurl
, data
)
200 return self
.open_unknown(fullurl
, data
)
203 return getattr(self
, name
)(url
)
205 return getattr(self
, name
)(url
, data
)
206 except socket
.error
, msg
:
207 raise IOError, ('socket error', msg
), sys
.exc_info()[2]
209 def open_unknown(self
, fullurl
, data
=None):
210 """Overridable interface to open unknown URL type."""
211 type, url
= splittype(fullurl
)
212 raise IOError, ('url error', 'unknown url type', type)
214 def open_unknown_proxy(self
, proxy
, fullurl
, data
=None):
215 """Overridable interface to open unknown URL type."""
216 type, url
= splittype(fullurl
)
217 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy
)
220 def retrieve(self
, url
, filename
=None, reporthook
=None, data
=None):
221 """retrieve(url) returns (filename, headers) for a local object
222 or (tempfilename, headers) for a remote object."""
223 url
= unwrap(toBytes(url
))
224 if self
.tempcache
and url
in self
.tempcache
:
225 return self
.tempcache
[url
]
226 type, url1
= splittype(url
)
227 if filename
is None and (not type or type == 'file'):
229 fp
= self
.open_local_file(url1
)
232 return url2pathname(splithost(url1
)[1]), hdrs
235 fp
= self
.open(url
, data
)
239 tfp
= open(filename
, 'wb')
242 garbage
, path
= splittype(url
)
243 garbage
, path
= splithost(path
or "")
244 path
, garbage
= splitquery(path
or "")
245 path
, garbage
= splitattr(path
or "")
246 suffix
= os
.path
.splitext(path
)[1]
247 (fd
, filename
) = tempfile
.mkstemp(suffix
)
248 self
.__tempfiles
.append(filename
)
249 tfp
= os
.fdopen(fd
, 'wb')
251 result
= filename
, headers
252 if self
.tempcache
is not None:
253 self
.tempcache
[url
] = result
259 if "content-length" in headers
:
260 size
= int(headers
["Content-Length"])
261 reporthook(blocknum
, bs
, size
)
270 reporthook(blocknum
, bs
, size
)
278 # raise exception if actual size does not match content-length header
279 if size
>= 0 and read
< size
:
280 raise ContentTooShortError("retrieval incomplete: got only %i out "
281 "of %i bytes" % (read
, size
), result
)
285 # Each method named open_<type> knows how to open that type of URL
287 def open_http(self
, url
, data
=None):
288 """Use HTTP protocol."""
292 if isinstance(url
, str):
293 host
, selector
= splithost(url
)
295 user_passwd
, host
= splituser(host
)
300 # check whether the proxy contains authorization information
301 proxy_passwd
, host
= splituser(host
)
302 # now we proceed with the url we want to obtain
303 urltype
, rest
= splittype(selector
)
306 if urltype
.lower() != 'http':
309 realhost
, rest
= splithost(rest
)
311 user_passwd
, realhost
= splituser(realhost
)
313 selector
= "%s://%s%s" % (urltype
, realhost
, rest
)
314 if proxy_bypass(realhost
):
317 #print "proxy via http:", host, selector
318 if not host
: raise IOError, ('http error', 'no host given')
322 proxy_auth
= base64
.b64encode(proxy_passwd
).strip()
328 auth
= base64
.b64encode(user_passwd
).strip()
331 h
= httplib
.HTTP(host
)
333 h
.putrequest('POST', selector
)
334 h
.putheader('Content-Type', 'application/x-www-form-urlencoded')
335 h
.putheader('Content-Length', '%d' % len(data
))
337 h
.putrequest('GET', selector
)
338 if proxy_auth
: h
.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth
)
339 if auth
: h
.putheader('Authorization', 'Basic %s' % auth
)
340 if realhost
: h
.putheader('Host', realhost
)
341 for args
in self
.addheaders
: h
.putheader(*args
)
343 errcode
, errmsg
, headers
= h
.getreply()
347 # something went wrong with the HTTP status line
348 raise IOError, ('http protocol error', 0,
349 'got a bad status line', None)
350 # According to RFC 2616, "2xx" code indicates that the client's
351 # request was successfully received, understood, and accepted.
352 if (200 <= errcode
< 300):
353 return addinfourl(fp
, headers
, "http:" + url
, errcode
)
356 return self
.http_error(url
, fp
, errcode
, errmsg
, headers
)
358 return self
.http_error(url
, fp
, errcode
, errmsg
, headers
, data
)
360 def http_error(self
, url
, fp
, errcode
, errmsg
, headers
, data
=None):
361 """Handle http errors.
362 Derived class can override this, or provide specific handlers
363 named http_error_DDD where DDD is the 3-digit error code."""
364 # First check if there's a specific handler for this error
365 name
= 'http_error_%d' % errcode
366 if hasattr(self
, name
):
367 method
= getattr(self
, name
)
369 result
= method(url
, fp
, errcode
, errmsg
, headers
)
371 result
= method(url
, fp
, errcode
, errmsg
, headers
, data
)
372 if result
: return result
373 return self
.http_error_default(url
, fp
, errcode
, errmsg
, headers
)
375 def http_error_default(self
, url
, fp
, errcode
, errmsg
, headers
):
376 """Default error handler: close the connection and raise IOError."""
379 raise IOError, ('http error', errcode
, errmsg
, headers
)
382 def open_https(self
, url
, data
=None):
383 """Use HTTPS protocol."""
388 if isinstance(url
, str):
389 host
, selector
= splithost(url
)
391 user_passwd
, host
= splituser(host
)
396 # here, we determine, whether the proxy contains authorization information
397 proxy_passwd
, host
= splituser(host
)
398 urltype
, rest
= splittype(selector
)
401 if urltype
.lower() != 'https':
404 realhost
, rest
= splithost(rest
)
406 user_passwd
, realhost
= splituser(realhost
)
408 selector
= "%s://%s%s" % (urltype
, realhost
, rest
)
409 #print "proxy via https:", host, selector
410 if not host
: raise IOError, ('https error', 'no host given')
413 proxy_auth
= base64
.b64encode(proxy_passwd
).strip()
418 auth
= base64
.b64encode(user_passwd
).strip()
421 h
= httplib
.HTTPS(host
, 0,
422 key_file
=self
.key_file
,
423 cert_file
=self
.cert_file
)
425 h
.putrequest('POST', selector
)
426 h
.putheader('Content-Type',
427 'application/x-www-form-urlencoded')
428 h
.putheader('Content-Length', '%d' % len(data
))
430 h
.putrequest('GET', selector
)
431 if proxy_auth
: h
.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth
)
432 if auth
: h
.putheader('Authorization', 'Basic %s' % auth
)
433 if realhost
: h
.putheader('Host', realhost
)
434 for args
in self
.addheaders
: h
.putheader(*args
)
436 errcode
, errmsg
, headers
= h
.getreply()
440 # something went wrong with the HTTP status line
441 raise IOError, ('http protocol error', 0,
442 'got a bad status line', None)
443 # According to RFC 2616, "2xx" code indicates that the client's
444 # request was successfully received, understood, and accepted.
445 if (200 <= errcode
< 300):
446 return addinfourl(fp
, headers
, "https:" + url
, errcode
)
449 return self
.http_error(url
, fp
, errcode
, errmsg
, headers
)
451 return self
.http_error(url
, fp
, errcode
, errmsg
, headers
,
454 def open_file(self
, url
):
455 """Use local file or FTP depending on form of URL."""
456 if not isinstance(url
, str):
457 raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
458 if url
[:2] == '//' and url
[2:3] != '/' and url
[2:12].lower() != 'localhost/':
459 return self
.open_ftp(url
)
461 return self
.open_local_file(url
)
463 def open_local_file(self
, url
):
464 """Use local file."""
465 import mimetypes
, mimetools
, email
.utils
467 from cStringIO
import StringIO
469 from StringIO
import StringIO
470 host
, file = splithost(url
)
471 localname
= url2pathname(file)
473 stats
= os
.stat(localname
)
475 raise IOError(e
.errno
, e
.strerror
, e
.filename
)
477 modified
= email
.utils
.formatdate(stats
.st_mtime
, usegmt
=True)
478 mtype
= mimetypes
.guess_type(url
)[0]
479 headers
= mimetools
.Message(StringIO(
480 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
481 (mtype
or 'text/plain', size
, modified
)))
485 urlfile
= 'file://' + file
486 return addinfourl(open(localname
, 'rb'),
488 host
, port
= splitport(host
)
490 and socket
.gethostbyname(host
) in (localhost(), thishost()):
493 urlfile
= 'file://' + file
494 return addinfourl(open(localname
, 'rb'),
496 raise IOError, ('local file error', 'not on local host')
498 def open_ftp(self
, url
):
499 """Use FTP protocol."""
500 if not isinstance(url
, str):
501 raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
502 import mimetypes
, mimetools
504 from cStringIO
import StringIO
506 from StringIO
import StringIO
507 host
, path
= splithost(url
)
508 if not host
: raise IOError, ('ftp error', 'no host given')
509 host
, port
= splitport(host
)
510 user
, host
= splituser(host
)
511 if user
: user
, passwd
= splitpasswd(user
)
514 user
= unquote(user
or '')
515 passwd
= unquote(passwd
or '')
516 host
= socket
.gethostbyname(host
)
519 port
= ftplib
.FTP_PORT
522 path
, attrs
= splitattr(path
)
524 dirs
= path
.split('/')
525 dirs
, file = dirs
[:-1], dirs
[-1]
526 if dirs
and not dirs
[0]: dirs
= dirs
[1:]
527 if dirs
and not dirs
[0]: dirs
[0] = '/'
528 key
= user
, host
, port
, '/'.join(dirs
)
530 if len(self
.ftpcache
) > MAXFTPCACHE
:
531 # Prune the cache, rather arbitrarily
532 for k
in self
.ftpcache
.keys():
538 if not key
in self
.ftpcache
:
539 self
.ftpcache
[key
] = \
540 ftpwrapper(user
, passwd
, host
, port
, dirs
)
541 if not file: type = 'D'
544 attr
, value
= splitvalue(attr
)
545 if attr
.lower() == 'type' and \
546 value
in ('a', 'A', 'i', 'I', 'd', 'D'):
548 (fp
, retrlen
) = self
.ftpcache
[key
].retrfile(file, type)
549 mtype
= mimetypes
.guess_type("ftp:" + url
)[0]
552 headers
+= "Content-Type: %s\n" % mtype
553 if retrlen
is not None and retrlen
>= 0:
554 headers
+= "Content-Length: %d\n" % retrlen
555 headers
= mimetools
.Message(StringIO(headers
))
556 return addinfourl(fp
, headers
, "ftp:" + url
)
557 except ftperrors(), msg
:
558 raise IOError, ('ftp error', msg
), sys
.exc_info()[2]
560 def open_data(self
, url
, data
=None):
561 """Use "data" URL."""
562 if not isinstance(url
, str):
563 raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
566 # syntax of data URLs:
567 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
568 # mediatype := [ type "/" subtype ] *( ";" parameter )
570 # parameter := attribute "=" value
573 from cStringIO
import StringIO
575 from StringIO
import StringIO
577 [type, data
] = url
.split(',', 1)
579 raise IOError, ('data error', 'bad data URL')
581 type = 'text/plain;charset=US-ASCII'
582 semi
= type.rfind(';')
583 if semi
>= 0 and '=' not in type[semi
:]:
584 encoding
= type[semi
+1:]
589 msg
.append('Date: %s'%time
.strftime('%a, %d %b %Y %T GMT',
590 time
.gmtime(time
.time())))
591 msg
.append('Content-type: %s' % type)
592 if encoding
== 'base64':
594 data
= base64
.decodestring(data
)
597 msg
.append('Content-Length: %d' % len(data
))
602 headers
= mimetools
.Message(f
, 0)
603 #f.fileno = None # needed for addinfourl
604 return addinfourl(f
, headers
, url
)
607 class FancyURLopener(URLopener
):
608 """Derived class with handlers for errors we can handle (perhaps)."""
610 def __init__(self
, *args
, **kwargs
):
611 URLopener
.__init
__(self
, *args
, **kwargs
)
def http_error_default(self, url, fp, errcode, errmsg, headers):
    """Default error handling: return the response instead of raising."""
    # Unlike URLopener, hand back a file-like wrapper so callers can
    # read the error body and inspect the code themselves.
    full_url = "http:" + url
    return addinfourl(fp, headers, full_url, errcode)
620 def http_error_302(self
, url
, fp
, errcode
, errmsg
, headers
, data
=None):
621 """Error 302 -- relocated (temporarily)."""
623 if self
.maxtries
and self
.tries
>= self
.maxtries
:
624 if hasattr(self
, "http_error_500"):
625 meth
= self
.http_error_500
627 meth
= self
.http_error_default
629 return meth(url
, fp
, 500,
630 "Internal Server Error: Redirect Recursion", headers
)
631 result
= self
.redirect_internal(url
, fp
, errcode
, errmsg
, headers
,
636 def redirect_internal(self
, url
, fp
, errcode
, errmsg
, headers
, data
):
637 if 'location' in headers
:
638 newurl
= headers
['location']
639 elif 'uri' in headers
:
640 newurl
= headers
['uri']
645 # In case the server sent a relative URL, join with original:
646 newurl
= basejoin(self
.type + ":" + url
, newurl
)
647 return self
.open(newurl
)
def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 301 -- relocated permanently; handled exactly like a 302."""
    handler = self.http_error_302
    return handler(url, fp, errcode, errmsg, headers, data)
def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
    """Error 303 -- see other; essentially identical to 302."""
    handler = self.http_error_302
    return handler(url, fp, errcode, errmsg, headers, data)
657 def http_error_307(self
, url
, fp
, errcode
, errmsg
, headers
, data
=None):
658 """Error 307 -- relocated, but turn POST into error."""
660 return self
.http_error_302(url
, fp
, errcode
, errmsg
, headers
, data
)
662 return self
.http_error_default(url
, fp
, errcode
, errmsg
, headers
)
664 def http_error_401(self
, url
, fp
, errcode
, errmsg
, headers
, data
=None):
665 """Error 401 -- authentication required.
666 This function supports Basic authentication only."""
667 if not 'www-authenticate' in headers
:
668 URLopener
.http_error_default(self
, url
, fp
,
669 errcode
, errmsg
, headers
)
670 stuff
= headers
['www-authenticate']
672 match
= re
.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff
)
674 URLopener
.http_error_default(self
, url
, fp
,
675 errcode
, errmsg
, headers
)
676 scheme
, realm
= match
.groups()
677 if scheme
.lower() != 'basic':
678 URLopener
.http_error_default(self
, url
, fp
,
679 errcode
, errmsg
, headers
)
680 name
= 'retry_' + self
.type + '_basic_auth'
682 return getattr(self
,name
)(url
, realm
)
684 return getattr(self
,name
)(url
, realm
, data
)
686 def http_error_407(self
, url
, fp
, errcode
, errmsg
, headers
, data
=None):
687 """Error 407 -- proxy authentication required.
688 This function supports Basic authentication only."""
689 if not 'proxy-authenticate' in headers
:
690 URLopener
.http_error_default(self
, url
, fp
,
691 errcode
, errmsg
, headers
)
692 stuff
= headers
['proxy-authenticate']
694 match
= re
.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff
)
696 URLopener
.http_error_default(self
, url
, fp
,
697 errcode
, errmsg
, headers
)
698 scheme
, realm
= match
.groups()
699 if scheme
.lower() != 'basic':
700 URLopener
.http_error_default(self
, url
, fp
,
701 errcode
, errmsg
, headers
)
702 name
= 'retry_proxy_' + self
.type + '_basic_auth'
704 return getattr(self
,name
)(url
, realm
)
706 return getattr(self
,name
)(url
, realm
, data
)
708 def retry_proxy_http_basic_auth(self
, url
, realm
, data
=None):
709 host
, selector
= splithost(url
)
710 newurl
= 'http://' + host
+ selector
711 proxy
= self
.proxies
['http']
712 urltype
, proxyhost
= splittype(proxy
)
713 proxyhost
, proxyselector
= splithost(proxyhost
)
714 i
= proxyhost
.find('@') + 1
715 proxyhost
= proxyhost
[i
:]
716 user
, passwd
= self
.get_user_passwd(proxyhost
, realm
, i
)
717 if not (user
or passwd
): return None
718 proxyhost
= quote(user
, safe
='') + ':' + quote(passwd
, safe
='') + '@' + proxyhost
719 self
.proxies
['http'] = 'http://' + proxyhost
+ proxyselector
721 return self
.open(newurl
)
723 return self
.open(newurl
, data
)
725 def retry_proxy_https_basic_auth(self
, url
, realm
, data
=None):
726 host
, selector
= splithost(url
)
727 newurl
= 'https://' + host
+ selector
728 proxy
= self
.proxies
['https']
729 urltype
, proxyhost
= splittype(proxy
)
730 proxyhost
, proxyselector
= splithost(proxyhost
)
731 i
= proxyhost
.find('@') + 1
732 proxyhost
= proxyhost
[i
:]
733 user
, passwd
= self
.get_user_passwd(proxyhost
, realm
, i
)
734 if not (user
or passwd
): return None
735 proxyhost
= quote(user
, safe
='') + ':' + quote(passwd
, safe
='') + '@' + proxyhost
736 self
.proxies
['https'] = 'https://' + proxyhost
+ proxyselector
738 return self
.open(newurl
)
740 return self
.open(newurl
, data
)
742 def retry_http_basic_auth(self
, url
, realm
, data
=None):
743 host
, selector
= splithost(url
)
744 i
= host
.find('@') + 1
746 user
, passwd
= self
.get_user_passwd(host
, realm
, i
)
747 if not (user
or passwd
): return None
748 host
= quote(user
, safe
='') + ':' + quote(passwd
, safe
='') + '@' + host
749 newurl
= 'http://' + host
+ selector
751 return self
.open(newurl
)
753 return self
.open(newurl
, data
)
755 def retry_https_basic_auth(self
, url
, realm
, data
=None):
756 host
, selector
= splithost(url
)
757 i
= host
.find('@') + 1
759 user
, passwd
= self
.get_user_passwd(host
, realm
, i
)
760 if not (user
or passwd
): return None
761 host
= quote(user
, safe
='') + ':' + quote(passwd
, safe
='') + '@' + host
762 newurl
= 'https://' + host
+ selector
764 return self
.open(newurl
)
766 return self
.open(newurl
, data
)
768 def get_user_passwd(self
, host
, realm
, clear_cache
= 0):
769 key
= realm
+ '@' + host
.lower()
770 if key
in self
.auth_cache
:
772 del self
.auth_cache
[key
]
774 return self
.auth_cache
[key
]
775 user
, passwd
= self
.prompt_user_passwd(host
, realm
)
776 if user
or passwd
: self
.auth_cache
[key
] = (user
, passwd
)
779 def prompt_user_passwd(self
, host
, realm
):
780 """Override this in a GUI environment!"""
783 user
= raw_input("Enter username for %s at %s: " % (realm
,
785 passwd
= getpass
.getpass("Enter password for %s in %s at %s: " %
788 except KeyboardInterrupt:
797 """Return the IP address of the magic hostname 'localhost'."""
799 if _localhost
is None:
800 _localhost
= socket
.gethostbyname('localhost')
805 """Return the IP address of the current host."""
807 if _thishost
is None:
808 _thishost
= socket
.gethostbyname(socket
.gethostname())
813 """Return the set of errors raised by the FTP class."""
815 if _ftperrors
is None:
817 _ftperrors
= ftplib
.all_errors
822 """Return an empty mimetools.Message object."""
824 if _noheaders
is None:
827 from cStringIO
import StringIO
829 from StringIO
import StringIO
830 _noheaders
= mimetools
.Message(StringIO(), 0)
831 _noheaders
.fp
.close() # Recycle file descriptor
838 """Class used by open_ftp() for cache of open FTP connections."""
840 def __init__(self
, user
, passwd
, host
, port
, dirs
,
841 timeout
=socket
._GLOBAL
_DEFAULT
_TIMEOUT
):
847 self
.timeout
= timeout
853 self
.ftp
= ftplib
.FTP()
854 self
.ftp
.connect(self
.host
, self
.port
, self
.timeout
)
855 self
.ftp
.login(self
.user
, self
.passwd
)
856 for dir in self
.dirs
:
859 def retrfile(self
, file, type):
862 if type in ('d', 'D'): cmd
= 'TYPE A'; isdir
= 1
863 else: cmd
= 'TYPE ' + type; isdir
= 0
865 self
.ftp
.voidcmd(cmd
)
866 except ftplib
.all_errors
:
868 self
.ftp
.voidcmd(cmd
)
870 if file and not isdir
:
871 # Try to retrieve as a file
874 conn
= self
.ftp
.ntransfercmd(cmd
)
875 except ftplib
.error_perm
, reason
:
876 if str(reason
)[:3] != '550':
877 raise IOError, ('ftp error', reason
), sys
.exc_info()[2]
879 # Set transfer mode to ASCII!
880 self
.ftp
.voidcmd('TYPE A')
881 # Try a directory listing. Verify that directory exists.
887 except ftplib
.error_perm
, reason
:
888 raise IOError, ('ftp error', reason
), sys
.exc_info()[2]
894 conn
= self
.ftp
.ntransfercmd(cmd
)
896 # Pass back both a suitably decorated object and a retrieval length
897 return (addclosehook(conn
[0].makefile('rb'),
898 self
.endtransfer
), conn
[1])
899 def endtransfer(self
):
916 """Base class for addinfo and addclosehook."""
918 def __init__(self
, fp
):
920 self
.read
= self
.fp
.read
921 self
.readline
= self
.fp
.readline
922 if hasattr(self
.fp
, "readlines"): self
.readlines
= self
.fp
.readlines
923 if hasattr(self
.fp
, "fileno"):
924 self
.fileno
= self
.fp
.fileno
926 self
.fileno
= lambda: None
927 if hasattr(self
.fp
, "__iter__"):
928 self
.__iter
__ = self
.fp
.__iter
__
929 if hasattr(self
.fp
, "next"):
930 self
.next
= self
.fp
.next
933 return '<%s at %r whose fp = %r>' % (self
.__class
__.__name
__,
939 self
.readlines
= None
941 if self
.fp
: self
.fp
.close()
944 class addclosehook(addbase
):
945 """Class to add a close hook to an open file."""
def __init__(self, fp, closehook, *hookargs):
    """Wrap fp, remembering a hook to call (with hookargs) on close."""
    # Record the hook before delegating; addbase.__init__ only wires
    # up the file-object plumbing and does not touch these attributes.
    self.closehook = closehook
    self.hookargs = hookargs
    addbase.__init__(self, fp)
955 self
.closehook(*self
.hookargs
)
956 self
.closehook
= None
959 class addinfo(addbase
):
960 """class to add an info() method to an open file."""
def __init__(self, fp, headers):
    """Wrap fp and keep the headers object for later info() queries."""
    self.headers = headers
    addbase.__init__(self, fp)
969 class addinfourl(addbase
):
970 """class to add info() and geturl() methods to an open file."""
972 def __init__(self
, fp
, headers
, url
, code
=None):
973 addbase
.__init
__(self
, fp
)
974 self
.headers
= headers
988 # Utilities to parse URLs (most of these return None for missing parts):
989 # unwrap('<URL:type://host/path>') --> 'type://host/path'
990 # splittype('type:opaquestring') --> 'type', 'opaquestring'
991 # splithost('//host[:port]/path') --> 'host[:port]', '/path'
992 # splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
993 # splitpasswd('user:passwd') -> 'user', 'passwd'
994 # splitport('host:port') --> 'host', 'port'
995 # splitquery('/path?query') --> '/path', 'query'
996 # splittag('/path#tag') --> '/path', 'tag'
997 # splitattr('/path;attr1=value1;attr2=value2;...') ->
998 # '/path', ['attr1=value1', 'attr2=value2', ...]
999 # splitvalue('attr=value') --> 'attr', 'value'
1000 # unquote('abc%20def') -> 'abc def'
1001 # quote('abc def') -> 'abc%20def')
1010 return isinstance(x
, unicode)
1013 """toBytes(u"URL") --> 'URL'."""
1014 # Most URL schemes require ASCII. If that changes, the conversion
1016 if _is_unicode(url
):
1018 url
= url
.encode("ASCII")
1019 except UnicodeError:
1020 raise UnicodeError("URL " + repr(url
) +
1021 " contains non-ASCII characters")
1025 """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
1027 if url
[:1] == '<' and url
[-1:] == '>':
1028 url
= url
[1:-1].strip()
1029 if url
[:4] == 'URL:': url
= url
[4:].strip()
1034 """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
1036 if _typeprog
is None:
1038 _typeprog
= re
.compile('^([^/:]+):')
1040 match
= _typeprog
.match(url
)
1042 scheme
= match
.group(1)
1043 return scheme
.lower(), url
[len(scheme
) + 1:]
1048 """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
1050 if _hostprog
is None:
1052 _hostprog
= re
.compile('^//([^/?]*)(.*)$')
1054 match
= _hostprog
.match(url
)
1055 if match
: return match
.group(1, 2)
1059 def splituser(host
):
1060 """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
1062 if _userprog
is None:
1064 _userprog
= re
.compile('^(.*)@(.*)$')
1066 match
= _userprog
.match(host
)
1067 if match
: return map(unquote
, match
.group(1, 2))
1071 def splitpasswd(user
):
1072 """splitpasswd('user:passwd') -> 'user', 'passwd'."""
1074 if _passwdprog
is None:
1076 _passwdprog
= re
.compile('^([^:]*):(.*)$',re
.S
)
1078 match
= _passwdprog
.match(user
)
1079 if match
: return match
.group(1, 2)
1082 # splittag('/path#tag') --> '/path', 'tag'
1084 def splitport(host
):
1085 """splitport('host:port') --> 'host', 'port'."""
1087 if _portprog
is None:
1089 _portprog
= re
.compile('^(.*):([0-9]+)$')
1091 match
= _portprog
.match(host
)
1092 if match
: return match
.group(1, 2)
1096 def splitnport(host
, defport
=-1):
1097 """Split host and port, returning numeric port.
1098 Return given default port if no ':' found; defaults to -1.
1099 Return numerical port if a valid number are found after ':'.
1100 Return None if ':' but not a valid number."""
1102 if _nportprog
is None:
1104 _nportprog
= re
.compile('^(.*):(.*)$')
1106 match
= _nportprog
.match(host
)
1108 host
, port
= match
.group(1, 2)
1110 if not port
: raise ValueError, "no digits"
1115 return host
, defport
1118 def splitquery(url
):
1119 """splitquery('/path?query') --> '/path', 'query'."""
1121 if _queryprog
is None:
1123 _queryprog
= re
.compile('^(.*)\?([^?]*)$')
1125 match
= _queryprog
.match(url
)
1126 if match
: return match
.group(1, 2)
1131 """splittag('/path#tag') --> '/path', 'tag'."""
1133 if _tagprog
is None:
1135 _tagprog
= re
.compile('^(.*)#([^#]*)$')
1137 match
= _tagprog
.match(url
)
1138 if match
: return match
.group(1, 2)
1142 """splitattr('/path;attr1=value1;attr2=value2;...') ->
1143 '/path', ['attr1=value1', 'attr2=value2', ...]."""
1144 words
= url
.split(';')
1145 return words
[0], words
[1:]
1148 def splitvalue(attr
):
1149 """splitvalue('attr=value') --> 'attr', 'value'."""
1151 if _valueprog
is None:
1153 _valueprog
= re
.compile('^([^=]*)=(.*)$')
1155 match
= _valueprog
.match(attr
)
1156 if match
: return match
.group(1, 2)
1159 _hextochr
= dict(('%02x' % i
, chr(i
)) for i
in range(256))
1160 _hextochr
.update(('%02X' % i
, chr(i
)) for i
in range(256))
1163 """unquote('abc%20def') -> 'abc def'."""
1165 for i
in xrange(1, len(res
)):
1168 res
[i
] = _hextochr
[item
[:2]] + item
[2:]
1171 except UnicodeDecodeError:
1172 res
[i
] = unichr(int(item
[:2], 16)) + item
[2:]
1175 def unquote_plus(s
):
1176 """unquote('%7e/abc+def') -> '~/abc def'"""
1177 s
= s
.replace('+', ' ')
1180 always_safe
= ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
1181 'abcdefghijklmnopqrstuvwxyz'
1185 def quote(s
, safe
= '/'):
1186 """quote('abc def') -> 'abc%20def'
1188 Each part of a URL, e.g. the path info, the query, etc., has a
1189 different set of reserved characters that must be quoted.
1191 RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
1192 the following reserved characters.
1194 reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
1197 Each of these characters is reserved in some component of a URL,
1198 but not necessarily in all of them.
1200 By default, the quote function is intended for quoting the path
1201 section of a URL. Thus, it will not encode '/'. This character
1202 is reserved, but in typical usage the quote function is being
1203 called on a path where the existing slash characters are used as
1204 reserved characters.
1206 cachekey
= (safe
, always_safe
)
1208 safe_map
= _safemaps
[cachekey
]
1212 for i
in range(256):
1214 safe_map
[c
] = (c
in safe
) and c
or ('%%%02X' % i
)
1215 _safemaps
[cachekey
] = safe_map
1216 res
= map(safe_map
.__getitem
__, s
)
1219 def quote_plus(s
, safe
= ''):
1220 """Quote the query fragment of a URL; replacing ' ' with '+'"""
1222 s
= quote(s
, safe
+ ' ')
1223 return s
.replace(' ', '+')
1224 return quote(s
, safe
)
1226 def urlencode(query
,doseq
=0):
1227 """Encode a sequence of two-element tuples or dictionary into a URL query string.
1229 If any values in the query arg are sequences and doseq is true, each
1230 sequence element is converted to a separate parameter.
1232 If the query arg is a sequence of two-element tuples, the order of the
1233 parameters in the output will match the order of parameters in the
1237 if hasattr(query
,"items"):
1239 query
= query
.items()
1241 # it's a bother at times that strings and string-like objects are
1244 # non-sequence items should not work with len()
1245 # non-empty strings will fail this
1246 if len(query
) and not isinstance(query
[0], tuple):
1248 # zero-length sequences of all types will get here and succeed,
1249 # but that's a minor nit - since the original implementation
1250 # allowed empty dicts that type of behavior probably should be
1251 # preserved for consistency
1253 ty
,va
,tb
= sys
.exc_info()
1254 raise TypeError, "not a valid non-string sequence or mapping object", tb
1258 # preserve old behavior
1260 k
= quote_plus(str(k
))
1261 v
= quote_plus(str(v
))
1262 l
.append(k
+ '=' + v
)
1265 k
= quote_plus(str(k
))
1266 if isinstance(v
, str):
1268 l
.append(k
+ '=' + v
)
1269 elif _is_unicode(v
):
1270 # is there a reasonable way to convert to ASCII?
1271 # encode generates a string, but "replace" or "ignore"
1272 # lose information and "strict" can raise UnicodeError
1273 v
= quote_plus(v
.encode("ASCII","replace"))
1274 l
.append(k
+ '=' + v
)
1277 # is this a sufficient test for sequence-ness?
1281 v
= quote_plus(str(v
))
1282 l
.append(k
+ '=' + v
)
1284 # loop over the sequence
1286 l
.append(k
+ '=' + quote_plus(str(elt
)))
1290 def getproxies_environment():
1291 """Return a dictionary of scheme -> proxy server URL mappings.
1293 Scan the environment for variables named <scheme>_proxy;
1294 this seems to be the standard convention. If you need a
1295 different way, you can pass a proxies dictionary to the
1296 [Fancy]URLopener constructor.
1300 for name
, value
in os
.environ
.items():
1302 if value
and name
[-6:] == '_proxy':
1303 proxies
[name
[:-6]] = value
1306 def proxy_bypass_environment(host
):
1307 """Test if proxies should not be used for a particular host.
1309 Checks the environment for a variable named no_proxy, which should
1310 be a list of DNS suffixes separated by commas, or '*' for all hosts.
1312 no_proxy
= os
.environ
.get('no_proxy', '') or os
.environ
.get('NO_PROXY', '')
1313 # '*' is special case for always bypass
1316 # strip port off host
1317 hostonly
, port
= splitport(host
)
1318 # check if the host ends with any of the DNS suffixes
1319 for name
in no_proxy
.split(','):
1320 if name
and (hostonly
.endswith(name
) or host
.endswith(name
)):
1322 # otherwise, don't bypass
if sys.platform == 'darwin':

    def _CFSetup(sc):
        # Declare ctypes argument/result types for every CoreFoundation /
        # SystemConfiguration entry point used below; without restype
        # declarations ctypes would truncate 64-bit pointers to C int.
        from ctypes import c_int32, c_void_p, c_char_p, c_int
        sc.CFStringCreateWithCString.argtypes = [ c_void_p, c_char_p, c_int32 ]
        sc.CFStringCreateWithCString.restype = c_void_p
        sc.SCDynamicStoreCopyProxies.argtypes = [ c_void_p ]
        sc.SCDynamicStoreCopyProxies.restype = c_void_p
        sc.CFDictionaryGetValue.argtypes = [ c_void_p, c_void_p ]
        sc.CFDictionaryGetValue.restype = c_void_p
        sc.CFStringGetLength.argtypes = [ c_void_p ]
        sc.CFStringGetLength.restype = c_int32
        sc.CFStringGetCString.argtypes = [ c_void_p, c_char_p, c_int32, c_int32 ]
        sc.CFStringGetCString.restype = c_int32
        sc.CFNumberGetValue.argtypes = [ c_void_p, c_int, c_void_p ]
        sc.CFNumberGetValue.restype = c_int32
        sc.CFRelease.argtypes = [ c_void_p ]
        sc.CFRelease.restype = None

    def _CStringFromCFString(sc, value):
        # Copy a CFString (opaque pointer) out into a Python string.
        from ctypes import create_string_buffer
        length = sc.CFStringGetLength(value) + 1
        buff = create_string_buffer(length)
        sc.CFStringGetCString(value, buff, length, 0)
        return buff.value

    def _CFNumberToInt32(sc, cfnum):
        # Extract a 32-bit integer from a CFNumber (opaque pointer).
        from ctypes import byref, c_int
        val = c_int()
        kCFNumberSInt32Type = 3  # CFNumber type constant for SInt32
        sc.CFNumberGetValue(cfnum, kCFNumberSInt32Type, byref(val))
        return val.value


    def proxy_bypass_macosx_sysconf(host):
        """
        Return True iff this host shouldn't be accessed using a proxy

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        from ctypes import cdll
        from ctypes.util import find_library

        import re
        import socket
        from fnmatch import fnmatch

        def ip2num(ipAddr):
            # Pack a (possibly truncated) dotted-quad like "169.254"
            # into a 32-bit integer for network-prefix comparison.
            parts = ipAddr.split('.')
            parts = map(int, parts)
            if len(parts) != 4:
                parts = (parts + [0, 0, 0, 0])[:4]
            return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]

        sc = cdll.LoadLibrary(find_library("SystemConfiguration"))
        _CFSetup(sc)

        hostIP = None  # resolved lazily, only if a CIDR exception exists

        if not sc:
            return False

        kSCPropNetProxiesExceptionsList = sc.CFStringCreateWithCString(0, "ExceptionsList", 0)
        kSCPropNetProxiesExcludeSimpleHostnames = sc.CFStringCreateWithCString(0,
                "ExcludeSimpleHostnames", 0)

        proxyDict = sc.SCDynamicStoreCopyProxies(None)
        if proxyDict is None:
            return False

        try:
            # Check for simple host names:
            if '.' not in host:
                exclude_simple = sc.CFDictionaryGetValue(proxyDict,
                        kSCPropNetProxiesExcludeSimpleHostnames)
                if exclude_simple and _CFNumberToInt32(sc, exclude_simple):
                    return True

            # Check the exceptions list:
            exceptions = sc.CFDictionaryGetValue(proxyDict,
                    kSCPropNetProxiesExceptionsList)
            if exceptions:
                # Items in the list are strings like these: *.local, 169.254/16
                for index in xrange(sc.CFArrayGetCount(exceptions)):
                    value = sc.CFArrayGetValueAtIndex(exceptions, index)
                    if not value: continue
                    value = _CStringFromCFString(sc, value)

                    m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
                    if m is not None:
                        # Numeric entry: compare network prefixes.
                        if hostIP is None:
                            hostIP = socket.gethostbyname(host)
                            hostIP = ip2num(hostIP)

                        base = ip2num(m.group(1))
                        mask = int(m.group(2)[1:])
                        # Convert prefix length to a right-shift count.
                        mask = 32 - mask

                        if (hostIP >> mask) == (base >> mask):
                            return True

                    elif fnmatch(host, value):
                        # Glob-style entry such as *.local
                        return True

            return False

        finally:
            # Balance the CFStringCreateWithCString calls above
            # (CoreFoundation "Create Rule": we own what we create).
            sc.CFRelease(kSCPropNetProxiesExceptionsList)
            sc.CFRelease(kSCPropNetProxiesExcludeSimpleHostnames)

    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        from ctypes import cdll
        from ctypes.util import find_library

        sc = cdll.LoadLibrary(find_library("SystemConfiguration"))
        _CFSetup(sc)

        if not sc:
            return {}

        kSCPropNetProxiesHTTPEnable = sc.CFStringCreateWithCString(0, "HTTPEnable", 0)
        kSCPropNetProxiesHTTPProxy = sc.CFStringCreateWithCString(0, "HTTPProxy", 0)
        kSCPropNetProxiesHTTPPort = sc.CFStringCreateWithCString(0, "HTTPPort", 0)

        kSCPropNetProxiesHTTPSEnable = sc.CFStringCreateWithCString(0, "HTTPSEnable", 0)
        kSCPropNetProxiesHTTPSProxy = sc.CFStringCreateWithCString(0, "HTTPSProxy", 0)
        kSCPropNetProxiesHTTPSPort = sc.CFStringCreateWithCString(0, "HTTPSPort", 0)

        kSCPropNetProxiesFTPEnable = sc.CFStringCreateWithCString(0, "FTPEnable", 0)
        kSCPropNetProxiesFTPPassive = sc.CFStringCreateWithCString(0, "FTPPassive", 0)
        kSCPropNetProxiesFTPPort = sc.CFStringCreateWithCString(0, "FTPPort", 0)
        kSCPropNetProxiesFTPProxy = sc.CFStringCreateWithCString(0, "FTPProxy", 0)

        kSCPropNetProxiesGopherEnable = sc.CFStringCreateWithCString(0, "GopherEnable", 0)
        kSCPropNetProxiesGopherPort = sc.CFStringCreateWithCString(0, "GopherPort", 0)
        kSCPropNetProxiesGopherProxy = sc.CFStringCreateWithCString(0, "GopherProxy", 0)

        proxies = {}
        proxyDict = sc.SCDynamicStoreCopyProxies(None)

        # Note: proxy URLs deliberately use the http:// scheme for all
        # protocols -- that is how a CONNECT-style proxy is addressed.

        # HTTP:
        enabled = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPEnable)
        if enabled and _CFNumberToInt32(sc, enabled):
            proxy = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPProxy)
            port = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPPort)

            if proxy:
                proxy = _CStringFromCFString(sc, proxy)
                if port:
                    port = _CFNumberToInt32(sc, port)
                    proxies["http"] = "http://%s:%i" % (proxy, port)
                else:
                    proxies["http"] = "http://%s" % (proxy, )

        # HTTPS:
        enabled = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPSEnable)
        if enabled and _CFNumberToInt32(sc, enabled):
            proxy = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPSProxy)
            port = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesHTTPSPort)

            if proxy:
                proxy = _CStringFromCFString(sc, proxy)
                if port:
                    port = _CFNumberToInt32(sc, port)
                    proxies["https"] = "http://%s:%i" % (proxy, port)
                else:
                    proxies["https"] = "http://%s" % (proxy, )

        # FTP:
        enabled = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesFTPEnable)
        if enabled and _CFNumberToInt32(sc, enabled):
            proxy = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesFTPProxy)
            port = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesFTPPort)

            if proxy:
                proxy = _CStringFromCFString(sc, proxy)
                if port:
                    port = _CFNumberToInt32(sc, port)
                    proxies["ftp"] = "http://%s:%i" % (proxy, port)
                else:
                    proxies["ftp"] = "http://%s" % (proxy, )

        # Gopher:
        enabled = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesGopherEnable)
        if enabled and _CFNumberToInt32(sc, enabled):
            proxy = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesGopherProxy)
            port = sc.CFDictionaryGetValue(proxyDict, kSCPropNetProxiesGopherPort)

            if proxy:
                proxy = _CStringFromCFString(sc, proxy)
                if port:
                    port = _CFNumberToInt32(sc, port)
                    proxies["gopher"] = "http://%s:%i" % (proxy, port)
                else:
                    proxies["gopher"] = "http://%s" % (proxy, )

        sc.CFRelease(proxyDict)

        sc.CFRelease(kSCPropNetProxiesHTTPEnable)
        sc.CFRelease(kSCPropNetProxiesHTTPProxy)
        sc.CFRelease(kSCPropNetProxiesHTTPPort)
        # Bug fix: the three HTTPS key strings created above were never
        # released, leaking CF objects on every call.
        sc.CFRelease(kSCPropNetProxiesHTTPSEnable)
        sc.CFRelease(kSCPropNetProxiesHTTPSProxy)
        sc.CFRelease(kSCPropNetProxiesHTTPSPort)
        sc.CFRelease(kSCPropNetProxiesFTPEnable)
        sc.CFRelease(kSCPropNetProxiesFTPPassive)
        sc.CFRelease(kSCPropNetProxiesFTPPort)
        sc.CFRelease(kSCPropNetProxiesFTPProxy)
        sc.CFRelease(kSCPropNetProxiesGopherEnable)
        sc.CFRelease(kSCPropNetProxiesGopherPort)
        sc.CFRelease(kSCPropNetProxiesGopherProxy)

        return proxies

    def proxy_bypass(host):
        """Test whether *host* should bypass the proxy.

        Environment variables take precedence over the system
        configuration when any <scheme>_proxy variable is set.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        """Return proxies from the environment or, failing that, from
        the MacOSX system configuration."""
        return getproxies_environment() or getproxies_macosx_sysconf()
1557 elif os
.name
== 'nt':
1558 def getproxies_registry():
1559 """Return a dictionary of scheme -> proxy server URL mappings.
1561 Win32 uses the registry to store proxies.
1568 # Std module, so should be around - but you never know!
1571 internetSettings
= _winreg
.OpenKey(_winreg
.HKEY_CURRENT_USER
,
1572 r
'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1573 proxyEnable
= _winreg
.QueryValueEx(internetSettings
,
1576 # Returned as Unicode but problems if not converted to ASCII
1577 proxyServer
= str(_winreg
.QueryValueEx(internetSettings
,
1579 if '=' in proxyServer
:
1580 # Per-protocol settings
1581 for p
in proxyServer
.split(';'):
1582 protocol
, address
= p
.split('=', 1)
1583 # See if address has a type:// prefix
1585 if not re
.match('^([^/:]+)://', address
):
1586 address
= '%s://%s' % (protocol
, address
)
1587 proxies
[protocol
] = address
1589 # Use one setting for all protocols
1590 if proxyServer
[:5] == 'http:':
1591 proxies
['http'] = proxyServer
1593 proxies
['http'] = 'http://%s' % proxyServer
1594 proxies
['ftp'] = 'ftp://%s' % proxyServer
1595 internetSettings
.Close()
1596 except (WindowsError, ValueError, TypeError):
1597 # Either registry key not found etc, or the value in an
1598 # unexpected format.
1599 # proxies already set up to be empty so nothing to do
1604 """Return a dictionary of scheme -> proxy server URL mappings.
1606 Returns settings gathered from the environment, if specified,
1610 return getproxies_environment() or getproxies_registry()
1612 def proxy_bypass_registry(host
):
1617 # Std modules, so should be around - but you never know!
1620 internetSettings
= _winreg
.OpenKey(_winreg
.HKEY_CURRENT_USER
,
1621 r
'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1622 proxyEnable
= _winreg
.QueryValueEx(internetSettings
,
1624 proxyOverride
= str(_winreg
.QueryValueEx(internetSettings
,
1625 'ProxyOverride')[0])
1626 # ^^^^ Returned as Unicode but problems if not converted to ASCII
1627 except WindowsError:
1629 if not proxyEnable
or not proxyOverride
:
1631 # try to make a host list from name and IP address.
1632 rawHost
, port
= splitport(host
)
1635 addr
= socket
.gethostbyname(rawHost
)
1638 except socket
.error
:
1641 fqdn
= socket
.getfqdn(rawHost
)
1644 except socket
.error
:
1646 # make a check value list from the registry entry: replace the
1647 # '<local>' string by the localhost entry and the corresponding
1649 proxyOverride
= proxyOverride
.split(';')
1651 while i
< len(proxyOverride
):
1652 if proxyOverride
[i
] == '<local>':
1653 proxyOverride
[i
:i
+1] = ['localhost',
1655 socket
.gethostname(),
1656 socket
.gethostbyname(
1657 socket
.gethostname())]
1659 # print proxyOverride
1660 # now check if we match one of the registry values.
1661 for test
in proxyOverride
:
1662 test
= test
.replace(".", r
"\.") # mask dots
1663 test
= test
.replace("*", r
".*") # change glob sequence
1664 test
= test
.replace("?", r
".") # change glob char
1666 # print "%s <--> %s" %( test, val )
1667 if re
.match(test
, val
, re
.I
):
1671 def proxy_bypass(host
):
1672 """Return a dictionary of scheme -> proxy server URL mappings.
1674 Returns settings gathered from the environment, if specified,
1678 if getproxies_environment():
1679 return proxy_bypass_environment(host
)
1681 return proxy_bypass_registry(host
)
else:
    # By default use environment variables
    # (non-darwin, non-Windows platforms have no system proxy store,
    # so the environment-based implementations serve as the public API)
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment
# Test and time quote() and unquote()
def test1():
    """Self-test: round-trip all 256 byte values through quote() and
    unquote(), verify the result, and print the elapsed time."""
    # NOTE(review): interior lines restored from stock CPython 2.x
    # urllib -- confirm against version control.
    s = ''
    for i in range(256): s = s + chr(i)
    s = s*4
    t0 = time.time()
    qs = quote(s)
    uqs = unquote(qs)
    t1 = time.time()
    if uqs != s:
        print 'Wrong!'
    print repr(s)
    print repr(qs)
    print repr(uqs)
    print round(t1 - t0, 3), 'sec'
def reporthook(blocknum, blocksize, totalsize):
    """Sample urlretrieve() progress callback: print one line per block
    with the block index, block size and total size (in bytes)."""
    # Report during remote transfers
    print "Block number: %d, Block size: %d, Total size: %d" % (
        blocknum, blocksize, totalsize)
# Test program
def test(args=[]):
    """Fetch each URL in *args* (or a default sample list) with
    urlretrieve(), printing the headers and body of each response.

    The [] default is safe here: *args* is rebound to a fresh list when
    empty, so the shared default list is never mutated.
    """
    # NOTE(review): several interior lines restored from stock CPython
    # 2.x urllib -- confirm against version control.
    if not args:
        args = [
            '/etc/passwd',
            'file:/etc/passwd',
            'file://localhost/etc/passwd',
            'ftp://ftp.gnu.org/pub/README',
            'http://www.python.org/index.html',
            ]
        if hasattr(URLopener, "open_https"):
            args.append('https://synergy.as.cmu.edu/~geek/')
    try:
        for url in args:
            print '-'*10, url, '-'*10
            fn, h = urlretrieve(url, None, reporthook)
            print fn
            if h:
                print '======'
                for k in h.keys(): print k + ':', h[k]
                print '======'
            fp = open(fn, 'rb')
            data = fp.read()
            del fp
            if '\r' in data:
                # Normalize CRLF line endings for display.
                table = string.maketrans("", "")
                data = data.translate(table, "\r")
            print data
            fn, h = None, None
        print '-'*40
    finally:
        # Remove any temporary files created by urlretrieve().
        urlcleanup()
def main():
    """Command-line entry point: -t runs the self-tests, -h prints
    usage; otherwise each URL argument is fetched and printed."""
    # NOTE(review): interior lines restored from stock CPython 2.x
    # urllib -- confirm against version control.
    import getopt, sys
    try:
        opts, args = getopt.getopt(sys.argv[1:], "th")
    except getopt.error, msg:
        print msg
        print "Use -h for help"
        return
    t = 0
    for o, a in opts:
        if o == '-t':
            t = t + 1
        if o == '-h':
            print "Usage: python urllib.py [-t] [url ...]"
            print "-t runs self-test;",
            print "otherwise, contents of urls are printed"
            return
    if t:
        # -t once runs the retrieval test; twice also times quoting.
        if t > 1:
            test1()
        test(args)
    else:
        if not args:
            print "Use -h for help"
        for url in args:
            print urlopen(url).read(),
# Run test program when run as a script
if __name__ == '__main__':
    main()