Change to flush and close logic to fix #1760556.
[python.git] / Lib / urllib.py
blobad0e72b76845a822355d95aea87cfb33f9205287
1 """Open an arbitrary URL.
3 See the following document for more info on URLs:
4 "Names and Addresses, URIs, URLs, URNs, URCs", at
5 http://www.w3.org/pub/WWW/Addressing/Overview.html
7 See also the HTTP spec (from which the error codes are derived):
8 "HTTP - Hypertext Transfer Protocol", at
9 http://www.w3.org/pub/WWW/Protocols/
11 Related standards and specs:
12 - RFC1808: the "relative URL" spec. (authoritative status)
13 - RFC1738 - the "URL standard". (authoritative status)
14 - RFC1630 - the "URI spec". (informational status)
16 The object returned by URLopener().open(file) will differ per
17 protocol. All you know is that is has methods read(), readline(),
18 readlines(), fileno(), close() and info(). The read*(), fileno()
19 and close() methods work like those of open files.
20 The info() method returns a mimetools.Message object which can be
21 used to query various info about the object, if available.
22 (mimetools.Message objects are queried with the getheader() method.)
23 """
25 import string
26 import socket
27 import os
28 import time
29 import sys
30 from urlparse import urljoin as basejoin
# Public names exported by ``from urllib import *``.
__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
           "urlencode", "url2pathname", "pathname2url", "splittag",
           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
           "splitnport", "splitquery", "splitattr", "splitvalue",
           "getproxies"]

__version__ = '1.17'    # XXX This version is not always updated :-(

MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
# Helper for non-unix systems: pick the platform-specific path<->URL
# translation pair.
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    # Posix-style fallback: paths and URL paths differ only by
    # percent-quoting.
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        return quote(pathname)
62 # This really consists of two pieces:
63 # (1) a class which handles opening of all sorts of URLs
64 # (plus assorted utilities etc.)
65 # (2) a set of functions for parsing URLs
66 # XXX Should these be separated out into different modules?
# Shortcut for basic usage
_urlopener = None

def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object"""
    global _urlopener
    if proxies is not None:
        # An explicit proxy mapping gets its own opener (never cached).
        opener = FancyURLopener(proxies=proxies)
    elif _urlopener:
        opener = _urlopener
    else:
        # Lazily create and cache the module-wide default opener.
        opener = _urlopener = FancyURLopener()
    if data is None:
        return opener.open(url)
    return opener.open(url, data)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Copy a network object to a local file via the shared opener."""
    global _urlopener
    if not _urlopener:
        _urlopener = FancyURLopener()
    return _urlopener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Remove temporary files left behind by urlretrieve()."""
    opener = _urlopener
    if opener:
        opener.cleanup()
# check for SSL
try:
    import ssl
except ImportError:
    # Only a missing module means "no SSL"; a bare except here would
    # also swallow KeyboardInterrupt/SystemExit and unrelated errors.
    _have_ssl = False
else:
    _have_ssl = True
# Exception raised when the downloaded size does not match Content-Length.
class ContentTooShortError(IOError):
    """Raised when less data arrives than the server promised.

    The partial result is kept on the ``content`` attribute so callers
    can inspect or salvage it.
    """

    def __init__(self, message, content):
        IOError.__init__(self, message)
        self.content = content
# Default cache of open FTP connections, shared by all URLopener
# instances unless they override self.ftpcache.
ftpcache = {}
class URLopener:
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    # Class-level default so cleanup() is safe even if __init__ never ran.
    __tempfiles = None

    # User-Agent header value sent with every request.
    version = "Python-urllib/%s" % __version__
    # Constructor
    def __init__(self, proxies=None, **x509):
        """Initialize the opener.

        proxies -- mapping of scheme name to proxy URL; defaults to the
            environment-derived mapping from getproxies().
        x509 -- may contain 'key_file' and 'cert_file' for HTTPS client
            authentication.
        """
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
        self.proxies = proxies
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        # Headers added to every HTTP(S) request; see addheader().
        self.addheaders = [('User-Agent', self.version)]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve(). This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe. Bah.
    def __del__(self):
        # Best-effort cleanup when the opener is garbage collected.
        self.close()
    def close(self):
        """Release resources; currently just removes temporary files."""
        self.cleanup()
    def cleanup(self):
        """Delete all temporary files created via retrieve()."""
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    # self.__unlink is os.unlink, bound in __init__ so it
                    # is still reachable during interpreter shutdown.
                    self.__unlink(file)
                except OSError:
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()
165 def addheader(self, *args):
166 """Add a header to be used by the HTTP interface only
167 e.g. u.addheader('Accept', 'sound/basic')"""
168 self.addheaders.append(args)
    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r')."""
        fullurl = unwrap(toBytes(fullurl))
        # Serve from the (optional) retrieve() cache when enabled.
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = splittype(proxy)
            host, selector = splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        # Dispatch on scheme to a handler such as open_http()/open_ftp().
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        if not hasattr(self, name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except socket.error, msg:
            # Re-raise as IOError while preserving the original traceback.
            raise IOError, ('socket error', msg), sys.exc_info()[2]
204 def open_unknown(self, fullurl, data=None):
205 """Overridable interface to open unknown URL type."""
206 type, url = splittype(fullurl)
207 raise IOError, ('url error', 'unknown url type', type)
209 def open_unknown_proxy(self, proxy, fullurl, data=None):
210 """Overridable interface to open unknown URL type."""
211 type, url = splittype(fullurl)
212 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
214 # External interface
215 def retrieve(self, url, filename=None, reporthook=None, data=None):
216 """retrieve(url) returns (filename, headers) for a local object
217 or (tempfilename, headers) for a remote object."""
218 url = unwrap(toBytes(url))
219 if self.tempcache and url in self.tempcache:
220 return self.tempcache[url]
221 type, url1 = splittype(url)
222 if filename is None and (not type or type == 'file'):
223 try:
224 fp = self.open_local_file(url1)
225 hdrs = fp.info()
226 del fp
227 return url2pathname(splithost(url1)[1]), hdrs
228 except IOError, msg:
229 pass
230 fp = self.open(url, data)
231 headers = fp.info()
232 if filename:
233 tfp = open(filename, 'wb')
234 else:
235 import tempfile
236 garbage, path = splittype(url)
237 garbage, path = splithost(path or "")
238 path, garbage = splitquery(path or "")
239 path, garbage = splitattr(path or "")
240 suffix = os.path.splitext(path)[1]
241 (fd, filename) = tempfile.mkstemp(suffix)
242 self.__tempfiles.append(filename)
243 tfp = os.fdopen(fd, 'wb')
244 result = filename, headers
245 if self.tempcache is not None:
246 self.tempcache[url] = result
247 bs = 1024*8
248 size = -1
249 read = 0
250 blocknum = 0
251 if reporthook:
252 if "content-length" in headers:
253 size = int(headers["Content-Length"])
254 reporthook(blocknum, bs, size)
255 while 1:
256 block = fp.read(bs)
257 if block == "":
258 break
259 read += len(block)
260 tfp.write(block)
261 blocknum += 1
262 if reporthook:
263 reporthook(blocknum, bs, size)
264 fp.close()
265 tfp.close()
266 del fp
267 del tfp
269 # raise exception if actual size does not match content-length header
270 if size >= 0 and read < size:
271 raise ContentTooShortError("retrieval incomplete: got only %i out "
272 "of %i bytes" % (read, size), result)
274 return result
276 # Each method named open_<type> knows how to open that type of URL
278 def open_http(self, url, data=None):
279 """Use HTTP protocol."""
280 import httplib
281 user_passwd = None
282 proxy_passwd= None
283 if isinstance(url, str):
284 host, selector = splithost(url)
285 if host:
286 user_passwd, host = splituser(host)
287 host = unquote(host)
288 realhost = host
289 else:
290 host, selector = url
291 # check whether the proxy contains authorization information
292 proxy_passwd, host = splituser(host)
293 # now we proceed with the url we want to obtain
294 urltype, rest = splittype(selector)
295 url = rest
296 user_passwd = None
297 if urltype.lower() != 'http':
298 realhost = None
299 else:
300 realhost, rest = splithost(rest)
301 if realhost:
302 user_passwd, realhost = splituser(realhost)
303 if user_passwd:
304 selector = "%s://%s%s" % (urltype, realhost, rest)
305 if proxy_bypass(realhost):
306 host = realhost
308 #print "proxy via http:", host, selector
309 if not host: raise IOError, ('http error', 'no host given')
311 if proxy_passwd:
312 import base64
313 proxy_auth = base64.b64encode(proxy_passwd).strip()
314 else:
315 proxy_auth = None
317 if user_passwd:
318 import base64
319 auth = base64.b64encode(user_passwd).strip()
320 else:
321 auth = None
322 h = httplib.HTTP(host)
323 if data is not None:
324 h.putrequest('POST', selector)
325 h.putheader('Content-Type', 'application/x-www-form-urlencoded')
326 h.putheader('Content-Length', '%d' % len(data))
327 else:
328 h.putrequest('GET', selector)
329 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
330 if auth: h.putheader('Authorization', 'Basic %s' % auth)
331 if realhost: h.putheader('Host', realhost)
332 for args in self.addheaders: h.putheader(*args)
333 h.endheaders()
334 if data is not None:
335 h.send(data)
336 errcode, errmsg, headers = h.getreply()
337 fp = h.getfile()
338 if errcode == -1:
339 if fp: fp.close()
340 # something went wrong with the HTTP status line
341 raise IOError, ('http protocol error', 0,
342 'got a bad status line', None)
343 # According to RFC 2616, "2xx" code indicates that the client's
344 # request was successfully received, understood, and accepted.
345 if not (200 <= errcode < 300):
346 return addinfourl(fp, headers, "http:" + url)
347 else:
348 if data is None:
349 return self.http_error(url, fp, errcode, errmsg, headers)
350 else:
351 return self.http_error(url, fp, errcode, errmsg, headers, data)
353 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
354 """Handle http errors.
355 Derived class can override this, or provide specific handlers
356 named http_error_DDD where DDD is the 3-digit error code."""
357 # First check if there's a specific handler for this error
358 name = 'http_error_%d' % errcode
359 if hasattr(self, name):
360 method = getattr(self, name)
361 if data is None:
362 result = method(url, fp, errcode, errmsg, headers)
363 else:
364 result = method(url, fp, errcode, errmsg, headers, data)
365 if result: return result
366 return self.http_error_default(url, fp, errcode, errmsg, headers)
368 def http_error_default(self, url, fp, errcode, errmsg, headers):
369 """Default error handler: close the connection and raise IOError."""
370 void = fp.read()
371 fp.close()
372 raise IOError, ('http error', errcode, errmsg, headers)
374 if _have_ssl:
375 def open_https(self, url, data=None):
376 """Use HTTPS protocol."""
378 import httplib
379 user_passwd = None
380 proxy_passwd = None
381 if isinstance(url, str):
382 host, selector = splithost(url)
383 if host:
384 user_passwd, host = splituser(host)
385 host = unquote(host)
386 realhost = host
387 else:
388 host, selector = url
389 # here, we determine, whether the proxy contains authorization information
390 proxy_passwd, host = splituser(host)
391 urltype, rest = splittype(selector)
392 url = rest
393 user_passwd = None
394 if urltype.lower() != 'https':
395 realhost = None
396 else:
397 realhost, rest = splithost(rest)
398 if realhost:
399 user_passwd, realhost = splituser(realhost)
400 if user_passwd:
401 selector = "%s://%s%s" % (urltype, realhost, rest)
402 #print "proxy via https:", host, selector
403 if not host: raise IOError, ('https error', 'no host given')
404 if proxy_passwd:
405 import base64
406 proxy_auth = base64.b64encode(proxy_passwd).strip()
407 else:
408 proxy_auth = None
409 if user_passwd:
410 import base64
411 auth = base64.b64encode(user_passwd).strip()
412 else:
413 auth = None
414 h = httplib.HTTPS(host, 0,
415 key_file=self.key_file,
416 cert_file=self.cert_file)
417 if data is not None:
418 h.putrequest('POST', selector)
419 h.putheader('Content-Type',
420 'application/x-www-form-urlencoded')
421 h.putheader('Content-Length', '%d' % len(data))
422 else:
423 h.putrequest('GET', selector)
424 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
425 if auth: h.putheader('Authorization', 'Basic %s' % auth)
426 if realhost: h.putheader('Host', realhost)
427 for args in self.addheaders: h.putheader(*args)
428 h.endheaders()
429 if data is not None:
430 h.send(data)
431 errcode, errmsg, headers = h.getreply()
432 fp = h.getfile()
433 if errcode == -1:
434 if fp: fp.close()
435 # something went wrong with the HTTP status line
436 raise IOError, ('http protocol error', 0,
437 'got a bad status line', None)
438 # According to RFC 2616, "2xx" code indicates that the client's
439 # request was successfully received, understood, and accepted.
440 if not (200 <= errcode < 300):
441 return addinfourl(fp, headers, "https:" + url)
442 else:
443 if data is None:
444 return self.http_error(url, fp, errcode, errmsg, headers)
445 else:
446 return self.http_error(url, fp, errcode, errmsg, headers,
447 data)
449 def open_file(self, url):
450 """Use local file or FTP depending on form of URL."""
451 if not isinstance(url, str):
452 raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
453 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
454 return self.open_ftp(url)
455 else:
456 return self.open_local_file(url)
    def open_local_file(self, url):
        """Use local file.

        Returns an addinfourl whose headers carry synthesized
        Content-Type/Content-Length/Last-modified values.  Raises
        IOError when a host is present and does not name this machine.
        """
        import mimetypes, mimetools, email.utils
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        host, file = splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError, e:
            raise IOError(e.errno, e.strerror, e.filename)
        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        headers = mimetools.Message(StringIO(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified)))
        if not host:
            # No host component: plain local path.
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'),
                              headers, urlfile)
        host, port = splitport(host)
        if not port \
           and socket.gethostbyname(host) in (localhost(), thishost()):
            # The host names this machine: still a local file.
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'),
                              headers, urlfile)
        raise IOError, ('local file error', 'not on local host')
    def open_ftp(self, url):
        """Use FTP protocol.

        Connections are cached in self.ftpcache keyed by
        (user, host, port, directory) so repeated fetches reuse a
        login.  A ';type=X' URL attribute selects the transfer type;
        otherwise directories get a listing ('D') and files transfer
        as binary ('I').
        """
        if not isinstance(url, str):
            raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
        import mimetypes, mimetools
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        host, path = splithost(url)
        if not host: raise IOError, ('ftp error', 'no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user: user, passwd = splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            for k in self.ftpcache.keys():
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if not key in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            if not file: type = 'D'
            else: type = 'I'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = mimetools.Message(StringIO(headers))
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors(), msg:
            raise IOError, ('ftp error', msg), sys.exc_info()[2]
555 def open_data(self, url, data=None):
556 """Use "data" URL."""
557 if not isinstance(url, str):
558 raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
559 # ignore POSTed data
561 # syntax of data URLs:
562 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
563 # mediatype := [ type "/" subtype ] *( ";" parameter )
564 # data := *urlchar
565 # parameter := attribute "=" value
566 import mimetools
567 try:
568 from cStringIO import StringIO
569 except ImportError:
570 from StringIO import StringIO
571 try:
572 [type, data] = url.split(',', 1)
573 except ValueError:
574 raise IOError, ('data error', 'bad data URL')
575 if not type:
576 type = 'text/plain;charset=US-ASCII'
577 semi = type.rfind(';')
578 if semi >= 0 and '=' not in type[semi:]:
579 encoding = type[semi+1:]
580 type = type[:semi]
581 else:
582 encoding = ''
583 msg = []
584 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
585 time.gmtime(time.time())))
586 msg.append('Content-type: %s' % type)
587 if encoding == 'base64':
588 import base64
589 data = base64.decodestring(data)
590 else:
591 data = unquote(data)
592 msg.append('Content-Length: %d' % len(data))
593 msg.append('')
594 msg.append(data)
595 msg = '\n'.join(msg)
596 f = StringIO(msg)
597 headers = mimetools.Message(f, 0)
598 #f.fileno = None # needed for addinfourl
599 return addinfourl(f, headers, url)
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps)."""

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        self.auth_cache = {}    # maps 'realm@host' -> (user, passwd)
        self.tries = 0          # redirects followed for the current request
        self.maxtries = 10      # give up after this many redirects
    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception.

        Unlike the base class, hand back the error page as an ordinary
        response so callers can inspect it.
        """
        return addinfourl(fp, headers, "http:" + url)
615 def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
616 """Error 302 -- relocated (temporarily)."""
617 self.tries += 1
618 if self.maxtries and self.tries >= self.maxtries:
619 if hasattr(self, "http_error_500"):
620 meth = self.http_error_500
621 else:
622 meth = self.http_error_default
623 self.tries = 0
624 return meth(url, fp, 500,
625 "Internal Server Error: Redirect Recursion", headers)
626 result = self.redirect_internal(url, fp, errcode, errmsg, headers,
627 data)
628 self.tries = 0
629 return result
631 def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
632 if 'location' in headers:
633 newurl = headers['location']
634 elif 'uri' in headers:
635 newurl = headers['uri']
636 else:
637 return
638 void = fp.read()
639 fp.close()
640 # In case the server sent a relative URL, join with original:
641 newurl = basejoin(self.type + ":" + url, newurl)
642 return self.open(newurl)
    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        # Permanent moves are recovered exactly like temporary ones.
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        # "See Other" -- follow like a temporary redirect.
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        # A 307 must not change the request method, and we cannot safely
        # resubmit the POST body, so only GET requests are redirected.
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)
    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        if not 'www-authenticate' in headers:
            # No challenge at all -- the base-class handler raises IOError.
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            # Unparsable challenge -- also fatal via the base class.
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            # Only Basic authentication is supported.
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        # Dispatch to e.g. retry_http_basic_auth based on the scheme of
        # the current request (self.type is set by open()).
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)
    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        if not 'proxy-authenticate' in headers:
            # No challenge at all -- the base-class handler raises IOError.
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            # Unparsable challenge -- also fatal via the base class.
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            # Only Basic authentication is supported.
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        # Dispatch to e.g. retry_proxy_http_basic_auth for self.type.
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)
703 def retry_proxy_http_basic_auth(self, url, realm, data=None):
704 host, selector = splithost(url)
705 newurl = 'http://' + host + selector
706 proxy = self.proxies['http']
707 urltype, proxyhost = splittype(proxy)
708 proxyhost, proxyselector = splithost(proxyhost)
709 i = proxyhost.find('@') + 1
710 proxyhost = proxyhost[i:]
711 user, passwd = self.get_user_passwd(proxyhost, realm, i)
712 if not (user or passwd): return None
713 proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
714 self.proxies['http'] = 'http://' + proxyhost + proxyselector
715 if data is None:
716 return self.open(newurl)
717 else:
718 return self.open(newurl, data)
720 def retry_proxy_https_basic_auth(self, url, realm, data=None):
721 host, selector = splithost(url)
722 newurl = 'https://' + host + selector
723 proxy = self.proxies['https']
724 urltype, proxyhost = splittype(proxy)
725 proxyhost, proxyselector = splithost(proxyhost)
726 i = proxyhost.find('@') + 1
727 proxyhost = proxyhost[i:]
728 user, passwd = self.get_user_passwd(proxyhost, realm, i)
729 if not (user or passwd): return None
730 proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
731 self.proxies['https'] = 'https://' + proxyhost + proxyselector
732 if data is None:
733 return self.open(newurl)
734 else:
735 return self.open(newurl, data)
737 def retry_http_basic_auth(self, url, realm, data=None):
738 host, selector = splithost(url)
739 i = host.find('@') + 1
740 host = host[i:]
741 user, passwd = self.get_user_passwd(host, realm, i)
742 if not (user or passwd): return None
743 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
744 newurl = 'http://' + host + selector
745 if data is None:
746 return self.open(newurl)
747 else:
748 return self.open(newurl, data)
750 def retry_https_basic_auth(self, url, realm, data=None):
751 host, selector = splithost(url)
752 i = host.find('@') + 1
753 host = host[i:]
754 user, passwd = self.get_user_passwd(host, realm, i)
755 if not (user or passwd): return None
756 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
757 newurl = 'https://' + host + selector
758 if data is None:
759 return self.open(newurl)
760 else:
761 return self.open(newurl, data)
763 def get_user_passwd(self, host, realm, clear_cache = 0):
764 key = realm + '@' + host.lower()
765 if key in self.auth_cache:
766 if clear_cache:
767 del self.auth_cache[key]
768 else:
769 return self.auth_cache[key]
770 user, passwd = self.prompt_user_passwd(host, realm)
771 if user or passwd: self.auth_cache[key] = (user, passwd)
772 return user, passwd
    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!

        Interactively asks for a username and password on the terminal;
        returns (None, None) when the user hits Ctrl-C.
        """
        import getpass
        try:
            user = raw_input("Enter username for %s at %s: " % (realm,
                                                                host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            # Emit a newline so the ^C doesn't mangle the next prompt.
            print
            return None, None
# Utility functions

_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'."""
    global _localhost
    if _localhost is None:
        # Resolve once and memoize; the answer cannot change mid-run.
        _localhost = socket.gethostbyname('localhost')
    return _localhost
_thishost = None
def thishost():
    """Return the IP address of the current host."""
    global _thishost
    if _thishost is None:
        # Resolve our own hostname once and memoize the result.
        _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class."""
    global _ftperrors
    if _ftperrors is None:
        # Import lazily so urllib does not drag in ftplib at startup.
        import ftplib
        _ftperrors = ftplib.all_errors
    return _ftperrors
_noheaders = None
def noheaders():
    """Return an empty mimetools.Message object."""
    global _noheaders
    if _noheaders is None:
        import mimetools
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        # Build the shared empty message once; callers treat it as
        # read-only.
        _noheaders = mimetools.Message(StringIO(), 0)
        _noheaders.fp.close() # Recycle file descriptor
    return _noheaders
# Utility classes

class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections."""

    def __init__(self, user, passwd, host, port, dirs, timeout=None):
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs            # path components to cwd into after login
        self.timeout = timeout
        self.init()

    def init(self):
        """(Re)connect, log in, and change into self.dirs."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        for dir in self.dirs:
            self.ftp.cwd(dir)

    def retrfile(self, file, type):
        """Start retrieving file; type 'D' requests a directory listing.

        Returns (file-like object, length-or-None); closing the object
        ends the FTP data transfer via endtransfer().
        """
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The cached connection may have dropped; reconnect once.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm, reason:
                # 550 means "not a plain file"; fall through to LIST.
                if str(reason)[:3] != '550':
                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing
            if file: cmd = 'LIST ' + file
            else: cmd = 'LIST'
            conn = self.ftp.ntransfercmd(cmd)
        self.busy = 1
        # Pass back both a suitably decorated object and a retrieval length
        return (addclosehook(conn[0].makefile('rb'),
                             self.endtransfer), conn[1])

    def endtransfer(self):
        """Finish any pending transfer, swallowing protocol errors."""
        if not self.busy:
            return
        self.busy = 0
        try:
            self.ftp.voidresp()
        except ftperrors():
            pass

    def close(self):
        """End any transfer and close the control connection."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
class addbase:
    """Base class for addinfo and addclosehook."""

    def __init__(self, fp):
        # Delegate the file protocol by copying bound methods off fp;
        # optional attributes are only bound when fp provides them.
        self.fp = fp
        self.read = self.fp.read
        self.readline = self.fp.readline
        if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
        if hasattr(self.fp, "fileno"):
            self.fileno = self.fp.fileno
        else:
            self.fileno = lambda: None
        if hasattr(self.fp, "__iter__"):
            self.__iter__ = self.fp.__iter__
            if hasattr(self.fp, "next"):
                self.next = self.fp.next

    def __repr__(self):
        return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
                                             id(self), self.fp)

    def close(self):
        # Drop the delegated methods so later calls fail fast, then close
        # the underlying file exactly once.
        self.read = None
        self.readline = None
        self.readlines = None
        self.fileno = None
        if self.fp: self.fp.close()
        self.fp = None
class addclosehook(addbase):
    """Class to add a close hook to an open file."""

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook = closehook      # callable fired after close
        self.hookargs = hookargs        # positional args for the hook

    def close(self):
        # Close the file first, then fire the hook exactly once (the
        # hook is cleared so a second close() is a no-op).
        addbase.close(self)
        if self.closehook:
            self.closehook(*self.hookargs)
            self.closehook = None
            self.hookargs = None
class addinfo(addbase):
    """class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        addbase.__init__(self, fp)
        self.headers = headers      # typically a mimetools.Message

    def info(self):
        """Return the headers associated with this file."""
        return self.headers
class addinfourl(addbase):
    """class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url):
        addbase.__init__(self, fp)
        self.headers = headers      # typically a mimetools.Message
        self.url = url              # the URL that was actually opened

    def info(self):
        """Return the response headers."""
        return self.headers

    def geturl(self):
        """Return the URL of the resource actually retrieved."""
        return self.url
968 # Utilities to parse URLs (most of these return None for missing parts):
969 # unwrap('<URL:type://host/path>') --> 'type://host/path'
970 # splittype('type:opaquestring') --> 'type', 'opaquestring'
971 # splithost('//host[:port]/path') --> 'host[:port]', '/path'
972 # splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
973 # splitpasswd('user:passwd') -> 'user', 'passwd'
974 # splitport('host:port') --> 'host', 'port'
975 # splitquery('/path?query') --> '/path', 'query'
976 # splittag('/path#tag') --> '/path', 'tag'
977 # splitattr('/path;attr1=value1;attr2=value2;...') ->
978 # '/path', ['attr1=value1', 'attr2=value2', ...]
979 # splitvalue('attr=value') --> 'attr', 'value'
980 # unquote('abc%20def') -> 'abc def'
981 # quote('abc def') -> 'abc%20def')
# Portability shim: the 'unicode' type may not exist in every build.
try:
    unicode
except NameError:
    def _is_unicode(x):
        # No unicode type available -- nothing can be a unicode string.
        return 0
else:
    def _is_unicode(x):
        return isinstance(x, unicode)
def toBytes(url):
    """toBytes(u"URL") --> 'URL'."""
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed.
    if not _is_unicode(url):
        return url
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
    # Peel an optional <...> wrapper, then an optional 'URL:' prefix,
    # stripping surrounding whitespace at each step.
    url = url.strip()
    if url.startswith('<') and url.endswith('>'):
        url = url[1:-1].strip()
    if url.startswith('URL:'):
        url = url[4:].strip()
    return url
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
    # Compile the scheme pattern lazily and cache it at module level.
    global _typeprog
    if _typeprog is None:
        import re
        _typeprog = re.compile('^([^/:]+):')

    match = _typeprog.match(url)
    if not match:
        return None, url
    scheme = match.group(1)
    # Scheme names are case-insensitive; normalize to lowercase.
    return scheme.lower(), url[len(scheme) + 1:]
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
    # Compile the netloc pattern lazily and cache it at module level.
    global _hostprog
    if _hostprog is None:
        import re
        _hostprog = re.compile('^//([^/?]*)(.*)$')

    match = _hostprog.match(url)
    if match is None:
        return None, url
    return match.group(1), match.group(2)
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
    # Compile the user@host pattern lazily and cache it at module level.
    global _userprog
    if _userprog is None:
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    match = _userprog.match(host)
    if match is None:
        return None, host
    # Both halves may contain %xx escapes; decode them on the way out.
    return map(unquote, match.group(1, 2))
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'."""
    # Compile the user:passwd pattern lazily and cache it at module level.
    global _passwdprog
    if _passwdprog is None:
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$')

    match = _passwdprog.match(user)
    if match is None:
        return user, None
    return match.group(1), match.group(2)
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'."""
    # Compile the host:port pattern lazily and cache it at module level.
    # The port group must be all digits; otherwise no split happens.
    global _portprog
    if _portprog is None:
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    match = _portprog.match(host)
    if match is None:
        return host, None
    return match.group(1), match.group(2)
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number are found after ':'.
    Return None if ':' but not a valid number."""
    # Compile the host:port pattern lazily and cache it at module level.
    global _nportprog
    if _nportprog is None:
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    match = _nportprog.match(host)
    if match is None:
        return host, defport
    host, port = match.group(1, 2)
    try:
        # int('') raises ValueError, so an empty port string also
        # yields None -- no need for the old hand-raised "no digits".
        nport = int(port)
    except ValueError:
        nport = None
    return host, nport
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'."""
    # Compile the path?query pattern lazily and cache it at module
    # level; the greedy prefix means we split on the LAST '?'.
    global _queryprog
    if _queryprog is None:
        import re
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    match = _queryprog.match(url)
    if match is None:
        return url, None
    return match.group(1), match.group(2)
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'."""
    # Compile the path#tag pattern lazily and cache it at module
    # level; the greedy prefix means we split on the LAST '#'.
    global _tagprog
    if _tagprog is None:
        import re
        _tagprog = re.compile(r'^(.*)#([^#]*)$')

    match = _tagprog.match(url)
    if match is None:
        return url, None
    return match.group(1), match.group(2)
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    # Everything before the first ';' is the path; each subsequent
    # ';'-delimited piece is one attribute string.
    pieces = url.split(';')
    return pieces[0], pieces[1:]
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'."""
    # Compile the attr=value pattern lazily and cache it at module
    # level; the split happens at the FIRST '='.
    global _valueprog
    if _valueprog is None:
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    match = _valueprog.match(attr)
    if match is None:
        return attr, None
    return match.group(1), match.group(2)
1139 _hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
1140 _hextochr.update(('%02X' % i, chr(i)) for i in range(256))
1142 def unquote(s):
1143 """unquote('abc%20def') -> 'abc def'."""
1144 res = s.split('%')
1145 for i in xrange(1, len(res)):
1146 item = res[i]
1147 try:
1148 res[i] = _hextochr[item[:2]] + item[2:]
1149 except KeyError:
1150 res[i] = '%' + item
1151 except UnicodeDecodeError:
1152 res[i] = unichr(int(item[:2], 16)) + item[2:]
1153 return "".join(res)
1155 def unquote_plus(s):
1156 """unquote('%7e/abc+def') -> '~/abc def'"""
1157 s = s.replace('+', ' ')
1158 return unquote(s)
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
# Cache of per-(safe, always_safe) character translation tables.
_safemaps = {}

def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.
    """
    # NOTE: the docstring above was previously unterminated (missing
    # closing quotes), which made the module unparsable; restored here.
    cachekey = (safe, always_safe)
    try:
        safe_map = _safemaps[cachekey]
    except KeyError:
        # Build (and cache) a 256-entry char -> replacement table for
        # this particular set of safe characters.
        safe += always_safe
        safe_map = {}
        for i in range(256):
            c = chr(i)
            safe_map[c] = (c in safe) and c or ('%%%02X' % i)
        _safemaps[cachekey] = safe_map
    res = map(safe_map.__getitem__, s)
    return ''.join(res)

def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' in s:
        # Treat the space as safe during quoting, then encode it as '+'.
        s = quote(s, safe + ' ')
        return s.replace(' ', '+')
    return quote(s, safe)
1206 def urlencode(query,doseq=0):
1207 """Encode a sequence of two-element tuples or dictionary into a URL query string.
1209 If any values in the query arg are sequences and doseq is true, each
1210 sequence element is converted to a separate parameter.
1212 If the query arg is a sequence of two-element tuples, the order of the
1213 parameters in the output will match the order of parameters in the
1214 input.
1217 if hasattr(query,"items"):
1218 # mapping objects
1219 query = query.items()
1220 else:
1221 # it's a bother at times that strings and string-like objects are
1222 # sequences...
1223 try:
1224 # non-sequence items should not work with len()
1225 # non-empty strings will fail this
1226 if len(query) and not isinstance(query[0], tuple):
1227 raise TypeError
1228 # zero-length sequences of all types will get here and succeed,
1229 # but that's a minor nit - since the original implementation
1230 # allowed empty dicts that type of behavior probably should be
1231 # preserved for consistency
1232 except TypeError:
1233 ty,va,tb = sys.exc_info()
1234 raise TypeError, "not a valid non-string sequence or mapping object", tb
1236 l = []
1237 if not doseq:
1238 # preserve old behavior
1239 for k, v in query:
1240 k = quote_plus(str(k))
1241 v = quote_plus(str(v))
1242 l.append(k + '=' + v)
1243 else:
1244 for k, v in query:
1245 k = quote_plus(str(k))
1246 if isinstance(v, str):
1247 v = quote_plus(v)
1248 l.append(k + '=' + v)
1249 elif _is_unicode(v):
1250 # is there a reasonable way to convert to ASCII?
1251 # encode generates a string, but "replace" or "ignore"
1252 # lose information and "strict" can raise UnicodeError
1253 v = quote_plus(v.encode("ASCII","replace"))
1254 l.append(k + '=' + v)
1255 else:
1256 try:
1257 # is this a sufficient test for sequence-ness?
1258 x = len(v)
1259 except TypeError:
1260 # not a sequence
1261 v = quote_plus(str(v))
1262 l.append(k + '=' + v)
1263 else:
1264 # loop over the sequence
1265 for elt in v:
1266 l.append(k + '=' + quote_plus(str(elt)))
1267 return '&'.join(l)
1269 # Proxy handling
1270 def getproxies_environment():
1271 """Return a dictionary of scheme -> proxy server URL mappings.
1273 Scan the environment for variables named <scheme>_proxy;
1274 this seems to be the standard convention. If you need a
1275 different way, you can pass a proxies dictionary to the
1276 [Fancy]URLopener constructor.
1279 proxies = {}
1280 for name, value in os.environ.items():
1281 name = name.lower()
1282 if value and name[-6:] == '_proxy':
1283 proxies[name[:-6]] = value
1284 return proxies
if sys.platform == 'darwin':
    def getproxies_internetconfig():
        """Return a dictionary of scheme -> proxy server URL mappings.

        By convention the mac uses Internet Config to store
        proxies.  An HTTP proxy, for instance, is stored under
        the HttpProxy key.
        """
        # NOTE: the docstring above was previously unterminated (missing
        # closing quotes), which made the module unparsable; restored.
        try:
            import ic
        except ImportError:
            # No Internet Config support available.
            return {}

        try:
            config = ic.IC()
        except ic.error:
            return {}
        proxies = {}
        # HTTP:
        if 'UseHTTPProxy' in config and config['UseHTTPProxy']:
            try:
                value = config['HTTPProxyHost']
            except ic.error:
                pass
            else:
                proxies['http'] = 'http://%s' % value
        # FTP: XXXX To be done.
        # Gopher: XXXX To be done.
        return proxies

    def proxy_bypass(x):
        # No proxy-override mechanism is consulted here; never bypass.
        return 0

    def getproxies():
        """Return proxy mappings from the environment or Internet Config."""
        return getproxies_environment() or getproxies_internetconfig()
1323 elif os.name == 'nt':
1324 def getproxies_registry():
1325 """Return a dictionary of scheme -> proxy server URL mappings.
1327 Win32 uses the registry to store proxies.
1330 proxies = {}
1331 try:
1332 import _winreg
1333 except ImportError:
1334 # Std module, so should be around - but you never know!
1335 return proxies
1336 try:
1337 internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1338 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1339 proxyEnable = _winreg.QueryValueEx(internetSettings,
1340 'ProxyEnable')[0]
1341 if proxyEnable:
1342 # Returned as Unicode but problems if not converted to ASCII
1343 proxyServer = str(_winreg.QueryValueEx(internetSettings,
1344 'ProxyServer')[0])
1345 if '=' in proxyServer:
1346 # Per-protocol settings
1347 for p in proxyServer.split(';'):
1348 protocol, address = p.split('=', 1)
1349 # See if address has a type:// prefix
1350 import re
1351 if not re.match('^([^/:]+)://', address):
1352 address = '%s://%s' % (protocol, address)
1353 proxies[protocol] = address
1354 else:
1355 # Use one setting for all protocols
1356 if proxyServer[:5] == 'http:':
1357 proxies['http'] = proxyServer
1358 else:
1359 proxies['http'] = 'http://%s' % proxyServer
1360 proxies['ftp'] = 'ftp://%s' % proxyServer
1361 internetSettings.Close()
1362 except (WindowsError, ValueError, TypeError):
1363 # Either registry key not found etc, or the value in an
1364 # unexpected format.
1365 # proxies already set up to be empty so nothing to do
1366 pass
1367 return proxies
1369 def getproxies():
1370 """Return a dictionary of scheme -> proxy server URL mappings.
1372 Returns settings gathered from the environment, if specified,
1373 or the registry.
1376 return getproxies_environment() or getproxies_registry()
1378 def proxy_bypass(host):
1379 try:
1380 import _winreg
1381 import re
1382 except ImportError:
1383 # Std modules, so should be around - but you never know!
1384 return 0
1385 try:
1386 internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
1387 r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
1388 proxyEnable = _winreg.QueryValueEx(internetSettings,
1389 'ProxyEnable')[0]
1390 proxyOverride = str(_winreg.QueryValueEx(internetSettings,
1391 'ProxyOverride')[0])
1392 # ^^^^ Returned as Unicode but problems if not converted to ASCII
1393 except WindowsError:
1394 return 0
1395 if not proxyEnable or not proxyOverride:
1396 return 0
1397 # try to make a host list from name and IP address.
1398 rawHost, port = splitport(host)
1399 host = [rawHost]
1400 try:
1401 addr = socket.gethostbyname(rawHost)
1402 if addr != rawHost:
1403 host.append(addr)
1404 except socket.error:
1405 pass
1406 try:
1407 fqdn = socket.getfqdn(rawHost)
1408 if fqdn != rawHost:
1409 host.append(fqdn)
1410 except socket.error:
1411 pass
1412 # make a check value list from the registry entry: replace the
1413 # '<local>' string by the localhost entry and the corresponding
1414 # canonical entry.
1415 proxyOverride = proxyOverride.split(';')
1416 i = 0
1417 while i < len(proxyOverride):
1418 if proxyOverride[i] == '<local>':
1419 proxyOverride[i:i+1] = ['localhost',
1420 '127.0.0.1',
1421 socket.gethostname(),
1422 socket.gethostbyname(
1423 socket.gethostname())]
1424 i += 1
1425 # print proxyOverride
1426 # now check if we match one of the registry values.
1427 for test in proxyOverride:
1428 test = test.replace(".", r"\.") # mask dots
1429 test = test.replace("*", r".*") # change glob sequence
1430 test = test.replace("?", r".") # change glob char
1431 for val in host:
1432 # print "%s <--> %s" %( test, val )
1433 if re.match(test, val, re.I):
1434 return 1
1435 return 0
else:
    # By default use environment variables
    getproxies = getproxies_environment

    def proxy_bypass(host):
        """No platform override mechanism here; never bypass the proxy."""
        return 0
# Test and time quote() and unquote()
def test1():
    """Round-trip all 256 byte values through quote()/unquote(), timing it."""
    s = ''
    for i in range(256): s = s + chr(i)
    # Repeat the alphabet to get a more measurable timing sample.
    s = s*4
    t0 = time.time()
    qs = quote(s)
    uqs = unquote(qs)
    t1 = time.time()
    if uqs != s:
        print 'Wrong!'
        print repr(s)
        print repr(qs)
        print repr(uqs)
    print round(t1 - t0, 3), 'sec'
def reporthook(blocknum, blocksize, totalsize):
    """Progress callback for urlretrieve(): print one line per block."""
    # Report during remote transfers
    print "Block number: %d, Block size: %d, Total size: %d" % (
        blocknum, blocksize, totalsize)
1466 # Test program
1467 def test(args=[]):
1468 if not args:
1469 args = [
1470 '/etc/passwd',
1471 'file:/etc/passwd',
1472 'file://localhost/etc/passwd',
1473 'ftp://ftp.gnu.org/pub/README',
1474 'http://www.python.org/index.html',
1476 if hasattr(URLopener, "open_https"):
1477 args.append('https://synergy.as.cmu.edu/~geek/')
1478 try:
1479 for url in args:
1480 print '-'*10, url, '-'*10
1481 fn, h = urlretrieve(url, None, reporthook)
1482 print fn
1483 if h:
1484 print '======'
1485 for k in h.keys(): print k + ':', h[k]
1486 print '======'
1487 fp = open(fn, 'rb')
1488 data = fp.read()
1489 del fp
1490 if '\r' in data:
1491 table = string.maketrans("", "")
1492 data = data.translate(table, "\r")
1493 print data
1494 fn, h = None, None
1495 print '-'*40
1496 finally:
1497 urlcleanup()
def main():
    """Command-line driver: -h prints help, -t runs self-tests (twice for
    the quote/unquote timing test as well), otherwise each URL argument's
    contents are fetched and printed."""
    import getopt, sys
    try:
        opts, args = getopt.getopt(sys.argv[1:], "th")
    except getopt.error, msg:
        print msg
        print "Use -h for help"
        return
    # t counts how many -t flags were given; >1 also runs test1().
    t = 0
    for o, a in opts:
        if o == '-t':
            t = t + 1
        if o == '-h':
            print "Usage: python urllib.py [-t] [url ...]"
            print "-t runs self-test;",
            print "otherwise, contents of urls are printed"
            return
    if t:
        if t > 1:
            test1()
        test(args)
    else:
        if not args:
            print "Use -h for help"
        for url in args:
            print urlopen(url).read(),
# Run the test/driver program when this module is executed as a script.
if __name__ == '__main__':
    main()