Remove the gopherlib module. It has been raising a DeprecationWarning since
[python.git] / Lib / urllib.py
blobcecfbb0f0deb4415711d841789c0ca2253c26949
1 """Open an arbitrary URL.
3 See the following document for more info on URLs:
4 "Names and Addresses, URIs, URLs, URNs, URCs", at
5 http://www.w3.org/pub/WWW/Addressing/Overview.html
7 See also the HTTP spec (from which the error codes are derived):
8 "HTTP - Hypertext Transfer Protocol", at
9 http://www.w3.org/pub/WWW/Protocols/
11 Related standards and specs:
12 - RFC1808: the "relative URL" spec. (authoritative status)
13 - RFC1738 - the "URL standard". (authoritative status)
14 - RFC1630 - the "URI spec". (informational status)
16 The object returned by URLopener().open(file) will differ per
17 protocol. All you know is that is has methods read(), readline(),
18 readlines(), fileno(), close() and info(). The read*(), fileno()
19 and close() methods work like those of open files.
20 The info() method returns a mimetools.Message object which can be
21 used to query various info about the object, if available.
22 (mimetools.Message objects are queried with the getheader() method.)
23 """
import string
import socket
import os
import time
import sys
from urlparse import urljoin as basejoin

# Public API of this module.
__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
           "urlencode", "url2pathname", "pathname2url", "splittag",
           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
           "splitnport", "splitquery", "splitattr", "splitvalue",
           "getproxies"]

__version__ = '1.17'    # XXX This version is not always updated :-(

MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
# Helper for non-unix systems: pick the platform-specific URL<->path
# converters, falling back to plain quote/unquote on POSIX-like systems.
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        return quote(pathname)
# This really consists of two pieces:
# (1) a class which handles opening of all sorts of URLs
#     (plus assorted utilities etc.)
# (2) a set of functions for parsing URLs
# XXX Should these be separated out into different modules?


# Shortcut for basic usage: a single module-level opener, created lazily
# and reused across calls (unless explicit proxies are given).
_urlopener = None
def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object

    Open the URL with a FancyURLopener.  If *data* is given, a POST
    request is issued; otherwise GET.  A *proxies* mapping bypasses the
    shared cached opener and builds a fresh one.
    """
    global _urlopener
    if proxies is not None:
        # Explicit proxies: use a one-shot opener, do not cache it.
        opener = FancyURLopener(proxies=proxies)
    elif not _urlopener:
        opener = FancyURLopener()
        _urlopener = opener
    else:
        opener = _urlopener
    if data is None:
        return opener.open(url)
    else:
        return opener.open(url, data)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve *url* to a local file, returning (filename, headers).

    Delegates to the shared module-level FancyURLopener, creating it on
    first use.  See URLopener.retrieve() for the argument semantics.
    """
    global _urlopener
    if not _urlopener:
        _urlopener = FancyURLopener()
    return _urlopener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Clean up temporary files left behind by the shared opener."""
    if _urlopener:
        _urlopener.cleanup()
# exception raised when downloaded size does not match content-length
class ContentTooShortError(IOError):
    """Raised by retrieve() when fewer bytes arrive than Content-Length
    promised.  *content* holds the (filename, headers) result so callers
    can inspect or discard the partial download."""
    def __init__(self, message, content):
        IOError.__init__(self, message)
        # Keep the partial (filename, headers) result for the caller.
        self.content = content
100 ftpcache = {}
101 class URLopener:
102 """Class to open URLs.
103 This is a class rather than just a subroutine because we may need
104 more than one set of global protocol-specific options.
105 Note -- this is a base class for those who don't want the
106 automatic handling of errors type 302 (relocated) and 401
107 (authorization needed)."""
109 __tempfiles = None
111 version = "Python-urllib/%s" % __version__
113 # Constructor
114 def __init__(self, proxies=None, **x509):
115 if proxies is None:
116 proxies = getproxies()
117 assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
118 self.proxies = proxies
119 self.key_file = x509.get('key_file')
120 self.cert_file = x509.get('cert_file')
121 self.addheaders = [('User-Agent', self.version)]
122 self.__tempfiles = []
123 self.__unlink = os.unlink # See cleanup()
124 self.tempcache = None
125 # Undocumented feature: if you assign {} to tempcache,
126 # it is used to cache files retrieved with
127 # self.retrieve(). This is not enabled by default
128 # since it does not work for changing documents (and I
129 # haven't got the logic to check expiration headers
130 # yet).
131 self.ftpcache = ftpcache
132 # Undocumented feature: you can use a different
133 # ftp cache by assigning to the .ftpcache member;
134 # in case you want logically independent URL openers
135 # XXX This is not threadsafe. Bah.
137 def __del__(self):
138 self.close()
140 def close(self):
141 self.cleanup()
143 def cleanup(self):
144 # This code sometimes runs when the rest of this module
145 # has already been deleted, so it can't use any globals
146 # or import anything.
147 if self.__tempfiles:
148 for file in self.__tempfiles:
149 try:
150 self.__unlink(file)
151 except OSError:
152 pass
153 del self.__tempfiles[:]
154 if self.tempcache:
155 self.tempcache.clear()
157 def addheader(self, *args):
158 """Add a header to be used by the HTTP interface only
159 e.g. u.addheader('Accept', 'sound/basic')"""
160 self.addheaders.append(args)
162 # External interface
163 def open(self, fullurl, data=None):
164 """Use URLopener().open(file) instead of open(file, 'r')."""
165 fullurl = unwrap(toBytes(fullurl))
166 if self.tempcache and fullurl in self.tempcache:
167 filename, headers = self.tempcache[fullurl]
168 fp = open(filename, 'rb')
169 return addinfourl(fp, headers, fullurl)
170 urltype, url = splittype(fullurl)
171 if not urltype:
172 urltype = 'file'
173 if urltype in self.proxies:
174 proxy = self.proxies[urltype]
175 urltype, proxyhost = splittype(proxy)
176 host, selector = splithost(proxyhost)
177 url = (host, fullurl) # Signal special case to open_*()
178 else:
179 proxy = None
180 name = 'open_' + urltype
181 self.type = urltype
182 name = name.replace('-', '_')
183 if not hasattr(self, name):
184 if proxy:
185 return self.open_unknown_proxy(proxy, fullurl, data)
186 else:
187 return self.open_unknown(fullurl, data)
188 try:
189 if data is None:
190 return getattr(self, name)(url)
191 else:
192 return getattr(self, name)(url, data)
193 except socket.error, msg:
194 raise IOError, ('socket error', msg), sys.exc_info()[2]
196 def open_unknown(self, fullurl, data=None):
197 """Overridable interface to open unknown URL type."""
198 type, url = splittype(fullurl)
199 raise IOError, ('url error', 'unknown url type', type)
201 def open_unknown_proxy(self, proxy, fullurl, data=None):
202 """Overridable interface to open unknown URL type."""
203 type, url = splittype(fullurl)
204 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
206 # External interface
207 def retrieve(self, url, filename=None, reporthook=None, data=None):
208 """retrieve(url) returns (filename, headers) for a local object
209 or (tempfilename, headers) for a remote object."""
210 url = unwrap(toBytes(url))
211 if self.tempcache and url in self.tempcache:
212 return self.tempcache[url]
213 type, url1 = splittype(url)
214 if filename is None and (not type or type == 'file'):
215 try:
216 fp = self.open_local_file(url1)
217 hdrs = fp.info()
218 del fp
219 return url2pathname(splithost(url1)[1]), hdrs
220 except IOError, msg:
221 pass
222 fp = self.open(url, data)
223 headers = fp.info()
224 if filename:
225 tfp = open(filename, 'wb')
226 else:
227 import tempfile
228 garbage, path = splittype(url)
229 garbage, path = splithost(path or "")
230 path, garbage = splitquery(path or "")
231 path, garbage = splitattr(path or "")
232 suffix = os.path.splitext(path)[1]
233 (fd, filename) = tempfile.mkstemp(suffix)
234 self.__tempfiles.append(filename)
235 tfp = os.fdopen(fd, 'wb')
236 result = filename, headers
237 if self.tempcache is not None:
238 self.tempcache[url] = result
239 bs = 1024*8
240 size = -1
241 read = 0
242 blocknum = 0
243 if reporthook:
244 if "content-length" in headers:
245 size = int(headers["Content-Length"])
246 reporthook(blocknum, bs, size)
247 while 1:
248 block = fp.read(bs)
249 if block == "":
250 break
251 read += len(block)
252 tfp.write(block)
253 blocknum += 1
254 if reporthook:
255 reporthook(blocknum, bs, size)
256 fp.close()
257 tfp.close()
258 del fp
259 del tfp
261 # raise exception if actual size does not match content-length header
262 if size >= 0 and read < size:
263 raise ContentTooShortError("retrieval incomplete: got only %i out "
264 "of %i bytes" % (read, size), result)
266 return result
268 # Each method named open_<type> knows how to open that type of URL
270 def open_http(self, url, data=None):
271 """Use HTTP protocol."""
272 import httplib
273 user_passwd = None
274 proxy_passwd= None
275 if isinstance(url, str):
276 host, selector = splithost(url)
277 if host:
278 user_passwd, host = splituser(host)
279 host = unquote(host)
280 realhost = host
281 else:
282 host, selector = url
283 # check whether the proxy contains authorization information
284 proxy_passwd, host = splituser(host)
285 # now we proceed with the url we want to obtain
286 urltype, rest = splittype(selector)
287 url = rest
288 user_passwd = None
289 if urltype.lower() != 'http':
290 realhost = None
291 else:
292 realhost, rest = splithost(rest)
293 if realhost:
294 user_passwd, realhost = splituser(realhost)
295 if user_passwd:
296 selector = "%s://%s%s" % (urltype, realhost, rest)
297 if proxy_bypass(realhost):
298 host = realhost
300 #print "proxy via http:", host, selector
301 if not host: raise IOError, ('http error', 'no host given')
303 if proxy_passwd:
304 import base64
305 proxy_auth = base64.b64encode(proxy_passwd).strip()
306 else:
307 proxy_auth = None
309 if user_passwd:
310 import base64
311 auth = base64.b64encode(user_passwd).strip()
312 else:
313 auth = None
314 h = httplib.HTTP(host)
315 if data is not None:
316 h.putrequest('POST', selector)
317 h.putheader('Content-Type', 'application/x-www-form-urlencoded')
318 h.putheader('Content-Length', '%d' % len(data))
319 else:
320 h.putrequest('GET', selector)
321 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
322 if auth: h.putheader('Authorization', 'Basic %s' % auth)
323 if realhost: h.putheader('Host', realhost)
324 for args in self.addheaders: h.putheader(*args)
325 h.endheaders()
326 if data is not None:
327 h.send(data)
328 errcode, errmsg, headers = h.getreply()
329 fp = h.getfile()
330 if errcode == -1:
331 if fp: fp.close()
332 # something went wrong with the HTTP status line
333 raise IOError, ('http protocol error', 0,
334 'got a bad status line', None)
335 if errcode == 200:
336 return addinfourl(fp, headers, "http:" + url)
337 else:
338 if data is None:
339 return self.http_error(url, fp, errcode, errmsg, headers)
340 else:
341 return self.http_error(url, fp, errcode, errmsg, headers, data)
343 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
344 """Handle http errors.
345 Derived class can override this, or provide specific handlers
346 named http_error_DDD where DDD is the 3-digit error code."""
347 # First check if there's a specific handler for this error
348 name = 'http_error_%d' % errcode
349 if hasattr(self, name):
350 method = getattr(self, name)
351 if data is None:
352 result = method(url, fp, errcode, errmsg, headers)
353 else:
354 result = method(url, fp, errcode, errmsg, headers, data)
355 if result: return result
356 return self.http_error_default(url, fp, errcode, errmsg, headers)
358 def http_error_default(self, url, fp, errcode, errmsg, headers):
359 """Default error handler: close the connection and raise IOError."""
360 void = fp.read()
361 fp.close()
362 raise IOError, ('http error', errcode, errmsg, headers)
364 if hasattr(socket, "ssl"):
365 def open_https(self, url, data=None):
366 """Use HTTPS protocol."""
367 import httplib
368 user_passwd = None
369 proxy_passwd = None
370 if isinstance(url, str):
371 host, selector = splithost(url)
372 if host:
373 user_passwd, host = splituser(host)
374 host = unquote(host)
375 realhost = host
376 else:
377 host, selector = url
378 # here, we determine, whether the proxy contains authorization information
379 proxy_passwd, host = splituser(host)
380 urltype, rest = splittype(selector)
381 url = rest
382 user_passwd = None
383 if urltype.lower() != 'https':
384 realhost = None
385 else:
386 realhost, rest = splithost(rest)
387 if realhost:
388 user_passwd, realhost = splituser(realhost)
389 if user_passwd:
390 selector = "%s://%s%s" % (urltype, realhost, rest)
391 #print "proxy via https:", host, selector
392 if not host: raise IOError, ('https error', 'no host given')
393 if proxy_passwd:
394 import base64
395 proxy_auth = base64.b64encode(proxy_passwd).strip()
396 else:
397 proxy_auth = None
398 if user_passwd:
399 import base64
400 auth = base64.b64encode(user_passwd).strip()
401 else:
402 auth = None
403 h = httplib.HTTPS(host, 0,
404 key_file=self.key_file,
405 cert_file=self.cert_file)
406 if data is not None:
407 h.putrequest('POST', selector)
408 h.putheader('Content-Type',
409 'application/x-www-form-urlencoded')
410 h.putheader('Content-Length', '%d' % len(data))
411 else:
412 h.putrequest('GET', selector)
413 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
414 if auth: h.putheader('Authorization', 'Basic %s' % auth)
415 if realhost: h.putheader('Host', realhost)
416 for args in self.addheaders: h.putheader(*args)
417 h.endheaders()
418 if data is not None:
419 h.send(data)
420 errcode, errmsg, headers = h.getreply()
421 fp = h.getfile()
422 if errcode == -1:
423 if fp: fp.close()
424 # something went wrong with the HTTP status line
425 raise IOError, ('http protocol error', 0,
426 'got a bad status line', None)
427 if errcode == 200:
428 return addinfourl(fp, headers, "https:" + url)
429 else:
430 if data is None:
431 return self.http_error(url, fp, errcode, errmsg, headers)
432 else:
433 return self.http_error(url, fp, errcode, errmsg, headers,
434 data)
436 def open_file(self, url):
437 """Use local file or FTP depending on form of URL."""
438 if not isinstance(url, str):
439 raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
440 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
441 return self.open_ftp(url)
442 else:
443 return self.open_local_file(url)
445 def open_local_file(self, url):
446 """Use local file."""
447 import mimetypes, mimetools, email.utils
448 try:
449 from cStringIO import StringIO
450 except ImportError:
451 from StringIO import StringIO
452 host, file = splithost(url)
453 localname = url2pathname(file)
454 try:
455 stats = os.stat(localname)
456 except OSError, e:
457 raise IOError(e.errno, e.strerror, e.filename)
458 size = stats.st_size
459 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
460 mtype = mimetypes.guess_type(url)[0]
461 headers = mimetools.Message(StringIO(
462 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
463 (mtype or 'text/plain', size, modified)))
464 if not host:
465 urlfile = file
466 if file[:1] == '/':
467 urlfile = 'file://' + file
468 return addinfourl(open(localname, 'rb'),
469 headers, urlfile)
470 host, port = splitport(host)
471 if not port \
472 and socket.gethostbyname(host) in (localhost(), thishost()):
473 urlfile = file
474 if file[:1] == '/':
475 urlfile = 'file://' + file
476 return addinfourl(open(localname, 'rb'),
477 headers, urlfile)
478 raise IOError, ('local file error', 'not on local host')
480 def open_ftp(self, url):
481 """Use FTP protocol."""
482 if not isinstance(url, str):
483 raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
484 import mimetypes, mimetools
485 try:
486 from cStringIO import StringIO
487 except ImportError:
488 from StringIO import StringIO
489 host, path = splithost(url)
490 if not host: raise IOError, ('ftp error', 'no host given')
491 host, port = splitport(host)
492 user, host = splituser(host)
493 if user: user, passwd = splitpasswd(user)
494 else: passwd = None
495 host = unquote(host)
496 user = unquote(user or '')
497 passwd = unquote(passwd or '')
498 host = socket.gethostbyname(host)
499 if not port:
500 import ftplib
501 port = ftplib.FTP_PORT
502 else:
503 port = int(port)
504 path, attrs = splitattr(path)
505 path = unquote(path)
506 dirs = path.split('/')
507 dirs, file = dirs[:-1], dirs[-1]
508 if dirs and not dirs[0]: dirs = dirs[1:]
509 if dirs and not dirs[0]: dirs[0] = '/'
510 key = user, host, port, '/'.join(dirs)
511 # XXX thread unsafe!
512 if len(self.ftpcache) > MAXFTPCACHE:
513 # Prune the cache, rather arbitrarily
514 for k in self.ftpcache.keys():
515 if k != key:
516 v = self.ftpcache[k]
517 del self.ftpcache[k]
518 v.close()
519 try:
520 if not key in self.ftpcache:
521 self.ftpcache[key] = \
522 ftpwrapper(user, passwd, host, port, dirs)
523 if not file: type = 'D'
524 else: type = 'I'
525 for attr in attrs:
526 attr, value = splitvalue(attr)
527 if attr.lower() == 'type' and \
528 value in ('a', 'A', 'i', 'I', 'd', 'D'):
529 type = value.upper()
530 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
531 mtype = mimetypes.guess_type("ftp:" + url)[0]
532 headers = ""
533 if mtype:
534 headers += "Content-Type: %s\n" % mtype
535 if retrlen is not None and retrlen >= 0:
536 headers += "Content-Length: %d\n" % retrlen
537 headers = mimetools.Message(StringIO(headers))
538 return addinfourl(fp, headers, "ftp:" + url)
539 except ftperrors(), msg:
540 raise IOError, ('ftp error', msg), sys.exc_info()[2]
542 def open_data(self, url, data=None):
543 """Use "data" URL."""
544 if not isinstance(url, str):
545 raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
546 # ignore POSTed data
548 # syntax of data URLs:
549 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
550 # mediatype := [ type "/" subtype ] *( ";" parameter )
551 # data := *urlchar
552 # parameter := attribute "=" value
553 import mimetools
554 try:
555 from cStringIO import StringIO
556 except ImportError:
557 from StringIO import StringIO
558 try:
559 [type, data] = url.split(',', 1)
560 except ValueError:
561 raise IOError, ('data error', 'bad data URL')
562 if not type:
563 type = 'text/plain;charset=US-ASCII'
564 semi = type.rfind(';')
565 if semi >= 0 and '=' not in type[semi:]:
566 encoding = type[semi+1:]
567 type = type[:semi]
568 else:
569 encoding = ''
570 msg = []
571 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
572 time.gmtime(time.time())))
573 msg.append('Content-type: %s' % type)
574 if encoding == 'base64':
575 import base64
576 data = base64.decodestring(data)
577 else:
578 data = unquote(data)
579 msg.append('Content-Length: %d' % len(data))
580 msg.append('')
581 msg.append(data)
582 msg = '\n'.join(msg)
583 f = StringIO(msg)
584 headers = mimetools.Message(f, 0)
585 #f.fileno = None # needed for addinfourl
586 return addinfourl(f, headers, url)
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps)."""

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        self.auth_cache = {}
        self.tries = 0
        self.maxtries = 10  # redirect-loop guard for http_error_302()

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily)."""
        self.tries += 1
        if self.maxtries and self.tries >= self.maxtries:
            # Too many consecutive redirects: report a synthetic 500.
            if hasattr(self, "http_error_500"):
                meth = self.http_error_500
            else:
                meth = self.http_error_default
            self.tries = 0
            return meth(url, fp, 500,
                        "Internal Server Error: Redirect Recursion", headers)
        result = self.redirect_internal(url, fp, errcode, errmsg, headers,
                                        data)
        self.tries = 0
        return result

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        # Follow a redirect; returns None if no Location/URI header given.
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        void = fp.read()
        fp.close()
        # In case the server sent a relative URL, join with original:
        newurl = basejoin(self.type + ":" + url, newurl)
        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        if not 'www-authenticate' in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        if not 'proxy-authenticate' in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        # Re-issue the request after embedding credentials in the proxy URL.
        host, selector = splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        # Same as retry_proxy_http_basic_auth, for the https proxy entry.
        host, selector = splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        # Re-issue the request with user:passwd@ embedded in the URL.
        host, selector = splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        host, selector = splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache = 0):
        # Consult (and maintain) the per-opener credential cache before
        # falling back to prompting the user.
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = raw_input("Enter username for %s at %s: " % (realm,
                                                                host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            print
            return None, None
# Utility functions

# Cached result of the one-time 'localhost' lookup.
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'."""
    global _localhost
    if _localhost is None:
        _localhost = socket.gethostbyname('localhost')
    return _localhost
# Cached result of the one-time local-hostname lookup.
_thishost = None
def thishost():
    """Return the IP address of the current host."""
    global _thishost
    if _thishost is None:
        _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost
# Cached ftplib.all_errors tuple; ftplib is imported lazily on first use.
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class."""
    global _ftperrors
    if _ftperrors is None:
        import ftplib
        _ftperrors = ftplib.all_errors
    return _ftperrors
# Cached empty-headers message shared by all callers.
_noheaders = None
def noheaders():
    """Return an empty mimetools.Message object."""
    global _noheaders
    if _noheaders is None:
        import mimetools
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        _noheaders = mimetools.Message(StringIO(), 0)
        _noheaders.fp.close()   # Recycle file descriptor
    return _noheaders
817 # Utility classes
819 class ftpwrapper:
820 """Class used by open_ftp() for cache of open FTP connections."""
822 def __init__(self, user, passwd, host, port, dirs):
823 self.user = user
824 self.passwd = passwd
825 self.host = host
826 self.port = port
827 self.dirs = dirs
828 self.init()
830 def init(self):
831 import ftplib
832 self.busy = 0
833 self.ftp = ftplib.FTP()
834 self.ftp.connect(self.host, self.port)
835 self.ftp.login(self.user, self.passwd)
836 for dir in self.dirs:
837 self.ftp.cwd(dir)
839 def retrfile(self, file, type):
840 import ftplib
841 self.endtransfer()
842 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
843 else: cmd = 'TYPE ' + type; isdir = 0
844 try:
845 self.ftp.voidcmd(cmd)
846 except ftplib.all_errors:
847 self.init()
848 self.ftp.voidcmd(cmd)
849 conn = None
850 if file and not isdir:
851 # Try to retrieve as a file
852 try:
853 cmd = 'RETR ' + file
854 conn = self.ftp.ntransfercmd(cmd)
855 except ftplib.error_perm, reason:
856 if str(reason)[:3] != '550':
857 raise IOError, ('ftp error', reason), sys.exc_info()[2]
858 if not conn:
859 # Set transfer mode to ASCII!
860 self.ftp.voidcmd('TYPE A')
861 # Try a directory listing
862 if file: cmd = 'LIST ' + file
863 else: cmd = 'LIST'
864 conn = self.ftp.ntransfercmd(cmd)
865 self.busy = 1
866 # Pass back both a suitably decorated object and a retrieval length
867 return (addclosehook(conn[0].makefile('rb'),
868 self.endtransfer), conn[1])
869 def endtransfer(self):
870 if not self.busy:
871 return
872 self.busy = 0
873 try:
874 self.ftp.voidresp()
875 except ftperrors():
876 pass
878 def close(self):
879 self.endtransfer()
880 try:
881 self.ftp.close()
882 except ftperrors():
883 pass
class addbase:
    """Base class for addinfo and addclosehook."""

    def __init__(self, fp):
        self.fp = fp
        # Delegate the file-object protocol to the wrapped object,
        # exposing only the methods it actually provides.
        self.read = self.fp.read
        self.readline = self.fp.readline
        if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
        if hasattr(self.fp, "fileno"):
            self.fileno = self.fp.fileno
        else:
            self.fileno = lambda: None
        if hasattr(self.fp, "__iter__"):
            self.__iter__ = self.fp.__iter__
            if hasattr(self.fp, "next"):
                self.next = self.fp.next

    def __repr__(self):
        return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
                                             id(self), self.fp)

    def close(self):
        # Drop the delegated methods so stale references fail loudly.
        self.read = None
        self.readline = None
        self.readlines = None
        self.fileno = None
        if self.fp: self.fp.close()
        self.fp = None
class addclosehook(addbase):
    """Class to add a close hook to an open file."""

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        addbase.close(self)
        # Fire the hook exactly once, then drop the references.
        if self.closehook:
            self.closehook(*self.hookargs)
            self.closehook = None
            self.hookargs = None
class addinfo(addbase):
    """class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        return self.headers
class addinfourl(addbase):
    """class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url):
        addbase.__init__(self, fp)
        self.headers = headers
        self.url = url

    def info(self):
        return self.headers

    def geturl(self):
        return self.url
# Utilities to parse URLs (most of these return None for missing parts):
# unwrap('<URL:type://host/path>') --> 'type://host/path'
# splittype('type:opaquestring') --> 'type', 'opaquestring'
# splithost('//host[:port]/path') --> 'host[:port]', '/path'
# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
# splitpasswd('user:passwd') -> 'user', 'passwd'
# splitport('host:port') --> 'host', 'port'
# splitquery('/path?query') --> '/path', 'query'
# splittag('/path#tag') --> '/path', 'tag'
# splitattr('/path;attr1=value1;attr2=value2;...') ->
#   '/path', ['attr1=value1', 'attr2=value2', ...]
# splitvalue('attr=value') --> 'attr', 'value'
# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def')
969 try:
970 unicode
971 except NameError:
972 def _is_unicode(x):
973 return 0
974 else:
975 def _is_unicode(x):
976 return isinstance(x, unicode)
def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    Raises UnicodeError if the URL contains non-ASCII characters.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    if _is_unicode(url):
        try:
            url = url.encode("ASCII")
        except UnicodeError:
            raise UnicodeError("URL " + repr(url) +
                               " contains non-ASCII characters")
    return url
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
    url = url.strip()
    # Strip one layer of <...> wrapping, then an optional URL: prefix.
    if url[:1] == '<' and url[-1:] == '>':
        url = url[1:-1].strip()
    if url[:4] == 'URL:': url = url[4:].strip()
    return url
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
    global _typeprog
    # Compile the pattern lazily and cache it at module level.
    if _typeprog is None:
        import re
        _typeprog = re.compile('^([^/:]+):')

    m = _typeprog.match(url)
    if m is None:
        return None, url
    scheme = m.group(1)
    return scheme.lower(), url[len(scheme) + 1:]
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
    global _hostprog
    # Compile the pattern lazily and cache it at module level.
    if _hostprog is None:
        import re
        _hostprog = re.compile('^//([^/?]*)(.*)$')

    m = _hostprog.match(url)
    if m is None:
        return None, url
    return m.group(1, 2)
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
    global _userprog
    # Compile the pattern lazily and cache it at module level.  The
    # greedy first group means we split on the *last* '@'.
    if _userprog is None:
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    m = _userprog.match(host)
    if m is None:
        return None, host
    # Both halves may contain %XX escapes; undo them.
    return map(unquote, m.group(1, 2))
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'."""
    global _passwdprog
    # Compile the pattern lazily and cache it at module level.
    if _passwdprog is None:
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$')

    m = _passwdprog.match(user)
    if m is None:
        return user, None
    return m.group(1, 2)
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port must be all digits; otherwise the whole input is returned
    as the host with a None port.
    """
    global _portprog
    # Compile the pattern lazily and cache it at module level.
    if _portprog is None:
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    m = _portprog.match(host)
    if m is None:
        return host, None
    return m.group(1, 2)
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.

    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number are found after ':'.
    Return None if ':' but not a valid number.
    """
    global _nportprog
    # Compile the pattern lazily and cache it at module level.
    if _nportprog is None:
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    match = _nportprog.match(host)
    if match:
        host, port = match.group(1, 2)
        try:
            if not port:
                # An empty port ('host:') is treated like an invalid one.
                # Use the instance-call raise form: same behavior as the
                # old 'raise ValueError, "..."' but forward-compatible.
                raise ValueError("no digits")
            nport = int(port)
        except ValueError:
            nport = None
        return host, nport
    return host, defport
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'."""
    global _queryprog
    # Compile lazily; the greedy first group splits on the *last* '?'.
    if _queryprog is None:
        import re
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    m = _queryprog.match(url)
    if m is None:
        return url, None
    return m.group(1, 2)
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'."""
    global _tagprog
    # Compile lazily; the greedy first group splits on the *last* '#'.
    if _tagprog is None:
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    m = _tagprog.match(url)
    if m is None:
        return url, None
    return m.group(1, 2)
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    # Everything before the first ';' is the path; each later piece is
    # one attribute string.
    pieces = url.split(';')
    return pieces[0], pieces[1:]
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'."""
    global _valueprog
    # Compile lazily; splits on the *first* '=' ([^=]* is the key).
    if _valueprog is None:
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    m = _valueprog.match(attr)
    if m is None:
        return attr, None
    return m.group(1, 2)
# Map every two-digit hex string (both cases, e.g. '7e' and '7E') to the
# single character it denotes; used by unquote() to decode %XX escapes.
_hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
_hextochr.update(('%02X' % i, chr(i)) for i in range(256))
def unquote(s):
    """unquote('abc%20def') -> 'abc def'."""
    parts = s.split('%')
    # parts[0] never starts with an escape; every later part begins with
    # the two hex digits of a %XX escape (when well-formed).
    for i in xrange(1, len(parts)):
        part = parts[i]
        try:
            parts[i] = _hextochr[part[:2]] + part[2:]
        except KeyError:
            # Not a valid two-digit hex escape: keep the '%' literally.
            parts[i] = '%' + part
        except UnicodeDecodeError:
            # Concatenating the decoded byte with a unicode tail failed;
            # fall back to building a unicode character directly.
            parts[i] = unichr(int(part[:2], 16)) + part[2:]
    return "".join(parts)
def unquote_plus(s):
    """Like unquote(), but also turn '+' into ' ': '%7e/abc+def' -> '~/abc def'."""
    return unquote(s.replace('+', ' '))
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
# Cache of quoting tables, keyed by (safe, always_safe).
_safemaps = {}

def quote(s, safe='/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.
    """
    cachekey = (safe, always_safe)
    if cachekey in _safemaps:
        safe_map = _safemaps[cachekey]
    else:
        # Build a 256-entry table mapping each character either to
        # itself (safe) or to its %XX escape, and cache it.
        safe += always_safe
        safe_map = {}
        for code in range(256):
            ch = chr(code)
            safe_map[ch] = ch if ch in safe else ('%%%02X' % code)
        _safemaps[cachekey] = safe_map
    return ''.join(map(safe_map.__getitem__, s))
def quote_plus(s, safe=''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' not in s:
        return quote(s, safe)
    # Let spaces pass through quote() unescaped, then turn them into '+'.
    return quote(s, safe + ' ').replace(' ', '+')
def urlencode(query,doseq=0):
    """Encode a sequence of two-element tuples or dictionary into a URL query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.
    """

    if hasattr(query,"items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # zero-length sequences of all types will get here and succeed,
            # but that's a minor nit - since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError:
            # Re-raise with the caller's traceback so the failure points
            # at the offending call site rather than this probe.
            ty,va,tb = sys.exc_info()
            raise TypeError, "not a valid non-string sequence or mapping object", tb

    l = []
    if not doseq:
        # preserve old behavior: every value is stringified whole, even
        # if it happens to be a sequence.
        for k, v in query:
            k = quote_plus(str(k))
            v = quote_plus(str(v))
            l.append(k + '=' + v)
    else:
        for k, v in query:
            k = quote_plus(str(k))
            if isinstance(v, str):
                v = quote_plus(v)
                l.append(k + '=' + v)
            elif _is_unicode(v):
                # is there a reasonable way to convert to ASCII?
                # encode generates a string, but "replace" or "ignore"
                # lose information and "strict" can raise UnicodeError
                v = quote_plus(v.encode("ASCII","replace"))
                l.append(k + '=' + v)
            else:
                try:
                    # is this a sufficient test for sequence-ness?
                    x = len(v)
                except TypeError:
                    # not a sequence
                    v = quote_plus(str(v))
                    l.append(k + '=' + v)
                else:
                    # loop over the sequence: one k=elt pair per element
                    for elt in v:
                        l.append(k + '=' + quote_plus(str(elt)))
    return '&'.join(l)
# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.
    """
    proxies = {}
    for var, value in os.environ.items():
        var = var.lower()
        if not value:
            continue
        if var.endswith('_proxy'):
            # Strip the '_proxy' suffix to get the scheme name.
            proxies[var[:-6]] = value
    return proxies
# Platform-specific proxy discovery: Mac OS X reads Internet Config,
# Windows reads the registry, everything else falls back to environment
# variables only.
if sys.platform == 'darwin':
    def getproxies_internetconfig():
        """Return a dictionary of scheme -> proxy server URL mappings.

        By convention the mac uses Internet Config to store
        proxies.  An HTTP proxy, for instance, is stored under
        the HttpProxy key.

        """
        try:
            import ic
        except ImportError:
            # No Internet Config support available: report no proxies.
            return {}

        try:
            config = ic.IC()
        except ic.error:
            return {}
        proxies = {}
        # HTTP:
        if 'UseHTTPProxy' in config and config['UseHTTPProxy']:
            try:
                value = config['HTTPProxyHost']
            except ic.error:
                pass
            else:
                proxies['http'] = 'http://%s' % value
        # FTP: XXXX To be done.
        # Gopher: XXXX To be done.
        return proxies

    def proxy_bypass(x):
        # No bypass list support on this platform.
        return 0

    def getproxies():
        # Environment variables win over Internet Config settings.
        return getproxies_environment() or getproxies_internetconfig()

elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        try:
            import _winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                       'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        import re
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
            internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass(host):
        # Return 1 if 'host' matches the registry's ProxyOverride list,
        # 0 otherwise (including when the registry can't be read).
        try:
            import _winreg
            import re
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
            # ^^^^ Returned as Unicode but problems if not converted to ASCII
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        i = 0
        while i < len(proxyOverride):
            if proxyOverride[i] == '<local>':
                proxyOverride[i:i+1] = ['localhost',
                                        '127.0.0.1',
                                        socket.gethostname(),
                                        socket.gethostbyname(
                                            socket.gethostname())]
            i += 1
        # print proxyOverride
        # now check if we match one of the registry values.
        for test in proxyOverride:
            # Translate the registry's glob syntax into a regex.
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                # print "%s <--> %s" %( test, val )
                if re.match(test, val, re.I):
                    return 1
        return 0

else:
    # By default use environment variables
    getproxies = getproxies_environment

    def proxy_bypass(host):
        # No bypass list support on this platform.
        return 0
1430 # Test and time quote() and unquote()
1431 def test1():
1432 s = ''
1433 for i in range(256): s = s + chr(i)
1434 s = s*4
1435 t0 = time.time()
1436 qs = quote(s)
1437 uqs = unquote(qs)
1438 t1 = time.time()
1439 if uqs != s:
1440 print 'Wrong!'
1441 print repr(s)
1442 print repr(qs)
1443 print repr(uqs)
1444 print round(t1 - t0, 3), 'sec'
1447 def reporthook(blocknum, blocksize, totalsize):
1448 # Report during remote transfers
1449 print "Block number: %d, Block size: %d, Total size: %d" % (
1450 blocknum, blocksize, totalsize)
1452 # Test program
1453 def test(args=[]):
1454 if not args:
1455 args = [
1456 '/etc/passwd',
1457 'file:/etc/passwd',
1458 'file://localhost/etc/passwd',
1459 'ftp://ftp.gnu.org/pub/README',
1460 'http://www.python.org/index.html',
1462 if hasattr(URLopener, "open_https"):
1463 args.append('https://synergy.as.cmu.edu/~geek/')
1464 try:
1465 for url in args:
1466 print '-'*10, url, '-'*10
1467 fn, h = urlretrieve(url, None, reporthook)
1468 print fn
1469 if h:
1470 print '======'
1471 for k in h.keys(): print k + ':', h[k]
1472 print '======'
1473 fp = open(fn, 'rb')
1474 data = fp.read()
1475 del fp
1476 if '\r' in data:
1477 table = string.maketrans("", "")
1478 data = data.translate(table, "\r")
1479 print data
1480 fn, h = None, None
1481 print '-'*40
1482 finally:
1483 urlcleanup()
1485 def main():
1486 import getopt, sys
1487 try:
1488 opts, args = getopt.getopt(sys.argv[1:], "th")
1489 except getopt.error, msg:
1490 print msg
1491 print "Use -h for help"
1492 return
1493 t = 0
1494 for o, a in opts:
1495 if o == '-t':
1496 t = t + 1
1497 if o == '-h':
1498 print "Usage: python urllib.py [-t] [url ...]"
1499 print "-t runs self-test;",
1500 print "otherwise, contents of urls are printed"
1501 return
1502 if t:
1503 if t > 1:
1504 test1()
1505 test(args)
1506 else:
1507 if not args:
1508 print "Use -h for help"
1509 for url in args:
1510 print urlopen(url).read(),
# Run test program when run as a script (see main() for the options).
if __name__ == '__main__':
    main()