From 85a3135d8acf9d3b75d1905746b91b69400dd8e7 Mon Sep 17 00:00:00 2001 From: Thomas Leonard Date: Sun, 18 Dec 2011 15:10:45 +0000 Subject: [PATCH] Handle HTTP redirects manually Instead of having the download threads follow redirects, report them back to the main thread and let it deal with them. This will be needed to support connection pooling, since the download may need to be tranferred to another thread. --- zeroinstall/injector/_download_child.py | 34 +++++++++++++++----- zeroinstall/injector/download.py | 56 ++++++++++++++++++++++----------- 2 files changed, 65 insertions(+), 25 deletions(-) diff --git a/zeroinstall/injector/_download_child.py b/zeroinstall/injector/_download_child.py index f81b8b6..fa8c927 100644 --- a/zeroinstall/injector/_download_child.py +++ b/zeroinstall/injector/_download_child.py @@ -27,16 +27,34 @@ for ca_bundle in ["/etc/ssl/certs/ca-certificates.crt", # Debian/Ubuntu/Arch Lin def getConnection(self, host, timeout=300): return ValidatingHTTPSConnection(host) - - urlopener = urllib2.build_opener(ValidatingHTTPSHandler) - - # Builds an opener that overrides the default HTTPS handler with our one - _my_urlopen = urllib2.build_opener(ValidatingHTTPSHandler()).open + MyHTTPSHandler = ValidatingHTTPSHandler break else: from logging import warn warn("No root CA's found; security of HTTPS connections cannot be verified") - _my_urlopen = urllib2.urlopen + MyHTTPSHandler = urllib2.HTTPSHandler + +class Redirect(Exception): + def __init__(self, req): + Exception.__init__(self, "Redirect") + self.req = req + +class MyRedirectHandler(urllib2.HTTPRedirectHandler): + """Throw an exception on redirects instead of continuing. The redirect will be handled in the main thread + so it can work with connection pooling.""" + def redirect_request(self, req, fp, code, msg, headers, newurl): + new_req = urllib2.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl) + if new_req: + raise Redirect(new_req) + +# Our handler differs from the Python default in that: +# - we don't support file:// URLs +# - we don't follow HTTP redirects +_my_urlopen = urllib2.OpenerDirector() +for klass in [urllib2.ProxyHandler, urllib2.UnknownHandler, urllib2.HTTPHandler, + urllib2.HTTPDefaultErrorHandler, MyRedirectHandler, + urllib2.FTPHandler, urllib2.HTTPErrorProcessor, MyHTTPSHandler]: + _my_urlopen.add_handler(klass()) def download_in_thread(url, target_file, if_modified_since, notify_done): try: @@ -45,7 +63,7 @@ def download_in_thread(url, target_file, if_modified_since, notify_done): req = urllib2.Request(url) if url.startswith('http:') and if_modified_since: req.add_header('If-Modified-Since', if_modified_since) - src = _my_urlopen(req) + src = _my_urlopen.open(req) else: raise Exception(_('Unsupported URL protocol in: %s') % url) @@ -67,6 +85,8 @@ def download_in_thread(url, target_file, if_modified_since, notify_done): #print >>sys.stderr, "Error downloading '" + url + "': " + (str(ex) or str(ex.__class__.__name__)) __, ex, tb = sys.exc_info() notify_done(download.RESULT_FAILED, (download.DownloadError(_('Error downloading {url}: {ex}').format(url = url, ex = ex)), tb)) + except Redirect as ex: + notify_done(download.RESULT_REDIRECT, redirect = ex.req.get_full_url()) except Exception as ex: __, ex, tb = sys.exc_info() notify_done(download.RESULT_FAILED, (ex, tb)) diff --git a/zeroinstall/injector/download.py b/zeroinstall/injector/download.py index 1b286a9..f14f93f 100644 --- a/zeroinstall/injector/download.py +++ b/zeroinstall/injector/download.py @@ -21,10 +21,10 @@ download_fetching = "fetching" # In progress download_complete = "complete" # Downloaded and cached OK download_failed = "failed" -# NB: duplicated in _download_child.py RESULT_OK = 0 RESULT_FAILED = 1 RESULT_NOT_MODIFIED = 2 +RESULT_REDIRECT = 3 class DownloadError(SafeException): """Download process failed.""" @@ -87,29 +87,49 @@ class Download(object): """Will trigger L{downloaded} when done (on success or failure).""" from ._download_child import download_in_thread - result = [] - thread_blocker = tasks.Blocker("wait for thread " + self.url) - def notify_done(status, ex = None): - result.append(status) - def wake_up_main(): - thread_blocker.trigger(ex) - return False - gobject.idle_add(wake_up_main) - child = threading.Thread(target = lambda: download_in_thread(self.url, self.tempfile, self.modification_time, notify_done)) - child.daemon = True - child.start() + # (changed if we get redirected) + current_url = self.url - # Wait for child to complete download. - yield thread_blocker + redirections_remaining = 10 - # Download is complete... - child.join() + while True: + result = [] + thread_blocker = tasks.Blocker("wait for thread " + current_url) + def notify_done(status, ex = None, redirect = None): + result.append((status, redirect)) + def wake_up_main(): + thread_blocker.trigger(ex) + return False + gobject.idle_add(wake_up_main) + child = threading.Thread(target = lambda: download_in_thread(current_url, self.tempfile, self.modification_time, notify_done)) + child.daemon = True + child.start() + + # Wait for child to complete download. + yield thread_blocker + + # Download is complete... + child.join() + + (status, redirect), = result + + if status != RESULT_REDIRECT: + assert not redirect, redirect + break + + assert redirect + current_url = redirect + + if redirections_remaining == 0: + raise DownloadError("Too many redirections {url} -> {current}".format( + url = self.url, + current = current_url)) + redirections_remaining -= 1 + # (else go around the loop again) assert self.status is download_fetching assert self.tempfile is not None - status, = result - if status == RESULT_NOT_MODIFIED: debug("%s not modified", self.url) self.tempfile = None -- 2.11.4.GIT