From 7f87baceeafa5ba39f84b90af8157292cf98eb18 Mon Sep 17 00:00:00 2001 From: Thomas Leonard Date: Sat, 14 Jul 2012 17:28:32 +0100 Subject: [PATCH] Allow mirroring both archives and implementations If an archive isn't found, we try to find a mirror for the archive. If that fails, we try to find a mirror for the whole implementation. This allows us to cope with the two common cases: 1) we have exact copies of the original archives, and the mirror can send us to them 2) we have generated a new archive for the whole implementation from the cache --- tests/Hello-new.xml | 4 +- tests/Hello.xml | 6 +-- tests/server.py | 4 +- tests/testdownload.py | 27 +++++++++++-- zeroinstall/injector/fetch.py | 79 +++++++++++++++++++++++++++------------ zeroinstall/injector/scheduler.py | 9 +++-- 6 files changed, 94 insertions(+), 35 deletions(-) diff --git a/tests/Hello-new.xml b/tests/Hello-new.xml index 3fede4c..2c254a4 100644 --- a/tests/Hello-new.xml +++ b/tests/Hello-new.xml @@ -9,7 +9,7 @@ diff --git a/tests/Hello.xml b/tests/Hello.xml index bac5b8d..c9b88b5 100644 --- a/tests/Hello.xml +++ b/tests/Hello.xml @@ -5,11 +5,11 @@ Hello Hello - + diff --git a/tests/server.py b/tests/server.py index abe9ed5..cd41805 100644 --- a/tests/server.py +++ b/tests/server.py @@ -47,8 +47,10 @@ class MyHandler(server.BaseHTTPRequestHandler): # (don't use a symlink as they don't work on Windows) if leaf == 'latest.xml': leaf = 'Hello.xml' - elif parsed.path == '/0mirror/feeds/http/example.com:8000/Hello.xml/impl/sha1=3ce644dc725f1d21cfcf02562c76f375944b266a': + elif parsed.path == '/0mirror/archive/http%3A%23%23example.com%3A8000%23HelloWorld.tgz': leaf = 'HelloWorld.tgz' + elif parsed.path == '/0mirror/feeds/http/example.com:8000/Hello.xml/impl/sha1=3ce644dc725f1d21cfcf02562c76f375944b266a': + leaf = 'HelloWorld.tar.bz2' if not resp: self.send_error(404, "Expected %s; got %s" % (next_step, parsed.path)) diff --git a/tests/testdownload.py b/tests/testdownload.py index 25431df..3ae5259 100755 --- 
a/tests/testdownload.py +++ b/tests/testdownload.py @@ -398,7 +398,6 @@ class TestDownload(BaseTest): def testRecipeFailure(self): old_out = sys.stdout try: - sys.stdout = StringIO() run_server('*') driver = Driver(requirements = Requirements(os.path.abspath('Recipe.xml')), config = self.config) try: @@ -417,7 +416,7 @@ class TestDownload(BaseTest): '/0mirror/feeds/http/example.com:8000/Hello.xml/latest.xml', '/0mirror/keys/6FCF121BE2390E0B.gpg', server.Give404('/HelloWorld.tgz'), - '/0mirror/feeds/http/example.com:8000/Hello.xml/impl/sha1=3ce644dc725f1d21cfcf02562c76f375944b266a') + '/0mirror/archive/http%3A%23%23example.com%3A8000%23HelloWorld.tgz') driver = Driver(requirements = Requirements('http://example.com:8000/Hello.xml'), config = self.config) self.config.mirror = 'http://example.com:8000/0mirror' @@ -431,6 +430,28 @@ class TestDownload(BaseTest): path = self.config.stores.lookup_any(driver.solver.selections.selections['http://example.com:8000/Hello.xml'].digests) assert os.path.exists(os.path.join(path, 'HelloWorld', 'main')) + def testImplMirror(self): + # This is like testMirror, except we have a different archive (that generates the same content), + # rather than an exact copy of the unavailable archive. 
+ trust.trust_db.trust_key('DE937DD411906ACF7C263B396FCF121BE2390E0B', 'example.com:8000') + run_server('/Hello.xml', + '/6FCF121BE2390E0B.gpg', + server.Give404('/HelloWorld.tgz'), + server.Give404('/0mirror/archive/http%3A%2F%2Flocalhost%3A8000%2FHelloWorld.tgz'), + '/0mirror/feeds/http/example.com:8000/Hello.xml/impl/sha1=3ce644dc725f1d21cfcf02562c76f375944b266a') + driver = Driver(requirements = Requirements('http://example.com:8000/Hello.xml'), config = self.config) + self.config.mirror = 'http://example.com:8000/0mirror' + + refreshed = driver.solve_with_downloads() + tasks.wait_for_blocker(refreshed) + assert driver.solver.ready + + getLogger().setLevel(logging.ERROR) + downloaded = driver.download_uncached_implementations() + tasks.wait_for_blocker(downloaded) + path = self.config.stores.lookup_any(driver.solver.selections.selections['http://example.com:8000/Hello.xml'].digests) + assert os.path.exists(os.path.join(path, 'HelloWorld', 'main')) + def testReplay(self): old_out = sys.stdout try: @@ -458,7 +479,7 @@ class TestDownload(BaseTest): assert "New feed's modification time is before old version" in str(ex) # Must finish with the newest version - self.assertEqual(1235911552, self.config.iface_cache._get_signature_date(iface.uri)) + self.assertEqual(1342285569, self.config.iface_cache._get_signature_date(iface.uri)) finally: sys.stdout = old_out diff --git a/zeroinstall/injector/fetch.py b/zeroinstall/injector/fetch.py index 840ee00..2c637d7 100644 --- a/zeroinstall/injector/fetch.py +++ b/zeroinstall/injector/fetch.py @@ -162,6 +162,19 @@ class Fetcher(object): """Return the URL of a mirror for this feed.""" return self._get_mirror_url(url, 'latest.xml') + def _get_archive_mirror(self, source): + if self.config.mirror is None: + return None + if support.urlparse(source.url).hostname == 'localhost': + return None + if sys.version_info[0] > 2: + from urllib.parse import quote + else: + from urllib import quote + return '{mirror}/archive/{archive}'.format( 
+ mirror = self.config.mirror, + archive = quote(source.url.replace('/', '#'), safe = '')) + def _get_impl_mirror(self, impl): return self._get_mirror_url(impl.feed.url, 'impl/' + _escape_slashes(impl.id)) @@ -356,26 +369,44 @@ class Fetcher(object): {'algorithms': impl.digests, 'implementation': impl.feed.get_name(), 'version': impl.get_version()}) @tasks.async - def download_impl(): - if isinstance(retrieval_method, DownloadSource): - blocker, stream = self.download_archive(retrieval_method, impl_hint = impl) - yield blocker - tasks.check(blocker) - - stream.seek(0) - if self.external_store: - self._add_to_external_store(required_digest, [retrieval_method], [stream]) - else: - self._add_to_cache(required_digest, stores, retrieval_method, stream) - elif isinstance(retrieval_method, Recipe): - blocker = self.cook(required_digest, retrieval_method, stores, impl_hint = impl) - yield blocker - tasks.check(blocker) - else: - raise Exception(_("Unknown download type for '%s'") % retrieval_method) + def download_impl(method): + original_exception = None + while True: + try: + if isinstance(method, DownloadSource): + blocker, stream = self.download_archive(method, impl_hint = impl, + may_use_mirror = original_exception is None) + yield blocker + tasks.check(blocker) + + stream.seek(0) + if self.external_store: + self._add_to_external_store(required_digest, [method], [stream]) + else: + self._add_to_cache(required_digest, stores, method, stream) + elif isinstance(method, Recipe): + blocker = self.cook(required_digest, method, stores, impl_hint = impl) + yield blocker + tasks.check(blocker) + else: + raise Exception(_("Unknown download type for '%s'") % method) + except download.DownloadError as ex: + if original_exception: + info("Error from mirror: %s", ex) + raise original_exception + else: + original_exception = ex + mirror_url = self._get_impl_mirror(impl) + if mirror_url is not None: + info("%s: trying implementation mirror at %s", ex, mirror_url) + method = 
model.DownloadSource(impl, mirror_url, + None, None, type = 'application/x-bzip-compressed-tar') + continue # Retry + raise + break self.handler.impl_added_to_store(impl) - return download_impl() + return download_impl(retrieval_method) def _add_to_cache(self, required_digest, stores, retrieval_method, stream): assert isinstance(retrieval_method, DownloadSource) @@ -407,7 +438,7 @@ class Fetcher(object): os.remove(path) # (force is deprecated and ignored) - def download_archive(self, download_source, force = False, impl_hint = None): + def download_archive(self, download_source, force = False, impl_hint = None, may_use_mirror = False): """Fetch an archive. You should normally call L{download_impl} instead, since it handles other kinds of retrieval method too.""" from zeroinstall.zerostore import unpack @@ -424,13 +455,15 @@ class Fetcher(object): if not self.external_store: unpack.check_type_ok(mime_type) - if impl_hint: - mirror = self._get_impl_mirror(impl_hint) + if may_use_mirror: + mirror = self._get_archive_mirror(download_source) else: mirror = None dl = self.download_url(download_source.url, hint = impl_hint, mirror_url = mirror) - dl.expected_size = download_source.size + (download_source.start_offset or 0) + if download_source.size is not None: + dl.expected_size = download_source.size + (download_source.start_offset or 0) + # (else don't know sizes for mirrored archives) return (dl.downloaded, dl.tempfile) # (force is deprecated and ignored) @@ -609,7 +642,7 @@ class DownloadStepRunner(StepRunner): model_type = model.DownloadSource def prepare(self, fetcher, blockers): - self.blocker, self.stream = fetcher.download_archive(self.stepdata, impl_hint = self.impl_hint) + self.blocker, self.stream = fetcher.download_archive(self.stepdata, impl_hint = self.impl_hint, may_use_mirror = True) assert self.stream blockers.append(self.blocker) diff --git a/zeroinstall/injector/scheduler.py b/zeroinstall/injector/scheduler.py index b6c9de3..a9820da 100644 --- 
a/zeroinstall/injector/scheduler.py +++ b/zeroinstall/injector/scheduler.py @@ -74,9 +74,12 @@ class DownloadScheduler: if mirror_url is None: raise original_exception - # Try mirror - logging.warn("%s: trying mirror at %s", ex, mirror_url) - dl.expected_size = None + # Try the mirror. + # There are actually two places where we try to use the mirror: this one + # looks to see if we have an exact copy of the same file somewhere else. If this + # fails, Fetcher will also look for a different archive that would generate + # the required implementation. + logging.warn("%s: trying archive mirror at %s", ex, mirror_url) step.redirect = mirror_url redirections_remaining = 10 -- 2.11.4.GIT