1 """
2 Downloads feeds, keys, packages and icons.
3 """
5 # Copyright (C) 2009, Thomas Leonard
6 # See the README file for details, or visit http://0install.net.
8 from zeroinstall import _, NeedDownload, logger
9 import os, sys
11 from zeroinstall import support
12 from zeroinstall.support import tasks, basedir, portable_rename
13 from zeroinstall.injector.namespaces import XMLNS_IFACE, config_site
14 from zeroinstall.injector import model
15 from zeroinstall.injector.model import DownloadSource, Recipe, SafeException, escape, DistributionSource
16 from zeroinstall.injector.iface_cache import PendingFeed, ReplayAttack
17 from zeroinstall.injector.handler import NoTrustedKeys
18 from zeroinstall.injector import download

def _escape_slashes(path):
    return path.replace('/', '%23')

def _get_feed_dir(feed):
    """The algorithm from 0mirror."""
    if '#' in feed:
        raise SafeException(_("Invalid URL '%s'") % feed)
    scheme, rest = feed.split('://', 1)
    assert '/' in rest, "Missing / in %s" % feed
    domain, rest = rest.split('/', 1)
    for x in [scheme, domain, rest]:
        if not x or x.startswith('.'):
            raise SafeException(_("Invalid URL '%s'") % feed)
    return '/'.join(['feeds', scheme, domain, _escape_slashes(rest)])
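
# Illustrative sketch (not part of the original module): the mapping above
# turns a feed URL into a mirror-relative directory, e.g.
#
#     _get_feed_dir('http://example.com/prog.xml')
#         -> 'feeds/http/example.com/prog.xml'
#     _get_feed_dir('http://example.com/sub/prog.xml')
#         -> 'feeds/http/example.com/sub%23prog.xml'
#
# Slashes after the domain are escaped so each feed maps to a single
# directory entry on the mirror.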

class KeyInfoFetcher:
    """Fetches information about a GPG key from a key-info server.
    See L{Fetcher.fetch_key_info} for details.
    @since: 0.42

    Example:

    >>> kf = KeyInfoFetcher(fetcher, 'https://server', fingerprint)
    >>> while True:
    ...     print(kf.info)
    ...     if kf.blocker is None: break
    ...     print(kf.status)
    ...     yield kf.blocker
    """
    def __init__(self, fetcher, server, fingerprint):
        self.fingerprint = fingerprint
        self.info = []
        self.blocker = None

        if server is None: return

        self.status = _('Fetching key information from %s...') % server

        dl = fetcher.download_url(server + '/key/' + fingerprint)

        from xml.dom import minidom

        @tasks.async
        def fetch_key_info():
            try:
                tempfile = dl.tempfile
                yield dl.downloaded
                self.blocker = None
                tasks.check(dl.downloaded)
                tempfile.seek(0)
                doc = minidom.parse(tempfile)
                if doc.documentElement.localName != 'key-lookup':
                    raise SafeException(_('Expected <key-lookup>, not <%s>') % doc.documentElement.localName)
                self.info += doc.documentElement.childNodes
            except Exception as ex:
                doc = minidom.parseString('<item vote="bad"/>')
                root = doc.documentElement
                root.appendChild(doc.createTextNode(_('Error getting key information: %s') % ex))
                self.info.append(root)

        self.blocker = fetch_key_info()

class Fetcher(object):
    """Downloads and stores various things.
    @ivar config: used to get handler, iface_cache and stores
    @type config: L{config.Config}
    @ivar key_info: caches information about GPG keys
    @type key_info: {str: L{KeyInfoFetcher}}
    """
    __slots__ = ['config', 'key_info', '_scheduler', 'external_store']

    def __init__(self, config):
        assert config.handler, "API change!"
        self.config = config
        self.key_info = {}
        self._scheduler = None
        self.external_store = os.environ.get('ZEROINSTALL_EXTERNAL_STORE')

    @property
    def handler(self):
        return self.config.handler

    @property
    def scheduler(self):
        if self._scheduler is None:
            from . import scheduler
            self._scheduler = scheduler.DownloadScheduler()
        return self._scheduler

    # (force is deprecated and ignored)
    @tasks.async
    def cook(self, required_digest, recipe, stores, force = False, impl_hint = None):
        """Follow a Recipe.
        @param impl_hint: the Implementation this is for (if any) as a hint for the GUI
        @see: L{download_impl} uses this method when appropriate"""
        # Maybe we're taking this metaphor too far?

        # Start a download for each ingredient
        blockers = []
        steps = []
        for stepdata in recipe.steps:
            cls = StepRunner.class_for(stepdata)
            step = cls(stepdata, impl_hint = impl_hint)
            step.prepare(self, blockers)
            steps.append(step)

        while blockers:
            yield blockers
            tasks.check(blockers)
            blockers = [b for b in blockers if not b.happened]

        if self.external_store:
            # Note: external_store will not yet work with non-<archive> steps.
            streams = [step.stream for step in steps]
            self._add_to_external_store(required_digest, recipe.steps, streams)
        else:
            # Create an empty directory for the new implementation
            store = stores.stores[0]
            tmpdir = store.get_tmp_dir_for(required_digest)
            try:
                # Unpack each of the downloaded archives into it in turn
                for step in steps:
                    step.apply(tmpdir)
                # Check that the result is correct and store it in the cache
                store.check_manifest_and_rename(required_digest, tmpdir)
                tmpdir = None
            finally:
                # If unpacking fails, remove the temporary directory
                if tmpdir is not None:
                    support.ro_rmtree(tmpdir)
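
    # Illustrative usage sketch (not part of the original module): cook() is
    # normally reached via download_impl() when the retrieval method is a
    # Recipe, but a caller holding a recipe could drive it directly, assuming
    # a configured `fetcher`, a `recipe` and the system `stores`:
    #
    #     blocker = fetcher.cook(required_digest, recipe, stores)
    #     tasks.wait_for_blocker(blocker)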

    def _get_mirror_url(self, feed_url, resource):
        """Return the URL of a mirror for this feed."""
        if self.config.mirror is None:
            return None
        if support.urlparse(feed_url).hostname == 'localhost':
            return None
        return '%s/%s/%s' % (self.config.mirror, _get_feed_dir(feed_url), resource)

    def get_feed_mirror(self, url):
        """Return the URL of a mirror for this feed."""
        return self._get_mirror_url(url, 'latest.xml')

    def _get_archive_mirror(self, source):
        if self.config.mirror is None:
            return None
        if support.urlparse(source.url).hostname == 'localhost':
            return None
        if sys.version_info[0] > 2:
            from urllib.parse import quote
        else:
            from urllib import quote
        return '{mirror}/archive/{archive}'.format(
            mirror = self.config.mirror,
            archive = quote(source.url.replace('/', '#'), safe = ''))
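
    # Illustrative sketch (not part of the original module): for an archive at
    # 'http://example.com/foo.tgz' and a mirror root of
    # 'http://roscidus.com/0mirror', the URL built above would be
    #
    #     http://roscidus.com/0mirror/archive/http%3A%23%23example.com%23foo.tgz
    #
    # i.e. slashes become '#' and the result is then percent-encoded so the
    # whole source URL fits in a single path component.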

    def _get_impl_mirror(self, impl):
        return self._get_mirror_url(impl.feed.url, 'impl/' + _escape_slashes(impl.id))

    @tasks.async
    def get_packagekit_feed(self, feed_url):
        """Send a query to PackageKit (if available) for information about this package.
        On success, the result is added to iface_cache.
        """
        assert feed_url.startswith('distribution:'), feed_url
        master_feed = self.config.iface_cache.get_feed(feed_url.split(':', 1)[1])
        if master_feed:
            fetch = self.config.iface_cache.distro.fetch_candidates(master_feed)
            if fetch:
                yield fetch
                tasks.check(fetch)

            # Force feed to be regenerated with the new information
            self.config.iface_cache.get_feed(feed_url, force = True)

    def download_and_import_feed(self, feed_url, iface_cache = None):
        """Download the feed, download any required keys, confirm trust if needed and import.
        @param feed_url: the feed to be downloaded
        @type feed_url: str
        @param iface_cache: (deprecated)"""
        from .download import DownloadAborted

        assert iface_cache is None or iface_cache is self.config.iface_cache

        self.config.iface_cache.mark_as_checking(feed_url)

        logger.debug(_("download_and_import_feed %(url)s"), {'url': feed_url})
        assert not os.path.isabs(feed_url)

        if feed_url.startswith('distribution:'):
            return self.get_packagekit_feed(feed_url)

        primary = self._download_and_import_feed(feed_url, use_mirror = False)

        @tasks.named_async("monitor feed downloads for " + feed_url)
        def wait_for_downloads(primary):
            # Download just the upstream feed, unless it takes too long...
            timeout = tasks.TimeoutBlocker(5, 'Mirror timeout')  # 5 seconds

            yield primary, timeout
            tasks.check(timeout)

            try:
                tasks.check(primary)
                if primary.happened:
                    return  # OK, primary succeeded!
                # OK, maybe it's just being slow...
                logger.info("Feed download from %s is taking a long time.", feed_url)
                primary_ex = None
            except NoTrustedKeys as ex:
                raise  # Don't bother trying the mirror if we have a trust problem
            except ReplayAttack as ex:
                raise  # Don't bother trying the mirror if we have a replay attack
            except DownloadAborted as ex:
                raise  # Don't bother trying the mirror if the user cancelled
            except SafeException as ex:
                # Primary failed
                primary = None
                primary_ex = ex
                logger.warn(_("Feed download from %(url)s failed: %(exception)s"), {'url': feed_url, 'exception': ex})

            # Start downloading from mirror...
            mirror = self._download_and_import_feed(feed_url, use_mirror = True)

            # Wait until both mirror and primary tasks are complete...
            while True:
                blockers = list(filter(None, [primary, mirror]))
                if not blockers:
                    break
                yield blockers

                if primary:
                    try:
                        tasks.check(primary)
                        if primary.happened:
                            primary = None
                            # No point carrying on with the mirror once the primary has succeeded
                            if mirror:
                                logger.info(_("Primary feed download succeeded; aborting mirror download for %s") % feed_url)
                                mirror.dl.abort()
                    except SafeException as ex:
                        primary = None
                        primary_ex = ex
                        logger.info(_("Feed download from %(url)s failed; still trying mirror: %(exception)s"), {'url': feed_url, 'exception': ex})

                if mirror:
                    try:
                        tasks.check(mirror)
                        if mirror.happened:
                            mirror = None
                            if primary_ex:
                                # We already warned; no need to raise an exception too,
                                # as the mirror download succeeded.
                                primary_ex = None
                    except ReplayAttack as ex:
                        logger.info(_("Version from mirror is older than cached version; ignoring it: %s"), ex)
                        mirror = None
                        primary_ex = None
                    except SafeException as ex:
                        logger.info(_("Mirror download failed: %s"), ex)
                        mirror = None

            if primary_ex:
                raise primary_ex

        return wait_for_downloads(primary)
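
    # Illustrative usage sketch (not part of the original module), assuming a
    # configured Config object `config`:
    #
    #     fetcher = Fetcher(config)
    #     blocker = fetcher.download_and_import_feed('http://example.com/prog.xml')
    #     tasks.wait_for_blocker(blocker)
    #     feed = config.iface_cache.get_feed('http://example.com/prog.xml')
    #
    # The primary download gets a 5-second head start; only if it fails or
    # stalls does the mirror download begin, and whichever import succeeds
    # first wins.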

    def _download_and_import_feed(self, feed_url, use_mirror):
        """Download and import a feed.
        @param use_mirror: False to use primary location; True to use mirror."""
        if use_mirror:
            url = self.get_feed_mirror(feed_url)
            if url is None: return None
            logger.info(_("Trying mirror server for feed %s") % feed_url)
        else:
            url = feed_url

        dl = self.download_url(url, hint = feed_url)
        stream = dl.tempfile

        @tasks.named_async("fetch_feed " + url)
        def fetch_feed():
            yield dl.downloaded
            tasks.check(dl.downloaded)

            pending = PendingFeed(feed_url, stream)

            if use_mirror:
                # If we got the feed from a mirror, get the key from there too
                key_mirror = self.config.mirror + '/keys/'
            else:
                key_mirror = None

            keys_downloaded = tasks.Task(pending.download_keys(self, feed_hint = feed_url, key_mirror = key_mirror), _("download keys for %s") % feed_url)
            yield keys_downloaded.finished
            tasks.check(keys_downloaded.finished)

            if not self.config.iface_cache.update_feed_if_trusted(pending.url, pending.sigs, pending.new_xml):
                blocker = self.config.trust_mgr.confirm_keys(pending)
                if blocker:
                    yield blocker
                    tasks.check(blocker)
                if not self.config.iface_cache.update_feed_if_trusted(pending.url, pending.sigs, pending.new_xml):
                    raise NoTrustedKeys(_("No signing keys trusted; not importing"))

        task = fetch_feed()
        task.dl = dl
        return task

    def fetch_key_info(self, fingerprint):
        try:
            return self.key_info[fingerprint]
        except KeyError:
            self.key_info[fingerprint] = key_info = KeyInfoFetcher(self,
                    self.config.key_info_server, fingerprint)
            return key_info

    # (force is deprecated and ignored)
    def download_impl(self, impl, retrieval_method, stores, force = False):
        """Download an implementation.
        @param impl: the selected implementation
        @type impl: L{model.ZeroInstallImplementation}
        @param retrieval_method: a way of getting the implementation (e.g. an Archive or a Recipe)
        @type retrieval_method: L{model.RetrievalMethod}
        @param stores: where to store the downloaded implementation
        @type stores: L{zerostore.Stores}
        @rtype: L{tasks.Blocker}"""
        assert impl
        assert retrieval_method

        if isinstance(retrieval_method, DistributionSource):
            return retrieval_method.install(self.handler)

        from zeroinstall.zerostore import manifest, parse_algorithm_digest_pair
        best = None
        for digest in impl.digests:
            alg_name, digest_value = parse_algorithm_digest_pair(digest)
            alg = manifest.algorithms.get(alg_name, None)
            if alg and (best is None or best.rating < alg.rating):
                best = alg
                required_digest = digest

        if best is None:
            if not impl.digests:
                raise SafeException(_("No <manifest-digest> given for '%(implementation)s' version %(version)s") %
                        {'implementation': impl.feed.get_name(), 'version': impl.get_version()})
            raise SafeException(_("Unknown digest algorithms '%(algorithms)s' for '%(implementation)s' version %(version)s") %
                    {'algorithms': impl.digests, 'implementation': impl.feed.get_name(), 'version': impl.get_version()})

        @tasks.async
        def download_impl(method):
            original_exception = None
            while True:
                try:
                    if isinstance(method, DownloadSource):
                        blocker, stream = self.download_archive(method, impl_hint = impl,
                                may_use_mirror = original_exception is None)
                        yield blocker
                        tasks.check(blocker)

                        stream.seek(0)
                        if self.external_store:
                            self._add_to_external_store(required_digest, [method], [stream])
                        else:
                            self._add_to_cache(required_digest, stores, method, stream)
                    elif isinstance(method, Recipe):
                        blocker = self.cook(required_digest, method, stores, impl_hint = impl)
                        yield blocker
                        tasks.check(blocker)
                    else:
                        raise Exception(_("Unknown download type for '%s'") % method)
                except download.DownloadError as ex:
                    if original_exception:
                        logger.info("Error from mirror: %s", ex)
                        raise original_exception
                    else:
                        original_exception = ex
                    mirror_url = self._get_impl_mirror(impl)
                    if mirror_url is not None:
                        logger.info("%s: trying implementation mirror at %s", ex, mirror_url)
                        method = model.DownloadSource(impl, mirror_url,
                                None, None, type = 'application/x-bzip-compressed-tar')
                        continue  # Retry
                    raise
                break

            self.handler.impl_added_to_store(impl)
        return download_impl(retrieval_method)
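
    # Illustrative usage sketch (not part of the original module): given a
    # selected implementation, pick a retrieval method and download it,
    # assuming a configured `fetcher` and the system `stores`:
    #
    #     source = fetcher.get_best_source(impl)
    #     blocker = fetcher.download_impl(impl, source, stores)
    #     tasks.wait_for_blocker(blocker)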

    def _add_to_cache(self, required_digest, stores, retrieval_method, stream):
        assert isinstance(retrieval_method, DownloadSource)
        stores.add_archive_to_cache(required_digest, stream, retrieval_method.url, retrieval_method.extract,
                type = retrieval_method.type, start_offset = retrieval_method.start_offset or 0)

    def _add_to_external_store(self, required_digest, steps, streams):
        from zeroinstall.zerostore.unpack import type_from_url

        # Combine archive path, extract directory and MIME type arguments in an
        # alternating fashion. (List comprehensions rather than map(), so that
        # len() works on Python 3 too.)
        paths = [stream.name for stream in streams]
        extracts = [step.extract or "" for step in steps]
        types = [step.type or type_from_url(step.url) for step in steps]
        args = [None] * (len(paths) + len(extracts) + len(types))
        args[::3] = paths
        args[1::3] = extracts
        args[2::3] = types

        # close file handles to allow external processes access
        for stream in streams:
            stream.close()

        # delegate extracting archives to external tool
        import subprocess
        subprocess.call([self.external_store, "add", required_digest] + args)

        # delete temp files
        for path in paths:
            os.remove(path)
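
    # Illustrative sketch (not part of the original module): with two <archive>
    # steps, the helper named by ZEROINSTALL_EXTERNAL_STORE is invoked as
    #
    #     <store> add <digest> <path1> <extract1> <type1> <path2> <extract2> <type2>
    #
    # i.e. one (path, extract, type) triple per step, in step order.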

    # (force is deprecated and ignored)
    def download_archive(self, download_source, force = False, impl_hint = None, may_use_mirror = False):
        """Fetch an archive. You should normally call L{download_impl}
        instead, since it handles other kinds of retrieval method too."""
        from zeroinstall.zerostore import unpack

        url = download_source.url
        if not (url.startswith('http:') or url.startswith('https:') or url.startswith('ftp:')):
            raise SafeException(_("Unknown scheme in download URL '%s'") % url)

        mime_type = download_source.type
        if not mime_type:
            mime_type = unpack.type_from_url(download_source.url)
        if not mime_type:
            raise SafeException(_("No 'type' attribute on archive, and I can't guess from the name (%s)") % download_source.url)
        if not self.external_store:
            unpack.check_type_ok(mime_type)

        if may_use_mirror:
            mirror = self._get_archive_mirror(download_source)
        else:
            mirror = None

        dl = self.download_url(download_source.url, hint = impl_hint, mirror_url = mirror)
        if download_source.size is not None:
            dl.expected_size = download_source.size + (download_source.start_offset or 0)
        # (else don't know sizes for mirrored archives)
        return (dl.downloaded, dl.tempfile)

    # (force is deprecated and ignored)
    def download_icon(self, interface, force = False):
        """Download an icon for this interface and add it to the
        icon cache. If the interface has no icon do nothing.
        @return: the task doing the import, or None
        @rtype: L{tasks.Task}"""
        logger.debug("download_icon %(interface)s", {'interface': interface})

        modification_time = None
        existing_icon = self.config.iface_cache.get_icon_path(interface)
        if existing_icon:
            file_mtime = os.stat(existing_icon).st_mtime
            from email.utils import formatdate
            modification_time = formatdate(timeval = file_mtime, localtime = False, usegmt = True)

        # Find a suitable icon to download
        for icon in interface.get_metadata(XMLNS_IFACE, 'icon'):
            type = icon.getAttribute('type')
            if type != 'image/png':
                logger.debug(_('Skipping non-PNG icon'))
                continue
            source = icon.getAttribute('href')
            if source:
                break
            logger.warn(_('Missing "href" attribute on <icon> in %s'), interface)
        else:
            logger.info(_('No PNG icons found in %s'), interface)
            return

        dl = self.download_url(source, hint = interface, modification_time = modification_time)

        @tasks.async
        def download_and_add_icon():
            stream = dl.tempfile
            yield dl.downloaded
            try:
                tasks.check(dl.downloaded)
                if dl.unmodified: return
                stream.seek(0)

                import shutil, tempfile
                icons_cache = basedir.save_cache_path(config_site, 'interface_icons')

                tmp_file = tempfile.NamedTemporaryFile(dir = icons_cache, delete = False)
                shutil.copyfileobj(stream, tmp_file)
                tmp_file.close()

                icon_file = os.path.join(icons_cache, escape(interface.uri))
                portable_rename(tmp_file.name, icon_file)
            except Exception as ex:
                self.handler.report_error(ex)
            finally:
                stream.close()

        return download_and_add_icon()
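
    # Illustrative usage sketch (not part of the original module): the returned
    # task can simply be awaited, assuming a resolved `interface`:
    #
    #     blocker = fetcher.download_icon(interface)
    #     if blocker: tasks.wait_for_blocker(blocker)
    #     path = config.iface_cache.get_icon_path(interface)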

    def download_impls(self, implementations, stores):
        """Download the given implementations, choosing a suitable retrieval method for each.
        If any of the retrieval methods are DistributionSources and
        need confirmation, handler.confirm is called to check that the
        installation should proceed.
        """
        unsafe_impls = []

        to_download = []
        for impl in implementations:
            logger.debug(_("start_downloading_impls: for %(feed)s get %(implementation)s"), {'feed': impl.feed, 'implementation': impl})
            source = self.get_best_source(impl)
            if not source:
                raise SafeException(_("Implementation %(implementation_id)s of interface %(interface)s"
                        " cannot be downloaded (no download locations given in "
                        "interface!)") % {'implementation_id': impl.id, 'interface': impl.feed.get_name()})
            to_download.append((impl, source))

            if isinstance(source, DistributionSource) and source.needs_confirmation:
                unsafe_impls.append(source.package_id)

        @tasks.async
        def download_impls():
            if unsafe_impls:
                confirm = self.handler.confirm_install(_('The following components need to be installed using native packages. '
                    'These come from your distribution, and should therefore be trustworthy, but they also '
                    'run with extra privileges. In particular, installing them may run extra services on your '
                    'computer or affect other users. You may be asked to enter a password to confirm. The '
                    'packages are:\n\n') + ('\n'.join('- ' + x for x in unsafe_impls)))
                yield confirm
                tasks.check(confirm)

            blockers = []

            for impl, source in to_download:
                blockers.append(self.download_impl(impl, source, stores))

            # Record the first error; log the rest
            error = []
            def dl_error(ex, tb = None):
                if error:
                    self.handler.report_error(ex)
                else:
                    error.append((ex, tb))
            while blockers:
                yield blockers
                tasks.check(blockers, dl_error)

                blockers = [b for b in blockers if not b.happened]
            if error:
                from zeroinstall import support
                support.raise_with_traceback(*error[0])

        if not to_download:
            return None

        return download_impls()
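
    # Illustrative usage sketch (not part of the original module): download
    # everything a solver selected, assuming a list `impls` of implementations
    # and the system `stores`:
    #
    #     blocker = fetcher.download_impls(impls, stores)
    #     if blocker: tasks.wait_for_blocker(blocker)
    #
    # (None is returned when there is nothing to download.)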

    def get_best_source(self, impl):
        """Return the best download source for this implementation.
        @rtype: L{model.RetrievalMethod}"""
        if impl.download_sources:
            return impl.download_sources[0]
        return None

    def download_url(self, url, hint = None, modification_time = None, expected_size = None, mirror_url = None):
        """The most low-level method here; just download a raw URL.
        @param url: the location to download from
        @param hint: user-defined data to store on the Download (e.g. used by the GUI)
        @param modification_time: don't download unless newer than this
        @param mirror_url: an alternative URL to try if this one fails
        @type mirror_url: str
        @rtype: L{download.Download}
        @since: 1.5
        """
        if self.handler.dry_run:
            raise NeedDownload(url)

        dl = download.Download(url, hint = hint, modification_time = modification_time, expected_size = expected_size, auto_delete = not self.external_store)
        dl.mirror = mirror_url
        self.handler.monitor_download(dl)
        dl.downloaded = self.scheduler.download(dl)
        return dl
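
    # Illustrative usage sketch (not part of the original module): the lowest
    # level primitive, shown with a hypothetical URL:
    #
    #     dl = fetcher.download_url('http://example.com/archive.tgz')
    #     tasks.wait_for_blocker(dl.downloaded)
    #     dl.tempfile.seek(0)  # the data is in the open temporary file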

class StepRunner(object):
    """The base class of all step runners.
    @since: 1.10"""

    def __init__(self, stepdata, impl_hint):
        self.stepdata = stepdata
        self.impl_hint = impl_hint

    def prepare(self, fetcher, blockers):
        pass

    @classmethod
    def class_for(cls, model):
        for subcls in cls.__subclasses__():
            if subcls.model_type == type(model):
                return subcls
        assert False, "Couldn't find step runner for %s" % (type(model),)
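
    # Illustrative sketch (not part of the original module): class_for()
    # dispatches on the model type of each recipe step. A model.DownloadSource
    # step selects DownloadStepRunner and a model.RenameStep selects
    # RenameStepRunner, because each subclass declares the step type it
    # handles in its `model_type` attribute.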

class RenameStepRunner(StepRunner):
    """A step runner for the <rename> step.
    @since: 1.10"""

    model_type = model.RenameStep

    def apply(self, basedir):
        source = native_path_within_base(basedir, self.stepdata.source)
        dest = native_path_within_base(basedir, self.stepdata.dest)
        os.rename(source, dest)

class DownloadStepRunner(StepRunner):
    """A step runner for the <archive> step.
    @since: 1.10"""

    model_type = model.DownloadSource

    def prepare(self, fetcher, blockers):
        self.blocker, self.stream = fetcher.download_archive(self.stepdata, impl_hint = self.impl_hint, may_use_mirror = True)
        assert self.stream
        blockers.append(self.blocker)

    def apply(self, basedir):
        from zeroinstall.zerostore import unpack
        assert self.blocker.happened
        unpack.unpack_archive_over(self.stepdata.url, self.stream, basedir,
                extract = self.stepdata.extract,
                type = self.stepdata.type,
                start_offset = self.stepdata.start_offset or 0)

def native_path_within_base(base, crossplatform_path):
    """Takes a cross-platform relative path (i.e. using forward slashes, even on Windows)
    and returns the absolute, platform-native version of the path.
    If the path does not resolve to a location within `base`, a SafeException is raised.
    @since: 1.10
    """
    assert os.path.isabs(base)
    if crossplatform_path.startswith("/"):
        raise SafeException("path %r is not within the base directory" % (crossplatform_path,))
    native_path = os.path.join(*crossplatform_path.split("/"))
    fullpath = os.path.realpath(os.path.join(base, native_path))
    base = os.path.realpath(base)
    if not fullpath.startswith(base + os.path.sep):
        raise SafeException("path %r is not within the base directory" % (crossplatform_path,))
    return fullpath
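
# Illustrative sketch (not part of the original module): the realpath check
# above rejects attempts to escape the implementation directory, e.g. on a
# POSIX system with no symlinks involved:
#
#     native_path_within_base('/tmp/impl', 'bin/prog')      ->  '/tmp/impl/bin/prog'
#     native_path_within_base('/tmp/impl', '../etc/passwd')     raises SafeException
#     native_path_within_base('/tmp/impl', '/etc/passwd')       raises SafeException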