Added DownloadScheduler
[zeroinstall/solver.git] / zeroinstall / injector / fetch.py
blob0bf6bc5871bfb8650b2b3944d9dd9f7af3287045
1 """
2 Downloads feeds, keys, packages and icons.
3 """
5 # Copyright (C) 2009, Thomas Leonard
6 # See the README file for details, or visit http://0install.net.
8 from zeroinstall import _, NeedDownload
9 import os
10 from logging import info, debug, warn
12 from zeroinstall.support import tasks, basedir
13 from zeroinstall.injector.namespaces import XMLNS_IFACE, config_site
14 from zeroinstall.injector.model import DownloadSource, Recipe, SafeException, escape, DistributionSource
15 from zeroinstall.injector.iface_cache import PendingFeed, ReplayAttack
16 from zeroinstall.injector.handler import NoTrustedKeys
17 from zeroinstall.injector import download
19 def _escape_slashes(path):
20 return path.replace('/', '%23')
def _get_feed_dir(feed):
    """Return the mirror-relative location of a feed (the algorithm from 0mirror).

    The result has the form C{feeds/SCHEME/DOMAIN/ESCAPED-PATH}, where the
    slashes in the path part of the URL have been escaped with
    L{_escape_slashes}.
    @param feed: the feed's URL
    @type feed: str
    @return: the relative location, always using '/' as the separator
    @rtype: str
    @raise SafeException: if the URL contains '#', lacks '://', lacks a '/'
    after the domain, or has a component that is empty or starts with ','"""
    # Malformed input is reported as SafeException throughout, rather than
    # leaking a ValueError from the tuple unpacking or an AssertionError.
    if '#' in feed or '://' not in feed:
        raise SafeException(_("Invalid URL '%s'") % feed)
    scheme, rest = feed.split('://', 1)
    if '/' not in rest:
        raise SafeException(_("Invalid URL '%s'") % feed)
    domain, rest = rest.split('/', 1)
    for x in (scheme, domain, rest):
        # NOTE(review): the ',' prefix check looks like it may have been meant
        # to be '.' (to reject hidden/parent path components) - confirm
        # against 0mirror before changing it.
        if not x or x.startswith(','):
            raise SafeException(_("Invalid URL '%s'") % feed)
    # The result becomes part of a URL, so always join with '/';
    # os.path.join would use the platform's separator instead.
    return '/'.join(['feeds', scheme, domain, _escape_slashes(rest)])
34 class KeyInfoFetcher:
35 """Fetches information about a GPG key from a key-info server.
36 See L{Fetcher.fetch_key_info} for details.
37 @since: 0.42
39 Example:
41 >>> kf = KeyInfoFetcher(fetcher, 'https://server', fingerprint)
42 >>> while True:
43 print kf.info
44 if kf.blocker is None: break
45 print kf.status
46 yield kf.blocker
47 """
48 def __init__(self, fetcher, server, fingerprint):
49 self.fingerprint = fingerprint
50 self.info = []
51 self.blocker = None
53 if server is None: return
55 self.status = _('Fetching key information from %s...') % server
57 dl = fetcher.download_url(server + '/key/' + fingerprint)
59 from xml.dom import minidom
61 @tasks.async
62 def fetch_key_info():
63 try:
64 tempfile = dl.tempfile
65 yield dl.downloaded
66 self.blocker = None
67 tasks.check(dl.downloaded)
68 tempfile.seek(0)
69 doc = minidom.parse(tempfile)
70 if doc.documentElement.localName != 'key-lookup':
71 raise SafeException(_('Expected <key-lookup>, not <%s>') % doc.documentElement.localName)
72 self.info += doc.documentElement.childNodes
73 except Exception as ex:
74 doc = minidom.parseString('<item vote="bad"/>')
75 root = doc.documentElement
76 root.appendChild(doc.createTextNode(_('Error getting key information: %s') % ex))
77 self.info.append(root)
79 self.blocker = fetch_key_info()
class Fetcher(object):
    """Downloads and stores various things.
    @ivar config: used to get handler, iface_cache and stores
    @type config: L{config.Config}
    @ivar key_info: caches information about GPG keys
    @type key_info: {str: L{KeyInfoFetcher}}
    """
    __slots__ = ['config', 'key_info', '_scheduler']

    def __init__(self, config):
        """@param config: the configuration to use; it must have a handler set"""
        assert config.handler, "API change!"
        self._scheduler = None      # created on first use; see the scheduler property
        self.key_info = {}
        self.config = config

    @property
    def handler(self):
        """The handler taken from L{config}."""
        return self.config.handler

    @property
    def scheduler(self):
        """The DownloadScheduler for this fetcher, created the first time it is needed."""
        if self._scheduler is not None:
            return self._scheduler
        from . import scheduler as scheduler_module
        self._scheduler = scheduler_module.DownloadScheduler()
        return self._scheduler
107 @tasks.async
108 def cook(self, required_digest, recipe, stores, force = False, impl_hint = None):
109 """Follow a Recipe.
110 @param impl_hint: the Implementation this is for (if any) as a hint for the GUI
111 @see: L{download_impl} uses this method when appropriate"""
112 # Maybe we're taking this metaphor too far?
114 # Start downloading all the ingredients.
115 streams = {} # Streams collected from successful downloads
117 # Start a download for each ingredient
118 blockers = []
119 for step in recipe.steps:
120 blocker, stream = self.download_archive(step, force = force, impl_hint = impl_hint)
121 assert stream
122 blockers.append(blocker)
123 streams[step] = stream
125 while blockers:
126 yield blockers
127 tasks.check(blockers)
128 blockers = [b for b in blockers if not b.happened]
130 from zeroinstall.zerostore import unpack
132 # Create an empty directory for the new implementation
133 store = stores.stores[0]
134 tmpdir = store.get_tmp_dir_for(required_digest)
135 try:
136 # Unpack each of the downloaded archives into it in turn
137 for step in recipe.steps:
138 stream = streams[step]
139 stream.seek(0)
140 unpack.unpack_archive_over(step.url, stream, tmpdir,
141 extract = step.extract,
142 type = step.type,
143 start_offset = step.start_offset or 0)
144 # Check that the result is correct and store it in the cache
145 store.check_manifest_and_rename(required_digest, tmpdir)
146 tmpdir = None
147 finally:
148 # If unpacking fails, remove the temporary directory
149 if tmpdir is not None:
150 from zeroinstall import support
151 support.ro_rmtree(tmpdir)
def get_feed_mirror(self, url):
    """Return the URL of a mirror for this feed.
    @param url: the feed's normal URL
    @return: the location of the feed on the configured mirror, or None if
    no mirror is configured or the feed is hosted on 'localhost'"""
    mirror_base = self.config.feed_mirror
    if mirror_base is None:
        return None

    import urlparse
    host = urlparse.urlparse(url).hostname
    if host == 'localhost':
        return None

    return '%s/%s/latest.xml' % (mirror_base, _get_feed_dir(url))
162 @tasks.async
163 def get_packagekit_feed(self, feed_url):
164 """Send a query to PackageKit (if available) for information about this package.
165 On success, the result is added to iface_cache.
167 assert feed_url.startswith('distribution:'), feed_url
168 master_feed = self.config.iface_cache.get_feed(feed_url.split(':', 1)[1])
169 if master_feed:
170 fetch = self.config.iface_cache.distro.fetch_candidates(master_feed)
171 if fetch:
172 yield fetch
173 tasks.check(fetch)
175 # Force feed to be regenerated with the new information
176 self.config.iface_cache.get_feed(feed_url, force = True)
def download_and_import_feed(self, feed_url, iface_cache = None):
    """Download the feed, download any required keys, confirm trust if needed and import.

    The upstream location is tried first. If it has not finished after 5
    seconds, or it fails with an ordinary L{SafeException}, the mirror is
    tried as well. Trust problems, replay attacks and aborted downloads of
    the upstream feed are raised without trying the mirror.
    @param feed_url: the feed to be downloaded
    @type feed_url: str
    @param iface_cache: (deprecated)
    @return: the result of L{get_packagekit_feed} for 'distribution:' feeds,
    otherwise the task which monitors the download(s)"""
    from .download import DownloadAborted

    assert iface_cache is None or iface_cache is self.config.iface_cache

    self.config.iface_cache.mark_as_checking(feed_url)

    debug(_("download_and_import_feed %(url)s"), {'url': feed_url})
    assert not os.path.isabs(feed_url)

    if feed_url.startswith('distribution:'):
        return self.get_packagekit_feed(feed_url)

    primary = self._download_and_import_feed(feed_url, use_mirror = False)

    # Don't bother trying the mirror if we have a trust problem, a replay
    # attack, or the user cancelled.
    no_mirror_errors = (NoTrustedKeys, ReplayAttack, DownloadAborted)

    @tasks.named_async("monitor feed downloads for " + feed_url)
    def wait_for_downloads(primary):
        # Download just the upstream feed, unless it takes too long...
        timeout = tasks.TimeoutBlocker(5, 'Mirror timeout')     # 5 seconds

        yield primary, timeout
        tasks.check(timeout)

        primary_ex = None
        try:
            tasks.check(primary)
            if primary.happened:
                return      # OK, primary succeeded!
            # OK, maybe it's just being slow...
            info("Feed download from %s is taking a long time.", feed_url)
        except no_mirror_errors:
            raise
        except SafeException as ex:
            # Primary failed
            primary = None
            primary_ex = ex
            warn(_("Feed download from %(url)s failed: %(exception)s"), {'url': feed_url, 'exception': ex})

        # Start downloading from mirror... (None if there is no mirror to try)
        mirror = self._download_and_import_feed(feed_url, use_mirror = True)

        # Wait until both mirror and primary tasks are complete...
        while True:
            blockers = [task for task in (primary, mirror) if task]
            if not blockers:
                break
            yield blockers

            if primary:
                try:
                    tasks.check(primary)
                    if primary.happened:
                        primary = None
                        # No point carrying on with the mirror once the primary has succeeded
                        if mirror:
                            info(_("Primary feed download succeeded; aborting mirror download for %s") % feed_url)
                            mirror.dl.abort()
                except SafeException as ex:
                    primary = None
                    primary_ex = ex
                    info(_("Feed download from %(url)s failed; still trying mirror: %(exception)s"), {'url': feed_url, 'exception': ex})

            if mirror:
                try:
                    tasks.check(mirror)
                    if mirror.happened:
                        mirror = None
                        # We already warned about any primary failure; no need
                        # to raise an exception too, as the mirror download
                        # succeeded.
                        primary_ex = None
                except ReplayAttack as ex:
                    info(_("Version from mirror is older than cached version; ignoring it: %s"), ex)
                    mirror = None
                    primary_ex = None
                except SafeException as ex:
                    info(_("Mirror download failed: %s"), ex)
                    mirror = None

        if primary_ex:
            raise primary_ex

    return wait_for_downloads(primary)
def _download_and_import_feed(self, feed_url, use_mirror):
    """Download and import a feed.
    @param feed_url: the feed to be downloaded
    @param use_mirror: False to use primary location; True to use mirror.
    @return: the task doing the download and import, with the download
    itself available as its C{dl} attribute, or None if the mirror was
    requested but L{get_feed_mirror} has no URL for this feed"""
    if not use_mirror:
        url = feed_url
    else:
        url = self.get_feed_mirror(feed_url)
        if url is None:
            return None
        info(_("Trying mirror server for feed %s") % feed_url)

    dl = self.download_url(url, hint = feed_url)
    stream = dl.tempfile

    @tasks.named_async("fetch_feed " + url)
    def fetch_feed():
        yield dl.downloaded
        tasks.check(dl.downloaded)

        pending = PendingFeed(feed_url, stream)

        # If we got the feed from a mirror, get the key from there too
        key_mirror = (self.config.feed_mirror + '/keys/') if use_mirror else None

        keys_downloaded = tasks.Task(pending.download_keys(self, feed_hint = feed_url, key_mirror = key_mirror), _("download keys for %s") % feed_url)
        yield keys_downloaded.finished
        tasks.check(keys_downloaded.finished)

        def import_if_trusted():
            return self.config.iface_cache.update_feed_if_trusted(pending.url, pending.sigs, pending.new_xml)

        if import_if_trusted():
            return

        # Not trusted yet: ask for confirmation, then try once more
        blocker = self.config.trust_mgr.confirm_keys(pending)
        if blocker:
            yield blocker
            tasks.check(blocker)
        if not import_if_trusted():
            raise NoTrustedKeys(_("No signing keys trusted; not importing"))

    task = fetch_feed()
    task.dl = dl
    return task
def fetch_key_info(self, fingerprint):
    """Return the L{KeyInfoFetcher} for a key, creating it (and so starting
    the query to config.key_info_server) the first time the key is asked for.
    @param fingerprint: the fingerprint of the key
    @rtype: L{KeyInfoFetcher}"""
    if fingerprint in self.key_info:
        return self.key_info[fingerprint]

    key_info = KeyInfoFetcher(self, self.config.key_info_server, fingerprint)
    self.key_info[fingerprint] = key_info
    return key_info
320 def download_impl(self, impl, retrieval_method, stores, force = False):
321 """Download an implementation.
322 @param impl: the selected implementation
323 @type impl: L{model.ZeroInstallImplementation}
324 @param retrieval_method: a way of getting the implementation (e.g. an Archive or a Recipe)
325 @type retrieval_method: L{model.RetrievalMethod}
326 @param stores: where to store the downloaded implementation
327 @type stores: L{zerostore.Stores}
328 @param force: whether to abort and restart an existing download
329 @rtype: L{tasks.Blocker}"""
330 assert impl
331 assert retrieval_method
333 if isinstance(retrieval_method, DistributionSource):
334 return retrieval_method.install(self.handler)
336 from zeroinstall.zerostore import manifest
337 best = None
338 for digest in impl.digests:
339 alg_name = digest.split('=', 1)[0]
340 alg = manifest.algorithms.get(alg_name, None)
341 if alg and (best is None or best.rating < alg.rating):
342 best = alg
343 required_digest = digest
345 if best is None:
346 if not impl.digests:
347 raise SafeException(_("No <manifest-digest> given for '%(implementation)s' version %(version)s") %
348 {'implementation': impl.feed.get_name(), 'version': impl.get_version()})
349 raise SafeException(_("Unknown digest algorithms '%(algorithms)s' for '%(implementation)s' version %(version)s") %
350 {'algorithms': impl.digests, 'implementation': impl.feed.get_name(), 'version': impl.get_version()})
352 @tasks.async
353 def download_impl():
354 if isinstance(retrieval_method, DownloadSource):
355 blocker, stream = self.download_archive(retrieval_method, force = force, impl_hint = impl)
356 yield blocker
357 tasks.check(blocker)
359 stream.seek(0)
360 self._add_to_cache(required_digest, stores, retrieval_method, stream)
361 elif isinstance(retrieval_method, Recipe):
362 blocker = self.cook(required_digest, retrieval_method, stores, force, impl_hint = impl)
363 yield blocker
364 tasks.check(blocker)
365 else:
366 raise Exception(_("Unknown download type for '%s'") % retrieval_method)
368 self.handler.impl_added_to_store(impl)
369 return download_impl()
def _add_to_cache(self, required_digest, stores, retrieval_method, stream):
    """Add a downloaded archive to the cache.
    @param required_digest: the digest passed on to stores.add_archive_to_cache
    @param stores: the stores to add the archive to
    @param retrieval_method: the L{DownloadSource} the archive came from
    @param stream: the downloaded archive data"""
    assert isinstance(retrieval_method, DownloadSource)
    source = retrieval_method
    stores.add_archive_to_cache(required_digest, stream, source.url, source.extract,
            type = source.type, start_offset = source.start_offset or 0)
# (force is deprecated and ignored)
def download_archive(self, download_source, force = False, impl_hint = None):
    """Fetch an archive. You should normally call L{download_impl}
    instead, since it handles other kinds of retrieval method too.
    @param download_source: gives the url, type, size and start_offset of the archive
    @param impl_hint: passed to L{download_url} as the download's hint
    @return: (blocker for the download, stream the data is written to)
    @raise SafeException: if the URL is not http, https or ftp, or if the
    archive has no type and none can be guessed from its name"""
    from zeroinstall.zerostore import unpack

    url = download_source.url
    if not url.startswith(('http:', 'https:', 'ftp:')):
        raise SafeException(_("Unknown scheme in download URL '%s'") % url)

    # Fall back to guessing the type from the archive's name
    mime_type = download_source.type or unpack.type_from_url(download_source.url)
    if not mime_type:
        raise SafeException(_("No 'type' attribute on archive, and I can't guess from the name (%s)") % download_source.url)
    unpack.check_type_ok(mime_type)

    dl = self.download_url(download_source.url, hint = impl_hint)
    # The archive proper starts start_offset bytes into the download
    dl.expected_size = download_source.size + (download_source.start_offset or 0)
    return (dl.downloaded, dl.tempfile)
396 # (force is deprecated and ignored)
397 def download_icon(self, interface, force = False):
398 """Download an icon for this interface and add it to the
399 icon cache. If the interface has no icon do nothing.
400 @return: the task doing the import, or None
401 @rtype: L{tasks.Task}"""
402 debug("download_icon %(interface)s", {'interface': interface})
404 modification_time = None
405 existing_icon = self.config.iface_cache.get_icon_path(interface)
406 if existing_icon:
407 file_mtime = os.stat(existing_icon).st_mtime
408 from email.utils import formatdate
409 modification_time = formatdate(timeval = file_mtime, localtime = False, usegmt = True)
411 # Find a suitable icon to download
412 for icon in interface.get_metadata(XMLNS_IFACE, 'icon'):
413 type = icon.getAttribute('type')
414 if type != 'image/png':
415 debug(_('Skipping non-PNG icon'))
416 continue
417 source = icon.getAttribute('href')
418 if source:
419 break
420 warn(_('Missing "href" attribute on <icon> in %s'), interface)
421 else:
422 info(_('No PNG icons found in %s'), interface)
423 return
425 dl = self.download_url(source, hint = interface, modification_time = modification_time)
427 @tasks.async
428 def download_and_add_icon():
429 stream = dl.tempfile
430 yield dl.downloaded
431 try:
432 tasks.check(dl.downloaded)
433 if dl.unmodified: return
434 stream.seek(0)
436 import shutil
437 icons_cache = basedir.save_cache_path(config_site, 'interface_icons')
438 icon_file = open(os.path.join(icons_cache, escape(interface.uri)), 'w')
439 shutil.copyfileobj(stream, icon_file)
440 except Exception as ex:
441 self.handler.report_error(ex)
443 return download_and_add_icon()
445 def download_impls(self, implementations, stores):
446 """Download the given implementations, choosing a suitable retrieval method for each.
447 If any of the retrieval methods are DistributionSources and
448 need confirmation, handler.confirm is called to check that the
449 installation should proceed.
451 unsafe_impls = []
453 to_download = []
454 for impl in implementations:
455 debug(_("start_downloading_impls: for %(feed)s get %(implementation)s"), {'feed': impl.feed, 'implementation': impl})
456 source = self.get_best_source(impl)
457 if not source:
458 raise SafeException(_("Implementation %(implementation_id)s of interface %(interface)s"
459 " cannot be downloaded (no download locations given in "
460 "interface!)") % {'implementation_id': impl.id, 'interface': impl.feed.get_name()})
461 to_download.append((impl, source))
463 if isinstance(source, DistributionSource) and source.needs_confirmation:
464 unsafe_impls.append(source.package_id)
466 @tasks.async
467 def download_impls():
468 if unsafe_impls:
469 confirm = self.handler.confirm_install(_('The following components need to be installed using native packages. '
470 'These come from your distribution, and should therefore be trustworthy, but they also '
471 'run with extra privileges. In particular, installing them may run extra services on your '
472 'computer or affect other users. You may be asked to enter a password to confirm. The '
473 'packages are:\n\n') + ('\n'.join('- ' + x for x in unsafe_impls)))
474 yield confirm
475 tasks.check(confirm)
477 blockers = []
479 for impl, source in to_download:
480 blockers.append(self.download_impl(impl, source, stores))
482 # Record the first error log the rest
483 error = []
484 def dl_error(ex, tb = None):
485 if error:
486 self.handler.report_error(ex)
487 else:
488 error.append((ex, tb))
489 while blockers:
490 yield blockers
491 tasks.check(blockers, dl_error)
493 blockers = [b for b in blockers if not b.happened]
494 if error:
495 from zeroinstall import support
496 support.raise_with_traceback(*error[0])
498 if not to_download:
499 return None
501 return download_impls()
def get_best_source(self, impl):
    """Return the best download source for this implementation.
    This is the first entry of impl.download_sources, or None if it is empty.
    @rtype: L{model.RetrievalMethod}"""
    sources = impl.download_sources
    return sources[0] if sources else None
def download_url(self, url, hint = None, modification_time = None, expected_size = None):
    """The most low-level method here; just download a raw URL.
    The handler is told about the new download, and the download is then
    handed to the scheduler.
    @param url: the location to download from
    @param hint: user-defined data to store on the Download (e.g. used by the GUI)
    @param modification_time: don't download unless newer than this
    @param expected_size: passed on to the Download
    @rtype: L{download.Download}
    @raise NeedDownload: if the handler is in dry-run mode
    @since: 1.5
    """
    handler = self.handler
    if handler.dry_run:
        raise NeedDownload(url)

    dl = download.Download(url, hint = hint, modification_time = modification_time, expected_size = expected_size)
    handler.monitor_download(dl)
    # The scheduler's result is what callers wait on for completion
    dl.downloaded = self.scheduler.download(dl)
    return dl