Auto-throw exceptions when resuming tasks
[zeroinstall.git] / zeroinstall / injector / iface_cache.py
blob4421079b1debb23849f235eec5e631d8bce478be
1 """
2 Manages the feed cache.
4 @var iface_cache: A singleton cache object. You should normally use this rather than
5 creating new cache objects.
7 """
8 # Copyright (C) 2009, Thomas Leonard
9 # See the README file for details, or visit http://0install.net.
11 # Note:
13 # We need to know the modification time of each interface, because we refuse
14 # to update to an older version (this prevents an attack where the attacker
15 # sends back an old version which is correctly signed but has a known bug).
17 # The way we store this is a bit complicated due to backward compatibility:
19 # - GPG-signed interfaces have their signatures removed and a last-modified
20 # attribute is stored containing the date from the signature.
22 # - XML-signed interfaces are stored unmodified with their signatures. The
23 # date is extracted from the signature when needed.
25 # - Older versions used to add the last-modified attribute even to files
26 # with XML signatures - these files therefore have invalid signatures and
27 # we extract from the attribute for these.
29 # Eventually, support for the first and third cases will be removed.
31 import os, sys, time
32 from logging import debug, info, warn
33 from cStringIO import StringIO
35 from zeroinstall import _
36 from zeroinstall.support import basedir
37 from zeroinstall.injector import reader, model
38 from zeroinstall.injector.namespaces import config_site, config_prog
39 from zeroinstall.injector.model import Interface, escape, unescape
40 from zeroinstall import SafeException
# If we started a check within this period, don't start another one:
FAILED_CHECK_DELAY = 60 * 60	# 1 hour, in seconds
45 def _pretty_time(t):
46 assert isinstance(t, (int, long)), t
47 return time.strftime('%Y-%m-%d %H:%M:%S UTC', time.localtime(t))
class ReplayAttack(SafeException):
	"""Raised when an imported feed is older than the cached copy.

	Accepting the older feed would allow an attacker to roll a user back
	to a correctly-signed but vulnerable version, so the import is refused."""
class PendingFeed(object):
	"""A feed that has been downloaded but not yet added to the interface cache.
	Feeds remain in this state until the user confirms that they trust at least
	one of the signatures.
	@ivar url: URL for the feed
	@type url: str
	@ivar signed_data: the untrusted data
	@type signed_data: stream
	@ivar sigs: signatures extracted from signed_data
	@type sigs: [L{gpg.Signature}]
	@ivar new_xml: the payload of the signed_data, or the whole thing if XML
	@type new_xml: str
	@since: 0.25"""
	__slots__ = ['url', 'signed_data', 'sigs', 'new_xml']

	def __init__(self, url, signed_data):
		"""Downloaded data is a GPG-signed message.
		@param url: the URL of the downloaded feed
		@type url: str
		@param signed_data: the downloaded data (not yet trusted)
		@type signed_data: stream
		@raise SafeException: if the data is not signed, and logs the actual data"""
		self.url = url
		self.signed_data = signed_data
		# recheck() extracts sigs and new_xml from signed_data immediately.
		self.recheck()

	def download_keys(self, fetcher, feed_hint = None, key_mirror = None):
		"""Download any required GPG keys not already on our keyring.
		When all downloads are done (successful or otherwise), add any new keys
		to the keyring, L{recheck}.

		Note: this is a tasks coroutine — it yields lists of blockers and is
		resumed by the scheduler as the downloads complete.
		@param fetcher: fetcher to manage the download (was Handler before version 1.5)
		@type fetcher: L{fetch.Fetcher}
		@param key_mirror: URL of directory containing keys, or None to use feed's directory
		@type key_mirror: str
		"""
		# Start one download per missing key; map each download's blocker
		# back to the (download, tempfile) pair so we can import it later.
		downloads = {}
		blockers = []
		for x in self.sigs:
			key_id = x.need_key()
			if key_id:
				import urlparse
				key_url = urlparse.urljoin(key_mirror or self.url, '%s.gpg' % key_id)
				info(_("Fetching key from %s"), key_url)
				dl = fetcher.download_url(key_url, hint = feed_hint)
				downloads[dl.downloaded] = (dl, dl.tempfile)
				blockers.append(dl.downloaded)

		exception = None
		any_success = False

		from zeroinstall.support import tasks

		while blockers:
			try:
				yield blockers
			except:
				# Ignore exceptions thrown into us on resume; each blocker
				# is checked individually below via tasks.check().
				pass

			old_blockers = blockers
			blockers = []

			for b in old_blockers:
				try:
					tasks.check(b)
					if b.happened:
						# Download finished: rewind the tempfile and import the key.
						dl, stream = downloads[b]
						stream.seek(0)
						self._downloaded_key(stream)
						any_success = True
					else:
						# Still in progress; wait for it on the next pass.
						blockers.append(b)
				except Exception:
					# Remember the failure but keep processing the other keys;
					# we only re-raise if nothing succeeded at all.
					_type, exception, tb = sys.exc_info()
					warn(_("Failed to import key for '%(url)s': %(exception)s"), {'url': self.url, 'exception': str(exception)})

		if exception and not any_success:
			# Python 2 three-argument raise: re-raise with the original traceback.
			raise exception, None, tb

		# Re-verify the signatures now that new keys may be on the keyring.
		self.recheck()

	def _downloaded_key(self, stream):
		"""Import a single downloaded GPG key (in stream) into the keyring."""
		import shutil, tempfile
		from zeroinstall.injector import gpg

		info(_("Importing key for feed '%s'"), self.url)

		# Python2.4: can't call fileno() on stream, so save to tmp file instead
		tmpfile = tempfile.TemporaryFile(prefix = 'injector-dl-data-')
		try:
			shutil.copyfileobj(stream, tmpfile)
			tmpfile.flush()

			tmpfile.seek(0)
			gpg.import_key(tmpfile)
		finally:
			tmpfile.close()

	def recheck(self):
		"""Set new_xml and sigs by reading signed_data.
		You need to call this when previously-missing keys are added to the GPG keyring."""
		from . import gpg
		try:
			self.signed_data.seek(0)
			stream, sigs = gpg.check_stream(self.signed_data)
			assert sigs

			data = stream.read()
			# check_stream may return either the original stream or a new one
			# containing just the payload; only close it if it's a new one.
			if stream is not self.signed_data:
				stream.close()

			self.new_xml = data
			self.sigs = sigs
		except:
			# Deliberate bare except: log the raw data for debugging,
			# then re-raise the original exception unchanged.
			self.signed_data.seek(0)
			info(_("Failed to check GPG signature. Data received was:\n") + repr(self.signed_data.read()))
			raise
class IfaceCache(object):
	"""
	The interface cache stores downloaded and verified interfaces in
	~/.cache/0install.net/interfaces (by default).

	There are methods to query the cache, add to it, check signatures, etc.

	The cache is updated by L{fetch.Fetcher}.

	Confusingly, this class is really two caches combined: the in-memory
	cache of L{model.Interface} objects, and an on-disk cache of L{model.ZeroInstallFeed}s.
	It will probably be split into two in future.

	@ivar distro: the native distribution proxy
	@type distro: L{distro.Distribution}

	@see: L{iface_cache} - the singleton IfaceCache instance.
	"""

	__slots__ = ['_interfaces', '_feeds', '_distro', '_config']

	def __init__(self, distro = None):
		"""@param distro: distribution used to resolve "distribution:" feeds (since 0.49)
		@type distro: L{distro.Distribution}, or None to use the host distribution
		"""
		self._interfaces = {}
		self._feeds = {}
		self._distro = distro

	@property
	def stores(self):
		# Deprecated singleton access; kept for backwards compatibility.
		from zeroinstall.injector import policy
		return policy.get_deprecated_singleton_config().stores

	@property
	def distro(self):
		# Lazily resolve the host distribution on first access.
		if self._distro is None:
			from zeroinstall.injector.distro import get_host_distribution
			self._distro = get_host_distribution()
		return self._distro

	def update_interface_if_trusted(self, interface, sigs, xml):
		"""@deprecated: use L{update_feed_if_trusted} instead."""
		import warnings
		warnings.warn("Use update_feed_if_trusted instead", DeprecationWarning, stacklevel = 2)
		return self.update_feed_if_trusted(interface.uri, sigs, xml)

	def update_feed_if_trusted(self, feed_url, sigs, xml):
		"""Update a cached feed (using L{update_feed_from_network})
		if we trust the signatures.
		If we don't trust any of the signatures, do nothing.
		@param feed_url: the feed being updated
		@type feed_url: str
		@param sigs: signatures from L{gpg.check_stream}
		@type sigs: [L{gpg.Signature}]
		@param xml: the downloaded replacement feed document
		@type xml: str
		@return: True if the feed was updated
		@rtype: bool
		@since: 0.48"""
		from . import trust
		updated = self._oldest_trusted(sigs, trust.domain_from_url(feed_url))
		if updated is None:
			return False	# None are trusted

		self.update_feed_from_network(feed_url, xml, updated)
		return True

	def update_interface_from_network(self, interface, new_xml, modified_time):
		"""@deprecated: use L{update_feed_from_network} instead."""
		import warnings
		warnings.warn("Use update_feed_from_network instead", DeprecationWarning, stacklevel = 2)
		self.update_feed_from_network(interface.uri, new_xml, modified_time)

	def update_feed_from_network(self, feed_url, new_xml, modified_time):
		"""Update a cached feed.
		Called by L{update_feed_if_trusted} if we trust this data.
		After a successful update, L{writer} is used to update the feed's
		last_checked time.
		@param feed_url: the feed being updated
		@type feed_url: str
		@param new_xml: the downloaded replacement feed document
		@type new_xml: str
		@param modified_time: the timestamp of the oldest trusted signature
		(used as an approximation to the feed's modification time)
		@type modified_time: long
		@raises ReplayAttack: if modified_time is older than the currently cached time
		@since: 0.48"""
		debug(_("Updating '%(interface)s' from network; modified at %(time)s") %
			{'interface': feed_url, 'time': _pretty_time(modified_time)})

		if '\n<!-- Base64 Signature' not in new_xml:
			# Only do this for old-style feeds without
			# signatures Otherwise, we can get the time from the
			# signature, and adding this attribute just makes the
			# signature invalid.
			from xml.dom import minidom
			doc = minidom.parseString(new_xml)
			doc.documentElement.setAttribute('last-modified', str(modified_time))
			new_xml = StringIO()
			doc.writexml(new_xml)
			new_xml = new_xml.getvalue()

		self._import_new_feed(feed_url, new_xml, modified_time)

		feed = self.get_feed(feed_url)

		from . import writer
		feed.last_checked = int(time.time())
		writer.save_feed(feed)

		info(_("Updated feed cache entry for %(interface)s (modified %(time)s)"),
			{'interface': feed.get_name(), 'time': _pretty_time(modified_time)})

	def _import_new_feed(self, feed_url, new_xml, modified_time):
		"""Write new_xml into the cache.
		@param feed_url: the URL for the feed being updated
		@param new_xml: the data to write
		@param modified_time: when new_xml was modified
		@raises ReplayAttack: if the new mtime is older than the current one"""
		assert modified_time

		upstream_dir = basedir.save_cache_path(config_site, 'interfaces')
		cached = os.path.join(upstream_dir, escape(feed_url))

		old_modified = None
		if os.path.exists(cached):
			# Close promptly (the previous code leaked this file handle).
			with open(cached) as stream:
				old_xml = stream.read()
			if old_xml == new_xml:
				debug(_("No change"))
				# Update in-memory copy, in case someone else updated the disk copy
				self.get_feed(feed_url, force = True)
				return
			old_modified = int(os.stat(cached).st_mtime)

		# Do we need to write this temporary file now?
		with open(cached + '.new', 'w') as stream:
			stream.write(new_xml)
		os.utime(cached + '.new', (modified_time, modified_time))
		new_mtime = reader.check_readable(feed_url, cached + '.new')
		assert new_mtime == modified_time

		# Prefer the timestamp from the old feed's signature, if we have one.
		old_modified = self._get_signature_date(feed_url) or old_modified

		if old_modified:
			if new_mtime < old_modified:
				# Refuse to roll back to an older (possibly vulnerable) version.
				os.unlink(cached + '.new')
				raise ReplayAttack(_("New feed's modification time is "
					"before old version!\nInterface: %(iface)s\nOld time: %(old_time)s\nNew time: %(new_time)s\n"
					"Refusing update.")
					% {'iface': feed_url, 'old_time': _pretty_time(old_modified), 'new_time': _pretty_time(new_mtime)})
			if new_mtime == old_modified:
				# You used to have to update the modification time manually.
				# Now it comes from the signature, this check isn't useful
				# and often causes problems when the stored format changes
				# (e.g., when we stopped writing last-modified attributes)
				pass
				#raise SafeException("Interface has changed, but modification time "
				#		    "hasn't! Refusing update.")
		os.rename(cached + '.new', cached)
		debug(_("Saved as %s") % cached)

		# Refresh the in-memory copy from the new on-disk data.
		self.get_feed(feed_url, force = True)

	def get_feed(self, url, force = False, selections_ok = False):
		"""Get a feed from the cache.
		@param url: the URL of the feed
		@param force: load the file from disk again
		@param selections_ok: if url is a local selections file, return that instead
		@return: the feed, or None if it isn't cached
		@rtype: L{model.ZeroInstallFeed}"""
		if not force:
			# False is the "not cached yet" sentinel; a cached value of None
			# means we already know the feed is missing.
			feed = self._feeds.get(url, False)
			if feed is not False:
				return feed

		if url.startswith('distribution:'):
			# "distribution:URL" wraps a master feed with distro packages.
			master_feed = self.get_feed(url.split(':', 1)[1])
			if not master_feed:
				return None	# Can't happen?
			feed = self.distro.get_feed(master_feed)
		else:
			feed = reader.load_feed_from_cache(url, selections_ok = selections_ok)
			if selections_ok and feed and not isinstance(feed, model.ZeroInstallFeed):
				assert feed.selections is not None
				return feed	# (it's actually a selections document)
		if feed:
			reader.update_user_feed_overrides(feed)
		self._feeds[url] = feed
		return feed

	def get_interface(self, uri):
		"""Get the interface for uri, creating a new one if required.
		New interfaces are initialised from the disk cache, but not from
		the network.
		@param uri: the URI of the interface to find
		@rtype: L{model.Interface}"""
		if type(uri) == str:
			uri = unicode(uri)
		assert isinstance(uri, unicode)

		if uri in self._interfaces:
			return self._interfaces[uri]

		debug(_("Initialising new interface object for %s"), uri)
		self._interfaces[uri] = Interface(uri)
		reader.update_from_cache(self._interfaces[uri], iface_cache = self)
		return self._interfaces[uri]

	def list_all_interfaces(self):
		"""List all interfaces in the cache.
		@rtype: [str]"""
		found = set()	# (renamed from 'all', which shadowed the builtin)
		for d in basedir.load_cache_paths(config_site, 'interfaces'):
			for leaf in os.listdir(d):
				if not leaf.startswith('.'):
					found.add(unescape(leaf))
		return list(found)	# Why not just return the set?

	def get_icon_path(self, iface):
		"""Get the path of a cached icon for an interface.
		@param iface: interface whose icon we want
		@return: the path of the cached icon, or None if not cached.
		@rtype: str"""
		return basedir.load_first_cache(config_site, 'interface_icons',
						escape(iface.uri))

	def get_cached_signatures(self, uri):
		"""Verify the cached interface using GPG.
		Only new-style XML-signed interfaces retain their signatures in the cache.
		@param uri: the feed to check
		@type uri: str
		@return: a list of signatures, or None
		@rtype: [L{gpg.Signature}] or None
		@since: 0.25"""
		from . import gpg
		if os.path.isabs(uri):
			old_iface = uri
		else:
			old_iface = basedir.load_first_cache(config_site, 'interfaces', escape(uri))
			if old_iface is None:
				return None
		try:
			# Close the stream ourselves (the previous code leaked this handle).
			stream = open(old_iface)
			try:
				return gpg.check_stream(stream)[1]
			finally:
				stream.close()
		except SafeException as ex:
			debug(_("No signatures (old-style interface): %s") % ex)
			return None

	def _get_signature_date(self, uri):
		"""Read the date-stamp from the signature of the cached interface.
		If the date-stamp is unavailable, returns None."""
		from . import trust
		sigs = self.get_cached_signatures(uri)
		if sigs:
			return self._oldest_trusted(sigs, trust.domain_from_url(uri))

	def _oldest_trusted(self, sigs, domain):
		"""Return the date of the oldest trusted signature in the list, or None if there
		are no trusted sigs in the list."""
		trusted = [s.get_timestamp() for s in sigs if s.is_trusted(domain)]
		if trusted:
			return min(trusted)
		return None

	def mark_as_checking(self, url):
		"""Touch a 'last-check-attempt' timestamp file for this feed.
		If url is a local path, nothing happens.
		This prevents us from repeatedly trying to download a failing feed many
		times in a short period."""
		if os.path.isabs(url):
			return
		feeds_dir = basedir.save_cache_path(config_site, config_prog, 'last-check-attempt')
		timestamp_path = os.path.join(feeds_dir, model._pretty_escape(url))
		fd = os.open(timestamp_path, os.O_WRONLY | os.O_CREAT, 0o644)
		os.close(fd)
		os.utime(timestamp_path, None)	# In case file already exists

	def get_last_check_attempt(self, url):
		"""Return the time of the most recent update attempt for a feed.
		@see: L{mark_as_checking}
		@return: The time, or None if none is recorded
		@rtype: float | None"""
		timestamp_path = basedir.load_first_cache(config_site, config_prog, 'last-check-attempt', model._pretty_escape(url))
		if timestamp_path:
			return os.stat(timestamp_path).st_mtime
		return None

	def get_feed_imports(self, iface):
		"""Get all feeds that add to this interface.
		This is the feeds explicitly added by the user, feeds added by the distribution,
		and feeds imported by a <feed> in the main feed (but not recursively, at present).
		@rtype: [L{Feed}]
		@since: 0.48"""
		main_feed = self.get_feed(iface.uri)
		if main_feed:
			return iface.extra_feeds + main_feed.feeds
		else:
			return iface.extra_feeds

	def get_feeds(self, iface):
		"""Get all feeds for this interface. This is a mapping from feed URLs
		to ZeroInstallFeeds. It includes the interface's main feed, plus the
		resolution of every feed returned by L{get_feed_imports}. Uncached
		feeds are indicated by a value of None.
		@rtype: {str: L{ZeroInstallFeed} | None}
		@since: 0.48"""
		main_feed = self.get_feed(iface.uri)
		results = {iface.uri: main_feed}
		for imp in iface.extra_feeds:
			try:
				results[imp.uri] = self.get_feed(imp.uri)
			except SafeException as ex:
				# Best effort: a broken extra feed shouldn't hide the others.
				# (fixed format string: the closing quote was missing)
				warn("Failed to load feed '%s': %s", imp.uri, ex)
		if main_feed:
			for imp in main_feed.feeds:
				results[imp.uri] = self.get_feed(imp.uri)
		return results

	def get_implementations(self, iface):
		"""Return all implementations from all of iface's feeds.
		@rtype: [L{Implementation}]
		@since: 0.48"""
		impls = []
		for feed in self.get_feeds(iface).itervalues():
			if feed:
				impls += feed.implementations.values()
		return impls

	def get_feed_targets(self, feed):
		"""Return a list of Interfaces for which feed can be a feed.
		This is used by B{0install add-feed}.
		@param feed: the feed
		@type feed: L{model.ZeroInstallFeed} (or, deprecated, a URL)
		@rtype: [model.Interface]
		@raise SafeException: If there are no known feeds.
		@since: 0.53"""
		if not isinstance(feed, model.ZeroInstallFeed):
			# (deprecated)
			feed = self.get_feed(feed)
			if feed is None:
				raise SafeException("Feed is not cached and using deprecated API")

		if not feed.feed_for:
			raise SafeException(_("Missing <feed-for> element in '%s'; "
					"it can't be used as a feed for any other interface.") % feed.url)
		feed_targets = feed.feed_for
		debug(_("Feed targets: %s"), feed_targets)
		return [self.get_interface(uri) for uri in feed_targets]

	def is_stale(self, feed, freshness_threshold):
		"""Check whether feed needs updating, based on the configured L{config.Config.freshness}.
		None is considered to be stale.
		If we already tried to update the feed within FAILED_CHECK_DELAY, returns false.
		@param feed: the feed to check, or None
		@param freshness_threshold: maximum acceptable staleness, in seconds (<= 0 means never stale)
		@return: True if feed should be updated
		@since: 0.53"""
		if feed is None:
			return True
		if os.path.isabs(feed.url):
			return False	# Local feeds are never stale
		if feed.last_modified is None:
			return True	# Don't even have it yet
		now = time.time()
		staleness = now - (feed.last_checked or 0)
		debug(_("Staleness for %(feed)s is %(staleness).2f hours"), {'feed': feed, 'staleness': staleness / 3600.0})

		if freshness_threshold <= 0 or staleness < freshness_threshold:
			return False	# Fresh enough for us

		last_check_attempt = self.get_last_check_attempt(feed.url)
		if last_check_attempt and last_check_attempt > now - FAILED_CHECK_DELAY:
			debug(_("Stale, but tried to check recently (%s) so not rechecking now."), time.ctime(last_check_attempt))
			return False

		return True

	def usable_feeds(self, iface, arch):
		"""Generator for C{iface.feeds} that are valid for this architecture.
		@rtype: generator
		@see: L{arch}
		@since: 0.53"""
		for f in self.get_feed_imports(iface):
			if f.os in arch.os_ranks and f.machine in arch.machine_ranks:
				yield f
			else:
				debug(_("Skipping '%(feed)s'; unsupported architecture %(os)s-%(machine)s"),
					{'feed': f, 'os': f.os, 'machine': f.machine})
# Singleton cache instance, shared by callers that don't create their own.
iface_cache = IfaceCache()