Log using the "0install" logger rather than "root"
[zeroinstall/solver.git] / zeroinstall / injector / iface_cache.py
blobf9caa19a99025bf406a992b83de8322edb3d6144
1 """
2 Manages the feed cache.
4 @var iface_cache: A singleton cache object. You should normally use this rather than
5 creating new cache objects.
7 """
8 # Copyright (C) 2009, Thomas Leonard
9 # See the README file for details, or visit http://0install.net.
11 # Note:
13 # We need to know the modification time of each interface, because we refuse
14 # to update to an older version (this prevents an attack where the attacker
15 # sends back an old version which is correctly signed but has a known bug).
17 # The way we store this is a bit complicated due to backward compatibility:
19 # - GPG-signed interfaces have their signatures removed and a last-modified
20 # attribute is stored containing the date from the signature.
22 # - XML-signed interfaces are stored unmodified with their signatures. The
23 # date is extracted from the signature when needed.
25 # - Older versions used to add the last-modified attribute even to files
26 # with XML signatures - these files therefore have invalid signatures and
27 # we extract from the attribute for these.
29 # Eventually, support for the first and third cases will be removed.
31 import os, sys, time
33 from zeroinstall import _, logger
34 from zeroinstall.support import basedir, portable_rename, raise_with_traceback, unicode
35 from zeroinstall.injector import reader, model
36 from zeroinstall.injector.namespaces import config_site, config_prog
37 from zeroinstall.injector.model import Interface, escape, unescape
38 from zeroinstall import SafeException
# If we started a check within this period, don't start another one:
FAILED_CHECK_DELAY = 60 * 60	# 1 hour, in seconds (compared against time.time() deltas)
43 def _pretty_time(t):
44 #assert isinstance(t, (int, long)), t
45 return time.strftime('%Y-%m-%d %H:%M:%S UTC', time.localtime(t))
class ReplayAttack(SafeException):
	"""Raised when an imported feed is older than the cached copy.

	Accepting the older feed would let an attacker roll a user back to a
	correctly-signed version with a known vulnerability."""
class PendingFeed(object):
	"""A feed that has been downloaded but not yet added to the interface cache.
	Feeds remain in this state until the user confirms that they trust at least
	one of the signatures.
	@ivar url: URL for the feed
	@type url: str
	@ivar signed_data: the untrusted data
	@type signed_data: stream
	@ivar sigs: signatures extracted from signed_data
	@type sigs: [L{gpg.Signature}]
	@ivar new_xml: the payload of the signed_data, or the whole thing if XML
	@type new_xml: str
	@since: 0.25"""
	__slots__ = ['url', 'signed_data', 'sigs', 'new_xml']

	def __init__(self, url, signed_data):
		"""Downloaded data is a GPG-signed message.
		@param url: the URL of the downloaded feed
		@type url: str
		@param signed_data: the downloaded data (not yet trusted)
		@type signed_data: stream
		@raise SafeException: if the data is not signed, and logs the actual data"""
		self.url = url
		self.signed_data = signed_data
		self.recheck()

	def download_keys(self, fetcher, feed_hint = None, key_mirror = None):
		"""Download any required GPG keys not already on our keyring.
		When all downloads are done (successful or otherwise), add any new keys
		to the keyring, L{recheck}.
		@param fetcher: fetcher to manage the download (was Handler before version 1.5)
		@type fetcher: L{fetch.Fetcher}
		@param key_mirror: URL of directory containing keys, or None to use feed's directory
		@type key_mirror: str
		"""
		# Hoisted out of the loop: the same module is needed for every signature.
		try:
			import urlparse
		except ImportError:
			from urllib import parse as urlparse	# Python 3

		downloads = {}
		blockers = []
		for x in self.sigs:
			key_id = x.need_key()
			if key_id:
				# Keys are fetched as "<key_id>.gpg" from the mirror,
				# or from the feed's own directory if no mirror is given.
				key_url = urlparse.urljoin(key_mirror or self.url, '%s.gpg' % key_id)
				logger.info(_("Fetching key from %s"), key_url)
				dl = fetcher.download_url(key_url, hint = feed_hint)
				downloads[dl.downloaded] = (dl, dl.tempfile)
				blockers.append(dl.downloaded)

		exception = None
		tb = None	# defensive: keep the traceback paired with 'exception'
		any_success = False

		from zeroinstall.support import tasks

		# Wait for the downloads; import each key as soon as its download finishes.
		while blockers:
			yield blockers

			old_blockers = blockers
			blockers = []

			for b in old_blockers:
				try:
					tasks.check(b)
					if b.happened:
						dl, stream = downloads[b]
						stream.seek(0)
						self._downloaded_key(stream)
						any_success = True
					else:
						blockers.append(b)	# not finished yet; keep waiting
				except Exception:
					_type, exception, tb = sys.exc_info()
					# logger.warn is a deprecated alias; use warning()
					logger.warning(_("Failed to import key for '%(url)s': %(exception)s"), {'url': self.url, 'exception': str(exception)})

		# Only fail if *no* key could be imported; a partial success may still
		# provide a signature we can trust.
		if exception and not any_success:
			raise_with_traceback(exception, tb)

		self.recheck()

	def _downloaded_key(self, stream):
		"""Import one downloaded GPG key into the user's keyring.
		@param stream: seekable stream containing the key data"""
		import shutil, tempfile
		from zeroinstall.injector import gpg

		logger.info(_("Importing key for feed '%s'"), self.url)

		# Python2.4: can't call fileno() on stream, so save to tmp file instead
		tmpfile = tempfile.TemporaryFile(prefix = 'injector-dl-data-')
		try:
			shutil.copyfileobj(stream, tmpfile)
			tmpfile.flush()

			tmpfile.seek(0)
			gpg.import_key(tmpfile)
		finally:
			tmpfile.close()

	def recheck(self):
		"""Set new_xml and sigs by reading signed_data.
		You need to call this when previously-missing keys are added to the GPG keyring."""
		from . import gpg
		try:
			self.signed_data.seek(0)
			stream, sigs = gpg.check_stream(self.signed_data)
			assert sigs

			data = stream.read()
			if stream is not self.signed_data:
				stream.close()

			self.new_xml = data
			self.sigs = sigs
		except:
			# Bare except is deliberate: log the raw data for *any* failure,
			# then re-raise the original exception unchanged.
			self.signed_data.seek(0)
			logger.info(_("Failed to check GPG signature. Data received was:\n") + repr(self.signed_data.read()))
			raise
class IfaceCache(object):
	"""
	The interface cache stores downloaded and verified interfaces in
	~/.cache/0install.net/interfaces (by default).

	There are methods to query the cache, add to it, check signatures, etc.

	The cache is updated by L{fetch.Fetcher}.

	Confusingly, this class is really two caches combined: the in-memory
	cache of L{model.Interface} objects, and an on-disk cache of L{model.ZeroInstallFeed}s.
	It will probably be split into two in future.

	@ivar distro: the native distribution proxy
	@type distro: L{distro.Distribution}

	@see: L{iface_cache} - the singleton IfaceCache instance.
	"""

	__slots__ = ['_interfaces', '_feeds', '_distro', '_config']

	def __init__(self, distro = None):
		"""@param distro: distribution used to resolve "distribution:" feeds (since 0.49)
		@type distro: L{distro.Distribution}, or None to use the host distribution"""
		self._interfaces = {}	# uri -> Interface (in-memory cache)
		self._feeds = {}	# url -> ZeroInstallFeed | None (None = known to be uncached)
		self._distro = distro	# created lazily by the 'distro' property if None

	@property
	def stores(self):
		"""Implementation stores, via the deprecated singleton config."""
		from zeroinstall.injector import policy
		return policy.get_deprecated_singleton_config().stores

	@property
	def distro(self):
		"""The native distribution proxy, created on first access if not supplied."""
		if self._distro is None:
			from zeroinstall.injector.distro import get_host_distribution
			self._distro = get_host_distribution()
		return self._distro

	def update_interface_if_trusted(self, interface, sigs, xml):
		"""@deprecated: use L{update_feed_if_trusted} instead"""
		import warnings
		warnings.warn("Use update_feed_if_trusted instead", DeprecationWarning, stacklevel = 2)
		return self.update_feed_if_trusted(interface.uri, sigs, xml)

	def update_feed_if_trusted(self, feed_url, sigs, xml):
		"""Update a cached feed (using L{update_feed_from_network})
		if we trust the signatures.
		If we don't trust any of the signatures, do nothing.
		@param feed_url: the feed being updated
		@type feed_url: str
		@param sigs: signatures from L{gpg.check_stream}
		@type sigs: [L{gpg.Signature}]
		@param xml: the downloaded replacement feed document
		@type xml: str
		@return: True if the feed was updated
		@rtype: bool
		@since: 0.48
		"""
		from . import trust
		updated = self._oldest_trusted(sigs, trust.domain_from_url(feed_url))
		if updated is None: return False	# None are trusted

		self.update_feed_from_network(feed_url, xml, updated)
		return True

	def update_interface_from_network(self, interface, new_xml, modified_time):
		"""@deprecated: use L{update_feed_from_network} instead"""
		import warnings
		warnings.warn("Use update_feed_from_network instead", DeprecationWarning, stacklevel = 2)
		self.update_feed_from_network(interface.uri, new_xml, modified_time)

	def update_feed_from_network(self, feed_url, new_xml, modified_time):
		"""Update a cached feed.
		Called by L{update_feed_if_trusted} if we trust this data.
		After a successful update, L{writer} is used to update the feed's
		last_checked time.
		@param feed_url: the URL of the feed being updated
		@type feed_url: str
		@param new_xml: the downloaded replacement feed document
		@type new_xml: str
		@param modified_time: the timestamp of the oldest trusted signature
		(used as an approximation to the feed's modification time)
		@type modified_time: long
		@raises ReplayAttack: if modified_time is older than the currently cached time
		@since: 0.48
		"""
		logger.debug(_("Updating '%(interface)s' from network; modified at %(time)s") %
			{'interface': feed_url, 'time': _pretty_time(modified_time)})

		self._import_new_feed(feed_url, new_xml, modified_time)

		feed = self.get_feed(feed_url)

		from . import writer
		feed.last_checked = int(time.time())
		writer.save_feed(feed)

		logger.info(_("Updated feed cache entry for %(interface)s (modified %(time)s)"),
			{'interface': feed.get_name(), 'time': _pretty_time(modified_time)})

	def _import_new_feed(self, feed_url, new_xml, modified_time):
		"""Write new_xml into the cache.
		@param feed_url: the URL for the feed being updated
		@param new_xml: the data to write
		@param modified_time: when new_xml was modified
		@raises ReplayAttack: if the new mtime is older than the current one
		"""
		assert modified_time
		assert isinstance(new_xml, bytes), repr(new_xml)

		upstream_dir = basedir.save_cache_path(config_site, 'interfaces')
		cached = os.path.join(upstream_dir, escape(feed_url))

		old_modified = None
		if os.path.exists(cached):
			with open(cached, 'rb') as stream:
				old_xml = stream.read()
			if old_xml == new_xml:
				logger.debug(_("No change"))
				# Update in-memory copy, in case someone else updated the disk copy
				self.get_feed(feed_url, force = True)
				return
			old_modified = int(os.stat(cached).st_mtime)

		# Write to a '.new' file first, so the cache entry is replaced
		# atomically and never left half-written.
		try:
			with open(cached + '.new', 'wb') as stream:
				stream.write(new_xml)
			os.utime(cached + '.new', (modified_time, modified_time))
			new_mtime = reader.check_readable(feed_url, cached + '.new')
			assert new_mtime == modified_time

			# Prefer the signature date over the file mtime (see module notes
			# on backward compatibility).
			old_modified = self._get_signature_date(feed_url) or old_modified

			if old_modified:
				if new_mtime < old_modified:
					raise ReplayAttack(_("New feed's modification time is "
						"before old version!\nInterface: %(iface)s\nOld time: %(old_time)s\nNew time: %(new_time)s\n"
						"Refusing update.")
						% {'iface': feed_url, 'old_time': _pretty_time(old_modified), 'new_time': _pretty_time(new_mtime)})
				if new_mtime == old_modified:
					# You used to have to update the modification time manually.
					# Now it comes from the signature, this check isn't useful
					# and often causes problems when the stored format changes
					# (e.g., when we stopped writing last-modified attributes)
					pass
					#raise SafeException("Interface has changed, but modification time "
					#		    "hasn't! Refusing update.")
		except:
			# Remove the temporary file on any failure, then re-raise.
			os.unlink(cached + '.new')
			raise

		portable_rename(cached + '.new', cached)
		logger.debug(_("Saved as %s") % cached)

		self.get_feed(feed_url, force = True)

	def get_feed(self, url, force = False, selections_ok = False):
		"""Get a feed from the cache.
		@param url: the URL of the feed
		@param force: load the file from disk again
		@param selections_ok: if url is a local selections file, return that instead
		@return: the feed, or None if it isn't cached
		@rtype: L{model.ZeroInstallFeed}"""
		if not force:
			# False is the "not looked up yet" sentinel; None means
			# "looked up and known to be uncached", so use identity here.
			feed = self._feeds.get(url, False)
			if feed is not False:
				return feed

		if url.startswith('distribution:'):
			master_feed = self.get_feed(url.split(':', 1)[1])
			if not master_feed:
				return None	# Can't happen?
			feed = self.distro.get_feed(master_feed)
		else:
			feed = reader.load_feed_from_cache(url, selections_ok = selections_ok)
			if selections_ok and feed and not isinstance(feed, model.ZeroInstallFeed):
				assert feed.selections is not None
				return feed	# (it's actually a selections document)
		if feed:
			reader.update_user_feed_overrides(feed)
		self._feeds[url] = feed
		return feed

	def get_interface(self, uri):
		"""Get the interface for uri, creating a new one if required.
		New interfaces are initialised from the disk cache, but not from
		the network.
		@param uri: the URI of the interface to find
		@rtype: L{model.Interface}
		"""
		if type(uri) == str:
			uri = unicode(uri)	# Python 2: promote byte strings
		assert isinstance(uri, unicode)

		if uri in self._interfaces:
			return self._interfaces[uri]

		logger.debug(_("Initialising new interface object for %s"), uri)
		self._interfaces[uri] = Interface(uri)
		reader.update_from_cache(self._interfaces[uri], iface_cache = self)
		return self._interfaces[uri]

	def list_all_interfaces(self):
		"""List all interfaces in the cache.
		@rtype: [str]
		"""
		found = set()	# (renamed from 'all', which shadowed the builtin)
		for d in basedir.load_cache_paths(config_site, 'interfaces'):
			for leaf in os.listdir(d):
				if not leaf.startswith('.'):
					found.add(unescape(leaf))
		return list(found)	# callers expect a list, not a set

	def get_icon_path(self, iface):
		"""Get the path of a cached icon for an interface.
		@param iface: interface whose icon we want
		@return: the path of the cached icon, or None if not cached.
		@rtype: str"""
		return basedir.load_first_cache(config_site, 'interface_icons',
						escape(iface.uri))

	def get_cached_signatures(self, uri):
		"""Verify the cached interface using GPG.
		Only new-style XML-signed interfaces retain their signatures in the cache.
		@param uri: the feed to check
		@type uri: str
		@return: a list of signatures, or None
		@rtype: [L{gpg.Signature}] or None
		@since: 0.25"""
		from . import gpg
		if os.path.isabs(uri):
			old_iface = uri		# already a local path
		else:
			old_iface = basedir.load_first_cache(config_site, 'interfaces', escape(uri))
			if old_iface is None:
				return None
		try:
			with open(old_iface, 'rb') as stream:
				return gpg.check_stream(stream)[1]
		except SafeException as ex:
			logger.info(_("No signatures (old-style interface): %s") % ex)
			return None

	def _get_signature_date(self, uri):
		"""Read the date-stamp from the signature of the cached interface.
		If the date-stamp is unavailable, returns None."""
		from . import trust
		sigs = self.get_cached_signatures(uri)
		if sigs:
			return self._oldest_trusted(sigs, trust.domain_from_url(uri))

	def _oldest_trusted(self, sigs, domain):
		"""Return the date of the oldest trusted signature in the list, or None if there
		are no trusted sigs in the list."""
		trusted = [s.get_timestamp() for s in sigs if s.is_trusted(domain)]
		if trusted:
			return min(trusted)
		return None

	def mark_as_checking(self, url):
		"""Touch a 'last-check-attempt' timestamp file for this feed.
		If url is a local path, nothing happens.
		This prevents us from repeatedly trying to download a failing feed many
		times in a short period."""
		if os.path.isabs(url):
			return
		feeds_dir = basedir.save_cache_path(config_site, config_prog, 'last-check-attempt')
		timestamp_path = os.path.join(feeds_dir, model._pretty_escape(url))
		fd = os.open(timestamp_path, os.O_WRONLY | os.O_CREAT, 0o644)
		os.close(fd)
		os.utime(timestamp_path, None)	# In case file already exists

	def get_last_check_attempt(self, url):
		"""Return the time of the most recent update attempt for a feed.
		@see: L{mark_as_checking}
		@return: The time, or None if none is recorded
		@rtype: float | None"""
		timestamp_path = basedir.load_first_cache(config_site, config_prog, 'last-check-attempt', model._pretty_escape(url))
		if timestamp_path:
			return os.stat(timestamp_path).st_mtime
		return None

	def get_feed_imports(self, iface):
		"""Get all feeds that add to this interface.
		This is the feeds explicitly added by the user, feeds added by the distribution,
		and feeds imported by a <feed> in the main feed (but not recursively, at present).
		@rtype: [L{Feed}]
		@since: 0.48"""
		main_feed = self.get_feed(iface.uri)
		if main_feed:
			return iface.extra_feeds + main_feed.feeds
		else:
			return iface.extra_feeds

	def get_feeds(self, iface):
		"""Get all feeds for this interface. This is a mapping from feed URLs
		to ZeroInstallFeeds. It includes the interface's main feed, plus the
		resolution of every feed returned by L{get_feed_imports}. Uncached
		feeds are indicated by a value of None.
		@rtype: {str: L{ZeroInstallFeed} | None}
		@since: 0.48"""
		main_feed = self.get_feed(iface.uri)
		results = {iface.uri: main_feed}
		for imp in iface.extra_feeds:
			try:
				results[imp.uri] = self.get_feed(imp.uri)
			except SafeException as ex:
				# Fixed: message had an unbalanced quote; also warn() is
				# the deprecated alias of warning().
				logger.warning("Failed to load feed '%s': %s", imp.uri, ex)
		if main_feed:
			for imp in main_feed.feeds:
				results[imp.uri] = self.get_feed(imp.uri)
		return results

	def get_implementations(self, iface):
		"""Return all implementations from all of iface's feeds.
		@rtype: [L{Implementation}]
		@since: 0.48"""
		impls = []
		for feed in self.get_feeds(iface).values():
			if feed:
				impls += feed.implementations.values()
		return impls

	def get_feed_targets(self, feed):
		"""Return a list of Interfaces for which feed can be a feed.
		This is used by B{0install add-feed}.
		@param feed: the feed
		@type feed: L{model.ZeroInstallFeed} (or, deprecated, a URL)
		@rtype: [model.Interface]
		@raise SafeException: If there are no known feeds.
		@since: 0.53"""
		if not isinstance(feed, model.ZeroInstallFeed):
			# (deprecated)
			feed = self.get_feed(feed)
			if feed is None:
				raise SafeException("Feed is not cached and using deprecated API")

		if not feed.feed_for:
			raise SafeException(_("Missing <feed-for> element in '%s'; "
					"it can't be used as a feed for any other interface.") % feed.url)
		feed_targets = feed.feed_for
		logger.debug(_("Feed targets: %s"), feed_targets)
		return [self.get_interface(uri) for uri in feed_targets]

	def is_stale(self, feed_url, freshness_threshold):
		"""Check whether feed needs updating, based on the configured L{config.Config.freshness}.
		None is considered to be stale.
		If we already tried to update the feed within FAILED_CHECK_DELAY, returns false.
		@return: True if feed should be updated
		@since: 0.53"""
		if isinstance(feed_url, model.ZeroInstallFeed):
			feed_url = feed_url.url		# old API
		elif feed_url is None:
			return True			# old API

		now = time.time()

		feed = self.get_feed(feed_url)
		if feed is not None:
			if feed.local_path is not None:
				return False		# Local feeds are never stale

			if feed.last_modified is not None:
				staleness = now - (feed.last_checked or 0)
				logger.debug(_("Staleness for %(feed)s is %(staleness).2f hours"), {'feed': feed, 'staleness': staleness / 3600.0})

				if freshness_threshold <= 0 or staleness < freshness_threshold:
					return False	# Fresh enough for us
			# else we've never had it

		last_check_attempt = self.get_last_check_attempt(feed_url)
		if last_check_attempt and last_check_attempt > now - FAILED_CHECK_DELAY:
			logger.debug(_("Stale, but tried to check recently (%s) so not rechecking now."), time.ctime(last_check_attempt))
			return False

		return True

	def usable_feeds(self, iface, arch):
		"""Generator for C{iface.feeds} that are valid for this architecture.
		@type iface: L{model.Interface}
		@rtype: generator
		@see: L{arch}
		@since: 0.53"""
		for f in self.get_feed_imports(iface):
			if f.os in arch.os_ranks and f.machine in arch.machine_ranks:
				yield f
			else:
				logger.debug(_("Skipping '%(feed)s'; unsupported architecture %(os)s-%(machine)s"),
					{'feed': f, 'os': f.os, 'machine': f.machine})
# Singleton cache instance; prefer this over creating new IfaceCache objects.
iface_cache = IfaceCache()