All unit-tests now pass with Python 3
[zeroinstall/solver.git] / zeroinstall / injector / iface_cache.py
blobf03b878d9f6db086b56d038a121238ca90bfc0d7
"""
Manages the feed cache.

@var iface_cache: A singleton cache object. You should normally use this rather than
creating new cache objects.
"""
# Copyright (C) 2009, Thomas Leonard
# See the README file for details, or visit http://0install.net.

# Note:
#
# We need to know the modification time of each interface, because we refuse
# to update to an older version (this prevents an attack where the attacker
# sends back an old version which is correctly signed but has a known bug).
#
# The way we store this is a bit complicated due to backward compatibility:
#
# - GPG-signed interfaces have their signatures removed and a last-modified
#   attribute is stored containing the date from the signature.
#
# - XML-signed interfaces are stored unmodified with their signatures. The
#   date is extracted from the signature when needed.
#
# - Older versions used to add the last-modified attribute even to files
#   with XML signatures - these files therefore have invalid signatures and
#   we extract from the attribute for these.
#
# Eventually, support for the first and third cases will be removed.
31 import os, sys, time
32 from logging import debug, info, warn
34 from zeroinstall import _
35 from zeroinstall.support import basedir, portable_rename, raise_with_traceback, unicode
36 from zeroinstall.injector import reader, model
37 from zeroinstall.injector.namespaces import config_site, config_prog
38 from zeroinstall.injector.model import Interface, escape, unescape
39 from zeroinstall import SafeException
# If we started a check within this period, don't start another one:
# used by L{IfaceCache.is_stale} to rate-limit re-download attempts.
FAILED_CHECK_DELAY = 60 * 60	# 1 Hour
44 def _pretty_time(t):
45 #assert isinstance(t, (int, long)), t
46 return time.strftime('%Y-%m-%d %H:%M:%S UTC', time.localtime(t))
class ReplayAttack(SafeException):
	"""Raised when an attempt is made to import a feed that is older than the
	copy already in the cache (see the replay-attack note at the top of this
	module)."""
	pass
class PendingFeed(object):
	"""A feed that has been downloaded but not yet added to the interface cache.
	Feeds remain in this state until the user confirms that they trust at least
	one of the signatures.
	@ivar url: URL for the feed
	@type url: str
	@ivar signed_data: the untrusted data
	@type signed_data: stream
	@ivar sigs: signatures extracted from signed_data
	@type sigs: [L{gpg.Signature}]
	@ivar new_xml: the payload of the signed_data, or the whole thing if XML
	@type new_xml: str
	@since: 0.25"""
	__slots__ = ['url', 'signed_data', 'sigs', 'new_xml']

	def __init__(self, url, signed_data):
		"""Downloaded data is a GPG-signed message.
		@param url: the URL of the downloaded feed
		@type url: str
		@param signed_data: the downloaded data (not yet trusted)
		@type signed_data: stream
		@raise SafeException: if the data is not signed, and logs the actual data"""
		self.url = url
		self.signed_data = signed_data
		# Check the signature immediately; sets self.new_xml and self.sigs.
		self.recheck()

	def download_keys(self, fetcher, feed_hint = None, key_mirror = None):
		"""Download any required GPG keys not already on our keyring.
		When all downloads are done (successful or otherwise), add any new keys
		to the keyring, L{recheck}.
		@param fetcher: fetcher to manage the download (was Handler before version 1.5)
		@type fetcher: L{fetch.Fetcher}
		@param key_mirror: URL of directory containing keys, or None to use feed's directory
		@type key_mirror: str
		"""
		downloads = {}
		blockers = []
		for x in self.sigs:
			key_id = x.need_key()
			if key_id:
				# Python 2 has a top-level urlparse module; Python 3 moved
				# it to urllib.parse.
				try:
					import urlparse
				except ImportError:
					from urllib import parse as urlparse	# Python 3
				# Keys are fetched as '<key-id>.gpg' relative to the mirror
				# (if given) or to the feed's own URL.
				key_url = urlparse.urljoin(key_mirror or self.url, '%s.gpg' % key_id)
				info(_("Fetching key from %s"), key_url)
				dl = fetcher.download_url(key_url, hint = feed_hint)
				downloads[dl.downloaded] = (dl, dl.tempfile)
				blockers.append(dl.downloaded)

		exception = None
		any_success = False

		from zeroinstall.support import tasks

		# This is a tasks coroutine: yield the set of blockers we are still
		# waiting for, importing each key as its download completes.
		while blockers:
			yield blockers

			old_blockers = blockers
			blockers = []

			for b in old_blockers:
				try:
					tasks.check(b)
					if b.happened:
						dl, stream = downloads[b]
						stream.seek(0)
						self._downloaded_key(stream)
						any_success = True
					else:
						# Still in progress; check again next iteration.
						blockers.append(b)
				except Exception:
					# Record the failure but keep going: other keys may
					# still import successfully.
					_type, exception, tb = sys.exc_info()
					warn(_("Failed to import key for '%(url)s': %(exception)s"), {'url': self.url, 'exception': str(exception)})

		# Only raise if every key download/import failed.
		if exception and not any_success:
			raise_with_traceback(exception, tb)

		self.recheck()

	def _downloaded_key(self, stream):
		"""Import a downloaded GPG key (read from stream) into the keyring."""
		import shutil, tempfile
		from zeroinstall.injector import gpg

		info(_("Importing key for feed '%s'"), self.url)

		# Python2.4: can't call fileno() on stream, so save to tmp file instead
		tmpfile = tempfile.TemporaryFile(prefix = 'injector-dl-data-')
		try:
			shutil.copyfileobj(stream, tmpfile)
			tmpfile.flush()

			tmpfile.seek(0)
			gpg.import_key(tmpfile)
		finally:
			tmpfile.close()

	def recheck(self):
		"""Set new_xml and sigs by reading signed_data.
		You need to call this when previously-missing keys are added to the GPG keyring."""
		from . import gpg
		try:
			self.signed_data.seek(0)
			stream, sigs = gpg.check_stream(self.signed_data)
			assert sigs

			data = stream.read()
			# check_stream may return either the original stream or a new
			# one containing just the payload; only close the latter.
			if stream is not self.signed_data:
				stream.close()

			self.new_xml = data
			self.sigs = sigs
		except:
			# Log the raw (untrusted) data to help diagnose bad signatures,
			# then re-raise the original error.
			self.signed_data.seek(0)
			info(_("Failed to check GPG signature. Data received was:\n") + repr(self.signed_data.read()))
			raise
class IfaceCache(object):
	"""
	The interface cache stores downloaded and verified interfaces in
	~/.cache/0install.net/interfaces (by default).

	There are methods to query the cache, add to it, check signatures, etc.

	The cache is updated by L{fetch.Fetcher}.

	Confusingly, this class is really two caches combined: the in-memory
	cache of L{model.Interface} objects, and an on-disk cache of L{model.ZeroInstallFeed}s.
	It will probably be split into two in future.

	@ivar distro: the native distribution proxy
	@type distro: L{distro.Distribution}

	@see: L{iface_cache} - the singleton IfaceCache instance.
	"""

	__slots__ = ['_interfaces', '_feeds', '_distro', '_config']

	def __init__(self, distro = None):
		"""@param distro: distribution used to resolve "distribution:" feeds (since 0.49)
		@type distro: L{distro.Distribution}, or None to use the host distribution"""
		self._interfaces = {}	# in-memory cache: URI -> L{model.Interface}
		self._feeds = {}	# in-memory cache: URL -> L{model.ZeroInstallFeed} or None
		self._distro = distro

	@property
	def stores(self):
		# Deprecated access path, kept for backwards compatibility.
		from zeroinstall.injector import policy
		return policy.get_deprecated_singleton_config().stores

	@property
	def distro(self):
		# Resolved lazily so that creating an IfaceCache is cheap.
		if self._distro is None:
			from zeroinstall.injector.distro import get_host_distribution
			self._distro = get_host_distribution()
		return self._distro

	def update_interface_if_trusted(self, interface, sigs, xml):
		"""@deprecated: use L{update_feed_if_trusted} instead"""
		import warnings
		warnings.warn("Use update_feed_if_trusted instead", DeprecationWarning, stacklevel = 2)
		return self.update_feed_if_trusted(interface.uri, sigs, xml)

	def update_feed_if_trusted(self, feed_url, sigs, xml):
		"""Update a cached feed (using L{update_feed_from_network})
		if we trust the signatures.
		If we don't trust any of the signatures, do nothing.
		@param feed_url: the feed being updated
		@type feed_url: str
		@param sigs: signatures from L{gpg.check_stream}
		@type sigs: [L{gpg.Signature}]
		@param xml: the downloaded replacement feed document
		@type xml: str
		@return: True if the feed was updated
		@rtype: bool
		@since: 0.48
		"""
		from . import trust
		# The oldest trusted signature's date doubles as the feed's
		# modification time for replay-attack detection.
		updated = self._oldest_trusted(sigs, trust.domain_from_url(feed_url))
		if updated is None: return False	# None are trusted

		self.update_feed_from_network(feed_url, xml, updated)
		return True

	def update_interface_from_network(self, interface, new_xml, modified_time):
		"""@deprecated: use L{update_feed_from_network} instead"""
		import warnings
		warnings.warn("Use update_feed_from_network instead", DeprecationWarning, stacklevel = 2)
		self.update_feed_from_network(interface.uri, new_xml, modified_time)

	def update_feed_from_network(self, feed_url, new_xml, modified_time):
		"""Update a cached feed.
		Called by L{update_feed_if_trusted} if we trust this data.
		After a successful update, L{writer} is used to update the feed's
		last_checked time.
		@param feed_url: the feed being updated
		@type feed_url: str
		@param new_xml: the downloaded replacement feed document
		@type new_xml: str
		@param modified_time: the timestamp of the oldest trusted signature
		(used as an approximation to the feed's modification time)
		@type modified_time: long
		@raises ReplayAttack: if modified_time is older than the currently cached time
		@since: 0.48
		"""
		debug(_("Updating '%(interface)s' from network; modified at %(time)s") %
			{'interface': feed_url, 'time': _pretty_time(modified_time)})

		self._import_new_feed(feed_url, new_xml, modified_time)

		feed = self.get_feed(feed_url)

		from . import writer
		feed.last_checked = int(time.time())
		writer.save_feed(feed)

		info(_("Updated feed cache entry for %(interface)s (modified %(time)s)"),
			{'interface': feed.get_name(), 'time': _pretty_time(modified_time)})

	def _import_new_feed(self, feed_url, new_xml, modified_time):
		"""Write new_xml into the cache.
		@param feed_url: the URL for the feed being updated
		@param new_xml: the data to write
		@param modified_time: when new_xml was modified
		@raises ReplayAttack: if the new mtime is older than the current one
		"""
		assert modified_time
		assert isinstance(new_xml, bytes), repr(new_xml)

		upstream_dir = basedir.save_cache_path(config_site, 'interfaces')
		cached = os.path.join(upstream_dir, escape(feed_url))

		old_modified = None
		if os.path.exists(cached):
			with open(cached, 'rb') as stream:
				old_xml = stream.read()
			if old_xml == new_xml:
				debug(_("No change"))
				# Update in-memory copy, in case someone else updated the disk copy
				self.get_feed(feed_url, force = True)
				return
			old_modified = int(os.stat(cached).st_mtime)

		# Do we need to write this temporary file now?
		try:
			# Write to '<cached>.new' first so a failure never corrupts
			# the existing cache entry.
			with open(cached + '.new', 'wb') as stream:
				stream.write(new_xml)
			os.utime(cached + '.new', (modified_time, modified_time))
			new_mtime = reader.check_readable(feed_url, cached + '.new')
			assert new_mtime == modified_time

			# Prefer the date from the old file's signature (if any) over
			# the filesystem mtime, which is less trustworthy.
			old_modified = self._get_signature_date(feed_url) or old_modified

			if old_modified:
				if new_mtime < old_modified:
					raise ReplayAttack(_("New feed's modification time is "
						"before old version!\nInterface: %(iface)s\nOld time: %(old_time)s\nNew time: %(new_time)s\n"
						"Refusing update.")
						% {'iface': feed_url, 'old_time': _pretty_time(old_modified), 'new_time': _pretty_time(new_mtime)})
				if new_mtime == old_modified:
					# You used to have to update the modification time manually.
					# Now it comes from the signature, this check isn't useful
					# and often causes problems when the stored format changes
					# (e.g., when we stopped writing last-modified attributes)
					pass
					#raise SafeException("Interface has changed, but modification time "
					#		    "hasn't! Refusing update.")
		except:
			# Don't leave a stale temporary file behind on failure.
			os.unlink(cached + '.new')
			raise

		portable_rename(cached + '.new', cached)
		debug(_("Saved as %s") % cached)

		self.get_feed(feed_url, force = True)

	def get_feed(self, url, force = False, selections_ok = False):
		"""Get a feed from the cache.
		@param url: the URL of the feed
		@param force: load the file from disk again
		@param selections_ok: if url is a local selections file, return that instead
		@return: the feed, or None if it isn't cached
		@rtype: L{model.ZeroInstallFeed}"""
		if not force:
			# False is the "not in memory" sentinel because None is a valid
			# cached value (meaning "known not to be on disk").
			feed = self._feeds.get(url, False)
			if feed != False:
				return feed

		if url.startswith('distribution:'):
			# A distribution feed wraps a master feed; resolve the master
			# first, then ask the distribution proxy for its view of it.
			master_feed = self.get_feed(url.split(':', 1)[1])
			if not master_feed:
				return None	# Can't happen?
			feed = self.distro.get_feed(master_feed)
		else:
			feed = reader.load_feed_from_cache(url, selections_ok = selections_ok)
			if selections_ok and feed and not isinstance(feed, model.ZeroInstallFeed):
				assert feed.selections is not None
				return feed	# (it's actually a selections document)
		if feed:
			reader.update_user_feed_overrides(feed)
		self._feeds[url] = feed
		return feed

	def get_interface(self, uri):
		"""Get the interface for uri, creating a new one if required.
		New interfaces are initialised from the disk cache, but not from
		the network.
		@param uri: the URI of the interface to find
		@rtype: L{model.Interface}
		"""
		# Normalise Python 2 byte strings to unicode for consistent keys.
		if type(uri) == str:
			uri = unicode(uri)
		assert isinstance(uri, unicode)

		if uri in self._interfaces:
			return self._interfaces[uri]

		debug(_("Initialising new interface object for %s"), uri)
		self._interfaces[uri] = Interface(uri)
		reader.update_from_cache(self._interfaces[uri], iface_cache = self)
		return self._interfaces[uri]

	def list_all_interfaces(self):
		"""List all interfaces in the cache.
		@rtype: [str]
		"""
		all = set()
		for d in basedir.load_cache_paths(config_site, 'interfaces'):
			for leaf in os.listdir(d):
				# Skip hidden files (e.g. editor backups, '.new' leftovers).
				if not leaf.startswith('.'):
					all.add(unescape(leaf))
		return list(all)	# Why not just return the set?

	def get_icon_path(self, iface):
		"""Get the path of a cached icon for an interface.
		@param iface: interface whose icon we want
		@return: the path of the cached icon, or None if not cached.
		@rtype: str"""
		return basedir.load_first_cache(config_site, 'interface_icons',
						escape(iface.uri))

	def get_cached_signatures(self, uri):
		"""Verify the cached interface using GPG.
		Only new-style XML-signed interfaces retain their signatures in the cache.
		@param uri: the feed to check
		@type uri: str
		@return: a list of signatures, or None
		@rtype: [L{gpg.Signature}] or None
		@since: 0.25"""
		from . import gpg
		if os.path.isabs(uri):
			# A local feed: check the file directly.
			old_iface = uri
		else:
			old_iface = basedir.load_first_cache(config_site, 'interfaces', escape(uri))
			if old_iface is None:
				return None
		try:
			with open(old_iface, 'rb') as stream:
				return gpg.check_stream(stream)[1]
		except SafeException as ex:
			info(_("No signatures (old-style interface): %s") % ex)
			return None

	def _get_signature_date(self, uri):
		"""Read the date-stamp from the signature of the cached interface.
		If the date-stamp is unavailable, returns None."""
		from . import trust
		sigs = self.get_cached_signatures(uri)
		if sigs:
			return self._oldest_trusted(sigs, trust.domain_from_url(uri))
		# Falls through to an implicit None when there are no signatures.

	def _oldest_trusted(self, sigs, domain):
		"""Return the date of the oldest trusted signature in the list, or None if there
		are no trusted sigs in the list."""
		trusted = [s.get_timestamp() for s in sigs if s.is_trusted(domain)]
		if trusted:
			return min(trusted)
		return None

	def mark_as_checking(self, url):
		"""Touch a 'last-check-attempt' timestamp file for this feed.
		If url is a local path, nothing happens.
		This prevents us from repeatedly trying to download a failing feed many
		times in a short period."""
		if os.path.isabs(url):
			return
		feeds_dir = basedir.save_cache_path(config_site, config_prog, 'last-check-attempt')
		timestamp_path = os.path.join(feeds_dir, model._pretty_escape(url))
		# Create the file if missing, then bump its mtime to "now".
		fd = os.open(timestamp_path, os.O_WRONLY | os.O_CREAT, 0o644)
		os.close(fd)
		os.utime(timestamp_path, None)	# In case file already exists

	def get_last_check_attempt(self, url):
		"""Return the time of the most recent update attempt for a feed.
		@see: L{mark_as_checking}
		@return: The time, or None if none is recorded
		@rtype: float | None"""
		timestamp_path = basedir.load_first_cache(config_site, config_prog, 'last-check-attempt', model._pretty_escape(url))
		if timestamp_path:
			return os.stat(timestamp_path).st_mtime
		return None

	def get_feed_imports(self, iface):
		"""Get all feeds that add to this interface.
		This is the feeds explicitly added by the user, feeds added by the distribution,
		and feeds imported by a <feed> in the main feed (but not recursively, at present).
		@rtype: [L{Feed}]
		@since: 0.48"""
		main_feed = self.get_feed(iface.uri)
		if main_feed:
			return iface.extra_feeds + main_feed.feeds
		else:
			return iface.extra_feeds

	def get_feeds(self, iface):
		"""Get all feeds for this interface. This is a mapping from feed URLs
		to ZeroInstallFeeds. It includes the interface's main feed, plus the
		resolution of every feed returned by L{get_feed_imports}. Uncached
		feeds are indicated by a value of None.
		@rtype: {str: L{ZeroInstallFeed} | None}
		@since: 0.48"""
		main_feed = self.get_feed(iface.uri)
		results = {iface.uri: main_feed}
		for imp in iface.extra_feeds:
			try:
				results[imp.uri] = self.get_feed(imp.uri)
			except SafeException as ex:
				# NOTE(review): the format string has a stray unmatched
				# quote ("feed '%s: %s") — cosmetic bug in the log message.
				warn("Failed to load feed '%s: %s", imp.uri, ex)
		if main_feed:
			for imp in main_feed.feeds:
				results[imp.uri] = self.get_feed(imp.uri)
		return results

	def get_implementations(self, iface):
		"""Return all implementations from all of iface's feeds.
		@rtype: [L{Implementation}]
		@since: 0.48"""
		impls = []
		for feed in self.get_feeds(iface).values():
			if feed:
				impls += feed.implementations.values()
		return impls

	def get_feed_targets(self, feed):
		"""Return a list of Interfaces for which feed can be a feed.
		This is used by B{0install add-feed}.
		@param feed: the feed
		@type feed: L{model.ZeroInstallFeed} (or, deprecated, a URL)
		@rtype: [model.Interface]
		@raise SafeException: If there are no known feeds.
		@since: 0.53"""

		if not isinstance(feed, model.ZeroInstallFeed):
			# (deprecated)
			feed = self.get_feed(feed)
			if feed is None:
				raise SafeException("Feed is not cached and using deprecated API")

		if not feed.feed_for:
			raise SafeException(_("Missing <feed-for> element in '%s'; "
					"it can't be used as a feed for any other interface.") % feed.url)
		feed_targets = feed.feed_for
		debug(_("Feed targets: %s"), feed_targets)
		return [self.get_interface(uri) for uri in feed_targets]

	def is_stale(self, feed_url, freshness_threshold):
		"""Check whether feed needs updating, based on the configured L{config.Config.freshness}.
		None is considered to be stale.
		If we already tried to update the feed within FAILED_CHECK_DELAY, returns false.
		@return: True if feed should be updated
		@since: 0.53"""
		if isinstance(feed_url, model.ZeroInstallFeed):
			feed_url = feed_url.url		# old API
		elif feed_url is None:
			return True			# old API

		now = time.time()

		feed = self.get_feed(feed_url)
		if feed is not None:
			if feed.local_path is not None:
				return False		# Local feeds are never stale

			if feed.last_modified is not None:
				staleness = now - (feed.last_checked or 0)
				debug(_("Staleness for %(feed)s is %(staleness).2f hours"), {'feed': feed, 'staleness': staleness / 3600.0})

				if freshness_threshold <= 0 or staleness < freshness_threshold:
					return False	# Fresh enough for us
			# else we've never had it

		# Stale, but rate-limit attempts: if a check was started recently
		# (within FAILED_CHECK_DELAY) don't trigger another one.
		last_check_attempt = self.get_last_check_attempt(feed_url)
		if last_check_attempt and last_check_attempt > now - FAILED_CHECK_DELAY:
			debug(_("Stale, but tried to check recently (%s) so not rechecking now."), time.ctime(last_check_attempt))
			return False

		return True

	def usable_feeds(self, iface, arch):
		"""Generator for C{iface.feeds} that are valid for this architecture.
		@type iface: L{model.Interface}
		@rtype: generator
		@see: L{arch}
		@since: 0.53"""
		for f in self.get_feed_imports(iface):
			if f.os in arch.os_ranks and f.machine in arch.machine_ranks:
				yield f
			else:
				debug(_("Skipping '%(feed)s'; unsupported architecture %(os)s-%(machine)s"),
					{'feed': f, 'os': f.os, 'machine': f.machine})
563 iface_cache = IfaceCache()