1 """
2 Manages the feed cache.
4 @var iface_cache: A singleton cache object. You should normally use this rather than
5 creating new cache objects.
7 """
8 # Copyright (C) 2009, Thomas Leonard
9 # See the README file for details, or visit http://0install.net.
11 # Note:
13 # We need to know the modification time of each interface, because we refuse
14 # to update to an older version (this prevents an attack where the attacker
15 # sends back an old version which is correctly signed but has a known bug).
17 # The way we store this is a bit complicated due to backward compatibility:
19 # - GPG-signed interfaces have their signatures removed and a last-modified
20 # attribute is stored containing the date from the signature.
22 # - XML-signed interfaces are stored unmodified with their signatures. The
23 # date is extracted from the signature when needed.
25 # - Older versions used to add the last-modified attribute even to files
26 # with XML signatures - these files therefore have invalid signatures and
27 # we extract from the attribute for these.
29 # Eventually, support for the first and third cases will be removed.
31 import os, sys, time
32 from logging import debug, info, warn
34 from zeroinstall import _
35 from zeroinstall.support import basedir, portable_rename, raise_with_traceback
36 from zeroinstall.injector import reader, model
37 from zeroinstall.injector.namespaces import config_site, config_prog
38 from zeroinstall.injector.model import Interface, escape, unescape
39 from zeroinstall import SafeException
41 # If we started a check within this period, don't start another one:
42 FAILED_CHECK_DELAY = 60 * 60 # 1 Hour
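
# Illustrative sketch (not executed; the URL is an assumption): typical read-only
# use of the singleton cache defined at the bottom of this module.
#
#     from zeroinstall.injector.iface_cache import iface_cache
#     iface = iface_cache.get_interface('http://example.com/prog.xml')
#     for impl in iface_cache.get_implementations(iface):
#         print(impl)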

def _pretty_time(t):
	assert isinstance(t, (int, long)), t
	return time.strftime('%Y-%m-%d %H:%M:%S UTC', time.localtime(t))

class ReplayAttack(SafeException):
	"""Attempt to import a feed that's older than the one in the cache."""
	pass

class PendingFeed(object):
	"""A feed that has been downloaded but not yet added to the interface cache.
	Feeds remain in this state until the user confirms that they trust at least
	one of the signatures.
	@ivar url: URL for the feed
	@type url: str
	@ivar signed_data: the untrusted data
	@type signed_data: stream
	@ivar sigs: signatures extracted from signed_data
	@type sigs: [L{gpg.Signature}]
	@ivar new_xml: the payload of the signed_data, or the whole thing if XML
	@type new_xml: str
	@since: 0.25"""
	__slots__ = ['url', 'signed_data', 'sigs', 'new_xml']

	def __init__(self, url, signed_data):
		"""Downloaded data is a GPG-signed message.
		@param url: the URL of the downloaded feed
		@type url: str
		@param signed_data: the downloaded data (not yet trusted)
		@type signed_data: stream
		@raise SafeException: if the data is not signed (the offending data is logged)"""
		self.url = url
		self.signed_data = signed_data
		self.recheck()

	def download_keys(self, fetcher, feed_hint = None, key_mirror = None):
		"""Download any required GPG keys not already on our keyring.
		When all downloads are done (successful or otherwise), add any new keys
		to the keyring and then L{recheck}.
		@param fetcher: fetcher to manage the download (was Handler before version 1.5)
		@type fetcher: L{fetch.Fetcher}
		@param key_mirror: URL of directory containing keys, or None to use feed's directory
		@type key_mirror: str
		"""
		downloads = {}
		blockers = []
		for x in self.sigs:
			key_id = x.need_key()
			if key_id:
				import urlparse
				key_url = urlparse.urljoin(key_mirror or self.url, '%s.gpg' % key_id)
				info(_("Fetching key from %s"), key_url)
				dl = fetcher.download_url(key_url, hint = feed_hint)
				downloads[dl.downloaded] = (dl, dl.tempfile)
				blockers.append(dl.downloaded)

		exception = None
		any_success = False

		from zeroinstall.support import tasks

		while blockers:
			yield blockers

			old_blockers = blockers
			blockers = []

			for b in old_blockers:
				try:
					tasks.check(b)
					if b.happened:
						dl, stream = downloads[b]
						stream.seek(0)
						self._downloaded_key(stream)
						any_success = True
					else:
						blockers.append(b)
				except Exception:
					_type, exception, tb = sys.exc_info()
					warn(_("Failed to import key for '%(url)s': %(exception)s"), {'url': self.url, 'exception': str(exception)})

		if exception and not any_success:
			raise_with_traceback(exception, tb)

		self.recheck()
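
	# Illustrative sketch only: download_keys() is a generator intended to be
	# driven by the zeroinstall.support.tasks loop. Assuming a caller that is
	# itself a tasks coroutine and already has a L{fetch.Fetcher}, the usual
	# pattern looks roughly like:
	#
	#     keys_done = tasks.Task(pending.download_keys(fetcher), "download keys")
	#     yield keys_done.finished
	#     tasks.check(keys_done.finished)
	#     # pending.sigs and pending.new_xml have now been refreshed by recheck()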

	def _downloaded_key(self, stream):
		import shutil, tempfile
		from zeroinstall.injector import gpg

		info(_("Importing key for feed '%s'"), self.url)

		# Python2.4: can't call fileno() on stream, so save to tmp file instead
		tmpfile = tempfile.TemporaryFile(prefix = 'injector-dl-data-')
		try:
			shutil.copyfileobj(stream, tmpfile)
			tmpfile.flush()

			tmpfile.seek(0)
			gpg.import_key(tmpfile)
		finally:
			tmpfile.close()

	def recheck(self):
		"""Set new_xml and sigs by reading signed_data.
		You need to call this when previously-missing keys are added to the GPG keyring."""
		from . import gpg
		try:
			self.signed_data.seek(0)
			stream, sigs = gpg.check_stream(self.signed_data)
			assert sigs

			data = stream.read()
			if stream is not self.signed_data:
				stream.close()

			self.new_xml = data
			self.sigs = sigs
		except:
			self.signed_data.seek(0)
			info(_("Failed to check GPG signature. Data received was:\n") + repr(self.signed_data.read()))
			raise
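
# Illustrative sketch (url and signed_stream are placeholders): the normal life
# cycle of a PendingFeed once the signed data has been downloaded.
#
#     pending = PendingFeed(url, signed_stream)
#     # ...fetch any missing GPG keys and let the user confirm trust...
#     if iface_cache.update_feed_if_trusted(pending.url, pending.sigs, pending.new_xml):
#         print("feed updated")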

class IfaceCache(object):
	"""
	The interface cache stores downloaded and verified interfaces in
	~/.cache/0install.net/interfaces (by default).

	There are methods to query the cache, add to it, check signatures, etc.

	The cache is updated by L{fetch.Fetcher}.

	Confusingly, this class is really two caches combined: the in-memory
	cache of L{model.Interface} objects, and an on-disk cache of L{model.ZeroInstallFeed}s.
	It will probably be split into two in future.

	@ivar distro: the native distribution proxy
	@type distro: L{distro.Distribution}

	@see: L{iface_cache} - the singleton IfaceCache instance.
	"""

	__slots__ = ['_interfaces', '_feeds', '_distro', '_config']

	def __init__(self, distro = None):
		"""@param distro: distribution used to resolve "distribution:" feeds (since 0.49)
		@type distro: L{distro.Distribution}, or None to use the host distribution
		"""
		self._interfaces = {}
		self._feeds = {}
		self._distro = distro

	@property
	def stores(self):
		from zeroinstall.injector import policy
		return policy.get_deprecated_singleton_config().stores

	@property
	def distro(self):
		if self._distro is None:
			from zeroinstall.injector.distro import get_host_distribution
			self._distro = get_host_distribution()
		return self._distro

	def update_interface_if_trusted(self, interface, sigs, xml):
		import warnings
		warnings.warn("Use update_feed_if_trusted instead", DeprecationWarning, stacklevel = 2)
		return self.update_feed_if_trusted(interface.uri, sigs, xml)

	def update_feed_if_trusted(self, feed_url, sigs, xml):
		"""Update a cached feed (using L{update_feed_from_network})
		if we trust the signatures.
		If we don't trust any of the signatures, do nothing.
		@param feed_url: the feed being updated
		@type feed_url: str
		@param sigs: signatures from L{gpg.check_stream}
		@type sigs: [L{gpg.Signature}]
		@param xml: the downloaded replacement feed document
		@type xml: str
		@return: True if the feed was updated
		@rtype: bool
		@since: 0.48
		"""
		from . import trust
		updated = self._oldest_trusted(sigs, trust.domain_from_url(feed_url))
		if updated is None: return False	# None are trusted

		self.update_feed_from_network(feed_url, xml, updated)
		return True

	def update_interface_from_network(self, interface, new_xml, modified_time):
		import warnings
		warnings.warn("Use update_feed_from_network instead", DeprecationWarning, stacklevel = 2)
		self.update_feed_from_network(interface.uri, new_xml, modified_time)

	def update_feed_from_network(self, feed_url, new_xml, modified_time):
		"""Update a cached feed.
		Called by L{update_feed_if_trusted} if we trust this data.
		After a successful update, L{writer} is used to update the feed's
		last_checked time.
		@param feed_url: the URL of the feed being updated
		@type feed_url: str
		@param new_xml: the downloaded replacement feed document
		@type new_xml: str
		@param modified_time: the timestamp of the oldest trusted signature
		(used as an approximation to the feed's modification time)
		@type modified_time: long
		@raises ReplayAttack: if modified_time is older than the currently cached time
		@since: 0.48
		"""
		debug(_("Updating '%(interface)s' from network; modified at %(time)s") %
			{'interface': feed_url, 'time': _pretty_time(modified_time)})

		self._import_new_feed(feed_url, new_xml, modified_time)

		feed = self.get_feed(feed_url)

		from . import writer
		feed.last_checked = int(time.time())
		writer.save_feed(feed)

		info(_("Updated feed cache entry for %(interface)s (modified %(time)s)"),
			{'interface': feed.get_name(), 'time': _pretty_time(modified_time)})

	def _import_new_feed(self, feed_url, new_xml, modified_time):
		"""Write new_xml into the cache.
		@param feed_url: the URL for the feed being updated
		@param new_xml: the data to write
		@param modified_time: when new_xml was modified
		@raises ReplayAttack: if the new mtime is older than the current one
		"""
		assert modified_time

		upstream_dir = basedir.save_cache_path(config_site, 'interfaces')
		cached = os.path.join(upstream_dir, escape(feed_url))

		old_modified = None
		if os.path.exists(cached):
			old_xml = open(cached).read()
			if old_xml == new_xml:
				debug(_("No change"))
				# Update in-memory copy, in case someone else updated the disk copy
				self.get_feed(feed_url, force = True)
				return
			old_modified = int(os.stat(cached).st_mtime)

		# Do we need to write this temporary file now?
		stream = open(cached + '.new', 'w')
		try:
			stream.write(new_xml)
			stream.close()
			os.utime(cached + '.new', (modified_time, modified_time))
			new_mtime = reader.check_readable(feed_url, cached + '.new')
			assert new_mtime == modified_time

			old_modified = self._get_signature_date(feed_url) or old_modified

			if old_modified:
				if new_mtime < old_modified:
					raise ReplayAttack(_("New feed's modification time is "
						"before old version!\nInterface: %(iface)s\nOld time: %(old_time)s\nNew time: %(new_time)s\n"
						"Refusing update.")
						% {'iface': feed_url, 'old_time': _pretty_time(old_modified), 'new_time': _pretty_time(new_mtime)})
				if new_mtime == old_modified:
					# You used to have to update the modification time manually.
					# Now it comes from the signature, this check isn't useful
					# and often causes problems when the stored format changes
					# (e.g., when we stopped writing last-modified attributes)
					pass
					#raise SafeException("Interface has changed, but modification time "
					#		"hasn't! Refusing update.")
		except:
			os.unlink(cached + '.new')
			raise

		portable_rename(cached + '.new', cached)
		debug(_("Saved as %s") % cached)

		self.get_feed(feed_url, force = True)

	def get_feed(self, url, force = False, selections_ok = False):
		"""Get a feed from the cache.
		@param url: the URL of the feed
		@param force: load the file from disk again
		@param selections_ok: if url is a local selections file, return that instead
		@return: the feed, or None if it isn't cached
		@rtype: L{model.ZeroInstallFeed}"""
		if not force:
			feed = self._feeds.get(url, False)
			if feed != False:
				return feed

		if url.startswith('distribution:'):
			master_feed = self.get_feed(url.split(':', 1)[1])
			if not master_feed:
				return None	# Can't happen?
			feed = self.distro.get_feed(master_feed)
		else:
			feed = reader.load_feed_from_cache(url, selections_ok = selections_ok)
			if selections_ok and feed and not isinstance(feed, model.ZeroInstallFeed):
				assert feed.selections is not None
				return feed	# (it's actually a selections document)
		if feed:
			reader.update_user_feed_overrides(feed)
		self._feeds[url] = feed
		return feed

	def get_interface(self, uri):
		"""Get the interface for uri, creating a new one if required.
		New interfaces are initialised from the disk cache, but not from
		the network.
		@param uri: the URI of the interface to find
		@rtype: L{model.Interface}
		"""
		if type(uri) == str:
			uri = model.unicode(uri)
		assert isinstance(uri, model.unicode)

		if uri in self._interfaces:
			return self._interfaces[uri]

		debug(_("Initialising new interface object for %s"), uri)
		self._interfaces[uri] = Interface(uri)
		reader.update_from_cache(self._interfaces[uri], iface_cache = self)
		return self._interfaces[uri]

	def list_all_interfaces(self):
		"""List all interfaces in the cache.
		@rtype: [str]
		"""
		all = set()
		for d in basedir.load_cache_paths(config_site, 'interfaces'):
			for leaf in os.listdir(d):
				if not leaf.startswith('.'):
					all.add(unescape(leaf))
		return list(all)	# Why not just return the set?

	def get_icon_path(self, iface):
		"""Get the path of a cached icon for an interface.
		@param iface: interface whose icon we want
		@return: the path of the cached icon, or None if not cached.
		@rtype: str"""
		return basedir.load_first_cache(config_site, 'interface_icons',
						escape(iface.uri))

	def get_cached_signatures(self, uri):
		"""Verify the cached interface using GPG.
		Only new-style XML-signed interfaces retain their signatures in the cache.
		@param uri: the feed to check
		@type uri: str
		@return: a list of signatures, or None
		@rtype: [L{gpg.Signature}] or None
		@since: 0.25"""
		from . import gpg
		if os.path.isabs(uri):
			old_iface = uri
		else:
			old_iface = basedir.load_first_cache(config_site, 'interfaces', escape(uri))
			if old_iface is None:
				return None
		try:
			return gpg.check_stream(open(old_iface))[1]
		except SafeException as ex:
			debug(_("No signatures (old-style interface): %s") % ex)
			return None

	def _get_signature_date(self, uri):
		"""Read the date-stamp from the signature of the cached interface.
		If the date-stamp is unavailable, returns None."""
		from . import trust
		sigs = self.get_cached_signatures(uri)
		if sigs:
			return self._oldest_trusted(sigs, trust.domain_from_url(uri))

	def _oldest_trusted(self, sigs, domain):
		"""Return the date of the oldest trusted signature in the list, or None if there
		are no trusted sigs in the list."""
		trusted = [s.get_timestamp() for s in sigs if s.is_trusted(domain)]
		if trusted:
			return min(trusted)
		return None

	def mark_as_checking(self, url):
		"""Touch a 'last-check-attempt' timestamp file for this feed.
		If url is a local path, nothing happens.
		This prevents us from repeatedly trying to download a failing feed many
		times in a short period."""
		if os.path.isabs(url):
			return
		feeds_dir = basedir.save_cache_path(config_site, config_prog, 'last-check-attempt')
		timestamp_path = os.path.join(feeds_dir, model._pretty_escape(url))
		fd = os.open(timestamp_path, os.O_WRONLY | os.O_CREAT, 0o644)
		os.close(fd)
		os.utime(timestamp_path, None)	# In case file already exists

	def get_last_check_attempt(self, url):
		"""Return the time of the most recent update attempt for a feed.
		@see: L{mark_as_checking}
		@return: The time, or None if none is recorded
		@rtype: float | None"""
		timestamp_path = basedir.load_first_cache(config_site, config_prog, 'last-check-attempt', model._pretty_escape(url))
		if timestamp_path:
			return os.stat(timestamp_path).st_mtime
		return None

	def get_feed_imports(self, iface):
		"""Get all feeds that add to this interface.
		These are the feeds explicitly added by the user, feeds added by the distribution,
		and feeds imported by a <feed> in the main feed (but not recursively, at present).
		@rtype: [L{Feed}]
		@since: 0.48"""
		main_feed = self.get_feed(iface.uri)
		if main_feed:
			return iface.extra_feeds + main_feed.feeds
		else:
			return iface.extra_feeds

	def get_feeds(self, iface):
		"""Get all feeds for this interface. This is a mapping from feed URLs
		to ZeroInstallFeeds. It includes the interface's main feed, plus the
		resolution of every feed returned by L{get_feed_imports}. Uncached
		feeds are indicated by a value of None.
		@rtype: {str: L{ZeroInstallFeed} | None}
		@since: 0.48"""
		main_feed = self.get_feed(iface.uri)
		results = {iface.uri: main_feed}
		for imp in iface.extra_feeds:
			try:
				results[imp.uri] = self.get_feed(imp.uri)
			except SafeException as ex:
				warn("Failed to load feed '%s': %s", imp.uri, ex)
		if main_feed:
			for imp in main_feed.feeds:
				results[imp.uri] = self.get_feed(imp.uri)
		return results

	def get_implementations(self, iface):
		"""Return all implementations from all of iface's feeds.
		@rtype: [L{Implementation}]
		@since: 0.48"""
		impls = []
		for feed in self.get_feeds(iface).itervalues():
			if feed:
				impls += feed.implementations.values()
		return impls

	def get_feed_targets(self, feed):
		"""Return a list of Interfaces for which feed can be a feed.
		This is used by B{0install add-feed}.
		@param feed: the feed
		@type feed: L{model.ZeroInstallFeed} (or, deprecated, a URL)
		@rtype: [model.Interface]
		@raise SafeException: If there are no known feeds.
		@since: 0.53"""

		if not isinstance(feed, model.ZeroInstallFeed):
			# (deprecated)
			feed = self.get_feed(feed)
			if feed is None:
				raise SafeException("Feed is not cached and using deprecated API")

		if not feed.feed_for:
			raise SafeException(_("Missing <feed-for> element in '%s'; "
					"it can't be used as a feed for any other interface.") % feed.url)
		feed_targets = feed.feed_for
		debug(_("Feed targets: %s"), feed_targets)
		return [self.get_interface(uri) for uri in feed_targets]
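
	# Illustrative sketch (the URL is made up): how a front-end such as
	# "0install add-feed" can map a feed back to the interfaces it extends.
	#
	#     feed = iface_cache.get_feed('http://example.com/extra-builds.xml')
	#     for target in iface_cache.get_feed_targets(feed):
	#         print(target.uri)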

	def is_stale(self, feed_url, freshness_threshold):
		"""Check whether feed needs updating, based on the configured L{config.Config.freshness}.
		A feed_url of None (the old API) is always considered stale.
		If we already tried to update the feed within FAILED_CHECK_DELAY, returns False.
		@return: True if the feed should be updated
		@since: 0.53"""
		if isinstance(feed_url, model.ZeroInstallFeed):
			feed_url = feed_url.url		# old API
		elif feed_url is None:
			return True			# old API

		now = time.time()

		feed = self.get_feed(feed_url)
		if feed is not None:
			if feed.local_path is not None:
				return False		# Local feeds are never stale

			if feed.last_modified is not None:
				staleness = now - (feed.last_checked or 0)
				debug(_("Staleness for %(feed)s is %(staleness).2f hours"), {'feed': feed, 'staleness': staleness / 3600.0})

				if freshness_threshold <= 0 or staleness < freshness_threshold:
					return False		# Fresh enough for us
		# else we've never had it

		last_check_attempt = self.get_last_check_attempt(feed_url)
		if last_check_attempt and last_check_attempt > now - FAILED_CHECK_DELAY:
			debug(_("Stale, but tried to check recently (%s) so not rechecking now."), time.ctime(last_check_attempt))
			return False

		return True
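
	# Illustrative sketch (the 30-day threshold is an arbitrary example; callers
	# normally pass the user's configured freshness value):
	#
	#     url = 'http://example.com/prog.xml'
	#     if iface_cache.is_stale(url, 30 * 24 * 60 * 60):
	#         iface_cache.mark_as_checking(url)
	#         # ...then schedule a background download of the feed...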

	def usable_feeds(self, iface, arch):
		"""Generator for C{iface.feeds} that are valid for this architecture.
		@type iface: L{model.Interface}
		@rtype: generator
		@see: L{arch}
		@since: 0.53"""
		for f in self.get_feed_imports(iface):
			if f.os in arch.os_ranks and f.machine in arch.machine_ranks:
				yield f
			else:
				debug(_("Skipping '%(feed)s'; unsupported architecture %(os)s-%(machine)s"),
					{'feed': f, 'os': f.os, 'machine': f.machine})

iface_cache = IfaceCache()