pyflakes
[zeroinstall.git] / zeroinstall / injector / iface_cache.py
blobc559dae61a2674fb98b5c62f5adcd01e2951c6ec
1 """
2 Manages the feed cache.
4 @var iface_cache: A singleton cache object. You should normally use this rather than
5 creating new cache objects.
7 """
8 # Copyright (C) 2009, Thomas Leonard
9 # See the README file for details, or visit http://0install.net.
11 # Note:
13 # We need to know the modification time of each interface, because we refuse
14 # to update to an older version (this prevents an attack where the attacker
15 # sends back an old version which is correctly signed but has a known bug).
17 # The way we store this is a bit complicated due to backward compatibility:
19 # - GPG-signed interfaces have their signatures removed and a last-modified
20 # attribute is stored containing the date from the signature.
22 # - XML-signed interfaces are stored unmodified with their signatures. The
23 # date is extracted from the signature when needed.
25 # - Older versions used to add the last-modified attribute even to files
26 # with XML signatures - these files therefore have invalid signatures and
27 # we extract from the attribute for these.
29 # Eventually, support for the first and third cases will be removed.
31 import os, sys, time
32 from logging import debug, info, warn
33 from cStringIO import StringIO
35 from zeroinstall import _
36 from zeroinstall.support import basedir
37 from zeroinstall.injector import reader, model
38 from zeroinstall.injector.namespaces import config_site, config_prog
39 from zeroinstall.injector.model import Interface, escape, unescape
40 from zeroinstall import SafeException
42 def _pretty_time(t):
43 assert isinstance(t, (int, long)), t
44 return time.strftime('%Y-%m-%d %H:%M:%S UTC', time.localtime(t))
class ReplayAttack(SafeException):
    """Raised on an attempt to import a feed that is older than the copy
    already held in the cache."""
50 class PendingFeed(object):
51 """A feed that has been downloaded but not yet added to the interface cache.
52 Feeds remain in this state until the user confirms that they trust at least
53 one of the signatures.
54 @ivar url: URL for the feed
55 @type url: str
56 @ivar signed_data: the untrusted data
57 @type signed_data: stream
58 @ivar sigs: signatures extracted from signed_data
59 @type sigs: [L{gpg.Signature}]
60 @ivar new_xml: the payload of the signed_data, or the whole thing if XML
61 @type new_xml: str
62 @since: 0.25"""
63 __slots__ = ['url', 'signed_data', 'sigs', 'new_xml']
65 def __init__(self, url, signed_data):
66 """Downloaded data is a GPG-signed message.
67 @param url: the URL of the downloaded feed
68 @type url: str
69 @param signed_data: the downloaded data (not yet trusted)
70 @type signed_data: stream
71 @raise SafeException: if the data is not signed, and logs the actual data"""
72 self.url = url
73 self.signed_data = signed_data
74 self.recheck()
76 def download_keys(self, handler, feed_hint = None, key_mirror = None):
77 """Download any required GPG keys not already on our keyring.
78 When all downloads are done (successful or otherwise), add any new keys
79 to the keyring, L{recheck}.
80 @param handler: handler to manage the download
81 @type handler: L{handler.Handler}
82 @param key_mirror: URL of directory containing keys, or None to use feed's directory
83 @type key_mirror: str
84 """
85 downloads = {}
86 blockers = []
87 for x in self.sigs:
88 key_id = x.need_key()
89 if key_id:
90 import urlparse
91 key_url = urlparse.urljoin(key_mirror or self.url, '%s.gpg' % key_id)
92 info(_("Fetching key from %s"), key_url)
93 dl = handler.get_download(key_url, hint = feed_hint)
94 downloads[dl.downloaded] = (dl, dl.tempfile)
95 blockers.append(dl.downloaded)
97 exception = None
98 any_success = False
100 from zeroinstall.support import tasks
102 while blockers:
103 yield blockers
105 old_blockers = blockers
106 blockers = []
108 for b in old_blockers:
109 try:
110 tasks.check(b)
111 if b.happened:
112 dl, stream = downloads[b]
113 stream.seek(0)
114 self._downloaded_key(stream)
115 any_success = True
116 else:
117 blockers.append(b)
118 except Exception:
119 _type, exception, tb = sys.exc_info()
120 warn(_("Failed to import key for '%(url)s': %(exception)s"), {'url': self.url, 'exception': str(exception)})
122 if exception and not any_success:
123 raise exception, None, tb
125 self.recheck()
127 def _downloaded_key(self, stream):
128 import shutil, tempfile
129 from zeroinstall.injector import gpg
131 info(_("Importing key for feed '%s'"), self.url)
133 # Python2.4: can't call fileno() on stream, so save to tmp file instead
134 tmpfile = tempfile.TemporaryFile(prefix = 'injector-dl-data-')
135 try:
136 shutil.copyfileobj(stream, tmpfile)
137 tmpfile.flush()
139 tmpfile.seek(0)
140 gpg.import_key(tmpfile)
141 finally:
142 tmpfile.close()
144 def recheck(self):
145 """Set new_xml and sigs by reading signed_data.
146 You need to call this when previously-missing keys are added to the GPG keyring."""
147 import gpg
148 try:
149 self.signed_data.seek(0)
150 stream, sigs = gpg.check_stream(self.signed_data)
151 assert sigs
153 data = stream.read()
154 if stream is not self.signed_data:
155 stream.close()
157 self.new_xml = data
158 self.sigs = sigs
159 except:
160 self.signed_data.seek(0)
161 info(_("Failed to check GPG signature. Data received was:\n") + repr(self.signed_data.read()))
162 raise
164 class IfaceCache(object):
166 The interface cache stores downloaded and verified interfaces in
167 ~/.cache/0install.net/interfaces (by default).
169 There are methods to query the cache, add to it, check signatures, etc.
171 The cache is updated by L{fetch.Fetcher}.
173 Confusingly, this class is really two caches combined: the in-memory
174 cache of L{model.Interface} objects, and an on-disk cache of L{model.ZeroInstallFeed}s.
175 It will probably be split into two in future.
177 @ivar distro: the native distribution proxy
178 @type distro: L{distro.Distribution}
180 @see: L{iface_cache} - the singleton IfaceCache instance.
183 __slots__ = ['_interfaces', '_feeds', '_distro', '_config']
185 def __init__(self, distro = None):
186 """@param distro: distribution used to fetch "distribution:" feeds (since 0.49)
187 @param distro: distribution used to resolve "distribution:" feeds
188 @type distro: L{distro.Distribution}, or None to use the host distribution
190 self._interfaces = {}
191 self._feeds = {}
192 self._distro = distro
194 @property
195 def stores(self):
196 """deprecated"""
197 from zeroinstall.injector import policy
198 #raise Exception("iface_cache.stores")
199 return policy.get_deprecated_singleton_config().stores
201 @property
202 def distro(self):
203 if self._distro is None:
204 from zeroinstall.injector.distro import get_host_distribution
205 self._distro = get_host_distribution()
206 return self._distro
208 def update_interface_if_trusted(self, interface, sigs, xml):
209 import warnings
210 warnings.warn("Use update_feed_if_trusted instead", DeprecationWarning, stacklevel = 2)
211 return self.update_feed_if_trusted(interface.uri, sigs, xml)
213 def update_feed_if_trusted(self, feed_url, sigs, xml):
214 """Update a cached feed (using L{update_feed_from_network})
215 if we trust the signatures.
216 If we don't trust any of the signatures, do nothing.
217 @param feed_url: the feed being updated
218 @type feed_url: str
219 @param sigs: signatures from L{gpg.check_stream}
220 @type sigs: [L{gpg.Signature}]
221 @param xml: the downloaded replacement feed document
222 @type xml: str
223 @return: True if the feed was updated
224 @rtype: bool
225 @since: 0.48
227 import trust
228 updated = self._oldest_trusted(sigs, trust.domain_from_url(feed_url))
229 if updated is None: return False # None are trusted
231 self.update_feed_from_network(feed_url, xml, updated)
232 return True
234 def update_interface_from_network(self, interface, new_xml, modified_time):
235 import warnings
236 warnings.warn("Use update_feed_from_network instead", DeprecationWarning, stacklevel = 2)
237 self.update_feed_from_network(interface.uri, new_xml, modified_time)
239 def update_feed_from_network(self, feed_url, new_xml, modified_time):
240 """Update a cached feed.
241 Called by L{update_feed_if_trusted} if we trust this data.
242 After a successful update, L{writer} is used to update the feed's
243 last_checked time.
244 @param feed_url: the feed being updated
245 @type feed_url: L{model.Interface}
246 @param new_xml: the downloaded replacement feed document
247 @type new_xml: str
248 @param modified_time: the timestamp of the oldest trusted signature
249 (used as an approximation to the feed's modification time)
250 @type modified_time: long
251 @raises ReplayAttack: if modified_time is older than the currently cached time
252 @since: 0.48
254 debug(_("Updating '%(interface)s' from network; modified at %(time)s") %
255 {'interface': feed_url, 'time': _pretty_time(modified_time)})
257 if '\n<!-- Base64 Signature' not in new_xml:
258 # Only do this for old-style feeds without
259 # signatures Otherwise, we can get the time from the
260 # signature, and adding this attribute just makes the
261 # signature invalid.
262 from xml.dom import minidom
263 doc = minidom.parseString(new_xml)
264 doc.documentElement.setAttribute('last-modified', str(modified_time))
265 new_xml = StringIO()
266 doc.writexml(new_xml)
267 new_xml = new_xml.getvalue()
269 self._import_new_feed(feed_url, new_xml, modified_time)
271 feed = self.get_feed(feed_url)
273 import writer
274 feed.last_checked = long(time.time())
275 writer.save_feed(feed)
277 info(_("Updated feed cache entry for %(interface)s (modified %(time)s)"),
278 {'interface': feed.get_name(), 'time': _pretty_time(modified_time)})
280 def _import_new_feed(self, feed_url, new_xml, modified_time):
281 """Write new_xml into the cache.
282 @param feed_url: the URL for the feed being updated
283 @param new_xml: the data to write
284 @param modified_time: when new_xml was modified
285 @raises ReplayAttack: if the new mtime is older than the current one
287 assert modified_time
289 upstream_dir = basedir.save_cache_path(config_site, 'interfaces')
290 cached = os.path.join(upstream_dir, escape(feed_url))
292 old_modified = None
293 if os.path.exists(cached):
294 old_xml = file(cached).read()
295 if old_xml == new_xml:
296 debug(_("No change"))
297 # Update in-memory copy, in case someone else updated the disk copy
298 self.get_feed(feed_url, force = True)
299 return
300 old_modified = int(os.stat(cached).st_mtime)
302 # Do we need to write this temporary file now?
303 stream = file(cached + '.new', 'w')
304 stream.write(new_xml)
305 stream.close()
306 os.utime(cached + '.new', (modified_time, modified_time))
307 new_mtime = reader.check_readable(feed_url, cached + '.new')
308 assert new_mtime == modified_time
310 old_modified = self._get_signature_date(feed_url) or old_modified
312 if old_modified:
313 if new_mtime < old_modified:
314 os.unlink(cached + '.new')
315 raise ReplayAttack(_("New feed's modification time is "
316 "before old version!\nInterface: %(iface)s\nOld time: %(old_time)s\nNew time: %(new_time)s\n"
317 "Refusing update.")
318 % {'iface': feed_url, 'old_time': _pretty_time(old_modified), 'new_time': _pretty_time(new_mtime)})
319 if new_mtime == old_modified:
320 # You used to have to update the modification time manually.
321 # Now it comes from the signature, this check isn't useful
322 # and often causes problems when the stored format changes
323 # (e.g., when we stopped writing last-modified attributes)
324 pass
325 #raise SafeException("Interface has changed, but modification time "
326 # "hasn't! Refusing update.")
327 os.rename(cached + '.new', cached)
328 debug(_("Saved as %s") % cached)
330 self.get_feed(feed_url, force = True)
332 def get_feed(self, url, force = False, selections_ok = False):
333 """Get a feed from the cache.
334 @param url: the URL of the feed
335 @param force: load the file from disk again
336 @param selections_ok: if url is a local selections file, return that instead
337 @return: the feed, or None if it isn't cached
338 @rtype: L{model.ZeroInstallFeed}"""
339 if not force:
340 feed = self._feeds.get(url, False)
341 if feed != False:
342 return feed
344 if url.startswith('distribution:'):
345 master_feed = self.get_feed(url.split(':', 1)[1])
346 if not master_feed:
347 return None # Can't happen?
348 feed = self.distro.get_feed(master_feed)
349 else:
350 feed = reader.load_feed_from_cache(url, selections_ok = selections_ok)
351 if selections_ok and feed and not isinstance(feed, model.ZeroInstallFeed):
352 assert feed.selections is not None
353 return feed # (it's actually a selections document)
354 if feed:
355 reader.update_user_feed_overrides(feed)
356 self._feeds[url] = feed
357 return feed
359 def get_interface(self, uri):
360 """Get the interface for uri, creating a new one if required.
361 New interfaces are initialised from the disk cache, but not from
362 the network.
363 @param uri: the URI of the interface to find
364 @rtype: L{model.Interface}
366 if type(uri) == str:
367 uri = unicode(uri)
368 assert isinstance(uri, unicode)
370 if uri in self._interfaces:
371 return self._interfaces[uri]
373 debug(_("Initialising new interface object for %s"), uri)
374 self._interfaces[uri] = Interface(uri)
375 reader.update_from_cache(self._interfaces[uri])
376 return self._interfaces[uri]
378 def list_all_interfaces(self):
379 """List all interfaces in the cache.
380 @rtype: [str]
382 all = set()
383 for d in basedir.load_cache_paths(config_site, 'interfaces'):
384 for leaf in os.listdir(d):
385 if not leaf.startswith('.'):
386 all.add(unescape(leaf))
387 for d in basedir.load_config_paths(config_site, config_prog, 'user_overrides'):
388 for leaf in os.listdir(d):
389 if not leaf.startswith('.'):
390 all.add(unescape(leaf))
391 return list(all) # Why not just return the set?
393 def get_icon_path(self, iface):
394 """Get the path of a cached icon for an interface.
395 @param iface: interface whose icon we want
396 @return: the path of the cached icon, or None if not cached.
397 @rtype: str"""
398 return basedir.load_first_cache(config_site, 'interface_icons',
399 escape(iface.uri))
401 def get_cached_signatures(self, uri):
402 """Verify the cached interface using GPG.
403 Only new-style XML-signed interfaces retain their signatures in the cache.
404 @param uri: the feed to check
405 @type uri: str
406 @return: a list of signatures, or None
407 @rtype: [L{gpg.Signature}] or None
408 @since: 0.25"""
409 import gpg
410 if os.path.isabs(uri):
411 old_iface = uri
412 else:
413 old_iface = basedir.load_first_cache(config_site, 'interfaces', escape(uri))
414 if old_iface is None:
415 return None
416 try:
417 return gpg.check_stream(file(old_iface))[1]
418 except SafeException, ex:
419 debug(_("No signatures (old-style interface): %s") % ex)
420 return None
422 def _get_signature_date(self, uri):
423 """Read the date-stamp from the signature of the cached interface.
424 If the date-stamp is unavailable, returns None."""
425 import trust
426 sigs = self.get_cached_signatures(uri)
427 if sigs:
428 return self._oldest_trusted(sigs, trust.domain_from_url(uri))
430 def _oldest_trusted(self, sigs, domain):
431 """Return the date of the oldest trusted signature in the list, or None if there
432 are no trusted sigs in the list."""
433 trusted = [s.get_timestamp() for s in sigs if s.is_trusted(domain)]
434 if trusted:
435 return min(trusted)
436 return None
438 def mark_as_checking(self, url):
439 """Touch a 'last_check_attempt_timestamp' file for this feed.
440 If url is a local path, nothing happens.
441 This prevents us from repeatedly trying to download a failing feed many
442 times in a short period."""
443 if os.path.isabs(url):
444 return
445 feeds_dir = basedir.save_cache_path(config_site, config_prog, 'last-check-attempt')
446 timestamp_path = os.path.join(feeds_dir, model._pretty_escape(url))
447 fd = os.open(timestamp_path, os.O_WRONLY | os.O_CREAT, 0644)
448 os.close(fd)
449 os.utime(timestamp_path, None) # In case file already exists
451 def get_last_check_attempt(self, url):
452 """Return the time of the most recent update attempt for a feed.
453 @see: L{mark_as_checking}
454 @return: The time, or None if none is recorded
455 @rtype: float | None"""
456 timestamp_path = basedir.load_first_cache(config_site, config_prog, 'last-check-attempt', model._pretty_escape(url))
457 if timestamp_path:
458 return os.stat(timestamp_path).st_mtime
459 return None
461 def get_feed_imports(self, iface):
462 """Get all feeds that add to this interface.
463 This is the feeds explicitly added by the user, feeds added by the distribution,
464 and feeds imported by a <feed> in the main feed (but not recursively, at present).
465 @rtype: L{Feed}
466 @since: 0.48"""
467 main_feed = self.get_feed(iface.uri)
468 if main_feed:
469 return iface.extra_feeds + main_feed.feeds
470 else:
471 return iface.extra_feeds
473 def get_feeds(self, iface):
474 """Get all feeds for this interface. This is a mapping from feed URLs
475 to ZeroInstallFeeds. It includes the interface's main feed, plus the
476 resolution of every feed returned by L{get_feed_imports}. Uncached
477 feeds are indicated by a value of None.
478 @rtype: {str: L{ZeroInstallFeed} | None}
479 @since: 0.48"""
480 main_feed = self.get_feed(iface.uri)
481 results = {iface.uri: main_feed}
482 for imp in iface.extra_feeds:
483 try:
484 results[imp.uri] = self.get_feed(imp.uri)
485 except SafeException, ex:
486 warn("Failed to load feed '%s: %s", imp.uri, ex)
487 if main_feed:
488 for imp in main_feed.feeds:
489 results[imp.uri] = self.get_feed(imp.uri)
490 return results
492 def get_implementations(self, iface):
493 """Return all implementations from all of iface's feeds.
494 @rtype: [L{Implementation}]
495 @since: 0.48"""
496 impls = []
497 for feed in self.get_feeds(iface).itervalues():
498 if feed:
499 impls += feed.implementations.values()
500 return impls
502 #iface_cache = IfaceCache()