Moved background updates and icon downloading to tasks system.
[zeroinstall/zeroinstall-mseaborn.git] / zeroinstall / injector / iface_cache.py
blob8aea7465358b7a1cb0333a6e8643a00978c4b48c
"""
Manages the interface cache.

@var iface_cache: A singleton cache object. You should normally use this rather than
creating new cache objects.
"""
# Copyright (C) 2006, Thomas Leonard
# See the README file for details, or visit http://0install.net.

# Note:
#
# We need to know the modification time of each interface, because we refuse
# to update to an older version (this prevents an attack where the attacker
# sends back an old version which is correctly signed but has a known bug).
#
# The way we store this is a bit complicated due to backward compatibility:
#
# - GPG-signed interfaces have their signatures removed and a last-modified
#   attribute is stored containing the date from the signature.
#
# - XML-signed interfaces are stored unmodified with their signatures. The
#   date is extracted from the signature when needed.
#
# - Older versions used to add the last-modified attribute even to files
#   with XML signatures - these files therefore have invalid signatures and
#   we extract from the attribute for these.
#
# Eventually, support for the first and third cases will be removed.
30 import os, sys, time
31 from logging import debug, info, warn
32 from cStringIO import StringIO
34 from zeroinstall.support import basedir
35 from zeroinstall.injector import reader, model
36 from zeroinstall.injector.namespaces import *
37 from zeroinstall.injector.model import *
38 from zeroinstall import zerostore
40 def _pretty_time(t):
41 assert isinstance(t, (int, long))
42 return time.strftime('%Y-%m-%d %H:%M:%S UTC', time.localtime(t))
44 class PendingFeed(object):
45 """A feed that has been downloaded but not yet added to the interface cache.
46 Feeds remain in this state until the user confirms that they trust at least
47 one of the signatures.
48 @ivar url: URL for the feed
49 @type url: str
50 @ivar signed_data: the untrusted data
51 @type signed_data: stream
52 @ivar sigs: signatures extracted from signed_data
53 @type sigs: [L{gpg.Signature}]
54 @ivar new_xml: the payload of the signed_data, or the whole thing if XML
55 @type new_xml: str
56 @since: 0.25"""
57 __slots__ = ['url', 'signed_data', 'sigs', 'new_xml']
59 def __init__(self, url, signed_data):
60 """Downloaded data is a GPG-signed message.
61 @param url: the URL of the downloaded feed
62 @type url: str
63 @param signed_data: the downloaded data (not yet trusted)
64 @type signed_data: stream
65 @raise SafeException: if the data is not signed, and logs the actual data"""
66 self.url = url
67 self.signed_data = signed_data
68 self.recheck()
70 def download_keys(self, handler):
71 """Download any required GPG keys not already on our keyring.
72 When all downloads are done (successful or otherwise), add any new keys
73 to the keyring, L{recheck}.
74 @param handler: handler to manage the download
75 @type handler: L{handler.Handler}
76 @param callback: callback to invoke when done
77 @type callback: function()
78 """
79 downloads = {}
80 blockers = []
81 for x in self.sigs:
82 key_id = x.need_key()
83 if key_id:
84 import urlparse
85 key_url = urlparse.urljoin(self.url, '%s.gpg' % key_id)
86 info("Fetching key from %s", key_url)
87 dl = handler.get_download(key_url)
88 downloads[dl.downloaded] = (dl, dl.tempfile)
89 blockers.append(dl.downloaded)
91 exception = None
92 any_success = False
94 from zeroinstall.support import tasks
96 while blockers:
97 yield blockers
99 old_blockers = blockers
100 blockers = []
102 for b in old_blockers:
103 try:
104 tasks.check(b)
105 if b.happened:
106 dl, stream = downloads[b]
107 stream.seek(0)
108 self._downloaded_key(stream)
109 any_success = True
110 else:
111 blockers.append(b)
112 except Exception:
113 warn("Failed to import key for '%s': %s", self.url, str(ex))
114 _, exception, tb = sys.exc_info()
116 if exception and not any_success:
117 raise exception, None, tb
119 self.recheck()
121 def _downloaded_key(self, stream):
122 import shutil, tempfile
123 from zeroinstall.injector import gpg
125 info("Importing key for feed '%s'", self.url)
127 # Python2.4: can't call fileno() on stream, so save to tmp file instead
128 tmpfile = tempfile.TemporaryFile(prefix = 'injector-dl-data-')
129 try:
130 shutil.copyfileobj(stream, tmpfile)
131 tmpfile.flush()
133 tmpfile.seek(0)
134 gpg.import_key(tmpfile)
135 finally:
136 tmpfile.close()
138 def recheck(self):
139 """Set new_xml and sigs by reading signed_data.
140 You need to call this when previously-missing keys are added to the GPG keyring."""
141 import gpg
142 try:
143 self.signed_data.seek(0)
144 stream, sigs = gpg.check_stream(self.signed_data)
145 assert sigs
147 data = stream.read()
148 if stream is not self.signed_data:
149 stream.close()
151 self.new_xml = data
152 self.sigs = sigs
153 except:
154 self.signed_data.seek(0)
155 info("Failed to check GPG signature. Data received was:\n" + `self.signed_data.read()`)
156 raise
158 class IfaceCache(object):
160 The interface cache stores downloaded and verified interfaces in
161 ~/.cache/0install.net/interfaces (by default).
163 There are methods to query the cache, add to it, check signatures, etc.
165 When updating the cache, the normal sequence is as follows:
167 1. When the data arrives, L{add_pending} is called.
168 2. Later, L{policy.Policy.process_pending} notices the pending feed and starts processing it.
169 3. It checks the signatures using L{PendingFeed.sigs}.
170 4. If any required GPG keys are missing, L{download_key} is used to fetch
171 them first.
172 5. If none of the keys are trusted, L{handler.Handler.confirm_trust_keys} is called.
173 6. L{update_interface_if_trusted} is called to update the cache.
175 Whenever something needs to be done before the feed can move from the pending
176 state, the process is resumed after the required activity by calling L{policy.Policy.process_pending}.
178 @ivar watchers: objects requiring notification of cache changes.
179 @ivar pending: downloaded feeds which are not yet trusted
180 @type pending: str -> PendingFeed
181 @see: L{iface_cache} - the singleton IfaceCache instance.
184 __slots__ = ['watchers', '_interfaces', 'stores', 'pending']
186 def __init__(self):
187 self.watchers = []
188 self._interfaces = {}
189 self.pending = {}
191 self.stores = zerostore.Stores()
193 def add_watcher(self, w):
194 """Call C{w.interface_changed(iface)} each time L{update_interface_from_network}
195 changes an interface in the cache."""
196 assert w not in self.watchers
197 self.watchers.append(w)
199 def add_pending(self, pending):
200 """Add a PendingFeed to the pending dict.
201 @param pending: the untrusted download to add
202 @type pending: PendingFeed
203 @since: 0.25"""
204 assert isinstance(pending, PendingFeed)
205 self.pending[pending.url] = pending
207 def update_interface_if_trusted(self, interface, sigs, xml):
208 """Update a cached interface (using L{update_interface_from_network})
209 if we trust the signatures, and remove it from L{pending}.
210 If we don't trust any of the signatures, do nothing.
211 @param interface: the interface being updated
212 @type interface: L{model.Interface}
213 @param sigs: signatures from L{gpg.check_stream}
214 @type sigs: [L{gpg.Signature}]
215 @param xml: the downloaded replacement interface document
216 @type xml: str
217 @return: True if the interface was updated
218 @rtype: bool
219 @precondition: call L{add_pending}
221 import trust
222 updated = self._oldest_trusted(sigs, trust.domain_from_url(interface.uri))
223 if updated is None: return False # None are trusted
225 if interface.uri in self.pending:
226 del self.pending[interface.uri]
227 else:
228 raise Exception("update_interface_if_trusted, but '%s' not pending!" % interface.uri)
230 self.update_interface_from_network(interface, xml, updated)
231 return True
233 def download_key(self, interface, key_id):
234 """Download a GPG key.
235 The location of the key is calculated from the uri of the interface.
236 @param interface: the interface which needs the key
237 @param key_id: the GPG long id of the key
238 @todo: This method blocks. It should start a download and return.
239 @deprecated: see PendingFeed
241 assert interface
242 assert key_id
243 import urlparse, urllib2, shutil, tempfile
244 key_url = urlparse.urljoin(interface.uri, '%s.gpg' % key_id)
245 info("Fetching key from %s", key_url)
246 try:
247 stream = urllib2.urlopen(key_url)
248 # Python2.4: can't call fileno() on stream, so save to tmp file instead
249 tmpfile = tempfile.TemporaryFile(prefix = 'injector-dl-data-')
250 shutil.copyfileobj(stream, tmpfile)
251 tmpfile.flush()
252 stream.close()
253 except Exception, ex:
254 raise SafeException("Failed to download key from '%s': %s" % (key_url, str(ex)))
256 import gpg
258 tmpfile.seek(0)
259 gpg.import_key(tmpfile)
260 tmpfile.close()
262 def update_interface_from_network(self, interface, new_xml, modified_time):
263 """Update a cached interface.
264 Called by L{update_interface_if_trusted} if we trust this data.
265 After a successful update, L{writer} is used to update the interface's
266 last_checked time and then all the L{watchers} are notified.
267 @param interface: the interface being updated
268 @type interface: L{model.Interface}
269 @param new_xml: the downloaded replacement interface document
270 @type new_xml: str
271 @param modified_time: the timestamp of the oldest trusted signature
272 (used as an approximation to the interface's modification time)
273 @type modified_time: long
274 @raises SafeException: if modified_time is older than the currently cached time
276 debug("Updating '%s' from network; modified at %s" %
277 (interface.name or interface.uri, _pretty_time(modified_time)))
279 if '\n<!-- Base64 Signature' not in new_xml:
280 # Only do this for old-style interfaces without
281 # signatures Otherwise, we can get the time from the
282 # signature, and adding this attribute just makes the
283 # signature invalid.
284 from xml.dom import minidom
285 doc = minidom.parseString(new_xml)
286 doc.documentElement.setAttribute('last-modified', str(modified_time))
287 new_xml = StringIO()
288 doc.writexml(new_xml)
289 new_xml = new_xml.getvalue()
291 self._import_new_interface(interface, new_xml, modified_time)
293 import writer
294 interface._main_feed.last_checked = long(time.time())
295 writer.save_interface(interface)
297 info("Updated interface cache entry for %s (modified %s)",
298 interface.get_name(), _pretty_time(modified_time))
300 def _import_new_interface(self, interface, new_xml, modified_time):
301 """Write new_xml into the cache.
302 @param interface: updated once the new XML is written
303 @param new_xml: the data to write
304 @param modified_time: when new_xml was modified
305 @raises SafeException: if the new mtime is older than the current one
307 assert modified_time
309 upstream_dir = basedir.save_cache_path(config_site, 'interfaces')
310 cached = os.path.join(upstream_dir, escape(interface.uri))
312 if os.path.exists(cached):
313 old_xml = file(cached).read()
314 if old_xml == new_xml:
315 debug("No change")
316 return
318 stream = file(cached + '.new', 'w')
319 stream.write(new_xml)
320 stream.close()
321 os.utime(cached + '.new', (modified_time, modified_time))
322 new_mtime = reader.check_readable(interface.uri, cached + '.new')
323 assert new_mtime == modified_time
325 old_modified = self._get_signature_date(interface.uri)
326 if old_modified is None:
327 old_modified = interface.last_modified
329 if old_modified:
330 if new_mtime < old_modified:
331 raise SafeException("New interface's modification time is before old "
332 "version!"
333 "\nOld time: " + _pretty_time(old_modified) +
334 "\nNew time: " + _pretty_time(new_mtime) +
335 "\nRefusing update (leaving new copy as " +
336 cached + ".new)")
337 if new_mtime == old_modified:
338 # You used to have to update the modification time manually.
339 # Now it comes from the signature, this check isn't useful
340 # and often causes problems when the stored format changes
341 # (e.g., when we stopped writing last-modified attributes)
342 pass
343 #raise SafeException("Interface has changed, but modification time "
344 # "hasn't! Refusing update.")
345 os.rename(cached + '.new', cached)
346 debug("Saved as " + cached)
348 reader.update_from_cache(interface)
350 def get_feed(self, url):
351 # TODO: This isn't a good implementation
352 iface = self.get_interface(url)
353 return iface._main_feed
355 def get_interface(self, uri):
356 """Get the interface for uri, creating a new one if required.
357 New interfaces are initialised from the disk cache, but not from
358 the network.
359 @param uri: the URI of the interface to find
360 @rtype: L{model.Interface}
362 if type(uri) == str:
363 uri = unicode(uri)
364 assert isinstance(uri, unicode)
366 if uri in self._interfaces:
367 return self._interfaces[uri]
369 debug("Initialising new interface object for %s", uri)
370 self._interfaces[uri] = Interface(uri)
371 reader.update_from_cache(self._interfaces[uri])
372 return self._interfaces[uri]
374 def list_all_interfaces(self):
375 """List all interfaces in the cache.
376 @rtype: [str]
378 all = set()
379 for d in basedir.load_cache_paths(config_site, 'interfaces'):
380 for leaf in os.listdir(d):
381 if not leaf.startswith('.'):
382 all.add(unescape(leaf))
383 for d in basedir.load_config_paths(config_site, config_prog, 'user_overrides'):
384 for leaf in os.listdir(d):
385 if not leaf.startswith('.'):
386 all.add(unescape(leaf))
387 return list(all) # Why not just return the set?
389 def add_to_cache(self, source, data):
390 """Add an implementation to the cache.
391 @param source: information about the archive
392 @type source: L{model.DownloadSource}
393 @param data: the data stream
394 @type data: stream
395 @see: L{zerostore.Stores.add_archive_to_cache}
397 assert isinstance(source, DownloadSource)
398 required_digest = source.implementation.id
399 url = source.url
400 self.stores.add_archive_to_cache(required_digest, data, source.url, source.extract,
401 type = source.type, start_offset = source.start_offset or 0)
403 def get_icon_path(self, iface):
404 """Get the path of a cached icon for an interface.
405 @param iface: interface whose icon we want
406 @return: the path of the cached icon, or None if not cached.
407 @rtype: str"""
408 return basedir.load_first_cache(config_site, 'interface_icons',
409 escape(iface.uri))
411 def get_cached_signatures(self, uri):
412 """Verify the cached interface using GPG.
413 Only new-style XML-signed interfaces retain their signatures in the cache.
414 @param uri: the feed to check
415 @type uri: str
416 @return: a list of signatures, or None
417 @rtype: [L{gpg.Signature}] or None
418 @since: 0.25"""
419 import gpg
420 if uri.startswith('/'):
421 old_iface = uri
422 else:
423 old_iface = basedir.load_first_cache(config_site, 'interfaces', escape(uri))
424 if old_iface is None:
425 return None
426 try:
427 return gpg.check_stream(file(old_iface))[1]
428 except SafeException, ex:
429 debug("No signatures (old-style interface): %s" % ex)
430 return None
432 def _get_signature_date(self, uri):
433 """Read the date-stamp from the signature of the cached interface.
434 If the date-stamp is unavailable, returns None."""
435 import trust
436 sigs = self.get_cached_signatures(uri)
437 if sigs:
438 return self._oldest_trusted(sigs, trust.domain_from_url(uri))
440 def _oldest_trusted(self, sigs, domain):
441 """Return the date of the oldest trusted signature in the list, or None if there
442 are no trusted sigs in the list."""
443 trusted = [s.get_timestamp() for s in sigs if s.is_trusted(domain)]
444 if trusted:
445 return min(trusted)
446 return None
448 def mark_as_checking(self, url):
449 """Touch a 'last_check_attempt_timestamp' file for this feed.
450 If url is a local path, nothing happens.
451 This prevents us from repeatedly trying to download a failing feed many
452 times in a short period."""
453 if url.startswith('/'):
454 return
455 feeds_dir = basedir.save_cache_path(config_site, config_prog, 'last-check-attempt')
456 timestamp_path = os.path.join(feeds_dir, model._pretty_escape(url))
457 fd = os.open(timestamp_path, os.O_WRONLY | os.O_CREAT, 0644)
458 os.close(fd)
460 def get_last_check_attempt(self, url):
461 """Return the time of the most recent update attempt for a feed.
462 @see: L{mark_as_checking}
463 @return: The time, or None if none is recorded"""
464 timestamp_path = basedir.load_first_cache(config_site, config_prog, 'last-check-attempt', model._pretty_escape(url))
465 if timestamp_path:
466 return os.stat(timestamp_path).st_mtime
467 return None
# The shared, module-level cache instance, created when this module is imported.
iface_cache = IfaceCache()