1 """
2 Manages the feed cache.
4 @var iface_cache: A singleton cache object. You should normally use this rather than
5 creating new cache objects.
7 """
8 # Copyright (C) 2009, Thomas Leonard
9 # See the README file for details, or visit http://0install.net.
11 # Note:
13 # We need to know the modification time of each interface, because we refuse
14 # to update to an older version (this prevents an attack where the attacker
15 # sends back an old version which is correctly signed but has a known bug).
17 # The way we store this is a bit complicated due to backward compatibility:
19 # - GPG-signed interfaces have their signatures removed and a last-modified
20 # attribute is stored containing the date from the signature.
22 # - XML-signed interfaces are stored unmodified with their signatures. The
23 # date is extracted from the signature when needed.
25 # - Older versions used to add the last-modified attribute even to files
26 # with XML signatures - these files therefore have invalid signatures and
27 # we extract from the attribute for these.
29 # Eventually, support for the first and third cases will be removed.
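
# Illustrative sketch of the two on-disk forms described above (the URI,
# timestamp and signature text are made-up examples, not real feed data):
#
#   Old style - GPG signature stripped, date kept as an attribute:
#       <?xml version="1.0"?>
#       <interface last-modified="1167336000" uri="http://example.com/prog.xml" ...>
#
#   New style - stored exactly as downloaded, date read from the signature:
#       <?xml version="1.0"?>
#       <interface uri="http://example.com/prog.xml" ...>
#         ...
#       </interface>
#       <!-- Base64 Signature
#       iQEcBAABAgAGBQJ...
#       -->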

import os, sys, time
from logging import debug, info, warn
from cStringIO import StringIO

from zeroinstall import _
from zeroinstall.support import basedir
from zeroinstall.injector import reader, model
from zeroinstall.injector.namespaces import config_site, config_prog
from zeroinstall.injector.model import Interface, escape, unescape
from zeroinstall import zerostore, SafeException

def _pretty_time(t):
    assert isinstance(t, (int, long)), t
    # Use gmtime so the value matches the 'UTC' label in the format string.
    return time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime(t))

class ReplayAttack(SafeException):
    """Attempt to import a feed that's older than the one in the cache."""
    pass

class PendingFeed(object):
    """A feed that has been downloaded but not yet added to the interface cache.
    Feeds remain in this state until the user confirms that they trust at least
    one of the signatures.
    @ivar url: URL for the feed
    @type url: str
    @ivar signed_data: the untrusted data
    @type signed_data: stream
    @ivar sigs: signatures extracted from signed_data
    @type sigs: [L{gpg.Signature}]
    @ivar new_xml: the payload of the signed_data, or the whole thing if XML
    @type new_xml: str
    @since: 0.25"""
    __slots__ = ['url', 'signed_data', 'sigs', 'new_xml']

    def __init__(self, url, signed_data):
        """Downloaded data is a GPG-signed message.
        @param url: the URL of the downloaded feed
        @type url: str
        @param signed_data: the downloaded data (not yet trusted)
        @type signed_data: stream
        @raise SafeException: if the data is not signed, and logs the actual data"""
        self.url = url
        self.signed_data = signed_data
        self.recheck()

    def download_keys(self, handler, feed_hint = None, key_mirror = None):
        """Download any required GPG keys not already on our keyring.
        When all downloads are done (successful or otherwise), add any new keys
        to the keyring, L{recheck}.
        @param handler: handler to manage the download
        @type handler: L{handler.Handler}
        @param key_mirror: URL of directory containing keys, or None to use feed's directory
        @type key_mirror: str
        """
        downloads = {}
        blockers = []
        for x in self.sigs:
            key_id = x.need_key()
            if key_id:
                import urlparse
                key_url = urlparse.urljoin(key_mirror or self.url, '%s.gpg' % key_id)
                info(_("Fetching key from %s"), key_url)
                dl = handler.get_download(key_url, hint = feed_hint)
                downloads[dl.downloaded] = (dl, dl.tempfile)
                blockers.append(dl.downloaded)

        exception = None
        any_success = False

        from zeroinstall.support import tasks

        while blockers:
            yield blockers

            old_blockers = blockers
            blockers = []

            for b in old_blockers:
                try:
                    tasks.check(b)
                    if b.happened:
                        dl, stream = downloads[b]
                        stream.seek(0)
                        self._downloaded_key(stream)
                        any_success = True
                    else:
                        blockers.append(b)
                except Exception:
                    _type, exception, tb = sys.exc_info()
                    warn(_("Failed to import key for '%(url)s': %(exception)s"), {'url': self.url, 'exception': str(exception)})

        if exception and not any_success:
            raise exception, None, tb

        self.recheck()
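
    # Usage sketch (illustrative only; assumes the caller has a L{handler.Handler}
    # with a wait_for_blocker() loop and uses the tasks module, as elsewhere in
    # this codebase - the exact driver depends on the fetcher):
    #
    #   pending = PendingFeed(url, signed_stream)
    #   keys_task = tasks.Task(pending.download_keys(handler), "download keys")
    #   handler.wait_for_blocker(keys_task.finished)
    #   if iface_cache.update_interface_if_trusted(iface, pending.sigs, pending.new_xml):
    #       pass  # feed imported into the cache
    #   else:
    #       pass  # prompt the user to trust one of pending.sigs, then retry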

    def _downloaded_key(self, stream):
        import shutil, tempfile
        from zeroinstall.injector import gpg

        info(_("Importing key for feed '%s'"), self.url)

        # Python2.4: can't call fileno() on stream, so save to tmp file instead
        tmpfile = tempfile.TemporaryFile(prefix = 'injector-dl-data-')
        try:
            shutil.copyfileobj(stream, tmpfile)
            tmpfile.flush()

            tmpfile.seek(0)
            gpg.import_key(tmpfile)
        finally:
            tmpfile.close()

    def recheck(self):
        """Set new_xml and sigs by reading signed_data.
        You need to call this when previously-missing keys are added to the GPG keyring."""
        import gpg
        try:
            self.signed_data.seek(0)
            stream, sigs = gpg.check_stream(self.signed_data)
            assert sigs

            data = stream.read()
            if stream is not self.signed_data:
                stream.close()

            self.new_xml = data
            self.sigs = sigs
        except:
            self.signed_data.seek(0)
            info(_("Failed to check GPG signature. Data received was:\n") + repr(self.signed_data.read()))
            raise

class IfaceCache(object):
    """
    The interface cache stores downloaded and verified interfaces in
    ~/.cache/0install.net/interfaces (by default).

    There are methods to query the cache, add to it, check signatures, etc.

    The cache is updated by L{fetch.Fetcher}.

    Confusingly, this class is really two caches combined: the in-memory
    cache of L{model.Interface} objects, and an on-disk cache of L{model.ZeroInstallFeed}s.
    It will probably be split into two in future.

    @see: L{iface_cache} - the singleton IfaceCache instance.
    """

    __slots__ = ['_interfaces', 'stores']

    def __init__(self):
        self._interfaces = {}

        self.stores = zerostore.Stores()

    def update_interface_if_trusted(self, interface, sigs, xml):
        """Update a cached interface (using L{update_interface_from_network})
        if we trust the signatures.
        If we don't trust any of the signatures, do nothing.
        @param interface: the interface being updated
        @type interface: L{model.Interface}
        @param sigs: signatures from L{gpg.check_stream}
        @type sigs: [L{gpg.Signature}]
        @param xml: the downloaded replacement interface document
        @type xml: str
        @return: True if the interface was updated
        @rtype: bool
        """
        import trust
        updated = self._oldest_trusted(sigs, trust.domain_from_url(interface.uri))
        if updated is None: return False    # None are trusted

        self.update_interface_from_network(interface, xml, updated)
        return True

    def update_interface_from_network(self, interface, new_xml, modified_time):
        """Update a cached interface.
        Called by L{update_interface_if_trusted} if we trust this data.
        After a successful update, L{writer} is used to update the interface's
        last_checked time.
        @param interface: the interface being updated
        @type interface: L{model.Interface}
        @param new_xml: the downloaded replacement interface document
        @type new_xml: str
        @param modified_time: the timestamp of the oldest trusted signature
        (used as an approximation to the interface's modification time)
        @type modified_time: long
        @raises ReplayAttack: if modified_time is older than the currently cached time
        """
        debug(_("Updating '%(interface)s' from network; modified at %(time)s") %
            {'interface': interface.name or interface.uri, 'time': _pretty_time(modified_time)})

        if '\n<!-- Base64 Signature' not in new_xml:
            # Only do this for old-style interfaces without
            # signatures. Otherwise, we can get the time from the
            # signature, and adding this attribute just makes the
            # signature invalid.
            from xml.dom import minidom
            doc = minidom.parseString(new_xml)
            doc.documentElement.setAttribute('last-modified', str(modified_time))
            new_xml = StringIO()
            doc.writexml(new_xml)
            new_xml = new_xml.getvalue()

        self._import_new_interface(interface, new_xml, modified_time)

        import writer
        interface._main_feed.last_checked = long(time.time())
        writer.save_interface(interface)

        info(_("Updated interface cache entry for %(interface)s (modified %(time)s)"),
            {'interface': interface.get_name(), 'time': _pretty_time(modified_time)})

    def _import_new_interface(self, interface, new_xml, modified_time):
        """Write new_xml into the cache.
        @param interface: updated once the new XML is written
        @param new_xml: the data to write
        @param modified_time: when new_xml was modified
        @raises ReplayAttack: if the new mtime is older than the current one
        """
        assert modified_time

        upstream_dir = basedir.save_cache_path(config_site, 'interfaces')
        cached = os.path.join(upstream_dir, escape(interface.uri))

        if os.path.exists(cached):
            old_xml = file(cached).read()
            if old_xml == new_xml:
                debug(_("No change"))
                return

        stream = file(cached + '.new', 'w')
        stream.write(new_xml)
        stream.close()
        os.utime(cached + '.new', (modified_time, modified_time))
        new_mtime = reader.check_readable(interface.uri, cached + '.new')
        assert new_mtime == modified_time

        old_modified = self._get_signature_date(interface.uri)
        if old_modified is None:
            old_modified = interface.last_modified

        if old_modified:
            if new_mtime < old_modified:
                os.unlink(cached + '.new')
                raise ReplayAttack(_("New interface's modification time is "
                    "before old version!\nOld time: %(old_time)s\nNew time: %(new_time)s\n"
                    "Refusing update.")
                    % {'old_time': _pretty_time(old_modified), 'new_time': _pretty_time(new_mtime)})
            if new_mtime == old_modified:
                # You used to have to update the modification time manually.
                # Now that it comes from the signature, this check isn't useful
                # and often causes problems when the stored format changes
                # (e.g., when we stopped writing last-modified attributes)
                pass
                #raise SafeException("Interface has changed, but modification time "
                #    "hasn't! Refusing update.")
        os.rename(cached + '.new', cached)
        debug(_("Saved as %s") % cached)

        reader.update_from_cache(interface)

    def get_feed(self, url):
        """Get a feed from the cache.
        @param url: the URL of the feed
        @return: the feed, or None if it isn't cached
        @rtype: L{model.ZeroInstallFeed}"""
        # TODO: This isn't a good implementation
        iface = self.get_interface(url)
        feed = iface._main_feed
        if not isinstance(feed, model.DummyFeed):
            return feed
        return None

    def get_interface(self, uri):
        """Get the interface for uri, creating a new one if required.
        New interfaces are initialised from the disk cache, but not from
        the network.
        @param uri: the URI of the interface to find
        @rtype: L{model.Interface}
        """
        if type(uri) == str:
            uri = unicode(uri)
        assert isinstance(uri, unicode)

        if uri in self._interfaces:
            return self._interfaces[uri]

        debug(_("Initialising new interface object for %s"), uri)
        self._interfaces[uri] = Interface(uri)
        reader.update_from_cache(self._interfaces[uri])
        return self._interfaces[uri]

    def list_all_interfaces(self):
        """List all interfaces in the cache.
        @rtype: [str]
        """
        all = set()
        for d in basedir.load_cache_paths(config_site, 'interfaces'):
            for leaf in os.listdir(d):
                if not leaf.startswith('.'):
                    all.add(unescape(leaf))
        for d in basedir.load_config_paths(config_site, config_prog, 'user_overrides'):
            for leaf in os.listdir(d):
                if not leaf.startswith('.'):
                    all.add(unescape(leaf))
        return list(all)    # Why not just return the set?
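
    # On-disk layout sketch (illustrative; actual locations come from basedir's
    # XDG lookup and the filenames from model.escape()/_pretty_escape()):
    #
    #   ~/.cache/0install.net/interfaces/<escaped-uri>                 - cached feed XML
    #   ~/.cache/0install.net/interface_icons/<escaped-uri>            - cached icons
    #   ~/.config/0install.net/injector/user_overrides/<escaped-uri>   - user overrides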

    def get_icon_path(self, iface):
        """Get the path of a cached icon for an interface.
        @param iface: interface whose icon we want
        @return: the path of the cached icon, or None if not cached.
        @rtype: str"""
        return basedir.load_first_cache(config_site, 'interface_icons',
                        escape(iface.uri))

    def get_cached_signatures(self, uri):
        """Verify the cached interface using GPG.
        Only new-style XML-signed interfaces retain their signatures in the cache.
        @param uri: the feed to check
        @type uri: str
        @return: a list of signatures, or None
        @rtype: [L{gpg.Signature}] or None
        @since: 0.25"""
        import gpg
        if uri.startswith('/'):
            old_iface = uri
        else:
            old_iface = basedir.load_first_cache(config_site, 'interfaces', escape(uri))
            if old_iface is None:
                return None
        try:
            return gpg.check_stream(file(old_iface))[1]
        except SafeException, ex:
            debug(_("No signatures (old-style interface): %s") % ex)
            return None

    def _get_signature_date(self, uri):
        """Read the date-stamp from the signature of the cached interface.
        If the date-stamp is unavailable, returns None."""
        import trust
        sigs = self.get_cached_signatures(uri)
        if sigs:
            return self._oldest_trusted(sigs, trust.domain_from_url(uri))

    def _oldest_trusted(self, sigs, domain):
        """Return the date of the oldest trusted signature in the list, or None if there
        are no trusted sigs in the list."""
        trusted = [s.get_timestamp() for s in sigs if s.is_trusted(domain)]
        if trusted:
            return min(trusted)
        return None

    def mark_as_checking(self, url):
        """Touch a 'last_check_attempt_timestamp' file for this feed.
        If url is a local path, nothing happens.
        This prevents us from repeatedly trying to download a failing feed many
        times in a short period."""
        if url.startswith('/'):
            return
        feeds_dir = basedir.save_cache_path(config_site, config_prog, 'last-check-attempt')
        timestamp_path = os.path.join(feeds_dir, model._pretty_escape(url))
        fd = os.open(timestamp_path, os.O_WRONLY | os.O_CREAT, 0644)
        os.close(fd)
        os.utime(timestamp_path, None)    # In case file already exists

    def get_last_check_attempt(self, url):
        """Return the time of the most recent update attempt for a feed.
        @see: L{mark_as_checking}
        @return: The time, or None if none is recorded
        @rtype: float | None"""
        timestamp_path = basedir.load_first_cache(config_site, config_prog, 'last-check-attempt', model._pretty_escape(url))
        if timestamp_path:
            return os.stat(timestamp_path).st_mtime
        return None
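
    # Rate-limiting sketch (illustrative; the one-hour threshold is made up):
    #
    #   last = iface_cache.get_last_check_attempt(url)
    #   if last is None or time.time() - last > 60 * 60:
    #       iface_cache.mark_as_checking(url)
    #       ...  # start downloading the feed
    #   else:
    #       ...  # we tried recently; don't hammer a failing server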

iface_cache = IfaceCache()
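
# Minimal usage sketch of the singleton (the feed URL is a made-up example):
#
#   from zeroinstall.injector.iface_cache import iface_cache
#   iface = iface_cache.get_interface('http://example.com/prog.xml')
#   print iface.get_name()
#   for uri in iface_cache.list_all_interfaces():
#       print uri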