Cope with the distribution cache changing while we're running
[zeroinstall/solver.git] / zeroinstall / injector / distro.py
blob872f60c22cf9fb302f2972622216bc6f62f7f04b
1 """
2 Integration with native distribution package managers.
3 @since: 0.28
4 """
6 # Copyright (C) 2009, Thomas Leonard
7 # See the README file for details, or visit http://0install.net.
9 from zeroinstall import _
10 import os, re, glob, subprocess, sys
11 from logging import warn, info
12 from zeroinstall.injector import namespaces, model, arch
13 from zeroinstall.support import basedir
15 _dotted_ints = '[0-9]+(?:\.[0-9]+)*'
17 # This matches a version number that would be a valid Zero Install version without modification
18 _zeroinstall_regexp = '(?:%s)(?:-(?:pre|rc|post|)(?:%s))*' % (_dotted_ints, _dotted_ints)
20 # This matches the interesting bits of distribution version numbers
21 _version_regexp = '(%s)(-r%s)?' % (_zeroinstall_regexp, _dotted_ints)
23 # We try to do updates atomically without locking, but we don't worry too much about
24 # duplicate entries or being a little out of sync with the on-disk copy.
25 class Cache(object):
26 def __init__(self, cache_leaf, source, format):
27 """Maintain a cache file (e.g. ~/.cache/0install.net/injector/$name).
28 If the size or mtime of $source has changed, or the cache
29 format version if different, reset the cache first."""
30 self.cache_leaf = cache_leaf
31 self.source = source
32 self.format = format
33 self.cache_dir = basedir.save_cache_path(namespaces.config_site,
34 namespaces.config_prog)
35 self.cached_for = {} # Attributes of source when cache was created
36 try:
37 self._load_cache()
38 except Exception, ex:
39 info(_("Failed to load cache (%s). Flushing..."), ex)
40 self.flush()
42 def flush(self):
43 # Wipe the cache
44 try:
45 info = os.stat(self.source)
46 mtime = int(info.st_mtime)
47 size = info.st_size
48 except Exception, ex:
49 warn("Failed to stat %s: %s", self.source, ex)
50 mtime = size = 0
51 self.cache = {}
52 import tempfile
53 tmp, tmp_name = tempfile.mkstemp(dir = self.cache_dir)
54 data = "mtime=%d\nsize=%d\nformat=%d\n\n" % (mtime, size, self.format)
55 while data:
56 wrote = os.write(tmp, data)
57 data = data[wrote:]
58 os.rename(tmp_name, os.path.join(self.cache_dir, self.cache_leaf))
60 self._load_cache()
62 # Populate self.cache from our saved cache file.
63 # Throws an exception if the cache doesn't exist or has the wrong format.
64 def _load_cache(self):
65 self.cache = cache = {}
66 stream = file(os.path.join(self.cache_dir, self.cache_leaf))
67 try:
68 meta = {}
69 cached_format = False
70 for line in stream:
71 line = line.strip()
72 if not line:
73 break
74 key, value = line.split('=', 1)
75 if key in ('mtime', 'size', 'format'):
76 self.cached_for[key] = int(value)
78 self._check_valid()
80 for line in stream:
81 key, value = line.split('=', 1)
82 cache[key] = value[:-1]
83 finally:
84 stream.close()
86 # Check the source file hasn't changed since we created the cache
87 def _check_valid(self):
88 info = os.stat(self.source)
89 if self.cached_for['mtime'] != int(info.st_mtime):
90 raise Exception("Modification time of %s has changed" % self.source)
91 if self.cached_for['size'] != info.st_size:
92 raise Exception("Size of %s has changed" % self.source)
93 if self.cached_for.get('format', None) != self.format:
94 raise Exception("Format of cache has changed")
96 def get(self, key):
97 try:
98 self._check_valid()
99 except Exception, ex:
100 info(_("Cache needs to be refreshed: %s"), ex)
101 self.flush()
102 return None
103 else:
104 return self.cache.get(key, None)
106 def put(self, key, value):
107 cache_path = os.path.join(self.cache_dir, self.cache_leaf)
108 self.cache[key] = value
109 try:
110 stream = file(cache_path, 'a')
111 try:
112 stream.write('%s=%s\n' % (key, value))
113 finally:
114 stream.close()
115 except Exception, ex:
116 warn("Failed to write to cache %s: %s=%s: %s", cache_path, key, value, ex)
118 def try_cleanup_distro_version(version):
119 """Try to turn a distribution version string into one readable by Zero Install.
120 We do this by stripping off anything we can't parse.
121 @return: the part we understood, or None if we couldn't parse anything
122 @rtype: str"""
123 match = re.match(_version_regexp, version)
124 if match:
125 version, revision = match.groups()
126 if revision is None:
127 return version
128 else:
129 return '%s-%s' % (version, revision[2:])
130 return None
132 class Distribution(object):
133 """Represents a distribution with which we can integrate.
134 Sub-classes should specialise this to integrate with the package managers of
135 particular distributions. This base class ignores the native package manager.
136 @since: 0.28
139 def get_package_info(self, package, factory):
140 """Get information about the given package.
141 Add zero or more implementations using the factory (typically at most two
142 will be added; the currently installed version and the latest available).
143 @param package: package name (e.g. "gimp")
144 @type package: str
145 @param factory: function for creating new DistributionImplementation objects from IDs
146 @type factory: str -> L{model.DistributionImplementation}
148 return
150 def get_score(self, distribution):
151 """Indicate how closely the host distribution matches this one.
152 The <package-implementation> with the highest score is passed
153 to L{Distribution.get_package_info}. If several elements get
154 the same score, get_package_info is called for all of them.
155 @param distribution: a distribution name
156 @type distribution: str
157 @return: an integer, or None if there is no match at all
158 @rtype: int | None
160 return 0
162 def get_installed(self, package_id):
163 """Check whether 'package' is currently installed.
164 @param package_id: the Implementation ID used by get_package_info
165 @type package_id: str
166 @return: True iff the package is currently installed"""
167 return True
169 class CachedDistribution(Distribution):
170 """For distributions where querying the package database is slow (e.g. requires running
171 an external command), we cache the results.
172 @since: 0.39
173 @deprecated: use Cache instead
176 def __init__(self, db_status_file):
177 """@param db_status_file: update the cache when the timestamp of this file changes"""
178 self._status_details = os.stat(db_status_file)
180 self.versions = {}
181 self.cache_dir = basedir.save_cache_path(namespaces.config_site,
182 namespaces.config_prog)
184 try:
185 self._load_cache()
186 except Exception, ex:
187 info(_("Failed to load distribution database cache (%s). Regenerating..."), ex)
188 try:
189 self.generate_cache()
190 self._load_cache()
191 except Exception, ex:
192 warn(_("Failed to regenerate distribution database cache: %s"), ex)
194 def _load_cache(self):
195 """Load {cache_leaf} cache file into self.versions if it is available and up-to-date.
196 Throws an exception if the cache should be (re)created."""
197 stream = file(os.path.join(self.cache_dir, self.cache_leaf))
199 cache_version = None
200 for line in stream:
201 if line == '\n':
202 break
203 name, value = line.split(': ')
204 if name == 'mtime' and int(value) != int(self._status_details.st_mtime):
205 raise Exception(_("Modification time of package database file has changed"))
206 if name == 'size' and int(value) != self._status_details.st_size:
207 raise Exception(_("Size of package database file has changed"))
208 if name == 'version':
209 cache_version = int(value)
210 else:
211 raise Exception(_('Invalid cache format (bad header)'))
213 if cache_version is None:
214 raise Exception(_('Old cache format'))
216 versions = self.versions
217 for line in stream:
218 package, version, zi_arch = line[:-1].split('\t')
219 versionarch = (version, intern(zi_arch))
220 if package not in versions:
221 versions[package] = [versionarch]
222 else:
223 versions[package].append(versionarch)
225 def _write_cache(self, cache):
226 #cache.sort() # Might be useful later; currently we don't care
227 import tempfile
228 fd, tmpname = tempfile.mkstemp(prefix = 'zeroinstall-cache-tmp',
229 dir = self.cache_dir)
230 try:
231 stream = os.fdopen(fd, 'wb')
232 stream.write('version: 2\n')
233 stream.write('mtime: %d\n' % int(self._status_details.st_mtime))
234 stream.write('size: %d\n' % self._status_details.st_size)
235 stream.write('\n')
236 for line in cache:
237 stream.write(line + '\n')
238 stream.close()
240 os.rename(tmpname,
241 os.path.join(self.cache_dir,
242 self.cache_leaf))
243 except:
244 os.unlink(tmpname)
245 raise
247 # Maps machine type names used in packages to their Zero Install versions
248 _canonical_machine = {
249 'all' : '*',
250 'any' : '*',
251 'amd64': 'x86_64',
252 'i386': 'i386',
255 host_machine = os.uname()[-1]
256 def canonical_machine(package_machine):
257 machine = _canonical_machine.get(package_machine, None)
258 if machine is None:
259 # Safe default if we can't understand the arch
260 return host_machine
261 return machine
263 class DebianDistribution(Distribution):
264 """A dpkg-based distribution."""
266 cache_leaf = 'dpkg-status.cache'
268 def __init__(self, dpkg_status, pkgcache):
269 self.dpkg_cache = Cache('dpkg-status.cache', dpkg_status, 2)
270 self.apt_cache = Cache('apt-cache-cache', pkgcache, 3)
272 def _query_installed_package(self, package):
273 child = subprocess.Popen(["dpkg-query", "-W", "--showformat=${Version}\t${Architecture}\t${Status}\n", "--", package],
274 stdout = subprocess.PIPE)
275 stdout, stderr = child.communicate()
276 child.wait()
277 for line in stdout.split('\n'):
278 if not line: continue
279 version, debarch, status = line.split('\t', 2)
280 if not status.endswith(' installed'): continue
281 if ':' in version:
282 # Debian's 'epoch' system
283 version = version.split(':', 1)[1]
284 clean_version = try_cleanup_distro_version(version)
285 if clean_version:
286 return '%s\t%s' % (clean_version, canonical_machine(debarch.strip()))
287 else:
288 warn(_("Can't parse distribution version '%(version)s' for package '%(package)s'"), {'version': version, 'package': package})
290 return '-'
292 def get_package_info(self, package, factory):
293 installed_cached_info = self._get_dpkg_info(package)
295 if installed_cached_info != '-':
296 installed_version, machine = installed_cached_info.split('\t')
297 impl = factory('package:deb:%s:%s' % (package, installed_version))
298 impl.version = model.parse_version(installed_version)
299 if machine != '*':
300 impl.machine = machine
301 else:
302 installed_version = None
304 # Check to see whether we could get a newer version using apt-get
306 cached = self.apt_cache.get(package)
307 if cached is None:
308 try:
309 null = os.open('/dev/null', os.O_WRONLY)
310 child = subprocess.Popen(['apt-cache', 'show', '--no-all-versions', '--', package], stdout = subprocess.PIPE, stderr = null)
311 os.close(null)
313 arch = version = size = None
314 for line in child.stdout:
315 line = line.strip()
316 if line.startswith('Version: '):
317 version = line[9:]
318 if ':' in version:
319 # Debian's 'epoch' system
320 version = version.split(':', 1)[1]
321 version = try_cleanup_distro_version(version)
322 elif line.startswith('Architecture: '):
323 arch = canonical_machine(line[14:].strip())
324 elif line.startswith('Size: '):
325 size = int(line[6:].strip())
326 if version and arch:
327 cached = '%s\t%s\t%d' % (version, arch, size)
328 else:
329 cached = '-'
330 child.wait()
331 except Exception, ex:
332 warn("'apt-cache show %s' failed: %s", package, ex)
333 cached = '-'
334 # (multi-arch support? can there be multiple candidates?)
335 self.apt_cache.put(package, cached)
337 if cached != '-':
338 candidate_version, candidate_arch, candidate_size = cached.split('\t')
339 if candidate_version and candidate_version != installed_version:
340 impl = factory('package:deb:%s:%s' % (package, candidate_version))
341 impl.version = model.parse_version(candidate_version)
342 if candidate_arch != '*':
343 impl.machine = candidate_arch
344 impl.download_sources.append(model.DistributionSource(package, candidate_size))
346 def get_score(self, disto_name):
347 return int(disto_name == 'Debian')
349 def _get_dpkg_info(self, package):
350 installed_cached_info = self.dpkg_cache.get(package)
351 if installed_cached_info == None:
352 installed_cached_info = self._query_installed_package(package)
353 self.dpkg_cache.put(package, installed_cached_info)
355 return installed_cached_info
357 def get_installed(self, package_id):
358 details = package_id.split(':', 3)
359 assert details[0] == 'package'
360 package = details[2]
361 info = self._get_dpkg_info(package)
362 if info is '-': return False
363 installed_version, machine = info.split('\t')
364 installed_id = 'package:deb:%s:%s' % (package, installed_version)
365 return package_id == installed_id
367 class RPMDistribution(CachedDistribution):
368 """An RPM-based distribution."""
370 cache_leaf = 'rpm-status.cache'
372 def generate_cache(self):
373 cache = []
375 for line in os.popen("rpm -qa --qf='%{NAME}\t%{VERSION}-%{RELEASE}\t%{ARCH}\n'"):
376 package, version, rpmarch = line.split('\t', 2)
377 if package == 'gpg-pubkey':
378 continue
379 if rpmarch == 'amd64\n':
380 zi_arch = 'x86_64'
381 elif rpmarch == 'noarch\n' or rpmarch == "(none)\n":
382 zi_arch = '*'
383 else:
384 zi_arch = rpmarch.strip()
385 clean_version = try_cleanup_distro_version(version)
386 if clean_version:
387 cache.append('%s\t%s\t%s' % (package, clean_version, zi_arch))
388 else:
389 warn(_("Can't parse distribution version '%(version)s' for package '%(package)s'"), {'version': version, 'package': package})
391 self._write_cache(cache)
393 def get_package_info(self, package, factory):
394 try:
395 versions = self.versions[package]
396 except KeyError:
397 return
399 for version, machine in versions:
400 impl = factory('package:rpm:%s:%s:%s' % (package, version, machine))
401 impl.version = model.parse_version(version)
402 if machine != '*':
403 impl.machine = machine
405 def get_score(self, disto_name):
406 return int(disto_name == 'RPM')
408 class GentooDistribution(Distribution):
410 def __init__(self, pkgdir):
411 self._pkgdir = pkgdir
413 def get_package_info(self, package, factory):
414 _version_start_reqexp = '-[0-9]'
416 if package.count('/') != 1: return
418 category, leafname = package.split('/')
419 category_dir = os.path.join(self._pkgdir, category)
420 match_prefix = leafname + '-'
422 if not os.path.isdir(category_dir): return
424 for filename in os.listdir(category_dir):
425 if filename.startswith(match_prefix) and filename[len(match_prefix)].isdigit():
426 name = file(os.path.join(category_dir, filename, 'PF')).readline().strip()
428 match = re.search(_version_start_reqexp, name)
429 if match is None:
430 warn(_('Cannot parse version from Gentoo package named "%(name)s"'), {'name': name})
431 continue
432 else:
433 version = try_cleanup_distro_version(name[match.start() + 1:])
435 if category == 'app-emulation' and name.startswith('emul-'):
436 __, __, machine, __ = name.split('-', 3)
437 else:
438 machine, __ = file(os.path.join(category_dir, filename, 'CHOST')).readline().split('-', 1)
439 machine = arch.canonicalize_machine(machine)
441 impl = factory('package:gentoo:%s:%s:%s' % \
442 (package, version, machine))
443 impl.version = model.parse_version(version)
444 impl.machine = machine
446 def get_score(self, disto_name):
447 return int(disto_name == 'Gentoo')
449 class PortsDistribution(Distribution):
451 def __init__(self, pkgdir):
452 self._pkgdir = pkgdir
454 def get_package_info(self, package, factory):
455 _version_start_reqexp = '-[0-9]'
457 for pkgname in os.listdir(self._pkgdir):
458 pkgdir = os.path.join(self._pkgdir, pkgname)
459 if not os.path.isdir(pkgdir): continue
461 #contents = file(os.path.join(pkgdir, '+CONTENTS')).readline().strip()
463 match = re.search(_version_start_reqexp, pkgname)
464 if match is None:
465 warn(_('Cannot parse version from Ports package named "%(pkgname)s"'), {'name': pkgname})
466 continue
467 else:
468 name = pkgname[0:match.start()]
469 version = try_cleanup_distro_version(pkgname[match.start() + 1:])
471 machine = arch.canonicalize_machine(host_machine)
473 impl = factory('package:ports:%s:%s:%s' % \
474 (package, version, machine))
475 impl.version = model.parse_version(version)
476 impl.machine = machine
478 def get_score(self, disto_name):
479 return int(disto_name == 'Ports')
481 _host_distribution = None
482 def get_host_distribution():
483 """Get a Distribution suitable for the host operating system.
484 Calling this twice will return the same object.
485 @rtype: L{Distribution}"""
486 global _host_distribution
487 if not _host_distribution:
488 dpkg_db_status = '/var/lib/dpkg/status'
489 pkgcache = '/var/cache/apt/pkgcache.bin'
490 _rpm_db = '/var/lib/rpm/Packages'
491 _pkg_db = '/var/db/pkg'
493 if os.path.isdir(_pkg_db):
494 if sys.platform.startswith("linux"):
495 _host_distribution = GentooDistribution(_pkg_db)
496 elif sys.platform.startswith("freebsd"):
497 _host_distribution = PortsDistribution(_pkg_db)
498 elif os.access(dpkg_db_status, os.R_OK):
499 _host_distribution = DebianDistribution(dpkg_db_status, pkgcache)
500 elif os.path.isfile(_rpm_db):
501 _host_distribution = RPMDistribution(_rpm_db)
502 else:
503 _host_distribution = Distribution()
505 return _host_distribution