Added support for extracting RPM archives. This is useful when binaries are
[zeroinstall.git] / zeroinstall / zerostore / __init__.py
bloba43ce4b08adf21317ebc8d35a9ffee7914bf90f7
1 import os
2 import shutil
3 import traceback
4 from tempfile import mkdtemp, mkstemp
5 import sha
6 import re
7 from logging import debug, info, warn
9 import manifest
10 from zeroinstall.injector import basedir
11 from zeroinstall import SafeException
# Raised when a digest string is malformed, or when the computed manifest
# digest of an unpacked archive does not match the required one.
class BadDigest(SafeException): pass
# Raised when a requested implementation digest is not present in any store.
class NotStored(SafeException): pass
# Cached result of recent_gnu_tar(); None means "not checked yet".
_recent_gnu_tar = None
def recent_gnu_tar():
	"""Return True iff the system 'tar' is GNU tar newer than version 1.13.92.

	Recent GNU tar supports the --no-same-owner and --no-same-permissions
	options used when unpacking archives. The result is computed once by
	running 'tar --version' and then cached for the life of the process.
	On any parse failure a warning is logged and False is returned."""
	global _recent_gnu_tar
	if _recent_gnu_tar is None:
		_recent_gnu_tar = False
		# Fix: close the pipe when done (was leaked), and use readline()
		# instead of the non-portable .next() iterator method.
		stream = os.popen('tar --version 2>&1')
		version = stream.readline()
		stream.close()
		if '(GNU tar)' in version:
			# e.g. "tar (GNU tar) 1.14" -> [1, 14]
			try:
				version = version.split(')', 1)[1].strip()
				assert version
				version = [int(x) for x in version.split('.')]
				_recent_gnu_tar = version > [1, 13, 92]
			except (ValueError, AssertionError):
				# Fix: was a bare 'except:', which also swallowed
				# KeyboardInterrupt and SystemExit.
				warn("Failed to extract GNU tar version number")
		debug("Recent GNU tar = %s", _recent_gnu_tar)
	return _recent_gnu_tar
def copytree2(src, dst):
	"""Recursively copy the contents of directory 'src' into existing directory 'dst'.

	Symlinks are recreated (not followed), and the mtime of each copied
	directory is preserved; regular files are copied with shutil.copy2,
	which also preserves their timestamps. 'dst' must already exist."""
	names = os.listdir(src)
	assert os.path.isdir(dst)
	# Fix: removed unused 'errors' list (dead code).
	for name in names:
		srcname = os.path.join(src, name)
		dstname = os.path.join(dst, name)
		if os.path.islink(srcname):
			linkto = os.readlink(srcname)
			os.symlink(linkto, dstname)
		elif os.path.isdir(srcname):
			os.mkdir(dstname)
			# Record the mtime before recursing, then restore it after:
			# copying children would otherwise update the new directory's mtime.
			mtime = os.lstat(srcname).st_mtime
			copytree2(srcname, dstname)
			os.utime(dstname, (mtime, mtime))
		else:
			shutil.copy2(srcname, dstname)
class Store:
	"""A single cache directory holding implementations, one subdirectory
	per implementation, each named after its manifest digest
	(e.g. 'sha1=<hex>')."""

	def __init__(self, dir):
		# Path of this store's root directory (may not exist yet).
		self.dir = dir
	
	def lookup(self, digest):
		"""Return the directory for 'digest', or None if not stored here.

		Raises AssertionError/ValueError if the digest is not a
		well-formed 'sha1=<hex>' string."""
		alg, value = digest.split('=', 1)
		assert alg == 'sha1'
		assert '/' not in value
		int(value, 16)	# Check valid format
		# Fix: renamed local 'dir' -> 'impl_dir' (was shadowing the builtin).
		impl_dir = os.path.join(self.dir, digest)
		if os.path.isdir(impl_dir):
			return impl_dir
		return None

	def add_archive_to_cache(self, required_digest, data, url, extract = None):
		"""Unpack the archive 'data' (a seekable file object), check that its
		manifest digest matches 'required_digest', and store it.

		'url' is only used to guess the archive format from its extension.
		'extract', if given, names a single top-level directory within the
		archive to store instead of the whole archive."""
		assert required_digest.startswith('sha1=')
		info("Caching new implementation (digest %s)", required_digest)

		if self.lookup(required_digest):
			info("Not adding %s as it already exists!", required_digest)
			return

		if extract:
			# Limit the characters we accept, to avoid sending dodgy
			# strings to tar
			if not re.match('^[a-zA-Z0-9][- _a-zA-Z0-9.]*$', extract):
				raise Exception('Illegal character in extract attribute')

		if url.endswith('.tar.bz2'):
			tmp = self._add_tbz_to_cache(required_digest, data, extract)
		elif url.endswith('.rpm'):
			tmp = self._add_rpm_to_cache(required_digest, data, extract)
		else:
			if not (url.endswith('.tar.gz') or url.endswith('.tgz')):
				warn('Unknown extension on "%s"; assuming tar.gz format' % url)
			tmp = self._add_tgz_to_cache(required_digest, data, extract)

		try:
			self.check_manifest_and_rename(required_digest, tmp, extract)
		except Exception:
			# Fix: was 'except Exception, ex' with 'ex' unused; the
			# no-name form is equivalent (and portable).
			warn("Leaving extracted directory as %s", tmp)
			raise

	def _add_rpm_to_cache(self, required_digest, stream, extract = None):
		"""Convert the RPM on 'stream' to a cpio archive (via rpm2cpio),
		unpack it, and return the temporary extraction directory."""
		if extract:
			raise Exception('Sorry, but the "extract" attribute is not yet supported for RPMs')
		fd, cpiopath = mkstemp('-rpm-tmp')
		try:
			child = os.fork()
			if child == 0:
				# Child process: rpm2cpio reads the RPM from stdin and
				# writes the cpio archive to the temporary file.
				try:
					try:
						os.dup2(stream.fileno(), 0)
						os.dup2(fd, 1)
						os.execlp('rpm2cpio', 'rpm2cpio', '-')
					except:
						# Deliberate bare except: the child must never
						# return to the caller's code, whatever happens.
						traceback.print_exc()
				finally:
					os._exit(1)
			# Fix: renamed 'id' -> 'pid' (was shadowing the builtin).
			pid, status = os.waitpid(child, 0)
			assert pid == child
			if status != 0:
				raise Exception("rpm2cpio failed; can't unpack RPM archive; exit code %d" % status)
			os.close(fd)
			fd = None
			args = ['cpio', '-mid', '--quiet']
			# Fix: the cpio file object was never closed (fd leak).
			cpio_stream = open(cpiopath)
			try:
				tmp = self.extract(cpio_stream, args)
			finally:
				cpio_stream.close()
			# Set the mtime of every directory under 'tmp' to 0, since cpio doesn't
			# preserve directory mtimes. (os.walk visits 'tmp' and every
			# subdirectory top-down, same coverage as the old os.path.walk.)
			for dirpath, dirnames, filenames in os.walk(tmp):
				os.utime(dirpath, (0, 0))
			return tmp
		finally:
			if fd is not None:
				os.close(fd)
			os.unlink(cpiopath)

	def _add_tbz_to_cache(self, required_digest, data, extract = None):
		"""Unpack a .tar.bz2 archive; see _add_tar_to_cache."""
		return self._add_tar_to_cache(required_digest, data, extract, '--bzip2')

	def _add_tgz_to_cache(self, required_digest, data, extract = None):
		"""Unpack a .tar.gz archive; see _add_tar_to_cache."""
		return self._add_tar_to_cache(required_digest, data, extract, '-z')

	def _add_tar_to_cache(self, required_digest, data, extract, decompress):
		"""Data is a .tgz compressed archive. Extract it somewhere, check that
		the digest is correct, and add it to the store.
		extract is the name of a directory within the archive to extract, rather
		than extracting the whole archive. This is most useful to remove an extra
		top-level directory."""
		if recent_gnu_tar():
			args = ['tar', decompress, '-x', '--no-same-owner', '--no-same-permissions']
		else:
			args = ['tar', decompress, '-xf', '-']

		if extract:
			args.append(extract)

		return self.extract(data, args)
	
	def add_dir_to_cache(self, required_digest, path):
		"""Copy the directory tree at 'path' into this store, verifying that
		its manifest digest matches 'required_digest'. The copy is deleted
		again if verification fails."""
		if self.lookup(required_digest):
			info("Not adding %s as it already exists!", required_digest)
			return

		if not os.path.isdir(self.dir):
			os.makedirs(self.dir)
		tmp = mkdtemp(dir = self.dir, prefix = 'tmp-')
		copytree2(path, tmp)
		try:
			self.check_manifest_and_rename(required_digest, tmp)
		except:
			warn("Error importing directory.")
			warn("Deleting %s", tmp)
			shutil.rmtree(tmp)
			raise

	def check_manifest_and_rename(self, required_digest, tmp, extract = None):
		"""Compute the manifest digest of the extracted tree and, if it
		matches 'required_digest', rename it into its final location.

		Raises BadDigest on mismatch, leaving 'tmp' in place."""
		if extract:
			extracted = os.path.join(tmp, extract)
			if not os.path.isdir(extracted):
				raise Exception('Directory %s not found in archive' % extract)
		else:
			extracted = tmp

		sha1 = 'sha1=' + manifest.add_manifest_file(extracted, sha.new()).hexdigest()
		if sha1 != required_digest:
			raise BadDigest('Incorrect manifest -- archive is corrupted.\n'
					'Required digest: %s\n'
					'Actual digest: %s\n' %
					(required_digest, sha1))

		final_name = os.path.join(self.dir, required_digest)
		if os.path.isdir(final_name):
			raise Exception("Item %s already stored." % final_name)
		if extract:
			os.rename(os.path.join(tmp, extract), final_name)
			os.rmdir(tmp)
		else:
			os.rename(tmp, final_name)
	
	def extract(self, stream, command):
		"""Create a temporary directory and execvp('command') inside it in a child process.
		on error, delete the temporary directory and raise an exception.
		On success, returns the path of the temporary directory."""
		if not os.path.isdir(self.dir):
			os.makedirs(self.dir)
		tmp = mkdtemp(dir = self.dir, prefix = 'tmp-')
		try:
			child = os.fork()
			if child == 0:
				# Child process: feed the archive to the command's stdin,
				# running inside the new temporary directory.
				try:
					try:
						os.chdir(tmp)
						stream.seek(0)
						os.dup2(stream.fileno(), 0)
						os.execvp(command[0], command)
					except:
						# Deliberate bare except: see _add_rpm_to_cache.
						traceback.print_exc()
				finally:
					os._exit(1)
			# Fix: renamed 'id' -> 'pid' (was shadowing the builtin).
			pid, status = os.waitpid(child, 0)
			assert pid == child
			if status != 0:
				raise Exception('Failed to extract archive; exit code %d' % status)
		except:
			shutil.rmtree(tmp)
			raise
		return tmp
class Stores(object):
	"""An ordered list of implementation caches. The user's own store comes
	first and is the one new items are added to; system stores listed in
	the 'implementation-dirs' config file are searched after it."""
	__slots__ = ['stores']

	def __init__(self):
		user_store = os.path.join(basedir.xdg_cache_home, '0install.net', 'implementations')
		self.stores = [Store(user_store)]

		impl_dirs = basedir.load_first_config('0install.net', 'injector',
						      'implementation-dirs')
		debug("Location of 'implementation-dirs' config file being used: '%s'", impl_dirs)
		if impl_dirs:
			# Fix: read the config file and close it explicitly (the old
			# code iterated an open file object and never closed it).
			stream = open(impl_dirs)
			try:
				dirs = stream.readlines()
			finally:
				stream.close()
		else:
			dirs = ['/var/cache/0install.net/implementations']
		for directory in dirs:
			directory = directory.strip()
			# Blank lines and '#' comments are ignored.
			if directory and not directory.startswith('#'):
				if os.path.isdir(directory):
					self.stores.append(Store(directory))
					debug("Added system store '%s'", directory)
				else:
					info("Ignoring non-directory store '%s'", directory)

	def lookup(self, digest):
		"""Search for digest in all stores.

		Returns the path of the stored implementation.
		Raises BadDigest if 'digest' is malformed, NotStored if no
		store contains it."""
		assert digest
		if '/' in digest or '=' not in digest:
			raise BadDigest('Syntax error in digest (use ALG=VALUE)')
		for store in self.stores:
			path = store.lookup(digest)
			if path:
				return path
		raise NotStored("Item with digest '%s' not found in stores. Searched:\n- %s" %
				(digest, '\n- '.join([s.dir for s in self.stores])))

	def add_dir_to_cache(self, required_digest, dir):
		"""Add a directory tree to the user store; see Store.add_dir_to_cache."""
		self.stores[0].add_dir_to_cache(required_digest, dir)

	def add_archive_to_cache(self, required_digest, data, url, extract = None):
		"""Add an archive to the user store; see Store.add_archive_to_cache."""
		self.stores[0].add_archive_to_cache(required_digest, data, url, extract)