Cope with trailing text at the end of tar's version string.
[zeroinstall/zeroinstall-mseaborn.git] / zeroinstall / zerostore / unpack.py
blob a44cdc337a7854918a4c0b35e43cfe8fe9c09900
1 """Unpacking archives of various formats."""
3 # Copyright (C) 2006, Thomas Leonard
4 # See the README file for details, or visit http://0install.net.
6 import os
7 import shutil
8 import traceback
9 from tempfile import mkdtemp, mkstemp
10 import sha
11 import re
12 from logging import debug, info, warn
13 from zeroinstall import SafeException
14 from zeroinstall.support import find_in_path, ro_rmtree
16 _cpio_version = None
17 def _get_cpio_version():
18 global _cpio_version
19 if _cpio_version is None:
20 _cpio_version = os.popen('cpio --version 2>&1').next()
21 debug("cpio version = %s", _cpio_version)
22 return _cpio_version
def _gnu_cpio():
    """Report whether the installed cpio identifies itself as GNU cpio.
    @return: True if '(GNU cpio)' occurs in the cpio version line"""
    version_line = _get_cpio_version()
    is_gnu = '(GNU cpio)' in version_line
    debug("Is GNU cpio = %s", is_gnu)
    return is_gnu
29 _tar_version = None
30 def _get_tar_version():
31 global _tar_version
32 if _tar_version is None:
33 _tar_version = os.popen('tar --version 2>&1').next().strip()
34 debug("tar version = %s", _tar_version)
35 return _tar_version
def _gnu_tar():
    """Report whether the installed tar identifies itself as GNU tar.
    @return: True if '(GNU tar)' occurs in the tar version line"""
    version_line = _get_tar_version()
    is_gnu = '(GNU tar)' in version_line
    debug("Is GNU tar = %s", is_gnu)
    return is_gnu
def recent_gnu_tar():
    """Tell whether tar is GNU tar with a version number greater than 1.13.92.

    The version is the dotted number that follows the closing bracket in the
    version line; any text after the number is ignored.
    @return: True only for GNU tar whose version compares greater than 1.13.92;
    False for other tars or when no version number can be found
    @deprecated: should be private"""
    is_recent = False
    if _gnu_tar():
        match = re.search(r'\)\s*(\d+(\.\d+)*)', _get_tar_version())
        if match is None:
            warn("Failed to extract GNU tar version number")
        else:
            components = [int(part) for part in match.group(1).split('.')]
            is_recent = components > [1, 13, 92]
    debug("Recent GNU tar = %s", is_recent)
    return is_recent
# Look for the optional 'pola-run' program once, when the module is loaded.
# _exec_maybe_sandboxed() uses it to run extraction commands when it is set.
_pola_run = find_in_path('pola-run')
if not _pola_run:
    info('pola-run not found; archive extraction will not be sandboxed')
else:
    info('Found pola-run: %s', _pola_run)
def type_from_url(url):
    """Guess the MIME type of an archive from the extension at the end of its URL.
    The comparison ignores case.
    @param url: the URL (or file name) to examine
    @return: the MIME type, or None if the extension is not recognised"""
    known_suffixes = (
        ('.rpm', 'application/x-rpm'),
        ('.deb', 'application/x-deb'),
        ('.tar.bz2', 'application/x-bzip-compressed-tar'),
        ('.tar.gz', 'application/x-compressed-tar'),
        ('.tar.lzma', 'application/x-lzma-compressed-tar'),    # XXX: No registered MIME type!
        ('.tgz', 'application/x-compressed-tar'),
        ('.tar', 'application/x-tar'),
        ('.zip', 'application/zip'),
        ('.cab', 'application/vnd.ms-cab-compressed'),
    )
    lowered = url.lower()
    for suffix, mime_type in known_suffixes:
        if lowered.endswith(suffix):
            return mime_type
    return None
def check_type_ok(mime_type):
    """Check we have the needed software to extract from an archive of the given type.
    Plain and gzip-compressed tar archives are accepted without looking for any program.
    @param mime_type: the MIME type of the archive; must not be empty
    @raise SafeException: if the needed software is not available, or if the
    type is not one this module knows about"""
    assert mime_type
    # (MIME type, program that must be in $PATH, complaint if it is missing)
    helpers = (
        ('application/x-rpm', 'rpm2cpio',
            "This package looks like an RPM, but you don't have the rpm2cpio command "
            "I need to extract it. Install the 'rpm' package first (this works even if "
            "you're on a non-RPM-based distribution such as Debian)."),
        ('application/x-deb', 'ar',
            "This package looks like a Debian package, but you don't have the 'ar' command "
            "I need to extract it. Install the package containing it (sometimes called 'binutils') "
            "first (this works even if you're on a non-Debian-based distribution such as Red Hat)."),
        ('application/x-bzip-compressed-tar', 'bunzip2',
            "This package looks like a bzip2-compressed package, but you don't have the 'bunzip2' command "
            "I need to extract it. Install the package containing it (it's probably called 'bzip2') "
            "first."),
        ('application/zip', 'unzip',
            "This package looks like a zip-compressed archive, but you don't have the 'unzip' command "
            "I need to extract it. Install the package containing it first."),
        ('application/vnd.ms-cab-compressed', 'cabextract',
            "This package looks like a Microsoft Cabinet archive, but you don't have the 'cabextract' command "
            "I need to extract it. Install the package containing it first."),
        ('application/x-lzma-compressed-tar', 'unlzma',
            "This package looks like an LZMA archive, but you don't have the 'unlzma' command "
            "I need to extract it. Install the package containing it (it's probably called 'lzma') first."),
    )
    for helper_type, program, complaint in helpers:
        if mime_type == helper_type:
            if not find_in_path(program):
                raise SafeException(complaint)
            return
    if mime_type in ('application/x-compressed-tar', 'application/x-tar'):
        return
    # Imported only on the error path, as the version is needed just for the message.
    from zeroinstall import version
    raise SafeException("Unsupported archive type '%s' (for injector version %s)" % (mime_type, version))
def _exec_maybe_sandboxed(writable, prog, *args):
    """Replace the current process with 'prog', with (only) the 'writable'
    directory writable if sandboxing is available.
    If no sandbox is available, run without a sandbox.
    @param writable: directory to pass to pola-run with '-fw', or None for none
    @param prog: name of the program, looked up in $PATH
    @param args: arguments for the program
    @raise Exception: if prog is not found in $PATH"""
    prog_path = find_in_path(prog)
    if not prog_path:
        raise Exception("'%s' not found in $PATH" % prog)
    if _pola_run is None:
        # No sandbox: exec the program directly, with its path as argv[0].
        os.execvp(prog_path, [prog_path] + list(args))
    # We have pola-run: hand it the program and each argument in turn.
    sandbox_argv = [_pola_run, '--prog', prog_path, '-f', '/']
    for arg in args:
        sandbox_argv.extend(['-a', arg])
    if writable:
        sandbox_argv.extend(['-fw', writable])
    os.execv(_pola_run, sandbox_argv)
def unpack_archive_over(url, data, destdir, extract = None, type = None, start_offset = 0):
    """Like unpack_archive, except that we unpack to a temporary directory first and
    then move things over, checking that we're not following symlinks at each stage.
    Use this when you want to unpack an archive into a directory which already has
    stuff in it.

    The temporary directory is created inside destdir and is always removed
    again. Directory mtimes from the archive are re-applied at the end, to
    every unpacked directory except the top level.
    @param url, data, extract, type, start_offset: passed on to unpack_archive
    @param destdir: existing directory to merge the archive's contents into
    @raise SafeException: if a directory would be unpacked over a symlink or a
    non-directory, or a file over a symlink
    @since: 0.28"""
    import errno
    import stat
    import sys
    tmpdir = mkdtemp(dir = destdir)
    try:
        mtimes = []

        unpack_archive(url, data, tmpdir, extract, type, start_offset)

        stem_len = len(tmpdir)
        for root, dirs, files in os.walk(tmpdir):
            relative_root = root[stem_len + 1:] or '.'
            target_root = os.path.join(destdir, relative_root)
            try:
                target_info = os.lstat(target_root)
            except OSError:
                # sys.exc_info() is used to get at the exception because it
                # is spelt the same way on every Python version.
                if sys.exc_info()[1].errno != errno.ENOENT:
                    raise    # Some odd error.
                # Doesn't exist. OK.
                os.mkdir(target_root)
            else:
                if stat.S_ISLNK(target_info.st_mode):
                    raise SafeException('Attempt to unpack dir over symlink "%s"!' % relative_root)
                elif not stat.S_ISDIR(target_info.st_mode):
                    raise SafeException('Attempt to unpack dir over non-directory "%s"!' % relative_root)
            # 'root' already starts with tmpdir; joining it onto tmpdir again
            # would give a non-existent path whenever tmpdir is relative.
            mtimes.append((relative_root, os.lstat(root).st_mtime))

            for s in dirs:    # Symlinks are counted as directories
                src = os.path.join(tmpdir, relative_root, s)
                if os.path.islink(src):
                    files.append(s)

            for f in files:
                src = os.path.join(tmpdir, relative_root, f)
                dest = os.path.join(destdir, relative_root, f)
                if os.path.islink(dest):
                    raise SafeException('Attempt to unpack file over symlink "%s"!' %
                            os.path.join(relative_root, f))
                os.rename(src, dest)

        # The first entry is the top-level directory itself; leave its mtime alone.
        for path, mtime in mtimes[1:]:
            os.utime(os.path.join(destdir, path), (mtime, mtime))
    finally:
        ro_rmtree(tmpdir)
def unpack_archive(url, data, destdir, extract = None, type = None, start_offset = 0):
    """Unpack stream 'data' into directory 'destdir'. If extract is given, extract just
    that sub-directory from the archive. The format is given by the MIME type
    'type' or, when that is None, worked out from the name in 'url'.
    @raise SafeException: if the type cannot be worked out or is not a known one"""
    if type is None:
        type = type_from_url(url)
        if type is None:
            raise SafeException("Unknown extension (and no MIME type given) in '%s'" % url)

    # The tar family differs only in the compression handed to extract_tar.
    tar_types = (
        ('application/x-bzip-compressed-tar', 'bzip2'),
        ('application/x-tar', None),
        ('application/x-lzma-compressed-tar', 'lzma'),
        ('application/x-compressed-tar', 'gzip'),
    )
    for tar_type, compression in tar_types:
        if type == tar_type:
            extract_tar(data, destdir, extract, compression, start_offset)
            return

    # Every other format has an extractor of its own, all called the same way.
    other_types = (
        ('application/x-deb', extract_deb),
        ('application/x-rpm', extract_rpm),
        ('application/zip', extract_zip),
        ('application/vnd.ms-cab-compressed', extract_cab),
    )
    for other_type, extractor in other_types:
        if type == other_type:
            extractor(data, destdir, extract, start_offset)
            return

    raise SafeException('Unknown MIME type "%s" for "%s"' % (type, url))
def extract_deb(stream, destdir, extract = None, start_offset = 0):
    """Unpack the 'data.tar.gz' member of a Debian package into destdir.

    The package is first copied to 'archive.deb' inside destdir, 'ar' pulls
    data.tar.gz out of it, and that is then unpacked as a gzip-compressed tar.
    Both intermediate files are removed again.
    @param stream: seekable file object holding the package
    @param destdir: directory to unpack into
    @param extract: not supported; must be empty
    @param start_offset: offset within stream at which the package starts
    @raise SafeException: if extract is given, or an extraction command fails"""
    if extract:
        raise SafeException('Sorry, but the "extract" attribute is not yet supported for Debs')

    stream.seek(start_offset)
    # ar can't read from stdin, so make a copy...
    deb_copy_name = os.path.join(destdir, 'archive.deb')
    # Binary mode: the package must be copied byte for byte.
    deb_copy = open(deb_copy_name, 'wb')
    try:
        try:
            shutil.copyfileobj(stream, deb_copy)
        finally:
            deb_copy.close()
        _extract(stream, destdir, ('ar', 'x', 'archive.deb', 'data.tar.gz'))
    finally:
        # Don't leave the copy behind, even when copying or ar fails.
        os.unlink(deb_copy_name)
    data_name = os.path.join(destdir, 'data.tar.gz')
    data_stream = open(data_name, 'rb')
    try:
        # Unlink straight away; the open handle is all that is needed from here on.
        os.unlink(data_name)
        extract_tar(data_stream, destdir, None, 'gzip')
    finally:
        data_stream.close()
def extract_rpm(stream, destdir, extract = None, start_offset = 0):
    """Unpack an RPM package into destdir by converting it to a cpio archive first.

    rpm2cpio runs in a child process with the package as its stdin and a
    temporary file as its stdout; cpio then unpacks that file. Afterwards the
    mtime of destdir and of every directory below it is set to 0.
    @param stream: file object holding the package; its fileno() is used directly
    @param destdir: directory to unpack into
    @param extract: not supported; must be empty
    @param start_offset: offset within stream at which the package starts
    @raise SafeException: if extract is given, or rpm2cpio or cpio fails"""
    if extract:
        raise SafeException('Sorry, but the "extract" attribute is not yet supported for RPMs')
    fd, cpiopath = mkstemp('-rpm-tmp')
    try:
        child = os.fork()
        if child == 0:
            # In the child: report any failure, then always _exit so the child
            # can never carry on running the caller's code.
            try:
                try:
                    os.dup2(stream.fileno(), 0)
                    os.lseek(0, start_offset, 0)
                    os.dup2(fd, 1)
                    _exec_maybe_sandboxed(None, 'rpm2cpio', '-')
                except:
                    traceback.print_exc()
            finally:
                os._exit(1)
        pid, status = os.waitpid(child, 0)
        assert pid == child
        if status != 0:
            # The status from waitpid() combines the exit code with the
            # terminating signal; decode it rather than printing it raw.
            if os.WIFSIGNALED(status):
                outcome = 'killed by signal %d' % os.WTERMSIG(status)
            else:
                outcome = 'exit code %d' % os.WEXITSTATUS(status)
            raise SafeException("rpm2cpio failed; can't unpack RPM archive; " + outcome)
        os.close(fd)
        fd = None

        args = ['cpio', '-mid']
        if _gnu_cpio():
            args.append('--quiet')

        cpio_stream = open(cpiopath, 'rb')
        try:
            _extract(cpio_stream, destdir, args)
        finally:
            cpio_stream.close()
        # Set the mtime of every directory under destdir to 0, since cpio doesn't
        # preserve directory mtimes.
        for dirpath, dirnames, filenames in os.walk(destdir):
            os.utime(dirpath, (0, 0))
    finally:
        if fd is not None:
            os.close(fd)
        os.unlink(cpiopath)
def extract_cab(stream, destdir, extract, start_offset = 0):
    """Unpack a Microsoft Cabinet archive into destdir using 'cabextract'.

    The archive is first copied to 'archive.cab' inside destdir, which is
    removed again afterwards.
    @param stream: seekable file object holding the archive
    @param destdir: directory to unpack into
    @param extract: not supported; must be empty
    @param start_offset: offset within stream at which the archive starts
    @raise SafeException: if extract is given, or cabextract fails
    @since: 0.24"""
    if extract:
        raise SafeException('Sorry, but the "extract" attribute is not yet supported for Cabinet files')

    stream.seek(start_offset)
    # cabextract can't read from stdin, so make a copy...
    cab_copy_name = os.path.join(destdir, 'archive.cab')
    # Binary mode: the archive must be copied byte for byte.
    cab_copy = open(cab_copy_name, 'wb')
    try:
        try:
            shutil.copyfileobj(stream, cab_copy)
        finally:
            cab_copy.close()

        _extract(stream, destdir, ['cabextract', '-s', '-q', 'archive.cab'])
    finally:
        # Don't leave the copy behind, even when copying or cabextract fails.
        os.unlink(cab_copy_name)
def extract_zip(stream, destdir, extract, start_offset = 0):
    """Unpack a zip archive into destdir using 'unzip'.

    The archive is first copied to 'archive.zip' inside destdir, which is
    removed again afterwards. If extract is given, only that sub-directory is
    unpacked and its contents are moved up into destdir itself.
    @param stream: seekable file object holding the archive
    @param destdir: directory to unpack into
    @param extract: name of the sub-directory to extract, or empty for everything
    @param start_offset: offset within stream at which the archive starts
    @raise SafeException: if extract contains unexpected characters, or unzip fails"""
    if extract:
        # Limit the characters we accept, to avoid sending dodgy
        # strings to zip. \Z is used because $ would also accept a
        # newline at the end of the name.
        if not re.match(r'^[a-zA-Z0-9][- _a-zA-Z0-9.]*\Z', extract):
            raise SafeException('Illegal character in extract attribute')

    stream.seek(start_offset)
    # unzip can't read from stdin, so make a copy...
    zip_copy_name = os.path.join(destdir, 'archive.zip')
    # Binary mode: the archive must be copied byte for byte.
    zip_copy = open(zip_copy_name, 'wb')
    try:
        try:
            shutil.copyfileobj(stream, zip_copy)
        finally:
            zip_copy.close()

        args = ['unzip', '-q', '-o', 'archive.zip']

        if extract:
            args.append(extract + '/*')

        _extract(stream, destdir, args)
    finally:
        # Don't leave the copy behind, even when copying or unzip fails.
        os.unlink(zip_copy_name)

    if extract:
        # unzip uses extract just as a filter, so we still need to move things
        extracted_dir = os.path.join(destdir, extract)
        for x in os.listdir(extracted_dir):
            os.rename(os.path.join(extracted_dir, x), os.path.join(destdir, x))
        os.rmdir(extracted_dir)
def extract_tar(stream, destdir, extract, decompress, start_offset = 0):
    """Unpack a tar archive, optionally compressed, into destdir.

    With GNU tar the work is done by the 'tar' command; otherwise Python's
    tarfile module is used, which here supports bzip2 and gzip but not lzma.
    @param stream: seekable file object holding the archive
    @param destdir: directory to unpack into
    @param extract: name of the member or sub-directory to extract, or empty for everything
    @param decompress: None, 'bzip2', 'gzip' or 'lzma'
    @param start_offset: offset within stream at which the archive starts
    @raise SafeException: if extract contains unexpected characters, the tar
    command fails, the compression is not supported without GNU tar, or (without
    GNU tar) nothing in the archive matched extract"""
    if extract:
        # Limit the characters we accept, to avoid sending dodgy
        # strings to tar. \Z is used because $ would also accept a
        # newline at the end of the name.
        if not re.match(r'^[a-zA-Z0-9][- _a-zA-Z0-9.]*\Z', extract):
            raise SafeException('Illegal character in extract attribute')

    assert decompress in [None, 'bzip2', 'gzip', 'lzma']

    if _gnu_tar():
        ext_cmd = ['tar']
        if decompress:
            if decompress == 'bzip2':
                ext_cmd.append('--bzip2')
            elif decompress == 'gzip':
                ext_cmd.append('-z')
            elif decompress == 'lzma':
                ext_cmd.append('--use-compress-program=unlzma')

        if recent_gnu_tar():
            ext_cmd.extend(('-x', '--no-same-owner', '--no-same-permissions'))
        else:
            ext_cmd.extend(('xf', '-'))

        if extract:
            ext_cmd.append(extract)

        _extract(stream, destdir, ext_cmd, start_offset)
    else:
        # Since we don't have GNU tar, use python's tarfile module. This will probably
        # be a lot slower and we do not support lzma; however, it is portable.
        if decompress is None:
            rmode = 'r|'
        elif decompress == 'bzip2':
            rmode = 'r|bz2'
        elif decompress == 'gzip':
            rmode = 'r|gz'
        else:
            raise SafeException('GNU tar unavailable; unsupported compression format: ' + decompress)

        import stat
        import tarfile

        # Permission masks, built from the stat constants so that no octal
        # literal (whose spelling differs between Python versions) is needed.
        any_exec = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH      # octal 111
        all_read_write = (stat.S_IRUSR | stat.S_IWUSR |
                  stat.S_IRGRP | stat.S_IWGRP |
                  stat.S_IROTH | stat.S_IWOTH)        # octal 666
        permission_bits = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO   # octal 777

        stream.seek(start_offset)
        # Python 2.5.1 crashes if name is None; see Python bug #1706850
        tar = tarfile.open(name = '', mode = rmode, fileobj = stream)

        # The only way to read the umask is to set it, so put it straight back.
        current_umask = os.umask(0)
        os.umask(current_umask)

        uid = gid = None
        try:
            uid = os.geteuid()
            gid = os.getegid()
        except Exception:
            debug("Can't get uid/gid")

        def chmod_extract(tarinfo):
            # If any X bit is set, they all must be
            if tarinfo.mode & any_exec:
                tarinfo.mode |= any_exec

            # Everyone gets read and write (subject to the umask)
            # No special bits are allowed.
            tarinfo.mode = ((tarinfo.mode | all_read_write) & ~current_umask) & permission_bits

            # Don't change owner, even if run as root. Id 0 is a valid id,
            # so test against None rather than for truth.
            if uid is not None:
                tarinfo.uid = uid
            if gid is not None:
                tarinfo.gid = gid
            tar.extract(tarinfo, destdir)

        extracted_anything = False
        ext_dirs = []

        try:
            for tarinfo in tar:
                # An empty 'extract' means "everything", as in the GNU tar branch.
                if not extract or \
                   tarinfo.name.startswith(extract + '/') or \
                   tarinfo.name == extract:
                    if tarinfo.isdir():
                        ext_dirs.append(tarinfo)

                    chmod_extract(tarinfo)
                    extracted_anything = True

            # Due to a bug in tarfile (python versions < 2.5), we have to manually
            # set the mtime of each directory that we extract after extracting everything.

            for tarinfo in ext_dirs:
                dirname = os.path.join(destdir, tarinfo.name)
                os.utime(dirname, (tarinfo.mtime, tarinfo.mtime))
        finally:
            tar.close()

        if extract and not extracted_anything:
            raise SafeException('Unable to find specified file = %s in archive' % extract)
def _extract(stream, destdir, command, start_offset = 0):
    """Run execvp('command') inside destdir in a child process, with
    stream seeked to 'start_offset' as stdin.

    The child runs with TZ set to GMT and destdir as the directory handed to
    the sandbox as writable. The parent waits for the child to finish.
    @param stream: seekable file object; its fileno() becomes the child's stdin
    @param destdir: directory to run the command in
    @param command: sequence giving the program name followed by its arguments
    @param start_offset: offset to seek stream to before the command starts
    @raise SafeException: if the command does not exit with status 0"""
    child = os.fork()
    if child == 0:
        # In the child: report any failure, then always _exit so the child
        # can never carry on running the caller's code.
        try:
            try:
                # Some zip archives are missing timezone information; force consistent results
                os.environ['TZ'] = 'GMT'

                os.chdir(destdir)
                stream.seek(start_offset)
                os.dup2(stream.fileno(), 0)
                _exec_maybe_sandboxed(destdir, *command)
            except:
                traceback.print_exc()
        finally:
            os._exit(1)
    pid, status = os.waitpid(child, 0)
    assert pid == child
    if status != 0:
        # The status from waitpid() combines the exit code with the
        # terminating signal; decode it rather than printing it raw.
        if os.WIFSIGNALED(status):
            outcome = 'killed by signal %d' % os.WTERMSIG(status)
        else:
            outcome = 'exit code %d' % os.WEXITSTATUS(status)
        raise SafeException('Failed to extract archive; ' + outcome)