Don't require a bzip2 executable when handling .tar.bz2 archives
[zeroinstall/solver.git] / zeroinstall / zerostore / unpack.py
blob0155aec73d18867b964b8b7553a8ac003f4dae33
1 """Unpacking archives of various formats."""
3 # Copyright (C) 2009, Thomas Leonard
4 # See the README file for details, or visit http://0install.net.
6 from zeroinstall import _
7 import os, subprocess
8 import shutil
9 import traceback
10 from tempfile import mkdtemp, mkstemp
11 import re
12 from logging import debug, warn
13 from zeroinstall import SafeException
14 from zeroinstall.support import find_in_path, ro_rmtree
_cpio_version = None


def _get_cpio_version():
    """Return the first line printed by 'cpio --version', stripped of
    surrounding whitespace.

    The command is run once, the first time it is needed, and the result is
    cached in the module-level _cpio_version. stderr is merged into stdout, so
    if cpio cannot be run the shell's error message is what gets cached. An
    empty string is returned if the command prints nothing at all.
    @rtype: str"""
    global _cpio_version
    if _cpio_version is None:
        pipe = os.popen('cpio --version 2>&1')
        try:
            # readline() returns '' on empty output instead of raising
            # StopIteration like next() does.
            _cpio_version = pipe.readline().strip()
        finally:
            # Close the pipe explicitly so the child is reaped promptly.
            pipe.close()
        debug(_("cpio version = %s"), _cpio_version)
    return _cpio_version
def _gnu_cpio():
    """Report whether the 'cpio' command identifies itself as GNU cpio.

    The decision is made by looking for the text '(GNU cpio)' in the version
    line returned by _get_cpio_version().
    @rtype: bool"""
    version_line = _get_cpio_version()
    is_gnu = '(GNU cpio)' in version_line
    debug(_("Is GNU cpio = %s"), is_gnu)
    return is_gnu
_tar_version = None


def _get_tar_version():
    """Return the first line printed by 'tar --version', stripped of
    surrounding whitespace.

    The command is run once, the first time it is needed, and the result is
    cached in the module-level _tar_version. stderr is merged into stdout, so
    if tar cannot be run the shell's error message is what gets cached. An
    empty string is returned if the command prints nothing at all.
    @rtype: str"""
    global _tar_version
    if _tar_version is None:
        pipe = os.popen('tar --version 2>&1')
        try:
            # readline() returns '' on empty output instead of raising
            # StopIteration like next() does.
            _tar_version = pipe.readline().strip()
        finally:
            # Close the pipe explicitly so the child is reaped promptly.
            pipe.close()
        debug(_("tar version = %s"), _tar_version)
    return _tar_version
def _gnu_tar():
    """Report whether the 'tar' command identifies itself as GNU tar.

    The decision is made by looking for the text '(GNU tar)' in the version
    line returned by _get_tar_version().
    @rtype: bool"""
    version_line = _get_tar_version()
    is_gnu = '(GNU tar)' in version_line
    debug(_("Is GNU tar = %s"), is_gnu)
    return is_gnu
def recent_gnu_tar():
    """Check whether 'tar' is GNU tar with a version newer than 1.13.92.

    The version number is taken from the text following the first ')' in the
    output of _get_tar_version(). If tar is GNU tar but no version number can
    be found there, a warning is logged and False is returned.
    @return: True only for GNU tar with a version greater than 1.13.92
    @rtype: bool
    @deprecated: should be private"""
    is_recent = False
    if _gnu_tar():
        match = re.search(r'\)\s*(\d+(\.\d+)*)', _get_tar_version())
        if match:
            # Build a real list: comparing the result of map() with a list is
            # only meaningful where map() itself returns a list.
            version = [int(part) for part in match.group(1).split('.')]
            is_recent = version > [1, 13, 92]
        else:
            warn(_("Failed to extract GNU tar version number"))
    debug(_("Recent GNU tar = %s"), is_recent)
    return is_recent
# Path of the 'pola-run' sandboxing helper, or None to extract without a sandbox.
# Disabled, as Plash does not currently support fchmod(2).
# While this is None, _exec_maybe_sandboxed() execs the program directly.
_pola_run = None
#_pola_run = find_in_path('pola-run')
#if _pola_run:
#    info('Found pola-run: %s', _pola_run)
#else:
#    info('pola-run not found; archive extraction will not be sandboxed')
def type_from_url(url):
    """Guess the MIME type of an archive from the suffix of its URL.

    The comparison is case-insensitive.
    @param url: the URL (or file name) of the archive
    @return: the MIME type, or None if the suffix is not recognised"""
    known_suffixes = (
        ('.rpm', 'application/x-rpm'),
        ('.deb', 'application/x-deb'),
        ('.tar.bz2', 'application/x-bzip-compressed-tar'),
        ('.tar.gz', 'application/x-compressed-tar'),
        ('.tar.lzma', 'application/x-lzma-compressed-tar'),
        ('.tar.xz', 'application/x-xz-compressed-tar'),
        ('.tgz', 'application/x-compressed-tar'),
        ('.tar', 'application/x-tar'),
        ('.zip', 'application/zip'),
        ('.cab', 'application/vnd.ms-cab-compressed'),
    )
    lowered = url.lower()
    for suffix, mime_type in known_suffixes:
        if lowered.endswith(suffix):
            return mime_type
    return None
def check_type_ok(mime_type):
    """Verify that the external software needed to unpack an archive of the
    given MIME type is installed.

    Plain, gzip, bzip2 and lzma tar archives are accepted without looking for
    any helper program.
    @param mime_type: the MIME type of the archive (must not be empty)
    @raise SafeException: if the needed software is not available, or the
    type is not supported at all"""
    assert mime_type

    # bzip2: we'll fall back to Python's built-in tar.bz2 support.
    # lzma: we can get it through Zero Install.
    if mime_type in ('application/x-bzip-compressed-tar',
                     'application/x-lzma-compressed-tar',
                     'application/x-compressed-tar',
                     'application/x-tar'):
        return

    if mime_type == 'application/x-rpm':
        if find_in_path('rpm2cpio'):
            return
        raise SafeException(_("This package looks like an RPM, but you don't have the rpm2cpio command "
                "I need to extract it. Install the 'rpm' package first (this works even if "
                "you're on a non-RPM-based distribution such as Debian)."))

    if mime_type == 'application/x-deb':
        if find_in_path('ar'):
            return
        raise SafeException(_("This package looks like a Debian package, but you don't have the 'ar' command "
                "I need to extract it. Install the package containing it (sometimes called 'binutils') "
                "first. This works even if you're on a non-Debian-based distribution such as Red Hat)."))

    if mime_type == 'application/zip':
        if find_in_path('unzip'):
            return
        raise SafeException(_("This package looks like a zip-compressed archive, but you don't have the 'unzip' command "
                "I need to extract it. Install the package containing it first."))

    if mime_type == 'application/vnd.ms-cab-compressed':
        if find_in_path('cabextract'):
            return
        raise SafeException(_("This package looks like a Microsoft Cabinet archive, but you don't have the 'cabextract' command "
                "I need to extract it. Install the package containing it first."))

    if mime_type == 'application/x-xz-compressed-tar':
        if find_in_path('unxz'):
            return
        raise SafeException(_("This package looks like a xz-compressed package, but you don't have the 'unxz' command "
                "I need to extract it. Install the package containing it (it's probably called 'xz-utils') "
                "first."))

    # Anything else is a type we do not know how to unpack.
    from zeroinstall import version
    raise SafeException(_("Unsupported archive type '%(type)s' (for injector version %(version)s)") % {'type': mime_type, 'version': version})
def _exec_maybe_sandboxed(writable, prog, *args):
    """Replace the current process with 'prog', sandboxed if possible.

    When _pola_run is set, the program is started through it with '/' mapped
    in via '-f' and 'writable' (if given) via '-fw'. Otherwise the program is
    exec'd directly, without a sandbox.
    @param writable: directory passed to the sandbox with '-fw', or None
    @param prog: name of the program, looked up with find_in_path
    @param args: arguments passed on to the program
    @raise Exception: if prog cannot be found"""
    prog_path = find_in_path(prog)
    if not prog_path:
        raise Exception(_("'%s' not found in $PATH") % prog)

    if _pola_run is None:
        os.execlp(prog_path, prog_path, *args)

    # We have pola-shell :-)
    pola_args = ['--prog', prog_path, '-f', '/']
    for arg in args:
        pola_args.extend(('-a', arg))
    if writable:
        pola_args.extend(('-fw', writable))
    os.execl(_pola_run, _pola_run, *pola_args)
def unpack_archive_over(url, data, destdir, extract = None, type = None, start_offset = 0):
    """Like unpack_archive, except that we unpack to a temporary directory first and
    then move things over, checking that we're not following symlinks at each stage.
    Use this when you want to unpack an archive into a directory which already has
    stuff in it.

    The temporary directory is created inside destdir and is always removed
    again, whether or not unpacking succeeds.
    @param url: used to guess the archive type when 'type' is None
    @param data: stream containing the archive
    @param destdir: existing directory to merge the archive's contents into
    @param extract: sub-directory of the archive to extract, or None for all
    @param type: MIME type of the archive, or None to guess it from url
    @param start_offset: offset in data at which the archive starts
    @raise SafeException: if a directory or file would be unpacked over a
    symlink, or a directory over a non-directory
    @since: 0.28"""
    import errno
    import stat
    tmpdir = mkdtemp(dir = destdir)
    try:
        mtimes = []

        unpack_archive(url, data, tmpdir, extract, type, start_offset)

        stem_len = len(tmpdir)
        for root, dirs, files in os.walk(tmpdir):
            relative_root = root[stem_len + 1:] or '.'
            target_root = os.path.join(destdir, relative_root)
            try:
                info = os.lstat(target_root)
            except OSError as ex:
                if ex.errno != errno.ENOENT:
                    raise   # Some odd error.
                # Doesn't exist. OK.
                os.mkdir(target_root)
            else:
                if stat.S_ISLNK(info.st_mode):
                    raise SafeException(_('Attempt to unpack dir over symlink "%s"!') % relative_root)
                elif not stat.S_ISDIR(info.st_mode):
                    raise SafeException(_('Attempt to unpack dir over non-directory "%s"!') % relative_root)
            # 'root' already starts with tmpdir; joining it onto tmpdir again
            # only works when tmpdir is absolute.
            mtimes.append((relative_root, os.lstat(root).st_mtime))

            for s in dirs:  # Symlinks are counted as directories
                if os.path.islink(os.path.join(root, s)):
                    # Move the link itself, like a file.
                    files.append(s)

            for f in files:
                src = os.path.join(root, f)
                dest = os.path.join(destdir, relative_root, f)
                if os.path.islink(dest):
                    raise SafeException(_('Attempt to unpack file over symlink "%s"!') %
                            os.path.join(relative_root, f))
                os.rename(src, dest)

        # Moving files in changed the directory mtimes, so restore them.
        # The first entry is destdir itself ('.'), which is left alone.
        for path, mtime in mtimes[1:]:
            os.utime(os.path.join(destdir, path), (mtime, mtime))
    finally:
        ro_rmtree(tmpdir)
def unpack_archive(url, data, destdir, extract = None, type = None, start_offset = 0):
    """Unpack the archive in stream 'data' into the directory 'destdir'.

    If 'extract' is given, only that sub-directory of the archive is unpacked
    (i.e. destdir/extract will exist afterwards). If 'type' is None, the
    format is worked out from the name in 'url'.
    @raise SafeException: if the type cannot be guessed or is not known"""
    if type is None:
        type = type_from_url(url)
        if type is None:
            raise SafeException(_("Unknown extension (and no MIME type given) in '%s'") % url)

    # tar-based formats, mapped to the compression handed to extract_tar
    tar_compression = {
        'application/x-tar': None,
        'application/x-compressed-tar': 'gzip',
        'application/x-bzip-compressed-tar': 'bzip2',
        'application/x-lzma-compressed-tar': 'lzma',
        'application/x-xz-compressed-tar': 'xz',
    }
    # every other format has its own extractor
    other_extractors = {
        'application/x-deb': extract_deb,
        'application/x-rpm': extract_rpm,
        'application/zip': extract_zip,
        'application/vnd.ms-cab-compressed': extract_cab,
    }

    if type in tar_compression:
        extract_tar(data, destdir, extract, tar_compression[type], start_offset)
    elif type in other_extractors:
        other_extractors[type](data, destdir, extract, start_offset)
    else:
        raise SafeException(_('Unknown MIME type "%(type)s" for "%(url)s"') % {'type': type, 'url': url})
def extract_deb(stream, destdir, extract = None, start_offset = 0):
    """Unpack the data archive of a Debian package into destdir.

    The package is copied to 'archive.deb' inside destdir (ar cannot read from
    stdin). 'ar' lists its members and extracts the first one named data.tar,
    data.tar.gz, data.tar.bz2, data.tar.lzma or data.tar.xz, which is then
    unpacked with extract_tar. The temporary copy is removed again even if
    extraction fails.
    @param stream: seekable stream containing the package
    @param destdir: directory to unpack into
    @param extract: not supported; must be empty
    @param start_offset: offset in stream at which the package starts
    @raise SafeException: if extract is given, or no data archive is found"""
    if extract:
        raise SafeException(_('Sorry, but the "extract" attribute is not yet supported for Debs'))

    # Member name -> compression argument for extract_tar
    compressions = {
        'data.tar': None,
        'data.tar.gz': 'gzip',
        'data.tar.bz2': 'bzip2',
        'data.tar.lzma': 'lzma',
        'data.tar.xz': 'xz',
    }

    stream.seek(start_offset)
    # ar can't read from stdin, so make a copy...
    deb_copy_name = os.path.join(destdir, 'archive.deb')
    try:
        # Binary mode: the package must be copied byte-for-byte.
        deb_copy = open(deb_copy_name, 'wb')
        try:
            shutil.copyfileobj(stream, deb_copy)
        finally:
            deb_copy.close()

        p = subprocess.Popen(('ar', 't', 'archive.deb'), stdout=subprocess.PIPE, cwd=destdir, universal_newlines=True)
        o = p.communicate()[0]
        for line in o.split('\n'):
            if line in compressions:
                data_tar = line
                data_compression = compressions[line]
                break
        else:
            raise SafeException(_("File is not a Debian package."))

        _extract(stream, destdir, ('ar', 'x', 'archive.deb', data_tar))
    finally:
        # Don't leave the copy behind in destdir if anything above failed.
        if os.path.exists(deb_copy_name):
            os.unlink(deb_copy_name)

    data_name = os.path.join(destdir, data_tar)
    data_stream = open(data_name, 'rb')
    try:
        # Unlink while still open, so the tarball itself doesn't end up in
        # the unpacked tree.
        os.unlink(data_name)
        extract_tar(data_stream, destdir, None, data_compression)
    finally:
        data_stream.close()
def extract_rpm(stream, destdir, extract = None, start_offset = 0):
    """Unpack an RPM package into destdir.

    A forked child runs rpm2cpio with the stream as stdin and writes the cpio
    archive to a temporary file, which is then unpacked with 'cpio -mid'.
    Afterwards the mtime of destdir and of every directory below it is set
    to 0. The temporary file is always removed.
    @param stream: stream containing the package (must have a fileno())
    @param destdir: directory to unpack into
    @param extract: not supported; must be empty
    @param start_offset: offset in stream at which the package starts
    @raise SafeException: if extract is given, or rpm2cpio or cpio fails"""
    if extract:
        raise SafeException(_('Sorry, but the "extract" attribute is not yet supported for RPMs'))
    fd, cpiopath = mkstemp('-rpm-tmp')
    try:
        child = os.fork()
        if child == 0:
            # In the child: it must never return into the caller's code, so
            # every failure is reported and ends in os._exit().
            try:
                try:
                    os.dup2(stream.fileno(), 0)
                    os.lseek(0, start_offset, 0)
                    os.dup2(fd, 1)
                    _exec_maybe_sandboxed(None, 'rpm2cpio', '-')
                except:
                    traceback.print_exc()
            finally:
                os._exit(1)
        pid, status = os.waitpid(child, 0)
        assert pid == child
        if status != 0:
            raise SafeException(_("rpm2cpio failed; can't unpack RPM archive; exit code %d") % status)
        os.close(fd)
        fd = None

        args = ['cpio', '-mid']
        if _gnu_cpio():
            args.append('--quiet')

        cpio_stream = open(cpiopath, 'rb')
        try:
            _extract(cpio_stream, destdir, args)
        finally:
            cpio_stream.close()

        # Set the mtime of every directory under 'tmp' to 0, since cpio doesn't
        # preserve directory mtimes.
        for dirname, subdirs, filenames in os.walk(destdir):
            os.utime(dirname, (0, 0))
    finally:
        if fd is not None:
            os.close(fd)
        os.unlink(cpiopath)
def extract_cab(stream, destdir, extract, start_offset = 0):
    """Unpack a Microsoft Cabinet archive into destdir using 'cabextract'.

    The archive is copied to 'archive.cab' inside destdir (cabextract cannot
    read from stdin). The copy is removed again even if extraction fails.
    @param stream: seekable stream containing the archive
    @param destdir: directory to unpack into
    @param extract: not supported; must be empty
    @param start_offset: offset in stream at which the archive starts
    @raise SafeException: if extract is given, or cabextract fails
    @since: 0.24"""
    if extract:
        raise SafeException(_('Sorry, but the "extract" attribute is not yet supported for Cabinet files'))

    stream.seek(start_offset)
    # cabextract can't read from stdin, so make a copy...
    cab_copy_name = os.path.join(destdir, 'archive.cab')
    try:
        # Binary mode: the archive must be copied byte-for-byte.
        cab_copy = open(cab_copy_name, 'wb')
        try:
            shutil.copyfileobj(stream, cab_copy)
        finally:
            cab_copy.close()

        _extract(stream, destdir, ['cabextract', '-s', '-q', 'archive.cab'])
    finally:
        # Don't leave the copy behind in destdir if anything above failed.
        if os.path.exists(cab_copy_name):
            os.unlink(cab_copy_name)
def extract_zip(stream, destdir, extract, start_offset = 0):
    """Unpack a zip archive into destdir using 'unzip'.

    The archive is copied to 'archive.zip' inside destdir (unzip cannot read
    from stdin). The copy is removed again even if extraction fails.
    @param stream: seekable stream containing the archive
    @param destdir: directory to unpack into
    @param extract: sub-directory of the archive to extract, or empty for all
    @param start_offset: offset in stream at which the archive starts
    @raise SafeException: if extract contains characters outside the accepted
    set, or unzip fails"""
    if extract:
        # Limit the characters we accept, to avoid sending dodgy
        # strings to zip
        if not re.match('^[a-zA-Z0-9][- _a-zA-Z0-9.]*$', extract):
            raise SafeException(_('Illegal character in extract attribute'))

    stream.seek(start_offset)
    # unzip can't read from stdin, so make a copy...
    zip_copy_name = os.path.join(destdir, 'archive.zip')
    try:
        # Binary mode: the archive must be copied byte-for-byte.
        zip_copy = open(zip_copy_name, 'wb')
        try:
            shutil.copyfileobj(stream, zip_copy)
        finally:
            zip_copy.close()

        args = ['unzip', '-q', '-o', 'archive.zip']

        if extract:
            args.append(extract + '/*')

        _extract(stream, destdir, args)
    finally:
        # Don't leave the copy behind in destdir if anything above failed.
        if os.path.exists(zip_copy_name):
            os.unlink(zip_copy_name)
def extract_tar(stream, destdir, extract, decompress, start_offset = 0):
    """Unpack a (possibly compressed) tar archive into destdir.

    GNU tar is used when it is available. Otherwise, or when a bzip2 archive
    is given and there is no 'bzip2' executable for tar to run, Python's
    tarfile module is used instead; that path supports only uncompressed,
    gzip and bzip2 archives.
    @param stream: seekable stream containing the archive
    @param destdir: directory to unpack into
    @param extract: sub-directory of the archive to extract, or None for all
    @param decompress: None, 'bzip2', 'gzip', 'lzma' or 'xz'
    @param start_offset: offset in stream at which the archive starts
    @raise SafeException: if extract contains characters outside the accepted
    set, the compression is unsupported without GNU tar, tar fails, or
    nothing matching extract is found in the archive"""
    if extract:
        # Limit the characters we accept, to avoid sending dodgy
        # strings to tar
        if not re.match('^[a-zA-Z0-9][- _a-zA-Z0-9.]*$', extract):
            raise SafeException(_('Illegal character in extract attribute'))

    assert decompress in [None, 'bzip2', 'gzip', 'lzma', 'xz']

    use_gnu_tar = _gnu_tar()
    if use_gnu_tar and decompress == 'bzip2' and not find_in_path('bzip2'):
        # check_type_ok() accepts .tar.bz2 without a bzip2 executable on the
        # promise that we fall back to Python's built-in support.
        debug(_("Is GNU tar = %s"), False)
        use_gnu_tar = False

    if use_gnu_tar:
        _extract_tar_gnu(stream, destdir, extract, decompress, start_offset)
    else:
        _extract_tar_python(stream, destdir, extract, decompress, start_offset)


def _extract_tar_gnu(stream, destdir, extract, decompress, start_offset):
    """Unpack a tar archive by running GNU tar with the stream as its stdin."""
    ext_cmd = ['tar']
    if decompress == 'bzip2':
        ext_cmd.append('--bzip2')
    elif decompress == 'gzip':
        ext_cmd.append('-z')
    elif decompress == 'lzma':
        unlzma = find_in_path('unlzma')
        if not unlzma:
            # Fall back to the '_unlzma' helper next to this module.
            unlzma = os.path.abspath(os.path.join(os.path.dirname(__file__), '_unlzma'))
        ext_cmd.append('--use-compress-program=' + unlzma)
    elif decompress == 'xz':
        ext_cmd.append('--use-compress-program=unxz')

    if recent_gnu_tar():
        ext_cmd.extend(('-x', '--no-same-owner', '--no-same-permissions'))
    else:
        ext_cmd.extend(('xf', '-'))

    if extract:
        ext_cmd.append(extract)

    _extract(stream, destdir, ext_cmd, start_offset)


def _extract_tar_python(stream, destdir, extract, decompress, start_offset):
    """Unpack a tar archive using Python's tarfile module.

    This will probably be a lot slower than GNU tar and does not support lzma
    or xz; however, it is portable."""
    if decompress is None:
        rmode = 'r|'
    elif decompress == 'bzip2':
        rmode = 'r|bz2'
    elif decompress == 'gzip':
        rmode = 'r|gz'
    else:
        raise SafeException(_('GNU tar unavailable; unsupported compression format: %s') % decompress)

    import tarfile

    stream.seek(start_offset)
    # Python 2.5.1 crashes if name is None; see Python bug #1706850
    tar = tarfile.open(name = '', mode = rmode, fileobj = stream)
    try:
        # The umask can only be read by setting it, so put it straight back.
        current_umask = os.umask(0)
        os.umask(current_umask)

        uid = gid = None
        try:
            uid = os.geteuid()
            gid = os.getegid()
        except AttributeError:
            # These functions do not exist on every platform.
            debug(_("Can't get uid/gid"))

        def chmod_extract(tarinfo):
            # If any X bit is set, they all must be
            if tarinfo.mode & 0o111:
                tarinfo.mode |= 0o111

            # Everyone gets read and write (subject to the umask)
            # No special bits are allowed.
            tarinfo.mode = ((tarinfo.mode | 0o666) & ~current_umask) & 0o777

            # Don't change owner, even if run as root. Compare with None:
            # root's id is 0, which a plain truth test would skip.
            if uid is not None:
                tarinfo.uid = uid
            if gid is not None:
                tarinfo.gid = gid
            # NOTE(review): member names come from the archive and are not
            # checked here for absolute paths or '..' components - confirm
            # whether callers rely on the archive digest for safety.
            tar.extract(tarinfo, destdir)

        extracted_anything = False
        ext_dirs = []

        for tarinfo in tar:
            if extract is None or \
               tarinfo.name.startswith(extract + '/') or \
               tarinfo.name == extract:
                if tarinfo.isdir():
                    ext_dirs.append(tarinfo)

                chmod_extract(tarinfo)
                extracted_anything = True

        # Due to a bug in tarfile (python versions < 2.5), we have to manually
        # set the mtime of each directory that we extract after extracting everything.
        for tarinfo in ext_dirs:
            dirname = os.path.join(destdir, tarinfo.name)
            os.utime(dirname, (tarinfo.mtime, tarinfo.mtime))
    finally:
        tar.close()

    if extract and not extracted_anything:
        raise SafeException(_('Unable to find specified file = %s in archive') % extract)
def _extract(stream, destdir, command, start_offset = 0):
    """Run 'command' in a child process with destdir as its working directory
    and with stream, seeked to 'start_offset', as its stdin.

    The child's stderr is captured and included in the error if it fails.
    @raise SafeException: if the command exits with a non-zero status"""

    # Some zip archives are missing timezone information; force consistent results
    child_env = dict(os.environ, TZ = 'GMT')

    stream.seek(start_offset)

    # TODO: use pola-run if available, once it supports fchmod
    child = subprocess.Popen(command, stdin = stream, stderr = subprocess.PIPE,
                             cwd = destdir, env = child_env)

    cerr = child.communicate()[1]

    status = child.wait()
    if status == 0:
        return
    raise SafeException(_('Failed to extract archive (using %(command)s); exit code %(status)d:\n%(err)s') % {'command': command, 'status': status, 'err': cerr.strip()})