Log using the "0install" logger rather than "root"
[zeroinstall/solver.git] / zeroinstall / zerostore / unpack.py
blob96a2430fa4fd480c747e4a9bd7fc2759922dce01
1 """Unpacking archives of various formats."""
3 # Copyright (C) 2009, Thomas Leonard
4 # See the README file for details, or visit http://0install.net.
6 from zeroinstall import _, logger
7 import os, subprocess
8 import shutil
9 import glob
10 import traceback
11 from tempfile import mkdtemp, mkstemp
12 import re
13 import errno
14 from zeroinstall import SafeException
15 from zeroinstall.support import find_in_path, ro_rmtree
# First line of `cpio --version`, remembered after the first lookup.
_cpio_version = None


def _get_cpio_version():
    """Return the first line of output from `cpio --version`.

    The command is run at most once per process; the result is stored in the
    module-level _cpio_version and reused by later calls.
    """
    global _cpio_version
    if _cpio_version is not None:
        return _cpio_version

    proc = subprocess.Popen(
        ['cpio', '--version'],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # fold stderr into the captured output
        universal_newlines=True)
    output, _ignored = proc.communicate()
    proc.stdout.close()
    proc.wait()

    _cpio_version = output.partition('\n')[0]
    logger.debug(_("cpio version = %s"), _cpio_version)
    return _cpio_version
def _gnu_cpio():
    """Return True if the version banner of `cpio` contains '(GNU cpio)'."""
    banner = _get_cpio_version()
    is_gnu = '(GNU cpio)' in banner
    logger.debug(_("Is GNU cpio = %s"), is_gnu)
    return is_gnu
# First line of `tar --version`, remembered after the first lookup.
_tar_version = None


def _get_tar_version():
    """Return the first line of output from `tar --version`.

    The command is run at most once per process; the result is stored in the
    module-level _tar_version and reused by later calls.
    """
    global _tar_version
    if _tar_version is not None:
        return _tar_version

    proc = subprocess.Popen(
        ['tar', '--version'],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # fold stderr into the captured output
        universal_newlines=True)
    output, _ignored = proc.communicate()
    proc.stdout.close()
    proc.wait()

    _tar_version = output.partition('\n')[0]
    logger.debug(_("tar version = %s"), _tar_version)
    return _tar_version
def _gnu_tar():
    """Return True if the version banner of `tar` contains '(GNU tar)'."""
    banner = _get_tar_version()
    is_gnu = '(GNU tar)' in banner
    logger.debug(_("Is GNU tar = %s"), is_gnu)
    return is_gnu
def recent_gnu_tar():
    """Report whether `tar` is GNU tar with a version greater than 1.13.92.

    @deprecated: should be private
    @return: True only when the tar banner identifies GNU tar and the version
    number parsed from it compares greater than [1, 13, 92]; False otherwise
    (including when no version number can be parsed).
    """
    is_recent = False
    if _gnu_tar():
        # The version number follows the closing bracket of "(GNU tar)".
        match = re.search(r'\)\s*(\d+(\.\d+)*)', _get_tar_version())
        if match:
            version = [int(part) for part in match.group(1).split('.')]
            is_recent = version > [1, 13, 92]
        else:
            # Logger.warn() is a deprecated alias; warning() is the supported name.
            logger.warning(_("Failed to extract GNU tar version number"))
    logger.debug(_("Recent GNU tar = %s"), is_recent)
    return is_recent
# Path of the pola-run sandboxing helper, or None when sandboxing is not used.
# Disabled, as Plash does not currently support fchmod(2).
# While this is None, _exec_maybe_sandboxed() executes programs directly.
_pola_run = None
# The original detection code is kept below for reference:
#_pola_run = find_in_path('pola-run')
#if _pola_run:
#	info('Found pola-run: %s', _pola_run)
#else:
#	info('pola-run not found; archive extraction will not be sandboxed')
def type_from_url(url):
    """Guess the MIME type for this resource based on its URL.

    The comparison is case-insensitive and looks only at the end of the URL.
    Returns None if we don't know what it is.
    """
    # (suffix, MIME type) pairs, tried in order.
    suffix_types = (
        ('.rpm', 'application/x-rpm'),
        ('.deb', 'application/x-deb'),
        ('.tar.bz2', 'application/x-bzip-compressed-tar'),
        ('.tar.gz', 'application/x-compressed-tar'),
        ('.tar.lzma', 'application/x-lzma-compressed-tar'),
        ('.tar.xz', 'application/x-xz-compressed-tar'),
        ('.tbz', 'application/x-bzip-compressed-tar'),
        ('.tgz', 'application/x-compressed-tar'),
        ('.tlz', 'application/x-lzma-compressed-tar'),
        ('.txz', 'application/x-xz-compressed-tar'),
        ('.tar', 'application/x-tar'),
        ('.zip', 'application/zip'),
        ('.cab', 'application/vnd.ms-cab-compressed'),
        ('.dmg', 'application/x-apple-diskimage'),
        ('.gem', 'application/x-ruby-gem'),
    )
    lowered = url.lower()
    for suffix, mime_type in suffix_types:
        if lowered.endswith(suffix):
            return mime_type
    return None
def check_type_ok(mime_type):
    """Check we have the needed software to extract from an archive of the given type.

    Each archive type that needs an external program is checked by looking
    that program up with find_in_path().
    @raise SafeException: if the needed software is not available, or the
    type is not one this module knows about"""
    assert mime_type

    # These types are accepted without looking for any helper program:
    # bzip2 can fall back to Python's built-in tar.bz2 support and the lzma
    # decompressor can be obtained through Zero Install.
    if mime_type in ('application/x-bzip-compressed-tar',
                     'application/x-lzma-compressed-tar',
                     'application/x-compressed-tar',
                     'application/x-tar',
                     'application/x-ruby-gem'):
        return

    if mime_type == 'application/x-rpm':
        if find_in_path('rpm2cpio'):
            return
        raise SafeException(_("This package looks like an RPM, but you don't have the rpm2cpio command "
                "I need to extract it. Install the 'rpm' package first (this works even if "
                "you're on a non-RPM-based distribution such as Debian)."))

    if mime_type == 'application/x-deb':
        if find_in_path('ar'):
            return
        raise SafeException(_("This package looks like a Debian package, but you don't have the 'ar' command "
                "I need to extract it. Install the package containing it (sometimes called 'binutils') "
                "first. This works even if you're on a non-Debian-based distribution such as Red Hat)."))

    if mime_type == 'application/zip':
        if find_in_path('unzip'):
            return
        raise SafeException(_("This package looks like a zip-compressed archive, but you don't have the 'unzip' command "
                "I need to extract it. Install the package containing it first."))

    if mime_type == 'application/vnd.ms-cab-compressed':
        if find_in_path('cabextract'):
            return
        raise SafeException(_("This package looks like a Microsoft Cabinet archive, but you don't have the 'cabextract' command "
                "I need to extract it. Install the package containing it first."))

    if mime_type == 'application/x-apple-diskimage':
        if find_in_path('hdiutil'):
            return
        raise SafeException(_("This package looks like a Apple Disk Image, but you don't have the 'hdiutil' command "
                "I need to extract it."))

    if mime_type == 'application/x-xz-compressed-tar':
        if find_in_path('unxz'):
            return
        raise SafeException(_("This package looks like a xz-compressed package, but you don't have the 'unxz' command "
                "I need to extract it. Install the package containing it (it's probably called 'xz-utils') "
                "first."))

    # Anything else is a type we have no extractor for.
    from zeroinstall import version
    raise SafeException(_("Unsupported archive type '%(type)s' (for injector version %(version)s)") % {'type': mime_type, 'version': version})
def _exec_maybe_sandboxed(writable, prog, *args):
    """Replace the current process with prog, sandboxed when pola-run is configured.

    With a sandbox, (only) the 'writable' directory is made writable.
    If no sandbox is available (_pola_run is None), prog is executed directly.
    @raise Exception: if prog cannot be found in $PATH"""
    executable = find_in_path(prog)
    if not executable:
        raise Exception(_("'%s' not found in $PATH") % prog)

    if _pola_run is None:
        os.execlp(executable, executable, *args)

    # We have pola-shell :-)
    sandbox_args = ['--prog', executable, '-f', '/']
    for arg in args:
        sandbox_args.extend(('-a', arg))
    if writable:
        sandbox_args.extend(('-fw', writable))
    os.execl(_pola_run, _pola_run, *sandbox_args)
def unpack_archive_over(url, data, destdir, extract = None, type = None, start_offset = 0):
    """Like unpack_archive, except that we unpack to a temporary directory first and
    then move things over, checking that we're not following symlinks at each stage.
    Use this when you want to unpack an archive into a directory which already has
    stuff in it.

    The temporary directory is created inside destdir and is always removed
    again, whether or not unpacking succeeds.
    @raise SafeException: if a directory or file from the archive would be
    unpacked over a symlink, or a directory over a non-directory
    @note: Since 0.49, the leading "extract" component is removed (unlike unpack_archive).
    @since: 0.28"""
    import stat

    # Check the argument before creating anything, so that a bad value cannot
    # leave a stray temporary directory behind in destdir.
    assert extract is None or os.sep not in extract, extract

    tmpdir = mkdtemp(dir = destdir)
    try:
        mtimes = []

        unpack_archive(url, data, tmpdir, extract, type, start_offset)

        if extract is None:
            srcdir = tmpdir
        else:
            srcdir = os.path.join(tmpdir, extract)
            assert not os.path.islink(srcdir)

        stem_len = len(srcdir)
        for root, dirs, files in os.walk(srcdir):
            relative_root = root[stem_len + 1:] or '.'
            target_root = os.path.join(destdir, relative_root)
            try:
                info = os.lstat(target_root)
            except OSError as ex:
                if ex.errno != errno.ENOENT:
                    raise   # Some odd error.
                # Doesn't exist. OK.
                os.mkdir(target_root)
            else:
                if stat.S_ISLNK(info.st_mode):
                    raise SafeException(_('Attempt to unpack dir over symlink "%s"!') % relative_root)
                elif not stat.S_ISDIR(info.st_mode):
                    raise SafeException(_('Attempt to unpack dir over non-directory "%s"!') % relative_root)
            # os.walk() already yields root prefixed with srcdir; joining it
            # onto srcdir again would double the path when srcdir is relative.
            # Record the mtime now, before moving entries out changes it.
            mtimes.append((relative_root, os.lstat(root).st_mtime))

            for s in dirs:  # Symlinks are counted as directories
                src = os.path.join(srcdir, relative_root, s)
                if os.path.islink(src):
                    files.append(s)

            for f in files:
                src = os.path.join(srcdir, relative_root, f)
                dest = os.path.join(destdir, relative_root, f)
                if os.path.islink(dest):
                    raise SafeException(_('Attempt to unpack file over symlink "%s"!') %
                            os.path.join(relative_root, f))
                os.rename(src, dest)

        # Restore directory mtimes last; the first entry ('.', i.e. destdir
        # itself) is deliberately skipped.
        for path, mtime in mtimes[1:]:
            os.utime(os.path.join(destdir, path), (mtime, mtime))
    finally:
        ro_rmtree(tmpdir)
def unpack_archive(url, data, destdir, extract = None, type = None, start_offset = 0):
    """Unpack stream 'data' into directory 'destdir'. If extract is given, extract just
    that sub-directory from the archive (i.e. destdir/extract will exist afterwards).
    Works out the format from the name when no MIME type is given.
    @raise SafeException: if the type cannot be guessed from the URL, or the
    MIME type is not one this module handles"""
    mime = type_from_url(url) if type is None else type
    if mime is None:
        raise SafeException(_("Unknown extension (and no MIME type given) in '%s'") % url)

    # Tar-based formats differ only in the decompression passed to extract_tar.
    tar_formats = (
        ('application/x-bzip-compressed-tar', 'bzip2'),
        ('application/x-tar', None),
        ('application/x-lzma-compressed-tar', 'lzma'),
        ('application/x-xz-compressed-tar', 'xz'),
        ('application/x-compressed-tar', 'gzip'),
    )
    for tar_mime, compression in tar_formats:
        if mime == tar_mime:
            extract_tar(data, destdir, extract, compression, start_offset)
            return

    if mime == 'application/x-deb':
        extract_deb(data, destdir, extract, start_offset)
        return
    if mime == 'application/x-rpm':
        extract_rpm(data, destdir, extract, start_offset)
        return
    if mime == 'application/zip':
        extract_zip(data, destdir, extract, start_offset)
        return
    if mime == 'application/vnd.ms-cab-compressed':
        extract_cab(data, destdir, extract, start_offset)
        return
    if mime == 'application/x-apple-diskimage':
        extract_dmg(data, destdir, extract, start_offset)
        return
    if mime == 'application/x-ruby-gem':
        extract_gem(data, destdir, extract, start_offset)
        return

    raise SafeException(_('Unknown MIME type "%(type)s" for "%(url)s"') % {'type': mime, 'url': url})
def extract_deb(stream, destdir, extract = None, start_offset = 0):
    """Unpack the data tarball of a Debian package into destdir.

    The package is copied to destdir/archive.deb (ar can't read from stdin),
    its member list is read with `ar t`, the first data.tar* member is
    extracted with `ar x` and then unpacked with extract_tar(). The copy of
    the package is removed again even if one of these steps fails.
    @param stream: seekable file object containing the package
    @param start_offset: position in stream where the package starts
    @raise SafeException: if extract is given (not supported for Debs), or no
    recognised data.tar* member is found"""
    if extract:
        raise SafeException(_('Sorry, but the "extract" attribute is not yet supported for Debs'))

    # Member name -> decompression to pass to extract_tar().
    compression_by_member = {
        'data.tar': None,
        'data.tar.gz': 'gzip',
        'data.tar.bz2': 'bzip2',
        'data.tar.lzma': 'lzma',
        'data.tar.xz': 'xz',
    }

    stream.seek(start_offset)
    # ar can't read from stdin, so make a copy...
    deb_copy_name = os.path.join(destdir, 'archive.deb')
    try:
        with open(deb_copy_name, 'wb') as deb_copy:
            shutil.copyfileobj(stream, deb_copy)

        p = subprocess.Popen(('ar', 't', 'archive.deb'), stdout=subprocess.PIPE, cwd=destdir, universal_newlines=True)
        listing = p.communicate()[0]
        for line in listing.split('\n'):
            if line in compression_by_member:
                data_tar = line
                data_compression = compression_by_member[line]
                break
        else:
            raise SafeException(_("File is not a Debian package."))

        _extract(stream, destdir, ('ar', 'x', 'archive.deb', data_tar))
    finally:
        # Never leave our copy of the package behind in destdir.
        if os.path.exists(deb_copy_name):
            os.unlink(deb_copy_name)

    data_name = os.path.join(destdir, data_tar)
    with open(data_name, 'rb') as data_stream:
        # Unlink while still open so the tarball itself does not end up in
        # the unpacked tree.
        os.unlink(data_name)
        extract_tar(data_stream, destdir, None, data_compression)
def extract_rpm(stream, destdir, extract = None, start_offset = 0):
    """Unpack an RPM into destdir by converting it to cpio and extracting that.

    A forked child runs rpm2cpio with the stream (positioned at start_offset)
    as stdin and a temporary file as stdout; the parent then feeds that file
    to cpio inside destdir. The temporary file is always removed.
    @raise SafeException: if extract is given (not supported for RPMs) or
    rpm2cpio does not exit successfully"""
    if extract:
        raise SafeException(_('Sorry, but the "extract" attribute is not yet supported for RPMs'))

    fd, cpiopath = mkstemp('-rpm-tmp')
    try:
        child_pid = os.fork()
        if child_pid == 0:
            # In the child: wire up stdin/stdout and exec rpm2cpio. Whatever
            # happens, this process must never return into the caller's code.
            try:
                try:
                    os.dup2(stream.fileno(), 0)
                    os.lseek(0, start_offset, os.SEEK_SET)
                    os.dup2(fd, 1)
                    _exec_maybe_sandboxed(None, 'rpm2cpio', '-')
                except:
                    traceback.print_exc()
            finally:
                os._exit(1)

        reaped_pid, status = os.waitpid(child_pid, 0)
        assert reaped_pid == child_pid
        if status != 0:
            raise SafeException(_("rpm2cpio failed; can't unpack RPM archive; exit code %d") % status)
        os.close(fd)
        fd = None   # so the cleanup below doesn't close it twice

        cpio_cmd = ['cpio', '-mid']
        if _gnu_cpio():
            cpio_cmd.append('--quiet')

        with open(cpiopath, 'rb') as cpio_stream:
            _extract(cpio_stream, destdir, cpio_cmd)

        # Set the mtime of every directory under destdir to 0, since cpio
        # doesn't preserve directory mtimes.
        for dirpath, _subdirs, _names in os.walk(destdir):
            os.utime(dirpath, (0, 0))
    finally:
        if fd is not None:
            os.close(fd)
        os.unlink(cpiopath)
def extract_gem(stream, destdir, extract = None, start_offset = 0):
    """Unpack a Ruby gem: a tar archive whose 'data.tar.gz' member holds the files.

    The outer tar is read from stream starting at start_offset and its
    data.tar.gz member is extracted into a temporary directory inside
    destdir; that payload is then unpacked into destdir. The temporary
    directory is always removed.
    @param extract: optional sub-directory of the payload to extract
    @since: 0.53"""
    stream.seek(start_offset)
    payload = 'data.tar.gz'
    tmpdir = mkdtemp(dir = destdir)
    try:
        # extract_tar() seeks the stream itself, so the offset must be passed
        # on; otherwise it would rewind to 0 and ignore start_offset.
        extract_tar(stream, destdir=tmpdir, extract=payload, decompress=None,
                    start_offset=start_offset)
        with open(os.path.join(tmpdir, payload), 'rb') as payload_stream:
            extract_tar(payload_stream, destdir=destdir, extract=extract, decompress='gzip')
    finally:
        ro_rmtree(tmpdir)
def extract_cab(stream, destdir, extract, start_offset = 0):
    """Unpack a Microsoft Cabinet archive into destdir using cabextract.

    The archive is copied to destdir/archive.cab (cabextract can't read from
    stdin); the copy is removed again even if extraction fails.
    @param stream: seekable file object containing the archive
    @param start_offset: position in stream where the archive starts
    @raise SafeException: if extract is given (not supported for Cabinet files)
    @since: 0.24"""
    if extract:
        raise SafeException(_('Sorry, but the "extract" attribute is not yet supported for Cabinet files'))

    stream.seek(start_offset)
    # cabextract can't read from stdin, so make a copy...
    cab_copy_name = os.path.join(destdir, 'archive.cab')
    try:
        with open(cab_copy_name, 'wb') as cab_copy:
            shutil.copyfileobj(stream, cab_copy)

        _extract(stream, destdir, ['cabextract', '-s', '-q', 'archive.cab'])
    finally:
        # Never leave our copy of the archive behind in destdir.
        if os.path.exists(cab_copy_name):
            os.unlink(cab_copy_name)
def extract_dmg(stream, destdir, extract, start_offset = 0):
    """Unpack an Apple Disk Image into destdir using hdiutil and cp.

    The image is copied to destdir/archive.dmg (hdiutil can't read from
    stdin), attached at a temporary mount point, and everything matched by
    '<mountpoint>/*' is copied into destdir. The image is detached and the
    mount point and image copy are removed even if copying fails.
    @param stream: seekable file object containing the image
    @param start_offset: position in stream where the image starts
    @raise SafeException: if extract is given (not supported for DMGs)
    @since: 0.46"""
    if extract:
        raise SafeException(_('Sorry, but the "extract" attribute is not yet supported for DMGs'))

    stream.seek(start_offset)
    # hdiutil can't read from stdin, so make a copy...
    dmg_copy_name = os.path.join(destdir, 'archive.dmg')
    try:
        with open(dmg_copy_name, 'wb') as dmg_copy:
            shutil.copyfileobj(stream, dmg_copy)

        mountpoint = mkdtemp(prefix='archive')
        try:
            subprocess.check_call(["hdiutil", "attach", "-quiet", "-mountpoint", mountpoint, "-nobrowse", dmg_copy_name])
            try:
                subprocess.check_call(["cp", "-pR"] + glob.glob("%s/*" % mountpoint) + [destdir])
            finally:
                # Once attached, always detach, or the image stays mounted.
                subprocess.check_call(["hdiutil", "detach", "-quiet", mountpoint])
        finally:
            os.rmdir(mountpoint)
    finally:
        # Never leave our copy of the image behind in destdir.
        if os.path.exists(dmg_copy_name):
            os.unlink(dmg_copy_name)
def extract_zip(stream, destdir, extract, start_offset = 0):
    """Unpack a zip archive into destdir using unzip.

    The archive is copied to destdir/archive.zip (unzip can't read from
    stdin); the copy is removed again even if extraction fails.
    @param stream: seekable file object containing the archive
    @param extract: optional top-level directory to extract; only members
    matching '<extract>/*' are unpacked
    @param start_offset: position in stream where the archive starts
    @raise SafeException: if extract contains characters outside the allowed set"""
    if extract:
        # Limit the characters we accept, to avoid sending dodgy
        # strings to zip. \Z (rather than $) is used so that a trailing
        # newline is rejected too.
        if not re.match(r'^[a-zA-Z0-9][- _a-zA-Z0-9.]*\Z', extract):
            raise SafeException(_('Illegal character in extract attribute'))

    stream.seek(start_offset)
    # unzip can't read from stdin, so make a copy...
    zip_copy_name = os.path.join(destdir, 'archive.zip')
    try:
        with open(zip_copy_name, 'wb') as zip_copy:
            shutil.copyfileobj(stream, zip_copy)

        args = ['unzip', '-q', '-o', 'archive.zip']

        if extract:
            args.append(extract + '/*')

        _extract(stream, destdir, args)
    finally:
        # Never leave our copy of the archive behind in destdir.
        if os.path.exists(zip_copy_name):
            os.unlink(zip_copy_name)
def _find_decompressor(prog):
    """Return the path of decompressor 'prog' from $PATH, or otherwise the
    path '_<prog>' in the directory containing this module."""
    path = find_in_path(prog)
    if not path:
        path = os.path.abspath(os.path.join(os.path.dirname(__file__), '_' + prog))
    return path


def extract_tar(stream, destdir, extract, decompress, start_offset = 0):
    """Unpack a (possibly compressed) tar archive from stream into destdir.

    GNU tar is used when available; otherwise Python's tarfile module is used,
    with lzma and xz archives first decompressed to a temporary file.
    @param stream: seekable file object containing the archive
    @param extract: optional top-level name to extract; when empty or None the
    whole archive is unpacked
    @param decompress: None, 'bzip2', 'gzip', 'lzma' or 'xz'
    @param start_offset: position in stream where the archive starts
    @raise SafeException: if extract contains characters outside the allowed
    set, or (tarfile path only) nothing in the archive matched extract"""
    if extract:
        # Limit the characters we accept, to avoid sending dodgy
        # strings to tar. \Z (rather than $) is used so that a trailing
        # newline is rejected too.
        if not re.match(r'^[a-zA-Z0-9][- _a-zA-Z0-9.]*\Z', extract):
            raise SafeException(_('Illegal character in extract attribute'))

    assert decompress in [None, 'bzip2', 'gzip', 'lzma', 'xz']

    if _gnu_tar():
        ext_cmd = ['tar']
        if decompress == 'bzip2':
            ext_cmd.append('--bzip2')
        elif decompress == 'gzip':
            ext_cmd.append('-z')
        elif decompress in ('lzma', 'xz'):
            ext_cmd.append('--use-compress-program=' + _find_decompressor('un' + decompress))

        if recent_gnu_tar():
            ext_cmd.extend(('-x', '--no-same-owner', '--no-same-permissions'))
        else:
            ext_cmd.extend(('xf', '-'))

        if extract:
            ext_cmd.append(extract)

        _extract(stream, destdir, ext_cmd, start_offset)
        return

    # Since we don't have GNU tar, use python's tarfile module. This will probably
    # be a lot slower and we do not support lzma and xz; however, it is portable.
    # (lzma and xz are handled by first uncompressing stream to a temporary file.
    # this is simple to do, but less efficient than piping through the program)
    import tarfile
    import tempfile

    temp = None
    try:
        if decompress is None:
            rmode = 'r|'
        elif decompress == 'bzip2':
            rmode = 'r|bz2'
        elif decompress == 'gzip':
            rmode = 'r|gz'
        elif decompress in ('lzma', 'xz'):
            decompressor = _find_decompressor('un' + decompress)
            temp = tempfile.NamedTemporaryFile(suffix='.tar', mode='w+b')
            # start_offset refers to the compressed input, so apply it before
            # decompressing; the temporary tar is then read from its start.
            stream.seek(start_offset)
            subprocess.check_call([decompressor], stdin=stream, stdout=temp)
            stream = temp
            start_offset = 0
            rmode = 'r|'
        else:
            # Only reachable when assertions are disabled.
            raise SafeException(_('GNU tar unavailable; unsupported compression format: %s') % decompress)

        stream.seek(start_offset)
        # Python 2.5.1 crashes if name is None; see Python bug #1706850
        tar = tarfile.open(name = '', mode = rmode, fileobj = stream)
        try:
            # os.umask() can only be read by setting it, so set and restore.
            current_umask = os.umask(0)
            os.umask(current_umask)

            uid = gid = None
            try:
                uid = os.geteuid()
                gid = os.getegid()
            except Exception:
                logger.debug(_("Can't get uid/gid"))

            def chmod_extract(tarinfo):
                # If any X bit is set, they all must be
                if tarinfo.mode & 0o111:
                    tarinfo.mode |= 0o111

                # Everyone gets read and write (subject to the umask)
                # No special bits are allowed.
                tarinfo.mode = ((tarinfo.mode | 0o666) & ~current_umask) & 0o777

                # Don't change owner, even if run as root. Compare with None:
                # root's uid/gid is 0, which a plain truth test would skip.
                # NOTE(review): tarfile may still prefer the member's
                # uname/gname when running as root - confirm if that matters.
                if uid is not None:
                    tarinfo.uid = uid
                if gid is not None:
                    tarinfo.gid = gid
                # NOTE(review): member names are not checked here for '..' or
                # absolute paths; confirm callers verify the result.
                tar.extract(tarinfo, destdir)

            extracted_anything = False
            ext_dirs = []

            for tarinfo in tar:
                # An empty 'extract' means "everything", as on the GNU tar path.
                if not extract or \
                   tarinfo.name.startswith(extract + '/') or \
                   tarinfo.name == extract:
                    if tarinfo.isdir():
                        ext_dirs.append(tarinfo)

                    chmod_extract(tarinfo)
                    extracted_anything = True

            # Due to a bug in tarfile (python versions < 2.5), we have to manually
            # set the mtime of each directory that we extract after extracting everything.
            for tarinfo in ext_dirs:
                dirname = os.path.join(destdir, tarinfo.name)
                os.utime(dirname, (tarinfo.mtime, tarinfo.mtime))
        finally:
            tar.close()

        if extract and not extracted_anything:
            raise SafeException(_('Unable to find specified file = %s in archive') % extract)
    finally:
        if temp is not None:
            temp.close()
def _extract(stream, destdir, command, start_offset = 0):
    """Run 'command' inside destdir in a child process, with
    stream seeked to 'start_offset' as stdin.

    The child's stderr is captured and included in the error on failure.
    @param command: argument list for the program to run
    @raise SafeException: if the command exits with a non-zero status"""

    # Some zip archives are missing timezone information; force consistent results
    child_env = os.environ.copy()
    child_env['TZ'] = 'GMT'

    stream.seek(start_offset)

    # TODO: use pola-run if available, once it supports fchmod
    child = subprocess.Popen(command, cwd = destdir, stdin = stream, stderr = subprocess.PIPE, env = child_env)

    unused, cerr = child.communicate()

    status = child.wait()
    if status != 0:
        # The pipe yields bytes on Python 3; decode so the message shows the
        # tool's output rather than a b'...' repr.
        if not isinstance(cerr, str):
            cerr = cerr.decode('utf-8', 'replace')
        raise SafeException(_('Failed to extract archive (using %(command)s); exit code %(status)d:\n%(err)s') % {'command': command, 'status': status, 'err': cerr.strip()})