More Python 3 support
[zeroinstall/solver.git] / zeroinstall / zerostore / unpack.py
blob14322c47c51246d8fdef560019839305fadd5247
1 """Unpacking archives of various formats."""
3 # Copyright (C) 2009, Thomas Leonard
4 # See the README file for details, or visit http://0install.net.
6 from zeroinstall import _
7 import os, subprocess
8 import shutil
9 import glob
10 import traceback
11 from tempfile import mkdtemp, mkstemp
12 import re
13 from logging import debug, warn
14 import errno
15 from zeroinstall import SafeException
16 from zeroinstall.support import find_in_path, ro_rmtree
# First line of `cpio --version` output; filled in lazily on first use.
_cpio_version = None


def _get_cpio_version():
    """Return the first line of ``cpio --version`` output.

    The command is only run on the first call; later calls return the
    value remembered in the module-level ``_cpio_version``.
    """
    global _cpio_version
    if _cpio_version is not None:
        return _cpio_version

    proc = subprocess.Popen(
        ['cpio', '--version'],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # the banner may be printed on either stream
        universal_newlines=True)
    output, ignored = proc.communicate()
    proc.stdout.close()
    proc.wait()

    # Only the first line is kept.
    _cpio_version = output.partition('\n')[0]
    debug(_("cpio version = %s"), _cpio_version)
    return _cpio_version
def _gnu_cpio():
    """Return True if the version banner of cpio contains '(GNU cpio)'."""
    banner = _get_cpio_version()
    is_gnu = '(GNU cpio)' in banner
    debug(_("Is GNU cpio = %s"), is_gnu)
    return is_gnu
# First line of `tar --version` output; filled in lazily on first use.
_tar_version = None


def _get_tar_version():
    """Return the first line of ``tar --version`` output.

    The command is only run on the first call; later calls return the
    value remembered in the module-level ``_tar_version``.
    """
    global _tar_version
    if _tar_version is not None:
        return _tar_version

    proc = subprocess.Popen(
        ['tar', '--version'],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # the banner may be printed on either stream
        universal_newlines=True)
    output, ignored = proc.communicate()
    proc.stdout.close()
    proc.wait()

    # Only the first line is kept.
    _tar_version = output.partition('\n')[0]
    debug(_("tar version = %s"), _tar_version)
    return _tar_version
def _gnu_tar():
    """Return True if the version banner of tar contains '(GNU tar)'."""
    banner = _get_tar_version()
    is_gnu = '(GNU tar)' in banner
    debug(_("Is GNU tar = %s"), is_gnu)
    return is_gnu
def recent_gnu_tar():
    """Return True if tar is GNU tar with a version greater than 1.13.92.

    A warning is logged (and False returned) when tar is GNU tar but no
    version number can be found in its banner.

    @deprecated: should be private"""
    is_recent = False
    if _gnu_tar():
        # The number follows the closing bracket of "(GNU tar)".
        found = re.search(r'\)\s*(\d+(\.\d+)*)', _get_tar_version())
        if found is None:
            warn(_("Failed to extract GNU tar version number"))
        else:
            components = [int(part) for part in found.group(1).split('.')]
            # Lists compare element by element, so [1, 14] > [1, 13, 92].
            is_recent = components > [1, 13, 92]
    debug(_("Recent GNU tar = %s"), is_recent)
    return is_recent
# Path of the pola-run sandboxing helper, or None when sandboxing is unavailable.
# Detection is disabled, as Plash does not currently support fchmod(2); while
# this stays None, _exec_maybe_sandboxed() executes programs directly.
_pola_run = None
#_pola_run = find_in_path('pola-run')
#if _pola_run:
#	info('Found pola-run: %s', _pola_run)
#else:
#	info('pola-run not found; archive extraction will not be sandboxed')
def type_from_url(url):
    """Guess the MIME type for this resource based on its URL. Returns None if we don't know what it is.

    The comparison is case-insensitive and looks only at the end of the URL.
    """
    # (suffix, MIME type) pairs, tried in order.
    known_suffixes = (
        ('.rpm', 'application/x-rpm'),
        ('.deb', 'application/x-deb'),
        ('.tar.bz2', 'application/x-bzip-compressed-tar'),
        ('.tar.gz', 'application/x-compressed-tar'),
        ('.tar.lzma', 'application/x-lzma-compressed-tar'),
        ('.tar.xz', 'application/x-xz-compressed-tar'),
        ('.tbz', 'application/x-bzip-compressed-tar'),
        ('.tgz', 'application/x-compressed-tar'),
        ('.tlz', 'application/x-lzma-compressed-tar'),
        ('.txz', 'application/x-xz-compressed-tar'),
        ('.tar', 'application/x-tar'),
        ('.zip', 'application/zip'),
        ('.cab', 'application/vnd.ms-cab-compressed'),
        ('.dmg', 'application/x-apple-diskimage'),
        ('.gem', 'application/x-ruby-gem'),
    )
    lowered = url.lower()
    for suffix, mime_type in known_suffixes:
        if lowered.endswith(suffix):
            return mime_type
    return None
def check_type_ok(mime_type):
    """Check we have the needed software to extract from an archive of the given type.
    @raise SafeException: if the needed software is not available, or the type is unsupported"""
    assert mime_type

    # Types that need no external program to be checked for here:
    # - bzip2 tarballs can fall back to Python's built-in tar.bz2 support;
    # - the lzma decompressor can be obtained through Zero Install;
    # - no check is made for gzip tarballs, plain tarballs and Ruby gems.
    if mime_type in ('application/x-bzip-compressed-tar',
                     'application/x-lzma-compressed-tar',
                     'application/x-compressed-tar',
                     'application/x-tar',
                     'application/x-ruby-gem'):
        return

    if mime_type == 'application/x-rpm':
        if find_in_path('rpm2cpio'):
            return
        raise SafeException(_("This package looks like an RPM, but you don't have the rpm2cpio command "
                "I need to extract it. Install the 'rpm' package first (this works even if "
                "you're on a non-RPM-based distribution such as Debian)."))

    if mime_type == 'application/x-deb':
        if find_in_path('ar'):
            return
        raise SafeException(_("This package looks like a Debian package, but you don't have the 'ar' command "
                "I need to extract it. Install the package containing it (sometimes called 'binutils') "
                "first. This works even if you're on a non-Debian-based distribution such as Red Hat)."))

    if mime_type == 'application/zip':
        if find_in_path('unzip'):
            return
        raise SafeException(_("This package looks like a zip-compressed archive, but you don't have the 'unzip' command "
                "I need to extract it. Install the package containing it first."))

    if mime_type == 'application/vnd.ms-cab-compressed':
        if find_in_path('cabextract'):
            return
        raise SafeException(_("This package looks like a Microsoft Cabinet archive, but you don't have the 'cabextract' command "
                "I need to extract it. Install the package containing it first."))

    if mime_type == 'application/x-apple-diskimage':
        if find_in_path('hdiutil'):
            return
        raise SafeException(_("This package looks like a Apple Disk Image, but you don't have the 'hdiutil' command "
                "I need to extract it."))

    if mime_type == 'application/x-xz-compressed-tar':
        if find_in_path('unxz'):
            return
        raise SafeException(_("This package looks like a xz-compressed package, but you don't have the 'unxz' command "
                "I need to extract it. Install the package containing it (it's probably called 'xz-utils') "
                "first."))

    # Anything else is a type this version does not know how to unpack.
    from zeroinstall import version
    raise SafeException(_("Unsupported archive type '%(type)s' (for injector version %(version)s)") % {'type': mime_type, 'version': version})
def _exec_maybe_sandboxed(writable, prog, *args):
    """Replace the current process with 'prog', sandboxed when possible.

    With a sandbox (pola-run) available, only the 'writable' directory is
    made writable for the program; otherwise the program is executed
    directly, without a sandbox.
    @raise Exception: if 'prog' cannot be found in $PATH"""
    executable = find_in_path(prog)
    if not executable:
        raise Exception(_("'%s' not found in $PATH") % prog)

    if _pola_run is None:
        # No sandbox: run the program directly.
        os.execlp(executable, executable, *args)

    # We have pola-shell :-)
    sandbox_args = ['--prog', executable, '-f', '/']
    for argument in args:
        sandbox_args.extend(('-a', argument))
    if writable:
        sandbox_args.extend(('-fw', writable))
    os.execl(_pola_run, _pola_run, *sandbox_args)
def unpack_archive_over(url, data, destdir, extract = None, type = None, start_offset = 0):
    """Like unpack_archive, except that we unpack to a temporary directory first and
    then move things over, checking that we're not following symlinks at each stage.
    Use this when you want to unpack an unarchive into a directory which already has
    stuff in it.
    @param url: used to guess the archive type when 'type' is None
    @param data: stream containing the archive
    @param destdir: existing directory to merge the archive's contents into
    @param extract: single top-level directory to take from the archive (must not contain os.sep), or None
    @param type: MIME type of the archive, or None to guess from 'url'
    @param start_offset: offset in 'data' at which the archive starts
    @raise SafeException: if a directory or file would be unpacked over a symlink,
    or a directory over a non-directory
    @note: Since 0.49, the leading "extract" component is removed (unlike unpack_archive).
    @since: 0.28"""
    import stat

    # Check the argument before creating anything, so that a failure here
    # does not leave a stray temporary directory inside destdir.
    assert extract is None or os.sep not in extract, extract

    tmpdir = mkdtemp(dir = destdir)
    try:
        mtimes = []

        unpack_archive(url, data, tmpdir, extract, type, start_offset)

        if extract is None:
            srcdir = tmpdir
        else:
            srcdir = os.path.join(tmpdir, extract)
            assert not os.path.islink(srcdir)

        stem_len = len(srcdir)
        for root, dirs, files in os.walk(srcdir):
            relative_root = root[stem_len + 1:] or '.'
            target_root = os.path.join(destdir, relative_root)
            try:
                info = os.lstat(target_root)
            except OSError as ex:
                if ex.errno != errno.ENOENT:
                    raise	# Some odd error.
                # Doesn't exist. OK.
                os.mkdir(target_root)
            else:
                if stat.S_ISLNK(info.st_mode):
                    raise SafeException(_('Attempt to unpack dir over symlink "%s"!') % relative_root)
                elif not stat.S_ISDIR(info.st_mode):
                    raise SafeException(_('Attempt to unpack dir over non-directory "%s"!') % relative_root)
            # 'root' already starts with srcdir; joining it onto srcdir again
            # would give a non-existent path whenever srcdir is relative.
            mtimes.append((relative_root, os.lstat(root).st_mtime))

            for s in dirs:	# Symlinks are counted as directories
                src = os.path.join(srcdir, relative_root, s)
                if os.path.islink(src):
                    # Move the link itself, like a file.
                    files.append(s)

            for f in files:
                src = os.path.join(srcdir, relative_root, f)
                dest = os.path.join(destdir, relative_root, f)
                if os.path.islink(dest):
                    raise SafeException(_('Attempt to unpack file over symlink "%s"!') %
                            os.path.join(relative_root, f))
                os.rename(src, dest)

        # Moving entries into the directories changed their mtimes; put back
        # the archive's values. The first entry (destdir itself) is skipped.
        for path, mtime in mtimes[1:]:
            os.utime(os.path.join(destdir, path), (mtime, mtime))
    finally:
        ro_rmtree(tmpdir)
def unpack_archive(url, data, destdir, extract = None, type = None, start_offset = 0):
    """Unpack stream 'data' into directory 'destdir'. If extract is given, extract just
    that sub-directory from the archive (i.e. destdir/extract will exist afterwards).
    Works out the format from the name, unless a MIME type is given in 'type'.
    @raise SafeException: if the type cannot be guessed or is not supported"""
    if type is None:
        type = type_from_url(url)
        if type is None:
            raise SafeException(_("Unknown extension (and no MIME type given) in '%s'") % url)

    # Tar-based formats differ only in the compression passed to extract_tar.
    tar_compression = {
        'application/x-tar': None,
        'application/x-bzip-compressed-tar': 'bzip2',
        'application/x-compressed-tar': 'gzip',
        'application/x-lzma-compressed-tar': 'lzma',
        'application/x-xz-compressed-tar': 'xz',
    }
    if type in tar_compression:
        extract_tar(data, destdir, extract, tar_compression[type], start_offset)
        return

    # Every other format has a dedicated extractor with a common signature.
    extractors = {
        'application/x-deb': extract_deb,
        'application/x-rpm': extract_rpm,
        'application/zip': extract_zip,
        'application/vnd.ms-cab-compressed': extract_cab,
        'application/x-apple-diskimage': extract_dmg,
        'application/x-ruby-gem': extract_gem,
    }
    extractor = extractors.get(type)
    if extractor is None:
        raise SafeException(_('Unknown MIME type "%(type)s" for "%(url)s"') % {'type': type, 'url': url})
    extractor(data, destdir, extract, start_offset)
def extract_deb(stream, destdir, extract = None, start_offset = 0):
    """Unpack the data archive of the Debian package in 'stream' into 'destdir'.
    @param stream: seekable stream containing the package
    @param destdir: directory to unpack into (also used for temporary copies)
    @param extract: must be empty; selecting a sub-directory is not supported
    @param start_offset: offset in 'stream' at which the package starts
    @raise SafeException: if 'extract' is given, if no data.tar* member is
    found, or if a helper program fails"""
    if extract:
        raise SafeException(_('Sorry, but the "extract" attribute is not yet supported for Debs'))

    # Name of the payload member -> compression to pass to extract_tar.
    payload_compression = {
        'data.tar': None,
        'data.tar.gz': 'gzip',
        'data.tar.bz2': 'bzip2',
        'data.tar.lzma': 'lzma',
        'data.tar.xz': 'xz',
    }

    stream.seek(start_offset)
    # ar can't read from stdin, so make a copy...
    deb_copy_name = os.path.join(destdir, 'archive.deb')
    with open(deb_copy_name, 'wb') as deb_copy:
        shutil.copyfileobj(stream, deb_copy)

    try:
        p = subprocess.Popen(('ar', 't', 'archive.deb'), stdout=subprocess.PIPE, cwd=destdir, universal_newlines=True)
        listing = p.communicate()[0]
        # Use the first member that is a recognised payload archive.
        for line in listing.split('\n'):
            if line in payload_compression:
                data_tar = line
                break
        else:
            raise SafeException(_("File is not a Debian package."))
        data_compression = payload_compression[data_tar]

        _extract(stream, destdir, ('ar', 'x', 'archive.deb', data_tar))
    finally:
        # Remove our copy whether or not the payload could be pulled out.
        os.unlink(deb_copy_name)

    data_name = os.path.join(destdir, data_tar)
    with open(data_name, 'rb') as data_stream:
        # Unlink while still open, so the payload archive itself doesn't
        # remain in destdir alongside its unpacked contents.
        os.unlink(data_name)
        extract_tar(data_stream, destdir, None, data_compression)
def extract_rpm(stream, destdir, extract = None, start_offset = 0):
    """Unpack the RPM in 'stream' into 'destdir'.
    The package is first converted to a cpio archive (in a temporary file) by
    running rpm2cpio in a forked child, and that archive is then unpacked with cpio.
    @param stream: stream containing the package; must have a real file descriptor (fileno())
    @param destdir: directory to unpack into
    @param extract: must be empty; selecting a sub-directory is not supported
    @param start_offset: offset in 'stream' at which the package starts
    @raise SafeException: if 'extract' is given, or if rpm2cpio or cpio fails"""
    if extract:
        raise SafeException(_('Sorry, but the "extract" attribute is not yet supported for RPMs'))
    # Temporary file (name ending in '-rpm-tmp') to receive rpm2cpio's output.
    fd, cpiopath = mkstemp('-rpm-tmp')
    try:
        child = os.fork()
        if child == 0:
            # In the child: connect the package to stdin and the temporary
            # file to stdout, then replace this process with rpm2cpio.
            try:
                try:
                    os.dup2(stream.fileno(), 0)
                    os.lseek(0, start_offset, 0)
                    os.dup2(fd, 1)
                    _exec_maybe_sandboxed(None, 'rpm2cpio', '-')
                except:
                    traceback.print_exc()
            finally:
                # Only reached if the exec failed; never let the child carry
                # on running the parent's code.
                os._exit(1)
        id, status = os.waitpid(child, 0)
        assert id == child
        # NOTE(review): 'status' is the raw value from waitpid, which is what
        # the "exit code" in the message below reports.
        if status != 0:
            raise SafeException(_("rpm2cpio failed; can't unpack RPM archive; exit code %d") % status)
        os.close(fd)
        fd = None	# Tell the cleanup below that it is already closed

        args = ['cpio', '-mid']
        if _gnu_cpio():
            args.append('--quiet')

        with open(cpiopath, 'rb') as cpio_stream:
            _extract(cpio_stream, destdir, args)
        # Set the mtime of every directory under 'destdir' to 0, since cpio doesn't
        # preserve directory mtimes.
        for root, dirs, files in os.walk(destdir):
            os.utime(root, (0, 0))
    finally:
        if fd is not None:
            os.close(fd)
        os.unlink(cpiopath)
def extract_gem(stream, destdir, extract = None, start_offset = 0):
    """Unpack the Ruby gem in 'stream' into 'destdir'.
    A gem is handled as an uncompressed tar archive containing the real payload,
    'data.tar.gz'. The payload is first extracted to a temporary directory inside
    'destdir' and then unpacked from there.
    @param stream: seekable stream containing the gem
    @param destdir: directory to unpack into
    @param extract: sub-directory of the payload to extract, or None for everything
    @param start_offset: offset in 'stream' at which the gem starts
    @since: 0.53"""
    payload = 'data.tar.gz'
    tmpdir = mkdtemp(dir = destdir)
    try:
        # The offset must be passed on: extract_tar positions the stream
        # itself, so seeking here and relying on the default of 0 would
        # rewind to the start of the stream.
        extract_tar(stream, destdir=tmpdir, extract=payload, decompress=None, start_offset=start_offset)
        with open(os.path.join(tmpdir, payload), 'rb') as payload_stream:
            extract_tar(payload_stream, destdir=destdir, extract=extract, decompress='gzip')
    finally:
        ro_rmtree(tmpdir)
def extract_cab(stream, destdir, extract, start_offset = 0):
    """Unpack the Microsoft Cabinet archive in 'stream' into 'destdir' using cabextract.
    @param stream: seekable stream containing the archive
    @param destdir: directory to unpack into (also holds a temporary copy of the archive)
    @param extract: must be empty; selecting a sub-directory is not supported
    @param start_offset: offset in 'stream' at which the archive starts
    @raise SafeException: if 'extract' is given, or if cabextract fails
    @since: 0.24"""
    if extract:
        raise SafeException(_('Sorry, but the "extract" attribute is not yet supported for Cabinet files'))

    stream.seek(start_offset)
    # cabextract can't read from stdin, so make a copy...
    cab_copy_name = os.path.join(destdir, 'archive.cab')
    with open(cab_copy_name, 'wb') as cab_copy:
        shutil.copyfileobj(stream, cab_copy)

    try:
        _extract(stream, destdir, ['cabextract', '-s', '-q', 'archive.cab'])
    finally:
        # Don't leave our copy behind, even if cabextract failed.
        os.unlink(cab_copy_name)
def extract_dmg(stream, destdir, extract, start_offset = 0):
    """Unpack the Apple disk image in 'stream' into 'destdir'.
    The image is attached at a temporary mount point with hdiutil, its
    contents are copied with 'cp -pR', and the image is detached again.
    @param stream: seekable stream containing the image
    @param destdir: directory to unpack into (also holds a temporary copy of the image)
    @param extract: must be empty; selecting a sub-directory is not supported
    @param start_offset: offset in 'stream' at which the image starts
    @raise SafeException: if 'extract' is given
    @raise subprocess.CalledProcessError: if hdiutil or cp fails
    @since: 0.46"""
    if extract:
        raise SafeException(_('Sorry, but the "extract" attribute is not yet supported for DMGs'))

    stream.seek(start_offset)
    # hdiutil can't read from stdin, so make a copy...
    dmg_copy_name = os.path.join(destdir, 'archive.dmg')
    with open(dmg_copy_name, 'wb') as dmg_copy:
        shutil.copyfileobj(stream, dmg_copy)

    try:
        mountpoint = mkdtemp(prefix='archive')
        try:
            subprocess.check_call(["hdiutil", "attach", "-quiet", "-mountpoint", mountpoint, "-nobrowse", dmg_copy_name])
            try:
                subprocess.check_call(["cp", "-pR"] + glob.glob("%s/*" % mountpoint) + [destdir])
            finally:
                # Never leave the image attached, even if the copy failed.
                subprocess.check_call(["hdiutil", "detach", "-quiet", mountpoint])
        finally:
            os.rmdir(mountpoint)
    finally:
        os.unlink(dmg_copy_name)
def extract_zip(stream, destdir, extract, start_offset = 0):
    """Unpack the zip archive in 'stream' into 'destdir' using unzip.
    @param stream: seekable stream containing the archive
    @param destdir: directory to unpack into (also holds a temporary copy of the archive)
    @param extract: sub-directory of the archive to extract, or empty for everything
    @param start_offset: offset in 'stream' at which the archive starts
    @raise SafeException: if 'extract' contains unexpected characters, or if unzip fails"""
    if extract:
        # Limit the characters we accept, to avoid sending dodgy
        # strings to zip
        # (\Z rather than $, which would also accept a trailing newline)
        if not re.match(r'[a-zA-Z0-9][- _a-zA-Z0-9.]*\Z', extract):
            raise SafeException(_('Illegal character in extract attribute'))

    stream.seek(start_offset)
    # unzip can't read from stdin, so make a copy...
    zip_copy_name = os.path.join(destdir, 'archive.zip')
    with open(zip_copy_name, 'wb') as zip_copy:
        shutil.copyfileobj(stream, zip_copy)

    args = ['unzip', '-q', '-o', 'archive.zip']

    if extract:
        args.append(extract + '/*')

    try:
        _extract(stream, destdir, args)
    finally:
        # Don't leave our copy behind, even if unzip failed.
        os.unlink(zip_copy_name)
def _find_decompressor(program):
    """Return the path of 'program' from $PATH or, failing that, the path of
    the fallback named '_' + program in this module's directory."""
    path = find_in_path(program)
    if not path:
        path = os.path.abspath(os.path.join(os.path.dirname(__file__), '_' + program))
    return path

def _extract_tar_gnu(stream, destdir, extract, decompress, start_offset):
    """Unpack a tar archive by piping 'stream' through the GNU tar command."""
    ext_cmd = ['tar']
    if decompress == 'bzip2':
        ext_cmd.append('--bzip2')
    elif decompress == 'gzip':
        ext_cmd.append('-z')
    elif decompress in ('lzma', 'xz'):
        # 'unlzma' or 'unxz'
        ext_cmd.append('--use-compress-program=' + _find_decompressor('un' + decompress))

    if recent_gnu_tar():
        ext_cmd.extend(('-x', '--no-same-owner', '--no-same-permissions'))
    else:
        ext_cmd.extend(('xf', '-'))

    if extract:
        ext_cmd.append(extract)

    _extract(stream, destdir, ext_cmd, start_offset)

def _extract_tar_python(stream, destdir, extract, decompress, start_offset):
    """Unpack a tar archive using Python's tarfile module."""
    import tarfile
    import tempfile

    # Since we don't have GNU tar, use python's tarfile module. This will probably
    # be a lot slower and we do not support lzma and xz; however, it is portable.
    # (lzma and xz are handled by first uncompressing stream to a temporary file.
    # this is simple to do, but less efficient than piping through the program)
    extracted_anything = False
    temp = None
    try:
        if decompress is None:
            rmode = 'r|'
        elif decompress == 'bzip2':
            rmode = 'r|bz2'
        elif decompress == 'gzip':
            rmode = 'r|gz'
        elif decompress in ('lzma', 'xz'):
            decompressor = _find_decompressor('un' + decompress)
            temp = tempfile.NamedTemporaryFile(suffix='.tar', mode='w+b')
            # The offset applies to the compressed input, not to the
            # decompressed copy, which is then read from its beginning.
            stream.seek(start_offset)
            subprocess.check_call([decompressor], stdin=stream, stdout=temp)
            rmode = 'r|'
            stream = temp
            start_offset = 0
        else:
            raise SafeException(_('GNU tar unavailable; unsupported compression format: %s') % decompress)

        stream.seek(start_offset)
        # Python 2.5.1 crashes if name is None; see Python bug #1706850
        tar = tarfile.open(name = '', mode = rmode, fileobj = stream)
        try:
            # The umask can only be read by setting it; restore it at once.
            current_umask = os.umask(0)
            os.umask(current_umask)

            uid = gid = None
            try:
                uid = os.geteuid()
                gid = os.getegid()
            except (AttributeError, OSError):
                debug(_("Can't get uid/gid"))

            # NOTE(review): member names are not checked here for absolute
            # paths or '..' components before being handed to tarfile.

            # Directories we extract, so that their mtimes can be reset
            # after everything has been created inside them.
            ext_dirs = []

            for tarinfo in tar:
                if extract and tarinfo.name != extract and \
                   not tarinfo.name.startswith(extract + '/'):
                    continue

                if tarinfo.isdir():
                    ext_dirs.append(tarinfo)

                # If any X bit is set, they all must be
                if tarinfo.mode & 0o111:
                    tarinfo.mode |= 0o111

                # Everyone gets read and write (subject to the umask)
                # No special bits are allowed.
                tarinfo.mode = ((tarinfo.mode | 0o666) & ~current_umask) & 0o777

                # Don't change owner, even if run as root
                # (compare with None: root's uid and gid are 0)
                if uid is not None:
                    tarinfo.uid = uid
                if gid is not None:
                    tarinfo.gid = gid
                tar.extract(tarinfo, destdir)
                extracted_anything = True

            # Due to a bug in tarfile (python versions < 2.5), we have to manually
            # set the mtime of each directory that we extract after extracting everything.
            for tarinfo in ext_dirs:
                dirname = os.path.join(destdir, tarinfo.name)
                os.utime(dirname, (tarinfo.mtime, tarinfo.mtime))
        finally:
            tar.close()
    finally:
        if temp is not None:
            temp.close()

    if extract and not extracted_anything:
        raise SafeException(_('Unable to find specified file = %s in archive') % extract)

def extract_tar(stream, destdir, extract, decompress, start_offset = 0):
    """Unpack the tar archive in 'stream' into 'destdir'.
    GNU tar is used when available; otherwise Python's tarfile module is used,
    with lzma and xz input first decompressed to a temporary file.
    @param stream: seekable stream containing the archive
    @param destdir: directory to unpack into
    @param extract: sub-directory (or member) of the archive to extract, or empty for everything
    @param decompress: None, 'bzip2', 'gzip', 'lzma' or 'xz'
    @param start_offset: offset in 'stream' at which the archive starts
    @raise SafeException: if 'extract' contains unexpected characters, if
    extraction fails, or if nothing matching 'extract' is found"""
    if extract:
        # Limit the characters we accept, to avoid sending dodgy
        # strings to tar
        # (\Z rather than $, which would also accept a trailing newline)
        if not re.match(r'[a-zA-Z0-9][- _a-zA-Z0-9.]*\Z', extract):
            raise SafeException(_('Illegal character in extract attribute'))

    assert decompress in [None, 'bzip2', 'gzip', 'lzma', 'xz']

    if _gnu_tar():
        _extract_tar_gnu(stream, destdir, extract, decompress, start_offset)
    else:
        _extract_tar_python(stream, destdir, extract, decompress, start_offset)
def _extract(stream, destdir, command, start_offset = 0):
    """Run 'command' inside destdir in a child process, with
    stream seeked to 'start_offset' as stdin.
    @param stream: seekable stream with a real file descriptor
    @param destdir: working directory for the command
    @param command: program and arguments, as a sequence
    @param start_offset: position to seek 'stream' to before starting
    @raise SafeException: if the command exits with a non-zero status; the
    message includes whatever it wrote to stderr"""

    # Some zip archives are missing timezone information; force consistent results
    child_env = os.environ.copy()
    child_env['TZ'] = 'GMT'

    stream.seek(start_offset)

    # TODO: use pola-run if available, once it supports fchmod
    child = subprocess.Popen(command, cwd = destdir, stdin = stream, stderr = subprocess.PIPE, env = child_env)

    unused, cerr = child.communicate()

    status = child.wait()
    if status != 0:
        # stderr is read as bytes; decode it so that the message shows the
        # text itself rather than a b'...' literal under Python 3.
        if isinstance(cerr, bytes):
            cerr = cerr.decode('utf-8', 'replace')
        raise SafeException(_('Failed to extract archive (using %(command)s); exit code %(status)d:\n%(err)s') % {'command': command, 'status': status, 'err': cerr.strip()})