Some updates for Python 3
[zeroinstall.git] / zeroinstall / zerostore / unpack.py
bloba7098b48634a70f5066317b202feae9638a1f938
1 """Unpacking archives of various formats."""
3 # Copyright (C) 2009, Thomas Leonard
4 # See the README file for details, or visit http://0install.net.
6 from zeroinstall import _
7 import os, subprocess
8 import shutil
9 import glob
10 import traceback
11 from tempfile import mkdtemp, mkstemp
12 import re
13 from logging import debug, warn
14 import errno
15 from zeroinstall import SafeException
16 from zeroinstall.support import find_in_path, ro_rmtree
_cpio_version = None
def _get_cpio_version():
    """Return the first line printed by 'cpio --version 2>&1', caching it after the first call.
    The command is run at most once per process. stderr is merged into stdout, so the
    result is whatever the shell printed first. The trailing newline is not stripped.
    @return: the first output line, or '' if the command printed nothing"""
    global _cpio_version
    if _cpio_version is None:
        pipe = os.popen('cpio --version 2>&1')
        try:
            # readline() works on the popen object under both Python 2 and
            # Python 3 (the old .next() method only exists on Python 2) and
            # returns '' instead of raising when there is no output.
            _cpio_version = pipe.readline()
        finally:
            pipe.close()
        debug(_("cpio version = %s"), _cpio_version)
    return _cpio_version
def _gnu_cpio():
    """Report whether the cpio version string contains the marker '(GNU cpio)'.
    @return: True if the marker is present, else False"""
    version_text = _get_cpio_version()
    is_gnu = '(GNU cpio)' in version_text
    debug(_("Is GNU cpio = %s"), is_gnu)
    return is_gnu
_tar_version = None
def _get_tar_version():
    """Return the first line printed by 'tar --version 2>&1', stripped, caching it after the first call.
    The command is run at most once per process. stderr is merged into stdout, so the
    result is whatever the shell printed first.
    @return: the stripped first output line, or '' if the command printed nothing"""
    global _tar_version
    if _tar_version is None:
        pipe = os.popen('tar --version 2>&1')
        try:
            # readline() works on the popen object under both Python 2 and
            # Python 3 (the old .next() method only exists on Python 2) and
            # returns '' instead of raising when there is no output.
            _tar_version = pipe.readline().strip()
        finally:
            pipe.close()
        debug(_("tar version = %s"), _tar_version)
    return _tar_version
def _gnu_tar():
    """Report whether the tar version string contains the marker '(GNU tar)'.
    @return: True if the marker is present, else False"""
    version_text = _get_tar_version()
    is_gnu = '(GNU tar)' in version_text
    debug(_("Is GNU tar = %s"), is_gnu)
    return is_gnu
def recent_gnu_tar():
    """Report whether the installed tar is GNU tar with a version greater than 1.13.92.
    If tar is GNU tar but no version number can be parsed from its version string,
    a warning is logged and False is returned.
    @return: True if tar is GNU tar newer than 1.13.92, else False
    @deprecated: should be private"""
    is_recent = False
    if _gnu_tar():
        match = re.search(r'\)\s*(\d+(\.\d+)*)', _get_tar_version())
        if match:
            # Build a real list: on Python 3 map() returns an iterator, which
            # cannot be ordered against a list (TypeError).
            version = [int(part) for part in match.group(1).split('.')]
            is_recent = version > [1, 13, 92]
        else:
            warn(_("Failed to extract GNU tar version number"))
    debug(_("Recent GNU tar = %s"), is_recent)
    return is_recent
# Path of the pola-run sandboxing program, or None to run extraction programs directly.
# Disabled, as Plash does not currently support fchmod(2).
_pola_run = None
#_pola_run = find_in_path('pola-run')
#if _pola_run:
#	info('Found pola-run: %s', _pola_run)
#else:
#	info('pola-run not found; archive extraction will not be sandboxed')
def type_from_url(url):
    """Guess the MIME type of an archive from the (case-insensitive) ending of its URL.
    @param url: the URL or file name of the resource
    @return: the MIME type string, or None if the ending is not recognised"""
    known_suffixes = (
        ('.rpm', 'application/x-rpm'),
        ('.deb', 'application/x-deb'),
        ('.tar.bz2', 'application/x-bzip-compressed-tar'),
        ('.tar.gz', 'application/x-compressed-tar'),
        ('.tar.lzma', 'application/x-lzma-compressed-tar'),
        ('.tar.xz', 'application/x-xz-compressed-tar'),
        ('.tbz', 'application/x-bzip-compressed-tar'),
        ('.tgz', 'application/x-compressed-tar'),
        ('.tlz', 'application/x-lzma-compressed-tar'),
        ('.txz', 'application/x-xz-compressed-tar'),
        ('.tar', 'application/x-tar'),
        ('.zip', 'application/zip'),
        ('.cab', 'application/vnd.ms-cab-compressed'),
        ('.dmg', 'application/x-apple-diskimage'),
        ('.gem', 'application/x-ruby-gem'),
    )
    lowered = url.lower()
    for suffix, mime_type in known_suffixes:
        if lowered.endswith(suffix):
            return mime_type
    return None
def check_type_ok(mime_type):
    """Verify that the external program needed to unpack archives of this type can be found.
    @param mime_type: the MIME type of the archive (must not be empty)
    @raise SafeException: if the type is not supported, or find_in_path cannot locate the program it needs"""
    assert mime_type

    # The program each supported type depends on; None means there is nothing to check here.
    needed_program = {
        'application/x-rpm': 'rpm2cpio',
        'application/x-deb': 'ar',
        'application/x-bzip-compressed-tar': None,    # We'll fall back to Python's built-in tar.bz2 support
        'application/zip': 'unzip',
        'application/vnd.ms-cab-compressed': 'cabextract',
        'application/x-apple-diskimage': 'hdiutil',
        'application/x-lzma-compressed-tar': None,    # We can get it through Zero Install
        'application/x-xz-compressed-tar': 'unxz',
        'application/x-compressed-tar': None,
        'application/x-tar': None,
        'application/x-ruby-gem': None,
    }

    if mime_type not in needed_program:
        from zeroinstall import version
        raise SafeException(_("Unsupported archive type '%(type)s' (for injector version %(version)s)") % {'type': mime_type, 'version': version})

    program = needed_program[mime_type]
    if program is None or find_in_path(program):
        return

    # The program is missing: pick the explanation that goes with it.
    if program == 'rpm2cpio':
        message = _("This package looks like an RPM, but you don't have the rpm2cpio command "
                    "I need to extract it. Install the 'rpm' package first (this works even if "
                    "you're on a non-RPM-based distribution such as Debian).")
    elif program == 'ar':
        message = _("This package looks like a Debian package, but you don't have the 'ar' command "
                    "I need to extract it. Install the package containing it (sometimes called 'binutils') "
                    "first. This works even if you're on a non-Debian-based distribution such as Red Hat).")
    elif program == 'unzip':
        message = _("This package looks like a zip-compressed archive, but you don't have the 'unzip' command "
                    "I need to extract it. Install the package containing it first.")
    elif program == 'cabextract':
        message = _("This package looks like a Microsoft Cabinet archive, but you don't have the 'cabextract' command "
                    "I need to extract it. Install the package containing it first.")
    elif program == 'hdiutil':
        message = _("This package looks like a Apple Disk Image, but you don't have the 'hdiutil' command "
                    "I need to extract it.")
    else:
        # Only 'unxz' is left
        message = _("This package looks like a xz-compressed package, but you don't have the 'unxz' command "
                    "I need to extract it. Install the package containing it (it's probably called 'xz-utils') "
                    "first.")
    raise SafeException(message)
def _exec_maybe_sandboxed(writable, prog, *args):
    """Replace the current process with 'prog', run through pola-run when that is configured.
    When _pola_run is None the program is exec'd directly; otherwise it is started via
    pola-run, with 'writable' (if given) passed as the -fw directory.
    @param writable: directory to pass to pola-run with -fw, or None
    @param prog: name of the program, looked up with find_in_path
    @param args: arguments to pass to the program
    @raise Exception: if 'prog' cannot be found"""
    prog_path = find_in_path(prog)
    if not prog_path:
        raise Exception(_("'%s' not found in $PATH") % prog)

    if _pola_run is None:
        # No sandbox configured: exec the program itself
        os.execlp(prog_path, prog_path, *args)

    # We have pola-shell :-)
    sandbox_args = ['--prog', prog_path, '-f', '/']
    for arg in args:
        sandbox_args.extend(('-a', arg))
    if writable:
        sandbox_args.extend(('-fw', writable))
    os.execl(_pola_run, _pola_run, *sandbox_args)
def unpack_archive_over(url, data, destdir, extract = None, type = None, start_offset = 0):
    """Like unpack_archive, except that we unpack to a temporary directory first and
    then move things over, checking that we're not following symlinks at each stage.
    Use this when you want to unpack an archive into a directory which already has
    stuff in it.
    @param url: used to guess the archive type when 'type' is None
    @param data: the stream containing the archive
    @param destdir: the existing directory to unpack into
    @param extract: a single top-level directory name to take from the archive, or None for everything
    @param type: the MIME type of the archive, or None to guess from 'url'
    @param start_offset: offset of the archive within 'data'
    @raise SafeException: if a directory or file would be unpacked over a symlink, or a directory over a non-directory
    @note: Since 0.49, the leading "extract" component is removed (unlike unpack_archive).
    @since: 0.28"""
    import stat
    tmpdir = mkdtemp(dir = destdir)
    assert extract is None or os.sep not in extract, extract
    try:
        # (relative path, mtime) for every directory visited; the first entry is the root
        mtimes = []

        unpack_archive(url, data, tmpdir, extract, type, start_offset)

        if extract is None:
            srcdir = tmpdir
        else:
            srcdir = os.path.join(tmpdir, extract)
            assert not os.path.islink(srcdir)

        stem_len = len(srcdir)
        for root, dirs, files in os.walk(srcdir):
            relative_root = root[stem_len + 1:] or '.'
            target_root = os.path.join(destdir, relative_root)
            try:
                info = os.lstat(target_root)
            except OSError as ex:
                if ex.errno != errno.ENOENT:
                    raise    # Some odd error.
                # Doesn't exist. OK.
                os.mkdir(target_root)
            else:
                if stat.S_ISLNK(info.st_mode):
                    raise SafeException(_('Attempt to unpack dir over symlink "%s"!') % relative_root)
                elif not stat.S_ISDIR(info.st_mode):
                    raise SafeException(_('Attempt to unpack dir over non-directory "%s"!') % relative_root)
            # 'root' already starts with srcdir. Joining srcdir onto it again only
            # worked when the path was absolute, and pointed at a non-existent
            # path when destdir (and hence tmpdir) was relative.
            mtimes.append((relative_root, os.lstat(root).st_mtime))

            for s in dirs:    # Symlinks are counted as directories
                src = os.path.join(srcdir, relative_root, s)
                if os.path.islink(src):
                    files.append(s)

            for f in files:
                src = os.path.join(srcdir, relative_root, f)
                dest = os.path.join(destdir, relative_root, f)
                if os.path.islink(dest):
                    raise SafeException(_('Attempt to unpack file over symlink "%s"!') %
                                    os.path.join(relative_root, f))
                os.rename(src, dest)

        # Moving files in changed the directory mtimes, so restore them from the
        # unpacked copies (skipping the first entry, which is the root itself).
        for path, mtime in mtimes[1:]:
            os.utime(os.path.join(destdir, path), (mtime, mtime))
    finally:
        ro_rmtree(tmpdir)
def unpack_archive(url, data, destdir, extract = None, type = None, start_offset = 0):
    """Unpack the archive in stream 'data' into the directory 'destdir'.
    If 'extract' is given, only that sub-directory is taken from the archive
    (i.e. destdir/extract will exist afterwards). When 'type' is None, the
    format is worked out from the name in 'url'.
    @raise SafeException: if the type is not given and cannot be guessed, or is not a known MIME type"""
    if type is None:
        type = type_from_url(url)
        if type is None:
            raise SafeException(_("Unknown extension (and no MIME type given) in '%s'") % url)

    # Tar-based formats only differ in the decompression step
    tar_compression = {
        'application/x-bzip-compressed-tar': 'bzip2',
        'application/x-tar': None,
        'application/x-lzma-compressed-tar': 'lzma',
        'application/x-xz-compressed-tar': 'xz',
        'application/x-compressed-tar': 'gzip',
    }
    if type in tar_compression:
        extract_tar(data, destdir, extract, tar_compression[type], start_offset)
        return

    # Every other format has its own extractor, all with the same signature
    other_extractors = {
        'application/x-deb': extract_deb,
        'application/x-rpm': extract_rpm,
        'application/zip': extract_zip,
        'application/vnd.ms-cab-compressed': extract_cab,
        'application/x-apple-diskimage': extract_dmg,
        'application/x-ruby-gem': extract_gem,
    }
    extractor = other_extractors.get(type)
    if extractor is None:
        raise SafeException(_('Unknown MIME type "%(type)s" for "%(url)s"') % {'type': type, 'url': url})
    extractor(data, destdir, extract, start_offset)
def extract_deb(stream, destdir, extract = None, start_offset = 0):
    """Unpack the data.tar member of the Debian package in 'stream' into 'destdir'.
    The package is first copied to destdir/archive.deb, because ar needs a real file;
    the copy and the extracted data.tar member are both deleted again afterwards.
    @param stream: the stream containing the package
    @param destdir: the directory to unpack into
    @param extract: must be empty; selecting a sub-directory is not supported
    @param start_offset: offset of the package within 'stream'
    @raise SafeException: if 'extract' is given, or no data.tar member is found"""
    if extract:
        raise SafeException(_('Sorry, but the "extract" attribute is not yet supported for Debs'))

    stream.seek(start_offset)
    # ar can't read from stdin, so make a copy...
    deb_copy_name = os.path.join(destdir, 'archive.deb')
    # Binary mode: a text-mode file rejects the bytes of the archive on
    # Python 3 and may translate line endings on some platforms.
    with open(deb_copy_name, 'wb') as deb_copy:
        shutil.copyfileobj(stream, deb_copy)

    data_tar = None
    p = subprocess.Popen(('ar', 't', 'archive.deb'), stdout=subprocess.PIPE, cwd=destdir, universal_newlines=True)
    o = p.communicate()[0]
    # Use the first member that is a (possibly compressed) data.tar
    for line in o.split('\n'):
        if line == 'data.tar':
            data_compression = None
        elif line == 'data.tar.gz':
            data_compression = 'gzip'
        elif line == 'data.tar.bz2':
            data_compression = 'bzip2'
        elif line == 'data.tar.lzma':
            data_compression = 'lzma'
        elif line == 'data.tar.xz':
            # Also a valid member name per deb(5); extract_tar accepts 'xz'
            data_compression = 'xz'
        else:
            continue
        data_tar = line
        break
    else:
        raise SafeException(_("File is not a Debian package."))

    _extract(stream, destdir, ('ar', 'x', 'archive.deb', data_tar))
    os.unlink(deb_copy_name)
    data_name = os.path.join(destdir, data_tar)
    data_stream = open(data_name, 'rb')
    try:
        # Remove the name now so the tarball itself is not left in destdir;
        # the open stream is still used to read it.
        os.unlink(data_name)
        extract_tar(data_stream, destdir, None, data_compression)
    finally:
        data_stream.close()
def extract_rpm(stream, destdir, extract = None, start_offset = 0):
    """Unpack the RPM in 'stream' into 'destdir' using rpm2cpio and cpio.
    rpm2cpio runs in a forked child with its output going to a temporary file,
    which is then fed to cpio. The temporary file is always removed afterwards.
    @param stream: the stream containing the package (must have a real file descriptor)
    @param destdir: the directory to unpack into
    @param extract: must be empty; selecting a sub-directory is not supported
    @param start_offset: offset of the package within 'stream'
    @raise SafeException: if 'extract' is given, or rpm2cpio reports a failure"""
    if extract:
        raise SafeException(_('Sorry, but the "extract" attribute is not yet supported for RPMs'))
    fd, cpiopath = mkstemp('-rpm-tmp')
    try:
        child = os.fork()
        if child == 0:
            # In the child: stdin is the package, stdout the temporary file.
            # Whatever happens, the child must never return into the caller's code.
            try:
                try:
                    os.dup2(stream.fileno(), 0)
                    os.lseek(0, start_offset, 0)
                    os.dup2(fd, 1)
                    _exec_maybe_sandboxed(None, 'rpm2cpio', '-')
                except:
                    traceback.print_exc()
            finally:
                os._exit(1)
        id, status = os.waitpid(child, 0)
        assert id == child
        if status != 0:
            raise SafeException(_("rpm2cpio failed; can't unpack RPM archive; exit code %d") % status)
        os.close(fd)
        fd = None

        args = ['cpio', '-mid']
        if _gnu_cpio():
            args.append('--quiet')

        # Open the cpio data in binary mode and make sure it gets closed again
        # (it used to be left open for the garbage collector to deal with).
        with open(cpiopath, 'rb') as cpio_stream:
            _extract(cpio_stream, destdir, args)
        # Set the mtime of every directory under 'tmp' to 0, since cpio doesn't
        # preserve directory mtimes.
        for root, dirs, files in os.walk(destdir):
            os.utime(root, (0, 0))
    finally:
        if fd is not None:
            os.close(fd)
        os.unlink(cpiopath)
def extract_gem(stream, destdir, extract = None, start_offset = 0):
    """Unpack the payload of the Ruby gem in 'stream' into 'destdir'.
    The gem is handled as an uncompressed tar archive containing 'data.tar.gz':
    that member is extracted to a temporary directory inside 'destdir', unpacked
    from there, and the temporary directory is removed again.
    @param stream: the stream containing the gem
    @param destdir: the directory to unpack into
    @param extract: sub-directory of the payload to extract, or None for everything
    @param start_offset: offset of the gem within 'stream'
    @since: 0.53"""
    stream.seek(start_offset)
    payload = 'data.tar.gz'
    payload_stream = None
    tmpdir = mkdtemp(dir = destdir)
    try:
        # extract_tar positions the stream itself, so the offset must be handed
        # on; otherwise it would read the gem from offset 0 again.
        extract_tar(stream, destdir=tmpdir, extract=payload, decompress=None, start_offset=start_offset)
        # Binary mode: the payload is compressed data, not text
        payload_stream = open(os.path.join(tmpdir, payload), 'rb')
        extract_tar(payload_stream, destdir=destdir, extract=extract, decompress='gzip')
    finally:
        if payload_stream:
            payload_stream.close()
        ro_rmtree(tmpdir)
def extract_cab(stream, destdir, extract, start_offset = 0):
    """Unpack the Microsoft Cabinet archive in 'stream' into 'destdir' using cabextract.
    The archive is first copied to destdir/archive.cab, which is deleted again
    after a successful extraction.
    @param stream: the stream containing the archive
    @param destdir: the directory to unpack into
    @param extract: must be empty; selecting a sub-directory is not supported
    @param start_offset: offset of the archive within 'stream'
    @raise SafeException: if 'extract' is given, or cabextract fails
    @since: 0.24"""
    if extract:
        raise SafeException(_('Sorry, but the "extract" attribute is not yet supported for Cabinet files'))

    stream.seek(start_offset)
    # cabextract can't read from stdin, so make a copy...
    cab_copy_name = os.path.join(destdir, 'archive.cab')
    # Binary mode: a text-mode file rejects the bytes of the archive on
    # Python 3 and may translate line endings on some platforms.
    with open(cab_copy_name, 'wb') as cab_copy:
        shutil.copyfileobj(stream, cab_copy)

    _extract(stream, destdir, ['cabextract', '-s', '-q', 'archive.cab'])
    os.unlink(cab_copy_name)
def extract_dmg(stream, destdir, extract, start_offset = 0):
    """Unpack the Apple disk image in 'stream' into 'destdir' using hdiutil.
    The image is copied to destdir/archive.dmg, attached at a temporary mount
    point, its top-level contents are copied with 'cp -pR', and it is detached again.
    @param stream: the stream containing the disk image
    @param destdir: the directory to unpack into
    @param extract: must be empty; selecting a sub-directory is not supported
    @param start_offset: offset of the image within 'stream'
    @raise SafeException: if 'extract' is given
    @raise subprocess.CalledProcessError: if hdiutil or cp fails
    @since: 0.46"""
    if extract:
        raise SafeException(_('Sorry, but the "extract" attribute is not yet supported for DMGs'))

    stream.seek(start_offset)
    # hdiutil can't read from stdin, so make a copy...
    dmg_copy_name = os.path.join(destdir, 'archive.dmg')
    # Binary mode: a text-mode file rejects the bytes of the image on
    # Python 3 and may translate line endings on some platforms.
    with open(dmg_copy_name, 'wb') as dmg_copy:
        shutil.copyfileobj(stream, dmg_copy)

    mountpoint = mkdtemp(prefix='archive')
    subprocess.check_call(["hdiutil", "attach", "-quiet", "-mountpoint", mountpoint, "-nobrowse", dmg_copy_name])
    try:
        subprocess.check_call(["cp", "-pR"] + glob.glob("%s/*" % mountpoint) + [destdir])
    finally:
        # Don't leave the image mounted if the copy fails
        subprocess.check_call(["hdiutil", "detach", "-quiet", mountpoint])
    os.rmdir(mountpoint)
    os.unlink(dmg_copy_name)
def extract_zip(stream, destdir, extract, start_offset = 0):
    """Unpack the zip archive in 'stream' into 'destdir' using unzip.
    The archive is first copied to destdir/archive.zip, which is deleted again
    after a successful extraction.
    @param stream: the stream containing the archive
    @param destdir: the directory to unpack into
    @param extract: sub-directory to extract (restricted to a safe set of characters), or None for everything
    @param start_offset: offset of the archive within 'stream'
    @raise SafeException: if 'extract' contains a disallowed character, or unzip fails"""
    if extract:
        # Limit the characters we accept, to avoid sending dodgy
        # strings to zip
        if not re.match('^[a-zA-Z0-9][- _a-zA-Z0-9.]*$', extract):
            raise SafeException(_('Illegal character in extract attribute'))

    stream.seek(start_offset)
    # unzip can't read from stdin, so make a copy...
    zip_copy_name = os.path.join(destdir, 'archive.zip')
    # Binary mode: a text-mode file rejects the bytes of the archive on
    # Python 3 and may translate line endings on some platforms.
    with open(zip_copy_name, 'wb') as zip_copy:
        shutil.copyfileobj(stream, zip_copy)

    args = ['unzip', '-q', '-o', 'archive.zip']

    if extract:
        args.append(extract + '/*')

    _extract(stream, destdir, args)
    os.unlink(zip_copy_name)
def _find_decompressor(prog):
    """Return the path of the program 'prog' as found by find_in_path, or else the
    absolute path of the file named '_<prog>' in this module's directory."""
    path = find_in_path(prog)
    if not path:
        path = os.path.abspath(os.path.join(os.path.dirname(__file__), '_' + prog))
    return path

def extract_tar(stream, destdir, extract, decompress, start_offset = 0):
    """Unpack the tar archive in 'stream' into 'destdir'.
    GNU tar is used when available; otherwise Python's tarfile module is used,
    with lzma and xz archives first decompressed to a temporary file.
    @param stream: the stream containing the archive
    @param destdir: the directory to unpack into
    @param extract: sub-directory to extract (restricted to a safe set of characters), or None for everything
    @param decompress: one of None, 'bzip2', 'gzip', 'lzma' or 'xz'
    @param start_offset: offset of the archive within 'stream'
    @raise SafeException: if 'extract' contains a disallowed character, if extraction
    fails, or (tarfile fallback only) if nothing in the archive matched 'extract'"""
    if extract:
        # Limit the characters we accept, to avoid sending dodgy
        # strings to tar
        if not re.match('^[a-zA-Z0-9][- _a-zA-Z0-9.]*$', extract):
            raise SafeException(_('Illegal character in extract attribute'))

    assert decompress in [None, 'bzip2', 'gzip', 'lzma', 'xz']

    if _gnu_tar():
        ext_cmd = ['tar']
        if decompress:
            if decompress == 'bzip2':
                ext_cmd.append('--bzip2')
            elif decompress == 'gzip':
                ext_cmd.append('-z')
            elif decompress == 'lzma':
                ext_cmd.append('--use-compress-program=' + _find_decompressor('unlzma'))
            elif decompress == 'xz':
                ext_cmd.append('--use-compress-program=' + _find_decompressor('unxz'))

        if recent_gnu_tar():
            ext_cmd.extend(('-x', '--no-same-owner', '--no-same-permissions'))
        else:
            ext_cmd.extend(('xf', '-'))

        if extract:
            ext_cmd.append(extract)

        _extract(stream, destdir, ext_cmd, start_offset)
    else:
        import tempfile
        import tarfile

        # Since we don't have GNU tar, use python's tarfile module. This will probably
        # be a lot slower and we do not support lzma and xz; however, it is portable.
        # (lzma and xz are handled by first uncompressing stream to a temporary file.
        # this is simple to do, but less efficient than piping through the program)
        temp = None
        try:
            if decompress is None:
                rmode = 'r|'
            elif decompress == 'bzip2':
                rmode = 'r|bz2'
            elif decompress == 'gzip':
                rmode = 'r|gz'
            elif decompress in ('lzma', 'xz'):
                decompressor = _find_decompressor('un' + decompress)
                temp = tempfile.NamedTemporaryFile(suffix='.tar')
                # The offset applies to the compressed input, so position the
                # stream before decompressing; the decompressed copy is then
                # read from its beginning.
                stream.seek(start_offset)
                subprocess.check_call([decompressor], stdin=stream, stdout=temp)
                rmode = 'r|'
                stream = temp
                start_offset = 0
            else:
                raise SafeException(_('GNU tar unavailable; unsupported compression format: %s') % decompress)

            stream.seek(start_offset)
            # Python 2.5.1 crashes if name is None; see Python bug #1706850
            tar = tarfile.open(name = '', mode = rmode, fileobj = stream)
            try:
                # os.umask can only be read by setting it, so set and restore
                current_umask = os.umask(0)
                os.umask(current_umask)

                uid = gid = None
                try:
                    uid = os.geteuid()
                    gid = os.getegid()
                except AttributeError:
                    # These functions are not available on every platform
                    debug(_("Can't get uid/gid"))

                def chmod_extract(tarinfo):
                    # If any X bit is set, they all must be
                    if tarinfo.mode & 0o111:
                        tarinfo.mode |= 0o111

                    # Everyone gets read and write (subject to the umask)
                    # No special bits are allowed.
                    tarinfo.mode = ((tarinfo.mode | 0o666) & ~current_umask) & 0o777

                    # Don't change owner, even if run as root.
                    # Compare with None: root's uid and gid are 0, which a plain
                    # truth test would wrongly treat as "unknown".
                    if uid is not None:
                        tarinfo.uid = uid
                    if gid is not None:
                        tarinfo.gid = gid
                    tar.extract(tarinfo, destdir)

                extracted_anything = False
                ext_dirs = []

                for tarinfo in tar:
                    if extract is None or \
                       tarinfo.name.startswith(extract + '/') or \
                       tarinfo.name == extract:
                        if tarinfo.isdir():
                            ext_dirs.append(tarinfo)

                        chmod_extract(tarinfo)
                        extracted_anything = True

                # Due to a bug in tarfile (python versions < 2.5), we have to manually
                # set the mtime of each directory that we extract after extracting everything.

                for tarinfo in ext_dirs:
                    dirname = os.path.join(destdir, tarinfo.name)
                    os.utime(dirname, (tarinfo.mtime, tarinfo.mtime))
            finally:
                tar.close()
        finally:
            if temp is not None:
                temp.close()

        if extract and not extracted_anything:
            raise SafeException(_('Unable to find specified file = %s in archive') % extract)
def _extract(stream, destdir, command, start_offset = 0):
    """Run 'command' inside destdir in a child process, with
    stream seeked to 'start_offset' as stdin.
    @param stream: file to use as the child's stdin
    @param destdir: working directory for the child
    @param command: the program and its arguments, as a sequence
    @param start_offset: position to seek 'stream' to before starting the child
    @raise SafeException: if the child exits with a non-zero status (the message includes its stderr output)"""

    # Some zip archives are missing timezone information; force consistent results
    child_env = os.environ.copy()
    child_env['TZ'] = 'GMT'

    stream.seek(start_offset)

    # TODO: use pola-run if available, once it supports fchmod
    child = subprocess.Popen(command, cwd = destdir, stdin = stream, stderr = subprocess.PIPE, env = child_env)

    # communicate() waits for the child to exit, so returncode is set afterwards
    unused, cerr = child.communicate()

    status = child.returncode
    if status != 0:
        # On Python 3 the captured stderr is bytes; decode it so that the
        # message shows the text rather than a b'...' repr.
        if not isinstance(cerr, str):
            cerr = cerr.decode('utf-8', 'replace')
        raise SafeException(_('Failed to extract archive (using %(command)s); exit code %(status)d:\n%(err)s') % {'command': command, 'status': status, 'err': cerr.strip()})