Updated uses of deprecated file() function
[zeroinstall.git] / zeroinstall / zerostore / unpack.py
blobf2dddc3ae506975d8e6608124f1b2fcf3d7025e6
1 """Unpacking archives of various formats."""
3 # Copyright (C) 2009, Thomas Leonard
4 # See the README file for details, or visit http://0install.net.
6 from zeroinstall import _
7 import os, subprocess
8 import shutil
9 import glob
10 import traceback
11 from tempfile import mkdtemp, mkstemp
12 import re
13 from logging import debug, warn
14 import errno
15 from zeroinstall import SafeException
16 from zeroinstall.support import find_in_path, ro_rmtree
_cpio_version = None


def _get_cpio_version():
    """Return the first line printed by C{cpio --version}.
    The command is run at most once per process; the answer is cached in the
    module-level C{_cpio_version}. stderr is merged into stdout (C{2>&1}).
    @return: the first line of output (not stripped), or '' if the command printed nothing
    @rtype: str"""
    global _cpio_version
    if _cpio_version is None:
        pipe = os.popen('cpio --version 2>&1')
        try:
            # readline() rather than the Python-2-only .next() method: it
            # exists on both Python 2 and 3 and returns '' instead of raising
            # StopIteration when there is no output at all.
            _cpio_version = pipe.readline()
        finally:
            pipe.close()
        debug(_("cpio version = %s"), _cpio_version)
    return _cpio_version
def _gnu_cpio():
    """Tell whether the installed cpio identifies itself as GNU cpio.
    @return: True if '(GNU cpio)' appears in the cached version line
    @rtype: bool"""
    version_line = _get_cpio_version()
    is_gnu = version_line.find('(GNU cpio)') != -1
    debug(_("Is GNU cpio = %s"), is_gnu)
    return is_gnu
_tar_version = None


def _get_tar_version():
    """Return the first line printed by C{tar --version}, stripped of surrounding whitespace.
    The command is run at most once per process; the answer is cached in the
    module-level C{_tar_version}. stderr is merged into stdout (C{2>&1}).
    @return: the stripped first line of output, or '' if the command printed nothing
    @rtype: str"""
    global _tar_version
    if _tar_version is None:
        pipe = os.popen('tar --version 2>&1')
        try:
            # readline() rather than the Python-2-only .next() method: it
            # exists on both Python 2 and 3 and returns '' instead of raising
            # StopIteration when there is no output at all.
            _tar_version = pipe.readline().strip()
        finally:
            pipe.close()
        debug(_("tar version = %s"), _tar_version)
    return _tar_version
def _gnu_tar():
    """Tell whether the installed tar identifies itself as GNU tar.
    @return: True if '(GNU tar)' appears in the cached version line
    @rtype: bool"""
    version_line = _get_tar_version()
    is_gnu = version_line.find('(GNU tar)') != -1
    debug(_("Is GNU tar = %s"), is_gnu)
    return is_gnu
def recent_gnu_tar():
    """@deprecated: should be private

    Tell whether the installed tar is GNU tar with a version number greater
    than 1.13.92. A warning is logged if it is GNU tar but no version number
    can be found in its version line.
    @return: True only for GNU tar newer than 1.13.92
    @rtype: bool"""
    is_recent = False
    if _gnu_tar():
        match = re.search(r'\)\s*(\d+(\.\d+)*)', _get_tar_version())
        if match:
            # Build a real list: on Python 3 map() returns an iterator, and
            # comparing that with a list raises TypeError.
            version = [int(part) for part in match.group(1).split('.')]
            is_recent = version > [1, 13, 92]
        else:
            warn(_("Failed to extract GNU tar version number"))
    debug(_("Recent GNU tar = %s"), is_recent)
    return is_recent
# Path of the 'pola-run' sandbox launcher, or None when sandboxing is not used.
# _exec_maybe_sandboxed() executes programs directly whenever this is None.
# Disabled, as Plash does not currently support fchmod(2).
_pola_run = None
# Detection code, kept commented out while sandboxing is disabled:
#_pola_run = find_in_path('pola-run')
#if _pola_run:
#    info('Found pola-run: %s', _pola_run)
#else:
#    info('pola-run not found; archive extraction will not be sandboxed')
def type_from_url(url):
    """Guess the MIME type for this resource based on its URL. Returns None if we don't know what it is.
    The match is case-insensitive and looks only at the end of the URL.
    @param url: the URL (or file name) of the archive
    @return: the MIME type, or None if the extension is not recognised"""
    # (suffix, MIME type) pairs, tried in this order
    known_suffixes = (
        ('.rpm', 'application/x-rpm'),
        ('.deb', 'application/x-deb'),
        ('.tar.bz2', 'application/x-bzip-compressed-tar'),
        ('.tar.gz', 'application/x-compressed-tar'),
        ('.tar.lzma', 'application/x-lzma-compressed-tar'),
        ('.tar.xz', 'application/x-xz-compressed-tar'),
        ('.tbz', 'application/x-bzip-compressed-tar'),
        ('.tgz', 'application/x-compressed-tar'),
        ('.tlz', 'application/x-lzma-compressed-tar'),
        ('.txz', 'application/x-xz-compressed-tar'),
        ('.tar', 'application/x-tar'),
        ('.zip', 'application/zip'),
        ('.cab', 'application/vnd.ms-cab-compressed'),
        ('.dmg', 'application/x-apple-diskimage'),
        ('.gem', 'application/x-ruby-gem'),
    )
    lowered = url.lower()
    for suffix, mime_type in known_suffixes:
        if lowered.endswith(suffix):
            return mime_type
    return None
def check_type_ok(mime_type):
    """Check we have the needed software to extract from an archive of the given type.
    Types that need no external helper are accepted without any lookup.
    @param mime_type: the MIME type of the archive (must not be empty)
    @raise SafeException: if the needed software is not available, or the type is not supported"""
    assert mime_type

    if mime_type in ('application/x-compressed-tar', 'application/x-tar', 'application/x-ruby-gem'):
        return
    if mime_type == 'application/x-bzip-compressed-tar':
        return  # We'll fall back to Python's built-in tar.bz2 support
    if mime_type == 'application/x-lzma-compressed-tar':
        return  # We can get it through Zero Install

    if mime_type == 'application/x-rpm':
        if find_in_path('rpm2cpio'):
            return
        raise SafeException(_("This package looks like an RPM, but you don't have the rpm2cpio command "
                "I need to extract it. Install the 'rpm' package first (this works even if "
                "you're on a non-RPM-based distribution such as Debian)."))

    if mime_type == 'application/x-deb':
        if find_in_path('ar'):
            return
        raise SafeException(_("This package looks like a Debian package, but you don't have the 'ar' command "
                "I need to extract it. Install the package containing it (sometimes called 'binutils') "
                "first. This works even if you're on a non-Debian-based distribution such as Red Hat)."))

    if mime_type == 'application/zip':
        if find_in_path('unzip'):
            return
        raise SafeException(_("This package looks like a zip-compressed archive, but you don't have the 'unzip' command "
                "I need to extract it. Install the package containing it first."))

    if mime_type == 'application/vnd.ms-cab-compressed':
        if find_in_path('cabextract'):
            return
        raise SafeException(_("This package looks like a Microsoft Cabinet archive, but you don't have the 'cabextract' command "
                "I need to extract it. Install the package containing it first."))

    if mime_type == 'application/x-apple-diskimage':
        if find_in_path('hdiutil'):
            return
        raise SafeException(_("This package looks like a Apple Disk Image, but you don't have the 'hdiutil' command "
                "I need to extract it."))

    if mime_type == 'application/x-xz-compressed-tar':
        if find_in_path('unxz'):
            return
        raise SafeException(_("This package looks like a xz-compressed package, but you don't have the 'unxz' command "
                "I need to extract it. Install the package containing it (it's probably called 'xz-utils') "
                "first."))

    # Anything else is a type we do not know how to unpack.
    from zeroinstall import version
    raise SafeException(_("Unsupported archive type '%(type)s' (for injector version %(version)s)") % {'type': mime_type, 'version': version})
def _exec_maybe_sandboxed(writable, prog, *args):
    """Replace the current process with 'prog', sandboxed when pola-run is configured.
    With a sandbox, (only) the 'writable' directory is made writable; without one
    (C{_pola_run} is None) the program is executed directly.
    @param writable: directory the program may write to, or None
    @param prog: name of the program, looked up in $PATH
    @param args: arguments passed to the program
    @raise Exception: if 'prog' cannot be found in $PATH"""
    executable = find_in_path(prog)
    if not executable:
        raise Exception(_("'%s' not found in $PATH") % prog)

    if _pola_run is None:
        # No sandbox available: exec the program itself.
        os.execlp(executable, executable, *args)

    # We have pola-shell :-)
    sandbox_args = ['--prog', executable, '-f', '/']
    for arg in args:
        sandbox_args.extend(('-a', arg))
    if writable:
        sandbox_args.extend(('-fw', writable))
    os.execl(_pola_run, _pola_run, *sandbox_args)
def unpack_archive_over(url, data, destdir, extract = None, type = None, start_offset = 0):
    """Like unpack_archive, except that we unpack to a temporary directory first and
    then move things over, checking that we're not following symlinks at each stage.
    Use this when you want to unpack an unarchive into a directory which already has
    stuff in it.
    @param url: used to guess the archive type when 'type' is None
    @param data: stream containing the archive
    @param destdir: existing directory to unpack into
    @param extract: single top-level directory to take from the archive, or None for everything
    @param type: MIME type of the archive, or None to guess from 'url'
    @param start_offset: offset of the archive within 'data'
    @raise SafeException: if a directory or file would be unpacked over a symlink,
    or a directory over a non-directory
    @note: Since 0.49, the leading "extract" component is removed (unlike unpack_archive).
    @since: 0.28"""
    import stat
    # Check the argument before creating the temporary directory, so that a
    # rejected call does not leave an empty directory behind in destdir.
    assert extract is None or os.sep not in extract, extract
    tmpdir = mkdtemp(dir = destdir)
    try:
        mtimes = []

        unpack_archive(url, data, tmpdir, extract, type, start_offset)

        if extract is None:
            srcdir = tmpdir
        else:
            srcdir = os.path.join(tmpdir, extract)
            assert not os.path.islink(srcdir)

        stem_len = len(srcdir)
        for root, dirs, files in os.walk(srcdir):
            relative_root = root[stem_len + 1:] or '.'
            target_root = os.path.join(destdir, relative_root)
            try:
                info = os.lstat(target_root)
            except OSError as ex:
                if ex.errno != errno.ENOENT:
                    raise   # Some odd error.
                # Doesn't exist. OK.
                os.mkdir(target_root)
            else:
                if stat.S_ISLNK(info.st_mode):
                    raise SafeException(_('Attempt to unpack dir over symlink "%s"!') % relative_root)
                elif not stat.S_ISDIR(info.st_mode):
                    raise SafeException(_('Attempt to unpack dir over non-directory "%s"!') % relative_root)
            # 'root' as yielded by os.walk already starts with srcdir; joining
            # it onto srcdir again would give a wrong path for a relative srcdir.
            mtimes.append((relative_root, os.lstat(root).st_mtime))

            for s in dirs:  # Symlinks are counted as directories
                src = os.path.join(srcdir, relative_root, s)
                if os.path.islink(src):
                    # Move the link itself, like a file; os.walk will not descend into it.
                    files.append(s)

            for f in files:
                src = os.path.join(srcdir, relative_root, f)
                dest = os.path.join(destdir, relative_root, f)
                if os.path.islink(dest):
                    raise SafeException(_('Attempt to unpack file over symlink "%s"!') %
                                    os.path.join(relative_root, f))
                os.rename(src, dest)

        # Moving entries in changed the directory mtimes; put back the ones
        # recorded from the unpacked tree (the first entry, the root, is skipped).
        for path, mtime in mtimes[1:]:
            os.utime(os.path.join(destdir, path), (mtime, mtime))
    finally:
        ro_rmtree(tmpdir)
def unpack_archive(url, data, destdir, extract = None, type = None, start_offset = 0):
    """Unpack stream 'data' into directory 'destdir'. If extract is given, extract just
    that sub-directory from the archive (i.e. destdir/extract will exist afterwards).
    Works out the format from the name, unless a MIME type is given.
    @param url: used to guess the archive type when 'type' is None
    @param data: stream containing the archive
    @param destdir: directory to unpack into
    @param extract: sub-directory to extract, or None for everything
    @param type: MIME type of the archive, or None to guess from 'url'
    @param start_offset: offset of the archive within 'data'
    @raise SafeException: if the type cannot be guessed or is not a known MIME type"""
    if type is None:
        type = type_from_url(url)
        if type is None:
            raise SafeException(_("Unknown extension (and no MIME type given) in '%s'") % url)

    # The tar family shares one extractor, parameterised by compression.
    tar_compression = {
        'application/x-tar': None,
        'application/x-compressed-tar': 'gzip',
        'application/x-bzip-compressed-tar': 'bzip2',
        'application/x-lzma-compressed-tar': 'lzma',
        'application/x-xz-compressed-tar': 'xz',
    }
    if type in tar_compression:
        extract_tar(data, destdir, extract, tar_compression[type], start_offset)
        return

    # Every other format has its own extractor with a common signature.
    extractors = {
        'application/x-deb': extract_deb,
        'application/x-rpm': extract_rpm,
        'application/zip': extract_zip,
        'application/vnd.ms-cab-compressed': extract_cab,
        'application/x-apple-diskimage': extract_dmg,
        'application/x-ruby-gem': extract_gem,
    }
    extractor = extractors.get(type)
    if extractor is None:
        raise SafeException(_('Unknown MIME type "%(type)s" for "%(url)s"') % {'type': type, 'url': url})
    extractor(data, destdir, extract, start_offset)
def extract_deb(stream, destdir, extract = None, start_offset = 0):
    """Unpack the payload (data.tar*) of a Debian package into 'destdir'.
    The package is first copied to destdir/archive.deb, because ar can't read
    from stdin; the copy is removed again afterwards, even on failure.
    @param stream: seekable binary stream containing the package
    @param destdir: directory to unpack into
    @param extract: not supported for this format; must be empty
    @param start_offset: offset of the package within 'stream'
    @raise SafeException: if 'extract' is given, if no data.tar member is found,
    or if extraction fails"""
    if extract:
        raise SafeException(_('Sorry, but the "extract" attribute is not yet supported for Debs'))

    # Compression of the payload, keyed by the name of the ar member holding it.
    payload_compression = {
        'data.tar': None,
        'data.tar.gz': 'gzip',
        'data.tar.bz2': 'bzip2',
        'data.tar.lzma': 'lzma',
        'data.tar.xz': 'xz',
    }

    stream.seek(start_offset)
    # ar can't read from stdin, so make a copy...
    deb_copy_name = os.path.join(destdir, 'archive.deb')
    # Binary mode: the package is not text, and a text-mode file would reject
    # (Python 3) or could mangle (newline translation) the bytes written to it.
    deb_copy = open(deb_copy_name, 'wb')
    try:
        with deb_copy:
            shutil.copyfileobj(stream, deb_copy)

        p = subprocess.Popen(('ar', 't', 'archive.deb'), stdout=subprocess.PIPE, cwd=destdir, universal_newlines=True)
        listing = p.communicate()[0]
        for line in listing.split('\n'):
            if line in payload_compression:
                data_tar = line
                break
        else:
            raise SafeException(_("File is not a Debian package."))
        data_compression = payload_compression[data_tar]

        _extract(stream, destdir, ('ar', 'x', 'archive.deb', data_tar))
    finally:
        os.unlink(deb_copy_name)

    data_name = os.path.join(destdir, data_tar)
    data_stream = open(data_name, 'rb')
    try:
        # Remove the name while the file is still open, so the intermediate
        # tarball does not end up among the unpacked files.
        os.unlink(data_name)
        extract_tar(data_stream, destdir, None, data_compression)
    finally:
        data_stream.close()
def extract_rpm(stream, destdir, extract = None, start_offset = 0):
    """Unpack an RPM into 'destdir' by converting it with rpm2cpio and then running cpio.
    The intermediate cpio archive is written to a temporary file, which is
    always removed. Afterwards the mtime of every directory under 'destdir'
    is set to 0.
    @param stream: stream containing the package; must have a real file descriptor (fileno())
    @param destdir: directory to unpack into
    @param extract: not supported for this format; must be empty
    @param start_offset: offset of the package within 'stream'
    @raise SafeException: if 'extract' is given, or rpm2cpio or cpio fails"""
    if extract:
        raise SafeException(_('Sorry, but the "extract" attribute is not yet supported for RPMs'))
    fd, cpiopath = mkstemp('-rpm-tmp')
    try:
        child = os.fork()
        if child == 0:
            # In the child: wire up stdin/stdout and exec rpm2cpio. Whatever
            # goes wrong, this process must exit here and never return into
            # the caller's code, hence the catch-all and the unconditional _exit.
            try:
                try:
                    os.dup2(stream.fileno(), 0)
                    os.lseek(0, start_offset, 0)
                    os.dup2(fd, 1)
                    _exec_maybe_sandboxed(None, 'rpm2cpio', '-')
                except:
                    traceback.print_exc()
            finally:
                os._exit(1)
        pid, status = os.waitpid(child, 0)
        assert pid == child
        if status != 0:
            raise SafeException(_("rpm2cpio failed; can't unpack RPM archive; exit code %d") % status)
        os.close(fd)
        fd = None

        args = ['cpio', '-mid']
        if _gnu_cpio():
            args.append('--quiet')

        # Binary mode, and closed as soon as cpio has finished with it.
        with open(cpiopath, 'rb') as cpio_stream:
            _extract(cpio_stream, destdir, args)

        # Set the mtime of every directory under 'tmp' to 0, since cpio doesn't
        # preserve directory mtimes.
        # (os.walk rather than os.path.walk, which no longer exists in Python 3.)
        for dirname, unused_dirs, unused_files in os.walk(destdir):
            os.utime(dirname, (0, 0))
    finally:
        if fd is not None:
            os.close(fd)
        os.unlink(cpiopath)
def extract_gem(stream, destdir, extract = None, start_offset = 0):
    """Unpack a Ruby gem: a plain tar archive whose 'data.tar.gz' member holds the files.
    The member is first extracted to a temporary directory inside 'destdir'
    (always removed again) and then unpacked into 'destdir'.
    @param stream: seekable stream containing the gem
    @param destdir: directory to unpack into
    @param extract: sub-directory of the payload to extract, or None for everything
    @param start_offset: offset of the gem within 'stream'
    @since: 0.53"""
    stream.seek(start_offset)
    payload = 'data.tar.gz'
    payload_stream = None
    tmpdir = mkdtemp(dir = destdir)
    try:
        # start_offset must be passed on: extract_tar repositions the stream
        # itself and would otherwise rewind it to offset 0.
        extract_tar(stream, destdir=tmpdir, extract=payload, decompress=None, start_offset=start_offset)
        payload_stream = open(os.path.join(tmpdir, payload), 'rb')
        extract_tar(payload_stream, destdir=destdir, extract=extract, decompress='gzip')
    finally:
        if payload_stream:
            payload_stream.close()
        ro_rmtree(tmpdir)
def extract_cab(stream, destdir, extract, start_offset = 0):
    """Unpack a Microsoft Cabinet archive into 'destdir' using cabextract.
    The archive is first copied to destdir/archive.cab; the copy is removed
    again afterwards, even on failure.
    @param stream: seekable binary stream containing the archive
    @param destdir: directory to unpack into
    @param extract: not supported for this format; must be empty
    @param start_offset: offset of the archive within 'stream'
    @raise SafeException: if 'extract' is given, or cabextract fails
    @since: 0.24"""
    if extract:
        raise SafeException(_('Sorry, but the "extract" attribute is not yet supported for Cabinet files'))

    stream.seek(start_offset)
    # cabextract can't read from stdin, so make a copy...
    cab_copy_name = os.path.join(destdir, 'archive.cab')
    # Binary mode: the archive is not text.
    cab_copy = open(cab_copy_name, 'wb')
    try:
        with cab_copy:
            shutil.copyfileobj(stream, cab_copy)

        _extract(stream, destdir, ['cabextract', '-s', '-q', 'archive.cab'])
    finally:
        os.unlink(cab_copy_name)
def extract_dmg(stream, destdir, extract, start_offset = 0):
    """Unpack an Apple Disk Image into 'destdir'.
    The image is copied to destdir/archive.dmg, attached with hdiutil on a
    temporary mount point, its (non-hidden) top-level entries are copied with
    C{cp -pR}, and the image is detached again. The mount point and the copy
    are cleaned up even if a step fails.
    @param stream: seekable binary stream containing the image
    @param destdir: directory to unpack into
    @param extract: not supported for this format; must be empty
    @param start_offset: offset of the image within 'stream'
    @raise SafeException: if 'extract' is given
    @raise subprocess.CalledProcessError: if hdiutil or cp fails
    @since: 0.46"""
    if extract:
        raise SafeException(_('Sorry, but the "extract" attribute is not yet supported for DMGs'))

    stream.seek(start_offset)
    # hdiutil can't read from stdin, so make a copy...
    dmg_copy_name = os.path.join(destdir, 'archive.dmg')
    # Binary mode: the image is not text.
    dmg_copy = open(dmg_copy_name, 'wb')
    try:
        with dmg_copy:
            shutil.copyfileobj(stream, dmg_copy)

        mountpoint = mkdtemp(prefix='archive')
        try:
            subprocess.check_call(["hdiutil", "attach", "-quiet", "-mountpoint", mountpoint, "-nobrowse", dmg_copy_name])
            try:
                subprocess.check_call(["cp", "-pR"] + glob.glob("%s/*" % mountpoint) + [destdir])
            finally:
                # Never leave the image mounted, even if the copy failed.
                subprocess.check_call(["hdiutil", "detach", "-quiet", mountpoint])
        finally:
            os.rmdir(mountpoint)
    finally:
        os.unlink(dmg_copy_name)
def extract_zip(stream, destdir, extract, start_offset = 0):
    """Unpack a zip archive into 'destdir' using unzip.
    The archive is first copied to destdir/archive.zip; the copy is removed
    again afterwards, even on failure.
    @param stream: seekable binary stream containing the archive
    @param destdir: directory to unpack into
    @param extract: sub-directory to extract, or None/empty for everything
    @param start_offset: offset of the archive within 'stream'
    @raise SafeException: if 'extract' contains a disallowed character, or unzip fails"""
    if extract:
        # Limit the characters we accept, to avoid sending dodgy
        # strings to zip
        if not re.match('^[a-zA-Z0-9][- _a-zA-Z0-9.]*$', extract):
            raise SafeException(_('Illegal character in extract attribute'))

    stream.seek(start_offset)
    # unzip can't read from stdin, so make a copy...
    zip_copy_name = os.path.join(destdir, 'archive.zip')
    # Binary mode: the archive is not text.
    zip_copy = open(zip_copy_name, 'wb')
    try:
        with zip_copy:
            shutil.copyfileobj(stream, zip_copy)

        args = ['unzip', '-q', '-o', 'archive.zip']

        if extract:
            args.append(extract + '/*')

        _extract(stream, destdir, args)
    finally:
        os.unlink(zip_copy_name)
def _find_decompressor(prog):
    """Return the path of 'prog' found in $PATH, or else the path of the
    '_<prog>' file located next to this module."""
    path = find_in_path(prog)
    if not path:
        path = os.path.abspath(os.path.join(os.path.dirname(__file__), '_' + prog))
    return path


def extract_tar(stream, destdir, extract, decompress, start_offset = 0):
    """Unpack a (possibly compressed) tar archive into 'destdir'.
    GNU tar is used when available; otherwise Python's tarfile module is used,
    with lzma and xz input first decompressed to a temporary file.
    @param stream: seekable stream containing the archive
    @param destdir: directory to unpack into
    @param extract: sub-directory (or member) to extract, or None/empty for everything
    @param decompress: None, 'bzip2', 'gzip', 'lzma' or 'xz'
    @param start_offset: offset of the archive within 'stream'
    @raise SafeException: if 'extract' contains a disallowed character, if extraction
    fails, if 'extract' matches nothing (tarfile fallback), or if a member name is
    absolute or contains '..' (tarfile fallback)"""
    if extract:
        # Limit the characters we accept, to avoid sending dodgy
        # strings to tar
        if not re.match('^[a-zA-Z0-9][- _a-zA-Z0-9.]*$', extract):
            raise SafeException(_('Illegal character in extract attribute'))

    assert decompress in [None, 'bzip2', 'gzip', 'lzma', 'xz']

    if _gnu_tar():
        ext_cmd = ['tar']
        if decompress == 'bzip2':
            ext_cmd.append('--bzip2')
        elif decompress == 'gzip':
            ext_cmd.append('-z')
        elif decompress in ('lzma', 'xz'):
            ext_cmd.append('--use-compress-program=' + _find_decompressor('un' + decompress))

        if recent_gnu_tar():
            ext_cmd.extend(('-x', '--no-same-owner', '--no-same-permissions'))
        else:
            ext_cmd.extend(('xf', '-'))

        if extract:
            ext_cmd.append(extract)

        _extract(stream, destdir, ext_cmd, start_offset)
        return

    # Since we don't have GNU tar, use python's tarfile module. This will probably
    # be a lot slower and we do not support lzma and xz; however, it is portable.
    # (lzma and xz are handled by first uncompressing stream to a temporary file.
    # this is simple to do, but less efficient than piping through the program)
    import tarfile
    import tempfile

    temp = None
    try:
        if decompress is None:
            rmode = 'r|'
        elif decompress == 'bzip2':
            rmode = 'r|bz2'
        elif decompress == 'gzip':
            rmode = 'r|gz'
        elif decompress in ('lzma', 'xz'):
            decompressor = _find_decompressor('un' + decompress)
            temp = tempfile.NamedTemporaryFile(suffix='.tar')
            # The offset refers to the compressed input, so apply it before
            # the decompressor reads from the stream...
            stream.seek(start_offset)
            subprocess.check_call((decompressor), stdin=stream, stdout=temp)
            rmode = 'r|'
            # ...and read the decompressed copy from its beginning.
            stream = temp
            start_offset = 0
        else:
            raise SafeException(_('GNU tar unavailable; unsupported compression format: %s') % decompress)

        stream.seek(start_offset)
        # Python 2.5.1 crashes if name is None; see Python bug #1706850
        tar = tarfile.open(name = '', mode = rmode, fileobj = stream)
        try:
            # os.umask() can only be read by setting it, so set and restore.
            current_umask = os.umask(0)
            os.umask(current_umask)

            uid = gid = None
            try:
                uid = os.geteuid()
                gid = os.getegid()
            except AttributeError:
                # These functions are not available on every platform.
                debug(_("Can't get uid/gid"))

            extracted_anything = False
            ext_dirs = []

            for tarinfo in tar:
                name = tarinfo.name
                # A false 'extract' (None or '') means "everything", as in the GNU tar branch.
                if extract and name != extract and not name.startswith(extract + '/'):
                    continue

                # Refuse names that would by themselves land outside destdir.
                if os.path.isabs(name) or '..' in name.split('/'):
                    raise SafeException(_("Archive contains an unsafe path: '%s'") % name)

                if tarinfo.isdir():
                    ext_dirs.append(tarinfo)

                # If any X bit is set, they all must be
                if tarinfo.mode & 0o111:
                    tarinfo.mode |= 0o111

                # Everyone gets read and write (subject to the umask)
                # No special bits are allowed.
                tarinfo.mode = ((tarinfo.mode | 0o666) & ~current_umask) & 0o777

                # Don't change owner, even if run as root
                # ('is not None', because root's uid and gid are 0, which is false).
                if uid is not None:
                    tarinfo.uid = uid
                if gid is not None:
                    tarinfo.gid = gid
                tar.extract(tarinfo, destdir)
                extracted_anything = True

            # Due to a bug in tarfile (python versions < 2.5), we have to manually
            # set the mtime of each directory that we extract after extracting everything.
            for tarinfo in ext_dirs:
                dirname = os.path.join(destdir, tarinfo.name)
                os.utime(dirname, (tarinfo.mtime, tarinfo.mtime))
        finally:
            tar.close()
    finally:
        if temp is not None:
            temp.close()

    if extract and not extracted_anything:
        raise SafeException(_('Unable to find specified file = %s in archive') % extract)
def _extract(stream, destdir, command, start_offset = 0):
    """Run 'command' as a child process inside destdir, with stream
    seeked to 'start_offset' as its stdin, and wait for it to finish.
    The child runs with TZ=GMT and its stderr is captured.
    @param stream: seekable stream handed to the child as stdin
    @param destdir: working directory for the child
    @param command: the argument list to execute
    @param start_offset: position to seek 'stream' to first
    @raise SafeException: if the command exits with a non-zero status"""
    # Some zip archives are missing timezone information; force consistent results
    environment = dict(os.environ, TZ = 'GMT')

    stream.seek(start_offset)

    # TODO: use pola-run if available, once it supports fchmod
    process = subprocess.Popen(command, cwd = destdir, stdin = stream, stderr = subprocess.PIPE, env = environment)

    errors = process.communicate()[1]

    exit_status = process.returncode
    if exit_status == 0:
        return
    raise SafeException(_('Failed to extract archive (using %(command)s); exit code %(status)d:\n%(err)s') % {'command': command, 'status': exit_status, 'err': errors.strip()})