Update year to 2009 in various places
[zeroinstall/zeroinstall-rsl.git] / zeroinstall / zerostore / unpack.py
blob92016b5e73aad74ddda6c10443de763f85ddda08
1 """Unpacking archives of various formats."""
3 # Copyright (C) 2009, Thomas Leonard
4 # See the README file for details, or visit http://0install.net.
6 import os, subprocess
7 import shutil
8 import traceback
9 from tempfile import mkdtemp, mkstemp
10 import re
11 from logging import debug, warn
12 from zeroinstall import SafeException
13 from zeroinstall.support import find_in_path, ro_rmtree
_cpio_version = None
def _get_cpio_version():
    """Return the first line printed by 'cpio --version', caching the result.

    The command is run through the shell with stderr merged into stdout.
    The line is returned exactly as read (it is not stripped); an empty
    string is returned if the command printed nothing at all.
    """
    global _cpio_version
    if _cpio_version is None:
        pipe = os.popen('cpio --version 2>&1')
        try:
            # readline() gives '' on empty output rather than raising
            # StopIteration, and works on both old and new file objects.
            _cpio_version = pipe.readline()
        finally:
            # Close explicitly so the pipe is not left open.
            pipe.close()
        debug("cpio version = %s", _cpio_version)
    return _cpio_version
def _gnu_cpio():
    """Tell whether the cpio version line contains the '(GNU cpio)' marker."""
    version_line = _get_cpio_version()
    is_gnu = '(GNU cpio)' in version_line
    debug("Is GNU cpio = %s", is_gnu)
    return is_gnu
_tar_version = None
def _get_tar_version():
    """Return the first line printed by 'tar --version', caching the result.

    The command is run through the shell with stderr merged into stdout.
    Surrounding whitespace is stripped from the line; an empty string is
    returned if the command printed nothing at all.
    """
    global _tar_version
    if _tar_version is None:
        pipe = os.popen('tar --version 2>&1')
        try:
            # readline() gives '' on empty output rather than raising
            # StopIteration, and works on both old and new file objects.
            _tar_version = pipe.readline().strip()
        finally:
            # Close explicitly so the pipe is not left open.
            pipe.close()
        debug("tar version = %s", _tar_version)
    return _tar_version
def _gnu_tar():
    """Tell whether the tar version line contains the '(GNU tar)' marker."""
    version_line = _get_tar_version()
    is_gnu = '(GNU tar)' in version_line
    debug("Is GNU tar = %s", is_gnu)
    return is_gnu
def recent_gnu_tar():
    """Tell whether tar is GNU tar with a version newer than 1.13.92.

    Returns False for non-GNU tar, or (after logging a warning) when no
    version number can be found in the version line.

    @deprecated: should be private"""
    is_recent = False
    if _gnu_tar():
        # The number follows the closing bracket of "(GNU tar)".
        found = re.search(r'\)\s*(\d+(\.\d+)*)', _get_tar_version())
        if not found:
            warn("Failed to extract GNU tar version number")
        else:
            components = [int(part) for part in found.group(1).split('.')]
            # Lists compare element by element, so this is a version comparison.
            is_recent = components > [1, 13, 92]
    debug("Recent GNU tar = %s", is_recent)
    return is_recent
# Path of the pola-run sandbox helper, or None to extract without a sandbox.
# Sandboxing is switched off for now because Plash does not currently support
# fchmod(2); the lookup that would enable it is kept below, commented out.
_pola_run = None
#_pola_run = find_in_path('pola-run')
#if _pola_run:
#    info('Found pola-run: %s', _pola_run)
#else:
#    info('pola-run not found; archive extraction will not be sandboxed')
def type_from_url(url):
    """Guess an archive's MIME type from the extension at the end of its URL.

    The comparison ignores case. Returns None if the extension is not one
    we know about."""
    known_suffixes = (
        ('.rpm', 'application/x-rpm'),
        ('.deb', 'application/x-deb'),
        ('.tar.bz2', 'application/x-bzip-compressed-tar'),
        ('.tar.gz', 'application/x-compressed-tar'),
        ('.tar.lzma', 'application/x-lzma-compressed-tar'),    # XXX: No registered MIME type!
        ('.tgz', 'application/x-compressed-tar'),
        ('.tar', 'application/x-tar'),
        ('.zip', 'application/zip'),
        ('.cab', 'application/vnd.ms-cab-compressed'),
    )
    lowered = url.lower()
    for suffix, mime_type in known_suffixes:
        if lowered.endswith(suffix):
            return mime_type
    return None
def check_type_ok(mime_type):
    """Check we have the needed software to extract from an archive of the given type.

    Returns None when nothing is missing. For the tar, gzip-tar and lzma-tar
    types no helper program is looked up at all.

    @param mime_type: MIME type of the archive (must not be empty)
    @raise SafeException: if the needed software is not available, or the
    type is not one this module supports"""
    assert mime_type

    # No helper to look for: lzma support can be obtained through Zero
    # Install, and plain or gzip-compressed tar need no check here.
    if mime_type in ('application/x-lzma-compressed-tar',
                     'application/x-compressed-tar',
                     'application/x-tar'):
        return

    # MIME type -> (helper program to find in $PATH, message if it is missing)
    required_helpers = {
        'application/x-rpm': ('rpm2cpio',
            "This package looks like an RPM, but you don't have the rpm2cpio command "
            "I need to extract it. Install the 'rpm' package first (this works even if "
            "you're on a non-RPM-based distribution such as Debian)."),
        # The original message ended with an unbalanced ')'; removed here.
        'application/x-deb': ('ar',
            "This package looks like a Debian package, but you don't have the 'ar' command "
            "I need to extract it. Install the package containing it (sometimes called 'binutils') "
            "first. This works even if you're on a non-Debian-based distribution such as Red Hat."),
        'application/x-bzip-compressed-tar': ('bunzip2',
            "This package looks like a bzip2-compressed package, but you don't have the 'bunzip2' command "
            "I need to extract it. Install the package containing it (it's probably called 'bzip2') "
            "first."),
        'application/zip': ('unzip',
            "This package looks like a zip-compressed archive, but you don't have the 'unzip' command "
            "I need to extract it. Install the package containing it first."),
        'application/vnd.ms-cab-compressed': ('cabextract',
            "This package looks like a Microsoft Cabinet archive, but you don't have the 'cabextract' command "
            "I need to extract it. Install the package containing it first."),
    }

    if mime_type not in required_helpers:
        from zeroinstall import version
        raise SafeException("Unsupported archive type '%s' (for injector version %s)" % (mime_type, version))

    helper, complaint = required_helpers[mime_type]
    if not find_in_path(helper):
        raise SafeException(complaint)
def _exec_maybe_sandboxed(writable, prog, *args):
    """Replace the current process with prog, sandboxed if a sandbox is configured.

    When _pola_run is set, prog is started through it with read access to '/'
    and, if 'writable' is given, write access to that directory only.
    When _pola_run is None, prog is executed directly with no sandbox.

    @raise Exception: if prog cannot be found in $PATH"""
    located = find_in_path(prog)
    if not located:
        raise Exception("'%s' not found in $PATH" % prog)

    if _pola_run is None:
        # No sandbox: hand over to the program directly.
        os.execvp(located, (located,) + args)

    # We have pola-shell :-)
    sandbox_argv = ['--prog', located, '-f', '/']
    for argument in args:
        sandbox_argv.extend(('-a', argument))
    if writable:
        sandbox_argv.extend(('-fw', writable))
    os.execv(_pola_run, [_pola_run] + sandbox_argv)
def unpack_archive_over(url, data, destdir, extract = None, type = None, start_offset = 0):
    """Like unpack_archive, except that we unpack to a temporary directory first and
    then move things over, checking that we're not following symlinks at each stage.
    Use this when you want to unpack an archive into a directory which already has
    stuff in it.

    The temporary directory is created inside destdir and is always removed
    afterwards, whether or not unpacking succeeded.

    @raise SafeException: if a directory would be unpacked over a symlink or a
    non-directory, or a file would be unpacked over a symlink
    @since: 0.28"""
    import errno
    import stat
    tmpdir = mkdtemp(dir = destdir)
    try:
        mtimes = []

        unpack_archive(url, data, tmpdir, extract, type, start_offset)

        stem_len = len(tmpdir)
        for root, dirs, files in os.walk(tmpdir):
            relative_root = root[stem_len + 1:] or '.'
            target_root = os.path.join(destdir, relative_root)
            try:
                info = os.lstat(target_root)
            except OSError as ex:
                if ex.errno != errno.ENOENT:
                    raise    # Some odd error.
                # Doesn't exist. OK.
                os.mkdir(target_root)
            else:
                if stat.S_ISLNK(info.st_mode):
                    raise SafeException('Attempt to unpack dir over symlink "%s"!' % relative_root)
                elif not stat.S_ISDIR(info.st_mode):
                    raise SafeException('Attempt to unpack dir over non-directory "%s"!' % relative_root)
            # 'root' already starts with tmpdir, so stat it directly; joining
            # tmpdir on again gives a wrong path whenever tmpdir is relative.
            # Recorded before the renames below change the directory's mtime.
            mtimes.append((relative_root, os.lstat(root).st_mtime))

            for s in dirs:    # Symlinks are counted as directories
                src = os.path.join(tmpdir, relative_root, s)
                if os.path.islink(src):
                    files.append(s)

            for f in files:
                src = os.path.join(tmpdir, relative_root, f)
                dest = os.path.join(destdir, relative_root, f)
                if os.path.islink(dest):
                    raise SafeException('Attempt to unpack file over symlink "%s"!' %
                                        os.path.join(relative_root, f))
                os.rename(src, dest)

        # The first entry is the top-level directory itself; its mtime is not restored.
        for path, mtime in mtimes[1:]:
            os.utime(os.path.join(destdir, path), (mtime, mtime))
    finally:
        ro_rmtree(tmpdir)
def unpack_archive(url, data, destdir, extract = None, type = None, start_offset = 0):
    """Unpack the archive in stream 'data' into the directory 'destdir'.

    If 'extract' is given, only that sub-directory of the archive is unpacked.
    The format is taken from the MIME type 'type'; when that is None it is
    guessed from the extension of 'url'.

    @raise SafeException: if the type can't be guessed or is not a known one"""
    if type is None:
        type = type_from_url(url)
        if type is None:
            raise SafeException("Unknown extension (and no MIME type given) in '%s'" % url)

    # Tar variants differ only in the decompressor handed to extract_tar.
    tar_decompressors = {
        'application/x-bzip-compressed-tar': 'bzip2',
        'application/x-tar': None,
        'application/x-lzma-compressed-tar': 'lzma',
        'application/x-compressed-tar': 'gzip',
    }

    if type in tar_decompressors:
        extract_tar(data, destdir, extract, tar_decompressors[type], start_offset)
    elif type == 'application/x-deb':
        extract_deb(data, destdir, extract, start_offset)
    elif type == 'application/x-rpm':
        extract_rpm(data, destdir, extract, start_offset)
    elif type == 'application/zip':
        extract_zip(data, destdir, extract, start_offset)
    elif type == 'application/vnd.ms-cab-compressed':
        extract_cab(data, destdir, extract, start_offset)
    else:
        raise SafeException('Unknown MIME type "%s" for "%s"' % (type, url))
def extract_deb(stream, destdir, extract = None, start_offset = 0):
    """Unpack the data.tar.gz member of a Debian package into destdir.

    The package is first copied to 'archive.deb' inside destdir so that 'ar'
    can read it; the copy is removed again even if 'ar' fails.

    @param stream: seekable file object holding the package
    @param start_offset: position in stream at which the package starts
    @raise SafeException: if 'extract' is given (not supported for Debs)"""
    if extract:
        raise SafeException('Sorry, but the "extract" attribute is not yet supported for Debs')

    stream.seek(start_offset)
    # ar can't read from stdin, so make a copy...
    deb_copy_name = os.path.join(destdir, 'archive.deb')
    deb_copy = open(deb_copy_name, 'wb')    # binary: archive bytes must be copied unchanged
    try:
        try:
            shutil.copyfileobj(stream, deb_copy)
        finally:
            deb_copy.close()
        _extract(stream, destdir, ('ar', 'x', 'archive.deb', 'data.tar.gz'))
    finally:
        os.unlink(deb_copy_name)

    data_name = os.path.join(destdir, 'data.tar.gz')
    data_stream = open(data_name, 'rb')
    try:
        # Remove the name straight away; the open handle is still used for
        # reading, and the tarball itself never ends up in the unpacked tree.
        os.unlink(data_name)
        extract_tar(data_stream, destdir, None, 'gzip')
    finally:
        data_stream.close()
def extract_rpm(stream, destdir, extract = None, start_offset = 0):
    """Unpack an RPM into destdir by converting it with rpm2cpio and then running cpio.

    rpm2cpio runs in a forked child with the stream as its stdin and a
    temporary file as its stdout; cpio then unpacks that temporary file.
    The temporary file is always removed afterwards.

    @param stream: file object (with a real file descriptor) holding the RPM
    @param start_offset: position in stream at which the RPM starts
    @raise SafeException: if 'extract' is given (not supported for RPMs) or
    rpm2cpio fails"""
    if extract:
        raise SafeException('Sorry, but the "extract" attribute is not yet supported for RPMs')
    fd, cpiopath = mkstemp('-rpm-tmp')
    try:
        child = os.fork()
        if child == 0:
            # Child process: must never return into the caller's code, so any
            # failure is printed and the process exits via os._exit().
            try:
                try:
                    os.dup2(stream.fileno(), 0)
                    os.lseek(0, start_offset, 0)
                    os.dup2(fd, 1)
                    _exec_maybe_sandboxed(None, 'rpm2cpio', '-')
                except:
                    traceback.print_exc()
            finally:
                os._exit(1)
        pid, status = os.waitpid(child, 0)
        assert pid == child
        # 'status' is the raw value from waitpid(); zero means a clean exit.
        if status != 0:
            raise SafeException("rpm2cpio failed; can't unpack RPM archive; exit code %d" % status)
        os.close(fd)
        fd = None

        args = ['cpio', '-mid']
        if _gnu_cpio():
            args.append('--quiet')

        cpio_stream = open(cpiopath, 'rb')
        try:
            _extract(cpio_stream, destdir, args)
        finally:
            cpio_stream.close()

        # Set the mtime of every directory under 'tmp' to 0, since cpio doesn't
        # preserve directory mtimes.
        for dirname, _subdirs, _names in os.walk(destdir):
            os.utime(dirname, (0, 0))
    finally:
        if fd is not None:
            os.close(fd)
        os.unlink(cpiopath)
def extract_cab(stream, destdir, extract, start_offset = 0):
    """Unpack a Microsoft Cabinet archive into destdir using 'cabextract'.

    The archive is first copied to 'archive.cab' inside destdir; the copy is
    removed again even if cabextract fails.

    @param stream: seekable file object holding the archive
    @param start_offset: position in stream at which the archive starts
    @raise SafeException: if 'extract' is given (not supported for Cabinet files)
    @since: 0.24"""
    if extract:
        raise SafeException('Sorry, but the "extract" attribute is not yet supported for Cabinet files')

    stream.seek(start_offset)
    # cabextract can't read from stdin, so make a copy...
    cab_copy_name = os.path.join(destdir, 'archive.cab')
    cab_copy = open(cab_copy_name, 'wb')    # binary: archive bytes must be copied unchanged
    try:
        try:
            shutil.copyfileobj(stream, cab_copy)
        finally:
            cab_copy.close()

        _extract(stream, destdir, ['cabextract', '-s', '-q', 'archive.cab'])
    finally:
        os.unlink(cab_copy_name)
def extract_zip(stream, destdir, extract, start_offset = 0):
    """Unpack a zip archive into destdir using 'unzip'.

    The archive is first copied to 'archive.zip' inside destdir; the copy is
    removed again even if unzip fails. If 'extract' is given, only that
    sub-directory is unpacked and its contents are moved up into destdir.

    @param stream: seekable file object holding the archive
    @param start_offset: position in stream at which the archive starts
    @raise SafeException: if 'extract' contains characters we don't accept"""
    if extract:
        # Limit the characters we accept, to avoid sending dodgy
        # strings to zip
        # (\Z rather than $, because $ also matches before a trailing newline)
        if not re.match(r'^[a-zA-Z0-9][- _a-zA-Z0-9.]*\Z', extract):
            raise SafeException('Illegal character in extract attribute')

    stream.seek(start_offset)
    # unzip can't read from stdin, so make a copy...
    zip_copy_name = os.path.join(destdir, 'archive.zip')
    zip_copy = open(zip_copy_name, 'wb')    # binary: archive bytes must be copied unchanged
    try:
        try:
            shutil.copyfileobj(stream, zip_copy)
        finally:
            zip_copy.close()

        args = ['unzip', '-q', '-o', 'archive.zip']

        if extract:
            args.append(extract + '/*')

        _extract(stream, destdir, args)
    finally:
        os.unlink(zip_copy_name)

    if extract:
        # unzip uses extract just as a filter, so we still need to move things
        extracted_dir = os.path.join(destdir, extract)
        for x in os.listdir(extracted_dir):
            os.rename(os.path.join(extracted_dir, x), os.path.join(destdir, x))
        os.rmdir(extracted_dir)
def _gnu_tar_command(extract, decompress):
    """Build the GNU tar command line for unpacking an archive read from stdin."""
    ext_cmd = ['tar']
    if decompress == 'bzip2':
        ext_cmd.append('--bzip2')
    elif decompress == 'gzip':
        ext_cmd.append('-z')
    elif decompress == 'lzma':
        unlzma = find_in_path('unlzma')
        if not unlzma:
            # Fall back to the '_unlzma' file next to this module.
            unlzma = os.path.abspath(os.path.join(os.path.dirname(__file__), '_unlzma'))
        ext_cmd.append('--use-compress-program=' + unlzma)

    if recent_gnu_tar():
        ext_cmd.extend(('-x', '--no-same-owner', '--no-same-permissions'))
    else:
        ext_cmd.extend(('xf', '-'))

    if extract:
        ext_cmd.append(extract)
    return ext_cmd

def _extract_tar_python(stream, destdir, extract, decompress, start_offset):
    """Unpack a tar stream with Python's tarfile module (used when GNU tar is missing)."""
    # Since we don't have GNU tar, use python's tarfile module. This will probably
    # be a lot slower and we do not support lzma; however, it is portable.
    if decompress is None:
        rmode = 'r|'
    elif decompress == 'bzip2':
        rmode = 'r|bz2'
    elif decompress == 'gzip':
        rmode = 'r|gz'
    else:
        raise SafeException('GNU tar unavailable; unsupported compression format: ' + decompress)

    import tarfile

    # Read the umask; os.umask() can only do that by setting it, so restore it at once.
    current_umask = os.umask(0)
    os.umask(current_umask)

    uid = gid = None
    try:
        uid = os.geteuid()
        gid = os.getegid()
    except AttributeError:
        # These functions are missing from the os module on some platforms.
        debug("Can't get uid/gid")

    stream.seek(start_offset)
    # Python 2.5.1 crashes if name is None; see Python bug #1706850
    tar = tarfile.open(name = '', mode = rmode, fileobj = stream)
    try:
        def chmod_extract(tarinfo):
            # If any X bit is set, they all must be
            if tarinfo.mode & 0o111:
                tarinfo.mode |= 0o111

            # Everyone gets read and write (subject to the umask)
            # No special bits are allowed.
            tarinfo.mode = ((tarinfo.mode | 0o666) & ~current_umask) & 0o777

            # Don't change owner, even if run as root
            # (compare with None: root's id is 0, which is falsy)
            if uid is not None:
                tarinfo.uid = uid
            if gid is not None:
                tarinfo.gid = gid
            tar.extract(tarinfo, destdir)

        extracted_anything = False
        ext_dirs = []

        for tarinfo in tar:
            # An empty 'extract' means "everything", as in the GNU tar path.
            if not extract or \
               tarinfo.name.startswith(extract + '/') or \
               tarinfo.name == extract:
                if tarinfo.isdir():
                    ext_dirs.append(tarinfo)

                chmod_extract(tarinfo)
                extracted_anything = True

        # Due to a bug in tarfile (python versions < 2.5), we have to manually
        # set the mtime of each directory that we extract after extracting everything.

        for tarinfo in ext_dirs:
            dirname = os.path.join(destdir, tarinfo.name)
            os.utime(dirname, (tarinfo.mtime, tarinfo.mtime))
    finally:
        tar.close()

    if extract and not extracted_anything:
        raise SafeException('Unable to find specified file = %s in archive' % extract)

def extract_tar(stream, destdir, extract, decompress, start_offset = 0):
    """Unpack a (possibly compressed) tar archive from stream into destdir.

    GNU tar is used when it is available; otherwise Python's tarfile module
    is used, which here handles everything except lzma.

    @param stream: seekable file object holding the archive
    @param extract: sub-directory to unpack, or None for the whole archive
    @param decompress: None, 'bzip2', 'gzip' or 'lzma'
    @param start_offset: position in stream at which the archive starts
    @raise SafeException: if 'extract' contains characters we don't accept,
    the compression is unsupported without GNU tar, or (tarfile path only)
    'extract' matched nothing in the archive"""
    if extract:
        # Limit the characters we accept, to avoid sending dodgy
        # strings to tar
        # (\Z rather than $, because $ also matches before a trailing newline)
        if not re.match(r'^[a-zA-Z0-9][- _a-zA-Z0-9.]*\Z', extract):
            raise SafeException('Illegal character in extract attribute')

    assert decompress in [None, 'bzip2', 'gzip', 'lzma']

    if _gnu_tar():
        _extract(stream, destdir, _gnu_tar_command(extract, decompress), start_offset)
    else:
        _extract_tar_python(stream, destdir, extract, decompress, start_offset)
def _extract(stream, destdir, command, start_offset = 0):
    """Run 'command' as a child process with destdir as its working directory
    and 'stream', positioned at 'start_offset', as its stdin.

    @raise SafeException: if the command exits with a non-zero status"""

    # Some zip archives are missing timezone information; force consistent results
    environment = dict(os.environ, TZ = 'GMT')

    stream.seek(start_offset)

    # TODO: use pola-run if available, once it supports fchmod
    exit_status = subprocess.Popen(command, cwd = destdir, stdin = stream, env = environment).wait()
    if exit_status != 0:
        raise SafeException('Failed to extract archive; exit code %d' % exit_status)