Stricter permissions for unpacking:
[zeroinstall.git] / zeroinstall / zerostore / unpack.py
bloba6dd4ffc79dd96efd12a53681df5452f3e71ed50
1 """Unpacking archives of various formats."""
3 # Copyright (C) 2006, Thomas Leonard
4 # See the README file for details, or visit http://0install.net.
6 import os
7 import shutil
8 import traceback
9 from tempfile import mkdtemp, mkstemp
10 import sha
11 import re
12 from logging import debug, info, warn
13 from zeroinstall import SafeException
# Cached first line of `cpio --version` output (None until first queried).
_cpio_version = None


def _get_cpio_version():
    """Return the first line printed by 'cpio --version', running it at most once.

    stderr is merged into stdout, so if cpio is missing the shell's error
    message is what gets returned (and cached).
    @return: the first output line, or '' if the command printed nothing"""
    global _cpio_version
    if _cpio_version is None:
        pipe = os.popen('cpio --version 2>&1')
        try:
            # readline() yields '' on empty output instead of raising
            # StopIteration the way .next() does.
            _cpio_version = pipe.readline()
        finally:
            # Always release the pipe rather than leaving it to the GC.
            pipe.close()
        debug("cpio version = %s", _cpio_version)
    return _cpio_version
def _gnu_cpio():
    """Report whether the installed cpio identifies itself as GNU cpio.

    @return: True if the cached version line contains '(GNU cpio)'"""
    version_line = _get_cpio_version()
    is_gnu = '(GNU cpio)' in version_line
    debug("Is GNU cpio = %s", is_gnu)
    return is_gnu
# Cached first line of `tar --version` output (None until first queried).
_tar_version = None


def _get_tar_version():
    """Return the first line printed by 'tar --version', running it at most once.

    stderr is merged into stdout, so if tar is missing the shell's error
    message is what gets returned (and cached).
    @return: the first output line, or '' if the command printed nothing"""
    global _tar_version
    if _tar_version is None:
        pipe = os.popen('tar --version 2>&1')
        try:
            # readline() yields '' on empty output instead of raising
            # StopIteration the way .next() does.
            _tar_version = pipe.readline()
        finally:
            # Always release the pipe rather than leaving it to the GC.
            pipe.close()
        debug("tar version = %s", _tar_version)
    return _tar_version
def _gnu_tar():
    """Report whether the installed tar identifies itself as GNU tar.

    @return: True if the cached version line contains '(GNU tar)'"""
    version_line = _get_tar_version()
    is_gnu = '(GNU tar)' in version_line
    debug("Is GNU tar = %s", is_gnu)
    return is_gnu
def recent_gnu_tar():
    """Report whether the installed tar is GNU tar newer than version 1.13.92.

    The version number is taken from whatever follows the first ')' on the
    'tar --version' line. If it cannot be parsed, a warning is logged and
    the tar is treated as not recent.
    @return: True only for GNU tar with a parsed version > 1.13.92
    @deprecated: should be private"""
    is_recent = False
    if _gnu_tar():
        version = _get_tar_version()
        try:
            version = version.split(')', 1)[1].strip()
            if not version:
                raise ValueError("empty version string")
            # A real list (not a lazy map object) so the comparison below
            # is an element-wise list comparison on every Python version.
            version = [int(part) for part in version.split('.')]
            is_recent = version > [1, 13, 92]
        except (IndexError, ValueError):
            # IndexError: no ')' in the line; ValueError: non-numeric component.
            warn("Failed to extract GNU tar version number")
    debug("Recent GNU tar = %s", is_recent)
    return is_recent
def _find_in_path(prog):
    """Search the directories listed in $PATH for a regular file named 'prog'.

    If PATH is not set, os.defpath (the default the exec*p functions use)
    is searched instead of failing with a KeyError.
    @param prog: the program name to look for
    @return: the path of the first match, or None if there is none"""
    search_path = os.environ.get('PATH', os.defpath)
    for d in search_path.split(os.pathsep):
        path = os.path.join(d, prog)
        if os.path.isfile(path):
            return path
    return None
# Look for the optional pola-run sandbox helper once, when the module loads.
_pola_run = _find_in_path('pola-run')
if not _pola_run:
    info('pola-run not found; archive extraction will not be sandboxed')
else:
    info('Found pola-run: %s', _pola_run)
def type_from_url(url):
    """Guess an archive's MIME type from the (case-insensitive) ending of its URL.

    @param url: the URL (or file name) of the archive
    @return: the MIME type string, or None if the ending is not recognised"""
    known_endings = (
        ('.rpm', 'application/x-rpm'),
        ('.deb', 'application/x-deb'),
        ('.tar.bz2', 'application/x-bzip-compressed-tar'),
        ('.tar.gz', 'application/x-compressed-tar'),
        ('.tar.lzma', 'application/x-lzma-compressed-tar'),  # XXX: No registered MIME type!
        ('.tgz', 'application/x-compressed-tar'),
        ('.tar', 'application/x-tar'),
        ('.zip', 'application/zip'),
        ('.cab', 'application/vnd.ms-cab-compressed'),
    )
    lowered = url.lower()
    for ending, mime_type in known_endings:
        if lowered.endswith(ending):
            return mime_type
    return None
def check_type_ok(mime_type):
    """Verify that the helper program needed to unpack 'mime_type' is installed.

    Plain and gzip-compressed tar archives need no extra helper and always pass.
    @param mime_type: the (non-empty) MIME type of the archive
    @raise SafeException: if the helper is missing or the type is unsupported"""
    assert mime_type

    # (MIME type, helper program, complaint raised when the helper is missing)
    helper_table = (
        ('application/x-rpm', 'rpm2cpio',
         "This package looks like an RPM, but you don't have the rpm2cpio command "
         "I need to extract it. Install the 'rpm' package first (this works even if "
         "you're on a non-RPM-based distribution such as Debian)."),
        ('application/x-deb', 'ar',
         "This package looks like a Debian package, but you don't have the 'ar' command "
         "I need to extract it. Install the package containing it (sometimes called 'binutils') "
         "first. This works even if you're on a non-Debian-based distribution such as Red Hat)."),
        ('application/x-bzip-compressed-tar', 'bunzip2',
         "This package looks like a bzip2-compressed package, but you don't have the 'bunzip2' command "
         "I need to extract it. Install the package containing it (it's probably called 'bzip2') "
         "first."),
        ('application/zip', 'unzip',
         "This package looks like a zip-compressed archive, but you don't have the 'unzip' command "
         "I need to extract it. Install the package containing it first."),
        ('application/vnd.ms-cab-compressed', 'cabextract',
         "This package looks like a Microsoft Cabinet archive, but you don't have the 'cabextract' command "
         "I need to extract it. Install the package containing it first."),
        ('application/x-lzma-compressed-tar', 'unlzma',
         "This package looks like an LZMA archive, but you don't have the 'unlzma' command "
         "I need to extract it. Install the package containing it (it's probably called 'lzma') first."),
    )

    for known_type, helper, complaint in helper_table:
        if mime_type == known_type:
            if not _find_in_path(helper):
                raise SafeException(complaint)
            return

    if mime_type in ('application/x-compressed-tar', 'application/x-tar'):
        return

    from zeroinstall import version
    raise SafeException("Unsupported archive type '%s' (for injector version %s)" % (mime_type, version))
def _exec_maybe_sandboxed(writable, prog, *args):
    """execlp prog, with (only) the 'writable' directory writable if sandboxing is available.
    If no sandbox is available, run without a sandbox.

    On success this replaces the current process image and does not return.
    @param writable: directory the sandboxed program may write to, or None
    @param prog: name of the program to run (looked up in $PATH)
    @param args: the remaining arguments, passed to the exec call as given
    @raise SafeException: if 'prog' cannot be found in $PATH"""
    prog_path = _find_in_path(prog)
    if prog_path is None:
        # Fail with a readable message instead of handing None to exec.
        raise SafeException("Can't find the '%s' command in $PATH" % prog)
    if _pola_run is None:
        os.execlp(prog_path, prog_path, *args)
    # We have pola-run :-)
    pola_args = ['--prog', prog_path, '-f', '/']
    for a in args:
        pola_args += ['-a', a]
    if writable:
        pola_args += ['-fw', writable]
    os.execl(_pola_run, _pola_run, *pola_args)
def unpack_archive(url, data, destdir, extract = None, type = None, start_offset = 0):
    """Unpack the archive in stream 'data' into the directory 'destdir'.

    @param url: where the archive came from; used to guess 'type' and in error messages
    @param data: stream containing the archive
    @param destdir: directory to unpack into
    @param extract: if given, unpack only this sub-directory of the archive
    @param type: the archive's MIME type; guessed from 'url' when None
    @param start_offset: offset within 'data' at which the archive starts
    @raise SafeException: if the type is unknown or cannot be guessed"""
    if type is None:
        type = type_from_url(url)
        if type is None:
            raise SafeException("Unknown extension (and no MIME type given) in '%s'" % url)

    # tar-based formats differ only in the decompressor handed to extract_tar
    tar_formats = (
        ('application/x-bzip-compressed-tar', 'bzip2'),
        ('application/x-tar', None),
        ('application/x-lzma-compressed-tar', 'lzma'),
        ('application/x-compressed-tar', 'gzip'),
    )
    for mime, decompress in tar_formats:
        if type == mime:
            extract_tar(data, destdir, extract, decompress, start_offset)
            return

    # the remaining formats each have a dedicated extractor
    other_formats = (
        ('application/x-deb', extract_deb),
        ('application/x-rpm', extract_rpm),
        ('application/zip', extract_zip),
        ('application/vnd.ms-cab-compressed', extract_cab),
    )
    for mime, unpacker in other_formats:
        if type == mime:
            unpacker(data, destdir, extract, start_offset)
            return

    raise SafeException('Unknown MIME type "%s" for "%s"' % (type, url))
def extract_deb(stream, destdir, extract = None, start_offset = 0):
    """Unpack the data.tar.gz member of a Debian package into 'destdir'.

    @param stream: seekable stream containing the .deb
    @param destdir: directory to unpack into (also used for temporary copies)
    @param extract: must be empty; sub-directory extraction is not supported
    @param start_offset: offset within 'stream' at which the package starts
    @raise SafeException: if 'extract' is given or a helper command fails"""
    if extract:
        raise SafeException('Sorry, but the "extract" attribute is not yet supported for Debs')

    stream.seek(start_offset)
    # ar can't read from stdin, so make a copy...
    deb_copy_name = os.path.join(destdir, 'archive.deb')
    deb_copy = open(deb_copy_name, 'wb')        # binary: this is archive data
    try:
        try:
            shutil.copyfileobj(stream, deb_copy)
        finally:
            deb_copy.close()
        _extract(stream, destdir, ('ar', 'x', 'archive.deb', 'data.tar.gz'))
    finally:
        # Never leave the temporary copy behind, even if ar failed.
        os.unlink(deb_copy_name)

    data_name = os.path.join(destdir, 'data.tar.gz')
    data_stream = open(data_name, 'rb')
    try:
        # Unlink first so the tarball itself is not left in destdir; the
        # open handle keeps its contents readable.
        os.unlink(data_name)
        extract_tar(data_stream, destdir, None, 'gzip')
    finally:
        data_stream.close()
def extract_rpm(stream, destdir, extract = None, start_offset = 0):
    """Unpack an RPM into 'destdir' by converting it with rpm2cpio and running cpio.

    @param stream: stream (with a real file descriptor) containing the RPM
    @param destdir: directory to unpack into
    @param extract: must be empty; sub-directory extraction is not supported
    @param start_offset: offset within 'stream' at which the RPM starts
    @raise SafeException: if 'extract' is given, or rpm2cpio or cpio fails"""
    if extract:
        raise SafeException('Sorry, but the "extract" attribute is not yet supported for RPMs')
    fd, cpiopath = mkstemp('-rpm-tmp')
    try:
        child = os.fork()
        if child == 0:
            # Child: RPM on stdin, temporary cpio file on stdout, then exec.
            try:
                try:
                    os.dup2(stream.fileno(), 0)
                    os.lseek(0, start_offset, 0)
                    os.dup2(fd, 1)
                    _exec_maybe_sandboxed(None, 'rpm2cpio', '-')
                except:
                    # Deliberately catch everything: the child must never
                    # fall back into the parent's code path.
                    traceback.print_exc()
            finally:
                os._exit(1)
        pid, status = os.waitpid(child, 0)
        assert pid == child
        if status != 0:
            raise SafeException("rpm2cpio failed; can't unpack RPM archive; exit code %d" % status)
        os.close(fd)
        fd = None

        args = ['cpio', '-mid']
        if _gnu_cpio():
            args.append('--quiet')

        cpio_stream = open(cpiopath, 'rb')
        try:
            _extract(cpio_stream, destdir, args)
        finally:
            cpio_stream.close()

        # Set the mtime of every directory under 'tmp' to 0, since cpio doesn't
        # preserve directory mtimes.
        for dirpath, dirnames, filenames in os.walk(destdir):
            os.utime(dirpath, (0, 0))
    finally:
        if fd is not None:
            os.close(fd)
        os.unlink(cpiopath)
def extract_cab(stream, destdir, extract, start_offset = 0):
    """Unpack a Microsoft Cabinet archive into 'destdir' using cabextract.

    @param stream: seekable stream containing the archive
    @param destdir: directory to unpack into (also holds a temporary copy)
    @param extract: must be empty; sub-directory extraction is not supported
    @param start_offset: offset within 'stream' at which the archive starts
    @raise SafeException: if 'extract' is given or cabextract fails
    @since: 0.24"""
    if extract:
        raise SafeException('Sorry, but the "extract" attribute is not yet supported for Cabinet files')

    stream.seek(start_offset)
    # cabextract can't read from stdin, so make a copy...
    cab_copy_name = os.path.join(destdir, 'archive.cab')
    cab_copy = open(cab_copy_name, 'wb')        # binary: this is archive data
    try:
        try:
            shutil.copyfileobj(stream, cab_copy)
        finally:
            cab_copy.close()

        _extract(stream, destdir, ['cabextract', '-s', '-q', 'archive.cab'])
    finally:
        # Never leave the temporary copy behind, even if extraction failed.
        os.unlink(cab_copy_name)
def extract_zip(stream, destdir, extract, start_offset = 0):
    """Unpack a zip archive into 'destdir' using unzip.

    @param stream: seekable stream containing the archive
    @param destdir: directory to unpack into (also holds a temporary copy)
    @param extract: if set, unpack only the members below this directory
    @param start_offset: offset within 'stream' at which the archive starts
    @raise SafeException: if 'extract' has illegal characters or unzip fails"""
    if extract:
        # Limit the characters we accept, to avoid sending dodgy
        # strings to zip
        if not re.match('^[a-zA-Z0-9][- _a-zA-Z0-9.]*$', extract):
            raise SafeException('Illegal character in extract attribute')

    stream.seek(start_offset)
    # unzip can't read from stdin, so make a copy...
    zip_copy_name = os.path.join(destdir, 'archive.zip')
    zip_copy = open(zip_copy_name, 'wb')        # binary: this is archive data
    try:
        try:
            shutil.copyfileobj(stream, zip_copy)
        finally:
            zip_copy.close()

        # unzip's syntax is "unzip [options] archive [members...]": the
        # archive must come first, or the member name is taken for the archive.
        args = ['unzip', '-q', '-o', 'archive.zip']

        if extract:
            # Select everything below the requested directory.
            args.append(extract + '/*')

        _extract(stream, destdir, args)
    finally:
        # Never leave the temporary copy behind, even if unzip failed.
        os.unlink(zip_copy_name)
def extract_tar(stream, destdir, extract, decompress, start_offset = 0):
    """Unpack a (possibly compressed) tar archive from 'stream' into 'destdir'.

    GNU tar is used when it is installed; otherwise Python's tarfile module
    is used, which here supports every compression except lzma.

    @param stream: seekable stream containing the archive
    @param destdir: directory to unpack into
    @param extract: if set, unpack only this member and anything below it
    @param decompress: None, 'bzip2', 'gzip' or 'lzma'
    @param start_offset: offset within 'stream' at which the archive starts
    @raise SafeException: if 'extract' has illegal characters or matches nothing,
    the compression is unsupported without GNU tar, or tar fails"""
    if extract:
        # Limit the characters we accept, to avoid sending dodgy
        # strings to tar
        if not re.match('^[a-zA-Z0-9][- _a-zA-Z0-9.]*$', extract):
            raise SafeException('Illegal character in extract attribute')

    assert decompress in [None, 'bzip2', 'gzip', 'lzma']

    if _gnu_tar():
        ext_cmd = ['tar']
        if decompress:
            if decompress == 'bzip2':
                ext_cmd.append('--bzip2')
            elif decompress == 'gzip':
                ext_cmd.append('-z')
            elif decompress == 'lzma':
                ext_cmd.append('--use-compress-program=unlzma')

        if recent_gnu_tar():
            ext_cmd.extend(('-x', '--no-same-owner', '--no-same-permissions'))
        else:
            ext_cmd.extend(('xf', '-'))

        if extract:
            ext_cmd.append(extract)

        _extract(stream, destdir, ext_cmd, start_offset)
        return

    # Since we don't have GNU tar, use python's tarfile module. This will probably
    # be a lot slower and we do not support lzma; however, it is portable.
    if decompress is None:
        rmode = 'r|'
    elif decompress == 'bzip2':
        rmode = 'r|bz2'
    elif decompress == 'gzip':
        rmode = 'r|gz'
    else:
        raise SafeException('GNU tar unavailable; unsupported compression format: ' + decompress)

    import stat
    import tarfile

    # Permission masks built from stat constants, so no octal literals
    # (whose syntax differs between Python versions) are needed.
    any_exec = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH       # 0111
    all_read_write = (stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP |
                      stat.S_IWGRP | stat.S_IROTH | stat.S_IWOTH)   # 0666
    plain_perms = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO    # 0777

    stream.seek(start_offset)
    tar = tarfile.open(mode = rmode, fileobj = stream)

    extracted_anything = False
    try:
        # os.umask() can only be read by setting it, so set and restore.
        current_umask = os.umask(0)
        os.umask(current_umask)

        uid = gid = None
        try:
            uid = os.geteuid()
            gid = os.getegid()
        except AttributeError:
            # These functions are not available on every platform.
            debug("Can't get uid/gid")

        def chmod_extract(tarinfo):
            # If any X bit is set, they all must be
            if tarinfo.mode & any_exec:
                tarinfo.mode |= any_exec

            # Everyone gets read and write (subject to the umask)
            # No special bits are allowed.
            tarinfo.mode = ((tarinfo.mode | all_read_write) & ~current_umask) & plain_perms

            # Don't change owner, even if run as root. Compare against None:
            # root's uid/gid is 0, which a plain truth test would skip.
            # NOTE(review): tarfile may prefer the member's uname/gname over
            # these numeric ids when choosing an owner - confirm.
            if uid is not None:
                tarinfo.uid = uid
            if gid is not None:
                tarinfo.gid = gid
            tar.extract(tarinfo, destdir)

        ext_dirs = []

        for tarinfo in tar:
            if extract is None or \
               tarinfo.name.startswith(extract + '/') or \
               tarinfo.name == extract:
                if tarinfo.isdir():
                    ext_dirs.append(tarinfo)

                chmod_extract(tarinfo)
                extracted_anything = True

        # Due to a bug in tarfile (python versions < 2.5), we have to manually
        # set the mtime of each directory that we extract after extracting everything.

        for tarinfo in ext_dirs:
            dirname = os.path.join(destdir, tarinfo.name)
            os.utime(dirname, (tarinfo.mtime, tarinfo.mtime))
    finally:
        # Release the tarfile even if extraction failed part-way through.
        tar.close()

    if extract and not extracted_anything:
        raise SafeException('Unable to find specified file = %s in archive' % extract)
def _extract(stream, destdir, command, start_offset = 0):
    """Fork a child that runs 'command' from inside 'destdir', then wait for it.

    The child's stdin is 'stream', positioned at 'start_offset'.
    @param stream: seekable stream with a real file descriptor
    @param destdir: working directory (and sandbox-writable directory) for the command
    @param command: sequence giving the program name followed by its arguments
    @param start_offset: position to seek 'stream' to before running the command
    @raise SafeException: if the child's wait status is non-zero"""
    pid = os.fork()
    if pid:
        # Parent: wait for the child and check how it finished.
        waited, status = os.waitpid(pid, 0)
        assert waited == pid
        if status != 0:
            raise SafeException('Failed to extract archive; exit code %d' % status)
        return

    # Child: set up the working directory and stdin, then exec the command.
    try:
        try:
            os.chdir(destdir)
            stream.seek(start_offset)
            os.dup2(stream.fileno(), 0)
            _exec_maybe_sandboxed(destdir, *command)
        except:
            traceback.print_exc()
    finally:
        # Reached only if the exec did not happen; never return into the caller.
        os._exit(1)