1 """Unpacking archives of various formats."""
3 # Copyright (C) 2006, Thomas Leonard
4 # See the README file for details, or visit http://0install.net.
9 from tempfile
import mkdtemp
, mkstemp
11 from logging
import debug
, warn
12 from zeroinstall
import SafeException
13 from zeroinstall
.support
import find_in_path
, ro_rmtree
def _get_cpio_version():
    """Return the first line printed by 'cpio --version' (stderr included).

    The command is only run on the first call; the line is remembered in the
    module-level _cpio_version variable and returned as-is afterwards. The
    text is kept exactly as read (no stripping).
    @return: the first line of output, or '' if the command printed nothing"""
    global _cpio_version
    if _cpio_version is None:
        pipe = os.popen('cpio --version 2>&1')
        try:
            # readline() returns '' on empty output instead of raising
            # StopIteration, and exists on both Python 2 and 3 pipe objects.
            _cpio_version = pipe.readline()
        finally:
            pipe.close()
        debug("cpio version = %s", _cpio_version)
    return _cpio_version
24 gnu_cpio
= '(GNU cpio)' in _get_cpio_version()
25 debug("Is GNU cpio = %s", gnu_cpio
)
def _get_tar_version():
    """Return the first line printed by 'tar --version' (stderr included).

    The command is only run on the first call; the stripped line is remembered
    in the module-level _tar_version variable and returned afterwards.
    @return: the first line of output with surrounding whitespace removed,
    or '' if the command printed nothing"""
    global _tar_version
    if _tar_version is None:
        pipe = os.popen('tar --version 2>&1')
        try:
            # readline() returns '' on empty output instead of raising
            # StopIteration, and exists on both Python 2 and 3 pipe objects.
            _tar_version = pipe.readline().strip()
        finally:
            pipe.close()
        debug("tar version = %s", _tar_version)
    return _tar_version
37 gnu_tar
= '(GNU tar)' in _get_tar_version()
38 debug("Is GNU tar = %s", gnu_tar
)
42 """@deprecated: should be private"""
43 recent_gnu_tar
= False
45 version
= re
.search(r
'\)\s*(\d+(\.\d+)*)', _get_tar_version())
47 version
= map(int, version
.group(1).split('.'))
48 recent_gnu_tar
= version
> [1, 13, 92]
50 warn("Failed to extract GNU tar version number")
51 debug("Recent GNU tar = %s", recent_gnu_tar
)
54 # Disabled, as Plash does not currently support fchmod(2).
56 #_pola_run = find_in_path('pola-run')
58 # info('Found pola-run: %s', _pola_run)
60 # info('pola-run not found; archive extraction will not be sandboxed')
def type_from_url(url):
    """Guess the MIME type for this resource based on its URL.

    Only the end of the URL is examined, and the comparison ignores case
    (so 'FOO.ZIP' is treated like 'foo.zip').
    @param url: the URL (or file name) of the archive
    @return: the MIME type, or None if we don't know what it is"""
    suffix_types = (
        ('.rpm', 'application/x-rpm'),
        ('.deb', 'application/x-deb'),
        ('.tar.bz2', 'application/x-bzip-compressed-tar'),
        ('.tar.gz', 'application/x-compressed-tar'),
        ('.tar.lzma', 'application/x-lzma-compressed-tar'),  # XXX: No registered MIME type!
        ('.tgz', 'application/x-compressed-tar'),
        ('.tar', 'application/x-tar'),
        ('.zip', 'application/zip'),
        ('.cab', 'application/vnd.ms-cab-compressed'),
    )
    lowered = url.lower()
    for suffix, mime_type in suffix_types:
        if lowered.endswith(suffix):
            return mime_type
    return None
def check_type_ok(mime_type):
    """Check we have the needed software to extract from an archive of the given type.

    For RPM, Deb, bzip2, zip and Cabinet archives the corresponding helper
    program is looked up with find_in_path. No lookup is done for plain,
    gzip-compressed or lzma-compressed tar archives.
    @param mime_type: the MIME type of the archive
    @raise SafeException: if the needed software is not available, or if the
    type is not one of the supported archive types"""
    # MIME type -> (helper program, message to show if it is missing)
    required_programs = {
        'application/x-rpm': ('rpm2cpio',
            "This package looks like an RPM, but you don't have the rpm2cpio command "
            "I need to extract it. Install the 'rpm' package first (this works even if "
            "you're on a non-RPM-based distribution such as Debian)."),
        'application/x-deb': ('ar',
            "This package looks like a Debian package, but you don't have the 'ar' command "
            "I need to extract it. Install the package containing it (sometimes called 'binutils') "
            "first (this works even if you're on a non-Debian-based distribution such as Red Hat)."),
        'application/x-bzip-compressed-tar': ('bunzip2',
            "This package looks like a bzip2-compressed package, but you don't have the 'bunzip2' command "
            "I need to extract it. Install the package containing it (it's probably called 'bzip2') "
            "first."),
        'application/zip': ('unzip',
            "This package looks like a zip-compressed archive, but you don't have the 'unzip' command "
            "I need to extract it. Install the package containing it first."),
        'application/vnd.ms-cab-compressed': ('cabextract',
            "This package looks like a Microsoft Cabinet archive, but you don't have the 'cabextract' command "
            "I need to extract it. Install the package containing it first."),
    }

    if mime_type in required_programs:
        program, message = required_programs[mime_type]
        if not find_in_path(program):
            raise SafeException(message)
    elif mime_type == 'application/x-lzma-compressed-tar':
        pass    # We can get it through Zero Install
    elif mime_type in ('application/x-compressed-tar', 'application/x-tar'):
        pass
    else:
        from zeroinstall import version
        raise SafeException("Unsupported archive type '%s' (for injector version %s)" % (mime_type, version))
def _exec_maybe_sandboxed(writable, prog, *args):
    """Replace the current process with 'prog', sandboxed when possible.

    If _pola_run is set, the program is started through it, with 'writable'
    (when given) passed as the '-fw' path. Otherwise 'prog' is exec'd
    directly, without a sandbox. On success this call does not return.
    @param writable: path to pass with '-fw', or None
    @param prog: name of the program, looked up with find_in_path
    @param args: arguments for the program
    @raise Exception: if 'prog' cannot be found"""
    prog_path = find_in_path(prog)
    if not prog_path:
        raise Exception("'%s' not found in $PATH" % prog)

    if _pola_run is None:
        # No sandbox available: run the program directly.
        os.execlp(prog_path, prog_path, *args)

    # We have pola-shell :-)
    sandbox_args = ['--prog', prog_path, '-f', '/']
    for arg in args:
        sandbox_args.extend(('-a', arg))
    if writable:
        sandbox_args.extend(('-fw', writable))
    os.execl(_pola_run, _pola_run, *sandbox_args)
126 def unpack_archive_over(url
, data
, destdir
, extract
= None, type = None, start_offset
= 0):
127 """Like unpack_archive, except that we unpack to a temporary directory first and
128 then move things over, checking that we're not following symlinks at each stage.
	Use this when you want to unpack an archive into a directory which already has
133 tmpdir
= mkdtemp(dir = destdir
)
137 unpack_archive(url
, data
, tmpdir
, extract
, type, start_offset
)
139 stem_len
= len(tmpdir
)
140 for root
, dirs
, files
in os
.walk(tmpdir
):
141 relative_root
= root
[stem_len
+ 1:] or '.'
142 target_root
= os
.path
.join(destdir
, relative_root
)
144 info
= os
.lstat(target_root
)
147 raise # Some odd error.
149 os
.mkdir(target_root
)
151 if stat
.S_ISLNK(info
.st_mode
):
152 raise SafeException('Attempt to unpack dir over symlink "%s"!' % relative_root
)
153 elif not stat
.S_ISDIR(info
.st_mode
):
154 raise SafeException('Attempt to unpack dir over non-directory "%s"!' % relative_root
)
155 mtimes
.append((relative_root
, os
.lstat(os
.path
.join(tmpdir
, root
)).st_mtime
))
157 for s
in dirs
: # Symlinks are counted as directories
158 src
= os
.path
.join(tmpdir
, relative_root
, s
)
159 if os
.path
.islink(src
):
163 src
= os
.path
.join(tmpdir
, relative_root
, f
)
164 dest
= os
.path
.join(destdir
, relative_root
, f
)
165 if os
.path
.islink(dest
):
166 raise SafeException('Attempt to unpack file over symlink "%s"!' %
167 os
.path
.join(relative_root
, f
))
170 for path
, mtime
in mtimes
[1:]:
171 os
.utime(os
.path
.join(destdir
, path
), (mtime
, mtime
))
def unpack_archive(url, data, destdir, extract = None, type = None, start_offset = 0):
    """Unpack the archive in stream 'data' into the directory 'destdir'.

    @param url: used to guess the format when 'type' is None, and in error messages
    @param data: the stream containing the archive
    @param destdir: the directory to unpack into
    @param extract: if given, extract just that sub-directory from the archive
    @param type: the MIME type of the archive, or None to guess it from 'url'
    @param start_offset: passed through to the format-specific extractor
    @raise SafeException: if the type cannot be guessed or is not a known type"""
    if type is None:
        type = type_from_url(url)
        if type is None:
            raise SafeException("Unknown extension (and no MIME type given) in '%s'" % url)

    # The tar variants differ only in the decompression method handed to extract_tar.
    tar_compression = {
        'application/x-tar': None,
        'application/x-compressed-tar': 'gzip',
        'application/x-bzip-compressed-tar': 'bzip2',
        'application/x-lzma-compressed-tar': 'lzma',
    }
    if type in tar_compression:
        extract_tar(data, destdir, extract, tar_compression[type], start_offset)
        return

    # The remaining formats all take the same argument list.
    other_unpackers = {
        'application/x-deb': extract_deb,
        'application/x-rpm': extract_rpm,
        'application/zip': extract_zip,
        'application/vnd.ms-cab-compressed': extract_cab,
    }
    unpacker = other_unpackers.get(type)
    if unpacker is None:
        raise SafeException('Unknown MIME type "%s" for "%s"' % (type, url))
    unpacker(data, destdir, extract, start_offset)
def extract_deb(stream, destdir, extract = None, start_offset = 0):
    """Unpack the 'data.tar.gz' member of a Debian package into destdir.

    The package is first copied to 'archive.deb' inside destdir, 'ar' is run
    there to pull out data.tar.gz, and that tarball is then unpacked with
    extract_tar. Both temporary files are removed again.
    @param stream: seekable stream containing the package
    @param destdir: the directory to unpack into
    @param extract: must be empty; sub-directory extraction is not supported
    @param start_offset: offset in 'stream' at which the package starts
    @raise SafeException: if 'extract' is given"""
    if extract:
        raise SafeException('Sorry, but the "extract" attribute is not yet supported for Debs')

    stream.seek(start_offset)
    # ar can't read from stdin, so make a copy...
    deb_copy_name = os.path.join(destdir, 'archive.deb')
    # Binary mode: the archive must be copied byte-for-byte.
    deb_copy = open(deb_copy_name, 'wb')
    try:
        try:
            shutil.copyfileobj(stream, deb_copy)
        finally:
            deb_copy.close()
        _extract(stream, destdir, ('ar', 'x', 'archive.deb', 'data.tar.gz'))
    finally:
        # Don't leave the copy behind, even if 'ar' failed.
        os.unlink(deb_copy_name)

    data_name = os.path.join(destdir, 'data.tar.gz')
    data_stream = open(data_name, 'rb')
    try:
        # Remove the name straight away so the tarball itself doesn't end up
        # among the unpacked files; the open stream stays readable
        # (assumes POSIX unlink semantics).
        os.unlink(data_name)
        extract_tar(data_stream, destdir, None, 'gzip')
    finally:
        data_stream.close()
216 def extract_rpm(stream
, destdir
, extract
= None, start_offset
= 0):
218 raise SafeException('Sorry, but the "extract" attribute is not yet supported for RPMs')
219 fd
, cpiopath
= mkstemp('-rpm-tmp')
225 os
.dup2(stream
.fileno(), 0)
226 os
.lseek(0, start_offset
, 0)
228 _exec_maybe_sandboxed(None, 'rpm2cpio', '-')
230 traceback
.print_exc()
233 id, status
= os
.waitpid(child
, 0)
236 raise SafeException("rpm2cpio failed; can't unpack RPM archive; exit code %d" % status
)
240 args
= ['cpio', '-mid']
242 args
.append('--quiet')
244 _extract(file(cpiopath
), destdir
, args
)
245 # Set the mtime of every directory under 'tmp' to 0, since cpio doesn't
246 # preserve directory mtimes.
247 os
.path
.walk(destdir
, lambda arg
, dirname
, names
: os
.utime(dirname
, (0, 0)), None)
def extract_cab(stream, destdir, extract, start_offset = 0):
    """Unpack a Microsoft Cabinet archive into destdir using 'cabextract'.

    The archive is first copied to 'archive.cab' inside destdir; the copy is
    removed again afterwards, whether or not extraction succeeded.
    @param stream: seekable stream containing the archive
    @param destdir: the directory to unpack into
    @param extract: must be empty; sub-directory extraction is not supported
    @param start_offset: offset in 'stream' at which the archive starts
    @raise SafeException: if 'extract' is given"""
    if extract:
        raise SafeException('Sorry, but the "extract" attribute is not yet supported for Cabinet files')

    stream.seek(start_offset)
    # cabextract can't read from stdin, so make a copy...
    cab_copy_name = os.path.join(destdir, 'archive.cab')
    # Binary mode: the archive must be copied byte-for-byte.
    cab_copy = open(cab_copy_name, 'wb')
    try:
        try:
            shutil.copyfileobj(stream, cab_copy)
        finally:
            cab_copy.close()
        _extract(stream, destdir, ['cabextract', '-s', '-q', 'archive.cab'])
    finally:
        # Don't leave the copy behind, even if cabextract failed.
        os.unlink(cab_copy_name)
def extract_zip(stream, destdir, extract, start_offset = 0):
    """Unpack a zip archive into destdir using 'unzip'.

    The archive is first copied to 'archive.zip' inside destdir; the copy is
    removed again afterwards, whether or not extraction succeeded. If
    'extract' is given, only that sub-directory is unpacked and its contents
    are then moved up into destdir.
    @param stream: seekable stream containing the archive
    @param destdir: the directory to unpack into
    @param extract: sub-directory to extract, or None/empty for everything
    @param start_offset: offset in 'stream' at which the archive starts
    @raise SafeException: if 'extract' contains a character that isn't allowed"""
    if extract:
        # Limit the characters we accept, to avoid sending dodgy
        # strings to unzip
        if not re.match('^[a-zA-Z0-9][- _a-zA-Z0-9.]*$', extract):
            raise SafeException('Illegal character in extract attribute')

    stream.seek(start_offset)
    # unzip can't read from stdin, so make a copy...
    zip_copy_name = os.path.join(destdir, 'archive.zip')
    # Binary mode: the archive must be copied byte-for-byte.
    zip_copy = open(zip_copy_name, 'wb')
    try:
        try:
            shutil.copyfileobj(stream, zip_copy)
        finally:
            zip_copy.close()

        args = ['unzip', '-q', '-o', 'archive.zip']
        if extract:
            args.append(extract + '/*')

        _extract(stream, destdir, args)
    finally:
        # Don't leave the copy behind, even if unzip failed.
        os.unlink(zip_copy_name)

    if extract:
        # unzip uses extract just as a filter, so we still need to move things
        extracted_dir = os.path.join(destdir, extract)
        for x in os.listdir(extracted_dir):
            os.rename(os.path.join(extracted_dir, x), os.path.join(destdir, x))
        os.rmdir(extracted_dir)
297 def extract_tar(stream
, destdir
, extract
, decompress
, start_offset
= 0):
299 # Limit the characters we accept, to avoid sending dodgy
301 if not re
.match('^[a-zA-Z0-9][- _a-zA-Z0-9.]*$', extract
):
302 raise SafeException('Illegal character in extract attribute')
304 assert decompress
in [None, 'bzip2', 'gzip', 'lzma']
309 if decompress
== 'bzip2':
310 ext_cmd
.append('--bzip2')
311 elif decompress
== 'gzip':
313 elif decompress
== 'lzma':
314 unlzma
= find_in_path('unlzma')
316 unlzma
= os
.path
.abspath(os
.path
.join(os
.path
.dirname(__file__
), '_unlzma'))
317 ext_cmd
.append('--use-compress-program=' + unlzma
)
320 ext_cmd
.extend(('-x', '--no-same-owner', '--no-same-permissions'))
322 ext_cmd
.extend(('xf', '-'))
325 ext_cmd
.append(extract
)
327 _extract(stream
, destdir
, ext_cmd
, start_offset
)
329 # Since we don't have GNU tar, use python's tarfile module. This will probably
330 # be a lot slower and we do not support lzma; however, it is portable.
331 if decompress
is None:
333 elif decompress
== 'bzip2':
335 elif decompress
== 'gzip':
338 raise SafeException('GNU tar unavailable; unsupported compression format: ' + decompress
)
342 stream
.seek(start_offset
)
343 # Python 2.5.1 crashes if name is None; see Python bug #1706850
344 tar
= tarfile
.open(name
= '', mode
= rmode
, fileobj
= stream
)
346 current_umask
= os
.umask(0)
347 os
.umask(current_umask
)
354 debug("Can't get uid/gid")
356 def chmod_extract(tarinfo
):
357 # If any X bit is set, they all must be
358 if tarinfo
.mode
& 0111:
361 # Everyone gets read and write (subject to the umask)
362 # No special bits are allowed.
363 tarinfo
.mode
= ((tarinfo
.mode |
0666) & ~current_umask
) & 0777
365 # Don't change owner, even if run as root
370 tar
.extract(tarinfo
, destdir
)
372 extracted_anything
= False
376 if extract
is None or \
377 tarinfo
.name
.startswith(extract
+ '/') or \
378 tarinfo
.name
== extract
:
380 ext_dirs
.append(tarinfo
)
382 chmod_extract(tarinfo
)
383 extracted_anything
= True
385 # Due to a bug in tarfile (python versions < 2.5), we have to manually
386 # set the mtime of each directory that we extract after extracting everything.
388 for tarinfo
in ext_dirs
:
389 dirname
= os
.path
.join(destdir
, tarinfo
.name
)
390 os
.utime(dirname
, (tarinfo
.mtime
, tarinfo
.mtime
))
394 if extract
and not extracted_anything
:
395 raise SafeException('Unable to find specified file = %s in archive' % extract
)
def _extract(stream, destdir, command, start_offset = 0):
    """Run 'command' in a child process and wait for it to finish.

    The child runs with destdir as its working directory, 'stream' (seeked
    to start_offset) as its stdin, and TZ set to 'GMT' in its environment.
    @param stream: seekable stream to use as the child's stdin
    @param destdir: working directory for the child
    @param command: the program and its arguments, as a sequence
    @param start_offset: position to seek 'stream' to before starting
    @raise SafeException: if the child's exit status is non-zero"""
    stream.seek(start_offset)

    # Some zip archives are missing timezone information; force consistent results
    child_env = dict(os.environ, TZ = 'GMT')

    # TODO: use pola-run if available, once it supports fchmod
    status = subprocess.Popen(command, cwd = destdir, stdin = stream, env = child_env).wait()
    if status:
        raise SafeException('Failed to extract archive; exit code %d' % status)