1 """Unpacking archives of various formats."""
3 # Copyright (C) 2009, Thomas Leonard
4 # See the README file for details, or visit http://0install.net.
6 from zeroinstall
import _
, logger
11 from tempfile
import mkdtemp
, mkstemp
14 from zeroinstall
import SafeException
15 from zeroinstall
.support
import find_in_path
, ro_rmtree
def _get_cpio_version():
    """Return the first line printed by ``cpio --version``.

    The command is run only while the module-level ``_cpio_version`` cache is
    None; afterwards the cached string is returned.
    """
    # NOTE(review): relies on a module-level `_cpio_version = None` that is
    # not visible in this view of the file -- confirm it exists.
    global _cpio_version
    if _cpio_version is None:
        # stderr is merged into stdout, so the first output line is captured
        # whichever stream cpio writes its banner to.
        child = subprocess.Popen(['cpio', '--version'],
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT,
                                 universal_newlines=True)
        out, unused = child.communicate()
        _cpio_version = out.split('\n', 1)[0]
        logger.debug(_("cpio version = %s"), _cpio_version)
    return _cpio_version
# NOTE(review): the `def` line of this helper is not visible in this view of
# the file; the name `_gnu_cpio` is reconstructed -- confirm against callers.
def _gnu_cpio():
    """Return True if the version banner of ``cpio`` contains '(GNU cpio)'."""
    gnu_cpio = '(GNU cpio)' in _get_cpio_version()
    logger.debug(_("Is GNU cpio = %s"), gnu_cpio)
    return gnu_cpio
def _get_tar_version():
    """Return the first line printed by ``tar --version``.

    The command is run only while the module-level ``_tar_version`` cache is
    None; afterwards the cached string is returned.
    """
    # NOTE(review): relies on a module-level `_tar_version = None` that is
    # not visible in this view of the file -- confirm it exists.
    global _tar_version
    if _tar_version is None:
        # stderr is merged into stdout, so the first output line is captured
        # whichever stream tar writes its banner to.
        child = subprocess.Popen(['tar', '--version'],
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT,
                                 universal_newlines=True)
        out, unused = child.communicate()
        _tar_version = out.split('\n', 1)[0]
        logger.debug(_("tar version = %s"), _tar_version)
    return _tar_version
# NOTE(review): the `def` line of this helper is not visible in this view of
# the file; the name `_gnu_tar` is reconstructed -- confirm against callers.
def _gnu_tar():
    """Return True if the version banner of ``tar`` contains '(GNU tar)'."""
    gnu_tar = '(GNU tar)' in _get_tar_version()
    logger.debug(_("Is GNU tar = %s"), gnu_tar)
    return gnu_tar
# NOTE(review): the `def` line and the guarding conditions of this function
# are not visible in this view of the file; they are reconstructed from the
# remaining statements -- confirm the name and the `_gnu_tar()` guard.
def recent_gnu_tar():
    """Return True if tar is GNU tar with a version greater than 1.13.92.

    @deprecated: should be private"""
    is_recent = False
    if _gnu_tar():
        # The version number follows the closing parenthesis of the banner,
        # e.g. "tar (GNU tar) 1.30".
        match = re.search(r'\)\s*(\d+(\.\d+)*)', _get_tar_version())
        if match:
            # Compare as lists of ints so that e.g. 1.9 < 1.13.
            version = list(map(int, match.group(1).split('.')))
            is_recent = version > [1, 13, 92]
        else:
            logger.warn(_("Failed to extract GNU tar version number"))
    logger.debug(_("Recent GNU tar = %s"), is_recent)
    return is_recent
66 # Disabled, as Plash does not currently support fchmod(2).
68 #_pola_run = find_in_path('pola-run')
70 # info('Found pola-run: %s', _pola_run)
72 # info('pola-run not found; archive extraction will not be sandboxed')
def type_from_url(url):
    """Guess the MIME type for this resource based on its URL. Returns None if we don't know what it is."""
    # (suffix, MIME type) pairs, checked in order.
    suffix_types = (
        ('.rpm', 'application/x-rpm'),
        ('.deb', 'application/x-deb'),
        ('.tar.bz2', 'application/x-bzip-compressed-tar'),
        ('.tar.gz', 'application/x-compressed-tar'),
        ('.tar.lzma', 'application/x-lzma-compressed-tar'),
        ('.tar.xz', 'application/x-xz-compressed-tar'),
        ('.tbz', 'application/x-bzip-compressed-tar'),
        ('.tgz', 'application/x-compressed-tar'),
        ('.tlz', 'application/x-lzma-compressed-tar'),
        ('.txz', 'application/x-xz-compressed-tar'),
        ('.tar', 'application/x-tar'),
        ('.zip', 'application/zip'),
        ('.cab', 'application/vnd.ms-cab-compressed'),
        ('.dmg', 'application/x-apple-diskimage'),
        ('.gem', 'application/x-ruby-gem'),
    )
    # Match case-insensitively so that e.g. "FOO.ZIP" is recognised too.
    url = url.lower()
    for suffix, mime_type in suffix_types:
        if url.endswith(suffix):
            return mime_type
    return None
def check_type_ok(mime_type):
    """Check we have the needed software to extract from an archive of the given type.
    @raise SafeException: if the needed software is not available"""
    if mime_type == 'application/x-rpm':
        if not find_in_path('rpm2cpio'):
            raise SafeException(_("This package looks like an RPM, but you don't have the rpm2cpio command "
                    "I need to extract it. Install the 'rpm' package first (this works even if "
                    "you're on a non-RPM-based distribution such as Debian)."))
    elif mime_type == 'application/x-deb':
        if not find_in_path('ar'):
            raise SafeException(_("This package looks like a Debian package, but you don't have the 'ar' command "
                    "I need to extract it. Install the package containing it (sometimes called 'binutils') "
                    "first. This works even if you're on a non-Debian-based distribution such as Red Hat)."))
    elif mime_type == 'application/x-bzip-compressed-tar':
        pass    # We'll fall back to Python's built-in tar.bz2 support
    elif mime_type == 'application/zip':
        if not find_in_path('unzip'):
            raise SafeException(_("This package looks like a zip-compressed archive, but you don't have the 'unzip' command "
                    "I need to extract it. Install the package containing it first."))
    elif mime_type == 'application/vnd.ms-cab-compressed':
        if not find_in_path('cabextract'):
            raise SafeException(_("This package looks like a Microsoft Cabinet archive, but you don't have the 'cabextract' command "
                    "I need to extract it. Install the package containing it first."))
    elif mime_type == 'application/x-apple-diskimage':
        if not find_in_path('hdiutil'):
            raise SafeException(_("This package looks like a Apple Disk Image, but you don't have the 'hdiutil' command "
                    "I need to extract it."))
    elif mime_type == 'application/x-lzma-compressed-tar':
        pass    # We can get it through Zero Install
    elif mime_type == 'application/x-xz-compressed-tar':
        if not find_in_path('unxz'):
            # NOTE(review): the final "first." fragment of this message is not
            # visible in this view of the file -- confirm the exact wording.
            raise SafeException(_("This package looks like a xz-compressed package, but you don't have the 'unxz' command "
                    "I need to extract it. Install the package containing it (it's probably called 'xz-utils') "
                    "first."))
    elif mime_type in ('application/x-compressed-tar', 'application/x-tar', 'application/x-ruby-gem'):
        # Nothing is checked for these types.
        pass
    else:
        # Imported here because only this error path uses it.
        from zeroinstall import version
        raise SafeException(_("Unsupported archive type '%(type)s' (for injector version %(version)s)") % {'type': mime_type, 'version': version})
def _exec_maybe_sandboxed(writable, prog, *args):
    """execlp prog, with (only) the 'writable' directory writable if sandboxing is available.
    If no sandbox is available, run without a sandbox.

    This replaces the current process image; on success it does not return.
    @raise Exception: if prog cannot be found in $PATH"""
    prog_path = find_in_path(prog)
    if not prog_path:
        raise Exception(_("'%s' not found in $PATH") % prog)
    if _pola_run is None:
        # No sandbox: exec the program directly.
        os.execlp(prog_path, prog_path, *args)
    # We have pola-shell :-)
    pola_args = ['--prog', prog_path, '-f', '/']
    # Each program argument is forwarded with its own '-a' flag.
    for a in args:
        pola_args += ['-a', a]
    if writable:
        pola_args += ['-fw', writable]
    os.execl(_pola_run, _pola_run, *pola_args)
def unpack_archive_over(url, data, destdir, extract = None, type = None, start_offset = 0):
    """Like unpack_archive, except that we unpack to a temporary directory first and
    then move things over, checking that we're not following symlinks at each stage.
    Use this when you want to unpack an archive into a directory which already has
    content in it.
    @note: Since 0.49, the leading "extract" component is removed (unlike unpack_archive).
    @raise SafeException: if a directory or file would be unpacked over a symlink,
    or a directory over a non-directory"""
    import stat
    tmpdir = mkdtemp(dir = destdir)
    assert extract is None or os.sep not in extract, extract
    try:
        # (relative directory, mtime) pairs, recorded so directory mtimes can
        # be restored after files have been moved into them.
        mtimes = []

        unpack_archive(url, data, tmpdir, extract, type, start_offset)

        if extract is None:
            srcdir = tmpdir
        else:
            srcdir = os.path.join(tmpdir, extract)
            assert not os.path.islink(srcdir)

        stem_len = len(srcdir)
        for root, dirs, files in os.walk(srcdir):
            relative_root = root[stem_len + 1:] or '.'
            target_root = os.path.join(destdir, relative_root)
            try:
                info = os.lstat(target_root)
            except OSError as ex:
                if ex.errno != errno.ENOENT:
                    raise   # Some odd error.
                # Doesn't exist. OK.
                os.mkdir(target_root)
            else:
                if stat.S_ISLNK(info.st_mode):
                    raise SafeException(_('Attempt to unpack dir over symlink "%s"!') % relative_root)
                elif not stat.S_ISDIR(info.st_mode):
                    raise SafeException(_('Attempt to unpack dir over non-directory "%s"!') % relative_root)
            # `root` already starts with srcdir; joining it onto srcdir again
            # would duplicate the prefix whenever srcdir is a relative path.
            mtimes.append((relative_root, os.lstat(root).st_mtime))

            for s in dirs:  # Symlinks are counted as directories
                src = os.path.join(srcdir, relative_root, s)
                if os.path.islink(src):
                    # Move the link itself, like a file.
                    files.append(s)

            for f in files:
                src = os.path.join(srcdir, relative_root, f)
                dest = os.path.join(destdir, relative_root, f)
                if os.path.islink(dest):
                    raise SafeException(_('Attempt to unpack file over symlink "%s"!') %
                            os.path.join(relative_root, f))
                os.rename(src, dest)

        # The first entry is the top-level directory, which is skipped.
        for path, mtime in mtimes[1:]:
            os.utime(os.path.join(destdir, path), (mtime, mtime))
    finally:
        ro_rmtree(tmpdir)
def unpack_archive(url, data, destdir, extract = None, type = None, start_offset = 0):
    """Unpack stream 'data' into directory 'destdir'. If extract is given, extract just
    that sub-directory from the archive (i.e. destdir/extract will exist afterwards).
    Works out the format from the name.
    @raise SafeException: if the type is not given and cannot be guessed from url,
    or if the MIME type is not one of the handled types"""
    if type is None:
        type = type_from_url(url)
    if type is None:
        raise SafeException(_("Unknown extension (and no MIME type given) in '%s'") % url)

    if type == 'application/x-bzip-compressed-tar':
        extract_tar(data, destdir, extract, 'bzip2', start_offset)
    elif type == 'application/x-deb':
        extract_deb(data, destdir, extract, start_offset)
    elif type == 'application/x-rpm':
        extract_rpm(data, destdir, extract, start_offset)
    elif type == 'application/zip':
        extract_zip(data, destdir, extract, start_offset)
    elif type == 'application/x-tar':
        extract_tar(data, destdir, extract, None, start_offset)
    elif type == 'application/x-lzma-compressed-tar':
        extract_tar(data, destdir, extract, 'lzma', start_offset)
    elif type == 'application/x-xz-compressed-tar':
        extract_tar(data, destdir, extract, 'xz', start_offset)
    elif type == 'application/x-compressed-tar':
        extract_tar(data, destdir, extract, 'gzip', start_offset)
    elif type == 'application/vnd.ms-cab-compressed':
        extract_cab(data, destdir, extract, start_offset)
    elif type == 'application/x-apple-diskimage':
        extract_dmg(data, destdir, extract, start_offset)
    elif type == 'application/x-ruby-gem':
        extract_gem(data, destdir, extract, start_offset)
    else:
        raise SafeException(_('Unknown MIME type "%(type)s" for "%(url)s"') % {'type': type, 'url': url})
def extract_deb(stream, destdir, extract = None, start_offset = 0):
    """Unpack the data.tar* member of the Debian package in 'stream' into 'destdir'.

    @raise SafeException: if 'extract' is given (not supported for Debs), or
    if the archive has no recognised data.tar member"""
    if extract:
        raise SafeException(_('Sorry, but the "extract" attribute is not yet supported for Debs'))

    stream.seek(start_offset)
    # ar can't read from stdin, so make a copy...
    deb_copy_name = os.path.join(destdir, 'archive.deb')
    with open(deb_copy_name, 'wb') as deb_copy:
        shutil.copyfileobj(stream, deb_copy)

    # Member name -> 'decompress' argument for extract_tar.
    # 'data.tar.xz' is accepted too, since extract_tar handles 'xz'.
    compression_by_member = {
        'data.tar': None,
        'data.tar.gz': 'gzip',
        'data.tar.bz2': 'bzip2',
        'data.tar.lzma': 'lzma',
        'data.tar.xz': 'xz',
    }

    data_tar = None
    p = subprocess.Popen(('ar', 't', 'archive.deb'), stdout=subprocess.PIPE, cwd=destdir, universal_newlines=True)
    listing = p.communicate()[0]
    # Use the first listed member that is a known data archive.
    for line in listing.split('\n'):
        if line in compression_by_member:
            data_tar = line
            data_compression = compression_by_member[line]
            break
    else:
        raise SafeException(_("File is not a Debian package."))

    _extract(stream, destdir, ('ar', 'x', 'archive.deb', data_tar))
    os.unlink(deb_copy_name)
    data_name = os.path.join(destdir, data_tar)
    with open(data_name, 'rb') as data_stream:
        # Unlink while still open, so the tarball itself does not remain in
        # destdir once its contents have been unpacked there.
        os.unlink(data_name)
        extract_tar(data_stream, destdir, None, data_compression)
def extract_rpm(stream, destdir, extract = None, start_offset = 0):
    """Unpack the RPM in 'stream' into 'destdir' by converting it with
    rpm2cpio into a temporary file and then running cpio on that file.

    @raise SafeException: if 'extract' is given (not supported for RPMs), or
    if rpm2cpio exits with a non-zero status"""
    if extract:
        raise SafeException(_('Sorry, but the "extract" attribute is not yet supported for RPMs'))
    fd, cpiopath = mkstemp('-rpm-tmp')
    try:
        child = os.fork()
        if child == 0:
            # Child process: stdin <- stream, stdout -> temporary cpio file.
            try:
                try:
                    os.dup2(stream.fileno(), 0)
                    os.lseek(0, start_offset, 0)
                    os.dup2(fd, 1)
                    _exec_maybe_sandboxed(None, 'rpm2cpio', '-')
                except:
                    # Deliberately broad: whatever went wrong, report it and
                    # fall through to _exit so the child never returns.
                    traceback.print_exc()
            finally:
                os._exit(1)
        pid, status = os.waitpid(child, 0)
        assert pid == child
        if status != 0:
            raise SafeException(_("rpm2cpio failed; can't unpack RPM archive; exit code %d") % status)
        os.close(fd)
        fd = None   # Tells the finally block the descriptor is already closed

        args = ['cpio', '-mid']
        if _gnu_cpio():
            args.append('--quiet')

        with open(cpiopath, 'rb') as cpio_stream:
            _extract(cpio_stream, destdir, args)
        # Set the mtime of every directory under 'tmp' to 0, since cpio doesn't
        # preserve directory mtimes.
        for root, dirs, files in os.walk(destdir):
            os.utime(root, (0, 0))
    finally:
        if fd is not None:
            os.close(fd)
        os.unlink(cpiopath)
def extract_gem(stream, destdir, extract = None, start_offset = 0):
    """Unpack the Ruby gem in 'stream' into 'destdir'.

    The 'data.tar.gz' member is first extracted from the outer, uncompressed
    tar into a temporary directory under 'destdir', and is then unpacked
    (optionally limited to 'extract') into 'destdir'."""
    payload = 'data.tar.gz'
    tmpdir = mkdtemp(dir = destdir)
    try:
        # Pass start_offset on: extract_tar seeks the stream itself, so a
        # seek done here would be overridden by its default offset of 0.
        extract_tar(stream, destdir=tmpdir, extract=payload, decompress=None,
                    start_offset=start_offset)
        with open(os.path.join(tmpdir, payload), 'rb') as payload_stream:
            extract_tar(payload_stream, destdir=destdir, extract=extract, decompress='gzip')
    finally:
        ro_rmtree(tmpdir)
def extract_cab(stream, destdir, extract, start_offset = 0):
    """Unpack the Cabinet archive in 'stream' into 'destdir' using cabextract.

    @raise SafeException: if 'extract' is given (not supported for Cabinet files)"""
    if extract:
        raise SafeException(_('Sorry, but the "extract" attribute is not yet supported for Cabinet files'))

    stream.seek(start_offset)
    # cabextract can't read from stdin, so make a copy...
    cab_copy_name = os.path.join(destdir, 'archive.cab')
    # 'with' closes the copy even if copying fails part-way.
    with open(cab_copy_name, 'wb') as cab_copy:
        shutil.copyfileobj(stream, cab_copy)

    _extract(stream, destdir, ['cabextract', '-s', '-q', 'archive.cab'])
    os.unlink(cab_copy_name)
def extract_dmg(stream, destdir, extract, start_offset = 0):
    """Unpack the Apple Disk Image in 'stream' into 'destdir'.

    The image is attached with hdiutil at a temporary mount point, its
    top-level entries are copied with 'cp -pR', and it is detached again.

    @raise SafeException: if 'extract' is given (not supported for DMGs)"""
    if extract:
        raise SafeException(_('Sorry, but the "extract" attribute is not yet supported for DMGs'))

    stream.seek(start_offset)
    # hdiutil can't read from stdin, so make a copy...
    dmg_copy_name = os.path.join(destdir, 'archive.dmg')
    # 'with' closes the copy even if copying fails part-way.
    with open(dmg_copy_name, 'wb') as dmg_copy:
        shutil.copyfileobj(stream, dmg_copy)

    mountpoint = mkdtemp(prefix='archive')
    subprocess.check_call(["hdiutil", "attach", "-quiet", "-mountpoint", mountpoint, "-nobrowse", dmg_copy_name])
    try:
        subprocess.check_call(["cp", "-pR"] + glob.glob("%s/*" % mountpoint) + [destdir])
    finally:
        # Always detach, so a failed copy does not leave the image mounted.
        subprocess.check_call(["hdiutil", "detach", "-quiet", mountpoint])
    os.rmdir(mountpoint)
    os.unlink(dmg_copy_name)
def extract_zip(stream, destdir, extract, start_offset = 0):
    """Unpack the zip archive in 'stream' into 'destdir' using unzip.

    If 'extract' is given, only members matching 'extract/*' are unpacked.
    @raise SafeException: if 'extract' contains characters outside the accepted set"""
    if extract:
        # Limit the characters we accept, to avoid sending dodgy
        # strings to unzip
        if not re.match('^[a-zA-Z0-9][- _a-zA-Z0-9.]*$', extract):
            raise SafeException(_('Illegal character in extract attribute'))

    stream.seek(start_offset)
    # unzip can't read from stdin, so make a copy...
    zip_copy_name = os.path.join(destdir, 'archive.zip')
    with open(zip_copy_name, 'wb') as zip_copy:
        shutil.copyfileobj(stream, zip_copy)

    args = ['unzip', '-q', '-o', 'archive.zip']

    if extract:
        args.append(extract + '/*')

    _extract(stream, destdir, args)
    os.unlink(zip_copy_name)
def extract_tar(stream, destdir, extract, decompress, start_offset = 0):
    """Unpack the tar archive in 'stream' into 'destdir'.

    GNU tar is used when available; otherwise Python's tarfile module is used.
    'extract', if given, limits extraction to that member (and its contents).
    'decompress' is one of None, 'bzip2', 'gzip', 'lzma' or 'xz'.
    @raise SafeException: if 'extract' contains characters outside the accepted
    set, or (tarfile fallback only) if nothing matching 'extract' was found"""
    if extract:
        # Limit the characters we accept, to avoid sending dodgy
        # strings to tar
        if not re.match('^[a-zA-Z0-9][- _a-zA-Z0-9.]*$', extract):
            raise SafeException(_('Illegal character in extract attribute'))

    assert decompress in [None, 'bzip2', 'gzip', 'lzma', 'xz']

    # NOTE(review): the branch conditions below (`_gnu_tar()`,
    # `recent_gnu_tar()`) and the gzip flag ('-z') are not visible in this
    # view of the file; they are reconstructed -- confirm.
    if _gnu_tar():
        ext_cmd = ['tar']
        if decompress:
            if decompress == 'bzip2':
                ext_cmd.append('--bzip2')
            elif decompress == 'gzip':
                ext_cmd.append('-z')
            elif decompress == 'lzma':
                ext_cmd.append('--use-compress-program=' + _find_decompressor('unlzma'))
            elif decompress == 'xz':
                ext_cmd.append('--use-compress-program=' + _find_decompressor('unxz'))

        if recent_gnu_tar():
            ext_cmd.extend(('-x', '--no-same-owner', '--no-same-permissions'))
        else:
            ext_cmd.extend(('xf', '-'))

        if extract:
            ext_cmd.append(extract)

        _extract(stream, destdir, ext_cmd, start_offset)
        return

    import tarfile
    import tempfile

    # Since we don't have GNU tar, use python's tarfile module. This will probably
    # be a lot slower and we do not support lzma and xz; however, it is portable.
    # (lzma and xz are handled by first uncompressing stream to a temporary file.
    # this is simple to do, but less efficient than piping through the program)
    if decompress is None:
        rmode = 'r|'
    elif decompress == 'bzip2':
        rmode = 'r|bz2'
    elif decompress == 'gzip':
        rmode = 'r|gz'
    elif decompress in ('lzma', 'xz'):
        prog = {'lzma': 'unlzma', 'xz': 'unxz'}[decompress]
        temp = tempfile.NamedTemporaryFile(suffix='.tar', mode='w+b')
        subprocess.check_call([_find_decompressor(prog)], stdin=stream, stdout=temp)
        rmode = 'r|'
        stream = temp
    else:
        raise SafeException(_('GNU tar unavailable; unsupported compression format: %s') % decompress)

    stream.seek(start_offset)
    # Python 2.5.1 crashes if name is None; see Python bug #1706850
    tar = tarfile.open(name = '', mode = rmode, fileobj = stream)

    # os.umask can only be read by setting it, so set and restore.
    current_umask = os.umask(0)
    os.umask(current_umask)

    uid = gid = None
    try:
        uid = os.geteuid()
        gid = os.getegid()
    except AttributeError:
        # These functions do not exist on every platform.
        logger.debug(_("Can't get uid/gid"))

    def chmod_extract(tarinfo):
        # If any X bit is set, they all must be
        if tarinfo.mode & 0o111:
            tarinfo.mode |= 0o111

        # Everyone gets read and write (subject to the umask)
        # No special bits are allowed.
        tarinfo.mode = ((tarinfo.mode | 0o666) & ~current_umask) & 0o777

        # Don't change owner, even if run as root
        if uid:
            tarinfo.uid = uid
        if gid:
            tarinfo.gid = gid
        # NOTE(review): member names are not checked for absolute paths or
        # '..' components before extraction -- consider validating them.
        tar.extract(tarinfo, destdir)

    extracted_anything = False
    ext_dirs = []

    try:
        for tarinfo in tar:
            if extract is None or \
               tarinfo.name.startswith(extract + '/') or \
               tarinfo.name == extract:
                if tarinfo.isdir():
                    ext_dirs.append(tarinfo)

                chmod_extract(tarinfo)
                extracted_anything = True

        # Due to a bug in tarfile (python versions < 2.5), we have to manually
        # set the mtime of each directory that we extract after extracting everything.
        for tarinfo in ext_dirs:
            dirname = os.path.join(destdir, tarinfo.name)
            os.utime(dirname, (tarinfo.mtime, tarinfo.mtime))
    finally:
        # Close the archive even if extraction failed part-way.
        tar.close()

    if extract and not extracted_anything:
        raise SafeException(_('Unable to find specified file = %s in archive') % extract)


def _find_decompressor(prog):
    """Return the path of 'prog' found in $PATH, or else the path of the
    file named '_' + prog in this module's directory."""
    path = find_in_path(prog)
    if not path:
        path = os.path.abspath(os.path.join(os.path.dirname(__file__), '_' + prog))
    return path
def _extract(stream, destdir, command, start_offset = 0):
    """Run execvp('command') inside destdir in a child process, with
    stream seeked to 'start_offset' as stdin.
    @raise SafeException: if the command exits with a non-zero status; the
    message includes whatever the command wrote to stderr"""

    # Some zip archives are missing timezone information; force consistent results
    child_env = os.environ.copy()
    child_env['TZ'] = 'GMT'

    stream.seek(start_offset)

    # TODO: use pola-run if available, once it supports fchmod
    child = subprocess.Popen(command, cwd = destdir, stdin = stream, stderr = subprocess.PIPE, env = child_env)

    unused, cerr = child.communicate()

    status = child.wait()
    if status != 0:
        # stderr is read in binary mode; decode it so the message shows the
        # text rather than a bytes repr on Python 3.
        if not isinstance(cerr, str):
            cerr = cerr.decode('utf-8', 'replace')
        raise SafeException(_('Failed to extract archive (using %(command)s); exit code %(status)d:\n%(err)s') % {'command': command, 'status': status, 'err': cerr.strip()})