"""Unpacking archives of various formats."""

# Copyright (C) 2006, Thomas Leonard
# See the README file for details, or visit http://0install.net.
import os
import re
import shutil
import traceback
from logging import debug, info, warn
from tempfile import mkdtemp, mkstemp

from zeroinstall import SafeException
from zeroinstall.injector.run import find_in_path
# Cached first line of `cpio --version`; None until the first probe.
_cpio_version = None


def _get_cpio_version():
    """Return the first line printed by ``cpio --version``, probing at most once.

    The command goes through the shell with stderr folded into stdout
    (``2>&1``), so whatever the shell prints first is what gets cached.
    Later calls return the cached string without running anything.
    """
    global _cpio_version
    if _cpio_version is None:
        pipe = os.popen('cpio --version 2>&1')
        try:
            # Only the first line is needed to identify the implementation.
            # readline() yields '' on empty output instead of raising.
            _cpio_version = pipe.readline()
        finally:
            # Close the pipe so the child is reaped and the fd is released.
            pipe.close()
        debug("cpio version = %s", _cpio_version)
    return _cpio_version
# NOTE(review): the 'def' header for these statements is not present in the
# text as received; the name _gnu_cpio is inferred -- confirm against callers.
def _gnu_cpio():
    """Return True if the cpio version line contains the '(GNU cpio)' marker."""
    gnu_cpio = '(GNU cpio)' in _get_cpio_version()
    debug("Is GNU cpio = %s", gnu_cpio)
    return gnu_cpio
# Cached first line of `tar --version`; None until the first probe.
_tar_version = None


def _get_tar_version():
    """Return the first line printed by ``tar --version``, probing at most once.

    The command goes through the shell with stderr folded into stdout
    (``2>&1``), so whatever the shell prints first is what gets cached.
    Later calls return the cached string without running anything.
    """
    global _tar_version
    if _tar_version is None:
        pipe = os.popen('tar --version 2>&1')
        try:
            # Only the first line is needed to identify the implementation.
            # readline() yields '' on empty output instead of raising.
            _tar_version = pipe.readline()
        finally:
            # Close the pipe so the child is reaped and the fd is released.
            pipe.close()
        debug("tar version = %s", _tar_version)
    return _tar_version
# NOTE(review): the 'def' header for these statements is not present in the
# text as received; the name _gnu_tar is inferred -- confirm against callers.
def _gnu_tar():
    """Return True if the tar version line contains the '(GNU tar)' marker."""
    gnu_tar = '(GNU tar)' in _get_tar_version()
    debug("Is GNU tar = %s", gnu_tar)
    return gnu_tar
# NOTE(review): the 'def' header is not present in the text as received; the
# public name is taken from the variable the original body assigns.
def recent_gnu_tar():
    """Return True if tar is GNU tar with a version above 1.13.92.

    The version is read from the text after the first ')' on the
    ``tar --version`` line. If it cannot be parsed, a warning is logged and
    False is returned.

    @deprecated: should be private"""
    recent = False
    if _gnu_tar():
        version = _get_tar_version()
        try:
            # e.g. "tar (GNU tar) 1.34" -> "1.34"
            version = version.split(')', 1)[1].strip()
            if not version:
                raise ValueError('empty version string')
            # Build a real list: comparing a lazy map object with a list is
            # not an element-wise comparison.
            version = [int(part) for part in version.split('.')]
            recent = version > [1, 13, 92]
        except (IndexError, ValueError):
            warn("Failed to extract GNU tar version number")
    debug("Recent GNU tar = %s", recent)
    return recent
# Look for the optional 'pola-run' launcher once, at import time.
# _exec_maybe_sandboxed() uses it, when found, to run extractors sandboxed.
_pola_run = find_in_path('pola-run')
if _pola_run:
    info('Found pola-run: %s', _pola_run)
else:
    info('pola-run not found; archive extraction will not be sandboxed')
def type_from_url(url):
    """Guess the MIME type for this resource based on its URL. Returns None if we don't know what it is.

    Only the end of 'url' is examined, and the comparison ignores case, so
    'FOO.ZIP' is recognised the same as 'foo.zip'.
    """
    known_suffixes = (
        ('.rpm', 'application/x-rpm'),
        ('.deb', 'application/x-deb'),
        ('.tar.bz2', 'application/x-bzip-compressed-tar'),
        ('.tar.gz', 'application/x-compressed-tar'),
        ('.tar.lzma', 'application/x-lzma-compressed-tar'),  # XXX: No registered MIME type!
        ('.tgz', 'application/x-compressed-tar'),
        ('.tar', 'application/x-tar'),
        ('.zip', 'application/zip'),
        ('.cab', 'application/vnd.ms-cab-compressed'),
    )
    url = url.lower()
    for suffix, mime_type in known_suffixes:
        if url.endswith(suffix):
            return mime_type
    return None
def check_type_ok(mime_type):
    """Check we have the needed software to extract from an archive of the given type.

    Plain and gzip-compressed tar archives are accepted without looking for
    any helper program.

    @raise SafeException: if the needed software is not available, or if
    'mime_type' is not one of the archive types handled here"""
    # MIME type -> (helper program that must be in $PATH, error if it is not)
    requirements = {
        'application/x-rpm': ('rpm2cpio',
            "This package looks like an RPM, but you don't have the rpm2cpio command "
            "I need to extract it. Install the 'rpm' package first (this works even if "
            "you're on a non-RPM-based distribution such as Debian)."),
        'application/x-deb': ('ar',
            "This package looks like a Debian package, but you don't have the 'ar' command "
            "I need to extract it. Install the package containing it (sometimes called 'binutils') "
            "first. This works even if you're on a non-Debian-based distribution such as Red Hat)."),
        # NOTE(review): the tail of this message was cut off in the text as
        # received; "first." is supplied by analogy with the other messages.
        'application/x-bzip-compressed-tar': ('bunzip2',
            "This package looks like a bzip2-compressed package, but you don't have the 'bunzip2' command "
            "I need to extract it. Install the package containing it (it's probably called 'bzip2') "
            "first."),
        'application/zip': ('unzip',
            "This package looks like a zip-compressed archive, but you don't have the 'unzip' command "
            "I need to extract it. Install the package containing it first."),
        'application/vnd.ms-cab-compressed': ('cabextract',
            "This package looks like a Microsoft Cabinet archive, but you don't have the 'cabextract' command "
            "I need to extract it. Install the package containing it first."),
        'application/x-lzma-compressed-tar': ('unlzma',
            "This package looks like an LZMA archive, but you don't have the 'unlzma' command "
            "I need to extract it. Install the package containing it (it's probably called 'lzma') first."),
    }

    if mime_type in ('application/x-compressed-tar', 'application/x-tar'):
        return

    if mime_type not in requirements:
        # Imported here, as in the original, so the version is only looked up
        # on this error path.
        from zeroinstall import version
        raise SafeException("Unsupported archive type '%s' (for injector version %s)" % (mime_type, version))

    program, message = requirements[mime_type]
    if not find_in_path(program):
        raise SafeException(message)
def _exec_maybe_sandboxed(writable, prog, *args):
    """execlp prog, with (only) the 'writable' directory writable if sandboxing is available.
    If no sandbox is available, run without a sandbox.

    This replaces the current process image, so it only returns (by raising)
    when the program cannot be found or the exec call itself fails.

    @param writable: directory to grant write access to under pola-run; a
    false value (the RPM extractor passes None) grants no writable directory
    @param prog: name of the program, resolved with find_in_path()
    @param args: arguments passed to the program
    @raise Exception: if 'prog' is not found in $PATH"""
    prog_path = find_in_path(prog)
    if not prog_path:
        raise Exception("'%s' not found in $PATH" % prog)
    if _pola_run is None:
        os.execlp(prog_path, prog_path, *args)
    # We have pola-shell :-)
    pola_args = ['--prog', prog_path, '-f', '/']
    for a in args:
        pola_args += ['-a', a]
    if writable:
        pola_args += ['-fw', writable]
    os.execl(_pola_run, _pola_run, *pola_args)
def unpack_archive(url, data, destdir, extract = None, type = None, start_offset = 0):
    """Unpack stream 'data' into directory 'destdir'. If extract is given, extract just
    that sub-directory from the archive. Works out the format from the name.

    @param url: where the archive came from; used to guess the type when
    'type' is None, and in error messages
    @param type: MIME type of the archive, or None to guess it from 'url'
    @param start_offset: passed through to the format-specific extractor
    @raise SafeException: if the type cannot be determined or is not handled"""
    if type is None:
        type = type_from_url(url)
    if type is None:
        raise SafeException("Unknown extension (and no MIME type given) in '%s'" % url)

    # tar flavours share one extractor and differ only in the decompressor
    tar_compression = {
        'application/x-tar': None,
        'application/x-bzip-compressed-tar': 'bzip2',
        'application/x-lzma-compressed-tar': 'lzma',
        'application/x-compressed-tar': 'gzip',
    }
    # every other format has an extractor with the same four-argument shape
    other_extractors = {
        'application/x-deb': extract_deb,
        'application/x-rpm': extract_rpm,
        'application/zip': extract_zip,
        'application/vnd.ms-cab-compressed': extract_cab,
    }

    if type in tar_compression:
        extract_tar(data, destdir, extract, tar_compression[type], start_offset)
    elif type in other_extractors:
        other_extractors[type](data, destdir, extract, start_offset)
    else:
        raise SafeException('Unknown MIME type "%s" for "%s"' % (type, url))
def extract_deb(stream, destdir, extract = None, start_offset = 0):
    """Unpack the data.tar.gz member of a Debian package into 'destdir'.

    @param stream: seekable file object holding the .deb
    @param extract: must be empty; sub-directory extraction is not supported
    @param start_offset: position in 'stream' at which the archive starts
    @raise SafeException: if 'extract' is given, or if unpacking fails"""
    if extract:
        raise SafeException('Sorry, but the "extract" attribute is not yet supported for Debs')

    stream.seek(start_offset)
    # ar can't read from stdin, so make a copy...
    deb_copy_name = os.path.join(destdir, 'archive.deb')
    deb_copy = open(deb_copy_name, 'wb')
    try:
        shutil.copyfileobj(stream, deb_copy)
    finally:
        # Close before running ar so all buffered data is on disk.
        deb_copy.close()
    try:
        _extract(stream, destdir, ('ar', 'x', 'archive.deb', 'data.tar.gz'))
    finally:
        # The copy is scratch data and must not remain in destdir.
        os.unlink(deb_copy_name)

    data_name = os.path.join(destdir, 'data.tar.gz')
    data_stream = open(data_name, 'rb')
    try:
        # Remove the name right away so the intermediate tarball does not end
        # up among the unpacked files; the open handle keeps the data
        # readable (POSIX semantics, which the fork()-based helpers in this
        # module already rely on).
        os.unlink(data_name)
        extract_tar(data_stream, destdir, None, 'gzip')
    finally:
        data_stream.close()
def extract_rpm(stream, destdir, extract = None, start_offset = 0):
    """Unpack an RPM into 'destdir' by converting it to cpio with rpm2cpio.

    rpm2cpio runs in a child process, reading the RPM on stdin and writing
    the cpio archive to a temporary file, which cpio then unpacks.

    @param stream: file object (with a real file descriptor) holding the RPM
    @param extract: must be empty; sub-directory extraction is not supported
    @param start_offset: position in 'stream' at which the archive starts
    @raise SafeException: if 'extract' is given, or if a helper fails"""
    if extract:
        raise SafeException('Sorry, but the "extract" attribute is not yet supported for RPMs')
    fd, cpiopath = mkstemp('-rpm-tmp')
    try:
        child = os.fork()
        if child == 0:
            # Child: nothing here may return into the parent's code path.
            try:
                try:
                    os.dup2(stream.fileno(), 0)
                    os.lseek(0, start_offset, 0)
                    # NOTE(review): routing stdout into the temporary file is
                    # inferred from cpio later reading 'cpiopath' -- confirm.
                    os.dup2(fd, 1)
                    _exec_maybe_sandboxed(None, 'rpm2cpio', '-')
                except:
                    traceback.print_exc()
            finally:
                os._exit(1)
        pid, status = os.waitpid(child, 0)
        assert pid == child
        if status != 0:
            # 'status' is the raw value returned by waitpid().
            raise SafeException("rpm2cpio failed; can't unpack RPM archive; exit code %d" % status)
        os.close(fd)
        fd = None

        args = ['cpio', '-mid']
        # NOTE(review): the condition guarding '--quiet' is missing from the
        # text as received; a GNU cpio check is assumed.
        if _gnu_cpio():
            args.append('--quiet')

        cpio_stream = open(cpiopath, 'rb')
        try:
            _extract(cpio_stream, destdir, args)
        finally:
            cpio_stream.close()

        # Set the mtime of every directory under 'tmp' to 0, since cpio doesn't
        # preserve directory mtimes.
        for dirpath, _dirnames, _filenames in os.walk(destdir):
            os.utime(dirpath, (0, 0))
    finally:
        if fd is not None:
            os.close(fd)
        os.unlink(cpiopath)
def extract_cab(stream, destdir, extract, start_offset = 0):
    """Unpack a Microsoft Cabinet archive into 'destdir' using cabextract.

    @param stream: seekable file object holding the archive
    @param extract: must be empty; sub-directory extraction is not supported
    @param start_offset: position in 'stream' at which the archive starts
    @raise SafeException: if 'extract' is given, or if unpacking fails"""
    if extract:
        raise SafeException('Sorry, but the "extract" attribute is not yet supported for Cabinet files')

    stream.seek(start_offset)
    # cabextract can't read from stdin, so make a copy...
    cab_copy_name = os.path.join(destdir, 'archive.cab')
    cab_copy = open(cab_copy_name, 'wb')
    try:
        shutil.copyfileobj(stream, cab_copy)
    finally:
        # Close before running cabextract so all buffered data is on disk.
        cab_copy.close()

    try:
        _extract(stream, destdir, ['cabextract', '-s', '-q', 'archive.cab'])
    finally:
        # The copy is scratch data and must not remain in destdir.
        os.unlink(cab_copy_name)
def extract_zip(stream, destdir, extract, start_offset = 0):
    """Unpack a zip archive into 'destdir' using unzip.

    @param stream: seekable file object holding the archive
    @param extract: sub-directory to unpack, or a false value for everything
    @param start_offset: position in 'stream' at which the archive starts
    @raise SafeException: if 'extract' has illegal characters, or unpacking fails"""
    if extract:
        # Limit the characters we accept, to avoid sending dodgy
        # strings to unzip
        if not re.match('^[a-zA-Z0-9][- _a-zA-Z0-9.]*$', extract):
            raise SafeException('Illegal character in extract attribute')

    stream.seek(start_offset)
    # unzip can't read from stdin, so make a copy...
    zip_copy_name = os.path.join(destdir, 'archive.zip')
    zip_copy = open(zip_copy_name, 'wb')
    try:
        shutil.copyfileobj(stream, zip_copy)
    finally:
        # Close before running unzip so all buffered data is on disk.
        zip_copy.close()

    args = ['unzip', '-q', '-o']
    command = args + ['archive.zip']
    if extract:
        # NOTE(review): how 'extract' reached unzip is missing from the text
        # as received. Member patterns go after the archive name, and a
        # trailing '/*' selects the contents of the sub-directory -- confirm.
        command.append(extract + '/*')

    try:
        _extract(stream, destdir, command)
    finally:
        # The copy is scratch data and must not remain in destdir.
        os.unlink(zip_copy_name)
def extract_tar(stream, destdir, extract, decompress, start_offset = 0):
    """Unpack a tar archive, optionally compressed, from 'stream' into 'destdir'.

    GNU tar is run as a child process when it is available; otherwise
    Python's tarfile module is used, which does not handle lzma here.

    @param stream: seekable file object holding the archive
    @param extract: archive member (usually a directory) to unpack, or a
    false value to unpack everything
    @param decompress: None, 'bzip2', 'gzip' or 'lzma'
    @param start_offset: position in 'stream' at which the archive starts
    @raise SafeException: if 'extract' has illegal characters, if the
    compression is unsupported without GNU tar, if unpacking fails, or if
    'extract' matched nothing in the archive (tarfile path)"""
    if extract:
        # Limit the characters we accept, to avoid sending dodgy
        # strings to tar
        if not re.match('^[a-zA-Z0-9][- _a-zA-Z0-9.]*$', extract):
            raise SafeException('Illegal character in extract attribute')

    assert decompress in [None, 'bzip2', 'gzip', 'lzma']

    # NOTE(review): the branch conditions below (_gnu_tar / recent_gnu_tar)
    # are missing from the text as received and are inferred from the
    # "Since we don't have GNU tar" comment and the helper names -- confirm.
    if _gnu_tar():
        ext_cmd = ['tar']
        if decompress == 'bzip2':
            ext_cmd.append('--bzip2')
        elif decompress == 'gzip':
            ext_cmd.append('-z')
        elif decompress == 'lzma':
            ext_cmd.append('--use-compress-program=unlzma')

        if recent_gnu_tar():
            ext_cmd.extend(('-x', '--no-same-owner', '--no-same-permissions'))
        else:
            ext_cmd.extend(('xf', '-'))

        if extract:
            ext_cmd.append(extract)

        _extract(stream, destdir, ext_cmd, start_offset)
        return

    # Since we don't have GNU tar, use python's tarfile module. This will probably
    # be a lot slower and we do not support lzma; however, it is portable.
    if decompress is None:
        rmode = 'r|'
    elif decompress == 'bzip2':
        rmode = 'r|bz2'
    elif decompress == 'gzip':
        rmode = 'r|gz'
    else:
        raise SafeException('GNU tar unavailable; unsupported compression format: ' + decompress)

    import stat
    import tarfile

    # Permission masks spelled with stat constants rather than octal
    # literals, whose syntax differs between Python 2 and Python 3.
    any_exec = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH       # 0111
    all_read_write = (stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP |
                      stat.S_IWGRP | stat.S_IROTH | stat.S_IWOTH)  # 0666
    permission_bits = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO   # 0777

    stream.seek(start_offset)
    tar = tarfile.open(mode = rmode, fileobj = stream)
    try:
        # os.umask() can only be read by setting it, so set it and restore it.
        current_umask = os.umask(0)
        os.umask(current_umask)

        uid = gid = None
        try:
            uid = os.geteuid()
            gid = os.getegid()
        except (AttributeError, OSError):
            debug("Can't get uid/gid")

        def chmod_extract(tarinfo):
            # If any X bit is set, they all must be
            if tarinfo.mode & any_exec:
                tarinfo.mode |= any_exec

            # Everyone gets read and write (subject to the umask)
            # No special bits are allowed.
            tarinfo.mode = ((tarinfo.mode | all_read_write) & ~current_umask) & permission_bits

            # Don't change owner, even if run as root
            # ('is not None' so that root's uid/gid of 0 is applied too).
            if uid is not None:
                tarinfo.uid = uid
            if gid is not None:
                tarinfo.gid = gid
            # NOTE(review): member names come from the archive and are not
            # checked for absolute paths or '..' components before extraction.
            tar.extract(tarinfo, destdir)

        extracted_anything = False
        ext_dirs = []

        for tarinfo in tar:
            if extract is None or \
               tarinfo.name.startswith(extract + '/') or \
               tarinfo.name == extract:
                if tarinfo.isdir():
                    ext_dirs.append(tarinfo)

                chmod_extract(tarinfo)
                extracted_anything = True

        # Due to a bug in tarfile (python versions < 2.5), we have to manually
        # set the mtime of each directory that we extract after extracting everything.
        for tarinfo in ext_dirs:
            dirname = os.path.join(destdir, tarinfo.name)
            os.utime(dirname, (tarinfo.mtime, tarinfo.mtime))
    finally:
        tar.close()

    if extract and not extracted_anything:
        raise SafeException('Unable to find specified file = %s in archive' % extract)
def _extract(stream, destdir, command, start_offset = 0):
    """Run execvp('command') inside destdir in a child process, with
    stream seeked to 'start_offset' as stdin.

    @param stream: seekable file object with a real file descriptor
    @param command: sequence of program name followed by its arguments
    @raise SafeException: if the child finishes with a non-zero status"""
    child = os.fork()
    if child == 0:
        # Child: nothing here may return into the parent's code path.
        try:
            try:
                os.chdir(destdir)
                stream.seek(start_offset)
                os.dup2(stream.fileno(), 0)
                _exec_maybe_sandboxed(destdir, *command)
            except:
                traceback.print_exc()
        finally:
            # Reached only if the exec did not happen.
            os._exit(1)
    pid, status = os.waitpid(child, 0)
    assert pid == child
    if status != 0:
        # 'status' is the raw value returned by waitpid().
        raise SafeException('Failed to extract archive; exit code %d' % status)