Added 'type' and 'start-offset' attributes to <archive> elements.
[zeroinstall.git] / zeroinstall / zerostore / unpack.py
blob5e9821b4df4d07e4a669439ec33af54c902d3fc2
1 # Copyright (C) 2006, Thomas Leonard
2 # See the README file for details, or visit http://0install.net.
4 import os
5 import shutil
6 import traceback
7 from tempfile import mkdtemp, mkstemp
8 import sha
9 import re
10 from logging import debug, info, warn
11 from zeroinstall import SafeException
_recent_gnu_tar = None
def recent_gnu_tar():
    """Report whether the system 'tar' is GNU tar newer than version 1.13.92.

    The answer is worked out on the first call, by running 'tar --version',
    and cached in the module-level _recent_gnu_tar for later calls. A tar
    that does not identify itself as GNU tar, or whose version number cannot
    be parsed, counts as not recent.

    Returns True or False."""
    global _recent_gnu_tar
    if _recent_gnu_tar is None:
        _recent_gnu_tar = False
        # stderr is merged into stdout (2>&1); only the first line is examined
        pipe = os.popen('tar --version 2>&1')
        try:
            # readline() gives '' rather than raising if there is no output
            banner = pipe.readline()
        finally:
            pipe.close()
        if '(GNU tar)' in banner:
            try:
                number = banner.split(')', 1)[1].strip()
                if not number:
                    raise ValueError('empty version number')
                # Build a real list so that this is a list-to-list comparison
                parts = [int(x) for x in number.split('.')]
                _recent_gnu_tar = parts > [1, 13, 92]
            except (IndexError, ValueError):
                # Only parsing problems are expected here; anything else
                # is a real bug and should not be hidden.
                warn("Failed to extract GNU tar version number")
        debug("Recent GNU tar = %s", _recent_gnu_tar)
    return _recent_gnu_tar
def _find_in_path(prog):
    """Search the directories listed in $PATH for a file called 'prog'.

    prog: the bare name of the program to look for

    Returns the path of the first match, or None if there is none. Only
    os.path.isfile() is checked; the file is not tested for being executable."""
    # If PATH is not set at all, fall back to the platform's default search
    # path instead of failing with KeyError.
    search_path = os.environ.get('PATH', os.defpath)
    for d in search_path.split(os.pathsep):
        path = os.path.join(d, prog)
        if os.path.isfile(path):
            return path
    return None
# Look for the optional 'pola-run' helper once, when this module is loaded.
# _pola_run holds its path, or None if it is not in $PATH.
_pola_run = _find_in_path('pola-run')
if not _pola_run:
    info('pola-run not found; archive extraction will not be sandboxed')
else:
    info('Found pola-run: %s', _pola_run)
def type_from_url(url):
    """Work out the MIME type of an archive from the ending of its URL.

    The comparison ignores case. Returns the MIME type as a string, or None
    if the ending is not one we recognise."""
    known_suffixes = (
        ('.rpm', 'application/x-rpm'),
        ('.deb', 'application/x-deb'),
        ('.tar.bz2', 'application/x-bzip-compressed-tar'),
        ('.tar.gz', 'application/x-compressed-tar'),
        ('.tgz', 'application/x-compressed-tar'),
        ('.zip', 'application/zip'),
    )
    lowered = url.lower()
    for suffix, mime_type in known_suffixes:
        if lowered.endswith(suffix):
            return mime_type
    return None
def check_type_ok(mime_type):
    """Check we have the needed software to extract from an archive of the given type.

    mime_type: the archive's MIME type (must not be empty)

    Returns nothing if the type can be handled. Raises SafeException if a
    required helper program is not in $PATH, or if the type is not one this
    module supports."""
    assert mime_type
    # Note: only the MIME type is known here, not the URL it came from, so
    # the messages describe "this package" rather than quoting a URL.
    if mime_type == 'application/x-rpm':
        if not _find_in_path('rpm2cpio'):
            raise SafeException("This package looks like an RPM, but you don't have the rpm2cpio command "
                    "I need to extract it. Install the 'rpm' package first (this works even if "
                    "you're on a non-RPM-based distribution such as Debian).")
    elif mime_type == 'application/x-deb':
        if not _find_in_path('ar'):
            raise SafeException("This package looks like a Debian package, but you don't have the 'ar' command "
                    "I need to extract it. Install the package containing it (sometimes called 'binutils') "
                    "first (this works even if you're on a non-Debian-based distribution such as Red Hat).")
    elif mime_type == 'application/x-bzip-compressed-tar':
        if not _find_in_path('bunzip2'):
            raise SafeException("This package looks like a bzip2-compressed package, but you don't have the 'bunzip2' command "
                    "I need to extract it. Install the package containing it (it's probably called 'bzip2') "
                    "first.")
    elif mime_type == 'application/zip':
        if not _find_in_path('unzip'):
            raise SafeException("This package looks like a zip-compressed archive, but you don't have the 'unzip' command "
                    "I need to extract it. Install the package containing it first.")
    elif mime_type == 'application/x-compressed-tar':
        # Must be an equality test: 'in' would accept any substring of the
        # MIME type (e.g. 'application') as a supported type.
        pass
    else:
        from zeroinstall import version
        raise SafeException("Unsupported archive type '%s' (for injector version %s)" % (mime_type, version))
def _exec_maybe_sandboxed(writable, prog, *args):
    """Replace this process with 'prog', with (only) the 'writable' directory
    writable if sandboxing is available. If no sandbox is available, run
    without a sandbox.

    writable: directory handed to pola-run with '-fw', or None for none
    prog: name of the program to run; it is looked up in $PATH
    args: the arguments to pass to the program

    Does not return if the exec succeeds. Raises SafeException if 'prog'
    cannot be found in $PATH."""
    prog_path = _find_in_path(prog)
    if prog_path is None:
        # Fail with a clear message rather than passing None on to exec
        raise SafeException("Can't find the '%s' command in $PATH" % prog)
    if _pola_run is None:
        os.execlp(prog_path, prog_path, *args)
    # We have pola-run :-)
    pola_args = ['--prog', prog_path, '-f', '/']
    for a in args:
        pola_args += ['-a', a]
    if writable:
        pola_args += ['-fw', writable]
    os.execl(_pola_run, _pola_run, *pola_args)
def unpack_archive(url, data, destdir, extract = None, type = None, start_offset = 0):
    """Unpack the archive in stream 'data' into the directory 'destdir'.

    url: where the archive came from; used to guess the format when 'type'
         is None, and quoted in the error message
    extract: if given, passed on to the format-specific extractor so that
             just that sub-directory is unpacked
    type: the archive's MIME type, or None to guess it from 'url'
    start_offset: passed on to the extractor as the position in 'data' at
                  which the archive starts

    Raises Exception if the MIME type is not one of the known ones."""
    mime_type = type
    if mime_type is None:
        mime_type = type_from_url(url)

    # Each entry pairs a MIME type with a call that unpacks that format.
    # The calls are wrapped in lambdas so nothing runs until a type matches.
    handlers = (
        ('application/x-bzip-compressed-tar',
            lambda: extract_tar(data, destdir, extract, '--bzip2', start_offset)),
        ('application/x-deb',
            lambda: extract_deb(data, destdir, extract, start_offset)),
        ('application/x-rpm',
            lambda: extract_rpm(data, destdir, extract, start_offset)),
        ('application/zip',
            lambda: extract_zip(data, destdir, extract, start_offset)),
        ('application/x-compressed-tar',
            lambda: extract_tar(data, destdir, extract, '-z', start_offset)),
    )
    for known_type, unpack in handlers:
        if mime_type == known_type:
            unpack()
            return

    raise Exception('Unknown MIME type "%s" for "%s"' % (mime_type, url))
def extract_deb(stream, destdir, extract = None, start_offset = 0):
    """Unpack the Debian package in 'stream' into 'destdir'.

    The package is first copied to 'archive.deb' inside destdir, 'ar' is
    used to pull 'data.tar.gz' out of it, and that is then unpacked with
    'tar'. Both intermediate files are removed again.

    extract: not supported for this format; must be empty
    start_offset: position in 'stream' at which the package starts

    Raises SafeException if 'extract' is given or a helper program fails."""
    if extract:
        raise SafeException('Sorry, but the "extract" attribute is not yet supported for Debs')

    stream.seek(start_offset)
    # ar can't read from stdin, so make a copy...
    deb_copy_name = os.path.join(destdir, 'archive.deb')
    deb_copy = open(deb_copy_name, 'wb')
    try:
        shutil.copyfileobj(stream, deb_copy)
    finally:
        deb_copy.close()
    try:
        _extract(stream, destdir, ('ar', 'x', 'archive.deb', 'data.tar.gz'))
    finally:
        # Remove our copy even if ar failed
        os.unlink(deb_copy_name)
    data_name = os.path.join(destdir, 'data.tar.gz')
    data_stream = open(data_name, 'rb')
    try:
        # Unlink straight away: the open file stays readable, and the
        # tarball will not be left behind among the unpacked files.
        os.unlink(data_name)
        _extract(data_stream, destdir, ('tar', 'xzf', '-'))
    finally:
        data_stream.close()
def extract_rpm(stream, destdir, extract = None, start_offset = 0):
    """Unpack the RPM in 'stream' into 'destdir'.

    A child process runs 'rpm2cpio' with the stream as its stdin and a
    temporary file as its stdout; the resulting cpio archive is then
    unpacked into destdir with 'cpio'. The temporary file is always removed.

    extract: not supported for this format; must be empty
    start_offset: position in 'stream' at which the RPM starts

    Raises SafeException if 'extract' is given or a helper program fails."""
    if extract:
        raise SafeException('Sorry, but the "extract" attribute is not yet supported for RPMs')
    fd, cpiopath = mkstemp('-rpm-tmp')
    try:
        child = os.fork()
        if child == 0:
            # Child: must never return into the caller's code, so whatever
            # happens we end with os._exit().
            try:
                try:
                    os.dup2(stream.fileno(), 0)
                    os.lseek(0, start_offset, 0)
                    os.dup2(fd, 1)
                    _exec_maybe_sandboxed(None, 'rpm2cpio', '-')
                except:
                    traceback.print_exc()
            finally:
                os._exit(1)
        pid, status = os.waitpid(child, 0)
        assert pid == child
        if status != 0:
            # 'status' is the raw value returned by waitpid()
            raise SafeException("rpm2cpio failed; can't unpack RPM archive; exit code %d" % status)
        os.close(fd)
        fd = None
        args = ['cpio', '-mid', '--quiet']
        cpio_stream = open(cpiopath, 'rb')
        try:
            _extract(cpio_stream, destdir, args)
        finally:
            cpio_stream.close()
        # Set the mtime of every directory under destdir (and destdir itself)
        # to 0, since cpio doesn't preserve directory mtimes.
        for dirpath, dirnames, filenames in os.walk(destdir):
            os.utime(dirpath, (0, 0))
    finally:
        if fd is not None:
            os.close(fd)
        os.unlink(cpiopath)
def extract_zip(stream, destdir, extract, start_offset = 0):
    """Unpack the zip archive in 'stream' into 'destdir' using 'unzip'.

    The archive is first copied to 'archive.zip' inside destdir, and that
    copy is removed again afterwards.

    extract: if not empty, only the members under this directory of the
             archive are unpacked
    start_offset: position in 'stream' at which the archive starts

    Raises SafeException if 'extract' contains characters we do not accept,
    or if unzip fails."""
    if extract:
        # Limit the characters we accept, to avoid sending dodgy
        # strings to zip
        if not re.match('^[a-zA-Z0-9][- _a-zA-Z0-9.]*$', extract):
            raise SafeException('Illegal character in extract attribute')

    stream.seek(start_offset)
    # unzip can't read from stdin, so make a copy...
    zip_copy_name = os.path.join(destdir, 'archive.zip')
    zip_copy = open(zip_copy_name, 'wb')
    try:
        shutil.copyfileobj(stream, zip_copy)
    finally:
        zip_copy.close()

    try:
        # The archive name must come before any member names; unzip takes
        # the first non-option argument as the archive to read.
        args = ['unzip', '-q', '-o', 'archive.zip']

        if extract:
            # Member arguments are patterns; select everything below the
            # requested directory.
            args.append(extract + '/*')

        _extract(stream, destdir, args)
    finally:
        # Remove our copy even if unzip failed
        os.unlink(zip_copy_name)
def extract_tar(stream, destdir, extract, decompress, start_offset = 0):
    """Unpack the compressed tar archive in 'stream' into 'destdir' using 'tar'.

    extract: if not empty, only this member of the archive is unpacked
    decompress: the tar option selecting the decompressor (the callers in
                this module pass '--bzip2' or '-z')
    start_offset: position in 'stream' at which the archive starts

    Raises SafeException if 'extract' contains characters we do not accept,
    or if tar fails."""
    if extract:
        # Limit the characters we accept, to avoid sending dodgy
        # strings to tar
        if not re.match('^[a-zA-Z0-9][- _a-zA-Z0-9.]*$', extract):
            raise SafeException('Illegal character in extract attribute')

    if recent_gnu_tar():
        # Always name stdin explicitly with '-f -'. Without -f, GNU tar
        # reads from $TAPE or a compiled-in default, which need not be stdin.
        args = ['tar', decompress, '-x', '--no-same-owner', '--no-same-permissions', '-f', '-']
    else:
        args = ['tar', decompress, '-xf', '-']

    if extract:
        args.append(extract)

    _extract(stream, destdir, args, start_offset)
def _extract(stream, destdir, command, start_offset = 0):
    """Run 'command' in a child process, inside 'destdir', reading from 'stream'.

    stream: file object which becomes the child's stdin, after being seeked
            to 'start_offset'
    destdir: the child's working directory, also passed to
             _exec_maybe_sandboxed as the writable directory
    command: sequence holding the program name followed by its arguments

    Raises SafeException if the child's wait status is not zero."""
    pid = os.fork()
    if pid != 0:
        # Parent: wait for the child to finish and check how it went.
        # 'status' is the raw value returned by waitpid().
        reaped, status = os.waitpid(pid, 0)
        assert reaped == pid
        if status != 0:
            raise SafeException('Failed to extract archive; exit code %d' % status)
        return

    # Child: either the exec replaces this process, or we report the
    # problem and exit. Control must never get back to the caller.
    try:
        try:
            os.chdir(destdir)
            stream.seek(start_offset)
            os.dup2(stream.fileno(), 0)
            _exec_maybe_sandboxed(destdir, *command)
        except:
            traceback.print_exc()
    finally:
        os._exit(1)