If an error occurs while extracting, still try not to leave a directory-within-a...
[rox-archive.git] / formats.py
blob959ee6833153f0a97faed09ce88d1da447eae624
1 import os, sys
2 from support import PipeThroughCommand, escape, Tmp
3 import rox
# The PipeThroughCommand that is currently running, or None when idle.
current_command = None


def pipe_through_command(command, src, dst):
    """Run `command` through PipeThroughCommand and wait for it to finish.

    command -- passed unchanged to PipeThroughCommand.
    src     -- input stream, or None when the command takes no stream input
               (callers in this file pass None in that case).  It is rewound
               to the start first when that is possible.
    dst     -- output stream, or None; passed unchanged to
               PipeThroughCommand.

    While the command runs it is published in the module-level
    `current_command`; the global is always reset to None afterwards, even if
    waiting raises.  Only one command may be in flight at a time.
    """
    global current_command
    assert not current_command

    # Rewinding is best-effort: pipes and other unseekable streams are simply
    # used from their current position.  Only the failures a rewind can
    # legitimately produce are ignored, so that e.g. KeyboardInterrupt is no
    # longer swallowed here.
    if src is not None:
        try:
            src.seek(0)
        except (AttributeError, IOError, OSError, ValueError):
            pass

    current_command = PipeThroughCommand(command, src, dst)
    try:
        current_command.wait()
    finally:
        current_command = None
# Every Operation registers itself here on construction, in creation order.
operations = []


class Operation:
    """Base class for everything that can be done to the input data.

    Subclasses are expected to provide a `command` attribute, which
    save_to_stream() hands to pipe_through_command().
    """

    # True (1) if the operation's extension should be appended to the
    # suggested output name.
    add_extension = 0

    def __init__(self, extension):
        """Remember `extension` and add this instance to `operations`."""
        self.extension = extension
        operations.append(self)

    def can_handle(self, data):
        """Return whether `data` is a FileData instance."""
        return isinstance(data, FileData)

    def save_to_stream(self, data, stream):
        """Pipe data.source through self.command, writing to `stream`."""
        pipe_through_command(self.command, data.source, stream)
class Compress(Operation):
    """Operation that turns one stream into a compressed stream."""

    # The compressed file gains this operation's extension.
    add_extension = 1

    def __init__(self, extension, command, type):
        """Store the filter `command` and the resulting `type` string.

        The registered instances in this file pass a MIME type as `type`.
        """
        self.command = command
        self.type = type
        Operation.__init__(self, extension)

    def __str__(self):
        """Return the human-readable label for this operation."""
        ext = self.extension
        return 'Compress as .%s' % ext
class Decompress(Operation):
    """Operation that turns a compressed stream back into a plain stream."""

    # Type string reported for the decompressed result.
    type = 'text/plain'

    def __init__(self, extension, command):
        """Store the filter `command` used to decompress `.extension` data."""
        self.command = command
        Operation.__init__(self, extension)

    def __str__(self):
        """Return the human-readable label for this operation."""
        ext = self.extension
        return 'Decompress .%s' % ext
class Extract(Operation):
    """Extract an archive to a directory."""

    type = 'inode/directory'

    def __init__(self, extension, command):
        """Create an extractor for `.extension` archives.

        If `command` contains the quoted placeholder '%s' (including the
        single quotes), the escaped path of the source file is substituted
        for it; otherwise the archive is fed to the command as its input
        stream.
        """
        Operation.__init__(self, extension)
        self.command = command

    def __str__(self):
        """Return the human-readable label for this operation."""
        return 'Extract from a .%s' % self.extension

    def save_to_stream(self, data, stream):
        """Always raise: extraction produces a directory, not a stream."""
        raise Exception('This operation creates a directory, so you have '
                        'to drag to a filer window on the local machine')

    def save_to_file(self, data, path):
        """Extract data.source into the directory `path`.

        `path` is created if it does not exist.  The process's current
        directory is changed to `path` and is not restored afterwards.

        Raises Exception if `path` exists but is not a directory; any error
        from running the command propagates after the clean-up below.
        """
        if os.path.exists(path):
            if not os.path.isdir(path):
                raise Exception("'%s' already exists and is not a directory!" %
                                path)
        else:
            os.mkdir(path)
        os.chdir(path)

        command = self.command
        source = data.source
        if command.find("'%s'") != -1:
            # The command reads the archive by name rather than from a stream.
            command = command % escape(source.name)
            source = None

        try:
            pipe_through_command(command, source, None)
        finally:
            # Clean up even when extraction failed part-way through.
            try:
                os.rmdir(path)  # Will only succeed if it's empty
            except OSError:
                pass
            # If the empty directory was just removed there is nothing left
            # to tidy; calling pull_up() on it would raise and hide the real
            # extraction error.
            if os.path.isdir(path):
                self.pull_up(path)

    def pull_up(self, path):
        """Flatten `path` if extraction created just one subdirectory in it.

        When `path` contains exactly one entry and that entry is a real
        directory, its contents are moved up into `path` and the now-empty
        subdirectory is removed.  Otherwise nothing is changed.
        """
        entries = os.listdir(path)
        if len(entries) != 1:
            return
        unneeded_path = os.path.join(path, entries[0])
        # A symlink to a directory must not be flattened: doing so would move
        # the link target's contents, which live outside the extracted tree.
        if os.path.islink(unneeded_path) or not os.path.isdir(unneeded_path):
            return

        # Rename the subdirectory first so that a child carrying the same
        # name as its parent can be moved up without a clash.
        import random
        tmp_path = os.path.join(path,
                                'tmp-' + repr(random.randint(0, 100000)))
        os.rename(unneeded_path, tmp_path)
        for name in os.listdir(tmp_path):
            os.rename(os.path.join(tmp_path, name), os.path.join(path, name))
        os.rmdir(tmp_path)
class Archive(Operation):
    """Create an archive from a directory."""

    # The archive file gains this operation's extension.
    add_extension = 1

    def __init__(self, extension, command, type):
        """Create an archiver producing `.extension` files.

        `command` must contain the quoted placeholder '%s'; the escaped name
        of the directory being archived is substituted for it.  `type` is
        stored as given (the registered instances in this file pass a MIME
        type).
        """
        assert command.find("'%s'") != -1

        Operation.__init__(self, extension)
        self.command = command
        self.type = type

    def __str__(self):
        """Return the human-readable label for this operation."""
        return 'Create .%s archive' % self.extension

    def can_handle(self, data):
        """Return whether `data` is a DirData instance."""
        return isinstance(data, DirData)

    def save_to_stream(self, data, stream):
        """Archive the directory data.path, writing the archive to `stream`.

        The command is run from the directory's parent so that the archive
        stores names relative to it.  The process's current directory is
        changed as a side effect and is not restored.
        """
        parent, name = os.path.split(data.path)
        # A bare relative name such as 'mydir' has an empty parent, and
        # os.chdir('') raises OSError; the current directory is already the
        # right place in that case.
        if parent:
            os.chdir(parent)
        command = self.command % escape(name)
        pipe_through_command(command, None, stream)
# Registration order matters: each constructor appends to `operations`.

# --- Extractors: archive -> directory ---------------------------------
# Commands containing '%s' are given the archive's path; the others read
# the archive from their input stream.
tgz = Extract('tgz', "gunzip -c - | tar xf -")
tbz = Extract('tar.bz2', "bunzip2 -c - | tar xf -")
rar = Extract('rar', "rar x -")
tar = Extract('tar', "tar xf -")
rpm = Extract('rpm', "rpm2cpio - | cpio -id --quiet")
cpio = Extract('cpio', "cpio -id --quiet")
deb = Extract('deb', "ar x '%s'")
zip = Extract('zip', "unzip -q '%s'")
jar = Extract('jar', "unzip -q '%s'")

# --- Archivers: directory -> archive ----------------------------------
make_tgz = Archive('tgz', "tar cf - '%s' | gzip",
                   'application/x-compressed-tar')
Archive('tar.gz', "tar cf - '%s' | gzip", 'application/x-compressed-tar')
Archive('tar.bz2', "tar cf - '%s' | bzip2",
        'application/x-bzip-compressed-tar')
Archive('zip', "zip -qr - '%s'", 'application/zip')
Archive('jar', "zip -qr - '%s'", 'application/x-jar')
Archive('tar', "tar cf - '%s'", 'application/x-tar')

# --- Compressors: stream -> compressed stream -------------------------
# Note: these go afterwards so that .tar.gz matches before .gz
make_gz = Compress('gz', "gzip -c -", 'application/x-gzip')
Compress('bz2', "bzip2 -c -", 'application/x-bzip')
Compress('uue', "uuencode /dev/stdout", 'application/x-uuencoded')

# --- Decompressors: compressed stream -> stream -----------------------
gz = Decompress('gz', "gunzip -c -")
bz2 = Decompress('bz2', "bunzip2 -ck -")
uue = Decompress('uue', "uudecode -o /dev/stdout")
# Alternative extension spellings, mapped to the extension under which an
# operation is registered.
# NOTE: the 'bz' entries assume bzip2 can read old bzip files -- unverified.
aliases = {
    'tar.gz': 'tgz',
    'tar.bz': 'tar.bz2',
    'bz': 'bz2',
}
# Dict used as a set: one key per extension of a registered operation
# (the values are always None).
known_extensions = {}
for x in operations:
    # Operations without an `extension` attribute are skipped.
    if hasattr(x, 'extension'):
        known_extensions[x.extension] = None
class FileData:
    """A file on the local filesystem, or standard input when path is '-'.

    Attributes set by the constructor:
      path         -- the path as given
      mode         -- st_mode of the file; stays None for standard input
      source       -- seekable file object holding the data (the opened file
                      itself, or a Tmp copy when the input cannot be rewound)
      default      -- the Operation chosen by guess_format()
      default_name -- suggested name for the result of `default`
    """

    mode = None

    def __init__(self, path):
        """Open `path`, sniff its format and work out the default operation.

        If the file cannot be opened or stat'ed the error is reported through
        rox.report_exception() and the process exits with status 1.
        """
        self.path = path

        if path == '-':
            source = sys.stdin
        else:
            try:
                source = open(path)
                self.mode = os.stat(path).st_mode
            except Exception:
                rox.report_exception()
                sys.exit(1)

        start = source.read(300)
        self.source = self._seekable(source, start)
        self.default = self.guess_format(start)
        self.default_name = self._suggest_name(path)

    def _seekable(self, source, start):
        """Return `source` rewound, or a rewound Tmp copy of its contents.

        `start` is the data already consumed from `source`; it is written to
        the copy first so that nothing is lost.  Standard input is always
        copied.
        """
        if source is not sys.stdin:
            try:
                source.seek(0)
            except (IOError, OSError):
                pass
            else:
                return source

        # Input is not a regular, local, seekable file, so copy it
        # to a local temp file.
        import shutil
        tmp = Tmp()
        tmp.write(start)
        tmp.flush()
        shutil.copyfileobj(source, tmp)
        tmp.seek(0)
        tmp.flush()
        return tmp

    def _suggest_name(self, path):
        """Return the suggested output name for the default operation.

        Standard input is called 'Data'.  Otherwise the longest known
        extension is stripped from `path`.  The default operation's own
        extension is then appended if it asks for that.
        """
        if path == '-':
            name = 'Data'
        else:
            name = path
            for ext in known_extensions:
                if path.endswith('.' + ext):
                    stripped = path[:-len(ext) - 1]
                    if len(stripped) < len(name):
                        name = stripped
        if self.default.add_extension:
            name += '.' + self.default.extension
        return name

    def guess_format(self, data):
        "Return a good default Operation, judging by the first 300 bytes or so."
        size = len(data)

        def string(offset, match):
            # True if `match` appears in `data` at `offset`.
            return data[offset:offset + len(match)] == match

        def short(offset, match):
            # True if the two bytes at `offset` encode the 16-bit value
            # `match` in either byte order.  The bytes are converted with
            # ord(): comparing a one-character string with an int is never
            # equal, which used to make this test always fail.
            if size > offset + 1:
                a = ord(data[offset:offset + 1])
                b = ord(data[offset + 1:offset + 2])
                low = match & 0xff
                high = match >> 8
                return (a == low and b == high) or (b == low and a == high)
            return 0

        def path_ends(suffixes):
            # True if self.path ends with any of `suffixes`.
            for suffix in suffixes:
                if self.path.endswith(suffix):
                    return 1
            return 0

        # Archives
        if string(257, 'ustar\0') or string(257, 'ustar\040\040\0'):
            return tar
        # 0x71C7 is octal 070707 and 0xC771 is octal 0143561.
        if short(0, 0x71C7) or short(0, 0xC771) or string(0, '070707') or \
           string(0, '070701') or string(0, '070702'):
            return cpio
        if string(0, '!<arch>') or string(0, '\\<ar>') or string(0, '<ar>'):
            if string(7, '\ndebian'):
                return deb
        if string(0, 'Rar!'):
            return rar
        if string(0, 'PK\003\004'):
            return zip
        if string(0, '\xed\xab\xee\xdb'):
            return rpm

        # Compressed streams: the file name decides between a compressed
        # tarball and a plain compressed file.
        if string(0, '\037\213'):
            if path_ends(('.tar.gz', '.tgz')):
                return tgz
            return gz
        if string(0, 'BZh') or string(0, 'BZ'):
            if path_ends(('.tar.bz', '.tar.bz2', '.tbz', '.tbz2')):
                return tbz
            return bz2
        if string(0, 'begin '):
            return uue

        # Nothing recognised: offer to compress the file.
        return make_gz
class DirData:
    """A directory on the local filesystem, to be turned into an archive."""

    # Directories carry no mode information.
    mode = None

    def __init__(self, path):
        """Record `path`; the default operation is the make_tgz archiver."""
        self.path = path
        archiver = make_tgz
        self.default = archiver
        self.default_name = path + '.' + archiver.extension
def test():
    """Round-trip regression test for the registered operations.

    First every Compress operation is paired with the Decompress operation
    of the same extension and a small sample is sent through both.  Then
    every Archive operation that has a matching Extract operation is used to
    pack and unpack a scratch directory under /tmp.
    """
    def partners(kind, extension):
        # All registered operations of class `kind` using `extension`.
        return [o for o in operations
                if isinstance(o, kind) and o.extension == extension]

    test_data = 'Hello\0World\n'

    # --- stream round trips ---------------------------------------------
    src = Tmp()
    src.write(test_data)
    src.flush()
    data = FileData(src.name)
    for comp in operations:
        if not isinstance(comp, Compress):
            continue
        candidates = partners(Decompress, comp.extension)
        assert len(candidates) == 1
        dec = candidates[0]
        print("Test %s / %s" % (comp, dec))
        middle = Tmp()
        comp.save_to_stream(data, middle)
        out = Tmp()
        dec.save_to_stream(FileData(middle.name), out)
        del middle
        assert open(out.name).read() == test_data
        print("Passed")
    del src

    # --- directory round trips ------------------------------------------
    src_dir = '/tmp/archive-regression-test'
    out = src_dir + '.out'
    sample = src_dir + '/test'
    if not os.path.exists(src_dir):
        os.mkdir(src_dir)
    # The sample file holds the test data followed by one extra newline.
    handle = open(sample, 'w')
    handle.write(test_data + '\n')
    handle.close()
    data = DirData(src_dir)

    for archive in operations:
        if not isinstance(archive, Archive):
            continue
        candidates = partners(Extract, archive.extension)
        if not candidates:
            print("(skipping %s; no extractor)" % archive)
            continue

        # Start every round from a clean output directory.
        if os.path.exists(out):
            os.system("rm -r '%s'" % out)

        assert len(candidates) == 1
        extract = candidates[0]
        print("Test %s / %s" % (archive, extract))

        middle = Tmp()
        archive.save_to_stream(data, middle)
        extract.save_to_file(FileData(middle.name), src_dir + '.out')

        assert os.listdir(src_dir) == os.listdir(out)
        assert open(sample).read() == open(out + '/test').read()
        print("Passed")

    # --- tidy up -----------------------------------------------------------
    os.unlink(sample)
    os.rmdir(src_dir)
    if os.path.exists(out):
        os.system("rm -r '%s'" % out)


if __name__ == '__main__':
    test()