Sync readme
[rarfile.git] / dumprar.py
blobcf5014c1e01e3704cf3dc4fa3ac9c31ab9083fac
1 #! /usr/bin/env python
3 """Dump archive contents, test extraction."""
5 import io
6 import sys
7 import rarfile as rf
8 from binascii import crc32, hexlify
9 from datetime import datetime
# Compatibility shim: interpreters without a builtin ``bytearray`` get a
# replacement backed by an unsigned-byte ``array.array``.
try:
    bytearray
except NameError:
    import array

    def bytearray(v):
        """Build an unsigned-byte array from *v* in place of the builtin."""
        return array.array('B', v)
# Module-wide rarfile switches, set once before any archive is opened.
# fmt_time() below accepts datetime objects, matching USE_DATETIME.
rf.UNICODE_COMMENTS = 1
rf.USE_DATETIME = 1
# Help text printed for -h and when no archive is given.  Every switch
# accepted by main() is listed here, including -T and -M.
usage = """
dumprar [switches] [ARC1 ARC2 ...] [@ARCLIST]
switches:
@file read archive names from file
-pPSW set password
-Ccharset set fallback charset
-v increase verbosity
-t attempt to read all files
-x write read files out
-c show archive comment
-T test archive with testrar()
-M load archive into memory before opening it
-h show usage
-- stop switch parsing
""".strip()
# Host OS names, indexed by the header's host_os value (see show_item).
os_list = 'DOS OS2 WIN UNIX MACOS BEOS'.split()

# Block type names, indexed by (type - rf.RAR_BLOCK_MARK) in rarType().
block_strs = [
    'MARK',
    'MAIN',
    'FILE',
    'OLD_COMMENT',
    'OLD_EXTRA',
    'OLD_SUB',
    'OLD_RECOVERY',
    'OLD_AUTH',
    'SUB',
    'ENDARC',
]
def rarType(type):
    """Return the name of block type *type*, or "*UNKNOWN*" if out of range.

    The valid range is rf.RAR_BLOCK_MARK .. rf.RAR_BLOCK_ENDARC inclusive;
    names come from block_strs.
    """
    if rf.RAR_BLOCK_MARK <= type <= rf.RAR_BLOCK_ENDARC:
        return block_strs[type - rf.RAR_BLOCK_MARK]
    return "*UNKNOWN*"
# (mask, label) pairs used by get_main_flags() to name the flag bits of a
# MAIN block.  Order here is the order labels appear in the output.
main_bits = (
    (rf.RAR_MAIN_VOLUME, "VOL"),
    (rf.RAR_MAIN_COMMENT, "COMMENT"),
    (rf.RAR_MAIN_LOCK, "LOCK"),
    (rf.RAR_MAIN_SOLID, "SOLID"),
    (rf.RAR_MAIN_NEWNUMBERING, "NEWNR"),
    (rf.RAR_MAIN_AUTH, "AUTH"),
    (rf.RAR_MAIN_RECOVERY, "RECOVERY"),
    (rf.RAR_MAIN_PASSWORD, "PASSWORD"),
    (rf.RAR_MAIN_FIRSTVOLUME, "FIRSTVOL"),
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)
# (mask, label) pairs used by get_endarc_flags() to name the flag bits of
# an ENDARC block.  Order here is the order labels appear in the output.
endarc_bits = (
    (rf.RAR_ENDARC_NEXT_VOLUME, "NEXTVOL"),
    (rf.RAR_ENDARC_DATACRC, "DATACRC"),
    (rf.RAR_ENDARC_REVSPACE, "REVSPACE"),
    (rf.RAR_ENDARC_VOLNR, "VOLNR"),
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)
# (mask, label) pairs used by get_file_flags() to name the flag bits of
# FILE and SUB blocks.  The RAR_FILE_DICTMASK field is handled separately
# there, so it has no entry in this table.
file_bits = (
    (rf.RAR_FILE_SPLIT_BEFORE, "SPLIT_BEFORE"),
    (rf.RAR_FILE_SPLIT_AFTER, "SPLIT_AFTER"),
    (rf.RAR_FILE_PASSWORD, "PASSWORD"),
    (rf.RAR_FILE_COMMENT, "COMMENT"),
    (rf.RAR_FILE_SOLID, "SOLID"),
    (rf.RAR_FILE_LARGE, "LARGE"),
    (rf.RAR_FILE_UNICODE, "UNICODE"),
    (rf.RAR_FILE_SALT, "SALT"),
    (rf.RAR_FILE_VERSION, "VERSION"),
    (rf.RAR_FILE_EXTTIME, "EXTTIME"),
    (rf.RAR_FILE_EXTFLAGS, "EXTFLAGS"),
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)
# (mask, label) pairs used by get_generic_flags() for block types that
# have no dedicated table.
generic_bits = (
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)
# Labels for the eight values of the RAR_FILE_DICTMASK field, indexed by
# get_file_flags(): "D64", "D128", ... "D4096", then "DIR".
file_parms = tuple("D%d" % (64 << step) for step in range(7)) + ("DIR",)
def xprint(m, *args):
    """Write *m* to stdout followed by a newline.

    When *args* are given, *m* is used as a %-format template for them;
    without args it is written verbatim, so a bare '%' is safe.  On
    Python 2 the template is decoded from UTF-8 before formatting and
    encoded back to UTF-8 afterwards.
    """
    legacy = sys.hexversion < 0x3000000
    if legacy:
        m = m.decode('utf8')
    if args:
        m = m % args
    if legacy:
        m = m.encode('utf8')
    out = sys.stdout
    out.write(m)
    out.write('\n')
def render_flags(flags, bit_list):
    """Return a comma-separated description of the bits set in *flags*.

    *bit_list* is a sequence of (mask, label) pairs.  Labels of matching
    masks come first, in table order.  Set bits not covered by any mask
    follow in ascending order as "UNK_xxxx", where xxxx is the bit value
    in hex.
    """
    labels = [entry[1] for entry in bit_list if flags & entry[0]]

    covered = 0
    for entry in bit_list:
        covered |= entry[0]

    leftover = flags & ~covered
    shift = 0
    while leftover:
        if leftover & 1:
            labels.append("UNK_%04x" % (1 << shift))
        leftover >>= 1
        shift += 1

    return ",".join(labels)
def get_file_flags(flags):
    """Describe the flags of a FILE/SUB block.

    Bits outside rf.RAR_FILE_DICTMASK are rendered through file_bits.
    The masked field, shifted right by 5, then selects one file_parms
    label, which is appended after a comma.
    """
    names = render_flags(flags & ~rf.RAR_FILE_DICTMASK, file_bits)
    parm_index = (flags & rf.RAR_FILE_DICTMASK) >> 5
    return names + "," + file_parms[parm_index]
def get_main_flags(flags):
    """Describe MAIN block *flags* using the main_bits table."""
    text = render_flags(flags, main_bits)
    return text
def get_endarc_flags(flags):
    """Describe ENDARC block *flags* using the endarc_bits table."""
    text = render_flags(flags, endarc_bits)
    return text
def get_generic_flags(flags):
    """Describe *flags* of any other block type using generic_bits."""
    text = render_flags(flags, generic_bits)
    return text
def fmt_time(t):
    """Format a timestamp as "YYYY-MM-DD HH:MM:SS".

    *t* is either a datetime, rendered with isoformat(' '), or a 6-item
    tuple of integers (year, month, day, hour, minute, second).
    """
    if not isinstance(t, datetime):
        return "%04d-%02d-%02d %02d:%02d:%02d" % t
    return t.isoformat(' ')
def show_item(h):
    """Print a human-readable dump of header object *h*.

    The first line gives the block type and sizes.  When the header has
    bytes beyond header_base and cf_verbose > 1, those bytes are dumped
    in hex.  FILE and SUB blocks then get flags, OS, version, mode,
    method, sizes, CRC, name and any timestamps that are set; other
    block types get a flags line only.  A comment, if present, is
    printed last.
    """
    extra = h.header_size - h.header_base
    xprint("%s: hdrlen=%d datlen=%d hdr_unknown=%d", rarType(h.type),
           h.header_size, h.add_size, extra)
    if extra > 0 and cf_verbose > 1:
        xprint(" unknown: %s", hexlify(h.header_data[h.header_base:]))

    if h.type in (rf.RAR_BLOCK_FILE, rf.RAR_BLOCK_SUB):
        # Mode is shown in octal for UNIX hosts and in hex otherwise.
        mode_fmt = "0%o" if h.host_os == rf.RAR_OS_UNIX else "0x%x"
        s_mode = mode_fmt % h.mode
        xprint(" flags=0x%04x:%s", h.flags, get_file_flags(h.flags))

        s_os = os_list[h.host_os] if 0 <= h.host_os < len(os_list) else "?"
        xprint(" os=%d:%s ver=%d mode=%s meth=%c cmp=%d dec=%d vol=%d",
               h.host_os, s_os,
               h.extract_version, s_mode, h.compress_type,
               h.compress_size, h.file_size, h.volume)

        # Show the CRC as unsigned 32-bit next to the raw stored value.
        unsigned_crc = (h.CRC + (1 << 32)) & ((1 << 32) - 1)
        xprint(" crc=0x%08x (%d) time=%s", unsigned_crc, h.CRC,
               fmt_time(h.date_time))
        xprint(" name=%s", h.filename)

        # Optional timestamps are printed only when set.
        stamps = (
            (" mtime=%s", "mtime"),
            (" ctime=%s", "ctime"),
            (" atime=%s", "atime"),
            (" arctime=%s", "arctime"),
        )
        for template, attr in stamps:
            value = getattr(h, attr)
            if value:
                xprint(template, fmt_time(value))
    elif h.type == rf.RAR_BLOCK_MARK:
        # MARK blocks get the raw flags value with no decoded names.
        xprint(" flags=0x%04x:", h.flags)
    else:
        if h.type == rf.RAR_BLOCK_MAIN:
            describe = get_main_flags
        elif h.type == rf.RAR_BLOCK_ENDARC:
            describe = get_endarc_flags
        else:
            describe = get_generic_flags
        xprint(" flags=0x%04x:%s", h.flags, describe(h.flags))

    if h.comment is not None:
        text = repr(h.comment)
        # Drop the Python 2 unicode-literal prefix from the repr.
        if text.startswith('u'):
            text = text[1:]
        xprint(" comment=%s", text)
# Run-time options; main() updates these from the command-line switches.
cf_show_comment = 0     # -c: print the archive comment line by line
cf_verbose = 0          # -v: incremented once per occurrence
cf_charset = None       # -Ccharset: passed to RarFile as charset
cf_extract = 0          # -x: extract the archive contents
cf_test_read = 0        # -t: incremented; >1 also tests seek/readinto
cf_test_unrar = 0       # -T: call testrar() on the archive
cf_test_memory = 0      # -M: open the archive from an in-memory copy
def check_crc(f, inf):
    """Print 'crc error' when the CRC on *f* differs from the one on *inf*.

    *f* is the stream that was read (its ``CRC`` attribute is compared);
    *inf* is the matching archive entry.  Both values are reduced to
    unsigned 32-bit before comparing, so a CRC stored as a negative
    signed integer on either side still matches its unsigned form.
    Works on Python 2 and 3, since it does not rely on ``long``.
    """
    mask = (1 << 32) - 1
    if (f.CRC & mask) != (inf.CRC & mask):
        print('crc error')
def test_read_long(r, inf):
    """Read entry *inf* from archive *r* in full and verify size and CRC.

    The entry is first read in 8192-byte chunks.  When cf_test_read > 1
    the stream is rewound and read again through readinto() to exercise
    seek() and readinto().  Problems are reported with xprint and
    check_crc; nothing is returned.  The stream is closed even when a
    read raises, so a failing archive does not leak the open stream.
    """
    f = r.open(inf.filename)
    try:
        total = 0
        while 1:
            data = f.read(8192)
            if not data:
                break
            total += len(data)
        if total != inf.file_size:
            xprint("\n *** %s has corrupt file: %s ***", r.rarfile, inf.filename)
            xprint(" *** short read: got=%d, need=%d ***\n", total, inf.file_size)
        check_crc(f, inf)

        # test .seek() & .readinto()
        if cf_test_read > 1:
            f.seek(0, 0)

            # hack: re-enable crc calc
            f.crc_check = 1
            f.CRC = 0

            total = 0
            buf = bytearray(rf.ZERO * 4096)
            while 1:
                res = f.readinto(buf)
                if not res:
                    break
                total += res
            if inf.file_size != total:
                xprint(" *** readinto failed: got=%d, need=%d ***\n", total, inf.file_size)
            check_crc(f, inf)
    finally:
        f.close()
def test_read(r, inf):
    """Run the read test for entry *inf* of archive *r*.

    Currently a thin wrapper that delegates to test_read_long().
    """
    test_read_long(r, inf)
def test_real(fn, psw):
    """Open archive *fn* and run the checks selected by the cf_* options.

    fn  -- path of the archive.
    psw -- password to use when the archive needs one, or None.

    Prints a note and returns early if *fn* is not a RAR file, or if it
    needs a password and none was given.  Errors raised by rarfile are
    not caught here; test() handles them.
    """
    xprint("Archive: %s", fn)

    cb = None
    if cf_verbose > 1:
        cb = show_item

    rfarg = fn
    if cf_test_memory:
        # Hand rarfile an in-memory copy instead of the path.  The result
        # must be bound to rfarg, which is what gets opened below.
        with open(fn, 'rb') as fd:
            rfarg = io.BytesIO(fd.read())

    # check if rar
    if not rf.is_rarfile(rfarg):
        xprint(" --- %s is not a RAR file ---", fn)
        return

    # open
    r = rf.RarFile(rfarg, charset = cf_charset, info_callback = cb)
    # set password
    if r.needs_password():
        if psw:
            r.setpassword(psw)
        else:
            xprint(" --- %s requires password ---", fn)
            return

    # show comment
    if cf_show_comment and r.comment:
        for ln in r.comment.split('\n'):
            xprint(" %s", ln)
    elif cf_verbose == 1 and r.comment:
        cm = repr(r.comment)
        # Drop the Python 2 unicode-literal prefix from the repr.
        if cm[0] == 'u':
            cm = cm[1:]
        xprint(" comment=%s", cm)

    # process
    for n in r.namelist():
        inf = r.getinfo(n)
        if inf.isdir():
            continue
        if cf_verbose == 1:
            show_item(inf)
        if cf_test_read:
            test_read(r, inf)

    if cf_extract:
        # Exercise both the bulk and the per-entry extraction APIs.
        r.extractall()
        for inf in r.infolist():
            r.extract(inf)

    if cf_test_unrar:
        r.testrar()
def test(fn, psw):
    """Run test_real() on *fn*, reporting archive errors instead of raising.

    rf.NeedFirstVolume gets its own message.  Any other rf.Error, or an
    IOError, is printed as "ExceptionName: message".
    """
    try:
        test_real(fn, psw)
    except rf.NeedFirstVolume:
        xprint(" --- %s is middle part of multi-vol archive ---", fn)
    except (rf.Error, IOError):
        # Take only type and value; the traceback is not needed.
        exc, msg = sys.exc_info()[:2]
        xprint("\n *** %s: %s ***\n", exc.__name__, msg)
def main():
    """Parse sys.argv, set the cf_* options and test each named archive.

    Arguments are archive names, @file references (one archive name per
    line) or switches; after "--" everything is taken as an archive
    name.  Prints the usage text when -h is given or when no archive
    was named.

    Raises Exception for an unrecognised switch, including a bare "-".
    """
    global cf_verbose, cf_show_comment, cf_charset
    global cf_extract, cf_test_read, cf_test_unrar
    global cf_test_memory

    # parse args
    args = []
    psw = None
    noswitch = False
    for a in sys.argv[1:]:
        # startswith() keeps an empty argument or a lone "-" from raising
        # IndexError the way indexing a[0] / a[1] would.
        if noswitch:
            args.append(a)
        elif a.startswith("@"):
            with open(a[1:], 'r') as listfile:
                for ln in listfile:
                    # Strip only the line terminator, so a final line
                    # without one keeps its last character.
                    fn = ln.rstrip('\r\n')
                    if fn:
                        args.append(fn)
        elif not a.startswith('-'):
            args.append(a)
        elif a.startswith('-p'):
            psw = a[2:]
        elif a == '--':
            noswitch = True
        elif a == '-h':
            xprint(usage)
            return
        elif a == '-v':
            cf_verbose += 1
        elif a == '-c':
            cf_show_comment = 1
        elif a == '-x':
            cf_extract = 1
        elif a == '-t':
            cf_test_read += 1
        elif a == '-T':
            cf_test_unrar = 1
        elif a == '-M':
            cf_test_memory = 1
        elif a.startswith('-C'):
            cf_charset = a[2:]
        else:
            raise Exception("unknown switch: "+a)
    if not args:
        xprint(usage)

    for fn in args:
        test(fn, psw)
# Script entry point.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl-C ends the run quietly, without a traceback.
        pass