# Extracted from rarfile.git / dumprar.py (commit "sha1: cleanups"),
# blob 9153cfde5356d74049f3dd5eef6fb764e9dabc4d
#! /usr/bin/env python

"""Dump archive contents, test extraction."""
from __future__ import division, absolute_import, print_function

import io
import sys
import getopt

from datetime import datetime

import rarfile as rf
# Help text shown for -h and when no archive names are given.
usage = """
dumprar [switches] [ARC1 ARC2 ...] [@ARCLIST]
switches:
@file read archive names from file
-pPSW set password
-Ccharset set fallback charset
-v increase verbosity
-t attempt to read all files
-x write read files out
-c show archive comment
-h show usage
-- stop switch parsing
""".strip()
# RAR3 host OS names; show_item_v3() indexes this list with h.host_os.
os_list = ['DOS', 'OS2', 'WIN', 'UNIX', 'MACOS', 'BEOS']

# RAR3 block type names; rar3_type() indexes this list with
# (btype - rf.RAR_BLOCK_MARK).
block_strs = ['MARK', 'MAIN', 'FILE', 'OLD_COMMENT', 'OLD_EXTRA',
              'OLD_SUB', 'OLD_RECOVERY', 'OLD_AUTH', 'SUB', 'ENDARC']
# RAR5 block type code -> display name, used by rar5_type().
r5_block_types = {
    rf.RAR5_BLOCK_MAIN: 'R5_MAIN',
    rf.RAR5_BLOCK_FILE: 'R5_FILE',
    rf.RAR5_BLOCK_SERVICE: 'R5_SVC',
    rf.RAR5_BLOCK_ENCRYPTION: 'R5_ENC',
    rf.RAR5_BLOCK_ENDARC: 'R5_ENDARC',
}
def rar3_type(btype):
    """Return the RAR3 block type code as a string.

    Codes outside the rf.RAR_BLOCK_MARK..rf.RAR_BLOCK_ENDARC range
    yield "*UNKNOWN*".
    """
    if not rf.RAR_BLOCK_MARK <= btype <= rf.RAR_BLOCK_ENDARC:
        return "*UNKNOWN*"
    # block_strs is ordered by type code, starting at RAR_BLOCK_MARK.
    return block_strs[btype - rf.RAR_BLOCK_MARK]
def rar5_type(btype):
    """Return the RAR5 block type code as a string.

    Codes missing from r5_block_types yield '*UNKNOWN*'.
    """
    return r5_block_types.get(btype, '*UNKNOWN*')
# RAR3 flag tables: sequences of (bit mask, display name) pairs in the
# form expected by render_flags().

# Flags of the RAR3 main header.
main_bits = (
    (rf.RAR_MAIN_VOLUME, "VOL"),
    (rf.RAR_MAIN_COMMENT, "COMMENT"),
    (rf.RAR_MAIN_LOCK, "LOCK"),
    (rf.RAR_MAIN_SOLID, "SOLID"),
    (rf.RAR_MAIN_NEWNUMBERING, "NEWNR"),
    (rf.RAR_MAIN_AUTH, "AUTH"),
    (rf.RAR_MAIN_RECOVERY, "RECOVERY"),
    (rf.RAR_MAIN_PASSWORD, "PASSWORD"),
    (rf.RAR_MAIN_FIRSTVOLUME, "FIRSTVOL"),
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)

# Flags of the RAR3 end-of-archive header.
endarc_bits = (
    (rf.RAR_ENDARC_NEXT_VOLUME, "NEXTVOL"),
    (rf.RAR_ENDARC_DATACRC, "DATACRC"),
    (rf.RAR_ENDARC_REVSPACE, "REVSPACE"),
    (rf.RAR_ENDARC_VOLNR, "VOLNR"),
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)

# Flags of RAR3 file and sub headers (dictionary-size bits are handled
# separately in get_file_flags()).
file_bits = (
    (rf.RAR_FILE_SPLIT_BEFORE, "SPLIT_BEFORE"),
    (rf.RAR_FILE_SPLIT_AFTER, "SPLIT_AFTER"),
    (rf.RAR_FILE_PASSWORD, "PASSWORD"),
    (rf.RAR_FILE_COMMENT, "COMMENT"),
    (rf.RAR_FILE_SOLID, "SOLID"),
    (rf.RAR_FILE_LARGE, "LARGE"),
    (rf.RAR_FILE_UNICODE, "UNICODE"),
    (rf.RAR_FILE_SALT, "SALT"),
    (rf.RAR_FILE_VERSION, "VERSION"),
    (rf.RAR_FILE_EXTTIME, "EXTTIME"),
    (rf.RAR_FILE_EXTFLAGS, "EXTFLAGS"),
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)

# Flags shown for every other RAR3 block type.
generic_bits = (
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)

# Names for the value stored in the rf.RAR_FILE_DICTMASK bits of the
# file flags; get_file_flags() indexes this tuple with that value.
file_parms = ("D64", "D128", "D256", "D512",
              "D1024", "D2048", "D4096", "DIR")
# RAR5 flag tables: sequences of (bit mask, display name) pairs in the
# form expected by render_flags().

# Generic per-block flags.
r5_block_flags = (
    (rf.RAR5_BLOCK_FLAG_EXTRA_DATA, 'EXTRA'),
    (rf.RAR5_BLOCK_FLAG_DATA_AREA, 'DATA'),
    (rf.RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN, 'SKIP'),
    (rf.RAR5_BLOCK_FLAG_SPLIT_BEFORE, 'SPLIT_BEFORE'),
    (rf.RAR5_BLOCK_FLAG_SPLIT_AFTER, 'SPLIT_AFTER'),
    (rf.RAR5_BLOCK_FLAG_DEPENDS_PREV, 'DEPENDS'),
    (rf.RAR5_BLOCK_FLAG_KEEP_WITH_PARENT, 'KEEP'),
)

# Main block flags.
r5_main_flags = (
    (rf.RAR5_MAIN_FLAG_ISVOL, 'ISVOL'),
    (rf.RAR5_MAIN_FLAG_HAS_VOLNR, 'VOLNR'),
    (rf.RAR5_MAIN_FLAG_SOLID, 'SOLID'),
    (rf.RAR5_MAIN_FLAG_RECOVERY, 'RECOVERY'),
    (rf.RAR5_MAIN_FLAG_LOCKED, 'LOCKED'),
)

# File / service block flags.
r5_file_flags = (
    (rf.RAR5_FILE_FLAG_ISDIR, 'DIR'),
    (rf.RAR5_FILE_FLAG_HAS_MTIME, 'MTIME'),
    (rf.RAR5_FILE_FLAG_HAS_CRC32, 'CRC32'),
    (rf.RAR5_FILE_FLAG_UNKNOWN_SIZE, 'NOSIZE'),
)

# Archive encryption block flags.
r5_enc_flags = (
    (rf.RAR5_ENC_FLAG_HAS_CHECKVAL, 'CHECKVAL'),
)

# End-of-archive block flags.
r5_endarc_flags = (
    (rf.RAR5_ENDARC_FLAG_NEXT_VOL, 'NEXTVOL'),
)

# Flags of the per-file encryption record.
r5_file_enc_flags = (
    (rf.RAR5_XENC_CHECKVAL, 'CHECKVAL'),
    (rf.RAR5_XENC_TWEAKED, 'TWEAKED'),
)

# Redirection type code -> display name.
r5_file_redir_types = {
    rf.RAR5_XREDIR_UNIX_SYMLINK: 'UNIX_SYMLINK',
    rf.RAR5_XREDIR_WINDOWS_SYMLINK: 'WINDOWS_SYMLINK',
    rf.RAR5_XREDIR_WINDOWS_JUNCTION: 'WINDOWS_JUNCTION',
    rf.RAR5_XREDIR_HARD_LINK: 'HARD_LINK',
    rf.RAR5_XREDIR_FILE_COPY: 'FILE_COPY',
}

# Flags of the redirection record.
r5_file_redir_flags = (
    (rf.RAR5_XREDIR_ISDIR, 'DIR'),
)
def xprint(m, *args):
    """Print string to stdout.

    Format unicode safely: on Python 2 the template is decoded from
    UTF-8 before %-formatting and encoded back before writing.  The
    template is only %-formatted when *args* are given, so a bare
    message may contain literal '%' characters.  A newline is appended.
    """
    is_py2 = sys.hexversion < 0x3000000
    if is_py2:
        m = m.decode('utf8')
    if args:
        m = m % args
    if is_py2:
        m = m.encode('utf8')
    sys.stdout.write(m)
    sys.stdout.write('\n')
def render_flags(flags, bit_list):
    """Show bit names.

    *bit_list* is a sequence of (mask, name) pairs.  Returns the names
    of all masks set in *flags*, comma-separated and in table order,
    followed by one "UNK_xxxx" entry (hex value of the bit) for every
    set bit that no table entry covers.  Returns '-' when nothing is set.
    """
    names = []
    known = 0
    for mask, name in bit_list:
        known |= mask
        if flags & mask:
            names.append(name)

    # Walk the leftover bits from the lowest upwards.
    unknown = flags & ~known
    bit = 1
    while unknown:
        if unknown & 1:
            names.append("UNK_%04x" % bit)
        unknown >>= 1
        bit <<= 1

    if not names:
        return '-'
    return ",".join(names)
def get_file_flags(flags):
    """Show flag names and handle dict size.

    The bits under rf.RAR_FILE_DICTMASK are not independent flags, so
    they are masked out before rendering the named bits and reported
    separately as one of the file_parms names.
    """
    res = render_flags(flags & ~rf.RAR_FILE_DICTMASK, file_bits)

    # The masked value, shifted down by 5 bits, indexes file_parms.
    dict_idx = (flags & rf.RAR_FILE_DICTMASK) >> 5
    return res + "," + file_parms[dict_idx]
def fmt_time(t):
    """Format time.

    Accepts None (rendered as '(-)'), a datetime (rendered in ISO
    format with a 'T' separator) or a 6-item
    (year, month, day, hour, minute, second) tuple.
    """
    if t is None:
        return '(-)'
    if isinstance(t, datetime):
        return t.isoformat('T')
    return "%04d-%02d-%02d %02d:%02d:%02d" % t
def show_item(h):
    """Show any RAR3/5 record.

    Dispatches on the record class; anything that is neither
    rf.Rar3Info nor rf.Rar5Info is reported as unknown.
    """
    if isinstance(h, rf.Rar3Info):
        show_item_v3(h)
    elif isinstance(h, rf.Rar5Info):
        show_item_v5(h)
    else:
        xprint('Unknown info record')
def show_item_v3(h):
    """Show any RAR3 record.

    Prints a summary line for the block, then type-specific details:
    full metadata for file/sub blocks, decoded flags for the others,
    and finally the block comment when one is present.
    """
    st = rar3_type(h.type)
    xprint("%s: hdrlen=%d datlen=%d", st, h.header_size, h.add_size)

    if h.type in (rf.RAR_BLOCK_FILE, rf.RAR_BLOCK_SUB):
        # Mode is shown in octal for UNIX hosts, in hex otherwise.
        if h.host_os == rf.RAR_OS_UNIX:
            s_mode = "0%o" % h.mode
        else:
            s_mode = "0x%x" % h.mode
        xprint(" flags=0x%04x:%s", h.flags, get_file_flags(h.flags))

        if 0 <= h.host_os < len(os_list):
            s_os = os_list[h.host_os]
        else:
            s_os = "?"
        xprint(" os=%d:%s ver=%d mode=%s meth=%c cmp=%d dec=%d vol=%d",
               h.host_os, s_os,
               h.extract_version, s_mode, h.compress_type,
               h.compress_size, h.file_size, h.volume)

        # Map a possibly negative CRC onto its unsigned 32-bit value.
        ucrc = (h.CRC + (1 << 32)) & ((1 << 32) - 1)
        xprint(" crc=0x%08x (%d) date_time=%s", ucrc, h.CRC, fmt_time(h.date_time))
        xprint(" name=%s", h.filename)
        if h.mtime:
            xprint(" mtime=%s", fmt_time(h.mtime))
        if h.ctime:
            xprint(" ctime=%s", fmt_time(h.ctime))
        if h.atime:
            xprint(" atime=%s", fmt_time(h.atime))
        if h.arctime:
            xprint(" arctime=%s", fmt_time(h.arctime))
    elif h.type == rf.RAR_BLOCK_MAIN:
        xprint(" flags=0x%04x:%s", h.flags, render_flags(h.flags, main_bits))
    elif h.type == rf.RAR_BLOCK_ENDARC:
        xprint(" flags=0x%04x:%s", h.flags, render_flags(h.flags, endarc_bits))
    elif h.type == rf.RAR_BLOCK_MARK:
        xprint(" flags=0x%04x:", h.flags)
    else:
        xprint(" flags=0x%04x:%s", h.flags, render_flags(h.flags, generic_bits))

    if h.comment is not None:
        cm = repr(h.comment)
        # Drop the Python 2 unicode-literal prefix so output matches Python 3.
        if cm[0] == 'u':
            cm = cm[1:]
        xprint(" comment=%s", cm)
def show_item_v5(h):
    """Show any RAR5 record.

    Prints a summary line and the generic block flags, then
    type-specific details: full metadata (including optional
    encryption, redirection, owner and version records) for
    file/service blocks, decoded flags for main, end-of-archive and
    encryption blocks, and finally the block comment when present.
    """
    st = rar5_type(h.block_type)
    xprint("%s: hdrlen=%d datlen=%d hdr_extra=%d", st, h.header_size,
           h.compress_size, h.block_extra_size)
    xprint(" block_flags=0x%04x:%s", h.block_flags, render_flags(h.block_flags, r5_block_flags))

    if h.block_type in (rf.RAR5_BLOCK_FILE, rf.RAR5_BLOCK_SERVICE):
        xprint(" name=%s", h.filename)
        # Any host OS other than UNIX is reported as WINDOWS.
        if h.file_host_os == rf.RAR5_OS_UNIX:
            s_os = 'UNIX'
            s_mode = "0%o" % h.mode
        else:
            s_os = 'WINDOWS'
            s_mode = "0x%x" % h.mode
        xprint(" file_flags=0x%04x:%s", h.file_flags, render_flags(h.file_flags, r5_file_flags))

        cmp_flags = h.file_compress_flags
        xprint(" cmp_algo=%d cmp_meth=%d dict=%d solid=%r",
               cmp_flags & 0x3f,
               (cmp_flags >> 7) & 0x07,
               cmp_flags >> 10,
               (cmp_flags & rf.RAR5_COMPR_SOLID) > 0)
        xprint(" os=%d:%s mode=%s cmp=%r dec=%r vol=%r",
               h.file_host_os, s_os, s_mode,
               h.compress_size, h.file_size, h.volume)
        if h.CRC is not None:
            xprint(" crc=0x%08x (%d)", h.CRC, h.CRC)
        if h.blake2sp_hash is not None:
            xprint(" blake2sp=%s", rf.tohex(h.blake2sp_hash))
        if h.date_time is not None:
            xprint(" date_time=%s", fmt_time(h.date_time))
        if h.mtime:
            xprint(" mtime=%s", fmt_time(h.mtime))
        if h.ctime:
            xprint(" ctime=%s", fmt_time(h.ctime))
        if h.atime:
            xprint(" atime=%s", fmt_time(h.atime))
        if h.arctime:
            xprint(" arctime=%s", fmt_time(h.arctime))

        if h.flags & rf.RAR_FILE_PASSWORD:
            enc_algo, enc_flags, kdf_count, salt, iv, checkval = h.file_encryption
            algo_name = 'AES256' if enc_algo == rf.RAR5_XENC_CIPHER_AES256 else 'UnknownAlgo'
            # kdf_count is shown raw (kdf_lg) and as 1 << kdf_count.
            xprint(' algo=%d:%s enc_flags=%04x:%s kdf_lg=%d kdf_count=%d salt=%s iv=%s checkval=%s',
                   enc_algo, algo_name, enc_flags, render_flags(enc_flags, r5_file_enc_flags),
                   kdf_count, 1 << kdf_count, rf.tohex(salt), rf.tohex(iv),
                   rf.tohex(checkval) if checkval else '-')
        if h.file_redir:
            redir_type, redir_flags, redir_name = h.file_redir
            xprint(' redir: type=%s flags=%d:%s destination=%s',
                   r5_file_redir_types.get(redir_type, 'Unknown'),
                   redir_flags, render_flags(redir_flags, r5_file_redir_flags),
                   redir_name)
        if h.file_owner:
            uname, gname, uid, gid = h.file_owner
            xprint(' owner: name=%r group=%r uid=%r gid=%r',
                   uname, gname, uid, gid)
        if h.file_version:
            ver_flags, version = h.file_version
            xprint(' version: flags=%r version=%r', ver_flags, version)
    elif h.block_type == rf.RAR5_BLOCK_MAIN:
        xprint(" flags=0x%04x:%s", h.flags, render_flags(h.main_flags, r5_main_flags))
    elif h.block_type == rf.RAR5_BLOCK_ENDARC:
        xprint(" flags=0x%04x:%s", h.flags, render_flags(h.endarc_flags, r5_endarc_flags))
    elif h.block_type == rf.RAR5_BLOCK_ENCRYPTION:
        algo_name = 'AES256' if h.encryption_algo == rf.RAR5_XENC_CIPHER_AES256 else 'UnknownAlgo'
        xprint(" algo=%d:%s flags=0x%04x:%s", h.encryption_algo, algo_name, h.flags,
               render_flags(h.encryption_flags, r5_enc_flags))
        xprint(" kdf_lg=%d kdf_count=%d", h.encryption_kdf_count, 1 << h.encryption_kdf_count)
        xprint(" salt=%s", rf.tohex(h.encryption_salt))
    else:
        xprint(" - missing info -")

    if h.comment is not None:
        cm = repr(h.comment)
        # Drop the Python 2 unicode-literal prefix so output matches Python 3.
        if cm[0] == 'u':
            cm = cm[1:]
        xprint(" comment=%s", cm)
# Run-time settings; main() overwrites these from the command line.
cf_show_comment = 0     # -c: print the archive comment line by line
cf_verbose = 0          # -v: verbosity level, one step per switch
cf_charset = None       # -C: fallback charset passed to rf.RarFile
cf_extract = 0          # -x: extract archive members
cf_test_read = 0        # -t: read every file; above 1 also seek/readinto
cf_test_unrar = 0       # -T: call RarFile.testrar()
cf_test_memory = 0      # -M: load the archive into memory first
def check_crc(f, inf, desc):
    """Compare result crc to expected value.

    *f* is the opened archive member, *inf* its info record and *desc*
    a short label included in the error message.  Nothing is checked
    when the info record carries no expected digest; a mismatch is
    reported on stdout and otherwise ignored.
    """
    expected = inf._md_expect
    if expected is None:
        return
    actual = f._md_context.digest()
    if actual != expected:
        print('crc error - %s - exp=%r got=%r' % (desc, expected, actual))
def test_read_long(r, inf):
    """Test read and readinto.

    Opens the member described by *inf* through the archive object *r*,
    reads it in 8 KiB chunks, and reports short reads and digest
    mismatches.  When cf_test_read is above 1 the file is rewound and
    read again through .readinto().  The file is closed even if a read
    raises.
    """
    md_class = inf._md_class or rf.NoHashContext
    bctx = md_class()
    f = r.open(inf.filename)
    try:
        total = 0
        while True:
            data = f.read(8192)
            if not data:
                break
            bctx.update(data)
            total += len(data)
        if total != inf.file_size:
            xprint("\n *** %s has corrupt file: %s ***", r.rarfile, inf.filename)
            xprint(" *** short read: got=%d, need=%d ***\n", total, inf.file_size)
        check_crc(f, inf, 'read')

        # At high verbosity show the independently computed hash
        # whenever the file's own digest differs from the expected one.
        if cf_verbose > 1 and f._md_context.digest() != inf._md_expect:
            xprint(" checkhash: %r got=%r exp=%r cls=%r\n",
                   bctx.hexdigest(), f._md_context.digest(),
                   inf._md_expect, inf._md_class)

        # test .seek() & .readinto()
        if cf_test_read > 1:
            f.seek(0, 0)

            total = 0
            buf = bytearray(rf.ZERO * 1024)
            while True:
                res = f.readinto(buf)
                if not res:
                    break
                total += res
            if inf.file_size != total:
                xprint(" *** readinto failed: got=%d, need=%d ***\n", total, inf.file_size)
    finally:
        f.close()
def test_read(r, inf):
    """Test file read.

    Delegates to test_read_long() for the member *inf* of archive *r*.
    """
    test_read_long(r, inf)
def test_real(fn, psw):
    """Actual archive processing.

    Opens archive *fn* (optionally from an in-memory copy), applies
    password *psw* when the archive needs one, shows the comment, and
    then lists, reads, extracts and tests the members as selected by
    the module-level cf_* settings.  Errors are left to the caller.
    """
    xprint("Archive: %s", fn)

    # At verbosity above 1 every parsed header is dumped via callback.
    cb = None
    if cf_verbose > 1:
        cb = show_item

    rfarg = fn
    if cf_test_memory:
        # Read the whole archive up front and close the file promptly.
        with open(fn, 'rb') as src:
            rfarg = io.BytesIO(src.read())

    # check if rar
    if not rf.is_rarfile(rfarg):
        xprint(" --- %s is not a RAR file ---", fn)
        return

    # open
    r = rf.RarFile(rfarg, charset=cf_charset, info_callback=cb)
    # set password
    if r.needs_password():
        if psw:
            r.setpassword(psw)
        else:
            xprint(" --- %s requires password ---", fn)
            return

    # show comment
    if cf_show_comment and r.comment:
        for ln in r.comment.split('\n'):
            xprint(" %s", ln)
    elif cf_verbose > 0 and r.comment:
        cm = repr(r.comment)
        # Drop the Python 2 unicode-literal prefix so output matches Python 3.
        if cm[0] == 'u':
            cm = cm[1:]
        xprint(" comment=%s", cm)

    # process
    for n in r.namelist():
        inf = r.getinfo(n)
        if inf.isdir():
            continue
        if cf_verbose == 1:
            show_item(inf)
        if cf_test_read:
            test_read(r, inf)

    # Exercise both the bulk and the per-member extraction API.
    if cf_extract:
        r.extractall()
        for inf in r.infolist():
            r.extract(inf)

    if cf_test_unrar:
        r.testrar()
def test(fn, psw):
    """Process one archive with error handling.

    Runs test_real() and turns rarfile errors and IOError into a
    one-line report on stdout, so one bad archive does not stop the
    processing of the remaining ones.
    """
    try:
        test_real(fn, psw)
    except rf.NeedFirstVolume:
        # Must precede the generic handler to get its own message.
        xprint(" --- %s is middle part of multi-vol archive ---", fn)
    except (rf.Error, IOError) as ex:
        xprint("\n *** %s: %s ***\n", type(ex).__name__, ex)
def main():
    """Program entry point.

    Parses the command-line switches into the module-level cf_*
    settings, expands @file arguments into the archive names they
    list, and processes every archive with test().  Prints the usage
    text when no archive is given; exits with status 1 on a switch
    parsing error.
    """
    global cf_verbose, cf_show_comment, cf_charset
    global cf_extract, cf_test_read, cf_test_unrar
    global cf_test_memory

    psw = None

    # parse args
    # The option string must list every switch handled below; it used
    # to contain 'R' where '-T' is handled, which made -T unusable.
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'p:C:hvcxtTM')
    except getopt.error as ex:
        print(str(ex), file=sys.stderr)
        sys.exit(1)

    for o, v in opts:
        if o == '-p':
            psw = v
        elif o == '-h':
            xprint(usage)
            return
        elif o == '-v':
            cf_verbose += 1
        elif o == '-c':
            cf_show_comment = 1
        elif o == '-x':
            cf_extract = 1
        elif o == '-t':
            cf_test_read += 1
        elif o == '-T':
            cf_test_unrar = 1
        elif o == '-M':
            cf_test_memory = 1
        elif o == '-C':
            cf_charset = v
        else:
            raise Exception("unhandled switch: " + o)

    # Expand @ARCLIST arguments into the names listed in that file.
    args2 = []
    for a in args:
        # startswith() is safe for an empty argument, unlike a[0].
        if a.startswith("@"):
            with open(a[1:], 'r') as listfile:
                for ln in listfile:
                    # Strip only the line terminator, so a last line
                    # without a newline keeps its final character.
                    fn = ln.rstrip('\n')
                    if fn:
                        args2.append(fn)
        else:
            args2.append(a)
    args = args2

    if not args:
        xprint(usage)

    # pypy .readinto()+memoryview() is buggy
    #if cf_test_read > 1 and hasattr(sys, 'pypy_version_info'):
    #    cf_test_read = 1

    for fn in args:
        test(fn, psw)
if __name__ == '__main__':
    # Ctrl-C ends the run quietly instead of printing a traceback.
    try:
        main()
    except KeyboardInterrupt:
        pass