dumprar: drop py2 compat
[rarfile.git] / dumprar.py
blob5d60d8a2794c96fdc092be36a48d2d06b969bed2
1 #! /usr/bin/env python3
3 """Dump archive contents, test extraction."""
5 import getopt
6 import io
7 import sys
8 from datetime import datetime
10 import rarfile as rf
# Help text; main() prints it for -h and when no archive names are given.
usage = """
dumprar [switches] [ARC1 ARC2 ...] [@ARCLIST]
switches:
@file read archive names from file
-pPWD set password
-Ccharset set fallback charset
-v increase verbosity
-t attempt to read all files
-x write read files out
-c show archive comment
-h show usage
-- stop switch parsing
""".strip()

# Labels for RAR3 host OS codes; show_item_v3() indexes this list by h.host_os.
os_list = ["DOS", "OS2", "WIN", "UNIX", "MACOS", "BEOS"]

# Labels for RAR3 block types; rar3_type() indexes this list by
# (btype - rf.RAR_BLOCK_MARK).
block_strs = ["MARK", "MAIN", "FILE", "OLD_COMMENT", "OLD_EXTRA",
              "OLD_SUB", "OLD_RECOVERY", "OLD_AUTH", "SUB", "ENDARC"]

# RAR5 block type code -> label; looked up by rar5_type().
r5_block_types = {
    rf.RAR5_BLOCK_MAIN: "R5_MAIN",
    rf.RAR5_BLOCK_FILE: "R5_FILE",
    rf.RAR5_BLOCK_SERVICE: "R5_SVC",
    rf.RAR5_BLOCK_ENCRYPTION: "R5_ENC",
    rf.RAR5_BLOCK_ENDARC: "R5_ENDARC",
}
def rar3_type(btype):
    """Return the label for a RAR3 block type code.

    Codes between ``rf.RAR_BLOCK_MARK`` and ``rf.RAR_BLOCK_ENDARC``
    (inclusive) are mapped through ``block_strs``; anything else gives
    ``"*UNKNOWN*"``.
    """
    if rf.RAR_BLOCK_MARK <= btype <= rf.RAR_BLOCK_ENDARC:
        return block_strs[btype - rf.RAR_BLOCK_MARK]
    return "*UNKNOWN*"
def rar5_type(btype):
    """RAR5 type code as string.

    Looks *btype* up in ``r5_block_types``; codes missing from that
    table yield ``"*UNKNOWN*"``.
    """
    return r5_block_types.get(btype, "*UNKNOWN*")
# The tables below are sequences of (bitmask, label) pairs in the form
# render_flags() expects.

# Flag labels for RAR3 MAIN blocks (used by show_item_v3).
main_bits = (
    (rf.RAR_MAIN_VOLUME, "VOL"),
    (rf.RAR_MAIN_COMMENT, "COMMENT"),
    (rf.RAR_MAIN_LOCK, "LOCK"),
    (rf.RAR_MAIN_SOLID, "SOLID"),
    (rf.RAR_MAIN_NEWNUMBERING, "NEWNR"),
    (rf.RAR_MAIN_AUTH, "AUTH"),
    (rf.RAR_MAIN_RECOVERY, "RECOVERY"),
    (rf.RAR_MAIN_PASSWORD, "PASSWORD"),
    (rf.RAR_MAIN_FIRSTVOLUME, "FIRSTVOL"),
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)

# Flag labels for RAR3 ENDARC blocks (used by show_item_v3).
endarc_bits = (
    (rf.RAR_ENDARC_NEXT_VOLUME, "NEXTVOL"),
    (rf.RAR_ENDARC_DATACRC, "DATACRC"),
    (rf.RAR_ENDARC_REVSPACE, "REVSPACE"),
    (rf.RAR_ENDARC_VOLNR, "VOLNR"),
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)

# Flag labels for RAR3 FILE/SUB blocks (used by get_file_flags, which masks
# out the rf.RAR_FILE_DICTMASK bits before rendering).
file_bits = (
    (rf.RAR_FILE_SPLIT_BEFORE, "SPLIT_BEFORE"),
    (rf.RAR_FILE_SPLIT_AFTER, "SPLIT_AFTER"),
    (rf.RAR_FILE_PASSWORD, "PASSWORD"),
    (rf.RAR_FILE_COMMENT, "COMMENT"),
    (rf.RAR_FILE_SOLID, "SOLID"),
    (rf.RAR_FILE_LARGE, "LARGE"),
    (rf.RAR_FILE_UNICODE, "UNICODE"),
    (rf.RAR_FILE_SALT, "SALT"),
    (rf.RAR_FILE_VERSION, "VERSION"),
    (rf.RAR_FILE_EXTTIME, "EXTTIME"),
    (rf.RAR_FILE_EXTFLAGS, "EXTFLAGS"),
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)

# Flag labels for every other RAR3 block type (fallback in show_item_v3).
generic_bits = (
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)

# Labels selected by get_file_flags() with the index
# (flags & rf.RAR_FILE_DICTMASK) >> 5.
file_parms = ("D64", "D128", "D256", "D512",
              "D1024", "D2048", "D4096", "DIR")
# RAR5 tables.  The tuples are (bitmask, label) pairs for render_flags();
# r5_file_redir_types is a plain code -> label mapping.

# Labels for h.block_flags (printed for every RAR5 record).
r5_block_flags = (
    (rf.RAR5_BLOCK_FLAG_EXTRA_DATA, "EXTRA"),
    (rf.RAR5_BLOCK_FLAG_DATA_AREA, "DATA"),
    (rf.RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR5_BLOCK_FLAG_SPLIT_BEFORE, "SPLIT_BEFORE"),
    (rf.RAR5_BLOCK_FLAG_SPLIT_AFTER, "SPLIT_AFTER"),
    (rf.RAR5_BLOCK_FLAG_DEPENDS_PREV, "DEPENDS"),
    (rf.RAR5_BLOCK_FLAG_KEEP_WITH_PARENT, "KEEP"),
)

# Labels for h.main_flags of a RAR5 MAIN block.
r5_main_flags = (
    (rf.RAR5_MAIN_FLAG_ISVOL, "ISVOL"),
    (rf.RAR5_MAIN_FLAG_HAS_VOLNR, "VOLNR"),
    (rf.RAR5_MAIN_FLAG_SOLID, "SOLID"),
    (rf.RAR5_MAIN_FLAG_RECOVERY, "RECOVERY"),
    (rf.RAR5_MAIN_FLAG_LOCKED, "LOCKED"),
)

# Labels for h.file_flags of a RAR5 FILE/SERVICE block.
r5_file_flags = (
    (rf.RAR5_FILE_FLAG_ISDIR, "DIR"),
    (rf.RAR5_FILE_FLAG_HAS_MTIME, "MTIME"),
    (rf.RAR5_FILE_FLAG_HAS_CRC32, "CRC32"),
    (rf.RAR5_FILE_FLAG_UNKNOWN_SIZE, "NOSIZE"),
)

# Labels for h.encryption_flags of a RAR5 ENCRYPTION block.
r5_enc_flags = (
    (rf.RAR5_ENC_FLAG_HAS_CHECKVAL, "CHECKVAL"),
)

# Labels for h.endarc_flags of a RAR5 ENDARC block.
r5_endarc_flags = (
    (rf.RAR5_ENDARC_FLAG_NEXT_VOL, "NEXTVOL"),
)

# Labels for the enc_flags element of h.file_encryption.
r5_file_enc_flags = (
    (rf.RAR5_XENC_CHECKVAL, "CHECKVAL"),
    (rf.RAR5_XENC_TWEAKED, "TWEAKED"),
)

# Redirection type code (first element of h.file_redir) -> label.
r5_file_redir_types = {
    rf.RAR5_XREDIR_UNIX_SYMLINK: "UNIX_SYMLINK",
    rf.RAR5_XREDIR_WINDOWS_SYMLINK: "WINDOWS_SYMLINK",
    rf.RAR5_XREDIR_WINDOWS_JUNCTION: "WINDOWS_JUNCTION",
    rf.RAR5_XREDIR_HARD_LINK: "HARD_LINK",
    rf.RAR5_XREDIR_FILE_COPY: "FILE_COPY",
}

# Labels for the redir_flags element of h.file_redir.
r5_file_redir_flags = (
    (rf.RAR5_XREDIR_ISDIR, "DIR"),
)
# (bitmask, label) pairs used by show_mode() to render h.mode when the host
# OS is MSDOS, WIN32 or OS2.
# NOTE(review): the labels look like the Windows FILE_ATTRIBUTE_* names;
# confirm against the Windows documentation before relying on them.
dos_mode_bits = (
    (0x01, "READONLY"),
    (0x02, "HIDDEN"),
    (0x04, "SYSTEM"),
    (0x08, "VOLUME_ID"),
    (0x10, "DIRECTORY"),
    (0x20, "ARCHIVE"),
    (0x40, "DEVICE"),
    (0x80, "NORMAL"),
    (0x0100, "TEMPORARY"),
    (0x0200, "SPARSE_FILE"),
    (0x0400, "REPARSE_POINT"),
    (0x0800, "COMPRESSED"),
    (0x1000, "OFFLINE"),
    (0x2000, "NOT_CONTENT_INDEXED"),
    (0x4000, "ENCRYPTED"),
    (0x8000, "INTEGRITY_STREAM"),
    (0x00010000, "VIRTUAL"),
    (0x00020000, "NO_SCRUB_DATA"),
    (0x00040000, "RECALL_ON_OPEN"),
    (0x00080000, "PINNED"),
    (0x00100000, "UNPINNED"),
    (0x00400000, "RECALL_ON_DATA_ACCESS"),
    (0x20000000, "STRICTLY_SEQUENTIAL"),
)
def xprint(m, *args):
    """Write one line to stdout.

    When extra positional arguments are given, *m* is treated as a
    %-format template and filled in with them; otherwise *m* is printed
    verbatim (so a literal ``%`` needs no escaping in that case).
    """
    text = m % args if args else m
    print(text)
def render_flags(flags, bit_list):
    """Return a comma-separated list of the flag names set in *flags*.

    *bit_list* is an iterable of ``(mask, label)`` pairs.  Labels of
    matching masks come first, in table order.  Every set bit that no
    mask covers is then reported as ``UNK_<hex>``, lowest bit first.
    Returns ``"-"`` when nothing is set.
    """
    labels = []
    covered = 0
    for mask, label in bit_list:
        covered |= mask
        if flags & mask:
            labels.append(label)

    # Walk the bits that the table does not describe, lowest first.
    leftover = flags & ~covered
    position = 0
    while leftover:
        if leftover & 1:
            labels.append("UNK_%04x" % (1 << position))
        leftover >>= 1
        position += 1

    return ",".join(labels) if labels else "-"
def get_file_flags(flags):
    """Render RAR3 file-block flags, including the dictionary-size field.

    The bits under ``rf.RAR_FILE_DICTMASK`` are not independent flags, so
    they are removed before the named bits are rendered and then appended
    as a single ``file_parms`` label.
    """
    dict_field = flags & rf.RAR_FILE_DICTMASK
    names = render_flags(flags & ~rf.RAR_FILE_DICTMASK, file_bits)
    return names + "," + file_parms[dict_field >> 5]
def fmt_time(t):
    """Format a timestamp for display.

    Accepts ``None`` (rendered as ``"(-)"``), a ``datetime`` (rendered in
    ISO format with a ``T`` separator), or a 6-item tuple of
    year, month, day, hour, minute, second.
    """
    if isinstance(t, datetime):
        return t.isoformat("T")
    if t is None:
        return "(-)"
    return "%04d-%02d-%02d %02d:%02d:%02d" % t
def show_item(h):
    """Print one header record, dispatching on its RAR format version.

    ``rf.Rar3Info`` records go to show_item_v3(), ``rf.Rar5Info`` records
    to show_item_v5(); anything else is reported as unknown.
    """
    printers = (
        (rf.Rar3Info, show_item_v3),
        (rf.Rar5Info, show_item_v5),
    )
    for info_cls, printer in printers:
        if isinstance(h, info_cls):
            printer(h)
            return
    xprint("Unknown info record")
def show_rftype(h):
    """Return a three-character summary of the entry kind.

    Position 0 is ``F`` when ``h.is_file()`` is true, position 1 is ``D``
    for ``h.is_dir()``, position 2 is ``L`` for ``h.is_symlink()``; a
    false answer leaves ``-`` in that position.
    """
    file_ch = "F" if h.is_file() else "-"
    dir_ch = "D" if h.is_dir() else "-"
    link_ch = "L" if h.is_symlink() else "-"
    return file_ch + dir_ch + link_ch
def modex3(v):
    """Render the low three bits of *v* as an ``rwx`` triplet.

    Returns a list of three one-character strings so the caller can
    patch individual positions (setuid/setgid/sticky) afterwards.
    Higher bits of *v* are ignored.
    """
    return [
        "r" if v & 4 else "-",
        "w" if v & 2 else "-",
        "x" if v & 1 else "-",
    ]


def unix_mode(mode):
    """Render a Unix ``st_mode`` value in ``ls -l`` style.

    The result is a type character followed by nine permission
    characters, e.g. ``drwxr-xr-x``.  Setuid, setgid and sticky bits
    replace the matching execute position: lowercase (``s``/``t``) when
    the execute bit is also set, uppercase (``S``/``T``) when it is not.

    Known file types are directory (``d``), symlink (``l``) and regular
    file (``-``, also used when no type bits are set).  Any other type is
    shown as ``?`` and the raw type bits are appended as ``(0x....)``.
    """
    perms = modex3(mode >> 6) + modex3(mode >> 3) + modex3(mode)
    if mode & 0x0800:  # setuid
        perms[2] = "s" if perms[2] == "x" else "S"
    if mode & 0x0400:  # setgid
        perms[5] = "s" if perms[5] == "x" else "S"
    if mode & 0x0200:  # sticky
        # Without other-execute the sticky bit must still be visible, so
        # use "T" (the same convention as "S" above) instead of "-".
        perms[8] = "t" if perms[8] == "x" else "T"

    rest = mode & 0xF000
    type_chars = {0x4000: "d", 0xA000: "l", 0x8000: "-", 0: "-"}
    if rest in type_chars:
        perms.insert(0, type_chars[rest])
    else:
        perms.insert(0, "?")
        perms.append("(0x%04x)" % rest)
    return "".join(perms)
def show_mode(h):
    """Render ``h.mode`` according to the record's host OS.

    Unix and BeOS modes are shown in ``ls -l`` style via unix_mode();
    MSDOS, WIN32 and OS2 modes are shown as attribute names from
    ``dos_mode_bits``; any other host OS gets the raw value in hex.
    """
    host = h.host_os
    if host in (rf.RAR_OS_UNIX, rf.RAR_OS_BEOS):
        return unix_mode(h.mode)
    if host in (rf.RAR_OS_MSDOS, rf.RAR_OS_WIN32, rf.RAR_OS_OS2):
        return render_flags(h.mode, dos_mode_bits)
    return "0x%x" % h.mode
def show_item_v3(h):
    """Print one RAR3 header record to stdout.

    Every record gets a summary line (type, header size, data size, entry
    kind).  What follows depends on the block type:

    * FILE / SUB: flags, host OS, version, mode, method, sizes, CRC,
      name and whichever timestamps are set.
    * MAIN: flags decoded with ``main_bits``.
    * ENDARC: flags decoded with ``endarc_bits``, plus the data CRC and
      volume number when their flag bits are set.
    * MARK: raw flags only.
    * anything else: flags decoded with ``generic_bits``.

    A trailing ``comment=`` line is printed when ``h.comment`` is not None.
    """
    st = rar3_type(h.type)
    xprint("%s: hdrlen=%d datlen=%d is=%s",
           st, h.header_size, h.add_size, show_rftype(h))
    if h.type in (rf.RAR_BLOCK_FILE, rf.RAR_BLOCK_SUB):
        s_mode = show_mode(h)
        xprint(" flags=0x%04x:%s", h.flags, get_file_flags(h.flags))
        if 0 <= h.host_os < len(os_list):
            s_os = os_list[h.host_os]
        else:
            s_os = "?"
        if h.flags & rf.RAR_FILE_UNICODE:
            s_namecmp = " namecmp=%d/%d" % (len(h.orig_filename), h._name_size)
        else:
            s_namecmp = ""
        xprint(" os=%d:%s ver=%d mode=%s meth=%c cmp=%d dec=%d vol=%d%s",
               h.host_os, s_os,
               h.extract_version, s_mode, h.compress_type,
               h.compress_size, h.file_size, h.volume, s_namecmp)
        # Fold a possibly negative (signed) CRC into the unsigned 32-bit range.
        ucrc = (h.CRC + (1 << 32)) & ((1 << 32) - 1)
        xprint(" crc=0x%08x (%d) date_time=%s", ucrc, h.CRC, fmt_time(h.date_time))
        xprint(" name=%s", h.filename)
        for field in ("mtime", "ctime", "atime", "arctime"):
            stamp = getattr(h, field)
            if stamp:
                xprint(" %s=%s", field, fmt_time(stamp))
    elif h.type == rf.RAR_BLOCK_MAIN:
        xprint(" flags=0x%04x:%s", h.flags, render_flags(h.flags, main_bits))
    elif h.type == rf.RAR_BLOCK_ENDARC:
        xprint(" flags=0x%04x:%s", h.flags, render_flags(h.flags, endarc_bits))
        if h.flags & rf.RAR_ENDARC_DATACRC:
            xprint(" datacrc=0x%08x", h.endarc_datacrc)
        # The volume number is governed by its own flag bit, not DATACRC.
        if h.flags & rf.RAR_ENDARC_VOLNR:
            xprint(" volnr=%d", h.endarc_volnr)
    elif h.type == rf.RAR_BLOCK_MARK:
        xprint(" flags=0x%04x:", h.flags)
    else:
        xprint(" flags=0x%04x:%s", h.flags, render_flags(h.flags, generic_bits))

    if h.comment is not None:
        # Python 3 repr() never adds a u'' prefix, so no stripping is needed.
        xprint(" comment=%s", repr(h.comment))
def show_item_v5(h):
    """Print one RAR5 header record to stdout.

    A summary line and the decoded block flags are printed for every
    record.  FILE and SERVICE blocks then list name, file flags,
    compression parameters, host OS, mode, sizes, and any checksums,
    timestamps, encryption, redirection, owner and version data that are
    present.  MAIN, ENDARC and ENCRYPTION blocks print their own flag
    sets; other block types print a "missing info" marker.  A trailing
    ``comment=`` line is printed when ``h.comment`` is not None.
    """
    kind = h.block_type
    type_label = rar5_type(kind)
    xprint("%s: hdrlen=%d datlen=%d hdr_extra=%d is=%s", type_label, h.header_size,
           h.compress_size, h.block_extra_size, show_rftype(h))
    xprint(" block_flags=0x%04x:%s", h.block_flags, render_flags(h.block_flags, r5_block_flags))

    if kind in (rf.RAR5_BLOCK_FILE, rf.RAR5_BLOCK_SERVICE):
        xprint(" name=%s", h.filename)
        mode_text = show_mode(h)
        os_text = "UNIX" if h.file_host_os == rf.RAR5_OS_UNIX else "WINDOWS"
        xprint(" file_flags=0x%04x:%s", h.file_flags, render_flags(h.file_flags, r5_file_flags))

        # file_compress_flags is a packed field; unpack it by shift and mask.
        packed = h.file_compress_flags
        xprint(" cmp_algo=%d cmp_meth=%d dict=%d solid=%r",
               packed & 0x3f,
               (packed >> 7) & 0x07,
               packed >> 10,
               (packed & rf.RAR5_COMPR_SOLID) > 0)
        xprint(" os=%d:%s mode=%s cmp=%r dec=%r vol=%r",
               h.file_host_os, os_text, mode_text,
               h.compress_size, h.file_size, h.volume)

        if h.CRC is not None:
            xprint(" crc=0x%08x (%d)", h.CRC, h.CRC)
        if h.blake2sp_hash is not None:
            xprint(" blake2sp=%s", rf.tohex(h.blake2sp_hash))
        if h.date_time is not None:
            xprint(" date_time=%s", fmt_time(h.date_time))
        for field in ("mtime", "ctime", "atime", "arctime"):
            stamp = getattr(h, field)
            if stamp:
                xprint(" %s=%s", field, fmt_time(stamp))

        if h.flags & rf.RAR_FILE_PASSWORD:
            cipher, cipher_flags, kdf_lg, salt, iv, checkval = h.file_encryption
            if cipher == rf.RAR5_XENC_CIPHER_AES256:
                cipher_name = "AES256"
            else:
                cipher_name = "UnknownAlgo"
            xprint(" algo=%d:%s enc_flags=%04x:%s kdf_lg=%d kdf_count=%d salt=%s iv=%s checkval=%s",
                   cipher, cipher_name, cipher_flags, render_flags(cipher_flags, r5_file_enc_flags),
                   kdf_lg, 1 << kdf_lg, rf.tohex(salt), rf.tohex(iv),
                   checkval and rf.tohex(checkval) or "-")

        if h.file_redir:
            link_type, link_flags, link_target = h.file_redir
            xprint(" redir: type=%s flags=%d:%s destination=%s",
                   r5_file_redir_types.get(link_type, "Unknown"),
                   link_flags, render_flags(link_flags, r5_file_redir_flags),
                   link_target)

        if h.file_owner:
            user_name, group_name, user_id, group_id = h.file_owner
            xprint(" owner: name=%r group=%r uid=%r gid=%r",
                   user_name, group_name, user_id, group_id)

        if h.file_version:
            ver_flags, ver_number = h.file_version
            xprint(" version: flags=%r version=%r", ver_flags, ver_number)

    elif kind == rf.RAR5_BLOCK_MAIN:
        # NOTE(review): the hex value is h.flags while the names come from
        # h.main_flags -- confirm this mix is intended.
        xprint(" flags=0x%04x:%s", h.flags, render_flags(h.main_flags, r5_main_flags))

    elif kind == rf.RAR5_BLOCK_ENDARC:
        xprint(" flags=0x%04x:%s", h.flags, render_flags(h.endarc_flags, r5_endarc_flags))

    elif kind == rf.RAR5_BLOCK_ENCRYPTION:
        if h.encryption_algo == rf.RAR5_XENC_CIPHER_AES256:
            cipher_name = "AES256"
        else:
            cipher_name = "UnknownAlgo"
        xprint(" algo=%d:%s flags=0x%04x:%s", h.encryption_algo, cipher_name, h.flags,
               render_flags(h.encryption_flags, r5_enc_flags))
        xprint(" kdf_lg=%d kdf_count=%d", h.encryption_kdf_count, 1 << h.encryption_kdf_count)
        xprint(" salt=%s", rf.tohex(h.encryption_salt))

    else:
        xprint(" - missing info -")

    if h.comment is not None:
        comment_repr = repr(h.comment)
        if comment_repr[0] == "u":
            comment_repr = comment_repr[1:]
        xprint(" comment=%s", comment_repr)
# Run-time options; main() overwrites these from the command-line switches.
cf_show_comment = 0  # -c: print the archive comment line by line
cf_verbose = 0  # -v (repeatable): 1 prints each entry, >1 uses the info callback
cf_charset = None  # -C: charset passed to rf.RarFile
cf_extract = 0  # -x: extract archive members
cf_test_read = 0  # -t (repeatable): read each file; >1 also tests seek/readinto
cf_test_unrar = 0  # when set, test_real() calls r.testrar()
cf_test_memory = 0  # -M: load the archive into an io.BytesIO before opening
def check_crc(f, inf, desc):
    """Report a mismatch between the computed and the expected digest.

    *inf* supplies the expected value (``_md_expect``) and *f* the digest
    context that was fed while reading (``_md_context``).  Nothing is
    checked when no expected value is recorded.  *desc* names the
    operation in the error message.
    """
    expected = inf._md_expect
    if expected is not None:
        actual = f._md_context.digest()
        if actual != expected:
            print("crc error - %s - exp=%r got=%r" % (desc, expected, actual))
def test_read_long(r, inf):
    """Read one archive member in full and verify its size and digest.

    *r* is the open archive and *inf* the info record of the member.
    The member is read in 8192-byte chunks via ``read()``; a short read
    and a digest mismatch are reported on stdout.  With ``cf_verbose > 1``
    a mismatch also prints the independently computed hash.  With
    ``cf_test_read > 1`` the member is rewound and read again through
    ``readinto()`` to check the byte count.

    The member file is closed even when reading raises.
    """
    md_class = inf._md_class or rf.NoHashContext
    bctx = md_class()
    f = r.open(inf.filename)
    try:
        total = 0
        while True:
            data = f.read(8192)
            if not data:
                break
            bctx.update(data)
            total += len(data)
        if total != inf.file_size:
            xprint("\n *** %s has corrupt file: %s ***", r.rarfile, inf.filename)
            xprint(" *** short read: got=%d, need=%d ***\n", total, inf.file_size)
        check_crc(f, inf, "read")
        bhash = bctx.hexdigest()
        if cf_verbose > 1 and f._md_context.digest() != inf._md_expect:
            xprint(" checkhash: %r got=%r exp=%r cls=%r\n",
                   bhash, f._md_context.digest(), inf._md_expect, inf._md_class)

        # test .seek() & .readinto()
        if cf_test_read > 1:
            f.seek(0, 0)

            total = 0
            buf = bytearray(1024)
            while True:
                res = f.readinto(buf)
                if not res:
                    break
                total += res
            if inf.file_size != total:
                xprint(" *** readinto failed: got=%d, need=%d ***\n", total, inf.file_size)
            # NOTE: only the byte count is checked for the readinto pass;
            # the digest is not verified a second time.
    finally:
        f.close()
def test_read(r, inf):
    """Test file read.

    Thin wrapper: passes the archive *r* and member info *inf* straight
    to test_read_long().
    """
    test_read_long(r, inf)
def test_real(fn, pwd):
    """Process one archive according to the module-level ``cf_*`` options.

    *fn* is the archive path and *pwd* the password (or None).  The
    archive is optionally loaded into memory first (``cf_test_memory``),
    checked with ``rf.is_rarfile`` and opened.  Then, depending on the
    options, the comment is shown, entries are listed and test-read, the
    contents are extracted, and ``testrar()`` is run.

    Returns early, after printing a message, when *fn* is not a RAR file
    or when a password is needed but none was given.  Errors raised by
    rarfile or by file I/O propagate to the caller.
    """
    xprint("Archive: %s", fn)

    cb = None
    if cf_verbose > 1:
        cb = show_item

    rfarg = fn
    if cf_test_memory:
        # Close the on-disk file as soon as its contents are in memory.
        with open(fn, "rb") as src:
            rfarg = io.BytesIO(src.read())

    # check if rar
    if not rf.is_rarfile(rfarg):
        xprint(" --- %s is not a RAR file ---", fn)
        return

    # open
    r = rf.RarFile(rfarg, charset=cf_charset, info_callback=cb)
    # set password
    if r.needs_password():
        if pwd:
            r.setpassword(pwd)
        else:
            xprint(" --- %s requires password ---", fn)
            return

    # show comment
    if cf_show_comment and r.comment:
        for ln in r.comment.split("\n"):
            xprint(" %s", ln)
    elif cf_verbose > 0 and r.comment:
        # Python 3 repr() never adds a u'' prefix, so no stripping is needed.
        xprint(" comment=%s", repr(r.comment))

    # process
    for n in r.namelist():
        inf = r.getinfo(n)
        if cf_verbose == 1:
            show_item(inf)
        if cf_test_read and inf.is_file():
            test_read(r, inf)

    if cf_extract:
        # NOTE(review): everything is extracted twice, via extractall() and
        # then per entry via extract() -- presumably to exercise both APIs;
        # confirm before simplifying.
        r.extractall()
        for inf in r.infolist():
            r.extract(inf)

    if cf_test_unrar:
        r.testrar()
def test(fn, pwd):
    """Process one archive, reporting failures instead of raising.

    A middle volume of a multi-volume archive gets its own message.  Any
    other ``rf.Error`` and any ``IOError`` is printed as
    ``*** <ExceptionClass>: <message> ***``.
    """
    try:
        test_real(fn, pwd)
    except rf.NeedFirstVolume as ex:
        xprint(" --- %s is middle part of multi-vol archive (%s)---", fn, str(ex))
    except (rf.Error, IOError) as ex:
        xprint("\n *** %s: %s ***\n", type(ex).__name__, ex)
def main():
    """Program entry point.

    Parses the command-line switches into the module-level ``cf_*``
    options, expands ``@file`` arguments into the archive names listed in
    that file (one per line, blank lines skipped), and runs test() on
    every archive.  Prints the usage text when ``-h`` is given or no
    archives are named.  Exits with status 1 on an invalid switch.

    ``-T`` and ``-R`` both enable the ``testrar()`` check: the option
    string used to accept only ``-R`` while the handler only knew ``-T``,
    so neither one worked.
    """
    global cf_verbose, cf_show_comment, cf_charset
    global cf_extract, cf_test_read, cf_test_unrar
    global cf_test_memory

    pwd = None

    # parse args
    try:
        opts, args = getopt.getopt(sys.argv[1:], "p:C:hvcxtTRM")
    except getopt.GetoptError as ex:
        print(str(ex), file=sys.stderr)
        sys.exit(1)

    for o, v in opts:
        if o == "-p":
            pwd = v
        elif o == "-h":
            xprint(usage)
            return
        elif o == "-v":
            cf_verbose += 1
        elif o == "-c":
            cf_show_comment = 1
        elif o == "-x":
            cf_extract = 1
        elif o == "-t":
            cf_test_read += 1
        elif o in ("-T", "-R"):
            cf_test_unrar = 1
        elif o == "-M":
            cf_test_memory = 1
        elif o == "-C":
            cf_charset = v
        else:
            raise Exception("unhandled switch: " + o)

    # expand @listfile arguments
    archives = []
    for a in args:
        if a.startswith("@"):
            with open(a[1:], "r") as listfile:
                for ln in listfile:
                    # Strip only the line terminator, so a final line
                    # without a newline keeps its last character.
                    name = ln.rstrip("\n")
                    if name:
                        archives.append(name)
        else:
            archives.append(a)

    if not archives:
        xprint(usage)
        return

    for fn in archives:
        test(fn, pwd)
# Run main() only when executed as a script, not when imported.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # Exit quietly on Ctrl-C instead of printing a traceback.
        pass