doc: add readthedocs config
[rarfile.git] / dumprar.py
blob8dca844b3c6f015ce8e89a5c9e80036ef7f27627
1 #! /usr/bin/env python3
3 """Dump archive contents, test extraction."""
5 import binascii
6 import getopt
7 import io
8 import sys
9 from datetime import datetime
11 import rarfile as rf
# Help text printed for -h and when no archives are given.
# Lists every switch main() acts on that works today, including -M
# (in-memory processing), which was previously undocumented.
usage = """
dumprar [switches] [ARC1 ARC2 ...] [@ARCLIST]
switches:
@file read archive names from file
-pPWD set password
-Ccharset set fallback charset
-v increase verbosity
-t attempt to read all files
-x write read files out
-c show archive comment
-h show usage
-bTOOL set backend tool (unrar, unar, bsdtar, 7z, 7zz)
-M read archive into memory before processing
-- stop switch parsing
""".strip()
# Host OS names; show_item_v3() indexes this list with the RAR3 host_os code.
os_list = "DOS OS2 WIN UNIX MACOS BEOS".split()

# RAR3 block names; rar3_type() indexes this list with
# (block type - rf.RAR_BLOCK_MARK).
block_strs = (
    "MARK MAIN FILE OLD_COMMENT OLD_EXTRA "
    "OLD_SUB OLD_RECOVERY OLD_AUTH SUB ENDARC"
).split()

# RAR5 block type code -> display name, used by rar5_type().
r5_block_types = {
    rf.RAR5_BLOCK_MAIN: "R5_MAIN",
    rf.RAR5_BLOCK_FILE: "R5_FILE",
    rf.RAR5_BLOCK_SERVICE: "R5_SVC",
    rf.RAR5_BLOCK_ENCRYPTION: "R5_ENC",
    rf.RAR5_BLOCK_ENDARC: "R5_ENDARC",
}
def rar3_type(btype):
    """Return the display name for a RAR3 block type code.

    Codes outside the rf.RAR_BLOCK_MARK..rf.RAR_BLOCK_ENDARC range
    yield "*UNKNOWN*".
    """
    if rf.RAR_BLOCK_MARK <= btype <= rf.RAR_BLOCK_ENDARC:
        return block_strs[btype - rf.RAR_BLOCK_MARK]
    return "*UNKNOWN*"
def rar5_type(btype):
    """Return the display name for a RAR5 block type code.

    Codes missing from r5_block_types yield "*UNKNOWN*".
    """
    try:
        return r5_block_types[btype]
    except KeyError:
        return "*UNKNOWN*"
# (mask, label) tables consumed by render_flags() for RAR3 headers.

# Flags rendered for every RAR3 block; also appended to each specific table.
generic_bits = (
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)

# Flags of the RAR3 main archive header.
main_bits = (
    (rf.RAR_MAIN_VOLUME, "VOL"),
    (rf.RAR_MAIN_COMMENT, "COMMENT"),
    (rf.RAR_MAIN_LOCK, "LOCK"),
    (rf.RAR_MAIN_SOLID, "SOLID"),
    (rf.RAR_MAIN_NEWNUMBERING, "NEWNR"),
    (rf.RAR_MAIN_AUTH, "AUTH"),
    (rf.RAR_MAIN_RECOVERY, "RECOVERY"),
    (rf.RAR_MAIN_PASSWORD, "PASSWORD"),
    (rf.RAR_MAIN_FIRSTVOLUME, "FIRSTVOL"),
) + generic_bits

# Flags of the RAR3 end-of-archive header.
endarc_bits = (
    (rf.RAR_ENDARC_NEXT_VOLUME, "NEXTVOL"),
    (rf.RAR_ENDARC_DATACRC, "DATACRC"),
    (rf.RAR_ENDARC_REVSPACE, "REVSPACE"),
    (rf.RAR_ENDARC_VOLNR, "VOLNR"),
) + generic_bits

# Flags of RAR3 file / sub headers (dictionary-size bits are handled
# separately by get_file_flags()).
file_bits = (
    (rf.RAR_FILE_SPLIT_BEFORE, "SPLIT_BEFORE"),
    (rf.RAR_FILE_SPLIT_AFTER, "SPLIT_AFTER"),
    (rf.RAR_FILE_PASSWORD, "PASSWORD"),
    (rf.RAR_FILE_COMMENT, "COMMENT"),
    (rf.RAR_FILE_SOLID, "SOLID"),
    (rf.RAR_FILE_LARGE, "LARGE"),
    (rf.RAR_FILE_UNICODE, "UNICODE"),
    (rf.RAR_FILE_SALT, "SALT"),
    (rf.RAR_FILE_VERSION, "VERSION"),
    (rf.RAR_FILE_EXTTIME, "EXTTIME"),
    (rf.RAR_FILE_EXTFLAGS, "EXTFLAGS"),
) + generic_bits

# Labels for the value stored in the rf.RAR_FILE_DICTMASK bits, indexed by
# (flags & rf.RAR_FILE_DICTMASK) >> 5 in get_file_flags().
file_parms = (
    "D64",
    "D128",
    "D256",
    "D512",
    "D1024",
    "D2048",
    "D4096",
    "DIR",
)
# (mask, label) tables consumed by render_flags() for RAR5 headers.

# Flags common to every RAR5 block header.
r5_block_flags = (
    (rf.RAR5_BLOCK_FLAG_EXTRA_DATA, "EXTRA"),
    (rf.RAR5_BLOCK_FLAG_DATA_AREA, "DATA"),
    (rf.RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR5_BLOCK_FLAG_SPLIT_BEFORE, "SPLIT_BEFORE"),
    (rf.RAR5_BLOCK_FLAG_SPLIT_AFTER, "SPLIT_AFTER"),
    (rf.RAR5_BLOCK_FLAG_DEPENDS_PREV, "DEPENDS"),
    (rf.RAR5_BLOCK_FLAG_KEEP_WITH_PARENT, "KEEP"),
)

# Flags of the RAR5 main archive header.
r5_main_flags = (
    (rf.RAR5_MAIN_FLAG_ISVOL, "ISVOL"),
    (rf.RAR5_MAIN_FLAG_HAS_VOLNR, "VOLNR"),
    (rf.RAR5_MAIN_FLAG_SOLID, "SOLID"),
    (rf.RAR5_MAIN_FLAG_RECOVERY, "RECOVERY"),
    (rf.RAR5_MAIN_FLAG_LOCKED, "LOCKED"),
)

# Flags of RAR5 file / service headers.
r5_file_flags = (
    (rf.RAR5_FILE_FLAG_ISDIR, "DIR"),
    (rf.RAR5_FILE_FLAG_HAS_MTIME, "MTIME"),
    (rf.RAR5_FILE_FLAG_HAS_CRC32, "CRC32"),
    (rf.RAR5_FILE_FLAG_UNKNOWN_SIZE, "NOSIZE"),
)

# Single-entry tables still need the trailing comma to stay tuples of pairs.
r5_enc_flags = ((rf.RAR5_ENC_FLAG_HAS_CHECKVAL, "CHECKVAL"),)

r5_endarc_flags = ((rf.RAR5_ENDARC_FLAG_NEXT_VOL, "NEXTVOL"),)

# Flags of the per-file encryption record.
r5_file_enc_flags = (
    (rf.RAR5_XENC_CHECKVAL, "CHECKVAL"),
    (rf.RAR5_XENC_TWEAKED, "TWEAKED"),
)

# Redirection type code -> display name (looked up in show_item_v5()).
r5_file_redir_types = {
    rf.RAR5_XREDIR_UNIX_SYMLINK: "UNIX_SYMLINK",
    rf.RAR5_XREDIR_WINDOWS_SYMLINK: "WINDOWS_SYMLINK",
    rf.RAR5_XREDIR_WINDOWS_JUNCTION: "WINDOWS_JUNCTION",
    rf.RAR5_XREDIR_HARD_LINK: "HARD_LINK",
    rf.RAR5_XREDIR_FILE_COPY: "FILE_COPY",
}

# Flags of the file redirection record.
r5_file_redir_flags = ((rf.RAR5_XREDIR_ISDIR, "DIR"),)
# (mask, label) pairs used by show_mode() to render the mode field of
# entries whose host OS is MSDOS, WIN32 or OS2.
dos_mode_bits = (
    (1 << 0, "READONLY"),
    (1 << 1, "HIDDEN"),
    (1 << 2, "SYSTEM"),
    (1 << 3, "VOLUME_ID"),
    (1 << 4, "DIRECTORY"),
    (1 << 5, "ARCHIVE"),
    (1 << 6, "DEVICE"),
    (1 << 7, "NORMAL"),
    (1 << 8, "TEMPORARY"),
    (1 << 9, "SPARSE_FILE"),
    (1 << 10, "REPARSE_POINT"),
    (1 << 11, "COMPRESSED"),
    (1 << 12, "OFFLINE"),
    (1 << 13, "NOT_CONTENT_INDEXED"),
    (1 << 14, "ENCRYPTED"),
    (1 << 15, "INTEGRITY_STREAM"),
    (1 << 16, "VIRTUAL"),
    (1 << 17, "NO_SCRUB_DATA"),
    (1 << 18, "RECALL_ON_OPEN"),
    (1 << 19, "PINNED"),
    (1 << 20, "UNPINNED"),
    (1 << 22, "RECALL_ON_DATA_ACCESS"),
    (1 << 29, "STRICTLY_SEQUENTIAL"),
)
def xprint(m, *args):
    """Write a line to stdout.

    When extra arguments are given, *m* is treated as a %-format string and
    they are interpolated into it; otherwise *m* is printed verbatim.
    """
    text = m % args if args else m
    print(text)
def tohex(data):
    """Return the lowercase hexadecimal text form of a bytes-like value."""
    encoded = binascii.hexlify(data)
    return str(encoded, "ascii")
def render_flags(flags, bit_list):
    """Render a bitmask as a comma-separated list of flag names.

    *bit_list* is a sequence of (mask, label) pairs.  Labels of set bits are
    emitted in table order; set bits not covered by any mask follow as
    "UNK_xxxx" entries, lowest bit first.  Returns "-" when nothing is set.
    """
    names = [label for mask, label in bit_list if flags & mask]

    covered = 0
    for mask, _label in bit_list:
        covered |= mask

    # Walk the leftover bits from least significant upwards.
    leftover = flags & ~covered
    position = 0
    while leftover:
        if leftover & 1:
            names.append("UNK_%04x" % (1 << position))
        leftover >>= 1
        position += 1

    return ",".join(names) if names else "-"
def get_file_flags(flags):
    """Render RAR3 file-header flags, with the dictionary-size label appended.

    The bits under rf.RAR_FILE_DICTMASK are excluded from the named flags and
    instead select one entry of file_parms, which becomes the last item.
    """
    dict_index = (flags & rf.RAR_FILE_DICTMASK) >> 5
    named = render_flags(flags & ~rf.RAR_FILE_DICTMASK, file_bits)
    return named + "," + file_parms[dict_index]
def fmt_time(t):
    """Return a printable timestamp.

    datetime values are rendered in ISO format, None as "(-)", and anything
    else is interpolated as a six-field (year, month, day, hour, minute,
    second) sequence.
    """
    if isinstance(t, datetime):
        return t.isoformat("T")
    if t is None:
        return "(-)"
    return "%04d-%02d-%02d %02d:%02d:%02d" % t
def show_item(h):
    """Print a header record, dispatching on whether it is RAR3 or RAR5.

    Records that are neither rf.Rar3Info nor rf.Rar5Info produce a one-line
    notice instead.
    """
    printers = (
        (rf.Rar3Info, show_item_v3),
        (rf.Rar5Info, show_item_v5),
    )
    for info_cls, printer in printers:
        if isinstance(h, info_cls):
            printer(h)
            return
    xprint("Unknown info record")
def show_rftype(h):
    """Return a three-character summary of the entry kind.

    Position 0 is "F" when h.is_file(), position 1 is "D" when h.is_dir(),
    position 2 is "L" when h.is_symlink(); unset positions are "-".
    """
    checks = (
        ("F", h.is_file()),
        ("D", h.is_dir()),
        ("L", h.is_symlink()),
    )
    return "".join(letter if hit else "-" for letter, hit in checks)
def modex3(v):
    """Return ["r"|"-", "w"|"-", "x"|"-"] for the low three bits of *v*."""
    return [ch if v & bit else "-" for bit, ch in ((4, "r"), (2, "w"), (1, "x"))]
def unix_mode(mode):
    """Render a Unix st_mode-style value as an ``ls -l`` style string.

    The result is one file-type character followed by nine permission
    characters.  setuid/setgid/sticky replace the matching execute slot with
    a lowercase letter ("s"/"t") when execute is also set and an uppercase
    letter ("S"/"T") when it is not, so the special bit is always visible.
    File types other than directory, symlink, regular file or "no type bits"
    are shown as "?" with the raw type bits appended as "(0x....)".
    """
    # rwx for user, group, other: bit 0o400 down to bit 0o001.
    perms = [ch if mode & (0o400 >> pos) else "-"
             for pos, ch in enumerate("rwxrwxrwx")]

    # (special bit, execute slot it overlays, letter)
    specials = (
        (0x0800, 2, "s"),   # setuid
        (0x0400, 5, "s"),   # setgid
        (0x0200, 8, "t"),   # sticky
    )
    for special_bit, slot, letter in specials:
        if mode & special_bit:
            perms[slot] = letter if perms[slot] == "x" else letter.upper()

    type_bits = mode & 0xF000
    type_chars = {
        0x4000: "d",
        0xA000: "l",
        0x8000: "-",    # regular file, the common case
        0: "-",         # no type bits recorded
    }
    prefix = type_chars.get(type_bits, "?")
    text = prefix + "".join(perms)
    if prefix == "?":
        # Unknown type: keep the raw bits so nothing is lost.
        text += "(0x%04x)" % type_bits
    return text
def show_mode(h):
    """Return a printable form of h.mode, chosen by h.host_os.

    UNIX and BEOS hosts get an ls-style permission string, MSDOS / WIN32 /
    OS2 hosts get DOS attribute names, and any other host gets plain hex.
    """
    host = h.host_os
    if host in (rf.RAR_OS_UNIX, rf.RAR_OS_BEOS):
        return unix_mode(h.mode)
    if host in (rf.RAR_OS_MSDOS, rf.RAR_OS_WIN32, rf.RAR_OS_OS2):
        return render_flags(h.mode, dos_mode_bits)
    return "0x%x" % h.mode
def show_item_v3(h):
    """Print one RAR3 header record to stdout.

    A summary line (type name, header size, data size, entry kind) comes
    first, followed by details that depend on the block type:

    * FILE / SUB: flags, host OS, version, mode, method, sizes, volume,
      CRC, name and whichever timestamps are set.
    * MAIN / ENDARC: the flags decoded with the matching table; ENDARC also
      prints the data CRC and volume number when their flags are set.
    * MARK: raw flags only.
    * anything else: flags decoded with the generic table.

    The comment, when present, is printed last as a repr.
    """
    st = rar3_type(h.type)
    xprint("%s: hdrlen=%d datlen=%d is=%s",
           st, h.header_size, h.add_size, show_rftype(h))
    if h.type in (rf.RAR_BLOCK_FILE, rf.RAR_BLOCK_SUB):
        s_mode = show_mode(h)
        xprint(" flags=0x%04x:%s", h.flags, get_file_flags(h.flags))
        if 0 <= h.host_os < len(os_list):
            s_os = os_list[h.host_os]
        else:
            s_os = "?"
        if h.flags & rf.RAR_FILE_UNICODE:
            s_namecmp = " namecmp=%d/%d" % (len(h.orig_filename), h._name_size)
        else:
            s_namecmp = ""
        xprint(" os=%d:%s ver=%d mode=%s meth=%c cmp=%d dec=%d vol=%d%s",
               h.host_os, s_os,
               h.extract_version, s_mode, h.compress_type,
               h.compress_size, h.file_size, h.volume, s_namecmp)
        # Show the CRC as unsigned 32-bit even if it is stored as negative.
        ucrc = h.CRC & 0xFFFFFFFF
        xprint(" crc=0x%08x (%d) date_time=%s", ucrc, h.CRC, fmt_time(h.date_time))
        xprint(" name=%s", h.filename)
        for field in ("mtime", "ctime", "atime", "arctime"):
            stamp = getattr(h, field)
            if stamp:
                xprint(" %s=%s", field, fmt_time(stamp))
    elif h.type == rf.RAR_BLOCK_MAIN:
        xprint(" flags=0x%04x:%s", h.flags, render_flags(h.flags, main_bits))
    elif h.type == rf.RAR_BLOCK_ENDARC:
        xprint(" flags=0x%04x:%s", h.flags, render_flags(h.flags, endarc_bits))
        if h.flags & rf.RAR_ENDARC_DATACRC:
            xprint(" datacrc=0x%08x", h.endarc_datacrc)
        # The volume number has its own flag; it was previously gated on
        # DATACRC by mistake.
        if h.flags & rf.RAR_ENDARC_VOLNR:
            xprint(" volnr=%d", h.endarc_volnr)
    elif h.type == rf.RAR_BLOCK_MARK:
        xprint(" flags=0x%04x:", h.flags)
    else:
        xprint(" flags=0x%04x:%s", h.flags, render_flags(h.flags, generic_bits))

    if h.comment is not None:
        xprint(" comment=%r", h.comment)
def show_item_v5(h):
    """Print one RAR5 header record to stdout.

    A summary line and the decoded block flags come first.  FILE and SERVICE
    blocks then list name, file flags, compression parameters, host OS,
    mode, sizes, checksums, timestamps and any encryption, redirection,
    owner or version records.  MAIN, ENDARC and ENCRYPTION blocks print
    their own flag set; any other block type prints a placeholder line.
    The comment, when present, is printed last.
    """
    kind = h.block_type
    xprint("%s: hdrlen=%d datlen=%d hdr_extra=%d is=%s", rar5_type(kind), h.header_size,
           h.compress_size, h.block_extra_size, show_rftype(h))
    xprint(" block_flags=0x%04x:%s", h.block_flags, render_flags(h.block_flags, r5_block_flags))

    if kind in (rf.RAR5_BLOCK_FILE, rf.RAR5_BLOCK_SERVICE):
        xprint(" name=%s", h.filename)
        mode_text = show_mode(h)
        os_text = "UNIX" if h.file_host_os == rf.RAR5_OS_UNIX else "WINDOWS"
        xprint(" file_flags=0x%04x:%s", h.file_flags, render_flags(h.file_flags, r5_file_flags))

        # Algorithm, method, dictionary size and the solid bit are all
        # packed into one integer.
        packed = h.file_compress_flags
        xprint(" cmp_algo=%d cmp_meth=%d dict=%d solid=%r",
               packed & 0x3f,
               (packed >> 7) & 0x07,
               packed >> 10,
               (packed & rf.RAR5_COMPR_SOLID) > 0)
        xprint(" os=%d:%s mode=%s cmp=%r dec=%r vol=%r",
               h.file_host_os, os_text, mode_text,
               h.compress_size, h.file_size, h.volume)

        if h.CRC is not None:
            xprint(" crc=0x%08x (%d)", h.CRC, h.CRC)
        if h.blake2sp_hash is not None:
            xprint(" blake2sp=%s", tohex(h.blake2sp_hash))
        if h.date_time is not None:
            xprint(" date_time=%s", fmt_time(h.date_time))
        for field in ("mtime", "ctime", "atime", "arctime"):
            stamp = getattr(h, field)
            if stamp:
                xprint(" %s=%s", field, fmt_time(stamp))

        if h.flags & rf.RAR_FILE_PASSWORD:
            enc_algo, enc_flags, kdf_count, salt, iv, checkval = h.file_encryption
            if enc_algo == rf.RAR5_XENC_CIPHER_AES256:
                algo_name = "AES256"
            else:
                algo_name = "UnknownAlgo"
            checkval_text = tohex(checkval) if checkval else "-"
            xprint(" algo=%d:%s enc_flags=%04x:%s kdf_lg=%d kdf_count=%d salt=%s iv=%s checkval=%s",
                   enc_algo, algo_name, enc_flags, render_flags(enc_flags, r5_file_enc_flags),
                   kdf_count, 1 << kdf_count, tohex(salt), tohex(iv),
                   checkval_text)

        if h.file_redir:
            redir_type, redir_flags, redir_name = h.file_redir
            xprint(" redir: type=%s flags=%d:%s destination=%s",
                   r5_file_redir_types.get(redir_type, "Unknown"),
                   redir_flags, render_flags(redir_flags, r5_file_redir_flags),
                   redir_name)

        if h.file_owner:
            uname, gname, uid, gid = h.file_owner
            xprint(" owner: name=%r group=%r uid=%r gid=%r",
                   uname, gname, uid, gid)

        if h.file_version:
            ver_flags, ver_number = h.file_version
            xprint(" version: flags=%r version=%r", ver_flags, ver_number)

    elif kind == rf.RAR5_BLOCK_MAIN:
        xprint(" flags=0x%04x:%s", h.flags, render_flags(h.main_flags, r5_main_flags))

    elif kind == rf.RAR5_BLOCK_ENDARC:
        xprint(" flags=0x%04x:%s", h.flags, render_flags(h.endarc_flags, r5_endarc_flags))

    elif kind == rf.RAR5_BLOCK_ENCRYPTION:
        if h.encryption_algo == rf.RAR5_XENC_CIPHER_AES256:
            algo_name = "AES256"
        else:
            algo_name = "UnknownAlgo"
        xprint(" algo=%d:%s flags=0x%04x:%s", h.encryption_algo, algo_name, h.flags,
               render_flags(h.encryption_flags, r5_enc_flags))
        xprint(" kdf_lg=%d kdf_count=%d", h.encryption_kdf_count, 1 << h.encryption_kdf_count)
        xprint(" salt=%s", tohex(h.encryption_salt))

    else:
        xprint(" - missing info -")

    if h.comment is not None:
        comment_text = repr(h.comment)
        if comment_text[0] == "u":
            comment_text = comment_text[1:]
        xprint(" comment=%s", comment_text)
# Run-time switches; main() overwrites these from the command line.

# Output control.
cf_verbose = 0          # incremented once per -v
cf_show_comment = 0     # -c
cf_charset = None       # -C: passed to RarFile as charset

# Actions performed on each archive.
cf_test_read = 0        # incremented once per -t; >1 also tests seek/readinto
cf_extract = 0          # -x
cf_test_unrar = 0       # run RarFile.testrar()
cf_test_memory = 0      # -M: load the archive into a BytesIO first
def check_crc(f, inf, desc):
    """Report a checksum mismatch between an open stream and its header.

    Nothing is checked when the header carries no expected digest
    (inf._md_expect is None).  Otherwise the digest accumulated by the
    stream is compared with it and a "crc error" line, tagged with *desc*,
    is printed on mismatch.
    """
    expected = inf._md_expect
    if expected is not None:
        actual = f._md_context.digest()
        if actual != expected:
            print("crc error - %s - exp=%r got=%r" % (desc, expected, actual))
def test_read_long(r, inf):
    """Read one archive member in full and verify its size and checksum.

    The member is read in 8 KiB chunks via read(); the byte count is checked
    against inf.file_size and the stream digest against the header's
    expected value.  With -v given twice, a digest mismatch also prints the
    independently computed hash.  With -t given twice, the stream is rewound
    and read again through readinto() to check the byte count a second time.

    The member stream is always closed, even if reading raises.
    """
    md_class = inf._md_class or rf.NoHashContext
    bctx = md_class()
    inf_orig = r.getinfo_orig(inf.filename)
    f = r.open(inf.filename)
    try:
        total = 0
        while True:
            data = f.read(8192)
            if not data:
                break
            bctx.update(data)
            total += len(data)
        if total != inf.file_size:
            xprint("\n *** %s has corrupt file: %s ***", r.rarfile, inf.filename)
            xprint(" *** short read: got=%d, need=%d ***\n", total, inf.file_size)
        check_crc(f, inf_orig, "read")
        bhash = bctx.hexdigest()
        if cf_verbose > 1 and f._md_context.digest() != inf_orig._md_expect:
            # NOTE(review): the comparison uses inf_orig._md_expect but the
            # message prints inf._md_expect - confirm which one is intended.
            xprint(" checkhash: %r got=%r exp=%r cls=%r\n",
                   bhash, f._md_context.digest(), inf._md_expect, inf._md_class)

        # test .seek() & .readinto()
        if cf_test_read > 1:
            f.seek(0, 0)

            total = 0
            buf = bytearray(1024)
            while True:
                res = f.readinto(buf)
                if not res:
                    break
                total += res
            if inf.file_size != total:
                xprint(" *** readinto failed: got=%d, need=%d ***\n", total, inf.file_size)
    finally:
        f.close()
def test_read(r, inf):
    """Exercise reading of one archive member (delegates to test_read_long)."""
    test_read_long(r, inf)
def test_real(fn, pwd):
    """Process one archive according to the cf_* switches.

    Steps, in order: optionally load the file into memory, check that it is
    a RAR archive, open it, apply *pwd* if a password is needed, show the
    archive comment, dump and/or read-test every member, extract, and run
    the backend's own test.  Returns early (after printing a notice) when
    the file is not a RAR archive or a required password is missing.

    The source file handle and the RarFile are both closed on every exit
    path.
    """
    xprint("Archive: %s", fn)

    # With -v -v every parsed header is dumped as it is read.
    cb = show_item if cf_verbose > 1 else None

    rfarg = fn
    if cf_test_memory:
        with open(fn, "rb") as src:
            rfarg = io.BytesIO(src.read())

    # check if rar
    if not rf.is_rarfile(rfarg):
        xprint(" --- %s is not a RAR file ---", fn)
        return

    # open
    with rf.RarFile(rfarg, charset=cf_charset, info_callback=cb) as r:
        # set password
        if r.needs_password():
            if pwd:
                r.setpassword(pwd)
            else:
                xprint(" --- %s requires password ---", fn)
                return

        # show comment
        if cf_show_comment and r.comment:
            for ln in r.comment.split("\n"):
                xprint(" %s", ln)
        elif cf_verbose > 0 and r.comment:
            xprint(" comment=%r", r.comment)

        # process
        for n in r.namelist():
            inf = r.getinfo(n)
            if cf_verbose == 1:
                show_item(inf)
            if cf_test_read and inf.is_file():
                test_read(r, inf)

        # Both extraction entry points are exercised on purpose.
        if cf_extract:
            r.extractall()
            for inf in r.infolist():
                r.extract(inf)

        if cf_test_unrar:
            r.testrar()
def test(fn, pwd):
    """Process one archive, reporting archive and I/O errors instead of raising.

    A middle volume of a multi-volume set gets its own notice; any other
    rf.Error or IOError is printed as "<ExceptionName>: <message>".
    """
    try:
        test_real(fn, pwd)
    except rf.NeedFirstVolume as ex:
        xprint(" --- %s is middle part of multi-vol archive (%s)---", fn, str(ex))
    except (rf.Error, IOError) as ex:
        xprint("\n *** %s: %s ***\n", type(ex).__name__, ex)
def main():
    """Program entry point: parse switches, then process each archive.

    Switches set the module-level cf_* options.  Arguments starting with
    "@" name a UTF-8 text file holding one archive path per line.  With no
    archives to process the usage text is printed and nothing else happens.

    Exits with status 1 on an unparsable command line or an unknown backend
    name.
    """
    global cf_verbose, cf_show_comment, cf_charset
    global cf_extract, cf_test_read, cf_test_unrar
    global cf_test_memory

    cf_backend = None
    pwd = None

    # parse args
    # "T" is listed so the -T handler below is reachable; "R" stays accepted
    # for backward compatibility and is treated as an alias of -T.
    try:
        opts, args = getopt.getopt(sys.argv[1:], "p:C:hvcxtTRMb:")
    except getopt.error as ex:
        print(str(ex), file=sys.stderr)
        sys.exit(1)

    for o, v in opts:
        if o == "-p":
            pwd = v
        elif o == "-h":
            xprint(usage)
            return
        elif o == "-v":
            cf_verbose += 1
        elif o == "-c":
            cf_show_comment = 1
        elif o == "-x":
            cf_extract = 1
        elif o == "-t":
            cf_test_read += 1
        elif o in ("-T", "-R"):
            cf_test_unrar = 1
        elif o == "-M":
            cf_test_memory = 1
        elif o == "-C":
            cf_charset = v
        elif o == "-b":
            cf_backend = v
        else:
            raise ValueError("unhandled switch: " + o)

    # Expand @listfile arguments into the archive names they contain.
    args2 = []
    for a in args:
        if a.startswith("@"):
            with open(a[1:], "r", encoding="utf8") as listing:
                for ln in listing:
                    # Strip only the line terminator so a final line without
                    # a newline keeps its last character.
                    fn = ln.rstrip("\r\n")
                    if fn:
                        args2.append(fn)
        else:
            args2.append(a)
    args = args2

    if not args:
        xprint(usage)
        return

    if cf_backend:
        cf_backend = {"7z": "sevenzip", "7zz": "sevenzip2"}.get(cf_backend, cf_backend)
        conf = {"unrar": False, "unar": False, "bsdtar": False, "sevenzip": False, "sevenzip2": False}
        # Explicit check rather than assert, which is stripped under -O.
        if cf_backend not in conf:
            print(f"unknown backend: {cf_backend}", file=sys.stderr)
            sys.exit(1)
        conf[cf_backend] = True
        rf.tool_setup(force=True, **conf)

    for fn in args:
        test(fn, pwd)
if __name__ == "__main__":
    # Ctrl-C ends the run quietly; every other exception still propagates.
    try:
        main()
    except KeyboardInterrupt:
        pass