tox: remove hardcoded path
[rarfile.git] / dumprar.py
blobe082e55d9580e31224e259adae76307556da83b9
1 #! /usr/bin/env python3
3 """Dump archive contents, test extraction."""
5 import io
6 import sys
7 import getopt
9 from datetime import datetime
11 import rarfile as rf
# Help text printed for -h and when no archive names are given.
# Lists -M as well, since main() accepts and handles that switch.
usage = """
dumprar [switches] [ARC1 ARC2 ...] [@ARCLIST]
switches:
@file read archive names from file
-pPSW set password
-Ccharset set fallback charset
-v increase verbosity
-t attempt to read all files
-x write read files out
-c show archive comment
-M read archive into memory before opening
-h show usage
-- stop switch parsing
""".strip()

# Host OS names; show_item_v3() indexes this list with the RAR3 host_os code.
os_list = ["DOS", "OS2", "WIN", "UNIX", "MACOS", "BEOS"]

# RAR3 block type names; rar3_type() indexes this list with
# (block type - RAR_BLOCK_MARK).
block_strs = [
    "MARK", "MAIN", "FILE", "OLD_COMMENT", "OLD_EXTRA",
    "OLD_SUB", "OLD_RECOVERY", "OLD_AUTH", "SUB", "ENDARC",
]
# RAR5 block type code -> short display name, looked up by rar5_type().
r5_block_types = {
    rf.RAR5_BLOCK_MAIN: "R5_MAIN",
    rf.RAR5_BLOCK_FILE: "R5_FILE",
    rf.RAR5_BLOCK_SERVICE: "R5_SVC",
    rf.RAR5_BLOCK_ENCRYPTION: "R5_ENC",
    rf.RAR5_BLOCK_ENDARC: "R5_ENDARC",
}
def rar3_type(btype):
    """Return the display name for a RAR3 block type code.

    Codes outside the RAR_BLOCK_MARK..RAR_BLOCK_ENDARC range yield
    "*UNKNOWN*".
    """
    if rf.RAR_BLOCK_MARK <= btype <= rf.RAR_BLOCK_ENDARC:
        return block_strs[btype - rf.RAR_BLOCK_MARK]
    return "*UNKNOWN*"
def rar5_type(btype):
    """Return the display name for a RAR5 block type code.

    Codes missing from r5_block_types yield "*UNKNOWN*".
    """
    try:
        return r5_block_types[btype]
    except KeyError:
        return "*UNKNOWN*"
# Each table below is a sequence of (bit mask, label) pairs in the form
# consumed by render_flags().

# RAR3 main header flags.
main_bits = (
    (rf.RAR_MAIN_VOLUME, "VOL"),
    (rf.RAR_MAIN_COMMENT, "COMMENT"),
    (rf.RAR_MAIN_LOCK, "LOCK"),
    (rf.RAR_MAIN_SOLID, "SOLID"),
    (rf.RAR_MAIN_NEWNUMBERING, "NEWNR"),
    (rf.RAR_MAIN_AUTH, "AUTH"),
    (rf.RAR_MAIN_RECOVERY, "RECOVERY"),
    (rf.RAR_MAIN_PASSWORD, "PASSWORD"),
    (rf.RAR_MAIN_FIRSTVOLUME, "FIRSTVOL"),
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)

# RAR3 end-of-archive header flags.
endarc_bits = (
    (rf.RAR_ENDARC_NEXT_VOLUME, "NEXTVOL"),
    (rf.RAR_ENDARC_DATACRC, "DATACRC"),
    (rf.RAR_ENDARC_REVSPACE, "REVSPACE"),
    (rf.RAR_ENDARC_VOLNR, "VOLNR"),
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)

# RAR3 file header flags (the dictionary-size bits are handled separately
# by get_file_flags()).
file_bits = (
    (rf.RAR_FILE_SPLIT_BEFORE, "SPLIT_BEFORE"),
    (rf.RAR_FILE_SPLIT_AFTER, "SPLIT_AFTER"),
    (rf.RAR_FILE_PASSWORD, "PASSWORD"),
    (rf.RAR_FILE_COMMENT, "COMMENT"),
    (rf.RAR_FILE_SOLID, "SOLID"),
    (rf.RAR_FILE_LARGE, "LARGE"),
    (rf.RAR_FILE_UNICODE, "UNICODE"),
    (rf.RAR_FILE_SALT, "SALT"),
    (rf.RAR_FILE_VERSION, "VERSION"),
    (rf.RAR_FILE_EXTTIME, "EXTTIME"),
    (rf.RAR_FILE_EXTFLAGS, "EXTFLAGS"),
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)

# Flags shown for every other RAR3 block type.
generic_bits = (
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)

# Labels for the RAR3 dictionary-size field; get_file_flags() indexes this
# with the masked flag bits shifted right by 5.
file_parms = (
    "D64", "D128", "D256", "D512",
    "D1024", "D2048", "D4096", "DIR",
)

# RAR5 generic block flags.
r5_block_flags = (
    (rf.RAR5_BLOCK_FLAG_EXTRA_DATA, "EXTRA"),
    (rf.RAR5_BLOCK_FLAG_DATA_AREA, "DATA"),
    (rf.RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR5_BLOCK_FLAG_SPLIT_BEFORE, "SPLIT_BEFORE"),
    (rf.RAR5_BLOCK_FLAG_SPLIT_AFTER, "SPLIT_AFTER"),
    (rf.RAR5_BLOCK_FLAG_DEPENDS_PREV, "DEPENDS"),
    (rf.RAR5_BLOCK_FLAG_KEEP_WITH_PARENT, "KEEP"),
)

# RAR5 main block flags.
r5_main_flags = (
    (rf.RAR5_MAIN_FLAG_ISVOL, "ISVOL"),
    (rf.RAR5_MAIN_FLAG_HAS_VOLNR, "VOLNR"),
    (rf.RAR5_MAIN_FLAG_SOLID, "SOLID"),
    (rf.RAR5_MAIN_FLAG_RECOVERY, "RECOVERY"),
    (rf.RAR5_MAIN_FLAG_LOCKED, "LOCKED"),
)

# RAR5 file block flags.
r5_file_flags = (
    (rf.RAR5_FILE_FLAG_ISDIR, "DIR"),
    (rf.RAR5_FILE_FLAG_HAS_MTIME, "MTIME"),
    (rf.RAR5_FILE_FLAG_HAS_CRC32, "CRC32"),
    (rf.RAR5_FILE_FLAG_UNKNOWN_SIZE, "NOSIZE"),
)

# RAR5 encryption block flags.
r5_enc_flags = (
    (rf.RAR5_ENC_FLAG_HAS_CHECKVAL, "CHECKVAL"),
)

# RAR5 end-of-archive block flags.
r5_endarc_flags = (
    (rf.RAR5_ENDARC_FLAG_NEXT_VOL, "NEXTVOL"),
)

# RAR5 per-file encryption record flags.
r5_file_enc_flags = (
    (rf.RAR5_XENC_CHECKVAL, "CHECKVAL"),
    (rf.RAR5_XENC_TWEAKED, "TWEAKED"),
)

# RAR5 redirection type code -> display name.
r5_file_redir_types = {
    rf.RAR5_XREDIR_UNIX_SYMLINK: "UNIX_SYMLINK",
    rf.RAR5_XREDIR_WINDOWS_SYMLINK: "WINDOWS_SYMLINK",
    rf.RAR5_XREDIR_WINDOWS_JUNCTION: "WINDOWS_JUNCTION",
    rf.RAR5_XREDIR_HARD_LINK: "HARD_LINK",
    rf.RAR5_XREDIR_FILE_COPY: "FILE_COPY",
}

# RAR5 redirection record flags.
r5_file_redir_flags = (
    (rf.RAR5_XREDIR_ISDIR, "DIR"),
)
def xprint(m, *args):
    """Write one formatted line to stdout.

    *m* is a %-style format string.  It is interpolated with *args* only
    when at least one argument is given, so a message without arguments may
    contain literal percent signs.  A newline is appended.

    The Python 2 decode/encode round trip was dropped: this module is
    Python 3 only (it uses ``print(..., file=...)``), so those branches
    could never run.
    """
    if args:
        m = m % args
    sys.stdout.write(m)
    sys.stdout.write("\n")
def render_flags(flags, bit_list):
    """Return a comma-separated list of the flag names set in *flags*.

    *bit_list* is a sequence of (mask, label) entries.  Set bits that no
    entry covers are reported as ``UNK_<hex value>``.  When nothing is set
    the result is "-".
    """
    labels = []
    known_mask = 0
    for entry in bit_list:
        known_mask |= entry[0]
        if flags & entry[0]:
            labels.append(entry[1])

    # Walk the remaining bits from least significant upwards.
    leftover = flags & ~known_mask
    position = 0
    while leftover:
        if leftover & 1:
            labels.append("UNK_%04x" % (1 << position))
        leftover >>= 1
        position += 1

    return ",".join(labels) if labels else "-"
def get_file_flags(flags):
    """Return RAR3 file flag names followed by the dictionary-size label.

    The bits under RAR_FILE_DICTMASK are removed before the flag names are
    rendered; they are shifted right by 5 and used as an index into
    file_parms instead.
    """
    names = render_flags(flags & ~rf.RAR_FILE_DICTMASK, file_bits)
    dict_index = (flags & rf.RAR_FILE_DICTMASK) >> 5
    return names + "," + file_parms[dict_index]
def fmt_time(t):
    """Return a printable form of a timestamp.

    None gives "(-)", a datetime gives its ISO form with a "T" separator,
    and anything else is formatted as a six-field
    (year, month, day, hour, minute, second) tuple.
    """
    if isinstance(t, datetime):
        return t.isoformat("T")
    if t is None:
        return "(-)"
    return "%04d-%02d-%02d %02d:%02d:%02d" % t
def show_item(h):
    """Print a header record, dispatching on its RAR3/RAR5 info class.

    Records that are neither Rar3Info nor Rar5Info produce a single
    "Unknown info record" line.
    """
    printers = (
        (rf.Rar3Info, show_item_v3),
        (rf.Rar5Info, show_item_v5),
    )
    for info_cls, printer in printers:
        if isinstance(h, info_cls):
            printer(h)
            return
    xprint("Unknown info record")
def show_item_v3(h):
    """Print the fields of one RAR3 header record.

    File and sub blocks get the full listing (flags, host OS, sizes, CRC,
    name and any timestamps that are set).  Other block types print only
    their flags.  A comment, when present, is printed last.
    """
    xprint("%s: hdrlen=%d datlen=%d", rar3_type(h.type), h.header_size, h.add_size)

    if h.type in (rf.RAR_BLOCK_FILE, rf.RAR_BLOCK_SUB):
        # Unix modes are shown in octal, everything else in hex.
        mode_fmt = "0%o" if h.host_os == rf.RAR_OS_UNIX else "0x%x"
        s_mode = mode_fmt % h.mode
        xprint(" flags=0x%04x:%s", h.flags, get_file_flags(h.flags))

        s_os = os_list[h.host_os] if 0 <= h.host_os < len(os_list) else "?"
        xprint(" os=%d:%s ver=%d mode=%s meth=%c cmp=%d dec=%d vol=%d",
               h.host_os, s_os,
               h.extract_version, s_mode, h.compress_type,
               h.compress_size, h.file_size, h.volume)

        # Show the CRC as an unsigned 32-bit value next to the raw number.
        unsigned_crc = h.CRC & 0xFFFFFFFF
        xprint(" crc=0x%08x (%d) date_time=%s", unsigned_crc, h.CRC, fmt_time(h.date_time))
        xprint(" name=%s", h.filename)

        for field in ("mtime", "ctime", "atime", "arctime"):
            stamp = getattr(h, field)
            if stamp:
                xprint(" " + field + "=%s", fmt_time(stamp))
    elif h.type == rf.RAR_BLOCK_MAIN:
        xprint(" flags=0x%04x:%s", h.flags, render_flags(h.flags, main_bits))
    elif h.type == rf.RAR_BLOCK_ENDARC:
        xprint(" flags=0x%04x:%s", h.flags, render_flags(h.flags, endarc_bits))
    elif h.type == rf.RAR_BLOCK_MARK:
        xprint(" flags=0x%04x:", h.flags)
    else:
        xprint(" flags=0x%04x:%s", h.flags, render_flags(h.flags, generic_bits))

    if h.comment is not None:
        text = repr(h.comment)
        # Drop a leading "u" string-prefix from the repr, if there is one.
        if text.startswith("u"):
            text = text[1:]
        xprint(" comment=%s", text)
def show_item_v5(h):
    """Print the fields of one RAR5 header record.

    File and service blocks get the full listing, including the optional
    encryption, redirection, owner and version records.  Main, end-of-archive
    and encryption blocks print their own flags.  Any other block type prints
    a "missing info" marker.  A comment, when present, is printed last.
    """
    xprint("%s: hdrlen=%d datlen=%d hdr_extra=%d", rar5_type(h.block_type),
           h.header_size, h.compress_size, h.block_extra_size)
    xprint(" block_flags=0x%04x:%s", h.block_flags, render_flags(h.block_flags, r5_block_flags))

    kind = h.block_type
    if kind in (rf.RAR5_BLOCK_FILE, rf.RAR5_BLOCK_SERVICE):
        xprint(" name=%s", h.filename)

        # Unix modes are shown in octal; any other host is labelled WINDOWS
        # and shown in hex.
        if h.file_host_os == rf.RAR5_OS_UNIX:
            s_os, s_mode = "UNIX", "0%o" % h.mode
        else:
            s_os, s_mode = "WINDOWS", "0x%x" % h.mode
        xprint(" file_flags=0x%04x:%s", h.file_flags, render_flags(h.file_flags, r5_file_flags))

        # Split the packed compression word into its displayed sub-fields.
        cflags = h.file_compress_flags
        xprint(" cmp_algo=%d cmp_meth=%d dict=%d solid=%r",
               cflags & 0x3f,
               (cflags >> 7) & 0x07,
               cflags >> 10,
               (cflags & rf.RAR5_COMPR_SOLID) > 0)
        xprint(" os=%d:%s mode=%s cmp=%r dec=%r vol=%r",
               h.file_host_os, s_os, s_mode,
               h.compress_size, h.file_size, h.volume)

        if h.CRC is not None:
            xprint(" crc=0x%08x (%d)", h.CRC, h.CRC)
        if h.blake2sp_hash is not None:
            xprint(" blake2sp=%s", rf.tohex(h.blake2sp_hash))
        if h.date_time is not None:
            xprint(" date_time=%s", fmt_time(h.date_time))

        for field in ("mtime", "ctime", "atime", "arctime"):
            stamp = getattr(h, field)
            if stamp:
                xprint(" " + field + "=%s", fmt_time(stamp))

        if h.flags & rf.RAR_FILE_PASSWORD:
            enc_algo, enc_flags, kdf_count, salt, iv, checkval = h.file_encryption
            if enc_algo == rf.RAR5_XENC_CIPHER_AES256:
                algo_name = "AES256"
            else:
                algo_name = "UnknownAlgo"
            xprint(" algo=%d:%s enc_flags=%04x:%s kdf_lg=%d kdf_count=%d salt=%s iv=%s checkval=%s",
                   enc_algo, algo_name, enc_flags, render_flags(enc_flags, r5_file_enc_flags),
                   kdf_count, 1 << kdf_count, rf.tohex(salt), rf.tohex(iv),
                   (rf.tohex(checkval) or "-") if checkval else "-")

        if h.file_redir:
            redir_type, redir_flags, redir_name = h.file_redir
            xprint(" redir: type=%s flags=%d:%s destination=%s",
                   r5_file_redir_types.get(redir_type, "Unknown"),
                   redir_flags, render_flags(redir_flags, r5_file_redir_flags),
                   redir_name)

        if h.file_owner:
            uname, gname, uid, gid = h.file_owner
            xprint(" owner: name=%r group=%r uid=%r gid=%r",
                   uname, gname, uid, gid)

        if h.file_version:
            ver_flags, ver_number = h.file_version
            xprint(" version: flags=%r version=%r", ver_flags, ver_number)
    elif kind == rf.RAR5_BLOCK_MAIN:
        xprint(" flags=0x%04x:%s", h.flags, render_flags(h.main_flags, r5_main_flags))
    elif kind == rf.RAR5_BLOCK_ENDARC:
        xprint(" flags=0x%04x:%s", h.flags, render_flags(h.endarc_flags, r5_endarc_flags))
    elif kind == rf.RAR5_BLOCK_ENCRYPTION:
        if h.encryption_algo == rf.RAR5_XENC_CIPHER_AES256:
            algo_name = "AES256"
        else:
            algo_name = "UnknownAlgo"
        xprint(" algo=%d:%s flags=0x%04x:%s", h.encryption_algo, algo_name, h.flags,
               render_flags(h.encryption_flags, r5_enc_flags))
        xprint(" kdf_lg=%d kdf_count=%d", h.encryption_kdf_count, 1 << h.encryption_kdf_count)
        xprint(" salt=%s", rf.tohex(h.encryption_salt))
    else:
        xprint(" - missing info -")

    if h.comment is not None:
        text = repr(h.comment)
        # Drop a leading "u" string-prefix from the repr, if there is one.
        if text.startswith("u"):
            text = text[1:]
        xprint(" comment=%s", text)
# Run-time settings; main() overwrites them from the command-line switches.
cf_show_comment = 0     # -c handler: print the archive comment line by line
cf_verbose = 0          # -v handler: incremented once per occurrence
cf_charset = None       # -C handler: charset passed to rf.RarFile
cf_extract = 0          # -x handler: extract the archive contents
cf_test_read = 0        # -t handler: incremented; above 1 also tests seek/readinto
cf_test_unrar = 0       # -T handler: call RarFile.testrar()
cf_test_memory = 0      # -M handler: load the archive into a BytesIO first
def check_crc(f, inf, desc):
    """Report a mismatch between the computed and the expected digest.

    Does nothing when *inf* carries no expected digest.  Otherwise the digest
    accumulated by the open file *f* is compared with it and a
    "crc error" line, tagged with *desc*, is printed on mismatch.
    """
    expected = inf._md_expect
    if expected is not None:
        actual = f._md_context.digest()
        if actual != expected:
            print("crc error - %s - exp=%r got=%r" % (desc, expected, actual))
def test_read_long(r, inf):
    """Read one archive member in chunks and verify its size and digest.

    *r* is the open archive and *inf* the info record of the member.  The
    member is read in 8192-byte chunks while a separate digest is computed
    from the returned data.  A short read or digest mismatch is reported on
    stdout.  When cf_test_read is above 1 the stream is rewound and read
    again through readinto().

    The member stream is closed in a ``finally`` clause, so it is released
    even when a read, seek or check raises.
    """
    md_class = inf._md_class or rf.NoHashContext
    bctx = md_class()
    f = r.open(inf.filename)
    try:
        total = 0
        while True:
            data = f.read(8192)
            if not data:
                break
            bctx.update(data)
            total += len(data)
        if total != inf.file_size:
            xprint("\n *** %s has corrupt file: %s ***", r.rarfile, inf.filename)
            xprint(" *** short read: got=%d, need=%d ***\n", total, inf.file_size)
        check_crc(f, inf, "read")
        bhash = bctx.hexdigest()
        if cf_verbose > 1 and f._md_context.digest() != inf._md_expect:
            xprint(" checkhash: %r got=%r exp=%r cls=%r\n",
                   bhash, f._md_context.digest(), inf._md_expect, inf._md_class)

        # test .seek() & .readinto()
        if cf_test_read > 1:
            f.seek(0, 0)

            total = 0
            buf = bytearray(rf.ZERO * 1024)
            while True:
                res = f.readinto(buf)
                if not res:
                    break
                total += res
            if inf.file_size != total:
                xprint(" *** readinto failed: got=%d, need=%d ***\n", total, inf.file_size)
            # NOTE(review): the original leaves the digest check after
            # readinto disabled; the reason is not visible here.
    finally:
        f.close()
def test_read(r, inf):
    """Run the read test for member *inf* of archive *r*.

    Currently this only delegates to test_read_long().
    """
    test_read_long(r, inf)
def test_real(fn, psw):
    """Process one archive according to the cf_* settings.

    *fn* is the archive path and *psw* the password, or None.  Returns early
    with a message when the file is not a RAR archive, or when it needs a
    password and none was given.  Exceptions from rarfile and file I/O
    propagate to the caller.
    """
    xprint("Archive: %s", fn)

    # Header records are dumped through the parser callback only at
    # verbosity 2 and above.
    cb = None
    if cf_verbose > 1:
        cb = show_item

    rfarg = fn
    if cf_test_memory:
        # Read through a context manager so the handle is closed right away
        # instead of being left to the garbage collector.
        with open(fn, "rb") as src:
            rfarg = io.BytesIO(src.read())

    # check if rar
    if not rf.is_rarfile(rfarg):
        xprint(" --- %s is not a RAR file ---", fn)
        return

    # open
    r = rf.RarFile(rfarg, charset=cf_charset, info_callback=cb)
    # set password
    if r.needs_password():
        if psw:
            r.setpassword(psw)
        else:
            xprint(" --- %s requires password ---", fn)
            return

    # show comment
    if cf_show_comment and r.comment:
        for ln in r.comment.split("\n"):
            xprint(" %s", ln)
    elif cf_verbose > 0 and r.comment:
        cm = repr(r.comment)
        if cm[0] == "u":
            cm = cm[1:]
        xprint(" comment=%s", cm)

    # process
    for n in r.namelist():
        inf = r.getinfo(n)
        if inf.isdir():
            continue
        if cf_verbose == 1:
            show_item(inf)
        if cf_test_read:
            test_read(r, inf)

    if cf_extract:
        # Both extraction entry points are called: extractall() first, then
        # extract() for each entry.
        r.extractall()
        for inf in r.infolist():
            r.extract(inf)

    if cf_test_unrar:
        r.testrar()
def test(fn, psw):
    """Process one archive and report expected failures instead of raising.

    A middle volume of a multi-volume set gets its own message.  Any other
    rarfile error or IOError is printed as "<exception class>: <message>".
    """
    try:
        test_real(fn, psw)
    except rf.NeedFirstVolume:
        xprint(" --- %s is middle part of multi-vol archive ---", fn)
    except (rf.Error, IOError) as err:
        xprint("\n *** %s: %s ***\n", type(err).__name__, err)
def main():
    """Program entry point: parse switches, then process each archive.

    Switches are read from sys.argv and stored in the module-level cf_*
    settings.  Arguments starting with "@" name a file holding one archive
    path per line.  Usage is printed when no archive is left to process.
    An invalid switch prints the getopt error to stderr and exits with
    status 1.
    """
    global cf_verbose, cf_show_comment, cf_charset
    global cf_extract, cf_test_read, cf_test_unrar
    global cf_test_memory

    psw = None

    # parse args
    # "T" must be in the option string, otherwise the -T branch below is
    # unreachable.  The former "R" entry had no handler and was dropped.
    try:
        opts, args = getopt.getopt(sys.argv[1:], "p:C:hvcxtTM")
    except getopt.error as ex:
        print(str(ex), file=sys.stderr)
        sys.exit(1)

    for o, v in opts:
        if o == "-p":
            psw = v
        elif o == "-h":
            xprint(usage)
            return
        elif o == "-v":
            cf_verbose += 1
        elif o == "-c":
            cf_show_comment = 1
        elif o == "-x":
            cf_extract = 1
        elif o == "-t":
            cf_test_read += 1
        elif o == "-T":
            cf_test_unrar = 1
        elif o == "-M":
            cf_test_memory = 1
        elif o == "-C":
            cf_charset = v
        else:
            raise Exception("unhandled switch: " + o)

    # Expand @listfile arguments into the archive names they contain.
    args2 = []
    for a in args:
        # startswith() also copes with an empty argument, where a[0] would
        # raise IndexError.
        if a.startswith("@"):
            with open(a[1:], "r") as listfile:
                for ln in listfile:
                    # Strip only the newline, so a final line without one
                    # keeps its last character; skip blank lines.
                    fn = ln.rstrip("\n")
                    if fn:
                        args2.append(fn)
        else:
            args2.append(a)
    args = args2

    if not args:
        xprint(usage)

    for fn in args:
        test(fn, psw)


if __name__ == "__main__":
    # Exit quietly on Ctrl-C instead of showing a traceback.
    try:
        main()
    except KeyboardInterrupt:
        pass