dumprar: fix xprint to work with unicode better
[rarfile.git] / dumprar.py
blobc922fe3aaf2198adeef4b097a8d0c33965fb749f
1 #! /usr/bin/env python
3 """Dump archive contents, test extraction."""
5 import sys
6 import rarfile as rf
7 from binascii import crc32, hexlify
8 from datetime import datetime
# Compatibility shim: when the interpreter has no bytearray builtin,
# provide a stand-in backed by an unsigned-byte array.array.
try:
    bytearray
except NameError:
    import array
    def bytearray(v):
        # 'B' = unsigned char items, initialised from v
        return array.array('B', v)

# NOTE(review): module-level rarfile switches; judging by their names they
# request unicode comments and datetime timestamps -- confirm against the
# rarfile documentation.
rf.UNICODE_COMMENTS = 1
rf.USE_DATETIME = 1
# Help text shown for -h and when no archive is given on the command line.
# Keep this list in sync with the switch parsing in main().
usage = """
dumprar [switches] [ARC1 ARC2 ...] [@ARCLIST]
switches:
@file read archive names from file
-pPSW set password
-Ccharset set fallback charset
-v increase verbosity
-t attempt to read all files
-T test archive with unrar
-x write read files out
-c show archive comment
-h show usage
-- stop switch parsing
""".strip()
# Host OS names; show_item() indexes this list with a header's host_os.
os_list = [
    'DOS',
    'OS2',
    'WIN',
    'UNIX',
    'MACOS',
    'BEOS',
]

# Block type names; rarType() indexes this list with
# (type - rf.RAR_BLOCK_MARK).
block_strs = [
    'MARK',
    'MAIN',
    'FILE',
    'OLD_COMMENT',
    'OLD_EXTRA',
    'OLD_SUB',
    'OLD_RECOVERY',
    'OLD_AUTH',
    'SUB',
    'ENDARC',
]
def rarType(type):
    """Return the symbolic name for a RAR block type code.

    Codes outside the rf.RAR_BLOCK_MARK .. rf.RAR_BLOCK_ENDARC range
    yield "*UNKNOWN*".
    """
    if rf.RAR_BLOCK_MARK <= type <= rf.RAR_BLOCK_ENDARC:
        # block_strs is ordered starting at RAR_BLOCK_MARK
        return block_strs[type - rf.RAR_BLOCK_MARK]
    return "*UNKNOWN*"
# (mask, label) pairs for main-header flags; rendered by get_main_flags().
main_bits = (
    (rf.RAR_MAIN_VOLUME, "VOL"),
    (rf.RAR_MAIN_COMMENT, "COMMENT"),
    (rf.RAR_MAIN_LOCK, "LOCK"),
    (rf.RAR_MAIN_SOLID, "SOLID"),
    (rf.RAR_MAIN_NEWNUMBERING, "NEWNR"),
    (rf.RAR_MAIN_AUTH, "AUTH"),
    (rf.RAR_MAIN_RECOVERY, "RECOVERY"),
    (rf.RAR_MAIN_PASSWORD, "PASSWORD"),
    (rf.RAR_MAIN_FIRSTVOLUME, "FIRSTVOL"),
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)
# (mask, label) pairs for end-of-archive block flags; rendered by
# get_endarc_flags().
endarc_bits = (
    (rf.RAR_ENDARC_NEXT_VOLUME, "NEXTVOL"),
    (rf.RAR_ENDARC_DATACRC, "DATACRC"),
    (rf.RAR_ENDARC_REVSPACE, "REVSPACE"),
    (rf.RAR_ENDARC_VOLNR, "VOLNR"),
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)
# (mask, label) pairs for file/sub block flags; rendered by
# get_file_flags(), which masks out the dictionary-size bits first.
file_bits = (
    (rf.RAR_FILE_SPLIT_BEFORE, "SPLIT_BEFORE"),
    (rf.RAR_FILE_SPLIT_AFTER, "SPLIT_AFTER"),
    (rf.RAR_FILE_PASSWORD, "PASSWORD"),
    (rf.RAR_FILE_COMMENT, "COMMENT"),
    (rf.RAR_FILE_SOLID, "SOLID"),
    (rf.RAR_FILE_LARGE, "LARGE"),
    (rf.RAR_FILE_UNICODE, "UNICODE"),
    (rf.RAR_FILE_SALT, "SALT"),
    (rf.RAR_FILE_VERSION, "VERSION"),
    (rf.RAR_FILE_EXTTIME, "EXTTIME"),
    (rf.RAR_FILE_EXTFLAGS, "EXTFLAGS"),
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)
# (mask, label) pairs for block types without a dedicated flag table;
# rendered by get_generic_flags().
generic_bits = (
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)
# Labels for the dictionary-size field of the file flags;
# get_file_flags() indexes this with (flags & rf.RAR_FILE_DICTMASK) >> 5.
file_parms = (
    "D64",
    "D128",
    "D256",
    "D512",
    "D1024",
    "D2048",
    "D4096",
    "DIR",
)
def xprint(m, *args):
    """Write a %-formatted message plus a newline to sys.stdout.

    m is used as-is when no args are given; otherwise it is formatted
    with ``m % args``.  On Python 2 the template is decoded from UTF-8
    before formatting and the result is encoded back to UTF-8 before
    writing.
    """
    legacy = sys.hexversion < 0x3000000
    if legacy:
        m = m.decode('utf8')
    if args:
        m = m % args
    if legacy:
        m = m.encode('utf8')
    out = sys.stdout
    out.write(m)
    out.write('\n')
def render_flags(flags, bit_list):
    """Return a comma-separated description of the bits set in flags.

    bit_list is a sequence of (mask, label) pairs.  Labels of matching
    masks come first, in table order.  Every set bit not covered by any
    mask follows as "UNK_<hex value>", lowest bit first.
    """
    names = []
    recognized = 0
    for mask, label in bit_list:
        recognized |= mask
        if flags & mask:
            names.append(label)

    # walk the leftover bits from the least significant one upwards
    leftover = flags & ~recognized
    bit_value = 1
    while leftover:
        if leftover & 1:
            names.append("UNK_%04x" % bit_value)
        leftover >>= 1
        bit_value <<= 1

    return ",".join(names)
def get_file_flags(flags):
    """Describe file-block flags, followed by the dictionary-size label.

    The bits under rf.RAR_FILE_DICTMASK are not rendered as flags; they
    select an entry of file_parms that is appended after a comma.
    """
    dict_bits = flags & rf.RAR_FILE_DICTMASK
    names = render_flags(flags & ~rf.RAR_FILE_DICTMASK, file_bits)
    return names + "," + file_parms[dict_bits >> 5]
def get_main_flags(flags):
    """Render main-header flags using the main_bits table."""
    return render_flags(flags, main_bits)
def get_endarc_flags(flags):
    """Render end-of-archive block flags using the endarc_bits table."""
    return render_flags(flags, endarc_bits)
def get_generic_flags(flags):
    """Render flags of any other block type using the generic_bits table."""
    return render_flags(flags, generic_bits)
def fmt_time(t):
    """Format a timestamp as "YYYY-MM-DD HH:MM:SS".

    t is either a datetime (rendered with isoformat(' ')) or a 6-item
    tuple (year, month, day, hour, minute, second).
    """
    if not isinstance(t, datetime):
        return "%04d-%02d-%02d %02d:%02d:%02d" % t
    return t.isoformat(' ')
def show_item(h):
    """Print a dump of one archive header block via xprint().

    h is a header/info object supplied by rarfile (test_real() passes
    this function as info_callback and also calls it on getinfo()
    results).  Output is a summary line with sizes, then a type-specific
    flag line (plus details for file/sub blocks), then the comment when
    h.comment is not None.
    """
    st = rarType(h.type)
    unknown = h.header_size - h.header_base
    xprint("%s: hdrlen=%d datlen=%d hdr_unknown=%d", st, h.header_size,
           h.add_size, unknown)
    if unknown > 0 and cf_verbose > 1:
        dat = h.header_data[h.header_base:]
        # hexlify() returns bytes on Python 3; decode it so the dump is
        # printed as plain hex digits instead of b'...'.
        xprint(" unknown: %s", hexlify(dat).decode('ascii'))

    if h.type in (rf.RAR_BLOCK_FILE, rf.RAR_BLOCK_SUB):
        # mode is shown in octal for UNIX hosts, hex otherwise
        if h.host_os == rf.RAR_OS_UNIX:
            s_mode = "0%o" % h.mode
        else:
            s_mode = "0x%x" % h.mode
        xprint(" flags=0x%04x:%s", h.flags, get_file_flags(h.flags))
        if 0 <= h.host_os < len(os_list):
            s_os = os_list[h.host_os]
        else:
            s_os = "?"
        xprint(" os=%d:%s ver=%d mode=%s meth=%c cmp=%d dec=%d vol=%d",
               h.host_os, s_os,
               h.extract_version, s_mode, h.compress_type,
               h.compress_size, h.file_size, h.volume)
        # show the CRC as unsigned 32-bit even if it is stored negative
        ucrc = (h.CRC + (1 << 32)) & ((1 << 32) - 1)
        xprint(" crc=0x%08x (%d) time=%s", ucrc, h.CRC, fmt_time(h.date_time))
        xprint(" name=%s", h.filename)
        # optional timestamps are printed only when set
        if h.mtime:
            xprint(" mtime=%s", fmt_time(h.mtime))
        if h.ctime:
            xprint(" ctime=%s", fmt_time(h.ctime))
        if h.atime:
            xprint(" atime=%s", fmt_time(h.atime))
        if h.arctime:
            xprint(" arctime=%s", fmt_time(h.arctime))
    elif h.type == rf.RAR_BLOCK_MAIN:
        xprint(" flags=0x%04x:%s", h.flags, get_main_flags(h.flags))
    elif h.type == rf.RAR_BLOCK_ENDARC:
        xprint(" flags=0x%04x:%s", h.flags, get_endarc_flags(h.flags))
    elif h.type == rf.RAR_BLOCK_MARK:
        xprint(" flags=0x%04x:", h.flags)
    else:
        xprint(" flags=0x%04x:%s", h.flags, get_generic_flags(h.flags))

    if h.comment is not None:
        cm = repr(h.comment)
        # drop the u prefix of a Python 2 unicode repr
        if cm[0] == 'u':
            cm = cm[1:]
        xprint(" comment=%s", cm)
# Run-time options; main() overwrites these while parsing switches.
cf_verbose = 0          # incremented for every -v
cf_show_comment = 0     # set by -c
cf_charset = None       # value given with -C
cf_extract = 0          # set by -x
cf_test_read = 0        # incremented for every -t
cf_test_unrar = 0       # set by -T
def check_crc(f, inf):
    """Print 'crc error' when f.CRC and inf.CRC differ.

    Both values are reduced to unsigned 32-bit before comparing, so a
    CRC reported as a negative signed integer still matches its unsigned
    form.  This replaces the Python 2-only long() call, which raised
    NameError on Python 3.
    """
    mask = (1 << 32) - 1
    if (f.CRC & mask) != (inf.CRC & mask):
        print('crc error')
def test_read_long(r, inf):
    """Read one archive member completely and verify size and CRC.

    r is the opened archive, inf the member's info object.  The member
    is read in 8192-byte chunks; a byte count different from
    inf.file_size is reported through xprint() and the CRC is checked
    with check_crc().  When cf_test_read > 1 the member is rewound and
    read a second time through readinto().  The member is closed even if
    reading or checking raises.
    """
    f = r.open(inf.filename)
    try:
        total = 0
        while 1:
            data = f.read(8192)
            if not data:
                break
            total += len(data)
        if total != inf.file_size:
            xprint("\n *** %s has corrupt file: %s ***", r.rarfile, inf.filename)
            xprint(" *** short read: got=%d, need=%d ***\n", total, inf.file_size)
        check_crc(f, inf)

        # test .seek() & .readinto()
        if cf_test_read > 1:
            f.seek(0, 0)

            # hack: re-enable crc calc
            f.crc_check = 1
            f.CRC = 0

            total = 0
            buf = bytearray(rf.ZERO * 4096)
            while 1:
                res = f.readinto(buf)
                if not res:
                    break
                total += res
            if inf.file_size != total:
                xprint(" *** readinto failed: got=%d, need=%d ***\n", total, inf.file_size)
            check_crc(f, inf)
    finally:
        # release the member even when a read or check fails
        f.close()
def test_read(r, inf):
    """Read-test one archive member; delegates to test_read_long()."""
    test_read_long(r, inf)
def test_real(fn, psw):
    """Dump and optionally test one archive, as selected by the cf_* options.

    fn is the archive path, psw the password or None.  Returns early,
    after printing a note, when fn is not a RAR file or when it needs a
    password and none was given.  Exceptions from rarfile propagate to
    the caller.
    """
    xprint("Archive: %s", fn)

    # per-block dump through the info callback only at verbosity 2+
    callback = None
    if cf_verbose > 1:
        callback = show_item

    # check if rar
    if not rf.is_rarfile(fn):
        xprint(" --- %s is not a RAR file ---", fn)
        return

    # open
    archive = rf.RarFile(fn, charset = cf_charset, info_callback = callback)

    # set password
    if archive.needs_password():
        if not psw:
            xprint(" --- %s requires password ---", fn)
            return
        archive.setpassword(psw)

    # show comment
    if cf_show_comment and archive.comment:
        for line in archive.comment.split('\n'):
            xprint(" %s", line)
    elif cf_verbose == 1 and archive.comment:
        text = repr(archive.comment)
        if text.startswith('u'):
            text = text[1:]
        xprint(" comment=%s", text)

    # process every non-directory member
    for name in archive.namelist():
        info = archive.getinfo(name)
        if not info.isdir():
            if cf_verbose == 1:
                show_item(info)
            if cf_test_read:
                test_read(archive, info)

    # extraction is exercised both in bulk and per member
    if cf_extract:
        archive.extractall()
        for info in archive.infolist():
            archive.extract(info)

    if cf_test_unrar:
        archive.testrar()
def test(fn, psw):
    """Run test_real() on one archive and report expected failures.

    rf.NeedFirstVolume, rf.Error and IOError are caught and printed
    through xprint(); any other exception propagates.
    """
    try:
        test_real(fn, psw)
    except rf.NeedFirstVolume:
        xprint(" --- %s is middle part of multi-vol archive ---", fn)
    except (rf.Error, IOError):
        # only the class and the value are needed; the traceback is not kept
        exc, msg = sys.exc_info()[:2]
        xprint("\n *** %s: %s ***\n", exc.__name__, msg)
def main():
    """Parse sys.argv, set the cf_* options and test every named archive.

    Arguments not starting with '-' are archive names; '@file' reads
    archive names from file, one per line.  After '--' everything is
    taken as an archive name.  -h prints the usage text and returns.
    The usage text is also printed when no archive was named.

    Raises Exception for an unrecognised switch.
    """
    global cf_verbose, cf_show_comment, cf_charset
    global cf_extract, cf_test_read, cf_test_unrar

    # parse args
    args = []
    psw = None
    noswitch = False
    for a in sys.argv[1:]:
        # slices instead of a[0] / a[1]: "" and "-" must not raise IndexError
        if noswitch:
            args.append(a)
        elif a[:1] == "@":
            listfile = open(a[1:], 'r')
            try:
                for ln in listfile:
                    # strip only the line terminator, so a final line
                    # without a newline keeps its last character
                    fn = ln.rstrip('\r\n')
                    if fn:
                        args.append(fn)
            finally:
                listfile.close()
        elif a[:1] != '-':
            args.append(a)
        elif a[1:2] == 'p':
            psw = a[2:]
        elif a == '--':
            noswitch = True
        elif a == '-h':
            xprint(usage)
            return
        elif a == '-v':
            cf_verbose += 1
        elif a == '-c':
            cf_show_comment = 1
        elif a == '-x':
            cf_extract = 1
        elif a == '-t':
            cf_test_read += 1
        elif a == '-T':
            cf_test_unrar = 1
        elif a[1:2] == 'C':
            cf_charset = a[2:]
        else:
            raise Exception("unknown switch: "+a)
    if not args:
        xprint(usage)

    for fn in args:
        test(fn, psw)
# Script entry point.
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl-C is swallowed so no traceback is printed
        pass