Merge pull request #4 from hanwentao/master
[rarfile.git] / dumprar.py
blob5db2918aa65dd98c302010464c0d4a0a43ab2ee7
1 #! /usr/bin/env python
3 """Dump archive contents, test extraction."""
5 import sys
6 import rarfile as rf
7 from binascii import crc32, hexlify
8 from datetime import datetime
# Compatibility shim: an interpreter without a builtin ``bytearray`` gets a
# minimal stand-in built on an unsigned-byte ``array.array``.
try:
    bytearray
except NameError:
    import array

    def bytearray(v):
        """Return the contents of *v* as an array of unsigned bytes."""
        return array.array('B', v)
# Switch on the rarfile module-level options this dump tool runs with.
rf.USE_DATETIME = 1
rf.UNICODE_COMMENTS = 1
rf.REPORT_BAD_HEADER = 1
# Help text shown for -h and when no archive name is given.  Every switch
# accepted by main() is listed here, including -T.
usage = """
dumprar [switches] [ARC1 ARC2 ...] [@ARCLIST]
switches:
@file read archive names from file
-pPSW set password
-Ccharset set fallback charset
-v increase verbosity
-t attempt to read all files
-T run testrar() on archive
-x write read files out
-c show archive comment
-h show usage
-- stop switch parsing
""".strip()
# Host OS names, indexed by the numeric host_os value of a header.
os_list = [
    'DOS',
    'OS2',
    'WIN',
    'UNIX',
    'MACOS',
    'BEOS',
]

# Block type names, indexed by (block type - rf.RAR_BLOCK_MARK).
block_strs = [
    'MARK',
    'MAIN',
    'FILE',
    'OLD_COMMENT',
    'OLD_EXTRA',
    'OLD_SUB',
    'OLD_RECOVERY',
    'OLD_AUTH',
    'SUB',
    'ENDARC',
]
def rarType(type):
    """Return the display name for block type code *type*.

    Codes outside the rf.RAR_BLOCK_MARK .. rf.RAR_BLOCK_ENDARC range are
    reported as "*UNKNOWN*".
    """
    if rf.RAR_BLOCK_MARK <= type <= rf.RAR_BLOCK_ENDARC:
        return block_strs[type - rf.RAR_BLOCK_MARK]
    return "*UNKNOWN*"
# (mask, name) pairs used to render the flags of a main archive header.
main_bits = (
    (rf.RAR_MAIN_VOLUME, "VOL"),
    (rf.RAR_MAIN_COMMENT, "COMMENT"),
    (rf.RAR_MAIN_LOCK, "LOCK"),
    (rf.RAR_MAIN_SOLID, "SOLID"),
    (rf.RAR_MAIN_NEWNUMBERING, "NEWNR"),
    (rf.RAR_MAIN_AUTH, "AUTH"),
    (rf.RAR_MAIN_RECOVERY, "RECOVERY"),
    (rf.RAR_MAIN_PASSWORD, "PASSWORD"),
    (rf.RAR_MAIN_FIRSTVOLUME, "FIRSTVOL"),
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)
# (mask, name) pairs used to render the flags of an end-of-archive block.
endarc_bits = (
    (rf.RAR_ENDARC_NEXT_VOLUME, "NEXTVOL"),
    (rf.RAR_ENDARC_DATACRC, "DATACRC"),
    (rf.RAR_ENDARC_REVSPACE, "REVSPACE"),
    (rf.RAR_ENDARC_VOLNR, "VOLNR"),
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)
# (mask, name) pairs used to render the flags of file and sub blocks.
file_bits = (
    (rf.RAR_FILE_SPLIT_BEFORE, "SPLIT_BEFORE"),
    (rf.RAR_FILE_SPLIT_AFTER, "SPLIT_AFTER"),
    (rf.RAR_FILE_PASSWORD, "PASSWORD"),
    (rf.RAR_FILE_COMMENT, "COMMENT"),
    (rf.RAR_FILE_SOLID, "SOLID"),
    (rf.RAR_FILE_LARGE, "LARGE"),
    (rf.RAR_FILE_UNICODE, "UNICODE"),
    (rf.RAR_FILE_SALT, "SALT"),
    (rf.RAR_FILE_VERSION, "VERSION"),
    (rf.RAR_FILE_EXTTIME, "EXTTIME"),
    (rf.RAR_FILE_EXTFLAGS, "EXTFLAGS"),
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)
# (mask, name) pairs used for block types that have no table of their own.
generic_bits = (
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)
# Names for the value stored under rf.RAR_FILE_DICTMASK in file flags,
# indexed by that value shifted right by 5.
file_parms = (
    "D64",
    "D128",
    "D256",
    "D512",
    "D1024",
    "D2048",
    "D4096",
    "DIR",
)
def xprint(m):
    """Write *m* followed by a newline to standard output.

    On Python 2 a unicode message is encoded as UTF-8 before writing.
    """
    running_py2 = sys.hexversion < 0x3000000
    if running_py2 and isinstance(m, unicode):
        m = m.encode('utf8')
    out = sys.stdout
    out.write(m)
    out.write('\n')
def render_flags(flags, bit_list):
    """Return a comma-separated list of the names of the bits set in *flags*.

    *bit_list* is a sequence of (mask, name) pairs.  A name is included
    when its mask overlaps *flags*.  Set bits not covered by any mask are
    appended as "UNK_xxxx", where xxxx is the bit's value in hex.
    """
    names = []
    recognised = 0
    for mask, label in bit_list:
        recognised |= mask
        if flags & mask:
            names.append(label)

    # Walk the leftover bits from the lowest upwards, tracking the value of
    # the bit currently sitting in position 0.
    leftover = flags & ~recognised
    bit_value = 1
    while leftover:
        if leftover & 1:
            names.append("UNK_%04x" % bit_value)
        leftover >>= 1
        bit_value <<= 1

    return ",".join(names)
def get_file_flags(flags):
    """Render file-block *flags* as text.

    The bits under rf.RAR_FILE_DICTMASK are not rendered as individual
    flags; their value selects an entry of file_parms, which is appended
    after a comma.
    """
    dict_bits = flags & rf.RAR_FILE_DICTMASK
    names = render_flags(flags & ~rf.RAR_FILE_DICTMASK, file_bits)
    return names + "," + file_parms[dict_bits >> 5]
def get_main_flags(flags):
    """Render *flags* as text using the main_bits table."""
    rendered = render_flags(flags, main_bits)
    return rendered
def get_endarc_flags(flags):
    """Render *flags* as text using the endarc_bits table."""
    rendered = render_flags(flags, endarc_bits)
    return rendered
def get_generic_flags(flags):
    """Render *flags* as text using the generic_bits table."""
    rendered = render_flags(flags, generic_bits)
    return rendered
def fmt_time(t):
    """Format timestamp *t* as text.

    A datetime is rendered with isoformat(' ').  Anything else is fed to a
    six-field "YYYY-MM-DD HH:MM:SS" %-format, so it has to be a 6-tuple of
    integers.
    """
    if not isinstance(t, datetime):
        return "%04d-%02d-%02d %02d:%02d:%02d" % t
    return t.isoformat(' ')
def show_item(h):
    """Print a description of header object *h* through xprint.

    A summary line with header and data sizes always comes first.  File
    and sub blocks then get their flags, OS, version, mode, method, sizes,
    CRC, name and whichever timestamps are set.  Other block types get
    only a flags line.  A comment, when present, is printed last.
    """
    # Header bytes beyond the part accounted for by header_base.
    extra = h.header_size - h.header_base
    xprint("%s: hdrlen=%d datlen=%d hdr_unknown=%d" % (
        rarType(h.type), h.header_size, h.add_size, extra))
    if cf_verbose > 1 and extra > 0:
        xprint(" unknown: %s" % hexlify(h.header_data[h.header_base:]))

    if h.type == rf.RAR_BLOCK_FILE or h.type == rf.RAR_BLOCK_SUB:
        # Mode is shown in octal for UNIX hosts and in hex for the rest.
        if h.host_os != rf.RAR_OS_UNIX:
            mode_text = "0x%x" % h.mode
        else:
            mode_text = "0%o" % h.mode
        xprint(" flags=0x%04x:%s" % (h.flags, get_file_flags(h.flags)))

        os_name = "?"
        if 0 <= h.host_os < len(os_list):
            os_name = os_list[h.host_os]
        xprint(" os=%d:%s ver=%d mode=%s meth=%c cmp=%d dec=%d vol=%d" % (
            h.host_os, os_name,
            h.extract_version, mode_text, h.compress_type,
            h.compress_size, h.file_size, h.volume))

        # Show the CRC as an unsigned 32-bit value next to the raw one.
        unsigned_crc = h.CRC & ((1 << 32) - 1)
        xprint(" crc=0x%08x (%d) time=%s" % (
            unsigned_crc, h.CRC, fmt_time(h.date_time)))
        xprint(" name=%s" % h.filename)

        # Optional timestamps, printed only when set.
        for attr, line_fmt in (("mtime", " mtime=%s"),
                               ("ctime", " ctime=%s"),
                               ("atime", " atime=%s"),
                               ("arctime", " arctime=%s")):
            stamp = getattr(h, attr)
            if stamp:
                xprint(line_fmt % fmt_time(stamp))
    elif h.type == rf.RAR_BLOCK_MAIN:
        xprint(" flags=0x%04x:%s" % (h.flags, get_main_flags(h.flags)))
    elif h.type == rf.RAR_BLOCK_ENDARC:
        xprint(" flags=0x%04x:%s" % (h.flags, get_endarc_flags(h.flags)))
    elif h.type == rf.RAR_BLOCK_MARK:
        xprint(" flags=0x%04x:" % (h.flags,))
    else:
        xprint(" flags=0x%04x:%s" % (h.flags, get_generic_flags(h.flags)))

    if h.comment is not None:
        shown = repr(h.comment)
        # Drop the 'u' prefix that repr() puts on Python 2 unicode strings.
        if shown.startswith('u'):
            shown = shown[1:]
        xprint(" comment=%s" % shown)
# Run-time options; main() overwrites them from the command line.
cf_verbose = 0          # counted up by each -v
cf_charset = None       # value given with -C
cf_show_comment = 0     # set by -c
cf_extract = 0          # set by -x
cf_test_read = 0        # counted up by each -t
cf_test_unrar = 0       # set by -T
def check_crc(f, inf):
    """Print 'crc error' when the CRC of *f* does not match that of *inf*.

    Both objects only need a ``CRC`` attribute holding an integer.  Either
    value may be a signed 32-bit number (negative), so both are reduced to
    their unsigned 32-bit form before comparing.  Plain integer arithmetic
    is used so this works on Python 3, which has no ``long`` builtin.
    """
    mask = (1 << 32) - 1
    if (f.CRC & mask) != (inf.CRC & mask):
        print('crc error')
def test_read_long(r, inf):
    """Read member *inf* of archive *r* in full and check size and CRC.

    The member is read in 8192-byte chunks.  A length that differs from
    inf.file_size is reported through xprint, then check_crc() runs.  When
    cf_test_read is above 1 the stream is rewound and read a second time
    through readinto() with the same checks.

    The stream is closed even when reading or checking raises.
    """
    f = r.open(inf.filename)
    try:
        total = 0
        while True:
            data = f.read(8192)
            if not data:
                break
            total += len(data)
        if total != inf.file_size:
            xprint("\n *** %s has corrupt file: %s ***" % (r.rarfile, inf.filename))
            xprint(" *** short read: got=%d, need=%d ***\n" % (total, inf.file_size))
        check_crc(f, inf)

        # test .seek() & .readinto()
        if cf_test_read > 1:
            f.seek(0, 0)

            # hack: re-enable crc calc
            f.crc_check = 1
            f.CRC = 0

            total = 0
            buf = bytearray(rf.ZERO*4096)
            while True:
                res = f.readinto(buf)
                if not res:
                    break
                total += res
            if inf.file_size != total:
                xprint(" *** readinto failed: got=%d, need=%d ***\n" % (total, inf.file_size))
            check_crc(f, inf)
    finally:
        f.close()
def test_read(r, inf):
    """Run the read test for member *inf* of archive *r*.

    Forwards both arguments to test_read_long().
    """
    test_read_long(r, inf)
def test_real(fn, psw):
    """Dump archive *fn* and run whichever tests the cf_* options enable.

    *psw* is the password to apply when the archive asks for one.  The
    function returns early, after a message, when *fn* is not a RAR file or
    when a password is needed and none was given.  Exceptions are left to
    the caller.
    """
    xprint("Archive: %s" % fn)

    # Per-header dump through the callback only at verbosity above 1.
    if cf_verbose > 1:
        cb = show_item
    else:
        cb = None

    # check if rar
    if not rf.is_rarfile(fn):
        xprint(" --- %s is not a RAR file ---" % fn)
        return

    # open
    r = rf.RarFile(fn, charset = cf_charset, info_callback = cb)

    # set password
    if r.needs_password():
        if not psw:
            xprint(" --- %s requires password ---" % fn)
            return
        r.setpassword(psw)

    # show comment
    if cf_show_comment and r.comment:
        for comment_line in r.comment.split('\n'):
            xprint(" %s" % comment_line)
    elif cf_verbose == 1 and r.comment:
        shown = repr(r.comment)
        # Drop the 'u' prefix that repr() puts on Python 2 unicode strings.
        if shown.startswith('u'):
            shown = shown[1:]
        xprint(" comment=%s" % shown)

    # process
    for member_name in r.namelist():
        member = r.getinfo(member_name)
        if not member.isdir():
            if cf_verbose == 1:
                show_item(member)
            if cf_test_read:
                test_read(r, member)

    # Extraction goes through both extractall() and per-member extract().
    if cf_extract:
        r.extractall()
        for member in r.infolist():
            r.extract(member)

    if cf_test_unrar:
        r.testrar()
def test(fn, psw):
    """Run test_real() on *fn*, reporting expected failures as messages.

    rf.NeedFirstVolume, rf.Error and IOError are printed through xprint
    instead of propagating.  Other exceptions pass through.
    """
    try:
        test_real(fn, psw)
    except rf.NeedFirstVolume:
        xprint(" --- %s is middle part of multi-vol archive ---" % fn)
    except (rf.Error, IOError):
        # sys.exc_info() instead of "except ... as" keeps the syntax valid
        # on both Python 2 and 3.
        exc, msg = sys.exc_info()[:2]
        xprint("\n *** %s: %s ***\n" % (exc.__name__, msg))
def _read_arclist(path):
    """Return the non-empty lines of file *path*, without line endings."""
    names = []
    f = open(path, 'r')
    try:
        for ln in f:
            # Strip only the line terminator, so the last entry survives
            # even when the file does not end with a newline.
            name = ln.rstrip('\r\n')
            if name:
                names.append(name)
    finally:
        f.close()
    return names


def main():
    """Parse sys.argv, set the cf_* options and process each archive.

    Arguments that do not start with '-' are archive names.  '@FILE' adds
    the names listed in FILE, one per line.  After '--' every argument is
    taken as an archive name.  With no archive names the usage text is
    printed.

    Raises:
        Exception: for a switch that is not recognised, a bare '-' included.
    """
    global cf_verbose, cf_show_comment, cf_charset
    global cf_extract, cf_test_read, cf_test_unrar

    # parse args
    args = []
    psw = None
    noswitch = False
    for a in sys.argv[1:]:
        # startswith() rather than a[0] / a[1], so "" and "-" cannot raise
        # IndexError.
        if noswitch:
            args.append(a)
        elif a.startswith("@"):
            args.extend(_read_arclist(a[1:]))
        elif not a.startswith('-'):
            args.append(a)
        elif a.startswith('-p'):
            psw = a[2:]
        elif a == '--':
            noswitch = True
        elif a == '-h':
            xprint(usage)
            return
        elif a == '-v':
            cf_verbose += 1
        elif a == '-c':
            cf_show_comment = 1
        elif a == '-x':
            cf_extract = 1
        elif a == '-t':
            cf_test_read += 1
        elif a == '-T':
            cf_test_unrar = 1
        elif a.startswith('-C'):
            cf_charset = a[2:]
        else:
            raise Exception("unknown switch: "+a)
    if not args:
        xprint(usage)

    for fn in args:
        test(fn, psw)
# Script entry point; Ctrl-C ends the run without a traceback.
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        pass