Quick binary expression handling for “test_token_buffering”
[vadmium-streams.git] / rarfile.py
blob60e5b561c30f4feb9be0317991bbbd4ac27634cb
1 #! /usr/bin/env python3
3 from sys import stdin
4 from binascii import crc32
5 from shorthand import bitmask
6 from struct import Struct
7 from io import SEEK_CUR, SEEK_END, SEEK_SET
8 import re
9 from contextlib import ExitStack
10 from io import BytesIO
11 from shorthand import read_exactly
12 from datetime import datetime
# Fixed 7-byte header that starts every block; unpacked below as
# (crc, type, flags, size).
HEADER = Struct('<HBHH')

# Block flag bits.
LONG_BLOCK = 1 << 15  # a 4-byte "packed" size follows the common header
OPTIONAL_BLOCK = 1 << 14  # only checked on the volume end block here

# Block type codes (named after the labels printed by main()).
MARKER = 0x72  # marker (signature) block
MAIN = 0x73  # archive volume main block
FILE = 0x74  # file block
TERM = 0x7B  # archive volume end block

# Fixed part of a file block; unpacked below as (unpacked, os, crc, time,
# version, method, name length, attrib).
FILE_STRUCT = Struct('< LBL L BcHL')

# File block flags telling whether the file continues across volumes.
SPLIT_BEFORE = 1 << 0  # continued from the previous volume
SPLIT_AFTER = 1 << 1  # continues in the next volume
SPLIT_MASK = SPLIT_BEFORE | SPLIT_AFTER
def main(volname=None, *files):
    """List the blocks of a RAR archive and extract requested stored files.

    Every block of every volume is printed as "+<offset>: <description>".
    File blocks whose parsed name is found in *files* are copied to a new
    file in the current directory, named after the last path component,
    and checked against the CRC recorded in the block.

    Parameters:
        volname: Path of the first volume, or None to read a single
            volume from standard input.
        *files: Names of the archive members to extract.

    Raises:
        SystemExit: If the archive ends while a file is still only
            partially extracted, or (from Archive) if the name of the next
            volume cannot be determined.
        AssertionError: If a block does not have the expected layout.
    """
    extract_file = None
    with Archive() as vol_cleanup, ExitStack() as extract_cleanup:
        if volname is None:
            # Standard input may not be seekable, so wrap it in a reader
            # that can emulate forward seeks by reading.
            vol_cleanup.vol = ForwardReader(stdin.buffer)
            vol_cleanup.volname = ''
        else:
            vol_cleanup.open(volname)

        for vol in vol_cleanup:
            if volname is not None:
                print(vol_cleanup.volname)
            blocks = iter(vol)
            while True:
                # The offset is printed before the block is parsed so that
                # it is visible even if parsing the block fails.
                print(end='+{}:'.format(vol.vol.tell()))
                try:
                    [block_type, flags, packed, block] = next(blocks)
                except StopIteration:
                    break

                if block_type == MARKER:
                    print(' Marker (signature) block')
                    continue

                if block_type == MAIN:
                    print(' Archive volume main block')
                    assert flags & ~0x100 == 0x11
                    assert block.read() == bytes(6)
                elif block_type == FILE:
                    print(end=' File;')

                    split = flags & SPLIT_MASK
                    if split:
                        part = {
                            SPLIT_BEFORE: 'last part',
                            SPLIT_AFTER: 'first part',
                            SPLIT_BEFORE | SPLIT_AFTER: 'middle part',
                        }[split]
                        print('', part, end=',')

                    [name, unpacked, crc,
                        mtime, mtime_flags, mtime_frac, exttime] = block
                    print('', repr(name), end=',')

                    dict_size = flags & 0xE0
                    if dict_size == 0xE0:
                        print(' directory')
                    else:
                        if extract_file:
                            # An extraction is in progress, so this block
                            # must be the continuation of the same file.
                            assert flags & SPLIT_BEFORE
                            assert name == extract_name
                        # NOTE(review): "name" is a list of path components
                        # here, so it only matches an element of "files"
                        # that is itself such a list -- confirm how callers
                        # are expected to spell the names.
                        elif name in files:
                            assert not flags & SPLIT_BEFORE
                            extract_name = name
                            basename = name[-1]
                            # Mode 'x' refuses to overwrite existing files
                            extract_file = open(basename, 'xb')
                            extract_cleanup.callback(extract_file.close)
                            extract_crc = 0

                        if extract_file:
                            # Copy the stored data in bounded chunks; the
                            # CRC accumulates across volume parts.
                            while packed > 0:
                                chunk = read_exactly(
                                    vol.vol, min(packed, 0x10000))
                                packed -= len(chunk)
                                extract_crc = crc32(chunk, extract_crc)
                                extract_file.write(chunk)
                            assert extract_crc == crc
                            if not flags & SPLIT_AFTER:
                                extract_cleanup.close()
                                extract_file = None
                        print(' CRC', format(crc, '08X'))
                elif block_type == TERM:
                    print(end=' Archive volume end block;')
                    crc = int.from_bytes(read_exactly(block, 4), 'little')
                    print(' CRC', format(crc, '08X'), end=',')
                    print(' part', vol.part)
                    # Skip the 2-byte part number following the CRC
                    block.seek(6)
                    assert block.read() == bytes(7)
                else:
                    assert block_type == 0x7A
                    print(' New sub-block')
                    assert flags == LONG_BLOCK

            # The position exceeds the volume size only if the last block
            # claimed more data than the volume actually holds.
            if vol.vol.tell() > vol.vol.seek(0, SEEK_END):
                print(' Block truncated at', vol.vol.tell())
            else:
                print(' EOF')

        if extract_file:
            # Fixed: the message previously lacked the separating space
            raise SystemExit(basename + ' not completely extracted')
class Archive(ExitStack):
    """Exit stack owning the currently open volume of an archive.

    Iterating yields one Volume per archive volume. After the consumer is
    done with a volume, its file is closed and, if the volume announced a
    successor, the next ".partN.rar" file is opened.

    Attributes set by open() (or directly by the caller):
        vol: Binary file object of the current volume.
        volname: File name of the current volume.
    """

    def open(self, volname):
        """Open *volname* for binary reading and make it the current volume.

        The file is registered with this exit stack, so close() (or leaving
        the "with" block) closes it.
        """
        handle = open(volname, 'rb')
        self.vol = self.enter_context(handle)
        self.volname = volname

    def __iter__(self):
        """Yield a Volume for each volume, following the ".partN" naming.

        Raises:
            SystemExit: If a further volume is expected but the current
                file name does not look like "<archive>.part<N>.rar".
        """
        stem = None
        while True:
            volume = Volume(self.vol)
            yield volume
            # The consumer has finished with this volume; release its file
            self.close()
            if not volume.nextvol:
                return

            if stem is None:
                # Work out the naming scheme once, from the first volume:
                # the archive stem and the zero-padded width of the number
                pattern = r'(.+)\.part(0*{})\.rar'.format(1 + volume.part)
                found = re.fullmatch(
                    pattern, self.volname, re.ASCII^re.DOTALL)
                if not found:
                    raise SystemExit('Cannot determine next volume name')
                stem, number = found.groups()
                width = len(number)

            # File names count from 1 whereas volume.part counts from 0
            volume.part += 1
            successor = '{}.part{:0{}}.rar'.format(
                stem, 1 + volume.part, width)
            self.open(successor)
class Volume:
    """Block parser for a single archive volume.

    Attributes:
        vol: File-like object positioned at the start of the volume.
        nextvol: Truthy once the volume end block says another volume
            follows.
        part: Volume number read from the volume end block (only set after
            that block has been parsed).
    """

    def __init__(self, vol):
        self.vol = vol
        self.nextvol = False

    def __iter__(self):
        """Yield a (type, flags, packed, block) tuple for each block.

        "packed" is the size of the data following the header (0 if the
        block has none). "block" is a BytesIO of the remaining header
        bytes, except for file blocks, where it is the tuple
        (name, unpacked, crc, time, mtime_flags, mtime_frac, exttime) with
        "name" already split into its path components.

        After the consumer resumes the generator, the volume is positioned
        at the end of the block's data, whether or not it was read.
        """
        while True:
            header = self.vol.read(7)
            if not header:
                return
            head_crc, kind, flags, size = HEADER.unpack(header)
            # "size" counts the whole header, including the 7 bytes just read
            size -= 7
            assert size >= 0
            payload = read_exactly(self.vol, size)
            if kind == MARKER:
                assert header.startswith(b'Rar!\x1A') and not size
            else:
                # The stored CRC is the low 16 bits of the CRC-32 of the
                # header, excluding the CRC field itself
                actual = crc32(payload, crc32(header[2:]))
                assert actual & bitmask(16) == head_crc

            block = BytesIO(payload)
            packed = 0
            if flags & LONG_BLOCK:
                packed = int.from_bytes(read_exactly(block, 4), 'little')

            if kind == FILE:
                assert flags & ~(SPLIT_MASK^0xE0) == LONG_BLOCK^0x1000

                fixed = read_exactly(block, FILE_STRUCT.size)
                (unpacked, host_os, file_crc, dos_time, version, method,
                    name_len, attrib) = FILE_STRUCT.unpack(fixed)
                assert host_os == 2
                assert version == 20 and method == b'0'
                raw_name = read_exactly(block, name_len)

                # 16-bit extended time flags: one nibble per timestamp,
                # the modification time being in the highest nibble
                tflags = int.from_bytes(read_exactly(block, 2), 'little')

                mtime_flags = tflags >> MTIME
                mtime_frac = None
                if mtime_flags & TIME_VALID:
                    mtime_frac = read_exactly(block, mtime_flags & 3)

                # The three lower nibbles each describe an optional field
                # of 4 bytes plus up to 3 further bytes
                exttime = []
                for shift in (MTIME - 4, MTIME - 8, MTIME - 12):
                    field = tflags >> shift
                    if field & TIME_VALID:
                        data = read_exactly(block, 4 + (field & 3))
                        exttime.append((data, field))
                    else:
                        exttime.append(None)
                # The header must have been consumed completely
                assert not block.read(1)

                dict_size = flags & 0xE0
                if dict_size == 0xE0:
                    # Treated as a directory entry by main(); carries no data
                    assert not (packed or unpacked or file_crc)
                    assert attrib == 0x10
                else:
                    assert dict_size == 0x20
                    assert attrib & ~0x200 == 0x20

                path = raw_name.decode('ascii').split('\\')
                block = (path, unpacked, file_crc,
                    dos_time, mtime_flags, mtime_frac, exttime)
            elif kind == TERM:
                self.nextvol = flags & 1
                assert flags & ~1 == OPTIONAL_BLOCK^0xE
                # The part number follows a 4-byte field; rewind afterwards
                # so the consumer sees the block from its start
                block.seek(4)
                self.part = int.from_bytes(read_exactly(block, 2), 'little')
                block.seek(0)

            end = self.vol.tell() + packed
            yield (kind, flags, packed, block)
            # Skip whatever part of the data the consumer did not read
            self.vol.seek(end)
def unpack_dostime(time):
    """Convert a packed 32-bit MS-DOS date/time to a Unix timestamp.

    Bit layout, from the most significant bit down: 7 bits of years since
    1980, 4 bits month, 5 bits day, 5 bits hour, 6 bits minute and 5 bits
    holding the seconds divided by two.

    Parameters:
        time: The packed date/time as an integer.

    Returns:
        The timestamp as an int, interpreting the fields as local time
        (the datetime built here is naive).

    Raises:
        ValueError: If a field is out of range for datetime().
    """
    moment = datetime(
        1980 + (time >> 25), time >> 21 & 15, time >> 16 & 31,
        # The seconds field is 5 bits wide; masking with 15 lost its top
        # bit, corrupting every time with 32 or more seconds
        time >> 11 & 31, time >> 5 & 63, (time & 31) * 2,
    )
    return int(moment.timestamp())
def unpack_exttime_field(tflags, fract):
    """Decode the sub-second part of an extended time field.

    Parameters:
        tflags: Flag nibble of the field; bit 2 (value 4) is reported as
            the "odd" flag.
        fract: Zero to three bytes holding the most significant bytes of a
            24-bit little-endian count; the missing low bytes are zero.

    Returns:
        A tuple (odd, count * 100). Presumably the count is in units of
        100 ns, making the result nanoseconds -- confirm against the
        format specification.
    """
    # Pad on the low-order side so the given bytes keep their weight
    padding = bytes(3 - len(fract))
    count = int.from_bytes(padding + fract, 'little')
    return (bool(tflags & 4), count * 100)
# Bit position of the modification time nibble within the 16-bit extended
# time flags; the remaining nibbles follow at steps of 4 bits below it.
MTIME = 12
# Bit within a nibble signalling that the corresponding time field exists.
TIME_VALID = 1 << 3
class ForwardReader:
    """Reader wrapper that tracks its position and emulates forward seeks.

    Real seeks are delegated to the wrapped stream. If the stream refuses
    (raises OSError), forward seeks are emulated by reading and discarding
    data, which is enough for walking through a piped archive.

    Attributes:
        reader: The wrapped binary stream.
        pos: Current position, counted from where wrapping started (or as
            reported by the stream after a successful real seek).
        end: Position of the end of the stream, or None while unknown.
    """

    def __init__(self, reader):
        self.reader = reader
        self.pos = 0
        # Unknown until a read comes back short
        self.end = None

    def read(self, n):
        """Read up to *n* bytes, noting the stream end on a short read."""
        result = self.reader.read(n)
        self.pos += len(result)
        if len(result) < n:
            self.end = self.pos
        return result

    def seek(self, pos, base=SEEK_SET):
        """Move to the given position and return the new position.

        Emulated seeks support moving forwards (SEEK_SET or SEEK_CUR) and
        querying the end with seek(0, SEEK_END). Seeking beyond the end of
        the data leaves the position past the recorded end, so callers can
        detect truncation by comparing tell() with seek(0, SEEK_END).

        Raises:
            AssertionError: If an emulated seek would have to move
                backwards, or uses SEEK_END with a non-zero offset.
        """
        try:
            moved = self.reader.seek(pos, base)
        except OSError:
            pass  # Not seekable; emulate below
        else:
            # Keep tell() in step with the real stream position
            self.pos = moved
            return moved

        if base == SEEK_SET:
            pos -= self.tell()
            base = SEEK_CUR
        if base == SEEK_CUR:
            assert pos >= 0
            while pos:
                chunk = min(pos, 0x10000)
                data = self.read(chunk)
                pos -= len(data)
                if len(data) < chunk:
                    # Hit the end early (read() has recorded it); account
                    # for the rest of the distance regardless
                    self.pos += pos
                    break
            return self.pos

        assert base == SEEK_END and not pos
        # If the end has not been seen yet, read forward to discover it
        while self.end is None:
            self.read(0x10000)
        return self.end

    def tell(self):
        """Return the current position."""
        return self.pos
# Script entry point. clifunc is an external module not shown in this file;
# presumably it maps the command-line arguments onto main() -- confirm.
if __name__ == "__main__":
    import clifunc
    clifunc.run()