Support more than 200 old-style volumes.
[rarfile.git] / rarfile.py
blob4eaa69666ca670b4064f5458614d7561b3dbcbed
1 # rarfile.py
3 # Copyright (c) 2005-2010 Marko Kreen <markokr@gmail.com>
5 # Permission to use, copy, modify, and/or distribute this software for any
6 # purpose with or without fee is hereby granted, provided that the above
7 # copyright notice and this permission notice appear in all copies.
9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
"""RAR archive reader.

This is a Python module for reading RAR archives.  The interface
is made as `zipfile`-like as possible.

Archive structure parsing and reading of uncompressed (stored) files
are handled in pure Python.  Decompression is done
via the 'unrar' command line utility.

Features:

 - Works with both Python 2.x and 3.x
 - Supports RAR 3.x archives.
 - Supports multi-volume archives.
 - Supports Unicode filenames.
 - Supports password-protected archives.
 - Supports archive comments.
"""
36 __version__ = '2.1'
38 import sys, os, re
39 from struct import pack, unpack
40 from binascii import crc32
41 from tempfile import mkstemp
42 from subprocess import Popen, PIPE
# Python 2.x compatibility: there bytes() is only an alias of str() and
# does not accept an encoding argument, so shadow it with a 2-argument shim.
if sys.version_info[0] < 3:
    def bytes(foo, enc):
        """Return foo as a byte string; enc is accepted but ignored."""
        return str(foo)
49 # export only interesting items
50 __all__ = ['is_rarfile', 'RarInfo', 'RarFile']
52 # default fallback charset
53 DEFAULT_CHARSET = "windows-1252"
55 # 'unrar', 'rar' or full path to either one
56 EXTRACT_TOOL = "unrar"
58 # Must be 'rar', because 'unrar' does not have 'cw' command.
59 # Can be full path, or None to disable comment extraction
60 COMMENT_TOOL = "rar"
62 # command line args to use for extracting. (rar, file) will be added.
63 EXTRACT_ARGS = ('p', '-inul')
65 # how to extract comment from archive. (rar, tmpfile) will be added.
66 COMMENT_ARGS = ('cw', '-y', '-inul', '-p-')
68 # whether to speed up decompression by using tmp archive
69 USE_EXTRACT_HACK = 1
#
# RAR format constants
#

# block types (the value is also the ASCII code of the letter shown)
RAR_BLOCK_MARK          = 0x72  # r
RAR_BLOCK_MAIN          = 0x73  # s
RAR_BLOCK_FILE          = 0x74  # t
RAR_BLOCK_OLD_COMMENT   = 0x75  # u
RAR_BLOCK_OLD_EXTRA     = 0x76  # v
RAR_BLOCK_OLD_SUB       = 0x77  # w
RAR_BLOCK_OLD_RECOVERY  = 0x78  # x
RAR_BLOCK_OLD_AUTH      = 0x79  # y
RAR_BLOCK_SUB           = 0x7a  # z
RAR_BLOCK_ENDARC        = 0x7b  # {

# flag bits of the main archive header
RAR_MAIN_VOLUME         = 0x0001
RAR_MAIN_COMMENT        = 0x0002
RAR_MAIN_LOCK           = 0x0004
RAR_MAIN_SOLID          = 0x0008
RAR_MAIN_NEWNUMBERING   = 0x0010
RAR_MAIN_AUTH           = 0x0020
RAR_MAIN_RECOVERY       = 0x0040
RAR_MAIN_PASSWORD       = 0x0080
RAR_MAIN_FIRSTVOLUME    = 0x0100
RAR_MAIN_ENCRYPTVER     = 0x0200

# flag bits of a file header
RAR_FILE_SPLIT_BEFORE   = 0x0001
RAR_FILE_SPLIT_AFTER    = 0x0002
RAR_FILE_PASSWORD       = 0x0004
RAR_FILE_COMMENT        = 0x0008
RAR_FILE_SOLID          = 0x0010
# the three DICTMASK bits select one of the RAR_FILE_DICT* values,
# or mark the entry as a directory when all of them are set
RAR_FILE_DICTMASK       = 0x00e0
RAR_FILE_DICT64         = 0x0000
RAR_FILE_DICT128        = 0x0020
RAR_FILE_DICT256        = 0x0040
RAR_FILE_DICT512        = 0x0060
RAR_FILE_DICT1024       = 0x0080
RAR_FILE_DICT2048       = 0x00a0
RAR_FILE_DICT4096       = 0x00c0
RAR_FILE_DIRECTORY      = 0x00e0
RAR_FILE_LARGE          = 0x0100
RAR_FILE_UNICODE        = 0x0200
RAR_FILE_SALT           = 0x0400
RAR_FILE_VERSION        = 0x0800
RAR_FILE_EXTTIME        = 0x1000
RAR_FILE_EXTFLAGS       = 0x2000

# flag bits of the end-of-archive block
RAR_ENDARC_NEXT_VOLUME  = 0x0001
RAR_ENDARC_DATACRC      = 0x0002
RAR_ENDARC_REVSPACE     = 0x0004

# flag bits shared by every block type
RAR_SKIP_IF_UNKNOWN     = 0x4000
RAR_LONG_BLOCK          = 0x8000

# host OS identifiers
RAR_OS_MSDOS = 0
RAR_OS_OS2   = 1
RAR_OS_WIN32 = 2
RAR_OS_UNIX  = 3
RAR_OS_MACOS = 4
RAR_OS_BEOS  = 5

# byte-string constants used internally
RAR_ID = bytes("Rar!\x1a\x07\x00", 'ascii')     # archive signature
ZERO = bytes("\0", 'ascii')                     # single NUL byte
EMPTY = bytes("", 'ascii')                      # empty byte string
143 # Public interface
class Error(Exception):
    """Root of the rarfile exception hierarchy."""


class BadRarFile(Error):
    """The archive contains incorrect data."""


class NotRarFile(Error):
    """The file is not a RAR archive."""


class BadRarName(Error):
    """The name of a multi-volume part cannot be constructed."""


class NoRarEntry(Error):
    """The requested file was not found in the archive."""


class PasswordRequired(Error):
    """The file cannot be extracted without a password."""


class NeedFirstVolume(Error):
    """Parsing has to start from the first volume."""
def is_rarfile(fn):
    '''Check quickly whether file is rar archive.

    Only the signature at the start of the file is compared.

    @param fn: name of the file to check
    @return: True if the file starts with the RAR signature
    '''
    f = open(fn, "rb")
    try:
        # try/finally instead of 'with' to keep old Python 2.x working
        buf = f.read(len(RAR_ID))
    finally:
        # do not leave the handle to be closed by the garbage collector
        f.close()
    return buf == RAR_ID
class RarInfo(object):
    '''An entry in rar archive.

    Derives from object so that __slots__ is honoured on Python 2 too
    (it is silently ignored on classic classes).

    @ivar filename:
        File name with relative path.
        Note that Rar uses "\\" as directory separator.
        Always unicode string.
    @ivar date_time:
        Modification time, tuple of (year, month, day, hour, minute, second).
    @ivar file_size:
        Uncompressed size.
    @ivar compress_size:
        Compressed size.
    @ivar compress_type:
        Compression method: 0x30 - 0x35.
    @ivar extract_version:
        Minimal Rar version needed for decompressing.
    @ivar host_os:
        Host OS type, one of RAR_OS_* constants.
    @ivar mode:
        File attributes. May be either dos-style or unix-style, depending on host_os.
    @ivar CRC:
        CRC-32 of uncompressed file, unsigned int.
    @ivar volume:
        Volume nr, starting from 0.
    @ivar type:
        One of RAR_BLOCK_* types.  Only entries with type==RAR_BLOCK_FILE are shown in .infolist().
    @ivar flags:
        For files, RAR_FILE_* bits.
    @ivar orig_filename:
        Byte string of non-unicode representation.

    @ivar mtime:
        Optional time field: Modification time, tuple of (year, month, day, hour, minute, second).
    @ivar ctime:
        Optional time field: ctime time.
    @ivar atime:
        Optional time field: access time.
    @ivar arctime:
        Optional time field: archival time.
    '''

    __slots__ = (
        'compress_size',
        'file_size',
        'host_os',
        'CRC',
        'extract_version',
        'compress_type',
        'mode',
        'type',
        'flags',
        'volume',
        'filename',
        'orig_filename',
        'date_time',

        # optional extended time fields
        # same format as date_time, but sec is float
        'mtime',
        'ctime',
        'atime',
        'arctime',

        # obsolete
        'unicode_filename',

        # RAR internals
        'name_size',
        'header_size',
        'header_crc',
        'file_offset',
        'add_size',
        'header_data',
        'header_unknown',
        'header_offset',
        'salt',
    )

    def isdir(self):
        '''Returns True if the entry is a directory.'''
        if self.type == RAR_BLOCK_FILE:
            # a directory has all dictionary-size bits set
            return (self.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY
        return False

    def needs_password(self):
        '''Returns True if the entry is password-protected.'''
        # convert the raw flag bit to a real boolean
        return (self.flags & RAR_FILE_PASSWORD) != 0
class RarFile:
    '''Rar archive handling.'''

    def __init__(self, rarfile, mode="r", charset=None, info_callback=None, crc_check = True):
        """Open and parse a RAR archive.

        @param rarfile: archive file name
        @param mode: only 'r' is supported.
        @param charset: fallback charset to use, if filenames are not already Unicode-enabled.
        @param info_callback: debug callback, gets to see all archive entries.
        @param crc_check: set to False to disable CRC checks
        @raise NotImplementedError: if mode is not 'r'
        @raise NotRarFile: if the file does not start with the RAR signature
        @raise NeedFirstVolume: if a later part of a multi-volume set is given
        @raise BadRarFile: if no main archive header could be read
        """
        self.rarfile = rarfile
        self.comment = None
        self._charset = charset or DEFAULT_CHARSET
        self._info_callback = info_callback

        self._info_list = []
        # old-style names are assumed until the main header says otherwise
        self._gen_volname = self._gen_oldvol
        self._needs_password = False
        self._password = None
        self._crc_check = crc_check

        self._main = None

        if mode != "r":
            raise NotImplementedError("RarFile supports only mode=r")

        self._parse()

        # a truncated/corrupt archive may end before the main header
        if self._main is None:
            raise BadRarFile("Main archive header not found")

        if self._main.flags & RAR_MAIN_COMMENT:
            self._read_comment()

    def setpassword(self, password):
        '''Sets the password to use when extracting.'''
        self._password = password

    def needs_password(self):
        '''Returns True if any archive entries require password for extraction.'''
        return self._needs_password

    def namelist(self):
        '''Return list of filenames in archive.'''
        return [f.filename for f in self._info_list]

    def infolist(self):
        '''Return RarInfo objects for all files/directories in archive.'''
        return self._info_list

    def getinfo(self, fname):
        '''Return RarInfo for file.

        Both "/" and "\\" are accepted as directory separator.

        @return: the matching RarInfo, or None if there is no such entry
        '''
        fname2 = fname.replace("/", "\\")
        for f in self._info_list:
            if fname == f.filename or fname2 == f.filename:
                return f
        return None

    def open(self, fname, psw = None):
        '''Return open file object, where the data can be read.

        The object has only .read() and .close() methods.

        @param fname: name of the archive entry
        @param psw: password for this entry; defaults to the one given to setpassword()
        @raise NoRarEntry: if there is no such entry
        @raise TypeError: if the entry is a directory
        @raise PasswordRequired: if the entry is encrypted and no password is known
        '''
        inf = self.getinfo(fname)
        if not inf:
            raise NoRarEntry("No such file")

        if inf.isdir():
            raise TypeError("Directory does not have any data")
        if inf.needs_password():
            psw = psw or self._password
            if psw is None:
                raise PasswordRequired("File %s requires password" % fname)
        else:
            # never hand a password to the tool for unencrypted entries
            psw = None

        is_solid = self._main.flags & RAR_MAIN_SOLID
        uses_vols = self._main.flags & RAR_MAIN_VOLUME
        if inf.compress_type == 0x30 and psw is None:
            # stored without compression: read it directly
            return self._open_clear(inf)
        elif USE_EXTRACT_HACK and not is_solid and not uses_vols:
            return self._open_hack(inf, psw)
        else:
            return self._open_unrar(self.rarfile, inf, psw)

    def read(self, fname, psw = None):
        """Return uncompressed data for archive entry.

        For longer files using .open() may be better idea.
        """
        f = self.open(fname, psw)
        try:
            return f.read()
        finally:
            # release the reader even when reading or the CRC check fails
            f.close()

    def close(self):
        """Release open resources."""
        pass

    def printdir(self):
        """Print archive file list to stdout."""
        for f in self._info_list:
            print(f.filename)

    # store entry
    def _process_entry(self, item):
        """Record a parsed header and pass it to the debug callback."""
        # RAR_BLOCK_NEWSUB has files too: CMT, RR
        if item.type == RAR_BLOCK_FILE:
            # use only first part
            if (item.flags & RAR_FILE_SPLIT_BEFORE) == 0:
                self._info_list.append(item)
                # remember if any items require password
                if item.needs_password():
                    self._needs_password = True
            elif len(self._info_list) > 0:
                # final crc is in last block
                old = self._info_list[-1]
                old.CRC = item.CRC

        if self._info_callback:
            self._info_callback(item)

    # read rar
    def _parse(self):
        """Read the headers of all volumes and collect the entries."""
        fd = open(self.rarfile, "rb")
        try:
            marker = fd.read(len(RAR_ID))
            if marker != RAR_ID:
                raise NotRarFile("Not a Rar archive")

            volume = 0  # first vol (.rar) is 0
            more_vols = 0
            while 1:
                h = self._parse_header(fd)
                if not h:
                    if not more_vols:
                        break
                    # current volume is finished, continue with the next one
                    volume += 1
                    fd.close()
                    fd = open(self._gen_volname(volume), "rb")
                    more_vols = 0
                    continue
                h.volume = volume

                if h.type == RAR_BLOCK_MAIN and not self._main:
                    self._main = h
                    if h.flags & RAR_MAIN_VOLUME:
                        if not h.flags & RAR_MAIN_FIRSTVOLUME:
                            raise NeedFirstVolume("Need to start from first volume")
                    if h.flags & RAR_MAIN_NEWNUMBERING:
                        self._gen_volname = self._gen_newvol
                elif h.type == RAR_BLOCK_ENDARC:
                    more_vols = h.flags & RAR_ENDARC_NEXT_VOLUME

                # store it
                self._process_entry(h)

                # go to next header
                if h.add_size > 0:
                    fd.seek(h.file_offset + h.add_size, 0)
        finally:
            # closes the current volume on both success and error
            fd.close()

    # read single header
    def _parse_header(self, fd):
        """Read one block header from fd; returns None on EOF or bad header."""
        h = self._parse_block_header(fd)
        if h and (h.type == RAR_BLOCK_FILE or h.type == RAR_BLOCK_SUB):
            self._parse_file_header(h)
        return h

    # common header
    def _parse_block_header(self, fd):
        """Read the fields common to all block types.

        @return: RarInfo with the header fields filled in, or None on EOF,
            on a truncated header or on header CRC mismatch
        """
        HDRLEN = 7
        h = RarInfo()
        h.header_offset = fd.tell()
        buf = fd.read(HDRLEN)
        # a short read is trailing garbage or truncation: report EOF
        if len(buf) < HDRLEN:
            return None

        t = unpack("<HBHH", buf)
        h.header_crc, h.type, h.flags, h.header_size = t
        h.header_unknown = h.header_size - HDRLEN

        if h.header_size > HDRLEN:
            h.header_data = fd.read(h.header_size - HDRLEN)
        else:
            h.header_data = EMPTY
        h.file_offset = fd.tell()

        if h.flags & RAR_LONG_BLOCK:
            # the size field itself may be missing in a damaged header
            if len(h.header_data) < 4:
                return None
            h.add_size = unpack("<L", h.header_data[:4])[0]
        else:
            h.add_size = 0

        # no crc check on that
        if h.type == RAR_BLOCK_MARK:
            return h

        # check crc; which bytes are covered depends on the block type
        if h.type == RAR_BLOCK_MAIN:
            crcdat = buf[2:] + h.header_data[:6]
        elif h.type == RAR_BLOCK_OLD_AUTH:
            crcdat = buf[2:] + h.header_data[:8]
        elif h.type == RAR_BLOCK_OLD_SUB:
            crcdat = buf[2:] + h.header_data + fd.read(h.add_size)
        else:
            crcdat = buf[2:] + h.header_data

        calc_crc = crc32(crcdat) & 0xFFFF

        # return good header
        if h.header_crc == calc_crc:
            return h

        # instead panicing, send eof
        return None

    # read file-specific header
    def _parse_file_header(self, h):
        """Decode the file-specific part of h.header_data into h."""
        HDRLEN = 4+4+1+4+4+1+1+2+4
        fld = unpack("<LLBLLBBHL", h.header_data[ : HDRLEN])
        h.compress_size = fld[0]
        h.file_size = fld[1]
        h.host_os = fld[2]
        h.CRC = fld[3]
        h.date_time = self._parse_dos_time(fld[4])
        h.extract_version = fld[5]
        h.compress_type = fld[6]
        h.name_size = fld[7]
        h.mode = fld[8]
        pos = HDRLEN

        if h.flags & RAR_FILE_LARGE:
            # upper 32 bits of both sizes
            h1, h2 = unpack("<LL", h.header_data[pos:pos+8])
            h.compress_size |= h1 << 32
            h.file_size |= h2 << 32
            pos += 8

        name = h.header_data[pos : pos + h.name_size ]
        pos += h.name_size
        if h.flags & RAR_FILE_UNICODE:
            nul = name.find(ZERO)
            if nul < 0:
                # no separator: the whole field is the name in UTF-8
                h.orig_filename = name
                h.unicode_filename = name.decode("utf8", "replace")
            else:
                # plain name, NUL, then the compressed unicode form
                h.orig_filename = name[:nul]
                u = _UnicodeFilename(h.orig_filename, name[nul + 1 : ])
                h.unicode_filename = u.decode()
        else:
            h.orig_filename = name
            h.unicode_filename = name.decode(self._charset, "replace")

        h.filename = h.unicode_filename

        if h.flags & RAR_FILE_SALT:
            h.salt = h.header_data[pos : pos + 8]
            pos += 8
        else:
            h.salt = None

        # optional extended time stamps
        if h.flags & RAR_FILE_EXTTIME:
            pos = self._parse_ext_time(h, pos)
        else:
            h.mtime = h.atime = h.ctime = h.arctime = None

        # unknown contents
        h.header_unknown -= pos

        return h

    def _parse_dos_time(self, stamp):
        """Convert a packed 32-bit DOS timestamp to a time tuple.

        @return: (year, month, day, hour, minute, second)
        """
        # DOS stores seconds with 2-second resolution
        sec = (stamp & 0x1F) * 2
        stamp = stamp >> 5
        minute = stamp & 0x3F
        stamp = stamp >> 6
        hr = stamp & 0x1F
        stamp = stamp >> 5
        day = stamp & 0x1F
        stamp = stamp >> 5
        mon = stamp & 0x0F
        stamp = stamp >> 4
        yr = (stamp & 0x7F) + 1980
        return (yr, mon, day, hr, minute, sec)

    def _parse_ext_time(self, h, pos):
        """Decode the extended time fields starting at pos.

        @return: position just after the extended time data
        """
        data = h.header_data
        # one 4-bit group per field, mtime in the highest bits
        flags = unpack("<H", data[pos : pos + 2])[0]
        pos += 2
        h.mtime, pos = self._parse_xtime(flags >> 3*4, data, pos, h.date_time)
        h.ctime, pos = self._parse_xtime(flags >> 2*4, data, pos)
        h.atime, pos = self._parse_xtime(flags >> 1*4, data, pos)
        h.arctime, pos = self._parse_xtime(flags >> 0*4, data, pos)
        return pos

    def _parse_xtime(self, flag, data, pos, dostime = None):
        """Decode one extended time field.

        @param flag: 4-bit group: bit 3 - field present, bit 2 - add one
            second, bits 0..1 - number of remainder bytes stored
        @param data: header data
        @param pos: current position in data
        @param dostime: already parsed DOS time, if the field has none of its own
        @return: tuple (time tuple with float seconds or None, new position)
        """
        unit = 10000000.0 # 100 ns units
        if flag & 8:
            if not dostime:
                t = unpack("<I", data[pos : pos + 4])[0]
                dostime = self._parse_dos_time(t)
                pos += 4
            # remainder is a 24-bit value whose low bytes are omitted;
            # the stored bytes come lowest first and the last one read
            # is the most significant
            rem = 0
            cnt = flag & 3
            for i in range(cnt):
                b = unpack("B", data[pos : pos + 1])[0]
                rem = (b << 16) | (rem >> 8)
                pos += 1
            sec = dostime[5] + rem / unit
            if flag & 4:
                # the odd second that the DOS stamp cannot express
                sec += 1
            dostime = dostime[:5] + (sec,)
        return dostime, pos

    # new-style volume name
    def _gen_newvol(self, volume):
        """Return file name of the given volume (0-based), new-style numbering.

        The last group of digits in the archive name is replaced by the
        1-based volume number, keeping its width.
        """
        # allow % in filenames
        fn = self.rarfile.replace("%", "%%")

        m = re.search(r"([0-9][0-9]*)[^0-9]*$", fn)
        if not m:
            raise BadRarName("Cannot construct volume name")
        n1 = m.start(1)
        n2 = m.end(1)
        fmt = "%%0%dd" % (n2 - n1)
        volfmt = fn[:n1] + fmt + fn[n2:]
        return volfmt % (volume + 1)

    # old-style volume naming
    def _gen_oldvol(self, volume):
        """Return file name of the given volume (0-based), old-style numbering.

        Volume 0 is the archive itself, the following ones get the
        extensions .r00 - .r99, .s00 - .s99, and so on up to .z99.
        """
        if volume == 0:
            return self.rarfile
        # although 'rar' can generate them, it's unlikely they work well
        if volume > 900:
            raise BadRarName("Cannot construct volume name")

        # strip extension; splitext() looks at the last path component
        # only, so a dot in a directory name is not taken for one
        base = os.path.splitext(self.rarfile)[0]

        # generate new extension
        d, m = divmod(volume - 1, 100)
        ext = '.%c%02d' % (ord('r') + d, m)
        return base + ext

    def _open_clear(self, inf):
        """Open a stored (uncompressed) entry for direct reading."""
        return DirectReader(self, inf)

    # put file compressed data into temporary .rar archive, and run
    # unrar on that, thus avoiding unrar going over whole archive
    def _open_hack(self, inf, psw = None):
        """Extract via a temporary archive containing only this entry."""
        BSIZE = 32*1024

        size = inf.compress_size + inf.header_size
        rf = open(self.rarfile, "rb")
        try:
            rf.seek(inf.header_offset)

            tmpfd, tmpname = mkstemp(suffix='.rar')
            tmpf = os.fdopen(tmpfd, "wb")
            try:
                try:
                    # create main header: crc, type, flags, size, res1, res2
                    mh = pack("<HBHHHL", 0x90CF, 0x73, 0, 13, 0, 0)
                    tmpf.write(RAR_ID + mh)
                    while size > 0:
                        buf = rf.read(min(size, BSIZE))
                        if not buf:
                            raise BadRarFile('read failed - broken archive')
                        tmpf.write(buf)
                        size -= len(buf)
                finally:
                    # must be closed before it can be removed or reopened
                    tmpf.close()
            except:
                # cleanup only; the exception is passed on unchanged
                os.unlink(tmpname)
                raise
        finally:
            rf.close()

        try:
            return self._open_unrar(tmpname, inf, psw, tmpname)
        except:
            # tool could not be started: do not leave the temp archive behind
            if os.path.exists(tmpname):
                os.unlink(tmpname)
            raise

    # extract using unrar
    def _open_unrar(self, rarfile, inf, psw = None, tmpfile = None):
        """Start the extraction tool and return a reader for its output.

        @param rarfile: archive to give to the tool
        @param inf: RarInfo of the entry to extract
        @param psw: password, or None
        @param tmpfile: temporary archive to remove when the reader is closed
        """
        cmd = [EXTRACT_TOOL] + list(EXTRACT_ARGS)
        if psw is not None:
            cmd.append("-p" + psw)
        cmd.append(rarfile)

        # not giving filename avoids encoding related problems
        if not tmpfile:
            fn = inf.filename
            fn = fn.replace('\\', os.sep)
            cmd.append(fn)

        # 3xPIPE seems unreliable, at least on osx
        null = None
        try:
            # os.devnull is the portable spelling of /dev/null
            null = open(os.devnull, "wb")
            _in = null
            _err = null
        except IOError:
            _in = PIPE
            _err = PIPE

        # run unrar
        try:
            p = Popen(cmd, stdout = PIPE, stdin = _in, stderr = _err)
        finally:
            # the child has its own copy of the descriptor by now
            if null is not None:
                null.close()
        return PipeReader(self, inf, p, tmpfile)

    def _read_comment(self):
        """Fetch the archive comment into self.comment using COMMENT_TOOL.

        self.comment is left untouched if the tool is disabled, cannot
        be run or reports an error.
        """
        if not COMMENT_TOOL:
            return
        tmpfd, tmpname = mkstemp(suffix='.txt')
        # the tool writes the file by name, the descriptor is not needed
        os.close(tmpfd)
        try:
            cmd = [COMMENT_TOOL] + list(COMMENT_ARGS)
            cmd.append(self.rarfile)
            cmd.append(tmpname)
            try:
                p = Popen(cmd)
                if p.wait() != 0:
                    # tool failed, there is nothing to decode
                    return
                f = open(tmpname, 'rb')
                try:
                    cmt = f.read()
                finally:
                    f.close()
                try:
                    self.comment = cmt.decode('utf8')
                except UnicodeError:
                    self.comment = cmt.decode(self._charset, 'replace')
            except (OSError, IOError):
                # best effort: a missing tool must not prevent opening
                pass
        finally:
            os.unlink(tmpname)
677 # handle unicode filename compression
678 class _UnicodeFilename:
679 def __init__(self, name, encdata):
680 self.std_name = bytearray(name)
681 self.encdata = bytearray(encdata)
682 self.pos = self.encpos = 0
683 self.buf = bytearray()
685 def enc_byte(self):
686 c = self.encdata[self.encpos]
687 self.encpos += 1
688 return c
690 def std_byte(self):
691 return self.std_name[self.pos]
693 def put(self, lo, hi):
694 self.buf.append(lo)
695 self.buf.append(hi)
696 self.pos += 1
698 def decode(self):
699 hi = self.enc_byte()
700 flagbits = 0
701 while self.encpos < len(self.encdata):
702 if flagbits == 0:
703 flags = self.enc_byte()
704 flagbits = 8
705 flagbits -= 2
706 t = (flags >> flagbits) & 3
707 if t == 0:
708 self.put(self.enc_byte(), 0)
709 elif t == 1:
710 self.put(self.enc_byte(), hi)
711 elif t == 2:
712 self.put(self.enc_byte(), self.enc_byte())
713 else:
714 n = self.enc_byte()
715 if n & 0x80:
716 c = self.enc_byte()
717 for i in range((n & 0x7f) + 2):
718 lo = (self.std_byte() + c) & 0xFF
719 self.put(lo, hi)
720 else:
721 for i in range(n + 2):
722 self.put(self.std_byte(), 0)
723 return self.buf.decode("utf-16le", "replace")
726 class BaseReader:
727 """Base class for 'file-like' object that RarFile.open() returns.
729 Provides public methods and common crc checking.
732 def __init__(self, rf, inf, tempfile = None):
733 self.rf = rf
734 self.inf = inf
735 self.crc_check = rf._crc_check
736 self.CRC = 0
737 self.remain = inf.file_size
738 self.tempfile = tempfile
739 self.fd = None
741 def read(self, cnt = None):
742 """Read all or specified amount of data from archive entry."""
744 # sanitize cnt
745 if cnt is None:
746 cnt = self.remain
747 elif cnt > self.remain:
748 cnt = self.remain
749 if cnt <= 0:
750 return EMPTY
752 # actual read
753 data = self._read(cnt)
754 if data:
755 self.CRC = crc32(data, self.CRC)
756 self.remain -= len(data)
758 # done?
759 if not data or self.remain == 0:
760 self.close()
761 self._check()
762 return data
764 def _check(self):
765 """Check final CRC."""
766 if not self.crc_check:
767 return
768 if self.remain != 0:
769 raise BadRarFile("Failed the read enough data")
770 crc = self.CRC
771 if crc < 0:
772 crc += (long(1) << 32)
773 if crc != self.inf.CRC:
774 raise BadRarFile("Corrupt file - CRC check failed")
776 def _read(self, cnt):
777 """Actual read that gets sanitized cnt."""
779 def close(self):
780 """Close open resources."""
782 if self.fd:
783 self.fd.close()
784 self.fd = None
785 if self.tempfile:
786 os.unlink(self.tempfile)
787 self.tempfile = None
789 def __del__(self):
790 """Hook delete to make sure tempfile is removed."""
791 self.close()
class PipeReader(BaseReader):
    """Read data from pipe, handle tempfile cleanup."""

    def __init__(self, rf, inf, proc, tempfile=None):
        """
        @param rf: the RarFile the entry belongs to
        @param inf: RarInfo of the entry being read
        @param proc: running extraction process whose stdout is a pipe
        @param tempfile: name of a temporary file to remove on close, or None
        """
        BaseReader.__init__(self, rf, inf, tempfile)
        self.proc = proc
        self.fd = proc.stdout

    def _read(self, cnt):
        """Read from pipe."""
        return self.fd.read(cnt)

    def close(self):
        """Close the pipe, wait for the process and remove the tempfile."""
        # closing our end makes a still-writing child stop
        if self.fd:
            self.fd.close()
            self.fd = None

        # getattr: may be called via __del__ before __init__ has finished
        proc = getattr(self, 'proc', None)
        if proc is not None:
            self.proc = None
            for pipe in (proc.stdin, proc.stderr):
                if pipe:
                    pipe.close()
            # reap the child so that it does not stay around as a zombie;
            # this also makes sure it no longer uses the temporary archive
            proc.wait()

        # removes the tempfile
        BaseReader.close(self)
class DirectReader(BaseReader):
    """Read uncompressed data directly from archive."""

    def __init__(self, rf, inf):
        """
        @param rf: the RarFile the entry belongs to
        @param inf: RarInfo of the (stored) entry being read
        @raise BadRarFile: if the entry header cannot be read again
        """
        BaseReader.__init__(self, rf, inf)
        self.vol = inf.volume
        self.size = inf.file_size

        # position just after the header of the entry's first part
        self.fd = open(self.rf._gen_volname(self.vol), "rb")
        self.fd.seek(self.inf.header_offset, 0)
        self.cur = self.rf._parse_header(self.fd)
        if not self.cur:
            raise BadRarFile("Unexpected EOF")
        self.cur_avail = self.cur.add_size

    def _read(self, cnt):
        """Read from potentially multi-volume archive."""

        # collect the pieces and join once instead of repeated concatenation
        parts = []
        while cnt > 0:
            # next vol needed?
            if self.cur_avail == 0:
                if not self._open_next():
                    break

            # fd is in read pos, do the read
            if cnt > self.cur_avail:
                data = self.fd.read(self.cur_avail)
            else:
                data = self.fd.read(cnt)
            if not data:
                break

            # got some data
            cnt -= len(data)
            self.cur_avail -= len(data)
            parts.append(data)

        return EMPTY.join(parts)

    def _open_next(self):
        """Proceed to next volume.

        @return: False if the entry does not continue in another volume,
            True when positioned at the data of the next part
        @raise BadRarFile: if the next volume does not continue this entry
        """

        # is the file split over archives?
        if (self.cur.flags & RAR_FILE_SPLIT_AFTER) == 0:
            return False

        # open next part
        self.vol += 1
        fd = open(self.rf._gen_volname(self.vol), "rb")
        # the finished volume is not needed any more
        if self.fd:
            self.fd.close()
        self.fd = fd

        # loop until first file header
        while 1:
            cur = self.rf._parse_header(fd)
            if not cur:
                raise BadRarFile("Unexpected EOF")
            if cur.type in (RAR_BLOCK_MARK, RAR_BLOCK_MAIN):
                if cur.add_size:
                    fd.seek(cur.add_size, 1)
                continue
            # blocks that are not file-like carry no name at all
            if getattr(cur, 'orig_filename', None) != self.inf.orig_filename:
                raise BadRarFile("Did not found file entry")
            self.cur = cur
            self.cur_avail = cur.add_size
            return True
# provide a minimal bytearray() on Pythons that do not have one
try:
    bytearray()
except NameError:
    import array

    class bytearray:
        """Tiny subset of bytearray: append, indexing, len() and decode()."""

        def __init__(self, val = ''):
            self.arr = array.array('B', val)

        def append(self, item):
            return self.arr.append(item)

        def __getitem__(self, idx):
            return self.arr.__getitem__(idx)

        def __len__(self):
            return self.arr.__len__()

        def decode(self, *args):
            raw = self.arr.tostring()
            return raw.decode(*args)