Draft news for next release.
[rarfile.git] / rarfile.py
blob14de90cd38433e05b1225412af1b910a885c72d7
1 # rarfile.py
3 # Copyright (c) 2005-2013 Marko Kreen <markokr@gmail.com>
5 # Permission to use, copy, modify, and/or distribute this software for any
6 # purpose with or without fee is hereby granted, provided that the above
7 # copyright notice and this permission notice appear in all copies.
9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 r"""RAR archive reader.
19 This is a Python module for RAR archive reading. The interface
20 is made as :mod:`zipfile`-like as possible.
22 Basic logic:
23 - Parse archive structure with Python.
24 - Extract non-compressed files with Python
25 - Extract compressed files with unrar.
26 - Optionally write compressed data to temp file to speed up unrar,
27 otherwise it needs to scan whole archive on each execution.
29 Example::
31 import rarfile
33 rf = rarfile.RarFile('myarchive.rar')
34 for f in rf.infolist():
35 print(f.filename, f.file_size)
36 if f.filename == 'README':
37 print(rf.read(f))
39 Archive files can also be accessed via file-like object returned
40 by :meth:`RarFile.open`::
42 import rarfile
44 with rarfile.RarFile('archive.rar') as rf:
45 with rf.open('README') as f:
46 for ln in f:
47 print(ln.strip())
49 There are few module-level parameters to tune behaviour,
50 here they are with defaults, and reason to change it::
52 import rarfile
54 # Set to full path of unrar.exe if it is not in PATH
55 rarfile.UNRAR_TOOL = "unrar"
57 # Set to 0 if you don't look at comments and want to
58 # avoid wasting time for parsing them
59 rarfile.NEED_COMMENTS = 1
61 # Set to 1 if you don't want to deal with decoding comments
62 # from unknown encoding. rarfile will try couple of common
63 # encodings in sequence.
64 rarfile.UNICODE_COMMENTS = 0
66 # Set to 1 if you prefer timestamps to be datetime objects
67 # instead of tuples
68 rarfile.USE_DATETIME = 0
70 # Set to '/' to be more compatible with zipfile
71 rarfile.PATH_SEP = '\\'
73 For more details, refer to source.
75 """
77 __version__ = '2.6'
79 # export only interesting items
80 __all__ = ['is_rarfile', 'RarInfo', 'RarFile', 'RarExtFile']
83 ## Imports and compat - support both Python 2.x and 3.x
86 import sys, os, struct, errno
87 from struct import pack, unpack
88 from binascii import crc32
89 from tempfile import mkstemp
90 from subprocess import Popen, PIPE, STDOUT, CalledProcessError
91 from datetime import datetime
93 # only needed for encrypted headers
94 try:
95 from Crypto.Cipher import AES
96 try:
97 from hashlib import sha1
98 except ImportError:
99 from sha import new as sha1
100 _have_crypto = 1
101 except ImportError:
102 _have_crypto = 0
# Compatibility shims so the rest of the module can be written
# against Python 3 names on both 2.x and 3.x.
if sys.hexversion >= 0x3000000:
    # Python 3 has no separate 'unicode' type
    unicode = str
else:
    # prefer 3.x behaviour: lazy range()
    range = xrange

    # py2.6 has broken bytes(), replace with 2-arg version
    # that ignores the encoding
    def bytes(s, enc):
        return str(s)
# Provide a minimal bytearray() on interpreters that lack the builtin.
try:
    bytearray
except NameError:
    import array

    class bytearray:
        """Tiny bytearray replacement backed by array.array('B')."""

        def __init__(self, val = ''):
            self.arr = array.array('B', val)
            # delegate the few operations this module needs
            # straight to the underlying array
            for meth in ('append', '__getitem__', '__len__'):
                setattr(self, meth, getattr(self.arr, meth))

        def decode(self, *args):
            raw = self.arr.tostring()
            return raw.decode(*args)
# Optimized .readinto() requires memoryview; remember whether
# this interpreter provides it.
try:
    memoryview
except NameError:
    have_memoryview = 0
else:
    have_memoryview = 1
# struct.Struct is missing on older Pythons, emulate the parts used here
try:
    from struct import Struct
except ImportError:
    class Struct:
        """Fallback for struct.Struct built on module-level pack()/unpack()."""

        def __init__(self, fmt):
            self.size = struct.calcsize(fmt)
            self.format = fmt

        def pack(self, *args):
            return pack(self.format, *args)

        def unpack(self, buf):
            return unpack(self.format, buf)

        def unpack_from(self, buf, ofs = 0):
            # slice out exactly one record starting at 'ofs'
            end = ofs + self.size
            return unpack(self.format, buf[ofs:end])
# Superclass for file-like objects; fall back to a stub
# when the io module is not available.
try:
    from io import RawIOBase
except ImportError:
    class RawIOBase(object):
        """Placeholder base class with a no-op close()."""

        def close(self):
            return None
160 ## Module configuration. Can be tuned after importing.
#: default fallback charset
DEFAULT_CHARSET = "windows-1252"

#: list of encodings to try, with fallback to DEFAULT_CHARSET if none succeed
TRY_ENCODINGS = ('utf8', 'utf-16le')

#: 'unrar', 'rar' or full path to either one
UNRAR_TOOL = "unrar"

#: Command line args to use for opening file for reading.
OPEN_ARGS = ('p', '-inul')

#: Command line args to use for extracting file to disk.
EXTRACT_ARGS = ('x', '-y', '-idq')

#: args for testrar()
TEST_ARGS = ('t', '-idq')

#: args for checking the tool
CHECK_ARGS = ('-h',)

#
# Allow use of tool that is not compatible with unrar.
#
# By default use 'bsdtar' which is 'tar' program that
# sits on top of libarchive.
#
# Problems with libarchive RAR backend:
# - Does not support solid archives.
# - Does not support password-protected archives.
#

ALT_TOOL = 'bsdtar'
ALT_OPEN_ARGS = ('-x', '--to-stdout', '-f')
ALT_EXTRACT_ARGS = ('-x', '-f')
ALT_TEST_ARGS = ('-t', '-f')
ALT_CHECK_ARGS = ('--help',)

#: whether to speed up decompression by using tmp archive
USE_EXTRACT_HACK = 1

#: limit the filesize for tmp archive usage
HACK_SIZE_LIMIT = 20*1024*1024

#: whether to parse file/archive comments.
NEED_COMMENTS = 1

#: whether to convert comments to unicode strings
UNICODE_COMMENTS = 0

#: Convert RAR time tuple into datetime() object
USE_DATETIME = 0

#: Separator for path name components.  RAR internally uses '\\'.
#: Use '/' to be similar with zipfile.
PATH_SEP = '\\'
220 ## rar constants
# block types
RAR_BLOCK_MARK          = 0x72 # r
RAR_BLOCK_MAIN          = 0x73 # s
RAR_BLOCK_FILE          = 0x74 # t
RAR_BLOCK_OLD_COMMENT   = 0x75 # u
RAR_BLOCK_OLD_EXTRA     = 0x76 # v
RAR_BLOCK_OLD_SUB       = 0x77 # w
RAR_BLOCK_OLD_RECOVERY  = 0x78 # x
RAR_BLOCK_OLD_AUTH      = 0x79 # y
RAR_BLOCK_SUB           = 0x7a # z
RAR_BLOCK_ENDARC        = 0x7b # {

# flags for RAR_BLOCK_MAIN
RAR_MAIN_VOLUME         = 0x0001
RAR_MAIN_COMMENT        = 0x0002
RAR_MAIN_LOCK           = 0x0004
RAR_MAIN_SOLID          = 0x0008
RAR_MAIN_NEWNUMBERING   = 0x0010
RAR_MAIN_AUTH           = 0x0020
RAR_MAIN_RECOVERY       = 0x0040
RAR_MAIN_PASSWORD       = 0x0080
RAR_MAIN_FIRSTVOLUME    = 0x0100
RAR_MAIN_ENCRYPTVER     = 0x0200

# flags for RAR_BLOCK_FILE
RAR_FILE_SPLIT_BEFORE   = 0x0001
RAR_FILE_SPLIT_AFTER    = 0x0002
RAR_FILE_PASSWORD       = 0x0004
RAR_FILE_COMMENT        = 0x0008
RAR_FILE_SOLID          = 0x0010
# bits 5..7 hold the dictionary size; all three set means "directory"
RAR_FILE_DICTMASK       = 0x00e0
RAR_FILE_DICT64         = 0x0000
RAR_FILE_DICT128        = 0x0020
RAR_FILE_DICT256        = 0x0040
RAR_FILE_DICT512        = 0x0060
RAR_FILE_DICT1024       = 0x0080
RAR_FILE_DICT2048       = 0x00a0
RAR_FILE_DICT4096       = 0x00c0
RAR_FILE_DIRECTORY      = 0x00e0
RAR_FILE_LARGE          = 0x0100
RAR_FILE_UNICODE        = 0x0200
RAR_FILE_SALT           = 0x0400
RAR_FILE_VERSION        = 0x0800
RAR_FILE_EXTTIME        = 0x1000
RAR_FILE_EXTFLAGS       = 0x2000

# flags for RAR_BLOCK_ENDARC
RAR_ENDARC_NEXT_VOLUME  = 0x0001
RAR_ENDARC_DATACRC      = 0x0002
RAR_ENDARC_REVSPACE     = 0x0004
RAR_ENDARC_VOLNR        = 0x0008

# flags common to all blocks
RAR_SKIP_IF_UNKNOWN     = 0x4000
RAR_LONG_BLOCK          = 0x8000

# Host OS types
RAR_OS_MSDOS = 0
RAR_OS_OS2   = 1
RAR_OS_WIN32 = 2
RAR_OS_UNIX  = 3
RAR_OS_MACOS = 4
RAR_OS_BEOS  = 5

# Compression methods - '0'..'5'
RAR_M0 = 0x30
RAR_M1 = 0x31
RAR_M2 = 0x32
RAR_M3 = 0x33
RAR_M4 = 0x34
RAR_M5 = 0x35
296 ## internal constants
# Byte-string literals, built through bytes() so the same
# source works on Python 2 and 3.
RAR_ID = bytes("Rar!\x1a\x07\x00", 'ascii')     # archive signature
ZERO = bytes("\0", 'ascii')
EMPTY = bytes("", 'ascii')

# Precompiled little-endian struct layouts
S_BLK_HDR = Struct('<HBHH')             # crc, type, flags, size
S_FILE_HDR = Struct('<LLBLLBBHL')       # fixed part of file header
S_LONG = Struct('<L')
S_SHORT = Struct('<H')
S_BYTE = Struct('<B')
S_COMMENT_HDR = Struct('<HBBH')         # old-style comment subblock header
311 ## Public interface
class Error(Exception):
    """Base class for rarfile errors."""


# -- problems found while parsing or using the archive from Python --

class BadRarFile(Error):
    """Incorrect data in archive."""

class NotRarFile(Error):
    """The file is not RAR archive."""

class BadRarName(Error):
    """Cannot guess multipart name components."""

class NoRarEntry(Error):
    """File not found in RAR"""

class PasswordRequired(Error):
    """File requires password"""

class NeedFirstVolume(Error):
    """Need to start from first volume."""

class NoCrypto(Error):
    """Cannot parse encrypted headers - no crypto available."""


# -- problems reported by the external unrar/rar tool --

class RarExecError(Error):
    """Problem reported by unrar/rar."""

class RarWarning(RarExecError):
    """Non-fatal error"""

class RarFatalError(RarExecError):
    """Fatal error"""

class RarCRCError(RarExecError):
    """CRC error during unpacking"""

class RarLockedArchiveError(RarExecError):
    """Must not modify locked archive"""

class RarWriteError(RarExecError):
    """Write error"""

class RarOpenError(RarExecError):
    """Open error"""

class RarUserError(RarExecError):
    """User error"""

class RarMemoryError(RarExecError):
    """Memory error"""

class RarCreateError(RarExecError):
    """Create error"""

class RarNoFilesError(RarExecError):
    """No files that match pattern were found"""

class RarUserBreak(RarExecError):
    """User stop"""

class RarUnknownError(RarExecError):
    """Unknown exit code"""

class RarSignalExit(RarExecError):
    """Unrar exited with signal"""
def is_rarfile(xfile):
    '''Check quickly whether file is rar archive.

    Reads only as many bytes as the RAR signature is long and
    compares them against RAR_ID.

    Parameters:

        xfile
            whatever XFile() accepts as its argument.

    Returns True if the leading bytes equal the RAR signature.
    '''
    fd = XFile(xfile)
    try:
        buf = fd.read(len(RAR_ID))
    finally:
        # close even when read() fails, so the handle is not leaked
        fd.close()
    return buf == RAR_ID
class RarInfo(object):
    r'''An entry in rar archive.

    :mod:`zipfile`-compatible fields:

        filename
            File name with relative path.
            Default path separator is '\\', to change set rarfile.PATH_SEP.
            Always unicode string.
        date_time
            Modification time, tuple of (year, month, day, hour, minute, second).
            Or datetime() object if USE_DATETIME is set.
        file_size
            Uncompressed size.
        compress_size
            Compressed size.
        CRC
            CRC-32 of uncompressed file, unsigned int.
        comment
            File comment. Byte string or None.  Use UNICODE_COMMENTS
            to get automatic decoding to unicode.
        volume
            Volume nr, starting from 0.

    RAR-specific fields:

        compress_type
            Compression method: 0x30 - 0x35.
        extract_version
            Minimal Rar version needed for decompressing.
        host_os
            Host OS type, one of RAR_OS_* constants.
        mode
            File attributes. May be either dos-style or unix-style, depending on host_os.
        volume_file
            Volume file name, where file starts.
        mtime
            Optional time field: Modification time, with float seconds.
            Same as .date_time but with more precision.
        ctime
            Optional time field: creation time, with float seconds.
        atime
            Optional time field: last access time, with float seconds.
        arctime
            Optional time field: archival time, with float seconds.

    Internal fields:

        type
            One of RAR_BLOCK_* types.  Only entries with type==RAR_BLOCK_FILE are shown in .infolist().
        flags
            For files, RAR_FILE_* bits.
    '''

    __slots__ = (
        # zipfile-compatible fields
        'filename',
        'file_size',
        'compress_size',
        'date_time',
        'comment',
        'CRC',
        'volume',
        'orig_filename', # bytes in unknown encoding

        # rar-specific fields
        'extract_version',
        'compress_type',
        'host_os',
        'mode',
        'type',
        'flags',

        # optional extended time fields
        # tuple where the sec is float, or datetime().
        'mtime', # same as .date_time
        'ctime',
        'atime',
        'arctime',

        # RAR internals
        'name_size',
        'header_size',
        'header_crc',
        'file_offset',
        'add_size',
        'header_data',
        'header_base',
        'header_offset',
        'salt',
        'volume_file',
    )

    def isdir(self):
        '''Returns True if the entry is a directory.'''
        if self.type == RAR_BLOCK_FILE:
            # all three dictionary-size bits set marks a directory
            return (self.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY
        return False

    def needs_password(self):
        '''Returns True if the RAR_FILE_PASSWORD flag is set on the entry.'''
        # explicit comparison gives a real bool, like isdir()
        return (self.flags & RAR_FILE_PASSWORD) != 0
471 class RarFile(object):
472 '''Parse RAR structure, provide access to files in archive.
475 #: Archive comment. Byte string or None. Use :data:`UNICODE_COMMENTS`
476 #: to get automatic decoding to unicode.
477 comment = None
479 def __init__(self, rarfile, mode="r", charset=None, info_callback=None,
480 crc_check = True, errors = "stop"):
481 """Open and parse a RAR archive.
483 Parameters:
485 rarfile
486 archive file name
487 mode
488 only 'r' is supported.
489 charset
490 fallback charset to use, if filenames are not already Unicode-enabled.
491 info_callback
492 debug callback, gets to see all archive entries.
493 crc_check
494 set to False to disable CRC checks
495 errors
496 Either "stop" to quietly stop parsing on errors,
497 or "strict" to raise errors. Default is "stop".
499 self.rarfile = rarfile
500 self.comment = None
501 self._charset = charset or DEFAULT_CHARSET
502 self._info_callback = info_callback
504 self._info_list = []
505 self._info_map = {}
506 self._needs_password = False
507 self._password = None
508 self._crc_check = crc_check
509 self._vol_list = []
511 if errors == "stop":
512 self._strict = False
513 elif errors == "strict":
514 self._strict = True
515 else:
516 raise ValueError("Invalid value for 'errors' parameter.")
518 self._main = None
520 if mode != "r":
521 raise NotImplementedError("RarFile supports only mode=r")
523 self._parse()
525 def __enter__(self):
526 return self
528 def __exit__(self, type, value, traceback):
529 self.close()
531 def setpassword(self, password):
532 '''Sets the password to use when extracting.'''
533 self._password = password
534 if not self._main:
535 self._parse()
537 def needs_password(self):
538 '''Returns True if any archive entries require password for extraction.'''
539 return self._needs_password
541 def namelist(self):
542 '''Return list of filenames in archive.'''
543 return [f.filename for f in self._info_list]
545 def infolist(self):
546 '''Return RarInfo objects for all files/directories in archive.'''
547 return self._info_list
549 def volumelist(self):
550 '''Returns filenames of archive volumes.
552 In case of single-volume archive, the list contains
553 just the name of main archive file.
555 return self._vol_list
557 def getinfo(self, fname):
558 '''Return RarInfo for file.'''
560 if isinstance(fname, RarInfo):
561 return fname
563 # accept both ways here
564 if PATH_SEP == '/':
565 fname2 = fname.replace("\\", "/")
566 else:
567 fname2 = fname.replace("/", "\\")
569 try:
570 return self._info_map[fname]
571 except KeyError:
572 try:
573 return self._info_map[fname2]
574 except KeyError:
575 raise NoRarEntry("No such file: "+fname)
577 def open(self, fname, mode = 'r', psw = None):
578 '''Returns file-like object (:class:`RarExtFile`),
579 from where the data can be read.
581 The object implements :class:`io.RawIOBase` interface, so it can
582 be further wrapped with :class:`io.BufferedReader`
583 and :class:`io.TextIOWrapper`.
585 On older Python where io module is not available, it implements
586 only .read(), .seek(), .tell() and .close() methods.
588 The object is seekable, although the seeking is fast only on
589 uncompressed files, on compressed files the seeking is implemented
590 by reading ahead and/or restarting the decompression.
592 Parameters:
594 fname
595 file name or RarInfo instance.
596 mode
597 must be 'r'
599 password to use for extracting.
602 if mode != 'r':
603 raise NotImplementedError("RarFile.open() supports only mode=r")
605 # entry lookup
606 inf = self.getinfo(fname)
607 if inf.isdir():
608 raise TypeError("Directory does not have any data: " + inf.filename)
610 if inf.flags & RAR_FILE_SPLIT_BEFORE:
611 raise NeedFirstVolume("Partial file, please start from first volume: " + inf.filename)
613 # check password
614 if inf.needs_password():
615 psw = psw or self._password
616 if psw is None:
617 raise PasswordRequired("File %s requires password" % inf.filename)
618 else:
619 psw = None
621 # is temp write usable?
622 use_hack = 1
623 if not self._main:
624 use_hack = 0
625 elif self._main.flags & (RAR_MAIN_SOLID | RAR_MAIN_PASSWORD):
626 use_hack = 0
627 elif inf.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER):
628 use_hack = 0
629 elif is_filelike(self.rarfile):
630 pass
631 elif inf.file_size > HACK_SIZE_LIMIT:
632 use_hack = 0
633 elif not USE_EXTRACT_HACK:
634 use_hack = 0
636 # now extract
637 if inf.compress_type == RAR_M0 and (inf.flags & RAR_FILE_PASSWORD) == 0:
638 return self._open_clear(inf)
639 elif use_hack:
640 return self._open_hack(inf, psw)
641 else:
642 return self._open_unrar(self.rarfile, inf, psw)
644 def read(self, fname, psw = None):
645 """Return uncompressed data for archive entry.
647 For longer files using :meth:`RarFile.open` may be better idea.
649 Parameters:
651 fname
652 filename or RarInfo instance
654 password to use for extracting.
657 f = self.open(fname, 'r', psw)
658 try:
659 return f.read()
660 finally:
661 f.close()
663 def close(self):
664 """Release open resources."""
665 pass
667 def printdir(self):
668 """Print archive file list to stdout."""
669 for f in self._info_list:
670 print(f.filename)
672 def extract(self, member, path=None, pwd=None):
673 """Extract single file into current directory.
675 Parameters:
677 member
678 filename or :class:`RarInfo` instance
679 path
680 optional destination path
682 optional password to use
684 if isinstance(member, RarInfo):
685 fname = member.filename
686 else:
687 fname = member
688 self._extract([fname], path, pwd)
690 def extractall(self, path=None, members=None, pwd=None):
691 """Extract all files into current directory.
693 Parameters:
695 path
696 optional destination path
697 members
698 optional filename or :class:`RarInfo` instance list to extract
700 optional password to use
702 fnlist = []
703 if members is not None:
704 for m in members:
705 if isinstance(m, RarInfo):
706 fnlist.append(m.filename)
707 else:
708 fnlist.append(m)
709 self._extract(fnlist, path, pwd)
711 def testrar(self):
712 """Let 'unrar' test the archive.
714 cmd = [UNRAR_TOOL] + list(TEST_ARGS)
715 add_password_arg(cmd, self._password)
716 cmd.append(self.rarfile)
717 p = custom_popen(cmd)
718 output = p.communicate()[0]
719 check_returncode(p, output)
721 def strerror(self):
722 """Return error string if parsing failed,
723 or None if no problems.
725 return self._parse_error
728 ## private methods
731 def _set_error(self, msg, *args):
732 if args:
733 msg = msg % args
734 self._parse_error = msg
735 if self._strict:
736 raise BadRarFile(msg)
738 # store entry
739 def _process_entry(self, item):
740 if item.type == RAR_BLOCK_FILE:
741 # use only first part
742 if (item.flags & RAR_FILE_SPLIT_BEFORE) == 0:
743 self._info_map[item.filename] = item
744 self._info_list.append(item)
745 # remember if any items require password
746 if item.needs_password():
747 self._needs_password = True
748 elif len(self._info_list) > 0:
749 # final crc is in last block
750 old = self._info_list[-1]
751 old.CRC = item.CRC
752 old.compress_size += item.compress_size
754 # parse new-style comment
755 if item.type == RAR_BLOCK_SUB and item.filename == 'CMT':
756 if not NEED_COMMENTS:
757 pass
758 elif item.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER):
759 pass
760 elif item.flags & RAR_FILE_SOLID:
761 # file comment
762 cmt = self._read_comment_v3(item, self._password)
763 if len(self._info_list) > 0:
764 old = self._info_list[-1]
765 old.comment = cmt
766 else:
767 # archive comment
768 cmt = self._read_comment_v3(item, self._password)
769 self.comment = cmt
771 if self._info_callback:
772 self._info_callback(item)
774 # read rar
775 def _parse(self):
776 self._fd = None
777 try:
778 self._parse_real()
779 finally:
780 if self._fd:
781 self._fd.close()
782 self._fd = None
784 def _parse_real(self):
785 fd = XFile(self.rarfile)
786 self._fd = fd
787 id = fd.read(len(RAR_ID))
788 if id != RAR_ID:
789 raise NotRarFile("Not a Rar archive: "+self.rarfile)
791 volume = 0 # first vol (.rar) is 0
792 more_vols = 0
793 endarc = 0
794 volfile = self.rarfile
795 self._vol_list = [self.rarfile]
796 while 1:
797 if endarc:
798 h = None # don't read past ENDARC
799 else:
800 h = self._parse_header(fd)
801 if not h:
802 if more_vols:
803 volume += 1
804 fd.close()
805 try:
806 volfile = self._next_volname(volfile)
807 fd = XFile(volfile)
808 except IOError:
809 self._set_error("Cannot open next volume: %s", volfile)
810 break
811 self._fd = fd
812 more_vols = 0
813 endarc = 0
814 self._vol_list.append(volfile)
815 continue
816 break
817 h.volume = volume
818 h.volume_file = volfile
820 if h.type == RAR_BLOCK_MAIN and not self._main:
821 self._main = h
822 if h.flags & RAR_MAIN_NEWNUMBERING:
823 # RAR 2.x does not set FIRSTVOLUME,
824 # so check it only if NEWNUMBERING is used
825 if (h.flags & RAR_MAIN_FIRSTVOLUME) == 0:
826 raise NeedFirstVolume("Need to start from first volume")
827 if h.flags & RAR_MAIN_PASSWORD:
828 self._needs_password = True
829 if not self._password:
830 self._main = None
831 break
832 elif h.type == RAR_BLOCK_ENDARC:
833 more_vols = h.flags & RAR_ENDARC_NEXT_VOLUME
834 endarc = 1
835 elif h.type == RAR_BLOCK_FILE:
836 # RAR 2.x does not write RAR_BLOCK_ENDARC
837 if h.flags & RAR_FILE_SPLIT_AFTER:
838 more_vols = 1
839 # RAR 2.x does not set RAR_MAIN_FIRSTVOLUME
840 if volume == 0 and h.flags & RAR_FILE_SPLIT_BEFORE:
841 raise NeedFirstVolume("Need to start from first volume")
843 # store it
844 self._process_entry(h)
846 # go to next header
847 if h.add_size > 0:
848 fd.seek(h.file_offset + h.add_size, 0)
850 # AES encrypted headers
851 _last_aes_key = (None, None, None) # (salt, key, iv)
852 def _decrypt_header(self, fd):
853 if not _have_crypto:
854 raise NoCrypto('Cannot parse encrypted headers - no crypto')
855 salt = fd.read(8)
856 if self._last_aes_key[0] == salt:
857 key, iv = self._last_aes_key[1:]
858 else:
859 key, iv = rar3_s2k(self._password, salt)
860 self._last_aes_key = (salt, key, iv)
861 return HeaderDecrypt(fd, key, iv)
863 # read single header
864 def _parse_header(self, fd):
865 try:
866 # handle encrypted headers
867 if self._main and self._main.flags & RAR_MAIN_PASSWORD:
868 if not self._password:
869 return
870 fd = self._decrypt_header(fd)
872 # now read actual header
873 return self._parse_block_header(fd)
874 except struct.error:
875 self._set_error('Broken header in RAR file')
876 return None
878 # common header
879 def _parse_block_header(self, fd):
880 h = RarInfo()
881 h.header_offset = fd.tell()
882 h.comment = None
884 # read and parse base header
885 buf = fd.read(S_BLK_HDR.size)
886 if not buf:
887 return None
888 t = S_BLK_HDR.unpack_from(buf)
889 h.header_crc, h.type, h.flags, h.header_size = t
890 h.header_base = S_BLK_HDR.size
891 pos = S_BLK_HDR.size
893 # read full header
894 if h.header_size > S_BLK_HDR.size:
895 h.header_data = buf + fd.read(h.header_size - S_BLK_HDR.size)
896 else:
897 h.header_data = buf
898 h.file_offset = fd.tell()
900 # unexpected EOF?
901 if len(h.header_data) != h.header_size:
902 self._set_error('Unexpected EOF when reading header')
903 return None
905 # block has data assiciated with it?
906 if h.flags & RAR_LONG_BLOCK:
907 h.add_size = S_LONG.unpack_from(h.header_data, pos)[0]
908 else:
909 h.add_size = 0
911 # parse interesting ones, decide header boundaries for crc
912 if h.type == RAR_BLOCK_MARK:
913 return h
914 elif h.type == RAR_BLOCK_MAIN:
915 h.header_base += 6
916 if h.flags & RAR_MAIN_ENCRYPTVER:
917 h.header_base += 1
918 if h.flags & RAR_MAIN_COMMENT:
919 self._parse_subblocks(h, h.header_base)
920 self.comment = h.comment
921 elif h.type == RAR_BLOCK_FILE:
922 self._parse_file_header(h, pos)
923 elif h.type == RAR_BLOCK_SUB:
924 self._parse_file_header(h, pos)
925 h.header_base = h.header_size
926 elif h.type == RAR_BLOCK_OLD_AUTH:
927 h.header_base += 8
928 elif h.type == RAR_BLOCK_OLD_EXTRA:
929 h.header_base += 7
930 else:
931 h.header_base = h.header_size
933 # check crc
934 if h.type == RAR_BLOCK_OLD_SUB:
935 crcdat = h.header_data[2:] + fd.read(h.add_size)
936 else:
937 crcdat = h.header_data[2:h.header_base]
939 calc_crc = crc32(crcdat) & 0xFFFF
941 # return good header
942 if h.header_crc == calc_crc:
943 return h
945 # header parsing failed.
946 self._set_error('Header CRC error (%02x): exp=%x got=%x (xlen = %d)',
947 h.type, h.header_crc, calc_crc, len(crcdat))
949 # instead panicing, send eof
950 return None
952 # read file-specific header
953 def _parse_file_header(self, h, pos):
954 fld = S_FILE_HDR.unpack_from(h.header_data, pos)
955 h.compress_size = fld[0]
956 h.file_size = fld[1]
957 h.host_os = fld[2]
958 h.CRC = fld[3]
959 h.date_time = parse_dos_time(fld[4])
960 h.extract_version = fld[5]
961 h.compress_type = fld[6]
962 h.name_size = fld[7]
963 h.mode = fld[8]
964 pos += S_FILE_HDR.size
966 if h.flags & RAR_FILE_LARGE:
967 h1 = S_LONG.unpack_from(h.header_data, pos)[0]
968 h2 = S_LONG.unpack_from(h.header_data, pos + 4)[0]
969 h.compress_size |= h1 << 32
970 h.file_size |= h2 << 32
971 pos += 8
972 h.add_size = h.compress_size
974 name = h.header_data[pos : pos + h.name_size ]
975 pos += h.name_size
976 if h.flags & RAR_FILE_UNICODE:
977 nul = name.find(ZERO)
978 h.orig_filename = name[:nul]
979 u = UnicodeFilename(h.orig_filename, name[nul + 1 : ])
980 h.filename = u.decode()
982 # if parsing failed fall back to simple name
983 if u.failed:
984 h.filename = self._decode(h.orig_filename)
985 else:
986 h.orig_filename = name
987 h.filename = self._decode(name)
989 # change separator, if requested
990 if PATH_SEP != '\\':
991 h.filename = h.filename.replace('\\', PATH_SEP)
993 if h.flags & RAR_FILE_SALT:
994 h.salt = h.header_data[pos : pos + 8]
995 pos += 8
996 else:
997 h.salt = None
999 # optional extended time stamps
1000 if h.flags & RAR_FILE_EXTTIME:
1001 pos = self._parse_ext_time(h, pos)
1002 else:
1003 h.mtime = h.atime = h.ctime = h.arctime = None
1005 # base header end
1006 h.header_base = pos
1008 if h.flags & RAR_FILE_COMMENT:
1009 self._parse_subblocks(h, pos)
1011 # convert timestamps
1012 if USE_DATETIME:
1013 h.date_time = to_datetime(h.date_time)
1014 h.mtime = to_datetime(h.mtime)
1015 h.atime = to_datetime(h.atime)
1016 h.ctime = to_datetime(h.ctime)
1017 h.arctime = to_datetime(h.arctime)
1019 # .mtime is .date_time with more precision
1020 if h.mtime:
1021 if USE_DATETIME:
1022 h.date_time = h.mtime
1023 else:
1024 # keep seconds int
1025 h.date_time = h.mtime[:5] + (int(h.mtime[5]),)
1027 return pos
1029 # find old-style comment subblock
1030 def _parse_subblocks(self, h, pos):
1031 hdata = h.header_data
1032 while pos < len(hdata):
1033 # ordinary block header
1034 t = S_BLK_HDR.unpack_from(hdata, pos)
1035 scrc, stype, sflags, slen = t
1036 pos_next = pos + slen
1037 pos += S_BLK_HDR.size
1039 # corrupt header
1040 if pos_next < pos:
1041 break
1043 # followed by block-specific header
1044 if stype == RAR_BLOCK_OLD_COMMENT and pos + S_COMMENT_HDR.size <= pos_next:
1045 declen, ver, meth, crc = S_COMMENT_HDR.unpack_from(hdata, pos)
1046 pos += S_COMMENT_HDR.size
1047 data = hdata[pos : pos_next]
1048 cmt = rar_decompress(ver, meth, data, declen, sflags,
1049 crc, self._password)
1050 if not self._crc_check:
1051 h.comment = self._decode_comment(cmt)
1052 elif crc32(cmt) & 0xFFFF == crc:
1053 h.comment = self._decode_comment(cmt)
1055 pos = pos_next
1057 def _parse_ext_time(self, h, pos):
1058 data = h.header_data
1060 # flags and rest of data can be missing
1061 flags = 0
1062 if pos + 2 <= len(data):
1063 flags = S_SHORT.unpack_from(data, pos)[0]
1064 pos += 2
1066 h.mtime, pos = self._parse_xtime(flags >> 3*4, data, pos, h.date_time)
1067 h.ctime, pos = self._parse_xtime(flags >> 2*4, data, pos)
1068 h.atime, pos = self._parse_xtime(flags >> 1*4, data, pos)
1069 h.arctime, pos = self._parse_xtime(flags >> 0*4, data, pos)
1070 return pos
1072 def _parse_xtime(self, flag, data, pos, dostime = None):
1073 unit = 10000000.0 # 100 ns units
1074 if flag & 8:
1075 if not dostime:
1076 t = S_LONG.unpack_from(data, pos)[0]
1077 dostime = parse_dos_time(t)
1078 pos += 4
1079 rem = 0
1080 cnt = flag & 3
1081 for i in range(cnt):
1082 b = S_BYTE.unpack_from(data, pos)[0]
1083 rem = (b << 16) | (rem >> 8)
1084 pos += 1
1085 sec = dostime[5] + rem / unit
1086 if flag & 4:
1087 sec += 1
1088 dostime = dostime[:5] + (sec,)
1089 return dostime, pos
1091 # given current vol name, construct next one
1092 def _next_volname(self, volfile):
1093 if is_filelike(volfile):
1094 raise IOError("Working on single FD")
1095 if self._main.flags & RAR_MAIN_NEWNUMBERING:
1096 return self._next_newvol(volfile)
1097 return self._next_oldvol(volfile)
1099 # new-style next volume
1100 def _next_newvol(self, volfile):
1101 i = len(volfile) - 1
1102 while i >= 0:
1103 if volfile[i] >= '0' and volfile[i] <= '9':
1104 return self._inc_volname(volfile, i)
1105 i -= 1
1106 raise BadRarName("Cannot construct volume name: "+volfile)
1108 # old-style next volume
1109 def _next_oldvol(self, volfile):
1110 # rar -> r00
1111 if volfile[-4:].lower() == '.rar':
1112 return volfile[:-2] + '00'
1113 return self._inc_volname(volfile, len(volfile) - 1)
1115 # increase digits with carry, otherwise just increment char
1116 def _inc_volname(self, volfile, i):
1117 fn = list(volfile)
1118 while i >= 0:
1119 if fn[i] != '9':
1120 fn[i] = chr(ord(fn[i]) + 1)
1121 break
1122 fn[i] = '0'
1123 i -= 1
1124 return ''.join(fn)
1126 def _open_clear(self, inf):
1127 return DirectReader(self, inf)
1129 # put file compressed data into temporary .rar archive, and run
1130 # unrar on that, thus avoiding unrar going over whole archive
1131 def _open_hack(self, inf, psw = None):
1132 BSIZE = 32*1024
1134 size = inf.compress_size + inf.header_size
1135 rf = XFile(inf.volume_file, 0)
1136 rf.seek(inf.header_offset)
1138 tmpfd, tmpname = mkstemp(suffix='.rar')
1139 tmpf = os.fdopen(tmpfd, "wb")
1141 try:
1142 # create main header: crc, type, flags, size, res1, res2
1143 mh = S_BLK_HDR.pack(0x90CF, 0x73, 0, 13) + ZERO * (2+4)
1144 tmpf.write(RAR_ID + mh)
1145 while size > 0:
1146 if size > BSIZE:
1147 buf = rf.read(BSIZE)
1148 else:
1149 buf = rf.read(size)
1150 if not buf:
1151 raise BadRarFile('read failed: ' + inf.filename)
1152 tmpf.write(buf)
1153 size -= len(buf)
1154 tmpf.close()
1155 rf.close()
1156 except:
1157 rf.close()
1158 tmpf.close()
1159 os.unlink(tmpname)
1160 raise
1162 return self._open_unrar(tmpname, inf, psw, tmpname)
1164 def _read_comment_v3(self, inf, psw=None):
1166 # read data
1167 rf = XFile(inf.volume_file)
1168 rf.seek(inf.file_offset)
1169 data = rf.read(inf.compress_size)
1170 rf.close()
1172 # decompress
1173 cmt = rar_decompress(inf.extract_version, inf.compress_type, data,
1174 inf.file_size, inf.flags, inf.CRC, psw, inf.salt)
1176 # check crc
1177 if self._crc_check:
1178 crc = crc32(cmt)
1179 if crc < 0:
1180 crc += (long(1) << 32)
1181 if crc != inf.CRC:
1182 return None
1184 return self._decode_comment(cmt)
1186 # extract using unrar
1187 def _open_unrar(self, rarfile, inf, psw = None, tmpfile = None):
1188 if is_filelike(rarfile):
1189 raise ValueError("Cannot use unrar directly on memory buffer")
1190 cmd = [UNRAR_TOOL] + list(OPEN_ARGS)
1191 add_password_arg(cmd, psw)
1192 cmd.append(rarfile)
1194 # not giving filename avoids encoding related problems
1195 if not tmpfile:
1196 fn = inf.filename
1197 if PATH_SEP != os.sep:
1198 fn = fn.replace(PATH_SEP, os.sep)
1199 cmd.append(fn)
1201 # read from unrar pipe
1202 return PipeReader(self, inf, cmd, tmpfile)
def _decode(self, val):
    """Decode *val* using the first encoding that works.

    Each entry of ``TRY_ENCODINGS`` is tried in order; if all fail,
    ``self._charset`` is used with the ``'replace'`` error handler.
    """
    for enc in TRY_ENCODINGS:
        try:
            text = val.decode(enc)
        except UnicodeError:
            continue
        else:
            return text
    return val.decode(self._charset, 'replace')
def _decode_comment(self, val):
    """Return comment *val* decoded if ``UNICODE_COMMENTS`` is set, else as-is."""
    if not UNICODE_COMMENTS:
        return val
    return self._decode(val)
1217 # call unrar to extract a file
def _extract(self, fnlist, path=None, psw=None):
    """Run unrar to extract the entries named in *fnlist*.

    *path*, when given, is passed as destination directory.  *psw*
    falls back to the archive-wide password.  The exit code of the
    tool is validated by :func:`check_returncode`.
    """
    argv = [UNRAR_TOOL] + list(EXTRACT_ARGS)

    # password
    add_password_arg(argv, psw or self._password)

    # rar file
    argv.append(self.rarfile)

    # file list, using native path separator
    convert = os.sep != PATH_SEP
    for name in fnlist:
        if convert:
            name = name.replace(PATH_SEP, os.sep)
        argv.append(name)

    # destination path
    if path is not None:
        argv.append(path + os.sep)

    # call
    proc = custom_popen(argv)
    out = proc.communicate()[0]
    check_returncode(proc, out)
1244 ## Utility classes
class UnicodeFilename:
    """Handle unicode filename decompression.

    Combines the plain byte name with the encoded data stream to
    produce UTF-16LE bytes, which :meth:`decode` turns into text.
    ``failed`` is set to 1 when either input runs out of bytes.
    """

    def __init__(self, name, encdata):
        self.std_name = bytearray(name)
        self.encdata = bytearray(encdata)
        self.pos = self.encpos = 0
        self.buf = bytearray()
        self.failed = 0

    def enc_byte(self):
        """Consume next byte of encoded data; 0 (and failed=1) when exhausted."""
        if self.encpos >= len(self.encdata):
            self.failed = 1
            return 0
        val = self.encdata[self.encpos]
        self.encpos += 1
        return val

    def std_byte(self):
        """Byte of plain name at current position; '?' (and failed=1) past its end."""
        if self.pos >= len(self.std_name):
            self.failed = 1
            return ord('?')
        return self.std_name[self.pos]

    def put(self, lo, hi):
        """Append one UTF-16LE code unit and advance the name position."""
        self.buf.extend((lo, hi))
        self.pos += 1

    def decode(self):
        """Run the decompression and return the resulting text."""
        high = self.enc_byte()
        bits_left = 0
        total = len(self.encdata)
        while self.encpos < total:
            # each flag byte carries four 2-bit opcodes, MSB first
            if not bits_left:
                flags = self.enc_byte()
                bits_left = 8
            bits_left -= 2
            mode = (flags >> bits_left) & 3

            if mode == 0:
                self.put(self.enc_byte(), 0)
            elif mode == 1:
                self.put(self.enc_byte(), high)
            elif mode == 2:
                low = self.enc_byte()
                self.put(low, self.enc_byte())
            else:
                run = self.enc_byte()
                if run & 0x80:
                    # copy from plain name with correction byte added
                    delta = self.enc_byte()
                    for _ in range((run & 0x7f) + 2):
                        self.put((self.std_byte() + delta) & 0xFF, high)
                else:
                    # copy from plain name unchanged
                    for _ in range(run + 2):
                        self.put(self.std_byte(), 0)
        return self.buf.decode("utf-16le", "replace")
class RarExtFile(RawIOBase):
    """Base class for file-like object that :meth:`RarFile.open` returns.

    Provides public methods and common crc checking.

    Behaviour:
     - no short reads - .read() and .readinto() read as much as requested.
     - no internal buffer, use io.BufferedReader for that.

    If :mod:`io` module is available (Python 2.6+, 3.x), then this class
    will inherit from :class:`io.RawIOBase` class.  This makes line-based
    access available: :meth:`RarExtFile.readline` and ``for ln in f``.
    """

    #: Filename of the archive entry
    name = None

    def __init__(self, rf, inf):
        RawIOBase.__init__(self)

        # standard io.* properties
        self.name = inf.filename
        self.mode = 'rb'

        # owner archive and entry description
        self.rf = rf
        self.inf = inf
        self.crc_check = rf._crc_check

        # read state, reset again by _open()
        self.fd = None
        self.CRC = 0
        self.remain = 0
        self.returncode = 0

        self._open()

    def _open(self):
        """Drop any open source and reset CRC / remaining-bytes state."""
        if self.fd:
            self.fd.close()
        self.fd = None
        self.CRC = 0
        self.remain = self.inf.file_size

    def read(self, cnt = None):
        """Read all or specified amount of data from archive entry."""

        # missing, negative or oversized request means "the rest"
        if cnt is None or cnt < 0 or cnt > self.remain:
            cnt = self.remain
        if cnt == 0:
            return EMPTY

        # actual read, with running checksum
        data = self._read(cnt)
        if data:
            self.CRC = crc32(data, self.CRC)
            self.remain -= len(data)
        if len(data) != cnt:
            raise BadRarFile("Failed the read enough data")

        # verify once the entry is fully consumed
        if not data or self.remain == 0:
            self._check()
        return data

    def _check(self):
        """Check final CRC."""
        if not self.crc_check:
            return
        if self.returncode:
            check_returncode(self, '')
        if self.remain != 0:
            raise BadRarFile("Failed the read enough data")
        # checksum may be signed; compare as unsigned 32-bit value
        crc = self.CRC & 0xFFFFFFFF
        if crc != self.inf.CRC:
            raise BadRarFile("Corrupt file - CRC check failed: " + self.inf.filename)

    def _read(self, cnt):
        """Actual read that gets sanitized cnt."""

    def close(self):
        """Close open resources."""
        RawIOBase.close(self)

        if self.fd:
            self.fd.close()
            self.fd = None

    def __del__(self):
        """Hook delete to make sure tempfile is removed."""
        self.close()

    def readinto(self, buf):
        """Read data into *buf* via :meth:`read`.

        Returns bytes read.
        """
        data = self.read(len(buf))
        n = len(data)
        try:
            buf[:n] = data
        except TypeError:
            # array.array needs a value of matching type
            import array
            if not isinstance(buf, array.array):
                raise
            buf[:n] = array.array(buf.typecode, data)
        return n

    def tell(self):
        """Return current reading position in uncompressed data."""
        return self.inf.file_size - self.remain

    def seek(self, ofs, whence = 0):
        """Seek in data.

        On uncompressed files, the seeking works by actual
        seeks so it's fast.  On compressed files it's slow
        - forward seeking happens by reading ahead,
        backwards by re-opening and decompressing from the start.
        """

        # disable crc check when seeking
        self.crc_check = 0

        fsize = self.inf.file_size
        cur_ofs = self.tell()

        if whence == 0:     # seek from beginning of file
            target = ofs
        elif whence == 1:   # seek from current position
            target = cur_ofs + ofs
        elif whence == 2:   # seek from end of file
            target = fsize + ofs
        else:
            raise ValueError('Invalid value for whence')

        # clamp into [0, fsize]
        target = max(0, min(target, fsize))

        if target < cur_ofs:
            # going backwards: reopen and skip from the start
            self._open()
            self._skip(target)
        else:
            self._skip(target - cur_ofs)
        return self.tell()

    def _skip(self, cnt):
        """Read and discard data"""
        while cnt > 0:
            chunk = self.read(min(cnt, 8192))
            if not chunk:
                break
            cnt -= len(chunk)

    def readable(self):
        """Returns True"""
        return True

    def writable(self):
        """Returns False.

        Writing is not supported."""
        return False

    def seekable(self):
        """Returns True.

        Seeking is supported, although it's slow on compressed files.
        """
        return True

    def readall(self):
        """Read all remaining data"""
        # avoid RawIOBase default impl
        return self.read()
class PipeReader(RarExtFile):
    """Read data from pipe, handle tempfile cleanup."""

    def __init__(self, rf, inf, cmd, tempfile=None):
        # set before the base constructor runs, as it calls _open()
        self.cmd = cmd
        self.proc = None
        self.tempfile = tempfile
        RarExtFile.__init__(self, rf, inf)

    def _close_proc(self):
        """Close pipes of the current process, reap it, remember its exit code."""
        proc = self.proc
        if not proc:
            return
        for pipe in (proc.stdout, proc.stdin, proc.stderr):
            if pipe:
                pipe.close()
        proc.wait()
        self.returncode = proc.returncode
        self.proc = None

    def _open(self):
        """(Re)start the command and read from its stdout."""
        RarExtFile._open(self)

        # stop old process
        self._close_proc()

        # launch new process
        self.returncode = 0
        proc = custom_popen(self.cmd)
        self.proc = proc
        self.fd = proc.stdout

        # avoid situation where unrar waits on stdin
        stdin = proc.stdin
        if stdin:
            stdin.close()

    def _read(self, cnt):
        """Read from pipe."""

        # normal read is usually enough
        first = self.fd.read(cnt)
        if len(first) == cnt or not first:
            return first

        # short read, keep asking until satisfied or EOF
        parts = [first]
        missing = cnt - len(first)
        while missing > 0:
            more = self.fd.read(missing)
            if not more:
                break
            missing -= len(more)
            parts.append(more)
        return EMPTY.join(parts)

    def close(self):
        """Close open resources."""

        self._close_proc()
        RarExtFile.close(self)

        # remove temp file, ignoring failure to do so
        if self.tempfile:
            try:
                os.unlink(self.tempfile)
            except OSError:
                pass
            self.tempfile = None

    if have_memoryview:
        def readinto(self, buf):
            """Zero-copy read directly into buffer."""
            want = min(len(buf), self.remain)
            view = memoryview(buf)
            got = 0
            while got < want:
                n = self.fd.readinto(view[got : want])
                if not n:
                    break
                if self.crc_check:
                    self.CRC = crc32(view[got : got + n], self.CRC)
                self.remain -= n
                got += n
            return got
class DirectReader(RarExtFile):
    """Read uncompressed data directly from archive."""

    def _open(self):
        """Open the entry's volume and position right after its header."""
        RarExtFile._open(self)

        self.volfile = self.inf.volume_file
        self.fd = XFile(self.volfile, 0)
        self.fd.seek(self.inf.header_offset, 0)
        self.cur = self.rf._parse_header(self.fd)
        self.cur_avail = self.cur.add_size

    def _skip(self, cnt):
        """RAR Seek, skipping through rar files to get to correct position
        """
        while cnt > 0:
            # next vol needed?
            if self.cur_avail == 0 and not self._open_next():
                break

            if cnt > self.cur_avail:
                # rest of this volume is skipped without seeking
                step = self.cur_avail
            else:
                step = cnt
                self.fd.seek(step, 1)
            self.cur_avail -= step
            self.remain -= step
            cnt -= step

    def _read(self, cnt):
        """Read from potentially multi-volume archive."""
        chunks = []
        while cnt > 0:
            # next vol needed?
            if self.cur_avail == 0 and not self._open_next():
                break

            # fd is in read pos, never read past current part
            data = self.fd.read(min(cnt, self.cur_avail))
            if not data:
                break

            # got some data
            cnt -= len(data)
            self.cur_avail -= len(data)
            chunks.append(data)

        if len(chunks) == 1:
            return chunks[0]
        return EMPTY.join(chunks)

    def _open_next(self):
        """Proceed to next volume.

        Returns False when the current part is not flagged as
        continuing in another volume, True once positioned at the
        data of the next part.
        """

        # is the file split over archives?
        if not (self.cur.flags & RAR_FILE_SPLIT_AFTER):
            return False

        if self.fd:
            self.fd.close()
            self.fd = None

        # open next part
        self.volfile = self.rf._next_volname(self.volfile)
        fd = open(self.volfile, "rb", 0)
        self.fd = fd

        # loop until first file header
        while True:
            hdr = self.rf._parse_header(fd)
            if not hdr:
                raise BadRarFile("Unexpected EOF")
            if hdr.type in (RAR_BLOCK_MARK, RAR_BLOCK_MAIN):
                if hdr.add_size:
                    fd.seek(hdr.add_size, 1)
                continue
            if hdr.orig_filename != self.inf.orig_filename:
                raise BadRarFile("Did not found file entry")
            self.cur = hdr
            self.cur_avail = hdr.add_size
            return True

    if have_memoryview:
        def readinto(self, buf):
            """Zero-copy read directly into buffer."""
            total = len(buf)
            view = memoryview(buf)
            got = 0
            while got < total:
                # next vol needed?
                if self.cur_avail == 0 and not self._open_next():
                    break

                # length for next read
                want = min(total - got, self.cur_avail)

                # read into temp view
                n = self.fd.readinto(view[got : got + want])
                if not n:
                    break
                if self.crc_check:
                    self.CRC = crc32(view[got : got + n], self.CRC)
                self.cur_avail -= n
                self.remain -= n
                got += n
            return got
class HeaderDecrypt:
    """File-like object that decrypts from another file"""

    def __init__(self, f, key, iv):
        self.f = f
        self.ciph = AES.new(key, AES.MODE_CBC, iv)
        # decrypted bytes not yet handed out
        self.buf = EMPTY

    def tell(self):
        """Position of the underlying (encrypted) file."""
        return self.f.tell()

    def read(self, cnt=None):
        """Return up to *cnt* decrypted bytes.

        Less is returned when the underlying file cannot supply
        another full cipher block.  Requests above 8k raise
        :exc:`BadRarFile`.
        """
        if cnt > 8*1024:
            raise BadRarFile('Bad count to header decrypt - wrong password?')

        # serve from leftover plaintext when it is enough
        pending = self.buf
        if cnt <= len(pending):
            self.buf = pending[cnt:]
            return pending[:cnt]

        # use up leftover, then decrypt block by block
        self.buf = EMPTY
        parts = [pending]
        need = cnt - len(pending)
        blk = self.ciph.block_size
        while need > 0:
            enc = self.f.read(blk)
            if len(enc) < blk:
                break
            dec = self.ciph.decrypt(enc)
            if need >= len(dec):
                parts.append(dec)
                need -= len(dec)
            else:
                # keep the unused tail for the next call
                parts.append(dec[:need])
                self.buf = dec[need:]
                need = 0

        return EMPTY.join(parts)
1745 # handle (filename|filelike) object
class XFile(object):
    """Uniform read access to either a filename or a file-like object.

    A filename is opened here and closed by :meth:`close`; a file-like
    object is rewound to offset 0 and left open on :meth:`close`.
    """
    __slots__ = ('_fd', '_need_close')

    def __init__(self, xfile, bufsize = 1024):
        borrowed = is_filelike(xfile)
        self._need_close = not borrowed
        if borrowed:
            self._fd = xfile
            self._fd.seek(0)
        else:
            self._fd = open(xfile, 'rb', bufsize)

    def read(self, n=None):
        return self._fd.read(n)

    def tell(self):
        return self._fd.tell()

    def seek(self, ofs, whence=0):
        return self._fd.seek(ofs, whence)

    def readinto(self, dst):
        return self._fd.readinto(dst)

    def close(self):
        # only close what was opened here
        if self._need_close:
            self._fd.close()

    def __enter__(self):
        return self

    def __exit__(self, typ, val, tb):
        self.close()
1773 ## Utility functions
def is_filelike(obj):
    """Tell filenames from file-like objects.

    Returns False for strings and True for objects that have
    ``read``, ``tell`` and ``seek``; anything else raises
    :exc:`ValueError`.
    """
    if isinstance(obj, str) or isinstance(obj, unicode):
        return False
    if not all(hasattr(obj, attr) for attr in ('read', 'tell', 'seek')):
        raise ValueError("Invalid object passed as file")
    return True
def rar3_s2k(psw, salt):
    """String-to-key hash for RAR3.

    Returns ``(key, iv)``: the key is the first 16 digest bytes with
    each 32-bit word byte-swapped, the iv collects one digest byte at
    the start of each of the 16 outer rounds.
    """
    seed = psw.encode('utf-16le') + salt
    iv_parts = []
    h = sha1()
    for outer in range(16):
        base = outer * 0x4000
        for inner in range(0x4000):
            # only the low 3 bytes of the counter are hashed
            counter = S_LONG.pack(base + inner)
            h.update(seed + counter[:3])
            if inner == 0:
                iv_parts.append(h.digest()[19:20])
    iv = EMPTY.join(iv_parts)

    words = unpack(">LLLL", h.digest()[:16])
    key_le = pack("<LLLL", *words)
    return key_le, iv
def rar_decompress(vers, meth, data, declen=0, flags=0, crc=0, psw=None, salt=None):
    """Decompress blob of compressed data.

    Used for data with non-standard header - eg. comments.

    The blob is wrapped into a temporary one-entry archive which is
    run through unrar; the tool's output is returned.  Stored,
    non-encrypted data is returned as-is.  When the salt flag is set
    but no *salt* is given, an empty value is returned.
    """

    # already uncompressed?
    if meth == RAR_M0 and not (flags & RAR_FILE_PASSWORD):
        return data

    # take only necessary flags
    keep = RAR_FILE_PASSWORD | RAR_FILE_SALT | RAR_FILE_DICTMASK
    flags = (flags & keep) | RAR_LONG_BLOCK

    # file header
    fname = bytes('data', 'ascii')
    date = 0
    mode = 0x20
    fhdr = S_FILE_HDR.pack(len(data), declen, RAR_OS_MSDOS, crc,
                           date, vers, meth, len(fname), mode) + fname
    if flags & RAR_FILE_SALT:
        if not salt:
            return EMPTY
        fhdr += salt

    # full header: crc covers everything after the crc field itself
    hlen = S_BLK_HDR.size + len(fhdr)
    unsigned_hdr = S_BLK_HDR.pack(0, RAR_BLOCK_FILE, flags, hlen) + fhdr
    hcrc = crc32(unsigned_hdr[2:]) & 0xFFFF
    hdr = S_BLK_HDR.pack(hcrc, RAR_BLOCK_FILE, flags, hlen) + fhdr

    # archive main header
    mh = S_BLK_HDR.pack(0x90CF, RAR_BLOCK_MAIN, 0, 13) + ZERO * (2+4)

    # decompress via temp rar
    tmpfd, tmpname = mkstemp(suffix='.rar')
    tmpf = os.fdopen(tmpfd, "wb")
    try:
        tmpf.write(RAR_ID + mh + hdr + data)
        tmpf.close()

        argv = [UNRAR_TOOL] + list(OPEN_ARGS)
        add_password_arg(argv, psw, (flags & RAR_FILE_PASSWORD))
        argv.append(tmpname)

        proc = custom_popen(argv)
        return proc.communicate()[0]
    finally:
        tmpf.close()
        os.unlink(tmpname)
def to_datetime(t):
    """Convert 6-part time tuple into datetime object.

    *t* is ``(year, month, day, hour, minute, seconds)`` where seconds
    may be fractional; ``None`` is passed through.  Out-of-range
    fields are clamped instead of raising.
    """
    if t is None:
        return None

    # split seconds into whole and microsecond parts
    year, mon, day, h, m, xs = t
    s = int(xs)
    us = int(1000000 * (xs - s))

    # assume the values are valid
    try:
        return datetime(year, mon, day, h, m, s, us)
    except ValueError:
        pass

    # sanitize invalid values
    MDAY = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
    mon = min(max(mon, 1), 12)
    day = min(max(day, 1), MDAY[mon])
    h = min(h, 23)
    m = min(m, 59)
    s = min(s, 59)

    # Feb 29 exists only in leap years
    if mon == 2 and day == 29:
        try:
            return datetime(year, mon, day, h, m, s, us)
        except ValueError:
            day = 28
    return datetime(year, mon, day, h, m, s, us)
def parse_dos_time(stamp):
    """Parse standard 32-bit DOS timestamp.

    Returns ``(year, month, day, hour, minute, second)``; fields are
    taken from the bit layout without range validation.
    """
    half_sec = stamp & 0x1F             # bits 0-4, 2-second units
    minute = (stamp >> 5) & 0x3F        # bits 5-10
    hour = (stamp >> 11) & 0x1F         # bits 11-15
    day = (stamp >> 16) & 0x1F          # bits 16-20
    month = (stamp >> 21) & 0x0F        # bits 21-24
    year = ((stamp >> 25) & 0x7F) + 1980
    return (year, month, day, hour, minute, half_sec * 2)
def custom_popen(cmd):
    """Disconnect cmd from parent fds, read only from stdout.

    Returns the started :class:`Popen` object; stderr is merged into
    stdout.  A missing executable is reported as :exc:`RarExecError`,
    other :exc:`OSError` failures are re-raised unchanged.
    """

    # needed for py2exe
    if sys.platform == 'win32':
        creationflags = 0x08000000 # CREATE_NO_WINDOW
    else:
        creationflags = 0

    # run command
    try:
        return Popen(cmd, bufsize = 0,
                     stdout = PIPE, stdin = PIPE, stderr = STDOUT,
                     creationflags = creationflags)
    except OSError:
        ex = sys.exc_info()[1]
        if ex.errno != errno.ENOENT:
            raise
        raise RarExecError("Unrar not installed? (rarfile.UNRAR_TOOL=%r)" % UNRAR_TOOL)
def custom_check(cmd):
    """Run command, collect output, raise error if needed.

    Returns the captured output; a non-zero exit code raises
    :exc:`CalledProcessError` carrying that output.
    """
    proc = custom_popen(cmd)
    captured = proc.communicate()[0]
    if proc.returncode:
        raise CalledProcessError(proc.returncode, cmd, captured)
    return captured
def add_password_arg(cmd, psw, required=False):
    """Append password switch to commandline.

    Nothing is added when the alternative tool is in use.  Otherwise
    ``-p<psw>`` is appended, or ``-p-`` when *psw* is None.
    *required* is accepted but not used here.
    """
    if UNRAR_TOOL == ALT_TOOL:
        return
    if psw is None:
        cmd.append('-p-')
    else:
        cmd.append('-p' + psw)
def check_returncode(p, out):
    """Raise exception according to unrar exit code"""

    code = p.returncode
    if code == 0:
        return

    # map return code to exception class
    if UNRAR_TOOL == ALT_TOOL:
        # alternative tool: no per-code mapping
        errmap = [None]
    else:
        errmap = [None,
            RarWarning, RarFatalError, RarCRCError, RarLockedArchiveError,
            RarWriteError, RarOpenError, RarUserError, RarMemoryError,
            RarCreateError, RarNoFilesError] # codes from rar.txt

    if 0 < code < len(errmap):
        exc = errmap[code]
    elif code == 255:
        exc = RarUserBreak
    elif code < 0:
        exc = RarSignalExit
    else:
        exc = RarUnknownError

    # format message; the class docstring serves as description
    if out:
        fmt, args = "%s [%d]: %s", (exc.__doc__, p.returncode, out)
    else:
        fmt, args = "%s [%d]", (exc.__doc__, p.returncode)

    raise exc(fmt % args)
1967 # Check if unrar works
try:
    # does UNRAR_TOOL work?
    custom_check([UNRAR_TOOL] + list(CHECK_ARGS))
except CalledProcessError:
    try:
        # does ALT_TOOL work?
        custom_check([ALT_TOOL] + list(ALT_CHECK_ARGS))
    except CalledProcessError:
        # no usable tool, only uncompressed archives work
        pass
    else:
        # alternative tool is usable: replace config
        UNRAR_TOOL = ALT_TOOL
        OPEN_ARGS = ALT_OPEN_ARGS
        EXTRACT_ARGS = ALT_EXTRACT_ARGS
        TEST_ARGS = ALT_TEST_ARGS