Support rar5 archive format
[rarfile.git] / rarfile.py
blobc6e0d447c589cd5b52dfceada75d13a744f311b8
1 # rarfile.py
3 # Copyright (c) 2005-2016 Marko Kreen <markokr@gmail.com>
5 # Permission to use, copy, modify, and/or distribute this software for any
6 # purpose with or without fee is hereby granted, provided that the above
7 # copyright notice and this permission notice appear in all copies.
9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
r"""RAR archive reader.

This is a Python module for reading RAR archives.  The interface
is made as :mod:`zipfile`-like as possible.

Basic logic:
 - Parse archive structure with Python.
 - Extract non-compressed files with Python.
 - Extract compressed files with unrar.
 - Optionally write compressed data to temp file to speed up unrar,
   otherwise it needs to scan whole archive on each execution.

Example::

    import rarfile

    rf = rarfile.RarFile('myarchive.rar')
    for f in rf.infolist():
        print(f.filename, f.file_size)
        if f.filename == 'README':
            print(rf.read(f))

Archive files can also be accessed via file-like object returned
by :meth:`RarFile.open`::

    import rarfile

    with rarfile.RarFile('archive.rar') as rf:
        with rf.open('README') as f:
            for ln in f:
                print(ln.strip())

There are a few module-level parameters to tune behaviour;
here they are with their defaults and the reason to change them::

    import rarfile

    # Set to full path of unrar.exe if it is not in PATH
    rarfile.UNRAR_TOOL = "unrar"

    # Set to '\\' to be more compatible with old rarfile
    rarfile.PATH_SEP = '/'

For more details, refer to source.

"""
64 from __future__ import division, print_function
67 ## Imports and compat - support both Python 2.x and 3.x
70 import sys
71 import os
72 import errno
73 import struct
75 from struct import pack, unpack, Struct
76 from binascii import crc32, hexlify
77 from tempfile import mkstemp
78 from subprocess import Popen, PIPE, STDOUT
79 from io import RawIOBase
80 from hashlib import sha1, sha256
81 from hmac import HMAC
82 from datetime import datetime, timedelta, tzinfo
# Fixed-offset timezone, used for UTC.  When the running Python's
# datetime module has no `timezone`, a minimal stand-in is defined.
try:
    from datetime import timezone
except ImportError:
    class timezone(tzinfo):
        """Minimal fixed-offset stand-in for :class:`datetime.timezone`."""
        __slots__ = ('_ofs', '_name')
        _DST = timedelta(0)

        def __init__(self, offset, name):
            super(timezone, self).__init__()
            self._ofs = offset
            self._name = name

        def utcoffset(self, dt):
            """Return the offset given at construction."""
            return self._ofs

        def tzname(self, dt):
            """Return the name given at construction."""
            return self._name

        def dst(self, dt):
            """Return a zero delta."""
            return self._DST
# Crypto primitives are only needed for encrypted headers.
# First choice is the `cryptography` package, second choice is PyCrypto.
# If neither can be imported, `_have_crypto` is set to 0.
try:
    try:
        from cryptography.hazmat.primitives.ciphers import algorithms, modes, Cipher
        from cryptography.hazmat.backends import default_backend
        from cryptography.hazmat.primitives import hashes
        from cryptography.hazmat.primitives.kdf import pbkdf2

        class AES_CBC_Decrypt(object):
            """AES-CBC decryptor; exposes a ``decrypt(data)`` callable."""
            def __init__(self, key, iv):
                cipher = Cipher(algorithms.AES(key), modes.CBC(iv), default_backend())
                self.decrypt = cipher.decryptor().update

        def pbkdf2_sha256(password, salt, iters):
            """Derive a 32-byte key using PBKDF2 with HMAC-SHA256."""
            kdf = pbkdf2.PBKDF2HMAC(hashes.SHA256(), 32, salt, iters, default_backend())
            return kdf.derive(password)

    except ImportError:
        from Crypto.Cipher import AES
        from Crypto.Protocol import KDF

        class AES_CBC_Decrypt(object):
            """AES-CBC decryptor; exposes a ``decrypt(data)`` callable."""
            def __init__(self, key, iv):
                self.decrypt = AES.new(key, AES.MODE_CBC, iv).decrypt

        def pbkdf2_sha256(password, salt, iters):
            """Derive a 32-byte key using PBKDF2 with HMAC-SHA256."""
            # hmac_sha256 is not defined in this part of the file;
            # it is looked up at call time.
            return KDF.PBKDF2(password, salt, 32, iters, hmac_sha256)

    _have_crypto = 1
except ImportError:
    _have_crypto = 0
# Probe for the optional pyblake2 module and remember the outcome.
try:
    from pyblake2 import blake2s
except ImportError:
    _have_blake2 = False
else:
    _have_blake2 = True
# Python 2.x / 3.x compatibility helpers
if sys.version_info[0] < 3:
    def rar_crc32(data, prev=0):
        """CRC32 with unsigned values.

        Converts `prev` to the signed form before calling crc32()
        and converts a negative result back to unsigned.
        """
        if (prev > 0) and (prev & 0x80000000):
            prev -= (1 << 32)
        value = crc32(data, prev)
        if value < 0:
            value += (1 << 32)
        return value
    tohex = hexlify
    _byte_code = ord
else:  # pragma: no cover
    def tohex(data):
        """Return hex string."""
        hexbytes = hexlify(data)
        return hexbytes.decode('ascii')
    rar_crc32 = crc32
    unicode = str
    _byte_code = int   # noqa
# module version string
__version__ = '3.0'

# names exported by `from rarfile import *`
__all__ = ['is_rarfile', 'RarInfo', 'RarFile', 'RarExtFile']
##
## Module configuration.  Can be tuned after importing.
##

#: default fallback charset
DEFAULT_CHARSET = "windows-1252"

#: list of encodings to try, with fallback to DEFAULT_CHARSET if none succeed
TRY_ENCODINGS = ('utf8', 'utf-16le')

#: 'unrar', 'rar' or full path to either one
UNRAR_TOOL = "unrar"

#: Command line args to use for opening file for reading.
OPEN_ARGS = ('p', '-inul')

#: Command line args to use for extracting file to disk.
EXTRACT_ARGS = ('x', '-y', '-idq')

#: args for testrar()
TEST_ARGS = ('t', '-idq')

#
# Allow use of tool that is not compatible with unrar.
#
# By default use 'bsdtar' which is 'tar' program that
# sits on top of libarchive.
#
# Problems with libarchive RAR backend:
# - Does not support solid archives.
# - Does not support password-protected archives.
#

ALT_TOOL = 'bsdtar'
ALT_OPEN_ARGS = ('-x', '--to-stdout', '-f')
ALT_EXTRACT_ARGS = ('-x', '-f')
ALT_TEST_ARGS = ('-t', '-f')
ALT_CHECK_ARGS = ('--help',)

#: whether to speed up decompression by using tmp archive
USE_EXTRACT_HACK = 1

#: limit the filesize for tmp archive usage
HACK_SIZE_LIMIT = 20 * 1024 * 1024

#: Separator for path name components.  RAR internally uses '\\'.
#: Use '/' to be similar with zipfile.
PATH_SEP = '/'
##
## rar constants
##

# block types (the trailing comment is the ASCII character of the value)
RAR_BLOCK_MARK          = 0x72  # r
RAR_BLOCK_MAIN          = 0x73  # s
RAR_BLOCK_FILE          = 0x74  # t
RAR_BLOCK_OLD_COMMENT   = 0x75  # u
RAR_BLOCK_OLD_EXTRA     = 0x76  # v
RAR_BLOCK_OLD_SUB       = 0x77  # w
RAR_BLOCK_OLD_RECOVERY  = 0x78  # x
RAR_BLOCK_OLD_AUTH      = 0x79  # y
RAR_BLOCK_SUB           = 0x7a  # z
RAR_BLOCK_ENDARC        = 0x7b  # {

# flags for RAR_BLOCK_MAIN
RAR_MAIN_VOLUME         = 0x0001
RAR_MAIN_COMMENT        = 0x0002
RAR_MAIN_LOCK           = 0x0004
RAR_MAIN_SOLID          = 0x0008
RAR_MAIN_NEWNUMBERING   = 0x0010
RAR_MAIN_AUTH           = 0x0020
RAR_MAIN_RECOVERY       = 0x0040
RAR_MAIN_PASSWORD       = 0x0080
RAR_MAIN_FIRSTVOLUME    = 0x0100
RAR_MAIN_ENCRYPTVER     = 0x0200

# flags for RAR_BLOCK_FILE
RAR_FILE_SPLIT_BEFORE   = 0x0001
RAR_FILE_SPLIT_AFTER    = 0x0002
RAR_FILE_PASSWORD       = 0x0004
RAR_FILE_COMMENT        = 0x0008
RAR_FILE_SOLID          = 0x0010
# bits 5..7 form one field; the all-ones value marks a directory
RAR_FILE_DICTMASK       = 0x00e0
RAR_FILE_DICT64         = 0x0000
RAR_FILE_DICT128        = 0x0020
RAR_FILE_DICT256        = 0x0040
RAR_FILE_DICT512        = 0x0060
RAR_FILE_DICT1024       = 0x0080
RAR_FILE_DICT2048       = 0x00a0
RAR_FILE_DICT4096       = 0x00c0
RAR_FILE_DIRECTORY      = 0x00e0
RAR_FILE_LARGE          = 0x0100
RAR_FILE_UNICODE        = 0x0200
RAR_FILE_SALT           = 0x0400
RAR_FILE_VERSION        = 0x0800
RAR_FILE_EXTTIME        = 0x1000
RAR_FILE_EXTFLAGS       = 0x2000

# flags for RAR_BLOCK_ENDARC
RAR_ENDARC_NEXT_VOLUME  = 0x0001
RAR_ENDARC_DATACRC      = 0x0002
RAR_ENDARC_REVSPACE     = 0x0004
RAR_ENDARC_VOLNR        = 0x0008

# flags common to all blocks
RAR_SKIP_IF_UNKNOWN     = 0x4000
RAR_LONG_BLOCK          = 0x8000

# Host OS types
RAR_OS_MSDOS = 0
RAR_OS_OS2   = 1
RAR_OS_WIN32 = 2
RAR_OS_UNIX  = 3
RAR_OS_MACOS = 4
RAR_OS_BEOS  = 5

# Compression methods - '0'..'5'
RAR_M0 = 0x30
RAR_M1 = 0x31
RAR_M2 = 0x32
RAR_M3 = 0x33
RAR_M4 = 0x34
RAR_M5 = 0x35
#
# RAR5 constants
#

# block types
RAR5_BLOCK_MAIN = 1
RAR5_BLOCK_FILE = 2
RAR5_BLOCK_SERVICE = 3
RAR5_BLOCK_ENCRYPTION = 4
RAR5_BLOCK_ENDARC = 5

# block flags
RAR5_BLOCK_FLAG_EXTRA_DATA = 0x01
RAR5_BLOCK_FLAG_DATA_AREA = 0x02
RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN = 0x04
RAR5_BLOCK_FLAG_SPLIT_BEFORE = 0x08
RAR5_BLOCK_FLAG_SPLIT_AFTER = 0x10
RAR5_BLOCK_FLAG_DEPENDS_PREV = 0x20
RAR5_BLOCK_FLAG_KEEP_WITH_PARENT = 0x40

# main block flags
RAR5_MAIN_FLAG_ISVOL = 0x01
RAR5_MAIN_FLAG_HAS_VOLNR = 0x02
RAR5_MAIN_FLAG_SOLID = 0x04
RAR5_MAIN_FLAG_RECOVERY = 0x08
RAR5_MAIN_FLAG_LOCKED = 0x10

# file block flags
RAR5_FILE_FLAG_ISDIR = 0x01
RAR5_FILE_FLAG_HAS_MTIME = 0x02
RAR5_FILE_FLAG_HAS_CRC32 = 0x04
RAR5_FILE_FLAG_UNKNOWN_SIZE = 0x08

RAR5_COMPR_SOLID = 0x40

RAR5_ENC_FLAG_HAS_CHECKVAL = 0x01

RAR5_ENDARC_FLAG_NEXT_VOL = 0x01

# file extra-record types
RAR5_XFILE_ENCRYPTION = 1
RAR5_XFILE_HASH = 2
RAR5_XFILE_TIME = 3
RAR5_XFILE_VERSION = 4
RAR5_XFILE_REDIR = 5
RAR5_XFILE_OWNER = 6
RAR5_XFILE_SERVICE = 7

# time record flags
RAR5_XTIME_UNIXTIME = 0x01
RAR5_XTIME_HAS_MTIME = 0x02
RAR5_XTIME_HAS_CTIME = 0x04
RAR5_XTIME_HAS_ATIME = 0x08

# encryption record
RAR5_XENC_CIPHER_AES256 = 0

RAR5_XENC_CHECKVAL = 0x01
RAR5_XENC_TWEAKED = 0x02

# hash record
RAR5_XHASH_BLAKE2SP = 0

# redirection record types
RAR5_XREDIR_UNIX_SYMLINK = 1
RAR5_XREDIR_WINDOWS_SYMLINK = 2
RAR5_XREDIR_WINDOWS_JUNCTION = 3
RAR5_XREDIR_HARD_LINK = 4
RAR5_XREDIR_FILE_COPY = 5

# redirection record flags
RAR5_XREDIR_ISDIR = 0x01

# owner record flags
RAR5_XOWNER_UNAME = 0x01
RAR5_XOWNER_GNAME = 0x02
RAR5_XOWNER_UID = 0x04
RAR5_XOWNER_GID = 0x08

# host OS types
RAR5_OS_WINDOWS = 0
RAR5_OS_UNIX = 1
##
## internal constants
##

# archive signatures for the RAR3 and RAR5 formats
RAR_ID = b"Rar!\x1a\x07\x00"
RAR5_ID = b"Rar!\x1a\x07\x01\x00"
ZERO = b'\0'
EMPTY = b''
# zero-offset timezone object named 'UTC'
UTC = timezone(timedelta(0), 'UTC')
# chunk size used for buffered copying
BSIZE = 32 * 1024
def _get_rar_version(xfile):
    '''Detect RAR format version from the file signature.

    Reads the first ``len(RAR5_ID)`` bytes and returns 3 when they start
    with RAR_ID, 5 when they start with RAR5_ID, and 0 otherwise.
    '''
    with XFile(xfile) as fd:
        sig = fd.read(len(RAR5_ID))
    if sig.startswith(RAR_ID):
        return 3
    if sig.startswith(RAR5_ID):
        return 5
    return 0
394 ## Public interface
def is_rarfile(xfile):
    '''Check quickly whether file is rar archive.

    True when the signature matches one of the known RAR versions.
    '''
    version = _get_rar_version(xfile)
    return version > 0
class Error(Exception):
    """Root of all rarfile exceptions."""


class BadRarFile(Error):
    """Archive contains incorrect data."""


class NotRarFile(Error):
    """File is not a RAR archive."""


class BadRarName(Error):
    """Multipart name components cannot be guessed."""


class NoRarEntry(Error):
    """Requested file is not in the archive."""


class PasswordRequired(Error):
    """A password is needed for the file."""


class NeedFirstVolume(Error):
    """Processing must start from the first volume."""


class NoCrypto(Error):
    """Encrypted headers cannot be parsed - no crypto available."""


class RarExecError(Error):
    """Problem reported by unrar/rar."""


class RarWarning(RarExecError):
    """Non-fatal error"""


class RarFatalError(RarExecError):
    """Fatal error"""


class RarCRCError(RarExecError):
    """CRC error during unpacking"""


class RarLockedArchiveError(RarExecError):
    """Must not modify locked archive"""


class RarWriteError(RarExecError):
    """Write error"""


class RarOpenError(RarExecError):
    """Open error"""


class RarUserError(RarExecError):
    """User error"""


class RarMemoryError(RarExecError):
    """Memory error"""


class RarCreateError(RarExecError):
    """Create error"""


class RarNoFilesError(RarExecError):
    """No files that match pattern were found"""


class RarUserBreak(RarExecError):
    """User stop"""


class RarWrongPassword(RarExecError):
    """Incorrect password"""


class RarUnknownError(RarExecError):
    """Unknown exit code"""


class RarSignalExit(RarExecError):
    """Unrar exited with signal"""


class RarCannotExec(RarExecError):
    """Executable not found."""
class RarInfo(object):
    r'''An entry in rar archive.

    RAR3 extended timestamps are :class:`datetime.datetime` objects without timezone.
    RAR5 extended timestamps are :class:`datetime.datetime` objects with UTC timezone.

    Attributes:

        filename
            File name with relative path.
            Path separator is '/'.  Always unicode string.

        date_time
            File modification timestamp.  As tuple of (year, month, day, hour, minute, second).
            RAR5 allows archives where it is missing, it's None then.

        file_size
            Uncompressed size.

        compress_size
            Compressed size.

        compress_type
            Compression method: one of :data:`RAR_M0` .. :data:`RAR_M5` constants.

        extract_version
            Minimal Rar version needed for decompressing.  As (major*10 + minor),
            so 2.9 is 29.

            RAR3: 10, 20, 29

            RAR5 does not have such field in archive, it's simply set to 50.

        host_os
            Host OS type, one of RAR_OS_* constants.

            RAR3: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`, :data:`RAR_OS_MSDOS`,
            :data:`RAR_OS_OS2`, :data:`RAR_OS_BEOS`.

            RAR5: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`.

        mode
            File attributes. May be either dos-style or unix-style, depending on host_os.

        mtime
            File modification time.  Same value as :attr:`date_time`
            but as :class:`datetime.datetime` object with extended precision.

        ctime
            Optional time field: creation time.  As :class:`datetime.datetime` object.

        atime
            Optional time field: last access time.  As :class:`datetime.datetime` object.

        arctime
            Optional time field: archival time.  As :class:`datetime.datetime` object.
            (RAR3-only)

        CRC
            CRC-32 of uncompressed file, unsigned int.

            RAR5: may be None.

        blake2sp_hash
            Blake2SP hash over decompressed data.  (RAR5-only)

        comment
            Optional file comment field.  Unicode string.  (RAR3-only)

        file_redir
            If not None, file is link of some sort.  Contains tuple of (type, flags, target).
            (RAR5-only)

            Type is one of constants:

                :data:`RAR5_XREDIR_UNIX_SYMLINK`
                    unix symlink to target.
                :data:`RAR5_XREDIR_WINDOWS_SYMLINK`
                    windows symlink to target.
                :data:`RAR5_XREDIR_WINDOWS_JUNCTION`
                    windows junction.
                :data:`RAR5_XREDIR_HARD_LINK`
                    hard link to target.
                :data:`RAR5_XREDIR_FILE_COPY`
                    current file is copy of another archive entry.

            Flags may contain :data:`RAR5_XREDIR_ISDIR` bit.

        volume
            Volume nr, starting from 0.

        volume_file
            Volume file name, where file starts.

    '''

    # zipfile-compatible fields
    filename = None
    file_size = None
    compress_size = None
    date_time = None
    CRC = None
    volume = None
    orig_filename = None

    # optional extended time fields, datetime() objects.
    mtime = None
    ctime = None
    atime = None

    extract_version = None
    mode = None
    host_os = None
    compress_type = None

    # rar3-only fields
    comment = None
    arctime = None

    # rar5-only fields
    blake2sp_hash = None
    file_redir = None

    # internal fields
    flags = 0
    type = None

    def isdir(self):
        """Returns True if entry is a directory.

        Only RAR_BLOCK_FILE entries with all RAR_FILE_DIRECTORY bits set
        count as directories.
        """
        if self.type != RAR_BLOCK_FILE:
            return False
        dir_bits = self.flags & RAR_FILE_DIRECTORY
        return dir_bits == RAR_FILE_DIRECTORY

    def needs_password(self):
        """Returns True if data is stored password-protected.

        Only RAR_BLOCK_FILE entries with RAR_FILE_PASSWORD set qualify.
        """
        if self.type != RAR_BLOCK_FILE:
            return False
        return (self.flags & RAR_FILE_PASSWORD) > 0
class RarFile(object):
    '''Parse RAR structure, provide access to files in archive.

    Most calls are delegated to a format-specific parser object
    that is selected from the archive signature.
    '''

    #: Archive comment.  Unicode string or None.
    comment = None

    def __init__(self, rarfile, mode="r", charset=None, info_callback=None,
                 crc_check=True, errors="stop"):
        """Open and parse a RAR archive.

        Parameters:

            rarfile
                archive file name
            mode
                only 'r' is supported.
            charset
                fallback charset to use, if filenames are not already Unicode-enabled.
            info_callback
                debug callback, gets to see all archive entries.
            crc_check
                set to False to disable CRC checks
            errors
                Either "stop" to quietly stop parsing on errors,
                or "strict" to raise errors.  Default is "stop".

        Raises ValueError for an unknown `errors` value and
        NotImplementedError for any mode other than 'r'.
        """
        self._rarfile = rarfile
        self._charset = charset or DEFAULT_CHARSET
        self._info_callback = info_callback
        self._crc_check = crc_check
        self._password = None
        self._file_parser = None

        # 'errors' is validated before 'mode'
        if errors not in ("stop", "strict"):
            raise ValueError("Invalid value for 'errors' parameter.")
        self._strict = (errors == "strict")

        if mode != "r":
            raise NotImplementedError("RarFile supports only mode=r")

        self._parse()

    def __enter__(self):
        return self

    def __exit__(self, typ, value, traceback):
        self.close()

    def setpassword(self, password):
        '''Sets the password to use when extracting.

        When headers are encrypted (or nothing was parsed yet) the archive
        is parsed again, otherwise the password is handed to the parser.
        '''
        self._password = password
        parser = self._file_parser
        if parser and parser.has_header_encryption():
            # drop the parser so the archive gets parsed again below
            parser = self._file_parser = None
        if parser:
            parser.setpassword(self._password)
        else:
            self._parse()

    def needs_password(self):
        '''Returns True if any archive entries require password for extraction.'''
        return self._file_parser.needs_password()

    def namelist(self):
        '''Return list of filenames in archive.'''
        return [entry.filename for entry in self.infolist()]

    def infolist(self):
        '''Return RarInfo objects for all files/directories in archive.'''
        return self._file_parser.infolist()

    def volumelist(self):
        '''Returns filenames of archive volumes.

        In case of single-volume archive, the list contains
        just the name of main archive file.
        '''
        return self._file_parser.volumelist()

    def getinfo(self, fname):
        '''Return RarInfo for file.
        '''
        return self._file_parser.getinfo(fname)

    def open(self, fname, mode='r', psw=None):
        '''Returns file-like object (:class:`RarExtFile`),
        from where the data can be read.

        The object implements :class:`io.RawIOBase` interface, so it can
        be further wrapped with :class:`io.BufferedReader`
        and :class:`io.TextIOWrapper`.

        On older Python where io module is not available, it implements
        only .read(), .seek(), .tell() and .close() methods.

        The object is seekable, although the seeking is fast only on
        uncompressed files, on compressed files the seeking is implemented
        by reading ahead and/or restarting the decompression.

        Parameters:

            fname
                file name or RarInfo instance.
            mode
                must be 'r'
            psw
                password to use for extracting.
        '''
        if mode != 'r':
            raise NotImplementedError("RarFile.open() supports only mode=r")

        # entry lookup
        inf = self.getinfo(fname)
        if inf.isdir():
            raise TypeError("Directory does not have any data: " + inf.filename)

        # the password is passed on only for protected entries
        if not inf.needs_password():
            psw = None
        else:
            psw = psw or self._password
            if psw is None:
                raise PasswordRequired("File %s requires password" % inf.filename)

        return self._file_parser.open(inf, psw)

    def read(self, fname, psw=None):
        """Return uncompressed data for archive entry.

        For longer files using :meth:`RarFile.open` may be better idea.

        Parameters:

            fname
                filename or RarInfo instance
            psw
                password to use for extracting.
        """
        with self.open(fname, 'r', psw) as stream:
            return stream.read()

    def close(self):
        """Release open resources."""
        pass

    def printdir(self):
        """Print archive file list to stdout."""
        for entry in self.infolist():
            print(entry.filename)

    def extract(self, member, path=None, pwd=None):
        """Extract single file into current directory.

        Parameters:

            member
                filename or :class:`RarInfo` instance
            path
                optional destination path
            pwd
                optional password to use
        """
        fname = member.filename if isinstance(member, RarInfo) else member
        self._extract([fname], path, pwd)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all files into current directory.

        Parameters:

            path
                optional destination path
            members
                optional filename or :class:`RarInfo` instance list to extract
            pwd
                optional password to use
        """
        fnlist = []
        if members is not None:
            fnlist = [m.filename if isinstance(m, RarInfo) else m
                      for m in members]
        self._extract(fnlist, path, pwd)

    def testrar(self):
        """Let 'unrar' test the archive.
        """
        cmd = [UNRAR_TOOL]
        cmd.extend(TEST_ARGS)
        add_password_arg(cmd, self._password)
        cmd.append('--')
        # the tool runs while the XTempFile context is still active
        with XTempFile(self._rarfile) as rarfile:
            cmd.append(rarfile)
            proc = custom_popen(cmd)
            output = proc.communicate()[0]
            check_returncode(proc, output)

    def strerror(self):
        """Return error string if parsing failed,
        or None if no problems.
        """
        if self._file_parser:
            return self._file_parser.strerror()
        return "Not a RAR file"

    ##
    ## private methods
    ##

    def _parse(self):
        """Pick the parser matching the archive signature and run it."""
        ver = _get_rar_version(self._rarfile)
        if ver == 3:
            parser_class = RAR3Parser
        elif ver == 5:
            parser_class = RAR5Parser
        else:
            raise BadRarFile("Not a RAR file")

        self._file_parser = parser_class(self._rarfile, self._password, self._crc_check,
                                         self._charset, self._strict, self._info_callback)
        self._file_parser.parse()
        self.comment = self._file_parser.comment

    def _extract(self, fnlist, path=None, psw=None):
        """Call unrar to extract the given files."""
        cmd = [UNRAR_TOOL]
        cmd.extend(EXTRACT_ARGS)

        # password: explicit argument first, then the stored one
        add_password_arg(cmd, psw or self._password)
        cmd.append('--')

        # rar file
        with XTempFile(self._rarfile) as rarfn:
            cmd.append(rarfn)

            # file list, converted to the native path separator
            convert_sep = os.sep != PATH_SEP
            for fn in fnlist:
                if convert_sep:
                    fn = fn.replace(PATH_SEP, os.sep)
                cmd.append(fn)

            # destination path
            if path is not None:
                cmd.append(path + os.sep)

            # call
            proc = custom_popen(cmd)
            output = proc.communicate()[0]
            check_returncode(proc, output)
882 # File format parsing
class CommonParser(object):
    """Shared parser parts.

    Holds the state common to the format-specific parsers: the list and
    name lookup of parsed entries, the volume list, the password and the
    last parse error.  Subclasses supply `_expect_sig`, `process_entry`,
    `_decrypt_header`, `_parse_block_header` and `_open_hack`.
    """
    _main = None
    _hdrenc_main = None
    _needs_password = False
    _fd = None
    _expect_sig = None
    _parse_error = None
    _password = None
    comment = None

    def __init__(self, rarfile, password, crc_check, charset, strict, info_cb):
        """Store settings and create empty entry/volume containers."""
        self._rarfile = rarfile
        self._password = password
        self._crc_check = crc_check
        self._charset = charset
        self._strict = strict
        self._info_callback = info_cb
        self._info_list = []
        self._info_map = {}
        self._vol_list = []

    def has_header_encryption(self):
        """Returns True if headers are encrypted
        """
        if self._hdrenc_main:
            return True
        if self._main:
            if self._main.flags & RAR_MAIN_PASSWORD:
                return True
        return False

    def setpassword(self, psw):
        """Set cached password."""
        self._password = psw

    def volumelist(self):
        """Volume files"""
        return self._vol_list

    def needs_password(self):
        """Is password required"""
        return self._needs_password

    def strerror(self):
        """Last error"""
        return self._parse_error

    def infolist(self):
        """List of RarInfo records.
        """
        return self._info_list

    def getinfo(self, fname):
        """Return RarInfo for filename

        `fname` may be a file name or a :class:`RarInfo` instance; for
        the latter the lookup is done with its `filename`.  The name is
        tried as given and with path separators converted to PATH_SEP.

        Raises NoRarEntry when neither spelling is known.
        """
        # callers may pass an entry object instead of a name
        if isinstance(fname, RarInfo):
            fname = fname.filename

        # accept both ways here
        if PATH_SEP == '/':
            fname2 = fname.replace("\\", "/")
        else:
            fname2 = fname.replace("/", "\\")

        try:
            return self._info_map[fname]
        except KeyError:
            try:
                return self._info_map[fname2]
            except KeyError:
                raise NoRarEntry("No such file: %s" % fname)

    # read rar
    def parse(self):
        """Process file.

        The file object opened during parsing is always closed again.
        """
        self._fd = None
        try:
            self._parse_real()
        finally:
            if self._fd:
                self._fd.close()
                self._fd = None

    def _parse_real(self):
        """Walk over all headers of all volumes and register the entries."""
        fd = XFile(self._rarfile)
        self._fd = fd
        sig = fd.read(len(self._expect_sig))
        if sig != self._expect_sig:
            if isinstance(self._rarfile, (str, unicode)):
                raise NotRarFile("Not a Rar archive: {}".format(self._rarfile))
            raise NotRarFile("Not a Rar archive")

        volume = 0  # first vol (.rar) is 0
        more_vols = False
        endarc = False
        volfile = self._rarfile
        self._vol_list = [self._rarfile]
        while 1:
            if endarc:
                h = None    # don't read past ENDARC
            else:
                h = self._parse_header(fd)
            if not h:
                if more_vols:
                    # current volume is done, switch to the next one
                    volume += 1
                    fd.close()
                    try:
                        volfile = self._next_volname(volfile)
                        fd = XFile(volfile)
                    except IOError:
                        self._set_error("Cannot open next volume: %s", volfile)
                        break
                    self._fd = fd
                    sig = fd.read(len(self._expect_sig))
                    if sig != self._expect_sig:
                        self._set_error("Invalid volume sig: %s", volfile)
                        break
                    more_vols = False
                    endarc = False
                    self._vol_list.append(volfile)
                    continue
                break
            h.volume = volume
            h.volume_file = volfile

            if h.type == RAR_BLOCK_MAIN and not self._main:
                self._main = h
                if h.flags & RAR_MAIN_NEWNUMBERING:
                    # RAR 2.x does not set FIRSTVOLUME,
                    # so check it only if NEWNUMBERING is used
                    if (h.flags & RAR_MAIN_FIRSTVOLUME) == 0:
                        raise NeedFirstVolume("Need to start from first volume")
                if h.flags & RAR_MAIN_PASSWORD:
                    self._needs_password = True
                    if not self._password:
                        break
            elif h.type == RAR_BLOCK_ENDARC:
                more_vols = (h.flags & RAR_ENDARC_NEXT_VOLUME) > 0
                endarc = True
            elif h.type == RAR_BLOCK_FILE:
                # RAR 2.x does not write RAR_BLOCK_ENDARC
                if h.flags & RAR_FILE_SPLIT_AFTER:
                    more_vols = True
                # RAR 2.x does not set RAR_MAIN_FIRSTVOLUME
                if volume == 0 and h.flags & RAR_FILE_SPLIT_BEFORE:
                    raise NeedFirstVolume("Need to start from first volume")

            if h.needs_password():
                self._needs_password = True

            # store it
            self.process_entry(fd, h)

            if self._info_callback:
                self._info_callback(h)

            # go to next header
            if h.add_size > 0:
                fd.seek(h.data_offset + h.add_size, 0)

    def process_entry(self, fd, item):
        """Examine item, add into lookup cache."""
        raise NotImplementedError()

    def _decrypt_header(self, fd):
        """Return a reader for encrypted headers; format-specific."""
        raise NotImplementedError('_decrypt_header')

    def _parse_block_header(self, fd):
        """Read one block header; format-specific."""
        raise NotImplementedError('_parse_block_header')

    def _open_hack(self, inf, psw):
        """Open entry through a temporary archive; format-specific."""
        raise NotImplementedError('_open_hack')

    # read single header
    def _parse_header(self, fd):
        """Read one header, decrypting first when headers are encrypted.

        Returns None when no password is available for encrypted headers
        or when the header data is broken.
        """
        try:
            # handle encrypted headers
            if (self._main and self._main.flags & RAR_MAIN_PASSWORD) or self._hdrenc_main:
                if not self._password:
                    return
                fd = self._decrypt_header(fd)

            # now read actual header
            return self._parse_block_header(fd)
        except struct.error:
            self._set_error('Broken header in RAR file')
            return None

    # given current vol name, construct next one
    def _next_volname(self, volfile):
        """Return the file name of the volume following `volfile`.

        Raises IOError when the archive was given as a file object.
        """
        if is_filelike(volfile):
            raise IOError("Working on single FD")
        if self._main.flags & RAR_MAIN_NEWNUMBERING:
            return _next_newvol(volfile)
        return _next_oldvol(volfile)

    def _set_error(self, msg, *args):
        """Remember the error message; raise BadRarFile in strict mode."""
        if args:
            msg = msg % args
        self._parse_error = msg
        if self._strict:
            raise BadRarFile(msg)

    def open(self, inf, psw):
        """Return stream object for file data."""

        if inf.file_redir:
            # cannot leave to unrar as it expects copied file to exist
            if inf.file_redir[0] in (RAR5_XREDIR_FILE_COPY, RAR5_XREDIR_HARD_LINK):
                inf = self.getinfo(inf.file_redir[2])
                if not inf:
                    raise BadRarFile('cannot find copied file')

        if inf.flags & RAR_FILE_SPLIT_BEFORE:
            raise NeedFirstVolume("Partial file, please start from first volume: " + inf.filename)

        # is temp write usable?
        use_hack = 1
        if not self._main:
            use_hack = 0
        elif self._main._must_disable_hack():
            use_hack = 0
        elif inf._must_disable_hack():
            use_hack = 0
        elif is_filelike(self._rarfile):
            # file objects keep the hack; size limit and switch are not consulted
            pass
        elif inf.file_size > HACK_SIZE_LIMIT:
            use_hack = 0
        elif not USE_EXTRACT_HACK:
            use_hack = 0

        # now extract
        if inf.compress_type == RAR_M0 and (inf.flags & RAR_FILE_PASSWORD) == 0 and inf.file_redir is None:
            return self._open_clear(inf)
        elif use_hack:
            return self._open_hack(inf, psw)
        elif is_filelike(self._rarfile):
            return self._open_unrar_membuf(self._rarfile, inf, psw)
        else:
            return self._open_unrar(self._rarfile, inf, psw)

    def _open_clear(self, inf):
        """Read stored, unencrypted data directly."""
        return DirectReader(self, inf)

    def _open_hack_core(self, inf, psw, prefix, suffix):
        """Build a temporary one-entry archive and open it with unrar.

        The temp file gets `prefix`, then the entry's header and
        compressed data copied from its volume, then `suffix`.
        Raises BadRarFile when the volume ends before the entry does.
        """
        size = inf.compress_size + inf.header_size
        rf = XFile(inf.volume_file, 0)
        try:
            rf.seek(inf.header_offset)

            tmpfd, tmpname = mkstemp(suffix='.rar')
            try:
                # the temp file is closed before a failure path unlinks it
                with os.fdopen(tmpfd, "wb") as tmpf:
                    tmpf.write(prefix)
                    while size > 0:
                        buf = rf.read(min(size, BSIZE))
                        if not buf:
                            raise BadRarFile('read failed: ' + inf.filename)
                        tmpf.write(buf)
                        size -= len(buf)
                    tmpf.write(suffix)
            except BaseException:
                os.unlink(tmpname)
                raise
        finally:
            # source volume is closed on success and on every failure
            rf.close()

        return self._open_unrar(tmpname, inf, psw, tmpname)

    # write in-memory archive to temp file - needed for solid archives
    def _open_unrar_membuf(self, memfile, inf, psw):
        """Dump file-object archive into a temp file and open with unrar."""
        tmpname = membuf_tempfile(memfile)
        return self._open_unrar(tmpname, inf, psw, tmpname, force_file=True)

    # extract using unrar
    def _open_unrar(self, rarfile, inf, psw=None, tmpfile=None, force_file=False):
        """Build the unrar command line and return a PipeReader for it."""
        cmd = [UNRAR_TOOL] + list(OPEN_ARGS)
        add_password_arg(cmd, psw)
        cmd.append("--")
        cmd.append(rarfile)

        # not giving filename avoids encoding related problems
        if not tmpfile or force_file:
            fn = inf.filename
            if PATH_SEP != os.sep:
                fn = fn.replace(PATH_SEP, os.sep)
            cmd.append(fn)

        # read from unrar pipe
        return PipeReader(self, inf, cmd, tmpfile)
1181 # RAR3 format
class Rar3Info(RarInfo):
    """RAR3 specific fields."""
    extract_version = 15
    salt = None
    add_size = 0
    header_crc = None
    header_size = None
    header_offset = None
    data_offset = None
    _md_class = None
    _md_expect = None

    # make sure some rar5 fields are always present
    file_redir = None
    blake2sp_hash = None

    def _must_disable_hack(self):
        """Return True when this block rules out the temp-archive hack.

        File blocks: password or split-before/split-after flag set.
        Main block: solid or password flag set.
        """
        if self.type == RAR_BLOCK_FILE:
            blockers = RAR_FILE_PASSWORD | RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER
        elif self.type == RAR_BLOCK_MAIN:
            blockers = RAR_MAIN_SOLID | RAR_MAIN_PASSWORD
        else:
            return False
        return bool(self.flags & blockers)
1212 class RAR3Parser(CommonParser):
1213 """Parse RAR3 file format.
1215 _expect_sig = RAR_ID
1216 _last_aes_key = (None, None, None) # (salt, key, iv)
def _decrypt_header(self, fd):
    """Wrap `fd` in a HeaderDecrypt reader keyed from the password.

    Reads the 8-byte salt from `fd`; key and iv are recomputed only
    when the salt differs from the cached one.
    Raises NoCrypto when no crypto module could be imported.
    """
    if not _have_crypto:
        raise NoCrypto('Cannot parse encrypted headers - no crypto')
    salt = fd.read(8)
    cached_salt, key, iv = self._last_aes_key
    if cached_salt != salt:
        key, iv = rar3_s2k(self._password, salt)
        self._last_aes_key = (salt, key, iv)
    return HeaderDecrypt(fd, key, iv)
1229 # common header
def _parse_block_header(self, fd):
    """Read and CRC-check one RAR3 block header from `fd`.

    Returns a filled Rar3Info, or None on end of data, short read
    or CRC mismatch (the latter two also record an error).
    """
    h = Rar3Info()
    h.header_offset = fd.tell()
    base_size = S_BLK_HDR.size

    # read and parse base header
    base = fd.read(base_size)
    if not base:
        return None
    h.header_crc, h.type, h.flags, h.header_size = S_BLK_HDR.unpack_from(base)

    # read full header
    header = base
    if h.header_size > base_size:
        header = base + fd.read(h.header_size - base_size)
    h.data_offset = fd.tell()

    # unexpected EOF?
    if len(header) != h.header_size:
        self._set_error('Unexpected EOF when reading header')
        return None

    ofs = base_size

    # block has data associated with it?
    if h.flags & RAR_LONG_BLOCK:
        h.add_size, ofs = load_le32(header, ofs)
    else:
        h.add_size = 0

    # parse interesting ones, decide header boundaries for crc
    btype = h.type
    if btype == RAR_BLOCK_MARK:
        return h

    if btype == RAR_BLOCK_MAIN:
        ofs += 6
        if h.flags & RAR_MAIN_ENCRYPTVER:
            ofs += 1
        crc_end = ofs
        if h.flags & RAR_MAIN_COMMENT:
            self._parse_subblocks(h, header, ofs)
    elif btype == RAR_BLOCK_FILE:
        # parsing starts 4 bytes before the current offset
        ofs = self._parse_file_header(h, header, ofs - 4)
        crc_end = ofs
        if h.flags & RAR_FILE_COMMENT:
            self._parse_subblocks(h, header, ofs)
    elif btype == RAR_BLOCK_SUB:
        self._parse_file_header(h, header, ofs - 4)
        crc_end = h.header_size
    elif btype == RAR_BLOCK_OLD_AUTH:
        crc_end = ofs + 8
    elif btype == RAR_BLOCK_OLD_EXTRA:
        crc_end = ofs + 7
    else:
        crc_end = h.header_size

    # check crc
    if btype == RAR_BLOCK_OLD_SUB:
        # here the CRC input also includes the data read after the header
        crc_input = header[2:] + fd.read(h.add_size)
    else:
        crc_input = header[2:crc_end]

    actual_crc = rar_crc32(crc_input) & 0xFFFF

    # return good header
    if h.header_crc == actual_crc:
        return h

    # header parsing failed.
    self._set_error('Header CRC error (%02x): exp=%x got=%x (xlen = %d)',
                    h.type, h.header_crc, actual_crc, len(crc_input))

    # instead panicing, send eof
    return None
1307 # read file-specific header
1308 def _parse_file_header(self, h, hdata, pos):
1309 fld = S_FILE_HDR.unpack_from(hdata, pos)
1310 pos += S_FILE_HDR.size
1312 h.compress_size = fld[0]
1313 h.file_size = fld[1]
1314 h.host_os = fld[2]
1315 h.CRC = fld[3]
1316 h.date_time = parse_dos_time(fld[4])
1317 h.mtime = to_datetime(h.date_time)
1318 h.extract_version = fld[5]
1319 h.compress_type = fld[6]
1320 name_size = fld[7]
1321 h.mode = fld[8]
1323 h._md_class = CRC32Context
1324 h._md_expect = h.CRC
1326 if h.flags & RAR_FILE_LARGE:
1327 h1, pos = load_le32(hdata, pos)
1328 h2, pos = load_le32(hdata, pos)
1329 h.compress_size |= h1 << 32
1330 h.file_size |= h2 << 32
1331 h.add_size = h.compress_size
1333 name, pos = load_bytes(hdata, name_size, pos)
1334 if h.flags & RAR_FILE_UNICODE:
1335 nul = name.find(ZERO)
1336 h.orig_filename = name[:nul]
1337 u = UnicodeFilename(h.orig_filename, name[nul + 1:])
1338 h.filename = u.decode()
1340 # if parsing failed fall back to simple name
1341 if u.failed:
1342 h.filename = self._decode(h.orig_filename)
1343 else:
1344 h.orig_filename = name
1345 h.filename = self._decode(name)
1347 # change separator, if requested
1348 if PATH_SEP != '\\':
1349 h.filename = h.filename.replace('\\', PATH_SEP)
1351 if h.flags & RAR_FILE_SALT:
1352 h.salt, pos = load_bytes(hdata, 8, pos)
1353 else:
1354 h.salt = None
1356 # optional extended time stamps
1357 if h.flags & RAR_FILE_EXTTIME:
1358 pos = _parse_ext_time(h, hdata, pos)
1359 else:
1360 h.mtime = h.atime = h.ctime = h.arctime = None
1362 return pos
1364 # find old-style comment subblock
1365 def _parse_subblocks(self, h, hdata, pos):
1366 while pos < len(hdata):
1367 # ordinary block header
1368 t = S_BLK_HDR.unpack_from(hdata, pos)
1369 ___scrc, stype, sflags, slen = t
1370 pos_next = pos + slen
1371 pos += S_BLK_HDR.size
1373 # corrupt header
1374 if pos_next < pos:
1375 break
1377 # followed by block-specific header
1378 if stype == RAR_BLOCK_OLD_COMMENT and pos + S_COMMENT_HDR.size <= pos_next:
1379 declen, ver, meth, crc = S_COMMENT_HDR.unpack_from(hdata, pos)
1380 pos += S_COMMENT_HDR.size
1381 data = hdata[pos : pos_next]
1382 cmt = rar3_decompress(ver, meth, data, declen, sflags,
1383 crc, self._password)
1384 if not self._crc_check:
1385 h.comment = self._decode_comment(cmt)
1386 elif rar_crc32(cmt) & 0xFFFF == crc:
1387 h.comment = self._decode_comment(cmt)
1389 pos = pos_next
1390 return pos
1392 def _read_comment_v3(self, inf, psw=None):
1394 # read data
1395 with XFile(inf.volume_file) as rf:
1396 rf.seek(inf.data_offset)
1397 data = rf.read(inf.compress_size)
1399 # decompress
1400 cmt = rar3_decompress(inf.extract_version, inf.compress_type, data,
1401 inf.file_size, inf.flags, inf.CRC, psw, inf.salt)
1403 # check crc
1404 if self._crc_check:
1405 crc = rar_crc32(cmt)
1406 if crc != inf.CRC:
1407 return None
1409 return self._decode_comment(cmt)
1411 def _decode(self, val):
1412 for c in TRY_ENCODINGS:
1413 try:
1414 return val.decode(c)
1415 except UnicodeError:
1416 pass
1417 return val.decode(self._charset, 'replace')
1419 def _decode_comment(self, val):
1420 return self._decode(val)
1422 def process_entry(self, fd, item):
1423 if item.type == RAR_BLOCK_FILE:
1424 # use only first part
1425 if (item.flags & RAR_FILE_SPLIT_BEFORE) == 0:
1426 self._info_map[item.filename] = item
1427 self._info_list.append(item)
1428 elif len(self._info_list) > 0:
1429 # final crc is in last block
1430 old = self._info_list[-1]
1431 old.CRC = item.CRC
1432 old._md_expect = item._md_expect
1433 old.compress_size += item.compress_size
1435 # parse new-style comment
1436 if item.type == RAR_BLOCK_SUB and item.filename == 'CMT':
1437 if item.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER):
1438 pass
1439 elif item.flags & RAR_FILE_SOLID:
1440 # file comment
1441 cmt = self._read_comment_v3(item, self._password)
1442 if len(self._info_list) > 0:
1443 old = self._info_list[-1]
1444 old.comment = cmt
1445 else:
1446 # archive comment
1447 cmt = self._read_comment_v3(item, self._password)
1448 self.comment = cmt
1450 if item.type == RAR_BLOCK_MAIN:
1451 if item.flags & RAR_MAIN_COMMENT:
1452 self.comment = item.comment
1453 if item.flags & RAR_MAIN_PASSWORD:
1454 self._needs_password = True
1456 # put file compressed data into temporary .rar archive, and run
1457 # unrar on that, thus avoiding unrar going over whole archive
1458 def _open_hack(self, inf, psw):
1459 # create main header: crc, type, flags, size, res1, res2
1460 prefix = RAR_ID + S_BLK_HDR.pack(0x90CF, 0x73, 0, 13) + ZERO * (2 + 4)
1461 return self._open_hack_core(inf, psw, prefix, EMPTY)
1464 # RAR5 format
class Rar5Info(RarInfo):
    """Fields common to every RAR5 record type."""

    extract_version = 50
    header_crc = None
    header_size = None
    header_offset = None
    data_offset = None

    # present on all block types
    block_type = None
    block_flags = None
    add_size = 0
    block_extra_size = 0

    # type=MAIN
    volume_number = None
    _md_class = None
    _md_expect = None

    def _must_disable_hack(self):
        """Generic RAR5 records never disable the hack; subclasses override."""
        return False
class Rar5BaseFile(Rar5Info):
    """Fields shared by RAR5 file records and service records."""

    type = -1
    file_flags = None
    file_encryption = (0, 0, 0, EMPTY, EMPTY, EMPTY)
    file_compress_flags = None
    file_redir = None
    file_owner = None
    file_version = None
    blake2sp_hash = None

    def _must_disable_hack(self):
        """Return True if this entry's flags rule out the hack.

        That is the case for encrypted entries, entries split across
        volumes, solid-compressed entries and redirected entries.
        """
        split_mask = RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER
        return bool(
            self.flags & RAR_FILE_PASSWORD
            or self.block_flags & split_mask
            or self.file_compress_flags & RAR5_COMPR_SOLID
            or self.file_redir
        )
class Rar5FileInfo(Rar5BaseFile):
    """Record describing a file entry in a RAR5 archive."""

    # reported with the generic (RAR3-style) file block type
    type = RAR_BLOCK_FILE
class Rar5ServiceInfo(Rar5BaseFile):
    """Record describing a service entry in a RAR5 archive."""

    # reported with the generic (RAR3-style) sub block type
    type = RAR_BLOCK_SUB
class Rar5MainInfo(Rar5Info):
    """Main archive record of a RAR5 archive."""

    type = RAR_BLOCK_MAIN
    main_flags = None
    main_volume_number = None

    def _must_disable_hack(self):
        """Return True for solid archives, False otherwise."""
        return bool(self.main_flags & RAR5_MAIN_FLAG_SOLID)
class Rar5EncryptionInfo(Rar5Info):
    """Header-encryption record of a RAR5 archive."""

    type = RAR5_BLOCK_ENCRYPTION
    encryption_algo = None
    encryption_flags = None
    encryption_kdf_count = None
    encryption_salt = None
    encryption_check_value = None

    def needs_password(self):
        """Always True for this record type."""
        return True
class Rar5EndArcInfo(Rar5Info):
    """End-of-archive record of a RAR5 archive."""

    type = RAR_BLOCK_ENDARC
    endarc_flags = None
class RAR5Parser(CommonParser):
    """Parse RAR5 format.
    """
    _expect_sig = RAR5_ID
    _hdrenc_main = None

    # AES encrypted headers
    _last_aes256_key = (-1, None, None)       # (kdf_count, salt, key)

    def _gen_key(self, kdf_count, salt):
        """Return the AES-256 key for (*kdf_count*, *salt*).

        The most recently derived key is cached and reused when both
        parameters match.  The iteration count is ``1 << kdf_count``.

        Raises BadRarFile when *kdf_count* is larger than 24.
        """
        if self._last_aes256_key[:2] == (kdf_count, salt):
            return self._last_aes256_key[2]
        if kdf_count > 24:
            raise BadRarFile('Too large kdf_count')
        psw = self._password
        if isinstance(psw, unicode):
            psw = psw.encode('utf8')
        key = pbkdf2_sha256(psw, salt, 1 << kdf_count)
        self._last_aes256_key = (kdf_count, salt, key)
        return key

    def _decrypt_header(self, fd):
        """Return a decrypting reader over *fd* for encrypted headers.

        Key parameters come from the previously parsed encryption
        record; the 16-byte IV is read from *fd*.

        Raises NoCrypto when no crypto backend is available.
        """
        if not _have_crypto:
            raise NoCrypto('Cannot parse encrypted headers - no crypto')
        h = self._hdrenc_main
        key = self._gen_key(h.encryption_kdf_count, h.encryption_salt)
        iv = fd.read(16)
        return HeaderDecrypt(fd, key, iv)

    # common header
    def _parse_block_header(self, fd):
        """Read one block header from *fd* and return the matching record.

        Returns None for an oversized header length (above 2 MiB), a
        truncated header, a header CRC mismatch or an unknown block type.
        Truncation and CRC mismatch also record an error.
        """
        header_offset = fd.tell()

        # 4 bytes of CRC32 plus up to 3 bytes of header-size vint
        preload = 4 + 3
        start_bytes = fd.read(preload)
        header_crc, pos = load_le32(start_bytes, 0)
        hdrlen, pos = load_vint(start_bytes, pos)
        if hdrlen > 2 * 1024 * 1024:
            return None
        header_size = pos + hdrlen

        # read full header, check for EOF
        hdata = start_bytes + fd.read(header_size - len(start_bytes))
        if len(hdata) != header_size:
            self._set_error('Unexpected EOF when reading header')
            return None
        data_offset = fd.tell()

        # CRC covers everything after the CRC field itself
        calc_crc = rar_crc32(memoryview(hdata)[4:])
        if header_crc != calc_crc:
            # header parsing failed.
            self._set_error('Header CRC error: exp=%x got=%x (xlen = %d)',
                            header_crc, calc_crc, len(hdata))
            return None

        block_type, pos = load_vint(hdata, pos)

        if block_type == RAR5_BLOCK_MAIN:
            h, pos = self._parse_block_common(Rar5MainInfo(), hdata)
            h = self._parse_main_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_FILE:
            h, pos = self._parse_block_common(Rar5FileInfo(), hdata)
            h = self._parse_file_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_SERVICE:
            h, pos = self._parse_block_common(Rar5ServiceInfo(), hdata)
            h = self._parse_file_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_ENCRYPTION:
            h, pos = self._parse_block_common(Rar5EncryptionInfo(), hdata)
            h = self._parse_encryption_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_ENDARC:
            h, pos = self._parse_block_common(Rar5EndArcInfo(), hdata)
            h = self._parse_endarc_block(h, hdata, pos)
        else:
            h = None
        if h:
            h.header_offset = header_offset
            h.data_offset = data_offset
        return h

    def _parse_block_common(self, h, hdata):
        """Parse the fields every block starts with into *h*.

        Parsing restarts at offset 0 of *hdata*.  Returns ``(h, pos)``
        where *pos* points at the first type-specific field.
        """
        h.header_crc, pos = load_le32(hdata, 0)
        hdrlen, pos = load_vint(hdata, pos)
        h.header_size = hdrlen + pos
        h.block_type, pos = load_vint(hdata, pos)
        h.block_flags, pos = load_vint(hdata, pos)

        if h.block_flags & RAR5_BLOCK_FLAG_EXTRA_DATA:
            h.block_extra_size, pos = load_vint(hdata, pos)
        if h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA:
            h.add_size, pos = load_vint(hdata, pos)

        h.compress_size = h.add_size

        # mirror the flags into their generic equivalents
        if h.block_flags & RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN:
            h.flags |= RAR_SKIP_IF_UNKNOWN
        if h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA:
            h.flags |= RAR_LONG_BLOCK
        return h, pos

    def _parse_main_block(self, h, hdata, pos):
        """Parse main-archive fields into *h* and return it."""
        h.main_flags, pos = load_vint(hdata, pos)
        if h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR:
            # load_vint() returns (value, pos); keep only the number
            h.main_volume_number, pos = load_vint(hdata, pos)

        h.flags |= RAR_MAIN_NEWNUMBERING
        if h.main_flags & RAR5_MAIN_FLAG_SOLID:
            h.flags |= RAR_MAIN_SOLID
        if h.main_flags & RAR5_MAIN_FLAG_ISVOL:
            h.flags |= RAR_MAIN_VOLUME
        if h.main_flags & RAR5_MAIN_FLAG_RECOVERY:
            h.flags |= RAR_MAIN_RECOVERY
        if self._hdrenc_main:
            h.flags |= RAR_MAIN_PASSWORD
        # no volume number field is treated as "this is the first volume"
        if h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR == 0:
            h.flags |= RAR_MAIN_FIRSTVOLUME

        return h

    def _parse_file_block(self, h, hdata, pos):
        """Parse file/service fields and extra records into *h*; return it."""
        h.file_flags, pos = load_vint(hdata, pos)
        h.file_size, pos = load_vint(hdata, pos)
        h.mode, pos = load_vint(hdata, pos)

        if h.file_flags & RAR5_FILE_FLAG_HAS_MTIME:
            h.mtime, pos = load_unixtime(hdata, pos)
            h.date_time = h.mtime.timetuple()[:6]
        if h.file_flags & RAR5_FILE_FLAG_HAS_CRC32:
            h.CRC, pos = load_le32(hdata, pos)
            h._md_class = CRC32Context
            h._md_expect = h.CRC

        h.file_compress_flags, pos = load_vint(hdata, pos)
        h.file_host_os, pos = load_vint(hdata, pos)
        h.orig_filename, pos = load_vstr(hdata, pos)
        h.filename = h.orig_filename.decode('utf8', 'replace')

        # use compatible values
        if h.file_host_os == RAR5_OS_WINDOWS:
            h.host_os = RAR_OS_WIN32
        else:
            h.host_os = RAR_OS_UNIX
        h.compress_type = RAR_M0 + ((h.file_compress_flags >> 7) & 7)

        if h.block_extra_size:
            # allow 1 byte of garbage
            while pos < len(hdata) - 1:
                xsize, pos = load_vint(hdata, pos)
                xdata, pos = load_bytes(hdata, xsize, pos)
                self._process_file_extra(h, xdata)

        # mirror the flags into their generic equivalents
        if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE:
            h.flags |= RAR_FILE_SPLIT_BEFORE
        if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_AFTER:
            h.flags |= RAR_FILE_SPLIT_AFTER
        if h.file_flags & RAR5_FILE_FLAG_ISDIR:
            h.flags |= RAR_FILE_DIRECTORY
        if h.file_compress_flags & RAR5_COMPR_SOLID:
            h.flags |= RAR_FILE_SOLID

        return h

    def _parse_endarc_block(self, h, hdata, pos):
        """Parse end-of-archive fields into *h* and return it."""
        h.endarc_flags, pos = load_vint(hdata, pos)
        if h.endarc_flags & RAR5_ENDARC_FLAG_NEXT_VOL:
            h.flags |= RAR_ENDARC_NEXT_VOLUME
        return h

    def _parse_encryption_block(self, h, hdata, pos):
        """Parse header-encryption fields into *h*, remember and return it.

        Raises BadRarFile when the cipher is not AES-256.
        """
        h.encryption_algo, pos = load_vint(hdata, pos)
        h.encryption_flags, pos = load_vint(hdata, pos)
        h.encryption_kdf_count, pos = load_byte(hdata, pos)
        h.encryption_salt, pos = load_bytes(hdata, 16, pos)
        if h.encryption_flags & RAR5_ENC_FLAG_HAS_CHECKVAL:
            # load_bytes() returns (data, pos); keep only the data
            h.encryption_check_value, pos = load_bytes(hdata, 12, pos)
        if h.encryption_algo != RAR5_XENC_CIPHER_AES256:
            raise BadRarFile('Unsupported header encryption cipher')
        # later headers are decrypted with these parameters
        self._hdrenc_main = h
        return h

    # file extra record
    def _process_file_extra(self, h, xdata):
        """Dispatch one extra record *xdata* to its parser by record type.

        Service records and unknown types are ignored.
        """
        xtype, pos = load_vint(xdata, 0)
        if xtype == RAR5_XFILE_TIME:
            self._parse_file_xtime(h, xdata, pos)
        elif xtype == RAR5_XFILE_ENCRYPTION:
            self._parse_file_encryption(h, xdata, pos)
        elif xtype == RAR5_XFILE_HASH:
            self._parse_file_hash(h, xdata, pos)
        elif xtype == RAR5_XFILE_VERSION:
            self._parse_file_version(h, xdata, pos)
        elif xtype == RAR5_XFILE_REDIR:
            self._parse_file_redir(h, xdata, pos)
        elif xtype == RAR5_XFILE_OWNER:
            self._parse_file_owner(h, xdata, pos)
        elif xtype == RAR5_XFILE_SERVICE:
            pass
        else:
            pass

    # extra block for file time record
    def _parse_file_xtime(self, h, xdata, pos):
        """Load mtime/ctime/atime from a time extra record into *h*."""
        tflags, pos = load_vint(xdata, pos)
        # timestamps are in windows format unless flagged as unix time
        ldr = load_windowstime
        if tflags & RAR5_XTIME_UNIXTIME:
            ldr = load_unixtime
        if tflags & RAR5_XTIME_HAS_MTIME:
            h.mtime, pos = ldr(xdata, pos)
            h.date_time = h.mtime.timetuple()[:6]
        if tflags & RAR5_XTIME_HAS_CTIME:
            h.ctime, pos = ldr(xdata, pos)
        if tflags & RAR5_XTIME_HAS_ATIME:
            h.atime, pos = ldr(xdata, pos)

    # just remember encryption info
    def _parse_file_encryption(self, h, xdata, pos):
        """Store the encryption parameters of an entry on *h*.

        Marks the entry as password protected.  With tweaked checksums
        the stored hash cannot be compared, so checking is disabled.
        """
        algo, pos = load_vint(xdata, pos)
        flags, pos = load_vint(xdata, pos)
        kdf_count, pos = load_byte(xdata, pos)
        salt, pos = load_bytes(xdata, 16, pos)
        iv, pos = load_bytes(xdata, 16, pos)
        checkval = None
        if flags & RAR5_XENC_CHECKVAL:
            checkval, pos = load_bytes(xdata, 12, pos)
        if flags & RAR5_XENC_TWEAKED:
            h._md_expect = None
            h._md_class = NoHashContext

        h.file_encryption = (algo, flags, kdf_count, salt, iv, checkval)
        h.flags |= RAR_FILE_PASSWORD

    def _parse_file_hash(self, h, xdata, pos):
        """Store a BLAKE2sp hash record on *h*.

        The hash becomes the expected checksum only when blake2 support
        is available and the entry does not use tweaked checksums.
        """
        hash_type, pos = load_vint(xdata, pos)
        if hash_type == RAR5_XHASH_BLAKE2SP:
            h.blake2sp_hash, pos = load_bytes(xdata, 32, pos)
            if _have_blake2 and (h.file_encryption[1] & RAR5_XENC_TWEAKED) == 0:
                h._md_class = Blake2SP
                h._md_expect = h.blake2sp_hash

    def _parse_file_version(self, h, xdata, pos):
        """Store ``(flags, version)`` from a version record on *h*."""
        flags, pos = load_vint(xdata, pos)
        version, pos = load_vint(xdata, pos)
        h.file_version = (flags, version)

    def _parse_file_redir(self, h, xdata, pos):
        """Store ``(type, flags, target name)`` from a redirection record."""
        redir_type, pos = load_vint(xdata, pos)
        redir_flags, pos = load_vint(xdata, pos)
        redir_name, pos = load_vstr(xdata, pos)
        redir_name = redir_name.decode('utf8', 'replace')
        h.file_redir = (redir_type, redir_flags, redir_name)

    def _parse_file_owner(self, h, xdata, pos):
        """Store ``(user name, group name, uid, gid)`` on *h*.

        Fields missing from the record stay None.
        """
        user_name = group_name = user_id = group_id = None

        flags, pos = load_vint(xdata, pos)
        if flags & RAR5_XOWNER_UNAME:
            user_name, pos = load_vstr(xdata, pos)
        if flags & RAR5_XOWNER_GNAME:
            group_name, pos = load_vstr(xdata, pos)
        if flags & RAR5_XOWNER_UID:
            user_id, pos = load_vint(xdata, pos)
        if flags & RAR5_XOWNER_GID:
            group_id, pos = load_vint(xdata, pos)

        h.file_owner = (user_name, group_name, user_id, group_id)

    def process_entry(self, fd, item):
        """Register parsed block *item* in the parser state."""
        if item.block_type == RAR5_BLOCK_FILE:
            # use only first part
            if (item.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE) == 0:
                self._info_map[item.filename] = item
                self._info_list.append(item)
            elif len(self._info_list) > 0:
                # final crc is in last block
                old = self._info_list[-1]
                old.CRC = item.CRC
                old._md_expect = item._md_expect
                old.blake2sp_hash = item.blake2sp_hash
                old.compress_size += item.compress_size
        elif item.block_type == RAR5_BLOCK_SERVICE:
            if item.filename == 'CMT':
                self._load_comment(fd, item)

    def _load_comment(self, fd, item):
        """Read the archive comment from service block *item*.

        Split or compressed comments and unsupported ciphers are
        skipped silently.  On success the text goes to ``self.comment``.
        """
        if item.block_flags & (RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER):
            return None
        if item.compress_type != RAR_M0:
            return None

        if item.flags & RAR_FILE_PASSWORD:
            algo, ___flags, kdf_count, salt, iv, ___checkval = item.file_encryption
            if algo != RAR5_XENC_CIPHER_AES256:
                return None
            key = self._gen_key(kdf_count, salt)
            f = HeaderDecrypt(fd, key, iv)
            cmt = f.read(item.file_size)
        else:
            # archive comment
            with self._open_clear(item) as cmtstream:
                cmt = cmtstream.read()

        # rar bug? - appends zero to comment
        cmt = cmt.split(ZERO, 1)[0]
        self.comment = cmt.decode('utf8')

    def _open_hack(self, inf, psw):
        """Open *inf* via a temporary archive with minimal RAR5 headers."""
        # len, type, blk_flags, flags
        main_hdr = b'\x03\x01\x00\x00'
        endarc_hdr = b'\x03\x05\x00\x00'
        # every header is preceded by its CRC32
        main_hdr = S_LONG.pack(rar_crc32(main_hdr)) + main_hdr
        endarc_hdr = S_LONG.pack(rar_crc32(endarc_hdr)) + endarc_hdr
        return self._open_hack_core(inf, psw, RAR5_ID + main_hdr, endarc_hdr)
1874 ## Utility classes
class UnicodeFilename(object):
    """Decoder for the compressed unicode filenames used by RAR3.

    *name* is the 8-bit form of the filename and *encdata* the encoded
    unicode data.  After :meth:`decode`, ``failed`` is non-zero if
    either input ran out of bytes.
    """

    def __init__(self, name, encdata):
        self.std_name = bytearray(name)
        self.encdata = bytearray(encdata)
        self.pos = self.encpos = 0
        self.buf = bytearray()
        self.failed = 0

    def enc_byte(self):
        """Return the next encoded byte, or 0 (and flag failure) at the end."""
        if self.encpos >= len(self.encdata):
            self.failed = 1
            return 0
        value = self.encdata[self.encpos]
        self.encpos += 1
        return value

    def std_byte(self):
        """Return the 8-bit name byte at the current output position.

        Returns ``ord('?')`` and flags failure past the end of the name.
        """
        if self.pos >= len(self.std_name):
            self.failed = 1
            return ord('?')
        return self.std_name[self.pos]

    def put(self, lo, hi):
        """Append one UTF-16LE code unit and advance the output position."""
        self.buf.append(lo)
        self.buf.append(hi)
        self.pos += 1

    def decode(self):
        """Expand the encoded data and return the filename as text."""
        # first byte: default high byte for the 1-byte encodings
        hi = self.enc_byte()
        bits_left = 0
        total = len(self.encdata)
        while self.encpos < total:
            # each flags byte holds four 2-bit opcodes, highest bits first
            if bits_left == 0:
                flags = self.enc_byte()
                bits_left = 8
            bits_left -= 2
            opcode = (flags >> bits_left) & 3

            if opcode == 0:
                # low byte only, high byte is zero
                self.put(self.enc_byte(), 0)
            elif opcode == 1:
                # low byte only, default high byte
                self.put(self.enc_byte(), hi)
            elif opcode == 2:
                # both bytes given, low byte first
                lo = self.enc_byte()
                self.put(lo, self.enc_byte())
            else:
                # run copied from the 8-bit name
                n = self.enc_byte()
                if n & 0x80:
                    # with correction added to each byte, default high byte
                    delta = self.enc_byte()
                    for _ in range((n & 0x7f) + 2):
                        self.put((self.std_byte() + delta) & 0xFF, hi)
                else:
                    for _ in range(n + 2):
                        self.put(self.std_byte(), 0)
        return self.buf.decode("utf-16le", "replace")
class RarExtFile(RawIOBase):
    """Base class for file-like object that :meth:`RarFile.open` returns.

    Provides public methods and common crc checking.

    Behaviour:
     - no short reads - .read() and .readinto() read as much as requested.
     - no internal buffer, use io.BufferedReader for that.
    """

    #: Filename of the archive entry
    name = None

    def __init__(self, parser, inf):
        super(RarExtFile, self).__init__()

        # standard io.* properties
        self.name = inf.filename
        self.mode = 'rb'

        self._parser = parser
        self._inf = inf
        self._fd = None
        self._remain = 0
        self._returncode = 0

        self._md_context = None

        self._open()

    def _open(self):
        """(Re)start reading from the beginning of the entry.

        Closes any open source, installs a fresh hash context and
        resets the remaining-bytes counter.  Subclasses extend this to
        open the actual data source.
        """
        if self._fd:
            self._fd.close()
        md_class = self._inf._md_class or NoHashContext
        self._md_context = md_class()
        self._fd = None
        self._remain = self._inf.file_size

    def read(self, cnt=None):
        """Read all or specified amount of data from archive entry.

        Raises BadRarFile if less data than requested is available or
        if the final checksum does not match.
        """

        # sanitize cnt
        if cnt is None or cnt < 0:
            cnt = self._remain
        elif cnt > self._remain:
            cnt = self._remain
        if cnt == 0:
            return EMPTY

        # actual read
        data = self._read(cnt)
        if data:
            self._md_context.update(data)
            self._remain -= len(data)
        if len(data) != cnt:
            raise BadRarFile("Failed the read enough data")

        # done?
        if not data or self._remain == 0:
            # self.close()
            self._check()
        return data

    def _check(self):
        """Check final CRC.

        Does nothing when no expected value is known or when hashing
        is disabled (the context's digest is None).
        """
        final = self._md_context.digest()
        exp = self._inf._md_expect
        if exp is None:
            return
        if final is None:
            return
        if self._returncode:
            check_returncode(self, '')
        if self._remain != 0:
            raise BadRarFile("Failed the read enough data")
        if final != exp:
            raise BadRarFile("Corrupt file - CRC check failed: %s - exp=%r got=%r" % (
                self._inf.filename, exp, final))

    def _read(self, cnt):
        """Actual read that gets sanitized cnt."""

    def close(self):
        """Close open resources."""

        super(RarExtFile, self).close()

        if self._fd:
            self._fd.close()
            self._fd = None

    def __del__(self):
        """Hook delete to make sure tempfile is removed."""
        self.close()

    def readinto(self, buf):
        """Zero-copy read directly into buffer.

        Returns bytes read.
        """
        raise NotImplementedError('readinto')

    def tell(self):
        """Return current reading position in uncompressed data."""
        return self._inf.file_size - self._remain

    def seek(self, ofs, whence=0):
        """Seek in data.

        On uncompressed files, the seeking works by actual
        seeks so it's fast.  On compressed files it's slow
        - forward seeking happens by reading ahead,
        backwards by re-opening and decompressing from the start.

        The target is clamped to the range 0..file_size.  Checksum
        verification is disabled once a seek has been done.

        Returns the new position.  Raises ValueError for unknown whence.
        """

        # disable crc check when seeking
        self._md_context = NoHashContext()

        fsize = self._inf.file_size
        cur_ofs = self.tell()

        if whence == 0:     # seek from beginning of file
            new_ofs = ofs
        elif whence == 1:   # seek from current position
            new_ofs = cur_ofs + ofs
        elif whence == 2:   # seek from end of file
            new_ofs = fsize + ofs
        else:
            raise ValueError('Invalid value for whence')

        # sanity check
        if new_ofs < 0:
            new_ofs = 0
        elif new_ofs > fsize:
            new_ofs = fsize

        # do the actual seek
        if new_ofs >= cur_ofs:
            self._skip(new_ofs - cur_ofs)
        else:
            # reopen and seek
            self._open()
            # _open() installed a fresh hash context.  A subclass _skip()
            # may jump over data without hashing it, which would make the
            # final check fail on intact data - keep the check disabled.
            self._md_context = NoHashContext()
            self._skip(new_ofs)
        return self.tell()

    def _skip(self, cnt):
        """Read and discard data"""
        while cnt > 0:
            if cnt > 8192:
                buf = self.read(8192)
            else:
                buf = self.read(cnt)
            if not buf:
                break
            cnt -= len(buf)

    def readable(self):
        """Returns True"""
        return True

    def writable(self):
        """Returns False.

        Writing is not supported."""
        return False

    def seekable(self):
        """Returns True.

        Seeking is supported, although it's slow on compressed files.
        """
        return True

    def readall(self):
        """Read all remaining data"""
        # avoid RawIOBase default impl
        return self.read()
class PipeReader(RarExtFile):
    """Reader that takes entry data from the stdout of a child process.

    Also removes the optional temporary file on close.
    """

    def __init__(self, rf, inf, cmd, tempfile=None):
        # must be set before the base constructor, which calls _open()
        self._cmd = cmd
        self._proc = None
        self._tempfile = tempfile
        super(PipeReader, self).__init__(rf, inf)

    def _close_proc(self):
        """Close the pipes of the child, reap it and keep its exit code."""
        proc = self._proc
        if not proc:
            return
        for pipe in (proc.stdout, proc.stdin, proc.stderr):
            if pipe:
                pipe.close()
        proc.wait()
        self._returncode = proc.returncode
        self._proc = None

    def _open(self):
        """Stop any running child and launch the command anew."""
        super(PipeReader, self)._open()

        # stop old process
        self._close_proc()

        # launch new process
        self._returncode = 0
        proc = custom_popen(self._cmd)
        self._proc = proc
        self._fd = proc.stdout

        # avoid situation where unrar waits on stdin
        if proc.stdin:
            proc.stdin.close()

    def _read(self, cnt):
        """Read *cnt* bytes from the pipe, retrying after short reads."""
        chunk = self._fd.read(cnt)
        if not chunk or len(chunk) == cnt:
            # got everything at once, or hit EOF
            return chunk

        # short read, keep reading until satisfied or EOF
        parts = [chunk]
        missing = cnt - len(chunk)
        while missing > 0:
            chunk = self._fd.read(missing)
            if not chunk:
                break
            missing -= len(chunk)
            parts.append(chunk)
        return EMPTY.join(parts)

    def close(self):
        """Close open resources."""

        self._close_proc()
        super(PipeReader, self).close()

        tmp, self._tempfile = self._tempfile, None
        if tmp:
            try:
                os.unlink(tmp)
            except OSError:
                # best effort - the file may be gone already
                pass

    def readinto(self, buf):
        """Zero-copy read directly into buffer.

        Returns the number of bytes stored, at most the remaining
        size of the entry.
        """
        limit = min(len(buf), self._remain)
        view = memoryview(buf)
        filled = 0
        while filled < limit:
            n = self._fd.readinto(view[filled : limit])
            if not n:
                break
            self._md_context.update(view[filled : filled + n])
            self._remain -= n
            filled += n
        return filled
class DirectReader(RarExtFile):
    """Reader for stored (uncompressed) entries, reading the archive itself."""

    _cur = None
    _cur_avail = None
    _volfile = None

    def _open(self):
        """Open the entry's volume and parse the entry header again."""
        super(DirectReader, self)._open()

        self._volfile = self._inf.volume_file
        self._fd = XFile(self._volfile, 0)
        self._fd.seek(self._inf.header_offset, 0)
        self._cur = self._parser._parse_header(self._fd)
        # amount of entry data stored in the current volume
        self._cur_avail = self._cur.add_size

    def _skip(self, cnt):
        """Move forward *cnt* bytes by seeking, crossing volumes if needed."""
        while cnt > 0:
            # next vol needed?
            if self._cur_avail == 0 and not self._open_next():
                break

            if cnt > self._cur_avail:
                # target lies beyond this volume: consume the rest of it
                step = self._cur_avail
                cnt -= step
                self._remain -= step
                self._cur_avail = 0
            else:
                # target is inside this volume
                self._fd.seek(cnt, 1)
                self._cur_avail -= cnt
                self._remain -= cnt
                cnt = 0

    def _read(self, cnt):
        """Read from potentially multi-volume archive."""
        parts = []
        while cnt > 0:
            # next vol needed?
            if self._cur_avail == 0 and not self._open_next():
                break

            # fd is in read pos; never read past this volume's data
            chunk = self._fd.read(min(cnt, self._cur_avail))
            if not chunk:
                break

            # got some data
            cnt -= len(chunk)
            self._cur_avail -= len(chunk)
            parts.append(chunk)

        if len(parts) == 1:
            return parts[0]
        return EMPTY.join(parts)

    def _open_next(self):
        """Proceed to next volume.

        Returns False if the entry does not continue in another volume,
        True once the continuation is positioned for reading.  Raises
        BadRarFile if the next volume is not a valid continuation.
        """

        # is the file split over archives?
        if not self._cur.flags & RAR_FILE_SPLIT_AFTER:
            return False

        if self._fd:
            self._fd.close()
            self._fd = None

        # open next part
        self._volfile = self._parser._next_volname(self._volfile)
        fd = open(self._volfile, "rb", 0)
        self._fd = fd
        expected = self._parser._expect_sig
        if fd.read(len(expected)) != expected:
            raise BadRarFile("Invalid signature")

        # loop until first file header
        while True:
            cur = self._parser._parse_header(fd)
            if not cur:
                raise BadRarFile("Unexpected EOF")
            if cur.type in (RAR_BLOCK_MARK, RAR_BLOCK_MAIN):
                # step over any data attached to leading blocks
                if cur.add_size:
                    fd.seek(cur.add_size, 1)
                continue
            if cur.orig_filename != self._inf.orig_filename:
                raise BadRarFile("Did not found file entry")
            self._cur = cur
            self._cur_avail = cur.add_size
            return True

    def readinto(self, buf):
        """Zero-copy read directly into buffer."""
        view = memoryview(buf)
        wanted = len(buf)
        filled = 0
        while filled < wanted:
            # next vol needed?
            if self._cur_avail == 0 and not self._open_next():
                break

            # length for next read, limited to this volume's data
            step = min(wanted - filled, self._cur_avail)

            # read into temp view
            n = self._fd.readinto(view[filled : filled + step])
            if not n:
                break
            self._md_context.update(view[filled : filled + n])
            self._cur_avail -= n
            self._remain -= n
            filled += n
        return filled
class HeaderDecrypt(object):
    """Read-only wrapper that AES-CBC-decrypts data from another file."""

    def __init__(self, f, key, iv):
        self.f = f
        self.ciph = AES_CBC_Decrypt(key, iv)
        # decrypted bytes not handed out yet
        self.buf = EMPTY

    def tell(self):
        """Current file pos - works only on block boundaries."""
        return self.f.tell()

    def read(self, cnt=None):
        """Return up to *cnt* decrypted bytes.

        Fewer bytes are returned if the source ends early.
        Raises BadRarFile for requests above 8 KiB.
        """
        if cnt > 8 * 1024:
            raise BadRarFile('Bad count to header decrypt - wrong password?')

        # enough leftovers from an earlier block?
        if cnt <= len(self.buf):
            out, self.buf = self.buf[:cnt], self.buf[cnt:]
            return out

        # use up the leftovers, then decrypt block by block
        out, self.buf = self.buf, EMPTY
        cnt -= len(out)

        blklen = 16
        while cnt > 0:
            enc = self.f.read(blklen)
            if len(enc) < blklen:
                # source exhausted (or partial block)
                break
            dec = self.ciph.decrypt(enc)
            if cnt >= len(dec):
                out += dec
                cnt -= len(dec)
            else:
                # keep the unused tail for the next call
                out += dec[:cnt]
                self.buf = dec[cnt:]
                cnt = 0

        return out
2372 # handle (filename|filelike) object
class XFile(object):
    """Uniform file access for a filename or an already open file object.

    File objects given by the caller are rewound to the start and are
    not closed by :meth:`close`; files opened here are.
    """

    __slots__ = ('_fd', '_need_close')

    def __init__(self, xfile, bufsize=1024):
        self._need_close = not is_filelike(xfile)
        if self._need_close:
            self._fd = open(xfile, 'rb', bufsize)
        else:
            self._fd = xfile
            self._fd.seek(0)

    def read(self, n=None):
        """Read from file."""
        return self._fd.read(n)

    def tell(self):
        """Return file pos."""
        return self._fd.tell()

    def seek(self, ofs, whence=0):
        """Move file pos."""
        return self._fd.seek(ofs, whence)

    def readinto(self, dst):
        """Read into buffer."""
        return self._fd.readinto(dst)

    def close(self):
        """Close the file, but only if it was opened by this object."""
        if self._need_close:
            self._fd.close()

    def __enter__(self):
        return self

    def __exit__(self, typ, val, tb):
        self.close()
class NoHashContext(object):
    """Hash context that computes nothing; every method returns None."""

    def __init__(self, data=None):
        """Accept and ignore optional initial data."""

    def update(self, data):
        """Ignore *data*."""
        return None

    def digest(self):
        """Return None, meaning there is no hash to compare."""
        return None

    def hexdigest(self):
        """Return None, as there is no digest."""
        return None
class CRC32Context(object):
    """Hash context computing a running CRC32 via rar_crc32()."""

    __slots__ = ['_crc']

    def __init__(self, data=None):
        self._crc = 0
        if data:
            self.update(data)

    def update(self, data):
        """Fold *data* into the running checksum."""
        self._crc = rar_crc32(data, self._crc)

    def digest(self):
        """Return the checksum so far, as an integer."""
        return self._crc

    def hexdigest(self):
        """Return the checksum as 8 lowercase hex digits."""
        return '%08x' % (self.digest(),)
class Blake2SP(object):
    """Blake2sp hash context built from blake2s contexts.

    Input is cut into 64-byte blocks that are dealt round-robin to 8
    leaf blake2s contexts.  The final digest is a root blake2s context
    fed with the digests of all leaves.
    """
    __slots__ = ['_thread', '_buf', '_cur', '_digest']
    digest_size = 32
    block_size = 64
    parallelism = 8

    def __init__(self, data=None):
        self._buf = b''
        self._cur = 0
        self._digest = None
        leaves = self._thread = []

        # leaf nodes sit at depth 0; the highest offset is flagged as last
        final_ofs = self.parallelism - 1
        for ofs in range(self.parallelism):
            leaves.append(self._blake2s(ofs, 0, ofs == final_ofs))

        if data:
            self.update(data)

    def _blake2s(self, ofs, depth, is_last):
        """Create one blake2s tree node context."""
        return blake2s(node_offset=ofs, node_depth=depth, last_node=is_last,
                       depth=2, inner_size=32, fanout=self.parallelism)

    def _add_block(self, blk):
        """Feed one block to the current leaf and advance to the next."""
        leaf = self._thread[self._cur]
        leaf.update(blk)
        self._cur = (self._cur + 1) % self.parallelism

    def update(self, data):
        """Hash data.

        Incomplete trailing blocks are buffered until enough data
        arrives or the digest is requested.
        """
        mv = memoryview(data)
        size = self.block_size
        total = len(mv)
        start = 0

        held = self._buf
        if held:
            # first complete the partially filled block
            missing = size - len(held)
            if total < missing:
                self._buf = held + mv.tobytes()
                return
            self._add_block(held + mv[:missing].tobytes())
            start = missing

        # dispatch all full blocks
        while total - start >= size:
            self._add_block(mv[start:start + size])
            start += size

        # keep the remainder for later
        self._buf = mv[start:].tobytes()

    def digest(self):
        """Return final digest value.

        The value is computed once and cached.
        """
        if self._digest is not None:
            return self._digest

        tail = self._buf
        if tail:
            self._add_block(tail)
            self._buf = EMPTY

        # root node at depth 1 combines the leaf digests
        root = self._blake2s(0, 1, True)
        for leaf in self._thread:
            root.update(leaf.digest())
        self._digest = root.digest()
        return self._digest

    def hexdigest(self):
        """Hexadecimal digest."""
        raw = self.digest()
        return tohex(raw)
2513 ## Utility functions
# Precompiled little-endian struct formats for single values:
# unsigned 32-bit, unsigned 16-bit and unsigned 8-bit.
S_LONG = Struct('<L')
S_SHORT = Struct('<H')
S_BYTE = Struct('<B')

# Block header, packed by rar3_decompress() as (crc16, block type, flags, header length).
S_BLK_HDR = Struct('<HBHH')
# File header body, packed by rar3_decompress() as
# (packed size, unpacked size, host OS, crc32, date, version, method, name length, mode).
S_FILE_HDR = Struct('<LLBLLBBHL')
# Comment header layout: 16-bit, 8-bit, 8-bit, 16-bit fields.
S_COMMENT_HDR = Struct('<HBBH')
def load_vint(buf, pos):
    """Decode a variable-size integer stored at `pos`.

    Every byte carries 7 payload bits, lowest group first; a byte
    below 0x80 ends the number.  At most 11 bytes are inspected.

    Returns (value, position after the number).  Raises BadRarFile
    if no terminating byte is found.
    """
    stop = min(pos + 11, len(buf))
    value = 0
    shift = 0
    for idx in range(pos, stop):
        octet = _byte_code(buf[idx])
        value += (octet & 0x7F) << shift
        shift += 7
        if octet < 0x80:
            return value, idx + 1
    raise BadRarFile('cannot load vint')
def load_byte(buf, pos):
    """Read one unsigned byte at `pos`.

    Returns (value, next position).  Raises BadRarFile if `buf` is
    too short.
    """
    nxt = pos + 1
    if len(buf) < nxt:
        raise BadRarFile('cannot load byte')
    (value,) = S_BYTE.unpack_from(buf, pos)
    return value, nxt
def load_le32(buf, pos):
    """Read a little-endian unsigned 32-bit integer at `pos`.

    Returns (value, next position).  Raises BadRarFile if `buf` is
    too short.
    """
    nxt = pos + 4
    if len(buf) < nxt:
        raise BadRarFile('cannot load le32')
    (value,) = S_LONG.unpack_from(buf, pos)
    return value, nxt
def load_bytes(buf, num, pos):
    """Slice `num` bytes out of `buf` starting at `pos`.

    Returns (data, next position).  Raises BadRarFile if `buf` does
    not contain that many bytes.
    """
    stop = pos + num
    if stop <= len(buf):
        return buf[pos:stop], stop
    raise BadRarFile('cannot load bytes')
def load_vstr(buf, pos):
    """Read a byte string whose length is stored as a leading vint.

    Returns (data, next position).
    """
    length, body_pos = load_vint(buf, pos)
    return load_bytes(buf, length, body_pos)
def load_dostime(buf, pos):
    """Read a 32-bit little-endian DOS timestamp as datetime.

    Returns (datetime, next position).
    """
    raw, nxt = load_le32(buf, pos)
    parts = parse_dos_time(raw)
    return to_datetime(parts), nxt
def load_unixtime(buf, pos):
    """Read a 32-bit little-endian unix timestamp as UTC datetime.

    Returns (datetime, next position).
    """
    seconds, nxt = load_le32(buf, pos)
    return datetime.fromtimestamp(seconds, UTC), nxt
def load_windowstime(buf, pos):
    """Read a 64-bit little-endian windows timestamp as UTC datetime.

    The stored value counts 100ns units since the windows epoch
    (1601).  Returns (datetime, next position).
    """
    # distance between windows epoch (1601) and unix epoch (1970), in seconds
    epoch_delta = 11644473600
    ticks_per_sec = 10000000

    # value is stored as low word followed by high word
    low, pos = load_le32(buf, pos)
    high, pos = load_le32(buf, pos)
    ticks = (high << 32) | low

    whole = ticks // ticks_per_sec
    frac = ticks % ticks_per_sec
    stamp = datetime.fromtimestamp(whole - epoch_delta, UTC)
    # 100ns units -> microseconds
    return stamp.replace(microsecond=frac // 10), pos
2586 # new-style next volume
def _next_newvol(volfile):
    """Return name of the next volume, new-style naming.

    The rightmost digit in the name is where incrementing starts.
    Raises BadRarName if the name contains no digit.
    """
    for idx in range(len(volfile) - 1, -1, -1):
        if '0' <= volfile[idx] <= '9':
            return _inc_volname(volfile, idx)
    raise BadRarName("Cannot construct volume name: " + volfile)
2595 # old-style next volume
2596 def _next_oldvol(volfile):
2597 # rar -> r00
2598 if volfile[-4:].lower() == '.rar':
2599 return volfile[:-2] + '00'
2600 return _inc_volname(volfile, len(volfile) - 1)
2602 # increase digits with carry, otherwise just increment char
2603 def _inc_volname(volfile, i):
2604 fn = list(volfile)
2605 while i >= 0:
2606 if fn[i] != '9':
2607 fn[i] = chr(ord(fn[i]) + 1)
2608 break
2609 fn[i] = '0'
2610 i -= 1
2611 return ''.join(fn)
2613 # rar3 extended time fields
def _parse_ext_time(h, data, pos):
    """Parse rar3 extended time fields into header object `h`.

    Sets h.ctime, h.atime and h.arctime; when an extended mtime is
    present, h.mtime and h.date_time are replaced too.  Returns the
    position after the parsed data.
    """
    # flags and rest of data can be missing
    flags = 0
    if len(data) >= pos + 2:
        (flags,) = S_SHORT.unpack_from(data, pos)
        pos += 2

    # each timestamp owns one 4-bit group, mtime in the topmost one
    mtime, pos = _parse_xtime(flags >> 12, data, pos, h.mtime)
    h.ctime, pos = _parse_xtime(flags >> 8, data, pos)
    h.atime, pos = _parse_xtime(flags >> 4, data, pos)
    h.arctime, pos = _parse_xtime(flags, data, pos)

    if mtime:
        h.mtime = mtime
        h.date_time = mtime.timetuple()[:6]
    return pos
2630 # rar3 one extended time field
2631 def _parse_xtime(flag, data, pos, basetime=None):
2632 res = None
2633 if flag & 8:
2634 if not basetime:
2635 basetime, pos = load_dostime(data, pos)
2637 # load second fractions
2638 rem = 0
2639 cnt = flag & 3
2640 for _ in range(cnt):
2641 b, pos = load_byte(data, pos)
2642 rem = (b << 16) | (rem >> 8)
2644 # convert 100ns units to microseconds
2645 usec = rem // 10
2646 if usec > 1000000:
2647 usec = 999999
2649 # dostime has room for 30 seconds only, correct if needed
2650 if flag & 4 and basetime.second < 59:
2651 res = basetime.replace(microsecond=usec, second=basetime.second + 1)
2652 else:
2653 res = basetime.replace(microsecond=usec)
2654 return res, pos
def is_filelike(obj):
    """Tell a file object apart from a filename.

    Returns False for string objects and True for objects that have
    `read`, `tell` and `seek` attributes.  Raises ValueError for
    anything else.
    """
    if isinstance(obj, str):
        return False
    if isinstance(obj, unicode):
        return False
    for attr in ('read', 'tell', 'seek'):
        if not hasattr(obj, attr):
            raise ValueError("Invalid object passed as file")
    return True
def rar3_s2k(psw, salt):
    """String-to-key hash for RAR3.

    Runs 16 * 0x4000 SHA1 updates over the UTF-16LE password plus
    salt, each followed by the low 3 bytes of the round counter.
    One IV byte is taken from the running digest at the start of
    each group of 0x4000 rounds.

    Returns (key, iv): 16 key bytes with every 32-bit word
    byte-swapped, and the collected IV bytes.
    """
    if not isinstance(psw, unicode):
        psw = psw.decode('utf8')
    seed = psw.encode('utf-16le') + salt

    group = 0x4000
    iv = EMPTY
    hasher = sha1()
    for n in range(16 * group):
        counter = S_LONG.pack(n)
        hasher.update(seed + counter[:3])
        if n % group == 0:
            # last byte of the intermediate digest
            iv += hasher.digest()[19:20]

    # swap byte order inside each 32-bit word of the key
    words = unpack(">LLLL", hasher.digest()[:16])
    return pack("<LLLL", *words), iv
def rar3_decompress(vers, meth, data, declen=0, flags=0, crc=0, psw=None, salt=None):
    """Decompress blob of compressed data.

    Used for data with non-standard header - eg. comments.

    The blob is wrapped into a minimal temporary RAR3 archive (main
    header plus one file header named 'data') which is handed to the
    unrar tool; the tool's output is returned.  Stored, unencrypted
    data is returned as is.  Returns empty bytes when the flags ask
    for a salt but none was given.
    """
    # already uncompressed?
    if meth == RAR_M0 and not (flags & RAR_FILE_PASSWORD):
        return data

    # take only necessary flags
    keep = RAR_FILE_PASSWORD | RAR_FILE_SALT | RAR_FILE_DICTMASK
    flags = (flags & keep) | RAR_LONG_BLOCK

    # file header: date 0, mode 0x20
    name = b'data'
    file_hdr = S_FILE_HDR.pack(len(data), declen, RAR_OS_MSDOS, crc,
                               0, vers, meth, len(name), 0x20)
    file_hdr += name
    if flags & RAR_FILE_SALT:
        if not salt:
            return EMPTY
        file_hdr += salt

    # full header: pack with zero crc first, then again with the
    # checksum computed over everything after the crc field
    total_len = S_BLK_HDR.size + len(file_hdr)
    draft = S_BLK_HDR.pack(0, RAR_BLOCK_FILE, flags, total_len) + file_hdr
    hdr_crc = rar_crc32(draft[2:]) & 0xFFFF
    block = S_BLK_HDR.pack(hdr_crc, RAR_BLOCK_FILE, flags, total_len) + file_hdr

    # archive main header
    main_hdr = S_BLK_HDR.pack(0x90CF, RAR_BLOCK_MAIN, 0, 13) + ZERO * (2 + 4)

    # decompress via temp rar
    tmpfd, tmpname = mkstemp(suffix='.rar')
    tmpf = os.fdopen(tmpfd, "wb")
    try:
        tmpf.write(RAR_ID + main_hdr + block + data)
        tmpf.close()

        cmd = [UNRAR_TOOL] + list(OPEN_ARGS)
        add_password_arg(cmd, psw, (flags & RAR_FILE_PASSWORD))
        cmd.append(tmpname)

        proc = custom_popen(cmd)
        return proc.communicate()[0]
    finally:
        tmpf.close()
        os.unlink(tmpname)
def to_datetime(t):
    """Convert 6-part time tuple into datetime object.

    None is passed through.  Values that datetime rejects are
    clamped: month into 1..12, day into 1..last day of that month,
    hour to at most 23, minute and second to at most 59.
    """
    if t is None:
        return None

    year, mon, day, h, m, s = t

    # fast path: values are usually valid
    try:
        return datetime(year, mon, day, h, m, s)
    except ValueError:
        pass

    # sanitize invalid values; february is allowed 29 days here
    month_days = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
    mon = max(1, min(mon, 12))
    day = max(1, min(day, month_days[mon]))
    h = min(h, 23)
    m = min(m, 59)
    s = min(s, 59)

    try:
        return datetime(year, mon, day, h, m, s)
    except ValueError:
        # only feb 29 in a non-leap year gets a second chance
        if not (mon == 2 and day == 29):
            raise
    return datetime(year, mon, 28, h, m, s)
def parse_dos_time(stamp):
    """Parse standard 32-bit DOS timestamp.

    Bit layout, lowest bits first: 5 bits seconds/2, 6 bits minute,
    5 bits hour, 5 bits day, 4 bits month, 7 bits years since 1980.

    Returns (year, month, day, hour, minute, second) without any
    range checking.
    """
    half_secs = stamp & 0x1F
    minute = (stamp >> 5) & 0x3F
    hour = (stamp >> 11) & 0x1F
    day = (stamp >> 16) & 0x1F
    month = (stamp >> 21) & 0x0F
    year = ((stamp >> 25) & 0x7F) + 1980
    return (year, month, day, hour, minute, half_secs * 2)
def custom_popen(cmd):
    """Disconnect cmd from parent fds, read only from stdout.

    Starts `cmd` unbuffered with stdin and stdout as pipes and
    stderr merged into stdout.  Returns the Popen object.

    Raises RarCannotExec if the executable is not found; other
    OSErrors are passed on.
    """
    # needed for py2exe: CREATE_NO_WINDOW on windows
    creationflags = 0x08000000 if sys.platform == 'win32' else 0

    # run command
    try:
        return Popen(cmd, bufsize=0, stdout=PIPE, stdin=PIPE, stderr=STDOUT,
                     creationflags=creationflags)
    except OSError as ex:
        if ex.errno != errno.ENOENT:
            raise
        raise RarCannotExec("Unrar not installed? (rarfile.UNRAR_TOOL=%r)" % UNRAR_TOOL)
def custom_check(cmd, ignore_retcode=False):
    """Run command, collect output, raise error if needed.

    Returns the collected output.  Raises RarExecError on non-zero
    exit code unless `ignore_retcode` is set.
    """
    proc = custom_popen(cmd)
    output = proc.communicate()[0]
    if ignore_retcode or not proc.returncode:
        return output
    raise RarExecError("Check-run failed")
def add_password_arg(cmd, psw, ___required=False):
    """Append password switch to commandline.

    Nothing is added when the alternative tool is in use.  Otherwise
    '-p<password>' is appended, or '-p-' when no password is given.
    The third argument is not used.
    """
    if UNRAR_TOOL == ALT_TOOL:
        return
    switch = '-p-' if psw is None else '-p' + psw
    cmd.append(switch)
def check_returncode(p, out):
    """Raise exception according to unrar exit code.

    Returns silently for exit code 0.  Otherwise the code is mapped
    to an exception class, whose docstring, the exit code and the
    tool output (if any) form the error message.
    """
    code = p.returncode
    if code == 0:
        return

    # map return code to exception class, codes from rar.txt
    if UNRAR_TOOL == ALT_TOOL:
        # alternative tool: no known codes
        errmap = [None]
    else:
        errmap = [None,
                  RarWarning, RarFatalError, RarCRCError, RarLockedArchiveError,    # 1..4
                  RarWriteError, RarOpenError, RarUserError, RarMemoryError,        # 5..8
                  RarCreateError, RarNoFilesError, RarWrongPassword]                # 9..11

    if 0 < code < len(errmap):
        exc = errmap[code]
    elif code == 255:
        exc = RarUserBreak
    elif code < 0:
        exc = RarSignalExit
    else:
        exc = RarUnknownError

    # format message
    if not out:
        msg = "%s [%d]" % (exc.__doc__, p.returncode)
    else:
        msg = "%s [%d]: %s" % (exc.__doc__, p.returncode, out)

    raise exc(msg)
def hmac_sha256(key, data):
    """Return raw HMAC-SHA256 digest of `data` under `key`."""
    mac = HMAC(key, data, sha256)
    return mac.digest()
def membuf_tempfile(memfile):
    """Copy a file object into a new temp file with '.rar' suffix.

    `memfile` is rewound and copied in BSIZE chunks.  Returns the
    temp file name.  On any error the temp file is removed and the
    error is re-raised.
    """
    memfile.seek(0, 0)

    tmpfd, tmpname = mkstemp(suffix='.rar')
    tmpf = os.fdopen(tmpfd, "wb")

    try:
        chunk = memfile.read(BSIZE)
        while chunk:
            tmpf.write(chunk)
            chunk = memfile.read(BSIZE)
        tmpf.close()
    except BaseException:
        # do not leave a half-written file behind
        tmpf.close()
        os.unlink(tmpname)
        raise
    return tmpname
class XTempFile(object):
    """Context manager that yields a real filename for an archive.

    A file object is first copied into a temp file, which is removed
    again on exit.  A filename is passed through untouched.
    """
    __slots__ = ('_tmpfile', '_filename')

    def __init__(self, rarfile):
        if not is_filelike(rarfile):
            # plain filename, nothing to clean up later
            self._tmpfile = None
            self._filename = rarfile
            return
        self._tmpfile = membuf_tempfile(rarfile)
        self._filename = self._tmpfile

    def __enter__(self):
        return self._filename

    def __exit__(self, exc_type, exc_value, tb):
        if not self._tmpfile:
            return
        # best-effort removal of the temp copy
        try:
            os.unlink(self._tmpfile)
        except OSError:
            pass
        self._tmpfile = None
2900 # Check if unrar works
# Snapshot of the configured tool and its argument lists, taken when the
# module is loaded; _check_unrar_tool() restores the globals from these
# when the primary tool turns out to be runnable.
ORIG_UNRAR_TOOL = UNRAR_TOOL
ORIG_OPEN_ARGS = OPEN_ARGS
ORIG_EXTRACT_ARGS = EXTRACT_ARGS
ORIG_TEST_ARGS = TEST_ARGS
def _check_unrar_tool():
    """Pick a runnable extraction tool and set the module globals.

    The originally configured tool is tried first; if it cannot be
    executed the alternative tool is tried.  When neither can be
    executed the globals are left untouched.
    """
    global UNRAR_TOOL, OPEN_ARGS, EXTRACT_ARGS, TEST_ARGS

    # does UNRAR_TOOL work?
    try:
        custom_check([ORIG_UNRAR_TOOL], True)
    except RarCannotExec:
        pass
    else:
        UNRAR_TOOL = ORIG_UNRAR_TOOL
        OPEN_ARGS = ORIG_OPEN_ARGS
        EXTRACT_ARGS = ORIG_EXTRACT_ARGS
        TEST_ARGS = ORIG_TEST_ARGS
        return

    # does ALT_TOOL work?
    try:
        custom_check([ALT_TOOL] + list(ALT_CHECK_ARGS), True)
    except RarCannotExec:
        # no usable tool, only uncompressed archives work
        return

    # replace config
    UNRAR_TOOL = ALT_TOOL
    OPEN_ARGS = ALT_OPEN_ARGS
    EXTRACT_ARGS = ALT_EXTRACT_ARGS
    TEST_ARGS = ALT_TEST_ARGS
# Probe for a runnable tool once, when the module is loaded.
_check_unrar_tool()