From 63310e6df93f44f8cc1157b6a33337f293af2532 Mon Sep 17 00:00:00 2001 From: Marko Kreen Date: Wed, 8 Jun 2016 23:19:47 +0300 Subject: [PATCH] Support rar5 archive format - Refactor parsing out from main class, with shared code - RARv5 RarInfo will fake most existing fields to be compatible with current RARv3 fields, eg: .compress_type, .flags, .host_os. This makes internally some things simpler, hopefully for users as well. Incompat: - PATH_SEP is now '/' by default. It's default in RARv5 and in zipfile. Cleanups: - Remove NEED_COMMENTS option, comments are always extracted. - Remove UNICODE_COMMENTS option, they are always decoded. - Remove USE_DATETIME option, .date_time is always tuple, mtime, ctime, atime, arctime are always datetime objects. - Bunch of random code cleanups --- rarfile.py | 2408 +++++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 1669 insertions(+), 739 deletions(-) diff --git a/rarfile.py b/rarfile.py index 25b6119..c6e0d44 100644 --- a/rarfile.py +++ b/rarfile.py @@ -54,74 +54,123 @@ here they are with defaults, and reason to change it:: # Set to full path of unrar.exe if it is not in PATH rarfile.UNRAR_TOOL = "unrar" - # Set to 0 if you don't look at comments and want to - # avoid wasting time for parsing them - rarfile.NEED_COMMENTS = 1 - - # Set up to 1 if you don't want to deal with decoding comments - # from unknown encoding. rarfile will try couple of common - # encodings in sequence. - rarfile.UNICODE_COMMENTS = 0 - - # Set to 1 if you prefer timestamps to be datetime objects - # instead tuples - rarfile.USE_DATETIME = 0 - - # Set to '/' to be more compatible with zipfile - rarfile.PATH_SEP = '\\' + # Set to '\\' to be more compatible with old rarfile + rarfile.PATH_SEP = '/' For more details, refer to source. """ -__version__ = '2.8' - -# export only interesting items -__all__ = ['is_rarfile', 'RarInfo', 'RarFile', 'RarExtFile'] +from __future__ import division, print_function ## ## Imports and compat - support both Python 2.x and 3.x ## -import sys, os, struct, errno +import sys +import os +import errno +import struct + from struct import pack, unpack, Struct -from binascii import crc32 +from binascii import crc32, hexlify from tempfile import mkstemp from subprocess import Popen, PIPE, STDOUT -from datetime import datetime from io import RawIOBase -from hashlib import sha1 +from hashlib import sha1, sha256 +from hmac import HMAC +from datetime import datetime, timedelta, tzinfo + +# fixed offset timezone, for UTC +try: + from datetime import timezone +except ImportError: + class timezone(tzinfo): + """Compat timezone.""" + __slots__ = ('_ofs', '_name') + _DST = timedelta(0) + + def __init__(self, offset, name): + super(timezone, self).__init__() + self._ofs, self._name = offset, name + + def utcoffset(self, dt): + return self._ofs + + def tzname(self, dt): + return self._name + + def dst(self, dt): + return self._DST # only needed for encryped headers try: try: from cryptography.hazmat.primitives.ciphers import algorithms, modes, Cipher from cryptography.hazmat.backends import default_backend + from cryptography.hazmat.primitives import hashes + from cryptography.hazmat.primitives.kdf import pbkdf2 + class AES_CBC_Decrypt(object): - block_size = 16 + """Decrypt API""" def __init__(self, key, iv): ciph = Cipher(algorithms.AES(key), modes.CBC(iv), default_backend()) - self.dec = ciph.decryptor() - def decrypt(self, data): - return self.dec.update(data) + self.decrypt = ciph.decryptor().update + + def pbkdf2_sha256(password, salt, iters): + """PBKDF2 with HMAC-SHA256""" + ctx = pbkdf2.PBKDF2HMAC(hashes.SHA256(), 32, salt, iters, default_backend()) + return ctx.derive(password) + except ImportError: from Crypto.Cipher import AES + from Crypto.Protocol import KDF + class AES_CBC_Decrypt(object): - block_size = 16 + """Decrypt API""" def __init__(self, key, iv): - self.dec = AES.new(key, AES.MODE_CBC, iv) - def decrypt(self, data): - return self.dec.decrypt(data) + self.decrypt = AES.new(key, AES.MODE_CBC, iv).decrypt + + def pbkdf2_sha256(password, salt, iters): + """PBKDF2 with HMAC-SHA256""" + return KDF.PBKDF2(password, salt, 32, iters, hmac_sha256) + _have_crypto = 1 except ImportError: _have_crypto = 0 +try: + from pyblake2 import blake2s + _have_blake2 = True +except ImportError: + _have_blake2 = False + # compat with 2.x if sys.hexversion < 0x3000000: - # prefer 3.x behaviour - range = xrange -else: + def rar_crc32(data, prev=0): + """CRC32 with unsigned values. + """ + if (prev > 0) and (prev & 0x80000000): + prev -= (1 << 32) + res = crc32(data, prev) + if res < 0: + res += (1 << 32) + return res + tohex = hexlify + _byte_code = ord +else: # pragma: no cover + def tohex(data): + """Return hex string.""" + return hexlify(data).decode('ascii') + rar_crc32 = crc32 unicode = str + _byte_code = int # noqa + + +__version__ = '3.0' + +# export only interesting items +__all__ = ['is_rarfile', 'RarInfo', 'RarFile', 'RarExtFile'] ## ## Module configuration. Can be tuned after importing. @@ -166,36 +215,27 @@ ALT_CHECK_ARGS = ('--help',) USE_EXTRACT_HACK = 1 #: limit the filesize for tmp archive usage -HACK_SIZE_LIMIT = 20*1024*1024 - -#: whether to parse file/archive comments. -NEED_COMMENTS = 1 - -#: whether to convert comments to unicode strings -UNICODE_COMMENTS = 0 - -#: Convert RAR time tuple into datetime() object -USE_DATETIME = 0 +HACK_SIZE_LIMIT = 20 * 1024 * 1024 #: Separator for path name components. RAR internally uses '\\'. #: Use '/' to be similar with zipfile. -PATH_SEP = '\\' +PATH_SEP = '/' ## ## rar constants ## # block types -RAR_BLOCK_MARK = 0x72 # r -RAR_BLOCK_MAIN = 0x73 # s -RAR_BLOCK_FILE = 0x74 # t -RAR_BLOCK_OLD_COMMENT = 0x75 # u -RAR_BLOCK_OLD_EXTRA = 0x76 # v -RAR_BLOCK_OLD_SUB = 0x77 # w -RAR_BLOCK_OLD_RECOVERY = 0x78 # x -RAR_BLOCK_OLD_AUTH = 0x79 # y -RAR_BLOCK_SUB = 0x7a # z -RAR_BLOCK_ENDARC = 0x7b # { +RAR_BLOCK_MARK = 0x72 # r +RAR_BLOCK_MAIN = 0x73 # s +RAR_BLOCK_FILE = 0x74 # t +RAR_BLOCK_OLD_COMMENT = 0x75 # u +RAR_BLOCK_OLD_EXTRA = 0x76 # v +RAR_BLOCK_OLD_SUB = 0x77 # w +RAR_BLOCK_OLD_RECOVERY = 0x78 # x +RAR_BLOCK_OLD_AUTH = 0x79 # y +RAR_BLOCK_SUB = 0x7a # z +RAR_BLOCK_ENDARC = 0x7b # { # flags for RAR_BLOCK_MAIN RAR_MAIN_VOLUME = 0x0001 @@ -257,196 +297,335 @@ RAR_M3 = 0x33 RAR_M4 = 0x34 RAR_M5 = 0x35 +# +# RAR5 constants +# + +RAR5_BLOCK_MAIN = 1 +RAR5_BLOCK_FILE = 2 +RAR5_BLOCK_SERVICE = 3 +RAR5_BLOCK_ENCRYPTION = 4 +RAR5_BLOCK_ENDARC = 5 + +RAR5_BLOCK_FLAG_EXTRA_DATA = 0x01 +RAR5_BLOCK_FLAG_DATA_AREA = 0x02 +RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN = 0x04 +RAR5_BLOCK_FLAG_SPLIT_BEFORE = 0x08 +RAR5_BLOCK_FLAG_SPLIT_AFTER = 0x10 +RAR5_BLOCK_FLAG_DEPENDS_PREV = 0x20 +RAR5_BLOCK_FLAG_KEEP_WITH_PARENT = 0x40 + +RAR5_MAIN_FLAG_ISVOL = 0x01 +RAR5_MAIN_FLAG_HAS_VOLNR = 0x02 +RAR5_MAIN_FLAG_SOLID = 0x04 +RAR5_MAIN_FLAG_RECOVERY = 0x08 +RAR5_MAIN_FLAG_LOCKED = 0x10 + +RAR5_FILE_FLAG_ISDIR = 0x01 +RAR5_FILE_FLAG_HAS_MTIME = 0x02 +RAR5_FILE_FLAG_HAS_CRC32 = 0x04 +RAR5_FILE_FLAG_UNKNOWN_SIZE = 0x08 + +RAR5_COMPR_SOLID = 0x40 + +RAR5_ENC_FLAG_HAS_CHECKVAL = 0x01 + +RAR5_ENDARC_FLAG_NEXT_VOL = 0x01 + +RAR5_XFILE_ENCRYPTION = 1 +RAR5_XFILE_HASH = 2 +RAR5_XFILE_TIME = 3 +RAR5_XFILE_VERSION = 4 +RAR5_XFILE_REDIR = 5 +RAR5_XFILE_OWNER = 6 +RAR5_XFILE_SERVICE = 7 + +RAR5_XTIME_UNIXTIME = 0x01 +RAR5_XTIME_HAS_MTIME = 0x02 +RAR5_XTIME_HAS_CTIME = 0x04 +RAR5_XTIME_HAS_ATIME = 0x08 + +RAR5_XENC_CIPHER_AES256 = 0 + +RAR5_XENC_CHECKVAL = 0x01 +RAR5_XENC_TWEAKED = 0x02 + +RAR5_XHASH_BLAKE2SP = 0 + +RAR5_XREDIR_UNIX_SYMLINK = 1 +RAR5_XREDIR_WINDOWS_SYMLINK = 2 +RAR5_XREDIR_WINDOWS_JUNCTION = 3 +RAR5_XREDIR_HARD_LINK = 4 +RAR5_XREDIR_FILE_COPY = 5 + +RAR5_XREDIR_ISDIR = 0x01 + +RAR5_XOWNER_UNAME = 0x01 +RAR5_XOWNER_GNAME = 0x02 +RAR5_XOWNER_UID = 0x04 +RAR5_XOWNER_GID = 0x08 + +RAR5_OS_WINDOWS = 0 +RAR5_OS_UNIX = 1 + ## ## internal constants ## RAR_ID = b"Rar!\x1a\x07\x00" -ZERO = b"\0" -EMPTY = b"" - -S_BLK_HDR = Struct(' 0 + class Error(Exception): """Base class for rarfile errors.""" + class BadRarFile(Error): """Incorrect data in archive.""" + class NotRarFile(Error): """The file is not RAR archive.""" + class BadRarName(Error): """Cannot guess multipart name components.""" + class NoRarEntry(Error): """File not found in RAR""" + class PasswordRequired(Error): """File requires password""" + class NeedFirstVolume(Error): """Need to start from first volume.""" + class NoCrypto(Error): """Cannot parse encrypted headers - no crypto available.""" + class RarExecError(Error): """Problem reported by unrar/rar.""" + class RarWarning(RarExecError): """Non-fatal error""" + class RarFatalError(RarExecError): """Fatal error""" + class RarCRCError(RarExecError): """CRC error during unpacking""" + class RarLockedArchiveError(RarExecError): """Must not modify locked archive""" + class RarWriteError(RarExecError): """Write error""" + class RarOpenError(RarExecError): """Open error""" + class RarUserError(RarExecError): """User error""" + class RarMemoryError(RarExecError): """Memory error""" + class RarCreateError(RarExecError): """Create error""" + class RarNoFilesError(RarExecError): """No files that match pattern were found""" + class RarUserBreak(RarExecError): """User stop""" + +class RarWrongPassword(RarExecError): + """Incorrect password""" + class RarUnknownError(RarExecError): """Unknown exit code""" + class RarSignalExit(RarExecError): """Unrar exited with signal""" + class RarCannotExec(RarExecError): """Executable not found.""" -def is_rarfile(xfile): - '''Check quickly whether file is rar archive.''' - fd = XFile(xfile) - buf = fd.read(len(RAR_ID)) - fd.close() - return buf == RAR_ID - - class RarInfo(object): r'''An entry in rar archive. - :mod:`zipfile`-compatible fields: - + RAR3 extended timestamps are :class:`datetime.datetime` objects without timezone. + RAR5 extended timestamps are :class:`datetime.datetime` objects with UTC timezone. + + Attributes: + filename File name with relative path. - Default path separator is '\\', to change set rarfile.PATH_SEP. - Always unicode string. + Path separator is '/'. Always unicode string. + date_time - Modification time, tuple of (year, month, day, hour, minute, second). - Or datetime() object if USE_DATETIME is set. + File modification timestamp. As tuple of (year, month, day, hour, minute, second). + RAR5 allows archives where it is missing, it's None then. + file_size Uncompressed size. + compress_size Compressed size. - CRC - CRC-32 of uncompressed file, unsigned int. - comment - File comment. Byte string or None. Use UNICODE_COMMENTS - to get automatic decoding to unicode. - volume - Volume nr, starting from 0. - - RAR-specific fields: compress_type - Compression method: 0x30 - 0x35. + Compression method: one of :data:`RAR_M0` .. :data:`RAR_M5` constants. + extract_version - Minimal Rar version needed for decompressing. + Minimal Rar version needed for decompressing. As (major*10 + minor), + so 2.9 is 29. + + RAR3: 10, 20, 29 + + RAR5 does not have such field in archive, it's simply set to 50. + host_os Host OS type, one of RAR_OS_* constants. + + RAR3: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`, :data:`RAR_OS_MSDOS`, + :data:`RAR_OS_OS2`, :data:`RAR_OS_BEOS`. + + RAR5: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`. + mode File attributes. May be either dos-style or unix-style, depending on host_os. - volume_file - Volume file name, where file starts. + mtime - Optional time field: Modification time, with float seconds. - Same as .date_time but with more precision. + File modification time. Same value as :attr:`date_time` + but as :class:`datetime.datetime` object with extended precision. + ctime - Optional time field: creation time, with float seconds. + Optional time field: creation time. As :class:`datetime.datetime` object. + atime - Optional time field: last access time, with float seconds. + Optional time field: last access time. As :class:`datetime.datetime` object. + arctime - Optional time field: archival time, with float seconds. + Optional time field: archival time. As :class:`datetime.datetime` object. + (RAR3-only) + + CRC + CRC-32 of uncompressed file, unsigned int. + + RAR5: may be None. + + blake2sp_hash + Blake2SP hash over decompressed data. (RAR5-only) - Internal fields: + comment + Optional file comment field. Unicode string. (RAR3-only) + + file_redir + If not None, file is link of some sort. Contains tuple of (type, flags, target). + (RAR5-only) + + Type is one of constants: + + :data:`RAR5_XREDIR_UNIX_SYMLINK` + unix symlink to target. + :data:`RAR5_XREDIR_WINDOWS_SYMLINK` + windows symlink to target. + :data:`RAR5_XREDIR_WINDOWS_JUNCTION` + windows junction. + :data:`RAR5_XREDIR_HARD_LINK` + hard link to target. + :data:`RAR5_XREDIR_FILE_COPY` + current file is copy of another archive entry. + + Flags may contain :data:`RAR5_XREDIR_ISDIR` bit. + + volume + Volume nr, starting from 0. + + volume_file + Volume file name, where file starts. - type - One of RAR_BLOCK_* types. Only entries with type==RAR_BLOCK_FILE are shown in .infolist(). - flags - For files, RAR_FILE_* bits. ''' - __slots__ = ( - # zipfile-compatible fields - 'filename', - 'file_size', - 'compress_size', - 'date_time', - 'comment', - 'CRC', - 'volume', - 'orig_filename', # bytes in unknown encoding - - # rar-specific fields - 'extract_version', - 'compress_type', - 'host_os', - 'mode', - 'type', - 'flags', - - # optional extended time fields - # tuple where the sec is float, or datetime(). - 'mtime', # same as .date_time - 'ctime', - 'atime', - 'arctime', - - # RAR internals - 'name_size', - 'header_size', - 'header_crc', - 'file_offset', - 'add_size', - 'header_data', - 'header_base', - 'header_offset', - 'salt', - 'volume_file', - ) + # zipfile-compatible fields + filename = None + file_size = None + compress_size = None + date_time = None + comment = None + CRC = None + volume = None + orig_filename = None + + # optional extended time fields, datetime() objects. + mtime = None + ctime = None + atime = None + + extract_version = None + mode = None + host_os = None + compress_type = None + + # rar3-only fields + comment = None + arctime = None + + # rar5-only fields + blake2sp_hash = None + file_redir = None + + # internal fields + flags = 0 + type = None def isdir(self): - '''Returns True if the entry is a directory.''' + """Returns True if entry is a directory. + """ if self.type == RAR_BLOCK_FILE: return (self.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY return False def needs_password(self): - return (self.flags & RAR_FILE_PASSWORD) > 0 + """Returns True if data is stored password-protected. + """ + if self.type == RAR_BLOCK_FILE: + return (self.flags & RAR_FILE_PASSWORD) > 0 + return False class RarFile(object): '''Parse RAR structure, provide access to files in archive. ''' - #: Archive comment. Byte string or None. Use :data:`UNICODE_COMMENTS` - #: to get automatic decoding to unicode. + #: Archive comment. Unicode string or None. comment = None def __init__(self, rarfile, mode="r", charset=None, info_callback=None, - crc_check = True, errors = "stop"): + crc_check=True, errors="stop"): """Open and parse a RAR archive. - + Parameters: rarfile @@ -463,18 +642,12 @@ class RarFile(object): Either "stop" to quietly stop parsing on errors, or "strict" to raise errors. Default is "stop". """ - self.rarfile = rarfile - self.comment = None + self._rarfile = rarfile self._charset = charset or DEFAULT_CHARSET self._info_callback = info_callback - - self._info_list = [] - self._info_map = {} - self._parse_error = None - self._needs_password = False - self._password = None self._crc_check = crc_check - self._vol_list = [] + self._password = None + self._file_parser = None if errors == "stop": self._strict = False @@ -483,8 +656,6 @@ class RarFile(object): else: raise ValueError("Invalid value for 'errors' parameter.") - self._main = None - if mode != "r": raise NotImplementedError("RarFile supports only mode=r") @@ -493,18 +664,23 @@ class RarFile(object): def __enter__(self): return self - def __exit__(self, type, value, traceback): + def __exit__(self, typ, value, traceback): self.close() def setpassword(self, password): '''Sets the password to use when extracting.''' self._password = password - if not self._main: + if self._file_parser: + if self._file_parser.has_header_encryption(): + self._file_parser = None + if not self._file_parser: self._parse() + else: + self._file_parser.setpassword(self._password) def needs_password(self): '''Returns True if any archive entries require password for extraction.''' - return self._needs_password + return self._file_parser.needs_password() def namelist(self): '''Return list of filenames in archive.''' @@ -512,7 +688,7 @@ class RarFile(object): def infolist(self): '''Return RarInfo objects for all files/directories in archive.''' - return self._info_list + return self._file_parser.infolist() def volumelist(self): '''Returns filenames of archive volumes. @@ -520,32 +696,17 @@ class RarFile(object): In case of single-volume archive, the list contains just the name of main archive file. ''' - return self._vol_list + return self._file_parser.volumelist() def getinfo(self, fname): - '''Return RarInfo for file.''' - - if isinstance(fname, RarInfo): - return fname - - # accept both ways here - if PATH_SEP == '/': - fname2 = fname.replace("\\", "/") - else: - fname2 = fname.replace("/", "\\") - - try: - return self._info_map[fname] - except KeyError: - try: - return self._info_map[fname2] - except KeyError: - raise NoRarEntry("No such file: "+fname) + '''Return RarInfo for file. + ''' + return self._file_parser.getinfo(fname) - def open(self, fname, mode = 'r', psw = None): + def open(self, fname, mode='r', psw=None): '''Returns file-like object (:class:`RarExtFile`), from where the data can be read. - + The object implements :class:`io.RawIOBase` interface, so it can be further wrapped with :class:`io.BufferedReader` and :class:`io.TextIOWrapper`. @@ -575,9 +736,6 @@ class RarFile(object): if inf.isdir(): raise TypeError("Directory does not have any data: " + inf.filename) - if inf.flags & RAR_FILE_SPLIT_BEFORE: - raise NeedFirstVolume("Partial file, please start from first volume: " + inf.filename) - # check password if inf.needs_password(): psw = psw or self._password @@ -586,34 +744,11 @@ class RarFile(object): else: psw = None - # is temp write usable? - use_hack = 1 - if not self._main: - use_hack = 0 - elif self._main.flags & (RAR_MAIN_SOLID | RAR_MAIN_PASSWORD): - use_hack = 0 - elif inf.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER): - use_hack = 0 - elif is_filelike(self.rarfile): - pass - elif inf.file_size > HACK_SIZE_LIMIT: - use_hack = 0 - elif not USE_EXTRACT_HACK: - use_hack = 0 - - # now extract - if inf.compress_type == RAR_M0 and (inf.flags & RAR_FILE_PASSWORD) == 0: - return self._open_clear(inf) - elif use_hack: - return self._open_hack(inf, psw) - elif is_filelike(self.rarfile): - return self._open_unrar_membuf(self.rarfile, inf, psw) - else: - return self._open_unrar(self.rarfile, inf, psw) + return self._file_parser.open(inf, psw) - def read(self, fname, psw = None): + def read(self, fname, psw=None): """Return uncompressed data for archive entry. - + For longer files using :meth:`RarFile.open` may be better idea. Parameters: @@ -624,11 +759,8 @@ class RarFile(object): password to use for extracting. """ - f = self.open(fname, 'r', psw) - try: + with self.open(fname, 'r', psw) as f: return f.read() - finally: - f.close() def close(self): """Release open resources.""" @@ -641,7 +773,7 @@ class RarFile(object): def extract(self, member, path=None, pwd=None): """Extract single file into current directory. - + Parameters: member @@ -659,7 +791,7 @@ class RarFile(object): def extractall(self, path=None, members=None, pwd=None): """Extract all files into current directory. - + Parameters: path @@ -684,77 +816,145 @@ class RarFile(object): cmd = [UNRAR_TOOL] + list(TEST_ARGS) add_password_arg(cmd, self._password) cmd.append('--') - - if is_filelike(self.rarfile): - tmpname = membuf_tempfile(self.rarfile) - cmd.append(tmpname) - else: - tmpname = None - cmd.append(self.rarfile) - - try: + with XTempFile(self._rarfile) as rarfile: + cmd.append(rarfile) p = custom_popen(cmd) output = p.communicate()[0] check_returncode(p, output) - finally: - if tmpname: - os.unlink(tmpname) def strerror(self): """Return error string if parsing failed, or None if no problems. """ - return self._parse_error + if not self._file_parser: + return "Not a RAR file" + return self._file_parser.strerror() ## ## private methods ## - def _set_error(self, msg, *args): - if args: - msg = msg % args - self._parse_error = msg - if self._strict: - raise BadRarFile(msg) + def _parse(self): + ver = _get_rar_version(self._rarfile) + if ver == 3: + p3 = RAR3Parser(self._rarfile, self._password, self._crc_check, + self._charset, self._strict, self._info_callback) + self._file_parser = p3 # noqa + elif ver == 5: + p5 = RAR5Parser(self._rarfile, self._password, self._crc_check, + self._charset, self._strict, self._info_callback) + self._file_parser = p5 # noqa + else: + raise BadRarFile("Not a RAR file") - # store entry - def _process_entry(self, item): - if item.type == RAR_BLOCK_FILE: - # use only first part - if (item.flags & RAR_FILE_SPLIT_BEFORE) == 0: - self._info_map[item.filename] = item - self._info_list.append(item) - # remember if any items require password - if item.needs_password(): - self._needs_password = True - elif len(self._info_list) > 0: - # final crc is in last block - old = self._info_list[-1] - old.CRC = item.CRC - old.compress_size += item.compress_size + self._file_parser.parse() + self.comment = self._file_parser.comment - # parse new-style comment - if item.type == RAR_BLOCK_SUB and item.filename == 'CMT': - if not NEED_COMMENTS: - pass - elif item.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER): - pass - elif item.flags & RAR_FILE_SOLID: - # file comment - cmt = self._read_comment_v3(item, self._password) - if len(self._info_list) > 0: - old = self._info_list[-1] - old.comment = cmt - else: - # archive comment - cmt = self._read_comment_v3(item, self._password) - self.comment = cmt + # call unrar to extract a file + def _extract(self, fnlist, path=None, psw=None): + cmd = [UNRAR_TOOL] + list(EXTRACT_ARGS) - if self._info_callback: - self._info_callback(item) + # pasoword + psw = psw or self._password + add_password_arg(cmd, psw) + cmd.append('--') + + # rar file + with XTempFile(self._rarfile) as rarfn: + cmd.append(rarfn) + + # file list + for fn in fnlist: + if os.sep != PATH_SEP: + fn = fn.replace(PATH_SEP, os.sep) + cmd.append(fn) + + # destination path + if path is not None: + cmd.append(path + os.sep) + + # call + p = custom_popen(cmd) + output = p.communicate()[0] + check_returncode(p, output) + +# +# File format parsing +# + +class CommonParser(object): + """Shared parser parts.""" + _main = None + _hdrenc_main = None + _needs_password = False + _fd = None + _expect_sig = None + _parse_error = None + _password = None + comment = None + + def __init__(self, rarfile, password, crc_check, charset, strict, info_cb): + self._rarfile = rarfile + self._password = password + self._crc_check = crc_check + self._charset = charset + self._strict = strict + self._info_callback = info_cb + self._info_list = [] + self._info_map = {} + self._vol_list = [] + + def has_header_encryption(self): + """Returns True if headers are encrypted + """ + if self._hdrenc_main: + return True + if self._main: + if self._main.flags & RAR_MAIN_PASSWORD: + return True + return False + + def setpassword(self, psw): + """Set cached password.""" + self._password = psw + + def volumelist(self): + """Volume files""" + return self._vol_list + + def needs_password(self): + """Is password required""" + return self._needs_password + + def strerror(self): + """Last error""" + return self._parse_error + + def infolist(self): + """List of RarInfo records. + """ + return self._info_list + + def getinfo(self, fname): + """Return RarInfo for filename + """ + # accept both ways here + if PATH_SEP == '/': + fname2 = fname.replace("\\", "/") + else: + fname2 = fname.replace("/", "\\") + + try: + return self._info_map[fname] + except KeyError: + try: + return self._info_map[fname2] + except KeyError: + raise NoRarEntry("No such file: %s" % fname) # read rar - def _parse(self): + def parse(self): + """Process file.""" self._fd = None try: self._parse_real() @@ -764,19 +964,19 @@ class RarFile(object): self._fd = None def _parse_real(self): - fd = XFile(self.rarfile) + fd = XFile(self._rarfile) self._fd = fd - id = fd.read(len(RAR_ID)) - if id != RAR_ID: - if isinstance(self.rarfile, (str, unicode)): - raise NotRarFile("Not a Rar archive: {}".format(self.rarfile)) + sig = fd.read(len(self._expect_sig)) + if sig != self._expect_sig: + if isinstance(self._rarfile, (str, unicode)): + raise NotRarFile("Not a Rar archive: {}".format(self._rarfile)) raise NotRarFile("Not a Rar archive") volume = 0 # first vol (.rar) is 0 - more_vols = 0 - endarc = 0 - volfile = self.rarfile - self._vol_list = [self.rarfile] + more_vols = False + endarc = False + volfile = self._rarfile + self._vol_list = [self._rarfile] while 1: if endarc: h = None # don't read past ENDARC @@ -793,8 +993,12 @@ class RarFile(object): self._set_error("Cannot open next volume: %s", volfile) break self._fd = fd - more_vols = 0 - endarc = 0 + sig = fd.read(len(self._expect_sig)) + if sig != self._expect_sig: + self._set_error("Invalid volume sig: %s", volfile) + break + more_vols = False + endarc = False self._vol_list.append(volfile) continue break @@ -811,44 +1015,49 @@ class RarFile(object): if h.flags & RAR_MAIN_PASSWORD: self._needs_password = True if not self._password: - self._main = None break elif h.type == RAR_BLOCK_ENDARC: - more_vols = h.flags & RAR_ENDARC_NEXT_VOLUME - endarc = 1 + more_vols = (h.flags & RAR_ENDARC_NEXT_VOLUME) > 0 + endarc = True elif h.type == RAR_BLOCK_FILE: # RAR 2.x does not write RAR_BLOCK_ENDARC if h.flags & RAR_FILE_SPLIT_AFTER: - more_vols = 1 + more_vols = True # RAR 2.x does not set RAR_MAIN_FIRSTVOLUME if volume == 0 and h.flags & RAR_FILE_SPLIT_BEFORE: raise NeedFirstVolume("Need to start from first volume") + if h.needs_password(): + self._needs_password = True + # store it - self._process_entry(h) + self.process_entry(fd, h) + + if self._info_callback: + self._info_callback(h) # go to next header if h.add_size > 0: - fd.seek(h.file_offset + h.add_size, 0) + fd.seek(h.data_offset + h.add_size, 0) + + def process_entry(self, fd, item): + """Examine item, add into lookup cache.""" + raise NotImplementedError() - # AES encrypted headers - _last_aes_key = (None, None, None) # (salt, key, iv) def _decrypt_header(self, fd): - if not _have_crypto: - raise NoCrypto('Cannot parse encrypted headers - no crypto') - salt = fd.read(8) - if self._last_aes_key[0] == salt: - key, iv = self._last_aes_key[1:] - else: - key, iv = rar3_s2k(self._password, salt) - self._last_aes_key = (salt, key, iv) - return HeaderDecrypt(fd, key, iv) + raise NotImplementedError('_decrypt_header') + + def _parse_block_header(self, fd): + raise NotImplementedError('_parse_block_header') + + def _open_hack(self, inf, psw): + raise NotImplementedError('_open_hack') # read single header def _parse_header(self, fd): try: # handle encrypted headers - if self._main and self._main.flags & RAR_MAIN_PASSWORD: + if (self._main and self._main.flags & RAR_MAIN_PASSWORD) or self._hdrenc_main: if not self._password: return fd = self._decrypt_header(fd) @@ -859,11 +1068,168 @@ class RarFile(object): self._set_error('Broken header in RAR file') return None + # given current vol name, construct next one + def _next_volname(self, volfile): + if is_filelike(volfile): + raise IOError("Working on single FD") + if self._main.flags & RAR_MAIN_NEWNUMBERING: + return _next_newvol(volfile) + return _next_oldvol(volfile) + + def _set_error(self, msg, *args): + if args: + msg = msg % args + self._parse_error = msg + if self._strict: + raise BadRarFile(msg) + + def open(self, inf, psw): + """Return stream object for file data.""" + + if inf.file_redir: + # cannot leave to unrar as it expects copied file to exist + if inf.file_redir[0] in (RAR5_XREDIR_FILE_COPY, RAR5_XREDIR_HARD_LINK): + inf = self.getinfo(inf.file_redir[2]) + if not inf: + raise BadRarFile('cannot find copied file') + + if inf.flags & RAR_FILE_SPLIT_BEFORE: + raise NeedFirstVolume("Partial file, please start from first volume: " + inf.filename) + + # is temp write usable? + use_hack = 1 + if not self._main: + use_hack = 0 + elif self._main._must_disable_hack(): + use_hack = 0 + elif inf._must_disable_hack(): + use_hack = 0 + elif is_filelike(self._rarfile): + pass + elif inf.file_size > HACK_SIZE_LIMIT: + use_hack = 0 + elif not USE_EXTRACT_HACK: + use_hack = 0 + + # now extract + if inf.compress_type == RAR_M0 and (inf.flags & RAR_FILE_PASSWORD) == 0 and inf.file_redir is None: + return self._open_clear(inf) + elif use_hack: + return self._open_hack(inf, psw) + elif is_filelike(self._rarfile): + return self._open_unrar_membuf(self._rarfile, inf, psw) + else: + return self._open_unrar(self._rarfile, inf, psw) + + def _open_clear(self, inf): + return DirectReader(self, inf) + + def _open_hack_core(self, inf, psw, prefix, suffix): + + size = inf.compress_size + inf.header_size + rf = XFile(inf.volume_file, 0) + rf.seek(inf.header_offset) + + tmpfd, tmpname = mkstemp(suffix='.rar') + tmpf = os.fdopen(tmpfd, "wb") + + try: + tmpf.write(prefix) + while size > 0: + if size > BSIZE: + buf = rf.read(BSIZE) + else: + buf = rf.read(size) + if not buf: + raise BadRarFile('read failed: ' + inf.filename) + tmpf.write(buf) + size -= len(buf) + tmpf.write(suffix) + tmpf.close() + rf.close() + except: + rf.close() + tmpf.close() + os.unlink(tmpname) + raise + + return self._open_unrar(tmpname, inf, psw, tmpname) + + # write in-memory archive to temp file - needed for solid archives + def _open_unrar_membuf(self, memfile, inf, psw): + tmpname = membuf_tempfile(memfile) + return self._open_unrar(tmpname, inf, psw, tmpname, force_file=True) + + # extract using unrar + def _open_unrar(self, rarfile, inf, psw=None, tmpfile=None, force_file=False): + cmd = [UNRAR_TOOL] + list(OPEN_ARGS) + add_password_arg(cmd, psw) + cmd.append("--") + cmd.append(rarfile) + + # not giving filename avoids encoding related problems + if not tmpfile or force_file: + fn = inf.filename + if PATH_SEP != os.sep: + fn = fn.replace(PATH_SEP, os.sep) + cmd.append(fn) + + # read from unrar pipe + return PipeReader(self, inf, cmd, tmpfile) + +# +# RAR3 format +# + +class Rar3Info(RarInfo): + """RAR3 specific fields.""" + extract_version = 15 + salt = None + add_size = 0 + header_crc = None + header_size = None + header_offset = None + data_offset = None + _md_class = None + _md_expect = None + + # make sure some rar5 fields are always present + file_redir = None + blake2sp_hash = None + + def _must_disable_hack(self): + if self.type == RAR_BLOCK_FILE: + if self.flags & RAR_FILE_PASSWORD: + return True + elif self.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER): + return True + elif self.type == RAR_BLOCK_MAIN: + if self.flags & (RAR_MAIN_SOLID | RAR_MAIN_PASSWORD): + return True + return False + + +class RAR3Parser(CommonParser): + """Parse RAR3 file format. + """ + _expect_sig = RAR_ID + _last_aes_key = (None, None, None) # (salt, key, iv) + + def _decrypt_header(self, fd): + if not _have_crypto: + raise NoCrypto('Cannot parse encrypted headers - no crypto') + salt = fd.read(8) + if self._last_aes_key[0] == salt: + key, iv = self._last_aes_key[1:] + else: + key, iv = rar3_s2k(self._password, salt) + self._last_aes_key = (salt, key, iv) + return HeaderDecrypt(fd, key, iv) + # common header def _parse_block_header(self, fd): - h = RarInfo() + h = Rar3Info() h.header_offset = fd.tell() - h.comment = None # read and parse base header buf = fd.read(S_BLK_HDR.size) @@ -871,24 +1237,24 @@ class RarFile(object): return None t = S_BLK_HDR.unpack_from(buf) h.header_crc, h.type, h.flags, h.header_size = t - h.header_base = S_BLK_HDR.size - pos = S_BLK_HDR.size # read full header if h.header_size > S_BLK_HDR.size: - h.header_data = buf + fd.read(h.header_size - S_BLK_HDR.size) + hdata = buf + fd.read(h.header_size - S_BLK_HDR.size) else: - h.header_data = buf - h.file_offset = fd.tell() + hdata = buf + h.data_offset = fd.tell() # unexpected EOF? - if len(h.header_data) != h.header_size: + if len(hdata) != h.header_size: self._set_error('Unexpected EOF when reading header') return None + pos = S_BLK_HDR.size + # block has data assiciated with it? if h.flags & RAR_LONG_BLOCK: - h.add_size = S_LONG.unpack_from(h.header_data, pos)[0] + h.add_size, pos = load_le32(hdata, pos) else: h.add_size = 0 @@ -896,31 +1262,36 @@ class RarFile(object): if h.type == RAR_BLOCK_MARK: return h elif h.type == RAR_BLOCK_MAIN: - h.header_base += 6 + pos += 6 if h.flags & RAR_MAIN_ENCRYPTVER: - h.header_base += 1 + pos += 1 + crc_pos = pos if h.flags & RAR_MAIN_COMMENT: - self._parse_subblocks(h, h.header_base) - self.comment = h.comment + self._parse_subblocks(h, hdata, pos) elif h.type == RAR_BLOCK_FILE: - self._parse_file_header(h, pos) + pos = self._parse_file_header(h, hdata, pos - 4) + crc_pos = pos + if h.flags & RAR_FILE_COMMENT: + pos = self._parse_subblocks(h, hdata, pos) elif h.type == RAR_BLOCK_SUB: - self._parse_file_header(h, pos) - h.header_base = h.header_size + pos = self._parse_file_header(h, hdata, pos - 4) + crc_pos = h.header_size elif h.type == RAR_BLOCK_OLD_AUTH: - h.header_base += 8 + pos += 8 + crc_pos = pos elif h.type == RAR_BLOCK_OLD_EXTRA: - h.header_base += 7 + pos += 7 + crc_pos = pos else: - h.header_base = h.header_size + crc_pos = h.header_size # check crc if h.type == RAR_BLOCK_OLD_SUB: - crcdat = h.header_data[2:] + fd.read(h.add_size) + crcdat = hdata[2:] + fd.read(h.add_size) else: - crcdat = h.header_data[2:h.header_base] + crcdat = hdata[2:crc_pos] - calc_crc = crc32(crcdat) & 0xFFFF + calc_crc = rar_crc32(crcdat) & 0xFFFF # return good header if h.header_crc == calc_crc: @@ -928,39 +1299,42 @@ class RarFile(object): # header parsing failed. self._set_error('Header CRC error (%02x): exp=%x got=%x (xlen = %d)', - h.type, h.header_crc, calc_crc, len(crcdat)) + h.type, h.header_crc, calc_crc, len(crcdat)) # instead panicing, send eof return None # read file-specific header - def _parse_file_header(self, h, pos): - fld = S_FILE_HDR.unpack_from(h.header_data, pos) + def _parse_file_header(self, h, hdata, pos): + fld = S_FILE_HDR.unpack_from(hdata, pos) + pos += S_FILE_HDR.size + h.compress_size = fld[0] h.file_size = fld[1] h.host_os = fld[2] h.CRC = fld[3] h.date_time = parse_dos_time(fld[4]) + h.mtime = to_datetime(h.date_time) h.extract_version = fld[5] h.compress_type = fld[6] - h.name_size = fld[7] + name_size = fld[7] h.mode = fld[8] - pos += S_FILE_HDR.size + + h._md_class = CRC32Context + h._md_expect = h.CRC if h.flags & RAR_FILE_LARGE: - h1 = S_LONG.unpack_from(h.header_data, pos)[0] - h2 = S_LONG.unpack_from(h.header_data, pos + 4)[0] + h1, pos = load_le32(hdata, pos) + h2, pos = load_le32(hdata, pos) h.compress_size |= h1 << 32 h.file_size |= h2 << 32 - pos += 8 h.add_size = h.compress_size - name = h.header_data[pos : pos + h.name_size ] - pos += h.name_size + name, pos = load_bytes(hdata, name_size, pos) if h.flags & RAR_FILE_UNICODE: nul = name.find(ZERO) h.orig_filename = name[:nul] - u = UnicodeFilename(h.orig_filename, name[nul + 1 : ]) + u = UnicodeFilename(h.orig_filename, name[nul + 1:]) h.filename = u.decode() # if parsing failed fall back to simple name @@ -975,222 +1349,65 @@ class RarFile(object): h.filename = h.filename.replace('\\', PATH_SEP) if h.flags & RAR_FILE_SALT: - h.salt = h.header_data[pos : pos + 8] - pos += 8 + h.salt, pos = load_bytes(hdata, 8, pos) else: h.salt = None # optional extended time stamps if h.flags & RAR_FILE_EXTTIME: - pos = self._parse_ext_time(h, pos) + pos = _parse_ext_time(h, hdata, pos) else: h.mtime = h.atime = h.ctime = h.arctime = None - # base header end - h.header_base = pos - - if h.flags & RAR_FILE_COMMENT: - self._parse_subblocks(h, pos) - - # convert timestamps - if USE_DATETIME: - h.date_time = to_datetime(h.date_time) - h.mtime = to_datetime(h.mtime) - h.atime = to_datetime(h.atime) - h.ctime = to_datetime(h.ctime) - h.arctime = to_datetime(h.arctime) - - # .mtime is .date_time with more precision - if h.mtime: - if USE_DATETIME: - h.date_time = h.mtime - else: - # keep seconds int - h.date_time = h.mtime[:5] + (int(h.mtime[5]),) - return pos # find old-style comment subblock - def _parse_subblocks(self, h, pos): - hdata = h.header_data + def _parse_subblocks(self, h, hdata, pos): while pos < len(hdata): # ordinary block header t = S_BLK_HDR.unpack_from(hdata, pos) - scrc, stype, sflags, slen = t + ___scrc, stype, sflags, slen = t pos_next = pos + slen pos += S_BLK_HDR.size # corrupt header - if pos_next < pos: - break - - # followed by block-specific header - if stype == RAR_BLOCK_OLD_COMMENT and pos + S_COMMENT_HDR.size <= pos_next: - declen, ver, meth, crc = S_COMMENT_HDR.unpack_from(hdata, pos) - pos += S_COMMENT_HDR.size - data = hdata[pos : pos_next] - cmt = rar_decompress(ver, meth, data, declen, sflags, - crc, self._password) - if not self._crc_check: - h.comment = self._decode_comment(cmt) - elif crc32(cmt) & 0xFFFF == crc: - h.comment = self._decode_comment(cmt) - - pos = pos_next - - def _parse_ext_time(self, h, pos): - data = h.header_data - - # flags and rest of data can be missing - flags = 0 - if pos + 2 <= len(data): - flags = S_SHORT.unpack_from(data, pos)[0] - pos += 2 - - h.mtime, pos = self._parse_xtime(flags >> 3*4, data, pos, h.date_time) - h.ctime, pos = self._parse_xtime(flags >> 2*4, data, pos) - h.atime, pos = self._parse_xtime(flags >> 1*4, data, pos) - h.arctime, pos = self._parse_xtime(flags >> 0*4, data, pos) - return pos - - def _parse_xtime(self, flag, data, pos, dostime = None): - unit = 10000000.0 # 100 ns units - if flag & 8: - if not dostime: - t = S_LONG.unpack_from(data, pos)[0] - dostime = parse_dos_time(t) - pos += 4 - rem = 0 - cnt = flag & 3 - for i in range(cnt): - b = S_BYTE.unpack_from(data, pos)[0] - rem = (b << 16) | (rem >> 8) - pos += 1 - sec = dostime[5] + rem / unit - if flag & 4: - sec += 1 - dostime = dostime[:5] + (sec,) - return dostime, pos - - # given current vol name, construct next one - def _next_volname(self, volfile): - if is_filelike(volfile): - raise IOError("Working on single FD") - if self._main.flags & RAR_MAIN_NEWNUMBERING: - return self._next_newvol(volfile) - return self._next_oldvol(volfile) - - # new-style next volume - def _next_newvol(self, volfile): - i = len(volfile) - 1 - while i >= 0: - if volfile[i] >= '0' and volfile[i] <= '9': - return self._inc_volname(volfile, i) - i -= 1 - raise BadRarName("Cannot construct volume name: "+volfile) - - # old-style next volume - def _next_oldvol(self, volfile): - # rar -> r00 - if volfile[-4:].lower() == '.rar': - return volfile[:-2] + '00' - return self._inc_volname(volfile, len(volfile) - 1) - - # increase digits with carry, otherwise just increment char - def _inc_volname(self, volfile, i): - fn = list(volfile) - while i >= 0: - if fn[i] != '9': - fn[i] = chr(ord(fn[i]) + 1) - break - fn[i] = '0' - i -= 1 - return ''.join(fn) - - def _open_clear(self, inf): - return DirectReader(self, inf) - - # put file compressed data into temporary .rar archive, and run - # unrar on that, thus avoiding unrar going over whole archive - def _open_hack(self, inf, psw = None): - BSIZE = 32*1024 - - size = inf.compress_size + inf.header_size - rf = XFile(inf.volume_file, 0) - rf.seek(inf.header_offset) - - tmpfd, tmpname = mkstemp(suffix='.rar') - tmpf = os.fdopen(tmpfd, "wb") + if pos_next < pos: + break - try: - # create main header: crc, type, flags, size, res1, res2 - mh = S_BLK_HDR.pack(0x90CF, 0x73, 0, 13) + ZERO * (2+4) - tmpf.write(RAR_ID + mh) - while size > 0: - if size > BSIZE: - buf = rf.read(BSIZE) - else: - buf = rf.read(size) - if not buf: - raise BadRarFile('read failed: ' + inf.filename) - tmpf.write(buf) - size -= len(buf) - tmpf.close() - rf.close() - except: - rf.close() - tmpf.close() - os.unlink(tmpname) - raise + # followed by block-specific header + if stype == RAR_BLOCK_OLD_COMMENT and pos + S_COMMENT_HDR.size <= pos_next: + declen, ver, meth, crc = S_COMMENT_HDR.unpack_from(hdata, pos) + pos += S_COMMENT_HDR.size + data = hdata[pos : pos_next] + cmt = rar3_decompress(ver, meth, data, declen, sflags, + crc, self._password) + if not self._crc_check: + h.comment = self._decode_comment(cmt) + elif rar_crc32(cmt) & 0xFFFF == crc: + h.comment = self._decode_comment(cmt) - return self._open_unrar(tmpname, inf, psw, tmpname) + pos = pos_next + return pos def _read_comment_v3(self, inf, psw=None): # read data - rf = XFile(inf.volume_file) - rf.seek(inf.file_offset) - data = rf.read(inf.compress_size) - rf.close() + with XFile(inf.volume_file) as rf: + rf.seek(inf.data_offset) + data = rf.read(inf.compress_size) # decompress - cmt = rar_decompress(inf.extract_version, inf.compress_type, data, - inf.file_size, inf.flags, inf.CRC, psw, inf.salt) + cmt = rar3_decompress(inf.extract_version, inf.compress_type, data, + inf.file_size, inf.flags, inf.CRC, psw, inf.salt) # check crc if self._crc_check: - crc = crc32(cmt) - if crc < 0: - crc += (1 << 32) + crc = rar_crc32(cmt) if crc != inf.CRC: return None return self._decode_comment(cmt) - # write in-memory archive to temp file - needed for solid archives - def _open_unrar_membuf(self, memfile, inf, psw): - tmpname = membuf_tempfile(memfile) - return self._open_unrar(tmpname, inf, psw, tmpname) - - # extract using unrar - def _open_unrar(self, rarfile, inf, psw = None, tmpfile = None): - if is_filelike(rarfile): - raise ValueError("Cannot use unrar directly on memory buffer") - cmd = [UNRAR_TOOL] + list(OPEN_ARGS) - add_password_arg(cmd, psw) - cmd.append("--") - cmd.append(rarfile) - - # not giving filename avoids encoding related problems - if not tmpfile: - fn = inf.filename - if PATH_SEP != os.sep: - fn = fn.replace(PATH_SEP, os.sep) - cmd.append(fn) - - # read from unrar pipe - return PipeReader(self, inf, cmd, tmpfile) - def _decode(self, val): for c in TRY_ENCODINGS: try: @@ -1200,53 +1417,466 @@ class RarFile(object): return val.decode(self._charset, 'replace') def _decode_comment(self, val): - if UNICODE_COMMENTS: - return self._decode(val) - return val + return self._decode(val) - # call unrar to extract a file - def _extract(self, fnlist, path=None, psw=None): - cmd = [UNRAR_TOOL] + list(EXTRACT_ARGS) + def process_entry(self, fd, item): + if item.type == RAR_BLOCK_FILE: + # use only first part + if (item.flags & RAR_FILE_SPLIT_BEFORE) == 0: + self._info_map[item.filename] = item + self._info_list.append(item) + elif len(self._info_list) > 0: + # final crc is in last block + old = self._info_list[-1] + old.CRC = item.CRC + old._md_expect = item._md_expect + old.compress_size += item.compress_size - # pasoword - psw = psw or self._password - add_password_arg(cmd, psw) - cmd.append('--') + # parse new-style comment + if item.type == RAR_BLOCK_SUB and item.filename == 'CMT': + if item.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER): + pass + elif item.flags & RAR_FILE_SOLID: + # file comment + cmt = self._read_comment_v3(item, self._password) + if len(self._info_list) > 0: + old = self._info_list[-1] + old.comment = cmt + else: + # archive comment + cmt = self._read_comment_v3(item, self._password) + self.comment = cmt - # rar file - if is_filelike(self.rarfile): - tmpname = membuf_tempfile(self.rarfile) - cmd.append(tmpname) + if item.type == RAR_BLOCK_MAIN: + if item.flags & RAR_MAIN_COMMENT: + self.comment = item.comment + if item.flags & RAR_MAIN_PASSWORD: + self._needs_password = True + + # put file compressed data into temporary .rar archive, and run + # unrar on that, thus avoiding unrar going over whole archive + def _open_hack(self, inf, psw): + # create main header: crc, type, flags, size, res1, res2 + prefix = RAR_ID + S_BLK_HDR.pack(0x90CF, 0x73, 0, 13) + ZERO * (2 + 4) + return self._open_hack_core(inf, psw, prefix, EMPTY) + +# +# RAR5 format +# + +class Rar5Info(RarInfo): + """Shared fields for RAR5 records. + """ + extract_version = 50 + header_crc = None + header_size = None + header_offset = None + data_offset = None + + # type=all + block_type = None + block_flags = None + add_size = 0 + block_extra_size = 0 + + # type=MAIN + volume_number = None + _md_class = None + _md_expect = None + + def _must_disable_hack(self): + return False + + +class Rar5BaseFile(Rar5Info): + """Shared sturct for file & service record. + """ + type = -1 + file_flags = None + file_encryption = (0, 0, 0, EMPTY, EMPTY, EMPTY) + file_compress_flags = None + file_redir = None + file_owner = None + file_version = None + blake2sp_hash = None + + def _must_disable_hack(self): + if self.flags & RAR_FILE_PASSWORD: + return True + if self.block_flags & (RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER): + return True + if self.file_compress_flags & RAR5_COMPR_SOLID: + return True + if self.file_redir: + return True + return False + + +class Rar5FileInfo(Rar5BaseFile): + """RAR5 file record. + """ + type = RAR_BLOCK_FILE + + +class Rar5ServiceInfo(Rar5BaseFile): + """RAR5 service record. + """ + type = RAR_BLOCK_SUB + + +class Rar5MainInfo(Rar5Info): + """RAR5 archive main record. + """ + type = RAR_BLOCK_MAIN + main_flags = None + main_volume_number = None + + def _must_disable_hack(self): + if self.main_flags & RAR5_MAIN_FLAG_SOLID: + return True + return False + + +class Rar5EncryptionInfo(Rar5Info): + """RAR5 archive header encryption record. + """ + type = RAR5_BLOCK_ENCRYPTION + encryption_algo = None + encryption_flags = None + encryption_kdf_count = None + encryption_salt = None + encryption_check_value = None + + def needs_password(self): + return True + + +class Rar5EndArcInfo(Rar5Info): + """RAR5 end of archive record. + """ + type = RAR_BLOCK_ENDARC + endarc_flags = None + + +class RAR5Parser(CommonParser): + """Parse RAR5 format. + """ + _expect_sig = RAR5_ID + _hdrenc_main = None + + # AES encrypted headers + _last_aes256_key = (-1, None, None) # (kdf_count, salt, key) + + def _gen_key(self, kdf_count, salt): + if self._last_aes256_key[:2] == (kdf_count, salt): + return self._last_aes256_key[2] + if kdf_count > 24: + raise BadRarFile('Too large kdf_count') + psw = self._password + if isinstance(psw, unicode): + psw = psw.encode('utf8') + key = pbkdf2_sha256(psw, salt, 1 << kdf_count) + self._last_aes256_key = (kdf_count, salt, key) + return key + + def _decrypt_header(self, fd): + if not _have_crypto: + raise NoCrypto('Cannot parse encrypted headers - no crypto') + h = self._hdrenc_main + key = self._gen_key(h.encryption_kdf_count, h.encryption_salt) + iv = fd.read(16) + return HeaderDecrypt(fd, key, iv) + + # common header + def _parse_block_header(self, fd): + header_offset = fd.tell() + + preload = 4 + 3 + start_bytes = fd.read(preload) + header_crc, pos = load_le32(start_bytes, 0) + hdrlen, pos = load_vint(start_bytes, pos) + if hdrlen > 2 * 1024 * 1024: + return None + header_size = pos + hdrlen + + # read full header, check for EOF + hdata = start_bytes + fd.read(header_size - len(start_bytes)) + if len(hdata) != header_size: + self._set_error('Unexpected EOF when reading header') + return None + data_offset = fd.tell() + + calc_crc = rar_crc32(memoryview(hdata)[4:]) + if header_crc != calc_crc: + # header parsing failed. + self._set_error('Header CRC error: exp=%x got=%x (xlen = %d)', + header_crc, calc_crc, len(hdata)) + return None + + block_type, pos = load_vint(hdata, pos) + + if block_type == RAR5_BLOCK_MAIN: + h, pos = self._parse_block_common(Rar5MainInfo(), hdata) + h = self._parse_main_block(h, hdata, pos) + elif block_type == RAR5_BLOCK_FILE: + h, pos = self._parse_block_common(Rar5FileInfo(), hdata) + h = self._parse_file_block(h, hdata, pos) + elif block_type == RAR5_BLOCK_SERVICE: + h, pos = self._parse_block_common(Rar5ServiceInfo(), hdata) + h = self._parse_file_block(h, hdata, pos) + elif block_type == RAR5_BLOCK_ENCRYPTION: + h, pos = self._parse_block_common(Rar5EncryptionInfo(), hdata) + h = self._parse_encryption_block(h, hdata, pos) + elif block_type == RAR5_BLOCK_ENDARC: + h, pos = self._parse_block_common(Rar5EndArcInfo(), hdata) + h = self._parse_endarc_block(h, hdata, pos) + else: + h = None + if h: + h.header_offset = header_offset + h.data_offset = data_offset + return h + + def _parse_block_common(self, h, hdata): + h.header_crc, pos = load_le32(hdata, 0) + hdrlen, pos = load_vint(hdata, pos) + h.header_size = hdrlen + pos + h.block_type, pos = load_vint(hdata, pos) + h.block_flags, pos = load_vint(hdata, pos) + + if h.block_flags & RAR5_BLOCK_FLAG_EXTRA_DATA: + h.block_extra_size, pos = load_vint(hdata, pos) + if h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA: + h.add_size, pos = load_vint(hdata, pos) + + h.compress_size = h.add_size + + if h.block_flags & RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN: + h.flags |= RAR_SKIP_IF_UNKNOWN + if h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA: + h.flags |= RAR_LONG_BLOCK + return h, pos + + def _parse_main_block(self, h, hdata, pos): + h.main_flags, pos = load_vint(hdata, pos) + if h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR: + h.main_volume_number = load_vint(hdata, pos) + + h.flags |= RAR_MAIN_NEWNUMBERING + if h.main_flags & RAR5_MAIN_FLAG_SOLID: + h.flags |= RAR_MAIN_SOLID + if h.main_flags & RAR5_MAIN_FLAG_ISVOL: + h.flags |= RAR_MAIN_VOLUME + if h.main_flags & RAR5_MAIN_FLAG_RECOVERY: + h.flags |= RAR_MAIN_RECOVERY + if self._hdrenc_main: + h.flags |= RAR_MAIN_PASSWORD + if h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR == 0: + h.flags |= RAR_MAIN_FIRSTVOLUME + + return h + + def _parse_file_block(self, h, hdata, pos): + h.file_flags, pos = load_vint(hdata, pos) + h.file_size, pos = load_vint(hdata, pos) + h.mode, pos = load_vint(hdata, pos) + + if h.file_flags & RAR5_FILE_FLAG_HAS_MTIME: + h.mtime, pos = load_unixtime(hdata, pos) + h.date_time = h.mtime.timetuple()[:6] + if h.file_flags & RAR5_FILE_FLAG_HAS_CRC32: + h.CRC, pos = load_le32(hdata, pos) + h._md_class = CRC32Context + h._md_expect = h.CRC + + h.file_compress_flags, pos = load_vint(hdata, pos) + h.file_host_os, pos = load_vint(hdata, pos) + h.orig_filename, pos = load_vstr(hdata, pos) + h.filename = h.orig_filename.decode('utf8', 'replace') + + # use compatible values + if h.file_host_os == RAR5_OS_WINDOWS: + h.host_os = RAR_OS_WIN32 + else: + h.host_os = RAR_OS_UNIX + h.compress_type = RAR_M0 + ((h.file_compress_flags >> 7) & 7) + + if h.block_extra_size: + # allow 1 byte of garbage + while pos < len(hdata) - 1: + xsize, pos = load_vint(hdata, pos) + xdata, pos = load_bytes(hdata, xsize, pos) + self._process_file_extra(h, xdata) + + if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE: + h.flags |= RAR_FILE_SPLIT_BEFORE + if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_AFTER: + h.flags |= RAR_FILE_SPLIT_AFTER + if h.file_flags & RAR5_FILE_FLAG_ISDIR: + h.flags |= RAR_FILE_DIRECTORY + if h.file_compress_flags & RAR5_COMPR_SOLID: + h.flags |= RAR_FILE_SOLID + + return h + + def _parse_endarc_block(self, h, hdata, pos): + h.endarc_flags, pos = load_vint(hdata, pos) + if h.endarc_flags & RAR5_ENDARC_FLAG_NEXT_VOL: + h.flags |= RAR_ENDARC_NEXT_VOLUME + return h + + def _parse_encryption_block(self, h, hdata, pos): + h.encryption_algo, pos = load_vint(hdata, pos) + h.encryption_flags, pos = load_vint(hdata, pos) + h.encryption_kdf_count, pos = load_byte(hdata, pos) + h.encryption_salt, pos = load_bytes(hdata, 16, pos) + if h.encryption_flags & RAR5_ENC_FLAG_HAS_CHECKVAL: + h.encryption_check_value = load_bytes(hdata, 12, pos) + if h.encryption_algo != RAR5_XENC_CIPHER_AES256: + raise BadRarFile('Unsupported header encryption cipher') + self._hdrenc_main = h + return h + + # file extra record + def _process_file_extra(self, h, xdata): + xtype, pos = load_vint(xdata, 0) + if xtype == RAR5_XFILE_TIME: + self._parse_file_xtime(h, xdata, pos) + elif xtype == RAR5_XFILE_ENCRYPTION: + self._parse_file_encryption(h, xdata, pos) + elif xtype == RAR5_XFILE_HASH: + self._parse_file_hash(h, xdata, pos) + elif xtype == RAR5_XFILE_VERSION: + self._parse_file_version(h, xdata, pos) + elif xtype == RAR5_XFILE_REDIR: + self._parse_file_redir(h, xdata, pos) + elif xtype == RAR5_XFILE_OWNER: + self._parse_file_owner(h, xdata, pos) + elif xtype == RAR5_XFILE_SERVICE: + pass else: - tmpname = None - cmd.append(self.rarfile) + pass - # file list - for fn in fnlist: - if os.sep != PATH_SEP: - fn = fn.replace(PATH_SEP, os.sep) - cmd.append(fn) + # extra block for file time record + def _parse_file_xtime(self, h, xdata, pos): + tflags, pos = load_vint(xdata, pos) + ldr = load_windowstime + if tflags & RAR5_XTIME_UNIXTIME: + ldr = load_unixtime + if tflags & RAR5_XTIME_HAS_MTIME: + h.mtime, pos = ldr(xdata, pos) + h.date_time = h.mtime.timetuple()[:6] + if tflags & RAR5_XTIME_HAS_CTIME: + h.ctime, pos = ldr(xdata, pos) + if tflags & RAR5_XTIME_HAS_ATIME: + h.atime, pos = ldr(xdata, pos) + + # just remember encryption info + def _parse_file_encryption(self, h, xdata, pos): + algo, pos = load_vint(xdata, pos) + flags, pos = load_vint(xdata, pos) + kdf_count, pos = load_byte(xdata, pos) + salt, pos = load_bytes(xdata, 16, pos) + iv, pos = load_bytes(xdata, 16, pos) + checkval = None + if flags & RAR5_XENC_CHECKVAL: + checkval, pos = load_bytes(xdata, 12, pos) + if flags & RAR5_XENC_TWEAKED: + h._md_expect = None + h._md_class = NoHashContext + + h.file_encryption = (algo, flags, kdf_count, salt, iv, checkval) + h.flags |= RAR_FILE_PASSWORD + + def _parse_file_hash(self, h, xdata, pos): + hash_type, pos = load_vint(xdata, pos) + if hash_type == RAR5_XHASH_BLAKE2SP: + h.blake2sp_hash, pos = load_bytes(xdata, 32, pos) + if _have_blake2 and (h.file_encryption[1] & RAR5_XENC_TWEAKED) == 0: + h._md_class = Blake2SP + h._md_expect = h.blake2sp_hash + + def _parse_file_version(self, h, xdata, pos): + flags, pos = load_vint(xdata, pos) + version, pos = load_vint(xdata, pos) + h.file_version = (flags, version) + + def _parse_file_redir(self, h, xdata, pos): + redir_type, pos = load_vint(xdata, pos) + redir_flags, pos = load_vint(xdata, pos) + redir_name, pos = load_vstr(xdata, pos) + redir_name = redir_name.decode('utf8', 'replace') + h.file_redir = (redir_type, redir_flags, redir_name) + + def _parse_file_owner(self, h, xdata, pos): + user_name = group_name = user_id = group_id = None + + flags, pos = load_vint(xdata, pos) + if flags & RAR5_XOWNER_UNAME: + user_name, pos = load_vstr(xdata, pos) + if flags & RAR5_XOWNER_GNAME: + group_name, pos = load_vstr(xdata, pos) + if flags & RAR5_XOWNER_UID: + user_id, pos = load_vint(xdata, pos) + if flags & RAR5_XOWNER_GID: + group_id, pos = load_vint(xdata, pos) + + h.file_owner = (user_name, group_name, user_id, group_id) + + def process_entry(self, fd, item): + if item.block_type == RAR5_BLOCK_FILE: + # use only first part + if (item.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE) == 0: + self._info_map[item.filename] = item + self._info_list.append(item) + elif len(self._info_list) > 0: + # final crc is in last block + old = self._info_list[-1] + old.CRC = item.CRC + old._md_expect = item._md_expect + old.blake2sp_hash = item.blake2sp_hash + old.compress_size += item.compress_size + elif item.block_type == RAR5_BLOCK_SERVICE: + if item.filename == 'CMT': + self._load_comment(fd, item) - # destination path - if path is not None: - cmd.append(path + os.sep) + def _load_comment(self, fd, item): + if item.block_flags & (RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER): + return None + if item.compress_type != RAR_M0: + return None - # call - try: - p = custom_popen(cmd) - output = p.communicate()[0] - check_returncode(p, output) - finally: - if tmpname: - os.unlink(tmpname) + if item.flags & RAR_FILE_PASSWORD: + algo, ___flags, kdf_count, salt, iv, ___checkval = item.file_encryption + if algo != RAR5_XENC_CIPHER_AES256: + return None + key = self._gen_key(kdf_count, salt) + f = HeaderDecrypt(fd, key, iv) + cmt = f.read(item.file_size) + else: + # archive comment + with self._open_clear(item) as cmtstream: + cmt = cmtstream.read() + + # rar bug? - appends zero to comment + cmt = cmt.split(ZERO, 1)[0] + self.comment = cmt.decode('utf8') + + def _open_hack(self, inf, psw): + # len, type, blk_flags, flags + main_hdr = b'\x03\x01\x00\x00' + endarc_hdr = b'\x03\x05\x00\x00' + main_hdr = S_LONG.pack(rar_crc32(main_hdr)) + main_hdr + endarc_hdr = S_LONG.pack(rar_crc32(endarc_hdr)) + endarc_hdr + return self._open_hack_core(inf, psw, RAR5_ID + main_hdr, endarc_hdr) ## ## Utility classes ## class UnicodeFilename(object): - """Handle unicode filename decompression""" - + """Handle RAR3 unicode filename decompression. + """ def __init__(self, name, encdata): self.std_name = bytearray(name) self.encdata = bytearray(encdata) @@ -1255,6 +1885,7 @@ class UnicodeFilename(object): self.failed = 0 def enc_byte(self): + """Copy encoded byte.""" try: c = self.encdata[self.encpos] self.encpos += 1 @@ -1264,6 +1895,7 @@ class UnicodeFilename(object): return 0 def std_byte(self): + """Copy byte from 8-bit representation.""" try: return self.std_name[self.pos] except IndexError: @@ -1271,11 +1903,13 @@ class UnicodeFilename(object): return ord('?') def put(self, lo, hi): + """Copy 16-bit value to result.""" self.buf.append(lo) self.buf.append(hi) self.pos += 1 def decode(self): + """Decompress compressed UTF16 value.""" hi = self.enc_byte() flagbits = 0 while self.encpos < len(self.encdata): @@ -1294,11 +1928,11 @@ class UnicodeFilename(object): n = self.enc_byte() if n & 0x80: c = self.enc_byte() - for i in range((n & 0x7f) + 2): + for _ in range((n & 0x7f) + 2): lo = (self.std_byte() + c) & 0xFF self.put(lo, hi) else: - for i in range(n + 2): + for _ in range(n + 2): self.put(self.std_byte(), 0) return self.buf.decode("utf-16le", "replace") @@ -1311,77 +1945,76 @@ class RarExtFile(RawIOBase): Behaviour: - no short reads - .read() and .readinfo() read as much as requested. - no internal buffer, use io.BufferedReader for that. - - If :mod:`io` module is available (Python 2.6+, 3.x), then this calls - will inherit from :class:`io.RawIOBase` class. This makes line-based - access available: :meth:`RarExtFile.readline` and ``for ln in f``. """ #: Filename of the archive entry name = None - def __init__(self, rf, inf): + def __init__(self, parser, inf): super(RarExtFile, self).__init__() # standard io.* properties self.name = inf.filename self.mode = 'rb' - self.rf = rf - self.inf = inf - self.crc_check = rf._crc_check - self.fd = None - self.CRC = 0 - self.remain = 0 - self.returncode = 0 + self._parser = parser + self._inf = inf + self._fd = None + self._remain = 0 + self._returncode = 0 + + self._md_context = None self._open() def _open(self): - if self.fd: - self.fd.close() - self.fd = None - self.CRC = 0 - self.remain = self.inf.file_size + if self._fd: + self._fd.close() + md_class = self._inf._md_class or NoHashContext + self._md_context = md_class() + self._fd = None + self._remain = self._inf.file_size - def read(self, cnt = None): + def read(self, cnt=None): """Read all or specified amount of data from archive entry.""" # sanitize cnt if cnt is None or cnt < 0: - cnt = self.remain - elif cnt > self.remain: - cnt = self.remain + cnt = self._remain + elif cnt > self._remain: + cnt = self._remain if cnt == 0: return EMPTY # actual read data = self._read(cnt) if data: - self.CRC = crc32(data, self.CRC) - self.remain -= len(data) + self._md_context.update(data) + self._remain -= len(data) if len(data) != cnt: raise BadRarFile("Failed the read enough data") # done? - if not data or self.remain == 0: - #self.close() + if not data or self._remain == 0: + # self.close() self._check() return data def _check(self): """Check final CRC.""" - if not self.crc_check: + final = self._md_context.digest() + exp = self._inf._md_expect + if exp is None: return - if self.returncode: + if final is None: + return + if self._returncode: check_returncode(self, '') - if self.remain != 0: + if self._remain != 0: raise BadRarFile("Failed the read enough data") - crc = self.CRC - if crc < 0: - crc += (1 << 32) - if crc != self.inf.CRC: - raise BadRarFile("Corrupt file - CRC check failed: " + self.inf.filename) + if final != exp: + raise BadRarFile("Corrupt file - CRC check failed: %s - exp=%r got=%r" % ( + self._inf.filename, exp, final)) def _read(self, cnt): """Actual read that gets sanitized cnt.""" @@ -1391,9 +2024,9 @@ class RarExtFile(RawIOBase): super(RarExtFile, self).close() - if self.fd: - self.fd.close() - self.fd = None + if self._fd: + self._fd.close() + self._fd = None def __del__(self): """Hook delete to make sure tempfile is removed.""" @@ -1404,25 +2037,15 @@ class RarExtFile(RawIOBase): Returns bytes read. """ - - data = self.read(len(buf)) - n = len(data) - try: - buf[:n] = data - except TypeError: - import array - if not isinstance(buf, array.array): - raise - buf[:n] = array.array(buf.typecode, data) - return n + raise NotImplementedError('readinto') def tell(self): """Return current reading position in uncompressed data.""" - return self.inf.file_size - self.remain + return self._inf.file_size - self._remain - def seek(self, ofs, whence = 0): + def seek(self, ofs, whence=0): """Seek in data. - + On uncompressed files, the seeking works by actual seeks so it's fast. On compresses files its slow - forward seeking happends by reading ahead, @@ -1430,9 +2053,9 @@ class RarExtFile(RawIOBase): """ # disable crc check when seeking - self.crc_check = 0 + self._md_context = NoHashContext() - fsize = self.inf.file_size + fsize = self._inf.file_size cur_ofs = self.tell() if whence == 0: # seek from beginning of file @@ -1454,8 +2077,6 @@ class RarExtFile(RawIOBase): if new_ofs >= cur_ofs: self._skip(new_ofs - cur_ofs) else: - # process old data ? - #self._skip(fsize - cur_ofs) # reopen and seek self._open() self._skip(new_ofs) @@ -1478,13 +2099,13 @@ class RarExtFile(RawIOBase): def writable(self): """Returns False. - + Writing is not supported.""" return False def seekable(self): """Returns True. - + Seeking is supported, although it's slow on compressed files. """ return True @@ -1499,23 +2120,23 @@ class PipeReader(RarExtFile): """Read data from pipe, handle tempfile cleanup.""" def __init__(self, rf, inf, cmd, tempfile=None): - self.cmd = cmd - self.proc = None - self.tempfile = tempfile + self._cmd = cmd + self._proc = None + self._tempfile = tempfile super(PipeReader, self).__init__(rf, inf) def _close_proc(self): - if not self.proc: + if not self._proc: return - if self.proc.stdout: - self.proc.stdout.close() - if self.proc.stdin: - self.proc.stdin.close() - if self.proc.stderr: - self.proc.stderr.close() - self.proc.wait() - self.returncode = self.proc.returncode - self.proc = None + if self._proc.stdout: + self._proc.stdout.close() + if self._proc.stdin: + self._proc.stdin.close() + if self._proc.stderr: + self._proc.stderr.close() + self._proc.wait() + self._returncode = self._proc.returncode + self._proc = None def _open(self): super(PipeReader, self)._open() @@ -1524,19 +2145,19 @@ class PipeReader(RarExtFile): self._close_proc() # launch new process - self.returncode = 0 - self.proc = custom_popen(self.cmd) - self.fd = self.proc.stdout + self._returncode = 0 + self._proc = custom_popen(self._cmd) + self._fd = self._proc.stdout # avoid situation where unrar waits on stdin - if self.proc.stdin: - self.proc.stdin.close() + if self._proc.stdin: + self._proc.stdin.close() def _read(self, cnt): """Read from pipe.""" # normal read is usually enough - data = self.fd.read(cnt) + data = self._fd.read(cnt) if len(data) == cnt or not data: return data @@ -1544,7 +2165,7 @@ class PipeReader(RarExtFile): buf = [data] cnt -= len(data) while cnt > 0: - data = self.fd.read(cnt) + data = self._fd.read(cnt) if not data: break cnt -= len(data) @@ -1557,42 +2178,45 @@ class PipeReader(RarExtFile): self._close_proc() super(PipeReader, self).close() - if self.tempfile: + if self._tempfile: try: - os.unlink(self.tempfile) + os.unlink(self._tempfile) except OSError: pass - self.tempfile = None + self._tempfile = None def readinto(self, buf): """Zero-copy read directly into buffer.""" cnt = len(buf) - if cnt > self.remain: - cnt = self.remain + if cnt > self._remain: + cnt = self._remain vbuf = memoryview(buf) res = got = 0 while got < cnt: - res = self.fd.readinto(vbuf[got : cnt]) + res = self._fd.readinto(vbuf[got : cnt]) if not res: break - if self.crc_check: - self.CRC = crc32(vbuf[got : got + res], self.CRC) - self.remain -= res + self._md_context.update(vbuf[got : got + res]) + self._remain -= res got += res return got class DirectReader(RarExtFile): - """Read uncompressed data directly from archive.""" + """Read uncompressed data directly from archive. + """ + _cur = None + _cur_avail = None + _volfile = None def _open(self): super(DirectReader, self)._open() - self.volfile = self.inf.volume_file - self.fd = XFile(self.volfile, 0) - self.fd.seek(self.inf.header_offset, 0) - self.cur = self.rf._parse_header(self.fd) - self.cur_avail = self.cur.add_size + self._volfile = self._inf.volume_file + self._fd = XFile(self._volfile, 0) + self._fd.seek(self._inf.header_offset, 0) + self._cur = self._parser._parse_header(self._fd) + self._cur_avail = self._cur.add_size def _skip(self, cnt): """RAR Seek, skipping through rar files to get to correct position @@ -1600,19 +2224,19 @@ class DirectReader(RarExtFile): while cnt > 0: # next vol needed? - if self.cur_avail == 0: + if self._cur_avail == 0: if not self._open_next(): break # fd is in read pos, do the read - if cnt > self.cur_avail: - cnt -= self.cur_avail - self.remain -= self.cur_avail - self.cur_avail = 0 + if cnt > self._cur_avail: + cnt -= self._cur_avail + self._remain -= self._cur_avail + self._cur_avail = 0 else: - self.fd.seek(cnt, 1) - self.cur_avail -= cnt - self.remain -= cnt + self._fd.seek(cnt, 1) + self._cur_avail -= cnt + self._remain -= cnt cnt = 0 def _read(self, cnt): @@ -1621,21 +2245,21 @@ class DirectReader(RarExtFile): buf = [] while cnt > 0: # next vol needed? - if self.cur_avail == 0: + if self._cur_avail == 0: if not self._open_next(): break # fd is in read pos, do the read - if cnt > self.cur_avail: - data = self.fd.read(self.cur_avail) + if cnt > self._cur_avail: + data = self._fd.read(self._cur_avail) else: - data = self.fd.read(cnt) + data = self._fd.read(cnt) if not data: break # got some data cnt -= len(data) - self.cur_avail -= len(data) + self._cur_avail -= len(data) buf.append(data) if len(buf) == 1: @@ -1646,31 +2270,34 @@ class DirectReader(RarExtFile): """Proceed to next volume.""" # is the file split over archives? - if (self.cur.flags & RAR_FILE_SPLIT_AFTER) == 0: + if (self._cur.flags & RAR_FILE_SPLIT_AFTER) == 0: return False - if self.fd: - self.fd.close() - self.fd = None + if self._fd: + self._fd.close() + self._fd = None # open next part - self.volfile = self.rf._next_volname(self.volfile) - fd = open(self.volfile, "rb", 0) - self.fd = fd + self._volfile = self._parser._next_volname(self._volfile) + fd = open(self._volfile, "rb", 0) + self._fd = fd + sig = fd.read(len(self._parser._expect_sig)) + if sig != self._parser._expect_sig: + raise BadRarFile("Invalid signature") # loop until first file header while 1: - cur = self.rf._parse_header(fd) + cur = self._parser._parse_header(fd) if not cur: raise BadRarFile("Unexpected EOF") if cur.type in (RAR_BLOCK_MARK, RAR_BLOCK_MAIN): if cur.add_size: fd.seek(cur.add_size, 1) continue - if cur.orig_filename != self.inf.orig_filename: + if cur.orig_filename != self._inf.orig_filename: raise BadRarFile("Did not found file entry") - self.cur = cur - self.cur_avail = cur.add_size + self._cur = cur + self._cur_avail = cur.add_size return True def readinto(self, buf): @@ -1679,23 +2306,22 @@ class DirectReader(RarExtFile): vbuf = memoryview(buf) while got < len(buf): # next vol needed? - if self.cur_avail == 0: + if self._cur_avail == 0: if not self._open_next(): break # length for next read cnt = len(buf) - got - if cnt > self.cur_avail: - cnt = self.cur_avail + if cnt > self._cur_avail: + cnt = self._cur_avail # read into temp view - res = self.fd.readinto(vbuf[got : got + cnt]) + res = self._fd.readinto(vbuf[got : got + cnt]) if not res: break - if self.crc_check: - self.CRC = crc32(vbuf[got : got + res], self.CRC) - self.cur_avail -= res - self.remain -= res + self._md_context.update(vbuf[got : got + res]) + self._cur_avail -= res + self._remain -= res got += res return got @@ -1708,10 +2334,12 @@ class HeaderDecrypt(object): self.buf = EMPTY def tell(self): + """Current file pos - works only on block boundaries.""" return self.f.tell() def read(self, cnt=None): - if cnt > 8*1024: + """Read and decrypt.""" + if cnt > 8 * 1024: raise BadRarFile('Bad count to header decrypt - wrong password?') # consume old data @@ -1724,10 +2352,10 @@ class HeaderDecrypt(object): cnt -= len(res) # decrypt new data - BLK = self.ciph.block_size + blklen = 16 while cnt > 0: - enc = self.f.read(BLK) - if len(enc) < BLK: + enc = self.f.read(blklen) + if len(enc) < blklen: break dec = self.ciph.decrypt(enc) if cnt >= len(dec): @@ -1740,10 +2368,14 @@ class HeaderDecrypt(object): return res + # handle (filename|filelike) object class XFile(object): + """Input may be filename or file object. + """ __slots__ = ('_fd', '_need_close') - def __init__(self, xfile, bufsize = 1024): + + def __init__(self, xfile, bufsize=1024): if is_filelike(xfile): self._need_close = False self._fd = xfile @@ -1751,27 +2383,279 @@ class XFile(object): else: self._need_close = True self._fd = open(xfile, 'rb', bufsize) + def read(self, n=None): + """Read from file.""" return self._fd.read(n) + def tell(self): + """Return file pos.""" return self._fd.tell() + def seek(self, ofs, whence=0): + """Move file pos.""" return self._fd.seek(ofs, whence) + def readinto(self, dst): + """Read into buffer.""" return self._fd.readinto(dst) + def close(self): + """Close file object.""" if self._need_close: self._fd.close() + def __enter__(self): return self + def __exit__(self, typ, val, tb): self.close() + +class NoHashContext(object): + """No-op hash function.""" + def __init__(self, data=None): + """Initialize""" + def update(self, data): + """Update data""" + def digest(self): + """Final hash""" + def hexdigest(self): + """Hexadecimal digest.""" + + +class CRC32Context(object): + """Hash context that uses CRC32.""" + __slots__ = ['_crc'] + + def __init__(self, data=None): + self._crc = 0 + if data: + self.update(data) + + def update(self, data): + """Process data.""" + self._crc = rar_crc32(data, self._crc) + + def digest(self): + """Final hash.""" + return self._crc + + def hexdigest(self): + """Hexadecimal digest.""" + return '%08x' % self.digest() + + +class Blake2SP(object): + """Blake2sp hash context. + """ + __slots__ = ['_thread', '_buf', '_cur', '_digest'] + digest_size = 32 + block_size = 64 + parallelism = 8 + + def __init__(self, data=None): + self._buf = b'' + self._cur = 0 + self._digest = None + self._thread = [] + + for i in range(self.parallelism): + ctx = self._blake2s(i, 0, i == (self.parallelism - 1)) + self._thread.append(ctx) + + if data: + self.update(data) + + def _blake2s(self, ofs, depth, is_last): + return blake2s(node_offset=ofs, node_depth=depth, last_node=is_last, + depth=2, inner_size=32, fanout=self.parallelism) + + def _add_block(self, blk): + self._thread[self._cur].update(blk) + self._cur = (self._cur + 1) % self.parallelism + + def update(self, data): + """Hash data. + """ + view = memoryview(data) + bs = self.block_size + if self._buf: + need = bs - len(self._buf) + if len(view) < need: + self._buf += view.tobytes() + return + self._add_block(self._buf + view[:need].tobytes()) + view = view[need:] + while len(view) >= bs: + self._add_block(view[:bs]) + view = view[bs:] + self._buf = view.tobytes() + + def digest(self): + """Return final digest value. + """ + if self._digest is None: + if self._buf: + self._add_block(self._buf) + self._buf = EMPTY + ctx = self._blake2s(0, 1, True) + for t in self._thread: + ctx.update(t.digest()) + self._digest = ctx.digest() + return self._digest + + def hexdigest(self): + """Hexadecimal digest.""" + return tohex(self.digest()) + ## ## Utility functions ## +S_LONG = Struct(' len(buf): + raise BadRarFile('cannot load byte') + return S_BYTE.unpack_from(buf, pos)[0], end + +def load_le32(buf, pos): + """Load little-endian 32-bit integer""" + end = pos + 4 + if end > len(buf): + raise BadRarFile('cannot load le32') + return S_LONG.unpack_from(buf, pos)[0], pos + 4 + +def load_bytes(buf, num, pos): + """Load sequence of bytes""" + end = pos + num + if end > len(buf): + raise BadRarFile('cannot load bytes') + return buf[pos : end], end + +def load_vstr(buf, pos): + """Load bytes prefixed by vint length""" + slen, pos = load_vint(buf, pos) + return load_bytes(buf, slen, pos) + +def load_dostime(buf, pos): + """Load LE32 dos timestamp""" + stamp, pos = load_le32(buf, pos) + tup = parse_dos_time(stamp) + return to_datetime(tup), pos + +def load_unixtime(buf, pos): + """Load LE32 unix timestamp""" + secs, pos = load_le32(buf, pos) + dt = datetime.fromtimestamp(secs, UTC) + return dt, pos + +def load_windowstime(buf, pos): + """Load LE64 windows timestamp""" + # unix epoch (1970) in seconds from windows epoch (1601) + unix_epoch = 11644473600 + val1, pos = load_le32(buf, pos) + val2, pos = load_le32(buf, pos) + secs, n1secs = divmod((val2 << 32) | val1, 10000000) + dt = datetime.fromtimestamp(secs - unix_epoch, UTC) + dt = dt.replace(microsecond=n1secs // 10) + return dt, pos + +# new-style next volume +def _next_newvol(volfile): + i = len(volfile) - 1 + while i >= 0: + if volfile[i] >= '0' and volfile[i] <= '9': + return _inc_volname(volfile, i) + i -= 1 + raise BadRarName("Cannot construct volume name: " + volfile) + +# old-style next volume +def _next_oldvol(volfile): + # rar -> r00 + if volfile[-4:].lower() == '.rar': + return volfile[:-2] + '00' + return _inc_volname(volfile, len(volfile) - 1) + +# increase digits with carry, otherwise just increment char +def _inc_volname(volfile, i): + fn = list(volfile) + while i >= 0: + if fn[i] != '9': + fn[i] = chr(ord(fn[i]) + 1) + break + fn[i] = '0' + i -= 1 + return ''.join(fn) + +# rar3 extended time fields +def _parse_ext_time(h, data, pos): + # flags and rest of data can be missing + flags = 0 + if pos + 2 <= len(data): + flags = S_SHORT.unpack_from(data, pos)[0] + pos += 2 + + mtime, pos = _parse_xtime(flags >> 3 * 4, data, pos, h.mtime) + h.ctime, pos = _parse_xtime(flags >> 2 * 4, data, pos) + h.atime, pos = _parse_xtime(flags >> 1 * 4, data, pos) + h.arctime, pos = _parse_xtime(flags >> 0 * 4, data, pos) + if mtime: + h.mtime = mtime + h.date_time = mtime.timetuple()[:6] + return pos + +# rar3 one extended time field +def _parse_xtime(flag, data, pos, basetime=None): + res = None + if flag & 8: + if not basetime: + basetime, pos = load_dostime(data, pos) + + # load second fractions + rem = 0 + cnt = flag & 3 + for _ in range(cnt): + b, pos = load_byte(data, pos) + rem = (b << 16) | (rem >> 8) + + # convert 100ns units to microseconds + usec = rem // 10 + if usec > 1000000: + usec = 999999 + + # dostime has room for 30 seconds only, correct if needed + if flag & 4 and basetime.second < 59: + res = basetime.replace(microsecond=usec, second=basetime.second + 1) + else: + res = basetime.replace(microsecond=usec) + return res, pos + def is_filelike(obj): + """Filename or file object? + """ if isinstance(obj, str) or isinstance(obj, unicode): return False res = True @@ -1782,14 +2666,16 @@ def is_filelike(obj): return True def rar3_s2k(psw, salt): - """String-to-key hash for RAR3.""" - + """String-to-key hash for RAR3. + """ + if not isinstance(psw, unicode): + psw = psw.decode('utf8') seed = psw.encode('utf-16le') + salt iv = EMPTY h = sha1() for i in range(16): for j in range(0x4000): - cnt = S_LONG.pack(i*0x4000 + j) + cnt = S_LONG.pack(i * 0x4000 + j) h.update(seed + cnt[:3]) if j == 0: iv += h.digest()[19:20] @@ -1797,12 +2683,11 @@ def rar3_s2k(psw, salt): key_le = pack("LLLL", key_be)) return key_le, iv -def rar_decompress(vers, meth, data, declen=0, flags=0, crc=0, psw=None, salt=None): +def rar3_decompress(vers, meth, data, declen=0, flags=0, crc=0, psw=None, salt=None): """Decompress blob of compressed data. Used for data with non-standard header - eg. comments. """ - # already uncompressed? if meth == RAR_M0 and (flags & RAR_FILE_PASSWORD) == 0: return data @@ -1826,11 +2711,11 @@ def rar_decompress(vers, meth, data, declen=0, flags=0, crc=0, psw=None, salt=No # full header hlen = S_BLK_HDR.size + len(fhdr) hdr = S_BLK_HDR.pack(0, RAR_BLOCK_FILE, flags, hlen) + fhdr - hcrc = crc32(hdr[2:]) & 0xFFFF + hcrc = rar_crc32(hdr[2:]) & 0xFFFF hdr = S_BLK_HDR.pack(hcrc, RAR_BLOCK_FILE, flags, hlen) + fhdr # archive main header - mh = S_BLK_HDR.pack(0x90CF, RAR_BLOCK_MAIN, 0, 13) + ZERO * (2+4) + mh = S_BLK_HDR.pack(0x90CF, RAR_BLOCK_MAIN, 0, 13) + ZERO * (2 + 4) # decompress via temp rar tmpfd, tmpname = mkstemp(suffix='.rar') @@ -1850,62 +2735,66 @@ def rar_decompress(vers, meth, data, declen=0, flags=0, crc=0, psw=None, salt=No os.unlink(tmpname) def to_datetime(t): - """Convert 6-part time tuple into datetime object.""" - + """Convert 6-part time tuple into datetime object. + """ if t is None: return None # extract values - year, mon, day, h, m, xs = t - s = int(xs) - us = int(1000000 * (xs - s)) + year, mon, day, h, m, s = t # assume the values are valid try: - return datetime(year, mon, day, h, m, s, us) + return datetime(year, mon, day, h, m, s) except ValueError: pass # sanitize invalid values - MDAY = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) - if mon < 1: mon = 1 - if mon > 12: mon = 12 - if day < 1: day = 1 - if day > MDAY[mon]: day = MDAY[mon] - if h > 23: h = 23 - if m > 59: m = 59 - if s > 59: s = 59 + mday = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) + if mon < 1: + mon = 1 + if mon > 12: + mon = 12 + if day < 1: + day = 1 + if day > mday[mon]: + day = mday[mon] + if h > 23: + h = 23 + if m > 59: + m = 59 + if s > 59: + s = 59 if mon == 2 and day == 29: try: - return datetime(year, mon, day, h, m, s, us) + return datetime(year, mon, day, h, m, s) except ValueError: day = 28 - return datetime(year, mon, day, h, m, s, us) + return datetime(year, mon, day, h, m, s) def parse_dos_time(stamp): - """Parse standard 32-bit DOS timestamp.""" - - sec = stamp & 0x1F; stamp = stamp >> 5 - min = stamp & 0x3F; stamp = stamp >> 6 - hr = stamp & 0x1F; stamp = stamp >> 5 - day = stamp & 0x1F; stamp = stamp >> 5 - mon = stamp & 0x0F; stamp = stamp >> 4 + """Parse standard 32-bit DOS timestamp. + """ + sec, stamp = stamp & 0x1F, stamp >> 5 + mn, stamp = stamp & 0x3F, stamp >> 6 + hr, stamp = stamp & 0x1F, stamp >> 5 + day, stamp = stamp & 0x1F, stamp >> 5 + mon, stamp = stamp & 0x0F, stamp >> 4 yr = (stamp & 0x7F) + 1980 - return (yr, mon, day, hr, min, sec * 2) + return (yr, mon, day, hr, mn, sec * 2) def custom_popen(cmd): - """Disconnect cmd from parent fds, read only from stdout.""" - + """Disconnect cmd from parent fds, read only from stdout. + """ # needed for py2exe creationflags = 0 if sys.platform == 'win32': - creationflags = 0x08000000 # CREATE_NO_WINDOW + creationflags = 0x08000000 # CREATE_NO_WINDOW # run command try: - p = Popen(cmd, bufsize = 0, - stdout = PIPE, stdin = PIPE, stderr = STDOUT, - creationflags = creationflags) + p = Popen(cmd, bufsize=0, stdout=PIPE, stdin=PIPE, stderr=STDOUT, + creationflags=creationflags) except OSError as ex: if ex.errno == errno.ENOENT: raise RarCannotExec("Unrar not installed? (rarfile.UNRAR_TOOL=%r)" % UNRAR_TOOL) @@ -1913,15 +2802,17 @@ def custom_popen(cmd): return p def custom_check(cmd, ignore_retcode=False): - """Run command, collect output, raise error if needed.""" + """Run command, collect output, raise error if needed. + """ p = custom_popen(cmd) - out, err = p.communicate() + out, _ = p.communicate() if p.returncode and not ignore_retcode: raise RarExecError("Check-run failed") return out -def add_password_arg(cmd, psw, required=False): - """Append password switch to commandline.""" +def add_password_arg(cmd, psw, ___required=False): + """Append password switch to commandline. + """ if UNRAR_TOOL == ALT_TOOL: return if psw is not None: @@ -1930,17 +2821,17 @@ def add_password_arg(cmd, psw, required=False): cmd.append('-p-') def check_returncode(p, out): - """Raise exception according to unrar exit code""" - + """Raise exception according to unrar exit code. + """ code = p.returncode if code == 0: return - # map return code to exception class + # map return code to exception class, codes from rar.txt errmap = [None, - RarWarning, RarFatalError, RarCRCError, RarLockedArchiveError, - RarWriteError, RarOpenError, RarUserError, RarMemoryError, - RarCreateError, RarNoFilesError] # codes from rar.txt + RarWarning, RarFatalError, RarCRCError, RarLockedArchiveError, # 1..4 + RarWriteError, RarOpenError, RarUserError, RarMemoryError, # 5..8 + RarCreateError, RarNoFilesError, RarWrongPassword] # 9..11 if UNRAR_TOOL == ALT_TOOL: errmap = [None] if code > 0 and code < len(errmap): @@ -1960,6 +2851,10 @@ def check_returncode(p, out): raise exc(msg) +def hmac_sha256(key, data): + """HMAC-SHA256""" + return HMAC(key, data, sha256).digest() + def membuf_tempfile(memfile): memfile.seek(0, 0) @@ -1967,36 +2862,71 @@ def membuf_tempfile(memfile): tmpf = os.fdopen(tmpfd, "wb") try: - BSIZE = 32*1024 while True: buf = memfile.read(BSIZE) if not buf: break tmpf.write(buf) tmpf.close() - return tmpname except: tmpf.close() os.unlink(tmpname) raise + return tmpname + +class XTempFile(object): + __slots__ = ('_tmpfile', '_filename') + + def __init__(self, rarfile): + if is_filelike(rarfile): + self._tmpfile = membuf_tempfile(rarfile) + self._filename = self._tmpfile + else: + self._tmpfile = None + self._filename = rarfile + + def __enter__(self): + return self._filename + + def __exit__(self, exc_type, exc_value, tb): + if self._tmpfile: + try: + os.unlink(self._tmpfile) + except OSError: + pass + self._tmpfile = None # # Check if unrar works # -try: - # does UNRAR_TOOL work? - custom_check([UNRAR_TOOL], True) -except RarCannotExec: +ORIG_UNRAR_TOOL = UNRAR_TOOL +ORIG_OPEN_ARGS = OPEN_ARGS +ORIG_EXTRACT_ARGS = EXTRACT_ARGS +ORIG_TEST_ARGS = TEST_ARGS + +def _check_unrar_tool(): + global UNRAR_TOOL, OPEN_ARGS, EXTRACT_ARGS, TEST_ARGS try: - # does ALT_TOOL work? - custom_check([ALT_TOOL] + list(ALT_CHECK_ARGS), True) - # replace config - UNRAR_TOOL = ALT_TOOL - OPEN_ARGS = ALT_OPEN_ARGS - EXTRACT_ARGS = ALT_EXTRACT_ARGS - TEST_ARGS = ALT_TEST_ARGS + # does UNRAR_TOOL work? + custom_check([ORIG_UNRAR_TOOL], True) + + UNRAR_TOOL = ORIG_UNRAR_TOOL + OPEN_ARGS = ORIG_OPEN_ARGS + EXTRACT_ARGS = ORIG_EXTRACT_ARGS + TEST_ARGS = ORIG_TEST_ARGS except RarCannotExec: - # no usable tool, only uncompressed archives work - pass + try: + # does ALT_TOOL work? + custom_check([ALT_TOOL] + list(ALT_CHECK_ARGS), True) + # replace config + UNRAR_TOOL = ALT_TOOL + OPEN_ARGS = ALT_OPEN_ARGS + EXTRACT_ARGS = ALT_EXTRACT_ARGS + TEST_ARGS = ALT_TEST_ARGS + except RarCannotExec: + # no usable tool, only uncompressed archives work + pass + +_check_unrar_tool() -- 2.11.4.GIT