sha1: cleanups
[rarfile.git] / rarfile.py
blob9715067bf6a3aab751300d69f4c41cda818c7407
1 # rarfile.py
3 # Copyright (c) 2005-2016 Marko Kreen <markokr@gmail.com>
5 # Permission to use, copy, modify, and/or distribute this software for any
6 # purpose with or without fee is hereby granted, provided that the above
7 # copyright notice and this permission notice appear in all copies.
9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 r"""RAR archive reader.
19 This is a Python module for reading RAR archives. The interface
20 is made as :mod:`zipfile`-like as possible.
22 Basic logic:
23 - Parse archive structure with Python.
24 - Extract non-compressed files with Python
25 - Extract compressed files with unrar.
26 - Optionally write compressed data to temp file to speed up unrar,
27 otherwise it needs to scan whole archive on each execution.
29 Example::
31 import rarfile
33 rf = rarfile.RarFile('myarchive.rar')
34 for f in rf.infolist():
35 print(f.filename, f.file_size)
36 if f.filename == 'README':
37 print(rf.read(f))
39 Archive files can also be accessed via file-like object returned
40 by :meth:`RarFile.open`::
42 import rarfile
44 with rarfile.RarFile('archive.rar') as rf:
45 with rf.open('README') as f:
46 for ln in f:
47 print(ln.strip())
49 There are a few module-level parameters to tune behaviour,
50 here they are with defaults, and reason to change it::
52 import rarfile
54 # Set to full path of unrar.exe if it is not in PATH
55 rarfile.UNRAR_TOOL = "unrar"
57 # Set to '\\' to be more compatible with old rarfile
58 rarfile.PATH_SEP = '/'
60 For more details, refer to source.
62 """
64 from __future__ import division, print_function
67 ## Imports and compat - support both Python 2.x and 3.x
70 import sys
71 import os
72 import errno
73 import struct
75 from struct import pack, unpack, Struct
76 from binascii import crc32, hexlify
77 from tempfile import mkstemp
78 from subprocess import Popen, PIPE, STDOUT
79 from io import RawIOBase
80 from hashlib import sha1, sha256
81 from hmac import HMAC
82 from datetime import datetime, timedelta, tzinfo
84 # fixed offset timezone, for UTC
try:
    from datetime import timezone
except ImportError:
    class timezone(tzinfo):
        """Minimal fixed-offset tzinfo, used when datetime.timezone is missing.

        Takes the UTC offset (a timedelta) and a zone name; both are
        returned unchanged by utcoffset() and tzname().
        """
        __slots__ = ('_ofs', '_name')

        # the zone never observes daylight saving time
        _DST = timedelta(0)

        def __init__(self, offset, name):
            super(timezone, self).__init__()
            self._ofs = offset
            self._name = name

        def utcoffset(self, dt):
            """Return the fixed offset given to the constructor."""
            return self._ofs

        def tzname(self, dt):
            """Return the zone name given to the constructor."""
            return self._name

        def dst(self, dt):
            """Return a zero timedelta."""
            return self._DST
106 # only needed for encrypted headers
107 try:
108 try:
109 from cryptography.hazmat.primitives.ciphers import algorithms, modes, Cipher
110 from cryptography.hazmat.backends import default_backend
111 from cryptography.hazmat.primitives import hashes
112 from cryptography.hazmat.primitives.kdf import pbkdf2
114 class AES_CBC_Decrypt(object):
115 """Decrypt API"""
116 def __init__(self, key, iv):
117 ciph = Cipher(algorithms.AES(key), modes.CBC(iv), default_backend())
118 self.decrypt = ciph.decryptor().update
120 def pbkdf2_sha256(password, salt, iters):
121 """PBKDF2 with HMAC-SHA256"""
122 ctx = pbkdf2.PBKDF2HMAC(hashes.SHA256(), 32, salt, iters, default_backend())
123 return ctx.derive(password)
125 except ImportError:
126 from Crypto.Cipher import AES
127 from Crypto.Protocol import KDF
129 class AES_CBC_Decrypt(object):
130 """Decrypt API"""
131 def __init__(self, key, iv):
132 self.decrypt = AES.new(key, AES.MODE_CBC, iv).decrypt
134 def pbkdf2_sha256(password, salt, iters):
135 """PBKDF2 with HMAC-SHA256"""
136 return KDF.PBKDF2(password, salt, 32, iters, hmac_sha256)
138 _have_crypto = 1
139 except ImportError:
140 _have_crypto = 0
142 try:
143 try:
144 from hashlib import blake2s
145 _have_blake2 = True
146 except ImportError:
147 from pyblake2 import blake2s
148 _have_blake2 = True
149 except ImportError:
150 _have_blake2 = False
152 # compat with 2.x
153 if sys.hexversion < 0x3000000:
154 def rar_crc32(data, prev=0):
155 """CRC32 with unsigned values.
157 if (prev > 0) and (prev & 0x80000000):
158 prev -= (1 << 32)
159 res = crc32(data, prev)
160 if res < 0:
161 res += (1 << 32)
162 return res
163 tohex = hexlify
164 _byte_code = ord
165 else: # pragma: no cover
166 def tohex(data):
167 """Return hex string."""
168 return hexlify(data).decode('ascii')
169 rar_crc32 = crc32
170 unicode = str
171 _byte_code = int # noqa
174 __version__ = '3.0'
176 # export only interesting items
177 __all__ = ['is_rarfile', 'RarInfo', 'RarFile', 'RarExtFile']
180 ## Module configuration. Can be tuned after importing.
183 #: default fallback charset
184 DEFAULT_CHARSET = "windows-1252"
186 #: list of encodings to try, with fallback to DEFAULT_CHARSET if none succeed
187 TRY_ENCODINGS = ('utf8', 'utf-16le')
189 #: 'unrar', 'rar' or full path to either one
190 UNRAR_TOOL = "unrar"
192 #: Command line args to use for opening file for reading.
193 OPEN_ARGS = ('p', '-inul')
195 #: Command line args to use for extracting file to disk.
196 EXTRACT_ARGS = ('x', '-y', '-idq')
198 #: args for testrar()
199 TEST_ARGS = ('t', '-idq')
202 # Allow use of tool that is not compatible with unrar.
204 # By default use 'bsdtar' which is 'tar' program that
205 # sits on top of libarchive.
207 # Problems with libarchive RAR backend:
208 # - Does not support solid archives.
209 # - Does not support password-protected archives.
212 ALT_TOOL = 'bsdtar'
213 ALT_OPEN_ARGS = ('-x', '--to-stdout', '-f')
214 ALT_EXTRACT_ARGS = ('-x', '-f')
215 ALT_TEST_ARGS = ('-t', '-f')
216 ALT_CHECK_ARGS = ('--help',)
218 #: whether to speed up decompression by using tmp archive
219 USE_EXTRACT_HACK = 1
221 #: limit the filesize for tmp archive usage
222 HACK_SIZE_LIMIT = 20 * 1024 * 1024
224 #: Separator for path name components. RAR internally uses '\\'.
225 #: Use '/' to be similar with zipfile.
226 PATH_SEP = '/'
##
## rar constants
##

# block types (values are the ASCII codes of 'r' .. '{')
RAR_BLOCK_MARK = 0x72           # r
RAR_BLOCK_MAIN = 0x73           # s
RAR_BLOCK_FILE = 0x74           # t
RAR_BLOCK_OLD_COMMENT = 0x75    # u
RAR_BLOCK_OLD_EXTRA = 0x76      # v
RAR_BLOCK_OLD_SUB = 0x77        # w
RAR_BLOCK_OLD_RECOVERY = 0x78   # x
RAR_BLOCK_OLD_AUTH = 0x79       # y
RAR_BLOCK_SUB = 0x7a            # z
RAR_BLOCK_ENDARC = 0x7b         # {

# flags for RAR_BLOCK_MAIN
RAR_MAIN_VOLUME = 0x0001
RAR_MAIN_COMMENT = 0x0002
RAR_MAIN_LOCK = 0x0004
RAR_MAIN_SOLID = 0x0008
RAR_MAIN_NEWNUMBERING = 0x0010
RAR_MAIN_AUTH = 0x0020
RAR_MAIN_RECOVERY = 0x0040
RAR_MAIN_PASSWORD = 0x0080
RAR_MAIN_FIRSTVOLUME = 0x0100
RAR_MAIN_ENCRYPTVER = 0x0200

# flags for RAR_BLOCK_FILE
RAR_FILE_SPLIT_BEFORE = 0x0001
RAR_FILE_SPLIT_AFTER = 0x0002
RAR_FILE_PASSWORD = 0x0004
RAR_FILE_COMMENT = 0x0008
RAR_FILE_SOLID = 0x0010
# the RAR_FILE_DICT* values and RAR_FILE_DIRECTORY all live inside
# RAR_FILE_DICTMASK; RAR_FILE_DIRECTORY equals the full mask
RAR_FILE_DICTMASK = 0x00e0
RAR_FILE_DICT64 = 0x0000
RAR_FILE_DICT128 = 0x0020
RAR_FILE_DICT256 = 0x0040
RAR_FILE_DICT512 = 0x0060
RAR_FILE_DICT1024 = 0x0080
RAR_FILE_DICT2048 = 0x00a0
RAR_FILE_DICT4096 = 0x00c0
RAR_FILE_DIRECTORY = 0x00e0
RAR_FILE_LARGE = 0x0100
RAR_FILE_UNICODE = 0x0200
RAR_FILE_SALT = 0x0400
RAR_FILE_VERSION = 0x0800
RAR_FILE_EXTTIME = 0x1000
RAR_FILE_EXTFLAGS = 0x2000

# flags for RAR_BLOCK_ENDARC
RAR_ENDARC_NEXT_VOLUME = 0x0001
RAR_ENDARC_DATACRC = 0x0002
RAR_ENDARC_REVSPACE = 0x0004
RAR_ENDARC_VOLNR = 0x0008

# flags common to all blocks
RAR_SKIP_IF_UNKNOWN = 0x4000
RAR_LONG_BLOCK = 0x8000

# Host OS types
RAR_OS_MSDOS = 0
RAR_OS_OS2 = 1
RAR_OS_WIN32 = 2
RAR_OS_UNIX = 3
RAR_OS_MACOS = 4
RAR_OS_BEOS = 5

# Compression methods - '0'..'5'
RAR_M0 = 0x30
RAR_M1 = 0x31
RAR_M2 = 0x32
RAR_M3 = 0x33
RAR_M4 = 0x34
RAR_M5 = 0x35
#
# RAR5 constants
#

# block types
RAR5_BLOCK_MAIN = 1
RAR5_BLOCK_FILE = 2
RAR5_BLOCK_SERVICE = 3
RAR5_BLOCK_ENCRYPTION = 4
RAR5_BLOCK_ENDARC = 5

# per-block flags
RAR5_BLOCK_FLAG_EXTRA_DATA = 0x01
RAR5_BLOCK_FLAG_DATA_AREA = 0x02
RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN = 0x04
RAR5_BLOCK_FLAG_SPLIT_BEFORE = 0x08
RAR5_BLOCK_FLAG_SPLIT_AFTER = 0x10
RAR5_BLOCK_FLAG_DEPENDS_PREV = 0x20
RAR5_BLOCK_FLAG_KEEP_WITH_PARENT = 0x40

# flags for RAR5_BLOCK_MAIN
RAR5_MAIN_FLAG_ISVOL = 0x01
RAR5_MAIN_FLAG_HAS_VOLNR = 0x02
RAR5_MAIN_FLAG_SOLID = 0x04
RAR5_MAIN_FLAG_RECOVERY = 0x08
RAR5_MAIN_FLAG_LOCKED = 0x10

# flags for RAR5_BLOCK_FILE
RAR5_FILE_FLAG_ISDIR = 0x01
RAR5_FILE_FLAG_HAS_MTIME = 0x02
RAR5_FILE_FLAG_HAS_CRC32 = 0x04
RAR5_FILE_FLAG_UNKNOWN_SIZE = 0x08

RAR5_COMPR_SOLID = 0x40

RAR5_ENC_FLAG_HAS_CHECKVAL = 0x01

RAR5_ENDARC_FLAG_NEXT_VOL = 0x01

# extra-record types (RAR5_XFILE_*)
RAR5_XFILE_ENCRYPTION = 1
RAR5_XFILE_HASH = 2
RAR5_XFILE_TIME = 3
RAR5_XFILE_VERSION = 4
RAR5_XFILE_REDIR = 5
RAR5_XFILE_OWNER = 6
RAR5_XFILE_SERVICE = 7

# time record flags
RAR5_XTIME_UNIXTIME = 0x01
RAR5_XTIME_HAS_MTIME = 0x02
RAR5_XTIME_HAS_CTIME = 0x04
RAR5_XTIME_HAS_ATIME = 0x08

# encryption record
RAR5_XENC_CIPHER_AES256 = 0

RAR5_XENC_CHECKVAL = 0x01
RAR5_XENC_TWEAKED = 0x02

# hash record
RAR5_XHASH_BLAKE2SP = 0

# redirection record: types
RAR5_XREDIR_UNIX_SYMLINK = 1
RAR5_XREDIR_WINDOWS_SYMLINK = 2
RAR5_XREDIR_WINDOWS_JUNCTION = 3
RAR5_XREDIR_HARD_LINK = 4
RAR5_XREDIR_FILE_COPY = 5

# redirection record: flags
RAR5_XREDIR_ISDIR = 0x01

# owner record flags
RAR5_XOWNER_UNAME = 0x01
RAR5_XOWNER_GNAME = 0x02
RAR5_XOWNER_UID = 0x04
RAR5_XOWNER_GID = 0x08

# host OS
RAR5_OS_WINDOWS = 0
RAR5_OS_UNIX = 1
##
## internal constants
##

# archive signatures: RAR_ID for the RAR3 parser, RAR5_ID for RAR5
RAR_ID = b"Rar!\x1a\x07\x00"
RAR5_ID = b"Rar!\x1a\x07\x01\x00"

# common byte-string literals
ZERO = b'\0'
EMPTY = b''

# zero-offset timezone
UTC = timezone(timedelta(0), 'UTC')

# chunk size (32 KiB) used when copying data
BSIZE = 32 * 1024
def _get_rar_version(xfile):
    """Detect the RAR format version from the file signature.

    Reads the first ``len(RAR5_ID)`` bytes of *xfile* and returns
    3 if they start with RAR_ID, 5 if they start with RAR5_ID,
    and 0 otherwise.
    """
    with XFile(xfile) as fd:
        head = fd.read(len(RAR5_ID))

    for version, magic in ((3, RAR_ID), (5, RAR5_ID)):
        if head.startswith(magic):
            return version
    return 0
398 ## Public interface
def is_rarfile(xfile):
    """Check quickly whether file is rar archive.

    Returns True when the file signature is a known RAR signature.
    """
    version = _get_rar_version(xfile)
    return version > 0
class Error(Exception):
    """Base class for rarfile errors."""


# -- errors raised by this module itself --

class BadRarFile(Error):
    """Incorrect data in archive."""


class NotRarFile(Error):
    """The file is not RAR archive."""


class BadRarName(Error):
    """Cannot guess multipart name components."""


class NoRarEntry(Error):
    """File not found in RAR"""


class PasswordRequired(Error):
    """File requires password"""


class NeedFirstVolume(Error):
    """Need to start from first volume."""


class NoCrypto(Error):
    """Cannot parse encrypted headers - no crypto available."""


# -- errors describing problems with the external unrar/rar tool --

class RarExecError(Error):
    """Problem reported by unrar/rar."""


class RarWarning(RarExecError):
    """Non-fatal error"""


class RarFatalError(RarExecError):
    """Fatal error"""


class RarCRCError(RarExecError):
    """CRC error during unpacking"""


class RarLockedArchiveError(RarExecError):
    """Must not modify locked archive"""


class RarWriteError(RarExecError):
    """Write error"""


class RarOpenError(RarExecError):
    """Open error"""


class RarUserError(RarExecError):
    """User error"""


class RarMemoryError(RarExecError):
    """Memory error"""


class RarCreateError(RarExecError):
    """Create error"""


class RarNoFilesError(RarExecError):
    """No files that match pattern were found"""


class RarUserBreak(RarExecError):
    """User stop"""


class RarWrongPassword(RarExecError):
    """Incorrect password"""


class RarUnknownError(RarExecError):
    """Unknown exit code"""


class RarSignalExit(RarExecError):
    """Unrar exited with signal"""


class RarCannotExec(RarExecError):
    """Executable not found."""
class RarInfo(object):
    r"""An entry in rar archive.

    RAR3 extended timestamps are :class:`datetime.datetime` objects without timezone.
    RAR5 extended timestamps are :class:`datetime.datetime` objects with UTC timezone.

    Attributes:

        filename
            File name with relative path.
            Path separator is '/'.  Always unicode string.

        date_time
            File modification timestamp.   As tuple of (year, month, day, hour, minute, second).
            RAR5 allows archives where it is missing, it's None then.

        file_size
            Uncompressed size.

        compress_size
            Compressed size.

        compress_type
            Compression method: one of :data:`RAR_M0` .. :data:`RAR_M5` constants.

        extract_version
            Minimal Rar version needed for decompressing.  As (major*10 + minor),
            so 2.9 is 29.

            RAR3: 10, 20, 29

            RAR5 does not have such field in archive, it's simply set to 50.

        host_os
            Host OS type, one of RAR_OS_* constants.

            RAR3: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`, :data:`RAR_OS_MSDOS`,
            :data:`RAR_OS_OS2`, :data:`RAR_OS_BEOS`.

            RAR5: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`.

        mode
            File attributes. May be either dos-style or unix-style, depending on host_os.

        mtime
            File modification time.  Same value as :attr:`date_time`
            but as :class:`datetime.datetime` object with extended precision.

        ctime
            Optional time field: creation time.  As :class:`datetime.datetime` object.

        atime
            Optional time field: last access time.  As :class:`datetime.datetime` object.

        arctime
            Optional time field: archival time.  As :class:`datetime.datetime` object.
            (RAR3-only)

        CRC
            CRC-32 of uncompressed file, unsigned int.

            RAR5: may be None.

        blake2sp_hash
            Blake2SP hash over decompressed data.  (RAR5-only)

        comment
            Optional file comment field.  Unicode string.  (RAR3-only)

        file_redir
            If not None, file is link of some sort.  Contains tuple of (type, flags, target).
            (RAR5-only)

            Type is one of constants:

                :data:`RAR5_XREDIR_UNIX_SYMLINK`
                    unix symlink to target.
                :data:`RAR5_XREDIR_WINDOWS_SYMLINK`
                    windows symlink to target.
                :data:`RAR5_XREDIR_WINDOWS_JUNCTION`
                    windows junction.
                :data:`RAR5_XREDIR_HARD_LINK`
                    hard link to target.
                :data:`RAR5_XREDIR_FILE_COPY`
                    current file is copy of another archive entry.

            Flags may contain :data:`RAR5_XREDIR_ISDIR` bit.

        volume
            Volume nr, starting from 0.

        volume_file
            Volume file name, where file starts.

    """

    # zipfile-compatible fields
    filename = None
    file_size = None
    compress_size = None
    date_time = None
    CRC = None
    volume = None
    orig_filename = None

    # documented above; defaulted here so the attribute exists
    # even on an entry that did not come from a parser
    volume_file = None

    # optional extended time fields, datetime() objects.
    mtime = None
    ctime = None
    atime = None

    extract_version = None
    mode = None
    host_os = None
    compress_type = None

    # rar3-only fields
    comment = None
    arctime = None

    # rar5-only fields
    blake2sp_hash = None
    file_redir = None

    # internal fields
    flags = 0
    type = None

    def isdir(self):
        """Returns True if entry is a directory.

        Only RAR_BLOCK_FILE entries can be directories; all bits of
        RAR_FILE_DIRECTORY must be set in ``flags``.
        """
        if self.type == RAR_BLOCK_FILE:
            return (self.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY
        return False

    def needs_password(self):
        """Returns True if data is stored password-protected.

        Only RAR_BLOCK_FILE entries are checked, via the
        RAR_FILE_PASSWORD flag.
        """
        if self.type == RAR_BLOCK_FILE:
            return (self.flags & RAR_FILE_PASSWORD) > 0
        return False
class RarFile(object):
    """Parse RAR structure, provide access to files in archive.

    Header parsing is delegated to a version-specific parser object;
    extraction to disk and archive testing run the external UNRAR_TOOL.
    """

    #: Archive comment.  Unicode string or None.
    comment = None

    def __init__(self, rarfile, mode="r", charset=None, info_callback=None,
                 crc_check=True, errors="stop"):
        """Open and parse a RAR archive.

        Parameters:

            rarfile
                archive file name
            mode
                only 'r' is supported.
            charset
                fallback charset to use, if filenames are not already Unicode-enabled.
            info_callback
                debug callback, gets to see all archive entries.
            crc_check
                set to False to disable CRC checks
            errors
                Either "stop" to quietly stop parsing on errors,
                or "strict" to raise errors.  Default is "stop".

        Raises ValueError for an unknown *errors* value and
        NotImplementedError for any *mode* other than "r".
        """
        self._rarfile = rarfile
        self._charset = charset or DEFAULT_CHARSET
        self._info_callback = info_callback
        self._crc_check = crc_check
        self._password = None
        self._file_parser = None

        if errors not in ("stop", "strict"):
            raise ValueError("Invalid value for 'errors' parameter.")
        self._strict = (errors == "strict")

        if mode != "r":
            raise NotImplementedError("RarFile supports only mode=r")

        self._parse()

    def __enter__(self):
        """Open context."""
        return self

    def __exit__(self, typ, value, traceback):
        """Exit context"""
        self.close()

    def setpassword(self, password):
        """Sets the password to use when extracting.

        If the current parser reports header encryption it is dropped
        and the archive is parsed again with the new password;
        otherwise the password is just handed to the parser.
        """
        self._password = password

        parser = self._file_parser
        if parser and parser.has_header_encryption():
            parser = self._file_parser = None

        if parser:
            parser.setpassword(self._password)
        else:
            self._parse()

    def needs_password(self):
        """Returns True if any archive entries require password for extraction.
        """
        return self._file_parser.needs_password()

    def namelist(self):
        """Return list of filenames in archive.
        """
        names = []
        for entry in self.infolist():
            names.append(entry.filename)
        return names

    def infolist(self):
        """Return RarInfo objects for all files/directories in archive.
        """
        return self._file_parser.infolist()

    def volumelist(self):
        """Returns filenames of archive volumes.

        In case of single-volume archive, the list contains
        just the name of main archive file.
        """
        return self._file_parser.volumelist()

    def getinfo(self, fname):
        """Return RarInfo for file.
        """
        return self._file_parser.getinfo(fname)

    def open(self, fname, mode='r', psw=None):
        """Returns file-like object (:class:`RarExtFile`) from where the data can be read.

        The object implements :class:`io.RawIOBase` interface, so it can
        be further wrapped with :class:`io.BufferedReader`
        and :class:`io.TextIOWrapper`.

        On older Python where io module is not available, it implements
        only .read(), .seek(), .tell() and .close() methods.

        The object is seekable, although the seeking is fast only on
        uncompressed files, on compressed files the seeking is implemented
        by reading ahead and/or restarting the decompression.

        Parameters:

            fname
                file name or RarInfo instance.
            mode
                must be 'r'
            psw
                password to use for extracting.
        """
        if mode != 'r':
            raise NotImplementedError("RarFile.open() supports only mode=r")

        # entry lookup
        entry = self.getinfo(fname)
        if entry.isdir():
            raise TypeError("Directory does not have any data: " + entry.filename)

        # a password is only passed on for entries that need one
        if not entry.needs_password():
            psw = None
        else:
            psw = psw or self._password
            if psw is None:
                raise PasswordRequired("File %s requires password" % entry.filename)

        return self._file_parser.open(entry, psw)

    def read(self, fname, psw=None):
        """Return uncompressed data for archive entry.

        For longer files using :meth:`RarFile.open` may be better idea.

        Parameters:

            fname
                filename or RarInfo instance
            psw
                password to use for extracting.
        """
        with self.open(fname, 'r', psw) as stream:
            data = stream.read()
        return data

    def close(self):
        """Release open resources."""
        pass

    def printdir(self):
        """Print archive file list to stdout."""
        for entry in self.infolist():
            print(entry.filename)

    def extract(self, member, path=None, pwd=None):
        """Extract single file into current directory.

        Parameters:

            member
                filename or :class:`RarInfo` instance
            path
                optional destination path
            pwd
                optional password to use
        """
        fname = member.filename if isinstance(member, RarInfo) else member
        self._extract([fname], path, pwd)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all files into current directory.

        Parameters:

            path
                optional destination path
            members
                optional filename or :class:`RarInfo` instance list to extract
            pwd
                optional password to use
        """
        fnlist = []
        if members is not None:
            fnlist = [m.filename if isinstance(m, RarInfo) else m
                      for m in members]
        self._extract(fnlist, path, pwd)

    def testrar(self):
        """Let 'unrar' test the archive.
        """
        cmd = [UNRAR_TOOL]
        cmd.extend(TEST_ARGS)
        add_password_arg(cmd, self._password)
        cmd.append('--')
        with XTempFile(self._rarfile) as rarfile:
            cmd.append(rarfile)
            proc = custom_popen(cmd)
            output = proc.communicate()[0]
            check_returncode(proc, output)

    def strerror(self):
        """Return error string if parsing failed or None if no problems.
        """
        if self._file_parser:
            return self._file_parser.strerror()
        return "Not a RAR file"

    ##
    ## private methods
    ##

    def _parse(self):
        """Create the parser matching the archive version and run it."""
        ver = _get_rar_version(self._rarfile)
        if ver == 3:
            parser_cls = RAR3Parser
        elif ver == 5:
            parser_cls = RAR5Parser
        else:
            raise BadRarFile("Not a RAR file")

        self._file_parser = parser_cls(self._rarfile, self._password, self._crc_check,
                                       self._charset, self._strict, self._info_callback)

        self._file_parser.parse()
        self.comment = self._file_parser.comment

    def _extract(self, fnlist, path=None, psw=None):
        """Call unrar to extract the named entries (all, if *fnlist* is empty)."""
        cmd = [UNRAR_TOOL]
        cmd.extend(EXTRACT_ARGS)

        # password: explicit argument wins over the stored one
        add_password_arg(cmd, psw or self._password)
        cmd.append('--')

        # rar file
        with XTempFile(self._rarfile) as rarfn:
            cmd.append(rarfn)

            # file list, converted to the OS path separator
            for fn in fnlist:
                cmd.append(fn.replace(PATH_SEP, os.sep) if os.sep != PATH_SEP else fn)

            # destination path
            if path is not None:
                cmd.append(path + os.sep)

            # call
            proc = custom_popen(cmd)
            output = proc.communicate()[0]
            check_returncode(proc, output)
890 # File format parsing
class CommonParser(object):
    """Shared parser parts.

    Subclasses provide the format-specific pieces: ``_expect_sig``,
    :meth:`process_entry`, :meth:`_decrypt_header`,
    :meth:`_parse_block_header` and :meth:`_open_hack`.
    """
    _main = None
    _hdrenc_main = None
    _needs_password = False
    _fd = None
    _expect_sig = None
    _parse_error = None
    _password = None
    comment = None

    def __init__(self, rarfile, password, crc_check, charset, strict, info_cb):
        """Store parse settings and create empty entry/volume caches."""
        self._rarfile = rarfile
        self._password = password
        self._crc_check = crc_check
        self._charset = charset
        self._strict = strict
        self._info_callback = info_cb
        self._info_list = []
        self._info_map = {}
        self._vol_list = []

    def has_header_encryption(self):
        """Returns True if headers are encrypted
        """
        if self._hdrenc_main:
            return True
        if self._main:
            if self._main.flags & RAR_MAIN_PASSWORD:
                return True
        return False

    def setpassword(self, psw):
        """Set cached password."""
        self._password = psw

    def volumelist(self):
        """Volume files"""
        return self._vol_list

    def needs_password(self):
        """Is password required"""
        return self._needs_password

    def strerror(self):
        """Last error"""
        return self._parse_error

    def infolist(self):
        """List of RarInfo records.
        """
        return self._info_list

    def getinfo(self, member):
        """Return RarInfo for filename

        *member* may be a name or a RarInfo instance.  The name is
        looked up as given first, then with its separators converted
        to PATH_SEP.  Raises NoRarEntry when neither form is known.
        """
        if isinstance(member, RarInfo):
            fname = member.filename
        else:
            fname = member

        # accept both ways here
        if PATH_SEP == '/':
            fname2 = fname.replace("\\", "/")
        else:
            fname2 = fname.replace("/", "\\")

        try:
            return self._info_map[fname]
        except KeyError:
            try:
                return self._info_map[fname2]
            except KeyError:
                raise NoRarEntry("No such file: %s" % fname)

    # read rar
    def parse(self):
        """Process file."""
        self._fd = None
        try:
            self._parse_real()
        finally:
            # whichever volume was open last gets closed here
            if self._fd:
                self._fd.close()
                self._fd = None

    def _parse_real(self):
        """Read headers from the archive and any following volumes."""
        fd = XFile(self._rarfile)
        self._fd = fd
        sig = fd.read(len(self._expect_sig))
        if sig != self._expect_sig:
            if isinstance(self._rarfile, (str, unicode)):
                raise NotRarFile("Not a Rar archive: {}".format(self._rarfile))
            raise NotRarFile("Not a Rar archive")

        volume = 0  # first vol (.rar) is 0
        more_vols = False
        endarc = False
        volfile = self._rarfile
        self._vol_list = [self._rarfile]
        while True:
            if endarc:
                h = None    # don't read past ENDARC
            else:
                h = self._parse_header(fd)
            if not h:
                if more_vols:
                    # current volume is exhausted, switch to the next one
                    volume += 1
                    fd.close()
                    try:
                        volfile = self._next_volname(volfile)
                        fd = XFile(volfile)
                    except IOError:
                        self._set_error("Cannot open next volume: %s", volfile)
                        break
                    self._fd = fd
                    sig = fd.read(len(self._expect_sig))
                    if sig != self._expect_sig:
                        self._set_error("Invalid volume sig: %s", volfile)
                        break
                    more_vols = False
                    endarc = False
                    self._vol_list.append(volfile)
                    continue
                break
            h.volume = volume
            h.volume_file = volfile

            if h.type == RAR_BLOCK_MAIN and not self._main:
                self._main = h
                if h.flags & RAR_MAIN_NEWNUMBERING:
                    # RAR 2.x does not set FIRSTVOLUME,
                    # so check it only if NEWNUMBERING is used
                    if (h.flags & RAR_MAIN_FIRSTVOLUME) == 0:
                        raise NeedFirstVolume("Need to start from first volume")
                if h.flags & RAR_MAIN_PASSWORD:
                    self._needs_password = True
                    # encrypted headers cannot be read without password
                    if not self._password:
                        break
            elif h.type == RAR_BLOCK_ENDARC:
                more_vols = (h.flags & RAR_ENDARC_NEXT_VOLUME) > 0
                endarc = True
            elif h.type == RAR_BLOCK_FILE:
                # RAR 2.x does not write RAR_BLOCK_ENDARC
                if h.flags & RAR_FILE_SPLIT_AFTER:
                    more_vols = True
                # RAR 2.x does not set RAR_MAIN_FIRSTVOLUME
                if volume == 0 and h.flags & RAR_FILE_SPLIT_BEFORE:
                    raise NeedFirstVolume("Need to start from first volume")

            if h.needs_password():
                self._needs_password = True

            # store it
            self.process_entry(fd, h)

            if self._info_callback:
                self._info_callback(h)

            # go to next header
            if h.add_size > 0:
                fd.seek(h.data_offset + h.add_size, 0)

    def process_entry(self, fd, item):
        """Examine item, add into lookup cache."""
        raise NotImplementedError()

    def _decrypt_header(self, fd):
        """Return a decrypting reader for *fd*; implemented by subclasses."""
        raise NotImplementedError('_decrypt_header')

    def _parse_block_header(self, fd):
        """Read one block header from *fd*; implemented by subclasses."""
        raise NotImplementedError('_parse_block_header')

    def _open_hack(self, inf, psw):
        """Open *inf* via a temporary archive; implemented by subclasses."""
        raise NotImplementedError('_open_hack')

    # read single header
    def _parse_header(self, fd):
        """Return the next header, or None on EOF, missing password or error."""
        try:
            # handle encrypted headers
            if (self._main and self._main.flags & RAR_MAIN_PASSWORD) or self._hdrenc_main:
                if not self._password:
                    return None
                fd = self._decrypt_header(fd)

            # now read actual header
            return self._parse_block_header(fd)
        except struct.error:
            self._set_error('Broken header in RAR file')
            return None

    # given current vol name, construct next one
    def _next_volname(self, volfile):
        """Return the file name of the volume following *volfile*.

        Raises IOError when the archive is a file-like object.
        """
        if is_filelike(volfile):
            raise IOError("Working on single FD")
        if self._main.flags & RAR_MAIN_NEWNUMBERING:
            return _next_newvol(volfile)
        return _next_oldvol(volfile)

    def _set_error(self, msg, *args):
        """Remember the error message; raise BadRarFile in strict mode."""
        if args:
            msg = msg % args
        self._parse_error = msg
        if self._strict:
            raise BadRarFile(msg)

    def open(self, inf, psw):
        """Return stream object for file data."""

        if inf.file_redir:
            # cannot leave to unrar as it expects copied file to exist
            if inf.file_redir[0] in (RAR5_XREDIR_FILE_COPY, RAR5_XREDIR_HARD_LINK):
                inf = self.getinfo(inf.file_redir[2])
                if not inf:
                    raise BadRarFile('cannot find copied file')

        if inf.flags & RAR_FILE_SPLIT_BEFORE:
            raise NeedFirstVolume("Partial file, please start from first volume: " + inf.filename)

        # is temp write usable?
        use_hack = 1
        if not self._main:
            use_hack = 0
        elif self._main._must_disable_hack():
            use_hack = 0
        elif inf._must_disable_hack():
            use_hack = 0
        elif is_filelike(self._rarfile):
            # file-like archives keep the hack regardless of the limits below
            pass
        elif inf.file_size > HACK_SIZE_LIMIT:
            use_hack = 0
        elif not USE_EXTRACT_HACK:
            use_hack = 0

        # now extract
        if inf.compress_type == RAR_M0 and (inf.flags & RAR_FILE_PASSWORD) == 0 and inf.file_redir is None:
            return self._open_clear(inf)
        elif use_hack:
            return self._open_hack(inf, psw)
        elif is_filelike(self._rarfile):
            return self._open_unrar_membuf(self._rarfile, inf, psw)
        else:
            return self._open_unrar(self._rarfile, inf, psw)

    def _open_clear(self, inf):
        """Return a reader for an entry that passed the stored/unencrypted check."""
        return DirectReader(self, inf)

    def _open_hack_core(self, inf, psw, prefix, suffix):
        """Copy one entry into a temporary archive and open that with unrar.

        The temporary file holds *prefix*, then the entry's header and
        compressed data copied from its volume, then *suffix*.  The
        source volume is always closed; the temporary file is removed
        if writing it fails.
        """
        size = inf.compress_size + inf.header_size
        rf = XFile(inf.volume_file, 0)
        try:
            rf.seek(inf.header_offset)

            tmpfd, tmpname = mkstemp(suffix='.rar')
            try:
                tmpf = os.fdopen(tmpfd, "wb")
            except BaseException:
                # no file object took ownership of the descriptor
                os.close(tmpfd)
                os.unlink(tmpname)
                raise

            try:
                tmpf.write(prefix)
                while size > 0:
                    buf = rf.read(min(size, BSIZE))
                    if not buf:
                        raise BadRarFile('read failed: ' + inf.filename)
                    tmpf.write(buf)
                    size -= len(buf)
                tmpf.write(suffix)
                tmpf.close()
            except BaseException:
                try:
                    tmpf.close()
                finally:
                    os.unlink(tmpname)
                raise
        finally:
            rf.close()

        return self._open_unrar(tmpname, inf, psw, tmpname)

    # write in-memory archive to temp file - needed for solid archives
    def _open_unrar_membuf(self, memfile, inf, psw):
        """Dump a file-like archive into a temp file and open *inf* from it."""
        tmpname = membuf_tempfile(memfile)
        return self._open_unrar(tmpname, inf, psw, tmpname, force_file=True)

    # extract using unrar
    def _open_unrar(self, rarfile, inf, psw=None, tmpfile=None, force_file=False):
        """Build the unrar command line for *inf* and return a PipeReader on it."""
        cmd = [UNRAR_TOOL] + list(OPEN_ARGS)
        add_password_arg(cmd, psw)
        cmd.append("--")
        cmd.append(rarfile)

        # not giving filename avoids encoding related problems
        if not tmpfile or force_file:
            fn = inf.filename
            if PATH_SEP != os.sep:
                fn = fn.replace(PATH_SEP, os.sep)
            cmd.append(fn)

        # read from unrar pipe
        return PipeReader(self, inf, cmd, tmpfile)
1194 # RAR3 format
class Rar3Info(RarInfo):
    """RAR3 specific fields."""
    extract_version = 15
    salt = None
    add_size = 0
    header_crc = None
    header_size = None
    header_offset = None
    data_offset = None
    _md_class = None
    _md_expect = None

    # make sure some rar5 fields are always present
    file_redir = None
    blake2sp_hash = None

    def _must_disable_hack(self):
        """Return True when the temp-archive shortcut must not be used.

        For a file block: the entry is password-protected or split
        across volumes.  For a main block: the archive is solid or
        password-protected.  False for any other block type.
        """
        if self.type == RAR_BLOCK_FILE:
            blockers = RAR_FILE_PASSWORD | RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER
            return bool(self.flags & blockers)
        if self.type == RAR_BLOCK_MAIN:
            return bool(self.flags & (RAR_MAIN_SOLID | RAR_MAIN_PASSWORD))
        return False
1225 class RAR3Parser(CommonParser):
1226 """Parse RAR3 file format.
1228 _expect_sig = RAR_ID
1229 _last_aes_key = (None, None, None) # (salt, key, iv)
def _decrypt_header(self, fd):
    """Return a HeaderDecrypt reader over *fd*.

    Reads the 8-byte salt from *fd*.  The (salt, key, iv) triple is
    cached in ``_last_aes_key`` so the key is only derived again when
    the salt changes.  Raises NoCrypto when no crypto backend loaded.
    """
    if not _have_crypto:
        raise NoCrypto('Cannot parse encrypted headers - no crypto')

    salt = fd.read(8)
    cached_salt, key, iv = self._last_aes_key
    if cached_salt != salt:
        key, iv = rar3_s2k(self._password, salt)
        self._last_aes_key = (salt, key, iv)
    return HeaderDecrypt(fd, key, iv)
1242 # common header
def _parse_block_header(self, fd):
    """Read one RAR3 block header from *fd*.

    Returns a Rar3Info on success.  Returns None when nothing could be
    read, when the header is truncated, or when the header CRC does
    not match; the last two cases are also reported via _set_error().
    """
    base_size = S_BLK_HDR.size

    h = Rar3Info()
    h.header_offset = fd.tell()

    # read and parse base header
    buf = fd.read(base_size)
    if not buf:
        return None
    h.header_crc, h.type, h.flags, h.header_size = S_BLK_HDR.unpack_from(buf)

    # read full header
    hdata = buf
    if h.header_size > base_size:
        hdata = buf + fd.read(h.header_size - base_size)
    h.data_offset = fd.tell()

    # unexpected EOF?
    if len(hdata) != h.header_size:
        self._set_error('Unexpected EOF when reading header')
        return None

    pos = base_size

    # block has data associated with it?
    if h.flags & RAR_LONG_BLOCK:
        h.add_size, pos = load_le32(hdata, pos)
    else:
        h.add_size = 0

    # parse interesting ones, decide header boundaries for crc
    btype = h.type
    if btype == RAR_BLOCK_MARK:
        return h

    if btype == RAR_BLOCK_MAIN:
        pos += 6
        if h.flags & RAR_MAIN_ENCRYPTVER:
            pos += 1
        crc_pos = pos
        if h.flags & RAR_MAIN_COMMENT:
            self._parse_subblocks(h, hdata, pos)
    elif btype == RAR_BLOCK_FILE:
        # file header parsing starts 4 bytes before the current position
        crc_pos = self._parse_file_header(h, hdata, pos - 4)
        if h.flags & RAR_FILE_COMMENT:
            self._parse_subblocks(h, hdata, crc_pos)
    elif btype == RAR_BLOCK_SUB:
        self._parse_file_header(h, hdata, pos - 4)
        crc_pos = h.header_size
    elif btype == RAR_BLOCK_OLD_AUTH:
        crc_pos = pos + 8
    elif btype == RAR_BLOCK_OLD_EXTRA:
        crc_pos = pos + 7
    else:
        crc_pos = h.header_size

    # check crc (the first 2 bytes hold the stored crc itself)
    if btype == RAR_BLOCK_OLD_SUB:
        crcdat = hdata[2:] + fd.read(h.add_size)
    else:
        crcdat = hdata[2:crc_pos]

    calc_crc = rar_crc32(crcdat) & 0xFFFF

    # header parsing failed.
    if h.header_crc != calc_crc:
        self._set_error('Header CRC error (%02x): exp=%x got=%x (xlen = %d)',
                        h.type, h.header_crc, calc_crc, len(crcdat))
        # instead of panicking, send eof
        return None

    # return good header
    return h
1320 # read file-specific header
1321 def _parse_file_header(self, h, hdata, pos):
1322 fld = S_FILE_HDR.unpack_from(hdata, pos)
1323 pos += S_FILE_HDR.size
1325 h.compress_size = fld[0]
1326 h.file_size = fld[1]
1327 h.host_os = fld[2]
1328 h.CRC = fld[3]
1329 h.date_time = parse_dos_time(fld[4])
1330 h.mtime = to_datetime(h.date_time)
1331 h.extract_version = fld[5]
1332 h.compress_type = fld[6]
1333 name_size = fld[7]
1334 h.mode = fld[8]
1336 h._md_class = CRC32Context
1337 h._md_expect = h.CRC
1339 if h.flags & RAR_FILE_LARGE:
1340 h1, pos = load_le32(hdata, pos)
1341 h2, pos = load_le32(hdata, pos)
1342 h.compress_size |= h1 << 32
1343 h.file_size |= h2 << 32
1344 h.add_size = h.compress_size
1346 name, pos = load_bytes(hdata, name_size, pos)
1347 if h.flags & RAR_FILE_UNICODE:
1348 nul = name.find(ZERO)
1349 h.orig_filename = name[:nul]
1350 u = UnicodeFilename(h.orig_filename, name[nul + 1:])
1351 h.filename = u.decode()
1353 # if parsing failed fall back to simple name
1354 if u.failed:
1355 h.filename = self._decode(h.orig_filename)
1356 else:
1357 h.orig_filename = name
1358 h.filename = self._decode(name)
1360 # change separator, if requested
1361 if PATH_SEP != '\\':
1362 h.filename = h.filename.replace('\\', PATH_SEP)
1364 if h.flags & RAR_FILE_SALT:
1365 h.salt, pos = load_bytes(hdata, 8, pos)
1366 else:
1367 h.salt = None
1369 # optional extended time stamps
1370 if h.flags & RAR_FILE_EXTTIME:
1371 pos = _parse_ext_time(h, hdata, pos)
1372 else:
1373 h.mtime = h.atime = h.ctime = h.arctime = None
1375 return pos
1377 # find old-style comment subblock
1378 def _parse_subblocks(self, h, hdata, pos):
1379 while pos < len(hdata):
1380 # ordinary block header
1381 t = S_BLK_HDR.unpack_from(hdata, pos)
1382 ___scrc, stype, sflags, slen = t
1383 pos_next = pos + slen
1384 pos += S_BLK_HDR.size
1386 # corrupt header
1387 if pos_next < pos:
1388 break
1390 # followed by block-specific header
1391 if stype == RAR_BLOCK_OLD_COMMENT and pos + S_COMMENT_HDR.size <= pos_next:
1392 declen, ver, meth, crc = S_COMMENT_HDR.unpack_from(hdata, pos)
1393 pos += S_COMMENT_HDR.size
1394 data = hdata[pos : pos_next]
1395 cmt = rar3_decompress(ver, meth, data, declen, sflags,
1396 crc, self._password)
1397 if not self._crc_check:
1398 h.comment = self._decode_comment(cmt)
1399 elif rar_crc32(cmt) & 0xFFFF == crc:
1400 h.comment = self._decode_comment(cmt)
1402 pos = pos_next
1403 return pos
1405 def _read_comment_v3(self, inf, psw=None):
1407 # read data
1408 with XFile(inf.volume_file) as rf:
1409 rf.seek(inf.data_offset)
1410 data = rf.read(inf.compress_size)
1412 # decompress
1413 cmt = rar3_decompress(inf.extract_version, inf.compress_type, data,
1414 inf.file_size, inf.flags, inf.CRC, psw, inf.salt)
1416 # check crc
1417 if self._crc_check:
1418 crc = rar_crc32(cmt)
1419 if crc != inf.CRC:
1420 return None
1422 return self._decode_comment(cmt)
1424 def _decode(self, val):
1425 for c in TRY_ENCODINGS:
1426 try:
1427 return val.decode(c)
1428 except UnicodeError:
1429 pass
1430 return val.decode(self._charset, 'replace')
1432 def _decode_comment(self, val):
1433 return self._decode(val)
1435 def process_entry(self, fd, item):
1436 if item.type == RAR_BLOCK_FILE:
1437 # use only first part
1438 if (item.flags & RAR_FILE_SPLIT_BEFORE) == 0:
1439 self._info_map[item.filename] = item
1440 self._info_list.append(item)
1441 elif len(self._info_list) > 0:
1442 # final crc is in last block
1443 old = self._info_list[-1]
1444 old.CRC = item.CRC
1445 old._md_expect = item._md_expect
1446 old.compress_size += item.compress_size
1448 # parse new-style comment
1449 if item.type == RAR_BLOCK_SUB and item.filename == 'CMT':
1450 if item.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER):
1451 pass
1452 elif item.flags & RAR_FILE_SOLID:
1453 # file comment
1454 cmt = self._read_comment_v3(item, self._password)
1455 if len(self._info_list) > 0:
1456 old = self._info_list[-1]
1457 old.comment = cmt
1458 else:
1459 # archive comment
1460 cmt = self._read_comment_v3(item, self._password)
1461 self.comment = cmt
1463 if item.type == RAR_BLOCK_MAIN:
1464 if item.flags & RAR_MAIN_COMMENT:
1465 self.comment = item.comment
1466 if item.flags & RAR_MAIN_PASSWORD:
1467 self._needs_password = True
1469 # put file compressed data into temporary .rar archive, and run
1470 # unrar on that, thus avoiding unrar going over whole archive
1471 def _open_hack(self, inf, psw):
1472 # create main header: crc, type, flags, size, res1, res2
1473 prefix = RAR_ID + S_BLK_HDR.pack(0x90CF, 0x73, 0, 13) + ZERO * (2 + 4)
1474 return self._open_hack_core(inf, psw, prefix, EMPTY)
1477 # RAR5 format
class Rar5Info(RarInfo):
    """Base record for every RAR5 block type.

    Only declares defaults; the parser fills in actual values.
    """
    extract_version = 50

    # where the header sits in the volume
    header_crc = None
    header_size = None
    header_offset = None
    data_offset = None

    # fields common to all block types
    block_type = None
    block_flags = None
    add_size = 0
    block_extra_size = 0

    # type=MAIN
    volume_number = None

    # hash context class and expected digest
    _md_class = None
    _md_expect = None

    def _must_disable_hack(self):
        """Generic records never veto the temp-archive shortcut."""
        return False
class Rar5BaseFile(Rar5Info):
    """Fields shared by RAR5 file and service records.
    """
    type = -1
    file_flags = None
    # (algo, flags, kdf_count, salt, iv, checkval)
    file_encryption = (0, 0, 0, EMPTY, EMPTY, EMPTY)
    file_compress_flags = None
    file_redir = None
    file_owner = None
    file_version = None
    blake2sp_hash = None

    def _must_disable_hack(self):
        """Return True for password-protected, split, solid or
        redirected entries; checks run in that order and stop at the
        first hit."""
        split_bits = RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER
        return bool(
            self.flags & RAR_FILE_PASSWORD
            or self.block_flags & split_bits
            or self.file_compress_flags & RAR5_COMPR_SOLID
            or self.file_redir
        )
class Rar5FileInfo(Rar5BaseFile):
    """Record describing a file entry in a RAR5 archive.

    Differs from the base class only by its ``type`` tag.
    """
    type = RAR_BLOCK_FILE
class Rar5ServiceInfo(Rar5BaseFile):
    """Record describing a service entry in a RAR5 archive.

    Differs from the base class only by its ``type`` tag.
    """
    type = RAR_BLOCK_SUB
class Rar5MainInfo(Rar5Info):
    """Record for the RAR5 main archive header.
    """
    type = RAR_BLOCK_MAIN
    main_flags = None
    main_volume_number = None

    def _must_disable_hack(self):
        """Return True when the main header has the solid flag set."""
        return bool(self.main_flags & RAR5_MAIN_FLAG_SOLID)
class Rar5EncryptionInfo(Rar5Info):
    """Record for the RAR5 header-encryption block.

    The ``encryption_*`` attributes are filled in by the parser.
    """
    type = RAR5_BLOCK_ENCRYPTION
    encryption_algo = None
    encryption_flags = None
    encryption_kdf_count = None
    encryption_salt = None
    encryption_check_value = None

    def needs_password(self):
        """Always True for this record type."""
        return True
class Rar5EndArcInfo(Rar5Info):
    """Record for the RAR5 end-of-archive block.
    """
    type = RAR_BLOCK_ENDARC
    # raw flags value, set by the parser
    endarc_flags = None
class RAR5Parser(CommonParser):
    """Parse RAR5 format.

    Reads block headers into the ``Rar5*Info`` record classes, maps
    RAR5 flags onto the RAR3-style ``flags`` bits and collects file
    entries and the archive comment.
    """
    _expect_sig = RAR5_ID
    _hdrenc_main = None

    # AES encrypted headers
    _last_aes256_key = (-1, None, None)     # (kdf_count, salt, key)

    def _gen_key(self, kdf_count, salt):
        """Derive the AES-256 key for (*kdf_count*, *salt*).

        The last derived key is cached.  Raises BadRarFile when
        *kdf_count* is above 24.
        """
        if self._last_aes256_key[:2] == (kdf_count, salt):
            return self._last_aes256_key[2]
        if kdf_count > 24:
            raise BadRarFile('Too large kdf_count')
        psw = self._password
        if isinstance(psw, unicode):
            psw = psw.encode('utf8')
        key = pbkdf2_sha256(psw, salt, 1 << kdf_count)
        self._last_aes256_key = (kdf_count, salt, key)
        return key

    def _decrypt_header(self, fd):
        """Return a decrypting reader for an encrypted header at *fd*.

        Uses the parameters of the previously seen encryption block and
        reads the 16-byte iv from *fd*.  Raises NoCrypto when no crypto
        backend is available.
        """
        if not _have_crypto:
            raise NoCrypto('Cannot parse encrypted headers - no crypto')
        h = self._hdrenc_main
        key = self._gen_key(h.encryption_kdf_count, h.encryption_salt)
        iv = fd.read(16)
        return HeaderDecrypt(fd, key, iv)

    def _parse_block_header(self, fd):
        """Read one block header from *fd*.

        Returns the matching ``Rar5*Info`` record, or None when the
        header is too large, truncated, fails the crc check or has an
        unknown block type.
        """
        header_offset = fd.tell()

        # crc32 + up to 3 bytes of header-length vint
        preload = 4 + 3
        start_bytes = fd.read(preload)
        header_crc, pos = load_le32(start_bytes, 0)
        hdrlen, pos = load_vint(start_bytes, pos)
        if hdrlen > 2 * 1024 * 1024:
            return None
        header_size = pos + hdrlen

        # read full header, check for EOF
        hdata = start_bytes + fd.read(header_size - len(start_bytes))
        if len(hdata) != header_size:
            self._set_error('Unexpected EOF when reading header')
            return None
        data_offset = fd.tell()

        # crc covers everything after the crc field itself
        calc_crc = rar_crc32(memoryview(hdata)[4:])
        if header_crc != calc_crc:
            # header parsing failed.
            self._set_error('Header CRC error: exp=%x got=%x (xlen = %d)',
                            header_crc, calc_crc, len(hdata))
            return None

        block_type, pos = load_vint(hdata, pos)

        if block_type == RAR5_BLOCK_MAIN:
            h, pos = self._parse_block_common(Rar5MainInfo(), hdata)
            h = self._parse_main_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_FILE:
            h, pos = self._parse_block_common(Rar5FileInfo(), hdata)
            h = self._parse_file_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_SERVICE:
            h, pos = self._parse_block_common(Rar5ServiceInfo(), hdata)
            h = self._parse_file_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_ENCRYPTION:
            h, pos = self._parse_block_common(Rar5EncryptionInfo(), hdata)
            h = self._parse_encryption_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_ENDARC:
            h, pos = self._parse_block_common(Rar5EndArcInfo(), hdata)
            h = self._parse_endarc_block(h, hdata, pos)
        else:
            h = None
        if h:
            h.header_offset = header_offset
            h.data_offset = data_offset
        return h

    def _parse_block_common(self, h, hdata):
        """Parse the fields every block starts with.

        Returns ``(h, pos)`` where *pos* is the offset of the first
        type-specific field.
        """
        h.header_crc, pos = load_le32(hdata, 0)
        hdrlen, pos = load_vint(hdata, pos)
        h.header_size = hdrlen + pos
        h.block_type, pos = load_vint(hdata, pos)
        h.block_flags, pos = load_vint(hdata, pos)

        if h.block_flags & RAR5_BLOCK_FLAG_EXTRA_DATA:
            h.block_extra_size, pos = load_vint(hdata, pos)
        if h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA:
            h.add_size, pos = load_vint(hdata, pos)

        h.compress_size = h.add_size

        # mirror into rar3-style flags
        if h.block_flags & RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN:
            h.flags |= RAR_SKIP_IF_UNKNOWN
        if h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA:
            h.flags |= RAR_LONG_BLOCK
        return h, pos

    def _parse_main_block(self, h, hdata, pos):
        """Parse main archive header fields into *h* and return it."""
        h.main_flags, pos = load_vint(hdata, pos)
        if h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR:
            # load_vint() returns (value, pos); keep only the value
            h.main_volume_number, pos = load_vint(hdata, pos)

        h.flags |= RAR_MAIN_NEWNUMBERING
        if h.main_flags & RAR5_MAIN_FLAG_SOLID:
            h.flags |= RAR_MAIN_SOLID
        if h.main_flags & RAR5_MAIN_FLAG_ISVOL:
            h.flags |= RAR_MAIN_VOLUME
        if h.main_flags & RAR5_MAIN_FLAG_RECOVERY:
            h.flags |= RAR_MAIN_RECOVERY
        if self._hdrenc_main:
            h.flags |= RAR_MAIN_PASSWORD
        if h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR == 0:
            h.flags |= RAR_MAIN_FIRSTVOLUME

        return h

    def _parse_file_block(self, h, hdata, pos):
        """Parse file/service header fields into *h* and return it."""
        h.file_flags, pos = load_vint(hdata, pos)
        h.file_size, pos = load_vint(hdata, pos)
        h.mode, pos = load_vint(hdata, pos)

        if h.file_flags & RAR5_FILE_FLAG_HAS_MTIME:
            h.mtime, pos = load_unixtime(hdata, pos)
            h.date_time = h.mtime.timetuple()[:6]
        if h.file_flags & RAR5_FILE_FLAG_HAS_CRC32:
            h.CRC, pos = load_le32(hdata, pos)
            h._md_class = CRC32Context
            h._md_expect = h.CRC

        h.file_compress_flags, pos = load_vint(hdata, pos)
        h.file_host_os, pos = load_vint(hdata, pos)
        h.orig_filename, pos = load_vstr(hdata, pos)
        h.filename = h.orig_filename.decode('utf8', 'replace')

        # use compatible values
        if h.file_host_os == RAR5_OS_WINDOWS:
            h.host_os = RAR_OS_WIN32
        else:
            h.host_os = RAR_OS_UNIX
        h.compress_type = RAR_M0 + ((h.file_compress_flags >> 7) & 7)

        if h.block_extra_size:
            # allow 1 byte of garbage
            while pos < len(hdata) - 1:
                xsize, pos = load_vint(hdata, pos)
                xdata, pos = load_bytes(hdata, xsize, pos)
                self._process_file_extra(h, xdata)

        if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE:
            h.flags |= RAR_FILE_SPLIT_BEFORE
        if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_AFTER:
            h.flags |= RAR_FILE_SPLIT_AFTER
        if h.file_flags & RAR5_FILE_FLAG_ISDIR:
            h.flags |= RAR_FILE_DIRECTORY
        if h.file_compress_flags & RAR5_COMPR_SOLID:
            h.flags |= RAR_FILE_SOLID

        return h

    def _parse_endarc_block(self, h, hdata, pos):
        """Parse end-of-archive header fields into *h* and return it."""
        h.endarc_flags, pos = load_vint(hdata, pos)
        if h.endarc_flags & RAR5_ENDARC_FLAG_NEXT_VOL:
            h.flags |= RAR_ENDARC_NEXT_VOLUME
        return h

    def _parse_encryption_block(self, h, hdata, pos):
        """Parse the header-encryption block and remember it for
        :meth:`_decrypt_header`.

        Raises BadRarFile when the cipher is not AES-256.
        """
        h.encryption_algo, pos = load_vint(hdata, pos)
        h.encryption_flags, pos = load_vint(hdata, pos)
        h.encryption_kdf_count, pos = load_byte(hdata, pos)
        h.encryption_salt, pos = load_bytes(hdata, 16, pos)
        if h.encryption_flags & RAR5_ENC_FLAG_HAS_CHECKVAL:
            # load_bytes() returns (bytes, pos); keep only the bytes
            h.encryption_check_value, pos = load_bytes(hdata, 12, pos)
        if h.encryption_algo != RAR5_XENC_CIPHER_AES256:
            raise BadRarFile('Unsupported header encryption cipher')
        self._hdrenc_main = h
        return h

    def _process_file_extra(self, h, xdata):
        """Dispatch one file extra record to its parser.

        Service and unknown record types are ignored.
        """
        xtype, pos = load_vint(xdata, 0)
        if xtype == RAR5_XFILE_TIME:
            self._parse_file_xtime(h, xdata, pos)
        elif xtype == RAR5_XFILE_ENCRYPTION:
            self._parse_file_encryption(h, xdata, pos)
        elif xtype == RAR5_XFILE_HASH:
            self._parse_file_hash(h, xdata, pos)
        elif xtype == RAR5_XFILE_VERSION:
            self._parse_file_version(h, xdata, pos)
        elif xtype == RAR5_XFILE_REDIR:
            self._parse_file_redir(h, xdata, pos)
        elif xtype == RAR5_XFILE_OWNER:
            self._parse_file_owner(h, xdata, pos)
        elif xtype == RAR5_XFILE_SERVICE:
            pass
        else:
            pass

    def _parse_file_xtime(self, h, xdata, pos):
        """Parse the file time extra record (mtime/ctime/atime)."""
        tflags, pos = load_vint(xdata, pos)
        # loader depends on the time format flag
        ldr = load_windowstime
        if tflags & RAR5_XTIME_UNIXTIME:
            ldr = load_unixtime
        if tflags & RAR5_XTIME_HAS_MTIME:
            h.mtime, pos = ldr(xdata, pos)
            h.date_time = h.mtime.timetuple()[:6]
        if tflags & RAR5_XTIME_HAS_CTIME:
            h.ctime, pos = ldr(xdata, pos)
        if tflags & RAR5_XTIME_HAS_ATIME:
            h.atime, pos = ldr(xdata, pos)

    def _parse_file_encryption(self, h, xdata, pos):
        """Parse the file encryption extra record.

        The parameters are only stored on *h*; the entry is also
        flagged as password protected.
        """
        algo, pos = load_vint(xdata, pos)
        flags, pos = load_vint(xdata, pos)
        kdf_count, pos = load_byte(xdata, pos)
        salt, pos = load_bytes(xdata, 16, pos)
        iv, pos = load_bytes(xdata, 16, pos)
        checkval = None
        if flags & RAR5_XENC_CHECKVAL:
            checkval, pos = load_bytes(xdata, 12, pos)
        if flags & RAR5_XENC_TWEAKED:
            # tweaked checksums: disable hash verification
            h._md_expect = None
            h._md_class = NoHashContext

        h.file_encryption = (algo, flags, kdf_count, salt, iv, checkval)
        h.flags |= RAR_FILE_PASSWORD

    def _parse_file_hash(self, h, xdata, pos):
        """Parse the file hash extra record (only blake2sp is handled)."""
        hash_type, pos = load_vint(xdata, pos)
        if hash_type == RAR5_XHASH_BLAKE2SP:
            h.blake2sp_hash, pos = load_bytes(xdata, 32, pos)
            # verify with blake2sp only if available and not tweaked
            if _have_blake2 and (h.file_encryption[1] & RAR5_XENC_TWEAKED) == 0:
                h._md_class = Blake2SP
                h._md_expect = h.blake2sp_hash

    def _parse_file_version(self, h, xdata, pos):
        """Parse the file version extra record into ``h.file_version``."""
        flags, pos = load_vint(xdata, pos)
        version, pos = load_vint(xdata, pos)
        h.file_version = (flags, version)

    def _parse_file_redir(self, h, xdata, pos):
        """Parse the redirection extra record into ``h.file_redir``."""
        redir_type, pos = load_vint(xdata, pos)
        redir_flags, pos = load_vint(xdata, pos)
        redir_name, pos = load_vstr(xdata, pos)
        redir_name = redir_name.decode('utf8', 'replace')
        h.file_redir = (redir_type, redir_flags, redir_name)

    def _parse_file_owner(self, h, xdata, pos):
        """Parse the owner extra record into ``h.file_owner``.

        Fields not present in the record stay None.
        """
        user_name = group_name = user_id = group_id = None

        flags, pos = load_vint(xdata, pos)
        if flags & RAR5_XOWNER_UNAME:
            user_name, pos = load_vstr(xdata, pos)
        if flags & RAR5_XOWNER_GNAME:
            group_name, pos = load_vstr(xdata, pos)
        if flags & RAR5_XOWNER_UID:
            user_id, pos = load_vint(xdata, pos)
        if flags & RAR5_XOWNER_GID:
            group_id, pos = load_vint(xdata, pos)

        h.file_owner = (user_name, group_name, user_id, group_id)

    def process_entry(self, fd, item):
        """Register a parsed header: file entries and archive comment."""
        if item.block_type == RAR5_BLOCK_FILE:
            # use only first part
            if (item.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE) == 0:
                self._info_map[item.filename] = item
                self._info_list.append(item)
            elif len(self._info_list) > 0:
                # final crc is in last block
                old = self._info_list[-1]
                old.CRC = item.CRC
                old._md_expect = item._md_expect
                old.blake2sp_hash = item.blake2sp_hash
                old.compress_size += item.compress_size
        elif item.block_type == RAR5_BLOCK_SERVICE:
            if item.filename == 'CMT':
                self._load_comment(fd, item)

    def _load_comment(self, fd, item):
        """Read the archive comment from service record *item*.

        Split or compressed comments, and comments encrypted with an
        unsupported cipher, are silently skipped.
        """
        if item.block_flags & (RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER):
            return None
        if item.compress_type != RAR_M0:
            return None

        if item.flags & RAR_FILE_PASSWORD:
            algo, ___flags, kdf_count, salt, iv, ___checkval = item.file_encryption
            if algo != RAR5_XENC_CIPHER_AES256:
                return None
            key = self._gen_key(kdf_count, salt)
            f = HeaderDecrypt(fd, key, iv)
            cmt = f.read(item.file_size)
        else:
            # archive comment
            with self._open_clear(item) as cmtstream:
                cmt = cmtstream.read()

        # rar bug? - appends zero to comment
        cmt = cmt.split(ZERO, 1)[0]
        self.comment = cmt.decode('utf8')

    def _open_hack(self, inf, psw):
        """Open *inf* through a temporary single-file RAR5 archive."""
        # len, type, blk_flags, flags
        main_hdr = b'\x03\x01\x00\x00'
        endarc_hdr = b'\x03\x05\x00\x00'
        main_hdr = S_LONG.pack(rar_crc32(main_hdr)) + main_hdr
        endarc_hdr = S_LONG.pack(rar_crc32(endarc_hdr)) + endarc_hdr
        return self._open_hack_core(inf, psw, RAR5_ID + main_hdr, endarc_hdr)
1887 ## Utility classes
class UnicodeFilename(object):
    """Decoder for the compressed UTF-16 filename stored in RAR3 headers.

    *name* is the 8-bit form of the filename, *encdata* the encoded
    stream.  After :meth:`decode`, ``failed`` is non-zero if either
    input ran out of bytes.
    """

    def __init__(self, name, encdata):
        self.std_name = bytearray(name)
        self.encdata = bytearray(encdata)
        self.pos = self.encpos = 0
        self.buf = bytearray()
        self.failed = 0

    def enc_byte(self):
        """Return the next byte of the encoded stream (0 when exhausted)."""
        if self.encpos >= len(self.encdata):
            self.failed = 1
            return 0
        value = self.encdata[self.encpos]
        self.encpos += 1
        return value

    def std_byte(self):
        """Return the 8-bit name byte at the current output position
        (``'?'`` when past the end)."""
        if self.pos >= len(self.std_name):
            self.failed = 1
            return ord('?')
        return self.std_name[self.pos]

    def put(self, lo, hi):
        """Append one UTF-16LE code unit and advance the output position."""
        self.buf += bytearray((lo, hi))
        self.pos += 1

    def _copy_run(self, high):
        """Handle a run command: copy several chars from the 8-bit name,
        either plain or with a correction byte added and *high* as the
        high byte."""
        length = self.enc_byte()
        if length & 0x80:
            correction = self.enc_byte()
            for _ in range((length & 0x7f) + 2):
                low = (self.std_byte() + correction) & 0xFF
                self.put(low, high)
        else:
            for _ in range(length + 2):
                self.put(self.std_byte(), 0)

    def decode(self):
        """Decode the whole stream and return the filename as text."""
        high = self.enc_byte()
        bits_left = 0
        total = len(self.encdata)
        while self.encpos < total:
            # every flags byte carries four 2-bit commands, MSB first
            if not bits_left:
                flags = self.enc_byte()
                bits_left = 8
            bits_left -= 2
            mode = (flags >> bits_left) & 3
            if mode == 3:
                self._copy_run(high)
            elif mode == 2:
                low = self.enc_byte()
                self.put(low, self.enc_byte())
            elif mode == 1:
                self.put(self.enc_byte(), high)
            else:
                self.put(self.enc_byte(), 0)
        return self.buf.decode("utf-16le", "replace")
class RarExtFile(RawIOBase):
    """Base class for file-like object that :meth:`RarFile.open` returns.

    Provides public methods and common crc checking.

    Behaviour:
     - no short reads - .read() and .readinto() read as much as requested.
     - no internal buffer, use io.BufferedReader for that.
    """

    #: Filename of the archive entry
    name = None

    def __init__(self, parser, inf):
        """Open archive entry.

        *parser* is the archive parser, *inf* the info record of the
        entry to read.
        """
        super(RarExtFile, self).__init__()

        # standard io.* properties
        self.name = inf.filename
        self.mode = 'rb'

        self._parser = parser
        self._inf = inf
        self._fd = None
        self._remain = 0
        self._returncode = 0

        self._md_context = None

        self._open()

    def _open(self):
        """(Re)initialise reading state: close any open fd, create a
        fresh hash context and reset the remaining byte count."""
        if self._fd:
            self._fd.close()
        md_class = self._inf._md_class or NoHashContext
        self._md_context = md_class()
        self._fd = None
        self._remain = self._inf.file_size

    def read(self, cnt=None):
        """Read all or specified amount of data from archive entry.

        Raises BadRarFile when fewer bytes than requested could be
        read.  When the entry is fully consumed the final hash is
        checked.
        """

        # sanitize cnt
        if cnt is None or cnt < 0:
            cnt = self._remain
        elif cnt > self._remain:
            cnt = self._remain
        if cnt == 0:
            return EMPTY

        # actual read
        data = self._read(cnt)
        if data:
            self._md_context.update(data)
            self._remain -= len(data)
        if len(data) != cnt:
            raise BadRarFile("Failed the read enough data")

        # done?
        if not data or self._remain == 0:
            # self.close()
            self._check()
        return data

    def _check(self):
        """Check final CRC.

        Does nothing when there is no expected value or the hash
        context produced no digest.  Raises BadRarFile on mismatch or
        when data is still outstanding.
        """
        final = self._md_context.digest()
        exp = self._inf._md_expect
        if exp is None:
            return
        if final is None:
            return
        if self._returncode:
            check_returncode(self, '')
        if self._remain != 0:
            raise BadRarFile("Failed the read enough data")
        if final != exp:
            raise BadRarFile("Corrupt file - CRC check failed: %s - exp=%r got=%r" % (
                self._inf.filename, exp, final))

    def _read(self, cnt):
        """Actual read that gets sanitized cnt."""

    def close(self):
        """Close open resources."""

        super(RarExtFile, self).close()

        if self._fd:
            self._fd.close()
            self._fd = None

    def __del__(self):
        """Hook delete to make sure tempfile is removed."""
        self.close()

    def readinto(self, buf):
        """Zero-copy read directly into buffer.

        Returns bytes read.  Must be provided by subclasses.
        """
        raise NotImplementedError('readinto')

    def tell(self):
        """Return current reading position in uncompressed data."""
        return self._inf.file_size - self._remain

    def seek(self, ofs, whence=0):
        """Seek in data.

        On uncompressed files, the seeking works by actual
        seeks so it's fast.  On compressed files it's slow
        - forward seeking happens by reading ahead,
        backwards by re-opening and decompressing from the start.

        The target is clamped to the range ``0 .. file_size``.  Returns
        the new position.  Raises ValueError on an invalid *whence*.
        """

        # disable crc check when seeking
        self._md_context = NoHashContext()

        fsize = self._inf.file_size
        cur_ofs = self.tell()

        if whence == 0:     # seek from beginning of file
            new_ofs = ofs
        elif whence == 1:   # seek from current position
            new_ofs = cur_ofs + ofs
        elif whence == 2:   # seek from end of file
            new_ofs = fsize + ofs
        else:
            raise ValueError('Invalid value for whence')

        # sanity check
        if new_ofs < 0:
            new_ofs = 0
        elif new_ofs > fsize:
            new_ofs = fsize

        # do the actual seek
        if new_ofs >= cur_ofs:
            self._skip(new_ofs - cur_ofs)
        else:
            # reopen and seek
            self._open()
            # _open() installs a fresh real hash context; turn checking
            # off again, as a subclass _skip() may jump over data
            # without hashing it, which would fail the final check
            self._md_context = NoHashContext()
            self._skip(new_ofs)
        return self.tell()

    def _skip(self, cnt):
        """Read and discard data"""
        while cnt > 0:
            if cnt > 8192:
                buf = self.read(8192)
            else:
                buf = self.read(cnt)
            if not buf:
                break
            cnt -= len(buf)

    def readable(self):
        """Returns True"""
        return True

    def writable(self):
        """Returns False.

        Writing is not supported.
        """
        return False

    def seekable(self):
        """Returns True.

        Seeking is supported, although it's slow on compressed files.
        """
        return True

    def readall(self):
        """Read all remaining data"""
        # avoid RawIOBase default impl
        return self.read()
class PipeReader(RarExtFile):
    """Reader that streams entry data from the stdout of an external
    command, and removes the temporary file (if any) on close."""

    def __init__(self, rf, inf, cmd, tempfile=None):
        # these must exist before the base __init__ calls _open()
        self._cmd = cmd
        self._proc = None
        self._tempfile = tempfile
        super(PipeReader, self).__init__(rf, inf)

    def _close_proc(self):
        """Close the child's pipes, wait for it and keep its return code."""
        proc = self._proc
        if not proc:
            return
        for pipe in (proc.stdout, proc.stdin, proc.stderr):
            if pipe:
                pipe.close()
        proc.wait()
        self._returncode = proc.returncode
        self._proc = None

    def _open(self):
        """Start the command from scratch, stopping any previous run."""
        super(PipeReader, self)._open()

        # get rid of the previous process first
        self._close_proc()

        # start a new one
        self._returncode = 0
        proc = custom_popen(self._cmd)
        self._proc = proc
        self._fd = proc.stdout

        # avoid situation where unrar waits on stdin
        if proc.stdin:
            proc.stdin.close()

    def _read(self, cnt):
        """Read *cnt* bytes from the pipe, retrying on short reads until
        the count is satisfied or the pipe returns nothing."""
        first = self._fd.read(cnt)
        if len(first) == cnt or not first:
            # common case: one read is enough (or EOF)
            return first

        # short read, keep asking for the rest
        chunks = [first]
        missing = cnt - len(first)
        while missing > 0:
            more = self._fd.read(missing)
            if not more:
                break
            missing -= len(more)
            chunks.append(more)
        return EMPTY.join(chunks)

    def close(self):
        """Stop the process, close the stream and delete the tempfile."""

        self._close_proc()
        super(PipeReader, self).close()

        tmp = self._tempfile
        if tmp:
            try:
                os.unlink(tmp)
            except OSError:
                # best effort cleanup
                pass
            self._tempfile = None

    def readinto(self, buf):
        """Fill *buf* from the pipe without intermediate copies.

        Reads at most the number of bytes still remaining in the entry
        and returns how many bytes were stored.
        """
        limit = min(len(buf), self._remain)
        view = memoryview(buf)
        filled = 0
        while filled < limit:
            n = self._fd.readinto(view[filled : limit])
            if not n:
                break
            self._md_context.update(view[filled : filled + n])
            self._remain -= n
            filled += n
        return filled
class DirectReader(RarExtFile):
    """Reader for stored (uncompressed) entries that takes the bytes
    straight from the archive volume(s).
    """
    _cur = None
    _cur_avail = None
    _volfile = None

    def _open(self):
        """Open the entry's volume and position after its header."""
        super(DirectReader, self)._open()

        inf = self._inf
        self._volfile = inf.volume_file
        self._fd = XFile(self._volfile, 0)
        self._fd.seek(inf.header_offset, 0)
        # re-parse the header; afterwards fd points at the data
        self._cur = self._parser._parse_header(self._fd)
        self._cur_avail = self._cur.add_size

    def _skip(self, cnt):
        """Advance *cnt* bytes by seeking, moving on to following
        volumes when the current part is used up.
        """
        while cnt > 0:
            # current part exhausted - try next volume
            if self._cur_avail == 0 and not self._open_next():
                break

            if cnt > self._cur_avail:
                # consume the rest of this part, no seek needed
                step = self._cur_avail
            else:
                self._fd.seek(cnt, 1)
                step = cnt
            self._cur_avail -= step
            self._remain -= step
            cnt -= step

    def _read(self, cnt):
        """Read up to *cnt* bytes, continuing into following volumes
        when the current part is used up."""
        chunks = []
        while cnt > 0:
            # current part exhausted - try next volume
            if self._cur_avail == 0 and not self._open_next():
                break

            # never read past the data of the current part
            piece = self._fd.read(min(cnt, self._cur_avail))
            if not piece:
                break

            got = len(piece)
            cnt -= got
            self._cur_avail -= got
            chunks.append(piece)

        if len(chunks) == 1:
            return chunks[0]
        return EMPTY.join(chunks)

    def _open_next(self):
        """Switch to the next volume.

        Returns False when the current part is not marked as continued,
        True once the continuation header has been located.  Raises
        BadRarFile on a bad signature, missing header or filename
        mismatch.
        """
        # nothing more if the file is not continued in another volume
        if (self._cur.flags & RAR_FILE_SPLIT_AFTER) == 0:
            return False

        if self._fd:
            self._fd.close()
            self._fd = None

        # open next part
        parser = self._parser
        self._volfile = parser._next_volname(self._volfile)
        fd = open(self._volfile, "rb", 0)
        self._fd = fd
        expected = parser._expect_sig
        if fd.read(len(expected)) != expected:
            raise BadRarFile("Invalid signature")

        # walk headers until the first file header
        while True:
            cur = parser._parse_header(fd)
            if not cur:
                raise BadRarFile("Unexpected EOF")
            if cur.type in (RAR_BLOCK_MARK, RAR_BLOCK_MAIN):
                if cur.add_size:
                    fd.seek(cur.add_size, 1)
                continue
            if cur.orig_filename != self._inf.orig_filename:
                raise BadRarFile("Did not found file entry")
            self._cur = cur
            self._cur_avail = cur.add_size
            return True

    def readinto(self, buf):
        """Fill *buf* directly from the volume(s); returns bytes stored."""
        filled = 0
        view = memoryview(buf)
        while filled < len(buf):
            # current part exhausted - try next volume
            if self._cur_avail == 0 and not self._open_next():
                break

            # stay inside the current part
            want = min(len(buf) - filled, self._cur_avail)

            n = self._fd.readinto(view[filled : filled + want])
            if not n:
                break
            self._md_context.update(view[filled : filled + n])
            self._cur_avail -= n
            self._remain -= n
            filled += n
        return filled
class HeaderDecrypt(object):
    """Read-only wrapper that AES-CBC-decrypts data from another file
    object, 16 bytes at a time."""

    def __init__(self, f, key, iv):
        self.f = f
        self.ciph = AES_CBC_Decrypt(key, iv)
        # decrypted bytes not yet handed out
        self.buf = EMPTY

    def tell(self):
        """Position of the underlying file; meaningful only on block
        boundaries."""
        return self.f.tell()

    def read(self, cnt=None):
        """Return up to *cnt* decrypted bytes.

        Raises BadRarFile when *cnt* exceeds 8 KiB.  Stops early when
        the underlying file cannot supply a full cipher block.
        """
        if cnt > 8 * 1024:
            raise BadRarFile('Bad count to header decrypt - wrong password?')

        # served entirely from leftover data?
        if cnt <= len(self.buf):
            head, self.buf = self.buf[:cnt], self.buf[cnt:]
            return head

        # take all leftovers, then decrypt more
        parts = [self.buf]
        cnt -= len(self.buf)
        self.buf = EMPTY

        block_size = 16
        while cnt > 0:
            raw = self.f.read(block_size)
            if len(raw) < block_size:
                break
            plain = self.ciph.decrypt(raw)
            if cnt >= len(plain):
                parts.append(plain)
                cnt -= len(plain)
            else:
                # keep the unused tail for the next call
                parts.append(plain[:cnt])
                self.buf = plain[cnt:]
                cnt = 0

        return EMPTY.join(parts)
2388 # handle (filename|filelike) object
class XFile(object):
    """Uniform wrapper over either a filename or an already open
    file object.

    A file opened here is closed by :meth:`close`; a file object passed
    in by the caller is rewound to position 0 and left open.
    """
    __slots__ = ('_fd', '_need_close')

    def __init__(self, xfile, bufsize=1024):
        self._need_close = not is_filelike(xfile)
        if self._need_close:
            self._fd = open(xfile, 'rb', bufsize)
        else:
            self._fd = xfile
            self._fd.seek(0)

    def read(self, n=None):
        """Read *n* bytes from the wrapped file."""
        fd = self._fd
        return fd.read(n)

    def tell(self):
        """Current position in the wrapped file."""
        fd = self._fd
        return fd.tell()

    def seek(self, ofs, whence=0):
        """Reposition the wrapped file; returns its seek() result."""
        fd = self._fd
        return fd.seek(ofs, whence)

    def readinto(self, dst):
        """Read from the wrapped file into buffer *dst*."""
        fd = self._fd
        return fd.readinto(dst)

    def close(self):
        """Close the wrapped file, but only if it was opened here."""
        if not self._need_close:
            return
        self._fd.close()

    def __enter__(self):
        return self

    def __exit__(self, typ, val, tb):
        self.close()
class NoHashContext(object):
    """Hash context stand-in that ignores all input.

    Every method returns None, so ``digest()`` gives no value.
    """

    def __init__(self, data=None):
        """Accept and ignore optional initial data."""
        pass

    def update(self, data):
        """Ignore *data*."""
        return None

    def digest(self):
        """Return None - there is no hash."""
        return None

    def hexdigest(self):
        """Return None - there is no hash."""
        return None
class CRC32Context(object):
    """Hash-like context whose digest is a running CRC32 value.

    The checksum is computed by rar_crc32(); digest() returns it as an
    integer rather than as bytes.
    """
    __slots__ = ['_crc']

    def __init__(self, data=None):
        self._crc = 0
        if not data:
            return
        self.update(data)

    def update(self, data):
        """Fold *data* into the running checksum."""
        current = self._crc
        self._crc = rar_crc32(data, current)

    def digest(self):
        """Return current checksum value."""
        return self._crc

    def hexdigest(self):
        """Return current checksum as 8 zero-padded hex digits."""
        value = self.digest()
        return '%08x' % value
class Blake2SP(object):
    """Blake2sp hash context built from blake2s tree-hashing parameters.

    Input is cut into 64-byte blocks which are dealt round-robin to
    eight leaf blake2s contexts.  The final digest is a root blake2s
    context fed with the digests of all leaves.
    """
    __slots__ = ['_thread', '_buf', '_cur', '_digest']
    digest_size = 32
    block_size = 64
    parallelism = 8

    def __init__(self, data=None):
        self._buf = b''         # input bytes not yet forming a full block
        self._cur = 0           # index of the leaf that gets the next block
        self._digest = None     # cached result of digest()

        last = self.parallelism - 1
        self._thread = []
        for leaf in range(self.parallelism):
            self._thread.append(self._blake2s(leaf, 0, leaf == last))

        if data:
            self.update(data)

    def _blake2s(self, ofs, depth, is_last):
        """Create one blake2s node of the two-level tree."""
        return blake2s(node_offset=ofs, node_depth=depth, last_node=is_last,
                       depth=2, inner_size=32, fanout=self.parallelism)

    def _add_block(self, blk):
        """Feed one block to the current leaf and advance to the next leaf."""
        idx = self._cur
        self._thread[idx].update(blk)
        self._cur = (idx + 1) % self.parallelism

    def update(self, data):
        """Hash data.
        """
        view = memoryview(data)
        bs = self.block_size
        pos = 0

        # first complete the partial block left over from an earlier call
        if self._buf:
            need = bs - len(self._buf)
            if len(view) < need:
                self._buf += view.tobytes()
                return
            self._add_block(self._buf + view[:need].tobytes())
            pos = need

        # then hand out all full blocks directly from the input
        total = len(view)
        while total - pos >= bs:
            self._add_block(view[pos:pos + bs])
            pos += bs

        # whatever remains waits for more input or for digest()
        self._buf = view[pos:].tobytes()

    def digest(self):
        """Return final digest value.
        """
        if self._digest is not None:
            return self._digest

        # flush the trailing partial block
        if self._buf:
            self._add_block(self._buf)
            self._buf = EMPTY

        root = self._blake2s(0, 1, True)
        for leaf in self._thread:
            root.update(leaf.digest())
        self._digest = root.digest()
        return self._digest

    def hexdigest(self):
        """Hexadecimal digest."""
        raw = self.digest()
        return tohex(raw)
class Rar3Sha1(object):
    """SHA1 context with optional RAR3 bug compatibility.

    The digest is always plain SHA1 of the data passed in.  With
    ``rarbug=True``, update() additionally overwrites 64-byte blocks of
    the caller's buffer in place (see _corrupt), so such buffers must
    be writable when they are longer than 64 bytes.
    """
    digest_size = 20
    block_size = 64

    _BLK = struct.Struct(b'>16L')
    _BLKx = struct.Struct(b'<16L')

    __slots__ = ('_nbytes', '_md', '_rarbug', '_workspace')

    def __init__(self, data=b'', rarbug=False):
        self._md = sha1()
        self._nbytes = 0
        self._rarbug = rarbug
        self._workspace = [0] * 16
        self.update(data)

    def update(self, data):
        """Process more data."""
        self._md.update(data)
        used = self._nbytes & 63
        total = len(data)
        self._nbytes += total

        if not self._rarbug or total <= 64:
            return

        # skip the part that completes the pending block, then visit
        # every further full block that lies entirely inside data
        bs = self.block_size
        for start in range(bs - used, total - bs + 1, bs):
            self._corrupt(data, start)

    def digest(self):
        """Return final state."""
        md = self._md
        return md.digest()

    def hexdigest(self):
        """Return final state as hex string."""
        md = self._md
        return md.hexdigest()

    def _corrupt(self, data, dpos):
        """Corruption from SHA1 core.

        Reads 16 big-endian words at *dpos*, runs the SHA1 message
        schedule over them in a 16-word ring and writes the resulting
        words back to the same place as little-endian.
        """
        w = self._workspace
        w[:] = self._BLK.unpack_from(data, dpos)
        for t in range(16, 80):
            mix = w[(t - 3) & 15] ^ w[(t - 8) & 15] ^ w[(t - 14) & 15] ^ w[(t - 16) & 15]
            # rotate left by one bit inside 32 bits
            w[t & 15] = ((mix << 1) | (mix >> 31)) & 0xFFFFFFFF
        self._BLKx.pack_into(data, dpos, *w)
##
## Utility functions
##

# Precompiled little-endian formats for single unsigned values.
S_LONG = Struct('<L')       # 32-bit
S_SHORT = Struct('<H')      # 16-bit
S_BYTE = Struct('<B')       # 8-bit

# Precompiled header layouts.
# S_BLK_HDR is packed by rar3_decompress() as
# (header crc, block type, flags, header length).
S_BLK_HDR = Struct('<HBHH')
# S_FILE_HDR is packed by rar3_decompress() as (compressed size,
# uncompressed size, host os, data crc, date, version, method,
# filename length, mode).
S_FILE_HDR = Struct('<LLBLLBBHL')
# presumably the fixed part of a comment block header - it is not used
# in this part of the file, confirm against its caller
S_COMMENT_HDR = Struct('<HBBH')
def load_vint(buf, pos):
    """Load variable-size int.

    Every byte contributes its low 7 bits, least significant group
    first; the first byte below 0x80 ends the number.  Returns
    ``(value, next_pos)``.  Raises BadRarFile when no ending byte is
    found within 11 bytes or before the end of *buf*.
    """
    stop = min(pos + 11, len(buf))
    value = 0
    for nth, idx in enumerate(range(pos, stop)):
        octet = _byte_code(buf[idx])
        value += (octet & 0x7F) << (7 * nth)
        if octet < 0x80:
            return value, idx + 1
    raise BadRarFile('cannot load vint')
def load_byte(buf, pos):
    """Load single byte.

    Returns ``(value, next_pos)``; raises BadRarFile when *buf* ends
    before the byte.
    """
    nxt = pos + 1
    if len(buf) < nxt:
        raise BadRarFile('cannot load byte')
    value = S_BYTE.unpack_from(buf, pos)[0]
    return value, nxt
def load_le32(buf, pos):
    """Load little-endian 32-bit integer.

    Returns ``(value, next_pos)``; raises BadRarFile when fewer than
    4 bytes are left in *buf*.
    """
    nxt = pos + 4
    if len(buf) < nxt:
        raise BadRarFile('cannot load le32')
    value = S_LONG.unpack_from(buf, pos)[0]
    return value, nxt
def load_bytes(buf, num, pos):
    """Load sequence of *num* bytes starting at *pos*.

    Returns ``(data, next_pos)``; raises BadRarFile when *buf* is too
    short.
    """
    stop = pos + num
    if len(buf) < stop:
        raise BadRarFile('cannot load bytes')
    chunk = buf[pos:stop]
    return chunk, stop
def load_vstr(buf, pos):
    """Load bytes prefixed by vint length.

    Returns ``(data, next_pos)``.
    """
    length, start = load_vint(buf, pos)
    return load_bytes(buf, length, start)
def load_dostime(buf, pos):
    """Load LE32 dos timestamp.

    Returns ``(datetime, next_pos)``.
    """
    raw, nxt = load_le32(buf, pos)
    return to_datetime(parse_dos_time(raw)), nxt
def load_unixtime(buf, pos):
    """Load LE32 unix timestamp.

    Returns ``(datetime, next_pos)``, the datetime is created with
    the UTC tzinfo.
    """
    seconds, nxt = load_le32(buf, pos)
    return datetime.fromtimestamp(seconds, UTC), nxt
def load_windowstime(buf, pos):
    """Load LE64 windows timestamp.

    The value is read as two LE32 halves, low half first, and counts
    100ns units.  Returns ``(datetime, next_pos)`` with precision cut
    to microseconds.
    """
    low, pos = load_le32(buf, pos)
    high, pos = load_le32(buf, pos)
    ticks = (high << 32) | low
    secs, frac = divmod(ticks, 10000000)

    # 11644473600: unix epoch (1970) in seconds from windows epoch (1601)
    stamp = datetime.fromtimestamp(secs - 11644473600, UTC)
    return stamp.replace(microsecond=frac // 10), pos
# next volume name, new-style naming
def _next_newvol(volfile):
    """Increment the number that ends at the last digit found in *volfile*.

    Raises BadRarName when the name contains no digit at all.
    """
    for idx in range(len(volfile) - 1, -1, -1):
        if '0' <= volfile[idx] <= '9':
            return _inc_volname(volfile, idx)
    raise BadRarName("Cannot construct volume name: " + volfile)
2660 # old-style next volume
2661 def _next_oldvol(volfile):
2662 # rar -> r00
2663 if volfile[-4:].lower() == '.rar':
2664 return volfile[:-2] + '00'
2665 return _inc_volname(volfile, len(volfile) - 1)
2667 # increase digits with carry, otherwise just increment char
2668 def _inc_volname(volfile, i):
2669 fn = list(volfile)
2670 while i >= 0:
2671 if fn[i] != '9':
2672 fn[i] = chr(ord(fn[i]) + 1)
2673 break
2674 fn[i] = '0'
2675 i -= 1
2676 return ''.join(fn)
# rar3 extended time fields
def _parse_ext_time(h, data, pos):
    """Parse extended time fields from *data* into header object *h*.

    A 16-bit flags word holds one 4-bit group per timestamp, from high
    to low: mtime, ctime, atime, arctime.  ``h.ctime``, ``h.atime`` and
    ``h.arctime`` are always assigned (None when absent); ``h.mtime``
    and ``h.date_time`` change only when an extended mtime is present.
    Returns the position after the parsed data.
    """
    # flags and rest of data can be missing
    flags = 0
    if len(data) >= pos + 2:
        flags = S_SHORT.unpack_from(data, pos)[0]
        pos += 2

    new_mtime, pos = _parse_xtime(flags >> 12, data, pos, h.mtime)
    h.ctime, pos = _parse_xtime(flags >> 8, data, pos)
    h.atime, pos = _parse_xtime(flags >> 4, data, pos)
    h.arctime, pos = _parse_xtime(flags, data, pos)

    if new_mtime:
        h.mtime = new_mtime
        h.date_time = new_mtime.timetuple()[:6]
    return pos
2695 # rar3 one extended time field
2696 def _parse_xtime(flag, data, pos, basetime=None):
2697 res = None
2698 if flag & 8:
2699 if not basetime:
2700 basetime, pos = load_dostime(data, pos)
2702 # load second fractions
2703 rem = 0
2704 cnt = flag & 3
2705 for _ in range(cnt):
2706 b, pos = load_byte(data, pos)
2707 rem = (b << 16) | (rem >> 8)
2709 # convert 100ns units to microseconds
2710 usec = rem // 10
2711 if usec > 1000000:
2712 usec = 999999
2714 # dostime has room for 30 seconds only, correct if needed
2715 if flag & 4 and basetime.second < 59:
2716 res = basetime.replace(microsecond=usec, second=basetime.second + 1)
2717 else:
2718 res = basetime.replace(microsecond=usec)
2719 return res, pos
def is_filelike(obj):
    """Filename or file object?

    Returns False for byte and text strings, True for objects that
    have ``read``, ``tell`` and ``seek`` attributes.  Anything else
    raises ValueError.
    """
    if isinstance(obj, (bytes, unicode)):
        return False
    if all(hasattr(obj, name) for name in ('read', 'tell', 'seek')):
        return True
    raise ValueError("Invalid object passed as file")
def rar3_s2k(psw, salt):
    """String-to-key hash for RAR3.

    *psw* may be text or utf8-encoded bytes.  Returns ``(key, iv)``:
    the key is the first 16 digest bytes with every 32-bit word
    byte-swapped, the iv collects one digest byte at the start of each
    of 16 equal stretches of the hashing loop.
    """
    if not isinstance(psw, unicode):
        psw = psw.decode('utf8')

    # mutable buffer: the hash runs in rarbug mode, which may rewrite
    # its input in place
    seed = bytearray(psw.encode('utf-16le') + salt)
    md = Rar3Sha1(rarbug=True)
    iv = EMPTY

    stretch = 0x4000
    for n in range(16 * stretch):
        md.update(seed)
        # low 3 bytes of the little-endian round counter
        md.update(S_LONG.pack(n)[:3])
        if n % stretch == 0:
            iv += md.digest()[19:20]

    words = unpack(">LLLL", md.digest()[:16])
    return pack("<LLLL", *words), iv
def rar3_decompress(vers, meth, data, declen=0, flags=0, crc=0, psw=None, salt=None):
    """Decompress blob of compressed data.

    Used for data with non-standard header - eg. comments.

    A minimal single-file archive is written to a temp file around
    *data* and the configured unrar tool is run on it.  Returns what
    the tool wrote to its output.  Returns *data* unchanged when it is
    stored and not encrypted, and an empty value when the flags ask
    for a salt but none was given.

    NOTE(review): the exit status of the tool is not checked here and
    custom_popen() merges stderr into stdout, so on failure the result
    may contain tool messages instead of data.
    """

    # already uncompressed?
    if meth == RAR_M0 and (flags & RAR_FILE_PASSWORD) == 0:
        return data

    # take only necessary flags
    flags = flags & (RAR_FILE_PASSWORD | RAR_FILE_SALT | RAR_FILE_DICTMASK)
    flags |= RAR_LONG_BLOCK

    # file header: fixed fields, then file name, then optional salt
    fname = b'data'
    date = 0
    mode = 0x20
    fhdr = S_FILE_HDR.pack(len(data), declen, RAR_OS_MSDOS, crc,
                           date, vers, meth, len(fname), mode)
    fhdr += fname
    if flags & RAR_FILE_SALT:
        if not salt:
            return EMPTY
        fhdr += salt

    # full header: packed first with zero crc, the crc is then computed
    # over everything after the 2-byte crc field and the header repacked
    hlen = S_BLK_HDR.size + len(fhdr)
    hdr = S_BLK_HDR.pack(0, RAR_BLOCK_FILE, flags, hlen) + fhdr
    hcrc = rar_crc32(hdr[2:]) & 0xFFFF
    hdr = S_BLK_HDR.pack(hcrc, RAR_BLOCK_FILE, flags, hlen) + fhdr

    # archive main header: 13 bytes total, with fixed crc value
    mh = S_BLK_HDR.pack(0x90CF, RAR_BLOCK_MAIN, 0, 13) + ZERO * (2 + 4)

    # decompress via temp rar
    tmpfd, tmpname = mkstemp(suffix='.rar')
    tmpf = os.fdopen(tmpfd, "wb")
    try:
        tmpf.write(RAR_ID + mh + hdr + data)
        # close before running the tool so all data is on disk
        tmpf.close()

        cmd = [UNRAR_TOOL] + list(OPEN_ARGS)
        add_password_arg(cmd, psw, (flags & RAR_FILE_PASSWORD))
        cmd.append(tmpname)

        p = custom_popen(cmd)
        return p.communicate()[0]
    finally:
        # second close covers the case where write() failed
        tmpf.close()
        os.unlink(tmpname)
def to_datetime(t):
    """Convert 6-part time tuple into datetime object.

    None is passed through.  When the values do not form a valid date,
    month and day are clamped into range and hour, minute and second
    are capped at their maximum before trying again; Feb 29 falls back
    to Feb 28 in years that do not have it.
    """
    if t is None:
        return None

    year, mon, day, h, m, s = t

    # fast path: values are usually valid
    try:
        return datetime(year, mon, day, h, m, s)
    except ValueError:
        pass

    # sanitize invalid values; index 0 is a placeholder so that the
    # month number can be used directly
    days_in_month = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
    mon = min(max(mon, 1), 12)
    day = min(max(day, 1), days_in_month[mon])
    h = min(h, 23)
    m = min(m, 59)
    s = min(s, 59)

    if (mon, day) == (2, 29):
        try:
            return datetime(year, mon, day, h, m, s)
        except ValueError:
            day = 28
    return datetime(year, mon, day, h, m, s)
def parse_dos_time(stamp):
    """Parse standard 32-bit DOS timestamp.

    Returns ``(year, month, day, hour, minute, second)``.  Bit fields
    from low to high: 5 bits seconds/2, 6 bits minute, 5 bits hour,
    5 bits day, 4 bits month, 7 bits years since 1980.  Values are
    not validated.
    """
    sec = stamp & 0x1F
    mn = (stamp >> 5) & 0x3F
    hr = (stamp >> 11) & 0x1F
    day = (stamp >> 16) & 0x1F
    mon = (stamp >> 21) & 0x0F
    yr = ((stamp >> 25) & 0x7F) + 1980
    return (yr, mon, day, hr, mn, sec * 2)
def custom_popen(cmd):
    """Disconnect cmd from parent fds, read only from stdout.

    Starts *cmd* unbuffered with pipes on stdin and stdout and with
    stderr merged into stdout, and returns the Popen object.  Raises
    RarCannotExec when the executable is not found; other OSErrors
    are passed through.
    """
    # CREATE_NO_WINDOW - needed for py2exe
    creationflags = 0x08000000 if sys.platform == 'win32' else 0

    # run command
    try:
        return Popen(cmd, bufsize=0, stdout=PIPE, stdin=PIPE, stderr=STDOUT,
                     creationflags=creationflags)
    except OSError as ex:
        if ex.errno != errno.ENOENT:
            raise
        raise RarCannotExec("Unrar not installed? (rarfile.UNRAR_TOOL=%r)" % UNRAR_TOOL)
def custom_check(cmd, ignore_retcode=False):
    """Run command, collect output, raise error if needed.

    Returns the collected output.  Raises RarExecError on non-zero
    exit status unless *ignore_retcode* is set.
    """
    proc = custom_popen(cmd)
    output = proc.communicate()[0]
    if ignore_retcode or not proc.returncode:
        return output
    raise RarExecError("Check-run failed")
def add_password_arg(cmd, psw, ___required=False):
    """Append password switch to commandline.

    Nothing is appended when the alternative tool is in use.
    Otherwise ``-p<psw>`` is appended when a password is given and
    ``-p-`` when it is None.  The third argument is not used.
    """
    if UNRAR_TOOL == ALT_TOOL:
        return
    switch = '-p-' if psw is None else '-p' + psw
    cmd.append(switch)
def check_returncode(p, out):
    """Raise exception according to unrar exit code.

    Does nothing when ``p.returncode`` is 0.  Otherwise an exception
    class is picked from the exit code and raised with a message made
    of the class docstring, the exit code and, when not empty, *out*.
    """
    code = p.returncode
    if code == 0:
        return

    # map return code to exception class, codes from rar.txt;
    # the alternative tool gets no per-code mapping
    if UNRAR_TOOL == ALT_TOOL:
        errmap = [None]
    else:
        errmap = [None,
                  RarWarning, RarFatalError, RarCRCError, RarLockedArchiveError,    # 1..4
                  RarWriteError, RarOpenError, RarUserError, RarMemoryError,        # 5..8
                  RarCreateError, RarNoFilesError, RarWrongPassword]                # 9..11

    if 0 < code < len(errmap):
        exc = errmap[code]
    elif code == 255:
        exc = RarUserBreak
    elif code < 0:
        exc = RarSignalExit
    else:
        exc = RarUnknownError

    # format message
    if out:
        msg = "%s [%d]: %s" % (exc.__doc__, p.returncode, out)
    else:
        msg = "%s [%d]" % (exc.__doc__, p.returncode)

    raise exc(msg)
def hmac_sha256(key, data):
    """Return raw HMAC-SHA256 digest of *data* under *key*."""
    mac = HMAC(key, data, sha256)
    return mac.digest()
def membuf_tempfile(memfile):
    """Write in-memory file object to real file.

    *memfile* is rewound and copied in BSIZE chunks into a new temp
    file with ``.rar`` suffix.  Returns the temp file name; removing
    the file is up to the caller.  When copying fails the temp file
    is removed and the error is re-raised.
    """
    memfile.seek(0, 0)

    tmpfd, tmpname = mkstemp(suffix='.rar')
    tmpf = os.fdopen(tmpfd, "wb")

    try:
        chunk = memfile.read(BSIZE)
        while chunk:
            tmpf.write(chunk)
            chunk = memfile.read(BSIZE)
        tmpf.close()
    except:
        # catch-all is deliberate: clean up on any failure, then re-raise
        tmpf.close()
        os.unlink(tmpname)
        raise
    return tmpname
class XTempFile(object):
    """Context manager that yields a real file name for an archive.

    A file object is first copied into a temp file, which is removed
    again when the context exits; a filename is yielded as is.
    """
    __slots__ = ('_tmpfile', '_filename')

    def __init__(self, rarfile):
        if is_filelike(rarfile):
            copied = membuf_tempfile(rarfile)
            self._tmpfile = copied
            self._filename = copied
        else:
            self._tmpfile = None
            self._filename = rarfile

    def __enter__(self):
        return self._filename

    def __exit__(self, exc_type, exc_value, tb):
        tmp = self._tmpfile
        if not tmp:
            return
        try:
            os.unlink(tmp)
        except OSError:
            # best effort: the temp file may already be gone
            pass
        self._tmpfile = None
#
# Check if unrar works
#

# Snapshot of the tool settings as they are at this point of module
# load; _check_unrar_tool() probes and restores from these values.
ORIG_UNRAR_TOOL = UNRAR_TOOL
ORIG_OPEN_ARGS = OPEN_ARGS
ORIG_EXTRACT_ARGS = EXTRACT_ARGS
ORIG_TEST_ARGS = TEST_ARGS
def _check_unrar_tool():
    """Select a working extraction tool and set the module settings for it.

    The original tool is probed first; when it cannot be executed the
    alternative tool is probed.  The module-level tool name and
    argument lists are set to match the tool that can be executed.
    Returns True when a tool was selected and False when neither can
    be executed; in that case the settings are left untouched.
    """
    global UNRAR_TOOL, OPEN_ARGS, EXTRACT_ARGS, TEST_ARGS

    # does UNRAR_TOOL work?
    try:
        custom_check([ORIG_UNRAR_TOOL], True)
    except RarCannotExec:
        pass
    else:
        UNRAR_TOOL = ORIG_UNRAR_TOOL
        OPEN_ARGS = ORIG_OPEN_ARGS
        EXTRACT_ARGS = ORIG_EXTRACT_ARGS
        TEST_ARGS = ORIG_TEST_ARGS
        return True

    # does ALT_TOOL work?
    try:
        custom_check([ALT_TOOL] + list(ALT_CHECK_ARGS), True)
    except RarCannotExec:
        # no usable tool, only uncompressed archives work
        return False

    # replace config
    UNRAR_TOOL = ALT_TOOL
    OPEN_ARGS = ALT_OPEN_ARGS
    EXTRACT_ARGS = ALT_EXTRACT_ARGS
    TEST_ARGS = ALT_TEST_ARGS
    return True
# Probe for a usable tool once, when the module is loaded; the boolean
# result is ignored here.
_check_unrar_tool()