sdist: include etc/*.txt
[rarfile.git] / rarfile.py
blobd46047ed06d6b5dcabf739b601c6a0b102df6f8f
1 # rarfile.py
3 # Copyright (c) 2005-2020 Marko Kreen <markokr@gmail.com>
5 # Permission to use, copy, modify, and/or distribute this software for any
6 # purpose with or without fee is hereby granted, provided that the above
7 # copyright notice and this permission notice appear in all copies.
9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 """RAR archive reader.
19 This is Python module for Rar archive reading. The interface
20 is made as :mod:`zipfile`-like as possible.
22 Basic logic:
23 - Parse archive structure with Python.
24 - Extract non-compressed files with Python
25 - Extract compressed files with unrar.
26 - Optionally write compressed data to temp file to speed up unrar,
27 otherwise it needs to scan whole archive on each execution.
29 Example::
31 import rarfile
33 rf = rarfile.RarFile("myarchive.rar")
34 for f in rf.infolist():
35 print(f.filename, f.file_size)
36 if f.filename == "README":
37 print(rf.read(f))
39 Archive files can also be accessed via file-like object returned
40 by :meth:`RarFile.open`::
42 import rarfile
44 with rarfile.RarFile("archive.rar") as rf:
45 with rf.open("README") as f:
46 for ln in f:
47 print(ln.strip())
49 For decompression to work, either ``unrar`` or ``unar`` tool must be in PATH.
50 """
52 import errno
53 import hashlib
54 import io
55 import os
56 import re
57 import shutil
58 import struct
59 import sys
60 import warnings
61 from binascii import crc32, hexlify
62 from datetime import datetime, timezone
63 from hashlib import blake2s, pbkdf2_hmac, sha1
64 from pathlib import Path
65 from struct import Struct, pack, unpack
66 from subprocess import DEVNULL, PIPE, STDOUT, Popen
67 from tempfile import mkstemp
AES = None

# Optional crypto backend - only needed for archives with encrypted headers.
# _have_crypto records which backend was importable:
#   1 - ``cryptography`` package (Cipher / algorithms / modes)
#   2 - ``Crypto`` package (AES module)
#   0 - no backend available
try:
    try:
        from cryptography.hazmat.backends import default_backend
        from cryptography.hazmat.primitives.ciphers import (
            Cipher, algorithms, modes,
        )
        _have_crypto = 1
    except ImportError:
        from Crypto.Cipher import AES
        _have_crypto = 2
except ImportError:
    _have_crypto = 0
class AES_CBC_Decrypt:
    """AES-CBC decryptor with a uniform ``decrypt(data)`` callable.

    Wraps whichever crypto backend was found at import time
    (see ``_have_crypto``) so callers only use ``self.decrypt``.

    Raises :exc:`NoCrypto` if no backend is available.
    """
    def __init__(self, key, iv):
        if _have_crypto == 2:
            self.decrypt = AES.new(key, AES.MODE_CBC, iv).decrypt
        elif _have_crypto == 1:
            ciph = Cipher(algorithms.AES(key), modes.CBC(iv), default_backend())
            self.decrypt = ciph.decryptor().update
        else:
            # without this guard the cryptography branch fails with NameError
            raise NoCrypto("Cannot decrypt - no crypto backend available")
__version__ = "4.1a1"

# export only interesting items
__all__ = ["get_rar_version", "is_rarfile", "is_rarfile_sfx", "RarInfo", "RarFile", "RarExtFile"]
##
## Module configuration.  Can be tuned after importing.
##

#: executable for unrar tool
UNRAR_TOOL = "unrar"

#: executable for unar tool
UNAR_TOOL = "unar"

#: executable for bsdtar tool
BSDTAR_TOOL = "bsdtar"

#: executable for p7zip/7z tool
SEVENZIP_TOOL = "7z"

#: executable for alternative 7z tool
SEVENZIP2_TOOL = "7zz"

#: default fallback charset
DEFAULT_CHARSET = "windows-1252"

#: list of encodings to try, with fallback to DEFAULT_CHARSET if none succeed
TRY_ENCODINGS = ("utf8", "utf-16le")

#: whether to speed up decompression by using tmp archive
USE_EXTRACT_HACK = 1

#: limit the filesize for tmp archive usage
HACK_SIZE_LIMIT = 20 * 1024 * 1024

#: set specific directory for mkstemp() used by hack dir usage
HACK_TMP_DIR = None

#: Separator for path name components.  Always "/".
PATH_SEP = "/"
##
## rar constants
##

# block types
RAR_BLOCK_MARK = 0x72           # r
RAR_BLOCK_MAIN = 0x73           # s
RAR_BLOCK_FILE = 0x74           # t
RAR_BLOCK_OLD_COMMENT = 0x75    # u
RAR_BLOCK_OLD_EXTRA = 0x76      # v
RAR_BLOCK_OLD_SUB = 0x77        # w
RAR_BLOCK_OLD_RECOVERY = 0x78   # x
RAR_BLOCK_OLD_AUTH = 0x79       # y
RAR_BLOCK_SUB = 0x7a            # z
RAR_BLOCK_ENDARC = 0x7b         # {

# flags for RAR_BLOCK_MAIN
RAR_MAIN_VOLUME = 0x0001
RAR_MAIN_COMMENT = 0x0002
RAR_MAIN_LOCK = 0x0004
RAR_MAIN_SOLID = 0x0008
RAR_MAIN_NEWNUMBERING = 0x0010
RAR_MAIN_AUTH = 0x0020
RAR_MAIN_RECOVERY = 0x0040
RAR_MAIN_PASSWORD = 0x0080
RAR_MAIN_FIRSTVOLUME = 0x0100
RAR_MAIN_ENCRYPTVER = 0x0200

# flags for RAR_BLOCK_FILE
RAR_FILE_SPLIT_BEFORE = 0x0001
RAR_FILE_SPLIT_AFTER = 0x0002
RAR_FILE_PASSWORD = 0x0004
RAR_FILE_COMMENT = 0x0008
RAR_FILE_SOLID = 0x0010
RAR_FILE_DICTMASK = 0x00e0
RAR_FILE_DICT64 = 0x0000
RAR_FILE_DICT128 = 0x0020
RAR_FILE_DICT256 = 0x0040
RAR_FILE_DICT512 = 0x0060
RAR_FILE_DICT1024 = 0x0080
RAR_FILE_DICT2048 = 0x00a0
RAR_FILE_DICT4096 = 0x00c0
RAR_FILE_DIRECTORY = 0x00e0
RAR_FILE_LARGE = 0x0100
RAR_FILE_UNICODE = 0x0200
RAR_FILE_SALT = 0x0400
RAR_FILE_VERSION = 0x0800
RAR_FILE_EXTTIME = 0x1000
RAR_FILE_EXTFLAGS = 0x2000

# flags for RAR_BLOCK_ENDARC
RAR_ENDARC_NEXT_VOLUME = 0x0001
RAR_ENDARC_DATACRC = 0x0002
RAR_ENDARC_REVSPACE = 0x0004
RAR_ENDARC_VOLNR = 0x0008

# flags common to all blocks
RAR_SKIP_IF_UNKNOWN = 0x4000
RAR_LONG_BLOCK = 0x8000

# Host OS types
RAR_OS_MSDOS = 0    #: MSDOS (only in RAR3)
RAR_OS_OS2 = 1      #: OS2 (only in RAR3)
RAR_OS_WIN32 = 2    #: Windows
RAR_OS_UNIX = 3     #: UNIX
RAR_OS_MACOS = 4    #: MacOS (only in RAR3)
RAR_OS_BEOS = 5     #: BeOS (only in RAR3)

# Compression methods - "0".."5"
RAR_M0 = 0x30   #: No compression.
RAR_M1 = 0x31   #: Compression level `-m1` - Fastest compression.
RAR_M2 = 0x32   #: Compression level `-m2`.
RAR_M3 = 0x33   #: Compression level `-m3`.
RAR_M4 = 0x34   #: Compression level `-m4`.
RAR_M5 = 0x35   #: Compression level `-m5` - Maximum compression.
#
# RAR5 constants
#

# block types
RAR5_BLOCK_MAIN = 1
RAR5_BLOCK_FILE = 2
RAR5_BLOCK_SERVICE = 3
RAR5_BLOCK_ENCRYPTION = 4
RAR5_BLOCK_ENDARC = 5

# flags common to all blocks
RAR5_BLOCK_FLAG_EXTRA_DATA = 0x01
RAR5_BLOCK_FLAG_DATA_AREA = 0x02
RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN = 0x04
RAR5_BLOCK_FLAG_SPLIT_BEFORE = 0x08
RAR5_BLOCK_FLAG_SPLIT_AFTER = 0x10
RAR5_BLOCK_FLAG_DEPENDS_PREV = 0x20
RAR5_BLOCK_FLAG_KEEP_WITH_PARENT = 0x40

# flags for RAR5_BLOCK_MAIN
RAR5_MAIN_FLAG_ISVOL = 0x01
RAR5_MAIN_FLAG_HAS_VOLNR = 0x02
RAR5_MAIN_FLAG_SOLID = 0x04
RAR5_MAIN_FLAG_RECOVERY = 0x08
RAR5_MAIN_FLAG_LOCKED = 0x10

# flags for RAR5_BLOCK_FILE
RAR5_FILE_FLAG_ISDIR = 0x01
RAR5_FILE_FLAG_HAS_MTIME = 0x02
RAR5_FILE_FLAG_HAS_CRC32 = 0x04
RAR5_FILE_FLAG_UNKNOWN_SIZE = 0x08

RAR5_COMPR_SOLID = 0x40

RAR5_ENC_FLAG_HAS_CHECKVAL = 0x01

RAR5_ENDARC_FLAG_NEXT_VOL = 0x01

# extra record types
RAR5_XFILE_ENCRYPTION = 1
RAR5_XFILE_HASH = 2
RAR5_XFILE_TIME = 3
RAR5_XFILE_VERSION = 4
RAR5_XFILE_REDIR = 5
RAR5_XFILE_OWNER = 6
RAR5_XFILE_SERVICE = 7

RAR5_XTIME_UNIXTIME = 0x01
RAR5_XTIME_HAS_MTIME = 0x02
RAR5_XTIME_HAS_CTIME = 0x04
RAR5_XTIME_HAS_ATIME = 0x08
RAR5_XTIME_UNIXTIME_NS = 0x10

RAR5_XENC_CIPHER_AES256 = 0

RAR5_XENC_CHECKVAL = 0x01
RAR5_XENC_TWEAKED = 0x02

RAR5_XHASH_BLAKE2SP = 0

# redirection types (first element of RarInfo.file_redir)
RAR5_XREDIR_UNIX_SYMLINK = 1
RAR5_XREDIR_WINDOWS_SYMLINK = 2
RAR5_XREDIR_WINDOWS_JUNCTION = 3
RAR5_XREDIR_HARD_LINK = 4
RAR5_XREDIR_FILE_COPY = 5

# redirection flags (second element of RarInfo.file_redir)
RAR5_XREDIR_ISDIR = 0x01

RAR5_XOWNER_UNAME = 0x01
RAR5_XOWNER_GNAME = 0x02
RAR5_XOWNER_UID = 0x04
RAR5_XOWNER_GID = 0x08

RAR5_OS_WINDOWS = 0
RAR5_OS_UNIX = 1

DOS_MODE_ARCHIVE = 0x20
DOS_MODE_DIR = 0x10
DOS_MODE_SYSTEM = 0x04
DOS_MODE_HIDDEN = 0x02
DOS_MODE_READONLY = 0x01

RAR5_PW_CHECK_SIZE = 8
##
## internal constants
##

# archive signatures; both formats share the first 6 bytes
RAR_ID = b"Rar!\x1a\x07\x00"
RAR5_ID = b"Rar!\x1a\x07\x01\x00"

WIN32 = sys.platform == "win32"
BSIZE = 512 * 1024 if WIN32 else 64 * 1024

SFX_MAX_SIZE = 2 * 1024 * 1024
RAR_V3 = 3
RAR_V5 = 5

# characters replaced in extracted file names; win32 set additionally
# covers ":", "^" and backslash
_BAD_CHARS = r"""\x00-\x1F<>|"?*"""
RC_BAD_CHARS_UNIX = re.compile(r"[%s]" % _BAD_CHARS)
RC_BAD_CHARS_WIN32 = re.compile(r"[%s:^\\]" % _BAD_CHARS)
def _find_sfx_header(xfile):
    """Locate a RAR signature near the start of the file.

    Reads the first 64 bytes, then up to ``SFX_MAX_SIZE`` more, and
    searches the accumulated data for a RAR3 or RAR5 signature.

    Returns tuple of (version, offset) where version is ``RAR_V3``
    or ``RAR_V5``; returns (0, 0) if no signature was found.
    """
    # common prefix of both signatures
    sig = RAR_ID[:-1]
    buf = io.BytesIO()
    steps = (64, SFX_MAX_SIZE)

    with XFile(xfile) as fd:
        for step in steps:
            data = fd.read(step)
            if not data:
                break
            buf.write(data)
            # rescan from the start so a signature straddling
            # the previous read boundary is still found
            curdata = buf.getvalue()
            findpos = 0
            while True:
                pos = curdata.find(sig, findpos)
                if pos < 0:
                    break
                if curdata[pos:pos + len(RAR_ID)] == RAR_ID:
                    return RAR_V3, pos
                if curdata[pos:pos + len(RAR5_ID)] == RAR5_ID:
                    return RAR_V5, pos
                findpos = pos + len(sig)
    return 0, 0
339 ## Public interface
def get_rar_version(xfile):
    """Check quickly which RAR format the file starts with.

    Only the leading signature bytes are examined, so archives with
    data in front of the signature are not detected here.

    Returns ``RAR_V3`` or ``RAR_V5``, or 0 if the file does not
    start with a RAR signature.
    """
    with XFile(xfile) as fd:
        buf = fd.read(len(RAR5_ID))
    if buf.startswith(RAR_ID):
        return RAR_V3
    elif buf.startswith(RAR5_ID):
        return RAR_V5
    return 0
def is_rarfile(xfile):
    """Check quickly whether file is rar archive.

    Returns False instead of raising if the file cannot be opened or read.
    """
    try:
        return get_rar_version(xfile) > 0
    except OSError:
        # File not found or not accessible, ignore
        return False
def is_rarfile_sfx(xfile):
    """Check whether file is rar archive with support for SFX.

    It will read 2M from file.
    """
    return _find_sfx_header(xfile)[0] > 0
class Error(Exception):
    """Base class for rarfile errors."""


class BadRarFile(Error):
    """Incorrect data in archive."""


class NotRarFile(Error):
    """The file is not RAR archive."""


class BadRarName(Error):
    """Cannot guess multipart name components."""


class NoRarEntry(Error):
    """File not found in RAR"""


class PasswordRequired(Error):
    """File requires password"""


class NeedFirstVolume(Error):
    """Need to start from first volume.

    Attributes:

        current_volume
            Volume number of current file or None if not known
    """
    def __init__(self, msg, volume):
        super().__init__(msg)
        self.current_volume = volume


class NoCrypto(Error):
    """Cannot parse encrypted headers - no crypto available."""


class RarExecError(Error):
    """Problem reported by unrar/rar."""


class RarWarning(RarExecError):
    """Non-fatal error"""


class RarFatalError(RarExecError):
    """Fatal error"""


class RarCRCError(RarExecError):
    """CRC error during unpacking"""


class RarLockedArchiveError(RarExecError):
    """Must not modify locked archive"""


class RarWriteError(RarExecError):
    """Write error"""


class RarOpenError(RarExecError):
    """Open error"""


class RarUserError(RarExecError):
    """User error"""


class RarMemoryError(RarExecError):
    """Memory error"""


class RarCreateError(RarExecError):
    """Create error"""


class RarNoFilesError(RarExecError):
    """No files that match pattern were found"""


class RarUserBreak(RarExecError):
    """User stop"""


class RarWrongPassword(RarExecError):
    """Incorrect password"""


class RarUnknownError(RarExecError):
    """Unknown exit code"""


class RarSignalExit(RarExecError):
    """Unrar exited with signal"""


class RarCannotExec(RarExecError):
    """Executable not found."""
class UnsupportedWarning(UserWarning):
    """Archive uses feature that are unsupported by rarfile.

    .. versionadded:: 4.0
    """
class RarInfo:
    r"""An entry in rar archive.

    Timestamps as :class:`~datetime.datetime` are without timezone in RAR3,
    with UTC timezone in RAR5 archives.

    Attributes:

        filename
            File name with relative path.
            Path separator is "/".  Always unicode string.

        date_time
            File modification timestamp.   As tuple of (year, month, day, hour, minute, second).
            RAR5 allows archives where it is missing, it's None then.

        comment
            Optional file comment field.  Unicode string.  (RAR3-only)

        file_size
            Uncompressed size.

        compress_size
            Compressed size.

        compress_type
            Compression method: one of :data:`RAR_M0` .. :data:`RAR_M5` constants.

        extract_version
            Minimal Rar version needed for decompressing.  As (major*10 + minor),
            so 2.9 is 29.

            RAR3: 10, 20, 29

            RAR5 does not have such field in archive, it's simply set to 50.

        host_os
            Host OS type, one of RAR_OS_* constants.

            RAR3: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`, :data:`RAR_OS_MSDOS`,
            :data:`RAR_OS_OS2`, :data:`RAR_OS_BEOS`.

            RAR5: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`.

        mode
            File attributes. May be either dos-style or unix-style, depending on host_os.

        mtime
            File modification time.  Same value as :attr:`date_time`
            but as :class:`~datetime.datetime` object with extended precision.

        ctime
            Optional time field: creation time.  As :class:`~datetime.datetime` object.

        atime
            Optional time field: last access time.  As :class:`~datetime.datetime` object.

        arctime
            Optional time field: archival time.  As :class:`~datetime.datetime` object.
            (RAR3-only)

        CRC
            CRC-32 of uncompressed file, unsigned int.

            RAR5: may be None.

        blake2sp_hash
            Blake2SP hash over decompressed data.  (RAR5-only)

        volume
            Volume nr, starting from 0.

        volume_file
            Volume file name, where file starts.

        file_redir
            If not None, file is link of some sort.  Contains tuple of (type, flags, target).
            (RAR5-only)

            Type is one of constants:

                :data:`RAR5_XREDIR_UNIX_SYMLINK`
                    Unix symlink.
                :data:`RAR5_XREDIR_WINDOWS_SYMLINK`
                    Windows symlink.
                :data:`RAR5_XREDIR_WINDOWS_JUNCTION`
                    Windows junction.
                :data:`RAR5_XREDIR_HARD_LINK`
                    Hard link to target.
                :data:`RAR5_XREDIR_FILE_COPY`
                    Current file is copy of another archive entry.

            Flags may contain bits:

                :data:`RAR5_XREDIR_ISDIR`
                    Symlink points to directory.
    """

    # zipfile-compatible fields
    filename = None
    file_size = None
    compress_size = None
    date_time = None
    CRC = None
    volume = None
    orig_filename = None

    # optional extended time fields, datetime() objects.
    mtime = None
    ctime = None
    atime = None

    extract_version = None
    mode = None
    host_os = None
    compress_type = None

    # rar3-only fields
    comment = None
    arctime = None

    # rar5-only fields
    blake2sp_hash = None
    file_redir = None

    # internal fields
    flags = 0
    type = None

    # zipfile compat
    def is_dir(self):
        """Returns True if entry is a directory.

        This base implementation always returns False.

        .. versionadded:: 4.0
        """
        return False

    def is_symlink(self):
        """Returns True if entry is a symlink.

        This base implementation always returns False.

        .. versionadded:: 4.0
        """
        return False

    def is_file(self):
        """Returns True if entry is a normal file.

        This base implementation always returns False.

        .. versionadded:: 4.0
        """
        return False

    def needs_password(self):
        """Returns True if data is stored password-protected.
        """
        if self.type == RAR_BLOCK_FILE:
            return (self.flags & RAR_FILE_PASSWORD) > 0
        return False

    def isdir(self):
        """Returns True if entry is a directory.

        .. deprecated:: 4.0
        """
        return self.is_dir()
class RarFile:
    """Parse RAR structure, provide access to files in archive.
    """

    #: File name, if available.  Unicode string or None.
    filename = None

    #: Archive comment.  Unicode string or None.
    comment = None

    def __init__(self, file, mode="r", charset=None, info_callback=None,
                 crc_check=True, errors="stop", part_only=False):
        """Open and parse a RAR archive.

        Parameters:

            file
                archive file name or file-like object.
            mode
                only "r" is supported.
            charset
                fallback charset to use, if filenames are not already Unicode-enabled.
            info_callback
                debug callback, gets to see all archive entries.
            crc_check
                set to False to disable CRC checks
            errors
                Either "stop" to quietly stop parsing on errors,
                or "strict" to raise errors.  Default is "stop".
            part_only
                If True, read only single file and allow it to be middle-part
                of multi-volume archive.

        Raises :exc:`ValueError` for an unknown ``errors`` value and
        :exc:`NotImplementedError` for any mode other than "r".
        """
        if is_filelike(file):
            self.filename = getattr(file, "name", None)
        else:
            if isinstance(file, Path):
                file = str(file)
            self.filename = file
        self._rarfile = file

        self._charset = charset or DEFAULT_CHARSET
        self._info_callback = info_callback
        self._crc_check = crc_check
        self._part_only = part_only
        self._password = None
        self._file_parser = None

        if errors == "stop":
            self._strict = False
        elif errors == "strict":
            self._strict = True
        else:
            raise ValueError("Invalid value for errors= parameter.")

        if mode != "r":
            raise NotImplementedError("RarFile supports only mode=r")

        self._parse()

    def __enter__(self):
        """Open context."""
        return self

    def __exit__(self, typ, value, traceback):
        """Exit context."""
        self.close()

    def __iter__(self):
        """Iterate over members."""
        return iter(self.infolist())

    def setpassword(self, pwd):
        """Sets the password to use when extracting.

        If the headers are encrypted, the archive is parsed again
        with the new password.
        """
        self._password = pwd
        if self._file_parser:
            if self._file_parser.has_header_encryption():
                # headers could not be read without password, start over
                self._file_parser = None
        if not self._file_parser:
            self._parse()
        else:
            self._file_parser.setpassword(self._password)

    def needs_password(self):
        """Returns True if any archive entries require password for extraction.
        """
        return self._file_parser.needs_password()

    def namelist(self):
        """Return list of filenames in archive.
        """
        return [f.filename for f in self.infolist()]

    def infolist(self):
        """Return RarInfo objects for all files/directories in archive.
        """
        return self._file_parser.infolist()

    def volumelist(self):
        """Returns filenames of archive volumes.

        In case of single-volume archive, the list contains
        just the name of main archive file.
        """
        return self._file_parser.volumelist()

    def getinfo(self, name):
        """Return RarInfo for file.
        """
        return self._file_parser.getinfo(name)

    def getinfo_orig(self, name):
        """Return RarInfo for file source.

        RAR5: if name is hard-linked or copied file,
        returns original entry with original filename.
        """
        return self._file_parser.getinfo_orig(name)

    def open(self, name, mode="r", pwd=None):
        """Returns file-like object (:class:`RarExtFile`) from where the data can be read.

        The object implements :class:`io.RawIOBase` interface, so it can
        be further wrapped with :class:`io.BufferedReader`
        and :class:`io.TextIOWrapper`.

        The object is seekable, although the seeking is fast only on
        uncompressed files, on compressed files the seeking is implemented
        by reading ahead and/or restarting the decompression.

        Parameters:

            name
                file name or RarInfo instance.
            mode
                must be "r"
            pwd
                password to use for extracting.

        Raises :exc:`io.UnsupportedOperation` for directory entries and
        :exc:`PasswordRequired` if the entry is encrypted and no password
        is available.
        """

        if mode != "r":
            raise NotImplementedError("RarFile.open() supports only mode=r")

        # entry lookup
        inf = self.getinfo(name)
        if inf.is_dir():
            raise io.UnsupportedOperation("Directory does not have any data: " + inf.filename)

        # check password
        if inf.needs_password():
            pwd = pwd or self._password
            if pwd is None:
                raise PasswordRequired("File %s requires password" % inf.filename)
        else:
            pwd = None

        return self._file_parser.open(inf, pwd)

    def read(self, name, pwd=None):
        """Return uncompressed data for archive entry.

        For longer files using :meth:`~RarFile.open` may be better idea.

        Parameters:

            name
                filename or RarInfo instance
            pwd
                password to use for extracting.
        """

        with self.open(name, "r", pwd) as f:
            return f.read()

    def close(self):
        """Release open resources."""
        pass

    def printdir(self, file=None):
        """Print archive file list to stdout or given file.
        """
        if file is None:
            file = sys.stdout
        for f in self.infolist():
            print(f.filename, file=file)

    def extract(self, member, path=None, pwd=None):
        """Extract single file into current directory.

        Parameters:

            member
                filename or :class:`RarInfo` instance
            path
                optional destination path
            pwd
                optional password to use
        """
        inf = self.getinfo(member)
        return self._extract_one(inf, path, pwd, True)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all files into current directory.

        Parameters:

            path
                optional destination path
            members
                optional filename or :class:`RarInfo` instance list to extract
            pwd
                optional password to use
        """
        if members is None:
            members = self.namelist()

        done = set()
        dirs = []
        for m in members:
            inf = self.getinfo(m)
            # directory attributes are applied after all entries are written
            dst = self._extract_one(inf, path, pwd, not inf.is_dir())
            if inf.is_dir():
                if dst not in done:
                    dirs.append((dst, inf))
                    done.add(dst)
        if dirs:
            # reverse order handles nested directories before their parents
            dirs.sort(reverse=True)
            for dst, inf in dirs:
                self._set_attrs(inf, dst)

    def testrar(self, pwd=None):
        """Read all files and test CRC.
        """
        for member in self.infolist():
            if member.is_file():
                with self.open(member, 'r', pwd) as f:
                    empty_read(f, member.file_size, BSIZE)

    def strerror(self):
        """Return error string if parsing failed or None if no problems.
        """
        if not self._file_parser:
            return "Not a RAR file"
        return self._file_parser.strerror()

    ##
    ## private methods
    ##

    def _parse(self):
        """Run parser for file type
        """
        ver, sfx_ofs = _find_sfx_header(self._rarfile)
        if ver == RAR_V3:
            p3 = RAR3Parser(self._rarfile, self._password, self._crc_check,
                            self._charset, self._strict, self._info_callback,
                            sfx_ofs, self._part_only)
            self._file_parser = p3  # noqa
        elif ver == RAR_V5:
            p5 = RAR5Parser(self._rarfile, self._password, self._crc_check,
                            self._charset, self._strict, self._info_callback,
                            sfx_ofs, self._part_only)
            self._file_parser = p5  # noqa
        else:
            raise NotRarFile("Not a RAR file")

        self._file_parser.parse()
        self.comment = self._file_parser.comment

    def _extract_one(self, info, path, pwd, set_attrs):
        """Extract one entry under `path`, returns destination path or None.

        None is returned when the entry is neither file, directory
        nor symlink, or when the symlink type is not supported.
        """
        fname = sanitize_filename(
            info.filename, os.path.sep, WIN32
        )

        if path is None:
            path = os.getcwd()
        else:
            path = os.fspath(path)
        dstfn = os.path.join(path, fname)

        dirname = os.path.dirname(dstfn)
        if dirname and dirname != ".":
            os.makedirs(dirname, exist_ok=True)

        if info.is_file():
            return self._make_file(info, dstfn, pwd, set_attrs)
        if info.is_dir():
            return self._make_dir(info, dstfn, pwd, set_attrs)
        if info.is_symlink():
            return self._make_symlink(info, dstfn, pwd, set_attrs)
        return None

    def _create_helper(self, name, flags, info):
        """Opener used when creating extracted files; hook for subclasses."""
        return os.open(name, flags)

    def _make_file(self, info, dstfn, pwd, set_attrs):
        """Write entry data into `dstfn`, optionally applying attributes."""
        def helper(name, flags):
            return self._create_helper(name, flags, info)
        with self.open(info, "r", pwd) as src:
            with open(dstfn, "wb", opener=helper) as dst:
                shutil.copyfileobj(src, dst)
        if set_attrs:
            self._set_attrs(info, dstfn)
        return dstfn

    def _make_dir(self, info, dstfn, pwd, set_attrs):
        """Create directory `dstfn`, optionally applying attributes."""
        os.makedirs(dstfn, exist_ok=True)
        if set_attrs:
            self._set_attrs(info, dstfn)
        return dstfn

    def _make_symlink(self, info, dstfn, pwd, set_attrs):
        """Create symlink `dstfn`, returns `dstfn` or None if unsupported.

        Windows junctions and unknown link types are skipped
        with :class:`UnsupportedWarning`.
        """
        target_is_directory = False
        if info.host_os == RAR_OS_UNIX:
            link_name = self.read(info, pwd)
            target_is_directory = (info.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY
        elif info.file_redir:
            redir_type, redir_flags, link_name = info.file_redir
            if redir_type == RAR5_XREDIR_WINDOWS_JUNCTION:
                warnings.warn(f"Windows junction not supported - {info.filename}", UnsupportedWarning)
                return None
            # ISDIR is a bit in redirection flags, not in redirection type
            target_is_directory = (redir_flags & RAR5_XREDIR_ISDIR) > 0
        else:
            warnings.warn(f"Unsupported link type - {info.filename}", UnsupportedWarning)
            return None

        os.symlink(link_name, dstfn, target_is_directory=target_is_directory)
        return dstfn

    def _set_attrs(self, info, dstfn):
        """Apply permission bits and timestamps from `info` to `dstfn`."""
        if info.host_os == RAR_OS_UNIX:
            os.chmod(dstfn, info.mode & 0o777)
        elif info.host_os in (RAR_OS_WIN32, RAR_OS_MSDOS):
            # only keep R/O attr, except for dirs on win32
            if info.mode & DOS_MODE_READONLY and (info.is_file() or not WIN32):
                st = os.stat(dstfn)
                new_mode = st.st_mode & ~0o222
                os.chmod(dstfn, new_mode)

        if info.mtime:
            mtime_ns = to_nsecs(info.mtime)
            # fall back to mtime when archive has no access time
            atime_ns = to_nsecs(info.atime) if info.atime else mtime_ns
            os.utime(dstfn, ns=(atime_ns, mtime_ns))
1001 # File format parsing
1004 class CommonParser:
1005 """Shared parser parts."""
1006 _main = None
1007 _hdrenc_main = None
1008 _needs_password = False
1009 _fd = None
1010 _expect_sig = None
1011 _parse_error = None
1012 _password = None
1013 comment = None
1015 def __init__(self, rarfile, password, crc_check, charset, strict,
1016 info_cb, sfx_offset, part_only):
1017 self._rarfile = rarfile
1018 self._password = password
1019 self._crc_check = crc_check
1020 self._charset = charset
1021 self._strict = strict
1022 self._info_callback = info_cb
1023 self._info_list = []
1024 self._info_map = {}
1025 self._vol_list = []
1026 self._sfx_offset = sfx_offset
1027 self._part_only = part_only
1029 def has_header_encryption(self):
1030 """Returns True if headers are encrypted
1032 if self._hdrenc_main:
1033 return True
1034 if self._main:
1035 if self._main.flags & RAR_MAIN_PASSWORD:
1036 return True
1037 return False
1039 def setpassword(self, pwd):
1040 """Set cached password."""
1041 self._password = pwd
1043 def volumelist(self):
1044 """Volume files"""
1045 return self._vol_list
1047 def needs_password(self):
1048 """Is password required"""
1049 return self._needs_password
1051 def strerror(self):
1052 """Last error"""
1053 return self._parse_error
1055 def infolist(self):
1056 """List of RarInfo records.
1058 return self._info_list
1060 def getinfo(self, member):
1061 """Return RarInfo for filename
1063 if isinstance(member, RarInfo):
1064 fname = member.filename
1065 elif isinstance(member, Path):
1066 fname = str(member)
1067 else:
1068 fname = member
1070 if fname.endswith("/"):
1071 fname = fname.rstrip("/")
1073 try:
1074 return self._info_map[fname]
1075 except KeyError:
1076 raise NoRarEntry("No such file: %s" % fname) from None
1078 def getinfo_orig(self, member):
1079 inf = self.getinfo(member)
1080 if inf.file_redir:
1081 redir_type, redir_flags, redir_name = inf.file_redir
1082 # cannot leave to unrar as it expects copied file to exist
1083 if redir_type in (RAR5_XREDIR_FILE_COPY, RAR5_XREDIR_HARD_LINK):
1084 inf = self.getinfo(redir_name)
1085 return inf
1087 def parse(self):
1088 """Process file."""
1089 self._fd = None
1090 try:
1091 self._parse_real()
1092 finally:
1093 if self._fd:
1094 self._fd.close()
1095 self._fd = None
1097 def _parse_real(self):
1098 """Actually read file.
1100 fd = XFile(self._rarfile)
1101 self._fd = fd
1102 fd.seek(self._sfx_offset, 0)
1103 sig = fd.read(len(self._expect_sig))
1104 if sig != self._expect_sig:
1105 raise NotRarFile("Not a Rar archive")
1107 volume = 0 # first vol (.rar) is 0
1108 more_vols = False
1109 endarc = False
1110 volfile = self._rarfile
1111 self._vol_list = [self._rarfile]
1112 raise_need_first_vol = False
1113 while True:
1114 if endarc:
1115 h = None # don"t read past ENDARC
1116 else:
1117 h = self._parse_header(fd)
1118 if not h:
1119 if raise_need_first_vol:
1120 # did not find ENDARC with VOLNR
1121 raise NeedFirstVolume("Need to start from first volume", None)
1122 if more_vols and not self._part_only:
1123 volume += 1
1124 fd.close()
1125 try:
1126 volfile = self._next_volname(volfile)
1127 fd = XFile(volfile)
1128 except IOError:
1129 self._set_error("Cannot open next volume: %s", volfile)
1130 break
1131 self._fd = fd
1132 sig = fd.read(len(self._expect_sig))
1133 if sig != self._expect_sig:
1134 self._set_error("Invalid volume sig: %s", volfile)
1135 break
1136 more_vols = False
1137 endarc = False
1138 self._vol_list.append(volfile)
1139 self._main = None
1140 self._hdrenc_main = None
1141 continue
1142 break
1143 h.volume = volume
1144 h.volume_file = volfile
1146 if h.type == RAR_BLOCK_MAIN and not self._main:
1147 self._main = h
1148 if volume == 0 and (h.flags & RAR_MAIN_NEWNUMBERING) and not self._part_only:
1149 # RAR 2.x does not set FIRSTVOLUME,
1150 # so check it only if NEWNUMBERING is used
1151 if (h.flags & RAR_MAIN_FIRSTVOLUME) == 0:
1152 if getattr(h, "main_volume_number", None) is not None:
1153 # rar5 may have more info
1154 raise NeedFirstVolume(
1155 "Need to start from first volume (current: %r)"
1156 % (h.main_volume_number,),
1157 h.main_volume_number
1159 # delay raise until we have volnr from ENDARC
1160 raise_need_first_vol = True
1161 if h.flags & RAR_MAIN_PASSWORD:
1162 self._needs_password = True
1163 if not self._password:
1164 break
1165 elif h.type == RAR_BLOCK_ENDARC:
1166 more_vols = (h.flags & RAR_ENDARC_NEXT_VOLUME) > 0
1167 endarc = True
1168 if raise_need_first_vol and (h.flags & RAR_ENDARC_VOLNR) > 0:
1169 raise NeedFirstVolume(
1170 "Need to start from first volume (current: %r)"
1171 % (h.endarc_volnr,),
1172 h.endarc_volnr
1174 elif h.type == RAR_BLOCK_FILE:
1175 # RAR 2.x does not write RAR_BLOCK_ENDARC
1176 if h.flags & RAR_FILE_SPLIT_AFTER:
1177 more_vols = True
1178 # RAR 2.x does not set RAR_MAIN_FIRSTVOLUME
1179 if volume == 0 and h.flags & RAR_FILE_SPLIT_BEFORE:
1180 if not self._part_only:
1181 raise_need_first_vol = True
1183 if h.needs_password():
1184 self._needs_password = True
1186 # store it
1187 self.process_entry(fd, h)
1189 if self._info_callback:
1190 self._info_callback(h)
1192 # go to next header
1193 if h.add_size > 0:
1194 fd.seek(h.data_offset + h.add_size, 0)
1196 def process_entry(self, fd, item):
1197 """Examine item, add into lookup cache."""
1198 raise NotImplementedError()
1200 def _decrypt_header(self, fd):
1201 raise NotImplementedError("_decrypt_header")
1203 def _parse_block_header(self, fd):
1204 raise NotImplementedError("_parse_block_header")
1206 def _open_hack(self, inf, pwd):
1207 raise NotImplementedError("_open_hack")
1209 def _parse_header(self, fd):
1210 """Read single header
1212 try:
1213 # handle encrypted headers
1214 if (self._main and self._main.flags & RAR_MAIN_PASSWORD) or self._hdrenc_main:
1215 if not self._password:
1216 return None
1217 fd = self._decrypt_header(fd)
1219 # now read actual header
1220 return self._parse_block_header(fd)
1221 except struct.error:
1222 self._set_error("Broken header in RAR file")
1223 return None
1225 def _next_volname(self, volfile):
1226 """Given current vol name, construct next one
1228 if is_filelike(volfile):
1229 raise IOError("Working on single FD")
1230 if self._main.flags & RAR_MAIN_NEWNUMBERING:
1231 return _next_newvol(volfile)
1232 return _next_oldvol(volfile)
1234 def _set_error(self, msg, *args):
1235 if args:
1236 msg = msg % args
1237 self._parse_error = msg
1238 if self._strict:
1239 raise BadRarFile(msg)
def open(self, inf, pwd):
    """Return stream object for file data.

    Redirect entries are resolved first: file copies and hard links are
    replaced by the entry they point to, symlinks and junctions return
    their target name as UTF-8 bytes.  Entries that continue from a
    previous volume raise NeedFirstVolume.
    """
    if inf.file_redir:
        redir_type, _redir_flags, redir_name = inf.file_redir
        # cannot leave to unrar as it expects copied file to exist
        if redir_type in (RAR5_XREDIR_FILE_COPY, RAR5_XREDIR_HARD_LINK):
            inf = self.getinfo(redir_name)
            if not inf:
                raise BadRarFile("cannot find copied file")
        elif redir_type in (
            RAR5_XREDIR_UNIX_SYMLINK,
            RAR5_XREDIR_WINDOWS_SYMLINK,
            RAR5_XREDIR_WINDOWS_JUNCTION,
        ):
            return io.BytesIO(redir_name.encode("utf8"))

    if inf.flags & RAR_FILE_SPLIT_BEFORE:
        raise NeedFirstVolume("Partial file, please start from first volume: " + inf.filename, None)

    # decide whether the temp-archive shortcut may be used
    main = self._main
    if not main or main._must_disable_hack() or inf._must_disable_hack():
        use_hack = False
    elif is_filelike(self._rarfile):
        # file-like archives skip the size limit and global switch checks
        use_hack = True
    else:
        use_hack = inf.file_size <= HACK_SIZE_LIMIT and bool(USE_EXTRACT_HACK)

    # pick the extraction strategy
    stored_plain = (
        inf.compress_type == RAR_M0
        and (inf.flags & RAR_FILE_PASSWORD) == 0
        and inf.file_redir is None
    )
    if stored_plain:
        return self._open_clear(inf)
    if use_hack:
        return self._open_hack(inf, pwd)
    if is_filelike(self._rarfile):
        return self._open_unrar_membuf(self._rarfile, inf, pwd)
    return self._open_unrar(self._rarfile, inf, pwd)
def _open_clear(self, inf):
    """Return a DirectReader over *inf*, bound to this parser."""
    reader = DirectReader(self, inf)
    return reader
def _open_hack_core(self, inf, pwd, prefix, suffix):
    """Copy one entry into a temporary archive and open it with unrar.

    Writes *prefix*, then ``inf.header_size + inf.compress_size`` bytes
    taken from ``inf.volume_file`` starting at ``inf.header_offset``, then
    *suffix* into a fresh ``.rar`` temp file, and hands that file to
    ``_open_unrar``.

    Raises BadRarFile when the volume ends before all bytes were copied.
    On any failure while building the temp file it is removed again.
    """
    remaining = inf.compress_size + inf.header_size

    # context managers guarantee both files are closed on every path,
    # including failures in seek()/mkstemp() that happen before the copy
    with XFile(inf.volume_file, 0) as rf:
        rf.seek(inf.header_offset)

        tmpfd, tmpname = mkstemp(suffix=".rar", dir=HACK_TMP_DIR)
        try:
            with os.fdopen(tmpfd, "wb") as tmpf:
                tmpf.write(prefix)
                while remaining > 0:
                    buf = rf.read(min(remaining, BSIZE))
                    if not buf:
                        raise BadRarFile("read failed: " + inf.filename)
                    tmpf.write(buf)
                    remaining -= len(buf)
                tmpf.write(suffix)
        except BaseException:
            # temp file is already closed here, so unlink works everywhere
            os.unlink(tmpname)
            raise

    return self._open_unrar(tmpname, inf, pwd, tmpname)
def _open_unrar_membuf(self, memfile, inf, pwd):
    """Write in-memory archive to temp file, needed for solid archives.

    The temp file name is passed both as the archive to read and as the
    temp file of the returned reader; the entry name is still given to
    the tool (``force_file=True``).
    """
    archive_copy = membuf_tempfile(memfile)
    return self._open_unrar(archive_copy, inf, pwd, archive_copy, force_file=True)
def _open_unrar(self, rarfile, inf, pwd=None, tmpfile=None, force_file=False):
    """Extract using unrar.

    Builds the tool command line for *rarfile* and returns a PipeReader
    that reads the entry from the tool's output.  *tmpfile* is handed to
    the reader.
    """
    # not giving filename avoids encoding related problems
    entry_name = inf.filename if (not tmpfile or force_file) else None

    # read from unrar pipe
    cmdline = tool_setup().open_cmdline(pwd, rarfile, entry_name)
    return PipeReader(self, inf, cmdline, tmpfile)
1340 # RAR3 format
class Rar3Info(RarInfo):
    """RAR3 specific fields."""
    extract_version = 15
    salt = None
    add_size = 0

    # header bookkeeping
    header_crc = None
    header_size = None
    header_offset = None
    data_offset = None

    # hashing of extracted data
    _md_class = None
    _md_expect = None
    _name_size = None

    # make sure some rar5 fields are always present
    file_redir = None
    blake2sp_hash = None

    # end-of-archive block fields
    endarc_datacrc = None
    endarc_volnr = None

    def _must_disable_hack(self):
        """Tell whether this block rules out the temp-archive shortcut."""
        if self.type == RAR_BLOCK_FILE:
            blockers = RAR_FILE_PASSWORD | RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER
        elif self.type == RAR_BLOCK_MAIN:
            blockers = RAR_MAIN_SOLID | RAR_MAIN_PASSWORD
        else:
            return False
        return bool(self.flags & blockers)

    def is_dir(self):
        """Returns True if entry is a directory."""
        if self.type != RAR_BLOCK_FILE or self.is_symlink():
            return False
        # directory is marked by all bits of the mask being set
        return (self.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY

    def is_symlink(self):
        """Returns True if entry is a symlink."""
        if self.type != RAR_BLOCK_FILE:
            return False
        if self.host_os != RAR_OS_UNIX:
            return False
        return (self.mode & 0xF000) == 0xA000

    def is_file(self):
        """Returns True if entry is a normal file."""
        if self.type != RAR_BLOCK_FILE:
            return False
        if self.is_dir():
            return False
        return not self.is_symlink()
class RAR3Parser(CommonParser):
    """Parse RAR3 file format.
    """
    _expect_sig = RAR_ID
    # most recently derived header key: (salt, key, iv)
    _last_aes_key = (None, None, None)

    def _decrypt_header(self, fd):
        """Return a decrypting wrapper around *fd* for the next header.

        Reads the 8-byte salt in front of the header; the derived key and
        IV are cached and reused while the salt stays the same.
        Raises NoCrypto when no crypto support is available.
        """
        if not _have_crypto:
            raise NoCrypto("Cannot parse encrypted headers - no crypto")
        salt = fd.read(8)
        cached_salt, key, iv = self._last_aes_key
        if cached_salt != salt:
            key, iv = rar3_s2k(self._password, salt)
            self._last_aes_key = (salt, key, iv)
        return HeaderDecrypt(fd, key, iv)

    def _parse_block_header(self, fd):
        """Parse common block header.

        Returns a Rar3Info, or None at end of data, on truncated input or
        when the 16-bit header CRC does not match.
        """
        h = Rar3Info()
        h.header_offset = fd.tell()

        # read and parse base header
        fixed_len = S_BLK_HDR.size
        head = fd.read(fixed_len)
        if not head:
            return None
        if len(head) < fixed_len:
            self._set_error("Unexpected EOF when reading header")
            return None
        h.header_crc, h.type, h.flags, h.header_size = S_BLK_HDR.unpack_from(head)

        # read full header
        hdata = head
        if h.header_size > fixed_len:
            hdata = head + fd.read(h.header_size - fixed_len)
        h.data_offset = fd.tell()

        # unexpected EOF?
        if len(hdata) != h.header_size:
            self._set_error("Unexpected EOF when reading header")
            return None

        pos = fixed_len

        # block has data associated with it?
        h.add_size = 0
        if h.flags & RAR_LONG_BLOCK:
            h.add_size, pos = load_le32(hdata, pos)

        btype = h.type
        if btype == RAR_BLOCK_MARK:
            return h

        # parse interesting ones, decide header boundaries for crc;
        # by default the crc covers the whole header
        crc_end = h.header_size
        if btype == RAR_BLOCK_MAIN:
            pos += 6
            if h.flags & RAR_MAIN_ENCRYPTVER:
                pos += 1
            crc_end = pos
            if h.flags & RAR_MAIN_COMMENT:
                self._parse_subblocks(h, hdata, pos)
        elif btype == RAR_BLOCK_FILE:
            pos = self._parse_file_header(h, hdata, pos - 4)
            crc_end = pos
            if h.flags & RAR_FILE_COMMENT:
                pos = self._parse_subblocks(h, hdata, pos)
        elif btype == RAR_BLOCK_SUB:
            pos = self._parse_file_header(h, hdata, pos - 4)
        elif btype == RAR_BLOCK_OLD_AUTH:
            crc_end = pos + 8
        elif btype == RAR_BLOCK_OLD_EXTRA:
            crc_end = pos + 7
        elif btype == RAR_BLOCK_ENDARC:
            if h.flags & RAR_ENDARC_DATACRC:
                h.endarc_datacrc, pos = load_le32(hdata, pos)
            if h.flags & RAR_ENDARC_VOLNR:
                h.endarc_volnr = S_SHORT.unpack_from(hdata, pos)[0]
                pos += 2

        # check crc
        if btype == RAR_BLOCK_OLD_SUB:
            # old sub-blocks include their data area in the checksum
            crcdat = hdata[2:] + fd.read(h.add_size)
        else:
            crcdat = hdata[2:crc_end]
        calc_crc = crc32(crcdat) & 0xFFFF

        if h.header_crc != calc_crc:
            # header parsing failed; instead of panicking, send eof
            self._set_error("Header CRC error (%02x): exp=%x got=%x (xlen = %d)",
                            h.type, h.header_crc, calc_crc, len(crcdat))
            return None

        # return good header
        return h

    def _parse_file_header(self, h, hdata, pos):
        """Read file-specific header.

        Fills the file fields of *h* from *hdata* starting at *pos* and
        returns the position after the parsed part.  When the block has no
        extended time record, mtime/atime/ctime/arctime are all set to None.
        """
        fld = S_FILE_HDR.unpack_from(hdata, pos)
        pos += S_FILE_HDR.size

        (packed_size, unpacked_size, host_os, crc, dos_time,
         extract_version, method, name_size, mode) = fld[:9]

        h.compress_size = packed_size
        h.file_size = unpacked_size
        h.host_os = host_os
        h.CRC = crc
        h.date_time = parse_dos_time(dos_time)
        h.mtime = to_datetime(h.date_time)
        h.extract_version = extract_version
        h.compress_type = method
        h._name_size = name_size
        h.mode = mode

        h._md_class = CRC32Context
        h._md_expect = h.CRC

        if h.flags & RAR_FILE_LARGE:
            # upper 32 bits of both sizes follow the fixed part
            packed_hi, pos = load_le32(hdata, pos)
            unpacked_hi, pos = load_le32(hdata, pos)
            h.compress_size |= packed_hi << 32
            h.file_size |= unpacked_hi << 32
            h.add_size = h.compress_size

        name, pos = load_bytes(hdata, name_size, pos)
        is_unicode = h.flags & RAR_FILE_UNICODE
        nul = name.find(b"\0") if is_unicode else -1
        if nul >= 0:
            # stored in custom encoding
            h.orig_filename = name[:nul]
            packed_name = UnicodeFilename(h.orig_filename, name[nul + 1:])
            h.filename = packed_name.decode()

            # if parsing failed fall back to simple name
            if packed_name.failed:
                h.filename = self._decode(h.orig_filename)
        elif is_unicode:
            # stored in UTF8
            h.orig_filename = name
            h.filename = name.decode("utf8", "replace")
        else:
            # stored in random encoding
            h.orig_filename = name
            h.filename = self._decode(name)

        # change separator, set dir suffix
        h.filename = h.filename.replace("\\", "/").rstrip("/")
        if h.is_dir():
            h.filename = h.filename + "/"

        h.salt = None
        if h.flags & RAR_FILE_SALT:
            h.salt, pos = load_bytes(hdata, 8, pos)

        # optional extended time stamps
        if h.flags & RAR_FILE_EXTTIME:
            pos = _parse_ext_time(h, hdata, pos)
        else:
            h.mtime = h.atime = h.ctime = h.arctime = None

        return pos

    def _parse_subblocks(self, h, hdata, pos):
        """Find old-style comment subblock.

        Walks the sub-blocks embedded in *hdata* from *pos* and stores a
        decoded comment on *h*.  Returns the position where the walk ended.
        """
        total = len(hdata)
        while pos < total:
            # ordinary block header
            _crc16, sub_type, sub_flags, sub_len = S_BLK_HDR.unpack_from(hdata, pos)
            sub_end = pos + sub_len
            pos += S_BLK_HDR.size

            # corrupt header
            if sub_end < pos:
                break

            # followed by block-specific header
            if sub_type == RAR_BLOCK_OLD_COMMENT and pos + S_COMMENT_HDR.size <= sub_end:
                declen, ver, meth, crc = S_COMMENT_HDR.unpack_from(hdata, pos)
                pos += S_COMMENT_HDR.size
                packed = hdata[pos: sub_end]
                cmt = rar3_decompress(ver, meth, packed, declen, sub_flags,
                                      crc, self._password)
                if not self._crc_check or (crc32(cmt) & 0xFFFF) == crc:
                    h.comment = self._decode_comment(cmt)

            pos = sub_end
        return pos

    def _read_comment_v3(self, inf, pwd=None):
        """Load and decode the comment stored in block *inf*.

        Returns None when CRC checking is enabled and the CRC differs.
        """
        # read data
        with XFile(inf.volume_file) as rf:
            rf.seek(inf.data_offset)
            packed = rf.read(inf.compress_size)

        # decompress
        cmt = rar3_decompress(inf.extract_version, inf.compress_type, packed,
                              inf.file_size, inf.flags, inf.CRC, pwd, inf.salt)

        # check crc
        if self._crc_check and crc32(cmt) != inf.CRC:
            return None

        return self._decode_comment(cmt)

    def _decode(self, val):
        """Decode bytes with the first encoding from TRY_ENCODINGS that fits.

        Falls back to the parser charset with replacement characters.
        """
        for encoding in TRY_ENCODINGS:
            try:
                text = val.decode(encoding)
            except UnicodeError:
                continue
            return text
        return val.decode(self._charset, "replace")

    def _decode_comment(self, val):
        """Decode comment bytes the same way as names."""
        return self._decode(val)

    def process_entry(self, fd, item):
        """Register parsed block *item* in the parser state."""
        kind = item.type
        flags = item.flags

        if kind == RAR_BLOCK_FILE:
            if flags & RAR_FILE_VERSION:
                pass    # skip old versions
            elif not flags & RAR_FILE_SPLIT_BEFORE:
                # use only first part
                self._info_map[item.filename.rstrip("/")] = item
                self._info_list.append(item)
            elif self._info_list:
                # final crc is in last block
                first_part = self._info_list[-1]
                first_part.CRC = item.CRC
                first_part._md_expect = item._md_expect
                first_part.compress_size += item.compress_size

        # parse new-style comment
        if kind == RAR_BLOCK_SUB and item.filename == "CMT":
            if not flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER):
                cmt = self._read_comment_v3(item, self._password)
                if not flags & RAR_FILE_SOLID:
                    # archive comment
                    self.comment = cmt
                elif self._info_list:
                    # file comment
                    self._info_list[-1].comment = cmt

        if kind == RAR_BLOCK_MAIN:
            if flags & RAR_MAIN_COMMENT:
                self.comment = item.comment
            if flags & RAR_MAIN_PASSWORD:
                self._needs_password = True

    # put file compressed data into temporary .rar archive, and run
    # unrar on that, thus avoiding unrar going over whole archive
    def _open_hack(self, inf, pwd):
        """Open *inf* via a temp archive made of a main header plus the entry."""
        # create main header: crc, type, flags, size, res1, res2
        main_hdr = S_BLK_HDR.pack(0x90CF, 0x73, 0, 13) + bytes(2 + 4)
        return self._open_hack_core(inf, pwd, RAR_ID + main_hdr, b"")
1669 # RAR5 format
class Rar5Info(RarInfo):
    """Shared fields for RAR5 records.
    """
    extract_version = 50

    # header location and checksum
    header_crc = None
    header_size = None
    header_offset = None
    data_offset = None

    # type=all
    block_type = None
    block_flags = None
    block_extra_size = 0
    add_size = 0

    # type=MAIN
    volume_number = None

    # hashing of extracted data
    _md_class = None
    _md_expect = None

    def _must_disable_hack(self):
        """Generic records never rule out the temp-archive shortcut."""
        return False
class Rar5BaseFile(Rar5Info):
    """Shared struct for file & service record.
    """
    type = -1
    file_flags = None
    file_compress_flags = None
    file_encryption = (0, 0, 0, b"", b"", b"")
    file_redir = None
    file_owner = None
    file_version = None
    blake2sp_hash = None

    def _must_disable_hack(self):
        """Tell whether this record rules out the temp-archive shortcut.

        True for encrypted, split, solid or redirected entries.
        """
        split_mask = RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER
        return bool(
            self.flags & RAR_FILE_PASSWORD
            or self.block_flags & split_mask
            or self.file_compress_flags & RAR5_COMPR_SOLID
            or self.file_redir
        )
class Rar5FileInfo(Rar5BaseFile):
    """RAR5 file record.
    """
    type = RAR_BLOCK_FILE

    def is_symlink(self):
        """Returns True if entry is a symlink."""
        redir = self.file_redir
        if redir is None:
            return False
        # first element of the redirect tuple is its type
        return redir[0] in (
            RAR5_XREDIR_UNIX_SYMLINK,
            RAR5_XREDIR_WINDOWS_SYMLINK,
            RAR5_XREDIR_WINDOWS_JUNCTION,
        )

    def is_file(self):
        """Returns True if entry is a normal file."""
        if self.is_dir():
            return False
        return not self.is_symlink()

    def is_dir(self):
        """Returns True if entry is a directory."""
        # redirected entries are never reported as directories
        return bool(not self.file_redir and self.file_flags & RAR5_FILE_FLAG_ISDIR)
class Rar5ServiceInfo(Rar5BaseFile):
    """RAR5 service record.

    Shares all fields with file records; only the block type differs.
    """
    type = RAR_BLOCK_SUB
class Rar5MainInfo(Rar5Info):
    """RAR5 archive main record.
    """
    type = RAR_BLOCK_MAIN
    main_flags = None
    main_volume_number = None

    def _must_disable_hack(self):
        """Solid archives rule out the temp-archive shortcut."""
        return bool(self.main_flags & RAR5_MAIN_FLAG_SOLID)
class Rar5EncryptionInfo(Rar5Info):
    """RAR5 archive header encryption record.
    """
    type = RAR5_BLOCK_ENCRYPTION

    # values read from the encryption block
    encryption_algo = None
    encryption_flags = None
    encryption_kdf_count = None
    encryption_salt = None
    encryption_check_value = None

    def needs_password(self):
        """Always True for this record type."""
        return True
class Rar5EndArcInfo(Rar5Info):
    """RAR5 end of archive record.

    ``endarc_flags`` holds the flag field read from the block.
    """
    type = RAR_BLOCK_ENDARC
    endarc_flags = None
class RAR5Parser(CommonParser):
    """Parse RAR5 format.

    Block headers are a CRC32 followed by variable-length integers; file
    and service blocks may carry extra records (times, encryption, hash,
    version, redirect, owner).
    """
    _expect_sig = RAR5_ID
    _hdrenc_main = None

    # AES encrypted headers
    _last_aes256_key = (-1, None, None)     # (kdf_count, salt, key)

    # PBKDF2 runs ``1 << kdf_count`` rounds and kdf_count comes straight
    # from the archive, so it has to be bounded wherever it is used.
    _MAX_KDF_COUNT = 24

    def _gen_key(self, kdf_count, salt):
        """Return the key derived from the password, *salt* and *kdf_count*.

        The last derived key is cached.  Raises BadRarFile when
        *kdf_count* is above the accepted limit.
        """
        if self._last_aes256_key[:2] == (kdf_count, salt):
            return self._last_aes256_key[2]
        if kdf_count > self._MAX_KDF_COUNT:
            raise BadRarFile("Too large kdf_count")
        pwd = self._password
        if isinstance(pwd, str):
            pwd = pwd.encode("utf8")
        key = pbkdf2_hmac("sha256", pwd, salt, 1 << kdf_count)
        self._last_aes256_key = (kdf_count, salt, key)
        return key

    def _decrypt_header(self, fd):
        """Return a decrypting wrapper around *fd* for the next header.

        Uses the parameters of the remembered encryption block and the
        16-byte IV read from *fd*.  Raises NoCrypto without crypto support.
        """
        if not _have_crypto:
            raise NoCrypto("Cannot parse encrypted headers - no crypto")
        h = self._hdrenc_main
        key = self._gen_key(h.encryption_kdf_count, h.encryption_salt)
        iv = fd.read(16)
        return HeaderDecrypt(fd, key, iv)

    def _parse_block_header(self, fd):
        """Parse common block header.

        Returns the record object for known block types, None for unknown
        types, truncated input, oversized headers or CRC mismatch.
        """
        header_offset = fd.tell()

        # crc32 plus at least one byte of the header-size vint
        preload = 4 + 1
        start_bytes = fd.read(preload)
        if len(start_bytes) < preload:
            self._set_error("Unexpected EOF when reading header")
            return None
        # high bit set means the vint continues in the next byte
        while start_bytes[-1] & 0x80:
            b = fd.read(1)
            if not b:
                self._set_error("Unexpected EOF when reading header")
                return None
            start_bytes += b
        header_crc, pos = load_le32(start_bytes, 0)
        hdrlen, pos = load_vint(start_bytes, pos)
        # refuse unreasonably large headers
        if hdrlen > 2 * 1024 * 1024:
            return None
        header_size = pos + hdrlen

        # read full header, check for EOF
        hdata = start_bytes + fd.read(header_size - len(start_bytes))
        if len(hdata) != header_size:
            self._set_error("Unexpected EOF when reading header")
            return None
        data_offset = fd.tell()

        # crc covers everything after the crc field itself
        calc_crc = crc32(memoryview(hdata)[4:])
        if header_crc != calc_crc:
            # header parsing failed.
            self._set_error("Header CRC error: exp=%x got=%x (xlen = %d)",
                            header_crc, calc_crc, len(hdata))
            return None

        block_type, pos = load_vint(hdata, pos)

        if block_type == RAR5_BLOCK_MAIN:
            h, pos = self._parse_block_common(Rar5MainInfo(), hdata)
            h = self._parse_main_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_FILE:
            h, pos = self._parse_block_common(Rar5FileInfo(), hdata)
            h = self._parse_file_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_SERVICE:
            h, pos = self._parse_block_common(Rar5ServiceInfo(), hdata)
            h = self._parse_file_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_ENCRYPTION:
            h, pos = self._parse_block_common(Rar5EncryptionInfo(), hdata)
            h = self._parse_encryption_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_ENDARC:
            h, pos = self._parse_block_common(Rar5EndArcInfo(), hdata)
            h = self._parse_endarc_block(h, hdata, pos)
        else:
            h = None
        if h:
            h.header_offset = header_offset
            h.data_offset = data_offset
        return h

    def _parse_block_common(self, h, hdata):
        """Fill the fields shared by all block types.

        Returns ``(h, pos)`` where *pos* is the offset of the first
        type-specific field.
        """
        h.header_crc, pos = load_le32(hdata, 0)
        hdrlen, pos = load_vint(hdata, pos)
        h.header_size = hdrlen + pos
        h.block_type, pos = load_vint(hdata, pos)
        h.block_flags, pos = load_vint(hdata, pos)

        if h.block_flags & RAR5_BLOCK_FLAG_EXTRA_DATA:
            h.block_extra_size, pos = load_vint(hdata, pos)
        if h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA:
            h.add_size, pos = load_vint(hdata, pos)

        h.compress_size = h.add_size

        # mirror block flags into the RAR3-style flag field
        if h.block_flags & RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN:
            h.flags |= RAR_SKIP_IF_UNKNOWN
        if h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA:
            h.flags |= RAR_LONG_BLOCK
        return h, pos

    def _parse_main_block(self, h, hdata, pos):
        """Parse main archive block and map its flags to RAR3-style flags."""
        h.main_flags, pos = load_vint(hdata, pos)
        if h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR:
            h.main_volume_number, pos = load_vint(hdata, pos)

        h.flags |= RAR_MAIN_NEWNUMBERING
        if h.main_flags & RAR5_MAIN_FLAG_SOLID:
            h.flags |= RAR_MAIN_SOLID
        if h.main_flags & RAR5_MAIN_FLAG_ISVOL:
            h.flags |= RAR_MAIN_VOLUME
        if h.main_flags & RAR5_MAIN_FLAG_RECOVERY:
            h.flags |= RAR_MAIN_RECOVERY
        if self._hdrenc_main:
            h.flags |= RAR_MAIN_PASSWORD
        # a volume without volume number field is the first one
        if (h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR) == 0:
            h.flags |= RAR_MAIN_FIRSTVOLUME

        return h

    def _parse_file_block(self, h, hdata, pos):
        """Parse file or service block, including its extra records."""
        h.file_flags, pos = load_vint(hdata, pos)
        h.file_size, pos = load_vint(hdata, pos)
        h.mode, pos = load_vint(hdata, pos)

        if h.file_flags & RAR5_FILE_FLAG_HAS_MTIME:
            h.mtime, pos = load_unixtime(hdata, pos)
            h.date_time = h.mtime.timetuple()[:6]
        if h.file_flags & RAR5_FILE_FLAG_HAS_CRC32:
            h.CRC, pos = load_le32(hdata, pos)
            h._md_class = CRC32Context
            h._md_expect = h.CRC

        h.file_compress_flags, pos = load_vint(hdata, pos)
        h.file_host_os, pos = load_vint(hdata, pos)
        h.orig_filename, pos = load_vstr(hdata, pos)
        h.filename = h.orig_filename.decode("utf8", "replace").rstrip("/")

        # use compatible values
        if h.file_host_os == RAR5_OS_WINDOWS:
            h.host_os = RAR_OS_WIN32
        else:
            h.host_os = RAR_OS_UNIX
        h.compress_type = RAR_M0 + ((h.file_compress_flags >> 7) & 7)

        if h.block_extra_size:
            # allow 1 byte of garbage
            while pos < len(hdata) - 1:
                xsize, pos = load_vint(hdata, pos)
                xdata, pos = load_bytes(hdata, xsize, pos)
                self._process_file_extra(h, xdata)

        if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE:
            h.flags |= RAR_FILE_SPLIT_BEFORE
        if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_AFTER:
            h.flags |= RAR_FILE_SPLIT_AFTER
        if h.file_flags & RAR5_FILE_FLAG_ISDIR:
            h.flags |= RAR_FILE_DIRECTORY
        if h.file_compress_flags & RAR5_COMPR_SOLID:
            h.flags |= RAR_FILE_SOLID

        if h.is_dir():
            h.filename = h.filename + "/"
        return h

    def _parse_endarc_block(self, h, hdata, pos):
        """Parse end-of-archive block."""
        h.endarc_flags, pos = load_vint(hdata, pos)
        if h.endarc_flags & RAR5_ENDARC_FLAG_NEXT_VOL:
            h.flags |= RAR_ENDARC_NEXT_VOLUME
        return h

    def _check_password(self, h):
        """Verify the password against the check value of encryption block *h*.

        Does nothing when the check value has unexpected length or its own
        checksum is wrong.  Raises RarWrongPassword on mismatch and
        BadRarFile when the KDF count is above the accepted limit.
        """
        check_value = h.encryption_check_value
        if len(check_value) != 12:
            return
        stored_check = check_value[:8]
        stored_sum = check_value[8:]

        # validate the check value before spending time in the KDF
        if hashlib.sha256(stored_check).digest()[:4] != stored_sum:
            return

        # same bound as _gen_key(); the count is an unchecked archive byte
        if h.encryption_kdf_count > self._MAX_KDF_COUNT:
            raise BadRarFile("Too large kdf_count")

        pwd = self._password
        if isinstance(pwd, str):
            pwd = pwd.encode("utf8")
        pwd_check = bytearray(
            pbkdf2_hmac("sha256", pwd, h.encryption_salt, (1 << h.encryption_kdf_count) + 32))
        # fold the tail of the hash into its first RAR5_PW_CHECK_SIZE bytes
        for i, v in enumerate(pwd_check[RAR5_PW_CHECK_SIZE:]):
            pwd_check[i & (RAR5_PW_CHECK_SIZE - 1)] ^= v
        pwd_check = pwd_check[:RAR5_PW_CHECK_SIZE]

        if pwd_check != stored_check:
            raise RarWrongPassword()

    def _parse_encryption_block(self, h, hdata, pos):
        """Parse header-encryption block and remember it for later headers.

        Raises BadRarFile for ciphers other than AES256.
        """
        h.encryption_algo, pos = load_vint(hdata, pos)
        h.encryption_flags, pos = load_vint(hdata, pos)
        h.encryption_kdf_count, pos = load_byte(hdata, pos)
        h.encryption_salt, pos = load_bytes(hdata, 16, pos)
        if h.encryption_flags & RAR5_ENC_FLAG_HAS_CHECKVAL:
            h.encryption_check_value, pos = load_bytes(hdata, 12, pos)
        if h.encryption_algo != RAR5_XENC_CIPHER_AES256:
            raise BadRarFile("Unsupported header encryption cipher")
        if h.encryption_check_value and self._password:
            self._check_password(h)
        self._hdrenc_main = h
        return h

    def _process_file_extra(self, h, xdata):
        """Dispatch one extra record of a file block to its parser.

        Service and unknown record types are ignored.
        """
        xtype, pos = load_vint(xdata, 0)
        if xtype == RAR5_XFILE_TIME:
            self._parse_file_xtime(h, xdata, pos)
        elif xtype == RAR5_XFILE_ENCRYPTION:
            self._parse_file_encryption(h, xdata, pos)
        elif xtype == RAR5_XFILE_HASH:
            self._parse_file_hash(h, xdata, pos)
        elif xtype == RAR5_XFILE_VERSION:
            self._parse_file_version(h, xdata, pos)
        elif xtype == RAR5_XFILE_REDIR:
            self._parse_file_redir(h, xdata, pos)
        elif xtype == RAR5_XFILE_OWNER:
            self._parse_file_owner(h, xdata, pos)

    # extra block for file time record
    def _parse_file_xtime(self, h, xdata, pos):
        """Parse time record: mtime/ctime/atime, optionally with nanoseconds."""
        tflags, pos = load_vint(xdata, pos)

        ldr = load_windowstime
        if tflags & RAR5_XTIME_UNIXTIME:
            ldr = load_unixtime

        if tflags & RAR5_XTIME_HAS_MTIME:
            h.mtime, pos = ldr(xdata, pos)
            h.date_time = h.mtime.timetuple()[:6]
        if tflags & RAR5_XTIME_HAS_CTIME:
            h.ctime, pos = ldr(xdata, pos)
        if tflags & RAR5_XTIME_HAS_ATIME:
            h.atime, pos = ldr(xdata, pos)

        # nanosecond parts follow in the same field order
        if tflags & RAR5_XTIME_UNIXTIME_NS:
            if tflags & RAR5_XTIME_HAS_MTIME:
                nsec, pos = load_le32(xdata, pos)
                h.mtime = to_nsdatetime(h.mtime, nsec)
            if tflags & RAR5_XTIME_HAS_CTIME:
                nsec, pos = load_le32(xdata, pos)
                h.ctime = to_nsdatetime(h.ctime, nsec)
            if tflags & RAR5_XTIME_HAS_ATIME:
                nsec, pos = load_le32(xdata, pos)
                h.atime = to_nsdatetime(h.atime, nsec)

    # just remember encryption info
    def _parse_file_encryption(self, h, xdata, pos):
        """Parse file encryption record and mark the entry as encrypted."""
        algo, pos = load_vint(xdata, pos)
        flags, pos = load_vint(xdata, pos)
        kdf_count, pos = load_byte(xdata, pos)
        salt, pos = load_bytes(xdata, 16, pos)
        iv, pos = load_bytes(xdata, 16, pos)
        checkval = None
        if flags & RAR5_XENC_CHECKVAL:
            checkval, pos = load_bytes(xdata, 12, pos)
        if flags & RAR5_XENC_TWEAKED:
            # stored checksums cannot be compared for such entries
            h._md_expect = None
            h._md_class = NoHashContext

        h.file_encryption = (algo, flags, kdf_count, salt, iv, checkval)
        h.flags |= RAR_FILE_PASSWORD

    def _parse_file_hash(self, h, xdata, pos):
        """Parse file hash record (only BLAKE2sp is handled)."""
        hash_type, pos = load_vint(xdata, pos)
        if hash_type == RAR5_XHASH_BLAKE2SP:
            h.blake2sp_hash, pos = load_bytes(xdata, 32, pos)
            if (h.file_encryption[1] & RAR5_XENC_TWEAKED) == 0:
                h._md_class = Blake2SP
                h._md_expect = h.blake2sp_hash

    def _parse_file_version(self, h, xdata, pos):
        """Parse file version record into ``h.file_version``."""
        flags, pos = load_vint(xdata, pos)
        version, pos = load_vint(xdata, pos)
        h.file_version = (flags, version)

    def _parse_file_redir(self, h, xdata, pos):
        """Parse redirect record into ``h.file_redir`` as (type, flags, name)."""
        redir_type, pos = load_vint(xdata, pos)
        redir_flags, pos = load_vint(xdata, pos)
        redir_name, pos = load_vstr(xdata, pos)
        redir_name = redir_name.decode("utf8", "replace")
        h.file_redir = (redir_type, redir_flags, redir_name)

    def _parse_file_owner(self, h, xdata, pos):
        """Parse owner record into ``h.file_owner``.

        Fields absent from the record stay None.
        """
        user_name = group_name = user_id = group_id = None

        flags, pos = load_vint(xdata, pos)
        if flags & RAR5_XOWNER_UNAME:
            user_name, pos = load_vstr(xdata, pos)
        if flags & RAR5_XOWNER_GNAME:
            group_name, pos = load_vstr(xdata, pos)
        if flags & RAR5_XOWNER_UID:
            user_id, pos = load_vint(xdata, pos)
        if flags & RAR5_XOWNER_GID:
            group_id, pos = load_vint(xdata, pos)

        h.file_owner = (user_name, group_name, user_id, group_id)

    def process_entry(self, fd, item):
        """Register parsed block *item* in the parser state."""
        if item.block_type == RAR5_BLOCK_FILE:
            if item.file_version:
                pass    # skip old versions
            elif (item.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE) == 0:
                # use only first part
                self._info_map[item.filename.rstrip("/")] = item
                self._info_list.append(item)
            elif self._info_list:
                # final crc is in last block
                old = self._info_list[-1]
                old.CRC = item.CRC
                old._md_expect = item._md_expect
                old.blake2sp_hash = item.blake2sp_hash
                old.compress_size += item.compress_size
        elif item.block_type == RAR5_BLOCK_SERVICE:
            if item.filename == "CMT":
                self._load_comment(fd, item)

    def _load_comment(self, fd, item):
        """Load archive comment from service block *item* into ``self.comment``.

        Split or compressed comment blocks are skipped, as are encrypted
        ones that cannot be decrypted (other cipher, no password, no
        crypto support).  Always returns None.
        """
        if item.block_flags & (RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER):
            return None
        if item.compress_type != RAR_M0:
            return None

        if item.flags & RAR_FILE_PASSWORD:
            algo, ___flags, kdf_count, salt, iv, ___checkval = item.file_encryption
            if algo != RAR5_XENC_CIPHER_AES256:
                return None
            # a comment is optional - skip it instead of failing the whole
            # parse when it cannot be decrypted
            if not _have_crypto or not self._password:
                return None
            key = self._gen_key(kdf_count, salt)
            f = HeaderDecrypt(fd, key, iv)
            cmt = f.read(item.file_size)
        else:
            # archive comment
            with self._open_clear(item) as cmtstream:
                cmt = cmtstream.read()

        # rar bug? - appends zero to comment
        cmt = cmt.split(b"\0", 1)[0]
        # tolerate malformed bytes, same as for names
        self.comment = cmt.decode("utf8", "replace")
        return None

    def _open_hack(self, inf, pwd):
        """Open *inf* via a temp archive: main header, the entry, end block."""
        # len, type, blk_flags, flags
        main_hdr = b"\x03\x01\x00\x00"
        endarc_hdr = b"\x03\x05\x00\x00"
        main_hdr = S_LONG.pack(crc32(main_hdr)) + main_hdr
        endarc_hdr = S_LONG.pack(crc32(endarc_hdr)) + endarc_hdr
        return self._open_hack_core(inf, pwd, RAR5_ID + main_hdr, endarc_hdr)
2155 ## Utility classes
class UnicodeFilename:
    """Handle RAR3 unicode filename decompression.

    *name* is the 8-bit form of the name, *encdata* the packed data that
    expands it to UTF-16.  ``failed`` becomes 1 when either input runs
    out while decoding.
    """
    def __init__(self, name, encdata):
        self.std_name = bytearray(name)
        self.encdata = bytearray(encdata)
        self.pos = 0
        self.encpos = 0
        self.buf = bytearray()
        self.failed = 0

    def enc_byte(self):
        """Copy encoded byte."""
        if self.encpos >= len(self.encdata):
            # ran past the packed data
            self.failed = 1
            return 0
        value = self.encdata[self.encpos]
        self.encpos += 1
        return value

    def std_byte(self):
        """Copy byte from 8-bit representation."""
        if self.pos >= len(self.std_name):
            # ran past the 8-bit name
            self.failed = 1
            return ord("?")
        return self.std_name[self.pos]

    def put(self, lo, hi):
        """Copy 16-bit value to result."""
        self.buf.extend((lo, hi))
        self.pos += 1

    def decode(self):
        """Decompress compressed UTF16 value."""
        hi = self.enc_byte()
        flags = 0
        bits_left = 0
        while self.encpos < len(self.encdata):
            # each flag byte carries four 2-bit opcodes, highest first
            if not bits_left:
                flags = self.enc_byte()
                bits_left = 8
            bits_left -= 2
            opcode = (flags >> bits_left) & 3

            if opcode == 3:
                count = self.enc_byte()
                if count & 0x80:
                    # run of 8-bit name bytes shifted by a correction
                    delta = self.enc_byte()
                    for _ in range((count & 0x7f) + 2):
                        self.put((self.std_byte() + delta) & 0xFF, hi)
                else:
                    # run of 8-bit name bytes taken as is
                    for _ in range(count + 2):
                        self.put(self.std_byte(), 0)
            elif opcode == 2:
                low = self.enc_byte()
                high = self.enc_byte()
                self.put(low, high)
            elif opcode == 1:
                self.put(self.enc_byte(), hi)
            else:
                self.put(self.enc_byte(), 0)
        return self.buf.decode("utf-16le", "replace")
2221 class RarExtFile(io.RawIOBase):
2222 """Base class for file-like object that :meth:`RarFile.open` returns.
2224 Provides public methods and common crc checking.
2226 Behaviour:
2227 - no short reads - .read() and .readinfo() read as much as requested.
2228 - no internal buffer, use io.BufferedReader for that.
2230 name = None #: Filename of the archive entry
2231 mode = "rb"
2232 _parser = None
2233 _inf = None
2234 _fd = None
2235 _remain = 0
2236 _returncode = 0
2237 _md_context = None
2239 def _open_extfile(self, parser, inf):
2240 self.name = inf.filename
2241 self._parser = parser
2242 self._inf = inf
2244 if self._fd:
2245 self._fd.close()
2246 md_class = self._inf._md_class or NoHashContext
2247 self._md_context = md_class()
2248 self._fd = None
2249 self._remain = self._inf.file_size
2251 def read(self, n=-1):
2252 """Read all or specified amount of data from archive entry."""
2254 # sanitize count
2255 if n is None or n < 0:
2256 n = self._remain
2257 elif n > self._remain:
2258 n = self._remain
2259 if n == 0:
2260 return b""
2262 buf = []
2263 orig = n
2264 while n > 0:
2265 # actual read
2266 data = self._read(n)
2267 if not data:
2268 break
2269 buf.append(data)
2270 self._md_context.update(data)
2271 self._remain -= len(data)
2272 n -= len(data)
2273 data = b"".join(buf)
2274 if n > 0:
2275 raise BadRarFile("Failed the read enough data: req=%d got=%d" % (orig, len(data)))
2277 # done?
2278 if not data or self._remain == 0:
2279 # self.close()
2280 self._check()
2281 return data
2283 def _check(self):
2284 """Check final CRC."""
2285 final = self._md_context.digest()
2286 exp = self._inf._md_expect
2287 if exp is None:
2288 return
2289 if final is None:
2290 return
2291 if self._returncode:
2292 check_returncode(self._returncode, "", tool_setup().get_errmap())
2293 if self._remain != 0:
2294 raise BadRarFile("Failed the read enough data")
2295 if final != exp:
2296 raise BadRarFile("Corrupt file - CRC check failed: %s - exp=%r got=%r" % (
2297 self._inf.filename, exp, final))
2299 def _read(self, cnt):
2300 """Actual read that gets sanitized cnt."""
2301 raise NotImplementedError("_read")
2303 def close(self):
2304 """Close open resources."""
2306 super().close()
2308 if self._fd:
2309 self._fd.close()
2310 self._fd = None
2312 def __del__(self):
2313 """Hook delete to make sure tempfile is removed."""
2314 self.close()
2316 def readinto(self, buf):
2317 """Zero-copy read directly into buffer.
2319 Returns bytes read.
2321 raise NotImplementedError("readinto")
2323 def tell(self):
2324 """Return current reading position in uncompressed data."""
2325 return self._inf.file_size - self._remain
2327 def seek(self, offset, whence=0):
2328 """Seek in data.
2330 On uncompressed files, the seeking works by actual
2331 seeks so it's fast. On compresses files its slow
2332 - forward seeking happends by reading ahead,
2333 backwards by re-opening and decompressing from the start.
2336 # disable crc check when seeking
2337 self._md_context = NoHashContext()
2339 fsize = self._inf.file_size
2340 cur_ofs = self.tell()
2342 if whence == 0: # seek from beginning of file
2343 new_ofs = offset
2344 elif whence == 1: # seek from current position
2345 new_ofs = cur_ofs + offset
2346 elif whence == 2: # seek from end of file
2347 new_ofs = fsize + offset
2348 else:
2349 raise ValueError("Invalid value for whence")
2351 # sanity check
2352 if new_ofs < 0:
2353 new_ofs = 0
2354 elif new_ofs > fsize:
2355 new_ofs = fsize
2357 # do the actual seek
2358 if new_ofs >= cur_ofs:
2359 self._skip(new_ofs - cur_ofs)
2360 else:
2361 # reopen and seek
2362 self._open_extfile(self._parser, self._inf)
2363 self._skip(new_ofs)
2364 return self.tell()
2366 def _skip(self, cnt):
2367 """Read and discard data"""
2368 empty_read(self, cnt, BSIZE)
2370 def readable(self):
2371 """Returns True"""
2372 return True
2374 def writable(self):
2375 """Returns False.
2377 Writing is not supported.
2379 return False
def seekable(self):
    """Always True.

    Seeking is supported, although it is slow on compressed files.
    """
    return True
def readall(self):
    """Return everything that is left to read."""
    # delegate to read() instead of using the RawIOBase default implementation
    return self.read()
class PipeReader(RarExtFile):
    """Reader that takes member data from the stdout pipe of an external
    command, and removes an optional tempfile when closed.
    """

    def __init__(self, parser, inf, cmd, tempfile=None):
        super().__init__()
        self._cmd = cmd
        self._proc = None
        self._tempfile = tempfile
        self._open_extfile(parser, inf)

    def _close_proc(self):
        """Close the child's pipes, wait for it and remember its exit code."""
        proc = self._proc
        if not proc:
            return
        for stream in (proc.stdout, proc.stderr, proc.stdin):
            if stream:
                stream.close()
        proc.wait()
        self._returncode = proc.returncode
        self._proc = None

    def _open_extfile(self, parser, inf):
        """(Re)start the external command and read from its stdout."""
        super()._open_extfile(parser, inf)

        # get rid of a previously running process first
        self._close_proc()

        # start a fresh one
        self._returncode = 0
        self._proc = custom_popen(self._cmd)
        self._fd = self._proc.stdout

    def _read(self, cnt):
        """Read *cnt* bytes from the pipe, retrying on short reads.

        Returns fewer bytes only when the pipe reports end of data.
        """
        first = self._fd.read(cnt)
        if len(first) == cnt or not first:
            # the common case: everything (or nothing) arrived at once
            return first

        # pipe delivered only part of the request - keep asking
        chunks = [first]
        missing = cnt - len(first)
        while missing > 0:
            more = self._fd.read(missing)
            if not more:
                break
            missing -= len(more)
            chunks.append(more)
        return b"".join(chunks)

    def close(self):
        """Stop the process, close the file and delete the tempfile."""
        self._close_proc()
        super().close()

        if self._tempfile:
            try:
                os.unlink(self._tempfile)
            except OSError:
                # best effort: a failed removal is ignored
                pass
            self._tempfile = None

    def readinto(self, buf):
        """Fill *buf* directly from the pipe; returns number of bytes stored.

        Never reads more than the remaining member size.
        """
        limit = min(len(buf), self._remain)
        view = memoryview(buf)
        filled = 0
        while filled < limit:
            n = self._fd.readinto(view[filled: limit])
            if not n:
                break
            self._md_context.update(view[filled: filled + n])
            self._remain -= n
            filled += n
        return filled
class DirectReader(RarExtFile):
    """Reader for stored (uncompressed) data, taken directly from the
    archive volumes.
    """

    _cur = None         # header of the file block being read
    _cur_avail = None   # bytes still unread in the current block
    _volfile = None     # volume currently open

    def __init__(self, parser, inf):
        super().__init__()
        self._open_extfile(parser, inf)

    def _open_extfile(self, parser, inf):
        """Open the member's volume and position just past its header."""
        super()._open_extfile(parser, inf)

        self._volfile = self._inf.volume_file
        self._fd = XFile(self._volfile, 0)
        self._fd.seek(self._inf.header_offset, 0)
        # re-parsing the header leaves the file position at the data
        self._cur = self._parser._parse_header(self._fd)
        self._cur_avail = self._cur.add_size

    def _skip(self, cnt):
        """Seek forward by *cnt* bytes, crossing volume boundaries as needed."""
        while cnt > 0:
            # current block exhausted - continue in the next volume
            if self._cur_avail == 0 and not self._open_next():
                break

            avail = self._cur_avail
            if cnt <= avail:
                # target lies inside this block: one real seek finishes it
                self._fd.seek(cnt, 1)
                self._cur_avail = avail - cnt
                self._remain -= cnt
                return

            # consume the whole rest of this block and go on
            cnt -= avail
            self._remain -= avail
            self._cur_avail = 0

    def _read(self, cnt):
        """Read up to *cnt* bytes from a potentially multi-volume archive."""
        pieces = []
        while cnt > 0:
            # current block exhausted - continue in the next volume
            if self._cur_avail == 0 and not self._open_next():
                break

            # never read past the end of the current block
            chunk = self._fd.read(min(cnt, self._cur_avail))
            if not chunk:
                break

            got = len(chunk)
            cnt -= got
            self._cur_avail -= got
            pieces.append(chunk)

        if len(pieces) == 1:
            return pieces[0]
        return b"".join(pieces)

    def _open_next(self):
        """Switch to the next volume.

        Returns False when the file does not continue in another volume,
        True once the continuation block has been located.
        """
        # file is not split over archives - nothing to open
        if (self._cur.flags & RAR_FILE_SPLIT_AFTER) == 0:
            return False

        if self._fd:
            self._fd.close()
            self._fd = None

        # open the following part and verify its signature
        self._volfile = self._parser._next_volname(self._volfile)
        fd = open(self._volfile, "rb", 0)
        self._fd = fd
        expected = self._parser._expect_sig
        if fd.read(len(expected)) != expected:
            raise BadRarFile("Invalid signature")

        # step over marker/main blocks until the first file header
        hdr = self._parser._parse_header(fd)
        while hdr and hdr.type in (RAR_BLOCK_MARK, RAR_BLOCK_MAIN):
            if hdr.add_size:
                fd.seek(hdr.add_size, 1)
            hdr = self._parser._parse_header(fd)
        if not hdr:
            raise BadRarFile("Unexpected EOF")
        if hdr.orig_filename != self._inf.orig_filename:
            raise BadRarFile("Did not found file entry")

        self._cur = hdr
        self._cur_avail = hdr.add_size
        return True

    def readinto(self, buf):
        """Fill *buf* directly from the archive; returns number of bytes stored."""
        total = len(buf)
        view = memoryview(buf)
        filled = 0
        while filled < total:
            # current block exhausted - continue in the next volume
            if self._cur_avail == 0 and not self._open_next():
                break

            # limit this read to what the current block still has
            want = min(total - filled, self._cur_avail)

            n = self._fd.readinto(view[filled: filled + want])
            if not n:
                break
            self._md_context.update(view[filled: filled + n])
            self._cur_avail -= n
            self._remain -= n
            filled += n
        return filled
class HeaderDecrypt:
    """File-like wrapper that decrypts data read from another file.

    Data is decrypted in 16-byte blocks; decrypted bytes that were not
    asked for yet are kept in ``buf`` for the next read.
    """

    def __init__(self, f, key, iv):
        self.f = f
        self.ciph = AES_CBC_Decrypt(key, iv)
        self.buf = b""

    def tell(self):
        """Position of the underlying file - exact only on block boundaries."""
        return self.f.tell()

    def read(self, cnt=None):
        """Return up to *cnt* decrypted bytes.

        *cnt* is compared against integers right away, so a number must
        be passed.  Requests above 8 KiB raise BadRarFile.  Fewer bytes
        are returned when the underlying file runs out of full blocks.
        """
        if cnt > 8 * 1024:
            raise BadRarFile("Bad count to header decrypt - wrong password?")

        # request fully covered by leftover data
        pending = self.buf
        if cnt <= len(pending):
            self.buf = pending[cnt:]
            return pending[:cnt]

        # take all leftover data, then decrypt more
        out = pending
        self.buf = b""
        cnt -= len(out)

        block_size = 16
        while cnt > 0:
            raw = self.f.read(block_size)
            if len(raw) < block_size:
                # incomplete block: source exhausted
                break
            plain = self.ciph.decrypt(raw)
            if cnt < len(plain):
                # only part of this block is wanted, stash the rest
                out += plain[:cnt]
                self.buf = plain[cnt:]
                cnt = 0
            else:
                out += plain
                cnt -= len(plain)

        return out
class XFile:
    """Uniform wrapper over either a filename or an existing file object.

    A file opened from a name is owned by the wrapper and closed by
    :meth:`close`; a file object passed in is rewound to offset 0 and
    left open.
    """
    __slots__ = ("_fd", "_need_close")

    def __init__(self, xfile, bufsize=1024):
        filelike = is_filelike(xfile)
        self._need_close = not filelike
        if filelike:
            self._fd = xfile
            self._fd.seek(0)
        else:
            self._fd = open(xfile, "rb", bufsize)

    def read(self, n=None):
        """Read up to *n* bytes from the wrapped file."""
        return self._fd.read(n)

    def tell(self):
        """Current position in the wrapped file."""
        return self._fd.tell()

    def seek(self, ofs, whence=0):
        """Reposition the wrapped file."""
        return self._fd.seek(ofs, whence)

    def readinto(self, buf):
        """Read from the wrapped file into *buf*."""
        return self._fd.readinto(buf)

    def close(self):
        """Close the wrapped file, but only if this wrapper opened it."""
        if not self._need_close:
            return
        self._fd.close()

    def __enter__(self):
        return self

    def __exit__(self, typ, val, tb):
        self.close()
class NoHashContext:
    """Hash context that does nothing; every method returns None."""

    def __init__(self, data=None):
        """Accepts and ignores initial data."""

    def update(self, data):
        """Accepts and ignores data."""

    def digest(self):
        """No digest is computed."""

    def hexdigest(self):
        """No hexadecimal digest is computed."""
class CRC32Context:
    """Hash-like context computing a running CRC32.

    Unlike hashlib objects, :meth:`digest` returns the CRC as an integer.
    """
    __slots__ = ["_crc"]

    def __init__(self, data=None):
        self._crc = 0
        if data:
            self.update(data)

    def update(self, data):
        """Feed more data into the checksum."""
        self._crc = crc32(data, self._crc)

    def digest(self):
        """Current CRC32 value as an integer."""
        return self._crc

    def hexdigest(self):
        """Current CRC32 value as 8 lowercase hex digits."""
        value = self.digest()
        return "%08x" % value
class Blake2SP:
    """Blake2sp hash context.

    Input is cut into ``block_size`` pieces that are dealt round-robin to
    ``parallelism`` blake2s leaf contexts; the final digest is a blake2s
    hash over the leaf digests.
    """
    __slots__ = ["_thread", "_buf", "_cur", "_digest"]
    digest_size = 32
    block_size = 64
    parallelism = 8

    def __init__(self, data=None):
        self._buf = b""         # input not yet forming a full block
        self._cur = 0           # leaf that receives the next block
        self._digest = None     # cached final digest
        last = self.parallelism - 1
        self._thread = [self._blake2s(n, 0, n == last)
                        for n in range(self.parallelism)]

        if data:
            self.update(data)

    def _blake2s(self, ofs, depth, is_last):
        """Create a blake2s tree node context."""
        return blake2s(node_offset=ofs, node_depth=depth, last_node=is_last,
                       depth=2, inner_size=32, fanout=self.parallelism)

    def _add_block(self, blk):
        """Give one block to the current leaf, then move to the next leaf."""
        leaf = self._thread[self._cur]
        leaf.update(blk)
        self._cur = (self._cur + 1) % self.parallelism

    def update(self, data):
        """Hash data."""
        view = memoryview(data)
        bs = self.block_size
        pos = 0

        if self._buf:
            # first try to complete the partial block kept from before
            need = bs - len(self._buf)
            if len(view) < need:
                self._buf += view.tobytes()
                return
            self._add_block(self._buf + view[:need].tobytes())
            pos = need

        # hand out all complete blocks
        end = len(view)
        while end - pos >= bs:
            self._add_block(view[pos:pos + bs])
            pos += bs

        # keep the incomplete tail for later
        self._buf = view[pos:].tobytes()

    def digest(self):
        """Return final digest value (computed once, then cached)."""
        if self._digest is not None:
            return self._digest

        if self._buf:
            self._add_block(self._buf)
            self._buf = b""
        root = self._blake2s(0, 1, True)
        for leaf in self._thread:
            root.update(leaf.digest())
        self._digest = root.digest()
        return self._digest

    def hexdigest(self):
        """Hexadecimal digest."""
        raw = self.digest()
        return hexlify(raw).decode("ascii")
class Rar3Sha1:
    """SHA1 with optional emulation of the buggy RAR3 implementation.

    With ``rarbug=True`` the input buffer passed to :meth:`update` is
    modified in place, so it must be writable (e.g. a bytearray).
    """
    digest_size = 20
    block_size = 64

    _BLK_BE = struct.Struct(b">16L")
    _BLK_LE = struct.Struct(b"<16L")

    __slots__ = ("_nbytes", "_md", "_rarbug")

    def __init__(self, data=b"", rarbug=False):
        self._md = sha1()
        self._nbytes = 0
        self._rarbug = rarbug
        self.update(data)

    def update(self, data):
        """Process more data."""
        self._md.update(data)
        # offset inside the current 64-byte block before this call
        bufpos = self._nbytes & 63
        self._nbytes += len(data)

        if not self._rarbug or len(data) <= 64:
            return
        # corrupt every block that lies completely within data, starting
        # at the first block boundary that falls inside it
        step = self.block_size
        for dpos in range(step - bufpos, len(data) - step + 1, step):
            self._corrupt(data, dpos)

    def digest(self):
        """Return final state."""
        return self._md.digest()

    def hexdigest(self):
        """Return final state as hex string."""
        return self._md.hexdigest()

    def _corrupt(self, data, dpos):
        """Overwrite the block at *dpos* with leftover SHA1 core state."""
        words = list(self._BLK_BE.unpack_from(data, dpos))
        for t in range(16, 80):
            mixed = (words[(t - 3) & 15] ^ words[(t - 8) & 15]
                     ^ words[(t - 14) & 15] ^ words[(t - 16) & 15])
            # rotate left by one bit within 32 bits
            words[t & 15] = ((mixed << 1) | (mixed >> (32 - 1))) & 0xFFFFFFFF
        self._BLK_LE.pack_into(data, dpos, *words)
2832 ## Utility functions
# Precompiled little-endian struct layouts.
S_LONG = Struct("<L")       # one unsigned 32-bit value
S_SHORT = Struct("<H")      # one unsigned 16-bit value
S_BYTE = Struct("<B")       # one unsigned 8-bit value

# packed by rar3_decompress() as: header crc, block type, flags, header size
S_BLK_HDR = Struct("<HBHH")
# packed by rar3_decompress() as: packed size, unpacked size, host os, crc,
# dos time, version, method, name length, mode
S_FILE_HDR = Struct("<LLBLLBBHL")
# NOTE(review): field meaning is not visible in this part of the file
S_COMMENT_HDR = Struct("<HBBH")
def load_vint(buf, pos):
    """Decode a RAR5 variable-size integer starting at *pos*.

    Each byte contributes its low 7 bits, least significant group first;
    a byte below 0x80 ends the number.  At most 11 bytes are examined.
    Returns ``(value, next_pos)``; raises BadRarFile if no terminating
    byte is found.
    """
    value = 0
    stop = min(pos + 11, len(buf))
    for nbyte, idx in enumerate(range(pos, stop)):
        byte = buf[idx]
        value |= (byte & 0x7F) << (7 * nbyte)
        if byte < 0x80:
            return value, idx + 1
    raise BadRarFile("cannot load vint")
def load_byte(buf, pos):
    """Read one unsigned byte at *pos*; returns ``(value, next_pos)``.

    Raises BadRarFile when *pos* is at or past the end of *buf*.
    """
    nxt = pos + 1
    if nxt > len(buf):
        raise BadRarFile("cannot load byte")
    (value,) = S_BYTE.unpack_from(buf, pos)
    return value, nxt
def load_le32(buf, pos):
    """Read a little-endian unsigned 32-bit integer at *pos*.

    Returns ``(value, next_pos)``; raises BadRarFile when fewer than
    four bytes are available.
    """
    nxt = pos + 4
    if nxt > len(buf):
        raise BadRarFile("cannot load le32")
    (value,) = S_LONG.unpack_from(buf, pos)
    return value, nxt
def load_bytes(buf, num, pos):
    """Slice *num* bytes out of *buf* starting at *pos*.

    Returns ``(data, next_pos)``; raises BadRarFile when the slice would
    run past the end of *buf*.
    """
    stop = pos + num
    if stop > len(buf):
        raise BadRarFile("cannot load bytes")
    chunk = buf[pos: stop]
    return chunk, stop
def load_vstr(buf, pos):
    """Read a byte string whose length is given by a leading vint.

    Returns ``(data, next_pos)``.
    """
    length, start = load_vint(buf, pos)
    return load_bytes(buf, length, start)
def load_dostime(buf, pos):
    """Read a little-endian 32-bit DOS timestamp as a datetime.

    Returns ``(datetime, next_pos)``.
    """
    stamp, pos = load_le32(buf, pos)
    fields = parse_dos_time(stamp)
    when = to_datetime(fields)
    return when, pos
def load_unixtime(buf, pos):
    """Read a little-endian 32-bit unix timestamp as a UTC datetime.

    Returns ``(datetime, next_pos)``.
    """
    seconds, pos = load_le32(buf, pos)
    return datetime.fromtimestamp(seconds, timezone.utc), pos
def load_windowstime(buf, pos):
    """Read a little-endian 64-bit windows timestamp as a UTC datetime.

    The value counts 100ns units since the windows epoch (1601).
    Returns ``(datetime, next_pos)``; the sub-second part is attached
    with nanosecond resolution.
    """
    # unix epoch (1970) in seconds from windows epoch (1601)
    epoch_delta = 11644473600

    low, pos = load_le32(buf, pos)
    high, pos = load_le32(buf, pos)
    ticks = (high << 32) | low

    # split into whole seconds and leftover 100ns units
    secs, hundred_ns = divmod(ticks, 10000000)
    base = datetime.fromtimestamp(secs - epoch_delta, timezone.utc)
    return to_nsdatetime(base, hundred_ns * 100), pos
2915 # volume numbering
# matches a string consisting only of ASCII digits; _next_oldvol() applies
# it to the part of the extension after its first two characters
_rc_num = re.compile('^[0-9]+$')
def _next_newvol(volfile):
    """Compute the next volume name, new-style numbering.

    The last digit found in the file name (not looking past a path
    separator) is incremented.  Names without extension, or with
    ``.exe`` / ``.sfx``, are treated as ``.rar``.  Raises BadRarName
    when the name part contains no digit.
    """
    stem, ext = os.path.splitext(volfile)
    if ext.lower() in ("", ".exe", ".sfx"):
        volfile = stem + ".rar"

    # scan backwards for the last digit
    for idx in range(len(volfile) - 1, -1, -1):
        ch = volfile[idx]
        if "0" <= ch <= "9":
            return _inc_volname(volfile, idx, False)
        if ch in ("/", os.sep):
            # reached the directory part - give up
            break
    raise BadRarName("Cannot construct volume name: " + volfile)
def _next_oldvol(volfile):
    """Compute the next volume name, old-style numbering.

    The number lives in the extension: ``.rar`` is followed by ``.r00``,
    after that the extension is incremented.  Names without extension,
    or with ``.exe`` / ``.sfx``, are treated as ``.rar``.
    """
    stem, ext = os.path.splitext(volfile)
    if ext.lower() in ("", ".exe", ".sfx"):
        ext = ".rar"

    if _rc_num.match(ext[2:]):
        # already numbered: increment, letting the carry reach the letter
        new_ext = _inc_volname(ext, len(ext) - 1, True)
    else:
        # .rar -> .r00
        new_ext = ext[:2] + "00"
    return stem + new_ext
2953 def _inc_volname(volfile, i, inc_chars):
2954 """increase digits with carry, otherwise just increment char
2956 fn = list(volfile)
2957 while i >= 0:
2958 if fn[i] == "9":
2959 fn[i] = "0"
2960 i -= 1
2961 if i < 0:
2962 fn.insert(0, "1")
2963 elif "0" <= fn[i] < "9" or inc_chars:
2964 fn[i] = chr(ord(fn[i]) + 1)
2965 break
2966 else:
2967 fn.insert(i + 1, "1")
2968 break
2969 return "".join(fn)
def _parse_ext_time(h, data, pos):
    """Parse all RAR3 extended time fields into header *h*.

    A 16-bit flags word holds one 4-bit group per timestamp, in the
    order mtime, ctime, atime, arctime from the highest group down.
    Sets ``h.ctime``, ``h.atime``, ``h.arctime`` and, when an extended
    mtime is present, ``h.mtime`` and ``h.date_time``.  Returns the
    position after the parsed data.
    """
    # flags and rest of data can be missing
    flags = 0
    if pos + 2 <= len(data):
        (flags,) = S_SHORT.unpack_from(data, pos)
        pos += 2

    # mtime refines the timestamp already stored in the header
    mtime, pos = _parse_xtime(flags >> 12, data, pos, h.mtime)
    ctime, pos = _parse_xtime(flags >> 8, data, pos)
    h.ctime = ctime
    atime, pos = _parse_xtime(flags >> 4, data, pos)
    h.atime = atime
    arctime, pos = _parse_xtime(flags, data, pos)
    h.arctime = arctime

    if mtime:
        h.mtime = mtime
        h.date_time = mtime.timetuple()[:6]
    return pos
2991 def _parse_xtime(flag, data, pos, basetime=None):
2992 """Parse one RAR3 extended time field
2994 res = None
2995 if flag & 8:
2996 if not basetime:
2997 basetime, pos = load_dostime(data, pos)
2999 # load second fractions of 100ns units
3000 rem = 0
3001 cnt = flag & 3
3002 for _ in range(cnt):
3003 b, pos = load_byte(data, pos)
3004 rem = (b << 16) | (rem >> 8)
3006 # dostime has room for 30 seconds only, correct if needed
3007 if flag & 4 and basetime.second < 59:
3008 basetime = basetime.replace(second=basetime.second + 1)
3010 res = to_nsdatetime(basetime, rem * 100)
3011 return res, pos
def is_filelike(obj):
    """Tell a file object apart from a filename.

    Returns False for bytes, str and Path values, True for objects that
    have ``read``, ``tell`` and ``seek``.  Anything else raises
    ValueError.
    """
    if isinstance(obj, (bytes, str, Path)):
        return False
    if not all(hasattr(obj, attr) for attr in ("read", "tell", "seek")):
        raise ValueError("Invalid object passed as file")
    return True
def rar3_s2k(pwd, salt):
    """String-to-key hash for RAR3.

    Returns ``(key, iv)``: a 16-byte key and a 16-byte IV, derived from
    0x40000 rounds of the RAR3-flavoured SHA1 over password and salt.
    """
    if not isinstance(pwd, str):
        pwd = pwd.decode("utf8")
    # must be mutable: the buggy SHA1 emulation modifies its input
    seed = bytearray(pwd.encode("utf-16le") + salt)
    md = Rar3Sha1(rarbug=True)

    iv_bytes = []
    for n in range(16 * 0x4000):
        counter = S_LONG.pack(n)
        md.update(seed)
        md.update(counter[:3])
        if n % 0x4000 == 0:
            # one IV byte at the start of each 0x4000-round group
            iv_bytes.append(md.digest()[19:20])

    key_be = md.digest()[:16]
    # swap the byte order of each 32-bit word
    key_le = pack("<LLLL", *unpack(">LLLL", key_be))
    return key_le, b"".join(iv_bytes)
def rar3_decompress(vers, meth, data, declen=0, flags=0, crc=0, pwd=None, salt=None):
    """Decompress blob of compressed data.

    Used for data with non-standard header - eg. comments.

    The blob is wrapped into a minimal temporary RAR3 archive (signature,
    main header, one file header named ``data``) which is then handed to
    the external tool; the tool's output is returned.  Stored data
    without password is returned unchanged.
    """
    # nothing to do for stored, unencrypted data
    if meth == RAR_M0 and (flags & RAR_FILE_PASSWORD) == 0:
        return data

    # keep only the flags that matter, mark header as having data attached
    flags &= RAR_FILE_PASSWORD | RAR_FILE_SALT | RAR_FILE_DICTMASK
    flags |= RAR_LONG_BLOCK

    # file header body; timestamp is a fixed 2010-12-31 in DOS format
    member_name = b"data"
    dos_date = ((2010 - 1980) << 25) + (12 << 21) + (31 << 16)
    file_hdr = S_FILE_HDR.pack(len(data), declen, RAR_OS_MSDOS, crc,
                               dos_date, vers, meth, len(member_name),
                               DOS_MODE_ARCHIVE)
    file_hdr += member_name
    if salt:
        file_hdr += salt

    # block header: build once with zero crc to compute the real crc
    hdr_len = S_BLK_HDR.size + len(file_hdr)
    draft = S_BLK_HDR.pack(0, RAR_BLOCK_FILE, flags, hdr_len) + file_hdr
    hdr_crc = crc32(draft[2:]) & 0xFFFF
    full_hdr = S_BLK_HDR.pack(hdr_crc, RAR_BLOCK_FILE, flags, hdr_len) + file_hdr

    # archive main header
    main_hdr = S_BLK_HDR.pack(0x90CF, RAR_BLOCK_MAIN, 0, 13) + b"\0" * (2 + 4)

    # decompress via temp rar
    setup = tool_setup()
    tmpfd, tmpname = mkstemp(suffix=".rar", dir=HACK_TMP_DIR)
    tmpf = os.fdopen(tmpfd, "wb")
    try:
        tmpf.write(RAR_ID + main_hdr + full_hdr + data)
        tmpf.close()

        # password is passed on only when the data is flagged as encrypted
        use_pwd = pwd if (flags & RAR_FILE_PASSWORD) and pwd else None
        argv = setup.open_cmdline(use_pwd, tmpname)
        proc = custom_popen(argv)
        return proc.communicate()[0]
    finally:
        tmpf.close()
        os.unlink(tmpname)
def sanitize_filename(fname, pathsep, is_win32):
    """Simulate unrar sanitization.

    Replaces bad characters with ``_``, drops empty, ``.`` and ``..``
    path segments and joins the rest with *pathsep*.  In win32 mode a
    leading drive prefix is removed and a trailing space or dot of a
    segment becomes ``_``.
    """
    if is_win32:
        # strip "X:" drive prefix
        if len(fname) > 1 and fname[1] == ":":
            fname = fname[2:]
        bad_chars = RC_BAD_CHARS_WIN32
    else:
        bad_chars = RC_BAD_CHARS_UNIX
    fname = bad_chars.sub("_", fname)

    kept = []
    for seg in fname.split("/"):
        if seg in ("", ".", ".."):
            continue
        if is_win32 and seg.endswith((" ", ".")):
            seg = seg[:-1] + "_"
        kept.append(seg)
    return pathsep.join(kept)
def empty_read(src, size, blklen):
    """Read and drop fixed amount of data.

    Consumes exactly *size* bytes from *src* in reads of at most
    *blklen* bytes.  Raises BadRarFile if *src* ends early.
    """
    left = size
    while left > 0:
        chunk = src.read(min(left, blklen))
        if not chunk:
            raise BadRarFile("cannot load data")
        left -= len(chunk)
def to_datetime(t):
    """Convert 6-part time tuple into datetime object.

    Out-of-range fields are clamped instead of failing: month to 1..12,
    day to 1..length of month (February always counted as 28 days),
    hour to at most 23, minute and second to at most 59.
    """
    year, mon, day, h, m, s = t

    # fast path: the values are usually valid
    try:
        return datetime(year, mon, day, h, m, s)
    except ValueError:
        pass

    # something is out of range - clamp each field
    days_in_month = (0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
    mon = max(1, min(mon, 12))
    day = max(1, min(day, days_in_month[mon]))
    return datetime(year, mon, day, min(h, 23), min(m, 59), min(s, 59))
def parse_dos_time(stamp):
    """Parse standard 32-bit DOS timestamp.

    Returns ``(year, month, day, hour, minute, second)``.  Seconds are
    stored in 2-second units; the year is stored as an offset from 1980.
    """
    two_secs = stamp & 0x1F
    minute = (stamp >> 5) & 0x3F
    hour = (stamp >> 11) & 0x1F
    day = (stamp >> 16) & 0x1F
    month = (stamp >> 21) & 0x0F
    year = ((stamp >> 25) & 0x7F) + 1980
    return (year, month, day, hour, minute, two_secs * 2)
3165 # pylint: disable=arguments-differ,signature-differs
class nsdatetime(datetime):
    """Datetime that carries nanoseconds.

    Arithmetic not supported, will lose nanoseconds.

    Instances exist only for timestamps that are not a whole number of
    microseconds; for any other value the constructor returns a plain
    :class:`datetime`.

    .. versionadded:: 4.0
    """
    __slots__ = ("nanosecond",)
    nanosecond: int     #: Number of nanoseconds, 0 <= nanosecond <= 999999999

    def __new__(cls, year, month=None, day=None, hour=0, minute=0, second=0,
                microsecond=0, tzinfo=None, *, fold=0, nanosecond=0):
        # a non-zero nanosecond takes precedence over microsecond
        usec, mod = divmod(nanosecond, 1000) if nanosecond else (microsecond, 0)
        if mod == 0:
            # representable without loss - no need for the subclass
            return datetime(year, month, day, hour, minute, second, usec, tzinfo, fold=fold)
        self = super().__new__(cls, year, month, day, hour, minute, second, usec, tzinfo, fold=fold)
        self.nanosecond = nanosecond
        return self

    @staticmethod
    def _nsec_of(other):
        """Sub-second part of any datetime, in nanoseconds."""
        if isinstance(other, nsdatetime):
            return other.nanosecond
        return other.microsecond * 1000

    def isoformat(self, sep="T", timespec="auto"):
        """Formats with nanosecond precision by default.
        """
        if timespec == "auto":
            pre, post = super().isoformat(sep, "microseconds").split(".", 1)
            # post is 6 microsecond digits plus optional utc offset
            return f"{pre}.{self.nanosecond:09d}{post[6:]}"
        return super().isoformat(sep, timespec)

    def astimezone(self, tz=None):
        """Convert to new timezone.
        """
        tmp = super().astimezone(tz)
        return self.__class__(tmp.year, tmp.month, tmp.day, tmp.hour, tmp.minute, tmp.second,
                              nanosecond=self.nanosecond, tzinfo=tmp.tzinfo, fold=tmp.fold)

    def replace(self, year=None, month=None, day=None, hour=None, minute=None, second=None,
                microsecond=None, tzinfo=None, *, fold=None, nanosecond=None):
        """Return new timestamp with specified fields replaced.

        An explicit *nanosecond* wins over *microsecond*.
        """
        return self.__class__(
            self.year if year is None else year,
            self.month if month is None else month,
            self.day if day is None else day,
            self.hour if hour is None else hour,
            self.minute if minute is None else minute,
            self.second if second is None else second,
            nanosecond=((self.nanosecond if microsecond is None else microsecond * 1000)
                        if nanosecond is None else nanosecond),
            tzinfo=self.tzinfo if tzinfo is None else tzinfo,
            fold=self.fold if fold is None else fold)

    def __hash__(self):
        return hash((super().__hash__(), self.nanosecond)) if self.nanosecond else super().__hash__()

    def __eq__(self, other):
        base = super().__eq__(other)
        # NotImplemented must be handed back untouched so Python can try the
        # reflected comparison; testing its truth value is an error on new
        # Python versions and previously led to an AttributeError below.
        if base is NotImplemented or not base:
            return base
        return self.nanosecond == self._nsec_of(other)

    def __gt__(self, other):
        base = super().__gt__(other)
        if base is NotImplemented or base:
            return base
        # equal up to microseconds: nanoseconds decide
        return super().__eq__(other) is True and self.nanosecond > self._nsec_of(other)

    def __lt__(self, other):
        return not (self > other or self == other)

    def __ge__(self, other):
        return not self < other

    def __le__(self, other):
        return not self > other

    def __ne__(self, other):
        return not self == other
def to_nsdatetime(dt, nsec):
    """Apply nanoseconds to datetime.

    Returns *dt* itself when *nsec* is zero (or otherwise false), else a
    new timestamp with the same date, time, tzinfo and fold whose
    sub-second part is *nsec* nanoseconds.
    """
    if nsec:
        return nsdatetime(dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second,
                          tzinfo=dt.tzinfo, fold=dt.fold, nanosecond=nsec)
    return dt
def to_nsecs(dt):
    """Convert datetime instance to nanoseconds.

    The result is the integer part of ``dt.timestamp()`` scaled to
    nanoseconds, plus the sub-second part of *dt*.
    """
    if isinstance(dt, nsdatetime):
        frac = dt.nanosecond
    else:
        frac = dt.microsecond * 1000
    whole = int(dt.timestamp())
    return whole * 1000000000 + frac
def custom_popen(cmd):
    """Disconnect cmd from parent fds, read only from stdout.

    The child gets no stdin, its stderr is merged into stdout and the
    pipe is unbuffered.  Raises RarCannotExec when the executable is
    missing or cannot be executed; other OSErrors propagate unchanged.
    """
    # CREATE_NO_WINDOW on windows, nothing special elsewhere
    flags = 0x08000000 if WIN32 else 0
    try:
        return Popen(cmd, bufsize=0, stdout=PIPE, stderr=STDOUT, stdin=DEVNULL,
                     creationflags=flags)
    except OSError as ex:
        if ex.errno == errno.ENOENT:
            raise RarCannotExec("Unrar not installed?") from None
        if ex.errno in (errno.EACCES, errno.EPERM):
            raise RarCannotExec("Cannot execute unrar") from None
        raise
def check_returncode(code, out, errmap):
    """Raise exception according to unrar exit code.

    *code* is the tool's exit status, *out* its captured output (added
    to the message when non-empty) and *errmap* a list indexed by exit
    code that holds exception classes.  Exit code 0 returns silently.

    Codes outside the map are handled as: 255 -> RarUserBreak, negative
    -> RarSignalExit, anything else -> RarUnknownError.  A code whose
    map slot is ``None`` is also reported as RarUnknownError, as there
    is no exception class to raise for it.
    """
    if code == 0:
        return

    exc = None
    if 0 < code < len(errmap):
        exc = errmap[code]
    elif code == 255:
        exc = RarUserBreak
    elif code < 0:
        exc = RarSignalExit
    if exc is None:
        # unmapped code or empty slot in the map
        exc = RarUnknownError

    # format message
    if out:
        msg = "%s [%d]: %s" % (exc.__doc__, code, out)
    else:
        msg = "%s [%d]" % (exc.__doc__, code)

    raise exc(msg)
def membuf_tempfile(memfile):
    """Write in-memory file object to real file.

    Copies *memfile* from its start into a new ``.rar`` temporary file
    and returns that file's name.  On any failure the temporary file is
    removed before the error is re-raised.
    """
    memfile.seek(0, 0)

    handle, path = mkstemp(suffix=".rar", dir=HACK_TMP_DIR)
    dst = os.fdopen(handle, "wb")

    try:
        shutil.copyfileobj(memfile, dst, BSIZE)
        dst.close()
    except BaseException:
        # also covers KeyboardInterrupt: never leave the tempfile behind
        dst.close()
        os.unlink(path)
        raise
    return path
3316 # Find working command-line tool
class ToolSetup:
    """Command-line builder for one external unpack tool.

    *setup* is a dict with keys ``open_cmd``, ``check_cmd``,
    ``password``, ``no_password`` and ``errmap``.
    """

    def __init__(self, setup):
        self.setup = setup

    def check(self):
        """Run the tool's check command; True if it exits with code 0."""
        argv = self.get_cmdline("check_cmd", None)
        try:
            proc = custom_popen(argv)
            proc.communicate()
        except RarCannotExec:
            return False
        return proc.returncode == 0

    def open_cmdline(self, pwd, rarfn, filefn=None):
        """Command line that writes archive member(s) to stdout."""
        argv = self.get_cmdline("open_cmd", pwd)
        argv.append(rarfn)
        if filefn:
            self.add_file_arg(argv, filefn)
        return argv

    def get_errmap(self):
        """Exit-code to exception-class list of this tool."""
        return self.setup["errmap"]

    def get_cmdline(self, key, pwd, nodash=False):
        """Build the base argument list for command *key*.

        The first configured element is the name of a module-level
        variable that holds the executable.  Except for ``check_cmd``,
        password arguments and (unless *nodash*) a ``--`` separator are
        appended.
        """
        argv = list(self.setup[key])
        argv[0] = globals()[argv[0]]
        if key == "check_cmd":
            return argv
        self.add_password_arg(argv, pwd)
        if not nodash:
            argv.append("--")
        return argv

    def add_file_arg(self, cmdline, filename):
        """Append the name of the member to operate on."""
        cmdline.append(filename)

    def add_password_arg(self, cmdline, pwd):
        """Append password switch to commandline.

        With *pwd* None the tool's ``no_password`` arguments are added.
        Raises RarCannotExec when a password is given but the tool has
        no password option.
        """
        if pwd is None:
            cmdline.extend(self.setup["no_password"])
            return

        if not isinstance(pwd, str):
            pwd = pwd.decode("utf8")
        switch = self.setup["password"]
        if switch is None:
            tool = self.setup["open_cmd"][0]
            raise RarCannotExec(f"{tool} does not support passwords")
        if isinstance(switch, str):
            # string form: switch and password form a single argument
            cmdline.append(switch + pwd)
        else:
            # sequence form: password follows as its own argument
            cmdline.extend(switch)
            cmdline.append(pwd)
# Settings for the unrar tool, in the layout that ToolSetup reads.
UNRAR_CONFIG = {
    # first element is the name of the module variable holding the executable
    "open_cmd": ("UNRAR_TOOL", "p", "-inul"),
    "check_cmd": ("UNRAR_TOOL", "-inul"),
    # string form: the password is appended directly to the switch
    "password": "-p",
    # arguments used when no password is given
    "no_password": ("-p-",),
    # map return code to exception class, codes from rar.txt
    "errmap": [None,
               RarWarning, RarFatalError, RarCRCError, RarLockedArchiveError,    # 1..4
               RarWriteError, RarOpenError, RarUserError, RarMemoryError,        # 5..8
               RarCreateError, RarNoFilesError, RarWrongPassword]                # 9..11
}
# Problems with unar RAR backend:
# - Does not support RAR2 locked files [fails to read]
# - Does not support RAR5 Blake2sp hash [reading works]
UNAR_CONFIG = {
    "open_cmd": ("UNAR_TOOL", "-q", "-o", "-"),
    "check_cmd": ("UNAR_TOOL", "-version"),
    # tuple form: the password is passed as a separate argument
    "password": ("-p",),
    "no_password": ("-p", ""),
    # no exit codes are mapped to specific exceptions
    "errmap": [None],
}
# Problems with libarchive RAR backend:
# - Does not support solid archives.
# - Does not support password-protected archives.
# - Does not support RARVM-based compression filters.
BSDTAR_CONFIG = {
    "open_cmd": ("BSDTAR_TOOL", "-x", "--to-stdout", "-f"),
    "check_cmd": ("BSDTAR_TOOL", "--version"),
    # None: a request with a password is rejected for this tool
    "password": None,
    "no_password": (),
    # no exit codes are mapped to specific exceptions
    "errmap": [None],
}
# Settings for the tool named by SEVENZIP_TOOL.
SEVENZIP_CONFIG = {
    "open_cmd": ("SEVENZIP_TOOL", "e", "-so", "-bb0"),
    "check_cmd": ("SEVENZIP_TOOL", "i"),
    # string form: the password is appended directly to the switch
    "password": "-p",
    "no_password": ("-p",),
    # exit code -> exception class; None marks codes without a class
    "errmap": [None,
               RarWarning, RarFatalError, None, None,               # 1..4
               None, None, RarUserError, RarMemoryError]            # 5..8
}
# Same settings as SEVENZIP_CONFIG, for the tool named by SEVENZIP2_TOOL.
SEVENZIP2_CONFIG = {
    "open_cmd": ("SEVENZIP2_TOOL", "e", "-so", "-bb0"),
    "check_cmd": ("SEVENZIP2_TOOL", "i"),
    # string form: the password is appended directly to the switch
    "password": "-p",
    "no_password": ("-p",),
    # exit code -> exception class; None marks codes without a class
    "errmap": [None,
               RarWarning, RarFatalError, None, None,               # 1..4
               None, None, RarUserError, RarMemoryError]            # 5..8
}
# ToolSetup picked by tool_setup(), cached here; None until a tool is found
CURRENT_SETUP = None
def tool_setup(unrar=True, unar=True, bsdtar=True, sevenzip=True, sevenzip2=True, force=False):
    """Pick a tool, return cached ToolSetup.

    The enabled tools are probed in the order unrar, unar, 7z, second
    7z variant, bsdtar; the first one whose check command succeeds is
    cached and returned.  A cached result is returned without probing
    unless *force* is set.  Raises RarCannotExec if no tool works.
    """
    global CURRENT_SETUP
    if force:
        CURRENT_SETUP = None
    if CURRENT_SETUP is not None:
        return CURRENT_SETUP

    choices = (
        (unrar, UNRAR_CONFIG),
        (unar, UNAR_CONFIG),
        (sevenzip, SEVENZIP_CONFIG),
        (sevenzip2, SEVENZIP2_CONFIG),
        (bsdtar, BSDTAR_CONFIG),
    )
    for enabled, conf in choices:
        if not enabled:
            continue
        candidate = ToolSetup(conf)
        if candidate.check():
            CURRENT_SETUP = candidate
            return candidate
    raise RarCannotExec("Cannot find working tool")
def main(args):
    """Minimal command-line interface for rarfile module.
    """
    # NOTE: the docstring above doubles as the --help description
    import argparse

    parser = argparse.ArgumentParser(description=main.__doc__)
    # exactly one of the actions must be given
    actions = parser.add_mutually_exclusive_group(required=True)
    actions.add_argument("-l", "--list", metavar="<rarfile>",
                         help="Show archive listing")
    actions.add_argument("-e", "--extract", nargs=2,
                         metavar=("<rarfile>", "<output_dir>"),
                         help="Extract archive into target dir")
    actions.add_argument("-t", "--test", metavar="<rarfile>",
                         help="Test if a archive is valid")
    opts = parser.parse_args(args)

    if opts.list:
        with RarFile(opts.list) as archive:
            archive.printdir()
    elif opts.test:
        with RarFile(opts.test) as archive:
            archive.testrar()
    elif opts.extract:
        archive_name, target_dir = opts.extract[0], opts.extract[1]
        with RarFile(archive_name) as archive:
            archive.extractall(target_dir)
# run the command-line interface when executed as a script
if __name__ == "__main__":
    main(sys.argv[1:])