doc: add breaking change note to v3.2 release notes
[rarfile.git] / rarfile.py
blob37c6499241d5505bee4d2ac6d7704bc9f1f17ea0
1 # rarfile.py
3 # Copyright (c) 2005-2020 Marko Kreen <markokr@gmail.com>
5 # Permission to use, copy, modify, and/or distribute this software for any
6 # purpose with or without fee is hereby granted, provided that the above
7 # copyright notice and this permission notice appear in all copies.
9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 """RAR archive reader.
19 This is Python module for Rar archive reading. The interface
20 is made as :mod:`zipfile`-like as possible.
22 Basic logic:
23 - Parse archive structure with Python.
24 - Extract non-compressed files with Python
25 - Extract compressed files with unrar.
26 - Optionally write compressed data to temp file to speed up unrar,
27 otherwise it needs to scan whole archive on each execution.
29 Example::
31 import rarfile
33 rf = rarfile.RarFile("myarchive.rar")
34 for f in rf.infolist():
35 print(f.filename, f.file_size)
36 if f.filename == "README":
37 print(rf.read(f))
39 Archive files can also be accessed via file-like object returned
40 by :meth:`RarFile.open`::
42 import rarfile
44 with rarfile.RarFile("archive.rar") as rf:
45 with rf.open("README") as f:
46 for ln in f:
47 print(ln.strip())
49 For decompression to work, either ``unrar`` or ``unar`` tool must be in PATH.
50 """
52 import errno
53 import io
54 import os
55 import re
56 import shutil
57 import struct
58 import sys
59 import warnings
60 from binascii import crc32, hexlify
61 from datetime import datetime, timezone
62 from hashlib import blake2s, pbkdf2_hmac, sha1, sha256
63 from pathlib import Path
64 from struct import Struct, pack, unpack
65 from subprocess import DEVNULL, PIPE, STDOUT, Popen
66 from tempfile import mkstemp
# Placeholder so the name exists even when the Crypto package is not imported.
AES = None

# only needed for encrypted headers
#
# _have_crypto records which backend was importable:
#   1 - the "cryptography" package
#   2 - the "Crypto" package (only tried when "cryptography" is missing)
#   0 - neither could be imported
try:
    try:
        from cryptography.hazmat.backends import default_backend
        from cryptography.hazmat.primitives.ciphers import (
            Cipher, algorithms, modes,
        )
        _have_crypto = 1
    except ImportError:
        from Crypto.Cipher import AES
        _have_crypto = 2
except ImportError:
    _have_crypto = 0
class AES_CBC_Decrypt:
    """AES-CBC decryptor with a single ``decrypt(data)`` callable.

    The ``decrypt`` attribute is bound to whichever crypto backend the
    module managed to import (see ``_have_crypto``).
    """

    def __init__(self, key, iv):
        if _have_crypto == 2:
            # "Crypto" package backend.
            legacy_cipher = AES.new(key, AES.MODE_CBC, iv)
            self.decrypt = legacy_cipher.decrypt
            return

        # "cryptography" package backend; update() decrypts incrementally.
        cipher = Cipher(algorithms.AES(key), modes.CBC(iv), default_backend())
        decryptor = cipher.decryptor()
        self.decrypt = decryptor.update
__version__ = "4.1"

# export only interesting items
__all__ = ["get_rar_version", "is_rarfile", "is_rarfile_sfx", "RarInfo", "RarFile", "RarExtFile"]

##
## Module configuration.  Can be tuned after importing.
##

#: executable for unrar tool
UNRAR_TOOL = "unrar"

#: executable for unar tool
UNAR_TOOL = "unar"

#: executable for bsdtar tool
BSDTAR_TOOL = "bsdtar"

#: executable for p7zip/7z tool
SEVENZIP_TOOL = "7z"

#: executable for alternative 7z tool
SEVENZIP2_TOOL = "7zz"

#: default fallback charset (used by RarFile when no charset= is given)
DEFAULT_CHARSET = "windows-1252"

#: list of encodings to try, with fallback to DEFAULT_CHARSET if none succeed
TRY_ENCODINGS = ("utf8", "utf-16le")

#: whether to speed up decompression by using tmp archive
USE_EXTRACT_HACK = 1

#: limit the filesize for tmp archive usage
#: (entries whose file_size exceeds this are not extracted via tmp archive)
HACK_SIZE_LIMIT = 20 * 1024 * 1024

#: set specific directory for mkstemp() used by hack dir usage
#: (None lets mkstemp() pick its default location)
HACK_TMP_DIR = None

#: Separator for path name components.  Always "/".
PATH_SEP = "/"
##
## rar constants
##

# block types
# (the trailing comment on each line is the ASCII character of the value)
RAR_BLOCK_MARK = 0x72  # r
RAR_BLOCK_MAIN = 0x73  # s
RAR_BLOCK_FILE = 0x74  # t
RAR_BLOCK_OLD_COMMENT = 0x75  # u
RAR_BLOCK_OLD_EXTRA = 0x76  # v
RAR_BLOCK_OLD_SUB = 0x77  # w
RAR_BLOCK_OLD_RECOVERY = 0x78  # x
RAR_BLOCK_OLD_AUTH = 0x79  # y
RAR_BLOCK_SUB = 0x7a  # z
RAR_BLOCK_ENDARC = 0x7b  # {

# flags for RAR_BLOCK_MAIN
RAR_MAIN_VOLUME = 0x0001
RAR_MAIN_COMMENT = 0x0002
RAR_MAIN_LOCK = 0x0004
RAR_MAIN_SOLID = 0x0008
RAR_MAIN_NEWNUMBERING = 0x0010
RAR_MAIN_AUTH = 0x0020
RAR_MAIN_RECOVERY = 0x0040
RAR_MAIN_PASSWORD = 0x0080
RAR_MAIN_FIRSTVOLUME = 0x0100
RAR_MAIN_ENCRYPTVER = 0x0200

# flags for RAR_BLOCK_FILE
RAR_FILE_SPLIT_BEFORE = 0x0001
RAR_FILE_SPLIT_AFTER = 0x0002
RAR_FILE_PASSWORD = 0x0004
RAR_FILE_COMMENT = 0x0008
RAR_FILE_SOLID = 0x0010
# RAR_FILE_DICT* values all lie inside RAR_FILE_DICTMASK (a 3-bit field);
# RAR_FILE_DIRECTORY is the all-ones value of that same field, so it must be
# tested with "(flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY".
RAR_FILE_DICTMASK = 0x00e0
RAR_FILE_DICT64 = 0x0000
RAR_FILE_DICT128 = 0x0020
RAR_FILE_DICT256 = 0x0040
RAR_FILE_DICT512 = 0x0060
RAR_FILE_DICT1024 = 0x0080
RAR_FILE_DICT2048 = 0x00a0
RAR_FILE_DICT4096 = 0x00c0
RAR_FILE_DIRECTORY = 0x00e0
RAR_FILE_LARGE = 0x0100
RAR_FILE_UNICODE = 0x0200
RAR_FILE_SALT = 0x0400
RAR_FILE_VERSION = 0x0800
RAR_FILE_EXTTIME = 0x1000
RAR_FILE_EXTFLAGS = 0x2000

# flags for RAR_BLOCK_ENDARC
RAR_ENDARC_NEXT_VOLUME = 0x0001
RAR_ENDARC_DATACRC = 0x0002
RAR_ENDARC_REVSPACE = 0x0004
RAR_ENDARC_VOLNR = 0x0008

# flags common to all blocks
RAR_SKIP_IF_UNKNOWN = 0x4000
RAR_LONG_BLOCK = 0x8000

# Host OS types
RAR_OS_MSDOS = 0  #: MSDOS (only in RAR3)
RAR_OS_OS2 = 1  #: OS2 (only in RAR3)
RAR_OS_WIN32 = 2  #: Windows
RAR_OS_UNIX = 3  #: UNIX
RAR_OS_MACOS = 4  #: MacOS (only in RAR3)
RAR_OS_BEOS = 5  #: BeOS (only in RAR3)

# Compression methods - "0".."5"
# (values are the ASCII codes of the digits "0".."5")
RAR_M0 = 0x30  #: No compression.
RAR_M1 = 0x31  #: Compression level `-m1` - Fastest compression.
RAR_M2 = 0x32  #: Compression level `-m2`.
RAR_M3 = 0x33  #: Compression level `-m3`.
RAR_M4 = 0x34  #: Compression level `-m4`.
RAR_M5 = 0x35  #: Compression level `-m5` - Maximum compression.
##
## RAR5 constants
##

# RAR5_BLOCK_*: block type ids
RAR5_BLOCK_MAIN = 1
RAR5_BLOCK_FILE = 2
RAR5_BLOCK_SERVICE = 3
RAR5_BLOCK_ENCRYPTION = 4
RAR5_BLOCK_ENDARC = 5

# RAR5_BLOCK_FLAG_*: bit flags (one bit each)
RAR5_BLOCK_FLAG_EXTRA_DATA = 0x01
RAR5_BLOCK_FLAG_DATA_AREA = 0x02
RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN = 0x04
RAR5_BLOCK_FLAG_SPLIT_BEFORE = 0x08
RAR5_BLOCK_FLAG_SPLIT_AFTER = 0x10
RAR5_BLOCK_FLAG_DEPENDS_PREV = 0x20
RAR5_BLOCK_FLAG_KEEP_WITH_PARENT = 0x40

# RAR5_MAIN_FLAG_*: bit flags
RAR5_MAIN_FLAG_ISVOL = 0x01
RAR5_MAIN_FLAG_HAS_VOLNR = 0x02
RAR5_MAIN_FLAG_SOLID = 0x04
RAR5_MAIN_FLAG_RECOVERY = 0x08
RAR5_MAIN_FLAG_LOCKED = 0x10

# RAR5_FILE_FLAG_*: bit flags
RAR5_FILE_FLAG_ISDIR = 0x01
RAR5_FILE_FLAG_HAS_MTIME = 0x02
RAR5_FILE_FLAG_HAS_CRC32 = 0x04
RAR5_FILE_FLAG_UNKNOWN_SIZE = 0x08

RAR5_COMPR_SOLID = 0x40

RAR5_ENC_FLAG_HAS_CHECKVAL = 0x01

RAR5_ENDARC_FLAG_NEXT_VOL = 0x01

# RAR5_XFILE_*: record type ids
RAR5_XFILE_ENCRYPTION = 1
RAR5_XFILE_HASH = 2
RAR5_XFILE_TIME = 3
RAR5_XFILE_VERSION = 4
RAR5_XFILE_REDIR = 5
RAR5_XFILE_OWNER = 6
RAR5_XFILE_SERVICE = 7

# RAR5_XTIME_*: bit flags
RAR5_XTIME_UNIXTIME = 0x01
RAR5_XTIME_HAS_MTIME = 0x02
RAR5_XTIME_HAS_CTIME = 0x04
RAR5_XTIME_HAS_ATIME = 0x08
RAR5_XTIME_UNIXTIME_NS = 0x10

RAR5_XENC_CIPHER_AES256 = 0

RAR5_XENC_CHECKVAL = 0x01
RAR5_XENC_TWEAKED = 0x02

RAR5_XHASH_BLAKE2SP = 0

# RAR5_XREDIR_*: redirection type ids; these are the first element of
# RarInfo.file_redir, a (type, flags, target) tuple
RAR5_XREDIR_UNIX_SYMLINK = 1
RAR5_XREDIR_WINDOWS_SYMLINK = 2
RAR5_XREDIR_WINDOWS_JUNCTION = 3
RAR5_XREDIR_HARD_LINK = 4
RAR5_XREDIR_FILE_COPY = 5

# bit in the *flags* element of RarInfo.file_redir (not in the type element)
RAR5_XREDIR_ISDIR = 0x01

# RAR5_XOWNER_*: bit flags
RAR5_XOWNER_UNAME = 0x01
RAR5_XOWNER_GNAME = 0x02
RAR5_XOWNER_UID = 0x04
RAR5_XOWNER_GID = 0x08

RAR5_OS_WINDOWS = 0
RAR5_OS_UNIX = 1

# DOS_MODE_*: bits of a dos-style RarInfo.mode value
DOS_MODE_ARCHIVE = 0x20
DOS_MODE_DIR = 0x10
DOS_MODE_SYSTEM = 0x04
DOS_MODE_HIDDEN = 0x02
DOS_MODE_READONLY = 0x01

RAR5_PW_CHECK_SIZE = 8
RAR5_PW_SUM_SIZE = 4
##
## internal constants
##

# Archive signatures: RAR_ID is reported as RAR_V3, RAR5_ID as RAR_V5.
# They share the first six bytes, which is what the SFX scan searches for.
RAR_ID = b"Rar!\x1a\x07\x00"
RAR5_ID = b"Rar!\x1a\x07\x01\x00"

WIN32 = sys.platform == "win32"
# chunk size for bulk reads/copies
BSIZE = 512 * 1024 if WIN32 else 64 * 1024

# size of the second (large) read done when scanning for an SFX-embedded archive
SFX_MAX_SIZE = 2 * 1024 * 1024
RAR_V3 = 3
RAR_V5 = 5

# character-class bodies; the WIN32 variant additionally matches ":", "^" and "\"
_BAD_CHARS = r"""\x00-\x1F<>|"?*"""
RC_BAD_CHARS_UNIX = re.compile(r"[%s]" % _BAD_CHARS)
RC_BAD_CHARS_WIN32 = re.compile(r"[%s:^\\]" % _BAD_CHARS)

# when true, even uncompressed entries are read through the external tool
FORCE_TOOL = False
def _find_sfx_header(xfile):
    """Locate a RAR signature near the start of *xfile*.

    The file is read in two steps (64 bytes, then up to SFX_MAX_SIZE more);
    after each step all data read so far is searched for a signature.

    Returns a ``(version, offset)`` tuple where version is RAR_V3 or RAR_V5,
    or ``(0, 0)`` when no signature is found.
    """
    # Both signatures start with the same bytes, so search for that prefix
    # and then check which full signature follows.
    marker = RAR_ID[:-1]
    marker_len = len(marker)
    collected = io.BytesIO()

    with XFile(xfile) as fd:
        for chunk_size in (64, SFX_MAX_SIZE):
            chunk = fd.read(chunk_size)
            if not chunk:
                break
            collected.write(chunk)
            haystack = collected.getvalue()

            pos = haystack.find(marker)
            while pos >= 0:
                if haystack.startswith(RAR_ID, pos):
                    return RAR_V3, pos
                if haystack.startswith(RAR5_ID, pos):
                    return RAR_V5, pos
                pos = haystack.find(marker, pos + marker_len)
    return 0, 0
341 ## Public interface
def get_rar_version(xfile):
    """Return the RAR format version of *xfile* based on its leading bytes.

    Returns RAR_V3 or RAR_V5 when the file starts with the corresponding
    signature, otherwise 0.  Only the start of the file is inspected.
    """
    with XFile(xfile) as fd:
        head = fd.read(len(RAR5_ID))

    for signature, version in ((RAR_ID, RAR_V3), (RAR5_ID, RAR_V5)):
        if head.startswith(signature):
            return version
    return 0
def is_rarfile(xfile):
    """Return True if *xfile* starts with a RAR signature.

    A file that cannot be opened or read (OSError) counts as "not a RAR file".
    """
    try:
        version = get_rar_version(xfile)
    except OSError:
        # File not found or not accessible, ignore
        return False
    return version > 0
def is_rarfile_sfx(xfile):
    """Return True if *xfile* contains a RAR signature, SFX archives included.

    Unlike :func:`is_rarfile` the signature need not be at offset 0;
    up to about 2M of the file is read to look for it.
    """
    version, _offset = _find_sfx_header(xfile)
    return version > 0
class Error(Exception):
    """Root of the rarfile exception hierarchy."""


class BadRarFile(Error):
    """Archive contains incorrect data."""


class NotRarFile(Error):
    """File is not a RAR archive."""


class BadRarName(Error):
    """Multipart name components could not be guessed."""


class NoRarEntry(Error):
    """Requested file was not found in the archive."""


class PasswordRequired(Error):
    """A password is needed to access the file."""


class NeedFirstVolume(Error):
    """Processing must start from the first volume.

    Attributes:

        current_volume
            Volume number of current file or None if not known
    """

    def __init__(self, msg, volume):
        super().__init__(msg)
        self.current_volume = volume


class NoCrypto(Error):
    """Encrypted headers cannot be parsed because no crypto library is available."""


class RarExecError(Error):
    """Problem reported by the unrar/rar tool."""


class RarWarning(RarExecError):
    """Non-fatal error."""


class RarFatalError(RarExecError):
    """Fatal error."""


class RarCRCError(RarExecError):
    """CRC error during unpacking."""


class RarLockedArchiveError(RarExecError):
    """Locked archive must not be modified."""


class RarWriteError(RarExecError):
    """Write error."""


class RarOpenError(RarExecError):
    """Open error."""


class RarUserError(RarExecError):
    """User error."""


class RarMemoryError(RarExecError):
    """Memory error."""


class RarCreateError(RarExecError):
    """Create error."""


class RarNoFilesError(RarExecError):
    """No files matching the pattern were found."""


class RarUserBreak(RarExecError):
    """User stop."""


class RarWrongPassword(RarExecError):
    """Incorrect password."""


class RarUnknownError(RarExecError):
    """Unknown exit code."""


class RarSignalExit(RarExecError):
    """Unrar exited with signal."""


class RarCannotExec(RarExecError):
    """Executable not found."""


class UnsupportedWarning(UserWarning):
    """Archive uses features that are unsupported by rarfile.

    .. versionadded:: 4.0
    """
class RarInfo:
    r"""Description of a single archive entry.

    :class:`~datetime.datetime` timestamps carry no timezone for RAR3
    archives and are in UTC for RAR5 archives.

    Attributes:

        filename
            File name with relative path.
            Path separator is "/".  Always unicode string.

        date_time
            File modification timestamp.   As tuple of (year, month, day, hour, minute, second).
            RAR5 allows archives where it is missing, it's None then.

        comment
            Optional file comment field.  Unicode string.  (RAR3-only)

        file_size
            Uncompressed size.

        compress_size
            Compressed size.

        compress_type
            Compression method: one of :data:`RAR_M0` .. :data:`RAR_M5` constants.

        extract_version
            Minimal Rar version needed for decompressing.  As (major*10 + minor),
            so 2.9 is 29.

            RAR3: 10, 20, 29

            RAR5 does not have such field in archive, it's simply set to 50.

        host_os
            Host OS type, one of RAR_OS_* constants.

            RAR3: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`, :data:`RAR_OS_MSDOS`,
            :data:`RAR_OS_OS2`, :data:`RAR_OS_BEOS`.

            RAR5: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`.

        mode
            File attributes. May be either dos-style or unix-style, depending on host_os.

        mtime
            File modification time.  Same value as :attr:`date_time`
            but as :class:`~datetime.datetime` object with extended precision.

        ctime
            Optional time field: creation time.  As :class:`~datetime.datetime` object.

        atime
            Optional time field: last access time.  As :class:`~datetime.datetime` object.

        arctime
            Optional time field: archival time.  As :class:`~datetime.datetime` object.
            (RAR3-only)

        CRC
            CRC-32 of uncompressed file, unsigned int.

            RAR5: may be None.

        blake2sp_hash
            Blake2SP hash over decompressed data.  (RAR5-only)

        volume
            Volume nr, starting from 0.

        volume_file
            Volume file name, where file starts.

        file_redir
            If not None, file is link of some sort.  Contains tuple of (type, flags, target).
            (RAR5-only)

            Type is one of constants:

                :data:`RAR5_XREDIR_UNIX_SYMLINK`
                    Unix symlink.
                :data:`RAR5_XREDIR_WINDOWS_SYMLINK`
                    Windows symlink.
                :data:`RAR5_XREDIR_WINDOWS_JUNCTION`
                    Windows junction.
                :data:`RAR5_XREDIR_HARD_LINK`
                    Hard link to target.
                :data:`RAR5_XREDIR_FILE_COPY`
                    Current file is copy of another archive entry.

            Flags may contain bits:

                :data:`RAR5_XREDIR_ISDIR`
                    Symlink points to directory.
    """

    # fields shared with zipfile.ZipInfo
    filename = None
    file_size = None
    compress_size = None
    date_time = None
    CRC = None
    volume = None
    orig_filename = None

    # extended timestamps, datetime() objects when present
    mtime = None
    ctime = None
    atime = None

    extract_version = None
    mode = None
    host_os = None
    compress_type = None

    # present only in RAR3 archives
    comment = None
    arctime = None

    # present only in RAR5 archives
    blake2sp_hash = None
    file_redir = None

    # parser bookkeeping
    flags = 0
    type = None

    # zipfile compat
    def is_dir(self):
        """Tell whether the entry is a directory; this base class always says False.

        .. versionadded:: 4.0
        """
        return False

    def is_symlink(self):
        """Tell whether the entry is a symlink; this base class always says False.

        .. versionadded:: 4.0
        """
        return False

    def is_file(self):
        """Tell whether the entry is a normal file; this base class always says False.

        .. versionadded:: 4.0
        """
        return False

    def needs_password(self):
        """Tell whether the entry's data is stored password-protected.

        Only entries of type RAR_BLOCK_FILE can report True.
        """
        if self.type != RAR_BLOCK_FILE:
            return False
        return (self.flags & RAR_FILE_PASSWORD) > 0

    def isdir(self):
        """Old spelling of :meth:`is_dir`.

        .. deprecated:: 4.0
        """
        return self.is_dir()
class RarFile:
    """Parse RAR structure, provide access to files in archive.

    Parameters:

        file
            archive file name or file-like object.
        mode
            only "r" is supported.
        charset
            fallback charset to use, if filenames are not already Unicode-enabled.
        info_callback
            debug callback, gets to see all archive entries.
        crc_check
            set to False to disable CRC checks
        errors
            Either "stop" to quietly stop parsing on errors,
            or "strict" to raise errors.  Default is "stop".
        part_only
            If True, read only single file and allow it to be middle-part
            of multi-volume archive.

            .. versionadded:: 4.0
    """

    #: File name, if available.  Unicode string or None.
    filename = None

    #: Archive comment.  Unicode string or None.
    comment = None

    def __init__(self, file, mode="r", charset=None, info_callback=None,
                 crc_check=True, errors="stop", part_only=False):
        """Remember the settings and parse the archive headers.

        Raises ValueError for an unknown ``errors`` value,
        NotImplementedError for any mode other than "r", and whatever
        parsing raises (e.g. NotRarFile).
        """
        if is_filelike(file):
            self.filename = getattr(file, "name", None)
        else:
            if isinstance(file, Path):
                file = str(file)
            self.filename = file
        self._rarfile = file

        self._charset = charset or DEFAULT_CHARSET
        self._info_callback = info_callback
        self._crc_check = crc_check
        self._part_only = part_only
        self._password = None
        self._file_parser = None

        if errors == "stop":
            self._strict = False
        elif errors == "strict":
            self._strict = True
        else:
            raise ValueError("Invalid value for errors= parameter.")

        if mode != "r":
            raise NotImplementedError("RarFile supports only mode=r")

        self._parse()

    def __enter__(self):
        """Open context."""
        return self

    def __exit__(self, typ, value, traceback):
        """Exit context."""
        self.close()

    def __iter__(self):
        """Iterate over members."""
        return iter(self.infolist())

    def setpassword(self, pwd):
        """Sets the password to use when extracting.

        When the archive has encrypted headers the archive is parsed again
        with the new password; otherwise the password is just handed to
        the existing parser.
        """
        self._password = pwd
        if self._file_parser:
            if self._file_parser.has_header_encryption():
                # headers were unreadable without password, start over
                self._file_parser = None
        if not self._file_parser:
            self._parse()
        else:
            self._file_parser.setpassword(self._password)

    def needs_password(self):
        """Returns True if any archive entries require password for extraction.
        """
        return self._file_parser.needs_password()

    def namelist(self):
        """Return list of filenames in archive.
        """
        return [f.filename for f in self.infolist()]

    def infolist(self):
        """Return RarInfo objects for all files/directories in archive.
        """
        return self._file_parser.infolist()

    def volumelist(self):
        """Returns filenames of archive volumes.

        In case of single-volume archive, the list contains
        just the name of main archive file.
        """
        return self._file_parser.volumelist()

    def getinfo(self, name):
        """Return RarInfo for file.
        """
        return self._file_parser.getinfo(name)

    def getinfo_orig(self, name):
        """Return RarInfo for file source.

        RAR5: if name is hard-linked or copied file,
        returns original entry with original filename.

        .. versionadded:: 4.1
        """
        return self._file_parser.getinfo_orig(name)

    def open(self, name, mode="r", pwd=None):
        """Returns file-like object (:class:`RarExtFile`) from where the data can be read.

        The object implements :class:`io.RawIOBase` interface, so it can
        be further wrapped with :class:`io.BufferedReader`
        and :class:`io.TextIOWrapper`.

        The object is seekable, although the seeking is fast only on
        uncompressed files, on compressed files the seeking is implemented
        by reading ahead and/or restarting the decompression.

        Parameters:

            name
                file name or RarInfo instance.
            mode
                must be "r"
            pwd
                password to use for extracting.

        Raises NotImplementedError for other modes,
        :class:`io.UnsupportedOperation` for directories and
        :class:`PasswordRequired` when the entry is protected and
        no password is available.
        """

        if mode != "r":
            raise NotImplementedError("RarFile.open() supports only mode=r")

        # entry lookup
        inf = self.getinfo(name)
        if inf.is_dir():
            raise io.UnsupportedOperation("Directory does not have any data: " + inf.filename)

        # check password
        if inf.needs_password():
            pwd = pwd or self._password
            if pwd is None:
                raise PasswordRequired("File %s requires password" % inf.filename)
        else:
            # entry is not protected, do not pass password along
            pwd = None

        return self._file_parser.open(inf, pwd)

    def read(self, name, pwd=None):
        """Return uncompressed data for archive entry.

        For longer files using :meth:`~RarFile.open` may be better idea.

        Parameters:

            name
                filename or RarInfo instance
            pwd
                password to use for extracting.
        """

        with self.open(name, "r", pwd) as f:
            return f.read()

    def close(self):
        """Release open resources."""
        pass

    def printdir(self, file=None):
        """Print archive file list to stdout or given file.
        """
        if file is None:
            file = sys.stdout
        for f in self.infolist():
            print(f.filename, file=file)

    def extract(self, member, path=None, pwd=None):
        """Extract single file into current directory.

        Parameters:

            member
                filename or :class:`RarInfo` instance
            path
                optional destination path
            pwd
                optional password to use

        Returns the destination file name, or None if the entry
        could not be created.
        """
        inf = self.getinfo(member)
        return self._extract_one(inf, path, pwd, True)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all files into current directory.

        Parameters:

            path
                optional destination path
            members
                optional filename or :class:`RarInfo` instance list to extract
            pwd
                optional password to use
        """
        if members is None:
            members = self.namelist()

        done = set()
        dirs = []
        for m in members:
            inf = self.getinfo(m)
            # directory attributes are applied later, after their content exists
            dst = self._extract_one(inf, path, pwd, not inf.is_dir())
            if inf.is_dir():
                if dst not in done:
                    dirs.append((dst, inf))
                    done.add(dst)
        if dirs:
            # reverse order: nested directories before their parents
            dirs.sort(reverse=True)
            for dst, inf in dirs:
                self._set_attrs(inf, dst)

    def testrar(self, pwd=None):
        """Read all files and test CRC.
        """
        for member in self.infolist():
            if member.is_file():
                with self.open(member, 'r', pwd) as f:
                    empty_read(f, member.file_size, BSIZE)

    def strerror(self):
        """Return error string if parsing failed or None if no problems.
        """
        if not self._file_parser:
            return "Not a RAR file"
        return self._file_parser.strerror()

    ##
    ## private methods
    ##

    def _parse(self):
        """Run parser for file type
        """
        ver, sfx_ofs = _find_sfx_header(self._rarfile)
        if ver == RAR_V3:
            p3 = RAR3Parser(self._rarfile, self._password, self._crc_check,
                            self._charset, self._strict, self._info_callback,
                            sfx_ofs, self._part_only)
            self._file_parser = p3  # noqa
        elif ver == RAR_V5:
            p5 = RAR5Parser(self._rarfile, self._password, self._crc_check,
                            self._charset, self._strict, self._info_callback,
                            sfx_ofs, self._part_only)
            self._file_parser = p5  # noqa
        else:
            raise NotRarFile("Not a RAR file")

        self._file_parser.parse()
        self.comment = self._file_parser.comment

    def _extract_one(self, info, path, pwd, set_attrs):
        """Create one entry below *path* (current directory when None).

        Returns the destination name, or None when the entry is neither
        file, directory nor supported symlink.
        """
        fname = sanitize_filename(
            info.filename, os.path.sep, WIN32
        )

        if path is None:
            path = os.getcwd()
        else:
            path = os.fspath(path)
        dstfn = os.path.join(path, fname)

        dirname = os.path.dirname(dstfn)
        if dirname and dirname != ".":
            os.makedirs(dirname, exist_ok=True)

        if info.is_file():
            return self._make_file(info, dstfn, pwd, set_attrs)
        if info.is_dir():
            return self._make_dir(info, dstfn, pwd, set_attrs)
        if info.is_symlink():
            return self._make_symlink(info, dstfn, pwd, set_attrs)
        return None

    def _create_helper(self, name, flags, info):
        """Opener used when creating extracted files; hook for subclasses."""
        return os.open(name, flags)

    def _make_file(self, info, dstfn, pwd, set_attrs):
        """Copy entry data into *dstfn*, optionally applying attributes."""
        def helper(name, flags):
            return self._create_helper(name, flags, info)
        with self.open(info, "r", pwd) as src:
            with open(dstfn, "wb", opener=helper) as dst:
                shutil.copyfileobj(src, dst)
        if set_attrs:
            self._set_attrs(info, dstfn)
        return dstfn

    def _make_dir(self, info, dstfn, pwd, set_attrs):
        """Create directory *dstfn*, optionally applying attributes."""
        os.makedirs(dstfn, exist_ok=True)
        if set_attrs:
            self._set_attrs(info, dstfn)
        return dstfn

    def _make_symlink(self, info, dstfn, pwd, set_attrs):
        """Create symlink *dstfn* for a link entry.

        Returns *dstfn*, or None (after an UnsupportedWarning) when the
        link type cannot be created.
        """
        target_is_directory = False
        if info.host_os == RAR_OS_UNIX:
            # link target is stored as entry data
            link_name = self.read(info, pwd)
            target_is_directory = (info.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY
        elif info.file_redir:
            redir_type, redir_flags, link_name = info.file_redir
            if redir_type == RAR5_XREDIR_WINDOWS_JUNCTION:
                warnings.warn(f"Windows junction not supported - {info.filename}", UnsupportedWarning)
                return None
            # ISDIR is a bit of the redirection *flags*; redir_type is an
            # enumeration, so masking it would give an unrelated answer.
            target_is_directory = (redir_flags & RAR5_XREDIR_ISDIR) > 0
        else:
            warnings.warn(f"Unsupported link type - {info.filename}", UnsupportedWarning)
            return None

        os.symlink(link_name, dstfn, target_is_directory=target_is_directory)
        return dstfn

    def _set_attrs(self, info, dstfn):
        """Apply permission bits and timestamps from *info* to *dstfn*."""
        if info.host_os == RAR_OS_UNIX:
            os.chmod(dstfn, info.mode & 0o777)
        elif info.host_os in (RAR_OS_WIN32, RAR_OS_MSDOS):
            # only keep R/O attr, except for dirs on win32
            if info.mode & DOS_MODE_READONLY and (info.is_file() or not WIN32):
                st = os.stat(dstfn)
                new_mode = st.st_mode & ~0o222
                os.chmod(dstfn, new_mode)

        if info.mtime:
            mtime_ns = to_nsecs(info.mtime)
            # fall back to mtime when entry has no access time
            atime_ns = to_nsecs(info.atime) if info.atime else mtime_ns
            os.utime(dstfn, ns=(atime_ns, mtime_ns))
1005 # File format parsing
class CommonParser:
    """Shared parser parts."""
    # first MAIN block seen in the current volume (reset on volume switch)
    _main = None
    # reset together with _main on volume switch; when set, headers are
    # treated as encrypted
    _hdrenc_main = None
    # set once any parsed block reports that a password is needed
    _needs_password = False
    # file object currently being parsed; only set while parse() runs
    _fd = None
    # signature bytes expected at the start of every volume
    _expect_sig = None
    # last message passed to _set_error(), None if no problems
    _parse_error = None
    _password = None
    # archive comment
    comment = None
1019 def __init__(self, rarfile, password, crc_check, charset, strict,
1020 info_cb, sfx_offset, part_only):
1021 self._rarfile = rarfile
1022 self._password = password
1023 self._crc_check = crc_check
1024 self._charset = charset
1025 self._strict = strict
1026 self._info_callback = info_cb
1027 self._info_list = []
1028 self._info_map = {}
1029 self._vol_list = []
1030 self._sfx_offset = sfx_offset
1031 self._part_only = part_only
1033 def has_header_encryption(self):
1034 """Returns True if headers are encrypted
1036 if self._hdrenc_main:
1037 return True
1038 if self._main:
1039 if self._main.flags & RAR_MAIN_PASSWORD:
1040 return True
1041 return False
1043 def setpassword(self, pwd):
1044 """Set cached password."""
1045 self._password = pwd
1047 def volumelist(self):
1048 """Volume files"""
1049 return self._vol_list
1051 def needs_password(self):
1052 """Is password required"""
1053 return self._needs_password
1055 def strerror(self):
1056 """Last error"""
1057 return self._parse_error
1059 def infolist(self):
1060 """List of RarInfo records.
1062 return self._info_list
def getinfo(self, member):
    """Look up the RarInfo for *member*.

    *member* may be a RarInfo (its filename is used), a Path or a string.
    Trailing "/" characters are ignored.

    Raises NoRarEntry when no entry has that name.
    """
    if isinstance(member, RarInfo):
        lookup_name = member.filename
    else:
        lookup_name = str(member) if isinstance(member, Path) else member

    if lookup_name.endswith("/"):
        lookup_name = lookup_name.rstrip("/")

    try:
        return self._info_map[lookup_name]
    except KeyError:
        raise NoRarEntry("No such file: %s" % lookup_name) from None
def getinfo_orig(self, member):
    """Return the RarInfo holding the actual data for *member*.

    For a hard link or file copy this is the entry the redirection
    points to; for anything else it is the entry itself.
    """
    inf = self.getinfo(member)
    if not inf.file_redir:
        return inf

    redir_type, _redir_flags, redir_name = inf.file_redir
    # cannot leave to unrar as it expects copied file to exist
    if redir_type in (RAR5_XREDIR_FILE_COPY, RAR5_XREDIR_HARD_LINK):
        return self.getinfo(redir_name)
    return inf
def parse(self):
    """Parse the archive, making sure the working file object gets closed."""
    self._fd = None
    try:
        self._parse_real()
    finally:
        # _parse_real() stores whichever volume it has open in self._fd
        leftover = self._fd
        if leftover:
            leftover.close()
        self._fd = None
def _parse_real(self):
    """Actually read file.

    Walks all block headers of the archive, switching to follow-up
    volumes when needed, and hands every header to process_entry()
    and to the info callback.

    Raises NotRarFile if the first volume does not start with the
    expected signature and NeedFirstVolume if the archive turns out
    not to be the first volume (unless part_only was requested).
    Problems with follow-up volumes go through _set_error().
    """
    fd = XFile(self._rarfile)
    self._fd = fd
    # skip SFX stub, if any
    fd.seek(self._sfx_offset, 0)
    sig = fd.read(len(self._expect_sig))
    if sig != self._expect_sig:
        raise NotRarFile("Not a Rar archive")

    volume = 0  # first vol (.rar) is 0
    more_vols = False
    endarc = False
    volfile = self._rarfile
    self._vol_list = [self._rarfile]
    raise_need_first_vol = False
    while True:
        if endarc:
            h = None    # don't read past ENDARC
        else:
            h = self._parse_header(fd)
        if not h:
            # current volume is finished (or unreadable)
            if raise_need_first_vol:
                # did not find ENDARC with VOLNR
                raise NeedFirstVolume("Need to start from first volume", None)
            if more_vols and not self._part_only:
                # continue with next volume
                volume += 1
                fd.close()
                try:
                    volfile = self._next_volname(volfile)
                    fd = XFile(volfile)
                except IOError:
                    self._set_error("Cannot open next volume: %s", volfile)
                    break
                self._fd = fd
                sig = fd.read(len(self._expect_sig))
                if sig != self._expect_sig:
                    self._set_error("Invalid volume sig: %s", volfile)
                    break
                # per-volume state starts fresh
                more_vols = False
                endarc = False
                self._vol_list.append(volfile)
                self._main = None
                self._hdrenc_main = None
                continue
            break
        h.volume = volume
        h.volume_file = volfile

        if h.type == RAR_BLOCK_MAIN and not self._main:
            self._main = h
            if volume == 0 and (h.flags & RAR_MAIN_NEWNUMBERING) and not self._part_only:
                # RAR 2.x does not set FIRSTVOLUME,
                # so check it only if NEWNUMBERING is used
                if (h.flags & RAR_MAIN_FIRSTVOLUME) == 0:
                    if getattr(h, "main_volume_number", None) is not None:
                        # rar5 may have more info
                        raise NeedFirstVolume(
                            "Need to start from first volume (current: %r)"
                            % (h.main_volume_number,),
                            h.main_volume_number
                        )
                    # delay raise until we have volnr from ENDARC
                    raise_need_first_vol = True
            if h.flags & RAR_MAIN_PASSWORD:
                self._needs_password = True
                # headers are encrypted, cannot continue without password
                if not self._password:
                    break
        elif h.type == RAR_BLOCK_ENDARC:
            more_vols = (h.flags & RAR_ENDARC_NEXT_VOLUME) > 0
            endarc = True
            if raise_need_first_vol and (h.flags & RAR_ENDARC_VOLNR) > 0:
                raise NeedFirstVolume(
                    "Need to start from first volume (current: %r)"
                    % (h.endarc_volnr,),
                    h.endarc_volnr
                )
        elif h.type == RAR_BLOCK_FILE:
            # RAR 2.x does not write RAR_BLOCK_ENDARC
            if h.flags & RAR_FILE_SPLIT_AFTER:
                more_vols = True
            # RAR 2.x does not set RAR_MAIN_FIRSTVOLUME
            if volume == 0 and h.flags & RAR_FILE_SPLIT_BEFORE:
                if not self._part_only:
                    raise_need_first_vol = True

        if h.needs_password():
            self._needs_password = True

        # store it
        self.process_entry(fd, h)

        if self._info_callback:
            self._info_callback(h)

        # go to next header
        if h.add_size > 0:
            fd.seek(h.data_offset + h.add_size, 0)
1200 def process_entry(self, fd, item):
1201 """Examine item, add into lookup cache."""
1202 raise NotImplementedError()
1204 def _decrypt_header(self, fd):
1205 raise NotImplementedError("_decrypt_header")
1207 def _parse_block_header(self, fd):
1208 raise NotImplementedError("_parse_block_header")
1210 def _open_hack(self, inf, pwd):
1211 raise NotImplementedError("_open_hack")
1213 def _parse_header(self, fd):
1214 """Read single header
1216 try:
1217 # handle encrypted headers
1218 if (self._main and self._main.flags & RAR_MAIN_PASSWORD) or self._hdrenc_main:
1219 if not self._password:
1220 return None
1221 fd = self._decrypt_header(fd)
1223 # now read actual header
1224 return self._parse_block_header(fd)
1225 except struct.error:
1226 self._set_error("Broken header in RAR file")
1227 return None
def _next_volname(self, volfile):
    """Build the name of the volume following *volfile*.

    Raises IOError when working on a file-like object, as there is
    no name to derive the next one from.
    """
    if is_filelike(volfile):
        raise IOError("Working on single FD")

    # naming scheme is selected by the main block flags
    uses_new_numbering = self._main.flags & RAR_MAIN_NEWNUMBERING
    return _next_newvol(volfile) if uses_new_numbering else _next_oldvol(volfile)
1238 def _set_error(self, msg, *args):
1239 if args:
1240 msg = msg % args
1241 self._parse_error = msg
1242 if self._strict:
1243 raise BadRarFile(msg)
def open(self, inf, pwd):
    """Return a readable stream with the data of entry *inf*.

    Hard links and file copies are resolved to the entry they refer to;
    for symlinks and junctions the stream contains the link target.

    Raises NeedFirstVolume if the entry continues from an earlier volume.
    """
    redir = inf.file_redir
    if redir:
        redir_type, _redir_flags, redir_name = redir
        # cannot leave to unrar as it expects copied file to exist
        if redir_type in (RAR5_XREDIR_FILE_COPY, RAR5_XREDIR_HARD_LINK):
            inf = self.getinfo(redir_name)
            # NOTE(review): getinfo() in this class raises NoRarEntry on a
            # miss, so this guard looks unreachable - confirm before relying
            # on BadRarFile here.
            if not inf:
                raise BadRarFile("cannot find copied file")
        elif redir_type in (
            RAR5_XREDIR_UNIX_SYMLINK, RAR5_XREDIR_WINDOWS_SYMLINK,
            RAR5_XREDIR_WINDOWS_JUNCTION,
        ):
            return io.BytesIO(redir_name.encode("utf8"))

    if inf.flags & RAR_FILE_SPLIT_BEFORE:
        raise NeedFirstVolume("Partial file, please start from first volume: " + inf.filename, None)

    # is temp write usable?
    main = self._main
    if not main or main._must_disable_hack() or inf._must_disable_hack():
        use_hack = 0
    elif is_filelike(self._rarfile):
        # size limit and USE_EXTRACT_HACK are not consulted for file-like input
        use_hack = 1
    elif inf.file_size > HACK_SIZE_LIMIT or not USE_EXTRACT_HACK:
        use_hack = 0
    else:
        use_hack = 1

    # now extract
    stored_plain = (
        inf.compress_type == RAR_M0
        and (inf.flags & RAR_FILE_PASSWORD) == 0
        and inf.file_redir is None
    )
    if stored_plain:
        return self._open_clear(inf)
    if use_hack:
        return self._open_hack(inf, pwd)
    if is_filelike(self._rarfile):
        return self._open_unrar_membuf(self._rarfile, inf, pwd)
    return self._open_unrar(self._rarfile, inf, pwd)
def _open_clear(self, inf):
    """Open a stored, unencrypted entry.

    Returns a DirectReader unless FORCE_TOOL is set, in which case the
    entry is extracted through :meth:`_open_unrar` instead.
    """
    if not FORCE_TOOL:
        return DirectReader(self, inf)
    return self._open_unrar(self._rarfile, inf)
def _open_hack_core(self, inf, pwd, prefix, suffix):
    """Copy one entry into a temporary single-entry archive and open that.

    ``inf.header_size + inf.compress_size`` bytes are read from
    ``inf.volume_file`` starting at ``inf.header_offset`` and written,
    framed by *prefix* and *suffix*, into a new ``.rar`` file created
    under ``HACK_TMP_DIR``.  The temp file is then handed to
    :meth:`_open_unrar`, which also receives its name as ``tmpfile``.

    Raises BadRarFile when the source ends before all bytes were copied.
    On any failure while building the temp file, both files are closed
    and the temp file is removed before the exception propagates.
    """
    size = inf.compress_size + inf.header_size

    tmpfd, tmpname = mkstemp(suffix=".rar", dir=HACK_TMP_DIR)
    try:
        try:
            tmpf = os.fdopen(tmpfd, "wb")
        except BaseException:
            # fdopen did not take ownership of the descriptor
            os.close(tmpfd)
            raise

        # both handles are closed before the cleanup below runs, which
        # matters on platforms that refuse to unlink open files
        with tmpf, XFile(inf.volume_file, 0) as rf:
            rf.seek(inf.header_offset)
            tmpf.write(prefix)
            while size > 0:
                buf = rf.read(min(size, BSIZE))
                if not buf:
                    raise BadRarFile("read failed: " + inf.filename)
                tmpf.write(buf)
                size -= len(buf)
            tmpf.write(suffix)
    except BaseException:
        os.unlink(tmpname)
        raise

    return self._open_unrar(tmpname, inf, pwd, tmpname)
def _open_unrar_membuf(self, memfile, inf, pwd):
    """Write in-memory archive to temp file, needed for solid archives.

    The temp file name is passed to :meth:`_open_unrar` both as the
    archive to read and as ``tmpfile``; ``force_file=True`` makes it pass
    the entry name to the tool as well.
    """
    tmp_path = membuf_tempfile(memfile)
    return self._open_unrar(tmp_path, inf, pwd, tmp_path, force_file=True)
def _open_unrar(self, rarfile, inf, pwd=None, tmpfile=None, force_file=False):
    """Extract using the external unpack tool.

    Builds the tool command line for archive *rarfile* and returns a
    PipeReader over it.  The entry name is only passed to the tool when
    no *tmpfile* is in use or *force_file* is set.
    """
    setup = tool_setup()

    # not giving filename avoids encoding related problems
    needs_name = force_file or not tmpfile
    fn = inf.filename.replace("/", os.path.sep) if needs_name else None

    # read from unrar pipe
    cmd = setup.open_cmdline(pwd, rarfile, fn)
    return PipeReader(self, inf, cmd, tmpfile)
1346 # RAR3 format
class Rar3Info(RarInfo):
    """RAR3 specific fields."""
    extract_version = 15
    salt = None
    add_size = 0
    header_crc = None
    header_size = None
    header_offset = None
    data_offset = None
    _md_class = None
    _md_expect = None
    _name_size = None

    # make sure some rar5 fields are always present
    file_redir = None
    blake2sp_hash = None

    endarc_datacrc = None
    endarc_volnr = None

    def _must_disable_hack(self):
        """Return True when this record rules out the temp-archive shortcut.

        File records block it when encrypted or split across volumes,
        main records when the archive is solid or header-encrypted.
        """
        if self.type == RAR_BLOCK_FILE:
            blockers = RAR_FILE_PASSWORD | RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER
            return bool(self.flags & blockers)
        if self.type == RAR_BLOCK_MAIN:
            return bool(self.flags & (RAR_MAIN_SOLID | RAR_MAIN_PASSWORD))
        return False

    def is_dir(self):
        """Returns True if entry is a directory."""
        if self.type != RAR_BLOCK_FILE or self.is_symlink():
            return False
        return (self.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY

    def is_symlink(self):
        """Returns True if entry is a symlink."""
        if self.type != RAR_BLOCK_FILE or self.host_os != RAR_OS_UNIX:
            return False
        # compare the file-type bits of the unix mode against 0xA000
        return (self.mode & 0xF000) == 0xA000

    def is_file(self):
        """Returns True if entry is a normal file."""
        if self.type != RAR_BLOCK_FILE:
            return False
        return not self.is_dir() and not self.is_symlink()
class RAR3Parser(CommonParser):
    """Parse RAR3 file format."""
    _expect_sig = RAR_ID
    _last_aes_key = (None, None, None)  # (salt, key, iv)

    def _decrypt_header(self, fd):
        """Return a decrypting reader over *fd* for an encrypted header.

        Reads the 8-byte salt from *fd*; the derived key and IV are cached
        and reused while the salt stays the same.  Raises NoCrypto when
        crypto support is unavailable.
        """
        if not _have_crypto:
            raise NoCrypto("Cannot parse encrypted headers - no crypto")
        salt = fd.read(8)
        cached_salt, key, iv = self._last_aes_key
        if cached_salt != salt:
            key, iv = rar3_s2k(self._password, salt)
            self._last_aes_key = (salt, key, iv)
        return HeaderDecrypt(fd, key, iv)

    def _parse_block_header(self, fd):
        """Parse common block header.

        Returns a Rar3Info, or None on EOF, truncated header or header
        CRC mismatch (the latter two are reported via ``_set_error``).
        """
        h = Rar3Info()
        h.header_offset = fd.tell()

        # read and parse base header
        base_size = S_BLK_HDR.size
        buf = fd.read(base_size)
        if not buf:
            return None
        if len(buf) < base_size:
            self._set_error("Unexpected EOF when reading header")
            return None
        h.header_crc, h.type, h.flags, h.header_size = S_BLK_HDR.unpack_from(buf)

        # read full header
        hdata = buf
        if h.header_size > base_size:
            hdata = buf + fd.read(h.header_size - base_size)
        h.data_offset = fd.tell()

        # unexpected EOF?
        if len(hdata) != h.header_size:
            self._set_error("Unexpected EOF when reading header")
            return None

        pos = base_size

        # block has data associated with it?
        if h.flags & RAR_LONG_BLOCK:
            h.add_size, pos = load_le32(hdata, pos)
        else:
            h.add_size = 0

        # parse interesting ones, decide header boundaries for crc
        btype = h.type
        if btype == RAR_BLOCK_MARK:
            return h
        elif btype == RAR_BLOCK_MAIN:
            pos += 6
            if h.flags & RAR_MAIN_ENCRYPTVER:
                pos += 1
            crc_pos = pos
            if h.flags & RAR_MAIN_COMMENT:
                self._parse_subblocks(h, hdata, pos)
        elif btype == RAR_BLOCK_FILE:
            # file header parsing restarts 4 bytes back, re-reading the
            # size field consumed above
            pos = self._parse_file_header(h, hdata, pos - 4)
            crc_pos = pos
            if h.flags & RAR_FILE_COMMENT:
                pos = self._parse_subblocks(h, hdata, pos)
        elif btype == RAR_BLOCK_SUB:
            pos = self._parse_file_header(h, hdata, pos - 4)
            crc_pos = h.header_size
        elif btype == RAR_BLOCK_OLD_AUTH:
            pos += 8
            crc_pos = pos
        elif btype == RAR_BLOCK_OLD_EXTRA:
            pos += 7
            crc_pos = pos
        elif btype == RAR_BLOCK_ENDARC:
            if h.flags & RAR_ENDARC_DATACRC:
                h.endarc_datacrc, pos = load_le32(hdata, pos)
            if h.flags & RAR_ENDARC_VOLNR:
                h.endarc_volnr = S_SHORT.unpack_from(hdata, pos)[0]
                pos += 2
            crc_pos = h.header_size
        else:
            crc_pos = h.header_size

        # check crc; the first two bytes (the stored crc) are excluded
        if btype == RAR_BLOCK_OLD_SUB:
            crcdat = hdata[2:] + fd.read(h.add_size)
        else:
            crcdat = hdata[2:crc_pos]

        calc_crc = crc32(crcdat) & 0xFFFF

        # return good header
        if h.header_crc == calc_crc:
            return h

        # header parsing failed.
        self._set_error("Header CRC error (%02x): exp=%x got=%x (xlen = %d)",
                        h.type, h.header_crc, calc_crc, len(crcdat))

        # instead of panicking, send eof
        return None

    def _parse_file_header(self, h, hdata, pos):
        """Read file-specific header fields into *h*.

        Returns the position in *hdata* after the parsed fields.
        """
        fld = S_FILE_HDR.unpack_from(hdata, pos)
        pos += S_FILE_HDR.size

        (h.compress_size, h.file_size, h.host_os, h.CRC, dos_time,
         h.extract_version, h.compress_type, name_size, h.mode) = fld[:9]
        h.date_time = parse_dos_time(dos_time)
        h.mtime = to_datetime(h.date_time)
        h._name_size = name_size

        h._md_class = CRC32Context
        h._md_expect = h.CRC

        if h.flags & RAR_FILE_LARGE:
            # upper 32 bits of both sizes follow the fixed part
            size_hi, pos = load_le32(hdata, pos)
            fsize_hi, pos = load_le32(hdata, pos)
            h.compress_size |= size_hi << 32
            h.file_size |= fsize_hi << 32
            h.add_size = h.compress_size

        name, pos = load_bytes(hdata, name_size, pos)
        has_unicode = h.flags & RAR_FILE_UNICODE
        if has_unicode and b"\0" in name:
            # stored in custom encoding
            h.orig_filename, _sep, packed = name.partition(b"\0")
            u = UnicodeFilename(h.orig_filename, packed)
            h.filename = u.decode()

            # if parsing failed fall back to simple name
            if u.failed:
                h.filename = self._decode(h.orig_filename)
        elif has_unicode:
            # stored in UTF8
            h.orig_filename = name
            h.filename = name.decode("utf8", "replace")
        else:
            # stored in random encoding
            h.orig_filename = name
            h.filename = self._decode(name)

        # change separator, set dir suffix
        h.filename = h.filename.replace("\\", "/").rstrip("/")
        if h.is_dir():
            h.filename = h.filename + "/"

        if h.flags & RAR_FILE_SALT:
            h.salt, pos = load_bytes(hdata, 8, pos)
        else:
            h.salt = None

        # optional extended time stamps
        if h.flags & RAR_FILE_EXTTIME:
            pos = _parse_ext_time(h, hdata, pos)
        else:
            # NOTE(review): this also resets the mtime derived from the DOS
            # timestamp above back to None - confirm that is intended.
            h.mtime = h.atime = h.ctime = h.arctime = None

        return pos

    def _parse_subblocks(self, h, hdata, pos):
        """Find old-style comment subblock.

        Walks the sub-headers stored in *hdata* from *pos*, storing a
        decoded comment on *h* when one is found.  Returns the position
        where the walk stopped.
        """
        while pos < len(hdata):
            # ordinary block header
            _scrc, stype, sflags, slen = S_BLK_HDR.unpack_from(hdata, pos)
            pos_next = pos + slen
            pos += S_BLK_HDR.size

            # corrupt header
            if pos_next < pos:
                break

            # followed by block-specific header
            if stype == RAR_BLOCK_OLD_COMMENT and pos + S_COMMENT_HDR.size <= pos_next:
                declen, ver, meth, crc = S_COMMENT_HDR.unpack_from(hdata, pos)
                pos += S_COMMENT_HDR.size
                packed = hdata[pos:pos_next]
                cmt = rar3_decompress(ver, meth, packed, declen, sflags,
                                      crc, self._password)
                if not self._crc_check or (crc32(cmt) & 0xFFFF) == crc:
                    h.comment = self._decode_comment(cmt)

            pos = pos_next
        return pos

    def _read_comment_v3(self, inf, pwd=None):
        """Load and decode the comment stored in sub-block *inf*.

        Returns None when CRC checking is enabled and the CRC does not match.
        """
        # read data
        with XFile(inf.volume_file) as rf:
            rf.seek(inf.data_offset)
            packed = rf.read(inf.compress_size)

        # decompress
        cmt = rar3_decompress(inf.extract_version, inf.compress_type, packed,
                              inf.file_size, inf.flags, inf.CRC, pwd, inf.salt)

        # check crc
        if self._crc_check and crc32(cmt) != inf.CRC:
            return None

        return self._decode_comment(cmt)

    def _decode(self, val):
        """Decode bytes with the first encoding in TRY_ENCODINGS that fits.

        Falls back to ``self._charset`` with replacement characters.
        """
        for encoding in TRY_ENCODINGS:
            try:
                return val.decode(encoding)
            except UnicodeError:
                continue
        return val.decode(self._charset, "replace")

    def _decode_comment(self, val):
        """Decode comment bytes the same way as names."""
        return self._decode(val)

    def process_entry(self, fd, item):
        """Register parsed header *item* in the parser state."""
        if item.type == RAR_BLOCK_FILE and not item.flags & RAR_FILE_VERSION:
            # entries flagged as old versions are skipped entirely
            if not item.flags & RAR_FILE_SPLIT_BEFORE:
                # use only first part
                self._info_map[item.filename.rstrip("/")] = item
                self._info_list.append(item)
            elif self._info_list:
                # final crc is in last block
                old = self._info_list[-1]
                old.CRC = item.CRC
                old._md_expect = item._md_expect
                old.compress_size += item.compress_size

        # parse new-style comment
        if item.type == RAR_BLOCK_SUB and item.filename == "CMT":
            if not item.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER):
                cmt = self._read_comment_v3(item, self._password)
                if not item.flags & RAR_FILE_SOLID:
                    # archive comment
                    self.comment = cmt
                elif self._info_list:
                    # file comment, belongs to the latest entry
                    self._info_list[-1].comment = cmt

        if item.type == RAR_BLOCK_MAIN:
            if item.flags & RAR_MAIN_COMMENT:
                self.comment = item.comment
            if item.flags & RAR_MAIN_PASSWORD:
                self._needs_password = True

    # put file compressed data into temporary .rar archive, and run
    # unrar on that, thus avoiding unrar going over whole archive
    def _open_hack(self, inf, pwd):
        """Open *inf* via a temporary archive holding just that entry."""
        # create main header: crc, type, flags, size, res1, res2
        main_hdr = S_BLK_HDR.pack(0x90CF, 0x73, 0, 13) + b"\0" * (2 + 4)
        return self._open_hack_core(inf, pwd, RAR_ID + main_hdr, b"")
1675 # RAR5 format
class Rar5Info(RarInfo):
    """Shared fields for RAR5 records."""
    extract_version = 50
    header_crc = None
    header_size = None
    header_offset = None
    data_offset = None

    # type=all
    block_type = None
    block_flags = None
    add_size = 0
    block_extra_size = 0

    # type=MAIN
    volume_number = None
    _md_class = None
    _md_expect = None

    def _must_disable_hack(self):
        """Base records never veto the temp-archive shortcut."""
        return False
class Rar5BaseFile(Rar5Info):
    """Shared struct for file & service record."""
    type = -1
    file_flags = None
    file_encryption = (0, 0, 0, b"", b"", b"")
    file_compress_flags = None
    file_redir = None
    file_owner = None
    file_version = None
    blake2sp_hash = None

    def _must_disable_hack(self):
        """Return True when this entry rules out the temp-archive shortcut.

        That is the case for encrypted, split, solid or redirected entries.
        """
        split_mask = RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER
        return bool(
            self.flags & RAR_FILE_PASSWORD
            or self.block_flags & split_mask
            or self.file_compress_flags & RAR5_COMPR_SOLID
            or self.file_redir
        )
class Rar5FileInfo(Rar5BaseFile):
    """RAR5 file record."""
    type = RAR_BLOCK_FILE

    def is_symlink(self):
        """Returns True if entry is a symlink."""
        redir = self.file_redir
        if redir is None:
            return False
        # pylint: disable=unsubscriptable-object
        return redir[0] in (
            RAR5_XREDIR_UNIX_SYMLINK,
            RAR5_XREDIR_WINDOWS_SYMLINK,
            RAR5_XREDIR_WINDOWS_JUNCTION,
        )

    def is_file(self):
        """Returns True if entry is a normal file."""
        if self.is_dir():
            return False
        return not self.is_symlink()

    def is_dir(self):
        """Returns True if entry is a directory."""
        # redirected entries are never reported as directories
        return not self.file_redir and bool(self.file_flags & RAR5_FILE_FLAG_ISDIR)
class Rar5ServiceInfo(Rar5BaseFile):
    """RAR5 service record; shares the file record layout."""
    type = RAR_BLOCK_SUB
class Rar5MainInfo(Rar5Info):
    """RAR5 archive main record."""
    type = RAR_BLOCK_MAIN
    main_flags = None
    main_volume_number = None

    def _must_disable_hack(self):
        """Solid archives rule out the temp-archive shortcut."""
        return bool(self.main_flags & RAR5_MAIN_FLAG_SOLID)
class Rar5EncryptionInfo(Rar5Info):
    """RAR5 archive header encryption record."""
    type = RAR5_BLOCK_ENCRYPTION
    encryption_algo = None
    encryption_flags = None
    encryption_kdf_count = None
    encryption_salt = None
    encryption_check_value = None

    def needs_password(self):
        """Always True for a header encryption record."""
        return True
class Rar5EndArcInfo(Rar5Info):
    """RAR5 end of archive record."""
    type = RAR_BLOCK_ENDARC
    endarc_flags = None
class RAR5Parser(CommonParser):
    """Parse RAR5 format."""
    _expect_sig = RAR5_ID
    _hdrenc_main = None

    # AES encrypted headers
    _last_aes256_key = (-1, None, None)  # (kdf_count, salt, key)

    # largest accepted log2 of the PBKDF2 iteration count; the value comes
    # from the archive, so it must be bounded before any key derivation
    _max_kdf_shift = 24

    def _get_utf8_password(self):
        """Return the password as bytes, UTF-8 encoding it when it is a str."""
        pwd = self._password
        if isinstance(pwd, str):
            return pwd.encode("utf8")
        return pwd

    def _gen_key(self, kdf_count, salt):
        """Derive the AES key for (*kdf_count*, *salt*), caching the last result.

        *kdf_count* is the log2 of the PBKDF2 iteration count.
        Raises BadRarFile when it exceeds the supported maximum.
        """
        if self._last_aes256_key[:2] == (kdf_count, salt):
            return self._last_aes256_key[2]
        if kdf_count > self._max_kdf_shift:
            raise BadRarFile("Too large kdf_count")
        pwd = self._get_utf8_password()
        key = pbkdf2_hmac("sha256", pwd, salt, 1 << kdf_count)
        self._last_aes256_key = (kdf_count, salt, key)
        return key

    def _decrypt_header(self, fd):
        """Return a decrypting reader over *fd* for an encrypted header.

        The key comes from the stored encryption record, the 16-byte IV
        is read from *fd*.  Raises NoCrypto without crypto support.
        """
        if not _have_crypto:
            raise NoCrypto("Cannot parse encrypted headers - no crypto")
        h = self._hdrenc_main
        key = self._gen_key(h.encryption_kdf_count, h.encryption_salt)
        iv = fd.read(16)
        return HeaderDecrypt(fd, key, iv)

    def _parse_block_header(self, fd):
        """Parse common block header.

        Returns the record object for known block types, None on EOF,
        oversized header, CRC mismatch or unknown block type.
        """
        header_offset = fd.tell()

        # crc (4 bytes) plus at least one byte of the header-size vint
        preload = 4 + 1
        start_bytes = fd.read(preload)
        if len(start_bytes) < preload:
            self._set_error("Unexpected EOF when reading header")
            return None
        # keep reading while the vint continuation bit is set
        while start_bytes[-1] & 0x80:
            b = fd.read(1)
            if not b:
                self._set_error("Unexpected EOF when reading header")
                return None
            start_bytes += b
        header_crc, pos = load_le32(start_bytes, 0)
        hdrlen, pos = load_vint(start_bytes, pos)
        if hdrlen > 2 * 1024 * 1024:
            return None
        header_size = pos + hdrlen

        # read full header, check for EOF
        hdata = start_bytes + fd.read(header_size - len(start_bytes))
        if len(hdata) != header_size:
            self._set_error("Unexpected EOF when reading header")
            return None
        data_offset = fd.tell()

        calc_crc = crc32(memoryview(hdata)[4:])
        if header_crc != calc_crc:
            # header parsing failed.
            self._set_error("Header CRC error: exp=%x got=%x (xlen = %d)",
                            header_crc, calc_crc, len(hdata))
            return None

        block_type, pos = load_vint(hdata, pos)

        if block_type == RAR5_BLOCK_MAIN:
            h, pos = self._parse_block_common(Rar5MainInfo(), hdata)
            h = self._parse_main_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_FILE:
            h, pos = self._parse_block_common(Rar5FileInfo(), hdata)
            h = self._parse_file_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_SERVICE:
            h, pos = self._parse_block_common(Rar5ServiceInfo(), hdata)
            h = self._parse_file_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_ENCRYPTION:
            h, pos = self._parse_block_common(Rar5EncryptionInfo(), hdata)
            h = self._parse_encryption_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_ENDARC:
            h, pos = self._parse_block_common(Rar5EndArcInfo(), hdata)
            h = self._parse_endarc_block(h, hdata, pos)
        else:
            h = None
        if h:
            h.header_offset = header_offset
            h.data_offset = data_offset
        return h

    def _parse_block_common(self, h, hdata):
        """Fill the fields shared by all block types into *h*.

        Returns ``(h, pos)`` where *pos* is the offset of the first
        type-specific field in *hdata*.
        """
        h.header_crc, pos = load_le32(hdata, 0)
        hdrlen, pos = load_vint(hdata, pos)
        h.header_size = hdrlen + pos
        h.block_type, pos = load_vint(hdata, pos)
        h.block_flags, pos = load_vint(hdata, pos)

        if h.block_flags & RAR5_BLOCK_FLAG_EXTRA_DATA:
            h.block_extra_size, pos = load_vint(hdata, pos)
        if h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA:
            h.add_size, pos = load_vint(hdata, pos)

        h.compress_size = h.add_size

        # mirror block flags into the RAR3-style flags field
        if h.block_flags & RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN:
            h.flags |= RAR_SKIP_IF_UNKNOWN
        if h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA:
            h.flags |= RAR_LONG_BLOCK
        return h, pos

    def _parse_main_block(self, h, hdata, pos):
        """Parse main archive record fields into *h* and return it."""
        h.main_flags, pos = load_vint(hdata, pos)
        if h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR:
            h.main_volume_number, pos = load_vint(hdata, pos)

        h.flags |= RAR_MAIN_NEWNUMBERING
        if h.main_flags & RAR5_MAIN_FLAG_SOLID:
            h.flags |= RAR_MAIN_SOLID
        if h.main_flags & RAR5_MAIN_FLAG_ISVOL:
            h.flags |= RAR_MAIN_VOLUME
        if h.main_flags & RAR5_MAIN_FLAG_RECOVERY:
            h.flags |= RAR_MAIN_RECOVERY
        if self._hdrenc_main:
            h.flags |= RAR_MAIN_PASSWORD
        if (h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR) == 0:
            h.flags |= RAR_MAIN_FIRSTVOLUME

        return h

    def _parse_file_block(self, h, hdata, pos):
        """Parse file/service record fields into *h* and return it."""
        h.file_flags, pos = load_vint(hdata, pos)
        h.file_size, pos = load_vint(hdata, pos)
        h.mode, pos = load_vint(hdata, pos)

        if h.file_flags & RAR5_FILE_FLAG_HAS_MTIME:
            h.mtime, pos = load_unixtime(hdata, pos)
            h.date_time = h.mtime.timetuple()[:6]
        if h.file_flags & RAR5_FILE_FLAG_HAS_CRC32:
            h.CRC, pos = load_le32(hdata, pos)
            h._md_class = CRC32Context
            h._md_expect = h.CRC

        h.file_compress_flags, pos = load_vint(hdata, pos)
        h.file_host_os, pos = load_vint(hdata, pos)
        h.orig_filename, pos = load_vstr(hdata, pos)
        h.filename = h.orig_filename.decode("utf8", "replace").rstrip("/")

        # use compatible values
        if h.file_host_os == RAR5_OS_WINDOWS:
            h.host_os = RAR_OS_WIN32
        else:
            h.host_os = RAR_OS_UNIX
        h.compress_type = RAR_M0 + ((h.file_compress_flags >> 7) & 7)

        if h.block_extra_size:
            # allow 1 byte of garbage
            while pos < len(hdata) - 1:
                xsize, pos = load_vint(hdata, pos)
                xdata, pos = load_bytes(hdata, xsize, pos)
                self._process_file_extra(h, xdata)

        if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE:
            h.flags |= RAR_FILE_SPLIT_BEFORE
        if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_AFTER:
            h.flags |= RAR_FILE_SPLIT_AFTER
        if h.file_flags & RAR5_FILE_FLAG_ISDIR:
            h.flags |= RAR_FILE_DIRECTORY
        if h.file_compress_flags & RAR5_COMPR_SOLID:
            h.flags |= RAR_FILE_SOLID

        if h.is_dir():
            h.filename = h.filename + "/"
        return h

    def _parse_endarc_block(self, h, hdata, pos):
        """Parse end-of-archive record fields into *h* and return it."""
        h.endarc_flags, pos = load_vint(hdata, pos)
        if h.endarc_flags & RAR5_ENDARC_FLAG_NEXT_VOL:
            h.flags |= RAR_ENDARC_NEXT_VOLUME
        return h

    def _check_password(self, check_value, kdf_count_shift, salt):
        """Verify the current password against a stored check value.

        Returns silently when *check_value* has an unexpected size or its
        embedded checksum does not match, since no verdict is possible.

        Raises BadRarFile when *kdf_count_shift* exceeds the supported
        maximum, and RarWrongPassword when the password does not match.
        """
        if len(check_value) != RAR5_PW_CHECK_SIZE + RAR5_PW_SUM_SIZE:
            return

        hdr_check = check_value[:RAR5_PW_CHECK_SIZE]
        hdr_sum = check_value[RAR5_PW_CHECK_SIZE:]
        sum_hash = sha256(hdr_check).digest()
        if sum_hash[:RAR5_PW_SUM_SIZE] != hdr_sum:
            return

        # the shift is an unvalidated byte from the archive; apply the same
        # bound as _gen_key so a crafted header cannot demand an absurd
        # number of PBKDF2 iterations
        if kdf_count_shift > self._max_kdf_shift:
            raise BadRarFile("Too large kdf_count")

        kdf_count = (1 << kdf_count_shift) + 32
        pwd = self._get_utf8_password()
        pwd_hash = pbkdf2_hmac("sha256", pwd, salt, kdf_count)

        # fold the hash into RAR5_PW_CHECK_SIZE bytes by xor
        pwd_check = bytearray(RAR5_PW_CHECK_SIZE)
        len_mask = RAR5_PW_CHECK_SIZE - 1
        for i, v in enumerate(pwd_hash):
            pwd_check[i & len_mask] ^= v

        if pwd_check != hdr_check:
            raise RarWrongPassword()

    def _parse_encryption_block(self, h, hdata, pos):
        """Parse the header encryption record and remember it.

        Raises BadRarFile for ciphers other than AES-256.  When a check
        value and a password are both present the password is verified.
        """
        h.encryption_algo, pos = load_vint(hdata, pos)
        h.encryption_flags, pos = load_vint(hdata, pos)
        h.encryption_kdf_count, pos = load_byte(hdata, pos)
        h.encryption_salt, pos = load_bytes(hdata, 16, pos)
        if h.encryption_flags & RAR5_ENC_FLAG_HAS_CHECKVAL:
            h.encryption_check_value, pos = load_bytes(hdata, 12, pos)
        if h.encryption_algo != RAR5_XENC_CIPHER_AES256:
            raise BadRarFile("Unsupported header encryption cipher")
        if h.encryption_check_value and self._password:
            self._check_password(h.encryption_check_value, h.encryption_kdf_count, h.encryption_salt)
        self._hdrenc_main = h
        return h

    def _process_file_extra(self, h, xdata):
        """Dispatch one extra-area record of a file header by its type.

        Service records and unknown types are ignored.
        """
        xtype, pos = load_vint(xdata, 0)
        if xtype == RAR5_XFILE_TIME:
            self._parse_file_xtime(h, xdata, pos)
        elif xtype == RAR5_XFILE_ENCRYPTION:
            self._parse_file_encryption(h, xdata, pos)
        elif xtype == RAR5_XFILE_HASH:
            self._parse_file_hash(h, xdata, pos)
        elif xtype == RAR5_XFILE_VERSION:
            self._parse_file_version(h, xdata, pos)
        elif xtype == RAR5_XFILE_REDIR:
            self._parse_file_redir(h, xdata, pos)
        elif xtype == RAR5_XFILE_OWNER:
            self._parse_file_owner(h, xdata, pos)

    # extra block for file time record
    def _parse_file_xtime(self, h, xdata, pos):
        """Parse the file time record: mtime/ctime/atime plus optional nanoseconds."""
        tflags, pos = load_vint(xdata, pos)

        ldr = load_windowstime
        if tflags & RAR5_XTIME_UNIXTIME:
            ldr = load_unixtime

        if tflags & RAR5_XTIME_HAS_MTIME:
            h.mtime, pos = ldr(xdata, pos)
            h.date_time = h.mtime.timetuple()[:6]
        if tflags & RAR5_XTIME_HAS_CTIME:
            h.ctime, pos = ldr(xdata, pos)
        if tflags & RAR5_XTIME_HAS_ATIME:
            h.atime, pos = ldr(xdata, pos)

        # nanosecond parts follow in the same order as the timestamps
        if tflags & RAR5_XTIME_UNIXTIME_NS:
            if tflags & RAR5_XTIME_HAS_MTIME:
                nsec, pos = load_le32(xdata, pos)
                h.mtime = to_nsdatetime(h.mtime, nsec)
            if tflags & RAR5_XTIME_HAS_CTIME:
                nsec, pos = load_le32(xdata, pos)
                h.ctime = to_nsdatetime(h.ctime, nsec)
            if tflags & RAR5_XTIME_HAS_ATIME:
                nsec, pos = load_le32(xdata, pos)
                h.atime = to_nsdatetime(h.atime, nsec)

    # just remember encryption info
    def _parse_file_encryption(self, h, xdata, pos):
        """Store the file encryption record on *h* and mark it as encrypted."""
        algo, pos = load_vint(xdata, pos)
        flags, pos = load_vint(xdata, pos)
        kdf_count, pos = load_byte(xdata, pos)
        salt, pos = load_bytes(xdata, 16, pos)
        iv, pos = load_bytes(xdata, 16, pos)
        checkval = None
        if flags & RAR5_XENC_CHECKVAL:
            checkval, pos = load_bytes(xdata, 12, pos)
        if flags & RAR5_XENC_TWEAKED:
            # tweaked checksums are not verified
            h._md_expect = None
            h._md_class = NoHashContext

        h.file_encryption = (algo, flags, kdf_count, salt, iv, checkval)
        h.flags |= RAR_FILE_PASSWORD

    def _parse_file_hash(self, h, xdata, pos):
        """Parse the file hash record; only BLAKE2sp is recognised."""
        hash_type, pos = load_vint(xdata, pos)
        if hash_type == RAR5_XHASH_BLAKE2SP:
            h.blake2sp_hash, pos = load_bytes(xdata, 32, pos)
            if (h.file_encryption[1] & RAR5_XENC_TWEAKED) == 0:
                h._md_class = Blake2SP
                h._md_expect = h.blake2sp_hash

    def _parse_file_version(self, h, xdata, pos):
        """Store ``(flags, version)`` from the file version record."""
        flags, pos = load_vint(xdata, pos)
        version, pos = load_vint(xdata, pos)
        h.file_version = (flags, version)

    def _parse_file_redir(self, h, xdata, pos):
        """Store ``(type, flags, name)`` from the redirection record."""
        redir_type, pos = load_vint(xdata, pos)
        redir_flags, pos = load_vint(xdata, pos)
        redir_name, pos = load_vstr(xdata, pos)
        redir_name = redir_name.decode("utf8", "replace")
        h.file_redir = (redir_type, redir_flags, redir_name)

    def _parse_file_owner(self, h, xdata, pos):
        """Store ``(user_name, group_name, user_id, group_id)``; absent fields are None."""
        user_name = group_name = user_id = group_id = None

        flags, pos = load_vint(xdata, pos)
        if flags & RAR5_XOWNER_UNAME:
            user_name, pos = load_vstr(xdata, pos)
        if flags & RAR5_XOWNER_GNAME:
            group_name, pos = load_vstr(xdata, pos)
        if flags & RAR5_XOWNER_UID:
            user_id, pos = load_vint(xdata, pos)
        if flags & RAR5_XOWNER_GID:
            group_id, pos = load_vint(xdata, pos)

        h.file_owner = (user_name, group_name, user_id, group_id)

    def process_entry(self, fd, item):
        """Register parsed header *item* in the parser state."""
        if item.block_type == RAR5_BLOCK_FILE:
            if item.file_version:
                pass    # skip old versions
            elif (item.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE) == 0:
                # use only first part
                self._info_map[item.filename.rstrip("/")] = item
                self._info_list.append(item)
            elif len(self._info_list) > 0:
                # final crc is in last block
                old = self._info_list[-1]
                old.CRC = item.CRC
                old._md_expect = item._md_expect
                old.blake2sp_hash = item.blake2sp_hash
                old.compress_size += item.compress_size
        elif item.block_type == RAR5_BLOCK_SERVICE:
            if item.filename == "CMT":
                self._load_comment(fd, item)

    def _load_comment(self, fd, item):
        """Load the archive comment from service record *item*.

        Split or compressed comments, and comments encrypted with an
        unsupported cipher, are skipped.  Always returns None; the result
        is stored in ``self.comment``.
        """
        if item.block_flags & (RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER):
            return None
        if item.compress_type != RAR_M0:
            return None

        if item.flags & RAR_FILE_PASSWORD:
            algo, ___flags, kdf_count, salt, iv, ___checkval = item.file_encryption
            if algo != RAR5_XENC_CIPHER_AES256:
                return None
            key = self._gen_key(kdf_count, salt)
            f = HeaderDecrypt(fd, key, iv)
            cmt = f.read(item.file_size)
        else:
            # archive comment
            with self._open_clear(item) as cmtstream:
                cmt = cmtstream.read()

        # rar bug? - appends zero to comment
        cmt = cmt.split(b"\0", 1)[0]
        self.comment = cmt.decode("utf8")
        return None

    def _open_hack(self, inf, pwd):
        """Open *inf* via a temporary archive holding just that entry."""
        # len, type, blk_flags, flags
        main_hdr = b"\x03\x01\x00\x00"
        endarc_hdr = b"\x03\x05\x00\x00"
        main_hdr = S_LONG.pack(crc32(main_hdr)) + main_hdr
        endarc_hdr = S_LONG.pack(crc32(endarc_hdr)) + endarc_hdr
        return self._open_hack_core(inf, pwd, RAR5_ID + main_hdr, endarc_hdr)
2164 ## Utility classes
class UnicodeFilename:
    """Handle RAR3 unicode filename decompression.

    *name* is the 8-bit form of the filename, *encdata* the packed data
    that expands to UTF-16LE.  After :meth:`decode`, ``failed`` is
    non-zero when either input ran out of bytes.
    """

    def __init__(self, name, encdata):
        self.std_name = bytearray(name)
        self.encdata = bytearray(encdata)
        self.pos = self.encpos = 0
        self.buf = bytearray()
        self.failed = 0

    def enc_byte(self):
        """Consume and return next encoded byte; 0 and failure flag at the end."""
        if self.encpos >= len(self.encdata):
            self.failed = 1
            return 0
        value = self.encdata[self.encpos]
        self.encpos += 1
        return value

    def std_byte(self):
        """Return 8-bit name byte at the current output position.

        Past the end of the name, sets the failure flag and returns ``?``.
        """
        if self.pos >= len(self.std_name):
            self.failed = 1
            return ord("?")
        return self.std_name[self.pos]

    def put(self, lo, hi):
        """Append one 16-bit little-endian unit and advance the output position."""
        self.buf.extend((lo, hi))
        self.pos += 1

    def decode(self):
        """Decompress compressed UTF16 value."""
        hi = self.enc_byte()
        flags = 0
        flagbits = 0
        end = len(self.encdata)
        while self.encpos < end:
            # every flag byte carries four 2-bit opcodes, highest bits first
            if not flagbits:
                flags = self.enc_byte()
                flagbits = 8
            flagbits -= 2
            op = (flags >> flagbits) & 3

            if op == 0:
                self.put(self.enc_byte(), 0)
            elif op == 1:
                self.put(self.enc_byte(), hi)
            elif op == 2:
                low = self.enc_byte()
                self.put(low, self.enc_byte())
            else:
                # run copied from the 8-bit name, optionally shifted
                n = self.enc_byte()
                if n & 0x80:
                    delta = self.enc_byte()
                    count, high = (n & 0x7f) + 2, hi
                else:
                    delta = 0
                    count, high = n + 2, 0
                for _ in range(count):
                    self.put((self.std_byte() + delta) & 0xFF, high)
        return self.buf.decode("utf-16le", "replace")
2230 class RarExtFile(io.RawIOBase):
2231 """Base class for file-like object that :meth:`RarFile.open` returns.
2233 Provides public methods and common crc checking.
2235 Behaviour:
2236 - no short reads - .read() and .readinfo() read as much as requested.
2237 - no internal buffer, use io.BufferedReader for that.
2239 name = None #: Filename of the archive entry
2240 mode = "rb"
2241 _parser = None
2242 _inf = None
2243 _fd = None
2244 _remain = 0
2245 _returncode = 0
2246 _md_context = None
2247 _seeking = False
2249 def _open_extfile(self, parser, inf):
2250 self.name = inf.filename
2251 self._parser = parser
2252 self._inf = inf
2254 if self._fd:
2255 self._fd.close()
2256 if self._seeking:
2257 md_class = NoHashContext
2258 else:
2259 md_class = self._inf._md_class or NoHashContext
2260 self._md_context = md_class()
2261 self._fd = None
2262 self._remain = self._inf.file_size
2264 def read(self, n=-1):
2265 """Read all or specified amount of data from archive entry."""
2267 # sanitize count
2268 if n is None or n < 0:
2269 n = self._remain
2270 elif n > self._remain:
2271 n = self._remain
2272 if n == 0:
2273 return b""
2275 buf = []
2276 orig = n
2277 while n > 0:
2278 # actual read
2279 data = self._read(n)
2280 if not data:
2281 break
2282 buf.append(data)
2283 self._md_context.update(data)
2284 self._remain -= len(data)
2285 n -= len(data)
2286 data = b"".join(buf)
2287 if n > 0:
2288 raise BadRarFile("Failed the read enough data: req=%d got=%d" % (orig, len(data)))
2290 # done?
2291 if not data or self._remain == 0:
2292 # self.close()
2293 self._check()
2294 return data
2296 def _check(self):
2297 """Check final CRC."""
2298 final = self._md_context.digest()
2299 exp = self._inf._md_expect
2300 if exp is None:
2301 return
2302 if final is None:
2303 return
2304 if self._returncode:
2305 check_returncode(self._returncode, "", tool_setup().get_errmap())
2306 if self._remain != 0:
2307 raise BadRarFile("Failed the read enough data")
2308 if final != exp:
2309 raise BadRarFile("Corrupt file - CRC check failed: %s - exp=%r got=%r" % (
2310 self._inf.filename, exp, final))
2312 def _read(self, cnt):
2313 """Actual read that gets sanitized cnt."""
2314 raise NotImplementedError("_read")
2316 def close(self):
2317 """Close open resources."""
2319 super().close()
2321 if self._fd:
2322 self._fd.close()
2323 self._fd = None
2325 def __del__(self):
2326 """Hook delete to make sure tempfile is removed."""
2327 self.close()
2329 def readinto(self, buf):
2330 """Zero-copy read directly into buffer.
2332 Returns bytes read.
2334 raise NotImplementedError("readinto")
2336 def tell(self):
2337 """Return current reading position in uncompressed data."""
2338 return self._inf.file_size - self._remain
def seek(self, offset, whence=0):
    """Seek in data.

    On uncompressed files, the seeking works by actual
    seeks so it's fast.  On compressed files it's slow
    - forward seeking happens by reading ahead,
    backwards by re-opening and decompressing from the start.

    ``whence`` is 0 (from start), 1 (from current position) or
    2 (from end); any other value raises ``ValueError``.  The target
    is clamped into ``[0, file_size]``.  Returns the new position.
    """

    # disable crc check when seeking
    if not self._seeking:
        self._md_context = NoHashContext()
        self._seeking = True

    fsize = self._inf.file_size
    cur_ofs = self.tell()

    if whence == 0:     # seek from beginning of file
        new_ofs = offset
    elif whence == 1:   # seek from current position
        new_ofs = cur_ofs + offset
    elif whence == 2:   # seek from end of file
        new_ofs = fsize + offset
    else:
        raise ValueError("Invalid value for whence")

    # sanity check
    if new_ofs < 0:
        new_ofs = 0
    elif new_ofs > fsize:
        new_ofs = fsize

    # do the actual seek
    if new_ofs >= cur_ofs:
        self._skip(new_ofs - cur_ofs)
    else:
        # reopen and seek
        self._open_extfile(self._parser, self._inf)
        self._skip(new_ofs)
    return self.tell()
def _skip(self, cnt):
    """Read and discard data.

    Consumes ``cnt`` bytes from this object in chunks of at most
    ``BSIZE`` via ``empty_read``.
    """
    empty_read(self, cnt, BSIZE)
def readable(self):
    """Returns True"""
    return True
def writable(self):
    """Returns False.

    Writing is not supported.
    """
    return False
def seekable(self):
    """Returns True.

    Seeking is supported, although it's slow on compressed files.
    """
    return True
def readall(self):
    """Read all remaining data"""
    # avoid RawIOBase default impl
    return self.read()
class PipeReader(RarExtFile):
    """Read data from pipe, handle tempfile cleanup."""

    def __init__(self, parser, inf, cmd, tempfile=None):
        """Remember the command line and optional tempfile, then start reading.

        ``cmd`` is passed to ``custom_popen``; ``tempfile`` (if given) is a
        path that gets unlinked on ``close()``.
        """
        super().__init__()
        self._cmd = cmd
        self._proc = None
        self._tempfile = tempfile
        self._open_extfile(parser, inf)

    def _close_proc(self):
        """Close the child's pipes, wait for it and record its return code."""
        if not self._proc:
            return
        for f in (self._proc.stdout, self._proc.stderr, self._proc.stdin):
            if f:
                f.close()
        self._proc.wait()
        self._returncode = self._proc.returncode
        self._proc = None

    def _open_extfile(self, parser, inf):
        """(Re)start the extraction process and read from its stdout."""
        super()._open_extfile(parser, inf)

        # stop old process
        self._close_proc()

        # launch new process
        self._returncode = 0
        self._proc = custom_popen(self._cmd)
        self._fd = self._proc.stdout

    def _read(self, cnt):
        """Read from pipe.

        Returns up to ``cnt`` bytes; fewer only when the pipe reaches EOF.
        """

        # normal read is usually enough
        data = self._fd.read(cnt)
        if len(data) == cnt or not data:
            return data

        # short read, try looping
        buf = [data]
        cnt -= len(data)
        while cnt > 0:
            data = self._fd.read(cnt)
            if not data:
                break
            cnt -= len(data)
            buf.append(data)
        return b"".join(buf)

    def close(self):
        """Close open resources.

        Stops the child process, closes the base file and removes the
        tempfile.  The tempfile is removed even when stopping the process
        or closing the base raises.
        """
        try:
            self._close_proc()
            super().close()
        finally:
            if self._tempfile:
                try:
                    os.unlink(self._tempfile)
                except OSError:
                    # best effort: the file may already be gone
                    pass
                self._tempfile = None

    def readinto(self, buf):
        """Zero-copy read directly into buffer.

        Reads at most ``min(len(buf), remaining)`` bytes, feeds them to the
        hash context and returns the number of bytes stored.
        """
        cnt = len(buf)
        if cnt > self._remain:
            cnt = self._remain
        vbuf = memoryview(buf)
        res = got = 0
        while got < cnt:
            res = self._fd.readinto(vbuf[got: cnt])
            if not res:
                break
            self._md_context.update(vbuf[got: got + res])
            self._remain -= res
            got += res
        return got
class DirectReader(RarExtFile):
    """Read uncompressed data directly from archive.
    """
    _cur = None         # header of the file block currently being read
    _cur_avail = None   # bytes of data left in the current block
    _volfile = None     # volume currently opened

    def __init__(self, parser, inf):
        """Open the entry described by ``inf`` for direct reading."""
        super().__init__()
        self._open_extfile(parser, inf)

    def _open_extfile(self, parser, inf):
        """Open the entry's volume and position after its file header."""
        super()._open_extfile(parser, inf)

        self._volfile = self._inf.volume_file
        self._fd = XFile(self._volfile, 0)
        self._fd.seek(self._inf.header_offset, 0)
        self._cur = self._parser._parse_header(self._fd)
        self._cur_avail = self._cur.add_size

    def _skip(self, cnt):
        """RAR Seek, skipping through rar files to get to correct position
        """

        while cnt > 0:
            # next vol needed?
            if self._cur_avail == 0:
                if not self._open_next():
                    break

            # fd is in read pos, do the read
            if cnt > self._cur_avail:
                # skip the rest of this block; no file seek is done here
                cnt -= self._cur_avail
                self._remain -= self._cur_avail
                self._cur_avail = 0
            else:
                self._fd.seek(cnt, 1)
                self._cur_avail -= cnt
                self._remain -= cnt
                cnt = 0

    def _read(self, cnt):
        """Read from potentially multi-volume archive."""

        # make sure the file position matches the bookkeeping
        pos = self._fd.tell()
        need = self._cur.data_offset + self._cur.add_size - self._cur_avail
        if pos != need:
            self._fd.seek(need, 0)

        buf = []
        while cnt > 0:
            # next vol needed?
            if self._cur_avail == 0:
                if not self._open_next():
                    break

            # fd is in read pos, do the read
            if cnt > self._cur_avail:
                data = self._fd.read(self._cur_avail)
            else:
                data = self._fd.read(cnt)
            if not data:
                break

            # got some data
            cnt -= len(data)
            self._cur_avail -= len(data)
            buf.append(data)

        if len(buf) == 1:
            return buf[0]
        return b"".join(buf)

    def _open_next(self):
        """Proceed to next volume.

        Returns False when the current block is not continued in another
        volume, True once the continuation block has been located.
        Raises ``BadRarFile`` on a bad signature, premature end of
        headers or a mismatching entry name.
        """

        # is the file split over archives?
        if (self._cur.flags & RAR_FILE_SPLIT_AFTER) == 0:
            return False

        if self._fd:
            self._fd.close()
            self._fd = None

        # open next part
        self._volfile = self._parser._next_volname(self._volfile)
        fd = open(self._volfile, "rb", 0)
        self._fd = fd
        sig = fd.read(len(self._parser._expect_sig))
        if sig != self._parser._expect_sig:
            raise BadRarFile("Invalid signature")

        # loop until first file header
        while True:
            cur = self._parser._parse_header(fd)
            if not cur:
                raise BadRarFile("Unexpected EOF")
            if cur.type in (RAR_BLOCK_MARK, RAR_BLOCK_MAIN):
                if cur.add_size:
                    fd.seek(cur.add_size, 1)
                continue
            if cur.orig_filename != self._inf.orig_filename:
                raise BadRarFile("Did not found file entry")
            self._cur = cur
            self._cur_avail = cur.add_size
            return True

    def readinto(self, buf):
        """Zero-copy read directly into buffer.

        Fills ``buf`` across volume boundaries, updates the hash context
        and returns the number of bytes stored.
        """
        got = 0
        vbuf = memoryview(buf)
        while got < len(buf):
            # next vol needed?
            if self._cur_avail == 0:
                if not self._open_next():
                    break

            # length for next read
            cnt = len(buf) - got
            if cnt > self._cur_avail:
                cnt = self._cur_avail

            # read into temp view
            res = self._fd.readinto(vbuf[got: got + cnt])
            if not res:
                break
            self._md_context.update(vbuf[got: got + res])
            self._cur_avail -= res
            self._remain -= res
            got += res
        return got
class HeaderDecrypt:
    """File-like object that decrypts from another file"""

    def __init__(self, f, key, iv):
        """Wrap file ``f`` with an AES-CBC decryptor built from ``key``/``iv``."""
        self.f = f
        self.ciph = AES_CBC_Decrypt(key, iv)
        # decrypted bytes not yet handed out to the caller
        self.buf = b""

    def tell(self):
        """Current file pos - works only on block boundaries."""
        return self.f.tell()

    def read(self, cnt=None):
        """Read and decrypt.

        ``cnt`` is compared numerically, so callers must pass an integer.
        Requests above 8 KiB raise ``BadRarFile``.  May return fewer than
        ``cnt`` bytes if the underlying file ends mid-block.
        """
        if cnt > 8 * 1024:
            raise BadRarFile("Bad count to header decrypt - wrong password?")

        # consume old data
        if cnt <= len(self.buf):
            res = self.buf[:cnt]
            self.buf = self.buf[cnt:]
            return res
        res = self.buf
        self.buf = b""
        cnt -= len(res)

        # decrypt new data
        blklen = 16
        while cnt > 0:
            enc = self.f.read(blklen)
            if len(enc) < blklen:
                break
            dec = self.ciph.decrypt(enc)
            if cnt >= len(dec):
                res += dec
                cnt -= len(dec)
            else:
                # keep the unused tail of the block for the next call
                res += dec[:cnt]
                self.buf = dec[cnt:]
                cnt = 0

        return res
class XFile:
    """Input may be filename or file object.

    A file object is used as-is (rewound to offset 0) and left open on
    ``close()``; a filename is opened in binary mode and closed on
    ``close()``.
    """
    __slots__ = ("_fd", "_need_close")

    def __init__(self, xfile, bufsize=1024):
        if is_filelike(xfile):
            self._need_close = False
            self._fd = xfile
            self._fd.seek(0)
        else:
            self._need_close = True
            self._fd = open(xfile, "rb", bufsize)

    def read(self, n=None):
        """Read from file."""
        return self._fd.read(n)

    def tell(self):
        """Return file pos."""
        return self._fd.tell()

    def seek(self, ofs, whence=0):
        """Move file pos."""
        return self._fd.seek(ofs, whence)

    def readinto(self, buf):
        """Read into buffer."""
        return self._fd.readinto(buf)

    def close(self):
        """Close file object."""
        if self._need_close:
            self._fd.close()

    def __enter__(self):
        return self

    def __exit__(self, typ, val, tb):
        self.close()
class NoHashContext:
    """No-op hash function.

    Every method ignores its input and returns ``None``.
    """

    def __init__(self, data=None):
        """Initialize"""

    def update(self, data):
        """Update data"""

    def digest(self):
        """Final hash"""

    def hexdigest(self):
        """Hexadecimal digest."""
class CRC32Context:
    """Hash context that uses CRC32."""
    __slots__ = ["_crc"]

    def __init__(self, data=None):
        """Start with CRC 0 and optionally hash initial ``data``."""
        self._crc = 0
        if data:
            self.update(data)

    def update(self, data):
        """Process data."""
        self._crc = crc32(data, self._crc)

    def digest(self):
        """Final hash.

        Returned as an integer, not as bytes.
        """
        return self._crc

    def hexdigest(self):
        """Hexadecimal digest."""
        return "%08x" % self.digest()
class Blake2SP:
    """Blake2sp hash context.

    Input is cut into ``block_size`` chunks that are dealt round-robin to
    ``parallelism`` leaf blake2s contexts; the leaf digests are then
    hashed by a root blake2s node.
    """
    __slots__ = ["_thread", "_buf", "_cur", "_digest"]
    digest_size = 32
    block_size = 64
    parallelism = 8

    def __init__(self, data=None):
        self._buf = b""         # partial block waiting for more data
        self._cur = 0           # index of the leaf that gets the next block
        self._digest = None     # cached final digest
        self._thread = []

        for i in range(self.parallelism):
            ctx = self._blake2s(i, 0, i == (self.parallelism - 1))
            self._thread.append(ctx)

        if data:
            self.update(data)

    def _blake2s(self, ofs, depth, is_last):
        """Create one blake2s tree node with the given position."""
        return blake2s(node_offset=ofs, node_depth=depth, last_node=is_last,
                       depth=2, inner_size=32, fanout=self.parallelism)

    def _add_block(self, blk):
        """Feed one block to the current leaf and advance to the next leaf."""
        self._thread[self._cur].update(blk)
        self._cur = (self._cur + 1) % self.parallelism

    def update(self, data):
        """Hash data.
        """
        view = memoryview(data)
        bs = self.block_size
        if self._buf:
            # complete the pending partial block first
            need = bs - len(self._buf)
            if len(view) < need:
                self._buf += view.tobytes()
                return
            self._add_block(self._buf + view[:need].tobytes())
            view = view[need:]
        while len(view) >= bs:
            self._add_block(view[:bs])
            view = view[bs:]
        self._buf = view.tobytes()

    def digest(self):
        """Return final digest value.

        The result is computed once and cached.
        """
        if self._digest is None:
            if self._buf:
                self._add_block(self._buf)
                self._buf = b""
            ctx = self._blake2s(0, 1, True)
            for t in self._thread:
                ctx.update(t.digest())
            self._digest = ctx.digest()
        return self._digest

    def hexdigest(self):
        """Hexadecimal digest."""
        return hexlify(self.digest()).decode("ascii")
class Rar3Sha1:
    """Emulate buggy SHA1 from RAR3.

    With ``rarbug`` enabled, ``update()`` overwrites parts of the caller's
    buffer in place, so the buffer must be writable (e.g. ``bytearray``).
    """
    digest_size = 20
    block_size = 64

    _BLK_BE = struct.Struct(b">16L")
    _BLK_LE = struct.Struct(b"<16L")

    __slots__ = ("_nbytes", "_md", "_rarbug")

    def __init__(self, data=b"", rarbug=False):
        self._md = sha1()
        self._nbytes = 0
        self._rarbug = rarbug
        self.update(data)

    def update(self, data):
        """Process more data."""
        self._md.update(data)
        bufpos = self._nbytes & 63
        self._nbytes += len(data)

        if self._rarbug and len(data) > 64:
            # corrupt every full block after the one that completes
            # the currently buffered partial block
            dpos = self.block_size - bufpos
            while dpos + self.block_size <= len(data):
                self._corrupt(data, dpos)
                dpos += self.block_size

    def digest(self):
        """Return final state."""
        return self._md.digest()

    def hexdigest(self):
        """Return final state as hex string."""
        return self._md.hexdigest()

    def _corrupt(self, data, dpos):
        """Corruption from SHA1 core.

        Overwrites the 64 bytes at ``dpos`` with the expanded message
        schedule words, stored little-endian.
        """
        ws = list(self._BLK_BE.unpack_from(data, dpos))
        for t in range(16, 80):
            tmp = ws[(t - 3) & 15] ^ ws[(t - 8) & 15] ^ ws[(t - 14) & 15] ^ ws[(t - 16) & 15]
            ws[t & 15] = ((tmp << 1) | (tmp >> (32 - 1))) & 0xFFFFFFFF
        self._BLK_LE.pack_into(data, dpos, *ws)
## Utility functions
# Pre-compiled little-endian struct formats.
S_LONG = Struct("<L")
S_SHORT = Struct("<H")
S_BYTE = Struct("<B")

# Fixed-layout header formats.
S_BLK_HDR = Struct("<HBHH")
S_FILE_HDR = Struct("<LLBLLBBHL")
S_COMMENT_HDR = Struct("<HBBH")
def load_vint(buf, pos):
    """Load RAR5 variable-size int.

    Each byte contributes its low 7 bits, least significant group first;
    a byte below 0x80 ends the number.  At most 11 bytes are examined.

    Returns ``(value, new_pos)``.  Raises ``BadRarFile`` when no
    terminating byte is found within the limit or the buffer.
    """
    limit = min(pos + 11, len(buf))
    res = ofs = 0
    while pos < limit:
        b = buf[pos]
        res += ((b & 0x7F) << ofs)
        pos += 1
        ofs += 7
        if b < 0x80:
            return res, pos
    raise BadRarFile("cannot load vint")
def load_byte(buf, pos):
    """Load single byte.

    Returns ``(value, new_pos)``; raises ``BadRarFile`` if ``pos`` is
    past the end of ``buf``.
    """
    end = pos + 1
    if end > len(buf):
        raise BadRarFile("cannot load byte")
    return S_BYTE.unpack_from(buf, pos)[0], end
def load_le32(buf, pos):
    """Load little-endian 32-bit integer.

    Returns ``(value, new_pos)``; raises ``BadRarFile`` if fewer than
    four bytes are available at ``pos``.
    """
    end = pos + 4
    if end > len(buf):
        raise BadRarFile("cannot load le32")
    return S_LONG.unpack_from(buf, pos)[0], end
def load_bytes(buf, num, pos):
    """Load sequence of bytes.

    Returns ``(buf[pos:pos + num], new_pos)``; raises ``BadRarFile`` if
    the slice would run past the end of ``buf``.
    """
    end = pos + num
    if end > len(buf):
        raise BadRarFile("cannot load bytes")
    return buf[pos: end], end
def load_vstr(buf, pos):
    """Load bytes prefixed by vint length.

    Returns ``(data, new_pos)``.
    """
    slen, pos = load_vint(buf, pos)
    return load_bytes(buf, slen, pos)
def load_dostime(buf, pos):
    """Load LE32 dos timestamp.

    Returns ``(datetime, new_pos)``.
    """
    stamp, pos = load_le32(buf, pos)
    tup = parse_dos_time(stamp)
    return to_datetime(tup), pos
def load_unixtime(buf, pos):
    """Load LE32 unix timestamp.

    Returns ``(datetime, new_pos)`` with the datetime in UTC.
    """
    secs, pos = load_le32(buf, pos)
    dt = datetime.fromtimestamp(secs, timezone.utc)
    return dt, pos
def load_windowstime(buf, pos):
    """Load LE64 windows timestamp.

    The value counts 100ns units since the windows epoch and is read as
    two LE32 halves, low half first.  Returns ``(datetime, new_pos)``
    in UTC, with nanoseconds applied via ``to_nsdatetime``.
    """
    # unix epoch (1970) in seconds from windows epoch (1601)
    unix_epoch = 11644473600
    val1, pos = load_le32(buf, pos)
    val2, pos = load_le32(buf, pos)
    secs, n1secs = divmod((val2 << 32) | val1, 10000000)
    dt = datetime.fromtimestamp(secs - unix_epoch, timezone.utc)
    dt = to_nsdatetime(dt, n1secs * 100)
    return dt, pos
# volume numbering
# matches strings made only of ASCII digits
_rc_num = re.compile('^[0-9]+$')
def _next_newvol(volfile):
    """New-style next volume.

    Increments the right-most digit run found in the final path
    component.  Names with no extension or a ``.exe``/``.sfx`` extension
    are first switched to ``.rar``.  Raises ``BadRarName`` when the
    final path component contains no digit.
    """
    name, ext = os.path.splitext(volfile)
    if ext.lower() in ("", ".exe", ".sfx"):
        volfile = name + ".rar"
    i = len(volfile) - 1
    while i >= 0:
        if "0" <= volfile[i] <= "9":
            return _inc_volname(volfile, i, False)
        # do not look for digits in parent directories
        if volfile[i] in ("/", os.sep):
            break
        i -= 1
    raise BadRarName("Cannot construct volume name: " + volfile)
def _next_oldvol(volfile):
    """Old-style next volume.

    The first volume's ``.rar`` extension becomes ``.r00``; an extension
    whose tail (after the first two characters) is numeric is
    incremented, letting the carry spill into the letter.
    """
    name, ext = os.path.splitext(volfile)
    if ext.lower() in ("", ".exe", ".sfx"):
        ext = ".rar"
    sfx = ext[2:]
    if _rc_num.match(sfx):
        ext = _inc_volname(ext, len(ext) - 1, True)
    else:
        # .rar -> .r00
        ext = ext[:2] + "00"
    return name + ext
def _inc_volname(volfile, i, inc_chars):
    """increase digits with carry, otherwise just increment char

    Starts at index ``i`` and moves left while the digit is "9".  When
    the carry reaches a non-digit, that character is incremented if
    ``inc_chars`` is true, otherwise a "1" is inserted after it.
    Returns the new name.
    """
    fn = list(volfile)
    while i >= 0:
        if fn[i] == "9":
            fn[i] = "0"
            i -= 1
            if i < 0:
                # carry ran off the start of the string
                fn.insert(0, "1")
        elif "0" <= fn[i] < "9" or inc_chars:
            fn[i] = chr(ord(fn[i]) + 1)
            break
        else:
            fn.insert(i + 1, "1")
            break
    return "".join(fn)
def _parse_ext_time(h, data, pos):
    """Parse all RAR3 extended time fields.

    Reads a 16-bit flags word (if present) holding one 4-bit group per
    timestamp, then fills ``h.ctime``, ``h.atime``, ``h.arctime`` and,
    when available, a refined ``h.mtime`` / ``h.date_time``.
    Returns the position after the parsed data.
    """
    # flags and rest of data can be missing
    flags = 0
    if pos + 2 <= len(data):
        flags = S_SHORT.unpack_from(data, pos)[0]
        pos += 2

    mtime, pos = _parse_xtime(flags >> 3 * 4, data, pos, h.mtime)
    h.ctime, pos = _parse_xtime(flags >> 2 * 4, data, pos)
    h.atime, pos = _parse_xtime(flags >> 1 * 4, data, pos)
    h.arctime, pos = _parse_xtime(flags >> 0 * 4, data, pos)
    if mtime:
        h.mtime = mtime
        h.date_time = mtime.timetuple()[:6]
    return pos
def _parse_xtime(flag, data, pos, basetime=None):
    """Parse one RAR3 extended time field.

    Bits of ``flag``: 8 = field present, 4 = add one second,
    low two bits = number of fraction bytes that follow.
    Returns ``(datetime or None, new_pos)``.
    """
    res = None
    if flag & 8:
        if not basetime:
            basetime, pos = load_dostime(data, pos)

        # load second fractions of 100ns units
        rem = 0
        cnt = flag & 3
        for _ in range(cnt):
            b, pos = load_byte(data, pos)
            rem = (b << 16) | (rem >> 8)

        # dostime has room for 30 seconds only, correct if needed
        if flag & 4 and basetime.second < 59:
            basetime = basetime.replace(second=basetime.second + 1)

        res = to_nsdatetime(basetime, rem * 100)
    return res, pos
def is_filelike(obj):
    """Filename or file object?

    Returns False for ``bytes``, ``str`` and ``Path`` (treated as
    filenames) and True for objects offering ``read``, ``tell`` and
    ``seek``.  Anything else raises ``ValueError``.
    """
    if isinstance(obj, (bytes, str, Path)):
        return False
    if not all(hasattr(obj, a) for a in ("read", "tell", "seek")):
        raise ValueError("Invalid object passed as file")
    return True
def rar3_s2k(pwd, salt):
    """String-to-key hash for RAR3.

    ``pwd`` may be ``str`` or utf8-encoded bytes.  Runs 16 * 0x4000
    rounds of the RAR3 SHA1 variant over password + salt + 3-byte
    counter.  Returns ``(key, iv)``: a 16-byte key and a 16-byte IV
    collected one digest byte at the start of each outer round.
    """
    if not isinstance(pwd, str):
        pwd = pwd.decode("utf8")
    # bytearray: Rar3Sha1(rarbug=True) may modify the buffer in place
    seed = bytearray(pwd.encode("utf-16le") + salt)
    h = Rar3Sha1(rarbug=True)
    iv = b""
    for i in range(16):
        for j in range(0x4000):
            cnt = S_LONG.pack(i * 0x4000 + j)
            h.update(seed)
            h.update(cnt[:3])
            if j == 0:
                iv += h.digest()[19:20]
    key_be = h.digest()[:16]
    # swap byte order of each 32-bit word
    key_le = pack("<LLLL", *unpack(">LLLL", key_be))
    return key_le, iv
def rar3_decompress(vers, meth, data, declen=0, flags=0, crc=0, pwd=None, salt=None):
    """Decompress blob of compressed data.

    Used for data with non-standard header - eg. comments.

    Stored (``RAR_M0``), non-encrypted data is returned unchanged.
    Otherwise a minimal single-file archive is written to a temporary
    file and extracted with the configured command-line tool; the
    tool's output is returned.
    """
    # already uncompressed?
    if meth == RAR_M0 and (flags & RAR_FILE_PASSWORD) == 0:
        return data

    # take only necessary flags
    flags = flags & (RAR_FILE_PASSWORD | RAR_FILE_SALT | RAR_FILE_DICTMASK)
    flags |= RAR_LONG_BLOCK

    # file header
    fname = b"data"
    date = ((2010 - 1980) << 25) + (12 << 21) + (31 << 16)
    mode = DOS_MODE_ARCHIVE
    fhdr = S_FILE_HDR.pack(len(data), declen, RAR_OS_MSDOS, crc,
                           date, vers, meth, len(fname), mode)
    fhdr += fname
    if salt:
        fhdr += salt

    # full header
    hlen = S_BLK_HDR.size + len(fhdr)
    # pack once with a zero CRC to compute the checksum, then repack
    hdr = S_BLK_HDR.pack(0, RAR_BLOCK_FILE, flags, hlen) + fhdr
    hcrc = crc32(hdr[2:]) & 0xFFFF
    hdr = S_BLK_HDR.pack(hcrc, RAR_BLOCK_FILE, flags, hlen) + fhdr

    # archive main header
    mh = S_BLK_HDR.pack(0x90CF, RAR_BLOCK_MAIN, 0, 13) + b"\0" * (2 + 4)

    # decompress via temp rar
    setup = tool_setup()
    tmpfd, tmpname = mkstemp(suffix=".rar", dir=HACK_TMP_DIR)
    tmpf = os.fdopen(tmpfd, "wb")
    try:
        tmpf.write(RAR_ID + mh + hdr + data)
        tmpf.close()

        # password is used only for encrypted data; empty means none
        curpwd = (pwd or None) if (flags & RAR_FILE_PASSWORD) else None
        cmd = setup.open_cmdline(curpwd, tmpname)
        p = custom_popen(cmd)
        return p.communicate()[0]
    finally:
        tmpf.close()
        os.unlink(tmpname)
def sanitize_filename(fname, pathsep, is_win32):
    """Simulate unrar sanitization.

    Replaces disallowed characters with "_", drops empty, "." and ".."
    path segments and joins the rest with ``pathsep``.  In win32 mode a
    leading drive prefix is removed and a trailing space or dot of a
    segment becomes "_".
    """
    if is_win32:
        if len(fname) > 1 and fname[1] == ":":
            fname = fname[2:]
        rc = RC_BAD_CHARS_WIN32
    else:
        rc = RC_BAD_CHARS_UNIX
    if rc.search(fname):
        fname = rc.sub("_", fname)

    parts = []
    for seg in fname.split("/"):
        if seg in ("", ".", ".."):
            continue
        if is_win32 and seg[-1] in (" ", "."):
            seg = seg[:-1] + "_"
        parts.append(seg)
    return pathsep.join(parts)
def empty_read(src, size, blklen):
    """Read and drop fixed amount of data.

    Reads ``size`` bytes from ``src`` in chunks of at most ``blklen``.
    Raises ``BadRarFile`` if ``src`` runs out of data first.
    """
    while size > 0:
        res = src.read(min(size, blklen))
        if not res:
            raise BadRarFile("cannot load data")
        size -= len(res)
def to_datetime(t):
    """Convert 6-part time tuple into datetime object.

    Valid tuples are converted as-is.  If any field is rejected by
    ``datetime``, the fields are clamped (month to 1..12, day to
    1..days-in-month with February fixed at 28, upper bounds for
    hour/minute/second) and the conversion is retried.
    """
    # extract values
    year, mon, day, h, m, s = t

    # assume the values are valid
    try:
        return datetime(year, mon, day, h, m, s)
    except ValueError:
        pass

    # sanitize invalid values
    mday = (0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
    mon = max(1, min(mon, 12))
    day = max(1, min(day, mday[mon]))
    h = min(h, 23)
    m = min(m, 59)
    s = min(s, 59)
    return datetime(year, mon, day, h, m, s)
def parse_dos_time(stamp):
    """Parse standard 32-bit DOS timestamp.

    Returns ``(year, month, day, hour, minute, second)``.  Seconds are
    stored in 2-second units and the year as an offset from 1980.
    Fields are not validated.
    """
    sec, stamp = stamp & 0x1F, stamp >> 5
    mn, stamp = stamp & 0x3F, stamp >> 6
    hr, stamp = stamp & 0x1F, stamp >> 5
    day, stamp = stamp & 0x1F, stamp >> 5
    mon, stamp = stamp & 0x0F, stamp >> 4
    yr = (stamp & 0x7F) + 1980
    return (yr, mon, day, hr, mn, sec * 2)
# pylint: disable=arguments-differ,signature-differs
class nsdatetime(datetime):
    """Datetime that carries nanoseconds.

    Arithmetic not supported, will lose nanoseconds.

    Comparison with objects that ``datetime`` itself cannot compare
    returns ``NotImplemented`` so that Python applies its normal
    fallback (``==`` is False, ordering raises ``TypeError``).

    .. versionadded:: 4.0
    """
    __slots__ = ("nanosecond",)
    nanosecond: int     #: Number of nanoseconds, 0 <= nanosecond <= 999999999

    def __new__(cls, year, month=None, day=None, hour=0, minute=0, second=0,
                microsecond=0, tzinfo=None, *, fold=0, nanosecond=0):
        """Create timestamp; ``nanosecond``, when non-zero, overrides ``microsecond``.

        If the value has no sub-microsecond part, a plain ``datetime``
        is returned instead of a ``nsdatetime``.
        """
        usec, mod = divmod(nanosecond, 1000) if nanosecond else (microsecond, 0)
        if mod == 0:
            return datetime(year, month, day, hour, minute, second, usec, tzinfo, fold=fold)
        self = super().__new__(cls, year, month, day, hour, minute, second, usec, tzinfo, fold=fold)
        self.nanosecond = nanosecond
        return self

    @staticmethod
    def _nsec_of(other):
        """Return sub-second part of a datetime-like object in nanoseconds."""
        if isinstance(other, nsdatetime):
            return other.nanosecond
        return other.microsecond * 1000

    def isoformat(self, sep="T", timespec="auto"):
        """Formats with nanosecond precision by default.
        """
        if timespec == "auto":
            pre, post = super().isoformat(sep, "microseconds").split(".", 1)
            # post[6:] keeps whatever follows the 6 microsecond digits
            return f"{pre}.{self.nanosecond:09d}{post[6:]}"
        return super().isoformat(sep, timespec)

    def astimezone(self, tz=None):
        """Convert to new timezone.
        """
        tmp = super().astimezone(tz)
        return self.__class__(tmp.year, tmp.month, tmp.day, tmp.hour, tmp.minute, tmp.second,
                              nanosecond=self.nanosecond, tzinfo=tmp.tzinfo, fold=tmp.fold)

    def replace(self, year=None, month=None, day=None, hour=None, minute=None, second=None,
                microsecond=None, tzinfo=None, *, fold=None, nanosecond=None):
        """Return new timestamp with specified fields replaced.
        """
        return self.__class__(
            self.year if year is None else year,
            self.month if month is None else month,
            self.day if day is None else day,
            self.hour if hour is None else hour,
            self.minute if minute is None else minute,
            self.second if second is None else second,
            nanosecond=((self.nanosecond if microsecond is None else microsecond * 1000)
                        if nanosecond is None else nanosecond),
            tzinfo=self.tzinfo if tzinfo is None else tzinfo,
            fold=self.fold if fold is None else fold)

    def __hash__(self):
        return hash((super().__hash__(), self.nanosecond)) if self.nanosecond else super().__hash__()

    def __eq__(self, other):
        base = super().__eq__(other)
        if base is NotImplemented:
            # not comparable as datetime; let Python try the other side
            return NotImplemented
        return base and self.nanosecond == self._nsec_of(other)

    def __gt__(self, other):
        base = super().__gt__(other)
        if base is NotImplemented:
            return NotImplemented
        # equal at microsecond precision: nanoseconds decide
        return base or (super().__eq__(other) is True
                        and self.nanosecond > self._nsec_of(other))

    def __lt__(self, other):
        return not (self > other or self == other)

    def __ge__(self, other):
        return not self < other

    def __le__(self, other):
        return not self > other

    def __ne__(self, other):
        return not self == other
def to_nsdatetime(dt, nsec):
    """Apply nanoseconds to datetime.

    Returns ``dt`` itself when ``nsec`` is zero/empty; otherwise builds a
    ``nsdatetime`` from ``dt``'s date, time (to the second), tzinfo and
    fold with ``nsec`` as the sub-second part.
    """
    if not nsec:
        return dt
    return nsdatetime(dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second,
                      tzinfo=dt.tzinfo, fold=dt.fold, nanosecond=nsec)
def to_nsecs(dt):
    """Convert datetime instance to nanoseconds.

    Returns whole seconds of ``dt.timestamp()`` scaled to nanoseconds
    plus the sub-second part (``nanosecond`` for ``nsdatetime``,
    otherwise ``microsecond * 1000``).
    """
    secs = int(dt.timestamp())
    nsecs = dt.nanosecond if isinstance(dt, nsdatetime) else dt.microsecond * 1000
    return secs * 1000000000 + nsecs
def custom_popen(cmd):
    """Disconnect cmd from parent fds, read only from stdout.

    Starts ``cmd`` unbuffered with stdin from the null device and stderr
    merged into the stdout pipe.  Returns the ``Popen`` object.
    Raises ``RarCannotExec`` when the executable is missing or not
    permitted to run; other ``OSError`` values propagate.
    """
    creationflags = 0x08000000 if WIN32 else 0   # CREATE_NO_WINDOW
    try:
        p = Popen(cmd, bufsize=0, stdout=PIPE, stderr=STDOUT, stdin=DEVNULL,
                  creationflags=creationflags)
    except OSError as ex:
        if ex.errno == errno.ENOENT:
            raise RarCannotExec("Unrar not installed?") from None
        if ex.errno in (errno.EACCES, errno.EPERM):
            raise RarCannotExec("Cannot execute unrar") from None
        raise
    return p
def check_returncode(code, out, errmap):
    """Raise exception according to unrar exit code.

    ``errmap`` is a list indexed by exit code holding exception classes;
    entries may be ``None`` for codes without a specific exception.
    Code 0 returns silently.  Unmapped codes raise ``RarUserBreak``
    (255), ``RarSignalExit`` (negative) or ``RarUnknownError``.
    ``out`` is appended to the message when non-empty.
    """
    if code == 0:
        return

    exc = None
    if 0 < code < len(errmap):
        exc = errmap[code]
    elif code == 255:
        exc = RarUserBreak
    elif code < 0:
        exc = RarSignalExit

    # no mapping at all, or a None placeholder in errmap
    if exc is None:
        exc = RarUnknownError

    # format message
    if out:
        msg = "%s [%d]: %s" % (exc.__doc__, code, out)
    else:
        msg = "%s [%d]" % (exc.__doc__, code)

    raise exc(msg)
def membuf_tempfile(memfile):
    """Write in-memory file object to real file.

    Copies ``memfile`` from its start into a new ``.rar`` temp file
    under ``HACK_TMP_DIR`` and returns the temp file's name.  On any
    error the temp file is removed and the error re-raised.
    """
    memfile.seek(0, 0)

    tmpfd, tmpname = mkstemp(suffix=".rar", dir=HACK_TMP_DIR)
    tmpf = os.fdopen(tmpfd, "wb")

    try:
        shutil.copyfileobj(memfile, tmpf, BSIZE)
        tmpf.close()
    except BaseException:
        tmpf.close()
        os.unlink(tmpname)
        raise
    return tmpname
# Find working command-line tool
class ToolSetup:
    """Build command lines for one extraction tool from a config dict.

    The dict supplies the keys ``open_cmd``, ``check_cmd``, ``password``,
    ``no_password`` and ``errmap``.
    """

    def __init__(self, setup):
        self.setup = setup

    def check(self):
        """Return True if the tool's check command runs and exits with 0."""
        cmdline = self.get_cmdline("check_cmd", None)
        try:
            p = custom_popen(cmdline)
            p.communicate()
            return p.returncode == 0
        except RarCannotExec:
            return False

    def open_cmdline(self, pwd, rarfn, filefn=None):
        """Return command line that extracts ``filefn`` from ``rarfn`` to stdout."""
        cmdline = self.get_cmdline("open_cmd", pwd)
        cmdline.append(rarfn)
        if filefn:
            self.add_file_arg(cmdline, filefn)
        return cmdline

    def get_errmap(self):
        """Return the tool's exit-code to exception list."""
        return self.setup["errmap"]

    def get_cmdline(self, key, pwd, nodash=False):
        """Return a fresh argument list for config entry ``key``.

        The first element of the config tuple names a module-level
        variable that holds the executable.  Except for ``check_cmd``,
        password arguments and (unless ``nodash``) a ``--`` separator
        are appended.
        """
        cmdline = list(self.setup[key])
        cmdline[0] = globals()[cmdline[0]]
        if key == "check_cmd":
            return cmdline
        self.add_password_arg(cmdline, pwd)
        if not nodash:
            cmdline.append("--")
        return cmdline

    def add_file_arg(self, cmdline, filename):
        """Append the archive member name to ``cmdline``."""
        cmdline.append(filename)

    def add_password_arg(self, cmdline, pwd):
        """Append password switch to commandline.

        ``pwd`` may be ``str`` or utf8 bytes.  A string ``password``
        config is glued to the password as one argument; a sequence is
        added as separate arguments followed by the password.  Raises
        ``RarCannotExec`` if the tool has no password switch.
        With ``pwd=None`` the ``no_password`` arguments are added.
        """
        if pwd is not None:
            if not isinstance(pwd, str):
                pwd = pwd.decode("utf8")
            args = self.setup["password"]
            if args is None:
                tool = self.setup["open_cmd"][0]
                raise RarCannotExec(f"{tool} does not support passwords")
            elif isinstance(args, str):
                cmdline.append(args + pwd)
            else:
                cmdline.extend(args)
                cmdline.append(pwd)
        else:
            cmdline.extend(self.setup["no_password"])
# Settings for the unrar tool.  The first element of each command tuple
# names the module-level variable holding the executable.
UNRAR_CONFIG = {
    "open_cmd": ("UNRAR_TOOL", "p", "-inul"),
    "check_cmd": ("UNRAR_TOOL", "-inul"),
    "password": "-p",
    "no_password": ("-p-",),
    # map return code to exception class, codes from rar.txt
    "errmap": [None,
               RarWarning, RarFatalError, RarCRCError, RarLockedArchiveError,    # 1..4
               RarWriteError, RarOpenError, RarUserError, RarMemoryError,        # 5..8
               RarCreateError, RarNoFilesError, RarWrongPassword]                # 9..11
}
# Problems with unar RAR backend:
# - Does not support RAR2 locked files [fails to read]
# - Does not support RAR5 Blake2sp hash [reading works]
UNAR_CONFIG = {
    "open_cmd": ("UNAR_TOOL", "-q", "-o", "-"),
    "check_cmd": ("UNAR_TOOL", "-version"),
    # tuple form: switch and password are passed as separate arguments
    "password": ("-p",),
    "no_password": ("-p", ""),
    # no exit codes are mapped to specific exceptions
    "errmap": [None],
}
# Problems with libarchive RAR backend:
# - Does not support solid archives.
# - Does not support password-protected archives.
# - Does not support RARVM-based compression filters.
BSDTAR_CONFIG = {
    "open_cmd": ("BSDTAR_TOOL", "-x", "--to-stdout", "-f"),
    "check_cmd": ("BSDTAR_TOOL", "--version"),
    # None: the tool has no password switch
    "password": None,
    "no_password": (),
    "errmap": [None],
}
# Settings for the tool named by SEVENZIP_TOOL.
SEVENZIP_CONFIG = {
    "open_cmd": ("SEVENZIP_TOOL", "e", "-so", "-bb0"),
    "check_cmd": ("SEVENZIP_TOOL", "i"),
    "password": "-p",
    "no_password": ("-p",),
    # None entries: exit codes without a specific exception class
    "errmap": [None,
               RarWarning, RarFatalError, None, None,    # 1..4
               None, None, RarUserError, RarMemoryError]  # 5..8
}
# Settings for the tool named by SEVENZIP2_TOOL; same switches as
# SEVENZIP_CONFIG with a different executable variable.
SEVENZIP2_CONFIG = {
    "open_cmd": ("SEVENZIP2_TOOL", "e", "-so", "-bb0"),
    "check_cmd": ("SEVENZIP2_TOOL", "i"),
    "password": "-p",
    "no_password": ("-p",),
    # None entries: exit codes without a specific exception class
    "errmap": [None,
               RarWarning, RarFatalError, None, None,    # 1..4
               None, None, RarUserError, RarMemoryError]  # 5..8
}
# Cached ToolSetup chosen by tool_setup(); None until a tool is picked.
CURRENT_SETUP = None
def tool_setup(unrar=True, unar=True, bsdtar=True, sevenzip=True, sevenzip2=True, force=False):
    """Pick a tool, return cached ToolSetup.

    The boolean arguments select which backends may be tried; they are
    probed in the order unrar, unar, sevenzip, sevenzip2, bsdtar and the
    first one whose check succeeds is cached in ``CURRENT_SETUP``.
    With ``force`` the cache is discarded first.  Raises
    ``RarCannotExec`` if no backend works.
    """
    global CURRENT_SETUP
    if force:
        CURRENT_SETUP = None
    if CURRENT_SETUP is not None:
        return CURRENT_SETUP
    lst = []
    if unrar:
        lst.append(UNRAR_CONFIG)
    if unar:
        lst.append(UNAR_CONFIG)
    if sevenzip:
        lst.append(SEVENZIP_CONFIG)
    if sevenzip2:
        lst.append(SEVENZIP2_CONFIG)
    if bsdtar:
        lst.append(BSDTAR_CONFIG)

    for conf in lst:
        setup = ToolSetup(conf)
        if setup.check():
            CURRENT_SETUP = setup
            break
    if CURRENT_SETUP is None:
        raise RarCannotExec("Cannot find working tool")
    return CURRENT_SETUP
def main(args):
    """Minimal command-line interface for rarfile module.
    """
    # NOTE: the docstring above doubles as the argparse description.
    import argparse
    p = argparse.ArgumentParser(description=main.__doc__)
    # exactly one of the actions must be given
    g = p.add_mutually_exclusive_group(required=True)
    g.add_argument("-l", "--list", metavar="<rarfile>",
                   help="Show archive listing")
    g.add_argument("-e", "--extract", nargs=2,
                   metavar=("<rarfile>", "<output_dir>"),
                   help="Extract archive into target dir")
    g.add_argument("-t", "--test", metavar="<rarfile>",
                   help="Test if a archive is valid")
    cmd = p.parse_args(args)

    if cmd.list:
        with RarFile(cmd.list) as rf:
            rf.printdir()
    elif cmd.test:
        with RarFile(cmd.test) as rf:
            rf.testrar()
    elif cmd.extract:
        with RarFile(cmd.extract[0]) as rf:
            rf.extractall(cmd.extract[1])
if __name__ == "__main__":
    # run the command-line interface with the arguments after the script name
    main(sys.argv[1:])