Add RarFile.is_solid()
[rarfile.git] / rarfile.py
blob478a2be98c567611be92a709998e06ac45591928
1 # rarfile.py
3 # Copyright (c) 2005-2020 Marko Kreen <markokr@gmail.com>
5 # Permission to use, copy, modify, and/or distribute this software for any
6 # purpose with or without fee is hereby granted, provided that the above
7 # copyright notice and this permission notice appear in all copies.
9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 """RAR archive reader.
19 This is Python module for Rar archive reading. The interface
20 is made as :mod:`zipfile`-like as possible.
22 Basic logic:
23 - Parse archive structure with Python.
24 - Extract non-compressed files with Python
25 - Extract compressed files with unrar.
26 - Optionally write compressed data to temp file to speed up unrar,
27 otherwise it needs to scan whole archive on each execution.
29 Example::
31 import rarfile
33 rf = rarfile.RarFile("myarchive.rar")
34 for f in rf.infolist():
35 print(f.filename, f.file_size)
36 if f.filename == "README":
37 print(rf.read(f))
39 Archive files can also be accessed via file-like object returned
40 by :meth:`RarFile.open`::
42 import rarfile
44 with rarfile.RarFile("archive.rar") as rf:
45 with rf.open("README") as f:
46 for ln in f:
47 print(ln.strip())
49 For decompression to work, either ``unrar`` or ``unar`` tool must be in PATH.
50 """
52 import errno
53 import io
54 import os
55 import re
56 import shutil
57 import struct
58 import sys
59 import warnings
60 from binascii import crc32, hexlify
61 from datetime import datetime, timezone
62 from hashlib import blake2s, pbkdf2_hmac, sha1, sha256
63 from pathlib import Path
64 from struct import Struct, pack, unpack
65 from subprocess import DEVNULL, PIPE, STDOUT, Popen
66 from tempfile import mkstemp
# Placeholder so the name exists even when the Crypto fallback is not imported.
AES = None

# only needed for encrypted headers
#
# _have_crypto records which backend was importable:
#   1 - the "cryptography" package
#   2 - the "Crypto" package (only tried when "cryptography" is missing)
#   0 - neither is available
try:
    try:
        from cryptography.hazmat.backends import default_backend
        from cryptography.hazmat.primitives.ciphers import (
            Cipher, algorithms, modes,
        )
        _have_crypto = 1
    except ImportError:
        from Crypto.Cipher import AES
        _have_crypto = 2
except ImportError:
    _have_crypto = 0
class AES_CBC_Decrypt:
    """AES-CBC decryptor exposing a single ``decrypt(data)`` callable.

    The callable is taken from whichever crypto backend was imported
    at module load time (see ``_have_crypto``).
    """

    def __init__(self, key, iv):
        if _have_crypto != 2:
            # "cryptography" backend: streaming decryptor, use its update()
            cipher = Cipher(algorithms.AES(key), modes.CBC(iv), default_backend())
            decrypt_fn = cipher.decryptor().update
        else:
            # "Crypto" backend
            decrypt_fn = AES.new(key, AES.MODE_CBC, iv).decrypt
        self.decrypt = decrypt_fn
__version__ = "4.1"

# export only interesting items
__all__ = ["get_rar_version", "is_rarfile", "is_rarfile_sfx", "RarInfo", "RarFile", "RarExtFile"]

##
## Module configuration.  Can be tuned after importing.
##

#: executable for unrar tool
UNRAR_TOOL = "unrar"

#: executable for unar tool
UNAR_TOOL = "unar"

#: executable for bsdtar tool
BSDTAR_TOOL = "bsdtar"

#: executable for p7zip/7z tool
SEVENZIP_TOOL = "7z"

#: executable for alternative 7z tool
SEVENZIP2_TOOL = "7zz"

#: default fallback charset
DEFAULT_CHARSET = "windows-1252"

#: list of encodings to try, with fallback to DEFAULT_CHARSET if none succeed
TRY_ENCODINGS = ("utf8", "utf-16le")

#: whether to speed up decompression by using tmp archive
USE_EXTRACT_HACK = 1

#: limit the filesize for tmp archive usage
HACK_SIZE_LIMIT = 20 * 1024 * 1024

#: set specific directory for mkstemp() used by hack dir usage
HACK_TMP_DIR = None

#: Separator for path name components.  Always "/".
PATH_SEP = "/"

##
## rar constants
##

# block types
RAR_BLOCK_MARK          = 0x72  # r
RAR_BLOCK_MAIN          = 0x73  # s
RAR_BLOCK_FILE          = 0x74  # t
RAR_BLOCK_OLD_COMMENT   = 0x75  # u
RAR_BLOCK_OLD_EXTRA     = 0x76  # v
RAR_BLOCK_OLD_SUB       = 0x77  # w
RAR_BLOCK_OLD_RECOVERY  = 0x78  # x
RAR_BLOCK_OLD_AUTH      = 0x79  # y
RAR_BLOCK_SUB           = 0x7a  # z
RAR_BLOCK_ENDARC        = 0x7b  # {

# flags for RAR_BLOCK_MAIN
RAR_MAIN_VOLUME         = 0x0001
RAR_MAIN_COMMENT        = 0x0002
RAR_MAIN_LOCK           = 0x0004
RAR_MAIN_SOLID          = 0x0008
RAR_MAIN_NEWNUMBERING   = 0x0010
RAR_MAIN_AUTH           = 0x0020
RAR_MAIN_RECOVERY       = 0x0040
RAR_MAIN_PASSWORD       = 0x0080
RAR_MAIN_FIRSTVOLUME    = 0x0100
RAR_MAIN_ENCRYPTVER     = 0x0200

# flags for RAR_BLOCK_FILE
RAR_FILE_SPLIT_BEFORE   = 0x0001
RAR_FILE_SPLIT_AFTER    = 0x0002
RAR_FILE_PASSWORD       = 0x0004
RAR_FILE_COMMENT        = 0x0008
RAR_FILE_SOLID          = 0x0010
# RAR_FILE_DICTMASK selects a 3-bit field; the RAR_FILE_DICT* values and
# RAR_FILE_DIRECTORY below are the possible values of that field.
RAR_FILE_DICTMASK       = 0x00e0
RAR_FILE_DICT64         = 0x0000
RAR_FILE_DICT128        = 0x0020
RAR_FILE_DICT256        = 0x0040
RAR_FILE_DICT512        = 0x0060
RAR_FILE_DICT1024       = 0x0080
RAR_FILE_DICT2048       = 0x00a0
RAR_FILE_DICT4096       = 0x00c0
RAR_FILE_DIRECTORY      = 0x00e0
RAR_FILE_LARGE          = 0x0100
RAR_FILE_UNICODE        = 0x0200
RAR_FILE_SALT           = 0x0400
RAR_FILE_VERSION        = 0x0800
RAR_FILE_EXTTIME        = 0x1000
RAR_FILE_EXTFLAGS       = 0x2000

# flags for RAR_BLOCK_ENDARC
RAR_ENDARC_NEXT_VOLUME  = 0x0001
RAR_ENDARC_DATACRC      = 0x0002
RAR_ENDARC_REVSPACE     = 0x0004
RAR_ENDARC_VOLNR        = 0x0008

# flags common to all blocks
RAR_SKIP_IF_UNKNOWN     = 0x4000
RAR_LONG_BLOCK          = 0x8000

# Host OS types
RAR_OS_MSDOS = 0    #: MSDOS (only in RAR3)
RAR_OS_OS2   = 1    #: OS2 (only in RAR3)
RAR_OS_WIN32 = 2    #: Windows
RAR_OS_UNIX  = 3    #: UNIX
RAR_OS_MACOS = 4    #: MacOS (only in RAR3)
RAR_OS_BEOS  = 5    #: BeOS (only in RAR3)

# Compression methods - "0".."5"
RAR_M0 = 0x30   #: No compression.
RAR_M1 = 0x31   #: Compression level `-m1` - Fastest compression.
RAR_M2 = 0x32   #: Compression level `-m2`.
RAR_M3 = 0x33   #: Compression level `-m3`.
RAR_M4 = 0x34   #: Compression level `-m4`.
RAR_M5 = 0x35   #: Compression level `-m5` - Maximum compression.

#
# RAR5 constants
#

# RAR5 block types
RAR5_BLOCK_MAIN = 1
RAR5_BLOCK_FILE = 2
RAR5_BLOCK_SERVICE = 3
RAR5_BLOCK_ENCRYPTION = 4
RAR5_BLOCK_ENDARC = 5

# RAR5 per-block flags
RAR5_BLOCK_FLAG_EXTRA_DATA = 0x01
RAR5_BLOCK_FLAG_DATA_AREA = 0x02
RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN = 0x04
RAR5_BLOCK_FLAG_SPLIT_BEFORE = 0x08
RAR5_BLOCK_FLAG_SPLIT_AFTER = 0x10
RAR5_BLOCK_FLAG_DEPENDS_PREV = 0x20
RAR5_BLOCK_FLAG_KEEP_WITH_PARENT = 0x40

# flags for RAR5_BLOCK_MAIN
RAR5_MAIN_FLAG_ISVOL = 0x01
RAR5_MAIN_FLAG_HAS_VOLNR = 0x02
RAR5_MAIN_FLAG_SOLID = 0x04
RAR5_MAIN_FLAG_RECOVERY = 0x08
RAR5_MAIN_FLAG_LOCKED = 0x10

# flags for RAR5_BLOCK_FILE
RAR5_FILE_FLAG_ISDIR = 0x01
RAR5_FILE_FLAG_HAS_MTIME = 0x02
RAR5_FILE_FLAG_HAS_CRC32 = 0x04
RAR5_FILE_FLAG_UNKNOWN_SIZE = 0x08

RAR5_COMPR_SOLID = 0x40

RAR5_ENC_FLAG_HAS_CHECKVAL = 0x01

RAR5_ENDARC_FLAG_NEXT_VOL = 0x01

# RAR5 file extra-record types
RAR5_XFILE_ENCRYPTION = 1
RAR5_XFILE_HASH = 2
RAR5_XFILE_TIME = 3
RAR5_XFILE_VERSION = 4
RAR5_XFILE_REDIR = 5
RAR5_XFILE_OWNER = 6
RAR5_XFILE_SERVICE = 7

# flags of the RAR5_XFILE_TIME record
RAR5_XTIME_UNIXTIME = 0x01
RAR5_XTIME_HAS_MTIME = 0x02
RAR5_XTIME_HAS_CTIME = 0x04
RAR5_XTIME_HAS_ATIME = 0x08
RAR5_XTIME_UNIXTIME_NS = 0x10

RAR5_XENC_CIPHER_AES256 = 0

RAR5_XENC_CHECKVAL = 0x01
RAR5_XENC_TWEAKED = 0x02

RAR5_XHASH_BLAKE2SP = 0

# redirection types, first element of RarInfo.file_redir
RAR5_XREDIR_UNIX_SYMLINK = 1
RAR5_XREDIR_WINDOWS_SYMLINK = 2
RAR5_XREDIR_WINDOWS_JUNCTION = 3
RAR5_XREDIR_HARD_LINK = 4
RAR5_XREDIR_FILE_COPY = 5

# redirection flag bit, second element of RarInfo.file_redir
RAR5_XREDIR_ISDIR = 0x01

RAR5_XOWNER_UNAME = 0x01
RAR5_XOWNER_GNAME = 0x02
RAR5_XOWNER_UID = 0x04
RAR5_XOWNER_GID = 0x08

RAR5_OS_WINDOWS = 0
RAR5_OS_UNIX = 1

# DOS-style attribute bits
DOS_MODE_ARCHIVE = 0x20
DOS_MODE_DIR = 0x10
DOS_MODE_SYSTEM = 0x04
DOS_MODE_HIDDEN = 0x02
DOS_MODE_READONLY = 0x01

RAR5_PW_CHECK_SIZE = 8
RAR5_PW_SUM_SIZE = 4

##
## internal constants
##

# archive signatures; the two differ only after the first 6 bytes
RAR_ID = b"Rar!\x1a\x07\x00"
RAR5_ID = b"Rar!\x1a\x07\x01\x00"

WIN32 = sys.platform == "win32"
# I/O chunk size: larger on Windows
BSIZE = 512 * 1024 if WIN32 else 64 * 1024

# how far into a file the SFX scan looks for an archive signature
SFX_MAX_SIZE = 2 * 1024 * 1024
RAR_V3 = 3
RAR_V5 = 5

# character classes used to build the bad-filename-character regexes
_BAD_CHARS = r"""\x00-\x1F<>|"?*"""
RC_BAD_CHARS_UNIX = re.compile(r"[%s]" % _BAD_CHARS)
RC_BAD_CHARS_WIN32 = re.compile(r"[%s:^\\]" % _BAD_CHARS)

# when true, _open_clear() goes through the external tool instead of DirectReader
FORCE_TOOL = False
def _find_sfx_header(xfile):
    """Locate a RAR signature inside a (possibly self-extracting) file.

    Reads 64 bytes first and then up to SFX_MAX_SIZE more, scanning
    everything read so far for a RAR3 or RAR5 signature.

    Returns tuple ``(version, offset)`` where version is RAR_V3 or RAR_V5,
    or ``(0, 0)`` when no signature was found.
    """
    # common prefix of both signatures
    marker = RAR_ID[:-1]
    collected = io.BytesIO()

    with XFile(xfile) as fd:
        for chunk_size in (64, SFX_MAX_SIZE):
            chunk = fd.read(chunk_size)
            if not chunk:
                break
            collected.write(chunk)
            haystack = collected.getvalue()

            at = haystack.find(marker)
            while at >= 0:
                if haystack.startswith(RAR_ID, at):
                    return RAR_V3, at
                if haystack.startswith(RAR5_ID, at):
                    return RAR_V5, at
                # prefix matched but full signature did not, keep looking
                at = haystack.find(marker, at + len(marker))
    return 0, 0
341 ## Public interface
def get_rar_version(xfile):
    """Check quickly whether file is rar archive.

    Only the leading signature bytes are read.  Returns RAR_V3 or RAR_V5
    when the file starts with the matching signature, 0 otherwise.
    """
    with XFile(xfile) as fd:
        magic = fd.read(len(RAR5_ID))

    if magic[:len(RAR_ID)] == RAR_ID:
        return RAR_V3
    if magic[:len(RAR5_ID)] == RAR5_ID:
        return RAR_V5
    return 0
def is_rarfile(xfile):
    """Check quickly whether file is rar archive.

    Returns False instead of raising when the file cannot be opened or read.
    """
    try:
        version = get_rar_version(xfile)
    except OSError:
        # File not found or not accessible, ignore
        return False
    return version > 0
def is_rarfile_sfx(xfile):
    """Check whether file is rar archive with support for SFX.

    It will read 2M from file.
    """
    version, _offset = _find_sfx_header(xfile)
    return version > 0
class Error(Exception):
    """Base class for rarfile errors."""


class BadRarFile(Error):
    """Incorrect data in archive."""


class NotRarFile(Error):
    """The file is not RAR archive."""


class BadRarName(Error):
    """Cannot guess multipart name components."""


class NoRarEntry(Error):
    """File not found in RAR"""


class PasswordRequired(Error):
    """File requires password"""


class NeedFirstVolume(Error):
    """Need to start from first volume.

    Attributes:

        current_volume
            Volume number of current file or None if not known
    """
    def __init__(self, msg, volume):
        super().__init__(msg)
        self.current_volume = volume


class NoCrypto(Error):
    """Cannot parse encrypted headers - no crypto available."""


# Everything below derives from RarExecError, so callers can catch that
# single class for any problem reported by the external tool.
class RarExecError(Error):
    """Problem reported by unrar/rar."""


class RarWarning(RarExecError):
    """Non-fatal error"""


class RarFatalError(RarExecError):
    """Fatal error"""


class RarCRCError(RarExecError):
    """CRC error during unpacking"""


class RarLockedArchiveError(RarExecError):
    """Must not modify locked archive"""


class RarWriteError(RarExecError):
    """Write error"""


class RarOpenError(RarExecError):
    """Open error"""


class RarUserError(RarExecError):
    """User error"""


class RarMemoryError(RarExecError):
    """Memory error"""


class RarCreateError(RarExecError):
    """Create error"""


class RarNoFilesError(RarExecError):
    """No files that match pattern were found"""


class RarUserBreak(RarExecError):
    """User stop"""


class RarWrongPassword(RarExecError):
    """Incorrect password"""


class RarUnknownError(RarExecError):
    """Unknown exit code"""


class RarSignalExit(RarExecError):
    """Unrar exited with signal"""


class RarCannotExec(RarExecError):
    """Executable not found."""


class UnsupportedWarning(UserWarning):
    """Archive uses feature that are unsupported by rarfile.

    .. versionadded:: 4.0
    """
class RarInfo:
    r"""An entry in rar archive.

    Timestamps as :class:`~datetime.datetime` are without timezone in RAR3,
    with UTC timezone in RAR5 archives.

    Attributes:

        filename
            File name with relative path.
            Path separator is "/".  Always unicode string.

        date_time
            File modification timestamp.   As tuple of (year, month, day, hour, minute, second).
            RAR5 allows archives where it is missing, it's None then.

        comment
            Optional file comment field.  Unicode string.  (RAR3-only)

        file_size
            Uncompressed size.

        compress_size
            Compressed size.

        compress_type
            Compression method: one of :data:`RAR_M0` .. :data:`RAR_M5` constants.

        extract_version
            Minimal Rar version needed for decompressing.  As (major*10 + minor),
            so 2.9 is 29.

            RAR3: 10, 20, 29

            RAR5 does not have such field in archive, it's simply set to 50.

        host_os
            Host OS type, one of RAR_OS_* constants.

            RAR3: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`, :data:`RAR_OS_MSDOS`,
            :data:`RAR_OS_OS2`, :data:`RAR_OS_BEOS`.

            RAR5: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`.

        mode
            File attributes. May be either dos-style or unix-style, depending on host_os.

        mtime
            File modification time.  Same value as :attr:`date_time`
            but as :class:`~datetime.datetime` object with extended precision.

        ctime
            Optional time field: creation time.  As :class:`~datetime.datetime` object.

        atime
            Optional time field: last access time.  As :class:`~datetime.datetime` object.

        arctime
            Optional time field: archival time.  As :class:`~datetime.datetime` object.
            (RAR3-only)

        CRC
            CRC-32 of uncompressed file, unsigned int.

            RAR5: may be None.

        blake2sp_hash
            Blake2SP hash over decompressed data.  (RAR5-only)

        volume
            Volume nr, starting from 0.

        volume_file
            Volume file name, where file starts.

        file_redir
            If not None, file is link of some sort.  Contains tuple of (type, flags, target).
            (RAR5-only)

            Type is one of constants:

                :data:`RAR5_XREDIR_UNIX_SYMLINK`
                    Unix symlink.
                :data:`RAR5_XREDIR_WINDOWS_SYMLINK`
                    Windows symlink.
                :data:`RAR5_XREDIR_WINDOWS_JUNCTION`
                    Windows junction.
                :data:`RAR5_XREDIR_HARD_LINK`
                    Hard link to target.
                :data:`RAR5_XREDIR_FILE_COPY`
                    Current file is copy of another archive entry.

            Flags may contain bits:

                :data:`RAR5_XREDIR_ISDIR`
                    Symlink points to directory.
    """

    # internal bookkeeping: raw header flags and block type
    flags = 0
    type = None

    # fields mirroring zipfile.ZipInfo
    filename = None
    orig_filename = None
    file_size = None
    compress_size = None
    date_time = None
    CRC = None
    volume = None

    # extended timestamps (datetime objects), all optional
    mtime = None
    ctime = None
    atime = None

    # compression / origin details
    compress_type = None
    extract_version = None
    host_os = None
    mode = None

    # present only in RAR3 archives
    comment = None
    arctime = None

    # present only in RAR5 archives
    blake2sp_hash = None
    file_redir = None

    def is_dir(self):
        """Returns True if entry is a directory.

        The base class always answers False.

        .. versionadded:: 4.0
        """
        return False

    def is_symlink(self):
        """Returns True if entry is a symlink.

        The base class always answers False.

        .. versionadded:: 4.0
        """
        return False

    def is_file(self):
        """Returns True if entry is a normal file.

        The base class always answers False.

        .. versionadded:: 4.0
        """
        return False

    def needs_password(self):
        """Returns True if data is stored password-protected.

        Only file blocks can carry the password flag.
        """
        if self.type != RAR_BLOCK_FILE:
            return False
        return (self.flags & RAR_FILE_PASSWORD) > 0

    def isdir(self):
        """Returns True if entry is a directory.

        .. deprecated:: 4.0
        """
        result = self.is_dir()
        return result
class RarFile:
    """Parse RAR structure, provide access to files in archive.

    Parameters:

        file
            archive file name or file-like object.
        mode
            only "r" is supported.
        charset
            fallback charset to use, if filenames are not already Unicode-enabled.
        info_callback
            debug callback, gets to see all archive entries.
        crc_check
            set to False to disable CRC checks
        errors
            Either "stop" to quietly stop parsing on errors,
            or "strict" to raise errors.  Default is "stop".
        part_only
            If True, read only single file and allow it to be middle-part
            of multi-volume archive.

            .. versionadded:: 4.0
    """

    #: File name, if available.  Unicode string or None.
    filename = None

    #: Archive comment.  Unicode string or None.
    comment = None

    def __init__(self, file, mode="r", charset=None, info_callback=None,
                 crc_check=True, errors="stop", part_only=False):
        if is_filelike(file):
            self.filename = getattr(file, "name", None)
        else:
            if isinstance(file, Path):
                file = str(file)
            self.filename = file
        self._rarfile = file

        self._charset = charset or DEFAULT_CHARSET
        self._info_callback = info_callback
        self._crc_check = crc_check
        self._part_only = part_only
        self._password = None
        self._file_parser = None

        if errors == "stop":
            self._strict = False
        elif errors == "strict":
            self._strict = True
        else:
            raise ValueError("Invalid value for errors= parameter.")

        if mode != "r":
            raise NotImplementedError("RarFile supports only mode=r")

        self._parse()

    def __enter__(self):
        """Open context."""
        return self

    def __exit__(self, typ, value, traceback):
        """Exit context."""
        self.close()

    def __iter__(self):
        """Iterate over members."""
        return iter(self.infolist())

    def setpassword(self, pwd):
        """Sets the password to use when extracting.

        If the headers are encrypted, the archive is re-parsed with the
        new password; otherwise the password is only handed to the parser.
        """
        self._password = pwd
        if self._file_parser:
            if self._file_parser.has_header_encryption():
                # drop parser so that headers get re-read below
                self._file_parser = None
        if not self._file_parser:
            self._parse()
        else:
            self._file_parser.setpassword(self._password)

    def needs_password(self):
        """Returns True if any archive entries require password for extraction.
        """
        return self._file_parser.needs_password()

    def is_solid(self):
        """Returns True if archive uses solid compression.
        """
        return self._file_parser.is_solid()

    def namelist(self):
        """Return list of filenames in archive.
        """
        return [f.filename for f in self.infolist()]

    def infolist(self):
        """Return RarInfo objects for all files/directories in archive.
        """
        return self._file_parser.infolist()

    def volumelist(self):
        """Returns filenames of archive volumes.

        In case of single-volume archive, the list contains
        just the name of main archive file.
        """
        return self._file_parser.volumelist()

    def getinfo(self, name):
        """Return RarInfo for file.
        """
        return self._file_parser.getinfo(name)

    def getinfo_orig(self, name):
        """Return RarInfo for file source.

        RAR5: if name is hard-linked or copied file,
        returns original entry with original filename.

        .. versionadded:: 4.1
        """
        return self._file_parser.getinfo_orig(name)

    def open(self, name, mode="r", pwd=None):
        """Returns file-like object (:class:`RarExtFile`) from where the data can be read.

        The object implements :class:`io.RawIOBase` interface, so it can
        be further wrapped with :class:`io.BufferedReader`
        and :class:`io.TextIOWrapper`.

        The object is seekable, although the seeking is fast only on
        uncompressed files, on compressed files the seeking is implemented
        by reading ahead and/or restarting the decompression.

        Parameters:

            name
                file name or RarInfo instance.
            mode
                must be "r"
            pwd
                password to use for extracting.

        Raises:

            NotImplementedError
                if mode is not "r".
            io.UnsupportedOperation
                if the entry is a directory.
            PasswordRequired
                if the entry is encrypted and no password is available.
        """

        if mode != "r":
            raise NotImplementedError("RarFile.open() supports only mode=r")

        # entry lookup
        inf = self.getinfo(name)
        if inf.is_dir():
            raise io.UnsupportedOperation("Directory does not have any data: " + inf.filename)

        # check password
        if inf.needs_password():
            pwd = pwd or self._password
            if pwd is None:
                raise PasswordRequired("File %s requires password" % inf.filename)
        else:
            # entry is not encrypted, do not pass a password along
            pwd = None

        return self._file_parser.open(inf, pwd)

    def read(self, name, pwd=None):
        """Return uncompressed data for archive entry.

        For longer files using :meth:`~RarFile.open` may be better idea.

        Parameters:

            name
                filename or RarInfo instance
            pwd
                password to use for extracting.
        """

        with self.open(name, "r", pwd) as f:
            return f.read()

    def close(self):
        """Release open resources."""
        pass

    def printdir(self, file=None):
        """Print archive file list to stdout or given file.
        """
        if file is None:
            file = sys.stdout
        for f in self.infolist():
            print(f.filename, file=file)

    def extract(self, member, path=None, pwd=None):
        """Extract single file into current directory.

        Parameters:

            member
                filename or :class:`RarInfo` instance
            path
                optional destination path
            pwd
                optional password to use

        Returns the path of the extracted entry, or None when the entry
        type could not be extracted.
        """
        inf = self.getinfo(member)
        return self._extract_one(inf, path, pwd, True)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all files into current directory.

        Parameters:

            path
                optional destination path
            members
                optional filename or :class:`RarInfo` instance list to extract
            pwd
                optional password to use
        """
        if members is None:
            members = self.namelist()

        done = set()
        dirs = []
        for m in members:
            inf = self.getinfo(m)
            # directory attributes are applied after everything is extracted
            dst = self._extract_one(inf, path, pwd, not inf.is_dir())
            if inf.is_dir():
                if dst not in done:
                    dirs.append((dst, inf))
                    done.add(dst)
        if dirs:
            # reverse order so nested directories are handled before parents
            dirs.sort(reverse=True)
            for dst, inf in dirs:
                self._set_attrs(inf, dst)

    def testrar(self, pwd=None):
        """Read all files and test CRC.
        """
        for member in self.infolist():
            if member.is_file():
                with self.open(member, 'r', pwd) as f:
                    empty_read(f, member.file_size, BSIZE)

    def strerror(self):
        """Return error string if parsing failed or None if no problems.
        """
        if not self._file_parser:
            return "Not a RAR file"
        return self._file_parser.strerror()

    ##
    ## private methods
    ##

    def _parse(self):
        """Run parser for file type
        """
        ver, sfx_ofs = _find_sfx_header(self._rarfile)
        if ver == RAR_V3:
            p3 = RAR3Parser(self._rarfile, self._password, self._crc_check,
                            self._charset, self._strict, self._info_callback,
                            sfx_ofs, self._part_only)
            self._file_parser = p3  # noqa
        elif ver == RAR_V5:
            p5 = RAR5Parser(self._rarfile, self._password, self._crc_check,
                            self._charset, self._strict, self._info_callback,
                            sfx_ofs, self._part_only)
            self._file_parser = p5  # noqa
        else:
            raise NotRarFile("Not a RAR file")

        self._file_parser.parse()
        self.comment = self._file_parser.comment

    def _extract_one(self, info, path, pwd, set_attrs):
        """Extract one entry under *path* and return the destination path.

        Returns None when the entry is neither file, directory nor symlink.
        """
        fname = sanitize_filename(
            info.filename, os.path.sep, WIN32
        )

        if path is None:
            path = os.getcwd()
        else:
            path = os.fspath(path)
        dstfn = os.path.join(path, fname)

        dirname = os.path.dirname(dstfn)
        if dirname and dirname != ".":
            os.makedirs(dirname, exist_ok=True)

        if info.is_file():
            return self._make_file(info, dstfn, pwd, set_attrs)
        if info.is_dir():
            return self._make_dir(info, dstfn, pwd, set_attrs)
        if info.is_symlink():
            return self._make_symlink(info, dstfn, pwd, set_attrs)
        return None

    def _create_helper(self, name, flags, info):
        """Opener used for destination files."""
        return os.open(name, flags)

    def _make_file(self, info, dstfn, pwd, set_attrs):
        """Copy entry data into *dstfn*, optionally applying attributes."""
        def helper(name, flags):
            return self._create_helper(name, flags, info)
        with self.open(info, "r", pwd) as src:
            with open(dstfn, "wb", opener=helper) as dst:
                shutil.copyfileobj(src, dst)
        if set_attrs:
            self._set_attrs(info, dstfn)
        return dstfn

    def _make_dir(self, info, dstfn, pwd, set_attrs):
        """Create directory *dstfn*, optionally applying attributes."""
        os.makedirs(dstfn, exist_ok=True)
        if set_attrs:
            self._set_attrs(info, dstfn)
        return dstfn

    def _make_symlink(self, info, dstfn, pwd, set_attrs):
        """Create symlink *dstfn* for a link entry.

        Returns *dstfn*, or None (after emitting UnsupportedWarning)
        when the link type cannot be created.
        """
        target_is_directory = False
        if info.host_os == RAR_OS_UNIX:
            # link target is stored as the entry data
            link_name = self.read(info, pwd)
            target_is_directory = (info.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY
        elif info.file_redir:
            redir_type, redir_flags, link_name = info.file_redir
            if redir_type == RAR5_XREDIR_WINDOWS_JUNCTION:
                warnings.warn(f"Windows junction not supported - {info.filename}", UnsupportedWarning)
                return None
            # RAR5_XREDIR_ISDIR is a bit in the redirection *flags*;
            # the redirection type is an enumeration, not a bitmask.
            target_is_directory = (redir_flags & RAR5_XREDIR_ISDIR) > 0
        else:
            warnings.warn(f"Unsupported link type - {info.filename}", UnsupportedWarning)
            return None

        os.symlink(link_name, dstfn, target_is_directory=target_is_directory)
        return dstfn

    def _set_attrs(self, info, dstfn):
        """Apply permission bits and timestamps from *info* to *dstfn*."""
        if info.host_os == RAR_OS_UNIX:
            os.chmod(dstfn, info.mode & 0o777)
        elif info.host_os in (RAR_OS_WIN32, RAR_OS_MSDOS):
            # only keep R/O attr, except for dirs on win32
            if info.mode & DOS_MODE_READONLY and (info.is_file() or not WIN32):
                st = os.stat(dstfn)
                new_mode = st.st_mode & ~0o222
                os.chmod(dstfn, new_mode)

        if info.mtime:
            mtime_ns = to_nsecs(info.mtime)
            # fall back to mtime when archive has no access time
            atime_ns = to_nsecs(info.atime) if info.atime else mtime_ns
            os.utime(dstfn, ns=(atime_ns, mtime_ns))
1010 # File format parsing
class CommonParser:
    """Shared parser parts."""
    # first MAIN block header seen in the current volume (set in _parse_real)
    _main = None
    # truthy when headers are encrypted; only read/reset in the code visible here
    _hdrenc_main = None
    # becomes True once any parsed header reports that a password is needed
    _needs_password = False
    # currently open archive file object, only set while parse() runs
    _fd = None
    # signature bytes expected at the start of each volume
    _expect_sig = None
    # last error message recorded by _set_error()
    _parse_error = None
    _password = None
    # archive comment, copied to RarFile.comment after parsing
    comment = None
1024 def __init__(self, rarfile, password, crc_check, charset, strict,
1025 info_cb, sfx_offset, part_only):
1026 self._rarfile = rarfile
1027 self._password = password
1028 self._crc_check = crc_check
1029 self._charset = charset
1030 self._strict = strict
1031 self._info_callback = info_cb
1032 self._info_list = []
1033 self._info_map = {}
1034 self._vol_list = []
1035 self._sfx_offset = sfx_offset
1036 self._part_only = part_only
def is_solid(self):
    """Returns True if archive uses solid compression.

    Answers False when no main header has been parsed.
    """
    main = self._main
    return bool(main and main.flags & RAR_MAIN_SOLID)
def has_header_encryption(self):
    """Returns True if headers are encrypted

    Either the header-encryption marker is set or the main header
    carries the password flag.
    """
    if self._hdrenc_main:
        return True
    main = self._main
    return bool(main and main.flags & RAR_MAIN_PASSWORD)
def setpassword(self, pwd):
    """Set cached password."""
    self._password = pwd

def volumelist(self):
    """Volume files"""
    # returns the internal list itself, not a copy
    return self._vol_list

def needs_password(self):
    """Is password required"""
    return self._needs_password

def strerror(self):
    """Last error"""
    # None when _set_error() was never called
    return self._parse_error

def infolist(self):
    """List of RarInfo records.
    """
    # returns the internal list itself, not a copy
    return self._info_list
def getinfo(self, member):
    """Return RarInfo for filename

    *member* may be a name, a :class:`~pathlib.Path` or a RarInfo
    instance.  Trailing "/" characters are ignored.

    Raises NoRarEntry when the name is not in the archive.
    """
    if isinstance(member, RarInfo):
        key = member.filename
    elif isinstance(member, Path):
        key = str(member)
    else:
        key = member

    # no-op when the name does not end with a slash
    key = key.rstrip("/")

    try:
        entry = self._info_map[key]
    except KeyError:
        raise NoRarEntry("No such file: %s" % key) from None
    return entry
def getinfo_orig(self, member):
    """Return RarInfo of the entry that actually holds the data.

    For a file-copy or hard-link entry the redirect target is looked up
    and returned; every other entry is returned unchanged.
    """
    entry = self.getinfo(member)
    redir = entry.file_redir
    if not redir:
        return entry

    redir_type, _redir_flags, target_name = redir
    # cannot leave to unrar as it expects copied file to exist
    if redir_type in (RAR5_XREDIR_FILE_COPY, RAR5_XREDIR_HARD_LINK):
        return self.getinfo(target_name)
    return entry
def parse(self):
    """Process file.

    Runs the real parser and makes sure the file object it opened
    is closed again, whether or not parsing succeeded.
    """
    self._fd = None
    try:
        self._parse_real()
    finally:
        opened = self._fd
        if opened:
            opened.close()
            self._fd = None
def _parse_real(self):
    """Actually read file.

    Reads headers one by one from the first volume, moving on to the
    next volume when the current one says more data follows, and hands
    every header to process_entry() and the optional info callback.

    Raises NotRarFile when the first volume lacks the expected signature
    and NeedFirstVolume when parsing did not start at the first volume.
    Problems with later volumes go through _set_error().
    """
    fd = XFile(self._rarfile)
    self._fd = fd
    # skip SFX stub, if any
    fd.seek(self._sfx_offset, 0)
    sig = fd.read(len(self._expect_sig))
    if sig != self._expect_sig:
        raise NotRarFile("Not a Rar archive")

    volume = 0  # first vol (.rar) is 0
    more_vols = False
    endarc = False
    volfile = self._rarfile
    self._vol_list = [self._rarfile]
    raise_need_first_vol = False
    while True:
        if endarc:
            h = None    # don't read past ENDARC
        else:
            h = self._parse_header(fd)
        if not h:
            # end of current volume (or unreadable header)
            if raise_need_first_vol:
                # did not find ENDARC with VOLNR
                raise NeedFirstVolume("Need to start from first volume", None)
            if more_vols and not self._part_only:
                volume += 1
                fd.close()
                try:
                    volfile = self._next_volname(volfile)
                    fd = XFile(volfile)
                except IOError:
                    self._set_error("Cannot open next volume: %s", volfile)
                    break
                self._fd = fd
                sig = fd.read(len(self._expect_sig))
                if sig != self._expect_sig:
                    self._set_error("Invalid volume sig: %s", volfile)
                    break
                # reset per-volume state before reading the new volume
                more_vols = False
                endarc = False
                self._vol_list.append(volfile)
                self._main = None
                self._hdrenc_main = None
                continue
            break
        h.volume = volume
        h.volume_file = volfile

        if h.type == RAR_BLOCK_MAIN and not self._main:
            self._main = h
            if volume == 0 and (h.flags & RAR_MAIN_NEWNUMBERING) and not self._part_only:
                # RAR 2.x does not set FIRSTVOLUME,
                # so check it only if NEWNUMBERING is used
                if (h.flags & RAR_MAIN_FIRSTVOLUME) == 0:
                    if getattr(h, "main_volume_number", None) is not None:
                        # rar5 may have more info
                        raise NeedFirstVolume(
                            "Need to start from first volume (current: %r)"
                            % (h.main_volume_number,),
                            h.main_volume_number
                        )
                    # delay raise until we have volnr from ENDARC
                    raise_need_first_vol = True
            if h.flags & RAR_MAIN_PASSWORD:
                self._needs_password = True
                if not self._password:
                    # headers are encrypted and cannot be read without password
                    break
        elif h.type == RAR_BLOCK_ENDARC:
            more_vols = (h.flags & RAR_ENDARC_NEXT_VOLUME) > 0
            endarc = True
            if raise_need_first_vol and (h.flags & RAR_ENDARC_VOLNR) > 0:
                raise NeedFirstVolume(
                    "Need to start from first volume (current: %r)"
                    % (h.endarc_volnr,),
                    h.endarc_volnr
                )
        elif h.type == RAR_BLOCK_FILE:
            # RAR 2.x does not write RAR_BLOCK_ENDARC
            if h.flags & RAR_FILE_SPLIT_AFTER:
                more_vols = True
            # RAR 2.x does not set RAR_MAIN_FIRSTVOLUME
            if volume == 0 and h.flags & RAR_FILE_SPLIT_BEFORE:
                if not self._part_only:
                    raise_need_first_vol = True

        if h.needs_password():
            self._needs_password = True

        # store it
        self.process_entry(fd, h)

        if self._info_callback:
            self._info_callback(h)

        # go to next header
        if h.add_size > 0:
            fd.seek(h.data_offset + h.add_size, 0)
# The four methods below are placeholders that always raise
# NotImplementedError; presumably format-specific parsers override them.

def process_entry(self, fd, item):
    """Examine item, add into lookup cache."""
    raise NotImplementedError()

def _decrypt_header(self, fd):
    # called by _parse_header() when headers are encrypted
    raise NotImplementedError("_decrypt_header")

def _parse_block_header(self, fd):
    # called by _parse_header() to read one header
    raise NotImplementedError("_parse_block_header")

def _open_hack(self, inf, pwd):
    # called by open() when the temp-archive shortcut is usable
    raise NotImplementedError("_open_hack")
def _parse_header(self, fd):
    """Read single header

    Returns the parsed header, or None when headers are encrypted and
    no password is set, or when the header data is broken.
    """
    try:
        main = self._main
        encrypted = (main and main.flags & RAR_MAIN_PASSWORD) or self._hdrenc_main

        # handle encrypted headers
        if encrypted:
            if not self._password:
                return None
            fd = self._decrypt_header(fd)

        # now read actual header
        header = self._parse_block_header(fd)
        return header
    except struct.error:
        self._set_error("Broken header in RAR file")
        return None
def _next_volname(self, volfile):
    """Given current vol name, construct next one

    Raises IOError when working on a file object, as there is no
    name to derive the next volume from.
    """
    if is_filelike(volfile):
        raise IOError("Working on single FD")

    uses_new_numbering = self._main.flags & RAR_MAIN_NEWNUMBERING
    if not uses_new_numbering:
        return _next_oldvol(volfile)
    return _next_newvol(volfile)
def _set_error(self, msg, *args):
    """Record a parse error; raise BadRarFile in strict mode.

    *msg* is %-formatted with *args* when any are given.
    """
    text = msg % args if args else msg
    self._parse_error = text
    if self._strict:
        raise BadRarFile(text)
def open(self, inf, pwd):
    """Return stream object for file data.

    Follows file-copy / hard-link redirects to the entry holding the
    data, answers symlink-like entries with their target name, then
    picks the reader: direct read for stored unencrypted data, the
    temp-archive shortcut, or the external tool.

    Raises NeedFirstVolume when the entry continues from an earlier volume.
    """

    if inf.file_redir:
        redir_type, redir_flags, redir_name = inf.file_redir
        # cannot leave to unrar as it expects copied file to exist
        if redir_type in (RAR5_XREDIR_FILE_COPY, RAR5_XREDIR_HARD_LINK):
            inf = self.getinfo(redir_name)
            # NOTE(review): getinfo() raises NoRarEntry for unknown names,
            # so this check looks unreachable - confirm intended error type.
            if not inf:
                raise BadRarFile("cannot find copied file")
        elif redir_type in (
            RAR5_XREDIR_UNIX_SYMLINK, RAR5_XREDIR_WINDOWS_SYMLINK,
            RAR5_XREDIR_WINDOWS_JUNCTION,
        ):
            # link entries have no data of their own: return the target name
            return io.BytesIO(redir_name.encode("utf8"))
    if inf.flags & RAR_FILE_SPLIT_BEFORE:
        raise NeedFirstVolume("Partial file, please start from first volume: " + inf.filename, None)

    # is temp write usable?
    # The first matching branch wins, so the order of checks matters:
    # for a file-like archive the size limit and USE_EXTRACT_HACK
    # are not consulted.
    use_hack = 1
    if not self._main:
        use_hack = 0
    elif self._main._must_disable_hack():
        use_hack = 0
    elif inf._must_disable_hack():
        use_hack = 0
    elif is_filelike(self._rarfile):
        pass
    elif inf.file_size > HACK_SIZE_LIMIT:
        use_hack = 0
    elif not USE_EXTRACT_HACK:
        use_hack = 0

    # now extract
    if inf.compress_type == RAR_M0 and (inf.flags & RAR_FILE_PASSWORD) == 0 and inf.file_redir is None:
        # stored without compression or encryption
        return self._open_clear(inf)
    elif use_hack:
        return self._open_hack(inf, pwd)
    elif is_filelike(self._rarfile):
        return self._open_unrar_membuf(self._rarfile, inf, pwd)
    else:
        return self._open_unrar(self._rarfile, inf, pwd)
def _open_clear(self, inf):
    """Open an entry whose data can be read without decompression.

    Returns a DirectReader, unless FORCE_TOOL is set, in which case the
    entry is extracted through _open_unrar() like any other.
    """
    if not FORCE_TOOL:
        return DirectReader(self, inf)
    return self._open_unrar(self._rarfile, inf)
def _open_hack_core(self, inf, pwd, prefix, suffix):
    """Copy one entry into a small temporary archive and extract from it.

    The temporary ``.rar`` file (created in HACK_TMP_DIR) receives *prefix*,
    then ``inf.header_size + inf.compress_size`` bytes read from
    ``inf.volume_file`` starting at ``inf.header_offset``, then *suffix*.
    The file name is passed to _open_unrar() both as the archive to read and
    as the temp file it is responsible for.

    Raises BadRarFile when the volume ends before the entry is fully
    copied.  On any failure the temporary file is removed and the original
    exception propagates.
    """
    remaining = inf.compress_size + inf.header_size

    # Context managers close both files on every path, including failures
    # in mkstemp()/fdopen() and errors raised while closing the temp file.
    with XFile(inf.volume_file, 0) as rf:
        rf.seek(inf.header_offset)

        tmpfd, tmpname = mkstemp(suffix=".rar", dir=HACK_TMP_DIR)
        try:
            with os.fdopen(tmpfd, "wb") as tmpf:
                tmpf.write(prefix)
                while remaining > 0:
                    buf = rf.read(min(remaining, BSIZE))
                    if not buf:
                        raise BadRarFile("read failed: " + inf.filename)
                    tmpf.write(buf)
                    remaining -= len(buf)
                tmpf.write(suffix)
        except BaseException:
            # the temp file has already been closed by the inner "with",
            # so cleanup is not skipped even if that close itself failed
            os.unlink(tmpname)
            raise

    return self._open_unrar(tmpname, inf, pwd, tmpname)
def _open_unrar_membuf(self, memfile, inf, pwd):
    """Write in-memory archive to temp file, needed for solid archives.

    The copy made by membuf_tempfile() is passed to _open_unrar() as both
    the archive and the temp file, with ``force_file=True`` so the entry
    name is still given to the tool.
    """
    archive_copy = membuf_tempfile(memfile)
    return self._open_unrar(archive_copy, inf, pwd, archive_copy, force_file=True)
def _open_unrar(self, rarfile, inf, pwd=None, tmpfile=None, force_file=False):
    """Extract using unrar

    Builds the tool command line via tool_setup() and returns a PipeReader
    that reads the entry data from the tool's output.  The entry name is
    passed to the tool only when no *tmpfile* is used or *force_file* is
    set.
    """
    setup = tool_setup()

    # not giving filename avoids encoding related problems
    wants_name = force_file or not tmpfile
    member = inf.filename.replace("/", os.path.sep) if wants_name else None

    # read from unrar pipe
    cmdline = setup.open_cmdline(pwd, rarfile, member)
    return PipeReader(self, inf, cmdline, tmpfile)
1359 # RAR3 format
class Rar3Info(RarInfo):
    """RAR3 specific fields."""
    extract_version = 15
    salt = None
    add_size = 0
    header_crc = None
    header_size = None
    header_offset = None
    data_offset = None
    _md_class = None
    _md_expect = None
    _name_size = None

    # make sure some rar5 fields are always present
    file_redir = None
    blake2sp_hash = None

    endarc_datacrc = None
    endarc_volnr = None

    def _must_disable_hack(self):
        """Tell whether this header rules out the temp-archive shortcut.

        True for file blocks that are encrypted or split across volumes,
        and for main blocks flagged solid or password protected.
        """
        if self.type == RAR_BLOCK_FILE:
            blockers = RAR_FILE_PASSWORD | RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER
            return bool(self.flags & blockers)
        if self.type == RAR_BLOCK_MAIN:
            return bool(self.flags & (RAR_MAIN_SOLID | RAR_MAIN_PASSWORD))
        return False

    def is_dir(self):
        """Returns True if entry is a directory."""
        if self.type != RAR_BLOCK_FILE or self.is_symlink():
            return False
        # the directory marker is a multi-bit mask, all bits must be set
        return (self.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY

    def is_symlink(self):
        """Returns True if entry is a symlink."""
        if self.type != RAR_BLOCK_FILE:
            return False
        if self.host_os != RAR_OS_UNIX:
            return False
        # file-type bits of the stored mode
        return (self.mode & 0xF000) == 0xA000

    def is_file(self):
        """Returns True if entry is a normal file."""
        if self.type != RAR_BLOCK_FILE:
            return False
        return not (self.is_dir() or self.is_symlink())
class RAR3Parser(CommonParser):
    """Parse RAR3 file format.
    """
    _expect_sig = RAR_ID
    _last_aes_key = (None, None, None)   # (salt, key, iv)

    def _decrypt_header(self, fd):
        """Return a HeaderDecrypt reader for the encrypted header at *fd*.

        Reads the 8-byte salt that precedes the header.  The derived key
        and IV are cached and reused while the salt stays the same.
        Raises NoCrypto when crypto support is unavailable.
        """
        if not _have_crypto:
            raise NoCrypto("Cannot parse encrypted headers - no crypto")
        salt = fd.read(8)
        cached_salt, key, iv = self._last_aes_key
        if cached_salt != salt:
            key, iv = rar3_s2k(self._password, salt)
            self._last_aes_key = (salt, key, iv)
        return HeaderDecrypt(fd, key, iv)

    def _parse_block_header(self, fd):
        """Parse common block header

        Returns a Rar3Info, or None at end of input, on truncated data or
        on a header CRC mismatch (the latter two are reported through
        _set_error()).
        """
        h = Rar3Info()
        h.header_offset = fd.tell()

        # read and parse base header
        base_len = S_BLK_HDR.size
        buf = fd.read(base_len)
        if not buf:
            return None
        if len(buf) < base_len:
            self._set_error("Unexpected EOF when reading header")
            return None
        h.header_crc, h.type, h.flags, h.header_size = S_BLK_HDR.unpack_from(buf)

        # read full header
        if h.header_size > base_len:
            hdata = buf + fd.read(h.header_size - base_len)
        else:
            hdata = buf
        h.data_offset = fd.tell()

        # unexpected EOF?
        if len(hdata) != h.header_size:
            self._set_error("Unexpected EOF when reading header")
            return None

        pos = base_len

        # block has data associated with it?
        h.add_size = 0
        if h.flags & RAR_LONG_BLOCK:
            h.add_size, pos = load_le32(hdata, pos)

        # parse interesting ones, decide header boundaries for crc
        kind = h.type
        if kind == RAR_BLOCK_MARK:
            # returned without a CRC check
            return h

        # by default the CRC covers the whole header
        crc_pos = h.header_size
        if kind == RAR_BLOCK_MAIN:
            pos += 6
            if h.flags & RAR_MAIN_ENCRYPTVER:
                pos += 1
            crc_pos = pos
            if h.flags & RAR_MAIN_COMMENT:
                self._parse_subblocks(h, hdata, pos)
        elif kind == RAR_BLOCK_FILE:
            # step back over the 4 bytes consumed above as add_size
            pos = self._parse_file_header(h, hdata, pos - 4)
            crc_pos = pos
            if h.flags & RAR_FILE_COMMENT:
                pos = self._parse_subblocks(h, hdata, pos)
        elif kind == RAR_BLOCK_SUB:
            pos = self._parse_file_header(h, hdata, pos - 4)
        elif kind == RAR_BLOCK_OLD_AUTH:
            pos += 8
            crc_pos = pos
        elif kind == RAR_BLOCK_OLD_EXTRA:
            pos += 7
            crc_pos = pos
        elif kind == RAR_BLOCK_ENDARC:
            if h.flags & RAR_ENDARC_DATACRC:
                h.endarc_datacrc, pos = load_le32(hdata, pos)
            if h.flags & RAR_ENDARC_VOLNR:
                h.endarc_volnr = S_SHORT.unpack_from(hdata, pos)[0]
                pos += 2

        # check crc
        if kind == RAR_BLOCK_OLD_SUB:
            # for this block type the CRC also covers the attached data
            crcdat = hdata[2:] + fd.read(h.add_size)
        else:
            crcdat = hdata[2:crc_pos]

        calc_crc = crc32(crcdat) & 0xFFFF
        if h.header_crc != calc_crc:
            # header parsing failed.
            self._set_error("Header CRC error (%02x): exp=%x got=%x (xlen = %d)",
                            h.type, h.header_crc, calc_crc, len(crcdat))
            # instead of panicking, send eof
            return None

        # return good header
        return h

    def _parse_file_header(self, h, hdata, pos):
        """Read file-specific header

        Fills *h* from the fixed file header at *pos* in *hdata*, then the
        optional 64-bit size extension, the name, the salt and the extended
        time fields.  Returns the position after the last parsed field.
        """
        fld = S_FILE_HDR.unpack_from(hdata, pos)
        pos += S_FILE_HDR.size

        h.compress_size = fld[0]
        h.file_size = fld[1]
        h.host_os = fld[2]
        h.CRC = fld[3]
        h.date_time = parse_dos_time(fld[4])
        h.mtime = to_datetime(h.date_time)
        h.extract_version = fld[5]
        h.compress_type = fld[6]
        name_size = fld[7]
        h._name_size = name_size
        h.mode = fld[8]

        h._md_class = CRC32Context
        h._md_expect = h.CRC

        if h.flags & RAR_FILE_LARGE:
            # upper 32 bits of both sizes follow the fixed header
            packed_hi, pos = load_le32(hdata, pos)
            unpacked_hi, pos = load_le32(hdata, pos)
            h.compress_size |= packed_hi << 32
            h.file_size |= unpacked_hi << 32
            h.add_size = h.compress_size

        name, pos = load_bytes(hdata, name_size, pos)
        is_unicode = h.flags & RAR_FILE_UNICODE
        if is_unicode and b"\0" in name:
            # stored in custom encoding
            plain, _sep, packed = name.partition(b"\0")
            h.orig_filename = plain
            u = UnicodeFilename(plain, packed)
            h.filename = u.decode()

            # if parsing failed fall back to simple name
            if u.failed:
                h.filename = self._decode(plain)
        elif is_unicode:
            # stored in UTF8
            h.orig_filename = name
            h.filename = name.decode("utf8", "replace")
        else:
            # stored in random encoding
            h.orig_filename = name
            h.filename = self._decode(name)

        # change separator, set dir suffix
        h.filename = h.filename.replace("\\", "/").rstrip("/")
        if h.is_dir():
            h.filename += "/"

        h.salt = None
        if h.flags & RAR_FILE_SALT:
            h.salt, pos = load_bytes(hdata, 8, pos)

        # optional extended time stamps
        if h.flags & RAR_FILE_EXTTIME:
            pos = _parse_ext_time(h, hdata, pos)
        else:
            # NOTE(review): this also resets the mtime set from the DOS
            # timestamp above - confirm that is intended.
            h.mtime = h.atime = h.ctime = h.arctime = None

        return pos

    def _parse_subblocks(self, h, hdata, pos):
        """Find old-style comment subblock

        Walks the sub-blocks stored in *hdata* from *pos*; a comment block
        is decompressed and stored as ``h.comment`` (subject to the CRC
        check when enabled).  Returns the final position.
        """
        total = len(hdata)
        while pos < total:
            # ordinary block header
            _scrc, stype, sflags, slen = S_BLK_HDR.unpack_from(hdata, pos)
            pos_next = pos + slen
            pos += S_BLK_HDR.size

            # corrupt header
            if pos_next < pos:
                break

            # followed by block-specific header
            if stype == RAR_BLOCK_OLD_COMMENT and pos + S_COMMENT_HDR.size <= pos_next:
                declen, ver, meth, crc = S_COMMENT_HDR.unpack_from(hdata, pos)
                pos += S_COMMENT_HDR.size
                packed = hdata[pos: pos_next]
                cmt = rar3_decompress(ver, meth, packed, declen, sflags,
                                      crc, self._password)
                crc_ok = not self._crc_check or (crc32(cmt) & 0xFFFF) == crc
                if crc_ok:
                    h.comment = self._decode_comment(cmt)

            pos = pos_next
        return pos

    def _read_comment_v3(self, inf, pwd=None):
        """Read and decode the comment stored in sub-block *inf*.

        Returns the decoded text, or None when CRC checking is enabled and
        the decompressed data does not match ``inf.CRC``.
        """
        # read data
        with XFile(inf.volume_file) as rf:
            rf.seek(inf.data_offset)
            packed = rf.read(inf.compress_size)

        # decompress
        cmt = rar3_decompress(inf.extract_version, inf.compress_type, packed,
                              inf.file_size, inf.flags, inf.CRC, pwd, inf.salt)

        # check crc
        if self._crc_check and crc32(cmt) != inf.CRC:
            return None

        return self._decode_comment(cmt)

    def _decode(self, val):
        """Decode bytes using the first of TRY_ENCODINGS that succeeds.

        Falls back to ``self._charset`` with replacement characters.
        """
        for codec in TRY_ENCODINGS:
            try:
                text = val.decode(codec)
            except UnicodeError:
                continue
            return text
        return val.decode(self._charset, "replace")

    def _decode_comment(self, val):
        """Decode comment bytes the same way as filenames."""
        return self._decode(val)

    def process_entry(self, fd, item):
        """Record a parsed header: file entries, comments, main header flags."""
        if item.type == RAR_BLOCK_FILE:
            # use only first part
            if item.flags & RAR_FILE_VERSION:
                pass    # skip old versions
            elif not (item.flags & RAR_FILE_SPLIT_BEFORE):
                self._info_map[item.filename.rstrip("/")] = item
                self._info_list.append(item)
            elif self._info_list:
                # final crc is in last block
                first_part = self._info_list[-1]
                first_part.CRC = item.CRC
                first_part._md_expect = item._md_expect
                first_part.compress_size += item.compress_size

        # parse new-style comment
        if item.type == RAR_BLOCK_SUB and item.filename == "CMT":
            is_split = item.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER)
            if not is_split:
                cmt = self._read_comment_v3(item, self._password)
                if item.flags & RAR_FILE_SOLID:
                    # file comment
                    if self._info_list:
                        self._info_list[-1].comment = cmt
                else:
                    # archive comment
                    self.comment = cmt

        if item.type == RAR_BLOCK_MAIN:
            if item.flags & RAR_MAIN_COMMENT:
                self.comment = item.comment
            if item.flags & RAR_MAIN_PASSWORD:
                self._needs_password = True

    # put file compressed data into temporary .rar archive, and run
    # unrar on that, thus avoiding unrar going over whole archive
    def _open_hack(self, inf, pwd):
        """Open *inf* via a temp archive made of a main header plus the entry."""
        # create main header: crc, type, flags, size, res1, res2
        main_hdr = S_BLK_HDR.pack(0x90CF, 0x73, 0, 13) + b"\0" * (2 + 4)
        return self._open_hack_core(inf, pwd, RAR_ID + main_hdr, b"")
1688 # RAR5 format
class Rar5Info(RarInfo):
    """Shared fields for RAR5 records.

    Class-level defaults for the attributes that the RAR5 parser fills in
    on every header object.
    """
    extract_version = 50
    header_crc = None
    header_size = None
    header_offset = None
    data_offset = None

    # type=all
    block_type = None
    block_flags = None
    add_size = 0
    block_extra_size = 0

    # type=MAIN
    volume_number = None
    _md_class = None
    _md_expect = None

    def _must_disable_hack(self):
        # Generic records never veto the temp-archive shortcut; subclasses
        # override this where needed.
        return False
class Rar5BaseFile(Rar5Info):
    """Shared struct for file & service record.
    """
    type = -1
    file_flags = None
    file_encryption = (0, 0, 0, b"", b"", b"")
    file_compress_flags = None
    file_redir = None
    file_owner = None
    file_version = None
    blake2sp_hash = None

    def _must_disable_hack(self):
        """Tell whether this record rules out the temp-archive shortcut.

        True when the record is encrypted, split across volumes, solid
        compressed, or a redirection.
        """
        split_mask = RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER
        return bool(
            self.flags & RAR_FILE_PASSWORD
            or self.block_flags & split_mask
            or self.file_compress_flags & RAR5_COMPR_SOLID
            or self.file_redir
        )
class Rar5FileInfo(Rar5BaseFile):
    """RAR5 file record.
    """
    type = RAR_BLOCK_FILE

    def is_symlink(self):
        """Returns True if entry is a symlink."""
        redirect = self.file_redir
        if redirect is None:
            return False
        # first element of the redirection tuple is its type
        # pylint: disable=unsubscriptable-object
        return redirect[0] in (
            RAR5_XREDIR_UNIX_SYMLINK,
            RAR5_XREDIR_WINDOWS_SYMLINK,
            RAR5_XREDIR_WINDOWS_JUNCTION,
        )

    def is_file(self):
        """Returns True if entry is a normal file."""
        if self.is_dir():
            return False
        return not self.is_symlink()

    def is_dir(self):
        """Returns True if entry is a directory."""
        # redirected entries are never reported as directories
        return bool(not self.file_redir and self.file_flags & RAR5_FILE_FLAG_ISDIR)
class Rar5ServiceInfo(Rar5BaseFile):
    """RAR5 service record.

    Shares the file-record layout; only the record type differs.
    """
    type = RAR_BLOCK_SUB
class Rar5MainInfo(Rar5Info):
    """RAR5 archive main record.
    """
    type = RAR_BLOCK_MAIN
    main_flags = None
    main_volume_number = None

    def _must_disable_hack(self):
        """Return True for solid archives, where the shortcut is unusable."""
        return bool(self.main_flags & RAR5_MAIN_FLAG_SOLID)
class Rar5EncryptionInfo(Rar5Info):
    """RAR5 archive header encryption record.

    The encryption_* attributes are filled in by the parser from the
    encryption block.
    """
    type = RAR5_BLOCK_ENCRYPTION
    encryption_algo = None
    encryption_flags = None
    encryption_kdf_count = None
    encryption_salt = None
    encryption_check_value = None

    def needs_password(self):
        # An encryption record always means a password is required.
        return True
class Rar5EndArcInfo(Rar5Info):
    """RAR5 end of archive record.
    """
    type = RAR_BLOCK_ENDARC
    # raw flags field of the end-of-archive block, set by the parser
    endarc_flags = None
class RAR5Parser(CommonParser):
    """Parse RAR5 format.
    """
    _expect_sig = RAR5_ID
    _hdrenc_main = None

    # AES encrypted headers
    _last_aes256_key = (-1, None, None)       # (kdf_count, salt, key)

    # Largest accepted KDF shift; the iteration count is 1 << shift and the
    # shift comes straight from the (untrusted) archive.
    _MAX_KDF_SHIFT = 24

    def _get_utf8_password(self):
        """Return the password as bytes, encoding str passwords as UTF-8."""
        pwd = self._password
        if isinstance(pwd, str):
            return pwd.encode("utf8")
        return pwd

    def _gen_key(self, kdf_count, salt):
        """Derive the AES key for the given KDF shift and salt.

        The last derived key is cached and reused for identical
        parameters.  Raises BadRarFile when *kdf_count* exceeds the
        accepted maximum.
        """
        if self._last_aes256_key[:2] == (kdf_count, salt):
            return self._last_aes256_key[2]
        if kdf_count > self._MAX_KDF_SHIFT:
            raise BadRarFile("Too large kdf_count")
        pwd = self._get_utf8_password()
        key = pbkdf2_hmac("sha256", pwd, salt, 1 << kdf_count)
        self._last_aes256_key = (kdf_count, salt, key)
        return key

    def _decrypt_header(self, fd):
        """Return a HeaderDecrypt reader for the encrypted header at *fd*.

        The key comes from the stored encryption record; the 16-byte IV is
        read from *fd*.  Raises NoCrypto when crypto support is unavailable.
        """
        if not _have_crypto:
            raise NoCrypto("Cannot parse encrypted headers - no crypto")
        h = self._hdrenc_main
        key = self._gen_key(h.encryption_kdf_count, h.encryption_salt)
        iv = fd.read(16)
        return HeaderDecrypt(fd, key, iv)

    def _parse_block_header(self, fd):
        """Parse common block header

        Returns the header object matching the block type, or None on
        truncated input, oversized header, CRC mismatch or unknown block
        type.
        """
        header_offset = fd.tell()

        # 4 bytes of CRC plus the first byte of the variable-length size
        preload = 4 + 1
        start_bytes = fd.read(preload)
        if len(start_bytes) < preload:
            self._set_error("Unexpected EOF when reading header")
            return None
        # high bit set means the size field continues in the next byte
        while start_bytes[-1] & 0x80:
            b = fd.read(1)
            if not b:
                self._set_error("Unexpected EOF when reading header")
                return None
            start_bytes += b
        header_crc, pos = load_le32(start_bytes, 0)
        hdrlen, pos = load_vint(start_bytes, pos)
        if hdrlen > 2 * 1024 * 1024:
            return None
        header_size = pos + hdrlen

        # read full header, check for EOF
        hdata = start_bytes + fd.read(header_size - len(start_bytes))
        if len(hdata) != header_size:
            self._set_error("Unexpected EOF when reading header")
            return None
        data_offset = fd.tell()

        # CRC covers everything after the CRC field itself
        calc_crc = crc32(memoryview(hdata)[4:])
        if header_crc != calc_crc:
            # header parsing failed.
            self._set_error("Header CRC error: exp=%x got=%x (xlen = %d)",
                            header_crc, calc_crc, len(hdata))
            return None

        block_type, pos = load_vint(hdata, pos)

        if block_type == RAR5_BLOCK_MAIN:
            h, pos = self._parse_block_common(Rar5MainInfo(), hdata)
            h = self._parse_main_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_FILE:
            h, pos = self._parse_block_common(Rar5FileInfo(), hdata)
            h = self._parse_file_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_SERVICE:
            h, pos = self._parse_block_common(Rar5ServiceInfo(), hdata)
            h = self._parse_file_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_ENCRYPTION:
            h, pos = self._parse_block_common(Rar5EncryptionInfo(), hdata)
            h = self._parse_encryption_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_ENDARC:
            h, pos = self._parse_block_common(Rar5EndArcInfo(), hdata)
            h = self._parse_endarc_block(h, hdata, pos)
        else:
            h = None
        if h:
            h.header_offset = header_offset
            h.data_offset = data_offset
        return h

    def _parse_block_common(self, h, hdata):
        """Fill the fields shared by all block types; return ``(h, pos)``."""
        h.header_crc, pos = load_le32(hdata, 0)
        hdrlen, pos = load_vint(hdata, pos)
        h.header_size = hdrlen + pos
        h.block_type, pos = load_vint(hdata, pos)
        h.block_flags, pos = load_vint(hdata, pos)

        if h.block_flags & RAR5_BLOCK_FLAG_EXTRA_DATA:
            h.block_extra_size, pos = load_vint(hdata, pos)
        if h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA:
            h.add_size, pos = load_vint(hdata, pos)

        h.compress_size = h.add_size

        # mirror RAR5 block flags into the generic flag bits
        if h.block_flags & RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN:
            h.flags |= RAR_SKIP_IF_UNKNOWN
        if h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA:
            h.flags |= RAR_LONG_BLOCK
        return h, pos

    def _parse_main_block(self, h, hdata, pos):
        """Parse the main archive block and translate its flags."""
        h.main_flags, pos = load_vint(hdata, pos)
        if h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR:
            h.main_volume_number, pos = load_vint(hdata, pos)

        h.flags |= RAR_MAIN_NEWNUMBERING
        if h.main_flags & RAR5_MAIN_FLAG_SOLID:
            h.flags |= RAR_MAIN_SOLID
        if h.main_flags & RAR5_MAIN_FLAG_ISVOL:
            h.flags |= RAR_MAIN_VOLUME
        if h.main_flags & RAR5_MAIN_FLAG_RECOVERY:
            h.flags |= RAR_MAIN_RECOVERY
        if self._hdrenc_main:
            h.flags |= RAR_MAIN_PASSWORD
        # no volume number field means this is the first volume
        if (h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR) == 0:
            h.flags |= RAR_MAIN_FIRSTVOLUME

        return h

    def _parse_file_block(self, h, hdata, pos):
        """Parse a file or service block, including its extra records."""
        h.file_flags, pos = load_vint(hdata, pos)
        h.file_size, pos = load_vint(hdata, pos)
        h.mode, pos = load_vint(hdata, pos)

        if h.file_flags & RAR5_FILE_FLAG_HAS_MTIME:
            h.mtime, pos = load_unixtime(hdata, pos)
            h.date_time = h.mtime.timetuple()[:6]
        if h.file_flags & RAR5_FILE_FLAG_HAS_CRC32:
            h.CRC, pos = load_le32(hdata, pos)
            h._md_class = CRC32Context
            h._md_expect = h.CRC

        h.file_compress_flags, pos = load_vint(hdata, pos)
        h.file_host_os, pos = load_vint(hdata, pos)
        h.orig_filename, pos = load_vstr(hdata, pos)
        h.filename = h.orig_filename.decode("utf8", "replace").rstrip("/")

        # use compatible values
        if h.file_host_os == RAR5_OS_WINDOWS:
            h.host_os = RAR_OS_WIN32
        else:
            h.host_os = RAR_OS_UNIX
        # 3-bit method field starting at bit 7 of the compression flags
        h.compress_type = RAR_M0 + ((h.file_compress_flags >> 7) & 7)

        if h.block_extra_size:
            # allow 1 byte of garbage
            while pos < len(hdata) - 1:
                xsize, pos = load_vint(hdata, pos)
                xdata, pos = load_bytes(hdata, xsize, pos)
                self._process_file_extra(h, xdata)

        if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE:
            h.flags |= RAR_FILE_SPLIT_BEFORE
        if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_AFTER:
            h.flags |= RAR_FILE_SPLIT_AFTER
        if h.file_flags & RAR5_FILE_FLAG_ISDIR:
            h.flags |= RAR_FILE_DIRECTORY
        if h.file_compress_flags & RAR5_COMPR_SOLID:
            h.flags |= RAR_FILE_SOLID

        if h.is_dir():
            h.filename = h.filename + "/"
        return h

    def _parse_endarc_block(self, h, hdata, pos):
        """Parse the end-of-archive block."""
        h.endarc_flags, pos = load_vint(hdata, pos)
        if h.endarc_flags & RAR5_ENDARC_FLAG_NEXT_VOL:
            h.flags |= RAR_ENDARC_NEXT_VOLUME
        return h

    def _check_password(self, check_value, kdf_count_shift, salt):
        """Verify the password against the check value stored in the archive.

        Returns silently when the check value has the wrong size or fails
        its own checksum.  Raises BadRarFile when *kdf_count_shift* exceeds
        the accepted maximum, and RarWrongPassword when the password does
        not match.
        """
        if len(check_value) != RAR5_PW_CHECK_SIZE + RAR5_PW_SUM_SIZE:
            return

        hdr_check = check_value[:RAR5_PW_CHECK_SIZE]
        hdr_sum = check_value[RAR5_PW_CHECK_SIZE:]
        sum_hash = sha256(hdr_check).digest()
        if sum_hash[:RAR5_PW_SUM_SIZE] != hdr_sum:
            return

        # Same limit as _gen_key(): the shift is read from the file and an
        # unchecked value would request an enormous iteration count.
        if kdf_count_shift > self._MAX_KDF_SHIFT:
            raise BadRarFile("Too large kdf_count")

        kdf_count = (1 << kdf_count_shift) + 32
        pwd = self._get_utf8_password()
        pwd_hash = pbkdf2_hmac("sha256", pwd, salt, kdf_count)

        # fold the hash into RAR5_PW_CHECK_SIZE bytes by XOR
        pwd_check = bytearray(RAR5_PW_CHECK_SIZE)
        len_mask = RAR5_PW_CHECK_SIZE - 1
        for i, v in enumerate(pwd_hash):
            pwd_check[i & len_mask] ^= v

        if pwd_check != hdr_check:
            raise RarWrongPassword()

    def _parse_encryption_block(self, h, hdata, pos):
        """Parse the header-encryption block and remember it for later headers.

        Raises BadRarFile for an unsupported cipher.  When a check value
        and a password are both present, the password is verified.
        """
        h.encryption_algo, pos = load_vint(hdata, pos)
        h.encryption_flags, pos = load_vint(hdata, pos)
        h.encryption_kdf_count, pos = load_byte(hdata, pos)
        h.encryption_salt, pos = load_bytes(hdata, 16, pos)
        if h.encryption_flags & RAR5_ENC_FLAG_HAS_CHECKVAL:
            h.encryption_check_value, pos = load_bytes(hdata, 12, pos)
        if h.encryption_algo != RAR5_XENC_CIPHER_AES256:
            raise BadRarFile("Unsupported header encryption cipher")
        if h.encryption_check_value and self._password:
            self._check_password(h.encryption_check_value, h.encryption_kdf_count, h.encryption_salt)
        self._hdrenc_main = h
        return h

    def _process_file_extra(self, h, xdata):
        """Dispatch one extra record of a file block by its type."""
        xtype, pos = load_vint(xdata, 0)
        if xtype == RAR5_XFILE_TIME:
            self._parse_file_xtime(h, xdata, pos)
        elif xtype == RAR5_XFILE_ENCRYPTION:
            self._parse_file_encryption(h, xdata, pos)
        elif xtype == RAR5_XFILE_HASH:
            self._parse_file_hash(h, xdata, pos)
        elif xtype == RAR5_XFILE_VERSION:
            self._parse_file_version(h, xdata, pos)
        elif xtype == RAR5_XFILE_REDIR:
            self._parse_file_redir(h, xdata, pos)
        elif xtype == RAR5_XFILE_OWNER:
            self._parse_file_owner(h, xdata, pos)
        elif xtype == RAR5_XFILE_SERVICE:
            pass
        else:
            # unknown record types are ignored
            pass

    # extra block for file time record
    def _parse_file_xtime(self, h, xdata, pos):
        """Parse the time record: mtime/ctime/atime plus optional nanoseconds."""
        tflags, pos = load_vint(xdata, pos)

        # timestamps are in Windows format unless the unixtime flag is set
        ldr = load_windowstime
        if tflags & RAR5_XTIME_UNIXTIME:
            ldr = load_unixtime

        if tflags & RAR5_XTIME_HAS_MTIME:
            h.mtime, pos = ldr(xdata, pos)
            h.date_time = h.mtime.timetuple()[:6]
        if tflags & RAR5_XTIME_HAS_CTIME:
            h.ctime, pos = ldr(xdata, pos)
        if tflags & RAR5_XTIME_HAS_ATIME:
            h.atime, pos = ldr(xdata, pos)

        if tflags & RAR5_XTIME_UNIXTIME_NS:
            if tflags & RAR5_XTIME_HAS_MTIME:
                nsec, pos = load_le32(xdata, pos)
                h.mtime = to_nsdatetime(h.mtime, nsec)
            if tflags & RAR5_XTIME_HAS_CTIME:
                nsec, pos = load_le32(xdata, pos)
                h.ctime = to_nsdatetime(h.ctime, nsec)
            if tflags & RAR5_XTIME_HAS_ATIME:
                nsec, pos = load_le32(xdata, pos)
                h.atime = to_nsdatetime(h.atime, nsec)

    # just remember encryption info
    def _parse_file_encryption(self, h, xdata, pos):
        """Store the file encryption parameters and mark the entry encrypted."""
        algo, pos = load_vint(xdata, pos)
        flags, pos = load_vint(xdata, pos)
        kdf_count, pos = load_byte(xdata, pos)
        salt, pos = load_bytes(xdata, 16, pos)
        iv, pos = load_bytes(xdata, 16, pos)
        checkval = None
        if flags & RAR5_XENC_CHECKVAL:
            checkval, pos = load_bytes(xdata, 12, pos)
        if flags & RAR5_XENC_TWEAKED:
            # with this flag the stored checksum is not verified
            h._md_expect = None
            h._md_class = NoHashContext

        h.file_encryption = (algo, flags, kdf_count, salt, iv, checkval)
        h.flags |= RAR_FILE_PASSWORD

    def _parse_file_hash(self, h, xdata, pos):
        """Parse the hash record; only BLAKE2sp hashes are recognized."""
        hash_type, pos = load_vint(xdata, pos)
        if hash_type == RAR5_XHASH_BLAKE2SP:
            h.blake2sp_hash, pos = load_bytes(xdata, 32, pos)
            if (h.file_encryption[1] & RAR5_XENC_TWEAKED) == 0:
                h._md_class = Blake2SP
                h._md_expect = h.blake2sp_hash

    def _parse_file_version(self, h, xdata, pos):
        """Parse the file version record into ``h.file_version``."""
        flags, pos = load_vint(xdata, pos)
        version, pos = load_vint(xdata, pos)
        h.file_version = (flags, version)

    def _parse_file_redir(self, h, xdata, pos):
        """Parse the redirection record into ``h.file_redir``."""
        redir_type, pos = load_vint(xdata, pos)
        redir_flags, pos = load_vint(xdata, pos)
        redir_name, pos = load_vstr(xdata, pos)
        redir_name = redir_name.decode("utf8", "replace")
        h.file_redir = (redir_type, redir_flags, redir_name)

    def _parse_file_owner(self, h, xdata, pos):
        """Parse the owner record into ``h.file_owner``; absent fields stay None."""
        user_name = group_name = user_id = group_id = None

        flags, pos = load_vint(xdata, pos)
        if flags & RAR5_XOWNER_UNAME:
            user_name, pos = load_vstr(xdata, pos)
        if flags & RAR5_XOWNER_GNAME:
            group_name, pos = load_vstr(xdata, pos)
        if flags & RAR5_XOWNER_UID:
            user_id, pos = load_vint(xdata, pos)
        if flags & RAR5_XOWNER_GID:
            group_id, pos = load_vint(xdata, pos)

        h.file_owner = (user_name, group_name, user_id, group_id)

    def process_entry(self, fd, item):
        """Record a parsed header: file entries and the archive comment."""
        if item.block_type == RAR5_BLOCK_FILE:
            if item.file_version:
                pass    # skip old versions
            elif (item.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE) == 0:
                # use only first part
                self._info_map[item.filename.rstrip("/")] = item
                self._info_list.append(item)
            elif len(self._info_list) > 0:
                # final crc is in last block
                old = self._info_list[-1]
                old.CRC = item.CRC
                old._md_expect = item._md_expect
                old.blake2sp_hash = item.blake2sp_hash
                old.compress_size += item.compress_size
        elif item.block_type == RAR5_BLOCK_SERVICE:
            if item.filename == "CMT":
                self._load_comment(fd, item)

    def _load_comment(self, fd, item):
        """Load the archive comment from service record *item*.

        Split or compressed comments are skipped, as are encrypted ones
        with an unsupported cipher.  On success the text is stored in
        ``self.comment``.  Always returns None.
        """
        if item.block_flags & (RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER):
            return None
        if item.compress_type != RAR_M0:
            return None

        if item.flags & RAR_FILE_PASSWORD:
            algo, ___flags, kdf_count, salt, iv, ___checkval = item.file_encryption
            if algo != RAR5_XENC_CIPHER_AES256:
                return None
            key = self._gen_key(kdf_count, salt)
            f = HeaderDecrypt(fd, key, iv)
            cmt = f.read(item.file_size)
        else:
            # archive comment
            with self._open_clear(item) as cmtstream:
                cmt = cmtstream.read()

        # rar bug? - appends zero to comment
        cmt = cmt.split(b"\0", 1)[0]
        # Decode leniently, like filenames and redirect targets, so a bad
        # byte in the comment cannot abort parsing of the whole archive.
        self.comment = cmt.decode("utf8", "replace")
        return None

    def _open_hack(self, inf, pwd):
        """Open *inf* via a temp archive: main header, the entry, end-of-archive."""
        # len, type, blk_flags, flags
        main_hdr = b"\x03\x01\x00\x00"
        endarc_hdr = b"\x03\x05\x00\x00"
        main_hdr = S_LONG.pack(crc32(main_hdr)) + main_hdr
        endarc_hdr = S_LONG.pack(crc32(endarc_hdr)) + endarc_hdr
        return self._open_hack_core(inf, pwd, RAR5_ID + main_hdr, endarc_hdr)
2177 ## Utility classes
class UnicodeFilename:
    """Handle RAR3 unicode filename decompression.

    *name* is the 8-bit form of the filename and *encdata* the compressed
    data that turns it into UTF-16.  After :meth:`decode`, ``failed`` is
    non-zero if either input ran out before decoding finished.
    """
    def __init__(self, name, encdata):
        self.std_name = bytearray(name)
        self.encdata = bytearray(encdata)
        self.pos = self.encpos = 0
        self.buf = bytearray()
        self.failed = 0

    def enc_byte(self):
        """Copy encoded byte."""
        try:
            value = self.encdata[self.encpos]
        except IndexError:
            # ran past the encoded data
            self.failed = 1
            return 0
        self.encpos += 1
        return value

    def std_byte(self):
        """Copy byte from 8-bit representation."""
        try:
            value = self.std_name[self.pos]
        except IndexError:
            # ran past the 8-bit name, substitute a placeholder
            self.failed = 1
            value = ord("?")
        return value

    def put(self, lo, hi):
        """Copy 16-bit value to result."""
        self.buf.extend((lo, hi))
        self.pos += 1

    def decode(self):
        """Decompress compressed UTF16 value."""
        high = self.enc_byte()
        flags = 0
        bits_left = 0
        while self.encpos < len(self.encdata):
            # each flags byte holds four 2-bit opcodes, highest bits first
            if bits_left == 0:
                flags = self.enc_byte()
                bits_left = 8
            bits_left -= 2
            opcode = (flags >> bits_left) & 3

            if opcode == 0:
                # low byte given, high byte zero
                self.put(self.enc_byte(), 0)
            elif opcode == 1:
                # low byte given, shared high byte
                self.put(self.enc_byte(), high)
            elif opcode == 2:
                # both bytes given, low first
                low = self.enc_byte()
                explicit_high = self.enc_byte()
                self.put(low, explicit_high)
            else:
                # run copied from the 8-bit name
                count = self.enc_byte()
                if count & 0x80:
                    delta = self.enc_byte()
                    for _ in range((count & 0x7f) + 2):
                        self.put((self.std_byte() + delta) & 0xFF, high)
                else:
                    for _ in range(count + 2):
                        self.put(self.std_byte(), 0)
        return self.buf.decode("utf-16le", "replace")
2243 class RarExtFile(io.RawIOBase):
2244 """Base class for file-like object that :meth:`RarFile.open` returns.
2246 Provides public methods and common crc checking.
2248 Behaviour:
2249 - no short reads - .read() and .readinfo() read as much as requested.
2250 - no internal buffer, use io.BufferedReader for that.
2252 name = None #: Filename of the archive entry
2253 mode = "rb"
2254 _parser = None
2255 _inf = None
2256 _fd = None
2257 _remain = 0
2258 _returncode = 0
2259 _md_context = None
2260 _seeking = False
2262 def _open_extfile(self, parser, inf):
2263 self.name = inf.filename
2264 self._parser = parser
2265 self._inf = inf
2267 if self._fd:
2268 self._fd.close()
2269 if self._seeking:
2270 md_class = NoHashContext
2271 else:
2272 md_class = self._inf._md_class or NoHashContext
2273 self._md_context = md_class()
2274 self._fd = None
2275 self._remain = self._inf.file_size
2277 def read(self, n=-1):
2278 """Read all or specified amount of data from archive entry."""
2280 # sanitize count
2281 if n is None or n < 0:
2282 n = self._remain
2283 elif n > self._remain:
2284 n = self._remain
2285 if n == 0:
2286 return b""
2288 buf = []
2289 orig = n
2290 while n > 0:
2291 # actual read
2292 data = self._read(n)
2293 if not data:
2294 break
2295 buf.append(data)
2296 self._md_context.update(data)
2297 self._remain -= len(data)
2298 n -= len(data)
2299 data = b"".join(buf)
2300 if n > 0:
2301 raise BadRarFile("Failed the read enough data: req=%d got=%d" % (orig, len(data)))
2303 # done?
2304 if not data or self._remain == 0:
2305 # self.close()
2306 self._check()
2307 return data
2309 def _check(self):
2310 """Check final CRC."""
2311 final = self._md_context.digest()
2312 exp = self._inf._md_expect
2313 if exp is None:
2314 return
2315 if final is None:
2316 return
2317 if self._returncode:
2318 check_returncode(self._returncode, "", tool_setup().get_errmap())
2319 if self._remain != 0:
2320 raise BadRarFile("Failed the read enough data")
2321 if final != exp:
2322 raise BadRarFile("Corrupt file - CRC check failed: %s - exp=%r got=%r" % (
2323 self._inf.filename, exp, final))
2325 def _read(self, cnt):
2326 """Actual read that gets sanitized cnt."""
2327 raise NotImplementedError("_read")
2329 def close(self):
2330 """Close open resources."""
2332 super().close()
2334 if self._fd:
2335 self._fd.close()
2336 self._fd = None
2338 def __del__(self):
2339 """Hook delete to make sure tempfile is removed."""
2340 self.close()
2342 def readinto(self, buf):
2343 """Zero-copy read directly into buffer.
2345 Returns bytes read.
2347 raise NotImplementedError("readinto")
2349 def tell(self):
2350 """Return current reading position in uncompressed data."""
2351 return self._inf.file_size - self._remain
def seek(self, offset, whence=0):
    """Move the read position and return the new offset.

    ``whence`` follows the usual convention: 0 = from start, 1 = from the
    current position, 2 = from the end; anything else raises ``ValueError``.
    The target is clamped to ``[0, file_size]``.  Moving forward is done by
    skipping data; moving backward re-opens the entry and skips from the
    start.  The first seek replaces the hash context with a no-op one, so
    no digest is accumulated afterwards.
    """
    # a checksum over non-sequential reads is meaningless, stop hashing
    if not self._seeking:
        self._md_context = NoHashContext()
        self._seeking = True

    total = self._inf.file_size
    here = self.tell()

    # resolve the reference point for the requested offset
    if whence == 0:
        base = 0
    elif whence == 1:
        base = here
    elif whence == 2:
        base = total
    else:
        raise ValueError("Invalid value for whence")

    # keep the target inside the file
    target = max(0, min(base + offset, total))

    if target < here:
        # cannot go backwards in the stream: restart and skip forward
        self._open_extfile(self._parser, self._inf)
        self._skip(target)
    else:
        self._skip(target - here)
    return self.tell()
def _skip(self, cnt):
    """Advance by *cnt* bytes by reading them in ``BSIZE`` chunks and discarding the data."""
    empty_read(self, cnt, BSIZE)
def readable(self):
    """Report that the stream supports reading (always ``True``)."""
    return True
def writable(self):
    """Report that the stream cannot be written to (always ``False``).

    Writing is not supported.
    """
    return False
def seekable(self):
    """Report that the stream supports seeking (always ``True``).

    Seeking works on every entry, but it is slow on compressed files.
    """
    return True
def readall(self):
    """Return everything that is left in the stream."""
    # delegate to read() instead of the RawIOBase default implementation
    remaining = self.read()
    return remaining
class PipeReader(RarExtFile):
    """Reader that streams an entry from the stdout of an external command.

    Also owns an optional temporary file, which is removed on ``close()``.
    """

    def __init__(self, parser, inf, cmd, tempfile=None):
        super().__init__()
        self._cmd = cmd
        self._proc = None
        self._tempfile = tempfile
        self._open_extfile(parser, inf)

    def _close_proc(self):
        """Close the child's pipes, wait for it and remember its return code."""
        proc = self._proc
        if not proc:
            return
        for pipe in (proc.stdout, proc.stderr, proc.stdin):
            if pipe:
                pipe.close()
        proc.wait()
        self._returncode = proc.returncode
        self._proc = None

    def _open_extfile(self, parser, inf):
        """(Re)start the command and read from its stdout."""
        super()._open_extfile(parser, inf)

        # get rid of a previous run, if any
        self._close_proc()

        # start a fresh process
        self._returncode = 0
        self._proc = custom_popen(self._cmd)
        self._fd = self._proc.stdout

    def _read(self, cnt):
        """Read up to *cnt* bytes from the pipe, retrying after short reads."""
        # a single read is usually sufficient
        data = self._fd.read(cnt)
        if len(data) == cnt or not data:
            return data

        # pipe delivered less than requested: keep reading until EOF or done
        chunks = [data]
        missing = cnt - len(data)
        while missing > 0:
            more = self._fd.read(missing)
            if not more:
                break
            chunks.append(more)
            missing -= len(more)
        return b"".join(chunks)

    def close(self):
        """Stop the process, close the stream and delete the tempfile."""
        self._close_proc()
        super().close()

        if self._tempfile:
            try:
                os.unlink(self._tempfile)
            except OSError:
                # best effort: a failed unlink is ignored
                pass
            self._tempfile = None

    def readinto(self, buf):
        """Zero-copy read directly into buffer."""
        limit = min(len(buf), self._remain)
        view = memoryview(buf)
        got = 0
        while got < limit:
            nread = self._fd.readinto(view[got: limit])
            if not nread:
                break
            self._md_context.update(view[got: got + nread])
            self._remain -= nread
            got += nread
        return got
class DirectReader(RarExtFile):
    """Reader for stored (uncompressed) entries, reading straight from the archive.

    Follows the entry into the next volume when its header carries the
    ``RAR_FILE_SPLIT_AFTER`` flag.
    """
    _cur = None         # header of the part currently being read
    _cur_avail = None   # bytes of the current part not yet consumed
    _volfile = None     # name of the volume currently open

    def __init__(self, parser, inf):
        super().__init__()
        self._open_extfile(parser, inf)

    def _open_extfile(self, parser, inf):
        """Open the entry's volume and re-parse the entry header."""
        super()._open_extfile(parser, inf)

        self._volfile = self._inf.volume_file
        self._fd = XFile(self._volfile, 0)
        self._fd.seek(self._inf.header_offset, 0)
        self._cur = self._parser._parse_header(self._fd)
        self._cur_avail = self._cur.add_size

    def _skip(self, cnt):
        """RAR seek: move forward *cnt* bytes, crossing volumes as needed."""
        while cnt > 0:
            # current part exhausted, continue in the next volume
            if self._cur_avail == 0 and not self._open_next():
                break

            step = min(cnt, self._cur_avail)
            if cnt <= self._cur_avail:
                # target lies inside this part: a real seek gets us there;
                # otherwise the rest of the part is simply abandoned
                self._fd.seek(cnt, 1)
            self._cur_avail -= step
            self._remain -= step
            cnt -= step

    def _read(self, cnt):
        """Read from potentially multi-volume archive."""
        # make sure the file position matches what was consumed so far
        wanted = self._cur.data_offset + self._cur.add_size - self._cur_avail
        if self._fd.tell() != wanted:
            self._fd.seek(wanted, 0)

        chunks = []
        while cnt > 0:
            # current part exhausted, continue in the next volume
            if self._cur_avail == 0 and not self._open_next():
                break

            # never read past the end of the current part
            data = self._fd.read(min(cnt, self._cur_avail))
            if not data:
                break

            cnt -= len(data)
            self._cur_avail -= len(data)
            chunks.append(data)

        if len(chunks) == 1:
            return chunks[0]
        return b"".join(chunks)

    def _open_next(self):
        """Switch to the next volume; return False if the entry does not continue."""
        # entry ends in this volume?
        if (self._cur.flags & RAR_FILE_SPLIT_AFTER) == 0:
            return False

        if self._fd:
            self._fd.close()
            self._fd = None

        # open the following volume and verify its signature
        self._volfile = self._parser._next_volname(self._volfile)
        fd = open(self._volfile, "rb", 0)
        self._fd = fd
        expected = self._parser._expect_sig
        if fd.read(len(expected)) != expected:
            raise BadRarFile("Invalid signature")

        # walk the headers until the first non-marker, non-main block
        while True:
            hdr = self._parser._parse_header(fd)
            if not hdr:
                raise BadRarFile("Unexpected EOF")
            if hdr.type in (RAR_BLOCK_MARK, RAR_BLOCK_MAIN):
                if hdr.add_size:
                    fd.seek(hdr.add_size, 1)
                continue
            if hdr.orig_filename != self._inf.orig_filename:
                raise BadRarFile("Did not found file entry")
            self._cur = hdr
            self._cur_avail = hdr.add_size
            return True

    def readinto(self, buf):
        """Zero-copy read directly into buffer."""
        total = len(buf)
        view = memoryview(buf)
        got = 0
        while got < total:
            # current part exhausted, continue in the next volume
            if self._cur_avail == 0 and not self._open_next():
                break

            # stay inside both the buffer and the current part
            want = min(total - got, self._cur_avail)

            nread = self._fd.readinto(view[got: got + want])
            if not nread:
                break
            self._md_context.update(view[got: got + nread])
            self._cur_avail -= nread
            self._remain -= nread
            got += nread
        return got
class HeaderDecrypt:
    """File-like wrapper that returns AES-CBC decrypted data from another file."""

    def __init__(self, f, key, iv):
        self.f = f
        self.ciph = AES_CBC_Decrypt(key, iv)
        self.buf = b""

    def tell(self):
        """Position of the wrapped file - meaningful only on block boundaries."""
        return self.f.tell()

    def read(self, cnt=None):
        """Return up to *cnt* decrypted bytes.

        Requests above 8 KiB raise ``BadRarFile``.  Fewer bytes are returned
        when the wrapped file cannot supply another full 16-byte block.
        """
        if cnt > 8 * 1024:
            raise BadRarFile("Bad count to header decrypt - wrong password?")

        # serve from leftovers of the previous block when possible
        pending = self.buf
        if cnt <= len(pending):
            self.buf = pending[cnt:]
            return pending[:cnt]
        out = pending
        self.buf = b""
        cnt -= len(out)

        # decrypt further blocks, one at a time
        blklen = 16
        while cnt > 0:
            enc = self.f.read(blklen)
            if len(enc) < blklen:
                break
            dec = self.ciph.decrypt(enc)
            if cnt < len(dec):
                # only part of this block is wanted, keep the rest for later
                out += dec[:cnt]
                self.buf = dec[cnt:]
                cnt = 0
            else:
                out += dec
                cnt -= len(dec)

        return out
class XFile:
    """Uniform file wrapper: accepts either a filename or a file object.

    A file object is rewound to offset 0 and left open on ``close()``;
    a filename is opened in binary mode and closed on ``close()``.
    """
    __slots__ = ("_fd", "_need_close")

    def __init__(self, xfile, bufsize=1024):
        if not is_filelike(xfile):
            # we opened it, so we are responsible for closing it
            self._need_close = True
            self._fd = open(xfile, "rb", bufsize)
        else:
            self._need_close = False
            self._fd = xfile
            self._fd.seek(0)

    def read(self, n=None):
        """Read up to *n* bytes from the wrapped file."""
        return self._fd.read(n)

    def tell(self):
        """Current position in the wrapped file."""
        return self._fd.tell()

    def seek(self, ofs, whence=0):
        """Reposition the wrapped file."""
        return self._fd.seek(ofs, whence)

    def readinto(self, buf):
        """Fill *buf* from the wrapped file."""
        return self._fd.readinto(buf)

    def close(self):
        """Close the wrapped file, but only if this object opened it."""
        if self._need_close:
            self._fd.close()

    def __enter__(self):
        return self

    def __exit__(self, typ, val, tb):
        self.close()
class NoHashContext:
    """Hash context stand-in whose methods do nothing and return ``None``."""

    def __init__(self, data=None):
        """Accept and ignore initial data."""

    def update(self, data):
        """Accept and ignore more data."""

    def digest(self):
        """No digest is computed; returns None."""

    def hexdigest(self):
        """No digest is computed; returns None."""
class CRC32Context:
    """Hash-context style wrapper around a running CRC32."""
    __slots__ = ["_crc"]

    def __init__(self, data=None):
        self._crc = 0
        if data:
            self.update(data)

    def update(self, data):
        """Fold *data* into the running checksum."""
        self._crc = crc32(data, self._crc)

    def digest(self):
        """Return the checksum as an integer."""
        return self._crc

    def hexdigest(self):
        """Return the checksum as 8 lowercase hex digits."""
        return "%08x" % self.digest()
class Blake2SP:
    """Blake2sp hash context built from ``blake2s`` leaf and root nodes.

    Input is cut into ``block_size`` pieces that are dealt round-robin to
    ``parallelism`` leaf hashers; the final digest is a root ``blake2s``
    over the leaf digests.
    """
    __slots__ = ["_thread", "_buf", "_cur", "_digest"]
    digest_size = 32
    block_size = 64
    parallelism = 8

    def __init__(self, data=None):
        self._buf = b""
        self._cur = 0
        self._digest = None
        last = self.parallelism - 1
        self._thread = []

        for idx in range(self.parallelism):
            self._thread.append(self._blake2s(idx, 0, idx == last))

        if data:
            self.update(data)

    def _blake2s(self, ofs, depth, is_last):
        """Create one tree node hasher."""
        return blake2s(node_offset=ofs, node_depth=depth, last_node=is_last,
                       depth=2, inner_size=32, fanout=self.parallelism)

    def _add_block(self, blk):
        """Feed one block to the current leaf and advance to the next leaf."""
        self._thread[self._cur].update(blk)
        self._cur = (self._cur + 1) % self.parallelism

    def update(self, data):
        """Hash data.
        """
        view = memoryview(data)
        bs = self.block_size
        pos = 0

        pending = self._buf
        if pending:
            # complete the partial block left over from the previous call
            need = bs - len(pending)
            if len(view) < need:
                self._buf += view.tobytes()
                return
            self._add_block(pending + view[:need].tobytes())
            pos = need

        # hand out all complete blocks
        end = len(view)
        while end - pos >= bs:
            self._add_block(view[pos: pos + bs])
            pos += bs

        # keep the incomplete tail for later
        self._buf = view[pos:].tobytes()

    def digest(self):
        """Return final digest value.
        """
        if self._digest is None:
            if self._buf:
                self._add_block(self._buf)
                self._buf = b""
            root = self._blake2s(0, 1, True)
            for leaf in self._thread:
                root.update(leaf.digest())
            self._digest = root.digest()
        return self._digest

    def hexdigest(self):
        """Hexadecimal digest."""
        return hexlify(self.digest()).decode("ascii")
class Rar3Sha1:
    """Emulate buggy SHA1 from RAR3.

    With ``rarbug`` set, ``update()`` overwrites parts of the caller's
    buffer after hashing it, so the buffer must be writable in that mode.
    """
    digest_size = 20
    block_size = 64

    _BLK_BE = struct.Struct(b">16L")
    _BLK_LE = struct.Struct(b"<16L")

    __slots__ = ("_nbytes", "_md", "_rarbug")

    def __init__(self, data=b"", rarbug=False):
        self._md = sha1()
        self._nbytes = 0
        self._rarbug = rarbug
        self.update(data)

    def update(self, data):
        """Process more data."""
        self._md.update(data)
        offset_in_block = self._nbytes & 63
        self._nbytes += len(data)

        if not (self._rarbug and len(data) > 64):
            return

        # rewrite every complete 64-byte block that follows the block which
        # was partially filled before this call
        first = self.block_size - offset_in_block
        stop = len(data) - self.block_size + 1
        for dpos in range(first, stop, self.block_size):
            self._corrupt(data, dpos)

    def digest(self):
        """Return final state."""
        return self._md.digest()

    def hexdigest(self):
        """Return final state as hex string."""
        return self._md.hexdigest()

    def _corrupt(self, data, dpos):
        """Overwrite the 64 bytes at *dpos* with SHA1's expanded message words."""
        words = list(self._BLK_BE.unpack_from(data, dpos))
        for t in range(16, 80):
            mixed = (words[(t - 3) & 15] ^ words[(t - 8) & 15]
                     ^ words[(t - 14) & 15] ^ words[(t - 16) & 15])
            # rotate left by one bit within 32 bits
            words[t & 15] = ((mixed << 1) | (mixed >> (32 - 1))) & 0xFFFFFFFF
        self._BLK_LE.pack_into(data, dpos, *words)
2865 ## Utility functions
# Pre-compiled little-endian structs for single unsigned integers.
S_LONG = Struct("<L")       # 32-bit
S_SHORT = Struct("<H")      # 16-bit
S_BYTE = Struct("<B")       # 8-bit

# Pre-compiled little-endian structs for fixed-layout header records.
# Block header; rar3_decompress() packs it as (header crc, block type, flags, header length).
S_BLK_HDR = Struct("<HBHH")
# File header; rar3_decompress() packs it as (data size, decompressed size, host OS, crc,
# DOS date, version, method, filename length, mode).
S_FILE_HDR = Struct("<LLBLLBBHL")
# NOTE(review): presumably the layout of a comment header - its use is not visible here, confirm.
S_COMMENT_HDR = Struct("<HBBH")
def load_vint(buf, pos):
    """Decode a RAR5 variable-size integer starting at *pos*.

    Each byte contributes its low 7 bits, least significant group first;
    a byte below 0x80 ends the number.  At most 11 bytes are examined.
    Returns ``(value, next_pos)``; raises ``BadRarFile`` if no terminating
    byte is found.
    """
    value = 0
    shift = 0
    for idx in range(pos, min(pos + 11, len(buf))):
        byte = buf[idx]
        value += (byte & 0x7F) << shift
        shift += 7
        if byte < 0x80:
            return value, idx + 1
    raise BadRarFile("cannot load vint")
def load_byte(buf, pos):
    """Read one unsigned byte at *pos*; returns ``(value, next_pos)``.

    Raises ``BadRarFile`` when *pos* is past the end of *buf*.
    """
    nxt = pos + 1
    if nxt > len(buf):
        raise BadRarFile("cannot load byte")
    (value,) = S_BYTE.unpack_from(buf, pos)
    return value, nxt
def load_le32(buf, pos):
    """Read a little-endian unsigned 32-bit integer at *pos*.

    Returns ``(value, next_pos)``; raises ``BadRarFile`` when fewer than
    four bytes are available.
    """
    nxt = pos + 4
    if nxt > len(buf):
        raise BadRarFile("cannot load le32")
    (value,) = S_LONG.unpack_from(buf, pos)
    return value, nxt
def load_bytes(buf, num, pos):
    """Slice *num* bytes out of *buf* starting at *pos*.

    Returns ``(data, next_pos)``; raises ``BadRarFile`` when the slice would
    run past the end of *buf*.
    """
    stop = pos + num
    if stop > len(buf):
        raise BadRarFile("cannot load bytes")
    chunk = buf[pos: stop]
    return chunk, stop
def load_vstr(buf, pos):
    """Read a byte string whose length is given by a leading vint.

    Returns ``(data, next_pos)``.
    """
    length, body_pos = load_vint(buf, pos)
    return load_bytes(buf, length, body_pos)
def load_dostime(buf, pos):
    """Read a 32-bit little-endian DOS timestamp and convert it to a datetime.

    Returns ``(datetime, next_pos)``.
    """
    raw, pos = load_le32(buf, pos)
    return to_datetime(parse_dos_time(raw)), pos
def load_unixtime(buf, pos):
    """Read a 32-bit little-endian Unix timestamp as a UTC datetime.

    Returns ``(datetime, next_pos)``.
    """
    seconds, pos = load_le32(buf, pos)
    return datetime.fromtimestamp(seconds, timezone.utc), pos
def load_windowstime(buf, pos):
    """Read a 64-bit little-endian Windows timestamp as a UTC datetime.

    The value counts 100 ns ticks since 1601; the sub-second part is kept
    with nanosecond resolution.  Returns ``(datetime, next_pos)``.
    """
    # unix epoch (1970) in seconds from windows epoch (1601)
    unix_epoch = 11644473600
    low, pos = load_le32(buf, pos)
    high, pos = load_le32(buf, pos)
    ticks = (high << 32) | low
    secs, frac_ticks = divmod(ticks, 10000000)
    stamp = datetime.fromtimestamp(secs - unix_epoch, timezone.utc)
    return to_nsdatetime(stamp, frac_ticks * 100), pos
2948 # volume numbering
# Matches a string made up solely of ASCII digits; _next_oldvol() uses it to
# detect a numeric volume suffix.
_rc_num = re.compile('^[0-9]+$')
def _next_newvol(volfile):
    """Compute the next volume name for new-style numbering.

    An empty, ``.exe`` or ``.sfx`` extension is first replaced by ``.rar``.
    The right-most digit found in the last path component is then
    incremented (with carry).  Raises ``BadRarName`` when that component
    holds no digit.
    """
    stem, ext = os.path.splitext(volfile)
    if ext.lower() in ("", ".exe", ".sfx"):
        volfile = stem + ".rar"

    # scan backwards, but never past a directory separator
    for idx in range(len(volfile) - 1, -1, -1):
        ch = volfile[idx]
        if "0" <= ch <= "9":
            return _inc_volname(volfile, idx, False)
        if ch in ("/", os.sep):
            break
    raise BadRarName("Cannot construct volume name: " + volfile)
def _next_oldvol(volfile):
    """Compute the next volume name for old-style numbering.

    An empty, ``.exe`` or ``.sfx`` extension is treated as ``.rar``.  When
    the extension ends in digits after its first two characters it is
    incremented; otherwise the first volume extension is produced
    (``.rar`` -> ``.r00``).
    """
    stem, ext = os.path.splitext(volfile)
    if ext.lower() in ("", ".exe", ".sfx"):
        ext = ".rar"

    if _rc_num.match(ext[2:]):
        next_ext = _inc_volname(ext, len(ext) - 1, True)
    else:
        # .rar -> .r00
        next_ext = ext[:2] + "00"
    return stem + next_ext
2986 def _inc_volname(volfile, i, inc_chars):
2987 """increase digits with carry, otherwise just increment char
2989 fn = list(volfile)
2990 while i >= 0:
2991 if fn[i] == "9":
2992 fn[i] = "0"
2993 i -= 1
2994 if i < 0:
2995 fn.insert(0, "1")
2996 elif "0" <= fn[i] < "9" or inc_chars:
2997 fn[i] = chr(ord(fn[i]) + 1)
2998 break
2999 else:
3000 fn.insert(i + 1, "1")
3001 break
3002 return "".join(fn)
def _parse_ext_time(h, data, pos):
    """Parse the RAR3 extended time block and store the results on *h*.

    A 16-bit flag word holds one 4-bit group per timestamp (mtime, ctime,
    atime, arctime, from the highest nibble down).  ``h.ctime``, ``h.atime``
    and ``h.arctime`` are always assigned; ``h.mtime`` and ``h.date_time``
    only when an extended mtime was present.  Returns the new position.
    """
    # flags and rest of data can be missing
    flags = 0
    if pos + 2 <= len(data):
        (flags,) = S_SHORT.unpack_from(data, pos)
        pos += 2

    # mtime refines the timestamp already present in the header
    new_mtime, pos = _parse_xtime(flags >> 12, data, pos, h.mtime)
    h.ctime, pos = _parse_xtime(flags >> 8, data, pos)
    h.atime, pos = _parse_xtime(flags >> 4, data, pos)
    h.arctime, pos = _parse_xtime(flags, data, pos)
    if new_mtime:
        h.mtime = new_mtime
        h.date_time = new_mtime.timetuple()[:6]
    return pos
def _parse_xtime(flag, data, pos, basetime=None):
    """Parse a single RAR3 extended time field.

    Bit 8 of *flag* marks the field as present, bit 4 adds one second and
    the low two bits give the number of fraction bytes.  Without
    *basetime* a DOS timestamp is read from *data* first.  Returns
    ``(timestamp or None, next_pos)``.
    """
    if not flag & 8:
        return None, pos

    if not basetime:
        basetime, pos = load_dostime(data, pos)

    # load second fractions of 100ns units
    frac = 0
    for _ in range(flag & 3):
        byte, pos = load_byte(data, pos)
        frac = (byte << 16) | (frac >> 8)

    # dostime has room for 30 seconds only, correct if needed
    if flag & 4 and basetime.second < 59:
        basetime = basetime.replace(second=basetime.second + 1)

    return to_nsdatetime(basetime, frac * 100), pos
def is_filelike(obj):
    """Tell a file object from a filename.

    Returns False for ``bytes``, ``str`` and ``Path`` values and True for
    objects offering ``read``, ``tell`` and ``seek``.  Anything else raises
    ``ValueError``.
    """
    if isinstance(obj, (bytes, str, Path)):
        return False
    if not all(hasattr(obj, attr) for attr in ("read", "tell", "seek")):
        raise ValueError("Invalid object passed as file")
    return True
def rar3_s2k(pwd, salt):
    """Derive the RAR3 key and IV from a password and salt.

    Runs 16 * 0x4000 rounds of the RAR3 SHA1 variant over the UTF-16LE
    password plus salt followed by a 3-byte round counter.  One IV byte is
    taken from the running digest at the start of each group of 0x4000
    rounds.  Returns ``(key, iv)``.
    """
    if not isinstance(pwd, str):
        pwd = pwd.decode("utf8")
    # must be mutable: the hasher rewrites it in place
    seed = bytearray(pwd.encode("utf-16le") + salt)
    hasher = Rar3Sha1(rarbug=True)
    iv_bytes = []
    for rnd in range(16 * 0x4000):
        counter = S_LONG.pack(rnd)
        hasher.update(seed)
        hasher.update(counter[:3])
        if rnd % 0x4000 == 0:
            iv_bytes.append(hasher.digest()[19:20])
    iv = b"".join(iv_bytes)

    # the key is the first 16 digest bytes with each 32-bit word byte-swapped
    key_be = hasher.digest()[:16]
    key_le = pack("<LLLL", *unpack(">LLLL", key_be))
    return key_le, iv
def rar3_decompress(vers, meth, data, declen=0, flags=0, crc=0, pwd=None, salt=None):
    """Decompress a blob of compressed data via the external tool.

    Used for data with a non-standard header - eg. comments.  Stored,
    unencrypted data is returned as-is.  Otherwise a minimal single-file
    RAR3 archive is written to a temporary file, the configured tool is run
    on it and its output is returned.  The temporary file is always removed.
    """
    # already uncompressed?
    if meth == RAR_M0 and (flags & RAR_FILE_PASSWORD) == 0:
        return data

    # take only necessary flags
    flags &= RAR_FILE_PASSWORD | RAR_FILE_SALT | RAR_FILE_DICTMASK
    flags |= RAR_LONG_BLOCK

    # file header with a fixed name and a fixed 2010-12-31 timestamp
    fname = b"data"
    date = ((2010 - 1980) << 25) + (12 << 21) + (31 << 16)
    mode = DOS_MODE_ARCHIVE
    fhdr = S_FILE_HDR.pack(len(data), declen, RAR_OS_MSDOS, crc,
                           date, vers, meth, len(fname), mode)
    fhdr += fname
    if salt:
        fhdr += salt

    # full header: the crc covers everything after the crc field itself
    hlen = S_BLK_HDR.size + len(fhdr)
    unsigned = S_BLK_HDR.pack(0, RAR_BLOCK_FILE, flags, hlen) + fhdr
    hcrc = crc32(unsigned[2:]) & 0xFFFF
    hdr = S_BLK_HDR.pack(hcrc, RAR_BLOCK_FILE, flags, hlen) + fhdr

    # archive main header
    mh = S_BLK_HDR.pack(0x90CF, RAR_BLOCK_MAIN, 0, 13) + b"\0" * (2 + 4)

    # decompress via temp rar
    setup = tool_setup()
    tmpfd, tmpname = mkstemp(suffix=".rar", dir=HACK_TMP_DIR)
    tmpf = os.fdopen(tmpfd, "wb")
    try:
        tmpf.write(RAR_ID + mh + hdr + data)
        tmpf.close()

        # a password is only passed on for encrypted data
        curpwd = (pwd or None) if flags & RAR_FILE_PASSWORD else None
        cmd = setup.open_cmdline(curpwd, tmpname)
        proc = custom_popen(cmd)
        return proc.communicate()[0]
    finally:
        tmpf.close()
        os.unlink(tmpname)
def sanitize_filename(fname, pathsep, is_win32):
    """Clean up an archive member name the way unrar does.

    On win32 a leading drive prefix (``X:``) is dropped.  Characters matched
    by the platform's bad-character pattern become ``_``.  Empty, ``.`` and
    ``..`` path segments are removed, and on win32 a trailing space or dot
    in a segment is replaced by ``_``.  The surviving segments are joined
    with *pathsep*.
    """
    if is_win32:
        if fname[1:2] == ":":
            fname = fname[2:]
        bad_chars = RC_BAD_CHARS_WIN32
    else:
        bad_chars = RC_BAD_CHARS_UNIX
    if bad_chars.search(fname):
        fname = bad_chars.sub("_", fname)

    kept = []
    for seg in fname.split("/"):
        if seg in ("", ".", ".."):
            continue
        if is_win32 and seg[-1] in (" ", "."):
            seg = seg[:-1] + "_"
        kept.append(seg)
    return pathsep.join(kept)
def empty_read(src, size, blklen):
    """Consume exactly *size* bytes from *src*, at most *blklen* per read.

    Raises ``BadRarFile`` if *src* runs dry before *size* bytes were read.
    """
    left = size
    while left > 0:
        chunk = src.read(min(left, blklen))
        if not chunk:
            raise BadRarFile("cannot load data")
        left -= len(chunk)
def to_datetime(t):
    """Build a datetime from a ``(year, month, day, hour, minute, second)`` tuple.

    Valid tuples are converted directly.  Otherwise the month and day are
    clamped into range (February is capped at 28) and hour, minute and
    second are capped at their maximum before converting.
    """
    year, mon, day, h, m, s = t

    # the common case: everything is already valid
    try:
        return datetime(year, mon, day, h, m, s)
    except ValueError:
        pass

    # sanitize invalid values
    days_in_month = (0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
    mon = max(1, min(mon, 12))
    day = max(1, min(day, days_in_month[mon]))
    return datetime(year, mon, day, min(h, 23), min(m, 59), min(s, 59))
def parse_dos_time(stamp):
    """Split a 32-bit DOS timestamp into ``(year, month, day, hour, minute, second)``.

    Bit layout from the low end: 5 bits seconds/2, 6 bits minutes, 5 bits
    hours, 5 bits day, 4 bits month, 7 bits years since 1980.
    """
    sec2 = stamp & 0x1F
    minute = (stamp >> 5) & 0x3F
    hour = (stamp >> 11) & 0x1F
    day = (stamp >> 16) & 0x1F
    month = (stamp >> 21) & 0x0F
    year = ((stamp >> 25) & 0x7F) + 1980
    return (year, month, day, hour, minute, sec2 * 2)
# pylint: disable=arguments-differ,signature-differs
class nsdatetime(datetime):
    """Datetime that carries nanoseconds.

    Arithmetic not supported, will lose nanoseconds.

    Constructing with a nanosecond value that is a whole number of
    microseconds returns a plain :class:`datetime` instead.

    Comparisons with objects that are not :class:`datetime` instances
    return ``NotImplemented``, so ``==`` / ``!=`` fall back to Python's
    default behaviour and ordering raises ``TypeError``.

    .. versionadded:: 4.0
    """
    __slots__ = ("nanosecond",)
    nanosecond: int     #: Number of nanoseconds, 0 <= nanosecond <= 999999999

    def __new__(cls, year, month=None, day=None, hour=0, minute=0, second=0,
                microsecond=0, tzinfo=None, *, fold=0, nanosecond=0):
        # a non-zero nanosecond value takes precedence over microsecond
        usec, mod = divmod(nanosecond, 1000) if nanosecond else (microsecond, 0)
        if mod == 0:
            # no sub-microsecond part: a plain datetime is sufficient
            return datetime(year, month, day, hour, minute, second, usec, tzinfo, fold=fold)
        self = super().__new__(cls, year, month, day, hour, minute, second, usec, tzinfo, fold=fold)
        self.nanosecond = nanosecond
        return self

    @staticmethod
    def _nsec_of(other):
        """Return the sub-second part of a datetime-like *other* in nanoseconds."""
        if isinstance(other, nsdatetime):
            return other.nanosecond
        return other.microsecond * 1000

    def isoformat(self, sep="T", timespec="auto"):
        """Formats with nanosecond precision by default.
        """
        if timespec == "auto":
            # swap the 6 microsecond digits for 9 nanosecond digits and
            # keep whatever follows them (utc offset)
            pre, post = super().isoformat(sep, "microseconds").split(".", 1)
            return f"{pre}.{self.nanosecond:09d}{post[6:]}"
        return super().isoformat(sep, timespec)

    def astimezone(self, tz=None):
        """Convert to new timezone.
        """
        tmp = super().astimezone(tz)
        return self.__class__(tmp.year, tmp.month, tmp.day, tmp.hour, tmp.minute, tmp.second,
                              nanosecond=self.nanosecond, tzinfo=tmp.tzinfo, fold=tmp.fold)

    def replace(self, year=None, month=None, day=None, hour=None, minute=None, second=None,
                microsecond=None, tzinfo=None, *, fold=None, nanosecond=None):
        """Return new timestamp with specified fields replaced.

        An explicit *nanosecond* wins over *microsecond*.
        """
        return self.__class__(
            self.year if year is None else year,
            self.month if month is None else month,
            self.day if day is None else day,
            self.hour if hour is None else hour,
            self.minute if minute is None else minute,
            self.second if second is None else second,
            nanosecond=((self.nanosecond if microsecond is None else microsecond * 1000)
                        if nanosecond is None else nanosecond),
            tzinfo=self.tzinfo if tzinfo is None else tzinfo,
            fold=self.fold if fold is None else fold)

    def __hash__(self):
        return hash((super().__hash__(), self.nanosecond)) if self.nanosecond else super().__hash__()

    def __eq__(self, other):
        # only datetimes have a sub-second part to compare against; let
        # Python handle every other operand type
        if not isinstance(other, datetime):
            return NotImplemented
        return super().__eq__(other) and self.nanosecond == self._nsec_of(other)

    def __gt__(self, other):
        if not isinstance(other, datetime):
            return NotImplemented
        return super().__gt__(other) or (
            super().__eq__(other) and self.nanosecond > self._nsec_of(other))

    def __lt__(self, other):
        return not (self > other or self == other)

    def __ge__(self, other):
        return not self < other

    def __le__(self, other):
        return not self > other

    def __ne__(self, other):
        return not self == other
def to_nsdatetime(dt, nsec):
    """Attach a nanosecond value to *dt*.

    With a zero (or otherwise false) *nsec* the original object is returned
    unchanged; otherwise the result is built by ``nsdatetime`` from the
    date, time, tzinfo and fold of *dt*.
    """
    if nsec:
        return nsdatetime(dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second,
                          tzinfo=dt.tzinfo, fold=dt.fold, nanosecond=nsec)
    return dt
def to_nsecs(dt):
    """Convert a datetime instance to integer nanoseconds.

    Whole seconds come from ``dt.timestamp()``; the sub-second part comes
    from ``nanosecond`` for ``nsdatetime`` objects and from ``microsecond``
    otherwise.
    """
    whole = int(dt.timestamp())
    if isinstance(dt, nsdatetime):
        frac = dt.nanosecond
    else:
        frac = dt.microsecond * 1000
    return whole * 1000000000 + frac
def custom_popen(cmd):
    """Start *cmd* with stdin detached and stderr merged into a stdout pipe.

    Returns the ``Popen`` object.  ``OSError`` with ENOENT, EACCES or EPERM
    is turned into ``RarCannotExec``; other errors propagate unchanged.
    """
    flags = 0x08000000 if WIN32 else 0   # CREATE_NO_WINDOW
    try:
        return Popen(cmd, bufsize=0, stdout=PIPE, stderr=STDOUT, stdin=DEVNULL,
                     creationflags=flags)
    except OSError as ex:
        if ex.errno == errno.ENOENT:
            raise RarCannotExec("Unrar not installed?") from None
        if ex.errno in (errno.EACCES, errno.EPERM):
            raise RarCannotExec("Cannot execute unrar") from None
        raise
def check_returncode(code, out, errmap):
    """Raise exception according to unrar exit code.

    :param code: process return code; 0 means success and returns silently.
    :param out: tool output, appended to the message when non-empty.
    :param errmap: sequence indexed by exit code that yields the exception
        class to raise.  Entries may be ``None`` for codes without a
        dedicated exception.

    Codes outside *errmap* map to ``RarUserBreak`` (255), ``RarSignalExit``
    (negative) or ``RarUnknownError``.  A ``None`` entry in *errmap* is also
    reported as ``RarUnknownError`` rather than failing on a non-callable.
    """
    if code == 0:
        return

    exc = None
    if 0 < code < len(errmap):
        exc = errmap[code]
    elif code == 255:
        exc = RarUserBreak
    elif code < 0:
        exc = RarSignalExit

    # no specific class known for this code (including None placeholders)
    if exc is None:
        exc = RarUnknownError

    # format message
    if out:
        msg = "%s [%d]: %s" % (exc.__doc__, code, out)
    else:
        msg = "%s [%d]" % (exc.__doc__, code)

    raise exc(msg)
def membuf_tempfile(memfile):
    """Copy an in-memory file object into a real temporary ``.rar`` file.

    *memfile* is rewound first.  Returns the name of the new file; if
    copying fails the temporary file is removed and the error re-raised.
    """
    memfile.seek(0, 0)

    handle, path = mkstemp(suffix=".rar", dir=HACK_TMP_DIR)
    out = os.fdopen(handle, "wb")

    try:
        shutil.copyfileobj(memfile, out, BSIZE)
        out.close()
    except BaseException:
        # do not leave a half-written file behind
        out.close()
        os.unlink(path)
        raise
    return path
3349 # Find working command-line tool
class ToolSetup:
    """Builds command lines for one external tool from its config dict."""

    def __init__(self, setup):
        self.setup = setup

    def check(self):
        """Run the tool's check command; True if it exits with code 0."""
        cmdline = self.get_cmdline("check_cmd", None)
        try:
            proc = custom_popen(cmdline)
            proc.communicate()
            return proc.returncode == 0
        except RarCannotExec:
            return False

    def open_cmdline(self, pwd, rarfn, filefn=None):
        """Command line that extracts *filefn* from archive *rarfn*."""
        cmdline = self.get_cmdline("open_cmd", pwd)
        cmdline.append(rarfn)
        if filefn:
            self.add_file_arg(cmdline, filefn)
        return cmdline

    def get_errmap(self):
        """Return the tool's exit code to exception mapping."""
        return self.setup["errmap"]

    def get_cmdline(self, key, pwd, nodash=False):
        """Build the argument list for config entry *key*.

        The first element names a module-level variable and is replaced by
        that variable's value.  Except for ``check_cmd``, password arguments
        are added, followed by ``--`` unless *nodash* is set.
        """
        cmdline = list(self.setup[key])
        cmdline[0] = globals()[cmdline[0]]
        if key != "check_cmd":
            self.add_password_arg(cmdline, pwd)
            if not nodash:
                cmdline.append("--")
        return cmdline

    def add_file_arg(self, cmdline, filename):
        """Append the archive member name to the command line."""
        cmdline.append(filename)

    def add_password_arg(self, cmdline, pwd):
        """Append password switch to commandline.

        Without a password the tool's ``no_password`` arguments are used.
        Raises ``RarCannotExec`` when a password is given but the tool has
        no password option.
        """
        if pwd is None:
            cmdline.extend(self.setup["no_password"])
            return

        if not isinstance(pwd, str):
            pwd = pwd.decode("utf8")
        args = self.setup["password"]
        if args is None:
            tool = self.setup["open_cmd"][0]
            raise RarCannotExec(f"{tool} does not support passwords")
        if isinstance(args, str):
            # switch and password form a single argument
            cmdline.append(args + pwd)
        else:
            # password follows the switch as a separate argument
            cmdline.extend(args)
            cmdline.append(pwd)
# Tool configuration tables consumed by ToolSetup.
#   open_cmd / check_cmd: argument tuples; the first item names the module-level
#       variable that holds the executable.
#   password: a str is joined with the password into one argument, a tuple is
#       followed by the password as a separate argument, None means unsupported.
#   no_password: arguments used when no password is given.
#   errmap: list indexed by the tool's exit code, giving the exception class to raise.
UNRAR_CONFIG = {
    "open_cmd": ("UNRAR_TOOL", "p", "-inul"),
    "check_cmd": ("UNRAR_TOOL", "-inul", "-?"),
    "password": "-p",
    "no_password": ("-p-",),
    # map return code to exception class, codes from rar.txt
    "errmap": [None,
               RarWarning, RarFatalError, RarCRCError, RarLockedArchiveError,    # 1..4
               RarWriteError, RarOpenError, RarUserError, RarMemoryError,        # 5..8
               RarCreateError, RarNoFilesError, RarWrongPassword]                # 9..11
}

# Problems with unar RAR backend:
# - Does not support RAR2 locked files [fails to read]
# - Does not support RAR5 Blake2sp hash [reading works]
UNAR_CONFIG = {
    "open_cmd": ("UNAR_TOOL", "-q", "-o", "-"),
    "check_cmd": ("UNAR_TOOL", "-version"),
    "password": ("-p",),
    "no_password": ("-p", ""),
    "errmap": [None],
}

# Problems with libarchive RAR backend:
# - Does not support solid archives.
# - Does not support password-protected archives.
# - Does not support RARVM-based compression filters.
BSDTAR_CONFIG = {
    "open_cmd": ("BSDTAR_TOOL", "-x", "--to-stdout", "-f"),
    "check_cmd": ("BSDTAR_TOOL", "--version"),
    "password": None,
    "no_password": (),
    "errmap": [None],
}

SEVENZIP_CONFIG = {
    "open_cmd": ("SEVENZIP_TOOL", "e", "-so", "-bb0"),
    "check_cmd": ("SEVENZIP_TOOL", "i"),
    "password": "-p",
    "no_password": ("-p",),
    "errmap": [None,
               RarWarning, RarFatalError, None, None,     # 1..4
               None, None, RarUserError, RarMemoryError]  # 5..8
}

SEVENZIP2_CONFIG = {
    "open_cmd": ("SEVENZIP2_TOOL", "e", "-so", "-bb0"),
    "check_cmd": ("SEVENZIP2_TOOL", "i"),
    "password": "-p",
    "no_password": ("-p",),
    "errmap": [None,
               RarWarning, RarFatalError, None, None,     # 1..4
               None, None, RarUserError, RarMemoryError]  # 5..8
}

# ToolSetup selected by tool_setup(); None until a working tool has been found.
CURRENT_SETUP = None
def tool_setup(unrar=True, unar=True, bsdtar=True, sevenzip=True, sevenzip2=True, force=False):
    """Pick a working tool and return its cached ``ToolSetup``.

    The enabled backends are probed in the order unrar, unar, 7-Zip,
    second 7-Zip variant, bsdtar; the first whose check succeeds is cached
    in ``CURRENT_SETUP``.  *force* discards the cached choice first.
    Raises ``RarCannotExec`` when no backend works.
    """
    global CURRENT_SETUP
    if force:
        CURRENT_SETUP = None
    if CURRENT_SETUP is not None:
        return CURRENT_SETUP

    # (enabled, config) pairs in probing order
    choices = (
        (unrar, UNRAR_CONFIG),
        (unar, UNAR_CONFIG),
        (sevenzip, SEVENZIP_CONFIG),
        (sevenzip2, SEVENZIP2_CONFIG),
        (bsdtar, BSDTAR_CONFIG),
    )
    for enabled, conf in choices:
        if not enabled:
            continue
        candidate = ToolSetup(conf)
        if candidate.check():
            CURRENT_SETUP = candidate
            return candidate
    raise RarCannotExec("Cannot find working tool")
def main(args):
    """Minimal command-line interface for rarfile module.
    """
    # NOTE: the docstring above doubles as the argparse description text.
    import argparse
    parser = argparse.ArgumentParser(description=main.__doc__)

    # exactly one of the three actions must be given
    actions = parser.add_mutually_exclusive_group(required=True)
    actions.add_argument("-l", "--list", metavar="<rarfile>",
                         help="Show archive listing")
    actions.add_argument("-e", "--extract", nargs=2,
                         metavar=("<rarfile>", "<output_dir>"),
                         help="Extract archive into target dir")
    actions.add_argument("-t", "--test", metavar="<rarfile>",
                         help="Test if a archive is valid")
    opts = parser.parse_args(args)

    if opts.list:
        with RarFile(opts.list) as rf:
            rf.printdir()
    elif opts.test:
        with RarFile(opts.test) as rf:
            rf.testrar()
    elif opts.extract:
        archive, target_dir = opts.extract
        with RarFile(archive) as rf:
            rf.extractall(target_dir)


if __name__ == "__main__":
    main(sys.argv[1:])