doc: add readthedocs config
[rarfile.git] / rarfile.py
blobfc95f7fd8e3bcd53cac843a545bdd2a29a7634c5
1 # rarfile.py
3 # Copyright (c) 2005-2024 Marko Kreen <markokr@gmail.com>
5 # Permission to use, copy, modify, and/or distribute this software for any
6 # purpose with or without fee is hereby granted, provided that the above
7 # copyright notice and this permission notice appear in all copies.
9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 """RAR archive reader.
This is a Python module for reading RAR archives. The interface
is made as :mod:`zipfile`-like as possible.
22 Basic logic:
23 - Parse archive structure with Python.
24 - Extract non-compressed files with Python
25 - Extract compressed files with unrar.
26 - Optionally write compressed data to temp file to speed up unrar,
27 otherwise it needs to scan whole archive on each execution.
29 Example::
31 import rarfile
33 rf = rarfile.RarFile("myarchive.rar")
34 for f in rf.infolist():
35 print(f.filename, f.file_size)
36 if f.filename == "README":
37 print(rf.read(f))
39 Archive files can also be accessed via file-like object returned
40 by :meth:`RarFile.open`::
42 import rarfile
44 with rarfile.RarFile("archive.rar") as rf:
45 with rf.open("README") as f:
46 for ln in f:
47 print(ln.strip())
49 For decompression to work, either ``unrar`` or ``unar`` tool must be in PATH.
50 """
52 import errno
53 import io
54 import os
55 import re
56 import shutil
57 import struct
58 import sys
59 import warnings
60 from binascii import crc32, hexlify
61 from datetime import datetime, timezone
62 from hashlib import blake2s, pbkdf2_hmac, sha1, sha256
63 from pathlib import Path
64 from struct import Struct, pack, unpack
65 from subprocess import DEVNULL, PIPE, STDOUT, Popen
66 from tempfile import mkstemp
# Placeholder so the name ``AES`` always exists; it is rebound below only when
# the ``Crypto.Cipher`` fallback is the backend that gets imported.
AES = None

# only needed for encrypted headers
#
# _have_crypto records which AES backend could be imported:
#   1 - the ``cryptography`` package
#   2 - the ``Crypto.Cipher`` package
#   0 - neither is available
try:
    try:
        from cryptography.hazmat.backends import default_backend
        from cryptography.hazmat.primitives.ciphers import (
            Cipher, algorithms, modes,
        )
        _have_crypto = 1
    except ImportError:
        from Crypto.Cipher import AES
        _have_crypto = 2
except ImportError:
    _have_crypto = 0
class AES_CBC_Decrypt:
    """AES-CBC decryptor with a uniform ``decrypt(data)`` callable.

    The instance attribute ``decrypt`` is bound to whichever crypto backend
    was selected at import time (see ``_have_crypto``).
    """

    def __init__(self, key, iv):
        if _have_crypto != 2:
            # ``cryptography`` backend: the decryptor's update() does the work
            cipher = Cipher(algorithms.AES(key), modes.CBC(iv), default_backend())
            self.decrypt = cipher.decryptor().update
            return
        # ``Crypto.Cipher`` backend
        self.decrypt = AES.new(key, AES.MODE_CBC, iv).decrypt
__version__ = "4.2"

# export only interesting items
__all__ = ["get_rar_version", "is_rarfile", "is_rarfile_sfx", "RarInfo", "RarFile", "RarExtFile"]

##
## Module configuration.  Can be tuned after importing.
##

#: executable for unrar tool
UNRAR_TOOL = "unrar"

#: executable for unar tool
UNAR_TOOL = "unar"

#: executable for bsdtar tool
BSDTAR_TOOL = "bsdtar"

#: executable for p7zip/7z tool
SEVENZIP_TOOL = "7z"

#: executable for alternative 7z tool
SEVENZIP2_TOOL = "7zz"

#: default fallback charset
DEFAULT_CHARSET = "windows-1252"

#: list of encodings to try, with fallback to DEFAULT_CHARSET if none succeed
TRY_ENCODINGS = ("utf8", "utf-16le")

#: whether to speed up decompression by using tmp archive
USE_EXTRACT_HACK = 1

#: limit the filesize for tmp archive usage
HACK_SIZE_LIMIT = 20 * 1024 * 1024

#: set specific directory for mkstemp() used by hack dir usage
HACK_TMP_DIR = None

#: Separator for path name components.  Always "/".
PATH_SEP = "/"
##
## rar constants
##

# block types (the trailing comment is the ASCII character of the value)
RAR_BLOCK_MARK = 0x72  # r
RAR_BLOCK_MAIN = 0x73  # s
RAR_BLOCK_FILE = 0x74  # t
RAR_BLOCK_OLD_COMMENT = 0x75  # u
RAR_BLOCK_OLD_EXTRA = 0x76  # v
RAR_BLOCK_OLD_SUB = 0x77  # w
RAR_BLOCK_OLD_RECOVERY = 0x78  # x
RAR_BLOCK_OLD_AUTH = 0x79  # y
RAR_BLOCK_SUB = 0x7a  # z
RAR_BLOCK_ENDARC = 0x7b  # {

# flags for RAR_BLOCK_MAIN
RAR_MAIN_VOLUME = 0x0001
RAR_MAIN_COMMENT = 0x0002
RAR_MAIN_LOCK = 0x0004
RAR_MAIN_SOLID = 0x0008
RAR_MAIN_NEWNUMBERING = 0x0010
RAR_MAIN_AUTH = 0x0020
RAR_MAIN_RECOVERY = 0x0040
RAR_MAIN_PASSWORD = 0x0080
RAR_MAIN_FIRSTVOLUME = 0x0100
RAR_MAIN_ENCRYPTVER = 0x0200

# flags for RAR_BLOCK_FILE
RAR_FILE_SPLIT_BEFORE = 0x0001
RAR_FILE_SPLIT_AFTER = 0x0002
RAR_FILE_PASSWORD = 0x0004
RAR_FILE_COMMENT = 0x0008
RAR_FILE_SOLID = 0x0010
# RAR_FILE_DICTMASK selects a 3-bit field; the RAR_FILE_DICT* values and
# RAR_FILE_DIRECTORY below are the possible values of that field.
RAR_FILE_DICTMASK = 0x00e0
RAR_FILE_DICT64 = 0x0000
RAR_FILE_DICT128 = 0x0020
RAR_FILE_DICT256 = 0x0040
RAR_FILE_DICT512 = 0x0060
RAR_FILE_DICT1024 = 0x0080
RAR_FILE_DICT2048 = 0x00a0
RAR_FILE_DICT4096 = 0x00c0
RAR_FILE_DIRECTORY = 0x00e0
RAR_FILE_LARGE = 0x0100
RAR_FILE_UNICODE = 0x0200
RAR_FILE_SALT = 0x0400
RAR_FILE_VERSION = 0x0800
RAR_FILE_EXTTIME = 0x1000
RAR_FILE_EXTFLAGS = 0x2000

# flags for RAR_BLOCK_ENDARC
RAR_ENDARC_NEXT_VOLUME = 0x0001
RAR_ENDARC_DATACRC = 0x0002
RAR_ENDARC_REVSPACE = 0x0004
RAR_ENDARC_VOLNR = 0x0008

# flags common to all blocks
RAR_SKIP_IF_UNKNOWN = 0x4000
RAR_LONG_BLOCK = 0x8000

# Host OS types
RAR_OS_MSDOS = 0  #: MSDOS (only in RAR3)
RAR_OS_OS2 = 1  #: OS2 (only in RAR3)
RAR_OS_WIN32 = 2  #: Windows
RAR_OS_UNIX = 3  #: UNIX
RAR_OS_MACOS = 4  #: MacOS (only in RAR3)
RAR_OS_BEOS = 5  #: BeOS (only in RAR3)

# Compression methods - "0".."5"
RAR_M0 = 0x30  #: No compression.
RAR_M1 = 0x31  #: Compression level `-m1` - Fastest compression.
RAR_M2 = 0x32  #: Compression level `-m2`.
RAR_M3 = 0x33  #: Compression level `-m3`.
RAR_M4 = 0x34  #: Compression level `-m4`.
RAR_M5 = 0x35  #: Compression level `-m5` - Maximum compression.

RAR_MAX_PASSWORD = 127  #: Max number of utf-16 chars in passwords.
RAR_MAX_KDF_SHIFT = 24  #: Max power-of-2 for KDF count

##
## RAR5 constants
##

# block types
RAR5_BLOCK_MAIN = 1
RAR5_BLOCK_FILE = 2
RAR5_BLOCK_SERVICE = 3
RAR5_BLOCK_ENCRYPTION = 4
RAR5_BLOCK_ENDARC = 5

# generic block flags
RAR5_BLOCK_FLAG_EXTRA_DATA = 0x01
RAR5_BLOCK_FLAG_DATA_AREA = 0x02
RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN = 0x04
RAR5_BLOCK_FLAG_SPLIT_BEFORE = 0x08
RAR5_BLOCK_FLAG_SPLIT_AFTER = 0x10
RAR5_BLOCK_FLAG_DEPENDS_PREV = 0x20
RAR5_BLOCK_FLAG_KEEP_WITH_PARENT = 0x40

# flags for RAR5_BLOCK_MAIN
RAR5_MAIN_FLAG_ISVOL = 0x01
RAR5_MAIN_FLAG_HAS_VOLNR = 0x02
RAR5_MAIN_FLAG_SOLID = 0x04
RAR5_MAIN_FLAG_RECOVERY = 0x08
RAR5_MAIN_FLAG_LOCKED = 0x10

# flags for RAR5_BLOCK_FILE
RAR5_FILE_FLAG_ISDIR = 0x01
RAR5_FILE_FLAG_HAS_MTIME = 0x02
RAR5_FILE_FLAG_HAS_CRC32 = 0x04
RAR5_FILE_FLAG_UNKNOWN_SIZE = 0x08

RAR5_COMPR_SOLID = 0x40

RAR5_ENC_FLAG_HAS_CHECKVAL = 0x01

RAR5_ENDARC_FLAG_NEXT_VOL = 0x01

# extra-record types (RAR5_XFILE_*)
RAR5_XFILE_ENCRYPTION = 1
RAR5_XFILE_HASH = 2
RAR5_XFILE_TIME = 3
RAR5_XFILE_VERSION = 4
RAR5_XFILE_REDIR = 5
RAR5_XFILE_OWNER = 6
RAR5_XFILE_SERVICE = 7

# flags for the time extra record
RAR5_XTIME_UNIXTIME = 0x01
RAR5_XTIME_HAS_MTIME = 0x02
RAR5_XTIME_HAS_CTIME = 0x04
RAR5_XTIME_HAS_ATIME = 0x08
RAR5_XTIME_UNIXTIME_NS = 0x10

RAR5_XENC_CIPHER_AES256 = 0

RAR5_XENC_CHECKVAL = 0x01
RAR5_XENC_TWEAKED = 0x02

RAR5_XHASH_BLAKE2SP = 0

# redirection types: first element of RarInfo.file_redir
RAR5_XREDIR_UNIX_SYMLINK = 1
RAR5_XREDIR_WINDOWS_SYMLINK = 2
RAR5_XREDIR_WINDOWS_JUNCTION = 3
RAR5_XREDIR_HARD_LINK = 4
RAR5_XREDIR_FILE_COPY = 5

# redirection flag bit: second element of RarInfo.file_redir
RAR5_XREDIR_ISDIR = 0x01

RAR5_XOWNER_UNAME = 0x01
RAR5_XOWNER_GNAME = 0x02
RAR5_XOWNER_UID = 0x04
RAR5_XOWNER_GID = 0x08

RAR5_OS_WINDOWS = 0
RAR5_OS_UNIX = 1

# DOS-style file attribute bits
DOS_MODE_ARCHIVE = 0x20
DOS_MODE_DIR = 0x10
DOS_MODE_SYSTEM = 0x04
DOS_MODE_HIDDEN = 0x02
DOS_MODE_READONLY = 0x01

RAR5_PW_CHECK_SIZE = 8
RAR5_PW_SUM_SIZE = 4
##
## internal constants
##

# Archive signatures; both share the same first six bytes.
RAR_ID = b"Rar!\x1a\x07\x00"
RAR5_ID = b"Rar!\x1a\x07\x01\x00"

WIN32 = sys.platform == "win32"
# I/O chunk size: 512 KiB on Windows, 64 KiB elsewhere.
BSIZE = 512 * 1024 if WIN32 else 64 * 1024

# Upper bound on bytes scanned when looking for a signature (see _find_sfx_header).
SFX_MAX_SIZE = 2 * 1024 * 1024
# Version markers returned by get_rar_version() / _find_sfx_header().
RAR_V3 = 3
RAR_V5 = 5

# Character-class fragments; the WIN32 variant additionally matches ":", "^" and "\".
_BAD_CHARS = r"""\x00-\x1F<>|"?*"""
RC_BAD_CHARS_UNIX = re.compile(r"[%s]" % _BAD_CHARS)
RC_BAD_CHARS_WIN32 = re.compile(r"[%s:^\\]" % _BAD_CHARS)

# When true, _open_clear() goes through the external tool instead of DirectReader.
FORCE_TOOL = False
def _find_sfx_header(xfile):
    """Search the start of *xfile* for a RAR signature.

    The file is read in two steps (64 bytes, then up to ``SFX_MAX_SIZE``
    more) and everything read so far is rescanned after each step.

    Returns a ``(version, offset)`` tuple where version is ``RAR_V3`` or
    ``RAR_V5`` and offset is the position of the signature, or ``(0, 0)``
    when no signature was found.
    """
    # The first six bytes are common to both the RAR3 and RAR5 signatures.
    marker = RAR_ID[:-1]
    collected = io.BytesIO()

    with XFile(xfile) as fd:
        for chunk_size in (64, SFX_MAX_SIZE):
            chunk = fd.read(chunk_size)
            if not chunk:
                break
            collected.write(chunk)
            haystack = collected.getvalue()

            # Check every occurrence of the common prefix for a full match.
            pos = haystack.find(marker)
            while pos >= 0:
                if haystack.startswith(RAR_ID, pos):
                    return RAR_V3, pos
                if haystack.startswith(RAR5_ID, pos):
                    return RAR_V5, pos
                pos = haystack.find(marker, pos + len(marker))
    return 0, 0
344 ## Public interface
def get_rar_version(xfile):
    """Return the RAR format version of *xfile*.

    Only the leading signature bytes are inspected.  The result is
    ``RAR_V3``, ``RAR_V5``, or ``0`` when the file does not start with a
    RAR signature.
    """
    with XFile(xfile) as fd:
        head = fd.read(len(RAR5_ID))

    for magic, version in ((RAR_ID, RAR_V3), (RAR5_ID, RAR_V5)):
        if head.startswith(magic):
            return version
    return 0
def is_rarfile(xfile):
    """Return True when *xfile* starts with a RAR signature.

    An ``OSError`` while reading (missing or inaccessible file) is treated
    as "not a RAR file" and yields False.
    """
    try:
        version = get_rar_version(xfile)
    except OSError:
        # File not found or not accessible, ignore
        return False
    return version > 0
def is_rarfile_sfx(xfile):
    """Return True when *xfile* contains a RAR signature, SFX archives included.

    Unlike :func:`is_rarfile`, the signature may appear after a prefix;
    up to about 2M of the file is read while searching.
    """
    version, _offset = _find_sfx_header(xfile)
    return version > 0
class Error(Exception):
    """Root of the rarfile exception hierarchy."""


class BadRarFile(Error):
    """Archive contains incorrect data."""


class NotRarFile(Error):
    """File is not a RAR archive."""


class BadRarName(Error):
    """Multipart name components cannot be guessed."""


class NoRarEntry(Error):
    """Requested file was not found in the archive."""


class PasswordRequired(Error):
    """A password is needed to access the file."""


class NeedFirstVolume(Error):
    """Parsing must start from the first volume.

    Attributes:

        current_volume
            Volume number of current file or None if not known
    """

    def __init__(self, msg, volume):
        super().__init__(msg)
        self.current_volume = volume


class NoCrypto(Error):
    """Encrypted headers cannot be parsed because no crypto backend is available."""


class RarExecError(Error):
    """Problem reported by the unrar/rar executable."""


class RarWarning(RarExecError):
    """Non-fatal error."""


class RarFatalError(RarExecError):
    """Fatal error."""


class RarCRCError(RarExecError):
    """CRC error during unpacking."""


class RarLockedArchiveError(RarExecError):
    """Must not modify locked archive."""


class RarWriteError(RarExecError):
    """Write error."""


class RarOpenError(RarExecError):
    """Open error."""


class RarUserError(RarExecError):
    """User error."""


class RarMemoryError(RarExecError):
    """Memory error."""


class RarCreateError(RarExecError):
    """Create error."""


class RarNoFilesError(RarExecError):
    """No files that match pattern were found."""


class RarUserBreak(RarExecError):
    """User stop."""


class RarWrongPassword(RarExecError):
    """Incorrect password."""


class RarUnknownError(RarExecError):
    """Unknown exit code."""


class RarSignalExit(RarExecError):
    """Unrar exited with signal."""


class RarCannotExec(RarExecError):
    """Executable not found."""
class UnsupportedWarning(UserWarning):
    """Warning: archive uses a feature that rarfile does not support.

    .. versionadded:: 4.0
    """
class RarInfo:
    r"""Metadata for one entry of a RAR archive.

    Timestamps given as :class:`~datetime.datetime` carry no timezone in
    RAR3 archives and use the UTC timezone in RAR5 archives.

    Attributes:

        filename
            File name with relative path.
            Path separator is "/".  Always unicode string.

        date_time
            File modification timestamp.  As tuple of (year, month, day, hour, minute, second).
            RAR5 allows archives where it is missing, it's None then.

        comment
            Optional file comment field.  Unicode string.  (RAR3-only)

        file_size
            Uncompressed size.

        compress_size
            Compressed size.

        compress_type
            Compression method: one of :data:`RAR_M0` .. :data:`RAR_M5` constants.

        extract_version
            Minimal Rar version needed for decompressing.  As (major*10 + minor),
            so 2.9 is 29.

            RAR3: 10, 20, 29

            RAR5 does not have such field in archive, it's simply set to 50.

        host_os
            Host OS type, one of RAR_OS_* constants.

            RAR3: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`, :data:`RAR_OS_MSDOS`,
            :data:`RAR_OS_OS2`, :data:`RAR_OS_BEOS`.

            RAR5: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`.

        mode
            File attributes. May be either dos-style or unix-style, depending on host_os.

        mtime
            File modification time.  Same value as :attr:`date_time`
            but as :class:`~datetime.datetime` object with extended precision.

        ctime
            Optional time field: creation time.  As :class:`~datetime.datetime` object.

        atime
            Optional time field: last access time.  As :class:`~datetime.datetime` object.

        arctime
            Optional time field: archival time.  As :class:`~datetime.datetime` object.
            (RAR3-only)

        CRC
            CRC-32 of uncompressed file, unsigned int.

            RAR5: may be None.

        blake2sp_hash
            Blake2SP hash over decompressed data.  (RAR5-only)

        volume
            Volume nr, starting from 0.

        volume_file
            Volume file name, where file starts.

        file_redir
            If not None, file is link of some sort.  Contains tuple of (type, flags, target).
            (RAR5-only)

            Type is one of constants:

                :data:`RAR5_XREDIR_UNIX_SYMLINK`
                    Unix symlink.
                :data:`RAR5_XREDIR_WINDOWS_SYMLINK`
                    Windows symlink.
                :data:`RAR5_XREDIR_WINDOWS_JUNCTION`
                    Windows junction.
                :data:`RAR5_XREDIR_HARD_LINK`
                    Hard link to target.
                :data:`RAR5_XREDIR_FILE_COPY`
                    Current file is copy of another archive entry.

            Flags may contain bits:

                :data:`RAR5_XREDIR_ISDIR`
                    Symlink points to directory.
    """

    # zipfile-compatible fields
    filename = None
    file_size = None
    compress_size = None
    date_time = None
    CRC = None
    volume = None
    orig_filename = None

    # optional extended time fields, datetime() objects.
    mtime = None
    ctime = None
    atime = None

    extract_version = None
    mode = None
    host_os = None
    compress_type = None

    # rar3-only fields
    comment = None
    arctime = None

    # rar5-only fields
    blake2sp_hash = None
    file_redir = None

    # internal fields
    flags = 0
    type = None

    # zipfile compat
    def is_dir(self):
        """Tell whether the entry is a directory.

        This base implementation always answers False.

        .. versionadded:: 4.0
        """
        return False

    def is_symlink(self):
        """Tell whether the entry is a symlink.

        This base implementation always answers False.

        .. versionadded:: 4.0
        """
        return False

    def is_file(self):
        """Tell whether the entry is a normal file.

        This base implementation always answers False.

        .. versionadded:: 4.0
        """
        return False

    def needs_password(self):
        """Tell whether the entry's data is stored password-protected.

        Only entries of type ``RAR_BLOCK_FILE`` can report True here.
        """
        if self.type != RAR_BLOCK_FILE:
            return False
        return (self.flags & RAR_FILE_PASSWORD) > 0

    def isdir(self):
        """Old spelling of :meth:`is_dir`.

        .. deprecated:: 4.0
        """
        return self.is_dir()
class RarFile:
    """Parse RAR structure, provide access to files in archive.

    Parameters:

        file
            archive file name or file-like object.
        mode
            only "r" is supported.
        charset
            fallback charset to use, if filenames are not already Unicode-enabled.
        info_callback
            debug callback, gets to see all archive entries.
        crc_check
            set to False to disable CRC checks
        errors
            Either "stop" to quietly stop parsing on errors,
            or "strict" to raise errors.  Default is "stop".
        part_only
            If True, read only single file and allow it to be middle-part
            of multi-volume archive.

            .. versionadded:: 4.0
    """

    #: File name, if available.  Unicode string or None.
    filename = None

    #: Archive comment.  Unicode string or None.
    comment = None

    def __init__(self, file, mode="r", charset=None, info_callback=None,
                 crc_check=True, errors="stop", part_only=False):
        """Store the options and parse the archive headers.

        Raises ValueError for an unknown *errors* value and
        NotImplementedError for any *mode* other than "r".
        """
        if is_filelike(file):
            self.filename = getattr(file, "name", None)
        else:
            if isinstance(file, Path):
                file = str(file)
            self.filename = file
        self._rarfile = file

        self._charset = charset or DEFAULT_CHARSET
        self._info_callback = info_callback
        self._crc_check = crc_check
        self._part_only = part_only
        self._password = None
        self._file_parser = None

        if errors == "stop":
            self._strict = False
        elif errors == "strict":
            self._strict = True
        else:
            raise ValueError("Invalid value for errors= parameter.")

        if mode != "r":
            raise NotImplementedError("RarFile supports only mode=r")

        self._parse()

    def __enter__(self):
        """Open context."""
        return self

    def __exit__(self, typ, value, traceback):
        """Exit context."""
        self.close()

    def __iter__(self):
        """Iterate over members."""
        return iter(self.infolist())

    def setpassword(self, pwd):
        """Sets the password to use when extracting.

        If the current parser reports header encryption it is discarded and
        the archive is parsed again with the new password.
        """
        self._password = pwd
        if self._file_parser:
            if self._file_parser.has_header_encryption():
                # headers must be re-read with the new password
                self._file_parser = None
        if not self._file_parser:
            self._parse()
        else:
            self._file_parser.setpassword(self._password)

    def needs_password(self):
        """Returns True if any archive entries require password for extraction.
        """
        return self._file_parser.needs_password()

    def is_solid(self):
        """Returns True if archive uses solid compression.

        .. versionadded:: 4.2
        """
        return self._file_parser.is_solid()

    def namelist(self):
        """Return list of filenames in archive.
        """
        return [f.filename for f in self.infolist()]

    def infolist(self):
        """Return RarInfo objects for all files/directories in archive.
        """
        return self._file_parser.infolist()

    def volumelist(self):
        """Returns filenames of archive volumes.

        In case of single-volume archive, the list contains
        just the name of main archive file.
        """
        return self._file_parser.volumelist()

    def getinfo(self, name):
        """Return RarInfo for file.
        """
        return self._file_parser.getinfo(name)

    def getinfo_orig(self, name):
        """Return RarInfo for file source.

        RAR5: if name is hard-linked or copied file,
        returns original entry with original filename.

        .. versionadded:: 4.1
        """
        return self._file_parser.getinfo_orig(name)

    def open(self, name, mode="r", pwd=None):
        """Returns file-like object (:class:`RarExtFile`) from where the data can be read.

        The object implements :class:`io.RawIOBase` interface, so it can
        be further wrapped with :class:`io.BufferedReader`
        and :class:`io.TextIOWrapper`.

        The object is seekable, although the seeking is fast only on
        uncompressed files, on compressed files the seeking is implemented
        by reading ahead and/or restarting the decompression.

        Parameters:

            name
                file name or RarInfo instance.
            mode
                must be "r"
            pwd
                password to use for extracting.

        Raises NotImplementedError for other modes,
        :class:`io.UnsupportedOperation` for directory entries and
        :class:`PasswordRequired` when the entry is protected and no
        password is available.
        """
        if mode != "r":
            raise NotImplementedError("RarFile.open() supports only mode=r")

        # entry lookup
        inf = self.getinfo(name)
        if inf.is_dir():
            raise io.UnsupportedOperation("Directory does not have any data: " + inf.filename)

        # check password
        if inf.needs_password():
            pwd = pwd or self._password
            if pwd is None:
                raise PasswordRequired("File %s requires password" % inf.filename)
        else:
            # entry is not protected, do not hand a password to the parser
            pwd = None

        return self._file_parser.open(inf, pwd)

    def read(self, name, pwd=None):
        """Return uncompressed data for archive entry.

        For longer files using :meth:`~RarFile.open` may be better idea.

        Parameters:

            name
                filename or RarInfo instance
            pwd
                password to use for extracting.
        """
        with self.open(name, "r", pwd) as f:
            return f.read()

    def close(self):
        """Release open resources."""
        pass

    def printdir(self, file=None):
        """Print archive file list to stdout or given file.
        """
        if file is None:
            file = sys.stdout
        for f in self.infolist():
            print(f.filename, file=file)

    def extract(self, member, path=None, pwd=None):
        """Extract single file into current directory.

        Parameters:

            member
                filename or :class:`RarInfo` instance
            path
                optional destination path
            pwd
                optional password to use
        """
        inf = self.getinfo(member)
        return self._extract_one(inf, path, pwd, True)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all files into current directory.

        Parameters:

            path
                optional destination path
            members
                optional filename or :class:`RarInfo` instance list to extract
            pwd
                optional password to use
        """
        if members is None:
            members = self.namelist()

        done = set()
        dirs = []
        for m in members:
            inf = self.getinfo(m)
            # directory attributes are applied after everything is extracted
            dst = self._extract_one(inf, path, pwd, not inf.is_dir())
            if inf.is_dir():
                if dst not in done:
                    dirs.append((dst, inf))
                    done.add(dst)
        if dirs:
            # reverse order: nested directories come before their parents
            dirs.sort(reverse=True)
            for dst, inf in dirs:
                self._set_attrs(inf, dst)

    def testrar(self, pwd=None):
        """Read all files and test CRC.
        """
        for member in self.infolist():
            if member.is_file():
                with self.open(member, 'r', pwd) as f:
                    empty_read(f, member.file_size, BSIZE)

    def strerror(self):
        """Return error string if parsing failed or None if no problems.
        """
        if not self._file_parser:
            return "Not a RAR file"
        return self._file_parser.strerror()

    ##
    ## private methods
    ##

    def _parse(self):
        """Run parser for file type
        """
        ver, sfx_ofs = _find_sfx_header(self._rarfile)
        if ver == RAR_V3:
            p3 = RAR3Parser(self._rarfile, self._password, self._crc_check,
                            self._charset, self._strict, self._info_callback,
                            sfx_ofs, self._part_only)
            self._file_parser = p3  # noqa
        elif ver == RAR_V5:
            p5 = RAR5Parser(self._rarfile, self._password, self._crc_check,
                            self._charset, self._strict, self._info_callback,
                            sfx_ofs, self._part_only)
            self._file_parser = p5  # noqa
        else:
            raise NotRarFile("Not a RAR file")

        self._file_parser.parse()
        self.comment = self._file_parser.comment

    def _extract_one(self, info, path, pwd, set_attrs):
        """Extract one entry below *path* and return the created path.

        Returns None when the entry is neither file, directory nor symlink.
        """
        fname = sanitize_filename(
            info.filename, os.path.sep, WIN32
        )

        if path is None:
            path = os.getcwd()
        else:
            path = os.fspath(path)
        dstfn = os.path.join(path, fname)

        dirname = os.path.dirname(dstfn)
        if dirname and dirname != ".":
            os.makedirs(dirname, exist_ok=True)

        if info.is_file():
            return self._make_file(info, dstfn, pwd, set_attrs)
        if info.is_dir():
            return self._make_dir(info, dstfn, pwd, set_attrs)
        if info.is_symlink():
            return self._make_symlink(info, dstfn, pwd, set_attrs)
        return None

    def _create_helper(self, name, flags, info):
        """Opener used for extracted files; returns an OS-level descriptor."""
        return os.open(name, flags)

    def _make_file(self, info, dstfn, pwd, set_attrs):
        """Copy the entry's data into *dstfn*, optionally applying attributes."""
        def helper(name, flags):
            return self._create_helper(name, flags, info)
        with self.open(info, "r", pwd) as src:
            with open(dstfn, "wb", opener=helper) as dst:
                shutil.copyfileobj(src, dst)
        if set_attrs:
            self._set_attrs(info, dstfn)
        return dstfn

    def _make_dir(self, info, dstfn, pwd, set_attrs):
        """Create directory *dstfn*, optionally applying attributes."""
        os.makedirs(dstfn, exist_ok=True)
        if set_attrs:
            self._set_attrs(info, dstfn)
        return dstfn

    def _make_symlink(self, info, dstfn, pwd, set_attrs):
        """Create symlink *dstfn* for a link entry.

        Returns the created path, or None (after issuing an
        :class:`UnsupportedWarning`) for Windows junctions and for link
        kinds that cannot be handled.
        """
        target_is_directory = False
        if info.host_os == RAR_OS_UNIX:
            link_name = self.read(info, pwd)
            target_is_directory = (info.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY
        elif info.file_redir:
            redir_type, redir_flags, link_name = info.file_redir
            if redir_type == RAR5_XREDIR_WINDOWS_JUNCTION:
                warnings.warn(f"Windows junction not supported - {info.filename}", UnsupportedWarning)
                return None
            # RAR5_XREDIR_ISDIR is a bit of the redirection *flags*; testing
            # it against the redirection type gives wrong answers (always
            # False for Windows symlinks, always True for Unix ones).
            target_is_directory = (redir_flags & RAR5_XREDIR_ISDIR) > 0
        else:
            warnings.warn(f"Unsupported link type - {info.filename}", UnsupportedWarning)
            return None

        os.symlink(link_name, dstfn, target_is_directory=target_is_directory)
        return dstfn

    def _set_attrs(self, info, dstfn):
        """Apply permission bits and timestamps from *info* to *dstfn*."""
        if info.host_os == RAR_OS_UNIX:
            os.chmod(dstfn, info.mode & 0o777)
        elif info.host_os in (RAR_OS_WIN32, RAR_OS_MSDOS):
            # only keep R/O attr, except for dirs on win32
            if info.mode & DOS_MODE_READONLY and (info.is_file() or not WIN32):
                st = os.stat(dstfn)
                new_mode = st.st_mode & ~0o222
                os.chmod(dstfn, new_mode)

        if info.mtime:
            mtime_ns = to_nsecs(info.mtime)
            # fall back to mtime when the entry carries no access time
            atime_ns = to_nsecs(info.atime) if info.atime else mtime_ns
            os.utime(dstfn, ns=(atime_ns, mtime_ns))
1015 # File format parsing
class CommonParser:
    """Shared parser parts."""
    # Class-level defaults; instances overwrite them while parsing.
    _main = None  # main header block of the current volume, once seen
    _hdrenc_main = None  # truthy means headers are encrypted (see has_header_encryption)
    _needs_password = False  # set when a main/file header reports password protection
    _fd = None  # file object in use while parse() is running
    _expect_sig = None  # signature bytes each volume must start with; NOTE(review): presumably set by subclasses
    _parse_error = None  # last message recorded by _set_error()
    _password = None  # cached password
    comment = None
def __init__(self, rarfile, password, crc_check, charset, strict,
             info_cb, sfx_offset, part_only):
    """Remember the parsing options and start with empty lookup tables."""
    # what to read and where the archive starts inside it
    self._rarfile = rarfile
    self._sfx_offset = sfx_offset
    self._part_only = part_only

    # parsing options
    self._password = password
    self._crc_check = crc_check
    self._charset = charset
    self._strict = strict
    self._info_callback = info_cb

    # results, filled in while parsing
    self._info_list = []
    self._info_map = {}
    self._vol_list = []
def is_solid(self):
    """Report whether the main header carries the solid-archive flag.

    Answers False while no main header has been seen.
    """
    main = self._main
    return bool(main and main.flags & RAR_MAIN_SOLID)
def has_header_encryption(self):
    """Report whether the archive headers are encrypted.

    True when ``_hdrenc_main`` is set or the main header has the
    ``RAR_MAIN_PASSWORD`` flag.
    """
    if self._hdrenc_main:
        return True
    main = self._main
    return bool(main and main.flags & RAR_MAIN_PASSWORD)
def setpassword(self, pwd):
    """Remember *pwd* as the cached password."""
    self._password = pwd
def volumelist(self):
    """Return the list of volume files collected while parsing."""
    return self._vol_list
def needs_password(self):
    """Return the flag recording whether a password is required."""
    return self._needs_password
def strerror(self):
    """Return the last recorded parse error message, or None."""
    return self._parse_error
def infolist(self):
    """Return the list of RarInfo records gathered so far."""
    return self._info_list
def getinfo(self, member):
    """Look up the RarInfo record for *member*.

    *member* may be a RarInfo (its filename is used), a
    :class:`~pathlib.Path`, or a plain name.  Trailing "/" characters are
    dropped before the lookup.

    Raises NoRarEntry when there is no such entry.
    """
    if isinstance(member, RarInfo):
        key = member.filename
    elif isinstance(member, Path):
        key = str(member)
    else:
        key = member

    key = key.rstrip("/")

    if key not in self._info_map:
        raise NoRarEntry("No such file: %s" % key) from None
    return self._info_map[key]
def getinfo_orig(self, member):
    """Return the RarInfo that actually holds the data for *member*.

    For entries redirected as file copy or hard link, the record of the
    redirection target is returned; any other entry is returned as-is.
    """
    entry = self.getinfo(member)
    redir = entry.file_redir
    if not redir:
        return entry

    kind, _flags, target = redir
    # cannot leave to unrar as it expects copied file to exist
    if kind in (RAR5_XREDIR_FILE_COPY, RAR5_XREDIR_HARD_LINK):
        return self.getinfo(target)
    return entry
def parse(self):
    """Parse the archive, making sure the working file object gets closed."""
    self._fd = None
    try:
        self._parse_real()
    finally:
        leftover = self._fd
        if leftover:
            leftover.close()
            self._fd = None
def _parse_real(self):
    """Actually read file.

    Checks the signature at ``_sfx_offset``, then reads header after
    header, continuing into following volumes when the headers announce
    them (unless ``_part_only`` is set).  Each header gets ``volume`` and
    ``volume_file`` assigned and is passed to :meth:`process_entry` and to
    the info callback, if any.

    Raises NotRarFile when the first signature does not match and
    NeedFirstVolume when the archive turns out not to start at the first
    volume.  Problems with later volumes go through :meth:`_set_error`.
    """
    fd = XFile(self._rarfile)
    self._fd = fd
    fd.seek(self._sfx_offset, 0)
    sig = fd.read(len(self._expect_sig))
    if sig != self._expect_sig:
        raise NotRarFile("Not a Rar archive")

    volume = 0  # first vol (.rar) is 0
    more_vols = False
    endarc = False
    volfile = self._rarfile
    self._vol_list = [self._rarfile]
    raise_need_first_vol = False
    while True:
        if endarc:
            h = None    # don't read past ENDARC
        else:
            h = self._parse_header(fd)
        if not h:
            # no more headers in this volume
            if raise_need_first_vol:
                # did not find ENDARC with VOLNR
                raise NeedFirstVolume("Need to start from first volume", None)
            if more_vols and not self._part_only:
                # switch over to the next volume file
                volume += 1
                fd.close()
                try:
                    volfile = self._next_volname(volfile)
                    fd = XFile(volfile)
                except IOError:
                    self._set_error("Cannot open next volume: %s", volfile)
                    break
                self._fd = fd
                sig = fd.read(len(self._expect_sig))
                if sig != self._expect_sig:
                    self._set_error("Invalid volume sig: %s", volfile)
                    break
                # per-volume state starts fresh
                more_vols = False
                endarc = False
                self._vol_list.append(volfile)
                self._main = None
                self._hdrenc_main = None
                continue
            break
        h.volume = volume
        h.volume_file = volfile

        if h.type == RAR_BLOCK_MAIN and not self._main:
            self._main = h
            if volume == 0 and (h.flags & RAR_MAIN_NEWNUMBERING) and not self._part_only:
                # RAR 2.x does not set FIRSTVOLUME,
                # so check it only if NEWNUMBERING is used
                if (h.flags & RAR_MAIN_FIRSTVOLUME) == 0:
                    if getattr(h, "main_volume_number", None) is not None:
                        # rar5 may have more info
                        raise NeedFirstVolume(
                            "Need to start from first volume (current: %r)"
                            % (h.main_volume_number,),
                            h.main_volume_number
                        )
                    # delay raise until we have volnr from ENDARC
                    raise_need_first_vol = True
            if h.flags & RAR_MAIN_PASSWORD:
                self._needs_password = True
                if not self._password:
                    # encrypted headers cannot be read without a password
                    break
        elif h.type == RAR_BLOCK_ENDARC:
            # use flag, but also allow RAR 2.x logic below to trigger
            if h.flags & RAR_ENDARC_NEXT_VOLUME:
                more_vols = True
            endarc = True
            if raise_need_first_vol and (h.flags & RAR_ENDARC_VOLNR) > 0:
                raise NeedFirstVolume(
                    "Need to start from first volume (current: %r)"
                    % (h.endarc_volnr,),
                    h.endarc_volnr
                )
        elif h.type == RAR_BLOCK_FILE:
            # RAR 2.x does not write RAR_BLOCK_ENDARC
            if h.flags & RAR_FILE_SPLIT_AFTER:
                more_vols = True
            # RAR 2.x does not set RAR_MAIN_FIRSTVOLUME
            if volume == 0 and h.flags & RAR_FILE_SPLIT_BEFORE:
                if not self._part_only:
                    raise_need_first_vol = True

        if h.needs_password():
            self._needs_password = True

        # store it
        self.process_entry(fd, h)

        if self._info_callback:
            self._info_callback(h)

        # go to next header
        if h.add_size > 0:
            fd.seek(h.data_offset + h.add_size, 0)
def process_entry(self, fd, item):
    """Examine *item* and add it to the lookup cache.

    Not implemented here; this version always raises NotImplementedError.
    """
    raise NotImplementedError()
def _decrypt_header(self, fd):
    """Return a file object yielding decrypted header data.

    Not implemented here; this version always raises NotImplementedError.
    """
    raise NotImplementedError("_decrypt_header")
def _parse_block_header(self, fd):
    """Read one block header from *fd*.

    Not implemented here; this version always raises NotImplementedError.
    """
    raise NotImplementedError("_parse_block_header")
def _open_hack(self, inf, pwd):
    """Open *inf* via the temporary-archive shortcut.

    Not implemented here; this version always raises NotImplementedError.
    """
    raise NotImplementedError("_open_hack")
def _parse_header(self, fd):
    """Read a single header from *fd*.

    With encrypted headers the data is first routed through
    :meth:`_decrypt_header`; without a password None is returned instead.
    A ``struct.error`` while parsing is recorded through
    :meth:`_set_error` and also yields None.
    """
    try:
        # handle encrypted headers
        main = self._main
        encrypted = (main and main.flags & RAR_MAIN_PASSWORD) or self._hdrenc_main
        if encrypted:
            if not self._password:
                return None
            fd = self._decrypt_header(fd)

        # now read actual header
        return self._parse_block_header(fd)
    except struct.error:
        self._set_error("Broken header in RAR file")
        return None
def _next_volname(self, volfile):
    """Build the name of the volume that follows *volfile*.

    The naming scheme is chosen by the ``RAR_MAIN_NEWNUMBERING`` flag of
    the main header.  Raises IOError when working on a file-like object,
    where there is no name to derive from.
    """
    if is_filelike(volfile):
        raise IOError("Working on single FD")
    if self._main.flags & RAR_MAIN_NEWNUMBERING:
        next_name = _next_newvol
    else:
        next_name = _next_oldvol
    return next_name(volfile)
def _set_error(self, msg, *args):
    """Record a parse error; in strict mode also raise BadRarFile.

    *args*, when given, are %-interpolated into *msg*.
    """
    text = msg % args if args else msg
    self._parse_error = text
    if self._strict:
        raise BadRarFile(text)
def open(self, inf, pwd):
    """Return stream object for file data.

    File copies and hard links are resolved to the entry they point at;
    symlinks and junctions yield an in-memory stream holding the link
    target.  All other entries are opened through one of the
    ``_open_*`` strategies.

    Raises NeedFirstVolume when the entry's data begins in an earlier
    volume.
    """
    redir = inf.file_redir
    if redir:
        redir_type, _redir_flags, redir_name = redir
        # cannot leave to unrar as it expects copied file to exist
        if redir_type in (RAR5_XREDIR_FILE_COPY, RAR5_XREDIR_HARD_LINK):
            inf = self.getinfo(redir_name)
            if not inf:
                raise BadRarFile("cannot find copied file")
        elif redir_type in (
            RAR5_XREDIR_UNIX_SYMLINK, RAR5_XREDIR_WINDOWS_SYMLINK,
            RAR5_XREDIR_WINDOWS_JUNCTION,
        ):
            return io.BytesIO(redir_name.encode("utf8"))

    if inf.flags & RAR_FILE_SPLIT_BEFORE:
        raise NeedFirstVolume("Partial file, please start from first volume: " + inf.filename, None)

    # is temp write usable?
    # Conditions are evaluated in this exact order; for file-like input the
    # size limit and the USE_EXTRACT_HACK switch are not consulted.
    use_hack = bool(
        self._main
        and not self._main._must_disable_hack()
        and not inf._must_disable_hack()
        and (
            is_filelike(self._rarfile)
            or (not inf.file_size > HACK_SIZE_LIMIT and USE_EXTRACT_HACK)
        )
    )

    # now extract
    stored_plain = (
        inf.compress_type == RAR_M0
        and (inf.flags & RAR_FILE_PASSWORD) == 0
        and inf.file_redir is None
    )
    if stored_plain:
        return self._open_clear(inf)
    if use_hack:
        return self._open_hack(inf, pwd)
    if is_filelike(self._rarfile):
        return self._open_unrar_membuf(self._rarfile, inf, pwd)
    return self._open_unrar(self._rarfile, inf, pwd)
def _open_clear(self, inf):
    """Open a stored, unencrypted entry.

    Returns a DirectReader unless FORCE_TOOL is set, in which case the
    extraction tool is used.
    """
    if not FORCE_TOOL:
        return DirectReader(self, inf)
    return self._open_unrar(self._rarfile, inf)
def _open_hack_core(self, inf, pwd, prefix, suffix):
    """Copy one entry into a temporary single-file archive and open it.

    The temporary file receives *prefix*, then ``header_size`` +
    ``compress_size`` bytes read from the entry's volume starting at
    ``header_offset``, then *suffix*.  It is handed to ``_open_unrar``
    both as the archive to read and as the temp file to clean up.

    Raises BadRarFile when the volume ends before the entry does.  On
    any failure the volume handle is closed and the temporary file is
    removed before the exception propagates.
    """
    remaining = inf.compress_size + inf.header_size

    # context managers close both handles on every path, including
    # failures in seek()/mkstemp() that happen before any data is copied
    with XFile(inf.volume_file, 0) as rf:
        rf.seek(inf.header_offset)

        tmpfd, tmpname = mkstemp(suffix=".rar", dir=HACK_TMP_DIR)
        try:
            with os.fdopen(tmpfd, "wb") as tmpf:
                tmpf.write(prefix)
                while remaining > 0:
                    buf = rf.read(min(remaining, BSIZE))
                    if not buf:
                        raise BadRarFile("read failed: " + inf.filename)
                    tmpf.write(buf)
                    remaining -= len(buf)
                tmpf.write(suffix)
        except BaseException:
            # tmpf is already closed here, so the unlink cannot be
            # skipped by an error raised while closing it
            os.unlink(tmpname)
            raise

    return self._open_unrar(tmpname, inf, pwd, tmpname)
def _open_unrar_membuf(self, memfile, inf, pwd):
    """Write in-memory archive to temp file, needed for solid archives.

    The temp file name is passed to ``_open_unrar`` both as the archive
    and as the temp file, with ``force_file=True`` so the entry name is
    still given to the tool.
    """
    tmpname = membuf_tempfile(memfile)
    return self._open_unrar(tmpname, inf, pwd, tmpname, force_file=True)
def _open_unrar(self, rarfile, inf, pwd=None, tmpfile=None, force_file=False):
    """Extract using unrar

    Builds the tool command line via ``tool_setup()`` and returns a
    PipeReader for it.  The entry name is passed to the tool unless
    *tmpfile* is set and *force_file* is false.
    """
    setup = tool_setup()

    # not giving filename avoids encoding related problems
    if tmpfile and not force_file:
        member = None
    else:
        member = inf.filename.replace("/", os.path.sep)

    # read from unrar pipe
    cmd = setup.open_cmdline(pwd, rarfile, member)
    return PipeReader(self, inf, cmd, tmpfile)
1366 # RAR3 format
class Rar3Info(RarInfo):
    """RAR3 specific fields."""
    extract_version = 15
    salt = None
    add_size = 0
    header_crc = None
    header_size = None
    header_offset = None
    data_offset = None
    _md_class = None
    _md_expect = None
    _name_size = None

    # make sure some rar5 fields are always present
    file_redir = None
    blake2sp_hash = None

    endarc_datacrc = None
    endarc_volnr = None

    def _must_disable_hack(self):
        """Return True when this header rules out the temp-archive shortcut.

        File blocks: password protected or split across volumes.
        Main block: solid or password flag set.
        """
        if self.type == RAR_BLOCK_FILE:
            blockers = RAR_FILE_PASSWORD | RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER
        elif self.type == RAR_BLOCK_MAIN:
            blockers = RAR_MAIN_SOLID | RAR_MAIN_PASSWORD
        else:
            return False
        return bool(self.flags & blockers)

    def is_dir(self):
        """Returns True if entry is a directory."""
        if self.type != RAR_BLOCK_FILE or self.is_symlink():
            return False
        # all bits of the directory mask must be set
        return (self.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY

    def is_symlink(self):
        """Returns True if entry is a symlink."""
        if self.type != RAR_BLOCK_FILE:
            return False
        if self.host_os != RAR_OS_UNIX:
            return False
        return (self.mode & 0xF000) == 0xA000

    def is_file(self):
        """Returns True if entry is a normal file."""
        if self.type != RAR_BLOCK_FILE:
            return False
        return not (self.is_dir() or self.is_symlink())
class RAR3Parser(CommonParser):
    """Parse RAR3 file format.
    """
    _expect_sig = RAR_ID
    # cache of the last derived header key, reused while the salt repeats
    _last_aes_key = (None, None, None)   # (salt, key, iv)
def _decrypt_header(self, fd):
    """Return a HeaderDecrypt wrapper around *fd*.

    Reads the 8-byte salt from *fd*.  The key and IV are derived with
    ``rar3_s2k`` and cached, so a repeated salt skips the derivation.
    Raises NoCrypto when crypto support is missing.
    """
    if not _have_crypto:
        raise NoCrypto("Cannot parse encrypted headers - no crypto")
    salt = fd.read(8)
    cached_salt, key, iv = self._last_aes_key
    if cached_salt != salt:
        key, iv = rar3_s2k(self._password, salt)
        self._last_aes_key = (salt, key, iv)
    return HeaderDecrypt(fd, key, iv)
def _parse_block_header(self, fd):
    """Parse common block header

    Returns a filled Rar3Info, or None on EOF, a truncated header or a
    header CRC mismatch (the latter two are reported via _set_error).
    """
    h = Rar3Info()
    h.header_offset = fd.tell()

    # read and parse base header
    buf = fd.read(S_BLK_HDR.size)
    if not buf:
        # nothing left to read: plain end of data, not an error
        return None
    if len(buf) < S_BLK_HDR.size:
        self._set_error("Unexpected EOF when reading header")
        return None
    t = S_BLK_HDR.unpack_from(buf)
    h.header_crc, h.type, h.flags, h.header_size = t

    # read full header
    if h.header_size > S_BLK_HDR.size:
        hdata = buf + fd.read(h.header_size - S_BLK_HDR.size)
    else:
        hdata = buf
    h.data_offset = fd.tell()

    # unexpected EOF?
    if len(hdata) != h.header_size:
        self._set_error("Unexpected EOF when reading header")
        return None

    pos = S_BLK_HDR.size

    # block has data associated with it?
    if h.flags & RAR_LONG_BLOCK:
        h.add_size, pos = load_le32(hdata, pos)
    else:
        h.add_size = 0

    # parse interesting ones, decide header boundaries for crc
    # (crc_pos is the end of the header bytes covered by the checksum)
    if h.type == RAR_BLOCK_MARK:
        # returned without any CRC check
        return h
    elif h.type == RAR_BLOCK_MAIN:
        pos += 6
        if h.flags & RAR_MAIN_ENCRYPTVER:
            pos += 1
        crc_pos = pos
        if h.flags & RAR_MAIN_COMMENT:
            # old-style comment subblocks follow; position is not needed afterwards
            self._parse_subblocks(h, hdata, pos)
    elif h.type == RAR_BLOCK_FILE:
        # file header parsing starts 4 bytes back: it re-reads the size
        # field that was consumed as add_size above
        pos = self._parse_file_header(h, hdata, pos - 4)
        crc_pos = pos
        if h.flags & RAR_FILE_COMMENT:
            pos = self._parse_subblocks(h, hdata, pos)
    elif h.type == RAR_BLOCK_SUB:
        pos = self._parse_file_header(h, hdata, pos - 4)
        crc_pos = h.header_size
    elif h.type == RAR_BLOCK_OLD_AUTH:
        pos += 8
        crc_pos = pos
    elif h.type == RAR_BLOCK_OLD_EXTRA:
        pos += 7
        crc_pos = pos
    elif h.type == RAR_BLOCK_ENDARC:
        if h.flags & RAR_ENDARC_DATACRC:
            h.endarc_datacrc, pos = load_le32(hdata, pos)
        if h.flags & RAR_ENDARC_VOLNR:
            h.endarc_volnr = S_SHORT.unpack_from(hdata, pos)[0]
            pos += 2
        crc_pos = h.header_size
    else:
        crc_pos = h.header_size

    # check crc
    if h.type == RAR_BLOCK_OLD_SUB:
        # for this block type the checksum also covers the attached data
        crcdat = hdata[2:] + fd.read(h.add_size)
    else:
        crcdat = hdata[2:crc_pos]

    # header stores only the low 16 bits of the CRC32
    calc_crc = crc32(crcdat) & 0xFFFF

    # return good header
    if h.header_crc == calc_crc:
        return h

    # header parsing failed.
    self._set_error("Header CRC error (%02x): exp=%x got=%x (xlen = %d)",
                    h.type, h.header_crc, calc_crc, len(crcdat))

    # instead of panicking, send eof
    return None
def _parse_file_header(self, h, hdata, pos):
    """Read file-specific header

    Fills *h* from the fixed S_FILE_HDR fields at *pos*, then the
    optional 64-bit size extension, the file name, the salt and the
    extended time stamps.  Returns the position after the parsed data.
    """
    fld = S_FILE_HDR.unpack_from(hdata, pos)
    pos += S_FILE_HDR.size

    (packed_size, unpacked_size, host_os, file_crc, dos_time,
     version, method, name_size, mode) = fld[:9]

    h.compress_size = packed_size
    h.file_size = unpacked_size
    h.host_os = host_os
    h.CRC = file_crc
    h.date_time = parse_dos_time(dos_time)
    h.mtime = to_datetime(h.date_time)
    h.extract_version = version
    h.compress_type = method
    h._name_size = name_size
    h.mode = mode

    h._md_class = CRC32Context
    h._md_expect = h.CRC

    if h.flags & RAR_FILE_LARGE:
        # upper 32 bits of both sizes
        high_packed, pos = load_le32(hdata, pos)
        high_unpacked, pos = load_le32(hdata, pos)
        h.compress_size |= high_packed << 32
        h.file_size |= high_unpacked << 32
        h.add_size = h.compress_size

    raw_name, pos = load_bytes(hdata, name_size, pos)
    is_unicode = h.flags & RAR_FILE_UNICODE
    if is_unicode and b"\0" in raw_name:
        # stored in custom encoding: 8-bit name, NUL, encoded data
        std_part, _sep, enc_part = raw_name.partition(b"\0")
        h.orig_filename = std_part
        u = UnicodeFilename(std_part, enc_part)
        decoded = u.decode()
        # if parsing failed fall back to simple name
        h.filename = self._decode(std_part) if u.failed else decoded
    elif is_unicode:
        # stored in UTF8
        h.orig_filename = raw_name
        h.filename = raw_name.decode("utf8", "replace")
    else:
        # stored in random encoding
        h.orig_filename = raw_name
        h.filename = self._decode(raw_name)

    # change separator, set dir suffix
    h.filename = h.filename.replace("\\", "/").rstrip("/")
    if h.is_dir():
        h.filename = h.filename + "/"

    h.salt = None
    if h.flags & RAR_FILE_SALT:
        h.salt, pos = load_bytes(hdata, 8, pos)

    # optional extended time stamps
    if h.flags & RAR_FILE_EXTTIME:
        pos = _parse_ext_time(h, hdata, pos)
    else:
        # NOTE(review): this also drops the mtime derived from the DOS
        # timestamp above - confirm that is intended
        h.mtime = h.atime = h.ctime = h.arctime = None

    return pos
def _parse_subblocks(self, h, hdata, pos):
    """Find old-style comment subblock

    Walks the subblocks stored in *hdata* from *pos*.  A comment
    subblock is decompressed and stored as ``h.comment`` when its CRC
    matches or CRC checking is disabled.  Returns the final position.
    """
    end = len(hdata)
    while pos < end:
        # ordinary block header
        _scrc, sub_type, sub_flags, sub_len = S_BLK_HDR.unpack_from(hdata, pos)
        next_pos = pos + sub_len
        pos += S_BLK_HDR.size

        # corrupt header
        if next_pos < pos:
            break

        # followed by block-specific header
        has_comment_hdr = pos + S_COMMENT_HDR.size <= next_pos
        if sub_type == RAR_BLOCK_OLD_COMMENT and has_comment_hdr:
            declen, ver, meth, crc = S_COMMENT_HDR.unpack_from(hdata, pos)
            pos += S_COMMENT_HDR.size
            packed = hdata[pos:next_pos]
            cmt = rar3_decompress(ver, meth, packed, declen, sub_flags,
                                  crc, self._password)
            # stored checksum is the low 16 bits of the CRC32
            if not self._crc_check or (crc32(cmt) & 0xFFFF) == crc:
                h.comment = self._decode_comment(cmt)

        pos = next_pos
    return pos
def _read_comment_v3(self, inf, pwd=None):
    """Load and decode the comment stored in service block *inf*.

    Returns the decoded comment, or None when CRC checking is enabled
    and the decompressed data does not match ``inf.CRC``.
    """
    # read data
    with XFile(inf.volume_file) as rf:
        rf.seek(inf.data_offset)
        packed = rf.read(inf.compress_size)

    # decompress
    cmt = rar3_decompress(inf.extract_version, inf.compress_type, packed,
                          inf.file_size, inf.flags, inf.CRC, pwd, inf.salt)

    # check crc
    if self._crc_check and crc32(cmt) != inf.CRC:
        return None

    return self._decode_comment(cmt)
def _decode(self, val):
    """Decode bytes *val* to str.

    Each encoding in TRY_ENCODINGS is tried strictly, in order; if all
    fail, the parser charset is used with replacement characters.
    """
    for encoding in TRY_ENCODINGS:
        try:
            decoded = val.decode(encoding)
        except UnicodeError:
            continue
        return decoded
    return val.decode(self._charset, "replace")
def _decode_comment(self, val):
    """Decode comment bytes; currently the same as ``_decode``."""
    return self._decode(val)
def process_entry(self, fd, item):
    """Examine a parsed header and record what it describes.

    File blocks are added to the lookup tables (continuation parts only
    update the previous entry), "CMT" service blocks supply file or
    archive comments, and the main block supplies the archive comment
    and the password flag.
    """
    kind = item.type

    if kind == RAR_BLOCK_FILE:
        if item.flags & RAR_FILE_VERSION:
            return  # skip old versions
        if not item.flags & RAR_FILE_SPLIT_BEFORE:
            # use only first part
            self._info_map[item.filename.rstrip("/")] = item
            self._info_list.append(item)
        elif self._info_list:
            # final crc is in last block
            first_part = self._info_list[-1]
            first_part.CRC = item.CRC
            first_part._md_expect = item._md_expect
            first_part.compress_size += item.compress_size

    elif kind == RAR_BLOCK_SUB:
        # parse new-style comment
        if item.filename != "CMT":
            return
        if item.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER):
            return
        cmt = self._read_comment_v3(item, self._password)
        if item.flags & RAR_FILE_SOLID:
            # file comment
            if self._info_list:
                self._info_list[-1].comment = cmt
        else:
            # archive comment
            self.comment = cmt

    elif kind == RAR_BLOCK_MAIN:
        if item.flags & RAR_MAIN_COMMENT:
            self.comment = item.comment
        if item.flags & RAR_MAIN_PASSWORD:
            self._needs_password = True
1686 # put file compressed data into temporary .rar archive, and run
1687 # unrar on that, thus avoiding unrar going over whole archive
def _open_hack(self, inf, pwd):
    """Open *inf* through a temporary archive holding only that entry."""
    # create main header: crc, type, flags, size, res1, res2
    main_hdr = S_BLK_HDR.pack(0x90CF, 0x73, 0, 13)
    reserved = b"\0" * (2 + 4)
    return self._open_hack_core(inf, pwd, RAR_ID + main_hdr + reserved, b"")
1695 # RAR5 format
class Rar5Info(RarInfo):
    """Shared fields for RAR5 records.
    """
    extract_version = 50
    header_crc = None
    header_size = None
    header_offset = None
    data_offset = None

    # type=all
    block_type = None
    block_flags = None
    add_size = 0            # size of the data area following the header
    block_extra_size = 0    # size of the extra area inside the header

    # type=MAIN
    volume_number = None
    _md_class = None
    _md_expect = None

    def _must_disable_hack(self):
        # base records never veto the temp-archive shortcut; subclasses override
        return False
class Rar5BaseFile(Rar5Info):
    """Shared struct for file & service record.
    """
    type = -1
    file_flags = None
    # (algo, flags, kdf_count, salt, iv, checkval)
    file_encryption = (0, 0, 0, b"", b"", b"")
    file_compress_flags = None
    file_redir = None
    file_owner = None
    file_version = None
    blake2sp_hash = None

    def _must_disable_hack(self):
        """Return True when the temp-archive shortcut cannot be used.

        That is the case for encrypted, split, solid and redirected
        entries.
        """
        split_mask = RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER
        return bool(
            self.flags & RAR_FILE_PASSWORD
            or self.block_flags & split_mask
            or self.file_compress_flags & RAR5_COMPR_SOLID
            or self.file_redir
        )
class Rar5FileInfo(Rar5BaseFile):
    """RAR5 file record.
    """
    type = RAR_BLOCK_FILE

    def is_symlink(self):
        """Returns True if entry is a symlink."""
        # pylint: disable=unsubscriptable-object
        redir = self.file_redir
        if redir is None:
            return False
        return redir[0] in (
            RAR5_XREDIR_UNIX_SYMLINK,
            RAR5_XREDIR_WINDOWS_SYMLINK,
            RAR5_XREDIR_WINDOWS_JUNCTION,
        )

    def is_file(self):
        """Returns True if entry is a normal file."""
        if self.is_dir():
            return False
        return not self.is_symlink()

    def is_dir(self):
        """Returns True if entry is a directory."""
        # redirected entries are never reported as directories
        return not self.file_redir and bool(self.file_flags & RAR5_FILE_FLAG_ISDIR)
class Rar5ServiceInfo(Rar5BaseFile):
    """RAR5 service record.
    """
    # reported under the same type code as RAR3 sub blocks
    type = RAR_BLOCK_SUB
class Rar5MainInfo(Rar5Info):
    """RAR5 archive main record.
    """
    type = RAR_BLOCK_MAIN
    main_flags = None
    main_volume_number = None

    def _must_disable_hack(self):
        """Solid archives cannot use the temp-archive shortcut."""
        return bool(self.main_flags & RAR5_MAIN_FLAG_SOLID)
class Rar5EncryptionInfo(Rar5Info):
    """RAR5 archive header encryption record.
    """
    type = RAR5_BLOCK_ENCRYPTION
    encryption_algo = None
    encryption_flags = None
    encryption_kdf_count = None     # passed to key derivation as a shift count
    encryption_salt = None
    encryption_check_value = None   # optional password check data

    def needs_password(self):
        # presence of this record always means a password is required
        return True
class Rar5EndArcInfo(Rar5Info):
    """RAR5 end of archive record.
    """
    type = RAR_BLOCK_ENDARC
    # raw flags of the end-of-archive record
    endarc_flags = None
class RAR5Parser(CommonParser):
    """Parse RAR5 format.
    """
    _expect_sig = RAR5_ID
    # set to the Rar5EncryptionInfo record once one has been parsed
    _hdrenc_main = None

    # AES encrypted headers
    # cache of the most recently derived key
    _last_aes256_key = (-1, None, None)  # (kdf_count, salt, key)
1824 def _get_utf8_password(self):
1825 pwd = self._password
1826 if isinstance(pwd, str):
1827 return pwd.encode("utf8")
1828 return pwd
1830 def _gen_key(self, kdf_count, salt):
1831 if self._last_aes256_key[:2] == (kdf_count, salt):
1832 return self._last_aes256_key[2]
1833 if kdf_count > RAR_MAX_KDF_SHIFT:
1834 raise BadRarFile("Too large kdf_count")
1835 pwd = self._get_utf8_password()
1836 key = rar5_s2k(pwd, salt, 1 << kdf_count)
1837 self._last_aes256_key = (kdf_count, salt, key)
1838 return key
def _decrypt_header(self, fd):
    """Return a HeaderDecrypt wrapper for the next encrypted header.

    The key comes from the remembered encryption record; the 16-byte IV
    is read from *fd*.  Raises NoCrypto when crypto support is missing.
    """
    if not _have_crypto:
        raise NoCrypto("Cannot parse encrypted headers - no crypto")
    enc = self._hdrenc_main
    # derive the key first, so a failure leaves fd untouched
    key = self._gen_key(enc.encryption_kdf_count, enc.encryption_salt)
    iv = fd.read(16)
    return HeaderDecrypt(fd, key, iv)
def _parse_block_header(self, fd):
    """Parse common block header

    Reads the CRC and the variable-length size field, loads the whole
    header, verifies its CRC32 and hands the data to the parser for
    the block type.  Returns the info object, or None on EOF, an
    oversized header, a CRC mismatch or an unknown block type.
    """
    header_offset = fd.tell()

    # 4 bytes of CRC plus at least one byte of the size vint
    preload = 4 + 1
    head = fd.read(preload)
    if len(head) < preload:
        self._set_error("Unexpected EOF when reading header")
        return None
    # high bit set means the vint continues in the next byte
    while head[-1] & 0x80:
        extra = fd.read(1)
        if not extra:
            self._set_error("Unexpected EOF when reading header")
            return None
        head += extra
    header_crc, pos = load_le32(head, 0)
    hdrlen, pos = load_vint(head, pos)
    if hdrlen > 2 * 1024 * 1024:
        return None
    header_size = pos + hdrlen

    # read full header, check for EOF
    hdata = head + fd.read(header_size - len(head))
    if len(hdata) != header_size:
        self._set_error("Unexpected EOF when reading header")
        return None
    data_offset = fd.tell()

    calc_crc = crc32(memoryview(hdata)[4:])
    if header_crc != calc_crc:
        # header parsing failed.
        self._set_error("Header CRC error: exp=%x got=%x (xlen = %d)",
                        header_crc, calc_crc, len(hdata))
        return None

    block_type, pos = load_vint(hdata, pos)

    # block type -> (info class, body parser)
    kinds = {
        RAR5_BLOCK_MAIN: (Rar5MainInfo, self._parse_main_block),
        RAR5_BLOCK_FILE: (Rar5FileInfo, self._parse_file_block),
        RAR5_BLOCK_SERVICE: (Rar5ServiceInfo, self._parse_file_block),
        RAR5_BLOCK_ENCRYPTION: (Rar5EncryptionInfo, self._parse_encryption_block),
        RAR5_BLOCK_ENDARC: (Rar5EndArcInfo, self._parse_endarc_block),
    }
    if block_type not in kinds:
        return None
    info_class, parse_body = kinds[block_type]

    h, pos = self._parse_block_common(info_class(), hdata)
    h = parse_body(h, hdata, pos)
    if h:
        h.header_offset = header_offset
        h.data_offset = data_offset
    return h
def _parse_block_common(self, h, hdata):
    """Fill the fields shared by all block types.

    Returns ``(h, pos)`` where *pos* is the offset of the first
    type-specific field in *hdata*.
    """
    h.header_crc, pos = load_le32(hdata, 0)
    body_len, pos = load_vint(hdata, pos)
    h.header_size = body_len + pos
    h.block_type, pos = load_vint(hdata, pos)
    h.block_flags, pos = load_vint(hdata, pos)

    has_extra = h.block_flags & RAR5_BLOCK_FLAG_EXTRA_DATA
    has_data = h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA
    if has_extra:
        h.block_extra_size, pos = load_vint(hdata, pos)
    if has_data:
        h.add_size, pos = load_vint(hdata, pos)

    h.compress_size = h.add_size

    # mirror into the RAR3-style flags
    if h.block_flags & RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN:
        h.flags |= RAR_SKIP_IF_UNKNOWN
    if has_data:
        h.flags |= RAR_LONG_BLOCK
    return h, pos
def _parse_main_block(self, h, hdata, pos):
    """Parse the main archive header and map its flags to RAR3-style ones."""
    h.main_flags, pos = load_vint(hdata, pos)
    has_volnr = h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR
    if has_volnr:
        h.main_volume_number, pos = load_vint(hdata, pos)

    h.flags |= RAR_MAIN_NEWNUMBERING
    flag_map = (
        (RAR5_MAIN_FLAG_SOLID, RAR_MAIN_SOLID),
        (RAR5_MAIN_FLAG_ISVOL, RAR_MAIN_VOLUME),
        (RAR5_MAIN_FLAG_RECOVERY, RAR_MAIN_RECOVERY),
    )
    for rar5_flag, compat_flag in flag_map:
        if h.main_flags & rar5_flag:
            h.flags |= compat_flag
    if self._hdrenc_main:
        h.flags |= RAR_MAIN_PASSWORD
    if not has_volnr:
        # no volume number field: flagged as the first volume
        h.flags |= RAR_MAIN_FIRSTVOLUME

    return h
def _parse_file_block(self, h, hdata, pos):
    """Parse a file or service header into *h* and return it.

    Reads the fixed fields, then the extra records when the block has
    an extra area, and finally mirrors the RAR5 flags into the
    RAR3-style ``flags`` field.
    """
    h.file_flags, pos = load_vint(hdata, pos)
    h.file_size, pos = load_vint(hdata, pos)
    h.mode, pos = load_vint(hdata, pos)

    if h.file_flags & RAR5_FILE_FLAG_HAS_MTIME:
        h.mtime, pos = load_unixtime(hdata, pos)
        h.date_time = h.mtime.timetuple()[:6]
    if h.file_flags & RAR5_FILE_FLAG_HAS_CRC32:
        h.CRC, pos = load_le32(hdata, pos)
        h._md_class = CRC32Context
        h._md_expect = h.CRC

    h.file_compress_flags, pos = load_vint(hdata, pos)
    h.file_host_os, pos = load_vint(hdata, pos)
    h.orig_filename, pos = load_vstr(hdata, pos)
    h.filename = h.orig_filename.decode("utf8", "replace").rstrip("/")

    # use compatible values
    is_windows = h.file_host_os == RAR5_OS_WINDOWS
    h.host_os = RAR_OS_WIN32 if is_windows else RAR_OS_UNIX
    h.compress_type = RAR_M0 + ((h.file_compress_flags >> 7) & 7)

    if h.block_extra_size:
        # allow 1 byte of garbage
        limit = len(hdata) - 1
        while pos < limit:
            xsize, pos = load_vint(hdata, pos)
            xdata, pos = load_bytes(hdata, xsize, pos)
            self._process_file_extra(h, xdata)

    # (source flags, RAR5 bit, RAR3-style bit)
    flag_map = (
        (h.block_flags, RAR5_BLOCK_FLAG_SPLIT_BEFORE, RAR_FILE_SPLIT_BEFORE),
        (h.block_flags, RAR5_BLOCK_FLAG_SPLIT_AFTER, RAR_FILE_SPLIT_AFTER),
        (h.file_flags, RAR5_FILE_FLAG_ISDIR, RAR_FILE_DIRECTORY),
        (h.file_compress_flags, RAR5_COMPR_SOLID, RAR_FILE_SOLID),
    )
    for source, rar5_bit, compat_bit in flag_map:
        if source & rar5_bit:
            h.flags |= compat_bit

    if h.is_dir():
        h.filename = h.filename + "/"
    return h
def _parse_endarc_block(self, h, hdata, pos):
    """Parse the end-of-archive record; flag a following volume."""
    h.endarc_flags, pos = load_vint(hdata, pos)
    more_volumes = h.endarc_flags & RAR5_ENDARC_FLAG_NEXT_VOL
    if more_volumes:
        h.flags |= RAR_ENDARC_NEXT_VOLUME
    return h
def _check_password(self, check_value, kdf_count_shift, salt):
    """Verify the password against the stored check value.

    Returns silently when *check_value* has an unexpected length or its
    own checksum does not match.  Raises BadRarFile for an oversized
    KDF shift and RarWrongPassword when the password does not match.
    """
    if len(check_value) != RAR5_PW_CHECK_SIZE + RAR5_PW_SUM_SIZE:
        return
    if kdf_count_shift > RAR_MAX_KDF_SHIFT:
        raise BadRarFile("Too large kdf_count")

    hdr_check = check_value[:RAR5_PW_CHECK_SIZE]
    hdr_sum = check_value[RAR5_PW_CHECK_SIZE:]
    # the check value carries a truncated SHA-256 of itself
    if sha256(hdr_check).digest()[:RAR5_PW_SUM_SIZE] != hdr_sum:
        return

    rounds = (1 << kdf_count_shift) + 32
    pwd_hash = rar5_s2k(self._get_utf8_password(), salt, rounds)

    # XOR-fold the derived hash down to the check size
    folded = bytearray(RAR5_PW_CHECK_SIZE)
    mask = RAR5_PW_CHECK_SIZE - 1
    for idx, byte in enumerate(pwd_hash):
        folded[idx & mask] ^= byte

    if folded != hdr_check:
        raise RarWrongPassword()
def _parse_encryption_block(self, h, hdata, pos):
    """Parse the header-encryption record and remember it.

    Raises BadRarFile for any cipher other than AES-256.  When a check
    value and a password are both available the password is verified.
    """
    h.encryption_algo, pos = load_vint(hdata, pos)
    h.encryption_flags, pos = load_vint(hdata, pos)
    h.encryption_kdf_count, pos = load_byte(hdata, pos)
    h.encryption_salt, pos = load_bytes(hdata, 16, pos)
    if h.encryption_flags & RAR5_ENC_FLAG_HAS_CHECKVAL:
        h.encryption_check_value, pos = load_bytes(hdata, 12, pos)

    if h.encryption_algo != RAR5_XENC_CIPHER_AES256:
        raise BadRarFile("Unsupported header encryption cipher")

    check_value = h.encryption_check_value
    if check_value and self._password:
        self._check_password(check_value, h.encryption_kdf_count, h.encryption_salt)

    # later headers are decrypted using this record
    self._hdrenc_main = h
    return h
def _process_file_extra(self, h, xdata):
    """Dispatch one extra-area record to its parser.

    Service records and unknown record types are ignored.
    """
    xtype, pos = load_vint(xdata, 0)
    parsers = {
        RAR5_XFILE_TIME: self._parse_file_xtime,
        RAR5_XFILE_ENCRYPTION: self._parse_file_encryption,
        RAR5_XFILE_HASH: self._parse_file_hash,
        RAR5_XFILE_VERSION: self._parse_file_version,
        RAR5_XFILE_REDIR: self._parse_file_redir,
        RAR5_XFILE_OWNER: self._parse_file_owner,
    }
    parse = parsers.get(xtype)
    if parse is not None:
        parse(h, xdata, pos)
2055 # extra block for file time record
def _parse_file_xtime(self, h, xdata, pos):
    """Parse the file time record: mtime, ctime and atime.

    Timestamps are stored in that order, followed by the nanosecond
    parts (same order) when RAR5_XTIME_UNIXTIME_NS is set.
    """
    tflags, pos = load_vint(xdata, pos)

    load_time = load_unixtime if tflags & RAR5_XTIME_UNIXTIME else load_windowstime

    stamps = (
        (RAR5_XTIME_HAS_MTIME, "mtime"),
        (RAR5_XTIME_HAS_CTIME, "ctime"),
        (RAR5_XTIME_HAS_ATIME, "atime"),
    )

    for present, attr in stamps:
        if tflags & present:
            value, pos = load_time(xdata, pos)
            setattr(h, attr, value)
            if attr == "mtime":
                # date_time is taken before nanoseconds are applied
                h.date_time = value.timetuple()[:6]

    if tflags & RAR5_XTIME_UNIXTIME_NS:
        for present, attr in stamps:
            if tflags & present:
                nsec, pos = load_le32(xdata, pos)
                setattr(h, attr, to_nsdatetime(getattr(h, attr), nsec))
2082 # just remember encryption info
def _parse_file_encryption(self, h, xdata, pos):
    """Remember the file encryption parameters and mark *h* as encrypted."""
    algo, pos = load_vint(xdata, pos)
    enc_flags, pos = load_vint(xdata, pos)
    kdf_count, pos = load_byte(xdata, pos)
    salt, pos = load_bytes(xdata, 16, pos)
    iv, pos = load_bytes(xdata, 16, pos)

    checkval = None
    if enc_flags & RAR5_XENC_CHECKVAL:
        checkval, pos = load_bytes(xdata, 12, pos)

    if enc_flags & RAR5_XENC_TWEAKED:
        # tweaked checksums: disable hash verification for this entry
        h._md_expect = None
        h._md_class = NoHashContext

    h.file_encryption = (algo, enc_flags, kdf_count, salt, iv, checkval)
    h.flags |= RAR_FILE_PASSWORD
def _parse_file_hash(self, h, xdata, pos):
    """Parse the file hash record; only BLAKE2sp is recognised."""
    hash_type, pos = load_vint(xdata, pos)
    if hash_type != RAR5_XHASH_BLAKE2SP:
        return
    h.blake2sp_hash, pos = load_bytes(xdata, 32, pos)
    tweaked = h.file_encryption[1] & RAR5_XENC_TWEAKED
    if not tweaked:
        h._md_class = Blake2SP
        h._md_expect = h.blake2sp_hash
def _parse_file_version(self, h, xdata, pos):
    """Store the file version record as ``(flags, version)``."""
    ver_flags, pos = load_vint(xdata, pos)
    ver_number, pos = load_vint(xdata, pos)
    h.file_version = (ver_flags, ver_number)
def _parse_file_redir(self, h, xdata, pos):
    """Store the redirect record as ``(type, flags, target name)``."""
    kind, pos = load_vint(xdata, pos)
    link_flags, pos = load_vint(xdata, pos)
    raw_target, pos = load_vstr(xdata, pos)
    target = raw_target.decode("utf8", "replace")
    h.file_redir = (kind, link_flags, target)
def _parse_file_owner(self, h, xdata, pos):
    """Store the owner record as ``(user name, group name, uid, gid)``.

    Fields absent from the record are None.
    """
    owner_flags, pos = load_vint(xdata, pos)

    uname = gname = uid = gid = None
    if owner_flags & RAR5_XOWNER_UNAME:
        uname, pos = load_vstr(xdata, pos)
    if owner_flags & RAR5_XOWNER_GNAME:
        gname, pos = load_vstr(xdata, pos)
    if owner_flags & RAR5_XOWNER_UID:
        uid, pos = load_vint(xdata, pos)
    if owner_flags & RAR5_XOWNER_GID:
        gid, pos = load_vint(xdata, pos)

    h.file_owner = (uname, gname, uid, gid)
def process_entry(self, fd, item):
    """Record a parsed block: file entries and the archive comment."""
    kind = item.block_type

    if kind == RAR5_BLOCK_SERVICE:
        if item.filename == "CMT":
            self._load_comment(fd, item)
        return
    if kind != RAR5_BLOCK_FILE:
        return

    if item.file_version:
        return  # skip old versions

    if not item.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE:
        # use only first part
        self._info_map[item.filename.rstrip("/")] = item
        self._info_list.append(item)
    elif self._info_list:
        # final crc is in last block
        first_part = self._info_list[-1]
        first_part.CRC = item.CRC
        first_part._md_expect = item._md_expect
        first_part.blake2sp_hash = item.blake2sp_hash
        first_part.compress_size += item.compress_size
def _load_comment(self, fd, item):
    """Load the archive comment from "CMT" service block *item*.

    Split or compressed comment blocks are ignored, as are encrypted
    ones that use a cipher other than AES-256.  The result is stored
    in ``self.comment``; the method always returns None.
    """
    if item.block_flags & (RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER):
        return None
    if item.compress_type != RAR_M0:
        return None

    if item.flags & RAR_FILE_PASSWORD:
        algo, ___flags, kdf_count, salt, iv, ___checkval = item.file_encryption
        if algo != RAR5_XENC_CIPHER_AES256:
            return None
        key = self._gen_key(kdf_count, salt)
        f = HeaderDecrypt(fd, key, iv)
        cmt = f.read(item.file_size)
    else:
        # archive comment
        with self._open_clear(item) as cmtstream:
            cmt = cmtstream.read()

    # rar bug? - appends zero to comment
    cmt = cmt.split(b"\0", 1)[0]
    # lenient decode, like filenames and redirect targets: a malformed
    # comment must not abort parsing with UnicodeDecodeError
    self.comment = cmt.decode("utf8", "replace")
    return None
def _open_hack(self, inf, pwd):
    """Open *inf* through a temporary archive holding only that entry."""

    def with_crc(body):
        # each header is preceded by the CRC32 of its bytes
        return S_LONG.pack(crc32(body)) + body

    # len, type, blk_flags, flags
    main_hdr = with_crc(b"\x03\x01\x00\x00")
    endarc_hdr = with_crc(b"\x03\x05\x00\x00")
    return self._open_hack_core(inf, pwd, RAR5_ID + main_hdr, endarc_hdr)
2186 ## Utility classes
class UnicodeFilename:
    """Handle RAR3 unicode filename decompression.

    *name* is the 8-bit form of the filename, *encdata* the encoded
    data that turns it into UTF-16.  ``failed`` becomes 1 when either
    input runs out while decoding.
    """
    def __init__(self, name, encdata):
        self.std_name = bytearray(name)
        self.encdata = bytearray(encdata)
        self.pos = self.encpos = 0
        self.buf = bytearray()
        self.failed = 0

    def enc_byte(self):
        """Copy encoded byte."""
        if self.encpos >= len(self.encdata):
            self.failed = 1
            return 0
        value = self.encdata[self.encpos]
        self.encpos += 1
        return value

    def std_byte(self):
        """Copy byte from 8-bit representation."""
        if self.pos < len(self.std_name):
            return self.std_name[self.pos]
        self.failed = 1
        return ord("?")

    def put(self, lo, hi):
        """Copy 16-bit value to result."""
        self.buf.extend((lo, hi))
        self.pos += 1

    def decode(self):
        """Decompress compressed UTF16 value."""
        hi = self.enc_byte()
        flags = 0
        flagbits = 0
        total = len(self.encdata)
        while self.encpos < total:
            # each flag byte holds four 2-bit opcodes, high bits first
            if flagbits == 0:
                flags = self.enc_byte()
                flagbits = 8
            flagbits -= 2
            opcode = (flags >> flagbits) & 3

            if opcode == 0:
                self.put(self.enc_byte(), 0)
            elif opcode == 1:
                self.put(self.enc_byte(), hi)
            elif opcode == 2:
                low = self.enc_byte()
                high = self.enc_byte()
                self.put(low, high)
            else:
                # run copied from the 8-bit name
                n = self.enc_byte()
                if n & 0x80:
                    delta = self.enc_byte()
                    for _ in range((n & 0x7f) + 2):
                        self.put((self.std_byte() + delta) & 0xFF, hi)
                else:
                    for _ in range(n + 2):
                        self.put(self.std_byte(), 0)
        return self.buf.decode("utf-16le", "replace")
2252 class RarExtFile(io.RawIOBase):
2253 """Base class for file-like object that :meth:`RarFile.open` returns.
2255 Provides public methods and common crc checking.
2257 Behaviour:
2258 - no short reads - .read() and .readinfo() read as much as requested.
2259 - no internal buffer, use io.BufferedReader for that.
2261 name = None #: Filename of the archive entry
2262 mode = "rb"
2263 _parser = None
2264 _inf = None
2265 _fd = None
2266 _remain = 0
2267 _returncode = 0
2268 _md_context = None
2269 _seeking = False
2271 def _open_extfile(self, parser, inf):
2272 self.name = inf.filename
2273 self._parser = parser
2274 self._inf = inf
2276 if self._fd:
2277 self._fd.close()
2278 if self._seeking:
2279 md_class = NoHashContext
2280 else:
2281 md_class = self._inf._md_class or NoHashContext
2282 self._md_context = md_class()
2283 self._fd = None
2284 self._remain = self._inf.file_size
2286 def read(self, n=-1):
2287 """Read all or specified amount of data from archive entry."""
2289 # sanitize count
2290 if n is None or n < 0:
2291 n = self._remain
2292 elif n > self._remain:
2293 n = self._remain
2294 if n == 0:
2295 return b""
2297 buf = []
2298 orig = n
2299 while n > 0:
2300 # actual read
2301 data = self._read(n)
2302 if not data:
2303 break
2304 buf.append(data)
2305 self._md_context.update(data)
2306 self._remain -= len(data)
2307 n -= len(data)
2308 data = b"".join(buf)
2309 if n > 0:
2310 raise BadRarFile("Failed the read enough data: req=%d got=%d" % (orig, len(data)))
2312 # done?
2313 if not data or self._remain == 0:
2314 # self.close()
2315 self._check()
2316 return data
2318 def _check(self):
2319 """Check final CRC."""
2320 final = self._md_context.digest()
2321 exp = self._inf._md_expect
2322 if exp is None:
2323 return
2324 if final is None:
2325 return
2326 if self._returncode:
2327 check_returncode(self._returncode, "", tool_setup().get_errmap())
2328 if self._remain != 0:
2329 raise BadRarFile("Failed the read enough data")
2330 if final != exp:
2331 raise BadRarFile("Corrupt file - CRC check failed: %s - exp=%r got=%r" % (
2332 self._inf.filename, exp, final))
2334 def _read(self, cnt):
2335 """Actual read that gets sanitized cnt."""
2336 raise NotImplementedError("_read")
2338 def close(self):
2339 """Close open resources."""
2341 super().close()
2343 if self._fd:
2344 self._fd.close()
2345 self._fd = None
2347 def __del__(self):
2348 """Hook delete to make sure tempfile is removed."""
2349 self.close()
2351 def readinto(self, buf):
2352 """Zero-copy read directly into buffer.
2354 Returns bytes read.
2356 raise NotImplementedError("readinto")
2358 def tell(self):
2359 """Return current reading position in uncompressed data."""
2360 return self._inf.file_size - self._remain
def seek(self, offset, whence=0):
    """Move the read position inside the uncompressed data.

    Forward moves are done by skipping ahead from the current position;
    backward moves re-open the entry and skip from the start.  The
    target is clamped to ``[0, file_size]``.  The first seek swaps the
    hash context for a no-op one, so the final checksum comparison no
    longer has a digest to check.

    Args:
        offset: distance relative to the position selected by *whence*.
        whence: 0 = start of file, 1 = current position, 2 = end of file.

    Returns:
        The new position as reported by ``tell()``.

    Raises:
        ValueError: *whence* is not 0, 1 or 2.
    """
    # a checksum over non-sequential reads would be meaningless
    if not self._seeking:
        self._md_context = NoHashContext()
        self._seeking = True

    total = self._inf.file_size
    here = self.tell()

    # translate (offset, whence) into an absolute target
    if whence == 0:
        target = offset
    elif whence == 1:
        target = here + offset
    elif whence == 2:
        target = total + offset
    else:
        raise ValueError("Invalid value for whence")

    # keep the target inside the file
    if target < 0:
        target = 0
    elif target > total:
        target = total

    if target < here:
        # going backwards: restart from the beginning
        self._open_extfile(self._parser, self._inf)
        self._skip(target)
    else:
        self._skip(target - here)
    return self.tell()
def _skip(self, cnt):
    """Read and discard data

    Delegates to ``empty_read`` with this object as the source, which
    reads ``cnt`` bytes in pieces of at most ``BSIZE``.
    """
    empty_read(self, cnt, BSIZE)
def readable(self):
    """Returns True

    Reading is always supported.
    """
    return True
def writable(self):
    """Returns False.

    Writing is not supported.
    """
    return False
def seekable(self):
    """Returns True.

    Seeking is supported, although it's slow on compressed files.
    """
    return True
def readall(self):
    """Read all remaining data

    Implemented as a plain ``read()`` call with no size argument.
    """
    # avoid RawIOBase default impl
    return self.read()
class PipeReader(RarExtFile):
    """Reader that takes its data from the stdout of an external command.

    Also takes ownership of an optional temporary file, which is deleted
    when the reader is closed.
    """

    def __init__(self, parser, inf, cmd, tempfile=None):
        """Remember the command line and temp file, then start the process."""
        super().__init__()
        self._cmd = cmd
        self._proc = None
        self._tempfile = tempfile
        self._open_extfile(parser, inf)

    def _close_proc(self):
        """Close the child's pipes, wait for it, and record its exit code."""
        proc = self._proc
        if not proc:
            return
        for stream in (proc.stdout, proc.stderr, proc.stdin):
            if stream:
                stream.close()
        proc.wait()
        self._returncode = proc.returncode
        self._proc = None

    def _open_extfile(self, parser, inf):
        """(Re)start the command so reading begins from the first byte."""
        super()._open_extfile(parser, inf)

        # get rid of a previous process, if any
        self._close_proc()

        # start a fresh one and read from its stdout
        self._returncode = 0
        proc = custom_popen(self._cmd)
        self._proc = proc
        self._fd = proc.stdout

    def _read(self, cnt):
        """Read up to *cnt* bytes from the pipe, retrying after short reads."""
        # a single read is usually sufficient
        chunk = self._fd.read(cnt)
        if len(chunk) == cnt or not chunk:
            return chunk

        # pipe delivered less than asked: keep reading until done or EOF
        pieces = [chunk]
        missing = cnt - len(chunk)
        while missing > 0:
            chunk = self._fd.read(missing)
            if not chunk:
                break
            missing -= len(chunk)
            pieces.append(chunk)
        return b"".join(pieces)

    def close(self):
        """Stop the process, close the stream and delete the temp file."""
        self._close_proc()
        super().close()

        tmp = self._tempfile
        if tmp:
            # best effort: failure to delete the file is ignored
            try:
                os.unlink(tmp)
            except OSError:
                pass
            self._tempfile = None

    def readinto(self, buf):
        """Fill *buf* straight from the pipe; returns the number of bytes stored."""
        want = len(buf)
        if want > self._remain:
            want = self._remain
        view = memoryview(buf)
        filled = 0
        while filled < want:
            n = self._fd.readinto(view[filled: want])
            if not n:
                break
            self._md_context.update(view[filled: filled + n])
            self._remain -= n
            filled += n
        return filled
class DirectReader(RarExtFile):
    """Reader for stored (uncompressed) data taken directly from the archive.

    Follows the entry across volumes when its header says it continues
    in the next one.
    """
    _cur = None
    _cur_avail = None
    _volfile = None

    def __init__(self, parser, inf):
        """Open the volume that holds the start of the entry."""
        super().__init__()
        self._open_extfile(parser, inf)

    def _open_extfile(self, parser, inf):
        """Open the first volume and position right after the entry header."""
        super()._open_extfile(parser, inf)

        self._volfile = self._inf.volume_file
        self._fd = XFile(self._volfile, 0)
        self._fd.seek(self._inf.header_offset, 0)
        # re-parsing the header leaves the file at the start of the data
        self._cur = self._parser._parse_header(self._fd)
        self._cur_avail = self._cur.add_size

    def _skip(self, cnt):
        """RAR Seek, skipping through rar files to get to correct position
        """
        todo = cnt
        while todo > 0:
            # current part used up: move on to the next volume
            if self._cur_avail == 0 and not self._open_next():
                break

            avail = self._cur_avail
            if todo > avail:
                # the rest of this part is skipped without touching the file
                todo -= avail
                self._remain -= avail
                self._cur_avail = 0
            else:
                self._fd.seek(todo, 1)
                self._cur_avail = avail - todo
                self._remain -= todo
                todo = 0

    def _read(self, cnt):
        """Read up to *cnt* bytes, crossing volume boundaries as needed."""
        # make sure the file position matches our bookkeeping
        pos = self._fd.tell()
        expected = self._cur.data_offset + self._cur.add_size - self._cur_avail
        if pos != expected:
            self._fd.seek(expected, 0)

        chunks = []
        while cnt > 0:
            # current part used up: move on to the next volume
            if self._cur_avail == 0 and not self._open_next():
                break

            # never read past the end of the current part
            if cnt > self._cur_avail:
                data = self._fd.read(self._cur_avail)
            else:
                data = self._fd.read(cnt)
            if not data:
                break

            got = len(data)
            cnt -= got
            self._cur_avail -= got
            chunks.append(data)

        # skip the join in the common single-chunk case
        if len(chunks) == 1:
            return chunks[0]
        return b"".join(chunks)

    def _open_next(self):
        """Switch to the next volume; returns False if the entry does not continue."""
        # only entries flagged as split have a continuation
        if not self._cur.flags & RAR_FILE_SPLIT_AFTER:
            return False

        if self._fd:
            self._fd.close()
            self._fd = None

        # open the following volume and verify its signature
        self._volfile = self._parser._next_volname(self._volfile)
        vol = open(self._volfile, "rb", 0)
        self._fd = vol
        expect = self._parser._expect_sig
        if vol.read(len(expect)) != expect:
            raise BadRarFile("Invalid signature")

        # walk the headers until the first non-mark/main one
        while True:
            hdr = self._parser._parse_header(vol)
            if not hdr:
                raise BadRarFile("Unexpected EOF")
            if hdr.type in (RAR_BLOCK_MARK, RAR_BLOCK_MAIN):
                if hdr.add_size:
                    vol.seek(hdr.add_size, 1)
                continue
            # it must be the continuation of our entry
            if hdr.orig_filename != self._inf.orig_filename:
                raise BadRarFile("Did not found file entry")
            self._cur = hdr
            self._cur_avail = hdr.add_size
            return True

    def readinto(self, buf):
        """Fill *buf* directly from the archive; returns the number of bytes stored."""
        view = memoryview(buf)
        size = len(buf)
        filled = 0
        while filled < size:
            # current part used up: move on to the next volume
            if self._cur_avail == 0 and not self._open_next():
                break

            # limit this read to what the current part still holds
            step = size - filled
            if step > self._cur_avail:
                step = self._cur_avail

            n = self._fd.readinto(view[filled: filled + step])
            if not n:
                break
            self._md_context.update(view[filled: filled + n])
            self._cur_avail -= n
            self._remain -= n
            filled += n
        return filled
class HeaderDecrypt:
    """File-like object that decrypts from another file"""

    def __init__(self, f, key, iv):
        """Wrap file object *f*, decrypting with AES-CBC using *key* and *iv*."""
        self.f = f
        self.ciph = AES_CBC_Decrypt(key, iv)
        # decrypted bytes that were produced but not yet handed out
        self.buf = b""

    def tell(self):
        """Current file pos - works only on block boundaries."""
        return self.f.tell()

    def read(self, cnt=None):
        """Read and decrypt.

        Args:
            cnt: number of plaintext bytes wanted.  ``None`` (the default)
                decrypts every remaining complete 16-byte block; earlier
                this default failed with TypeError on the size comparison.

        Returns:
            The decrypted bytes; fewer than *cnt* if the underlying file
            runs out of complete blocks.

        Raises:
            BadRarFile: an explicit *cnt* above 8 KiB was requested.
        """
        blklen = 16

        if cnt is None:
            # read-to-end: pending plaintext plus all complete blocks left
            parts = [self.buf]
            self.buf = b""
            while True:
                enc = self.f.read(blklen)
                if len(enc) < blklen:
                    break
                parts.append(self.ciph.decrypt(enc))
            return b"".join(parts)

        if cnt > 8 * 1024:
            raise BadRarFile("Bad count to header decrypt - wrong password?")

        # consume old data
        if cnt <= len(self.buf):
            res = self.buf[:cnt]
            self.buf = self.buf[cnt:]
            return res
        res = self.buf
        self.buf = b""
        cnt -= len(res)

        # decrypt new data, one cipher block at a time
        while cnt > 0:
            enc = self.f.read(blklen)
            if len(enc) < blklen:
                break
            dec = self.ciph.decrypt(enc)
            if cnt >= len(dec):
                res += dec
                cnt -= len(dec)
            else:
                # keep the unused tail of the block for the next call
                res += dec[:cnt]
                self.buf = dec[cnt:]
                cnt = 0

        return res
class XFile:
    """Uniform wrapper around either a path or an already-open file object.

    A file opened here from a path is closed by ``close()``; a file
    object passed in by the caller is rewound to offset 0 and left open.
    """
    __slots__ = ("_fd", "_need_close")

    def __init__(self, xfile, bufsize=1024):
        """Wrap *xfile*; paths are opened in binary mode with *bufsize* buffering."""
        borrowed = is_filelike(xfile)
        if not borrowed:
            self._need_close = True
            self._fd = open(xfile, "rb", bufsize)
            return
        self._need_close = False
        self._fd = xfile
        self._fd.seek(0)

    def read(self, n=None):
        """Forward ``read`` to the wrapped file."""
        fd = self._fd
        return fd.read(n)

    def tell(self):
        """Forward ``tell`` to the wrapped file."""
        fd = self._fd
        return fd.tell()

    def seek(self, ofs, whence=0):
        """Forward ``seek`` to the wrapped file."""
        fd = self._fd
        return fd.seek(ofs, whence)

    def readinto(self, buf):
        """Forward ``readinto`` to the wrapped file."""
        fd = self._fd
        return fd.readinto(buf)

    def close(self):
        """Close the wrapped file, but only if it was opened by this object."""
        if not self._need_close:
            return
        self._fd.close()

    def __enter__(self):
        return self

    def __exit__(self, typ, val, tb):
        self.close()
class NoHashContext:
    """Hash-context stand-in: accepts data, computes nothing, yields ``None``."""

    def __init__(self, data=None):
        """Accept and ignore optional initial data."""
        return None

    def update(self, data):
        """Ignore the data."""
        return None

    def digest(self):
        """There is no digest; always ``None``."""
        return None

    def hexdigest(self):
        """There is no hex digest; always ``None``."""
        return None
class CRC32Context:
    """Running CRC32 exposed through a hashlib-style interface."""
    __slots__ = ["_crc"]

    def __init__(self, data=None):
        """Start at zero and fold in *data* when it is non-empty."""
        self._crc = 0
        if data:
            self.update(data)

    def update(self, data):
        """Fold more bytes into the running checksum."""
        running = self._crc
        self._crc = crc32(data, running)

    def digest(self):
        """Current checksum as an integer."""
        return self._crc

    def hexdigest(self):
        """Current checksum as eight lowercase hex digits."""
        value = self.digest()
        return "%08x" % value
class Blake2SP:
    """Blake2sp hash context built from several ``blake2s`` leaf hashers.

    Input is cut into ``block_size`` pieces that are dealt round-robin to
    ``parallelism`` leaves; the final digest hashes the leaf digests in a
    root node.
    """
    __slots__ = ["_thread", "_buf", "_cur", "_digest"]
    digest_size = 32
    block_size = 64
    parallelism = 8

    def __init__(self, data=None):
        """Create the leaf hashers and optionally feed initial *data*."""
        self._buf = b""
        self._cur = 0
        self._digest = None
        last = self.parallelism - 1
        # the final leaf carries the last-node marker
        self._thread = [self._blake2s(idx, 0, idx == last)
                        for idx in range(self.parallelism)]

        if data:
            self.update(data)

    def _blake2s(self, ofs, depth, is_last):
        """Make one tree node: a blake2s configured for the given position."""
        return blake2s(node_offset=ofs, node_depth=depth, last_node=is_last,
                       depth=2, inner_size=32, fanout=self.parallelism)

    def _add_block(self, blk):
        """Feed one block to the current leaf and advance to the next leaf."""
        leaf = self._thread[self._cur]
        leaf.update(blk)
        self._cur = (self._cur + 1) % self.parallelism

    def update(self, data):
        """Hash data.
        """
        mv = memoryview(data)
        size = self.block_size
        start = 0

        # top up a partial block left over from the previous call
        if self._buf:
            missing = size - len(self._buf)
            if len(mv) < missing:
                self._buf += mv.tobytes()
                return
            self._add_block(self._buf + mv[:missing].tobytes())
            start = missing

        # hand out all complete blocks
        end = len(mv)
        while end - start >= size:
            self._add_block(mv[start: start + size])
            start += size

        # remember the incomplete tail
        self._buf = mv[start:].tobytes()

    def digest(self):
        """Return final digest value.
        """
        if self._digest is not None:
            return self._digest

        # flush the pending partial block
        if self._buf:
            self._add_block(self._buf)
            self._buf = b""

        # root node hashes the leaf digests in order
        root = self._blake2s(0, 1, True)
        for leaf in self._thread:
            root.update(leaf.digest())
        self._digest = root.digest()
        return self._digest

    def hexdigest(self):
        """Final digest as a lowercase hex string."""
        raw = self.digest()
        return hexlify(raw).decode("ascii")
class Rar3Sha1:
    """SHA1 wrapper that can reproduce RAR3's in-place input corruption.

    With ``rarbug=True`` and an update longer than 64 bytes, blocks of
    the caller's buffer are overwritten after hashing, so the buffer
    must then be writable (e.g. ``bytearray``).
    """
    digest_size = 20
    block_size = 64

    _BLK_BE = struct.Struct(b">16L")
    _BLK_LE = struct.Struct(b"<16L")

    __slots__ = ("_nbytes", "_md", "_rarbug")

    def __init__(self, data=b"", rarbug=False):
        """Create the context, optionally hashing initial *data*."""
        self._md = sha1()
        self._nbytes = 0
        self._rarbug = rarbug
        self.update(data)

    def update(self, data):
        """Hash *data*; in rarbug mode also scramble blocks of it in place."""
        self._md.update(data)
        # fill level of the 64-byte block before this call
        filled = self._nbytes & 63
        size = len(data)
        self._nbytes += size

        if self._rarbug and size > 64:
            blk = self.block_size
            first = blk - filled
            # every complete block from `first` onward gets corrupted
            for dpos in range(first, size - blk + 1, blk):
                self._corrupt(data, dpos)

    def digest(self):
        """Digest of everything hashed so far, as bytes."""
        return self._md.digest()

    def hexdigest(self):
        """Digest of everything hashed so far, as a hex string."""
        return self._md.hexdigest()

    def _corrupt(self, data, dpos):
        """Overwrite the 64 bytes at *dpos* with the expanded SHA1 word schedule."""
        words = list(self._BLK_BE.unpack_from(data, dpos))
        for t in range(16, 80):
            mixed = (words[(t - 3) & 15] ^ words[(t - 8) & 15]
                     ^ words[(t - 14) & 15] ^ words[(t - 16) & 15])
            # rotate left by one bit within 32 bits
            words[t & 15] = ((mixed << 1) | (mixed >> (32 - 1))) & 0xFFFFFFFF
        self._BLK_LE.pack_into(data, dpos, *words)
2874 ## Utility functions
# Precompiled little-endian unpackers for single integers
# (presumably ``Struct`` is ``struct.Struct`` - its import is outside this view).
S_LONG = Struct("<L")    # unsigned 32-bit
S_SHORT = Struct("<H")   # unsigned 16-bit
S_BYTE = Struct("<B")    # unsigned 8-bit

# Header layouts; field order as packed by rar3_decompress():
# block header: header crc, block type, flags, header size
S_BLK_HDR = Struct("<HBHH")
# file header: packed size, unpacked size, host os, data crc, dos date,
# version, method, name length, mode
S_FILE_HDR = Struct("<LLBLLBBHL")
# NOTE(review): field meaning of the comment header is not shown in this part of the file
S_COMMENT_HDR = Struct("<HBBH")
def load_vint(buf, pos):
    """Decode a RAR5 variable-length integer starting at *pos*.

    Each byte contributes its low 7 bits, least significant group first;
    a byte below 0x80 ends the number.  At most 11 bytes are examined.

    Returns:
        ``(value, next_pos)``.

    Raises:
        BadRarFile: no terminating byte was found within the limit.
    """
    stop = min(pos + 11, len(buf))
    value = 0
    shift = 0
    for idx in range(pos, stop):
        byte = buf[idx]
        value += (byte & 0x7F) << shift
        if byte < 0x80:
            return value, idx + 1
        shift += 7
    raise BadRarFile("cannot load vint")
def load_byte(buf, pos):
    """Fetch one unsigned byte at *pos*; returns ``(value, next_pos)``.

    Raises:
        BadRarFile: *pos* is at or past the end of *buf*.
    """
    nxt = pos + 1
    if len(buf) < nxt:
        raise BadRarFile("cannot load byte")
    (value,) = S_BYTE.unpack_from(buf, pos)
    return value, nxt
def load_le32(buf, pos):
    """Fetch a little-endian unsigned 32-bit integer; returns ``(value, next_pos)``.

    Raises:
        BadRarFile: fewer than four bytes are available at *pos*.
    """
    nxt = pos + 4
    if len(buf) < nxt:
        raise BadRarFile("cannot load le32")
    (value,) = S_LONG.unpack_from(buf, pos)
    return value, nxt
def load_bytes(buf, num, pos):
    """Slice *num* bytes out of *buf* at *pos*; returns ``(data, next_pos)``.

    Raises:
        BadRarFile: the slice would run past the end of *buf*.
    """
    stop = pos + num
    if len(buf) < stop:
        raise BadRarFile("cannot load bytes")
    chunk = buf[pos: stop]
    return chunk, stop
def load_vstr(buf, pos):
    """Read a vint length followed by that many bytes; returns ``(data, next_pos)``."""
    length, start = load_vint(buf, pos)
    return load_bytes(buf, length, start)
def load_dostime(buf, pos):
    """Read a 32-bit DOS timestamp and convert it; returns ``(datetime, next_pos)``."""
    raw, nxt = load_le32(buf, pos)
    when = to_datetime(parse_dos_time(raw))
    return when, nxt
def load_unixtime(buf, pos):
    """Read a 32-bit unix timestamp as a UTC datetime; returns ``(datetime, next_pos)``."""
    seconds, nxt = load_le32(buf, pos)
    when = datetime.fromtimestamp(seconds, timezone.utc)
    return when, nxt
def load_windowstime(buf, pos):
    """Read a 64-bit Windows timestamp (100ns units) as a UTC datetime.

    The value is stored as two little-endian 32-bit halves, low half
    first.  Returns ``(datetime, next_pos)``; sub-microsecond precision
    is kept via ``to_nsdatetime``.
    """
    # seconds between the windows epoch (1601) and the unix epoch (1970)
    epoch_delta = 11644473600
    low, pos = load_le32(buf, pos)
    high, pos = load_le32(buf, pos)
    ticks = (high << 32) | low
    secs, frac_ticks = divmod(ticks, 10000000)
    when = datetime.fromtimestamp(secs - epoch_delta, timezone.utc)
    # one tick is 100 nanoseconds
    when = to_nsdatetime(when, frac_ticks * 100)
    return when, pos
#
# volume numbering
#

# Matches a string made up only of ASCII digits; used by _next_oldvol()
# to recognise numbered extensions.
_rc_num = re.compile('^[0-9]+$')
def _next_newvol(volfile):
    """Compute the following volume name under the new naming scheme.

    A name without extension, or with ``.exe``/``.sfx``, is first given
    a ``.rar`` extension.  Then the last digit found in the file name
    part (the scan stops at a path separator) is incremented.

    Raises:
        BadRarName: the file name contains no digit.
    """
    stem, ext = os.path.splitext(volfile)
    if ext.lower() in ("", ".exe", ".sfx"):
        volfile = stem + ".rar"

    # scan backwards for the last digit of the file name
    for idx in range(len(volfile) - 1, -1, -1):
        ch = volfile[idx]
        if "0" <= ch <= "9":
            return _inc_volname(volfile, idx, False)
        if ch in ("/", os.sep):
            break
    raise BadRarName("Cannot construct volume name: " + volfile)
def _next_oldvol(volfile):
    """Compute the following volume name under the old naming scheme.

    The counter lives in the extension: ``.rar`` is followed by ``.r00``,
    and an extension ending in digits is incremented.
    """
    stem, ext = os.path.splitext(volfile)
    if ext.lower() in ("", ".exe", ".sfx"):
        ext = ".rar"

    counter = ext[2:]
    if not _rc_num.match(counter):
        # .rar -> .r00
        return stem + ext[:2] + "00"
    return stem + _inc_volname(ext, len(ext) - 1, True)
2995 def _inc_volname(volfile, i, inc_chars):
2996 """increase digits with carry, otherwise just increment char
2998 fn = list(volfile)
2999 while i >= 0:
3000 if fn[i] == "9":
3001 fn[i] = "0"
3002 i -= 1
3003 if i < 0:
3004 fn.insert(0, "1")
3005 elif "0" <= fn[i] < "9" or inc_chars:
3006 fn[i] = chr(ord(fn[i]) + 1)
3007 break
3008 else:
3009 fn.insert(i + 1, "1")
3010 break
3011 return "".join(fn)
def _parse_ext_time(h, data, pos):
    """Decode the RAR3 extended time block into *h*; returns the new position.

    The 16-bit flags word holds one 4-bit group per timestamp, highest
    group first: mtime, ctime, atime, arctime.
    """
    # the flags word (and everything after it) is optional
    flags = 0
    if len(data) >= pos + 2:
        (flags,) = S_SHORT.unpack_from(data, pos)
        pos += 2

    # mtime refines the timestamp already stored in the header
    mtime, pos = _parse_xtime(flags >> 12, data, pos, h.mtime)
    h.ctime, pos = _parse_xtime(flags >> 8, data, pos)
    h.atime, pos = _parse_xtime(flags >> 4, data, pos)
    h.arctime, pos = _parse_xtime(flags, data, pos)

    if mtime:
        h.mtime = mtime
        h.date_time = mtime.timetuple()[:6]
    return pos
3033 def _parse_xtime(flag, data, pos, basetime=None):
3034 """Parse one RAR3 extended time field
3036 res = None
3037 if flag & 8:
3038 if not basetime:
3039 basetime, pos = load_dostime(data, pos)
3041 # load second fractions of 100ns units
3042 rem = 0
3043 cnt = flag & 3
3044 for _ in range(cnt):
3045 b, pos = load_byte(data, pos)
3046 rem = (b << 16) | (rem >> 8)
3048 # dostime has room for 30 seconds only, correct if needed
3049 if flag & 4 and basetime.second < 59:
3050 basetime = basetime.replace(second=basetime.second + 1)
3052 res = to_nsdatetime(basetime, rem * 100)
3053 return res, pos
def is_filelike(obj):
    """Tell a file object (True) from a file name (False).

    ``bytes``, ``str`` and ``Path`` count as names.  Anything else must
    provide ``read``, ``tell`` and ``seek``.

    Raises:
        ValueError: *obj* is neither a name nor a usable file object.
    """
    if isinstance(obj, (bytes, str, Path)):
        return False
    required = ("read", "tell", "seek")
    if not all(hasattr(obj, attr) for attr in required):
        raise ValueError("Invalid object passed as file")
    return True
def rar5_s2k(pwd, salt, kdf_count):
    """Derive the RAR5 key material from a password via PBKDF2-HMAC-SHA256.

    Byte passwords are decoded as UTF-8.  The password is cut to
    ``RAR_MAX_PASSWORD`` UTF-16 code units and then hashed in its UTF-8 form.
    """
    text = pwd if isinstance(pwd, str) else pwd.decode("utf8")
    utf16 = text.encode("utf-16le")[:RAR_MAX_PASSWORD*2]
    utf8 = utf16.decode("utf-16le").encode("utf8")
    return pbkdf2_hmac("sha256", utf8, salt, kdf_count)
def rar3_s2k(pwd, salt):
    """Derive the RAR3 AES key and IV from a password and salt.

    Runs 0x40000 rounds of the RAR3-flavoured SHA1 over password+salt
    plus a 3-byte round counter.  One IV byte is taken from the running
    digest at the start of every 0x4000-round group.

    Returns:
        ``(key, iv)``: the first 16 digest bytes with each 32-bit word
        byte-swapped, and the 16 collected IV bytes.
    """
    text = pwd if isinstance(pwd, str) else pwd.decode("utf8")
    utf16 = text.encode("utf-16le")[:RAR_MAX_PASSWORD*2]
    # mutable on purpose: the rarbug hash may rewrite it in place
    seed = bytearray(utf16 + salt)
    md = Rar3Sha1(rarbug=True)
    iv = b""
    for rnd in range(16 * 0x4000):
        counter = S_LONG.pack(rnd)
        md.update(seed)
        md.update(counter[:3])
        if rnd % 0x4000 == 0:
            iv += md.digest()[19:20]
    key_be = md.digest()[:16]
    key_le = pack("<LLLL", *unpack(">LLLL", key_be))
    return key_le, iv
def rar3_decompress(vers, meth, data, declen=0, flags=0, crc=0, pwd=None, salt=None):
    """Unpack a bare compressed blob by wrapping it in a throw-away archive.

    Used for data with non-standard header - eg. comments.  Stored,
    unencrypted data is returned unchanged.  Otherwise a minimal RAR3
    file holding one entry named ``data`` is written to a temp file and
    run through the configured tool; whatever the tool prints is returned.
    """
    # nothing to do for plain stored data
    if meth == RAR_M0 and (flags & RAR_FILE_PASSWORD) == 0:
        return data

    # keep only the flags relevant to unpacking
    flags &= RAR_FILE_PASSWORD | RAR_FILE_SALT | RAR_FILE_DICTMASK
    flags |= RAR_LONG_BLOCK

    # file header with a fixed name and date
    name = b"data"
    dosdate = ((2010 - 1980) << 25) + (12 << 21) + (31 << 16)
    attrs = DOS_MODE_ARCHIVE
    body = S_FILE_HDR.pack(len(data), declen, RAR_OS_MSDOS, crc,
                           dosdate, vers, meth, len(name), attrs)
    body += name
    if salt:
        body += salt

    # block header: build once with a zero crc, then again with the real one
    total = S_BLK_HDR.size + len(body)
    draft = S_BLK_HDR.pack(0, RAR_BLOCK_FILE, flags, total) + body
    hcrc = crc32(draft[2:]) & 0xFFFF
    file_hdr = S_BLK_HDR.pack(hcrc, RAR_BLOCK_FILE, flags, total) + body

    # archive main header
    main_hdr = S_BLK_HDR.pack(0x90CF, RAR_BLOCK_MAIN, 0, 13) + b"\0" * (2 + 4)

    # decompress via temp rar
    setup = tool_setup()
    tmpfd, tmpname = mkstemp(suffix=".rar", dir=HACK_TMP_DIR)
    tmpf = os.fdopen(tmpfd, "wb")
    try:
        with tmpf:
            tmpf.write(RAR_ID + main_hdr + file_hdr + data)

        # a password is only passed on for encrypted data, and only if non-empty
        curpwd = pwd if (flags & RAR_FILE_PASSWORD and pwd) else None
        cmd = setup.open_cmdline(curpwd, tmpname)
        proc = custom_popen(cmd)
        return proc.communicate()[0]
    finally:
        os.unlink(tmpname)
def sanitize_filename(fname, pathsep, is_win32):
    """Turn an archive path into one that is safe to create on disk.

    Bad characters become ``_``; empty, ``.`` and ``..`` components are
    dropped.  In win32 mode a leading drive prefix is removed and a
    trailing space or dot of a component is replaced with ``_``.  The
    surviving components are joined with *pathsep*.
    """
    # drop "X:" drive prefix
    if is_win32 and fname[1:2] == ":":
        fname = fname[2:]

    bad_chars = RC_BAD_CHARS_WIN32 if is_win32 else RC_BAD_CHARS_UNIX
    fname = bad_chars.sub("_", fname)

    kept = []
    for part in fname.split("/"):
        if part in ("", ".", ".."):
            continue
        if is_win32 and part.endswith((" ", ".")):
            part = part[:-1] + "_"
        kept.append(part)
    return pathsep.join(kept)
def empty_read(src, size, blklen):
    """Consume exactly *size* bytes from *src*, at most *blklen* per read.

    Raises:
        BadRarFile: *src* ran dry before *size* bytes were consumed.
    """
    left = size
    while left > 0:
        chunk = src.read(blklen if left > blklen else left)
        if not chunk:
            raise BadRarFile("cannot load data")
        left -= len(chunk)
def to_datetime(t):
    """Build a datetime from a ``(year, month, day, hour, minute, second)`` tuple.

    Valid tuples convert directly.  Otherwise month and day are clamped
    into range (February is capped at 28 days) and the time fields are
    capped at their maximum before converting.
    """
    year, mon, day, hour, minute, sec = t

    # fast path: values are usually fine as they are
    try:
        return datetime(year, mon, day, hour, minute, sec)
    except ValueError:
        pass

    # slow path: force every field into its legal range
    days_in_month = (0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
    mon = max(1, min(mon, 12))
    day = max(1, min(day, days_in_month[mon]))
    return datetime(year, mon, day, min(hour, 23), min(minute, 59), min(sec, 59))
def parse_dos_time(stamp):
    """Split a 32-bit DOS timestamp into ``(year, month, day, hour, minute, second)``.

    Bit layout from the low end: 5 bits of 2-second units, 6 bits minute,
    5 bits hour, 5 bits day, 4 bits month, 7 bits years since 1980.
    """
    half_secs = stamp & 0x1F
    minute = (stamp >> 5) & 0x3F
    hour = (stamp >> 11) & 0x1F
    day = (stamp >> 16) & 0x1F
    month = (stamp >> 21) & 0x0F
    year = ((stamp >> 25) & 0x7F) + 1980
    return (year, month, day, hour, minute, half_secs * 2)
# pylint: disable=arguments-differ,signature-differs
class nsdatetime(datetime):
    """Datetime that carries nanoseconds.

    Arithmetic operations will lose nanoseconds.

    Construction with a nanosecond value that is a whole number of
    microseconds returns a plain :class:`datetime` instead, so instances
    of this class always have a true sub-microsecond part.

    .. versionadded:: 4.0
    """
    __slots__ = ("nanosecond",)
    nanosecond: int     #: Number of nanoseconds, 0 <= nanosecond <= 999999999

    def __new__(cls, year, month=None, day=None, hour=0, minute=0, second=0,
                microsecond=0, tzinfo=None, *, fold=0, nanosecond=0):
        # a non-zero nanosecond takes precedence over microsecond
        usec, mod = divmod(nanosecond, 1000) if nanosecond else (microsecond, 0)
        if mod == 0:
            # nothing below microsecond resolution: plain datetime suffices
            return datetime(year, month, day, hour, minute, second, usec, tzinfo, fold=fold)
        self = super().__new__(cls, year, month, day, hour, minute, second, usec, tzinfo, fold=fold)
        self.nanosecond = nanosecond
        return self

    @staticmethod
    def _nanos_of(other):
        """Sub-second part of *other* in nanoseconds."""
        if isinstance(other, nsdatetime):
            return other.nanosecond
        return other.microsecond * 1000

    def isoformat(self, sep="T", timespec="auto"):
        """Formats with nanosecond precision by default.
        """
        if timespec == "auto":
            pre, post = super().isoformat(sep, "microseconds").split(".", 1)
            # post[6:] is whatever follows the six microsecond digits (utc offset)
            return f"{pre}.{self.nanosecond:09d}{post[6:]}"
        return super().isoformat(sep, timespec)

    def astimezone(self, tz=None):
        """Convert to new timezone.
        """
        tmp = super().astimezone(tz)
        return self.__class__(tmp.year, tmp.month, tmp.day, tmp.hour, tmp.minute, tmp.second,
                              nanosecond=self.nanosecond, tzinfo=tmp.tzinfo, fold=tmp.fold)

    def replace(self, year=None, month=None, day=None, hour=None, minute=None, second=None,
                microsecond=None, tzinfo=None, *, fold=None, nanosecond=None):
        """Return new timestamp with specified fields replaced.

        An explicit *nanosecond* wins over *microsecond*; an explicit
        *microsecond* replaces the whole sub-second part.
        """
        return self.__class__(
            self.year if year is None else year,
            self.month if month is None else month,
            self.day if day is None else day,
            self.hour if hour is None else hour,
            self.minute if minute is None else minute,
            self.second if second is None else second,
            nanosecond=((self.nanosecond if microsecond is None else microsecond * 1000)
                        if nanosecond is None else nanosecond),
            tzinfo=self.tzinfo if tzinfo is None else tzinfo,
            fold=self.fold if fold is None else fold)

    def __hash__(self):
        return hash((super().__hash__(), self.nanosecond)) if self.nanosecond else super().__hash__()

    def __eq__(self, other):
        base = super().__eq__(other)
        # not a datetime: let Python try the reflected operation instead of
        # truth-testing NotImplemented and touching missing attributes
        if base is NotImplemented:
            return base
        return base and self.nanosecond == self._nanos_of(other)

    def __gt__(self, other):
        base = super().__gt__(other)
        if base is NotImplemented:
            return base
        if base:
            return True
        # equal down to the microsecond: the nanoseconds decide
        return bool(super().__eq__(other)) and self.nanosecond > self._nanos_of(other)

    def __lt__(self, other):
        return not (self > other or self == other)

    def __ge__(self, other):
        return not self < other

    def __le__(self, other):
        return not self > other

    def __ne__(self, other):
        return not self == other
def to_nsdatetime(dt, nsec):
    """Attach a nanosecond value to *dt*.

    A zero (or otherwise false) *nsec* returns *dt* itself.  Otherwise a
    new object is built from the date, time, tzinfo and fold of *dt*
    with *nsec* as its sub-second part.
    """
    if nsec:
        return nsdatetime(dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second,
                          tzinfo=dt.tzinfo, fold=dt.fold, nanosecond=nsec)
    return dt
def to_nsecs(dt):
    """Convert datetime instance to nanoseconds since the unix epoch.

    The whole seconds are taken from a copy with the sub-second part
    zeroed, and the fraction is added on top.  Truncating the fractional
    timestamp instead rounds toward zero, which put times before 1970
    with a sub-second part one second too late.
    """
    whole_secs = int(dt.replace(microsecond=0).timestamp())
    nsecs = dt.nanosecond if isinstance(dt, nsdatetime) else dt.microsecond * 1000
    return whole_secs * 1000000000 + nsecs
def custom_popen(cmd):
    """Start *cmd* with stdin closed off and stderr merged into a stdout pipe.

    Raises:
        RarCannotExec: the program is missing or may not be executed.
        OSError: any other failure to start the process.
    """
    # CREATE_NO_WINDOW
    flags = 0x08000000 if WIN32 else 0
    try:
        return Popen(cmd, bufsize=0, stdout=PIPE, stderr=STDOUT, stdin=DEVNULL,
                     creationflags=flags)
    except OSError as ex:
        if ex.errno == errno.ENOENT:
            raise RarCannotExec("Unrar not installed?") from None
        if ex.errno in (errno.EACCES, errno.EPERM):
            raise RarCannotExec("Cannot execute unrar") from None
        raise
def check_returncode(code, out, errmap):
    """Raise exception according to unrar exit code.

    Args:
        code: process exit status; 0 means success and returns silently.
        out: captured tool output, appended to the message when non-empty.
        errmap: list indexed by exit code giving the exception class to
            raise; entries may be ``None`` for codes without a mapping.

    Raises:
        The mapped exception class for *code*; ``RarUserBreak`` for an
        unmapped 255; ``RarSignalExit`` for negative codes;
        ``RarUnknownError`` for everything else, including codes whose
        *errmap* entry is ``None`` (these used to end in a TypeError from
        calling ``None``).
    """
    if code == 0:
        return

    exc = None
    if 0 < code < len(errmap):
        exc = errmap[code]
    elif code == 255:
        exc = RarUserBreak
    elif code < 0:
        exc = RarSignalExit

    # unknown code, or a hole in the table
    if exc is None:
        exc = RarUnknownError

    # format message
    if out:
        msg = "%s [%d]: %s" % (exc.__doc__, code, out)
    else:
        msg = "%s [%d]" % (exc.__doc__, code)

    raise exc(msg)
def membuf_tempfile(memfile):
    """Copy a file object into a new ``.rar`` temp file and return its path.

    *memfile* is rewound first.  If copying fails for any reason, the
    temp file is removed and the error is re-raised.
    """
    memfile.seek(0, 0)

    handle, path = mkstemp(suffix=".rar", dir=HACK_TMP_DIR)
    out = os.fdopen(handle, "wb")

    try:
        with out:
            shutil.copyfileobj(memfile, out, BSIZE)
    except BaseException:
        os.unlink(path)
        raise
    return path
3369 # Find working command-line tool
class ToolSetup:
    """Command-line builder for one unpacking tool, driven by a config dict.

    The dict provides ``open_cmd``, ``check_cmd``, ``password``,
    ``no_password`` and ``errmap`` entries.
    """

    def __init__(self, setup):
        self.setup = setup

    def check(self):
        """Run the tool's check command; True if it starts and exits with 0."""
        cmdline = self.get_cmdline("check_cmd", None)
        try:
            proc = custom_popen(cmdline)
            proc.communicate()
        except RarCannotExec:
            return False
        return proc.returncode == 0

    def open_cmdline(self, pwd, rarfn, filefn=None):
        """Command line that writes archive *rarfn* (optionally only *filefn*) to stdout."""
        cmdline = self.get_cmdline("open_cmd", pwd)
        cmdline.append(rarfn)
        if filefn:
            self.add_file_arg(cmdline, filefn)
        return cmdline

    def get_errmap(self):
        """Exit-code to exception table of this tool."""
        return self.setup["errmap"]

    def get_cmdline(self, key, pwd, nodash=False):
        """Build the argument list for config entry *key*.

        The first element of the entry names a module-level variable that
        holds the executable.  Except for ``check_cmd``, password
        arguments and (unless *nodash*) a ``--`` separator are appended.
        """
        cmdline = list(self.setup[key])
        tool_var = cmdline[0]
        cmdline[0] = globals()[tool_var]
        if key != "check_cmd":
            self.add_password_arg(cmdline, pwd)
            if not nodash:
                cmdline.append("--")
        return cmdline

    def add_file_arg(self, cmdline, filename):
        """Append the name of the archive member to extract."""
        cmdline.append(filename)

    def add_password_arg(self, cmdline, pwd):
        """Append password switch to commandline.
        """
        if pwd is None:
            cmdline.extend(self.setup["no_password"])
            return

        if not isinstance(pwd, str):
            pwd = pwd.decode("utf8")

        switch = self.setup["password"]
        if switch is None:
            tool = self.setup["open_cmd"][0]
            raise RarCannotExec(f"{tool} does not support passwords")
        if isinstance(switch, str):
            # single argument: switch and password glued together
            cmdline.append(switch + pwd)
            return
        # separate arguments: switch(es) first, then the password
        cmdline.extend(switch)
        cmdline.append(pwd)
# Tool descriptions consumed by ToolSetup.  In each table:
# - "open_cmd" / "check_cmd": argument tuples whose first element is the
#   name of the module-level variable holding the executable
#   (looked up via globals() in ToolSetup.get_cmdline)
# - "password": None (unsupported), a str (switch glued to the password)
#   or a tuple (switch arguments placed before the password)
# - "no_password": arguments used when no password is given
# - "errmap": list indexed by exit code giving the exception class to raise

UNRAR_CONFIG = {
    "open_cmd": ("UNRAR_TOOL", "p", "-inul"),
    "check_cmd": ("UNRAR_TOOL", "-inul", "-?"),
    "password": "-p",
    "no_password": ("-p-",),
    # map return code to exception class, codes from rar.txt
    "errmap": [None,
               RarWarning, RarFatalError, RarCRCError, RarLockedArchiveError,    # 1..4
               RarWriteError, RarOpenError, RarUserError, RarMemoryError,        # 5..8
               RarCreateError, RarNoFilesError, RarWrongPassword]                # 9..11
}

# Problems with unar RAR backend:
# - Does not support RAR2 locked files [fails to read]
# - Does not support RAR5 Blake2sp hash [reading works]
UNAR_CONFIG = {
    "open_cmd": ("UNAR_TOOL", "-q", "-o", "-"),
    "check_cmd": ("UNAR_TOOL", "-version"),
    "password": ("-p",),
    "no_password": ("-p", ""),
    "errmap": [None],
}

# Problems with libarchive RAR backend:
# - Does not support solid archives.
# - Does not support password-protected archives.
# - Does not support RARVM-based compression filters.
BSDTAR_CONFIG = {
    "open_cmd": ("BSDTAR_TOOL", "-x", "--to-stdout", "-f"),
    "check_cmd": ("BSDTAR_TOOL", "--version"),
    "password": None,
    "no_password": (),
    "errmap": [None],
}

# Codes 3..6 have no exception mapped (None entries)
SEVENZIP_CONFIG = {
    "open_cmd": ("SEVENZIP_TOOL", "e", "-so", "-bb0"),
    "check_cmd": ("SEVENZIP_TOOL", "i"),
    "password": "-p",
    "no_password": ("-p",),
    "errmap": [None,
               RarWarning, RarFatalError, None, None,    # 1..4
               None, None, RarUserError, RarMemoryError]    # 5..8
}

# Same settings as SEVENZIP_CONFIG, but the executable comes from SEVENZIP2_TOOL
SEVENZIP2_CONFIG = {
    "open_cmd": ("SEVENZIP2_TOOL", "e", "-so", "-bb0"),
    "check_cmd": ("SEVENZIP2_TOOL", "i"),
    "password": "-p",
    "no_password": ("-p",),
    "errmap": [None,
               RarWarning, RarFatalError, None, None,    # 1..4
               None, None, RarUserError, RarMemoryError]    # 5..8
}

# ToolSetup picked by tool_setup(); None until the first successful detection
CURRENT_SETUP = None
def tool_setup(unrar=True, unar=True, bsdtar=True, sevenzip=True, sevenzip2=True, force=False):
    """Return the ToolSetup for the first working tool, caching the result.

    The enabled backends are probed in the order unrar, unar, 7zip,
    second 7zip, bsdtar.  A cached result is returned without probing
    unless *force* is set.

    Raises:
        RarCannotExec: none of the enabled tools passes its check.
    """
    global CURRENT_SETUP
    if force:
        CURRENT_SETUP = None
    if CURRENT_SETUP is not None:
        return CURRENT_SETUP

    # probing order matters: the first working tool wins
    order = (
        (unrar, UNRAR_CONFIG),
        (unar, UNAR_CONFIG),
        (sevenzip, SEVENZIP_CONFIG),
        (sevenzip2, SEVENZIP2_CONFIG),
        (bsdtar, BSDTAR_CONFIG),
    )
    candidates = [conf for enabled, conf in order if enabled]

    for conf in candidates:
        candidate = ToolSetup(conf)
        if candidate.check():
            CURRENT_SETUP = candidate
            break

    if CURRENT_SETUP is None:
        raise RarCannotExec("Cannot find working tool")
    return CURRENT_SETUP
def main(args):
    """Minimal command-line interface for rarfile module.
    """
    # NOTE: the docstring above doubles as the argparse description, keep it as is
    import argparse
    parser = argparse.ArgumentParser(description=main.__doc__)
    modes = parser.add_mutually_exclusive_group(required=True)
    modes.add_argument("-l", "--list", metavar="<rarfile>",
                       help="Show archive listing")
    modes.add_argument("-e", "--extract", nargs=2,
                       metavar=("<rarfile>", "<output_dir>"),
                       help="Extract archive into target dir")
    modes.add_argument("-t", "--test", metavar="<rarfile>",
                       help="Test if a archive is valid")
    opts = parser.parse_args(args)

    if opts.list:
        with RarFile(opts.list) as rf:
            rf.printdir()
        return

    if opts.test:
        with RarFile(opts.test) as rf:
            rf.testrar()
        return

    if opts.extract:
        archive, outdir = opts.extract[0], opts.extract[1]
        with RarFile(archive) as rf:
            rf.extractall(outdir)
# Run the command-line interface when executed as a script,
# passing along everything after the program name.
if __name__ == "__main__":
    main(sys.argv[1:])