run_dump_all: update list of pythons
[rarfile.git] / rarfile.py
blobba6611cb3c5db4c811aa6fd4949910e94830b376
1 # rarfile.py
3 # Copyright (c) 2005-2020 Marko Kreen <markokr@gmail.com>
5 # Permission to use, copy, modify, and/or distribute this software for any
6 # purpose with or without fee is hereby granted, provided that the above
7 # copyright notice and this permission notice appear in all copies.
9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 """RAR archive reader.
This is a Python module for reading RAR archives.  The interface
is kept as close to :mod:`zipfile` as possible.
22 Basic logic:
23 - Parse archive structure with Python.
24 - Extract non-compressed files with Python
25 - Extract compressed files with unrar.
26 - Optionally write compressed data to temp file to speed up unrar,
27 otherwise it needs to scan whole archive on each execution.
29 Example::
31 import rarfile
33 rf = rarfile.RarFile("myarchive.rar")
34 for f in rf.infolist():
35 print(f.filename, f.file_size)
36 if f.filename == "README":
37 print(rf.read(f))
39 Archive files can also be accessed via file-like object returned
40 by :meth:`RarFile.open`::
42 import rarfile
44 with rarfile.RarFile("archive.rar") as rf:
45 with rf.open("README") as f:
46 for ln in f:
47 print(ln.strip())
49 For decompression to work, either ``unrar`` or ``unar`` tool must be in PATH.
50 """
52 import errno
53 import io
54 import os
55 import re
56 import shutil
57 import struct
58 import sys
59 import warnings
60 from binascii import crc32, hexlify
61 from datetime import datetime, timezone
62 from hashlib import blake2s, pbkdf2_hmac, sha1
63 from pathlib import Path
64 from struct import Struct, pack, unpack
65 from subprocess import DEVNULL, PIPE, STDOUT, Popen
66 from tempfile import mkstemp
68 AES = None
70 # only needed for encrypted headers
71 try:
72 try:
73 from cryptography.hazmat.backends import default_backend
74 from cryptography.hazmat.primitives.ciphers import (
75 Cipher, algorithms, modes,
77 _have_crypto = 1
78 except ImportError:
79 from Crypto.Cipher import AES
80 _have_crypto = 2
81 except ImportError:
82 _have_crypto = 0
class AES_CBC_Decrypt:
    """AES-CBC decryptor exposing a single ``decrypt`` callable.

    Which backend is used depends on the module-level ``_have_crypto``
    value: ``2`` selects the ``Crypto.Cipher.AES`` implementation, any
    other value uses the ``cryptography`` package primitives.
    """

    def __init__(self, key, iv):
        if _have_crypto != 2:
            cipher = Cipher(algorithms.AES(key), modes.CBC(iv), default_backend())
            self.decrypt = cipher.decryptor().update
            return
        self.decrypt = AES.new(key, AES.MODE_CBC, iv).decrypt
# Version string of this module.
__version__ = "4.1a1"

# export only interesting items
__all__ = ["get_rar_version", "is_rarfile", "is_rarfile_sfx", "RarInfo", "RarFile", "RarExtFile"]
##
## Module configuration.  Can be tuned after importing.
##

#: executable for unrar tool
UNRAR_TOOL = "unrar"

#: executable for unar tool
UNAR_TOOL = "unar"

#: executable for bsdtar tool
BSDTAR_TOOL = "bsdtar"

#: default fallback charset
DEFAULT_CHARSET = "windows-1252"

#: list of encodings to try, with fallback to DEFAULT_CHARSET if none succeed
TRY_ENCODINGS = ("utf8", "utf-16le")

#: whether to speed up decompression by using tmp archive
USE_EXTRACT_HACK = 1

#: limit the filesize for tmp archive usage
HACK_SIZE_LIMIT = 20 * 1024 * 1024

#: set specific directory for mkstemp() used by hack dir usage
HACK_TMP_DIR = None

#: Separator for path name components.  Always "/".
PATH_SEP = "/"
##
## rar constants
##

# block types
RAR_BLOCK_MARK = 0x72           # r
RAR_BLOCK_MAIN = 0x73           # s
RAR_BLOCK_FILE = 0x74           # t
RAR_BLOCK_OLD_COMMENT = 0x75    # u
RAR_BLOCK_OLD_EXTRA = 0x76      # v
RAR_BLOCK_OLD_SUB = 0x77        # w
RAR_BLOCK_OLD_RECOVERY = 0x78   # x
RAR_BLOCK_OLD_AUTH = 0x79       # y
RAR_BLOCK_SUB = 0x7a            # z
RAR_BLOCK_ENDARC = 0x7b         # {

# flags for RAR_BLOCK_MAIN
RAR_MAIN_VOLUME = 0x0001
RAR_MAIN_COMMENT = 0x0002
RAR_MAIN_LOCK = 0x0004
RAR_MAIN_SOLID = 0x0008
RAR_MAIN_NEWNUMBERING = 0x0010
RAR_MAIN_AUTH = 0x0020
RAR_MAIN_RECOVERY = 0x0040
RAR_MAIN_PASSWORD = 0x0080
RAR_MAIN_FIRSTVOLUME = 0x0100
RAR_MAIN_ENCRYPTVER = 0x0200

# flags for RAR_BLOCK_FILE
RAR_FILE_SPLIT_BEFORE = 0x0001
RAR_FILE_SPLIT_AFTER = 0x0002
RAR_FILE_PASSWORD = 0x0004
RAR_FILE_COMMENT = 0x0008
RAR_FILE_SOLID = 0x0010
RAR_FILE_DICTMASK = 0x00e0
RAR_FILE_DICT64 = 0x0000
RAR_FILE_DICT128 = 0x0020
RAR_FILE_DICT256 = 0x0040
RAR_FILE_DICT512 = 0x0060
RAR_FILE_DICT1024 = 0x0080
RAR_FILE_DICT2048 = 0x00a0
RAR_FILE_DICT4096 = 0x00c0
# directory marker: all bits of RAR_FILE_DICTMASK set
RAR_FILE_DIRECTORY = 0x00e0
RAR_FILE_LARGE = 0x0100
RAR_FILE_UNICODE = 0x0200
RAR_FILE_SALT = 0x0400
RAR_FILE_VERSION = 0x0800
RAR_FILE_EXTTIME = 0x1000
RAR_FILE_EXTFLAGS = 0x2000

# flags for RAR_BLOCK_ENDARC
RAR_ENDARC_NEXT_VOLUME = 0x0001
RAR_ENDARC_DATACRC = 0x0002
RAR_ENDARC_REVSPACE = 0x0004
RAR_ENDARC_VOLNR = 0x0008

# flags common to all blocks
RAR_SKIP_IF_UNKNOWN = 0x4000
RAR_LONG_BLOCK = 0x8000

# Host OS types
RAR_OS_MSDOS = 0    #: MSDOS (only in RAR3)
RAR_OS_OS2 = 1      #: OS2 (only in RAR3)
RAR_OS_WIN32 = 2    #: Windows
RAR_OS_UNIX = 3     #: UNIX
RAR_OS_MACOS = 4    #: MacOS (only in RAR3)
RAR_OS_BEOS = 5     #: BeOS (only in RAR3)

# Compression methods - "0".."5"
RAR_M0 = 0x30   #: No compression.
RAR_M1 = 0x31   #: Compression level `-m1` - Fastest compression.
RAR_M2 = 0x32   #: Compression level `-m2`.
RAR_M3 = 0x33   #: Compression level `-m3`.
RAR_M4 = 0x34   #: Compression level `-m4`.
RAR_M5 = 0x35   #: Compression level `-m5` - Maximum compression.
#
# RAR5 constants
#

# block types
RAR5_BLOCK_MAIN = 1
RAR5_BLOCK_FILE = 2
RAR5_BLOCK_SERVICE = 3
RAR5_BLOCK_ENCRYPTION = 4
RAR5_BLOCK_ENDARC = 5

# flags common to all blocks
RAR5_BLOCK_FLAG_EXTRA_DATA = 0x01
RAR5_BLOCK_FLAG_DATA_AREA = 0x02
RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN = 0x04
RAR5_BLOCK_FLAG_SPLIT_BEFORE = 0x08
RAR5_BLOCK_FLAG_SPLIT_AFTER = 0x10
RAR5_BLOCK_FLAG_DEPENDS_PREV = 0x20
RAR5_BLOCK_FLAG_KEEP_WITH_PARENT = 0x40

# flags for RAR5_BLOCK_MAIN
RAR5_MAIN_FLAG_ISVOL = 0x01
RAR5_MAIN_FLAG_HAS_VOLNR = 0x02
RAR5_MAIN_FLAG_SOLID = 0x04
RAR5_MAIN_FLAG_RECOVERY = 0x08
RAR5_MAIN_FLAG_LOCKED = 0x10

# flags for RAR5_BLOCK_FILE
RAR5_FILE_FLAG_ISDIR = 0x01
RAR5_FILE_FLAG_HAS_MTIME = 0x02
RAR5_FILE_FLAG_HAS_CRC32 = 0x04
RAR5_FILE_FLAG_UNKNOWN_SIZE = 0x08

RAR5_COMPR_SOLID = 0x40

RAR5_ENC_FLAG_HAS_CHECKVAL = 0x01

RAR5_ENDARC_FLAG_NEXT_VOL = 0x01

# extra record types
RAR5_XFILE_ENCRYPTION = 1
RAR5_XFILE_HASH = 2
RAR5_XFILE_TIME = 3
RAR5_XFILE_VERSION = 4
RAR5_XFILE_REDIR = 5
RAR5_XFILE_OWNER = 6
RAR5_XFILE_SERVICE = 7

# time record flags
RAR5_XTIME_UNIXTIME = 0x01
RAR5_XTIME_HAS_MTIME = 0x02
RAR5_XTIME_HAS_CTIME = 0x04
RAR5_XTIME_HAS_ATIME = 0x08
RAR5_XTIME_UNIXTIME_NS = 0x10

RAR5_XENC_CIPHER_AES256 = 0

RAR5_XENC_CHECKVAL = 0x01
RAR5_XENC_TWEAKED = 0x02

RAR5_XHASH_BLAKE2SP = 0

# redirection types (first element of RarInfo.file_redir)
RAR5_XREDIR_UNIX_SYMLINK = 1
RAR5_XREDIR_WINDOWS_SYMLINK = 2
RAR5_XREDIR_WINDOWS_JUNCTION = 3
RAR5_XREDIR_HARD_LINK = 4
RAR5_XREDIR_FILE_COPY = 5

# redirection flag bit (second element of RarInfo.file_redir)
RAR5_XREDIR_ISDIR = 0x01

RAR5_XOWNER_UNAME = 0x01
RAR5_XOWNER_GNAME = 0x02
RAR5_XOWNER_UID = 0x04
RAR5_XOWNER_GID = 0x08

RAR5_OS_WINDOWS = 0
RAR5_OS_UNIX = 1

# DOS-style file attribute bits
DOS_MODE_ARCHIVE = 0x20
DOS_MODE_DIR = 0x10
DOS_MODE_SYSTEM = 0x04
DOS_MODE_HIDDEN = 0x02
DOS_MODE_READONLY = 0x01
##
## internal constants
##

# archive signatures
RAR_ID = b"Rar!\x1a\x07\x00"
RAR5_ID = b"Rar!\x1a\x07\x01\x00"

WIN32 = sys.platform == "win32"
# I/O block size: larger on Windows
BSIZE = 512 * 1024 if WIN32 else 64 * 1024

# how much of a file is scanned when looking for an embedded signature
SFX_MAX_SIZE = 2 * 1024 * 1024
RAR_V3 = 3
RAR_V5 = 5

# characters matched by the RC_BAD_CHARS_* patterns below;
# the WIN32 variant additionally matches ":", "^" and backslash
_BAD_CHARS = r"""\x00-\x1F<>|"?*"""
RC_BAD_CHARS_UNIX = re.compile(r"[%s]" % _BAD_CHARS)
RC_BAD_CHARS_WIN32 = re.compile(r"[%s:^\\]" % _BAD_CHARS)
def _find_sfx_header(xfile):
    """Search the start of a file for a RAR signature.

    Data is read in two steps (64 bytes, then up to ``SFX_MAX_SIZE`` more)
    and everything read so far is scanned after each step.

    Returns a ``(version, offset)`` tuple where version is ``RAR_V3`` or
    ``RAR_V5``; ``(0, 0)`` is returned when no signature is found.
    """
    # both signatures share everything but the last byte of RAR_ID
    marker = RAR_ID[:-1]
    collected = io.BytesIO()

    with XFile(xfile) as fd:
        for chunk_size in (64, SFX_MAX_SIZE):
            chunk = fd.read(chunk_size)
            if not chunk:
                break
            collected.write(chunk)
            haystack = collected.getvalue()
            pos = haystack.find(marker)
            while pos >= 0:
                if haystack[pos:pos + len(RAR_ID)] == RAR_ID:
                    return RAR_V3, pos
                if haystack[pos:pos + len(RAR5_ID)] == RAR5_ID:
                    return RAR_V5, pos
                # common prefix only, keep looking after it
                pos = haystack.find(marker, pos + len(marker))
    return 0, 0
330 ## Public interface
def get_rar_version(xfile):
    """Check quickly whether file is rar archive.

    Only the first ``len(RAR5_ID)`` bytes are examined.  Returns
    ``RAR_V3`` or ``RAR_V5`` for a matching signature, otherwise 0.
    """
    with XFile(xfile) as fd:
        head = fd.read(len(RAR5_ID))
    for magic, version in ((RAR_ID, RAR_V3), (RAR5_ID, RAR_V5)):
        if head.startswith(magic):
            return version
    return 0
def is_rarfile(xfile):
    """Check quickly whether file is rar archive.

    Returns False instead of raising when the file cannot be read
    (``OSError``).
    """
    try:
        version = get_rar_version(xfile)
    except OSError:
        # File not found or not accessible, ignore
        return False
    return version > 0
def is_rarfile_sfx(xfile):
    """Check whether file is rar archive with support for SFX.

    It will read 2M from file.
    """
    version, _offset = _find_sfx_header(xfile)
    return version > 0
class Error(Exception):
    """Base class for rarfile errors."""


class BadRarFile(Error):
    """Incorrect data in archive."""


class NotRarFile(Error):
    """The file is not RAR archive."""


class BadRarName(Error):
    """Cannot guess multipart name components."""


class NoRarEntry(Error):
    """File not found in RAR"""


class PasswordRequired(Error):
    """File requires password"""


class NeedFirstVolume(Error):
    """Need to start from first volume.

    Attributes:

        current_volume
            Volume number of current file or None if not known
    """
    def __init__(self, msg, volume=None):
        # Only ``msg`` ends up in ``args``, and copy/pickle re-create the
        # exception from ``args`` alone; ``volume`` therefore needs a
        # default (the attribute itself is restored from instance state).
        super().__init__(msg)
        self.current_volume = volume


class NoCrypto(Error):
    """Cannot parse encrypted headers - no crypto available."""


class RarExecError(Error):
    """Problem reported by unrar/rar."""


class RarWarning(RarExecError):
    """Non-fatal error"""


class RarFatalError(RarExecError):
    """Fatal error"""


class RarCRCError(RarExecError):
    """CRC error during unpacking"""


class RarLockedArchiveError(RarExecError):
    """Must not modify locked archive"""


class RarWriteError(RarExecError):
    """Write error"""


class RarOpenError(RarExecError):
    """Open error"""


class RarUserError(RarExecError):
    """User error"""


class RarMemoryError(RarExecError):
    """Memory error"""


class RarCreateError(RarExecError):
    """Create error"""


class RarNoFilesError(RarExecError):
    """No files that match pattern were found"""


class RarUserBreak(RarExecError):
    """User stop"""


class RarWrongPassword(RarExecError):
    """Incorrect password"""


class RarUnknownError(RarExecError):
    """Unknown exit code"""


class RarSignalExit(RarExecError):
    """Unrar exited with signal"""


class RarCannotExec(RarExecError):
    """Executable not found."""
class UnsupportedWarning(UserWarning):
    """Archive uses feature that are unsupported by rarfile.

    .. versionadded:: 4.0
    """
class RarInfo:
    r"""An entry in rar archive.

    Timestamps as :class:`~datetime.datetime` are without timezone in RAR3,
    with UTC timezone in RAR5 archives.

    Attributes:

        filename
            File name with relative path.
            Path separator is "/".  Always unicode string.

        date_time
            File modification timestamp.   As tuple of (year, month, day, hour, minute, second).
            RAR5 allows archives where it is missing, it's None then.

        comment
            Optional file comment field.  Unicode string.  (RAR3-only)

        file_size
            Uncompressed size.

        compress_size
            Compressed size.

        compress_type
            Compression method: one of :data:`RAR_M0` .. :data:`RAR_M5` constants.

        extract_version
            Minimal Rar version needed for decompressing.  As (major*10 + minor),
            so 2.9 is 29.

            RAR3: 10, 20, 29

            RAR5 does not have such field in archive, it's simply set to 50.

        host_os
            Host OS type, one of RAR_OS_* constants.

            RAR3: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`, :data:`RAR_OS_MSDOS`,
            :data:`RAR_OS_OS2`, :data:`RAR_OS_BEOS`.

            RAR5: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`.

        mode
            File attributes. May be either dos-style or unix-style, depending on host_os.

        mtime
            File modification time.  Same value as :attr:`date_time`
            but as :class:`~datetime.datetime` object with extended precision.

        ctime
            Optional time field: creation time.  As :class:`~datetime.datetime` object.

        atime
            Optional time field: last access time.  As :class:`~datetime.datetime` object.

        arctime
            Optional time field: archival time.  As :class:`~datetime.datetime` object.
            (RAR3-only)

        CRC
            CRC-32 of uncompressed file, unsigned int.

            RAR5: may be None.

        blake2sp_hash
            Blake2SP hash over decompressed data.  (RAR5-only)

        volume
            Volume nr, starting from 0.

        volume_file
            Volume file name, where file starts.

        file_redir
            If not None, file is link of some sort.  Contains tuple of (type, flags, target).
            (RAR5-only)

            Type is one of constants:

                :data:`RAR5_XREDIR_UNIX_SYMLINK`
                    Unix symlink.
                :data:`RAR5_XREDIR_WINDOWS_SYMLINK`
                    Windows symlink.
                :data:`RAR5_XREDIR_WINDOWS_JUNCTION`
                    Windows junction.
                :data:`RAR5_XREDIR_HARD_LINK`
                    Hard link to target.
                :data:`RAR5_XREDIR_FILE_COPY`
                    Current file is copy of another archive entry.

            Flags may contain bits:

                :data:`RAR5_XREDIR_ISDIR`
                    Symlink points to directory.
    """

    # zipfile-compatible fields
    filename = None
    file_size = None
    compress_size = None
    date_time = None
    CRC = None
    volume = None
    orig_filename = None

    # optional extended time fields, datetime() objects.
    mtime = None
    ctime = None
    atime = None

    extract_version = None
    mode = None
    host_os = None
    compress_type = None

    # rar3-only fields
    comment = None
    arctime = None

    # rar5-only fields
    blake2sp_hash = None
    file_redir = None

    # internal fields
    flags = 0
    type = None

    # zipfile compat
    def is_dir(self):
        """Returns True if entry is a directory.

        This base implementation always returns False.

        .. versionadded:: 4.0
        """
        return False

    def is_symlink(self):
        """Returns True if entry is a symlink.

        This base implementation always returns False.

        .. versionadded:: 4.0
        """
        return False

    def is_file(self):
        """Returns True if entry is a normal file.

        This base implementation always returns False.

        .. versionadded:: 4.0
        """
        return False

    def needs_password(self):
        """Returns True if data is stored password-protected.

        Only entries whose ``type`` is ``RAR_BLOCK_FILE`` are checked,
        by testing the ``RAR_FILE_PASSWORD`` bit in ``flags``.
        """
        if self.type == RAR_BLOCK_FILE:
            return (self.flags & RAR_FILE_PASSWORD) > 0
        return False

    def isdir(self):
        """Returns True if entry is a directory.

        .. deprecated:: 4.0
        """
        return self.is_dir()
class RarFile:
    """Parse RAR structure, provide access to files in archive.
    """

    #: File name, if available.  Unicode string or None.
    filename = None

    #: Archive comment.  Unicode string or None.
    comment = None

    def __init__(self, file, mode="r", charset=None, info_callback=None,
                 crc_check=True, errors="stop", part_only=False):
        """Open and parse a RAR archive.

        Parameters:

            file
                archive file name or file-like object.
            mode
                only "r" is supported.
            charset
                fallback charset to use, if filenames are not already Unicode-enabled.
            info_callback
                debug callback, gets to see all archive entries.
            crc_check
                set to False to disable CRC checks
            errors
                Either "stop" to quietly stop parsing on errors,
                or "strict" to raise errors.  Default is "stop".
            part_only
                If True, read only single file and allow it to be middle-part
                of multi-volume archive.

        Raises ``ValueError`` for an unknown ``errors`` value and
        ``NotImplementedError`` for any ``mode`` other than "r".
        """
        if is_filelike(file):
            self.filename = getattr(file, "name", None)
        else:
            if isinstance(file, Path):
                file = str(file)
            self.filename = file
        self._rarfile = file

        self._charset = charset or DEFAULT_CHARSET
        self._info_callback = info_callback
        self._crc_check = crc_check
        self._part_only = part_only
        self._password = None
        self._file_parser = None

        if errors == "stop":
            self._strict = False
        elif errors == "strict":
            self._strict = True
        else:
            raise ValueError("Invalid value for errors= parameter.")

        if mode != "r":
            raise NotImplementedError("RarFile supports only mode=r")

        self._parse()

    def __enter__(self):
        """Open context."""
        return self

    def __exit__(self, typ, value, traceback):
        """Exit context."""
        self.close()

    def __iter__(self):
        """Iterate over members."""
        return iter(self.infolist())

    def setpassword(self, pwd):
        """Sets the password to use when extracting.

        When the current parser reports encrypted headers it is dropped
        and the archive is parsed again with the new password; otherwise
        the password is just handed to the existing parser.
        """
        self._password = pwd
        if self._file_parser:
            if self._file_parser.has_header_encryption():
                self._file_parser = None
        if not self._file_parser:
            self._parse()
        else:
            self._file_parser.setpassword(self._password)

    def needs_password(self):
        """Returns True if any archive entries require password for extraction.
        """
        return self._file_parser.needs_password()

    def namelist(self):
        """Return list of filenames in archive.
        """
        return [f.filename for f in self.infolist()]

    def infolist(self):
        """Return RarInfo objects for all files/directories in archive.
        """
        return self._file_parser.infolist()

    def volumelist(self):
        """Returns filenames of archive volumes.

        In case of single-volume archive, the list contains
        just the name of main archive file.
        """
        return self._file_parser.volumelist()

    def getinfo(self, name):
        """Return RarInfo for file.
        """
        return self._file_parser.getinfo(name)

    def open(self, name, mode="r", pwd=None):
        """Returns file-like object (:class:`RarExtFile`) from where the data can be read.

        The object implements :class:`io.RawIOBase` interface, so it can
        be further wrapped with :class:`io.BufferedReader`
        and :class:`io.TextIOWrapper`.

        The object is seekable, although the seeking is fast only on
        uncompressed files, on compressed files the seeking is implemented
        by reading ahead and/or restarting the decompression.

        Parameters:

            name
                file name or RarInfo instance.
            mode
                must be "r"
            pwd
                password to use for extracting.

        Raises ``io.UnsupportedOperation`` for directory entries and
        :exc:`PasswordRequired` when the entry is protected and no
        password is available.
        """

        if mode != "r":
            raise NotImplementedError("RarFile.open() supports only mode=r")

        # entry lookup
        inf = self.getinfo(name)
        if inf.is_dir():
            raise io.UnsupportedOperation("Directory does not have any data: " + inf.filename)

        # check password
        if inf.needs_password():
            pwd = pwd or self._password
            if pwd is None:
                raise PasswordRequired("File %s requires password" % inf.filename)
        else:
            # entry is not protected, do not pass a password on
            pwd = None

        return self._file_parser.open(inf, pwd)

    def read(self, name, pwd=None):
        """Return uncompressed data for archive entry.

        For longer files using :meth:`~RarFile.open` may be better idea.

        Parameters:

            name
                filename or RarInfo instance
            pwd
                password to use for extracting.
        """

        with self.open(name, "r", pwd) as f:
            return f.read()

    def close(self):
        """Release open resources."""
        pass

    def printdir(self, file=None):
        """Print archive file list to stdout or given file.
        """
        if file is None:
            file = sys.stdout
        for f in self.infolist():
            print(f.filename, file=file)

    def extract(self, member, path=None, pwd=None):
        """Extract single file into current directory.

        Parameters:

            member
                filename or :class:`RarInfo` instance
            path
                optional destination path
            pwd
                optional password to use
        """
        inf = self.getinfo(member)
        return self._extract_one(inf, path, pwd, True)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all files into current directory.

        Parameters:

            path
                optional destination path
            members
                optional filename or :class:`RarInfo` instance list to extract
            pwd
                optional password to use
        """
        if members is None:
            members = self.namelist()

        done = set()
        dirs = []
        for m in members:
            inf = self.getinfo(m)
            # directory attributes are applied after everything is extracted
            dst = self._extract_one(inf, path, pwd, not inf.is_dir())
            if inf.is_dir():
                if dst not in done:
                    dirs.append((dst, inf))
                    done.add(dst)
        if dirs:
            # reverse order: nested paths sort after, so are handled before, their parents
            dirs.sort(reverse=True)
            for dst, inf in dirs:
                self._set_attrs(inf, dst)

    def testrar(self, pwd=None):
        """Read all files and test CRC.
        """
        for member in self.infolist():
            if member.is_file():
                with self.open(member, 'r', pwd) as f:
                    empty_read(f, member.file_size, BSIZE)

    def strerror(self):
        """Return error string if parsing failed or None if no problems.
        """
        if not self._file_parser:
            return "Not a RAR file"
        return self._file_parser.strerror()

    ##
    ## private methods
    ##

    def _parse(self):
        """Run parser for file type
        """
        ver, sfx_ofs = _find_sfx_header(self._rarfile)
        if ver == RAR_V3:
            p3 = RAR3Parser(self._rarfile, self._password, self._crc_check,
                            self._charset, self._strict, self._info_callback,
                            sfx_ofs, self._part_only)
            self._file_parser = p3  # noqa
        elif ver == RAR_V5:
            p5 = RAR5Parser(self._rarfile, self._password, self._crc_check,
                            self._charset, self._strict, self._info_callback,
                            sfx_ofs, self._part_only)
            self._file_parser = p5  # noqa
        else:
            raise NotRarFile("Not a RAR file")

        self._file_parser.parse()
        self.comment = self._file_parser.comment

    def _extract_one(self, info, path, pwd, set_attrs):
        """Extract one entry below ``path`` (cwd when None).

        Returns the destination file name, or None when the entry is
        neither file, directory nor a supported symlink.
        """
        fname = sanitize_filename(
            info.filename, os.path.sep, WIN32
        )

        if path is None:
            path = os.getcwd()
        else:
            path = os.fspath(path)
        dstfn = os.path.join(path, fname)

        dirname = os.path.dirname(dstfn)
        if dirname and dirname != ".":
            os.makedirs(dirname, exist_ok=True)

        if info.is_file():
            return self._make_file(info, dstfn, pwd, set_attrs)
        if info.is_dir():
            return self._make_dir(info, dstfn, pwd, set_attrs)
        if info.is_symlink():
            return self._make_symlink(info, dstfn, pwd, set_attrs)
        return None

    def _create_helper(self, name, flags, info):
        """Opener used for destination files; ``info`` is unused here."""
        return os.open(name, flags)

    def _make_file(self, info, dstfn, pwd, set_attrs):
        """Copy entry data into ``dstfn``, optionally applying attributes."""
        def helper(name, flags):
            return self._create_helper(name, flags, info)
        with self.open(info, "r", pwd) as src:
            with open(dstfn, "wb", opener=helper) as dst:
                shutil.copyfileobj(src, dst)
        if set_attrs:
            self._set_attrs(info, dstfn)
        return dstfn

    def _make_dir(self, info, dstfn, pwd, set_attrs):
        """Create directory ``dstfn``, optionally applying attributes."""
        os.makedirs(dstfn, exist_ok=True)
        if set_attrs:
            self._set_attrs(info, dstfn)
        return dstfn

    def _make_symlink(self, info, dstfn, pwd, set_attrs):
        """Create symlink ``dstfn`` for a link entry.

        Returns ``dstfn``, or None (after an :class:`UnsupportedWarning`)
        for Windows junctions and unrecognized link types.
        """
        target_is_directory = False
        if info.host_os == RAR_OS_UNIX:
            # link target is stored as the entry's data
            link_name = self.read(info, pwd)
            target_is_directory = (info.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY
        elif info.file_redir:
            redir_type, redir_flags, link_name = info.file_redir
            if redir_type == RAR5_XREDIR_WINDOWS_JUNCTION:
                warnings.warn(f"Windows junction not supported - {info.filename}", UnsupportedWarning)
                return None
            # ISDIR is a bit in the redirection *flags*, not in the type
            target_is_directory = (redir_flags & RAR5_XREDIR_ISDIR) > 0
        else:
            warnings.warn(f"Unsupported link type - {info.filename}", UnsupportedWarning)
            return None

        os.symlink(link_name, dstfn, target_is_directory=target_is_directory)
        return dstfn

    def _set_attrs(self, info, dstfn):
        """Apply permission bits and timestamps from ``info`` to ``dstfn``."""
        if info.host_os == RAR_OS_UNIX:
            os.chmod(dstfn, info.mode & 0o777)
        elif info.host_os in (RAR_OS_WIN32, RAR_OS_MSDOS):
            # only keep R/O attr, except for dirs on win32
            if info.mode & DOS_MODE_READONLY and (info.is_file() or not WIN32):
                st = os.stat(dstfn)
                new_mode = st.st_mode & ~0o222
                os.chmod(dstfn, new_mode)

        if info.mtime:
            mtime_ns = to_nsecs(info.mtime)
            # fall back to mtime when no access time is recorded
            atime_ns = to_nsecs(info.atime) if info.atime else mtime_ns
            os.utime(dstfn, ns=(atime_ns, mtime_ns))
984 # File format parsing
987 class CommonParser:
988 """Shared parser parts."""
989 _main = None
990 _hdrenc_main = None
991 _needs_password = False
992 _fd = None
993 _expect_sig = None
994 _parse_error = None
995 _password = None
996 comment = None
998 def __init__(self, rarfile, password, crc_check, charset, strict,
999 info_cb, sfx_offset, part_only):
1000 self._rarfile = rarfile
1001 self._password = password
1002 self._crc_check = crc_check
1003 self._charset = charset
1004 self._strict = strict
1005 self._info_callback = info_cb
1006 self._info_list = []
1007 self._info_map = {}
1008 self._vol_list = []
1009 self._sfx_offset = sfx_offset
1010 self._part_only = part_only
1012 def has_header_encryption(self):
1013 """Returns True if headers are encrypted
1015 if self._hdrenc_main:
1016 return True
1017 if self._main:
1018 if self._main.flags & RAR_MAIN_PASSWORD:
1019 return True
1020 return False
1022 def setpassword(self, pwd):
1023 """Set cached password."""
1024 self._password = pwd
1026 def volumelist(self):
1027 """Volume files"""
1028 return self._vol_list
1030 def needs_password(self):
1031 """Is password required"""
1032 return self._needs_password
1034 def strerror(self):
1035 """Last error"""
1036 return self._parse_error
1038 def infolist(self):
1039 """List of RarInfo records.
1041 return self._info_list
1043 def getinfo(self, member):
1044 """Return RarInfo for filename
1046 if isinstance(member, RarInfo):
1047 fname = member.filename
1048 elif isinstance(member, Path):
1049 fname = str(member)
1050 else:
1051 fname = member
1053 if fname.endswith("/"):
1054 fname = fname.rstrip("/")
1056 try:
1057 return self._info_map[fname]
1058 except KeyError:
1059 raise NoRarEntry("No such file: %s" % fname) from None
1061 def parse(self):
1062 """Process file."""
1063 self._fd = None
1064 try:
1065 self._parse_real()
1066 finally:
1067 if self._fd:
1068 self._fd.close()
1069 self._fd = None
1071 def _parse_real(self):
1072 """Actually read file.
1074 fd = XFile(self._rarfile)
1075 self._fd = fd
1076 fd.seek(self._sfx_offset, 0)
1077 sig = fd.read(len(self._expect_sig))
1078 if sig != self._expect_sig:
1079 raise NotRarFile("Not a Rar archive")
1081 volume = 0 # first vol (.rar) is 0
1082 more_vols = False
1083 endarc = False
1084 volfile = self._rarfile
1085 self._vol_list = [self._rarfile]
1086 raise_need_first_vol = False
1087 while True:
1088 if endarc:
1089 h = None # don"t read past ENDARC
1090 else:
1091 h = self._parse_header(fd)
1092 if not h:
1093 if raise_need_first_vol:
1094 # did not find ENDARC with VOLNR
1095 raise NeedFirstVolume("Need to start from first volume", None)
1096 if more_vols and not self._part_only:
1097 volume += 1
1098 fd.close()
1099 try:
1100 volfile = self._next_volname(volfile)
1101 fd = XFile(volfile)
1102 except IOError:
1103 self._set_error("Cannot open next volume: %s", volfile)
1104 break
1105 self._fd = fd
1106 sig = fd.read(len(self._expect_sig))
1107 if sig != self._expect_sig:
1108 self._set_error("Invalid volume sig: %s", volfile)
1109 break
1110 more_vols = False
1111 endarc = False
1112 self._vol_list.append(volfile)
1113 self._main = None
1114 continue
1115 break
1116 h.volume = volume
1117 h.volume_file = volfile
1119 if h.type == RAR_BLOCK_MAIN and not self._main:
1120 self._main = h
1121 if volume == 0 and (h.flags & RAR_MAIN_NEWNUMBERING) and not self._part_only:
1122 # RAR 2.x does not set FIRSTVOLUME,
1123 # so check it only if NEWNUMBERING is used
1124 if (h.flags & RAR_MAIN_FIRSTVOLUME) == 0:
1125 if getattr(h, "main_volume_number", None) is not None:
1126 # rar5 may have more info
1127 raise NeedFirstVolume(
1128 "Need to start from first volume (current: %r)"
1129 % (h.main_volume_number,),
1130 h.main_volume_number
1132 # delay raise until we have volnr from ENDARC
1133 raise_need_first_vol = True
1134 if h.flags & RAR_MAIN_PASSWORD:
1135 self._needs_password = True
1136 if not self._password:
1137 break
1138 elif h.type == RAR_BLOCK_ENDARC:
1139 more_vols = (h.flags & RAR_ENDARC_NEXT_VOLUME) > 0
1140 endarc = True
1141 if raise_need_first_vol and (h.flags & RAR_ENDARC_VOLNR) > 0:
1142 raise NeedFirstVolume(
1143 "Need to start from first volume (current: %r)"
1144 % (h.endarc_volnr,),
1145 h.endarc_volnr
1147 elif h.type == RAR_BLOCK_FILE:
1148 # RAR 2.x does not write RAR_BLOCK_ENDARC
1149 if h.flags & RAR_FILE_SPLIT_AFTER:
1150 more_vols = True
1151 # RAR 2.x does not set RAR_MAIN_FIRSTVOLUME
1152 if volume == 0 and h.flags & RAR_FILE_SPLIT_BEFORE:
1153 if not self._part_only:
1154 raise_need_first_vol = True
1156 if h.needs_password():
1157 self._needs_password = True
1159 # store it
1160 self.process_entry(fd, h)
1162 if self._info_callback:
1163 self._info_callback(h)
1165 # go to next header
1166 if h.add_size > 0:
1167 fd.seek(h.data_offset + h.add_size, 0)
1169 def process_entry(self, fd, item):
1170 """Examine item, add into lookup cache."""
1171 raise NotImplementedError()
1173 def _decrypt_header(self, fd):
1174 raise NotImplementedError("_decrypt_header")
1176 def _parse_block_header(self, fd):
1177 raise NotImplementedError("_parse_block_header")
1179 def _open_hack(self, inf, pwd):
1180 raise NotImplementedError("_open_hack")
1182 def _parse_header(self, fd):
1183 """Read single header
1185 try:
1186 # handle encrypted headers
1187 if (self._main and self._main.flags & RAR_MAIN_PASSWORD) or self._hdrenc_main:
1188 if not self._password:
1189 return None
1190 fd = self._decrypt_header(fd)
1192 # now read actual header
1193 return self._parse_block_header(fd)
1194 except struct.error:
1195 self._set_error("Broken header in RAR file")
1196 return None
1198 def _next_volname(self, volfile):
1199 """Given current vol name, construct next one
1201 if is_filelike(volfile):
1202 raise IOError("Working on single FD")
1203 if self._main.flags & RAR_MAIN_NEWNUMBERING:
1204 return _next_newvol(volfile)
1205 return _next_oldvol(volfile)
1207 def _set_error(self, msg, *args):
1208 if args:
1209 msg = msg % args
1210 self._parse_error = msg
1211 if self._strict:
1212 raise BadRarFile(msg)
1214 def open(self, inf, pwd):
1215 """Return stream object for file data."""
1217 if inf.file_redir:
1218 redir_type, redir_flags, redir_name = inf.file_redir
1219 # cannot leave to unrar as it expects copied file to exist
1220 if redir_type in (RAR5_XREDIR_FILE_COPY, RAR5_XREDIR_HARD_LINK):
1221 inf = self.getinfo(redir_name)
1222 if not inf:
1223 raise BadRarFile("cannot find copied file")
1224 elif redir_type in (
1225 RAR5_XREDIR_UNIX_SYMLINK, RAR5_XREDIR_WINDOWS_SYMLINK,
1226 RAR5_XREDIR_WINDOWS_JUNCTION,
1228 return io.BytesIO(redir_name.encode("utf8"))
1229 if inf.flags & RAR_FILE_SPLIT_BEFORE:
1230 raise NeedFirstVolume("Partial file, please start from first volume: " + inf.filename, None)
1232 # is temp write usable?
1233 use_hack = 1
1234 if not self._main:
1235 use_hack = 0
1236 elif self._main._must_disable_hack():
1237 use_hack = 0
1238 elif inf._must_disable_hack():
1239 use_hack = 0
1240 elif is_filelike(self._rarfile):
1241 pass
1242 elif inf.file_size > HACK_SIZE_LIMIT:
1243 use_hack = 0
1244 elif not USE_EXTRACT_HACK:
1245 use_hack = 0
1247 # now extract
1248 if inf.compress_type == RAR_M0 and (inf.flags & RAR_FILE_PASSWORD) == 0 and inf.file_redir is None:
1249 return self._open_clear(inf)
1250 elif use_hack:
1251 return self._open_hack(inf, pwd)
1252 elif is_filelike(self._rarfile):
1253 return self._open_unrar_membuf(self._rarfile, inf, pwd)
1254 else:
1255 return self._open_unrar(self._rarfile, inf, pwd)
1257 def _open_clear(self, inf):
1258 return DirectReader(self, inf)
def _open_hack_core(self, inf, pwd, prefix, suffix):
    """Copy one entry into a temporary archive and open that with unrar.

    The temp file receives ``prefix``, then ``inf.header_size +
    inf.compress_size`` bytes read from ``inf.volume_file`` starting at
    ``inf.header_offset``, then ``suffix``.  Its name is passed to
    :meth:`_open_unrar` both as the archive and as the temp file.

    Raises BadRarFile if the volume runs out of data before the whole
    entry was copied.  On any failure the temp file is removed and the
    source file is closed.
    """
    size = inf.compress_size + inf.header_size
    tmpname = None
    rf = XFile(inf.volume_file, 0)
    try:
        rf.seek(inf.header_offset)

        tmpfd, tmpname = mkstemp(suffix=".rar", dir=HACK_TMP_DIR)
        # "with" closes the temp file before the cleanup below runs
        with os.fdopen(tmpfd, "wb") as tmpf:
            tmpf.write(prefix)
            while size > 0:
                buf = rf.read(min(size, BSIZE))
                if not buf:
                    raise BadRarFile("read failed: " + inf.filename)
                tmpf.write(buf)
                size -= len(buf)
            tmpf.write(suffix)
    except BaseException:
        # do not leave a half-written temp archive behind
        if tmpname is not None:
            os.unlink(tmpname)
        raise
    finally:
        # source volume is no longer needed, on success or failure
        rf.close()

    return self._open_unrar(tmpname, inf, pwd, tmpname)
def _open_unrar_membuf(self, memfile, inf, pwd):
    """Dump an in-memory archive to a temp file and extract from that.

    Needed for solid archives.  The member name is still passed to the
    tool (``force_file=True``).
    """
    tmp_path = membuf_tempfile(memfile)
    stream = self._open_unrar(tmp_path, inf, pwd, tmp_path, force_file=True)
    return stream
def _open_unrar(self, rarfile, inf, pwd=None, tmpfile=None, force_file=False):
    """Extract using unrar.

    Builds the tool command line for ``rarfile`` and returns a
    PipeReader that reads the tool's output.  ``tmpfile`` is passed on
    to the PipeReader.
    """
    # not giving filename avoids encoding related problems
    member = inf.filename if (not tmpfile or force_file) else None

    # read from unrar pipe
    cmdline = tool_setup().open_cmdline(pwd, rarfile, member)
    return PipeReader(self, inf, cmdline, tmpfile)
1313 # RAR3 format
class Rar3Info(RarInfo):
    """Header record for RAR3 archives, with RAR3 specific fields."""

    extract_version = 15
    add_size = 0
    salt = None

    # raw header bookkeeping
    header_crc = header_size = header_offset = data_offset = None
    _md_class = _md_expect = None
    _name_size = None

    # make sure some rar5 fields are always present
    file_redir = None
    blake2sp_hash = None

    # only filled for end-of-archive blocks
    endarc_datacrc = None
    endarc_volnr = None

    def _must_disable_hack(self):
        """Return True when this block's flags rule out the temp-archive hack."""
        if self.type == RAR_BLOCK_FILE:
            blockers = RAR_FILE_PASSWORD | RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER
        elif self.type == RAR_BLOCK_MAIN:
            blockers = RAR_MAIN_SOLID | RAR_MAIN_PASSWORD
        else:
            return False
        return bool(self.flags & blockers)

    def is_dir(self):
        """Returns True if entry is a directory."""
        if self.type != RAR_BLOCK_FILE or self.is_symlink():
            return False
        return (self.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY

    def is_symlink(self):
        """Returns True if entry is a symlink."""
        if self.type != RAR_BLOCK_FILE:
            return False
        if self.host_os != RAR_OS_UNIX:
            return False
        # file-type bits of the unix mode must say "symbolic link"
        return (self.mode & 0xF000) == 0xA000

    def is_file(self):
        """Returns True if entry is a normal file."""
        if self.type != RAR_BLOCK_FILE:
            return False
        return not self.is_dir() and not self.is_symlink()
class RAR3Parser(CommonParser):
    """Parse RAR3 file format.
    """
    _expect_sig = RAR_ID
    _last_aes_key = (None, None, None)       # (salt, key, iv)

    def _decrypt_header(self, fd):
        """Return a HeaderDecrypt wrapper around ``fd`` for encrypted headers.

        Reads the 8-byte salt from ``fd``.  The derived key and iv are
        cached per salt so the key derivation runs once per salt.

        Raises NoCrypto when no crypto backend is available.
        """
        if not _have_crypto:
            raise NoCrypto("Cannot parse encrypted headers - no crypto")
        salt = fd.read(8)
        if self._last_aes_key[0] == salt:
            key, iv = self._last_aes_key[1:]
        else:
            key, iv = rar3_s2k(self._password, salt)
            self._last_aes_key = (salt, key, iv)
        return HeaderDecrypt(fd, key, iv)

    def _parse_block_header(self, fd):
        """Parse common block header.

        Returns a filled Rar3Info, or None on EOF, truncated header or
        header CRC mismatch.  The latter two are also reported through
        ``_set_error``.
        """
        h = Rar3Info()
        h.header_offset = fd.tell()

        # read and parse base header
        buf = fd.read(S_BLK_HDR.size)
        if not buf:
            return None
        if len(buf) < S_BLK_HDR.size:
            self._set_error("Unexpected EOF when reading header")
            return None
        t = S_BLK_HDR.unpack_from(buf)
        h.header_crc, h.type, h.flags, h.header_size = t

        # read full header
        if h.header_size > S_BLK_HDR.size:
            hdata = buf + fd.read(h.header_size - S_BLK_HDR.size)
        else:
            hdata = buf
        h.data_offset = fd.tell()

        # unexpected EOF?
        if len(hdata) != h.header_size:
            self._set_error("Unexpected EOF when reading header")
            return None

        pos = S_BLK_HDR.size

        # block has data associated with it?
        if h.flags & RAR_LONG_BLOCK:
            h.add_size, pos = load_le32(hdata, pos)
        else:
            h.add_size = 0

        # parse interesting ones, decide header boundaries for crc
        if h.type == RAR_BLOCK_MARK:
            return h
        elif h.type == RAR_BLOCK_MAIN:
            pos += 6
            if h.flags & RAR_MAIN_ENCRYPTVER:
                pos += 1
            crc_pos = pos
            if h.flags & RAR_MAIN_COMMENT:
                self._parse_subblocks(h, hdata, pos)
        elif h.type == RAR_BLOCK_FILE:
            # file header parsing starts 4 bytes before the current position
            pos = self._parse_file_header(h, hdata, pos - 4)
            crc_pos = pos
            if h.flags & RAR_FILE_COMMENT:
                pos = self._parse_subblocks(h, hdata, pos)
        elif h.type == RAR_BLOCK_SUB:
            pos = self._parse_file_header(h, hdata, pos - 4)
            crc_pos = h.header_size
        elif h.type == RAR_BLOCK_OLD_AUTH:
            pos += 8
            crc_pos = pos
        elif h.type == RAR_BLOCK_OLD_EXTRA:
            pos += 7
            crc_pos = pos
        elif h.type == RAR_BLOCK_ENDARC:
            if h.flags & RAR_ENDARC_DATACRC:
                h.endarc_datacrc, pos = load_le32(hdata, pos)
            if h.flags & RAR_ENDARC_VOLNR:
                h.endarc_volnr = S_SHORT.unpack_from(hdata, pos)[0]
                pos += 2
            crc_pos = h.header_size
        else:
            crc_pos = h.header_size

        # check crc; the first 2 bytes hold the stored crc itself
        if h.type == RAR_BLOCK_OLD_SUB:
            crcdat = hdata[2:] + fd.read(h.add_size)
        else:
            crcdat = hdata[2:crc_pos]

        # only the low 16 bits of crc32 are compared
        calc_crc = crc32(crcdat) & 0xFFFF

        # return good header
        if h.header_crc == calc_crc:
            return h

        # header parsing failed.
        self._set_error("Header CRC error (%02x): exp=%x got=%x (xlen = %d)",
                        h.type, h.header_crc, calc_crc, len(crcdat))

        # instead of panicking, send eof
        return None

    def _parse_file_header(self, h, hdata, pos):
        """Read file-specific header.

        Fills size, time, name and related fields of ``h`` from
        ``hdata`` starting at ``pos`` and returns the position after
        the parsed fields.
        """
        fld = S_FILE_HDR.unpack_from(hdata, pos)
        pos += S_FILE_HDR.size

        h.compress_size = fld[0]
        h.file_size = fld[1]
        h.host_os = fld[2]
        h.CRC = fld[3]
        h.date_time = parse_dos_time(fld[4])
        h.mtime = to_datetime(h.date_time)
        h.extract_version = fld[5]
        h.compress_type = fld[6]
        h._name_size = name_size = fld[7]
        h.mode = fld[8]

        h._md_class = CRC32Context
        h._md_expect = h.CRC

        if h.flags & RAR_FILE_LARGE:
            # upper 32 bits of both sizes follow as separate fields
            h1, pos = load_le32(hdata, pos)
            h2, pos = load_le32(hdata, pos)
            h.compress_size |= h1 << 32
            h.file_size |= h2 << 32
            h.add_size = h.compress_size

        name, pos = load_bytes(hdata, name_size, pos)
        if h.flags & RAR_FILE_UNICODE and b"\0" in name:
            # stored in custom encoding
            nul = name.find(b"\0")
            h.orig_filename = name[:nul]
            u = UnicodeFilename(h.orig_filename, name[nul + 1:])
            h.filename = u.decode()

            # if parsing failed fall back to simple name
            if u.failed:
                h.filename = self._decode(h.orig_filename)
        elif h.flags & RAR_FILE_UNICODE:
            # stored in UTF8
            h.orig_filename = name
            h.filename = name.decode("utf8", "replace")
        else:
            # stored in random encoding
            h.orig_filename = name
            h.filename = self._decode(name)

        # change separator, set dir suffix
        h.filename = h.filename.replace("\\", "/").rstrip("/")
        if h.is_dir():
            h.filename = h.filename + "/"

        if h.flags & RAR_FILE_SALT:
            h.salt, pos = load_bytes(hdata, 8, pos)
        else:
            h.salt = None

        # optional extended time stamps
        if h.flags & RAR_FILE_EXTTIME:
            pos = _parse_ext_time(h, hdata, pos)
        else:
            # without the extended block all precise timestamps,
            # including mtime set above, are cleared
            h.mtime = h.atime = h.ctime = h.arctime = None

        return pos

    def _parse_subblocks(self, h, hdata, pos):
        """Find old-style comment subblock.

        Walks the sub-blocks in ``hdata`` starting at ``pos`` and stores
        a decoded comment in ``h.comment`` when one is found.  Returns
        the position where scanning stopped.
        """
        # Stop as soon as the remaining bytes cannot hold a sub-header;
        # unpacking a truncated tail would raise struct.error.
        while pos + S_BLK_HDR.size <= len(hdata):
            # ordinary block header
            t = S_BLK_HDR.unpack_from(hdata, pos)
            ___scrc, stype, sflags, slen = t
            pos_next = pos + slen
            pos += S_BLK_HDR.size

            # corrupt header
            if pos_next < pos:
                break

            # followed by block-specific header
            if stype == RAR_BLOCK_OLD_COMMENT and pos + S_COMMENT_HDR.size <= pos_next:
                declen, ver, meth, crc = S_COMMENT_HDR.unpack_from(hdata, pos)
                pos += S_COMMENT_HDR.size
                data = hdata[pos: pos_next]
                cmt = rar3_decompress(ver, meth, data, declen, sflags,
                                      crc, self._password)
                if not self._crc_check or (crc32(cmt) & 0xFFFF == crc):
                    h.comment = self._decode_comment(cmt)

            pos = pos_next
        return pos

    def _read_comment_v3(self, inf, pwd=None):
        """Read and decode the comment stored in block ``inf``.

        Returns None when CRC checking is enabled and the CRC of the
        decompressed data does not match ``inf.CRC``.
        """

        # read data
        with XFile(inf.volume_file) as rf:
            rf.seek(inf.data_offset)
            data = rf.read(inf.compress_size)

        # decompress
        cmt = rar3_decompress(inf.extract_version, inf.compress_type, data,
                              inf.file_size, inf.flags, inf.CRC, pwd, inf.salt)

        # check crc
        if self._crc_check:
            crc = crc32(cmt)
            if crc != inf.CRC:
                return None

        return self._decode_comment(cmt)

    def _decode(self, val):
        """Decode bytes using TRY_ENCODINGS, then the archive charset.

        The final attempt uses ``self._charset`` with "replace" so it
        never fails on undecodable bytes.
        """
        for c in TRY_ENCODINGS:
            try:
                return val.decode(c)
            except UnicodeError:
                pass
        return val.decode(self._charset, "replace")

    def _decode_comment(self, val):
        """Decode comment bytes the same way as filenames."""
        return self._decode(val)

    def process_entry(self, fd, item):
        """Register a parsed block: file entries, comments, main flags."""
        if item.type == RAR_BLOCK_FILE:
            # use only first part
            if item.flags & RAR_FILE_VERSION:
                pass    # skip old versions
            elif (item.flags & RAR_FILE_SPLIT_BEFORE) == 0:
                self._info_map[item.filename.rstrip("/")] = item
                self._info_list.append(item)
            elif len(self._info_list) > 0:
                # final crc is in last block
                old = self._info_list[-1]
                old.CRC = item.CRC
                old._md_expect = item._md_expect
                old.compress_size += item.compress_size

        # parse new-style comment
        if item.type == RAR_BLOCK_SUB and item.filename == "CMT":
            if item.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER):
                pass
            elif item.flags & RAR_FILE_SOLID:
                # file comment
                cmt = self._read_comment_v3(item, self._password)
                if len(self._info_list) > 0:
                    old = self._info_list[-1]
                    old.comment = cmt
            else:
                # archive comment
                cmt = self._read_comment_v3(item, self._password)
                self.comment = cmt

        if item.type == RAR_BLOCK_MAIN:
            if item.flags & RAR_MAIN_COMMENT:
                self.comment = item.comment
            if item.flags & RAR_MAIN_PASSWORD:
                self._needs_password = True

    # put file compressed data into temporary .rar archive, and run
    # unrar on that, thus avoiding unrar going over whole archive
    def _open_hack(self, inf, pwd):
        """Open ``inf`` via a temporary archive with a minimal RAR3 main header."""
        # create main header: crc, type, flags, size, res1, res2
        prefix = RAR_ID + S_BLK_HDR.pack(0x90CF, 0x73, 0, 13) + b"\0" * (2 + 4)
        return self._open_hack_core(inf, pwd, prefix, b"")
1642 # RAR5 format
class Rar5Info(RarInfo):
    """Fields common to every RAR5 record type."""

    extract_version = 50

    # raw header bookkeeping
    header_crc = header_size = header_offset = data_offset = None

    # type=all
    block_type = block_flags = None
    add_size = 0
    block_extra_size = 0

    # type=MAIN
    volume_number = None
    _md_class = _md_expect = None

    def _must_disable_hack(self):
        """Base records never veto the temp-archive hack."""
        return False
class Rar5BaseFile(Rar5Info):
    """Shared struct for file & service record."""

    type = -1
    file_encryption = (0, 0, 0, b"", b"", b"")
    file_flags = file_compress_flags = None
    file_redir = file_owner = file_version = None
    blake2sp_hash = None

    def _must_disable_hack(self):
        """Return True for encrypted, split, solid or redirected entries."""
        split_bits = RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER
        # "or" keeps the original check order and short-circuiting
        return bool(
            self.flags & RAR_FILE_PASSWORD
            or self.block_flags & split_bits
            or self.file_compress_flags & RAR5_COMPR_SOLID
            or self.file_redir
        )
class Rar5FileInfo(Rar5BaseFile):
    """Record describing a file entry in a RAR5 archive."""

    type = RAR_BLOCK_FILE

    def is_symlink(self):
        """Returns True if entry is a symlink."""
        redir = self.file_redir
        if redir is None:
            return False
        # first element of the redirection tuple is its type
        link_kinds = (
            RAR5_XREDIR_UNIX_SYMLINK,
            RAR5_XREDIR_WINDOWS_SYMLINK,
            RAR5_XREDIR_WINDOWS_JUNCTION,
        )
        return redir[0] in link_kinds

    def is_file(self):
        """Returns True if entry is a normal file."""
        if self.is_dir():
            return False
        return not self.is_symlink()

    def is_dir(self):
        """Returns True if entry is a directory."""
        # redirected entries are never reported as directories
        return bool(not self.file_redir and self.file_flags & RAR5_FILE_FLAG_ISDIR)
class Rar5ServiceInfo(Rar5BaseFile):
    """Record describing a RAR5 service block."""

    type = RAR_BLOCK_SUB
class Rar5MainInfo(Rar5Info):
    """Main archive record of a RAR5 archive."""

    type = RAR_BLOCK_MAIN
    main_flags = main_volume_number = None

    def _must_disable_hack(self):
        """Return True for solid archives."""
        return bool(self.main_flags & RAR5_MAIN_FLAG_SOLID)
class Rar5EncryptionInfo(Rar5Info):
    """Header-encryption record of a RAR5 archive."""

    type = RAR5_BLOCK_ENCRYPTION
    encryption_algo = encryption_flags = None
    encryption_kdf_count = None
    encryption_salt = encryption_check_value = None

    def needs_password(self):
        """Always True for this record type."""
        return True
class Rar5EndArcInfo(Rar5Info):
    """End-of-archive record of a RAR5 archive."""

    type = RAR_BLOCK_ENDARC
    endarc_flags = None
class RAR5Parser(CommonParser):
    """Parse RAR5 format.
    """
    _expect_sig = RAR5_ID
    _hdrenc_main = None

    # AES encrypted headers
    _last_aes256_key = (-1, None, None)     # (kdf_count, salt, key)

    def _gen_key(self, kdf_count, salt):
        """Derive the AES-256 key for ``salt`` with 2**kdf_count iterations.

        The last derived key is cached, so repeated calls with the same
        parameters skip the derivation.

        Raises BadRarFile when ``kdf_count`` exceeds 24.
        """
        if self._last_aes256_key[:2] == (kdf_count, salt):
            return self._last_aes256_key[2]
        if kdf_count > 24:
            raise BadRarFile("Too large kdf_count")
        pwd = self._password
        if isinstance(pwd, str):
            pwd = pwd.encode("utf8")
        key = pbkdf2_hmac("sha256", pwd, salt, 1 << kdf_count)
        self._last_aes256_key = (kdf_count, salt, key)
        return key

    def _decrypt_header(self, fd):
        """Return a HeaderDecrypt wrapper around ``fd`` for encrypted headers.

        Uses the parameters of the previously parsed encryption block
        and reads the 16-byte iv from ``fd``.

        Raises NoCrypto when no crypto backend is available.
        """
        if not _have_crypto:
            raise NoCrypto("Cannot parse encrypted headers - no crypto")
        h = self._hdrenc_main
        key = self._gen_key(h.encryption_kdf_count, h.encryption_salt)
        iv = fd.read(16)
        return HeaderDecrypt(fd, key, iv)

    def _parse_block_header(self, fd):
        """Parse common block header.

        Returns the record object for known block types, or None on
        EOF, oversized header, CRC mismatch or unknown block type.
        """
        header_offset = fd.tell()

        # 4-byte crc plus at least one byte of the header-size vint
        preload = 4 + 1
        start_bytes = fd.read(preload)
        if len(start_bytes) < preload:
            self._set_error("Unexpected EOF when reading header")
            return None
        # high bit set means the vint continues in the next byte
        while start_bytes[-1] & 0x80:
            b = fd.read(1)
            if not b:
                self._set_error("Unexpected EOF when reading header")
                return None
            start_bytes += b
        header_crc, pos = load_le32(start_bytes, 0)
        hdrlen, pos = load_vint(start_bytes, pos)
        if hdrlen > 2 * 1024 * 1024:
            return None
        header_size = pos + hdrlen

        # read full header, check for EOF
        hdata = start_bytes + fd.read(header_size - len(start_bytes))
        if len(hdata) != header_size:
            self._set_error("Unexpected EOF when reading header")
            return None
        data_offset = fd.tell()

        # crc covers everything after the stored crc field
        calc_crc = crc32(memoryview(hdata)[4:])
        if header_crc != calc_crc:
            # header parsing failed.
            self._set_error("Header CRC error: exp=%x got=%x (xlen = %d)",
                            header_crc, calc_crc, len(hdata))
            return None

        block_type, pos = load_vint(hdata, pos)

        if block_type == RAR5_BLOCK_MAIN:
            h, pos = self._parse_block_common(Rar5MainInfo(), hdata)
            h = self._parse_main_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_FILE:
            h, pos = self._parse_block_common(Rar5FileInfo(), hdata)
            h = self._parse_file_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_SERVICE:
            h, pos = self._parse_block_common(Rar5ServiceInfo(), hdata)
            h = self._parse_file_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_ENCRYPTION:
            h, pos = self._parse_block_common(Rar5EncryptionInfo(), hdata)
            h = self._parse_encryption_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_ENDARC:
            h, pos = self._parse_block_common(Rar5EndArcInfo(), hdata)
            h = self._parse_endarc_block(h, hdata, pos)
        else:
            h = None
        if h:
            h.header_offset = header_offset
            h.data_offset = data_offset
        return h

    def _parse_block_common(self, h, hdata):
        """Fill the fields shared by all block types.

        Returns ``(h, pos)`` where ``pos`` is the offset of the first
        type-specific field.
        """
        h.header_crc, pos = load_le32(hdata, 0)
        hdrlen, pos = load_vint(hdata, pos)
        h.header_size = hdrlen + pos
        h.block_type, pos = load_vint(hdata, pos)
        h.block_flags, pos = load_vint(hdata, pos)

        if h.block_flags & RAR5_BLOCK_FLAG_EXTRA_DATA:
            h.block_extra_size, pos = load_vint(hdata, pos)
        if h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA:
            h.add_size, pos = load_vint(hdata, pos)

        h.compress_size = h.add_size

        # mirror block flags into the RAR3-style flags field
        if h.block_flags & RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN:
            h.flags |= RAR_SKIP_IF_UNKNOWN
        if h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA:
            h.flags |= RAR_LONG_BLOCK
        return h, pos

    def _parse_main_block(self, h, hdata, pos):
        """Parse main-record fields and map them to RAR3-style flags."""
        h.main_flags, pos = load_vint(hdata, pos)
        if h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR:
            h.main_volume_number, pos = load_vint(hdata, pos)

        h.flags |= RAR_MAIN_NEWNUMBERING
        if h.main_flags & RAR5_MAIN_FLAG_SOLID:
            h.flags |= RAR_MAIN_SOLID
        if h.main_flags & RAR5_MAIN_FLAG_ISVOL:
            h.flags |= RAR_MAIN_VOLUME
        if h.main_flags & RAR5_MAIN_FLAG_RECOVERY:
            h.flags |= RAR_MAIN_RECOVERY
        if self._hdrenc_main:
            # an encryption block was seen before the main block
            h.flags |= RAR_MAIN_PASSWORD
        if h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR == 0:
            h.flags |= RAR_MAIN_FIRSTVOLUME

        return h

    def _parse_file_block(self, h, hdata, pos):
        """Parse file/service record fields, including extra records."""
        h.file_flags, pos = load_vint(hdata, pos)
        h.file_size, pos = load_vint(hdata, pos)
        h.mode, pos = load_vint(hdata, pos)

        if h.file_flags & RAR5_FILE_FLAG_HAS_MTIME:
            h.mtime, pos = load_unixtime(hdata, pos)
            h.date_time = h.mtime.timetuple()[:6]
        if h.file_flags & RAR5_FILE_FLAG_HAS_CRC32:
            h.CRC, pos = load_le32(hdata, pos)
            h._md_class = CRC32Context
            h._md_expect = h.CRC

        h.file_compress_flags, pos = load_vint(hdata, pos)
        h.file_host_os, pos = load_vint(hdata, pos)
        h.orig_filename, pos = load_vstr(hdata, pos)
        h.filename = h.orig_filename.decode("utf8", "replace").rstrip("/")

        # use compatible values
        if h.file_host_os == RAR5_OS_WINDOWS:
            h.host_os = RAR_OS_WIN32
        else:
            h.host_os = RAR_OS_UNIX
        h.compress_type = RAR_M0 + ((h.file_compress_flags >> 7) & 7)

        if h.block_extra_size:
            # allow 1 byte of garbage
            while pos < len(hdata) - 1:
                xsize, pos = load_vint(hdata, pos)
                xdata, pos = load_bytes(hdata, xsize, pos)
                self._process_file_extra(h, xdata)

        if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE:
            h.flags |= RAR_FILE_SPLIT_BEFORE
        if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_AFTER:
            h.flags |= RAR_FILE_SPLIT_AFTER
        if h.file_flags & RAR5_FILE_FLAG_ISDIR:
            h.flags |= RAR_FILE_DIRECTORY
        if h.file_compress_flags & RAR5_COMPR_SOLID:
            h.flags |= RAR_FILE_SOLID

        if h.is_dir():
            h.filename = h.filename + "/"
        return h

    def _parse_endarc_block(self, h, hdata, pos):
        """Parse end-of-archive record flags."""
        h.endarc_flags, pos = load_vint(hdata, pos)
        if h.endarc_flags & RAR5_ENDARC_FLAG_NEXT_VOL:
            h.flags |= RAR_ENDARC_NEXT_VOLUME
        return h

    def _parse_encryption_block(self, h, hdata, pos):
        """Parse the header-encryption record and remember it.

        Raises BadRarFile when the cipher is not AES-256.
        """
        h.encryption_algo, pos = load_vint(hdata, pos)
        h.encryption_flags, pos = load_vint(hdata, pos)
        h.encryption_kdf_count, pos = load_byte(hdata, pos)
        h.encryption_salt, pos = load_bytes(hdata, 16, pos)
        if h.encryption_flags & RAR5_ENC_FLAG_HAS_CHECKVAL:
            # load_bytes returns (data, new_pos); store only the data
            h.encryption_check_value, pos = load_bytes(hdata, 12, pos)
        if h.encryption_algo != RAR5_XENC_CIPHER_AES256:
            raise BadRarFile("Unsupported header encryption cipher")
        self._hdrenc_main = h
        return h

    def _process_file_extra(self, h, xdata):
        """Dispatch one extra record of a file block by its type."""
        xtype, pos = load_vint(xdata, 0)
        if xtype == RAR5_XFILE_TIME:
            self._parse_file_xtime(h, xdata, pos)
        elif xtype == RAR5_XFILE_ENCRYPTION:
            self._parse_file_encryption(h, xdata, pos)
        elif xtype == RAR5_XFILE_HASH:
            self._parse_file_hash(h, xdata, pos)
        elif xtype == RAR5_XFILE_VERSION:
            self._parse_file_version(h, xdata, pos)
        elif xtype == RAR5_XFILE_REDIR:
            self._parse_file_redir(h, xdata, pos)
        elif xtype == RAR5_XFILE_OWNER:
            self._parse_file_owner(h, xdata, pos)
        elif xtype == RAR5_XFILE_SERVICE:
            pass
        else:
            pass

    # extra block for file time record
    def _parse_file_xtime(self, h, xdata, pos):
        """Load mtime/ctime/atime, optionally refined by nanoseconds."""
        tflags, pos = load_vint(xdata, pos)

        # timestamps are windows-style unless the unixtime flag is set
        ldr = load_windowstime
        if tflags & RAR5_XTIME_UNIXTIME:
            ldr = load_unixtime

        if tflags & RAR5_XTIME_HAS_MTIME:
            h.mtime, pos = ldr(xdata, pos)
            h.date_time = h.mtime.timetuple()[:6]
        if tflags & RAR5_XTIME_HAS_CTIME:
            h.ctime, pos = ldr(xdata, pos)
        if tflags & RAR5_XTIME_HAS_ATIME:
            h.atime, pos = ldr(xdata, pos)

        if tflags & RAR5_XTIME_UNIXTIME_NS:
            # nanosecond parts follow in the same order as the timestamps
            if tflags & RAR5_XTIME_HAS_MTIME:
                nsec, pos = load_le32(xdata, pos)
                h.mtime = to_nsdatetime(h.mtime, nsec)
            if tflags & RAR5_XTIME_HAS_CTIME:
                nsec, pos = load_le32(xdata, pos)
                h.ctime = to_nsdatetime(h.ctime, nsec)
            if tflags & RAR5_XTIME_HAS_ATIME:
                nsec, pos = load_le32(xdata, pos)
                h.atime = to_nsdatetime(h.atime, nsec)

    # just remember encryption info
    def _parse_file_encryption(self, h, xdata, pos):
        """Store per-file encryption parameters and mark entry as encrypted."""
        algo, pos = load_vint(xdata, pos)
        flags, pos = load_vint(xdata, pos)
        kdf_count, pos = load_byte(xdata, pos)
        salt, pos = load_bytes(xdata, 16, pos)
        iv, pos = load_bytes(xdata, 16, pos)
        checkval = None
        if flags & RAR5_XENC_CHECKVAL:
            checkval, pos = load_bytes(xdata, 12, pos)
        if flags & RAR5_XENC_TWEAKED:
            # with tweaked checksums the stored hash is not verified
            h._md_expect = None
            h._md_class = NoHashContext

        h.file_encryption = (algo, flags, kdf_count, salt, iv, checkval)
        h.flags |= RAR_FILE_PASSWORD

    def _parse_file_hash(self, h, xdata, pos):
        """Load a BLAKE2sp hash; use it for verification unless tweaked."""
        hash_type, pos = load_vint(xdata, pos)
        if hash_type == RAR5_XHASH_BLAKE2SP:
            h.blake2sp_hash, pos = load_bytes(xdata, 32, pos)
            if (h.file_encryption[1] & RAR5_XENC_TWEAKED) == 0:
                h._md_class = Blake2SP
                h._md_expect = h.blake2sp_hash

    def _parse_file_version(self, h, xdata, pos):
        """Store ``(flags, version)`` in ``h.file_version``."""
        flags, pos = load_vint(xdata, pos)
        version, pos = load_vint(xdata, pos)
        h.file_version = (flags, version)

    def _parse_file_redir(self, h, xdata, pos):
        """Store ``(type, flags, name)`` in ``h.file_redir``."""
        redir_type, pos = load_vint(xdata, pos)
        redir_flags, pos = load_vint(xdata, pos)
        redir_name, pos = load_vstr(xdata, pos)
        redir_name = redir_name.decode("utf8", "replace")
        h.file_redir = (redir_type, redir_flags, redir_name)

    def _parse_file_owner(self, h, xdata, pos):
        """Store ``(user_name, group_name, user_id, group_id)``.

        Fields not present in the record stay None.
        """
        user_name = group_name = user_id = group_id = None

        flags, pos = load_vint(xdata, pos)
        if flags & RAR5_XOWNER_UNAME:
            user_name, pos = load_vstr(xdata, pos)
        if flags & RAR5_XOWNER_GNAME:
            group_name, pos = load_vstr(xdata, pos)
        if flags & RAR5_XOWNER_UID:
            user_id, pos = load_vint(xdata, pos)
        if flags & RAR5_XOWNER_GID:
            group_id, pos = load_vint(xdata, pos)

        h.file_owner = (user_name, group_name, user_id, group_id)

    def process_entry(self, fd, item):
        """Register a parsed block: file entries and archive comment."""
        if item.block_type == RAR5_BLOCK_FILE:
            if item.file_version:
                pass    # skip old versions
            elif (item.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE) == 0:
                # use only first part
                self._info_map[item.filename.rstrip("/")] = item
                self._info_list.append(item)
            elif len(self._info_list) > 0:
                # final crc is in last block
                old = self._info_list[-1]
                old.CRC = item.CRC
                old._md_expect = item._md_expect
                old.blake2sp_hash = item.blake2sp_hash
                old.compress_size += item.compress_size
        elif item.block_type == RAR5_BLOCK_SERVICE:
            if item.filename == "CMT":
                self._load_comment(fd, item)

    def _load_comment(self, fd, item):
        """Load the archive comment from service block ``item``.

        Split or compressed comments, and encrypted comments using a
        cipher other than AES-256, are ignored.  Always returns None;
        the result is stored in ``self.comment``.
        """
        if item.block_flags & (RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER):
            return None
        if item.compress_type != RAR_M0:
            return None

        if item.flags & RAR_FILE_PASSWORD:
            algo, ___flags, kdf_count, salt, iv, ___checkval = item.file_encryption
            if algo != RAR5_XENC_CIPHER_AES256:
                return None
            key = self._gen_key(kdf_count, salt)
            f = HeaderDecrypt(fd, key, iv)
            cmt = f.read(item.file_size)
        else:
            # archive comment
            with self._open_clear(item) as cmtstream:
                cmt = cmtstream.read()

        # rar bug? - appends zero to comment
        cmt = cmt.split(b"\0", 1)[0]
        self.comment = cmt.decode("utf8")
        return None

    def _open_hack(self, inf, pwd):
        """Open ``inf`` via a temporary archive with minimal RAR5 headers."""
        # len, type, blk_flags, flags
        main_hdr = b"\x03\x01\x00\x00"
        endarc_hdr = b"\x03\x05\x00\x00"
        # each header is preceded by the crc32 of its bytes
        main_hdr = S_LONG.pack(crc32(main_hdr)) + main_hdr
        endarc_hdr = S_LONG.pack(crc32(endarc_hdr)) + endarc_hdr
        return self._open_hack_core(inf, pwd, RAR5_ID + main_hdr, endarc_hdr)
2103 ## Utility classes
class UnicodeFilename:
    """Decoder for the compressed unicode filename format of RAR3.

    ``name`` is the 8-bit form of the filename, ``encdata`` the encoded
    stream.  After :meth:`decode`, ``failed`` is 1 if either input ran
    out of bytes during decoding.
    """

    def __init__(self, name, encdata):
        self.std_name = bytearray(name)
        self.encdata = bytearray(encdata)
        self.pos = 0
        self.encpos = 0
        self.buf = bytearray()
        self.failed = 0

    def enc_byte(self):
        """Copy encoded byte."""
        try:
            value = self.encdata[self.encpos]
        except IndexError:
            # ran past the encoded stream
            self.failed = 1
            return 0
        else:
            self.encpos += 1
            return value

    def std_byte(self):
        """Copy byte from 8-bit representation."""
        try:
            value = self.std_name[self.pos]
        except IndexError:
            # 8-bit name is shorter than the encoded stream claims
            self.failed = 1
            value = ord("?")
        return value

    def put(self, lo, hi):
        """Copy 16-bit value to result."""
        self.buf.extend((lo, hi))
        self.pos += 1

    def decode(self):
        """Decompress compressed UTF16 value."""
        hi = self.enc_byte()
        flags = 0
        bits_left = 0
        total = len(self.encdata)
        while self.encpos < total:
            # a fresh flag byte carries four 2-bit opcodes, high bits first
            if not bits_left:
                flags = self.enc_byte()
                bits_left = 8
            bits_left -= 2
            opcode = (flags >> bits_left) & 3

            if opcode == 3:
                n = self.enc_byte()
                if n & 0x80:
                    delta = self.enc_byte()
                    for _ in range((n & 0x7f) + 2):
                        self.put((self.std_byte() + delta) & 0xFF, hi)
                else:
                    for _ in range(n + 2):
                        self.put(self.std_byte(), 0)
            elif opcode == 2:
                lo = self.enc_byte()
                self.put(lo, self.enc_byte())
            elif opcode == 1:
                self.put(self.enc_byte(), hi)
            else:
                self.put(self.enc_byte(), 0)
        return self.buf.decode("utf-16le", "replace")
class RarExtFile(io.RawIOBase):
    """Base class for file-like object that :meth:`RarFile.open` returns.

    Implements the public file API and the final checksum verification;
    subclasses supply :meth:`_read`.

    Behaviour:
     - no short reads - .read() loops until the requested amount
       (capped at the remaining entry size) was delivered.
     - no internal buffer, use io.BufferedReader for that.
    """
    name = None     #: Filename of the archive entry
    mode = "rb"
    _parser = None
    _inf = None
    _fd = None
    _remain = 0
    _returncode = 0
    _md_context = None

    def _open_extfile(self, parser, inf):
        """(Re)initialize reader state for entry ``inf``."""
        self.name = inf.filename
        self._parser = parser
        self._inf = inf

        old_fd = self._fd
        if old_fd:
            old_fd.close()
        # entries without a hash class get the no-op context
        hasher = self._inf._md_class or NoHashContext
        self._md_context = hasher()
        self._fd = None
        self._remain = self._inf.file_size

    def read(self, n=-1):
        """Read all or specified amount of data from archive entry."""

        # sanitize count
        if n is None or n < 0 or n > self._remain:
            n = self._remain
        if n == 0:
            return b""

        wanted = n
        chunks = []
        while n > 0:
            # actual read
            piece = self._read(n)
            if not piece:
                break
            got = len(piece)
            chunks.append(piece)
            self._md_context.update(piece)
            self._remain -= got
            n -= got
        data = b"".join(chunks)
        if n > 0:
            raise BadRarFile("Failed the read enough data: req=%d got=%d" % (wanted, len(data)))

        # done?
        if self._remain == 0 or not data:
            self._check()
        return data

    def _check(self):
        """Check final CRC."""
        final = self._md_context.digest()
        exp = self._inf._md_expect
        # nothing to compare against
        if exp is None or final is None:
            return
        if self._returncode:
            check_returncode(self._returncode, "", tool_setup().get_errmap())
        if self._remain != 0:
            raise BadRarFile("Failed the read enough data")
        if final != exp:
            raise BadRarFile("Corrupt file - CRC check failed: %s - exp=%r got=%r" % (
                self._inf.filename, exp, final))

    def _read(self, cnt):
        """Actual read that gets sanitized cnt."""
        raise NotImplementedError("_read")

    def close(self):
        """Close open resources."""

        super().close()

        fd = self._fd
        if fd:
            fd.close()
            self._fd = None

    def __del__(self):
        """Hook delete to make sure tempfile is removed."""
        self.close()

    def readinto(self, buf):
        """Zero-copy read directly into buffer.

        Returns bytes read.
        """
        raise NotImplementedError("readinto")

    def tell(self):
        """Return current reading position in uncompressed data."""
        return self._inf.file_size - self._remain

    def seek(self, offset, whence=0):
        """Seek in data.

        On uncompressed files, the seeking works by actual
        seeks so it's fast.  On compressed files it's slow
        - forward seeking happens by reading ahead,
        backwards by re-opening and decompressing from the start.
        """

        # disable crc check when seeking
        self._md_context = NoHashContext()

        fsize = self._inf.file_size
        cur_ofs = self.tell()

        if whence == 0:     # seek from beginning of file
            target = offset
        elif whence == 1:   # seek from current position
            target = cur_ofs + offset
        elif whence == 2:   # seek from end of file
            target = fsize + offset
        else:
            raise ValueError("Invalid value for whence")

        # sanity check: clamp into [0, fsize]
        if target < 0:
            target = 0
        elif target > fsize:
            target = fsize

        # do the actual seek
        if target < cur_ofs:
            # reopen and seek
            self._open_extfile(self._parser, self._inf)
            self._skip(target)
        else:
            self._skip(target - cur_ofs)
        return self.tell()

    def _skip(self, cnt):
        """Read and discard data"""
        empty_read(self, cnt, BSIZE)

    def readable(self):
        """Returns True"""
        return True

    def writable(self):
        """Returns False.

        Writing is not supported.
        """
        return False

    def seekable(self):
        """Returns True.

        Seeking is supported, although it's slow on compressed files.
        """
        return True

    def readall(self):
        """Read all remaining data"""
        # avoid RawIOBase default impl
        return self.read()
class PipeReader(RarExtFile):
    """Stream entry data from an external tool's stdout.

    Also removes the optional temp file on close.
    """

    def __init__(self, parser, inf, cmd, tempfile=None):
        super().__init__()
        self._cmd = cmd
        self._proc = None
        self._tempfile = tempfile
        self._open_extfile(parser, inf)

    def _close_proc(self):
        """Close the child's pipes, wait for it and keep its exit code."""
        proc = self._proc
        if not proc:
            return
        for stream in (proc.stdout, proc.stderr, proc.stdin):
            if stream:
                stream.close()
        proc.wait()
        self._returncode = proc.returncode
        self._proc = None

    def _open_extfile(self, parser, inf):
        """Reset state and start a fresh tool process."""
        super()._open_extfile(parser, inf)

        # stop old process
        self._close_proc()

        # launch new process
        self._returncode = 0
        proc = custom_popen(self._cmd)
        self._proc = proc
        self._fd = proc.stdout

    def _read(self, cnt):
        """Read from pipe."""

        # normal read is usually enough
        first = self._fd.read(cnt)
        if len(first) == cnt or not first:
            return first

        # short read, try looping
        pieces = [first]
        missing = cnt - len(first)
        while missing > 0:
            more = self._fd.read(missing)
            if not more:
                break
            missing -= len(more)
            pieces.append(more)
        return b"".join(pieces)

    def close(self):
        """Close open resources."""

        self._close_proc()
        super().close()

        path = self._tempfile
        if path:
            try:
                os.unlink(path)
            except OSError:
                # best effort: failure to remove the temp file is ignored
                pass
            self._tempfile = None

    def readinto(self, buf):
        """Zero-copy read directly into buffer."""
        limit = len(buf)
        if limit > self._remain:
            limit = self._remain
        view = memoryview(buf)
        filled = 0
        while filled < limit:
            step = self._fd.readinto(view[filled: limit])
            if not step:
                break
            self._md_context.update(view[filled: filled + step])
            self._remain -= step
            filled += step
        return filled
class DirectReader(RarExtFile):
    """Read uncompressed data directly from archive.

    Follows the member into the next volume when its header carries
    the RAR_FILE_SPLIT_AFTER flag.
    """

    _cur = None         # header of the part currently being read
    _cur_avail = None   # bytes of the current part not consumed yet
    _volfile = None     # name of the volume currently open

    def __init__(self, parser, inf):
        super().__init__()
        self._open_extfile(parser, inf)

    def _open_extfile(self, parser, inf):
        """Open the member's volume and parse its header again."""
        super()._open_extfile(parser, inf)

        self._volfile = self._inf.volume_file
        self._fd = XFile(self._volfile, 0)
        self._fd.seek(self._inf.header_offset, 0)
        self._cur = self._parser._parse_header(self._fd)
        self._cur_avail = self._cur.add_size

    def _skip(self, cnt):
        """Skip ``cnt`` bytes, moving on to following volumes when needed."""
        while cnt > 0:
            # current part used up - continue in the next volume
            if self._cur_avail == 0 and not self._open_next():
                break

            avail = self._cur_avail
            if cnt > avail:
                # the rest of this part is skipped as a whole, no seek needed
                step = avail
                self._cur_avail = 0
            else:
                step = cnt
                self._fd.seek(step, 1)
                self._cur_avail = avail - step
            self._remain -= step
            cnt -= step

    def _read(self, cnt):
        """Read up to ``cnt`` bytes, possibly spanning several volumes."""

        chunks = []
        while cnt > 0:
            # current part used up - continue in the next volume
            if self._cur_avail == 0 and not self._open_next():
                break

            # never read past the end of the current part
            data = self._fd.read(min(cnt, self._cur_avail))
            if not data:
                break

            chunks.append(data)
            cnt -= len(data)
            self._cur_avail -= len(data)

        if len(chunks) == 1:
            return chunks[0]
        return b"".join(chunks)

    def _open_next(self):
        """Switch to the member's continuation in the next volume.

        Returns False when the current part is not flagged as split;
        raises BadRarFile when the next volume does not match.
        """

        # does the member continue in another volume at all?
        if (self._cur.flags & RAR_FILE_SPLIT_AFTER) == 0:
            return False

        if self._fd:
            self._fd.close()
            self._fd = None

        # open the following volume and verify its signature
        self._volfile = self._parser._next_volname(self._volfile)
        fd = open(self._volfile, "rb", 0)
        self._fd = fd
        expected = self._parser._expect_sig
        if fd.read(len(expected)) != expected:
            raise BadRarFile("Invalid signature")

        # walk the headers until the first file header shows up
        while True:
            hdr = self._parser._parse_header(fd)
            if not hdr:
                raise BadRarFile("Unexpected EOF")
            if hdr.type in (RAR_BLOCK_MARK, RAR_BLOCK_MAIN):
                if hdr.add_size:
                    fd.seek(hdr.add_size, 1)
                continue
            if hdr.orig_filename != self._inf.orig_filename:
                raise BadRarFile("Did not found file entry")
            self._cur = hdr
            self._cur_avail = hdr.add_size
            return True

    def readinto(self, buf):
        """Zero-copy read directly into buffer.

        Returns the number of bytes stored.
        """
        size = len(buf)
        view = memoryview(buf)
        done = 0
        while done < size:
            # current part used up - continue in the next volume
            if self._cur_avail == 0 and not self._open_next():
                break

            # fill as much as the current part can still deliver
            want = min(size - done, self._cur_avail)
            nread = self._fd.readinto(view[done:done + want])
            if not nread:
                break
            self._md_context.update(view[done:done + nread])
            self._cur_avail -= nread
            self._remain -= nread
            done += nread
        return done
class HeaderDecrypt:
    """File-like wrapper that AES-CBC decrypts data read from another file.

    Data is decrypted in 16-byte blocks; decrypted bytes that were not
    requested yet are kept in ``buf`` for the next read.
    """

    def __init__(self, f, key, iv):
        self.f = f
        self.ciph = AES_CBC_Decrypt(key, iv)
        self.buf = b""

    def tell(self):
        """Position of the underlying file - meaningful only on block boundaries."""
        return self.f.tell()

    def read(self, cnt=None):
        """Return up to ``cnt`` decrypted bytes.

        Raises BadRarFile when more than 8 KiB is requested.
        """
        # NOTE(review): the default cnt=None is not usable - the comparison
        # below raises TypeError for None, so callers must pass a number.
        if cnt > 8 * 1024:
            raise BadRarFile("Bad count to header decrypt - wrong password?")

        # serve from already decrypted leftovers when they are sufficient
        pending = self.buf
        if cnt <= len(pending):
            self.buf = pending[cnt:]
            return pending[:cnt]

        # otherwise take all leftovers and decrypt more
        res = pending
        self.buf = b""
        cnt -= len(res)

        blklen = 16
        while cnt > 0:
            enc = self.f.read(blklen)
            if len(enc) < blklen:
                # incomplete cipher block means end of data
                break
            dec = self.ciph.decrypt(enc)
            if cnt < len(dec):
                # only part of the block is wanted, keep the rest for later
                res += dec[:cnt]
                self.buf = dec[cnt:]
                cnt = 0
            else:
                res += dec
                cnt -= len(dec)

        return res
class XFile:
    """Uniform reader over either a filename or an already open file object.

    A file opened here is closed by :meth:`close`; a file object supplied
    by the caller is rewound to the start and left open.
    """
    __slots__ = ("_fd", "_need_close")

    def __init__(self, xfile, bufsize=1024):
        borrowed = is_filelike(xfile)
        self._need_close = not borrowed
        if borrowed:
            self._fd = xfile
            self._fd.seek(0)
        else:
            self._fd = open(xfile, "rb", bufsize)

    def read(self, n=None):
        """Read up to ``n`` bytes from the file."""
        return self._fd.read(n)

    def tell(self):
        """Current position in the file."""
        return self._fd.tell()

    def seek(self, ofs, whence=0):
        """Reposition the file, returns the result of the underlying seek."""
        return self._fd.seek(ofs, whence)

    def readinto(self, buf):
        """Read data straight into ``buf``."""
        return self._fd.readinto(buf)

    def close(self):
        """Close the file, but only if it was opened by this object."""
        if self._need_close:
            self._fd.close()

    def __enter__(self):
        return self

    def __exit__(self, typ, val, tb):
        self.close()
class NoHashContext:
    """Hash context stand-in: accepts data, computes nothing.

    Every method returns None.
    """

    def __init__(self, data=None):
        """Accept and ignore optional initial data."""

    def update(self, data):
        """Ignore the data."""
        return None

    def digest(self):
        """There is no digest, returns None."""
        return None

    def hexdigest(self):
        """There is no hexadecimal digest, returns None."""
        return None
class CRC32Context:
    """Hash-like context computing a running CRC32."""
    __slots__ = ["_crc"]

    def __init__(self, data=None):
        # crc32() starts from 0 by default, same as an empty context
        self._crc = crc32(data) if data else 0

    def update(self, data):
        """Fold more data into the checksum."""
        self._crc = crc32(data, self._crc)

    def digest(self):
        """Current checksum as integer."""
        return self._crc

    def hexdigest(self):
        """Current checksum as 8 lowercase hex digits."""
        return f"{self.digest():08x}"
class Blake2SP:
    """Blake2sp hash context.

    Input is cut into 64-byte blocks that are handed round-robin to
    8 blake2s leaf hashers; the final digest is a blake2s root hash
    over the leaf digests.
    """
    __slots__ = ["_thread", "_buf", "_cur", "_digest"]
    digest_size = 32
    block_size = 64
    parallelism = 8

    def __init__(self, data=None):
        self._buf = b""         # input that does not fill a block yet
        self._cur = 0           # index of the leaf receiving the next block
        self._digest = None     # cached final digest

        last = self.parallelism - 1
        self._thread = [self._blake2s(i, 0, i == last)
                        for i in range(self.parallelism)]

        if data:
            self.update(data)

    def _blake2s(self, ofs, depth, is_last):
        """Create one blake2s node of the two-level tree."""
        return blake2s(node_offset=ofs, node_depth=depth, last_node=is_last,
                       depth=2, inner_size=32, fanout=self.parallelism)

    def _add_block(self, blk):
        """Feed one block to the current leaf, then advance to the next leaf."""
        self._thread[self._cur].update(blk)
        self._cur = (self._cur + 1) % self.parallelism

    def update(self, data):
        """Hash data.
        """
        view = memoryview(data)
        total = len(view)
        bs = self.block_size
        pos = 0

        # complete a partially filled block first
        if self._buf:
            need = bs - len(self._buf)
            if total < need:
                self._buf += view.tobytes()
                return
            self._add_block(self._buf + view[:need].tobytes())
            pos = need

        # hand out all full blocks
        while total - pos >= bs:
            self._add_block(view[pos:pos + bs])
            pos += bs

        # keep the incomplete tail for later
        self._buf = view[pos:].tobytes()

    def digest(self):
        """Return final digest value.
        """
        if self._digest is None:
            # flush the incomplete last block
            if self._buf:
                self._add_block(self._buf)
                self._buf = b""
            root = self._blake2s(0, 1, True)
            for leaf in self._thread:
                root.update(leaf.digest())
            self._digest = root.digest()
        return self._digest

    def hexdigest(self):
        """Hexadecimal digest."""
        return hexlify(self.digest()).decode("ascii")
class Rar3Sha1:
    """Emulate buggy SHA1 from RAR3.

    The digest is a regular SHA1.  With ``rarbug`` enabled, :meth:`update`
    additionally overwrites 64-byte blocks of the caller's buffer in place,
    so the buffer must be writable (e.g. ``bytearray``).
    """
    digest_size = 20
    block_size = 64

    _BLK_BE = struct.Struct(b">16L")
    _BLK_LE = struct.Struct(b"<16L")

    __slots__ = ("_nbytes", "_md", "_rarbug")

    def __init__(self, data=b"", rarbug=False):
        self._md = sha1()
        self._nbytes = 0
        self._rarbug = rarbug
        self.update(data)

    def update(self, data):
        """Hash ``data``; in rarbug mode also corrupt it afterwards."""
        self._md.update(data)
        size = len(data)
        bufpos = self._nbytes & 63
        self._nbytes += size

        if self._rarbug and size > 64:
            bs = self.block_size
            # start at the first hash-block boundary inside data and
            # visit every block that lies completely within it
            for dpos in range(bs - bufpos, size - bs + 1, bs):
                self._corrupt(data, dpos)

    def digest(self):
        """Digest of everything hashed so far, as bytes."""
        return self._md.digest()

    def hexdigest(self):
        """Digest of everything hashed so far, as hex string."""
        return self._md.hexdigest()

    def _corrupt(self, data, dpos):
        """Overwrite the 64-byte block at ``dpos`` with its expanded SHA1 words."""
        words = list(self._BLK_BE.unpack_from(data, dpos))
        for t in range(16, 80):
            mixed = (words[(t - 3) & 15] ^ words[(t - 8) & 15]
                     ^ words[(t - 14) & 15] ^ words[t & 15])
            # rotate left by one bit within 32 bits
            words[t & 15] = ((mixed << 1) | (mixed >> 31)) & 0xFFFFFFFF
        self._BLK_LE.pack_into(data, dpos, *words)
2780 ## Utility functions
# Pre-compiled codecs for little-endian unsigned integers
S_LONG = Struct("<L")       # 32-bit
S_SHORT = Struct("<H")      # 16-bit
S_BYTE = Struct("<B")       # 8-bit

# Pre-compiled fixed-layout header codecs (little-endian, unsigned fields).
# Fields as packed by rar3_decompress(): header CRC, block type, flags, header size
S_BLK_HDR = Struct("<HBHH")
# Fields as packed by rar3_decompress(): packed size, unpacked size, host OS,
# data CRC, DOS date, version, method, name length, mode
S_FILE_HDR = Struct("<LLBLLBBHL")
# 16-bit, 8-bit, 8-bit and 16-bit field
S_COMMENT_HDR = Struct("<HBBH")
def load_vint(buf, pos):
    """Load RAR5 variable-size int.

    Each byte contributes its low 7 bits, least significant group first;
    a byte below 0x80 ends the number.  At most 11 bytes are examined.

    Returns ``(value, new_pos)``; raises BadRarFile if no terminating
    byte is found.
    """
    end = min(pos + 11, len(buf))
    value = 0
    for group, idx in enumerate(range(pos, end)):
        byte = buf[idx]
        value |= (byte & 0x7F) << (7 * group)
        if byte < 0x80:
            return value, idx + 1
    raise BadRarFile("cannot load vint")
def load_byte(buf, pos):
    """Load one unsigned byte.

    Returns ``(value, new_pos)``; raises BadRarFile if ``pos`` is
    beyond the end of the buffer.
    """
    nxt = pos + 1
    if len(buf) < nxt:
        raise BadRarFile("cannot load byte")
    (value,) = S_BYTE.unpack_from(buf, pos)
    return value, nxt
def load_le32(buf, pos):
    """Load one little-endian unsigned 32-bit integer.

    Returns ``(value, new_pos)``; raises BadRarFile if fewer than
    4 bytes are available at ``pos``.
    """
    nxt = pos + 4
    if len(buf) < nxt:
        raise BadRarFile("cannot load le32")
    (value,) = S_LONG.unpack_from(buf, pos)
    return value, nxt
def load_bytes(buf, num, pos):
    """Load ``num`` bytes starting at ``pos``.

    Returns ``(data, new_pos)``; raises BadRarFile if the buffer is too short.
    """
    stop = pos + num
    if len(buf) < stop:
        raise BadRarFile("cannot load bytes")
    chunk = buf[pos:stop]
    return chunk, stop
def load_vstr(buf, pos):
    """Load a byte string whose length is given by a preceding vint.

    Returns ``(data, new_pos)``.
    """
    length, start = load_vint(buf, pos)
    return load_bytes(buf, length, start)
def load_dostime(buf, pos):
    """Load a little-endian 32-bit DOS timestamp as datetime.

    Returns ``(datetime, new_pos)``.
    """
    stamp, pos = load_le32(buf, pos)
    return to_datetime(parse_dos_time(stamp)), pos
def load_unixtime(buf, pos):
    """Load a little-endian 32-bit unix timestamp as UTC datetime.

    Returns ``(datetime, new_pos)``.
    """
    secs, pos = load_le32(buf, pos)
    return datetime.fromtimestamp(secs, timezone.utc), pos
def load_windowstime(buf, pos):
    """Load a little-endian 64-bit windows timestamp as UTC datetime.

    The value counts 100ns units since 1601; the sub-second part is
    preserved with nanosecond precision.

    Returns ``(datetime, new_pos)``.
    """
    # seconds between the windows epoch (1601) and the unix epoch (1970)
    epoch_diff = 11644473600

    # the 64-bit value is stored as two 32-bit halves, low half first
    low, pos = load_le32(buf, pos)
    high, pos = load_le32(buf, pos)
    ticks = (high << 32) | low

    secs, frac = divmod(ticks, 10000000)
    stamp = datetime.fromtimestamp(secs - epoch_diff, timezone.utc)
    return to_nsdatetime(stamp, frac * 100), pos
2863 # volume numbering
# Matches a string made up of ASCII digits only; _next_oldvol() uses it
# to recognize numbered extensions
_rc_num = re.compile('^[0-9]+$')
def _next_newvol(volfile):
    """New-style next volume

    Increments the last digit run found in the final path component.
    Raises BadRarName if that component contains no digit.
    """
    stem, ext = os.path.splitext(volfile)
    if ext.lower() in ("", ".exe", ".sfx"):
        volfile = stem + ".rar"

    # scan backwards for a digit, but do not leave the file name
    for idx in range(len(volfile) - 1, -1, -1):
        ch = volfile[idx]
        if "0" <= ch <= "9":
            return _inc_volname(volfile, idx, False)
        if ch in ("/", os.sep):
            break
    raise BadRarName("Cannot construct volume name: " + volfile)
def _next_oldvol(volfile):
    """Old-style next volume

    The volume number lives in the extension: ``.rar`` is followed by
    ``.r00``, numbered extensions are incremented.
    """
    stem, ext = os.path.splitext(volfile)
    if ext.lower() in ("", ".exe", ".sfx"):
        ext = ".rar"

    if _rc_num.match(ext[2:]):
        # numbered extension: increment, letting the carry bump the letter
        next_ext = _inc_volname(ext, len(ext) - 1, True)
    else:
        # .rar -> .r00
        next_ext = ext[:2] + "00"
    return stem + next_ext
def _inc_volname(volfile, i, inc_chars):
    """Increment the number ending at index ``i``, with carry.

    A "9" turns into "0" and the carry moves one position to the left.
    When the carry hits a non-digit, that character is incremented if
    ``inc_chars`` is set, otherwise a new leading "1" is inserted after it.
    """
    chars = list(volfile)
    while i >= 0:
        ch = chars[i]
        if ch == "9":
            # wrap around and carry to the left
            chars[i] = "0"
            i -= 1
            if i < 0:
                chars.insert(0, "1")
            continue
        if inc_chars or "0" <= ch < "9":
            chars[i] = chr(ord(ch) + 1)
        else:
            chars.insert(i + 1, "1")
        break
    return "".join(chars)
def _parse_ext_time(h, data, pos):
    """Parse all RAR3 extended time fields

    Stores ctime, atime and arctime on ``h`` and refines ``h.mtime``
    (and ``h.date_time``) when an extended mtime is present.

    Returns the position after the parsed data.
    """
    # flags and rest of data can be missing
    flags = 0
    if pos + 2 <= len(data):
        (flags,) = S_SHORT.unpack_from(data, pos)
        pos += 2

    # one 4-bit group per timestamp, mtime in the highest group
    mtime, pos = _parse_xtime(flags >> 12, data, pos, h.mtime)
    h.ctime, pos = _parse_xtime(flags >> 8, data, pos)
    h.atime, pos = _parse_xtime(flags >> 4, data, pos)
    h.arctime, pos = _parse_xtime(flags, data, pos)

    if mtime:
        h.mtime = mtime
        h.date_time = mtime.timetuple()[:6]
    return pos
def _parse_xtime(flag, data, pos, basetime=None):
    """Parse one RAR3 extended time field

    Bits of ``flag``: 8 - field is present, 4 - add one second,
    low two bits - number of fraction bytes that follow.

    Returns ``(timestamp_or_None, new_pos)``.
    """
    if not flag & 8:
        return None, pos

    if not basetime:
        basetime, pos = load_dostime(data, pos)

    # load second fractions of 100ns units
    frac = 0
    for _ in range(flag & 3):
        byte, pos = load_byte(data, pos)
        frac = (byte << 16) | (frac >> 8)

    # dostime has room for 30 seconds only, correct if needed
    if flag & 4 and basetime.second < 59:
        basetime = basetime.replace(second=basetime.second + 1)

    return to_nsdatetime(basetime, frac * 100), pos
def is_filelike(obj):
    """Tell a file object from a filename.

    Returns False for ``bytes``, ``str`` and ``Path`` (filenames) and True
    for objects having ``read``, ``tell`` and ``seek``.  Anything else
    raises ValueError.
    """
    if isinstance(obj, (bytes, str, Path)):
        return False
    if not all(hasattr(obj, attr) for attr in ("read", "tell", "seek")):
        raise ValueError("Invalid object passed as file")
    return True
def rar3_s2k(pwd, salt):
    """String-to-key hash for RAR3.

    Feeds password (as UTF-16LE) plus salt and a 3-byte counter into the
    RAR3 flavour of SHA1 for 16 * 0x4000 rounds.

    Returns ``(key, iv)``: 16 key bytes and 16 IV bytes.
    """
    if not isinstance(pwd, str):
        pwd = pwd.decode("utf8")
    # must be mutable: the buggy SHA1 may overwrite parts of its input
    seed = bytearray(pwd.encode("utf-16le") + salt)
    md = Rar3Sha1(rarbug=True)

    iv_bytes = []
    for rnd in range(16 * 0x4000):
        counter = S_LONG.pack(rnd)
        md.update(seed)
        md.update(counter[:3])
        # at the start of each of the 16 groups one digest byte goes into the IV
        if rnd % 0x4000 == 0:
            iv_bytes.append(md.digest()[19:20])

    # the key is the first four digest words with their byte order swapped
    key_be = md.digest()[:16]
    key_le = pack("<LLLL", *unpack(">LLLL", key_be))
    return key_le, b"".join(iv_bytes)
def rar3_decompress(vers, meth, data, declen=0, flags=0, crc=0, pwd=None, salt=None):
    """Decompress blob of compressed data.

    Used for data with non-standard header - eg. comments.

    The data is wrapped into a minimal one-file archive, written to a
    temporary file and handed to the configured external tool.  Returns
    what the tool wrote to its output pipe; stored data without
    password is returned unchanged.
    """
    # already uncompressed?
    if meth == RAR_M0 and (flags & RAR_FILE_PASSWORD) == 0:
        return data

    # take only necessary flags
    flags &= RAR_FILE_PASSWORD | RAR_FILE_SALT | RAR_FILE_DICTMASK
    flags |= RAR_LONG_BLOCK

    # file header of a dummy entry "data" dated 2010-12-31
    fname = b"data"
    dos_date = ((2010 - 1980) << 25) + (12 << 21) + (31 << 16)
    fhdr = S_FILE_HDR.pack(len(data), declen, RAR_OS_MSDOS, crc,
                           dos_date, vers, meth, len(fname), DOS_MODE_ARCHIVE)
    fhdr += fname
    if salt:
        fhdr += salt

    # full header: pack with zero CRC first, then again with the CRC
    # computed over everything after the CRC field
    hlen = S_BLK_HDR.size + len(fhdr)
    draft = S_BLK_HDR.pack(0, RAR_BLOCK_FILE, flags, hlen) + fhdr
    hcrc = crc32(draft[2:]) & 0xFFFF
    hdr = S_BLK_HDR.pack(hcrc, RAR_BLOCK_FILE, flags, hlen) + fhdr

    # archive main header
    mh = S_BLK_HDR.pack(0x90CF, RAR_BLOCK_MAIN, 0, 13) + b"\0" * (2 + 4)

    # decompress via temp rar
    setup = tool_setup()
    tmpfd, tmpname = mkstemp(suffix=".rar", dir=HACK_TMP_DIR)
    tmpf = os.fdopen(tmpfd, "wb")
    try:
        # file must be closed before the tool reads it
        with tmpf:
            tmpf.write(RAR_ID + mh + hdr + data)

        # password is passed on only for encrypted data
        curpwd = pwd if (flags & RAR_FILE_PASSWORD) and pwd else None
        cmd = setup.open_cmdline(curpwd, tmpname)
        proc = custom_popen(cmd)
        return proc.communicate()[0]
    finally:
        os.unlink(tmpname)
def sanitize_filename(fname, pathsep, is_win32):
    """Simulate unrar sanitization.

    Replaces characters matched by the platform's bad-character pattern
    with "_", drops empty, "." and ".." path components and joins the
    remaining ones with ``pathsep``.  In win32 mode a leading two-character
    prefix ending in ":" is removed and a trailing space or dot of a
    component becomes "_".
    """
    if is_win32:
        if fname[1:2] == ":":
            fname = fname[2:]
        bad_chars = RC_BAD_CHARS_WIN32
    else:
        bad_chars = RC_BAD_CHARS_UNIX
    if bad_chars.search(fname):
        fname = bad_chars.sub("_", fname)

    kept = [seg for seg in fname.split("/") if seg not in ("", ".", "..")]
    if is_win32:
        kept = [seg[:-1] + "_" if seg.endswith((" ", ".")) else seg
                for seg in kept]
    return pathsep.join(kept)
def empty_read(src, size, blklen):
    """Read and drop fixed amount of data.

    Reads ``size`` bytes from ``src`` in pieces of at most ``blklen``
    bytes.  Raises BadRarFile if the data ends early.
    """
    left = size
    while left > 0:
        chunk = src.read(min(left, blklen))
        if not chunk:
            raise BadRarFile("cannot load data")
        left -= len(chunk)
def to_datetime(t):
    """Convert 6-part time tuple into datetime object.

    Out-of-range fields are clamped into their valid range instead of
    failing; February is always limited to 28 days when clamping.
    """
    year, mon, day, h, m, s = t

    # fast path: the values are usually fine
    try:
        return datetime(year, mon, day, h, m, s)
    except ValueError:
        pass

    # clamp invalid values
    days_in_month = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
    mon = min(max(mon, 1), 12)
    day = min(max(day, 1), days_in_month[mon - 1])
    return datetime(year, mon, day, min(h, 23), min(m, 59), min(s, 59))
def parse_dos_time(stamp):
    """Parse standard 32-bit DOS timestamp.

    Returns ``(year, month, day, hour, minute, second)``; seconds have
    a resolution of two.
    """
    # bit layout, low to high: sec/2 (5), min (6), hour (5), day (5), month (4), year-1980 (7)
    sec2 = stamp & 0x1F
    mn = (stamp >> 5) & 0x3F
    hr = (stamp >> 11) & 0x1F
    day = (stamp >> 16) & 0x1F
    mon = (stamp >> 21) & 0x0F
    yr = ((stamp >> 25) & 0x7F) + 1980
    return (yr, mon, day, hr, mn, sec2 * 2)
3113 # pylint: disable=arguments-differ,signature-differs
class nsdatetime(datetime):
    """Datetime that carries nanoseconds.

    Arithmetic not supported, will lose nanoseconds.

    Instances exist only for values that need more than microsecond
    precision: the constructor returns a plain :class:`datetime` when
    the nanosecond value is a whole number of microseconds.

    Comparison with objects that are not datetimes follows the usual
    protocol: ``==`` is False, ``!=`` is True and ordering raises TypeError.

    .. versionadded:: 4.0
    """
    __slots__ = ("nanosecond",)
    nanosecond: int     #: Number of nanoseconds, 0 <= nanosecond <= 999999999

    def __new__(cls, year, month=None, day=None, hour=0, minute=0, second=0,
                microsecond=0, tzinfo=None, *, fold=0, nanosecond=0):
        # a non-zero nanosecond argument takes precedence over microsecond
        usec, mod = divmod(nanosecond, 1000) if nanosecond else (microsecond, 0)
        if mod == 0:
            # nothing below microseconds to carry: plain datetime is enough
            return datetime(year, month, day, hour, minute, second, usec, tzinfo, fold=fold)
        self = super().__new__(cls, year, month, day, hour, minute, second, usec, tzinfo, fold=fold)
        self.nanosecond = nanosecond
        return self

    @staticmethod
    def _nsec_of(other):
        """Sub-second part of a datetime or nsdatetime in nanoseconds."""
        return other.nanosecond if isinstance(other, nsdatetime) else other.microsecond * 1000

    def isoformat(self, sep="T", timespec="auto"):
        """Formats with nanosecond precision by default.
        """
        if timespec == "auto":
            # replace the 6 microsecond digits with 9 nanosecond digits,
            # keeping whatever follows them (the UTC offset)
            pre, post = super().isoformat(sep, "microseconds").split(".", 1)
            return f"{pre}.{self.nanosecond:09d}{post[6:]}"
        return super().isoformat(sep, timespec)

    def astimezone(self, tz=None):
        """Convert to new timezone.
        """
        tmp = super().astimezone(tz)
        return self.__class__(tmp.year, tmp.month, tmp.day, tmp.hour, tmp.minute, tmp.second,
                              nanosecond=self.nanosecond, tzinfo=tmp.tzinfo, fold=tmp.fold)

    def replace(self, year=None, month=None, day=None, hour=None, minute=None, second=None,
                microsecond=None, tzinfo=None, *, fold=None, nanosecond=None):
        """Return new timestamp with specified fields replaced.

        ``nanosecond`` wins over ``microsecond`` when both are given.
        """
        return self.__class__(
            self.year if year is None else year,
            self.month if month is None else month,
            self.day if day is None else day,
            self.hour if hour is None else hour,
            self.minute if minute is None else minute,
            self.second if second is None else second,
            nanosecond=((self.nanosecond if microsecond is None else microsecond * 1000)
                        if nanosecond is None else nanosecond),
            tzinfo=self.tzinfo if tzinfo is None else tzinfo,
            fold=self.fold if fold is None else fold)

    def __hash__(self):
        return hash((super().__hash__(), self.nanosecond)) if self.nanosecond else super().__hash__()

    def __eq__(self, other):
        res = super().__eq__(other)
        # NotImplemented is truthy: hand it back instead of touching
        # attributes that a non-datetime operand does not have
        if res is NotImplemented or not res:
            return res
        return self.nanosecond == self._nsec_of(other)

    def __gt__(self, other):
        res = super().__gt__(other)
        if res is NotImplemented or res:
            return res
        # equal down to the microsecond: nanoseconds decide
        return super().__eq__(other) and self.nanosecond > self._nsec_of(other)

    def __lt__(self, other):
        return not (self > other or self == other)

    def __ge__(self, other):
        return not self < other

    def __le__(self, other):
        return not self > other

    def __ne__(self, other):
        return not self == other
def to_nsdatetime(dt, nsec):
    """Apply nanoseconds to datetime.

    Returns ``dt`` itself when ``nsec`` is zero, otherwise a new
    timestamp with the same date, time, tzinfo and fold whose sub-second
    part is ``nsec`` nanoseconds.
    """
    if nsec:
        return nsdatetime(dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second,
                          tzinfo=dt.tzinfo, fold=dt.fold, nanosecond=nsec)
    return dt
def to_nsecs(dt):
    """Convert datetime instance to nanoseconds.

    Returns the number of nanoseconds since the unix epoch as integer,
    using the ``nanosecond`` field of :class:`nsdatetime` objects and
    the microseconds of plain datetimes.
    """
    # Take the timestamp of the whole second only: it is an exact integer,
    # so the result neither depends on float rounding of the fraction nor
    # gets truncated towards zero for timestamps before 1970.
    secs = int(dt.replace(microsecond=0).timestamp())
    nsecs = dt.nanosecond if isinstance(dt, nsdatetime) else dt.microsecond * 1000
    return secs * 1000000000 + nsecs
def custom_popen(cmd):
    """Disconnect cmd from parent fds, read only from stdout.

    Starts ``cmd`` unbuffered with stdin connected to the null device
    and stderr merged into stdout.  Returns the Popen object; raises
    RarCannotExec when the executable is missing or cannot be run.
    """
    # CREATE_NO_WINDOW on Windows
    flags = 0x08000000 if WIN32 else 0
    try:
        return Popen(cmd, bufsize=0, stdout=PIPE, stderr=STDOUT, stdin=DEVNULL,
                     creationflags=flags)
    except OSError as ex:
        if ex.errno == errno.ENOENT:
            raise RarCannotExec("Unrar not installed?") from None
        if ex.errno in (errno.EACCES, errno.EPERM):
            raise RarCannotExec("Cannot execute unrar") from None
        raise
def check_returncode(code, out, errmap):
    """Raise exception according to unrar exit code.

    Does nothing for exit code 0.  Otherwise the exception class is
    looked up in ``errmap`` by exit code; 255, negative and unmapped
    codes have dedicated exception classes.  The message is built from
    the class docstring, the code and, if not empty, ``out``.
    """
    if code == 0:
        return

    if 0 < code < len(errmap):
        exc = errmap[code]
    elif code == 255:
        exc = RarUserBreak
    elif code < 0:
        exc = RarSignalExit
    else:
        exc = RarUnknownError

    # format message
    msg = "%s [%d]: %s" % (exc.__doc__, code, out) if out else "%s [%d]" % (exc.__doc__, code)
    raise exc(msg)
def membuf_tempfile(memfile):
    """Write in-memory file object to real file.

    Copies ``memfile`` from its start into a new ".rar" temporary file
    and returns the file's name.  The temporary file is removed again
    if copying fails.
    """
    memfile.seek(0, 0)

    handle, path = mkstemp(suffix=".rar", dir=HACK_TMP_DIR)
    out = os.fdopen(handle, "wb")

    try:
        shutil.copyfileobj(memfile, out, BSIZE)
        out.close()
    except BaseException:
        # do not leave a half-written file behind, then pass the error on
        out.close()
        os.unlink(path)
        raise
    return path
3264 # Find working command-line tool
class ToolSetup:
    """Command-line builder for one external tool.

    ``setup`` is the tool's config mapping with the keys "open_cmd",
    "check_cmd", "password", "no_password" and "errmap".
    """

    def __init__(self, setup):
        self.setup = setup

    def check(self):
        """Return True if the check command can be run and exits with 0."""
        cmdline = self.get_cmdline("check_cmd", None)
        try:
            proc = custom_popen(cmdline)
            proc.communicate()
        except RarCannotExec:
            return False
        return proc.returncode == 0

    def open_cmdline(self, pwd, rarfn, filefn=None):
        """Build the command line for reading ``filefn`` from archive ``rarfn``."""
        cmdline = self.get_cmdline("open_cmd", pwd)
        cmdline.append(rarfn)
        if filefn:
            self.add_file_arg(cmdline, filefn)
        return cmdline

    def get_errmap(self):
        """Return the tool's exit code to exception class table."""
        return self.setup["errmap"]

    def get_cmdline(self, key, pwd, nodash=False):
        """Build the argument list for the command configured under ``key``.

        The first configured item names the module global that holds the
        executable.  Password arguments are added, followed by "--"
        unless ``nodash`` is set.
        """
        cmdline = list(self.setup[key])
        tool_var = cmdline[0]
        cmdline[0] = globals()[tool_var]
        self.add_password_arg(cmdline, pwd)
        if not nodash:
            cmdline.append("--")
        return cmdline

    def add_file_arg(self, cmdline, filename):
        """Append the archive member name to the command line."""
        cmdline.append(filename)

    def add_password_arg(self, cmdline, pwd):
        """Append password switch to commandline.

        Without password the configured "no_password" arguments are added.
        Raises RarCannotExec if a password is given but the tool has
        no password option.
        """
        if pwd is None:
            cmdline.extend(self.setup["no_password"])
            return

        if not isinstance(pwd, str):
            pwd = pwd.decode("utf8")
        switch = self.setup["password"]
        if switch is None:
            tool = self.setup["open_cmd"][0]
            raise RarCannotExec(f"{tool} does not support passwords")
        if isinstance(switch, str):
            # single argument: switch and password glued together
            cmdline.append(switch + pwd)
        else:
            # password goes into its own argument after the switch
            cmdline.extend(switch)
            cmdline.append(pwd)
# Backend configurations consumed by ToolSetup.  Keys:
#   open_cmd    - start of the command line built by ToolSetup.open_cmdline();
#                 the first item names the module global holding the executable
#   check_cmd   - command line run by ToolSetup.check() to test the tool
#   password    - password switch: a string is prefixed to the password,
#                 a tuple is followed by the password as separate argument,
#                 None means passwords are not supported
#   no_password - arguments added when no password is given
#   errmap      - exception classes indexed by exit code
UNRAR_CONFIG = {
    "open_cmd": ("UNRAR_TOOL", "p", "-inul"),
    "check_cmd": ("UNRAR_TOOL", "-inul"),
    "password": "-p",
    "no_password": ("-p-",),
    # map return code to exception class, codes from rar.txt
    "errmap": [None,
               RarWarning, RarFatalError, RarCRCError, RarLockedArchiveError,    # 1..4
               RarWriteError, RarOpenError, RarUserError, RarMemoryError,        # 5..8
               RarCreateError, RarNoFilesError, RarWrongPassword]                # 9..11
}

# Problems with unar RAR backend:
# - Does not support RAR2 locked files [fails to read]
# - Does not support RAR5 Blake2sp hash [reading works]
UNAR_CONFIG = {
    "open_cmd": ("UNAR_TOOL", "-q", "-o", "-"),
    "check_cmd": ("UNAR_TOOL", "-version"),
    "password": ("-p",),
    "no_password": ("-p", ""),
    "errmap": [None],
}

# Problems with libarchive RAR backend:
# - Does not support solid archives.
# - Does not support password-protected archives.
# - Does not support RARVM-based compression filters.
BSDTAR_CONFIG = {
    "open_cmd": ("BSDTAR_TOOL", "-x", "--to-stdout", "-f"),
    "check_cmd": ("BSDTAR_TOOL", "--version"),
    "password": None,
    "no_password": (),
    "errmap": [None],
}

# ToolSetup chosen by tool_setup(); None until a working tool was found
CURRENT_SETUP = None
def tool_setup(unrar=True, unar=True, bsdtar=True, force=False):
    """Pick a tool, return cached ToolSetup.

    The enabled backends are probed in the order unrar, unar, bsdtar and
    the first one that passes its check is cached in ``CURRENT_SETUP``.
    A cached setup is returned as is unless ``force`` is set.

    Raises RarCannotExec if no enabled tool works.
    """
    global CURRENT_SETUP
    if force:
        CURRENT_SETUP = None
    if CURRENT_SETUP is not None:
        return CURRENT_SETUP

    choices = ((unrar, UNRAR_CONFIG), (unar, UNAR_CONFIG), (bsdtar, BSDTAR_CONFIG))
    for conf in [conf for enabled, conf in choices if enabled]:
        setup = ToolSetup(conf)
        if setup.check():
            CURRENT_SETUP = setup
            return setup
    raise RarCannotExec("Cannot find working tool")
def main(args):
    """Minimal command-line interface for rarfile module.
    """
    # NOTE: the docstring above doubles as the argparse description,
    # so changing it changes the --help output.
    import argparse
    parser = argparse.ArgumentParser(description=main.__doc__)

    # exactly one of the actions must be given
    actions = parser.add_mutually_exclusive_group(required=True)
    actions.add_argument("-l", "--list", metavar="<rarfile>",
                         help="Show archive listing")
    actions.add_argument("-e", "--extract", nargs=2,
                         metavar=("<rarfile>", "<output_dir>"),
                         help="Extract archive into target dir")
    actions.add_argument("-t", "--test", metavar="<rarfile>",
                         help="Test if a archive is valid")
    opts = parser.parse_args(args)

    if opts.list:
        with RarFile(opts.list) as rf:
            rf.printdir()
    elif opts.test:
        with RarFile(opts.test) as rf:
            rf.testrar()
    elif opts.extract:
        archive, target_dir = opts.extract[0], opts.extract[1]
        with RarFile(archive) as rf:
            rf.extractall(target_dir)
# Run the command-line interface when executed as a script,
# passing on the arguments that follow the script name
if __name__ == "__main__":
    main(sys.argv[1:])