Minor load_le32 cleanup
[rarfile.git] / rarfile.py
blob623547c52093f415597eadcc1dbc3cea5596e114
1 # rarfile.py
3 # Copyright (c) 2005-2020 Marko Kreen <markokr@gmail.com>
5 # Permission to use, copy, modify, and/or distribute this software for any
6 # purpose with or without fee is hereby granted, provided that the above
7 # copyright notice and this permission notice appear in all copies.
9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 """RAR archive reader.
This is a Python module for reading RAR archives. Its interface
is made as :mod:`zipfile`-like as possible.
22 Basic logic:
23 - Parse archive structure with Python.
24 - Extract non-compressed files with Python
25 - Extract compressed files with unrar.
26 - Optionally write compressed data to temp file to speed up unrar,
27 otherwise it needs to scan whole archive on each execution.
29 Example::
31 import rarfile
33 rf = rarfile.RarFile("myarchive.rar")
34 for f in rf.infolist():
35 print(f.filename, f.file_size)
36 if f.filename == "README":
37 print(rf.read(f))
39 Archive files can also be accessed via file-like object returned
40 by :meth:`RarFile.open`::
42 import rarfile
44 with rarfile.RarFile("archive.rar") as rf:
45 with rf.open("README") as f:
46 for ln in f:
47 print(ln.strip())
49 For decompression to work, either ``unrar`` or ``unar`` tool must be in PATH.
50 """
52 import errno
53 import io
54 import os
55 import re
56 import shutil
57 import struct
58 import sys
59 import warnings
60 from binascii import crc32, hexlify
61 from datetime import datetime, timezone
62 from hashlib import blake2s, pbkdf2_hmac, sha1
63 from pathlib import Path
64 from struct import Struct, pack, unpack
65 from subprocess import DEVNULL, PIPE, STDOUT, Popen
66 from tempfile import mkstemp
68 # only needed for encrypted headers
69 try:
70 try:
71 from cryptography.hazmat.backends import default_backend
72 from cryptography.hazmat.primitives.ciphers import (
73 Cipher, algorithms, modes,
75 _have_crypto = 1
76 except ImportError:
77 from Crypto.Cipher import AES
78 _have_crypto = 2
79 except ImportError:
80 _have_crypto = 0
class AES_CBC_Decrypt:
    """AES-CBC decryptor that hides which crypto backend was imported.

    After construction the instance exposes a single callable attribute,
    ``decrypt``, taken from whichever backend is selected by ``_have_crypto``.
    """

    def __init__(self, key, iv):
        if _have_crypto != 2:
            # "cryptography" package: use the decryptor's update() method
            cipher = Cipher(algorithms.AES(key), modes.CBC(iv), default_backend())
            decrypt_fn = cipher.decryptor().update
        else:
            # Crypto.Cipher.AES fallback
            decrypt_fn = AES.new(key, AES.MODE_CBC, iv).decrypt
        self.decrypt = decrypt_fn
# Version of this module.
__version__ = "4.1a1"

# export only interesting items
__all__ = ["get_rar_version", "is_rarfile", "is_rarfile_sfx", "RarInfo", "RarFile", "RarExtFile"]

##
## Module configuration.  Can be tuned after importing.
##

#: executable for unrar tool
UNRAR_TOOL = "unrar"

#: executable for unar tool
UNAR_TOOL = "unar"

#: executable for bsdtar tool
BSDTAR_TOOL = "bsdtar"

#: default fallback charset
DEFAULT_CHARSET = "windows-1252"

#: list of encodings to try, with fallback to DEFAULT_CHARSET if none succeed
TRY_ENCODINGS = ("utf8", "utf-16le")

#: whether to speed up decompression by using tmp archive
USE_EXTRACT_HACK = 1

#: limit the filesize for tmp archive usage
HACK_SIZE_LIMIT = 20 * 1024 * 1024

#: set specific directory for mkstemp() used by hack dir usage
HACK_TMP_DIR = None

#: Separator for path name components.  Always "/".
PATH_SEP = "/"
##
## rar constants
##

# block types (values are consecutive ASCII codes, see trailing comments)
RAR_BLOCK_MARK = 0x72  # r
RAR_BLOCK_MAIN = 0x73  # s
RAR_BLOCK_FILE = 0x74  # t
RAR_BLOCK_OLD_COMMENT = 0x75  # u
RAR_BLOCK_OLD_EXTRA = 0x76  # v
RAR_BLOCK_OLD_SUB = 0x77  # w
RAR_BLOCK_OLD_RECOVERY = 0x78  # x
RAR_BLOCK_OLD_AUTH = 0x79  # y
RAR_BLOCK_SUB = 0x7a  # z
RAR_BLOCK_ENDARC = 0x7b  # {

# flags for RAR_BLOCK_MAIN
RAR_MAIN_VOLUME = 0x0001
RAR_MAIN_COMMENT = 0x0002
RAR_MAIN_LOCK = 0x0004
RAR_MAIN_SOLID = 0x0008
RAR_MAIN_NEWNUMBERING = 0x0010
RAR_MAIN_AUTH = 0x0020
RAR_MAIN_RECOVERY = 0x0040
RAR_MAIN_PASSWORD = 0x0080
RAR_MAIN_FIRSTVOLUME = 0x0100
RAR_MAIN_ENCRYPTVER = 0x0200

# flags for RAR_BLOCK_FILE
RAR_FILE_SPLIT_BEFORE = 0x0001
RAR_FILE_SPLIT_AFTER = 0x0002
RAR_FILE_PASSWORD = 0x0004
RAR_FILE_COMMENT = 0x0008
RAR_FILE_SOLID = 0x0010
# RAR_FILE_DICT* and RAR_FILE_DIRECTORY are values of the 3-bit field
# selected by RAR_FILE_DICTMASK, not independent bits;
# RAR_FILE_DIRECTORY equals the full mask value.
RAR_FILE_DICTMASK = 0x00e0
RAR_FILE_DICT64 = 0x0000
RAR_FILE_DICT128 = 0x0020
RAR_FILE_DICT256 = 0x0040
RAR_FILE_DICT512 = 0x0060
RAR_FILE_DICT1024 = 0x0080
RAR_FILE_DICT2048 = 0x00a0
RAR_FILE_DICT4096 = 0x00c0
RAR_FILE_DIRECTORY = 0x00e0
RAR_FILE_LARGE = 0x0100
RAR_FILE_UNICODE = 0x0200
RAR_FILE_SALT = 0x0400
RAR_FILE_VERSION = 0x0800
RAR_FILE_EXTTIME = 0x1000
RAR_FILE_EXTFLAGS = 0x2000

# flags for RAR_BLOCK_ENDARC
RAR_ENDARC_NEXT_VOLUME = 0x0001
RAR_ENDARC_DATACRC = 0x0002
RAR_ENDARC_REVSPACE = 0x0004
RAR_ENDARC_VOLNR = 0x0008

# flags common to all blocks
RAR_SKIP_IF_UNKNOWN = 0x4000
RAR_LONG_BLOCK = 0x8000

# Host OS types
RAR_OS_MSDOS = 0  #: MSDOS (only in RAR3)
RAR_OS_OS2 = 1  #: OS2 (only in RAR3)
RAR_OS_WIN32 = 2  #: Windows
RAR_OS_UNIX = 3  #: UNIX
RAR_OS_MACOS = 4  #: MacOS (only in RAR3)
RAR_OS_BEOS = 5  #: BeOS (only in RAR3)

# Compression methods - "0".."5" (ASCII codes of the digit characters)
RAR_M0 = 0x30  #: No compression.
RAR_M1 = 0x31  #: Compression level `-m1` - Fastest compression.
RAR_M2 = 0x32  #: Compression level `-m2`.
RAR_M3 = 0x33  #: Compression level `-m3`.
RAR_M4 = 0x34  #: Compression level `-m4`.
RAR_M5 = 0x35  #: Compression level `-m5` - Maximum compression.
##
## RAR5 constants
##

# block types
RAR5_BLOCK_MAIN = 1
RAR5_BLOCK_FILE = 2
RAR5_BLOCK_SERVICE = 3
RAR5_BLOCK_ENCRYPTION = 4
RAR5_BLOCK_ENDARC = 5

# flags common to all RAR5 blocks
RAR5_BLOCK_FLAG_EXTRA_DATA = 0x01
RAR5_BLOCK_FLAG_DATA_AREA = 0x02
RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN = 0x04
RAR5_BLOCK_FLAG_SPLIT_BEFORE = 0x08
RAR5_BLOCK_FLAG_SPLIT_AFTER = 0x10
RAR5_BLOCK_FLAG_DEPENDS_PREV = 0x20
RAR5_BLOCK_FLAG_KEEP_WITH_PARENT = 0x40

# flags for RAR5_BLOCK_MAIN
RAR5_MAIN_FLAG_ISVOL = 0x01
RAR5_MAIN_FLAG_HAS_VOLNR = 0x02
RAR5_MAIN_FLAG_SOLID = 0x04
RAR5_MAIN_FLAG_RECOVERY = 0x08
RAR5_MAIN_FLAG_LOCKED = 0x10

# flags for RAR5_BLOCK_FILE
RAR5_FILE_FLAG_ISDIR = 0x01
RAR5_FILE_FLAG_HAS_MTIME = 0x02
RAR5_FILE_FLAG_HAS_CRC32 = 0x04
RAR5_FILE_FLAG_UNKNOWN_SIZE = 0x08

RAR5_COMPR_SOLID = 0x40

RAR5_ENC_FLAG_HAS_CHECKVAL = 0x01

RAR5_ENDARC_FLAG_NEXT_VOL = 0x01

# extra-record types (RAR5_XFILE_*)
RAR5_XFILE_ENCRYPTION = 1
RAR5_XFILE_HASH = 2
RAR5_XFILE_TIME = 3
RAR5_XFILE_VERSION = 4
RAR5_XFILE_REDIR = 5
RAR5_XFILE_OWNER = 6
RAR5_XFILE_SERVICE = 7

# flags for the time extra record
RAR5_XTIME_UNIXTIME = 0x01
RAR5_XTIME_HAS_MTIME = 0x02
RAR5_XTIME_HAS_CTIME = 0x04
RAR5_XTIME_HAS_ATIME = 0x08
RAR5_XTIME_UNIXTIME_NS = 0x10

RAR5_XENC_CIPHER_AES256 = 0

RAR5_XENC_CHECKVAL = 0x01
RAR5_XENC_TWEAKED = 0x02

RAR5_XHASH_BLAKE2SP = 0

# redirection types; these are enumerated values, not bit flags
RAR5_XREDIR_UNIX_SYMLINK = 1
RAR5_XREDIR_WINDOWS_SYMLINK = 2
RAR5_XREDIR_WINDOWS_JUNCTION = 3
RAR5_XREDIR_HARD_LINK = 4
RAR5_XREDIR_FILE_COPY = 5

# redirection flag bit (documented on RarInfo.file_redir as
# "Symlink points to directory")
RAR5_XREDIR_ISDIR = 0x01

# flags for the owner extra record
RAR5_XOWNER_UNAME = 0x01
RAR5_XOWNER_GNAME = 0x02
RAR5_XOWNER_UID = 0x04
RAR5_XOWNER_GID = 0x08

RAR5_OS_WINDOWS = 0
RAR5_OS_UNIX = 1

# DOS-style file attribute bits
DOS_MODE_ARCHIVE = 0x20
DOS_MODE_DIR = 0x10
DOS_MODE_SYSTEM = 0x04
DOS_MODE_HIDDEN = 0x02
DOS_MODE_READONLY = 0x01
##
## internal constants
##

# Archive signatures; both start with the same 6 bytes.
RAR_ID = b"Rar!\x1a\x07\x00"
RAR5_ID = b"Rar!\x1a\x07\x01\x00"

WIN32 = sys.platform == "win32"
# I/O chunk size: 512 KiB on win32, 64 KiB elsewhere
BSIZE = 512 * 1024 if WIN32 else 64 * 1024

# size of the large read used when searching for a signature in SFX files
SFX_MAX_SIZE = 2 * 1024 * 1024
RAR_V3 = 3
RAR_V5 = 5

# character-class contents shared by both patterns below
_BAD_CHARS = r"""\x00-\x1F<>|"?*"""
RC_BAD_CHARS_UNIX = re.compile(r"[%s]" % _BAD_CHARS)
# the win32 pattern additionally matches ":", "^" and backslash
RC_BAD_CHARS_WIN32 = re.compile(r"[%s:^\\]" % _BAD_CHARS)
def _find_sfx_header(xfile):
    """Locate a RAR signature anywhere near the start of a file.

    Reads 64 bytes first, then up to SFX_MAX_SIZE more, and scans everything
    read so far after each read.

    Returns:
        Tuple (version, offset): version is RAR_V3 or RAR_V5 and offset is
        the position of the signature.  (0, 0) if no signature was found.
    """
    common_prefix = RAR_ID[:-1]
    v3_len = len(RAR_ID)
    v5_len = len(RAR5_ID)
    collected = io.BytesIO()

    with XFile(xfile) as fd:
        for chunk_size in (64, SFX_MAX_SIZE):
            chunk = fd.read(chunk_size)
            if not chunk:
                break
            collected.write(chunk)
            # rescan the whole accumulated buffer, so a signature that
            # straddles the two reads is still seen
            haystack = collected.getvalue()
            pos = haystack.find(common_prefix)
            while pos >= 0:
                if haystack[pos:pos + v3_len] == RAR_ID:
                    return RAR_V3, pos
                if haystack[pos:pos + v5_len] == RAR5_ID:
                    return RAR_V5, pos
                pos = haystack.find(common_prefix, pos + len(common_prefix))
    return 0, 0
328 ## Public interface
def get_rar_version(xfile):
    """Check quickly whether file is rar archive, judging by its first bytes.

    Returns RAR_V3 or RAR_V5 for a recognized signature, 0 otherwise.
    """
    with XFile(xfile) as fd:
        head = fd.read(len(RAR5_ID))
    # order matters only for readability: the two signatures cannot both match
    for signature, version in ((RAR_ID, RAR_V3), (RAR5_ID, RAR_V5)):
        if head.startswith(signature):
            return version
    return 0
def is_rarfile(xfile):
    """Check quickly whether file is rar archive.

    Returns False instead of raising when the file cannot be opened or read
    (OSError).
    """
    try:
        version = get_rar_version(xfile)
    except OSError:
        # File not found or not accessible, ignore
        return False
    return version > 0
def is_rarfile_sfx(xfile):
    """Check whether file is rar archive with support for SFX.

    It will read 2M from file.
    """
    version, _offset = _find_sfx_header(xfile)
    return version > 0
class Error(Exception):
    """Base class for rarfile errors."""


class BadRarFile(Error):
    """Incorrect data in archive."""


class NotRarFile(Error):
    """The file is not a RAR archive."""


class BadRarName(Error):
    """Cannot guess multipart name components."""


class NoRarEntry(Error):
    """File not found in RAR archive."""


class PasswordRequired(Error):
    """File requires password."""


class NeedFirstVolume(Error):
    """Need to start from first volume.

    Attributes:

        current_volume
            Volume number of current file or None if not known
    """
    def __init__(self, msg, volume):
        super().__init__(msg)
        self.current_volume = volume


class NoCrypto(Error):
    """Cannot parse encrypted headers - no crypto available."""
class RarExecError(Error):
    """Problem reported by unrar/rar."""


class RarWarning(RarExecError):
    """Non-fatal error."""


class RarFatalError(RarExecError):
    """Fatal error."""


class RarCRCError(RarExecError):
    """CRC error during unpacking."""


class RarLockedArchiveError(RarExecError):
    """Must not modify locked archive."""


class RarWriteError(RarExecError):
    """Write error."""


class RarOpenError(RarExecError):
    """Open error."""


class RarUserError(RarExecError):
    """User error."""


class RarMemoryError(RarExecError):
    """Memory error."""


class RarCreateError(RarExecError):
    """Create error."""


class RarNoFilesError(RarExecError):
    """No files that match pattern were found."""


class RarUserBreak(RarExecError):
    """User stop."""


class RarWrongPassword(RarExecError):
    """Incorrect password."""


class RarUnknownError(RarExecError):
    """Unknown exit code."""


class RarSignalExit(RarExecError):
    """Unrar exited with signal."""


class RarCannotExec(RarExecError):
    """Executable not found."""
class UnsupportedWarning(UserWarning):
    """Archive uses features that are unsupported by rarfile.

    .. versionadded:: 4.0
    """
class RarInfo:
    r"""An entry in rar archive.

    Timestamps as :class:`~datetime.datetime` are without timezone in RAR3,
    with UTC timezone in RAR5 archives.

    Attributes:

        filename
            File name with relative path.
            Path separator is "/".  Always unicode string.

        date_time
            File modification timestamp.   As tuple of (year, month, day, hour, minute, second).
            RAR5 allows archives where it is missing, it's None then.

        comment
            Optional file comment field.  Unicode string.  (RAR3-only)

        file_size
            Uncompressed size.

        compress_size
            Compressed size.

        compress_type
            Compression method: one of :data:`RAR_M0` .. :data:`RAR_M5` constants.

        extract_version
            Minimal Rar version needed for decompressing.  As (major*10 + minor),
            so 2.9 is 29.

            RAR3: 10, 20, 29

            RAR5 does not have such field in archive, it's simply set to 50.

        host_os
            Host OS type, one of RAR_OS_* constants.

            RAR3: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`, :data:`RAR_OS_MSDOS`,
            :data:`RAR_OS_OS2`, :data:`RAR_OS_BEOS`.

            RAR5: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`.

        mode
            File attributes. May be either dos-style or unix-style, depending on host_os.

        mtime
            File modification time.  Same value as :attr:`date_time`
            but as :class:`~datetime.datetime` object with extended precision.

        ctime
            Optional time field: creation time.  As :class:`~datetime.datetime` object.

        atime
            Optional time field: last access time.  As :class:`~datetime.datetime` object.

        arctime
            Optional time field: archival time.  As :class:`~datetime.datetime` object.
            (RAR3-only)

        CRC
            CRC-32 of uncompressed file, unsigned int.

            RAR5: may be None.

        blake2sp_hash
            Blake2SP hash over decompressed data.  (RAR5-only)

        volume
            Volume nr, starting from 0.

        volume_file
            Volume file name, where file starts.

        file_redir
            If not None, file is link of some sort.  Contains tuple of (type, flags, target).
            (RAR5-only)

            Type is one of constants:

                :data:`RAR5_XREDIR_UNIX_SYMLINK`
                    Unix symlink.
                :data:`RAR5_XREDIR_WINDOWS_SYMLINK`
                    Windows symlink.
                :data:`RAR5_XREDIR_WINDOWS_JUNCTION`
                    Windows junction.
                :data:`RAR5_XREDIR_HARD_LINK`
                    Hard link to target.
                :data:`RAR5_XREDIR_FILE_COPY`
                    Current file is copy of another archive entry.

            Flags may contain bits:

                :data:`RAR5_XREDIR_ISDIR`
                    Symlink points to directory.
    """

    # zipfile-compatible fields
    filename = None
    file_size = None
    compress_size = None
    date_time = None
    CRC = None
    volume = None
    orig_filename = None

    # optional extended time fields, datetime() objects.
    mtime = None
    ctime = None
    atime = None

    extract_version = None
    mode = None
    host_os = None
    compress_type = None

    # rar3-only fields
    comment = None
    arctime = None

    # rar5-only fields
    blake2sp_hash = None
    file_redir = None

    # internal fields
    flags = 0
    type = None

    # zipfile compat
    def is_dir(self):
        """Tell whether the entry is a directory.

        This base implementation always answers False.

        .. versionadded:: 4.0
        """
        return False

    def is_symlink(self):
        """Tell whether the entry is a symlink.

        This base implementation always answers False.

        .. versionadded:: 4.0
        """
        return False

    def is_file(self):
        """Tell whether the entry is a normal file.

        This base implementation always answers False.

        .. versionadded:: 4.0
        """
        return False

    def needs_password(self):
        """Tell whether the data is stored password-protected.

        Only file blocks carrying the RAR_FILE_PASSWORD flag qualify.
        """
        if self.type != RAR_BLOCK_FILE:
            return False
        return (self.flags & RAR_FILE_PASSWORD) > 0

    def isdir(self):
        """Old spelling of :meth:`is_dir`.

        .. deprecated:: 4.0
        """
        return self.is_dir()
class RarFile:
    """Parse RAR structure, provide access to files in archive.
    """

    #: File name, if available.  Unicode string or None.
    filename = None

    #: Archive comment.  Unicode string or None.
    comment = None

    def __init__(self, file, mode="r", charset=None, info_callback=None,
                 crc_check=True, errors="stop"):
        """Open and parse a RAR archive.

        Parameters:

            file
                archive file name or file-like object.
            mode
                only "r" is supported.
            charset
                fallback charset to use, if filenames are not already Unicode-enabled.
            info_callback
                debug callback, gets to see all archive entries.
            crc_check
                set to False to disable CRC checks
            errors
                Either "stop" to quietly stop parsing on errors,
                or "strict" to raise errors.  Default is "stop".

        Raises:

            ValueError
                if `errors` is neither "stop" nor "strict".
            NotImplementedError
                if `mode` is not "r".
            NotRarFile
                if no RAR signature is found.
        """
        if is_filelike(file):
            self.filename = getattr(file, "name", None)
        else:
            if isinstance(file, Path):
                file = str(file)
            self.filename = file
        self._rarfile = file

        self._charset = charset or DEFAULT_CHARSET
        self._info_callback = info_callback
        self._crc_check = crc_check
        self._password = None
        self._file_parser = None

        if errors == "stop":
            self._strict = False
        elif errors == "strict":
            self._strict = True
        else:
            raise ValueError("Invalid value for errors= parameter.")

        if mode != "r":
            raise NotImplementedError("RarFile supports only mode=r")

        self._parse()

    def __enter__(self):
        """Open context."""
        return self

    def __exit__(self, typ, value, traceback):
        """Exit context."""
        self.close()

    def __iter__(self):
        """Iterate over members."""
        return iter(self.infolist())

    def setpassword(self, pwd):
        """Sets the password to use when extracting.

        If the current parser reports header encryption, the archive is
        parsed again with the new password; otherwise the password is just
        handed to the existing parser.
        """
        self._password = pwd
        if self._file_parser:
            if self._file_parser.has_header_encryption():
                # drop the parser so the archive gets re-parsed below
                self._file_parser = None
        if not self._file_parser:
            self._parse()
        else:
            self._file_parser.setpassword(self._password)

    def needs_password(self):
        """Returns True if any archive entries require password for extraction.
        """
        return self._file_parser.needs_password()

    def namelist(self):
        """Return list of filenames in archive.
        """
        return [f.filename for f in self.infolist()]

    def infolist(self):
        """Return RarInfo objects for all files/directories in archive.
        """
        return self._file_parser.infolist()

    def volumelist(self):
        """Returns filenames of archive volumes.

        In case of single-volume archive, the list contains
        just the name of main archive file.
        """
        return self._file_parser.volumelist()

    def getinfo(self, name):
        """Return RarInfo for file.
        """
        return self._file_parser.getinfo(name)

    def open(self, name, mode="r", pwd=None):
        """Returns file-like object (:class:`RarExtFile`) from where the data can be read.

        The object implements :class:`io.RawIOBase` interface, so it can
        be further wrapped with :class:`io.BufferedReader`
        and :class:`io.TextIOWrapper`.

        The object is seekable, although the seeking is fast only on
        uncompressed files, on compressed files the seeking is implemented
        by reading ahead and/or restarting the decompression.

        Parameters:

            name
                file name or RarInfo instance.
            mode
                must be "r"
            pwd
                password to use for extracting.

        Raises:

            NotImplementedError
                if `mode` is not "r".
            io.UnsupportedOperation
                if the entry is a directory.
            PasswordRequired
                if the entry is encrypted and no password is available.
        """

        if mode != "r":
            raise NotImplementedError("RarFile.open() supports only mode=r")

        # entry lookup
        inf = self.getinfo(name)
        if inf.is_dir():
            raise io.UnsupportedOperation("Directory does not have any data: " + inf.filename)

        # check password
        if inf.needs_password():
            pwd = pwd or self._password
            if pwd is None:
                raise PasswordRequired("File %s requires password" % inf.filename)
        else:
            # do not pass a password along for unencrypted entries
            pwd = None

        return self._file_parser.open(inf, pwd)

    def read(self, name, pwd=None):
        """Return uncompressed data for archive entry.

        For longer files using :meth:`~RarFile.open` may be better idea.

        Parameters:

            name
                filename or RarInfo instance
            pwd
                password to use for extracting.
        """

        with self.open(name, "r", pwd) as f:
            return f.read()

    def close(self):
        """Release open resources."""

    def printdir(self, file=None):
        """Print archive file list to stdout or given file.
        """
        if file is None:
            file = sys.stdout
        for f in self.infolist():
            print(f.filename, file=file)

    def extract(self, member, path=None, pwd=None):
        """Extract single file into current directory.

        Parameters:

            member
                filename or :class:`RarInfo` instance
            path
                optional destination path
            pwd
                optional password to use

        Returns the path of the extracted entry, or None if the entry
        could not be created (e.g. unsupported link type).
        """
        inf = self.getinfo(member)
        return self._extract_one(inf, path, pwd, True)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all files into current directory.

        Parameters:

            path
                optional destination path
            members
                optional filename or :class:`RarInfo` instance list to extract
            pwd
                optional password to use
        """
        if members is None:
            members = self.namelist()

        done = set()
        dirs = []
        for m in members:
            inf = self.getinfo(m)
            # directory attributes are applied later, after their contents exist
            dst = self._extract_one(inf, path, pwd, not inf.is_dir())
            if inf.is_dir():
                if dst not in done:
                    dirs.append((dst, inf))
                    done.add(dst)
        if dirs:
            # reverse sort puts nested directories before their parents
            dirs.sort(reverse=True)
            for dst, inf in dirs:
                self._set_attrs(inf, dst)

    def testrar(self, pwd=None):
        """Read all files and test CRC.
        """
        for member in self.infolist():
            if member.is_file():
                with self.open(member, 'r', pwd) as f:
                    empty_read(f, member.file_size, BSIZE)

    def strerror(self):
        """Return error string if parsing failed or None if no problems.
        """
        if not self._file_parser:
            return "Not a RAR file"
        return self._file_parser.strerror()

    ##
    ## private methods
    ##

    def _parse(self):
        """Run parser for file type
        """
        ver, sfx_ofs = _find_sfx_header(self._rarfile)
        if ver == RAR_V3:
            p3 = RAR3Parser(self._rarfile, self._password, self._crc_check,
                            self._charset, self._strict, self._info_callback,
                            sfx_ofs)
            self._file_parser = p3  # noqa
        elif ver == RAR_V5:
            p5 = RAR5Parser(self._rarfile, self._password, self._crc_check,
                            self._charset, self._strict, self._info_callback,
                            sfx_ofs)
            self._file_parser = p5  # noqa
        else:
            raise NotRarFile("Not a RAR file")

        self._file_parser.parse()
        self.comment = self._file_parser.comment

    def _extract_one(self, info, path, pwd, set_attrs):
        """Create one archive entry below `path` (default: current directory).

        Returns the destination path, or None when nothing was created.
        """
        fname = sanitize_filename(
            info.filename, os.path.sep, WIN32
        )

        if path is None:
            path = os.getcwd()
        else:
            path = os.fspath(path)
        dstfn = os.path.join(path, fname)

        # make sure the parent directory exists
        dirname = os.path.dirname(dstfn)
        if dirname and dirname != ".":
            os.makedirs(dirname, exist_ok=True)

        if info.is_file():
            return self._make_file(info, dstfn, pwd, set_attrs)
        if info.is_dir():
            return self._make_dir(info, dstfn, pwd, set_attrs)
        if info.is_symlink():
            return self._make_symlink(info, dstfn, pwd, set_attrs)
        return None

    def _create_helper(self, name, flags, info):
        """Opener used when creating extracted files; `info` is unused here."""
        return os.open(name, flags)

    def _make_file(self, info, dstfn, pwd, set_attrs):
        """Copy entry data into `dstfn`, optionally applying attributes."""
        def helper(name, flags):
            return self._create_helper(name, flags, info)
        with self.open(info, "r", pwd) as src:
            with open(dstfn, "wb", opener=helper) as dst:
                shutil.copyfileobj(src, dst)
        if set_attrs:
            self._set_attrs(info, dstfn)
        return dstfn

    def _make_dir(self, info, dstfn, pwd, set_attrs):
        """Create directory `dstfn`, optionally applying attributes."""
        os.makedirs(dstfn, exist_ok=True)
        if set_attrs:
            self._set_attrs(info, dstfn)
        return dstfn

    def _make_symlink(self, info, dstfn, pwd, set_attrs):
        """Create symlink `dstfn` for a link entry.

        Returns `dstfn`, or None (after an UnsupportedWarning) for Windows
        junctions and for entries whose link target cannot be determined.
        """
        target_is_directory = False
        if info.host_os == RAR_OS_UNIX:
            # link target is stored as the entry's data
            link_name = self.read(info, pwd)
            target_is_directory = (info.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY
        elif info.file_redir:
            redir_type, redir_flags, link_name = info.file_redir
            if redir_type == RAR5_XREDIR_WINDOWS_JUNCTION:
                warnings.warn(f"Windows junction not supported - {info.filename}", UnsupportedWarning)
                return None
            # RAR5_XREDIR_ISDIR is a bit in the redirection *flags*; testing it
            # against the enumerated redirection type would give wrong answers.
            target_is_directory = (redir_flags & RAR5_XREDIR_ISDIR) > 0
        else:
            warnings.warn(f"Unsupported link type - {info.filename}", UnsupportedWarning)
            return None

        os.symlink(link_name, dstfn, target_is_directory=target_is_directory)
        return dstfn

    def _set_attrs(self, info, dstfn):
        """Apply permission bits and timestamps from `info` to `dstfn`."""
        if info.host_os == RAR_OS_UNIX:
            os.chmod(dstfn, info.mode & 0o777)
        elif info.host_os in (RAR_OS_WIN32, RAR_OS_MSDOS):
            # only keep R/O attr, except for dirs on win32
            if info.mode & DOS_MODE_READONLY and (info.is_file() or not WIN32):
                st = os.stat(dstfn)
                new_mode = st.st_mode & ~0o222
                os.chmod(dstfn, new_mode)

        if info.mtime:
            mtime_ns = to_nsecs(info.mtime)
            # fall back to mtime when the archive has no access time
            atime_ns = to_nsecs(info.atime) if info.atime else mtime_ns
            os.utime(dstfn, ns=(atime_ns, mtime_ns))
978 # File format parsing
981 class CommonParser:
982 """Shared parser parts."""
983 _main = None
984 _hdrenc_main = None
985 _needs_password = False
986 _fd = None
987 _expect_sig = None
988 _parse_error = None
989 _password = None
990 comment = None
992 def __init__(self, rarfile, password, crc_check, charset, strict, info_cb, sfx_offset):
993 self._rarfile = rarfile
994 self._password = password
995 self._crc_check = crc_check
996 self._charset = charset
997 self._strict = strict
998 self._info_callback = info_cb
999 self._info_list = []
1000 self._info_map = {}
1001 self._vol_list = []
1002 self._sfx_offset = sfx_offset
1004 def has_header_encryption(self):
1005 """Returns True if headers are encrypted
1007 if self._hdrenc_main:
1008 return True
1009 if self._main:
1010 if self._main.flags & RAR_MAIN_PASSWORD:
1011 return True
1012 return False
1014 def setpassword(self, pwd):
1015 """Set cached password."""
1016 self._password = pwd
1018 def volumelist(self):
1019 """Volume files"""
1020 return self._vol_list
1022 def needs_password(self):
1023 """Is password required"""
1024 return self._needs_password
1026 def strerror(self):
1027 """Last error"""
1028 return self._parse_error
1030 def infolist(self):
1031 """List of RarInfo records.
1033 return self._info_list
1035 def getinfo(self, member):
1036 """Return RarInfo for filename
1038 if isinstance(member, RarInfo):
1039 fname = member.filename
1040 elif isinstance(member, Path):
1041 fname = str(member)
1042 else:
1043 fname = member
1045 if fname.endswith("/"):
1046 fname = fname.rstrip("/")
1048 try:
1049 return self._info_map[fname]
1050 except KeyError:
1051 raise NoRarEntry("No such file: %s" % fname) from None
1053 def parse(self):
1054 """Process file."""
1055 self._fd = None
1056 try:
1057 self._parse_real()
1058 finally:
1059 if self._fd:
1060 self._fd.close()
1061 self._fd = None
1063 def _parse_real(self):
1064 """Actually read file.
1066 fd = XFile(self._rarfile)
1067 self._fd = fd
1068 fd.seek(self._sfx_offset, 0)
1069 sig = fd.read(len(self._expect_sig))
1070 if sig != self._expect_sig:
1071 raise NotRarFile("Not a Rar archive")
1073 volume = 0 # first vol (.rar) is 0
1074 more_vols = False
1075 endarc = False
1076 volfile = self._rarfile
1077 self._vol_list = [self._rarfile]
1078 raise_need_first_vol = False
1079 while True:
1080 if endarc:
1081 h = None # don"t read past ENDARC
1082 else:
1083 h = self._parse_header(fd)
1084 if not h:
1085 if raise_need_first_vol:
1086 # did not find ENDARC with VOLNR
1087 raise NeedFirstVolume("Need to start from first volume", None)
1088 if more_vols:
1089 volume += 1
1090 fd.close()
1091 try:
1092 volfile = self._next_volname(volfile)
1093 fd = XFile(volfile)
1094 except IOError:
1095 self._set_error("Cannot open next volume: %s", volfile)
1096 break
1097 self._fd = fd
1098 sig = fd.read(len(self._expect_sig))
1099 if sig != self._expect_sig:
1100 self._set_error("Invalid volume sig: %s", volfile)
1101 break
1102 more_vols = False
1103 endarc = False
1104 self._vol_list.append(volfile)
1105 self._main = None
1106 continue
1107 break
1108 h.volume = volume
1109 h.volume_file = volfile
1111 if h.type == RAR_BLOCK_MAIN and not self._main:
1112 self._main = h
1113 if volume == 0 and (h.flags & RAR_MAIN_NEWNUMBERING):
1114 # RAR 2.x does not set FIRSTVOLUME,
1115 # so check it only if NEWNUMBERING is used
1116 if (h.flags & RAR_MAIN_FIRSTVOLUME) == 0:
1117 if getattr(h, "main_volume_number", None) is not None:
1118 # rar5 may have more info
1119 raise NeedFirstVolume(
1120 "Need to start from first volume (current: %r)"
1121 % (h.main_volume_number,),
1122 h.main_volume_number
1124 # delay raise until we have volnr from ENDARC
1125 raise_need_first_vol = True
1126 if h.flags & RAR_MAIN_PASSWORD:
1127 self._needs_password = True
1128 if not self._password:
1129 break
1130 elif h.type == RAR_BLOCK_ENDARC:
1131 more_vols = (h.flags & RAR_ENDARC_NEXT_VOLUME) > 0
1132 endarc = True
1133 if raise_need_first_vol and (h.flags & RAR_ENDARC_VOLNR) > 0:
1134 raise NeedFirstVolume(
1135 "Need to start from first volume (current: %r)"
1136 % (h.endarc_volnr,),
1137 h.endarc_volnr
1139 elif h.type == RAR_BLOCK_FILE:
1140 # RAR 2.x does not write RAR_BLOCK_ENDARC
1141 if h.flags & RAR_FILE_SPLIT_AFTER:
1142 more_vols = True
1143 # RAR 2.x does not set RAR_MAIN_FIRSTVOLUME
1144 if volume == 0 and h.flags & RAR_FILE_SPLIT_BEFORE:
1145 raise_need_first_vol = True
1147 if h.needs_password():
1148 self._needs_password = True
1150 # store it
1151 self.process_entry(fd, h)
1153 if self._info_callback:
1154 self._info_callback(h)
1156 # go to next header
1157 if h.add_size > 0:
1158 fd.seek(h.data_offset + h.add_size, 0)
1160 def process_entry(self, fd, item):
1161 """Examine item, add into lookup cache."""
1162 raise NotImplementedError()
1164 def _decrypt_header(self, fd):
1165 raise NotImplementedError("_decrypt_header")
1167 def _parse_block_header(self, fd):
1168 raise NotImplementedError("_parse_block_header")
1170 def _open_hack(self, inf, pwd):
1171 raise NotImplementedError("_open_hack")
1173 def _parse_header(self, fd):
1174 """Read single header
1176 try:
1177 # handle encrypted headers
1178 if (self._main and self._main.flags & RAR_MAIN_PASSWORD) or self._hdrenc_main:
1179 if not self._password:
1180 return None
1181 fd = self._decrypt_header(fd)
1183 # now read actual header
1184 return self._parse_block_header(fd)
1185 except struct.error:
1186 self._set_error("Broken header in RAR file")
1187 return None
1189 def _next_volname(self, volfile):
1190 """Given current vol name, construct next one
1192 if is_filelike(volfile):
1193 raise IOError("Working on single FD")
1194 if self._main.flags & RAR_MAIN_NEWNUMBERING:
1195 return _next_newvol(volfile)
1196 return _next_oldvol(volfile)
1198 def _set_error(self, msg, *args):
1199 if args:
1200 msg = msg % args
1201 self._parse_error = msg
1202 if self._strict:
1203 raise BadRarFile(msg)
1205 def open(self, inf, pwd):
1206 """Return stream object for file data."""
1208 if inf.file_redir:
1209 redir_type, redir_flags, redir_name = inf.file_redir
1210 # cannot leave to unrar as it expects copied file to exist
1211 if redir_type in (RAR5_XREDIR_FILE_COPY, RAR5_XREDIR_HARD_LINK):
1212 inf = self.getinfo(redir_name)
1213 if not inf:
1214 raise BadRarFile("cannot find copied file")
1215 elif redir_type in (
1216 RAR5_XREDIR_UNIX_SYMLINK, RAR5_XREDIR_WINDOWS_SYMLINK,
1217 RAR5_XREDIR_WINDOWS_JUNCTION,
1219 return io.BytesIO(redir_name.encode("utf8"))
1220 if inf.flags & RAR_FILE_SPLIT_BEFORE:
1221 raise NeedFirstVolume("Partial file, please start from first volume: " + inf.filename, None)
1223 # is temp write usable?
1224 use_hack = 1
1225 if not self._main:
1226 use_hack = 0
1227 elif self._main._must_disable_hack():
1228 use_hack = 0
1229 elif inf._must_disable_hack():
1230 use_hack = 0
1231 elif is_filelike(self._rarfile):
1232 pass
1233 elif inf.file_size > HACK_SIZE_LIMIT:
1234 use_hack = 0
1235 elif not USE_EXTRACT_HACK:
1236 use_hack = 0
1238 # now extract
1239 if inf.compress_type == RAR_M0 and (inf.flags & RAR_FILE_PASSWORD) == 0 and inf.file_redir is None:
1240 return self._open_clear(inf)
1241 elif use_hack:
1242 return self._open_hack(inf, pwd)
1243 elif is_filelike(self._rarfile):
1244 return self._open_unrar_membuf(self._rarfile, inf, pwd)
1245 else:
1246 return self._open_unrar(self._rarfile, inf, pwd)
1248 def _open_clear(self, inf):
1249 return DirectReader(self, inf)
def _open_hack_core(self, inf, pwd, prefix, suffix):
    """Extract one entry through a temporary single-entry archive.

    Writes ``prefix``, then ``inf.header_size + inf.compress_size`` bytes
    copied from the entry's volume file starting at ``inf.header_offset``,
    then ``suffix`` into a fresh temp file, and passes that file to
    :meth:`_open_unrar` (which also receives it as the tempfile to clean up).

    Raises :exc:`BadRarFile` when the volume file ends before all bytes
    were copied.  On any failure the temp file is removed.
    """
    size = inf.compress_size + inf.header_size
    rf = XFile(inf.volume_file, 0)
    try:
        rf.seek(inf.header_offset)

        tmpfd, tmpname = mkstemp(suffix=".rar", dir=HACK_TMP_DIR)
        tmpf = os.fdopen(tmpfd, "wb")
        try:
            tmpf.write(prefix)
            while size > 0:
                buf = rf.read(min(size, BSIZE))
                if not buf:
                    raise BadRarFile("read failed: " + inf.filename)
                tmpf.write(buf)
                size -= len(buf)
            tmpf.write(suffix)
            tmpf.close()
        except BaseException:
            # never leave a partial temp archive behind
            tmpf.close()
            os.unlink(tmpname)
            raise
    finally:
        # close the source handle on every path, including failures of
        # seek()/mkstemp() that happen before the copy loop starts
        rf.close()

    return self._open_unrar(tmpname, inf, pwd, tmpname)
def _open_unrar_membuf(self, memfile, inf, pwd):
    """Spill a file-like archive to a temp file and extract from it.

    Needed for solid archives.  The temp file name is passed on both as
    the archive to read and as the tempfile argument of
    :meth:`_open_unrar`; ``force_file=True`` makes the entry name still be
    given to the tool.
    """
    tmp_path = membuf_tempfile(memfile)
    return self._open_unrar(tmp_path, inf, pwd, tmp_path, force_file=True)
def _open_unrar(self, rarfile, inf, pwd=None, tmpfile=None, force_file=False):
    """Extract an entry by piping it out of the external tool.

    ``rarfile`` is the archive path given to the tool.  The entry name is
    passed only when there is no ``tmpfile`` or when ``force_file`` is set.
    """
    setup = tool_setup()

    # leaving the name out avoids encoding related problems
    entry_name = inf.filename if (force_file or not tmpfile) else None

    # the reader consumes the tool's output pipe
    cmdline = setup.open_cmdline(pwd, rarfile, entry_name)
    return PipeReader(self, inf, cmdline, tmpfile)
1304 # RAR3 format
class Rar3Info(RarInfo):
    """Header record for RAR3 archives."""

    # defaults for fields filled in by the RAR3 parser
    extract_version = 15
    salt = None
    add_size = 0
    header_crc = None
    header_size = None
    header_offset = None
    data_offset = None
    _md_class = None
    _md_expect = None
    _name_size = None

    # RAR5-only fields, present so callers can read them unconditionally
    file_redir = None
    blake2sp_hash = None

    # end-of-archive block fields
    endarc_datacrc = None
    endarc_volnr = None

    def _must_disable_hack(self):
        """Tell whether this header rules out the temp-archive extraction."""
        if self.type == RAR_BLOCK_FILE:
            blockers = RAR_FILE_PASSWORD | RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER
        elif self.type == RAR_BLOCK_MAIN:
            blockers = RAR_MAIN_SOLID | RAR_MAIN_PASSWORD
        else:
            return False
        return (self.flags & blockers) != 0

    def is_dir(self):
        """Returns True if entry is a directory."""
        if self.type != RAR_BLOCK_FILE or self.is_symlink():
            return False
        return (self.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY

    def is_symlink(self):
        """Returns True if entry is a symlink."""
        if self.type != RAR_BLOCK_FILE:
            return False
        if self.host_os != RAR_OS_UNIX:
            return False
        # file-type bits of the mode must equal 0xA000
        return (self.mode & 0xF000) == 0xA000

    def is_file(self):
        """Returns True if entry is a normal file."""
        if self.type != RAR_BLOCK_FILE:
            return False
        return not self.is_dir() and not self.is_symlink()
class RAR3Parser(CommonParser):
    """Parse RAR3 file format.

    Builds :class:`Rar3Info` records from block headers, collects file
    entries and comments, and knows how to wrap a single entry into a
    minimal RAR3 archive for extraction.
    """
    _expect_sig = RAR_ID
    # result of the most recent header key derivation, reused while the salt repeats
    _last_aes_key = (None, None, None)  # (salt, key, iv)

    def _decrypt_header(self, fd):
        """Return a ``HeaderDecrypt`` wrapper around ``fd``.

        Reads an 8-byte salt from ``fd``; key and iv come from the cached
        tuple when the salt matches the previous one, otherwise from
        ``rar3_s2k(self._password, salt)``.

        Raises :exc:`NoCrypto` when ``_have_crypto`` is false.
        """
        if not _have_crypto:
            raise NoCrypto("Cannot parse encrypted headers - no crypto")
        salt = fd.read(8)
        if self._last_aes_key[0] == salt:
            key, iv = self._last_aes_key[1:]
        else:
            key, iv = rar3_s2k(self._password, salt)
            self._last_aes_key = (salt, key, iv)
        return HeaderDecrypt(fd, key, iv)

    def _parse_block_header(self, fd):
        """Parse common block header

        Returns a filled :class:`Rar3Info`, or ``None`` when nothing could
        be read, when the header is truncated, or when the header CRC does
        not match (the last two also record an error via ``_set_error``).
        """
        h = Rar3Info()
        h.header_offset = fd.tell()

        # read and parse base header
        # NOTE(review): a short but non-empty read is not caught here and
        # would make unpack_from() raise struct.error
        buf = fd.read(S_BLK_HDR.size)
        if not buf:
            return None
        t = S_BLK_HDR.unpack_from(buf)
        h.header_crc, h.type, h.flags, h.header_size = t

        # read full header
        if h.header_size > S_BLK_HDR.size:
            hdata = buf + fd.read(h.header_size - S_BLK_HDR.size)
        else:
            hdata = buf
        h.data_offset = fd.tell()

        # unexpected EOF?
        if len(hdata) != h.header_size:
            self._set_error("Unexpected EOF when reading header")
            return None

        pos = S_BLK_HDR.size

        # block has data associated with it?
        if h.flags & RAR_LONG_BLOCK:
            h.add_size, pos = load_le32(hdata, pos)
        else:
            h.add_size = 0

        # parse interesting ones, decide header boundaries for crc
        if h.type == RAR_BLOCK_MARK:
            return h
        elif h.type == RAR_BLOCK_MAIN:
            pos += 6
            if h.flags & RAR_MAIN_ENCRYPTVER:
                pos += 1
            crc_pos = pos
            if h.flags & RAR_MAIN_COMMENT:
                self._parse_subblocks(h, hdata, pos)
        elif h.type == RAR_BLOCK_FILE:
            # step back 4 bytes: the file header struct starts at the field
            # consumed above as add_size (presumably file blocks always
            # have RAR_LONG_BLOCK set - TODO confirm)
            pos = self._parse_file_header(h, hdata, pos - 4)
            crc_pos = pos
            if h.flags & RAR_FILE_COMMENT:
                pos = self._parse_subblocks(h, hdata, pos)
        elif h.type == RAR_BLOCK_SUB:
            pos = self._parse_file_header(h, hdata, pos - 4)
            crc_pos = h.header_size
        elif h.type == RAR_BLOCK_OLD_AUTH:
            pos += 8
            crc_pos = pos
        elif h.type == RAR_BLOCK_OLD_EXTRA:
            pos += 7
            crc_pos = pos
        elif h.type == RAR_BLOCK_ENDARC:
            if h.flags & RAR_ENDARC_DATACRC:
                h.endarc_datacrc, pos = load_le32(hdata, pos)
            if h.flags & RAR_ENDARC_VOLNR:
                h.endarc_volnr = S_SHORT.unpack_from(hdata, pos)[0]
                pos += 2
            crc_pos = h.header_size
        else:
            crc_pos = h.header_size

        # check crc
        # the first 2 bytes (the stored crc itself) are never part of the checksum
        if h.type == RAR_BLOCK_OLD_SUB:
            crcdat = hdata[2:] + fd.read(h.add_size)
        else:
            crcdat = hdata[2:crc_pos]

        # stored header crc is compared against the low 16 bits of crc32
        calc_crc = crc32(crcdat) & 0xFFFF

        # return good header
        if h.header_crc == calc_crc:
            return h

        # header parsing failed.
        self._set_error("Header CRC error (%02x): exp=%x got=%x (xlen = %d)",
                        h.type, h.header_crc, calc_crc, len(crcdat))

        # instead of panicking, send EOF
        return None

    def _parse_file_header(self, h, hdata, pos):
        """Read file-specific header

        Fills size, time, CRC, name and salt fields of ``h`` from ``hdata``
        starting at ``pos`` and returns the position after the parsed part.
        """
        fld = S_FILE_HDR.unpack_from(hdata, pos)
        pos += S_FILE_HDR.size

        h.compress_size = fld[0]
        h.file_size = fld[1]
        h.host_os = fld[2]
        h.CRC = fld[3]
        h.date_time = parse_dos_time(fld[4])
        h.mtime = to_datetime(h.date_time)
        h.extract_version = fld[5]
        h.compress_type = fld[6]
        h._name_size = name_size = fld[7]
        h.mode = fld[8]

        h._md_class = CRC32Context
        h._md_expect = h.CRC

        # large files carry the upper 32 bits of both sizes separately
        if h.flags & RAR_FILE_LARGE:
            h1, pos = load_le32(hdata, pos)
            h2, pos = load_le32(hdata, pos)
            h.compress_size |= h1 << 32
            h.file_size |= h2 << 32
            h.add_size = h.compress_size

        name, pos = load_bytes(hdata, name_size, pos)
        if h.flags & RAR_FILE_UNICODE and b"\0" in name:
            # stored in custom encoding
            # bytes before the first NUL are the plain name, the rest is encoded data
            nul = name.find(b"\0")
            h.orig_filename = name[:nul]
            u = UnicodeFilename(h.orig_filename, name[nul + 1:])
            h.filename = u.decode()

            # if parsing failed fall back to simple name
            if u.failed:
                h.filename = self._decode(h.orig_filename)
        elif h.flags & RAR_FILE_UNICODE:
            # stored in UTF8
            h.orig_filename = name
            h.filename = name.decode("utf8", "replace")
        else:
            # stored in random encoding
            h.orig_filename = name
            h.filename = self._decode(name)

        # change separator, set dir suffix
        h.filename = h.filename.replace("\\", "/").rstrip("/")
        if h.is_dir():
            h.filename = h.filename + "/"

        if h.flags & RAR_FILE_SALT:
            h.salt, pos = load_bytes(hdata, 8, pos)
        else:
            h.salt = None

        # optional extended time stamps
        if h.flags & RAR_FILE_EXTTIME:
            pos = _parse_ext_time(h, hdata, pos)
        else:
            # NOTE(review): this also clears the mtime derived from the DOS
            # timestamp above - confirm that is intended
            h.mtime = h.atime = h.ctime = h.arctime = None

        return pos

    def _parse_subblocks(self, h, hdata, pos):
        """Find old-style comment subblock

        Walks the sub-blocks stored in ``hdata`` from ``pos`` on, sets
        ``h.comment`` when an old-style comment block is found and accepted,
        and returns the position where the walk stopped.
        """
        while pos < len(hdata):
            # ordinary block header
            t = S_BLK_HDR.unpack_from(hdata, pos)
            ___scrc, stype, sflags, slen = t
            pos_next = pos + slen
            pos += S_BLK_HDR.size

            # corrupt header
            # (declared length shorter than the base header itself)
            if pos_next < pos:
                break

            # followed by block-specific header
            if stype == RAR_BLOCK_OLD_COMMENT and pos + S_COMMENT_HDR.size <= pos_next:
                declen, ver, meth, crc = S_COMMENT_HDR.unpack_from(hdata, pos)
                pos += S_COMMENT_HDR.size
                data = hdata[pos: pos_next]
                cmt = rar3_decompress(ver, meth, data, declen, sflags,
                                      crc, self._password)
                # comment is kept when crc checking is off or its 16-bit crc matches
                if not self._crc_check or (crc32(cmt) & 0xFFFF == crc):
                    h.comment = self._decode_comment(cmt)

            pos = pos_next
        return pos

    def _read_comment_v3(self, inf, pwd=None):
        """Read, decompress and decode the comment stored in block ``inf``.

        Returns ``None`` when crc checking is enabled and the CRC32 of the
        decompressed data differs from ``inf.CRC``.
        """

        # read data
        with XFile(inf.volume_file) as rf:
            rf.seek(inf.data_offset)
            data = rf.read(inf.compress_size)

        # decompress
        cmt = rar3_decompress(inf.extract_version, inf.compress_type, data,
                              inf.file_size, inf.flags, inf.CRC, pwd, inf.salt)

        # check crc
        if self._crc_check:
            crc = crc32(cmt)
            if crc != inf.CRC:
                return None

        return self._decode_comment(cmt)

    def _decode(self, val):
        """Decode bytes with the first of ``TRY_ENCODINGS`` that succeeds.

        Falls back to ``self._charset`` with replacement characters.
        """
        for c in TRY_ENCODINGS:
            try:
                return val.decode(c)
            except UnicodeError:
                pass
        return val.decode(self._charset, "replace")

    def _decode_comment(self, val):
        """Decode comment bytes; same rules as :meth:`_decode`."""
        return self._decode(val)

    def process_entry(self, fd, item):
        """Record a parsed block: file entries, comments, main header flags.

        ``fd`` is not used by this implementation.
        """
        if item.type == RAR_BLOCK_FILE:
            # use only first part
            if item.flags & RAR_FILE_VERSION:
                pass    # skip old versions
            elif (item.flags & RAR_FILE_SPLIT_BEFORE) == 0:
                self._info_map[item.filename.rstrip("/")] = item
                self._info_list.append(item)
            elif len(self._info_list) > 0:
                # final crc is in last block
                # continuation part: merged into the most recently listed entry
                old = self._info_list[-1]
                old.CRC = item.CRC
                old._md_expect = item._md_expect
                old.compress_size += item.compress_size

        # parse new-style comment
        if item.type == RAR_BLOCK_SUB and item.filename == "CMT":
            if item.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER):
                pass
            elif item.flags & RAR_FILE_SOLID:
                # file comment
                cmt = self._read_comment_v3(item, self._password)
                if len(self._info_list) > 0:
                    old = self._info_list[-1]
                    old.comment = cmt
            else:
                # archive comment
                cmt = self._read_comment_v3(item, self._password)
                self.comment = cmt

        if item.type == RAR_BLOCK_MAIN:
            if item.flags & RAR_MAIN_COMMENT:
                self.comment = item.comment
            if item.flags & RAR_MAIN_PASSWORD:
                self._needs_password = True

    # put file compressed data into temporary .rar archive, and run
    # unrar on that, thus avoiding unrar going over whole archive
    def _open_hack(self, inf, pwd):
        """Extract ``inf`` via a temp archive made of a fixed main header plus the entry."""
        # create main header: crc, type, flags, size, res1, res2
        prefix = RAR_ID + S_BLK_HDR.pack(0x90CF, 0x73, 0, 13) + b"\0" * (2 + 4)
        return self._open_hack_core(inf, pwd, prefix, b"")
1630 # RAR5 format
class Rar5Info(RarInfo):
    """Fields common to every RAR5 record type."""

    extract_version = 50

    # header bookkeeping
    header_crc = None
    header_size = None
    header_offset = None
    data_offset = None

    # present on all block types
    block_type = None
    block_flags = None
    add_size = 0
    block_extra_size = 0

    # type=MAIN
    volume_number = None

    # hash checking defaults
    _md_class = None
    _md_expect = None

    def _must_disable_hack(self):
        """Generic records never veto the temp-archive extraction."""
        return False
class Rar5BaseFile(Rar5Info):
    """Common structure behind RAR5 file and service records."""

    type = -1
    file_flags = None
    file_encryption = (0, 0, 0, b"", b"", b"")
    file_compress_flags = None
    file_redir = None
    file_owner = None
    file_version = None
    blake2sp_hash = None

    def _must_disable_hack(self):
        """Tell whether the temp-archive extraction must be avoided.

        True for password-protected, split, solid or redirected entries.
        """
        split_mask = RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER
        return bool(
            self.flags & RAR_FILE_PASSWORD
            or self.block_flags & split_mask
            or self.file_compress_flags & RAR5_COMPR_SOLID
            or self.file_redir
        )
class Rar5FileInfo(Rar5BaseFile):
    """Record describing one file entry of a RAR5 archive."""

    type = RAR_BLOCK_FILE

    def is_symlink(self):
        """Returns True if entry is a symlink."""
        # pylint: disable=unsubscriptable-object
        if self.file_redir is None:
            return False
        redir_kind = self.file_redir[0]
        return redir_kind in (
            RAR5_XREDIR_UNIX_SYMLINK,
            RAR5_XREDIR_WINDOWS_SYMLINK,
            RAR5_XREDIR_WINDOWS_JUNCTION,
        )

    def is_file(self):
        """Returns True if entry is a normal file."""
        if self.is_dir():
            return False
        return not self.is_symlink()

    def is_dir(self):
        """Returns True if entry is a directory."""
        # any redirection record means the entry is not treated as a directory
        if self.file_redir:
            return False
        return bool(self.file_flags & RAR5_FILE_FLAG_ISDIR)
class Rar5ServiceInfo(Rar5BaseFile):
    """Record describing a RAR5 service block."""

    # reported with the same type code as RAR3 sub-blocks
    type = RAR_BLOCK_SUB
class Rar5MainInfo(Rar5Info):
    """Record describing the main header of a RAR5 archive."""

    type = RAR_BLOCK_MAIN
    main_flags = None
    main_volume_number = None

    def _must_disable_hack(self):
        """Solid archives rule out the temp-archive extraction."""
        return bool(self.main_flags & RAR5_MAIN_FLAG_SOLID)
class Rar5EncryptionInfo(Rar5Info):
    """Record describing RAR5 header encryption."""

    type = RAR5_BLOCK_ENCRYPTION

    # values read from the encryption block
    encryption_algo = None
    encryption_flags = None
    encryption_kdf_count = None
    encryption_salt = None
    encryption_check_value = None

    def needs_password(self):
        """An encryption record always requires a password."""
        return True
class Rar5EndArcInfo(Rar5Info):
    """Record describing the end-of-archive block of a RAR5 archive."""

    type = RAR_BLOCK_ENDARC

    # flags read from the block
    endarc_flags = None
class RAR5Parser(CommonParser):
    """Parse RAR5 format.

    Builds ``Rar5*Info`` records from block headers, handles encrypted
    headers, collects file entries and the archive comment, and knows how
    to wrap a single entry into a minimal RAR5 archive for extraction.
    """
    _expect_sig = RAR5_ID
    # encryption record seen in this archive, if any
    _hdrenc_main = None

    # AES encrypted headers
    _last_aes256_key = (-1, None, None)     # (kdf_count, salt, key)

    def _gen_key(self, kdf_count, salt):
        """Derive the AES-256 key with PBKDF2-HMAC-SHA256.

        Uses ``1 << kdf_count`` iterations.  The last result is cached and
        reused when ``(kdf_count, salt)`` repeats.

        Raises :exc:`BadRarFile` when ``kdf_count`` exceeds 24.
        """
        if self._last_aes256_key[:2] == (kdf_count, salt):
            return self._last_aes256_key[2]
        if kdf_count > 24:
            raise BadRarFile("Too large kdf_count")
        pwd = self._password
        if isinstance(pwd, str):
            pwd = pwd.encode("utf8")
        key = pbkdf2_hmac("sha256", pwd, salt, 1 << kdf_count)
        self._last_aes256_key = (kdf_count, salt, key)
        return key

    def _decrypt_header(self, fd):
        """Return a ``HeaderDecrypt`` wrapper around ``fd``.

        The key comes from the stored encryption record, the 16-byte iv is
        read from ``fd``.  Raises :exc:`NoCrypto` when ``_have_crypto`` is
        false.
        """
        if not _have_crypto:
            raise NoCrypto("Cannot parse encrypted headers - no crypto")
        h = self._hdrenc_main
        key = self._gen_key(h.encryption_kdf_count, h.encryption_salt)
        iv = fd.read(16)
        return HeaderDecrypt(fd, key, iv)

    def _parse_block_header(self, fd):
        """Parse common block header

        Returns the matching ``Rar5*Info`` record, or ``None`` for an
        oversized header, a truncated header, a CRC mismatch or an unknown
        block type.
        """
        header_offset = fd.tell()

        # crc (4 bytes) plus up to 3 bytes of the header-size vint
        preload = 4 + 3
        start_bytes = fd.read(preload)
        header_crc, pos = load_le32(start_bytes, 0)
        hdrlen, pos = load_vint(start_bytes, pos)
        if hdrlen > 2 * 1024 * 1024:
            return None
        header_size = pos + hdrlen

        # read full header, check for EOF
        hdata = start_bytes + fd.read(header_size - len(start_bytes))
        if len(hdata) != header_size:
            self._set_error("Unexpected EOF when reading header")
            return None
        data_offset = fd.tell()

        # crc covers everything after the stored crc field
        calc_crc = crc32(memoryview(hdata)[4:])
        if header_crc != calc_crc:
            # header parsing failed.
            self._set_error("Header CRC error: exp=%x got=%x (xlen = %d)",
                            header_crc, calc_crc, len(hdata))
            return None

        block_type, pos = load_vint(hdata, pos)

        if block_type == RAR5_BLOCK_MAIN:
            h, pos = self._parse_block_common(Rar5MainInfo(), hdata)
            h = self._parse_main_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_FILE:
            h, pos = self._parse_block_common(Rar5FileInfo(), hdata)
            h = self._parse_file_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_SERVICE:
            h, pos = self._parse_block_common(Rar5ServiceInfo(), hdata)
            h = self._parse_file_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_ENCRYPTION:
            h, pos = self._parse_block_common(Rar5EncryptionInfo(), hdata)
            h = self._parse_encryption_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_ENDARC:
            h, pos = self._parse_block_common(Rar5EndArcInfo(), hdata)
            h = self._parse_endarc_block(h, hdata, pos)
        else:
            h = None
        if h:
            h.header_offset = header_offset
            h.data_offset = data_offset
        return h

    def _parse_block_common(self, h, hdata):
        """Fill the fields shared by all block types; return ``(h, pos)``."""
        h.header_crc, pos = load_le32(hdata, 0)
        hdrlen, pos = load_vint(hdata, pos)
        h.header_size = hdrlen + pos
        h.block_type, pos = load_vint(hdata, pos)
        h.block_flags, pos = load_vint(hdata, pos)

        if h.block_flags & RAR5_BLOCK_FLAG_EXTRA_DATA:
            h.block_extra_size, pos = load_vint(hdata, pos)
        if h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA:
            h.add_size, pos = load_vint(hdata, pos)

        h.compress_size = h.add_size

        # mirror RAR5 block flags into the RAR3-style flags field
        if h.block_flags & RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN:
            h.flags |= RAR_SKIP_IF_UNKNOWN
        if h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA:
            h.flags |= RAR_LONG_BLOCK
        return h, pos

    def _parse_main_block(self, h, hdata, pos):
        """Fill main-header fields and the RAR3-style main flags."""
        h.main_flags, pos = load_vint(hdata, pos)
        if h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR:
            h.main_volume_number, pos = load_vint(hdata, pos)

        h.flags |= RAR_MAIN_NEWNUMBERING
        if h.main_flags & RAR5_MAIN_FLAG_SOLID:
            h.flags |= RAR_MAIN_SOLID
        if h.main_flags & RAR5_MAIN_FLAG_ISVOL:
            h.flags |= RAR_MAIN_VOLUME
        if h.main_flags & RAR5_MAIN_FLAG_RECOVERY:
            h.flags |= RAR_MAIN_RECOVERY
        if self._hdrenc_main:
            h.flags |= RAR_MAIN_PASSWORD
        if h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR == 0:
            h.flags |= RAR_MAIN_FIRSTVOLUME

        return h

    def _parse_file_block(self, h, hdata, pos):
        """Fill file/service record fields, including extra records."""
        h.file_flags, pos = load_vint(hdata, pos)
        h.file_size, pos = load_vint(hdata, pos)
        h.mode, pos = load_vint(hdata, pos)

        if h.file_flags & RAR5_FILE_FLAG_HAS_MTIME:
            h.mtime, pos = load_unixtime(hdata, pos)
            h.date_time = h.mtime.timetuple()[:6]
        if h.file_flags & RAR5_FILE_FLAG_HAS_CRC32:
            h.CRC, pos = load_le32(hdata, pos)
            h._md_class = CRC32Context
            h._md_expect = h.CRC

        h.file_compress_flags, pos = load_vint(hdata, pos)
        h.file_host_os, pos = load_vint(hdata, pos)
        h.orig_filename, pos = load_vstr(hdata, pos)
        h.filename = h.orig_filename.decode("utf8", "replace").rstrip("/")

        # use compatible values
        if h.file_host_os == RAR5_OS_WINDOWS:
            h.host_os = RAR_OS_WIN32
        else:
            h.host_os = RAR_OS_UNIX
        h.compress_type = RAR_M0 + ((h.file_compress_flags >> 7) & 7)

        if h.block_extra_size:
            # allow 1 byte of garbage
            while pos < len(hdata) - 1:
                xsize, pos = load_vint(hdata, pos)
                xdata, pos = load_bytes(hdata, xsize, pos)
                self._process_file_extra(h, xdata)

        if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE:
            h.flags |= RAR_FILE_SPLIT_BEFORE
        if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_AFTER:
            h.flags |= RAR_FILE_SPLIT_AFTER
        if h.file_flags & RAR5_FILE_FLAG_ISDIR:
            h.flags |= RAR_FILE_DIRECTORY
        if h.file_compress_flags & RAR5_COMPR_SOLID:
            h.flags |= RAR_FILE_SOLID

        if h.is_dir():
            h.filename = h.filename + "/"
        return h

    def _parse_endarc_block(self, h, hdata, pos):
        """Fill end-of-archive fields."""
        h.endarc_flags, pos = load_vint(hdata, pos)
        if h.endarc_flags & RAR5_ENDARC_FLAG_NEXT_VOL:
            h.flags |= RAR_ENDARC_NEXT_VOLUME
        return h

    def _parse_encryption_block(self, h, hdata, pos):
        """Fill header-encryption fields and remember the record.

        Raises :exc:`BadRarFile` for any cipher other than AES-256.
        """
        h.encryption_algo, pos = load_vint(hdata, pos)
        h.encryption_flags, pos = load_vint(hdata, pos)
        h.encryption_kdf_count, pos = load_byte(hdata, pos)
        h.encryption_salt, pos = load_bytes(hdata, 16, pos)
        if h.encryption_flags & RAR5_ENC_FLAG_HAS_CHECKVAL:
            # load_bytes() returns (data, new_pos); keep only the data
            h.encryption_check_value, pos = load_bytes(hdata, 12, pos)
        if h.encryption_algo != RAR5_XENC_CIPHER_AES256:
            raise BadRarFile("Unsupported header encryption cipher")
        self._hdrenc_main = h
        return h

    def _process_file_extra(self, h, xdata):
        """Dispatch one extra record to its parser; unknown types are ignored."""
        xtype, pos = load_vint(xdata, 0)
        if xtype == RAR5_XFILE_TIME:
            self._parse_file_xtime(h, xdata, pos)
        elif xtype == RAR5_XFILE_ENCRYPTION:
            self._parse_file_encryption(h, xdata, pos)
        elif xtype == RAR5_XFILE_HASH:
            self._parse_file_hash(h, xdata, pos)
        elif xtype == RAR5_XFILE_VERSION:
            self._parse_file_version(h, xdata, pos)
        elif xtype == RAR5_XFILE_REDIR:
            self._parse_file_redir(h, xdata, pos)
        elif xtype == RAR5_XFILE_OWNER:
            self._parse_file_owner(h, xdata, pos)
        elif xtype == RAR5_XFILE_SERVICE:
            pass
        else:
            pass

    # extra block for file time record
    def _parse_file_xtime(self, h, xdata, pos):
        """Fill mtime/ctime/atime from the time extra record."""
        tflags, pos = load_vint(xdata, pos)

        # time loader depends on the unixtime flag
        ldr = load_windowstime
        if tflags & RAR5_XTIME_UNIXTIME:
            ldr = load_unixtime

        if tflags & RAR5_XTIME_HAS_MTIME:
            h.mtime, pos = ldr(xdata, pos)
            h.date_time = h.mtime.timetuple()[:6]
        if tflags & RAR5_XTIME_HAS_CTIME:
            h.ctime, pos = ldr(xdata, pos)
        if tflags & RAR5_XTIME_HAS_ATIME:
            h.atime, pos = ldr(xdata, pos)

        # nanosecond parts follow in the same mtime/ctime/atime order
        if tflags & RAR5_XTIME_UNIXTIME_NS:
            if tflags & RAR5_XTIME_HAS_MTIME:
                nsec, pos = load_le32(xdata, pos)
                h.mtime = to_nsdatetime(h.mtime, nsec)
            if tflags & RAR5_XTIME_HAS_CTIME:
                nsec, pos = load_le32(xdata, pos)
                h.ctime = to_nsdatetime(h.ctime, nsec)
            if tflags & RAR5_XTIME_HAS_ATIME:
                nsec, pos = load_le32(xdata, pos)
                h.atime = to_nsdatetime(h.atime, nsec)

    # just remember encryption info
    def _parse_file_encryption(self, h, xdata, pos):
        """Store the file encryption parameters and mark the entry as password protected."""
        algo, pos = load_vint(xdata, pos)
        flags, pos = load_vint(xdata, pos)
        kdf_count, pos = load_byte(xdata, pos)
        salt, pos = load_bytes(xdata, 16, pos)
        iv, pos = load_bytes(xdata, 16, pos)
        checkval = None
        if flags & RAR5_XENC_CHECKVAL:
            checkval, pos = load_bytes(xdata, 12, pos)
        if flags & RAR5_XENC_TWEAKED:
            # tweaked checksums are not verified
            h._md_expect = None
            h._md_class = NoHashContext

        h.file_encryption = (algo, flags, kdf_count, salt, iv, checkval)
        h.flags |= RAR_FILE_PASSWORD

    def _parse_file_hash(self, h, xdata, pos):
        """Store a BLAKE2sp hash and use it for verification unless tweaked."""
        hash_type, pos = load_vint(xdata, pos)
        if hash_type == RAR5_XHASH_BLAKE2SP:
            h.blake2sp_hash, pos = load_bytes(xdata, 32, pos)
            if (h.file_encryption[1] & RAR5_XENC_TWEAKED) == 0:
                h._md_class = Blake2SP
                h._md_expect = h.blake2sp_hash

    def _parse_file_version(self, h, xdata, pos):
        """Store ``(flags, version)`` from the version extra record."""
        flags, pos = load_vint(xdata, pos)
        version, pos = load_vint(xdata, pos)
        h.file_version = (flags, version)

    def _parse_file_redir(self, h, xdata, pos):
        """Store ``(type, flags, name)`` from the redirection extra record."""
        redir_type, pos = load_vint(xdata, pos)
        redir_flags, pos = load_vint(xdata, pos)
        redir_name, pos = load_vstr(xdata, pos)
        redir_name = redir_name.decode("utf8", "replace")
        h.file_redir = (redir_type, redir_flags, redir_name)

    def _parse_file_owner(self, h, xdata, pos):
        """Store ``(user_name, group_name, user_id, group_id)``; absent parts are None."""
        user_name = group_name = user_id = group_id = None

        flags, pos = load_vint(xdata, pos)
        if flags & RAR5_XOWNER_UNAME:
            user_name, pos = load_vstr(xdata, pos)
        if flags & RAR5_XOWNER_GNAME:
            group_name, pos = load_vstr(xdata, pos)
        if flags & RAR5_XOWNER_UID:
            user_id, pos = load_vint(xdata, pos)
        if flags & RAR5_XOWNER_GID:
            group_id, pos = load_vint(xdata, pos)

        h.file_owner = (user_name, group_name, user_id, group_id)

    def process_entry(self, fd, item):
        """Record a parsed block: file entries and the archive comment."""
        if item.block_type == RAR5_BLOCK_FILE:
            if item.file_version:
                pass    # skip old versions
            elif (item.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE) == 0:
                # use only first part
                self._info_map[item.filename.rstrip("/")] = item
                self._info_list.append(item)
            elif len(self._info_list) > 0:
                # final crc is in last block
                old = self._info_list[-1]
                old.CRC = item.CRC
                old._md_expect = item._md_expect
                old.blake2sp_hash = item.blake2sp_hash
                old.compress_size += item.compress_size
        elif item.block_type == RAR5_BLOCK_SERVICE:
            if item.filename == "CMT":
                self._load_comment(fd, item)

    def _load_comment(self, fd, item):
        """Load the archive comment from a ``CMT`` service record.

        Split or compressed comments, and encrypted comments using a cipher
        other than AES-256, are skipped.  Always returns ``None``; the
        result is stored in ``self.comment``.
        """
        if item.block_flags & (RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER):
            return None
        if item.compress_type != RAR_M0:
            return None

        if item.flags & RAR_FILE_PASSWORD:
            algo, ___flags, kdf_count, salt, iv, ___checkval = item.file_encryption
            if algo != RAR5_XENC_CIPHER_AES256:
                return None
            key = self._gen_key(kdf_count, salt)
            # NOTE(review): reads from the current position of fd - assumes
            # it points at the record's data area; confirm against caller
            f = HeaderDecrypt(fd, key, iv)
            cmt = f.read(item.file_size)
        else:
            # archive comment
            with self._open_clear(item) as cmtstream:
                cmt = cmtstream.read()

        # rar bug? - appends zero to comment
        cmt = cmt.split(b"\0", 1)[0]
        self.comment = cmt.decode("utf8")
        return None

    def _open_hack(self, inf, pwd):
        """Extract ``inf`` via a temp archive with generated main and end headers."""
        # len, type, blk_flags, flags
        main_hdr = b"\x03\x01\x00\x00"
        endarc_hdr = b"\x03\x05\x00\x00"
        main_hdr = S_LONG.pack(crc32(main_hdr)) + main_hdr
        endarc_hdr = S_LONG.pack(crc32(endarc_hdr)) + endarc_hdr
        return self._open_hack_core(inf, pwd, RAR5_ID + main_hdr, endarc_hdr)
2082 ## Utility classes
class UnicodeFilename:
    """Decoder for the compressed UTF-16 filename form used by RAR3.

    ``name`` is the 8-bit name, ``encdata`` the encoded stream.  After
    :meth:`decode`, ``failed`` is 1 if either input ran out early.
    """

    def __init__(self, name, encdata):
        self.std_name = bytearray(name)
        self.encdata = bytearray(encdata)
        self.pos = self.encpos = 0
        self.buf = bytearray()
        self.failed = 0

    def enc_byte(self):
        """Take the next byte of the encoded stream (0 once exhausted)."""
        idx = self.encpos
        if idx >= len(self.encdata):
            self.failed = 1
            return 0
        self.encpos = idx + 1
        return self.encdata[idx]

    def std_byte(self):
        """Peek the 8-bit name byte at the output position ("?" when past the end)."""
        if self.pos < len(self.std_name):
            return self.std_name[self.pos]
        self.failed = 1
        return ord("?")

    def put(self, lo, hi):
        """Emit one little-endian 16-bit unit and advance the output position."""
        self.buf.append(lo)
        self.buf.append(hi)
        self.pos += 1

    def decode(self):
        """Expand the encoded stream and return the resulting text."""
        high = self.enc_byte()
        bits_left = 0
        total = len(self.encdata)
        while self.encpos < total:
            # each control byte holds four 2-bit opcodes, consumed from the top
            if bits_left == 0:
                ctl = self.enc_byte()
                bits_left = 8
            bits_left -= 2
            op = (ctl >> bits_left) & 3

            if op == 0:
                # low byte given, high byte zero
                self.put(self.enc_byte(), 0)
            elif op == 1:
                # low byte given, shared high byte
                self.put(self.enc_byte(), high)
            elif op == 2:
                # both bytes given, low first
                low_byte = self.enc_byte()
                high_byte = self.enc_byte()
                self.put(low_byte, high_byte)
            else:
                # run copied from the 8-bit name
                run = self.enc_byte()
                if run & 0x80:
                    delta = self.enc_byte()
                    remaining = (run & 0x7f) + 2
                    while remaining > 0:
                        self.put((self.std_byte() + delta) & 0xFF, high)
                        remaining -= 1
                else:
                    remaining = run + 2
                    while remaining > 0:
                        self.put(self.std_byte(), 0)
                        remaining -= 1
        return self.buf.decode("utf-16le", "replace")
class RarExtFile(io.RawIOBase):
    """Base class for file-like object that :meth:`RarFile.open` returns.

    Provides public methods and common crc checking.

    Behaviour:
     - no short reads - .read() and .readinto() read as much as requested.
     - no internal buffer, use io.BufferedReader for that.
    """
    name = None     #: Filename of the archive entry
    mode = "rb"
    _parser = None
    _inf = None
    _fd = None
    _remain = 0
    _returncode = 0
    _md_context = None

    def _open_extfile(self, parser, inf):
        """(Re)initialize state for reading ``inf`` from the start.

        Closes any open descriptor, installs a fresh hash context and
        resets the remaining byte count to the entry's file size.
        """
        self.name = inf.filename
        self._parser = parser
        self._inf = inf

        if self._fd:
            self._fd.close()
        md_class = self._inf._md_class or NoHashContext
        self._md_context = md_class()
        self._fd = None
        self._remain = self._inf.file_size

    def read(self, n=-1):
        """Read all or specified amount of data from archive entry.

        Raises :exc:`BadRarFile` when fewer bytes than requested could be
        read.  The final hash check runs once the entry is exhausted.
        """

        # sanitize count
        if n is None or n < 0:
            n = self._remain
        elif n > self._remain:
            n = self._remain
        if n == 0:
            return b""

        buf = []
        orig = n
        while n > 0:
            # actual read
            data = self._read(n)
            if not data:
                break
            buf.append(data)
            self._md_context.update(data)
            self._remain -= len(data)
            n -= len(data)
        data = b"".join(buf)
        if n > 0:
            raise BadRarFile("Failed the read enough data: req=%d got=%d" % (orig, len(data)))

        # done?
        if not data or self._remain == 0:
            self._check()
        return data

    def _check(self):
        """Check final CRC.

        Does nothing when no expected value is known or the hash context
        yields no digest.
        """
        final = self._md_context.digest()
        exp = self._inf._md_expect
        if exp is None:
            return
        if final is None:
            return
        if self._returncode:
            check_returncode(self._returncode, "", tool_setup().get_errmap())
        if self._remain != 0:
            raise BadRarFile("Failed the read enough data")
        if final != exp:
            raise BadRarFile("Corrupt file - CRC check failed: %s - exp=%r got=%r" % (
                self._inf.filename, exp, final))

    def _read(self, cnt):
        """Actual read that gets sanitized cnt."""
        raise NotImplementedError("_read")

    def close(self):
        """Close open resources."""

        super().close()

        if self._fd:
            self._fd.close()
            self._fd = None

    def __del__(self):
        """Hook delete to make sure tempfile is removed."""
        self.close()

    def readinto(self, buf):
        """Zero-copy read directly into buffer.

        Returns bytes read.
        """
        raise NotImplementedError("readinto")

    def tell(self):
        """Return current reading position in uncompressed data."""
        return self._inf.file_size - self._remain

    def seek(self, offset, whence=0):
        """Seek in data.

        On uncompressed files, the seeking works by actual
        seeks so it's fast.  On compressed files it's slow
        - forward seeking happens by reading ahead,
        backwards by re-opening and decompressing from the start.

        The target is clamped to ``[0, file_size]``.  Seeking disables the
        final hash check.  Returns the new position.
        """

        # disable crc check when seeking
        self._md_context = NoHashContext()

        fsize = self._inf.file_size
        cur_ofs = self.tell()

        if whence == 0:     # seek from beginning of file
            new_ofs = offset
        elif whence == 1:   # seek from current position
            new_ofs = cur_ofs + offset
        elif whence == 2:   # seek from end of file
            new_ofs = fsize + offset
        else:
            raise ValueError("Invalid value for whence")

        # sanity check
        if new_ofs < 0:
            new_ofs = 0
        elif new_ofs > fsize:
            new_ofs = fsize

        # do the actual seek
        if new_ofs >= cur_ofs:
            self._skip(new_ofs - cur_ofs)
        else:
            # reopen and seek
            self._open_extfile(self._parser, self._inf)
            # reopening installed a fresh hash context; keep the check
            # disabled, since _skip() may jump over data without hashing it
            self._md_context = NoHashContext()
            self._skip(new_ofs)
        return self.tell()

    def _skip(self, cnt):
        """Read and discard data"""
        empty_read(self, cnt, BSIZE)

    def readable(self):
        """Returns True"""
        return True

    def writable(self):
        """Returns False.

        Writing is not supported.
        """
        return False

    def seekable(self):
        """Returns True.

        Seeking is supported, although it's slow on compressed files.
        """
        return True

    def readall(self):
        """Read all remaining data"""
        # avoid RawIOBase default impl
        return self.read()
class PipeReader(RarExtFile):
    """Reader fed by the stdout pipe of an extraction command.

    Also removes the optional temp file when closed.
    """

    def __init__(self, parser, inf, cmd, tempfile=None):
        super().__init__()
        self._cmd = cmd
        self._proc = None
        self._tempfile = tempfile
        self._open_extfile(parser, inf)

    def _close_proc(self):
        """Close the child's pipes, wait for it and remember its exit code."""
        proc = self._proc
        if not proc:
            return
        for stream in (proc.stdout, proc.stderr, proc.stdin):
            if stream:
                stream.close()
        proc.wait()
        self._returncode = proc.returncode
        self._proc = None

    def _open_extfile(self, parser, inf):
        """Reset reader state and start a fresh extraction process."""
        super()._open_extfile(parser, inf)

        # get rid of the previous process, if any
        self._close_proc()

        # start over with a new one
        self._returncode = 0
        self._proc = custom_popen(self._cmd)
        self._fd = self._proc.stdout

    def _read(self, cnt):
        """Read from pipe."""

        # a single read usually delivers everything
        first = self._fd.read(cnt)
        missing = cnt - len(first)
        if missing == 0 or not first:
            return first

        # short read: keep asking until satisfied or the pipe is drained
        parts = [first]
        while missing > 0:
            more = self._fd.read(missing)
            if not more:
                break
            missing -= len(more)
            parts.append(more)
        return b"".join(parts)

    def close(self):
        """Close open resources."""

        self._close_proc()
        super().close()

        if self._tempfile:
            try:
                os.unlink(self._tempfile)
            except OSError:
                pass
            self._tempfile = None

    def readinto(self, buf):
        """Zero-copy read directly into buffer."""
        limit = min(len(buf), self._remain)
        view = memoryview(buf)
        filled = 0
        while filled < limit:
            nread = self._fd.readinto(view[filled: limit])
            if not nread:
                break
            self._md_context.update(view[filled: filled + nread])
            self._remain -= nread
            filled += nread
        return filled
class DirectReader(RarExtFile):
    """Read uncompressed data directly from archive.

    The data of one entry may continue in following volumes; the reader
    tracks the header of the part it is currently in (``_cur``) and how
    many data bytes of that part are still unread (``_cur_avail``).
    """
    _cur = None
    _cur_avail = None
    _volfile = None

    def __init__(self, parser, inf):
        """Open the entry described by ``inf``."""
        super().__init__()
        self._open_extfile(parser, inf)

    def _open_extfile(self, parser, inf):
        """Open the entry's volume and re-parse its header there."""
        super()._open_extfile(parser, inf)

        info = self._inf
        self._volfile = info.volume_file
        self._fd = XFile(self._volfile, 0)
        self._fd.seek(info.header_offset, 0)
        # parsing the header leaves the file positioned at the data
        self._cur = self._parser._parse_header(self._fd)
        self._cur_avail = self._cur.add_size

    def _skip(self, cnt):
        """RAR Seek, skipping through rar files to get to correct position
        """
        while cnt > 0:
            # current part exhausted - move on to the next volume
            if self._cur_avail == 0 and not self._open_next():
                break

            avail = self._cur_avail
            if cnt > avail:
                # drop the whole rest of this part
                cnt -= avail
                self._remain -= avail
                self._cur_avail = 0
            else:
                # target lies inside this part
                self._fd.seek(cnt, 1)
                self._cur_avail -= cnt
                self._remain -= cnt
                cnt = 0

    def _read(self, cnt):
        """Read from potentially multi-volume archive."""
        chunks = []
        while cnt > 0:
            # current part exhausted - move on to the next volume
            if self._cur_avail == 0 and not self._open_next():
                break

            # never read past the data of the current part
            data = self._fd.read(min(cnt, self._cur_avail))
            if not data:
                break

            cnt -= len(data)
            self._cur_avail -= len(data)
            chunks.append(data)

        if len(chunks) == 1:
            return chunks[0]
        return b"".join(chunks)

    def _open_next(self):
        """Proceed to next volume.

        Returns False when the entry does not continue in another
        volume, True once the continuation has been located.
        """
        # is the file split over archives?
        if not self._cur.flags & RAR_FILE_SPLIT_AFTER:
            return False

        if self._fd:
            self._fd.close()
            self._fd = None

        # open next part and verify its signature
        self._volfile = self._parser._next_volname(self._volfile)
        fd = open(self._volfile, "rb", 0)
        self._fd = fd
        expected = self._parser._expect_sig
        if fd.read(len(expected)) != expected:
            raise BadRarFile("Invalid signature")

        # walk headers until the first file header shows up
        while True:
            hdr = self._parser._parse_header(fd)
            if not hdr:
                raise BadRarFile("Unexpected EOF")
            if hdr.type in (RAR_BLOCK_MARK, RAR_BLOCK_MAIN):
                if hdr.add_size:
                    fd.seek(hdr.add_size, 1)
                continue
            if hdr.orig_filename != self._inf.orig_filename:
                raise BadRarFile("Did not found file entry")
            self._cur = hdr
            self._cur_avail = hdr.add_size
            return True

    def readinto(self, buf):
        """Zero-copy read directly into buffer."""
        total = len(buf)
        view = memoryview(buf)
        filled = 0
        while filled < total:
            # current part exhausted - move on to the next volume
            if self._cur_avail == 0 and not self._open_next():
                break

            # length for next read, limited to the current part
            want = min(total - filled, self._cur_avail)

            # read into temp view
            nread = self._fd.readinto(view[filled:filled + want])
            if not nread:
                break
            self._md_context.update(view[filled:filled + nread])
            self._cur_avail -= nread
            self._remain -= nread
            filled += nread
        return filled
class HeaderDecrypt:
    """File-like object that decrypts from another file"""

    def __init__(self, f, key, iv):
        """Wrap file ``f``; ``key`` and ``iv`` set up the AES-CBC decryptor."""
        self.f = f
        self.ciph = AES_CBC_Decrypt(key, iv)
        # decrypted bytes that were not handed out yet
        self.buf = b""

    def tell(self):
        """Current file pos - works only on block boundaries."""
        return self.f.tell()

    def read(self, cnt=None):
        """Read and decrypt.

        Returns up to ``cnt`` decrypted bytes; fewer when the underlying
        file runs out of complete 16-byte blocks.
        """
        if cnt > 8 * 1024:
            raise BadRarFile("Bad count to header decrypt - wrong password?")

        # serve from leftovers when they suffice
        pending = self.buf
        if cnt <= len(pending):
            self.buf = pending[cnt:]
            return pending[:cnt]
        out = pending
        self.buf = b""
        cnt -= len(out)

        # decrypt block by block until the request is satisfied
        blklen = 16
        while cnt > 0:
            enc = self.f.read(blklen)
            if len(enc) < blklen:
                break
            dec = self.ciph.decrypt(enc)
            if cnt < len(dec):
                # keep the unused tail for the next call
                out += dec[:cnt]
                self.buf = dec[cnt:]
                cnt = 0
            else:
                out += dec
                cnt -= len(dec)

        return out
class XFile:
    """Input may be filename or file object.

    A file opened here from a name is closed by :meth:`close`; a file
    object passed in by the caller is left open.
    """
    __slots__ = ("_fd", "_need_close")

    def __init__(self, xfile, bufsize=1024):
        """Wrap ``xfile``; ``bufsize`` is only used when opening by name."""
        if not is_filelike(xfile):
            self._need_close = True
            self._fd = open(xfile, "rb", bufsize)
            return
        # caller-owned object: rewind it, never close it
        self._need_close = False
        self._fd = xfile
        self._fd.seek(0)

    def read(self, n=None):
        """Read from file."""
        return self._fd.read(n)

    def tell(self):
        """Return file pos."""
        return self._fd.tell()

    def seek(self, ofs, whence=0):
        """Move file pos."""
        return self._fd.seek(ofs, whence)

    def readinto(self, buf):
        """Read into buffer."""
        return self._fd.readinto(buf)

    def close(self):
        """Close file object."""
        if self._need_close:
            self._fd.close()

    def __enter__(self):
        return self

    def __exit__(self, typ, val, tb):
        self.close()
class NoHashContext:
    """No-op hash function.

    Offers the hash-context methods but ignores all data; every method
    returns None.
    """

    def __init__(self, data=None):
        """Initialize"""

    def update(self, data):
        """Update data"""

    def digest(self):
        """Final hash"""

    def hexdigest(self):
        """Hexadecimal digest."""
class CRC32Context:
    """Hash context that uses CRC32."""
    __slots__ = ["_crc"]

    def __init__(self, data=None):
        """Start at CRC 0, optionally hashing ``data`` right away."""
        self._crc = 0
        if data:
            self.update(data)

    def update(self, data):
        """Process data."""
        # crc32() continues from the running value
        self._crc = crc32(data, self._crc)

    def digest(self):
        """Final hash."""
        return self._crc

    def hexdigest(self):
        """Hexadecimal digest."""
        return format(self.digest(), "08x")
class Blake2SP:
    """Blake2sp hash context.

    Input is cut into ``block_size`` byte blocks that are dealt
    round-robin to ``parallelism`` leaf blake2s contexts; the final
    digest is a root blake2s over all leaf digests.
    """
    __slots__ = ["_thread", "_buf", "_cur", "_digest"]
    digest_size = 32
    block_size = 64
    parallelism = 8

    def __init__(self, data=None):
        """Create the leaf contexts, optionally hashing ``data``."""
        self._buf = b""
        self._cur = 0
        self._digest = None

        # only the final leaf carries the last-node flag
        last = self.parallelism - 1
        self._thread = [self._blake2s(idx, 0, idx == last)
                        for idx in range(self.parallelism)]

        if data:
            self.update(data)

    def _blake2s(self, ofs, depth, is_last):
        """Create one blake2s node of the two-level tree."""
        return blake2s(node_offset=ofs, node_depth=depth, last_node=is_last,
                       depth=2, inner_size=32, fanout=self.parallelism)

    def _add_block(self, blk):
        """Feed one block to the current leaf and advance to the next leaf."""
        self._thread[self._cur].update(blk)
        self._cur = (self._cur + 1) % self.parallelism

    def update(self, data):
        """Hash data.
        """
        view = memoryview(data)
        bs = self.block_size
        pos = 0

        # first complete a partial block left from an earlier call
        if self._buf:
            need = bs - len(self._buf)
            if len(view) < need:
                self._buf += view.tobytes()
                return
            self._add_block(self._buf + view[:need].tobytes())
            pos = need

        # hand out all full blocks
        end = len(view)
        while end - pos >= bs:
            self._add_block(view[pos:pos + bs])
            pos += bs

        # keep the incomplete tail for later
        self._buf = view[pos:].tobytes()

    def digest(self):
        """Return final digest value.
        """
        if self._digest is not None:
            return self._digest
        if self._buf:
            self._add_block(self._buf)
            self._buf = b""
        root = self._blake2s(0, 1, True)
        for leaf in self._thread:
            root.update(leaf.digest())
        self._digest = root.digest()
        return self._digest

    def hexdigest(self):
        """Hexadecimal digest."""
        return self.digest().hex()
class Rar3Sha1:
    """Emulate buggy SHA1 from RAR3.

    The digest itself is plain SHA1.  With ``rarbug`` enabled, input
    passed to :meth:`update` (which then must be a writable buffer) is
    additionally overwritten in place with SHA1 work-state words.
    """
    digest_size = 20
    block_size = 64

    _BLK_BE = struct.Struct(b">16L")
    _BLK_LE = struct.Struct(b"<16L")

    __slots__ = ("_nbytes", "_md", "_rarbug")

    def __init__(self, data=b"", rarbug=False):
        """Start a new hash, optionally hashing ``data`` immediately."""
        self._md = sha1()
        self._nbytes = 0
        self._rarbug = rarbug
        self.update(data)

    def update(self, data):
        """Process more data."""
        self._md.update(data)
        # offset inside the current 64-byte block before this call
        bufpos = self._nbytes & 63
        self._nbytes += len(data)

        if self._rarbug and len(data) > 64:
            bs = self.block_size
            # every full block that starts after the first block boundary
            for dpos in range(bs - bufpos, len(data) - bs + 1, bs):
                self._corrupt(data, dpos)

    def digest(self):
        """Return final state."""
        return self._md.digest()

    def hexdigest(self):
        """Return final state as hex string."""
        return self._md.hexdigest()

    def _corrupt(self, data, dpos):
        """Corruption from SHA1 core."""
        words = list(self._BLK_BE.unpack_from(data, dpos))
        for t in range(16, 80):
            mix = (words[(t - 3) & 15] ^ words[(t - 8) & 15]
                   ^ words[(t - 14) & 15] ^ words[(t - 16) & 15])
            # rotate left by one bit within 32 bits
            words[t & 15] = ((mix << 1) | (mix >> 31)) & 0xFFFFFFFF
        self._BLK_LE.pack_into(data, dpos, *words)
##
## Utility functions
##

# pre-compiled little-endian formats for single integers
S_LONG = Struct("<L")
S_SHORT = Struct("<H")
S_BYTE = Struct("<B")

# pre-compiled little-endian formats for fixed header layouts
S_BLK_HDR = Struct("<HBHH")
S_FILE_HDR = Struct("<LLBLLBBHL")
S_COMMENT_HDR = Struct("<HBBH")
def load_vint(buf, pos):
    """Load RAR5 variable-size int.

    Each byte contributes its low 7 bits, least significant group
    first; a byte below 0x80 ends the number.  Returns ``(value,
    next_pos)``; raises BadRarFile when no terminating byte is found
    within 11 bytes or before the end of ``buf``.
    """
    limit = min(pos + 11, len(buf))
    value = 0
    for shift, idx in enumerate(range(pos, limit)):
        byte = buf[idx]
        value += (byte & 0x7F) << (7 * shift)
        if byte < 0x80:
            return value, idx + 1
    raise BadRarFile("cannot load vint")
def load_byte(buf, pos):
    """Load single byte

    Returns ``(value, next_pos)``; raises BadRarFile when ``buf`` is
    too short.
    """
    nxt = pos + 1
    if nxt <= len(buf):
        return S_BYTE.unpack_from(buf, pos)[0], nxt
    raise BadRarFile("cannot load byte")
def load_le32(buf, pos):
    """Load little-endian 32-bit integer

    Returns ``(value, next_pos)``; raises BadRarFile when fewer than
    four bytes are available at ``pos``.
    """
    nxt = pos + 4
    if nxt <= len(buf):
        return S_LONG.unpack_from(buf, pos)[0], nxt
    raise BadRarFile("cannot load le32")
def load_bytes(buf, num, pos):
    """Load sequence of bytes

    Returns ``(buf[pos:pos + num], next_pos)``; raises BadRarFile when
    the slice would run past the end of ``buf``.
    """
    stop = pos + num
    if stop <= len(buf):
        return buf[pos:stop], stop
    raise BadRarFile("cannot load bytes")
def load_vstr(buf, pos):
    """Load bytes prefixed by vint length

    Returns ``(data, next_pos)``.
    """
    length, start = load_vint(buf, pos)
    return load_bytes(buf, length, start)
def load_dostime(buf, pos):
    """Load LE32 dos timestamp

    Returns ``(datetime, next_pos)``.
    """
    raw, nxt = load_le32(buf, pos)
    return to_datetime(parse_dos_time(raw)), nxt
def load_unixtime(buf, pos):
    """Load LE32 unix timestamp

    Returns ``(datetime in UTC, next_pos)``.
    """
    secs, nxt = load_le32(buf, pos)
    return datetime.fromtimestamp(secs, timezone.utc), nxt
def load_windowstime(buf, pos):
    """Load LE64 windows timestamp

    The value counts 100ns units since the windows epoch (1601).
    Returns ``(datetime in UTC, next_pos)``; the sub-second part is
    applied with nanosecond precision.
    """
    # unix epoch (1970) in seconds from windows epoch (1601)
    unix_epoch = 11644473600
    low, pos = load_le32(buf, pos)
    high, pos = load_le32(buf, pos)
    ticks = (high << 32) | low
    secs, frac = divmod(ticks, 10000000)
    stamp = datetime.fromtimestamp(secs - unix_epoch, timezone.utc)
    return to_nsdatetime(stamp, frac * 100), pos
#
# volume numbering
#

# extension suffix that consists of digits only
_rc_num = re.compile('^[0-9]+$')
def _next_newvol(volfile):
    """New-style next volume

    Increments the last digit run found in the file name part of
    ``volfile``; raises BadRarName when the name contains no digit.
    """
    name, ext = os.path.splitext(volfile)
    if ext.lower() in ("", ".exe", ".sfx"):
        volfile = name + ".rar"

    # scan backwards, stopping at the first path separator
    for idx in range(len(volfile) - 1, -1, -1):
        ch = volfile[idx]
        if "0" <= ch <= "9":
            return _inc_volname(volfile, idx, False)
        if ch in ("/", os.sep):
            break
    raise BadRarName("Cannot construct volume name: " + volfile)
def _next_oldvol(volfile):
    """Old-style next volume

    Only the extension changes: a numeric suffix is incremented,
    anything else restarts at "00" (.rar -> .r00).
    """
    name, ext = os.path.splitext(volfile)
    if ext.lower() in ("", ".exe", ".sfx"):
        ext = ".rar"

    if _rc_num.match(ext[2:]):
        new_ext = _inc_volname(ext, len(ext) - 1, True)
    else:
        # .rar -> .r00
        new_ext = ext[:2] + "00"
    return name + new_ext
def _inc_volname(volfile, i, inc_chars):
    """increase digits with carry, otherwise just increment char

    Starts at index ``i`` and moves left while digits overflow.  A
    non-digit reached by the carry is bumped to the next character
    when ``inc_chars`` is set; otherwise a "1" is inserted after it.
    """
    chars = list(volfile)
    while i >= 0:
        ch = chars[i]
        if ch == "9":
            # overflow: reset and carry to the left
            chars[i] = "0"
            i -= 1
            if i < 0:
                chars.insert(0, "1")
            continue
        if inc_chars or "0" <= ch < "9":
            chars[i] = chr(ord(ch) + 1)
        else:
            chars.insert(i + 1, "1")
        break
    return "".join(chars)
def _parse_ext_time(h, data, pos):
    """Parse all RAR3 extended time fields

    Fills ``h.ctime``, ``h.atime`` and ``h.arctime``, refines
    ``h.mtime`` / ``h.date_time`` when an extended mtime is present
    and returns the new position.
    """
    # flags and rest of data can be missing
    flags = 0
    if pos + 2 <= len(data):
        flags = S_SHORT.unpack_from(data, pos)[0]
        pos += 2

    # one 4-bit group per field, mtime in the topmost group
    new_mtime, pos = _parse_xtime(flags >> 12, data, pos, h.mtime)
    h.ctime, pos = _parse_xtime(flags >> 8, data, pos)
    h.atime, pos = _parse_xtime(flags >> 4, data, pos)
    h.arctime, pos = _parse_xtime(flags >> 0, data, pos)
    if new_mtime:
        h.mtime = new_mtime
        h.date_time = new_mtime.timetuple()[:6]
    return pos
def _parse_xtime(flag, data, pos, basetime=None):
    """Parse one RAR3 extended time field

    Bit 8 of ``flag`` marks the field as present, bit 4 adds one
    second and the low two bits give the number of fraction bytes.
    Returns ``(datetime or None, next_pos)``.
    """
    if not flag & 8:
        return None, pos

    if not basetime:
        basetime, pos = load_dostime(data, pos)

    # load second fractions of 100ns units
    frac = 0
    for _ in range(flag & 3):
        byte, pos = load_byte(data, pos)
        frac = (byte << 16) | (frac >> 8)

    # dostime has room for 30 seconds only, correct if needed
    if flag & 4 and basetime.second < 59:
        basetime = basetime.replace(second=basetime.second + 1)

    return to_nsdatetime(basetime, frac * 100), pos
def is_filelike(obj):
    """Filename or file object?

    Returns False for ``bytes``, ``str`` and ``Path`` (a file name) and
    True for objects with ``read``, ``tell`` and ``seek``; anything
    else raises ValueError.
    """
    if isinstance(obj, (bytes, str, Path)):
        return False
    if not all(hasattr(obj, attr) for attr in ("read", "tell", "seek")):
        raise ValueError("Invalid object passed as file")
    return True
def rar3_s2k(pwd, salt):
    """String-to-key hash for RAR3.

    Returns ``(key, iv)``: a 16-byte AES key and a 16-byte IV derived
    from 0x40000 hash rounds over the UTF-16LE password plus salt.
    """
    if not isinstance(pwd, str):
        pwd = pwd.decode("utf8")
    # bytearray: the rarbug emulation needs a writable buffer
    seed = bytearray(pwd.encode("utf-16le") + salt)
    md = Rar3Sha1(rarbug=True)
    iv_parts = []
    for rnd in range(16 * 0x4000):
        counter = S_LONG.pack(rnd)
        md.update(seed)
        md.update(counter[:3])
        if rnd % 0x4000 == 0:
            # one IV byte at the start of every 0x4000 rounds
            iv_parts.append(md.digest()[19:20])
    key_be = md.digest()[:16]
    # swap the byte order of each 32-bit word
    key_le = pack("<LLLL", *unpack(">LLLL", key_be))
    return key_le, b"".join(iv_parts)
def rar3_decompress(vers, meth, data, declen=0, flags=0, crc=0, pwd=None, salt=None):
    """Decompress blob of compressed data.

    Used for data with non-standard header - eg. comments.

    The blob is wrapped into a minimal single-file archive written to
    a temp file, which the configured tool then extracts to stdout.
    """
    # already uncompressed?
    if meth == RAR_M0 and not flags & RAR_FILE_PASSWORD:
        return data

    # take only necessary flags
    flags &= RAR_FILE_PASSWORD | RAR_FILE_SALT | RAR_FILE_DICTMASK
    flags |= RAR_LONG_BLOCK

    # file header: fixed name, fixed date 2010-12-31
    fname = b"data"
    date = ((2010 - 1980) << 25) + (12 << 21) + (31 << 16)
    fhdr = S_FILE_HDR.pack(len(data), declen, RAR_OS_MSDOS, crc,
                           date, vers, meth, len(fname), DOS_MODE_ARCHIVE)
    fhdr += fname
    if salt:
        fhdr += salt

    # full header: pack with zero CRC first, then with the real one
    hlen = S_BLK_HDR.size + len(fhdr)
    draft = S_BLK_HDR.pack(0, RAR_BLOCK_FILE, flags, hlen) + fhdr
    hcrc = crc32(draft[2:]) & 0xFFFF
    hdr = S_BLK_HDR.pack(hcrc, RAR_BLOCK_FILE, flags, hlen) + fhdr

    # archive main header
    mh = S_BLK_HDR.pack(0x90CF, RAR_BLOCK_MAIN, 0, 13) + b"\0" * (2 + 4)

    # decompress via temp rar
    setup = tool_setup()
    tmpfd, tmpname = mkstemp(suffix=".rar", dir=HACK_TMP_DIR)
    tmpf = os.fdopen(tmpfd, "wb")
    try:
        tmpf.write(RAR_ID + mh + hdr + data)
        tmpf.close()

        # password is passed on only for encrypted data
        curpwd = (pwd or None) if flags & RAR_FILE_PASSWORD else None
        proc = custom_popen(setup.open_cmdline(curpwd, tmpname))
        return proc.communicate()[0]
    finally:
        tmpf.close()
        os.unlink(tmpname)
def sanitize_filename(fname, pathsep, is_win32):
    """Simulate unrar sanitization.

    Replaces bad characters with "_", drops empty, "." and ".." path
    segments and joins the rest with ``pathsep``.  For win32 a leading
    drive prefix is removed and a trailing space or dot of a segment
    becomes "_".
    """
    if is_win32:
        if len(fname) > 1 and fname[1] == ":":
            fname = fname[2:]
        bad_chars = RC_BAD_CHARS_WIN32
    else:
        bad_chars = RC_BAD_CHARS_UNIX
    if bad_chars.search(fname):
        fname = bad_chars.sub("_", fname)

    kept = []
    for seg in fname.split("/"):
        if seg in ("", ".", ".."):
            continue
        if is_win32 and seg[-1] in (" ", "."):
            seg = seg[:-1] + "_"
        kept.append(seg)
    return pathsep.join(kept)
def empty_read(src, size, blklen):
    """Read and drop fixed amount of data.

    Reads ``size`` bytes from ``src`` in pieces of at most ``blklen``
    bytes; raises BadRarFile when the source ends early.
    """
    while size > 0:
        chunk = src.read(min(size, blklen))
        if not chunk:
            raise BadRarFile("cannot load data")
        size -= len(chunk)
def to_datetime(t):
    """Convert 6-part time tuple into datetime object.

    Values that ``datetime`` rejects are clamped: month to 1..12, day
    to 1..length of month (February counts 28 days) and hour, minute
    and second to their maximum.
    """
    year, mon, day, h, m, s = t

    # assume the values are valid
    try:
        return datetime(year, mon, day, h, m, s)
    except ValueError:
        pass

    # sanitize invalid values
    month_days = (0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
    mon = max(1, min(mon, 12))
    day = max(1, min(day, month_days[mon]))
    return datetime(year, mon, day, min(h, 23), min(m, 59), min(s, 59))
def parse_dos_time(stamp):
    """Parse standard 32-bit DOS timestamp.

    Returns ``(year, month, day, hour, minute, second)``; seconds are
    stored in units of two and the year as an offset from 1980.
    """
    sec = stamp & 0x1F
    mn = (stamp >> 5) & 0x3F
    hr = (stamp >> 11) & 0x1F
    day = (stamp >> 16) & 0x1F
    mon = (stamp >> 21) & 0x0F
    yr = ((stamp >> 25) & 0x7F) + 1980
    return (yr, mon, day, hr, mn, sec * 2)
# pylint: disable=arguments-differ,signature-differs
class nsdatetime(datetime):
    """Datetime that carries nanoseconds.

    Arithmetic not supported, will lose nanoseconds.

    Construction hands back a plain :class:`datetime` whenever the
    value fits into whole microseconds.

    .. versionadded:: 4.0
    """
    __slots__ = ("nanosecond",)
    nanosecond: int     #: Number of nanoseconds, 0 <= nanosecond <= 999999999

    def __new__(cls, year, month, day, hour=0, minute=0, second=0,
                microsecond=0, tzinfo=None, *, fold=0, nanosecond=0):
        # a non-zero nanosecond value takes precedence over microsecond
        if nanosecond:
            usec, extra = divmod(nanosecond, 1000)
        else:
            usec, extra = microsecond, 0
        if extra == 0:
            # nothing beyond microseconds - plain datetime suffices
            return datetime(year, month, day, hour, minute, second, usec, tzinfo, fold=fold)
        self = super().__new__(cls, year, month, day, hour, minute, second, usec, tzinfo, fold=fold)
        self.nanosecond = nanosecond
        return self

    def isoformat(self, sep="T", timespec="auto"):
        """Formats with nanosecond precision by default.
        """
        if timespec != "auto":
            return super().isoformat(sep, timespec)
        # swap the 6-digit fraction for 9 digits, keep any UTC offset
        pre, post = super().isoformat(sep, "microseconds").split(".", 1)
        return f"{pre}.{self.nanosecond:09d}{post[6:]}"

    def astimezone(self, tz=None):
        """Convert to new timezone.
        """
        moved = super().astimezone(tz)
        return self.__class__(moved.year, moved.month, moved.day,
                              moved.hour, moved.minute, moved.second,
                              nanosecond=self.nanosecond, tzinfo=moved.tzinfo, fold=moved.fold)

    def replace(self, year=None, month=None, day=None, hour=None, minute=None, second=None,
                microsecond=None, tzinfo=None, *, fold=None, nanosecond=None):
        """Return new timestamp with specified fields replaced.
        """
        # explicit nanosecond wins, then microsecond, then the current value
        if nanosecond is None:
            nanosecond = self.nanosecond if microsecond is None else microsecond * 1000
        return self.__class__(
            self.year if year is None else year,
            self.month if month is None else month,
            self.day if day is None else day,
            self.hour if hour is None else hour,
            self.minute if minute is None else minute,
            self.second if second is None else second,
            nanosecond=nanosecond,
            tzinfo=self.tzinfo if tzinfo is None else tzinfo,
            fold=self.fold if fold is None else fold)

    def __hash__(self):
        if self.nanosecond:
            return hash((super().__hash__(), self.nanosecond))
        return super().__hash__()

    def __eq__(self, other):
        return super().__eq__(other) and self.nanosecond == (
            other.nanosecond if isinstance(other, nsdatetime) else other.microsecond * 1000)

    def __gt__(self, other):
        return super().__gt__(other) or (super().__eq__(other) and self.nanosecond > (
            other.nanosecond if isinstance(other, nsdatetime) else other.microsecond * 1000))

    def __lt__(self, other):
        return not self > other and not self == other

    def __ge__(self, other):
        return not self < other

    def __le__(self, other):
        return not self > other

    def __ne__(self, other):
        return not self == other
def to_nsdatetime(dt, nsec):
    """Apply nanoseconds to datetime.

    Returns ``dt`` itself when ``nsec`` is zero, otherwise a new
    timestamp with the same date, time, tzinfo and fold whose
    sub-second part is ``nsec`` nanoseconds.
    """
    if nsec:
        return nsdatetime(dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second,
                          tzinfo=dt.tzinfo, fold=dt.fold, nanosecond=nsec)
    return dt
def to_nsecs(dt):
    """Convert datatime instance to nanoseconds.

    Whole seconds come from ``dt.timestamp()``; the sub-second part is
    taken from ``nanosecond`` when available, else from ``microsecond``.
    """
    whole = int(dt.timestamp())
    if isinstance(dt, nsdatetime):
        frac = dt.nanosecond
    else:
        frac = dt.microsecond * 1000
    return whole * 1000000000 + frac
def custom_popen(cmd):
    """Disconnect cmd from parent fds, read only from stdout.

    stderr is merged into stdout and stdin is the null device.  A
    missing or non-executable program raises RarCannotExec; any other
    OSError propagates unchanged.
    """
    creationflags = 0x08000000 if WIN32 else 0  # CREATE_NO_WINDOW
    try:
        return Popen(cmd, bufsize=0, stdout=PIPE, stderr=STDOUT, stdin=DEVNULL,
                     creationflags=creationflags)
    except OSError as ex:
        if ex.errno == errno.ENOENT:
            raise RarCannotExec("Unrar not installed?") from None
        if ex.errno in (errno.EACCES, errno.EPERM):
            raise RarCannotExec("Cannot execute unrar") from None
        raise
def check_returncode(code, out, errmap):
    """Raise exception according to unrar exit code.

    ``errmap`` is indexed by exit code and holds exception classes.
    Code 0 returns silently.  The message is built from the class
    docstring, the code and, when not empty, ``out``.
    """
    if code == 0:
        return

    if 0 < code < len(errmap):
        exc = errmap[code]
    elif code == 255:
        exc = RarUserBreak
    elif code < 0:
        exc = RarSignalExit
    else:
        exc = RarUnknownError

    # format message
    msg = "%s [%d]" % (exc.__doc__, code)
    if out:
        msg = "%s: %s" % (msg, out)

    raise exc(msg)
def membuf_tempfile(memfile):
    """Write in-memory file object to real file.

    Copies ``memfile`` from its start into a new ".rar" temp file and
    returns that file's name.  On any failure the temp file is removed
    and the exception re-raised.
    """
    memfile.seek(0, 0)

    fd, path = mkstemp(suffix=".rar", dir=HACK_TMP_DIR)
    out = os.fdopen(fd, "wb")

    try:
        shutil.copyfileobj(memfile, out, BSIZE)
        out.close()
    except BaseException:
        # do not leave a half-written file behind
        out.close()
        os.unlink(path)
        raise
    return path
#
# Find working command-line tool
#

class ToolSetup:
    """Command-line builder for one extraction backend.

    ``setup`` is a config dict with the keys ``open_cmd``,
    ``check_cmd``, ``password``, ``no_password`` and ``errmap``.
    """

    def __init__(self, setup):
        self.setup = setup

    def check(self):
        """Return True when the tool's check command runs and exits with 0."""
        cmdline = self.get_cmdline("check_cmd", None)
        try:
            p = custom_popen(cmdline)
            # drain the output so the process can finish
            p.communicate()
            return p.returncode == 0
        except RarCannotExec:
            return False

    def open_cmdline(self, pwd, rarfn, filefn=None):
        """Build the command that extracts ``filefn`` from ``rarfn`` to stdout."""
        cmdline = self.get_cmdline("open_cmd", pwd)
        cmdline.append(rarfn)
        if filefn:
            self.add_file_arg(cmdline, filefn)
        return cmdline

    def get_errmap(self):
        """Return the list that maps exit codes to exception classes."""
        return self.setup["errmap"]

    def get_cmdline(self, key, pwd, nodash=False):
        """Build the command line stored in the config under ``key``.

        The first element names a module-level variable that holds the
        tool's executable.  Password switches are appended, followed by
        "--" unless ``nodash`` is set.
        """
        cmdline = list(self.setup[key])
        cmdline[0] = globals()[cmdline[0]]
        self.add_password_arg(cmdline, pwd)
        if not nodash:
            cmdline.append("--")
        return cmdline

    def add_file_arg(self, cmdline, filename):
        """Append the name of the archive member to extract."""
        cmdline.append(filename)

    def add_password_arg(self, cmdline, pwd):
        """Append password switch to commandline.

        Raises RarCannotExec when a password is given but the backend
        config declares no password switch (``"password": None``).
        """
        if pwd is None:
            cmdline.extend(self.setup["no_password"])
            return

        if not isinstance(pwd, str):
            pwd = pwd.decode("utf8")
        args = self.setup["password"]
        if args is None:
            # backend has no password switch; fail with a clear error
            # instead of a TypeError from extend(None)
            tool = self.setup["open_cmd"][0]
            raise RarCannotExec(f"{tool} does not support passwords")
        if isinstance(args, str):
            # switch and password form a single argument
            cmdline.append(args + pwd)
        else:
            cmdline.extend(args)
            cmdline.append(pwd)
# Backend configs: the first element of each command names the
# module-level variable that holds the executable.
UNRAR_CONFIG = {
    "open_cmd": ("UNRAR_TOOL", "p", "-inul"),
    "check_cmd": ("UNRAR_TOOL", "-inul"),
    "password": "-p",
    "no_password": ("-p-",),
    # map return code to exception class, codes from rar.txt
    "errmap": [
        None,
        RarWarning, RarFatalError, RarCRCError, RarLockedArchiveError,    # 1..4
        RarWriteError, RarOpenError, RarUserError, RarMemoryError,        # 5..8
        RarCreateError, RarNoFilesError, RarWrongPassword,                # 9..11
    ],
}

# Problems with unar RAR backend:
# - Does not support RAR2 locked files [fails to read]
# - Does not support RAR5 Blake2sp hash [reading works]
UNAR_CONFIG = {
    "open_cmd": ("UNAR_TOOL", "-q", "-o", "-"),
    "check_cmd": ("UNAR_TOOL", "-version"),
    "password": ("-p",),
    "no_password": ("-p", ""),
    "errmap": [None],
}

# Problems with libarchive RAR backend:
# - Does not support solid archives.
# - Does not support password-protected archives.
# - Does not support RARVM-based compression filters.
BSDTAR_CONFIG = {
    "open_cmd": ("BSDTAR_TOOL", "-x", "--to-stdout", "-f"),
    "check_cmd": ("BSDTAR_TOOL", "--version"),
    "password": None,
    "no_password": (),
    "errmap": [None],
}

# cached result of tool_setup()
CURRENT_SETUP = None
def tool_setup(unrar=True, unar=True, bsdtar=True, force=False):
    """Pick a tool, return cached ToolSetup.

    Enabled backends are probed in the order unrar, unar, bsdtar and
    the first one whose check succeeds is cached.  ``force`` discards
    the cached choice first.  Raises RarCannotExec when none works.
    """
    global CURRENT_SETUP
    if force:
        CURRENT_SETUP = None
    if CURRENT_SETUP is not None:
        return CURRENT_SETUP

    choices = ((unrar, UNRAR_CONFIG), (unar, UNAR_CONFIG), (bsdtar, BSDTAR_CONFIG))
    for enabled, conf in choices:
        if not enabled:
            continue
        candidate = ToolSetup(conf)
        if candidate.check():
            CURRENT_SETUP = candidate
            break
    if CURRENT_SETUP is None:
        raise RarCannotExec("Cannot find working tool")
    return CURRENT_SETUP
def main(args):
    """Minimal command-line interface for rarfile module.
    """
    # NOTE: the docstring above doubles as the argparse description
    import argparse
    parser = argparse.ArgumentParser(description=main.__doc__)
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("-l", "--list", metavar="<rarfile>",
                       help="Show archive listing")
    group.add_argument("-e", "--extract", nargs=2,
                       metavar=("<rarfile>", "<output_dir>"),
                       help="Extract archive into target dir")
    group.add_argument("-t", "--test", metavar="<rarfile>",
                       help="Test if a archive is valid")
    cmd = parser.parse_args(args)

    # exactly one of the three actions is set
    if cmd.list:
        with RarFile(cmd.list) as rf:
            rf.printdir()
    elif cmd.test:
        with RarFile(cmd.test) as rf:
            rf.testrar()
    elif cmd.extract:
        archive, target = cmd.extract[0], cmd.extract[1]
        with RarFile(archive) as rf:
            rf.extractall(target)
if __name__ == "__main__":
    # run as a script: pass the command-line arguments to main()
    main(sys.argv[1:])