v4.0
[rarfile.git] / rarfile.py
blob58a59abb68be6c839caa6502752899f35cec5c62
1 # rarfile.py
3 # Copyright (c) 2005-2020 Marko Kreen <markokr@gmail.com>
5 # Permission to use, copy, modify, and/or distribute this software for any
6 # purpose with or without fee is hereby granted, provided that the above
7 # copyright notice and this permission notice appear in all copies.
9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 """RAR archive reader.
19 This is a Python module for reading RAR archives. The interface
20 is made as :mod:`zipfile`-like as possible.
22 Basic logic:
23 - Parse archive structure with Python.
24 - Extract non-compressed files with Python
25 - Extract compressed files with unrar.
26 - Optionally write compressed data to temp file to speed up unrar,
27 otherwise it needs to scan whole archive on each execution.
29 Example::
31 import rarfile
33 rf = rarfile.RarFile("myarchive.rar")
34 for f in rf.infolist():
35 print(f.filename, f.file_size)
36 if f.filename == "README":
37 print(rf.read(f))
39 Archive files can also be accessed via file-like object returned
40 by :meth:`RarFile.open`::
42 import rarfile
44 with rarfile.RarFile("archive.rar") as rf:
45 with rf.open("README") as f:
46 for ln in f:
47 print(ln.strip())
49 For decompression to work, either ``unrar`` or ``unar`` tool must be in PATH.
50 """
52 import errno
53 import io
54 import os
55 import re
56 import shutil
57 import struct
58 import sys
59 import warnings
60 from binascii import crc32, hexlify
61 from datetime import datetime, timezone
62 from hashlib import blake2s, pbkdf2_hmac, sha1
63 from pathlib import Path
64 from struct import Struct, pack, unpack
65 from subprocess import DEVNULL, PIPE, STDOUT, Popen
66 from tempfile import mkstemp
68 # only needed for encrypted headers
69 try:
70 try:
71 from cryptography.hazmat.backends import default_backend
72 from cryptography.hazmat.primitives.ciphers import (
73 Cipher, algorithms, modes,
75 _have_crypto = 1
76 except ImportError:
77 from Crypto.Cipher import AES
78 _have_crypto = 2
79 except ImportError:
80 _have_crypto = 0
class AES_CBC_Decrypt:
    """AES-CBC decryptor exposing a single ``decrypt(data)`` callable.

    The implementation is picked from the module-level ``_have_crypto``
    value: ``2`` selects the ``Crypto.Cipher.AES`` API, anything else uses
    the ``cryptography`` package primitives.
    """

    def __init__(self, key, iv):
        if _have_crypto != 2:
            # "cryptography" backend: the decryptor's update() does the work
            cipher = Cipher(algorithms.AES(key), modes.CBC(iv), default_backend())
            self.decrypt = cipher.decryptor().update
            return
        # PyCryptodome-style backend
        self.decrypt = AES.new(key, AES.MODE_CBC, iv).decrypt
__version__ = "4.0"

# Names exported by ``from rarfile import *`` - only the interesting ones.
__all__ = [
    "is_rarfile",
    "is_rarfile_sfx",
    "RarInfo",
    "RarFile",
    "RarExtFile",
]
##
## Module configuration.  Values may be changed after importing the module.
##

#: executable name of the unrar tool
UNRAR_TOOL = "unrar"

#: executable name of the unar tool
UNAR_TOOL = "unar"

#: executable name of the bsdtar tool
BSDTAR_TOOL = "bsdtar"

#: charset used when nothing else is known
DEFAULT_CHARSET = "windows-1252"

#: encodings to try first; DEFAULT_CHARSET is the fallback if none succeed
TRY_ENCODINGS = ("utf8", "utf-16le")

#: whether to speed up decompression by extracting via a temporary archive
USE_EXTRACT_HACK = 1

#: largest file size for which the temporary-archive hack is used
HACK_SIZE_LIMIT = 20 * 1024 * 1024

#: directory passed to mkstemp() for the temporary archive (None = default)
HACK_TMP_DIR = None

#: separator for path name components; always "/"
PATH_SEP = "/"
##
## RAR3 on-disk constants
##

# Block type codes (the trailing comment is the ASCII character of the code).
RAR_BLOCK_MARK = 0x72  # r
RAR_BLOCK_MAIN = 0x73  # s
RAR_BLOCK_FILE = 0x74  # t
RAR_BLOCK_OLD_COMMENT = 0x75  # u
RAR_BLOCK_OLD_EXTRA = 0x76  # v
RAR_BLOCK_OLD_SUB = 0x77  # w
RAR_BLOCK_OLD_RECOVERY = 0x78  # x
RAR_BLOCK_OLD_AUTH = 0x79  # y
RAR_BLOCK_SUB = 0x7a  # z
RAR_BLOCK_ENDARC = 0x7b  # {

# Flag bits of a RAR_BLOCK_MAIN header.
RAR_MAIN_VOLUME = 0x0001
RAR_MAIN_COMMENT = 0x0002
RAR_MAIN_LOCK = 0x0004
RAR_MAIN_SOLID = 0x0008
RAR_MAIN_NEWNUMBERING = 0x0010
RAR_MAIN_AUTH = 0x0020
RAR_MAIN_RECOVERY = 0x0040
RAR_MAIN_PASSWORD = 0x0080
RAR_MAIN_FIRSTVOLUME = 0x0100
RAR_MAIN_ENCRYPTVER = 0x0200

# Flag bits of a RAR_BLOCK_FILE header.
RAR_FILE_SPLIT_BEFORE = 0x0001
RAR_FILE_SPLIT_AFTER = 0x0002
RAR_FILE_PASSWORD = 0x0004
RAR_FILE_COMMENT = 0x0008
RAR_FILE_SOLID = 0x0010
# The dictionary-size field occupies the DICTMASK bits; the all-ones value
# of that field (RAR_FILE_DIRECTORY) marks a directory entry.
RAR_FILE_DICTMASK = 0x00e0
RAR_FILE_DICT64 = 0x0000
RAR_FILE_DICT128 = 0x0020
RAR_FILE_DICT256 = 0x0040
RAR_FILE_DICT512 = 0x0060
RAR_FILE_DICT1024 = 0x0080
RAR_FILE_DICT2048 = 0x00a0
RAR_FILE_DICT4096 = 0x00c0
RAR_FILE_DIRECTORY = 0x00e0
RAR_FILE_LARGE = 0x0100
RAR_FILE_UNICODE = 0x0200
RAR_FILE_SALT = 0x0400
RAR_FILE_VERSION = 0x0800
RAR_FILE_EXTTIME = 0x1000
RAR_FILE_EXTFLAGS = 0x2000

# Flag bits of a RAR_BLOCK_ENDARC header.
RAR_ENDARC_NEXT_VOLUME = 0x0001
RAR_ENDARC_DATACRC = 0x0002
RAR_ENDARC_REVSPACE = 0x0004
RAR_ENDARC_VOLNR = 0x0008

# Flag bits shared by every block type.
RAR_SKIP_IF_UNKNOWN = 0x4000
RAR_LONG_BLOCK = 0x8000

# Host OS identifiers.
RAR_OS_MSDOS = 0  #: MSDOS (only in RAR3)
RAR_OS_OS2 = 1  #: OS2 (only in RAR3)
RAR_OS_WIN32 = 2  #: Windows
RAR_OS_UNIX = 3  #: UNIX
RAR_OS_MACOS = 4  #: MacOS (only in RAR3)
RAR_OS_BEOS = 5  #: BeOS (only in RAR3)

# Compression methods - the ASCII digits "0".."5".
RAR_M0 = 0x30  #: No compression.
RAR_M1 = 0x31  #: Compression level `-m1` - Fastest compression.
RAR_M2 = 0x32  #: Compression level `-m2`.
RAR_M3 = 0x33  #: Compression level `-m3`.
RAR_M4 = 0x34  #: Compression level `-m4`.
RAR_M5 = 0x35  #: Compression level `-m5` - Maximum compression.
##
## RAR5 on-disk constants
##

# Block types.
RAR5_BLOCK_MAIN = 1
RAR5_BLOCK_FILE = 2
RAR5_BLOCK_SERVICE = 3
RAR5_BLOCK_ENCRYPTION = 4
RAR5_BLOCK_ENDARC = 5

# Flag bits common to all block headers.
RAR5_BLOCK_FLAG_EXTRA_DATA = 0x01
RAR5_BLOCK_FLAG_DATA_AREA = 0x02
RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN = 0x04
RAR5_BLOCK_FLAG_SPLIT_BEFORE = 0x08
RAR5_BLOCK_FLAG_SPLIT_AFTER = 0x10
RAR5_BLOCK_FLAG_DEPENDS_PREV = 0x20
RAR5_BLOCK_FLAG_KEEP_WITH_PARENT = 0x40

# Main archive header flags.
RAR5_MAIN_FLAG_ISVOL = 0x01
RAR5_MAIN_FLAG_HAS_VOLNR = 0x02
RAR5_MAIN_FLAG_SOLID = 0x04
RAR5_MAIN_FLAG_RECOVERY = 0x08
RAR5_MAIN_FLAG_LOCKED = 0x10

# File header flags.
RAR5_FILE_FLAG_ISDIR = 0x01
RAR5_FILE_FLAG_HAS_MTIME = 0x02
RAR5_FILE_FLAG_HAS_CRC32 = 0x04
RAR5_FILE_FLAG_UNKNOWN_SIZE = 0x08

RAR5_COMPR_SOLID = 0x40

RAR5_ENC_FLAG_HAS_CHECKVAL = 0x01

RAR5_ENDARC_FLAG_NEXT_VOL = 0x01

# Record types found in the extra area of file/service headers.
RAR5_XFILE_ENCRYPTION = 1
RAR5_XFILE_HASH = 2
RAR5_XFILE_TIME = 3
RAR5_XFILE_VERSION = 4
RAR5_XFILE_REDIR = 5
RAR5_XFILE_OWNER = 6
RAR5_XFILE_SERVICE = 7

# Flags of the time record.
RAR5_XTIME_UNIXTIME = 0x01
RAR5_XTIME_HAS_MTIME = 0x02
RAR5_XTIME_HAS_CTIME = 0x04
RAR5_XTIME_HAS_ATIME = 0x08
RAR5_XTIME_UNIXTIME_NS = 0x10

# Encryption record: cipher id and flags.
RAR5_XENC_CIPHER_AES256 = 0

RAR5_XENC_CHECKVAL = 0x01
RAR5_XENC_TWEAKED = 0x02

# Hash record: hash type.
RAR5_XHASH_BLAKE2SP = 0

# Redirection record: link types.
RAR5_XREDIR_UNIX_SYMLINK = 1
RAR5_XREDIR_WINDOWS_SYMLINK = 2
RAR5_XREDIR_WINDOWS_JUNCTION = 3
RAR5_XREDIR_HARD_LINK = 4
RAR5_XREDIR_FILE_COPY = 5

# Redirection record: flag bits (separate field from the link type).
RAR5_XREDIR_ISDIR = 0x01

# Owner record flags.
RAR5_XOWNER_UNAME = 0x01
RAR5_XOWNER_GNAME = 0x02
RAR5_XOWNER_UID = 0x04
RAR5_XOWNER_GID = 0x08

# Host OS identifiers used by RAR5.
RAR5_OS_WINDOWS = 0
RAR5_OS_UNIX = 1

# DOS/Windows file attribute bits.
DOS_MODE_ARCHIVE = 0x20
DOS_MODE_DIR = 0x10
DOS_MODE_SYSTEM = 0x04
DOS_MODE_HIDDEN = 0x02
DOS_MODE_READONLY = 0x01
##
## internal constants
##

# Magic byte sequences that open a RAR3 / RAR5 archive.
RAR_ID = b"Rar!\x1a\x07\x00"
RAR5_ID = b"Rar!\x1a\x07\x01\x00"

WIN32 = sys.platform == "win32"
# I/O block size: 512 KiB on Windows, 64 KiB elsewhere.
BSIZE = (512 if WIN32 else 64) * 1024

# How many bytes are scanned when looking for an archive behind an SFX stub.
SFX_MAX_SIZE = 2 * 1024 * 1024
RAR_V3 = 3
RAR_V5 = 5

# Characters not allowed in extracted file names; the Windows variant
# additionally rejects ":", "^" and backslash.
_BAD_CHARS = r"""\x00-\x1F<>|"?*"""
RC_BAD_CHARS_UNIX = re.compile(r"[%s]" % _BAD_CHARS)
RC_BAD_CHARS_WIN32 = re.compile(r"[%s:^\\]" % _BAD_CHARS)
def _get_rar_version(xfile):
    """Return RAR_V3 or RAR_V5 if the file starts with a RAR signature, else 0.

    Only the first ``len(RAR5_ID)`` bytes are read; SFX stubs are not searched.
    """
    with XFile(xfile) as fd:
        head = fd.read(len(RAR5_ID))
    # RAR_ID is checked first, matching the original order of the tests
    for magic, version in ((RAR_ID, RAR_V3), (RAR5_ID, RAR_V5)):
        if head.startswith(magic):
            return version
    return 0
def _find_sfx_header(xfile):
    """Locate a RAR signature inside the file, allowing for an SFX prefix.

    The file is read in two steps (64 bytes, then up to SFX_MAX_SIZE more) and
    the accumulated data is searched after each step.

    Returns a ``(version, offset)`` tuple; ``(0, 0)`` if no signature is found.
    """
    # bytes shared by the RAR3 and RAR5 signatures
    marker = RAR_ID[:-1]
    collected = io.BytesIO()

    with XFile(xfile) as fd:
        for chunk_size in (64, SFX_MAX_SIZE):
            chunk = fd.read(chunk_size)
            if not chunk:
                break
            collected.write(chunk)
            haystack = collected.getvalue()
            pos = haystack.find(marker)
            while pos >= 0:
                if haystack[pos:pos + len(RAR_ID)] == RAR_ID:
                    return RAR_V3, pos
                if haystack[pos:pos + len(RAR5_ID)] == RAR5_ID:
                    return RAR_V5, pos
                pos = haystack.find(marker, pos + len(marker))
    return 0, 0
340 ## Public interface
def is_rarfile(xfile):
    """Return True if the file starts with a RAR3 or RAR5 signature.

    This is a quick check of the file header only.
    """
    version = _get_rar_version(xfile)
    return version > 0
def is_rarfile_sfx(xfile):
    """Return True if the file is a RAR archive, possibly behind an SFX stub.

    Up to about 2M of the file is read while searching for the signature.
    """
    version, _offset = _find_sfx_header(xfile)
    return version > 0
class Error(Exception):
    """Root of the rarfile exception hierarchy."""


class BadRarFile(Error):
    """The archive contains incorrect data."""


class NotRarFile(Error):
    """The file is not a RAR archive."""


class BadRarName(Error):
    """Multipart name components cannot be guessed."""


class NoRarEntry(Error):
    """The requested file was not found in the archive."""


class PasswordRequired(Error):
    """The file requires a password."""


class NeedFirstVolume(Error):
    """Processing must start from the first volume.

    Attributes:

        current_volume
            Volume number of current file or None if not known
    """

    def __init__(self, msg, volume):
        super().__init__(msg)
        self.current_volume = volume


class NoCrypto(Error):
    """Encrypted headers cannot be parsed - no crypto library available."""


class RarExecError(Error):
    """A problem was reported by unrar/rar."""


class RarWarning(RarExecError):
    """Non-fatal error."""


class RarFatalError(RarExecError):
    """Fatal error."""


class RarCRCError(RarExecError):
    """CRC error while unpacking."""


class RarLockedArchiveError(RarExecError):
    """A locked archive must not be modified."""


class RarWriteError(RarExecError):
    """Write error."""


class RarOpenError(RarExecError):
    """Open error."""


class RarUserError(RarExecError):
    """User error."""


class RarMemoryError(RarExecError):
    """Memory error."""


class RarCreateError(RarExecError):
    """Create error."""


class RarNoFilesError(RarExecError):
    """No files matching the pattern were found."""


class RarUserBreak(RarExecError):
    """User stop."""


class RarWrongPassword(RarExecError):
    """Incorrect password."""


class RarUnknownError(RarExecError):
    """Unknown exit code."""


class RarSignalExit(RarExecError):
    """Unrar exited with a signal."""


class RarCannotExec(RarExecError):
    """Executable not found."""
class UnsupportedWarning(UserWarning):
    """Warning: the archive uses a feature that rarfile does not support.

    .. versionadded:: 4.0
    """
class RarInfo:
    r"""Description of a single archive entry.

    :class:`~datetime.datetime` timestamps carry no timezone for RAR3
    archives and the UTC timezone for RAR5 archives.

    Attributes:

        filename
            File name with relative path.
            Path separator is "/".  Always unicode string.

        date_time
            File modification timestamp.  As tuple of
            (year, month, day, hour, minute, second).
            RAR5 allows archives where it is missing, it's None then.

        comment
            Optional file comment field.  Unicode string.  (RAR3-only)

        file_size
            Uncompressed size.

        compress_size
            Compressed size.

        compress_type
            Compression method: one of :data:`RAR_M0` .. :data:`RAR_M5` constants.

        extract_version
            Minimal Rar version needed for decompressing.  As (major*10 + minor),
            so 2.9 is 29.

            RAR3: 10, 20, 29

            RAR5 does not have such field in archive, it's simply set to 50.

        host_os
            Host OS type, one of RAR_OS_* constants.

            RAR3: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`, :data:`RAR_OS_MSDOS`,
            :data:`RAR_OS_OS2`, :data:`RAR_OS_BEOS`.

            RAR5: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`.

        mode
            File attributes.  May be either dos-style or unix-style,
            depending on host_os.

        mtime
            File modification time.  Same value as :attr:`date_time`
            but as :class:`~datetime.datetime` object with extended precision.

        ctime
            Optional time field: creation time.
            As :class:`~datetime.datetime` object.

        atime
            Optional time field: last access time.
            As :class:`~datetime.datetime` object.

        arctime
            Optional time field: archival time.
            As :class:`~datetime.datetime` object.  (RAR3-only)

        CRC
            CRC-32 of uncompressed file, unsigned int.

            RAR5: may be None.

        blake2sp_hash
            Blake2SP hash over decompressed data.  (RAR5-only)

        volume
            Volume nr, starting from 0.

        volume_file
            Volume file name, where file starts.

        file_redir
            If not None, file is link of some sort.
            Contains tuple of (type, flags, target).  (RAR5-only)

            Type is one of constants:

                :data:`RAR5_XREDIR_UNIX_SYMLINK`
                    Unix symlink.
                :data:`RAR5_XREDIR_WINDOWS_SYMLINK`
                    Windows symlink.
                :data:`RAR5_XREDIR_WINDOWS_JUNCTION`
                    Windows junction.
                :data:`RAR5_XREDIR_HARD_LINK`
                    Hard link to target.
                :data:`RAR5_XREDIR_FILE_COPY`
                    Current file is copy of another archive entry.

            Flags may contain bits:

                :data:`RAR5_XREDIR_ISDIR`
                    Symlink points to directory.
    """

    # fields shared with zipfile.ZipInfo
    filename = None
    file_size = None
    compress_size = None
    date_time = None
    CRC = None
    volume = None
    orig_filename = None

    # extended timestamps (datetime objects), all optional
    mtime = None
    ctime = None
    atime = None

    extract_version = None
    mode = None
    host_os = None
    compress_type = None

    # present only in RAR3 archives
    comment = None
    arctime = None

    # present only in RAR5 archives
    blake2sp_hash = None
    file_redir = None

    # parser-internal state
    flags = 0
    type = None

    # zipfile-compatible predicates; the base class answers False for all
    def is_dir(self):
        """Tell whether the entry is a directory.

        .. versionadded:: 4.0
        """
        return False

    def is_symlink(self):
        """Tell whether the entry is a symlink.

        .. versionadded:: 4.0
        """
        return False

    def is_file(self):
        """Tell whether the entry is a normal file.

        .. versionadded:: 4.0
        """
        return False

    def needs_password(self):
        """Tell whether the entry's data is stored password-protected."""
        if self.type != RAR_BLOCK_FILE:
            return False
        return (self.flags & RAR_FILE_PASSWORD) > 0

    def isdir(self):
        """Tell whether the entry is a directory.

        .. deprecated:: 4.0
        """
        return self.is_dir()
class RarFile:
    """Parse RAR structure, provide access to files in archive.

    The archive headers are parsed on construction; the actual parsing and
    data access is delegated to a format-specific parser object (RAR3 or
    RAR5) stored in ``self._file_parser``.
    """

    #: File name, if available.  Unicode string or None.
    filename = None

    #: Archive comment.  Unicode string or None.
    comment = None

    def __init__(self, file, mode="r", charset=None, info_callback=None,
                 crc_check=True, errors="stop"):
        """Open and parse a RAR archive.

        Parameters:

            file
                archive file name or file-like object.
            mode
                only "r" is supported.
            charset
                fallback charset to use, if filenames are not already Unicode-enabled.
            info_callback
                debug callback, gets to see all archive entries.
            crc_check
                set to False to disable CRC checks
            errors
                Either "stop" to quietly stop parsing on errors,
                or "strict" to raise errors.  Default is "stop".

        Raises ValueError for an unknown ``errors`` value and
        NotImplementedError for any ``mode`` other than "r".
        """
        if is_filelike(file):
            self.filename = getattr(file, "name", None)
        else:
            if isinstance(file, Path):
                file = str(file)
            self.filename = file
        self._rarfile = file

        self._charset = charset or DEFAULT_CHARSET
        self._info_callback = info_callback
        self._crc_check = crc_check
        self._password = None
        self._file_parser = None

        if errors == "stop":
            self._strict = False
        elif errors == "strict":
            self._strict = True
        else:
            raise ValueError("Invalid value for errors= parameter.")

        if mode != "r":
            raise NotImplementedError("RarFile supports only mode=r")

        self._parse()

    def __enter__(self):
        """Open context."""
        return self

    def __exit__(self, typ, value, traceback):
        """Exit context."""
        self.close()

    def __iter__(self):
        """Iterate over members."""
        return iter(self.infolist())

    def setpassword(self, pwd):
        """Sets the password to use when extracting.

        If the archive headers are encrypted (or nothing has been parsed
        yet) the archive is re-parsed with the new password; otherwise the
        password is just handed to the existing parser.
        """
        self._password = pwd
        if self._file_parser:
            if self._file_parser.has_header_encryption():
                # headers could not be read without password - start over
                self._file_parser = None
        if not self._file_parser:
            self._parse()
        else:
            self._file_parser.setpassword(self._password)

    def needs_password(self):
        """Returns True if any archive entries require password for extraction.
        """
        return self._file_parser.needs_password()

    def namelist(self):
        """Return list of filenames in archive.
        """
        return [f.filename for f in self.infolist()]

    def infolist(self):
        """Return RarInfo objects for all files/directories in archive.
        """
        return self._file_parser.infolist()

    def volumelist(self):
        """Returns filenames of archive volumes.

        In case of single-volume archive, the list contains
        just the name of main archive file.
        """
        return self._file_parser.volumelist()

    def getinfo(self, name):
        """Return RarInfo for file.
        """
        return self._file_parser.getinfo(name)

    def open(self, name, mode="r", pwd=None):
        """Returns file-like object (:class:`RarExtFile`) from where the data can be read.

        The object implements :class:`io.RawIOBase` interface, so it can
        be further wrapped with :class:`io.BufferedReader`
        and :class:`io.TextIOWrapper`.

        The object is seekable, although the seeking is fast only on
        uncompressed files, on compressed files the seeking is implemented
        by reading ahead and/or restarting the decompression.

        Parameters:

            name
                file name or RarInfo instance.
            mode
                must be "r"
            pwd
                password to use for extracting.

        Raises NotImplementedError for other modes,
        io.UnsupportedOperation for directories and PasswordRequired
        when the entry is encrypted and no password is available.
        """

        if mode != "r":
            raise NotImplementedError("RarFile.open() supports only mode=r")

        # entry lookup
        inf = self.getinfo(name)
        if inf.is_dir():
            raise io.UnsupportedOperation("Directory does not have any data: " + inf.filename)

        # check password
        if inf.needs_password():
            pwd = pwd or self._password
            if pwd is None:
                raise PasswordRequired("File %s requires password" % inf.filename)
        else:
            # never pass a password for unencrypted entries
            pwd = None

        return self._file_parser.open(inf, pwd)

    def read(self, name, pwd=None):
        """Return uncompressed data for archive entry.

        For longer files using :meth:`~RarFile.open` may be better idea.

        Parameters:

            name
                filename or RarInfo instance
            pwd
                password to use for extracting.
        """

        with self.open(name, "r", pwd) as f:
            return f.read()

    def close(self):
        """Release open resources."""
        pass

    def printdir(self, file=None):
        """Print archive file list to stdout or given file.
        """
        if file is None:
            file = sys.stdout
        for f in self.infolist():
            print(f.filename, file=file)

    def extract(self, member, path=None, pwd=None):
        """Extract single file into current directory.

        Parameters:

            member
                filename or :class:`RarInfo` instance
            path
                optional destination path
            pwd
                optional password to use

        Returns the destination file name (or None for unsupported links).
        """
        inf = self.getinfo(member)
        return self._extract_one(inf, path, pwd, True)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all files into current directory.

        Parameters:

            path
                optional destination path
            members
                optional filename or :class:`RarInfo` instance list to extract
            pwd
                optional password to use
        """
        if members is None:
            members = self.namelist()

        done = set()
        dirs = []
        for m in members:
            inf = self.getinfo(m)
            # directory attributes are applied last, see below
            dst = self._extract_one(inf, path, pwd, not inf.is_dir())
            if inf.is_dir():
                if dst not in done:
                    dirs.append((dst, inf))
                    done.add(dst)
        if dirs:
            # deepest paths first, so that restrictive modes/mtimes on a
            # parent are set only after its children were handled
            dirs.sort(reverse=True)
            for dst, inf in dirs:
                self._set_attrs(inf, dst)

    def testrar(self, pwd=None):
        """Read all files and test CRC.
        """
        for member in self.infolist():
            if member.is_file():
                with self.open(member, 'r', pwd) as f:
                    empty_read(f, member.file_size, BSIZE)

    def strerror(self):
        """Return error string if parsing failed or None if no problems.
        """
        if not self._file_parser:
            return "Not a RAR file"
        return self._file_parser.strerror()

    ##
    ## private methods
    ##

    def _parse(self):
        """Run parser for file type
        """
        ver, sfx_ofs = _find_sfx_header(self._rarfile)
        if ver == RAR_V3:
            p3 = RAR3Parser(self._rarfile, self._password, self._crc_check,
                            self._charset, self._strict, self._info_callback,
                            sfx_ofs)
            self._file_parser = p3  # noqa
        elif ver == RAR_V5:
            p5 = RAR5Parser(self._rarfile, self._password, self._crc_check,
                            self._charset, self._strict, self._info_callback,
                            sfx_ofs)
            self._file_parser = p5  # noqa
        else:
            raise NotRarFile("Not a RAR file")

        self._file_parser.parse()
        self.comment = self._file_parser.comment

    def _extract_one(self, info, path, pwd, set_attrs):
        """Extract one entry below ``path`` and return the created name.

        The archive file name is passed through sanitize_filename() before
        being joined to the destination directory.  Returns None when the
        entry is neither file, directory nor supported symlink.
        """
        fname = sanitize_filename(
            info.filename, os.path.sep, WIN32
        )

        if path is None:
            path = os.getcwd()
        else:
            path = os.fspath(path)
        dstfn = os.path.join(path, fname)

        dirname = os.path.dirname(dstfn)
        if dirname and dirname != ".":
            os.makedirs(dirname, exist_ok=True)

        if info.is_file():
            return self._make_file(info, dstfn, pwd, set_attrs)
        if info.is_dir():
            return self._make_dir(info, dstfn, pwd, set_attrs)
        if info.is_symlink():
            return self._make_symlink(info, dstfn, pwd, set_attrs)
        return None

    def _create_helper(self, name, flags, info):
        """Opener used for output files; a hook point for subclasses."""
        return os.open(name, flags)

    def _make_file(self, info, dstfn, pwd, set_attrs):
        """Write the entry's data to ``dstfn`` and return ``dstfn``."""
        def helper(name, flags):
            return self._create_helper(name, flags, info)
        with self.open(info, "r", pwd) as src:
            with open(dstfn, "wb", opener=helper) as dst:
                shutil.copyfileobj(src, dst)
        if set_attrs:
            self._set_attrs(info, dstfn)
        return dstfn

    def _make_dir(self, info, dstfn, pwd, set_attrs):
        """Create directory ``dstfn`` (if missing) and return ``dstfn``."""
        os.makedirs(dstfn, exist_ok=True)
        if set_attrs:
            self._set_attrs(info, dstfn)
        return dstfn

    def _make_symlink(self, info, dstfn, pwd, set_attrs):
        """Create symlink ``dstfn`` for a link entry.

        Returns ``dstfn``, or None (after an UnsupportedWarning) when the
        link type cannot be created.
        """
        target_is_directory = False
        if info.host_os == RAR_OS_UNIX:
            # unix symlink: the link target is the entry's data
            link_name = self.read(info, pwd)
            target_is_directory = (info.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY
        elif info.file_redir:
            redir_type, redir_flags, link_name = info.file_redir
            if redir_type == RAR5_XREDIR_WINDOWS_JUNCTION:
                warnings.warn(f"Windows junction not supported - {info.filename}", UnsupportedWarning)
                return None
            # RAR5_XREDIR_ISDIR is a bit of the redirection *flags*; testing
            # it against the link type gave wrong results (e.g. always False
            # for RAR5_XREDIR_WINDOWS_SYMLINK == 2).
            target_is_directory = (redir_flags & RAR5_XREDIR_ISDIR) > 0
        else:
            warnings.warn(f"Unsupported link type - {info.filename}", UnsupportedWarning)
            return None

        os.symlink(link_name, dstfn, target_is_directory=target_is_directory)
        return dstfn

    def _set_attrs(self, info, dstfn):
        """Apply permissions and timestamps from ``info`` to ``dstfn``."""
        if info.host_os == RAR_OS_UNIX:
            os.chmod(dstfn, info.mode & 0o777)
        elif info.host_os in (RAR_OS_WIN32, RAR_OS_MSDOS):
            # only keep R/O attr, except for dirs on win32
            if info.mode & DOS_MODE_READONLY and (info.is_file() or not WIN32):
                st = os.stat(dstfn)
                new_mode = st.st_mode & ~0o222
                os.chmod(dstfn, new_mode)

        if info.mtime:
            mtime_ns = to_nsecs(info.mtime)
            # fall back to mtime when the archive has no access time
            atime_ns = to_nsecs(info.atime) if info.atime else mtime_ns
            os.utime(dstfn, ns=(atime_ns, mtime_ns))
973 # File format parsing
976 class CommonParser:
977 """Shared parser parts."""
978 _main = None
979 _hdrenc_main = None
980 _needs_password = False
981 _fd = None
982 _expect_sig = None
983 _parse_error = None
984 _password = None
985 comment = None
987 def __init__(self, rarfile, password, crc_check, charset, strict, info_cb, sfx_offset):
988 self._rarfile = rarfile
989 self._password = password
990 self._crc_check = crc_check
991 self._charset = charset
992 self._strict = strict
993 self._info_callback = info_cb
994 self._info_list = []
995 self._info_map = {}
996 self._vol_list = []
997 self._sfx_offset = sfx_offset
999 def has_header_encryption(self):
1000 """Returns True if headers are encrypted
1002 if self._hdrenc_main:
1003 return True
1004 if self._main:
1005 if self._main.flags & RAR_MAIN_PASSWORD:
1006 return True
1007 return False
1009 def setpassword(self, pwd):
1010 """Set cached password."""
1011 self._password = pwd
1013 def volumelist(self):
1014 """Volume files"""
1015 return self._vol_list
1017 def needs_password(self):
1018 """Is password required"""
1019 return self._needs_password
1021 def strerror(self):
1022 """Last error"""
1023 return self._parse_error
1025 def infolist(self):
1026 """List of RarInfo records.
1028 return self._info_list
1030 def getinfo(self, member):
1031 """Return RarInfo for filename
1033 if isinstance(member, RarInfo):
1034 fname = member.filename
1035 elif isinstance(member, Path):
1036 fname = str(member)
1037 else:
1038 fname = member
1040 if fname.endswith("/"):
1041 fname = fname.rstrip("/")
1043 try:
1044 return self._info_map[fname]
1045 except KeyError:
1046 raise NoRarEntry("No such file: %s" % fname)
1048 def parse(self):
1049 """Process file."""
1050 self._fd = None
1051 try:
1052 self._parse_real()
1053 finally:
1054 if self._fd:
1055 self._fd.close()
1056 self._fd = None
1058 def _parse_real(self):
1059 """Actually read file.
1061 fd = XFile(self._rarfile)
1062 self._fd = fd
1063 fd.seek(self._sfx_offset, 0)
1064 sig = fd.read(len(self._expect_sig))
1065 if sig != self._expect_sig:
1066 raise NotRarFile("Not a Rar archive")
1068 volume = 0 # first vol (.rar) is 0
1069 more_vols = False
1070 endarc = False
1071 volfile = self._rarfile
1072 self._vol_list = [self._rarfile]
1073 raise_need_first_vol = False
1074 while True:
1075 if endarc:
1076 h = None # don"t read past ENDARC
1077 else:
1078 h = self._parse_header(fd)
1079 if not h:
1080 if raise_need_first_vol:
1081 # did not find ENDARC with VOLNR
1082 raise NeedFirstVolume("Need to start from first volume", None)
1083 if more_vols:
1084 volume += 1
1085 fd.close()
1086 try:
1087 volfile = self._next_volname(volfile)
1088 fd = XFile(volfile)
1089 except IOError:
1090 self._set_error("Cannot open next volume: %s", volfile)
1091 break
1092 self._fd = fd
1093 sig = fd.read(len(self._expect_sig))
1094 if sig != self._expect_sig:
1095 self._set_error("Invalid volume sig: %s", volfile)
1096 break
1097 more_vols = False
1098 endarc = False
1099 self._vol_list.append(volfile)
1100 self._main = None
1101 continue
1102 break
1103 h.volume = volume
1104 h.volume_file = volfile
1106 if h.type == RAR_BLOCK_MAIN and not self._main:
1107 self._main = h
1108 if volume == 0 and (h.flags & RAR_MAIN_NEWNUMBERING):
1109 # RAR 2.x does not set FIRSTVOLUME,
1110 # so check it only if NEWNUMBERING is used
1111 if (h.flags & RAR_MAIN_FIRSTVOLUME) == 0:
1112 if getattr(h, "main_volume_number", None) is not None:
1113 # rar5 may have more info
1114 raise NeedFirstVolume(
1115 "Need to start from first volume (current: %r)"
1116 % (h.main_volume_number,),
1117 h.main_volume_number
1119 # delay raise until we have volnr from ENDARC
1120 raise_need_first_vol = True
1121 if h.flags & RAR_MAIN_PASSWORD:
1122 self._needs_password = True
1123 if not self._password:
1124 break
1125 elif h.type == RAR_BLOCK_ENDARC:
1126 more_vols = (h.flags & RAR_ENDARC_NEXT_VOLUME) > 0
1127 endarc = True
1128 if raise_need_first_vol and (h.flags & RAR_ENDARC_VOLNR) > 0:
1129 raise NeedFirstVolume(
1130 "Need to start from first volume (current: %r)"
1131 % (h.endarc_volnr,),
1132 h.endarc_volnr
1134 elif h.type == RAR_BLOCK_FILE:
1135 # RAR 2.x does not write RAR_BLOCK_ENDARC
1136 if h.flags & RAR_FILE_SPLIT_AFTER:
1137 more_vols = True
1138 # RAR 2.x does not set RAR_MAIN_FIRSTVOLUME
1139 if volume == 0 and h.flags & RAR_FILE_SPLIT_BEFORE:
1140 raise_need_first_vol = True
1142 if h.needs_password():
1143 self._needs_password = True
1145 # store it
1146 self.process_entry(fd, h)
1148 if self._info_callback:
1149 self._info_callback(h)
1151 # go to next header
1152 if h.add_size > 0:
1153 fd.seek(h.data_offset + h.add_size, 0)
1155 def process_entry(self, fd, item):
1156 """Examine item, add into lookup cache."""
1157 raise NotImplementedError()
1159 def _decrypt_header(self, fd):
1160 raise NotImplementedError("_decrypt_header")
1162 def _parse_block_header(self, fd):
1163 raise NotImplementedError("_parse_block_header")
1165 def _open_hack(self, inf, pwd):
1166 raise NotImplementedError("_open_hack")
1168 def _parse_header(self, fd):
1169 """Read single header
1171 try:
1172 # handle encrypted headers
1173 if (self._main and self._main.flags & RAR_MAIN_PASSWORD) or self._hdrenc_main:
1174 if not self._password:
1175 return None
1176 fd = self._decrypt_header(fd)
1178 # now read actual header
1179 return self._parse_block_header(fd)
1180 except struct.error:
1181 self._set_error("Broken header in RAR file")
1182 return None
1184 def _next_volname(self, volfile):
1185 """Given current vol name, construct next one
1187 if is_filelike(volfile):
1188 raise IOError("Working on single FD")
1189 if self._main.flags & RAR_MAIN_NEWNUMBERING:
1190 return _next_newvol(volfile)
1191 return _next_oldvol(volfile)
1193 def _set_error(self, msg, *args):
1194 if args:
1195 msg = msg % args
1196 self._parse_error = msg
1197 if self._strict:
1198 raise BadRarFile(msg)
1200 def open(self, inf, pwd):
1201 """Return stream object for file data."""
1203 if inf.file_redir:
1204 redir_type, redir_flags, redir_name = inf.file_redir
1205 # cannot leave to unrar as it expects copied file to exist
1206 if redir_type in (RAR5_XREDIR_FILE_COPY, RAR5_XREDIR_HARD_LINK):
1207 inf = self.getinfo(redir_name)
1208 if not inf:
1209 raise BadRarFile("cannot find copied file")
1210 elif redir_type in (
1211 RAR5_XREDIR_UNIX_SYMLINK, RAR5_XREDIR_WINDOWS_SYMLINK,
1212 RAR5_XREDIR_WINDOWS_JUNCTION,
1214 return io.BytesIO(redir_name.encode("utf8"))
1215 if inf.flags & RAR_FILE_SPLIT_BEFORE:
1216 raise NeedFirstVolume("Partial file, please start from first volume: " + inf.filename, None)
1218 # is temp write usable?
1219 use_hack = 1
1220 if not self._main:
1221 use_hack = 0
1222 elif self._main._must_disable_hack():
1223 use_hack = 0
1224 elif inf._must_disable_hack():
1225 use_hack = 0
1226 elif is_filelike(self._rarfile):
1227 pass
1228 elif inf.file_size > HACK_SIZE_LIMIT:
1229 use_hack = 0
1230 elif not USE_EXTRACT_HACK:
1231 use_hack = 0
1233 # now extract
1234 if inf.compress_type == RAR_M0 and (inf.flags & RAR_FILE_PASSWORD) == 0 and inf.file_redir is None:
1235 return self._open_clear(inf)
1236 elif use_hack:
1237 return self._open_hack(inf, pwd)
1238 elif is_filelike(self._rarfile):
1239 return self._open_unrar_membuf(self._rarfile, inf, pwd)
1240 else:
1241 return self._open_unrar(self._rarfile, inf, pwd)
1243 def _open_clear(self, inf):
1244 return DirectReader(self, inf)
def _open_hack_core(self, inf, pwd, prefix, suffix):
    """Copy one entry into a temporary archive and open that with unrar.

    The entry's header and compressed data (``inf.header_size`` +
    ``inf.compress_size`` bytes starting at ``inf.header_offset``) are
    written between *prefix* and *suffix* into a fresh ``.rar`` file under
    ``HACK_TMP_DIR``.  The result of :meth:`_open_unrar` on that file is
    returned; the temp file name is handed over as its *tmpfile* argument.

    Raises :exc:`BadRarFile` if the source runs out of data early.  On any
    failure while writing, the temporary file is removed before the
    exception propagates.
    """
    size = inf.compress_size + inf.header_size

    # context managers guarantee the source handle and the temp file
    # object are closed on every path, including early failures
    with XFile(inf.volume_file, 0) as rf:
        rf.seek(inf.header_offset)

        tmpfd, tmpname = mkstemp(suffix=".rar", dir=HACK_TMP_DIR)
        try:
            with os.fdopen(tmpfd, "wb") as tmpf:
                tmpf.write(prefix)
                while size > 0:
                    buf = rf.read(min(size, BSIZE))
                    if not buf:
                        raise BadRarFile("read failed: " + inf.filename)
                    tmpf.write(buf)
                    size -= len(buf)
                tmpf.write(suffix)
        except BaseException:
            # do not leave a half-written archive behind
            os.unlink(tmpname)
            raise

    return self._open_unrar(tmpname, inf, pwd, tmpname)
def _open_unrar_membuf(self, memfile, inf, pwd):
    """Dump an in-memory archive to a temp file and extract from that.

    Needed for solid archives.  The temp file name is passed on to
    :meth:`_open_unrar` both as the archive and as the *tmpfile* argument.
    """
    archive_copy = membuf_tempfile(memfile)
    reader = self._open_unrar(archive_copy, inf, pwd, archive_copy, force_file=True)
    return reader
def _open_unrar(self, rarfile, inf, pwd=None, tmpfile=None, force_file=False):
    """Start the external extraction tool and return a pipe-backed reader.

    *rarfile* is the archive handed to the tool.  When *tmpfile* is given
    and *force_file* is false, no member name is passed on the command
    line; otherwise ``inf.filename`` is passed.
    """
    setup = tool_setup()

    # leaving the member name out avoids encoding related problems
    if tmpfile and not force_file:
        member = None
    else:
        member = inf.filename

    # the reader consumes the tool's output pipe
    cmdline = setup.open_cmdline(pwd, rarfile, member)
    return PipeReader(self, inf, cmdline, tmpfile)
1299 # RAR3 format
class Rar3Info(RarInfo):
    """Header record for RAR3 archives."""
    extract_version = 15
    salt = None
    add_size = 0
    header_crc = header_size = None
    header_offset = data_offset = None
    _md_class = _md_expect = None
    _name_size = None

    # rar5-only fields, kept here so they can always be read
    file_redir = None
    blake2sp_hash = None

    endarc_datacrc = None
    endarc_volnr = None

    def _must_disable_hack(self):
        """Tell whether this header rules out the temp-archive shortcut."""
        if self.type == RAR_BLOCK_FILE:
            blockers = RAR_FILE_PASSWORD | RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER
        elif self.type == RAR_BLOCK_MAIN:
            blockers = RAR_MAIN_SOLID | RAR_MAIN_PASSWORD
        else:
            return False
        return bool(self.flags & blockers)

    def is_dir(self):
        """Returns True if entry is a directory."""
        if self.type != RAR_BLOCK_FILE or self.is_symlink():
            return False
        return (self.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY

    def is_symlink(self):
        """Returns True if entry is a symlink."""
        if self.type != RAR_BLOCK_FILE:
            return False
        if self.host_os != RAR_OS_UNIX:
            return False
        # compare the file-type bits of the unix mode
        return (self.mode & 0xF000) == 0xA000

    def is_file(self):
        """Returns True if entry is a normal file."""
        if self.type != RAR_BLOCK_FILE:
            return False
        return not self.is_dir() and not self.is_symlink()
class RAR3Parser(CommonParser):
    """Parse RAR3 file format.
    """
    _expect_sig = RAR_ID
    # last derived header key, reused while the salt stays the same
    _last_aes_key = (None, None, None)   # (salt, key, iv)

    def _decrypt_header(self, fd):
        """Return a decrypting wrapper around *fd* for encrypted headers.

        Reads the 8-byte salt from *fd*; the key/iv pair is derived with
        ``rar3_s2k`` unless the previous header used the same salt.
        Raises :exc:`NoCrypto` when no crypto backend is available.
        """
        if not _have_crypto:
            raise NoCrypto("Cannot parse encrypted headers - no crypto")
        salt = fd.read(8)
        if self._last_aes_key[0] == salt:
            key, iv = self._last_aes_key[1:]
        else:
            key, iv = rar3_s2k(self._password, salt)
            self._last_aes_key = (salt, key, iv)
        return HeaderDecrypt(fd, key, iv)

    def _parse_block_header(self, fd):
        """Parse common block header

        Returns a :class:`Rar3Info`, or None on end of file, on a
        truncated header or on header CRC mismatch (the latter two are
        also reported through ``_set_error``).
        """
        h = Rar3Info()
        h.header_offset = fd.tell()

        # read and parse base header
        buf = fd.read(S_BLK_HDR.size)
        if not buf:
            return None
        if len(buf) < S_BLK_HDR.size:
            # partial base header: report it the same way as a partial
            # full header below instead of failing inside unpack_from()
            self._set_error("Unexpected EOF when reading header")
            return None
        t = S_BLK_HDR.unpack_from(buf)
        h.header_crc, h.type, h.flags, h.header_size = t

        # read full header
        if h.header_size > S_BLK_HDR.size:
            hdata = buf + fd.read(h.header_size - S_BLK_HDR.size)
        else:
            hdata = buf
        h.data_offset = fd.tell()

        # unexpected EOF?
        if len(hdata) != h.header_size:
            self._set_error("Unexpected EOF when reading header")
            return None

        pos = S_BLK_HDR.size

        # block has data associated with it?
        if h.flags & RAR_LONG_BLOCK:
            h.add_size, pos = load_le32(hdata, pos)
        else:
            h.add_size = 0

        # parse interesting ones, decide header boundaries for crc
        if h.type == RAR_BLOCK_MARK:
            return h
        elif h.type == RAR_BLOCK_MAIN:
            pos += 6
            if h.flags & RAR_MAIN_ENCRYPTVER:
                pos += 1
            crc_pos = pos
            if h.flags & RAR_MAIN_COMMENT:
                self._parse_subblocks(h, hdata, pos)
        elif h.type == RAR_BLOCK_FILE:
            # pos - 4: the file header parser re-reads the size field
            # that was consumed above as add_size
            pos = self._parse_file_header(h, hdata, pos - 4)
            crc_pos = pos
            if h.flags & RAR_FILE_COMMENT:
                pos = self._parse_subblocks(h, hdata, pos)
        elif h.type == RAR_BLOCK_SUB:
            pos = self._parse_file_header(h, hdata, pos - 4)
            crc_pos = h.header_size
        elif h.type == RAR_BLOCK_OLD_AUTH:
            pos += 8
            crc_pos = pos
        elif h.type == RAR_BLOCK_OLD_EXTRA:
            pos += 7
            crc_pos = pos
        elif h.type == RAR_BLOCK_ENDARC:
            if h.flags & RAR_ENDARC_DATACRC:
                h.endarc_datacrc, pos = load_le32(hdata, pos)
            if h.flags & RAR_ENDARC_VOLNR:
                h.endarc_volnr = S_SHORT.unpack_from(hdata, pos)[0]
                pos += 2
            crc_pos = h.header_size
        else:
            crc_pos = h.header_size

        # check crc (the stored crc field itself, first 2 bytes, is excluded)
        if h.type == RAR_BLOCK_OLD_SUB:
            crcdat = hdata[2:] + fd.read(h.add_size)
        else:
            crcdat = hdata[2:crc_pos]

        # header crc is the low 16 bits of crc32
        calc_crc = crc32(crcdat) & 0xFFFF

        # return good header
        if h.header_crc == calc_crc:
            return h

        # header parsing failed.
        self._set_error("Header CRC error (%02x): exp=%x got=%x (xlen = %d)",
                        h.type, h.header_crc, calc_crc, len(crcdat))

        # instead panicing, send eof
        return None

    def _parse_file_header(self, h, hdata, pos):
        """Read file-specific header

        Fills the file fields of *h* from *hdata* starting at *pos* and
        returns the position after the parsed data.
        """
        fld = S_FILE_HDR.unpack_from(hdata, pos)
        pos += S_FILE_HDR.size

        h.compress_size = fld[0]
        h.file_size = fld[1]
        h.host_os = fld[2]
        h.CRC = fld[3]
        h.date_time = parse_dos_time(fld[4])
        h.mtime = to_datetime(h.date_time)
        h.extract_version = fld[5]
        h.compress_type = fld[6]
        h._name_size = name_size = fld[7]
        h.mode = fld[8]

        h._md_class = CRC32Context
        h._md_expect = h.CRC

        if h.flags & RAR_FILE_LARGE:
            # upper 32 bits of both sizes follow the fixed part
            h1, pos = load_le32(hdata, pos)
            h2, pos = load_le32(hdata, pos)
            h.compress_size |= h1 << 32
            h.file_size |= h2 << 32
            h.add_size = h.compress_size

        name, pos = load_bytes(hdata, name_size, pos)
        if h.flags & RAR_FILE_UNICODE and b"\0" in name:
            # stored in custom encoding
            nul = name.find(b"\0")
            h.orig_filename = name[:nul]
            u = UnicodeFilename(h.orig_filename, name[nul + 1:])
            h.filename = u.decode()

            # if parsing failed fall back to simple name
            if u.failed:
                h.filename = self._decode(h.orig_filename)
        elif h.flags & RAR_FILE_UNICODE:
            # stored in UTF8
            h.orig_filename = name
            h.filename = name.decode("utf8", "replace")
        else:
            # stored in random encoding
            h.orig_filename = name
            h.filename = self._decode(name)

        # change separator, set dir suffix
        h.filename = h.filename.replace("\\", "/").rstrip("/")
        if h.is_dir():
            h.filename = h.filename + "/"

        if h.flags & RAR_FILE_SALT:
            h.salt, pos = load_bytes(hdata, 8, pos)
        else:
            h.salt = None

        # optional extended time stamps
        if h.flags & RAR_FILE_EXTTIME:
            pos = _parse_ext_time(h, hdata, pos)
        else:
            # NOTE(review): this also resets the mtime derived from
            # date_time above - confirm that is intended
            h.mtime = h.atime = h.ctime = h.arctime = None

        return pos

    def _parse_subblocks(self, h, hdata, pos):
        """Find old-style comment subblock

        Walks the sub-headers in *hdata* from *pos*; a comment that
        passes the (optional) crc check is stored in ``h.comment``.
        Returns the position where scanning stopped.
        """
        while pos < len(hdata):
            # ordinary block header
            t = S_BLK_HDR.unpack_from(hdata, pos)
            ___scrc, stype, sflags, slen = t
            pos_next = pos + slen
            pos += S_BLK_HDR.size

            # corrupt header
            if pos_next < pos:
                break

            # followed by block-specific header
            if stype == RAR_BLOCK_OLD_COMMENT and pos + S_COMMENT_HDR.size <= pos_next:
                declen, ver, meth, crc = S_COMMENT_HDR.unpack_from(hdata, pos)
                pos += S_COMMENT_HDR.size
                data = hdata[pos: pos_next]
                cmt = rar3_decompress(ver, meth, data, declen, sflags,
                                      crc, self._password)
                if not self._crc_check or (crc32(cmt) & 0xFFFF == crc):
                    h.comment = self._decode_comment(cmt)

            pos = pos_next
        return pos

    def _read_comment_v3(self, inf, pwd=None):
        """Load and decode the comment stored in sub-block *inf*.

        Returns the decoded comment, or None when crc checking is enabled
        and the decompressed data does not match ``inf.CRC``.
        """
        # read data
        with XFile(inf.volume_file) as rf:
            rf.seek(inf.data_offset)
            data = rf.read(inf.compress_size)

        # decompress
        cmt = rar3_decompress(inf.extract_version, inf.compress_type, data,
                              inf.file_size, inf.flags, inf.CRC, pwd, inf.salt)

        # check crc
        if self._crc_check:
            crc = crc32(cmt)
            if crc != inf.CRC:
                return None

        return self._decode_comment(cmt)

    def _decode(self, val):
        """Decode bytes using the first encoding from TRY_ENCODINGS that works.

        Falls back to the parser charset with replacement characters.
        """
        for c in TRY_ENCODINGS:
            try:
                return val.decode(c)
            except UnicodeError:
                pass
        return val.decode(self._charset, "replace")

    def _decode_comment(self, val):
        """Decode comment bytes the same way as file names."""
        return self._decode(val)

    def process_entry(self, fd, item):
        """Record a parsed header: file entries, comments, main flags."""
        if item.type == RAR_BLOCK_FILE:
            # use only first part
            if item.flags & RAR_FILE_VERSION:
                pass    # skip old versions
            elif (item.flags & RAR_FILE_SPLIT_BEFORE) == 0:
                self._info_map[item.filename.rstrip("/")] = item
                self._info_list.append(item)
            elif len(self._info_list) > 0:
                # final crc is in last block
                old = self._info_list[-1]
                old.CRC = item.CRC
                old._md_expect = item._md_expect
                old.compress_size += item.compress_size

        # parse new-style comment
        if item.type == RAR_BLOCK_SUB and item.filename == "CMT":
            if item.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER):
                pass
            elif item.flags & RAR_FILE_SOLID:
                # file comment
                cmt = self._read_comment_v3(item, self._password)
                if len(self._info_list) > 0:
                    old = self._info_list[-1]
                    old.comment = cmt
            else:
                # archive comment
                cmt = self._read_comment_v3(item, self._password)
                self.comment = cmt

        if item.type == RAR_BLOCK_MAIN:
            if item.flags & RAR_MAIN_COMMENT:
                self.comment = item.comment
            if item.flags & RAR_MAIN_PASSWORD:
                self._needs_password = True

    # put file compressed data into temporary .rar archive, and run
    # unrar on that, thus avoiding unrar going over whole archive
    def _open_hack(self, inf, pwd):
        """Open *inf* through a temporary single-entry RAR3 archive."""
        # create main header: crc, type, flags, size, res1, res2
        prefix = RAR_ID + S_BLK_HDR.pack(0x90CF, 0x73, 0, 13) + b"\0" * (2 + 4)
        return self._open_hack_core(inf, pwd, prefix, b"")
1625 # RAR5 format
class Rar5Info(RarInfo):
    """Base record for every RAR5 block type.
    """
    extract_version = 50
    header_crc = header_size = None
    header_offset = data_offset = None

    # present on all block types
    block_type = block_flags = None
    add_size = block_extra_size = 0

    # type=MAIN
    volume_number = None
    _md_class = _md_expect = None

    def _must_disable_hack(self):
        """The base record never blocks the temp-archive shortcut."""
        return False
class Rar5BaseFile(Rar5Info):
    """Fields common to RAR5 file and service records.
    """
    type = -1
    file_flags = file_compress_flags = None
    file_encryption = (0, 0, 0, b"", b"", b"")
    file_redir = file_owner = file_version = None
    blake2sp_hash = None

    def _must_disable_hack(self):
        """True for encrypted, split, solid or redirected entries."""
        split_mask = RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER
        # conditions are evaluated lazily, in this order
        return bool(
            self.flags & RAR_FILE_PASSWORD
            or self.block_flags & split_mask
            or self.file_compress_flags & RAR5_COMPR_SOLID
            or self.file_redir
        )
class Rar5FileInfo(Rar5BaseFile):
    """Record describing one file entry in a RAR5 archive.
    """
    type = RAR_BLOCK_FILE

    def is_symlink(self):
        """Returns True if entry is a symlink."""
        redir = self.file_redir
        if redir is None:
            return False
        # first element of the redirection tuple is its type
        return redir[0] in (
            RAR5_XREDIR_UNIX_SYMLINK,
            RAR5_XREDIR_WINDOWS_SYMLINK,
            RAR5_XREDIR_WINDOWS_JUNCTION,
        )

    def is_file(self):
        """Returns True if entry is a normal file."""
        if self.is_dir():
            return False
        return not self.is_symlink()

    def is_dir(self):
        """Returns True if entry is a directory."""
        if self.file_redir:
            return False
        return bool(self.file_flags & RAR5_FILE_FLAG_ISDIR)
class Rar5ServiceInfo(Rar5BaseFile):
    """Record for a RAR5 service block; shares the file record layout.
    """
    type = RAR_BLOCK_SUB
class Rar5MainInfo(Rar5Info):
    """Record for the RAR5 main archive header.
    """
    type = RAR_BLOCK_MAIN
    main_flags = None
    main_volume_number = None

    def _must_disable_hack(self):
        """Solid archives cannot use the temp-archive shortcut."""
        return bool(self.main_flags & RAR5_MAIN_FLAG_SOLID)
class Rar5EncryptionInfo(Rar5Info):
    """Record for the RAR5 header-encryption block.
    """
    type = RAR5_BLOCK_ENCRYPTION
    encryption_algo = encryption_flags = None
    encryption_kdf_count = None
    encryption_salt = None
    encryption_check_value = None

    def needs_password(self):
        """Always True for this record type."""
        return True
class Rar5EndArcInfo(Rar5Info):
    """Record for the RAR5 end-of-archive block.
    """
    type = RAR_BLOCK_ENDARC
    # raw flags value read from the block
    endarc_flags = None
class RAR5Parser(CommonParser):
    """Parse RAR5 format.
    """
    _expect_sig = RAR5_ID
    # header-encryption record, set once an encryption block was parsed
    _hdrenc_main = None

    # AES encrypted headers
    _last_aes256_key = (-1, None, None)     # (kdf_count, salt, key)

    def _gen_key(self, kdf_count, salt):
        """Derive the AES key for *salt* with ``2**kdf_count`` iterations.

        The last derived key is cached.  Raises :exc:`BadRarFile` when
        *kdf_count* is larger than 24.
        """
        if self._last_aes256_key[:2] == (kdf_count, salt):
            return self._last_aes256_key[2]
        if kdf_count > 24:
            raise BadRarFile("Too large kdf_count")
        pwd = self._password
        if isinstance(pwd, str):
            pwd = pwd.encode("utf8")
        key = pbkdf2_hmac("sha256", pwd, salt, 1 << kdf_count)
        self._last_aes256_key = (kdf_count, salt, key)
        return key

    def _decrypt_header(self, fd):
        """Return a decrypting wrapper around *fd*; reads the 16-byte IV first.

        Raises :exc:`NoCrypto` when no crypto backend is available.
        """
        if not _have_crypto:
            raise NoCrypto("Cannot parse encrypted headers - no crypto")
        h = self._hdrenc_main
        key = self._gen_key(h.encryption_kdf_count, h.encryption_salt)
        iv = fd.read(16)
        return HeaderDecrypt(fd, key, iv)

    def _parse_block_header(self, fd):
        """Parse common block header

        Returns the record object for known block types.  Returns None
        for unknown types, oversized headers, truncated headers and CRC
        mismatches (the latter two are reported through ``_set_error``).
        """
        header_offset = fd.tell()

        # crc (4 bytes) plus up to 3 bytes of the header-size vint
        preload = 4 + 3
        start_bytes = fd.read(preload)
        header_crc, pos = load_le32(start_bytes, 0)
        hdrlen, pos = load_vint(start_bytes, pos)
        if hdrlen > 2 * 1024 * 1024:
            return None
        header_size = pos + hdrlen

        # read full header, check for EOF
        hdata = start_bytes + fd.read(header_size - len(start_bytes))
        if len(hdata) != header_size:
            self._set_error("Unexpected EOF when reading header")
            return None
        data_offset = fd.tell()

        # crc covers everything after the crc field itself
        calc_crc = crc32(memoryview(hdata)[4:])
        if header_crc != calc_crc:
            # header parsing failed.
            self._set_error("Header CRC error: exp=%x got=%x (xlen = %d)",
                            header_crc, calc_crc, len(hdata))
            return None

        block_type, pos = load_vint(hdata, pos)

        if block_type == RAR5_BLOCK_MAIN:
            h, pos = self._parse_block_common(Rar5MainInfo(), hdata)
            h = self._parse_main_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_FILE:
            h, pos = self._parse_block_common(Rar5FileInfo(), hdata)
            h = self._parse_file_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_SERVICE:
            h, pos = self._parse_block_common(Rar5ServiceInfo(), hdata)
            h = self._parse_file_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_ENCRYPTION:
            h, pos = self._parse_block_common(Rar5EncryptionInfo(), hdata)
            h = self._parse_encryption_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_ENDARC:
            h, pos = self._parse_block_common(Rar5EndArcInfo(), hdata)
            h = self._parse_endarc_block(h, hdata, pos)
        else:
            h = None
        if h:
            h.header_offset = header_offset
            h.data_offset = data_offset
        return h

    def _parse_block_common(self, h, hdata):
        """Fill the fields shared by all block types; return ``(h, pos)``."""
        h.header_crc, pos = load_le32(hdata, 0)
        hdrlen, pos = load_vint(hdata, pos)
        h.header_size = hdrlen + pos
        h.block_type, pos = load_vint(hdata, pos)
        h.block_flags, pos = load_vint(hdata, pos)

        if h.block_flags & RAR5_BLOCK_FLAG_EXTRA_DATA:
            h.block_extra_size, pos = load_vint(hdata, pos)
        if h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA:
            h.add_size, pos = load_vint(hdata, pos)

        h.compress_size = h.add_size

        # mirror rar5 block flags into the generic flags field
        if h.block_flags & RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN:
            h.flags |= RAR_SKIP_IF_UNKNOWN
        if h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA:
            h.flags |= RAR_LONG_BLOCK
        return h, pos

    def _parse_main_block(self, h, hdata, pos):
        """Parse main-header fields and mirror them into ``h.flags``."""
        h.main_flags, pos = load_vint(hdata, pos)
        if h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR:
            h.main_volume_number, pos = load_vint(hdata, pos)

        h.flags |= RAR_MAIN_NEWNUMBERING
        if h.main_flags & RAR5_MAIN_FLAG_SOLID:
            h.flags |= RAR_MAIN_SOLID
        if h.main_flags & RAR5_MAIN_FLAG_ISVOL:
            h.flags |= RAR_MAIN_VOLUME
        if h.main_flags & RAR5_MAIN_FLAG_RECOVERY:
            h.flags |= RAR_MAIN_RECOVERY
        if self._hdrenc_main:
            h.flags |= RAR_MAIN_PASSWORD
        # no volume number field: flagged as the first volume
        if (h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR) == 0:
            h.flags |= RAR_MAIN_FIRSTVOLUME

        return h

    def _parse_file_block(self, h, hdata, pos):
        """Parse a file or service record, including its extra area."""
        h.file_flags, pos = load_vint(hdata, pos)
        h.file_size, pos = load_vint(hdata, pos)
        h.mode, pos = load_vint(hdata, pos)

        if h.file_flags & RAR5_FILE_FLAG_HAS_MTIME:
            h.mtime, pos = load_unixtime(hdata, pos)
            h.date_time = h.mtime.timetuple()[:6]
        if h.file_flags & RAR5_FILE_FLAG_HAS_CRC32:
            h.CRC, pos = load_le32(hdata, pos)
            h._md_class = CRC32Context
            h._md_expect = h.CRC

        h.file_compress_flags, pos = load_vint(hdata, pos)
        h.file_host_os, pos = load_vint(hdata, pos)
        h.orig_filename, pos = load_vstr(hdata, pos)
        h.filename = h.orig_filename.decode("utf8", "replace").rstrip("/")

        # use compatible values
        if h.file_host_os == RAR5_OS_WINDOWS:
            h.host_os = RAR_OS_WIN32
        else:
            h.host_os = RAR_OS_UNIX
        h.compress_type = RAR_M0 + ((h.file_compress_flags >> 7) & 7)

        if h.block_extra_size:
            # allow 1 byte of garbage
            while pos < len(hdata) - 1:
                xsize, pos = load_vint(hdata, pos)
                xdata, pos = load_bytes(hdata, xsize, pos)
                self._process_file_extra(h, xdata)

        if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE:
            h.flags |= RAR_FILE_SPLIT_BEFORE
        if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_AFTER:
            h.flags |= RAR_FILE_SPLIT_AFTER
        if h.file_flags & RAR5_FILE_FLAG_ISDIR:
            h.flags |= RAR_FILE_DIRECTORY
        if h.file_compress_flags & RAR5_COMPR_SOLID:
            h.flags |= RAR_FILE_SOLID

        if h.is_dir():
            h.filename = h.filename + "/"
        return h

    def _parse_endarc_block(self, h, hdata, pos):
        """Parse the end-of-archive record."""
        h.endarc_flags, pos = load_vint(hdata, pos)
        if h.endarc_flags & RAR5_ENDARC_FLAG_NEXT_VOL:
            h.flags |= RAR_ENDARC_NEXT_VOLUME
        return h

    def _parse_encryption_block(self, h, hdata, pos):
        """Parse the header-encryption record and remember it on the parser.

        Raises :exc:`BadRarFile` for ciphers other than AES256.
        """
        h.encryption_algo, pos = load_vint(hdata, pos)
        h.encryption_flags, pos = load_vint(hdata, pos)
        h.encryption_kdf_count, pos = load_byte(hdata, pos)
        h.encryption_salt, pos = load_bytes(hdata, 16, pos)
        if h.encryption_flags & RAR5_ENC_FLAG_HAS_CHECKVAL:
            # load_bytes() returns (data, pos); keep only the data
            h.encryption_check_value, pos = load_bytes(hdata, 12, pos)
        if h.encryption_algo != RAR5_XENC_CIPHER_AES256:
            raise BadRarFile("Unsupported header encryption cipher")
        self._hdrenc_main = h
        return h

    def _process_file_extra(self, h, xdata):
        """Dispatch one extra-area record of a file block by its type."""
        xtype, pos = load_vint(xdata, 0)
        if xtype == RAR5_XFILE_TIME:
            self._parse_file_xtime(h, xdata, pos)
        elif xtype == RAR5_XFILE_ENCRYPTION:
            self._parse_file_encryption(h, xdata, pos)
        elif xtype == RAR5_XFILE_HASH:
            self._parse_file_hash(h, xdata, pos)
        elif xtype == RAR5_XFILE_VERSION:
            self._parse_file_version(h, xdata, pos)
        elif xtype == RAR5_XFILE_REDIR:
            self._parse_file_redir(h, xdata, pos)
        elif xtype == RAR5_XFILE_OWNER:
            self._parse_file_owner(h, xdata, pos)
        elif xtype == RAR5_XFILE_SERVICE:
            pass
        else:
            pass

    # extra block for file time record
    def _parse_file_xtime(self, h, xdata, pos):
        """Load mtime/ctime/atime, plus nanoseconds when flagged."""
        tflags, pos = load_vint(xdata, pos)

        ldr = load_windowstime
        if tflags & RAR5_XTIME_UNIXTIME:
            ldr = load_unixtime

        if tflags & RAR5_XTIME_HAS_MTIME:
            h.mtime, pos = ldr(xdata, pos)
            h.date_time = h.mtime.timetuple()[:6]
        if tflags & RAR5_XTIME_HAS_CTIME:
            h.ctime, pos = ldr(xdata, pos)
        if tflags & RAR5_XTIME_HAS_ATIME:
            h.atime, pos = ldr(xdata, pos)

        # nanosecond parts come after all timestamps, in the same order
        if tflags & RAR5_XTIME_UNIXTIME_NS:
            if tflags & RAR5_XTIME_HAS_MTIME:
                nsec, pos = load_le32(xdata, pos)
                h.mtime = to_nsdatetime(h.mtime, nsec)
            if tflags & RAR5_XTIME_HAS_CTIME:
                nsec, pos = load_le32(xdata, pos)
                h.ctime = to_nsdatetime(h.ctime, nsec)
            if tflags & RAR5_XTIME_HAS_ATIME:
                nsec, pos = load_le32(xdata, pos)
                h.atime = to_nsdatetime(h.atime, nsec)

    # just remember encryption info
    def _parse_file_encryption(self, h, xdata, pos):
        """Store per-file encryption parameters and mark the entry encrypted."""
        algo, pos = load_vint(xdata, pos)
        flags, pos = load_vint(xdata, pos)
        kdf_count, pos = load_byte(xdata, pos)
        salt, pos = load_bytes(xdata, 16, pos)
        iv, pos = load_bytes(xdata, 16, pos)
        checkval = None
        if flags & RAR5_XENC_CHECKVAL:
            checkval, pos = load_bytes(xdata, 12, pos)
        if flags & RAR5_XENC_TWEAKED:
            # with tweaked checksums no expected digest is kept
            h._md_expect = None
            h._md_class = NoHashContext

        h.file_encryption = (algo, flags, kdf_count, salt, iv, checkval)
        h.flags |= RAR_FILE_PASSWORD

    def _parse_file_hash(self, h, xdata, pos):
        """Load a BLAKE2sp hash record; other hash types are ignored."""
        hash_type, pos = load_vint(xdata, pos)
        if hash_type == RAR5_XHASH_BLAKE2SP:
            h.blake2sp_hash, pos = load_bytes(xdata, 32, pos)
            if (h.file_encryption[1] & RAR5_XENC_TWEAKED) == 0:
                h._md_class = Blake2SP
                h._md_expect = h.blake2sp_hash

    def _parse_file_version(self, h, xdata, pos):
        """Store ``(flags, version)`` in ``h.file_version``."""
        flags, pos = load_vint(xdata, pos)
        version, pos = load_vint(xdata, pos)
        h.file_version = (flags, version)

    def _parse_file_redir(self, h, xdata, pos):
        """Store ``(type, flags, target name)`` in ``h.file_redir``."""
        redir_type, pos = load_vint(xdata, pos)
        redir_flags, pos = load_vint(xdata, pos)
        redir_name, pos = load_vstr(xdata, pos)
        redir_name = redir_name.decode("utf8", "replace")
        h.file_redir = (redir_type, redir_flags, redir_name)

    def _parse_file_owner(self, h, xdata, pos):
        """Store ``(user_name, group_name, user_id, group_id)``.

        Fields not present in the record stay None.
        """
        user_name = group_name = user_id = group_id = None

        flags, pos = load_vint(xdata, pos)
        if flags & RAR5_XOWNER_UNAME:
            user_name, pos = load_vstr(xdata, pos)
        if flags & RAR5_XOWNER_GNAME:
            group_name, pos = load_vstr(xdata, pos)
        if flags & RAR5_XOWNER_UID:
            user_id, pos = load_vint(xdata, pos)
        if flags & RAR5_XOWNER_GID:
            group_id, pos = load_vint(xdata, pos)

        h.file_owner = (user_name, group_name, user_id, group_id)

    def process_entry(self, fd, item):
        """Record a parsed block: file entries and the archive comment."""
        if item.block_type == RAR5_BLOCK_FILE:
            if item.file_version:
                pass    # skip old versions
            elif (item.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE) == 0:
                # use only first part
                self._info_map[item.filename.rstrip("/")] = item
                self._info_list.append(item)
            elif len(self._info_list) > 0:
                # final crc is in last block
                old = self._info_list[-1]
                old.CRC = item.CRC
                old._md_expect = item._md_expect
                old.blake2sp_hash = item.blake2sp_hash
                old.compress_size += item.compress_size
        elif item.block_type == RAR5_BLOCK_SERVICE:
            if item.filename == "CMT":
                self._load_comment(fd, item)

    def _load_comment(self, fd, item):
        """Read the archive comment from service block *item*.

        Split or compressed comments, and encrypted ones using a cipher
        other than AES256, are ignored.  Always returns None; the result
        is stored in ``self.comment``.
        """
        if item.block_flags & (RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER):
            return None
        if item.compress_type != RAR_M0:
            return None

        if item.flags & RAR_FILE_PASSWORD:
            algo, ___flags, kdf_count, salt, iv, ___checkval = item.file_encryption
            if algo != RAR5_XENC_CIPHER_AES256:
                return None
            key = self._gen_key(kdf_count, salt)
            f = HeaderDecrypt(fd, key, iv)
            cmt = f.read(item.file_size)
        else:
            # archive comment
            with self._open_clear(item) as cmtstream:
                cmt = cmtstream.read()

        # rar bug? - appends zero to comment
        cmt = cmt.split(b"\0", 1)[0]
        self.comment = cmt.decode("utf8")
        return None

    def _open_hack(self, inf, pwd):
        """Open *inf* through a temporary single-entry RAR5 archive."""
        # len, type, blk_flags, flags
        main_hdr = b"\x03\x01\x00\x00"
        endarc_hdr = b"\x03\x05\x00\x00"
        main_hdr = S_LONG.pack(crc32(main_hdr)) + main_hdr
        endarc_hdr = S_LONG.pack(crc32(endarc_hdr)) + endarc_hdr
        return self._open_hack_core(inf, pwd, RAR5_ID + main_hdr, endarc_hdr)
2077 ## Utility classes
class UnicodeFilename:
    """Decoder for the compressed UTF-16 file names used by RAR3.

    *name* is the 8-bit form of the name, *encdata* the encoded stream.
    ``failed`` is set to 1 when either input runs out during decoding.
    """
    def __init__(self, name, encdata):
        self.std_name = bytearray(name)
        self.encdata = bytearray(encdata)
        self.pos = self.encpos = 0
        self.buf = bytearray()
        self.failed = 0

    def enc_byte(self):
        """Take the next byte from the encoded stream (0 if exhausted)."""
        if self.encpos >= len(self.encdata):
            self.failed = 1
            return 0
        value = self.encdata[self.encpos]
        self.encpos += 1
        return value

    def std_byte(self):
        """Peek the 8-bit name byte at the current output position."""
        if self.pos >= len(self.std_name):
            self.failed = 1
            return ord("?")
        return self.std_name[self.pos]

    def put(self, lo, hi):
        """Emit one UTF-16LE code unit and advance the output position."""
        self.buf.extend((lo, hi))
        self.pos += 1

    def decode(self):
        """Run the decoder over the whole stream and return the name."""
        high = self.enc_byte()
        bits_left = 0
        total = len(self.encdata)
        while self.encpos < total:
            # every flag byte carries four 2-bit opcodes, high bits first
            if not bits_left:
                flags = self.enc_byte()
                bits_left = 8
            bits_left -= 2
            opcode = (flags >> bits_left) & 3
            if opcode == 3:
                n = self.enc_byte()
                if n & 0x80:
                    # run taken from the 8-bit name with a correction added
                    corr = self.enc_byte()
                    for _ in range((n & 0x7f) + 2):
                        low = (self.std_byte() + corr) & 0xFF
                        self.put(low, high)
                else:
                    # run copied verbatim from the 8-bit name
                    for _ in range(n + 2):
                        self.put(self.std_byte(), 0)
            elif opcode == 2:
                low = self.enc_byte()
                top = self.enc_byte()
                self.put(low, top)
            elif opcode == 1:
                self.put(self.enc_byte(), high)
            else:
                self.put(self.enc_byte(), 0)
        return self.buf.decode("utf-16le", "replace")
class RarExtFile(io.RawIOBase):
    """Common base of the file objects handed out by :meth:`RarFile.open`.

    Implements the public reading/seeking API and the final checksum
    verification; subclasses supply :meth:`_read`.

    Behaviour:
     - no short reads - .read() returns as much as requested.
     - no internal buffer, use io.BufferedReader for that.
    """
    name = None     #: Filename of the archive entry
    mode = "rb"
    _parser = None
    _inf = None
    _fd = None
    _remain = 0
    _returncode = 0
    _md_context = None

    def _open_extfile(self, parser, inf):
        """(Re)initialise reader state for entry *inf*."""
        self.name, self._parser, self._inf = inf.filename, parser, inf

        stale = self._fd
        if stale:
            stale.close()
        hasher = inf._md_class or NoHashContext
        self._md_context = hasher()
        self._fd = None
        self._remain = inf.file_size

    def read(self, n=-1):
        """Read all or specified amount of data from archive entry."""

        # clamp the request to what is left of the entry
        if n is None or n < 0 or n > self._remain:
            n = self._remain
        if n == 0:
            return b""

        wanted = n
        pieces = []
        while n > 0:
            piece = self._read(n)
            if not piece:
                break
            pieces.append(piece)
            self._md_context.update(piece)
            got = len(piece)
            self._remain -= got
            n -= got
        data = b"".join(pieces)
        if n > 0:
            raise BadRarFile("Failed the read enough data: req=%d got=%d" % (wanted, len(data)))

        # verify the checksum once the entry is exhausted
        if not data or self._remain == 0:
            self._check()
        return data

    def _check(self):
        """Check final CRC."""
        actual = self._md_context.digest()
        expected = self._inf._md_expect
        # nothing to compare against
        if expected is None or actual is None:
            return
        if self._returncode:
            check_returncode(self._returncode, "", tool_setup().get_errmap())
        if self._remain != 0:
            raise BadRarFile("Failed the read enough data")
        if actual != expected:
            raise BadRarFile("Corrupt file - CRC check failed: %s - exp=%r got=%r" % (
                self._inf.filename, expected, actual))

    def _read(self, cnt):
        """Actual read that gets sanitized cnt."""
        raise NotImplementedError("_read")

    def close(self):
        """Close open resources."""

        super().close()

        fd = self._fd
        if fd:
            fd.close()
            self._fd = None

    def __del__(self):
        """Hook delete to make sure tempfile is removed."""
        self.close()

    def readinto(self, buf):
        """Zero-copy read directly into buffer.

        Returns bytes read.
        """
        raise NotImplementedError("readinto")

    def tell(self):
        """Return current reading position in uncompressed data."""
        return self._inf.file_size - self._remain

    def seek(self, offset, whence=0):
        """Seek in data.

        On uncompressed files the seek is done directly, so it is fast.
        On compressed files it is slow: seeking forward reads ahead and
        discards, seeking backwards re-opens the entry and reads from
        the start.
        """

        # disable crc check when seeking
        self._md_context = NoHashContext()

        size = self._inf.file_size
        here = self.tell()

        if whence == 0:     # relative to start
            target = offset
        elif whence == 1:   # relative to current position
            target = here + offset
        elif whence == 2:   # relative to end
            target = size + offset
        else:
            raise ValueError("Invalid value for whence")

        # keep the target inside the entry
        if target < 0:
            target = 0
        elif target > size:
            target = size

        if target < here:
            # going backwards: reopen and skip from the beginning
            self._open_extfile(self._parser, self._inf)
            self._skip(target)
        else:
            self._skip(target - here)
        return self.tell()

    def _skip(self, cnt):
        """Read and discard data"""
        empty_read(self, cnt, BSIZE)

    def readable(self):
        """Returns True"""
        return True

    def writable(self):
        """Returns False.

        Writing is not supported.
        """
        return False

    def seekable(self):
        """Returns True.

        Seeking is supported, although it's slow on compressed files.
        """
        return True

    def readall(self):
        """Read all remaining data"""
        # avoid RawIOBase default impl
        return self.read()
class PipeReader(RarExtFile):
    """Reader fed by the stdout pipe of an external extraction command.

    Also removes the optional temporary file on close.
    """

    def __init__(self, parser, inf, cmd, tempfile=None):
        super().__init__()
        self._cmd = cmd
        self._proc = None
        self._tempfile = tempfile
        self._open_extfile(parser, inf)

    def _close_proc(self):
        """Close the child's pipes, wait for it and keep its exit code."""
        proc = self._proc
        if not proc:
            return
        for stream in (proc.stdout, proc.stderr, proc.stdin):
            if stream:
                stream.close()
        proc.wait()
        self._returncode = proc.returncode
        self._proc = None

    def _open_extfile(self, parser, inf):
        """Reset state, stop any running command and start a fresh one."""
        super()._open_extfile(parser, inf)

        # get rid of the previous process first
        self._close_proc()

        # then start over
        self._returncode = 0
        self._proc = custom_popen(self._cmd)
        self._fd = self._proc.stdout

    def _read(self, cnt):
        """Read from pipe."""

        # a single read usually delivers everything
        first = self._fd.read(cnt)
        if len(first) == cnt or not first:
            return first

        # short read: keep going until satisfied or the pipe is drained
        parts = [first]
        missing = cnt - len(first)
        while missing > 0:
            more = self._fd.read(missing)
            if not more:
                break
            missing -= len(more)
            parts.append(more)
        return b"".join(parts)

    def close(self):
        """Close open resources."""

        self._close_proc()
        super().close()

        tmp = self._tempfile
        if tmp:
            # best effort: a failed removal is ignored
            try:
                os.unlink(tmp)
            except OSError:
                pass
            self._tempfile = None

    def readinto(self, buf):
        """Zero-copy read directly into buffer."""
        limit = min(len(buf), self._remain)
        view = memoryview(buf)
        filled = 0
        while filled < limit:
            step = self._fd.readinto(view[filled: limit])
            if not step:
                break
            self._md_context.update(view[filled: filled + step])
            self._remain -= step
            filled += step
        return filled
2396 class DirectReader(RarExtFile):
2397 """Read uncompressed data directly from archive.
2399 _cur = None
2400 _cur_avail = None
2401 _volfile = None
def __init__(self, parser, inf):
    """Create the reader and immediately open entry *inf*."""
    super().__init__()
    # all per-entry state is set up by _open_extfile()
    self._open_extfile(parser, inf)
def _open_extfile(self, parser, inf):
    """Open the entry's volume and re-parse its header to find the data."""
    super()._open_extfile(parser, inf)

    volume = self._inf.volume_file
    self._volfile = volume
    fd = XFile(volume, 0)
    self._fd = fd
    # go to the entry's header; after parsing it the file position
    # is at the start of the entry's data
    fd.seek(self._inf.header_offset, 0)
    header = self._parser._parse_header(fd)
    self._cur = header
    self._cur_avail = header.add_size
2416 def _skip(self, cnt):
2417 """RAR Seek, skipping through rar files to get to correct position
2420 while cnt > 0:
2421 # next vol needed?
2422 if self._cur_avail == 0:
2423 if not self._open_next():
2424 break
2426 # fd is in read pos, do the read
2427 if cnt > self._cur_avail:
2428 cnt -= self._cur_avail
2429 self._remain -= self._cur_avail
2430 self._cur_avail = 0
2431 else:
2432 self._fd.seek(cnt, 1)
2433 self._cur_avail -= cnt
2434 self._remain -= cnt
2435 cnt = 0
2437 def _read(self, cnt):
2438 """Read from potentially multi-volume archive."""
2440 buf = []
2441 while cnt > 0:
2442 # next vol needed?
2443 if self._cur_avail == 0:
2444 if not self._open_next():
2445 break
2447 # fd is in read pos, do the read
2448 if cnt > self._cur_avail:
2449 data = self._fd.read(self._cur_avail)
2450 else:
2451 data = self._fd.read(cnt)
2452 if not data:
2453 break
2455 # got some data
2456 cnt -= len(data)
2457 self._cur_avail -= len(data)
2458 buf.append(data)
2460 if len(buf) == 1:
2461 return buf[0]
2462 return b"".join(buf)
2464 def _open_next(self):
2465 """Proceed to next volume."""
2467 # is the file split over archives?
2468 if (self._cur.flags & RAR_FILE_SPLIT_AFTER) == 0:
2469 return False
2471 if self._fd:
2472 self._fd.close()
2473 self._fd = None
2475 # open next part
2476 self._volfile = self._parser._next_volname(self._volfile)
2477 fd = open(self._volfile, "rb", 0)
2478 self._fd = fd
2479 sig = fd.read(len(self._parser._expect_sig))
2480 if sig != self._parser._expect_sig:
2481 raise BadRarFile("Invalid signature")
2483 # loop until first file header
2484 while True:
2485 cur = self._parser._parse_header(fd)
2486 if not cur:
2487 raise BadRarFile("Unexpected EOF")
2488 if cur.type in (RAR_BLOCK_MARK, RAR_BLOCK_MAIN):
2489 if cur.add_size:
2490 fd.seek(cur.add_size, 1)
2491 continue
2492 if cur.orig_filename != self._inf.orig_filename:
2493 raise BadRarFile("Did not found file entry")
2494 self._cur = cur
2495 self._cur_avail = cur.add_size
2496 return True
2498 def readinto(self, buf):
2499 """Zero-copy read directly into buffer."""
2500 got = 0
2501 vbuf = memoryview(buf)
2502 while got < len(buf):
2503 # next vol needed?
2504 if self._cur_avail == 0:
2505 if not self._open_next():
2506 break
2508 # length for next read
2509 cnt = len(buf) - got
2510 if cnt > self._cur_avail:
2511 cnt = self._cur_avail
2513 # read into temp view
2514 res = self._fd.readinto(vbuf[got: got + cnt])
2515 if not res:
2516 break
2517 self._md_context.update(vbuf[got: got + res])
2518 self._cur_avail -= res
2519 self._remain -= res
2520 got += res
2521 return got
class HeaderDecrypt:
    """File-like object that decrypts from another file"""

    def __init__(self, f, key, iv):
        self.f = f
        self.ciph = AES_CBC_Decrypt(key, iv)
        self.buf = b""      # decrypted bytes not yet handed out

    def tell(self):
        """Current file pos - works only on block boundaries."""
        return self.f.tell()

    def read(self, cnt=None):
        """Read and decrypt.

        `cnt` has to be an integer: it is compared with integers right
        away, so the ``None`` default cannot actually be used.
        Raises BadRarFile when more than 8 KiB is requested.
        """
        if cnt > 8 * 1024:
            raise BadRarFile("Bad count to header decrypt - wrong password?")

        # consume old data
        pending = self.buf
        if cnt <= len(pending):
            self.buf = pending[cnt:]
            return pending[:cnt]
        self.buf = b""
        out = pending
        need = cnt - len(out)

        # decrypt new data, one cipher block at a time
        blklen = 16
        while need > 0:
            raw = self.f.read(blklen)
            if len(raw) < blklen:
                break
            plain = self.ciph.decrypt(raw)
            if need >= len(plain):
                out += plain
                need -= len(plain)
            else:
                # keep the unused tail for the next call
                out += plain[:need]
                self.buf = plain[need:]
                need = 0

        return out
class XFile:
    """Input may be filename or file object.

    A file object passed in is rewound to the start and is left open by
    :meth:`close`; a file opened here from a name is closed by it.
    """
    __slots__ = ("_fd", "_need_close")

    def __init__(self, xfile, bufsize=1024):
        self._need_close = not is_filelike(xfile)
        if self._need_close:
            self._fd = open(xfile, "rb", bufsize)
        else:
            self._fd = xfile
            self._fd.seek(0)

    def read(self, n=None):
        """Read from file."""
        return self._fd.read(n)

    def tell(self):
        """Return file pos."""
        return self._fd.tell()

    def seek(self, ofs, whence=0):
        """Move file pos."""
        return self._fd.seek(ofs, whence)

    def readinto(self, buf):
        """Read into buffer."""
        return self._fd.readinto(buf)

    def close(self):
        """Close file object."""
        if not self._need_close:
            return
        self._fd.close()

    def __enter__(self):
        return self

    def __exit__(self, typ, val, tb):
        self.close()
class NoHashContext:
    """No-op hash function.

    Offers the same methods as the other hash contexts but ignores
    all input; every method returns ``None``.
    """

    def __init__(self, data=None):
        """Initialize"""
        return None

    def update(self, data):
        """Update data"""
        return None

    def digest(self):
        """Final hash"""
        return None

    def hexdigest(self):
        """Hexadecimal digest."""
        return None
class CRC32Context:
    """Hash context that uses CRC32."""
    __slots__ = ["_crc"]

    def __init__(self, data=None):
        # running checksum, starts from zero
        self._crc = 0
        if data:
            self.update(data)

    def update(self, data):
        """Process data."""
        running = self._crc
        self._crc = crc32(data, running)

    def digest(self):
        """Final hash."""
        return self._crc

    def hexdigest(self):
        """Hexadecimal digest."""
        value = self.digest()
        return "%08x" % value
class Blake2SP:
    """Blake2sp hash context.

    Input is cut into `block_size` pieces that are dealt round-robin to
    `parallelism` leaf blake2s hashes; the final digest is a root blake2s
    hash over the leaf digests.
    """
    __slots__ = ["_thread", "_buf", "_cur", "_digest"]
    digest_size = 32
    block_size = 64
    parallelism = 8

    def __init__(self, data=None):
        self._buf = b""         # incomplete trailing block
        self._cur = 0           # leaf that gets the next block
        self._digest = None     # cached final digest
        last = self.parallelism - 1
        self._thread = [self._blake2s(idx, 0, idx == last)
                        for idx in range(self.parallelism)]

        if data:
            self.update(data)

    def _blake2s(self, ofs, depth, is_last):
        """Create one blake2s node of the two-level tree."""
        return blake2s(node_offset=ofs, node_depth=depth, last_node=is_last,
                       depth=2, inner_size=32, fanout=self.parallelism)

    def _add_block(self, blk):
        """Feed block to the current leaf, then move on to the next leaf."""
        leaf = self._thread[self._cur]
        leaf.update(blk)
        self._cur = (self._cur + 1) % self.parallelism

    def update(self, data):
        """Hash data.
        """
        view = memoryview(data)
        bs = self.block_size
        end = len(view)
        pos = 0

        # complete the block left over from previous call
        if self._buf:
            need = bs - len(self._buf)
            if end < need:
                self._buf += view.tobytes()
                return
            self._add_block(self._buf + view[:need].tobytes())
            pos = need

        # whole blocks
        while end - pos >= bs:
            self._add_block(view[pos:pos + bs])
            pos += bs

        # remember the tail
        self._buf = view[pos:].tobytes()

    def digest(self):
        """Return final digest value.
        """
        if self._digest is None:
            if self._buf:
                self._add_block(self._buf)
                self._buf = b""
            root = self._blake2s(0, 1, True)
            for leaf in self._thread:
                root.update(leaf.digest())
            self._digest = root.digest()
        return self._digest

    def hexdigest(self):
        """Hexadecimal digest."""
        return hexlify(self.digest()).decode("ascii")
2707 class Rar3Sha1:
2708 """Emulate buggy SHA1 from RAR3.
2710 digest_size = 20
2711 block_size = 64
2713 _BLK_BE = struct.Struct(b">16L")
2714 _BLK_LE = struct.Struct(b"<16L")
2716 __slots__ = ("_nbytes", "_md", "_rarbug")
2718 def __init__(self, data=b"", rarbug=False):
2719 self._md = sha1()
2720 self._nbytes = 0
2721 self._rarbug = rarbug
2722 self.update(data)
2724 def update(self, data):
2725 """Process more data."""
2726 self._md.update(data)
2727 bufpos = self._nbytes & 63
2728 self._nbytes += len(data)
2730 if self._rarbug and len(data) > 64:
2731 dpos = self.block_size - bufpos
2732 while dpos + self.block_size <= len(data):
2733 self._corrupt(data, dpos)
2734 dpos += self.block_size
2736 def digest(self):
2737 """Return final state."""
2738 return self._md.digest()
2740 def hexdigest(self):
2741 """Return final state as hex string."""
2742 return self._md.hexdigest()
2744 def _corrupt(self, data, dpos):
2745 """Corruption from SHA1 core."""
2746 ws = list(self._BLK_BE.unpack_from(data, dpos))
2747 for t in range(16, 80):
2748 tmp = ws[(t - 3) & 15] ^ ws[(t - 8) & 15] ^ ws[(t - 14) & 15] ^ ws[(t - 16) & 15]
2749 ws[t & 15] = ((tmp << 1) | (tmp >> (32 - 1))) & 0xFFFFFFFF
2750 self._BLK_LE.pack_into(data, dpos, *ws)
2754 ## Utility functions
# Precompiled little-endian formats for single integers.
S_LONG = Struct("<L")       # unsigned 32-bit
S_SHORT = Struct("<H")      # unsigned 16-bit
S_BYTE = Struct("<B")       # unsigned 8-bit

# Precompiled header layouts.
# S_BLK_HDR is packed in rar3_decompress() as (header crc, block type, flags, header length).
S_BLK_HDR = Struct("<HBHH")
# S_FILE_HDR is packed in rar3_decompress() as (packed size, unpacked size, host os, data crc,
# dos time, version, method, name length, mode).
S_FILE_HDR = Struct("<LLBLLBBHL")
# NOTE(review): presumably the fixed part of an old-style comment block - confirm against its user.
S_COMMENT_HDR = Struct("<HBBH")
def load_vint(buf, pos):
    """Load RAR5 variable-size int.

    Each byte carries 7 value bits, lowest bits first; a byte below 0x80
    ends the number.  At most 11 bytes are looked at.

    Returns (value, position after the number).
    Raises BadRarFile if no final byte is found.
    """
    end = min(pos + 11, len(buf))
    value = 0
    for nbyte, idx in enumerate(range(pos, end)):
        byte = buf[idx]
        value += (byte & 0x7F) << (7 * nbyte)
        if byte < 0x80:
            return value, idx + 1
    raise BadRarFile("cannot load vint")
def load_byte(buf, pos):
    """Load single byte

    Returns (value, next position), raises BadRarFile when `buf` is too short.
    """
    nxt = pos + 1
    if len(buf) < nxt:
        raise BadRarFile("cannot load byte")
    (value,) = S_BYTE.unpack_from(buf, pos)
    return value, nxt
def load_le32(buf, pos):
    """Load little-endian 32-bit integer

    Returns (value, next position), raises BadRarFile when `buf` is too short.
    """
    nxt = pos + 4
    if len(buf) < nxt:
        raise BadRarFile("cannot load le32")
    (value,) = S_LONG.unpack_from(buf, pos)
    return value, nxt
def load_bytes(buf, num, pos):
    """Load sequence of bytes

    Returns (`num` bytes starting at `pos`, next position), raises
    BadRarFile when `buf` does not hold that many.
    """
    stop = pos + num
    if len(buf) < stop:
        raise BadRarFile("cannot load bytes")
    chunk = buf[pos:stop]
    return chunk, stop
def load_vstr(buf, pos):
    """Load bytes prefixed by vint length

    Returns (bytes, next position).
    """
    length, start = load_vint(buf, pos)
    return load_bytes(buf, length, start)
def load_dostime(buf, pos):
    """Load LE32 dos timestamp

    Returns (datetime, next position).
    """
    raw, nxt = load_le32(buf, pos)
    return to_datetime(parse_dos_time(raw)), nxt
def load_unixtime(buf, pos):
    """Load LE32 unix timestamp

    Returns (datetime in UTC, next position).
    """
    seconds, nxt = load_le32(buf, pos)
    return datetime.fromtimestamp(seconds, timezone.utc), nxt
def load_windowstime(buf, pos):
    """Load LE64 windows timestamp

    The value counts 100ns units from the windows epoch.
    Returns (datetime in UTC, next position).
    """
    # unix epoch (1970) in seconds from windows epoch (1601)
    unix_epoch = 11644473600
    low, pos = load_le32(buf, pos)
    high, pos = load_le32(buf, pos)
    ticks = (high << 32) | low
    secs, n1secs = divmod(ticks, 10000000)
    stamp = datetime.fromtimestamp(secs - unix_epoch, timezone.utc)
    return to_nsdatetime(stamp, n1secs * 100), pos
def _next_newvol(volfile):
    """New-style next volume

    Increments the name at its rightmost ASCII digit.
    Raises BadRarName when the name has no digit.
    """
    for idx in range(len(volfile) - 1, -1, -1):
        if "0" <= volfile[idx] <= "9":
            return _inc_volname(volfile, idx)
    raise BadRarName("Cannot construct volume name: " + volfile)
2847 def _next_oldvol(volfile):
2848 """Old-style next volume
2850 # rar -> r00
2851 if volfile[-4:].lower() == ".rar":
2852 return volfile[:-2] + "00"
2853 return _inc_volname(volfile, len(volfile) - 1)
2856 def _inc_volname(volfile, i):
2857 """increase digits with carry, otherwise just increment char
2859 fn = list(volfile)
2860 while i >= 0:
2861 if fn[i] != "9":
2862 fn[i] = chr(ord(fn[i]) + 1)
2863 break
2864 fn[i] = "0"
2865 i -= 1
2866 return "".join(fn)
def _parse_ext_time(h, data, pos):
    """Parse all RAR3 extended time fields

    One 16-bit flags word holds a 4-bit group per timestamp, in order
    mtime, ctime, atime, arctime from the top bits down.  Results are
    stored into `h`; the position after the parsed data is returned.
    """
    # flags and rest of data can be missing
    flags = 0
    if len(data) >= pos + 2:
        (flags,) = S_SHORT.unpack_from(data, pos)
        pos += 2

    mtime, pos = _parse_xtime(flags >> 12, data, pos, h.mtime)
    h.ctime, pos = _parse_xtime(flags >> 8, data, pos)
    h.atime, pos = _parse_xtime(flags >> 4, data, pos)
    h.arctime, pos = _parse_xtime(flags, data, pos)
    if mtime:
        # keep both representations of mtime in sync
        h.mtime = mtime
        h.date_time = mtime.timetuple()[:6]
    return pos
2888 def _parse_xtime(flag, data, pos, basetime=None):
2889 """Parse one RAR3 extended time field
2891 res = None
2892 if flag & 8:
2893 if not basetime:
2894 basetime, pos = load_dostime(data, pos)
2896 # load second fractions of 100ns units
2897 rem = 0
2898 cnt = flag & 3
2899 for _ in range(cnt):
2900 b, pos = load_byte(data, pos)
2901 rem = (b << 16) | (rem >> 8)
2903 # dostime has room for 30 seconds only, correct if needed
2904 if flag & 4 and basetime.second < 59:
2905 basetime = basetime.replace(second=basetime.second + 1)
2907 res = to_nsdatetime(basetime, rem * 100)
2908 return res, pos
def is_filelike(obj):
    """Filename or file object?

    Returns False for names (bytes, str, Path) and True for objects
    that have ``read``, ``tell`` and ``seek``.  Anything else raises
    ValueError.
    """
    if isinstance(obj, (bytes, str, Path)):
        return False
    if not all(hasattr(obj, attr) for attr in ("read", "tell", "seek")):
        raise ValueError("Invalid object passed as file")
    return True
def rar3_s2k(pwd, salt):
    """String-to-key hash for RAR3.

    Returns (16-byte key, 16-byte iv).
    """
    if not isinstance(pwd, str):
        pwd = pwd.decode("utf8")
    # has to be mutable, the rarbug emulation may rewrite it while hashing
    seed = bytearray(pwd.encode("utf-16le") + salt)
    ctx = Rar3Sha1(rarbug=True)
    iv_bytes = []
    rounds = 0x4000
    for n in range(16 * rounds):
        counter = S_LONG.pack(n)
        ctx.update(seed)
        ctx.update(counter[:3])
        if n % rounds == 0:
            # one iv byte at the start of each block of rounds
            iv_bytes.append(ctx.digest()[19:20])
    iv = b"".join(iv_bytes)
    key_be = ctx.digest()[:16]
    # swap byte order of the four 32-bit words
    key_le = pack("<LLLL", *unpack(">LLLL", key_be))
    return key_le, iv
def rar3_decompress(vers, meth, data, declen=0, flags=0, crc=0, pwd=None, salt=None):
    """Decompress blob of compressed data.

    Used for data with non-standard header - eg. comments.

    The blob is wrapped into a minimal single-file archive written to
    a temp file, which is then given to the extraction tool.
    """
    # already uncompressed?
    if meth == RAR_M0 and not flags & RAR_FILE_PASSWORD:
        return data

    # take only necessary flags
    wanted = RAR_FILE_PASSWORD | RAR_FILE_SALT | RAR_FILE_DICTMASK
    flags = (flags & wanted) | RAR_LONG_BLOCK

    # file header
    fname = b"data"
    date = ((2010 - 1980) << 25) + (12 << 21) + (31 << 16)
    mode = 0x20
    fhdr = S_FILE_HDR.pack(len(data), declen, RAR_OS_MSDOS, crc,
                           date, vers, meth, len(fname), mode) + fname
    if salt:
        fhdr += salt

    # full header: pack with zero crc first, then again with the real one
    hlen = S_BLK_HDR.size + len(fhdr)
    draft = S_BLK_HDR.pack(0, RAR_BLOCK_FILE, flags, hlen) + fhdr
    hcrc = crc32(draft[2:]) & 0xFFFF
    hdr = S_BLK_HDR.pack(hcrc, RAR_BLOCK_FILE, flags, hlen) + fhdr

    # archive main header
    mh = S_BLK_HDR.pack(0x90CF, RAR_BLOCK_MAIN, 0, 13) + b"\0" * (2 + 4)

    # decompress via temp rar
    setup = tool_setup()
    tmpfd, tmpname = mkstemp(suffix=".rar", dir=HACK_TMP_DIR)
    tmpf = os.fdopen(tmpfd, "wb")
    try:
        tmpf.write(RAR_ID + mh + hdr + data)
        tmpf.close()

        # password is passed on only for encrypted data
        curpwd = (pwd or None) if flags & RAR_FILE_PASSWORD else None
        cmd = setup.open_cmdline(curpwd, tmpname)
        proc = custom_popen(cmd)
        return proc.communicate()[0]
    finally:
        tmpf.close()
        os.unlink(tmpname)
def sanitize_filename(fname, pathsep, is_win32):
    """Simulate unrar sanitization.

    Replaces bad characters with "_", drops a leading drive letter on
    win32, removes empty, "." and ".." path elements and joins the
    remaining ones with `pathsep`.
    """
    if is_win32:
        # drop drive letter
        if fname[1:2] == ":":
            fname = fname[2:]
        bad_chars = RC_BAD_CHARS_WIN32
    else:
        bad_chars = RC_BAD_CHARS_UNIX
    fname = bad_chars.sub("_", fname)

    kept = []
    for elem in fname.split("/"):
        if elem in ("", ".", ".."):
            continue
        if is_win32 and elem.endswith((" ", ".")):
            # trailing space or dot is replaced on win32
            elem = elem[:-1] + "_"
        kept.append(elem)
    return pathsep.join(kept)
def empty_read(src, size, blklen):
    """Read and drop fixed amount of data.

    Reads `size` bytes from `src` in pieces of at most `blklen` bytes.
    Raises BadRarFile if `src` runs out of data before that.
    """
    left = size
    while left > 0:
        chunk = src.read(min(left, blklen))
        if not chunk:
            raise BadRarFile("cannot load data")
        left -= len(chunk)
def to_datetime(t):
    """Convert 6-part time tuple into datetime object.

    Values that do not form a valid date are clamped into range first.
    """
    year, mon, day, h, m, s = t

    # assume the values are valid
    try:
        return datetime(year, mon, day, h, m, s)
    except ValueError:
        pass

    # sanitize invalid values
    month_days = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
    mon = max(1, min(mon, 12))
    day = max(1, min(day, month_days[mon]))
    h, m, s = min(h, 23), min(m, 59), min(s, 59)
    if (mon, day) == (2, 29):
        # Feb 29 exists in leap years only
        try:
            return datetime(year, 2, 29, h, m, s)
        except ValueError:
            day = 28
    return datetime(year, mon, day, h, m, s)
def parse_dos_time(stamp):
    """Parse standard 32-bit DOS timestamp.

    Returns tuple (year, month, day, hour, minute, second); seconds are
    stored in units of two.
    """
    sec2 = stamp & 0x1F
    mn = (stamp >> 5) & 0x3F
    hr = (stamp >> 11) & 0x1F
    day = (stamp >> 16) & 0x1F
    mon = (stamp >> 21) & 0x0F
    yr = ((stamp >> 25) & 0x7F) + 1980
    return (yr, mon, day, hr, mn, sec2 * 2)
3067 # pylint: disable=arguments-differ,signature-differs
class nsdatetime(datetime):
    """Datetime that carries nanoseconds.

    Arithmetic not supported, will lose nanoseconds.

    Constructing a value whose nanoseconds are a whole number of
    microseconds returns a plain :class:`datetime` instead.

    .. versionadded:: 4.0
    """
    __slots__ = ("nanosecond",)
    nanosecond: int     #: Number of nanoseconds, 0 <= nanosecond <= 999999999

    def __new__(cls, year, month, day, hour=0, minute=0, second=0,
                microsecond=0, tzinfo=None, *, fold=0, nanosecond=0):
        # nanosecond, when given, takes precedence over microsecond
        usec, mod = divmod(nanosecond, 1000) if nanosecond else (microsecond, 0)
        if mod == 0:
            # nothing below microseconds, plain datetime is enough
            return datetime(year, month, day, hour, minute, second, usec, tzinfo, fold=fold)
        self = super().__new__(cls, year, month, day, hour, minute, second, usec, tzinfo, fold=fold)
        self.nanosecond = nanosecond
        return self

    @staticmethod
    def _nsec_of(other):
        """Return sub-second part of a datetime in nanoseconds."""
        if isinstance(other, nsdatetime):
            return other.nanosecond
        return other.microsecond * 1000

    def isoformat(self, sep="T", timespec="auto"):
        """Formats with nanosecond precision by default.
        """
        if timespec == "auto":
            # swap the 6-digit fraction for 9 digits, keep what follows it
            pre, post = super().isoformat(sep, "microseconds").split(".", 1)
            return f"{pre}.{self.nanosecond:09d}{post[6:]}"
        return super().isoformat(sep, timespec)

    def astimezone(self, tz=None):
        """Convert to new timezone.
        """
        tmp = super().astimezone(tz)
        return self.__class__(tmp.year, tmp.month, tmp.day, tmp.hour, tmp.minute, tmp.second,
                              nanosecond=self.nanosecond, tzinfo=tmp.tzinfo, fold=tmp.fold)

    def replace(self, year=None, month=None, day=None, hour=None, minute=None, second=None,
                microsecond=None, tzinfo=None, *, fold=None, nanosecond=None):
        """Return new timestamp with specified fields replaced.

        `nanosecond` takes precedence over `microsecond` if both are given.
        """
        if nanosecond is None:
            nanosecond = self.nanosecond if microsecond is None else microsecond * 1000
        return self.__class__(
            self.year if year is None else year,
            self.month if month is None else month,
            self.day if day is None else day,
            self.hour if hour is None else hour,
            self.minute if minute is None else minute,
            self.second if second is None else second,
            nanosecond=nanosecond,
            tzinfo=self.tzinfo if tzinfo is None else tzinfo,
            fold=self.fold if fold is None else fold)

    def __hash__(self):
        return hash((super().__hash__(), self.nanosecond)) if self.nanosecond else super().__hash__()

    def __eq__(self, other):
        base = super().__eq__(other)
        # NotImplemented must be handed back untouched - it is not a
        # boolean and `other` may not be a datetime at all
        if base is NotImplemented or not base:
            return base
        return self.nanosecond == self._nsec_of(other)

    def __gt__(self, other):
        base = super().__gt__(other)
        if base is NotImplemented or base:
            return base
        # same up to microseconds: nanoseconds decide
        return super().__eq__(other) and self.nanosecond > self._nsec_of(other)

    def __lt__(self, other):
        return not (self > other or self == other)

    def __ge__(self, other):
        return not self < other

    def __le__(self, other):
        return not self > other

    def __ne__(self, other):
        return not self == other
def to_nsdatetime(dt, nsec):
    """Apply nanoseconds to datetime.

    Returns `dt` itself when `nsec` is zero or missing, otherwise a new
    timestamp with the date, time, tzinfo and fold of `dt` and `nsec`
    as its nanoseconds.
    """
    if nsec:
        return nsdatetime(dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second,
                          tzinfo=dt.tzinfo, fold=dt.fold, nanosecond=nsec)
    return dt
def to_nsecs(dt):
    """Convert datetime instance to nanoseconds.

    The result is the POSIX timestamp of `dt` expressed in nanoseconds.
    """
    # Whole seconds come from a timestamp that has no fraction.  With the
    # fraction included, int() truncates toward zero, which is one second
    # off for times before 1970, and float rounding may bump the value for
    # dates far away from the epoch.
    secs = int(dt.replace(microsecond=0).timestamp())
    nsecs = dt.nanosecond if isinstance(dt, nsdatetime) else dt.microsecond * 1000
    return secs * 1000000000 + nsecs
def custom_popen(cmd):
    """Disconnect cmd from parent fds, read only from stdout.

    stderr is merged into stdout, stdin is connected to the null device.
    Raises RarCannotExec when the command is missing or may not be
    executed; other OS errors are passed on unchanged.
    """
    creationflags = 0x08000000 if WIN32 else 0   # CREATE_NO_WINDOW
    try:
        return Popen(cmd, bufsize=0, stdout=PIPE, stderr=STDOUT, stdin=DEVNULL,
                     creationflags=creationflags)
    except OSError as ex:
        if ex.errno == errno.ENOENT:
            raise RarCannotExec("Unrar not installed?")
        if ex.errno in (errno.EACCES, errno.EPERM):
            raise RarCannotExec("Cannot execute unrar")
        raise
def check_returncode(code, out, errmap):
    """Raise exception according to unrar exit code.

    `errmap` is a sequence of exception classes indexed by exit code.
    Nothing happens for code 0.  The message consists of the docstring
    of the exception class, the code, and `out` if it is not empty.
    """
    if code == 0:
        return

    # pick exception class
    if 0 < code < len(errmap):
        exc = errmap[code]
    elif code == 255:
        exc = RarUserBreak
    elif code < 0:
        exc = RarSignalExit
    else:
        exc = RarUnknownError

    # format message
    msg = "%s [%d]: %s" % (exc.__doc__, code, out) if out else "%s [%d]" % (exc.__doc__, code)
    raise exc(msg)
def membuf_tempfile(memfile):
    """Write in-memory file object to real file.

    Copies `memfile` from its start into a new temp file and returns
    the name of that file.  If copying fails the temp file is removed
    and the error is passed on.
    """
    memfile.seek(0, 0)

    fd, path = mkstemp(suffix=".rar", dir=HACK_TMP_DIR)
    out = os.fdopen(fd, "wb")

    try:
        shutil.copyfileobj(memfile, out, BSIZE)
        out.close()
    except BaseException:
        # no half-written file is left behind, whatever the reason was
        out.close()
        os.unlink(path)
        raise
    return path
3218 # Find working command-line tool
class ToolSetup:
    """Command line builder for one extraction tool.

    `setup` is a config mapping with keys ``open_cmd``, ``check_cmd``,
    ``password``, ``no_password`` and ``errmap``.
    """

    def __init__(self, setup):
        self.setup = setup

    def check(self):
        """Return True if the check command can be run and exits with 0."""
        cmdline = self.get_cmdline("check_cmd", None)
        try:
            p = custom_popen(cmdline)
            p.communicate()
            return p.returncode == 0
        except RarCannotExec:
            return False

    def open_cmdline(self, pwd, rarfn, filefn=None):
        """Return command line that writes archive content to stdout.

        `filefn`, if given, is added as the name of the entry to extract.
        """
        cmdline = self.get_cmdline("open_cmd", pwd)
        cmdline.append(rarfn)
        if filefn:
            self.add_file_arg(cmdline, filefn)
        return cmdline

    def get_errmap(self):
        """Return sequence that maps exit code to exception class."""
        return self.setup["errmap"]

    def get_cmdline(self, key, pwd, nodash=False):
        """Return command line for config entry `key` as a new list.

        Password switches are added, followed by ``--`` unless `nodash`.
        """
        cmdline = list(self.setup[key])
        # first element names the module-level variable that holds the executable
        cmdline[0] = globals()[cmdline[0]]
        self.add_password_arg(cmdline, pwd)
        if not nodash:
            cmdline.append("--")
        return cmdline

    def add_file_arg(self, cmdline, filename):
        """Append name of archive entry to commandline."""
        cmdline.append(filename)

    def add_password_arg(self, cmdline, pwd):
        """Append password switch to commandline.

        Raises RarCannotExec if a password is given but the tool has
        no password switch configured.
        """
        if pwd is None:
            cmdline.extend(self.setup["no_password"])
            return

        if not isinstance(pwd, str):
            pwd = pwd.decode("utf8")
        args = self.setup["password"]
        if args is None:
            # no switch configured for this tool
            tool = self.setup["open_cmd"][0]
            raise RarCannotExec(f"{tool} does not support passwords")
        if isinstance(args, str):
            # switch and password form a single argument
            cmdline.append(args + pwd)
        else:
            cmdline.extend(args)
            cmdline.append(pwd)
# Tool configurations, consumed by ToolSetup.
#
# The first element of "open_cmd" / "check_cmd" is the name of a module-level
# variable, which ToolSetup.get_cmdline() looks up in globals().
# "password" given as a string is joined with the password into one argument;
# given as a tuple, the password is appended as a separate argument.
# "no_password" holds the arguments used when there is no password.
UNRAR_CONFIG = {
    "open_cmd": ("UNRAR_TOOL", "p", "-inul"),
    "check_cmd": ("UNRAR_TOOL", "-inul"),
    "password": "-p",
    "no_password": ("-p-",),
    # map return code to exception class, codes from rar.txt
    "errmap": [None,
               RarWarning, RarFatalError, RarCRCError, RarLockedArchiveError,    # 1..4
               RarWriteError, RarOpenError, RarUserError, RarMemoryError,        # 5..8
               RarCreateError, RarNoFilesError, RarWrongPassword]                # 9..11
}

# Problems with unar RAR backend:
# - Does not support RAR2 locked files [fails to read]
# - Does not support RAR5 Blake2sp hash [reading works]
UNAR_CONFIG = {
    "open_cmd": ("UNAR_TOOL", "-q", "-o", "-"),
    "check_cmd": ("UNAR_TOOL", "-version"),
    "password": ("-p",),
    "no_password": ("-p", ""),
    "errmap": [None],
}

# Problems with libarchive RAR backend:
# - Does not support solid archives.
# - Does not support password-protected archives.
# - Does not support RARVM-based compression filters.
BSDTAR_CONFIG = {
    "open_cmd": ("BSDTAR_TOOL", "-x", "--to-stdout", "-f"),
    "check_cmd": ("BSDTAR_TOOL", "--version"),
    "password": None,
    "no_password": (),
    "errmap": [None],
}

# ToolSetup chosen by tool_setup(), None until a tool has been picked
CURRENT_SETUP = None
def tool_setup(unrar=True, unar=True, bsdtar=True, force=False):
    """Pick a tool, return cached ToolSetup.

    The enabled tools are tried in order unrar, unar, bsdtar and the
    first one that passes its check is remembered.  With `force` the
    remembered choice is dropped first.

    Raises RarCannotExec if none of the enabled tools works.
    """
    global CURRENT_SETUP
    if force:
        CURRENT_SETUP = None
    if CURRENT_SETUP is not None:
        return CURRENT_SETUP

    choices = ((unrar, UNRAR_CONFIG), (unar, UNAR_CONFIG), (bsdtar, BSDTAR_CONFIG))
    for enabled, conf in choices:
        if not enabled:
            continue
        candidate = ToolSetup(conf)
        if candidate.check():
            CURRENT_SETUP = candidate
            return candidate
    raise RarCannotExec("Cannot find working tool")
def main(args):
    """Minimal command-line interface for rarfile module.
    """
    # NOTE: the docstring above doubles as the help text of the parser.
    import argparse
    parser = argparse.ArgumentParser(description=main.__doc__)
    actions = parser.add_mutually_exclusive_group(required=True)
    actions.add_argument("-l", "--list", metavar="<rarfile>",
                         help="Show archive listing")
    actions.add_argument("-e", "--extract", nargs=2,
                         metavar=("<rarfile>", "<output_dir>"),
                         help="Extract archive into target dir")
    actions.add_argument("-t", "--test", metavar="<rarfile>",
                         help="Test if a archive is valid")
    opts = parser.parse_args(args)

    # exactly one of the actions is set
    if opts.list:
        with RarFile(opts.list) as rf:
            rf.printdir()
    elif opts.test:
        with RarFile(opts.test) as rf:
            rf.testrar()
    elif opts.extract:
        archive, outdir = opts.extract
        with RarFile(archive) as rf:
            rf.extractall(outdir)
# Run the command-line interface when the module is executed as a script.
if __name__ == "__main__":
    main(sys.argv[1:])