Change is_dir open exception
[rarfile.git] / rarfile.py
blob2bb638ba7694c84652e61ed5888b099374b651d7
1 # rarfile.py
3 # Copyright (c) 2005-2020 Marko Kreen <markokr@gmail.com>
5 # Permission to use, copy, modify, and/or distribute this software for any
6 # purpose with or without fee is hereby granted, provided that the above
7 # copyright notice and this permission notice appear in all copies.
9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 r"""RAR archive reader.
This is a Python module for reading RAR archives.  The interface
is made as :mod:`zipfile`-like as possible.
22 Basic logic:
23 - Parse archive structure with Python.
24 - Extract non-compressed files with Python
25 - Extract compressed files with unrar.
26 - Optionally write compressed data to temp file to speed up unrar,
27 otherwise it needs to scan whole archive on each execution.
29 Example::
31 import rarfile
33 rf = rarfile.RarFile("myarchive.rar")
34 for f in rf.infolist():
35 print(f.filename, f.file_size)
36 if f.filename == "README":
37 print(rf.read(f))
39 Archive files can also be accessed via file-like object returned
40 by :meth:`RarFile.open`::
42 import rarfile
44 with rarfile.RarFile("archive.rar") as rf:
45 with rf.open("README") as f:
46 for ln in f:
47 print(ln.strip())
49 For decompression to work, either ``unrar`` or ``unar`` tool must be in PATH.
51 """
53 import errno
54 import io
55 import os
56 import re
57 import shutil
58 import struct
59 import sys
60 import warnings
61 from binascii import crc32, hexlify
62 from datetime import datetime, timedelta, timezone
63 from hashlib import blake2s, pbkdf2_hmac, sha1
64 from pathlib import Path
65 from struct import Struct, pack, unpack
66 from subprocess import PIPE, STDOUT, Popen
67 from tempfile import mkstemp
# only needed for encrypted headers
#
# Two AES backends are tried in order: ``cryptography`` first, then
# ``Crypto`` (imported as ``Crypto.Cipher.AES``).  Whichever imports
# successfully defines ``AES_CBC_Decrypt``; if neither is importable the
# module still loads and ``_have_crypto`` is set to 0.
try:
    try:
        from cryptography.hazmat.backends import default_backend
        from cryptography.hazmat.primitives.ciphers import (
            Cipher, algorithms, modes,
        )

        class AES_CBC_Decrypt:
            """AES-CBC decryptor built on ``cryptography``.

            ``self.decrypt`` is the bound ``update`` method of the decryptor.
            """
            def __init__(self, key, iv):
                ciph = Cipher(algorithms.AES(key), modes.CBC(iv), default_backend())
                self.decrypt = ciph.decryptor().update

    except ImportError:
        from Crypto.Cipher import AES

        class AES_CBC_Decrypt:
            """AES-CBC decryptor built on ``Crypto.Cipher.AES``.

            ``self.decrypt`` is the bound ``decrypt`` method of the cipher.
            """
            def __init__(self, key, iv):
                self.decrypt = AES.new(key, AES.MODE_CBC, iv).decrypt
    # reached only when one of the two backends imported successfully
    _have_crypto = 1
except ImportError:
    # neither backend is available
    _have_crypto = 0
def tohex(data):
    """Return the hexadecimal representation of *data* as a ``str``."""
    encoded = hexlify(data)
    return str(encoded, "ascii")
__version__ = "4.0a1"

# export only interesting items
__all__ = ["is_rarfile", "is_rarfile_sfx", "RarInfo", "RarFile", "RarExtFile"]

##
## Module configuration.  Can be tuned after importing.
##

#: executable for unrar tool
UNRAR_TOOL = "unrar"

#: executable for unar tool
UNAR_TOOL = "unar"

#: executable for bsdtar tool
BSDTAR_TOOL = "bsdtar"

#: default fallback charset
DEFAULT_CHARSET = "windows-1252"

#: list of encodings to try, with fallback to DEFAULT_CHARSET if none succeed
TRY_ENCODINGS = ("utf8", "utf-16le")

#: whether to speed up decompression by using tmp archive
USE_EXTRACT_HACK = 1

#: limit the filesize for tmp archive usage
#: (entries with a larger uncompressed size skip the tmp archive path)
HACK_SIZE_LIMIT = 20 * 1024 * 1024

#: set specific directory for mkstemp() used by hack dir usage
#: (None is passed through to mkstemp() as its ``dir`` argument)
HACK_TMP_DIR = None

#: Separator for path name components.  Always "/".
PATH_SEP = "/"
##
## rar constants
##

# block types (each value is the ASCII code of the letter in the comment)
RAR_BLOCK_MARK = 0x72  # r
RAR_BLOCK_MAIN = 0x73  # s
RAR_BLOCK_FILE = 0x74  # t
RAR_BLOCK_OLD_COMMENT = 0x75  # u
RAR_BLOCK_OLD_EXTRA = 0x76  # v
RAR_BLOCK_OLD_SUB = 0x77  # w
RAR_BLOCK_OLD_RECOVERY = 0x78  # x
RAR_BLOCK_OLD_AUTH = 0x79  # y
RAR_BLOCK_SUB = 0x7a  # z
RAR_BLOCK_ENDARC = 0x7b  # {

# flags for RAR_BLOCK_MAIN
RAR_MAIN_VOLUME = 0x0001
RAR_MAIN_COMMENT = 0x0002
RAR_MAIN_LOCK = 0x0004
RAR_MAIN_SOLID = 0x0008
RAR_MAIN_NEWNUMBERING = 0x0010
RAR_MAIN_AUTH = 0x0020
RAR_MAIN_RECOVERY = 0x0040
RAR_MAIN_PASSWORD = 0x0080
RAR_MAIN_FIRSTVOLUME = 0x0100
RAR_MAIN_ENCRYPTVER = 0x0200

# flags for RAR_BLOCK_FILE
RAR_FILE_SPLIT_BEFORE = 0x0001
RAR_FILE_SPLIT_AFTER = 0x0002
RAR_FILE_PASSWORD = 0x0004
RAR_FILE_COMMENT = 0x0008
RAR_FILE_SOLID = 0x0010
# the RAR_FILE_DICT* values below are all sub-values of this three-bit mask
RAR_FILE_DICTMASK = 0x00e0
RAR_FILE_DICT64 = 0x0000
RAR_FILE_DICT128 = 0x0020
RAR_FILE_DICT256 = 0x0040
RAR_FILE_DICT512 = 0x0060
RAR_FILE_DICT1024 = 0x0080
RAR_FILE_DICT2048 = 0x00a0
RAR_FILE_DICT4096 = 0x00c0
# same value as RAR_FILE_DICTMASK: a directory has all three mask bits set,
# which is why directory checks compare for equality instead of non-zero
RAR_FILE_DIRECTORY = 0x00e0
RAR_FILE_LARGE = 0x0100
RAR_FILE_UNICODE = 0x0200
RAR_FILE_SALT = 0x0400
RAR_FILE_VERSION = 0x0800
RAR_FILE_EXTTIME = 0x1000
RAR_FILE_EXTFLAGS = 0x2000

# flags for RAR_BLOCK_ENDARC
RAR_ENDARC_NEXT_VOLUME = 0x0001
RAR_ENDARC_DATACRC = 0x0002
RAR_ENDARC_REVSPACE = 0x0004
RAR_ENDARC_VOLNR = 0x0008

# flags common to all blocks
RAR_SKIP_IF_UNKNOWN = 0x4000
RAR_LONG_BLOCK = 0x8000

# Host OS types
RAR_OS_MSDOS = 0  #: MSDOS (only in RAR3)
RAR_OS_OS2 = 1  #: OS2 (only in RAR3)
RAR_OS_WIN32 = 2  #: Windows
RAR_OS_UNIX = 3  #: UNIX
RAR_OS_MACOS = 4  #: MacOS (only in RAR3)
RAR_OS_BEOS = 5  #: BeOS (only in RAR3)

# Compression methods - "0".."5" (values are the ASCII codes of those digits)
RAR_M0 = 0x30  #: No compression.
RAR_M1 = 0x31  #: Compression level `-m1` - Fastest compression.
RAR_M2 = 0x32  #: Compression level `-m2`.
RAR_M3 = 0x33  #: Compression level `-m3`.
RAR_M4 = 0x34  #: Compression level `-m4`.
RAR_M5 = 0x35  #: Compression level `-m5` - Maximum compression.
#
# RAR5 constants
#

# block types
RAR5_BLOCK_MAIN = 1
RAR5_BLOCK_FILE = 2
RAR5_BLOCK_SERVICE = 3
RAR5_BLOCK_ENCRYPTION = 4
RAR5_BLOCK_ENDARC = 5

# flags common to all blocks
RAR5_BLOCK_FLAG_EXTRA_DATA = 0x01
RAR5_BLOCK_FLAG_DATA_AREA = 0x02
RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN = 0x04
RAR5_BLOCK_FLAG_SPLIT_BEFORE = 0x08
RAR5_BLOCK_FLAG_SPLIT_AFTER = 0x10
RAR5_BLOCK_FLAG_DEPENDS_PREV = 0x20
RAR5_BLOCK_FLAG_KEEP_WITH_PARENT = 0x40

# flags for RAR5_BLOCK_MAIN
RAR5_MAIN_FLAG_ISVOL = 0x01
RAR5_MAIN_FLAG_HAS_VOLNR = 0x02
RAR5_MAIN_FLAG_SOLID = 0x04
RAR5_MAIN_FLAG_RECOVERY = 0x08
RAR5_MAIN_FLAG_LOCKED = 0x10

# flags for RAR5_BLOCK_FILE
RAR5_FILE_FLAG_ISDIR = 0x01
RAR5_FILE_FLAG_HAS_MTIME = 0x02
RAR5_FILE_FLAG_HAS_CRC32 = 0x04
RAR5_FILE_FLAG_UNKNOWN_SIZE = 0x08

RAR5_COMPR_SOLID = 0x40

RAR5_ENC_FLAG_HAS_CHECKVAL = 0x01

RAR5_ENDARC_FLAG_NEXT_VOL = 0x01

# extra-record types (RAR5_XFILE_*)
RAR5_XFILE_ENCRYPTION = 1
RAR5_XFILE_HASH = 2
RAR5_XFILE_TIME = 3
RAR5_XFILE_VERSION = 4
RAR5_XFILE_REDIR = 5
RAR5_XFILE_OWNER = 6
RAR5_XFILE_SERVICE = 7

# flags for the RAR5_XFILE_TIME record
RAR5_XTIME_UNIXTIME = 0x01
RAR5_XTIME_HAS_MTIME = 0x02
RAR5_XTIME_HAS_CTIME = 0x04
RAR5_XTIME_HAS_ATIME = 0x08
RAR5_XTIME_UNIXTIME_NS = 0x10

RAR5_XENC_CIPHER_AES256 = 0

RAR5_XENC_CHECKVAL = 0x01
RAR5_XENC_TWEAKED = 0x02

RAR5_XHASH_BLAKE2SP = 0

# redirection types: first element of the RarInfo.file_redir tuple
RAR5_XREDIR_UNIX_SYMLINK = 1
RAR5_XREDIR_WINDOWS_SYMLINK = 2
RAR5_XREDIR_WINDOWS_JUNCTION = 3
RAR5_XREDIR_HARD_LINK = 4
RAR5_XREDIR_FILE_COPY = 5

# redirection flag bit: second element of the RarInfo.file_redir tuple
RAR5_XREDIR_ISDIR = 0x01

# flags for the RAR5_XFILE_OWNER record
RAR5_XOWNER_UNAME = 0x01
RAR5_XOWNER_GNAME = 0x02
RAR5_XOWNER_UID = 0x04
RAR5_XOWNER_GID = 0x08

RAR5_OS_WINDOWS = 0
RAR5_OS_UNIX = 1

# DOS-style file attribute bits
DOS_MODE_ARCHIVE = 0x20
DOS_MODE_DIR = 0x10
DOS_MODE_SYSTEM = 0x04
DOS_MODE_HIDDEN = 0x02
DOS_MODE_READONLY = 0x01
##
## internal constants
##

# archive signatures; both start with the same six bytes b"Rar!\x1a\x07"
RAR_ID = b"Rar!\x1a\x07\x00"
RAR5_ID = b"Rar!\x1a\x07\x01\x00"
ZERO = b"\0"
EMPTY = b""
UTC = timezone(timedelta(0), "UTC")
# chunk size for bulk reads: 512 KiB on win32, 64 KiB elsewhere
BSIZE = 512 * 1024 if sys.platform == "win32" else 64 * 1024

# upper bound on how much of a file is scanned for an SFX signature
SFX_MAX_SIZE = 2 * 1024 * 1024
# archive format versions returned by the signature checks
RAR_V3 = 3
RAR_V5 = 5

# character classes spliced into the two regexes below
_BAD_CHARS = r"""\x00-\x1F<>|"?*"""
RC_BAD_CHARS_UNIX = re.compile(r"[%s]" % _BAD_CHARS)
# the win32 variant additionally matches ":", "^" and backslash
RC_BAD_CHARS_WIN32 = re.compile(r"[%s:^\\]" % _BAD_CHARS)
def _get_rar_version(xfile):
    """Identify the RAR format version from the start of *xfile*.

    Returns ``RAR_V3`` or ``RAR_V5`` when the file begins with the
    matching signature, otherwise 0.
    """
    with XFile(xfile) as fd:
        head = fd.read(len(RAR5_ID))
    # the RAR3 signature is checked first, as before
    for magic, version in ((RAR_ID, RAR_V3), (RAR5_ID, RAR_V5)):
        if head.startswith(magic):
            return version
    return 0
def _find_sfx_header(xfile):
    """Search the start of *xfile* for an embedded RAR signature.

    The file is read in two steps (64 bytes, then up to ``SFX_MAX_SIZE``
    more); after each read everything collected so far is scanned.

    Returns ``(version, offset)`` where version is ``RAR_V3`` or
    ``RAR_V5`` and offset is the position of the signature, or
    ``(0, 0)`` when no signature was found.
    """
    marker = RAR_ID[:-1]  # prefix shared by both signatures
    collected = io.BytesIO()

    with XFile(xfile) as fd:
        for read_size in (64, SFX_MAX_SIZE):
            chunk = fd.read(read_size)
            if not chunk:
                break
            collected.write(chunk)
            haystack = collected.getvalue()
            hit = haystack.find(marker)
            while hit >= 0:
                if haystack.startswith(RAR_ID, hit):
                    return RAR_V3, hit
                if haystack.startswith(RAR5_ID, hit):
                    return RAR_V5, hit
                # prefix matched but the full signature did not; keep looking
                hit = haystack.find(marker, hit + len(marker))
    return 0, 0
349 ## Public interface
def is_rarfile(xfile):
    """Return True if *xfile* starts with a RAR signature.

    Only the first few bytes are read; SFX archives are not detected.
    """
    version = _get_rar_version(xfile)
    return version > 0
def is_rarfile_sfx(xfile):
    """Return True if *xfile* is a RAR archive, including SFX archives.

    It will read 2M from file.
    """
    version, _offset = _find_sfx_header(xfile)
    return version > 0
class Error(Exception):
    """Root of the rarfile exception hierarchy."""


class BadRarFile(Error):
    """The archive contains incorrect data."""


class NotRarFile(Error):
    """The file is not a RAR archive."""


class BadRarName(Error):
    """Multipart name components cannot be guessed."""


class NoRarEntry(Error):
    """Requested file was not found in the RAR archive."""


class PasswordRequired(Error):
    """The file requires a password."""


class NeedFirstVolume(Error):
    """Processing must start from the first volume.

    Attributes:

        current_volume
            Volume number of current file or None if not known
    """
    def __init__(self, msg, volume):
        self.current_volume = volume
        super().__init__(msg)


class NoCrypto(Error):
    """Encrypted headers cannot be parsed - no crypto available."""


class RarExecError(Error):
    """Problem reported by unrar/rar."""


class RarWarning(RarExecError):
    """Non-fatal error."""


class RarFatalError(RarExecError):
    """Fatal error."""


class RarCRCError(RarExecError):
    """CRC error during unpacking."""


class RarLockedArchiveError(RarExecError):
    """Must not modify locked archive."""


class RarWriteError(RarExecError):
    """Write error."""


class RarOpenError(RarExecError):
    """Open error."""


class RarUserError(RarExecError):
    """User error."""


class RarMemoryError(RarExecError):
    """Memory error."""


class RarCreateError(RarExecError):
    """Create error."""


class RarNoFilesError(RarExecError):
    """No files that match pattern were found."""


class RarUserBreak(RarExecError):
    """User stop."""


class RarWrongPassword(RarExecError):
    """Incorrect password."""


class RarUnknownError(RarExecError):
    """Unknown exit code."""


class RarSignalExit(RarExecError):
    """Unrar exited with signal."""


class RarCannotExec(RarExecError):
    """Executable not found."""


class UnsupportedWarning(UserWarning):
    """Warning category for issues with a RAR archive.

    .. versionadded:: 4.0
    """
class RarInfo:
    r"""An entry in rar archive.

    RAR3 extended timestamps are :class:`datetime.datetime` objects without timezone.
    RAR5 extended timestamps are :class:`datetime.datetime` objects with UTC timezone.

    Attributes:

        filename
            File name with relative path.
            Path separator is "/".  Always unicode string.

        date_time
            File modification timestamp.   As tuple of (year, month, day, hour, minute, second).
            RAR5 allows archives where it is missing, it's None then.

        comment
            Optional file comment field.  Unicode string.  (RAR3-only)

        file_size
            Uncompressed size.

        compress_size
            Compressed size.

        compress_type
            Compression method: one of :data:`RAR_M0` .. :data:`RAR_M5` constants.

        extract_version
            Minimal Rar version needed for decompressing.  As (major*10 + minor),
            so 2.9 is 29.

            RAR3: 10, 20, 29

            RAR5 does not have such field in archive, it's simply set to 50.

        host_os
            Host OS type, one of RAR_OS_* constants.

            RAR3: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`, :data:`RAR_OS_MSDOS`,
            :data:`RAR_OS_OS2`, :data:`RAR_OS_BEOS`.

            RAR5: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`.

        mode
            File attributes. May be either dos-style or unix-style, depending on host_os.

        mtime
            File modification time.  Same value as :attr:`date_time`
            but as :class:`datetime.datetime` object with extended precision.

        ctime
            Optional time field: creation time.  As :class:`datetime.datetime` object.

        atime
            Optional time field: last access time.  As :class:`datetime.datetime` object.

        arctime
            Optional time field: archival time.  As :class:`datetime.datetime` object.
            (RAR3-only)

        CRC
            CRC-32 of uncompressed file, unsigned int.

            RAR5: may be None.

        blake2sp_hash
            Blake2SP hash over decompressed data.  (RAR5-only)

        volume
            Volume nr, starting from 0.

        volume_file
            Volume file name, where file starts.

        file_redir
            If not None, file is link of some sort.  Contains tuple of (type, flags, target).
            (RAR5-only)

            Type is one of constants:

                :data:`RAR5_XREDIR_UNIX_SYMLINK`
                    Unix symlink.
                :data:`RAR5_XREDIR_WINDOWS_SYMLINK`
                    Windows symlink.
                :data:`RAR5_XREDIR_WINDOWS_JUNCTION`
                    Windows junction.
                :data:`RAR5_XREDIR_HARD_LINK`
                    Hard link to target.
                :data:`RAR5_XREDIR_FILE_COPY`
                    Current file is copy of another archive entry.

            Flags may contain bits:

                :data:`RAR5_XREDIR_ISDIR`
                    Symlink points to directory.
    """

    # zipfile-compatible fields
    filename = None
    file_size = None
    compress_size = None
    date_time = None
    CRC = None
    volume = None
    orig_filename = None

    # optional extended time fields, datetime() objects.
    mtime = None
    ctime = None
    atime = None

    extract_version = None
    mode = None
    host_os = None
    compress_type = None

    # rar3-only fields
    comment = None
    arctime = None

    # rar5-only fields
    blake2sp_hash = None
    file_redir = None

    # internal fields
    flags = 0
    type = None

    # zipfile compat
    def is_dir(self):
        """Tell whether this entry is a directory.

        Only file blocks with every ``RAR_FILE_DIRECTORY`` bit set count.

        .. versionadded:: 4.0
        """
        if self.type != RAR_BLOCK_FILE:
            return False
        dir_bits = self.flags & RAR_FILE_DIRECTORY
        return dir_bits == RAR_FILE_DIRECTORY

    def is_symlink(self):
        """Tell whether this entry is a symlink.

        The base class always answers False.

        .. versionadded:: 4.0
        """
        return False

    def is_file(self):
        """Tell whether this entry is a normal file.

        The base class always answers False.

        .. versionadded:: 4.0
        """
        return False

    def needs_password(self):
        """Tell whether the entry's data is stored password-protected."""
        if self.type != RAR_BLOCK_FILE:
            return False
        return bool(self.flags & RAR_FILE_PASSWORD)

    def isdir(self):
        """Old spelling of :meth:`is_dir`.

        .. deprecated:: 4.0
        """
        return self.is_dir()
class RarFile:
    """Parse RAR structure, provide access to files in archive.

    Parsing is delegated to a format-specific parser object created in
    :meth:`_parse`; most public methods forward to it.
    """

    #: File name, if available. Unicode string or None.
    filename = None

    #: Archive comment. Unicode string or None.
    comment = None

    def __init__(self, file, mode="r", charset=None, info_callback=None,
                 crc_check=True, errors="stop"):
        """Open and parse a RAR archive.

        Parameters:

            file
                archive file name or file-like object.
            mode
                only "r" is supported.
            charset
                fallback charset to use, if filenames are not already Unicode-enabled.
            info_callback
                debug callback, gets to see all archive entries.
            crc_check
                set to False to disable CRC checks
            errors
                Either "stop" to quietly stop parsing on errors,
                or "strict" to raise errors.  Default is "stop".

        Raises ValueError for an unknown ``errors`` value and
        NotImplementedError for any mode other than "r".
        """
        if is_filelike(file):
            self.filename = getattr(file, "name", None)
        else:
            if isinstance(file, Path):
                file = str(file)
            self.filename = file
        self._rarfile = file

        self._charset = charset or DEFAULT_CHARSET
        self._info_callback = info_callback
        self._crc_check = crc_check
        self._password = None
        self._file_parser = None

        if errors == "stop":
            self._strict = False
        elif errors == "strict":
            self._strict = True
        else:
            raise ValueError("Invalid value for errors= parameter.")

        if mode != "r":
            raise NotImplementedError("RarFile supports only mode=r")

        self._parse()

    def __enter__(self):
        """Open context."""
        return self

    def __exit__(self, typ, value, traceback):
        """Exit context."""
        self.close()

    def __iter__(self):
        """Iterate over members."""
        return iter(self.infolist())

    def setpassword(self, pwd):
        """Sets the password to use when extracting.

        When the headers themselves are encrypted the archive is parsed
        again with the new password; otherwise the password is only handed
        to the existing parser.
        """
        self._password = pwd
        if self._file_parser:
            if self._file_parser.has_header_encryption():
                # drop the parser so the archive is re-parsed below
                self._file_parser = None
        if not self._file_parser:
            self._parse()
        else:
            self._file_parser.setpassword(self._password)

    def needs_password(self):
        """Returns True if any archive entries require password for extraction.
        """
        return self._file_parser.needs_password()

    def namelist(self):
        """Return list of filenames in archive.
        """
        return [f.filename for f in self.infolist()]

    def infolist(self):
        """Return RarInfo objects for all files/directories in archive.
        """
        return self._file_parser.infolist()

    def volumelist(self):
        """Returns filenames of archive volumes.

        In case of single-volume archive, the list contains
        just the name of main archive file.
        """
        return self._file_parser.volumelist()

    def getinfo(self, name):
        """Return RarInfo for file.
        """
        return self._file_parser.getinfo(name)

    def open(self, name, mode="r", pwd=None):
        """Returns file-like object (:class:`RarExtFile`) from where the data can be read.

        The object implements :class:`io.RawIOBase` interface, so it can
        be further wrapped with :class:`io.BufferedReader`
        and :class:`io.TextIOWrapper`.

        The object is seekable, although the seeking is fast only on
        uncompressed files, on compressed files the seeking is implemented
        by reading ahead and/or restarting the decompression.

        Parameters:

            name
                file name or RarInfo instance.
            mode
                must be "r"
            pwd
                password to use for extracting.

        Raises NotImplementedError for other modes,
        :class:`io.UnsupportedOperation` when the entry is a directory and
        :class:`PasswordRequired` when a password is needed but none is set.
        """

        if mode != "r":
            raise NotImplementedError("RarFile.open() supports only mode=r")

        # entry lookup
        inf = self.getinfo(name)
        if inf.is_dir():
            raise io.UnsupportedOperation("Directory does not have any data: " + inf.filename)

        # check password
        if inf.needs_password():
            pwd = pwd or self._password
            if pwd is None:
                raise PasswordRequired("File %s requires password" % inf.filename)
        else:
            # never hand a password to the parser for unprotected entries
            pwd = None

        return self._file_parser.open(inf, pwd)

    def read(self, name, pwd=None):
        """Return uncompressed data for archive entry.

        For longer files using :meth:`RarFile.open` may be better idea.

        Parameters:

            name
                filename or RarInfo instance
            pwd
                password to use for extracting.
        """

        with self.open(name, "r", pwd) as f:
            return f.read()

    def close(self):
        """Release open resources."""
        pass

    def printdir(self, file=None):
        """Print archive file list to stdout or given file.
        """
        if file is None:
            file = sys.stdout
        for f in self.infolist():
            print(f.filename, file=file)

    def extract(self, member, path=None, pwd=None):
        """Extract single file into current directory.

        Parameters:

            member
                filename or :class:`RarInfo` instance
            path
                optional destination path
            pwd
                optional password to use

        Returns the destination file name.
        """
        inf = self.getinfo(member)
        return self._extract_one(inf, path, pwd, True)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all files into current directory.

        Parameters:

            path
                optional destination path
            members
                optional filename or :class:`RarInfo` instance list to extract
            pwd
                optional password to use
        """
        if members is None:
            members = self.namelist()

        done = set()
        dirs = []
        for m in members:
            inf = self.getinfo(m)
            # directory attributes are applied later, after all members
            # have been extracted
            dst = self._extract_one(inf, path, pwd, not inf.is_dir())
            if inf.is_dir():
                if dst not in done:
                    dirs.append((dst, inf))
                    done.add(dst)
        if dirs:
            # destinations are unique, so sorting compares only the paths
            dirs.sort(reverse=True)
            for dst, inf in dirs:
                self._set_attrs(inf, dst)

    def testrar(self, pwd=None):
        """Read all files and test CRC.
        """
        for member in self.infolist():
            if member.is_dir():
                continue
            with self.open(member, 'r', pwd) as f:
                empty_read(f, member.file_size, BSIZE)

    def strerror(self):
        """Return error string if parsing failed or None if no problems.
        """
        if not self._file_parser:
            return "Not a RAR file"
        return self._file_parser.strerror()

    ##
    ## private methods
    ##

    def _parse(self):
        """Run parser for file type

        Raises BadRarFile when no RAR signature is found.
        """
        ver, sfx_ofs = _find_sfx_header(self._rarfile)
        if ver == RAR_V3:
            p3 = RAR3Parser(self._rarfile, self._password, self._crc_check,
                            self._charset, self._strict, self._info_callback,
                            sfx_ofs)
            self._file_parser = p3  # noqa
        elif ver == RAR_V5:
            p5 = RAR5Parser(self._rarfile, self._password, self._crc_check,
                            self._charset, self._strict, self._info_callback,
                            sfx_ofs)
            self._file_parser = p5  # noqa
        else:
            raise BadRarFile("Not a RAR file")

        self._file_parser.parse()
        self.comment = self._file_parser.comment

    def _extract_one(self, info, path, pwd, set_attrs):
        """Extract one entry below *path* and return the destination name.

        Entries that are neither file, directory nor symlink are skipped,
        but the destination name is still returned.
        """
        fname = sanitize_filename(
            info.filename, os.path.sep, sys.platform == "win32"
        )

        if path is None:
            path = os.getcwd()
        else:
            path = os.fspath(path)
        dstfn = os.path.join(path, fname)

        dirname = os.path.dirname(dstfn)
        if dirname and dirname != ".":
            self._makedirs(dirname)

        if info.is_file():
            self._make_file(info, dstfn, pwd, set_attrs)
        elif info.is_dir():
            self._make_dir(info, dstfn, set_attrs)
        elif info.is_symlink():
            self._make_symlink(info, dstfn)

        return dstfn

    def _make_dir(self, info, dstfn, set_attrs):
        """Create directory *dstfn*, optionally applying stored attributes."""
        self._makedirs(dstfn)
        if set_attrs:
            self._set_attrs(info, dstfn)

    def _make_file(self, info, dstfn, pwd, set_attrs):
        """Copy the entry's data into *dstfn*, optionally applying attributes."""
        with self.open(info, "r", pwd) as src:
            with open(dstfn, "wb") as dst:
                shutil.copyfileobj(src, dst)

        if set_attrs:
            self._set_attrs(info, dstfn)

    def _make_symlink(self, info, dstfn):
        """Create symlink *dstfn* for archive entry *info*.

        Link flavours that cannot be created are reported with
        :class:`UnsupportedWarning` and skipped.
        """
        target_is_directory = False
        if info.host_os == RAR_OS_UNIX:
            # link target is obtained through the regular read() path
            link_name = self.read(info)
        elif info.file_redir:
            redir_type, redir_flags, link_name = info.file_redir
            if redir_type == RAR5_XREDIR_WINDOWS_JUNCTION:
                warnings.warn(f"Windows junction not supported - {info.filename}", UnsupportedWarning)
                return
            # ISDIR is a bit of the redirection flags, not of the type
            target_is_directory = (redir_flags & RAR5_XREDIR_ISDIR) > 0
        else:
            # no link target is available; without this branch link_name
            # would be unbound below
            warnings.warn(f"Unsupported link type - {info.filename}", UnsupportedWarning)
            return

        os.symlink(link_name, dstfn, target_is_directory=target_is_directory)

    def _makedirs(self, name):
        """Create directory *name* and any missing parents.

        An empty name is ignored and an existing directory is not an
        error; OSError is raised if the path exists but is not a directory.
        """
        if not name:
            return
        os.makedirs(name, exist_ok=True)

    def _set_attrs(self, info, dstfn):
        """Apply stored permissions and timestamps of *info* to *dstfn*."""
        if info.host_os == RAR_OS_UNIX:
            os.chmod(dstfn, info.mode & 0o777)
        elif info.host_os in (RAR_OS_WIN32, RAR_OS_MSDOS):
            # DOS attributes: only the read-only bit is mapped, by clearing
            # the write permission bits
            if info.mode & DOS_MODE_READONLY:
                st = os.stat(dstfn)
                new_mode = st.st_mode & ~0o222
                os.chmod(dstfn, new_mode & 0o777)

        if info.mtime and hasattr(os, "utime"):
            # atime falls back to mtime when the archive has no atime
            mtime_ns = atime_ns = to_nsecs(info.mtime)
            if info.atime:
                atime_ns = to_nsecs(info.atime)
            os.utime(dstfn, ns=(atime_ns, mtime_ns))
989 # File format parsing
992 class CommonParser:
993 """Shared parser parts."""
994 _main = None
995 _hdrenc_main = None
996 _needs_password = False
997 _fd = None
998 _expect_sig = None
999 _parse_error = None
1000 _password = None
1001 comment = None
1003 def __init__(self, rarfile, password, crc_check, charset, strict, info_cb, sfx_offset):
1004 self._rarfile = rarfile
1005 self._password = password
1006 self._crc_check = crc_check
1007 self._charset = charset
1008 self._strict = strict
1009 self._info_callback = info_cb
1010 self._info_list = []
1011 self._info_map = {}
1012 self._vol_list = []
1013 self._sfx_offset = sfx_offset
1015 def has_header_encryption(self):
1016 """Returns True if headers are encrypted
1018 if self._hdrenc_main:
1019 return True
1020 if self._main:
1021 if self._main.flags & RAR_MAIN_PASSWORD:
1022 return True
1023 return False
1025 def setpassword(self, pwd):
1026 """Set cached password."""
1027 self._password = pwd
1029 def volumelist(self):
1030 """Volume files"""
1031 return self._vol_list
1033 def needs_password(self):
1034 """Is password required"""
1035 return self._needs_password
1037 def strerror(self):
1038 """Last error"""
1039 return self._parse_error
1041 def infolist(self):
1042 """List of RarInfo records.
1044 return self._info_list
1046 def getinfo(self, member):
1047 """Return RarInfo for filename
1049 if isinstance(member, RarInfo):
1050 fname = member.filename
1051 elif isinstance(member, Path):
1052 fname = str(member)
1053 else:
1054 fname = member
1056 if fname.endswith("/"):
1057 fname = fname.rstrip("/")
1059 try:
1060 return self._info_map[fname]
1061 except KeyError:
1062 raise NoRarEntry("No such file: %s" % fname)
1064 def parse(self):
1065 """Process file."""
1066 self._fd = None
1067 try:
1068 self._parse_real()
1069 finally:
1070 if self._fd:
1071 self._fd.close()
1072 self._fd = None
1074 def _parse_real(self):
1075 """Actually read file.
1077 fd = XFile(self._rarfile)
1078 self._fd = fd
1079 fd.seek(self._sfx_offset, 0)
1080 sig = fd.read(len(self._expect_sig))
1081 if sig != self._expect_sig:
1082 if isinstance(self._rarfile, str):
1083 raise NotRarFile("Not a Rar archive: {}".format(self._rarfile))
1084 raise NotRarFile("Not a Rar archive")
1086 volume = 0 # first vol (.rar) is 0
1087 more_vols = False
1088 endarc = False
1089 volfile = self._rarfile
1090 self._vol_list = [self._rarfile]
1091 raise_need_first_vol = False
1092 while True:
1093 if endarc:
1094 h = None # don"t read past ENDARC
1095 else:
1096 h = self._parse_header(fd)
1097 if not h:
1098 if raise_need_first_vol:
1099 # did not find ENDARC with VOLNR
1100 raise NeedFirstVolume("Need to start from first volume", None)
1101 if more_vols:
1102 volume += 1
1103 fd.close()
1104 try:
1105 volfile = self._next_volname(volfile)
1106 fd = XFile(volfile)
1107 except IOError:
1108 self._set_error("Cannot open next volume: %s", volfile)
1109 break
1110 self._fd = fd
1111 sig = fd.read(len(self._expect_sig))
1112 if sig != self._expect_sig:
1113 self._set_error("Invalid volume sig: %s", volfile)
1114 break
1115 more_vols = False
1116 endarc = False
1117 self._vol_list.append(volfile)
1118 self._main = None
1119 continue
1120 break
1121 h.volume = volume
1122 h.volume_file = volfile
1124 if h.type == RAR_BLOCK_MAIN and not self._main:
1125 self._main = h
1126 if volume == 0 and (h.flags & RAR_MAIN_NEWNUMBERING):
1127 # RAR 2.x does not set FIRSTVOLUME,
1128 # so check it only if NEWNUMBERING is used
1129 if (h.flags & RAR_MAIN_FIRSTVOLUME) == 0:
1130 if getattr(h, "main_volume_number", None) is not None:
1131 # rar5 may have more info
1132 raise NeedFirstVolume(
1133 "Need to start from first volume (current: %r)"
1134 % (h.main_volume_number,),
1135 h.main_volume_number
1137 # delay raise until we have volnr from ENDARC
1138 raise_need_first_vol = True
1139 if h.flags & RAR_MAIN_PASSWORD:
1140 self._needs_password = True
1141 if not self._password:
1142 break
1143 elif h.type == RAR_BLOCK_ENDARC:
1144 more_vols = (h.flags & RAR_ENDARC_NEXT_VOLUME) > 0
1145 endarc = True
1146 if raise_need_first_vol and (h.flags & RAR_ENDARC_VOLNR) > 0:
1147 raise NeedFirstVolume(
1148 "Need to start from first volume (current: %r)"
1149 % (h.endarc_volnr,),
1150 h.endarc_volnr
1152 elif h.type == RAR_BLOCK_FILE:
1153 # RAR 2.x does not write RAR_BLOCK_ENDARC
1154 if h.flags & RAR_FILE_SPLIT_AFTER:
1155 more_vols = True
1156 # RAR 2.x does not set RAR_MAIN_FIRSTVOLUME
1157 if volume == 0 and h.flags & RAR_FILE_SPLIT_BEFORE:
1158 raise_need_first_vol = True
1160 if h.needs_password():
1161 self._needs_password = True
1163 # store it
1164 self.process_entry(fd, h)
1166 if self._info_callback:
1167 self._info_callback(h)
1169 # go to next header
1170 if h.add_size > 0:
1171 fd.seek(h.data_offset + h.add_size, 0)
1173 def process_entry(self, fd, item):
1174 """Examine item, add into lookup cache."""
1175 raise NotImplementedError()
1177 def _decrypt_header(self, fd):
1178 raise NotImplementedError("_decrypt_header")
1180 def _parse_block_header(self, fd):
1181 raise NotImplementedError("_parse_block_header")
1183 def _open_hack(self, inf, pwd):
1184 raise NotImplementedError("_open_hack")
1186 def _parse_header(self, fd):
1187 """Read single header
1189 try:
1190 # handle encrypted headers
1191 if (self._main and self._main.flags & RAR_MAIN_PASSWORD) or self._hdrenc_main:
1192 if not self._password:
1193 return None
1194 fd = self._decrypt_header(fd)
1196 # now read actual header
1197 return self._parse_block_header(fd)
1198 except struct.error:
1199 self._set_error("Broken header in RAR file")
1200 return None
1202 def _next_volname(self, volfile):
1203 """Given current vol name, construct next one
1205 if is_filelike(volfile):
1206 raise IOError("Working on single FD")
1207 if self._main.flags & RAR_MAIN_NEWNUMBERING:
1208 return _next_newvol(volfile)
1209 return _next_oldvol(volfile)
1211 def _set_error(self, msg, *args):
1212 if args:
1213 msg = msg % args
1214 self._parse_error = msg
1215 if self._strict:
1216 raise BadRarFile(msg)
1218 def open(self, inf, pwd):
1219 """Return stream object for file data."""
1221 if inf.file_redir:
1222 redir_type, redir_flags, redir_name = inf.file_redir
1223 # cannot leave to unrar as it expects copied file to exist
1224 if redir_type in (RAR5_XREDIR_FILE_COPY, RAR5_XREDIR_HARD_LINK):
1225 inf = self.getinfo(redir_name)
1226 if not inf:
1227 raise BadRarFile("cannot find copied file")
1228 elif redir_type in (
1229 RAR5_XREDIR_UNIX_SYMLINK, RAR5_XREDIR_WINDOWS_SYMLINK,
1230 RAR5_XREDIR_WINDOWS_JUNCTION,
1232 return io.BytesIO(redir_name.encode("utf8"))
1233 if inf.flags & RAR_FILE_SPLIT_BEFORE:
1234 raise NeedFirstVolume("Partial file, please start from first volume: " + inf.filename, None)
1236 # is temp write usable?
1237 use_hack = 1
1238 if not self._main:
1239 use_hack = 0
1240 elif self._main._must_disable_hack():
1241 use_hack = 0
1242 elif inf._must_disable_hack():
1243 use_hack = 0
1244 elif is_filelike(self._rarfile):
1245 pass
1246 elif inf.file_size > HACK_SIZE_LIMIT:
1247 use_hack = 0
1248 elif not USE_EXTRACT_HACK:
1249 use_hack = 0
1251 # now extract
1252 if inf.compress_type == RAR_M0 and (inf.flags & RAR_FILE_PASSWORD) == 0 and inf.file_redir is None:
1253 return self._open_clear(inf)
1254 elif use_hack:
1255 return self._open_hack(inf, pwd)
1256 elif is_filelike(self._rarfile):
1257 return self._open_unrar_membuf(self._rarfile, inf, pwd)
1258 else:
1259 return self._open_unrar(self._rarfile, inf, pwd)
1261 def _open_clear(self, inf):
1262 return DirectReader(self, inf)
def _open_hack_core(self, inf, pwd, prefix, suffix):
    """Copy one entry into a temporary single-file archive and open that.

    The temp file receives *prefix*, then ``inf.compress_size +
    inf.header_size`` bytes read from ``inf.volume_file`` starting at
    ``inf.header_offset``, then *suffix*.  The result is passed to
    :meth:`_open_unrar` both as the archive and as the temp file name.

    Raises BadRarFile when the volume ends before the expected number of
    bytes has been copied.  The temp file is removed on any failure.
    """
    size = inf.compress_size + inf.header_size
    rf = XFile(inf.volume_file, 0)
    try:
        rf.seek(inf.header_offset)

        tmpfd, tmpname = mkstemp(suffix=".rar", dir=HACK_TMP_DIR)
        try:
            # the with-block closes the temp file before any cleanup runs
            with os.fdopen(tmpfd, "wb") as tmpf:
                tmpf.write(prefix)
                while size > 0:
                    buf = rf.read(min(size, BSIZE))
                    if not buf:
                        raise BadRarFile("read failed: " + inf.filename)
                    tmpf.write(buf)
                    size -= len(buf)
                tmpf.write(suffix)
        except BaseException:
            os.unlink(tmpname)
            raise
    finally:
        # always release the volume reader, also when seek/mkstemp fail
        rf.close()

    try:
        return self._open_unrar(tmpname, inf, pwd, tmpname)
    except BaseException:
        # no reader took ownership of the temp archive; do not leave it behind
        try:
            os.unlink(tmpname)
        except OSError:
            pass
        raise
def _open_unrar_membuf(self, memfile, inf, pwd):
    """Dump the in-memory archive into a temp file and extract from it.

    Needed for solid archives.  The temp file name is passed on as both
    archive path and temp file, with the member name forced.
    """
    dumped_path = membuf_tempfile(memfile)
    return self._open_unrar(dumped_path, inf, pwd, dumped_path, force_file=True)
def _open_unrar(self, rarfile, inf, pwd=None, tmpfile=None, force_file=False):
    """Extract using the external unpack tool.

    Returns a PipeReader built from the tool command line; *tmpfile* is
    passed through to the reader.
    """
    setup = tool_setup()

    # not giving filename avoids encoding related problems
    if not tmpfile or force_file:
        member_name = inf.filename
    else:
        member_name = None

    # read from unrar pipe
    cmdline = setup.open_cmdline(pwd, rarfile, member_name)
    return PipeReader(self, inf, cmdline, tmpfile)
1317 # RAR3 format
class Rar3Info(RarInfo):
    """Header record fields that exist only in the RAR3 format."""
    extract_version = 15
    salt = None
    add_size = 0
    header_crc = None
    header_size = None
    header_offset = None
    data_offset = None
    _md_class = None
    _md_expect = None
    _name_size = None

    # RAR5-only fields, defined here so they can be read on any record
    file_redir = None
    blake2sp_hash = None

    endarc_datacrc = None
    endarc_volnr = None

    def _must_disable_hack(self):
        """Return True when flags on this record rule out the temp-archive hack."""
        if self.type == RAR_BLOCK_FILE:
            blocking = RAR_FILE_PASSWORD | RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER
            return bool(self.flags & blocking)
        if self.type == RAR_BLOCK_MAIN:
            return bool(self.flags & (RAR_MAIN_SOLID | RAR_MAIN_PASSWORD))
        return False

    def is_symlink(self):
        """Returns True if entry is a symlink."""
        if self.type != RAR_BLOCK_FILE:
            return False
        if self.host_os != RAR_OS_UNIX:
            return False
        # 0xA000 in the file-type bits of mode marks a symbolic link
        return self.mode & 0xF000 == 0xA000

    def is_file(self):
        """Returns True if entry is a normal file."""
        if self.type != RAR_BLOCK_FILE:
            return False
        return not self.is_dir() and not self.is_symlink()
class RAR3Parser(CommonParser):
    """Parse RAR3 file format.
    """
    _expect_sig = RAR_ID
    _last_aes_key = (None, None, None)  # (salt, key, iv)

    def _decrypt_header(self, fd):
        """Return a decrypting reader for the encrypted headers in *fd*.

        Reads the 8-byte salt from *fd*.  The derived key and iv are cached
        together with the salt, so a repeated salt skips key derivation.

        Raises NoCrypto when no crypto backend is available.
        """
        if not _have_crypto:
            raise NoCrypto("Cannot parse encrypted headers - no crypto")
        salt = fd.read(8)
        if self._last_aes_key[0] == salt:
            key, iv = self._last_aes_key[1:]
        else:
            key, iv = rar3_s2k(self._password, salt)
            self._last_aes_key = (salt, key, iv)
        return HeaderDecrypt(fd, key, iv)

    def _parse_block_header(self, fd):
        """Parse common block header.

        Returns a Rar3Info for a header whose CRC matches, or None on a
        clean EOF, a truncated header or a CRC mismatch (the latter two
        are reported through ``_set_error``).
        """
        h = Rar3Info()
        h.header_offset = fd.tell()

        # read and parse base header
        buf = fd.read(S_BLK_HDR.size)
        if not buf:
            return None
        if len(buf) < S_BLK_HDR.size:
            # partial base header: report it like any other truncated header
            # instead of letting unpack_from() fail on the short buffer
            self._set_error("Unexpected EOF when reading header")
            return None
        t = S_BLK_HDR.unpack_from(buf)
        h.header_crc, h.type, h.flags, h.header_size = t

        # read full header
        if h.header_size > S_BLK_HDR.size:
            hdata = buf + fd.read(h.header_size - S_BLK_HDR.size)
        else:
            hdata = buf
        h.data_offset = fd.tell()

        # unexpected EOF?
        if len(hdata) != h.header_size:
            self._set_error("Unexpected EOF when reading header")
            return None

        pos = S_BLK_HDR.size

        # block has data associated with it?
        if h.flags & RAR_LONG_BLOCK:
            h.add_size, pos = load_le32(hdata, pos)
        else:
            h.add_size = 0

        # parse interesting ones, decide header boundaries for crc
        if h.type == RAR_BLOCK_MARK:
            # returned without a CRC check
            return h
        elif h.type == RAR_BLOCK_MAIN:
            pos += 6
            if h.flags & RAR_MAIN_ENCRYPTVER:
                pos += 1
            crc_pos = pos
            if h.flags & RAR_MAIN_COMMENT:
                self._parse_subblocks(h, hdata, pos)
        elif h.type == RAR_BLOCK_FILE:
            # pos - 4: the file header parser re-reads the size field itself
            pos = self._parse_file_header(h, hdata, pos - 4)
            crc_pos = pos
            if h.flags & RAR_FILE_COMMENT:
                pos = self._parse_subblocks(h, hdata, pos)
        elif h.type == RAR_BLOCK_SUB:
            pos = self._parse_file_header(h, hdata, pos - 4)
            crc_pos = h.header_size
        elif h.type == RAR_BLOCK_OLD_AUTH:
            pos += 8
            crc_pos = pos
        elif h.type == RAR_BLOCK_OLD_EXTRA:
            pos += 7
            crc_pos = pos
        elif h.type == RAR_BLOCK_ENDARC:
            if h.flags & RAR_ENDARC_DATACRC:
                h.endarc_datacrc, pos = load_le32(hdata, pos)
            if h.flags & RAR_ENDARC_VOLNR:
                h.endarc_volnr = S_SHORT.unpack_from(hdata, pos)[0]
                pos += 2
            crc_pos = h.header_size
        else:
            crc_pos = h.header_size

        # check crc
        if h.type == RAR_BLOCK_OLD_SUB:
            # old sub-block CRC also covers the data that follows the header
            crcdat = hdata[2:] + fd.read(h.add_size)
        else:
            crcdat = hdata[2:crc_pos]

        # only the low 16 bits of the CRC are compared with the header field
        calc_crc = crc32(crcdat) & 0xFFFF

        # return good header
        if h.header_crc == calc_crc:
            return h

        # header parsing failed.
        self._set_error("Header CRC error (%02x): exp=%x got=%x (xlen = %d)",
                        h.type, h.header_crc, calc_crc, len(crcdat))

        # instead of panicking, send eof
        return None

    def _parse_file_header(self, h, hdata, pos):
        """Read file-specific header.

        Fills the file fields of *h* from *hdata* starting at *pos* and
        returns the position after the parsed fields.
        """
        fld = S_FILE_HDR.unpack_from(hdata, pos)
        pos += S_FILE_HDR.size

        h.compress_size = fld[0]
        h.file_size = fld[1]
        h.host_os = fld[2]
        h.CRC = fld[3]
        h.date_time = parse_dos_time(fld[4])
        h.mtime = to_datetime(h.date_time)
        h.extract_version = fld[5]
        h.compress_type = fld[6]
        h._name_size = name_size = fld[7]
        h.mode = fld[8]

        h._md_class = CRC32Context
        h._md_expect = h.CRC

        if h.flags & RAR_FILE_LARGE:
            # high 32 bits of both sizes follow as separate fields
            h1, pos = load_le32(hdata, pos)
            h2, pos = load_le32(hdata, pos)
            h.compress_size |= h1 << 32
            h.file_size |= h2 << 32
            h.add_size = h.compress_size

        name, pos = load_bytes(hdata, name_size, pos)
        if h.flags & RAR_FILE_UNICODE and b"\0" in name:
            # stored in custom encoding
            nul = name.find(ZERO)
            h.orig_filename = name[:nul]
            u = UnicodeFilename(h.orig_filename, name[nul + 1:])
            h.filename = u.decode()

            # if parsing failed fall back to simple name
            if u.failed:
                h.filename = self._decode(h.orig_filename)
        elif h.flags & RAR_FILE_UNICODE:
            # stored in UTF8
            h.orig_filename = name
            h.filename = name.decode("utf8", "replace")
        else:
            # stored in random encoding
            h.orig_filename = name
            h.filename = self._decode(name)

        # change separator, set dir suffix
        h.filename = h.filename.replace("\\", "/").rstrip("/")
        if h.is_dir():
            h.filename = h.filename + "/"

        if h.flags & RAR_FILE_SALT:
            h.salt, pos = load_bytes(hdata, 8, pos)
        else:
            h.salt = None

        # optional extended time stamps
        if h.flags & RAR_FILE_EXTTIME:
            pos = _parse_ext_time(h, hdata, pos)
        else:
            # NOTE: this also resets the mtime derived from date_time above
            h.mtime = h.atime = h.ctime = h.arctime = None

        return pos

    def _parse_subblocks(self, h, hdata, pos):
        """Find old-style comment subblock.

        Walks the sub-headers in *hdata* from *pos*, stores a decoded
        comment in ``h.comment`` when one is found, and returns the final
        position.
        """
        while pos < len(hdata):
            # ordinary block header
            t = S_BLK_HDR.unpack_from(hdata, pos)
            ___scrc, stype, sflags, slen = t
            pos_next = pos + slen
            pos += S_BLK_HDR.size

            # corrupt header
            if pos_next < pos:
                break

            # followed by block-specific header
            if stype == RAR_BLOCK_OLD_COMMENT and pos + S_COMMENT_HDR.size <= pos_next:
                declen, ver, meth, crc = S_COMMENT_HDR.unpack_from(hdata, pos)
                pos += S_COMMENT_HDR.size
                data = hdata[pos: pos_next]
                cmt = rar3_decompress(ver, meth, data, declen, sflags,
                                      crc, self._password)
                if not self._crc_check:
                    h.comment = self._decode_comment(cmt)
                elif crc32(cmt) & 0xFFFF == crc:
                    h.comment = self._decode_comment(cmt)

            pos = pos_next
        return pos

    def _read_comment_v3(self, inf, pwd=None):
        """Read and decode the comment stored in sub-block *inf*.

        Returns None when CRC checking is enabled and the CRC does not
        match.
        """
        # read data
        with XFile(inf.volume_file) as rf:
            rf.seek(inf.data_offset)
            data = rf.read(inf.compress_size)

        # decompress
        cmt = rar3_decompress(inf.extract_version, inf.compress_type, data,
                              inf.file_size, inf.flags, inf.CRC, pwd, inf.salt)

        # check crc
        if self._crc_check:
            crc = crc32(cmt)
            if crc != inf.CRC:
                return None

        return self._decode_comment(cmt)

    def _decode(self, val):
        """Decode *val* with the first working encoding from TRY_ENCODINGS.

        Falls back to the parser charset with replacement characters.
        """
        for c in TRY_ENCODINGS:
            try:
                return val.decode(c)
            except UnicodeError:
                pass
        return val.decode(self._charset, "replace")

    def _decode_comment(self, val):
        """Decode comment bytes the same way as file names."""
        return self._decode(val)

    def process_entry(self, fd, item):
        """Register a parsed header: file entries, comments, main flags."""
        if item.type == RAR_BLOCK_FILE:
            # use only first part
            if (item.flags & RAR_FILE_SPLIT_BEFORE) == 0:
                self._info_map[item.filename.rstrip("/")] = item
                self._info_list.append(item)
            elif len(self._info_list) > 0:
                # final crc is in last block
                old = self._info_list[-1]
                old.CRC = item.CRC
                old._md_expect = item._md_expect
                old.compress_size += item.compress_size

        # parse new-style comment
        if item.type == RAR_BLOCK_SUB and item.filename == "CMT":
            if item.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER):
                pass
            elif item.flags & RAR_FILE_SOLID:
                # file comment
                cmt = self._read_comment_v3(item, self._password)
                if len(self._info_list) > 0:
                    old = self._info_list[-1]
                    old.comment = cmt
            else:
                # archive comment
                cmt = self._read_comment_v3(item, self._password)
                self.comment = cmt

        if item.type == RAR_BLOCK_MAIN:
            if item.flags & RAR_MAIN_COMMENT:
                self.comment = item.comment
            if item.flags & RAR_MAIN_PASSWORD:
                self._needs_password = True

    # put file compressed data into temporary .rar archive, and run
    # unrar on that, thus avoiding unrar going over whole archive
    def _open_hack(self, inf, pwd):
        """Open *inf* through a temporary RAR3 archive holding only that entry."""
        # create main header: crc, type, flags, size, res1, res2
        prefix = RAR_ID + S_BLK_HDR.pack(0x90CF, 0x73, 0, 13) + ZERO * (2 + 4)
        return self._open_hack_core(inf, pwd, prefix, EMPTY)
1637 # RAR5 format
class Rar5Info(RarInfo):
    """Fields common to every RAR5 record type."""
    extract_version = 50
    header_crc = None
    header_size = None
    header_offset = None
    data_offset = None

    # present on all block types
    block_type = None
    block_flags = None
    add_size = 0
    block_extra_size = 0

    # type=MAIN
    volume_number = None
    _md_class = None
    _md_expect = None

    def _must_disable_hack(self):
        """Base records never veto the temp-archive hack."""
        return False
class Rar5BaseFile(Rar5Info):
    """Shared struct for file & service record."""
    type = -1
    file_flags = None
    file_encryption = (0, 0, 0, EMPTY, EMPTY, EMPTY)
    file_compress_flags = None
    file_redir = None
    file_owner = None
    file_version = None
    blake2sp_hash = None

    def _must_disable_hack(self):
        """Return True for encrypted, split, solid or redirected entries."""
        split_bits = RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER
        return bool(
            self.flags & RAR_FILE_PASSWORD
            or self.block_flags & split_bits
            or self.file_compress_flags & RAR5_COMPR_SOLID
            or self.file_redir
        )
class Rar5FileInfo(Rar5BaseFile):
    """Record describing one file entry of a RAR5 archive."""
    type = RAR_BLOCK_FILE

    def is_symlink(self):
        """Returns True if entry is a symlink."""
        # pylint: disable=unsubscriptable-object
        if self.file_redir is None:
            return False
        link_types = (
            RAR5_XREDIR_UNIX_SYMLINK,
            RAR5_XREDIR_WINDOWS_SYMLINK,
            RAR5_XREDIR_WINDOWS_JUNCTION,
        )
        return self.file_redir[0] in link_types

    def is_file(self):
        """Returns True if entry is a normal file."""
        return not self.is_dir() and not self.is_symlink()
class Rar5ServiceInfo(Rar5BaseFile):
    """Record describing a RAR5 service block."""
    type = RAR_BLOCK_SUB
class Rar5MainInfo(Rar5Info):
    """Main archive record of a RAR5 archive."""
    type = RAR_BLOCK_MAIN
    main_flags = None
    main_volume_number = None

    def _must_disable_hack(self):
        """Return True when the archive is flagged solid."""
        return bool(self.main_flags & RAR5_MAIN_FLAG_SOLID)
class Rar5EncryptionInfo(Rar5Info):
    """Header-encryption record of a RAR5 archive."""
    type = RAR5_BLOCK_ENCRYPTION
    encryption_algo = None
    encryption_flags = None
    encryption_kdf_count = None
    encryption_salt = None
    encryption_check_value = None

    def needs_password(self):
        """Always True for this record type."""
        return True
class Rar5EndArcInfo(Rar5Info):
    """End-of-archive record of a RAR5 archive."""
    type = RAR_BLOCK_ENDARC
    endarc_flags = None
class RAR5Parser(CommonParser):
    """Parse RAR5 format.
    """
    _expect_sig = RAR5_ID
    _hdrenc_main = None

    # AES encrypted headers
    _last_aes256_key = (-1, None, None)     # (kdf_count, salt, key)

    def _gen_key(self, kdf_count, salt):
        """Derive the AES-256 key for (*kdf_count*, *salt*), caching the last one.

        Raises BadRarFile when *kdf_count* is larger than 24.
        """
        if self._last_aes256_key[:2] == (kdf_count, salt):
            return self._last_aes256_key[2]
        if kdf_count > 24:
            raise BadRarFile("Too large kdf_count")
        pwd = self._password
        if isinstance(pwd, str):
            pwd = pwd.encode("utf8")
        # iteration count is stored as a power-of-two exponent
        key = pbkdf2_hmac("sha256", pwd, salt, 1 << kdf_count)
        self._last_aes256_key = (kdf_count, salt, key)
        return key

    def _decrypt_header(self, fd):
        """Return a decrypting reader for the next encrypted header in *fd*.

        The key comes from the stored encryption record; the 16-byte iv is
        read from *fd*.  Raises NoCrypto without a crypto backend.
        """
        if not _have_crypto:
            raise NoCrypto("Cannot parse encrypted headers - no crypto")
        h = self._hdrenc_main
        key = self._gen_key(h.encryption_kdf_count, h.encryption_salt)
        iv = fd.read(16)
        return HeaderDecrypt(fd, key, iv)

    def _parse_block_header(self, fd):
        """Parse common block header.

        Returns the record object for a known block type, or None for an
        oversized, truncated, corrupt or unknown block.
        """
        header_offset = fd.tell()

        # crc32 plus up to 3 bytes of the header-size vint
        preload = 4 + 3
        start_bytes = fd.read(preload)
        header_crc, pos = load_le32(start_bytes, 0)
        hdrlen, pos = load_vint(start_bytes, pos)
        if hdrlen > 2 * 1024 * 1024:
            return None
        header_size = pos + hdrlen

        # read full header, check for EOF
        hdata = start_bytes + fd.read(header_size - len(start_bytes))
        if len(hdata) != header_size:
            self._set_error("Unexpected EOF when reading header")
            return None
        data_offset = fd.tell()

        # crc covers everything after the crc field itself
        calc_crc = crc32(memoryview(hdata)[4:])
        if header_crc != calc_crc:
            # header parsing failed.
            self._set_error("Header CRC error: exp=%x got=%x (xlen = %d)",
                            header_crc, calc_crc, len(hdata))
            return None

        block_type, pos = load_vint(hdata, pos)

        if block_type == RAR5_BLOCK_MAIN:
            h, pos = self._parse_block_common(Rar5MainInfo(), hdata)
            h = self._parse_main_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_FILE:
            h, pos = self._parse_block_common(Rar5FileInfo(), hdata)
            h = self._parse_file_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_SERVICE:
            h, pos = self._parse_block_common(Rar5ServiceInfo(), hdata)
            h = self._parse_file_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_ENCRYPTION:
            h, pos = self._parse_block_common(Rar5EncryptionInfo(), hdata)
            h = self._parse_encryption_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_ENDARC:
            h, pos = self._parse_block_common(Rar5EndArcInfo(), hdata)
            h = self._parse_endarc_block(h, hdata, pos)
        else:
            h = None
        if h:
            h.header_offset = header_offset
            h.data_offset = data_offset
        return h

    def _parse_block_common(self, h, hdata):
        """Fill the fields shared by all block types; return ``(h, pos)``."""
        h.header_crc, pos = load_le32(hdata, 0)
        hdrlen, pos = load_vint(hdata, pos)
        h.header_size = hdrlen + pos
        h.block_type, pos = load_vint(hdata, pos)
        h.block_flags, pos = load_vint(hdata, pos)

        if h.block_flags & RAR5_BLOCK_FLAG_EXTRA_DATA:
            h.block_extra_size, pos = load_vint(hdata, pos)
        if h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA:
            h.add_size, pos = load_vint(hdata, pos)

        h.compress_size = h.add_size

        # mirror RAR5 block flags into the RAR3-style flags field
        if h.block_flags & RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN:
            h.flags |= RAR_SKIP_IF_UNKNOWN
        if h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA:
            h.flags |= RAR_LONG_BLOCK
        return h, pos

    def _parse_main_block(self, h, hdata, pos):
        """Parse the main archive record and map its flags to RAR3-style ones."""
        h.main_flags, pos = load_vint(hdata, pos)
        if h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR:
            h.main_volume_number, pos = load_vint(hdata, pos)

        h.flags |= RAR_MAIN_NEWNUMBERING
        if h.main_flags & RAR5_MAIN_FLAG_SOLID:
            h.flags |= RAR_MAIN_SOLID
        if h.main_flags & RAR5_MAIN_FLAG_ISVOL:
            h.flags |= RAR_MAIN_VOLUME
        if h.main_flags & RAR5_MAIN_FLAG_RECOVERY:
            h.flags |= RAR_MAIN_RECOVERY
        if self._hdrenc_main:
            h.flags |= RAR_MAIN_PASSWORD
        if (h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR) == 0:
            h.flags |= RAR_MAIN_FIRSTVOLUME

        return h

    def _parse_file_block(self, h, hdata, pos):
        """Parse a file or service record, including its extra records."""
        h.file_flags, pos = load_vint(hdata, pos)
        h.file_size, pos = load_vint(hdata, pos)
        h.mode, pos = load_vint(hdata, pos)

        if h.file_flags & RAR5_FILE_FLAG_HAS_MTIME:
            h.mtime, pos = load_unixtime(hdata, pos)
            h.date_time = h.mtime.timetuple()[:6]
        if h.file_flags & RAR5_FILE_FLAG_HAS_CRC32:
            h.CRC, pos = load_le32(hdata, pos)
            h._md_class = CRC32Context
            h._md_expect = h.CRC

        h.file_compress_flags, pos = load_vint(hdata, pos)
        h.file_host_os, pos = load_vint(hdata, pos)
        h.orig_filename, pos = load_vstr(hdata, pos)
        h.filename = h.orig_filename.decode("utf8", "replace").rstrip("/")

        # use compatible values
        if h.file_host_os == RAR5_OS_WINDOWS:
            h.host_os = RAR_OS_WIN32
        else:
            h.host_os = RAR_OS_UNIX
        h.compress_type = RAR_M0 + ((h.file_compress_flags >> 7) & 7)

        if h.block_extra_size:
            # allow 1 byte of garbage
            while pos < len(hdata) - 1:
                xsize, pos = load_vint(hdata, pos)
                xdata, pos = load_bytes(hdata, xsize, pos)
                self._process_file_extra(h, xdata)

        if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE:
            h.flags |= RAR_FILE_SPLIT_BEFORE
        if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_AFTER:
            h.flags |= RAR_FILE_SPLIT_AFTER
        if h.file_flags & RAR5_FILE_FLAG_ISDIR:
            h.flags |= RAR_FILE_DIRECTORY
        if h.file_compress_flags & RAR5_COMPR_SOLID:
            h.flags |= RAR_FILE_SOLID

        if h.is_dir():
            h.filename = h.filename + "/"
        return h

    def _parse_endarc_block(self, h, hdata, pos):
        """Parse the end-of-archive record."""
        h.endarc_flags, pos = load_vint(hdata, pos)
        if h.endarc_flags & RAR5_ENDARC_FLAG_NEXT_VOL:
            h.flags |= RAR_ENDARC_NEXT_VOLUME
        return h

    def _parse_encryption_block(self, h, hdata, pos):
        """Parse the header-encryption record and remember it on the parser.

        Raises BadRarFile for any cipher other than AES-256.
        """
        h.encryption_algo, pos = load_vint(hdata, pos)
        h.encryption_flags, pos = load_vint(hdata, pos)
        h.encryption_kdf_count, pos = load_byte(hdata, pos)
        h.encryption_salt, pos = load_bytes(hdata, 16, pos)
        if h.encryption_flags & RAR5_ENC_FLAG_HAS_CHECKVAL:
            # load_bytes() returns (data, new_pos); keep only the data
            h.encryption_check_value, pos = load_bytes(hdata, 12, pos)
        if h.encryption_algo != RAR5_XENC_CIPHER_AES256:
            raise BadRarFile("Unsupported header encryption cipher")
        self._hdrenc_main = h
        return h

    def _process_file_extra(self, h, xdata):
        """Dispatch one extra record of a file block by its type."""
        xtype, pos = load_vint(xdata, 0)
        if xtype == RAR5_XFILE_TIME:
            self._parse_file_xtime(h, xdata, pos)
        elif xtype == RAR5_XFILE_ENCRYPTION:
            self._parse_file_encryption(h, xdata, pos)
        elif xtype == RAR5_XFILE_HASH:
            self._parse_file_hash(h, xdata, pos)
        elif xtype == RAR5_XFILE_VERSION:
            self._parse_file_version(h, xdata, pos)
        elif xtype == RAR5_XFILE_REDIR:
            self._parse_file_redir(h, xdata, pos)
        elif xtype == RAR5_XFILE_OWNER:
            self._parse_file_owner(h, xdata, pos)
        elif xtype == RAR5_XFILE_SERVICE:
            pass
        else:
            pass

    # extra block for file time record
    def _parse_file_xtime(self, h, xdata, pos):
        """Load mtime/ctime/atime, with optional nanosecond parts."""
        tflags, pos = load_vint(xdata, pos)

        ldr = load_windowstime
        if tflags & RAR5_XTIME_UNIXTIME:
            ldr = load_unixtime

        if tflags & RAR5_XTIME_HAS_MTIME:
            h.mtime, pos = ldr(xdata, pos)
            h.date_time = h.mtime.timetuple()[:6]
        if tflags & RAR5_XTIME_HAS_CTIME:
            h.ctime, pos = ldr(xdata, pos)
        if tflags & RAR5_XTIME_HAS_ATIME:
            h.atime, pos = ldr(xdata, pos)

        if tflags & RAR5_XTIME_UNIXTIME_NS:
            if tflags & RAR5_XTIME_HAS_MTIME:
                nsec, pos = load_le32(xdata, pos)
                h.mtime = to_nsdatetime(h.mtime, nsec)
            if tflags & RAR5_XTIME_HAS_CTIME:
                nsec, pos = load_le32(xdata, pos)
                h.ctime = to_nsdatetime(h.ctime, nsec)
            if tflags & RAR5_XTIME_HAS_ATIME:
                nsec, pos = load_le32(xdata, pos)
                h.atime = to_nsdatetime(h.atime, nsec)

    # just remember encryption info
    def _parse_file_encryption(self, h, xdata, pos):
        """Store per-file encryption parameters and mark the entry encrypted."""
        algo, pos = load_vint(xdata, pos)
        flags, pos = load_vint(xdata, pos)
        kdf_count, pos = load_byte(xdata, pos)
        salt, pos = load_bytes(xdata, 16, pos)
        iv, pos = load_bytes(xdata, 16, pos)
        checkval = None
        if flags & RAR5_XENC_CHECKVAL:
            checkval, pos = load_bytes(xdata, 12, pos)
        if flags & RAR5_XENC_TWEAKED:
            # tweaked checksums are not verified
            h._md_expect = None
            h._md_class = NoHashContext

        h.file_encryption = (algo, flags, kdf_count, salt, iv, checkval)
        h.flags |= RAR_FILE_PASSWORD

    def _parse_file_hash(self, h, xdata, pos):
        """Store a BLAKE2sp hash and use it for verification unless tweaked."""
        hash_type, pos = load_vint(xdata, pos)
        if hash_type == RAR5_XHASH_BLAKE2SP:
            h.blake2sp_hash, pos = load_bytes(xdata, 32, pos)
            if (h.file_encryption[1] & RAR5_XENC_TWEAKED) == 0:
                h._md_class = Blake2SP
                h._md_expect = h.blake2sp_hash

    def _parse_file_version(self, h, xdata, pos):
        """Store the file version record as ``(flags, version)``."""
        flags, pos = load_vint(xdata, pos)
        version, pos = load_vint(xdata, pos)
        h.file_version = (flags, version)

    def _parse_file_redir(self, h, xdata, pos):
        """Store the redirection record as ``(type, flags, name)``."""
        redir_type, pos = load_vint(xdata, pos)
        redir_flags, pos = load_vint(xdata, pos)
        redir_name, pos = load_vstr(xdata, pos)
        redir_name = redir_name.decode("utf8", "replace")
        h.file_redir = (redir_type, redir_flags, redir_name)

    def _parse_file_owner(self, h, xdata, pos):
        """Store owner info as ``(user_name, group_name, user_id, group_id)``."""
        user_name = group_name = user_id = group_id = None

        flags, pos = load_vint(xdata, pos)
        if flags & RAR5_XOWNER_UNAME:
            user_name, pos = load_vstr(xdata, pos)
        if flags & RAR5_XOWNER_GNAME:
            group_name, pos = load_vstr(xdata, pos)
        if flags & RAR5_XOWNER_UID:
            user_id, pos = load_vint(xdata, pos)
        if flags & RAR5_XOWNER_GID:
            group_id, pos = load_vint(xdata, pos)

        h.file_owner = (user_name, group_name, user_id, group_id)

    def process_entry(self, fd, item):
        """Register a parsed file record or load an archive comment."""
        if item.block_type == RAR5_BLOCK_FILE:
            # use only first part
            if (item.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE) == 0:
                self._info_map[item.filename.rstrip("/")] = item
                self._info_list.append(item)
            elif len(self._info_list) > 0:
                # final crc is in last block
                old = self._info_list[-1]
                old.CRC = item.CRC
                old._md_expect = item._md_expect
                old.blake2sp_hash = item.blake2sp_hash
                old.compress_size += item.compress_size
        elif item.block_type == RAR5_BLOCK_SERVICE:
            if item.filename == "CMT":
                self._load_comment(fd, item)

    def _load_comment(self, fd, item):
        """Load the archive comment from service record *item*.

        Split or compressed comments, and comments encrypted with anything
        other than AES-256, are skipped.  Always returns None; the result is
        stored in ``self.comment``.
        """
        if item.block_flags & (RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER):
            return None
        if item.compress_type != RAR_M0:
            return None

        if item.flags & RAR_FILE_PASSWORD:
            algo, ___flags, kdf_count, salt, iv, ___checkval = item.file_encryption
            if algo != RAR5_XENC_CIPHER_AES256:
                return None
            key = self._gen_key(kdf_count, salt)
            f = HeaderDecrypt(fd, key, iv)
            cmt = f.read(item.file_size)
        else:
            # archive comment
            with self._open_clear(item) as cmtstream:
                cmt = cmtstream.read()

        # rar bug? - appends zero to comment
        cmt = cmt.split(ZERO, 1)[0]
        self.comment = cmt.decode("utf8")
        return None

    def _open_hack(self, inf, pwd):
        """Open *inf* through a temporary RAR5 archive holding only that entry."""
        # len, type, blk_flags, flags
        main_hdr = b"\x03\x01\x00\x00"
        endarc_hdr = b"\x03\x05\x00\x00"
        main_hdr = S_LONG.pack(crc32(main_hdr)) + main_hdr
        endarc_hdr = S_LONG.pack(crc32(endarc_hdr)) + endarc_hdr
        return self._open_hack_core(inf, pwd, RAR5_ID + main_hdr, endarc_hdr)
2080 ## Utility classes
class UnicodeFilename:
    """Decoder for the compressed UTF-16 file names used by RAR3.

    *name* is the 8-bit form of the file name and *encdata* the encoded
    stream.  ``failed`` becomes 1 when either input runs out of bytes.
    """
    def __init__(self, name, encdata):
        self.std_name = bytearray(name)
        self.encdata = bytearray(encdata)
        self.pos = self.encpos = 0
        self.buf = bytearray()
        self.failed = 0

    def enc_byte(self):
        """Copy encoded byte."""
        try:
            value = self.encdata[self.encpos]
        except IndexError:
            self.failed = 1
            return 0
        self.encpos += 1
        return value

    def std_byte(self):
        """Copy byte from 8-bit representation."""
        try:
            value = self.std_name[self.pos]
        except IndexError:
            self.failed = 1
            value = ord("?")
        return value

    def put(self, lo, hi):
        """Copy 16-bit value to result."""
        self.buf.append(lo)
        self.buf.append(hi)
        self.pos += 1

    def decode(self):
        """Decompress compressed UTF16 value."""
        high_default = self.enc_byte()
        bits_left = 0
        total = len(self.encdata)
        while self.encpos < total:
            # each flag byte carries four 2-bit opcodes, highest bits first
            if bits_left == 0:
                flag_byte = self.enc_byte()
                bits_left = 8
            bits_left -= 2
            opcode = (flag_byte >> bits_left) & 3

            if opcode == 0:
                self.put(self.enc_byte(), 0)
                continue
            if opcode == 1:
                self.put(self.enc_byte(), high_default)
                continue
            if opcode == 2:
                low = self.enc_byte()
                high = self.enc_byte()
                self.put(low, high)
                continue

            # opcode 3: run of characters based on the 8-bit name
            run = self.enc_byte()
            if run & 0x80:
                delta = self.enc_byte()
                for _ in range((run & 0x7f) + 2):
                    self.put((self.std_byte() + delta) & 0xFF, high_default)
            else:
                for _ in range(run + 2):
                    self.put(self.std_byte(), 0)
        return self.buf.decode("utf-16le", "replace")
class RarExtFile(io.RawIOBase):
    """Base class for file-like object that :meth:`RarFile.open` returns.

    Provides public methods and common crc checking.

    Behaviour:
     - no short reads - .read() and .readinto() read as much as requested.
     - no internal buffer, use io.BufferedReader for that.
    """
    name = None     #: Filename of the archive entry
    mode = "rb"
    _parser = None
    _inf = None
    _fd = None
    _remain = 0
    _returncode = 0
    _md_context = None
    # set by the first seek(); keeps hash checking off across re-opens
    _seeking = False

    def _open_extfile(self, parser, inf):
        """(Re)initialise reader state for entry *inf*.

        Closes any previous ``_fd`` and resets the hash context and the
        remaining byte count.  Subclasses open the actual data source.
        """
        self.name = inf.filename
        self._parser = parser
        self._inf = inf

        if self._fd:
            self._fd.close()
        if self._seeking:
            # a re-open caused by a backward seek must not re-enable hashing:
            # skipped data may never pass through the hash context
            md_class = NoHashContext
        else:
            md_class = self._inf._md_class or NoHashContext
        self._md_context = md_class()
        self._fd = None
        self._remain = self._inf.file_size

    def read(self, n=-1):
        """Read all or specified amount of data from archive entry.

        Raises BadRarFile when fewer bytes than requested are available.
        """

        # sanitize count
        if n is None or n < 0:
            n = self._remain
        elif n > self._remain:
            n = self._remain
        if n == 0:
            return EMPTY

        buf = []
        orig = n
        while n > 0:
            # actual read
            data = self._read(n)
            if not data:
                break
            buf.append(data)
            self._md_context.update(data)
            self._remain -= len(data)
            n -= len(data)
        data = EMPTY.join(buf)
        if n > 0:
            raise BadRarFile("Failed the read enough data: req=%d got=%d" % (orig, len(data)))

        # done?
        if not data or self._remain == 0:
            # self.close()
            self._check()
        return data

    def _check(self):
        """Check final CRC.

        Nothing is checked when either the expected or the computed digest
        is None.  Raises BadRarFile on short data or digest mismatch.
        """
        final = self._md_context.digest()
        exp = self._inf._md_expect
        if exp is None:
            return
        if final is None:
            return
        if self._returncode:
            check_returncode(self._returncode, "", tool_setup().get_errmap())
        if self._remain != 0:
            raise BadRarFile("Failed the read enough data")
        if final != exp:
            raise BadRarFile("Corrupt file - CRC check failed: %s - exp=%r got=%r" % (
                self._inf.filename, exp, final))

    def _read(self, cnt):
        """Actual read that gets sanitized cnt."""
        raise NotImplementedError("_read")

    def close(self):
        """Close open resources."""

        super().close()

        if self._fd:
            self._fd.close()
            self._fd = None

    def __del__(self):
        """Hook delete to make sure tempfile is removed."""
        self.close()

    def readinto(self, buf):
        """Zero-copy read directly into buffer.

        Returns bytes read.
        """
        raise NotImplementedError("readinto")

    def tell(self):
        """Return current reading position in uncompressed data."""
        return self._inf.file_size - self._remain

    def seek(self, offset, whence=0):
        """Seek in data.

        On uncompressed files, the seeking works by actual
        seeks so it's fast.  On compressed files it's slow
        - forward seeking happens by reading ahead,
        backwards by re-opening and decompressing from the start.

        Hash checking is disabled for this reader from the first seek on.
        Raises ValueError for an unknown *whence*.
        """

        # disable crc check when seeking
        self._seeking = True
        self._md_context = NoHashContext()

        fsize = self._inf.file_size
        cur_ofs = self.tell()

        if whence == 0:     # seek from beginning of file
            new_ofs = offset
        elif whence == 1:   # seek from current position
            new_ofs = cur_ofs + offset
        elif whence == 2:   # seek from end of file
            new_ofs = fsize + offset
        else:
            raise ValueError("Invalid value for whence")

        # sanity check
        if new_ofs < 0:
            new_ofs = 0
        elif new_ofs > fsize:
            new_ofs = fsize

        # do the actual seek
        if new_ofs >= cur_ofs:
            self._skip(new_ofs - cur_ofs)
        else:
            # reopen and seek
            self._open_extfile(self._parser, self._inf)
            self._skip(new_ofs)
        return self.tell()

    def _skip(self, cnt):
        """Read and discard data"""
        empty_read(self, cnt, BSIZE)

    def readable(self):
        """Returns True"""
        return True

    def writable(self):
        """Returns False.

        Writing is not supported.
        """
        return False

    def seekable(self):
        """Returns True.

        Seeking is supported, although it's slow on compressed files.
        """
        return True

    def readall(self):
        """Read all remaining data"""
        # avoid RawIOBase default impl
        return self.read()
class PipeReader(RarExtFile):
    """Reader fed by the stdout pipe of an unpack command.

    Also removes the temp file it was given when closed.
    """

    def __init__(self, parser, inf, cmd, tempfile=None):
        super().__init__()
        self._cmd = cmd
        self._proc = None
        self._tempfile = tempfile
        self._open_extfile(parser, inf)

    def _close_proc(self):
        """Close the child's pipes, wait for it and record its return code."""
        proc = self._proc
        if not proc:
            return
        for pipe in (proc.stdout, proc.stdin, proc.stderr):
            if pipe:
                pipe.close()
        proc.wait()
        self._returncode = proc.returncode
        self._proc = None

    def _open_extfile(self, parser, inf):
        """Reset reader state and start a fresh unpack process."""
        super()._open_extfile(parser, inf)

        # stop old process
        self._close_proc()

        # launch new process
        self._returncode = 0
        child = custom_popen(self._cmd)
        self._proc = child
        self._fd = child.stdout

        # avoid situation where unrar waits on stdin
        if child.stdin:
            child.stdin.close()

    def _read(self, cnt):
        """Read from pipe."""

        # normal read is usually enough
        first = self._fd.read(cnt)
        if len(first) == cnt or not first:
            return first

        # short read, try looping
        chunks = [first]
        missing = cnt - len(first)
        while missing > 0:
            more = self._fd.read(missing)
            if not more:
                break
            missing -= len(more)
            chunks.append(more)
        return EMPTY.join(chunks)

    def close(self):
        """Close open resources."""

        self._close_proc()
        super().close()

        tmpname = self._tempfile
        if tmpname:
            try:
                os.unlink(tmpname)
            except OSError:
                pass
            self._tempfile = None

    def readinto(self, buf):
        """Zero-copy read directly into buffer."""
        limit = len(buf)
        if limit > self._remain:
            limit = self._remain
        view = memoryview(buf)
        filled = 0
        while filled < limit:
            count = self._fd.readinto(view[filled: limit])
            if not count:
                break
            self._md_context.update(view[filled: filled + count])
            self._remain -= count
            filled += count
        return filled
2406 class DirectReader(RarExtFile):
2407 """Read uncompressed data directly from archive.
2409 _cur = None
2410 _cur_avail = None
2411 _volfile = None
def __init__(self, parser, inf):
    """Create the reader and open entry *inf* right away."""
    super().__init__()
    self._open_extfile(parser, inf)
def _open_extfile(self, parser, inf):
    """Open the entry's volume and position it right after the entry header.

    The header is re-parsed from ``header_offset``; its ``add_size`` is
    the amount of data available in the current volume.
    """
    super()._open_extfile(parser, inf)

    volume = self._inf.volume_file
    self._volfile = volume
    self._fd = XFile(volume, 0)
    self._fd.seek(self._inf.header_offset, 0)
    header = self._parser._parse_header(self._fd)
    self._cur = header
    self._cur_avail = header.add_size
2426 def _skip(self, cnt):
2427 """RAR Seek, skipping through rar files to get to correct position
2430 while cnt > 0:
2431 # next vol needed?
2432 if self._cur_avail == 0:
2433 if not self._open_next():
2434 break
2436 # fd is in read pos, do the read
2437 if cnt > self._cur_avail:
2438 cnt -= self._cur_avail
2439 self._remain -= self._cur_avail
2440 self._cur_avail = 0
2441 else:
2442 self._fd.seek(cnt, 1)
2443 self._cur_avail -= cnt
2444 self._remain -= cnt
2445 cnt = 0
2447 def _read(self, cnt):
2448 """Read from potentially multi-volume archive."""
2450 buf = []
2451 while cnt > 0:
2452 # next vol needed?
2453 if self._cur_avail == 0:
2454 if not self._open_next():
2455 break
2457 # fd is in read pos, do the read
2458 if cnt > self._cur_avail:
2459 data = self._fd.read(self._cur_avail)
2460 else:
2461 data = self._fd.read(cnt)
2462 if not data:
2463 break
2465 # got some data
2466 cnt -= len(data)
2467 self._cur_avail -= len(data)
2468 buf.append(data)
2470 if len(buf) == 1:
2471 return buf[0]
2472 return EMPTY.join(buf)
2474 def _open_next(self):
2475 """Proceed to next volume."""
2477 # is the file split over archives?
2478 if (self._cur.flags & RAR_FILE_SPLIT_AFTER) == 0:
2479 return False
2481 if self._fd:
2482 self._fd.close()
2483 self._fd = None
2485 # open next part
2486 self._volfile = self._parser._next_volname(self._volfile)
2487 fd = open(self._volfile, "rb", 0)
2488 self._fd = fd
2489 sig = fd.read(len(self._parser._expect_sig))
2490 if sig != self._parser._expect_sig:
2491 raise BadRarFile("Invalid signature")
2493 # loop until first file header
2494 while True:
2495 cur = self._parser._parse_header(fd)
2496 if not cur:
2497 raise BadRarFile("Unexpected EOF")
2498 if cur.type in (RAR_BLOCK_MARK, RAR_BLOCK_MAIN):
2499 if cur.add_size:
2500 fd.seek(cur.add_size, 1)
2501 continue
2502 if cur.orig_filename != self._inf.orig_filename:
2503 raise BadRarFile("Did not found file entry")
2504 self._cur = cur
2505 self._cur_avail = cur.add_size
2506 return True
2508 def readinto(self, buf):
2509 """Zero-copy read directly into buffer."""
2510 got = 0
2511 vbuf = memoryview(buf)
2512 while got < len(buf):
2513 # next vol needed?
2514 if self._cur_avail == 0:
2515 if not self._open_next():
2516 break
2518 # length for next read
2519 cnt = len(buf) - got
2520 if cnt > self._cur_avail:
2521 cnt = self._cur_avail
2523 # read into temp view
2524 res = self._fd.readinto(vbuf[got: got + cnt])
2525 if not res:
2526 break
2527 self._md_context.update(vbuf[got: got + res])
2528 self._cur_avail -= res
2529 self._remain -= res
2530 got += res
2531 return got
class HeaderDecrypt:
    """File-like object that decrypts from another file"""

    def __init__(self, f, key, iv):
        self.f = f
        self.ciph = AES_CBC_Decrypt(key, iv)
        # decrypted bytes that were not yet handed out
        self.buf = EMPTY

    def tell(self):
        """Current file pos - works only on block boundaries."""
        return self.f.tell()

    def read(self, cnt=None):
        """Read and decrypt.

        Returns up to ``cnt`` decrypted bytes; fewer when the underlying
        file runs out of full 16-byte blocks.  Requests above 8 KiB are
        rejected with BadRarFile.
        """
        if cnt > 8 * 1024:
            raise BadRarFile("Bad count to header decrypt - wrong password?")

        # request can be served from already decrypted data
        pending = self.buf
        if cnt <= len(pending):
            self.buf = pending[cnt:]
            return pending[:cnt]

        # hand out everything buffered, then decrypt more
        out = pending
        self.buf = EMPTY
        need = cnt - len(out)

        block_size = 16
        while need > 0:
            enc = self.f.read(block_size)
            if len(enc) < block_size:
                break
            dec = self.ciph.decrypt(enc)
            if need >= len(dec):
                out += dec
                need -= len(dec)
            else:
                # keep the unused tail of the block for the next call
                out += dec[:need]
                self.buf = dec[need:]
                need = 0

        return out
class XFile:
    """Input may be filename or file object.

    A file object is rewound and used as-is (and left open on close());
    anything else is opened as a binary file that close() will close.
    """
    __slots__ = ("_fd", "_need_close")

    def __init__(self, xfile, bufsize=1024):
        if not is_filelike(xfile):
            # we opened it, so we own it
            self._need_close = True
            self._fd = open(xfile, "rb", bufsize)
        else:
            # caller owns the object; only rewind it
            self._need_close = False
            self._fd = xfile
            self._fd.seek(0)

    def read(self, n=None):
        """Read from file."""
        return self._fd.read(n)

    def tell(self):
        """Return file pos."""
        return self._fd.tell()

    def seek(self, ofs, whence=0):
        """Move file pos."""
        return self._fd.seek(ofs, whence)

    def readinto(self, buf):
        """Read into buffer."""
        return self._fd.readinto(buf)

    def close(self):
        """Close file object."""
        if self._need_close:
            self._fd.close()

    def __enter__(self):
        return self

    def __exit__(self, typ, val, tb):
        self.close()
class NoHashContext:
    """No-op hash function.

    Has the same methods as the real hash contexts, but every method
    does nothing and returns None.
    """

    def __init__(self, data=None):
        """Accept and ignore initial data."""

    def update(self, data):
        """Ignore the data."""

    def digest(self):
        """No digest is computed; returns None."""

    def hexdigest(self):
        """No digest is computed; returns None."""
class CRC32Context:
    """Hash context that uses CRC32."""
    __slots__ = ["_crc"]

    def __init__(self, data=None):
        # running checksum; zero before any data was seen
        self._crc = 0
        if data:
            self.update(data)

    def update(self, data):
        """Fold more data into the running checksum."""
        self._crc = crc32(data, self._crc)

    def digest(self):
        """Return the checksum as integer."""
        return self._crc

    def hexdigest(self):
        """Return the checksum as 8 zero-padded hex digits."""
        return "%08x" % self.digest()
class Blake2SP:
    """Blake2sp hash context.

    Input is cut into ``block_size`` pieces that are handed round-robin
    to ``parallelism`` blake2s leaf contexts; the final digest is a root
    blake2s over the leaf digests.
    """
    __slots__ = ["_thread", "_buf", "_cur", "_digest"]
    digest_size = 32
    block_size = 64
    parallelism = 8

    def __init__(self, data=None):
        self._buf = b""         # partial block waiting for more data
        self._cur = 0           # leaf that receives the next block
        self._digest = None     # cached final result

        last = self.parallelism - 1
        self._thread = [self._blake2s(i, 0, i == last)
                        for i in range(self.parallelism)]

        if data:
            self.update(data)

    def _blake2s(self, ofs, depth, is_last):
        # tree node: depth 0 are the leaves, depth 1 is the root
        return blake2s(node_offset=ofs, node_depth=depth, last_node=is_last,
                       depth=2, inner_size=32, fanout=self.parallelism)

    def _add_block(self, blk):
        # feed the current leaf, then advance round-robin
        self._thread[self._cur].update(blk)
        self._cur = (self._cur + 1) % self.parallelism

    def update(self, data):
        """Hash data.
        """
        view = memoryview(data)
        bs = self.block_size
        start = 0

        # first complete the block left over from the previous call
        if self._buf:
            need = bs - len(self._buf)
            if len(view) < need:
                self._buf += view.tobytes()
                return
            self._add_block(self._buf + view[:need].tobytes())
            start = need

        # full blocks go directly to the leaves
        end = len(view)
        while end - start >= bs:
            self._add_block(view[start:start + bs])
            start += bs

        # remainder is kept for later
        self._buf = view[start:].tobytes()

    def digest(self):
        """Return final digest value.
        """
        if self._digest is not None:
            return self._digest

        # flush the pending partial block
        if self._buf:
            self._add_block(self._buf)
            self._buf = EMPTY

        root = self._blake2s(0, 1, True)
        for leaf in self._thread:
            root.update(leaf.digest())
        self._digest = root.digest()
        return self._digest

    def hexdigest(self):
        """Hexadecimal digest."""
        return tohex(self.digest())
class Rar3Sha1:
    """Emulate buggy SHA1 from RAR3.

    The digest itself is plain SHA1.  With ``rarbug`` enabled, update()
    additionally overwrites parts of the caller's buffer in place, so
    the buffer must then be writable (e.g. a bytearray).
    """
    digest_size = 20
    block_size = 64

    _BLK_BE = struct.Struct(b">16L")
    _BLK_LE = struct.Struct(b"<16L")

    __slots__ = ("_nbytes", "_md", "_rarbug")

    def __init__(self, data=b"", rarbug=False):
        self._md = sha1()
        self._nbytes = 0
        self._rarbug = rarbug
        self.update(data)

    def update(self, data):
        """Process more data."""
        self._md.update(data)
        bufpos = self._nbytes & 63
        self._nbytes += len(data)

        if not self._rarbug or len(data) <= 64:
            return

        # every full block after the one completing the pending partial
        # block gets corrupted
        bs = self.block_size
        for dpos in range(bs - bufpos, len(data) - bs + 1, bs):
            self._corrupt(data, dpos)

    def digest(self):
        """Return final state."""
        return self._md.digest()

    def hexdigest(self):
        """Return final state as hex string."""
        return self._md.hexdigest()

    def _corrupt(self, data, dpos):
        """Corruption from SHA1 core."""
        # run the SHA1 message expansion over the block read as 16
        # big-endian words ...
        words = list(self._BLK_BE.unpack_from(data, dpos))
        for t in range(16, 80):
            mixed = (words[(t - 3) & 15] ^ words[(t - 8) & 15]
                     ^ words[(t - 14) & 15] ^ words[(t - 16) & 15])
            words[t & 15] = ((mixed << 1) | (mixed >> 31)) & 0xFFFFFFFF
        # ... and write the result back over the input, little-endian
        self._BLK_LE.pack_into(data, dpos, *words)
##
## Utility functions
##

# little-endian scalars: unsigned 32-bit, 16-bit and 8-bit
S_LONG = Struct("<L")
S_SHORT = Struct("<H")
S_BYTE = Struct("<B")

# pre-compiled layouts for fixed-size header parts
S_BLK_HDR = Struct("<HBHH")
S_FILE_HDR = Struct("<LLBLLBBHL")
S_COMMENT_HDR = Struct("<HBBH")
def load_vint(buf, pos):
    """Load RAR5 variable-size int.

    Each byte carries 7 value bits, lowest group first; a byte below
    0x80 ends the number.  Returns ``(value, next_pos)`` and raises
    BadRarFile when no terminating byte is found within 11 bytes or
    before the buffer ends.
    """
    value = 0
    shift = 0
    for idx in range(pos, min(pos + 11, len(buf))):
        byte = buf[idx]
        value += (byte & 0x7F) << shift
        shift += 7
        if byte < 0x80:
            return value, idx + 1
    raise BadRarFile("cannot load vint")
def load_byte(buf, pos):
    """Load single byte.

    Returns ``(value, next_pos)``; raises BadRarFile when ``pos`` is
    past the end of the buffer.
    """
    nxt = pos + 1
    if nxt > len(buf):
        raise BadRarFile("cannot load byte")
    value, = S_BYTE.unpack_from(buf, pos)
    return value, nxt
def load_le32(buf, pos):
    """Load little-endian 32-bit integer.

    Returns ``(value, next_pos)``; raises BadRarFile when fewer than
    4 bytes are available at ``pos``.
    """
    nxt = pos + 4
    if nxt > len(buf):
        raise BadRarFile("cannot load le32")
    value, = S_LONG.unpack_from(buf, pos)
    return value, nxt
def load_bytes(buf, num, pos):
    """Load sequence of bytes.

    Returns ``(data, next_pos)`` where data is the ``num`` bytes found
    at ``pos``; raises BadRarFile when the buffer is too short.
    """
    stop = pos + num
    if stop > len(buf):
        raise BadRarFile("cannot load bytes")
    chunk = buf[pos:stop]
    return chunk, stop
def load_vstr(buf, pos):
    """Load bytes prefixed by vint length.

    Returns ``(data, next_pos)``.
    """
    length, data_pos = load_vint(buf, pos)
    return load_bytes(buf, length, data_pos)
def load_dostime(buf, pos):
    """Load LE32 dos timestamp.

    Returns ``(datetime, next_pos)``.
    """
    raw, nxt = load_le32(buf, pos)
    return to_datetime(parse_dos_time(raw)), nxt
def load_unixtime(buf, pos):
    """Load LE32 unix timestamp.

    Returns ``(datetime, next_pos)``; the datetime is created with
    ``UTC`` as its timezone.
    """
    seconds, nxt = load_le32(buf, pos)
    return datetime.fromtimestamp(seconds, UTC), nxt
def load_windowstime(buf, pos):
    """Load LE64 windows timestamp.

    The value counts 100ns ticks since the windows epoch (1601).
    Returns ``(datetime, next_pos)``.
    """
    # unix epoch (1970) in seconds from windows epoch (1601)
    unix_epoch = 11644473600

    # the 64-bit value is read as two 32-bit halves, low half first
    low, pos = load_le32(buf, pos)
    high, pos = load_le32(buf, pos)
    ticks = (high << 32) | low

    secs, subticks = divmod(ticks, 10000000)
    stamp = datetime.fromtimestamp(secs - unix_epoch, UTC)
    return to_nsdatetime(stamp, subticks * 100), pos
def _next_newvol(volfile):
    """New-style next volume.

    Increments the number that ends at the last digit found in the
    name; raises BadRarName when the name has no digit at all.
    """
    for idx in range(len(volfile) - 1, -1, -1):
        if "0" <= volfile[idx] <= "9":
            return _inc_volname(volfile, idx)
    raise BadRarName("Cannot construct volume name: " + volfile)
2857 def _next_oldvol(volfile):
2858 """Old-style next volume
2860 # rar -> r00
2861 if volfile[-4:].lower() == ".rar":
2862 return volfile[:-2] + "00"
2863 return _inc_volname(volfile, len(volfile) - 1)
2866 def _inc_volname(volfile, i):
2867 """increase digits with carry, otherwise just increment char
2869 fn = list(volfile)
2870 while i >= 0:
2871 if fn[i] != "9":
2872 fn[i] = chr(ord(fn[i]) + 1)
2873 break
2874 fn[i] = "0"
2875 i -= 1
2876 return "".join(fn)
def _parse_ext_time(h, data, pos):
    """Parse all RAR3 extended time fields.

    Stores the results in ``h.mtime``, ``h.ctime``, ``h.atime`` and
    ``h.arctime`` (and ``h.date_time`` when mtime is updated) and
    returns the position after the parsed data.
    """
    # flags and rest of data can be missing
    flags = 0
    if pos + 2 <= len(data):
        flags, = S_SHORT.unpack_from(data, pos)
        pos += 2

    # one 4-bit flag group per field, mtime in the topmost group
    new_mtime, pos = _parse_xtime(flags >> 12, data, pos, h.mtime)
    h.ctime, pos = _parse_xtime(flags >> 8, data, pos)
    h.atime, pos = _parse_xtime(flags >> 4, data, pos)
    h.arctime, pos = _parse_xtime(flags, data, pos)

    if new_mtime:
        h.mtime = new_mtime
        h.date_time = new_mtime.timetuple()[:6]
    return pos
2898 def _parse_xtime(flag, data, pos, basetime=None):
2899 """Parse one RAR3 extended time field
2901 res = None
2902 if flag & 8:
2903 if not basetime:
2904 basetime, pos = load_dostime(data, pos)
2906 # load second fractions
2907 rem = 0
2908 cnt = flag & 3
2909 for _ in range(cnt):
2910 b, pos = load_byte(data, pos)
2911 rem = (b << 16) | (rem >> 8)
2913 # convert 100ns units to nanoseconds
2914 nsec = rem * 100
2916 # dostime has room for 30 seconds only, correct if needed
2917 if flag & 4 and basetime.second < 59:
2918 basetime = basetime.replace(second=basetime.second + 1)
2920 res = to_nsdatetime(basetime, nsec)
2921 return res, pos
def is_filelike(obj):
    """Filename or file object?

    Returns False for ``bytes``, ``str`` and ``Path`` values and True
    for objects that have ``read``, ``tell`` and ``seek``.  Anything
    else raises ValueError.
    """
    if isinstance(obj, (bytes, str, Path)):
        return False
    if not all(hasattr(obj, attr) for attr in ("read", "tell", "seek")):
        raise ValueError("Invalid object passed as file")
    return True
def rar3_s2k(pwd, salt):
    """String-to-key hash for RAR3.

    Returns ``(key, iv)`` derived from the password (str, or bytes
    holding utf8) and the salt.
    """
    if not isinstance(pwd, str):
        pwd = pwd.decode("utf8")

    # must be a mutable buffer: the hash context is created with
    # rarbug=True and may rewrite it while hashing
    seed = bytearray(pwd.encode("utf-16le") + salt)
    ctx = Rar3Sha1(rarbug=True)

    rounds = 16 * 0x4000
    iv_parts = []
    for n in range(rounds):
        counter = S_LONG.pack(n)
        ctx.update(seed)
        ctx.update(counter[:3])
        # one iv byte is taken at the start of each 0x4000-round group
        if n % 0x4000 == 0:
            iv_parts.append(ctx.digest()[19:20])
    iv = EMPTY.join(iv_parts)

    # the key is the first 16 digest bytes with each 32-bit word byte-swapped
    key_be = ctx.digest()[:16]
    key_le = pack("<LLLL", *unpack(">LLLL", key_be))
    return key_le, iv
def rar3_decompress(vers, meth, data, declen=0, flags=0, crc=0, pwd=None, salt=None):
    """Decompress blob of compressed data.

    Used for data with non-standard header - eg. comments.

    The blob is wrapped into a temporary one-file archive which is then
    handed to the external tool; returns whatever the tool printed.
    """
    # stored and not encrypted: nothing to do
    if meth == RAR_M0 and (flags & RAR_FILE_PASSWORD) == 0:
        return data

    # take only necessary flags
    flags &= RAR_FILE_PASSWORD | RAR_FILE_SALT | RAR_FILE_DICTMASK
    flags |= RAR_LONG_BLOCK

    # file header for an entry called "data"
    entry_name = b"data"
    dos_date = ((2010 - 1980) << 25) + (12 << 21) + (31 << 16)
    attrs = 0x20
    file_hdr = S_FILE_HDR.pack(len(data), declen, RAR_OS_MSDOS, crc,
                               dos_date, vers, meth, len(entry_name), attrs)
    file_hdr += entry_name
    if flags & RAR_FILE_SALT:
        if not salt:
            return EMPTY
        file_hdr += salt

    # full header: the crc field covers everything after itself, so
    # pack once with a zero crc, compute it, then pack again
    hdr_len = S_BLK_HDR.size + len(file_hdr)
    draft = S_BLK_HDR.pack(0, RAR_BLOCK_FILE, flags, hdr_len) + file_hdr
    hdr_crc = crc32(draft[2:]) & 0xFFFF
    block_hdr = S_BLK_HDR.pack(hdr_crc, RAR_BLOCK_FILE, flags, hdr_len) + file_hdr

    # archive main header
    main_hdr = S_BLK_HDR.pack(0x90CF, RAR_BLOCK_MAIN, 0, 13) + ZERO * (2 + 4)

    # decompress via temp rar
    setup = tool_setup()
    tmpfd, tmpname = mkstemp(suffix=".rar", dir=HACK_TMP_DIR)
    tmpf = os.fdopen(tmpfd, "wb")
    try:
        tmpf.write(RAR_ID + main_hdr + block_hdr + data)
        tmpf.close()

        # password is passed on only for encrypted data
        curpwd = pwd if (flags & RAR_FILE_PASSWORD) and pwd else None
        cmd = setup.open_cmdline(curpwd, tmpname)
        proc = custom_popen(cmd)
        return proc.communicate()[0]
    finally:
        tmpf.close()
        os.unlink(tmpname)
def sanitize_filename(fname, pathsep, is_win32):
    """Simulate unrar sanitization.

    Replaces characters matched by the platform's bad-character pattern
    with "_", drops empty, "." and ".." path segments and joins the
    remaining segments with ``pathsep``.
    """
    if is_win32:
        # drop a leading two-character "X:" prefix
        if len(fname) > 1 and fname[1] == ":":
            fname = fname[2:]
        bad_chars = RC_BAD_CHARS_WIN32
    else:
        bad_chars = RC_BAD_CHARS_UNIX
    if bad_chars.search(fname):
        fname = bad_chars.sub("_", fname)

    kept = []
    for seg in fname.split("/"):
        if seg not in ("", ".", ".."):
            # on win32 a trailing space or dot is replaced too
            if is_win32 and seg[-1] in (" ", "."):
                seg = seg[:-1] + "_"
            kept.append(seg)
    return pathsep.join(kept)
def empty_read(src, size, blklen):
    """Read and drop fixed amount of data.

    Consumes ``size`` bytes from ``src`` in reads of at most ``blklen``
    bytes; raises BadRarFile when the source ends early.
    """
    remaining = size
    while remaining > 0:
        chunk = src.read(blklen if remaining > blklen else remaining)
        if not chunk:
            raise BadRarFile("cannot load data")
        remaining -= len(chunk)
def to_datetime(t):
    """Convert 6-part time tuple into datetime object.

    Out-of-range month, day, hour, minute and second values are clamped
    into their valid range instead of being rejected.
    """
    year, month, day, hour, minute, second = t

    # fast path: the values are usually fine as they are
    try:
        return datetime(year, month, day, hour, minute, second)
    except ValueError:
        pass

    # clamp each field; february is allowed 29 days at this point
    days_in_month = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
    month = max(1, min(month, 12))
    day = max(1, min(day, days_in_month[month]))
    hour = min(hour, 23)
    minute = min(minute, 59)
    second = min(second, 59)

    # feb 29 exists only in leap years, otherwise fall back to feb 28
    if (month, day) == (2, 29):
        try:
            return datetime(year, month, day, hour, minute, second)
        except ValueError:
            day = 28
    return datetime(year, month, day, hour, minute, second)
def parse_dos_time(stamp):
    """Parse standard 32-bit DOS timestamp.

    Returns ``(year, month, day, hour, minute, second)``.  Seconds are
    stored in units of two, the year as offset from 1980.
    """
    half_secs = stamp & 0x1F
    minute = (stamp >> 5) & 0x3F
    hour = (stamp >> 11) & 0x1F
    day = (stamp >> 16) & 0x1F
    month = (stamp >> 21) & 0x0F
    year = ((stamp >> 25) & 0x7F) + 1980
    return (year, month, day, hour, minute, half_secs * 2)
# pylint: disable=arguments-differ,signature-differs
class nsdatetime(datetime):
    """Datetime that carries nanoseconds.

    Arithmetic not supported, will lose nanoseconds.

    Instances exist only for values that do not fit into microseconds:
    when the nanosecond value is a multiple of 1000, the constructor
    returns a plain :class:`datetime` instead.

    .. versionadded:: 4.0
    """
    __slots__ = ("nanosecond",)
    nanosecond: int     #: Number of nanoseconds, 0 <= nanosecond <= 999999999

    def __new__(cls, year, month, day, hour=0, minute=0, second=0,
                microsecond=0, tzinfo=None, *, fold=0, nanosecond=0):
        # a non-zero nanosecond value overrides microsecond
        usec, mod = divmod(nanosecond, 1000) if nanosecond else (microsecond, 0)
        if mod == 0:
            # representable without loss, no subclass needed
            return datetime(year, month, day, hour, minute, second, usec, tzinfo, fold=fold)
        self = super().__new__(cls, year, month, day, hour, minute, second, usec, tzinfo, fold=fold)
        self.nanosecond = nanosecond
        return self

    def isoformat(self, sep="T", timespec="auto"):
        """Formats with nanosecond precision by default.
        """
        if timespec == "auto":
            # swap the 6 microsecond digits for 9 nanosecond digits and
            # keep whatever follows them (utc offset)
            pre, post = super().isoformat(sep, "microseconds").split(".", 1)
            return f"{pre}.{self.nanosecond:09d}{post[6:]}"
        return super().isoformat(sep, timespec)

    def astimezone(self, tz=None):
        """Convert to new timezone.
        """
        tmp = super().astimezone(tz)
        return self.__class__(tmp.year, tmp.month, tmp.day, tmp.hour, tmp.minute, tmp.second,
                              nanosecond=self.nanosecond, tzinfo=tmp.tzinfo, fold=tmp.fold)

    def replace(self, year=None, month=None, day=None, hour=None, minute=None, second=None,
                microsecond=None, tzinfo=None, *, fold=None, nanosecond=None):
        """Return new timestamp with specified fields replaced.

        An explicit ``nanosecond`` wins over ``microsecond``.
        """
        return self.__class__(
            self.year if year is None else year,
            self.month if month is None else month,
            self.day if day is None else day,
            self.hour if hour is None else hour,
            self.minute if minute is None else minute,
            self.second if second is None else second,
            nanosecond=((self.nanosecond if microsecond is None else microsecond * 1000)
                        if nanosecond is None else nanosecond),
            tzinfo=self.tzinfo if tzinfo is None else tzinfo,
            fold=self.fold if fold is None else fold)

    def __hash__(self):
        return hash((super().__hash__(), self.nanosecond)) if self.nanosecond else super().__hash__()

    def __eq__(self, other):
        # let Python handle foreign types instead of failing on a
        # missing .microsecond attribute
        if not isinstance(other, datetime):
            return NotImplemented
        otherns = other.nanosecond if isinstance(other, nsdatetime) else other.microsecond * 1000
        return super().__eq__(other) and self.nanosecond == otherns

    def __gt__(self, other):
        if not isinstance(other, datetime):
            return NotImplemented
        otherns = other.nanosecond if isinstance(other, nsdatetime) else other.microsecond * 1000
        # base class compares with microsecond precision; nanoseconds
        # decide only when that comparison is a tie
        return super().__gt__(other) or (super().__eq__(other) and self.nanosecond > otherns)

    def __lt__(self, other):
        return not (self > other or self == other)

    def __ge__(self, other):
        return not self < other

    def __le__(self, other):
        return not self > other

    def __ne__(self, other):
        return not self == other
def to_nsdatetime(dt, nsec):
    """Apply nanoseconds to datetime.

    Returns ``dt`` itself when ``nsec`` is zero; otherwise a new
    timestamp is built from the fields of ``dt`` down to seconds, with
    ``nsec`` as the sub-second part.
    """
    if nsec:
        return nsdatetime(dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second,
                          tzinfo=dt.tzinfo, fold=dt.fold, nanosecond=nsec)
    return dt
def to_nsecs(dt):
    """Convert datetime instance to nanoseconds.

    Returns whole seconds of ``dt.timestamp()`` scaled to nanoseconds
    plus the sub-second part of ``dt``.
    """
    if isinstance(dt, nsdatetime):
        subsec = dt.nanosecond
    else:
        subsec = dt.microsecond * 1000
    whole = int(dt.timestamp())
    return whole * 1000000000 + subsec
def custom_popen(cmd):
    """Disconnect cmd from parent fds, read only from stdout.

    Starts ``cmd`` unbuffered with stdin and stdout as pipes and stderr
    merged into stdout.  Raises RarCannotExec when the program is
    missing or may not be executed; other OSErrors pass through.
    """
    # needed for py2exe
    if sys.platform == "win32":
        creationflags = 0x08000000   # CREATE_NO_WINDOW
    else:
        creationflags = 0

    # run command
    try:
        return Popen(cmd, bufsize=0, stdout=PIPE, stdin=PIPE, stderr=STDOUT,
                     creationflags=creationflags)
    except OSError as ex:
        if ex.errno == errno.ENOENT:
            raise RarCannotExec("Unrar not installed?")
        if ex.errno in (errno.EACCES, errno.EPERM):
            raise RarCannotExec("Cannot execute unrar")
        raise
def check_returncode(code, out, errmap):
    """Raise exception according to unrar exit code.

    Does nothing for exit code 0.  Otherwise the exception class is
    looked up in ``errmap`` by exit code, with fallbacks for code 255,
    negative codes and unknown codes.  The message consists of the
    class docstring, the code and, when not empty, ``out``.
    """
    if code == 0:
        return

    # pick the exception class
    if 0 < code < len(errmap):
        exc = errmap[code]
    elif code == 255:
        exc = RarUserBreak
    elif code < 0:
        exc = RarSignalExit
    else:
        exc = RarUnknownError

    # format message
    msg = "%s [%d]" % (exc.__doc__, code)
    if out:
        msg = "%s [%d]: %s" % (exc.__doc__, code, out)

    raise exc(msg)
def membuf_tempfile(memfile):
    """Write in-memory file object to real file.

    Copies ``memfile`` from its start into a new temporary ``.rar`` file
    and returns the file's name.  When copying fails, the temporary
    file is removed and the error is re-raised.
    """
    memfile.seek(0, 0)

    fd, path = mkstemp(suffix=".rar", dir=HACK_TMP_DIR)
    out = os.fdopen(fd, "wb")

    try:
        shutil.copyfileobj(memfile, out, BSIZE)
        out.close()
    except BaseException:
        # do not leave a half-written file behind
        out.close()
        os.unlink(path)
        raise
    return path
#
# Find working command-line tool
#

class ToolSetup:
    """Command-line builder for one backend tool.

    ``setup`` is a config dict with the keys ``open_cmd``, ``check_cmd``,
    ``password``, ``no_password`` and ``errmap``.  The first element of
    each command tuple is the name of a module-level variable holding
    the actual executable name.
    """
    def __init__(self, setup):
        self.setup = setup

    def check(self):
        """Return True when the tool's check command runs and exits with 0."""
        cmdline = self.get_cmdline("check_cmd", None)
        try:
            p = custom_popen(cmdline)
            # output is irrelevant, only the exit code counts
            p.communicate()
            return p.returncode == 0
        except RarCannotExec:
            return False

    def open_cmdline(self, pwd, rarfn, filefn=None):
        """Return command line that writes archive contents to stdout.

        ``filefn`` optionally names the archive entry to process.
        """
        cmdline = self.get_cmdline("open_cmd", pwd)
        cmdline.append(rarfn)
        if filefn:
            self.add_file_arg(cmdline, filefn)
        return cmdline

    def get_errmap(self):
        """Return list that maps exit codes to exception classes."""
        return self.setup["errmap"]

    def get_cmdline(self, key, pwd, nodash=False):
        """Return fresh argument list for the command stored under ``key``.

        Password arguments are added, followed by ``--`` unless
        ``nodash`` is set.
        """
        cmdline = list(self.setup[key])
        # config stores the name of the variable, not the executable
        cmdline[0] = globals()[cmdline[0]]
        self.add_password_arg(cmdline, pwd)
        if not nodash:
            cmdline.append("--")
        return cmdline

    def add_file_arg(self, cmdline, filename):
        """Append archive entry name to commandline."""
        cmdline.append(filename)

    def add_password_arg(self, cmdline, pwd):
        """Append password switch to commandline.

        Raises RarCannotExec when a password is given but the tool's
        config has no password switch (``"password": None``).
        """
        if pwd is None:
            cmdline.extend(self.setup["no_password"])
            return

        if not isinstance(pwd, str):
            pwd = pwd.decode("utf8")
        args = self.setup["password"]
        if args is None:
            tool = self.setup["open_cmd"][0]
            raise RarCannotExec(f"{tool} does not support passwords")
        if isinstance(args, str):
            # switch and password form a single argument
            cmdline.append(args + pwd)
        else:
            # password is a separate argument after the switch(es)
            cmdline.extend(args)
            cmdline.append(pwd)
# Settings for the unrar backend.
UNRAR_CONFIG = {
    "open_cmd": ("UNRAR_TOOL", "p", "-inul"),
    "check_cmd": ("UNRAR_TOOL", "-inul"),
    # switch and password are passed as one argument
    "password": "-p",
    "no_password": ("-p-",),
    # map return code to exception class, codes from rar.txt
    "errmap": [
        None,
        RarWarning,             # 1
        RarFatalError,          # 2
        RarCRCError,            # 3
        RarLockedArchiveError,  # 4
        RarWriteError,          # 5
        RarOpenError,           # 6
        RarUserError,           # 7
        RarMemoryError,         # 8
        RarCreateError,         # 9
        RarNoFilesError,        # 10
        RarWrongPassword,       # 11
    ],
}
# Settings for the unar backend.
#
# Problems with unar RAR backend:
# - Does not support RAR2 locked files [fails to read]
# - Does not support RAR5 Blake2sp hash [reading works]
UNAR_CONFIG = {
    "open_cmd": ("UNAR_TOOL", "-q", "-o", "-"),
    "check_cmd": ("UNAR_TOOL", "-version"),
    # password is passed as separate argument after the switch
    "password": ("-p",),
    "no_password": ("-p", ""),
    # no exit code is mapped to a specific exception
    "errmap": [None],
}
# Settings for the bsdtar (libarchive) backend.
#
# Problems with libarchive RAR backend:
# - Does not support solid archives.
# - Does not support password-protected archives.
# - Does not support RARVM-based compression filters.
BSDTAR_CONFIG = {
    "open_cmd": ("BSDTAR_TOOL", "-x", "--to-stdout", "-f"),
    "check_cmd": ("BSDTAR_TOOL", "--version"),
    # there is no password switch
    "password": None,
    "no_password": (),
    # no exit code is mapped to a specific exception
    "errmap": [None],
}
# ToolSetup picked by tool_setup(); None until a tool was selected
CURRENT_SETUP = None


def tool_setup(unrar=True, unar=True, bsdtar=True, force=False):
    """Pick a tool, return cached ToolSetup.

    The enabled backends are probed in the order unrar, unar, bsdtar
    and the first one whose check succeeds is remembered.  ``force``
    drops the remembered choice first.  Raises RarCannotExec when no
    backend works.
    """
    global CURRENT_SETUP
    if force:
        CURRENT_SETUP = None
    if CURRENT_SETUP is not None:
        return CURRENT_SETUP

    candidates = (
        (unrar, UNRAR_CONFIG),
        (unar, UNAR_CONFIG),
        (bsdtar, BSDTAR_CONFIG),
    )
    for enabled, conf in candidates:
        if not enabled:
            continue
        setup = ToolSetup(conf)
        if setup.check():
            CURRENT_SETUP = setup
            return setup
    raise RarCannotExec("Cannot find working tool")
def main(args):
    """Minimal command-line interface for rarfile module.
    """
    import argparse

    parser = argparse.ArgumentParser(description=main.__doc__)
    # exactly one of the actions must be given
    actions = parser.add_mutually_exclusive_group(required=True)
    actions.add_argument("-l", "--list", metavar="<rarfile>",
                         help="Show archive listing")
    actions.add_argument("-e", "--extract", nargs=2,
                         metavar=("<rarfile>", "<output_dir>"),
                         help="Extract archive into target dir")
    actions.add_argument("-t", "--test", metavar="<rarfile>",
                         help="Test if a archive is valid")
    opts = parser.parse_args(args)

    if opts.list:
        with RarFile(opts.list) as rf:
            rf.printdir()
    elif opts.test:
        with RarFile(opts.test) as rf:
            rf.testrar()
    elif opts.extract:
        archive, target_dir = opts.extract
        with RarFile(archive) as rf:
            rf.extractall(target_dir)
# run the command-line interface when executed as a script
if __name__ == "__main__":
    main(sys.argv[1:])