Cleanup
[rarfile.git] / rarfile.py
bloba5dc07ff61a8146e9eac53c81bd1f5e6e079f29a
1 # rarfile.py
3 # Copyright (c) 2005-2020 Marko Kreen <markokr@gmail.com>
5 # Permission to use, copy, modify, and/or distribute this software for any
6 # purpose with or without fee is hereby granted, provided that the above
7 # copyright notice and this permission notice appear in all copies.
9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 r"""RAR archive reader.
This is a Python module for reading RAR archives.  The interface
is made as :mod:`zipfile`-like as possible.
22 Basic logic:
23 - Parse archive structure with Python.
24 - Extract non-compressed files with Python
25 - Extract compressed files with unrar.
26 - Optionally write compressed data to temp file to speed up unrar,
27 otherwise it needs to scan whole archive on each execution.
29 Example::
31 import rarfile
33 rf = rarfile.RarFile("myarchive.rar")
34 for f in rf.infolist():
35 print(f.filename, f.file_size)
36 if f.filename == "README":
37 print(rf.read(f))
39 Archive files can also be accessed via file-like object returned
40 by :meth:`RarFile.open`::
42 import rarfile
44 with rarfile.RarFile("archive.rar") as rf:
45 with rf.open("README") as f:
46 for ln in f:
47 print(ln.strip())
49 For decompression to work, either ``unrar`` or ``unar`` tool must be in PATH.
51 """
53 import errno
54 import io
55 import os
56 import re
57 import shutil
58 import struct
59 import sys
60 import warnings
61 from binascii import crc32, hexlify
62 from datetime import datetime, timezone
63 from hashlib import blake2s, pbkdf2_hmac, sha1
64 from pathlib import Path
65 from struct import Struct, pack, unpack
66 from subprocess import DEVNULL, PIPE, STDOUT, Popen
67 from tempfile import mkstemp
# only needed for encrypted headers
#
# Two AES backends are tried in order: ``cryptography`` first, then
# ``Crypto`` as fallback.  Whichever imports successfully
# defines ``AES_CBC_Decrypt`` with the same tiny interface: an object
# constructed from (key, iv) that exposes a ``decrypt(data)`` callable.
# ``_have_crypto`` records whether either backend was importable.
try:
    try:
        from cryptography.hazmat.backends import default_backend
        from cryptography.hazmat.primitives.ciphers import (
            Cipher, algorithms, modes,
        )

        class AES_CBC_Decrypt:
            """Decrypt API"""
            def __init__(self, key, iv):
                # AES in CBC mode; ``decrypt`` is the decryptor's streaming update().
                ciph = Cipher(algorithms.AES(key), modes.CBC(iv), default_backend())
                self.decrypt = ciph.decryptor().update

    except ImportError:
        from Crypto.Cipher import AES

        class AES_CBC_Decrypt:
            """Decrypt API"""
            def __init__(self, key, iv):
                # Same interface as above, backed by Crypto.Cipher.AES.
                self.decrypt = AES.new(key, AES.MODE_CBC, iv).decrypt
    _have_crypto = 1
except ImportError:
    # neither backend is installed
    _have_crypto = 0
def tohex(data):
    """Render binary *data* as a lowercase hexadecimal text string."""
    hex_bytes = hexlify(data)
    return str(hex_bytes, "ascii")
#: version of this module
__version__ = "4.0a1"

# export only interesting items
__all__ = ["is_rarfile", "is_rarfile_sfx", "RarInfo", "RarFile", "RarExtFile"]

##
## Module configuration.  Can be tuned after importing.
##

#: executable for unrar tool
UNRAR_TOOL = "unrar"

#: executable for unar tool
UNAR_TOOL = "unar"

#: executable for bsdtar tool
BSDTAR_TOOL = "bsdtar"

#: default fallback charset
DEFAULT_CHARSET = "windows-1252"

#: list of encodings to try, with fallback to DEFAULT_CHARSET if none succeed
TRY_ENCODINGS = ("utf8", "utf-16le")

#: whether to speed up decompression by using tmp archive
#: (a false value disables the temp-archive path for archives opened by name)
USE_EXTRACT_HACK = 1

#: limit the filesize for tmp archive usage
#: (compared against the entry's uncompressed ``file_size``)
HACK_SIZE_LIMIT = 20 * 1024 * 1024

#: set specific directory for mkstemp() used by hack dir usage
#: (None lets mkstemp() pick its default location)
HACK_TMP_DIR = None

#: Separator for path name components.  Always "/".
PATH_SEP = "/"
##
## rar constants
##

# block types (the trailing comment is the ASCII character of the value)
RAR_BLOCK_MARK = 0x72  # r
RAR_BLOCK_MAIN = 0x73  # s
RAR_BLOCK_FILE = 0x74  # t
RAR_BLOCK_OLD_COMMENT = 0x75  # u
RAR_BLOCK_OLD_EXTRA = 0x76  # v
RAR_BLOCK_OLD_SUB = 0x77  # w
RAR_BLOCK_OLD_RECOVERY = 0x78  # x
RAR_BLOCK_OLD_AUTH = 0x79  # y
RAR_BLOCK_SUB = 0x7a  # z
RAR_BLOCK_ENDARC = 0x7b  # {

# flags for RAR_BLOCK_MAIN
RAR_MAIN_VOLUME = 0x0001
RAR_MAIN_COMMENT = 0x0002
RAR_MAIN_LOCK = 0x0004
RAR_MAIN_SOLID = 0x0008
RAR_MAIN_NEWNUMBERING = 0x0010
RAR_MAIN_AUTH = 0x0020
RAR_MAIN_RECOVERY = 0x0040
RAR_MAIN_PASSWORD = 0x0080
RAR_MAIN_FIRSTVOLUME = 0x0100
RAR_MAIN_ENCRYPTVER = 0x0200

# flags for RAR_BLOCK_FILE
RAR_FILE_SPLIT_BEFORE = 0x0001
RAR_FILE_SPLIT_AFTER = 0x0002
RAR_FILE_PASSWORD = 0x0004
RAR_FILE_COMMENT = 0x0008
RAR_FILE_SOLID = 0x0010
# RAR_FILE_DICT* values all fall inside RAR_FILE_DICTMASK
RAR_FILE_DICTMASK = 0x00e0
RAR_FILE_DICT64 = 0x0000
RAR_FILE_DICT128 = 0x0020
RAR_FILE_DICT256 = 0x0040
RAR_FILE_DICT512 = 0x0060
RAR_FILE_DICT1024 = 0x0080
RAR_FILE_DICT2048 = 0x00a0
RAR_FILE_DICT4096 = 0x00c0
# same value as RAR_FILE_DICTMASK: an entry is a directory when all
# three mask bits are set (see RarInfo.is_dir)
RAR_FILE_DIRECTORY = 0x00e0
RAR_FILE_LARGE = 0x0100
RAR_FILE_UNICODE = 0x0200
RAR_FILE_SALT = 0x0400
RAR_FILE_VERSION = 0x0800
RAR_FILE_EXTTIME = 0x1000
RAR_FILE_EXTFLAGS = 0x2000

# flags for RAR_BLOCK_ENDARC
RAR_ENDARC_NEXT_VOLUME = 0x0001
RAR_ENDARC_DATACRC = 0x0002
RAR_ENDARC_REVSPACE = 0x0004
RAR_ENDARC_VOLNR = 0x0008

# flags common to all blocks
RAR_SKIP_IF_UNKNOWN = 0x4000
RAR_LONG_BLOCK = 0x8000

# Host OS types
RAR_OS_MSDOS = 0  #: MSDOS (only in RAR3)
RAR_OS_OS2 = 1  #: OS2 (only in RAR3)
RAR_OS_WIN32 = 2  #: Windows
RAR_OS_UNIX = 3  #: UNIX
RAR_OS_MACOS = 4  #: MacOS (only in RAR3)
RAR_OS_BEOS = 5  #: BeOS (only in RAR3)

# Compression methods - "0".."5"
# (values are the ASCII codes of those digit characters)
RAR_M0 = 0x30  #: No compression.
RAR_M1 = 0x31  #: Compression level `-m1` - Fastest compression.
RAR_M2 = 0x32  #: Compression level `-m2`.
RAR_M3 = 0x33  #: Compression level `-m3`.
RAR_M4 = 0x34  #: Compression level `-m4`.
RAR_M5 = 0x35  #: Compression level `-m5` - Maximum compression.
##
## RAR5 constants
##

# RAR5_BLOCK_*: block type codes
RAR5_BLOCK_MAIN = 1
RAR5_BLOCK_FILE = 2
RAR5_BLOCK_SERVICE = 3
RAR5_BLOCK_ENCRYPTION = 4
RAR5_BLOCK_ENDARC = 5

# RAR5_BLOCK_FLAG_*: bit flags (each value is a distinct single bit)
RAR5_BLOCK_FLAG_EXTRA_DATA = 0x01
RAR5_BLOCK_FLAG_DATA_AREA = 0x02
RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN = 0x04
RAR5_BLOCK_FLAG_SPLIT_BEFORE = 0x08
RAR5_BLOCK_FLAG_SPLIT_AFTER = 0x10
RAR5_BLOCK_FLAG_DEPENDS_PREV = 0x20
RAR5_BLOCK_FLAG_KEEP_WITH_PARENT = 0x40

# RAR5_MAIN_FLAG_*: bit flags
RAR5_MAIN_FLAG_ISVOL = 0x01
RAR5_MAIN_FLAG_HAS_VOLNR = 0x02
RAR5_MAIN_FLAG_SOLID = 0x04
RAR5_MAIN_FLAG_RECOVERY = 0x08
RAR5_MAIN_FLAG_LOCKED = 0x10

# RAR5_FILE_FLAG_*: bit flags
RAR5_FILE_FLAG_ISDIR = 0x01
RAR5_FILE_FLAG_HAS_MTIME = 0x02
RAR5_FILE_FLAG_HAS_CRC32 = 0x04
RAR5_FILE_FLAG_UNKNOWN_SIZE = 0x08

RAR5_COMPR_SOLID = 0x40

RAR5_ENC_FLAG_HAS_CHECKVAL = 0x01

RAR5_ENDARC_FLAG_NEXT_VOL = 0x01

# RAR5_XFILE_*: record type codes
RAR5_XFILE_ENCRYPTION = 1
RAR5_XFILE_HASH = 2
RAR5_XFILE_TIME = 3
RAR5_XFILE_VERSION = 4
RAR5_XFILE_REDIR = 5
RAR5_XFILE_OWNER = 6
RAR5_XFILE_SERVICE = 7

# RAR5_XTIME_*: bit flags
RAR5_XTIME_UNIXTIME = 0x01
RAR5_XTIME_HAS_MTIME = 0x02
RAR5_XTIME_HAS_CTIME = 0x04
RAR5_XTIME_HAS_ATIME = 0x08
RAR5_XTIME_UNIXTIME_NS = 0x10

RAR5_XENC_CIPHER_AES256 = 0

RAR5_XENC_CHECKVAL = 0x01
RAR5_XENC_TWEAKED = 0x02

RAR5_XHASH_BLAKE2SP = 0

# redirection types, first element of RarInfo.file_redir
RAR5_XREDIR_UNIX_SYMLINK = 1
RAR5_XREDIR_WINDOWS_SYMLINK = 2
RAR5_XREDIR_WINDOWS_JUNCTION = 3
RAR5_XREDIR_HARD_LINK = 4
RAR5_XREDIR_FILE_COPY = 5

# redirection flag bit, second element of RarInfo.file_redir
RAR5_XREDIR_ISDIR = 0x01

# RAR5_XOWNER_*: bit flags
RAR5_XOWNER_UNAME = 0x01
RAR5_XOWNER_GNAME = 0x02
RAR5_XOWNER_UID = 0x04
RAR5_XOWNER_GID = 0x08

RAR5_OS_WINDOWS = 0
RAR5_OS_UNIX = 1

# DOS_MODE_*: bits tested against RarInfo.mode for DOS/Windows entries
DOS_MODE_ARCHIVE = 0x20
DOS_MODE_DIR = 0x10
DOS_MODE_SYSTEM = 0x04
DOS_MODE_HIDDEN = 0x02
DOS_MODE_READONLY = 0x01
##
## internal constants
##

# archive signatures; _get_rar_version() maps them to RAR_V3 / RAR_V5
RAR_ID = b"Rar!\x1a\x07\x00"
RAR5_ID = b"Rar!\x1a\x07\x01\x00"
# chunk size for bulk reads/copies: 512 KiB on win32, 64 KiB elsewhere
BSIZE = 512 * 1024 if sys.platform == "win32" else 64 * 1024

# size of the second (large) read done by _find_sfx_header(): 2 MiB
SFX_MAX_SIZE = 2 * 1024 * 1024
# format version markers returned by the signature detection helpers
RAR_V3 = 3
RAR_V5 = 5

# character-class body shared by both patterns below: control characters
# \x00-\x1F plus < > | " ? *
_BAD_CHARS = r"""\x00-\x1F<>|"?*"""
RC_BAD_CHARS_UNIX = re.compile(r"[%s]" % _BAD_CHARS)
# win32 variant additionally matches ":", "^" and backslash
RC_BAD_CHARS_WIN32 = re.compile(r"[%s:^\\]" % _BAD_CHARS)
def _get_rar_version(xfile):
    """Detect the archive format from the leading signature bytes.

    Reads ``len(RAR5_ID)`` bytes from the start of *xfile* and returns
    :data:`RAR_V3` or :data:`RAR_V5` when they begin with the matching
    signature, otherwise 0.
    """
    with XFile(xfile) as stream:
        head = stream.read(len(RAR5_ID))

    # RAR_ID is checked first, same order as the two signatures are defined
    for magic, version in ((RAR_ID, RAR_V3), (RAR5_ID, RAR_V5)):
        if head.startswith(magic):
            return version
    return 0
def _find_sfx_header(xfile):
    """Search the start of *xfile* for an embedded RAR signature.

    The data is read in two steps (64 bytes, then up to
    :data:`SFX_MAX_SIZE` more); after each step everything read so far is
    scanned for the prefix shared by both signatures.

    Returns a ``(version, offset)`` tuple, where version is
    :data:`RAR_V3` or :data:`RAR_V5`; ``(0, 0)`` when nothing was found.
    """
    # common prefix of RAR_ID and RAR5_ID
    marker = RAR_ID[:-1]
    collected = b""

    with XFile(xfile) as stream:
        for chunk_size in (64, SFX_MAX_SIZE):
            chunk = stream.read(chunk_size)
            if not chunk:
                break
            collected += chunk

            # rescan from the beginning: a signature cut off by the
            # previous chunk boundary is complete now
            hit = collected.find(marker)
            while hit >= 0:
                if collected.startswith(RAR_ID, hit):
                    return RAR_V3, hit
                if collected.startswith(RAR5_ID, hit):
                    return RAR_V5, hit
                hit = collected.find(marker, hit + len(marker))
    return 0, 0
346 ## Public interface
def is_rarfile(xfile):
    """Return True when *xfile* starts with a RAR signature.

    Only the leading signature bytes are inspected.
    """
    version = _get_rar_version(xfile)
    return version > 0
def is_rarfile_sfx(xfile):
    """Return True when *xfile* is a RAR archive, SFX archives included.

    The signature may be located after a self-extractor stub, so
    this will read up to 2M from the file.
    """
    version, _offset = _find_sfx_header(xfile)
    return version > 0
class Error(Exception):
    """Root of the rarfile exception hierarchy."""


class BadRarFile(Error):
    """Archive contains incorrect data."""


class NotRarFile(Error):
    """File is not a RAR archive."""


class BadRarName(Error):
    """Multipart name components could not be guessed."""


class NoRarEntry(Error):
    """Requested file was not found in the archive."""


class PasswordRequired(Error):
    """A password is needed for the file."""


class NeedFirstVolume(Error):
    """Processing must start from the first volume.

    Attributes:

        current_volume
            Volume number of current file or None if not known
    """

    def __init__(self, msg, volume):
        self.current_volume = volume
        super().__init__(msg)


class NoCrypto(Error):
    """Encrypted headers cannot be parsed - no crypto available."""


class RarExecError(Error):
    """Problem reported by unrar/rar."""


class RarWarning(RarExecError):
    """Non-fatal error."""


class RarFatalError(RarExecError):
    """Fatal error."""


class RarCRCError(RarExecError):
    """CRC error during unpacking."""


class RarLockedArchiveError(RarExecError):
    """Locked archive must not be modified."""


class RarWriteError(RarExecError):
    """Write error."""


class RarOpenError(RarExecError):
    """Open error."""


class RarUserError(RarExecError):
    """User error."""


class RarMemoryError(RarExecError):
    """Memory error."""


class RarCreateError(RarExecError):
    """Create error."""


class RarNoFilesError(RarExecError):
    """No files matching the pattern were found."""


class RarUserBreak(RarExecError):
    """User stop."""


class RarWrongPassword(RarExecError):
    """Incorrect password."""


class RarUnknownError(RarExecError):
    """Unknown exit code."""


class RarSignalExit(RarExecError):
    """Unrar exited with signal."""


class RarCannotExec(RarExecError):
    """Executable not found."""
class UnsupportedWarning(UserWarning):
    """Warning issued when an archive uses a feature rarfile does not support.

    .. versionadded:: 4.0
    """
class RarInfo:
    r"""Description of a single entry in a rar archive.

    Timestamps from RAR3 extended time fields are naive
    :class:`datetime.datetime` objects; those from RAR5 carry the UTC timezone.

    Attributes:

        filename
            File name with relative path.
            Path separator is "/".  Always unicode string.

        date_time
            File modification timestamp.  As tuple of (year, month, day, hour, minute, second).
            RAR5 allows archives where it is missing, it's None then.

        comment
            Optional file comment field.  Unicode string.  (RAR3-only)

        file_size
            Uncompressed size.

        compress_size
            Compressed size.

        compress_type
            Compression method: one of :data:`RAR_M0` .. :data:`RAR_M5` constants.

        extract_version
            Minimal Rar version needed for decompressing.  As (major*10 + minor),
            so 2.9 is 29.

            RAR3: 10, 20, 29

            RAR5 does not have such field in archive, it's simply set to 50.

        host_os
            Host OS type, one of RAR_OS_* constants.

            RAR3: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`, :data:`RAR_OS_MSDOS`,
            :data:`RAR_OS_OS2`, :data:`RAR_OS_BEOS`.

            RAR5: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`.

        mode
            File attributes. May be either dos-style or unix-style, depending on host_os.

        mtime
            File modification time.  Same value as :attr:`date_time`
            but as :class:`datetime.datetime` object with extended precision.

        ctime
            Optional time field: creation time.  As :class:`datetime.datetime` object.

        atime
            Optional time field: last access time.  As :class:`datetime.datetime` object.

        arctime
            Optional time field: archival time.  As :class:`datetime.datetime` object.
            (RAR3-only)

        CRC
            CRC-32 of uncompressed file, unsigned int.

            RAR5: may be None.

        blake2sp_hash
            Blake2SP hash over decompressed data.  (RAR5-only)

        volume
            Volume nr, starting from 0.

        volume_file
            Volume file name, where file starts.

        file_redir
            If not None, file is link of some sort.  Contains tuple of (type, flags, target).
            (RAR5-only)

            Type is one of constants:

                :data:`RAR5_XREDIR_UNIX_SYMLINK`
                    Unix symlink.
                :data:`RAR5_XREDIR_WINDOWS_SYMLINK`
                    Windows symlink.
                :data:`RAR5_XREDIR_WINDOWS_JUNCTION`
                    Windows junction.
                :data:`RAR5_XREDIR_HARD_LINK`
                    Hard link to target.
                :data:`RAR5_XREDIR_FILE_COPY`
                    Current file is copy of another archive entry.

            Flags may contain bits:

                :data:`RAR5_XREDIR_ISDIR`
                    Symlink points to directory.
    """

    # fields shared with zipfile.ZipInfo
    filename = None
    file_size = None
    compress_size = None
    date_time = None
    CRC = None
    volume = None
    orig_filename = None

    # extended timestamps (datetime objects), all optional
    mtime = None
    ctime = None
    atime = None

    extract_version = None
    mode = None
    host_os = None
    compress_type = None

    # present only for RAR3 archives
    comment = None
    arctime = None

    # present only for RAR5 archives
    blake2sp_hash = None
    file_redir = None

    # parser-internal: raw header flags and block type
    flags = 0
    type = None

    # zipfile compat
    def is_dir(self):
        """Tell whether the entry is a directory.

        .. versionadded:: 4.0
        """
        # directory entries have all bits of RAR_FILE_DIRECTORY set
        return (
            self.type == RAR_BLOCK_FILE
            and (self.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY
        )

    def is_symlink(self):
        """Tell whether the entry is a symlink; the base class always says no.

        .. versionadded:: 4.0
        """
        return False

    def is_file(self):
        """Tell whether the entry is a normal file; the base class always says no.

        .. versionadded:: 4.0
        """
        return False

    def needs_password(self):
        """Tell whether the entry data is stored password-protected."""
        return (
            self.type == RAR_BLOCK_FILE
            and (self.flags & RAR_FILE_PASSWORD) != 0
        )

    def isdir(self):
        """Tell whether the entry is a directory.

        .. deprecated:: 4.0
        """
        return self.is_dir()
class RarFile:
    """Parse RAR structure, provide access to files in archive.

    The archive is parsed on construction by a format-specific parser
    object; most public methods simply forward to that parser.
    """

    #: File name, if available.  Unicode string or None.
    filename = None

    #: Archive comment.  Unicode string or None.
    comment = None

    def __init__(self, file, mode="r", charset=None, info_callback=None,
                 crc_check=True, errors="stop"):
        """Open and parse a RAR archive.

        Parameters:

            file
                archive file name or file-like object.
            mode
                only "r" is supported.
            charset
                fallback charset to use, if filenames are not already Unicode-enabled.
            info_callback
                debug callback, gets to see all archive entries.
            crc_check
                set to False to disable CRC checks
            errors
                Either "stop" to quietly stop parsing on errors,
                or "strict" to raise errors.  Default is "stop".

        Raises:

            ValueError
                *errors* is neither "stop" nor "strict".
            NotImplementedError
                *mode* is not "r".
        """
        if is_filelike(file):
            self.filename = getattr(file, "name", None)
        else:
            if isinstance(file, Path):
                file = str(file)
            self.filename = file
        self._rarfile = file

        self._charset = charset or DEFAULT_CHARSET
        self._info_callback = info_callback
        self._crc_check = crc_check
        self._password = None
        self._file_parser = None

        if errors == "stop":
            self._strict = False
        elif errors == "strict":
            self._strict = True
        else:
            raise ValueError("Invalid value for errors= parameter.")

        if mode != "r":
            raise NotImplementedError("RarFile supports only mode=r")

        self._parse()

    def __enter__(self):
        """Open context."""
        return self

    def __exit__(self, typ, value, traceback):
        """Exit context."""
        self.close()

    def __iter__(self):
        """Iterate over members."""
        return iter(self.infolist())

    def setpassword(self, pwd):
        """Sets the password to use when extracting.

        When the archive headers are encrypted, the archive is parsed
        again so the headers can be read with the new password.
        """
        self._password = pwd
        if self._file_parser:
            if self._file_parser.has_header_encryption():
                # drop the parser so the archive is re-parsed below
                self._file_parser = None
        if not self._file_parser:
            self._parse()
        else:
            self._file_parser.setpassword(self._password)

    def needs_password(self):
        """Returns True if any archive entries require password for extraction.
        """
        return self._file_parser.needs_password()

    def namelist(self):
        """Return list of filenames in archive.
        """
        return [f.filename for f in self.infolist()]

    def infolist(self):
        """Return RarInfo objects for all files/directories in archive.
        """
        return self._file_parser.infolist()

    def volumelist(self):
        """Returns filenames of archive volumes.

        In case of single-volume archive, the list contains
        just the name of main archive file.
        """
        return self._file_parser.volumelist()

    def getinfo(self, name):
        """Return RarInfo for file.
        """
        return self._file_parser.getinfo(name)

    def open(self, name, mode="r", pwd=None):
        """Returns file-like object (:class:`RarExtFile`) from where the data can be read.

        The object implements :class:`io.RawIOBase` interface, so it can
        be further wrapped with :class:`io.BufferedReader`
        and :class:`io.TextIOWrapper`.

        The object is seekable, although the seeking is fast only on
        uncompressed files, on compressed files the seeking is implemented
        by reading ahead and/or restarting the decompression.

        Parameters:

            name
                file name or RarInfo instance.
            mode
                must be "r"
            pwd
                password to use for extracting.

        Raises:

            NotImplementedError
                *mode* is not "r".
            io.UnsupportedOperation
                the entry is a directory.
            PasswordRequired
                the entry is encrypted and no password is available.
        """

        if mode != "r":
            raise NotImplementedError("RarFile.open() supports only mode=r")

        # entry lookup
        inf = self.getinfo(name)
        if inf.is_dir():
            raise io.UnsupportedOperation("Directory does not have any data: " + inf.filename)

        # check password
        if inf.needs_password():
            pwd = pwd or self._password
            if pwd is None:
                raise PasswordRequired("File %s requires password" % inf.filename)
        else:
            # never hand a password to the parser for unencrypted entries
            pwd = None

        return self._file_parser.open(inf, pwd)

    def read(self, name, pwd=None):
        """Return uncompressed data for archive entry.

        For longer files using :meth:`RarFile.open` may be better idea.

        Parameters:

            name
                filename or RarInfo instance
            pwd
                password to use for extracting.
        """

        with self.open(name, "r", pwd) as f:
            return f.read()

    def close(self):
        """Release open resources."""
        pass

    def printdir(self, file=None):
        """Print archive file list to stdout or given file.
        """
        if file is None:
            file = sys.stdout
        for f in self.infolist():
            print(f.filename, file=file)

    def extract(self, member, path=None, pwd=None):
        """Extract single file into current directory, or into *path* if given.

        Parameters:

            member
                filename or :class:`RarInfo` instance
            path
                optional destination path
            pwd
                optional password to use

        Returns the path of the extracted entry.
        """
        inf = self.getinfo(member)
        return self._extract_one(inf, path, pwd, True)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all files into current directory, or into *path* if given.

        Parameters:

            path
                optional destination path
            members
                optional filename or :class:`RarInfo` instance list to extract
            pwd
                optional password to use
        """
        if members is None:
            members = self.namelist()

        done = set()
        dirs = []
        for m in members:
            inf = self.getinfo(m)
            # directory attributes are applied after the loop, once
            # everything has been extracted
            dst = self._extract_one(inf, path, pwd, not inf.is_dir())
            if inf.is_dir():
                if dst not in done:
                    dirs.append((dst, inf))
                    done.add(dst)
        if dirs:
            # reverse path order handles nested directories before parents
            dirs.sort(reverse=True)
            for dst, inf in dirs:
                self._set_attrs(inf, dst)

    def testrar(self, pwd=None):
        """Read all files and test CRC.
        """
        for member in self.infolist():
            if member.is_dir():
                continue
            with self.open(member, 'r', pwd) as f:
                empty_read(f, member.file_size, BSIZE)

    def strerror(self):
        """Return error string if parsing failed or None if no problems.
        """
        if not self._file_parser:
            return "Not a RAR file"
        return self._file_parser.strerror()

    ##
    ## private methods
    ##

    def _parse(self):
        """Run parser for file type
        """
        ver, sfx_ofs = _find_sfx_header(self._rarfile)
        if ver == RAR_V3:
            p3 = RAR3Parser(self._rarfile, self._password, self._crc_check,
                            self._charset, self._strict, self._info_callback,
                            sfx_ofs)
            self._file_parser = p3  # noqa
        elif ver == RAR_V5:
            p5 = RAR5Parser(self._rarfile, self._password, self._crc_check,
                            self._charset, self._strict, self._info_callback,
                            sfx_ofs)
            self._file_parser = p5  # noqa
        else:
            raise BadRarFile("Not a RAR file")

        self._file_parser.parse()
        self.comment = self._file_parser.comment

    def _extract_one(self, info, path, pwd, set_attrs):
        """Extract one entry below *path* and return the destination name."""
        fname = sanitize_filename(
            info.filename, os.path.sep, sys.platform == "win32"
        )

        if path is None:
            path = os.getcwd()
        else:
            path = os.fspath(path)
        dstfn = os.path.join(path, fname)

        dirname = os.path.dirname(dstfn)
        if dirname and dirname != ".":
            os.makedirs(dirname, exist_ok=True)

        if info.is_file():
            self._make_file(info, dstfn, pwd, set_attrs)
        elif info.is_dir():
            self._make_dir(info, dstfn, set_attrs)
        elif info.is_symlink():
            self._make_symlink(info, dstfn)

        return dstfn

    def _make_dir(self, info, dstfn, set_attrs):
        """Create directory *dstfn*, optionally applying entry attributes."""
        os.makedirs(dstfn, exist_ok=True)
        if set_attrs:
            self._set_attrs(info, dstfn)

    def _make_file(self, info, dstfn, pwd, set_attrs):
        """Write entry data into *dstfn*, optionally applying entry attributes."""
        with self.open(info, "r", pwd) as src:
            with open(dstfn, "wb") as dst:
                shutil.copyfileobj(src, dst)

        if set_attrs:
            self._set_attrs(info, dstfn)

    def _make_symlink(self, info, dstfn):
        """Create symlink *dstfn* for a link entry.

        Entries from Unix hosts take the link target from the entry
        data; otherwise it comes from the ``file_redir`` tuple.  Windows
        junctions and entries without any link information are skipped
        with an :class:`UnsupportedWarning`.
        """
        target_is_directory = False
        if info.host_os == RAR_OS_UNIX:
            link_name = self.read(info)
        elif info.file_redir:
            redir_type, redir_flags, link_name = info.file_redir
            if redir_type == RAR5_XREDIR_WINDOWS_JUNCTION:
                warnings.warn(f"Windows junction not supported - {info.filename}", UnsupportedWarning)
                return
            # ISDIR is a bit in the flags field, not in the type field
            target_is_directory = (redir_flags & RAR5_XREDIR_ISDIR) > 0
        else:
            # no link target available - nothing to create
            warnings.warn(f"Unsupported link type - {info.filename}", UnsupportedWarning)
            return

        os.symlink(link_name, dstfn, target_is_directory=target_is_directory)

    def _set_attrs(self, info, dstfn):
        """Apply permission bits and timestamps of *info* to *dstfn*."""
        if info.host_os == RAR_OS_UNIX:
            os.chmod(dstfn, info.mode & 0o777)
        elif info.host_os in (RAR_OS_WIN32, RAR_OS_MSDOS):
            # dos-style attributes: only the read-only bit is carried over,
            # by clearing all write permission bits
            if info.mode & DOS_MODE_READONLY:
                st = os.stat(dstfn)
                new_mode = st.st_mode & ~0o222
                os.chmod(dstfn, new_mode & 0o777)

        if info.mtime and hasattr(os, "utime"):
            # atime falls back to mtime when the entry has no atime
            mtime_ns = atime_ns = to_nsecs(info.mtime)
            if info.atime:
                atime_ns = to_nsecs(info.atime)
            os.utime(dstfn, ns=(atime_ns, mtime_ns))
972 # File format parsing
975 class CommonParser:
976 """Shared parser parts."""
977 _main = None
978 _hdrenc_main = None
979 _needs_password = False
980 _fd = None
981 _expect_sig = None
982 _parse_error = None
983 _password = None
984 comment = None
986 def __init__(self, rarfile, password, crc_check, charset, strict, info_cb, sfx_offset):
987 self._rarfile = rarfile
988 self._password = password
989 self._crc_check = crc_check
990 self._charset = charset
991 self._strict = strict
992 self._info_callback = info_cb
993 self._info_list = []
994 self._info_map = {}
995 self._vol_list = []
996 self._sfx_offset = sfx_offset
998 def has_header_encryption(self):
999 """Returns True if headers are encrypted
1001 if self._hdrenc_main:
1002 return True
1003 if self._main:
1004 if self._main.flags & RAR_MAIN_PASSWORD:
1005 return True
1006 return False
1008 def setpassword(self, pwd):
1009 """Set cached password."""
1010 self._password = pwd
1012 def volumelist(self):
1013 """Volume files"""
1014 return self._vol_list
1016 def needs_password(self):
1017 """Is password required"""
1018 return self._needs_password
1020 def strerror(self):
1021 """Last error"""
1022 return self._parse_error
1024 def infolist(self):
1025 """List of RarInfo records.
1027 return self._info_list
1029 def getinfo(self, member):
1030 """Return RarInfo for filename
1032 if isinstance(member, RarInfo):
1033 fname = member.filename
1034 elif isinstance(member, Path):
1035 fname = str(member)
1036 else:
1037 fname = member
1039 if fname.endswith("/"):
1040 fname = fname.rstrip("/")
1042 try:
1043 return self._info_map[fname]
1044 except KeyError:
1045 raise NoRarEntry("No such file: %s" % fname)
1047 def parse(self):
1048 """Process file."""
1049 self._fd = None
1050 try:
1051 self._parse_real()
1052 finally:
1053 if self._fd:
1054 self._fd.close()
1055 self._fd = None
1057 def _parse_real(self):
1058 """Actually read file.
1060 fd = XFile(self._rarfile)
1061 self._fd = fd
1062 fd.seek(self._sfx_offset, 0)
1063 sig = fd.read(len(self._expect_sig))
1064 if sig != self._expect_sig:
1065 if isinstance(self._rarfile, str):
1066 raise NotRarFile("Not a Rar archive: {}".format(self._rarfile))
1067 raise NotRarFile("Not a Rar archive")
1069 volume = 0 # first vol (.rar) is 0
1070 more_vols = False
1071 endarc = False
1072 volfile = self._rarfile
1073 self._vol_list = [self._rarfile]
1074 raise_need_first_vol = False
1075 while True:
1076 if endarc:
1077 h = None # don"t read past ENDARC
1078 else:
1079 h = self._parse_header(fd)
1080 if not h:
1081 if raise_need_first_vol:
1082 # did not find ENDARC with VOLNR
1083 raise NeedFirstVolume("Need to start from first volume", None)
1084 if more_vols:
1085 volume += 1
1086 fd.close()
1087 try:
1088 volfile = self._next_volname(volfile)
1089 fd = XFile(volfile)
1090 except IOError:
1091 self._set_error("Cannot open next volume: %s", volfile)
1092 break
1093 self._fd = fd
1094 sig = fd.read(len(self._expect_sig))
1095 if sig != self._expect_sig:
1096 self._set_error("Invalid volume sig: %s", volfile)
1097 break
1098 more_vols = False
1099 endarc = False
1100 self._vol_list.append(volfile)
1101 self._main = None
1102 continue
1103 break
1104 h.volume = volume
1105 h.volume_file = volfile
1107 if h.type == RAR_BLOCK_MAIN and not self._main:
1108 self._main = h
1109 if volume == 0 and (h.flags & RAR_MAIN_NEWNUMBERING):
1110 # RAR 2.x does not set FIRSTVOLUME,
1111 # so check it only if NEWNUMBERING is used
1112 if (h.flags & RAR_MAIN_FIRSTVOLUME) == 0:
1113 if getattr(h, "main_volume_number", None) is not None:
1114 # rar5 may have more info
1115 raise NeedFirstVolume(
1116 "Need to start from first volume (current: %r)"
1117 % (h.main_volume_number,),
1118 h.main_volume_number
1120 # delay raise until we have volnr from ENDARC
1121 raise_need_first_vol = True
1122 if h.flags & RAR_MAIN_PASSWORD:
1123 self._needs_password = True
1124 if not self._password:
1125 break
1126 elif h.type == RAR_BLOCK_ENDARC:
1127 more_vols = (h.flags & RAR_ENDARC_NEXT_VOLUME) > 0
1128 endarc = True
1129 if raise_need_first_vol and (h.flags & RAR_ENDARC_VOLNR) > 0:
1130 raise NeedFirstVolume(
1131 "Need to start from first volume (current: %r)"
1132 % (h.endarc_volnr,),
1133 h.endarc_volnr
1135 elif h.type == RAR_BLOCK_FILE:
1136 # RAR 2.x does not write RAR_BLOCK_ENDARC
1137 if h.flags & RAR_FILE_SPLIT_AFTER:
1138 more_vols = True
1139 # RAR 2.x does not set RAR_MAIN_FIRSTVOLUME
1140 if volume == 0 and h.flags & RAR_FILE_SPLIT_BEFORE:
1141 raise_need_first_vol = True
1143 if h.needs_password():
1144 self._needs_password = True
1146 # store it
1147 self.process_entry(fd, h)
1149 if self._info_callback:
1150 self._info_callback(h)
1152 # go to next header
1153 if h.add_size > 0:
1154 fd.seek(h.data_offset + h.add_size, 0)
1156 def process_entry(self, fd, item):
1157 """Examine item, add into lookup cache."""
1158 raise NotImplementedError()
1160 def _decrypt_header(self, fd):
1161 raise NotImplementedError("_decrypt_header")
1163 def _parse_block_header(self, fd):
1164 raise NotImplementedError("_parse_block_header")
1166 def _open_hack(self, inf, pwd):
1167 raise NotImplementedError("_open_hack")
1169 def _parse_header(self, fd):
1170 """Read single header
1172 try:
1173 # handle encrypted headers
1174 if (self._main and self._main.flags & RAR_MAIN_PASSWORD) or self._hdrenc_main:
1175 if not self._password:
1176 return None
1177 fd = self._decrypt_header(fd)
1179 # now read actual header
1180 return self._parse_block_header(fd)
1181 except struct.error:
1182 self._set_error("Broken header in RAR file")
1183 return None
1185 def _next_volname(self, volfile):
1186 """Given current vol name, construct next one
1188 if is_filelike(volfile):
1189 raise IOError("Working on single FD")
1190 if self._main.flags & RAR_MAIN_NEWNUMBERING:
1191 return _next_newvol(volfile)
1192 return _next_oldvol(volfile)
1194 def _set_error(self, msg, *args):
1195 if args:
1196 msg = msg % args
1197 self._parse_error = msg
1198 if self._strict:
1199 raise BadRarFile(msg)
1201 def open(self, inf, pwd):
1202 """Return stream object for file data."""
1204 if inf.file_redir:
1205 redir_type, redir_flags, redir_name = inf.file_redir
1206 # cannot leave to unrar as it expects copied file to exist
1207 if redir_type in (RAR5_XREDIR_FILE_COPY, RAR5_XREDIR_HARD_LINK):
1208 inf = self.getinfo(redir_name)
1209 if not inf:
1210 raise BadRarFile("cannot find copied file")
1211 elif redir_type in (
1212 RAR5_XREDIR_UNIX_SYMLINK, RAR5_XREDIR_WINDOWS_SYMLINK,
1213 RAR5_XREDIR_WINDOWS_JUNCTION,
1215 return io.BytesIO(redir_name.encode("utf8"))
1216 if inf.flags & RAR_FILE_SPLIT_BEFORE:
1217 raise NeedFirstVolume("Partial file, please start from first volume: " + inf.filename, None)
1219 # is temp write usable?
1220 use_hack = 1
1221 if not self._main:
1222 use_hack = 0
1223 elif self._main._must_disable_hack():
1224 use_hack = 0
1225 elif inf._must_disable_hack():
1226 use_hack = 0
1227 elif is_filelike(self._rarfile):
1228 pass
1229 elif inf.file_size > HACK_SIZE_LIMIT:
1230 use_hack = 0
1231 elif not USE_EXTRACT_HACK:
1232 use_hack = 0
1234 # now extract
1235 if inf.compress_type == RAR_M0 and (inf.flags & RAR_FILE_PASSWORD) == 0 and inf.file_redir is None:
1236 return self._open_clear(inf)
1237 elif use_hack:
1238 return self._open_hack(inf, pwd)
1239 elif is_filelike(self._rarfile):
1240 return self._open_unrar_membuf(self._rarfile, inf, pwd)
1241 else:
1242 return self._open_unrar(self._rarfile, inf, pwd)
1244 def _open_clear(self, inf):
1245 return DirectReader(self, inf)
1247 def _open_hack_core(self, inf, pwd, prefix, suffix):
1249 size = inf.compress_size + inf.header_size
1250 rf = XFile(inf.volume_file, 0)
1251 rf.seek(inf.header_offset)
1253 tmpfd, tmpname = mkstemp(suffix=".rar", dir=HACK_TMP_DIR)
1254 tmpf = os.fdopen(tmpfd, "wb")
1256 try:
1257 tmpf.write(prefix)
1258 while size > 0:
1259 if size > BSIZE:
1260 buf = rf.read(BSIZE)
1261 else:
1262 buf = rf.read(size)
1263 if not buf:
1264 raise BadRarFile("read failed: " + inf.filename)
1265 tmpf.write(buf)
1266 size -= len(buf)
1267 tmpf.write(suffix)
1268 tmpf.close()
1269 rf.close()
1270 except BaseException:
1271 rf.close()
1272 tmpf.close()
1273 os.unlink(tmpname)
1274 raise
1276 return self._open_unrar(tmpname, inf, pwd, tmpname)
def _open_unrar_membuf(self, memfile, inf, pwd):
    """Dump an in-memory archive to a temp file and extract *inf* from it.

    Needed for solid archives.  The temp file path is passed on as the
    reader's tempfile so it can be cleaned up with the reader.
    """
    tmp_path = membuf_tempfile(memfile)
    return self._open_unrar(tmp_path, inf, pwd, tmp_path, force_file=True)
def _open_unrar(self, rarfile, inf, pwd=None, tmpfile=None, force_file=False):
    """Extract using unrar

    Builds the tool command line for archive *rarfile* and returns a
    PipeReader that reads the extracted data from the tool's output.
    """
    tool = tool_setup()

    # Leaving the member name out avoids encoding related problems; that is
    # only done when a temp archive is used and a name is not forced.
    name_needed = (not tmpfile) or force_file
    member = inf.filename if name_needed else None

    # the reader consumes the tool's pipe
    cmdline = tool.open_cmdline(pwd, rarfile, member)
    return PipeReader(self, inf, cmdline, tmpfile)
1300 # RAR3 format
class Rar3Info(RarInfo):
    """RAR3 specific fields."""
    extract_version = 15
    salt = None
    add_size = 0
    header_crc = None
    header_size = None
    header_offset = None
    data_offset = None
    _md_class = None
    _md_expect = None
    _name_size = None

    # make sure some rar5 fields are always present
    file_redir = None
    blake2sp_hash = None

    endarc_datacrc = None
    endarc_volnr = None

    def _must_disable_hack(self):
        """Return True when this block's flags rule out the temp-archive hack."""
        if self.type == RAR_BLOCK_FILE:
            blockers = RAR_FILE_PASSWORD | RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER
        elif self.type == RAR_BLOCK_MAIN:
            blockers = RAR_MAIN_SOLID | RAR_MAIN_PASSWORD
        else:
            return False
        return bool(self.flags & blockers)

    def is_symlink(self):
        """Returns True if entry is a symlink."""
        if self.type != RAR_BLOCK_FILE or self.host_os != RAR_OS_UNIX:
            return False
        # top nibble of the mode holds the unix file type
        return (self.mode & 0xF000) == 0xA000

    def is_file(self):
        """Returns True if entry is a normal file."""
        if self.type != RAR_BLOCK_FILE:
            return False
        return not (self.is_dir() or self.is_symlink())
class RAR3Parser(CommonParser):
    """Parse RAR3 file format.
    """
    _expect_sig = RAR_ID
    _last_aes_key = (None, None, None)   # (salt, key, iv)

    def _decrypt_header(self, fd):
        """Return a decrypting wrapper around *fd* for an encrypted header.

        The key derived for the most recent salt is cached.
        """
        if not _have_crypto:
            raise NoCrypto("Cannot parse encrypted headers - no crypto")
        salt = fd.read(8)
        cached_salt, key, iv = self._last_aes_key
        if cached_salt != salt:
            key, iv = rar3_s2k(self._password, salt)
            self._last_aes_key = (salt, key, iv)
        return HeaderDecrypt(fd, key, iv)

    def _parse_block_header(self, fd):
        """Parse common block header

        Returns the filled Rar3Info, or None on EOF, truncated header or
        header CRC mismatch (the latter two are reported via _set_error).
        """
        h = Rar3Info()
        h.header_offset = fd.tell()

        # fixed-size base header first
        base = fd.read(S_BLK_HDR.size)
        if not base:
            return None
        h.header_crc, h.type, h.flags, h.header_size = S_BLK_HDR.unpack_from(base)

        # then whatever the header declares beyond the base part
        if h.header_size > S_BLK_HDR.size:
            hdata = base + fd.read(h.header_size - S_BLK_HDR.size)
        else:
            hdata = base
        h.data_offset = fd.tell()

        # unexpected EOF?
        if len(hdata) != h.header_size:
            self._set_error("Unexpected EOF when reading header")
            return None

        pos = S_BLK_HDR.size

        # block has data associated with it?
        if h.flags & RAR_LONG_BLOCK:
            h.add_size, pos = load_le32(hdata, pos)
        else:
            h.add_size = 0

        # parse interesting ones, decide header boundaries for crc
        if h.type == RAR_BLOCK_MARK:
            return h

        # unless a branch below narrows it, the crc covers the whole header
        crc_pos = h.header_size
        if h.type == RAR_BLOCK_MAIN:
            pos += 6
            if h.flags & RAR_MAIN_ENCRYPTVER:
                pos += 1
            crc_pos = pos
            if h.flags & RAR_MAIN_COMMENT:
                self._parse_subblocks(h, hdata, pos)
        elif h.type == RAR_BLOCK_FILE:
            # file header parsing starts 4 bytes back, at the size field
            pos = self._parse_file_header(h, hdata, pos - 4)
            crc_pos = pos
            if h.flags & RAR_FILE_COMMENT:
                self._parse_subblocks(h, hdata, pos)
        elif h.type == RAR_BLOCK_SUB:
            self._parse_file_header(h, hdata, pos - 4)
        elif h.type == RAR_BLOCK_OLD_AUTH:
            crc_pos = pos + 8
        elif h.type == RAR_BLOCK_OLD_EXTRA:
            crc_pos = pos + 7
        elif h.type == RAR_BLOCK_ENDARC:
            if h.flags & RAR_ENDARC_DATACRC:
                h.endarc_datacrc, pos = load_le32(hdata, pos)
            if h.flags & RAR_ENDARC_VOLNR:
                h.endarc_volnr = S_SHORT.unpack_from(hdata, pos)[0]

        # check crc
        if h.type == RAR_BLOCK_OLD_SUB:
            # old sub block: crc also covers the data following the header
            crcdat = hdata[2:] + fd.read(h.add_size)
        else:
            crcdat = hdata[2:crc_pos]
        calc_crc = crc32(crcdat) & 0xFFFF

        if calc_crc != h.header_crc:
            # header parsing failed; instead of panicking, send eof
            self._set_error("Header CRC error (%02x): exp=%x got=%x (xlen = %d)",
                            h.type, h.header_crc, calc_crc, len(crcdat))
            return None

        # good header
        return h

    def _parse_file_header(self, h, hdata, pos):
        """Read file-specific header

        Fills *h* from *hdata* starting at *pos* and returns the position
        after the parsed fields.
        """
        (csize, fsize, host_os, crc, dostime, version,
         method, name_size, mode, *_unused) = S_FILE_HDR.unpack_from(hdata, pos)
        pos += S_FILE_HDR.size

        h.compress_size = csize
        h.file_size = fsize
        h.host_os = host_os
        h.CRC = crc
        h.date_time = parse_dos_time(dostime)
        h.mtime = to_datetime(h.date_time)
        h.extract_version = version
        h.compress_type = method
        h._name_size = name_size
        h.mode = mode

        h._md_class = CRC32Context
        h._md_expect = h.CRC

        # high halves of the 64-bit sizes
        if h.flags & RAR_FILE_LARGE:
            csize_hi, pos = load_le32(hdata, pos)
            fsize_hi, pos = load_le32(hdata, pos)
            h.compress_size |= csize_hi << 32
            h.file_size |= fsize_hi << 32
            h.add_size = h.compress_size

        name, pos = load_bytes(hdata, name_size, pos)
        has_unicode = h.flags & RAR_FILE_UNICODE
        if has_unicode and b"\0" in name:
            # stored in custom encoding: 8-bit name, NUL, compressed data
            std_part, enc_part = name.split(b"\0", 1)
            h.orig_filename = std_part
            u = UnicodeFilename(std_part, enc_part)
            h.filename = u.decode()

            # if parsing failed fall back to simple name
            if u.failed:
                h.filename = self._decode(h.orig_filename)
        elif has_unicode:
            # stored in UTF8
            h.orig_filename = name
            h.filename = name.decode("utf8", "replace")
        else:
            # stored in random encoding
            h.orig_filename = name
            h.filename = self._decode(name)

        # change separator, set dir suffix
        h.filename = h.filename.replace("\\", "/").rstrip("/")
        if h.is_dir():
            h.filename = h.filename + "/"

        if h.flags & RAR_FILE_SALT:
            h.salt, pos = load_bytes(hdata, 8, pos)
        else:
            h.salt = None

        # optional extended time stamps
        if h.flags & RAR_FILE_EXTTIME:
            pos = _parse_ext_time(h, hdata, pos)
        else:
            # NOTE: this also clears the mtime assigned from the DOS time above
            h.mtime = h.atime = h.ctime = h.arctime = None

        return pos

    def _parse_subblocks(self, h, hdata, pos):
        """Find old-style comment subblock

        Walks the sub-blocks embedded in *hdata* from *pos*; a decodable
        old-style comment is stored in ``h.comment``.  Returns the final
        position.
        """
        total = len(hdata)
        while pos < total:
            # ordinary block header
            _scrc, stype, sflags, slen = S_BLK_HDR.unpack_from(hdata, pos)
            block_end = pos + slen
            pos += S_BLK_HDR.size

            # corrupt header
            if block_end < pos:
                break

            # followed by block-specific header
            if stype == RAR_BLOCK_OLD_COMMENT and pos + S_COMMENT_HDR.size <= block_end:
                declen, ver, meth, crc = S_COMMENT_HDR.unpack_from(hdata, pos)
                pos += S_COMMENT_HDR.size
                payload = hdata[pos:block_end]
                cmt = rar3_decompress(ver, meth, payload, declen, sflags,
                                      crc, self._password)
                # accept unconditionally when crc checking is off
                if not self._crc_check or crc32(cmt) & 0xFFFF == crc:
                    h.comment = self._decode_comment(cmt)

            pos = block_end
        return pos

    def _read_comment_v3(self, inf, pwd=None):
        """Read, decompress and decode a new-style comment block.

        Returns None when crc checking is enabled and the crc does not match.
        """
        # read data
        with XFile(inf.volume_file) as rf:
            rf.seek(inf.data_offset)
            raw = rf.read(inf.compress_size)

        # decompress
        cmt = rar3_decompress(inf.extract_version, inf.compress_type, raw,
                              inf.file_size, inf.flags, inf.CRC, pwd, inf.salt)

        # check crc
        if self._crc_check and crc32(cmt) != inf.CRC:
            return None

        return self._decode_comment(cmt)

    def _decode(self, val):
        """Decode bytes with the first TRY_ENCODINGS codec that succeeds.

        Falls back to the parser charset with replacement characters.
        """
        for codec in TRY_ENCODINGS:
            try:
                return val.decode(codec)
            except UnicodeError:
                continue
        return val.decode(self._charset, "replace")

    def _decode_comment(self, val):
        """Decode comment bytes the same way as file names."""
        return self._decode(val)

    def process_entry(self, fd, item):
        """Register a parsed block: file entries, comments and main flags."""
        if item.type == RAR_BLOCK_FILE:
            # use only first part
            if (item.flags & RAR_FILE_SPLIT_BEFORE) == 0:
                self._info_map[item.filename.rstrip("/")] = item
                self._info_list.append(item)
            elif self._info_list:
                # final crc is in last block
                first_part = self._info_list[-1]
                first_part.CRC = item.CRC
                first_part._md_expect = item._md_expect
                first_part.compress_size += item.compress_size

        elif item.type == RAR_BLOCK_SUB and item.filename == "CMT":
            # parse new-style comment
            if item.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER):
                pass
            elif item.flags & RAR_FILE_SOLID:
                # file comment
                cmt = self._read_comment_v3(item, self._password)
                if self._info_list:
                    self._info_list[-1].comment = cmt
            else:
                # archive comment
                self.comment = self._read_comment_v3(item, self._password)

        elif item.type == RAR_BLOCK_MAIN:
            if item.flags & RAR_MAIN_COMMENT:
                self.comment = item.comment
            if item.flags & RAR_MAIN_PASSWORD:
                self._needs_password = True

    # put file compressed data into temporary .rar archive, and run
    # unrar on that, thus avoiding unrar going over whole archive
    def _open_hack(self, inf, pwd):
        """Open *inf* through a temporary single-entry RAR3 archive."""
        # create main header: crc, type, flags, size, res1, res2
        main_hdr = S_BLK_HDR.pack(0x90CF, 0x73, 0, 13) + b"\0" * (2 + 4)
        return self._open_hack_core(inf, pwd, RAR_ID + main_hdr, b"")
1620 # RAR5 format
class Rar5Info(RarInfo):
    """Shared fields for RAR5 records.

    Class-level defaults; the parser overwrites them per record.
    """
    extract_version = 50
    header_crc = None
    header_size = None
    header_offset = None
    data_offset = None

    # type=all
    block_type = None
    block_flags = None
    add_size = 0
    block_extra_size = 0

    # type=MAIN
    volume_number = None
    _md_class = None
    _md_expect = None

    def _must_disable_hack(self):
        """Generic records never veto the temp-archive hack."""
        return False
class Rar5BaseFile(Rar5Info):
    """Shared struct for file & service record.
    """
    type = -1
    file_flags = None
    file_encryption = (0, 0, 0, b"", b"", b"")
    file_compress_flags = None
    file_redir = None
    file_owner = None
    file_version = None
    blake2sp_hash = None

    def _must_disable_hack(self):
        """Return True if the entry is encrypted, split, solid or redirected."""
        split_mask = RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER
        return bool(
            self.flags & RAR_FILE_PASSWORD
            or self.block_flags & split_mask
            or self.file_compress_flags & RAR5_COMPR_SOLID
            or self.file_redir
        )
class Rar5FileInfo(Rar5BaseFile):
    """RAR5 file record.
    """
    type = RAR_BLOCK_FILE

    def is_symlink(self):
        """Returns True if entry is a symlink."""
        redir = self.file_redir
        if redir is None:
            return False
        # first element of the redirection tuple is its type
        link_types = (
            RAR5_XREDIR_UNIX_SYMLINK,
            RAR5_XREDIR_WINDOWS_SYMLINK,
            RAR5_XREDIR_WINDOWS_JUNCTION,
        )
        return redir[0] in link_types

    def is_file(self):
        """Returns True if entry is a normal file."""
        if self.is_dir():
            return False
        return not self.is_symlink()
class Rar5ServiceInfo(Rar5BaseFile):
    """RAR5 service record.

    Shares the file record layout; only the block type differs.
    """
    type = RAR_BLOCK_SUB
class Rar5MainInfo(Rar5Info):
    """RAR5 archive main record.
    """
    type = RAR_BLOCK_MAIN
    main_flags = None
    main_volume_number = None

    def _must_disable_hack(self):
        """Return True for solid archives."""
        return bool(self.main_flags & RAR5_MAIN_FLAG_SOLID)
class Rar5EncryptionInfo(Rar5Info):
    """RAR5 archive header encryption record.

    Fields are filled in by the parser's encryption-block handler.
    """
    type = RAR5_BLOCK_ENCRYPTION
    encryption_algo = None
    encryption_flags = None
    encryption_kdf_count = None
    encryption_salt = None
    encryption_check_value = None

    def needs_password(self):
        """Always True for this record type."""
        return True
class Rar5EndArcInfo(Rar5Info):
    """RAR5 end of archive record.
    """
    type = RAR_BLOCK_ENDARC
    # raw flags of the end-of-archive block, set by the parser
    endarc_flags = None
class RAR5Parser(CommonParser):
    """Parse RAR5 format.
    """
    _expect_sig = RAR5_ID
    _hdrenc_main = None

    # AES encrypted headers
    _last_aes256_key = (-1, None, None)  # (kdf_count, salt, key)

    def _gen_key(self, kdf_count, salt):
        """Derive (and cache) the AES-256 key for the given KDF parameters.

        Raises BadRarFile when *kdf_count* (a log2 iteration count) is
        larger than 24.
        """
        if self._last_aes256_key[:2] == (kdf_count, salt):
            return self._last_aes256_key[2]
        if kdf_count > 24:
            raise BadRarFile("Too large kdf_count")
        pwd = self._password
        if isinstance(pwd, str):
            pwd = pwd.encode("utf8")
        key = pbkdf2_hmac("sha256", pwd, salt, 1 << kdf_count)
        self._last_aes256_key = (kdf_count, salt, key)
        return key

    def _decrypt_header(self, fd):
        """Return a decrypting wrapper around *fd* for an encrypted header.

        Uses the parameters of the previously seen encryption block; the
        16-byte IV is read from *fd*.
        """
        if not _have_crypto:
            raise NoCrypto("Cannot parse encrypted headers - no crypto")
        h = self._hdrenc_main
        key = self._gen_key(h.encryption_kdf_count, h.encryption_salt)
        iv = fd.read(16)
        return HeaderDecrypt(fd, key, iv)

    def _parse_block_header(self, fd):
        """Parse common block header

        Returns the record object for known block types, or None on
        oversized/truncated headers, CRC mismatch or unknown block type.
        """
        header_offset = fd.tell()

        # crc (4 bytes) plus up to 3 bytes of header-length vint
        preload = 4 + 3
        start_bytes = fd.read(preload)
        header_crc, pos = load_le32(start_bytes, 0)
        hdrlen, pos = load_vint(start_bytes, pos)
        if hdrlen > 2 * 1024 * 1024:
            return None
        header_size = pos + hdrlen

        # read full header, check for EOF
        missing = header_size - len(start_bytes)
        if missing < 0:
            # Declared header is shorter than what was preloaded, so it is
            # corrupt.  Bail out here: a negative count would make read()
            # slurp the rest of the file.
            self._set_error("Unexpected EOF when reading header")
            return None
        hdata = start_bytes + fd.read(missing)
        if len(hdata) != header_size:
            self._set_error("Unexpected EOF when reading header")
            return None
        data_offset = fd.tell()

        calc_crc = crc32(memoryview(hdata)[4:])
        if header_crc != calc_crc:
            # header parsing failed.
            self._set_error("Header CRC error: exp=%x got=%x (xlen = %d)",
                            header_crc, calc_crc, len(hdata))
            return None

        block_type, pos = load_vint(hdata, pos)

        if block_type == RAR5_BLOCK_MAIN:
            h, pos = self._parse_block_common(Rar5MainInfo(), hdata)
            h = self._parse_main_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_FILE:
            h, pos = self._parse_block_common(Rar5FileInfo(), hdata)
            h = self._parse_file_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_SERVICE:
            h, pos = self._parse_block_common(Rar5ServiceInfo(), hdata)
            h = self._parse_file_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_ENCRYPTION:
            h, pos = self._parse_block_common(Rar5EncryptionInfo(), hdata)
            h = self._parse_encryption_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_ENDARC:
            h, pos = self._parse_block_common(Rar5EndArcInfo(), hdata)
            h = self._parse_endarc_block(h, hdata, pos)
        else:
            h = None
        if h:
            h.header_offset = header_offset
            h.data_offset = data_offset
        return h

    def _parse_block_common(self, h, hdata):
        """Parse the fields shared by all block types into *h*.

        Parsing restarts from the beginning of *hdata*.  Returns
        ``(h, pos)`` where *pos* is the offset of the type-specific fields.
        """
        h.header_crc, pos = load_le32(hdata, 0)
        hdrlen, pos = load_vint(hdata, pos)
        h.header_size = hdrlen + pos
        h.block_type, pos = load_vint(hdata, pos)
        h.block_flags, pos = load_vint(hdata, pos)

        if h.block_flags & RAR5_BLOCK_FLAG_EXTRA_DATA:
            h.block_extra_size, pos = load_vint(hdata, pos)
        if h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA:
            h.add_size, pos = load_vint(hdata, pos)

        h.compress_size = h.add_size

        # mirror block flags into the RAR3-style flags field
        if h.block_flags & RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN:
            h.flags |= RAR_SKIP_IF_UNKNOWN
        if h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA:
            h.flags |= RAR_LONG_BLOCK
        return h, pos

    def _parse_main_block(self, h, hdata, pos):
        """Parse main archive block and translate its flags to RAR3 style."""
        h.main_flags, pos = load_vint(hdata, pos)
        if h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR:
            h.main_volume_number, pos = load_vint(hdata, pos)

        h.flags |= RAR_MAIN_NEWNUMBERING
        if h.main_flags & RAR5_MAIN_FLAG_SOLID:
            h.flags |= RAR_MAIN_SOLID
        if h.main_flags & RAR5_MAIN_FLAG_ISVOL:
            h.flags |= RAR_MAIN_VOLUME
        if h.main_flags & RAR5_MAIN_FLAG_RECOVERY:
            h.flags |= RAR_MAIN_RECOVERY
        if self._hdrenc_main:
            h.flags |= RAR_MAIN_PASSWORD
        if h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR == 0:
            h.flags |= RAR_MAIN_FIRSTVOLUME

        return h

    def _parse_file_block(self, h, hdata, pos):
        """Parse a file or service block, including its extra records."""
        h.file_flags, pos = load_vint(hdata, pos)
        h.file_size, pos = load_vint(hdata, pos)
        h.mode, pos = load_vint(hdata, pos)

        if h.file_flags & RAR5_FILE_FLAG_HAS_MTIME:
            h.mtime, pos = load_unixtime(hdata, pos)
            h.date_time = h.mtime.timetuple()[:6]
        if h.file_flags & RAR5_FILE_FLAG_HAS_CRC32:
            h.CRC, pos = load_le32(hdata, pos)
            h._md_class = CRC32Context
            h._md_expect = h.CRC

        h.file_compress_flags, pos = load_vint(hdata, pos)
        h.file_host_os, pos = load_vint(hdata, pos)
        h.orig_filename, pos = load_vstr(hdata, pos)
        h.filename = h.orig_filename.decode("utf8", "replace").rstrip("/")

        # use compatible values
        if h.file_host_os == RAR5_OS_WINDOWS:
            h.host_os = RAR_OS_WIN32
        else:
            h.host_os = RAR_OS_UNIX
        h.compress_type = RAR_M0 + ((h.file_compress_flags >> 7) & 7)

        if h.block_extra_size:
            # allow 1 byte of garbage
            while pos < len(hdata) - 1:
                xsize, pos = load_vint(hdata, pos)
                xdata, pos = load_bytes(hdata, xsize, pos)
                self._process_file_extra(h, xdata)

        if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE:
            h.flags |= RAR_FILE_SPLIT_BEFORE
        if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_AFTER:
            h.flags |= RAR_FILE_SPLIT_AFTER
        if h.file_flags & RAR5_FILE_FLAG_ISDIR:
            h.flags |= RAR_FILE_DIRECTORY
        if h.file_compress_flags & RAR5_COMPR_SOLID:
            h.flags |= RAR_FILE_SOLID

        if h.is_dir():
            h.filename = h.filename + "/"
        return h

    def _parse_endarc_block(self, h, hdata, pos):
        """Parse end-of-archive block."""
        h.endarc_flags, pos = load_vint(hdata, pos)
        if h.endarc_flags & RAR5_ENDARC_FLAG_NEXT_VOL:
            h.flags |= RAR_ENDARC_NEXT_VOLUME
        return h

    def _parse_encryption_block(self, h, hdata, pos):
        """Parse header-encryption block and remember it for later headers.

        Raises BadRarFile for ciphers other than AES-256.
        """
        h.encryption_algo, pos = load_vint(hdata, pos)
        h.encryption_flags, pos = load_vint(hdata, pos)
        h.encryption_kdf_count, pos = load_byte(hdata, pos)
        h.encryption_salt, pos = load_bytes(hdata, 16, pos)
        if h.encryption_flags & RAR5_ENC_FLAG_HAS_CHECKVAL:
            # load_bytes returns (data, new_pos); store only the data
            h.encryption_check_value, pos = load_bytes(hdata, 12, pos)
        if h.encryption_algo != RAR5_XENC_CIPHER_AES256:
            raise BadRarFile("Unsupported header encryption cipher")
        self._hdrenc_main = h
        return h

    def _process_file_extra(self, h, xdata):
        """Dispatch one extra record of a file block by its type."""
        xtype, pos = load_vint(xdata, 0)
        if xtype == RAR5_XFILE_TIME:
            self._parse_file_xtime(h, xdata, pos)
        elif xtype == RAR5_XFILE_ENCRYPTION:
            self._parse_file_encryption(h, xdata, pos)
        elif xtype == RAR5_XFILE_HASH:
            self._parse_file_hash(h, xdata, pos)
        elif xtype == RAR5_XFILE_VERSION:
            self._parse_file_version(h, xdata, pos)
        elif xtype == RAR5_XFILE_REDIR:
            self._parse_file_redir(h, xdata, pos)
        elif xtype == RAR5_XFILE_OWNER:
            self._parse_file_owner(h, xdata, pos)
        elif xtype == RAR5_XFILE_SERVICE:
            pass
        else:
            # unknown record types are ignored
            pass

    # extra block for file time record
    def _parse_file_xtime(self, h, xdata, pos):
        """Parse timestamps; windows time is used unless the unixtime flag is set."""
        tflags, pos = load_vint(xdata, pos)

        ldr = load_windowstime
        if tflags & RAR5_XTIME_UNIXTIME:
            ldr = load_unixtime

        if tflags & RAR5_XTIME_HAS_MTIME:
            h.mtime, pos = ldr(xdata, pos)
            h.date_time = h.mtime.timetuple()[:6]
        if tflags & RAR5_XTIME_HAS_CTIME:
            h.ctime, pos = ldr(xdata, pos)
        if tflags & RAR5_XTIME_HAS_ATIME:
            h.atime, pos = ldr(xdata, pos)

        # nanosecond parts follow, in the same order as the timestamps
        if tflags & RAR5_XTIME_UNIXTIME_NS:
            if tflags & RAR5_XTIME_HAS_MTIME:
                nsec, pos = load_le32(xdata, pos)
                h.mtime = to_nsdatetime(h.mtime, nsec)
            if tflags & RAR5_XTIME_HAS_CTIME:
                nsec, pos = load_le32(xdata, pos)
                h.ctime = to_nsdatetime(h.ctime, nsec)
            if tflags & RAR5_XTIME_HAS_ATIME:
                nsec, pos = load_le32(xdata, pos)
                h.atime = to_nsdatetime(h.atime, nsec)

    # just remember encryption info
    def _parse_file_encryption(self, h, xdata, pos):
        """Store per-file encryption parameters and mark the entry as encrypted."""
        algo, pos = load_vint(xdata, pos)
        flags, pos = load_vint(xdata, pos)
        kdf_count, pos = load_byte(xdata, pos)
        salt, pos = load_bytes(xdata, 16, pos)
        iv, pos = load_bytes(xdata, 16, pos)
        checkval = None
        if flags & RAR5_XENC_CHECKVAL:
            checkval, pos = load_bytes(xdata, 12, pos)
        if flags & RAR5_XENC_TWEAKED:
            # disable hash verification for tweaked checksums
            h._md_expect = None
            h._md_class = NoHashContext

        h.file_encryption = (algo, flags, kdf_count, salt, iv, checkval)
        h.flags |= RAR_FILE_PASSWORD

    def _parse_file_hash(self, h, xdata, pos):
        """Parse file hash record; only BLAKE2sp is handled."""
        hash_type, pos = load_vint(xdata, pos)
        if hash_type == RAR5_XHASH_BLAKE2SP:
            h.blake2sp_hash, pos = load_bytes(xdata, 32, pos)
            if (h.file_encryption[1] & RAR5_XENC_TWEAKED) == 0:
                h._md_class = Blake2SP
                h._md_expect = h.blake2sp_hash

    def _parse_file_version(self, h, xdata, pos):
        """Store ``(flags, version)`` in ``h.file_version``."""
        flags, pos = load_vint(xdata, pos)
        version, pos = load_vint(xdata, pos)
        h.file_version = (flags, version)

    def _parse_file_redir(self, h, xdata, pos):
        """Store ``(type, flags, target name)`` in ``h.file_redir``."""
        redir_type, pos = load_vint(xdata, pos)
        redir_flags, pos = load_vint(xdata, pos)
        redir_name, pos = load_vstr(xdata, pos)
        redir_name = redir_name.decode("utf8", "replace")
        h.file_redir = (redir_type, redir_flags, redir_name)

    def _parse_file_owner(self, h, xdata, pos):
        """Store ``(user_name, group_name, user_id, group_id)`` in ``h.file_owner``.

        Fields that are not present in the record stay None.
        """
        user_name = group_name = user_id = group_id = None

        flags, pos = load_vint(xdata, pos)
        if flags & RAR5_XOWNER_UNAME:
            user_name, pos = load_vstr(xdata, pos)
        if flags & RAR5_XOWNER_GNAME:
            group_name, pos = load_vstr(xdata, pos)
        if flags & RAR5_XOWNER_UID:
            user_id, pos = load_vint(xdata, pos)
        if flags & RAR5_XOWNER_GID:
            group_id, pos = load_vint(xdata, pos)

        h.file_owner = (user_name, group_name, user_id, group_id)

    def process_entry(self, fd, item):
        """Register a parsed block: file entries and the archive comment."""
        if item.block_type == RAR5_BLOCK_FILE:
            # use only first part
            if (item.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE) == 0:
                self._info_map[item.filename.rstrip("/")] = item
                self._info_list.append(item)
            elif len(self._info_list) > 0:
                # final crc is in last block
                old = self._info_list[-1]
                old.CRC = item.CRC
                old._md_expect = item._md_expect
                old.blake2sp_hash = item.blake2sp_hash
                old.compress_size += item.compress_size
        elif item.block_type == RAR5_BLOCK_SERVICE:
            if item.filename == "CMT":
                self._load_comment(fd, item)

    def _load_comment(self, fd, item):
        """Load archive comment from a ``CMT`` service block into ``self.comment``.

        Split or compressed comment blocks, and encrypted ones with a
        cipher other than AES-256, are skipped.  Always returns None.
        """
        if item.block_flags & (RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER):
            return None
        if item.compress_type != RAR_M0:
            return None

        if item.flags & RAR_FILE_PASSWORD:
            algo, ___flags, kdf_count, salt, iv, ___checkval = item.file_encryption
            if algo != RAR5_XENC_CIPHER_AES256:
                return None
            key = self._gen_key(kdf_count, salt)
            f = HeaderDecrypt(fd, key, iv)
            cmt = f.read(item.file_size)
        else:
            # archive comment
            with self._open_clear(item) as cmtstream:
                cmt = cmtstream.read()

        # rar bug? - appends zero to comment
        cmt = cmt.split(b"\0", 1)[0]
        self.comment = cmt.decode("utf8")
        return None

    def _open_hack(self, inf, pwd):
        """Open *inf* through a temporary single-entry RAR5 archive."""
        # len, type, blk_flags, flags
        main_hdr = b"\x03\x01\x00\x00"
        endarc_hdr = b"\x03\x05\x00\x00"
        main_hdr = S_LONG.pack(crc32(main_hdr)) + main_hdr
        endarc_hdr = S_LONG.pack(crc32(endarc_hdr)) + endarc_hdr
        return self._open_hack_core(inf, pwd, RAR5_ID + main_hdr, endarc_hdr)
2063 ## Utility classes
class UnicodeFilename:
    """Handle RAR3 unicode filename decompression.

    *name* is the 8-bit form of the filename, *encdata* the compressed
    UTF-16 data.  ``failed`` becomes non-zero when either input runs out
    while decoding.
    """
    def __init__(self, name, encdata):
        self.std_name = bytearray(name)
        self.encdata = bytearray(encdata)
        self.pos = self.encpos = 0
        self.buf = bytearray()
        self.failed = 0

    def enc_byte(self):
        """Copy encoded byte."""
        if self.encpos < len(self.encdata):
            value = self.encdata[self.encpos]
            self.encpos += 1
            return value
        # ran past the end of the encoded data
        self.failed = 1
        return 0

    def std_byte(self):
        """Copy byte from 8-bit representation."""
        if self.pos < len(self.std_name):
            return self.std_name[self.pos]
        # ran past the end of the 8-bit name
        self.failed = 1
        return ord("?")

    def put(self, lo, hi):
        """Copy 16-bit value to result."""
        self.buf.extend((lo, hi))
        self.pos += 1

    def decode(self):
        """Decompress compressed UTF16 value."""
        high = self.enc_byte()
        bits_left = 0
        enc_len = len(self.encdata)
        while self.encpos < enc_len:
            # every flag byte carries four 2-bit opcodes, high bits first
            if bits_left == 0:
                flags = self.enc_byte()
                bits_left = 8
            bits_left -= 2
            opcode = (flags >> bits_left) & 3

            if opcode == 0:
                # low byte given, high byte zero
                self.put(self.enc_byte(), 0)
            elif opcode == 1:
                # low byte given, shared high byte
                self.put(self.enc_byte(), high)
            elif opcode == 2:
                # both bytes given, low first
                low = self.enc_byte()
                explicit_high = self.enc_byte()
                self.put(low, explicit_high)
            else:
                # run of characters taken from the 8-bit name
                count = self.enc_byte()
                if count & 0x80:
                    delta = self.enc_byte()
                    for _ in range((count & 0x7f) + 2):
                        self.put((self.std_byte() + delta) & 0xFF, high)
                else:
                    for _ in range(count + 2):
                        self.put(self.std_byte(), 0)
        return self.buf.decode("utf-16le", "replace")
class RarExtFile(io.RawIOBase):
    """Base class for file-like object that :meth:`RarFile.open` returns.

    Provides public methods and common crc checking.

    Behaviour:
     - no short reads - .read() and .readinfo() read as much as requested.
     - no internal buffer, use io.BufferedReader for that.
    """
    name = None     #: Filename of the archive entry
    mode = "rb"
    _parser = None
    _inf = None
    _fd = None
    _remain = 0
    _returncode = 0
    _md_context = None

    def _open_extfile(self, parser, inf):
        """(Re)initialise reader state for entry *inf*.

        Closes any previous data source and resets hashing and the
        remaining-bytes counter; subclasses open the new source.
        """
        self.name = inf.filename
        self._parser = parser
        self._inf = inf

        if self._fd:
            self._fd.close()
        hasher = self._inf._md_class or NoHashContext
        self._md_context = hasher()
        self._fd = None
        self._remain = self._inf.file_size

    def read(self, n=-1):
        """Read all or specified amount of data from archive entry."""

        # sanitize count: never ask for more than what is left
        if n is None or n < 0 or n > self._remain:
            n = self._remain
        if n == 0:
            return b""

        wanted = n
        chunks = []
        while n > 0:
            # actual read
            chunk = self._read(n)
            if not chunk:
                break
            chunks.append(chunk)
            self._md_context.update(chunk)
            got = len(chunk)
            self._remain -= got
            n -= got
        data = b"".join(chunks)
        if n > 0:
            raise BadRarFile("Failed the read enough data: req=%d got=%d" % (wanted, len(data)))

        # done?
        if not data or self._remain == 0:
            self._check()
        return data

    def _check(self):
        """Check final CRC."""
        final = self._md_context.digest()
        exp = self._inf._md_expect
        # nothing to compare against
        if exp is None or final is None:
            return
        if self._returncode:
            check_returncode(self._returncode, "", tool_setup().get_errmap())
        if self._remain != 0:
            raise BadRarFile("Failed the read enough data")
        if final != exp:
            raise BadRarFile("Corrupt file - CRC check failed: %s - exp=%r got=%r" % (
                self._inf.filename, exp, final))

    def _read(self, cnt):
        """Actual read that gets sanitized cnt."""
        raise NotImplementedError("_read")

    def close(self):
        """Close open resources."""

        super().close()

        fd = self._fd
        if fd:
            fd.close()
            self._fd = None

    def __del__(self):
        """Hook delete to make sure tempfile is removed."""
        self.close()

    def readinto(self, buf):
        """Zero-copy read directly into buffer.

        Returns bytes read.
        """
        raise NotImplementedError("readinto")

    def tell(self):
        """Return current reading position in uncompressed data."""
        return self._inf.file_size - self._remain

    def seek(self, offset, whence=0):
        """Seek in data.

        On uncompressed files, the seeking works by actual
        seeks so it's fast.  On compressed files it's slow
        - forward seeking happens by reading ahead,
        backwards by re-opening and decompressing from the start.
        """

        # disable crc check when seeking
        self._md_context = NoHashContext()

        fsize = self._inf.file_size
        cur_ofs = self.tell()

        if whence == 0:     # seek from beginning of file
            target = offset
        elif whence == 1:   # seek from current position
            target = cur_ofs + offset
        elif whence == 2:   # seek from end of file
            target = fsize + offset
        else:
            raise ValueError("Invalid value for whence")

        # sanity check: clamp into [0, fsize]
        if target < 0:
            target = 0
        elif target > fsize:
            target = fsize

        # do the actual seek
        if target < cur_ofs:
            # going backwards: reopen and skip from the start
            self._open_extfile(self._parser, self._inf)
            self._skip(target)
        else:
            self._skip(target - cur_ofs)
        return self.tell()

    def _skip(self, cnt):
        """Read and discard data"""
        empty_read(self, cnt, BSIZE)

    def readable(self):
        """Returns True"""
        return True

    def writable(self):
        """Returns False.

        Writing is not supported.
        """
        return False

    def seekable(self):
        """Returns True.

        Seeking is supported, although it's slow on compressed files.
        """
        return True

    def readall(self):
        """Read all remaining data"""
        # avoid RawIOBase default impl
        return self.read()
class PipeReader(RarExtFile):
    """Read data from pipe, handle tempfile cleanup."""

    def __init__(self, parser, inf, cmd, tempfile=None):
        super().__init__()
        self._cmd = cmd
        self._proc = None
        self._tempfile = tempfile
        self._open_extfile(parser, inf)

    def _close_proc(self):
        """Close the child's pipes, wait for it and record its return code."""
        proc = self._proc
        if not proc:
            return
        if proc.stdout:
            proc.stdout.close()
        if proc.stdin:
            proc.stdin.close()
        if proc.stderr:
            proc.stderr.close()
        proc.wait()
        self._returncode = proc.returncode
        self._proc = None

    def _open_extfile(self, parser, inf):
        """Reset reader state and start a fresh extraction process."""
        super()._open_extfile(parser, inf)

        # stop old process
        self._close_proc()

        # launch new process
        self._returncode = 0
        proc = custom_popen(self._cmd)
        self._proc = proc
        self._fd = proc.stdout

        # avoid situation where unrar waits on stdin
        if proc.stdin:
            proc.stdin.close()

    def _read(self, cnt):
        """Read from pipe."""

        # normal read is usually enough
        first = self._fd.read(cnt)
        if len(first) == cnt or not first:
            return first

        # short read, keep asking until satisfied or the pipe is drained
        parts = [first]
        missing = cnt - len(first)
        while missing > 0:
            more = self._fd.read(missing)
            if not more:
                break
            missing -= len(more)
            parts.append(more)
        return b"".join(parts)

    def close(self):
        """Close open resources."""

        self._close_proc()
        super().close()

        tmp = self._tempfile
        if tmp:
            # best effort: the temp file may already be gone
            try:
                os.unlink(tmp)
            except OSError:
                pass
            self._tempfile = None

    def readinto(self, buf):
        """Zero-copy read directly into buffer."""
        limit = min(len(buf), self._remain)
        view = memoryview(buf)
        got = 0
        while got < limit:
            n = self._fd.readinto(view[got: limit])
            if not n:
                break
            self._md_context.update(view[got: got + n])
            self._remain -= n
            got += n
        return got
2389 class DirectReader(RarExtFile):
2390 """Read uncompressed data directly from archive.
2392 _cur = None
2393 _cur_avail = None
2394 _volfile = None
def __init__(self, parser, inf):
    """Create the reader and immediately open entry *inf*."""
    super().__init__()
    self._open_extfile(parser, inf)
def _open_extfile(self, parser, inf):
    """Open the volume holding *inf* and position on the entry's data.

    The entry header is re-parsed from ``inf.header_offset`` so the file
    position ends up where the parser leaves it after the header.

    Raises BadRarFile if the header cannot be parsed any more.
    """
    super()._open_extfile(parser, inf)

    self._volfile = self._inf.volume_file
    self._fd = XFile(self._volfile, 0)
    self._fd.seek(self._inf.header_offset, 0)
    cur = self._parser._parse_header(self._fd)
    if cur is None:
        # the parser signals failure with None; report it as a broken
        # archive instead of failing on the attribute access below
        raise BadRarFile("Unexpected EOF")
    self._cur = cur
    self._cur_avail = cur.add_size
2409 def _skip(self, cnt):
2410 """RAR Seek, skipping through rar files to get to correct position
2413 while cnt > 0:
2414 # next vol needed?
2415 if self._cur_avail == 0:
2416 if not self._open_next():
2417 break
2419 # fd is in read pos, do the read
2420 if cnt > self._cur_avail:
2421 cnt -= self._cur_avail
2422 self._remain -= self._cur_avail
2423 self._cur_avail = 0
2424 else:
2425 self._fd.seek(cnt, 1)
2426 self._cur_avail -= cnt
2427 self._remain -= cnt
2428 cnt = 0
2430 def _read(self, cnt):
2431 """Read from potentially multi-volume archive."""
2433 buf = []
2434 while cnt > 0:
2435 # next vol needed?
2436 if self._cur_avail == 0:
2437 if not self._open_next():
2438 break
2440 # fd is in read pos, do the read
2441 if cnt > self._cur_avail:
2442 data = self._fd.read(self._cur_avail)
2443 else:
2444 data = self._fd.read(cnt)
2445 if not data:
2446 break
2448 # got some data
2449 cnt -= len(data)
2450 self._cur_avail -= len(data)
2451 buf.append(data)
2453 if len(buf) == 1:
2454 return buf[0]
2455 return b"".join(buf)
2457 def _open_next(self):
2458 """Proceed to next volume."""
2460 # is the file split over archives?
2461 if (self._cur.flags & RAR_FILE_SPLIT_AFTER) == 0:
2462 return False
2464 if self._fd:
2465 self._fd.close()
2466 self._fd = None
2468 # open next part
2469 self._volfile = self._parser._next_volname(self._volfile)
2470 fd = open(self._volfile, "rb", 0)
2471 self._fd = fd
2472 sig = fd.read(len(self._parser._expect_sig))
2473 if sig != self._parser._expect_sig:
2474 raise BadRarFile("Invalid signature")
2476 # loop until first file header
2477 while True:
2478 cur = self._parser._parse_header(fd)
2479 if not cur:
2480 raise BadRarFile("Unexpected EOF")
2481 if cur.type in (RAR_BLOCK_MARK, RAR_BLOCK_MAIN):
2482 if cur.add_size:
2483 fd.seek(cur.add_size, 1)
2484 continue
2485 if cur.orig_filename != self._inf.orig_filename:
2486 raise BadRarFile("Did not found file entry")
2487 self._cur = cur
2488 self._cur_avail = cur.add_size
2489 return True
2491 def readinto(self, buf):
2492 """Zero-copy read directly into buffer."""
2493 got = 0
2494 vbuf = memoryview(buf)
2495 while got < len(buf):
2496 # next vol needed?
2497 if self._cur_avail == 0:
2498 if not self._open_next():
2499 break
2501 # length for next read
2502 cnt = len(buf) - got
2503 if cnt > self._cur_avail:
2504 cnt = self._cur_avail
2506 # read into temp view
2507 res = self._fd.readinto(vbuf[got: got + cnt])
2508 if not res:
2509 break
2510 self._md_context.update(vbuf[got: got + res])
2511 self._cur_avail -= res
2512 self._remain -= res
2513 got += res
2514 return got
class HeaderDecrypt:
    """File-like object that decrypts from another file"""

    def __init__(self, f, key, iv):
        self.f = f
        self.ciph = AES_CBC_Decrypt(key, iv)
        # decrypted bytes that were read but not yet handed out
        self.buf = b""

    def tell(self):
        """Current file pos - works only on block boundaries."""
        return self.f.tell()

    def read(self, cnt=None):
        """Read and decrypt.

        ``cnt`` must be an integer number of bytes; requests above
        8 KiB are rejected with ``BadRarFile``.  The result may be
        shorter than ``cnt`` when the source cannot supply another
        full 16-byte block.
        """
        if cnt > 8 * 1024:
            raise BadRarFile("Bad count to header decrypt - wrong password?")

        # consume old data
        if cnt <= len(self.buf):
            res = self.buf[:cnt]
            self.buf = self.buf[cnt:]
            return res
        res = self.buf
        self.buf = b""
        cnt -= len(res)

        # decrypt new data
        blklen = 16
        while cnt > 0:
            enc = self.f.read(blklen)
            if len(enc) < blklen:
                break
            dec = self.ciph.decrypt(enc)
            if cnt >= len(dec):
                res += dec
                cnt -= len(dec)
            else:
                # keep the unused tail of the block for the next call
                res += dec[:cnt]
                self.buf = dec[cnt:]
                cnt = 0

        return res
class XFile:
    """Input may be filename or file object.

    A filename is opened in binary mode and closed again by
    :meth:`close`.  A file object is rewound to offset 0 and left
    open by :meth:`close`.
    """
    __slots__ = ("_fd", "_need_close")

    def __init__(self, xfile, bufsize=1024):
        if is_filelike(xfile):
            self._need_close = False
            self._fd = xfile
            self._fd.seek(0)
        else:
            self._need_close = True
            self._fd = open(xfile, "rb", bufsize)

    def read(self, n=None):
        """Read from file."""
        return self._fd.read(n)

    def tell(self):
        """Return file pos."""
        return self._fd.tell()

    def seek(self, ofs, whence=0):
        """Move file pos."""
        return self._fd.seek(ofs, whence)

    def readinto(self, buf):
        """Read into buffer."""
        return self._fd.readinto(buf)

    def close(self):
        """Close file object."""
        # only close what this wrapper opened itself
        if self._need_close:
            self._fd.close()

    def __enter__(self):
        return self

    def __exit__(self, typ, val, tb):
        self.close()
class NoHashContext:
    """No-op hash function.

    Offers the same methods as the real hash contexts, but every
    method does nothing and returns ``None``.
    """

    def __init__(self, data=None):
        """Initialize"""

    def update(self, data):
        """Update data"""

    def digest(self):
        """Final hash"""

    def hexdigest(self):
        """Hexadecimal digest."""
class CRC32Context:
    """Hash context that uses CRC32."""
    __slots__ = ["_crc"]

    def __init__(self, data=None):
        self._crc = 0
        if data:
            self.update(data)

    def update(self, data):
        """Process data."""
        # crc32() takes the running value as its second argument
        self._crc = crc32(data, self._crc)

    def digest(self):
        """Final hash.

        Returns the CRC as an integer, not as bytes.
        """
        return self._crc

    def hexdigest(self):
        """Hexadecimal digest."""
        return "%08x" % self.digest()
class Blake2SP:
    """Blake2sp hash context.

    Input is cut into ``block_size`` chunks that are handed round-robin
    to ``parallelism`` leaf ``blake2s`` contexts; the final digest is a
    root ``blake2s`` over the leaf digests.
    """
    __slots__ = ["_thread", "_buf", "_cur", "_digest"]
    digest_size = 32
    block_size = 64
    parallelism = 8

    def __init__(self, data=None):
        self._buf = b""         # pending input shorter than one block
        self._cur = 0           # index of the leaf receiving the next block
        self._digest = None     # cached final digest
        self._thread = []       # leaf contexts

        for i in range(self.parallelism):
            ctx = self._blake2s(i, 0, i == (self.parallelism - 1))
            self._thread.append(ctx)

        if data:
            self.update(data)

    def _blake2s(self, ofs, depth, is_last):
        """Create one tree node context."""
        return blake2s(node_offset=ofs, node_depth=depth, last_node=is_last,
                       depth=2, inner_size=32, fanout=self.parallelism)

    def _add_block(self, blk):
        """Feed one block to the current leaf and advance to the next."""
        self._thread[self._cur].update(blk)
        self._cur = (self._cur + 1) % self.parallelism

    def update(self, data):
        """Hash data.
        """
        view = memoryview(data)
        bs = self.block_size
        if self._buf:
            # complete the partial block left over from the last call
            need = bs - len(self._buf)
            if len(view) < need:
                self._buf += view.tobytes()
                return
            self._add_block(self._buf + view[:need].tobytes())
            view = view[need:]
        while len(view) >= bs:
            self._add_block(view[:bs])
            view = view[bs:]
        self._buf = view.tobytes()

    def digest(self):
        """Return final digest value.

        The value is computed once and cached.
        """
        if self._digest is None:
            if self._buf:
                self._add_block(self._buf)
                self._buf = b""
            ctx = self._blake2s(0, 1, True)
            for t in self._thread:
                ctx.update(t.digest())
            self._digest = ctx.digest()
        return self._digest

    def hexdigest(self):
        """Hexadecimal digest."""
        return tohex(self.digest())
class Rar3Sha1:
    """Emulate buggy SHA1 from RAR3.

    The digest itself is plain SHA1.  With ``rarbug=True``,
    :meth:`update` additionally overwrites parts of the caller's
    buffer in place, so the buffer must be writable (e.g. a
    ``bytearray``).
    """
    digest_size = 20
    block_size = 64

    _BLK_BE = struct.Struct(b">16L")
    _BLK_LE = struct.Struct(b"<16L")

    __slots__ = ("_nbytes", "_md", "_rarbug")

    def __init__(self, data=b"", rarbug=False):
        self._md = sha1()
        self._nbytes = 0
        self._rarbug = rarbug
        self.update(data)

    def update(self, data):
        """Process more data."""
        self._md.update(data)
        bufpos = self._nbytes & 63
        self._nbytes += len(data)

        if self._rarbug and len(data) > 64:
            # start at the first block boundary inside data and rewrite
            # every following full block
            dpos = self.block_size - bufpos
            while dpos + self.block_size <= len(data):
                self._corrupt(data, dpos)
                dpos += self.block_size

    def digest(self):
        """Return final state."""
        return self._md.digest()

    def hexdigest(self):
        """Return final state as hex string."""
        return self._md.hexdigest()

    def _corrupt(self, data, dpos):
        """Corruption from SHA1 core."""
        ws = list(self._BLK_BE.unpack_from(data, dpos))
        for t in range(16, 80):
            tmp = ws[(t - 3) & 15] ^ ws[(t - 8) & 15] ^ ws[(t - 14) & 15] ^ ws[(t - 16) & 15]
            ws[t & 15] = ((tmp << 1) | (tmp >> (32 - 1))) & 0xFFFFFFFF
        self._BLK_LE.pack_into(data, dpos, *ws)
##
## Utility functions
##

# pre-compiled little-endian layouts
S_LONG = Struct("<L")
S_SHORT = Struct("<H")
S_BYTE = Struct("<B")

S_BLK_HDR = Struct("<HBHH")
S_FILE_HDR = Struct("<LLBLLBBHL")
S_COMMENT_HDR = Struct("<HBBH")
def load_vint(buf, pos):
    """Load RAR5 variable-size int.

    Each byte contributes its low 7 bits, least significant group
    first; a byte below 0x80 ends the number.  At most 11 bytes are
    examined.

    Returns ``(value, new_pos)``.  Raises ``BadRarFile`` when no
    terminating byte is found within the limit or the buffer.
    """
    limit = min(pos + 11, len(buf))
    res = ofs = 0
    while pos < limit:
        b = buf[pos]
        res += ((b & 0x7F) << ofs)
        pos += 1
        ofs += 7
        if b < 0x80:
            return res, pos
    raise BadRarFile("cannot load vint")
def load_byte(buf, pos):
    """Load single byte

    Returns ``(value, new_pos)``; raises ``BadRarFile`` when ``pos``
    is at or past the end of ``buf``.
    """
    end = pos + 1
    if end > len(buf):
        raise BadRarFile("cannot load byte")
    return S_BYTE.unpack_from(buf, pos)[0], end
def load_le32(buf, pos):
    """Load little-endian 32-bit integer

    Returns ``(value, new_pos)``; raises ``BadRarFile`` when fewer
    than four bytes remain in ``buf``.
    """
    end = pos + 4
    if end > len(buf):
        raise BadRarFile("cannot load le32")
    return S_LONG.unpack_from(buf, pos)[0], end
def load_bytes(buf, num, pos):
    """Load sequence of bytes

    Returns ``(buf[pos:pos + num], new_pos)``; raises ``BadRarFile``
    when the requested range runs past the end of ``buf``.
    """
    end = pos + num
    if end > len(buf):
        raise BadRarFile("cannot load bytes")
    return buf[pos: end], end
def load_vstr(buf, pos):
    """Load bytes prefixed by vint length

    Returns ``(data, new_pos)``.
    """
    slen, pos = load_vint(buf, pos)
    return load_bytes(buf, slen, pos)
def load_dostime(buf, pos):
    """Load LE32 dos timestamp

    Returns ``(datetime, new_pos)``.
    """
    stamp, pos = load_le32(buf, pos)
    tup = parse_dos_time(stamp)
    return to_datetime(tup), pos
def load_unixtime(buf, pos):
    """Load LE32 unix timestamp

    Returns ``(datetime, new_pos)`` with the datetime in UTC.
    """
    secs, pos = load_le32(buf, pos)
    dt = datetime.fromtimestamp(secs, timezone.utc)
    return dt, pos
def load_windowstime(buf, pos):
    """Load LE64 windows timestamp

    The value is read as two little-endian 32-bit halves (low half
    first) and counts 100ns units since the windows epoch.

    Returns ``(datetime, new_pos)`` with the datetime in UTC.
    """
    # unix epoch (1970) in seconds from windows epoch (1601)
    unix_epoch = 11644473600
    val1, pos = load_le32(buf, pos)
    val2, pos = load_le32(buf, pos)
    secs, n1secs = divmod((val2 << 32) | val1, 10000000)
    dt = datetime.fromtimestamp(secs - unix_epoch, timezone.utc)
    # the remainder is in 100ns units
    dt = to_nsdatetime(dt, n1secs * 100)
    return dt, pos
def _next_newvol(volfile):
    """New-style next volume

    Increments the number ending at the last digit found in
    ``volfile``.  Raises ``BadRarName`` when the name has no digit.
    """
    i = len(volfile) - 1
    while i >= 0:
        if "0" <= volfile[i] <= "9":
            return _inc_volname(volfile, i)
        i -= 1
    raise BadRarName("Cannot construct volume name: " + volfile)
def _next_oldvol(volfile):
    """Old-style next volume

    A name ending in ``.rar`` (any case) gets its last two characters
    replaced by ``00``; any other name has its last character
    incremented via ``_inc_volname``.
    """
    # rar -> r00
    if volfile[-4:].lower() == ".rar":
        return volfile[:-2] + "00"
    return _inc_volname(volfile, len(volfile) - 1)
def _inc_volname(volfile, i):
    """increase digits with carry, otherwise just increment char

    Starting at index ``i`` and moving left: a ``"9"`` becomes ``"0"``
    and the carry moves on; the first other character is replaced by
    its successor code point and the walk stops.
    """
    fn = list(volfile)
    while i >= 0:
        if fn[i] != "9":
            fn[i] = chr(ord(fn[i]) + 1)
            break
        fn[i] = "0"
        i -= 1
    return "".join(fn)
def _parse_ext_time(h, data, pos):
    """Parse all RAR3 extended time fields

    The 16-bit flags word holds one 4-bit group per timestamp, from
    high to low: mtime, ctime, atime, arctime.  Results are stored on
    ``h``; ``h.mtime`` and ``h.date_time`` are updated only when an
    mtime value was produced.

    Returns the position after the parsed fields.
    """
    # flags and rest of data can be missing
    flags = 0
    if pos + 2 <= len(data):
        flags = S_SHORT.unpack_from(data, pos)[0]
        pos += 2

    mtime, pos = _parse_xtime(flags >> 3 * 4, data, pos, h.mtime)
    h.ctime, pos = _parse_xtime(flags >> 2 * 4, data, pos)
    h.atime, pos = _parse_xtime(flags >> 1 * 4, data, pos)
    h.arctime, pos = _parse_xtime(flags >> 0 * 4, data, pos)
    if mtime:
        h.mtime = mtime
        h.date_time = mtime.timetuple()[:6]
    return pos
def _parse_xtime(flag, data, pos, basetime=None):
    """Parse one RAR3 extended time field

    Bits of ``flag`` used here: 8 - field present, 4 - add one second,
    low two bits - number of fraction bytes that follow.

    Returns ``(datetime_or_None, new_pos)``; the result is ``None``
    when the field is not present.
    """
    res = None
    if flag & 8:
        if not basetime:
            basetime, pos = load_dostime(data, pos)

        # load second fractions
        rem = 0
        cnt = flag & 3
        for _ in range(cnt):
            b, pos = load_byte(data, pos)
            rem = (b << 16) | (rem >> 8)

        # convert 100ns units to nanoseconds
        nsec = rem * 100

        # dostime has room for 30 seconds only, correct if needed
        if flag & 4 and basetime.second < 59:
            basetime = basetime.replace(second=basetime.second + 1)

        res = to_nsdatetime(basetime, nsec)
    return res, pos
def is_filelike(obj):
    """Filename or file object?

    Returns ``False`` for ``bytes``, ``str`` and ``Path`` values and
    ``True`` for objects that have ``read``, ``tell`` and ``seek``.
    Anything else raises ``ValueError``.
    """
    filename_types = (bytes, str, Path)

    if isinstance(obj, filename_types):
        return False
    res = True
    for a in ("read", "tell", "seek"):
        res = res and hasattr(obj, a)
    if not res:
        raise ValueError("Invalid object passed as file")
    return True
def rar3_s2k(pwd, salt):
    """String-to-key hash for RAR3.

    ``pwd`` may be ``str`` or utf8-encoded bytes.  Returns
    ``(key, iv)``: 16 key bytes and 16 iv bytes.
    """
    if not isinstance(pwd, str):
        pwd = pwd.decode("utf8")
    # bytearray: Rar3Sha1 with rarbug=True modifies its input in place
    seed = bytearray(pwd.encode("utf-16le") + salt)
    h = Rar3Sha1(rarbug=True)
    iv = b""
    for i in range(16):
        for j in range(0x4000):
            cnt = S_LONG.pack(i * 0x4000 + j)
            h.update(seed)
            h.update(cnt[:3])
            if j == 0:
                # one iv byte per outer round
                iv += h.digest()[19:20]
    key_be = h.digest()[:16]
    # swap byte order inside each 32-bit word
    key_le = pack("<LLLL", *unpack(">LLLL", key_be))
    return key_le, iv
def rar3_decompress(vers, meth, data, declen=0, flags=0, crc=0, pwd=None, salt=None):
    """Decompress blob of compressed data.

    Used for data with non-standard header - eg. comments.

    The blob is wrapped into a minimal temporary archive that is
    handed to the external tool; the tool's output is returned.
    Stored, unencrypted data is returned as-is.  Returns ``b""`` when
    the flags require a salt but none was given.
    """
    # already uncompressed?
    if meth == RAR_M0 and (flags & RAR_FILE_PASSWORD) == 0:
        return data

    # take only necessary flags
    flags = flags & (RAR_FILE_PASSWORD | RAR_FILE_SALT | RAR_FILE_DICTMASK)
    flags |= RAR_LONG_BLOCK

    # file header
    fname = b"data"
    date = ((2010 - 1980) << 25) + (12 << 21) + (31 << 16)
    mode = 0x20
    fhdr = S_FILE_HDR.pack(len(data), declen, RAR_OS_MSDOS, crc,
                           date, vers, meth, len(fname), mode)
    fhdr += fname
    if flags & RAR_FILE_SALT:
        if not salt:
            return b""
        fhdr += salt

    # full header
    hlen = S_BLK_HDR.size + len(fhdr)
    # first pack with zero crc, then repack with the computed value
    hdr = S_BLK_HDR.pack(0, RAR_BLOCK_FILE, flags, hlen) + fhdr
    hcrc = crc32(hdr[2:]) & 0xFFFF
    hdr = S_BLK_HDR.pack(hcrc, RAR_BLOCK_FILE, flags, hlen) + fhdr

    # archive main header
    mh = S_BLK_HDR.pack(0x90CF, RAR_BLOCK_MAIN, 0, 13) + b"\0" * (2 + 4)

    # decompress via temp rar
    setup = tool_setup()
    tmpfd, tmpname = mkstemp(suffix=".rar", dir=HACK_TMP_DIR)
    tmpf = os.fdopen(tmpfd, "wb")
    try:
        tmpf.write(RAR_ID + mh + hdr + data)
        tmpf.close()

        # pass a password only when the data is flagged as encrypted
        curpwd = pwd if (flags & RAR_FILE_PASSWORD) and pwd else None
        cmd = setup.open_cmdline(curpwd, tmpname)
        p = custom_popen(cmd)
        return p.communicate()[0]
    finally:
        # closing twice is harmless; covers a failed write
        tmpf.close()
        os.unlink(tmpname)
def sanitize_filename(fname, pathsep, is_win32):
    """Simulate unrar sanitization.

    Characters matched by the platform's bad-character pattern become
    ``_``; empty, ``.`` and ``..`` path segments are dropped.  With
    ``is_win32`` a leading ``X:`` prefix is removed and a segment
    ending in a space or dot gets that character replaced by ``_``.

    ``fname`` is split on ``/`` and the result joined with ``pathsep``.
    """
    if is_win32:
        if len(fname) > 1 and fname[1] == ":":
            fname = fname[2:]
        rc = RC_BAD_CHARS_WIN32
    else:
        rc = RC_BAD_CHARS_UNIX
    if rc.search(fname):
        fname = rc.sub("_", fname)

    parts = []
    for seg in fname.split("/"):
        if seg in ("", ".", ".."):
            continue
        if is_win32 and seg[-1] in (" ", "."):
            seg = seg[:-1] + "_"
        parts.append(seg)
    return pathsep.join(parts)
def empty_read(src, size, blklen):
    """Read and drop fixed amount of data.

    Reads ``size`` bytes from ``src`` in chunks of at most ``blklen``
    bytes.  Raises ``BadRarFile`` when ``src`` runs out of data first.
    """
    while size > 0:
        if size > blklen:
            res = src.read(blklen)
        else:
            res = src.read(size)
        if not res:
            raise BadRarFile("cannot load data")
        size -= len(res)
def to_datetime(t):
    """Convert 6-part time tuple into datetime object.

    Out-of-range month, day, hour, minute and second values are
    clamped into range instead of raising.
    """
    # extract values
    year, mon, day, h, m, s = t

    # assume the values are valid
    try:
        return datetime(year, mon, day, h, m, s)
    except ValueError:
        pass

    # sanitize invalid values
    mday = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
    mon = max(1, min(mon, 12))
    day = max(1, min(day, mday[mon]))
    h = min(h, 23)
    m = min(m, 59)
    s = min(s, 59)
    if mon == 2 and day == 29:
        # Feb 29 is valid only in leap years, fall back to 28
        try:
            return datetime(year, mon, day, h, m, s)
        except ValueError:
            day = 28
    return datetime(year, mon, day, h, m, s)
def parse_dos_time(stamp):
    """Parse standard 32-bit DOS timestamp.

    Returns ``(year, month, day, hour, minute, second)``.  Seconds are
    stored in two-second units and the year as an offset from 1980.
    Values are not validated.
    """
    sec, stamp = stamp & 0x1F, stamp >> 5
    mn, stamp = stamp & 0x3F, stamp >> 6
    hr, stamp = stamp & 0x1F, stamp >> 5
    day, stamp = stamp & 0x1F, stamp >> 5
    mon, stamp = stamp & 0x0F, stamp >> 4
    yr = (stamp & 0x7F) + 1980
    return (yr, mon, day, hr, mn, sec * 2)
# pylint: disable=arguments-differ,signature-differs
class nsdatetime(datetime):
    """Datetime that carries nanoseconds.

    Arithmetic not supported, will lose nanoseconds.

    Constructing a value whose nanoseconds are a whole number of
    microseconds returns a plain :class:`datetime` instead.

    .. versionadded:: 4.0
    """
    __slots__ = ("nanosecond",)
    nanosecond: int     #: Number of nanoseconds, 0 <= nanosecond <= 999999999

    def __new__(cls, year, month, day, hour=0, minute=0, second=0,
                microsecond=0, tzinfo=None, *, fold=0, nanosecond=0):
        # a non-zero nanosecond value overrides microsecond
        usec, mod = divmod(nanosecond, 1000) if nanosecond else (microsecond, 0)
        if mod == 0:
            return datetime(year, month, day, hour, minute, second, usec, tzinfo, fold=fold)
        self = super().__new__(cls, year, month, day, hour, minute, second, usec, tzinfo, fold=fold)
        self.nanosecond = nanosecond
        return self

    def isoformat(self, sep="T", timespec="auto"):
        """Formats with nanosecond precision by default.
        """
        if timespec == "auto":
            pre, post = super().isoformat(sep, "microseconds").split(".", 1)
            # post[6:] keeps whatever follows the 6 fraction digits (utc offset)
            return f"{pre}.{self.nanosecond:09d}{post[6:]}"
        return super().isoformat(sep, timespec)

    def astimezone(self, tz=None):
        """Convert to new timezone.
        """
        tmp = super().astimezone(tz)
        return self.__class__(tmp.year, tmp.month, tmp.day, tmp.hour, tmp.minute, tmp.second,
                              nanosecond=self.nanosecond, tzinfo=tmp.tzinfo, fold=tmp.fold)

    def replace(self, year=None, month=None, day=None, hour=None, minute=None, second=None,
                microsecond=None, tzinfo=None, *, fold=None, nanosecond=None):
        """Return new timestamp with specified fields replaced.

        ``nanosecond`` takes precedence over ``microsecond`` when both
        are given.
        """
        return self.__class__(
            self.year if year is None else year,
            self.month if month is None else month,
            self.day if day is None else day,
            self.hour if hour is None else hour,
            self.minute if minute is None else minute,
            self.second if second is None else second,
            nanosecond=((self.nanosecond if microsecond is None else microsecond * 1000)
                        if nanosecond is None else nanosecond),
            tzinfo=self.tzinfo if tzinfo is None else tzinfo,
            fold=self.fold if fold is None else fold)

    def __hash__(self):
        return hash((super().__hash__(), self.nanosecond)) if self.nanosecond else super().__hash__()

    def __eq__(self, other):
        res = super().__eq__(other)
        # NotImplemented must be handed back untouched, not used as a
        # boolean, so comparing with unrelated types works normally
        if res is NotImplemented or not res:
            return res
        return self.nanosecond == (
            other.nanosecond if isinstance(other, nsdatetime) else other.microsecond * 1000)

    def __gt__(self, other):
        res = super().__gt__(other)
        if res is NotImplemented or res:
            return res
        # equal down to the microsecond: nanoseconds decide
        return super().__eq__(other) is True and self.nanosecond > (
            other.nanosecond if isinstance(other, nsdatetime) else other.microsecond * 1000)

    def __lt__(self, other):
        return not (self > other or self == other)

    def __ge__(self, other):
        return not self < other

    def __le__(self, other):
        return not self > other

    def __ne__(self, other):
        return not self == other
def to_nsdatetime(dt, nsec):
    """Apply nanoseconds to datetime.

    Returns ``dt`` itself when ``nsec`` is zero or ``None``; otherwise
    a new value built from the date, time-of-day down to seconds,
    tzinfo and fold of ``dt`` plus ``nsec`` nanoseconds.
    """
    if not nsec:
        return dt
    return nsdatetime(dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second,
                      tzinfo=dt.tzinfo, fold=dt.fold, nanosecond=nsec)
def to_nsecs(dt):
    """Convert datatime instance to nanoseconds.

    Returns the timestamp of ``dt`` as an integer number of
    nanoseconds.
    """
    # Take the timestamp of the whole second only.  Truncating the
    # fractional timestamp with int() rounds toward zero, which picks
    # the wrong second for times before the epoch.
    secs = int(dt.replace(microsecond=0).timestamp())
    nsecs = dt.nanosecond if isinstance(dt, nsdatetime) else dt.microsecond * 1000
    return secs * 1000000000 + nsecs
def custom_popen(cmd):
    """Disconnect cmd from parent fds, read only from stdout.

    stderr is merged into stdout and stdin is connected to the null
    device.  Returns the ``Popen`` object.

    Raises ``RarCannotExec`` when the program is missing or may not
    be executed; other ``OSError`` values propagate unchanged.
    """
    creationflags = 0
    if sys.platform == "win32":
        creationflags = 0x08000000  # CREATE_NO_WINDOW

    try:
        p = Popen(cmd, bufsize=0, stdout=PIPE, stderr=STDOUT, stdin=DEVNULL,
                  creationflags=creationflags)
    except OSError as ex:
        if ex.errno == errno.ENOENT:
            raise RarCannotExec("Unrar not installed?")
        if ex.errno == errno.EACCES or ex.errno == errno.EPERM:
            raise RarCannotExec("Cannot execute unrar")
        raise
    return p
def check_returncode(code, out, errmap):
    """Raise exception according to unrar exit code.

    Does nothing for exit code 0.  Otherwise the exception class is
    looked up in ``errmap`` by exit code; 255, negative codes and
    everything else fall back to ``RarUserBreak``, ``RarSignalExit``
    and ``RarUnknownError``.  The message is built from the class
    docstring, the code and, when not empty, ``out``.
    """
    if code == 0:
        return

    if code > 0 and code < len(errmap):
        exc = errmap[code]
    elif code == 255:
        exc = RarUserBreak
    elif code < 0:
        exc = RarSignalExit
    else:
        exc = RarUnknownError

    # format message
    if out:
        msg = "%s [%d]: %s" % (exc.__doc__, code, out)
    else:
        msg = "%s [%d]" % (exc.__doc__, code)

    raise exc(msg)
def membuf_tempfile(memfile):
    """Write in-memory file object to real file.

    ``memfile`` is rewound and copied into a new ``.rar`` temporary
    file.  Returns the name of that file; removing it is left to the
    caller.  On failure the partial file is deleted and the exception
    re-raised.
    """
    memfile.seek(0, 0)

    tmpfd, tmpname = mkstemp(suffix=".rar", dir=HACK_TMP_DIR)
    tmpf = os.fdopen(tmpfd, "wb")

    try:
        shutil.copyfileobj(memfile, tmpf, BSIZE)
        tmpf.close()
    except BaseException:
        # BaseException: clean up on KeyboardInterrupt as well
        tmpf.close()
        os.unlink(tmpname)
        raise
    return tmpname
#
# Find working command-line tool
#

class ToolSetup:
    """Command-line builder for one extraction backend.

    ``setup`` is a config dict with the keys ``open_cmd``,
    ``check_cmd``, ``password``, ``no_password`` and ``errmap``.
    """

    def __init__(self, setup):
        self.setup = setup

    def check(self):
        """Return True when the tool's check command runs and exits with 0."""
        cmdline = self.get_cmdline("check_cmd", None)
        try:
            p = custom_popen(cmdline)
            p.communicate()
            return p.returncode == 0
        except RarCannotExec:
            return False

    def open_cmdline(self, pwd, rarfn, filefn=None):
        """Build the command that writes archive content to stdout."""
        cmdline = self.get_cmdline("open_cmd", pwd)
        cmdline.append(rarfn)
        if filefn:
            self.add_file_arg(cmdline, filefn)
        return cmdline

    def get_errmap(self):
        """Return the exit-code to exception-class list."""
        return self.setup["errmap"]

    def get_cmdline(self, key, pwd, nodash=False):
        """Return command list for config entry ``key``.

        The first element of the entry names a module-level variable
        that holds the executable.  A ``--`` separator is appended
        unless ``nodash`` is set.
        """
        cmdline = list(self.setup[key])
        cmdline[0] = globals()[cmdline[0]]
        self.add_password_arg(cmdline, pwd)
        if not nodash:
            cmdline.append("--")
        return cmdline

    def add_file_arg(self, cmdline, filename):
        """Append the name of the archive member to the command."""
        cmdline.append(filename)

    def add_password_arg(self, cmdline, pwd):
        """Append password switch to commandline.

        A ``str`` password entry is used as a prefix glued to the
        password, a sequence is added as separate arguments followed
        by the password.  Without a password the ``no_password``
        arguments are added.

        Raises ``RarCannotExec`` when a password is given but the
        tool's config has no password switch.
        """
        if pwd is not None:
            if not isinstance(pwd, str):
                pwd = pwd.decode("utf8")
            args = self.setup["password"]
            if args is None:
                # fail with a clear error instead of TypeError from extend(None)
                tool = self.setup["open_cmd"][0]
                raise RarCannotExec("%s does not support passwords" % tool)
            if isinstance(args, str):
                cmdline.append(args + pwd)
            else:
                cmdline.extend(args)
                cmdline.append(pwd)
        else:
            cmdline.extend(self.setup["no_password"])
# Backend config for the unrar tool.  The first element of each command
# names the module variable that holds the executable.
UNRAR_CONFIG = {
    "open_cmd": ("UNRAR_TOOL", "p", "-inul"),
    "check_cmd": ("UNRAR_TOOL", "-inul"),
    "password": "-p",
    "no_password": ("-p-",),
    # map return code to exception class, codes from rar.txt
    "errmap": [None,
               RarWarning, RarFatalError, RarCRCError, RarLockedArchiveError,    # 1..4
               RarWriteError, RarOpenError, RarUserError, RarMemoryError,        # 5..8
               RarCreateError, RarNoFilesError, RarWrongPassword]                # 9..11
}
# Problems with unar RAR backend:
# - Does not support RAR2 locked files [fails to read]
# - Does not support RAR5 Blake2sp hash [reading works]
UNAR_CONFIG = {
    "open_cmd": ("UNAR_TOOL", "-q", "-o", "-"),
    "check_cmd": ("UNAR_TOOL", "-version"),
    "password": ("-p",),
    "no_password": ("-p", ""),
    "errmap": [None],
}
# Problems with libarchive RAR backend:
# - Does not support solid archives.
# - Does not support password-protected archives.
# - Does not support RARVM-based compression filters.
BSDTAR_CONFIG = {
    "open_cmd": ("BSDTAR_TOOL", "-x", "--to-stdout", "-f"),
    "check_cmd": ("BSDTAR_TOOL", "--version"),
    "password": None,
    "no_password": (),
    "errmap": [None],
}
# ToolSetup picked by tool_setup(), None until the first successful pick
CURRENT_SETUP = None


def tool_setup(unrar=True, unar=True, bsdtar=True, force=False):
    """Pick a tool, return cached ToolSetup.

    The enabled backends are probed in the order unrar, unar, bsdtar
    and the first one whose check succeeds is cached module-wide.
    ``force`` drops the cached value before probing.

    Raises ``RarCannotExec`` when no enabled backend works.
    """
    global CURRENT_SETUP
    if force:
        CURRENT_SETUP = None
    if CURRENT_SETUP is not None:
        return CURRENT_SETUP
    lst = []
    if unrar:
        lst.append(UNRAR_CONFIG)
    if unar:
        lst.append(UNAR_CONFIG)
    if bsdtar:
        lst.append(BSDTAR_CONFIG)

    for conf in lst:
        setup = ToolSetup(conf)
        if setup.check():
            CURRENT_SETUP = setup
            break
    if CURRENT_SETUP is None:
        raise RarCannotExec("Cannot find working tool")
    return CURRENT_SETUP
def main(args):
    """Minimal command-line interface for rarfile module.
    """
    import argparse
    p = argparse.ArgumentParser(description=main.__doc__)
    # exactly one of the actions must be given
    g = p.add_mutually_exclusive_group(required=True)
    g.add_argument("-l", "--list", metavar="<rarfile>",
                   help="Show archive listing")
    g.add_argument("-e", "--extract", nargs=2,
                   metavar=("<rarfile>", "<output_dir>"),
                   help="Extract archive into target dir")
    g.add_argument("-t", "--test", metavar="<rarfile>",
                   help="Test if a archive is valid")
    cmd = p.parse_args(args)

    if cmd.list:
        with RarFile(cmd.list) as rf:
            rf.printdir()
    elif cmd.test:
        with RarFile(cmd.test) as rf:
            rf.testrar()
    elif cmd.extract:
        with RarFile(cmd.extract[0]) as rf:
            rf.extractall(cmd.extract[1])
# run the command-line interface when executed as a script
if __name__ == "__main__":
    main(sys.argv[1:])