3 # Copyright (c) 2005-2016 Marko Kreen <markokr@gmail.com>
5 # Permission to use, copy, modify, and/or distribute this software for any
6 # purpose with or without fee is hereby granted, provided that the above
7 # copyright notice and this permission notice appear in all copies.
9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 r
"""RAR archive reader.
This is a Python module for reading RAR archives. The interface
20 is made as :mod:`zipfile`-like as possible.
23 - Parse archive structure with Python.
24 - Extract non-compressed files with Python
25 - Extract compressed files with unrar.
26 - Optionally write compressed data to temp file to speed up unrar,
27 otherwise it needs to scan whole archive on each execution.
33 rf = rarfile.RarFile('myarchive.rar')
34 for f in rf.infolist():
print(f.filename, f.file_size)
36 if f.filename == 'README':
39 Archive files can also be accessed via file-like object returned
40 by :meth:`RarFile.open`::
44 with rarfile.RarFile('archive.rar') as rf:
45 with rf.open('README') as f:
There are a few module-level parameters to tune behaviour;
here they are with their defaults and the reason to change each one::
54 # Set to full path of unrar.exe if it is not in PATH
55 rarfile.UNRAR_TOOL = "unrar"
57 # Set to '\\' to be more compatible with old rarfile
58 rarfile.PATH_SEP = '/'
60 For more details, refer to source.
64 from __future__
import division
, print_function
67 ## Imports and compat - support both Python 2.x and 3.x
75 from struct
import pack
, unpack
, Struct
76 from binascii
import crc32
, hexlify
77 from tempfile
import mkstemp
78 from subprocess
import Popen
, PIPE
, STDOUT
79 from io
import RawIOBase
80 from hashlib
import sha1
, sha256
82 from datetime
import datetime
, timedelta
, tzinfo
84 # fixed offset timezone, for UTC
86 from datetime
import timezone
88 class timezone(tzinfo
):
89 """Compat timezone."""
90 __slots__
= ('_ofs', '_name')
93 def __init__(self
, offset
, name
):
94 super(timezone
, self
).__init
__()
95 self
._ofs
, self
._name
= offset
, name
97 def utcoffset(self
, dt
):
100 def tzname(self
, dt
):
# only needed for encrypted headers
109 from cryptography
.hazmat
.primitives
.ciphers
import algorithms
, modes
, Cipher
110 from cryptography
.hazmat
.backends
import default_backend
111 from cryptography
.hazmat
.primitives
import hashes
112 from cryptography
.hazmat
.primitives
.kdf
import pbkdf2
114 class AES_CBC_Decrypt(object):
116 def __init__(self
, key
, iv
):
117 ciph
= Cipher(algorithms
.AES(key
), modes
.CBC(iv
), default_backend())
118 self
.decrypt
= ciph
.decryptor().update
def pbkdf2_sha256(password, salt, iters):
    """Derive a 32-byte key with PBKDF2 using HMAC-SHA256.

    Variant built on the ``cryptography`` package.

    password
        secret to derive the key from.
    salt
        salt value for the derivation.
    iters
        number of PBKDF2 iterations.
    """
    kdf = pbkdf2.PBKDF2HMAC(
        algorithm=hashes.SHA256(),
        length=32,
        salt=salt,
        iterations=iters,
        backend=default_backend(),
    )
    return kdf.derive(password)
126 from Crypto
.Cipher
import AES
127 from Crypto
.Protocol
import KDF
129 class AES_CBC_Decrypt(object):
131 def __init__(self
, key
, iv
):
132 self
.decrypt
= AES
.new(key
, AES
.MODE_CBC
, iv
).decrypt
def pbkdf2_sha256(password, salt, iters):
    """Derive a 32-byte key with PBKDF2 using HMAC-SHA256.

    Variant built on the ``Crypto`` package; ``hmac_sha256`` is passed as
    the pseudo-random function.

    password
        secret to derive the key from.
    salt
        salt value for the derivation.
    iters
        number of PBKDF2 iterations.
    """
    return KDF.PBKDF2(password, salt, dkLen=32, count=iters, prf=hmac_sha256)
144 from hashlib
import blake2s
147 from pyblake2
import blake2s
153 if sys
.hexversion
< 0x3000000:
154 def rar_crc32(data
, prev
=0):
155 """CRC32 with unsigned values.
157 if (prev
> 0) and (prev
& 0x80000000):
159 res
= crc32(data
, prev
)
165 else: # pragma: no cover
167 """Return hex string."""
168 return hexlify(data
).decode('ascii')
171 _byte_code
= int # noqa
176 # export only interesting items
177 __all__
= ['is_rarfile', 'RarInfo', 'RarFile', 'RarExtFile']
180 ## Module configuration. Can be tuned after importing.
183 #: default fallback charset
184 DEFAULT_CHARSET
= "windows-1252"
186 #: list of encodings to try, with fallback to DEFAULT_CHARSET if none succeed
187 TRY_ENCODINGS
= ('utf8', 'utf-16le')
189 #: 'unrar', 'rar' or full path to either one
192 #: Command line args to use for opening file for reading.
193 OPEN_ARGS
= ('p', '-inul')
195 #: Command line args to use for extracting file to disk.
196 EXTRACT_ARGS
= ('x', '-y', '-idq')
198 #: args for testrar()
199 TEST_ARGS
= ('t', '-idq')
202 # Allow use of tool that is not compatible with unrar.
204 # By default use 'bsdtar' which is 'tar' program that
205 # sits on top of libarchive.
207 # Problems with libarchive RAR backend:
208 # - Does not support solid archives.
209 # - Does not support password-protected archives.
213 ALT_OPEN_ARGS
= ('-x', '--to-stdout', '-f')
214 ALT_EXTRACT_ARGS
= ('-x', '-f')
215 ALT_TEST_ARGS
= ('-t', '-f')
216 ALT_CHECK_ARGS
= ('--help',)
218 #: whether to speed up decompression by using tmp archive
221 #: limit the filesize for tmp archive usage
222 HACK_SIZE_LIMIT
= 20 * 1024 * 1024
224 #: Separator for path name components. RAR internally uses '\\'.
225 #: Use '/' to be similar with zipfile.
# RAR3 block type codes.  The values are the consecutive byte values
# 0x72..0x7b, i.e. the ASCII characters 'r' .. '{' noted beside each one.
RAR_BLOCK_MARK = 0x72          # 'r'
RAR_BLOCK_MAIN = 0x73          # 's'
RAR_BLOCK_FILE = 0x74          # 't'
RAR_BLOCK_OLD_COMMENT = 0x75   # 'u'
RAR_BLOCK_OLD_EXTRA = 0x76     # 'v'
RAR_BLOCK_OLD_SUB = 0x77       # 'w'
RAR_BLOCK_OLD_RECOVERY = 0x78  # 'x'
RAR_BLOCK_OLD_AUTH = 0x79      # 'y'
RAR_BLOCK_SUB = 0x7a           # 'z'
RAR_BLOCK_ENDARC = 0x7b        # '{'
# flags for RAR_BLOCK_MAIN (one bit each; hex mask shown on the right)
RAR_MAIN_VOLUME = 1 << 0         # 0x0001
RAR_MAIN_COMMENT = 1 << 1        # 0x0002
RAR_MAIN_LOCK = 1 << 2           # 0x0004
RAR_MAIN_SOLID = 1 << 3          # 0x0008
RAR_MAIN_NEWNUMBERING = 1 << 4   # 0x0010
RAR_MAIN_AUTH = 1 << 5           # 0x0020
RAR_MAIN_RECOVERY = 1 << 6       # 0x0040
RAR_MAIN_PASSWORD = 1 << 7       # 0x0080
RAR_MAIN_FIRSTVOLUME = 1 << 8    # 0x0100
RAR_MAIN_ENCRYPTVER = 1 << 9     # 0x0200
# flags for RAR_BLOCK_FILE
RAR_FILE_SPLIT_BEFORE = 1 << 0   # 0x0001
RAR_FILE_SPLIT_AFTER = 1 << 1    # 0x0002
RAR_FILE_PASSWORD = 1 << 2       # 0x0004
RAR_FILE_COMMENT = 1 << 3        # 0x0008
RAR_FILE_SOLID = 1 << 4          # 0x0010
# bits 5..7 form one 3-bit field: dictionary size, or "directory" when all set
RAR_FILE_DICTMASK = 7 << 5       # 0x00e0
RAR_FILE_DICT64 = 0 << 5         # 0x0000
RAR_FILE_DICT128 = 1 << 5        # 0x0020
RAR_FILE_DICT256 = 2 << 5        # 0x0040
RAR_FILE_DICT512 = 3 << 5        # 0x0060
RAR_FILE_DICT1024 = 4 << 5       # 0x0080
RAR_FILE_DICT2048 = 5 << 5       # 0x00a0
RAR_FILE_DICT4096 = 6 << 5       # 0x00c0
RAR_FILE_DIRECTORY = 7 << 5      # 0x00e0
RAR_FILE_LARGE = 1 << 8          # 0x0100
RAR_FILE_UNICODE = 1 << 9        # 0x0200
RAR_FILE_SALT = 1 << 10          # 0x0400
RAR_FILE_VERSION = 1 << 11       # 0x0800
RAR_FILE_EXTTIME = 1 << 12       # 0x1000
RAR_FILE_EXTFLAGS = 1 << 13      # 0x2000
# flags for RAR_BLOCK_ENDARC
RAR_ENDARC_NEXT_VOLUME = 1 << 0  # 0x0001
RAR_ENDARC_DATACRC = 1 << 1      # 0x0002
RAR_ENDARC_REVSPACE = 1 << 2     # 0x0004
RAR_ENDARC_VOLNR = 1 << 3        # 0x0008
# flags common to all blocks
RAR_SKIP_IF_UNKNOWN = 1 << 14    # 0x4000
RAR_LONG_BLOCK = 1 << 15         # 0x8000
296 # Compression methods - '0'..'5'
# RAR5 block type codes
RAR5_BLOCK_SERVICE = 3
RAR5_BLOCK_ENCRYPTION = 4
RAR5_BLOCK_ENDARC = 5
# RAR5 flag bits, grouped by the name prefix of the field they belong to.

# RAR5_BLOCK_FLAG_*
RAR5_BLOCK_FLAG_EXTRA_DATA = 1 << 0        # 0x01
RAR5_BLOCK_FLAG_DATA_AREA = 1 << 1         # 0x02
RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN = 1 << 2   # 0x04
RAR5_BLOCK_FLAG_SPLIT_BEFORE = 1 << 3      # 0x08
RAR5_BLOCK_FLAG_SPLIT_AFTER = 1 << 4       # 0x10
RAR5_BLOCK_FLAG_DEPENDS_PREV = 1 << 5      # 0x20
RAR5_BLOCK_FLAG_KEEP_WITH_PARENT = 1 << 6  # 0x40

# RAR5_MAIN_FLAG_*
RAR5_MAIN_FLAG_ISVOL = 1 << 0              # 0x01
RAR5_MAIN_FLAG_HAS_VOLNR = 1 << 1          # 0x02
RAR5_MAIN_FLAG_SOLID = 1 << 2              # 0x04
RAR5_MAIN_FLAG_RECOVERY = 1 << 3           # 0x08
RAR5_MAIN_FLAG_LOCKED = 1 << 4             # 0x10

# RAR5_FILE_FLAG_*
RAR5_FILE_FLAG_ISDIR = 1 << 0              # 0x01
RAR5_FILE_FLAG_HAS_MTIME = 1 << 1          # 0x02
RAR5_FILE_FLAG_HAS_CRC32 = 1 << 2          # 0x04
RAR5_FILE_FLAG_UNKNOWN_SIZE = 1 << 3       # 0x08

RAR5_COMPR_SOLID = 1 << 6                  # 0x40

RAR5_ENC_FLAG_HAS_CHECKVAL = 1 << 0        # 0x01

RAR5_ENDARC_FLAG_NEXT_VOL = 1 << 0         # 0x01
339 RAR5_XFILE_ENCRYPTION
= 1
342 RAR5_XFILE_VERSION
= 4
345 RAR5_XFILE_SERVICE
= 7
# RAR5_XTIME_* flag bits
RAR5_XTIME_UNIXTIME = 1 << 0       # 0x01
RAR5_XTIME_HAS_MTIME = 1 << 1      # 0x02
RAR5_XTIME_HAS_CTIME = 1 << 2      # 0x04
RAR5_XTIME_HAS_ATIME = 1 << 3      # 0x08

RAR5_XENC_CIPHER_AES256 = 0

# RAR5_XENC_* flag bits
RAR5_XENC_CHECKVAL = 1 << 0        # 0x01
RAR5_XENC_TWEAKED = 1 << 1         # 0x02

RAR5_XHASH_BLAKE2SP = 0

# RAR5_XREDIR_* redirection types (plain enumeration, not bits)
RAR5_XREDIR_UNIX_SYMLINK = 1
RAR5_XREDIR_WINDOWS_SYMLINK = 2
RAR5_XREDIR_WINDOWS_JUNCTION = 3
RAR5_XREDIR_HARD_LINK = 4
RAR5_XREDIR_FILE_COPY = 5

# RAR5_XREDIR_* flag bit
RAR5_XREDIR_ISDIR = 1 << 0         # 0x01

# RAR5_XOWNER_* flag bits
RAR5_XOWNER_UNAME = 1 << 0         # 0x01
RAR5_XOWNER_GNAME = 1 << 1         # 0x02
RAR5_XOWNER_UID = 1 << 2           # 0x04
RAR5_XOWNER_GID = 1 << 3           # 0x08
376 ## internal constants
# Archive signatures: RAR_ID is the one RAR3Parser expects,
# RAR5_ID the one RAR5Parser expects.
RAR_ID = b"Rar!\x1a\x07\x00"
RAR5_ID = b"Rar!\x1a\x07\x01\x00"
383 UTC
= timezone(timedelta(0), 'UTC')
386 def _get_rar_version(xfile
):
387 """Check quickly whether file is rar archive.
389 with
XFile(xfile
) as fd
:
390 buf
= fd
.read(len(RAR5_ID
))
391 if buf
.startswith(RAR_ID
):
393 elif buf
.startswith(RAR5_ID
):
def is_rarfile(xfile):
    """Tell quickly whether *xfile* is a RAR archive.

    Returns True when :func:`_get_rar_version` reports a version
    greater than zero for it.
    """
    version = _get_rar_version(xfile)
    return version > 0
class Error(Exception):
    """Root of the rarfile exception hierarchy."""
class BadRarFile(Error):
    """The archive contains incorrect data."""
class NotRarFile(Error):
    """The given file is not a RAR archive."""
class BadRarName(Error):
    """Multipart name components could not be guessed."""
class NoRarEntry(Error):
    """The requested file was not found in the RAR archive."""
class PasswordRequired(Error):
    """The file cannot be read without a password."""
class NeedFirstVolume(Error):
    """Processing has to start from the first volume."""
class NoCrypto(Error):
    """Encrypted headers cannot be parsed because no crypto is available."""
class RarExecError(Error):
    """A problem reported by the unrar/rar tool."""
class RarWarning(RarExecError):
    """A non-fatal error."""
436 class RarFatalError(RarExecError
):
class RarCRCError(RarExecError):
    """A CRC error occurred during unpacking."""
class RarLockedArchiveError(RarExecError):
    """A locked archive must not be modified."""
445 class RarWriteError(RarExecError
):
448 class RarOpenError(RarExecError
):
451 class RarUserError(RarExecError
):
454 class RarMemoryError(RarExecError
):
457 class RarCreateError(RarExecError
):
class RarNoFilesError(RarExecError):
    """No files matching the pattern were found."""
463 class RarUserBreak(RarExecError
):
class RarWrongPassword(RarExecError):
    """The password is incorrect."""
class RarUnknownError(RarExecError):
    """The exit code is not a known one."""
class RarSignalExit(RarExecError):
    """Unrar exited because of a signal."""
class RarCannotExec(RarExecError):
    """The executable was not found."""
479 class RarInfo(object):
480 r
"""An entry in rar archive.
482 RAR3 extended timestamps are :class:`datetime.datetime` objects without timezone.
483 RAR5 extended timestamps are :class:`datetime.datetime` objects with UTC timezone.
488 File name with relative path.
489 Path separator is '/'. Always unicode string.
492 File modification timestamp. As tuple of (year, month, day, hour, minute, second).
493 RAR5 allows archives where it is missing, it's None then.
502 Compression method: one of :data:`RAR_M0` .. :data:`RAR_M5` constants.
505 Minimal Rar version needed for decompressing. As (major*10 + minor),
510 RAR5 does not have such field in archive, it's simply set to 50.
513 Host OS type, one of RAR_OS_* constants.
515 RAR3: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`, :data:`RAR_OS_MSDOS`,
516 :data:`RAR_OS_OS2`, :data:`RAR_OS_BEOS`.
518 RAR5: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`.
521 File attributes. May be either dos-style or unix-style, depending on host_os.
524 File modification time. Same value as :attr:`date_time`
525 but as :class:`datetime.datetime` object with extended precision.
528 Optional time field: creation time. As :class:`datetime.datetime` object.
531 Optional time field: last access time. As :class:`datetime.datetime` object.
534 Optional time field: archival time. As :class:`datetime.datetime` object.
538 CRC-32 of uncompressed file, unsigned int.
543 Blake2SP hash over decompressed data. (RAR5-only)
546 Optional file comment field. Unicode string. (RAR3-only)
549 If not None, file is link of some sort. Contains tuple of (type, flags, target).
552 Type is one of constants:
554 :data:`RAR5_XREDIR_UNIX_SYMLINK`
555 unix symlink to target.
556 :data:`RAR5_XREDIR_WINDOWS_SYMLINK`
557 windows symlink to target.
558 :data:`RAR5_XREDIR_WINDOWS_JUNCTION`
560 :data:`RAR5_XREDIR_HARD_LINK`
562 :data:`RAR5_XREDIR_FILE_COPY`
563 current file is copy of another archive entry.
565 Flags may contain :data:`RAR5_XREDIR_ISDIR` bit.
568 Volume nr, starting from 0.
571 Volume file name, where file starts.
575 # zipfile-compatible fields
585 # optional extended time fields, datetime() objects.
590 extract_version
= None
608 """Returns True if entry is a directory.
610 if self
.type == RAR_BLOCK_FILE
:
611 return (self
.flags
& RAR_FILE_DIRECTORY
) == RAR_FILE_DIRECTORY
614 def needs_password(self
):
615 """Returns True if data is stored password-protected.
617 if self
.type == RAR_BLOCK_FILE
:
618 return (self
.flags
& RAR_FILE_PASSWORD
) > 0
622 class RarFile(object):
623 """Parse RAR structure, provide access to files in archive.
626 #: Archive comment. Unicode string or None.
629 def __init__(self
, rarfile
, mode
="r", charset
=None, info_callback
=None,
630 crc_check
=True, errors
="stop"):
631 """Open and parse a RAR archive.
638 only 'r' is supported.
640 fallback charset to use, if filenames are not already Unicode-enabled.
642 debug callback, gets to see all archive entries.
644 set to False to disable CRC checks
646 Either "stop" to quietly stop parsing on errors,
647 or "strict" to raise errors. Default is "stop".
649 self
._rarfile
= rarfile
650 self
._charset
= charset
or DEFAULT_CHARSET
651 self
._info
_callback
= info_callback
652 self
._crc
_check
= crc_check
653 self
._password
= None
654 self
._file
_parser
= None
658 elif errors
== "strict":
661 raise ValueError("Invalid value for 'errors' parameter.")
664 raise NotImplementedError("RarFile supports only mode=r")
672 def __exit__(self
, typ
, value
, traceback
):
676 def setpassword(self
, password
):
677 """Sets the password to use when extracting.
679 self
._password
= password
680 if self
._file
_parser
:
681 if self
._file
_parser
.has_header_encryption():
682 self
._file
_parser
= None
683 if not self
._file
_parser
:
686 self
._file
_parser
.setpassword(self
._password
)
def needs_password(self):
    """Tell whether any archive entry requires a password for extraction.

    The answer comes from the current file parser.
    """
    parser = self._file_parser
    return parser.needs_password()
694 """Return list of filenames in archive.
696 return [f
.filename
for f
in self
.infolist()]
699 """Return RarInfo objects for all files/directories in archive.
701 return self
._file
_parser
.infolist()
def volumelist(self):
    """Return the file names of the archive volumes.

    For a single-volume archive the list holds only the name
    of the main archive file.
    """
    parser = self._file_parser
    return parser.volumelist()
def getinfo(self, fname):
    """Return the RarInfo record for *fname*.

    The lookup is delegated to the current file parser.
    """
    parser = self._file_parser
    return parser.getinfo(fname)
716 def open(self
, fname
, mode
='r', psw
=None):
717 """Returns file-like object (:class:`RarExtFile`) from where the data can be read.
719 The object implements :class:`io.RawIOBase` interface, so it can
720 be further wrapped with :class:`io.BufferedReader`
721 and :class:`io.TextIOWrapper`.
723 On older Python where io module is not available, it implements
724 only .read(), .seek(), .tell() and .close() methods.
726 The object is seekable, although the seeking is fast only on
727 uncompressed files, on compressed files the seeking is implemented
728 by reading ahead and/or restarting the decompression.
733 file name or RarInfo instance.
737 password to use for extracting.
741 raise NotImplementedError("RarFile.open() supports only mode=r")
744 inf
= self
.getinfo(fname
)
746 raise TypeError("Directory does not have any data: " + inf
.filename
)
749 if inf
.needs_password():
750 psw
= psw
or self
._password
752 raise PasswordRequired("File %s requires password" % inf
.filename
)
756 return self
._file
_parser
.open(inf
, psw
)
758 def read(self
, fname
, psw
=None):
759 """Return uncompressed data for archive entry.
761 For longer files using :meth:`RarFile.open` may be better idea.
766 filename or RarInfo instance
768 password to use for extracting.
771 with self
.open(fname
, 'r', psw
) as f
:
775 """Release open resources."""
779 """Print archive file list to stdout."""
780 for f
in self
.infolist():
783 def extract(self
, member
, path
=None, pwd
=None):
784 """Extract single file into current directory.
789 filename or :class:`RarInfo` instance
791 optional destination path
793 optional password to use
795 if isinstance(member
, RarInfo
):
796 fname
= member
.filename
799 self
._extract
([fname
], path
, pwd
)
801 def extractall(self
, path
=None, members
=None, pwd
=None):
802 """Extract all files into current directory.
807 optional destination path
809 optional filename or :class:`RarInfo` instance list to extract
811 optional password to use
814 if members
is not None:
816 if isinstance(m
, RarInfo
):
817 fnlist
.append(m
.filename
)
820 self
._extract
(fnlist
, path
, pwd
)
823 """Let 'unrar' test the archive.
825 cmd
= [UNRAR_TOOL
] + list(TEST_ARGS
)
826 add_password_arg(cmd
, self
._password
)
828 with
XTempFile(self
._rarfile
) as rarfile
:
830 p
= custom_popen(cmd
)
831 output
= p
.communicate()[0]
832 check_returncode(p
, output
)
835 """Return error string if parsing failed or None if no problems.
837 if not self
._file
_parser
:
838 return "Not a RAR file"
839 return self
._file
_parser
.strerror()
846 ver
= _get_rar_version(self
._rarfile
)
848 p3
= RAR3Parser(self
._rarfile
, self
._password
, self
._crc
_check
,
849 self
._charset
, self
._strict
, self
._info
_callback
)
850 self
._file
_parser
= p3
# noqa
852 p5
= RAR5Parser(self
._rarfile
, self
._password
, self
._crc
_check
,
853 self
._charset
, self
._strict
, self
._info
_callback
)
854 self
._file
_parser
= p5
# noqa
856 raise BadRarFile("Not a RAR file")
858 self
._file
_parser
.parse()
859 self
.comment
= self
._file
_parser
.comment
861 # call unrar to extract a file
862 def _extract(self
, fnlist
, path
=None, psw
=None):
863 cmd
= [UNRAR_TOOL
] + list(EXTRACT_ARGS
)
866 psw
= psw
or self
._password
867 add_password_arg(cmd
, psw
)
871 with
XTempFile(self
._rarfile
) as rarfn
:
876 if os
.sep
!= PATH_SEP
:
877 fn
= fn
.replace(PATH_SEP
, os
.sep
)
882 cmd
.append(path
+ os
.sep
)
885 p
= custom_popen(cmd
)
886 output
= p
.communicate()[0]
887 check_returncode(p
, output
)
890 # File format parsing
893 class CommonParser(object):
894 """Shared parser parts."""
897 _needs_password
= False
904 def __init__(self
, rarfile
, password
, crc_check
, charset
, strict
, info_cb
):
905 self
._rarfile
= rarfile
906 self
._password
= password
907 self
._crc
_check
= crc_check
908 self
._charset
= charset
909 self
._strict
= strict
910 self
._info
_callback
= info_cb
915 def has_header_encryption(self
):
916 """Returns True if headers are encrypted
918 if self
._hdrenc
_main
:
921 if self
._main
.flags
& RAR_MAIN_PASSWORD
:
925 def setpassword(self
, psw
):
926 """Set cached password."""
929 def volumelist(self
):
931 return self
._vol
_list
def needs_password(self):
    """Report the cached ``_needs_password`` flag."""
    required = self._needs_password
    return required
939 return self
._parse
_error
942 """List of RarInfo records.
944 return self
._info
_list
946 def getinfo(self
, member
):
947 """Return RarInfo for filename
949 if isinstance(member
, RarInfo
):
950 fname
= member
.filename
954 # accept both ways here
956 fname2
= fname
.replace("\\", "/")
958 fname2
= fname
.replace("/", "\\")
961 return self
._info
_map
[fname
]
964 return self
._info
_map
[fname2
]
966 raise NoRarEntry("No such file: %s" % fname
)
979 def _parse_real(self
):
980 fd
= XFile(self
._rarfile
)
982 sig
= fd
.read(len(self
._expect
_sig
))
983 if sig
!= self
._expect
_sig
:
984 if isinstance(self
._rarfile
, (str, unicode)):
985 raise NotRarFile("Not a Rar archive: {}".format(self
._rarfile
))
986 raise NotRarFile("Not a Rar archive")
988 volume
= 0 # first vol (.rar) is 0
991 volfile
= self
._rarfile
992 self
._vol
_list
= [self
._rarfile
]
995 h
= None # don't read past ENDARC
997 h
= self
._parse
_header
(fd
)
1003 volfile
= self
._next
_volname
(volfile
)
1006 self
._set
_error
("Cannot open next volume: %s", volfile
)
1009 sig
= fd
.read(len(self
._expect
_sig
))
1010 if sig
!= self
._expect
_sig
:
1011 self
._set
_error
("Invalid volume sig: %s", volfile
)
1015 self
._vol
_list
.append(volfile
)
1019 h
.volume_file
= volfile
1021 if h
.type == RAR_BLOCK_MAIN
and not self
._main
:
1023 if h
.flags
& RAR_MAIN_NEWNUMBERING
:
1024 # RAR 2.x does not set FIRSTVOLUME,
1025 # so check it only if NEWNUMBERING is used
1026 if (h
.flags
& RAR_MAIN_FIRSTVOLUME
) == 0:
1027 raise NeedFirstVolume("Need to start from first volume")
1028 if h
.flags
& RAR_MAIN_PASSWORD
:
1029 self
._needs
_password
= True
1030 if not self
._password
:
1032 elif h
.type == RAR_BLOCK_ENDARC
:
1033 more_vols
= (h
.flags
& RAR_ENDARC_NEXT_VOLUME
) > 0
1035 elif h
.type == RAR_BLOCK_FILE
:
1036 # RAR 2.x does not write RAR_BLOCK_ENDARC
1037 if h
.flags
& RAR_FILE_SPLIT_AFTER
:
1039 # RAR 2.x does not set RAR_MAIN_FIRSTVOLUME
1040 if volume
== 0 and h
.flags
& RAR_FILE_SPLIT_BEFORE
:
1041 raise NeedFirstVolume("Need to start from first volume")
1043 if h
.needs_password():
1044 self
._needs
_password
= True
1047 self
.process_entry(fd
, h
)
1049 if self
._info
_callback
:
1050 self
._info
_callback
(h
)
1054 fd
.seek(h
.data_offset
+ h
.add_size
, 0)
def process_entry(self, fd, item):
    """Examine *item* and add it to the lookup cache.

    Placeholder only: always raises NotImplementedError here.
    """
    raise NotImplementedError()
1060 def _decrypt_header(self
, fd
):
1061 raise NotImplementedError('_decrypt_header')
1063 def _parse_block_header(self
, fd
):
1064 raise NotImplementedError('_parse_block_header')
1066 def _open_hack(self
, inf
, psw
):
1067 raise NotImplementedError('_open_hack')
1069 # read single header
1070 def _parse_header(self
, fd
):
1072 # handle encrypted headers
1073 if (self
._main
and self
._main
.flags
& RAR_MAIN_PASSWORD
) or self
._hdrenc
_main
:
1074 if not self
._password
:
1076 fd
= self
._decrypt
_header
(fd
)
1078 # now read actual header
1079 return self
._parse
_block
_header
(fd
)
1080 except struct
.error
:
1081 self
._set
_error
('Broken header in RAR file')
1084 # given current vol name, construct next one
def _next_volname(self, volfile):
    """Build the name of the volume that follows *volfile*.

    Raises IOError when *volfile* is a file-like object, as there is no
    name to derive from.  Otherwise the main header's NEWNUMBERING flag
    selects between the new and the old naming helper.
    """
    if is_filelike(volfile):
        raise IOError("Working on single FD")
    uses_new_numbering = self._main.flags & RAR_MAIN_NEWNUMBERING
    make_next = _next_newvol if uses_new_numbering else _next_oldvol
    return make_next(volfile)
1092 def _set_error(self
, msg
, *args
):
1095 self
._parse
_error
= msg
1097 raise BadRarFile(msg
)
1099 def open(self
, inf
, psw
):
1100 """Return stream object for file data."""
1103 # cannot leave to unrar as it expects copied file to exist
1104 if inf
.file_redir
[0] in (RAR5_XREDIR_FILE_COPY
, RAR5_XREDIR_HARD_LINK
):
1105 inf
= self
.getinfo(inf
.file_redir
[2])
1107 raise BadRarFile('cannot find copied file')
1109 if inf
.flags
& RAR_FILE_SPLIT_BEFORE
:
1110 raise NeedFirstVolume("Partial file, please start from first volume: " + inf
.filename
)
1112 # is temp write usable?
1116 elif self
._main
._must
_disable
_hack
():
1118 elif inf
._must
_disable
_hack
():
1120 elif is_filelike(self
._rarfile
):
1122 elif inf
.file_size
> HACK_SIZE_LIMIT
:
1124 elif not USE_EXTRACT_HACK
:
1128 if inf
.compress_type
== RAR_M0
and (inf
.flags
& RAR_FILE_PASSWORD
) == 0 and inf
.file_redir
is None:
1129 return self
._open
_clear
(inf
)
1131 return self
._open
_hack
(inf
, psw
)
1132 elif is_filelike(self
._rarfile
):
1133 return self
._open
_unrar
_membuf
(self
._rarfile
, inf
, psw
)
1135 return self
._open
_unrar
(self
._rarfile
, inf
, psw
)
def _open_clear(self, inf):
    """Return a DirectReader for *inf*, bound to this parser."""
    reader = DirectReader(self, inf)
    return reader
1140 def _open_hack_core(self
, inf
, psw
, prefix
, suffix
):
1142 size
= inf
.compress_size
+ inf
.header_size
1143 rf
= XFile(inf
.volume_file
, 0)
1144 rf
.seek(inf
.header_offset
)
1146 tmpfd
, tmpname
= mkstemp(suffix
='.rar')
1147 tmpf
= os
.fdopen(tmpfd
, "wb")
1153 buf
= rf
.read(BSIZE
)
1157 raise BadRarFile('read failed: ' + inf
.filename
)
1169 return self
._open
_unrar
(tmpname
, inf
, psw
, tmpname
)
1171 # write in-memory archive to temp file - needed for solid archives
def _open_unrar_membuf(self, memfile, inf, psw):
    """Open *inf* from an in-memory archive via a temp file.

    The archive is first written out with ``membuf_tempfile``; that temp
    file is used both as the archive to read and as the ``tmpfile``
    argument of :meth:`_open_unrar`, with ``force_file`` enabled.
    """
    tmp_archive = membuf_tempfile(memfile)
    return self._open_unrar(tmp_archive, inf, psw,
                            tmpfile=tmp_archive, force_file=True)
1176 # extract using unrar
1177 def _open_unrar(self
, rarfile
, inf
, psw
=None, tmpfile
=None, force_file
=False):
1178 cmd
= [UNRAR_TOOL
] + list(OPEN_ARGS
)
1179 add_password_arg(cmd
, psw
)
1183 # not giving filename avoids encoding related problems
1184 if not tmpfile
or force_file
:
1186 if PATH_SEP
!= os
.sep
:
1187 fn
= fn
.replace(PATH_SEP
, os
.sep
)
1190 # read from unrar pipe
1191 return PipeReader(self
, inf
, cmd
, tmpfile
)
1197 class Rar3Info(RarInfo
):
1198 """RAR3 specific fields."""
1199 extract_version
= 15
1204 header_offset
= None
1209 # make sure some rar5 fields are always present
1211 blake2sp_hash
= None
1213 def _must_disable_hack(self
):
1214 if self
.type == RAR_BLOCK_FILE
:
1215 if self
.flags
& RAR_FILE_PASSWORD
:
1217 elif self
.flags
& (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER
):
1219 elif self
.type == RAR_BLOCK_MAIN
:
1220 if self
.flags
& (RAR_MAIN_SOLID | RAR_MAIN_PASSWORD
):
1225 class RAR3Parser(CommonParser
):
1226 """Parse RAR3 file format.
1228 _expect_sig
= RAR_ID
1229 _last_aes_key
= (None, None, None) # (salt, key, iv)
1231 def _decrypt_header(self
, fd
):
1232 if not _have_crypto
:
1233 raise NoCrypto('Cannot parse encrypted headers - no crypto')
1235 if self
._last
_aes
_key
[0] == salt
:
1236 key
, iv
= self
._last
_aes
_key
[1:]
1238 key
, iv
= rar3_s2k(self
._password
, salt
)
1239 self
._last
_aes
_key
= (salt
, key
, iv
)
1240 return HeaderDecrypt(fd
, key
, iv
)
1243 def _parse_block_header(self
, fd
):
1245 h
.header_offset
= fd
.tell()
1247 # read and parse base header
1248 buf
= fd
.read(S_BLK_HDR
.size
)
1251 t
= S_BLK_HDR
.unpack_from(buf
)
1252 h
.header_crc
, h
.type, h
.flags
, h
.header_size
= t
1255 if h
.header_size
> S_BLK_HDR
.size
:
1256 hdata
= buf
+ fd
.read(h
.header_size
- S_BLK_HDR
.size
)
1259 h
.data_offset
= fd
.tell()
1262 if len(hdata
) != h
.header_size
:
1263 self
._set
_error
('Unexpected EOF when reading header')
1266 pos
= S_BLK_HDR
.size
# block has data associated with it?
1269 if h
.flags
& RAR_LONG_BLOCK
:
1270 h
.add_size
, pos
= load_le32(hdata
, pos
)
1274 # parse interesting ones, decide header boundaries for crc
1275 if h
.type == RAR_BLOCK_MARK
:
1277 elif h
.type == RAR_BLOCK_MAIN
:
1279 if h
.flags
& RAR_MAIN_ENCRYPTVER
:
1282 if h
.flags
& RAR_MAIN_COMMENT
:
1283 self
._parse
_subblocks
(h
, hdata
, pos
)
1284 elif h
.type == RAR_BLOCK_FILE
:
1285 pos
= self
._parse
_file
_header
(h
, hdata
, pos
- 4)
1287 if h
.flags
& RAR_FILE_COMMENT
:
1288 pos
= self
._parse
_subblocks
(h
, hdata
, pos
)
1289 elif h
.type == RAR_BLOCK_SUB
:
1290 pos
= self
._parse
_file
_header
(h
, hdata
, pos
- 4)
1291 crc_pos
= h
.header_size
1292 elif h
.type == RAR_BLOCK_OLD_AUTH
:
1295 elif h
.type == RAR_BLOCK_OLD_EXTRA
:
1299 crc_pos
= h
.header_size
1302 if h
.type == RAR_BLOCK_OLD_SUB
:
1303 crcdat
= hdata
[2:] + fd
.read(h
.add_size
)
1305 crcdat
= hdata
[2:crc_pos
]
1307 calc_crc
= rar_crc32(crcdat
) & 0xFFFF
1309 # return good header
1310 if h
.header_crc
== calc_crc
:
1313 # header parsing failed.
1314 self
._set
_error
('Header CRC error (%02x): exp=%x got=%x (xlen = %d)',
1315 h
.type, h
.header_crc
, calc_crc
, len(crcdat
))
# instead of panicking, send EOF
1320 # read file-specific header
1321 def _parse_file_header(self
, h
, hdata
, pos
):
1322 fld
= S_FILE_HDR
.unpack_from(hdata
, pos
)
1323 pos
+= S_FILE_HDR
.size
1325 h
.compress_size
= fld
[0]
1326 h
.file_size
= fld
[1]
1329 h
.date_time
= parse_dos_time(fld
[4])
1330 h
.mtime
= to_datetime(h
.date_time
)
1331 h
.extract_version
= fld
[5]
1332 h
.compress_type
= fld
[6]
1336 h
._md
_class
= CRC32Context
1337 h
._md
_expect
= h
.CRC
1339 if h
.flags
& RAR_FILE_LARGE
:
1340 h1
, pos
= load_le32(hdata
, pos
)
1341 h2
, pos
= load_le32(hdata
, pos
)
1342 h
.compress_size |
= h1
<< 32
1343 h
.file_size |
= h2
<< 32
1344 h
.add_size
= h
.compress_size
1346 name
, pos
= load_bytes(hdata
, name_size
, pos
)
1347 if h
.flags
& RAR_FILE_UNICODE
:
1348 nul
= name
.find(ZERO
)
1349 h
.orig_filename
= name
[:nul
]
1350 u
= UnicodeFilename(h
.orig_filename
, name
[nul
+ 1:])
1351 h
.filename
= u
.decode()
1353 # if parsing failed fall back to simple name
1355 h
.filename
= self
._decode
(h
.orig_filename
)
1357 h
.orig_filename
= name
1358 h
.filename
= self
._decode
(name
)
1360 # change separator, if requested
1361 if PATH_SEP
!= '\\':
1362 h
.filename
= h
.filename
.replace('\\', PATH_SEP
)
1364 if h
.flags
& RAR_FILE_SALT
:
1365 h
.salt
, pos
= load_bytes(hdata
, 8, pos
)
1369 # optional extended time stamps
1370 if h
.flags
& RAR_FILE_EXTTIME
:
1371 pos
= _parse_ext_time(h
, hdata
, pos
)
1373 h
.mtime
= h
.atime
= h
.ctime
= h
.arctime
= None
1377 # find old-style comment subblock
1378 def _parse_subblocks(self
, h
, hdata
, pos
):
1379 while pos
< len(hdata
):
1380 # ordinary block header
1381 t
= S_BLK_HDR
.unpack_from(hdata
, pos
)
1382 ___scrc
, stype
, sflags
, slen
= t
1383 pos_next
= pos
+ slen
1384 pos
+= S_BLK_HDR
.size
1390 # followed by block-specific header
1391 if stype
== RAR_BLOCK_OLD_COMMENT
and pos
+ S_COMMENT_HDR
.size
<= pos_next
:
1392 declen
, ver
, meth
, crc
= S_COMMENT_HDR
.unpack_from(hdata
, pos
)
1393 pos
+= S_COMMENT_HDR
.size
1394 data
= hdata
[pos
: pos_next
]
1395 cmt
= rar3_decompress(ver
, meth
, data
, declen
, sflags
,
1396 crc
, self
._password
)
1397 if not self
._crc
_check
:
1398 h
.comment
= self
._decode
_comment
(cmt
)
1399 elif rar_crc32(cmt
) & 0xFFFF == crc
:
1400 h
.comment
= self
._decode
_comment
(cmt
)
1405 def _read_comment_v3(self
, inf
, psw
=None):
1408 with
XFile(inf
.volume_file
) as rf
:
1409 rf
.seek(inf
.data_offset
)
1410 data
= rf
.read(inf
.compress_size
)
1413 cmt
= rar3_decompress(inf
.extract_version
, inf
.compress_type
, data
,
1414 inf
.file_size
, inf
.flags
, inf
.CRC
, psw
, inf
.salt
)
1418 crc
= rar_crc32(cmt
)
1422 return self
._decode
_comment
(cmt
)
1424 def _decode(self
, val
):
1425 for c
in TRY_ENCODINGS
:
1427 return val
.decode(c
)
1428 except UnicodeError:
1430 return val
.decode(self
._charset
, 'replace')
1432 def _decode_comment(self
, val
):
1433 return self
._decode
(val
)
1435 def process_entry(self
, fd
, item
):
1436 if item
.type == RAR_BLOCK_FILE
:
1437 # use only first part
1438 if (item
.flags
& RAR_FILE_SPLIT_BEFORE
) == 0:
1439 self
._info
_map
[item
.filename
] = item
1440 self
._info
_list
.append(item
)
1441 elif len(self
._info
_list
) > 0:
1442 # final crc is in last block
1443 old
= self
._info
_list
[-1]
1445 old
._md
_expect
= item
._md
_expect
1446 old
.compress_size
+= item
.compress_size
1448 # parse new-style comment
1449 if item
.type == RAR_BLOCK_SUB
and item
.filename
== 'CMT':
1450 if item
.flags
& (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER
):
1452 elif item
.flags
& RAR_FILE_SOLID
:
1454 cmt
= self
._read
_comment
_v
3(item
, self
._password
)
1455 if len(self
._info
_list
) > 0:
1456 old
= self
._info
_list
[-1]
1460 cmt
= self
._read
_comment
_v
3(item
, self
._password
)
1463 if item
.type == RAR_BLOCK_MAIN
:
1464 if item
.flags
& RAR_MAIN_COMMENT
:
1465 self
.comment
= item
.comment
1466 if item
.flags
& RAR_MAIN_PASSWORD
:
1467 self
._needs
_password
= True
1469 # put file compressed data into temporary .rar archive, and run
1470 # unrar on that, thus avoiding unrar going over whole archive
def _open_hack(self, inf, psw):
    """Open *inf* through a temp archive, using a fixed RAR3 prefix.

    The prefix is the RAR3 signature followed by a main header and is
    handed to :meth:`_open_hack_core` with an empty suffix.
    """
    # main header fields: crc, type, flags, size ...
    main_header = S_BLK_HDR.pack(0x90CF, 0x73, 0, 13)
    # ... followed by res1 (2 bytes) and res2 (4 bytes), all zero
    reserved = ZERO * (2 + 4)
    prefix = RAR_ID + main_header + reserved
    return self._open_hack_core(inf, psw, prefix, EMPTY)
class Rar5Info(RarInfo):
    """Shared fields for RAR5 records.
    """
    extract_version = 50
    header_crc = None
    header_size = None
    header_offset = None
    data_offset = None

    # type=all
    block_type = None
    block_flags = None
    add_size = 0
    block_extra_size = 0

    # type=MAIN
    volume_number = None
    _md_class = None
    _md_expect = None

    def _must_disable_hack(self):
        """Return False: the base record never forbids the temp-archive hack."""
        return False
class Rar5BaseFile(Rar5Info):
    """Shared struct for file & service record.
    """
    type = -1
    file_flags = None
    # (algo, flags, kdf_count, salt, iv, checkval)
    file_encryption = (0, 0, 0, EMPTY, EMPTY, EMPTY)
    file_compress_flags = None
    file_redir = None
    file_owner = None
    file_version = None
    blake2sp_hash = None

    def _must_disable_hack(self):
        """Return True when the entry cannot be extracted via the temp-archive hack.

        That is the case for encrypted entries, entries split across
        volumes, solid-compressed entries and redirected entries.
        """
        if self.flags & RAR_FILE_PASSWORD:
            return True
        if self.block_flags & (RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER):
            return True
        if self.file_compress_flags & RAR5_COMPR_SOLID:
            return True
        if self.file_redir:
            return True
        return False
class Rar5FileInfo(Rar5BaseFile):
    """RAR5 file record.
    """
    type = RAR_BLOCK_FILE
class Rar5ServiceInfo(Rar5BaseFile):
    """RAR5 service record.
    """
    type = RAR_BLOCK_SUB
class Rar5MainInfo(Rar5Info):
    """RAR5 archive main record.
    """
    type = RAR_BLOCK_MAIN
    main_flags = None
    main_volume_number = None

    def _must_disable_hack(self):
        """Return True when the archive is flagged solid, False otherwise."""
        if self.main_flags & RAR5_MAIN_FLAG_SOLID:
            return True
        return False
class Rar5EncryptionInfo(Rar5Info):
    """RAR5 archive header encryption record.
    """
    type = RAR5_BLOCK_ENCRYPTION
    encryption_algo = None
    encryption_flags = None
    encryption_kdf_count = None
    encryption_salt = None
    encryption_check_value = None

    def needs_password(self):
        """Return True: an archive with encrypted headers always needs a password."""
        return True
class Rar5EndArcInfo(Rar5Info):
    """RAR5 end of archive record.
    """
    type = RAR_BLOCK_ENDARC
    endarc_flags = None
class RAR5Parser(CommonParser):
    """Parse RAR5 format.
    """
    _expect_sig = RAR5_ID
    _hdrenc_main = None

    # AES encrypted headers
    _last_aes256_key = (-1, None, None)   # (kdf_count, salt, key)

    def _gen_key(self, kdf_count, salt):
        """Derive the AES-256 key for ``self._password``.

        The most recent ``(kdf_count, salt)`` result is cached because key
        derivation runs ``1 << kdf_count`` PBKDF2 iterations.

        Raises ``BadRarFile`` when ``kdf_count`` exceeds 24.
        """
        if self._last_aes256_key[:2] == (kdf_count, salt):
            return self._last_aes256_key[2]
        if kdf_count > 24:
            raise BadRarFile('Too large kdf_count')
        psw = self._password
        if isinstance(psw, unicode):
            psw = psw.encode('utf8')
        key = pbkdf2_sha256(psw, salt, 1 << kdf_count)
        self._last_aes256_key = (kdf_count, salt, key)
        return key

    def _decrypt_header(self, fd):
        """Wrap ``fd`` in a decrypting reader for an encrypted header.

        The 16-byte IV is read from ``fd`` first.  Raises ``NoCrypto`` when
        no crypto backend is available.
        """
        if not _have_crypto:
            raise NoCrypto('Cannot parse encrypted headers - no crypto')
        h = self._hdrenc_main
        key = self._gen_key(h.encryption_kdf_count, h.encryption_salt)
        iv = fd.read(16)
        return HeaderDecrypt(fd, key, iv)

    # common header
    def _parse_block_header(self, fd):
        """Read and parse one block header from ``fd``.

        Returns the populated info object, or ``None`` on an oversized
        header, short read, CRC mismatch or unknown block type.
        """
        header_offset = fd.tell()

        # crc32 + enough bytes to hold the header-length vint
        preload = 4 + 3
        start_bytes = fd.read(preload)
        header_crc, pos = load_le32(start_bytes, 0)
        hdrlen, pos = load_vint(start_bytes, pos)
        if hdrlen > 2 * 1024 * 1024:
            return None
        header_size = pos + hdrlen

        # read full header, check for EOF
        hdata = start_bytes + fd.read(header_size - len(start_bytes))
        if len(hdata) != header_size:
            self._set_error('Unexpected EOF when reading header')
            return None
        data_offset = fd.tell()

        # crc covers everything after the crc field itself
        calc_crc = rar_crc32(memoryview(hdata)[4:])
        if header_crc != calc_crc:
            # header parsing failed.
            self._set_error('Header CRC error: exp=%x got=%x (xlen = %d)',
                            header_crc, calc_crc, len(hdata))
            return None

        block_type, pos = load_vint(hdata, pos)

        if block_type == RAR5_BLOCK_MAIN:
            h, pos = self._parse_block_common(Rar5MainInfo(), hdata)
            h = self._parse_main_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_FILE:
            h, pos = self._parse_block_common(Rar5FileInfo(), hdata)
            h = self._parse_file_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_SERVICE:
            h, pos = self._parse_block_common(Rar5ServiceInfo(), hdata)
            h = self._parse_file_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_ENCRYPTION:
            h, pos = self._parse_block_common(Rar5EncryptionInfo(), hdata)
            h = self._parse_encryption_block(h, hdata, pos)
        elif block_type == RAR5_BLOCK_ENDARC:
            h, pos = self._parse_block_common(Rar5EndArcInfo(), hdata)
            h = self._parse_endarc_block(h, hdata, pos)
        else:
            h = None
        if h:
            h.header_offset = header_offset
            h.data_offset = data_offset
        return h

    def _parse_block_common(self, h, hdata):
        """Parse the fields shared by all block types; return ``(h, pos)``."""
        h.header_crc, pos = load_le32(hdata, 0)
        hdrlen, pos = load_vint(hdata, pos)
        h.header_size = hdrlen + pos
        h.block_type, pos = load_vint(hdata, pos)
        h.block_flags, pos = load_vint(hdata, pos)

        if h.block_flags & RAR5_BLOCK_FLAG_EXTRA_DATA:
            h.block_extra_size, pos = load_vint(hdata, pos)
        if h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA:
            h.add_size, pos = load_vint(hdata, pos)

        h.compress_size = h.add_size

        if h.block_flags & RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN:
            h.flags |= RAR_SKIP_IF_UNKNOWN
        if h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA:
            h.flags |= RAR_LONG_BLOCK
        return h, pos

    def _parse_main_block(self, h, hdata, pos):
        """Parse the main archive header and map its flags to RAR3-style flags."""
        h.main_flags, pos = load_vint(hdata, pos)
        if h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR:
            # load_vint returns (value, new_pos); store only the value
            h.main_volume_number, pos = load_vint(hdata, pos)

        h.flags |= RAR_MAIN_NEWNUMBERING
        if h.main_flags & RAR5_MAIN_FLAG_SOLID:
            h.flags |= RAR_MAIN_SOLID
        if h.main_flags & RAR5_MAIN_FLAG_ISVOL:
            h.flags |= RAR_MAIN_VOLUME
        if h.main_flags & RAR5_MAIN_FLAG_RECOVERY:
            h.flags |= RAR_MAIN_RECOVERY
        if self._hdrenc_main:
            h.flags |= RAR_MAIN_PASSWORD
        if h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR == 0:
            h.flags |= RAR_MAIN_FIRSTVOLUME

        return h

    def _parse_file_block(self, h, hdata, pos):
        """Parse a file or service header, including its extra records."""
        h.file_flags, pos = load_vint(hdata, pos)
        h.file_size, pos = load_vint(hdata, pos)
        h.mode, pos = load_vint(hdata, pos)

        if h.file_flags & RAR5_FILE_FLAG_HAS_MTIME:
            h.mtime, pos = load_unixtime(hdata, pos)
            h.date_time = h.mtime.timetuple()[:6]
        if h.file_flags & RAR5_FILE_FLAG_HAS_CRC32:
            h.CRC, pos = load_le32(hdata, pos)
            h._md_class = CRC32Context
            h._md_expect = h.CRC

        h.file_compress_flags, pos = load_vint(hdata, pos)
        h.file_host_os, pos = load_vint(hdata, pos)
        h.orig_filename, pos = load_vstr(hdata, pos)
        h.filename = h.orig_filename.decode('utf8', 'replace')

        # use compatible values
        if h.file_host_os == RAR5_OS_WINDOWS:
            h.host_os = RAR_OS_WIN32
        else:
            h.host_os = RAR_OS_UNIX
        h.compress_type = RAR_M0 + ((h.file_compress_flags >> 7) & 7)

        if h.block_extra_size:
            # allow 1 byte of garbage
            while pos < len(hdata) - 1:
                xsize, pos = load_vint(hdata, pos)
                xdata, pos = load_bytes(hdata, xsize, pos)
                self._process_file_extra(h, xdata)

        if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE:
            h.flags |= RAR_FILE_SPLIT_BEFORE
        if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_AFTER:
            h.flags |= RAR_FILE_SPLIT_AFTER
        if h.file_flags & RAR5_FILE_FLAG_ISDIR:
            h.flags |= RAR_FILE_DIRECTORY
        if h.file_compress_flags & RAR5_COMPR_SOLID:
            h.flags |= RAR_FILE_SOLID

        return h

    def _parse_endarc_block(self, h, hdata, pos):
        """Parse the end-of-archive header."""
        h.endarc_flags, pos = load_vint(hdata, pos)
        if h.endarc_flags & RAR5_ENDARC_FLAG_NEXT_VOL:
            h.flags |= RAR_ENDARC_NEXT_VOLUME
        return h

    def _parse_encryption_block(self, h, hdata, pos):
        """Parse the header-encryption record and remember it on the parser.

        Raises ``BadRarFile`` for any cipher other than AES-256.
        """
        h.encryption_algo, pos = load_vint(hdata, pos)
        h.encryption_flags, pos = load_vint(hdata, pos)
        h.encryption_kdf_count, pos = load_byte(hdata, pos)
        h.encryption_salt, pos = load_bytes(hdata, 16, pos)
        if h.encryption_flags & RAR5_ENC_FLAG_HAS_CHECKVAL:
            # load_bytes returns (data, new_pos); store only the data
            h.encryption_check_value, pos = load_bytes(hdata, 12, pos)
        if h.encryption_algo != RAR5_XENC_CIPHER_AES256:
            raise BadRarFile('Unsupported header encryption cipher')
        self._hdrenc_main = h
        return h

    # file extra record
    def _process_file_extra(self, h, xdata):
        """Dispatch one extra record to its parser; unknown types are ignored."""
        xtype, pos = load_vint(xdata, 0)
        if xtype == RAR5_XFILE_TIME:
            self._parse_file_xtime(h, xdata, pos)
        elif xtype == RAR5_XFILE_ENCRYPTION:
            self._parse_file_encryption(h, xdata, pos)
        elif xtype == RAR5_XFILE_HASH:
            self._parse_file_hash(h, xdata, pos)
        elif xtype == RAR5_XFILE_VERSION:
            self._parse_file_version(h, xdata, pos)
        elif xtype == RAR5_XFILE_REDIR:
            self._parse_file_redir(h, xdata, pos)
        elif xtype == RAR5_XFILE_OWNER:
            self._parse_file_owner(h, xdata, pos)
        elif xtype == RAR5_XFILE_SERVICE:
            pass
        else:
            pass

    # extra block for file time record
    def _parse_file_xtime(self, h, xdata, pos):
        """Load mtime/ctime/atime, as Windows or Unix timestamps per the flags."""
        tflags, pos = load_vint(xdata, pos)
        ldr = load_windowstime
        if tflags & RAR5_XTIME_UNIXTIME:
            ldr = load_unixtime
        if tflags & RAR5_XTIME_HAS_MTIME:
            h.mtime, pos = ldr(xdata, pos)
            h.date_time = h.mtime.timetuple()[:6]
        if tflags & RAR5_XTIME_HAS_CTIME:
            h.ctime, pos = ldr(xdata, pos)
        if tflags & RAR5_XTIME_HAS_ATIME:
            h.atime, pos = ldr(xdata, pos)

    # just remember encryption info
    def _parse_file_encryption(self, h, xdata, pos):
        """Store the per-file encryption parameters and mark the entry encrypted."""
        algo, pos = load_vint(xdata, pos)
        flags, pos = load_vint(xdata, pos)
        kdf_count, pos = load_byte(xdata, pos)
        salt, pos = load_bytes(xdata, 16, pos)
        iv, pos = load_bytes(xdata, 16, pos)
        checkval = None
        if flags & RAR5_XENC_CHECKVAL:
            checkval, pos = load_bytes(xdata, 12, pos)
        if flags & RAR5_XENC_TWEAKED:
            # tweaked checksums cannot be verified here - disable the check
            h._md_expect = None
            h._md_class = NoHashContext

        h.file_encryption = (algo, flags, kdf_count, salt, iv, checkval)
        h.flags |= RAR_FILE_PASSWORD

    def _parse_file_hash(self, h, xdata, pos):
        """Load a BLAKE2sp file hash; use it for verification when possible."""
        hash_type, pos = load_vint(xdata, pos)
        if hash_type == RAR5_XHASH_BLAKE2SP:
            h.blake2sp_hash, pos = load_bytes(xdata, 32, pos)
            if _have_blake2 and (h.file_encryption[1] & RAR5_XENC_TWEAKED) == 0:
                h._md_class = Blake2SP
                h._md_expect = h.blake2sp_hash

    def _parse_file_version(self, h, xdata, pos):
        """Store ``(flags, version)`` in ``h.file_version``."""
        flags, pos = load_vint(xdata, pos)
        version, pos = load_vint(xdata, pos)
        h.file_version = (flags, version)

    def _parse_file_redir(self, h, xdata, pos):
        """Store ``(type, flags, target name)`` in ``h.file_redir``."""
        redir_type, pos = load_vint(xdata, pos)
        redir_flags, pos = load_vint(xdata, pos)
        redir_name, pos = load_vstr(xdata, pos)
        redir_name = redir_name.decode('utf8', 'replace')
        h.file_redir = (redir_type, redir_flags, redir_name)

    def _parse_file_owner(self, h, xdata, pos):
        """Store ``(user_name, group_name, user_id, group_id)``; absent fields are None."""
        user_name = group_name = user_id = group_id = None

        flags, pos = load_vint(xdata, pos)
        if flags & RAR5_XOWNER_UNAME:
            user_name, pos = load_vstr(xdata, pos)
        if flags & RAR5_XOWNER_GNAME:
            group_name, pos = load_vstr(xdata, pos)
        if flags & RAR5_XOWNER_UID:
            user_id, pos = load_vint(xdata, pos)
        if flags & RAR5_XOWNER_GID:
            group_id, pos = load_vint(xdata, pos)

        h.file_owner = (user_name, group_name, user_id, group_id)

    def process_entry(self, fd, item):
        """Register a file block, or load the archive comment from a CMT service block."""
        if item.block_type == RAR5_BLOCK_FILE:
            # use only first part
            if (item.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE) == 0:
                self._info_map[item.filename] = item
                self._info_list.append(item)
            elif len(self._info_list) > 0:
                # final crc is in last block
                old = self._info_list[-1]
                old.CRC = item.CRC
                old._md_expect = item._md_expect
                old.blake2sp_hash = item.blake2sp_hash
                old.compress_size += item.compress_size
        elif item.block_type == RAR5_BLOCK_SERVICE:
            if item.filename == 'CMT':
                self._load_comment(fd, item)

    def _load_comment(self, fd, item):
        """Read the archive comment into ``self.comment``.

        Split or compressed comments, and comments encrypted with an
        unsupported cipher, are skipped (returns ``None`` without setting
        the comment).
        """
        if item.block_flags & (RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER):
            return None
        if item.compress_type != RAR_M0:
            return None

        if item.flags & RAR_FILE_PASSWORD:
            algo, ___flags, kdf_count, salt, iv, ___checkval = item.file_encryption
            if algo != RAR5_XENC_CIPHER_AES256:
                return None
            key = self._gen_key(kdf_count, salt)
            f = HeaderDecrypt(fd, key, iv)
            cmt = f.read(item.file_size)
        else:
            # archive comment
            with self._open_clear(item) as cmtstream:
                cmt = cmtstream.read()

        # rar bug? - appends zero to comment
        cmt = cmt.split(ZERO, 1)[0]
        self.comment = cmt.decode('utf8')

    def _open_hack(self, inf, psw):
        """Open ``inf`` through a minimal temporary RAR5 archive.

        Wraps the entry between a synthetic main header and end-of-archive
        header, each prefixed with its own CRC32.
        """
        # len, type, blk_flags, flags
        main_hdr = b'\x03\x01\x00\x00'
        endarc_hdr = b'\x03\x05\x00\x00'
        main_hdr = S_LONG.pack(rar_crc32(main_hdr)) + main_hdr
        endarc_hdr = S_LONG.pack(rar_crc32(endarc_hdr)) + endarc_hdr
        return self._open_hack_core(inf, psw, RAR5_ID + main_hdr, endarc_hdr)
class UnicodeFilename(object):
    """Handle RAR3 unicode filename decompression.

    ``name`` is the 8-bit filename and ``encdata`` the compressed UTF-16
    delta stream stored next to it.  Reading past the end of either input
    does not raise; it sets ``failed`` to 1 and substitutes a filler byte.
    """
    def __init__(self, name, encdata):
        self.std_name = bytearray(name)
        self.encdata = bytearray(encdata)
        self.pos = self.encpos = 0
        self.buf = bytearray()
        self.failed = 0

    def enc_byte(self):
        """Copy encoded byte."""
        try:
            c = self.encdata[self.encpos]
            self.encpos += 1
            return c
        except IndexError:
            self.failed = 1
            return 0

    def std_byte(self):
        """Copy byte from 8-bit representation."""
        try:
            return self.std_name[self.pos]
        except IndexError:
            self.failed = 1
            return ord('?')

    def put(self, lo, hi):
        """Copy 16-bit value to result."""
        self.buf.append(lo)
        self.buf.append(hi)
        self.pos += 1

    def decode(self):
        """Decompress compressed UTF16 value."""
        hi = self.enc_byte()
        flagbits = 0
        while self.encpos < len(self.encdata):
            # each flag byte carries four 2-bit opcodes, consumed high bits first
            if flagbits == 0:
                flags = self.enc_byte()
                flagbits = 8
            flagbits -= 2
            t = (flags >> flagbits) & 3
            if t == 0:
                # low byte given, high byte zero
                self.put(self.enc_byte(), 0)
            elif t == 1:
                # low byte given, shared high byte
                self.put(self.enc_byte(), hi)
            elif t == 2:
                # both bytes given: low first, then high
                self.put(self.enc_byte(), self.enc_byte())
            else:
                # run copied from the 8-bit name
                n = self.enc_byte()
                if n & 0x80:
                    c = self.enc_byte()
                    for _ in range((n & 0x7f) + 2):
                        lo = (self.std_byte() + c) & 0xFF
                        self.put(lo, hi)
                else:
                    for _ in range(n + 2):
                        self.put(self.std_byte(), 0)
        return self.buf.decode("utf-16le", "replace")
class RarExtFile(RawIOBase):
    """Base class for file-like object that :meth:`RarFile.open` returns.

    Provides public methods and common crc checking.

    Behaviour:
     - no short reads - .read() and .readinto() read as much as requested.
     - no internal buffer, use io.BufferedReader for that.
    """

    #: Filename of the archive entry
    name = None

    def __init__(self, parser, inf):
        """Open archive entry.
        """
        super(RarExtFile, self).__init__()

        # standard io.* properties
        self.name = inf.filename
        self.mode = 'rb'

        self._parser = parser
        self._inf = inf
        self._fd = None
        self._remain = 0
        self._returncode = 0

        self._md_context = None

        self._open()

    def _open(self):
        """(Re)start reading: close any old fd, reset checksum and byte counter."""
        if self._fd:
            self._fd.close()
        md_class = self._inf._md_class or NoHashContext
        self._md_context = md_class()
        self._fd = None
        self._remain = self._inf.file_size

    def read(self, cnt=None):
        """Read all or specified amount of data from archive entry."""

        # sanitize cnt
        if cnt is None or cnt < 0:
            cnt = self._remain
        elif cnt > self._remain:
            cnt = self._remain
        if cnt == 0:
            return EMPTY

        # actual read
        data = self._read(cnt)
        if data:
            self._md_context.update(data)
            self._remain -= len(data)
        if len(data) != cnt:
            raise BadRarFile("Failed the read enough data")

        # done?
        if not data or self._remain == 0:
            # self.close()
            self._check()
        return data

    def _check(self):
        """Check final CRC."""
        final = self._md_context.digest()
        exp = self._inf._md_expect
        # nothing to compare against
        if exp is None:
            return
        if final is None:
            return
        if self._returncode:
            check_returncode(self, '')
        if self._remain != 0:
            raise BadRarFile("Failed the read enough data")
        if final != exp:
            raise BadRarFile("Corrupt file - CRC check failed: %s - exp=%r got=%r" % (
                self._inf.filename, exp, final))

    def _read(self, cnt):
        """Actual read that gets sanitized cnt."""

    def close(self):
        """Close open resources."""

        super(RarExtFile, self).close()

        if self._fd:
            self._fd.close()
            self._fd = None

    def __del__(self):
        """Hook delete to make sure tempfile is removed."""
        self.close()

    def readinto(self, buf):
        """Zero-copy read directly into buffer.

        Returns bytes read.
        """
        raise NotImplementedError('readinto')

    def tell(self):
        """Return current reading position in uncompressed data."""
        return self._inf.file_size - self._remain

    def seek(self, ofs, whence=0):
        """Seek in data.

        On uncompressed files, the seeking works by actual
        seeks so it's fast.  On compressed files it's slow
        - forward seeking happens by reading ahead,
        backwards by re-opening and decompressing from the start.

        Returns the new position.  Raises ``ValueError`` for an unknown
        ``whence``.
        """

        # disable crc check when seeking
        self._md_context = NoHashContext()

        fsize = self._inf.file_size
        cur_ofs = self.tell()

        if whence == 0:     # seek from beginning of file
            new_ofs = ofs
        elif whence == 1:   # seek from current position
            new_ofs = cur_ofs + ofs
        elif whence == 2:   # seek from end of file
            new_ofs = fsize + ofs
        else:
            raise ValueError('Invalid value for whence')

        # sanity check
        if new_ofs < 0:
            new_ofs = 0
        elif new_ofs > fsize:
            new_ofs = fsize

        # do the actual seek
        if new_ofs >= cur_ofs:
            self._skip(new_ofs - cur_ofs)
        else:
            # reopen and seek
            self._open()
            self._skip(new_ofs)
        return self.tell()

    def _skip(self, cnt):
        """Read and discard data"""
        while cnt > 0:
            if cnt > 8192:
                buf = self.read(8192)
            else:
                buf = self.read(cnt)
            if not buf:
                break
            cnt -= len(buf)

    def readable(self):
        """Returns True"""
        return True

    def writable(self):
        """Returns False.

        Writing is not supported.
        """
        return False

    def seekable(self):
        """Returns True.

        Seeking is supported, although it's slow on compressed files.
        """
        return True

    def readall(self):
        """Read all remaining data"""
        # avoid RawIOBase default impl
        return self.read()
class PipeReader(RarExtFile):
    """Read data from pipe, handle tempfile cleanup."""

    def __init__(self, rf, inf, cmd, tempfile=None):
        # must be set before the base __init__, which calls _open()
        self._cmd = cmd
        self._proc = None
        self._tempfile = tempfile
        super(PipeReader, self).__init__(rf, inf)

    def _close_proc(self):
        """Close the child's pipes, wait for it and record its return code."""
        if not self._proc:
            return
        if self._proc.stdout:
            self._proc.stdout.close()
        if self._proc.stdin:
            self._proc.stdin.close()
        if self._proc.stderr:
            self._proc.stderr.close()
        self._proc.wait()
        self._returncode = self._proc.returncode
        self._proc = None

    def _open(self):
        """Stop any running process and launch a fresh one for ``self._cmd``."""
        super(PipeReader, self)._open()

        # stop old process
        self._close_proc()

        # launch new process
        self._returncode = 0
        self._proc = custom_popen(self._cmd)
        self._fd = self._proc.stdout

        # avoid situation where unrar waits on stdin
        if self._proc.stdin:
            self._proc.stdin.close()

    def _read(self, cnt):
        """Read from pipe."""

        # normal read is usually enough
        data = self._fd.read(cnt)
        if len(data) == cnt or not data:
            return data

        # short read, try looping
        buf = [data]
        cnt -= len(data)
        while cnt > 0:
            data = self._fd.read(cnt)
            if not data:
                break
            cnt -= len(data)
            buf.append(data)
        return EMPTY.join(buf)

    def close(self):
        """Close open resources."""

        self._close_proc()
        super(PipeReader, self).close()

        if self._tempfile:
            try:
                os.unlink(self._tempfile)
            except OSError:
                # best effort - tempfile may already be gone
                pass
            self._tempfile = None

    def readinto(self, buf):
        """Zero-copy read directly into buffer."""
        cnt = len(buf)
        if cnt > self._remain:
            cnt = self._remain
        vbuf = memoryview(buf)
        res = got = 0
        while got < cnt:
            res = self._fd.readinto(vbuf[got : cnt])
            if not res:
                break
            self._md_context.update(vbuf[got : got + res])
            self._remain -= res
            got += res
        return got
class DirectReader(RarExtFile):
    """Read uncompressed data directly from archive.
    """
    _cur = None          # header of the part currently being read
    _cur_avail = None    # bytes left in the current part
    _volfile = None      # volume currently open

    def _open(self):
        """Open the entry's volume and position the fd at its data."""
        super(DirectReader, self)._open()

        self._volfile = self._inf.volume_file
        self._fd = XFile(self._volfile, 0)
        self._fd.seek(self._inf.header_offset, 0)
        # re-parsing the header leaves fd at the start of the data area
        self._cur = self._parser._parse_header(self._fd)
        self._cur_avail = self._cur.add_size

    def _skip(self, cnt):
        """RAR Seek, skipping through rar files to get to correct position
        """

        while cnt > 0:
            # next vol needed?
            if self._cur_avail == 0:
                if not self._open_next():
                    break

            # fd is in read pos, do the read
            if cnt > self._cur_avail:
                cnt -= self._cur_avail
                self._remain -= self._cur_avail
                self._cur_avail = 0
            else:
                self._fd.seek(cnt, 1)
                self._cur_avail -= cnt
                self._remain -= cnt
                cnt = 0

    def _read(self, cnt):
        """Read from potentially multi-volume archive."""

        buf = []
        while cnt > 0:
            # next vol needed?
            if self._cur_avail == 0:
                if not self._open_next():
                    break

            # fd is in read pos, do the read
            if cnt > self._cur_avail:
                data = self._fd.read(self._cur_avail)
            else:
                data = self._fd.read(cnt)
            if not data:
                break

            # got some data
            cnt -= len(data)
            self._cur_avail -= len(data)
            buf.append(data)

        # common case: avoid the join copy
        if len(buf) == 1:
            return buf[0]
        return EMPTY.join(buf)

    def _open_next(self):
        """Proceed to next volume."""

        # is the file split over archives?
        if (self._cur.flags & RAR_FILE_SPLIT_AFTER) == 0:
            return False

        if self._fd:
            self._fd.close()
            self._fd = None

        # open next part
        self._volfile = self._parser._next_volname(self._volfile)
        fd = open(self._volfile, "rb", 0)
        self._fd = fd
        sig = fd.read(len(self._parser._expect_sig))
        if sig != self._parser._expect_sig:
            raise BadRarFile("Invalid signature")

        # loop until first file header
        while True:
            cur = self._parser._parse_header(fd)
            if not cur:
                raise BadRarFile("Unexpected EOF")
            if cur.type in (RAR_BLOCK_MARK, RAR_BLOCK_MAIN):
                if cur.add_size:
                    fd.seek(cur.add_size, 1)
                continue
            if cur.orig_filename != self._inf.orig_filename:
                raise BadRarFile("Did not found file entry")
            self._cur = cur
            self._cur_avail = cur.add_size
            return True

    def readinto(self, buf):
        """Zero-copy read directly into buffer."""
        got = 0
        vbuf = memoryview(buf)
        while got < len(buf):
            # next vol needed?
            if self._cur_avail == 0:
                if not self._open_next():
                    break

            # length for next read
            cnt = len(buf) - got
            if cnt > self._cur_avail:
                cnt = self._cur_avail

            # read into temp view
            res = self._fd.readinto(vbuf[got : got + cnt])
            if not res:
                break
            self._md_context.update(vbuf[got : got + res])
            self._cur_avail -= res
            self._remain -= res
            got += res
        return got
class HeaderDecrypt(object):
    """File-like object that decrypts from another file"""
    def __init__(self, f, key, iv):
        self.f = f
        self.ciph = AES_CBC_Decrypt(key, iv)
        # decrypted bytes not yet handed out
        self.buf = EMPTY

    def tell(self):
        """Current file pos - works only on block boundaries."""
        return self.f.tell()

    def read(self, cnt=None):
        """Read and decrypt.

        Returns up to ``cnt`` decrypted bytes; fewer when the underlying
        file ends.  Raises ``BadRarFile`` when ``cnt`` exceeds 8 KiB.
        """
        if cnt > 8 * 1024:
            raise BadRarFile('Bad count to header decrypt - wrong password?')

        # consume old data
        if cnt <= len(self.buf):
            res = self.buf[:cnt]
            self.buf = self.buf[cnt:]
            return res
        res = self.buf
        self.buf = EMPTY
        cnt -= len(res)

        # decrypt new data
        blklen = 16
        while cnt > 0:
            enc = self.f.read(blklen)
            if len(enc) < blklen:
                break
            dec = self.ciph.decrypt(enc)
            if cnt >= len(dec):
                res += dec
                cnt -= len(dec)
            else:
                # keep the unread part of the block for the next call
                res += dec[:cnt]
                self.buf = dec[cnt:]
                cnt = 0

        return res
2388 # handle (filename|filelike) object
class XFile(object):
    """Input may be filename or file object.

    A file object passed in is rewound to offset 0 and is not closed by
    :meth:`close`; a file opened here from a filename is.
    """
    __slots__ = ('_fd', '_need_close')

    def __init__(self, xfile, bufsize=1024):
        if is_filelike(xfile):
            self._need_close = False
            self._fd = xfile
            self._fd.seek(0)
        else:
            self._need_close = True
            self._fd = open(xfile, 'rb', bufsize)

    def read(self, n=None):
        """Read from file."""
        return self._fd.read(n)

    def tell(self):
        """Return file pos."""
        return self._fd.tell()

    def seek(self, ofs, whence=0):
        """Move file pos."""
        return self._fd.seek(ofs, whence)

    def readinto(self, dst):
        """Read into buffer."""
        return self._fd.readinto(dst)

    def close(self):
        """Close file object."""
        if self._need_close:
            self._fd.close()

    def __enter__(self):
        return self

    def __exit__(self, typ, val, tb):
        self.close()
class NoHashContext(object):
    """No-op hash function.

    Every method accepts the usual hash-context arguments, does nothing and
    returns ``None``.
    """
    def __init__(self, data=None):
        """Initialize"""
    def update(self, data):
        """Update data"""
    def digest(self):
        """Final hash"""
    def hexdigest(self):
        """Hexadecimal digest."""
class CRC32Context(object):
    """Hash context that uses CRC32."""
    __slots__ = ['_crc']

    def __init__(self, data=None):
        self._crc = 0
        if data:
            self.update(data)

    def update(self, data):
        """Process data."""
        self._crc = rar_crc32(data, self._crc)

    def digest(self):
        """Final hash."""
        return self._crc

    def hexdigest(self):
        """Hexadecimal digest."""
        return '%08x' % self.digest()
class Blake2SP(object):
    """Blake2sp hash context.

    Input is dealt out in ``block_size`` chunks, round-robin, to
    ``parallelism`` leaf ``blake2s`` contexts; the final digest is a root
    ``blake2s`` over the leaf digests.
    """
    __slots__ = ['_thread', '_buf', '_cur', '_digest']
    digest_size = 32
    block_size = 64
    parallelism = 8

    def __init__(self, data=None):
        self._buf = b''        # partial block not yet given to a leaf
        self._cur = 0          # index of the leaf that gets the next block
        self._digest = None    # cached final digest
        self._thread = []

        for i in range(self.parallelism):
            ctx = self._blake2s(i, 0, i == (self.parallelism - 1))
            self._thread.append(ctx)

        if data:
            self.update(data)

    def _blake2s(self, ofs, depth, is_last):
        """Create a tree-mode blake2s node."""
        return blake2s(node_offset=ofs, node_depth=depth, last_node=is_last,
                       depth=2, inner_size=32, fanout=self.parallelism)

    def _add_block(self, blk):
        """Feed one block to the current leaf and advance to the next leaf."""
        self._thread[self._cur].update(blk)
        self._cur = (self._cur + 1) % self.parallelism

    def update(self, data):
        """Hash data.
        """
        view = memoryview(data)
        bs = self.block_size
        if self._buf:
            # complete the pending partial block first
            need = bs - len(self._buf)
            if len(view) < need:
                self._buf += view.tobytes()
                return
            self._add_block(self._buf + view[:need].tobytes())
            view = view[need:]
        while len(view) >= bs:
            self._add_block(view[:bs])
            view = view[bs:]
        self._buf = view.tobytes()

    def digest(self):
        """Return final digest value.
        """
        if self._digest is None:
            if self._buf:
                self._add_block(self._buf)
                self._buf = b''
            ctx = self._blake2s(0, 1, True)
            for t in self._thread:
                ctx.update(t.digest())
            self._digest = ctx.digest()
        return self._digest

    def hexdigest(self):
        """Hexadecimal digest."""
        return tohex(self.digest())
class Rar3Sha1(object):
    """Bug-compat for SHA1

    The digest is a plain SHA-1 of the data fed in.  With ``rarbug`` set,
    ``update()`` additionally overwrites full 64-byte blocks of the
    caller's buffer in place for inputs longer than 64 bytes, so such
    ``data`` must be a writable buffer (e.g. ``bytearray``).
    """
    digest_size = 20
    block_size = 64

    _BLK = struct.Struct(b'>16L')
    _BLKx = struct.Struct(b'<16L')

    __slots__ = ('_nbytes', '_md', '_rarbug', '_workspace')

    def __init__(self, data=b'', rarbug=False):
        self._md = sha1()
        self._nbytes = 0
        self._rarbug = rarbug
        self._workspace = [0] * 16
        self.update(data)

    def update(self, data):
        """Process more data."""
        # hash first: the digest covers the data as given, before corruption
        self._md.update(data)
        bufpos = self._nbytes & 63
        self._nbytes += len(data)

        if self._rarbug and len(data) > 64:
            # start at the first block boundary inside ``data``
            dpos = self.block_size - bufpos
            while dpos + self.block_size <= len(data):
                self._corrupt(data, dpos)
                dpos += self.block_size

    def digest(self):
        """Return final state."""
        return self._md.digest()

    def hexdigest(self):
        """Return final state as hex string."""
        return self._md.hexdigest()

    def _corrupt(self, data, dpos):
        """Corruption from SHA1 core."""
        ws = self._workspace
        ws[:] = self._BLK.unpack_from(data, dpos)
        for t in range(16, 80):
            tmp = ws[(t - 3) & 15] ^ ws[(t - 8) & 15] ^ ws[(t - 14) & 15] ^ ws[(t - 16) & 15]
            ws[t & 15] = ((tmp << 1) | (tmp >> (32 - 1))) & 0xFFFFFFFF
        self._BLKx.pack_into(data, dpos, *ws)
2578 ## Utility functions
# Precompiled little-endian struct formats
S_LONG = Struct('<L')
S_SHORT = Struct('<H')
S_BYTE = Struct('<B')

# Fixed-layout header formats
S_BLK_HDR = Struct('<HBHH')
S_FILE_HDR = Struct('<LLBLLBBHL')
S_COMMENT_HDR = Struct('<HBBH')
def load_vint(buf, pos):
    """Load variable-size int.

    Each byte contributes its low 7 bits, least-significant group first; a
    byte below 0x80 ends the value.  At most 11 bytes are examined.

    Returns ``(value, new_pos)``.  Raises ``BadRarFile`` when no
    terminating byte is found within the limit or before the buffer ends.
    """
    limit = min(pos + 11, len(buf))
    res = ofs = 0
    # bytearray yields ints on both Python 2 and 3
    for b in bytearray(buf[pos:limit]):
        res += ((b & 0x7F) << ofs)
        pos += 1
        ofs += 7
        if b < 0x80:
            return res, pos
    raise BadRarFile('cannot load vint')
def load_byte(buf, pos):
    """Load single byte

    Returns ``(value, new_pos)``; raises ``BadRarFile`` when ``pos`` is at
    or past the end of ``buf``.
    """
    end = pos + 1
    if end > len(buf):
        raise BadRarFile('cannot load byte')
    return S_BYTE.unpack_from(buf, pos)[0], end
def load_le32(buf, pos):
    """Load little-endian 32-bit integer

    Returns ``(value, new_pos)``; raises ``BadRarFile`` when fewer than
    four bytes remain in ``buf``.
    """
    end = pos + 4
    if end > len(buf):
        raise BadRarFile('cannot load le32')
    return S_LONG.unpack_from(buf, pos)[0], pos + 4
def load_bytes(buf, num, pos):
    """Load sequence of bytes

    Returns ``(buf[pos:pos + num], new_pos)``; raises ``BadRarFile`` when
    fewer than ``num`` bytes remain in ``buf``.
    """
    end = pos + num
    if end > len(buf):
        raise BadRarFile('cannot load bytes')
    return buf[pos : end], end
def load_vstr(buf, pos):
    """Load bytes prefixed by vint length

    Returns ``(data, new_pos)``.
    """
    slen, pos = load_vint(buf, pos)
    return load_bytes(buf, slen, pos)
def load_dostime(buf, pos):
    """Load LE32 dos timestamp

    Returns ``(datetime, new_pos)``.
    """
    stamp, pos = load_le32(buf, pos)
    tup = parse_dos_time(stamp)
    return to_datetime(tup), pos
def load_unixtime(buf, pos):
    """Load LE32 unix timestamp

    Returns ``(datetime, new_pos)``; the datetime is built with the
    module's ``UTC`` tzinfo.
    """
    secs, pos = load_le32(buf, pos)
    dt = datetime.fromtimestamp(secs, UTC)
    return dt, pos
def load_windowstime(buf, pos):
    """Load LE64 windows timestamp

    The value counts 100 ns units since the Windows epoch (1601).
    Returns ``(datetime, new_pos)`` with microsecond precision.
    """
    # unix epoch (1970) in seconds from windows epoch (1601)
    unix_epoch = 11644473600
    # 64-bit value stored as two LE32 words, low word first
    val1, pos = load_le32(buf, pos)
    val2, pos = load_le32(buf, pos)
    secs, n1secs = divmod((val2 << 32) | val1, 10000000)
    dt = datetime.fromtimestamp(secs - unix_epoch, UTC)
    # remainder is in 100 ns units
    dt = dt.replace(microsecond=n1secs // 10)
    return dt, pos
# new-style next volume
def _next_newvol(volfile):
    """Return the name of the volume following *volfile* (new-style naming).

    Scans backwards for the last decimal digit in the name and increments
    the number ending there via _inc_volname().  Raises BadRarName when
    the name contains no digit at all.
    """
    i = len(volfile) - 1
    while i >= 0:
        if volfile[i] >= '0' and volfile[i] <= '9':
            return _inc_volname(volfile, i)
        i -= 1
    raise BadRarName("Cannot construct volume name: " + volfile)
# old-style next volume
def _next_oldvol(volfile):
    """Return the name of the volume following *volfile* (old-style naming).

    A name ending in '.rar' (any case) gets its last two characters
    replaced with '00'; any other name is incremented at its last
    character via _inc_volname().
    """
    is_first_volume = volfile[-4:].lower() == '.rar'
    if not is_first_volume:
        return _inc_volname(volfile, len(volfile) - 1)
    # rar -> r00
    return volfile[:-2] + '00'
# increase digits with carry, otherwise just increment char
def _inc_volname(volfile, i):
    """Increment the character at index *i* of *volfile*, carrying leftwards.

    A '9' wraps to '0' and the carry moves one position to the left; the
    first character that is not '9' is replaced by its successor code
    point and the walk stops.  Returns the new name as a string.
    """
    fn = list(volfile)
    while i >= 0:
        if fn[i] != '9':
            fn[i] = chr(ord(fn[i]) + 1)
            break
        # '9' overflows: reset and carry into the previous character
        fn[i] = '0'
        i -= 1
    return ''.join(fn)
# rar3 extended time fields
def _parse_ext_time(h, data, pos):
    """Parse the RAR3 extended-time area of *data* into header *h*.

    A 16-bit flags word holds one 4-bit group per timestamp, from the
    highest nibble down: mtime, ctime, atime, arctime.  Each group is
    handed to _parse_xtime().  When an mtime results, ``h.mtime`` and
    ``h.date_time`` are updated from it.

    Returns the position after the consumed data.
    """
    # flags and rest of data can be missing
    flags = 0
    if pos + 2 <= len(data):
        flags = S_SHORT.unpack_from(data, pos)[0]
        pos += 2

    mtime, pos = _parse_xtime(flags >> 3 * 4, data, pos, h.mtime)
    h.ctime, pos = _parse_xtime(flags >> 2 * 4, data, pos)
    h.atime, pos = _parse_xtime(flags >> 1 * 4, data, pos)
    h.arctime, pos = _parse_xtime(flags >> 0 * 4, data, pos)
    if mtime:
        h.mtime = mtime
        h.date_time = mtime.timetuple()[:6]
    return pos
# rar3 one extended time field
def _parse_xtime(flag, data, pos, basetime=None):
    """Parse one RAR3 extended time field.

    Bits of *flag* used here: 8 - field is present; 4 - add one second to
    the base time; low two bits - number of fraction bytes that follow.
    When *basetime* is not given, a DOS timestamp is read from *data*
    first.

    Returns ``(datetime or None, next_pos)``; None when the field is
    absent.
    """
    res = None
    if flag & 8:
        if not basetime:
            basetime, pos = load_dostime(data, pos)

        # load second fractions
        rem = 0
        cnt = flag & 3
        for _ in range(cnt):
            b, pos = load_byte(data, pos)
            rem = (b << 16) | (rem >> 8)

        # convert 100ns units to microseconds; the 24-bit fraction can
        # exceed one second, so clamp to the largest value replace() accepts
        usec = min(rem // 10, 999999)

        # dostime has room for 30 seconds only, correct if needed
        if flag & 4 and basetime.second < 59:
            res = basetime.replace(microsecond=usec, second=basetime.second + 1)
        else:
            res = basetime.replace(microsecond=usec)
    return res, pos
def is_filelike(obj):
    """Filename or file object?

    Returns False for a bytes/unicode value (treated as a filename) and
    True for an object that has ``read``, ``tell`` and ``seek``
    attributes.  Raises ValueError for anything else.
    """
    if isinstance(obj, (bytes, unicode)):
        return False
    res = True
    for a in ('read', 'tell', 'seek'):
        res = res and hasattr(obj, a)
    if not res:
        raise ValueError("Invalid object passed as file")
    return True
def rar3_s2k(psw, salt):
    """String-to-key hash for RAR3.

    Feeds the UTF-16LE password plus *salt*, followed by the low three
    bytes of a little-endian round counter, into a Rar3Sha1 hash for
    16 * 0x4000 rounds.  One IV byte (digest byte 19) is sampled at the
    start of each of the 16 outer rounds.

    Returns ``(key, iv)``: the first 16 digest bytes with every 32-bit
    word byte-swapped, and the 16 collected IV bytes.
    """
    if not isinstance(psw, unicode):
        psw = psw.decode('utf8')
    seed = bytearray(psw.encode('utf-16le') + salt)
    h = Rar3Sha1(rarbug=True)
    iv = b''
    for i in range(16):
        for j in range(0x4000):
            cnt = S_LONG.pack(i * 0x4000 + j)
            h.update(seed)
            h.update(cnt[:3])
            if j == 0:
                iv += h.digest()[19:20]
    key_be = h.digest()[:16]
    # digest words are big-endian; re-pack them as little-endian
    key_le = pack("<LLLL", *unpack(">LLLL", key_be))
    return key_le, iv
def rar3_decompress(vers, meth, data, declen=0, flags=0, crc=0, psw=None, salt=None):
    """Decompress blob of compressed data.

    Used for data with non-standard header - eg. comments.

    Uncompressed, unencrypted data is returned as-is.  Otherwise a
    minimal single-file archive is written to a temporary ``.rar`` file
    and handed to the external unrar tool; the tool's output is returned.
    The temporary file is always removed.  Returns empty bytes when the
    salt flag is set but no *salt* was given.
    """
    # already uncompressed?
    if meth == RAR_M0 and (flags & RAR_FILE_PASSWORD) == 0:
        return data

    # take only necessary flags
    flags = flags & (RAR_FILE_PASSWORD | RAR_FILE_SALT | RAR_FILE_DICTMASK)
    flags |= RAR_LONG_BLOCK

    # file header
    fname = b'data'
    date = 0
    mode = 0x20
    fhdr = S_FILE_HDR.pack(len(data), declen, RAR_OS_MSDOS, crc,
                           date, vers, meth, len(fname), mode)
    fhdr += fname
    if flags & RAR_FILE_SALT:
        if not salt:
            return b''
        fhdr += salt

    # full header: pack once with a zero crc to compute the real one
    hlen = S_BLK_HDR.size + len(fhdr)
    hdr = S_BLK_HDR.pack(0, RAR_BLOCK_FILE, flags, hlen) + fhdr
    hcrc = rar_crc32(hdr[2:]) & 0xFFFF
    hdr = S_BLK_HDR.pack(hcrc, RAR_BLOCK_FILE, flags, hlen) + fhdr

    # archive main header
    mh = S_BLK_HDR.pack(0x90CF, RAR_BLOCK_MAIN, 0, 13) + ZERO * (2 + 4)

    # decompress via temp rar
    tmpfd, tmpname = mkstemp(suffix='.rar')
    tmpf = os.fdopen(tmpfd, "wb")
    try:
        tmpf.write(RAR_ID + mh + hdr + data)
        tmpf.close()

        cmd = [UNRAR_TOOL] + list(OPEN_ARGS)
        add_password_arg(cmd, psw, (flags & RAR_FILE_PASSWORD))
        cmd.append(tmpname)

        p = custom_popen(cmd)
        return p.communicate()[0]
    finally:
        # closing an already-closed file is a no-op
        tmpf.close()
        os.unlink(tmpname)
def to_datetime(t):
    """Convert 6-part time tuple into datetime object.

    *t* is ``(year, month, day, hour, minute, second)`` or None (in which
    case None is returned).  If the values do not form a valid date,
    month and day are clamped into range and hour/minute/second are
    capped at their maximum; Feb 29 in a non-leap year becomes Feb 28.
    """
    if t is None:
        return None

    # extract values
    year, mon, day, h, m, s = t

    # assume the values are valid
    try:
        return datetime(year, mon, day, h, m, s)
    except ValueError:
        pass

    # sanitize invalid values
    mday = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
    mon = min(max(mon, 1), 12)
    day = min(max(day, 1), mday[mon])
    h = min(h, 23)
    m = min(m, 59)
    s = min(s, 59)
    if mon == 2 and day == 29:
        try:
            return datetime(year, mon, day, h, m, s)
        except ValueError:
            # not a leap year
            day = 28
    return datetime(year, mon, day, h, m, s)
def parse_dos_time(stamp):
    """Parse standard 32-bit DOS timestamp.

    Bit layout, low to high: 5 bits seconds/2, 6 bits minutes, 5 bits
    hours, 5 bits day, 4 bits month, 7 bits years since 1980.

    Returns ``(year, month, day, hour, minute, second)`` with no range
    validation.
    """
    half_secs = stamp & 0x1F
    minute = (stamp >> 5) & 0x3F
    hour = (stamp >> 11) & 0x1F
    day = (stamp >> 16) & 0x1F
    month = (stamp >> 21) & 0x0F
    year = ((stamp >> 25) & 0x7F) + 1980
    return (year, month, day, hour, minute, half_secs * 2)
def custom_popen(cmd):
    """Disconnect cmd from parent fds, read only from stdout.

    Starts *cmd* unbuffered with stdin and stdout piped and stderr merged
    into stdout.  Returns the Popen object.  Raises RarCannotExec when
    the executable does not exist; other OSErrors propagate unchanged.
    """
    # needed for py2exe
    creationflags = 0
    if sys.platform == 'win32':
        creationflags = 0x08000000 # CREATE_NO_WINDOW

    # run command
    try:
        p = Popen(cmd, bufsize=0, stdout=PIPE, stdin=PIPE, stderr=STDOUT,
                  creationflags=creationflags)
    except OSError as ex:
        if ex.errno == errno.ENOENT:
            raise RarCannotExec("Unrar not installed? (rarfile.UNRAR_TOOL=%r)" % UNRAR_TOOL)
        raise
    return p
def custom_check(cmd, ignore_retcode=False):
    """Run command, collect output, raise error if needed.

    Returns the command's combined output.  Raises RarExecError on a
    non-zero exit code unless *ignore_retcode* is true.
    """
    p = custom_popen(cmd)
    out, _ = p.communicate()
    if p.returncode and not ignore_retcode:
        raise RarExecError("Check-run failed")
    return out
def add_password_arg(cmd, psw, ___required=False):
    """Append password switch to commandline.

    Nothing is appended when the alternative tool is in use.  Otherwise
    ``-p<psw>`` is appended, or ``-p-`` when *psw* is None.  The third
    argument is accepted but not used.
    """
    if UNRAR_TOOL == ALT_TOOL:
        return
    if psw is not None:
        cmd.append('-p' + psw)
    else:
        cmd.append('-p-')
def check_returncode(p, out):
    """Raise exception according to unrar exit code.

    Returns None when ``p.returncode`` is zero.  Otherwise raises the
    exception class mapped to the code; the message is built from the
    class docstring, the exit code and, when non-empty, *out*.
    """
    code = p.returncode
    if code == 0:
        return

    # map return code to exception class, codes from rar.txt
    errmap = [None,
              RarWarning, RarFatalError, RarCRCError, RarLockedArchiveError,    # 1..4
              RarWriteError, RarOpenError, RarUserError, RarMemoryError,        # 5..8
              RarCreateError, RarNoFilesError, RarWrongPassword]                # 9..11
    if UNRAR_TOOL == ALT_TOOL:
        # exit codes of the alternative tool are not mapped
        errmap = [None]
    if code > 0 and code < len(errmap):
        exc = errmap[code]
    elif code == 255:
        exc = RarUserBreak
    elif code < 0:
        exc = RarSignalExit
    else:
        exc = RarUnknownError

    # format message
    if out:
        msg = "%s [%d]: %s" % (exc.__doc__, p.returncode, out)
    else:
        msg = "%s [%d]" % (exc.__doc__, p.returncode)

    raise exc(msg)
def hmac_sha256(key, data):
    """Return the raw HMAC-SHA256 digest of *data* under *key*."""
    mac = HMAC(key, data, sha256)
    return mac.digest()
def membuf_tempfile(memfile):
    """Write in-memory file object to real file.

    Rewinds *memfile*, copies it in BSIZE chunks into a new temporary
    ``.rar`` file and returns that file's name.  On any failure the
    temporary file is closed and removed before the error propagates.
    """
    memfile.seek(0, 0)

    tmpfd, tmpname = mkstemp(suffix='.rar')
    tmpf = os.fdopen(tmpfd, "wb")

    try:
        while True:
            buf = memfile.read(BSIZE)
            if not buf:
                break
            tmpf.write(buf)
        tmpf.close()
        return tmpname
    except BaseException:
        # do not leave a partial temp file behind, then re-raise
        tmpf.close()
        os.unlink(tmpname)
        raise
class XTempFile(object):
    """Real file for archive.

    Context manager that yields a filename.  A file-like object is first
    copied to a temporary file, which is removed on exit; a filename is
    passed through untouched.
    """
    __slots__ = ('_tmpfile', '_filename')

    def __init__(self, rarfile):
        if is_filelike(rarfile):
            self._tmpfile = membuf_tempfile(rarfile)
            self._filename = self._tmpfile
        else:
            self._tmpfile = None
            self._filename = rarfile

    def __enter__(self):
        return self._filename

    def __exit__(self, exc_type, exc_value, tb):
        if self._tmpfile:
            try:
                os.unlink(self._tmpfile)
            except OSError:
                # best-effort cleanup; the file may already be gone
                pass
            self._tmpfile = None
2969 # Check if unrar works
# Snapshot of the tool and argument settings in effect when these lines
# run; _check_unrar_tool() assigns them back to the live settings once
# the original tool has been run successfully.
ORIG_UNRAR_TOOL = UNRAR_TOOL
ORIG_OPEN_ARGS = OPEN_ARGS
ORIG_EXTRACT_ARGS = EXTRACT_ARGS
ORIG_TEST_ARGS = TEST_ARGS
2977 def _check_unrar_tool():
2978 global UNRAR_TOOL
, OPEN_ARGS
, EXTRACT_ARGS
, TEST_ARGS
2980 # does UNRAR_TOOL work?
2981 custom_check([ORIG_UNRAR_TOOL
], True)
2983 UNRAR_TOOL
= ORIG_UNRAR_TOOL
2984 OPEN_ARGS
= ORIG_OPEN_ARGS
2985 EXTRACT_ARGS
= ORIG_EXTRACT_ARGS
2986 TEST_ARGS
= ORIG_TEST_ARGS
2987 except RarCannotExec
:
2989 # does ALT_TOOL work?
2990 custom_check([ALT_TOOL
] + list(ALT_CHECK_ARGS
), True)
2992 UNRAR_TOOL
= ALT_TOOL
2993 OPEN_ARGS
= ALT_OPEN_ARGS
2994 EXTRACT_ARGS
= ALT_EXTRACT_ARGS
2995 TEST_ARGS
= ALT_TEST_ARGS
2996 except RarCannotExec
:
2997 # no usable tool, only uncompressed archives work