3 # Copyright (c) 2005-2016 Marko Kreen <markokr@gmail.com>
5 # Permission to use, copy, modify, and/or distribute this software for any
6 # purpose with or without fee is hereby granted, provided that the above
7 # copyright notice and this permission notice appear in all copies.
9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 r
"""RAR archive reader.
This is a Python module for reading RAR archives.  The interface
is made as :mod:`zipfile`-like as possible.
23 - Parse archive structure with Python.
24 - Extract non-compressed files with Python
25 - Extract compressed files with unrar.
26 - Optionally write compressed data to temp file to speed up unrar,
27 otherwise it needs to scan whole archive on each execution.
33 rf = rarfile.RarFile('myarchive.rar')
34 for f in rf.infolist():
35 print f.filename, f.file_size
36 if f.filename == 'README':
39 Archive files can also be accessed via file-like object returned
40 by :meth:`RarFile.open`::
44 with rarfile.RarFile('archive.rar') as rf:
45 with rf.open('README') as f:
There are a few module-level parameters to tune behaviour;
here they are with their defaults and the reasons to change them::
54 # Set to full path of unrar.exe if it is not in PATH
55 rarfile.UNRAR_TOOL = "unrar"
57 # Set to '\\' to be more compatible with old rarfile
58 rarfile.PATH_SEP = '/'
60 For more details, refer to source.
64 from __future__
import division
, print_function
67 ## Imports and compat - support both Python 2.x and 3.x
75 from struct
import pack
, unpack
, Struct
76 from binascii
import crc32
, hexlify
77 from tempfile
import mkstemp
78 from subprocess
import Popen
, PIPE
, STDOUT
79 from io
import RawIOBase
80 from hashlib
import sha1
, sha256
82 from datetime
import datetime
, timedelta
, tzinfo
84 # fixed offset timezone, for UTC
86 from datetime
import timezone
88 class timezone(tzinfo
):
89 """Compat timezone."""
90 __slots__
= ('_ofs', '_name')
93 def __init__(self
, offset
, name
):
94 super(timezone
, self
).__init
__()
95 self
._ofs
, self
._name
= offset
, name
97 def utcoffset(self
, dt
):
100 def tzname(self
, dt
):
# only needed for encrypted headers
109 from cryptography
.hazmat
.primitives
.ciphers
import algorithms
, modes
, Cipher
110 from cryptography
.hazmat
.backends
import default_backend
111 from cryptography
.hazmat
.primitives
import hashes
112 from cryptography
.hazmat
.primitives
.kdf
import pbkdf2
class AES_CBC_Decrypt(object):
    """AES-CBC decryptor backed by the ``cryptography`` package.

    After construction, ``decrypt`` is the bound ``update`` method of a
    CBC decryptor context created for the given key and IV.
    """

    def __init__(self, key, iv):
        algorithm = algorithms.AES(key)
        mode = modes.CBC(iv)
        decryptor = Cipher(algorithm, mode, default_backend()).decryptor()
        self.decrypt = decryptor.update
def pbkdf2_sha256(password, salt, iters):
    """Derive a 32-byte key with PBKDF2 over HMAC-SHA256.

    *password* is the secret handed to ``derive()``; *salt* and *iters*
    are passed to ``PBKDF2HMAC`` unchanged.
    """
    digest = hashes.SHA256()
    backend = default_backend()
    kdf = pbkdf2.PBKDF2HMAC(digest, 32, salt, iters, backend)
    return kdf.derive(password)
126 from Crypto
.Cipher
import AES
127 from Crypto
.Protocol
import KDF
class AES_CBC_Decrypt(object):
    """AES-CBC decryptor backed by the ``Crypto`` (PyCrypto) package.

    After construction, ``decrypt`` is the bound ``decrypt`` method of
    an AES cipher object created in CBC mode for the given key and IV.
    """

    def __init__(self, key, iv):
        cipher = AES.new(key, AES.MODE_CBC, iv)
        self.decrypt = cipher.decrypt
def pbkdf2_sha256(password, salt, iters):
    """Derive a 32-byte key with PBKDF2 over HMAC-SHA256.

    Delegates to ``KDF.PBKDF2`` with ``hmac_sha256`` as the fifth
    positional argument.
    """
    key_length = 32
    derived = KDF.PBKDF2(password, salt, key_length, iters, hmac_sha256)
    return derived
143 from pyblake2
import blake2s
149 if sys
.hexversion
< 0x3000000:
150 def rar_crc32(data
, prev
=0):
151 """CRC32 with unsigned values.
153 if (prev
> 0) and (prev
& 0x80000000):
155 res
= crc32(data
, prev
)
161 else: # pragma: no cover
163 """Return hex string."""
164 return hexlify(data
).decode('ascii')
167 _byte_code
= int # noqa
172 # export only interesting items
173 __all__
= ['is_rarfile', 'RarInfo', 'RarFile', 'RarExtFile']
176 ## Module configuration. Can be tuned after importing.
179 #: default fallback charset
180 DEFAULT_CHARSET
= "windows-1252"
182 #: list of encodings to try, with fallback to DEFAULT_CHARSET if none succeed
183 TRY_ENCODINGS
= ('utf8', 'utf-16le')
185 #: 'unrar', 'rar' or full path to either one
188 #: Command line args to use for opening file for reading.
189 OPEN_ARGS
= ('p', '-inul')
191 #: Command line args to use for extracting file to disk.
192 EXTRACT_ARGS
= ('x', '-y', '-idq')
194 #: args for testrar()
195 TEST_ARGS
= ('t', '-idq')
198 # Allow use of tool that is not compatible with unrar.
200 # By default use 'bsdtar' which is 'tar' program that
201 # sits on top of libarchive.
203 # Problems with libarchive RAR backend:
204 # - Does not support solid archives.
205 # - Does not support password-protected archives.
209 ALT_OPEN_ARGS
= ('-x', '--to-stdout', '-f')
210 ALT_EXTRACT_ARGS
= ('-x', '-f')
211 ALT_TEST_ARGS
= ('-t', '-f')
212 ALT_CHECK_ARGS
= ('--help',)
214 #: whether to speed up decompression by using tmp archive
217 #: limit the filesize for tmp archive usage
218 HACK_SIZE_LIMIT
= 20 * 1024 * 1024
220 #: Separator for path name components. RAR internally uses '\\'.
221 #: Use '/' to be similar with zipfile.
# RAR3 block type codes; the trailing comment on each line is the
# ASCII character with that code.
RAR_BLOCK_MARK = 0x72  # r
RAR_BLOCK_MAIN = 0x73  # s
RAR_BLOCK_FILE = 0x74  # t
RAR_BLOCK_OLD_COMMENT = 0x75  # u
RAR_BLOCK_OLD_EXTRA = 0x76  # v
RAR_BLOCK_OLD_SUB = 0x77  # w
RAR_BLOCK_OLD_RECOVERY = 0x78  # x
RAR_BLOCK_OLD_AUTH = 0x79  # y
RAR_BLOCK_SUB = 0x7a  # z
RAR_BLOCK_ENDARC = 0x7b  # {

# flags for RAR_BLOCK_MAIN
RAR_MAIN_VOLUME = 0x0001
RAR_MAIN_COMMENT = 0x0002
RAR_MAIN_LOCK = 0x0004
RAR_MAIN_SOLID = 0x0008
RAR_MAIN_NEWNUMBERING = 0x0010
RAR_MAIN_AUTH = 0x0020
RAR_MAIN_RECOVERY = 0x0040
RAR_MAIN_PASSWORD = 0x0080
RAR_MAIN_FIRSTVOLUME = 0x0100
RAR_MAIN_ENCRYPTVER = 0x0200

# flags for RAR_BLOCK_FILE
RAR_FILE_SPLIT_BEFORE = 0x0001
RAR_FILE_SPLIT_AFTER = 0x0002
RAR_FILE_PASSWORD = 0x0004
RAR_FILE_COMMENT = 0x0008
RAR_FILE_SOLID = 0x0010
# RAR_FILE_DICTMASK covers a three-bit field (0x00e0).  The DICT* values
# and RAR_FILE_DIRECTORY below are values of that field, not independent
# bits, so they must be compared after masking (RAR_FILE_DIRECTORY equals
# the full mask).
RAR_FILE_DICTMASK = 0x00e0
RAR_FILE_DICT64 = 0x0000
RAR_FILE_DICT128 = 0x0020
RAR_FILE_DICT256 = 0x0040
RAR_FILE_DICT512 = 0x0060
RAR_FILE_DICT1024 = 0x0080
RAR_FILE_DICT2048 = 0x00a0
RAR_FILE_DICT4096 = 0x00c0
RAR_FILE_DIRECTORY = 0x00e0
RAR_FILE_LARGE = 0x0100
RAR_FILE_UNICODE = 0x0200
RAR_FILE_SALT = 0x0400
RAR_FILE_VERSION = 0x0800
RAR_FILE_EXTTIME = 0x1000
RAR_FILE_EXTFLAGS = 0x2000

# flags for RAR_BLOCK_ENDARC
RAR_ENDARC_NEXT_VOLUME = 0x0001
RAR_ENDARC_DATACRC = 0x0002
RAR_ENDARC_REVSPACE = 0x0004
RAR_ENDARC_VOLNR = 0x0008

# flags common to all blocks
RAR_SKIP_IF_UNKNOWN = 0x4000
# when RAR_LONG_BLOCK is set the header parser reads an extra 32-bit
# size field (stored as add_size) right after the base block header
RAR_LONG_BLOCK = 0x8000
292 # Compression methods - '0'..'5'
# RAR5 block type codes
RAR5_BLOCK_SERVICE = 3
RAR5_BLOCK_ENCRYPTION = 4
RAR5_BLOCK_ENDARC = 5

# RAR5 per-block flag bits (each value is a distinct single bit)
RAR5_BLOCK_FLAG_EXTRA_DATA = 0x01
RAR5_BLOCK_FLAG_DATA_AREA = 0x02
RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN = 0x04
RAR5_BLOCK_FLAG_SPLIT_BEFORE = 0x08
RAR5_BLOCK_FLAG_SPLIT_AFTER = 0x10
RAR5_BLOCK_FLAG_DEPENDS_PREV = 0x20
RAR5_BLOCK_FLAG_KEEP_WITH_PARENT = 0x40

# RAR5 main-record flag bits
RAR5_MAIN_FLAG_ISVOL = 0x01
RAR5_MAIN_FLAG_HAS_VOLNR = 0x02
RAR5_MAIN_FLAG_SOLID = 0x04
RAR5_MAIN_FLAG_RECOVERY = 0x08
RAR5_MAIN_FLAG_LOCKED = 0x10

# RAR5 file-record flag bits
RAR5_FILE_FLAG_ISDIR = 0x01
RAR5_FILE_FLAG_HAS_MTIME = 0x02
RAR5_FILE_FLAG_HAS_CRC32 = 0x04
RAR5_FILE_FLAG_UNKNOWN_SIZE = 0x08

# bit tested against a record's file_compress_flags
RAR5_COMPR_SOLID = 0x40

# RAR5 encryption-record flag bit
RAR5_ENC_FLAG_HAS_CHECKVAL = 0x01

# RAR5 end-of-archive flag bit
RAR5_ENDARC_FLAG_NEXT_VOL = 0x01

# RAR5 file extra-record type codes
RAR5_XFILE_ENCRYPTION = 1
RAR5_XFILE_VERSION = 4
RAR5_XFILE_SERVICE = 7

# RAR5 extra time record flag bits
RAR5_XTIME_UNIXTIME = 0x01
RAR5_XTIME_HAS_MTIME = 0x02
RAR5_XTIME_HAS_CTIME = 0x04
RAR5_XTIME_HAS_ATIME = 0x08

# RAR5 extra encryption record: cipher id and flag bits
RAR5_XENC_CIPHER_AES256 = 0
RAR5_XENC_CHECKVAL = 0x01
RAR5_XENC_TWEAKED = 0x02

# RAR5 extra hash record: hash type id
RAR5_XHASH_BLAKE2SP = 0

# RAR5 redirection types; this is the "type" member of the
# (type, flags, target) tuple stored in RarInfo.file_redir
RAR5_XREDIR_UNIX_SYMLINK = 1
RAR5_XREDIR_WINDOWS_SYMLINK = 2
RAR5_XREDIR_WINDOWS_JUNCTION = 3
RAR5_XREDIR_HARD_LINK = 4
RAR5_XREDIR_FILE_COPY = 5

# bit that may be set in the "flags" member of RarInfo.file_redir
RAR5_XREDIR_ISDIR = 0x01

# RAR5 extra owner record flag bits
RAR5_XOWNER_UNAME = 0x01
RAR5_XOWNER_GNAME = 0x02
RAR5_XOWNER_UID = 0x04
RAR5_XOWNER_GID = 0x08
372 ## internal constants
375 RAR_ID
= b
"Rar!\x1a\x07\x00"
376 RAR5_ID
= b
"Rar!\x1a\x07\x01\x00"
379 UTC
= timezone(timedelta(0), 'UTC')
382 def _get_rar_version(xfile
):
383 '''Check quickly whether file is rar archive.
385 with
XFile(xfile
) as fd
:
386 buf
= fd
.read(len(RAR5_ID
))
387 if buf
.startswith(RAR_ID
):
389 elif buf
.startswith(RAR5_ID
):
def is_rarfile(xfile):
    '''Tell whether *xfile* looks like a RAR archive.

    Returns True when _get_rar_version() reports a value above zero
    for it, False otherwise.
    '''
    version = _get_rar_version(xfile)
    return version > 0
class Error(Exception):
    """Base class for rarfile errors."""


class BadRarFile(Error):
    """Incorrect data in archive."""


class NotRarFile(Error):
    """The file is not RAR archive."""


class BadRarName(Error):
    """Cannot guess multipart name components."""


class NoRarEntry(Error):
    """File not found in RAR"""


class PasswordRequired(Error):
    """File requires password"""


class NeedFirstVolume(Error):
    """Need to start from first volume."""


class NoCrypto(Error):
    """Cannot parse encrypted headers - no crypto available."""


class RarExecError(Error):
    """Problem reported by unrar/rar."""


# Everything below derives from RarExecError, so callers can catch
# that single class to handle any failure of the external tool.

class RarWarning(RarExecError):
    """Non-fatal error"""


class RarFatalError(RarExecError):
    """Fatal error"""


class RarCRCError(RarExecError):
    """CRC error during unpacking"""


class RarLockedArchiveError(RarExecError):
    """Must not modify locked archive"""


class RarWriteError(RarExecError):
    """Write error"""


class RarOpenError(RarExecError):
    """Open error"""


class RarUserError(RarExecError):
    """User error"""


class RarMemoryError(RarExecError):
    """Memory error"""


class RarCreateError(RarExecError):
    """Create error"""


class RarNoFilesError(RarExecError):
    """No files that match pattern were found"""


class RarUserBreak(RarExecError):
    """User stop"""


class RarWrongPassword(RarExecError):
    """Incorrect password"""


class RarUnknownError(RarExecError):
    """Unknown exit code"""


class RarSignalExit(RarExecError):
    """Unrar exited with signal"""


class RarCannotExec(RarExecError):
    """Executable not found."""
475 class RarInfo(object):
476 r
'''An entry in rar archive.
478 RAR3 extended timestamps are :class:`datetime.datetime` objects without timezone.
479 RAR5 extended timestamps are :class:`datetime.datetime` objects with UTC timezone.
484 File name with relative path.
485 Path separator is '/'. Always unicode string.
488 File modification timestamp. As tuple of (year, month, day, hour, minute, second).
489 RAR5 allows archives where it is missing, it's None then.
498 Compression method: one of :data:`RAR_M0` .. :data:`RAR_M5` constants.
501 Minimal Rar version needed for decompressing. As (major*10 + minor),
506 RAR5 does not have such field in archive, it's simply set to 50.
509 Host OS type, one of RAR_OS_* constants.
511 RAR3: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`, :data:`RAR_OS_MSDOS`,
512 :data:`RAR_OS_OS2`, :data:`RAR_OS_BEOS`.
514 RAR5: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`.
517 File attributes. May be either dos-style or unix-style, depending on host_os.
520 File modification time. Same value as :attr:`date_time`
521 but as :class:`datetime.datetime` object with extended precision.
524 Optional time field: creation time. As :class:`datetime.datetime` object.
527 Optional time field: last access time. As :class:`datetime.datetime` object.
530 Optional time field: archival time. As :class:`datetime.datetime` object.
534 CRC-32 of uncompressed file, unsigned int.
539 Blake2SP hash over decompressed data. (RAR5-only)
542 Optional file comment field. Unicode string. (RAR3-only)
545 If not None, file is link of some sort. Contains tuple of (type, flags, target).
548 Type is one of constants:
550 :data:`RAR5_XREDIR_UNIX_SYMLINK`
551 unix symlink to target.
552 :data:`RAR5_XREDIR_WINDOWS_SYMLINK`
553 windows symlink to target.
554 :data:`RAR5_XREDIR_WINDOWS_JUNCTION`
556 :data:`RAR5_XREDIR_HARD_LINK`
558 :data:`RAR5_XREDIR_FILE_COPY`
559 current file is copy of another archive entry.
561 Flags may contain :data:`RAR5_XREDIR_ISDIR` bit.
564 Volume nr, starting from 0.
567 Volume file name, where file starts.
571 # zipfile-compatible fields
581 # optional extended time fields, datetime() objects.
586 extract_version
= None
604 """Returns True if entry is a directory.
606 if self
.type == RAR_BLOCK_FILE
:
607 return (self
.flags
& RAR_FILE_DIRECTORY
) == RAR_FILE_DIRECTORY
610 def needs_password(self
):
611 """Returns True if data is stored password-protected.
613 if self
.type == RAR_BLOCK_FILE
:
614 return (self
.flags
& RAR_FILE_PASSWORD
) > 0
618 class RarFile(object):
619 '''Parse RAR structure, provide access to files in archive.
622 #: Archive comment. Unicode string or None.
625 def __init__(self
, rarfile
, mode
="r", charset
=None, info_callback
=None,
626 crc_check
=True, errors
="stop"):
627 """Open and parse a RAR archive.
634 only 'r' is supported.
636 fallback charset to use, if filenames are not already Unicode-enabled.
638 debug callback, gets to see all archive entries.
640 set to False to disable CRC checks
642 Either "stop" to quietly stop parsing on errors,
643 or "strict" to raise errors. Default is "stop".
645 self
._rarfile
= rarfile
646 self
._charset
= charset
or DEFAULT_CHARSET
647 self
._info
_callback
= info_callback
648 self
._crc
_check
= crc_check
649 self
._password
= None
650 self
._file
_parser
= None
654 elif errors
== "strict":
657 raise ValueError("Invalid value for 'errors' parameter.")
660 raise NotImplementedError("RarFile supports only mode=r")
667 def __exit__(self
, typ
, value
, traceback
):
670 def setpassword(self
, password
):
671 '''Sets the password to use when extracting.'''
672 self
._password
= password
673 if self
._file
_parser
:
674 if self
._file
_parser
.has_header_encryption():
675 self
._file
_parser
= None
676 if not self
._file
_parser
:
679 self
._file
_parser
.setpassword(self
._password
)
def needs_password(self):
    '''Report whether any archive entry needs a password to extract.

    The answer comes straight from the active file parser.
    '''
    parser = self._file_parser
    return parser.needs_password()
686 '''Return list of filenames in archive.'''
687 return [f
.filename
for f
in self
.infolist()]
690 '''Return RarInfo objects for all files/directories in archive.'''
691 return self
._file
_parser
.infolist()
def volumelist(self):
    '''Return the file names of the archive volumes.

    For a single-volume archive the list holds only the name of the
    main archive file.
    '''
    parser = self._file_parser
    return parser.volumelist()
def getinfo(self, fname):
    '''Look up the RarInfo record for *fname*.

    The lookup is delegated to the active file parser.
    '''
    parser = self._file_parser
    return parser.getinfo(fname)
706 def open(self
, fname
, mode
='r', psw
=None):
707 '''Returns file-like object (:class:`RarExtFile`),
708 from where the data can be read.
710 The object implements :class:`io.RawIOBase` interface, so it can
711 be further wrapped with :class:`io.BufferedReader`
712 and :class:`io.TextIOWrapper`.
714 On older Python where io module is not available, it implements
715 only .read(), .seek(), .tell() and .close() methods.
717 The object is seekable, although the seeking is fast only on
718 uncompressed files, on compressed files the seeking is implemented
719 by reading ahead and/or restarting the decompression.
724 file name or RarInfo instance.
728 password to use for extracting.
732 raise NotImplementedError("RarFile.open() supports only mode=r")
735 inf
= self
.getinfo(fname
)
737 raise TypeError("Directory does not have any data: " + inf
.filename
)
740 if inf
.needs_password():
741 psw
= psw
or self
._password
743 raise PasswordRequired("File %s requires password" % inf
.filename
)
747 return self
._file
_parser
.open(inf
, psw
)
749 def read(self
, fname
, psw
=None):
750 """Return uncompressed data for archive entry.
752 For longer files using :meth:`RarFile.open` may be better idea.
757 filename or RarInfo instance
759 password to use for extracting.
762 with self
.open(fname
, 'r', psw
) as f
:
766 """Release open resources."""
770 """Print archive file list to stdout."""
771 for f
in self
.infolist():
774 def extract(self
, member
, path
=None, pwd
=None):
775 """Extract single file into current directory.
780 filename or :class:`RarInfo` instance
782 optional destination path
784 optional password to use
786 if isinstance(member
, RarInfo
):
787 fname
= member
.filename
790 self
._extract
([fname
], path
, pwd
)
792 def extractall(self
, path
=None, members
=None, pwd
=None):
793 """Extract all files into current directory.
798 optional destination path
800 optional filename or :class:`RarInfo` instance list to extract
802 optional password to use
805 if members
is not None:
807 if isinstance(m
, RarInfo
):
808 fnlist
.append(m
.filename
)
811 self
._extract
(fnlist
, path
, pwd
)
814 """Let 'unrar' test the archive.
816 cmd
= [UNRAR_TOOL
] + list(TEST_ARGS
)
817 add_password_arg(cmd
, self
._password
)
819 with
XTempFile(self
._rarfile
) as rarfile
:
821 p
= custom_popen(cmd
)
822 output
= p
.communicate()[0]
823 check_returncode(p
, output
)
826 """Return error string if parsing failed,
827 or None if no problems.
829 if not self
._file
_parser
:
830 return "Not a RAR file"
831 return self
._file
_parser
.strerror()
838 ver
= _get_rar_version(self
._rarfile
)
840 p3
= RAR3Parser(self
._rarfile
, self
._password
, self
._crc
_check
,
841 self
._charset
, self
._strict
, self
._info
_callback
)
842 self
._file
_parser
= p3
# noqa
844 p5
= RAR5Parser(self
._rarfile
, self
._password
, self
._crc
_check
,
845 self
._charset
, self
._strict
, self
._info
_callback
)
846 self
._file
_parser
= p5
# noqa
848 raise BadRarFile("Not a RAR file")
850 self
._file
_parser
.parse()
851 self
.comment
= self
._file
_parser
.comment
853 # call unrar to extract a file
854 def _extract(self
, fnlist
, path
=None, psw
=None):
855 cmd
= [UNRAR_TOOL
] + list(EXTRACT_ARGS
)
858 psw
= psw
or self
._password
859 add_password_arg(cmd
, psw
)
863 with
XTempFile(self
._rarfile
) as rarfn
:
868 if os
.sep
!= PATH_SEP
:
869 fn
= fn
.replace(PATH_SEP
, os
.sep
)
874 cmd
.append(path
+ os
.sep
)
877 p
= custom_popen(cmd
)
878 output
= p
.communicate()[0]
879 check_returncode(p
, output
)
882 # File format parsing
885 class CommonParser(object):
886 """Shared parser parts."""
889 _needs_password
= False
896 def __init__(self
, rarfile
, password
, crc_check
, charset
, strict
, info_cb
):
897 self
._rarfile
= rarfile
898 self
._password
= password
899 self
._crc
_check
= crc_check
900 self
._charset
= charset
901 self
._strict
= strict
902 self
._info
_callback
= info_cb
907 def has_header_encryption(self
):
908 """Returns True if headers are encrypted
910 if self
._hdrenc
_main
:
913 if self
._main
.flags
& RAR_MAIN_PASSWORD
:
917 def setpassword(self
, psw
):
918 """Set cached password."""
def volumelist(self):
    """Return the parser's list of volume files (the stored list itself, not a copy)."""
    volumes = self._vol_list
    return volumes
def needs_password(self):
    """Return the flag recording whether a password is required."""
    required = self._needs_password
    return required
931 return self
._parse
_error
934 """List of RarInfo records.
936 return self
._info
_list
938 def getinfo(self
, fname
):
939 """Return RarInfo for filename
941 # accept both ways here
943 fname2
= fname
.replace("\\", "/")
945 fname2
= fname
.replace("/", "\\")
948 return self
._info
_map
[fname
]
951 return self
._info
_map
[fname2
]
953 raise NoRarEntry("No such file: %s" % fname
)
966 def _parse_real(self
):
967 fd
= XFile(self
._rarfile
)
969 sig
= fd
.read(len(self
._expect
_sig
))
970 if sig
!= self
._expect
_sig
:
971 if isinstance(self
._rarfile
, (str, unicode)):
972 raise NotRarFile("Not a Rar archive: {}".format(self
._rarfile
))
973 raise NotRarFile("Not a Rar archive")
975 volume
= 0 # first vol (.rar) is 0
978 volfile
= self
._rarfile
979 self
._vol
_list
= [self
._rarfile
]
982 h
= None # don't read past ENDARC
984 h
= self
._parse
_header
(fd
)
990 volfile
= self
._next
_volname
(volfile
)
993 self
._set
_error
("Cannot open next volume: %s", volfile
)
996 sig
= fd
.read(len(self
._expect
_sig
))
997 if sig
!= self
._expect
_sig
:
998 self
._set
_error
("Invalid volume sig: %s", volfile
)
1002 self
._vol
_list
.append(volfile
)
1006 h
.volume_file
= volfile
1008 if h
.type == RAR_BLOCK_MAIN
and not self
._main
:
1010 if h
.flags
& RAR_MAIN_NEWNUMBERING
:
1011 # RAR 2.x does not set FIRSTVOLUME,
1012 # so check it only if NEWNUMBERING is used
1013 if (h
.flags
& RAR_MAIN_FIRSTVOLUME
) == 0:
1014 raise NeedFirstVolume("Need to start from first volume")
1015 if h
.flags
& RAR_MAIN_PASSWORD
:
1016 self
._needs
_password
= True
1017 if not self
._password
:
1019 elif h
.type == RAR_BLOCK_ENDARC
:
1020 more_vols
= (h
.flags
& RAR_ENDARC_NEXT_VOLUME
) > 0
1022 elif h
.type == RAR_BLOCK_FILE
:
1023 # RAR 2.x does not write RAR_BLOCK_ENDARC
1024 if h
.flags
& RAR_FILE_SPLIT_AFTER
:
1026 # RAR 2.x does not set RAR_MAIN_FIRSTVOLUME
1027 if volume
== 0 and h
.flags
& RAR_FILE_SPLIT_BEFORE
:
1028 raise NeedFirstVolume("Need to start from first volume")
1030 if h
.needs_password():
1031 self
._needs
_password
= True
1034 self
.process_entry(fd
, h
)
1036 if self
._info
_callback
:
1037 self
._info
_callback
(h
)
1041 fd
.seek(h
.data_offset
+ h
.add_size
, 0)
def process_entry(self, fd, item):
    """Examine *item* and add it to the lookup cache.

    Placeholder in the shared parser: it always raises
    NotImplementedError, so format-specific parsers must override it.
    """
    raise NotImplementedError
1047 def _decrypt_header(self
, fd
):
1048 raise NotImplementedError('_decrypt_header')
1050 def _parse_block_header(self
, fd
):
1051 raise NotImplementedError('_parse_block_header')
1053 def _open_hack(self
, inf
, psw
):
1054 raise NotImplementedError('_open_hack')
1056 # read single header
1057 def _parse_header(self
, fd
):
1059 # handle encrypted headers
1060 if (self
._main
and self
._main
.flags
& RAR_MAIN_PASSWORD
) or self
._hdrenc
_main
:
1061 if not self
._password
:
1063 fd
= self
._decrypt
_header
(fd
)
1065 # now read actual header
1066 return self
._parse
_block
_header
(fd
)
1067 except struct
.error
:
1068 self
._set
_error
('Broken header in RAR file')
1071 # given current vol name, construct next one
def _next_volname(self, volfile):
    """Build the name of the volume that follows *volfile*.

    Raises IOError when *volfile* is a file-like object, since there is
    no name to derive the next one from.  Otherwise the main header's
    RAR_MAIN_NEWNUMBERING flag selects between the new-style and
    old-style naming helpers.
    """
    if is_filelike(volfile):
        raise IOError("Working on single FD")
    uses_new_numbering = self._main.flags & RAR_MAIN_NEWNUMBERING
    make_name = _next_newvol if uses_new_numbering else _next_oldvol
    return make_name(volfile)
1079 def _set_error(self
, msg
, *args
):
1082 self
._parse
_error
= msg
1084 raise BadRarFile(msg
)
1086 def open(self
, inf
, psw
):
1087 """Return stream object for file data."""
1090 # cannot leave to unrar as it expects copied file to exist
1091 if inf
.file_redir
[0] in (RAR5_XREDIR_FILE_COPY
, RAR5_XREDIR_HARD_LINK
):
1092 inf
= self
.getinfo(inf
.file_redir
[2])
1094 raise BadRarFile('cannot find copied file')
1096 if inf
.flags
& RAR_FILE_SPLIT_BEFORE
:
1097 raise NeedFirstVolume("Partial file, please start from first volume: " + inf
.filename
)
1099 # is temp write usable?
1103 elif self
._main
._must
_disable
_hack
():
1105 elif inf
._must
_disable
_hack
():
1107 elif is_filelike(self
._rarfile
):
1109 elif inf
.file_size
> HACK_SIZE_LIMIT
:
1111 elif not USE_EXTRACT_HACK
:
1115 if inf
.compress_type
== RAR_M0
and (inf
.flags
& RAR_FILE_PASSWORD
) == 0 and inf
.file_redir
is None:
1116 return self
._open
_clear
(inf
)
1118 return self
._open
_hack
(inf
, psw
)
1119 elif is_filelike(self
._rarfile
):
1120 return self
._open
_unrar
_membuf
(self
._rarfile
, inf
, psw
)
1122 return self
._open
_unrar
(self
._rarfile
, inf
, psw
)
def _open_clear(self, inf):
    """Return a DirectReader for *inf*, bound to this parser."""
    reader = DirectReader(self, inf)
    return reader
1127 def _open_hack_core(self
, inf
, psw
, prefix
, suffix
):
1129 size
= inf
.compress_size
+ inf
.header_size
1130 rf
= XFile(inf
.volume_file
, 0)
1131 rf
.seek(inf
.header_offset
)
1133 tmpfd
, tmpname
= mkstemp(suffix
='.rar')
1134 tmpf
= os
.fdopen(tmpfd
, "wb")
1140 buf
= rf
.read(BSIZE
)
1144 raise BadRarFile('read failed: ' + inf
.filename
)
1156 return self
._open
_unrar
(tmpname
, inf
, psw
, tmpname
)
1158 # write in-memory archive to temp file - needed for solid archives
def _open_unrar_membuf(self, memfile, inf, psw):
    """Open *inf* from an in-memory archive by way of a temp file.

    membuf_tempfile() produces a temp file name from *memfile*; that
    name is then handed to _open_unrar() both as the archive path and
    as the tmpfile argument, with force_file enabled.
    """
    tmp_path = membuf_tempfile(memfile)
    return self._open_unrar(tmp_path, inf, psw, tmpfile=tmp_path, force_file=True)
1163 # extract using unrar
1164 def _open_unrar(self
, rarfile
, inf
, psw
=None, tmpfile
=None, force_file
=False):
1165 cmd
= [UNRAR_TOOL
] + list(OPEN_ARGS
)
1166 add_password_arg(cmd
, psw
)
1170 # not giving filename avoids encoding related problems
1171 if not tmpfile
or force_file
:
1173 if PATH_SEP
!= os
.sep
:
1174 fn
= fn
.replace(PATH_SEP
, os
.sep
)
1177 # read from unrar pipe
1178 return PipeReader(self
, inf
, cmd
, tmpfile
)
1184 class Rar3Info(RarInfo
):
1185 """RAR3 specific fields."""
1186 extract_version
= 15
1191 header_offset
= None
1196 # make sure some rar5 fields are always present
1198 blake2sp_hash
= None
1200 def _must_disable_hack(self
):
1201 if self
.type == RAR_BLOCK_FILE
:
1202 if self
.flags
& RAR_FILE_PASSWORD
:
1204 elif self
.flags
& (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER
):
1206 elif self
.type == RAR_BLOCK_MAIN
:
1207 if self
.flags
& (RAR_MAIN_SOLID | RAR_MAIN_PASSWORD
):
1212 class RAR3Parser(CommonParser
):
1213 """Parse RAR3 file format.
1215 _expect_sig
= RAR_ID
1216 _last_aes_key
= (None, None, None) # (salt, key, iv)
1218 def _decrypt_header(self
, fd
):
1219 if not _have_crypto
:
1220 raise NoCrypto('Cannot parse encrypted headers - no crypto')
1222 if self
._last
_aes
_key
[0] == salt
:
1223 key
, iv
= self
._last
_aes
_key
[1:]
1225 key
, iv
= rar3_s2k(self
._password
, salt
)
1226 self
._last
_aes
_key
= (salt
, key
, iv
)
1227 return HeaderDecrypt(fd
, key
, iv
)
1230 def _parse_block_header(self
, fd
):
1232 h
.header_offset
= fd
.tell()
1234 # read and parse base header
1235 buf
= fd
.read(S_BLK_HDR
.size
)
1238 t
= S_BLK_HDR
.unpack_from(buf
)
1239 h
.header_crc
, h
.type, h
.flags
, h
.header_size
= t
1242 if h
.header_size
> S_BLK_HDR
.size
:
1243 hdata
= buf
+ fd
.read(h
.header_size
- S_BLK_HDR
.size
)
1246 h
.data_offset
= fd
.tell()
1249 if len(hdata
) != h
.header_size
:
1250 self
._set
_error
('Unexpected EOF when reading header')
1253 pos
= S_BLK_HDR
.size
# block has data associated with it?
1256 if h
.flags
& RAR_LONG_BLOCK
:
1257 h
.add_size
, pos
= load_le32(hdata
, pos
)
1261 # parse interesting ones, decide header boundaries for crc
1262 if h
.type == RAR_BLOCK_MARK
:
1264 elif h
.type == RAR_BLOCK_MAIN
:
1266 if h
.flags
& RAR_MAIN_ENCRYPTVER
:
1269 if h
.flags
& RAR_MAIN_COMMENT
:
1270 self
._parse
_subblocks
(h
, hdata
, pos
)
1271 elif h
.type == RAR_BLOCK_FILE
:
1272 pos
= self
._parse
_file
_header
(h
, hdata
, pos
- 4)
1274 if h
.flags
& RAR_FILE_COMMENT
:
1275 pos
= self
._parse
_subblocks
(h
, hdata
, pos
)
1276 elif h
.type == RAR_BLOCK_SUB
:
1277 pos
= self
._parse
_file
_header
(h
, hdata
, pos
- 4)
1278 crc_pos
= h
.header_size
1279 elif h
.type == RAR_BLOCK_OLD_AUTH
:
1282 elif h
.type == RAR_BLOCK_OLD_EXTRA
:
1286 crc_pos
= h
.header_size
1289 if h
.type == RAR_BLOCK_OLD_SUB
:
1290 crcdat
= hdata
[2:] + fd
.read(h
.add_size
)
1292 crcdat
= hdata
[2:crc_pos
]
1294 calc_crc
= rar_crc32(crcdat
) & 0xFFFF
1296 # return good header
1297 if h
.header_crc
== calc_crc
:
1300 # header parsing failed.
1301 self
._set
_error
('Header CRC error (%02x): exp=%x got=%x (xlen = %d)',
1302 h
.type, h
.header_crc
, calc_crc
, len(crcdat
))
# instead of panicking, send eof
1307 # read file-specific header
1308 def _parse_file_header(self
, h
, hdata
, pos
):
1309 fld
= S_FILE_HDR
.unpack_from(hdata
, pos
)
1310 pos
+= S_FILE_HDR
.size
1312 h
.compress_size
= fld
[0]
1313 h
.file_size
= fld
[1]
1316 h
.date_time
= parse_dos_time(fld
[4])
1317 h
.mtime
= to_datetime(h
.date_time
)
1318 h
.extract_version
= fld
[5]
1319 h
.compress_type
= fld
[6]
1323 h
._md
_class
= CRC32Context
1324 h
._md
_expect
= h
.CRC
1326 if h
.flags
& RAR_FILE_LARGE
:
1327 h1
, pos
= load_le32(hdata
, pos
)
1328 h2
, pos
= load_le32(hdata
, pos
)
1329 h
.compress_size |
= h1
<< 32
1330 h
.file_size |
= h2
<< 32
1331 h
.add_size
= h
.compress_size
1333 name
, pos
= load_bytes(hdata
, name_size
, pos
)
1334 if h
.flags
& RAR_FILE_UNICODE
:
1335 nul
= name
.find(ZERO
)
1336 h
.orig_filename
= name
[:nul
]
1337 u
= UnicodeFilename(h
.orig_filename
, name
[nul
+ 1:])
1338 h
.filename
= u
.decode()
1340 # if parsing failed fall back to simple name
1342 h
.filename
= self
._decode
(h
.orig_filename
)
1344 h
.orig_filename
= name
1345 h
.filename
= self
._decode
(name
)
1347 # change separator, if requested
1348 if PATH_SEP
!= '\\':
1349 h
.filename
= h
.filename
.replace('\\', PATH_SEP
)
1351 if h
.flags
& RAR_FILE_SALT
:
1352 h
.salt
, pos
= load_bytes(hdata
, 8, pos
)
1356 # optional extended time stamps
1357 if h
.flags
& RAR_FILE_EXTTIME
:
1358 pos
= _parse_ext_time(h
, hdata
, pos
)
1360 h
.mtime
= h
.atime
= h
.ctime
= h
.arctime
= None
1364 # find old-style comment subblock
1365 def _parse_subblocks(self
, h
, hdata
, pos
):
1366 while pos
< len(hdata
):
1367 # ordinary block header
1368 t
= S_BLK_HDR
.unpack_from(hdata
, pos
)
1369 ___scrc
, stype
, sflags
, slen
= t
1370 pos_next
= pos
+ slen
1371 pos
+= S_BLK_HDR
.size
1377 # followed by block-specific header
1378 if stype
== RAR_BLOCK_OLD_COMMENT
and pos
+ S_COMMENT_HDR
.size
<= pos_next
:
1379 declen
, ver
, meth
, crc
= S_COMMENT_HDR
.unpack_from(hdata
, pos
)
1380 pos
+= S_COMMENT_HDR
.size
1381 data
= hdata
[pos
: pos_next
]
1382 cmt
= rar3_decompress(ver
, meth
, data
, declen
, sflags
,
1383 crc
, self
._password
)
1384 if not self
._crc
_check
:
1385 h
.comment
= self
._decode
_comment
(cmt
)
1386 elif rar_crc32(cmt
) & 0xFFFF == crc
:
1387 h
.comment
= self
._decode
_comment
(cmt
)
1392 def _read_comment_v3(self
, inf
, psw
=None):
1395 with
XFile(inf
.volume_file
) as rf
:
1396 rf
.seek(inf
.data_offset
)
1397 data
= rf
.read(inf
.compress_size
)
1400 cmt
= rar3_decompress(inf
.extract_version
, inf
.compress_type
, data
,
1401 inf
.file_size
, inf
.flags
, inf
.CRC
, psw
, inf
.salt
)
1405 crc
= rar_crc32(cmt
)
1409 return self
._decode
_comment
(cmt
)
def _decode(self, val):
    """Decode the byte string *val* to text.

    Each codec in TRY_ENCODINGS is tried in order with strict error
    handling, and the first successful result is returned.  If none of
    them fits, the parser's configured charset is used with the
    'replace' error handler.
    """
    for codec in TRY_ENCODINGS:
        try:
            return val.decode(codec)
        except UnicodeError:
            # this codec does not fit; move on to the next candidate
            continue
    return val.decode(self._charset, 'replace')
1419 def _decode_comment(self
, val
):
1420 return self
._decode
(val
)
1422 def process_entry(self
, fd
, item
):
1423 if item
.type == RAR_BLOCK_FILE
:
1424 # use only first part
1425 if (item
.flags
& RAR_FILE_SPLIT_BEFORE
) == 0:
1426 self
._info
_map
[item
.filename
] = item
1427 self
._info
_list
.append(item
)
1428 elif len(self
._info
_list
) > 0:
1429 # final crc is in last block
1430 old
= self
._info
_list
[-1]
1432 old
._md
_expect
= item
._md
_expect
1433 old
.compress_size
+= item
.compress_size
1435 # parse new-style comment
1436 if item
.type == RAR_BLOCK_SUB
and item
.filename
== 'CMT':
1437 if item
.flags
& (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER
):
1439 elif item
.flags
& RAR_FILE_SOLID
:
1441 cmt
= self
._read
_comment
_v
3(item
, self
._password
)
1442 if len(self
._info
_list
) > 0:
1443 old
= self
._info
_list
[-1]
1447 cmt
= self
._read
_comment
_v
3(item
, self
._password
)
1450 if item
.type == RAR_BLOCK_MAIN
:
1451 if item
.flags
& RAR_MAIN_COMMENT
:
1452 self
.comment
= item
.comment
1453 if item
.flags
& RAR_MAIN_PASSWORD
:
1454 self
._needs
_password
= True
1456 # put file compressed data into temporary .rar archive, and run
1457 # unrar on that, thus avoiding unrar going over whole archive
def _open_hack(self, inf, psw):
    """Open *inf* through a temporary archive built by _open_hack_core().

    Supplies the RAR3-specific prefix (signature plus a main header)
    and an empty suffix.
    """
    # main header fields: crc, type (0x73 == RAR_BLOCK_MAIN), flags, size
    main_header = S_BLK_HDR.pack(0x90CF, 0x73, 0, 13)
    # followed by the zeroed res1 (2 bytes) and res2 (4 bytes) fields
    reserved = ZERO * (2 + 4)
    prefix = RAR_ID + main_header + reserved
    return self._open_hack_core(inf, psw, prefix, EMPTY)
1467 class Rar5Info(RarInfo
):
1468 """Shared fields for RAR5 records.
1470 extract_version
= 50
1473 header_offset
= None
1480 block_extra_size
= 0
1483 volume_number
= None
1487 def _must_disable_hack(self
):
1491 class Rar5BaseFile(Rar5Info
):
1492 """Shared sturct for file & service record.
1496 file_encryption
= (0, 0, 0, EMPTY
, EMPTY
, EMPTY
)
1497 file_compress_flags
= None
1501 blake2sp_hash
= None
1503 def _must_disable_hack(self
):
1504 if self
.flags
& RAR_FILE_PASSWORD
:
1506 if self
.block_flags
& (RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER
):
1508 if self
.file_compress_flags
& RAR5_COMPR_SOLID
:
class Rar5FileInfo(Rar5BaseFile):
    """Header record for a file entry of a RAR5 archive."""

    # block type tag shared with RAR3 file blocks
    type = RAR_BLOCK_FILE
class Rar5ServiceInfo(Rar5BaseFile):
    """Header record for a service entry of a RAR5 archive."""

    # block type tag shared with RAR3 sub-blocks
    type = RAR_BLOCK_SUB
1527 class Rar5MainInfo(Rar5Info
):
1528 """RAR5 archive main record.
1530 type = RAR_BLOCK_MAIN
1532 main_volume_number
= None
1534 def _must_disable_hack(self
):
1535 if self
.main_flags
& RAR5_MAIN_FLAG_SOLID
:
1540 class Rar5EncryptionInfo(Rar5Info
):
1541 """RAR5 archive header encryption record.
1543 type = RAR5_BLOCK_ENCRYPTION
1544 encryption_algo
= None
1545 encryption_flags
= None
1546 encryption_kdf_count
= None
1547 encryption_salt
= None
1548 encryption_check_value
= None
1550 def needs_password(self
):
1554 class Rar5EndArcInfo(Rar5Info
):
1555 """RAR5 end of archive record.
1557 type = RAR_BLOCK_ENDARC
1561 class RAR5Parser(CommonParser
):
1562 """Parse RAR5 format.
1564 _expect_sig
= RAR5_ID
1567 # AES encrypted headers
1568 _last_aes256_key
= (-1, None, None) # (kdf_count, salt, key)
1570 def _gen_key(self
, kdf_count
, salt
):
1571 if self
._last
_aes
256_key
[:2] == (kdf_count
, salt
):
1572 return self
._last
_aes
256_key
[2]
1574 raise BadRarFile('Too large kdf_count')
1575 psw
= self
._password
1576 if isinstance(psw
, unicode):
1577 psw
= psw
.encode('utf8')
1578 key
= pbkdf2_sha256(psw
, salt
, 1 << kdf_count
)
1579 self
._last
_aes
256_key
= (kdf_count
, salt
, key
)
1582 def _decrypt_header(self
, fd
):
1583 if not _have_crypto
:
1584 raise NoCrypto('Cannot parse encrypted headers - no crypto')
1585 h
= self
._hdrenc
_main
1586 key
= self
._gen
_key
(h
.encryption_kdf_count
, h
.encryption_salt
)
1588 return HeaderDecrypt(fd
, key
, iv
)
1591 def _parse_block_header(self
, fd
):
1592 header_offset
= fd
.tell()
1595 start_bytes
= fd
.read(preload
)
1596 header_crc
, pos
= load_le32(start_bytes
, 0)
1597 hdrlen
, pos
= load_vint(start_bytes
, pos
)
1598 if hdrlen
> 2 * 1024 * 1024:
1600 header_size
= pos
+ hdrlen
1602 # read full header, check for EOF
1603 hdata
= start_bytes
+ fd
.read(header_size
- len(start_bytes
))
1604 if len(hdata
) != header_size
:
1605 self
._set
_error
('Unexpected EOF when reading header')
1607 data_offset
= fd
.tell()
1609 calc_crc
= rar_crc32(memoryview(hdata
)[4:])
1610 if header_crc
!= calc_crc
:
1611 # header parsing failed.
1612 self
._set
_error
('Header CRC error: exp=%x got=%x (xlen = %d)',
1613 header_crc
, calc_crc
, len(hdata
))
1616 block_type
, pos
= load_vint(hdata
, pos
)
1618 if block_type
== RAR5_BLOCK_MAIN
:
1619 h
, pos
= self
._parse
_block
_common
(Rar5MainInfo(), hdata
)
1620 h
= self
._parse
_main
_block
(h
, hdata
, pos
)
1621 elif block_type
== RAR5_BLOCK_FILE
:
1622 h
, pos
= self
._parse
_block
_common
(Rar5FileInfo(), hdata
)
1623 h
= self
._parse
_file
_block
(h
, hdata
, pos
)
1624 elif block_type
== RAR5_BLOCK_SERVICE
:
1625 h
, pos
= self
._parse
_block
_common
(Rar5ServiceInfo(), hdata
)
1626 h
= self
._parse
_file
_block
(h
, hdata
, pos
)
1627 elif block_type
== RAR5_BLOCK_ENCRYPTION
:
1628 h
, pos
= self
._parse
_block
_common
(Rar5EncryptionInfo(), hdata
)
1629 h
= self
._parse
_encryption
_block
(h
, hdata
, pos
)
1630 elif block_type
== RAR5_BLOCK_ENDARC
:
1631 h
, pos
= self
._parse
_block
_common
(Rar5EndArcInfo(), hdata
)
1632 h
= self
._parse
_endarc
_block
(h
, hdata
, pos
)
1636 h
.header_offset
= header_offset
1637 h
.data_offset
= data_offset
def _parse_block_common(self, h, hdata):
    """Parse the fields common to every RAR5 block header.

    `h` is the record object to fill, `hdata` the raw header bytes
    starting at the header CRC.  Returns ``(h, pos)`` where `pos` is the
    offset of the first type-specific field.
    """
    h.header_crc, pos = load_le32(hdata, 0)
    hdrlen, pos = load_vint(hdata, pos)
    # total size = crc + length vint (both already consumed) + hdrlen
    h.header_size = hdrlen + pos
    h.block_type, pos = load_vint(hdata, pos)
    h.block_flags, pos = load_vint(hdata, pos)

    blk_flags = h.block_flags
    has_data_area = blk_flags & RAR5_BLOCK_FLAG_DATA_AREA

    # optional size fields, present only when flagged
    if blk_flags & RAR5_BLOCK_FLAG_EXTRA_DATA:
        h.block_extra_size, pos = load_vint(hdata, pos)
    if has_data_area:
        h.add_size, pos = load_vint(hdata, pos)

    h.compress_size = h.add_size

    # mirror the RAR5 block flags into the generic flag bits
    if blk_flags & RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN:
        h.flags |= RAR_SKIP_IF_UNKNOWN
    if has_data_area:
        h.flags |= RAR_LONG_BLOCK
    return h, pos
def _parse_main_block(self, h, hdata, pos):
    """Parse the type-specific part of a RAR5 main archive header.

    `h` is the record already filled by `_parse_block_common`, `hdata`
    the raw header bytes and `pos` the offset of the main flags field.
    Sets `main_flags`, the optional `main_volume_number`, and the
    generic `flags` bits derived from them.  Returns `h`.
    """
    h.main_flags, pos = load_vint(hdata, pos)
    if h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR:
        # load_vint() returns (value, new_pos); unpack it so the attribute
        # holds the integer volume number and pos moves past the field.
        h.main_volume_number, pos = load_vint(hdata, pos)

    h.flags |= RAR_MAIN_NEWNUMBERING
    if h.main_flags & RAR5_MAIN_FLAG_SOLID:
        h.flags |= RAR_MAIN_SOLID
    if h.main_flags & RAR5_MAIN_FLAG_ISVOL:
        h.flags |= RAR_MAIN_VOLUME
    if h.main_flags & RAR5_MAIN_FLAG_RECOVERY:
        h.flags |= RAR_MAIN_RECOVERY
    # an archive encryption header was seen earlier in this archive
    if self._hdrenc_main:
        h.flags |= RAR_MAIN_PASSWORD
    # no volume-number field: flag this as the first volume
    if (h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR) == 0:
        h.flags |= RAR_MAIN_FIRSTVOLUME

    return h
def _parse_file_block(self, h, hdata, pos):
    """Parse the type-specific part of a RAR5 file or service header.

    `h` is the record already filled by `_parse_block_common`, `hdata`
    the raw header bytes and `pos` the offset of the file flags field.
    Returns `h`.
    """
    h.file_flags, pos = load_vint(hdata, pos)
    h.file_size, pos = load_vint(hdata, pos)
    h.mode, pos = load_vint(hdata, pos)

    # optional fixed-size fields
    if h.file_flags & RAR5_FILE_FLAG_HAS_MTIME:
        h.mtime, pos = load_unixtime(hdata, pos)
        h.date_time = h.mtime.timetuple()[:6]
    if h.file_flags & RAR5_FILE_FLAG_HAS_CRC32:
        h.CRC, pos = load_le32(hdata, pos)
        h._md_class = CRC32Context
        h._md_expect = h.CRC

    h.file_compress_flags, pos = load_vint(hdata, pos)
    h.file_host_os, pos = load_vint(hdata, pos)
    h.orig_filename, pos = load_vstr(hdata, pos)
    h.filename = h.orig_filename.decode('utf8', 'replace')

    # use compatible values
    is_windows = h.file_host_os == RAR5_OS_WINDOWS
    h.host_os = RAR_OS_WIN32 if is_windows else RAR_OS_UNIX
    h.compress_type = RAR_M0 + ((h.file_compress_flags >> 7) & 7)

    if h.block_extra_size:
        # allow 1 byte of garbage
        limit = len(hdata) - 1
        while pos < limit:
            # each extra record is a vint size followed by that many bytes
            xsize, pos = load_vint(hdata, pos)
            xdata, pos = load_bytes(hdata, xsize, pos)
            self._process_file_extra(h, xdata)

    # mirror RAR5-specific flags into the generic flag bits
    if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE:
        h.flags |= RAR_FILE_SPLIT_BEFORE
    if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_AFTER:
        h.flags |= RAR_FILE_SPLIT_AFTER
    if h.file_flags & RAR5_FILE_FLAG_ISDIR:
        h.flags |= RAR_FILE_DIRECTORY
    if h.file_compress_flags & RAR5_COMPR_SOLID:
        h.flags |= RAR_FILE_SOLID

    return h
def _parse_endarc_block(self, h, hdata, pos):
    """Parse the flags of a RAR5 end-of-archive header.  Returns `h`."""
    h.endarc_flags, pos = load_vint(hdata, pos)
    has_next_volume = h.endarc_flags & RAR5_ENDARC_FLAG_NEXT_VOL
    if has_next_volume:
        h.flags |= RAR_ENDARC_NEXT_VOLUME
    return h
def _parse_encryption_block(self, h, hdata, pos):
    """Parse a RAR5 archive header-encryption record.

    Fills the `encryption_*` attributes of `h` from `hdata` starting at
    `pos`, and remembers the record on the parser as `_hdrenc_main`.
    Returns `h`.

    Raises BadRarFile when the cipher is not AES-256.
    """
    h.encryption_algo, pos = load_vint(hdata, pos)
    h.encryption_flags, pos = load_vint(hdata, pos)
    h.encryption_kdf_count, pos = load_byte(hdata, pos)
    h.encryption_salt, pos = load_bytes(hdata, 16, pos)
    if h.encryption_flags & RAR5_ENC_FLAG_HAS_CHECKVAL:
        # load_bytes() returns (data, new_pos); unpack it so the attribute
        # holds the 12 check bytes rather than the whole tuple.
        h.encryption_check_value, pos = load_bytes(hdata, 12, pos)
    if h.encryption_algo != RAR5_XENC_CIPHER_AES256:
        raise BadRarFile('Unsupported header encryption cipher')
    self._hdrenc_main = h
    return h
def _process_file_extra(self, h, xdata):
    """Dispatch one extra-area record of a file/service header.

    `xdata` starts with a vint record type; the rest is passed to the
    matching `_parse_file_*` handler together with the record `h`.
    """
    xtype, pos = load_vint(xdata, 0)
    handlers = (
        (RAR5_XFILE_TIME, self._parse_file_xtime),
        (RAR5_XFILE_ENCRYPTION, self._parse_file_encryption),
        (RAR5_XFILE_HASH, self._parse_file_hash),
        (RAR5_XFILE_VERSION, self._parse_file_version),
        (RAR5_XFILE_REDIR, self._parse_file_redir),
        (RAR5_XFILE_OWNER, self._parse_file_owner),
    )
    for known_type, handler in handlers:
        if xtype == known_type:
            handler(h, xdata, pos)
            break
    # RAR5_XFILE_SERVICE and any other record type: nothing to do
# extra block for file time record
def _parse_file_xtime(self, h, xdata, pos):
    """Parse the time extra record and store the timestamps on `h`.

    The flags vint selects the on-disk format (unix vs. windows time)
    and which of mtime / ctime / atime follow, in that order.
    """
    tflags, pos = load_vint(xdata, pos)

    # pick the loader matching the stored timestamp format
    if tflags & RAR5_XTIME_UNIXTIME:
        load_time = load_unixtime
    else:
        load_time = load_windowstime

    if tflags & RAR5_XTIME_HAS_MTIME:
        h.mtime, pos = load_time(xdata, pos)
        h.date_time = h.mtime.timetuple()[:6]
    if tflags & RAR5_XTIME_HAS_CTIME:
        h.ctime, pos = load_time(xdata, pos)
    if tflags & RAR5_XTIME_HAS_ATIME:
        h.atime, pos = load_time(xdata, pos)
1774 # just remember encryption info
1775 def _parse_file_encryption(self
, h
, xdata
, pos
):
1776 algo
, pos
= load_vint(xdata
, pos
)
1777 flags
, pos
= load_vint(xdata
, pos
)
1778 kdf_count
, pos
= load_byte(xdata
, pos
)
1779 salt
, pos
= load_bytes(xdata
, 16, pos
)
1780 iv
, pos
= load_bytes(xdata
, 16, pos
)
1782 if flags
& RAR5_XENC_CHECKVAL
:
1783 checkval
, pos
= load_bytes(xdata
, 12, pos
)
1784 if flags
& RAR5_XENC_TWEAKED
:
1786 h
._md
_class
= NoHashContext
1788 h
.file_encryption
= (algo
, flags
, kdf_count
, salt
, iv
, checkval
)
1789 h
.flags |
= RAR_FILE_PASSWORD
def _parse_file_hash(self, h, xdata, pos):
    """Parse the hash extra record.

    Only the BLAKE2sp hash type is handled: the 32-byte digest is stored
    on `h`, and it becomes the expected checksum when blake2 support is
    available and the entry's encryption flags lack the "tweaked" bit.
    """
    hash_type, pos = load_vint(xdata, pos)
    if hash_type != RAR5_XHASH_BLAKE2SP:
        return

    h.blake2sp_hash, pos = load_bytes(xdata, 32, pos)
    if not _have_blake2:
        return
    # file_encryption[1] holds the encryption flags
    if (h.file_encryption[1] & RAR5_XENC_TWEAKED) == 0:
        h._md_class = Blake2SP
        h._md_expect = h.blake2sp_hash
def _parse_file_version(self, h, xdata, pos):
    """Parse the file version extra record into ``h.file_version``.

    The stored value is the tuple ``(flags, version)``.
    """
    ver_flags, pos = load_vint(xdata, pos)
    ver_number, pos = load_vint(xdata, pos)
    h.file_version = (ver_flags, ver_number)
def _parse_file_redir(self, h, xdata, pos):
    """Parse the redirection extra record into ``h.file_redir``.

    The stored value is ``(redir_type, redir_flags, redir_name)``, with
    the name decoded as UTF-8 and undecodable bytes replaced.
    """
    redir_type, pos = load_vint(xdata, pos)
    redir_flags, pos = load_vint(xdata, pos)
    raw_name, pos = load_vstr(xdata, pos)
    target = raw_name.decode('utf8', 'replace')
    h.file_redir = (redir_type, redir_flags, target)
def _parse_file_owner(self, h, xdata, pos):
    """Parse the owner extra record into ``h.file_owner``.

    The stored value is ``(user_name, group_name, user_id, group_id)``;
    a field not flagged as present in the record stays None.
    """
    user_name = None
    group_name = None
    user_id = None
    group_id = None

    flags, pos = load_vint(xdata, pos)
    # fields appear in this fixed order, each only when its flag is set
    if flags & RAR5_XOWNER_UNAME:
        user_name, pos = load_vstr(xdata, pos)
    if flags & RAR5_XOWNER_GNAME:
        group_name, pos = load_vstr(xdata, pos)
    if flags & RAR5_XOWNER_UID:
        user_id, pos = load_vint(xdata, pos)
    if flags & RAR5_XOWNER_GID:
        group_id, pos = load_vint(xdata, pos)

    h.file_owner = (user_name, group_name, user_id, group_id)
1826 def process_entry(self
, fd
, item
):
1827 if item
.block_type
== RAR5_BLOCK_FILE
:
1828 # use only first part
1829 if (item
.block_flags
& RAR5_BLOCK_FLAG_SPLIT_BEFORE
) == 0:
1830 self
._info
_map
[item
.filename
] = item
1831 self
._info
_list
.append(item
)
1832 elif len(self
._info
_list
) > 0:
1833 # final crc is in last block
1834 old
= self
._info
_list
[-1]
1836 old
._md
_expect
= item
._md
_expect
1837 old
.blake2sp_hash
= item
.blake2sp_hash
1838 old
.compress_size
+= item
.compress_size
1839 elif item
.block_type
== RAR5_BLOCK_SERVICE
:
1840 if item
.filename
== 'CMT':
1841 self
._load
_comment
(fd
, item
)
1843 def _load_comment(self
, fd
, item
):
1844 if item
.block_flags
& (RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER
):
1846 if item
.compress_type
!= RAR_M0
:
1849 if item
.flags
& RAR_FILE_PASSWORD
:
1850 algo
, ___flags
, kdf_count
, salt
, iv
, ___checkval
= item
.file_encryption
1851 if algo
!= RAR5_XENC_CIPHER_AES256
:
1853 key
= self
._gen
_key
(kdf_count
, salt
)
1854 f
= HeaderDecrypt(fd
, key
, iv
)
1855 cmt
= f
.read(item
.file_size
)
1858 with self
._open
_clear
(item
) as cmtstream
:
1859 cmt
= cmtstream
.read()
1861 # rar bug? - appends zero to comment
1862 cmt
= cmt
.split(ZERO
, 1)[0]
1863 self
.comment
= cmt
.decode('utf8')
def _open_hack(self, inf, psw):
    """Build minimal RAR5 framing and hand off to `_open_hack_core`.

    The prefix is the RAR5 signature plus a bare main header; the suffix
    is a bare end-of-archive header.  Each header is preceded by its
    CRC32 packed as a little-endian 32-bit value.
    """
    def with_crc(body):
        # prepend the CRC32 of the header body
        return S_LONG.pack(rar_crc32(body)) + body

    # len, type, blk_flags, flags
    main_hdr = with_crc(b'\x03\x01\x00\x00')
    endarc_hdr = with_crc(b'\x03\x05\x00\x00')
    return self._open_hack_core(inf, psw, RAR5_ID + main_hdr, endarc_hdr)
1877 class UnicodeFilename(object):
1878 """Handle RAR3 unicode filename decompression.
1880 def __init__(self
, name
, encdata
):
1881 self
.std_name
= bytearray(name
)
1882 self
.encdata
= bytearray(encdata
)
1883 self
.pos
= self
.encpos
= 0
1884 self
.buf
= bytearray()
1888 """Copy encoded byte."""
1890 c
= self
.encdata
[self
.encpos
]
1898 """Copy byte from 8-bit representation."""
1900 return self
.std_name
[self
.pos
]
1905 def put(self
, lo
, hi
):
1906 """Copy 16-bit value to result."""
1912 """Decompress compressed UTF16 value."""
1913 hi
= self
.enc_byte()
1915 while self
.encpos
< len(self
.encdata
):
1917 flags
= self
.enc_byte()
1920 t
= (flags
>> flagbits
) & 3
1922 self
.put(self
.enc_byte(), 0)
1924 self
.put(self
.enc_byte(), hi
)
1926 self
.put(self
.enc_byte(), self
.enc_byte())
1931 for _
in range((n
& 0x7f) + 2):
1932 lo
= (self
.std_byte() + c
) & 0xFF
1935 for _
in range(n
+ 2):
1936 self
.put(self
.std_byte(), 0)
1937 return self
.buf
.decode("utf-16le", "replace")
1940 class RarExtFile(RawIOBase
):
1941 """Base class for file-like object that :meth:`RarFile.open` returns.
1943 Provides public methods and common crc checking.
1946 - no short reads - .read() and .readinfo() read as much as requested.
1947 - no internal buffer, use io.BufferedReader for that.
1950 #: Filename of the archive entry
1953 def __init__(self
, parser
, inf
):
1954 super(RarExtFile
, self
).__init
__()
1956 # standard io.* properties
1957 self
.name
= inf
.filename
1960 self
._parser
= parser
1964 self
._returncode
= 0
1966 self
._md
_context
= None
1973 md_class
= self
._inf
._md
_class
or NoHashContext
1974 self
._md
_context
= md_class()
1976 self
._remain
= self
._inf
.file_size
1978 def read(self
, cnt
=None):
1979 """Read all or specified amount of data from archive entry."""
1982 if cnt
is None or cnt
< 0:
1984 elif cnt
> self
._remain
:
1990 data
= self
._read
(cnt
)
1992 self
._md
_context
.update(data
)
1993 self
._remain
-= len(data
)
1994 if len(data
) != cnt
:
1995 raise BadRarFile("Failed the read enough data")
1998 if not data
or self
._remain
== 0:
2004 """Check final CRC."""
2005 final
= self
._md
_context
.digest()
2006 exp
= self
._inf
._md
_expect
2011 if self
._returncode
:
2012 check_returncode(self
, '')
2013 if self
._remain
!= 0:
2014 raise BadRarFile("Failed the read enough data")
2016 raise BadRarFile("Corrupt file - CRC check failed: %s - exp=%r got=%r" % (
2017 self
._inf
.filename
, exp
, final
))
2019 def _read(self
, cnt
):
2020 """Actual read that gets sanitized cnt."""
2023 """Close open resources."""
2025 super(RarExtFile
, self
).close()
2032 """Hook delete to make sure tempfile is removed."""
2035 def readinto(self
, buf
):
2036 """Zero-copy read directly into buffer.
2040 raise NotImplementedError('readinto')
2043 """Return current reading position in uncompressed data."""
2044 return self
._inf
.file_size
- self
._remain
2046 def seek(self
, ofs
, whence
=0):
2049 On uncompressed files, the seeking works by actual
2050 seeks so it's fast. On compresses files its slow
2051 - forward seeking happends by reading ahead,
2052 backwards by re-opening and decompressing from the start.
2055 # disable crc check when seeking
2056 self
._md
_context
= NoHashContext()
2058 fsize
= self
._inf
.file_size
2059 cur_ofs
= self
.tell()
2061 if whence
== 0: # seek from beginning of file
2063 elif whence
== 1: # seek from current position
2064 new_ofs
= cur_ofs
+ ofs
2065 elif whence
== 2: # seek from end of file
2066 new_ofs
= fsize
+ ofs
2068 raise ValueError('Invalid value for whence')
2073 elif new_ofs
> fsize
:
2076 # do the actual seek
2077 if new_ofs
>= cur_ofs
:
2078 self
._skip
(new_ofs
- cur_ofs
)
2085 def _skip(self
, cnt
):
2086 """Read and discard data"""
2089 buf
= self
.read(8192)
2091 buf
= self
.read(cnt
)
2103 Writing is not supported."""
2109 Seeking is supported, although it's slow on compressed files.
2114 """Read all remaining data"""
2115 # avoid RawIOBase default impl
2119 class PipeReader(RarExtFile
):
2120 """Read data from pipe, handle tempfile cleanup."""
2122 def __init__(self
, rf
, inf
, cmd
, tempfile
=None):
2125 self
._tempfile
= tempfile
2126 super(PipeReader
, self
).__init
__(rf
, inf
)
2128 def _close_proc(self
):
2131 if self
._proc
.stdout
:
2132 self
._proc
.stdout
.close()
2133 if self
._proc
.stdin
:
2134 self
._proc
.stdin
.close()
2135 if self
._proc
.stderr
:
2136 self
._proc
.stderr
.close()
2138 self
._returncode
= self
._proc
.returncode
2142 super(PipeReader
, self
)._open
()
2147 # launch new process
2148 self
._returncode
= 0
2149 self
._proc
= custom_popen(self
._cmd
)
2150 self
._fd
= self
._proc
.stdout
2152 # avoid situation where unrar waits on stdin
2153 if self
._proc
.stdin
:
2154 self
._proc
.stdin
.close()
2156 def _read(self
, cnt
):
2157 """Read from pipe."""
2159 # normal read is usually enough
2160 data
= self
._fd
.read(cnt
)
2161 if len(data
) == cnt
or not data
:
2164 # short read, try looping
2168 data
= self
._fd
.read(cnt
)
2173 return EMPTY
.join(buf
)
2176 """Close open resources."""
2179 super(PipeReader
, self
).close()
2183 os
.unlink(self
._tempfile
)
2186 self
._tempfile
= None
2188 def readinto(self
, buf
):
2189 """Zero-copy read directly into buffer."""
2191 if cnt
> self
._remain
:
2193 vbuf
= memoryview(buf
)
2196 res
= self
._fd
.readinto(vbuf
[got
: cnt
])
2199 self
._md
_context
.update(vbuf
[got
: got
+ res
])
2205 class DirectReader(RarExtFile
):
2206 """Read uncompressed data directly from archive.
2213 super(DirectReader
, self
)._open
()
2215 self
._volfile
= self
._inf
.volume_file
2216 self
._fd
= XFile(self
._volfile
, 0)
2217 self
._fd
.seek(self
._inf
.header_offset
, 0)
2218 self
._cur
= self
._parser
._parse
_header
(self
._fd
)
2219 self
._cur
_avail
= self
._cur
.add_size
2221 def _skip(self
, cnt
):
2222 """RAR Seek, skipping through rar files to get to correct position
2227 if self
._cur
_avail
== 0:
2228 if not self
._open
_next
():
2231 # fd is in read pos, do the read
2232 if cnt
> self
._cur
_avail
:
2233 cnt
-= self
._cur
_avail
2234 self
._remain
-= self
._cur
_avail
2237 self
._fd
.seek(cnt
, 1)
2238 self
._cur
_avail
-= cnt
2242 def _read(self
, cnt
):
2243 """Read from potentially multi-volume archive."""
2248 if self
._cur
_avail
== 0:
2249 if not self
._open
_next
():
2252 # fd is in read pos, do the read
2253 if cnt
> self
._cur
_avail
:
2254 data
= self
._fd
.read(self
._cur
_avail
)
2256 data
= self
._fd
.read(cnt
)
2262 self
._cur
_avail
-= len(data
)
2267 return EMPTY
.join(buf
)
2269 def _open_next(self
):
2270 """Proceed to next volume."""
2272 # is the file split over archives?
2273 if (self
._cur
.flags
& RAR_FILE_SPLIT_AFTER
) == 0:
2281 self
._volfile
= self
._parser
._next
_volname
(self
._volfile
)
2282 fd
= open(self
._volfile
, "rb", 0)
2284 sig
= fd
.read(len(self
._parser
._expect
_sig
))
2285 if sig
!= self
._parser
._expect
_sig
:
2286 raise BadRarFile("Invalid signature")
2288 # loop until first file header
2290 cur
= self
._parser
._parse
_header
(fd
)
2292 raise BadRarFile("Unexpected EOF")
2293 if cur
.type in (RAR_BLOCK_MARK
, RAR_BLOCK_MAIN
):
2295 fd
.seek(cur
.add_size
, 1)
2297 if cur
.orig_filename
!= self
._inf
.orig_filename
:
2298 raise BadRarFile("Did not found file entry")
2300 self
._cur
_avail
= cur
.add_size
2303 def readinto(self
, buf
):
2304 """Zero-copy read directly into buffer."""
2306 vbuf
= memoryview(buf
)
2307 while got
< len(buf
):
2309 if self
._cur
_avail
== 0:
2310 if not self
._open
_next
():
2313 # length for next read
2314 cnt
= len(buf
) - got
2315 if cnt
> self
._cur
_avail
:
2316 cnt
= self
._cur
_avail
2318 # read into temp view
2319 res
= self
._fd
.readinto(vbuf
[got
: got
+ cnt
])
2322 self
._md
_context
.update(vbuf
[got
: got
+ res
])
2323 self
._cur
_avail
-= res
2329 class HeaderDecrypt(object):
2330 """File-like object that decrypts from another file"""
2331 def __init__(self
, f
, key
, iv
):
2333 self
.ciph
= AES_CBC_Decrypt(key
, iv
)
2337 """Current file pos - works only on block boundaries."""
2338 return self
.f
.tell()
2340 def read(self
, cnt
=None):
2341 """Read and decrypt."""
2343 raise BadRarFile('Bad count to header decrypt - wrong password?')
2346 if cnt
<= len(self
.buf
):
2347 res
= self
.buf
[:cnt
]
2348 self
.buf
= self
.buf
[cnt
:]
2357 enc
= self
.f
.read(blklen
)
2358 if len(enc
) < blklen
:
2360 dec
= self
.ciph
.decrypt(enc
)
2366 self
.buf
= dec
[cnt
:]
2372 # handle (filename|filelike) object
2373 class XFile(object):
2374 """Input may be filename or file object.
2376 __slots__
= ('_fd', '_need_close')
2378 def __init__(self
, xfile
, bufsize
=1024):
2379 if is_filelike(xfile
):
2380 self
._need
_close
= False
2384 self
._need
_close
= True
2385 self
._fd
= open(xfile
, 'rb', bufsize
)
2387 def read(self
, n
=None):
2388 """Read from file."""
2389 return self
._fd
.read(n
)
2392 """Return file pos."""
2393 return self
._fd
.tell()
2395 def seek(self
, ofs
, whence
=0):
2396 """Move file pos."""
2397 return self
._fd
.seek(ofs
, whence
)
2399 def readinto(self
, dst
):
2400 """Read into buffer."""
2401 return self
._fd
.readinto(dst
)
2404 """Close file object."""
2405 if self
._need
_close
:
2408 def __enter__(self
):
2411 def __exit__(self
, typ
, val
, tb
):
2415 class NoHashContext(object):
2416 """No-op hash function."""
2417 def __init__(self
, data
=None):
2419 def update(self
, data
):
2423 def hexdigest(self
):
2424 """Hexadecimal digest."""
2427 class CRC32Context(object):
2428 """Hash context that uses CRC32."""
2429 __slots__
= ['_crc']
2431 def __init__(self
, data
=None):
2436 def update(self
, data
):
2438 self
._crc
= rar_crc32(data
, self
._crc
)
2444 def hexdigest(self
):
2445 """Hexadecimal digest."""
2446 return '%08x' % self
.digest()
2449 class Blake2SP(object):
2450 """Blake2sp hash context.
2452 __slots__
= ['_thread', '_buf', '_cur', '_digest']
2457 def __init__(self
, data
=None):
2463 for i
in range(self
.parallelism
):
2464 ctx
= self
._blake
2s
(i
, 0, i
== (self
.parallelism
- 1))
2465 self
._thread
.append(ctx
)
2470 def _blake2s(self
, ofs
, depth
, is_last
):
2471 return blake2s(node_offset
=ofs
, node_depth
=depth
, last_node
=is_last
,
2472 depth
=2, inner_size
=32, fanout
=self
.parallelism
)
2474 def _add_block(self
, blk
):
2475 self
._thread
[self
._cur
].update(blk
)
2476 self
._cur
= (self
._cur
+ 1) % self
.parallelism
2478 def update(self
, data
):
2481 view
= memoryview(data
)
2482 bs
= self
.block_size
2484 need
= bs
- len(self
._buf
)
2485 if len(view
) < need
:
2486 self
._buf
+= view
.tobytes()
2488 self
._add
_block
(self
._buf
+ view
[:need
].tobytes())
2490 while len(view
) >= bs
:
2491 self
._add
_block
(view
[:bs
])
2493 self
._buf
= view
.tobytes()
2496 """Return final digest value.
2498 if self
._digest
is None:
2500 self
._add
_block
(self
._buf
)
2502 ctx
= self
._blake
2s
(0, 1, True)
2503 for t
in self
._thread
:
2504 ctx
.update(t
.digest())
2505 self
._digest
= ctx
.digest()
2508 def hexdigest(self
):
2509 """Hexadecimal digest."""
2510 return tohex(self
.digest())
2513 ## Utility functions
# Pre-compiled little-endian struct formats.

# plain integers
S_LONG = Struct('<L')       # unsigned 32-bit
S_SHORT = Struct('<H')      # unsigned 16-bit
S_BYTE = Struct('<B')       # unsigned 8-bit

# header layouts
S_BLK_HDR = Struct('<HBHH')
S_FILE_HDR = Struct('<LLBLLBBHL')
S_COMMENT_HDR = Struct('<HBBH')
2524 def load_vint(buf
, pos
):
2525 """Load variable-size int."""
2526 limit
= min(pos
+ 11, len(buf
))
2529 b
= _byte_code(buf
[pos
])
2530 res
+= ((b
& 0x7F) << ofs
)
2535 raise BadRarFile('cannot load vint')
2537 def load_byte(buf
, pos
):
2538 """Load single byte"""
2541 raise BadRarFile('cannot load byte')
2542 return S_BYTE
.unpack_from(buf
, pos
)[0], end
2544 def load_le32(buf
, pos
):
2545 """Load little-endian 32-bit integer"""
2548 raise BadRarFile('cannot load le32')
2549 return S_LONG
.unpack_from(buf
, pos
)[0], pos
+ 4
2551 def load_bytes(buf
, num
, pos
):
2552 """Load sequence of bytes"""
2555 raise BadRarFile('cannot load bytes')
2556 return buf
[pos
: end
], end
def load_vstr(buf, pos):
    """Load a byte string whose length is given by a leading vint.

    Returns ``(data, new_pos)``.
    """
    length, data_pos = load_vint(buf, pos)
    data, end = load_bytes(buf, length, data_pos)
    return data, end
def load_dostime(buf, pos):
    """Load a little-endian 32-bit DOS timestamp.

    Returns ``(datetime, new_pos)``.
    """
    stamp, pos = load_le32(buf, pos)
    when = to_datetime(parse_dos_time(stamp))
    return when, pos
def load_unixtime(buf, pos):
    """Load a little-endian 32-bit unix timestamp.

    Returns ``(datetime, new_pos)``; the datetime carries the UTC tzinfo.
    """
    secs, pos = load_le32(buf, pos)
    return datetime.fromtimestamp(secs, UTC), pos
def load_windowstime(buf, pos):
    """Load a little-endian 64-bit windows timestamp.

    The value counts 100ns ticks since the windows epoch (1601).
    Returns ``(datetime, new_pos)``; the datetime carries the UTC tzinfo.
    """
    # unix epoch (1970) in seconds from windows epoch (1601)
    unix_epoch = 11644473600

    # the 64-bit value is stored as two LE32 halves, low word first
    low, pos = load_le32(buf, pos)
    high, pos = load_le32(buf, pos)
    ticks = (high << 32) | low

    secs, n1secs = divmod(ticks, 10000000)
    stamp = datetime.fromtimestamp(secs - unix_epoch, UTC)
    # remainder is in 100ns units; ten of them make one microsecond
    stamp = stamp.replace(microsecond=n1secs // 10)
    return stamp, pos
2586 # new-style next volume
2587 def _next_newvol(volfile
):
2588 i
= len(volfile
) - 1
2590 if volfile
[i
] >= '0' and volfile
[i
] <= '9':
2591 return _inc_volname(volfile
, i
)
2593 raise BadRarName("Cannot construct volume name: " + volfile
)
# old-style next volume
def _next_oldvol(volfile):
    """Return the name of the volume following `volfile` (old naming).

    A name ending in '.rar' (any case) gets its last two characters
    replaced by '00'; any other name has its final character incremented
    via `_inc_volname`.
    """
    if volfile[-4:].lower() != '.rar':
        last = len(volfile) - 1
        return _inc_volname(volfile, last)
    # rar -> r00
    return volfile[:-2] + '00'
2602 # increase digits with carry, otherwise just increment char
2603 def _inc_volname(volfile
, i
):
2607 fn
[i
] = chr(ord(fn
[i
]) + 1)
2613 # rar3 extended time fields
2614 def _parse_ext_time(h
, data
, pos
):
2615 # flags and rest of data can be missing
2617 if pos
+ 2 <= len(data
):
2618 flags
= S_SHORT
.unpack_from(data
, pos
)[0]
2621 mtime
, pos
= _parse_xtime(flags
>> 3 * 4, data
, pos
, h
.mtime
)
2622 h
.ctime
, pos
= _parse_xtime(flags
>> 2 * 4, data
, pos
)
2623 h
.atime
, pos
= _parse_xtime(flags
>> 1 * 4, data
, pos
)
2624 h
.arctime
, pos
= _parse_xtime(flags
>> 0 * 4, data
, pos
)
2627 h
.date_time
= mtime
.timetuple()[:6]
2630 # rar3 one extended time field
2631 def _parse_xtime(flag
, data
, pos
, basetime
=None):
2635 basetime
, pos
= load_dostime(data
, pos
)
2637 # load second fractions
2640 for _
in range(cnt
):
2641 b
, pos
= load_byte(data
, pos
)
2642 rem
= (b
<< 16) |
(rem
>> 8)
2644 # convert 100ns units to microseconds
2649 # dostime has room for 30 seconds only, correct if needed
2650 if flag
& 4 and basetime
.second
< 59:
2651 res
= basetime
.replace(microsecond
=usec
, second
=basetime
.second
+ 1)
2653 res
= basetime
.replace(microsecond
=usec
)
2656 def is_filelike(obj
):
2657 """Filename or file object?
2659 if isinstance(obj
, str) or isinstance(obj
, unicode):
2662 for a
in ('read', 'tell', 'seek'):
2663 res
= res
and hasattr(obj
, a
)
2665 raise ValueError("Invalid object passed as file")
2668 def rar3_s2k(psw
, salt
):
2669 """String-to-key hash for RAR3.
2671 if not isinstance(psw
, unicode):
2672 psw
= psw
.decode('utf8')
2673 seed
= psw
.encode('utf-16le') + salt
2677 for j
in range(0x4000):
2678 cnt
= S_LONG
.pack(i
* 0x4000 + j
)
2679 h
.update(seed
+ cnt
[:3])
2681 iv
+= h
.digest()[19:20]
2682 key_be
= h
.digest()[:16]
2683 key_le
= pack("<LLLL", *unpack(">LLLL", key_be
))
2686 def rar3_decompress(vers
, meth
, data
, declen
=0, flags
=0, crc
=0, psw
=None, salt
=None):
2687 """Decompress blob of compressed data.
2689 Used for data with non-standard header - eg. comments.
2691 # already uncompressed?
2692 if meth
== RAR_M0
and (flags
& RAR_FILE_PASSWORD
) == 0:
2695 # take only necessary flags
2696 flags
= flags
& (RAR_FILE_PASSWORD | RAR_FILE_SALT | RAR_FILE_DICTMASK
)
2697 flags |
= RAR_LONG_BLOCK
2703 fhdr
= S_FILE_HDR
.pack(len(data
), declen
, RAR_OS_MSDOS
, crc
,
2704 date
, vers
, meth
, len(fname
), mode
)
2706 if flags
& RAR_FILE_SALT
:
2712 hlen
= S_BLK_HDR
.size
+ len(fhdr
)
2713 hdr
= S_BLK_HDR
.pack(0, RAR_BLOCK_FILE
, flags
, hlen
) + fhdr
2714 hcrc
= rar_crc32(hdr
[2:]) & 0xFFFF
2715 hdr
= S_BLK_HDR
.pack(hcrc
, RAR_BLOCK_FILE
, flags
, hlen
) + fhdr
2717 # archive main header
2718 mh
= S_BLK_HDR
.pack(0x90CF, RAR_BLOCK_MAIN
, 0, 13) + ZERO
* (2 + 4)
2720 # decompress via temp rar
2721 tmpfd
, tmpname
= mkstemp(suffix
='.rar')
2722 tmpf
= os
.fdopen(tmpfd
, "wb")
2724 tmpf
.write(RAR_ID
+ mh
+ hdr
+ data
)
2727 cmd
= [UNRAR_TOOL
] + list(OPEN_ARGS
)
2728 add_password_arg(cmd
, psw
, (flags
& RAR_FILE_PASSWORD
))
2731 p
= custom_popen(cmd
)
2732 return p
.communicate()[0]
2738 """Convert 6-part time tuple into datetime object.
2744 year
, mon
, day
, h
, m
, s
= t
2746 # assume the values are valid
2748 return datetime(year
, mon
, day
, h
, m
, s
)
2752 # sanitize invalid values
2753 mday
= (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
2768 if mon
== 2 and day
== 29:
2770 return datetime(year
, mon
, day
, h
, m
, s
)
2773 return datetime(year
, mon
, day
, h
, m
, s
)
def parse_dos_time(stamp):
    """Split a standard 32-bit DOS timestamp into its fields.

    Returns ``(year, month, day, hour, minute, second)``.  Values are not
    validated.  Seconds are stored in 2-second units and doubled here.
    """
    # bit layout, low to high: sec/2 (5), min (6), hour (5),
    # day (5), month (4), year-1980 (7)
    half_secs = stamp & 0x1F
    minute = (stamp >> 5) & 0x3F
    hour = (stamp >> 11) & 0x1F
    day = (stamp >> 16) & 0x1F
    month = (stamp >> 21) & 0x0F
    year = ((stamp >> 25) & 0x7F) + 1980
    return (year, month, day, hour, minute, half_secs * 2)
2786 def custom_popen(cmd
):
2787 """Disconnect cmd from parent fds, read only from stdout.
2791 if sys
.platform
== 'win32':
2792 creationflags
= 0x08000000 # CREATE_NO_WINDOW
2796 p
= Popen(cmd
, bufsize
=0, stdout
=PIPE
, stdin
=PIPE
, stderr
=STDOUT
,
2797 creationflags
=creationflags
)
2798 except OSError as ex
:
2799 if ex
.errno
== errno
.ENOENT
:
2800 raise RarCannotExec("Unrar not installed? (rarfile.UNRAR_TOOL=%r)" % UNRAR_TOOL
)
2804 def custom_check(cmd
, ignore_retcode
=False):
2805 """Run command, collect output, raise error if needed.
2807 p
= custom_popen(cmd
)
2808 out
, _
= p
.communicate()
2809 if p
.returncode
and not ignore_retcode
:
2810 raise RarExecError("Check-run failed")
2813 def add_password_arg(cmd
, psw
, ___required
=False):
2814 """Append password switch to commandline.
2816 if UNRAR_TOOL
== ALT_TOOL
:
2819 cmd
.append('-p' + psw
)
2823 def check_returncode(p
, out
):
2824 """Raise exception according to unrar exit code.
2830 # map return code to exception class, codes from rar.txt
2832 RarWarning
, RarFatalError
, RarCRCError
, RarLockedArchiveError
, # 1..4
2833 RarWriteError
, RarOpenError
, RarUserError
, RarMemoryError
, # 5..8
2834 RarCreateError
, RarNoFilesError
, RarWrongPassword
] # 9..11
2835 if UNRAR_TOOL
== ALT_TOOL
:
2837 if code
> 0 and code
< len(errmap
):
2844 exc
= RarUnknownError
2848 msg
= "%s [%d]: %s" % (exc
.__doc
__, p
.returncode
, out
)
2850 msg
= "%s [%d]" % (exc
.__doc
__, p
.returncode
)
def hmac_sha256(key, data):
    """Return the raw HMAC-SHA256 digest of `data` keyed with `key`."""
    mac = HMAC(key, data, sha256)
    return mac.digest()
2858 def membuf_tempfile(memfile
):
2861 tmpfd
, tmpname
= mkstemp(suffix
='.rar')
2862 tmpf
= os
.fdopen(tmpfd
, "wb")
2866 buf
= memfile
.read(BSIZE
)
2877 class XTempFile(object):
2878 __slots__
= ('_tmpfile', '_filename')
2880 def __init__(self
, rarfile
):
2881 if is_filelike(rarfile
):
2882 self
._tmpfile
= membuf_tempfile(rarfile
)
2883 self
._filename
= self
._tmpfile
2885 self
._tmpfile
= None
2886 self
._filename
= rarfile
2888 def __enter__(self
):
2889 return self
._filename
2891 def __exit__(self
, exc_type
, exc_value
, tb
):
2894 os
.unlink(self
._tmpfile
)
2897 self
._tmpfile
= None
2900 # Check if unrar works
# Snapshot of the configured tool settings, taken before the tool check
# below may rebind UNRAR_TOOL and the *_ARGS globals.
(ORIG_UNRAR_TOOL,
 ORIG_OPEN_ARGS,
 ORIG_EXTRACT_ARGS,
 ORIG_TEST_ARGS) = (UNRAR_TOOL,
                    OPEN_ARGS,
                    EXTRACT_ARGS,
                    TEST_ARGS)
2908 def _check_unrar_tool():
2909 global UNRAR_TOOL
, OPEN_ARGS
, EXTRACT_ARGS
, TEST_ARGS
2911 # does UNRAR_TOOL work?
2912 custom_check([ORIG_UNRAR_TOOL
], True)
2914 UNRAR_TOOL
= ORIG_UNRAR_TOOL
2915 OPEN_ARGS
= ORIG_OPEN_ARGS
2916 EXTRACT_ARGS
= ORIG_EXTRACT_ARGS
2917 TEST_ARGS
= ORIG_TEST_ARGS
2918 except RarCannotExec
:
2920 # does ALT_TOOL work?
2921 custom_check([ALT_TOOL
] + list(ALT_CHECK_ARGS
), True)
2923 UNRAR_TOOL
= ALT_TOOL
2924 OPEN_ARGS
= ALT_OPEN_ARGS
2925 EXTRACT_ARGS
= ALT_EXTRACT_ARGS
2926 TEST_ARGS
= ALT_TEST_ARGS
2927 except RarCannotExec
:
2928 # no usable tool, only uncompressed archives work