3 # Copyright (c) 2005-2013 Marko Kreen <markokr@gmail.com>
5 # Permission to use, copy, modify, and/or distribute this software for any
6 # purpose with or without fee is hereby granted, provided that the above
7 # copyright notice and this permission notice appear in all copies.
9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 r
"""RAR archive reader.
This is a Python module for reading RAR archives. The interface
20 is made as :mod:`zipfile`-like as possible.
23 - Parse archive structure with Python.
24 - Extract non-compressed files with Python
25 - Extract compressed files with unrar.
26 - Optionally write compressed data to temp file to speed up unrar,
27 otherwise it needs to scan whole archive on each execution.
33 rf = rarfile.RarFile('myarchive.rar')
34 for f in rf.infolist():
35 print f.filename, f.file_size
36 if f.filename == 'README':
39 Archive files can also be accessed via file-like object returned
40 by :meth:`RarFile.open`::
44 with rarfile.RarFile('archive.rar') as rf:
45 with rf.open('README') as f:
There are a few module-level parameters to tune behaviour;
here they are with their defaults and the reason to change each::
54 # Set to full path of unrar.exe if it is not in PATH
55 rarfile.UNRAR_TOOL = "unrar"
57 # Set to 0 if you don't look at comments and want to
58 # avoid wasting time for parsing them
59 rarfile.NEED_COMMENTS = 1
# Set to 1 if you don't want to deal with decoding comments
62 # from unknown encoding. rarfile will try couple of common
63 # encodings in sequence.
64 rarfile.UNICODE_COMMENTS = 0
66 # Set to 1 if you prefer timestamps to be datetime objects
68 rarfile.USE_DATETIME = 0
70 # Set to '/' to be more compatible with zipfile
71 rarfile.PATH_SEP = '\\'
73 For more details, refer to source.
# Names exported by ``from rarfile import *``.
__all__ = [
    'is_rarfile',
    'RarInfo',
    'RarFile',
    'RarExtFile',
]
83 ## Imports and compat - support both Python 2.x and 3.x
86 import sys
, os
, struct
, errno
87 from struct
import pack
, unpack
88 from binascii
import crc32
89 from tempfile
import mkstemp
90 from subprocess
import Popen
, PIPE
, STDOUT
91 from datetime
import datetime
# only needed for encrypted headers
95 from Crypto
.Cipher
import AES
97 from hashlib
import sha1
99 from sha
import new
as sha1
105 if sys
.hexversion
< 0x3000000:
106 # prefer 3.x behaviour
108 # py2.6 has broken bytes()
112 # see if compat bytearray() is needed
def __init__(self, val=''):
    """Wrap *val* in an unsigned-byte ``array.array``.

    The array is stored as ``self.arr``.  Its bound ``append``,
    ``__getitem__`` and ``__len__`` methods are copied onto the
    instance as attributes of the same names.
    """
    storage = array.array('B', val)
    self.arr = storage
    # same three attributes, same order as before
    for attr in ('append', '__getitem__', '__len__'):
        setattr(self, attr, getattr(storage, attr))
def decode(self, *args):
    """Decode the stored bytes.

    The byte string returned by ``self.arr.tostring()`` is decoded,
    with all positional arguments passed through unchanged to its
    ``decode`` method.
    """
    raw = self.arr.tostring()
    return raw.decode(*args)
126 # Optimized .readinto() requires memoryview
133 # Struct() for older python
135 from struct
import Struct
138 def __init__(self
, fmt
):
140 self
.size
= struct
.calcsize(fmt
)
def unpack(self, buf):
    """Unpack *buf* according to ``self.format``.

    Returns the tuple produced by :func:`struct.unpack`.
    """
    layout = self.format
    return struct.unpack(layout, buf)
def unpack_from(self, buf, ofs=0):
    """Unpack one record from *buf*, starting at offset *ofs*.

    A slice of ``self.size`` bytes beginning at *ofs* is taken from
    *buf* and decoded with ``self.format``.
    """
    end = ofs + self.size
    window = buf[ofs:end]
    return unpack(self.format, window)
def pack(self, *args):
    """Pack *args* into a byte string according to ``self.format``.

    Returns whatever :func:`struct.pack` produces for the given values.
    """
    call_args = (self.format,) + args
    return struct.pack(*call_args)
148 # file object superclass
150 from io
import RawIOBase
152 class RawIOBase(object):
158 ## Module configuration. Can be tuned after importing.
161 #: default fallback charset
162 DEFAULT_CHARSET
= "windows-1252"
164 #: list of encodings to try, with fallback to DEFAULT_CHARSET if none succeed
165 TRY_ENCODINGS
= ('utf8', 'utf-16le')
167 #: 'unrar', 'rar' or full path to either one
170 #: Command line args to use for opening file for reading.
171 OPEN_ARGS
= ('p', '-inul')
173 #: Command line args to use for extracting file to disk.
174 EXTRACT_ARGS
= ('x', '-y', '-idq')
176 #: args for testrar()
177 TEST_ARGS
= ('t', '-idq')
179 #: whether to speed up decompression by using tmp archive
182 #: limit the filesize for tmp archive usage
183 HACK_SIZE_LIMIT
= 20*1024*1024
185 #: whether to parse file/archive comments.
188 #: whether to convert comments to unicode strings
191 #: When RAR is corrupt, stopping on bad header is better
192 #: On unknown/misparsed RAR headers reporting is better
193 REPORT_BAD_HEADER
= 0
195 #: Convert RAR time tuple into datetime() object
198 #: Separator for path name components. RAR internally uses '\\'.
199 #: Use '/' to be similar with zipfile.
# Block type codes; the trailing comment shows the ASCII character
# that has the same value.
RAR_BLOCK_MARK = 0x72  # r
RAR_BLOCK_MAIN = 0x73  # s
RAR_BLOCK_FILE = 0x74  # t
RAR_BLOCK_OLD_COMMENT = 0x75  # u
RAR_BLOCK_OLD_EXTRA = 0x76  # v
RAR_BLOCK_OLD_SUB = 0x77  # w
RAR_BLOCK_OLD_RECOVERY = 0x78  # x
RAR_BLOCK_OLD_AUTH = 0x79  # y
RAR_BLOCK_SUB = 0x7a  # z
RAR_BLOCK_ENDARC = 0x7b  # {

# flags for RAR_BLOCK_MAIN (one bit each)
RAR_MAIN_VOLUME = 1 << 0
RAR_MAIN_COMMENT = 1 << 1
RAR_MAIN_LOCK = 1 << 2
RAR_MAIN_SOLID = 1 << 3
RAR_MAIN_NEWNUMBERING = 1 << 4
RAR_MAIN_AUTH = 1 << 5
RAR_MAIN_RECOVERY = 1 << 6
RAR_MAIN_PASSWORD = 1 << 7
RAR_MAIN_FIRSTVOLUME = 1 << 8
RAR_MAIN_ENCRYPTVER = 1 << 9

# flags for RAR_BLOCK_FILE
RAR_FILE_SPLIT_BEFORE = 1 << 0
RAR_FILE_SPLIT_AFTER = 1 << 1
RAR_FILE_PASSWORD = 1 << 2
RAR_FILE_COMMENT = 1 << 3
RAR_FILE_SOLID = 1 << 4
# Bits 5-7 form one three-bit field rather than independent flags;
# RAR_FILE_DIRECTORY is the all-ones value, equal to the mask itself.
RAR_FILE_DICTMASK = 7 << 5
RAR_FILE_DICT64 = 0 << 5
RAR_FILE_DICT128 = 1 << 5
RAR_FILE_DICT256 = 2 << 5
RAR_FILE_DICT512 = 3 << 5
RAR_FILE_DICT1024 = 4 << 5
RAR_FILE_DICT2048 = 5 << 5
RAR_FILE_DICT4096 = 6 << 5
RAR_FILE_DIRECTORY = 7 << 5
RAR_FILE_LARGE = 1 << 8
RAR_FILE_UNICODE = 1 << 9
RAR_FILE_SALT = 1 << 10
RAR_FILE_VERSION = 1 << 11
RAR_FILE_EXTTIME = 1 << 12
RAR_FILE_EXTFLAGS = 1 << 13

# flags for RAR_BLOCK_ENDARC
RAR_ENDARC_NEXT_VOLUME = 1 << 0
RAR_ENDARC_DATACRC = 1 << 1
RAR_ENDARC_REVSPACE = 1 << 2
RAR_ENDARC_VOLNR = 1 << 3

# flags common to all blocks
RAR_SKIP_IF_UNKNOWN = 1 << 14
RAR_LONG_BLOCK = 1 << 15
270 # Compression methods - '0'..'5'
279 ## internal constants
# Marker bytes; the parser reads len(RAR_ID) bytes from the start of
# a file when opening it.
RAR_ID = bytes("Rar!\x1a\x07\x00", 'ascii')
# Byte-string helpers: empty string and a single NUL byte.
EMPTY = bytes("", 'ascii')
ZERO = bytes("\0", 'ascii')

# Pre-built little-endian layouts.
S_BYTE = Struct('<B')
S_SHORT = Struct('<H')
S_LONG = Struct('<L')
# common block header: crc, type, flags, header size
S_BLK_HDR = Struct('<HBHH')
# old-style comment sub-header: decompressed length, version, method, crc
S_COMMENT_HDR = Struct('<HBBH')
# fixed part of the file header
S_FILE_HDR = Struct('<LLBLLBBHL')
class Error(Exception):
    """Common base class of all rarfile errors."""


class BadRarFile(Error):
    """The archive contains incorrect data."""


class NotRarFile(Error):
    """The file is not a RAR archive."""


class BadRarName(Error):
    """The components of a multipart name cannot be guessed."""


class NoRarEntry(Error):
    """The requested file was not found in the RAR archive."""


class PasswordRequired(Error):
    """The file requires a password."""


class NeedFirstVolume(Error):
    """Processing must start from the first volume."""


class NoCrypto(Error):
    """Encrypted headers cannot be parsed - no crypto available."""


class RarExecError(Error):
    """A problem was reported by unrar/rar."""


class RarWarning(RarExecError):
    """A non-fatal error."""
317 class RarFatalError(RarExecError
):
class RarCRCError(RarExecError):
    """A CRC error occurred while unpacking."""
class RarLockedArchiveError(RarExecError):
    """A locked archive must not be modified."""
323 class RarWriteError(RarExecError
):
325 class RarOpenError(RarExecError
):
327 class RarUserError(RarExecError
):
329 class RarMemoryError(RarExecError
):
331 class RarCreateError(RarExecError
):
class RarNoFilesError(RarExecError):
    """No files matching the pattern were found."""
335 class RarUserBreak(RarExecError
):
class RarUnknownError(RarExecError):
    """The exit code is not a known one."""
class RarSignalExit(RarExecError):
    """Unrar was terminated by a signal."""
344 '''Check quickly whether file is rar archive.'''
345 buf
= open(fn
, "rb").read(len(RAR_ID
))
349 class RarInfo(object):
350 r
'''An entry in rar archive.
352 :mod:`zipfile`-compatible fields:
355 File name with relative path.
356 Default path separator is '\\', to change set rarfile.PATH_SEP.
357 Always unicode string.
359 Modification time, tuple of (year, month, day, hour, minute, second).
360 Or datetime() object if USE_DATETIME is set.
366 CRC-32 of uncompressed file, unsigned int.
368 File comment. Byte string or None. Use UNICODE_COMMENTS
369 to get automatic decoding to unicode.
371 Volume nr, starting from 0.
376 Compression method: 0x30 - 0x35.
378 Minimal Rar version needed for decompressing.
380 Host OS type, one of RAR_OS_* constants.
382 File attributes. May be either dos-style or unix-style, depending on host_os.
384 Volume file name, where file starts.
386 Optional time field: Modification time, with float seconds.
387 Same as .date_time but with more precision.
389 Optional time field: creation time, with float seconds.
391 Optional time field: last access time, with float seconds.
393 Optional time field: archival time, with float seconds.
398 One of RAR_BLOCK_* types. Only entries with type==RAR_BLOCK_FILE are shown in .infolist().
400 For files, RAR_FILE_* bits.
404 # zipfile-compatible fields
412 'orig_filename', # bytes in unknown encoding
414 # rar-specific fields
422 # optional extended time fields
423 # tuple where the sec is float, or datetime().
424 'mtime', # same as .date_time
443 '''Returns True if the entry is a directory.'''
444 if self
.type == RAR_BLOCK_FILE
:
445 return (self
.flags
& RAR_FILE_DIRECTORY
) == RAR_FILE_DIRECTORY
def needs_password(self):
    """Tell whether this entry is password-protected.

    Returns True when the RAR_FILE_PASSWORD bit is set in
    ``self.flags`` and False otherwise.  A real bool is returned
    instead of the raw masked integer; truthiness is unchanged, so
    callers that only test the result behave as before.
    """
    return (self.flags & RAR_FILE_PASSWORD) != 0
452 class RarFile(object):
453 '''Parse RAR structure, provide access to files in archive.
456 #: Archive comment. Byte string or None. Use UNICODE_COMMENTS
457 #: to get automatic decoding to unicode.
460 def __init__(self
, rarfile
, mode
="r", charset
=None, info_callback
=None, crc_check
= True):
461 """Open and parse a RAR archive.
468 only 'r' is supported.
470 fallback charset to use, if filenames are not already Unicode-enabled.
472 debug callback, gets to see all archive entries.
474 set to False to disable CRC checks
476 self
.rarfile
= rarfile
478 self
._charset
= charset
or DEFAULT_CHARSET
479 self
._info
_callback
= info_callback
483 self
._needs
_password
= False
484 self
._password
= None
485 self
._crc
_check
= crc_check
491 raise NotImplementedError("RarFile supports only mode=r")
498 def __exit__(self
, type, value
, traceback
):
501 def setpassword(self
, password
):
502 '''Sets the password to use when extracting.'''
503 self
._password
= password
def needs_password(self):
    '''Report whether at least one archive entry needs a password for extraction.'''
    required = self._needs_password
    return required
512 '''Return list of filenames in archive.'''
513 return [f
.filename
for f
in self
._info
_list
]
516 '''Return RarInfo objects for all files/directories in archive.'''
517 return self
._info
_list
519 def volumelist(self
):
520 '''Returns filenames of archive volumes.
522 In case of single-volume archive, the list contains
523 just the name of main archive file.
525 return self
._vol
_list
527 def getinfo(self
, fname
):
528 '''Return RarInfo for file.'''
530 if isinstance(fname
, RarInfo
):
533 # accept both ways here
535 fname2
= fname
.replace("\\", "/")
537 fname2
= fname
.replace("/", "\\")
540 return self
._info
_map
[fname
]
543 return self
._info
_map
[fname2
]
545 raise NoRarEntry("No such file: "+fname
)
547 def open(self
, fname
, mode
= 'r', psw
= None):
548 '''Returns file-like object (:class:`RarExtFile`),
549 from where the data can be read.
551 The object implements io.RawIOBase interface, so it can
552 be further wrapped with io.BufferedReader and io.TextIOWrapper.
554 On older Python where io module is not available, it implements
555 only .read(), .seek(), .tell() and .close() methods.
557 The object is seekable, although the seeking is fast only on
558 uncompressed files, on compressed files the seeking is implemented
559 by reading ahead and/or restarting the decompression.
564 file name or RarInfo instance.
568 password to use for extracting.
572 raise NotImplementedError("RarFile.open() supports only mode=r")
575 inf
= self
.getinfo(fname
)
577 raise TypeError("Directory does not have any data: " + inf
.filename
)
579 if inf
.flags
& RAR_FILE_SPLIT_BEFORE
:
580 raise NeedFirstVolume("Partial file, please start from first volume: " + inf
.filename
)
583 if inf
.needs_password():
584 psw
= psw
or self
._password
586 raise PasswordRequired("File %s requires password" % inf
.filename
)
590 # is temp write usable?
591 if not USE_EXTRACT_HACK
or not self
._main
:
593 elif self
._main
.flags
& (RAR_MAIN_SOLID | RAR_MAIN_PASSWORD
):
595 elif inf
.flags
& (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER
):
597 elif inf
.file_size
> HACK_SIZE_LIMIT
:
603 if inf
.compress_type
== RAR_M0
and (inf
.flags
& RAR_FILE_PASSWORD
) == 0:
604 return self
._open
_clear
(inf
)
606 return self
._open
_hack
(inf
, psw
)
608 return self
._open
_unrar
(self
.rarfile
, inf
, psw
)
610 def read(self
, fname
, psw
= None):
611 """Return uncompressed data for archive entry.
613 For longer files using .open() may be better idea.
618 filename or RarInfo instance
620 password to use for extracting.
623 f
= self
.open(fname
, 'r', psw
)
630 """Release open resources."""
634 """Print archive file list to stdout."""
635 for f
in self
._info
_list
:
638 def extract(self
, member
, path
=None, pwd
=None):
639 """Extract single file into current directory.
644 filename or RarInfo instance
646 optional destination path
648 optional password to use
650 if isinstance(member
, RarInfo
):
651 fname
= member
.filename
654 self
._extract
([fname
], path
, pwd
)
656 def extractall(self
, path
=None, members
=None, pwd
=None):
657 """Extract all files into current directory.
662 optional destination path
664 optional filename or RarInfo instance list to extract
666 optional password to use
669 if members
is not None:
671 if isinstance(m
, RarInfo
):
672 fnlist
.append(m
.filename
)
675 self
._extract
(fnlist
, path
, pwd
)
678 """Let 'unrar' test the archive.
680 cmd
= [UNRAR_TOOL
] + list(TEST_ARGS
)
681 if self
._password
is not None:
682 cmd
.append('-p' + self
._password
)
685 cmd
.append(self
.rarfile
)
686 p
= custom_popen(cmd
)
687 output
= p
.communicate()[0]
688 check_returncode(p
, output
)
695 def _process_entry(self
, item
):
696 if item
.type == RAR_BLOCK_FILE
:
697 # use only first part
698 if (item
.flags
& RAR_FILE_SPLIT_BEFORE
) == 0:
699 self
._info
_map
[item
.filename
] = item
700 self
._info
_list
.append(item
)
701 # remember if any items require password
702 if item
.needs_password():
703 self
._needs
_password
= True
704 elif len(self
._info
_list
) > 0:
705 # final crc is in last block
706 old
= self
._info
_list
[-1]
708 old
.compress_size
+= item
.compress_size
710 # parse new-style comment
711 if item
.type == RAR_BLOCK_SUB
and item
.filename
== 'CMT':
712 if not NEED_COMMENTS
:
714 elif item
.flags
& (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER
):
716 elif item
.flags
& RAR_FILE_SOLID
:
718 cmt
= self
._read
_comment
_v
3(item
, self
._password
)
719 if len(self
._info
_list
) > 0:
720 old
= self
._info
_list
[-1]
724 cmt
= self
._read
_comment
_v
3(item
, self
._password
)
727 if self
._info
_callback
:
728 self
._info
_callback
(item
)
740 def _parse_real(self
):
741 fd
= open(self
.rarfile
, "rb")
743 id = fd
.read(len(RAR_ID
))
745 raise NotRarFile("Not a Rar archive: "+self
.rarfile
)
747 volume
= 0 # first vol (.rar) is 0
750 volfile
= self
.rarfile
751 self
._vol
_list
= [self
.rarfile
]
754 h
= None # don't read past ENDARC
756 h
= self
._parse
_header
(fd
)
760 volfile
= self
._next
_volname
(volfile
)
762 fd
= open(volfile
, "rb")
766 self
._vol
_list
.append(volfile
)
770 h
.volume_file
= volfile
772 if h
.type == RAR_BLOCK_MAIN
and not self
._main
:
774 if h
.flags
& RAR_MAIN_NEWNUMBERING
:
775 # RAR 2.x does not set FIRSTVOLUME,
776 # so check it only if NEWNUMBERING is used
777 if (h
.flags
& RAR_MAIN_FIRSTVOLUME
) == 0:
778 raise NeedFirstVolume("Need to start from first volume")
779 if h
.flags
& RAR_MAIN_PASSWORD
:
780 self
._needs
_password
= True
781 if not self
._password
:
784 elif h
.type == RAR_BLOCK_ENDARC
:
785 more_vols
= h
.flags
& RAR_ENDARC_NEXT_VOLUME
787 elif h
.type == RAR_BLOCK_FILE
:
788 # RAR 2.x does not write RAR_BLOCK_ENDARC
789 if h
.flags
& RAR_FILE_SPLIT_AFTER
:
791 # RAR 2.x does not set RAR_MAIN_FIRSTVOLUME
792 if volume
== 0 and h
.flags
& RAR_FILE_SPLIT_BEFORE
:
793 raise NeedFirstVolume("Need to start from first volume")
796 self
._process
_entry
(h
)
800 fd
.seek(h
.file_offset
+ h
.add_size
, 0)
802 # AES encrypted headers
803 _last_aes_key
= (None, None, None) # (salt, key, iv)
804 def _decrypt_header(self
, fd
):
806 raise NoCrypto('Cannot parse encrypted headers - no crypto')
808 if self
._last
_aes
_key
[0] == salt
:
809 key
, iv
= self
._last
_aes
_key
[1:]
811 key
, iv
= rar3_s2k(self
._password
, salt
)
812 self
._last
_aes
_key
= (salt
, key
, iv
)
813 return HeaderDecrypt(fd
, key
, iv
)
816 def _parse_header(self
, fd
):
818 # handle encrypted headers
819 if self
._main
and self
._main
.flags
& RAR_MAIN_PASSWORD
:
820 if not self
._password
:
822 fd
= self
._decrypt
_header
(fd
)
824 # now read actual header
825 return self
._parse
_block
_header
(fd
)
827 if REPORT_BAD_HEADER
:
828 raise BadRarFile('Broken header in RAR file')
832 def _parse_block_header(self
, fd
):
834 h
.header_offset
= fd
.tell()
837 # read and parse base header
838 buf
= fd
.read(S_BLK_HDR
.size
)
841 t
= S_BLK_HDR
.unpack_from(buf
)
842 h
.header_crc
, h
.type, h
.flags
, h
.header_size
= t
843 h
.header_base
= S_BLK_HDR
.size
847 if h
.header_size
> S_BLK_HDR
.size
:
848 h
.header_data
= buf
+ fd
.read(h
.header_size
- S_BLK_HDR
.size
)
851 h
.file_offset
= fd
.tell()
854 if len(h
.header_data
) != h
.header_size
:
855 if REPORT_BAD_HEADER
:
856 raise BadRarFile('Unexpected EOF when reading header')
# block has data associated with it?
860 if h
.flags
& RAR_LONG_BLOCK
:
861 h
.add_size
= S_LONG
.unpack_from(h
.header_data
, pos
)[0]
865 # parse interesting ones, decide header boundaries for crc
866 if h
.type == RAR_BLOCK_MARK
:
868 elif h
.type == RAR_BLOCK_MAIN
:
870 if h
.flags
& RAR_MAIN_ENCRYPTVER
:
872 if h
.flags
& RAR_MAIN_COMMENT
:
873 self
._parse
_subblocks
(h
, h
.header_base
)
874 self
.comment
= h
.comment
875 elif h
.type == RAR_BLOCK_FILE
:
876 self
._parse
_file
_header
(h
, pos
)
877 elif h
.type == RAR_BLOCK_SUB
:
878 self
._parse
_file
_header
(h
, pos
)
879 h
.header_base
= h
.header_size
880 elif h
.type == RAR_BLOCK_OLD_AUTH
:
882 elif h
.type == RAR_BLOCK_OLD_EXTRA
:
885 h
.header_base
= h
.header_size
888 if h
.type == RAR_BLOCK_OLD_SUB
:
889 crcdat
= h
.header_data
[2:] + fd
.read(h
.add_size
)
891 crcdat
= h
.header_data
[2:h
.header_base
]
893 calc_crc
= crc32(crcdat
) & 0xFFFF
896 if h
.header_crc
== calc_crc
:
900 if REPORT_BAD_HEADER
:
902 crcdat
= h
.header_data
[2:]
903 msg
= 'Header CRC error (%02x): exp=%x got=%x (xlen = %d)' % ( h
.type, h
.header_crc
, calc_crc
, xlen
)
905 while xlen
>= S_BLK_HDR
.size
- 2:
906 crc
= crc32(crcdat
[:xlen
]) & 0xFFFF
907 if crc
== h
.header_crc
:
908 msg
+= ' / crc match, xlen = %d' % xlen
910 raise BadRarFile(msg
)
# instead of panicking, send EOF
915 # read file-specific header
916 def _parse_file_header(self
, h
, pos
):
917 fld
= S_FILE_HDR
.unpack_from(h
.header_data
, pos
)
918 h
.compress_size
= fld
[0]
922 h
.date_time
= parse_dos_time(fld
[4])
923 h
.extract_version
= fld
[5]
924 h
.compress_type
= fld
[6]
927 pos
+= S_FILE_HDR
.size
929 if h
.flags
& RAR_FILE_LARGE
:
930 h1
= S_LONG
.unpack_from(h
.header_data
, pos
)[0]
931 h2
= S_LONG
.unpack_from(h
.header_data
, pos
+ 4)[0]
932 h
.compress_size |
= h1
<< 32
933 h
.file_size |
= h2
<< 32
935 h
.add_size
= h
.compress_size
937 name
= h
.header_data
[pos
: pos
+ h
.name_size
]
939 if h
.flags
& RAR_FILE_UNICODE
:
940 nul
= name
.find(ZERO
)
941 h
.orig_filename
= name
[:nul
]
942 u
= UnicodeFilename(h
.orig_filename
, name
[nul
+ 1 : ])
943 h
.filename
= u
.decode()
945 # if parsing failed fall back to simple name
947 h
.filename
= self
._decode
(h
.orig_filename
)
949 h
.orig_filename
= name
950 h
.filename
= self
._decode
(name
)
952 # change separator, if requested
954 h
.filename
= h
.filename
.replace('\\', PATH_SEP
)
956 if h
.flags
& RAR_FILE_SALT
:
957 h
.salt
= h
.header_data
[pos
: pos
+ 8]
962 # optional extended time stamps
963 if h
.flags
& RAR_FILE_EXTTIME
:
964 pos
= self
._parse
_ext
_time
(h
, pos
)
966 h
.mtime
= h
.atime
= h
.ctime
= h
.arctime
= None
971 if h
.flags
& RAR_FILE_COMMENT
:
972 self
._parse
_subblocks
(h
, pos
)
976 h
.date_time
= to_datetime(h
.date_time
)
977 h
.mtime
= to_datetime(h
.mtime
)
978 h
.atime
= to_datetime(h
.atime
)
979 h
.ctime
= to_datetime(h
.ctime
)
980 h
.arctime
= to_datetime(h
.arctime
)
982 # .mtime is .date_time with more precision
985 h
.date_time
= h
.mtime
988 h
.date_time
= h
.mtime
[:5] + (int(h
.mtime
[5]),)
992 # find old-style comment subblock
993 def _parse_subblocks(self
, h
, pos
):
994 hdata
= h
.header_data
995 while pos
< len(hdata
):
996 # ordinary block header
997 t
= S_BLK_HDR
.unpack_from(hdata
, pos
)
998 scrc
, stype
, sflags
, slen
= t
999 pos_next
= pos
+ slen
1000 pos
+= S_BLK_HDR
.size
1006 # followed by block-specific header
1007 if stype
== RAR_BLOCK_OLD_COMMENT
and pos
+ S_COMMENT_HDR
.size
<= pos_next
:
1008 declen
, ver
, meth
, crc
= S_COMMENT_HDR
.unpack_from(hdata
, pos
)
1009 pos
+= S_COMMENT_HDR
.size
1010 data
= hdata
[pos
: pos_next
]
1011 cmt
= rar_decompress(ver
, meth
, data
, declen
, sflags
,
1012 crc
, self
._password
)
1013 if not self
._crc
_check
:
1014 h
.comment
= self
._decode
_comment
(cmt
)
1015 elif crc32(cmt
) & 0xFFFF == crc
:
1016 h
.comment
= self
._decode
_comment
(cmt
)
1020 def _parse_ext_time(self
, h
, pos
):
1021 data
= h
.header_data
1023 # flags and rest of data can be missing
1025 if pos
+ 2 <= len(data
):
1026 flags
= S_SHORT
.unpack_from(data
, pos
)[0]
1029 h
.mtime
, pos
= self
._parse
_xtime
(flags
>> 3*4, data
, pos
, h
.date_time
)
1030 h
.ctime
, pos
= self
._parse
_xtime
(flags
>> 2*4, data
, pos
)
1031 h
.atime
, pos
= self
._parse
_xtime
(flags
>> 1*4, data
, pos
)
1032 h
.arctime
, pos
= self
._parse
_xtime
(flags
>> 0*4, data
, pos
)
1035 def _parse_xtime(self
, flag
, data
, pos
, dostime
= None):
1036 unit
= 10000000.0 # 100 ns units
1039 t
= S_LONG
.unpack_from(data
, pos
)[0]
1040 dostime
= parse_dos_time(t
)
1044 for i
in range(cnt
):
1045 b
= S_BYTE
.unpack_from(data
, pos
)[0]
1046 rem
= (b
<< 16) |
(rem
>> 8)
1048 sec
= dostime
[5] + rem
/ unit
1051 dostime
= dostime
[:5] + (sec
,)
1054 # given current vol name, construct next one
def _next_volname(self, volfile):
    """Return the file name of the volume that follows *volfile*.

    When the main header has RAR_MAIN_NEWNUMBERING set the name is
    built by :meth:`_next_newvol`, otherwise by :meth:`_next_oldvol`.
    """
    if not (self._main.flags & RAR_MAIN_NEWNUMBERING):
        return self._next_oldvol(volfile)
    return self._next_newvol(volfile)
1060 # new-style next volume
1061 def _next_newvol(self
, volfile
):
1062 i
= len(volfile
) - 1
1064 if volfile
[i
] >= '0' and volfile
[i
] <= '9':
1065 return self
._inc
_volname
(volfile
, i
)
1067 raise BadRarName("Cannot construct volume name: "+volfile
)
1069 # old-style next volume
1070 def _next_oldvol(self
, volfile
):
1072 if volfile
[-4:].lower() == '.rar':
1073 return volfile
[:-2] + '00'
1074 return self
._inc
_volname
(volfile
, len(volfile
) - 1)
1076 # increase digits with carry, otherwise just increment char
1077 def _inc_volname(self
, volfile
, i
):
1081 fn
[i
] = chr(ord(fn
[i
]) + 1)
def _open_clear(self, inf):
    """Open entry *inf* for direct reading from the archive.

    Returns a :class:`DirectReader` bound to this archive and *inf*.
    """
    reader = DirectReader(self, inf)
    return reader
1090 # put file compressed data into temporary .rar archive, and run
1091 # unrar on that, thus avoiding unrar going over whole archive
1092 def _open_hack(self
, inf
, psw
= None):
1095 size
= inf
.compress_size
+ inf
.header_size
1096 rf
= open(inf
.volume_file
, "rb", 0)
1097 rf
.seek(inf
.header_offset
)
1099 tmpfd
, tmpname
= mkstemp(suffix
='.rar')
1100 tmpf
= os
.fdopen(tmpfd
, "wb")
1103 # create main header: crc, type, flags, size, res1, res2
1104 mh
= S_BLK_HDR
.pack(0x90CF, 0x73, 0, 13) + ZERO
* (2+4)
1105 tmpf
.write(RAR_ID
+ mh
)
1108 buf
= rf
.read(BSIZE
)
1112 raise BadRarFile('read failed: ' + inf
.filename
)
1123 return self
._open
_unrar
(tmpname
, inf
, psw
, tmpname
)
1125 def _read_comment_v3(self
, inf
, psw
=None):
1128 rf
= open(inf
.volume_file
, "rb")
1129 rf
.seek(inf
.file_offset
)
1130 data
= rf
.read(inf
.compress_size
)
1134 cmt
= rar_decompress(inf
.extract_version
, inf
.compress_type
, data
,
1135 inf
.file_size
, inf
.flags
, inf
.CRC
, psw
, inf
.salt
)
1141 crc
+= (long(1) << 32)
1145 return self
._decode
_comment
(cmt
)
1147 # extract using unrar
1148 def _open_unrar(self
, rarfile
, inf
, psw
= None, tmpfile
= None):
1149 cmd
= [UNRAR_TOOL
] + list(OPEN_ARGS
)
1151 cmd
.append("-p" + psw
)
1154 # not giving filename avoids encoding related problems
1157 if PATH_SEP
!= os
.sep
:
1158 fn
= fn
.replace(PATH_SEP
, os
.sep
)
1161 # read from unrar pipe
1162 return PipeReader(self
, inf
, cmd
, tmpfile
)
1164 def _decode(self
, val
):
1165 for c
in TRY_ENCODINGS
:
1167 return val
.decode(c
)
1168 except UnicodeError:
1170 return val
.decode(self
._charset
, 'replace')
1172 def _decode_comment(self
, val
):
1173 if UNICODE_COMMENTS
:
1174 return self
._decode
(val
)
1177 # call unrar to extract a file
1178 def _extract(self
, fnlist
, path
=None, psw
=None):
1179 cmd
= [UNRAR_TOOL
] + list(EXTRACT_ARGS
)
1182 psw
= psw
or self
._password
1184 cmd
.append('-p' + psw
)
1189 cmd
.append(self
.rarfile
)
1193 if os
.sep
!= PATH_SEP
:
1194 fn
= fn
.replace(PATH_SEP
, os
.sep
)
1198 if path
is not None:
1199 cmd
.append(path
+ os
.sep
)
1202 p
= custom_popen(cmd
)
1203 output
= p
.communicate()[0]
1204 check_returncode(p
, output
)
1210 class UnicodeFilename
:
1211 """Handle unicode filename decompression"""
1213 def __init__(self
, name
, encdata
):
1214 self
.std_name
= bytearray(name
)
1215 self
.encdata
= bytearray(encdata
)
1216 self
.pos
= self
.encpos
= 0
1217 self
.buf
= bytearray()
1222 c
= self
.encdata
[self
.encpos
]
1231 return self
.std_name
[self
.pos
]
1236 def put(self
, lo
, hi
):
1242 hi
= self
.enc_byte()
1244 while self
.encpos
< len(self
.encdata
):
1246 flags
= self
.enc_byte()
1249 t
= (flags
>> flagbits
) & 3
1251 self
.put(self
.enc_byte(), 0)
1253 self
.put(self
.enc_byte(), hi
)
1255 self
.put(self
.enc_byte(), self
.enc_byte())
1260 for i
in range((n
& 0x7f) + 2):
1261 lo
= (self
.std_byte() + c
) & 0xFF
1264 for i
in range(n
+ 2):
1265 self
.put(self
.std_byte(), 0)
1266 return self
.buf
.decode("utf-16le", "replace")
1269 class RarExtFile(RawIOBase
):
1270 """Base class for file-like object that :meth:`RarFile.open` returns.
1272 Provides public methods and common crc checking.
- no short reads - .read() and .readinto() read as much as requested.
1276 - no internal buffer, use io.BufferedReader for that.
If :mod:`io` module is available (Python 2.6+, 3.x), then this class
1279 will inherit from :class:`io.RawIOBase` class. This makes line-based
1280 access available: :meth:`RarExtFile.readline` and ``for ln in f``.
1283 #: Filename of the archive entry
1286 def __init__(self
, rf
, inf
):
1287 RawIOBase
.__init
__(self
)
1289 # standard io.* properties
1290 self
.name
= inf
.filename
1295 self
.crc_check
= rf
._crc
_check
1308 self
.remain
= self
.inf
.file_size
1310 def read(self
, cnt
= None):
1311 """Read all or specified amount of data from archive entry."""
1314 if cnt
is None or cnt
< 0:
1316 elif cnt
> self
.remain
:
1322 data
= self
._read
(cnt
)
1324 self
.CRC
= crc32(data
, self
.CRC
)
1325 self
.remain
-= len(data
)
1326 if len(data
) != cnt
:
1327 raise BadRarFile("Failed the read enough data")
1330 if not data
or self
.remain
== 0:
1336 """Check final CRC."""
1337 if not self
.crc_check
:
1340 check_returncode(self
, '')
1341 if self
.remain
!= 0:
1342 raise BadRarFile("Failed the read enough data")
1345 crc
+= (long(1) << 32)
1346 if crc
!= self
.inf
.CRC
:
1347 raise BadRarFile("Corrupt file - CRC check failed: " + self
.inf
.filename
)
1349 def _read(self
, cnt
):
1350 """Actual read that gets sanitized cnt."""
1353 """Close open resources."""
1355 RawIOBase
.close(self
)
1362 """Hook delete to make sure tempfile is removed."""
1365 def readinto(self
, buf
):
1366 """Zero-copy read directly into buffer.
1371 data
= self
.read(len(buf
))
1377 if not isinstance(buf
, array
.array
):
1379 buf
[:n
] = array
.array(buf
.typecode
, data
)
1383 """Return current reading position in uncompressed data."""
1384 return self
.inf
.file_size
- self
.remain
1386 def seek(self
, ofs
, whence
= 0):
1389 On uncompressed files, the seeking works by actual
seeks so it's fast. On compressed files it's slow
- forward seeking happens by reading ahead,
1392 backwards by re-opening and decompressing from the start.
1395 # disable crc check when seeking
1398 fsize
= self
.inf
.file_size
1399 cur_ofs
= self
.tell()
1401 if whence
== 0: # seek from beginning of file
1403 elif whence
== 1: # seek from current position
1404 new_ofs
= cur_ofs
+ ofs
1405 elif whence
== 2: # seek from end of file
1406 new_ofs
= fsize
+ ofs
1408 raise ValueError('Invalid value for whence')
1413 elif new_ofs
> fsize
:
1416 # do the actual seek
1417 if new_ofs
>= cur_ofs
:
1418 self
._skip
(new_ofs
- cur_ofs
)
1420 # process old data ?
1421 #self._skip(fsize - cur_ofs)
1427 def _skip(self
, cnt
):
1428 """Read and discard data"""
1431 buf
= self
.read(8192)
1433 buf
= self
.read(cnt
)
1445 Writing is not supported."""
1451 Seeking is supported, although it's slow on compressed files.
1456 """Read all remaining data"""
1457 # avoid RawIOBase default impl
1461 class PipeReader(RarExtFile
):
1462 """Read data from pipe, handle tempfile cleanup."""
1464 def __init__(self
, rf
, inf
, cmd
, tempfile
=None):
1467 self
.tempfile
= tempfile
1468 RarExtFile
.__init
__(self
, rf
, inf
)
1470 def _close_proc(self
):
1473 if self
.proc
.stdout
:
1474 self
.proc
.stdout
.close()
1476 self
.proc
.stdin
.close()
1477 if self
.proc
.stderr
:
1478 self
.proc
.stderr
.close()
1480 self
.returncode
= self
.proc
.returncode
1484 RarExtFile
._open
(self
)
1489 # launch new process
1491 self
.proc
= custom_popen(self
.cmd
)
1492 self
.fd
= self
.proc
.stdout
1494 # avoid situation where unrar waits on stdin
1496 self
.proc
.stdin
.close()
1498 def _read(self
, cnt
):
1499 """Read from pipe."""
1501 # normal read is usually enough
1502 data
= self
.fd
.read(cnt
)
1503 if len(data
) == cnt
or not data
:
1506 # short read, try looping
1510 data
= self
.fd
.read(cnt
)
1515 return EMPTY
.join(buf
)
1518 """Close open resources."""
1521 RarExtFile
.close(self
)
1525 os
.unlink(self
.tempfile
)
1528 self
.tempfile
= None
1531 def readinto(self
, buf
):
1532 """Zero-copy read directly into buffer."""
1534 if cnt
> self
.remain
:
1536 vbuf
= memoryview(buf
)
1539 res
= self
.fd
.readinto(vbuf
[got
: cnt
])
1543 self
.CRC
= crc32(vbuf
[got
: got
+ res
], self
.CRC
)
class DirectReader(RarExtFile):
    """Read uncompressed data directly from archive.

    The data may continue in following volumes; ``self.cur`` is the file
    header of the part currently being read and ``self.cur_avail`` the
    number of data bytes left in that part.
    """

    def _open(self):
        """Open the volume holding the entry and position at its data."""
        RarExtFile._open(self)

        self.volfile = self.inf.volume_file
        self.fd = open(self.volfile, "rb", 0)
        self.fd.seek(self.inf.header_offset, 0)
        # re-parse the header; afterwards fd is expected to point at data
        self.cur = self.rf._parse_header(self.fd)
        self.cur_avail = self.cur.add_size

    def _skip(self, cnt):
        """RAR Seek, skipping through rar files to get to correct position
        """

        while cnt > 0:
            # next vol needed?
            if self.cur_avail == 0:
                if not self._open_next():
                    break

            # fd is in read pos, do the skip
            if cnt > self.cur_avail:
                # rest of this part is skipped whole, no seek needed
                cnt -= self.cur_avail
                self.remain -= self.cur_avail
                self.cur_avail = 0
            else:
                self.fd.seek(cnt, 1)
                self.cur_avail -= cnt
                self.remain -= cnt
                cnt = 0

    def _read(self, cnt):
        """Read from potentially multi-volume archive."""

        buf = []
        while cnt > 0:
            # next vol needed?
            if self.cur_avail == 0:
                if not self._open_next():
                    break

            # fd is in read pos, do the read; never cross the part boundary
            data = self.fd.read(min(cnt, self.cur_avail))
            if not data:
                break

            # got some data
            cnt -= len(data)
            self.cur_avail -= len(data)
            buf.append(data)

        if len(buf) == 1:
            return buf[0]
        return EMPTY.join(buf)

    def _open_next(self):
        """Proceed to next volume.

        Returns False when the file does not continue in another volume,
        True when the next part was opened.  Raises BadRarFile when the
        next volume does not contain the expected entry.
        """

        # is the file split over archives?
        if (self.cur.flags & RAR_FILE_SPLIT_AFTER) == 0:
            return False

        # done with the current volume
        if self.fd:
            self.fd.close()
            self.fd = None

        # open next part
        self.volfile = self.rf._next_volname(self.volfile)
        fd = open(self.volfile, "rb", 0)
        # keep the handle on self right away so close() releases it even
        # if header parsing below fails
        self.fd = fd

        # loop until first file header
        while True:
            cur = self.rf._parse_header(fd)
            if not cur:
                raise BadRarFile("Unexpected EOF")
            if cur.type in (RAR_BLOCK_MARK, RAR_BLOCK_MAIN):
                # skip over any payload of the marker/main block
                if cur.add_size:
                    fd.seek(cur.add_size, 1)
                continue
            if cur.orig_filename != self.inf.orig_filename:
                raise BadRarFile("Did not found file entry")
            self.cur = cur
            self.cur_avail = cur.add_size
            return True

    def readinto(self, buf):
        """Zero-copy read directly into buffer.

        Returns the number of bytes stored into ``buf``.
        """
        got = 0
        vbuf = memoryview(buf)
        while got < len(buf):
            # next vol needed?
            if self.cur_avail == 0:
                if not self._open_next():
                    break

            # length for next read
            cnt = len(buf) - got
            if cnt > self.cur_avail:
                cnt = self.cur_avail

            # read into temp view
            res = self.fd.readinto(vbuf[got : got + cnt])
            if not res:
                break
            # NOTE(review): crc_check is expected to be set by RarExtFile;
            # the guard line is not visible in this chunk - confirm.
            if self.crc_check:
                self.CRC = crc32(vbuf[got : got + res], self.CRC)
            self.cur_avail -= res
            self.remain -= res
            got += res
        return got
class HeaderDecrypt(object):
    """File-like object that decrypts from another file"""

    # Largest single read accepted; a bigger request is treated as a sign
    # of garbage produced by decrypting with a wrong key.
    # NOTE(review): limit value restored from upstream rarfile - confirm.
    MAX_READ = 8 * 1024

    def __init__(self, f, key, iv):
        self.f = f
        self.ciph = AES.new(key, AES.MODE_CBC, iv)
        # decrypted bytes that were not yet handed out
        self.buf = EMPTY

    def tell(self):
        """Return position of the underlying (encrypted) file."""
        return self.f.tell()

    def read(self, cnt=None):
        """Return up to ``cnt`` decrypted bytes.

        Data is decrypted in whole cipher blocks; the unused tail of the
        last block is kept for the next call.  Fewer bytes are returned
        when the underlying file ends.  Raises BadRarFile when ``cnt`` is
        missing, negative or larger than MAX_READ.
        """
        if cnt is None or cnt < 0 or cnt > self.MAX_READ:
            raise BadRarFile('Bad count to header decrypt - wrong password?')

        # consume old data
        if cnt <= len(self.buf):
            res = self.buf[:cnt]
            self.buf = self.buf[cnt:]
            return res
        parts = [self.buf]
        cnt -= len(self.buf)
        self.buf = EMPTY

        # decrypt new data
        blk = self.ciph.block_size
        while cnt > 0:
            enc = self.f.read(blk)
            if len(enc) < blk:
                # EOF or truncated block, cannot decrypt further
                break
            dec = self.ciph.decrypt(enc)
            if cnt >= len(dec):
                parts.append(dec)
                cnt -= len(dec)
            else:
                parts.append(dec[:cnt])
                self.buf = dec[cnt:]
                cnt = 0

        return EMPTY.join(parts)
1709 ## Utility functions
def rar3_s2k(psw, salt):
    """String-to-key hash for RAR3.

    ``psw`` is the text password (encoded here as UTF-16LE) and ``salt``
    a byte string appended to it.  Returns ``(key, iv)``: the key is the
    first 16 digest bytes with each 32-bit word byte-swapped, the iv is
    one digest byte collected at the start of each of the 16 outer rounds.
    """
    # local import keeps the digest constructor in scope regardless of
    # how the module imported it
    from hashlib import sha1

    seed = psw.encode('utf-16le') + salt
    iv = EMPTY
    h = sha1()
    for i in range(16):
        for j in range(0x4000):
            # only the low 3 bytes of the round counter are hashed
            cnt = S_LONG.pack(i*0x4000 + j)
            h.update(seed + cnt[:3])
            if j == 0:
                # last byte of the running 20-byte digest
                iv += h.digest()[19:20]
    key_be = h.digest()[:16]
    key_le = pack("<LLLL", *unpack(">LLLL", key_be))
    return key_le, iv
def rar_decompress(vers, meth, data, declen=0, flags=0, crc=0, psw=None, salt=None):
    """Decompress blob of compressed data.

    Used for data with non-standard header - eg. comments.

    A minimal single-file archive is written to a temporary file and
    handed to the external unrar tool; whatever the tool writes to its
    output is returned.  ``data`` is returned unchanged when it is stored
    without compression and without password.  The temporary file is
    always removed.
    """

    # already uncompressed?
    if meth == RAR_M0 and (flags & RAR_FILE_PASSWORD) == 0:
        return data

    # take only necessary flags
    flags = flags & (RAR_FILE_PASSWORD | RAR_FILE_SALT | RAR_FILE_DICTMASK)
    flags |= RAR_LONG_BLOCK

    # file header
    fname = bytes('data', 'ascii')
    # NOTE(review): date/mode values restored from upstream rarfile; they
    # are only placeholders for the fake entry - confirm.
    date = 0
    mode = 0x20
    fhdr = S_FILE_HDR.pack(len(data), declen, RAR_OS_MSDOS, crc,
                           date, vers, meth, len(fname), mode)
    fhdr += fname
    if flags & RAR_FILE_SALT:
        if not salt:
            # header claims a salt but none was given, cannot decrypt
            return EMPTY
        fhdr += salt

    # full header: pack once with zero crc to compute the real header crc
    hlen = S_BLK_HDR.size + len(fhdr)
    hdr = S_BLK_HDR.pack(0, RAR_BLOCK_FILE, flags, hlen) + fhdr
    hcrc = crc32(hdr[2:]) & 0xFFFF
    hdr = S_BLK_HDR.pack(hcrc, RAR_BLOCK_FILE, flags, hlen) + fhdr

    # archive main header
    mh = S_BLK_HDR.pack(0x90CF, RAR_BLOCK_MAIN, 0, 13) + ZERO * (2+4)

    # decompress via temp rar
    tmpfd, tmpname = mkstemp(suffix='.rar')
    tmpf = os.fdopen(tmpfd, "wb")
    try:
        tmpf.write(RAR_ID + mh + hdr + data)
        # flush to disk before the tool opens the file
        tmpf.close()

        cmd = [UNRAR_TOOL] + list(OPEN_ARGS)
        if psw is not None and (flags & RAR_FILE_PASSWORD):
            cmd.append("-p" + psw)
        else:
            # never let the tool prompt for a password
            cmd.append("-p-")
        cmd.append(tmpname)

        p = custom_popen(cmd)
        return p.communicate()[0]
    finally:
        # close() is a no-op when already closed
        tmpf.close()
        os.unlink(tmpname)
def to_datetime(t):
    """Convert 6-part time tuple into datetime object.

    ``t`` is ``(year, month, day, hour, minute, seconds)`` where seconds
    may carry a fractional part, which becomes microseconds.  ``None`` is
    passed through.  Out-of-range month, day, hour, minute and second
    values are clamped into range instead of raising.
    """

    if t is None:
        return None

    # extract values
    year, mon, day, h, m, xs = t
    s = int(xs)
    us = int(1000000 * (xs - s))

    # assume the values are valid
    try:
        return datetime(year, mon, day, h, m, s, us)
    except ValueError:
        pass

    # sanitize invalid values
    # days per month, index 0 unused; February allows 29 here and is
    # corrected below for non-leap years
    MDAY = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
    mon = max(1, min(mon, 12))
    day = max(1, min(day, MDAY[mon]))
    h = min(h, 23)
    m = min(m, 59)
    s = min(s, 59)
    if mon == 2 and day == 29:
        try:
            return datetime(year, mon, day, h, m, s, us)
        except ValueError:
            # not a leap year
            day = 28
    return datetime(year, mon, day, h, m, s, us)
def parse_dos_time(stamp):
    """Parse standard 32-bit DOS timestamp.

    Returns ``(year, month, day, hour, minute, second)``.  Fields are
    taken from the low bits upwards: 5 bits seconds/2, 6 bits minutes,
    5 bits hours, 5 bits day, 4 bits month, 7 bits years since 1980.
    Values are not range-checked.
    """

    sec = stamp & 0x1F
    stamp >>= 5
    minute = stamp & 0x3F
    stamp >>= 6
    hr = stamp & 0x1F
    stamp >>= 5
    day = stamp & 0x1F
    stamp >>= 5
    mon = stamp & 0x0F
    stamp >>= 4
    yr = (stamp & 0x7F) + 1980
    # seconds are stored with 2-second resolution
    return (yr, mon, day, hr, minute, sec * 2)
def custom_popen(cmd):
    """Disconnect cmd from parent fds, read only from stdout.

    Starts ``cmd`` (argument list) unbuffered, with stdin and stdout as
    pipes and stderr merged into stdout, and returns the Popen object.
    Raises RarExecError when the executable is not found; other OSErrors
    are passed through.
    """

    # 0 is the Popen default; on Windows keep the tool from opening
    # a console window
    creationflags = 0
    if sys.platform == 'win32':
        creationflags = 0x08000000 # CREATE_NO_WINDOW

    # run command
    try:
        p = Popen(cmd, bufsize = 0,
                  stdout = PIPE, stdin = PIPE, stderr = STDOUT,
                  creationflags = creationflags)
    except OSError:
        # exception object fetched via exc_info, as the original code does
        ex = sys.exc_info()[1]
        if ex.errno == errno.ENOENT:
            raise RarExecError("Unrar not installed? (rarfile.UNRAR_TOOL=%r)" % UNRAR_TOOL)
        raise
    return p
1847 def check_returncode(p
, out
):
1848 """Raise exception according to unrar exit code"""
1854 # map return code to exception class
1856 RarWarning
, RarFatalError
, RarCRCError
, RarLockedArchiveError
,
1857 RarWriteError
, RarOpenError
, RarUserError
, RarMemoryError
,
1858 RarCreateError
, RarNoFilesError
] # codes from rar.txt
1859 if code
> 0 and code
< len(errmap
):
1866 exc
= RarUnknownError
1870 msg
= "%s [%d]: %s" % (exc
.__doc
__, p
.returncode
, out
)
1872 msg
= "%s [%d]" % (exc
.__doc
__, p
.returncode
)