3 # Copyright (c) 2005-2012 Marko Kreen <markokr@gmail.com>
5 # Permission to use, copy, modify, and/or distribute this software for any
6 # purpose with or without fee is hereby granted, provided that the above
7 # copyright notice and this permission notice appear in all copies.
9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 r
"""RAR archive reader.
19 This is a Python module for reading RAR archives. The interface
20 is made as zipfile-like as possible.
23 - Parse archive structure with Python.
24 - Extract non-compressed files with Python
25 - Extract compressed files with unrar.
26 - Optionally write compressed data to temp file to speed up unrar,
27 otherwise it needs to scan whole archive on each execution.
33 rf = rarfile.RarFile('myarchive.rar')
34 for f in rf.infolist():
35 print f.filename, f.file_size
36 if f.filename == 'README':
39 There are a few module-level parameters to tune behaviour;
40 here they are with their defaults and the reason to change each::
44 # Set to full path of unrar.exe if it is not in PATH
45 rarfile.UNRAR_TOOL = "unrar"
47 # Set to 0 if you don't look at comments and want to
48 # avoid wasting time for parsing them
49 rarfile.NEED_COMMENTS = 1
51 # Set to 1 if you don't want to deal with decoding comments
52 # from an unknown encoding. rarfile will try a couple of common
53 # encodings in sequence.
54 rarfile.UNICODE_COMMENTS = 0
56 # Set to 1 if you prefer timestamps to be datetime objects
58 rarfile.USE_DATETIME = 0
60 # Set to '/' to be more compatible with zipfile
61 rarfile.PATH_SEP = '\\'
63 For more details, refer to source.
69 # export only interesting items
70 __all__
= ['is_rarfile', 'RarInfo', 'RarFile', 'RarExtFile']
73 ## Imports and compat - support both Python 2.x and 3.x
76 import sys
, os
, struct
77 from struct
import pack
, unpack
78 from binascii
import crc32
79 from tempfile
import mkstemp
80 from subprocess
import Popen
, PIPE
, STDOUT
81 from datetime
import datetime
83 # only needed for encrypted headers
85 from Crypto
.Cipher
import AES
87 from hashlib
import sha1
89 from sha
import new
as sha1
95 if sys
.hexversion
< 0x3000000:
96 # prefer 3.x behaviour
98 # py2.6 has broken bytes()
102 # see if compat bytearray() is needed
def __init__(self, val = ''):
    """Store `val` in an array of unsigned bytes and expose its accessors.

    The instance gets `.arr` (the backing array) plus `append`,
    `__getitem__` and `__len__` bound directly to that array.
    """
    backing = array.array('B', val)
    self.arr = backing
    # hand the few needed sequence operations straight to the array
    for attr in ('append', '__getitem__', '__len__'):
        setattr(self, attr, getattr(backing, attr))
def decode(self, *args):
    """Decode the stored bytes to text.

    `args` are handed unchanged to the .decode() of the byte string
    produced by the backing array.
    """
    raw = self.arr.tostring()
    return raw.decode(*args)
116 # Optimized .readinto() requires memoryview
123 # Struct() for older python
125 from struct
import Struct
128 def __init__(self
, fmt
):
130 self
.size
= struct
.calcsize(fmt
)
def unpack(self, buf):
    """Unpack `buf` according to this object's format string."""
    fmt = self.format
    return struct.unpack(fmt, buf)
def unpack_from(self, buf, ofs = 0):
    """Unpack one record from `buf`, starting at byte offset `ofs`."""
    end = ofs + self.size
    # slice out exactly self.size bytes, then unpack that slice
    record = buf[ofs:end]
    return struct.unpack(self.format, record)
def pack(self, *args):
    """Pack `args` into a byte string using this object's format string."""
    fmt = self.format
    return struct.pack(fmt, *args)
138 # file object superclass
140 from io
import RawIOBase
142 class RawIOBase(object):
148 ## Module configuration. Can be tuned after importing.
151 # default fallback charset
152 DEFAULT_CHARSET
= "windows-1252"
154 # list of encodings to try, with fallback to DEFAULT_CHARSET if none succeed
155 TRY_ENCODINGS
= ('utf8', 'utf-16le')
157 # 'unrar', 'rar' or full path to either one
160 # Command line args to use for opening file for reading.
161 OPEN_ARGS
= ('p', '-inul')
163 # Command line args to use for extracting file to disk.
164 EXTRACT_ARGS
= ('x', '-y', '-idq')
167 TEST_ARGS
= ('t', '-idq')
169 # whether to speed up decompression by using tmp archive
172 # limit the filesize for tmp archive usage
173 HACK_SIZE_LIMIT
= 20*1024*1024
175 # whether to parse file/archive comments.
178 # whether to convert comments to unicode strings
181 # When RAR is corrupt, stopping on bad header is better
182 # On unknown/misparsed RAR headers reporting is better
183 REPORT_BAD_HEADER
= 0
185 # Convert RAR time tuple into datetime() object
188 # Separator for path name components. RAR internally uses '\\'.
189 # Use '/' to be similar with zipfile.
# block types, compared against the `type` field of a parsed block header;
# the trailing comment shows the value as an ASCII character
RAR_BLOCK_MARK          = 0x72 # r
RAR_BLOCK_MAIN          = 0x73 # s
RAR_BLOCK_FILE          = 0x74 # t
RAR_BLOCK_OLD_COMMENT   = 0x75 # u
RAR_BLOCK_OLD_EXTRA     = 0x76 # v
RAR_BLOCK_OLD_SUB       = 0x77 # w
RAR_BLOCK_OLD_RECOVERY  = 0x78 # x
RAR_BLOCK_OLD_AUTH      = 0x79 # y
RAR_BLOCK_SUB           = 0x7a # z
RAR_BLOCK_ENDARC        = 0x7b # {

# flags for RAR_BLOCK_MAIN
RAR_MAIN_VOLUME         = 0x0001
RAR_MAIN_COMMENT        = 0x0002
RAR_MAIN_LOCK           = 0x0004
RAR_MAIN_SOLID          = 0x0008
RAR_MAIN_NEWNUMBERING   = 0x0010  # selects new-style volume naming for the next volume
RAR_MAIN_AUTH           = 0x0020
RAR_MAIN_RECOVERY       = 0x0040
RAR_MAIN_PASSWORD       = 0x0080  # block headers are encrypted
RAR_MAIN_FIRSTVOLUME    = 0x0100
RAR_MAIN_ENCRYPTVER     = 0x0200

# flags for RAR_BLOCK_FILE
RAR_FILE_SPLIT_BEFORE   = 0x0001
RAR_FILE_SPLIT_AFTER    = 0x0002
RAR_FILE_PASSWORD       = 0x0004
RAR_FILE_COMMENT        = 0x0008
RAR_FILE_SOLID          = 0x0010
RAR_FILE_DICTMASK       = 0x00e0  # mask covering the RAR_FILE_DICT* / RAR_FILE_DIRECTORY bits
RAR_FILE_DICT64         = 0x0000
RAR_FILE_DICT128        = 0x0020
RAR_FILE_DICT256        = 0x0040
RAR_FILE_DICT512        = 0x0060
RAR_FILE_DICT1024       = 0x0080
RAR_FILE_DICT2048       = 0x00a0
RAR_FILE_DICT4096       = 0x00c0
RAR_FILE_DIRECTORY      = 0x00e0  # all mask bits set: entry is a directory
RAR_FILE_LARGE          = 0x0100  # high 32 bits of the sizes follow the fixed file header
RAR_FILE_UNICODE        = 0x0200
RAR_FILE_SALT           = 0x0400
RAR_FILE_VERSION        = 0x0800
RAR_FILE_EXTTIME        = 0x1000
RAR_FILE_EXTFLAGS       = 0x2000

# flags for RAR_BLOCK_ENDARC
RAR_ENDARC_NEXT_VOLUME  = 0x0001
RAR_ENDARC_DATACRC      = 0x0002
RAR_ENDARC_REVSPACE     = 0x0004
RAR_ENDARC_VOLNR        = 0x0008

# flags common to all blocks
RAR_SKIP_IF_UNKNOWN     = 0x4000
RAR_LONG_BLOCK          = 0x8000  # header carries an add_size field
260 # Compression methods - '0'..'5'
269 ## internal constants
# archive signature; readers fetch len(RAR_ID) leading bytes of a file
RAR_ID = bytes("Rar!\x1a\x07\x00", 'ascii')
# single NUL byte and empty byte string, used as separator / join helpers
ZERO = bytes("\0", 'ascii')
EMPTY = bytes("", 'ascii')

# common block header: header CRC, block type, flags, header size
S_BLK_HDR = Struct('<HBHH')
# file header: packed size, unpacked size, host OS, CRC, DOS time,
# extract version, compression method, name length, mode
S_FILE_HDR = Struct('<LLBLLBBHL')
S_LONG = Struct('<L')
S_SHORT = Struct('<H')
S_BYTE = Struct('<B')
# old-style comment sub-header: unpacked length, version, method, CRC
S_COMMENT_HDR = Struct('<HBBH')
class Error(Exception):
    """Base class for all rarfile errors."""
class BadRarFile(Error):
    """Incorrect data in the archive (broken header, CRC mismatch, short read)."""
class NotRarFile(Error):
    """The file is not a RAR archive."""
class BadRarName(Error):
    """Cannot guess multipart name components (next volume name)."""
class NoRarEntry(Error):
    """Requested file was not found in the RAR archive."""
class PasswordRequired(Error):
    """File requires a password."""
class NeedFirstVolume(Error):
    """Need to start from the first volume."""
class NoCrypto(Error):
    """Cannot parse encrypted headers - no crypto available."""
class RarExecError(Error):
    """Problem reported by the external unrar/rar tool."""
class RarWarning(RarExecError):
    """Non-fatal error reported by unrar/rar."""
307 class RarFatalError(RarExecError
):
class RarCRCError(RarExecError):
    """CRC error reported by unrar/rar during unpacking."""
class RarLockedArchiveError(RarExecError):
    """Must not modify a locked archive."""
313 class RarWriteError(RarExecError
):
315 class RarOpenError(RarExecError
):
317 class RarUserError(RarExecError
):
319 class RarMemoryError(RarExecError
):
321 class RarCreateError(RarExecError
):
323 class RarUserBreak(RarExecError
):
class RarUnknownError(RarExecError):
    """unrar/rar finished with an unknown exit code."""
class RarSignalExit(RarExecError):
    """unrar/rar exited because of a signal."""
332 '''Check quickly whether file is rar archive.'''
333 buf
= open(fn
, "rb").read(len(RAR_ID
))
337 class RarInfo(object):
338 '''An entry in rar archive.
341 File name with relative path.
342 Path separator is a backslash by default; set rarfile.PATH_SEP to '/' to change it.
343 Always unicode string.
345 Modification time, tuple of (year, month, day, hour, minute, second).
346 Or datetime() object if USE_DATETIME is set.
352 Compression method: 0x30 - 0x35.
353 @ivar extract_version:
354 Minimal Rar version needed for decompressing.
356 Host OS type, one of RAR_OS_* constants.
358 File attributes. May be either dos-style or unix-style, depending on host_os.
360 CRC-32 of uncompressed file, unsigned int.
362 Volume nr, starting from 0.
364 Volume file name, where file starts.
366 One of RAR_BLOCK_* types. Only entries with type==RAR_BLOCK_FILE are shown in .infolist().
368 For files, RAR_FILE_* bits.
370 File comment (unicode string or None).
373 Optional time field: Modification time, with float seconds.
374 Same as .date_time but with more precision.
376 Optional time field: creation time, with float seconds.
378 Optional time field: last access time, with float seconds.
380 Optional time field: archival time, with float seconds.
384 # zipfile-compatible fields
392 'orig_filename', # bytes in unknown encoding
394 # rar-specific fields
402 # optional extended time fields
403 # tuple where the sec is float, or datetime().
404 'mtime', # same as .date_time
423 '''Returns True if the entry is a directory.'''
424 if self
.type == RAR_BLOCK_FILE
:
425 return (self
.flags
& RAR_FILE_DIRECTORY
) == RAR_FILE_DIRECTORY
def needs_password(self):
    """Return True if the entry is password-protected, else False.

    Tests the RAR_FILE_PASSWORD bit of `self.flags`. The result is a
    real bool rather than the raw masked value (0 or 4), so it agrees
    with isdir(); truthiness for existing callers is unchanged.
    """
    return (self.flags & RAR_FILE_PASSWORD) != 0
432 class RarFile(object):
433 '''Parse RAR structure, provide access to files in archive.
436 Archive comment (unicode string or None).
439 def __init__(self
, rarfile
, mode
="r", charset
=None, info_callback
=None, crc_check
= True):
440 """Open and parse a RAR archive.
442 @param rarfile: archive file name
443 @param mode: only 'r' is supported.
444 @param charset: fallback charset to use, if filenames are not already Unicode-enabled.
445 @param info_callback: debug callback, gets to see all archive entries.
446 @param crc_check: set to False to disable CRC checks
448 self
.rarfile
= rarfile
450 self
._charset
= charset
or DEFAULT_CHARSET
451 self
._info
_callback
= info_callback
455 self
._needs
_password
= False
456 self
._password
= None
457 self
._crc
_check
= crc_check
462 raise NotImplementedError("RarFile supports only mode=r")
469 def __exit__(self
, type, value
, traceback
):
472 def setpassword(self
, password
):
473 '''Sets the password to use when extracting.'''
474 self
._password
= password
def needs_password(self):
    """Tell whether any entry of the archive needs a password to extract."""
    password_needed = self._needs_password
    return password_needed
483 '''Return list of filenames in archive.'''
484 return [f
.filename
for f
in self
._info
_list
]
487 '''Return RarInfo objects for all files/directories in archive.'''
488 return self
._info
_list
490 def getinfo(self
, fname
):
491 '''Return RarInfo for file.'''
493 if isinstance(fname
, RarInfo
):
496 # accept both ways here
498 fname2
= fname
.replace("\\", "/")
500 fname2
= fname
.replace("/", "\\")
503 return self
._info
_map
[fname
]
506 return self
._info
_map
[fname2
]
508 raise NoRarEntry("No such file: "+fname
)
510 def open(self
, fname
, mode
= 'r', psw
= None):
511 '''Return open file object, where the data can be read.
513 The object implements io.RawIOBase interface, so it can
514 be further wrapped with io.BufferedReader and io.TextIOWrapper.
516 On older Python where io module is not available, it implements
517 only .read(), .seek(), .tell() and .close() methods.
519 The object is seekable, although the seeking is fast only on
520 uncompressed files, on compressed files the seeking is implemented
521 by reading ahead and/or restarting the decompression.
523 @param fname: file name or RarInfo instance.
524 @param mode: must be 'r'
525 @param psw: password to use for extracting.
529 raise NotImplementedError("RarFile.open() supports only mode=r")
532 inf
= self
.getinfo(fname
)
534 raise TypeError("Directory does not have any data: " + inf
.filename
)
536 if inf
.flags
& RAR_FILE_SPLIT_BEFORE
:
537 raise NeedFirstVolume("Partial file, please start from first volume: " + inf
.filename
)
540 if inf
.needs_password():
541 psw
= psw
or self
._password
543 raise PasswordRequired("File %s requires password" % inf
.filename
)
547 # is temp write usable?
548 if not USE_EXTRACT_HACK
or not self
._main
:
550 elif self
._main
.flags
& (RAR_MAIN_SOLID | RAR_MAIN_PASSWORD
):
552 elif inf
.flags
& (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER
):
554 elif inf
.file_size
> HACK_SIZE_LIMIT
:
560 if inf
.compress_type
== RAR_M0
and (inf
.flags
& RAR_FILE_PASSWORD
) == 0:
561 return self
._open
_clear
(inf
)
563 return self
._open
_hack
(inf
, psw
)
565 return self
._open
_unrar
(self
.rarfile
, inf
, psw
)
567 def read(self
, fname
, psw
= None):
568 """Return uncompressed data for archive entry.
570 For longer files using .open() may be better idea.
572 @param fname: filename or RarInfo instance
573 @param psw: password to use for extracting.
576 f
= self
.open(fname
, 'r', psw
)
583 """Release open resources."""
587 """Print archive file list to stdout."""
588 for f
in self
._info
_list
:
591 def extract(self
, member
, path
=None, pwd
=None):
592 """Extract single file into current directory.
594 @param member: filename or RarInfo instance
595 @param path: optional destination path
596 @param pwd: optional password to use
598 if isinstance(member
, RarInfo
):
599 fname
= member
.filename
602 self
._extract
([fname
], path
, pwd
)
604 def extractall(self
, path
=None, members
=None, pwd
=None):
605 """Extract all files into current directory.
607 @param path: optional destination path
608 @param members: optional filename or RarInfo instance list to extract
609 @param pwd: optional password to use
612 if members
is not None:
614 if isinstance(m
, RarInfo
):
615 fnlist
.append(m
.filename
)
618 self
._extract
(fnlist
, path
, pwd
)
621 """Let 'unrar' test the archive.
623 cmd
= [UNRAR_TOOL
] + list(TEST_ARGS
)
624 if self
._password
is not None:
625 cmd
.append('-p' + self
._password
)
628 cmd
.append(self
.rarfile
)
629 p
= custom_popen(cmd
)
630 output
= p
.communicate()[0]
631 check_returncode(p
, output
)
638 def _process_entry(self
, item
):
639 if item
.type == RAR_BLOCK_FILE
:
640 # use only first part
641 if (item
.flags
& RAR_FILE_SPLIT_BEFORE
) == 0:
642 self
._info
_map
[item
.filename
] = item
643 self
._info
_list
.append(item
)
644 # remember if any items require password
645 if item
.needs_password():
646 self
._needs
_password
= True
647 elif len(self
._info
_list
) > 0:
648 # final crc is in last block
649 old
= self
._info
_list
[-1]
651 old
.compress_size
+= item
.compress_size
653 # parse new-style comment
654 if item
.type == RAR_BLOCK_SUB
and item
.filename
== 'CMT':
655 if not NEED_COMMENTS
:
657 elif item
.flags
& (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER
):
659 elif item
.flags
& RAR_FILE_SOLID
:
661 cmt
= self
._read
_comment
_v
3(item
, self
._password
)
662 if len(self
._info
_list
) > 0:
663 old
= self
._info
_list
[-1]
667 cmt
= self
._read
_comment
_v
3(item
, self
._password
)
670 if self
._info
_callback
:
671 self
._info
_callback
(item
)
683 def _parse_real(self
):
684 fd
= open(self
.rarfile
, "rb")
686 id = fd
.read(len(RAR_ID
))
688 raise NotRarFile("Not a Rar archive: "+self
.rarfile
)
690 volume
= 0 # first vol (.rar) is 0
693 volfile
= self
.rarfile
696 h
= None # don't read past ENDARC
698 h
= self
._parse
_header
(fd
)
702 volfile
= self
._next
_volname
(volfile
)
704 fd
= open(volfile
, "rb")
711 h
.volume_file
= volfile
713 if h
.type == RAR_BLOCK_MAIN
and not self
._main
:
715 if h
.flags
& RAR_MAIN_NEWNUMBERING
:
716 # RAR 2.x does not set FIRSTVOLUME,
717 # so check it only if NEWNUMBERING is used
718 if (h
.flags
& RAR_MAIN_FIRSTVOLUME
) == 0:
719 raise NeedFirstVolume("Need to start from first volume")
720 if h
.flags
& RAR_MAIN_PASSWORD
:
721 self
._needs
_password
= True
722 if not self
._password
:
725 elif h
.type == RAR_BLOCK_ENDARC
:
726 more_vols
= h
.flags
& RAR_ENDARC_NEXT_VOLUME
728 elif h
.type == RAR_BLOCK_FILE
:
729 # RAR 2.x does not write RAR_BLOCK_ENDARC
730 if h
.flags
& RAR_FILE_SPLIT_AFTER
:
732 # RAR 2.x does not set RAR_MAIN_FIRSTVOLUME
733 if volume
== 0 and h
.flags
& RAR_FILE_SPLIT_BEFORE
:
734 raise NeedFirstVolume("Need to start from first volume")
737 self
._process
_entry
(h
)
741 fd
.seek(h
.file_offset
+ h
.add_size
, 0)
743 # AES encrypted headers
744 _last_aes_key
= (None, None, None) # (salt, key, iv)
745 def _decrypt_header(self
, fd
):
747 raise NoCrypto('Cannot parse encrypted headers - no crypto')
749 if self
._last
_aes
_key
[0] == salt
:
750 key
, iv
= self
._last
_aes
_key
[1:]
752 key
, iv
= rar3_s2k(self
._password
, salt
)
753 self
._last
_aes
_key
= (salt
, key
, iv
)
754 return HeaderDecrypt(fd
, key
, iv
)
757 def _parse_header(self
, fd
):
759 # handle encrypted headers
760 if self
._main
and self
._main
.flags
& RAR_MAIN_PASSWORD
:
761 if not self
._password
:
763 fd
= self
._decrypt
_header
(fd
)
765 # now read actual header
766 return self
._parse
_block
_header
(fd
)
768 if REPORT_BAD_HEADER
:
769 raise BadRarFile('Broken header in RAR file')
773 def _parse_block_header(self
, fd
):
775 h
.header_offset
= fd
.tell()
778 # read and parse base header
779 buf
= fd
.read(S_BLK_HDR
.size
)
782 t
= S_BLK_HDR
.unpack_from(buf
)
783 h
.header_crc
, h
.type, h
.flags
, h
.header_size
= t
784 h
.header_base
= S_BLK_HDR
.size
788 if h
.header_size
> S_BLK_HDR
.size
:
789 h
.header_data
= buf
+ fd
.read(h
.header_size
- S_BLK_HDR
.size
)
792 h
.file_offset
= fd
.tell()
795 if len(h
.header_data
) != h
.header_size
:
796 if REPORT_BAD_HEADER
:
797 raise BadRarFile('Unexpected EOF when reading header')
800 # block has data associated with it?
801 if h
.flags
& RAR_LONG_BLOCK
:
802 h
.add_size
= S_LONG
.unpack_from(h
.header_data
, pos
)[0]
806 # parse interesting ones, decide header boundaries for crc
807 if h
.type == RAR_BLOCK_MARK
:
809 elif h
.type == RAR_BLOCK_MAIN
:
811 if h
.flags
& RAR_MAIN_ENCRYPTVER
:
813 if h
.flags
& RAR_MAIN_COMMENT
:
814 self
._parse
_subblocks
(h
, h
.header_base
)
815 self
.comment
= h
.comment
816 elif h
.type == RAR_BLOCK_FILE
:
817 self
._parse
_file
_header
(h
, pos
)
818 elif h
.type == RAR_BLOCK_SUB
:
819 self
._parse
_file
_header
(h
, pos
)
820 h
.header_base
= h
.header_size
821 elif h
.type == RAR_BLOCK_OLD_AUTH
:
823 elif h
.type == RAR_BLOCK_OLD_EXTRA
:
826 h
.header_base
= h
.header_size
829 if h
.type == RAR_BLOCK_OLD_SUB
:
830 crcdat
= h
.header_data
[2:] + fd
.read(h
.add_size
)
832 crcdat
= h
.header_data
[2:h
.header_base
]
834 calc_crc
= crc32(crcdat
) & 0xFFFF
837 if h
.header_crc
== calc_crc
:
841 if REPORT_BAD_HEADER
:
843 crcdat
= h
.header_data
[2:]
844 msg
= 'Header CRC error (%02x): exp=%x got=%x (xlen = %d)' % ( h
.type, h
.header_crc
, calc_crc
, xlen
)
846 while xlen
>= S_BLK_HDR
.size
- 2:
847 crc
= crc32(crcdat
[:xlen
]) & 0xFFFF
848 if crc
== h
.header_crc
:
849 msg
+= ' / crc match, xlen = %d' % xlen
851 raise BadRarFile(msg
)
853 # instead of panicking, send eof
856 # read file-specific header
857 def _parse_file_header(self
, h
, pos
):
858 fld
= S_FILE_HDR
.unpack_from(h
.header_data
, pos
)
859 h
.compress_size
= fld
[0]
863 h
.date_time
= parse_dos_time(fld
[4])
864 h
.extract_version
= fld
[5]
865 h
.compress_type
= fld
[6]
868 pos
+= S_FILE_HDR
.size
870 if h
.flags
& RAR_FILE_LARGE
:
871 h1
= S_LONG
.unpack_from(h
.header_data
, pos
)[0]
872 h2
= S_LONG
.unpack_from(h
.header_data
, pos
+ 4)[0]
873 h
.compress_size |
= h1
<< 32
874 h
.file_size |
= h2
<< 32
876 h
.add_size
= h
.compress_size
878 name
= h
.header_data
[pos
: pos
+ h
.name_size
]
880 if h
.flags
& RAR_FILE_UNICODE
:
881 nul
= name
.find(ZERO
)
882 h
.orig_filename
= name
[:nul
]
883 u
= UnicodeFilename(h
.orig_filename
, name
[nul
+ 1 : ])
884 h
.filename
= u
.decode()
886 # if parsing failed fall back to simple name
888 h
.filename
= self
._decode
(h
.orig_filename
)
890 h
.orig_filename
= name
891 h
.filename
= self
._decode
(name
)
893 # change separator, if requested
895 h
.filename
= h
.filename
.replace('\\', PATH_SEP
)
897 if h
.flags
& RAR_FILE_SALT
:
898 h
.salt
= h
.header_data
[pos
: pos
+ 8]
903 # optional extended time stamps
904 if h
.flags
& RAR_FILE_EXTTIME
:
905 pos
= self
._parse
_ext
_time
(h
, pos
)
907 h
.mtime
= h
.atime
= h
.ctime
= h
.arctime
= None
912 if h
.flags
& RAR_FILE_COMMENT
:
913 self
._parse
_subblocks
(h
, pos
)
917 h
.date_time
= to_datetime(h
.date_time
)
918 h
.mtime
= to_datetime(h
.mtime
)
919 h
.atime
= to_datetime(h
.atime
)
920 h
.ctime
= to_datetime(h
.ctime
)
921 h
.arctime
= to_datetime(h
.arctime
)
923 # .mtime is .date_time with more precision
926 h
.date_time
= h
.mtime
929 h
.date_time
= h
.mtime
[:5] + (int(h
.mtime
[5]),)
933 # find old-style comment subblock
934 def _parse_subblocks(self
, h
, pos
):
935 hdata
= h
.header_data
936 while pos
< len(hdata
):
937 # ordinary block header
938 t
= S_BLK_HDR
.unpack_from(hdata
, pos
)
939 scrc
, stype
, sflags
, slen
= t
940 pos_next
= pos
+ slen
941 pos
+= S_BLK_HDR
.size
947 # followed by block-specific header
948 if stype
== RAR_BLOCK_OLD_COMMENT
and pos
+ S_COMMENT_HDR
.size
<= pos_next
:
949 declen
, ver
, meth
, crc
= S_COMMENT_HDR
.unpack_from(hdata
, pos
)
950 pos
+= S_COMMENT_HDR
.size
951 data
= hdata
[pos
: pos_next
]
952 cmt
= rar_decompress(ver
, meth
, data
, declen
, sflags
,
954 if not self
._crc
_check
:
955 h
.comment
= self
._decode
_comment
(cmt
)
956 elif crc32(cmt
) & 0xFFFF == crc
:
957 h
.comment
= self
._decode
_comment
(cmt
)
961 def _parse_ext_time(self
, h
, pos
):
964 # flags and rest of data can be missing
966 if pos
+ 2 <= len(data
):
967 flags
= S_SHORT
.unpack_from(data
, pos
)[0]
970 h
.mtime
, pos
= self
._parse
_xtime
(flags
>> 3*4, data
, pos
, h
.date_time
)
971 h
.ctime
, pos
= self
._parse
_xtime
(flags
>> 2*4, data
, pos
)
972 h
.atime
, pos
= self
._parse
_xtime
(flags
>> 1*4, data
, pos
)
973 h
.arctime
, pos
= self
._parse
_xtime
(flags
>> 0*4, data
, pos
)
976 def _parse_xtime(self
, flag
, data
, pos
, dostime
= None):
977 unit
= 10000000.0 # 100 ns units
980 t
= S_LONG
.unpack_from(data
, pos
)[0]
981 dostime
= parse_dos_time(t
)
986 b
= S_BYTE
.unpack_from(data
, pos
)[0]
987 rem
= (b
<< 16) |
(rem
>> 8)
989 sec
= dostime
[5] + rem
/ unit
992 dostime
= dostime
[:5] + (sec
,)
995 # given current vol name, construct next one
def _next_volname(self, volfile):
    """Return the file name of the volume that follows `volfile`.

    Archives whose main header has RAR_MAIN_NEWNUMBERING set use the
    new-style naming helper; all others use the old-style one.
    """
    new_numbering = self._main.flags & RAR_MAIN_NEWNUMBERING
    pick_next = self._next_newvol if new_numbering else self._next_oldvol
    return pick_next(volfile)
1001 # new-style next volume
1002 def _next_newvol(self
, volfile
):
1003 i
= len(volfile
) - 1
1005 if volfile
[i
] >= '0' and volfile
[i
] <= '9':
1006 return self
._inc
_volname
(volfile
, i
)
1008 raise BadRarName("Cannot construct volume name: "+volfile
)
1010 # old-style next volume
1011 def _next_oldvol(self
, volfile
):
1013 if volfile
[-4:].lower() == '.rar':
1014 return volfile
[:-2] + '00'
1015 return self
._inc
_volname
(volfile
, len(volfile
) - 1)
1017 # increase digits with carry, otherwise just increment char
1018 def _inc_volname(self
, volfile
, i
):
1022 fn
[i
] = chr(ord(fn
[i
]) + 1)
def _open_clear(self, inf):
    """Open entry `inf` for reading straight from the archive file(s)."""
    reader = DirectReader(self, inf)
    return reader
1031 # put file compressed data into temporary .rar archive, and run
1032 # unrar on that, thus avoiding unrar going over whole archive
1033 def _open_hack(self
, inf
, psw
= None):
1036 size
= inf
.compress_size
+ inf
.header_size
1037 rf
= open(inf
.volume_file
, "rb", 0)
1038 rf
.seek(inf
.header_offset
)
1040 tmpfd
, tmpname
= mkstemp(suffix
='.rar')
1041 tmpf
= os
.fdopen(tmpfd
, "wb")
1044 # create main header: crc, type, flags, size, res1, res2
1045 mh
= S_BLK_HDR
.pack(0x90CF, 0x73, 0, 13) + ZERO
* (2+4)
1046 tmpf
.write(RAR_ID
+ mh
)
1049 buf
= rf
.read(BSIZE
)
1053 raise BadRarFile('read failed: ' + inf
.filename
)
1064 return self
._open
_unrar
(tmpname
, inf
, psw
, tmpname
)
1066 def _read_comment_v3(self
, inf
, psw
=None):
1069 rf
= open(inf
.volume_file
, "rb")
1070 rf
.seek(inf
.file_offset
)
1071 data
= rf
.read(inf
.compress_size
)
1075 cmt
= rar_decompress(inf
.extract_version
, inf
.compress_type
, data
,
1076 inf
.file_size
, inf
.flags
, inf
.CRC
, psw
, inf
.salt
)
1082 crc
+= (long(1) << 32)
1086 return self
._decode
_comment
(cmt
)
1088 # extract using unrar
1089 def _open_unrar(self
, rarfile
, inf
, psw
= None, tmpfile
= None):
1090 cmd
= [UNRAR_TOOL
] + list(OPEN_ARGS
)
1092 cmd
.append("-p" + psw
)
1095 # not giving filename avoids encoding related problems
1098 if PATH_SEP
!= os
.sep
:
1099 fn
= fn
.replace(PATH_SEP
, os
.sep
)
1102 # read from unrar pipe
1103 return PipeReader(self
, inf
, cmd
, tmpfile
)
1105 def _decode(self
, val
):
1106 for c
in TRY_ENCODINGS
:
1108 return val
.decode(c
)
1109 except UnicodeError:
1111 return val
.decode(self
._charset
, 'replace')
1113 def _decode_comment(self
, val
):
1114 if UNICODE_COMMENTS
:
1115 return self
._decode
(val
)
1118 # call unrar to extract a file
1119 def _extract(self
, fnlist
, path
=None, psw
=None):
1120 cmd
= [UNRAR_TOOL
] + list(EXTRACT_ARGS
)
1123 psw
= psw
or self
._password
1125 cmd
.append('-p' + psw
)
1130 cmd
.append(self
.rarfile
)
1134 if os
.sep
!= PATH_SEP
:
1135 fn
= fn
.replace(PATH_SEP
, os
.sep
)
1139 if path
is not None:
1140 cmd
.append(path
+ os
.sep
)
1143 p
= custom_popen(cmd
)
1144 output
= p
.communicate()[0]
1145 check_returncode(p
, output
)
1151 class UnicodeFilename
:
1152 """Handle unicode filename decompression"""
1154 def __init__(self
, name
, encdata
):
1155 self
.std_name
= bytearray(name
)
1156 self
.encdata
= bytearray(encdata
)
1157 self
.pos
= self
.encpos
= 0
1158 self
.buf
= bytearray()
1163 c
= self
.encdata
[self
.encpos
]
1172 return self
.std_name
[self
.pos
]
1177 def put(self
, lo
, hi
):
1183 hi
= self
.enc_byte()
1185 while self
.encpos
< len(self
.encdata
):
1187 flags
= self
.enc_byte()
1190 t
= (flags
>> flagbits
) & 3
1192 self
.put(self
.enc_byte(), 0)
1194 self
.put(self
.enc_byte(), hi
)
1196 self
.put(self
.enc_byte(), self
.enc_byte())
1201 for i
in range((n
& 0x7f) + 2):
1202 lo
= (self
.std_byte() + c
) & 0xFF
1205 for i
in range(n
+ 2):
1206 self
.put(self
.std_byte(), 0)
1207 return self
.buf
.decode("utf-16le", "replace")
1210 class RarExtFile(RawIOBase
):
1211 """Base class for 'file-like' object that RarFile.open() returns.
1213 Provides public methods and common crc checking.
1216 - no short reads - .read() and .readinto() read as much as requested.
1217 - no internal buffer, use io.BufferedReader for that.
1220 filename of the archive entry.
1223 def __init__(self
, rf
, inf
):
1224 """Fill common fields"""
1226 RawIOBase
.__init
__(self
)
1228 # standard io.* properties
1229 self
.name
= inf
.filename
1234 self
.crc_check
= rf
._crc
_check
1247 self
.remain
= self
.inf
.file_size
1249 def read(self
, cnt
= None):
1250 """Read all or specified amount of data from archive entry."""
1253 if cnt
is None or cnt
< 0:
1255 elif cnt
> self
.remain
:
1261 data
= self
._read
(cnt
)
1263 self
.CRC
= crc32(data
, self
.CRC
)
1264 self
.remain
-= len(data
)
1265 if len(data
) != cnt
:
1266 raise BadRarFile("Failed the read enough data")
1269 if not data
or self
.remain
== 0:
1275 """Check final CRC."""
1276 if not self
.crc_check
:
1279 check_returncode(self
, '')
1280 if self
.remain
!= 0:
1281 raise BadRarFile("Failed the read enough data")
1284 crc
+= (long(1) << 32)
1285 if crc
!= self
.inf
.CRC
:
1286 raise BadRarFile("Corrupt file - CRC check failed: " + self
.inf
.filename
)
1288 def _read(self
, cnt
):
1289 """Actual read that gets sanitized cnt."""
1292 """Close open resources."""
1294 RawIOBase
.close(self
)
1301 """Hook delete to make sure tempfile is removed."""
1304 def readinto(self
, buf
):
1305 """Zero-copy read directly into buffer.
1310 data
= self
.read(len(buf
))
1316 if not isinstance(buf
, array
.array
):
1318 buf
[:n
] = array
.array(buf
.typecode
, data
)
1322 """Return current reading position in uncompressed data."""
1323 return self
.inf
.file_size
- self
.remain
1325 def seek(self
, ofs
, whence
= 0):
1328 # disable crc check when seeking
1331 fsize
= self
.inf
.file_size
1332 cur_ofs
= self
.tell()
1334 if whence
== 0: # seek from beginning of file
1336 elif whence
== 1: # seek from current position
1337 new_ofs
= cur_ofs
+ ofs
1338 elif whence
== 2: # seek from end of file
1339 new_ofs
= fsize
+ ofs
1341 raise ValueError('Invalid value for whence')
1346 elif new_ofs
> fsize
:
1349 # do the actual seek
1350 if new_ofs
>= cur_ofs
:
1351 self
._skip
(new_ofs
- cur_ofs
)
1353 # process old data ?
1354 #self._skip(fsize - cur_ofs)
1360 def _skip(self
, cnt
):
1361 """Read and discard data"""
1364 buf
= self
.read(8192)
1366 buf
= self
.read(cnt
)
1380 """Read all remaining data"""
1381 # avoid RawIOBase default impl
1385 class PipeReader(RarExtFile
):
1386 """Read data from pipe, handle tempfile cleanup."""
1388 def __init__(self
, rf
, inf
, cmd
, tempfile
=None):
1391 self
.tempfile
= tempfile
1392 RarExtFile
.__init
__(self
, rf
, inf
)
1394 def _close_proc(self
):
1397 if self
.proc
.stdout
:
1398 self
.proc
.stdout
.close()
1400 self
.proc
.stdin
.close()
1401 if self
.proc
.stderr
:
1402 self
.proc
.stderr
.close()
1404 self
.returncode
= self
.proc
.returncode
1408 RarExtFile
._open
(self
)
1413 # launch new process
1415 self
.proc
= custom_popen(self
.cmd
)
1416 self
.fd
= self
.proc
.stdout
1418 # avoid situation where unrar waits on stdin
1420 self
.proc
.stdin
.close()
1422 def _read(self
, cnt
):
1423 """Read from pipe."""
1425 # normal read is usually enough
1426 data
= self
.fd
.read(cnt
)
1427 if len(data
) == cnt
or not data
:
1430 # short read, try looping
1434 data
= self
.fd
.read(cnt
)
1439 return EMPTY
.join(buf
)
1442 """Close open resources."""
1445 RarExtFile
.close(self
)
1449 os
.unlink(self
.tempfile
)
1452 self
.tempfile
= None
1455 def readinto(self
, buf
):
1456 """Zero-copy read directly into buffer."""
1458 if cnt
> self
.remain
:
1460 vbuf
= memoryview(buf
)
1463 res
= self
.fd
.readinto(vbuf
[got
: cnt
])
1467 self
.CRC
= crc32(vbuf
[got
: got
+ res
], self
.CRC
)
1473 class DirectReader(RarExtFile
):
1474 """Read uncompressed data directly from archive."""
1477 RarExtFile
._open
(self
)
1479 self
.volfile
= self
.inf
.volume_file
1480 self
.fd
= open(self
.volfile
, "rb", 0)
1481 self
.fd
.seek(self
.inf
.header_offset
, 0)
1482 self
.cur
= self
.rf
._parse
_header
(self
.fd
)
1483 self
.cur_avail
= self
.cur
.add_size
1485 def _skip(self
, cnt
):
1486 """RAR Seek, skipping through rar files to get to correct position
1491 if self
.cur_avail
== 0:
1492 if not self
._open
_next
():
1495 # fd is in read pos, do the read
1496 if cnt
> self
.cur_avail
:
1497 cnt
-= self
.cur_avail
1498 self
.remain
-= self
.cur_avail
1501 self
.fd
.seek(cnt
, 1)
1502 self
.cur_avail
-= cnt
1506 def _read(self
, cnt
):
1507 """Read from potentially multi-volume archive."""
1512 if self
.cur_avail
== 0:
1513 if not self
._open
_next
():
1516 # fd is in read pos, do the read
1517 if cnt
> self
.cur_avail
:
1518 data
= self
.fd
.read(self
.cur_avail
)
1520 data
= self
.fd
.read(cnt
)
1526 self
.cur_avail
-= len(data
)
1531 return EMPTY
.join(buf
)
1533 def _open_next(self
):
1534 """Proceed to next volume."""
1536 # is the file split over archives?
1537 if (self
.cur
.flags
& RAR_FILE_SPLIT_AFTER
) == 0:
1545 self
.volfile
= self
.rf
._next
_volname
(self
.volfile
)
1546 fd
= open(self
.volfile
, "rb", 0)
1549 # loop until first file header
1551 cur
= self
.rf
._parse
_header
(fd
)
1553 raise BadRarFile("Unexpected EOF")
1554 if cur
.type in (RAR_BLOCK_MARK
, RAR_BLOCK_MAIN
):
1556 fd
.seek(cur
.add_size
, 1)
1558 if cur
.orig_filename
!= self
.inf
.orig_filename
:
1559 raise BadRarFile("Did not found file entry")
1561 self
.cur_avail
= cur
.add_size
1565 def readinto(self
, buf
):
1566 """Zero-copy read directly into buffer."""
1568 vbuf
= memoryview(buf
)
1569 while got
< len(buf
):
1571 if self
.cur_avail
== 0:
1572 if not self
._open
_next
():
1575 # length for next read
1576 cnt
= len(buf
) - got
1577 if cnt
> self
.cur_avail
:
1578 cnt
= self
.cur_avail
1580 # read into temp view
1581 res
= self
.fd
.readinto(vbuf
[got
: got
+ cnt
])
1585 self
.CRC
= crc32(vbuf
[got
: got
+ res
], self
.CRC
)
1586 self
.cur_avail
-= res
1592 class HeaderDecrypt
:
1593 """File-like object that decrypts from another file"""
1594 def __init__(self
, f
, key
, iv
):
1596 self
.ciph
= AES
.new(key
, AES
.MODE_CBC
, iv
)
1600 return self
.f
.tell()
1602 def read(self
, cnt
=None):
1604 raise BadRarFile('Bad count to header decrypt - wrong password?')
1607 if cnt
<= len(self
.buf
):
1608 res
= self
.buf
[:cnt
]
1609 self
.buf
= self
.buf
[cnt
:]
1616 BLK
= self
.ciph
.block_size
1618 enc
= self
.f
.read(BLK
)
1621 dec
= self
.ciph
.decrypt(enc
)
1627 self
.buf
= dec
[cnt
:]
1633 ## Utility functions
1636 def rar3_s2k(psw
, salt
):
1637 """String-to-key hash for RAR3."""
1639 seed
= psw
.encode('utf-16le') + salt
1643 for j
in range(0x4000):
1644 cnt
= S_LONG
.pack(i
*0x4000 + j
)
1645 h
.update(seed
+ cnt
[:3])
1647 iv
+= h
.digest()[19:20]
1648 key_be
= h
.digest()[:16]
1649 key_le
= pack("<LLLL", *unpack(">LLLL", key_be
))
1652 def rar_decompress(vers
, meth
, data
, declen
=0, flags
=0, crc
=0, psw
=None, salt
=None):
1653 """Decompress blob of compressed data.
1655 Used for data with non-standard header - eg. comments.
1658 # already uncompressed?
1659 if meth
== RAR_M0
and (flags
& RAR_FILE_PASSWORD
) == 0:
1662 # take only necessary flags
1663 flags
= flags
& (RAR_FILE_PASSWORD | RAR_FILE_SALT | RAR_FILE_DICTMASK
)
1664 flags |
= RAR_LONG_BLOCK
1667 fname
= bytes('data', 'ascii')
1670 fhdr
= S_FILE_HDR
.pack(len(data
), declen
, RAR_OS_MSDOS
, crc
,
1671 date
, vers
, meth
, len(fname
), mode
)
1673 if flags
& RAR_FILE_SALT
:
1679 hlen
= S_BLK_HDR
.size
+ len(fhdr
)
1680 hdr
= S_BLK_HDR
.pack(0, RAR_BLOCK_FILE
, flags
, hlen
) + fhdr
1681 hcrc
= crc32(hdr
[2:]) & 0xFFFF
1682 hdr
= S_BLK_HDR
.pack(hcrc
, RAR_BLOCK_FILE
, flags
, hlen
) + fhdr
1684 # archive main header
1685 mh
= S_BLK_HDR
.pack(0x90CF, RAR_BLOCK_MAIN
, 0, 13) + ZERO
* (2+4)
1687 # decompress via temp rar
1688 tmpfd
, tmpname
= mkstemp(suffix
='.rar')
1689 tmpf
= os
.fdopen(tmpfd
, "wb")
1691 tmpf
.write(RAR_ID
+ mh
+ hdr
+ data
)
1694 cmd
= [UNRAR_TOOL
] + list(OPEN_ARGS
)
1695 if psw
is not None and (flags
& RAR_FILE_PASSWORD
):
1696 cmd
.append("-p" + psw
)
1701 p
= custom_popen(cmd
)
1702 return p
.communicate()[0]
1708 """Convert 6-part time tuple into datetime object."""
1714 year
, mon
, day
, h
, m
, xs
= t
1716 us
= int(1000000 * (xs
- s
))
1718 # assume the values are valid
1720 return datetime(year
, mon
, day
, h
, m
, s
, us
)
1724 # sanitize invalid values
1725 MDAY
= (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
1727 if mon
> 12: mon
= 12
1729 if day
> MDAY
[mon
]: day
= MDAY
[mon
]
1733 if mon
== 2 and day
== 29:
1735 return datetime(year
, mon
, day
, h
, m
, s
, us
)
1738 return datetime(year
, mon
, day
, h
, m
, s
, us
)
def parse_dos_time(stamp):
    """Parse standard 32-bit DOS timestamp.

    The fields are packed from the lowest bit upwards as:
    5 bits seconds/2, 6 bits minutes, 5 bits hours, 5 bits day,
    4 bits month and 7 bits of years counted from 1980.

    Returns a ``(year, month, day, hour, minute, second)`` tuple.
    Field values are extracted as-is; no range validation is done here.
    """
    # Each field is pulled out with an explicit shift+mask so the argument
    # is never mutated and no builtin name (``min``) gets shadowed.
    second = (stamp & 0x1F) * 2     # stored with 2-second resolution
    minute = (stamp >> 5) & 0x3F
    hour = (stamp >> 11) & 0x1F
    day = (stamp >> 16) & 0x1F
    month = (stamp >> 21) & 0x0F
    year = ((stamp >> 25) & 0x7F) + 1980
    return (year, month, day, hour, minute, second)
1751 def custom_popen(cmd
):
1752 """Disconnect cmd from parent fds, read only from stdout."""
1756 if sys
.platform
== 'win32':
1757 creationflags
= 0x08000000 # CREATE_NO_WINDOW
1760 p
= Popen(cmd
, bufsize
= 0, stdout
= PIPE
, stdin
= PIPE
, stderr
= STDOUT
,
1761 creationflags
= creationflags
)
1764 def check_returncode(p
, out
):
1765 """Raise exception according to unrar exit code"""
1771 # map return code to exception class
1773 RarWarning
, RarFatalError
, RarCRCError
, RarLockedArchiveError
,
1774 RarWriteError
, RarOpenError
, RarUserError
, RarMemoryError
,
1775 RarCreateError
] # codes from rar.txt
1776 if code
> 0 and code
< len(errmap
):
1783 exc
= RarUnknownError
1787 msg
= "%s [%d]: %s" % (exc
.__doc
__, p
.returncode
, out
)
1789 msg
= "%s [%d]" % (exc
.__doc
__, p
.returncode
)