Merge pull request #4 from hanwentao/master
[rarfile.git] / rarfile.py
blob70503f9585b0fe1d61f4809dd8632990144274ad
1 # rarfile.py
3 # Copyright (c) 2005-2012 Marko Kreen <markokr@gmail.com>
5 # Permission to use, copy, modify, and/or distribute this software for any
6 # purpose with or without fee is hereby granted, provided that the above
7 # copyright notice and this permission notice appear in all copies.
9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 r"""RAR archive reader.
19 This is a Python module for reading RAR archives. The interface
20 is made as similar to zipfile as possible.
22 Basic logic:
23 - Parse archive structure with Python.
24 - Extract non-compressed files with Python
25 - Extract compressed files with unrar.
26 - Optionally write compressed data to temp file to speed up unrar,
27 otherwise it needs to scan whole archive on each execution.
29 Example::
31 import rarfile
33 rf = rarfile.RarFile('myarchive.rar')
34 for f in rf.infolist():
35 print f.filename, f.file_size
36 if f.filename == 'README':
37 print rf.read(f)
39 There are few module-level parameters to tune behaviour,
40 here they are with defaults, and reason to change it::
42 import rarfile
44 # Set to full path of unrar.exe if it is not in PATH
45 rarfile.UNRAR_TOOL = "unrar"
47 # Set to 0 if you don't look at comments and want to
48 # avoid wasting time for parsing them
49 rarfile.NEED_COMMENTS = 1
51 # Set up to 1 if you don't want to deal with decoding comments
52 # from unknown encoding. rarfile will try couple of common
53 # encodings in sequence.
54 rarfile.UNICODE_COMMENTS = 0
56 # Set to 1 if you prefer timestamps to be datetime objects
57 # instead tuples
58 rarfile.USE_DATETIME = 0
60 # Set to '/' to be more compatible with zipfile
61 rarfile.PATH_SEP = '\\'
63 For more details, refer to source.
65 """
67 __version__ = '2.5'
69 # export only interesting items
70 __all__ = ['is_rarfile', 'RarInfo', 'RarFile', 'RarExtFile']
73 ## Imports and compat - support both Python 2.x and 3.x
76 import sys, os, struct
77 from struct import pack, unpack
78 from binascii import crc32
79 from tempfile import mkstemp
80 from subprocess import Popen, PIPE, STDOUT
81 from datetime import datetime
# PyCrypto is optional; it is only needed for archives with encrypted headers
try:
    from Crypto.Cipher import AES
    try:
        from hashlib import sha1
    except ImportError:
        # fall back to the legacy sha module when hashlib is unavailable
        from sha import new as sha1
    _have_crypto = 1
except ImportError:
    _have_crypto = 0

# compat with 2.x
if sys.hexversion < 0x3000000:
    # prefer 3.x behaviour
    range = xrange
    # py2.6 has broken bytes(); the encoding argument is accepted and ignored
    def bytes(s, enc):
        return str(s)

# provide a minimal bytearray() when the builtin is missing
try:
    bytearray
except NameError:
    import array
    class bytearray:
        def __init__(self, val = ''):
            self.arr = array.array('B', val)
            # expose only the operations this module actually uses
            self.append = self.arr.append
            self.__getitem__ = self.arr.__getitem__
            self.__len__ = self.arr.__len__
        def decode(self, *args):
            return self.arr.tostring().decode(*args)

# Optimized .readinto() requires memoryview
try:
    memoryview
except NameError:
    have_memoryview = 0
else:
    have_memoryview = 1

# Struct() for older python
try:
    from struct import Struct
except ImportError:
    class Struct:
        def __init__(self, fmt):
            self.format = fmt
            self.size = struct.calcsize(fmt)
        def unpack(self, buf):
            return unpack(self.format, buf)
        def unpack_from(self, buf, ofs = 0):
            # emulate unpack_from() by slicing out exactly one record
            return unpack(self.format, buf[ofs : ofs + self.size])
        def pack(self, *args):
            return pack(self.format, *args)

# file object superclass
try:
    from io import RawIOBase
except ImportError:
    class RawIOBase(object):
        def close(self):
            pass
## Module configuration. Can be tuned after importing.

# charset used when none of TRY_ENCODINGS can decode a name
DEFAULT_CHARSET = "windows-1252"

# encodings tried in order before falling back to DEFAULT_CHARSET
TRY_ENCODINGS = ('utf8', 'utf-16le')

# 'unrar', 'rar' or full path to either one
UNRAR_TOOL = "unrar"

# command line args used when opening a file for reading
OPEN_ARGS = ('p', '-inul')

# command line args used when extracting files to disk
EXTRACT_ARGS = ('x', '-y', '-idq')

# command line args used by testrar()
TEST_ARGS = ('t', '-idq')

# whether to speed up decompression by using a temporary archive
USE_EXTRACT_HACK = 1

# largest file size for which the temporary archive is used
HACK_SIZE_LIMIT = 20*1024*1024

# whether to parse file/archive comments
NEED_COMMENTS = 1

# whether to convert comments to unicode strings
UNICODE_COMMENTS = 0

# When RAR is corrupt, stopping on bad header is better
# On unknown/misparsed RAR headers reporting is better
REPORT_BAD_HEADER = 0

# convert RAR time tuples into datetime() objects
USE_DATETIME = 0

# Separator for path name components. RAR internally uses '\\'.
# Use '/' to be similar with zipfile.
PATH_SEP = '\\'
## rar constants

# block types (each value is also an ASCII letter, shown on the right)
RAR_BLOCK_MARK          = 0x72 # r
RAR_BLOCK_MAIN          = 0x73 # s
RAR_BLOCK_FILE          = 0x74 # t
RAR_BLOCK_OLD_COMMENT   = 0x75 # u
RAR_BLOCK_OLD_EXTRA     = 0x76 # v
RAR_BLOCK_OLD_SUB       = 0x77 # w
RAR_BLOCK_OLD_RECOVERY  = 0x78 # x
RAR_BLOCK_OLD_AUTH      = 0x79 # y
RAR_BLOCK_SUB           = 0x7a # z
RAR_BLOCK_ENDARC        = 0x7b # {

# flags for RAR_BLOCK_MAIN
RAR_MAIN_VOLUME         = 0x0001
RAR_MAIN_COMMENT        = 0x0002
RAR_MAIN_LOCK           = 0x0004
RAR_MAIN_SOLID          = 0x0008
RAR_MAIN_NEWNUMBERING   = 0x0010
RAR_MAIN_AUTH           = 0x0020
RAR_MAIN_RECOVERY       = 0x0040
RAR_MAIN_PASSWORD       = 0x0080
RAR_MAIN_FIRSTVOLUME    = 0x0100
RAR_MAIN_ENCRYPTVER     = 0x0200

# flags for RAR_BLOCK_FILE
RAR_FILE_SPLIT_BEFORE   = 0x0001
RAR_FILE_SPLIT_AFTER    = 0x0002
RAR_FILE_PASSWORD       = 0x0004
RAR_FILE_COMMENT        = 0x0008
RAR_FILE_SOLID          = 0x0010
# three-bit field: dictionary size; all bits set marks a directory
RAR_FILE_DICTMASK       = 0x00e0
RAR_FILE_DICT64         = 0x0000
RAR_FILE_DICT128        = 0x0020
RAR_FILE_DICT256        = 0x0040
RAR_FILE_DICT512        = 0x0060
RAR_FILE_DICT1024       = 0x0080
RAR_FILE_DICT2048       = 0x00a0
RAR_FILE_DICT4096       = 0x00c0
RAR_FILE_DIRECTORY      = 0x00e0
RAR_FILE_LARGE          = 0x0100
RAR_FILE_UNICODE        = 0x0200
RAR_FILE_SALT           = 0x0400
RAR_FILE_VERSION        = 0x0800
RAR_FILE_EXTTIME        = 0x1000
RAR_FILE_EXTFLAGS       = 0x2000

# flags for RAR_BLOCK_ENDARC
RAR_ENDARC_NEXT_VOLUME  = 0x0001
RAR_ENDARC_DATACRC      = 0x0002
RAR_ENDARC_REVSPACE     = 0x0004
RAR_ENDARC_VOLNR        = 0x0008

# flags common to all blocks
RAR_SKIP_IF_UNKNOWN     = 0x4000
RAR_LONG_BLOCK          = 0x8000

# Host OS types
RAR_OS_MSDOS = 0
RAR_OS_OS2   = 1
RAR_OS_WIN32 = 2
RAR_OS_UNIX  = 3
RAR_OS_MACOS = 4
RAR_OS_BEOS  = 5

# Compression methods - '0'..'5'
RAR_M0 = 0x30
RAR_M1 = 0x31
RAR_M2 = 0x32
RAR_M3 = 0x33
RAR_M4 = 0x34
RAR_M5 = 0x35
## internal constants

# byte-string literals, built through bytes() so they work on 2.x and 3.x
RAR_ID = bytes("Rar!\x1a\x07\x00", 'ascii')
ZERO = bytes("\0", 'ascii')
EMPTY = bytes("", 'ascii')

# pre-compiled little-endian struct layouts used by the header parser
S_BLK_HDR = Struct('<HBHH')
S_FILE_HDR = Struct('<LLBLLBBHL')
S_LONG = Struct('<L')
S_SHORT = Struct('<H')
S_BYTE = Struct('<B')
S_COMMENT_HDR = Struct('<HBBH')
284 ## Public interface
class Error(Exception):
    """Root of every exception raised by this module."""

class BadRarFile(Error):
    """The archive contains incorrect data."""

class NotRarFile(Error):
    """The file is not a RAR archive."""

class BadRarName(Error):
    """Multipart name components cannot be guessed."""

class NoRarEntry(Error):
    """The requested file was not found in the archive."""

class PasswordRequired(Error):
    """The file requires a password."""

class NeedFirstVolume(Error):
    """Processing must start from the first volume."""

class NoCrypto(Error):
    """Encrypted headers cannot be parsed - no crypto available."""

class RarExecError(Error):
    """Problem reported by unrar/rar."""

class RarWarning(RarExecError):
    """Non-fatal error."""

class RarFatalError(RarExecError):
    """Fatal error."""

class RarCRCError(RarExecError):
    """CRC error during unpacking."""

class RarLockedArchiveError(RarExecError):
    """A locked archive must not be modified."""

class RarWriteError(RarExecError):
    """Write error."""

class RarOpenError(RarExecError):
    """Open error."""

class RarUserError(RarExecError):
    """User error."""

class RarMemoryError(RarExecError):
    """Memory error."""

class RarCreateError(RarExecError):
    """Create error."""

class RarUserBreak(RarExecError):
    """User stop."""

class RarUnknownError(RarExecError):
    """Unknown exit code."""

class RarSignalExit(RarExecError):
    """Unrar exited with signal."""
def is_rarfile(fn):
    '''Check quickly whether file is rar archive.

    Only the leading signature bytes are read and compared with RAR_ID.

    @param fn: path of the file to inspect.
    @return: True when the file starts with the RAR signature.
    '''
    fd = open(fn, "rb")
    try:
        # close the handle explicitly instead of relying on garbage collection
        buf = fd.read(len(RAR_ID))
    finally:
        fd.close()
    return buf == RAR_ID
class RarInfo(object):
    '''An entry in rar archive.

    @ivar filename:
        File name with relative path.
        Default path separator is a backslash, to change set rarfile.PATH_SEP.
        Always unicode string.
    @ivar date_time:
        Modification time, tuple of (year, month, day, hour, minute, second).
        Or datetime() object if USE_DATETIME is set.
    @ivar file_size:
        Uncompressed size.
    @ivar compress_size:
        Compressed size.
    @ivar compress_type:
        Compression method: 0x30 - 0x35.
    @ivar extract_version:
        Minimal Rar version needed for decompressing.
    @ivar host_os:
        Host OS type, one of RAR_OS_* constants.
    @ivar mode:
        File attributes. May be either dos-style or unix-style, depending on host_os.
    @ivar CRC:
        CRC-32 of uncompressed file, unsigned int.
    @ivar volume:
        Volume nr, starting from 0.
    @ivar volume_file:
        Volume file name, where file starts.
    @ivar type:
        One of RAR_BLOCK_* types.  Only entries with type==RAR_BLOCK_FILE are shown in .infolist().
    @ivar flags:
        For files, RAR_FILE_* bits.
    @ivar comment:
        File comment (unicode string or None).

    @ivar mtime:
        Optional time field: Modification time, with float seconds.
        Same as .date_time but with more precision.
    @ivar ctime:
        Optional time field: creation time, with float seconds.
    @ivar atime:
        Optional time field: last access time, with float seconds.
    @ivar arctime:
        Optional time field: archival time, with float seconds.
    '''

    __slots__ = (
        # zipfile-compatible fields
        'filename',
        'file_size',
        'compress_size',
        'date_time',
        'comment',
        'CRC',
        'volume',
        'orig_filename', # bytes in unknown encoding

        # rar-specific fields
        'extract_version',
        'compress_type',
        'host_os',
        'mode',
        'type',
        'flags',

        # optional extended time fields
        # tuple where the sec is float, or datetime().
        'mtime', # same as .date_time
        'ctime',
        'atime',
        'arctime',

        # RAR internals
        'name_size',
        'header_size',
        'header_crc',
        'file_offset',
        'add_size',
        'header_data',
        'header_base',
        'header_offset',
        'salt',
        'volume_file',
    )

    def isdir(self):
        '''Returns True if the entry is a directory.'''
        if self.type == RAR_BLOCK_FILE:
            # a directory has every bit of the dictionary-size field set
            return (self.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY
        return False

    def needs_password(self):
        '''Returns True if the entry data is encrypted with a password.'''
        # report a real boolean rather than the raw flag bit
        return (self.flags & RAR_FILE_PASSWORD) > 0
class RarFile(object):
    '''Parse RAR structure, provide access to files in archive.

    @ivar comment:
        Archive comment (unicode string or None).
    '''

    def __init__(self, rarfile, mode="r", charset=None, info_callback=None, crc_check = True):
        """Open and parse a RAR archive.

        @param rarfile: archive file name
        @param mode: only 'r' is supported.
        @param charset: fallback charset to use, if filenames are not already Unicode-enabled.
        @param info_callback: debug callback, gets to see all archive entries.
        @param crc_check: set to False to disable CRC checks
        """
        if not charset:
            charset = DEFAULT_CHARSET

        # caller-supplied settings
        self.rarfile = rarfile
        self.comment = None
        self._charset = charset
        self._info_callback = info_callback

        # state filled in while parsing
        self._info_list = []
        self._info_map = {}
        self._needs_password = False
        self._password = None
        self._crc_check = crc_check

        # main archive header, set by the parser
        self._main = None

        if mode != "r":
            raise NotImplementedError("RarFile supports only mode=r")

        self._parse()
466 def __enter__(self):
467 return self
469 def __exit__(self, type, value, traceback):
470 self.close()
def setpassword(self, password):
    '''Sets the password to use when extracting.

    If no main header has been recorded yet, the archive is parsed
    again now that a password is available.
    '''
    self._password = password
    if self._main:
        return
    self._parse()
def needs_password(self):
    '''Returns True if any archive entries require password for extraction.'''
    return self._needs_password

def namelist(self):
    '''Return list of filenames in archive.'''
    names = []
    for entry in self._info_list:
        names.append(entry.filename)
    return names

def infolist(self):
    '''Return RarInfo objects for all files/directories in archive.'''
    return self._info_list
def getinfo(self, fname):
    '''Return RarInfo for file.

    @param fname: file name or RarInfo instance; an instance is returned as-is.
    @raise NoRarEntry: if no entry matches the name.
    '''
    if isinstance(fname, RarInfo):
        return fname

    # accept both separator styles: build the name converted to PATH_SEP
    if PATH_SEP == '/':
        converted = fname.replace("\\", "/")
    else:
        converted = fname.replace("/", "\\")

    # the name as given wins over the converted one
    for key in (fname, converted):
        if key in self._info_map:
            return self._info_map[key]
    raise NoRarEntry("No such file: "+fname)
def open(self, fname, mode = 'r', psw = None):
    '''Return open file object, where the data can be read.

    The object implements io.RawIOBase interface, so it can
    be further wrapped with io.BufferedReader and io.TextIOWrapper.

    On older Python where io module is not available, it implements
    only .read(), .seek(), .tell() and .close() methods.

    The object is seekable, although the seeking is fast only on
    uncompressed files, on compressed files the seeking is implemented
    by reading ahead and/or restarting the decompression.

    @param fname: file name or RarInfo instance.
    @param mode: must be 'r'
    @param psw: password to use for extracting.
    '''
    if mode != 'r':
        raise NotImplementedError("RarFile.open() supports only mode=r")

    # locate the entry and reject what cannot be read
    inf = self.getinfo(fname)
    if inf.isdir():
        raise TypeError("Directory does not have any data: " + inf.filename)

    if inf.flags & RAR_FILE_SPLIT_BEFORE:
        raise NeedFirstVolume("Partial file, please start from first volume: " + inf.filename)

    # a password is only passed on for entries that need one
    if not inf.needs_password():
        psw = None
    else:
        psw = psw or self._password
        if psw is None:
            raise PasswordRequired("File %s requires password" % inf.filename)

    # the temp-archive shortcut is skipped when disabled or when no main
    # header is known, for solid or header-encrypted archives, for split
    # entries and for files above the size limit
    use_hack = 1
    if not USE_EXTRACT_HACK or not self._main:
        use_hack = 0
    elif self._main.flags & (RAR_MAIN_SOLID | RAR_MAIN_PASSWORD):
        use_hack = 0
    elif inf.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER):
        use_hack = 0
    elif inf.file_size > HACK_SIZE_LIMIT:
        use_hack = 0

    # stored, unencrypted data is read directly; everything else needs unrar
    stored_plain = inf.compress_type == RAR_M0 and (inf.flags & RAR_FILE_PASSWORD) == 0
    if stored_plain:
        return self._open_clear(inf)
    if use_hack:
        return self._open_hack(inf, psw)
    return self._open_unrar(self.rarfile, inf, psw)
def read(self, fname, psw = None):
    """Return uncompressed data for archive entry.

    For longer files using .open() may be better idea.

    @param fname: filename or RarInfo instance
    @param psw: password to use for extracting.
    """
    stream = self.open(fname, 'r', psw)
    try:
        data = stream.read()
    finally:
        # always release the reader, even when reading fails
        stream.close()
    return data
def close(self):
    """Release open resources.

    Currently a no-op.
    """
    return None

def printdir(self):
    """Print archive file list to stdout."""
    for entry in self._info_list:
        print(entry.filename)
def extract(self, member, path=None, pwd=None):
    """Extract single file into current directory.

    @param member: filename or RarInfo instance
    @param path: optional destination path
    @param pwd: optional password to use
    """
    fname = member
    if isinstance(member, RarInfo):
        # unrar is addressed by name, so unwrap the entry object
        fname = member.filename
    self._extract([fname], path, pwd)
def extractall(self, path=None, members=None, pwd=None):
    """Extract all files into current directory.

    @param path: optional destination path
    @param members: optional filename or RarInfo instance list to extract
    @param pwd: optional password to use
    """
    names = []
    if members is not None:
        for member in members:
            if not isinstance(member, RarInfo):
                names.append(member)
            else:
                names.append(member.filename)
    # an empty name list leaves the file selection to unrar
    self._extract(names, path, pwd)
def testrar(self):
    """Let 'unrar' test the archive.

    The tool's exit status and output are handed to check_returncode().
    """
    # '-p-' is passed when no password has been set
    if self._password is None:
        psw_arg = '-p-'
    else:
        psw_arg = '-p' + self._password

    cmd = [UNRAR_TOOL] + list(TEST_ARGS)
    cmd.append(psw_arg)
    cmd.append(self.rarfile)

    proc = custom_popen(cmd)
    output = proc.communicate()[0]
    check_returncode(proc, output)
634 ## private methods
# store entry
def _process_entry(self, item):
    """Record a parsed header and pick up new-style comments.

    @param item: RarInfo for the header that was just parsed.
    """
    if item.type == RAR_BLOCK_FILE:
        starts_here = (item.flags & RAR_FILE_SPLIT_BEFORE) == 0
        if starts_here:
            # use only first part
            self._info_map[item.filename] = item
            self._info_list.append(item)
            # remember if any items require password
            if item.needs_password():
                self._needs_password = True
        elif self._info_list:
            # continuation of the latest entry: final crc is in last block
            prev = self._info_list[-1]
            prev.CRC = item.CRC
            prev.compress_size += item.compress_size

    # parse new-style comment, unless disabled or split across volumes
    wanted = (item.type == RAR_BLOCK_SUB and item.filename == 'CMT'
              and NEED_COMMENTS
              and not (item.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER)))
    if wanted:
        cmt = self._read_comment_v3(item, self._password)
        if item.flags & RAR_FILE_SOLID:
            # file comment: belongs to the most recent entry, if any
            if self._info_list:
                self._info_list[-1].comment = cmt
        else:
            # archive comment
            self.comment = cmt

    if self._info_callback:
        self._info_callback(item)
673 # read rar
674 def _parse(self):
675 self._fd = None
676 try:
677 self._parse_real()
678 finally:
679 if self._fd:
680 self._fd.close()
681 self._fd = None
def _parse_real(self):
    """Walk the headers of the archive, following multi-volume sets.

    Every header is passed to _process_entry().

    @raise NotRarFile: the first volume lacks the RAR signature.
    @raise NeedFirstVolume: parsing did not start at the first volume.
    """
    fd = open(self.rarfile, "rb")
    self._fd = fd
    signature = fd.read(len(RAR_ID))
    if signature != RAR_ID:
        raise NotRarFile("Not a Rar archive: "+self.rarfile)

    volume = 0  # first vol (.rar) is 0
    more_vols = 0
    endarc = 0
    volfile = self.rarfile
    while 1:
        # don't read past ENDARC
        h = None
        if not endarc:
            h = self._parse_header(fd)

        if not h:
            if not more_vols:
                break
            # current volume is exhausted, move on to the next one
            volume += 1
            volfile = self._next_volname(volfile)
            fd.close()
            fd = open(volfile, "rb")
            self._fd = fd
            more_vols = 0
            endarc = 0
            continue

        h.volume = volume
        h.volume_file = volfile

        if h.type == RAR_BLOCK_MAIN and not self._main:
            self._main = h
            if h.flags & RAR_MAIN_NEWNUMBERING:
                # RAR 2.x does not set FIRSTVOLUME,
                # so check it only if NEWNUMBERING is used
                if (h.flags & RAR_MAIN_FIRSTVOLUME) == 0:
                    raise NeedFirstVolume("Need to start from first volume")
            if h.flags & RAR_MAIN_PASSWORD:
                self._needs_password = True
                if not self._password:
                    # headers are encrypted and no password is set yet:
                    # stop here, setpassword() parses again later
                    self._main = None
                    break
        elif h.type == RAR_BLOCK_ENDARC:
            more_vols = h.flags & RAR_ENDARC_NEXT_VOLUME
            endarc = 1
        elif h.type == RAR_BLOCK_FILE:
            # RAR 2.x does not write RAR_BLOCK_ENDARC
            if h.flags & RAR_FILE_SPLIT_AFTER:
                more_vols = 1
            # RAR 2.x does not set RAR_MAIN_FIRSTVOLUME
            if volume == 0 and h.flags & RAR_FILE_SPLIT_BEFORE:
                raise NeedFirstVolume("Need to start from first volume")

        # store it
        self._process_entry(h)

        # skip the block data to reach the next header
        if h.add_size > 0:
            fd.seek(h.file_offset + h.add_size, 0)
# AES encrypted headers
_last_aes_key = (None, None, None) # (salt, key, iv)
def _decrypt_header(self, fd):
    """Wrap fd in a decrypting reader for one encrypted header.

    Reads the 8-byte salt that precedes the header and reuses the
    previously derived key when the salt repeats.

    @raise NoCrypto: the crypto module is not available.
    """
    if not _have_crypto:
        raise NoCrypto('Cannot parse encrypted headers - no crypto')
    salt = fd.read(8)
    cached_salt, key, iv = self._last_aes_key
    if cached_salt != salt:
        # new salt: derive the key again and remember the result
        key, iv = rar3_s2k(self._password, salt)
        self._last_aes_key = (salt, key, iv)
    return HeaderDecrypt(fd, key, iv)
756 # read single header
757 def _parse_header(self, fd):
758 try:
759 # handle encrypted headers
760 if self._main and self._main.flags & RAR_MAIN_PASSWORD:
761 if not self._password:
762 return
763 fd = self._decrypt_header(fd)
765 # now read actual header
766 return self._parse_block_header(fd)
767 except struct.error:
768 if REPORT_BAD_HEADER:
769 raise BadRarFile('Broken header in RAR file')
770 return None
# common header
def _parse_block_header(self, fd):
    """Parse one block header from fd and verify its CRC.

    @return: RarInfo describing the block; None at end of data, or on a
        short or corrupt header when REPORT_BAD_HEADER is off.
    @raise BadRarFile: short or corrupt header, if REPORT_BAD_HEADER is set.
    """
    base_size = S_BLK_HDR.size

    h = RarInfo()
    h.header_offset = fd.tell()
    h.comment = None

    # read and parse base header
    buf = fd.read(base_size)
    if not buf:
        return None
    h.header_crc, h.type, h.flags, h.header_size = S_BLK_HDR.unpack_from(buf)
    h.header_base = base_size
    pos = base_size

    # read full header
    h.header_data = buf
    if h.header_size > base_size:
        h.header_data = buf + fd.read(h.header_size - base_size)
    h.file_offset = fd.tell()

    # unexpected EOF?
    if len(h.header_data) != h.header_size:
        if REPORT_BAD_HEADER:
            raise BadRarFile('Unexpected EOF when reading header')
        return None

    # block has data associated with it?
    h.add_size = 0
    if h.flags & RAR_LONG_BLOCK:
        h.add_size = S_LONG.unpack_from(h.header_data, pos)[0]

    # parse interesting ones, decide header boundaries for crc
    btype = h.type
    if btype == RAR_BLOCK_MARK:
        # the marker block is returned without a CRC check
        return h
    elif btype == RAR_BLOCK_MAIN:
        h.header_base += 6
        if h.flags & RAR_MAIN_ENCRYPTVER:
            h.header_base += 1
        if h.flags & RAR_MAIN_COMMENT:
            self._parse_subblocks(h, h.header_base)
            self.comment = h.comment
    elif btype == RAR_BLOCK_FILE:
        self._parse_file_header(h, pos)
    elif btype == RAR_BLOCK_SUB:
        self._parse_file_header(h, pos)
        h.header_base = h.header_size
    elif btype == RAR_BLOCK_OLD_AUTH:
        h.header_base += 8
    elif btype == RAR_BLOCK_OLD_EXTRA:
        h.header_base += 7
    else:
        h.header_base = h.header_size

    # check crc; it covers the header after the 2-byte crc field
    if btype == RAR_BLOCK_OLD_SUB:
        crcdat = h.header_data[2:] + fd.read(h.add_size)
    else:
        crcdat = h.header_data[2:h.header_base]

    calc_crc = crc32(crcdat) & 0xFFFF

    # return good header
    if h.header_crc == calc_crc:
        return h

    # instead of panicking, send eof
    if not REPORT_BAD_HEADER:
        return None

    # need to panic: report every prefix length whose crc would match
    used_len = len(crcdat)
    whole = h.header_data[2:]
    msg = 'Header CRC error (%02x): exp=%x got=%x (xlen = %d)' % ( h.type, h.header_crc, calc_crc, used_len )
    xlen = len(whole)
    while xlen >= S_BLK_HDR.size - 2:
        crc = crc32(whole[:xlen]) & 0xFFFF
        if crc == h.header_crc:
            msg += ' / crc match, xlen = %d' % xlen
        xlen -= 1
    raise BadRarFile(msg)
# read file-specific header
def _parse_file_header(self, h, pos):
    """Parse the file-specific part of a FILE or SUB block header.

    Fills in the size, time, name and salt fields of h.

    @param h: RarInfo whose header_data holds the complete header.
    @param pos: offset in header_data where the file header starts.
    @return: offset just past the parsed base header.
    """
    fld = S_FILE_HDR.unpack_from(h.header_data, pos)
    h.compress_size = fld[0]
    h.file_size = fld[1]
    h.host_os = fld[2]
    h.CRC = fld[3]
    h.date_time = parse_dos_time(fld[4])
    h.extract_version = fld[5]
    h.compress_type = fld[6]
    h.name_size = fld[7]
    h.mode = fld[8]
    pos += S_FILE_HDR.size

    # large files carry the high 32 bits of both sizes separately
    if h.flags & RAR_FILE_LARGE:
        h1 = S_LONG.unpack_from(h.header_data, pos)[0]
        h2 = S_LONG.unpack_from(h.header_data, pos + 4)[0]
        h.compress_size |= h1 << 32
        h.file_size |= h2 << 32
        pos += 8
        h.add_size = h.compress_size

    name = h.header_data[pos : pos + h.name_size ]
    pos += h.name_size

    # The packed unicode form is "<plain name> NUL <encoded data>".
    # Without a NUL there is nothing to split: find() returns -1 and
    # slicing with it would cut off the last byte of the name.
    nul = -1
    if h.flags & RAR_FILE_UNICODE:
        nul = name.find(ZERO)

    if nul >= 0:
        h.orig_filename = name[:nul]
        u = UnicodeFilename(h.orig_filename, name[nul + 1 : ])
        h.filename = u.decode()

        # if parsing failed fall back to simple name
        if u.failed:
            h.filename = self._decode(h.orig_filename)
    elif h.flags & RAR_FILE_UNICODE:
        # unicode flag without packed data: the name is stored as UTF-8
        h.orig_filename = name
        h.filename = name.decode("utf8", "replace")
    else:
        h.orig_filename = name
        h.filename = self._decode(name)

    # change separator, if requested
    if PATH_SEP != '\\':
        h.filename = h.filename.replace('\\', PATH_SEP)

    if h.flags & RAR_FILE_SALT:
        h.salt = h.header_data[pos : pos + 8]
        pos += 8
    else:
        h.salt = None

    # optional extended time stamps
    if h.flags & RAR_FILE_EXTTIME:
        pos = self._parse_ext_time(h, pos)
    else:
        h.mtime = h.atime = h.ctime = h.arctime = None

    # base header end
    h.header_base = pos

    if h.flags & RAR_FILE_COMMENT:
        self._parse_subblocks(h, pos)

    # convert timestamps
    if USE_DATETIME:
        h.date_time = to_datetime(h.date_time)
        h.mtime = to_datetime(h.mtime)
        h.atime = to_datetime(h.atime)
        h.ctime = to_datetime(h.ctime)
        h.arctime = to_datetime(h.arctime)

    # .mtime is .date_time with more precision
    if h.mtime:
        if USE_DATETIME:
            h.date_time = h.mtime
        else:
            # keep seconds int
            h.date_time = h.mtime[:5] + (int(h.mtime[5]),)

    return pos
# find old-style comment subblock
def _parse_subblocks(self, h, pos):
    """Scan the sub-blocks inside a header for an old-style comment.

    A comment that decodes and passes the CRC check (when enabled)
    is stored in h.comment.

    @param h: RarInfo whose header_data is scanned.
    @param pos: offset of the first sub-block.
    """
    hdata = h.header_data
    end = len(hdata)
    while pos < end:
        # ordinary block header
        scrc, stype, sflags, slen = S_BLK_HDR.unpack_from(hdata, pos)
        pos_next = pos + slen
        pos += S_BLK_HDR.size

        # corrupt header: declared length is shorter than the header itself
        if pos_next < pos:
            break

        # followed by block-specific header
        if stype == RAR_BLOCK_OLD_COMMENT and pos + S_COMMENT_HDR.size <= pos_next:
            declen, ver, meth, crc = S_COMMENT_HDR.unpack_from(hdata, pos)
            pos += S_COMMENT_HDR.size
            packed = hdata[pos : pos_next]
            cmt = rar_decompress(ver, meth, packed, declen, sflags,
                                 crc, self._password)
            # accept unconditionally when CRC checking is off
            if not self._crc_check or crc32(cmt) & 0xFFFF == crc:
                h.comment = self._decode_comment(cmt)

        pos = pos_next
961 def _parse_ext_time(self, h, pos):
962 data = h.header_data
964 # flags and rest of data can be missing
965 flags = 0
966 if pos + 2 <= len(data):
967 flags = S_SHORT.unpack_from(data, pos)[0]
968 pos += 2
970 h.mtime, pos = self._parse_xtime(flags >> 3*4, data, pos, h.date_time)
971 h.ctime, pos = self._parse_xtime(flags >> 2*4, data, pos)
972 h.atime, pos = self._parse_xtime(flags >> 1*4, data, pos)
973 h.arctime, pos = self._parse_xtime(flags >> 0*4, data, pos)
974 return pos
976 def _parse_xtime(self, flag, data, pos, dostime = None):
977 unit = 10000000.0 # 100 ns units
978 if flag & 8:
979 if not dostime:
980 t = S_LONG.unpack_from(data, pos)[0]
981 dostime = parse_dos_time(t)
982 pos += 4
983 rem = 0
984 cnt = flag & 3
985 for i in range(cnt):
986 b = S_BYTE.unpack_from(data, pos)[0]
987 rem = (b << 16) | (rem >> 8)
988 pos += 1
989 sec = dostime[5] + rem / unit
990 if flag & 4:
991 sec += 1
992 dostime = dostime[:5] + (sec,)
993 return dostime, pos
# given current vol name, construct next one
def _next_volname(self, volfile):
    """Return the file name of the volume after volfile.

    The naming scheme is chosen from the main header flags.
    """
    if self._main.flags & RAR_MAIN_NEWNUMBERING:
        namer = self._next_newvol
    else:
        namer = self._next_oldvol
    return namer(volfile)
1001 # new-style next volume
1002 def _next_newvol(self, volfile):
1003 i = len(volfile) - 1
1004 while i >= 0:
1005 if volfile[i] >= '0' and volfile[i] <= '9':
1006 return self._inc_volname(volfile, i)
1007 i -= 1
1008 raise BadRarName("Cannot construct volume name: "+volfile)
1010 # old-style next volume
1011 def _next_oldvol(self, volfile):
1012 # rar -> r00
1013 if volfile[-4:].lower() == '.rar':
1014 return volfile[:-2] + '00'
1015 return self._inc_volname(volfile, len(volfile) - 1)
1017 # increase digits with carry, otherwise just increment char
1018 def _inc_volname(self, volfile, i):
1019 fn = list(volfile)
1020 while i >= 0:
1021 if fn[i] != '9':
1022 fn[i] = chr(ord(fn[i]) + 1)
1023 break
1024 fn[i] = '0'
1025 i -= 1
1026 return ''.join(fn)
def _open_clear(self, inf):
    """Return a DirectReader for the entry inf."""
    reader = DirectReader(self, inf)
    return reader
# put file compressed data into temporary .rar archive, and run
# unrar on that, thus avoiding unrar going over whole archive
def _open_hack(self, inf, psw = None):
    """Open an entry through a temporary single-file archive.

    The entry's header and compressed data are copied behind a
    synthetic main header, then unrar is run on that file.

    @param inf: RarInfo of the entry to open.
    @param psw: password to hand to unrar, or None.
    @return: whatever _open_unrar() returns for the temp archive.
    @raise BadRarFile: the volume ends before all data was copied.
    """
    BSIZE = 32*1024

    size = inf.compress_size + inf.header_size
    rf = open(inf.volume_file, "rb", 0)
    try:
        rf.seek(inf.header_offset)

        tmpfd, tmpname = mkstemp(suffix='.rar')
        try:
            tmpf = os.fdopen(tmpfd, "wb")
        except:
            # no file object took ownership of the descriptor yet
            os.close(tmpfd)
            os.unlink(tmpname)
            raise

        try:
            # create main header: crc, type, flags, size, res1, res2
            mh = S_BLK_HDR.pack(0x90CF, 0x73, 0, 13) + ZERO * (2+4)
            tmpf.write(RAR_ID + mh)
            while size > 0:
                buf = rf.read(min(size, BSIZE))
                if not buf:
                    raise BadRarFile('read failed: ' + inf.filename)
                tmpf.write(buf)
                size -= len(buf)
            tmpf.close()
        except:
            # do not leave a half-written temp archive behind
            tmpf.close()
            os.unlink(tmpname)
            raise
    finally:
        # the source volume is closed on every path, including early failures
        rf.close()

    return self._open_unrar(tmpname, inf, psw, tmpname)
def _read_comment_v3(self, inf, psw=None):
    """Read and decode a new-style comment block.

    @param inf: RarInfo of the comment sub-block.
    @param psw: password to use for decompression, or None.
    @return: decoded comment, or None when the CRC check fails.
    """
    # read data
    rf = open(inf.volume_file, "rb")
    try:
        rf.seek(inf.file_offset)
        data = rf.read(inf.compress_size)
    finally:
        # close the volume even when seek/read fails
        rf.close()

    # decompress
    cmt = rar_decompress(inf.extract_version, inf.compress_type, data,
                         inf.file_size, inf.flags, inf.CRC, psw, inf.salt)

    # check crc
    if self._crc_check:
        # crc32() can be negative on Python 2; masking gives the unsigned
        # value on every version without needing the 2.x-only long()
        crc = crc32(cmt) & 0xFFFFFFFF
        if crc != inf.CRC:
            return None

    return self._decode_comment(cmt)
# extract using unrar
def _open_unrar(self, rarfile, inf, psw = None, tmpfile = None):
    """Build the unrar command line for one entry and wrap it in a PipeReader.

    @param rarfile: archive file to run unrar on.
    @param inf: RarInfo of the entry.
    @param psw: password, or None.
    @param tmpfile: temp archive name when the extract hack is in use.
    """
    cmd = [UNRAR_TOOL]
    cmd.extend(OPEN_ARGS)
    if psw is not None:
        cmd.append("-p" + psw)
    cmd.append(rarfile)

    # not giving filename avoids encoding related problems
    if not tmpfile:
        member = inf.filename
        if PATH_SEP != os.sep:
            member = member.replace(PATH_SEP, os.sep)
        cmd.append(member)

    # read from unrar pipe
    return PipeReader(self, inf, cmd, tmpfile)
def _decode(self, val):
    """Decode bytes to unicode, guessing the encoding.

    Each entry of TRY_ENCODINGS is tried strictly, in order; if none
    fits, the fallback charset is used with replacement characters.
    """
    for enc in TRY_ENCODINGS:
        try:
            text = val.decode(enc)
        except UnicodeError:
            continue
        return text
    return val.decode(self._charset, 'replace')
def _decode_comment(self, val):
    """Return the comment decoded to unicode if UNICODE_COMMENTS is set, else unchanged."""
    if not UNICODE_COMMENTS:
        return val
    return self._decode(val)
# call unrar to extract a file
def _extract(self, fnlist, path=None, psw=None):
    """Run unrar to extract the named entries to disk.

    @param fnlist: entry names using PATH_SEP; may be empty.
    @param path: destination directory, or None.
    @param psw: password; the archive-level password is used if unset.
    """
    # password: '-p-' is passed when there is none
    psw = psw or self._password
    if psw is None:
        psw_arg = '-p-'
    else:
        psw_arg = '-p' + psw

    cmd = [UNRAR_TOOL] + list(EXTRACT_ARGS)
    cmd.append(psw_arg)

    # rar file
    cmd.append(self.rarfile)

    # file list, converted to the native separator
    convert = os.sep != PATH_SEP
    for fn in fnlist:
        if convert:
            fn = fn.replace(PATH_SEP, os.sep)
        cmd.append(fn)

    # destination path
    if path is not None:
        cmd.append(path + os.sep)

    # call
    proc = custom_popen(cmd)
    output = proc.communicate()[0]
    check_returncode(proc, output)
1148 ## Utility classes
class UnicodeFilename:
    """Handle unicode filename decompression.

    @ivar failed:
        Set to 1 when decoding ran past the end of either input.
    """

    def __init__(self, name, encdata):
        """Set up decoding state.

        @param name: plain (non-unicode) form of the file name.
        @param encdata: packed data describing the unicode form.
        """
        self.std_name = bytearray(name)
        self.encdata = bytearray(encdata)
        self.pos = self.encpos = 0
        self.buf = bytearray()
        self.failed = 0

    def enc_byte(self):
        """Consume the next byte of encdata; 0 (and failed=1) past the end."""
        try:
            val = self.encdata[self.encpos]
        except IndexError:
            self.failed = 1
            return 0
        self.encpos += 1
        return val

    def std_byte(self):
        """Return the plain-name byte at the current output position."""
        try:
            val = self.std_name[self.pos]
        except IndexError:
            self.failed = 1
            return ord('?')
        return val

    def put(self, lo, hi):
        """Append one UTF-16LE code unit and advance the output position."""
        self.buf.append(lo)
        self.buf.append(hi)
        self.pos += 1

    def decode(self):
        """Expand the packed data and return the name as a unicode string."""
        hi = self.enc_byte()
        flags = 0
        flagbits = 0
        while self.encpos < len(self.encdata):
            # each flag byte holds four 2-bit opcodes, highest bits first
            if flagbits == 0:
                flags = self.enc_byte()
                flagbits = 8
            flagbits -= 2
            op = (flags >> flagbits) & 3
            if op == 0:
                # low byte only, high byte zero
                self.put(self.enc_byte(), 0)
            elif op == 1:
                # low byte with the shared high byte
                self.put(self.enc_byte(), hi)
            elif op == 2:
                # both bytes given explicitly, low byte first
                lo = self.enc_byte()
                up = self.enc_byte()
                self.put(lo, up)
            else:
                # run copied from the plain name
                n = self.enc_byte()
                if n & 0x80:
                    # with a correction added to each byte
                    c = self.enc_byte()
                    for i in range((n & 0x7f) + 2):
                        lo = (self.std_byte() + c) & 0xFF
                        self.put(lo, hi)
                else:
                    for i in range(n + 2):
                        self.put(self.std_byte(), 0)
        return self.buf.decode("utf-16le", "replace")
class RarExtFile(RawIOBase):
    """Base class for 'file-like' object that RarFile.open() returns.

    Implements the public io-style methods and the common CRC check.
    Subclasses provide _open() to set up the data source and _read()
    to fetch raw bytes from it.

    Behaviour:
     - no short reads - .read() and .readinto() read as much as requested.
     - no internal buffer, use io.BufferedReader for that.

    @ivar name:
      filename of the archive entry.
    """

    def __init__(self, rf, inf):
        """Fill common fields and open the data source."""
        RawIOBase.__init__(self)

        # standard io.* properties
        self.name = inf.filename
        self.mode = 'rb'

        # archive, entry info and reading state
        self.rf = rf
        self.inf = inf
        self.crc_check = rf._crc_check
        self.fd = None
        self.CRC = 0
        self.remain = 0
        self.returncode = 0

        self._open()

    def _open(self):
        """Reset state for reading from the start of the entry."""
        previous = self.fd
        if previous:
            previous.close()
        self.fd = None
        self.CRC = 0
        self.remain = self.inf.file_size

    def read(self, cnt = None):
        """Read all or specified amount of data from archive entry."""

        # None, negative or too large count all mean "the rest"
        if cnt is None or cnt < 0 or cnt > self.remain:
            cnt = self.remain
        if cnt == 0:
            return EMPTY

        # actual read, running CRC is updated along the way
        data = self._read(cnt)
        if data:
            self.CRC = crc32(data, self.CRC)
            self.remain -= len(data)
        if len(data) != cnt:
            raise BadRarFile("Failed the read enough data")

        # entry finished - verify it
        if not data or self.remain == 0:
            self._check()
        return data

    def _check(self):
        """Check final CRC.

        Does nothing when CRC checking is off.  Otherwise a non-zero
        returncode is passed to check_returncode(), and BadRarFile is
        raised for unread data or a CRC mismatch.
        """
        if not self.crc_check:
            return
        if self.returncode:
            check_returncode(self, '')
        if self.remain != 0:
            raise BadRarFile("Failed the read enough data")

        # bring a negative crc32() result into unsigned range
        final_crc = self.CRC
        if final_crc < 0:
            final_crc += (long(1) << 32)
        if final_crc != self.inf.CRC:
            raise BadRarFile("Corrupt file - CRC check failed: " + self.inf.filename)

    def _read(self, cnt):
        """Actual read that gets sanitized cnt."""

    def close(self):
        """Close open resources."""
        RawIOBase.close(self)

        fd = self.fd
        if fd:
            fd.close()
            self.fd = None

    def __del__(self):
        """Hook delete to make sure tempfile is removed."""
        self.close()

    def readinto(self, buf):
        """Zero-copy read directly into buffer.

        Returns bytes read.
        """
        chunk = self.read(len(buf))
        size = len(chunk)
        try:
            buf[:size] = chunk
        except TypeError:
            # array.array does not take bytes in slice assignment, convert
            import array
            if not isinstance(buf, array.array):
                raise
            buf[:size] = array.array(buf.typecode, chunk)
        return size

    def tell(self):
        """Return current reading position in uncompressed data."""
        return self.inf.file_size - self.remain

    def seek(self, ofs, whence = 0):
        """Seek in data.

        Seeking forward reads and discards data, seeking backward
        reopens the entry first.  The target is clamped to the range
        0 .. file_size.  Returns the new position.
        """

        # disable crc check when seeking
        self.crc_check = 0

        fsize = self.inf.file_size
        cur_ofs = self.tell()

        # base offset per whence: start of file, current position, end of file
        bases = {0: 0, 1: cur_ofs, 2: fsize}
        if whence not in bases:
            raise ValueError('Invalid value for whence')
        new_ofs = bases[whence] + ofs

        # sanity check
        new_ofs = min(max(new_ofs, 0), fsize)

        # do the actual seek
        if new_ofs < cur_ofs:
            # going back: reopen and skip from the beginning
            self._open()
            self._skip(new_ofs)
        else:
            self._skip(new_ofs - cur_ofs)
        return self.tell()

    def _skip(self, cnt):
        """Read and discard data"""
        while cnt > 0:
            chunk = self.read(min(cnt, 8192))
            if not chunk:
                break
            cnt -= len(chunk)

    def readable(self):
        """Returns True"""
        return True

    def seekable(self):
        """Returns True"""
        return True

    def readall(self):
        """Read all remaining data"""
        # avoid RawIOBase default impl
        return self.read()
class PipeReader(RarExtFile):
    """Read data from pipe, handle tempfile cleanup.

    The data source is the stdout of a process started from `cmd`
    via custom_popen().  An optional temp file is removed on close().
    """

    def __init__(self, rf, inf, cmd, tempfile=None):
        """Remember command and tempfile, then let base class open."""
        # must be set before base __init__, which calls _open()
        self.cmd = cmd
        self.proc = None
        self.tempfile = tempfile
        RarExtFile.__init__(self, rf, inf)

    def _close_proc(self):
        """Close pipes of running process, wait for it, keep exit code."""
        proc = self.proc
        if not proc:
            return
        for stream in (proc.stdout, proc.stdin, proc.stderr):
            if stream:
                stream.close()
        proc.wait()
        self.returncode = proc.returncode
        self.proc = None

    def _open(self):
        """Restart the command and read from its stdout."""
        RarExtFile._open(self)

        # stop old process
        self._close_proc()

        # launch new process
        self.returncode = 0
        proc = custom_popen(self.cmd)
        self.proc = proc
        self.fd = proc.stdout

        # avoid situation where unrar waits on stdin
        stdin = proc.stdin
        if stdin:
            stdin.close()

    def _read(self, cnt):
        """Read from pipe."""

        # normal read is usually enough
        first = self.fd.read(cnt)
        if len(first) == cnt or not first:
            return first

        # short read, keep reading until satisfied or pipe is drained
        parts = [first]
        missing = cnt - len(first)
        while missing > 0:
            more = self.fd.read(missing)
            if not more:
                break
            missing -= len(more)
            parts.append(more)
        return EMPTY.join(parts)

    def close(self):
        """Close open resources."""

        self._close_proc()
        RarExtFile.close(self)

        tmpname = self.tempfile
        if tmpname:
            try:
                os.unlink(tmpname)
            except OSError:
                # best effort, file may be gone already
                pass
            self.tempfile = None

    if have_memoryview:
        def readinto(self, buf):
            """Zero-copy read directly into buffer."""
            want = min(len(buf), self.remain)
            view = memoryview(buf)
            got = 0
            while got < want:
                n = self.fd.readinto(view[got : want])
                if not n:
                    break
                if self.crc_check:
                    self.CRC = crc32(view[got : got + n], self.CRC)
                self.remain -= n
                got += n
            return got
class DirectReader(RarExtFile):
    """Read uncompressed data directly from archive.

    Tracks the current volume file, the header of the current file
    block (self.cur) and how many data bytes of that block are still
    unread (self.cur_avail).
    """

    def _open(self):
        """Open volume holding the entry and position at its data."""
        RarExtFile._open(self)

        self.volfile = self.inf.volume_file
        fd = open(self.volfile, "rb", 0)
        self.fd = fd
        fd.seek(self.inf.header_offset, 0)

        # parsing the header leaves fd at the start of the data
        hdr = self.rf._parse_header(fd)
        self.cur = hdr
        self.cur_avail = hdr.add_size

    def _skip(self, cnt):
        """RAR Seek, skipping through rar files to get to correct position
        """

        while cnt > 0:
            # next vol needed?
            if self.cur_avail == 0 and not self._open_next():
                break

            if cnt > self.cur_avail:
                # rest of this block is skipped; next volume is opened
                # on the following round, so no seek is needed here
                step = self.cur_avail
            else:
                step = cnt
                self.fd.seek(step, 1)
            cnt -= step
            self.remain -= step
            self.cur_avail -= step

    def _read(self, cnt):
        """Read from potentially multi-volume archive."""

        parts = []
        while cnt > 0:
            # next vol needed?
            if self.cur_avail == 0 and not self._open_next():
                break

            # fd is in read pos, never read past the current block
            data = self.fd.read(min(cnt, self.cur_avail))
            if not data:
                break

            # got some data
            got = len(data)
            cnt -= got
            self.cur_avail -= got
            parts.append(data)

        if len(parts) == 1:
            return parts[0]
        return EMPTY.join(parts)

    def _open_next(self):
        """Proceed to next volume.

        Returns False when the current block is not flagged as
        continuing in the next volume, True once the continuation
        block is found.  Raises BadRarFile when it is not found.
        """

        # is the file split over archives?
        if (self.cur.flags & RAR_FILE_SPLIT_AFTER) == 0:
            return False

        if self.fd:
            self.fd.close()
            self.fd = None

        # open next part
        self.volfile = self.rf._next_volname(self.volfile)
        fd = open(self.volfile, "rb", 0)
        self.fd = fd

        # step over mark / main headers until first other header
        while True:
            hdr = self.rf._parse_header(fd)
            if not hdr:
                raise BadRarFile("Unexpected EOF")
            if hdr.type not in (RAR_BLOCK_MARK, RAR_BLOCK_MAIN):
                break
            if hdr.add_size:
                fd.seek(hdr.add_size, 1)

        # it must continue the entry being read
        if hdr.orig_filename != self.inf.orig_filename:
            raise BadRarFile("Did not found file entry")
        self.cur = hdr
        self.cur_avail = hdr.add_size
        return True

    if have_memoryview:
        def readinto(self, buf):
            """Zero-copy read directly into buffer."""
            total = len(buf)
            view = memoryview(buf)
            got = 0
            while got < total:
                # next vol needed?
                if self.cur_avail == 0 and not self._open_next():
                    break

                # length for next read
                cnt = min(total - got, self.cur_avail)

                # read into temp view
                n = self.fd.readinto(view[got : got + cnt])
                if not n:
                    break
                if self.crc_check:
                    self.CRC = crc32(view[got : got + n], self.CRC)
                self.cur_avail -= n
                self.remain -= n
                got += n
            return got
class HeaderDecrypt:
    """File-like object that decrypts from another file.

    Data is read from the wrapped file one cipher block at a time and
    decrypted with AES in CBC mode.  Decrypted bytes that were not
    requested yet are kept in self.buf for the next read().
    """

    def __init__(self, f, key, iv):
        """Wrap file `f`, set up the cipher from key and iv."""
        self.f = f
        self.ciph = AES.new(key, AES.MODE_CBC, iv)
        self.buf = EMPTY

    def tell(self):
        """Return position of the underlying file."""
        return self.f.tell()

    def read(self, cnt=None):
        """Return up to `cnt` decrypted bytes.

        Less is returned when the underlying file ends before a full
        cipher block can be read.  Requests over 8k raise BadRarFile.
        """
        if cnt > 8*1024:
            raise BadRarFile('Bad count to header decrypt - wrong password?')

        # request can be served from leftover data alone
        leftover = self.buf
        if cnt <= len(leftover):
            self.buf = leftover[cnt:]
            return leftover[:cnt]

        # consume old data
        parts = [leftover]
        self.buf = EMPTY
        cnt -= len(leftover)

        # decrypt new data
        blk_size = self.ciph.block_size
        while cnt > 0:
            enc = self.f.read(blk_size)
            if len(enc) < blk_size:
                # incomplete block, nothing more to decrypt
                break
            dec = self.ciph.decrypt(enc)
            if cnt < len(dec):
                # only part is needed, keep the tail for later
                parts.append(dec[:cnt])
                self.buf = dec[cnt:]
                cnt = 0
            else:
                parts.append(dec)
                cnt -= len(dec)

        return EMPTY.join(parts)
1633 ## Utility functions
def rar3_s2k(psw, salt):
    """String-to-key hash for RAR3.

    Feeds one SHA-1 context 16 * 0x4000 times with the UTF-16LE
    password plus salt followed by the low 3 bytes of the packed
    iteration counter.

    Returns (key, iv): the key is the first 16 bytes of the final
    digest with each 32-bit word byte-swapped; the iv collects the last
    digest byte taken at the first step of each of the 16 rounds.
    """

    seed = psw.encode('utf-16le') + salt
    hasher = sha1()
    iv_parts = []
    for counter in range(16 * 0x4000):
        hasher.update(seed + S_LONG.pack(counter)[:3])
        if counter % 0x4000 == 0:
            # start of a round: intermediate digest gives one iv byte
            iv_parts.append(hasher.digest()[19:20])

    # swap byte order of the four 32-bit words
    words = unpack(">LLLL", hasher.digest()[:16])
    key_le = pack("<LLLL", *words)
    return key_le, EMPTY.join(iv_parts)
def rar_decompress(vers, meth, data, declen=0, flags=0, crc=0, psw=None, salt=None):
    """Decompress blob of compressed data.

    Used for data with non-standard header - eg. comments.

    A minimal archive (signature, main header, one file header and the
    data) is written to a temp file and UNRAR_TOOL is run on it with
    OPEN_ARGS; whatever the tool prints to stdout is returned.

    Data stored with method RAR_M0 and without password flag is
    returned as-is.  EMPTY is returned when the salt flag is set but
    no salt was given.  The temp file is always removed.
    """

    # already uncompressed?
    if meth == RAR_M0 and (flags & RAR_FILE_PASSWORD) == 0:
        return data

    # take only necessary flags
    keep_mask = RAR_FILE_PASSWORD | RAR_FILE_SALT | RAR_FILE_DICTMASK
    flags = (flags & keep_mask) | RAR_LONG_BLOCK

    # file header for a single entry called 'data'
    fname = bytes('data', 'ascii')
    date = 0
    mode = 0x20
    fhdr = S_FILE_HDR.pack(len(data), declen, RAR_OS_MSDOS, crc,
                           date, vers, meth, len(fname), mode) + fname
    if flags & RAR_FILE_SALT:
        if not salt:
            return EMPTY
        fhdr += salt

    # full header: header crc skips the first 2 bytes of the header,
    # so pack with a zero placeholder first and then again with the crc
    hlen = S_BLK_HDR.size + len(fhdr)
    draft = S_BLK_HDR.pack(0, RAR_BLOCK_FILE, flags, hlen) + fhdr
    hcrc = crc32(draft[2:]) & 0xFFFF
    hdr = S_BLK_HDR.pack(hcrc, RAR_BLOCK_FILE, flags, hlen) + fhdr

    # archive main header
    mh = S_BLK_HDR.pack(0x90CF, RAR_BLOCK_MAIN, 0, 13) + ZERO * (2+4)

    # decompress via temp rar
    tmpfd, tmpname = mkstemp(suffix='.rar')
    tmpf = os.fdopen(tmpfd, "wb")
    try:
        tmpf.write(RAR_ID + mh + hdr + data)
        tmpf.close()

        if psw is not None and (flags & RAR_FILE_PASSWORD):
            psw_switch = "-p" + psw
        else:
            psw_switch = "-p-"
        cmd = [UNRAR_TOOL] + list(OPEN_ARGS) + [psw_switch, tmpname]

        proc = custom_popen(cmd)
        return proc.communicate()[0]
    finally:
        # closing twice is harmless, covers failure before the close above
        tmpf.close()
        os.unlink(tmpname)
def to_datetime(t):
    """Convert 6-part time tuple into datetime object.

    The tuple is (year, month, day, hour, minute, seconds), where
    seconds may carry a fraction that becomes microseconds.
    None is passed through.  When the values do not form a valid date,
    month, day, hour, minute and second are clamped into range and
    Feb 29 in a non-leap year becomes Feb 28.
    """

    if t is None:
        return None

    # extract values, split seconds into whole part and microseconds
    year, mon, day, h, m, xs = t
    s = int(xs)
    us = int(1000000 * (xs - s))

    # assume the values are valid
    try:
        return datetime(year, mon, day, h, m, s, us)
    except ValueError:
        pass

    # sanitize invalid values; table is indexed by month, slot 0 unused
    month_days = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
    mon = min(max(mon, 1), 12)
    day = min(max(day, 1), month_days[mon])
    h = min(h, 23)
    m = min(m, 59)
    s = min(s, 59)

    # Feb 29 is valid only in leap years, let datetime decide
    if (mon, day) == (2, 29):
        try:
            return datetime(year, mon, day, h, m, s, us)
        except ValueError:
            day = 28
    return datetime(year, mon, day, h, m, s, us)
def parse_dos_time(stamp):
    """Parse standard 32-bit DOS timestamp.

    Returns (year, month, day, hour, minute, second) tuple.  Values are
    taken from the bit fields as-is, without range checks.
    """

    # bit layout, low to high: sec/2 (5), min (6), hour (5),
    # day (5), month (4), year - 1980 (7)
    second = (stamp & 0x1F) * 2
    minute = (stamp >> 5) & 0x3F
    hour = (stamp >> 11) & 0x1F
    day = (stamp >> 16) & 0x1F
    month = (stamp >> 21) & 0x0F
    year = ((stamp >> 25) & 0x7F) + 1980
    return (year, month, day, hour, minute, second)
def custom_popen(cmd):
    """Disconnect cmd from parent fds, read only from stdout.

    Starts `cmd` unbuffered with stdin and stdout as pipes and stderr
    merged into stdout.  Returns the Popen object.
    """

    # needed for py2exe: 0x08000000 is CREATE_NO_WINDOW
    if sys.platform == 'win32':
        creationflags = 0x08000000
    else:
        creationflags = 0

    # run command
    return Popen(cmd, bufsize = 0,
                 stdin = PIPE, stdout = PIPE, stderr = STDOUT,
                 creationflags = creationflags)
def check_returncode(p, out):
    """Raise exception according to unrar exit code.

    p   - object with .returncode attribute.
    out - tool output, appended to the error message when non-empty.

    Returns silently for exit code 0, otherwise raises the exception
    class picked for the code.
    """

    code = p.returncode
    if code == 0:
        return

    # exception class per exit code, position == code (codes from rar.txt)
    by_code = (None,
               RarWarning, RarFatalError, RarCRCError, RarLockedArchiveError,
               RarWriteError, RarOpenError, RarUserError, RarMemoryError,
               RarCreateError)
    if 0 < code < len(by_code):
        exc = by_code[code]
    elif code == 255:
        exc = RarUserBreak
    elif code < 0:
        exc = RarSignalExit
    else:
        exc = RarUnknownError

    # format message: class docstring, exit code, optional tool output
    msg = "%s [%d]" % (exc.__doc__, p.returncode)
    if out:
        msg = "%s: %s" % (msg, out)

    raise exc(msg)