Add python3.4 to tests.
[rarfile.git] / rarfile.py
blobb775a3c6b50a51367967adab015e435ff96fb23c
1 # rarfile.py
3 # Copyright (c) 2005-2013 Marko Kreen <markokr@gmail.com>
5 # Permission to use, copy, modify, and/or distribute this software for any
6 # purpose with or without fee is hereby granted, provided that the above
7 # copyright notice and this permission notice appear in all copies.
9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 r"""RAR archive reader.
19 This is Python module for Rar archive reading. The interface
20 is made as :mod:`zipfile`-like as possible.
22 Basic logic:
23 - Parse archive structure with Python.
24 - Extract non-compressed files with Python
25 - Extract compressed files with unrar.
26 - Optionally write compressed data to temp file to speed up unrar,
27 otherwise it needs to scan whole archive on each execution.
29 Example::
31 import rarfile
33 rf = rarfile.RarFile('myarchive.rar')
34 for f in rf.infolist():
35 print(f.filename, f.file_size)
36 if f.filename == 'README':
37 print(rf.read(f))
39 Archive files can also be accessed via file-like object returned
40 by :meth:`RarFile.open`::
42 import rarfile
44 with rarfile.RarFile('archive.rar') as rf:
45 with rf.open('README') as f:
46 for ln in f:
47 print(ln.strip())
49 There are few module-level parameters to tune behaviour,
50 here they are with defaults, and reason to change it::
52 import rarfile
54 # Set to full path of unrar.exe if it is not in PATH
55 rarfile.UNRAR_TOOL = "unrar"
57 # Set to 0 if you don't look at comments and want to
58 # avoid wasting time for parsing them
59 rarfile.NEED_COMMENTS = 1
61 # Set up to 1 if you don't want to deal with decoding comments
62 # from unknown encoding. rarfile will try couple of common
63 # encodings in sequence.
64 rarfile.UNICODE_COMMENTS = 0
66 # Set to 1 if you prefer timestamps to be datetime objects
67 # instead tuples
68 rarfile.USE_DATETIME = 0
70 # Set to '/' to be more compatible with zipfile
71 rarfile.PATH_SEP = '\\'
73 For more details, refer to source.
75 """
77 __version__ = '2.6'
79 # export only interesting items
80 __all__ = ['is_rarfile', 'RarInfo', 'RarFile', 'RarExtFile']
83 ## Imports and compat - support both Python 2.x and 3.x
86 import sys, os, struct, errno
87 from struct import pack, unpack
88 from binascii import crc32
89 from tempfile import mkstemp
90 from subprocess import Popen, PIPE, STDOUT
91 from datetime import datetime
93 # only needed for encrypted headers
94 try:
95 from Crypto.Cipher import AES
96 try:
97 from hashlib import sha1
98 except ImportError:
99 from sha import new as sha1
100 _have_crypto = 1
101 except ImportError:
102 _have_crypto = 0
104 # compat with 2.x
105 if sys.hexversion < 0x3000000:
106 # prefer 3.x behaviour
107 range = xrange
108 # py2.6 has broken bytes()
109 def bytes(s, enc):
110 return str(s)
112 # see if compat bytearray() is needed
113 try:
114 bytearray
115 except NameError:
116 import array
117 class bytearray:
118 def __init__(self, val = ''):
119 self.arr = array.array('B', val)
120 self.append = self.arr.append
121 self.__getitem__ = self.arr.__getitem__
122 self.__len__ = self.arr.__len__
123 def decode(self, *args):
124 return self.arr.tostring().decode(*args)
126 # Optimized .readinto() requires memoryview
127 try:
128 memoryview
129 have_memoryview = 1
130 except NameError:
131 have_memoryview = 0
133 # Struct() for older python
134 try:
135 from struct import Struct
136 except ImportError:
137 class Struct:
138 def __init__(self, fmt):
139 self.format = fmt
140 self.size = struct.calcsize(fmt)
141 def unpack(self, buf):
142 return unpack(self.format, buf)
143 def unpack_from(self, buf, ofs = 0):
144 return unpack(self.format, buf[ofs : ofs + self.size])
145 def pack(self, *args):
146 return pack(self.format, *args)
148 # file object superclass
149 try:
150 from io import RawIOBase
151 except ImportError:
152 class RawIOBase(object):
153 def close(self):
154 pass
158 ## Module configuration. Can be tuned after importing.
161 #: default fallback charset
162 DEFAULT_CHARSET = "windows-1252"
164 #: list of encodings to try, with fallback to DEFAULT_CHARSET if none succeed
165 TRY_ENCODINGS = ('utf8', 'utf-16le')
167 #: 'unrar', 'rar' or full path to either one
168 UNRAR_TOOL = "unrar"
170 #: Command line args to use for opening file for reading.
171 OPEN_ARGS = ('p', '-inul')
173 #: Command line args to use for extracting file to disk.
174 EXTRACT_ARGS = ('x', '-y', '-idq')
176 #: args for testrar()
177 TEST_ARGS = ('t', '-idq')
179 #: whether to speed up decompression by using tmp archive
180 USE_EXTRACT_HACK = 1
182 #: limit the filesize for tmp archive usage
183 HACK_SIZE_LIMIT = 20*1024*1024
185 #: whether to parse file/archive comments.
186 NEED_COMMENTS = 1
188 #: whether to convert comments to unicode strings
189 UNICODE_COMMENTS = 0
191 #: When RAR is corrupt, stopping on bad header is better
192 #: On unknown/misparsed RAR headers reporting is better
193 REPORT_BAD_HEADER = 0
195 #: Convert RAR time tuple into datetime() object
196 USE_DATETIME = 0
198 #: Separator for path name components. RAR internally uses '\\'.
199 #: Use '/' to be similar with zipfile.
200 PATH_SEP = '\\'
203 ## rar constants
206 # block types
207 RAR_BLOCK_MARK = 0x72 # r
208 RAR_BLOCK_MAIN = 0x73 # s
209 RAR_BLOCK_FILE = 0x74 # t
210 RAR_BLOCK_OLD_COMMENT = 0x75 # u
211 RAR_BLOCK_OLD_EXTRA = 0x76 # v
212 RAR_BLOCK_OLD_SUB = 0x77 # w
213 RAR_BLOCK_OLD_RECOVERY = 0x78 # x
214 RAR_BLOCK_OLD_AUTH = 0x79 # y
215 RAR_BLOCK_SUB = 0x7a # z
216 RAR_BLOCK_ENDARC = 0x7b # {
218 # flags for RAR_BLOCK_MAIN
219 RAR_MAIN_VOLUME = 0x0001
220 RAR_MAIN_COMMENT = 0x0002
221 RAR_MAIN_LOCK = 0x0004
222 RAR_MAIN_SOLID = 0x0008
223 RAR_MAIN_NEWNUMBERING = 0x0010
224 RAR_MAIN_AUTH = 0x0020
225 RAR_MAIN_RECOVERY = 0x0040
226 RAR_MAIN_PASSWORD = 0x0080
227 RAR_MAIN_FIRSTVOLUME = 0x0100
228 RAR_MAIN_ENCRYPTVER = 0x0200
230 # flags for RAR_BLOCK_FILE
231 RAR_FILE_SPLIT_BEFORE = 0x0001
232 RAR_FILE_SPLIT_AFTER = 0x0002
233 RAR_FILE_PASSWORD = 0x0004
234 RAR_FILE_COMMENT = 0x0008
235 RAR_FILE_SOLID = 0x0010
236 RAR_FILE_DICTMASK = 0x00e0
237 RAR_FILE_DICT64 = 0x0000
238 RAR_FILE_DICT128 = 0x0020
239 RAR_FILE_DICT256 = 0x0040
240 RAR_FILE_DICT512 = 0x0060
241 RAR_FILE_DICT1024 = 0x0080
242 RAR_FILE_DICT2048 = 0x00a0
243 RAR_FILE_DICT4096 = 0x00c0
244 RAR_FILE_DIRECTORY = 0x00e0
245 RAR_FILE_LARGE = 0x0100
246 RAR_FILE_UNICODE = 0x0200
247 RAR_FILE_SALT = 0x0400
248 RAR_FILE_VERSION = 0x0800
249 RAR_FILE_EXTTIME = 0x1000
250 RAR_FILE_EXTFLAGS = 0x2000
252 # flags for RAR_BLOCK_ENDARC
253 RAR_ENDARC_NEXT_VOLUME = 0x0001
254 RAR_ENDARC_DATACRC = 0x0002
255 RAR_ENDARC_REVSPACE = 0x0004
256 RAR_ENDARC_VOLNR = 0x0008
258 # flags common to all blocks
259 RAR_SKIP_IF_UNKNOWN = 0x4000
260 RAR_LONG_BLOCK = 0x8000
262 # Host OS types
263 RAR_OS_MSDOS = 0
264 RAR_OS_OS2 = 1
265 RAR_OS_WIN32 = 2
266 RAR_OS_UNIX = 3
267 RAR_OS_MACOS = 4
268 RAR_OS_BEOS = 5
270 # Compression methods - '0'..'5'
271 RAR_M0 = 0x30
272 RAR_M1 = 0x31
273 RAR_M2 = 0x32
274 RAR_M3 = 0x33
275 RAR_M4 = 0x34
276 RAR_M5 = 0x35
279 ## internal constants
282 RAR_ID = bytes("Rar!\x1a\x07\x00", 'ascii')
283 ZERO = bytes("\0", 'ascii')
284 EMPTY = bytes("", 'ascii')
286 S_BLK_HDR = Struct('<HBHH')
287 S_FILE_HDR = Struct('<LLBLLBBHL')
288 S_LONG = Struct('<L')
289 S_SHORT = Struct('<H')
290 S_BYTE = Struct('<B')
291 S_COMMENT_HDR = Struct('<HBBH')
294 ## Public interface
class Error(Exception):
    """Root of every exception raised by this module."""


class BadRarFile(Error):
    """Archive contents are malformed."""


class NotRarFile(Error):
    """File does not carry the RAR signature."""


class BadRarName(Error):
    """Next volume name cannot be derived from the current one."""


class NoRarEntry(Error):
    """Requested entry is not present in the archive."""


class PasswordRequired(Error):
    """Entry is encrypted and no password was supplied."""


class NeedFirstVolume(Error):
    """Parsing must start from the first volume of the set."""


class NoCrypto(Error):
    """Encrypted headers found but no crypto library is available."""


class RarExecError(Error):
    """Base for failures reported by the external unrar/rar tool."""


class RarWarning(RarExecError):
    """Non-fatal error"""


class RarFatalError(RarExecError):
    """Fatal error"""


class RarCRCError(RarExecError):
    """CRC error during unpacking"""


class RarLockedArchiveError(RarExecError):
    """Must not modify locked archive"""


class RarWriteError(RarExecError):
    """Write error"""


class RarOpenError(RarExecError):
    """Open error"""


class RarUserError(RarExecError):
    """User error"""


class RarMemoryError(RarExecError):
    """Memory error"""


class RarCreateError(RarExecError):
    """Create error"""


class RarNoFilesError(RarExecError):
    """No files that match pattern were found"""


class RarUserBreak(RarExecError):
    """User stop"""


class RarUnknownError(RarExecError):
    """Unknown exit code"""


class RarSignalExit(RarExecError):
    """Unrar exited with signal"""
def is_rarfile(fn):
    '''Check quickly whether file is rar archive.

    Reads only as many leading bytes as the RAR signature is long and
    compares them with ``RAR_ID``.  Returns True on match, False otherwise.
    Errors from opening or reading the file propagate to the caller.
    '''
    fd = open(fn, "rb")
    try:
        # close explicitly instead of relying on garbage collection
        buf = fd.read(len(RAR_ID))
    finally:
        fd.close()
    return buf == RAR_ID
class RarInfo(object):
    r'''An entry in rar archive.

    :mod:`zipfile`-compatible fields:

        filename
            File name with relative path.
            Default path separator is '\\', to change set rarfile.PATH_SEP.
            Always unicode string.
        date_time
            Modification time, tuple of (year, month, day, hour, minute, second).
            Or datetime() object if USE_DATETIME is set.
        file_size
            Uncompressed size.
        compress_size
            Compressed size.
        CRC
            CRC-32 of uncompressed file, unsigned int.
        comment
            File comment. Byte string or None.  Use UNICODE_COMMENTS
            to get automatic decoding to unicode.
        volume
            Volume nr, starting from 0.

    RAR-specific fields:

        compress_type
            Compression method: 0x30 - 0x35.
        extract_version
            Minimal Rar version needed for decompressing.
        host_os
            Host OS type, one of RAR_OS_* constants.
        mode
            File attributes. May be either dos-style or unix-style, depending on host_os.
        volume_file
            Volume file name, where file starts.
        mtime
            Optional time field: Modification time, with float seconds.
            Same as .date_time but with more precision.
        ctime
            Optional time field: creation time, with float seconds.
        atime
            Optional time field: last access time, with float seconds.
        arctime
            Optional time field: archival time, with float seconds.

    Internal fields:

        type
            One of RAR_BLOCK_* types.  Only entries with type==RAR_BLOCK_FILE are shown in .infolist().
        flags
            For files, RAR_FILE_* bits.
    '''

    __slots__ = (
        # zipfile-compatible fields
        'filename',
        'file_size',
        'compress_size',
        'date_time',
        'comment',
        'CRC',
        'volume',
        'orig_filename',  # bytes in unknown encoding

        # rar-specific fields
        'extract_version',
        'compress_type',
        'host_os',
        'mode',
        'type',
        'flags',

        # optional extended time fields
        # tuple where the sec is float, or datetime().
        'mtime',  # same as .date_time
        'ctime',
        'atime',
        'arctime',

        # RAR internals
        'name_size',
        'header_size',
        'header_crc',
        'file_offset',
        'add_size',
        'header_data',
        'header_base',
        'header_offset',
        'salt',
        'volume_file',
    )

    def isdir(self):
        '''Returns True if the entry is a directory.'''
        if self.type == RAR_BLOCK_FILE:
            # directory is signalled by all dictionary-size bits being set
            return (self.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY
        return False

    def needs_password(self):
        '''Returns True if the entry has the RAR_FILE_PASSWORD flag set.'''
        # return a real boolean rather than the raw masked integer
        return (self.flags & RAR_FILE_PASSWORD) != 0
452 class RarFile(object):
453 '''Parse RAR structure, provide access to files in archive.
456 #: Archive comment. Byte string or None. Use UNICODE_COMMENTS
457 #: to get automatic decoding to unicode.
458 comment = None
def __init__(self, rarfile, mode="r", charset=None, info_callback=None, crc_check = True):
    """Open and parse a RAR archive.

    Parameters:

        rarfile
            archive file name
        mode
            only 'r' is supported.
        charset
            fallback charset to use, if filenames are not already Unicode-enabled.
        info_callback
            debug callback, gets to see all archive entries.
        crc_check
            set to False to disable CRC checks

    Raises NotImplementedError for any mode other than 'r'.
    """
    # settings taken from the caller
    self.rarfile = rarfile
    self.comment = None
    self._charset = charset or DEFAULT_CHARSET
    self._info_callback = info_callback
    self._crc_check = crc_check
    self._password = None

    # state that parsing fills in
    self._info_list = []
    self._info_map = {}
    self._vol_list = []
    self._needs_password = False
    self._main = None

    if mode != "r":
        raise NotImplementedError("RarFile supports only mode=r")

    self._parse()
def __enter__(self):
    """Context-manager entry: hand back the archive object itself."""
    return self
def __exit__(self, type, value, traceback):
    """Context-manager exit: call close(); exception details are ignored."""
    self.close()
def setpassword(self, password):
    '''Sets the password to use when extracting.

    If no main header has been stored yet, the archive is parsed
    again now that a password is available.
    '''
    self._password = password
    if self._main:
        return
    self._parse()
def needs_password(self):
    '''Tell whether a password is needed for this archive.

    Returns the flag recorded while parsing.
    '''
    return self._needs_password
def namelist(self):
    '''Return list of filenames in archive.'''
    names = []
    for entry in self._info_list:
        names.append(entry.filename)
    return names
def infolist(self):
    '''Return RarInfo objects for all files/directories in archive.

    The internal list itself is returned, not a copy.
    '''
    return self._info_list
def volumelist(self):
    '''Returns filenames of archive volumes.

    In case of single-volume archive, the list contains
    just the name of main archive file.
    '''
    return self._vol_list
def getinfo(self, fname):
    '''Return RarInfo for file.

    A RarInfo instance is returned unchanged.  A name is looked up
    as given first, then with its path separators converted to
    PATH_SEP.  Raises NoRarEntry when neither spelling is known.
    '''
    if isinstance(fname, RarInfo):
        return fname

    # accept both separator styles
    if PATH_SEP == '/':
        alt_name = fname.replace("\\", "/")
    else:
        alt_name = fname.replace("/", "\\")

    for candidate in (fname, alt_name):
        if candidate in self._info_map:
            return self._info_map[candidate]
    raise NoRarEntry("No such file: "+fname)
def open(self, fname, mode = 'r', psw = None):
    '''Returns file-like object (:class:`RarExtFile`),
    from where the data can be read.

    The object implements io.RawIOBase interface, so it can
    be further wrapped with io.BufferedReader and io.TextIOWrapper.

    On older Python where io module is not available, it implements
    only .read(), .seek(), .tell() and .close() methods.

    The object is seekable, although the seeking is fast only on
    uncompressed files, on compressed files the seeking is implemented
    by reading ahead and/or restarting the decompression.

    Parameters:

        fname
            file name or RarInfo instance.
        mode
            must be 'r'
        psw
            password to use for extracting.
    '''
    if mode != 'r':
        raise NotImplementedError("RarFile.open() supports only mode=r")

    # resolve the entry and reject what cannot be read
    inf = self.getinfo(fname)
    if inf.isdir():
        raise TypeError("Directory does not have any data: " + inf.filename)
    if inf.flags & RAR_FILE_SPLIT_BEFORE:
        raise NeedFirstVolume("Partial file, please start from first volume: " + inf.filename)

    # a password matters only for encrypted entries
    if not inf.needs_password():
        psw = None
    else:
        psw = psw or self._password
        if psw is None:
            raise PasswordRequired("File %s requires password" % inf.filename)

    # stored and unencrypted data is read directly from the archive
    if inf.compress_type == RAR_M0 and (inf.flags & RAR_FILE_PASSWORD) == 0:
        return self._open_clear(inf)

    # the temp-archive shortcut needs a known main header, a non-solid
    # archive without encrypted headers, an unsplit entry and a small size
    use_hack = (
        USE_EXTRACT_HACK
        and self._main
        and not self._main.flags & (RAR_MAIN_SOLID | RAR_MAIN_PASSWORD)
        and not inf.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER)
        and not inf.file_size > HACK_SIZE_LIMIT
    )
    if use_hack:
        return self._open_hack(inf, psw)
    return self._open_unrar(self.rarfile, inf, psw)
def read(self, fname, psw = None):
    """Return uncompressed data for archive entry.

    For longer files using .open() may be better idea.

    Parameters:

        fname
            filename or RarInfo instance
        psw
            password to use for extracting.
    """
    stream = self.open(fname, 'r', psw)
    try:
        data = stream.read()
    finally:
        # the stream is closed whether or not reading succeeded
        stream.close()
    return data
def close(self):
    """Release open resources.

    Currently does nothing.
    """
    return None
def printdir(self):
    """Print archive file list to stdout, one filename per line."""
    for entry in self._info_list:
        print(entry.filename)
def extract(self, member, path=None, pwd=None):
    """Extract single file into current directory.

    Parameters:

        member
            filename or RarInfo instance
        path
            optional destination path
        pwd
            optional password to use
    """
    # the extraction helper works on plain names
    fname = member
    if isinstance(member, RarInfo):
        fname = member.filename
    self._extract([fname], path, pwd)
def extractall(self, path=None, members=None, pwd=None):
    """Extract all files into current directory.

    Parameters:

        path
            optional destination path
        members
            optional filename or RarInfo instance list to extract
        pwd
            optional password to use
    """
    names = []
    if members is not None:
        for member in members:
            # RarInfo entries are reduced to their filename
            if isinstance(member, RarInfo):
                member = member.filename
            names.append(member)
    self._extract(names, path, pwd)
def testrar(self):
    """Let 'unrar' test the archive.

    Builds the command from UNRAR_TOOL and TEST_ARGS, runs it on the
    main archive file and passes the result to check_returncode().
    """
    # '-p-' is passed when there is no password to give
    if self._password is None:
        psw_arg = '-p-'
    else:
        psw_arg = '-p' + self._password
    cmd = [UNRAR_TOOL] + list(TEST_ARGS) + [psw_arg, self.rarfile]

    proc = custom_popen(cmd)
    output = proc.communicate()[0]
    check_returncode(proc, output)
691 ## private methods
694 # store entry
def _process_entry(self, item):
    """Record one parsed header.

    File blocks are added to the entry list and map, or merged into
    the previous entry when they continue a split file.  'CMT'
    sub-blocks are read as a file or archive comment.  The info
    callback, if set, is called for every header.
    """
    if item.type == RAR_BLOCK_FILE:
        if not item.flags & RAR_FILE_SPLIT_BEFORE:
            # first (or only) part of a file
            self._info_map[item.filename] = item
            self._info_list.append(item)
            if item.needs_password():
                self._needs_password = True
        elif self._info_list:
            # continuation part: the final crc is in the last block
            prev = self._info_list[-1]
            prev.CRC = item.CRC
            prev.compress_size += item.compress_size

    # new-style comment, skipped when unwanted or split across volumes
    is_comment = item.type == RAR_BLOCK_SUB and item.filename == 'CMT'
    split_bits = RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER
    if is_comment and NEED_COMMENTS and not item.flags & split_bits:
        cmt = self._read_comment_v3(item, self._password)
        if item.flags & RAR_FILE_SOLID:
            # file comment: attach to the latest entry, if there is one
            if self._info_list:
                self._info_list[-1].comment = cmt
        else:
            # archive comment
            self.comment = cmt

    if self._info_callback:
        self._info_callback(item)
730 # read rar
def _parse(self):
    """Run _parse_real() and close whatever file it left in self._fd."""
    self._fd = None
    try:
        self._parse_real()
    finally:
        leftover = self._fd
        if leftover:
            leftover.close()
            self._fd = None
def _parse_real(self):
    """Walk all headers of the archive, continuing into further volumes.

    Raises NotRarFile when the signature is wrong and NeedFirstVolume
    when the given file is not the first volume of a set.
    """
    fd = open(self.rarfile, "rb")
    self._fd = fd
    magic = fd.read(len(RAR_ID))
    if magic != RAR_ID:
        raise NotRarFile("Not a Rar archive: "+self.rarfile)

    volume = 0  # first vol (.rar) is 0
    volfile = self.rarfile
    self._vol_list = [self.rarfile]
    more_vols = False
    endarc = False
    while True:
        # don't read past ENDARC
        h = None
        if not endarc:
            h = self._parse_header(fd)

        if not h:
            if not more_vols:
                break
            # current volume is done, switch to the next one
            volume += 1
            volfile = self._next_volname(volfile)
            fd.close()
            fd = open(volfile, "rb")
            self._fd = fd
            more_vols = False
            endarc = False
            self._vol_list.append(volfile)
            continue

        h.volume = volume
        h.volume_file = volfile

        if h.type == RAR_BLOCK_MAIN and not self._main:
            self._main = h
            # RAR 2.x does not set FIRSTVOLUME,
            # so check it only if NEWNUMBERING is used
            if h.flags & RAR_MAIN_NEWNUMBERING:
                if not h.flags & RAR_MAIN_FIRSTVOLUME:
                    raise NeedFirstVolume("Need to start from first volume")
            if h.flags & RAR_MAIN_PASSWORD:
                self._needs_password = True
                if not self._password:
                    # encrypted headers cannot be read without a password
                    self._main = None
                    break
        elif h.type == RAR_BLOCK_ENDARC:
            more_vols = bool(h.flags & RAR_ENDARC_NEXT_VOLUME)
            endarc = True
        elif h.type == RAR_BLOCK_FILE:
            # RAR 2.x does not write RAR_BLOCK_ENDARC
            if h.flags & RAR_FILE_SPLIT_AFTER:
                more_vols = True
            # RAR 2.x does not set RAR_MAIN_FIRSTVOLUME
            if volume == 0 and h.flags & RAR_FILE_SPLIT_BEFORE:
                raise NeedFirstVolume("Need to start from first volume")

        # store it
        self._process_entry(h)

        # skip the block's data to reach the next header
        if h.add_size > 0:
            fd.seek(h.file_offset + h.add_size, 0)
802 # AES encrypted headers
803 _last_aes_key = (None, None, None) # (salt, key, iv)
def _decrypt_header(self, fd):
    """Wrap fd in a HeaderDecrypt reader for one encrypted header.

    Reads the 8-byte salt from fd.  The key and iv derived for the
    most recent salt are kept in self._last_aes_key and reused when
    the same salt comes up again.  Raises NoCrypto when the crypto
    imports failed.
    """
    if not _have_crypto:
        raise NoCrypto('Cannot parse encrypted headers - no crypto')

    salt = fd.read(8)
    cached_salt, key, iv = self._last_aes_key
    if cached_salt != salt:
        key, iv = rar3_s2k(self._password, salt)
        self._last_aes_key = (salt, key, iv)
    return HeaderDecrypt(fd, key, iv)
815 # read single header
def _parse_header(self, fd):
    """Read a single header from fd.

    Returns the parsed header, or None when the headers are encrypted
    and no password is set.  A struct.error raised while parsing
    becomes BadRarFile if REPORT_BAD_HEADER is set, otherwise None.
    """
    try:
        main = self._main
        if main and main.flags & RAR_MAIN_PASSWORD:
            # encrypted headers are unreadable without a password
            if not self._password:
                return None
            fd = self._decrypt_header(fd)

        return self._parse_block_header(fd)
    except struct.error:
        if REPORT_BAD_HEADER:
            raise BadRarFile('Broken header in RAR file')
        return None
831 # common header
def _parse_block_header(self, fd):
    """Read one block header from fd and verify its CRC.

    Returns a RarInfo on success.  Returns None at end of data, and
    also on a truncated header or CRC mismatch unless
    REPORT_BAD_HEADER is set, in which case BadRarFile is raised.
    """
    base_size = S_BLK_HDR.size

    h = RarInfo()
    h.header_offset = fd.tell()
    h.comment = None

    # fixed-size part common to all blocks
    buf = fd.read(base_size)
    if not buf:
        return None
    h.header_crc, h.type, h.flags, h.header_size = S_BLK_HDR.unpack_from(buf)
    h.header_base = base_size
    pos = base_size

    # rest of the header, if it is longer than the common part
    if h.header_size > base_size:
        buf = buf + fd.read(h.header_size - base_size)
    h.header_data = buf
    h.file_offset = fd.tell()

    # unexpected EOF?
    if len(h.header_data) != h.header_size:
        if REPORT_BAD_HEADER:
            raise BadRarFile('Unexpected EOF when reading header')
        return None

    # size of the data that follows the header
    h.add_size = 0
    if h.flags & RAR_LONG_BLOCK:
        h.add_size = S_LONG.unpack_from(h.header_data, pos)[0]

    # parse interesting ones, decide header boundaries for crc
    btype = h.type
    if btype == RAR_BLOCK_MARK:
        return h
    elif btype == RAR_BLOCK_MAIN:
        h.header_base += 6
        if h.flags & RAR_MAIN_ENCRYPTVER:
            h.header_base += 1
        if h.flags & RAR_MAIN_COMMENT:
            self._parse_subblocks(h, h.header_base)
            self.comment = h.comment
    elif btype == RAR_BLOCK_FILE:
        self._parse_file_header(h, pos)
    elif btype == RAR_BLOCK_SUB:
        self._parse_file_header(h, pos)
        h.header_base = h.header_size
    elif btype == RAR_BLOCK_OLD_AUTH:
        h.header_base += 8
    elif btype == RAR_BLOCK_OLD_EXTRA:
        h.header_base += 7
    else:
        h.header_base = h.header_size

    # old sub-blocks include their data in the crc
    if btype == RAR_BLOCK_OLD_SUB:
        crcdat = h.header_data[2:] + fd.read(h.add_size)
    else:
        crcdat = h.header_data[2:h.header_base]

    calc_crc = crc32(crcdat) & 0xFFFF
    if calc_crc == h.header_crc:
        return h

    # instead of panicing, send eof
    if not REPORT_BAD_HEADER:
        return None

    # try every shorter length to report where the crc would match
    msg = 'Header CRC error (%02x): exp=%x got=%x (xlen = %d)' % ( h.type, h.header_crc, calc_crc, len(crcdat) )
    tail = h.header_data[2:]
    for xlen in range(len(tail), base_size - 3, -1):
        if crc32(tail[:xlen]) & 0xFFFF == h.header_crc:
            msg += ' / crc match, xlen = %d' % xlen
    raise BadRarFile(msg)
915 # read file-specific header
def _parse_file_header(self, h, pos):
    """Parse the file-specific part of a FILE or SUB block header.

    Fills size, time, name, salt and related fields of *h* from
    h.header_data starting at *pos*.  Returns the offset just past
    the parsed base header.
    """
    fld = S_FILE_HDR.unpack_from(h.header_data, pos)
    h.compress_size = fld[0]
    h.file_size = fld[1]
    h.host_os = fld[2]
    h.CRC = fld[3]
    h.date_time = parse_dos_time(fld[4])
    h.extract_version = fld[5]
    h.compress_type = fld[6]
    h.name_size = fld[7]
    h.mode = fld[8]
    pos += S_FILE_HDR.size

    # high 32 bits of both sizes follow when the LARGE flag is set
    if h.flags & RAR_FILE_LARGE:
        h1 = S_LONG.unpack_from(h.header_data, pos)[0]
        h2 = S_LONG.unpack_from(h.header_data, pos + 4)[0]
        h.compress_size |= h1 << 32
        h.file_size |= h2 << 32
        pos += 8
        h.add_size = h.compress_size

    name = h.header_data[pos : pos + h.name_size ]
    pos += h.name_size

    # The compressed unicode form is "plain name, NUL, encoded data".
    # Without a NUL there is nothing for UnicodeFilename to decode, and
    # slicing with find()'s -1 would drop the last byte of the name, so
    # such names are handled like ordinary ones.
    nul = -1
    if h.flags & RAR_FILE_UNICODE:
        nul = name.find(ZERO)
    if nul >= 0:
        h.orig_filename = name[:nul]
        u = UnicodeFilename(h.orig_filename, name[nul + 1 : ])
        h.filename = u.decode()

        # if parsing failed fall back to simple name
        if u.failed:
            h.filename = self._decode(h.orig_filename)
    else:
        h.orig_filename = name
        h.filename = self._decode(name)

    # change separator, if requested
    if PATH_SEP != '\\':
        h.filename = h.filename.replace('\\', PATH_SEP)

    if h.flags & RAR_FILE_SALT:
        h.salt = h.header_data[pos : pos + 8]
        pos += 8
    else:
        h.salt = None

    # optional extended time stamps
    if h.flags & RAR_FILE_EXTTIME:
        pos = self._parse_ext_time(h, pos)
    else:
        h.mtime = h.atime = h.ctime = h.arctime = None

    # base header end
    h.header_base = pos

    if h.flags & RAR_FILE_COMMENT:
        self._parse_subblocks(h, pos)

    # convert timestamps
    if USE_DATETIME:
        h.date_time = to_datetime(h.date_time)
        h.mtime = to_datetime(h.mtime)
        h.atime = to_datetime(h.atime)
        h.ctime = to_datetime(h.ctime)
        h.arctime = to_datetime(h.arctime)

    # .mtime is .date_time with more precision
    if h.mtime:
        if USE_DATETIME:
            h.date_time = h.mtime
        else:
            # keep seconds int
            h.date_time = h.mtime[:5] + (int(h.mtime[5]),)

    return pos
992 # find old-style comment subblock
def _parse_subblocks(self, h, pos):
    """Scan sub-blocks inside h.header_data for an old-style comment.

    Starts at *pos*.  When a comment sub-block is found and its CRC
    matches (or CRC checking is off), it is stored in h.comment.
    """
    hdata = h.header_data
    total = len(hdata)
    while pos < total:
        # ordinary block header
        scrc, stype, sflags, slen = S_BLK_HDR.unpack_from(hdata, pos)
        pos_next = pos + slen
        pos += S_BLK_HDR.size

        # corrupt header: the block would end before its own header does
        if pos_next < pos:
            break

        # followed by block-specific header
        has_room = pos + S_COMMENT_HDR.size <= pos_next
        if stype == RAR_BLOCK_OLD_COMMENT and has_room:
            declen, ver, meth, crc = S_COMMENT_HDR.unpack_from(hdata, pos)
            pos += S_COMMENT_HDR.size
            packed = hdata[pos : pos_next]
            cmt = rar_decompress(ver, meth, packed, declen, sflags,
                                 crc, self._password)
            if not self._crc_check or crc32(cmt) & 0xFFFF == crc:
                h.comment = self._decode_comment(cmt)

        pos = pos_next
def _parse_ext_time(self, h, pos):
    """Parse the extended time area of a file header.

    Sets h.mtime, h.ctime, h.atime and h.arctime and returns the
    offset after the parsed data.
    """
    data = h.header_data

    # flags and rest of data can be missing
    flags = 0
    if len(data) >= pos + 2:
        flags = S_SHORT.unpack_from(data, pos)[0]
        pos += 2

    # one 4-bit group per timestamp, mtime in the highest bits;
    # mtime is the only one that starts from the dos time
    h.mtime, pos = self._parse_xtime(flags >> 12, data, pos, h.date_time)
    h.ctime, pos = self._parse_xtime(flags >> 8, data, pos)
    h.atime, pos = self._parse_xtime(flags >> 4, data, pos)
    h.arctime, pos = self._parse_xtime(flags, data, pos)
    return pos
def _parse_xtime(self, flag, data, pos, dostime = None):
    """Parse one extended timestamp.

    *flag* carries the 4-bit group for this timestamp in its low bits:
    8 = timestamp present, 4 = add one second, low two bits = number
    of extra precision bytes.  Returns (time tuple, new pos); the
    tuple is *dostime* unchanged when the timestamp is not present.
    """
    if not flag & 8:
        return dostime, pos

    # without a base time, a 4-byte dos time comes first
    if not dostime:
        raw = S_LONG.unpack_from(data, pos)[0]
        dostime = parse_dos_time(raw)
        pos += 4

    # up to 3 bytes of sub-second remainder, lowest byte first
    rem = 0
    for _ in range(flag & 3):
        byte = S_BYTE.unpack_from(data, pos)[0]
        rem = (byte << 16) | (rem >> 8)
        pos += 1

    ticks_per_sec = 10000000.0  # 100 ns units
    sec = dostime[5] + rem / ticks_per_sec
    if flag & 4:
        sec += 1
    return dostime[:5] + (sec,), pos
1054 # given current vol name, construct next one
def _next_volname(self, volfile):
    """Given current volume name, construct the next one.

    The naming scheme is picked from the NEWNUMBERING flag of the
    main header.
    """
    if self._main.flags & RAR_MAIN_NEWNUMBERING:
        namer = self._next_newvol
    else:
        namer = self._next_oldvol
    return namer(volfile)
1060 # new-style next volume
def _next_newvol(self, volfile):
    """New-style next volume: increment the number ending at the last digit.

    Raises BadRarName when the name contains no digit at all.
    """
    for idx in range(len(volfile) - 1, -1, -1):
        if '0' <= volfile[idx] <= '9':
            return self._inc_volname(volfile, idx)
    raise BadRarName("Cannot construct volume name: "+volfile)
1069 # old-style next volume
def _next_oldvol(self, volfile):
    """Old-style next volume: .rar -> .r00, otherwise increment the last char."""
    suffix = volfile[-4:]
    if suffix.lower() == '.rar':
        # keep the stem and the 'r', replace the last two characters
        return volfile[:-2] + '00'
    return self._inc_volname(volfile, len(volfile) - 1)
1076 # increase digits with carry, otherwise just increment char
def _inc_volname(self, volfile, i):
    """Increment the character at index *i*, carrying leftwards over '9's.

    Each '9' passed on the way becomes '0'; the first other character
    found is replaced by its successor, digit or not.
    """
    chars = list(volfile)
    idx = i
    # roll over trailing nines
    while idx >= 0 and chars[idx] == '9':
        chars[idx] = '0'
        idx -= 1
    # bump the character that absorbs the carry, if one is left
    if idx >= 0:
        chars[idx] = chr(ord(chars[idx]) + 1)
    return ''.join(chars)
def _open_clear(self, inf):
    """Return a DirectReader for the entry *inf*."""
    reader = DirectReader(self, inf)
    return reader
1090 # put file compressed data into temporary .rar archive, and run
1091 # unrar on that, thus avoiding unrar going over whole archive
def _open_hack(self, inf, psw = None):
    """Copy one entry into a temporary single-file archive and open that.

    Writes the RAR signature, a minimal main header and the entry's
    header plus compressed data to a temp file, then runs unrar on
    it, so unrar does not have to scan the whole archive.  Raises
    BadRarFile when the data cannot be read completely.
    """
    BSIZE = 32*1024

    size = inf.compress_size + inf.header_size
    rf = open(inf.volume_file, "rb", 0)
    try:
        rf.seek(inf.header_offset)

        tmpfd, tmpname = mkstemp(suffix='.rar')
        try:
            try:
                tmpf = os.fdopen(tmpfd, "wb")
            except:
                # nothing owns the raw descriptor yet, close it here
                os.close(tmpfd)
                raise
            try:
                # create main header: crc, type, flags, size, res1, res2
                mh = S_BLK_HDR.pack(0x90CF, 0x73, 0, 13) + ZERO * (2+4)
                tmpf.write(RAR_ID + mh)
                while size > 0:
                    buf = rf.read(min(size, BSIZE))
                    if not buf:
                        raise BadRarFile('read failed: ' + inf.filename)
                    tmpf.write(buf)
                    size -= len(buf)
            finally:
                tmpf.close()
        except:
            # bare except on purpose: the temp file is removed on any
            # failure, interrupts included, and the error is re-raised
            os.unlink(tmpname)
            raise
    finally:
        # source handle is released on every path
        rf.close()

    return self._open_unrar(tmpname, inf, psw, tmpname)
def _read_comment_v3(self, inf, psw=None):
    """Read and decompress a new-style comment block.

    Returns the comment passed through _decode_comment(), or None
    when CRC checking is enabled and the CRC does not match.
    """
    # read data; the handle is closed even if seek/read fails
    rf = open(inf.volume_file, "rb")
    try:
        rf.seek(inf.file_offset)
        data = rf.read(inf.compress_size)
    finally:
        rf.close()

    # decompress
    cmt = rar_decompress(inf.extract_version, inf.compress_type, data,
                         inf.file_size, inf.flags, inf.CRC, psw, inf.salt)

    # check crc
    if self._crc_check:
        # crc32() may return a negative value on Python 2; masking gives
        # the unsigned form without needing the Python-2-only long()
        crc = crc32(cmt) & 0xFFFFFFFF
        if crc != inf.CRC:
            return None

    return self._decode_comment(cmt)
1147 # extract using unrar
def _open_unrar(self, rarfile, inf, psw = None, tmpfile = None):
    """Build the unrar command for one entry and return a PipeReader on it.

    *rarfile* is the archive to run on.  When *tmpfile* is given, no
    entry name is added to the command.
    """
    cmd = [UNRAR_TOOL] + list(OPEN_ARGS)
    if psw is not None:
        cmd.append("-p" + psw)
    cmd.append(rarfile)

    # not giving filename avoids encoding related problems
    if not tmpfile:
        entry_name = inf.filename
        if PATH_SEP != os.sep:
            entry_name = entry_name.replace(PATH_SEP, os.sep)
        cmd.append(entry_name)

    # read from unrar pipe
    return PipeReader(self, inf, cmd, tmpfile)
def _decode(self, val):
    """Decode bytes to unicode.

    Each encoding in TRY_ENCODINGS is tried strictly, in order.  If
    all fail, the archive charset is used with 'replace' error handling.
    """
    for encoding in TRY_ENCODINGS:
        try:
            text = val.decode(encoding)
        except UnicodeError:
            continue
        return text
    return val.decode(self._charset, 'replace')
def _decode_comment(self, val):
    """Return *val* decoded via _decode() if UNICODE_COMMENTS is set, else as is."""
    if not UNICODE_COMMENTS:
        return val
    return self._decode(val)
1177 # call unrar to extract a file
def _extract(self, fnlist, path=None, psw=None):
    """Call unrar to extract files to disk.

    *fnlist* holds the entry names to extract.  *path* is the optional
    destination directory.  *psw* falls back to the archive password.
    The tool's result is passed to check_returncode().
    """
    cmd = [UNRAR_TOOL] + list(EXTRACT_ARGS)

    # password; '-p-' is passed when there is none
    psw = psw or self._password
    if psw is None:
        cmd.append('-p-')
    else:
        cmd.append('-p' + psw)

    # rar file
    cmd.append(self.rarfile)

    # file list, with separators converted to the local one
    convert_sep = os.sep != PATH_SEP
    for name in fnlist:
        if convert_sep:
            name = name.replace(PATH_SEP, os.sep)
        cmd.append(name)

    # destination path, with a trailing separator
    if path is not None:
        cmd.append(path + os.sep)

    # call
    proc = custom_popen(cmd)
    output = proc.communicate()[0]
    check_returncode(proc, output)
1207 ## Utility classes
class UnicodeFilename:
    """Decoder for RAR's compressed unicode filename format.

    *name* is the plain byte name, *encdata* the encoded data stored
    after it.  decode() returns the unicode name; .failed is set to 1
    when either input ran out while decoding.
    """

    def __init__(self, name, encdata):
        self.std_name = bytearray(name)
        self.encdata = bytearray(encdata)
        self.pos = self.encpos = 0
        self.buf = bytearray()
        self.failed = 0

    def enc_byte(self):
        """Return the next byte of encoded data, or 0 (and mark failure) at its end."""
        if self.encpos >= len(self.encdata):
            self.failed = 1
            return 0
        value = self.encdata[self.encpos]
        self.encpos += 1
        return value

    def std_byte(self):
        """Return the plain-name byte at the current output position.

        Returns ord('?') and marks failure when the plain name is too short.
        """
        if self.pos >= len(self.std_name):
            self.failed = 1
            return ord('?')
        return self.std_name[self.pos]

    def put(self, lo, hi):
        """Append one UTF-16LE code unit and advance the output position."""
        self.buf.append(lo)
        self.buf.append(hi)
        self.pos += 1

    def decode(self):
        """Run the decoder over all encoded data and return the unicode name."""
        # first byte is the default high byte
        hi = self.enc_byte()
        flagbits = 0
        while self.encpos < len(self.encdata):
            # every flag byte holds four 2-bit opcodes, highest bits first
            if flagbits == 0:
                flags = self.enc_byte()
                flagbits = 8
            flagbits -= 2
            op = (flags >> flagbits) & 3

            if op == 0:
                # low byte given, high byte zero
                self.put(self.enc_byte(), 0)
            elif op == 1:
                # low byte given, default high byte
                self.put(self.enc_byte(), hi)
            elif op == 2:
                # both bytes given, low first
                self.put(self.enc_byte(), self.enc_byte())
            else:
                # run taken from the plain name
                n = self.enc_byte()
                if n & 0x80:
                    # plain bytes shifted by a correction, default high byte
                    corr = self.enc_byte()
                    for _ in range((n & 0x7f) + 2):
                        self.put((self.std_byte() + corr) & 0xFF, hi)
                else:
                    # plain bytes copied as they are
                    for _ in range(n + 2):
                        self.put(self.std_byte(), 0)
        return self.buf.decode("utf-16le", "replace")
class RarExtFile(RawIOBase):
    """Base class for file-like object that :meth:`RarFile.open` returns.

    Provides public methods and common crc checking.

    Behaviour:
     - no short reads - .read() and .readinto() read as much as requested.
     - no internal buffer, use io.BufferedReader for that.

    If :mod:`io` module is available (Python 2.6+, 3.x), then this class
    will inherit from :class:`io.RawIOBase` class.  This makes line-based
    access available: :meth:`RarExtFile.readline` and ``for ln in f``.
    """

    #: Filename of the archive entry
    name = None

    def __init__(self, rf, inf):
        RawIOBase.__init__(self)

        # standard io.* properties
        self.name = inf.filename
        self.mode = 'rb'

        # owning archive object and entry description
        self.rf = rf
        self.inf = inf
        self.crc_check = rf._crc_check

        # per-stream state, (re)initialized by _open()
        self.fd = None
        self.CRC = 0
        self.remain = 0
        self.returncode = 0

        self._open()

    def _open(self):
        """Reset stream state; subclasses extend this to open the data source."""
        if self.fd:
            self.fd.close()
        self.fd = None
        self.CRC = 0
        self.remain = self.inf.file_size

    def read(self, cnt = None):
        """Read all or specified amount of data from archive entry."""

        # clamp request to what is left; None/negative means "everything"
        left = self.remain
        if cnt is None or cnt < 0 or cnt > left:
            cnt = left
        if cnt == 0:
            return EMPTY

        # actual read, keep running CRC and remaining count up to date
        data = self._read(cnt)
        if data:
            self.CRC = crc32(data, self.CRC)
            self.remain -= len(data)
        if len(data) != cnt:
            raise BadRarFile("Failed the read enough data")

        # entry fully consumed - verify it
        if not data or self.remain == 0:
            self._check()
        return data

    def _check(self):
        """Check final CRC."""
        if not self.crc_check:
            return
        if self.returncode:
            check_returncode(self, '')
        if self.remain != 0:
            raise BadRarFile("Failed the read enough data")

        # normalize signed crc32 result before comparing
        final = self.CRC
        if final < 0:
            final += (long(1) << 32)
        if final != self.inf.CRC:
            raise BadRarFile("Corrupt file - CRC check failed: " + self.inf.filename)

    def _read(self, cnt):
        """Actual read that gets sanitized cnt."""

    def close(self):
        """Close open resources."""

        RawIOBase.close(self)

        if self.fd:
            self.fd.close()
            self.fd = None

    def __del__(self):
        """Hook delete to make sure tempfile is removed."""
        self.close()

    def readinto(self, buf):
        """Read data directly into a caller-supplied buffer.

        Returns bytes read.
        """

        data = self.read(len(buf))
        n = len(data)
        try:
            buf[:n] = data
        except TypeError:
            # array.array needs the data wrapped in a matching array
            import array
            if not isinstance(buf, array.array):
                raise
            buf[:n] = array.array(buf.typecode, data)
        return n

    def tell(self):
        """Return current reading position in uncompressed data."""
        return self.inf.file_size - self.remain

    def seek(self, ofs, whence = 0):
        """Seek in data.

        Forward seeks are done by skipping ahead from the current
        position, backward seeks by re-opening the entry and skipping
        from the start.  Returns the new position.
        """

        # disable crc check when seeking
        self.crc_check = 0

        size = self.inf.file_size
        here = self.tell()

        # translate (ofs, whence) to an absolute offset
        if whence == 0:
            target = ofs
        elif whence == 1:
            target = here + ofs
        elif whence == 2:
            target = size + ofs
        else:
            raise ValueError('Invalid value for whence')

        # keep target inside the entry
        if target < 0:
            target = 0
        elif target > size:
            target = size

        if target < here:
            # going backwards: restart from the beginning
            self._open()
            self._skip(target)
        else:
            self._skip(target - here)
        return self.tell()

    def _skip(self, cnt):
        """Read and discard data"""
        while cnt > 0:
            chunk = self.read(min(cnt, 8192))
            if not chunk:
                break
            cnt -= len(chunk)

    def readable(self):
        """Returns True"""
        return True

    def writable(self):
        """Returns False.

        Writing is not supported."""
        return False

    def seekable(self):
        """Returns True.

        Seeking is supported, although it's slow on compressed files.
        """
        return True

    def readall(self):
        """Read all remaining data"""
        # avoid RawIOBase default impl
        return self.read()
class PipeReader(RarExtFile):
    """Reader that takes entry data from the stdout of a child process.

    Also owns an optional temp file, which is removed on close.
    """

    def __init__(self, rf, inf, cmd, tempfile=None):
        # these must exist before the base constructor calls _open()
        self.cmd = cmd
        self.proc = None
        self.tempfile = tempfile
        RarExtFile.__init__(self, rf, inf)

    def _close_proc(self):
        """Shut down the child process, if any, and record its exit code."""
        proc = self.proc
        if not proc:
            return
        for stream in (proc.stdout, proc.stdin, proc.stderr):
            if stream:
                stream.close()
        proc.wait()
        self.returncode = proc.returncode
        self.proc = None

    def _open(self):
        """(Re)start the command and attach to its stdout."""
        RarExtFile._open(self)

        # stop old process
        self._close_proc()

        # launch new process
        self.returncode = 0
        self.proc = custom_popen(self.cmd)
        self.fd = self.proc.stdout

        # avoid situation where unrar waits on stdin
        if self.proc.stdin:
            self.proc.stdin.close()

    def _read(self, cnt):
        """Read from pipe."""

        # normal read is usually enough
        first = self.fd.read(cnt)
        if len(first) == cnt or not first:
            return first

        # short read, keep asking until satisfied or the pipe runs dry
        parts = [first]
        missing = cnt - len(first)
        while missing > 0:
            more = self.fd.read(missing)
            if not more:
                break
            missing -= len(more)
            parts.append(more)
        return EMPTY.join(parts)

    def close(self):
        """Close open resources."""

        self._close_proc()
        RarExtFile.close(self)

        # remove temp archive, ignoring failure to delete it
        if self.tempfile:
            try:
                os.unlink(self.tempfile)
            except OSError:
                pass
            self.tempfile = None

    if have_memoryview:
        def readinto(self, buf):
            """Zero-copy read directly into buffer."""
            want = min(len(buf), self.remain)
            view = memoryview(buf)
            res = got = 0
            while got < want:
                res = self.fd.readinto(view[got : want])
                if not res:
                    break
                if self.crc_check:
                    self.CRC = crc32(view[got : got + res], self.CRC)
                self.remain -= res
                got += res
            return got
class DirectReader(RarExtFile):
    """Reader for stored (uncompressed) entries, read straight from the
    archive volume files.
    """

    def _open(self):
        """Open the entry's volume and position just after its header."""
        RarExtFile._open(self)

        self.volfile = self.inf.volume_file
        self.fd = open(self.volfile, "rb", 0)
        self.fd.seek(self.inf.header_offset, 0)

        # parsing the header leaves fd at the start of the data
        self.cur = self.rf._parse_header(self.fd)
        self.cur_avail = self.cur.add_size

    def _skip(self, cnt):
        """RAR Seek, skipping through rar files to get to correct position
        """

        while cnt > 0:
            # next vol needed?
            if self.cur_avail == 0 and not self._open_next():
                break

            if cnt > self.cur_avail:
                # rest of this volume is skipped entirely, no seek needed
                cnt -= self.cur_avail
                self.remain -= self.cur_avail
                self.cur_avail = 0
            else:
                # target lies inside the current volume
                self.fd.seek(cnt, 1)
                self.cur_avail -= cnt
                self.remain -= cnt
                cnt = 0

    def _read(self, cnt):
        """Read from potentially multi-volume archive."""

        chunks = []
        while cnt > 0:
            # next vol needed?
            if self.cur_avail == 0 and not self._open_next():
                break

            # fd is in read pos, never read past this volume's data
            data = self.fd.read(min(cnt, self.cur_avail))
            if not data:
                break

            # got some data
            got = len(data)
            cnt -= got
            self.cur_avail -= got
            chunks.append(data)

        if len(chunks) == 1:
            return chunks[0]
        return EMPTY.join(chunks)

    def _open_next(self):
        """Proceed to next volume.

        Returns False when the current header does not have the
        split-after flag, True once the next volume is open and
        positioned at the entry's data.
        """

        # is the file split over archives?
        if (self.cur.flags & RAR_FILE_SPLIT_AFTER) == 0:
            return False

        if self.fd:
            self.fd.close()
            self.fd = None

        # open next part
        self.volfile = self.rf._next_volname(self.volfile)
        fd = open(self.volfile, "rb", 0)
        self.fd = fd

        # walk over marker/main blocks until the first other header
        while True:
            hdr = self.rf._parse_header(fd)
            if not hdr:
                raise BadRarFile("Unexpected EOF")
            if hdr.type in (RAR_BLOCK_MARK, RAR_BLOCK_MAIN):
                if hdr.add_size:
                    fd.seek(hdr.add_size, 1)
                continue

            # it must be the continuation of our entry
            if hdr.orig_filename != self.inf.orig_filename:
                raise BadRarFile("Did not found file entry")
            self.cur = hdr
            self.cur_avail = hdr.add_size
            return True

    if have_memoryview:
        def readinto(self, buf):
            """Zero-copy read directly into buffer."""
            got = 0
            view = memoryview(buf)
            while got < len(buf):
                # next vol needed?
                if self.cur_avail == 0 and not self._open_next():
                    break

                # length for next read, limited to current volume
                cnt = min(len(buf) - got, self.cur_avail)

                # read into temp view
                res = self.fd.readinto(view[got : got + cnt])
                if not res:
                    break
                if self.crc_check:
                    self.CRC = crc32(view[got : got + res], self.CRC)
                self.cur_avail -= res
                self.remain -= res
                got += res
            return got
class HeaderDecrypt:
    """File-like object that decrypts from another file"""

    def __init__(self, f, key, iv):
        self.f = f
        self.ciph = AES.new(key, AES.MODE_CBC, iv)
        # decrypted bytes that were not handed out yet
        self.buf = EMPTY

    def tell(self):
        """Return position of the underlying (encrypted) file."""
        return self.f.tell()

    def read(self, cnt=None):
        """Return up to `cnt` decrypted bytes.

        Fewer bytes are returned when the underlying file cannot
        supply another full cipher block.
        """
        # NOTE(review): cnt is compared with integers right away, so the
        # None default presumably is never relied upon - confirm.
        if cnt > 8*1024:
            raise BadRarFile('Bad count to header decrypt - wrong password?')

        # request can be served from leftover data alone
        leftover = self.buf
        if cnt <= len(leftover):
            self.buf = leftover[cnt:]
            return leftover[:cnt]

        # take all leftover data, the rest must be decrypted
        res = leftover
        self.buf = EMPTY
        cnt -= len(res)

        # decrypt block by block
        blk = self.ciph.block_size
        while cnt > 0:
            enc = self.f.read(blk)
            if len(enc) < blk:
                break
            dec = self.ciph.decrypt(enc)
            if cnt < len(dec):
                # only part of the block is wanted, keep the tail
                res += dec[:cnt]
                self.buf = dec[cnt:]
                cnt = 0
            else:
                res += dec
                cnt -= len(dec)

        return res
1709 ## Utility functions
def rar3_s2k(psw, salt):
    """String-to-key hash for RAR3.

    Returns a (key, iv) tuple derived from the password and salt.
    """

    seed = psw.encode('utf-16le') + salt
    hasher = sha1()
    iv_parts = []

    # 16 rounds of 0x4000 updates, driven by a single running counter
    for n in range(16 * 0x4000):
        # only the first 3 bytes of the packed counter are hashed
        hasher.update(seed + S_LONG.pack(n)[:3])
        if n % 0x4000 == 0:
            # start of a round: last digest byte becomes the next iv byte
            iv_parts.append(hasher.digest()[19:20])
    iv = EMPTY.join(iv_parts)

    # first 16 digest bytes, with each 32-bit word byte-swapped
    key_be = hasher.digest()[:16]
    key_le = pack("<LLLL", *unpack(">LLLL", key_be))
    return key_le, iv
def rar_decompress(vers, meth, data, declen=0, flags=0, crc=0, psw=None, salt=None):
    """Decompress blob of compressed data.

    Used for data with non-standard header - eg. comments.

    The data is wrapped into a temporary single-entry archive
    (signature, main header, file header, data), which is then handed
    to unrar; whatever unrar writes to its output pipe is returned.
    Stored data without the password flag is returned unchanged, and
    EMPTY is returned when the salt flag is set but no salt was given.
    """

    # already uncompressed?
    if meth == RAR_M0 and not (flags & RAR_FILE_PASSWORD):
        return data

    # take only necessary flags
    wanted = RAR_FILE_PASSWORD | RAR_FILE_SALT | RAR_FILE_DICTMASK
    flags = (flags & wanted) | RAR_LONG_BLOCK

    # file header: fixed name "data", zero date, mode 0x20
    fname = bytes('data', 'ascii')
    fhdr = S_FILE_HDR.pack(len(data), declen, RAR_OS_MSDOS, crc,
                           0, vers, meth, len(fname), 0x20)
    fhdr += fname
    if flags & RAR_FILE_SALT:
        if not salt:
            return EMPTY
        fhdr += salt

    # full header: pack once with zero crc, hash everything after the
    # first two bytes, then pack again with the low 16 bits of the crc
    hlen = S_BLK_HDR.size + len(fhdr)
    draft = S_BLK_HDR.pack(0, RAR_BLOCK_FILE, flags, hlen) + fhdr
    hcrc = crc32(draft[2:]) & 0xFFFF
    hdr = S_BLK_HDR.pack(hcrc, RAR_BLOCK_FILE, flags, hlen) + fhdr

    # archive main header
    mh = S_BLK_HDR.pack(0x90CF, RAR_BLOCK_MAIN, 0, 13) + ZERO * (2+4)

    # decompress via temp rar
    tmpfd, tmpname = mkstemp(suffix='.rar')
    tmpf = os.fdopen(tmpfd, "wb")
    try:
        tmpf.write(RAR_ID + mh + hdr + data)
        tmpf.close()

        cmd = [UNRAR_TOOL] + list(OPEN_ARGS)
        if psw is not None and (flags & RAR_FILE_PASSWORD):
            cmd.append("-p" + psw)
        else:
            cmd.append("-p-")
        cmd.append(tmpname)

        proc = custom_popen(cmd)
        return proc.communicate()[0]
    finally:
        # temp archive is always removed, also on failure
        tmpf.close()
        os.unlink(tmpname)
def to_datetime(t):
    """Convert 6-part time tuple into datetime object.

    The tuple is (year, month, day, hour, minute, seconds), where
    seconds may carry a fraction.  Out-of-range month, day, hour,
    minute and second values are clamped to the nearest valid value.
    Returns None when `t` is None.
    """

    if t is None:
        return None

    # split seconds into whole part and microseconds
    year, mon, day, h, m, xs = t
    s = int(xs)
    us = int(1000000 * (xs - s))

    # fast path: values are usually valid as they are
    try:
        return datetime(year, mon, day, h, m, s, us)
    except ValueError:
        pass

    # clamp invalid values; index 0 of the table is unused padding
    month_days = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
    mon = min(max(mon, 1), 12)
    day = min(max(day, 1), month_days[mon])
    h = min(h, 23)
    m = min(m, 59)
    s = min(s, 59)

    # Feb 29 is only valid in leap years, fall back to the 28th
    if mon == 2 and day == 29:
        try:
            return datetime(year, mon, day, h, m, s, us)
        except ValueError:
            day = 28
    return datetime(year, mon, day, h, m, s, us)
def parse_dos_time(stamp):
    """Parse standard 32-bit DOS timestamp.

    Returns (year, month, day, hour, minute, second) tuple.  Fields
    are unpacked as-is, without range validation.
    """

    # bit layout, low to high: sec/2 (5), min (6), hour (5),
    # day (5), month (4), years since 1980 (7)
    half_sec = stamp & 0x1F
    minute = (stamp >> 5) & 0x3F
    hr = (stamp >> 11) & 0x1F
    day = (stamp >> 16) & 0x1F
    mon = (stamp >> 21) & 0x0F
    yr = ((stamp >> 25) & 0x7F) + 1980
    return (yr, mon, day, hr, minute, half_sec * 2)
def custom_popen(cmd):
    """Start `cmd` with all standard streams piped and return the Popen object.

    stderr is merged into stdout, so the caller reads a single stream.
    Raises RarExecError when the executable cannot be found; any other
    OSError is passed on unchanged.
    """

    # needed for py2exe
    if sys.platform == 'win32':
        creationflags = 0x08000000 # CREATE_NO_WINDOW
    else:
        creationflags = 0

    # run command
    try:
        return Popen(cmd, bufsize = 0,
                     stdout = PIPE, stdin = PIPE, stderr = STDOUT,
                     creationflags = creationflags)
    except OSError:
        ex = sys.exc_info()[1]
        if ex.errno == errno.ENOENT:
            raise RarExecError("Unrar not installed? (rarfile.UNRAR_TOOL=%r)" % UNRAR_TOOL)
        raise
def check_returncode(p, out):
    """Raise exception according to unrar exit code

    p   -- object with a ``returncode`` attribute
    out -- captured output, appended to the message when non-empty

    Returns silently when the exit code is 0.
    """

    code = p.returncode
    if code == 0:
        return

    # map return code to exception class, index == exit code
    errmap = [None,
              RarWarning, RarFatalError, RarCRCError, RarLockedArchiveError,
              RarWriteError, RarOpenError, RarUserError, RarMemoryError,
              RarCreateError, RarNoFilesError] # codes from rar.txt
    if 0 < code < len(errmap):
        exc = errmap[code]
    elif code == 255:
        exc = RarUserBreak
    elif code < 0:
        exc = RarSignalExit
    else:
        exc = RarUnknownError

    # message starts with the exception class docstring
    if not out:
        msg = "%s [%d]" % (exc.__doc__, p.returncode)
    else:
        msg = "%s [%d]: %s" % (exc.__doc__, p.returncode, out)

    raise exc(msg)