No need for exc_info anymore.
[rarfile.git] / rarfile.py
blob373c5465183903176ed37c98ab9ba70206507a02
1 # rarfile.py
3 # Copyright (c) 2005-2016 Marko Kreen <markokr@gmail.com>
5 # Permission to use, copy, modify, and/or distribute this software for any
6 # purpose with or without fee is hereby granted, provided that the above
7 # copyright notice and this permission notice appear in all copies.
9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 r"""RAR archive reader.
19 This is a Python module for reading RAR archives. The interface
20 is made as :mod:`zipfile`-like as possible.
22 Basic logic:
23 - Parse archive structure with Python.
24 - Extract non-compressed files with Python
25 - Extract compressed files with unrar.
26 - Optionally write compressed data to temp file to speed up unrar,
27 otherwise it needs to scan whole archive on each execution.
29 Example::
31 import rarfile
33 rf = rarfile.RarFile('myarchive.rar')
34 for f in rf.infolist():
35 print(f.filename, f.file_size)
36 if f.filename == 'README':
37 print(rf.read(f))
39 Archive files can also be accessed via file-like object returned
40 by :meth:`RarFile.open`::
42 import rarfile
44 with rarfile.RarFile('archive.rar') as rf:
45 with rf.open('README') as f:
46 for ln in f:
47 print(ln.strip())
49 There are few module-level parameters to tune behaviour,
50 here they are with defaults, and reason to change it::
52 import rarfile
54 # Set to full path of unrar.exe if it is not in PATH
55 rarfile.UNRAR_TOOL = "unrar"
57 # Set to 0 if you don't look at comments and want to
58 # avoid wasting time for parsing them
59 rarfile.NEED_COMMENTS = 1
61 # Set to 1 if you don't want to deal with decoding comments
62 # from unknown encoding. rarfile will try a couple of common
63 # encodings in sequence.
64 rarfile.UNICODE_COMMENTS = 0
66 # Set to 1 if you prefer timestamps to be datetime objects
67 # instead tuples
68 rarfile.USE_DATETIME = 0
70 # Set to '/' to be more compatible with zipfile
71 rarfile.PATH_SEP = '\\'
73 For more details, refer to source.
75 """
# version string of this module
__version__ = '2.7'

# export only interesting items; these are the names that
# ``from rarfile import *`` makes visible
__all__ = ['is_rarfile', 'RarInfo', 'RarFile', 'RarExtFile']
83 ## Imports and compat - support both Python 2.x and 3.x
86 import sys, os, struct, errno
87 from struct import pack, unpack, Struct
88 from binascii import crc32
89 from tempfile import mkstemp
90 from subprocess import Popen, PIPE, STDOUT
91 from datetime import datetime
92 from io import RawIOBase
93 from hashlib import sha1
# only needed for encrypted headers
#
# Two optional backends are probed in order: 'cryptography' first,
# then PyCrypto ('Crypto').  Whichever imports cleanly supplies
# AES_CBC_Decrypt; _have_crypto records whether either one did.
try:
    from cryptography.hazmat.primitives.ciphers import algorithms, modes, Cipher
    from cryptography.hazmat.backends import default_backend
except ImportError:
    try:
        from Crypto.Cipher import AES
    except ImportError:
        # no backend at all
        _have_crypto = 0
    else:
        class AES_CBC_Decrypt(object):
            """AES-CBC decryptor on top of PyCrypto."""
            block_size = 16

            def __init__(self, key, iv):
                self.dec = AES.new(key, AES.MODE_CBC, iv)

            def decrypt(self, data):
                return self.dec.decrypt(data)

        _have_crypto = 1
else:
    class AES_CBC_Decrypt(object):
        """AES-CBC decryptor on top of the 'cryptography' package."""
        block_size = 16

        def __init__(self, key, iv):
            ciph = Cipher(algorithms.AES(key), modes.CBC(iv), default_backend())
            self.dec = ciph.decryptor()

        def decrypt(self, data):
            return self.dec.update(data)

    _have_crypto = 1
# compat with 2.x: make sure both 'range' (lazy) and 'unicode'
# resolve to the 3.x-style objects whichever interpreter runs this
if sys.version_info[0] >= 3:
    unicode = str
else:
    # prefer 3.x behaviour
    range = xrange
##
## Module configuration.  Can be tuned after importing.
##

#: default fallback charset
#: (used by RarFile when no ``charset`` argument is given)
DEFAULT_CHARSET = "windows-1252"

#: list of encodings to try, with fallback to DEFAULT_CHARSET if none succeed
TRY_ENCODINGS = ('utf8', 'utf-16le')

#: 'unrar', 'rar' or full path to either one
UNRAR_TOOL = "unrar"

#: Command line args to use for opening file for reading.
#: (placed right after UNRAR_TOOL by RarFile._open_unrar)
OPEN_ARGS = ('p', '-inul')

#: Command line args to use for extracting file to disk.
#: (placed right after UNRAR_TOOL by RarFile._extract)
EXTRACT_ARGS = ('x', '-y', '-idq')

#: args for testrar()
TEST_ARGS = ('t', '-idq')

#
# Allow use of tool that is not compatible with unrar.
#
# By default use 'bsdtar' which is 'tar' program that
# sits on top of libarchive.
#
# Problems with libarchive RAR backend:
# - Does not support solid archives.
# - Does not support password-protected archives.
#

ALT_TOOL = 'bsdtar'
ALT_OPEN_ARGS = ('-x', '--to-stdout', '-f')
ALT_EXTRACT_ARGS = ('-x', '-f')
ALT_TEST_ARGS = ('-t', '-f')
ALT_CHECK_ARGS = ('--help',)

#: whether to speed up decompression by using tmp archive
USE_EXTRACT_HACK = 1

#: limit the filesize for tmp archive usage
#: (RarFile.open compares this against the entry's uncompressed file_size)
HACK_SIZE_LIMIT = 20*1024*1024

#: whether to parse file/archive comments.
NEED_COMMENTS = 1

#: whether to convert comments to unicode strings
UNICODE_COMMENTS = 0

#: Convert RAR time tuple into datetime() object
USE_DATETIME = 0

#: Separator for path name components.  RAR internally uses '\\'.
#: Use '/' to be similar with zipfile.
PATH_SEP = '\\'
##
## rar constants
##

# block types (the trailing comment is the ASCII character of the value)
RAR_BLOCK_MARK          = 0x72 # r
RAR_BLOCK_MAIN          = 0x73 # s
RAR_BLOCK_FILE          = 0x74 # t
RAR_BLOCK_OLD_COMMENT   = 0x75 # u
RAR_BLOCK_OLD_EXTRA     = 0x76 # v
RAR_BLOCK_OLD_SUB       = 0x77 # w
RAR_BLOCK_OLD_RECOVERY  = 0x78 # x
RAR_BLOCK_OLD_AUTH      = 0x79 # y
RAR_BLOCK_SUB           = 0x7a # z
RAR_BLOCK_ENDARC        = 0x7b # {

# flags for RAR_BLOCK_MAIN
RAR_MAIN_VOLUME         = 0x0001
RAR_MAIN_COMMENT        = 0x0002
RAR_MAIN_LOCK           = 0x0004
RAR_MAIN_SOLID          = 0x0008
RAR_MAIN_NEWNUMBERING   = 0x0010
RAR_MAIN_AUTH           = 0x0020
RAR_MAIN_RECOVERY       = 0x0040
RAR_MAIN_PASSWORD       = 0x0080
RAR_MAIN_FIRSTVOLUME    = 0x0100
RAR_MAIN_ENCRYPTVER     = 0x0200

# flags for RAR_BLOCK_FILE
RAR_FILE_SPLIT_BEFORE   = 0x0001
RAR_FILE_SPLIT_AFTER    = 0x0002
RAR_FILE_PASSWORD       = 0x0004
RAR_FILE_COMMENT        = 0x0008
RAR_FILE_SOLID          = 0x0010
# the three bits under RAR_FILE_DICTMASK form one multi-valued field;
# the value with all three bits set (RAR_FILE_DIRECTORY) is what
# RarInfo.isdir() tests for
RAR_FILE_DICTMASK       = 0x00e0
RAR_FILE_DICT64         = 0x0000
RAR_FILE_DICT128        = 0x0020
RAR_FILE_DICT256        = 0x0040
RAR_FILE_DICT512        = 0x0060
RAR_FILE_DICT1024       = 0x0080
RAR_FILE_DICT2048       = 0x00a0
RAR_FILE_DICT4096       = 0x00c0
RAR_FILE_DIRECTORY      = 0x00e0
RAR_FILE_LARGE          = 0x0100
RAR_FILE_UNICODE        = 0x0200
RAR_FILE_SALT           = 0x0400
RAR_FILE_VERSION        = 0x0800
RAR_FILE_EXTTIME        = 0x1000
RAR_FILE_EXTFLAGS       = 0x2000

# flags for RAR_BLOCK_ENDARC
RAR_ENDARC_NEXT_VOLUME  = 0x0001
RAR_ENDARC_DATACRC      = 0x0002
RAR_ENDARC_REVSPACE     = 0x0004
RAR_ENDARC_VOLNR        = 0x0008

# flags common to all blocks
RAR_SKIP_IF_UNKNOWN     = 0x4000
# when set, the header carries a 32-bit size of data following it
# (read as add_size by RarFile._parse_block_header)
RAR_LONG_BLOCK          = 0x8000

# Host OS types
RAR_OS_MSDOS = 0
RAR_OS_OS2   = 1
RAR_OS_WIN32 = 2
RAR_OS_UNIX  = 3
RAR_OS_MACOS = 4
RAR_OS_BEOS  = 5

# Compression methods - '0'..'5'
# (RarFile.open reads RAR_M0 entries directly when they are not encrypted)
RAR_M0 = 0x30
RAR_M1 = 0x31
RAR_M2 = 0x32
RAR_M3 = 0x33
RAR_M4 = 0x34
RAR_M5 = 0x35
##
## internal constants
##

# signature expected at the very start of an archive
RAR_ID = b"Rar!\x1a\x07\x00"
ZERO = b"\0"
EMPTY = b""

# common block header: header crc, block type, flags, header size
S_BLK_HDR = Struct('<HBHH')
# file header: compressed size, file size, host os, crc, dos time,
# extract version, compression method, name size, mode
S_FILE_HDR = Struct('<LLBLLBBHL')
S_LONG = Struct('<L')
S_SHORT = Struct('<H')
S_BYTE = Struct('<B')
# old-style comment sub-block: unpacked length, version, method, crc
S_COMMENT_HDR = Struct('<HBBH')
276 ## Public interface
class Error(Exception):
    """Root of the rarfile exception hierarchy."""


class BadRarFile(Error):
    """The archive contains incorrect data."""


class NotRarFile(Error):
    """The file is not a RAR archive."""


class BadRarName(Error):
    """The name components of a multipart archive cannot be guessed."""


class NoRarEntry(Error):
    """The requested file was not found in the archive."""


class PasswordRequired(Error):
    """A password is needed to access the file."""


class NeedFirstVolume(Error):
    """Processing has to start from the first volume."""


class NoCrypto(Error):
    """Encrypted headers cannot be parsed - no crypto library available."""


class RarExecError(Error):
    """A problem reported by the unrar/rar executable."""


class RarWarning(RarExecError):
    """Non-fatal error."""


class RarFatalError(RarExecError):
    """Fatal error."""


class RarCRCError(RarExecError):
    """CRC error while unpacking."""


class RarLockedArchiveError(RarExecError):
    """A locked archive must not be modified."""


class RarWriteError(RarExecError):
    """Write error."""


class RarOpenError(RarExecError):
    """Open error."""


class RarUserError(RarExecError):
    """User error."""


class RarMemoryError(RarExecError):
    """Memory error."""


class RarCreateError(RarExecError):
    """Create error."""


class RarNoFilesError(RarExecError):
    """No files matching the pattern were found."""


class RarUserBreak(RarExecError):
    """User stop."""


class RarUnknownError(RarExecError):
    """Unknown exit code."""


class RarSignalExit(RarExecError):
    """Unrar exited with a signal."""


class RarCannotExec(RarExecError):
    """The executable was not found."""
def is_rarfile(xfile):
    '''Check quickly whether file is rar archive.

    Only the leading signature bytes are read and compared with RAR_ID.
    `xfile` is passed straight to XFile().  Returns True when the
    signature matches, False otherwise.
    '''
    fd = XFile(xfile)
    try:
        buf = fd.read(len(RAR_ID))
    finally:
        # release the handle even if the read fails
        fd.close()
    return buf == RAR_ID
class RarInfo(object):
    r'''Description of a single entry in a RAR archive.

    Fields compatible with :mod:`zipfile`:

        filename
            Name of the file including its relative path.
            The path separator defaults to '\\'; set rarfile.PATH_SEP
            to change it.  Always a unicode string.
        date_time
            Modification time as a (year, month, day, hour, minute, second)
            tuple, or a datetime() object when USE_DATETIME is set.
        file_size
            Size of the uncompressed data.
        compress_size
            Size of the compressed data.
        CRC
            CRC-32 of the uncompressed file, unsigned int.
        comment
            File comment.  Byte string or None.  Set UNICODE_COMMENTS
            to have it decoded to unicode automatically.
        volume
            Number of the volume, counted from 0.

    Fields specific to RAR:

        compress_type
            Compression method: 0x30 - 0x35.
        extract_version
            Lowest Rar version able to decompress the entry.
        host_os
            Host OS type, one of the RAR_OS_* constants.
        mode
            File attributes, dos-style or unix-style depending on host_os.
        volume_file
            Name of the volume file in which the file starts.
        mtime
            Optional time field: modification time, with float seconds.
            Same as .date_time but more precise.
        ctime
            Optional time field: creation time, with float seconds.
        atime
            Optional time field: last access time, with float seconds.
        arctime
            Optional time field: archival time, with float seconds.

    Internal fields:

        type
            One of the RAR_BLOCK_* types.  Only entries with
            type==RAR_BLOCK_FILE show up in .infolist().
        flags
            For files, RAR_FILE_* bits.
    '''

    __slots__ = (
        # zipfile-compatible fields
        'filename',
        'file_size',
        'compress_size',
        'date_time',
        'comment',
        'CRC',
        'volume',
        'orig_filename', # bytes in unknown encoding

        # rar-specific fields
        'extract_version',
        'compress_type',
        'host_os',
        'mode',
        'type',
        'flags',

        # optional extended time fields
        # tuple where the sec is float, or datetime().
        'mtime', # same as .date_time
        'ctime',
        'atime',
        'arctime',

        # RAR internals
        'name_size',
        'header_size',
        'header_crc',
        'file_offset',
        'add_size',
        'header_data',
        'header_base',
        'header_offset',
        'salt',
        'volume_file',
    )

    def isdir(self):
        '''Tell whether the entry is a directory.

        Only file blocks can be directories; for them all bits of
        RAR_FILE_DIRECTORY must be set in the flags.
        '''
        is_file_block = self.type == RAR_BLOCK_FILE
        return is_file_block and (self.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY

    def needs_password(self):
        '''Tell whether the RAR_FILE_PASSWORD flag is set on the entry.'''
        return (self.flags & RAR_FILE_PASSWORD) != 0
438 class RarFile(object):
439 '''Parse RAR structure, provide access to files in archive.
442 #: Archive comment. Byte string or None. Use :data:`UNICODE_COMMENTS`
443 #: to get automatic decoding to unicode.
444 comment = None
446 def __init__(self, rarfile, mode="r", charset=None, info_callback=None,
447 crc_check = True, errors = "stop"):
448 """Open and parse a RAR archive.
450 Parameters:
452 rarfile
453 archive file name
454 mode
455 only 'r' is supported.
456 charset
457 fallback charset to use, if filenames are not already Unicode-enabled.
458 info_callback
459 debug callback, gets to see all archive entries.
460 crc_check
461 set to False to disable CRC checks
462 errors
463 Either "stop" to quietly stop parsing on errors,
464 or "strict" to raise errors. Default is "stop".
466 self.rarfile = rarfile
467 self.comment = None
468 self._charset = charset or DEFAULT_CHARSET
469 self._info_callback = info_callback
471 self._info_list = []
472 self._info_map = {}
473 self._parse_error = None
474 self._needs_password = False
475 self._password = None
476 self._crc_check = crc_check
477 self._vol_list = []
479 if errors == "stop":
480 self._strict = False
481 elif errors == "strict":
482 self._strict = True
483 else:
484 raise ValueError("Invalid value for 'errors' parameter.")
486 self._main = None
488 if mode != "r":
489 raise NotImplementedError("RarFile supports only mode=r")
491 self._parse()
493 def __enter__(self):
494 return self
496 def __exit__(self, type, value, traceback):
497 self.close()
499 def setpassword(self, password):
500 '''Sets the password to use when extracting.'''
501 self._password = password
502 if not self._main:
503 self._parse()
505 def needs_password(self):
506 '''Returns True if any archive entries require password for extraction.'''
507 return self._needs_password
509 def namelist(self):
510 '''Return list of filenames in archive.'''
511 return [f.filename for f in self.infolist()]
513 def infolist(self):
514 '''Return RarInfo objects for all files/directories in archive.'''
515 return self._info_list
517 def volumelist(self):
518 '''Returns filenames of archive volumes.
520 In case of single-volume archive, the list contains
521 just the name of main archive file.
523 return self._vol_list
525 def getinfo(self, fname):
526 '''Return RarInfo for file.'''
528 if isinstance(fname, RarInfo):
529 return fname
531 # accept both ways here
532 if PATH_SEP == '/':
533 fname2 = fname.replace("\\", "/")
534 else:
535 fname2 = fname.replace("/", "\\")
537 try:
538 return self._info_map[fname]
539 except KeyError:
540 try:
541 return self._info_map[fname2]
542 except KeyError:
543 raise NoRarEntry("No such file: "+fname)
545 def open(self, fname, mode = 'r', psw = None):
546 '''Returns file-like object (:class:`RarExtFile`),
547 from where the data can be read.
549 The object implements :class:`io.RawIOBase` interface, so it can
550 be further wrapped with :class:`io.BufferedReader`
551 and :class:`io.TextIOWrapper`.
553 On older Python where io module is not available, it implements
554 only .read(), .seek(), .tell() and .close() methods.
556 The object is seekable, although the seeking is fast only on
557 uncompressed files, on compressed files the seeking is implemented
558 by reading ahead and/or restarting the decompression.
560 Parameters:
562 fname
563 file name or RarInfo instance.
564 mode
565 must be 'r'
567 password to use for extracting.
570 if mode != 'r':
571 raise NotImplementedError("RarFile.open() supports only mode=r")
573 # entry lookup
574 inf = self.getinfo(fname)
575 if inf.isdir():
576 raise TypeError("Directory does not have any data: " + inf.filename)
578 if inf.flags & RAR_FILE_SPLIT_BEFORE:
579 raise NeedFirstVolume("Partial file, please start from first volume: " + inf.filename)
581 # check password
582 if inf.needs_password():
583 psw = psw or self._password
584 if psw is None:
585 raise PasswordRequired("File %s requires password" % inf.filename)
586 else:
587 psw = None
589 # is temp write usable?
590 use_hack = 1
591 if not self._main:
592 use_hack = 0
593 elif self._main.flags & (RAR_MAIN_SOLID | RAR_MAIN_PASSWORD):
594 use_hack = 0
595 elif inf.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER):
596 use_hack = 0
597 elif is_filelike(self.rarfile):
598 pass
599 elif inf.file_size > HACK_SIZE_LIMIT:
600 use_hack = 0
601 elif not USE_EXTRACT_HACK:
602 use_hack = 0
604 # now extract
605 if inf.compress_type == RAR_M0 and (inf.flags & RAR_FILE_PASSWORD) == 0:
606 return self._open_clear(inf)
607 elif use_hack:
608 return self._open_hack(inf, psw)
609 elif is_filelike(self.rarfile):
610 return self._open_unrar_membuf(self.rarfile, inf, psw)
611 else:
612 return self._open_unrar(self.rarfile, inf, psw)
614 def read(self, fname, psw = None):
615 """Return uncompressed data for archive entry.
617 For longer files using :meth:`RarFile.open` may be better idea.
619 Parameters:
621 fname
622 filename or RarInfo instance
624 password to use for extracting.
627 f = self.open(fname, 'r', psw)
628 try:
629 return f.read()
630 finally:
631 f.close()
633 def close(self):
634 """Release open resources."""
635 pass
637 def printdir(self):
638 """Print archive file list to stdout."""
639 for f in self.infolist():
640 print(f.filename)
642 def extract(self, member, path=None, pwd=None):
643 """Extract single file into current directory.
645 Parameters:
647 member
648 filename or :class:`RarInfo` instance
649 path
650 optional destination path
652 optional password to use
654 if isinstance(member, RarInfo):
655 fname = member.filename
656 else:
657 fname = member
658 self._extract([fname], path, pwd)
660 def extractall(self, path=None, members=None, pwd=None):
661 """Extract all files into current directory.
663 Parameters:
665 path
666 optional destination path
667 members
668 optional filename or :class:`RarInfo` instance list to extract
670 optional password to use
672 fnlist = []
673 if members is not None:
674 for m in members:
675 if isinstance(m, RarInfo):
676 fnlist.append(m.filename)
677 else:
678 fnlist.append(m)
679 self._extract(fnlist, path, pwd)
681 def testrar(self):
682 """Let 'unrar' test the archive.
684 cmd = [UNRAR_TOOL] + list(TEST_ARGS)
685 add_password_arg(cmd, self._password)
686 cmd.append(self.rarfile)
687 p = custom_popen(cmd)
688 output = p.communicate()[0]
689 check_returncode(p, output)
691 def strerror(self):
692 """Return error string if parsing failed,
693 or None if no problems.
695 return self._parse_error
698 ## private methods
701 def _set_error(self, msg, *args):
702 if args:
703 msg = msg % args
704 self._parse_error = msg
705 if self._strict:
706 raise BadRarFile(msg)
708 # store entry
709 def _process_entry(self, item):
710 if item.type == RAR_BLOCK_FILE:
711 # use only first part
712 if (item.flags & RAR_FILE_SPLIT_BEFORE) == 0:
713 self._info_map[item.filename] = item
714 self._info_list.append(item)
715 # remember if any items require password
716 if item.needs_password():
717 self._needs_password = True
718 elif len(self._info_list) > 0:
719 # final crc is in last block
720 old = self._info_list[-1]
721 old.CRC = item.CRC
722 old.compress_size += item.compress_size
724 # parse new-style comment
725 if item.type == RAR_BLOCK_SUB and item.filename == 'CMT':
726 if not NEED_COMMENTS:
727 pass
728 elif item.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER):
729 pass
730 elif item.flags & RAR_FILE_SOLID:
731 # file comment
732 cmt = self._read_comment_v3(item, self._password)
733 if len(self._info_list) > 0:
734 old = self._info_list[-1]
735 old.comment = cmt
736 else:
737 # archive comment
738 cmt = self._read_comment_v3(item, self._password)
739 self.comment = cmt
741 if self._info_callback:
742 self._info_callback(item)
744 # read rar
745 def _parse(self):
746 self._fd = None
747 try:
748 self._parse_real()
749 finally:
750 if self._fd:
751 self._fd.close()
752 self._fd = None
754 def _parse_real(self):
755 fd = XFile(self.rarfile)
756 self._fd = fd
757 id = fd.read(len(RAR_ID))
758 if id != RAR_ID:
759 if isinstance(self.rarfile, (str, unicode)):
760 raise NotRarFile("Not a Rar archive: {}".format(self.rarfile))
761 raise NotRarFile("Not a Rar archive")
763 volume = 0 # first vol (.rar) is 0
764 more_vols = 0
765 endarc = 0
766 volfile = self.rarfile
767 self._vol_list = [self.rarfile]
768 while 1:
769 if endarc:
770 h = None # don't read past ENDARC
771 else:
772 h = self._parse_header(fd)
773 if not h:
774 if more_vols:
775 volume += 1
776 fd.close()
777 try:
778 volfile = self._next_volname(volfile)
779 fd = XFile(volfile)
780 except IOError:
781 self._set_error("Cannot open next volume: %s", volfile)
782 break
783 self._fd = fd
784 more_vols = 0
785 endarc = 0
786 self._vol_list.append(volfile)
787 continue
788 break
789 h.volume = volume
790 h.volume_file = volfile
792 if h.type == RAR_BLOCK_MAIN and not self._main:
793 self._main = h
794 if h.flags & RAR_MAIN_NEWNUMBERING:
795 # RAR 2.x does not set FIRSTVOLUME,
796 # so check it only if NEWNUMBERING is used
797 if (h.flags & RAR_MAIN_FIRSTVOLUME) == 0:
798 raise NeedFirstVolume("Need to start from first volume")
799 if h.flags & RAR_MAIN_PASSWORD:
800 self._needs_password = True
801 if not self._password:
802 self._main = None
803 break
804 elif h.type == RAR_BLOCK_ENDARC:
805 more_vols = h.flags & RAR_ENDARC_NEXT_VOLUME
806 endarc = 1
807 elif h.type == RAR_BLOCK_FILE:
808 # RAR 2.x does not write RAR_BLOCK_ENDARC
809 if h.flags & RAR_FILE_SPLIT_AFTER:
810 more_vols = 1
811 # RAR 2.x does not set RAR_MAIN_FIRSTVOLUME
812 if volume == 0 and h.flags & RAR_FILE_SPLIT_BEFORE:
813 raise NeedFirstVolume("Need to start from first volume")
815 # store it
816 self._process_entry(h)
818 # go to next header
819 if h.add_size > 0:
820 fd.seek(h.file_offset + h.add_size, 0)
822 # AES encrypted headers
823 _last_aes_key = (None, None, None) # (salt, key, iv)
824 def _decrypt_header(self, fd):
825 if not _have_crypto:
826 raise NoCrypto('Cannot parse encrypted headers - no crypto')
827 salt = fd.read(8)
828 if self._last_aes_key[0] == salt:
829 key, iv = self._last_aes_key[1:]
830 else:
831 key, iv = rar3_s2k(self._password, salt)
832 self._last_aes_key = (salt, key, iv)
833 return HeaderDecrypt(fd, key, iv)
835 # read single header
836 def _parse_header(self, fd):
837 try:
838 # handle encrypted headers
839 if self._main and self._main.flags & RAR_MAIN_PASSWORD:
840 if not self._password:
841 return
842 fd = self._decrypt_header(fd)
844 # now read actual header
845 return self._parse_block_header(fd)
846 except struct.error:
847 self._set_error('Broken header in RAR file')
848 return None
850 # common header
851 def _parse_block_header(self, fd):
852 h = RarInfo()
853 h.header_offset = fd.tell()
854 h.comment = None
856 # read and parse base header
857 buf = fd.read(S_BLK_HDR.size)
858 if not buf:
859 return None
860 t = S_BLK_HDR.unpack_from(buf)
861 h.header_crc, h.type, h.flags, h.header_size = t
862 h.header_base = S_BLK_HDR.size
863 pos = S_BLK_HDR.size
865 # read full header
866 if h.header_size > S_BLK_HDR.size:
867 h.header_data = buf + fd.read(h.header_size - S_BLK_HDR.size)
868 else:
869 h.header_data = buf
870 h.file_offset = fd.tell()
872 # unexpected EOF?
873 if len(h.header_data) != h.header_size:
874 self._set_error('Unexpected EOF when reading header')
875 return None
877 # block has data assiciated with it?
878 if h.flags & RAR_LONG_BLOCK:
879 h.add_size = S_LONG.unpack_from(h.header_data, pos)[0]
880 else:
881 h.add_size = 0
883 # parse interesting ones, decide header boundaries for crc
884 if h.type == RAR_BLOCK_MARK:
885 return h
886 elif h.type == RAR_BLOCK_MAIN:
887 h.header_base += 6
888 if h.flags & RAR_MAIN_ENCRYPTVER:
889 h.header_base += 1
890 if h.flags & RAR_MAIN_COMMENT:
891 self._parse_subblocks(h, h.header_base)
892 self.comment = h.comment
893 elif h.type == RAR_BLOCK_FILE:
894 self._parse_file_header(h, pos)
895 elif h.type == RAR_BLOCK_SUB:
896 self._parse_file_header(h, pos)
897 h.header_base = h.header_size
898 elif h.type == RAR_BLOCK_OLD_AUTH:
899 h.header_base += 8
900 elif h.type == RAR_BLOCK_OLD_EXTRA:
901 h.header_base += 7
902 else:
903 h.header_base = h.header_size
905 # check crc
906 if h.type == RAR_BLOCK_OLD_SUB:
907 crcdat = h.header_data[2:] + fd.read(h.add_size)
908 else:
909 crcdat = h.header_data[2:h.header_base]
911 calc_crc = crc32(crcdat) & 0xFFFF
913 # return good header
914 if h.header_crc == calc_crc:
915 return h
917 # header parsing failed.
918 self._set_error('Header CRC error (%02x): exp=%x got=%x (xlen = %d)',
919 h.type, h.header_crc, calc_crc, len(crcdat))
921 # instead panicing, send eof
922 return None
924 # read file-specific header
925 def _parse_file_header(self, h, pos):
926 fld = S_FILE_HDR.unpack_from(h.header_data, pos)
927 h.compress_size = fld[0]
928 h.file_size = fld[1]
929 h.host_os = fld[2]
930 h.CRC = fld[3]
931 h.date_time = parse_dos_time(fld[4])
932 h.extract_version = fld[5]
933 h.compress_type = fld[6]
934 h.name_size = fld[7]
935 h.mode = fld[8]
936 pos += S_FILE_HDR.size
938 if h.flags & RAR_FILE_LARGE:
939 h1 = S_LONG.unpack_from(h.header_data, pos)[0]
940 h2 = S_LONG.unpack_from(h.header_data, pos + 4)[0]
941 h.compress_size |= h1 << 32
942 h.file_size |= h2 << 32
943 pos += 8
944 h.add_size = h.compress_size
946 name = h.header_data[pos : pos + h.name_size ]
947 pos += h.name_size
948 if h.flags & RAR_FILE_UNICODE:
949 nul = name.find(ZERO)
950 h.orig_filename = name[:nul]
951 u = UnicodeFilename(h.orig_filename, name[nul + 1 : ])
952 h.filename = u.decode()
954 # if parsing failed fall back to simple name
955 if u.failed:
956 h.filename = self._decode(h.orig_filename)
957 else:
958 h.orig_filename = name
959 h.filename = self._decode(name)
961 # change separator, if requested
962 if PATH_SEP != '\\':
963 h.filename = h.filename.replace('\\', PATH_SEP)
965 if h.flags & RAR_FILE_SALT:
966 h.salt = h.header_data[pos : pos + 8]
967 pos += 8
968 else:
969 h.salt = None
971 # optional extended time stamps
972 if h.flags & RAR_FILE_EXTTIME:
973 pos = self._parse_ext_time(h, pos)
974 else:
975 h.mtime = h.atime = h.ctime = h.arctime = None
977 # base header end
978 h.header_base = pos
980 if h.flags & RAR_FILE_COMMENT:
981 self._parse_subblocks(h, pos)
983 # convert timestamps
984 if USE_DATETIME:
985 h.date_time = to_datetime(h.date_time)
986 h.mtime = to_datetime(h.mtime)
987 h.atime = to_datetime(h.atime)
988 h.ctime = to_datetime(h.ctime)
989 h.arctime = to_datetime(h.arctime)
991 # .mtime is .date_time with more precision
992 if h.mtime:
993 if USE_DATETIME:
994 h.date_time = h.mtime
995 else:
996 # keep seconds int
997 h.date_time = h.mtime[:5] + (int(h.mtime[5]),)
999 return pos
1001 # find old-style comment subblock
1002 def _parse_subblocks(self, h, pos):
1003 hdata = h.header_data
1004 while pos < len(hdata):
1005 # ordinary block header
1006 t = S_BLK_HDR.unpack_from(hdata, pos)
1007 scrc, stype, sflags, slen = t
1008 pos_next = pos + slen
1009 pos += S_BLK_HDR.size
1011 # corrupt header
1012 if pos_next < pos:
1013 break
1015 # followed by block-specific header
1016 if stype == RAR_BLOCK_OLD_COMMENT and pos + S_COMMENT_HDR.size <= pos_next:
1017 declen, ver, meth, crc = S_COMMENT_HDR.unpack_from(hdata, pos)
1018 pos += S_COMMENT_HDR.size
1019 data = hdata[pos : pos_next]
1020 cmt = rar_decompress(ver, meth, data, declen, sflags,
1021 crc, self._password)
1022 if not self._crc_check:
1023 h.comment = self._decode_comment(cmt)
1024 elif crc32(cmt) & 0xFFFF == crc:
1025 h.comment = self._decode_comment(cmt)
1027 pos = pos_next
1029 def _parse_ext_time(self, h, pos):
1030 data = h.header_data
1032 # flags and rest of data can be missing
1033 flags = 0
1034 if pos + 2 <= len(data):
1035 flags = S_SHORT.unpack_from(data, pos)[0]
1036 pos += 2
1038 h.mtime, pos = self._parse_xtime(flags >> 3*4, data, pos, h.date_time)
1039 h.ctime, pos = self._parse_xtime(flags >> 2*4, data, pos)
1040 h.atime, pos = self._parse_xtime(flags >> 1*4, data, pos)
1041 h.arctime, pos = self._parse_xtime(flags >> 0*4, data, pos)
1042 return pos
1044 def _parse_xtime(self, flag, data, pos, dostime = None):
1045 unit = 10000000.0 # 100 ns units
1046 if flag & 8:
1047 if not dostime:
1048 t = S_LONG.unpack_from(data, pos)[0]
1049 dostime = parse_dos_time(t)
1050 pos += 4
1051 rem = 0
1052 cnt = flag & 3
1053 for i in range(cnt):
1054 b = S_BYTE.unpack_from(data, pos)[0]
1055 rem = (b << 16) | (rem >> 8)
1056 pos += 1
1057 sec = dostime[5] + rem / unit
1058 if flag & 4:
1059 sec += 1
1060 dostime = dostime[:5] + (sec,)
1061 return dostime, pos
1063 # given current vol name, construct next one
1064 def _next_volname(self, volfile):
1065 if is_filelike(volfile):
1066 raise IOError("Working on single FD")
1067 if self._main.flags & RAR_MAIN_NEWNUMBERING:
1068 return self._next_newvol(volfile)
1069 return self._next_oldvol(volfile)
1071 # new-style next volume
1072 def _next_newvol(self, volfile):
1073 i = len(volfile) - 1
1074 while i >= 0:
1075 if volfile[i] >= '0' and volfile[i] <= '9':
1076 return self._inc_volname(volfile, i)
1077 i -= 1
1078 raise BadRarName("Cannot construct volume name: "+volfile)
1080 # old-style next volume
1081 def _next_oldvol(self, volfile):
1082 # rar -> r00
1083 if volfile[-4:].lower() == '.rar':
1084 return volfile[:-2] + '00'
1085 return self._inc_volname(volfile, len(volfile) - 1)
1087 # increase digits with carry, otherwise just increment char
1088 def _inc_volname(self, volfile, i):
1089 fn = list(volfile)
1090 while i >= 0:
1091 if fn[i] != '9':
1092 fn[i] = chr(ord(fn[i]) + 1)
1093 break
1094 fn[i] = '0'
1095 i -= 1
1096 return ''.join(fn)
1098 def _open_clear(self, inf):
1099 return DirectReader(self, inf)
1101 # put file compressed data into temporary .rar archive, and run
1102 # unrar on that, thus avoiding unrar going over whole archive
1103 def _open_hack(self, inf, psw = None):
1104 BSIZE = 32*1024
1106 size = inf.compress_size + inf.header_size
1107 rf = XFile(inf.volume_file, 0)
1108 rf.seek(inf.header_offset)
1110 tmpfd, tmpname = mkstemp(suffix='.rar')
1111 tmpf = os.fdopen(tmpfd, "wb")
1113 try:
1114 # create main header: crc, type, flags, size, res1, res2
1115 mh = S_BLK_HDR.pack(0x90CF, 0x73, 0, 13) + ZERO * (2+4)
1116 tmpf.write(RAR_ID + mh)
1117 while size > 0:
1118 if size > BSIZE:
1119 buf = rf.read(BSIZE)
1120 else:
1121 buf = rf.read(size)
1122 if not buf:
1123 raise BadRarFile('read failed: ' + inf.filename)
1124 tmpf.write(buf)
1125 size -= len(buf)
1126 tmpf.close()
1127 rf.close()
1128 except:
1129 rf.close()
1130 tmpf.close()
1131 os.unlink(tmpname)
1132 raise
1134 return self._open_unrar(tmpname, inf, psw, tmpname)
1136 def _read_comment_v3(self, inf, psw=None):
1138 # read data
1139 rf = XFile(inf.volume_file)
1140 rf.seek(inf.file_offset)
1141 data = rf.read(inf.compress_size)
1142 rf.close()
1144 # decompress
1145 cmt = rar_decompress(inf.extract_version, inf.compress_type, data,
1146 inf.file_size, inf.flags, inf.CRC, psw, inf.salt)
1148 # check crc
1149 if self._crc_check:
1150 crc = crc32(cmt)
1151 if crc < 0:
1152 crc += (long(1) << 32)
1153 if crc != inf.CRC:
1154 return None
1156 return self._decode_comment(cmt)
1158 # write in-memory archive to temp file - needed for solid archives
1159 def _open_unrar_membuf(self, memfile, inf, psw):
1160 memfile.seek(0, 0)
1162 tmpfd, tmpname = mkstemp(suffix='.rar')
1163 tmpf = os.fdopen(tmpfd, "wb")
1165 try:
1166 BSIZE = 32*1024
1167 while True:
1168 buf = memfile.read(BSIZE)
1169 if not buf:
1170 break
1171 tmpf.write(buf)
1172 tmpf.close()
1173 except:
1174 tmpf.close()
1175 os.unlink(tmpname)
1176 raise
1177 return self._open_unrar(tmpname, inf, psw, tmpname)
1179 # extract using unrar
def _open_unrar(self, rarfile, inf, psw=None, tmpfile=None):
    """Build the unrar command line for one entry and return a PipeReader.

    ``rarfile`` must be a filename; a file-like object raises
    ``ValueError``.  When ``tmpfile`` is given, no member name is put
    on the command line.
    """
    if is_filelike(rarfile):
        raise ValueError("Cannot use unrar directly on memory buffer")

    cmd = [UNRAR_TOOL]
    cmd.extend(OPEN_ARGS)
    add_password_arg(cmd, psw)
    cmd += ["--", rarfile]

    # not giving filename avoids encoding related problems
    if not tmpfile:
        member = inf.filename
        if PATH_SEP != os.sep:
            member = member.replace(PATH_SEP, os.sep)
        cmd.append(member)

    # the reader pulls the data from the tool's output pipe
    return PipeReader(self, inf, cmd, tmpfile)
def _decode(self, val):
    """Decode ``val`` with the first encoding in TRY_ENCODINGS that works.

    If every strict attempt raises ``UnicodeError``, decode with
    ``self._charset`` using the ``'replace'`` error handler instead.
    """
    for enc in TRY_ENCODINGS:
        try:
            decoded = val.decode(enc)
        except UnicodeError:
            continue
        return decoded
    return val.decode(self._charset, 'replace')
def _decode_comment(self, val):
    """Return ``val`` decoded via ``_decode()`` if UNICODE_COMMENTS is set, else unchanged."""
    if not UNICODE_COMMENTS:
        return val
    return self._decode(val)
1211 # call unrar to extract a file
def _extract(self, fnlist, path=None, psw=None):
    """Run the extraction tool for the members named in ``fnlist``.

    ``psw`` falls back to ``self._password`` when empty.  ``path``,
    if given, is appended (with a trailing separator) as the
    destination directory.  The tool's exit status is checked with
    ``check_returncode()``.
    """
    cmd = [UNRAR_TOOL] + list(EXTRACT_ARGS)

    # password
    add_password_arg(cmd, psw or self._password)

    # archive to operate on
    cmd.append(self.rarfile)

    # member names, converted to the native path separator
    if os.sep != PATH_SEP:
        cmd.extend(fn.replace(PATH_SEP, os.sep) for fn in fnlist)
    else:
        cmd.extend(fnlist)

    # destination directory
    if path is not None:
        cmd.append(path + os.sep)

    # run the tool and inspect the result
    proc = custom_popen(cmd)
    output = proc.communicate()[0]
    check_returncode(proc, output)
1238 ## Utility classes
class UnicodeFilename(object):
    """Decoder for the compressed unicode form of a filename.

    ``name`` is the plain byte name, ``encdata`` the encoded stream.
    ``decode()`` builds UTF-16LE code units in ``buf``; ``failed`` is
    set to 1 when either input runs out early.
    """

    def __init__(self, name, encdata):
        self.buf = bytearray()
        self.failed = 0
        self.pos = self.encpos = 0
        self.std_name = bytearray(name)
        self.encdata = bytearray(encdata)

    def enc_byte(self):
        """Return the next byte of the encoded stream (0 if exhausted)."""
        if self.encpos >= len(self.encdata):
            self.failed = 1
            return 0
        value = self.encdata[self.encpos]
        self.encpos += 1
        return value

    def std_byte(self):
        """Return the plain-name byte at the current output position ('?' if none)."""
        if self.pos >= len(self.std_name):
            self.failed = 1
            return ord('?')
        return self.std_name[self.pos]

    def put(self, lo, hi):
        """Append one UTF-16LE code unit and advance the output position."""
        self.buf.extend((lo, hi))
        self.pos += 1

    def decode(self):
        """Run the decoder over ``encdata`` and return the unicode name."""
        hi = self.enc_byte()
        flags = 0
        bits_left = 0
        total = len(self.encdata)
        while self.encpos < total:
            # each flags byte carries four 2-bit opcodes, highest first
            if not bits_left:
                flags = self.enc_byte()
                bits_left = 8
            bits_left -= 2
            mode = (flags >> bits_left) & 3

            if mode == 0:
                self.put(self.enc_byte(), 0)
                continue
            if mode == 1:
                self.put(self.enc_byte(), hi)
                continue
            if mode == 2:
                low = self.enc_byte()
                high = self.enc_byte()
                self.put(low, high)
                continue

            # mode 3: run of characters taken from the plain name
            n = self.enc_byte()
            if n & 0x80:
                delta = self.enc_byte()
                for _ in range((n & 0x7f) + 2):
                    self.put((self.std_byte() + delta) & 0xFF, hi)
            else:
                for _ in range(n + 2):
                    self.put(self.std_byte(), 0)
        return self.buf.decode("utf-16le", "replace")
class RarExtFile(RawIOBase):
    """Base class for file-like object that :meth:`RarFile.open` returns.

    Provides public methods and common crc checking.

    Behaviour:
     - no short reads - .read() and .readinto() read as much as requested.
     - no internal buffer, use io.BufferedReader for that.

    If :mod:`io` module is available (Python 2.6+, 3.x), then this class
    will inherit from :class:`io.RawIOBase` class.  This makes line-based
    access available: :meth:`RarExtFile.readline` and ``for ln in f``.
    """

    #: Filename of the archive entry
    name = None

    def __init__(self, rf, inf):
        super(RarExtFile, self).__init__()

        # standard io.* properties
        self.name = inf.filename
        self.mode = 'rb'

        # reader state; _open() resets fd/CRC/remain
        self.rf = rf
        self.inf = inf
        self.crc_check = rf._crc_check
        self.fd = None
        self.CRC = 0
        self.remain = 0
        self.returncode = 0

        self._open()

    def _open(self):
        """Reset the reader to the start of the entry."""
        if self.fd:
            self.fd.close()
        self.fd = None
        self.CRC = 0
        self.remain = self.inf.file_size

    def read(self, cnt=None):
        """Read all or specified amount of data from archive entry."""

        # clamp the request to what is left in the entry
        if cnt is None or cnt < 0 or cnt > self.remain:
            cnt = self.remain
        if cnt == 0:
            return EMPTY

        # fetch the data and update the running checksum
        data = self._read(cnt)
        if data:
            self.CRC = crc32(data, self.CRC)
            self.remain -= len(data)
        if len(data) != cnt:
            raise BadRarFile("Failed the read enough data")

        # verify once the entry has been consumed
        if not data or self.remain == 0:
            self._check()
        return data

    def _check(self):
        """Check final CRC."""
        if not self.crc_check:
            return
        if self.returncode:
            check_returncode(self, '')
        if self.remain != 0:
            raise BadRarFile("Failed the read enough data")
        # the running crc may be negative; compare as unsigned 32-bit
        if (self.CRC & 0xFFFFFFFF) != self.inf.CRC:
            raise BadRarFile("Corrupt file - CRC check failed: " + self.inf.filename)

    def _read(self, cnt):
        """Actual read that gets sanitized cnt."""

    def close(self):
        """Close open resources."""
        super(RarExtFile, self).close()

        fd = self.fd
        if fd:
            fd.close()
            self.fd = None

    def __del__(self):
        """Hook delete to make sure tempfile is removed."""
        self.close()

    def readinto(self, buf):
        """Zero-copy read directly into buffer.

        Returns bytes read.
        """
        data = self.read(len(buf))
        n = len(data)
        try:
            buf[:n] = data
        except TypeError:
            # array.array targets need the data wrapped in a matching array
            import array
            if not isinstance(buf, array.array):
                raise
            buf[:n] = array.array(buf.typecode, data)
        return n

    def tell(self):
        """Return current reading position in uncompressed data."""
        return self.inf.file_size - self.remain

    def seek(self, ofs, whence=0):
        """Seek in data.

        On uncompressed files, the seeking works by actual
        seeks so it's fast.  On compressed files it's slow
        - forward seeking happens by reading ahead,
        backwards by re-opening and decompressing from the start.
        """

        # disable crc check when seeking
        self.crc_check = 0

        fsize = self.inf.file_size
        here = self.tell()

        if whence == 0:
            # relative to start of entry
            target = ofs
        elif whence == 1:
            # relative to current position
            target = here + ofs
        elif whence == 2:
            # relative to end of entry
            target = fsize + ofs
        else:
            raise ValueError('Invalid value for whence')

        # keep the target inside the entry
        if target < 0:
            target = 0
        elif target > fsize:
            target = fsize

        if target < here:
            # going backwards: restart and read forward to the target
            self._open()
            self._skip(target)
        else:
            self._skip(target - here)
        return self.tell()

    def _skip(self, cnt):
        """Read and discard data"""
        while cnt > 0:
            chunk = self.read(min(cnt, 8192))
            if not chunk:
                break
            cnt -= len(chunk)

    def readable(self):
        """Returns True"""
        return True

    def writable(self):
        """Returns False.

        Writing is not supported."""
        return False

    def seekable(self):
        """Returns True.

        Seeking is supported, although it's slow on compressed files.
        """
        return True

    def readall(self):
        """Read all remaining data"""
        # avoid RawIOBase default impl
        return self.read()
class PipeReader(RarExtFile):
    """Read data from pipe, handle tempfile cleanup."""

    def __init__(self, rf, inf, cmd, tempfile=None):
        # these must exist before the base __init__ runs, it calls _open()
        self.cmd = cmd
        self.proc = None
        self.tempfile = tempfile
        super(PipeReader, self).__init__(rf, inf)

    def _close_proc(self):
        """Close the child's pipes, wait for it and record its exit code."""
        proc = self.proc
        if not proc:
            return
        for stream in (proc.stdout, proc.stdin, proc.stderr):
            if stream:
                stream.close()
        proc.wait()
        self.returncode = proc.returncode
        self.proc = None

    def _open(self):
        """(Re)start the external tool and attach to its stdout."""
        super(PipeReader, self)._open()

        # stop old process
        self._close_proc()

        # launch new process
        self.returncode = 0
        self.proc = custom_popen(self.cmd)
        self.fd = self.proc.stdout

        # avoid situation where unrar waits on stdin
        if self.proc.stdin:
            self.proc.stdin.close()

    def _read(self, cnt):
        """Read from pipe."""

        # normal read is usually enough
        data = self.fd.read(cnt)
        if not data or len(data) == cnt:
            return data

        # short read, keep asking until satisfied or EOF
        parts = [data]
        missing = cnt - len(data)
        while missing > 0:
            data = self.fd.read(missing)
            if not data:
                break
            missing -= len(data)
            parts.append(data)
        return EMPTY.join(parts)

    def close(self):
        """Close open resources."""

        self._close_proc()
        super(PipeReader, self).close()

        if self.tempfile:
            try:
                os.unlink(self.tempfile)
            except OSError:
                pass
            self.tempfile = None

    def readinto(self, buf):
        """Zero-copy read directly into buffer."""
        cnt = min(len(buf), self.remain)
        vbuf = memoryview(buf)
        got = 0
        while got < cnt:
            res = self.fd.readinto(vbuf[got : cnt])
            if not res:
                break
            if self.crc_check:
                self.CRC = crc32(vbuf[got : got + res], self.CRC)
            self.remain -= res
            got += res
        return got
class DirectReader(RarExtFile):
    """Read uncompressed data directly from archive."""

    def _open(self):
        """Open the entry's volume and position at its data."""
        super(DirectReader, self)._open()

        self.volfile = self.inf.volume_file
        self.fd = XFile(self.volfile, 0)
        self.fd.seek(self.inf.header_offset, 0)
        # re-parsing the header leaves fd at the start of the data
        self.cur = self.rf._parse_header(self.fd)
        self.cur_avail = self.cur.add_size

    def _skip(self, cnt):
        """RAR Seek, skipping through rar files to get to correct position
        """

        while cnt > 0:
            # next vol needed?
            if self.cur_avail == 0 and not self._open_next():
                break

            if cnt > self.cur_avail:
                # target is past this volume's part: account for it all
                cnt -= self.cur_avail
                self.remain -= self.cur_avail
                self.cur_avail = 0
            else:
                # target is inside this part: a plain relative seek
                self.fd.seek(cnt, 1)
                self.cur_avail -= cnt
                self.remain -= cnt
                cnt = 0

    def _read(self, cnt):
        """Read from potentially multi-volume archive."""

        parts = []
        while cnt > 0:
            # next vol needed?
            if self.cur_avail == 0 and not self._open_next():
                break

            # fd is in read pos; never read past the current part
            data = self.fd.read(min(cnt, self.cur_avail))
            if not data:
                break

            # got some data
            cnt -= len(data)
            self.cur_avail -= len(data)
            parts.append(data)

        if len(parts) == 1:
            return parts[0]
        return EMPTY.join(parts)

    def _open_next(self):
        """Proceed to next volume."""

        # is the file split over archives?
        if (self.cur.flags & RAR_FILE_SPLIT_AFTER) == 0:
            return False

        if self.fd:
            self.fd.close()
            self.fd = None

        # open next part
        self.volfile = self.rf._next_volname(self.volfile)
        fd = open(self.volfile, "rb", 0)
        self.fd = fd

        # skip marker/main blocks until the first other header
        while True:
            hdr = self.rf._parse_header(fd)
            if not hdr:
                raise BadRarFile("Unexpected EOF")
            if hdr.type in (RAR_BLOCK_MARK, RAR_BLOCK_MAIN):
                if hdr.add_size:
                    fd.seek(hdr.add_size, 1)
                continue
            if hdr.orig_filename != self.inf.orig_filename:
                raise BadRarFile("Did not found file entry")
            self.cur = hdr
            self.cur_avail = hdr.add_size
            return True

    def readinto(self, buf):
        """Zero-copy read directly into buffer."""
        got = 0
        total = len(buf)
        vbuf = memoryview(buf)
        while got < total:
            # next vol needed?
            if self.cur_avail == 0 and not self._open_next():
                break

            # length for next read
            cnt = min(total - got, self.cur_avail)

            # read into temp view
            res = self.fd.readinto(vbuf[got : got + cnt])
            if not res:
                break
            if self.crc_check:
                self.CRC = crc32(vbuf[got : got + res], self.CRC)
            self.cur_avail -= res
            self.remain -= res
            got += res
        return got
class HeaderDecrypt(object):
    """File-like object that decrypts from another file"""

    def __init__(self, f, key, iv):
        self.f = f
        self.ciph = AES_CBC_Decrypt(key, iv)
        # decrypted bytes not yet handed out
        self.buf = EMPTY

    def tell(self):
        """Return the position of the underlying file."""
        return self.f.tell()

    def read(self, cnt=None):
        """Return up to ``cnt`` decrypted bytes.

        Requests above 8 KiB raise ``BadRarFile``.  Fewer bytes are
        returned if the underlying file cannot supply a full cipher
        block.
        """
        if cnt > 8*1024:
            raise BadRarFile('Bad count to header decrypt - wrong password?')

        # serve from leftover data when it is enough
        pending = self.buf
        if cnt <= len(pending):
            self.buf = pending[cnt:]
            return pending[:cnt]
        res = pending
        self.buf = EMPTY
        cnt -= len(res)

        # decrypt block by block, keeping any surplus for next time
        blk = self.ciph.block_size
        while cnt > 0:
            enc = self.f.read(blk)
            if len(enc) < blk:
                break
            dec = self.ciph.decrypt(enc)
            if cnt < len(dec):
                res += dec[:cnt]
                self.buf = dec[cnt:]
                cnt = 0
            else:
                res += dec
                cnt -= len(dec)

        return res
1737 # handle (filename|filelike) object
class XFile(object):
    """Uniform wrapper around either a filename or a file-like object.

    A filename is opened in ``'rb'`` mode and closed by ``close()``.
    A file-like object is rewound to offset 0 and left open by
    ``close()``.
    """
    __slots__ = ('_fd', '_need_close')

    def __init__(self, xfile, bufsize=1024):
        if not is_filelike(xfile):
            # we opened it, so we are responsible for closing it
            self._need_close = True
            self._fd = open(xfile, 'rb', bufsize)
        else:
            self._need_close = False
            self._fd = xfile
            self._fd.seek(0)

    def read(self, n=None):
        """Delegate to the wrapped object's ``read()``."""
        return self._fd.read(n)

    def tell(self):
        """Delegate to the wrapped object's ``tell()``."""
        return self._fd.tell()

    def seek(self, ofs, whence=0):
        """Delegate to the wrapped object's ``seek()``."""
        return self._fd.seek(ofs, whence)

    def readinto(self, dst):
        """Delegate to the wrapped object's ``readinto()``."""
        return self._fd.readinto(dst)

    def close(self):
        """Close the wrapped object only if this wrapper opened it."""
        if self._need_close:
            self._fd.close()

    def __enter__(self):
        return self

    def __exit__(self, typ, val, tb):
        self.close()
1765 ## Utility functions
def is_filelike(obj):
    """Tell a filename apart from a file-like object.

    Returns ``False`` for ``str``/``unicode`` values and ``True`` for
    objects that have ``read``, ``tell`` and ``seek`` attributes.
    Anything else raises ``ValueError``.
    """
    if isinstance(obj, str) or isinstance(obj, unicode):
        return False
    if not all(hasattr(obj, attr) for attr in ('read', 'tell', 'seek')):
        raise ValueError("Invalid object passed as file")
    return True
def rar3_s2k(psw, salt):
    """String-to-key hash for RAR3.

    Feeds ``psw`` (as UTF-16LE) plus ``salt`` plus a 3-byte counter
    into one SHA-1 context 16 * 0x4000 times.  Returns ``(key, iv)``:
    the first 16 digest bytes with each 32-bit word byte-swapped, and
    16 IV bytes sampled from the running digest.
    """
    rounds = 0x4000
    seed = psw.encode('utf-16le') + salt
    iv_parts = []
    h = sha1()
    for n in range(16 * rounds):
        h.update(seed + S_LONG.pack(n)[:3])
        if n % rounds == 0:
            # one IV byte at the start of each block of rounds
            iv_parts.append(h.digest()[19:20])
    key_be = h.digest()[:16]
    key_le = pack("<LLLL", *unpack(">LLLL", key_be))
    return key_le, EMPTY.join(iv_parts)
def rar_decompress(vers, meth, data, declen=0, flags=0, crc=0, psw=None, salt=None):
    """Decompress blob of compressed data.

    Used for data with non-standard header - eg. comments.

    Wraps ``data`` in a minimal one-file archive written to a temp
    file, runs the external tool on it and returns the tool's output.
    Stored (RAR_M0) data without the password flag is returned as-is.
    If the salt flag is set but ``salt`` is empty, ``EMPTY`` is
    returned.
    """

    # already uncompressed?
    if meth == RAR_M0 and (flags & RAR_FILE_PASSWORD) == 0:
        return data

    # take only necessary flags
    keep = RAR_FILE_PASSWORD | RAR_FILE_SALT | RAR_FILE_DICTMASK
    flags = (flags & keep) | RAR_LONG_BLOCK

    # file header
    fname = b'data'
    date = 0
    mode = 0x20
    file_hdr = S_FILE_HDR.pack(len(data), declen, RAR_OS_MSDOS, crc,
                               date, vers, meth, len(fname), mode)
    file_hdr += fname
    if flags & RAR_FILE_SALT:
        if not salt:
            return EMPTY
        file_hdr += salt

    # full header: pack once with a zero crc to compute the real one
    hlen = S_BLK_HDR.size + len(file_hdr)
    draft = S_BLK_HDR.pack(0, RAR_BLOCK_FILE, flags, hlen) + file_hdr
    hcrc = crc32(draft[2:]) & 0xFFFF
    hdr = S_BLK_HDR.pack(hcrc, RAR_BLOCK_FILE, flags, hlen) + file_hdr

    # archive main header
    main_hdr = S_BLK_HDR.pack(0x90CF, RAR_BLOCK_MAIN, 0, 13) + ZERO * (2+4)

    # decompress via temp rar
    fd, tmpname = mkstemp(suffix='.rar')
    tmpf = os.fdopen(fd, "wb")
    try:
        tmpf.write(RAR_ID + main_hdr + hdr + data)
        tmpf.close()

        cmd = [UNRAR_TOOL] + list(OPEN_ARGS)
        add_password_arg(cmd, psw, (flags & RAR_FILE_PASSWORD))
        cmd.append(tmpname)

        proc = custom_popen(cmd)
        return proc.communicate()[0]
    finally:
        # always remove the temp archive, success or not
        tmpf.close()
        os.unlink(tmpname)
def to_datetime(t):
    """Convert 6-part time tuple into datetime object.

    ``t`` is ``(year, month, day, hour, minute, seconds)`` where
    seconds may carry a fraction.  ``None`` is returned unchanged.
    Out-of-range fields are clamped if the values are not valid as
    given.
    """
    if t is None:
        return None

    year, mon, day, h, m, xs = t
    s = int(xs)
    us = int(1000000 * (xs - s))

    # fast path: the values are usually fine as they are
    try:
        return datetime(year, mon, day, h, m, s, us)
    except ValueError:
        pass

    # clamp each field into its legal range
    days_in_month = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
    mon = min(max(mon, 1), 12)
    day = min(max(day, 1), days_in_month[mon])
    h = min(h, 23)
    m = min(m, 59)
    s = min(s, 59)

    # Feb 29 only exists in leap years; fall back to the 28th
    if mon == 2 and day == 29:
        try:
            return datetime(year, mon, day, h, m, s, us)
        except ValueError:
            day = 28
    return datetime(year, mon, day, h, m, s, us)
def parse_dos_time(stamp):
    """Parse standard 32-bit DOS timestamp.

    Returns ``(year, month, day, hour, minute, second)``.  The seconds
    field is stored halved, so it is doubled here.
    """
    half_sec = stamp & 0x1F
    minute = (stamp >> 5) & 0x3F
    hour = (stamp >> 11) & 0x1F
    day = (stamp >> 16) & 0x1F
    mon = (stamp >> 21) & 0x0F
    year = ((stamp >> 25) & 0x7F) + 1980
    return (year, mon, day, hour, minute, half_sec * 2)
def custom_popen(cmd):
    """Disconnect cmd from parent fds, read only from stdout.

    Starts ``cmd`` unbuffered with stdin and stdout as pipes and stderr
    merged into stdout, and returns the ``Popen`` object.  A missing
    executable (``ENOENT``) is reported as ``RarCannotExec``; other
    ``OSError`` values propagate.
    """

    # needed for py2exe
    creationflags = 0x08000000 if sys.platform == 'win32' else 0  # CREATE_NO_WINDOW

    try:
        return Popen(cmd, bufsize=0,
                     stdout=PIPE, stdin=PIPE, stderr=STDOUT,
                     creationflags=creationflags)
    except OSError as ex:
        if ex.errno != errno.ENOENT:
            raise
        raise RarCannotExec("Unrar not installed? (rarfile.UNRAR_TOOL=%r)" % UNRAR_TOOL)
def custom_check(cmd, ignore_retcode=False):
    """Run command, collect output, raise error if needed.

    Returns the command's output.  A non-zero exit code raises
    ``RarExecError`` unless ``ignore_retcode`` is true.
    """
    proc = custom_popen(cmd)
    output = proc.communicate()[0]
    if not ignore_retcode and proc.returncode:
        raise RarExecError("Check-run failed")
    return output
def add_password_arg(cmd, psw, required=False):
    """Append password switch to commandline.

    Adds ``-p<psw>`` when a password is given and ``-p-`` otherwise.
    Nothing is added when the alternative tool is in use.  ``required``
    is accepted but not used here.
    """
    if UNRAR_TOOL == ALT_TOOL:
        return
    cmd.append('-p-' if psw is None else '-p' + psw)
def check_returncode(p, out):
    """Raise exception according to unrar exit code

    ``p`` only needs a ``returncode`` attribute.  Exit code 0 returns
    silently; any other value raises, with ``out`` appended to the
    message when it is non-empty.
    """
    code = p.returncode
    if code == 0:
        return

    # exit code -> exception class; the alternative tool has no table
    if UNRAR_TOOL == ALT_TOOL:
        errmap = [None]
    else:
        # codes from rar.txt
        errmap = [None,
                  RarWarning, RarFatalError, RarCRCError, RarLockedArchiveError,
                  RarWriteError, RarOpenError, RarUserError, RarMemoryError,
                  RarCreateError, RarNoFilesError]

    if 0 < code < len(errmap):
        exc = errmap[code]
    elif code == 255:
        exc = RarUserBreak
    elif code < 0:
        exc = RarSignalExit
    else:
        exc = RarUnknownError

    # the exception class docstring doubles as the message text
    if out:
        msg = "%s [%d]: %s" % (exc.__doc__, code, out)
    else:
        msg = "%s [%d]" % (exc.__doc__, code)

    raise exc(msg)
1958 # Check if unrar works
try:
    # probe the configured tool; its exit code does not matter here
    custom_check([UNRAR_TOOL], True)
except RarCannotExec:
    try:
        # configured tool is missing - probe the alternative one
        custom_check([ALT_TOOL] + list(ALT_CHECK_ARGS), True)
    except RarCannotExec:
        # no usable tool, only uncompressed archives work
        pass
    else:
        # alternative tool runs: switch the module configuration to it
        UNRAR_TOOL = ALT_TOOL
        OPEN_ARGS = ALT_OPEN_ARGS
        EXTRACT_ARGS = ALT_EXTRACT_ARGS
        TEST_ARGS = ALT_TEST_ARGS