faq: update
[rarfile.git] / rarfile.py
blob9552361506863421c045e3aee292cd0a38e9fc08
1 # rarfile.py
3 # Copyright (c) 2005-2011 Marko Kreen <markokr@gmail.com>
5 # Permission to use, copy, modify, and/or distribute this software for any
6 # purpose with or without fee is hereby granted, provided that the above
7 # copyright notice and this permission notice appear in all copies.
9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 r"""RAR archive reader.
19 This is a Python module for reading RAR archives. The interface
20 is made as zipfile-like as possible.
22 Basic logic:
23 - Parse archive structure with Python.
24 - Extract non-compressed files with Python
25 - Extract compressed files with unrar.
26 - Optionally write compressed data to temp file to speed up unrar,
27 otherwise it needs to scan whole archive on each execution.
29 Example::
31 import rarfile
33 rf = rarfile.RarFile('myarchive.rar')
34 for f in rf.infolist():
35 print f.filename, f.file_size
36 if f.filename == 'README':
37 print rf.read(f)
39 There are a few module-level parameters to tune behaviour;
40 here they are with their defaults and the reason to change each::
42 import rarfile
44 # Set to full path of unrar.exe if it is not in PATH
45 rarfile.UNRAR_TOOL = "unrar"
47 # Set to 0 if you don't look at comments and want to
48 # avoid wasting time for parsing them
49 rarfile.NEED_COMMENTS = 1
51 # Set to 1 if you don't want to deal with decoding comments
52 # from unknown encoding. rarfile will try couple of common
53 # encodings in sequence.
54 rarfile.UNICODE_COMMENTS = 0
56 # Set to 1 if you prefer timestamps to be datetime objects
57 # instead tuples
58 rarfile.USE_DATETIME = 0
60 # Set to '/' to be more compatible with zipfile
61 rarfile.PATH_SEP = '\\'
63 For more details, refer to source.
65 """
67 __version__ = '2.4'
69 # export only interesting items
70 __all__ = ['is_rarfile', 'RarInfo', 'RarFile', 'RarExtFile']
73 ## Imports and compat - support both Python 2.x and 3.x
76 import sys, os, struct
77 from struct import pack, unpack
78 from binascii import crc32
79 from tempfile import mkstemp
80 from subprocess import Popen, PIPE, STDOUT
81 from datetime import datetime
83 # only needed for encrypted headers
84 try:
85 from Crypto.Cipher import AES
86 try:
87 from hashlib import sha1
88 except ImportError:
89 from sha import new as sha1
90 _have_crypto = 1
91 except ImportError:
92 _have_crypto = 0
# compat with 2.x
if sys.hexversion < 0x3000000:
    # prefer 3.x behaviour: make range() the lazy variant
    range = xrange
    # py2.6 has broken bytes(); this replacement ignores the encoding
    # argument and simply converts the value with str()
    def bytes(s, enc):
        return str(s)
# see if compat bytearray() is needed
try:
    bytearray
except NameError:
    import array
    # Fallback built on array.array('B'); it exposes append(), indexing,
    # len() and decode() only.
    class bytearray:
        def __init__(self, val = ''):
            self.arr = array.array('B', val)
            # delegate straight to the underlying array object
            self.append = self.arr.append
            self.__getitem__ = self.arr.__getitem__
            self.__len__ = self.arr.__len__
        def decode(self, *args):
            # convert the raw bytes to a string, then decode as requested
            return self.arr.tostring().decode(*args)
# Optimized .readinto() requires memoryview
# have_memoryview is 1 when the builtin exists, 0 otherwise
try:
    memoryview
    have_memoryview = 1
except NameError:
    have_memoryview = 0
# Struct() for older python
try:
    from struct import Struct
except ImportError:
    # Fallback with the same method names, implemented on top of the
    # module-level pack()/unpack() functions.
    class Struct:
        def __init__(self, fmt):
            self.format = fmt
            self.size = struct.calcsize(fmt)
        def unpack(self, buf):
            return unpack(self.format, buf)
        def unpack_from(self, buf, ofs = 0):
            # emulate unpack_from() by slicing out exactly self.size bytes
            return unpack(self.format, buf[ofs : ofs + self.size])
        def pack(self, *args):
            return pack(self.format, *args)
# file object superclass
try:
    from io import RawIOBase
except ImportError:
    # No io module: provide a stub base class whose close() does nothing,
    # so subclasses can still call RawIOBase.close(self).
    class RawIOBase(object):
        def close(self):
            pass
148 ## Module configuration. Can be tuned after importing.
151 # default fallback charset
152 DEFAULT_CHARSET = "windows-1252"
154 # list of encodings to try, with fallback to DEFAULT_CHARSET if none succeed
155 TRY_ENCODINGS = ('utf8', 'utf-16le')
157 # 'unrar', 'rar' or full path to either one
158 UNRAR_TOOL = "unrar"
160 # Command line args to use for opening file for reading.
161 OPEN_ARGS = ('p', '-inul')
163 # Command line args to use for extracting file to disk.
164 EXTRACT_ARGS = ('x', '-y', '-idq')
166 # args for testrar()
167 TEST_ARGS = ('t', '-idq')
169 # whether to speed up decompression by using tmp archive
170 USE_EXTRACT_HACK = 1
172 # limit the filesize for tmp archive usage
173 HACK_SIZE_LIMIT = 20*1024*1024
175 # whether to parse file/archive comments.
176 NEED_COMMENTS = 1
178 # whether to convert comments to unicode strings
179 UNICODE_COMMENTS = 0
181 # When RAR is corrupt, stopping on bad header is better
182 # On unknown/misparsed RAR headers reporting is better
183 REPORT_BAD_HEADER = 0
185 # Convert RAR time tuple into datetime() object
186 USE_DATETIME = 0
188 # Separator for path name components. RAR internally uses '\\'.
189 # Use '/' to be similar with zipfile.
190 PATH_SEP = '\\'
193 ## rar constants
196 # block types
197 RAR_BLOCK_MARK = 0x72 # r
198 RAR_BLOCK_MAIN = 0x73 # s
199 RAR_BLOCK_FILE = 0x74 # t
200 RAR_BLOCK_OLD_COMMENT = 0x75 # u
201 RAR_BLOCK_OLD_EXTRA = 0x76 # v
202 RAR_BLOCK_OLD_SUB = 0x77 # w
203 RAR_BLOCK_OLD_RECOVERY = 0x78 # x
204 RAR_BLOCK_OLD_AUTH = 0x79 # y
205 RAR_BLOCK_SUB = 0x7a # z
206 RAR_BLOCK_ENDARC = 0x7b # {
208 # flags for RAR_BLOCK_MAIN
209 RAR_MAIN_VOLUME = 0x0001
210 RAR_MAIN_COMMENT = 0x0002
211 RAR_MAIN_LOCK = 0x0004
212 RAR_MAIN_SOLID = 0x0008
213 RAR_MAIN_NEWNUMBERING = 0x0010
214 RAR_MAIN_AUTH = 0x0020
215 RAR_MAIN_RECOVERY = 0x0040
216 RAR_MAIN_PASSWORD = 0x0080
217 RAR_MAIN_FIRSTVOLUME = 0x0100
218 RAR_MAIN_ENCRYPTVER = 0x0200
220 # flags for RAR_BLOCK_FILE
221 RAR_FILE_SPLIT_BEFORE = 0x0001
222 RAR_FILE_SPLIT_AFTER = 0x0002
223 RAR_FILE_PASSWORD = 0x0004
224 RAR_FILE_COMMENT = 0x0008
225 RAR_FILE_SOLID = 0x0010
226 RAR_FILE_DICTMASK = 0x00e0
227 RAR_FILE_DICT64 = 0x0000
228 RAR_FILE_DICT128 = 0x0020
229 RAR_FILE_DICT256 = 0x0040
230 RAR_FILE_DICT512 = 0x0060
231 RAR_FILE_DICT1024 = 0x0080
232 RAR_FILE_DICT2048 = 0x00a0
233 RAR_FILE_DICT4096 = 0x00c0
234 RAR_FILE_DIRECTORY = 0x00e0
235 RAR_FILE_LARGE = 0x0100
236 RAR_FILE_UNICODE = 0x0200
237 RAR_FILE_SALT = 0x0400
238 RAR_FILE_VERSION = 0x0800
239 RAR_FILE_EXTTIME = 0x1000
240 RAR_FILE_EXTFLAGS = 0x2000
242 # flags for RAR_BLOCK_ENDARC
243 RAR_ENDARC_NEXT_VOLUME = 0x0001
244 RAR_ENDARC_DATACRC = 0x0002
245 RAR_ENDARC_REVSPACE = 0x0004
246 RAR_ENDARC_VOLNR = 0x0008
248 # flags common to all blocks
249 RAR_SKIP_IF_UNKNOWN = 0x4000
250 RAR_LONG_BLOCK = 0x8000
252 # Host OS types
253 RAR_OS_MSDOS = 0
254 RAR_OS_OS2 = 1
255 RAR_OS_WIN32 = 2
256 RAR_OS_UNIX = 3
257 RAR_OS_MACOS = 4
258 RAR_OS_BEOS = 5
260 # Compression methods - '0'..'5'
261 RAR_M0 = 0x30
262 RAR_M1 = 0x31
263 RAR_M2 = 0x32
264 RAR_M3 = 0x33
265 RAR_M4 = 0x34
266 RAR_M5 = 0x35
269 ## internal constants
272 RAR_ID = bytes("Rar!\x1a\x07\x00", 'ascii')
273 ZERO = bytes("\0", 'ascii')
274 EMPTY = bytes("", 'ascii')
276 S_BLK_HDR = Struct('<HBHH')
277 S_FILE_HDR = Struct('<LLBLLBBHL')
278 S_LONG = Struct('<L')
279 S_SHORT = Struct('<H')
280 S_BYTE = Struct('<B')
281 S_COMMENT_HDR = Struct('<HBBH')
284 ## Public interface
# All exceptions below derive from Error, so callers can catch the
# whole family with a single "except Error".
class Error(Exception):
    """Base class for rarfile errors."""
class BadRarFile(Error):
    """Incorrect data in archive."""
class NotRarFile(Error):
    """The file is not RAR archive."""
class BadRarName(Error):
    """Cannot guess multipart name components."""
class NoRarEntry(Error):
    """File not found in RAR"""
class PasswordRequired(Error):
    """File requires password"""
class NeedFirstVolume(Error):
    """Need to start from first volume."""
class NoCrypto(Error):
    """Cannot parse encrypted headers - no crypto available."""
def is_rarfile(fn):
    '''Check quickly whether file is rar archive.

    Only the leading signature bytes are read and compared with RAR_ID.

    @param fn: name of the file to check
    @return: True if the file starts with the RAR signature
    '''
    fd = open(fn, "rb")
    try:
        buf = fd.read(len(RAR_ID))
    finally:
        # close explicitly instead of relying on garbage collection
        fd.close()
    return buf == RAR_ID
class RarInfo(object):
    '''An entry in rar archive.

    @ivar filename:
        File name with relative path.
        Path components are separated by rarfile.PATH_SEP, which
        defaults to '\\'; set it to '/' to be similar to zipfile.
        Always unicode string.
    @ivar date_time:
        Modification time, tuple of (year, month, day, hour, minute, second).
        Or datetime() object if USE_DATETIME is set.
    @ivar file_size:
        Uncompressed size.
    @ivar compress_size:
        Compressed size.
    @ivar compress_type:
        Compression method: 0x30 - 0x35.
    @ivar extract_version:
        Minimal Rar version needed for decompressing.
    @ivar host_os:
        Host OS type, one of RAR_OS_* constants.
    @ivar mode:
        File attributes. May be either dos-style or unix-style, depending on host_os.
    @ivar CRC:
        CRC-32 of uncompressed file, unsigned int.
    @ivar volume:
        Volume nr, starting from 0.
    @ivar volume_file:
        Volume file name, where file starts.
    @ivar type:
        One of RAR_BLOCK_* types.  Only entries with type==RAR_BLOCK_FILE are shown in .infolist().
    @ivar flags:
        For files, RAR_FILE_* bits.
    @ivar comment:
        File comment (unicode string or None).

    @ivar mtime:
        Optional time field: Modification time, with float seconds.
        Same as .date_time but with more precision.
    @ivar ctime:
        Optional time field: creation time, with float seconds.
    @ivar atime:
        Optional time field: last access time, with float seconds.
    @ivar arctime:
        Optional time field: archival time, with float seconds.
    '''

    __slots__ = (
        # zipfile-compatible fields
        'filename',
        'file_size',
        'compress_size',
        'date_time',
        'comment',
        'CRC',
        'volume',
        'orig_filename', # bytes in unknown encoding

        # rar-specific fields
        'extract_version',
        'compress_type',
        'host_os',
        'mode',
        'type',
        'flags',

        # optional extended time fields
        # tuple where the sec is float, or datetime().
        'mtime', # same as .date_time
        'ctime',
        'atime',
        'arctime',

        # RAR internals
        'name_size',
        'header_size',
        'header_crc',
        'file_offset',
        'add_size',
        'header_data',
        'header_base',
        'header_offset',
        'salt',
        'volume_file',
    )

    def isdir(self):
        '''Returns True if the entry is a directory.'''
        if self.type == RAR_BLOCK_FILE:
            # directory is signalled by all dictionary-size bits being set
            return (self.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY
        return False

    def needs_password(self):
        '''Returns a true (non-zero) value if RAR_FILE_PASSWORD is set in .flags.'''
        return self.flags & RAR_FILE_PASSWORD
406 class RarFile(object):
407 '''Parse RAR structure, provide access to files in archive.
409 @ivar comment:
410 Archive comment (unicode string or None).
def __init__(self, rarfile, mode="r", charset=None, info_callback=None, crc_check = True):
    """Open and parse a RAR archive.

    @param rarfile: archive file name
    @param mode: only 'r' is supported.
    @param charset: fallback charset to use, if filenames are not already Unicode-enabled.
    @param info_callback: debug callback, gets to see all archive entries.
    @param crc_check: set to False to disable CRC checks
    @raise NotImplementedError: if mode is not 'r'
    """
    self.rarfile = rarfile
    self.comment = None
    # fall back to module-wide DEFAULT_CHARSET when none is given
    self._charset = charset or DEFAULT_CHARSET
    self._info_callback = info_callback

    self._info_list = []
    self._info_map = {}
    self._needs_password = False
    self._password = None
    self._crc_check = crc_check

    # main archive header, filled in by parsing
    self._main = None

    if mode != "r":
        raise NotImplementedError("RarFile supports only mode=r")

    self._parse()
def setpassword(self, password):
    '''Sets the password to use when extracting.

    If no main header is stored yet (parsing drops it when the archive
    headers are encrypted and no password was known), the archive is
    parsed again with the new password.

    @param password: password string
    '''
    self._password = password
    if not self._main:
        self._parse()
def needs_password(self):
    '''Returns True if any archive entries require password for extraction.

    The flag is set during parsing, both for password-protected entries
    and for archives whose main header has RAR_MAIN_PASSWORD set.
    '''
    return self._needs_password
def namelist(self):
    '''Return list of filenames in archive, in the order entries were stored.'''
    names = []
    for entry in self._info_list:
        names.append(entry.filename)
    return names
def infolist(self):
    '''Return RarInfo objects for all files/directories in archive.

    The internal list itself is returned, not a copy.
    '''
    return self._info_list
def getinfo(self, fname):
    '''Return RarInfo for file.

    @param fname: file name or RarInfo instance (returned unchanged)
    @raise NoRarEntry: if no entry with such name exists
    '''

    # already an entry object - nothing to look up
    if isinstance(fname, RarInfo):
        return fname

    # accept both separators: build the spelling that uses PATH_SEP
    if PATH_SEP == '/':
        alt_name = fname.replace("\\", "/")
    else:
        alt_name = fname.replace("/", "\\")

    # exact name first, normalized name second
    for candidate in (fname, alt_name):
        if candidate in self._info_map:
            return self._info_map[candidate]
    raise NoRarEntry("No such file: "+fname)
def open(self, fname, mode = 'r', psw = None):
    '''Return open file object, where the data can be read.

    The object implements io.RawIOBase interface, so it can
    be further wrapped with io.BufferedReader and io.TextIOWrapper.

    On older Python where io module is not available, it implements
    only .read(), .seek(), .tell() and .close() methods.

    The object is seekable, although the seeking is fast only on
    uncompressed files, on compressed files the seeking is implemented
    by reading ahead and/or restarting the decompression.

    @param fname: file name or RarInfo instance.
    @param mode: must be 'r'
    @param psw: password to use for extracting.
    '''

    if mode != 'r':
        raise NotImplementedError("RarFile.open() supports only mode=r")

    # entry lookup
    inf = self.getinfo(fname)
    if inf.isdir():
        raise TypeError("Directory does not have any data: " + inf.filename)

    if inf.flags & RAR_FILE_SPLIT_BEFORE:
        raise NeedFirstVolume("Partial file, please start from first volume: " + inf.filename)

    # password is used only for entries that are actually protected
    if not inf.needs_password():
        psw = None
    else:
        psw = psw or self._password
        if psw is None:
            raise PasswordRequired("File %s requires password" % inf.filename)

    # temp-archive trick is usable only for small, unsplit entries
    # of a non-solid archive with readable headers
    use_hack = 0
    if USE_EXTRACT_HACK and self._main:
        split_mask = RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER
        blocked = (self._main.flags & (RAR_MAIN_SOLID | RAR_MAIN_PASSWORD)
                   or inf.flags & split_mask
                   or inf.file_size > HACK_SIZE_LIMIT)
        if not blocked:
            use_hack = 1

    # stored and unencrypted data can be read directly
    if inf.compress_type == RAR_M0 and (inf.flags & RAR_FILE_PASSWORD) == 0:
        return self._open_clear(inf)
    if use_hack:
        return self._open_hack(inf, psw)
    return self._open_unrar(self.rarfile, inf, psw)
def read(self, fname, psw = None):
    """Return uncompressed data for archive entry.

    For longer files using .open() may be better idea.

    @param fname: filename or RarInfo instance
    @param psw: password to use for extracting.
    """

    reader = self.open(fname, 'r', psw)
    try:
        data = reader.read()
    finally:
        # always release the reader, even when reading fails
        reader.close()
    return data
def close(self):
    """Release open resources.

    Currently does nothing.
    """
    pass
def printdir(self):
    """Print archive file list to stdout.

    One filename per line, in the order of .infolist().
    """
    for f in self._info_list:
        print(f.filename)
def extract(self, member, path=None, pwd=None):
    """Extract single file into current directory.

    @param member: filename or RarInfo instance
    @param path: optional destination path
    @param pwd: optional password to use
    """
    # an entry object is reduced to its name, a plain name is used as-is
    fname = member
    if isinstance(member, RarInfo):
        fname = member.filename
    self._extract([fname], path, pwd)
def extractall(self, path=None, members=None, pwd=None):
    """Extract all files into current directory.

    @param path: optional destination path
    @param members: optional filename or RarInfo instance list to extract
    @param pwd: optional password to use
    """
    # no member list means an empty name list is handed to _extract()
    if members is None:
        members = ()

    fnlist = []
    for entry in members:
        name = entry
        if isinstance(entry, RarInfo):
            name = entry.filename
        fnlist.append(name)
    self._extract(fnlist, path, pwd)
def testrar(self):
    """Let 'unrar' test the archive.

    @raise BadRarFile: if the tool exits with non-zero return code
    """
    # '-p-' is passed when no password is set
    if self._password is None:
        psw_arg = '-p-'
    else:
        psw_arg = '-p' + self._password

    cmd = [UNRAR_TOOL] + list(TEST_ARGS) + [psw_arg, self.rarfile]
    proc = custom_popen(cmd)
    proc.communicate()
    if proc.returncode != 0:
        raise BadRarFile("Testing failed")
603 ## private methods
# register a parsed header: file entries, comments, debug callback
def _process_entry(self, item):
    if item.type == RAR_BLOCK_FILE:
        if item.flags & RAR_FILE_SPLIT_BEFORE:
            # continuation of a split file: only the first part is listed,
            # but the final crc is in the last block
            if self._info_list:
                first = self._info_list[-1]
                first.CRC = item.CRC
                first.compress_size += item.compress_size
        else:
            self._info_map[item.filename] = item
            self._info_list.append(item)
            # remember if any items require password
            if item.needs_password():
                self._needs_password = True

    # parse new-style comment, unless disabled or split over volumes
    split_mask = RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER
    if (item.type == RAR_BLOCK_SUB and item.filename == 'CMT'
            and NEED_COMMENTS and not item.flags & split_mask):
        cmt = self._read_comment_v3(item, self._password)
        if item.flags & RAR_FILE_SOLID:
            # file comment, belongs to the most recent entry
            if self._info_list:
                self._info_list[-1].comment = cmt
        else:
            # archive comment
            self.comment = cmt

    if self._info_callback:
        self._info_callback(item)
# read rar
def _parse(self):
    # _parse_real() stores the currently open volume in self._fd;
    # make sure it gets closed whether parsing succeeds or fails
    self._fd = None
    try:
        self._parse_real()
    finally:
        if self._fd:
            self._fd.close()
            self._fd = None
# walk over all headers of all volumes, storing entries as they are found
def _parse_real(self):
    fd = open(self.rarfile, "rb")
    self._fd = fd
    magic = fd.read(len(RAR_ID))
    if magic != RAR_ID:
        raise NotRarFile("Not a Rar archive: "+self.rarfile)

    volume = 0  # first vol (.rar) is 0
    more_vols = 0
    endarc = 0
    volfile = self.rarfile
    while 1:
        # don't read past ENDARC
        h = None
        if not endarc:
            h = self._parse_header(fd)

        if not h:
            # end of this volume: stop, or switch to the next one
            if not more_vols:
                break
            volume += 1
            volfile = self._next_volname(volfile)
            fd.close()
            fd = open(volfile, "rb")
            self._fd = fd
            more_vols = 0
            endarc = 0
            continue

        h.volume = volume
        h.volume_file = volfile

        if h.type == RAR_BLOCK_MAIN and not self._main:
            self._main = h
            # RAR 2.x does not set FIRSTVOLUME,
            # so check it only if NEWNUMBERING is used
            if h.flags & RAR_MAIN_NEWNUMBERING and (h.flags & RAR_MAIN_FIRSTVOLUME) == 0:
                raise NeedFirstVolume("Need to start from first volume")
            if h.flags & RAR_MAIN_PASSWORD:
                self._needs_password = True
                if not self._password:
                    # headers are encrypted, cannot continue without password
                    self._main = None
                    break
        elif h.type == RAR_BLOCK_ENDARC:
            more_vols = h.flags & RAR_ENDARC_NEXT_VOLUME
            endarc = 1
        elif h.type == RAR_BLOCK_FILE:
            # RAR 2.x does not write RAR_BLOCK_ENDARC
            if h.flags & RAR_FILE_SPLIT_AFTER:
                more_vols = 1
            # RAR 2.x does not set RAR_MAIN_FIRSTVOLUME
            if volume == 0 and h.flags & RAR_FILE_SPLIT_BEFORE:
                raise NeedFirstVolume("Need to start from first volume")

        # store it
        self._process_entry(h)

        # go to next header, skipping data attached to this one
        if h.add_size > 0:
            fd.seek(h.file_offset + h.add_size, 0)
# AES encrypted headers
# cache of the most recently derived key: (salt, key, iv)
_last_aes_key = (None, None, None)
def _decrypt_header(self, fd):
    if not _have_crypto:
        raise NoCrypto('Cannot parse encrypted headers - no crypto')

    # each encrypted header is preceded by its 8-byte salt
    salt = fd.read(8)
    cached_salt, key, iv = self._last_aes_key
    if cached_salt != salt:
        # different salt: derive key again and remember it
        key, iv = rar3_s2k(self._password, salt)
        self._last_aes_key = (salt, key, iv)
    return HeaderDecrypt(fd, key, iv)
# read single header; returns None when nothing usable could be read
def _parse_header(self, fd):
    try:
        # handle encrypted headers
        encrypted = self._main and self._main.flags & RAR_MAIN_PASSWORD
        if encrypted:
            if not self._password:
                return
            fd = self._decrypt_header(fd)

        # now read actual header
        return self._parse_block_header(fd)
    except struct.error:
        # truncated data: either report it or treat it as end of archive
        if not REPORT_BAD_HEADER:
            return None
        raise BadRarFile('Broken header in RAR file')
# common header
# Reads one block header from fd and returns it as RarInfo.
# Returns None on EOF, on a short read or on a header CRC mismatch,
# unless REPORT_BAD_HEADER is set, in which case BadRarFile is raised
# for the short read and the CRC mismatch.
def _parse_block_header(self, fd):
    h = RarInfo()
    h.header_offset = fd.tell()
    h.comment = None

    # read and parse base header
    buf = fd.read(S_BLK_HDR.size)
    if not buf:
        return None
    t = S_BLK_HDR.unpack_from(buf)
    h.header_crc, h.type, h.flags, h.header_size = t
    # header_base is the end of the region covered by the header CRC;
    # it is adjusted per block type below
    h.header_base = S_BLK_HDR.size
    pos = S_BLK_HDR.size

    # read full header
    if h.header_size > S_BLK_HDR.size:
        h.header_data = buf + fd.read(h.header_size - S_BLK_HDR.size)
    else:
        h.header_data = buf
    # data (if any) starts right after the header
    h.file_offset = fd.tell()

    # unexpected EOF?
    if len(h.header_data) != h.header_size:
        if REPORT_BAD_HEADER:
            raise BadRarFile('Unexpected EOF when reading header')
        return None

    # block has data associated with it?
    if h.flags & RAR_LONG_BLOCK:
        h.add_size = S_LONG.unpack_from(h.header_data, pos)[0]
    else:
        h.add_size = 0

    # parse interesting ones, decide header boundaries for crc
    if h.type == RAR_BLOCK_MARK:
        # the marker block is returned without a CRC check
        return h
    elif h.type == RAR_BLOCK_MAIN:
        h.header_base += 6
        if h.flags & RAR_MAIN_ENCRYPTVER:
            h.header_base += 1
        if h.flags & RAR_MAIN_COMMENT:
            self._parse_subblocks(h, h.header_base)
            self.comment = h.comment
    elif h.type == RAR_BLOCK_FILE:
        self._parse_file_header(h, pos)
    elif h.type == RAR_BLOCK_SUB:
        self._parse_file_header(h, pos)
        h.header_base = h.header_size
    elif h.type == RAR_BLOCK_OLD_AUTH:
        h.header_base += 8
    elif h.type == RAR_BLOCK_OLD_EXTRA:
        h.header_base += 7
    else:
        h.header_base = h.header_size

    # check crc
    if h.type == RAR_BLOCK_OLD_SUB:
        # here the CRC also covers the data that follows the header
        crcdat = h.header_data[2:] + fd.read(h.add_size)
    else:
        # skip the 2-byte CRC field itself
        crcdat = h.header_data[2:h.header_base]

    # header CRC is the low 16 bits of CRC-32
    calc_crc = crc32(crcdat) & 0xFFFF

    # return good header
    if h.header_crc == calc_crc:
        return h

    # need to panic?
    if REPORT_BAD_HEADER:
        xlen = len(crcdat)
        crcdat = h.header_data[2:]
        msg = 'Header CRC error (%02x): exp=%x got=%x (xlen = %d)' % ( h.type, h.header_crc, calc_crc, xlen )
        # debugging aid: report every prefix length whose CRC would match
        xlen = len(crcdat)
        while xlen >= S_BLK_HDR.size - 2:
            crc = crc32(crcdat[:xlen]) & 0xFFFF
            if crc == h.header_crc:
                msg += ' / crc match, xlen = %d' % xlen
            xlen -= 1
        raise BadRarFile(msg)

    # instead of panicking, send eof
    return None
# read file-specific header
# Fills file-related fields of h from h.header_data starting at pos
# and returns the position where the base header ends.
def _parse_file_header(self, h, pos):
    fld = S_FILE_HDR.unpack_from(h.header_data, pos)
    h.compress_size = fld[0]
    h.file_size = fld[1]
    h.host_os = fld[2]
    h.CRC = fld[3]
    h.date_time = parse_dos_time(fld[4])
    h.extract_version = fld[5]
    h.compress_type = fld[6]
    h.name_size = fld[7]
    h.mode = fld[8]
    pos += S_FILE_HDR.size

    # upper 32 bits of both sizes are stored separately
    if h.flags & RAR_FILE_LARGE:
        h1 = S_LONG.unpack_from(h.header_data, pos)[0]
        h2 = S_LONG.unpack_from(h.header_data, pos + 4)[0]
        h.compress_size |= h1 << 32
        h.file_size |= h2 << 32
        pos += 8
    h.add_size = h.compress_size

    name = h.header_data[pos : pos + h.name_size ]
    pos += h.name_size

    # With the unicode flag the name field is either
    # "plain name + NUL + packed unicode data" or, when there is
    # no NUL at all, the name stored directly as UTF-8.
    nul = -1
    if h.flags & RAR_FILE_UNICODE:
        nul = name.find(ZERO)

    if nul >= 0:
        # stored in custom encoding
        h.orig_filename = name[:nul]
        u = UnicodeFilename(h.orig_filename, name[nul + 1 : ])
        h.filename = u.decode()

        # if parsing failed fall back to simple name
        if u.failed:
            h.filename = self._decode(h.orig_filename)
    elif h.flags & RAR_FILE_UNICODE:
        # stored in UTF-8; slicing with nul == -1 would have cut off
        # the last byte of the name
        h.orig_filename = name
        h.filename = name.decode("utf8", "replace")
    else:
        h.orig_filename = name
        h.filename = self._decode(name)

    # change separator, if requested
    if PATH_SEP != '\\':
        h.filename = h.filename.replace('\\', PATH_SEP)

    if h.flags & RAR_FILE_SALT:
        h.salt = h.header_data[pos : pos + 8]
        pos += 8
    else:
        h.salt = None

    # optional extended time stamps
    if h.flags & RAR_FILE_EXTTIME:
        pos = self._parse_ext_time(h, pos)
    else:
        h.mtime = h.atime = h.ctime = h.arctime = None

    # base header end
    h.header_base = pos

    if h.flags & RAR_FILE_COMMENT:
        self._parse_subblocks(h, pos)

    # convert timestamps
    if USE_DATETIME:
        h.date_time = to_datetime(h.date_time)
        h.mtime = to_datetime(h.mtime)
        h.atime = to_datetime(h.atime)
        h.ctime = to_datetime(h.ctime)
        h.arctime = to_datetime(h.arctime)

    # .mtime is .date_time with more precision
    if h.mtime:
        if USE_DATETIME:
            h.date_time = h.mtime
        else:
            # keep seconds int
            h.date_time = h.mtime[:5] + (int(h.mtime[5]),)

    return pos
902 # find old-style comment subblock
903 def _parse_subblocks(self, h, pos):
904 hdata = h.header_data
905 while pos < len(hdata):
906 # ordinary block header
907 t = S_BLK_HDR.unpack_from(hdata, pos)
908 scrc, stype, sflags, slen = t
909 pos_next = pos + slen
910 pos += S_BLK_HDR.size
912 # corrupt header
913 if pos_next < pos:
914 break
916 # followed by block-specific header
917 if stype == RAR_BLOCK_OLD_COMMENT and pos + S_COMMENT_HDR.size <= pos_next:
918 declen, ver, meth, crc = S_COMMENT_HDR.unpack_from(hdata, pos)
919 pos += S_COMMENT_HDR.size
920 data = hdata[pos : pos_next]
921 cmt = rar_decompress(ver, meth, data, declen, sflags,
922 crc, self._password)
923 if not self._crc_check:
924 h.comment = self._decode_comment(cmt)
925 elif crc32(cmt) & 0xFFFF == crc:
926 h.comment = self._decode_comment(cmt)
928 pos = pos_next
930 def _parse_ext_time(self, h, pos):
931 data = h.header_data
933 # flags and rest of data can be missing
934 flags = 0
935 if pos + 2 <= len(data):
936 flags = S_SHORT.unpack_from(data, pos)[0]
937 pos += 2
939 h.mtime, pos = self._parse_xtime(flags >> 3*4, data, pos, h.date_time)
940 h.ctime, pos = self._parse_xtime(flags >> 2*4, data, pos)
941 h.atime, pos = self._parse_xtime(flags >> 1*4, data, pos)
942 h.arctime, pos = self._parse_xtime(flags >> 0*4, data, pos)
943 return pos
945 def _parse_xtime(self, flag, data, pos, dostime = None):
946 unit = 10000000.0 # 100 ns units
947 if flag & 8:
948 if not dostime:
949 t = S_LONG.unpack_from(data, pos)[0]
950 dostime = parse_dos_time(t)
951 pos += 4
952 rem = 0
953 cnt = flag & 3
954 for i in range(cnt):
955 b = S_BYTE.unpack_from(data, pos)[0]
956 rem = (b << 16) | (rem >> 8)
957 pos += 1
958 sec = dostime[5] + rem / unit
959 if flag & 4:
960 sec += 1
961 dostime = dostime[:5] + (sec,)
962 return dostime, pos
# given current vol name, construct next one
def _next_volname(self, volfile):
    # naming scheme is chosen by the NEWNUMBERING flag of the main header
    if self._main.flags & RAR_MAIN_NEWNUMBERING:
        advance = self._next_newvol
    else:
        advance = self._next_oldvol
    return advance(volfile)
970 # new-style next volume
971 def _next_newvol(self, volfile):
972 i = len(volfile) - 1
973 while i >= 0:
974 if volfile[i] >= '0' and volfile[i] <= '9':
975 return self._inc_volname(volfile, i)
976 i -= 1
977 raise BadRarName("Cannot construct volume name: "+volfile)
979 # old-style next volume
980 def _next_oldvol(self, volfile):
981 # rar -> r00
982 if volfile[-4:].lower() == '.rar':
983 return volfile[:-2] + '00'
984 return self._inc_volname(volfile, len(volfile) - 1)
986 # increase digits with carry, otherwise just increment char
987 def _inc_volname(self, volfile, i):
988 fn = list(volfile)
989 while i >= 0:
990 if fn[i] != '9':
991 fn[i] = chr(ord(fn[i]) + 1)
992 break
993 fn[i] = '0'
994 i -= 1
995 return ''.join(fn)
# open an entry through DirectReader (open() uses this for stored,
# non-encrypted entries)
def _open_clear(self, inf):
    return DirectReader(self, inf)
# put file compressed data into temporary .rar archive, and run
# unrar on that, thus avoiding unrar going over whole archive
#
# Both file objects are closed on every path, and the temp file is
# removed if anything fails before it is handed over to unrar.
def _open_hack(self, inf, psw = None):
    BSIZE = 32*1024

    # header and packed data of the entry are copied as one piece
    size = inf.compress_size + inf.header_size
    rf = open(inf.volume_file, "rb", 0)
    try:
        rf.seek(inf.header_offset)

        tmpfd, tmpname = mkstemp(suffix='.rar')
        try:
            tmpf = os.fdopen(tmpfd, "wb")
            try:
                # create main header: crc, type, flags, size, res1, res2
                mh = S_BLK_HDR.pack(0x90CF, 0x73, 0, 13) + ZERO * (2+4)
                tmpf.write(RAR_ID + mh)
                while size > 0:
                    buf = rf.read(min(size, BSIZE))
                    if not buf:
                        raise BadRarFile('read failed: ' + inf.filename)
                    tmpf.write(buf)
                    size -= len(buf)
            finally:
                tmpf.close()
        except:
            # deliberately bare: temp file must not survive any failure,
            # the exception itself is passed on unchanged
            os.unlink(tmpname)
            raise
    finally:
        rf.close()

    return self._open_unrar(tmpname, inf, psw, tmpname)
# Read and decompress a new-style comment block.
# Returns the comment (decoded according to UNICODE_COMMENTS), or None
# when CRC checking is on and the checksum does not match.
def _read_comment_v3(self, inf, psw=None):

    # read data; the file is closed even if seek/read fails
    rf = open(inf.volume_file, "rb")
    try:
        rf.seek(inf.file_offset)
        data = rf.read(inf.compress_size)
    finally:
        rf.close()

    # decompress
    cmt = rar_decompress(inf.extract_version, inf.compress_type, data,
                         inf.file_size, inf.flags, inf.CRC, psw, inf.salt)

    # check crc
    if self._crc_check:
        # crc32() can be negative on Python 2; masking gives the unsigned
        # value on every version without needing long()
        crc = crc32(cmt) & 0xFFFFFFFF
        if crc != inf.CRC:
            return None

    return self._decode_comment(cmt)
# extract using unrar: build the command line and return a PipeReader
def _open_unrar(self, rarfile, inf, psw = None, tmpfile = None):
    psw_args = []
    if psw is not None:
        psw_args = ["-p" + psw]
    cmd = [UNRAR_TOOL] + list(OPEN_ARGS) + psw_args + [rarfile]

    # not giving filename avoids encoding related problems,
    # so it is appended only when no tmpfile is given
    if not tmpfile:
        member = inf.filename
        if PATH_SEP != os.sep:
            member = member.replace(PATH_SEP, os.sep)
        cmd.append(member)

    # read from unrar pipe
    return PipeReader(self, inf, cmd, tmpfile)
# Decode bytes to unicode: first encoding from TRY_ENCODINGS that
# succeeds wins, otherwise the fallback charset is used with 'replace'.
def _decode(self, val):
    for enc in TRY_ENCODINGS:
        try:
            text = val.decode(enc)
        except UnicodeError:
            continue
        return text
    return val.decode(self._charset, 'replace')
# Comments are decoded only when UNICODE_COMMENTS is set,
# otherwise they are returned exactly as given.
def _decode_comment(self, val):
    if not UNICODE_COMMENTS:
        return val
    return self._decode(val)
# call unrar to extract a file
def _extract(self, fnlist, path=None, psw=None):
    # password: explicit one wins, then the archive-wide one;
    # '-p-' is passed when there is none
    psw = psw or self._password
    if psw is None:
        psw_arg = '-p-'
    else:
        psw_arg = '-p' + psw

    # tool, options, password, rar file
    cmd = [UNRAR_TOOL] + list(EXTRACT_ARGS) + [psw_arg, self.rarfile]

    # file list, converted to the OS path separator
    convert = os.sep != PATH_SEP
    for fn in fnlist:
        if convert:
            fn = fn.replace(PATH_SEP, os.sep)
        cmd.append(fn)

    # destination path, passed with a trailing separator
    if path is not None:
        cmd.append(path + os.sep)

    # call
    proc = custom_popen(cmd)
    proc.communicate()
1116 ## Utility classes
class UnicodeFilename:
    """Handle unicode filename decompression.

    The result is assembled as UTF-16LE from the plain name and the
    packed data.  If either input runs out while decoding, .failed is
    set to 1.
    """

    def __init__(self, name, encdata):
        self.std_name = bytearray(name)
        self.encdata = bytearray(encdata)
        self.pos = self.encpos = 0
        self.buf = bytearray()
        self.failed = 0

    def enc_byte(self):
        """Return next byte of packed data; 0 (and failure) when exhausted."""
        try:
            value = self.encdata[self.encpos]
        except IndexError:
            self.failed = 1
            return 0
        self.encpos += 1
        return value

    def std_byte(self):
        """Return the plain-name byte at the output position; '?' when missing."""
        try:
            return self.std_name[self.pos]
        except IndexError:
            self.failed = 1
            return ord('?')

    def put(self, lo, hi):
        """Append one UTF-16LE code unit and advance the output position."""
        for part in (lo, hi):
            self.buf.append(part)
        self.pos += 1

    def decode(self):
        """Run the decoder and return the resulting unicode string."""
        # first byte is the default high byte
        high = self.enc_byte()
        bits_left = 0
        while self.encpos < len(self.encdata):
            # one flag byte holds four 2-bit opcodes
            if bits_left == 0:
                flags = self.enc_byte()
                bits_left = 8
            bits_left -= 2
            op = (flags >> bits_left) & 3

            if op == 0:
                # low byte stored, high byte zero
                self.put(self.enc_byte(), 0)
            elif op == 1:
                # low byte stored, default high byte
                self.put(self.enc_byte(), high)
            elif op == 2:
                # both bytes stored, low one first
                low = self.enc_byte()
                top = self.enc_byte()
                self.put(low, top)
            else:
                # run taken from the plain name
                n = self.enc_byte()
                if n & 0x80:
                    # with correction added to each byte
                    corr = self.enc_byte()
                    for i in range((n & 0x7f) + 2):
                        self.put((self.std_byte() + corr) & 0xFF, high)
                else:
                    for i in range(n + 2):
                        self.put(self.std_byte(), 0)
        return self.buf.decode("utf-16le", "replace")
1178 class RarExtFile(RawIOBase):
1179 """Base class for 'file-like' object that RarFile.open() returns.
1181 Provides public methods and common crc checking.
1183 Behaviour:
1184 - no short reads - .read() and .readinfo() read as much as requested.
1185 - no internal buffer, use io.BufferedReader for that.
1187 @ivar name:
1188 filename of the archive entry.
def __init__(self, rf, inf):
    """Fill common fields and call ._open().

    @param rf: the RarFile the entry belongs to
    @param inf: RarInfo of the entry
    """

    RawIOBase.__init__(self)

    # standard io.* properties
    self.name = inf.filename
    self.mode = 'rb'

    self.rf = rf
    self.inf = inf
    # CRC checking follows the setting of the owning archive
    self.crc_check = rf._crc_check
    self.fd = None
    # running crc and count of bytes still unread
    self.CRC = 0
    self.remain = 0

    self._open()
1209 def _open(self):
1210 if self.fd:
1211 self.fd.close()
1212 self.fd = None
1213 self.CRC = 0
1214 self.remain = self.inf.file_size
1216 def read(self, cnt = None):
1217 """Read all or specified amount of data from archive entry."""
1219 # sanitize cnt
1220 if cnt is None or cnt < 0:
1221 cnt = self.remain
1222 elif cnt > self.remain:
1223 cnt = self.remain
1224 if cnt == 0:
1225 return EMPTY
1227 # actual read
1228 data = self._read(cnt)
1229 if data:
1230 self.CRC = crc32(data, self.CRC)
1231 self.remain -= len(data)
1232 if len(data) != cnt:
1233 raise BadRarFile("Failed the read enough data")
1235 # done?
1236 if not data or self.remain == 0:
1237 #self.close()
1238 self._check()
1239 return data
1241 def _check(self):
1242 """Check final CRC."""
1243 if not self.crc_check:
1244 return
1245 if self.remain != 0:
1246 raise BadRarFile("Failed the read enough data")
1247 crc = self.CRC
1248 if crc < 0:
1249 crc += (long(1) << 32)
1250 if crc != self.inf.CRC:
1251 raise BadRarFile("Corrupt file - CRC check failed: " + self.inf.filename)
1253 def _read(self, cnt):
1254 """Actual read that gets sanitized cnt."""
1256 def close(self):
1257 """Close open resources."""
1259 RawIOBase.close(self)
1261 if self.fd:
1262 self.fd.close()
1263 self.fd = None
1265 def __del__(self):
1266 """Hook delete to make sure tempfile is removed."""
1267 self.close()
1269 def readinto(self, buf):
1270 """Zero-copy read directly into buffer.
1272 Returns bytes read.
1275 data = self.read(len(buf))
1276 n = len(data)
1277 try:
1278 buf[:n] = data
1279 except TypeError:
1280 import array
1281 if not isinstance(buf, array.array):
1282 raise
1283 buf[:n] = array.array(buf.typecode, data)
1284 return n
1286 def tell(self):
1287 """Return current reading position in uncompressed data."""
1288 return self.inf.file_size - self.remain
1290 def seek(self, ofs, whence = 0):
1291 """Seek in data."""
1293 # disable crc check when seeking
1294 self.crc_check = 0
1296 fsize = self.inf.file_size
1297 cur_ofs = self.tell()
1299 if whence == 0: # seek from beginning of file
1300 new_ofs = ofs
1301 elif whence == 1: # seek from current position
1302 new_ofs = cur_ofs + ofs
1303 elif whence == 2: # seek from end of file
1304 new_ofs = fsize + ofs
1305 else:
1306 raise ValueError('Invalid value for whence')
1308 # sanity check
1309 if new_ofs < 0:
1310 new_ofs = 0
1311 elif new_ofs > fsize:
1312 new_ofs = fsize
1314 # do the actual seek
1315 if new_ofs >= cur_ofs:
1316 self._skip(new_ofs - cur_ofs)
1317 else:
1318 # process old data ?
1319 #self._skip(fsize - cur_ofs)
1320 # reopen and seek
1321 self._open()
1322 self._skip(new_ofs)
1323 return self.tell()
1325 def _skip(self, cnt):
1326 """Read and discard data"""
1327 while cnt > 0:
1328 if cnt > 8192:
1329 buf = self.read(8192)
1330 else:
1331 buf = self.read(cnt)
1332 if not buf:
1333 break
1334 cnt -= len(buf)
1336 def readable(self):
1337 """Returns True"""
1338 return True
1340 def seekable(self):
1341 """Returns True"""
1342 return True
1344 def readall(self):
1345 """Read all remaining data"""
1346 # avoid RawIOBase default impl
1347 return self.read()
class PipeReader(RarExtFile):
    """Read data from pipe, handle tempfile cleanup.

    Data comes from stdout of the process launched from ``cmd``.
    The optional ``tempfile`` is removed when the reader is closed.
    """

    def __init__(self, rf, inf, cmd, tempfile=None):
        # must be set before base __init__, as that one calls _open()
        self.cmd = cmd
        self.proc = None
        self.tempfile = tempfile
        RarExtFile.__init__(self, rf, inf)

    def _close_proc(self):
        """Close pipes of the running process, if any, and wait for it."""
        proc = self.proc
        if not proc:
            return
        for pipe in (proc.stdout, proc.stdin, proc.stderr):
            if pipe:
                pipe.close()
        proc.wait()
        self.proc = None

    def _open(self):
        """Restart the process and read from its stdout."""
        RarExtFile._open(self)

        # stop old process
        self._close_proc()

        # launch new process
        proc = custom_popen(self.cmd)
        self.proc = proc
        self.fd = proc.stdout

        # avoid situation where unrar waits on stdin
        if proc.stdin:
            proc.stdin.close()

    def _read(self, cnt):
        """Read from pipe."""

        # normal read is usually enough
        chunk = self.fd.read(cnt)
        if len(chunk) == cnt or not chunk:
            return chunk

        # short read, keep asking until satisfied or EOF
        parts = [chunk]
        missing = cnt - len(chunk)
        while missing > 0:
            chunk = self.fd.read(missing)
            if not chunk:
                break
            missing -= len(chunk)
            parts.append(chunk)
        return EMPTY.join(parts)

    def close(self):
        """Close open resources."""

        self._close_proc()
        RarExtFile.close(self)

        tmp = self.tempfile
        if tmp:
            try:
                os.unlink(tmp)
            except OSError:
                # best effort, file may be gone already
                pass
            self.tempfile = None

    if have_memoryview:
        def readinto(self, buf):
            """Zero-copy read directly into buffer."""
            cnt = min(len(buf), self.remain)
            view = memoryview(buf)
            res = got = 0
            while got < cnt:
                res = self.fd.readinto(view[got : cnt])
                if not res:
                    break
                if self.crc_check:
                    self.CRC = crc32(view[got : got + res], self.CRC)
                self.remain -= res
                got += res
            return got
class DirectReader(RarExtFile):
    """Read uncompressed data directly from archive."""

    def _open(self):
        """Open the entry's volume file and parse the entry header."""
        RarExtFile._open(self)

        self.volfile = self.inf.volume_file
        self.fd = open(self.volfile, "rb", 0)
        self.fd.seek(self.inf.header_offset, 0)
        self.cur = self.rf._parse_header(self.fd)
        self.cur_avail = self.cur.add_size

    def _skip(self, cnt):
        """RAR Seek, skipping through rar files to get to correct position"""

        while cnt > 0:
            # next vol needed?
            if self.cur_avail == 0 and not self._open_next():
                break

            avail = self.cur_avail
            if cnt > avail:
                # rest of this volume is skipped as a whole
                cnt -= avail
                self.remain -= avail
                self.cur_avail = 0
            else:
                # target is inside current volume
                self.fd.seek(cnt, 1)
                self.cur_avail = avail - cnt
                self.remain -= cnt
                cnt = 0

    def _read(self, cnt):
        """Read from potentially multi-volume archive."""

        chunks = []
        while cnt > 0:
            # next vol needed?
            if self.cur_avail == 0 and not self._open_next():
                break

            # fd is in read pos, never read past the current part
            data = self.fd.read(min(cnt, self.cur_avail))
            if not data:
                break

            # got some data
            size = len(data)
            cnt -= size
            self.cur_avail -= size
            chunks.append(data)

        if len(chunks) == 1:
            return chunks[0]
        return EMPTY.join(chunks)

    def _open_next(self):
        """Proceed to next volume.

        Returns False when the entry does not continue in another
        volume, True when the next part is ready for reading.
        """

        # is the file split over archives?
        if not (self.cur.flags & RAR_FILE_SPLIT_AFTER):
            return False

        if self.fd:
            self.fd.close()
            self.fd = None

        # open next part
        self.volfile = self.rf._next_volname(self.volfile)
        fd = open(self.volfile, "rb", 0)
        self.fd = fd

        # loop until first file header
        while True:
            hdr = self.rf._parse_header(fd)
            if not hdr:
                raise BadRarFile("Unexpected EOF")
            if hdr.type in (RAR_BLOCK_MARK, RAR_BLOCK_MAIN):
                # step over payload of non-file blocks
                if hdr.add_size:
                    fd.seek(hdr.add_size, 1)
                continue
            if hdr.orig_filename != self.inf.orig_filename:
                raise BadRarFile("Did not found file entry")
            self.cur = hdr
            self.cur_avail = hdr.add_size
            return True

    if have_memoryview:
        def readinto(self, buf):
            """Zero-copy read directly into buffer."""
            got = 0
            view = memoryview(buf)
            total = len(buf)
            while got < total:
                # next vol needed?
                if self.cur_avail == 0 and not self._open_next():
                    break

                # length for next read
                want = min(total - got, self.cur_avail)

                # read into temp view
                res = self.fd.readinto(view[got : got + want])
                if not res:
                    break
                if self.crc_check:
                    self.CRC = crc32(view[got : got + res], self.CRC)
                self.cur_avail -= res
                self.remain -= res
                got += res
            return got
class HeaderDecrypt:
    """File-like object that decrypts from another file

    Data is decrypted with AES in CBC mode, one cipher block at a time.
    Decrypted bytes that were not asked for yet are kept for next read().
    """

    def __init__(self, f, key, iv):
        self.f = f
        self.ciph = AES.new(key, AES.MODE_CBC, iv)
        self.buf = EMPTY

    def tell(self):
        """Return position of the underlying file."""
        return self.f.tell()

    def read(self, cnt=None):
        """Return up to cnt decrypted bytes.

        Result is shorter when the underlying file cannot supply
        a full cipher block anymore.
        """
        if cnt > 8*1024:
            raise BadRarFile('Bad count to header decrypt - wrong password?')

        # leftover alone is enough
        leftover = self.buf
        if cnt <= len(leftover):
            self.buf = leftover[cnt:]
            return leftover[:cnt]

        # take whole leftover, rest needs decrypting
        out = leftover
        self.buf = EMPTY
        cnt -= len(out)

        blk = self.ciph.block_size
        while cnt > 0:
            enc = self.f.read(blk)
            if len(enc) < blk:
                break
            dec = self.ciph.decrypt(enc)
            # hand out what is needed, keep the tail for later
            piece = dec[:cnt]
            self.buf = dec[cnt:]
            out += piece
            cnt -= len(piece)

        return out
1596 ## Utility functions
def rar3_s2k(psw, salt):
    """String-to-key hash for RAR3.

    Feeds UTF-16LE password + salt + 3-byte counter into a single
    SHA1 context 16 * 0x4000 times.  Returns (key, iv): key is made
    from first 16 digest bytes with each 32-bit word byte-swapped,
    iv collects one digest byte at the start of each of the 16 rounds.
    """

    seed = psw.encode('utf-16le') + salt
    h = sha1()
    iv_bytes = []
    for rnd in range(16):
        first = rnd * 0x4000

        # first update of the round also yields one IV byte
        h.update(seed + S_LONG.pack(first)[:3])
        iv_bytes.append(h.digest()[19:20])

        for n in range(first + 1, first + 0x4000):
            h.update(seed + S_LONG.pack(n)[:3])

    words = unpack(">LLLL", h.digest()[:16])
    key_le = pack("<LLLL", *words)
    return key_le, EMPTY.join(iv_bytes)
def rar_decompress(vers, meth, data, declen=0, flags=0, crc=0, psw=None, salt=None):
    """Decompress blob of compressed data.

    Used for data with non-standard header - eg. comments.

    The blob is wrapped into a minimal single-file archive written to
    a temp file, which is then handed to the external unrar tool.
    Returns whatever the tool writes to its output, or an empty
    string when the flags ask for a salt but none was given.
    """

    # already uncompressed?
    if meth == RAR_M0 and (flags & RAR_FILE_PASSWORD) == 0:
        return data

    # take only necessary flags
    keep_mask = RAR_FILE_PASSWORD | RAR_FILE_SALT | RAR_FILE_DICTMASK
    flags = (flags & keep_mask) | RAR_LONG_BLOCK

    # file header
    fname = bytes('data', 'ascii')
    date = 0
    mode = 0x20
    file_hdr = S_FILE_HDR.pack(len(data), declen, RAR_OS_MSDOS, crc,
                               date, vers, meth, len(fname), mode)
    file_hdr += fname
    if flags & RAR_FILE_SALT:
        if not salt:
            return EMPTY
        file_hdr += salt

    # full header: pack once with zero crc to compute the real one
    hdr_len = S_BLK_HDR.size + len(file_hdr)
    draft = S_BLK_HDR.pack(0, RAR_BLOCK_FILE, flags, hdr_len) + file_hdr
    hdr_crc = crc32(draft[2:]) & 0xFFFF
    hdr = S_BLK_HDR.pack(hdr_crc, RAR_BLOCK_FILE, flags, hdr_len) + file_hdr

    # archive main header
    main_hdr = S_BLK_HDR.pack(0x90CF, RAR_BLOCK_MAIN, 0, 13) + ZERO * (2+4)

    # decompress via temp rar
    tmpfd, tmpname = mkstemp(suffix='.rar')
    tmpf = os.fdopen(tmpfd, "wb")
    try:
        tmpf.write(RAR_ID + main_hdr + hdr + data)
        tmpf.close()

        cmd = [UNRAR_TOOL] + list(OPEN_ARGS)
        if psw is not None and (flags & RAR_FILE_PASSWORD):
            psw_arg = "-p" + psw
        else:
            psw_arg = "-p-"
        cmd.append(psw_arg)
        cmd.append(tmpname)

        proc = custom_popen(cmd)
        return proc.communicate()[0]
    finally:
        # second close() is harmless, needed when write() failed
        tmpf.close()
        os.unlink(tmpname)
def to_datetime(t):
    """Convert 6-part time tuple into datetime object.

    Tuple is (year, month, day, hour, minute, seconds), seconds may
    have fractional part.  None is passed through.  Values that
    datetime() refuses are clamped into valid range first.
    """

    if t is None:
        return None

    # split seconds into whole part and microseconds
    year, mon, day, h, m, xs = t
    s = int(xs)
    us = int(1000000 * (xs - s))

    # common case: everything is valid already
    try:
        return datetime(year, mon, day, h, m, s, us)
    except ValueError:
        pass

    # clamp fields, February is allowed 29 days at this point
    days_in_month = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
    mon = min(max(mon, 1), 12)
    day = min(max(day, 1), days_in_month[mon])
    h = min(h, 23)
    m = min(m, 59)
    s = min(s, 59)

    # Feb 29 exists only in leap years, fall back to 28th otherwise
    if mon == 2 and day == 29:
        try:
            return datetime(year, mon, day, h, m, s, us)
        except ValueError:
            day = 28
    return datetime(year, mon, day, h, m, s, us)
def parse_dos_time(stamp):
    """Parse standard 32-bit DOS timestamp.

    Returns (year, month, day, hour, minute, second) tuple.  Seconds
    are stored in 2-second units and year as offset from 1980.
    Fields are not validated.
    """

    # bit layout, low to high: sec:5 min:6 hour:5 day:5 month:4 year:7
    half_secs = stamp & 0x1F
    minute = (stamp >> 5) & 0x3F
    hour = (stamp >> 11) & 0x1F
    day = (stamp >> 16) & 0x1F
    month = (stamp >> 21) & 0x0F
    year = ((stamp >> 25) & 0x7F) + 1980
    return (year, month, day, hour, minute, half_secs * 2)
def custom_popen(cmd):
    """Disconnect cmd from parent fds, read only from stdout.

    Starts cmd unbuffered with stdin and stdout as pipes and stderr
    merged into stdout.  Returns the Popen object.
    """

    # needed for py2exe: CREATE_NO_WINDOW on Windows
    creationflags = 0x08000000 if sys.platform == 'win32' else 0

    # run command
    return Popen(cmd, bufsize = 0,
                 stdout = PIPE, stdin = PIPE, stderr = STDOUT,
                 creationflags = creationflags)