UserString.MutableString has been removed in Python 3.0.
[python.git] / Lib / zipfile.py
blob b812a82c54b2c83713fbe9a29ded6efa90b7a863
1 """
2 Read and write ZIP files.
3 """
4 import struct, os, time, sys, shutil
5 import binascii, cStringIO
try:
    import zlib  # Optional: we may need its compression method
except ImportError:
    # Without zlib only stored members can be handled; binascii still
    # supplies a CRC-32 function.
    zlib = None
    crc32 = binascii.crc32
else:
    crc32 = zlib.crc32
# Public names of this module.
__all__ = [
    "BadZipfile",
    "error",
    "ZIP_STORED",
    "ZIP_DEFLATED",
    "is_zipfile",
    "ZipInfo",
    "ZipFile",
    "PyZipFile",
    "LargeZipFile",
]
class BadZipfile(Exception):
    """Raised by this module when an archive cannot be parsed as a ZIP file."""


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile requires the ZIP64 extensions but
    those extensions are disabled.
    """


# "error" is another name for the exception raised by this module.
error = BadZipfile
# Sizes and offsets above this value need the ZIP64 extensions.
ZIP64_LIMIT = (1 << 31) - 1

# Constants for ZIP file compression methods; no other methods are supported.
ZIP_STORED = 0
ZIP_DEFLATED = 8

# struct module formats (little-endian) and magic numbers for the headers.

# End of archive record: 8 items, 22 bytes.
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"

# Central directory entry: 19 items, 46 bytes.
structCentralDir = "<4s4B4HLLL5HLL"
stringCentralDir = "PK\001\002"

# Local file header: 12 items, 30 bytes.
structFileHeader = "<4s2B4HLLL2H"
stringFileHeader = "PK\003\004"

# ZIP64 end of archive locator: 4 items, 20 bytes.
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"

# ZIP64 end of archive record: 10 items, 56 bytes.
structEndArchive64 = "<4sQHHLLQQQQ"
stringEndArchive64 = "PK\x06\x06"
49 # indexes of entries in the central directory structure
50 _CD_SIGNATURE = 0
51 _CD_CREATE_VERSION = 1
52 _CD_CREATE_SYSTEM = 2
53 _CD_EXTRACT_VERSION = 3
54 _CD_EXTRACT_SYSTEM = 4 # is this meaningful?
55 _CD_FLAG_BITS = 5
56 _CD_COMPRESS_TYPE = 6
57 _CD_TIME = 7
58 _CD_DATE = 8
59 _CD_CRC = 9
60 _CD_COMPRESSED_SIZE = 10
61 _CD_UNCOMPRESSED_SIZE = 11
62 _CD_FILENAME_LENGTH = 12
63 _CD_EXTRA_FIELD_LENGTH = 13
64 _CD_COMMENT_LENGTH = 14
65 _CD_DISK_NUMBER_START = 15
66 _CD_INTERNAL_FILE_ATTRIBUTES = 16
67 _CD_EXTERNAL_FILE_ATTRIBUTES = 17
68 _CD_LOCAL_HEADER_OFFSET = 18
70 # indexes of entries in the local file header structure
71 _FH_SIGNATURE = 0
72 _FH_EXTRACT_VERSION = 1
73 _FH_EXTRACT_SYSTEM = 2 # is this meaningful?
74 _FH_GENERAL_PURPOSE_FLAG_BITS = 3
75 _FH_COMPRESSION_METHOD = 4
76 _FH_LAST_MOD_TIME = 5
77 _FH_LAST_MOD_DATE = 6
78 _FH_CRC = 7
79 _FH_COMPRESSED_SIZE = 8
80 _FH_UNCOMPRESSED_SIZE = 9
81 _FH_FILENAME_LENGTH = 10
82 _FH_EXTRA_FIELD_LENGTH = 11
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    Returns True when an end-of-archive record is found in the file
    named by filename, and False otherwise.  An IOError while opening or
    reading the file is treated as "not a ZIP file".
    """
    try:
        fpin = open(filename, "rb")
        try:
            endrec = _EndRecData(fpin)
        finally:
            # Close the file even when reading the end record fails.
            fpin.close()
    except IOError:
        return False
    if endrec:
        return True                 # file has correct magic number
    return False
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec.

    fpin is the open archive, offset is the position of the classic
    end-of-archive record relative to the end of the file, and endrec is
    the list built by _EndRecData().  endrec is returned unchanged when
    no ZIP64 records can be read in front of the classic record;
    otherwise items 1 to 6 are replaced by the ZIP64 values.

    Raises BadZipfile for archives that span multiple disks.
    """
    locatorSize = struct.calcsize(structEndArchive64Locator)
    try:
        fpin.seek(offset - locatorSize, 2)
    except IOError:
        # The file is too small to hold a ZIP64 locator, so keep the
        # end record we were given.
        return endrec
    data = fpin.read(locatorSize)
    if len(data) != locatorSize:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    endArchiveSize = struct.calcsize(structEndArchive64)
    try:
        fpin.seek(offset - locatorSize - endArchiveSize, 2)
    except IOError:
        return endrec
    data = fpin.read(endArchiveSize)
    if len(data) != endArchiveSize:
        return endrec
    fields = struct.unpack(structEndArchive64, data)
    if fields[0] != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record.  The
    # record size and the two version fields (items 1 to 3) are not used.
    disk_num, disk_dir, dircount, dircount2, dirsize, diroffset = fields[4:]
    endrec[1] = disk_num
    endrec[2] = disk_dir
    endrec[3] = dircount
    endrec[4] = dircount2
    endrec[5] = dirsize
    endrec[6] = diroffset
    return endrec
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the eight items in the ZIP "End of central dir"
    record followed by a ninth item, the archive comment, and a tenth
    item, the file seek offset of this record.  None is returned when no
    usable record is found, including when the file is shorter than the
    record itself."""
    recSize = struct.calcsize(structEndArchive)     # 22 bytes
    try:
        fpin.seek(-recSize, 2)          # Assume no archive comment.
    except IOError:
        # The file is shorter than an end-of-archive record.
        return None
    filesize = fpin.tell() + recSize    # Get file size
    data = fpin.read()
    if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
        endrec = list(struct.unpack(structEndArchive, data))
        endrec.append("")                   # Append the archive comment
        endrec.append(filesize - recSize)   # Append the record start offset
        if endrec[-4] == 0xffffffff:
            return _EndRecData64(fpin, -recSize, endrec)
        return endrec
    # Search the last END_BLOCK bytes of the file for the record signature.
    # The comment is appended to the ZIP file and has a 16 bit length.
    # So the comment may be up to 64K long.  We limit the search for the
    # signature to a few Kbytes at the end of the file for efficiency.
    # also, the signature must not appear in the comment.
    END_BLOCK = min(filesize, 1024 * 4)
    fpin.seek(filesize - END_BLOCK, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:      # Correct signature string was found
        record = data[start:start + recSize]
        if len(record) != recSize:
            # Signature too close to the end of the file: the record is
            # truncated and cannot be unpacked.
            return None
        endrec = list(struct.unpack(structEndArchive, record))
        comment = data[start + recSize:]
        if endrec[7] == len(comment):       # Comment length checks out
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(filesize - END_BLOCK + start)
            if endrec[-4] == 0xffffffff:
                return _EndRecData64(fpin, - END_BLOCK + start, endrec)
            return endrec
    return None     # Error, return None
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the entry for filename, last modified at date_time.

        date_time is a (year, month, day, hour, min, sec) tuple.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        When either size exceeds ZIP64_LIMIT a ZIP64 extra record is
        appended, the 32-bit size fields are set to 0xffffffff and
        extract_version/create_version are raised to at least 45.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff # -1
            compress_size = 0xffffffff # -1
            self.extract_version = max(45, self.extract_version)
            self.create_version = max(45, self.extract_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        A unicode name that is not pure ASCII is encoded as UTF-8 and
        flag bit 0x800 is added to the returned flags.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the filename, decoded from UTF-8 if flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply the ZIP64 record (type 1) found in the extra field.

        file_size, compress_size and header_offset are replaced, in that
        order, by the 64-bit values of the record when they hold the
        0xffffffff (or -1) placeholder.  Raises RuntimeError when the
        record has an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # Each record starts with a 4 byte (type, length) header; anything
        # shorter than that at the end is padding and is ignored.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                # XXX Is this correct? won't this exclude 2**32-1 byte files?
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == -1 or self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == -1 or self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
319 class _ZipDecrypter:
320 """Class to handle decryption of files stored within a ZIP archive.
322 ZIP supports a password-based form of encryption. Even though known
323 plaintext attacks have been found against it, it is still useful
324 to be able to get data out of such a file.
326 Usage:
327 zd = _ZipDecrypter(mypwd)
328 plain_char = zd(cypher_char)
329 plain_text = map(zd, cypher_text)
332 def _GenerateCRCTable():
333 """Generate a CRC-32 table.
335 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
336 internal keys. We noticed that a direct implementation is faster than
337 relying on binascii.crc32().
339 poly = 0xedb88320
340 table = [0] * 256
341 for i in range(256):
342 crc = i
343 for j in range(8):
344 if crc & 1:
345 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
346 else:
347 crc = ((crc >> 1) & 0x7FFFFFFF)
348 table[i] = crc
349 return table
350 crctable = _GenerateCRCTable()
352 def _crc32(self, ch, crc):
353 """Compute the CRC32 primitive on one byte."""
354 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
356 def __init__(self, pwd):
357 self.key0 = 305419896
358 self.key1 = 591751049
359 self.key2 = 878082192
360 for p in pwd:
361 self._UpdateKeys(p)
363 def _UpdateKeys(self, c):
364 self.key0 = self._crc32(c, self.key0)
365 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
366 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
367 self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
369 def __call__(self, c):
370 """Decrypt a single character."""
371 c = ord(c)
372 k = self.key2 | 2
373 c = c ^ (((k * (k^1)) >> 8) & 255)
374 c = chr(c)
375 self._UpdateKeys(c)
376 return c
class ZipExtFile:
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Read the member described by zipinfo from fileobj.

        fileobj must already be positioned at the start of the member's
        data.  decrypt, when given, is called on every raw character
        before decompression.
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0
        self.rawbuffer = ''
        self.readbuffer = ''
        self.linebuffer = ''
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = ("\n", )
        self.lastdiscard = ''

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed = False
        self.mode = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            self.dc = zlib.decompressobj(-15)

    def set_univ_newlines(self, univ_newlines):
        """Enable or disable recognition of "\\r\\n" and "\\r" line ends."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = ("\n", )
        if self.univ_newlines:
            self.nlSeps = ("\r\n", "\r", "\n")

    def __iter__(self):
        return self

    def next(self):
        """Return the next line; raise StopIteration at end of data."""
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark this object as closed (only the flag is changed)."""
        self.closed = True

    def _checkfornewline(self):
        """Return (index, length) of a separator in the line buffer.

        Both values are -1 when no separator is buffered.
        """
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
                self.linebuffer = self.linebuffer[1:]
            # The check only applies to the first character that follows
            # the discarded separator; forget it so that a later "\n" is
            # not dropped by mistake.
            self.lastdiscard = ''

            for sep in self.nlSeps:
                nl = self.linebuffer.find(sep)
                if nl >= 0:
                    nllen = len(sep)
                    return nl, nllen

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.
        """
        if size < 0:
            size = sys.maxint
        elif size == 0:
            return ''

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl >= 0:
            # the next line was already in the buffer
            if nl > size:
                # The caller asked for less than the buffered line: return
                # the first size characters and keep the rest, separator
                # included, for the next call.
                piece = self.linebuffer[:size]
                self.linebuffer = self.linebuffer[size:]
                return piece
        else:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = ''
                return s

        buf = self.linebuffer[:nl]
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + "\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def read(self, size = None):
        """Return up to size characters of member data.

        With size None everything made available by this call is
        returned.
        """
        # act like file() obj and return empty string if size is 0
        if size == 0:
            return ''

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        if size is not None and size >= 0:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            elif self.compress_type == ZIP_DEFLATED:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            bytes = self.fileobj.read(bytesToRead)
            self.bytes_read += len(bytes)
            self.rawbuffer += bytes

            # Note when the last raw byte of the member has been read so
            # that the decompressor gets flushed below.
            payload = self.compress_size
            if self.decrypter is not None:
                payload -= 12
            if self.bytes_read >= payload:
                self.eof = True

            # handle contents of raw buffer
            if self.rawbuffer:
                newdata = self.rawbuffer
                self.rawbuffer = ''

                # decrypt new data if we were given an object to handle that
                if newdata and self.decrypter is not None:
                    newdata = ''.join(map(self.decrypter, newdata))

                # decompress newly read data if necessary
                if newdata and self.compress_type == ZIP_DEFLATED:
                    newdata = self.dc.decompress(newdata)
                    self.rawbuffer = self.dc.unconsumed_tail
                    if self.eof and len(self.rawbuffer) == 0:
                        # we're out of raw bytes (both from the file and
                        # the local buffer); flush just to make sure the
                        # decompressor is done
                        newdata += self.dc.flush()
                        # prevent decompressor from being used again
                        self.dc = None

                self.readbuffer += newdata

        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            bytes = self.readbuffer
            self.readbuffer = ''
        else:
            bytes = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return bytes
class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read "r", write "w" or append "a".
    compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
    allowZip64: if True ZipFile will create files with ZIP64 extensions when
                needed, otherwise it will raise an exception when this would
                be necessary.

    """

    fp = None                   # Set here since __del__ checks it
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file is a path or a file-like object.  RuntimeError is raised for
    an unknown mode or compression method, or when ZIP_DEFLATED is
    requested while zlib is missing.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
    elif compression != ZIP_STORED:
        raise RuntimeError("That compression method is not supported")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None

    # Check if we were passed a file-like object
    if not isinstance(file, basestring):
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)
    else:
        self._filePassed = 0
        self.filename = file
        modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        try:
            self.fp = open(file, modeDict[mode])
        except IOError:
            if mode != 'a':
                raise
            # The file could not be opened for update: fall back to
            # writing it from scratch.
            mode = key = 'w'
            self.fp = open(file, modeDict[mode])

    if key == 'r':
        self._GetContents()
    elif key == 'a':
        try:                        # See if file is a zip file
            self._RealGetContents()
            # seek to start of directory and overwrite
            self.fp.seek(self.start_dir, 0)
        except BadZipfile:          # file is not a zip file, just append
            self.fp.seek(0, 2)
    elif key != 'w':
        if not self._filePassed:
            self.fp.close()
            self.fp = None
        raise RuntimeError('Mode must be "r", "w" or "a"')
def _GetContents(self):
    """Load the table of contents via _RealGetContents().

    If that raises BadZipfile, a file that was opened by this object is
    closed again before the exception is re-raised."""
    try:
        self._RealGetContents()
    except BadZipfile:
        opened_here = not self._filePassed
        if opened_here:
            self.fp.close()
            self.fp = None
        raise
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Fills self.filelist and self.NameToInfo with one ZipInfo per central
    directory entry and sets self.comment and self.start_dir.  Raises
    BadZipfile when no end-of-archive record is found or when a central
    directory entry is malformed or truncated.
    """
    fp = self.fp
    endrec = _EndRecData(fp)
    if not endrec:
        raise BadZipfile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[5]             # bytes in central directory
    offset_cd = endrec[6]   # offset of central directory
    self.comment = endrec[8]        # archive comment
    # endrec[9] is the offset of the "End of Central Dir" record
    if endrec[9] > ZIP64_LIMIT:
        # 56 and 20 are the sizes of the ZIP64 end-of-archive record and
        # of its locator (see structEndArchive64 and
        # structEndArchive64Locator).
        inferred = endrec[9] - size_cd - 56 - 20
    else:
        inferred = endrec[9] - size_cd
    # "concat" is zero, unless zip was concatenated to another file
    concat = inferred - offset_cd
    if self.debug > 2:
        print("%s %s %s %s" % ("given, inferred, offset",
                               offset_cd, inferred, concat))
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    fp = cStringIO.StringIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(46)
        total = total + 46
        if centdir[0:4] != stringCentralDir:
            raise BadZipfile("Bad magic number for central directory")
        if len(centdir) != 46:
            # A short read cannot be unpacked; report it as a bad archive.
            raise BadZipfile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        # Create ZipInfo instance to store file information
        zinfo = ZipInfo(filename)
        zinfo.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        zinfo.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        total = (total + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])
        zinfo.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (zinfo.create_version, zinfo.create_system, zinfo.extract_version,
         zinfo.reserved, zinfo.flag_bits, zinfo.compress_type, t, d,
         zinfo.CRC, zinfo.compress_size, zinfo.file_size) = centdir[1:12]
        zinfo.volume, zinfo.internal_attr, zinfo.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        zinfo._raw_time = t
        zinfo.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        zinfo._decodeExtra()
        zinfo.header_offset = zinfo.header_offset + concat
        zinfo.filename = zinfo._decodeFilename()
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        if self.debug > 2:
            print("%s %s" % ("total", total))
def namelist(self):
    """Return a new list with the filename of every entry in
    self.filelist, in the same order."""
    return [entry.filename for entry in self.filelist]
def infolist(self):
    """Return the list of ZipInfo instances for the files in the
    archive (the internal list itself, not a copy)."""
    entries = self.filelist
    return entries
def printdir(self):
    """Print a table of contents for the zip file: one header line, then
    name, modification time and size of every entry."""
    heading = ("File Name", "Modified ", "Size")
    print("%-46s %19s %12s" % heading)
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        print("%-46s %s %12d" % (entry.filename, stamp, entry.file_size))
738 def testzip(self):
739 """Read all the files and check the CRC."""
740 for zinfo in self.filelist:
741 try:
742 self.read(zinfo.filename) # Check CRC-32
743 except BadZipfile:
744 return zinfo.filename
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError when the archive has no item with that name."""
    found = self.NameToInfo.get(name)
    if found is not None:
        return found

    raise KeyError(
        'There is no item named %r in the archive' % name)
def setpassword(self, pwd):
    """Set default password for encrypted files.

    The value is stored in self.pwd, which open() uses when it is
    called without a password."""
    self.pwd = pwd
def read(self, name, pwd=None):
    """Return file bytes (as a string) for name.

    name and pwd are passed on to open(), which is called in mode "r"."""
    member = self.open(name, "r", pwd)
    return member.read()
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name is a member name or a ZipInfo instance, mode is "r", "U" or
    "rU" (the last two enable universal newlines) and pwd is the
    password for encrypted members (self.pwd is used when it is not
    given).

    Raises RuntimeError for a bad mode, a closed archive, a missing
    password or a wrong password, KeyError for an unknown name and
    BadZipfile when the local file header does not match the directory.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
              "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    opened_here = not self._filePassed
    if opened_here:
        zef_file = open(self.filename, 'rb')
    else:
        zef_file = self.fp

    succeeded = False
    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(30)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if fname != zinfo.orig_filename:
            raise BadZipfile(
                  'File name in directory "%s" and header "%s" differ.' % (
                      zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                      "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            bytes = zef_file.read(12)
            h = [zd(char) for char in bytes[0:12]]
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if ord(h[11]) != check_byte:
                raise RuntimeError("Bad password for file", name)

        # build and return a ZipExtFile
        if zd is None:
            zef = ZipExtFile(zef_file, zinfo)
        else:
            zef = ZipExtFile(zef_file, zinfo, zd)

        # set universal newlines on ZipExtFile if necessary
        if "U" in mode:
            zef.set_univ_newlines(True)
        succeeded = True
        return zef
    finally:
        # Do not leak the file we opened ourselves when anything above
        # failed; on success the returned object keeps using it.
        if opened_here and not succeeded:
            zef_file.close()
def extract(self, member, path=None, pwd=None):
    """Extract a member from the archive to the current working directory,
       using its full name. Its file information is extracted as accurately
       as possible. `member' may be a filename or a ZipInfo object. You can
       specify a different directory using `path'.

       Returns the value returned by _extract_member().
    """
    if isinstance(member, ZipInfo):
        zinfo = member
    else:
        zinfo = self.getinfo(member)

    destination = path
    if destination is None:
        destination = os.getcwd()

    return self._extract_member(zinfo, destination, pwd)
def extractall(self, path=None, members=None, pwd=None):
    """Extract all members from the archive to the current working
       directory. `path' specifies a different directory to extract to.
       `members' is optional and must be a subset of the list returned
       by namelist().
    """
    selected = members
    if selected is None:
        selected = self.namelist()

    for entry in selected:
        self.extract(entry, path, pwd)
868 def _extract_member(self, member, targetpath, pwd):
869 """Extract the ZipInfo object 'member' to a physical
870 file on the path targetpath.
872 # build the destination pathname, replacing
873 # forward slashes to platform specific separators.
874 if targetpath[-1:] == "/":
875 targetpath = targetpath[:-1]
877 # don't include leading "/" from file name if present
878 if os.path.isabs(member.filename):
879 targetpath = os.path.join(targetpath, member.filename[1:])
880 else:
881 targetpath = os.path.join(targetpath, member.filename)
883 targetpath = os.path.normpath(targetpath)
885 # Create all upper directories if necessary.
886 upperdirs = os.path.dirname(targetpath)
887 if upperdirs and not os.path.exists(upperdirs):
888 os.makedirs(upperdirs)
890 source = self.open(member, pwd=pwd)
891 target = file(targetpath, "wb")
892 shutil.copyfileobj(source, target)
893 source.close()
894 target.close()
896 return targetpath
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    Raises RuntimeError when the archive is not writable or the
    compression method is unusable, and LargeZipFile when ZIP64 would be
    needed but is not allowed.  A duplicate name is only reported (when
    self.debug is set), not rejected.
    """
    if zinfo.filename in self.NameToInfo and self.debug:
        # Warning for duplicate names
        print("%s %s" % ("Duplicate name:", zinfo.filename))
    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
              "Attempt to write ZIP archive that was already closed")
    method = zinfo.compress_type
    if method == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
              "Compression requires the (missing) zlib module")
    if method not in (ZIP_STORED, ZIP_DEFLATED):
        raise RuntimeError(
              "That compression method is not supported")
    if zinfo.file_size > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Zipfile size would require ZIP64 extensions")
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    arcname defaults to filename; its drive and leading path separators
    are removed.  compress_type defaults to the archive's compression
    method.  Raises RuntimeError when the archive is already closed;
    see _writecheck() for the other checks made before writing."""
    if not self.fp:
        raise RuntimeError(
              "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    mtime = time.localtime(st.st_mtime)
    date_time = mtime[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    # Stop when nothing is left so that a name made only of separators
    # does not raise IndexError.
    while arcname and arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True
    fp = open(filename, "rb")
    try:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        zinfo.file_size = file_size = 0
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
        else:
            cmpr = None
        while 1:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = crc32(buf, CRC) & 0xffffffff
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            self.fp.write(buf)
    finally:
        # Always release the input file, also when reading or writing
        # fails half way.
        fp.close()
    if cmpr:
        buf = cmpr.flush()
        compress_size = compress_size + len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    # Seek backwards and write CRC and file sizes
    position = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
          zinfo.file_size))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
def writestr(self, zinfo_or_arcname, bytes):
    """Write a file into the archive.  The contents is the string
    'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When only a name is given, a ZipInfo with the current local time
    and the archive's compression method is created.  Raises
    RuntimeError when the archive is already closed; see _writecheck()
    for the other checks made before writing."""
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise RuntimeError(
              "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(bytes)            # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(bytes) & 0xffffffff       # CRC-32 checksum
    if zinfo.compress_type == ZIP_DEFLATED:
        co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
             zlib.DEFLATED, -15)
        bytes = co.compress(bytes) + co.flush()
        zinfo.compress_size = len(bytes)    # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size
    self.fp.write(zinfo.FileHeader())
    self.fp.write(bytes)
    self.fp.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data.  The CRC was
        # masked to an unsigned 32-bit value above, so it is packed with
        # the unsigned "L" code like the two sizes.
        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
              zinfo.file_size))
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
def __del__(self):
    """Finalizer: run close() so a forgotten archive is still closed."""
    self.close()
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Does nothing if the archive is already closed (self.fp is None).
    For a modified archive opened in mode "w" or "a", the central
    directory and the end-of-archive record(s) are written first.
    Whether or not that succeeds, self.fp is reset to None and, unless
    self._filePassed is true, the underlying file object is closed, so a
    later close() (for instance from __del__) does not retry a failed
    write.  An exception raised while writing still propagates."""
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify:
            # write ending records
            count = 0
            pos1 = self.fp.tell()           # start of central directory
            for zinfo in self.filelist:     # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                # Values too large for the 32-bit fields are collected
                # here and replaced by the 0xffffffff marker.
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Prepend a ZIP64 field (header id 1) to the extra's
                    extra_data = struct.pack(
                            '<HH' + 'Q'*len(extra),
                            1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                filename, flag_bits = zinfo._encodeFilenameFlags()
                centdir_args = (structCentralDir,
                    stringCentralDir, create_version,
                    zinfo.create_system, extract_version, zinfo.reserved,
                    flag_bits, zinfo.compress_type, dostime, dosdate,
                    zinfo.CRC, compress_size, file_size,
                    len(filename), len(extra_data), len(zinfo.comment),
                    0, zinfo.internal_attr, zinfo.external_attr,
                    header_offset)
                try:
                    centdir = struct.pack(*centdir_args)
                except DeprecationWarning:
                    # Presumably only reachable when warnings are turned
                    # into exceptions.  Dump the exact arguments handed
                    # to struct.pack() to help find the offending field.
                    sys.stderr.write(repr(centdir_args) + "\n")
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()           # end of central directory
            # Write end-of-zip-archive record
            if pos1 > ZIP64_LIMIT:
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                        structEndArchive64, stringEndArchive64,
                        44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                        structEndArchive64Locator,
                        stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)

                # The classic record carries 0xffffffff as the directory
                # offset; the real offset is in the ZIP64 record above.
                endrec = struct.pack(structEndArchive, stringEndArchive,
                        0, 0, count, count, pos2 - pos1, 0xffffffff, 0)
                self.fp.write(endrec)

            else:
                endrec = struct.pack(structEndArchive, stringEndArchive,
                        0, 0, count, count, pos2 - pos1, pos1, 0)
                self.fp.write(endrec)
            self.fp.flush()
    finally:
        # Always release the handle and mark the archive closed, even if
        # writing the ending records failed part-way.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.

        'basename', when not empty, is the archive directory under which
        the modules are stored.  Raises RuntimeError if pathname is not
        a directory and does not end with ".py".
        """
        name = os.path.split(pathname)[1]

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                      'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file %s" % (arcname,))
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory %s" % (pathname,))
            for filename in os.listdir(pathname):
                if os.path.splitext(filename)[1] == ".py":
                    path = os.path.join(pathname, filename)
                    fname, arcname = self._get_codename(path[0:-3],
                                                        basename)
                    if self.debug:
                        print("Adding %s" % (arcname,))
                    self.write(fname, arcname)
            return

        # This is a package directory, add it
        if basename:
            basename = "%s/%s" % (basename, name)
        else:
            basename = name
        if self.debug:
            print("Adding package in %s as %s" % (pathname, basename))
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("Adding %s" % (arcname,))
        self.write(fname, arcname)
        dirlist = os.listdir(pathname)
        dirlist.remove("__init__.py")       # already added above
        # Add all *.py files and package subdirectories
        for filename in dirlist:
            path = os.path.join(pathname, filename)
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename)  # Recursive call
            elif os.path.splitext(filename)[1] == ".py":
                fname, arcname = self._get_codename(path[0:-3], basename)
                if self.debug:
                    print("Adding %s" % (arcname,))
                self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string.pyc).

        A .pyo file is preferred when it exists and is at least as new
        as the source; otherwise the .pyc is used, and is (re)compiled
        first when it is missing or older than the source.  'basename',
        when not empty, is prefixed to the archive name with a "/".
        """
        file_py  = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        if os.path.isfile(file_pyo) and \
                os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
            fname = file_pyo    # Use .pyo file
        else:
            if not os.path.isfile(file_pyc) or \
                    os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
                import py_compile
                if self.debug:
                    print("Compiling %s" % (file_py,))
                try:
                    py_compile.compile(file_py, file_pyc, None, True)
                except py_compile.PyCompileError:
                    # NOTE: a compile failure is only reported; the .pyc
                    # path is still returned, as it always has been.
                    err = sys.exc_info()[1]
                    print(err.msg)
            fname = file_pyc
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
def main(args = None):
    """Command-line interface: list, test, extract or create a ZIP file.

    'args' is the argument list without the program name; when it is
    None, sys.argv[1:] is used.  If the arguments do not match one of
    the forms shown in the usage text, that text is printed and the
    process exits with status 1.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    def usage():
        # Print the usage text and terminate with a failure status.
        print(USAGE)
        sys.exit(1)

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage()

    if args[0] == '-l':
        if len(args) != 2:
            usage()
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            usage()
        zf = ZipFile(args[1], 'r')
        try:
            # testzip() is documented to return the name of the first bad
            # member, or None when every member checks out.
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: %r"
                  % (badfile,))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            usage()

        zf = ZipFile(args[1], 'r')
        out = args[2]
        # Absolute target directory with a trailing separator, used to
        # check (lexically) that every member stays inside it.
        out_root = os.path.join(os.path.abspath(out), '')
        try:
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                # Member names come from the archive and may be hostile
                # ("../x", absolute paths); never write outside 'out'.
                if not os.path.abspath(tgt).startswith(out_root):
                    sys.stderr.write(
                        "Skipping %r: target is outside %r\n" % (path, out))
                    continue

                if path.endswith('/'):
                    # Directory entry: create it, there is no data to write
                    if not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                fp = open(tgt, 'wb')
                try:
                    fp.write(zf.read(path))
                finally:
                    fp.close()
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            usage()

        def addToZip(zf, path, zippath):
            # Add a file, or recursively every file below a directory.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()

if __name__ == "__main__":
    main()