Updated docs for basicConfig to indicate it's a no-op if handlers have been defined...
[python.git] / Lib / zipfile.py
bloba53f8ed22942302b9823409595ef645ed40ecf1f
1 """
2 Read and write ZIP files.
3 """
4 import struct, os, time, sys
5 import binascii, cStringIO
7 try:
8 import zlib # We may need its compression method
9 except ImportError:
10 zlib = None
12 __all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
13 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
15 class BadZipfile(Exception):
16 pass
19 class LargeZipFile(Exception):
20 """
21 Raised when writing a zipfile, the zipfile requires ZIP64 extensions
22 and those extensions are disabled.
23 """
25 error = BadZipfile # The exception raised by this module
27 ZIP64_LIMIT= (1 << 31) - 1
29 # constants for Zip file compression methods
30 ZIP_STORED = 0
31 ZIP_DEFLATED = 8
32 # Other ZIP compression methods not supported
34 # Here are some struct module formats for reading headers
35 structEndArchive = "<4s4H2lH" # 9 items, end of archive, 22 bytes
36 stringEndArchive = "PK\005\006" # magic number for end of archive record
37 structCentralDir = "<4s4B4HlLL5HLl"# 19 items, central directory, 46 bytes
38 stringCentralDir = "PK\001\002" # magic number for central directory
39 structFileHeader = "<4s2B4HlLL2H" # 12 items, file header record, 30 bytes
40 stringFileHeader = "PK\003\004" # magic number for file header
41 structEndArchive64Locator = "<4slql" # 4 items, locate Zip64 header, 20 bytes
42 stringEndArchive64Locator = "PK\x06\x07" # magic token for locator header
43 structEndArchive64 = "<4sqhhllqqqq" # 10 items, end of archive (Zip64), 56 bytes
44 stringEndArchive64 = "PK\x06\x06" # magic token for Zip64 header
47 # indexes of entries in the central directory structure
48 _CD_SIGNATURE = 0
49 _CD_CREATE_VERSION = 1
50 _CD_CREATE_SYSTEM = 2
51 _CD_EXTRACT_VERSION = 3
52 _CD_EXTRACT_SYSTEM = 4 # is this meaningful?
53 _CD_FLAG_BITS = 5
54 _CD_COMPRESS_TYPE = 6
55 _CD_TIME = 7
56 _CD_DATE = 8
57 _CD_CRC = 9
58 _CD_COMPRESSED_SIZE = 10
59 _CD_UNCOMPRESSED_SIZE = 11
60 _CD_FILENAME_LENGTH = 12
61 _CD_EXTRA_FIELD_LENGTH = 13
62 _CD_COMMENT_LENGTH = 14
63 _CD_DISK_NUMBER_START = 15
64 _CD_INTERNAL_FILE_ATTRIBUTES = 16
65 _CD_EXTERNAL_FILE_ATTRIBUTES = 17
66 _CD_LOCAL_HEADER_OFFSET = 18
68 # indexes of entries in the local file header structure
69 _FH_SIGNATURE = 0
70 _FH_EXTRACT_VERSION = 1
71 _FH_EXTRACT_SYSTEM = 2 # is this meaningful?
72 _FH_GENERAL_PURPOSE_FLAG_BITS = 3
73 _FH_COMPRESSION_METHOD = 4
74 _FH_LAST_MOD_TIME = 5
75 _FH_LAST_MOD_DATE = 6
76 _FH_CRC = 7
77 _FH_COMPRESSED_SIZE = 8
78 _FH_UNCOMPRESSED_SIZE = 9
79 _FH_FILENAME_LENGTH = 10
80 _FH_EXTRA_FIELD_LENGTH = 11
82 def is_zipfile(filename):
83 """Quickly see if file is a ZIP file by checking the magic number."""
84 try:
85 fpin = open(filename, "rb")
86 endrec = _EndRecData(fpin)
87 fpin.close()
88 if endrec:
89 return True # file has correct magic number
90 except IOError:
91 pass
92 return False
94 def _EndRecData64(fpin, offset, endrec):
95 """
96 Read the ZIP64 end-of-archive records and use that to update endrec
97 """
98 locatorSize = struct.calcsize(structEndArchive64Locator)
99 fpin.seek(offset - locatorSize, 2)
100 data = fpin.read(locatorSize)
101 sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
102 if sig != stringEndArchive64Locator:
103 return endrec
105 if diskno != 0 or disks != 1:
106 raise BadZipfile("zipfiles that span multiple disks are not supported")
108 # Assume no 'zip64 extensible data'
109 endArchiveSize = struct.calcsize(structEndArchive64)
110 fpin.seek(offset - locatorSize - endArchiveSize, 2)
111 data = fpin.read(endArchiveSize)
112 sig, sz, create_version, read_version, disk_num, disk_dir, \
113 dircount, dircount2, dirsize, diroffset = \
114 struct.unpack(structEndArchive64, data)
115 if sig != stringEndArchive64:
116 return endrec
118 # Update the original endrec using data from the ZIP64 record
119 endrec[1] = disk_num
120 endrec[2] = disk_dir
121 endrec[3] = dircount
122 endrec[4] = dircount2
123 endrec[5] = dirsize
124 endrec[6] = diroffset
125 return endrec
128 def _EndRecData(fpin):
129 """Return data from the "End of Central Directory" record, or None.
131 The data is a list of the nine items in the ZIP "End of central dir"
132 record followed by a tenth item, the file seek offset of this record."""
133 fpin.seek(-22, 2) # Assume no archive comment.
134 filesize = fpin.tell() + 22 # Get file size
135 data = fpin.read()
136 if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
137 endrec = struct.unpack(structEndArchive, data)
138 endrec = list(endrec)
139 endrec.append("") # Append the archive comment
140 endrec.append(filesize - 22) # Append the record start offset
141 if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
142 return _EndRecData64(fpin, -22, endrec)
143 return endrec
144 # Search the last END_BLOCK bytes of the file for the record signature.
145 # The comment is appended to the ZIP file and has a 16 bit length.
146 # So the comment may be up to 64K long. We limit the search for the
147 # signature to a few Kbytes at the end of the file for efficiency.
148 # also, the signature must not appear in the comment.
149 END_BLOCK = min(filesize, 1024 * 4)
150 fpin.seek(filesize - END_BLOCK, 0)
151 data = fpin.read()
152 start = data.rfind(stringEndArchive)
153 if start >= 0: # Correct signature string was found
154 endrec = struct.unpack(structEndArchive, data[start:start+22])
155 endrec = list(endrec)
156 comment = data[start+22:]
157 if endrec[7] == len(comment): # Comment length checks out
158 # Append the archive comment and start offset
159 endrec.append(comment)
160 endrec.append(filesize - END_BLOCK + start)
161 if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
162 return _EndRecData64(fpin, - END_BLOCK + start, endrec)
163 return endrec
164 return # Error, return None
167 class ZipInfo (object):
168 """Class with attributes describing each file in the ZIP archive."""
170 __slots__ = (
171 'orig_filename',
172 'filename',
173 'date_time',
174 'compress_type',
175 'comment',
176 'extra',
177 'create_system',
178 'create_version',
179 'extract_version',
180 'reserved',
181 'flag_bits',
182 'volume',
183 'internal_attr',
184 'external_attr',
185 'header_offset',
186 'CRC',
187 'compress_size',
188 'file_size',
191 def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
192 self.orig_filename = filename # Original file name in archive
194 # Terminate the file name at the first null byte. Null bytes in file
195 # names are used as tricks by viruses in archives.
196 null_byte = filename.find(chr(0))
197 if null_byte >= 0:
198 filename = filename[0:null_byte]
199 # This is used to ensure paths in generated ZIP files always use
200 # forward slashes as the directory separator, as required by the
201 # ZIP format specification.
202 if os.sep != "/" and os.sep in filename:
203 filename = filename.replace(os.sep, "/")
205 self.filename = filename # Normalized file name
206 self.date_time = date_time # year, month, day, hour, min, sec
207 # Standard values:
208 self.compress_type = ZIP_STORED # Type of compression for the file
209 self.comment = "" # Comment for each file
210 self.extra = "" # ZIP extra data
211 if sys.platform == 'win32':
212 self.create_system = 0 # System which created ZIP archive
213 else:
214 # Assume everything else is unix-y
215 self.create_system = 3 # System which created ZIP archive
216 self.create_version = 20 # Version which created ZIP archive
217 self.extract_version = 20 # Version needed to extract archive
218 self.reserved = 0 # Must be zero
219 self.flag_bits = 0 # ZIP flag bits
220 self.volume = 0 # Volume number of file header
221 self.internal_attr = 0 # Internal attributes
222 self.external_attr = 0 # External file attributes
223 # Other attributes are set by class ZipFile:
224 # header_offset Byte offset to the file header
225 # CRC CRC-32 of the uncompressed file
226 # compress_size Size of the compressed file
227 # file_size Size of the uncompressed file
229 def FileHeader(self):
230 """Return the per-file header as a string."""
231 dt = self.date_time
232 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
233 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
234 if self.flag_bits & 0x08:
235 # Set these to zero because we write them after the file data
236 CRC = compress_size = file_size = 0
237 else:
238 CRC = self.CRC
239 compress_size = self.compress_size
240 file_size = self.file_size
242 extra = self.extra
244 if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
245 # File is larger than what fits into a 4 byte integer,
246 # fall back to the ZIP64 extension
247 fmt = '<hhqq'
248 extra = extra + struct.pack(fmt,
249 1, struct.calcsize(fmt)-4, file_size, compress_size)
250 file_size = 0xffffffff # -1
251 compress_size = 0xffffffff # -1
252 self.extract_version = max(45, self.extract_version)
253 self.create_version = max(45, self.extract_version)
255 header = struct.pack(structFileHeader, stringFileHeader,
256 self.extract_version, self.reserved, self.flag_bits,
257 self.compress_type, dostime, dosdate, CRC,
258 compress_size, file_size,
259 len(self.filename), len(extra))
260 return header + self.filename + extra
262 def _decodeExtra(self):
263 # Try to decode the extra field.
264 extra = self.extra
265 unpack = struct.unpack
266 while extra:
267 tp, ln = unpack('<hh', extra[:4])
268 if tp == 1:
269 if ln >= 24:
270 counts = unpack('<qqq', extra[4:28])
271 elif ln == 16:
272 counts = unpack('<qq', extra[4:20])
273 elif ln == 8:
274 counts = unpack('<q', extra[4:12])
275 elif ln == 0:
276 counts = ()
277 else:
278 raise RuntimeError, "Corrupt extra field %s"%(ln,)
280 idx = 0
282 # ZIP64 extension (large files and/or large archives)
283 if self.file_size == -1 or self.file_size == 0xFFFFFFFFL:
284 self.file_size = counts[idx]
285 idx += 1
287 if self.compress_size == -1 or self.compress_size == 0xFFFFFFFFL:
288 self.compress_size = counts[idx]
289 idx += 1
291 if self.header_offset == -1 or self.header_offset == 0xffffffffL:
292 old = self.header_offset
293 self.header_offset = counts[idx]
294 idx+=1
296 extra = extra[ln+4:]
299 class _ZipDecrypter:
300 """Class to handle decryption of files stored within a ZIP archive.
302 ZIP supports a password-based form of encryption. Even though known
303 plaintext attacks have been found against it, it is still useful
304 for low-level securicy.
306 Usage:
307 zd = _ZipDecrypter(mypwd)
308 plain_char = zd(cypher_char)
309 plain_text = map(zd, cypher_text)
312 def _GenerateCRCTable():
313 """Generate a CRC-32 table.
315 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
316 internal keys. We noticed that a direct implementation is faster than
317 relying on binascii.crc32().
319 poly = 0xedb88320
320 table = [0] * 256
321 for i in range(256):
322 crc = i
323 for j in range(8):
324 if crc & 1:
325 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
326 else:
327 crc = ((crc >> 1) & 0x7FFFFFFF)
328 table[i] = crc
329 return table
330 crctable = _GenerateCRCTable()
332 def _crc32(self, ch, crc):
333 """Compute the CRC32 primitive on one byte."""
334 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
336 def __init__(self, pwd):
337 self.key0 = 305419896
338 self.key1 = 591751049
339 self.key2 = 878082192
340 for p in pwd:
341 self._UpdateKeys(p)
343 def _UpdateKeys(self, c):
344 self.key0 = self._crc32(c, self.key0)
345 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
346 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
347 self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
349 def __call__(self, c):
350 """Decrypt a single character."""
351 c = ord(c)
352 k = self.key2 | 2
353 c = c ^ (((k * (k^1)) >> 8) & 255)
354 c = chr(c)
355 self._UpdateKeys(c)
356 return c
358 class ZipExtFile:
359 """File-like object for reading an archive member.
360 Is returned by ZipFile.open().
363 def __init__(self, fileobj, zipinfo, decrypt=None):
364 self.fileobj = fileobj
365 self.decrypter = decrypt
366 self.bytes_read = 0L
367 self.rawbuffer = ''
368 self.readbuffer = ''
369 self.linebuffer = ''
370 self.eof = False
371 self.univ_newlines = False
372 self.nlSeps = ("\n", )
373 self.lastdiscard = ''
375 self.compress_type = zipinfo.compress_type
376 self.compress_size = zipinfo.compress_size
378 self.closed = False
379 self.mode = "r"
380 self.name = zipinfo.filename
382 # read from compressed files in 64k blocks
383 self.compreadsize = 64*1024
384 if self.compress_type == ZIP_DEFLATED:
385 self.dc = zlib.decompressobj(-15)
387 def set_univ_newlines(self, univ_newlines):
388 self.univ_newlines = univ_newlines
390 # pick line separator char(s) based on universal newlines flag
391 self.nlSeps = ("\n", )
392 if self.univ_newlines:
393 self.nlSeps = ("\r\n", "\r", "\n")
395 def __iter__(self):
396 return self
398 def next(self):
399 nextline = self.readline()
400 if not nextline:
401 raise StopIteration()
403 return nextline
405 def close(self):
406 self.closed = True
408 def _checkfornewline(self):
409 nl, nllen = -1, -1
410 if self.linebuffer:
411 # ugly check for cases where half of an \r\n pair was
412 # read on the last pass, and the \r was discarded. In this
413 # case we just throw away the \n at the start of the buffer.
414 if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
415 self.linebuffer = self.linebuffer[1:]
417 for sep in self.nlSeps:
418 nl = self.linebuffer.find(sep)
419 if nl >= 0:
420 nllen = len(sep)
421 return nl, nllen
423 return nl, nllen
425 def readline(self, size = -1):
426 """Read a line with approx. size. If size is negative,
427 read a whole line.
429 if size < 0:
430 size = sys.maxint
431 elif size == 0:
432 return ''
434 # check for a newline already in buffer
435 nl, nllen = self._checkfornewline()
437 if nl >= 0:
438 # the next line was already in the buffer
439 nl = min(nl, size)
440 else:
441 # no line break in buffer - try to read more
442 size -= len(self.linebuffer)
443 while nl < 0 and size > 0:
444 buf = self.read(min(size, 100))
445 if not buf:
446 break
447 self.linebuffer += buf
448 size -= len(buf)
450 # check for a newline in buffer
451 nl, nllen = self._checkfornewline()
453 # we either ran out of bytes in the file, or
454 # met the specified size limit without finding a newline,
455 # so return current buffer
456 if nl < 0:
457 s = self.linebuffer
458 self.linebuffer = ''
459 return s
461 buf = self.linebuffer[:nl]
462 self.lastdiscard = self.linebuffer[nl:nl + nllen]
463 self.linebuffer = self.linebuffer[nl + nllen:]
465 # line is always returned with \n as newline char (except possibly
466 # for a final incomplete line in the file, which is handled above).
467 return buf + "\n"
469 def readlines(self, sizehint = -1):
470 """Return a list with all (following) lines. The sizehint parameter
471 is ignored in this implementation.
473 result = []
474 while True:
475 line = self.readline()
476 if not line: break
477 result.append(line)
478 return result
480 def read(self, size = None):
481 # act like file() obj and return empty string if size is 0
482 if size == 0:
483 return ''
485 # determine read size
486 bytesToRead = self.compress_size - self.bytes_read
488 # adjust read size for encrypted files since the first 12 bytes
489 # are for the encryption/password information
490 if self.decrypter is not None:
491 bytesToRead -= 12
493 if size is not None and size >= 0:
494 if self.compress_type == ZIP_STORED:
495 lr = len(self.readbuffer)
496 bytesToRead = min(bytesToRead, size - lr)
497 elif self.compress_type == ZIP_DEFLATED:
498 if len(self.readbuffer) > size:
499 # the user has requested fewer bytes than we've already
500 # pulled through the decompressor; don't read any more
501 bytesToRead = 0
502 else:
503 # user will use up the buffer, so read some more
504 lr = len(self.rawbuffer)
505 bytesToRead = min(bytesToRead, self.compreadsize - lr)
507 # avoid reading past end of file contents
508 if bytesToRead + self.bytes_read > self.compress_size:
509 bytesToRead = self.compress_size - self.bytes_read
511 # try to read from file (if necessary)
512 if bytesToRead > 0:
513 bytes = self.fileobj.read(bytesToRead)
514 self.bytes_read += len(bytes)
515 self.rawbuffer += bytes
517 # handle contents of raw buffer
518 if self.rawbuffer:
519 newdata = self.rawbuffer
520 self.rawbuffer = ''
522 # decrypt new data if we were given an object to handle that
523 if newdata and self.decrypter is not None:
524 newdata = ''.join(map(self.decrypter, newdata))
526 # decompress newly read data if necessary
527 if newdata and self.compress_type == ZIP_DEFLATED:
528 newdata = self.dc.decompress(newdata)
529 self.rawbuffer = self.dc.unconsumed_tail
530 if self.eof and len(self.rawbuffer) == 0:
531 # we're out of raw bytes (both from the file and
532 # the local buffer); flush just to make sure the
533 # decompressor is done
534 newdata += self.dc.flush()
535 # prevent decompressor from being used again
536 self.dc = None
538 self.readbuffer += newdata
541 # return what the user asked for
542 if size is None or len(self.readbuffer) <= size:
543 bytes = self.readbuffer
544 self.readbuffer = ''
545 else:
546 bytes = self.readbuffer[:size]
547 self.readbuffer = self.readbuffer[size:]
549 return bytes
552 class ZipFile:
553 """ Class with methods to open, read, write, close, list zip files.
555 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
557 file: Either the path to the file, or a file-like object.
558 If it is a path, the file will be opened and closed by ZipFile.
559 mode: The mode can be either read "r", write "w" or append "a".
560 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
561 allowZip64: if True ZipFile will create files with ZIP64 extensions when
562 needed, otherwise it will raise an exception when this would
563 be necessary.
567 fp = None # Set here since __del__ checks it
569 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
570 """Open the ZIP file with mode read "r", write "w" or append "a"."""
571 if mode not in ("r", "w", "a"):
572 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
574 if compression == ZIP_STORED:
575 pass
576 elif compression == ZIP_DEFLATED:
577 if not zlib:
578 raise RuntimeError,\
579 "Compression requires the (missing) zlib module"
580 else:
581 raise RuntimeError, "That compression method is not supported"
583 self._allowZip64 = allowZip64
584 self._didModify = False
585 self.debug = 0 # Level of printing: 0 through 3
586 self.NameToInfo = {} # Find file info given name
587 self.filelist = [] # List of ZipInfo instances for archive
588 self.compression = compression # Method of compression
589 self.mode = key = mode.replace('b', '')[0]
590 self.pwd = None
592 # Check if we were passed a file-like object
593 if isinstance(file, basestring):
594 self._filePassed = 0
595 self.filename = file
596 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
597 try:
598 self.fp = open(file, modeDict[mode])
599 except IOError:
600 if mode == 'a':
601 mode = key = 'w'
602 self.fp = open(file, modeDict[mode])
603 else:
604 raise
605 else:
606 self._filePassed = 1
607 self.fp = file
608 self.filename = getattr(file, 'name', None)
610 if key == 'r':
611 self._GetContents()
612 elif key == 'w':
613 pass
614 elif key == 'a':
615 try: # See if file is a zip file
616 self._RealGetContents()
617 # seek to start of directory and overwrite
618 self.fp.seek(self.start_dir, 0)
619 except BadZipfile: # file is not a zip file, just append
620 self.fp.seek(0, 2)
621 else:
622 if not self._filePassed:
623 self.fp.close()
624 self.fp = None
625 raise RuntimeError, 'Mode must be "r", "w" or "a"'
627 def _GetContents(self):
628 """Read the directory, making sure we close the file if the format
629 is bad."""
630 try:
631 self._RealGetContents()
632 except BadZipfile:
633 if not self._filePassed:
634 self.fp.close()
635 self.fp = None
636 raise
638 def _RealGetContents(self):
639 """Read in the table of contents for the ZIP file."""
640 fp = self.fp
641 endrec = _EndRecData(fp)
642 if not endrec:
643 raise BadZipfile, "File is not a zip file"
644 if self.debug > 1:
645 print endrec
646 size_cd = endrec[5] # bytes in central directory
647 offset_cd = endrec[6] # offset of central directory
648 self.comment = endrec[8] # archive comment
649 # endrec[9] is the offset of the "End of Central Dir" record
650 if endrec[9] > ZIP64_LIMIT:
651 x = endrec[9] - size_cd - 56 - 20
652 else:
653 x = endrec[9] - size_cd
654 # "concat" is zero, unless zip was concatenated to another file
655 concat = x - offset_cd
656 if self.debug > 2:
657 print "given, inferred, offset", offset_cd, x, concat
658 # self.start_dir: Position of start of central directory
659 self.start_dir = offset_cd + concat
660 fp.seek(self.start_dir, 0)
661 data = fp.read(size_cd)
662 fp = cStringIO.StringIO(data)
663 total = 0
664 while total < size_cd:
665 centdir = fp.read(46)
666 total = total + 46
667 if centdir[0:4] != stringCentralDir:
668 raise BadZipfile, "Bad magic number for central directory"
669 centdir = struct.unpack(structCentralDir, centdir)
670 if self.debug > 2:
671 print centdir
672 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
673 # Create ZipInfo instance to store file information
674 x = ZipInfo(filename)
675 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
676 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
677 total = (total + centdir[_CD_FILENAME_LENGTH]
678 + centdir[_CD_EXTRA_FIELD_LENGTH]
679 + centdir[_CD_COMMENT_LENGTH])
680 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
681 (x.create_version, x.create_system, x.extract_version, x.reserved,
682 x.flag_bits, x.compress_type, t, d,
683 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
684 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
685 # Convert date/time code to (year, month, day, hour, min, sec)
686 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
687 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
689 x._decodeExtra()
690 x.header_offset = x.header_offset + concat
691 self.filelist.append(x)
692 self.NameToInfo[x.filename] = x
693 if self.debug > 2:
694 print "total", total
697 def namelist(self):
698 """Return a list of file names in the archive."""
699 l = []
700 for data in self.filelist:
701 l.append(data.filename)
702 return l
704 def infolist(self):
705 """Return a list of class ZipInfo instances for files in the
706 archive."""
707 return self.filelist
709 def printdir(self):
710 """Print a table of contents for the zip file."""
711 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
712 for zinfo in self.filelist:
713 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time
714 print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
716 def testzip(self):
717 """Read all the files and check the CRC."""
718 for zinfo in self.filelist:
719 try:
720 self.read(zinfo.filename) # Check CRC-32
721 except BadZipfile:
722 return zinfo.filename
725 def getinfo(self, name):
726 """Return the instance of ZipInfo given 'name'."""
727 info = self.NameToInfo.get(name)
728 if info is None:
729 raise KeyError(
730 'There is no item named %r in the archive' % name)
732 return info
734 def setpassword(self, pwd):
735 """Set default password for encrypted files."""
736 self.pwd = pwd
738 def read(self, name, pwd=None):
739 """Return file bytes (as a string) for name."""
740 return self.open(name, "r", pwd).read()
742 def open(self, name, mode="r", pwd=None):
743 """Return file-like object for 'name'."""
744 if mode not in ("r", "U", "rU"):
745 raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
746 if not self.fp:
747 raise RuntimeError, \
748 "Attempt to read ZIP archive that was already closed"
750 # Only open a new file for instances where we were not
751 # given a file object in the constructor
752 if self._filePassed:
753 zef_file = self.fp
754 else:
755 zef_file = open(self.filename, 'rb')
757 # Get info object for name
758 zinfo = self.getinfo(name)
760 filepos = zef_file.tell()
762 zef_file.seek(zinfo.header_offset, 0)
764 # Skip the file header:
765 fheader = zef_file.read(30)
766 if fheader[0:4] != stringFileHeader:
767 raise BadZipfile, "Bad magic number for file header"
769 fheader = struct.unpack(structFileHeader, fheader)
770 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
771 if fheader[_FH_EXTRA_FIELD_LENGTH]:
772 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
774 if fname != zinfo.orig_filename:
775 raise BadZipfile, \
776 'File name in directory "%s" and header "%s" differ.' % (
777 zinfo.orig_filename, fname)
779 # check for encrypted flag & handle password
780 is_encrypted = zinfo.flag_bits & 0x1
781 zd = None
782 if is_encrypted:
783 if not pwd:
784 pwd = self.pwd
785 if not pwd:
786 raise RuntimeError, "File %s is encrypted, " \
787 "password required for extraction" % name
789 zd = _ZipDecrypter(pwd)
790 # The first 12 bytes in the cypher stream is an encryption header
791 # used to strengthen the algorithm. The first 11 bytes are
792 # completely random, while the 12th contains the MSB of the CRC,
793 # and is used to check the correctness of the password.
794 bytes = zef_file.read(12)
795 h = map(zd, bytes[0:12])
796 if ord(h[11]) != ((zinfo.CRC>>24)&255):
797 raise RuntimeError, "Bad password for file %s" % name
799 # build and return a ZipExtFile
800 if zd is None:
801 zef = ZipExtFile(zef_file, zinfo)
802 else:
803 zef = ZipExtFile(zef_file, zinfo, zd)
805 # set universal newlines on ZipExtFile if necessary
806 if "U" in mode:
807 zef.set_univ_newlines(True)
808 return zef
810 def _writecheck(self, zinfo):
811 """Check for errors before writing a file to the archive."""
812 if zinfo.filename in self.NameToInfo:
813 if self.debug: # Warning for duplicate names
814 print "Duplicate name:", zinfo.filename
815 if self.mode not in ("w", "a"):
816 raise RuntimeError, 'write() requires mode "w" or "a"'
817 if not self.fp:
818 raise RuntimeError, \
819 "Attempt to write ZIP archive that was already closed"
820 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
821 raise RuntimeError, \
822 "Compression requires the (missing) zlib module"
823 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
824 raise RuntimeError, \
825 "That compression method is not supported"
826 if zinfo.file_size > ZIP64_LIMIT:
827 if not self._allowZip64:
828 raise LargeZipFile("Filesize would require ZIP64 extensions")
829 if zinfo.header_offset > ZIP64_LIMIT:
830 if not self._allowZip64:
831 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
833 def write(self, filename, arcname=None, compress_type=None):
834 """Put the bytes from filename into the archive under the name
835 arcname."""
836 if not self.fp:
837 raise RuntimeError(
838 "Attempt to write to ZIP archive that was already closed")
840 st = os.stat(filename)
841 mtime = time.localtime(st.st_mtime)
842 date_time = mtime[0:6]
843 # Create ZipInfo instance to store file information
844 if arcname is None:
845 arcname = filename
846 arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
847 while arcname[0] in (os.sep, os.altsep):
848 arcname = arcname[1:]
849 zinfo = ZipInfo(arcname, date_time)
850 zinfo.external_attr = (st[0] & 0xFFFF) << 16L # Unix attributes
851 if compress_type is None:
852 zinfo.compress_type = self.compression
853 else:
854 zinfo.compress_type = compress_type
856 zinfo.file_size = st.st_size
857 zinfo.flag_bits = 0x00
858 zinfo.header_offset = self.fp.tell() # Start of header bytes
860 self._writecheck(zinfo)
861 self._didModify = True
862 fp = open(filename, "rb")
863 # Must overwrite CRC and sizes with correct data later
864 zinfo.CRC = CRC = 0
865 zinfo.compress_size = compress_size = 0
866 zinfo.file_size = file_size = 0
867 self.fp.write(zinfo.FileHeader())
868 if zinfo.compress_type == ZIP_DEFLATED:
869 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
870 zlib.DEFLATED, -15)
871 else:
872 cmpr = None
873 while 1:
874 buf = fp.read(1024 * 8)
875 if not buf:
876 break
877 file_size = file_size + len(buf)
878 CRC = binascii.crc32(buf, CRC)
879 if cmpr:
880 buf = cmpr.compress(buf)
881 compress_size = compress_size + len(buf)
882 self.fp.write(buf)
883 fp.close()
884 if cmpr:
885 buf = cmpr.flush()
886 compress_size = compress_size + len(buf)
887 self.fp.write(buf)
888 zinfo.compress_size = compress_size
889 else:
890 zinfo.compress_size = file_size
891 zinfo.CRC = CRC
892 zinfo.file_size = file_size
893 # Seek backwards and write CRC and file sizes
894 position = self.fp.tell() # Preserve current position in file
895 self.fp.seek(zinfo.header_offset + 14, 0)
896 self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
897 zinfo.file_size))
898 self.fp.seek(position, 0)
899 self.filelist.append(zinfo)
900 self.NameToInfo[zinfo.filename] = zinfo
902 def writestr(self, zinfo_or_arcname, bytes):
903 """Write a file into the archive. The contents is the string
904 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or
905 the name of the file in the archive."""
906 if not isinstance(zinfo_or_arcname, ZipInfo):
907 zinfo = ZipInfo(filename=zinfo_or_arcname,
908 date_time=time.localtime(time.time()))
909 zinfo.compress_type = self.compression
910 else:
911 zinfo = zinfo_or_arcname
913 if not self.fp:
914 raise RuntimeError(
915 "Attempt to write to ZIP archive that was already closed")
917 zinfo.file_size = len(bytes) # Uncompressed size
918 zinfo.header_offset = self.fp.tell() # Start of header bytes
919 self._writecheck(zinfo)
920 self._didModify = True
921 zinfo.CRC = binascii.crc32(bytes) # CRC-32 checksum
922 if zinfo.compress_type == ZIP_DEFLATED:
923 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
924 zlib.DEFLATED, -15)
925 bytes = co.compress(bytes) + co.flush()
926 zinfo.compress_size = len(bytes) # Compressed size
927 else:
928 zinfo.compress_size = zinfo.file_size
929 zinfo.header_offset = self.fp.tell() # Start of header bytes
930 self.fp.write(zinfo.FileHeader())
931 self.fp.write(bytes)
932 self.fp.flush()
933 if zinfo.flag_bits & 0x08:
934 # Write CRC and file sizes after the file data
935 self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
936 zinfo.file_size))
937 self.filelist.append(zinfo)
938 self.NameToInfo[zinfo.filename] = zinfo
940 def __del__(self):
941 """Call the "close()" method in case the user forgot."""
942 self.close()
944 def close(self):
945 """Close the file, and for mode "w" and "a" write the ending
946 records."""
947 if self.fp is None:
948 return
950 if self.mode in ("w", "a") and self._didModify: # write ending records
951 count = 0
952 pos1 = self.fp.tell()
953 for zinfo in self.filelist: # write central directory
954 count = count + 1
955 dt = zinfo.date_time
956 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
957 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
958 extra = []
959 if zinfo.file_size > ZIP64_LIMIT \
960 or zinfo.compress_size > ZIP64_LIMIT:
961 extra.append(zinfo.file_size)
962 extra.append(zinfo.compress_size)
963 file_size = 0xffffffff #-1
964 compress_size = 0xffffffff #-1
965 else:
966 file_size = zinfo.file_size
967 compress_size = zinfo.compress_size
969 if zinfo.header_offset > ZIP64_LIMIT:
970 extra.append(zinfo.header_offset)
971 header_offset = -1 # struct "l" format: 32 one bits
972 else:
973 header_offset = zinfo.header_offset
975 extra_data = zinfo.extra
976 if extra:
977 # Append a ZIP64 field to the extra's
978 extra_data = struct.pack(
979 '<hh' + 'q'*len(extra),
980 1, 8*len(extra), *extra) + extra_data
982 extract_version = max(45, zinfo.extract_version)
983 create_version = max(45, zinfo.create_version)
984 else:
985 extract_version = zinfo.extract_version
986 create_version = zinfo.create_version
988 centdir = struct.pack(structCentralDir,
989 stringCentralDir, create_version,
990 zinfo.create_system, extract_version, zinfo.reserved,
991 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
992 zinfo.CRC, compress_size, file_size,
993 len(zinfo.filename), len(extra_data), len(zinfo.comment),
994 0, zinfo.internal_attr, zinfo.external_attr,
995 header_offset)
996 self.fp.write(centdir)
997 self.fp.write(zinfo.filename)
998 self.fp.write(extra_data)
999 self.fp.write(zinfo.comment)
1001 pos2 = self.fp.tell()
1002 # Write end-of-zip-archive record
1003 if pos1 > ZIP64_LIMIT:
1004 # Need to write the ZIP64 end-of-archive records
1005 zip64endrec = struct.pack(
1006 structEndArchive64, stringEndArchive64,
1007 44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
1008 self.fp.write(zip64endrec)
1010 zip64locrec = struct.pack(
1011 structEndArchive64Locator,
1012 stringEndArchive64Locator, 0, pos2, 1)
1013 self.fp.write(zip64locrec)
1015 # XXX Why is `pos3` computed next? It's never referenced.
1016 pos3 = self.fp.tell()
1017 endrec = struct.pack(structEndArchive, stringEndArchive,
1018 0, 0, count, count, pos2 - pos1, -1, 0)
1019 self.fp.write(endrec)
1021 else:
1022 endrec = struct.pack(structEndArchive, stringEndArchive,
1023 0, 0, count, count, pos2 - pos1, pos1, 0)
1024 self.fp.write(endrec)
1025 self.fp.flush()
1026 if not self._filePassed:
1027 self.fp.close()
1028 self.fp = None
1031 class PyZipFile(ZipFile):
1032 """Class to create ZIP archives with Python library files and packages."""
1034 def writepy(self, pathname, basename = ""):
1035 """Add all files from "pathname" to the ZIP archive.
1037 If pathname is a package directory, search the directory and
1038 all package subdirectories recursively for all *.py and enter
1039 the modules into the archive. If pathname is a plain
1040 directory, listdir *.py and enter all modules. Else, pathname
1041 must be a Python *.py file and the module will be put into the
1042 archive. Added modules are always module.pyo or module.pyc.
1043 This method will compile the module.py into module.pyc if
1044 necessary.
1046 dir, name = os.path.split(pathname)
1047 if os.path.isdir(pathname):
1048 initname = os.path.join(pathname, "__init__.py")
1049 if os.path.isfile(initname):
1050 # This is a package directory, add it
1051 if basename:
1052 basename = "%s/%s" % (basename, name)
1053 else:
1054 basename = name
1055 if self.debug:
1056 print "Adding package in", pathname, "as", basename
1057 fname, arcname = self._get_codename(initname[0:-3], basename)
1058 if self.debug:
1059 print "Adding", arcname
1060 self.write(fname, arcname)
1061 dirlist = os.listdir(pathname)
1062 dirlist.remove("__init__.py")
1063 # Add all *.py files and package subdirectories
1064 for filename in dirlist:
1065 path = os.path.join(pathname, filename)
1066 root, ext = os.path.splitext(filename)
1067 if os.path.isdir(path):
1068 if os.path.isfile(os.path.join(path, "__init__.py")):
1069 # This is a package directory, add it
1070 self.writepy(path, basename) # Recursive call
1071 elif ext == ".py":
1072 fname, arcname = self._get_codename(path[0:-3],
1073 basename)
1074 if self.debug:
1075 print "Adding", arcname
1076 self.write(fname, arcname)
1077 else:
1078 # This is NOT a package directory, add its files at top level
1079 if self.debug:
1080 print "Adding files from directory", pathname
1081 for filename in os.listdir(pathname):
1082 path = os.path.join(pathname, filename)
1083 root, ext = os.path.splitext(filename)
1084 if ext == ".py":
1085 fname, arcname = self._get_codename(path[0:-3],
1086 basename)
1087 if self.debug:
1088 print "Adding", arcname
1089 self.write(fname, arcname)
1090 else:
1091 if pathname[-3:] != ".py":
1092 raise RuntimeError, \
1093 'Files added with writepy() must end with ".py"'
1094 fname, arcname = self._get_codename(pathname[0:-3], basename)
1095 if self.debug:
1096 print "Adding file", arcname
1097 self.write(fname, arcname)
1099 def _get_codename(self, pathname, basename):
1100 """Return (filename, archivename) for the path.
1102 Given a module name path, return the correct file path and
1103 archive name, compiling if necessary. For example, given
1104 /python/lib/string, return (/python/lib/string.pyc, string).
1106 file_py = pathname + ".py"
1107 file_pyc = pathname + ".pyc"
1108 file_pyo = pathname + ".pyo"
1109 if os.path.isfile(file_pyo) and \
1110 os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
1111 fname = file_pyo # Use .pyo file
1112 elif not os.path.isfile(file_pyc) or \
1113 os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
1114 import py_compile
1115 if self.debug:
1116 print "Compiling", file_py
1117 try:
1118 py_compile.compile(file_py, file_pyc, None, True)
1119 except py_compile.PyCompileError,err:
1120 print err.msg
1121 fname = file_pyc
1122 else:
1123 fname = file_pyc
1124 archivename = os.path.split(fname)[1]
1125 if basename:
1126 archivename = "%s/%s" % (basename, archivename)
1127 return (fname, archivename)
1130 def main(args = None):
1131 import textwrap
1132 USAGE=textwrap.dedent("""\
1133 Usage:
1134 zipfile.py -l zipfile.zip # Show listing of a zipfile
1135 zipfile.py -t zipfile.zip # Test if a zipfile is valid
1136 zipfile.py -e zipfile.zip target # Extract zipfile into target dir
1137 zipfile.py -c zipfile.zip src ... # Create zipfile from sources
1138 """)
1139 if args is None:
1140 args = sys.argv[1:]
1142 if not args or args[0] not in ('-l', '-c', '-e', '-t'):
1143 print USAGE
1144 sys.exit(1)
1146 if args[0] == '-l':
1147 if len(args) != 2:
1148 print USAGE
1149 sys.exit(1)
1150 zf = ZipFile(args[1], 'r')
1151 zf.printdir()
1152 zf.close()
1154 elif args[0] == '-t':
1155 if len(args) != 2:
1156 print USAGE
1157 sys.exit(1)
1158 zf = ZipFile(args[1], 'r')
1159 zf.testzip()
1160 print "Done testing"
1162 elif args[0] == '-e':
1163 if len(args) != 3:
1164 print USAGE
1165 sys.exit(1)
1167 zf = ZipFile(args[1], 'r')
1168 out = args[2]
1169 for path in zf.namelist():
1170 if path.startswith('./'):
1171 tgt = os.path.join(out, path[2:])
1172 else:
1173 tgt = os.path.join(out, path)
1175 tgtdir = os.path.dirname(tgt)
1176 if not os.path.exists(tgtdir):
1177 os.makedirs(tgtdir)
1178 fp = open(tgt, 'wb')
1179 fp.write(zf.read(path))
1180 fp.close()
1181 zf.close()
1183 elif args[0] == '-c':
1184 if len(args) < 3:
1185 print USAGE
1186 sys.exit(1)
1188 def addToZip(zf, path, zippath):
1189 if os.path.isfile(path):
1190 zf.write(path, zippath, ZIP_DEFLATED)
1191 elif os.path.isdir(path):
1192 for nm in os.listdir(path):
1193 addToZip(zf,
1194 os.path.join(path, nm), os.path.join(zippath, nm))
1195 # else: ignore
1197 zf = ZipFile(args[1], 'w', allowZip64=True)
1198 for src in args[2:]:
1199 addToZip(zf, src, os.path.basename(src))
1201 zf.close()
1203 if __name__ == "__main__":
1204 main()