Instead of doing a make test, run the regression tests out of the installed
[python.git] / Lib / zipfile.py
blob865267a7ccc6c71a3b608a1bc8c82bdb2c794794
1 """
2 Read and write ZIP files.
3 """
4 import struct, os, time, sys
5 import binascii, cStringIO
7 try:
8 import zlib # We may need its compression method
9 except ImportError:
10 zlib = None
12 __all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
13 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
class BadZipfile(Exception):
    """Raised when a file is not a ZIP archive or its contents are corrupt."""
class LargeZipFile(Exception):
    """Raised while writing an archive that would need the ZIP64
    extensions when those extensions have not been enabled."""
# "error" is an alias for BadZipfile, the exception raised by this module.
error = BadZipfile
# Sizes and offsets above this value (2**31 - 1) are written using the
# ZIP64 extensions.
ZIP64_LIMIT = (1 << 31) - 1

# Compression methods understood by this module; no others are supported.
ZIP_STORED = 0
ZIP_DEFLATED = 8

# struct formats and magic numbers for the records of a ZIP file.

# End of archive record: 8 items, 22 bytes.
structEndArchive = "<4s4H2lH"
stringEndArchive = "PK\005\006"

# Central directory entry: 19 items, 46 bytes.
structCentralDir = "<4s4B4HlLL5HLl"
stringCentralDir = "PK\001\002"

# Local file header: 12 items, 30 bytes.
structFileHeader = "<4s2B4HlLL2H"
stringFileHeader = "PK\003\004"

# ZIP64 end of archive locator: 4 items, 20 bytes.
structEndArchive64Locator = "<4slql"
stringEndArchive64Locator = "PK\x06\x07"

# ZIP64 end of archive record: 10 items, 56 bytes.
structEndArchive64 = "<4sqhhllqqqq"
stringEndArchive64 = "PK\x06\x06"

# Positions of the fields in an unpacked central directory entry (0..18).
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,                 # is this meaningful?
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)

# Positions of the fields in an unpacked local file header (0..11).
(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,                 # is this meaningful?
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    Return True when an "End of Central Directory" record can be located
    in the file named by 'filename'.  Return False otherwise, including
    when the file cannot be opened or read (IOError).
    """
    try:
        fpin = open(filename, "rb")
        try:
            endrec = _EndRecData(fpin)
        finally:
            # Close the handle even when reading the end record fails.
            fpin.close()
    except IOError:
        return False
    return bool(endrec)
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    'offset' is the position of the classic end-of-archive record relative
    to the end of the file.  'endrec' is returned unchanged when either
    ZIP64 signature is missing; otherwise items 1..6 are overwritten with
    the values from the ZIP64 record.  BadZipfile is raised for archives
    that span several disks.
    """
    loc_len = struct.calcsize(structEndArchive64Locator)
    fpin.seek(offset - loc_len, 2)
    locator = struct.unpack(structEndArchive64Locator, fpin.read(loc_len))
    magic, first_disk, _rel_offset, disk_total = locator
    if magic != stringEndArchive64Locator:
        return endrec

    if not (first_disk == 0 and disk_total == 1):
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data', i.e. the ZIP64 record sits
    # immediately before the locator.
    rec_len = struct.calcsize(structEndArchive64)
    fpin.seek(offset - loc_len - rec_len, 2)
    record = struct.unpack(structEndArchive64, fpin.read(rec_len))
    if record[0] != stringEndArchive64:
        return endrec

    # record[4:10] holds disk_num, disk_dir, dircount, dircount2, dirsize
    # and diroffset; they replace the matching slots of the classic record.
    endrec[1:7] = record[4:10]
    return endrec
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the eight items in the ZIP "End of central dir"
    record followed by the archive comment and the file seek offset of
    this record (ten items in all).  None is returned when no valid record
    is found, including when the file is too short to contain one."""
    fpin.seek(0, 2)
    filesize = fpin.tell()
    if filesize < 22:
        # Too short for even a comment-less record; seeking to -22 from
        # the end would fail.
        return None

    fpin.seek(-22, 2)               # Assume no archive comment.
    data = fpin.read()
    if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
        endrec = list(struct.unpack(structEndArchive, data))
        endrec.append("")               # Append the archive comment
        endrec.append(filesize - 22)    # Append the record start offset
        if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
            return _EndRecData64(fpin, -22, endrec)
        return endrec

    # Search the last END_BLOCK bytes of the file for the record signature.
    # The comment is appended to the ZIP file and has a 16 bit length.
    # So the comment may be up to 64K long.  We limit the search for the
    # signature to a few Kbytes at the end of the file for efficiency.
    # also, the signature must not appear in the comment.
    END_BLOCK = min(filesize, 1024 * 4)
    fpin.seek(filesize - END_BLOCK, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start < 0:
        return None                     # Signature not found
    record = data[start:start+22]
    if len(record) != 22:
        # Signature too close to the end of the file to be a full record.
        return None
    endrec = list(struct.unpack(structEndArchive, record))
    comment = data[start+22:]
    if endrec[7] != len(comment):
        return None                     # Comment length does not check out
    # Append the archive comment and start offset
    endrec.append(comment)
    endrec.append(filesize - END_BLOCK + start)
    if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
        return _EndRecData64(fpin, - END_BLOCK + start, endrec)
    return endrec
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry named 'filename' with modification time
        'date_time' (year, month, day, hour, min, sec)."""
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        When either size exceeds ZIP64_LIMIT a ZIP64 extra field is added,
        the 32-bit size fields are set to 0xffffffff, and extract_version
        and create_version are raised to at least 45 on this instance.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<hhqq'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff # -1
            compress_size = 0xffffffff # -1
            self.extract_version = max(45, self.extract_version)
            # Raise create_version from its own value so that a higher
            # creator version is never lowered to extract_version.
            self.create_version = max(45, self.create_version)

        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, self.flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(self.filename), len(extra))
        return header + self.filename + extra

    def _decodeExtra(self):
        """Decode the ZIP64 records (type 1) found in self.extra.

        Each of file_size, compress_size and header_offset that holds the
        placeholder -1 / 0xFFFFFFFF is replaced, in that order, by the next
        64-bit value of the record.  Raises RuntimeError when a type 1
        record has an unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # Stop at a trailing fragment too short to hold a record header.
        while len(extra) >= 4:
            # Unsigned fields: a length read as negative would never
            # advance past the record and loop forever.
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<qqq', extra[4:28])
                elif ln == 16:
                    counts = unpack('<qq', extra[4:20])
                elif ln == 8:
                    counts = unpack('<q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size == -1 or self.file_size == 0xFFFFFFFF:
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == -1 or self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == -1 or self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read "r", write "w" or append "a".
    compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
    allowZip64: if True ZipFile will create files with ZIP64 extensions when
                needed, otherwise it will raise an exception when this would
                be necessary.

    """

    fp = None                   # Set here since __del__ checks it

    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
        """Open the ZIP file with mode read "r", write "w" or append "a".

        Raises RuntimeError for an unsupported compression method, for
        ZIP_DEFLATED without zlib, and for any other mode.
        """
        self._allowZip64 = allowZip64
        self._didModify = False
        if compression == ZIP_STORED:
            pass
        elif compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "Compression requires the (missing) zlib module")
        else:
            raise RuntimeError("That compression method is not supported")

        # Normalize and validate the mode before any file is opened, so a
        # bad mode is reported as RuntimeError with nothing left open.
        key = mode.replace('b', '')[:1]
        if key not in ('r', 'w', 'a'):
            raise RuntimeError('Mode must be "r", "w" or "a"')

        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.mode = key

        # Check if we were passed a file-like object
        if isinstance(file, basestring):
            self._filePassed = 0
            self.filename = file
            modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
            # Look up the normalized key so that e.g. "rb" is accepted.
            self.fp = open(file, modeDict[key])
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)

        if key == 'r':
            self._GetContents()
        elif key == 'a':
            try:                        # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)
            except BadZipfile:          # file is not a zip file, just append
                self.fp.seek(0, 2)
        # key == 'w': nothing to read, start writing at the current position

    def _GetContents(self):
        """Read the directory, making sure we close the file if the format
        is bad."""
        try:
            self._RealGetContents()
        except BadZipfile:
            if not self._filePassed:
                self.fp.close()
                self.fp = None
            raise

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Fills self.filelist and self.NameToInfo, and sets self.comment and
        self.start_dir.  Raises BadZipfile when no end record is found or
        a central directory entry has a bad magic number.
        """
        fp = self.fp
        endrec = _EndRecData(fp)
        if not endrec:
            raise BadZipfile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[5]             # bytes in central directory
        offset_cd = endrec[6]   # offset of central directory
        self.comment = endrec[8]        # archive comment
        # endrec[9] is the offset of the "End of Central Dir" record
        if endrec[9] > ZIP64_LIMIT:
            # Skip back over the ZIP64 end record (56 bytes) and its
            # locator (20 bytes) as well.
            inferred = endrec[9] - size_cd - 56 - 20
        else:
            inferred = endrec[9] - size_cd
        # "concat" is zero, unless zip was concatenated to another file
        concat = inferred - offset_cd
        if self.debug > 2:
            print("given, inferred, offset %s %s %s"
                  % (offset_cd, inferred, concat))
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        data = fp.read(size_cd)
        fp = cStringIO.StringIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(46)
            total = total + 46
            if centdir[0:4] != stringCentralDir:
                raise BadZipfile("Bad magic number for central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            total = (total + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (x.create_version, x.create_system, x.extract_version, x.reserved,
                x.flag_bits, x.compress_type, t, d,
                x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                     t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

            x._decodeExtra()
            x.header_offset = x.header_offset + concat
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x
            if self.debug > 2:
                print("total %s" % total)

    def namelist(self):
        """Return a list of file names in the archive."""
        return [zinfo.filename for zinfo in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        return self.filelist

    def printdir(self):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"))
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size))

    def testzip(self):
        """Read all the files and check the CRC.

        Return the name of the first file for which read() raises
        BadZipfile, or None when every file reads cleanly."""
        for zinfo in self.filelist:
            try:
                self.read(zinfo.filename)       # Check CRC-32
            except BadZipfile:
                return zinfo.filename

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'."""
        return self.NameToInfo[name]

    def read(self, name):
        """Return file bytes (as a string) for name.

        Raises RuntimeError when the archive is closed or not opened in
        mode "r" or "a", and BadZipfile when the local header, the
        compression method or the CRC-32 is not as expected.
        """
        if self.mode not in ("r", "a"):
            raise RuntimeError('read() requires mode "r" or "a"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to read ZIP archive that was already closed")
        zinfo = self.getinfo(name)
        filepos = self.fp.tell()
        try:
            self.fp.seek(zinfo.header_offset, 0)

            # Skip the file header:
            fheader = self.fp.read(30)
            if fheader[0:4] != stringFileHeader:
                raise BadZipfile("Bad magic number for file header")

            fheader = struct.unpack(structFileHeader, fheader)
            fname = self.fp.read(fheader[_FH_FILENAME_LENGTH])
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                self.fp.read(fheader[_FH_EXTRA_FIELD_LENGTH])

            if fname != zinfo.orig_filename:
                raise BadZipfile(
                      'File name in directory "%s" and header "%s" differ.' % (
                          zinfo.orig_filename, fname))

            data = self.fp.read(zinfo.compress_size)
        finally:
            # Always restore the position: in append mode it is where the
            # next member will be written, even if this read failed.
            self.fp.seek(filepos, 0)
        if zinfo.compress_type == ZIP_STORED:
            pass
        elif zinfo.compress_type == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "De-compression requires the (missing) zlib module")
            # zlib compress/decompress code by Jeremy Hylton of CNRI
            dc = zlib.decompressobj(-15)
            data = dc.decompress(data)
            # need to feed in unused pad byte so that zlib won't choke
            ex = dc.decompress('Z') + dc.flush()
            if ex:
                data = data + ex
        else:
            raise BadZipfile(
                  "Unsupported compression method %d for file %s" %
                  (zinfo.compress_type, name))
        crc = binascii.crc32(data)
        if crc != zinfo.CRC:
            raise BadZipfile("Bad CRC-32 for file %s" % name)
        return data

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive.

        Raises RuntimeError for a wrong mode, a closed archive or an
        unusable compression method, and LargeZipFile when ZIP64 would be
        required but is not allowed.  Duplicate names only print a warning
        when self.debug is set.
        """
        if zinfo.filename in self.NameToInfo:
            if self.debug:      # Warning for duplicate names
                print("Duplicate name: %s" % zinfo.filename)
        if self.mode not in ("w", "a"):
            raise RuntimeError('write() requires mode "w" or "a"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to write ZIP archive that was already closed")
        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
            raise RuntimeError(
                  "That compression method is not supported")
        if zinfo.file_size > ZIP64_LIMIT:
            if not self._allowZip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
        if zinfo.header_offset > ZIP64_LIMIT:
            if not self._allowZip64:
                raise LargeZipFile("Zipfile size would require ZIP64 extensions")

    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        arcname defaults to filename; the drive and leading path separators
        are removed.  compress_type defaults to the archive's compression.
        """
        if not self.fp:
            # Checked first: the header offset below needs an open file.
            raise RuntimeError(
                  "Attempt to write ZIP archive that was already closed")
        st = os.stat(filename)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        # Guard against the name becoming empty (e.g. a bare "/").
        while arcname and arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        zinfo = ZipInfo(arcname, date_time)
        zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type

        zinfo.file_size = st.st_size
        zinfo.flag_bits = 0x00
        zinfo.header_offset = self.fp.tell()    # Start of header bytes

        self._writecheck(zinfo)
        self._didModify = True
        fp = open(filename, "rb")
        try:
            # Must overwrite CRC and sizes with correct data later
            zinfo.CRC = CRC = 0
            zinfo.compress_size = compress_size = 0
            zinfo.file_size = file_size = 0
            self.fp.write(zinfo.FileHeader())
            if zinfo.compress_type == ZIP_DEFLATED:
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                     zlib.DEFLATED, -15)
            else:
                cmpr = None
            while 1:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = binascii.crc32(buf, CRC)
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                self.fp.write(buf)
        finally:
            # Release the source file even if reading or writing failed.
            fp.close()
        if cmpr:
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # Seek backwards and write CRC and file sizes
        position = self.fp.tell()       # Preserve current position in file
        self.fp.seek(zinfo.header_offset + 14, 0)
        self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
              zinfo.file_size))
        self.fp.seek(position, 0)
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def writestr(self, zinfo_or_arcname, bytes):
        """Write a file into the archive.  The contents is the string
        'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
        the name of the file in the archive."""
        if not self.fp:
            # Checked first: the header offset below needs an open file.
            raise RuntimeError(
                  "Attempt to write ZIP archive that was already closed")
        if not isinstance(zinfo_or_arcname, ZipInfo):
            # Keep only (year, month, day, hour, min, sec); printdir()
            # formats date_time with exactly six fields.
            zinfo = ZipInfo(filename=zinfo_or_arcname,
                            date_time=time.localtime(time.time())[:6])
            zinfo.compress_type = self.compression
        else:
            zinfo = zinfo_or_arcname
        zinfo.file_size = len(bytes)            # Uncompressed size
        zinfo.header_offset = self.fp.tell()    # Start of header bytes
        self._writecheck(zinfo)
        self._didModify = True
        zinfo.CRC = binascii.crc32(bytes)       # CRC-32 checksum
        if zinfo.compress_type == ZIP_DEFLATED:
            co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
            bytes = co.compress(bytes) + co.flush()
            zinfo.compress_size = len(bytes)    # Compressed size
        else:
            zinfo.compress_size = zinfo.file_size
        self.fp.write(zinfo.FileHeader())
        self.fp.write(bytes)
        self.fp.flush()
        if zinfo.flag_bits & 0x08:
            # Write CRC and file sizes after the file data
            self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
                  zinfo.file_size))
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        self.close()

    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records."""
        if self.fp is None:
            return

        if self.mode in ("w", "a") and self._didModify: # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                # 64-bit values that do not fit the 32-bit fields; they go
                # into a ZIP64 extra record in this order.
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff #-1
                    compress_size = 0xffffffff #-1
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff #-1
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                            '<hh' + 'q'*len(extra),
                            1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                centdir = struct.pack(structCentralDir,
                  stringCentralDir, create_version,
                  zinfo.create_system, extract_version, zinfo.reserved,
                  zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                  zinfo.CRC, compress_size, file_size,
                  len(zinfo.filename), len(extra_data), len(zinfo.comment),
                  0, zinfo.internal_attr, zinfo.external_attr,
                  header_offset)
                self.fp.write(centdir)
                self.fp.write(zinfo.filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            if pos1 > ZIP64_LIMIT:
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                        structEndArchive64, stringEndArchive64,
                        44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                        structEndArchive64Locator,
                        stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)

                endrec = struct.pack(structEndArchive, stringEndArchive,
                            0, 0, count, count, pos2 - pos1, 0xffffffff, 0) # -1, 0)
                self.fp.write(endrec)

            else:
                endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, count, count, pos2 - pos1, pos1, 0)
                self.fp.write(endrec)
            self.fp.flush()
        if not self._filePassed:
            self.fp.close()
        self.fp = None
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.
        """
        name = os.path.split(pathname)[1]
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in %s as %s" % (pathname, basename))
                self._write_module(initname[0:-3], basename)
                dirlist = os.listdir(pathname)
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename)  # Recursive call
                    elif ext == ".py":
                        self._write_module(path[0:-3], basename)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory %s" % pathname)
                for filename in os.listdir(pathname):
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if ext == ".py":
                        self._write_module(path[0:-3], basename)
        else:
            if pathname[-3:] != ".py":
                raise RuntimeError(
                      'Files added with writepy() must end with ".py"')
            self._write_module(pathname[0:-3], basename, "Adding file")

    def _write_module(self, pathname, basename, label="Adding"):
        """Resolve the compiled file for the module path 'pathname' (given
        without ".py") and write it to the archive; 'label' prefixes the
        debug message."""
        fname, arcname = self._get_codename(pathname, basename)
        if self.debug:
            print("%s %s" % (label, arcname))
        self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).
        """
        file_py  = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        if os.path.isfile(file_pyo) and \
                            os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
            fname = file_pyo    # Use .pyo file
        elif not os.path.isfile(file_pyc) or \
             os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
            import py_compile
            if self.debug:
                print("Compiling %s" % file_py)
            try:
                py_compile.compile(file_py, file_pyc, None, True)
            except py_compile.PyCompileError:
                # Fetch the exception this way so the code parses on every
                # Python version; the error is reported, not re-raised.
                err = sys.exc_info()[1]
                print(err.msg)
            fname = file_pyc
        else:
            fname = file_pyc
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
def main(args = None):
    """Command-line interface: list (-l), test (-t), extract (-e) or
    create (-c) a zipfile.

    'args' defaults to sys.argv[1:].  Prints the usage text and exits with
    status 1 when the arguments are not understood.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            # testzip() returns the name of the first corrupt member.
            print("The following enclosed file is corrupted: %r" % badfile)
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            # Target directory with a trailing separator, for prefix tests.
            out_root = os.path.join(os.path.abspath(out), '')
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                # Member names come from the archive and are untrusted:
                # refuse absolute names and "../" escapes from the target.
                resolved = os.path.join(os.path.abspath(tgt), '')
                if not resolved.startswith(out_root):
                    print("Skipping unsafe path: %r" % path)
                    continue

                if path.endswith('/'):
                    # Directory entry: create it, there is no data to write.
                    if not os.path.exists(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                fp = open(tgt, 'wb')
                try:
                    fp.write(zf.read(path))
                finally:
                    fp.close()
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recurse into a directory; anything else is
            # ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
# Run the command-line interface when this file is executed as a script.
if __name__ == "__main__":
    main()