1 "Read and write ZIP files."
3 import struct
, os
, time
, sys
7 import zlib
# We may need its compression method
11 __all__
= ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
12 "ZipInfo", "ZipFile", "PyZipFile"]
14 class BadZipfile(Exception):
16 error
= BadZipfile
# The exception raised by this module
18 # constants for Zip file compression methods
21 # Other ZIP compression methods not supported
23 # Here are some struct module formats for reading headers
24 structEndArchive
= "<4s4H2lH" # 9 items, end of archive, 22 bytes
25 stringEndArchive
= "PK\005\006" # magic number for end of archive record
26 structCentralDir
= "<4s4B4HlLL5HLl"# 19 items, central directory, 46 bytes
27 stringCentralDir
= "PK\001\002" # magic number for central directory
28 structFileHeader
= "<4s2B4HlLL2H" # 12 items, file header record, 30 bytes
29 stringFileHeader
= "PK\003\004" # magic number for file header
31 # indexes of entries in the central directory structure
33 _CD_CREATE_VERSION
= 1
35 _CD_EXTRACT_VERSION
= 3
36 _CD_EXTRACT_SYSTEM
= 4 # is this meaningful?
42 _CD_COMPRESSED_SIZE
= 10
43 _CD_UNCOMPRESSED_SIZE
= 11
44 _CD_FILENAME_LENGTH
= 12
45 _CD_EXTRA_FIELD_LENGTH
= 13
46 _CD_COMMENT_LENGTH
= 14
47 _CD_DISK_NUMBER_START
= 15
48 _CD_INTERNAL_FILE_ATTRIBUTES
= 16
49 _CD_EXTERNAL_FILE_ATTRIBUTES
= 17
50 _CD_LOCAL_HEADER_OFFSET
= 18
52 # indexes of entries in the local file header structure
54 _FH_EXTRACT_VERSION
= 1
55 _FH_EXTRACT_SYSTEM
= 2 # is this meaningful?
56 _FH_GENERAL_PURPOSE_FLAG_BITS
= 3
57 _FH_COMPRESSION_METHOD
= 4
61 _FH_COMPRESSED_SIZE
= 8
62 _FH_UNCOMPRESSED_SIZE
= 9
63 _FH_FILENAME_LENGTH
= 10
64 _FH_EXTRA_FIELD_LENGTH
= 11
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    'filename' is the path of the file to inspect.  Returns True when an
    "End of Central Directory" record can be located in it, and False
    otherwise -- including when the file cannot be opened or read.
    """
    try:
        fpin = open(filename, "rb")
        try:
            endrec = _EndRecData(fpin)
        finally:
            # Release the handle even if reading the record fails.
            fpin.close()
    except IOError:
        return False
    return bool(endrec)             # True: file has correct magic number
78 def _EndRecData(fpin
):
79 """Return data from the "End of Central Directory" record, or None.
81 The data is a list of the nine items in the ZIP "End of central dir"
82 record followed by a tenth item, the file seek offset of this record."""
83 fpin
.seek(-22, 2) # Assume no archive comment.
84 filesize
= fpin
.tell() + 22 # Get file size
86 if data
[0:4] == stringEndArchive
and data
[-2:] == "\000\000":
87 endrec
= struct
.unpack(structEndArchive
, data
)
89 endrec
.append("") # Append the archive comment
90 endrec
.append(filesize
- 22) # Append the record start offset
92 # Search the last END_BLOCK bytes of the file for the record signature.
93 # The comment is appended to the ZIP file and has a 16 bit length.
94 # So the comment may be up to 64K long. We limit the search for the
95 # signature to a few Kbytes at the end of the file for efficiency.
96 # also, the signature must not appear in the comment.
97 END_BLOCK
= min(filesize
, 1024 * 4)
98 fpin
.seek(filesize
- END_BLOCK
, 0)
100 start
= data
.rfind(stringEndArchive
)
101 if start
>= 0: # Correct signature string was found
102 endrec
= struct
.unpack(structEndArchive
, data
[start
:start
+22])
103 endrec
= list(endrec
)
104 comment
= data
[start
+22:]
105 if endrec
[7] == len(comment
): # Comment length checks out
106 # Append the archive comment and start offset
107 endrec
.append(comment
)
108 endrec
.append(filesize
- END_BLOCK
+ start
)
110 return # Error, return None
114 """Class with attributes describing each file in the ZIP archive."""
def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
    """Set up the attributes describing one file in the archive.

    filename:  name of the file; kept unchanged in 'orig_filename' and in
               normalized form in 'filename'.
    date_time: sequence of year, month, day, hour, min, sec.
    """
    self.orig_filename = filename   # Original file name in archive
    # Terminate the file name at the first null byte.  Null bytes in file
    # names are used as tricks by viruses in archives.
    null_byte = filename.find(chr(0))
    if null_byte >= 0:
        # find() returns -1 when there is no null byte; slicing with -1
        # would silently drop the last character of the name.
        filename = filename[0:null_byte]
    # This is used to ensure paths in generated ZIP files always use
    # forward slashes as the directory separator, as required by the
    # ZIP format specification.
    if os.sep != "/":
        filename = filename.replace(os.sep, "/")
    self.filename = filename        # Normalized file name
    self.date_time = date_time      # year, month, day, hour, min, sec
    # Standard values:
    self.compress_type = ZIP_STORED # Type of compression for the file
    self.comment = ""               # Comment for each file
    self.extra = ""                 # ZIP extra data
    if sys.platform == 'win32':
        self.create_system = 0      # System which created ZIP archive
    else:
        # Assume everything else is unix-y
        self.create_system = 3      # System which created ZIP archive
    self.create_version = 20        # Version which created ZIP archive
    self.extract_version = 20       # Version needed to extract archive
    self.reserved = 0               # Must be zero
    self.flag_bits = 0              # ZIP flag bits
    self.volume = 0                 # Volume number of file header
    self.internal_attr = 0          # Internal attributes
    self.external_attr = 0          # External file attributes
    # Other attributes are set by class ZipFile:
    # header_offset         Byte offset to the file header
    # file_offset           Byte offset to the start of the file data
    # CRC                   CRC-32 of the uncompressed file
    # compress_size         Size of the compressed file
    # file_size             Size of the uncompressed file
def FileHeader(self):
    """Return the per-file header as a string.

    The result is the packed fixed-size header followed by the file name
    and the extra data.  When bit 0x08 of 'flag_bits' is set, the CRC and
    both sizes are written as zero in the header.
    """
    dt = self.date_time
    # Pack the date as (year - 1980), month, day and the time as hour,
    # minute, seconds // 2, using the shifts below.
    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
    dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
    if self.flag_bits & 0x08:
        # Set these to zero because we write them after the file data
        CRC = compress_size = file_size = 0
    else:
        CRC = self.CRC
        compress_size = self.compress_size
        file_size = self.file_size
    header = struct.pack(structFileHeader, stringFileHeader,
             self.extract_version, self.reserved, self.flag_bits,
             self.compress_type, dostime, dosdate, CRC,
             compress_size, file_size,
             len(self.filename), len(self.extra))
    return header + self.filename + self.extra
174 """ Class with methods to open, read, write, close, list zip files.
176 z = ZipFile(file, mode="r", compression=ZIP_STORED)
178 file: Either the path to the file, or a file-like object.
179 If it is a path, the file will be opened and closed by ZipFile.
180 mode: The mode can be either read "r", write "w" or append "a".
181 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
184 fp
= None # Set here since __del__ checks it
186 def __init__(self
, file, mode
="r", compression
=ZIP_STORED
):
187 """Open the ZIP file with mode read "r", write "w" or append "a"."""
188 if compression
== ZIP_STORED
:
190 elif compression
== ZIP_DEFLATED
:
193 "Compression requires the (missing) zlib module"
195 raise RuntimeError, "That compression method is not supported"
196 self
.debug
= 0 # Level of printing: 0 through 3
197 self
.NameToInfo
= {} # Find file info given name
198 self
.filelist
= [] # List of ZipInfo instances for archive
199 self
.compression
= compression
# Method of compression
200 self
.mode
= key
= mode
.replace('b', '')[0]
202 # Check if we were passed a file-like object
203 if isinstance(file, basestring
):
206 modeDict
= {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
207 self
.fp
= open(file, modeDict
[mode
])
211 self
.filename
= getattr(file, 'name', None)
218 try: # See if file is a zip file
219 self
._RealGetContents
()
220 # seek to start of directory and overwrite
221 self
.fp
.seek(self
.start_dir
, 0)
222 except BadZipfile
: # file is not a zip file, just append
225 if not self
._filePassed
:
228 raise RuntimeError, 'Mode must be "r", "w" or "a"'
230 def _GetContents(self
):
231 """Read the directory, making sure we close the file if the format
234 self
._RealGetContents
()
236 if not self
._filePassed
:
241 def _RealGetContents(self
):
242 """Read in the table of contents for the ZIP file."""
244 endrec
= _EndRecData(fp
)
246 raise BadZipfile
, "File is not a zip file"
249 size_cd
= endrec
[5] # bytes in central directory
250 offset_cd
= endrec
[6] # offset of central directory
251 self
.comment
= endrec
[8] # archive comment
252 # endrec[9] is the offset of the "End of Central Dir" record
253 x
= endrec
[9] - size_cd
254 # "concat" is zero, unless zip was concatenated to another file
255 concat
= x
- offset_cd
257 print "given, inferred, offset", offset_cd
, x
, concat
258 # self.start_dir: Position of start of central directory
259 self
.start_dir
= offset_cd
+ concat
260 fp
.seek(self
.start_dir
, 0)
262 while total
< size_cd
:
263 centdir
= fp
.read(46)
265 if centdir
[0:4] != stringCentralDir
:
266 raise BadZipfile
, "Bad magic number for central directory"
267 centdir
= struct
.unpack(structCentralDir
, centdir
)
270 filename
= fp
.read(centdir
[_CD_FILENAME_LENGTH
])
271 # Create ZipInfo instance to store file information
272 x
= ZipInfo(filename
)
273 x
.extra
= fp
.read(centdir
[_CD_EXTRA_FIELD_LENGTH
])
274 x
.comment
= fp
.read(centdir
[_CD_COMMENT_LENGTH
])
275 total
= (total
+ centdir
[_CD_FILENAME_LENGTH
]
276 + centdir
[_CD_EXTRA_FIELD_LENGTH
]
277 + centdir
[_CD_COMMENT_LENGTH
])
278 x
.header_offset
= centdir
[_CD_LOCAL_HEADER_OFFSET
] + concat
279 # file_offset must be computed below...
280 (x
.create_version
, x
.create_system
, x
.extract_version
, x
.reserved
,
281 x
.flag_bits
, x
.compress_type
, t
, d
,
282 x
.CRC
, x
.compress_size
, x
.file_size
) = centdir
[1:12]
283 x
.volume
, x
.internal_attr
, x
.external_attr
= centdir
[15:18]
284 # Convert date/time code to (year, month, day, hour, min, sec)
285 x
.date_time
= ( (d
>>9)+1980, (d
>>5)&0xF, d
&0x1F,
286 t
>>11, (t
>>5)&0x3F, (t
&0x1F) * 2 )
287 self
.filelist
.append(x
)
288 self
.NameToInfo
[x
.filename
] = x
291 for data
in self
.filelist
:
292 fp
.seek(data
.header_offset
, 0)
293 fheader
= fp
.read(30)
294 if fheader
[0:4] != stringFileHeader
:
295 raise BadZipfile
, "Bad magic number for file header"
296 fheader
= struct
.unpack(structFileHeader
, fheader
)
297 # file_offset is computed here, since the extra field for
298 # the central directory and for the local file header
299 # refer to different fields, and they can have different
301 data
.file_offset
= (data
.header_offset
+ 30
302 + fheader
[_FH_FILENAME_LENGTH
]
303 + fheader
[_FH_EXTRA_FIELD_LENGTH
])
304 fname
= fp
.read(fheader
[_FH_FILENAME_LENGTH
])
305 if fname
!= data
.orig_filename
:
306 raise RuntimeError, \
307 'File name in directory "%s" and header "%s" differ.' % (
308 data
.orig_filename
, fname
)
311 """Return a list of file names in the archive."""
313 for data
in self
.filelist
:
314 l
.append(data
.filename
)
318 """Return a list of class ZipInfo instances for files in the
323 """Print a table of contents for the zip file."""
324 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
325 for zinfo
in self
.filelist
:
326 date
= "%d-%02d-%02d %02d:%02d:%02d" % zinfo
.date_time
327 print "%-46s %s %12d" % (zinfo
.filename
, date
, zinfo
.file_size
)
330 """Read all the files and check the CRC."""
331 for zinfo
in self
.filelist
:
333 self
.read(zinfo
.filename
) # Check CRC-32
335 return zinfo
.filename
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError, with a message naming the missing member, when the
    archive holds no file called 'name'.
    """
    info = self.NameToInfo.get(name)
    if info is None:
        raise KeyError("There is no item named %r in the archive" % (name,))
    return info
341 def read(self
, name
):
342 """Return file bytes (as a string) for name."""
343 if self
.mode
not in ("r", "a"):
344 raise RuntimeError, 'read() requires mode "r" or "a"'
346 raise RuntimeError, \
347 "Attempt to read ZIP archive that was already closed"
348 zinfo
= self
.getinfo(name
)
349 filepos
= self
.fp
.tell()
350 self
.fp
.seek(zinfo
.file_offset
, 0)
351 bytes
= self
.fp
.read(zinfo
.compress_size
)
352 self
.fp
.seek(filepos
, 0)
353 if zinfo
.compress_type
== ZIP_STORED
:
355 elif zinfo
.compress_type
== ZIP_DEFLATED
:
357 raise RuntimeError, \
358 "De-compression requires the (missing) zlib module"
359 # zlib compress/decompress code by Jeremy Hylton of CNRI
360 dc
= zlib
.decompressobj(-15)
361 bytes
= dc
.decompress(bytes
)
362 # need to feed in unused pad byte so that zlib won't choke
363 ex
= dc
.decompress('Z') + dc
.flush()
368 "Unsupported compression method %d for file %s" % \
369 (zinfo
.compress_type
, name
)
370 crc
= binascii
.crc32(bytes
)
372 raise BadZipfile
, "Bad CRC-32 for file %s" % name
375 def _writecheck(self
, zinfo
):
376 """Check for errors before writing a file to the archive."""
377 if zinfo
.filename
in self
.NameToInfo
:
378 if self
.debug
: # Warning for duplicate names
379 print "Duplicate name:", zinfo
.filename
380 if self
.mode
not in ("w", "a"):
381 raise RuntimeError, 'write() requires mode "w" or "a"'
383 raise RuntimeError, \
384 "Attempt to write ZIP archive that was already closed"
385 if zinfo
.compress_type
== ZIP_DEFLATED
and not zlib
:
386 raise RuntimeError, \
387 "Compression requires the (missing) zlib module"
388 if zinfo
.compress_type
not in (ZIP_STORED
, ZIP_DEFLATED
):
389 raise RuntimeError, \
390 "That compression method is not supported"
392 def write(self
, filename
, arcname
=None, compress_type
=None):
393 """Put the bytes from filename into the archive under the name
395 st
= os
.stat(filename
)
396 mtime
= time
.localtime(st
.st_mtime
)
397 date_time
= mtime
[0:6]
398 # Create ZipInfo instance to store file information
401 arcname
= os
.path
.normpath(os
.path
.splitdrive(arcname
)[1])
402 while arcname
[0] in (os
.sep
, os
.altsep
):
403 arcname
= arcname
[1:]
404 zinfo
= ZipInfo(arcname
, date_time
)
405 zinfo
.external_attr
= (st
[0] & 0xFFFF) << 16L # Unix attributes
406 if compress_type
is None:
407 zinfo
.compress_type
= self
.compression
409 zinfo
.compress_type
= compress_type
410 self
._writecheck
(zinfo
)
411 fp
= open(filename
, "rb")
412 zinfo
.flag_bits
= 0x00
413 zinfo
.header_offset
= self
.fp
.tell() # Start of header bytes
414 # Must overwrite CRC and sizes with correct data later
416 zinfo
.compress_size
= compress_size
= 0
417 zinfo
.file_size
= file_size
= 0
418 self
.fp
.write(zinfo
.FileHeader())
419 zinfo
.file_offset
= self
.fp
.tell() # Start of file bytes
420 if zinfo
.compress_type
== ZIP_DEFLATED
:
421 cmpr
= zlib
.compressobj(zlib
.Z_DEFAULT_COMPRESSION
,
426 buf
= fp
.read(1024 * 8)
429 file_size
= file_size
+ len(buf
)
430 CRC
= binascii
.crc32(buf
, CRC
)
432 buf
= cmpr
.compress(buf
)
433 compress_size
= compress_size
+ len(buf
)
438 compress_size
= compress_size
+ len(buf
)
440 zinfo
.compress_size
= compress_size
442 zinfo
.compress_size
= file_size
444 zinfo
.file_size
= file_size
445 # Seek backwards and write CRC and file sizes
446 position
= self
.fp
.tell() # Preserve current position in file
447 self
.fp
.seek(zinfo
.header_offset
+ 14, 0)
448 self
.fp
.write(struct
.pack("<lLL", zinfo
.CRC
, zinfo
.compress_size
,
450 self
.fp
.seek(position
, 0)
451 self
.filelist
.append(zinfo
)
452 self
.NameToInfo
[zinfo
.filename
] = zinfo
def writestr(self, zinfo_or_arcname, bytes):
    """Write a file into the archive.  The contents is the string
    'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When only a name is given, a ZipInfo is created for it using the
    current local time and the archive's default compression method."""
    if not isinstance(zinfo_or_arcname, ZipInfo):
        # Keep only year..second so date_time has the same six items
        # that write() stores and that printdir() formats.
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
    else:
        zinfo = zinfo_or_arcname
    self._writecheck(zinfo)
    zinfo.file_size = len(bytes)            # Uncompressed size
    zinfo.CRC = binascii.crc32(bytes)       # CRC-32 checksum
    if zinfo.compress_type == ZIP_DEFLATED:
        # wbits of -15 matches the decompressobj(-15) used by read().
        co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
             zlib.DEFLATED, -15)
        bytes = co.compress(bytes) + co.flush()
        zinfo.compress_size = len(bytes)    # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    self.fp.write(zinfo.FileHeader())
    zinfo.file_offset = self.fp.tell()      # Start of file bytes
    self.fp.write(bytes)
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
              zinfo.file_size))
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
486 """Call the "close()" method in case the user forgot."""
490 """Close the file, and for mode "w" and "a" write the ending
494 if self
.mode
in ("w", "a"): # write ending records
496 pos1
= self
.fp
.tell()
497 for zinfo
in self
.filelist
: # write central directory
500 dosdate
= (dt
[0] - 1980) << 9 | dt
[1] << 5 | dt
[2]
501 dostime
= dt
[3] << 11 | dt
[4] << 5 |
(dt
[5] // 2)
502 centdir
= struct
.pack(structCentralDir
,
503 stringCentralDir
, zinfo
.create_version
,
504 zinfo
.create_system
, zinfo
.extract_version
, zinfo
.reserved
,
505 zinfo
.flag_bits
, zinfo
.compress_type
, dostime
, dosdate
,
506 zinfo
.CRC
, zinfo
.compress_size
, zinfo
.file_size
,
507 len(zinfo
.filename
), len(zinfo
.extra
), len(zinfo
.comment
),
508 0, zinfo
.internal_attr
, zinfo
.external_attr
,
510 self
.fp
.write(centdir
)
511 self
.fp
.write(zinfo
.filename
)
512 self
.fp
.write(zinfo
.extra
)
513 self
.fp
.write(zinfo
.comment
)
514 pos2
= self
.fp
.tell()
515 # Write end-of-zip-archive record
516 endrec
= struct
.pack(structEndArchive
, stringEndArchive
,
517 0, 0, count
, count
, pos2
- pos1
, pos1
, 0)
518 self
.fp
.write(endrec
)
520 if not self
._filePassed
:
525 class PyZipFile(ZipFile
):
526 """Class to create ZIP archives with Python library files and packages."""
528 def writepy(self
, pathname
, basename
= ""):
529 """Add all files from "pathname" to the ZIP archive.
531 If pathname is a package directory, search the directory and
532 all package subdirectories recursively for all *.py and enter
533 the modules into the archive. If pathname is a plain
534 directory, listdir *.py and enter all modules. Else, pathname
535 must be a Python *.py file and the module will be put into the
536 archive. Added modules are always module.pyo or module.pyc.
537 This method will compile the module.py into module.pyc if
540 dir, name
= os
.path
.split(pathname
)
541 if os
.path
.isdir(pathname
):
542 initname
= os
.path
.join(pathname
, "__init__.py")
543 if os
.path
.isfile(initname
):
544 # This is a package directory, add it
546 basename
= "%s/%s" % (basename
, name
)
550 print "Adding package in", pathname
, "as", basename
551 fname
, arcname
= self
._get
_codename
(initname
[0:-3], basename
)
553 print "Adding", arcname
554 self
.write(fname
, arcname
)
555 dirlist
= os
.listdir(pathname
)
556 dirlist
.remove("__init__.py")
557 # Add all *.py files and package subdirectories
558 for filename
in dirlist
:
559 path
= os
.path
.join(pathname
, filename
)
560 root
, ext
= os
.path
.splitext(filename
)
561 if os
.path
.isdir(path
):
562 if os
.path
.isfile(os
.path
.join(path
, "__init__.py")):
563 # This is a package directory, add it
564 self
.writepy(path
, basename
) # Recursive call
566 fname
, arcname
= self
._get
_codename
(path
[0:-3],
569 print "Adding", arcname
570 self
.write(fname
, arcname
)
572 # This is NOT a package directory, add its files at top level
574 print "Adding files from directory", pathname
575 for filename
in os
.listdir(pathname
):
576 path
= os
.path
.join(pathname
, filename
)
577 root
, ext
= os
.path
.splitext(filename
)
579 fname
, arcname
= self
._get
_codename
(path
[0:-3],
582 print "Adding", arcname
583 self
.write(fname
, arcname
)
585 if pathname
[-3:] != ".py":
586 raise RuntimeError, \
587 'Files added with writepy() must end with ".py"'
588 fname
, arcname
= self
._get
_codename
(pathname
[0:-3], basename
)
590 print "Adding file", arcname
591 self
.write(fname
, arcname
)
593 def _get_codename(self
, pathname
, basename
):
594 """Return (filename, archivename) for the path.
596 Given a module name path, return the correct file path and
597 archive name, compiling if necessary. For example, given
598 /python/lib/string, return (/python/lib/string.pyc, string).
600 file_py
= pathname
+ ".py"
601 file_pyc
= pathname
+ ".pyc"
602 file_pyo
= pathname
+ ".pyo"
603 if os
.path
.isfile(file_pyo
) and \
604 os
.stat(file_pyo
).st_mtime
>= os
.stat(file_py
).st_mtime
:
605 fname
= file_pyo
# Use .pyo file
606 elif not os
.path
.isfile(file_pyc
) or \
607 os
.stat(file_pyc
).st_mtime
< os
.stat(file_py
).st_mtime
:
610 print "Compiling", file_py
612 py_compile
.compile(file_py
, file_pyc
, None, True)
613 except py_compile
.PyCompileError
,err
:
618 archivename
= os
.path
.split(fname
)[1]
620 archivename
= "%s/%s" % (basename
, archivename
)
621 return (fname
, archivename
)