"Read and write ZIP files."
import binascii
import os
import struct
import sys
import time

try:
    import zlib  # We may need its compression method
except ImportError:
    # Stored (uncompressed) archives still work without zlib; code that
    # needs deflate support tests "zlib" for falsiness before using it.
    zlib = None
# Public names exported by a star-import of this module.
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile"]
class BadZipfile(Exception):
    """Raised when a file is not a ZIP archive or its contents are corrupt.

    This module raises it for a missing end-of-archive record, a bad
    magic number, an unsupported compression method and a CRC mismatch.
    """


error = BadZipfile      # The exception raised by this module
# constants for Zip file compression methods
ZIP_STORED = 0          # method code for "stored" (no compression)
ZIP_DEFLATED = 8        # method code for "deflated"
# Other ZIP compression methods not supported

# Here are some struct module formats for reading headers
structEndArchive = "<4s4H2lH"     # 8 items, end of archive, 22 bytes
stringEndArchive = "PK\005\006"   # magic number for end of archive record
structCentralDir = "<4s4B4HlLL5HLl"  # 19 items, central directory, 46 bytes
stringCentralDir = "PK\001\002"   # magic number for central directory
structFileHeader = "<4s2B4HlLL2H" # 12 items, file header record, 30 bytes
stringFileHeader = "PK\003\004"   # magic number for file header

# indexes of entries in the central directory structure
# (index 0 is the signature; 2 and 5-9 hold the creating system, flag bits,
# compression method, time, date and CRC and are only read by slicing)
_CD_CREATE_VERSION = 1
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4                  # is this meaningful?
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# indexes of entries in the local file header structure
# (index 0 is the signature; 5-7 hold time, date and CRC)
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2                  # is this meaningful?
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    filename is the path of the file to test.  Returns True when an
    "End of Central Directory" record is found at the end of the file,
    and False when none is found or the file cannot be opened or read
    (IOError is swallowed on purpose: an unreadable file is "not a zip").
    """
    try:
        fpin = open(filename, "rb")
        try:
            endrec = _EndRecData(fpin)
        finally:
            fpin.close()            # do not leak the handle on a read error
        if endrec:
            return True             # file has correct magic number
    except IOError:
        pass
    return False
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    fpin is a seekable file object opened for binary reading.

    On success the result is a list of ten items: the eight fields
    unpacked with structEndArchive (index 5 is the central directory
    size, 6 its offset, 7 the comment length), then the archive comment
    at index 8 and the file offset of the record itself at index 9.
    None is returned when no valid record is found.
    """
    fpin.seek(-22, 2)               # Assume no archive comment.
    filesize = fpin.tell() + 22     # Get file size
    data = fpin.read()
    if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
        endrec = struct.unpack(structEndArchive, data)
        endrec = list(endrec)       # unpack() gives a tuple; we append below
        endrec.append("")               # Append the archive comment
        endrec.append(filesize - 22)    # Append the record start offset
        return endrec
    # Search the last END_BLOCK bytes of the file for the record signature.
    # The comment is appended to the ZIP file and has a 16 bit length.
    # So the comment may be up to 64K long.  We limit the search for the
    # signature to a few Kbytes at the end of the file for efficiency.
    # also, the signature must not appear in the comment.
    END_BLOCK = min(filesize, 1024 * 4)
    fpin.seek(filesize - END_BLOCK, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:      # Correct signature string was found
        endrec = struct.unpack(structEndArchive, data[start:start+22])
        endrec = list(endrec)
        comment = data[start+22:]
        if endrec[7] == len(comment):   # Comment length checks out
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(filesize - END_BLOCK + start)
            return endrec
    return      # Error, return None
class ZipInfo:
    """Class with attributes describing each file in the ZIP archive."""

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Describe one archive member.

        filename is the member name; it is cut at the first null byte and
        its path separators are normalised to "/".  date_time is a
        (year, month, day, hour, min, sec) sequence.
        """
        self.orig_filename = filename   # Original file name in archive
        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:              # find() returns -1 when absent
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/":
            filename = filename.replace(os.sep, "/")
        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0      # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3      # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # file_offset           Byte offset to the start of the file data
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        The result is the packed 30-byte local file header followed by
        the file name and the extra field.  When bit 0x08 of flag_bits is
        set, the CRC and both sizes are written as zero.
        """
        dt = self.date_time
        # Pack the date and time into the 16-bit MS-DOS layout.
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved,
                             self.flag_bits, self.compress_type,
                             dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(self.filename), len(self.extra))
        return header + self.filename + self.extra
174 """ Class with methods to open, read, write, close, list zip files.
176 z = ZipFile(file, mode="r", compression=ZIP_STORED)
178 file: Either the path to the file, or a file-like object.
179 If it is a path, the file will be opened and closed by ZipFile.
180 mode: The mode can be either read "r", write "w" or append "a".
181 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
184 fp
= None # Set here since __del__ checks it
186 def __init__(self
, file, mode
="r", compression
=ZIP_STORED
):
187 """Open the ZIP file with mode read "r", write "w" or append "a"."""
188 if compression
== ZIP_STORED
:
190 elif compression
== ZIP_DEFLATED
:
193 "Compression requires the (missing) zlib module"
195 raise RuntimeError, "That compression method is not supported"
196 self
.debug
= 0 # Level of printing: 0 through 3
197 self
.NameToInfo
= {} # Find file info given name
198 self
.filelist
= [] # List of ZipInfo instances for archive
199 self
.compression
= compression
# Method of compression
200 self
.mode
= key
= mode
.replace('b', '')[0]
202 # Check if we were passed a file-like object
203 if isinstance(file, basestring
):
206 modeDict
= {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
207 self
.fp
= open(file, modeDict
[mode
])
211 self
.filename
= getattr(file, 'name', None)
218 try: # See if file is a zip file
219 self
._RealGetContents
()
220 # seek to start of directory and overwrite
221 self
.fp
.seek(self
.start_dir
, 0)
222 except BadZipfile
: # file is not a zip file, just append
225 if not self
._filePassed
:
228 raise RuntimeError, 'Mode must be "r", "w" or "a"'
230 def _GetContents(self
):
231 """Read the directory, making sure we close the file if the format
234 self
._RealGetContents
()
236 if not self
._filePassed
:
241 def _RealGetContents(self
):
242 """Read in the table of contents for the ZIP file."""
244 endrec
= _EndRecData(fp
)
246 raise BadZipfile
, "File is not a zip file"
249 size_cd
= endrec
[5] # bytes in central directory
250 offset_cd
= endrec
[6] # offset of central directory
251 self
.comment
= endrec
[8] # archive comment
252 # endrec[9] is the offset of the "End of Central Dir" record
253 x
= endrec
[9] - size_cd
254 # "concat" is zero, unless zip was concatenated to another file
255 concat
= x
- offset_cd
257 print "given, inferred, offset", offset_cd
, x
, concat
258 # self.start_dir: Position of start of central directory
259 self
.start_dir
= offset_cd
+ concat
260 fp
.seek(self
.start_dir
, 0)
262 while total
< size_cd
:
263 centdir
= fp
.read(46)
265 if centdir
[0:4] != stringCentralDir
:
266 raise BadZipfile
, "Bad magic number for central directory"
267 centdir
= struct
.unpack(structCentralDir
, centdir
)
270 filename
= fp
.read(centdir
[_CD_FILENAME_LENGTH
])
271 # Create ZipInfo instance to store file information
272 x
= ZipInfo(filename
)
273 x
.extra
= fp
.read(centdir
[_CD_EXTRA_FIELD_LENGTH
])
274 x
.comment
= fp
.read(centdir
[_CD_COMMENT_LENGTH
])
275 total
= (total
+ centdir
[_CD_FILENAME_LENGTH
]
276 + centdir
[_CD_EXTRA_FIELD_LENGTH
]
277 + centdir
[_CD_COMMENT_LENGTH
])
278 x
.header_offset
= centdir
[_CD_LOCAL_HEADER_OFFSET
] + concat
279 # file_offset must be computed below...
280 (x
.create_version
, x
.create_system
, x
.extract_version
, x
.reserved
,
281 x
.flag_bits
, x
.compress_type
, t
, d
,
282 x
.CRC
, x
.compress_size
, x
.file_size
) = centdir
[1:12]
283 x
.volume
, x
.internal_attr
, x
.external_attr
= centdir
[15:18]
284 # Convert date/time code to (year, month, day, hour, min, sec)
285 x
.date_time
= ( (d
>>9)+1980, (d
>>5)&0xF, d
&0x1F,
286 t
>>11, (t
>>5)&0x3F, (t
&0x1F) * 2 )
287 self
.filelist
.append(x
)
288 self
.NameToInfo
[x
.filename
] = x
291 for data
in self
.filelist
:
292 fp
.seek(data
.header_offset
, 0)
293 fheader
= fp
.read(30)
294 if fheader
[0:4] != stringFileHeader
:
295 raise BadZipfile
, "Bad magic number for file header"
296 fheader
= struct
.unpack(structFileHeader
, fheader
)
297 # file_offset is computed here, since the extra field for
298 # the central directory and for the local file header
299 # refer to different fields, and they can have different
301 data
.file_offset
= (data
.header_offset
+ 30
302 + fheader
[_FH_FILENAME_LENGTH
]
303 + fheader
[_FH_EXTRA_FIELD_LENGTH
])
304 fname
= fp
.read(fheader
[_FH_FILENAME_LENGTH
])
305 if fname
!= data
.orig_filename
:
306 raise RuntimeError, \
307 'File name in directory "%s" and header "%s" differ.' % (
308 data
.orig_filename
, fname
)
311 """Return a list of file names in the archive."""
313 for data
in self
.filelist
:
314 l
.append(data
.filename
)
318 """Return a list of class ZipInfo instances for files in the
323 """Print a table of contents for the zip file."""
324 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
325 for zinfo
in self
.filelist
:
326 date
= "%d-%02d-%02d %02d:%02d:%02d" % zinfo
.date_time
327 print "%-46s %s %12d" % (zinfo
.filename
, date
, zinfo
.file_size
)
330 """Read all the files and check the CRC."""
331 for zinfo
in self
.filelist
:
333 self
.read(zinfo
.filename
) # Check CRC-32
335 return zinfo
.filename
337 def getinfo(self
, name
):
338 """Return the instance of ZipInfo given 'name'."""
339 return self
.NameToInfo
[name
]
341 def read(self
, name
):
342 """Return file bytes (as a string) for name."""
343 if self
.mode
not in ("r", "a"):
344 raise RuntimeError, 'read() requires mode "r" or "a"'
346 raise RuntimeError, \
347 "Attempt to read ZIP archive that was already closed"
348 zinfo
= self
.getinfo(name
)
349 filepos
= self
.fp
.tell()
350 self
.fp
.seek(zinfo
.file_offset
, 0)
351 bytes
= self
.fp
.read(zinfo
.compress_size
)
352 self
.fp
.seek(filepos
, 0)
353 if zinfo
.compress_type
== ZIP_STORED
:
355 elif zinfo
.compress_type
== ZIP_DEFLATED
:
357 raise RuntimeError, \
358 "De-compression requires the (missing) zlib module"
359 # zlib compress/decompress code by Jeremy Hylton of CNRI
360 dc
= zlib
.decompressobj(-15)
361 bytes
= dc
.decompress(bytes
)
362 # need to feed in unused pad byte so that zlib won't choke
363 ex
= dc
.decompress('Z') + dc
.flush()
368 "Unsupported compression method %d for file %s" % \
369 (zinfo
.compress_type
, name
)
370 crc
= binascii
.crc32(bytes
)
372 raise BadZipfile
, "Bad CRC-32 for file %s" % name
375 def _writecheck(self
, zinfo
):
376 """Check for errors before writing a file to the archive."""
377 if zinfo
.filename
in self
.NameToInfo
:
378 if self
.debug
: # Warning for duplicate names
379 print "Duplicate name:", zinfo
.filename
380 if self
.mode
not in ("w", "a"):
381 raise RuntimeError, 'write() requires mode "w" or "a"'
383 raise RuntimeError, \
384 "Attempt to write ZIP archive that was already closed"
385 if zinfo
.compress_type
== ZIP_DEFLATED
and not zlib
:
386 raise RuntimeError, \
387 "Compression requires the (missing) zlib module"
388 if zinfo
.compress_type
not in (ZIP_STORED
, ZIP_DEFLATED
):
389 raise RuntimeError, \
390 "That compression method is not supported"
392 def write(self
, filename
, arcname
=None, compress_type
=None):
393 """Put the bytes from filename into the archive under the name
395 st
= os
.stat(filename
)
396 mtime
= time
.localtime(st
.st_mtime
)
397 date_time
= mtime
[0:6]
398 # Create ZipInfo instance to store file information
400 zinfo
= ZipInfo(filename
, date_time
)
402 zinfo
= ZipInfo(arcname
, date_time
)
403 zinfo
.external_attr
= (st
[0] & 0xFFFF) << 16L # Unix attributes
404 if compress_type
is None:
405 zinfo
.compress_type
= self
.compression
407 zinfo
.compress_type
= compress_type
408 self
._writecheck
(zinfo
)
409 fp
= open(filename
, "rb")
410 zinfo
.flag_bits
= 0x00
411 zinfo
.header_offset
= self
.fp
.tell() # Start of header bytes
412 # Must overwrite CRC and sizes with correct data later
414 zinfo
.compress_size
= compress_size
= 0
415 zinfo
.file_size
= file_size
= 0
416 self
.fp
.write(zinfo
.FileHeader())
417 zinfo
.file_offset
= self
.fp
.tell() # Start of file bytes
418 if zinfo
.compress_type
== ZIP_DEFLATED
:
419 cmpr
= zlib
.compressobj(zlib
.Z_DEFAULT_COMPRESSION
,
424 buf
= fp
.read(1024 * 8)
427 file_size
= file_size
+ len(buf
)
428 CRC
= binascii
.crc32(buf
, CRC
)
430 buf
= cmpr
.compress(buf
)
431 compress_size
= compress_size
+ len(buf
)
436 compress_size
= compress_size
+ len(buf
)
438 zinfo
.compress_size
= compress_size
440 zinfo
.compress_size
= file_size
442 zinfo
.file_size
= file_size
443 # Seek backwards and write CRC and file sizes
444 position
= self
.fp
.tell() # Preserve current position in file
445 self
.fp
.seek(zinfo
.header_offset
+ 14, 0)
446 self
.fp
.write(struct
.pack("<lLL", zinfo
.CRC
, zinfo
.compress_size
,
448 self
.fp
.seek(position
, 0)
449 self
.filelist
.append(zinfo
)
450 self
.NameToInfo
[zinfo
.filename
] = zinfo
452 def writestr(self
, zinfo_or_arcname
, bytes
):
453 """Write a file into the archive. The contents is the string
454 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or
455 the name of the file in the archive."""
456 if not isinstance(zinfo_or_arcname
, ZipInfo
):
457 zinfo
= ZipInfo(filename
=zinfo_or_arcname
,
458 date_time
=time
.localtime(time
.time()))
459 zinfo
.compress_type
= self
.compression
461 zinfo
= zinfo_or_arcname
462 self
._writecheck
(zinfo
)
463 zinfo
.file_size
= len(bytes
) # Uncompressed size
464 zinfo
.CRC
= binascii
.crc32(bytes
) # CRC-32 checksum
465 if zinfo
.compress_type
== ZIP_DEFLATED
:
466 co
= zlib
.compressobj(zlib
.Z_DEFAULT_COMPRESSION
,
468 bytes
= co
.compress(bytes
) + co
.flush()
469 zinfo
.compress_size
= len(bytes
) # Compressed size
471 zinfo
.compress_size
= zinfo
.file_size
472 zinfo
.header_offset
= self
.fp
.tell() # Start of header bytes
473 self
.fp
.write(zinfo
.FileHeader())
474 zinfo
.file_offset
= self
.fp
.tell() # Start of file bytes
476 if zinfo
.flag_bits
& 0x08:
477 # Write CRC and file sizes after the file data
478 self
.fp
.write(struct
.pack("<lLL", zinfo
.CRC
, zinfo
.compress_size
,
480 self
.filelist
.append(zinfo
)
481 self
.NameToInfo
[zinfo
.filename
] = zinfo
484 """Call the "close()" method in case the user forgot."""
488 """Close the file, and for mode "w" and "a" write the ending
492 if self
.mode
in ("w", "a"): # write ending records
494 pos1
= self
.fp
.tell()
495 for zinfo
in self
.filelist
: # write central directory
498 dosdate
= (dt
[0] - 1980) << 9 | dt
[1] << 5 | dt
[2]
499 dostime
= dt
[3] << 11 | dt
[4] << 5 |
(dt
[5] // 2)
500 centdir
= struct
.pack(structCentralDir
,
501 stringCentralDir
, zinfo
.create_version
,
502 zinfo
.create_system
, zinfo
.extract_version
, zinfo
.reserved
,
503 zinfo
.flag_bits
, zinfo
.compress_type
, dostime
, dosdate
,
504 zinfo
.CRC
, zinfo
.compress_size
, zinfo
.file_size
,
505 len(zinfo
.filename
), len(zinfo
.extra
), len(zinfo
.comment
),
506 0, zinfo
.internal_attr
, zinfo
.external_attr
,
508 self
.fp
.write(centdir
)
509 self
.fp
.write(zinfo
.filename
)
510 self
.fp
.write(zinfo
.extra
)
511 self
.fp
.write(zinfo
.comment
)
512 pos2
= self
.fp
.tell()
513 # Write end-of-zip-archive record
514 endrec
= struct
.pack(structEndArchive
, stringEndArchive
,
515 0, 0, count
, count
, pos2
- pos1
, pos1
, 0)
516 self
.fp
.write(endrec
)
518 if not self
._filePassed
:
523 class PyZipFile(ZipFile
):
524 """Class to create ZIP archives with Python library files and packages."""
526 def writepy(self
, pathname
, basename
= ""):
527 """Add all files from "pathname" to the ZIP archive.
529 If pathname is a package directory, search the directory and
530 all package subdirectories recursively for all *.py and enter
531 the modules into the archive. If pathname is a plain
532 directory, listdir *.py and enter all modules. Else, pathname
533 must be a Python *.py file and the module will be put into the
534 archive. Added modules are always module.pyo or module.pyc.
535 This method will compile the module.py into module.pyc if
538 dir, name
= os
.path
.split(pathname
)
539 if os
.path
.isdir(pathname
):
540 initname
= os
.path
.join(pathname
, "__init__.py")
541 if os
.path
.isfile(initname
):
542 # This is a package directory, add it
544 basename
= "%s/%s" % (basename
, name
)
548 print "Adding package in", pathname
, "as", basename
549 fname
, arcname
= self
._get
_codename
(initname
[0:-3], basename
)
551 print "Adding", arcname
552 self
.write(fname
, arcname
)
553 dirlist
= os
.listdir(pathname
)
554 dirlist
.remove("__init__.py")
555 # Add all *.py files and package subdirectories
556 for filename
in dirlist
:
557 path
= os
.path
.join(pathname
, filename
)
558 root
, ext
= os
.path
.splitext(filename
)
559 if os
.path
.isdir(path
):
560 if os
.path
.isfile(os
.path
.join(path
, "__init__.py")):
561 # This is a package directory, add it
562 self
.writepy(path
, basename
) # Recursive call
564 fname
, arcname
= self
._get
_codename
(path
[0:-3],
567 print "Adding", arcname
568 self
.write(fname
, arcname
)
570 # This is NOT a package directory, add its files at top level
572 print "Adding files from directory", pathname
573 for filename
in os
.listdir(pathname
):
574 path
= os
.path
.join(pathname
, filename
)
575 root
, ext
= os
.path
.splitext(filename
)
577 fname
, arcname
= self
._get
_codename
(path
[0:-3],
580 print "Adding", arcname
581 self
.write(fname
, arcname
)
583 if pathname
[-3:] != ".py":
584 raise RuntimeError, \
585 'Files added with writepy() must end with ".py"'
586 fname
, arcname
= self
._get
_codename
(pathname
[0:-3], basename
)
588 print "Adding file", arcname
589 self
.write(fname
, arcname
)
591 def _get_codename(self
, pathname
, basename
):
592 """Return (filename, archivename) for the path.
594 Given a module name path, return the correct file path and
595 archive name, compiling if necessary. For example, given
596 /python/lib/string, return (/python/lib/string.pyc, string).
598 file_py
= pathname
+ ".py"
599 file_pyc
= pathname
+ ".pyc"
600 file_pyo
= pathname
+ ".pyo"
601 if os
.path
.isfile(file_pyo
) and \
602 os
.stat(file_pyo
).st_mtime
>= os
.stat(file_py
).st_mtime
:
603 fname
= file_pyo
# Use .pyo file
604 elif not os
.path
.isfile(file_pyc
) or \
605 os
.stat(file_pyc
).st_mtime
< os
.stat(file_py
).st_mtime
:
608 print "Compiling", file_py
610 py_compile
.compile(file_py
, file_pyc
, None, True)
611 except py_compile
.PyCompileError
,err
:
616 archivename
= os
.path
.split(fname
)[1]
618 archivename
= "%s/%s" % (basename
, archivename
)
619 return (fname
, archivename
)