2 Read and write ZIP files.
4 import struct
, os
, time
, sys
5 import binascii
, cStringIO
8 import zlib
# We may need its compression method
12 __all__
= ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
13 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
15 class BadZipfile(Exception):
19 class LargeZipFile(Exception):
21 Raised when writing a zipfile that requires ZIP64 extensions
22 while those extensions are disabled.
25 error
= BadZipfile
# The exception raised by this module
27 ZIP64_LIMIT
= (1 << 31) - 1
29 # constants for Zip file compression methods
32 # Other ZIP compression methods not supported
34 # Here are some struct module formats for reading headers
35 structEndArchive
= "<4s4H2lH" # 8 items, end of archive, 22 bytes
36 stringEndArchive
= "PK\005\006" # magic number for end of archive record
37 structCentralDir
= "<4s4B4HlLL5HLl"# 19 items, central directory, 46 bytes
38 stringCentralDir
= "PK\001\002" # magic number for central directory
39 structFileHeader
= "<4s2B4HlLL2H" # 12 items, file header record, 30 bytes
40 stringFileHeader
= "PK\003\004" # magic number for file header
41 structEndArchive64Locator
= "<4slql" # 4 items, locate Zip64 header, 20 bytes
42 stringEndArchive64Locator
= "PK\x06\x07" # magic token for locator header
43 structEndArchive64
= "<4sqhhllqqqq" # 10 items, end of archive (Zip64), 56 bytes
44 stringEndArchive64
= "PK\x06\x06" # magic token for Zip64 header
47 # indexes of entries in the central directory structure
49 _CD_CREATE_VERSION
= 1
51 _CD_EXTRACT_VERSION
= 3
52 _CD_EXTRACT_SYSTEM
= 4 # is this meaningful?
58 _CD_COMPRESSED_SIZE
= 10
59 _CD_UNCOMPRESSED_SIZE
= 11
60 _CD_FILENAME_LENGTH
= 12
61 _CD_EXTRA_FIELD_LENGTH
= 13
62 _CD_COMMENT_LENGTH
= 14
63 _CD_DISK_NUMBER_START
= 15
64 _CD_INTERNAL_FILE_ATTRIBUTES
= 16
65 _CD_EXTERNAL_FILE_ATTRIBUTES
= 17
66 _CD_LOCAL_HEADER_OFFSET
= 18
68 # indexes of entries in the local file header structure
70 _FH_EXTRACT_VERSION
= 1
71 _FH_EXTRACT_SYSTEM
= 2 # is this meaningful?
72 _FH_GENERAL_PURPOSE_FLAG_BITS
= 3
73 _FH_COMPRESSION_METHOD
= 4
77 _FH_COMPRESSED_SIZE
= 8
78 _FH_UNCOMPRESSED_SIZE
= 9
79 _FH_FILENAME_LENGTH
= 10
80 _FH_EXTRA_FIELD_LENGTH
= 11
82 def is_zipfile(filename
):
83 """Quickly see if file is a ZIP file by checking the magic number."""
85 fpin
= open(filename
, "rb")
86 endrec
= _EndRecData(fpin
)
89 return True # file has correct magic number
94 def _EndRecData64(fpin
, offset
, endrec
):
96 Read the ZIP64 end-of-archive records and use that to update endrec
98 locatorSize
= struct
.calcsize(structEndArchive64Locator
)
99 fpin
.seek(offset
- locatorSize
, 2)
100 data
= fpin
.read(locatorSize
)
101 sig
, diskno
, reloff
, disks
= struct
.unpack(structEndArchive64Locator
, data
)
102 if sig
!= stringEndArchive64Locator
:
105 if diskno
!= 0 or disks
!= 1:
106 raise BadZipfile("zipfiles that span multiple disks are not supported")
108 # Assume no 'zip64 extensible data'
109 endArchiveSize
= struct
.calcsize(structEndArchive64
)
110 fpin
.seek(offset
- locatorSize
- endArchiveSize
, 2)
111 data
= fpin
.read(endArchiveSize
)
112 sig
, sz
, create_version
, read_version
, disk_num
, disk_dir
, \
113 dircount
, dircount2
, dirsize
, diroffset
= \
114 struct
.unpack(structEndArchive64
, data
)
115 if sig
!= stringEndArchive64
:
118 # Update the original endrec using data from the ZIP64 record
122 endrec
[4] = dircount2
124 endrec
[6] = diroffset
128 def _EndRecData(fpin
):
129 """Return data from the "End of Central Directory" record, or None.
131 The data is a list of the nine items in the ZIP "End of central dir"
132 record followed by a tenth item, the file seek offset of this record."""
133 fpin
.seek(-22, 2) # Assume no archive comment.
134 filesize
= fpin
.tell() + 22 # Get file size
136 if data
[0:4] == stringEndArchive
and data
[-2:] == "\000\000":
137 endrec
= struct
.unpack(structEndArchive
, data
)
138 endrec
= list(endrec
)
139 endrec
.append("") # Append the archive comment
140 endrec
.append(filesize
- 22) # Append the record start offset
141 if endrec
[-4] == -1 or endrec
[-4] == 0xffffffff:
142 return _EndRecData64(fpin
, -22, endrec
)
144 # Search the last END_BLOCK bytes of the file for the record signature.
145 # The comment is appended to the ZIP file and has a 16 bit length.
146 # So the comment may be up to 64K long. We limit the search for the
147 # signature to a few Kbytes at the end of the file for efficiency.
148 # also, the signature must not appear in the comment.
149 END_BLOCK
= min(filesize
, 1024 * 4)
150 fpin
.seek(filesize
- END_BLOCK
, 0)
152 start
= data
.rfind(stringEndArchive
)
153 if start
>= 0: # Correct signature string was found
154 endrec
= struct
.unpack(structEndArchive
, data
[start
:start
+22])
155 endrec
= list(endrec
)
156 comment
= data
[start
+22:]
157 if endrec
[7] == len(comment
): # Comment length checks out
158 # Append the archive comment and start offset
159 endrec
.append(comment
)
160 endrec
.append(filesize
- END_BLOCK
+ start
)
161 if endrec
[-4] == -1 or endrec
[-4] == 0xffffffff:
162 return _EndRecData64(fpin
, - END_BLOCK
+ start
, endrec
)
164 return # Error, return None
167 class ZipInfo (object):
168 """Class with attributes describing each file in the ZIP archive."""
191 def __init__(self
, filename
="NoName", date_time
=(1980,1,1,0,0,0)):
192 self
.orig_filename
= filename
# Original file name in archive
194 # Terminate the file name at the first null byte. Null bytes in file
195 # names are used as tricks by viruses in archives.
196 null_byte
= filename
.find(chr(0))
198 filename
= filename
[0:null_byte
]
199 # This is used to ensure paths in generated ZIP files always use
200 # forward slashes as the directory separator, as required by the
201 # ZIP format specification.
202 if os
.sep
!= "/" and os
.sep
in filename
:
203 filename
= filename
.replace(os
.sep
, "/")
205 self
.filename
= filename
# Normalized file name
206 self
.date_time
= date_time
# year, month, day, hour, min, sec
208 self
.compress_type
= ZIP_STORED
# Type of compression for the file
209 self
.comment
= "" # Comment for each file
210 self
.extra
= "" # ZIP extra data
211 if sys
.platform
== 'win32':
212 self
.create_system
= 0 # System which created ZIP archive
214 # Assume everything else is unix-y
215 self
.create_system
= 3 # System which created ZIP archive
216 self
.create_version
= 20 # Version which created ZIP archive
217 self
.extract_version
= 20 # Version needed to extract archive
218 self
.reserved
= 0 # Must be zero
219 self
.flag_bits
= 0 # ZIP flag bits
220 self
.volume
= 0 # Volume number of file header
221 self
.internal_attr
= 0 # Internal attributes
222 self
.external_attr
= 0 # External file attributes
223 # Other attributes are set by class ZipFile:
224 # header_offset Byte offset to the file header
225 # CRC CRC-32 of the uncompressed file
226 # compress_size Size of the compressed file
227 # file_size Size of the uncompressed file
229 def FileHeader(self
):
230 """Return the per-file header as a string."""
232 dosdate
= (dt
[0] - 1980) << 9 | dt
[1] << 5 | dt
[2]
233 dostime
= dt
[3] << 11 | dt
[4] << 5 |
(dt
[5] // 2)
234 if self
.flag_bits
& 0x08:
235 # Set these to zero because we write them after the file data
236 CRC
= compress_size
= file_size
= 0
239 compress_size
= self
.compress_size
240 file_size
= self
.file_size
244 if file_size
> ZIP64_LIMIT
or compress_size
> ZIP64_LIMIT
:
245 # File is larger than what fits into a 4 byte integer,
246 # fall back to the ZIP64 extension
248 extra
= extra
+ struct
.pack(fmt
,
249 1, struct
.calcsize(fmt
)-4, file_size
, compress_size
)
250 file_size
= 0xffffffff # -1
251 compress_size
= 0xffffffff # -1
252 self
.extract_version
= max(45, self
.extract_version
)
253 self
.create_version
= max(45, self
.extract_version
)
255 header
= struct
.pack(structFileHeader
, stringFileHeader
,
256 self
.extract_version
, self
.reserved
, self
.flag_bits
,
257 self
.compress_type
, dostime
, dosdate
, CRC
,
258 compress_size
, file_size
,
259 len(self
.filename
), len(extra
))
260 return header
+ self
.filename
+ extra
262 def _decodeExtra(self
):
263 # Try to decode the extra field.
265 unpack
= struct
.unpack
267 tp
, ln
= unpack('<hh', extra
[:4])
270 counts
= unpack('<qqq', extra
[4:28])
272 counts
= unpack('<qq', extra
[4:20])
274 counts
= unpack('<q', extra
[4:12])
278 raise RuntimeError, "Corrupt extra field %s"%(ln
,)
282 # ZIP64 extension (large files and/or large archives)
283 if self
.file_size
== -1 or self
.file_size
== 0xFFFFFFFFL
:
284 self
.file_size
= counts
[idx
]
287 if self
.compress_size
== -1 or self
.compress_size
== 0xFFFFFFFFL
:
288 self
.compress_size
= counts
[idx
]
291 if self
.header_offset
== -1 or self
.header_offset
== 0xffffffffL
:
292 old
= self
.header_offset
293 self
.header_offset
= counts
[idx
]
300 """Class to handle decryption of files stored within a ZIP archive.
302 ZIP supports a password-based form of encryption. Even though known
303 plaintext attacks have been found against it, it is still useful
304 for low-level security.
307 zd = _ZipDecrypter(mypwd)
308 plain_char = zd(cypher_char)
309 plain_text = map(zd, cypher_text)
312 def _GenerateCRCTable():
313 """Generate a CRC-32 table.
315 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
316 internal keys. We noticed that a direct implementation is faster than
317 relying on binascii.crc32().
325 crc
= ((crc
>> 1) & 0x7FFFFFFF) ^ poly
327 crc
= ((crc
>> 1) & 0x7FFFFFFF)
330 crctable
= _GenerateCRCTable()
332 def _crc32(self
, ch
, crc
):
333 """Compute the CRC32 primitive on one byte."""
334 return ((crc
>> 8) & 0xffffff) ^ self
.crctable
[(crc ^
ord(ch
)) & 0xff]
336 def __init__(self
, pwd
):
337 self
.key0
= 305419896
338 self
.key1
= 591751049
339 self
.key2
= 878082192
343 def _UpdateKeys(self
, c
):
344 self
.key0
= self
._crc
32(c
, self
.key0
)
345 self
.key1
= (self
.key1
+ (self
.key0
& 255)) & 4294967295
346 self
.key1
= (self
.key1
* 134775813 + 1) & 4294967295
347 self
.key2
= self
._crc
32(chr((self
.key1
>> 24) & 255), self
.key2
)
349 def __call__(self
, c
):
350 """Decrypt a single character."""
353 c
= c ^
(((k
* (k^
1)) >> 8) & 255)
359 """File-like object for reading an archive member.
360 Is returned by ZipFile.open().
363 def __init__(self
, fileobj
, zipinfo
, decrypt
=None):
364 self
.fileobj
= fileobj
365 self
.decrypter
= decrypt
371 self
.univ_newlines
= False
372 self
.nlSeps
= ("\n", )
373 self
.lastdiscard
= ''
375 self
.compress_type
= zipinfo
.compress_type
376 self
.compress_size
= zipinfo
.compress_size
380 self
.name
= zipinfo
.filename
382 # read from compressed files in 64k blocks
383 self
.compreadsize
= 64*1024
384 if self
.compress_type
== ZIP_DEFLATED
:
385 self
.dc
= zlib
.decompressobj(-15)
def set_univ_newlines(self, univ_newlines):
    """Record the universal-newlines flag and choose line separators.

    When the flag is true, "\\r\\n", "\\r" and "\\n" are all treated as
    line separators; otherwise only "\\n" is.
    """
    self.univ_newlines = univ_newlines

    # The separator tuple depends on the flag just stored.
    if self.univ_newlines:
        self.nlSeps = ("\r\n", "\r", "\n")
    else:
        self.nlSeps = ("\n", )
399 nextline
= self
.readline()
401 raise StopIteration()
408 def _checkfornewline(self
):
411 # ugly check for cases where half of an \r\n pair was
412 # read on the last pass, and the \r was discarded. In this
413 # case we just throw away the \n at the start of the buffer.
414 if (self
.lastdiscard
, self
.linebuffer
[0]) == ('\r','\n'):
415 self
.linebuffer
= self
.linebuffer
[1:]
417 for sep
in self
.nlSeps
:
418 nl
= self
.linebuffer
.find(sep
)
425 def readline(self
, size
= -1):
426 """Read a line with approx. size. If size is negative,
434 # check for a newline already in buffer
435 nl
, nllen
= self
._checkfornewline
()
438 # the next line was already in the buffer
441 # no line break in buffer - try to read more
442 size
-= len(self
.linebuffer
)
443 while nl
< 0 and size
> 0:
444 buf
= self
.read(min(size
, 100))
447 self
.linebuffer
+= buf
450 # check for a newline in buffer
451 nl
, nllen
= self
._checkfornewline
()
453 # we either ran out of bytes in the file, or
454 # met the specified size limit without finding a newline,
455 # so return current buffer
461 buf
= self
.linebuffer
[:nl
]
462 self
.lastdiscard
= self
.linebuffer
[nl
:nl
+ nllen
]
463 self
.linebuffer
= self
.linebuffer
[nl
+ nllen
:]
465 # line is always returned with \n as newline char (except possibly
466 # for a final incomplete line in the file, which is handled above).
469 def readlines(self
, sizehint
= -1):
470 """Return a list with all (following) lines. The sizehint parameter
471 is ignored in this implementation.
475 line
= self
.readline()
480 def read(self
, size
= None):
481 # act like file() obj and return empty string if size is 0
485 # determine read size
486 bytesToRead
= self
.compress_size
- self
.bytes_read
488 # adjust read size for encrypted files since the first 12 bytes
489 # are for the encryption/password information
490 if self
.decrypter
is not None:
493 if size
is not None and size
>= 0:
494 if self
.compress_type
== ZIP_STORED
:
495 lr
= len(self
.readbuffer
)
496 bytesToRead
= min(bytesToRead
, size
- lr
)
497 elif self
.compress_type
== ZIP_DEFLATED
:
498 if len(self
.readbuffer
) > size
:
499 # the user has requested fewer bytes than we've already
500 # pulled through the decompressor; don't read any more
503 # user will use up the buffer, so read some more
504 lr
= len(self
.rawbuffer
)
505 bytesToRead
= min(bytesToRead
, self
.compreadsize
- lr
)
507 # avoid reading past end of file contents
508 if bytesToRead
+ self
.bytes_read
> self
.compress_size
:
509 bytesToRead
= self
.compress_size
- self
.bytes_read
511 # try to read from file (if necessary)
513 bytes
= self
.fileobj
.read(bytesToRead
)
514 self
.bytes_read
+= len(bytes
)
515 self
.rawbuffer
+= bytes
517 # handle contents of raw buffer
519 newdata
= self
.rawbuffer
522 # decrypt new data if we were given an object to handle that
523 if newdata
and self
.decrypter
is not None:
524 newdata
= ''.join(map(self
.decrypter
, newdata
))
526 # decompress newly read data if necessary
527 if newdata
and self
.compress_type
== ZIP_DEFLATED
:
528 newdata
= self
.dc
.decompress(newdata
)
529 self
.rawbuffer
= self
.dc
.unconsumed_tail
530 if self
.eof
and len(self
.rawbuffer
) == 0:
531 # we're out of raw bytes (both from the file and
532 # the local buffer); flush just to make sure the
533 # decompressor is done
534 newdata
+= self
.dc
.flush()
535 # prevent decompressor from being used again
538 self
.readbuffer
+= newdata
541 # return what the user asked for
542 if size
is None or len(self
.readbuffer
) <= size
:
543 bytes
= self
.readbuffer
546 bytes
= self
.readbuffer
[:size
]
547 self
.readbuffer
= self
.readbuffer
[size
:]
553 """ Class with methods to open, read, write, close, list zip files.
555 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
557 file: Either the path to the file, or a file-like object.
558 If it is a path, the file will be opened and closed by ZipFile.
559 mode: The mode can be either read "r", write "w" or append "a".
560 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
561 allowZip64: if True ZipFile will create files with ZIP64 extensions when
562 needed, otherwise it will raise an exception when this would
567 fp
= None # Set here since __del__ checks it
569 def __init__(self
, file, mode
="r", compression
=ZIP_STORED
, allowZip64
=False):
570 """Open the ZIP file with mode read "r", write "w" or append "a"."""
571 if mode
not in ("r", "w", "a"):
572 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
574 if compression
== ZIP_STORED
:
576 elif compression
== ZIP_DEFLATED
:
579 "Compression requires the (missing) zlib module"
581 raise RuntimeError, "That compression method is not supported"
583 self
._allowZip
64 = allowZip64
584 self
._didModify
= False
585 self
.debug
= 0 # Level of printing: 0 through 3
586 self
.NameToInfo
= {} # Find file info given name
587 self
.filelist
= [] # List of ZipInfo instances for archive
588 self
.compression
= compression
# Method of compression
589 self
.mode
= key
= mode
.replace('b', '')[0]
592 # Check if we were passed a file-like object
593 if isinstance(file, basestring
):
596 modeDict
= {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
598 self
.fp
= open(file, modeDict
[mode
])
602 self
.fp
= open(file, modeDict
[mode
])
608 self
.filename
= getattr(file, 'name', None)
615 try: # See if file is a zip file
616 self
._RealGetContents
()
617 # seek to start of directory and overwrite
618 self
.fp
.seek(self
.start_dir
, 0)
619 except BadZipfile
: # file is not a zip file, just append
622 if not self
._filePassed
:
625 raise RuntimeError, 'Mode must be "r", "w" or "a"'
627 def _GetContents(self
):
628 """Read the directory, making sure we close the file if the format
631 self
._RealGetContents
()
633 if not self
._filePassed
:
638 def _RealGetContents(self
):
639 """Read in the table of contents for the ZIP file."""
641 endrec
= _EndRecData(fp
)
643 raise BadZipfile
, "File is not a zip file"
646 size_cd
= endrec
[5] # bytes in central directory
647 offset_cd
= endrec
[6] # offset of central directory
648 self
.comment
= endrec
[8] # archive comment
649 # endrec[9] is the offset of the "End of Central Dir" record
650 if endrec
[9] > ZIP64_LIMIT
:
651 x
= endrec
[9] - size_cd
- 56 - 20
653 x
= endrec
[9] - size_cd
654 # "concat" is zero, unless zip was concatenated to another file
655 concat
= x
- offset_cd
657 print "given, inferred, offset", offset_cd
, x
, concat
658 # self.start_dir: Position of start of central directory
659 self
.start_dir
= offset_cd
+ concat
660 fp
.seek(self
.start_dir
, 0)
661 data
= fp
.read(size_cd
)
662 fp
= cStringIO
.StringIO(data
)
664 while total
< size_cd
:
665 centdir
= fp
.read(46)
667 if centdir
[0:4] != stringCentralDir
:
668 raise BadZipfile
, "Bad magic number for central directory"
669 centdir
= struct
.unpack(structCentralDir
, centdir
)
672 filename
= fp
.read(centdir
[_CD_FILENAME_LENGTH
])
673 # Create ZipInfo instance to store file information
674 x
= ZipInfo(filename
)
675 x
.extra
= fp
.read(centdir
[_CD_EXTRA_FIELD_LENGTH
])
676 x
.comment
= fp
.read(centdir
[_CD_COMMENT_LENGTH
])
677 total
= (total
+ centdir
[_CD_FILENAME_LENGTH
]
678 + centdir
[_CD_EXTRA_FIELD_LENGTH
]
679 + centdir
[_CD_COMMENT_LENGTH
])
680 x
.header_offset
= centdir
[_CD_LOCAL_HEADER_OFFSET
]
681 (x
.create_version
, x
.create_system
, x
.extract_version
, x
.reserved
,
682 x
.flag_bits
, x
.compress_type
, t
, d
,
683 x
.CRC
, x
.compress_size
, x
.file_size
) = centdir
[1:12]
684 x
.volume
, x
.internal_attr
, x
.external_attr
= centdir
[15:18]
685 # Convert date/time code to (year, month, day, hour, min, sec)
686 x
.date_time
= ( (d
>>9)+1980, (d
>>5)&0xF, d
&0x1F,
687 t
>>11, (t
>>5)&0x3F, (t
&0x1F) * 2 )
690 x
.header_offset
= x
.header_offset
+ concat
691 self
.filelist
.append(x
)
692 self
.NameToInfo
[x
.filename
] = x
698 """Return a list of file names in the archive."""
700 for data
in self
.filelist
:
701 l
.append(data
.filename
)
705 """Return a list of class ZipInfo instances for files in the
710 """Print a table of contents for the zip file."""
711 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
712 for zinfo
in self
.filelist
:
713 date
= "%d-%02d-%02d %02d:%02d:%02d" % zinfo
.date_time
714 print "%-46s %s %12d" % (zinfo
.filename
, date
, zinfo
.file_size
)
717 """Read all the files and check the CRC."""
718 for zinfo
in self
.filelist
:
720 self
.read(zinfo
.filename
) # Check CRC-32
722 return zinfo
.filename
725 def getinfo(self
, name
):
726 """Return the instance of ZipInfo given 'name'."""
727 info
= self
.NameToInfo
.get(name
)
730 'There is no item named %r in the archive' % name
)
734 def setpassword(self
, pwd
):
735 """Set default password for encrypted files."""
def read(self, name, pwd=None):
    """Return the contents of the archive member 'name' as a string.

    The member is opened in mode "r" via self.open(), passing 'pwd'
    through, and whatever its read() returns is handed back.
    """
    member = self.open(name, "r", pwd)
    return member.read()
742 def open(self
, name
, mode
="r", pwd
=None):
743 """Return file-like object for 'name'."""
744 if mode
not in ("r", "U", "rU"):
745 raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
747 raise RuntimeError, \
748 "Attempt to read ZIP archive that was already closed"
750 # Only open a new file for instances where we were not
751 # given a file object in the constructor
755 zef_file
= open(self
.filename
, 'rb')
757 # Get info object for name
758 zinfo
= self
.getinfo(name
)
760 filepos
= zef_file
.tell()
762 zef_file
.seek(zinfo
.header_offset
, 0)
764 # Skip the file header:
765 fheader
= zef_file
.read(30)
766 if fheader
[0:4] != stringFileHeader
:
767 raise BadZipfile
, "Bad magic number for file header"
769 fheader
= struct
.unpack(structFileHeader
, fheader
)
770 fname
= zef_file
.read(fheader
[_FH_FILENAME_LENGTH
])
771 if fheader
[_FH_EXTRA_FIELD_LENGTH
]:
772 zef_file
.read(fheader
[_FH_EXTRA_FIELD_LENGTH
])
774 if fname
!= zinfo
.orig_filename
:
776 'File name in directory "%s" and header "%s" differ.' % (
777 zinfo
.orig_filename
, fname
)
779 # check for encrypted flag & handle password
780 is_encrypted
= zinfo
.flag_bits
& 0x1
786 raise RuntimeError, "File %s is encrypted, " \
787 "password required for extraction" % name
789 zd
= _ZipDecrypter(pwd
)
790 # The first 12 bytes in the cypher stream is an encryption header
791 # used to strengthen the algorithm. The first 11 bytes are
792 # completely random, while the 12th contains the MSB of the CRC,
793 # and is used to check the correctness of the password.
794 bytes
= zef_file
.read(12)
795 h
= map(zd
, bytes
[0:12])
796 if ord(h
[11]) != ((zinfo
.CRC
>>24)&255):
797 raise RuntimeError, "Bad password for file %s" % name
799 # build and return a ZipExtFile
801 zef
= ZipExtFile(zef_file
, zinfo
)
803 zef
= ZipExtFile(zef_file
, zinfo
, zd
)
805 # set universal newlines on ZipExtFile if necessary
807 zef
.set_univ_newlines(True)
810 def _writecheck(self
, zinfo
):
811 """Check for errors before writing a file to the archive."""
812 if zinfo
.filename
in self
.NameToInfo
:
813 if self
.debug
: # Warning for duplicate names
814 print "Duplicate name:", zinfo
.filename
815 if self
.mode
not in ("w", "a"):
816 raise RuntimeError, 'write() requires mode "w" or "a"'
818 raise RuntimeError, \
819 "Attempt to write ZIP archive that was already closed"
820 if zinfo
.compress_type
== ZIP_DEFLATED
and not zlib
:
821 raise RuntimeError, \
822 "Compression requires the (missing) zlib module"
823 if zinfo
.compress_type
not in (ZIP_STORED
, ZIP_DEFLATED
):
824 raise RuntimeError, \
825 "That compression method is not supported"
826 if zinfo
.file_size
> ZIP64_LIMIT
:
827 if not self
._allowZip
64:
828 raise LargeZipFile("Filesize would require ZIP64 extensions")
829 if zinfo
.header_offset
> ZIP64_LIMIT
:
830 if not self
._allowZip
64:
831 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
833 def write(self
, filename
, arcname
=None, compress_type
=None):
834 """Put the bytes from filename into the archive under the name
838 "Attempt to write to ZIP archive that was already closed")
840 st
= os
.stat(filename
)
841 mtime
= time
.localtime(st
.st_mtime
)
842 date_time
= mtime
[0:6]
843 # Create ZipInfo instance to store file information
846 arcname
= os
.path
.normpath(os
.path
.splitdrive(arcname
)[1])
847 while arcname
[0] in (os
.sep
, os
.altsep
):
848 arcname
= arcname
[1:]
849 zinfo
= ZipInfo(arcname
, date_time
)
850 zinfo
.external_attr
= (st
[0] & 0xFFFF) << 16L # Unix attributes
851 if compress_type
is None:
852 zinfo
.compress_type
= self
.compression
854 zinfo
.compress_type
= compress_type
856 zinfo
.file_size
= st
.st_size
857 zinfo
.flag_bits
= 0x00
858 zinfo
.header_offset
= self
.fp
.tell() # Start of header bytes
860 self
._writecheck
(zinfo
)
861 self
._didModify
= True
862 fp
= open(filename
, "rb")
863 # Must overwrite CRC and sizes with correct data later
865 zinfo
.compress_size
= compress_size
= 0
866 zinfo
.file_size
= file_size
= 0
867 self
.fp
.write(zinfo
.FileHeader())
868 if zinfo
.compress_type
== ZIP_DEFLATED
:
869 cmpr
= zlib
.compressobj(zlib
.Z_DEFAULT_COMPRESSION
,
874 buf
= fp
.read(1024 * 8)
877 file_size
= file_size
+ len(buf
)
878 CRC
= binascii
.crc32(buf
, CRC
)
880 buf
= cmpr
.compress(buf
)
881 compress_size
= compress_size
+ len(buf
)
886 compress_size
= compress_size
+ len(buf
)
888 zinfo
.compress_size
= compress_size
890 zinfo
.compress_size
= file_size
892 zinfo
.file_size
= file_size
893 # Seek backwards and write CRC and file sizes
894 position
= self
.fp
.tell() # Preserve current position in file
895 self
.fp
.seek(zinfo
.header_offset
+ 14, 0)
896 self
.fp
.write(struct
.pack("<lLL", zinfo
.CRC
, zinfo
.compress_size
,
898 self
.fp
.seek(position
, 0)
899 self
.filelist
.append(zinfo
)
900 self
.NameToInfo
[zinfo
.filename
] = zinfo
902 def writestr(self
, zinfo_or_arcname
, bytes
):
903 """Write a file into the archive. The contents is the string
904 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or
905 the name of the file in the archive."""
906 if not isinstance(zinfo_or_arcname
, ZipInfo
):
907 zinfo
= ZipInfo(filename
=zinfo_or_arcname
,
908 date_time
=time
.localtime(time
.time()))
909 zinfo
.compress_type
= self
.compression
911 zinfo
= zinfo_or_arcname
915 "Attempt to write to ZIP archive that was already closed")
917 zinfo
.file_size
= len(bytes
) # Uncompressed size
918 zinfo
.header_offset
= self
.fp
.tell() # Start of header bytes
919 self
._writecheck
(zinfo
)
920 self
._didModify
= True
921 zinfo
.CRC
= binascii
.crc32(bytes
) # CRC-32 checksum
922 if zinfo
.compress_type
== ZIP_DEFLATED
:
923 co
= zlib
.compressobj(zlib
.Z_DEFAULT_COMPRESSION
,
925 bytes
= co
.compress(bytes
) + co
.flush()
926 zinfo
.compress_size
= len(bytes
) # Compressed size
928 zinfo
.compress_size
= zinfo
.file_size
929 zinfo
.header_offset
= self
.fp
.tell() # Start of header bytes
930 self
.fp
.write(zinfo
.FileHeader())
933 if zinfo
.flag_bits
& 0x08:
934 # Write CRC and file sizes after the file data
935 self
.fp
.write(struct
.pack("<lLL", zinfo
.CRC
, zinfo
.compress_size
,
937 self
.filelist
.append(zinfo
)
938 self
.NameToInfo
[zinfo
.filename
] = zinfo
941 """Call the "close()" method in case the user forgot."""
945 """Close the file, and for mode "w" and "a" write the ending
950 if self
.mode
in ("w", "a") and self
._didModify
: # write ending records
952 pos1
= self
.fp
.tell()
953 for zinfo
in self
.filelist
: # write central directory
956 dosdate
= (dt
[0] - 1980) << 9 | dt
[1] << 5 | dt
[2]
957 dostime
= dt
[3] << 11 | dt
[4] << 5 |
(dt
[5] // 2)
959 if zinfo
.file_size
> ZIP64_LIMIT \
960 or zinfo
.compress_size
> ZIP64_LIMIT
:
961 extra
.append(zinfo
.file_size
)
962 extra
.append(zinfo
.compress_size
)
963 file_size
= 0xffffffff #-1
964 compress_size
= 0xffffffff #-1
966 file_size
= zinfo
.file_size
967 compress_size
= zinfo
.compress_size
969 if zinfo
.header_offset
> ZIP64_LIMIT
:
970 extra
.append(zinfo
.header_offset
)
971 header_offset
= -1 # struct "l" format: 32 one bits
973 header_offset
= zinfo
.header_offset
975 extra_data
= zinfo
.extra
977 # Append a ZIP64 field to the extra's
978 extra_data
= struct
.pack(
979 '<hh' + 'q'*len(extra
),
980 1, 8*len(extra
), *extra
) + extra_data
982 extract_version
= max(45, zinfo
.extract_version
)
983 create_version
= max(45, zinfo
.create_version
)
985 extract_version
= zinfo
.extract_version
986 create_version
= zinfo
.create_version
988 centdir
= struct
.pack(structCentralDir
,
989 stringCentralDir
, create_version
,
990 zinfo
.create_system
, extract_version
, zinfo
.reserved
,
991 zinfo
.flag_bits
, zinfo
.compress_type
, dostime
, dosdate
,
992 zinfo
.CRC
, compress_size
, file_size
,
993 len(zinfo
.filename
), len(extra_data
), len(zinfo
.comment
),
994 0, zinfo
.internal_attr
, zinfo
.external_attr
,
996 self
.fp
.write(centdir
)
997 self
.fp
.write(zinfo
.filename
)
998 self
.fp
.write(extra_data
)
999 self
.fp
.write(zinfo
.comment
)
1001 pos2
= self
.fp
.tell()
1002 # Write end-of-zip-archive record
1003 if pos1
> ZIP64_LIMIT
:
1004 # Need to write the ZIP64 end-of-archive records
1005 zip64endrec
= struct
.pack(
1006 structEndArchive64
, stringEndArchive64
,
1007 44, 45, 45, 0, 0, count
, count
, pos2
- pos1
, pos1
)
1008 self
.fp
.write(zip64endrec
)
1010 zip64locrec
= struct
.pack(
1011 structEndArchive64Locator
,
1012 stringEndArchive64Locator
, 0, pos2
, 1)
1013 self
.fp
.write(zip64locrec
)
1015 # XXX Why is `pos3` computed next? It's never referenced.
1016 pos3
= self
.fp
.tell()
1017 endrec
= struct
.pack(structEndArchive
, stringEndArchive
,
1018 0, 0, count
, count
, pos2
- pos1
, -1, 0)
1019 self
.fp
.write(endrec
)
1022 endrec
= struct
.pack(structEndArchive
, stringEndArchive
,
1023 0, 0, count
, count
, pos2
- pos1
, pos1
, 0)
1024 self
.fp
.write(endrec
)
1026 if not self
._filePassed
:
1031 class PyZipFile(ZipFile
):
1032 """Class to create ZIP archives with Python library files and packages."""
1034 def writepy(self
, pathname
, basename
= ""):
1035 """Add all files from "pathname" to the ZIP archive.
1037 If pathname is a package directory, search the directory and
1038 all package subdirectories recursively for all *.py and enter
1039 the modules into the archive. If pathname is a plain
1040 directory, listdir *.py and enter all modules. Else, pathname
1041 must be a Python *.py file and the module will be put into the
1042 archive. Added modules are always module.pyo or module.pyc.
1043 This method will compile the module.py into module.pyc if
1046 dir, name
= os
.path
.split(pathname
)
1047 if os
.path
.isdir(pathname
):
1048 initname
= os
.path
.join(pathname
, "__init__.py")
1049 if os
.path
.isfile(initname
):
1050 # This is a package directory, add it
1052 basename
= "%s/%s" % (basename
, name
)
1056 print "Adding package in", pathname
, "as", basename
1057 fname
, arcname
= self
._get
_codename
(initname
[0:-3], basename
)
1059 print "Adding", arcname
1060 self
.write(fname
, arcname
)
1061 dirlist
= os
.listdir(pathname
)
1062 dirlist
.remove("__init__.py")
1063 # Add all *.py files and package subdirectories
1064 for filename
in dirlist
:
1065 path
= os
.path
.join(pathname
, filename
)
1066 root
, ext
= os
.path
.splitext(filename
)
1067 if os
.path
.isdir(path
):
1068 if os
.path
.isfile(os
.path
.join(path
, "__init__.py")):
1069 # This is a package directory, add it
1070 self
.writepy(path
, basename
) # Recursive call
1072 fname
, arcname
= self
._get
_codename
(path
[0:-3],
1075 print "Adding", arcname
1076 self
.write(fname
, arcname
)
1078 # This is NOT a package directory, add its files at top level
1080 print "Adding files from directory", pathname
1081 for filename
in os
.listdir(pathname
):
1082 path
= os
.path
.join(pathname
, filename
)
1083 root
, ext
= os
.path
.splitext(filename
)
1085 fname
, arcname
= self
._get
_codename
(path
[0:-3],
1088 print "Adding", arcname
1089 self
.write(fname
, arcname
)
1091 if pathname
[-3:] != ".py":
1092 raise RuntimeError, \
1093 'Files added with writepy() must end with ".py"'
1094 fname
, arcname
= self
._get
_codename
(pathname
[0:-3], basename
)
1096 print "Adding file", arcname
1097 self
.write(fname
, arcname
)
1099 def _get_codename(self
, pathname
, basename
):
1100 """Return (filename, archivename) for the path.
1102 Given a module name path, return the correct file path and
1103 archive name, compiling if necessary. For example, given
1104 /python/lib/string, return (/python/lib/string.pyc, string).
1106 file_py
= pathname
+ ".py"
1107 file_pyc
= pathname
+ ".pyc"
1108 file_pyo
= pathname
+ ".pyo"
1109 if os
.path
.isfile(file_pyo
) and \
1110 os
.stat(file_pyo
).st_mtime
>= os
.stat(file_py
).st_mtime
:
1111 fname
= file_pyo
# Use .pyo file
1112 elif not os
.path
.isfile(file_pyc
) or \
1113 os
.stat(file_pyc
).st_mtime
< os
.stat(file_py
).st_mtime
:
1116 print "Compiling", file_py
1118 py_compile
.compile(file_py
, file_pyc
, None, True)
1119 except py_compile
.PyCompileError
,err
:
1124 archivename
= os
.path
.split(fname
)[1]
1126 archivename
= "%s/%s" % (basename
, archivename
)
1127 return (fname
, archivename
)
1130 def main(args
= None):
1132 USAGE
=textwrap
.dedent("""\
1134 zipfile.py -l zipfile.zip # Show listing of a zipfile
1135 zipfile.py -t zipfile.zip # Test if a zipfile is valid
1136 zipfile.py -e zipfile.zip target # Extract zipfile into target dir
1137 zipfile.py -c zipfile.zip src ... # Create zipfile from sources
1142 if not args
or args
[0] not in ('-l', '-c', '-e', '-t'):
1150 zf
= ZipFile(args
[1], 'r')
1154 elif args
[0] == '-t':
1158 zf
= ZipFile(args
[1], 'r')
1160 print "Done testing"
1162 elif args
[0] == '-e':
1167 zf
= ZipFile(args
[1], 'r')
1169 for path
in zf
.namelist():
1170 if path
.startswith('./'):
1171 tgt
= os
.path
.join(out
, path
[2:])
1173 tgt
= os
.path
.join(out
, path
)
1175 tgtdir
= os
.path
.dirname(tgt
)
1176 if not os
.path
.exists(tgtdir
):
1178 fp
= open(tgt
, 'wb')
1179 fp
.write(zf
.read(path
))
1183 elif args
[0] == '-c':
1188 def addToZip(zf
, path
, zippath
):
1189 if os
.path
.isfile(path
):
1190 zf
.write(path
, zippath
, ZIP_DEFLATED
)
1191 elif os
.path
.isdir(path
):
1192 for nm
in os
.listdir(path
):
1194 os
.path
.join(path
, nm
), os
.path
.join(zippath
, nm
))
1197 zf
= ZipFile(args
[1], 'w', allowZip64
=True)
1198 for src
in args
[2:]:
1199 addToZip(zf
, src
, os
.path
.basename(src
))
1203 if __name__
== "__main__":