2 Read and write ZIP files.
4 import struct
, os
, time
, sys
, shutil
5 import binascii
, cStringIO
, stat
8 import zlib
# We may need its compression method
12 crc32
= binascii
.crc32
14 __all__
= ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
15 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
17 class BadZipfile(Exception):
21 class LargeZipFile(Exception):
23 Raised when writing a zipfile, the zipfile requires ZIP64 extensions
24 and those extensions are disabled.
27 error
= BadZipfile
# The exception raised by this module
29 ZIP64_LIMIT
= (1 << 31) - 1
30 ZIP_FILECOUNT_LIMIT
= 1 << 16
31 ZIP_MAX_COMMENT
= (1 << 16) - 1
33 # constants for Zip file compression methods
36 # Other ZIP compression methods not supported
38 # Below are some formats and associated data for reading/writing headers using
39 # the struct module. The names and structures of headers/records are those used
40 # in the PKWARE description of the ZIP file format:
41 # http://www.pkware.com/documents/casestudies/APPNOTE.TXT
42 # (URL valid as of January 2008)
44 # The "end of central directory" structure, magic number, size, and indices
45 # (section V.I in the format document)
46 structEndArchive
= "<4s4H2LH"
47 stringEndArchive
= "PK\005\006"
48 sizeEndCentDir
= struct
.calcsize(structEndArchive
)
53 _ECD_ENTRIES_THIS_DISK
= 3
54 _ECD_ENTRIES_TOTAL
= 4
58 # These last two indices are not part of the structure as defined in the
59 # spec, but they are used internally by this module as a convenience
63 # The "central directory" structure, magic number, size, and indices
64 # of entries in the structure (section V.F in the format document)
65 structCentralDir
= "<4s4B4HL2L5H2L"
66 stringCentralDir
= "PK\001\002"
67 sizeCentralDir
= struct
.calcsize(structCentralDir
)
69 # indexes of entries in the central directory structure
71 _CD_CREATE_VERSION
= 1
73 _CD_EXTRACT_VERSION
= 3
74 _CD_EXTRACT_SYSTEM
= 4
80 _CD_COMPRESSED_SIZE
= 10
81 _CD_UNCOMPRESSED_SIZE
= 11
82 _CD_FILENAME_LENGTH
= 12
83 _CD_EXTRA_FIELD_LENGTH
= 13
84 _CD_COMMENT_LENGTH
= 14
85 _CD_DISK_NUMBER_START
= 15
86 _CD_INTERNAL_FILE_ATTRIBUTES
= 16
87 _CD_EXTERNAL_FILE_ATTRIBUTES
= 17
88 _CD_LOCAL_HEADER_OFFSET
= 18
90 # The "local file header" structure, magic number, size, and indices
91 # (section V.A in the format document)
92 structFileHeader
= "<4s2B4HL2L2H"
93 stringFileHeader
= "PK\003\004"
94 sizeFileHeader
= struct
.calcsize(structFileHeader
)
97 _FH_EXTRACT_VERSION
= 1
98 _FH_EXTRACT_SYSTEM
= 2
99 _FH_GENERAL_PURPOSE_FLAG_BITS
= 3
100 _FH_COMPRESSION_METHOD
= 4
101 _FH_LAST_MOD_TIME
= 5
102 _FH_LAST_MOD_DATE
= 6
104 _FH_COMPRESSED_SIZE
= 8
105 _FH_UNCOMPRESSED_SIZE
= 9
106 _FH_FILENAME_LENGTH
= 10
107 _FH_EXTRA_FIELD_LENGTH
= 11
109 # The "Zip64 end of central directory locator" structure, magic number, and size
110 structEndArchive64Locator
= "<4sLQL"
111 stringEndArchive64Locator
= "PK\x06\x07"
112 sizeEndCentDir64Locator
= struct
.calcsize(structEndArchive64Locator
)
114 # The "Zip64 end of central directory" record, magic number, size, and indices
115 # (section V.G in the format document)
116 structEndArchive64
= "<4sQ2H2L4Q"
117 stringEndArchive64
= "PK\x06\x06"
118 sizeEndCentDir64
= struct
.calcsize(structEndArchive64
)
121 _CD64_DIRECTORY_RECSIZE
= 1
122 _CD64_CREATE_VERSION
= 2
123 _CD64_EXTRACT_VERSION
= 3
124 _CD64_DISK_NUMBER
= 4
125 _CD64_DISK_NUMBER_START
= 5
126 _CD64_NUMBER_ENTRIES_THIS_DISK
= 6
127 _CD64_NUMBER_ENTRIES_TOTAL
= 7
128 _CD64_DIRECTORY_SIZE
= 8
129 _CD64_OFFSET_START_CENTDIR
= 9
131 def _check_zipfile(fp
):
134 return True # file has correct magic number
139 def is_zipfile(filename
):
140 """Quickly see if a file is a ZIP file by checking the magic number.
142 The filename argument may be a file or file-like object too.
146 if hasattr(filename
, "read"):
147 result
= _check_zipfile(fp
=filename
)
149 with
open(filename
, "rb") as fp
:
150 result
= _check_zipfile(fp
)
155 def _EndRecData64(fpin
, offset
, endrec
):
157 Read the ZIP64 end-of-archive records and use that to update endrec
159 fpin
.seek(offset
- sizeEndCentDir64Locator
, 2)
160 data
= fpin
.read(sizeEndCentDir64Locator
)
161 sig
, diskno
, reloff
, disks
= struct
.unpack(structEndArchive64Locator
, data
)
162 if sig
!= stringEndArchive64Locator
:
165 if diskno
!= 0 or disks
!= 1:
166 raise BadZipfile("zipfiles that span multiple disks are not supported")
168 # Assume no 'zip64 extensible data'
169 fpin
.seek(offset
- sizeEndCentDir64Locator
- sizeEndCentDir64
, 2)
170 data
= fpin
.read(sizeEndCentDir64
)
171 sig
, sz
, create_version
, read_version
, disk_num
, disk_dir
, \
172 dircount
, dircount2
, dirsize
, diroffset
= \
173 struct
.unpack(structEndArchive64
, data
)
174 if sig
!= stringEndArchive64
:
177 # Update the original endrec using data from the ZIP64 record
178 endrec
[_ECD_SIGNATURE
] = sig
179 endrec
[_ECD_DISK_NUMBER
] = disk_num
180 endrec
[_ECD_DISK_START
] = disk_dir
181 endrec
[_ECD_ENTRIES_THIS_DISK
] = dircount
182 endrec
[_ECD_ENTRIES_TOTAL
] = dircount2
183 endrec
[_ECD_SIZE
] = dirsize
184 endrec
[_ECD_OFFSET
] = diroffset
188 def _EndRecData(fpin
):
189 """Return data from the "End of Central Directory" record, or None.
191 The data is a list of the nine items in the ZIP "End of central dir"
192 record followed by a tenth item, the file seek offset of this record."""
194 # Determine file size
196 filesize
= fpin
.tell()
198 # Check to see if this is ZIP file with no archive comment (the
199 # "end of central directory" structure should be the last item in the
200 # file if this is the case).
202 fpin
.seek(-sizeEndCentDir
, 2)
206 if data
[0:4] == stringEndArchive
and data
[-2:] == "\000\000":
207 # the signature is correct and there's no comment, unpack structure
208 endrec
= struct
.unpack(structEndArchive
, data
)
211 # Append a blank comment and record start offset
213 endrec
.append(filesize
- sizeEndCentDir
)
215 # Try to read the "Zip64 end of central directory" structure
216 return _EndRecData64(fpin
, -sizeEndCentDir
, endrec
)
218 # Either this is not a ZIP file, or it is a ZIP file with an archive
219 # comment. Search the end of the file for the "end of central directory"
220 # record signature. The comment is the last item in the ZIP file and may be
221 # up to 64K long. It is assumed that the "end of central directory" magic
222 # number does not appear in the comment.
223 maxCommentStart
= max(filesize
- (1 << 16) - sizeEndCentDir
, 0)
224 fpin
.seek(maxCommentStart
, 0)
226 start
= data
.rfind(stringEndArchive
)
228 # found the magic number; attempt to unpack and interpret
229 recData
= data
[start
:start
+sizeEndCentDir
]
230 endrec
= list(struct
.unpack(structEndArchive
, recData
))
231 comment
= data
[start
+sizeEndCentDir
:]
232 # check that comment length is correct
233 if endrec
[_ECD_COMMENT_SIZE
] == len(comment
):
234 # Append the archive comment and start offset
235 endrec
.append(comment
)
236 endrec
.append(maxCommentStart
+ start
)
238 # Try to read the "Zip64 end of central directory" structure
239 return _EndRecData64(fpin
, maxCommentStart
+ start
- filesize
,
242 # Unable to find a valid end of central directory structure
246 class ZipInfo (object):
247 """Class with attributes describing each file in the ZIP archive."""
271 def __init__(self
, filename
="NoName", date_time
=(1980,1,1,0,0,0)):
272 self
.orig_filename
= filename
# Original file name in archive
274 # Terminate the file name at the first null byte. Null bytes in file
275 # names are used as tricks by viruses in archives.
276 null_byte
= filename
.find(chr(0))
278 filename
= filename
[0:null_byte
]
279 # This is used to ensure paths in generated ZIP files always use
280 # forward slashes as the directory separator, as required by the
281 # ZIP format specification.
282 if os
.sep
!= "/" and os
.sep
in filename
:
283 filename
= filename
.replace(os
.sep
, "/")
285 self
.filename
= filename
# Normalized file name
286 self
.date_time
= date_time
# year, month, day, hour, min, sec
288 self
.compress_type
= ZIP_STORED
# Type of compression for the file
289 self
.comment
= "" # Comment for each file
290 self
.extra
= "" # ZIP extra data
291 if sys
.platform
== 'win32':
292 self
.create_system
= 0 # System which created ZIP archive
294 # Assume everything else is unix-y
295 self
.create_system
= 3 # System which created ZIP archive
296 self
.create_version
= 20 # Version which created ZIP archive
297 self
.extract_version
= 20 # Version needed to extract archive
298 self
.reserved
= 0 # Must be zero
299 self
.flag_bits
= 0 # ZIP flag bits
300 self
.volume
= 0 # Volume number of file header
301 self
.internal_attr
= 0 # Internal attributes
302 self
.external_attr
= 0 # External file attributes
303 # Other attributes are set by class ZipFile:
304 # header_offset Byte offset to the file header
305 # CRC CRC-32 of the uncompressed file
306 # compress_size Size of the compressed file
307 # file_size Size of the uncompressed file
309 def FileHeader(self
):
310 """Return the per-file header as a string."""
312 dosdate
= (dt
[0] - 1980) << 9 | dt
[1] << 5 | dt
[2]
313 dostime
= dt
[3] << 11 | dt
[4] << 5 |
(dt
[5] // 2)
314 if self
.flag_bits
& 0x08:
315 # Set these to zero because we write them after the file data
316 CRC
= compress_size
= file_size
= 0
319 compress_size
= self
.compress_size
320 file_size
= self
.file_size
324 if file_size
> ZIP64_LIMIT
or compress_size
> ZIP64_LIMIT
:
325 # File is larger than what fits into a 4 byte integer,
326 # fall back to the ZIP64 extension
328 extra
= extra
+ struct
.pack(fmt
,
329 1, struct
.calcsize(fmt
)-4, file_size
, compress_size
)
330 file_size
= 0xffffffff
331 compress_size
= 0xffffffff
332 self
.extract_version
= max(45, self
.extract_version
)
333 self
.create_version
= max(45, self
.extract_version
)
335 filename
, flag_bits
= self
._encodeFilenameFlags
()
336 header
= struct
.pack(structFileHeader
, stringFileHeader
,
337 self
.extract_version
, self
.reserved
, flag_bits
,
338 self
.compress_type
, dostime
, dosdate
, CRC
,
339 compress_size
, file_size
,
340 len(filename
), len(extra
))
341 return header
+ filename
+ extra
343 def _encodeFilenameFlags(self
):
344 if isinstance(self
.filename
, unicode):
346 return self
.filename
.encode('ascii'), self
.flag_bits
347 except UnicodeEncodeError:
348 return self
.filename
.encode('utf-8'), self
.flag_bits |
0x800
350 return self
.filename
, self
.flag_bits
352 def _decodeFilename(self
):
353 if self
.flag_bits
& 0x800:
354 return self
.filename
.decode('utf-8')
358 def _decodeExtra(self
):
359 # Try to decode the extra field.
361 unpack
= struct
.unpack
363 tp
, ln
= unpack('<HH', extra
[:4])
366 counts
= unpack('<QQQ', extra
[4:28])
368 counts
= unpack('<QQ', extra
[4:20])
370 counts
= unpack('<Q', extra
[4:12])
374 raise RuntimeError, "Corrupt extra field %s"%(ln
,)
378 # ZIP64 extension (large files and/or large archives)
379 if self
.file_size
in (0xffffffffffffffffL
, 0xffffffffL
):
380 self
.file_size
= counts
[idx
]
383 if self
.compress_size
== 0xFFFFFFFFL
:
384 self
.compress_size
= counts
[idx
]
387 if self
.header_offset
== 0xffffffffL
:
388 old
= self
.header_offset
389 self
.header_offset
= counts
[idx
]
396 """Class to handle decryption of files stored within a ZIP archive.
398 ZIP supports a password-based form of encryption. Even though known
399 plaintext attacks have been found against it, it is still useful
400 to be able to get data out of such a file.
403 zd = _ZipDecrypter(mypwd)
404 plain_char = zd(cypher_char)
405 plain_text = map(zd, cypher_text)
408 def _GenerateCRCTable():
409 """Generate a CRC-32 table.
411 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
412 internal keys. We noticed that a direct implementation is faster than
413 relying on binascii.crc32().
421 crc
= ((crc
>> 1) & 0x7FFFFFFF) ^ poly
423 crc
= ((crc
>> 1) & 0x7FFFFFFF)
426 crctable
= _GenerateCRCTable()
def _crc32(self, ch, crc):
    """Advance a running CRC-32 value by one byte.

    ch is a one-character string and crc is the current CRC state.
    The low eight bits of (crc XOR ord(ch)) select an entry from
    self.crctable; that entry is XORed with the state shifted right
    by eight bits and masked to 24 bits.
    """
    table_index = (crc ^ ord(ch)) & 0xff
    shifted_state = (crc >> 8) & 0xffffff
    return shifted_state ^ self.crctable[table_index]
432 def __init__(self
, pwd
):
433 self
.key0
= 305419896
434 self
.key1
= 591751049
435 self
.key2
= 878082192
def _UpdateKeys(self, c):
    """Mix one character into the three rolling key values.

    key0 is advanced with self._crc32 using c.  key1 absorbs the low
    byte of the new key0 and is then stepped by a multiply-and-add,
    both reduced to 32 bits.  key2 is advanced with self._crc32 using
    the top byte of the new key1.
    """
    mask32 = 4294967295

    new_key0 = self._crc32(c, self.key0)
    self.key0 = new_key0

    new_key1 = (self.key1 + (new_key0 & 255)) & mask32
    new_key1 = (new_key1 * 134775813 + 1) & mask32
    self.key1 = new_key1

    top_byte = chr((new_key1 >> 24) & 255)
    self.key2 = self._crc32(top_byte, self.key2)
445 def __call__(self
, c
):
446 """Decrypt a single character."""
449 c
= c ^
(((k
* (k^
1)) >> 8) & 255)
455 """File-like object for reading an archive member.
456 Is returned by ZipFile.open().
459 def __init__(self
, fileobj
, zipinfo
, decrypt
=None):
460 self
.fileobj
= fileobj
461 self
.decrypter
= decrypt
467 self
.univ_newlines
= False
468 self
.nlSeps
= ("\n", )
469 self
.lastdiscard
= ''
471 self
.compress_type
= zipinfo
.compress_type
472 self
.compress_size
= zipinfo
.compress_size
476 self
.name
= zipinfo
.filename
478 # read from compressed files in 64k blocks
479 self
.compreadsize
= 64*1024
480 if self
.compress_type
== ZIP_DEFLATED
:
481 self
.dc
= zlib
.decompressobj(-15)
def set_univ_newlines(self, univ_newlines):
    """Record the universal-newlines flag and choose line separators.

    Stores univ_newlines on the instance and sets self.nlSeps to the
    tuple of separator strings for that mode: CR LF, CR and LF when
    the flag is true, LF alone otherwise.
    """
    self.univ_newlines = univ_newlines

    # The two-character separator is listed first in the tuple.
    if univ_newlines:
        self.nlSeps = ("\r\n", "\r", "\n")
    else:
        self.nlSeps = ("\n", )
495 nextline
= self
.readline()
497 raise StopIteration()
504 def _checkfornewline(self
):
507 # ugly check for cases where half of an \r\n pair was
508 # read on the last pass, and the \r was discarded. In this
509 # case we just throw away the \n at the start of the buffer.
510 if (self
.lastdiscard
, self
.linebuffer
[0]) == ('\r','\n'):
511 self
.linebuffer
= self
.linebuffer
[1:]
513 for sep
in self
.nlSeps
:
514 nl
= self
.linebuffer
.find(sep
)
521 def readline(self
, size
= -1):
522 """Read a line with approx. size. If size is negative,
530 # check for a newline already in buffer
531 nl
, nllen
= self
._checkfornewline
()
534 # the next line was already in the buffer
537 # no line break in buffer - try to read more
538 size
-= len(self
.linebuffer
)
539 while nl
< 0 and size
> 0:
540 buf
= self
.read(min(size
, 100))
543 self
.linebuffer
+= buf
546 # check for a newline in buffer
547 nl
, nllen
= self
._checkfornewline
()
549 # we either ran out of bytes in the file, or
550 # met the specified size limit without finding a newline,
551 # so return current buffer
557 buf
= self
.linebuffer
[:nl
]
558 self
.lastdiscard
= self
.linebuffer
[nl
:nl
+ nllen
]
559 self
.linebuffer
= self
.linebuffer
[nl
+ nllen
:]
561 # line is always returned with \n as newline char (except possibly
562 # for a final incomplete line in the file, which is handled above).
565 def readlines(self
, sizehint
= -1):
566 """Return a list with all (following) lines. The sizehint parameter
567 is ignored in this implementation.
571 line
= self
.readline()
576 def read(self
, size
= None):
577 # act like file() obj and return empty string if size is 0
581 # determine read size
582 bytesToRead
= self
.compress_size
- self
.bytes_read
584 # adjust read size for encrypted files since the first 12 bytes
585 # are for the encryption/password information
586 if self
.decrypter
is not None:
589 if size
is not None and size
>= 0:
590 if self
.compress_type
== ZIP_STORED
:
591 lr
= len(self
.readbuffer
)
592 bytesToRead
= min(bytesToRead
, size
- lr
)
593 elif self
.compress_type
== ZIP_DEFLATED
:
594 if len(self
.readbuffer
) > size
:
595 # the user has requested fewer bytes than we've already
596 # pulled through the decompressor; don't read any more
599 # user will use up the buffer, so read some more
600 lr
= len(self
.rawbuffer
)
601 bytesToRead
= min(bytesToRead
, self
.compreadsize
- lr
)
603 # avoid reading past end of file contents
604 if bytesToRead
+ self
.bytes_read
> self
.compress_size
:
605 bytesToRead
= self
.compress_size
- self
.bytes_read
607 # try to read from file (if necessary)
609 bytes
= self
.fileobj
.read(bytesToRead
)
610 self
.bytes_read
+= len(bytes
)
611 self
.rawbuffer
+= bytes
613 # handle contents of raw buffer
615 newdata
= self
.rawbuffer
618 # decrypt new data if we were given an object to handle that
619 if newdata
and self
.decrypter
is not None:
620 newdata
= ''.join(map(self
.decrypter
, newdata
))
622 # decompress newly read data if necessary
623 if newdata
and self
.compress_type
== ZIP_DEFLATED
:
624 newdata
= self
.dc
.decompress(newdata
)
625 self
.rawbuffer
= self
.dc
.unconsumed_tail
626 if self
.eof
and len(self
.rawbuffer
) == 0:
627 # we're out of raw bytes (both from the file and
628 # the local buffer); flush just to make sure the
629 # decompressor is done
630 newdata
+= self
.dc
.flush()
631 # prevent decompressor from being used again
634 self
.readbuffer
+= newdata
637 # return what the user asked for
638 if size
is None or len(self
.readbuffer
) <= size
:
639 bytes
= self
.readbuffer
642 bytes
= self
.readbuffer
[:size
]
643 self
.readbuffer
= self
.readbuffer
[size
:]
649 """ Class with methods to open, read, write, close, list zip files.
651 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
653 file: Either the path to the file, or a file-like object.
654 If it is a path, the file will be opened and closed by ZipFile.
655 mode: The mode can be either read "r", write "w" or append "a".
656 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
657 allowZip64: if True ZipFile will create files with ZIP64 extensions when
658 needed, otherwise it will raise an exception when this would
663 fp
= None # Set here since __del__ checks it
665 def __init__(self
, file, mode
="r", compression
=ZIP_STORED
, allowZip64
=False):
666 """Open the ZIP file with mode read "r", write "w" or append "a"."""
667 if mode
not in ("r", "w", "a"):
668 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
670 if compression
== ZIP_STORED
:
672 elif compression
== ZIP_DEFLATED
:
675 "Compression requires the (missing) zlib module"
677 raise RuntimeError, "That compression method is not supported"
679 self
._allowZip
64 = allowZip64
680 self
._didModify
= False
681 self
.debug
= 0 # Level of printing: 0 through 3
682 self
.NameToInfo
= {} # Find file info given name
683 self
.filelist
= [] # List of ZipInfo instances for archive
684 self
.compression
= compression
# Method of compression
685 self
.mode
= key
= mode
.replace('b', '')[0]
689 # Check if we were passed a file-like object
690 if isinstance(file, basestring
):
693 modeDict
= {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
695 self
.fp
= open(file, modeDict
[mode
])
699 self
.fp
= open(file, modeDict
[mode
])
705 self
.filename
= getattr(file, 'name', None)
712 try: # See if file is a zip file
713 self
._RealGetContents
()
714 # seek to start of directory and overwrite
715 self
.fp
.seek(self
.start_dir
, 0)
716 except BadZipfile
: # file is not a zip file, just append
719 if not self
._filePassed
:
722 raise RuntimeError, 'Mode must be "r", "w" or "a"'
727 def __exit__(self
, type, value
, traceback
):
730 def _GetContents(self
):
731 """Read the directory, making sure we close the file if the format
734 self
._RealGetContents
()
736 if not self
._filePassed
:
741 def _RealGetContents(self
):
742 """Read in the table of contents for the ZIP file."""
744 endrec
= _EndRecData(fp
)
746 raise BadZipfile
, "File is not a zip file"
749 size_cd
= endrec
[_ECD_SIZE
] # bytes in central directory
750 offset_cd
= endrec
[_ECD_OFFSET
] # offset of central directory
751 self
.comment
= endrec
[_ECD_COMMENT
] # archive comment
753 # "concat" is zero, unless zip was concatenated to another file
754 concat
= endrec
[_ECD_LOCATION
] - size_cd
- offset_cd
755 if endrec
[_ECD_SIGNATURE
] == stringEndArchive64
:
756 # If Zip64 extension structures are present, account for them
757 concat
-= (sizeEndCentDir64
+ sizeEndCentDir64Locator
)
760 inferred
= concat
+ offset_cd
761 print "given, inferred, offset", offset_cd
, inferred
, concat
762 # self.start_dir: Position of start of central directory
763 self
.start_dir
= offset_cd
+ concat
764 fp
.seek(self
.start_dir
, 0)
765 data
= fp
.read(size_cd
)
766 fp
= cStringIO
.StringIO(data
)
768 while total
< size_cd
:
769 centdir
= fp
.read(sizeCentralDir
)
770 if centdir
[0:4] != stringCentralDir
:
771 raise BadZipfile
, "Bad magic number for central directory"
772 centdir
= struct
.unpack(structCentralDir
, centdir
)
775 filename
= fp
.read(centdir
[_CD_FILENAME_LENGTH
])
776 # Create ZipInfo instance to store file information
777 x
= ZipInfo(filename
)
778 x
.extra
= fp
.read(centdir
[_CD_EXTRA_FIELD_LENGTH
])
779 x
.comment
= fp
.read(centdir
[_CD_COMMENT_LENGTH
])
780 x
.header_offset
= centdir
[_CD_LOCAL_HEADER_OFFSET
]
781 (x
.create_version
, x
.create_system
, x
.extract_version
, x
.reserved
,
782 x
.flag_bits
, x
.compress_type
, t
, d
,
783 x
.CRC
, x
.compress_size
, x
.file_size
) = centdir
[1:12]
784 x
.volume
, x
.internal_attr
, x
.external_attr
= centdir
[15:18]
785 # Convert date/time code to (year, month, day, hour, min, sec)
787 x
.date_time
= ( (d
>>9)+1980, (d
>>5)&0xF, d
&0x1F,
788 t
>>11, (t
>>5)&0x3F, (t
&0x1F) * 2 )
791 x
.header_offset
= x
.header_offset
+ concat
792 x
.filename
= x
._decodeFilename
()
793 self
.filelist
.append(x
)
794 self
.NameToInfo
[x
.filename
] = x
796 # update total bytes read from central directory
797 total
= (total
+ sizeCentralDir
+ centdir
[_CD_FILENAME_LENGTH
]
798 + centdir
[_CD_EXTRA_FIELD_LENGTH
]
799 + centdir
[_CD_COMMENT_LENGTH
])
806 """Return a list of file names in the archive."""
808 for data
in self
.filelist
:
809 l
.append(data
.filename
)
813 """Return a list of class ZipInfo instances for files in the
818 """Print a table of contents for the zip file."""
819 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
820 for zinfo
in self
.filelist
:
821 date
= "%d-%02d-%02d %02d:%02d:%02d" % zinfo
.date_time
[:6]
822 print "%-46s %s %12d" % (zinfo
.filename
, date
, zinfo
.file_size
)
825 """Read all the files and check the CRC."""
827 for zinfo
in self
.filelist
:
829 # Read by chunks, to avoid an OverflowError or a
830 # MemoryError with very large embedded files.
831 f
= self
.open(zinfo
.filename
, "r")
832 while f
.read(chunk_size
): # Check CRC-32
835 return zinfo
.filename
837 def getinfo(self
, name
):
838 """Return the instance of ZipInfo given 'name'."""
839 info
= self
.NameToInfo
.get(name
)
842 'There is no item named %r in the archive' % name
)
846 def setpassword(self
, pwd
):
847 """Set default password for encrypted files."""
def read(self, name, pwd=None):
    """Return the contents of archive member 'name' as a string.

    The member is opened in "r" mode through self.open(), passing pwd
    along, and the result of its argument-less read() is returned.
    """
    member_file = self.open(name, "r", pwd)
    return member_file.read()
854 def open(self
, name
, mode
="r", pwd
=None):
855 """Return file-like object for 'name'."""
856 if mode
not in ("r", "U", "rU"):
857 raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
859 raise RuntimeError, \
860 "Attempt to read ZIP archive that was already closed"
862 # Only open a new file for instances where we were not
863 # given a file object in the constructor
867 zef_file
= open(self
.filename
, 'rb')
869 # Make sure we have an info object
870 if isinstance(name
, ZipInfo
):
871 # 'name' is already an info object
874 # Get info object for name
875 zinfo
= self
.getinfo(name
)
877 zef_file
.seek(zinfo
.header_offset
, 0)
879 # Skip the file header:
880 fheader
= zef_file
.read(sizeFileHeader
)
881 if fheader
[0:4] != stringFileHeader
:
882 raise BadZipfile
, "Bad magic number for file header"
884 fheader
= struct
.unpack(structFileHeader
, fheader
)
885 fname
= zef_file
.read(fheader
[_FH_FILENAME_LENGTH
])
886 if fheader
[_FH_EXTRA_FIELD_LENGTH
]:
887 zef_file
.read(fheader
[_FH_EXTRA_FIELD_LENGTH
])
889 if fname
!= zinfo
.orig_filename
:
891 'File name in directory "%s" and header "%s" differ.' % (
892 zinfo
.orig_filename
, fname
)
894 # check for encrypted flag & handle password
895 is_encrypted
= zinfo
.flag_bits
& 0x1
901 raise RuntimeError, "File %s is encrypted, " \
902 "password required for extraction" % name
904 zd
= _ZipDecrypter(pwd
)
905 # The first 12 bytes in the cypher stream is an encryption header
906 # used to strengthen the algorithm. The first 11 bytes are
907 # completely random, while the 12th contains the MSB of the CRC,
908 # or the MSB of the file time depending on the header type
909 # and is used to check the correctness of the password.
910 bytes
= zef_file
.read(12)
911 h
= map(zd
, bytes
[0:12])
912 if zinfo
.flag_bits
& 0x8:
913 # compare against the file type from extended local headers
914 check_byte
= (zinfo
._raw
_time
>> 8) & 0xff
916 # compare against the CRC otherwise
917 check_byte
= (zinfo
.CRC
>> 24) & 0xff
918 if ord(h
[11]) != check_byte
:
919 raise RuntimeError("Bad password for file", name
)
921 # build and return a ZipExtFile
923 zef
= ZipExtFile(zef_file
, zinfo
)
925 zef
= ZipExtFile(zef_file
, zinfo
, zd
)
927 # set universal newlines on ZipExtFile if necessary
929 zef
.set_univ_newlines(True)
932 def extract(self
, member
, path
=None, pwd
=None):
933 """Extract a member from the archive to the current working directory,
934 using its full name. Its file information is extracted as accurately
935 as possible. `member' may be a filename or a ZipInfo object. You can
936 specify a different directory using `path'.
938 if not isinstance(member
, ZipInfo
):
939 member
= self
.getinfo(member
)
944 return self
._extract
_member
(member
, path
, pwd
)
946 def extractall(self
, path
=None, members
=None, pwd
=None):
947 """Extract all members from the archive to the current working
948 directory. `path' specifies a different directory to extract to.
949 `members' is optional and must be a subset of the list returned
953 members
= self
.namelist()
955 for zipinfo
in members
:
956 self
.extract(zipinfo
, path
, pwd
)
958 def _extract_member(self
, member
, targetpath
, pwd
):
959 """Extract the ZipInfo object 'member' to a physical
960 file on the path targetpath.
962 # build the destination pathname, replacing
963 # forward slashes to platform specific separators.
964 # Strip trailing path separator, unless it represents the root.
965 if (targetpath
[-1:] in (os
.path
.sep
, os
.path
.altsep
)
966 and len(os
.path
.splitdrive(targetpath
)[1]) > 1):
967 targetpath
= targetpath
[:-1]
969 # don't include leading "/" from file name if present
970 if member
.filename
[0] == '/':
971 targetpath
= os
.path
.join(targetpath
, member
.filename
[1:])
973 targetpath
= os
.path
.join(targetpath
, member
.filename
)
975 targetpath
= os
.path
.normpath(targetpath
)
977 # Create all upper directories if necessary.
978 upperdirs
= os
.path
.dirname(targetpath
)
979 if upperdirs
and not os
.path
.exists(upperdirs
):
980 os
.makedirs(upperdirs
)
982 if member
.filename
[-1] == '/':
983 if not os
.path
.isdir(targetpath
):
987 source
= self
.open(member
, pwd
=pwd
)
988 target
= file(targetpath
, "wb")
989 shutil
.copyfileobj(source
, target
)
995 def _writecheck(self
, zinfo
):
996 """Check for errors before writing a file to the archive."""
997 if zinfo
.filename
in self
.NameToInfo
:
998 if self
.debug
: # Warning for duplicate names
999 print "Duplicate name:", zinfo
.filename
1000 if self
.mode
not in ("w", "a"):
1001 raise RuntimeError, 'write() requires mode "w" or "a"'
1003 raise RuntimeError, \
1004 "Attempt to write ZIP archive that was already closed"
1005 if zinfo
.compress_type
== ZIP_DEFLATED
and not zlib
:
1006 raise RuntimeError, \
1007 "Compression requires the (missing) zlib module"
1008 if zinfo
.compress_type
not in (ZIP_STORED
, ZIP_DEFLATED
):
1009 raise RuntimeError, \
1010 "That compression method is not supported"
1011 if zinfo
.file_size
> ZIP64_LIMIT
:
1012 if not self
._allowZip
64:
1013 raise LargeZipFile("Filesize would require ZIP64 extensions")
1014 if zinfo
.header_offset
> ZIP64_LIMIT
:
1015 if not self
._allowZip
64:
1016 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
1018 def write(self
, filename
, arcname
=None, compress_type
=None):
1019 """Put the bytes from filename into the archive under the name
1023 "Attempt to write to ZIP archive that was already closed")
1025 st
= os
.stat(filename
)
1026 isdir
= stat
.S_ISDIR(st
.st_mode
)
1027 mtime
= time
.localtime(st
.st_mtime
)
1028 date_time
= mtime
[0:6]
1029 # Create ZipInfo instance to store file information
1032 arcname
= os
.path
.normpath(os
.path
.splitdrive(arcname
)[1])
1033 while arcname
[0] in (os
.sep
, os
.altsep
):
1034 arcname
= arcname
[1:]
1037 zinfo
= ZipInfo(arcname
, date_time
)
1038 zinfo
.external_attr
= (st
[0] & 0xFFFF) << 16L # Unix attributes
1039 if compress_type
is None:
1040 zinfo
.compress_type
= self
.compression
1042 zinfo
.compress_type
= compress_type
1044 zinfo
.file_size
= st
.st_size
1045 zinfo
.flag_bits
= 0x00
1046 zinfo
.header_offset
= self
.fp
.tell() # Start of header bytes
1048 self
._writecheck
(zinfo
)
1049 self
._didModify
= True
1053 zinfo
.compress_size
= 0
1055 self
.filelist
.append(zinfo
)
1056 self
.NameToInfo
[zinfo
.filename
] = zinfo
1057 self
.fp
.write(zinfo
.FileHeader())
1060 with
open(filename
, "rb") as fp
:
1061 # Must overwrite CRC and sizes with correct data later
1063 zinfo
.compress_size
= compress_size
= 0
1064 zinfo
.file_size
= file_size
= 0
1065 self
.fp
.write(zinfo
.FileHeader())
1066 if zinfo
.compress_type
== ZIP_DEFLATED
:
1067 cmpr
= zlib
.compressobj(zlib
.Z_DEFAULT_COMPRESSION
,
1072 buf
= fp
.read(1024 * 8)
1075 file_size
= file_size
+ len(buf
)
1076 CRC
= crc32(buf
, CRC
) & 0xffffffff
1078 buf
= cmpr
.compress(buf
)
1079 compress_size
= compress_size
+ len(buf
)
1083 compress_size
= compress_size
+ len(buf
)
1085 zinfo
.compress_size
= compress_size
1087 zinfo
.compress_size
= file_size
1089 zinfo
.file_size
= file_size
1090 # Seek backwards and write CRC and file sizes
1091 position
= self
.fp
.tell() # Preserve current position in file
1092 self
.fp
.seek(zinfo
.header_offset
+ 14, 0)
1093 self
.fp
.write(struct
.pack("<LLL", zinfo
.CRC
, zinfo
.compress_size
,
1095 self
.fp
.seek(position
, 0)
1096 self
.filelist
.append(zinfo
)
1097 self
.NameToInfo
[zinfo
.filename
] = zinfo
1099 def writestr(self
, zinfo_or_arcname
, bytes
):
1100 """Write a file into the archive. The contents is the string
1101 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or
1102 the name of the file in the archive."""
1103 if not isinstance(zinfo_or_arcname
, ZipInfo
):
1104 zinfo
= ZipInfo(filename
=zinfo_or_arcname
,
1105 date_time
=time
.localtime(time
.time())[:6])
1106 zinfo
.compress_type
= self
.compression
1107 zinfo
.external_attr
= 0600 << 16
1109 zinfo
= zinfo_or_arcname
1113 "Attempt to write to ZIP archive that was already closed")
1115 zinfo
.file_size
= len(bytes
) # Uncompressed size
1116 zinfo
.header_offset
= self
.fp
.tell() # Start of header bytes
1117 self
._writecheck
(zinfo
)
1118 self
._didModify
= True
1119 zinfo
.CRC
= crc32(bytes
) & 0xffffffff # CRC-32 checksum
1120 if zinfo
.compress_type
== ZIP_DEFLATED
:
1121 co
= zlib
.compressobj(zlib
.Z_DEFAULT_COMPRESSION
,
1123 bytes
= co
.compress(bytes
) + co
.flush()
1124 zinfo
.compress_size
= len(bytes
) # Compressed size
1126 zinfo
.compress_size
= zinfo
.file_size
1127 zinfo
.header_offset
= self
.fp
.tell() # Start of header bytes
1128 self
.fp
.write(zinfo
.FileHeader())
1129 self
.fp
.write(bytes
)
1131 if zinfo
.flag_bits
& 0x08:
1132 # Write CRC and file sizes after the file data
1133 self
.fp
.write(struct
.pack("<LLL", zinfo
.CRC
, zinfo
.compress_size
,
1135 self
.filelist
.append(zinfo
)
1136 self
.NameToInfo
[zinfo
.filename
] = zinfo
1139 """Call the "close()" method in case the user forgot."""
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Does nothing when the archive is already closed (self.fp is None).
    For a modified archive opened in mode "w" or "a" this writes, in
    order: one central directory record per entry in self.filelist, the
    ZIP64 end-of-archive record and locator (only when the entry count,
    the directory size or the directory offset exceed the classic
    limits), and the end-of-central-directory record followed by the
    archive comment.  A comment longer than ZIP_MAX_COMMENT is truncated.

    The file object is always released, even when writing the ending
    records raises; it is closed unless self._filePassed is set
    (presumably meaning the caller supplied the file object -- confirm
    against the constructor).
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify:  # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count += 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)

                # Values that do not fit the 32-bit fields are moved into
                # a ZIP64 extra field and the fixed field is saturated.
                extra = []
                if (zinfo.file_size > ZIP64_LIMIT
                        or zinfo.compress_size > ZIP64_LIMIT):
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                        '<HH' + 'Q' * len(extra),
                        1, 8 * len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(
                        structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Dump the values that could not be packed, then let
                    # the warning-turned-exception propagate.
                    sys.stderr.write("%r\n" % ((
                        structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data),
                        len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset),))
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = count
            centDirSize = pos2 - pos1
            centDirOffset = pos1
            if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                    centDirOffset > ZIP64_LIMIT or
                    centDirSize > ZIP64_LIMIT):
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, centDirCount, centDirCount,
                    centDirSize, centDirOffset)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                # The classic record carries saturated values; readers
                # take the real ones from the ZIP64 record.
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            # check for valid comment length
            if len(self.comment) > ZIP_MAX_COMMENT:
                if self.debug > 0:
                    msg = 'Archive comment is too long; truncating to %d bytes' \
                          % ZIP_MAX_COMMENT
                    print(msg)
                self.comment = self.comment[:ZIP_MAX_COMMENT]

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, centDirCount, centDirCount,
                                 centDirSize, centDirOffset, len(self.comment))
            self.fp.write(endrec)
            self.fp.write(self.comment)
            self.fp.flush()
    finally:
        # Release the file object even if writing the records failed, so
        # a broken archive does not also leak an open file.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.

        'basename' is the archive directory prefix under which the
        modules are stored.  Raises RuntimeError when pathname is a
        file that does not end with ".py".
        """
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it.
                # normpath drops a trailing separator, which would
                # otherwise yield an empty package name.
                name = os.path.basename(os.path.normpath(pathname))
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in %s as %s" % (pathname, basename))
                fname, arcname = self._get_codename(initname[0:-3], basename)
                if self.debug:
                    print("Adding %s" % (arcname,))
                self.write(fname, arcname)
                dirlist = os.listdir(pathname)
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename)  # Recursive call
                    elif ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding %s" % (arcname,))
                        self.write(fname, arcname)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory %s" % (pathname,))
                for filename in os.listdir(pathname):
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding %s" % (arcname,))
                        self.write(fname, arcname)
        else:
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file %s" % (arcname,))
            self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).

        An up-to-date .pyo file is preferred; otherwise the .pyc file is
        used, after recompiling it when it is missing or older than the
        .py source.  A compile error is printed, not raised.
        """
        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        if os.path.isfile(file_pyo) and \
                os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
            fname = file_pyo    # Use .pyo file
        elif not os.path.isfile(file_pyc) or \
                os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
            import py_compile
            if self.debug:
                print("Compiling %s" % (file_py,))
            try:
                py_compile.compile(file_py, file_pyc, None, True)
            except py_compile.PyCompileError as err:
                print(err.msg)
            fname = file_pyc
        else:
            fname = file_pyc
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
def main(args=None):
    """Command-line interface: list, test, extract or create a ZIP file.

    'args' is the argument list without the program name; when None,
    sys.argv[1:] is used.  The first argument selects the action
    (-l, -t, -e or -c).  On a missing or unknown action, or a wrong
    number of arguments, the usage text is printed and the process
    exits with status 1.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    def usage_error():
        print(USAGE)
        sys.exit(1)

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage_error()

    if args[0] == '-l':
        if len(args) != 2:
            usage_error()
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            usage_error()
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: %r" % (badfile,))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            usage_error()

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            # Trailing separator so that "/out" does not match "/outer".
            out_root = os.path.join(os.path.abspath(out), "")
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                # Member names come from the archive and are untrusted:
                # refuse anything ("..", absolute paths) that would land
                # outside the target directory.
                resolved = os.path.join(os.path.abspath(tgt), "")
                if not resolved.startswith(out_root):
                    sys.stderr.write(
                        "Skipping unsafe member name: %r\n" % (path,))
                    continue

                if path.endswith('/'):
                    # Directory entry: create it, there is no data to write.
                    if not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                # tgtdir is empty when extracting into the current directory.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            usage_error()

        def addToZip(zf, path, zippath):
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1424 if __name__
== "__main__":