2 Read and write ZIP files.
4 import struct
, os
, time
, sys
, shutil
5 import binascii
, cStringIO
8 import zlib
# We may need its compression method
12 crc32
= binascii
.crc32
14 __all__
= ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
15 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
17 class BadZipfile(Exception):
21 class LargeZipFile(Exception):
23 Raised when writing a zipfile, the zipfile requires ZIP64 extensions
24 and those extensions are disabled.
27 error
= BadZipfile
# The exception raised by this module
29 ZIP64_LIMIT
= (1 << 31) - 1
30 ZIP_FILECOUNT_LIMIT
= 1 << 16
31 ZIP_MAX_COMMENT
= (1 << 16) - 1
33 # constants for Zip file compression methods
36 # Other ZIP compression methods not supported
38 # Below are some formats and associated data for reading/writing headers using
39 # the struct module. The names and structures of headers/records are those used
40 # in the PKWARE description of the ZIP file format:
41 # http://www.pkware.com/documents/casestudies/APPNOTE.TXT
42 # (URL valid as of January 2008)
44 # The "end of central directory" structure, magic number, size, and indices
45 # (section V.I in the format document)
46 structEndArchive
= "<4s4H2LH"
47 stringEndArchive
= "PK\005\006"
48 sizeEndCentDir
= struct
.calcsize(structEndArchive
)
53 _ECD_ENTRIES_THIS_DISK
= 3
54 _ECD_ENTRIES_TOTAL
= 4
58 # These last two indices are not part of the structure as defined in the
59 # spec, but they are used internally by this module as a convenience
63 # The "central directory" structure, magic number, size, and indices
64 # of entries in the structure (section V.F in the format document)
65 structCentralDir
= "<4s4B4HL2L5H2L"
66 stringCentralDir
= "PK\001\002"
67 sizeCentralDir
= struct
.calcsize(structCentralDir
)
69 # indexes of entries in the central directory structure
71 _CD_CREATE_VERSION
= 1
73 _CD_EXTRACT_VERSION
= 3
74 _CD_EXTRACT_SYSTEM
= 4
80 _CD_COMPRESSED_SIZE
= 10
81 _CD_UNCOMPRESSED_SIZE
= 11
82 _CD_FILENAME_LENGTH
= 12
83 _CD_EXTRA_FIELD_LENGTH
= 13
84 _CD_COMMENT_LENGTH
= 14
85 _CD_DISK_NUMBER_START
= 15
86 _CD_INTERNAL_FILE_ATTRIBUTES
= 16
87 _CD_EXTERNAL_FILE_ATTRIBUTES
= 17
88 _CD_LOCAL_HEADER_OFFSET
= 18
90 # The "local file header" structure, magic number, size, and indices
91 # (section V.A in the format document)
92 structFileHeader
= "<4s2B4HL2L2H"
93 stringFileHeader
= "PK\003\004"
94 sizeFileHeader
= struct
.calcsize(structFileHeader
)
97 _FH_EXTRACT_VERSION
= 1
98 _FH_EXTRACT_SYSTEM
= 2
99 _FH_GENERAL_PURPOSE_FLAG_BITS
= 3
100 _FH_COMPRESSION_METHOD
= 4
101 _FH_LAST_MOD_TIME
= 5
102 _FH_LAST_MOD_DATE
= 6
104 _FH_COMPRESSED_SIZE
= 8
105 _FH_UNCOMPRESSED_SIZE
= 9
106 _FH_FILENAME_LENGTH
= 10
107 _FH_EXTRA_FIELD_LENGTH
= 11
109 # The "Zip64 end of central directory locator" structure, magic number, and size
110 structEndArchive64Locator
= "<4sLQL"
111 stringEndArchive64Locator
= "PK\x06\x07"
112 sizeEndCentDir64Locator
= struct
.calcsize(structEndArchive64Locator
)
114 # The "Zip64 end of central directory" record, magic number, size, and indices
115 # (section V.G in the format document)
116 structEndArchive64
= "<4sQ2H2L4Q"
117 stringEndArchive64
= "PK\x06\x06"
118 sizeEndCentDir64
= struct
.calcsize(structEndArchive64
)
121 _CD64_DIRECTORY_RECSIZE
= 1
122 _CD64_CREATE_VERSION
= 2
123 _CD64_EXTRACT_VERSION
= 3
124 _CD64_DISK_NUMBER
= 4
125 _CD64_DISK_NUMBER_START
= 5
126 _CD64_NUMBER_ENTRIES_THIS_DISK
= 6
127 _CD64_NUMBER_ENTRIES_TOTAL
= 7
128 _CD64_DIRECTORY_SIZE
= 8
129 _CD64_OFFSET_START_CENTDIR
= 9
131 def is_zipfile(filename
):
132 """Quickly see if file is a ZIP file by checking the magic number."""
134 fpin
= open(filename
, "rb")
135 endrec
= _EndRecData(fpin
)
138 return True # file has correct magic number
143 def _EndRecData64(fpin
, offset
, endrec
):
145 Read the ZIP64 end-of-archive records and use that to update endrec
147 fpin
.seek(offset
- sizeEndCentDir64Locator
, 2)
148 data
= fpin
.read(sizeEndCentDir64Locator
)
149 sig
, diskno
, reloff
, disks
= struct
.unpack(structEndArchive64Locator
, data
)
150 if sig
!= stringEndArchive64Locator
:
153 if diskno
!= 0 or disks
!= 1:
154 raise BadZipfile("zipfiles that span multiple disks are not supported")
156 # Assume no 'zip64 extensible data'
157 fpin
.seek(offset
- sizeEndCentDir64Locator
- sizeEndCentDir64
, 2)
158 data
= fpin
.read(sizeEndCentDir64
)
159 sig
, sz
, create_version
, read_version
, disk_num
, disk_dir
, \
160 dircount
, dircount2
, dirsize
, diroffset
= \
161 struct
.unpack(structEndArchive64
, data
)
162 if sig
!= stringEndArchive64
:
165 # Update the original endrec using data from the ZIP64 record
166 endrec
[_ECD_SIGNATURE
] = sig
167 endrec
[_ECD_DISK_NUMBER
] = disk_num
168 endrec
[_ECD_DISK_START
] = disk_dir
169 endrec
[_ECD_ENTRIES_THIS_DISK
] = dircount
170 endrec
[_ECD_ENTRIES_TOTAL
] = dircount2
171 endrec
[_ECD_SIZE
] = dirsize
172 endrec
[_ECD_OFFSET
] = diroffset
176 def _EndRecData(fpin
):
177 """Return data from the "End of Central Directory" record, or None.
179 The data is a list of the nine items in the ZIP "End of central dir"
180 record followed by a tenth item, the file seek offset of this record."""
182 # Determine file size
184 filesize
= fpin
.tell()
186 # Check to see if this is ZIP file with no archive comment (the
187 # "end of central directory" structure should be the last item in the
188 # file if this is the case).
189 fpin
.seek(-sizeEndCentDir
, 2)
191 if data
[0:4] == stringEndArchive
and data
[-2:] == "\000\000":
192 # the signature is correct and there's no comment, unpack structure
193 endrec
= struct
.unpack(structEndArchive
, data
)
196 # Append a blank comment and record start offset
198 endrec
.append(filesize
- sizeEndCentDir
)
199 if endrec
[_ECD_OFFSET
] == 0xffffffff:
200 # the value for the "offset of the start of the central directory"
201 # indicates that there is a "Zip64 end of central directory"
202 # structure present, so go look for it
203 return _EndRecData64(fpin
, -sizeEndCentDir
, endrec
)
207 # Either this is not a ZIP file, or it is a ZIP file with an archive
208 # comment. Search the end of the file for the "end of central directory"
209 # record signature. The comment is the last item in the ZIP file and may be
210 # up to 64K long. It is assumed that the "end of central directory" magic
211 # number does not appear in the comment.
212 maxCommentStart
= max(filesize
- (1 << 16) - sizeEndCentDir
, 0)
213 fpin
.seek(maxCommentStart
, 0)
215 start
= data
.rfind(stringEndArchive
)
217 # found the magic number; attempt to unpack and interpret
218 recData
= data
[start
:start
+sizeEndCentDir
]
219 endrec
= list(struct
.unpack(structEndArchive
, recData
))
220 comment
= data
[start
+sizeEndCentDir
:]
221 # check that comment length is correct
222 if endrec
[_ECD_COMMENT_SIZE
] == len(comment
):
223 # Append the archive comment and start offset
224 endrec
.append(comment
)
225 endrec
.append(maxCommentStart
+ start
)
226 if endrec
[_ECD_OFFSET
] == 0xffffffff:
227 # There is apparently a "Zip64 end of central directory"
228 # structure present, so go look for it
229 return _EndRecData64(fpin
, start
- filesize
, endrec
)
232 # Unable to find a valid end of central directory structure
236 class ZipInfo (object):
237 """Class with attributes describing each file in the ZIP archive."""
261 def __init__(self
, filename
="NoName", date_time
=(1980,1,1,0,0,0)):
262 self
.orig_filename
= filename
# Original file name in archive
264 # Terminate the file name at the first null byte. Null bytes in file
265 # names are used as tricks by viruses in archives.
266 null_byte
= filename
.find(chr(0))
268 filename
= filename
[0:null_byte
]
269 # This is used to ensure paths in generated ZIP files always use
270 # forward slashes as the directory separator, as required by the
271 # ZIP format specification.
272 if os
.sep
!= "/" and os
.sep
in filename
:
273 filename
= filename
.replace(os
.sep
, "/")
275 self
.filename
= filename
# Normalized file name
276 self
.date_time
= date_time
# year, month, day, hour, min, sec
278 self
.compress_type
= ZIP_STORED
# Type of compression for the file
279 self
.comment
= "" # Comment for each file
280 self
.extra
= "" # ZIP extra data
281 if sys
.platform
== 'win32':
282 self
.create_system
= 0 # System which created ZIP archive
284 # Assume everything else is unix-y
285 self
.create_system
= 3 # System which created ZIP archive
286 self
.create_version
= 20 # Version which created ZIP archive
287 self
.extract_version
= 20 # Version needed to extract archive
288 self
.reserved
= 0 # Must be zero
289 self
.flag_bits
= 0 # ZIP flag bits
290 self
.volume
= 0 # Volume number of file header
291 self
.internal_attr
= 0 # Internal attributes
292 self
.external_attr
= 0 # External file attributes
293 # Other attributes are set by class ZipFile:
294 # header_offset Byte offset to the file header
295 # CRC CRC-32 of the uncompressed file
296 # compress_size Size of the compressed file
297 # file_size Size of the uncompressed file
299 def FileHeader(self
):
300 """Return the per-file header as a string."""
302 dosdate
= (dt
[0] - 1980) << 9 | dt
[1] << 5 | dt
[2]
303 dostime
= dt
[3] << 11 | dt
[4] << 5 |
(dt
[5] // 2)
304 if self
.flag_bits
& 0x08:
305 # Set these to zero because we write them after the file data
306 CRC
= compress_size
= file_size
= 0
309 compress_size
= self
.compress_size
310 file_size
= self
.file_size
314 if file_size
> ZIP64_LIMIT
or compress_size
> ZIP64_LIMIT
:
315 # File is larger than what fits into a 4 byte integer,
316 # fall back to the ZIP64 extension
318 extra
= extra
+ struct
.pack(fmt
,
319 1, struct
.calcsize(fmt
)-4, file_size
, compress_size
)
320 file_size
= 0xffffffff
321 compress_size
= 0xffffffff
322 self
.extract_version
= max(45, self
.extract_version
)
323 self
.create_version
= max(45, self
.extract_version
)
325 filename
, flag_bits
= self
._encodeFilenameFlags
()
326 header
= struct
.pack(structFileHeader
, stringFileHeader
,
327 self
.extract_version
, self
.reserved
, flag_bits
,
328 self
.compress_type
, dostime
, dosdate
, CRC
,
329 compress_size
, file_size
,
330 len(filename
), len(extra
))
331 return header
+ filename
+ extra
333 def _encodeFilenameFlags(self
):
334 if isinstance(self
.filename
, unicode):
336 return self
.filename
.encode('ascii'), self
.flag_bits
337 except UnicodeEncodeError:
338 return self
.filename
.encode('utf-8'), self
.flag_bits |
0x800
340 return self
.filename
, self
.flag_bits
342 def _decodeFilename(self
):
343 if self
.flag_bits
& 0x800:
344 return self
.filename
.decode('utf-8')
348 def _decodeExtra(self
):
349 # Try to decode the extra field.
351 unpack
= struct
.unpack
353 tp
, ln
= unpack('<HH', extra
[:4])
356 counts
= unpack('<QQQ', extra
[4:28])
358 counts
= unpack('<QQ', extra
[4:20])
360 counts
= unpack('<Q', extra
[4:12])
364 raise RuntimeError, "Corrupt extra field %s"%(ln
,)
368 # ZIP64 extension (large files and/or large archives)
369 if self
.file_size
in (0xffffffffffffffffL
, 0xffffffffL
):
370 self
.file_size
= counts
[idx
]
373 if self
.compress_size
== 0xFFFFFFFFL
:
374 self
.compress_size
= counts
[idx
]
377 if self
.header_offset
== 0xffffffffL
:
378 old
= self
.header_offset
379 self
.header_offset
= counts
[idx
]
386 """Class to handle decryption of files stored within a ZIP archive.
388 ZIP supports a password-based form of encryption. Even though known
389 plaintext attacks have been found against it, it is still useful
390 to be able to get data out of such a file.
393 zd = _ZipDecrypter(mypwd)
394 plain_char = zd(cypher_char)
395 plain_text = map(zd, cypher_text)
398 def _GenerateCRCTable():
399 """Generate a CRC-32 table.
401 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
402 internal keys. We noticed that a direct implementation is faster than
403 relying on binascii.crc32().
411 crc
= ((crc
>> 1) & 0x7FFFFFFF) ^ poly
413 crc
= ((crc
>> 1) & 0x7FFFFFFF)
416 crctable
= _GenerateCRCTable()
418 def _crc32(self
, ch
, crc
):
419 """Compute the CRC32 primitive on one byte."""
420 return ((crc
>> 8) & 0xffffff) ^ self
.crctable
[(crc ^
ord(ch
)) & 0xff]
422 def __init__(self
, pwd
):
423 self
.key0
= 305419896
424 self
.key1
= 591751049
425 self
.key2
= 878082192
429 def _UpdateKeys(self
, c
):
430 self
.key0
= self
._crc
32(c
, self
.key0
)
431 self
.key1
= (self
.key1
+ (self
.key0
& 255)) & 4294967295
432 self
.key1
= (self
.key1
* 134775813 + 1) & 4294967295
433 self
.key2
= self
._crc
32(chr((self
.key1
>> 24) & 255), self
.key2
)
435 def __call__(self
, c
):
436 """Decrypt a single character."""
439 c
= c ^
(((k
* (k^
1)) >> 8) & 255)
445 """File-like object for reading an archive member.
446 Is returned by ZipFile.open().
449 def __init__(self
, fileobj
, zipinfo
, decrypt
=None):
450 self
.fileobj
= fileobj
451 self
.decrypter
= decrypt
457 self
.univ_newlines
= False
458 self
.nlSeps
= ("\n", )
459 self
.lastdiscard
= ''
461 self
.compress_type
= zipinfo
.compress_type
462 self
.compress_size
= zipinfo
.compress_size
466 self
.name
= zipinfo
.filename
468 # read from compressed files in 64k blocks
469 self
.compreadsize
= 64*1024
470 if self
.compress_type
== ZIP_DEFLATED
:
471 self
.dc
= zlib
.decompressobj(-15)
def set_univ_newlines(self, univ_newlines):
    """Store the universal-newlines flag and select the line separators.

    With the flag set, "\\r\\n", "\\r" and "\\n" are all recognised as line
    separators; otherwise only "\\n" is.
    """
    self.univ_newlines = univ_newlines
    if self.univ_newlines:
        separators = ("\r\n", "\r", "\n")
    else:
        separators = ("\n", )
    self.nlSeps = separators
485 nextline
= self
.readline()
487 raise StopIteration()
494 def _checkfornewline(self
):
497 # ugly check for cases where half of an \r\n pair was
498 # read on the last pass, and the \r was discarded. In this
499 # case we just throw away the \n at the start of the buffer.
500 if (self
.lastdiscard
, self
.linebuffer
[0]) == ('\r','\n'):
501 self
.linebuffer
= self
.linebuffer
[1:]
503 for sep
in self
.nlSeps
:
504 nl
= self
.linebuffer
.find(sep
)
511 def readline(self
, size
= -1):
512 """Read a line with approx. size. If size is negative,
520 # check for a newline already in buffer
521 nl
, nllen
= self
._checkfornewline
()
524 # the next line was already in the buffer
527 # no line break in buffer - try to read more
528 size
-= len(self
.linebuffer
)
529 while nl
< 0 and size
> 0:
530 buf
= self
.read(min(size
, 100))
533 self
.linebuffer
+= buf
536 # check for a newline in buffer
537 nl
, nllen
= self
._checkfornewline
()
539 # we either ran out of bytes in the file, or
540 # met the specified size limit without finding a newline,
541 # so return current buffer
547 buf
= self
.linebuffer
[:nl
]
548 self
.lastdiscard
= self
.linebuffer
[nl
:nl
+ nllen
]
549 self
.linebuffer
= self
.linebuffer
[nl
+ nllen
:]
551 # line is always returned with \n as newline char (except possibly
552 # for a final incomplete line in the file, which is handled above).
555 def readlines(self
, sizehint
= -1):
556 """Return a list with all (following) lines. The sizehint parameter
557 is ignored in this implementation.
561 line
= self
.readline()
566 def read(self
, size
= None):
567 # act like file() obj and return empty string if size is 0
571 # determine read size
572 bytesToRead
= self
.compress_size
- self
.bytes_read
574 # adjust read size for encrypted files since the first 12 bytes
575 # are for the encryption/password information
576 if self
.decrypter
is not None:
579 if size
is not None and size
>= 0:
580 if self
.compress_type
== ZIP_STORED
:
581 lr
= len(self
.readbuffer
)
582 bytesToRead
= min(bytesToRead
, size
- lr
)
583 elif self
.compress_type
== ZIP_DEFLATED
:
584 if len(self
.readbuffer
) > size
:
585 # the user has requested fewer bytes than we've already
586 # pulled through the decompressor; don't read any more
589 # user will use up the buffer, so read some more
590 lr
= len(self
.rawbuffer
)
591 bytesToRead
= min(bytesToRead
, self
.compreadsize
- lr
)
593 # avoid reading past end of file contents
594 if bytesToRead
+ self
.bytes_read
> self
.compress_size
:
595 bytesToRead
= self
.compress_size
- self
.bytes_read
597 # try to read from file (if necessary)
599 bytes
= self
.fileobj
.read(bytesToRead
)
600 self
.bytes_read
+= len(bytes
)
601 self
.rawbuffer
+= bytes
603 # handle contents of raw buffer
605 newdata
= self
.rawbuffer
608 # decrypt new data if we were given an object to handle that
609 if newdata
and self
.decrypter
is not None:
610 newdata
= ''.join(map(self
.decrypter
, newdata
))
612 # decompress newly read data if necessary
613 if newdata
and self
.compress_type
== ZIP_DEFLATED
:
614 newdata
= self
.dc
.decompress(newdata
)
615 self
.rawbuffer
= self
.dc
.unconsumed_tail
616 if self
.eof
and len(self
.rawbuffer
) == 0:
617 # we're out of raw bytes (both from the file and
618 # the local buffer); flush just to make sure the
619 # decompressor is done
620 newdata
+= self
.dc
.flush()
621 # prevent decompressor from being used again
624 self
.readbuffer
+= newdata
627 # return what the user asked for
628 if size
is None or len(self
.readbuffer
) <= size
:
629 bytes
= self
.readbuffer
632 bytes
= self
.readbuffer
[:size
]
633 self
.readbuffer
= self
.readbuffer
[size
:]
639 """ Class with methods to open, read, write, close, list zip files.
641 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
643 file: Either the path to the file, or a file-like object.
644 If it is a path, the file will be opened and closed by ZipFile.
645 mode: The mode can be either read "r", write "w" or append "a".
646 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
647 allowZip64: if True ZipFile will create files with ZIP64 extensions when
648 needed, otherwise it will raise an exception when this would
653 fp
= None # Set here since __del__ checks it
655 def __init__(self
, file, mode
="r", compression
=ZIP_STORED
, allowZip64
=False):
656 """Open the ZIP file with mode read "r", write "w" or append "a"."""
657 if mode
not in ("r", "w", "a"):
658 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
660 if compression
== ZIP_STORED
:
662 elif compression
== ZIP_DEFLATED
:
665 "Compression requires the (missing) zlib module"
667 raise RuntimeError, "That compression method is not supported"
669 self
._allowZip
64 = allowZip64
670 self
._didModify
= False
671 self
.debug
= 0 # Level of printing: 0 through 3
672 self
.NameToInfo
= {} # Find file info given name
673 self
.filelist
= [] # List of ZipInfo instances for archive
674 self
.compression
= compression
# Method of compression
675 self
.mode
= key
= mode
.replace('b', '')[0]
679 # Check if we were passed a file-like object
680 if isinstance(file, basestring
):
683 modeDict
= {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
685 self
.fp
= open(file, modeDict
[mode
])
689 self
.fp
= open(file, modeDict
[mode
])
695 self
.filename
= getattr(file, 'name', None)
702 try: # See if file is a zip file
703 self
._RealGetContents
()
704 # seek to start of directory and overwrite
705 self
.fp
.seek(self
.start_dir
, 0)
706 except BadZipfile
: # file is not a zip file, just append
709 if not self
._filePassed
:
712 raise RuntimeError, 'Mode must be "r", "w" or "a"'
714 def _GetContents(self
):
715 """Read the directory, making sure we close the file if the format
718 self
._RealGetContents
()
720 if not self
._filePassed
:
725 def _RealGetContents(self
):
726 """Read in the table of contents for the ZIP file."""
728 endrec
= _EndRecData(fp
)
730 raise BadZipfile
, "File is not a zip file"
733 size_cd
= endrec
[_ECD_SIZE
] # bytes in central directory
734 offset_cd
= endrec
[_ECD_OFFSET
] # offset of central directory
735 self
.comment
= endrec
[_ECD_COMMENT
] # archive comment
737 # "concat" is zero, unless zip was concatenated to another file
738 concat
= endrec
[_ECD_LOCATION
] - size_cd
- offset_cd
739 if endrec
[_ECD_SIGNATURE
] == stringEndArchive64
:
740 # If Zip64 extension structures are present, account for them
741 concat
-= (sizeEndCentDir64
+ sizeEndCentDir64Locator
)
744 inferred
= concat
+ offset_cd
745 print "given, inferred, offset", offset_cd
, inferred
, concat
746 # self.start_dir: Position of start of central directory
747 self
.start_dir
= offset_cd
+ concat
748 fp
.seek(self
.start_dir
, 0)
749 data
= fp
.read(size_cd
)
750 fp
= cStringIO
.StringIO(data
)
752 while total
< size_cd
:
753 centdir
= fp
.read(sizeCentralDir
)
754 if centdir
[0:4] != stringCentralDir
:
755 raise BadZipfile
, "Bad magic number for central directory"
756 centdir
= struct
.unpack(structCentralDir
, centdir
)
759 filename
= fp
.read(centdir
[_CD_FILENAME_LENGTH
])
760 # Create ZipInfo instance to store file information
761 x
= ZipInfo(filename
)
762 x
.extra
= fp
.read(centdir
[_CD_EXTRA_FIELD_LENGTH
])
763 x
.comment
= fp
.read(centdir
[_CD_COMMENT_LENGTH
])
764 x
.header_offset
= centdir
[_CD_LOCAL_HEADER_OFFSET
]
765 (x
.create_version
, x
.create_system
, x
.extract_version
, x
.reserved
,
766 x
.flag_bits
, x
.compress_type
, t
, d
,
767 x
.CRC
, x
.compress_size
, x
.file_size
) = centdir
[1:12]
768 x
.volume
, x
.internal_attr
, x
.external_attr
= centdir
[15:18]
769 # Convert date/time code to (year, month, day, hour, min, sec)
771 x
.date_time
= ( (d
>>9)+1980, (d
>>5)&0xF, d
&0x1F,
772 t
>>11, (t
>>5)&0x3F, (t
&0x1F) * 2 )
775 x
.header_offset
= x
.header_offset
+ concat
776 x
.filename
= x
._decodeFilename
()
777 self
.filelist
.append(x
)
778 self
.NameToInfo
[x
.filename
] = x
780 # update total bytes read from central directory
781 total
= (total
+ sizeCentralDir
+ centdir
[_CD_FILENAME_LENGTH
]
782 + centdir
[_CD_EXTRA_FIELD_LENGTH
]
783 + centdir
[_CD_COMMENT_LENGTH
])
790 """Return a list of file names in the archive."""
792 for data
in self
.filelist
:
793 l
.append(data
.filename
)
797 """Return a list of class ZipInfo instances for files in the
802 """Print a table of contents for the zip file."""
803 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
804 for zinfo
in self
.filelist
:
805 date
= "%d-%02d-%02d %02d:%02d:%02d" % zinfo
.date_time
[:6]
806 print "%-46s %s %12d" % (zinfo
.filename
, date
, zinfo
.file_size
)
809 """Read all the files and check the CRC."""
811 for zinfo
in self
.filelist
:
813 # Read by chunks, to avoid an OverflowError or a
814 # MemoryError with very large embedded files.
815 f
= self
.open(zinfo
.filename
, "r")
816 while f
.read(chunk_size
): # Check CRC-32
819 return zinfo
.filename
821 def getinfo(self
, name
):
822 """Return the instance of ZipInfo given 'name'."""
823 info
= self
.NameToInfo
.get(name
)
826 'There is no item named %r in the archive' % name
)
830 def setpassword(self
, pwd
):
831 """Set default password for encrypted files."""
def read(self, name, pwd=None):
    """Return the complete contents of archive member `name` as a string.

    The member is opened in mode "r" through self.open(), forwarding the
    optional password `pwd`, and read in a single call.
    """
    member = self.open(name, "r", pwd)
    return member.read()
838 def open(self
, name
, mode
="r", pwd
=None):
839 """Return file-like object for 'name'."""
840 if mode
not in ("r", "U", "rU"):
841 raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
843 raise RuntimeError, \
844 "Attempt to read ZIP archive that was already closed"
846 # Only open a new file for instances where we were not
847 # given a file object in the constructor
851 zef_file
= open(self
.filename
, 'rb')
853 # Make sure we have an info object
854 if isinstance(name
, ZipInfo
):
855 # 'name' is already an info object
858 # Get info object for name
859 zinfo
= self
.getinfo(name
)
861 zef_file
.seek(zinfo
.header_offset
, 0)
863 # Skip the file header:
864 fheader
= zef_file
.read(sizeFileHeader
)
865 if fheader
[0:4] != stringFileHeader
:
866 raise BadZipfile
, "Bad magic number for file header"
868 fheader
= struct
.unpack(structFileHeader
, fheader
)
869 fname
= zef_file
.read(fheader
[_FH_FILENAME_LENGTH
])
870 if fheader
[_FH_EXTRA_FIELD_LENGTH
]:
871 zef_file
.read(fheader
[_FH_EXTRA_FIELD_LENGTH
])
873 if fname
!= zinfo
.orig_filename
:
875 'File name in directory "%s" and header "%s" differ.' % (
876 zinfo
.orig_filename
, fname
)
878 # check for encrypted flag & handle password
879 is_encrypted
= zinfo
.flag_bits
& 0x1
885 raise RuntimeError, "File %s is encrypted, " \
886 "password required for extraction" % name
888 zd
= _ZipDecrypter(pwd
)
889 # The first 12 bytes in the cypher stream is an encryption header
890 # used to strengthen the algorithm. The first 11 bytes are
891 # completely random, while the 12th contains the MSB of the CRC,
892 # or the MSB of the file time depending on the header type
893 # and is used to check the correctness of the password.
894 bytes
= zef_file
.read(12)
895 h
= map(zd
, bytes
[0:12])
896 if zinfo
.flag_bits
& 0x8:
897 # compare against the file time from extended local headers
898 check_byte
= (zinfo
._raw
_time
>> 8) & 0xff
900 # compare against the CRC otherwise
901 check_byte
= (zinfo
.CRC
>> 24) & 0xff
902 if ord(h
[11]) != check_byte
:
903 raise RuntimeError("Bad password for file", name
)
905 # build and return a ZipExtFile
907 zef
= ZipExtFile(zef_file
, zinfo
)
909 zef
= ZipExtFile(zef_file
, zinfo
, zd
)
911 # set universal newlines on ZipExtFile if necessary
913 zef
.set_univ_newlines(True)
916 def extract(self
, member
, path
=None, pwd
=None):
917 """Extract a member from the archive to the current working directory,
918 using its full name. Its file information is extracted as accurately
919 as possible. `member' may be a filename or a ZipInfo object. You can
920 specify a different directory using `path'.
922 if not isinstance(member
, ZipInfo
):
923 member
= self
.getinfo(member
)
928 return self
._extract
_member
(member
, path
, pwd
)
930 def extractall(self
, path
=None, members
=None, pwd
=None):
931 """Extract all members from the archive to the current working
932 directory. `path' specifies a different directory to extract to.
933 `members' is optional and must be a subset of the list returned
937 members
= self
.namelist()
939 for zipinfo
in members
:
940 self
.extract(zipinfo
, path
, pwd
)
942 def _extract_member(self
, member
, targetpath
, pwd
):
943 """Extract the ZipInfo object 'member' to a physical
944 file on the path targetpath.
946 # build the destination pathname, replacing
947 # forward slashes to platform specific separators.
948 if targetpath
[-1:] == "/":
949 targetpath
= targetpath
[:-1]
951 # don't include leading "/" from file name if present
952 if os
.path
.isabs(member
.filename
):
953 targetpath
= os
.path
.join(targetpath
, member
.filename
[1:])
955 targetpath
= os
.path
.join(targetpath
, member
.filename
)
957 targetpath
= os
.path
.normpath(targetpath
)
959 # Create all upper directories if necessary.
960 upperdirs
= os
.path
.dirname(targetpath
)
961 if upperdirs
and not os
.path
.exists(upperdirs
):
962 os
.makedirs(upperdirs
)
964 source
= self
.open(member
, pwd
=pwd
)
965 target
= file(targetpath
, "wb")
966 shutil
.copyfileobj(source
, target
)
972 def _writecheck(self
, zinfo
):
973 """Check for errors before writing a file to the archive."""
974 if zinfo
.filename
in self
.NameToInfo
:
975 if self
.debug
: # Warning for duplicate names
976 print "Duplicate name:", zinfo
.filename
977 if self
.mode
not in ("w", "a"):
978 raise RuntimeError, 'write() requires mode "w" or "a"'
980 raise RuntimeError, \
981 "Attempt to write ZIP archive that was already closed"
982 if zinfo
.compress_type
== ZIP_DEFLATED
and not zlib
:
983 raise RuntimeError, \
984 "Compression requires the (missing) zlib module"
985 if zinfo
.compress_type
not in (ZIP_STORED
, ZIP_DEFLATED
):
986 raise RuntimeError, \
987 "That compression method is not supported"
988 if zinfo
.file_size
> ZIP64_LIMIT
:
989 if not self
._allowZip
64:
990 raise LargeZipFile("Filesize would require ZIP64 extensions")
991 if zinfo
.header_offset
> ZIP64_LIMIT
:
992 if not self
._allowZip
64:
993 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
995 def write(self
, filename
, arcname
=None, compress_type
=None):
996 """Put the bytes from filename into the archive under the name
1000 "Attempt to write to ZIP archive that was already closed")
1002 st
= os
.stat(filename
)
1003 mtime
= time
.localtime(st
.st_mtime
)
1004 date_time
= mtime
[0:6]
1005 # Create ZipInfo instance to store file information
1008 arcname
= os
.path
.normpath(os
.path
.splitdrive(arcname
)[1])
1009 while arcname
[0] in (os
.sep
, os
.altsep
):
1010 arcname
= arcname
[1:]
1011 zinfo
= ZipInfo(arcname
, date_time
)
1012 zinfo
.external_attr
= (st
[0] & 0xFFFF) << 16L # Unix attributes
1013 if compress_type
is None:
1014 zinfo
.compress_type
= self
.compression
1016 zinfo
.compress_type
= compress_type
1018 zinfo
.file_size
= st
.st_size
1019 zinfo
.flag_bits
= 0x00
1020 zinfo
.header_offset
= self
.fp
.tell() # Start of header bytes
1022 self
._writecheck
(zinfo
)
1023 self
._didModify
= True
1024 fp
= open(filename
, "rb")
1025 # Must overwrite CRC and sizes with correct data later
1027 zinfo
.compress_size
= compress_size
= 0
1028 zinfo
.file_size
= file_size
= 0
1029 self
.fp
.write(zinfo
.FileHeader())
1030 if zinfo
.compress_type
== ZIP_DEFLATED
:
1031 cmpr
= zlib
.compressobj(zlib
.Z_DEFAULT_COMPRESSION
,
1036 buf
= fp
.read(1024 * 8)
1039 file_size
= file_size
+ len(buf
)
1040 CRC
= crc32(buf
, CRC
) & 0xffffffff
1042 buf
= cmpr
.compress(buf
)
1043 compress_size
= compress_size
+ len(buf
)
1048 compress_size
= compress_size
+ len(buf
)
1050 zinfo
.compress_size
= compress_size
1052 zinfo
.compress_size
= file_size
1054 zinfo
.file_size
= file_size
1055 # Seek backwards and write CRC and file sizes
1056 position
= self
.fp
.tell() # Preserve current position in file
1057 self
.fp
.seek(zinfo
.header_offset
+ 14, 0)
1058 self
.fp
.write(struct
.pack("<LLL", zinfo
.CRC
, zinfo
.compress_size
,
1060 self
.fp
.seek(position
, 0)
1061 self
.filelist
.append(zinfo
)
1062 self
.NameToInfo
[zinfo
.filename
] = zinfo
1064 def writestr(self
, zinfo_or_arcname
, bytes
):
1065 """Write a file into the archive. The contents is the string
1066 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or
1067 the name of the file in the archive."""
1068 if not isinstance(zinfo_or_arcname
, ZipInfo
):
1069 zinfo
= ZipInfo(filename
=zinfo_or_arcname
,
1070 date_time
=time
.localtime(time
.time())[:6])
1071 zinfo
.compress_type
= self
.compression
1072 zinfo
.external_attr
= 0600 << 16
1074 zinfo
= zinfo_or_arcname
1078 "Attempt to write to ZIP archive that was already closed")
1080 zinfo
.file_size
= len(bytes
) # Uncompressed size
1081 zinfo
.header_offset
= self
.fp
.tell() # Start of header bytes
1082 self
._writecheck
(zinfo
)
1083 self
._didModify
= True
1084 zinfo
.CRC
= crc32(bytes
) & 0xffffffff # CRC-32 checksum
1085 if zinfo
.compress_type
== ZIP_DEFLATED
:
1086 co
= zlib
.compressobj(zlib
.Z_DEFAULT_COMPRESSION
,
1088 bytes
= co
.compress(bytes
) + co
.flush()
1089 zinfo
.compress_size
= len(bytes
) # Compressed size
1091 zinfo
.compress_size
= zinfo
.file_size
1092 zinfo
.header_offset
= self
.fp
.tell() # Start of header bytes
1093 self
.fp
.write(zinfo
.FileHeader())
1094 self
.fp
.write(bytes
)
1096 if zinfo
.flag_bits
& 0x08:
1097 # Write CRC and file sizes after the file data
1098 self
.fp
.write(struct
.pack("<lLL", zinfo
.CRC
, zinfo
.compress_size
,
1100 self
.filelist
.append(zinfo
)
1101 self
.NameToInfo
[zinfo
.filename
] = zinfo
1104 """Call the "close()" method in case the user forgot."""
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    When the archive was opened for writing or appending and has been
    modified, the central directory (one record per entry in
    self.filelist), the optional ZIP64 end-of-archive records and the
    end-of-archive record followed by the archive comment are written
    before the file is released.

    The underlying file object is closed only if this object opened it
    itself (self._filePassed is false).  self.fp is always reset to None,
    even when writing the ending records raises, so the handle is not
    leaked and a later close() is a no-op.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify: # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                # Pack the timestamp into DOS-style date/time words; the
                # seconds field is stored halved.
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)

                # Values too large for the 32-bit fields are collected in
                # "extra" and replaced by the 0xffffffff marker.
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                            '<HH' + 'Q'*len(extra),
                            1, 8*len(extra), *extra) + extra_data

                    # An entry carrying the ZIP64 field advertises at
                    # least version 45.
                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Dump the offending field values to stderr before
                    # re-raising, to help diagnose the bad value.
                    sys.stderr.write("%r\n" % ((structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data),
                        len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset),))
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirOffset = pos1
            if pos1 > ZIP64_LIMIT:
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                        structEndArchive64, stringEndArchive64,
                        44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                        structEndArchive64Locator,
                        stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                centDirOffset = 0xFFFFFFFF

            # check for valid comment length; a comment of exactly
            # ZIP_MAX_COMMENT bytes still fits and is left alone
            if len(self.comment) > ZIP_MAX_COMMENT:
                if self.debug > 0:
                    msg = 'Archive comment is too long; truncating to %d bytes' \
                          % ZIP_MAX_COMMENT
                    print(msg)
                self.comment = self.comment[:ZIP_MAX_COMMENT]

            # NOTE(review): the entry count is stored modulo
            # ZIP_FILECOUNT_LIMIT, and the ZIP64 records above are only
            # written when the directory offset exceeds ZIP64_LIMIT, so an
            # archive with that many entries or more records a wrapped
            # count here.
            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, count % ZIP_FILECOUNT_LIMIT,
                                 count % ZIP_FILECOUNT_LIMIT, pos2 - pos1,
                                 centDirOffset, len(self.comment))
            self.fp.write(endrec)
            self.fp.write(self.comment)
            self.fp.flush()
    finally:
        # Release the handle even if writing the ending records failed.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1212 class PyZipFile(ZipFile
):
1213 """Class to create ZIP archives with Python library files and packages."""
1215 def writepy(self
, pathname
, basename
= ""):
1216 """Add all files from "pathname" to the ZIP archive.
1218 If pathname is a package directory, search the directory and
1219 all package subdirectories recursively for all *.py and enter
1220 the modules into the archive. If pathname is a plain
1221 directory, listdir *.py and enter all modules. Else, pathname
1222 must be a Python *.py file and the module will be put into the
1223 archive. Added modules are always module.pyo or module.pyc.
1224 This method will compile the module.py into module.pyc if
1227 dir, name
= os
.path
.split(pathname
)
1228 if os
.path
.isdir(pathname
):
1229 initname
= os
.path
.join(pathname
, "__init__.py")
1230 if os
.path
.isfile(initname
):
1231 # This is a package directory, add it
1233 basename
= "%s/%s" % (basename
, name
)
1237 print "Adding package in", pathname
, "as", basename
1238 fname
, arcname
= self
._get
_codename
(initname
[0:-3], basename
)
1240 print "Adding", arcname
1241 self
.write(fname
, arcname
)
1242 dirlist
= os
.listdir(pathname
)
1243 dirlist
.remove("__init__.py")
1244 # Add all *.py files and package subdirectories
1245 for filename
in dirlist
:
1246 path
= os
.path
.join(pathname
, filename
)
1247 root
, ext
= os
.path
.splitext(filename
)
1248 if os
.path
.isdir(path
):
1249 if os
.path
.isfile(os
.path
.join(path
, "__init__.py")):
1250 # This is a package directory, add it
1251 self
.writepy(path
, basename
) # Recursive call
1253 fname
, arcname
= self
._get
_codename
(path
[0:-3],
1256 print "Adding", arcname
1257 self
.write(fname
, arcname
)
1259 # This is NOT a package directory, add its files at top level
1261 print "Adding files from directory", pathname
1262 for filename
in os
.listdir(pathname
):
1263 path
= os
.path
.join(pathname
, filename
)
1264 root
, ext
= os
.path
.splitext(filename
)
1266 fname
, arcname
= self
._get
_codename
(path
[0:-3],
1269 print "Adding", arcname
1270 self
.write(fname
, arcname
)
1272 if pathname
[-3:] != ".py":
1273 raise RuntimeError, \
1274 'Files added with writepy() must end with ".py"'
1275 fname
, arcname
= self
._get
_codename
(pathname
[0:-3], basename
)
1277 print "Adding file", arcname
1278 self
.write(fname
, arcname
)
1280 def _get_codename(self
, pathname
, basename
):
1281 """Return (filename, archivename) for the path.
1283 Given a module name path, return the correct file path and
1284 archive name, compiling if necessary. For example, given
1285 /python/lib/string, return (/python/lib/string.pyc, string).
1287 file_py
= pathname
+ ".py"
1288 file_pyc
= pathname
+ ".pyc"
1289 file_pyo
= pathname
+ ".pyo"
1290 if os
.path
.isfile(file_pyo
) and \
1291 os
.stat(file_pyo
).st_mtime
>= os
.stat(file_py
).st_mtime
:
1292 fname
= file_pyo
# Use .pyo file
1293 elif not os
.path
.isfile(file_pyc
) or \
1294 os
.stat(file_pyc
).st_mtime
< os
.stat(file_py
).st_mtime
:
1297 print "Compiling", file_py
1299 py_compile
.compile(file_py
, file_pyc
, None, True)
1300 except py_compile
.PyCompileError
,err
:
1305 archivename
= os
.path
.split(fname
)[1]
1307 archivename
= "%s/%s" % (basename
, archivename
)
1308 return (fname
, archivename
)
def main(args = None):
    """Command-line interface: list, test, extract or create a ZIP archive.

    args -- the argument list without the program name; sys.argv[1:] is
            used when it is None.

    The first argument selects the action (-l, -t, -e or -c).  If it is
    missing or unknown, or the number of arguments does not fit the
    action, the usage text is printed and sys.exit(1) is called.

    Every archive opened here is closed again, also when the action
    fails part-way.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.testzip()
        finally:
            zf.close()
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        out = args[2]
        try:
            for path in zf.namelist():
                # NOTE(review): member names are joined onto the target
                # directory as-is; absolute names or ".." components are
                # not rejected here.
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                if path.endswith('/'):
                    # Directory entry: create it, there is no data to write
                    if not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                fp = open(tgt, 'wb')
                try:
                    fp.write(zf.read(path))
                finally:
                    fp.close()
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recursively the contents of a directory,
            # under the archive name "zippath".
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1384 if __name__
== "__main__":