Lib/zipfile.py

   1 """
   2 Read and write ZIP files.
   3
   4 XXX references to utf-8 need further investigation.
   5 """
   6 import struct, os, time, sys, shutil
   7 import binascii, io, stat
   8
   9 try:
  10     import zlib # We may need its compression method
  11     crc32 = zlib.crc32
  12 except ImportError:
  13     zlib = None
  14     crc32 = binascii.crc32
  15
  16 __all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
  17            "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
  18
  19 class BadZipfile(Exception):
  20     pass
  21
  22
  23 class LargeZipFile(Exception):
  24     """
  25     Raised when writing a zipfile, the zipfile requires ZIP64 extensions
  26     and those extensions are disabled.
  27     """
  28
  29 error = BadZipfile      # The exception raised by this module
  30
  31 ZIP64_LIMIT = (1 << 31) - 1
  32 ZIP_FILECOUNT_LIMIT = 1 << 16
  33 ZIP_MAX_COMMENT = (1 << 16) - 1
  34
  35 # constants for Zip file compression methods
  36 ZIP_STORED = 0
  37 ZIP_DEFLATED = 8
  38 # Other ZIP compression methods not supported
  39
  40 # Below are some formats and associated data for reading/writing headers using
  41 # the struct module.  The names and structures of headers/records are those used
  42 # in the PKWARE description of the ZIP file format:
  43 #     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
  44 # (URL valid as of January 2008)
  45
  46 # The "end of central directory" structure, magic number, size, and indices
  47 # (section V.I in the format document)
  48 structEndArchive = b"<4s4H2LH"
  49 stringEndArchive = b"PK\005\006"
  50 sizeEndCentDir = struct.calcsize(structEndArchive)
  51
  52 _ECD_SIGNATURE = 0
  53 _ECD_DISK_NUMBER = 1
  54 _ECD_DISK_START = 2
  55 _ECD_ENTRIES_THIS_DISK = 3
  56 _ECD_ENTRIES_TOTAL = 4
  57 _ECD_SIZE = 5
  58 _ECD_OFFSET = 6
  59 _ECD_COMMENT_SIZE = 7
  60 # These last two indices are not part of the structure as defined in the
  61 # spec, but they are used internally by this module as a convenience
  62 _ECD_COMMENT = 8
  63 _ECD_LOCATION = 9
  64
  65 # The "central directory" structure, magic number, size, and indices
  66 # of entries in the structure (section V.F in the format document)
  67 structCentralDir = "<4s4B4HL2L5H2L"
  68 stringCentralDir = b"PK\001\002"
  69 sizeCentralDir = struct.calcsize(structCentralDir)
  70
  71 # indexes of entries in the central directory structure
  72 _CD_SIGNATURE = 0
  73 _CD_CREATE_VERSION = 1
  74 _CD_CREATE_SYSTEM = 2
  75 _CD_EXTRACT_VERSION = 3
  76 _CD_EXTRACT_SYSTEM = 4
  77 _CD_FLAG_BITS = 5
  78 _CD_COMPRESS_TYPE = 6
  79 _CD_TIME = 7
  80 _CD_DATE = 8
  81 _CD_CRC = 9
  82 _CD_COMPRESSED_SIZE = 10
  83 _CD_UNCOMPRESSED_SIZE = 11
  84 _CD_FILENAME_LENGTH = 12
  85 _CD_EXTRA_FIELD_LENGTH = 13
  86 _CD_COMMENT_LENGTH = 14
  87 _CD_DISK_NUMBER_START = 15
  88 _CD_INTERNAL_FILE_ATTRIBUTES = 16
  89 _CD_EXTERNAL_FILE_ATTRIBUTES = 17
  90 _CD_LOCAL_HEADER_OFFSET = 18
  91
  92 # The "local file header" structure, magic number, size, and indices
  93 # (section V.A in the format document)
  94 structFileHeader = "<4s2B4HL2L2H"
  95 stringFileHeader = b"PK\003\004"
  96 sizeFileHeader = struct.calcsize(structFileHeader)
  97
  98 _FH_SIGNATURE = 0
  99 _FH_EXTRACT_VERSION = 1
 100 _FH_EXTRACT_SYSTEM = 2
 101 _FH_GENERAL_PURPOSE_FLAG_BITS = 3
 102 _FH_COMPRESSION_METHOD = 4
 103 _FH_LAST_MOD_TIME = 5
 104 _FH_LAST_MOD_DATE = 6
 105 _FH_CRC = 7
 106 _FH_COMPRESSED_SIZE = 8
 107 _FH_UNCOMPRESSED_SIZE = 9
 108 _FH_FILENAME_LENGTH = 10
 109 _FH_EXTRA_FIELD_LENGTH = 11
 110
 111 # The "Zip64 end of central directory locator" structure, magic number, and size
 112 structEndArchive64Locator = "<4sLQL"
 113 stringEndArchive64Locator = b"PK\x06\x07"
 114 sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
 115
 116 # The "Zip64 end of central directory" record, magic number, size, and indices
 117 # (section V.G in the format document)
 118 structEndArchive64 = "<4sQ2H2L4Q"
 119 stringEndArchive64 = b"PK\x06\x06"
 120 sizeEndCentDir64 = struct.calcsize(structEndArchive64)
 121
 122 _CD64_SIGNATURE = 0
 123 _CD64_DIRECTORY_RECSIZE = 1
 124 _CD64_CREATE_VERSION = 2
 125 _CD64_EXTRACT_VERSION = 3
 126 _CD64_DISK_NUMBER = 4
 127 _CD64_DISK_NUMBER_START = 5
 128 _CD64_NUMBER_ENTRIES_THIS_DISK = 6
 129 _CD64_NUMBER_ENTRIES_TOTAL = 7
 130 _CD64_DIRECTORY_SIZE = 8
 131 _CD64_OFFSET_START_CENTDIR = 9
 132
 133 def _check_zipfile(fp):
 134     try:
 135         if _EndRecData(fp):
 136             return True         # file has correct magic number
 137     except IOError:
 138         pass
 139     return False
 140
 141 def is_zipfile(filename):
 142     """Quickly see if a file is a ZIP file by checking the magic number.
 143
 144     The filename argument may be a file or file-like object too.
 145     """
 146     result = False
 147     try:
 148         if hasattr(filename, "read"):
 149             result = _check_zipfile(fp=filename)
 150         else:
 151             with open(filename, "rb") as fp:
 152                 result = _check_zipfile(fp)
 153     except IOError:
 154         pass
 155     return result
 156
 157 def _EndRecData64(fpin, offset, endrec):
 158     """
 159     Read the ZIP64 end-of-archive records and use that to update endrec
 160     """
 161     fpin.seek(offset - sizeEndCentDir64Locator, 2)
 162     data = fpin.read(sizeEndCentDir64Locator)
 163     sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
 164     if sig != stringEndArchive64Locator:
 165         return endrec
 166
 167     if diskno != 0 or disks != 1:
 168         raise BadZipfile("zipfiles that span multiple disks are not supported")
 169
 170     # Assume no 'zip64 extensible data'
 171     fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
 172     data = fpin.read(sizeEndCentDir64)
 173     sig, sz, create_version, read_version, disk_num, disk_dir, \
 174             dircount, dircount2, dirsize, diroffset = \
 175             struct.unpack(structEndArchive64, data)
 176     if sig != stringEndArchive64:
 177         return endrec
 178
 179     # Update the original endrec using data from the ZIP64 record
 180     endrec[_ECD_SIGNATURE] = sig
 181     endrec[_ECD_DISK_NUMBER] = disk_num
 182     endrec[_ECD_DISK_START] = disk_dir
 183     endrec[_ECD_ENTRIES_THIS_DISK] = dircount
 184     endrec[_ECD_ENTRIES_TOTAL] = dircount2
 185     endrec[_ECD_SIZE] = dirsize
 186     endrec[_ECD_OFFSET] = diroffset
 187     return endrec
 188
 189
 190 def _EndRecData(fpin):
 191     """Return data from the "End of Central Directory" record, or None.
 192
 193     The data is a list of the nine items in the ZIP "End of central dir"
 194     record followed by a tenth item, the file seek offset of this record."""
 195
 196     # Determine file size
 197     fpin.seek(0, 2)
 198     filesize = fpin.tell()
 199
 200     # Check to see if this is ZIP file with no archive comment (the
 201     # "end of central directory" structure should be the last item in the
 202     # file if this is the case).
 203     try:
 204         fpin.seek(-sizeEndCentDir, 2)
 205     except IOError:
 206         return None
 207     data = fpin.read()
 208     if data[0:4] == stringEndArchive and data[-2:] == b"\000\000":
 209         # the signature is correct and there's no comment, unpack structure
 210         endrec = struct.unpack(structEndArchive, data)
 211         endrec=list(endrec)
 212
 213         # Append a blank comment and record start offset
 214         endrec.append(b"")
 215         endrec.append(filesize - sizeEndCentDir)
 216
 217         # Try to read the "Zip64 end of central directory" structure
 218         return _EndRecData64(fpin, -sizeEndCentDir, endrec)
 219
 220     # Either this is not a ZIP file, or it is a ZIP file with an archive
 221     # comment.  Search the end of the file for the "end of central directory"
 222     # record signature. The comment is the last item in the ZIP file and may be
 223     # up to 64K long.  It is assumed that the "end of central directory" magic
 224     # number does not appear in the comment.
 225     maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
 226     fpin.seek(maxCommentStart, 0)
 227     data = fpin.read()
 228     start = data.rfind(stringEndArchive)
 229     if start >= 0:
 230         # found the magic number; attempt to unpack and interpret
 231         recData = data[start:start+sizeEndCentDir]
 232         endrec = list(struct.unpack(structEndArchive, recData))
 233         comment = data[start+sizeEndCentDir:]
 234         # check that comment length is correct
 235         if endrec[_ECD_COMMENT_SIZE] == len(comment):
 236             # Append the archive comment and start offset
 237             endrec.append(comment)
 238             endrec.append(maxCommentStart + start)
 239
 240             # Try to read the "Zip64 end of central directory" structure
 241             return _EndRecData64(fpin, maxCommentStart + start - filesize,
 242                                  endrec)
 243
 244     # Unable to find a valid end of central directory structure
 245     return
 246
 247
 248 class ZipInfo (object):
 249     """Class with attributes describing each file in the ZIP archive."""
 250
 251     __slots__ = (
 252             'orig_filename',
 253             'filename',
 254             'date_time',
 255             'compress_type',
 256             'comment',
 257             'extra',
 258             'create_system',
 259             'create_version',
 260             'extract_version',
 261             'reserved',
 262             'flag_bits',
 263             'volume',
 264             'internal_attr',
 265             'external_attr',
 266             'header_offset',
 267             'CRC',
 268             'compress_size',
 269             'file_size',
 270             '_raw_time',
 271         )
 272
 273     def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
 274         self.orig_filename = filename   # Original file name in archive
 275
 276         # Terminate the file name at the first null byte.  Null bytes in file
 277         # names are used as tricks by viruses in archives.
 278         null_byte = filename.find(chr(0))
 279         if null_byte >= 0:
 280             filename = filename[0:null_byte]
 281         # This is used to ensure paths in generated ZIP files always use
 282         # forward slashes as the directory separator, as required by the
 283         # ZIP format specification.
 284         if os.sep != "/" and os.sep in filename:
 285             filename = filename.replace(os.sep, "/")
 286
 287         self.filename = filename        # Normalized file name
 288         self.date_time = date_time      # year, month, day, hour, min, sec
 289         # Standard values:
 290         self.compress_type = ZIP_STORED # Type of compression for the file
 291         self.comment = b""              # Comment for each file
 292         self.extra = b""                # ZIP extra data
 293         if sys.platform == 'win32':
 294             self.create_system = 0          # System which created ZIP archive
 295         else:
 296             # Assume everything else is unix-y
 297             self.create_system = 3          # System which created ZIP archive
 298         self.create_version = 20        # Version which created ZIP archive
 299         self.extract_version = 20       # Version needed to extract archive
 300         self.reserved = 0               # Must be zero
 301         self.flag_bits = 0              # ZIP flag bits
 302         self.volume = 0                 # Volume number of file header
 303         self.internal_attr = 0          # Internal attributes
 304         self.external_attr = 0          # External file attributes
 305         # Other attributes are set by class ZipFile:
 306         # header_offset         Byte offset to the file header
 307         # CRC                   CRC-32 of the uncompressed file
 308         # compress_size         Size of the compressed file
 309         # file_size             Size of the uncompressed file
 310
 311     def FileHeader(self):
 312         """Return the per-file header as a string."""
 313         dt = self.date_time
 314         dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
 315         dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
 316         if self.flag_bits & 0x08:
 317             # Set these to zero because we write them after the file data
 318             CRC = compress_size = file_size = 0
 319         else:
 320             CRC = self.CRC
 321             compress_size = self.compress_size
 322             file_size = self.file_size
 323
 324         extra = self.extra
 325
 326         if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
 327             # File is larger than what fits into a 4 byte integer,
 328             # fall back to the ZIP64 extension
 329             fmt = '<HHQQ'
 330             extra = extra + struct.pack(fmt,
 331                     1, struct.calcsize(fmt)-4, file_size, compress_size)
 332             file_size = 0xffffffff
 333             compress_size = 0xffffffff
 334             self.extract_version = max(45, self.extract_version)
 335             self.create_version = max(45, self.extract_version)
 336
 337         filename, flag_bits = self._encodeFilenameFlags()
 338         header = struct.pack(structFileHeader, stringFileHeader,
 339                  self.extract_version, self.reserved, flag_bits,
 340                  self.compress_type, dostime, dosdate, CRC,
 341                  compress_size, file_size,
 342                  len(filename), len(extra))
 343         return header + filename + extra
 344
 345     def _encodeFilenameFlags(self):
 346         try:
 347             return self.filename.encode('ascii'), self.flag_bits
 348         except UnicodeEncodeError:
 349             return self.filename.encode('utf-8'), self.flag_bits | 0x800
 350
 351     def _decodeExtra(self):
 352         # Try to decode the extra field.
 353         extra = self.extra
 354         unpack = struct.unpack
 355         while extra:
 356             tp, ln = unpack('<HH', extra[:4])
 357             if tp == 1:
 358                 if ln >= 24:
 359                     counts = unpack('<QQQ', extra[4:28])
 360                 elif ln == 16:
 361                     counts = unpack('<QQ', extra[4:20])
 362                 elif ln == 8:
 363                     counts = unpack('<Q', extra[4:12])
 364                 elif ln == 0:
 365                     counts = ()
 366                 else:
 367                     raise RuntimeError("Corrupt extra field %s"%(ln,))
 368
 369                 idx = 0
 370
 371                 # ZIP64 extension (large files and/or large archives)
 372                 if self.file_size in (0xffffffffffffffff, 0xffffffff):
 373                     self.file_size = counts[idx]
 374                     idx += 1
 375
 376                 if self.compress_size == 0xFFFFFFFF:
 377                     self.compress_size = counts[idx]
 378                     idx += 1
 379
 380                 if self.header_offset == 0xffffffff:
 381                     old = self.header_offset
 382                     self.header_offset = counts[idx]
 383                     idx+=1
 384
 385             extra = extra[ln+4:]
 386
 387
 388 class _ZipDecrypter:
 389     """Class to handle decryption of files stored within a ZIP archive.
 390
 391     ZIP supports a password-based form of encryption. Even though known
 392     plaintext attacks have been found against it, it is still useful
 393     to be able to get data out of such a file.
 394
 395     Usage:
 396         zd = _ZipDecrypter(mypwd)
 397         plain_char = zd(cypher_char)
 398         plain_text = map(zd, cypher_text)
 399     """
 400
 401     def _GenerateCRCTable():
 402         """Generate a CRC-32 table.
 403
 404         ZIP encryption uses the CRC32 one-byte primitive for scrambling some
 405         internal keys. We noticed that a direct implementation is faster than
 406         relying on binascii.crc32().
 407         """
 408         poly = 0xedb88320
 409         table = [0] * 256
 410         for i in range(256):
 411             crc = i
 412             for j in range(8):
 413                 if crc & 1:
 414                     crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
 415                 else:
 416                     crc = ((crc >> 1) & 0x7FFFFFFF)
 417             table[i] = crc
 418         return table
 419     crctable = _GenerateCRCTable()
 420
 421     def _crc32(self, ch, crc):
 422         """Compute the CRC32 primitive on one byte."""
 423         return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
 424
 425     def __init__(self, pwd):
 426         self.key0 = 305419896
 427         self.key1 = 591751049
 428         self.key2 = 878082192
 429         for p in pwd:
 430             self._UpdateKeys(p)
 431
 432     def _UpdateKeys(self, c):
 433         self.key0 = self._crc32(c, self.key0)
 434         self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
 435         self.key1 = (self.key1 * 134775813 + 1) & 4294967295
 436         self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
 437
 438     def __call__(self, c):
 439         """Decrypt a single character."""
 440         assert isinstance(c, int)
 441         k = self.key2 | 2
 442         c = c ^ (((k * (k^1)) >> 8) & 255)
 443         self._UpdateKeys(c)
 444         return c
 445
 446 class ZipExtFile:
 447     """File-like object for reading an archive member.
 448        Is returned by ZipFile.open().
 449     """
 450
 451     def __init__(self, fileobj, zipinfo, decrypt=None):
 452         self.fileobj = fileobj
 453         self.decrypter = decrypt
 454         self.bytes_read = 0
 455         self.rawbuffer = b''
 456         self.readbuffer = b''
 457         self.linebuffer = b''
 458         self.eof = False
 459         self.univ_newlines = False
 460         self.nlSeps = (b"\n", )
 461         self.lastdiscard = b''
 462
 463         self.compress_type = zipinfo.compress_type
 464         self.compress_size = zipinfo.compress_size
 465
 466         self.closed  = False
 467         self.mode    = "r"
 468         self.name = zipinfo.filename
 469
 470         # read from compressed files in 64k blocks
 471         self.compreadsize = 64*1024
 472         if self.compress_type == ZIP_DEFLATED:
 473             self.dc = zlib.decompressobj(-15)
 474
 475     def set_univ_newlines(self, univ_newlines):
 476         self.univ_newlines = univ_newlines
 477
 478         # pick line separator char(s) based on universal newlines flag
 479         self.nlSeps = (b"\n", )
 480         if self.univ_newlines:
 481             self.nlSeps = (b"\r\n", b"\r", b"\n")
 482
 483     def __iter__(self):
 484         return self
 485
 486     def __next__(self):
 487         nextline = self.readline()
 488         if not nextline:
 489             raise StopIteration()
 490
 491         return nextline
 492
 493     def close(self):
 494         self.closed = True
 495
 496     def _checkfornewline(self):
 497         nl, nllen = -1, -1
 498         if self.linebuffer:
 499             # ugly check for cases where half of an \r\n pair was
 500             # read on the last pass, and the \r was discarded.  In this
 501             # case we just throw away the \n at the start of the buffer.
 502             if (self.lastdiscard, self.linebuffer[:1]) == (b'\r', b'\n'):
 503                 self.linebuffer = self.linebuffer[1:]
 504
 505             for sep in self.nlSeps:
 506                 nl = self.linebuffer.find(sep)
 507                 if nl >= 0:
 508                     nllen = len(sep)
 509                     return nl, nllen
 510
 511         return nl, nllen
 512
 513     def readline(self, size = -1):
 514         """Read a line with approx. size. If size is negative,
 515            read a whole line.
 516         """
 517         if size < 0:
 518             size = sys.maxsize
 519         elif size == 0:
 520             return b''
 521
 522         # check for a newline already in buffer
 523         nl, nllen = self._checkfornewline()
 524
 525         if nl >= 0:
 526             # the next line was already in the buffer
 527             nl = min(nl, size)
 528         else:
 529             # no line break in buffer - try to read more
 530             size -= len(self.linebuffer)
 531             while nl < 0 and size > 0:
 532                 buf = self.read(min(size, 100))
 533                 if not buf:
 534                     break
 535                 self.linebuffer += buf
 536                 size -= len(buf)
 537
 538                 # check for a newline in buffer
 539                 nl, nllen = self._checkfornewline()
 540
 541             # we either ran out of bytes in the file, or
 542             # met the specified size limit without finding a newline,
 543             # so return current buffer
 544             if nl < 0:
 545                 s = self.linebuffer
 546                 self.linebuffer = b''
 547                 return s
 548
 549         buf = self.linebuffer[:nl]
 550         self.lastdiscard = self.linebuffer[nl:nl + nllen]
 551         self.linebuffer = self.linebuffer[nl + nllen:]
 552
 553         # line is always returned with \n as newline char (except possibly
 554         # for a final incomplete line in the file, which is handled above).
 555         return buf + b"\n"
 556
 557     def readlines(self, sizehint = -1):
 558         """Return a list with all (following) lines. The sizehint parameter
 559         is ignored in this implementation.
 560         """
 561         result = []
 562         while True:
 563             line = self.readline()
 564             if not line: break
 565             result.append(line)
 566         return result
 567
 568     def read(self, size = None):
 569         # act like file obj and return empty string if size is 0
 570         if size == 0:
 571             return b''
 572
 573         # determine read size
 574         bytesToRead = self.compress_size - self.bytes_read
 575
 576         # adjust read size for encrypted files since the first 12 bytes
 577         # are for the encryption/password information
 578         if self.decrypter is not None:
 579             bytesToRead -= 12
 580
 581         if size is not None and size >= 0:
 582             if self.compress_type == ZIP_STORED:
 583                 lr = len(self.readbuffer)
 584                 bytesToRead = min(bytesToRead, size - lr)
 585             elif self.compress_type == ZIP_DEFLATED:
 586                 if len(self.readbuffer) > size:
 587                     # the user has requested fewer bytes than we've already
 588                     # pulled through the decompressor; don't read any more
 589                     bytesToRead = 0
 590                 else:
 591                     # user will use up the buffer, so read some more
 592                     lr = len(self.rawbuffer)
 593                     bytesToRead = min(bytesToRead, self.compreadsize - lr)
 594
 595         # avoid reading past end of file contents
 596         if bytesToRead + self.bytes_read > self.compress_size:
 597             bytesToRead = self.compress_size - self.bytes_read
 598
 599         # try to read from file (if necessary)
 600         if bytesToRead > 0:
 601             data = self.fileobj.read(bytesToRead)
 602             self.bytes_read += len(data)
 603             try:
 604                 self.rawbuffer += data
 605             except:
 606                 print(repr(self.fileobj), repr(self.rawbuffer),
 607                       repr(data))
 608                 raise
 609
 610             # handle contents of raw buffer
 611             if self.rawbuffer:
 612                 newdata = self.rawbuffer
 613                 self.rawbuffer = b''
 614
 615                 # decrypt new data if we were given an object to handle that
 616                 if newdata and self.decrypter is not None:
 617                     newdata = bytes(map(self.decrypter, newdata))
 618
 619                 # decompress newly read data if necessary
 620                 if newdata and self.compress_type == ZIP_DEFLATED:
 621                     newdata = self.dc.decompress(newdata)
 622                     self.rawbuffer = self.dc.unconsumed_tail
 623                     if self.eof and len(self.rawbuffer) == 0:
 624                         # we're out of raw bytes (both from the file and
 625                         # the local buffer); flush just to make sure the
 626                         # decompressor is done
 627                         newdata += self.dc.flush()
 628                         # prevent decompressor from being used again
 629                         self.dc = None
 630
 631                 self.readbuffer += newdata
 632
 633
 634         # return what the user asked for
 635         if size is None or len(self.readbuffer) <= size:
 636             data = self.readbuffer
 637             self.readbuffer = b''
 638         else:
 639             data = self.readbuffer[:size]
 640             self.readbuffer = self.readbuffer[size:]
 641
 642         return data
 643
 644
 645 class ZipFile:
 646     """ Class with methods to open, read, write, close, list zip files.
 647
 648     z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
 649
 650     file: Either the path to the file, or a file-like object.
 651           If it is a path, the file will be opened and closed by ZipFile.
 652     mode: The mode can be either read "r", write "w" or append "a".
 653     compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
 654     allowZip64: if True ZipFile will create files with ZIP64 extensions when
 655                 needed, otherwise it will raise an exception when this would
 656                 be necessary.
 657
 658     """
 659
 660     fp = None                   # Set here since __del__ checks it
 661
 662     def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
 663         """Open the ZIP file with mode read "r", write "w" or append "a"."""
 664         if mode not in ("r", "w", "a"):
 665             raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
 666
 667         if compression == ZIP_STORED:
 668             pass
 669         elif compression == ZIP_DEFLATED:
 670             if not zlib:
 671                 raise RuntimeError(
 672                       "Compression requires the (missing) zlib module")
 673         else:
 674             raise RuntimeError("That compression method is not supported")
 675
 676         self._allowZip64 = allowZip64
 677         self._didModify = False
 678         self.debug = 0  # Level of printing: 0 through 3
 679         self.NameToInfo = {}    # Find file info given name
 680         self.filelist = []      # List of ZipInfo instances for archive
 681         self.compression = compression  # Method of compression
 682         self.mode = key = mode.replace('b', '')[0]
 683         self.pwd = None
 684         self.comment = b''
 685
 686         # Check if we were passed a file-like object
 687         if isinstance(file, str):
 688             # No, it's a filename
 689             self._filePassed = 0
 690             self.filename = file
 691             modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
 692             try:
 693                 self.fp = io.open(file, modeDict[mode])
 694             except IOError:
 695                 if mode == 'a':
 696                     mode = key = 'w'
 697                     self.fp = io.open(file, modeDict[mode])
 698                 else:
 699                     raise
 700         else:
 701             self._filePassed = 1
 702             self.fp = file
 703             self.filename = getattr(file, 'name', None)
 704
 705         if key == 'r':
 706             self._GetContents()
 707         elif key == 'w':
 708             pass
 709         elif key == 'a':
 710             try:                        # See if file is a zip file
 711                 self._RealGetContents()
 712                 # seek to start of directory and overwrite
 713                 self.fp.seek(self.start_dir, 0)
 714             except BadZipfile:          # file is not a zip file, just append
 715                 self.fp.seek(0, 2)
 716         else:
 717             if not self._filePassed:
 718                 self.fp.close()
 719                 self.fp = None
 720             raise RuntimeError('Mode must be "r", "w" or "a"')
 721
 722     def _GetContents(self):
 723         """Read the directory, making sure we close the file if the format
 724         is bad."""
 725         try:
 726             self._RealGetContents()
 727         except BadZipfile:
 728             if not self._filePassed:
 729                 self.fp.close()
 730                 self.fp = None
 731             raise
 732
 733     def _RealGetContents(self):
 734         """Read in the table of contents for the ZIP file."""
 735         fp = self.fp
 736         endrec = _EndRecData(fp)
 737         if not endrec:
 738             raise BadZipfile("File is not a zip file")
 739         if self.debug > 1:
 740             print(endrec)
 741         size_cd = endrec[_ECD_SIZE]             # bytes in central directory
 742         offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
 743         self.comment = endrec[_ECD_COMMENT]     # archive comment
 744
 745         # "concat" is zero, unless zip was concatenated to another file
 746         concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
 747         if endrec[_ECD_SIGNATURE] == stringEndArchive64:
 748             # If Zip64 extension structures are present, account for them
 749             concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
 750
 751         if self.debug > 2:
 752             inferred = concat + offset_cd
 753             print("given, inferred, offset", offset_cd, inferred, concat)
 754         # self.start_dir:  Position of start of central directory
 755         self.start_dir = offset_cd + concat
 756         fp.seek(self.start_dir, 0)
 757         data = fp.read(size_cd)
 758         fp = io.BytesIO(data)
 759         total = 0
 760         while total < size_cd:
 761             centdir = fp.read(sizeCentralDir)
 762             if centdir[0:4] != stringCentralDir:
 763                 raise BadZipfile("Bad magic number for central directory")
 764             centdir = struct.unpack(structCentralDir, centdir)
 765             if self.debug > 2:
 766                 print(centdir)
 767             filename = fp.read(centdir[_CD_FILENAME_LENGTH])
 768             flags = centdir[5]
 769             if flags & 0x800:
 770                 # UTF-8 file names extension
 771                 filename = filename.decode('utf-8')
 772             else:
 773                 # Historical ZIP filename encoding
 774                 filename = filename.decode('cp437')
 775             # Create ZipInfo instance to store file information
 776             x = ZipInfo(filename)
 777             x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
 778             x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
 779             x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
 780             (x.create_version, x.create_system, x.extract_version, x.reserved,
 781                 x.flag_bits, x.compress_type, t, d,
 782                 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
 783             x.volume, x.internal_attr, x.external_attr = centdir[15:18]
 784             # Convert date/time code to (year, month, day, hour, min, sec)
 785             x._raw_time = t
 786             x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
 787                                      t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
 788
 789             x._decodeExtra()
 790             x.header_offset = x.header_offset + concat
 791             self.filelist.append(x)
 792             self.NameToInfo[x.filename] = x
 793
 794             # update total bytes read from central directory
 795             total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
 796                      + centdir[_CD_EXTRA_FIELD_LENGTH]
 797                      + centdir[_CD_COMMENT_LENGTH])
 798
 799             if self.debug > 2:
 800                 print("total", total)
 801
 802
 803     def namelist(self):
 804         """Return a list of file names in the archive."""
 805         l = []
 806         for data in self.filelist:
 807             l.append(data.filename)
 808         return l
 809
 810     def infolist(self):
 811         """Return a list of class ZipInfo instances for files in the
 812         archive."""
 813         return self.filelist
 814
 815     def printdir(self, file=None):
 816         """Print a table of contents for the zip file."""
 817         print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"),
 818               file=file)
 819         for zinfo in self.filelist:
 820             date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
 821             print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
 822                   file=file)
 823
 824     def testzip(self):
 825         """Read all the files and check the CRC."""
 826         chunk_size = 2 ** 20
 827         for zinfo in self.filelist:
 828             try:
 829                 # Read by chunks, to avoid an OverflowError or a
 830                 # MemoryError with very large embedded files.
 831                 f = self.open(zinfo.filename, "r")
 832                 while f.read(chunk_size):     # Check CRC-32
 833                     pass
 834             except BadZipfile:
 835                 return zinfo.filename
 836
 837     def getinfo(self, name):
 838         """Return the instance of ZipInfo given 'name'."""
 839         info = self.NameToInfo.get(name)
 840         if info is None:
 841             raise KeyError(
 842                 'There is no item named %r in the archive' % name)
 843
 844         return info
 845
 846     def setpassword(self, pwd):
 847         """Set default password for encrypted files."""
 848         assert isinstance(pwd, bytes)
 849         self.pwd = pwd
 850
 851     def read(self, name, pwd=None):
 852         """Return file bytes (as a string) for name."""
 853         return self.open(name, "r", pwd).read()
 854
 855     def open(self, name, mode="r", pwd=None):
 856         """Return file-like object for 'name'."""
 857         if mode not in ("r", "U", "rU"):
 858             raise RuntimeError('open() requires mode "r", "U", or "rU"')
 859         if not self.fp:
 860             raise RuntimeError(
 861                   "Attempt to read ZIP archive that was already closed")
 862
 863         # Only open a new file for instances where we were not
 864         # given a file object in the constructor
 865         if self._filePassed:
 866             zef_file = self.fp
 867         else:
 868             zef_file = io.open(self.filename, 'rb')
 869
 870         # Make sure we have an info object
 871         if isinstance(name, ZipInfo):
 872             # 'name' is already an info object
 873             zinfo = name
 874         else:
 875             # Get info object for name
 876             zinfo = self.getinfo(name)
 877
 878         zef_file.seek(zinfo.header_offset, 0)
 879
 880         # Skip the file header:
 881         fheader = zef_file.read(sizeFileHeader)
 882         if fheader[0:4] != stringFileHeader:
 883             raise BadZipfile("Bad magic number for file header")
 884
 885         fheader = struct.unpack(structFileHeader, fheader)
 886         fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
 887         if fheader[_FH_EXTRA_FIELD_LENGTH]:
 888             zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
 889
 890         if fname != zinfo.orig_filename.encode("utf-8"):
 891             raise BadZipfile(
 892                   'File name in directory %r and header %r differ.'
 893                   % (zinfo.orig_filename, fname))
 894
 895         # check for encrypted flag & handle password
 896         is_encrypted = zinfo.flag_bits & 0x1
 897         zd = None
 898         if is_encrypted:
 899             if not pwd:
 900                 pwd = self.pwd
 901             if not pwd:
 902                 raise RuntimeError("File %s is encrypted, "
 903                                    "password required for extraction" % name)
 904
 905             zd = _ZipDecrypter(pwd)
 906             # The first 12 bytes in the cypher stream is an encryption header
 907             #  used to strengthen the algorithm. The first 11 bytes are
 908             #  completely random, while the 12th contains the MSB of the CRC,
 909             #  or the MSB of the file time depending on the header type
 910             #  and is used to check the correctness of the password.
 911             bytes = zef_file.read(12)
 912             h = list(map(zd, bytes[0:12]))
 913             if zinfo.flag_bits & 0x8:
 914                 # compare against the file type from extended local headers
 915                 check_byte = (zinfo._raw_time >> 8) & 0xff
 916             else:
 917                 # compare against the CRC otherwise
 918                 check_byte = (zinfo.CRC >> 24) & 0xff
 919             if h[11] != check_byte:
 920                 raise RuntimeError("Bad password for file", name)
 921
 922         # build and return a ZipExtFile
 923         if zd is None:
 924             zef = ZipExtFile(zef_file, zinfo)
 925         else:
 926             zef = ZipExtFile(zef_file, zinfo, zd)
 927
 928         # set universal newlines on ZipExtFile if necessary
 929         if "U" in mode:
 930             zef.set_univ_newlines(True)
 931         return zef
 932
 933     def extract(self, member, path=None, pwd=None):
 934         """Extract a member from the archive to the current working directory,
 935            using its full name. Its file information is extracted as accurately
 936            as possible. `member' may be a filename or a ZipInfo object. You can
 937            specify a different directory using `path'.
 938         """
 939         if not isinstance(member, ZipInfo):
 940             member = self.getinfo(member)
 941
 942         if path is None:
 943             path = os.getcwd()
 944
 945         return self._extract_member(member, path, pwd)
 946
 947     def extractall(self, path=None, members=None, pwd=None):
 948         """Extract all members from the archive to the current working
 949            directory. `path' specifies a different directory to extract to.
 950            `members' is optional and must be a subset of the list returned
 951            by namelist().
 952         """
 953         if members is None:
 954             members = self.namelist()
 955
 956         for zipinfo in members:
 957             self.extract(zipinfo, path, pwd)
 958
 959     def _extract_member(self, member, targetpath, pwd):
 960         """Extract the ZipInfo object 'member' to a physical
 961            file on the path targetpath.
 962         """
 963         # build the destination pathname, replacing
 964         # forward slashes to platform specific separators.
 965         # Strip trailing path separator, unless it represents the root.
 966         if (targetpath[-1:] in (os.path.sep, os.path.altsep)
 967             and len(os.path.splitdrive(targetpath)[1]) > 1):
 968             targetpath = targetpath[:-1]
 969
 970         # don't include leading "/" from file name if present
 971         if member.filename[0] == '/':
 972             targetpath = os.path.join(targetpath, member.filename[1:])
 973         else:
 974             targetpath = os.path.join(targetpath, member.filename)
 975
 976         targetpath = os.path.normpath(targetpath)
 977
 978         # Create all upper directories if necessary.
 979         upperdirs = os.path.dirname(targetpath)
 980         if upperdirs and not os.path.exists(upperdirs):
 981             os.makedirs(upperdirs)
 982
 983         if member.filename[-1] == '/':
 984             if not os.path.isdir(targetpath):
 985                 os.mkdir(targetpath)
 986             return targetpath
 987
 988         source = self.open(member, pwd=pwd)
 989         target = open(targetpath, "wb")
 990         shutil.copyfileobj(source, target)
 991         source.close()
 992         target.close()
 993
 994         return targetpath
 995
 996     def _writecheck(self, zinfo):
 997         """Check for errors before writing a file to the archive."""
 998         if zinfo.filename in self.NameToInfo:
 999             if self.debug:      # Warning for duplicate names
1000                 print("Duplicate name:", zinfo.filename)
1001         if self.mode not in ("w", "a"):
1002             raise RuntimeError('write() requires mode "w" or "a"')
1003         if not self.fp:
1004             raise RuntimeError(
1005                   "Attempt to write ZIP archive that was already closed")
1006         if zinfo.compress_type == ZIP_DEFLATED and not zlib:
1007             raise RuntimeError(
1008                   "Compression requires the (missing) zlib module")
1009         if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
1010             raise RuntimeError("That compression method is not supported")
1011         if zinfo.file_size > ZIP64_LIMIT:
1012             if not self._allowZip64:
1013                 raise LargeZipFile("Filesize would require ZIP64 extensions")
1014         if zinfo.header_offset > ZIP64_LIMIT:
1015             if not self._allowZip64:
1016                 raise LargeZipFile(
1017                       "Zipfile size would require ZIP64 extensions")
1018
1019     def write(self, filename, arcname=None, compress_type=None):
1020         """Put the bytes from filename into the archive under the name
1021         arcname."""
1022         if not self.fp:
1023             raise RuntimeError(
1024                   "Attempt to write to ZIP archive that was already closed")
1025
1026         st = os.stat(filename)
1027         isdir = stat.S_ISDIR(st.st_mode)
1028         mtime = time.localtime(st.st_mtime)
1029         date_time = mtime[0:6]
1030         # Create ZipInfo instance to store file information
1031         if arcname is None:
1032             arcname = filename
1033         arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
1034         while arcname[0] in (os.sep, os.altsep):
1035             arcname = arcname[1:]
1036         if isdir:
1037             arcname += '/'
1038         zinfo = ZipInfo(arcname, date_time)
1039         zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
1040         if compress_type is None:
1041             zinfo.compress_type = self.compression
1042         else:
1043             zinfo.compress_type = compress_type
1044
1045         zinfo.file_size = st.st_size
1046         zinfo.flag_bits = 0x00
1047         zinfo.header_offset = self.fp.tell()    # Start of header bytes
1048
1049         self._writecheck(zinfo)
1050         self._didModify = True
1051
1052         if isdir:
1053             zinfo.file_size = 0
1054             zinfo.compress_size = 0
1055             zinfo.CRC = 0
1056             self.filelist.append(zinfo)
1057             self.NameToInfo[zinfo.filename] = zinfo
1058             self.fp.write(zinfo.FileHeader())
1059             return
1060
1061         with open(filename, "rb") as fp:
1062             # Must overwrite CRC and sizes with correct data later
1063             zinfo.CRC = CRC = 0
1064             zinfo.compress_size = compress_size = 0
1065             zinfo.file_size = file_size = 0
1066             self.fp.write(zinfo.FileHeader())
1067             if zinfo.compress_type == ZIP_DEFLATED:
1068                 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1069                      zlib.DEFLATED, -15)
1070             else:
1071                 cmpr = None
1072             while 1:
1073                 buf = fp.read(1024 * 8)
1074                 if not buf:
1075                     break
1076                 file_size = file_size + len(buf)
1077                 CRC = crc32(buf, CRC) & 0xffffffff
1078                 if cmpr:
1079                     buf = cmpr.compress(buf)
1080                     compress_size = compress_size + len(buf)
1081                 self.fp.write(buf)
1082         if cmpr:
1083             buf = cmpr.flush()
1084             compress_size = compress_size + len(buf)
1085             self.fp.write(buf)
1086             zinfo.compress_size = compress_size
1087         else:
1088             zinfo.compress_size = file_size
1089         zinfo.CRC = CRC
1090         zinfo.file_size = file_size
1091         # Seek backwards and write CRC and file sizes
1092         position = self.fp.tell()       # Preserve current position in file
1093         self.fp.seek(zinfo.header_offset + 14, 0)
1094         self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
1095               zinfo.file_size))
1096         self.fp.seek(position, 0)
1097         self.filelist.append(zinfo)
1098         self.NameToInfo[zinfo.filename] = zinfo
1099
1100     def writestr(self, zinfo_or_arcname, data):
1101         """Write a file into the archive.  The contents is 'data', which
1102         may be either a 'str' or a 'bytes' instance; if it is a 'str',
1103         it is encoded as UTF-8 first.
1104         'zinfo_or_arcname' is either a ZipInfo instance or
1105         the name of the file in the archive."""
1106         if isinstance(data, str):
1107             data = data.encode("utf-8")
1108         if not isinstance(zinfo_or_arcname, ZipInfo):
1109             zinfo = ZipInfo(filename=zinfo_or_arcname,
1110                             date_time=time.localtime(time.time())[:6])
1111             zinfo.compress_type = self.compression
1112             zinfo.external_attr = 0o600 << 16
1113         else:
1114             zinfo = zinfo_or_arcname
1115
1116         if not self.fp:
1117             raise RuntimeError(
1118                   "Attempt to write to ZIP archive that was already closed")
1119
1120         zinfo.file_size = len(data)            # Uncompressed size
1121         zinfo.header_offset = self.fp.tell()    # Start of header data
1122         self._writecheck(zinfo)
1123         self._didModify = True
1124         zinfo.CRC = crc32(data) & 0xffffffff       # CRC-32 checksum
1125         if zinfo.compress_type == ZIP_DEFLATED:
1126             co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1127                  zlib.DEFLATED, -15)
1128             data = co.compress(data) + co.flush()
1129             zinfo.compress_size = len(data)    # Compressed size
1130         else:
1131             zinfo.compress_size = zinfo.file_size
1132         zinfo.header_offset = self.fp.tell()    # Start of header data
1133         self.fp.write(zinfo.FileHeader())
1134         self.fp.write(data)
1135         self.fp.flush()
1136         if zinfo.flag_bits & 0x08:
1137             # Write CRC and file sizes after the file data
1138             self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
1139                   zinfo.file_size))
1140         self.filelist.append(zinfo)
1141         self.NameToInfo[zinfo.filename] = zinfo
1142
1143     def __del__(self):
1144         """Call the "close()" method in case the user forgot."""
1145         self.close()
1146
1147     def close(self):
1148         """Close the file, and for mode "w" and "a" write the ending
1149         records."""
1150         if self.fp is None:
1151             return
1152
1153         if self.mode in ("w", "a") and self._didModify: # write ending records
1154             count = 0
1155             pos1 = self.fp.tell()
1156             for zinfo in self.filelist:         # write central directory
1157                 count = count + 1
1158                 dt = zinfo.date_time
1159                 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
1160                 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
1161                 extra = []
1162                 if zinfo.file_size > ZIP64_LIMIT \
1163                         or zinfo.compress_size > ZIP64_LIMIT:
1164                     extra.append(zinfo.file_size)
1165                     extra.append(zinfo.compress_size)
1166                     file_size = 0xffffffff
1167                     compress_size = 0xffffffff
1168                 else:
1169                     file_size = zinfo.file_size
1170                     compress_size = zinfo.compress_size
1171
1172                 if zinfo.header_offset > ZIP64_LIMIT:
1173                     extra.append(zinfo.header_offset)
1174                     header_offset = 0xffffffff
1175                 else:
1176                     header_offset = zinfo.header_offset
1177
1178                 extra_data = zinfo.extra
1179                 if extra:
1180                     # Append a ZIP64 field to the extra's
1181                     extra_data = struct.pack(
1182                             '<HH' + 'Q'*len(extra),
1183                             1, 8*len(extra), *extra) + extra_data
1184
1185                     extract_version = max(45, zinfo.extract_version)
1186                     create_version = max(45, zinfo.create_version)
1187                 else:
1188                     extract_version = zinfo.extract_version
1189                     create_version = zinfo.create_version
1190
1191                 try:
1192                     filename, flag_bits = zinfo._encodeFilenameFlags()
1193                     centdir = struct.pack(structCentralDir,
1194                         stringCentralDir, create_version,
1195                         zinfo.create_system, extract_version, zinfo.reserved,
1196                         flag_bits, zinfo.compress_type, dostime, dosdate,
1197                         zinfo.CRC, compress_size, file_size,
1198                         len(filename), len(extra_data), len(zinfo.comment),
1199                         0, zinfo.internal_attr, zinfo.external_attr,
1200                         header_offset)
1201                 except DeprecationWarning:
1202                     print((structCentralDir, stringCentralDir, create_version,
1203                         zinfo.create_system, extract_version, zinfo.reserved,
1204                         zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1205                         zinfo.CRC, compress_size, file_size,
1206                         len(zinfo.filename), len(extra_data), len(zinfo.comment),
1207                         0, zinfo.internal_attr, zinfo.external_attr,
1208                         header_offset), file=sys.stderr)
1209                     raise
1210                 self.fp.write(centdir)
1211                 self.fp.write(filename)
1212                 self.fp.write(extra_data)
1213                 self.fp.write(zinfo.comment)
1214
1215             pos2 = self.fp.tell()
1216             # Write end-of-zip-archive record
1217             centDirCount = count
1218             centDirSize = pos2 - pos1
1219             centDirOffset = pos1
1220             if (centDirCount >= ZIP_FILECOUNT_LIMIT or
1221                 centDirOffset > ZIP64_LIMIT or
1222                 centDirSize > ZIP64_LIMIT):
1223                 # Need to write the ZIP64 end-of-archive records
1224                 zip64endrec = struct.pack(
1225                         structEndArchive64, stringEndArchive64,
1226                         44, 45, 45, 0, 0, centDirCount, centDirCount,
1227                         centDirSize, centDirOffset)
1228                 self.fp.write(zip64endrec)
1229
1230                 zip64locrec = struct.pack(
1231                         structEndArchive64Locator,
1232                         stringEndArchive64Locator, 0, pos2, 1)
1233                 self.fp.write(zip64locrec)
1234                 centDirCount = min(centDirCount, 0xFFFF)
1235                 centDirSize = min(centDirSize, 0xFFFFFFFF)
1236                 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
1237
1238             # check for valid comment length
1239             if len(self.comment) >= ZIP_MAX_COMMENT:
1240                 if self.debug > 0:
1241                     msg = 'Archive comment is too long; truncating to %d bytes' \
1242                           % ZIP_MAX_COMMENT
1243                 self.comment = self.comment[:ZIP_MAX_COMMENT]
1244
1245             endrec = struct.pack(structEndArchive, stringEndArchive,
1246                                  0, 0, centDirCount, centDirCount,
1247                                  centDirSize, centDirOffset, len(self.comment))
1248             self.fp.write(endrec)
1249             self.fp.write(self.comment)
1250             self.fp.flush()
1251
1252         if not self._filePassed:
1253             self.fp.close()
1254         self.fp = None
1255
1256
class PyZipFile(ZipFile):
    """ZipFile subclass for archiving compiled Python modules and packages."""

    def writepy(self, pathname, basename=""):
        """Add the Python code found at "pathname" to the ZIP archive.

        Three cases are handled:

        * a package directory (one containing __init__.py): its *.py
          modules are added and its package subdirectories are added
          recursively, all below "basename/<directory name>";
        * any other directory: the *.py modules listed in it are added
          below "basename";
        * anything else must be a path ending in ".py" and is added as a
          single module, otherwise RuntimeError is raised.

        What is stored is always the compiled module (.pyo or .pyc) picked
        by _get_codename(), which compiles the source when necessary.
        """
        def add_module(py_path, prefix, label):
            # Store the compiled counterpart of the source file py_path.
            fname, arcname = self._get_codename(py_path[0:-3], prefix)
            if self.debug:
                print(label, arcname)
            self.write(fname, arcname)

        if not os.path.isdir(pathname):
            # A single module.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                      'Files added with writepy() must end with ".py"')
            add_module(pathname, basename, "Adding file")
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in os.listdir(pathname):
                if os.path.splitext(filename)[1] == ".py":
                    add_module(os.path.join(pathname, filename), basename,
                               "Adding")
            return

        # This is a package directory, add it
        name = os.path.split(pathname)[1]
        basename = "%s/%s" % (basename, name) if basename else name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        add_module(initname, basename, "Adding")
        dirlist = os.listdir(pathname)
        dirlist.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for filename in dirlist:
            path = os.path.join(pathname, filename)
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename)  # Recursive call
            elif os.path.splitext(filename)[1] == ".py":
                add_module(path, basename, "Adding")

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the module path "pathname".

        "pathname" is the module path without extension.  The .pyo file is
        chosen when it exists and is at least as new as the .py source;
        otherwise the .pyc file is chosen, after compiling the source if
        the .pyc is missing or older than it.  A compile error is printed,
        not raised.  "archivename" is the chosen file's base name, prefixed
        with "basename/" when "basename" is non-empty.
        """
        source = pathname + ".py"
        compiled = pathname + ".pyc"
        optimized = pathname + ".pyo"

        def mtime(path):
            return os.stat(path).st_mtime

        if os.path.isfile(optimized) and mtime(optimized) >= mtime(source):
            fname = optimized    # Use .pyo file
        else:
            if not os.path.isfile(compiled) or mtime(compiled) < mtime(source):
                import py_compile
                if self.debug:
                    print("Compiling", source)
                try:
                    py_compile.compile(source, compiled, None, True)
                except py_compile.PyCompileError as err:
                    print(err.msg)
            fname = compiled

        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
1354
1355
def main(args = None):
    """Command-line interface: list, test, extract or create a ZIP file.

    'args' is the argument list and defaults to sys.argv[1:].  The first
    argument selects the action (-l, -t, -e or -c).  An unknown action or
    a wrong number of arguments prints the usage text and exits with
    status 1.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        # testzip() returns the first bad member; report it instead of
        # discarding the result of the test.
        if badfile is not None:
            print("The following enclosed file is corrupted: %r" % badfile)
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            # Use the class's own extraction logic, which also copes with
            # directory entries.
            zf.extractall(args[2])
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recursively everything below a directory.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1427
# Run the command-line interface when this file is executed as a script.
if __name__ == "__main__":
    main()