Lib/zipfile.py

   1 """
   2 Read and write ZIP files.
   3 """
   4 import struct, os, time, sys
   5 import binascii, cStringIO
   6
   7 try:
   8     import zlib # We may need its compression method
   9 except ImportError:
  10     zlib = None
  11
  12 __all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
  13            "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
  14
  15 class BadZipfile(Exception):
  16     pass
  17
  18
  19 class LargeZipFile(Exception):
  20     """
  21     Raised when writing a zipfile, the zipfile requires ZIP64 extensions
  22     and those extensions are disabled.
  23     """
  24
  25 error = BadZipfile      # The exception raised by this module
  26
  27 ZIP64_LIMIT= (1 << 31) - 1
  28
  29 # constants for Zip file compression methods
  30 ZIP_STORED = 0
  31 ZIP_DEFLATED = 8
  32 # Other ZIP compression methods not supported
  33
  34 # Here are some struct module formats for reading headers
  35 structEndArchive = "<4s4H2lH"     # 9 items, end of archive, 22 bytes
  36 stringEndArchive = "PK\005\006"   # magic number for end of archive record
  37 structCentralDir = "<4s4B4HlLL5HLl"# 19 items, central directory, 46 bytes
  38 stringCentralDir = "PK\001\002"   # magic number for central directory
  39 structFileHeader = "<4s2B4HlLL2H"  # 12 items, file header record, 30 bytes
  40 stringFileHeader = "PK\003\004"   # magic number for file header
  41 structEndArchive64Locator = "<4slql" # 4 items, locate Zip64 header, 20 bytes
  42 stringEndArchive64Locator = "PK\x06\x07" # magic token for locator header
  43 structEndArchive64 = "<4sqhhllqqqq" # 10 items, end of archive (Zip64), 56 bytes
  44 stringEndArchive64 = "PK\x06\x06" # magic token for Zip64 header
  45
  46
  47 # indexes of entries in the central directory structure
  48 _CD_SIGNATURE = 0
  49 _CD_CREATE_VERSION = 1
  50 _CD_CREATE_SYSTEM = 2
  51 _CD_EXTRACT_VERSION = 3
  52 _CD_EXTRACT_SYSTEM = 4                  # is this meaningful?
  53 _CD_FLAG_BITS = 5
  54 _CD_COMPRESS_TYPE = 6
  55 _CD_TIME = 7
  56 _CD_DATE = 8
  57 _CD_CRC = 9
  58 _CD_COMPRESSED_SIZE = 10
  59 _CD_UNCOMPRESSED_SIZE = 11
  60 _CD_FILENAME_LENGTH = 12
  61 _CD_EXTRA_FIELD_LENGTH = 13
  62 _CD_COMMENT_LENGTH = 14
  63 _CD_DISK_NUMBER_START = 15
  64 _CD_INTERNAL_FILE_ATTRIBUTES = 16
  65 _CD_EXTERNAL_FILE_ATTRIBUTES = 17
  66 _CD_LOCAL_HEADER_OFFSET = 18
  67
  68 # indexes of entries in the local file header structure
  69 _FH_SIGNATURE = 0
  70 _FH_EXTRACT_VERSION = 1
  71 _FH_EXTRACT_SYSTEM = 2                  # is this meaningful?
  72 _FH_GENERAL_PURPOSE_FLAG_BITS = 3
  73 _FH_COMPRESSION_METHOD = 4
  74 _FH_LAST_MOD_TIME = 5
  75 _FH_LAST_MOD_DATE = 6
  76 _FH_CRC = 7
  77 _FH_COMPRESSED_SIZE = 8
  78 _FH_UNCOMPRESSED_SIZE = 9
  79 _FH_FILENAME_LENGTH = 10
  80 _FH_EXTRA_FIELD_LENGTH = 11
  81
  82 def is_zipfile(filename):
  83     """Quickly see if file is a ZIP file by checking the magic number."""
  84     try:
  85         fpin = open(filename, "rb")
  86         endrec = _EndRecData(fpin)
  87         fpin.close()
  88         if endrec:
  89             return True                 # file has correct magic number
  90     except IOError:
  91         pass
  92     return False
  93
  94 def _EndRecData64(fpin, offset, endrec):
  95     """
  96     Read the ZIP64 end-of-archive records and use that to update endrec
  97     """
  98     locatorSize = struct.calcsize(structEndArchive64Locator)
  99     fpin.seek(offset - locatorSize, 2)
 100     data = fpin.read(locatorSize)
 101     sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
 102     if sig != stringEndArchive64Locator:
 103         return endrec
 104
 105     if diskno != 0 or disks != 1:
 106         raise BadZipfile("zipfiles that span multiple disks are not supported")
 107
 108     # Assume no 'zip64 extensible data'
 109     endArchiveSize = struct.calcsize(structEndArchive64)
 110     fpin.seek(offset - locatorSize - endArchiveSize, 2)
 111     data = fpin.read(endArchiveSize)
 112     sig, sz, create_version, read_version, disk_num, disk_dir, \
 113             dircount, dircount2, dirsize, diroffset = \
 114             struct.unpack(structEndArchive64, data)
 115     if sig != stringEndArchive64:
 116         return endrec
 117
 118     # Update the original endrec using data from the ZIP64 record
 119     endrec[1] = disk_num
 120     endrec[2] = disk_dir
 121     endrec[3] = dircount
 122     endrec[4] = dircount2
 123     endrec[5] = dirsize
 124     endrec[6] = diroffset
 125     return endrec
 126
 127
 128 def _EndRecData(fpin):
 129     """Return data from the "End of Central Directory" record, or None.
 130
 131     The data is a list of the nine items in the ZIP "End of central dir"
 132     record followed by a tenth item, the file seek offset of this record."""
 133     fpin.seek(-22, 2)               # Assume no archive comment.
 134     filesize = fpin.tell() + 22     # Get file size
 135     data = fpin.read()
 136     if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
 137         endrec = struct.unpack(structEndArchive, data)
 138         endrec = list(endrec)
 139         endrec.append("")               # Append the archive comment
 140         endrec.append(filesize - 22)    # Append the record start offset
 141         if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
 142             return _EndRecData64(fpin, -22, endrec)
 143         return endrec
 144     # Search the last END_BLOCK bytes of the file for the record signature.
 145     # The comment is appended to the ZIP file and has a 16 bit length.
 146     # So the comment may be up to 64K long.  We limit the search for the
 147     # signature to a few Kbytes at the end of the file for efficiency.
 148     # also, the signature must not appear in the comment.
 149     END_BLOCK = min(filesize, 1024 * 4)
 150     fpin.seek(filesize - END_BLOCK, 0)
 151     data = fpin.read()
 152     start = data.rfind(stringEndArchive)
 153     if start >= 0:     # Correct signature string was found
 154         endrec = struct.unpack(structEndArchive, data[start:start+22])
 155         endrec = list(endrec)
 156         comment = data[start+22:]
 157         if endrec[7] == len(comment):     # Comment length checks out
 158             # Append the archive comment and start offset
 159             endrec.append(comment)
 160             endrec.append(filesize - END_BLOCK + start)
 161             if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
 162                 return _EndRecData64(fpin, - END_BLOCK + start, endrec)
 163             return endrec
 164     return      # Error, return None
 165
 166
 167 class ZipInfo (object):
 168     """Class with attributes describing each file in the ZIP archive."""
 169
 170     __slots__ = (
 171             'orig_filename',
 172             'filename',
 173             'date_time',
 174             'compress_type',
 175             'comment',
 176             'extra',
 177             'create_system',
 178             'create_version',
 179             'extract_version',
 180             'reserved',
 181             'flag_bits',
 182             'volume',
 183             'internal_attr',
 184             'external_attr',
 185             'header_offset',
 186             'CRC',
 187             'compress_size',
 188             'file_size',
 189         )
 190
 191     def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
 192         self.orig_filename = filename   # Original file name in archive
 193
 194         # Terminate the file name at the first null byte.  Null bytes in file
 195         # names are used as tricks by viruses in archives.
 196         null_byte = filename.find(chr(0))
 197         if null_byte >= 0:
 198             filename = filename[0:null_byte]
 199         # This is used to ensure paths in generated ZIP files always use
 200         # forward slashes as the directory separator, as required by the
 201         # ZIP format specification.
 202         if os.sep != "/" and os.sep in filename:
 203             filename = filename.replace(os.sep, "/")
 204
 205         self.filename = filename        # Normalized file name
 206         self.date_time = date_time      # year, month, day, hour, min, sec
 207         # Standard values:
 208         self.compress_type = ZIP_STORED # Type of compression for the file
 209         self.comment = ""               # Comment for each file
 210         self.extra = ""                 # ZIP extra data
 211         if sys.platform == 'win32':
 212             self.create_system = 0          # System which created ZIP archive
 213         else:
 214             # Assume everything else is unix-y
 215             self.create_system = 3          # System which created ZIP archive
 216         self.create_version = 20        # Version which created ZIP archive
 217         self.extract_version = 20       # Version needed to extract archive
 218         self.reserved = 0               # Must be zero
 219         self.flag_bits = 0              # ZIP flag bits
 220         self.volume = 0                 # Volume number of file header
 221         self.internal_attr = 0          # Internal attributes
 222         self.external_attr = 0          # External file attributes
 223         # Other attributes are set by class ZipFile:
 224         # header_offset         Byte offset to the file header
 225         # CRC                   CRC-32 of the uncompressed file
 226         # compress_size         Size of the compressed file
 227         # file_size             Size of the uncompressed file
 228
 229     def FileHeader(self):
 230         """Return the per-file header as a string."""
 231         dt = self.date_time
 232         dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
 233         dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
 234         if self.flag_bits & 0x08:
 235             # Set these to zero because we write them after the file data
 236             CRC = compress_size = file_size = 0
 237         else:
 238             CRC = self.CRC
 239             compress_size = self.compress_size
 240             file_size = self.file_size
 241
 242         extra = self.extra
 243
 244         if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
 245             # File is larger than what fits into a 4 byte integer,
 246             # fall back to the ZIP64 extension
 247             fmt = '<hhqq'
 248             extra = extra + struct.pack(fmt,
 249                     1, struct.calcsize(fmt)-4, file_size, compress_size)
 250             file_size = 0xffffffff # -1
 251             compress_size = 0xffffffff # -1
 252             self.extract_version = max(45, self.extract_version)
 253             self.create_version = max(45, self.extract_version)
 254
 255         header = struct.pack(structFileHeader, stringFileHeader,
 256                  self.extract_version, self.reserved, self.flag_bits,
 257                  self.compress_type, dostime, dosdate, CRC,
 258                  compress_size, file_size,
 259                  len(self.filename), len(extra))
 260         return header + self.filename + extra
 261
 262     def _decodeExtra(self):
 263         # Try to decode the extra field.
 264         extra = self.extra
 265         unpack = struct.unpack
 266         while extra:
 267             tp, ln = unpack('<hh', extra[:4])
 268             if tp == 1:
 269                 if ln >= 24:
 270                     counts = unpack('<qqq', extra[4:28])
 271                 elif ln == 16:
 272                     counts = unpack('<qq', extra[4:20])
 273                 elif ln == 8:
 274                     counts = unpack('<q', extra[4:12])
 275                 elif ln == 0:
 276                     counts = ()
 277                 else:
 278                     raise RuntimeError, "Corrupt extra field %s"%(ln,)
 279
 280                 idx = 0
 281
 282                 # ZIP64 extension (large files and/or large archives)
 283                 if self.file_size == -1 or self.file_size == 0xFFFFFFFFL:
 284                     self.file_size = counts[idx]
 285                     idx += 1
 286
 287                 if self.compress_size == -1 or self.compress_size == 0xFFFFFFFFL:
 288                     self.compress_size = counts[idx]
 289                     idx += 1
 290
 291                 if self.header_offset == -1 or self.header_offset == 0xffffffffL:
 292                     old = self.header_offset
 293                     self.header_offset = counts[idx]
 294                     idx+=1
 295
 296             extra = extra[ln+4:]
 297
 298
 299 class _ZipDecrypter:
 300     """Class to handle decryption of files stored within a ZIP archive.
 301
 302     ZIP supports a password-based form of encryption. Even though known
 303     plaintext attacks have been found against it, it is still useful
 304     for low-level securicy.
 305
 306     Usage:
 307         zd = _ZipDecrypter(mypwd)
 308         plain_char = zd(cypher_char)
 309         plain_text = map(zd, cypher_text)
 310     """
 311
 312     def _GenerateCRCTable():
 313         """Generate a CRC-32 table.
 314
 315         ZIP encryption uses the CRC32 one-byte primitive for scrambling some
 316         internal keys. We noticed that a direct implementation is faster than
 317         relying on binascii.crc32().
 318         """
 319         poly = 0xedb88320
 320         table = [0] * 256
 321         for i in range(256):
 322             crc = i
 323             for j in range(8):
 324                 if crc & 1:
 325                     crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
 326                 else:
 327                     crc = ((crc >> 1) & 0x7FFFFFFF)
 328             table[i] = crc
 329         return table
 330     crctable = _GenerateCRCTable()
 331
 332     def _crc32(self, ch, crc):
 333         """Compute the CRC32 primitive on one byte."""
 334         return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
 335
 336     def __init__(self, pwd):
 337         self.key0 = 305419896
 338         self.key1 = 591751049
 339         self.key2 = 878082192
 340         for p in pwd:
 341             self._UpdateKeys(p)
 342
 343     def _UpdateKeys(self, c):
 344         self.key0 = self._crc32(c, self.key0)
 345         self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
 346         self.key1 = (self.key1 * 134775813 + 1) & 4294967295
 347         self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
 348
 349     def __call__(self, c):
 350         """Decrypt a single character."""
 351         c = ord(c)
 352         k = self.key2 | 2
 353         c = c ^ (((k * (k^1)) >> 8) & 255)
 354         c = chr(c)
 355         self._UpdateKeys(c)
 356         return c
 357
 358 class ZipExtFile:
 359     """File-like object for reading an archive member.
 360        Is returned by ZipFile.open().
 361     """
 362
 363     def __init__(self, fileobj, zipinfo, decrypt=None):
 364         self.fileobj = fileobj
 365         self.decrypter = decrypt
 366         self.bytes_read = 0L
 367         self.rawbuffer = ''
 368         self.readbuffer = ''
 369         self.linebuffer = ''
 370         self.eof = False
 371         self.univ_newlines = False
 372         self.nlSeps = ("\n", )
 373         self.lastdiscard = ''
 374
 375         self.compress_type = zipinfo.compress_type
 376         self.compress_size = zipinfo.compress_size
 377
 378         self.closed  = False
 379         self.mode    = "r"
 380         self.name = zipinfo.filename
 381
 382         # read from compressed files in 64k blocks
 383         self.compreadsize = 64*1024
 384         if self.compress_type == ZIP_DEFLATED:
 385             self.dc = zlib.decompressobj(-15)
 386
 387     def set_univ_newlines(self, univ_newlines):
 388         self.univ_newlines = univ_newlines
 389
 390         # pick line separator char(s) based on universal newlines flag
 391         self.nlSeps = ("\n", )
 392         if self.univ_newlines:
 393             self.nlSeps = ("\r\n", "\r", "\n")
 394
 395     def __iter__(self):
 396         return self
 397
 398     def next(self):
 399         nextline = self.readline()
 400         if not nextline:
 401             raise StopIteration()
 402
 403         return nextline
 404
 405     def close(self):
 406         self.closed = True
 407
 408     def _checkfornewline(self):
 409         nl, nllen = -1, -1
 410         if self.linebuffer:
 411             # ugly check for cases where half of an \r\n pair was
 412             # read on the last pass, and the \r was discarded.  In this
 413             # case we just throw away the \n at the start of the buffer.
 414             if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
 415                 self.linebuffer = self.linebuffer[1:]
 416
 417             for sep in self.nlSeps:
 418                 nl = self.linebuffer.find(sep)
 419                 if nl >= 0:
 420                     nllen = len(sep)
 421                     return nl, nllen
 422
 423         return nl, nllen
 424
 425     def readline(self, size = -1):
 426         """Read a line with approx. size. If size is negative,
 427            read a whole line.
 428         """
 429         if size < 0:
 430             size = sys.maxint
 431         elif size == 0:
 432             return ''
 433
 434         # check for a newline already in buffer
 435         nl, nllen = self._checkfornewline()
 436
 437         if nl >= 0:
 438             # the next line was already in the buffer
 439             nl = min(nl, size)
 440         else:
 441             # no line break in buffer - try to read more
 442             size -= len(self.linebuffer)
 443             while nl < 0 and size > 0:
 444                 buf = self.read(min(size, 100))
 445                 if not buf:
 446                     break
 447                 self.linebuffer += buf
 448                 size -= len(buf)
 449
 450                 # check for a newline in buffer
 451                 nl, nllen = self._checkfornewline()
 452
 453             # we either ran out of bytes in the file, or
 454             # met the specified size limit without finding a newline,
 455             # so return current buffer
 456             if nl < 0:
 457                 s = self.linebuffer
 458                 self.linebuffer = ''
 459                 return s
 460
 461         buf = self.linebuffer[:nl]
 462         self.lastdiscard = self.linebuffer[nl:nl + nllen]
 463         self.linebuffer = self.linebuffer[nl + nllen:]
 464
 465         # line is always returned with \n as newline char (except possibly
 466         # for a final incomplete line in the file, which is handled above).
 467         return buf + "\n"
 468
 469     def readlines(self, sizehint = -1):
 470         """Return a list with all (following) lines. The sizehint parameter
 471         is ignored in this implementation.
 472         """
 473         result = []
 474         while True:
 475             line = self.readline()
 476             if not line: break
 477             result.append(line)
 478         return result
 479
 480     def read(self, size = None):
 481         # act like file() obj and return empty string if size is 0
 482         if size == 0:
 483             return ''
 484
 485         # determine read size
 486         bytesToRead = self.compress_size - self.bytes_read
 487
 488         # adjust read size for encrypted files since the first 12 bytes
 489         # are for the encryption/password information
 490         if self.decrypter is not None:
 491             bytesToRead -= 12
 492
 493         if size is not None and size >= 0:
 494             if self.compress_type == ZIP_STORED:
 495                 lr = len(self.readbuffer)
 496                 bytesToRead = min(bytesToRead, size - lr)
 497             elif self.compress_type == ZIP_DEFLATED:
 498                 if len(self.readbuffer) > size:
 499                     # the user has requested fewer bytes than we've already
 500                     # pulled through the decompressor; don't read any more
 501                     bytesToRead = 0
 502                 else:
 503                     # user will use up the buffer, so read some more
 504                     lr = len(self.rawbuffer)
 505                     bytesToRead = min(bytesToRead, self.compreadsize - lr)
 506
 507         # avoid reading past end of file contents
 508         if bytesToRead + self.bytes_read > self.compress_size:
 509             bytesToRead = self.compress_size - self.bytes_read
 510
 511         # try to read from file (if necessary)
 512         if bytesToRead > 0:
 513             bytes = self.fileobj.read(bytesToRead)
 514             self.bytes_read += len(bytes)
 515             self.rawbuffer += bytes
 516
 517             # handle contents of raw buffer
 518             if self.rawbuffer:
 519                 newdata = self.rawbuffer
 520                 self.rawbuffer = ''
 521
 522                 # decrypt new data if we were given an object to handle that
 523                 if newdata and self.decrypter is not None:
 524                     newdata = ''.join(map(self.decrypter, newdata))
 525
 526                 # decompress newly read data if necessary
 527                 if newdata and self.compress_type == ZIP_DEFLATED:
 528                     newdata = self.dc.decompress(newdata)
 529                     self.rawbuffer = self.dc.unconsumed_tail
 530                     if self.eof and len(self.rawbuffer) == 0:
 531                         # we're out of raw bytes (both from the file and
 532                         # the local buffer); flush just to make sure the
 533                         # decompressor is done
 534                         newdata += self.dc.flush()
 535                         # prevent decompressor from being used again
 536                         self.dc = None
 537
 538                 self.readbuffer += newdata
 539
 540
 541         # return what the user asked for
 542         if size is None or len(self.readbuffer) <= size:
 543             bytes = self.readbuffer
 544             self.readbuffer = ''
 545         else:
 546             bytes = self.readbuffer[:size]
 547             self.readbuffer = self.readbuffer[size:]
 548
 549         return bytes
 550
 551
 552 class ZipFile:
 553     """ Class with methods to open, read, write, close, list zip files.
 554
 555     z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
 556
 557     file: Either the path to the file, or a file-like object.
 558           If it is a path, the file will be opened and closed by ZipFile.
 559     mode: The mode can be either read "r", write "w" or append "a".
 560     compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
 561     allowZip64: if True ZipFile will create files with ZIP64 extensions when
 562                 needed, otherwise it will raise an exception when this would
 563                 be necessary.
 564
 565     """
 566
 567     fp = None                   # Set here since __del__ checks it
 568
 569     def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
 570         """Open the ZIP file with mode read "r", write "w" or append "a"."""
 571         if mode not in ("r", "w", "a"):
 572             raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
 573
 574         if compression == ZIP_STORED:
 575             pass
 576         elif compression == ZIP_DEFLATED:
 577             if not zlib:
 578                 raise RuntimeError,\
 579                       "Compression requires the (missing) zlib module"
 580         else:
 581             raise RuntimeError, "That compression method is not supported"
 582
 583         self._allowZip64 = allowZip64
 584         self._didModify = False
 585         self.debug = 0  # Level of printing: 0 through 3
 586         self.NameToInfo = {}    # Find file info given name
 587         self.filelist = []      # List of ZipInfo instances for archive
 588         self.compression = compression  # Method of compression
 589         self.mode = key = mode.replace('b', '')[0]
 590         self.pwd = None
 591
 592         # Check if we were passed a file-like object
 593         if isinstance(file, basestring):
 594             self._filePassed = 0
 595             self.filename = file
 596             modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
 597             try:
 598                 self.fp = open(file, modeDict[mode])
 599             except IOError:
 600                 if mode == 'a':
 601                     mode = key = 'w'
 602                     self.fp = open(file, modeDict[mode])
 603                 else:
 604                     raise
 605         else:
 606             self._filePassed = 1
 607             self.fp = file
 608             self.filename = getattr(file, 'name', None)
 609
 610         if key == 'r':
 611             self._GetContents()
 612         elif key == 'w':
 613             pass
 614         elif key == 'a':
 615             try:                        # See if file is a zip file
 616                 self._RealGetContents()
 617                 # seek to start of directory and overwrite
 618                 self.fp.seek(self.start_dir, 0)
 619             except BadZipfile:          # file is not a zip file, just append
 620                 self.fp.seek(0, 2)
 621         else:
 622             if not self._filePassed:
 623                 self.fp.close()
 624                 self.fp = None
 625             raise RuntimeError, 'Mode must be "r", "w" or "a"'
 626
 627     def _GetContents(self):
 628         """Read the directory, making sure we close the file if the format
 629         is bad."""
 630         try:
 631             self._RealGetContents()
 632         except BadZipfile:
 633             if not self._filePassed:
 634                 self.fp.close()
 635                 self.fp = None
 636             raise
 637
 638     def _RealGetContents(self):
 639         """Read in the table of contents for the ZIP file."""
 640         fp = self.fp
 641         endrec = _EndRecData(fp)
 642         if not endrec:
 643             raise BadZipfile, "File is not a zip file"
 644         if self.debug > 1:
 645             print endrec
 646         size_cd = endrec[5]             # bytes in central directory
 647         offset_cd = endrec[6]   # offset of central directory
 648         self.comment = endrec[8]        # archive comment
 649         # endrec[9] is the offset of the "End of Central Dir" record
 650         if endrec[9] > ZIP64_LIMIT:
 651             x = endrec[9] - size_cd - 56 - 20
 652         else:
 653             x = endrec[9] - size_cd
 654         # "concat" is zero, unless zip was concatenated to another file
 655         concat = x - offset_cd
 656         if self.debug > 2:
 657             print "given, inferred, offset", offset_cd, x, concat
 658         # self.start_dir:  Position of start of central directory
 659         self.start_dir = offset_cd + concat
 660         fp.seek(self.start_dir, 0)
 661         data = fp.read(size_cd)
 662         fp = cStringIO.StringIO(data)
 663         total = 0
 664         while total < size_cd:
 665             centdir = fp.read(46)
 666             total = total + 46
 667             if centdir[0:4] != stringCentralDir:
 668                 raise BadZipfile, "Bad magic number for central directory"
 669             centdir = struct.unpack(structCentralDir, centdir)
 670             if self.debug > 2:
 671                 print centdir
 672             filename = fp.read(centdir[_CD_FILENAME_LENGTH])
 673             # Create ZipInfo instance to store file information
 674             x = ZipInfo(filename)
 675             x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
 676             x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
 677             total = (total + centdir[_CD_FILENAME_LENGTH]
 678                      + centdir[_CD_EXTRA_FIELD_LENGTH]
 679                      + centdir[_CD_COMMENT_LENGTH])
 680             x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
 681             (x.create_version, x.create_system, x.extract_version, x.reserved,
 682                 x.flag_bits, x.compress_type, t, d,
 683                 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
 684             x.volume, x.internal_attr, x.external_attr = centdir[15:18]
 685             # Convert date/time code to (year, month, day, hour, min, sec)
 686             x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
 687                                      t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
 688
 689             x._decodeExtra()
 690             x.header_offset = x.header_offset + concat
 691             self.filelist.append(x)
 692             self.NameToInfo[x.filename] = x
 693             if self.debug > 2:
 694                 print "total", total
 695
 696
 697     def namelist(self):
 698         """Return a list of file names in the archive."""
 699         l = []
 700         for data in self.filelist:
 701             l.append(data.filename)
 702         return l
 703
 704     def infolist(self):
 705         """Return a list of class ZipInfo instances for files in the
 706         archive."""
 707         return self.filelist
 708
 709     def printdir(self):
 710         """Print a table of contents for the zip file."""
 711         print "%-46s %19s %12s" % ("File Name", "Modified    ", "Size")
 712         for zinfo in self.filelist:
 713             date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time
 714             print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
 715
 716     def testzip(self):
 717         """Read all the files and check the CRC."""
 718         for zinfo in self.filelist:
 719             try:
 720                 self.read(zinfo.filename)       # Check CRC-32
 721             except BadZipfile:
 722                 return zinfo.filename
 723
 724
 725     def getinfo(self, name):
 726         """Return the instance of ZipInfo given 'name'."""
 727         info = self.NameToInfo.get(name)
 728         if info is None:
 729             raise KeyError(
 730                 'There is no item named %r in the archive' % name)
 731
 732         return info
 733
 734     def setpassword(self, pwd):
 735         """Set default password for encrypted files."""
 736         self.pwd = pwd
 737
 738     def read(self, name, pwd=None):
 739         """Return file bytes (as a string) for name."""
 740         return self.open(name, "r", pwd).read()
 741
 742     def open(self, name, mode="r", pwd=None):
 743         """Return file-like object for 'name'."""
 744         if mode not in ("r", "U", "rU"):
 745             raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
 746         if not self.fp:
 747             raise RuntimeError, \
 748                   "Attempt to read ZIP archive that was already closed"
 749
 750         # Only open a new file for instances where we were not
 751         # given a file object in the constructor
 752         if self._filePassed:
 753             zef_file = self.fp
 754         else:
 755             zef_file = open(self.filename, 'rb')
 756
 757         # Get info object for name
 758         zinfo = self.getinfo(name)
 759
 760         filepos = zef_file.tell()
 761
 762         zef_file.seek(zinfo.header_offset, 0)
 763
 764         # Skip the file header:
 765         fheader = zef_file.read(30)
 766         if fheader[0:4] != stringFileHeader:
 767             raise BadZipfile, "Bad magic number for file header"
 768
 769         fheader = struct.unpack(structFileHeader, fheader)
 770         fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
 771         if fheader[_FH_EXTRA_FIELD_LENGTH]:
 772             zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
 773
 774         if fname != zinfo.orig_filename:
 775             raise BadZipfile, \
 776                       'File name in directory "%s" and header "%s" differ.' % (
 777                           zinfo.orig_filename, fname)
 778
 779         # check for encrypted flag & handle password
 780         is_encrypted = zinfo.flag_bits & 0x1
 781         zd = None
 782         if is_encrypted:
 783             if not pwd:
 784                 pwd = self.pwd
 785             if not pwd:
 786                 raise RuntimeError, "File %s is encrypted, " \
 787                       "password required for extraction" % name
 788
 789             zd = _ZipDecrypter(pwd)
 790             # The first 12 bytes in the cypher stream is an encryption header
 791             #  used to strengthen the algorithm. The first 11 bytes are
 792             #  completely random, while the 12th contains the MSB of the CRC,
 793             #  and is used to check the correctness of the password.
 794             bytes = zef_file.read(12)
 795             h = map(zd, bytes[0:12])
 796             if ord(h[11]) != ((zinfo.CRC>>24)&255):
 797                 raise RuntimeError, "Bad password for file %s" % name
 798
 799         # build and return a ZipExtFile
 800         if zd is None:
 801             zef = ZipExtFile(zef_file, zinfo)
 802         else:
 803             zef = ZipExtFile(zef_file, zinfo, zd)
 804
 805         # set universal newlines on ZipExtFile if necessary
 806         if "U" in mode:
 807             zef.set_univ_newlines(True)
 808         return zef
 809
 810     def _writecheck(self, zinfo):
 811         """Check for errors before writing a file to the archive."""
 812         if zinfo.filename in self.NameToInfo:
 813             if self.debug:      # Warning for duplicate names
 814                 print "Duplicate name:", zinfo.filename
 815         if self.mode not in ("w", "a"):
 816             raise RuntimeError, 'write() requires mode "w" or "a"'
 817         if not self.fp:
 818             raise RuntimeError, \
 819                   "Attempt to write ZIP archive that was already closed"
 820         if zinfo.compress_type == ZIP_DEFLATED and not zlib:
 821             raise RuntimeError, \
 822                   "Compression requires the (missing) zlib module"
 823         if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
 824             raise RuntimeError, \
 825                   "That compression method is not supported"
 826         if zinfo.file_size > ZIP64_LIMIT:
 827             if not self._allowZip64:
 828                 raise LargeZipFile("Filesize would require ZIP64 extensions")
 829         if zinfo.header_offset > ZIP64_LIMIT:
 830             if not self._allowZip64:
 831                 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
 832
 833     def write(self, filename, arcname=None, compress_type=None):
 834         """Put the bytes from filename into the archive under the name
 835         arcname."""
 836         if not self.fp:
 837             raise RuntimeError(
 838                   "Attempt to write to ZIP archive that was already closed")
 839
 840         st = os.stat(filename)
 841         mtime = time.localtime(st.st_mtime)
 842         date_time = mtime[0:6]
 843         # Create ZipInfo instance to store file information
 844         if arcname is None:
 845             arcname = filename
 846         arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
 847         while arcname[0] in (os.sep, os.altsep):
 848             arcname = arcname[1:]
 849         zinfo = ZipInfo(arcname, date_time)
 850         zinfo.external_attr = (st[0] & 0xFFFF) << 16L      # Unix attributes
 851         if compress_type is None:
 852             zinfo.compress_type = self.compression
 853         else:
 854             zinfo.compress_type = compress_type
 855
 856         zinfo.file_size = st.st_size
 857         zinfo.flag_bits = 0x00
 858         zinfo.header_offset = self.fp.tell()    # Start of header bytes
 859
 860         self._writecheck(zinfo)
 861         self._didModify = True
 862         fp = open(filename, "rb")
 863         # Must overwrite CRC and sizes with correct data later
 864         zinfo.CRC = CRC = 0
 865         zinfo.compress_size = compress_size = 0
 866         zinfo.file_size = file_size = 0
 867         self.fp.write(zinfo.FileHeader())
 868         if zinfo.compress_type == ZIP_DEFLATED:
 869             cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
 870                  zlib.DEFLATED, -15)
 871         else:
 872             cmpr = None
 873         while 1:
 874             buf = fp.read(1024 * 8)
 875             if not buf:
 876                 break
 877             file_size = file_size + len(buf)
 878             CRC = binascii.crc32(buf, CRC)
 879             if cmpr:
 880                 buf = cmpr.compress(buf)
 881                 compress_size = compress_size + len(buf)
 882             self.fp.write(buf)
 883         fp.close()
 884         if cmpr:
 885             buf = cmpr.flush()
 886             compress_size = compress_size + len(buf)
 887             self.fp.write(buf)
 888             zinfo.compress_size = compress_size
 889         else:
 890             zinfo.compress_size = file_size
 891         zinfo.CRC = CRC
 892         zinfo.file_size = file_size
 893         # Seek backwards and write CRC and file sizes
 894         position = self.fp.tell()       # Preserve current position in file
 895         self.fp.seek(zinfo.header_offset + 14, 0)
 896         self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
 897               zinfo.file_size))
 898         self.fp.seek(position, 0)
 899         self.filelist.append(zinfo)
 900         self.NameToInfo[zinfo.filename] = zinfo
 901
 902     def writestr(self, zinfo_or_arcname, bytes):
 903         """Write a file into the archive.  The contents is the string
 904         'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
 905         the name of the file in the archive."""
 906         if not isinstance(zinfo_or_arcname, ZipInfo):
 907             zinfo = ZipInfo(filename=zinfo_or_arcname,
 908                             date_time=time.localtime(time.time()))
 909             zinfo.compress_type = self.compression
 910         else:
 911             zinfo = zinfo_or_arcname
 912
 913         if not self.fp:
 914             raise RuntimeError(
 915                   "Attempt to write to ZIP archive that was already closed")
 916
 917         zinfo.file_size = len(bytes)            # Uncompressed size
 918         zinfo.header_offset = self.fp.tell()    # Start of header bytes
 919         self._writecheck(zinfo)
 920         self._didModify = True
 921         zinfo.CRC = binascii.crc32(bytes)       # CRC-32 checksum
 922         if zinfo.compress_type == ZIP_DEFLATED:
 923             co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
 924                  zlib.DEFLATED, -15)
 925             bytes = co.compress(bytes) + co.flush()
 926             zinfo.compress_size = len(bytes)    # Compressed size
 927         else:
 928             zinfo.compress_size = zinfo.file_size
 929         zinfo.header_offset = self.fp.tell()    # Start of header bytes
 930         self.fp.write(zinfo.FileHeader())
 931         self.fp.write(bytes)
 932         self.fp.flush()
 933         if zinfo.flag_bits & 0x08:
 934             # Write CRC and file sizes after the file data
 935             self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
 936                   zinfo.file_size))
 937         self.filelist.append(zinfo)
 938         self.NameToInfo[zinfo.filename] = zinfo
 939
    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        # close() returns immediately when self.fp is already None, so an
        # archive that was closed explicitly is not closed a second time.
        self.close()
 943
 944     def close(self):
 945         """Close the file, and for mode "w" and "a" write the ending
 946         records."""
 947         if self.fp is None:
 948             return
 949
 950         if self.mode in ("w", "a") and self._didModify: # write ending records
 951             count = 0
 952             pos1 = self.fp.tell()
 953             for zinfo in self.filelist:         # write central directory
 954                 count = count + 1
 955                 dt = zinfo.date_time
 956                 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
 957                 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
 958                 extra = []
 959                 if zinfo.file_size > ZIP64_LIMIT \
 960                         or zinfo.compress_size > ZIP64_LIMIT:
 961                     extra.append(zinfo.file_size)
 962                     extra.append(zinfo.compress_size)
 963                     file_size = 0xffffffff #-1
 964                     compress_size = 0xffffffff #-1
 965                 else:
 966                     file_size = zinfo.file_size
 967                     compress_size = zinfo.compress_size
 968
 969                 if zinfo.header_offset > ZIP64_LIMIT:
 970                     extra.append(zinfo.header_offset)
 971                     header_offset = -1  # struct "l" format:  32 one bits
 972                 else:
 973                     header_offset = zinfo.header_offset
 974
 975                 extra_data = zinfo.extra
 976                 if extra:
 977                     # Append a ZIP64 field to the extra's
 978                     extra_data = struct.pack(
 979                             '<hh' + 'q'*len(extra),
 980                             1, 8*len(extra), *extra) + extra_data
 981
 982                     extract_version = max(45, zinfo.extract_version)
 983                     create_version = max(45, zinfo.create_version)
 984                 else:
 985                     extract_version = zinfo.extract_version
 986                     create_version = zinfo.create_version
 987
 988                 centdir = struct.pack(structCentralDir,
 989                   stringCentralDir, create_version,
 990                   zinfo.create_system, extract_version, zinfo.reserved,
 991                   zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
 992                   zinfo.CRC, compress_size, file_size,
 993                   len(zinfo.filename), len(extra_data), len(zinfo.comment),
 994                   0, zinfo.internal_attr, zinfo.external_attr,
 995                   header_offset)
 996                 self.fp.write(centdir)
 997                 self.fp.write(zinfo.filename)
 998                 self.fp.write(extra_data)
 999                 self.fp.write(zinfo.comment)
1000
1001             pos2 = self.fp.tell()
1002             # Write end-of-zip-archive record
1003             if pos1 > ZIP64_LIMIT:
1004                 # Need to write the ZIP64 end-of-archive records
1005                 zip64endrec = struct.pack(
1006                         structEndArchive64, stringEndArchive64,
1007                         44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
1008                 self.fp.write(zip64endrec)
1009
1010                 zip64locrec = struct.pack(
1011                         structEndArchive64Locator,
1012                         stringEndArchive64Locator, 0, pos2, 1)
1013                 self.fp.write(zip64locrec)
1014
1015                 # XXX Why is `pos3` computed next?  It's never referenced.
1016                 pos3 = self.fp.tell()
1017                 endrec = struct.pack(structEndArchive, stringEndArchive,
1018                             0, 0, count, count, pos2 - pos1, -1, 0)
1019                 self.fp.write(endrec)
1020
1021             else:
1022                 endrec = struct.pack(structEndArchive, stringEndArchive,
1023                          0, 0, count, count, pos2 - pos1, pos1, 0)
1024                 self.fp.write(endrec)
1025             self.fp.flush()
1026         if not self._filePassed:
1027             self.fp.close()
1028         self.fp = None
1029
1030
1031 class PyZipFile(ZipFile):
1032     """Class to create ZIP archives with Python library files and packages."""
1033
1034     def writepy(self, pathname, basename = ""):
1035         """Add all files from "pathname" to the ZIP archive.
1036
1037         If pathname is a package directory, search the directory and
1038         all package subdirectories recursively for all *.py and enter
1039         the modules into the archive.  If pathname is a plain
1040         directory, listdir *.py and enter all modules.  Else, pathname
1041         must be a Python *.py file and the module will be put into the
1042         archive.  Added modules are always module.pyo or module.pyc.
1043         This method will compile the module.py into module.pyc if
1044         necessary.
1045         """
1046         dir, name = os.path.split(pathname)
1047         if os.path.isdir(pathname):
1048             initname = os.path.join(pathname, "__init__.py")
1049             if os.path.isfile(initname):
1050                 # This is a package directory, add it
1051                 if basename:
1052                     basename = "%s/%s" % (basename, name)
1053                 else:
1054                     basename = name
1055                 if self.debug:
1056                     print "Adding package in", pathname, "as", basename
1057                 fname, arcname = self._get_codename(initname[0:-3], basename)
1058                 if self.debug:
1059                     print "Adding", arcname
1060                 self.write(fname, arcname)
1061                 dirlist = os.listdir(pathname)
1062                 dirlist.remove("__init__.py")
1063                 # Add all *.py files and package subdirectories
1064                 for filename in dirlist:
1065                     path = os.path.join(pathname, filename)
1066                     root, ext = os.path.splitext(filename)
1067                     if os.path.isdir(path):
1068                         if os.path.isfile(os.path.join(path, "__init__.py")):
1069                             # This is a package directory, add it
1070                             self.writepy(path, basename)  # Recursive call
1071                     elif ext == ".py":
1072                         fname, arcname = self._get_codename(path[0:-3],
1073                                          basename)
1074                         if self.debug:
1075                             print "Adding", arcname
1076                         self.write(fname, arcname)
1077             else:
1078                 # This is NOT a package directory, add its files at top level
1079                 if self.debug:
1080                     print "Adding files from directory", pathname
1081                 for filename in os.listdir(pathname):
1082                     path = os.path.join(pathname, filename)
1083                     root, ext = os.path.splitext(filename)
1084                     if ext == ".py":
1085                         fname, arcname = self._get_codename(path[0:-3],
1086                                          basename)
1087                         if self.debug:
1088                             print "Adding", arcname
1089                         self.write(fname, arcname)
1090         else:
1091             if pathname[-3:] != ".py":
1092                 raise RuntimeError, \
1093                       'Files added with writepy() must end with ".py"'
1094             fname, arcname = self._get_codename(pathname[0:-3], basename)
1095             if self.debug:
1096                 print "Adding file", arcname
1097             self.write(fname, arcname)
1098
1099     def _get_codename(self, pathname, basename):
1100         """Return (filename, archivename) for the path.
1101
1102         Given a module name path, return the correct file path and
1103         archive name, compiling if necessary.  For example, given
1104         /python/lib/string, return (/python/lib/string.pyc, string).
1105         """
1106         file_py  = pathname + ".py"
1107         file_pyc = pathname + ".pyc"
1108         file_pyo = pathname + ".pyo"
1109         if os.path.isfile(file_pyo) and \
1110                             os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
1111             fname = file_pyo    # Use .pyo file
1112         elif not os.path.isfile(file_pyc) or \
1113              os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
1114             import py_compile
1115             if self.debug:
1116                 print "Compiling", file_py
1117             try:
1118                 py_compile.compile(file_py, file_pyc, None, True)
1119             except py_compile.PyCompileError,err:
1120                 print err.msg
1121             fname = file_pyc
1122         else:
1123             fname = file_pyc
1124         archivename = os.path.split(fname)[1]
1125         if basename:
1126             archivename = "%s/%s" % (basename, archivename)
1127         return (fname, archivename)
1128
1129
def main(args = None):
    """Command-line interface: list, test, extract or create a zipfile.

    'args' is the argument list without the program name; it defaults
    to sys.argv[1:].  The first argument selects the action (-l, -t,
    -e or -c).  For an unknown action or a wrong number of arguments
    the usage text is printed and the process exits with status 1.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            # testzip() returns the name of the first member that fails
            # its check, so report it instead of discarding the result
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: %r" % (badfile,))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        out = args[2]
        try:
            # NOTE: member names are joined onto the target directory
            # unchecked, so absolute names or names containing ".." can
            # end up outside 'out'.  Only extract trusted archives.
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                tgtdir = os.path.dirname(tgt)
                # tgtdir is empty when extracting into '' (the current
                # directory); there is nothing to create in that case
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                if path.endswith('/'):
                    # Directory entry: it was created above and has no
                    # data to write
                    continue
                fp = open(tgt, 'wb')
                try:
                    fp.write(zf.read(path))
                finally:
                    fp.close()
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recursively every file below a directory
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1202
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()