Lib/zipfile.py

   1 """
   2 Read and write ZIP files.
   3 """
   4 import struct, os, time, sys, shutil
   5 import binascii, cStringIO, stat
   6
   7 try:
   8     import zlib # We may need its compression method
   9     crc32 = zlib.crc32
  10 except ImportError:
  11     zlib = None
  12     crc32 = binascii.crc32
  13
  14 __all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
  15            "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
  16
  17 class BadZipfile(Exception):
  18     pass
  19
  20
  21 class LargeZipFile(Exception):
  22     """
  23     Raised when writing a zipfile, the zipfile requires ZIP64 extensions
  24     and those extensions are disabled.
  25     """
  26
  27 error = BadZipfile      # The exception raised by this module
  28
  29 ZIP64_LIMIT = (1 << 31) - 1
  30 ZIP_FILECOUNT_LIMIT = 1 << 16
  31 ZIP_MAX_COMMENT = (1 << 16) - 1
  32
  33 # constants for Zip file compression methods
  34 ZIP_STORED = 0
  35 ZIP_DEFLATED = 8
  36 # Other ZIP compression methods not supported
  37
  38 # Below are some formats and associated data for reading/writing headers using
  39 # the struct module.  The names and structures of headers/records are those used
  40 # in the PKWARE description of the ZIP file format:
  41 #     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
  42 # (URL valid as of January 2008)
  43
  44 # The "end of central directory" structure, magic number, size, and indices
  45 # (section V.I in the format document)
  46 structEndArchive = "<4s4H2LH"
  47 stringEndArchive = "PK\005\006"
  48 sizeEndCentDir = struct.calcsize(structEndArchive)
  49
  50 _ECD_SIGNATURE = 0
  51 _ECD_DISK_NUMBER = 1
  52 _ECD_DISK_START = 2
  53 _ECD_ENTRIES_THIS_DISK = 3
  54 _ECD_ENTRIES_TOTAL = 4
  55 _ECD_SIZE = 5
  56 _ECD_OFFSET = 6
  57 _ECD_COMMENT_SIZE = 7
  58 # These last two indices are not part of the structure as defined in the
  59 # spec, but they are used internally by this module as a convenience
  60 _ECD_COMMENT = 8
  61 _ECD_LOCATION = 9
  62
  63 # The "central directory" structure, magic number, size, and indices
  64 # of entries in the structure (section V.F in the format document)
  65 structCentralDir = "<4s4B4HL2L5H2L"
  66 stringCentralDir = "PK\001\002"
  67 sizeCentralDir = struct.calcsize(structCentralDir)
  68
  69 # indexes of entries in the central directory structure
  70 _CD_SIGNATURE = 0
  71 _CD_CREATE_VERSION = 1
  72 _CD_CREATE_SYSTEM = 2
  73 _CD_EXTRACT_VERSION = 3
  74 _CD_EXTRACT_SYSTEM = 4
  75 _CD_FLAG_BITS = 5
  76 _CD_COMPRESS_TYPE = 6
  77 _CD_TIME = 7
  78 _CD_DATE = 8
  79 _CD_CRC = 9
  80 _CD_COMPRESSED_SIZE = 10
  81 _CD_UNCOMPRESSED_SIZE = 11
  82 _CD_FILENAME_LENGTH = 12
  83 _CD_EXTRA_FIELD_LENGTH = 13
  84 _CD_COMMENT_LENGTH = 14
  85 _CD_DISK_NUMBER_START = 15
  86 _CD_INTERNAL_FILE_ATTRIBUTES = 16
  87 _CD_EXTERNAL_FILE_ATTRIBUTES = 17
  88 _CD_LOCAL_HEADER_OFFSET = 18
  89
  90 # The "local file header" structure, magic number, size, and indices
  91 # (section V.A in the format document)
  92 structFileHeader = "<4s2B4HL2L2H"
  93 stringFileHeader = "PK\003\004"
  94 sizeFileHeader = struct.calcsize(structFileHeader)
  95
  96 _FH_SIGNATURE = 0
  97 _FH_EXTRACT_VERSION = 1
  98 _FH_EXTRACT_SYSTEM = 2
  99 _FH_GENERAL_PURPOSE_FLAG_BITS = 3
 100 _FH_COMPRESSION_METHOD = 4
 101 _FH_LAST_MOD_TIME = 5
 102 _FH_LAST_MOD_DATE = 6
 103 _FH_CRC = 7
 104 _FH_COMPRESSED_SIZE = 8
 105 _FH_UNCOMPRESSED_SIZE = 9
 106 _FH_FILENAME_LENGTH = 10
 107 _FH_EXTRA_FIELD_LENGTH = 11
 108
 109 # The "Zip64 end of central directory locator" structure, magic number, and size
 110 structEndArchive64Locator = "<4sLQL"
 111 stringEndArchive64Locator = "PK\x06\x07"
 112 sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
 113
 114 # The "Zip64 end of central directory" record, magic number, size, and indices
 115 # (section V.G in the format document)
 116 structEndArchive64 = "<4sQ2H2L4Q"
 117 stringEndArchive64 = "PK\x06\x06"
 118 sizeEndCentDir64 = struct.calcsize(structEndArchive64)
 119
 120 _CD64_SIGNATURE = 0
 121 _CD64_DIRECTORY_RECSIZE = 1
 122 _CD64_CREATE_VERSION = 2
 123 _CD64_EXTRACT_VERSION = 3
 124 _CD64_DISK_NUMBER = 4
 125 _CD64_DISK_NUMBER_START = 5
 126 _CD64_NUMBER_ENTRIES_THIS_DISK = 6
 127 _CD64_NUMBER_ENTRIES_TOTAL = 7
 128 _CD64_DIRECTORY_SIZE = 8
 129 _CD64_OFFSET_START_CENTDIR = 9
 130
 131 def _check_zipfile(fp):
 132     try:
 133         if _EndRecData(fp):
 134             return True         # file has correct magic number
 135     except IOError:
 136         pass
 137     return False
 138
 139 def is_zipfile(filename):
 140     """Quickly see if a file is a ZIP file by checking the magic number.
 141
 142     The filename argument may be a file or file-like object too.
 143     """
 144     result = False
 145     try:
 146         if hasattr(filename, "read"):
 147             result = _check_zipfile(fp=filename)
 148         else:
 149             with open(filename, "rb") as fp:
 150                 result = _check_zipfile(fp)
 151     except IOError:
 152         pass
 153     return result
 154
 155 def _EndRecData64(fpin, offset, endrec):
 156     """
 157     Read the ZIP64 end-of-archive records and use that to update endrec
 158     """
 159     fpin.seek(offset - sizeEndCentDir64Locator, 2)
 160     data = fpin.read(sizeEndCentDir64Locator)
 161     sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
 162     if sig != stringEndArchive64Locator:
 163         return endrec
 164
 165     if diskno != 0 or disks != 1:
 166         raise BadZipfile("zipfiles that span multiple disks are not supported")
 167
 168     # Assume no 'zip64 extensible data'
 169     fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
 170     data = fpin.read(sizeEndCentDir64)
 171     sig, sz, create_version, read_version, disk_num, disk_dir, \
 172             dircount, dircount2, dirsize, diroffset = \
 173             struct.unpack(structEndArchive64, data)
 174     if sig != stringEndArchive64:
 175         return endrec
 176
 177     # Update the original endrec using data from the ZIP64 record
 178     endrec[_ECD_SIGNATURE] = sig
 179     endrec[_ECD_DISK_NUMBER] = disk_num
 180     endrec[_ECD_DISK_START] = disk_dir
 181     endrec[_ECD_ENTRIES_THIS_DISK] = dircount
 182     endrec[_ECD_ENTRIES_TOTAL] = dircount2
 183     endrec[_ECD_SIZE] = dirsize
 184     endrec[_ECD_OFFSET] = diroffset
 185     return endrec
 186
 187
 188 def _EndRecData(fpin):
 189     """Return data from the "End of Central Directory" record, or None.
 190
 191     The data is a list of the nine items in the ZIP "End of central dir"
 192     record followed by a tenth item, the file seek offset of this record."""
 193
 194     # Determine file size
 195     fpin.seek(0, 2)
 196     filesize = fpin.tell()
 197
 198     # Check to see if this is ZIP file with no archive comment (the
 199     # "end of central directory" structure should be the last item in the
 200     # file if this is the case).
 201     fpin.seek(-sizeEndCentDir, 2)
 202     data = fpin.read()
 203     if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
 204         # the signature is correct and there's no comment, unpack structure
 205         endrec = struct.unpack(structEndArchive, data)
 206         endrec=list(endrec)
 207
 208         # Append a blank comment and record start offset
 209         endrec.append("")
 210         endrec.append(filesize - sizeEndCentDir)
 211
 212         # Try to read the "Zip64 end of central directory" structure
 213         return _EndRecData64(fpin, -sizeEndCentDir, endrec)
 214
 215     # Either this is not a ZIP file, or it is a ZIP file with an archive
 216     # comment.  Search the end of the file for the "end of central directory"
 217     # record signature. The comment is the last item in the ZIP file and may be
 218     # up to 64K long.  It is assumed that the "end of central directory" magic
 219     # number does not appear in the comment.
 220     maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
 221     fpin.seek(maxCommentStart, 0)
 222     data = fpin.read()
 223     start = data.rfind(stringEndArchive)
 224     if start >= 0:
 225         # found the magic number; attempt to unpack and interpret
 226         recData = data[start:start+sizeEndCentDir]
 227         endrec = list(struct.unpack(structEndArchive, recData))
 228         comment = data[start+sizeEndCentDir:]
 229         # check that comment length is correct
 230         if endrec[_ECD_COMMENT_SIZE] == len(comment):
 231             # Append the archive comment and start offset
 232             endrec.append(comment)
 233             endrec.append(maxCommentStart + start)
 234
 235             # Try to read the "Zip64 end of central directory" structure
 236             return _EndRecData64(fpin, maxCommentStart + start - filesize,
 237                                  endrec)
 238
 239     # Unable to find a valid end of central directory structure
 240     return
 241
 242
 243 class ZipInfo (object):
 244     """Class with attributes describing each file in the ZIP archive."""
 245
 246     __slots__ = (
 247             'orig_filename',
 248             'filename',
 249             'date_time',
 250             'compress_type',
 251             'comment',
 252             'extra',
 253             'create_system',
 254             'create_version',
 255             'extract_version',
 256             'reserved',
 257             'flag_bits',
 258             'volume',
 259             'internal_attr',
 260             'external_attr',
 261             'header_offset',
 262             'CRC',
 263             'compress_size',
 264             'file_size',
 265             '_raw_time',
 266         )
 267
 268     def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
 269         self.orig_filename = filename   # Original file name in archive
 270
 271         # Terminate the file name at the first null byte.  Null bytes in file
 272         # names are used as tricks by viruses in archives.
 273         null_byte = filename.find(chr(0))
 274         if null_byte >= 0:
 275             filename = filename[0:null_byte]
 276         # This is used to ensure paths in generated ZIP files always use
 277         # forward slashes as the directory separator, as required by the
 278         # ZIP format specification.
 279         if os.sep != "/" and os.sep in filename:
 280             filename = filename.replace(os.sep, "/")
 281
 282         self.filename = filename        # Normalized file name
 283         self.date_time = date_time      # year, month, day, hour, min, sec
 284         # Standard values:
 285         self.compress_type = ZIP_STORED # Type of compression for the file
 286         self.comment = ""               # Comment for each file
 287         self.extra = ""                 # ZIP extra data
 288         if sys.platform == 'win32':
 289             self.create_system = 0          # System which created ZIP archive
 290         else:
 291             # Assume everything else is unix-y
 292             self.create_system = 3          # System which created ZIP archive
 293         self.create_version = 20        # Version which created ZIP archive
 294         self.extract_version = 20       # Version needed to extract archive
 295         self.reserved = 0               # Must be zero
 296         self.flag_bits = 0              # ZIP flag bits
 297         self.volume = 0                 # Volume number of file header
 298         self.internal_attr = 0          # Internal attributes
 299         self.external_attr = 0          # External file attributes
 300         # Other attributes are set by class ZipFile:
 301         # header_offset         Byte offset to the file header
 302         # CRC                   CRC-32 of the uncompressed file
 303         # compress_size         Size of the compressed file
 304         # file_size             Size of the uncompressed file
 305
 306     def FileHeader(self):
 307         """Return the per-file header as a string."""
 308         dt = self.date_time
 309         dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
 310         dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
 311         if self.flag_bits & 0x08:
 312             # Set these to zero because we write them after the file data
 313             CRC = compress_size = file_size = 0
 314         else:
 315             CRC = self.CRC
 316             compress_size = self.compress_size
 317             file_size = self.file_size
 318
 319         extra = self.extra
 320
 321         if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
 322             # File is larger than what fits into a 4 byte integer,
 323             # fall back to the ZIP64 extension
 324             fmt = '<HHQQ'
 325             extra = extra + struct.pack(fmt,
 326                     1, struct.calcsize(fmt)-4, file_size, compress_size)
 327             file_size = 0xffffffff
 328             compress_size = 0xffffffff
 329             self.extract_version = max(45, self.extract_version)
 330             self.create_version = max(45, self.extract_version)
 331
 332         filename, flag_bits = self._encodeFilenameFlags()
 333         header = struct.pack(structFileHeader, stringFileHeader,
 334                  self.extract_version, self.reserved, flag_bits,
 335                  self.compress_type, dostime, dosdate, CRC,
 336                  compress_size, file_size,
 337                  len(filename), len(extra))
 338         return header + filename + extra
 339
 340     def _encodeFilenameFlags(self):
 341         if isinstance(self.filename, unicode):
 342             try:
 343                 return self.filename.encode('ascii'), self.flag_bits
 344             except UnicodeEncodeError:
 345                 return self.filename.encode('utf-8'), self.flag_bits | 0x800
 346         else:
 347             return self.filename, self.flag_bits
 348
 349     def _decodeFilename(self):
 350         if self.flag_bits & 0x800:
 351             return self.filename.decode('utf-8')
 352         else:
 353             return self.filename
 354
 355     def _decodeExtra(self):
 356         # Try to decode the extra field.
 357         extra = self.extra
 358         unpack = struct.unpack
 359         while extra:
 360             tp, ln = unpack('<HH', extra[:4])
 361             if tp == 1:
 362                 if ln >= 24:
 363                     counts = unpack('<QQQ', extra[4:28])
 364                 elif ln == 16:
 365                     counts = unpack('<QQ', extra[4:20])
 366                 elif ln == 8:
 367                     counts = unpack('<Q', extra[4:12])
 368                 elif ln == 0:
 369                     counts = ()
 370                 else:
 371                     raise RuntimeError, "Corrupt extra field %s"%(ln,)
 372
 373                 idx = 0
 374
 375                 # ZIP64 extension (large files and/or large archives)
 376                 if self.file_size in (0xffffffffffffffffL, 0xffffffffL):
 377                     self.file_size = counts[idx]
 378                     idx += 1
 379
 380                 if self.compress_size == 0xFFFFFFFFL:
 381                     self.compress_size = counts[idx]
 382                     idx += 1
 383
 384                 if self.header_offset == 0xffffffffL:
 385                     old = self.header_offset
 386                     self.header_offset = counts[idx]
 387                     idx+=1
 388
 389             extra = extra[ln+4:]
 390
 391
 392 class _ZipDecrypter:
 393     """Class to handle decryption of files stored within a ZIP archive.
 394
 395     ZIP supports a password-based form of encryption. Even though known
 396     plaintext attacks have been found against it, it is still useful
 397     to be able to get data out of such a file.
 398
 399     Usage:
 400         zd = _ZipDecrypter(mypwd)
 401         plain_char = zd(cypher_char)
 402         plain_text = map(zd, cypher_text)
 403     """
 404
 405     def _GenerateCRCTable():
 406         """Generate a CRC-32 table.
 407
 408         ZIP encryption uses the CRC32 one-byte primitive for scrambling some
 409         internal keys. We noticed that a direct implementation is faster than
 410         relying on binascii.crc32().
 411         """
 412         poly = 0xedb88320
 413         table = [0] * 256
 414         for i in range(256):
 415             crc = i
 416             for j in range(8):
 417                 if crc & 1:
 418                     crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
 419                 else:
 420                     crc = ((crc >> 1) & 0x7FFFFFFF)
 421             table[i] = crc
 422         return table
 423     crctable = _GenerateCRCTable()
 424
 425     def _crc32(self, ch, crc):
 426         """Compute the CRC32 primitive on one byte."""
 427         return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
 428
 429     def __init__(self, pwd):
 430         self.key0 = 305419896
 431         self.key1 = 591751049
 432         self.key2 = 878082192
 433         for p in pwd:
 434             self._UpdateKeys(p)
 435
 436     def _UpdateKeys(self, c):
 437         self.key0 = self._crc32(c, self.key0)
 438         self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
 439         self.key1 = (self.key1 * 134775813 + 1) & 4294967295
 440         self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
 441
 442     def __call__(self, c):
 443         """Decrypt a single character."""
 444         c = ord(c)
 445         k = self.key2 | 2
 446         c = c ^ (((k * (k^1)) >> 8) & 255)
 447         c = chr(c)
 448         self._UpdateKeys(c)
 449         return c
 450
 451 class ZipExtFile:
 452     """File-like object for reading an archive member.
 453        Is returned by ZipFile.open().
 454     """
 455
 456     def __init__(self, fileobj, zipinfo, decrypt=None):
 457         self.fileobj = fileobj
 458         self.decrypter = decrypt
 459         self.bytes_read = 0L
 460         self.rawbuffer = ''
 461         self.readbuffer = ''
 462         self.linebuffer = ''
 463         self.eof = False
 464         self.univ_newlines = False
 465         self.nlSeps = ("\n", )
 466         self.lastdiscard = ''
 467
 468         self.compress_type = zipinfo.compress_type
 469         self.compress_size = zipinfo.compress_size
 470
 471         self.closed  = False
 472         self.mode    = "r"
 473         self.name = zipinfo.filename
 474
 475         # read from compressed files in 64k blocks
 476         self.compreadsize = 64*1024
 477         if self.compress_type == ZIP_DEFLATED:
 478             self.dc = zlib.decompressobj(-15)
 479
 480     def set_univ_newlines(self, univ_newlines):
 481         self.univ_newlines = univ_newlines
 482
 483         # pick line separator char(s) based on universal newlines flag
 484         self.nlSeps = ("\n", )
 485         if self.univ_newlines:
 486             self.nlSeps = ("\r\n", "\r", "\n")
 487
 488     def __iter__(self):
 489         return self
 490
 491     def next(self):
 492         nextline = self.readline()
 493         if not nextline:
 494             raise StopIteration()
 495
 496         return nextline
 497
 498     def close(self):
 499         self.closed = True
 500
 501     def _checkfornewline(self):
 502         nl, nllen = -1, -1
 503         if self.linebuffer:
 504             # ugly check for cases where half of an \r\n pair was
 505             # read on the last pass, and the \r was discarded.  In this
 506             # case we just throw away the \n at the start of the buffer.
 507             if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
 508                 self.linebuffer = self.linebuffer[1:]
 509
 510             for sep in self.nlSeps:
 511                 nl = self.linebuffer.find(sep)
 512                 if nl >= 0:
 513                     nllen = len(sep)
 514                     return nl, nllen
 515
 516         return nl, nllen
 517
 518     def readline(self, size = -1):
 519         """Read a line with approx. size. If size is negative,
 520            read a whole line.
 521         """
 522         if size < 0:
 523             size = sys.maxint
 524         elif size == 0:
 525             return ''
 526
 527         # check for a newline already in buffer
 528         nl, nllen = self._checkfornewline()
 529
 530         if nl >= 0:
 531             # the next line was already in the buffer
 532             nl = min(nl, size)
 533         else:
 534             # no line break in buffer - try to read more
 535             size -= len(self.linebuffer)
 536             while nl < 0 and size > 0:
 537                 buf = self.read(min(size, 100))
 538                 if not buf:
 539                     break
 540                 self.linebuffer += buf
 541                 size -= len(buf)
 542
 543                 # check for a newline in buffer
 544                 nl, nllen = self._checkfornewline()
 545
 546             # we either ran out of bytes in the file, or
 547             # met the specified size limit without finding a newline,
 548             # so return current buffer
 549             if nl < 0:
 550                 s = self.linebuffer
 551                 self.linebuffer = ''
 552                 return s
 553
 554         buf = self.linebuffer[:nl]
 555         self.lastdiscard = self.linebuffer[nl:nl + nllen]
 556         self.linebuffer = self.linebuffer[nl + nllen:]
 557
 558         # line is always returned with \n as newline char (except possibly
 559         # for a final incomplete line in the file, which is handled above).
 560         return buf + "\n"
 561
 562     def readlines(self, sizehint = -1):
 563         """Return a list with all (following) lines. The sizehint parameter
 564         is ignored in this implementation.
 565         """
 566         result = []
 567         while True:
 568             line = self.readline()
 569             if not line: break
 570             result.append(line)
 571         return result
 572
 573     def read(self, size = None):
 574         # act like file() obj and return empty string if size is 0
 575         if size == 0:
 576             return ''
 577
 578         # determine read size
 579         bytesToRead = self.compress_size - self.bytes_read
 580
 581         # adjust read size for encrypted files since the first 12 bytes
 582         # are for the encryption/password information
 583         if self.decrypter is not None:
 584             bytesToRead -= 12
 585
 586         if size is not None and size >= 0:
 587             if self.compress_type == ZIP_STORED:
 588                 lr = len(self.readbuffer)
 589                 bytesToRead = min(bytesToRead, size - lr)
 590             elif self.compress_type == ZIP_DEFLATED:
 591                 if len(self.readbuffer) > size:
 592                     # the user has requested fewer bytes than we've already
 593                     # pulled through the decompressor; don't read any more
 594                     bytesToRead = 0
 595                 else:
 596                     # user will use up the buffer, so read some more
 597                     lr = len(self.rawbuffer)
 598                     bytesToRead = min(bytesToRead, self.compreadsize - lr)
 599
 600         # avoid reading past end of file contents
 601         if bytesToRead + self.bytes_read > self.compress_size:
 602             bytesToRead = self.compress_size - self.bytes_read
 603
 604         # try to read from file (if necessary)
 605         if bytesToRead > 0:
 606             bytes = self.fileobj.read(bytesToRead)
 607             self.bytes_read += len(bytes)
 608             self.rawbuffer += bytes
 609
 610             # handle contents of raw buffer
 611             if self.rawbuffer:
 612                 newdata = self.rawbuffer
 613                 self.rawbuffer = ''
 614
 615                 # decrypt new data if we were given an object to handle that
 616                 if newdata and self.decrypter is not None:
 617                     newdata = ''.join(map(self.decrypter, newdata))
 618
 619                 # decompress newly read data if necessary
 620                 if newdata and self.compress_type == ZIP_DEFLATED:
 621                     newdata = self.dc.decompress(newdata)
 622                     self.rawbuffer = self.dc.unconsumed_tail
 623                     if self.eof and len(self.rawbuffer) == 0:
 624                         # we're out of raw bytes (both from the file and
 625                         # the local buffer); flush just to make sure the
 626                         # decompressor is done
 627                         newdata += self.dc.flush()
 628                         # prevent decompressor from being used again
 629                         self.dc = None
 630
 631                 self.readbuffer += newdata
 632
 633
 634         # return what the user asked for
 635         if size is None or len(self.readbuffer) <= size:
 636             bytes = self.readbuffer
 637             self.readbuffer = ''
 638         else:
 639             bytes = self.readbuffer[:size]
 640             self.readbuffer = self.readbuffer[size:]
 641
 642         return bytes
 643
 644
 645 class ZipFile:
 646     """ Class with methods to open, read, write, close, list zip files.
 647
 648     z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
 649
 650     file: Either the path to the file, or a file-like object.
 651           If it is a path, the file will be opened and closed by ZipFile.
 652     mode: The mode can be either read "r", write "w" or append "a".
 653     compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
 654     allowZip64: if True ZipFile will create files with ZIP64 extensions when
 655                 needed, otherwise it will raise an exception when this would
 656                 be necessary.
 657
 658     """
 659
 660     fp = None                   # Set here since __del__ checks it
 661
 662     def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
 663         """Open the ZIP file with mode read "r", write "w" or append "a"."""
 664         if mode not in ("r", "w", "a"):
 665             raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
 666
 667         if compression == ZIP_STORED:
 668             pass
 669         elif compression == ZIP_DEFLATED:
 670             if not zlib:
 671                 raise RuntimeError,\
 672                       "Compression requires the (missing) zlib module"
 673         else:
 674             raise RuntimeError, "That compression method is not supported"
 675
 676         self._allowZip64 = allowZip64
 677         self._didModify = False
 678         self.debug = 0  # Level of printing: 0 through 3
 679         self.NameToInfo = {}    # Find file info given name
 680         self.filelist = []      # List of ZipInfo instances for archive
 681         self.compression = compression  # Method of compression
 682         self.mode = key = mode.replace('b', '')[0]
 683         self.pwd = None
 684         self.comment = ''
 685
 686         # Check if we were passed a file-like object
 687         if isinstance(file, basestring):
 688             self._filePassed = 0
 689             self.filename = file
 690             modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
 691             try:
 692                 self.fp = open(file, modeDict[mode])
 693             except IOError:
 694                 if mode == 'a':
 695                     mode = key = 'w'
 696                     self.fp = open(file, modeDict[mode])
 697                 else:
 698                     raise
 699         else:
 700             self._filePassed = 1
 701             self.fp = file
 702             self.filename = getattr(file, 'name', None)
 703
 704         if key == 'r':
 705             self._GetContents()
 706         elif key == 'w':
 707             pass
 708         elif key == 'a':
 709             try:                        # See if file is a zip file
 710                 self._RealGetContents()
 711                 # seek to start of directory and overwrite
 712                 self.fp.seek(self.start_dir, 0)
 713             except BadZipfile:          # file is not a zip file, just append
 714                 self.fp.seek(0, 2)
 715         else:
 716             if not self._filePassed:
 717                 self.fp.close()
 718                 self.fp = None
 719             raise RuntimeError, 'Mode must be "r", "w" or "a"'
 720
 721     def _GetContents(self):
 722         """Read the directory, making sure we close the file if the format
 723         is bad."""
 724         try:
 725             self._RealGetContents()
 726         except BadZipfile:
 727             if not self._filePassed:
 728                 self.fp.close()
 729                 self.fp = None
 730             raise
 731
 732     def _RealGetContents(self):
 733         """Read in the table of contents for the ZIP file."""
 734         fp = self.fp
 735         endrec = _EndRecData(fp)
 736         if not endrec:
 737             raise BadZipfile, "File is not a zip file"
 738         if self.debug > 1:
 739             print endrec
 740         size_cd = endrec[_ECD_SIZE]             # bytes in central directory
 741         offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
 742         self.comment = endrec[_ECD_COMMENT]     # archive comment
 743
 744         # "concat" is zero, unless zip was concatenated to another file
 745         concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
 746         if endrec[_ECD_SIGNATURE] == stringEndArchive64:
 747             # If Zip64 extension structures are present, account for them
 748             concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
 749
 750         if self.debug > 2:
 751             inferred = concat + offset_cd
 752             print "given, inferred, offset", offset_cd, inferred, concat
 753         # self.start_dir:  Position of start of central directory
 754         self.start_dir = offset_cd + concat
 755         fp.seek(self.start_dir, 0)
 756         data = fp.read(size_cd)
 757         fp = cStringIO.StringIO(data)
 758         total = 0
 759         while total < size_cd:
 760             centdir = fp.read(sizeCentralDir)
 761             if centdir[0:4] != stringCentralDir:
 762                 raise BadZipfile, "Bad magic number for central directory"
 763             centdir = struct.unpack(structCentralDir, centdir)
 764             if self.debug > 2:
 765                 print centdir
 766             filename = fp.read(centdir[_CD_FILENAME_LENGTH])
 767             # Create ZipInfo instance to store file information
 768             x = ZipInfo(filename)
 769             x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
 770             x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
 771             x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
 772             (x.create_version, x.create_system, x.extract_version, x.reserved,
 773                 x.flag_bits, x.compress_type, t, d,
 774                 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
 775             x.volume, x.internal_attr, x.external_attr = centdir[15:18]
 776             # Convert date/time code to (year, month, day, hour, min, sec)
 777             x._raw_time = t
 778             x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
 779                                      t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
 780
 781             x._decodeExtra()
 782             x.header_offset = x.header_offset + concat
 783             x.filename = x._decodeFilename()
 784             self.filelist.append(x)
 785             self.NameToInfo[x.filename] = x
 786
 787             # update total bytes read from central directory
 788             total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
 789                      + centdir[_CD_EXTRA_FIELD_LENGTH]
 790                      + centdir[_CD_COMMENT_LENGTH])
 791
 792             if self.debug > 2:
 793                 print "total", total
 794
 795
 796     def namelist(self):
 797         """Return a list of file names in the archive."""
 798         l = []
 799         for data in self.filelist:
 800             l.append(data.filename)
 801         return l
 802
 803     def infolist(self):
 804         """Return a list of class ZipInfo instances for files in the
 805         archive."""
 806         return self.filelist
 807
 808     def printdir(self):
 809         """Print a table of contents for the zip file."""
 810         print "%-46s %19s %12s" % ("File Name", "Modified    ", "Size")
 811         for zinfo in self.filelist:
 812             date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
 813             print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
 814
 815     def testzip(self):
 816         """Read all the files and check the CRC."""
 817         chunk_size = 2 ** 20
 818         for zinfo in self.filelist:
 819             try:
 820                 # Read by chunks, to avoid an OverflowError or a
 821                 # MemoryError with very large embedded files.
 822                 f = self.open(zinfo.filename, "r")
 823                 while f.read(chunk_size):     # Check CRC-32
 824                     pass
 825             except BadZipfile:
 826                 return zinfo.filename
 827
 828     def getinfo(self, name):
 829         """Return the instance of ZipInfo given 'name'."""
 830         info = self.NameToInfo.get(name)
 831         if info is None:
 832             raise KeyError(
 833                 'There is no item named %r in the archive' % name)
 834
 835         return info
 836
 837     def setpassword(self, pwd):
 838         """Set default password for encrypted files."""
 839         self.pwd = pwd
 840
 841     def read(self, name, pwd=None):
 842         """Return file bytes (as a string) for name."""
 843         return self.open(name, "r", pwd).read()
 844
 845     def open(self, name, mode="r", pwd=None):
 846         """Return file-like object for 'name'."""
 847         if mode not in ("r", "U", "rU"):
 848             raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
 849         if not self.fp:
 850             raise RuntimeError, \
 851                   "Attempt to read ZIP archive that was already closed"
 852
 853         # Only open a new file for instances where we were not
 854         # given a file object in the constructor
 855         if self._filePassed:
 856             zef_file = self.fp
 857         else:
 858             zef_file = open(self.filename, 'rb')
 859
 860         # Make sure we have an info object
 861         if isinstance(name, ZipInfo):
 862             # 'name' is already an info object
 863             zinfo = name
 864         else:
 865             # Get info object for name
 866             zinfo = self.getinfo(name)
 867
 868         zef_file.seek(zinfo.header_offset, 0)
 869
 870         # Skip the file header:
 871         fheader = zef_file.read(sizeFileHeader)
 872         if fheader[0:4] != stringFileHeader:
 873             raise BadZipfile, "Bad magic number for file header"
 874
 875         fheader = struct.unpack(structFileHeader, fheader)
 876         fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
 877         if fheader[_FH_EXTRA_FIELD_LENGTH]:
 878             zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
 879
 880         if fname != zinfo.orig_filename:
 881             raise BadZipfile, \
 882                       'File name in directory "%s" and header "%s" differ.' % (
 883                           zinfo.orig_filename, fname)
 884
 885         # check for encrypted flag & handle password
 886         is_encrypted = zinfo.flag_bits & 0x1
 887         zd = None
 888         if is_encrypted:
 889             if not pwd:
 890                 pwd = self.pwd
 891             if not pwd:
 892                 raise RuntimeError, "File %s is encrypted, " \
 893                       "password required for extraction" % name
 894
 895             zd = _ZipDecrypter(pwd)
 896             # The first 12 bytes in the cypher stream is an encryption header
 897             #  used to strengthen the algorithm. The first 11 bytes are
 898             #  completely random, while the 12th contains the MSB of the CRC,
 899             #  or the MSB of the file time depending on the header type
 900             #  and is used to check the correctness of the password.
 901             bytes = zef_file.read(12)
 902             h = map(zd, bytes[0:12])
 903             if zinfo.flag_bits & 0x8:
 904                 # compare against the file type from extended local headers
 905                 check_byte = (zinfo._raw_time >> 8) & 0xff
 906             else:
 907                 # compare against the CRC otherwise
 908                 check_byte = (zinfo.CRC >> 24) & 0xff
 909             if ord(h[11]) != check_byte:
 910                 raise RuntimeError("Bad password for file", name)
 911
 912         # build and return a ZipExtFile
 913         if zd is None:
 914             zef = ZipExtFile(zef_file, zinfo)
 915         else:
 916             zef = ZipExtFile(zef_file, zinfo, zd)
 917
 918         # set universal newlines on ZipExtFile if necessary
 919         if "U" in mode:
 920             zef.set_univ_newlines(True)
 921         return zef
 922
 923     def extract(self, member, path=None, pwd=None):
 924         """Extract a member from the archive to the current working directory,
 925            using its full name. Its file information is extracted as accurately
 926            as possible. `member' may be a filename or a ZipInfo object. You can
 927            specify a different directory using `path'.
 928         """
 929         if not isinstance(member, ZipInfo):
 930             member = self.getinfo(member)
 931
 932         if path is None:
 933             path = os.getcwd()
 934
 935         return self._extract_member(member, path, pwd)
 936
 937     def extractall(self, path=None, members=None, pwd=None):
 938         """Extract all members from the archive to the current working
 939            directory. `path' specifies a different directory to extract to.
 940            `members' is optional and must be a subset of the list returned
 941            by namelist().
 942         """
 943         if members is None:
 944             members = self.namelist()
 945
 946         for zipinfo in members:
 947             self.extract(zipinfo, path, pwd)
 948
 949     def _extract_member(self, member, targetpath, pwd):
 950         """Extract the ZipInfo object 'member' to a physical
 951            file on the path targetpath.
 952         """
 953         # build the destination pathname, replacing
 954         # forward slashes to platform specific separators.
 955         if targetpath[-1:] in (os.path.sep, os.path.altsep):
 956             targetpath = targetpath[:-1]
 957
 958         # don't include leading "/" from file name if present
 959         if member.filename[0] == '/':
 960             targetpath = os.path.join(targetpath, member.filename[1:])
 961         else:
 962             targetpath = os.path.join(targetpath, member.filename)
 963
 964         targetpath = os.path.normpath(targetpath)
 965
 966         # Create all upper directories if necessary.
 967         upperdirs = os.path.dirname(targetpath)
 968         if upperdirs and not os.path.exists(upperdirs):
 969             os.makedirs(upperdirs)
 970
 971         if member.filename[-1] == '/':
 972             os.mkdir(targetpath)
 973             return targetpath
 974
 975         source = self.open(member, pwd=pwd)
 976         target = file(targetpath, "wb")
 977         shutil.copyfileobj(source, target)
 978         source.close()
 979         target.close()
 980
 981         return targetpath
 982
 983     def _writecheck(self, zinfo):
 984         """Check for errors before writing a file to the archive."""
 985         if zinfo.filename in self.NameToInfo:
 986             if self.debug:      # Warning for duplicate names
 987                 print "Duplicate name:", zinfo.filename
 988         if self.mode not in ("w", "a"):
 989             raise RuntimeError, 'write() requires mode "w" or "a"'
 990         if not self.fp:
 991             raise RuntimeError, \
 992                   "Attempt to write ZIP archive that was already closed"
 993         if zinfo.compress_type == ZIP_DEFLATED and not zlib:
 994             raise RuntimeError, \
 995                   "Compression requires the (missing) zlib module"
 996         if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
 997             raise RuntimeError, \
 998                   "That compression method is not supported"
 999         if zinfo.file_size > ZIP64_LIMIT:
1000             if not self._allowZip64:
1001                 raise LargeZipFile("Filesize would require ZIP64 extensions")
1002         if zinfo.header_offset > ZIP64_LIMIT:
1003             if not self._allowZip64:
1004                 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
1005
1006     def write(self, filename, arcname=None, compress_type=None):
1007         """Put the bytes from filename into the archive under the name
1008         arcname."""
1009         if not self.fp:
1010             raise RuntimeError(
1011                   "Attempt to write to ZIP archive that was already closed")
1012
1013         st = os.stat(filename)
1014         isdir = stat.S_ISDIR(st.st_mode)
1015         mtime = time.localtime(st.st_mtime)
1016         date_time = mtime[0:6]
1017         # Create ZipInfo instance to store file information
1018         if arcname is None:
1019             arcname = filename
1020         arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
1021         while arcname[0] in (os.sep, os.altsep):
1022             arcname = arcname[1:]
1023         if isdir:
1024             arcname += '/'
1025         zinfo = ZipInfo(arcname, date_time)
1026         zinfo.external_attr = (st[0] & 0xFFFF) << 16L      # Unix attributes
1027         if compress_type is None:
1028             zinfo.compress_type = self.compression
1029         else:
1030             zinfo.compress_type = compress_type
1031
1032         zinfo.file_size = st.st_size
1033         zinfo.flag_bits = 0x00
1034         zinfo.header_offset = self.fp.tell()    # Start of header bytes
1035
1036         self._writecheck(zinfo)
1037         self._didModify = True
1038
1039         if isdir:
1040             zinfo.file_size = 0
1041             zinfo.compress_size = 0
1042             zinfo.CRC = 0
1043             self.filelist.append(zinfo)
1044             self.NameToInfo[zinfo.filename] = zinfo
1045             self.fp.write(zinfo.FileHeader())
1046             return
1047
1048         fp = open(filename, "rb")
1049         # Must overwrite CRC and sizes with correct data later
1050         zinfo.CRC = CRC = 0
1051         zinfo.compress_size = compress_size = 0
1052         zinfo.file_size = file_size = 0
1053         self.fp.write(zinfo.FileHeader())
1054         if zinfo.compress_type == ZIP_DEFLATED:
1055             cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1056                  zlib.DEFLATED, -15)
1057         else:
1058             cmpr = None
1059         while 1:
1060             buf = fp.read(1024 * 8)
1061             if not buf:
1062                 break
1063             file_size = file_size + len(buf)
1064             CRC = crc32(buf, CRC) & 0xffffffff
1065             if cmpr:
1066                 buf = cmpr.compress(buf)
1067                 compress_size = compress_size + len(buf)
1068             self.fp.write(buf)
1069         fp.close()
1070         if cmpr:
1071             buf = cmpr.flush()
1072             compress_size = compress_size + len(buf)
1073             self.fp.write(buf)
1074             zinfo.compress_size = compress_size
1075         else:
1076             zinfo.compress_size = file_size
1077         zinfo.CRC = CRC
1078         zinfo.file_size = file_size
1079         # Seek backwards and write CRC and file sizes
1080         position = self.fp.tell()       # Preserve current position in file
1081         self.fp.seek(zinfo.header_offset + 14, 0)
1082         self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
1083               zinfo.file_size))
1084         self.fp.seek(position, 0)
1085         self.filelist.append(zinfo)
1086         self.NameToInfo[zinfo.filename] = zinfo
1087
1088     def writestr(self, zinfo_or_arcname, bytes):
1089         """Write a file into the archive.  The contents is the string
1090         'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
1091         the name of the file in the archive."""
1092         if not isinstance(zinfo_or_arcname, ZipInfo):
1093             zinfo = ZipInfo(filename=zinfo_or_arcname,
1094                             date_time=time.localtime(time.time())[:6])
1095             zinfo.compress_type = self.compression
1096             zinfo.external_attr = 0600 << 16
1097         else:
1098             zinfo = zinfo_or_arcname
1099
1100         if not self.fp:
1101             raise RuntimeError(
1102                   "Attempt to write to ZIP archive that was already closed")
1103
1104         zinfo.file_size = len(bytes)            # Uncompressed size
1105         zinfo.header_offset = self.fp.tell()    # Start of header bytes
1106         self._writecheck(zinfo)
1107         self._didModify = True
1108         zinfo.CRC = crc32(bytes) & 0xffffffff       # CRC-32 checksum
1109         if zinfo.compress_type == ZIP_DEFLATED:
1110             co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1111                  zlib.DEFLATED, -15)
1112             bytes = co.compress(bytes) + co.flush()
1113             zinfo.compress_size = len(bytes)    # Compressed size
1114         else:
1115             zinfo.compress_size = zinfo.file_size
1116         zinfo.header_offset = self.fp.tell()    # Start of header bytes
1117         self.fp.write(zinfo.FileHeader())
1118         self.fp.write(bytes)
1119         self.fp.flush()
1120         if zinfo.flag_bits & 0x08:
1121             # Write CRC and file sizes after the file data
1122             self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
1123                   zinfo.file_size))
1124         self.filelist.append(zinfo)
1125         self.NameToInfo[zinfo.filename] = zinfo
1126
1127     def __del__(self):
1128         """Call the "close()" method in case the user forgot."""
1129         self.close()
1130
1131     def close(self):
1132         """Close the file, and for mode "w" and "a" write the ending
1133         records."""
1134         if self.fp is None:
1135             return
1136
1137         if self.mode in ("w", "a") and self._didModify: # write ending records
1138             count = 0
1139             pos1 = self.fp.tell()
1140             for zinfo in self.filelist:         # write central directory
1141                 count = count + 1
1142                 dt = zinfo.date_time
1143                 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
1144                 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
1145                 extra = []
1146                 if zinfo.file_size > ZIP64_LIMIT \
1147                         or zinfo.compress_size > ZIP64_LIMIT:
1148                     extra.append(zinfo.file_size)
1149                     extra.append(zinfo.compress_size)
1150                     file_size = 0xffffffff
1151                     compress_size = 0xffffffff
1152                 else:
1153                     file_size = zinfo.file_size
1154                     compress_size = zinfo.compress_size
1155
1156                 if zinfo.header_offset > ZIP64_LIMIT:
1157                     extra.append(zinfo.header_offset)
1158                     header_offset = 0xffffffffL
1159                 else:
1160                     header_offset = zinfo.header_offset
1161
1162                 extra_data = zinfo.extra
1163                 if extra:
1164                     # Append a ZIP64 field to the extra's
1165                     extra_data = struct.pack(
1166                             '<HH' + 'Q'*len(extra),
1167                             1, 8*len(extra), *extra) + extra_data
1168
1169                     extract_version = max(45, zinfo.extract_version)
1170                     create_version = max(45, zinfo.create_version)
1171                 else:
1172                     extract_version = zinfo.extract_version
1173                     create_version = zinfo.create_version
1174
1175                 try:
1176                     filename, flag_bits = zinfo._encodeFilenameFlags()
1177                     centdir = struct.pack(structCentralDir,
1178                      stringCentralDir, create_version,
1179                      zinfo.create_system, extract_version, zinfo.reserved,
1180                      flag_bits, zinfo.compress_type, dostime, dosdate,
1181                      zinfo.CRC, compress_size, file_size,
1182                      len(filename), len(extra_data), len(zinfo.comment),
1183                      0, zinfo.internal_attr, zinfo.external_attr,
1184                      header_offset)
1185                 except DeprecationWarning:
1186                     print >>sys.stderr, (structCentralDir,
1187                      stringCentralDir, create_version,
1188                      zinfo.create_system, extract_version, zinfo.reserved,
1189                      zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1190                      zinfo.CRC, compress_size, file_size,
1191                      len(zinfo.filename), len(extra_data), len(zinfo.comment),
1192                      0, zinfo.internal_attr, zinfo.external_attr,
1193                      header_offset)
1194                     raise
1195                 self.fp.write(centdir)
1196                 self.fp.write(filename)
1197                 self.fp.write(extra_data)
1198                 self.fp.write(zinfo.comment)
1199
1200             pos2 = self.fp.tell()
1201             # Write end-of-zip-archive record
1202             centDirCount = count
1203             centDirSize = pos2 - pos1
1204             centDirOffset = pos1
1205             if (centDirCount >= ZIP_FILECOUNT_LIMIT or
1206                 centDirOffset > ZIP64_LIMIT or
1207                 centDirSize > ZIP64_LIMIT):
1208                 # Need to write the ZIP64 end-of-archive records
1209                 zip64endrec = struct.pack(
1210                         structEndArchive64, stringEndArchive64,
1211                         44, 45, 45, 0, 0, centDirCount, centDirCount,
1212                         centDirSize, centDirOffset)
1213                 self.fp.write(zip64endrec)
1214
1215                 zip64locrec = struct.pack(
1216                         structEndArchive64Locator,
1217                         stringEndArchive64Locator, 0, pos2, 1)
1218                 self.fp.write(zip64locrec)
1219                 centDirCount = min(centDirCount, 0xFFFF)
1220                 centDirSize = min(centDirSize, 0xFFFFFFFF)
1221                 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
1222
1223             # check for valid comment length
1224             if len(self.comment) >= ZIP_MAX_COMMENT:
1225                 if self.debug > 0:
1226                     msg = 'Archive comment is too long; truncating to %d bytes' \
1227                           % ZIP_MAX_COMMENT
1228                 self.comment = self.comment[:ZIP_MAX_COMMENT]
1229
1230             endrec = struct.pack(structEndArchive, stringEndArchive,
1231                                  0, 0, centDirCount, centDirCount,
1232                                  centDirSize, centDirOffset, len(self.comment))
1233             self.fp.write(endrec)
1234             self.fp.write(self.comment)
1235             self.fp.flush()
1236
1237         if not self._filePassed:
1238             self.fp.close()
1239         self.fp = None
1240
1241
1242 class PyZipFile(ZipFile):
1243     """Class to create ZIP archives with Python library files and packages."""
1244
1245     def writepy(self, pathname, basename = ""):
1246         """Add all files from "pathname" to the ZIP archive.
1247
1248         If pathname is a package directory, search the directory and
1249         all package subdirectories recursively for all *.py and enter
1250         the modules into the archive.  If pathname is a plain
1251         directory, listdir *.py and enter all modules.  Else, pathname
1252         must be a Python *.py file and the module will be put into the
1253         archive.  Added modules are always module.pyo or module.pyc.
1254         This method will compile the module.py into module.pyc if
1255         necessary.
1256         """
1257         dir, name = os.path.split(pathname)
1258         if os.path.isdir(pathname):
1259             initname = os.path.join(pathname, "__init__.py")
1260             if os.path.isfile(initname):
1261                 # This is a package directory, add it
1262                 if basename:
1263                     basename = "%s/%s" % (basename, name)
1264                 else:
1265                     basename = name
1266                 if self.debug:
1267                     print "Adding package in", pathname, "as", basename
1268                 fname, arcname = self._get_codename(initname[0:-3], basename)
1269                 if self.debug:
1270                     print "Adding", arcname
1271                 self.write(fname, arcname)
1272                 dirlist = os.listdir(pathname)
1273                 dirlist.remove("__init__.py")
1274                 # Add all *.py files and package subdirectories
1275                 for filename in dirlist:
1276                     path = os.path.join(pathname, filename)
1277                     root, ext = os.path.splitext(filename)
1278                     if os.path.isdir(path):
1279                         if os.path.isfile(os.path.join(path, "__init__.py")):
1280                             # This is a package directory, add it
1281                             self.writepy(path, basename)  # Recursive call
1282                     elif ext == ".py":
1283                         fname, arcname = self._get_codename(path[0:-3],
1284                                          basename)
1285                         if self.debug:
1286                             print "Adding", arcname
1287                         self.write(fname, arcname)
1288             else:
1289                 # This is NOT a package directory, add its files at top level
1290                 if self.debug:
1291                     print "Adding files from directory", pathname
1292                 for filename in os.listdir(pathname):
1293                     path = os.path.join(pathname, filename)
1294                     root, ext = os.path.splitext(filename)
1295                     if ext == ".py":
1296                         fname, arcname = self._get_codename(path[0:-3],
1297                                          basename)
1298                         if self.debug:
1299                             print "Adding", arcname
1300                         self.write(fname, arcname)
1301         else:
1302             if pathname[-3:] != ".py":
1303                 raise RuntimeError, \
1304                       'Files added with writepy() must end with ".py"'
1305             fname, arcname = self._get_codename(pathname[0:-3], basename)
1306             if self.debug:
1307                 print "Adding file", arcname
1308             self.write(fname, arcname)
1309
1310     def _get_codename(self, pathname, basename):
1311         """Return (filename, archivename) for the path.
1312
1313         Given a module name path, return the correct file path and
1314         archive name, compiling if necessary.  For example, given
1315         /python/lib/string, return (/python/lib/string.pyc, string).
1316         """
1317         file_py  = pathname + ".py"
1318         file_pyc = pathname + ".pyc"
1319         file_pyo = pathname + ".pyo"
1320         if os.path.isfile(file_pyo) and \
1321                             os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
1322             fname = file_pyo    # Use .pyo file
1323         elif not os.path.isfile(file_pyc) or \
1324              os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
1325             import py_compile
1326             if self.debug:
1327                 print "Compiling", file_py
1328             try:
1329                 py_compile.compile(file_py, file_pyc, None, True)
1330             except py_compile.PyCompileError,err:
1331                 print err.msg
1332             fname = file_pyc
1333         else:
1334             fname = file_pyc
1335         archivename = os.path.split(fname)[1]
1336         if basename:
1337             archivename = "%s/%s" % (basename, archivename)
1338         return (fname, archivename)
1339
1340
def main(args = None):
    """Command-line interface of the module.

    'args' is the argument list (defaults to sys.argv[1:]).  The first
    argument selects the action: -l lists an archive, -t tests it, -e
    extracts it into a target directory and -c creates an archive from
    the given sources.  On a usage error the usage text is printed and
    the process exits with status 1.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        # testzip() returns the first member that failed, or None.
        if badfile is not None:
            print("The following enclosed file is corrupted: %r" % (badfile,))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                tgtdir = os.path.dirname(tgt)
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                if path.endswith('/'):
                    # Directory entry: creating the directory is all
                    # there is to do, there is no file data to write.
                    continue
                fp = open(tgt, 'wb')
                try:
                    fp.write(zf.read(path))
                finally:
                    fp.close()
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recurse into a directory; anything else
            # (e.g. a missing path) is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1413
# Run the command-line interface when this file is executed as a script;
# main() then takes its arguments from sys.argv.
if __name__ == "__main__":
    main()