Lib/zipfile.py

   1 """
   2 Read and write ZIP files.
   3 """
   4 import struct, os, time, sys, shutil
   5 import binascii, cStringIO
   6
   7 try:
   8     import zlib # We may need its compression method
   9     crc32 = zlib.crc32
  10 except ImportError:
  11     zlib = None
  12     crc32 = binascii.crc32
  13
# Names exported by "from zipfile import *".
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
  16
  17 class BadZipfile(Exception):
  18     pass
  19
  20
  21 class LargeZipFile(Exception):
  22     """
  23     Raised when writing a zipfile, the zipfile requires ZIP64 extensions
  24     and those extensions are disabled.
  25     """
  26
error = BadZipfile      # The exception raised by this module

# 2**31 - 1.  ZipInfo.FileHeader() switches to the ZIP64 extra field when a
# file size or compressed size exceeds this value, and the directory reader
# treats an end-record location above it as implying ZIP64 structures.
ZIP64_LIMIT= (1 << 31) - 1
# 2**16 -- presumably the entry-count threshold for ZIP64; not referenced in
# the part of the module shown here (TODO confirm against the writer code).
ZIP_FILECOUNT_LIMIT = 1 << 16
# 2**16 - 1 -- presumably the longest archive comment that fits the 16-bit
# comment-size field of the end record (TODO confirm against the writer code).
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported
  36 # Other ZIP compression methods not supported
  37
  38 # Below are some formats and associated data for reading/writing headers using
  39 # the struct module.  The names and structures of headers/records are those used
  40 # in the PKWARE description of the ZIP file format:
  41 #     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
  42 # (URL valid as of January 2008)
  43
# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
# Little-endian layout: 4-byte signature, four 16-bit fields (disk number,
# disk with the directory start, entries on this disk, entries in total),
# two 32-bit fields (directory size, directory offset) and the 16-bit
# comment length -- 22 bytes in all.
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the unpacked fields in the list returned by _EndRecData()
_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience:
# _EndRecData() appends the comment text and the record's file offset.
_ECD_COMMENT = 8
_ECD_LOCATION = 9
  62
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
# Little-endian layout: 4-byte signature, four single bytes (versions and
# systems), four 16-bit fields (flags, method, time, date), the 32-bit CRC,
# two 32-bit sizes, five 16-bit fields (name/extra/comment lengths, disk
# number, internal attributes) and two 32-bit fields (external attributes,
# local header offset) -- 46 bytes in all, 19 unpacked values.
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
  89
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
# Little-endian layout: 4-byte signature, two single bytes (extract version
# and system), four 16-bit fields (flags, method, time, date), the 32-bit
# CRC, two 32-bit sizes and two 16-bit lengths (file name, extra field) --
# 30 bytes in all.  ZipInfo.FileHeader() packs this structure.
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# indexes of entries in the unpacked local file header
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
 108
# The "Zip64 end of central directory locator" structure, magic number, and size
# Little-endian layout as unpacked by _EndRecData64(): 4-byte signature,
# 32-bit disk number, 64-bit relative offset, 32-bit disk count (20 bytes).
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
# Little-endian layout: 4-byte signature, 64-bit record size, two 16-bit
# versions, two 32-bit disk numbers and four 64-bit values (entries on this
# disk, entries in total, directory size, directory offset) -- 56 bytes.
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# indexes of entries in the unpacked Zip64 end of central directory record
_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
 130
 131 def is_zipfile(filename):
 132     """Quickly see if file is a ZIP file by checking the magic number."""
 133     try:
 134         fpin = open(filename, "rb")
 135         endrec = _EndRecData(fpin)
 136         fpin.close()
 137         if endrec:
 138             return True                 # file has correct magic number
 139     except IOError:
 140         pass
 141     return False
 142
 143 def _EndRecData64(fpin, offset, endrec):
 144     """
 145     Read the ZIP64 end-of-archive records and use that to update endrec
 146     """
 147     fpin.seek(offset - sizeEndCentDir64Locator, 2)
 148     data = fpin.read(sizeEndCentDir64Locator)
 149     sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
 150     if sig != stringEndArchive64Locator:
 151         return endrec
 152
 153     if diskno != 0 or disks != 1:
 154         raise BadZipfile("zipfiles that span multiple disks are not supported")
 155
 156     # Assume no 'zip64 extensible data'
 157     fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
 158     data = fpin.read(sizeEndCentDir64)
 159     sig, sz, create_version, read_version, disk_num, disk_dir, \
 160             dircount, dircount2, dirsize, diroffset = \
 161             struct.unpack(structEndArchive64, data)
 162     if sig != stringEndArchive64:
 163         return endrec
 164
 165     # Update the original endrec using data from the ZIP64 record
 166     endrec[_ECD_DISK_NUMBER] = disk_num
 167     endrec[_ECD_DISK_START] = disk_dir
 168     endrec[_ECD_ENTRIES_THIS_DISK] = dircount
 169     endrec[_ECD_ENTRIES_TOTAL] = dircount2
 170     endrec[_ECD_SIZE] = dirsize
 171     endrec[_ECD_OFFSET] = diroffset
 172     return endrec
 173
 174
 175 def _EndRecData(fpin):
 176     """Return data from the "End of Central Directory" record, or None.
 177
 178     The data is a list of the nine items in the ZIP "End of central dir"
 179     record followed by a tenth item, the file seek offset of this record."""
 180
 181     # Determine file size
 182     fpin.seek(0, 2)
 183     filesize = fpin.tell()
 184
 185     # Check to see if this is ZIP file with no archive comment (the
 186     # "end of central directory" structure should be the last item in the
 187     # file if this is the case).
 188     fpin.seek(-sizeEndCentDir, 2)
 189     data = fpin.read()
 190     if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
 191         # the signature is correct and there's no comment, unpack structure
 192         endrec = struct.unpack(structEndArchive, data)
 193         endrec=list(endrec)
 194
 195         # Append a blank comment and record start offset
 196         endrec.append("")
 197         endrec.append(filesize - sizeEndCentDir)
 198         if endrec[_ECD_OFFSET] == 0xffffffff:
 199             # the value for the "offset of the start of the central directory"
 200             # indicates that there is a "Zip64 end of central directory"
 201             # structure present, so go look for it
 202             return _EndRecData64(fpin, -sizeEndCentDir, endrec)
 203
 204         return endrec
 205
 206     # Either this is not a ZIP file, or it is a ZIP file with an archive
 207     # comment.  Search the end of the file for the "end of central directory"
 208     # record signature. The comment is the last item in the ZIP file and may be
 209     # up to 64K long.  It is assumed that the "end of central directory" magic
 210     # number does not appear in the comment.
 211     maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
 212     fpin.seek(maxCommentStart, 0)
 213     data = fpin.read()
 214     start = data.rfind(stringEndArchive)
 215     if start >= 0:
 216         # found the magic number; attempt to unpack and interpret
 217         recData = data[start:start+sizeEndCentDir]
 218         endrec = list(struct.unpack(structEndArchive, recData))
 219         comment = data[start+sizeEndCentDir:]
 220         # check that comment length is correct
 221         if endrec[_ECD_COMMENT_SIZE] == len(comment):
 222             # Append the archive comment and start offset
 223             endrec.append(comment)
 224             endrec.append(maxCommentStart + start)
 225             if endrec[_ECD_OFFSET] == 0xffffffff:
 226                 # There is apparently a "Zip64 end of central directory"
 227                 # structure present, so go look for it
 228                 return _EndRecData64(fpin, start - filesize, endrec)
 229             return endrec
 230
 231     # Unable to find a valid end of central directory structure
 232     return
 233
 234
 235 class ZipInfo (object):
 236     """Class with attributes describing each file in the ZIP archive."""
 237
 238     __slots__ = (
 239             'orig_filename',
 240             'filename',
 241             'date_time',
 242             'compress_type',
 243             'comment',
 244             'extra',
 245             'create_system',
 246             'create_version',
 247             'extract_version',
 248             'reserved',
 249             'flag_bits',
 250             'volume',
 251             'internal_attr',
 252             'external_attr',
 253             'header_offset',
 254             'CRC',
 255             'compress_size',
 256             'file_size',
 257             '_raw_time',
 258         )
 259
 260     def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
 261         self.orig_filename = filename   # Original file name in archive
 262
 263         # Terminate the file name at the first null byte.  Null bytes in file
 264         # names are used as tricks by viruses in archives.
 265         null_byte = filename.find(chr(0))
 266         if null_byte >= 0:
 267             filename = filename[0:null_byte]
 268         # This is used to ensure paths in generated ZIP files always use
 269         # forward slashes as the directory separator, as required by the
 270         # ZIP format specification.
 271         if os.sep != "/" and os.sep in filename:
 272             filename = filename.replace(os.sep, "/")
 273
 274         self.filename = filename        # Normalized file name
 275         self.date_time = date_time      # year, month, day, hour, min, sec
 276         # Standard values:
 277         self.compress_type = ZIP_STORED # Type of compression for the file
 278         self.comment = ""               # Comment for each file
 279         self.extra = ""                 # ZIP extra data
 280         if sys.platform == 'win32':
 281             self.create_system = 0          # System which created ZIP archive
 282         else:
 283             # Assume everything else is unix-y
 284             self.create_system = 3          # System which created ZIP archive
 285         self.create_version = 20        # Version which created ZIP archive
 286         self.extract_version = 20       # Version needed to extract archive
 287         self.reserved = 0               # Must be zero
 288         self.flag_bits = 0              # ZIP flag bits
 289         self.volume = 0                 # Volume number of file header
 290         self.internal_attr = 0          # Internal attributes
 291         self.external_attr = 0          # External file attributes
 292         # Other attributes are set by class ZipFile:
 293         # header_offset         Byte offset to the file header
 294         # CRC                   CRC-32 of the uncompressed file
 295         # compress_size         Size of the compressed file
 296         # file_size             Size of the uncompressed file
 297
 298     def FileHeader(self):
 299         """Return the per-file header as a string."""
 300         dt = self.date_time
 301         dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
 302         dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
 303         if self.flag_bits & 0x08:
 304             # Set these to zero because we write them after the file data
 305             CRC = compress_size = file_size = 0
 306         else:
 307             CRC = self.CRC
 308             compress_size = self.compress_size
 309             file_size = self.file_size
 310
 311         extra = self.extra
 312
 313         if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
 314             # File is larger than what fits into a 4 byte integer,
 315             # fall back to the ZIP64 extension
 316             fmt = '<HHQQ'
 317             extra = extra + struct.pack(fmt,
 318                     1, struct.calcsize(fmt)-4, file_size, compress_size)
 319             file_size = 0xffffffff
 320             compress_size = 0xffffffff
 321             self.extract_version = max(45, self.extract_version)
 322             self.create_version = max(45, self.extract_version)
 323
 324         filename, flag_bits = self._encodeFilenameFlags()
 325         header = struct.pack(structFileHeader, stringFileHeader,
 326                  self.extract_version, self.reserved, flag_bits,
 327                  self.compress_type, dostime, dosdate, CRC,
 328                  compress_size, file_size,
 329                  len(filename), len(extra))
 330         return header + filename + extra
 331
 332     def _encodeFilenameFlags(self):
 333         if isinstance(self.filename, unicode):
 334             try:
 335                 return self.filename.encode('ascii'), self.flag_bits
 336             except UnicodeEncodeError:
 337                 return self.filename.encode('utf-8'), self.flag_bits | 0x800
 338         else:
 339             return self.filename, self.flag_bits
 340
 341     def _decodeFilename(self):
 342         if self.flag_bits & 0x800:
 343             return self.filename.decode('utf-8')
 344         else:
 345             return self.filename
 346
 347     def _decodeExtra(self):
 348         # Try to decode the extra field.
 349         extra = self.extra
 350         unpack = struct.unpack
 351         while extra:
 352             tp, ln = unpack('<HH', extra[:4])
 353             if tp == 1:
 354                 if ln >= 24:
 355                     counts = unpack('<QQQ', extra[4:28])
 356                 elif ln == 16:
 357                     counts = unpack('<QQ', extra[4:20])
 358                 elif ln == 8:
 359                     counts = unpack('<Q', extra[4:12])
 360                 elif ln == 0:
 361                     counts = ()
 362                 else:
 363                     raise RuntimeError, "Corrupt extra field %s"%(ln,)
 364
 365                 idx = 0
 366
 367                 # ZIP64 extension (large files and/or large archives)
 368                 if self.file_size in (0xffffffffffffffffL, 0xffffffffL):
 369                     self.file_size = counts[idx]
 370                     idx += 1
 371
 372                 if self.compress_size == 0xFFFFFFFFL:
 373                     self.compress_size = counts[idx]
 374                     idx += 1
 375
 376                 if self.header_offset == 0xffffffffL:
 377                     old = self.header_offset
 378                     self.header_offset = counts[idx]
 379                     idx+=1
 380
 381             extra = extra[ln+4:]
 382
 383
 384 class _ZipDecrypter:
 385     """Class to handle decryption of files stored within a ZIP archive.
 386
 387     ZIP supports a password-based form of encryption. Even though known
 388     plaintext attacks have been found against it, it is still useful
 389     to be able to get data out of such a file.
 390
 391     Usage:
 392         zd = _ZipDecrypter(mypwd)
 393         plain_char = zd(cypher_char)
 394         plain_text = map(zd, cypher_text)
 395     """
 396
 397     def _GenerateCRCTable():
 398         """Generate a CRC-32 table.
 399
 400         ZIP encryption uses the CRC32 one-byte primitive for scrambling some
 401         internal keys. We noticed that a direct implementation is faster than
 402         relying on binascii.crc32().
 403         """
 404         poly = 0xedb88320
 405         table = [0] * 256
 406         for i in range(256):
 407             crc = i
 408             for j in range(8):
 409                 if crc & 1:
 410                     crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
 411                 else:
 412                     crc = ((crc >> 1) & 0x7FFFFFFF)
 413             table[i] = crc
 414         return table
 415     crctable = _GenerateCRCTable()
 416
 417     def _crc32(self, ch, crc):
 418         """Compute the CRC32 primitive on one byte."""
 419         return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
 420
 421     def __init__(self, pwd):
 422         self.key0 = 305419896
 423         self.key1 = 591751049
 424         self.key2 = 878082192
 425         for p in pwd:
 426             self._UpdateKeys(p)
 427
 428     def _UpdateKeys(self, c):
 429         self.key0 = self._crc32(c, self.key0)
 430         self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
 431         self.key1 = (self.key1 * 134775813 + 1) & 4294967295
 432         self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
 433
 434     def __call__(self, c):
 435         """Decrypt a single character."""
 436         c = ord(c)
 437         k = self.key2 | 2
 438         c = c ^ (((k * (k^1)) >> 8) & 255)
 439         c = chr(c)
 440         self._UpdateKeys(c)
 441         return c
 442
class ZipExtFile:
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().

       Bytes read from fileobj are optionally passed through the decrypter,
       optionally inflated with zlib, and collected in readbuffer.
       readline() keeps its own linebuffer on top of read().
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Initialise the buffers and, for deflated members, a decompressor.

        fileobj -- object whose read() supplies the member's stored bytes
                   (presumably already positioned at the member data --
                   TODO confirm against ZipFile.open())
        zipinfo -- provides compress_type, compress_size and filename
        decrypt -- optional callable applied to every raw character read
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0L            # raw bytes consumed from fileobj
        self.rawbuffer = ''             # compressed data not yet inflated
        self.readbuffer = ''            # decoded data not yet returned
        self.linebuffer = ''            # data held back by readline()
        # NOTE(review): eof is never set to True anywhere in this class as
        # shown, so the flush branch in read() does not appear reachable.
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = ("\n", )
        self.lastdiscard = ''           # separator removed by last readline()

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed  = False
        self.mode    = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            # negative wbits: raw deflate stream without a zlib header
            self.dc = zlib.decompressobj(-15)

    def set_univ_newlines(self, univ_newlines):
        """Enable or disable recognition of "\\r\\n" and "\\r" line endings."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = ("\n", )
        if self.univ_newlines:
            self.nlSeps = ("\r\n", "\r", "\n")

    def __iter__(self):
        """Iterate over the member line by line (see next())."""
        return self

    def next(self):
        """Return the next line, raising StopIteration at end of data."""
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark the object as closed; fileobj itself is left untouched."""
        self.closed = True

    def _checkfornewline(self):
        """Look for a line separator in linebuffer.

        Returns (index, separator length), or (-1, -1) when none is found.
        NOTE(review): separators are tried in nlSeps order and the first
        kind that occurs anywhere wins, which is not necessarily the
        earliest separator in the buffer -- confirm this is intended.
        """
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
                self.linebuffer = self.linebuffer[1:]

            for sep in self.nlSeps:
                nl = self.linebuffer.find(sep)
                if nl >= 0:
                    nllen = len(sep)
                    return nl, nllen

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.

           Lines that end with a separator are returned with "\\n" appended
           in place of that separator; a final unterminated chunk is
           returned as is.
        """
        if size < 0:
            size = sys.maxint
        elif size == 0:
            return ''

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl >= 0:
            # the next line was already in the buffer
            # NOTE(review): when size < nl the cut lands inside the line,
            # yet the code below still drops nllen characters after the cut
            # and appends "\n" -- looks unintended, confirm.
            nl = min(nl, size)
        else:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = ''
                return s

        buf = self.linebuffer[:nl]
        # remember the separator so a split "\r\n" can be detected next time
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + "\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def read(self, size = None):
        """Return up to size decoded bytes, or everything buffered and
        readable when size is None.

        An empty string is returned when size is 0 or no data is left.
        """
        # act like file() obj and return empty string if size is 0
        if size == 0:
            return ''

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        if size is not None and size >= 0:
            if self.compress_type == ZIP_STORED:
                # stored data maps 1:1, so only fetch what is still missing
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            elif self.compress_type == ZIP_DEFLATED:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        # NOTE(review): the raw buffer is only processed inside this branch,
        # i.e. when more bytes are requested from fileobj.
        if bytesToRead > 0:
            bytes = self.fileobj.read(bytesToRead)
            self.bytes_read += len(bytes)
            self.rawbuffer += bytes

            # handle contents of raw buffer
            if self.rawbuffer:
                newdata = self.rawbuffer
                self.rawbuffer = ''

                # decrypt new data if we were given an object to handle that
                if newdata and self.decrypter is not None:
                    newdata = ''.join(map(self.decrypter, newdata))

                # decompress newly read data if necessary
                if newdata and self.compress_type == ZIP_DEFLATED:
                    newdata = self.dc.decompress(newdata)
                    self.rawbuffer = self.dc.unconsumed_tail
                    if self.eof and len(self.rawbuffer) == 0:
                        # we're out of raw bytes (both from the file and
                        # the local buffer); flush just to make sure the
                        # decompressor is done
                        newdata += self.dc.flush()
                        # prevent decompressor from being used again
                        self.dc = None

                self.readbuffer += newdata


        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            bytes = self.readbuffer
            self.readbuffer = ''
        else:
            bytes = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return bytes
 635
 636
 637 class ZipFile:
 638     """ Class with methods to open, read, write, close, list zip files.
 639
 640     z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
 641
 642     file: Either the path to the file, or a file-like object.
 643           If it is a path, the file will be opened and closed by ZipFile.
 644     mode: The mode can be either read "r", write "w" or append "a".
 645     compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
 646     allowZip64: if True ZipFile will create files with ZIP64 extensions when
 647                 needed, otherwise it will raise an exception when this would
 648                 be necessary.
 649
 650     """
 651
 652     fp = None                   # Set here since __del__ checks it
 653
 654     def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
 655         """Open the ZIP file with mode read "r", write "w" or append "a"."""
 656         if mode not in ("r", "w", "a"):
 657             raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
 658
 659         if compression == ZIP_STORED:
 660             pass
 661         elif compression == ZIP_DEFLATED:
 662             if not zlib:
 663                 raise RuntimeError,\
 664                       "Compression requires the (missing) zlib module"
 665         else:
 666             raise RuntimeError, "That compression method is not supported"
 667
 668         self._allowZip64 = allowZip64
 669         self._didModify = False
 670         self.debug = 0  # Level of printing: 0 through 3
 671         self.NameToInfo = {}    # Find file info given name
 672         self.filelist = []      # List of ZipInfo instances for archive
 673         self.compression = compression  # Method of compression
 674         self.mode = key = mode.replace('b', '')[0]
 675         self.pwd = None
 676         self.comment = ''
 677
 678         # Check if we were passed a file-like object
 679         if isinstance(file, basestring):
 680             self._filePassed = 0
 681             self.filename = file
 682             modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
 683             try:
 684                 self.fp = open(file, modeDict[mode])
 685             except IOError:
 686                 if mode == 'a':
 687                     mode = key = 'w'
 688                     self.fp = open(file, modeDict[mode])
 689                 else:
 690                     raise
 691         else:
 692             self._filePassed = 1
 693             self.fp = file
 694             self.filename = getattr(file, 'name', None)
 695
 696         if key == 'r':
 697             self._GetContents()
 698         elif key == 'w':
 699             pass
 700         elif key == 'a':
 701             try:                        # See if file is a zip file
 702                 self._RealGetContents()
 703                 # seek to start of directory and overwrite
 704                 self.fp.seek(self.start_dir, 0)
 705             except BadZipfile:          # file is not a zip file, just append
 706                 self.fp.seek(0, 2)
 707         else:
 708             if not self._filePassed:
 709                 self.fp.close()
 710                 self.fp = None
 711             raise RuntimeError, 'Mode must be "r", "w" or "a"'
 712
 713     def _GetContents(self):
 714         """Read the directory, making sure we close the file if the format
 715         is bad."""
 716         try:
 717             self._RealGetContents()
 718         except BadZipfile:
 719             if not self._filePassed:
 720                 self.fp.close()
 721                 self.fp = None
 722             raise
 723
 724     def _RealGetContents(self):
 725         """Read in the table of contents for the ZIP file."""
 726         fp = self.fp
 727         endrec = _EndRecData(fp)
 728         if not endrec:
 729             raise BadZipfile, "File is not a zip file"
 730         if self.debug > 1:
 731             print endrec
 732         size_cd = endrec[_ECD_SIZE]             # bytes in central directory
 733         offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
 734         self.comment = endrec[_ECD_COMMENT]     # archive comment
 735
 736         # "concat" is zero, unless zip was concatenated to another file
 737         concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
 738         if endrec[_ECD_LOCATION] > ZIP64_LIMIT:
 739             # If the offset of the "End of Central Dir" record requires Zip64
 740             # extension structures, account for them
 741             concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
 742
 743         if self.debug > 2:
 744             inferred = concat + offset_cd
 745             print "given, inferred, offset", offset_cd, inferred, concat
 746         # self.start_dir:  Position of start of central directory
 747         self.start_dir = offset_cd + concat
 748         fp.seek(self.start_dir, 0)
 749         data = fp.read(size_cd)
 750         fp = cStringIO.StringIO(data)
 751         total = 0
 752         while total < size_cd:
 753             centdir = fp.read(sizeCentralDir)
 754             if centdir[0:4] != stringCentralDir:
 755                 raise BadZipfile, "Bad magic number for central directory"
 756             centdir = struct.unpack(structCentralDir, centdir)
 757             if self.debug > 2:
 758                 print centdir
 759             filename = fp.read(centdir[_CD_FILENAME_LENGTH])
 760             # Create ZipInfo instance to store file information
 761             x = ZipInfo(filename)
 762             x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
 763             x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
 764             x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
 765             (x.create_version, x.create_system, x.extract_version, x.reserved,
 766                 x.flag_bits, x.compress_type, t, d,
 767                 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
 768             x.volume, x.internal_attr, x.external_attr = centdir[15:18]
 769             # Convert date/time code to (year, month, day, hour, min, sec)
 770             x._raw_time = t
 771             x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
 772                                      t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
 773
 774             x._decodeExtra()
 775             x.header_offset = x.header_offset + concat
 776             x.filename = x._decodeFilename()
 777             self.filelist.append(x)
 778             self.NameToInfo[x.filename] = x
 779
 780             # update total bytes read from central directory
 781             total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
 782                      + centdir[_CD_EXTRA_FIELD_LENGTH]
 783                      + centdir[_CD_COMMENT_LENGTH])
 784
 785             if self.debug > 2:
 786                 print "total", total
 787
 788
 789     def namelist(self):
 790         """Return a list of file names in the archive."""
 791         l = []
 792         for data in self.filelist:
 793             l.append(data.filename)
 794         return l
 795
 796     def infolist(self):
 797         """Return a list of class ZipInfo instances for files in the
 798         archive."""
 799         return self.filelist
 800
 801     def printdir(self):
 802         """Print a table of contents for the zip file."""
 803         print "%-46s %19s %12s" % ("File Name", "Modified    ", "Size")
 804         for zinfo in self.filelist:
 805             date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
 806             print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
 807
 808     def testzip(self):
 809         """Read all the files and check the CRC."""
 810         chunk_size = 2 ** 20
 811         for zinfo in self.filelist:
 812             try:
 813                 # Read by chunks, to avoid an OverflowError or a
 814                 # MemoryError with very large embedded files.
 815                 f = self.open(zinfo.filename, "r")
 816                 while f.read(chunk_size):     # Check CRC-32
 817                     pass
 818             except BadZipfile:
 819                 return zinfo.filename
 820
 821     def getinfo(self, name):
 822         """Return the instance of ZipInfo given 'name'."""
 823         info = self.NameToInfo.get(name)
 824         if info is None:
 825             raise KeyError(
 826                 'There is no item named %r in the archive' % name)
 827
 828         return info
 829
 830     def setpassword(self, pwd):
 831         """Set default password for encrypted files."""
 832         self.pwd = pwd
 833
 834     def read(self, name, pwd=None):
 835         """Return file bytes (as a string) for name."""
 836         return self.open(name, "r", pwd).read()
 837
 838     def open(self, name, mode="r", pwd=None):
 839         """Return file-like object for 'name'."""
 840         if mode not in ("r", "U", "rU"):
 841             raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
 842         if not self.fp:
 843             raise RuntimeError, \
 844                   "Attempt to read ZIP archive that was already closed"
 845
 846         # Only open a new file for instances where we were not
 847         # given a file object in the constructor
 848         if self._filePassed:
 849             zef_file = self.fp
 850         else:
 851             zef_file = open(self.filename, 'rb')
 852
 853         # Make sure we have an info object
 854         if isinstance(name, ZipInfo):
 855             # 'name' is already an info object
 856             zinfo = name
 857         else:
 858             # Get info object for name
 859             zinfo = self.getinfo(name)
 860
 861         zef_file.seek(zinfo.header_offset, 0)
 862
 863         # Skip the file header:
 864         fheader = zef_file.read(sizeFileHeader)
 865         if fheader[0:4] != stringFileHeader:
 866             raise BadZipfile, "Bad magic number for file header"
 867
 868         fheader = struct.unpack(structFileHeader, fheader)
 869         fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
 870         if fheader[_FH_EXTRA_FIELD_LENGTH]:
 871             zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
 872
 873         if fname != zinfo.orig_filename:
 874             raise BadZipfile, \
 875                       'File name in directory "%s" and header "%s" differ.' % (
 876                           zinfo.orig_filename, fname)
 877
 878         # check for encrypted flag & handle password
 879         is_encrypted = zinfo.flag_bits & 0x1
 880         zd = None
 881         if is_encrypted:
 882             if not pwd:
 883                 pwd = self.pwd
 884             if not pwd:
 885                 raise RuntimeError, "File %s is encrypted, " \
 886                       "password required for extraction" % name
 887
 888             zd = _ZipDecrypter(pwd)
 889             # The first 12 bytes in the cypher stream is an encryption header
 890             #  used to strengthen the algorithm. The first 11 bytes are
 891             #  completely random, while the 12th contains the MSB of the CRC,
 892             #  or the MSB of the file time depending on the header type
 893             #  and is used to check the correctness of the password.
 894             bytes = zef_file.read(12)
 895             h = map(zd, bytes[0:12])
 896             if zinfo.flag_bits & 0x8:
 897                 # compare against the file type from extended local headers
 898                 check_byte = (zinfo._raw_time >> 8) & 0xff
 899             else:
 900                 # compare against the CRC otherwise
 901                 check_byte = (zinfo.CRC >> 24) & 0xff
 902             if ord(h[11]) != check_byte:
 903                 raise RuntimeError("Bad password for file", name)
 904
 905         # build and return a ZipExtFile
 906         if zd is None:
 907             zef = ZipExtFile(zef_file, zinfo)
 908         else:
 909             zef = ZipExtFile(zef_file, zinfo, zd)
 910
 911         # set universal newlines on ZipExtFile if necessary
 912         if "U" in mode:
 913             zef.set_univ_newlines(True)
 914         return zef
 915
 916     def extract(self, member, path=None, pwd=None):
 917         """Extract a member from the archive to the current working directory,
 918            using its full name. Its file information is extracted as accurately
 919            as possible. `member' may be a filename or a ZipInfo object. You can
 920            specify a different directory using `path'.
 921         """
 922         if not isinstance(member, ZipInfo):
 923             member = self.getinfo(member)
 924
 925         if path is None:
 926             path = os.getcwd()
 927
 928         return self._extract_member(member, path, pwd)
 929
 930     def extractall(self, path=None, members=None, pwd=None):
 931         """Extract all members from the archive to the current working
 932            directory. `path' specifies a different directory to extract to.
 933            `members' is optional and must be a subset of the list returned
 934            by namelist().
 935         """
 936         if members is None:
 937             members = self.namelist()
 938
 939         for zipinfo in members:
 940             self.extract(zipinfo, path, pwd)
 941
 942     def _extract_member(self, member, targetpath, pwd):
 943         """Extract the ZipInfo object 'member' to a physical
 944            file on the path targetpath.
 945         """
 946         # build the destination pathname, replacing
 947         # forward slashes to platform specific separators.
 948         if targetpath[-1:] == "/":
 949             targetpath = targetpath[:-1]
 950
 951         # don't include leading "/" from file name if present
 952         if os.path.isabs(member.filename):
 953             targetpath = os.path.join(targetpath, member.filename[1:])
 954         else:
 955             targetpath = os.path.join(targetpath, member.filename)
 956
 957         targetpath = os.path.normpath(targetpath)
 958
 959         # Create all upper directories if necessary.
 960         upperdirs = os.path.dirname(targetpath)
 961         if upperdirs and not os.path.exists(upperdirs):
 962             os.makedirs(upperdirs)
 963
 964         source = self.open(member, pwd=pwd)
 965         target = file(targetpath, "wb")
 966         shutil.copyfileobj(source, target)
 967         source.close()
 968         target.close()
 969
 970         return targetpath
 971
 972     def _writecheck(self, zinfo):
 973         """Check for errors before writing a file to the archive."""
 974         if zinfo.filename in self.NameToInfo:
 975             if self.debug:      # Warning for duplicate names
 976                 print "Duplicate name:", zinfo.filename
 977         if self.mode not in ("w", "a"):
 978             raise RuntimeError, 'write() requires mode "w" or "a"'
 979         if not self.fp:
 980             raise RuntimeError, \
 981                   "Attempt to write ZIP archive that was already closed"
 982         if zinfo.compress_type == ZIP_DEFLATED and not zlib:
 983             raise RuntimeError, \
 984                   "Compression requires the (missing) zlib module"
 985         if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
 986             raise RuntimeError, \
 987                   "That compression method is not supported"
 988         if zinfo.file_size > ZIP64_LIMIT:
 989             if not self._allowZip64:
 990                 raise LargeZipFile("Filesize would require ZIP64 extensions")
 991         if zinfo.header_offset > ZIP64_LIMIT:
 992             if not self._allowZip64:
 993                 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
 994
 995     def write(self, filename, arcname=None, compress_type=None):
 996         """Put the bytes from filename into the archive under the name
 997         arcname."""
 998         if not self.fp:
 999             raise RuntimeError(
1000                   "Attempt to write to ZIP archive that was already closed")
1001
1002         st = os.stat(filename)
1003         mtime = time.localtime(st.st_mtime)
1004         date_time = mtime[0:6]
1005         # Create ZipInfo instance to store file information
1006         if arcname is None:
1007             arcname = filename
1008         arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
1009         while arcname[0] in (os.sep, os.altsep):
1010             arcname = arcname[1:]
1011         zinfo = ZipInfo(arcname, date_time)
1012         zinfo.external_attr = (st[0] & 0xFFFF) << 16L      # Unix attributes
1013         if compress_type is None:
1014             zinfo.compress_type = self.compression
1015         else:
1016             zinfo.compress_type = compress_type
1017
1018         zinfo.file_size = st.st_size
1019         zinfo.flag_bits = 0x00
1020         zinfo.header_offset = self.fp.tell()    # Start of header bytes
1021
1022         self._writecheck(zinfo)
1023         self._didModify = True
1024         fp = open(filename, "rb")
1025         # Must overwrite CRC and sizes with correct data later
1026         zinfo.CRC = CRC = 0
1027         zinfo.compress_size = compress_size = 0
1028         zinfo.file_size = file_size = 0
1029         self.fp.write(zinfo.FileHeader())
1030         if zinfo.compress_type == ZIP_DEFLATED:
1031             cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1032                  zlib.DEFLATED, -15)
1033         else:
1034             cmpr = None
1035         while 1:
1036             buf = fp.read(1024 * 8)
1037             if not buf:
1038                 break
1039             file_size = file_size + len(buf)
1040             CRC = crc32(buf, CRC) & 0xffffffff
1041             if cmpr:
1042                 buf = cmpr.compress(buf)
1043                 compress_size = compress_size + len(buf)
1044             self.fp.write(buf)
1045         fp.close()
1046         if cmpr:
1047             buf = cmpr.flush()
1048             compress_size = compress_size + len(buf)
1049             self.fp.write(buf)
1050             zinfo.compress_size = compress_size
1051         else:
1052             zinfo.compress_size = file_size
1053         zinfo.CRC = CRC
1054         zinfo.file_size = file_size
1055         # Seek backwards and write CRC and file sizes
1056         position = self.fp.tell()       # Preserve current position in file
1057         self.fp.seek(zinfo.header_offset + 14, 0)
1058         self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
1059               zinfo.file_size))
1060         self.fp.seek(position, 0)
1061         self.filelist.append(zinfo)
1062         self.NameToInfo[zinfo.filename] = zinfo
1063
1064     def writestr(self, zinfo_or_arcname, bytes):
1065         """Write a file into the archive.  The contents is the string
1066         'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
1067         the name of the file in the archive."""
1068         if not isinstance(zinfo_or_arcname, ZipInfo):
1069             zinfo = ZipInfo(filename=zinfo_or_arcname,
1070                             date_time=time.localtime(time.time())[:6])
1071             zinfo.compress_type = self.compression
1072             zinfo.external_attr = 0600 << 16
1073         else:
1074             zinfo = zinfo_or_arcname
1075
1076         if not self.fp:
1077             raise RuntimeError(
1078                   "Attempt to write to ZIP archive that was already closed")
1079
1080         zinfo.file_size = len(bytes)            # Uncompressed size
1081         zinfo.header_offset = self.fp.tell()    # Start of header bytes
1082         self._writecheck(zinfo)
1083         self._didModify = True
1084         zinfo.CRC = crc32(bytes) & 0xffffffff       # CRC-32 checksum
1085         if zinfo.compress_type == ZIP_DEFLATED:
1086             co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1087                  zlib.DEFLATED, -15)
1088             bytes = co.compress(bytes) + co.flush()
1089             zinfo.compress_size = len(bytes)    # Compressed size
1090         else:
1091             zinfo.compress_size = zinfo.file_size
1092         zinfo.header_offset = self.fp.tell()    # Start of header bytes
1093         self.fp.write(zinfo.FileHeader())
1094         self.fp.write(bytes)
1095         self.fp.flush()
1096         if zinfo.flag_bits & 0x08:
1097             # Write CRC and file sizes after the file data
1098             self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
1099                   zinfo.file_size))
1100         self.filelist.append(zinfo)
1101         self.NameToInfo[zinfo.filename] = zinfo
1102
1103     def __del__(self):
1104         """Call the "close()" method in case the user forgot."""
1105         self.close()
1106
1107     def close(self):
1108         """Close the file, and for mode "w" and "a" write the ending
1109         records."""
1110         if self.fp is None:
1111             return
1112
1113         if self.mode in ("w", "a") and self._didModify: # write ending records
1114             count = 0
1115             pos1 = self.fp.tell()
1116             for zinfo in self.filelist:         # write central directory
1117                 count = count + 1
1118                 dt = zinfo.date_time
1119                 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
1120                 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
1121                 extra = []
1122                 if zinfo.file_size > ZIP64_LIMIT \
1123                         or zinfo.compress_size > ZIP64_LIMIT:
1124                     extra.append(zinfo.file_size)
1125                     extra.append(zinfo.compress_size)
1126                     file_size = 0xffffffff
1127                     compress_size = 0xffffffff
1128                 else:
1129                     file_size = zinfo.file_size
1130                     compress_size = zinfo.compress_size
1131
1132                 if zinfo.header_offset > ZIP64_LIMIT:
1133                     extra.append(zinfo.header_offset)
1134                     header_offset = 0xffffffffL
1135                 else:
1136                     header_offset = zinfo.header_offset
1137
1138                 extra_data = zinfo.extra
1139                 if extra:
1140                     # Append a ZIP64 field to the extra's
1141                     extra_data = struct.pack(
1142                             '<HH' + 'Q'*len(extra),
1143                             1, 8*len(extra), *extra) + extra_data
1144
1145                     extract_version = max(45, zinfo.extract_version)
1146                     create_version = max(45, zinfo.create_version)
1147                 else:
1148                     extract_version = zinfo.extract_version
1149                     create_version = zinfo.create_version
1150
1151                 try:
1152                     filename, flag_bits = zinfo._encodeFilenameFlags()
1153                     centdir = struct.pack(structCentralDir,
1154                      stringCentralDir, create_version,
1155                      zinfo.create_system, extract_version, zinfo.reserved,
1156                      flag_bits, zinfo.compress_type, dostime, dosdate,
1157                      zinfo.CRC, compress_size, file_size,
1158                      len(filename), len(extra_data), len(zinfo.comment),
1159                      0, zinfo.internal_attr, zinfo.external_attr,
1160                      header_offset)
1161                 except DeprecationWarning:
1162                     print >>sys.stderr, (structCentralDir,
1163                      stringCentralDir, create_version,
1164                      zinfo.create_system, extract_version, zinfo.reserved,
1165                      zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1166                      zinfo.CRC, compress_size, file_size,
1167                      len(zinfo.filename), len(extra_data), len(zinfo.comment),
1168                      0, zinfo.internal_attr, zinfo.external_attr,
1169                      header_offset)
1170                     raise
1171                 self.fp.write(centdir)
1172                 self.fp.write(filename)
1173                 self.fp.write(extra_data)
1174                 self.fp.write(zinfo.comment)
1175
1176             pos2 = self.fp.tell()
1177             # Write end-of-zip-archive record
1178             centDirOffset = pos1
1179             if pos1 > ZIP64_LIMIT:
1180                 # Need to write the ZIP64 end-of-archive records
1181                 zip64endrec = struct.pack(
1182                         structEndArchive64, stringEndArchive64,
1183                         44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
1184                 self.fp.write(zip64endrec)
1185
1186                 zip64locrec = struct.pack(
1187                         structEndArchive64Locator,
1188                         stringEndArchive64Locator, 0, pos2, 1)
1189                 self.fp.write(zip64locrec)
1190                 centDirOffset = 0xFFFFFFFF
1191
1192             # check for valid comment length
1193             if len(self.comment) >= ZIP_MAX_COMMENT:
1194                 if self.debug > 0:
1195                     msg = 'Archive comment is too long; truncating to %d bytes' \
1196                           % ZIP_MAX_COMMENT
1197                 self.comment = self.comment[:ZIP_MAX_COMMENT]
1198
1199             endrec = struct.pack(structEndArchive, stringEndArchive,
1200                                  0, 0, count % ZIP_FILECOUNT_LIMIT,
1201                                  count % ZIP_FILECOUNT_LIMIT, pos2 - pos1,
1202                                  centDirOffset, len(self.comment))
1203             self.fp.write(endrec)
1204             self.fp.write(self.comment)
1205             self.fp.flush()
1206
1207         if not self._filePassed:
1208             self.fp.close()
1209         self.fp = None
1210
1211
1212 class PyZipFile(ZipFile):
1213     """Class to create ZIP archives with Python library files and packages."""
1214
1215     def writepy(self, pathname, basename = ""):
1216         """Add all files from "pathname" to the ZIP archive.
1217
1218         If pathname is a package directory, search the directory and
1219         all package subdirectories recursively for all *.py and enter
1220         the modules into the archive.  If pathname is a plain
1221         directory, listdir *.py and enter all modules.  Else, pathname
1222         must be a Python *.py file and the module will be put into the
1223         archive.  Added modules are always module.pyo or module.pyc.
1224         This method will compile the module.py into module.pyc if
1225         necessary.
1226         """
1227         dir, name = os.path.split(pathname)
1228         if os.path.isdir(pathname):
1229             initname = os.path.join(pathname, "__init__.py")
1230             if os.path.isfile(initname):
1231                 # This is a package directory, add it
1232                 if basename:
1233                     basename = "%s/%s" % (basename, name)
1234                 else:
1235                     basename = name
1236                 if self.debug:
1237                     print "Adding package in", pathname, "as", basename
1238                 fname, arcname = self._get_codename(initname[0:-3], basename)
1239                 if self.debug:
1240                     print "Adding", arcname
1241                 self.write(fname, arcname)
1242                 dirlist = os.listdir(pathname)
1243                 dirlist.remove("__init__.py")
1244                 # Add all *.py files and package subdirectories
1245                 for filename in dirlist:
1246                     path = os.path.join(pathname, filename)
1247                     root, ext = os.path.splitext(filename)
1248                     if os.path.isdir(path):
1249                         if os.path.isfile(os.path.join(path, "__init__.py")):
1250                             # This is a package directory, add it
1251                             self.writepy(path, basename)  # Recursive call
1252                     elif ext == ".py":
1253                         fname, arcname = self._get_codename(path[0:-3],
1254                                          basename)
1255                         if self.debug:
1256                             print "Adding", arcname
1257                         self.write(fname, arcname)
1258             else:
1259                 # This is NOT a package directory, add its files at top level
1260                 if self.debug:
1261                     print "Adding files from directory", pathname
1262                 for filename in os.listdir(pathname):
1263                     path = os.path.join(pathname, filename)
1264                     root, ext = os.path.splitext(filename)
1265                     if ext == ".py":
1266                         fname, arcname = self._get_codename(path[0:-3],
1267                                          basename)
1268                         if self.debug:
1269                             print "Adding", arcname
1270                         self.write(fname, arcname)
1271         else:
1272             if pathname[-3:] != ".py":
1273                 raise RuntimeError, \
1274                       'Files added with writepy() must end with ".py"'
1275             fname, arcname = self._get_codename(pathname[0:-3], basename)
1276             if self.debug:
1277                 print "Adding file", arcname
1278             self.write(fname, arcname)
1279
1280     def _get_codename(self, pathname, basename):
1281         """Return (filename, archivename) for the path.
1282
1283         Given a module name path, return the correct file path and
1284         archive name, compiling if necessary.  For example, given
1285         /python/lib/string, return (/python/lib/string.pyc, string).
1286         """
1287         file_py  = pathname + ".py"
1288         file_pyc = pathname + ".pyc"
1289         file_pyo = pathname + ".pyo"
1290         if os.path.isfile(file_pyo) and \
1291                             os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
1292             fname = file_pyo    # Use .pyo file
1293         elif not os.path.isfile(file_pyc) or \
1294              os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
1295             import py_compile
1296             if self.debug:
1297                 print "Compiling", file_py
1298             try:
1299                 py_compile.compile(file_py, file_pyc, None, True)
1300             except py_compile.PyCompileError,err:
1301                 print err.msg
1302             fname = file_pyc
1303         else:
1304             fname = file_pyc
1305         archivename = os.path.split(fname)[1]
1306         if basename:
1307             archivename = "%s/%s" % (basename, archivename)
1308         return (fname, archivename)
1309
1310
1311 def main(args = None):
1312     import textwrap
1313     USAGE=textwrap.dedent("""\
1314         Usage:
1315             zipfile.py -l zipfile.zip        # Show listing of a zipfile
1316             zipfile.py -t zipfile.zip        # Test if a zipfile is valid
1317             zipfile.py -e zipfile.zip target # Extract zipfile into target dir
1318             zipfile.py -c zipfile.zip src ... # Create zipfile from sources
1319         """)
1320     if args is None:
1321         args = sys.argv[1:]
1322
1323     if not args or args[0] not in ('-l', '-c', '-e', '-t'):
1324         print USAGE
1325         sys.exit(1)
1326
1327     if args[0] == '-l':
1328         if len(args) != 2:
1329             print USAGE
1330             sys.exit(1)
1331         zf = ZipFile(args[1], 'r')
1332         zf.printdir()
1333         zf.close()
1334
1335     elif args[0] == '-t':
1336         if len(args) != 2:
1337             print USAGE
1338             sys.exit(1)
1339         zf = ZipFile(args[1], 'r')
1340         zf.testzip()
1341         print "Done testing"
1342
1343     elif args[0] == '-e':
1344         if len(args) != 3:
1345             print USAGE
1346             sys.exit(1)
1347
1348         zf = ZipFile(args[1], 'r')
1349         out = args[2]
1350         for path in zf.namelist():
1351             if path.startswith('./'):
1352                 tgt = os.path.join(out, path[2:])
1353             else:
1354                 tgt = os.path.join(out, path)
1355
1356             tgtdir = os.path.dirname(tgt)
1357             if not os.path.exists(tgtdir):
1358                 os.makedirs(tgtdir)
1359             fp = open(tgt, 'wb')
1360             fp.write(zf.read(path))
1361             fp.close()
1362         zf.close()
1363
1364     elif args[0] == '-c':
1365         if len(args) < 3:
1366             print USAGE
1367             sys.exit(1)
1368
1369         def addToZip(zf, path, zippath):
1370             if os.path.isfile(path):
1371                 zf.write(path, zippath, ZIP_DEFLATED)
1372             elif os.path.isdir(path):
1373                 for nm in os.listdir(path):
1374                     addToZip(zf,
1375                             os.path.join(path, nm), os.path.join(zippath, nm))
1376             # else: ignore
1377
1378         zf = ZipFile(args[1], 'w', allowZip64=True)
1379         for src in args[2:]:
1380             addToZip(zf, src, os.path.basename(src))
1381
1382         zf.close()
1383
# Run the command-line interface when this file is executed as a script.
if __name__ == "__main__":
    main()