Lib/zipfile.py

   1 """
   2 Read and write ZIP files.
   3 """
   4 import struct, os, time, sys, shutil
   5 import binascii, cStringIO
   6
   7 try:
   8     import zlib # We may need its compression method
   9     crc32 = zlib.crc32
  10 except ImportError:
  11     zlib = None
  12     crc32 = binascii.crc32
  13
# Public names of this module: the ones bound by a star import.
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
  16
  17 class BadZipfile(Exception):
  18     pass
  19
  20
  21 class LargeZipFile(Exception):
  22     """
  23     Raised when writing a zipfile, the zipfile requires ZIP64 extensions
  24     and those extensions are disabled.
  25     """
  26
  27 error = BadZipfile      # The exception raised by this module
  28
  29 ZIP64_LIMIT= (1 << 31) - 1
  30 ZIP_FILECOUNT_LIMIT = 1 << 16
  31 ZIP_MAX_COMMENT = (1 << 16) - 1
  32
  33 # constants for Zip file compression methods
  34 ZIP_STORED = 0
  35 ZIP_DEFLATED = 8
  36 # Other ZIP compression methods not supported
  37
  38 # Below are some formats and associated data for reading/writing headers using
  39 # the struct module.  The names and structures of headers/records are those used
  40 # in the PKWARE description of the ZIP file format:
  41 #     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
  42 # (URL valid as of January 2008)
  43
  44 # The "end of central directory" structure, magic number, size, and indices
  45 # (section V.I in the format document)
  46 structEndArchive = "<4s4H2LH"
  47 stringEndArchive = "PK\005\006"
  48 sizeEndCentDir = struct.calcsize(structEndArchive)
  49
  50 _ECD_SIGNATURE = 0
  51 _ECD_DISK_NUMBER = 1
  52 _ECD_DISK_START = 2
  53 _ECD_ENTRIES_THIS_DISK = 3
  54 _ECD_ENTRIES_TOTAL = 4
  55 _ECD_SIZE = 5
  56 _ECD_OFFSET = 6
  57 _ECD_COMMENT_SIZE = 7
  58 # These last two indices are not part of the structure as defined in the
  59 # spec, but they are used internally by this module as a convenience
  60 _ECD_COMMENT = 8
  61 _ECD_LOCATION = 9
  62
  63 # The "central directory" structure, magic number, size, and indices
  64 # of entries in the structure (section V.F in the format document)
  65 structCentralDir = "<4s4B4HL2L5H2L"
  66 stringCentralDir = "PK\001\002"
  67 sizeCentralDir = struct.calcsize(structCentralDir)
  68
  69 # indexes of entries in the central directory structure
  70 _CD_SIGNATURE = 0
  71 _CD_CREATE_VERSION = 1
  72 _CD_CREATE_SYSTEM = 2
  73 _CD_EXTRACT_VERSION = 3
  74 _CD_EXTRACT_SYSTEM = 4
  75 _CD_FLAG_BITS = 5
  76 _CD_COMPRESS_TYPE = 6
  77 _CD_TIME = 7
  78 _CD_DATE = 8
  79 _CD_CRC = 9
  80 _CD_COMPRESSED_SIZE = 10
  81 _CD_UNCOMPRESSED_SIZE = 11
  82 _CD_FILENAME_LENGTH = 12
  83 _CD_EXTRA_FIELD_LENGTH = 13
  84 _CD_COMMENT_LENGTH = 14
  85 _CD_DISK_NUMBER_START = 15
  86 _CD_INTERNAL_FILE_ATTRIBUTES = 16
  87 _CD_EXTERNAL_FILE_ATTRIBUTES = 17
  88 _CD_LOCAL_HEADER_OFFSET = 18
  89
  90 # The "local file header" structure, magic number, size, and indices
  91 # (section V.A in the format document)
  92 structFileHeader = "<4s2B4HL2L2H"
  93 stringFileHeader = "PK\003\004"
  94 sizeFileHeader = struct.calcsize(structFileHeader)
  95
  96 _FH_SIGNATURE = 0
  97 _FH_EXTRACT_VERSION = 1
  98 _FH_EXTRACT_SYSTEM = 2
  99 _FH_GENERAL_PURPOSE_FLAG_BITS = 3
 100 _FH_COMPRESSION_METHOD = 4
 101 _FH_LAST_MOD_TIME = 5
 102 _FH_LAST_MOD_DATE = 6
 103 _FH_CRC = 7
 104 _FH_COMPRESSED_SIZE = 8
 105 _FH_UNCOMPRESSED_SIZE = 9
 106 _FH_FILENAME_LENGTH = 10
 107 _FH_EXTRA_FIELD_LENGTH = 11
 108
 109 # The "Zip64 end of central directory locator" structure, magic number, and size
 110 structEndArchive64Locator = "<4sLQL"
 111 stringEndArchive64Locator = "PK\x06\x07"
 112 sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
 113
 114 # The "Zip64 end of central directory" record, magic number, size, and indices
 115 # (section V.G in the format document)
 116 structEndArchive64 = "<4sQ2H2L4Q"
 117 stringEndArchive64 = "PK\x06\x06"
 118 sizeEndCentDir64 = struct.calcsize(structEndArchive64)
 119
 120 _CD64_SIGNATURE = 0
 121 _CD64_DIRECTORY_RECSIZE = 1
 122 _CD64_CREATE_VERSION = 2
 123 _CD64_EXTRACT_VERSION = 3
 124 _CD64_DISK_NUMBER = 4
 125 _CD64_DISK_NUMBER_START = 5
 126 _CD64_NUMBER_ENTRIES_THIS_DISK = 6
 127 _CD64_NUMBER_ENTRIES_TOTAL = 7
 128 _CD64_DIRECTORY_SIZE = 8
 129 _CD64_OFFSET_START_CENTDIR = 9
 130
 131 def is_zipfile(filename):
 132     """Quickly see if file is a ZIP file by checking the magic number."""
 133     try:
 134         fpin = open(filename, "rb")
 135         endrec = _EndRecData(fpin)
 136         fpin.close()
 137         if endrec:
 138             return True                 # file has correct magic number
 139     except IOError:
 140         pass
 141     return False
 142
 143 def _EndRecData64(fpin, offset, endrec):
 144     """
 145     Read the ZIP64 end-of-archive records and use that to update endrec
 146     """
 147     fpin.seek(offset - sizeEndCentDir64Locator, 2)
 148     data = fpin.read(sizeEndCentDir64Locator)
 149     sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
 150     if sig != stringEndArchive64Locator:
 151         return endrec
 152
 153     if diskno != 0 or disks != 1:
 154         raise BadZipfile("zipfiles that span multiple disks are not supported")
 155
 156     # Assume no 'zip64 extensible data'
 157     fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
 158     data = fpin.read(sizeEndCentDir64)
 159     sig, sz, create_version, read_version, disk_num, disk_dir, \
 160             dircount, dircount2, dirsize, diroffset = \
 161             struct.unpack(structEndArchive64, data)
 162     if sig != stringEndArchive64:
 163         return endrec
 164
 165     # Update the original endrec using data from the ZIP64 record
 166     endrec[_ECD_SIGNATURE] = sig
 167     endrec[_ECD_DISK_NUMBER] = disk_num
 168     endrec[_ECD_DISK_START] = disk_dir
 169     endrec[_ECD_ENTRIES_THIS_DISK] = dircount
 170     endrec[_ECD_ENTRIES_TOTAL] = dircount2
 171     endrec[_ECD_SIZE] = dirsize
 172     endrec[_ECD_OFFSET] = diroffset
 173     return endrec
 174
 175
 176 def _EndRecData(fpin):
 177     """Return data from the "End of Central Directory" record, or None.
 178
 179     The data is a list of the nine items in the ZIP "End of central dir"
 180     record followed by a tenth item, the file seek offset of this record."""
 181
 182     # Determine file size
 183     fpin.seek(0, 2)
 184     filesize = fpin.tell()
 185
 186     # Check to see if this is ZIP file with no archive comment (the
 187     # "end of central directory" structure should be the last item in the
 188     # file if this is the case).
 189     fpin.seek(-sizeEndCentDir, 2)
 190     data = fpin.read()
 191     if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
 192         # the signature is correct and there's no comment, unpack structure
 193         endrec = struct.unpack(structEndArchive, data)
 194         endrec=list(endrec)
 195
 196         # Append a blank comment and record start offset
 197         endrec.append("")
 198         endrec.append(filesize - sizeEndCentDir)
 199         if endrec[_ECD_OFFSET] == 0xffffffff:
 200             # the value for the "offset of the start of the central directory"
 201             # indicates that there is a "Zip64 end of central directory"
 202             # structure present, so go look for it
 203             return _EndRecData64(fpin, -sizeEndCentDir, endrec)
 204
 205         return endrec
 206
 207     # Either this is not a ZIP file, or it is a ZIP file with an archive
 208     # comment.  Search the end of the file for the "end of central directory"
 209     # record signature. The comment is the last item in the ZIP file and may be
 210     # up to 64K long.  It is assumed that the "end of central directory" magic
 211     # number does not appear in the comment.
 212     maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
 213     fpin.seek(maxCommentStart, 0)
 214     data = fpin.read()
 215     start = data.rfind(stringEndArchive)
 216     if start >= 0:
 217         # found the magic number; attempt to unpack and interpret
 218         recData = data[start:start+sizeEndCentDir]
 219         endrec = list(struct.unpack(structEndArchive, recData))
 220         comment = data[start+sizeEndCentDir:]
 221         # check that comment length is correct
 222         if endrec[_ECD_COMMENT_SIZE] == len(comment):
 223             # Append the archive comment and start offset
 224             endrec.append(comment)
 225             endrec.append(maxCommentStart + start)
 226             if endrec[_ECD_OFFSET] == 0xffffffff:
 227                 # There is apparently a "Zip64 end of central directory"
 228                 # structure present, so go look for it
 229                 return _EndRecData64(fpin, start - filesize, endrec)
 230             return endrec
 231
 232     # Unable to find a valid end of central directory structure
 233     return
 234
 235
 236 class ZipInfo (object):
 237     """Class with attributes describing each file in the ZIP archive."""
 238
 239     __slots__ = (
 240             'orig_filename',
 241             'filename',
 242             'date_time',
 243             'compress_type',
 244             'comment',
 245             'extra',
 246             'create_system',
 247             'create_version',
 248             'extract_version',
 249             'reserved',
 250             'flag_bits',
 251             'volume',
 252             'internal_attr',
 253             'external_attr',
 254             'header_offset',
 255             'CRC',
 256             'compress_size',
 257             'file_size',
 258             '_raw_time',
 259         )
 260
 261     def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
 262         self.orig_filename = filename   # Original file name in archive
 263
 264         # Terminate the file name at the first null byte.  Null bytes in file
 265         # names are used as tricks by viruses in archives.
 266         null_byte = filename.find(chr(0))
 267         if null_byte >= 0:
 268             filename = filename[0:null_byte]
 269         # This is used to ensure paths in generated ZIP files always use
 270         # forward slashes as the directory separator, as required by the
 271         # ZIP format specification.
 272         if os.sep != "/" and os.sep in filename:
 273             filename = filename.replace(os.sep, "/")
 274
 275         self.filename = filename        # Normalized file name
 276         self.date_time = date_time      # year, month, day, hour, min, sec
 277         # Standard values:
 278         self.compress_type = ZIP_STORED # Type of compression for the file
 279         self.comment = ""               # Comment for each file
 280         self.extra = ""                 # ZIP extra data
 281         if sys.platform == 'win32':
 282             self.create_system = 0          # System which created ZIP archive
 283         else:
 284             # Assume everything else is unix-y
 285             self.create_system = 3          # System which created ZIP archive
 286         self.create_version = 20        # Version which created ZIP archive
 287         self.extract_version = 20       # Version needed to extract archive
 288         self.reserved = 0               # Must be zero
 289         self.flag_bits = 0              # ZIP flag bits
 290         self.volume = 0                 # Volume number of file header
 291         self.internal_attr = 0          # Internal attributes
 292         self.external_attr = 0          # External file attributes
 293         # Other attributes are set by class ZipFile:
 294         # header_offset         Byte offset to the file header
 295         # CRC                   CRC-32 of the uncompressed file
 296         # compress_size         Size of the compressed file
 297         # file_size             Size of the uncompressed file
 298
 299     def FileHeader(self):
 300         """Return the per-file header as a string."""
 301         dt = self.date_time
 302         dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
 303         dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
 304         if self.flag_bits & 0x08:
 305             # Set these to zero because we write them after the file data
 306             CRC = compress_size = file_size = 0
 307         else:
 308             CRC = self.CRC
 309             compress_size = self.compress_size
 310             file_size = self.file_size
 311
 312         extra = self.extra
 313
 314         if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
 315             # File is larger than what fits into a 4 byte integer,
 316             # fall back to the ZIP64 extension
 317             fmt = '<HHQQ'
 318             extra = extra + struct.pack(fmt,
 319                     1, struct.calcsize(fmt)-4, file_size, compress_size)
 320             file_size = 0xffffffff
 321             compress_size = 0xffffffff
 322             self.extract_version = max(45, self.extract_version)
 323             self.create_version = max(45, self.extract_version)
 324
 325         filename, flag_bits = self._encodeFilenameFlags()
 326         header = struct.pack(structFileHeader, stringFileHeader,
 327                  self.extract_version, self.reserved, flag_bits,
 328                  self.compress_type, dostime, dosdate, CRC,
 329                  compress_size, file_size,
 330                  len(filename), len(extra))
 331         return header + filename + extra
 332
 333     def _encodeFilenameFlags(self):
 334         if isinstance(self.filename, unicode):
 335             try:
 336                 return self.filename.encode('ascii'), self.flag_bits
 337             except UnicodeEncodeError:
 338                 return self.filename.encode('utf-8'), self.flag_bits | 0x800
 339         else:
 340             return self.filename, self.flag_bits
 341
 342     def _decodeFilename(self):
 343         if self.flag_bits & 0x800:
 344             return self.filename.decode('utf-8')
 345         else:
 346             return self.filename
 347
 348     def _decodeExtra(self):
 349         # Try to decode the extra field.
 350         extra = self.extra
 351         unpack = struct.unpack
 352         while extra:
 353             tp, ln = unpack('<HH', extra[:4])
 354             if tp == 1:
 355                 if ln >= 24:
 356                     counts = unpack('<QQQ', extra[4:28])
 357                 elif ln == 16:
 358                     counts = unpack('<QQ', extra[4:20])
 359                 elif ln == 8:
 360                     counts = unpack('<Q', extra[4:12])
 361                 elif ln == 0:
 362                     counts = ()
 363                 else:
 364                     raise RuntimeError, "Corrupt extra field %s"%(ln,)
 365
 366                 idx = 0
 367
 368                 # ZIP64 extension (large files and/or large archives)
 369                 if self.file_size in (0xffffffffffffffffL, 0xffffffffL):
 370                     self.file_size = counts[idx]
 371                     idx += 1
 372
 373                 if self.compress_size == 0xFFFFFFFFL:
 374                     self.compress_size = counts[idx]
 375                     idx += 1
 376
 377                 if self.header_offset == 0xffffffffL:
 378                     old = self.header_offset
 379                     self.header_offset = counts[idx]
 380                     idx+=1
 381
 382             extra = extra[ln+4:]
 383
 384
 385 class _ZipDecrypter:
 386     """Class to handle decryption of files stored within a ZIP archive.
 387
 388     ZIP supports a password-based form of encryption. Even though known
 389     plaintext attacks have been found against it, it is still useful
 390     to be able to get data out of such a file.
 391
 392     Usage:
 393         zd = _ZipDecrypter(mypwd)
 394         plain_char = zd(cypher_char)
 395         plain_text = map(zd, cypher_text)
 396     """
 397
 398     def _GenerateCRCTable():
 399         """Generate a CRC-32 table.
 400
 401         ZIP encryption uses the CRC32 one-byte primitive for scrambling some
 402         internal keys. We noticed that a direct implementation is faster than
 403         relying on binascii.crc32().
 404         """
 405         poly = 0xedb88320
 406         table = [0] * 256
 407         for i in range(256):
 408             crc = i
 409             for j in range(8):
 410                 if crc & 1:
 411                     crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
 412                 else:
 413                     crc = ((crc >> 1) & 0x7FFFFFFF)
 414             table[i] = crc
 415         return table
 416     crctable = _GenerateCRCTable()
 417
 418     def _crc32(self, ch, crc):
 419         """Compute the CRC32 primitive on one byte."""
 420         return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
 421
 422     def __init__(self, pwd):
 423         self.key0 = 305419896
 424         self.key1 = 591751049
 425         self.key2 = 878082192
 426         for p in pwd:
 427             self._UpdateKeys(p)
 428
 429     def _UpdateKeys(self, c):
 430         self.key0 = self._crc32(c, self.key0)
 431         self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
 432         self.key1 = (self.key1 * 134775813 + 1) & 4294967295
 433         self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
 434
 435     def __call__(self, c):
 436         """Decrypt a single character."""
 437         c = ord(c)
 438         k = self.key2 | 2
 439         c = c ^ (((k * (k^1)) >> 8) & 255)
 440         c = chr(c)
 441         self._UpdateKeys(c)
 442         return c
 443
 444 class ZipExtFile:
 445     """File-like object for reading an archive member.
 446        Is returned by ZipFile.open().
 447     """
 448
 449     def __init__(self, fileobj, zipinfo, decrypt=None):
 450         self.fileobj = fileobj
 451         self.decrypter = decrypt
 452         self.bytes_read = 0L
 453         self.rawbuffer = ''
 454         self.readbuffer = ''
 455         self.linebuffer = ''
 456         self.eof = False
 457         self.univ_newlines = False
 458         self.nlSeps = ("\n", )
 459         self.lastdiscard = ''
 460
 461         self.compress_type = zipinfo.compress_type
 462         self.compress_size = zipinfo.compress_size
 463
 464         self.closed  = False
 465         self.mode    = "r"
 466         self.name = zipinfo.filename
 467
 468         # read from compressed files in 64k blocks
 469         self.compreadsize = 64*1024
 470         if self.compress_type == ZIP_DEFLATED:
 471             self.dc = zlib.decompressobj(-15)
 472
 473     def set_univ_newlines(self, univ_newlines):
 474         self.univ_newlines = univ_newlines
 475
 476         # pick line separator char(s) based on universal newlines flag
 477         self.nlSeps = ("\n", )
 478         if self.univ_newlines:
 479             self.nlSeps = ("\r\n", "\r", "\n")
 480
 481     def __iter__(self):
 482         return self
 483
 484     def next(self):
 485         nextline = self.readline()
 486         if not nextline:
 487             raise StopIteration()
 488
 489         return nextline
 490
 491     def close(self):
 492         self.closed = True
 493
 494     def _checkfornewline(self):
 495         nl, nllen = -1, -1
 496         if self.linebuffer:
 497             # ugly check for cases where half of an \r\n pair was
 498             # read on the last pass, and the \r was discarded.  In this
 499             # case we just throw away the \n at the start of the buffer.
 500             if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
 501                 self.linebuffer = self.linebuffer[1:]
 502
 503             for sep in self.nlSeps:
 504                 nl = self.linebuffer.find(sep)
 505                 if nl >= 0:
 506                     nllen = len(sep)
 507                     return nl, nllen
 508
 509         return nl, nllen
 510
 511     def readline(self, size = -1):
 512         """Read a line with approx. size. If size is negative,
 513            read a whole line.
 514         """
 515         if size < 0:
 516             size = sys.maxint
 517         elif size == 0:
 518             return ''
 519
 520         # check for a newline already in buffer
 521         nl, nllen = self._checkfornewline()
 522
 523         if nl >= 0:
 524             # the next line was already in the buffer
 525             nl = min(nl, size)
 526         else:
 527             # no line break in buffer - try to read more
 528             size -= len(self.linebuffer)
 529             while nl < 0 and size > 0:
 530                 buf = self.read(min(size, 100))
 531                 if not buf:
 532                     break
 533                 self.linebuffer += buf
 534                 size -= len(buf)
 535
 536                 # check for a newline in buffer
 537                 nl, nllen = self._checkfornewline()
 538
 539             # we either ran out of bytes in the file, or
 540             # met the specified size limit without finding a newline,
 541             # so return current buffer
 542             if nl < 0:
 543                 s = self.linebuffer
 544                 self.linebuffer = ''
 545                 return s
 546
 547         buf = self.linebuffer[:nl]
 548         self.lastdiscard = self.linebuffer[nl:nl + nllen]
 549         self.linebuffer = self.linebuffer[nl + nllen:]
 550
 551         # line is always returned with \n as newline char (except possibly
 552         # for a final incomplete line in the file, which is handled above).
 553         return buf + "\n"
 554
 555     def readlines(self, sizehint = -1):
 556         """Return a list with all (following) lines. The sizehint parameter
 557         is ignored in this implementation.
 558         """
 559         result = []
 560         while True:
 561             line = self.readline()
 562             if not line: break
 563             result.append(line)
 564         return result
 565
 566     def read(self, size = None):
 567         # act like file() obj and return empty string if size is 0
 568         if size == 0:
 569             return ''
 570
 571         # determine read size
 572         bytesToRead = self.compress_size - self.bytes_read
 573
 574         # adjust read size for encrypted files since the first 12 bytes
 575         # are for the encryption/password information
 576         if self.decrypter is not None:
 577             bytesToRead -= 12
 578
 579         if size is not None and size >= 0:
 580             if self.compress_type == ZIP_STORED:
 581                 lr = len(self.readbuffer)
 582                 bytesToRead = min(bytesToRead, size - lr)
 583             elif self.compress_type == ZIP_DEFLATED:
 584                 if len(self.readbuffer) > size:
 585                     # the user has requested fewer bytes than we've already
 586                     # pulled through the decompressor; don't read any more
 587                     bytesToRead = 0
 588                 else:
 589                     # user will use up the buffer, so read some more
 590                     lr = len(self.rawbuffer)
 591                     bytesToRead = min(bytesToRead, self.compreadsize - lr)
 592
 593         # avoid reading past end of file contents
 594         if bytesToRead + self.bytes_read > self.compress_size:
 595             bytesToRead = self.compress_size - self.bytes_read
 596
 597         # try to read from file (if necessary)
 598         if bytesToRead > 0:
 599             bytes = self.fileobj.read(bytesToRead)
 600             self.bytes_read += len(bytes)
 601             self.rawbuffer += bytes
 602
 603             # handle contents of raw buffer
 604             if self.rawbuffer:
 605                 newdata = self.rawbuffer
 606                 self.rawbuffer = ''
 607
 608                 # decrypt new data if we were given an object to handle that
 609                 if newdata and self.decrypter is not None:
 610                     newdata = ''.join(map(self.decrypter, newdata))
 611
 612                 # decompress newly read data if necessary
 613                 if newdata and self.compress_type == ZIP_DEFLATED:
 614                     newdata = self.dc.decompress(newdata)
 615                     self.rawbuffer = self.dc.unconsumed_tail
 616                     if self.eof and len(self.rawbuffer) == 0:
 617                         # we're out of raw bytes (both from the file and
 618                         # the local buffer); flush just to make sure the
 619                         # decompressor is done
 620                         newdata += self.dc.flush()
 621                         # prevent decompressor from being used again
 622                         self.dc = None
 623
 624                 self.readbuffer += newdata
 625
 626
 627         # return what the user asked for
 628         if size is None or len(self.readbuffer) <= size:
 629             bytes = self.readbuffer
 630             self.readbuffer = ''
 631         else:
 632             bytes = self.readbuffer[:size]
 633             self.readbuffer = self.readbuffer[size:]
 634
 635         return bytes
 636
 637
 638 class ZipFile:
 639     """ Class with methods to open, read, write, close, list zip files.
 640
 641     z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
 642
 643     file: Either the path to the file, or a file-like object.
 644           If it is a path, the file will be opened and closed by ZipFile.
 645     mode: The mode can be either read "r", write "w" or append "a".
 646     compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
 647     allowZip64: if True ZipFile will create files with ZIP64 extensions when
 648                 needed, otherwise it will raise an exception when this would
 649                 be necessary.
 650
 651     """
 652
 653     fp = None                   # Set here since __del__ checks it
 654
 655     def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
 656         """Open the ZIP file with mode read "r", write "w" or append "a"."""
 657         if mode not in ("r", "w", "a"):
 658             raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
 659
 660         if compression == ZIP_STORED:
 661             pass
 662         elif compression == ZIP_DEFLATED:
 663             if not zlib:
 664                 raise RuntimeError,\
 665                       "Compression requires the (missing) zlib module"
 666         else:
 667             raise RuntimeError, "That compression method is not supported"
 668
 669         self._allowZip64 = allowZip64
 670         self._didModify = False
 671         self.debug = 0  # Level of printing: 0 through 3
 672         self.NameToInfo = {}    # Find file info given name
 673         self.filelist = []      # List of ZipInfo instances for archive
 674         self.compression = compression  # Method of compression
 675         self.mode = key = mode.replace('b', '')[0]
 676         self.pwd = None
 677         self.comment = ''
 678
 679         # Check if we were passed a file-like object
 680         if isinstance(file, basestring):
 681             self._filePassed = 0
 682             self.filename = file
 683             modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
 684             try:
 685                 self.fp = open(file, modeDict[mode])
 686             except IOError:
 687                 if mode == 'a':
 688                     mode = key = 'w'
 689                     self.fp = open(file, modeDict[mode])
 690                 else:
 691                     raise
 692         else:
 693             self._filePassed = 1
 694             self.fp = file
 695             self.filename = getattr(file, 'name', None)
 696
 697         if key == 'r':
 698             self._GetContents()
 699         elif key == 'w':
 700             pass
 701         elif key == 'a':
 702             try:                        # See if file is a zip file
 703                 self._RealGetContents()
 704                 # seek to start of directory and overwrite
 705                 self.fp.seek(self.start_dir, 0)
 706             except BadZipfile:          # file is not a zip file, just append
 707                 self.fp.seek(0, 2)
 708         else:
 709             if not self._filePassed:
 710                 self.fp.close()
 711                 self.fp = None
 712             raise RuntimeError, 'Mode must be "r", "w" or "a"'
 713
 714     def _GetContents(self):
 715         """Read the directory, making sure we close the file if the format
 716         is bad."""
 717         try:
 718             self._RealGetContents()
 719         except BadZipfile:
 720             if not self._filePassed:
 721                 self.fp.close()
 722                 self.fp = None
 723             raise
 724
 725     def _RealGetContents(self):
 726         """Read in the table of contents for the ZIP file."""
 727         fp = self.fp
 728         endrec = _EndRecData(fp)
 729         if not endrec:
 730             raise BadZipfile, "File is not a zip file"
 731         if self.debug > 1:
 732             print endrec
 733         size_cd = endrec[_ECD_SIZE]             # bytes in central directory
 734         offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
 735         self.comment = endrec[_ECD_COMMENT]     # archive comment
 736
 737         # "concat" is zero, unless zip was concatenated to another file
 738         concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
 739         if endrec[_ECD_SIGNATURE] == stringEndArchive64:
 740             # If Zip64 extension structures are present, account for them
 741             concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
 742
 743         if self.debug > 2:
 744             inferred = concat + offset_cd
 745             print "given, inferred, offset", offset_cd, inferred, concat
 746         # self.start_dir:  Position of start of central directory
 747         self.start_dir = offset_cd + concat
 748         fp.seek(self.start_dir, 0)
 749         data = fp.read(size_cd)
 750         fp = cStringIO.StringIO(data)
 751         total = 0
 752         while total < size_cd:
 753             centdir = fp.read(sizeCentralDir)
 754             if centdir[0:4] != stringCentralDir:
 755                 raise BadZipfile, "Bad magic number for central directory"
 756             centdir = struct.unpack(structCentralDir, centdir)
 757             if self.debug > 2:
 758                 print centdir
 759             filename = fp.read(centdir[_CD_FILENAME_LENGTH])
 760             # Create ZipInfo instance to store file information
 761             x = ZipInfo(filename)
 762             x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
 763             x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
 764             x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
 765             (x.create_version, x.create_system, x.extract_version, x.reserved,
 766                 x.flag_bits, x.compress_type, t, d,
 767                 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
 768             x.volume, x.internal_attr, x.external_attr = centdir[15:18]
 769             # Convert date/time code to (year, month, day, hour, min, sec)
 770             x._raw_time = t
 771             x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
 772                                      t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
 773
 774             x._decodeExtra()
 775             x.header_offset = x.header_offset + concat
 776             x.filename = x._decodeFilename()
 777             self.filelist.append(x)
 778             self.NameToInfo[x.filename] = x
 779
 780             # update total bytes read from central directory
 781             total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
 782                      + centdir[_CD_EXTRA_FIELD_LENGTH]
 783                      + centdir[_CD_COMMENT_LENGTH])
 784
 785             if self.debug > 2:
 786                 print "total", total
 787
 788
 789     def namelist(self):
 790         """Return a list of file names in the archive."""
 791         l = []
 792         for data in self.filelist:
 793             l.append(data.filename)
 794         return l
 795
 796     def infolist(self):
 797         """Return a list of class ZipInfo instances for files in the
 798         archive."""
 799         return self.filelist
 800
 801     def printdir(self):
 802         """Print a table of contents for the zip file."""
 803         print "%-46s %19s %12s" % ("File Name", "Modified    ", "Size")
 804         for zinfo in self.filelist:
 805             date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
 806             print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
 807
 808     def testzip(self):
 809         """Read all the files and check the CRC."""
 810         chunk_size = 2 ** 20
 811         for zinfo in self.filelist:
 812             try:
 813                 # Read by chunks, to avoid an OverflowError or a
 814                 # MemoryError with very large embedded files.
 815                 f = self.open(zinfo.filename, "r")
 816                 while f.read(chunk_size):     # Check CRC-32
 817                     pass
 818             except BadZipfile:
 819                 return zinfo.filename
 820
 821     def getinfo(self, name):
 822         """Return the instance of ZipInfo given 'name'."""
 823         info = self.NameToInfo.get(name)
 824         if info is None:
 825             raise KeyError(
 826                 'There is no item named %r in the archive' % name)
 827
 828         return info
 829
 830     def setpassword(self, pwd):
 831         """Set default password for encrypted files."""
 832         self.pwd = pwd
 833
 834     def read(self, name, pwd=None):
 835         """Return file bytes (as a string) for name."""
 836         return self.open(name, "r", pwd).read()
 837
 838     def open(self, name, mode="r", pwd=None):
 839         """Return file-like object for 'name'."""
 840         if mode not in ("r", "U", "rU"):
 841             raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
 842         if not self.fp:
 843             raise RuntimeError, \
 844                   "Attempt to read ZIP archive that was already closed"
 845
 846         # Only open a new file for instances where we were not
 847         # given a file object in the constructor
 848         if self._filePassed:
 849             zef_file = self.fp
 850         else:
 851             zef_file = open(self.filename, 'rb')
 852
 853         # Make sure we have an info object
 854         if isinstance(name, ZipInfo):
 855             # 'name' is already an info object
 856             zinfo = name
 857         else:
 858             # Get info object for name
 859             zinfo = self.getinfo(name)
 860
 861         zef_file.seek(zinfo.header_offset, 0)
 862
 863         # Skip the file header:
 864         fheader = zef_file.read(sizeFileHeader)
 865         if fheader[0:4] != stringFileHeader:
 866             raise BadZipfile, "Bad magic number for file header"
 867
 868         fheader = struct.unpack(structFileHeader, fheader)
 869         fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
 870         if fheader[_FH_EXTRA_FIELD_LENGTH]:
 871             zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
 872
 873         if fname != zinfo.orig_filename:
 874             raise BadZipfile, \
 875                       'File name in directory "%s" and header "%s" differ.' % (
 876                           zinfo.orig_filename, fname)
 877
 878         # check for encrypted flag & handle password
 879         is_encrypted = zinfo.flag_bits & 0x1
 880         zd = None
 881         if is_encrypted:
 882             if not pwd:
 883                 pwd = self.pwd
 884             if not pwd:
 885                 raise RuntimeError, "File %s is encrypted, " \
 886                       "password required for extraction" % name
 887
 888             zd = _ZipDecrypter(pwd)
 889             # The first 12 bytes in the cypher stream is an encryption header
 890             #  used to strengthen the algorithm. The first 11 bytes are
 891             #  completely random, while the 12th contains the MSB of the CRC,
 892             #  or the MSB of the file time depending on the header type
 893             #  and is used to check the correctness of the password.
 894             bytes = zef_file.read(12)
 895             h = map(zd, bytes[0:12])
 896             if zinfo.flag_bits & 0x8:
 897                 # compare against the file type from extended local headers
 898                 check_byte = (zinfo._raw_time >> 8) & 0xff
 899             else:
 900                 # compare against the CRC otherwise
 901                 check_byte = (zinfo.CRC >> 24) & 0xff
 902             if ord(h[11]) != check_byte:
 903                 raise RuntimeError("Bad password for file", name)
 904
 905         # build and return a ZipExtFile
 906         if zd is None:
 907             zef = ZipExtFile(zef_file, zinfo)
 908         else:
 909             zef = ZipExtFile(zef_file, zinfo, zd)
 910
 911         # set universal newlines on ZipExtFile if necessary
 912         if "U" in mode:
 913             zef.set_univ_newlines(True)
 914         return zef
 915
 916     def extract(self, member, path=None, pwd=None):
 917         """Extract a member from the archive to the current working directory,
 918            using its full name. Its file information is extracted as accurately
 919            as possible. `member' may be a filename or a ZipInfo object. You can
 920            specify a different directory using `path'.
 921         """
 922         if not isinstance(member, ZipInfo):
 923             member = self.getinfo(member)
 924
 925         if path is None:
 926             path = os.getcwd()
 927
 928         return self._extract_member(member, path, pwd)
 929
 930     def extractall(self, path=None, members=None, pwd=None):
 931         """Extract all members from the archive to the current working
 932            directory. `path' specifies a different directory to extract to.
 933            `members' is optional and must be a subset of the list returned
 934            by namelist().
 935         """
 936         if members is None:
 937             members = self.namelist()
 938
 939         for zipinfo in members:
 940             self.extract(zipinfo, path, pwd)
 941
 942     def _extract_member(self, member, targetpath, pwd):
 943         """Extract the ZipInfo object 'member' to a physical
 944            file on the path targetpath.
 945         """
 946         # build the destination pathname, replacing
 947         # forward slashes to platform specific separators.
 948         if targetpath[-1:] == "/":
 949             targetpath = targetpath[:-1]
 950
 951         # don't include leading "/" from file name if present
 952         if os.path.isabs(member.filename):
 953             targetpath = os.path.join(targetpath, member.filename[1:])
 954         else:
 955             targetpath = os.path.join(targetpath, member.filename)
 956
 957         targetpath = os.path.normpath(targetpath)
 958
 959         # Create all upper directories if necessary.
 960         upperdirs = os.path.dirname(targetpath)
 961         if upperdirs and not os.path.exists(upperdirs):
 962             os.makedirs(upperdirs)
 963
 964         source = self.open(member, pwd=pwd)
 965         target = file(targetpath, "wb")
 966         shutil.copyfileobj(source, target)
 967         source.close()
 968         target.close()
 969
 970         return targetpath
 971
 972     def _writecheck(self, zinfo):
 973         """Check for errors before writing a file to the archive."""
 974         if zinfo.filename in self.NameToInfo:
 975             if self.debug:      # Warning for duplicate names
 976                 print "Duplicate name:", zinfo.filename
 977         if self.mode not in ("w", "a"):
 978             raise RuntimeError, 'write() requires mode "w" or "a"'
 979         if not self.fp:
 980             raise RuntimeError, \
 981                   "Attempt to write ZIP archive that was already closed"
 982         if zinfo.compress_type == ZIP_DEFLATED and not zlib:
 983             raise RuntimeError, \
 984                   "Compression requires the (missing) zlib module"
 985         if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
 986             raise RuntimeError, \
 987                   "That compression method is not supported"
 988         if zinfo.file_size > ZIP64_LIMIT:
 989             if not self._allowZip64:
 990                 raise LargeZipFile("Filesize would require ZIP64 extensions")
 991         if zinfo.header_offset > ZIP64_LIMIT:
 992             if not self._allowZip64:
 993                 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
 994
 995     def write(self, filename, arcname=None, compress_type=None):
 996         """Put the bytes from filename into the archive under the name
 997         arcname."""
 998         if not self.fp:
 999             raise RuntimeError(
1000                   "Attempt to write to ZIP archive that was already closed")
1001
1002         st = os.stat(filename)
1003         mtime = time.localtime(st.st_mtime)
1004         date_time = mtime[0:6]
1005         # Create ZipInfo instance to store file information
1006         if arcname is None:
1007             arcname = filename
1008         arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
1009         while arcname[0] in (os.sep, os.altsep):
1010             arcname = arcname[1:]
1011         zinfo = ZipInfo(arcname, date_time)
1012         zinfo.external_attr = (st[0] & 0xFFFF) << 16L      # Unix attributes
1013         if compress_type is None:
1014             zinfo.compress_type = self.compression
1015         else:
1016             zinfo.compress_type = compress_type
1017
1018         zinfo.file_size = st.st_size
1019         zinfo.flag_bits = 0x00
1020         zinfo.header_offset = self.fp.tell()    # Start of header bytes
1021
1022         self._writecheck(zinfo)
1023         self._didModify = True
1024         fp = open(filename, "rb")
1025         # Must overwrite CRC and sizes with correct data later
1026         zinfo.CRC = CRC = 0
1027         zinfo.compress_size = compress_size = 0
1028         zinfo.file_size = file_size = 0
1029         self.fp.write(zinfo.FileHeader())
1030         if zinfo.compress_type == ZIP_DEFLATED:
1031             cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1032                  zlib.DEFLATED, -15)
1033         else:
1034             cmpr = None
1035         while 1:
1036             buf = fp.read(1024 * 8)
1037             if not buf:
1038                 break
1039             file_size = file_size + len(buf)
1040             CRC = crc32(buf, CRC) & 0xffffffff
1041             if cmpr:
1042                 buf = cmpr.compress(buf)
1043                 compress_size = compress_size + len(buf)
1044             self.fp.write(buf)
1045         fp.close()
1046         if cmpr:
1047             buf = cmpr.flush()
1048             compress_size = compress_size + len(buf)
1049             self.fp.write(buf)
1050             zinfo.compress_size = compress_size
1051         else:
1052             zinfo.compress_size = file_size
1053         zinfo.CRC = CRC
1054         zinfo.file_size = file_size
1055         # Seek backwards and write CRC and file sizes
1056         position = self.fp.tell()       # Preserve current position in file
1057         self.fp.seek(zinfo.header_offset + 14, 0)
1058         self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
1059               zinfo.file_size))
1060         self.fp.seek(position, 0)
1061         self.filelist.append(zinfo)
1062         self.NameToInfo[zinfo.filename] = zinfo
1063
1064     def writestr(self, zinfo_or_arcname, bytes):
1065         """Write a file into the archive.  The contents is the string
1066         'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
1067         the name of the file in the archive."""
1068         if not isinstance(zinfo_or_arcname, ZipInfo):
1069             zinfo = ZipInfo(filename=zinfo_or_arcname,
1070                             date_time=time.localtime(time.time())[:6])
1071             zinfo.compress_type = self.compression
1072             zinfo.external_attr = 0600 << 16
1073         else:
1074             zinfo = zinfo_or_arcname
1075
1076         if not self.fp:
1077             raise RuntimeError(
1078                   "Attempt to write to ZIP archive that was already closed")
1079
1080         zinfo.file_size = len(bytes)            # Uncompressed size
1081         zinfo.header_offset = self.fp.tell()    # Start of header bytes
1082         self._writecheck(zinfo)
1083         self._didModify = True
1084         zinfo.CRC = crc32(bytes) & 0xffffffff       # CRC-32 checksum
1085         if zinfo.compress_type == ZIP_DEFLATED:
1086             co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1087                  zlib.DEFLATED, -15)
1088             bytes = co.compress(bytes) + co.flush()
1089             zinfo.compress_size = len(bytes)    # Compressed size
1090         else:
1091             zinfo.compress_size = zinfo.file_size
1092         zinfo.header_offset = self.fp.tell()    # Start of header bytes
1093         self.fp.write(zinfo.FileHeader())
1094         self.fp.write(bytes)
1095         self.fp.flush()
1096         if zinfo.flag_bits & 0x08:
1097             # Write CRC and file sizes after the file data
1098             self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
1099                   zinfo.file_size))
1100         self.filelist.append(zinfo)
1101         self.NameToInfo[zinfo.filename] = zinfo
1102
1103     def __del__(self):
1104         """Call the "close()" method in case the user forgot."""
1105         self.close()
1106
1107     def close(self):
1108         """Close the file, and for mode "w" and "a" write the ending
1109         records."""
1110         if self.fp is None:
1111             return
1112
1113         if self.mode in ("w", "a") and self._didModify: # write ending records
1114             count = 0
1115             pos1 = self.fp.tell()
1116             for zinfo in self.filelist:         # write central directory
1117                 count = count + 1
1118                 dt = zinfo.date_time
1119                 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
1120                 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
1121                 extra = []
1122                 if zinfo.file_size > ZIP64_LIMIT \
1123                         or zinfo.compress_size > ZIP64_LIMIT:
1124                     extra.append(zinfo.file_size)
1125                     extra.append(zinfo.compress_size)
1126                     file_size = 0xffffffff
1127                     compress_size = 0xffffffff
1128                 else:
1129                     file_size = zinfo.file_size
1130                     compress_size = zinfo.compress_size
1131
1132                 if zinfo.header_offset > ZIP64_LIMIT:
1133                     extra.append(zinfo.header_offset)
1134                     header_offset = 0xffffffffL
1135                 else:
1136                     header_offset = zinfo.header_offset
1137
1138                 extra_data = zinfo.extra
1139                 if extra:
1140                     # Append a ZIP64 field to the extra's
1141                     extra_data = struct.pack(
1142                             '<HH' + 'Q'*len(extra),
1143                             1, 8*len(extra), *extra) + extra_data
1144
1145                     extract_version = max(45, zinfo.extract_version)
1146                     create_version = max(45, zinfo.create_version)
1147                 else:
1148                     extract_version = zinfo.extract_version
1149                     create_version = zinfo.create_version
1150
1151                 try:
1152                     filename, flag_bits = zinfo._encodeFilenameFlags()
1153                     centdir = struct.pack(structCentralDir,
1154                      stringCentralDir, create_version,
1155                      zinfo.create_system, extract_version, zinfo.reserved,
1156                      flag_bits, zinfo.compress_type, dostime, dosdate,
1157                      zinfo.CRC, compress_size, file_size,
1158                      len(filename), len(extra_data), len(zinfo.comment),
1159                      0, zinfo.internal_attr, zinfo.external_attr,
1160                      header_offset)
1161                 except DeprecationWarning:
1162                     print >>sys.stderr, (structCentralDir,
1163                      stringCentralDir, create_version,
1164                      zinfo.create_system, extract_version, zinfo.reserved,
1165                      zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1166                      zinfo.CRC, compress_size, file_size,
1167                      len(zinfo.filename), len(extra_data), len(zinfo.comment),
1168                      0, zinfo.internal_attr, zinfo.external_attr,
1169                      header_offset)
1170                     raise
1171                 self.fp.write(centdir)
1172                 self.fp.write(filename)
1173                 self.fp.write(extra_data)
1174                 self.fp.write(zinfo.comment)
1175
1176             pos2 = self.fp.tell()
1177             # Write end-of-zip-archive record
1178             centDirOffset = pos1
1179             if pos1 > ZIP64_LIMIT:
1180                 # Need to write the ZIP64 end-of-archive records
1181                 zip64endrec = struct.pack(
1182                         structEndArchive64, stringEndArchive64,
1183                         44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
1184                 self.fp.write(zip64endrec)
1185
1186                 zip64locrec = struct.pack(
1187                         structEndArchive64Locator,
1188                         stringEndArchive64Locator, 0, pos2, 1)
1189                 self.fp.write(zip64locrec)
1190                 centDirOffset = 0xFFFFFFFF
1191
1192             # check for valid comment length
1193             if len(self.comment) >= ZIP_MAX_COMMENT:
1194                 if self.debug > 0:
1195                     msg = 'Archive comment is too long; truncating to %d bytes' \
1196                           % ZIP_MAX_COMMENT
1197                 self.comment = self.comment[:ZIP_MAX_COMMENT]
1198
1199             endrec = struct.pack(structEndArchive, stringEndArchive,
1200                                  0, 0, count % ZIP_FILECOUNT_LIMIT,
1201                                  count % ZIP_FILECOUNT_LIMIT, pos2 - pos1,
1202                                  centDirOffset, len(self.comment))
1203             self.fp.write(endrec)
1204             self.fp.write(self.comment)
1205             self.fp.flush()
1206
1207         if not self._filePassed:
1208             self.fp.close()
1209         self.fp = None
1210
1211
class PyZipFile(ZipFile):
    """ZipFile subclass that archives byte-compiled Python modules and
    packages."""

    def writepy(self, pathname, basename = ""):
        """Add the compiled module(s) found at "pathname" to the archive.

        "pathname" may be
          * a package directory (one that contains __init__.py): the
            package, its *.py modules and, recursively, its sub-packages
            are added below "basename/<package name>";
          * any other directory: every *.py file directly inside it is
            added below "basename";
          * a single *.py file, which is added below "basename".

        What is stored is always the byte-compiled file (module.pyo or
        module.pyc); module.py is compiled into module.pyc first when
        necessary.  Raises RuntimeError when "pathname" is neither a
        directory nor a file ending in ".py".
        """
        name = os.path.basename(pathname)

        if not os.path.isdir(pathname):
            # A single module file
            if pathname[-3:] != ".py":
                raise RuntimeError(
                      'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file %s" % (arcname,))
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory %s" % (pathname,))
            for filename in os.listdir(pathname):
                if os.path.splitext(filename)[1] != ".py":
                    continue
                path = os.path.join(pathname, filename)
                fname, arcname = self._get_codename(path[0:-3], basename)
                if self.debug:
                    print("Adding %s" % (arcname,))
                self.write(fname, arcname)
            return

        # This is a package directory, add it
        if basename:
            basename = "%s/%s" % (basename, name)
        else:
            basename = name
        if self.debug:
            print("Adding package in %s as %s" % (pathname, basename))
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("Adding %s" % (arcname,))
        self.write(fname, arcname)

        # Add all other *.py files and package subdirectories
        entries = os.listdir(pathname)
        entries.remove("__init__.py")
        for filename in entries:
            path = os.path.join(pathname, filename)
            if os.path.isdir(path):
                # Only sub-directories that are packages are descended into
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    self.writepy(path, basename)  # Recursive call
                continue
            if os.path.splitext(filename)[1] != ".py":
                continue
            fname, arcname = self._get_codename(path[0:-3], basename)
            if self.debug:
                print("Adding %s" % (arcname,))
            self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        "pathname" is a module path without its extension.  The result
        names the byte-compiled file to store and the name to store it
        under, which is the file's base name prefixed with "basename/"
        when "basename" is non-empty.  An up-to-date .pyo is preferred;
        otherwise the .pyc is used, after compiling it when it is missing
        or older than the .py.  For example, given /python/lib/string and
        an empty basename, return (/python/lib/string.pyc, string.pyc).
        """
        file_py  = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"

        def modified(path):
            return os.stat(path).st_mtime

        fname = file_pyc
        if os.path.isfile(file_pyo) and \
                modified(file_pyo) >= modified(file_py):
            fname = file_pyo    # Use .pyo file
        elif not os.path.isfile(file_pyc) or \
                modified(file_pyc) < modified(file_py):
            import py_compile
            if self.debug:
                print("Compiling %s" % (file_py,))
            try:
                py_compile.compile(file_py, file_pyc, None, True)
            except py_compile.PyCompileError:
                # Report the problem and carry on with the .pyc name
                print(sys.exc_info()[1].msg)

        archivename = os.path.basename(fname)
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
1309
1310
def main(args = None):
    """Command-line interface: list, test, extract or create a zipfile.

    'args' is the argument list without the program name and defaults to
    sys.argv[1:].  The first argument selects the action (-l, -t, -e or
    -c); with an unknown action or the wrong number of arguments the
    usage text is printed and the interpreter exits with status 1.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        zf.printdir()
        zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        badfile = zf.testzip()
        zf.close()
        # testzip() returns the name of the first corrupt member, if any
        if badfile:
            print("The following enclosed file is corrupted: %r" % (badfile,))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        out = args[2]
        for path in zf.namelist():
            if path.startswith('./'):
                tgt = os.path.join(out, path[2:])
            else:
                tgt = os.path.join(out, path)

            tgtdir = os.path.dirname(tgt)
            if tgtdir and not os.path.exists(tgtdir):
                os.makedirs(tgtdir)
            if path.endswith('/'):
                # Directory entry: it was created above and has no data
                continue
            fp = open(tgt, 'wb')
            try:
                fp.write(zf.read(path))
            finally:
                fp.close()
        zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recursively everything below a directory
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        for src in args[2:]:
            addToZip(zf, src, os.path.basename(src))

        zf.close()
1383
# Script entry point: run the command-line interface defined by main(),
# which reads its arguments from sys.argv when called without any.
if __name__ == "__main__":
    main()