# -*- coding: iso-8859-1 -*-
#-------------------------------------------------------------------
# tarfile.py
#-------------------------------------------------------------------
# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
# All rights reserved.
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
30 """Read from and write to tar format archives.
33 __version__
= "$Revision$"
37 __author__
= "Lars Gustäbel (lars@gustaebel.de)"
40 __credits__
= "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
import sys
import os
import shutil
import stat
import errno
import time
import struct
import copy
import re
import operator

if sys.platform == 'mac':
    # This module needs work for MacOS9, especially in the area of pathname
    # handling. In many places it is assumed a simple substitution of / by the
    # local os.path.sep is good enough to convert pathnames, but this does not
    # work with the mac rooted:path:name versus :nonrooted:path:name syntax
    raise ImportError, "tarfile does not work for platform==mac"

try:
    import grp, pwd
except ImportError:
    grp = pwd = None
# from tarfile import *
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
#---------------------------------------------------------
# tar constants
#---------------------------------------------------------
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
GNU_MAGIC = "ustar  \0"         # magic gnu tar string
POSIX_MAGIC = "ustar\x0000"     # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar longname
GNUTYPE_LONGLINK = "K"          # GNU tar longlink
GNUTYPE_SPARSE = "S"            # GNU tar sparse file

XHDTYPE = "x"                   # POSIX.1-2001 extended header
XGLTYPE = "g"                   # POSIX.1-2001 global header
SOLARIS_XHDTYPE = "X"           # Solaris extended header

USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields in a pax header that are numbers, all other fields
# are treated as strings.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}
#---------------------------------------------------------
# Bits used in the mode field, values in octal.
#---------------------------------------------------------
S_IFLNK = 0120000        # symbolic link
S_IFREG = 0100000        # regular file
S_IFBLK = 0060000        # block device
S_IFDIR = 0040000        # directory
S_IFCHR = 0020000        # character device
S_IFIFO = 0010000        # fifo

TSUID   = 04000          # set UID on execution
TSGID   = 02000          # set GID on execution
TSVTX   = 01000          # reserved

TUREAD  = 0400           # read by owner
TUWRITE = 0200           # write by owner
TUEXEC  = 0100           # execute/search by owner
TGREAD  = 0040           # read by group
TGWRITE = 0020           # write by group
TGEXEC  = 0010           # execute/search by group
TOREAD  = 0004           # read by other
TOWRITE = 0002           # write by other
TOEXEC  = 0001           # execute/search by other
#---------------------------------------------------------
# initialization
#---------------------------------------------------------
ENCODING = sys.getfilesystemencoding()
if ENCODING is None:
    ENCODING = sys.getdefaultencoding()
#---------------------------------------------------------
# Some useful functions
#---------------------------------------------------------

def stn(s, length):
    """Convert a python string to a null-terminated string buffer.
    """
    return s[:length] + (length - len(s)) * NUL

def nts(s):
    """Convert a null-terminated string field to a python string.
    """
    # Use the string up to the first null char.
    p = s.find("\0")
    if p == -1:
        return s
    return s[:p]

def nti(s):
    """Convert a number field to a python number.
    """
    # There are two possible encodings for a number field, see
    # itn() below.
    if s[0] != chr(0200):
        try:
            n = int(nts(s) or "0", 8)
        except ValueError:
            raise InvalidHeaderError("invalid header")
    else:
        n = 0L
        for i in xrange(len(s) - 1):
            n <<= 8
            n += ord(s[i + 1])
    return n
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0200 byte indicates this particular
    # encoding, the following digits-1 bytes are a big-endian
    # representation. This allows values up to (256**(digits-1))-1.
    if 0 <= n < 8 ** (digits - 1):
        s = "%0*o" % (digits - 1, n) + NUL
    else:
        if format != GNU_FORMAT or n >= 256 ** (digits - 1):
            raise ValueError("overflow in number field")

        if n < 0:
            # XXX We mimic GNU tar's behaviour with negative numbers,
            # this could raise OverflowError.
            n = struct.unpack("L", struct.pack("l", n))[0]

        s = ""
        for i in xrange(digits - 1):
            s = chr(n & 0377) + s
            n >>= 8
        s = chr(0200) + s
    return s
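# Illustrative sketch (not from the original source): how the two number
# encodings above round-trip through itn() and nti(); the values are made
# up for the example.
#
#     itn(511)                          == "0000777\0"   # plain octal form
#     nti(itn(511))                     == 511
#     nti(itn(8**11, 12, GNU_FORMAT))   == 8**11         # falls back to the base-256 form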
def uts(s, encoding, errors):
    """Convert a unicode object to a string.
    """
    if errors == "utf-8":
        # An extra error handler similar to the -o invalid=UTF-8 option
        # in POSIX.1-2001. Replace untranslatable characters with their
        # UTF-8 representation.
        try:
            return s.encode(encoding, "strict")
        except UnicodeEncodeError:
            x = []
            for c in s:
                try:
                    x.append(c.encode(encoding, "strict"))
                except UnicodeEncodeError:
                    x.append(c.encode("utf8"))
            return "".join(x)
    else:
        return s.encode(encoding, errors)
def calc_chksums(buf):
    """Calculate the checksum for a member's header by summing up all
       characters except for the chksum field which is treated as if
       it was filled with spaces. According to the GNU tar sources,
       some tars (Sun and NeXT) calculate chksum with signed char,
       which will be different if there are chars in the buffer with
       the high bit set. So we calculate two checksums, unsigned and
       signed.
    """
    unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512]))
    signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512]))
    return unsigned_chksum, signed_chksum
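# Illustrative sketch (not from the original source): TarInfo.frombuf()
# below validates a 512-byte header block against the checksum stored in
# its chksum field exactly like this.
#
#     chksum = nti(buf[148:156])
#     if chksum not in calc_chksums(buf):
#         raise InvalidHeaderError("bad checksum")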
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    BUFSIZE = 16 * 1024
    blocks, remainder = divmod(length, BUFSIZE)
    for b in xrange(blocks):
        buf = src.read(BUFSIZE)
        if len(buf) < BUFSIZE:
            raise IOError("end of file reached")
        dst.write(buf)

    if remainder != 0:
        buf = src.read(remainder)
        if len(buf) < remainder:
            raise IOError("end of file reached")
        dst.write(buf)
    return
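# Illustrative usage (not from the original source): copy an exact number
# of bytes between two file objects; the file names are made up.
#
#     src = open("member.data", "rb")
#     dst = open("member.copy", "wb")
#     copyfileobj(src, dst, 512)    # raises IOError if src has fewer bytes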
filemode_table = (
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)

def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    perm = []
    for table in filemode_table:
        for bit, char in table:
            if mode & bit == bit:
                perm.append(char)
                break
        else:
            perm.append("-")
    return "".join(perm)
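# Illustrative sketch (not from the original source): filemode() turns a
# numeric mode into the familiar `ls -l' permission string.
#
#     filemode(0755)    == "-rwxr-xr-x"
#     filemode(0100644) == "-rw-r--r--"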
class TarError(Exception):
    """Base exception."""
    pass
class ExtractError(TarError):
    """General exception for extract errors."""
    pass
class ReadError(TarError):
    """Exception for unreadable tar archives."""
    pass
class CompressionError(TarError):
    """Exception for unavailable compression methods."""
    pass
class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
    pass
class HeaderError(TarError):
    """Base exception for header errors."""
    pass
class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""
    pass
class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""
    pass
class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""
    pass
class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""
    pass
class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
    pass
#---------------------------
# internal stream interface
#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.
    """

    def __init__(self, name, mode):
        mode = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        if hasattr(os, "O_BINARY"):
            mode |= os.O_BINARY
        self.fd = os.open(name, mode)

    def close(self):
        os.close(self.fd)

    def read(self, size):
        return os.read(self.fd, size)

    def write(self, s):
        os.write(self.fd, s)

class _Stream:
395 """Class that serves as an adapter between TarFile and
396 a stream-like object. The stream-like object only
397 needs to have a read() or write() method and is accessed
398 blockwise. Use of gzip or bzip2 compression is possible.
399 A stream-like object could be for example: sys.stdin,
400 sys.stdout, a socket, a tape device etc.
402 _Stream is intended to be used only internally.
405 def __init__(self
, name
, mode
, comptype
, fileobj
, bufsize
):
406 """Construct a _Stream object.
408 self
._extfileobj
= True
410 fileobj
= _LowLevelFile(name
, mode
)
411 self
._extfileobj
= False
414 # Enable transparent compression detection for the
416 fileobj
= _StreamProxy(fileobj
)
417 comptype
= fileobj
.getcomptype()
419 self
.name
= name
or ""
421 self
.comptype
= comptype
422 self
.fileobj
= fileobj
423 self
.bufsize
= bufsize
432 raise CompressionError("zlib module is not available")
434 self
.crc
= zlib
.crc32("") & 0xffffffffL
438 self
._init
_write
_gz
()
440 if comptype
== "bz2":
444 raise CompressionError("bz2 module is not available")
447 self
.cmp = bz2
.BZ2Decompressor()
449 self
.cmp = bz2
.BZ2Compressor()
452 if hasattr(self
, "closed") and not self
.closed
:
455 def _init_write_gz(self
):
456 """Initialize for writing with gzip compression.
458 self
.cmp = self
.zlib
.compressobj(9, self
.zlib
.DEFLATED
,
459 -self
.zlib
.MAX_WBITS
,
460 self
.zlib
.DEF_MEM_LEVEL
,
462 timestamp
= struct
.pack("<L", long(time
.time()))
463 self
.__write
("\037\213\010\010%s\002\377" % timestamp
)
464 if self
.name
.endswith(".gz"):
465 self
.name
= self
.name
[:-3]
466 self
.__write
(self
.name
+ NUL
)
469 """Write string s to the stream.
471 if self
.comptype
== "gz":
472 self
.crc
= self
.zlib
.crc32(s
, self
.crc
) & 0xffffffffL
474 if self
.comptype
!= "tar":
475 s
= self
.cmp.compress(s
)
478 def __write(self
, s
):
479 """Write string s to the stream if a whole new block
480 is ready to be written.
483 while len(self
.buf
) > self
.bufsize
:
484 self
.fileobj
.write(self
.buf
[:self
.bufsize
])
485 self
.buf
= self
.buf
[self
.bufsize
:]
488 """Close the _Stream object. No operation should be
489 done on it afterwards.
494 if self
.mode
== "w" and self
.comptype
!= "tar":
495 self
.buf
+= self
.cmp.flush()
497 if self
.mode
== "w" and self
.buf
:
498 self
.fileobj
.write(self
.buf
)
500 if self
.comptype
== "gz":
501 # The native zlib crc is an unsigned 32-bit integer, but
502 # the Python wrapper implicitly casts that to a signed C
503 # long. So, on a 32-bit box self.crc may "look negative",
504 # while the same crc on a 64-bit box may "look positive".
505 # To avoid irksome warnings from the `struct` module, force
506 # it to look positive on all boxes.
507 self
.fileobj
.write(struct
.pack("<L", self
.crc
& 0xffffffffL
))
508 self
.fileobj
.write(struct
.pack("<L", self
.pos
& 0xffffFFFFL
))
510 if not self
._extfileobj
:
515 def _init_read_gz(self
):
516 """Initialize for reading a gzip compressed fileobj.
518 self
.cmp = self
.zlib
.decompressobj(-self
.zlib
.MAX_WBITS
)
521 # taken from gzip.GzipFile with some alterations
522 if self
.__read
(2) != "\037\213":
523 raise ReadError("not a gzip file")
524 if self
.__read
(1) != "\010":
525 raise CompressionError("unsupported compression method")
527 flag
= ord(self
.__read
(1))
531 xlen
= ord(self
.__read
(1)) + 256 * ord(self
.__read
(1))
536 if not s
or s
== NUL
:
541 if not s
or s
== NUL
:
547 """Return the stream's file pointer position.
551 def seek(self
, pos
=0):
552 """Set the stream's file pointer to pos. Negative seeking
555 if pos
- self
.pos
>= 0:
556 blocks
, remainder
= divmod(pos
- self
.pos
, self
.bufsize
)
557 for i
in xrange(blocks
):
558 self
.read(self
.bufsize
)
561 raise StreamError("seeking backwards is not allowed")
564 def read(self
, size
=None):
565 """Return the next size number of bytes from the stream.
566 If size is not defined, return all bytes of the stream
572 buf
= self
._read
(self
.bufsize
)
578 buf
= self
._read
(size
)
582 def _read(self
, size
):
583 """Return size bytes from the stream.
585 if self
.comptype
== "tar":
586 return self
.__read
(size
)
591 buf
= self
.__read
(self
.bufsize
)
595 buf
= self
.cmp.decompress(buf
)
597 raise ReadError("invalid compressed data")
604 def __read(self
, size
):
605 """Return size bytes from stream. If internal buffer is empty,
606 read another block from the stream.
611 buf
= self
.fileobj
.read(self
.bufsize
)
621 class _StreamProxy(object):
622 """Small proxy class that enables transparent compression
623 detection for the Stream interface (mode 'r|*').
626 def __init__(self
, fileobj
):
627 self
.fileobj
= fileobj
628 self
.buf
= self
.fileobj
.read(BLOCKSIZE
)
630 def read(self
, size
):
631 self
.read
= self
.fileobj
.read
634 def getcomptype(self
):
635 if self
.buf
.startswith("\037\213\010"):
637 if self
.buf
.startswith("BZh91"):
645 class _BZ2Proxy(object):
646 """Small proxy class that enables external file object
647 support for "r:bz2" and "w:bz2" modes. This is actually
648 a workaround for a limitation in bz2 module's BZ2File
649 class which (unlike gzip.GzipFile) has no support for
650 a file object argument.
653 blocksize
= 16 * 1024
655 def __init__(self
, fileobj
, mode
):
656 self
.fileobj
= fileobj
658 self
.name
= getattr(self
.fileobj
, "name", None)
665 self
.bz2obj
= bz2
.BZ2Decompressor()
669 self
.bz2obj
= bz2
.BZ2Compressor()
671 def read(self
, size
):
675 raw
= self
.fileobj
.read(self
.blocksize
)
678 data
= self
.bz2obj
.decompress(raw
)
681 self
.buf
= "".join(b
)
683 buf
= self
.buf
[:size
]
684 self
.buf
= self
.buf
[size
:]
691 self
.read(pos
- self
.pos
)
696 def write(self
, data
):
697 self
.pos
+= len(data
)
698 raw
= self
.bz2obj
.compress(data
)
699 self
.fileobj
.write(raw
)
703 raw
= self
.bz2obj
.flush()
704 self
.fileobj
.write(raw
)
707 #------------------------
708 # Extraction file object
709 #------------------------
710 class _FileInFile(object):
711 """A thin wrapper around an existing file object that
712 provides a part of its data as an individual file
716 def __init__(self
, fileobj
, offset
, size
, sparse
=None):
717 self
.fileobj
= fileobj
724 """Return the current file position.
728 def seek(self
, position
):
729 """Seek to a position in the file.
731 self
.position
= position
733 def read(self
, size
=None):
734 """Read data from the file.
737 size
= self
.size
- self
.position
739 size
= min(size
, self
.size
- self
.position
)
741 if self
.sparse
is None:
742 return self
.readnormal(size
)
744 return self
.readsparse(size
)
746 def readnormal(self
, size
):
747 """Read operation for regular files.
749 self
.fileobj
.seek(self
.offset
+ self
.position
)
750 self
.position
+= size
751 return self
.fileobj
.read(size
)
753 def readsparse(self
, size
):
754 """Read operation for sparse files.
758 buf
= self
.readsparsesection(size
)
765 def readsparsesection(self
, size
):
766 """Read a single section of a sparse file.
768 section
= self
.sparse
.find(self
.position
)
773 size
= min(size
, section
.offset
+ section
.size
- self
.position
)
775 if isinstance(section
, _data
):
776 realpos
= section
.realpos
+ self
.position
- section
.offset
777 self
.fileobj
.seek(self
.offset
+ realpos
)
778 self
.position
+= size
779 return self
.fileobj
.read(size
)
781 self
.position
+= size
786 class ExFileObject(object):
787 """File-like object for reading an archive member.
788 Is returned by TarFile.extractfile().
792 def __init__(self
, tarfile
, tarinfo
):
793 self
.fileobj
= _FileInFile(tarfile
.fileobj
,
796 getattr(tarinfo
, "sparse", None))
797 self
.name
= tarinfo
.name
800 self
.size
= tarinfo
.size
805 def read(self
, size
=None):
806 """Read at most size bytes from the file. If size is not
807 present or None, read all data until EOF is reached.
810 raise ValueError("I/O operation on closed file")
818 buf
= self
.buffer[:size
]
819 self
.buffer = self
.buffer[size
:]
822 buf
+= self
.fileobj
.read()
824 buf
+= self
.fileobj
.read(size
- len(buf
))
826 self
.position
+= len(buf
)
829 def readline(self
, size
=-1):
830 """Read one entire line from the file. If size is present
831 and non-negative, return a string with at most that
832 size, which may be an incomplete line.
835 raise ValueError("I/O operation on closed file")
837 if "\n" in self
.buffer:
838 pos
= self
.buffer.find("\n") + 1
840 buffers
= [self
.buffer]
842 buf
= self
.fileobj
.read(self
.blocksize
)
844 if not buf
or "\n" in buf
:
845 self
.buffer = "".join(buffers
)
846 pos
= self
.buffer.find("\n") + 1
849 pos
= len(self
.buffer)
855 buf
= self
.buffer[:pos
]
856 self
.buffer = self
.buffer[pos
:]
857 self
.position
+= len(buf
)
861 """Return a list with all remaining lines.
865 line
= self
.readline()
871 """Return the current file position.
874 raise ValueError("I/O operation on closed file")
878 def seek(self
, pos
, whence
=os
.SEEK_SET
):
879 """Seek to a position in the file.
882 raise ValueError("I/O operation on closed file")
884 if whence
== os
.SEEK_SET
:
885 self
.position
= min(max(pos
, 0), self
.size
)
886 elif whence
== os
.SEEK_CUR
:
888 self
.position
= max(self
.position
+ pos
, 0)
890 self
.position
= min(self
.position
+ pos
, self
.size
)
891 elif whence
== os
.SEEK_END
:
892 self
.position
= max(min(self
.size
+ pos
, self
.size
), 0)
894 raise ValueError("Invalid argument")
897 self
.fileobj
.seek(self
.position
)
900 """Close the file object.
905 """Get an iterator over the file's lines.
908 line
= self
.readline()
917 class TarInfo(object):
918 """Informational class which holds the details about an
919 archive member given by a tar header block.
920 TarInfo objects are returned by TarFile.getmember(),
921 TarFile.getmembers() and TarFile.gettarinfo() and are
922 usually created internally.
925 def __init__(self
, name
=""):
926 """Construct a TarInfo object. name is the optional name
929 self
.name
= name
# member name
930 self
.mode
= 0644 # file permissions
931 self
.uid
= 0 # user id
932 self
.gid
= 0 # group id
933 self
.size
= 0 # file size
934 self
.mtime
= 0 # modification time
935 self
.chksum
= 0 # header checksum
936 self
.type = REGTYPE
# member type
937 self
.linkname
= "" # link name
938 self
.uname
= "root" # user name
939 self
.gname
= "root" # group name
940 self
.devmajor
= 0 # device major number
941 self
.devminor
= 0 # device minor number
943 self
.offset
= 0 # the tar header starts here
944 self
.offset_data
= 0 # the file's data starts here
946 self
.pax_headers
= {} # pax header information
948 # In pax headers the "name" and "linkname" field are called
949 # "path" and "linkpath".
952 def _setpath(self
, name
):
954 path
= property(_getpath
, _setpath
)
956 def _getlinkpath(self
):
958 def _setlinkpath(self
, linkname
):
959 self
.linkname
= linkname
960 linkpath
= property(_getlinkpath
, _setlinkpath
)
963 return "<%s %r at %#x>" % (self
.__class
__.__name
__,self
.name
,id(self
))
965 def get_info(self
, encoding
, errors
):
966 """Return the TarInfo's attributes as a dictionary.
970 "mode": self
.mode
& 07777,
975 "chksum": self
.chksum
,
977 "linkname": self
.linkname
,
980 "devmajor": self
.devmajor
,
981 "devminor": self
.devminor
984 if info
["type"] == DIRTYPE
and not info
["name"].endswith("/"):
987 for key
in ("name", "linkname", "uname", "gname"):
988 if type(info
[key
]) is unicode:
989 info
[key
] = info
[key
].encode(encoding
, errors
)
993 def tobuf(self
, format
=DEFAULT_FORMAT
, encoding
=ENCODING
, errors
="strict"):
994 """Return a tar header as a string of 512 byte blocks.
996 info
= self
.get_info(encoding
, errors
)
998 if format
== USTAR_FORMAT
:
999 return self
.create_ustar_header(info
)
1000 elif format
== GNU_FORMAT
:
1001 return self
.create_gnu_header(info
)
1002 elif format
== PAX_FORMAT
:
1003 return self
.create_pax_header(info
, encoding
, errors
)
1005 raise ValueError("invalid format")
1007 def create_ustar_header(self
, info
):
1008 """Return the object as a ustar header block.
1010 info
["magic"] = POSIX_MAGIC
1012 if len(info
["linkname"]) > LENGTH_LINK
:
1013 raise ValueError("linkname is too long")
1015 if len(info
["name"]) > LENGTH_NAME
:
1016 info
["prefix"], info
["name"] = self
._posix
_split
_name
(info
["name"])
1018 return self
._create
_header
(info
, USTAR_FORMAT
)
1020 def create_gnu_header(self
, info
):
1021 """Return the object as a GNU header block sequence.
1023 info
["magic"] = GNU_MAGIC
1026 if len(info
["linkname"]) > LENGTH_LINK
:
1027 buf
+= self
._create
_gnu
_long
_header
(info
["linkname"], GNUTYPE_LONGLINK
)
1029 if len(info
["name"]) > LENGTH_NAME
:
1030 buf
+= self
._create
_gnu
_long
_header
(info
["name"], GNUTYPE_LONGNAME
)
1032 return buf
+ self
._create
_header
(info
, GNU_FORMAT
)
1034 def create_pax_header(self
, info
, encoding
, errors
):
1035 """Return the object as a ustar header block. If it cannot be
1036 represented this way, prepend a pax extended header sequence
1037 with supplement information.
1039 info
["magic"] = POSIX_MAGIC
1040 pax_headers
= self
.pax_headers
.copy()
1042 # Test string fields for values that exceed the field length or cannot
1043 # be represented in ASCII encoding.
1044 for name
, hname
, length
in (
1045 ("name", "path", LENGTH_NAME
), ("linkname", "linkpath", LENGTH_LINK
),
1046 ("uname", "uname", 32), ("gname", "gname", 32)):
1048 if hname
in pax_headers
:
1049 # The pax header has priority.
1052 val
= info
[name
].decode(encoding
, errors
)
1054 # Try to encode the string as ASCII.
1057 except UnicodeEncodeError:
1058 pax_headers
[hname
] = val
1061 if len(info
[name
]) > length
:
1062 pax_headers
[hname
] = val
# Test number fields for values that exceed the field limit or values
# that need to be stored as float.
1066 for name
, digits
in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
1067 if name
in pax_headers
:
1068 # The pax header has priority. Avoid overflow.
1073 if not 0 <= val
< 8 ** (digits
- 1) or isinstance(val
, float):
1074 pax_headers
[name
] = unicode(val
)
1077 # Create a pax extended header if necessary.
1079 buf
= self
._create
_pax
_generic
_header
(pax_headers
)
1083 return buf
+ self
._create
_header
(info
, USTAR_FORMAT
)
1086 def create_pax_global_header(cls
, pax_headers
):
1087 """Return the object as a pax global header block sequence.
1089 return cls
._create
_pax
_generic
_header
(pax_headers
, type=XGLTYPE
)
1091 def _posix_split_name(self
, name
):
1092 """Split a name longer than 100 chars into a prefix
1095 prefix
= name
[:LENGTH_PREFIX
+ 1]
1096 while prefix
and prefix
[-1] != "/":
1097 prefix
= prefix
[:-1]
1099 name
= name
[len(prefix
):]
1100 prefix
= prefix
[:-1]
1102 if not prefix
or len(name
) > LENGTH_NAME
:
1103 raise ValueError("name is too long")
1107 def _create_header(info
, format
):
1108 """Return a header block. info is a dictionary with file
1109 information, format must be one of the *_FORMAT constants.
1112 stn(info
.get("name", ""), 100),
1113 itn(info
.get("mode", 0) & 07777, 8, format
),
1114 itn(info
.get("uid", 0), 8, format
),
1115 itn(info
.get("gid", 0), 8, format
),
1116 itn(info
.get("size", 0), 12, format
),
1117 itn(info
.get("mtime", 0), 12, format
),
1118 " ", # checksum field
1119 info
.get("type", REGTYPE
),
1120 stn(info
.get("linkname", ""), 100),
1121 stn(info
.get("magic", POSIX_MAGIC
), 8),
1122 stn(info
.get("uname", "root"), 32),
1123 stn(info
.get("gname", "root"), 32),
1124 itn(info
.get("devmajor", 0), 8, format
),
1125 itn(info
.get("devminor", 0), 8, format
),
1126 stn(info
.get("prefix", ""), 155)
1129 buf
= struct
.pack("%ds" % BLOCKSIZE
, "".join(parts
))
1130 chksum
= calc_chksums(buf
[-BLOCKSIZE
:])[0]
1131 buf
= buf
[:-364] + "%06o\0" % chksum
+ buf
[-357:]
1135 def _create_payload(payload
):
1136 """Return the string payload filled with zero bytes
1137 up to the next 512 byte border.
1139 blocks
, remainder
= divmod(len(payload
), BLOCKSIZE
)
1141 payload
+= (BLOCKSIZE
- remainder
) * NUL
1145 def _create_gnu_long_header(cls
, name
, type):
1146 """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
1152 info
["name"] = "././@LongLink"
1154 info
["size"] = len(name
)
1155 info
["magic"] = GNU_MAGIC
1157 # create extended header + name blocks.
1158 return cls
._create
_header
(info
, USTAR_FORMAT
) + \
1159 cls
._create
_payload
(name
)
1162 def _create_pax_generic_header(cls
, pax_headers
, type=XHDTYPE
):
1163 """Return a POSIX.1-2001 extended or global header sequence
1164 that contains a list of keyword, value pairs. The values
1165 must be unicode objects.
1168 for keyword
, value
in pax_headers
.iteritems():
1169 keyword
= keyword
.encode("utf8")
1170 value
= value
.encode("utf8")
1171 l
= len(keyword
) + len(value
) + 3 # ' ' + '=' + '\n'
1178 records
.append("%d %s=%s\n" % (p
, keyword
, value
))
1179 records
= "".join(records
)
1181 # We use a hardcoded "././@PaxHeader" name like star does
1182 # instead of the one that POSIX recommends.
1184 info
["name"] = "././@PaxHeader"
1186 info
["size"] = len(records
)
1187 info
["magic"] = POSIX_MAGIC
1189 # Create pax header + record blocks.
1190 return cls
._create
_header
(info
, USTAR_FORMAT
) + \
1191 cls
._create
_payload
(records
)
1194 def frombuf(cls
, buf
):
1195 """Construct a TarInfo object from a 512 byte string buffer.
1198 raise EmptyHeaderError("empty header")
1199 if len(buf
) != BLOCKSIZE
:
1200 raise TruncatedHeaderError("truncated header")
1201 if buf
.count(NUL
) == BLOCKSIZE
:
1202 raise EOFHeaderError("end of file header")
1204 chksum
= nti(buf
[148:156])
1205 if chksum
not in calc_chksums(buf
):
1206 raise InvalidHeaderError("bad checksum")
1210 obj
.name
= nts(buf
[0:100])
1211 obj
.mode
= nti(buf
[100:108])
1212 obj
.uid
= nti(buf
[108:116])
1213 obj
.gid
= nti(buf
[116:124])
1214 obj
.size
= nti(buf
[124:136])
1215 obj
.mtime
= nti(buf
[136:148])
1217 obj
.type = buf
[156:157]
1218 obj
.linkname
= nts(buf
[157:257])
1219 obj
.uname
= nts(buf
[265:297])
1220 obj
.gname
= nts(buf
[297:329])
1221 obj
.devmajor
= nti(buf
[329:337])
1222 obj
.devminor
= nti(buf
[337:345])
1223 prefix
= nts(buf
[345:500])
1225 # Old V7 tar format represents a directory as a regular
1226 # file with a trailing slash.
1227 if obj
.type == AREGTYPE
and obj
.name
.endswith("/"):
1230 # Remove redundant slashes from directories.
1232 obj
.name
= obj
.name
.rstrip("/")
1234 # Reconstruct a ustar longname.
1235 if prefix
and obj
.type not in GNU_TYPES
:
1236 obj
.name
= prefix
+ "/" + obj
.name
1240 def fromtarfile(cls
, tarfile
):
1241 """Return the next TarInfo object from TarFile object
1244 buf
= tarfile
.fileobj
.read(BLOCKSIZE
)
1245 obj
= cls
.frombuf(buf
)
1246 obj
.offset
= tarfile
.fileobj
.tell() - BLOCKSIZE
1247 return obj
._proc
_member
(tarfile
)
1249 #--------------------------------------------------------------------------
1250 # The following are methods that are called depending on the type of a
1251 # member. The entry point is _proc_member() which can be overridden in a
1252 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1253 # implement the following
1255 # 1. Set self.offset_data to the position where the data blocks begin,
1256 # if there is data that follows.
1257 # 2. Set tarfile.offset to the position where the next member's header will
1259 # 3. Return self or another valid TarInfo object.
1260 def _proc_member(self
, tarfile
):
1261 """Choose the right processing method depending on
1262 the type and call it.
1264 if self
.type in (GNUTYPE_LONGNAME
, GNUTYPE_LONGLINK
):
1265 return self
._proc
_gnulong
(tarfile
)
1266 elif self
.type == GNUTYPE_SPARSE
:
1267 return self
._proc
_sparse
(tarfile
)
1268 elif self
.type in (XHDTYPE
, XGLTYPE
, SOLARIS_XHDTYPE
):
1269 return self
._proc
_pax
(tarfile
)
1271 return self
._proc
_builtin
(tarfile
)
1273 def _proc_builtin(self
, tarfile
):
1274 """Process a builtin type or an unknown type which
1275 will be treated as a regular file.
1277 self
.offset_data
= tarfile
.fileobj
.tell()
1278 offset
= self
.offset_data
1279 if self
.isreg() or self
.type not in SUPPORTED_TYPES
:
1280 # Skip the following data blocks.
1281 offset
+= self
._block
(self
.size
)
1282 tarfile
.offset
= offset
1284 # Patch the TarInfo object with saved global
1285 # header information.
1286 self
._apply
_pax
_info
(tarfile
.pax_headers
, tarfile
.encoding
, tarfile
.errors
)
1290 def _proc_gnulong(self
, tarfile
):
1291 """Process the blocks that hold a GNU longname
1294 buf
= tarfile
.fileobj
.read(self
._block
(self
.size
))
1296 # Fetch the next header and process it.
1298 next
= self
.fromtarfile(tarfile
)
1300 raise SubsequentHeaderError("missing or bad subsequent header")
1302 # Patch the TarInfo object from the next header with
1303 # the longname information.
1304 next
.offset
= self
.offset
1305 if self
.type == GNUTYPE_LONGNAME
:
1306 next
.name
= nts(buf
)
1307 elif self
.type == GNUTYPE_LONGLINK
:
1308 next
.linkname
= nts(buf
)
1312 def _proc_sparse(self
, tarfile
):
1313 """Process a GNU sparse header plus extra headers.
1320 # There are 4 possible sparse structs in the
1324 offset
= nti(buf
[pos
:pos
+ 12])
1325 numbytes
= nti(buf
[pos
+ 12:pos
+ 24])
1328 if offset
> lastpos
:
1329 sp
.append(_hole(lastpos
, offset
- lastpos
))
1330 sp
.append(_data(offset
, numbytes
, realpos
))
1332 lastpos
= offset
+ numbytes
1335 isextended
= ord(buf
[482])
1336 origsize
= nti(buf
[483:495])
1338 # If the isextended flag is given,
1339 # there are extra headers to process.
1340 while isextended
== 1:
1341 buf
= tarfile
.fileobj
.read(BLOCKSIZE
)
1343 for i
in xrange(21):
1345 offset
= nti(buf
[pos
:pos
+ 12])
1346 numbytes
= nti(buf
[pos
+ 12:pos
+ 24])
1349 if offset
> lastpos
:
1350 sp
.append(_hole(lastpos
, offset
- lastpos
))
1351 sp
.append(_data(offset
, numbytes
, realpos
))
1353 lastpos
= offset
+ numbytes
1355 isextended
= ord(buf
[504])
1357 if lastpos
< origsize
:
1358 sp
.append(_hole(lastpos
, origsize
- lastpos
))
1362 self
.offset_data
= tarfile
.fileobj
.tell()
1363 tarfile
.offset
= self
.offset_data
+ self
._block
(self
.size
)
1364 self
.size
= origsize
1368 def _proc_pax(self
, tarfile
):
1369 """Process an extended or global header as described in
1372 # Read the header information.
1373 buf
= tarfile
.fileobj
.read(self
._block
(self
.size
))
1375 # A pax header stores supplemental information for either
1376 # the following file (extended) or all following files
1378 if self
.type == XGLTYPE
:
1379 pax_headers
= tarfile
.pax_headers
1381 pax_headers
= tarfile
.pax_headers
.copy()
1383 # Parse pax header information. A record looks like that:
1384 # "%d %s=%s\n" % (length, keyword, value). length is the size
1385 # of the complete record including the length field itself and
1386 # the newline. keyword and value are both UTF-8 encoded strings.
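# Illustrative example (not from the original source): storing the keyword
# "path" with the value "foo" yields the record "12 path=foo\n"; the
# leading 12 counts every byte of the record, including the digits of the
# length field itself and the trailing newline.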
1387 regex
= re
.compile(r
"(\d+) ([^=]+)=", re
.U
)
1390 match
= regex
.match(buf
, pos
)
1394 length
, keyword
= match
.groups()
1395 length
= int(length
)
1396 value
= buf
[match
.end(2) + 1:match
.start(1) + length
- 1]
1398 keyword
= keyword
.decode("utf8")
1399 value
= value
.decode("utf8")
1401 pax_headers
[keyword
] = value
1404 # Fetch the next header.
1406 next
= self
.fromtarfile(tarfile
)
1408 raise SubsequentHeaderError("missing or bad subsequent header")
1410 if self
.type in (XHDTYPE
, SOLARIS_XHDTYPE
):
1411 # Patch the TarInfo object with the extended header info.
1412 next
._apply
_pax
_info
(pax_headers
, tarfile
.encoding
, tarfile
.errors
)
1413 next
.offset
= self
.offset
1415 if "size" in pax_headers
:
1416 # If the extended header replaces the size field,
1417 # we need to recalculate the offset where the next
1419 offset
= next
.offset_data
1420 if next
.isreg() or next
.type not in SUPPORTED_TYPES
:
1421 offset
+= next
._block
(next
.size
)
1422 tarfile
.offset
= offset
1426 def _apply_pax_info(self
, pax_headers
, encoding
, errors
):
1427 """Replace fields with supplemental information from a previous
1428 pax extended or global header.
1430 for keyword
, value
in pax_headers
.iteritems():
1431 if keyword
not in PAX_FIELDS
:
1434 if keyword
== "path":
1435 value
= value
.rstrip("/")
1437 if keyword
in PAX_NUMBER_FIELDS
:
1439 value
= PAX_NUMBER_FIELDS
[keyword
](value
)
1443 value
= uts(value
, encoding
, errors
)
1445 setattr(self
, keyword
, value
)
1447 self
.pax_headers
= pax_headers
.copy()
1449 def _block(self
, count
):
1450 """Round up a byte count by BLOCKSIZE and return it,
1451 e.g. _block(834) => 1024.
1453 blocks
, remainder
= divmod(count
, BLOCKSIZE
)
1456 return blocks
* BLOCKSIZE
1459 return self
.type in REGULAR_TYPES
1463 return self
.type == DIRTYPE
1465 return self
.type == SYMTYPE
1467 return self
.type == LNKTYPE
1469 return self
.type == CHRTYPE
1471 return self
.type == BLKTYPE
1473 return self
.type == FIFOTYPE
1475 return self
.type == GNUTYPE_SPARSE
1477 return self
.type in (CHRTYPE
, BLKTYPE
, FIFOTYPE
)
1480 class TarFile(object):
1481 """The TarFile Class provides an interface to tar archives.
1484 debug
= 0 # May be set from 0 (no msgs) to 3 (all msgs)
1486 dereference
= False # If true, add content of linked file to the
1487 # tar file, else the link.
1489 ignore_zeros
= False # If true, skips empty or invalid blocks and
1490 # continues processing.
1492 errorlevel
= 1 # If 0, fatal errors only appear in debug
1493 # messages (if debug >= 0). If > 0, errors
1494 # are passed to the caller as exceptions.
1496 format
= DEFAULT_FORMAT
# The format to use when creating an archive.
1498 encoding
= ENCODING
# Encoding for 8-bit character strings.
1500 errors
= None # Error handler for unicode conversion.
1502 tarinfo
= TarInfo
# The default TarInfo class to use.
1504 fileobject
= ExFileObject
# The default ExFileObject class to use.
1506 def __init__(self
, name
=None, mode
="r", fileobj
=None, format
=None,
1507 tarinfo
=None, dereference
=None, ignore_zeros
=None, encoding
=None,
1508 errors
=None, pax_headers
=None, debug
=None, errorlevel
=None):
1509 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1510 read from an existing archive, 'a' to append data to an existing
1511 file or 'w' to create a new file overwriting an existing one. `mode'
1513 If `fileobj' is given, it is used for reading or writing data. If it
1514 can be determined, `mode' is overridden by `fileobj's mode.
`fileobj' is not closed when TarFile is closed.
1517 if len(mode
) > 1 or mode
not in "raw":
1518 raise ValueError("mode must be 'r', 'a' or 'w'")
1520 self
._mode
= {"r": "rb", "a": "r+b", "w": "wb"}[mode
]
1523 if self
.mode
== "a" and not os
.path
.exists(name
):
1524 # Create nonexistent files in append mode.
1527 fileobj
= bltn_open(name
, self
._mode
)
1528 self
._extfileobj
= False
1530 if name
is None and hasattr(fileobj
, "name"):
1532 if hasattr(fileobj
, "mode"):
1533 self
._mode
= fileobj
.mode
1534 self
._extfileobj
= True
1535 self
.name
= os
.path
.abspath(name
) if name
else None
1536 self
.fileobj
= fileobj
1539 if format
is not None:
1540 self
.format
= format
1541 if tarinfo
is not None:
1542 self
.tarinfo
= tarinfo
1543 if dereference
is not None:
1544 self
.dereference
= dereference
1545 if ignore_zeros
is not None:
1546 self
.ignore_zeros
= ignore_zeros
1547 if encoding
is not None:
1548 self
.encoding
= encoding
1550 if errors
is not None:
1551 self
.errors
= errors
1553 self
.errors
= "utf-8"
1555 self
.errors
= "strict"
1557 if pax_headers
is not None and self
.format
== PAX_FORMAT
:
1558 self
.pax_headers
= pax_headers
1560 self
.pax_headers
= {}
1562 if debug
is not None:
1564 if errorlevel
is not None:
1565 self
.errorlevel
= errorlevel
1567 # Init datastructures.
1569 self
.members
= [] # list of members as TarInfo objects
1570 self
._loaded
= False # flag if all members have been read
1571 self
.offset
= self
.fileobj
.tell()
1572 # current position in the archive file
1573 self
.inodes
= {} # dictionary caching the inodes of
1574 # archive members already added
1577 if self
.mode
== "r":
1578 self
.firstmember
= None
1579 self
.firstmember
= self
.next()
1581 if self
.mode
== "a":
1582 # Move to the end of the archive,
1583 # before the first empty block.
1585 self
.fileobj
.seek(self
.offset
)
1587 tarinfo
= self
.tarinfo
.fromtarfile(self
)
1588 self
.members
.append(tarinfo
)
1589 except EOFHeaderError
:
1590 self
.fileobj
.seek(self
.offset
)
1592 except HeaderError
, e
:
1593 raise ReadError(str(e
))
1595 if self
.mode
in "aw":
1598 if self
.pax_headers
:
1599 buf
= self
.tarinfo
.create_pax_global_header(self
.pax_headers
.copy())
1600 self
.fileobj
.write(buf
)
1601 self
.offset
+= len(buf
)
1603 if not self
._extfileobj
:
1604 self
.fileobj
.close()
1608 def _getposix(self
):
1609 return self
.format
== USTAR_FORMAT
1610 def _setposix(self
, value
):
1612 warnings
.warn("use the format attribute instead", DeprecationWarning,
1615 self
.format
= USTAR_FORMAT
1617 self
.format
= GNU_FORMAT
1618 posix
= property(_getposix
, _setposix
)
1620 #--------------------------------------------------------------------------
1621 # Below are the classmethods which act as alternate constructors to the
1622 # TarFile class. The open() method is the only one that is needed for
1623 # public use; it is the "super"-constructor and is able to select an
1624 # adequate "sub"-constructor for a particular compression using the mapping
1627 # This concept allows one to subclass TarFile without losing the comfort of
1628 # the super-constructor. A sub-constructor is registered and made available
1629 # by adding it to the mapping in OPEN_METH.
1632 def open(cls
, name
=None, mode
="r", fileobj
=None, bufsize
=RECORDSIZE
, **kwargs
):
1633 """Open a tar archive for reading, writing or appending. Return
1634 an appropriate TarFile class.
1637 'r' or 'r:*' open for reading with transparent compression
1638 'r:' open for reading exclusively uncompressed
1639 'r:gz' open for reading with gzip compression
1640 'r:bz2' open for reading with bzip2 compression
1641 'a' or 'a:' open for appending, creating the file if necessary
1642 'w' or 'w:' open for writing without compression
1643 'w:gz' open for writing with gzip compression
1644 'w:bz2' open for writing with bzip2 compression
1646 'r|*' open a stream of tar blocks with transparent compression
1647 'r|' open an uncompressed stream of tar blocks for reading
1648 'r|gz' open a gzip compressed stream of tar blocks
1649 'r|bz2' open a bzip2 compressed stream of tar blocks
1650 'w|' open an uncompressed stream for writing
1651 'w|gz' open a gzip compressed stream for writing
1652 'w|bz2' open a bzip2 compressed stream for writing
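# Illustrative usage (not from the original source); the archive name is
# made up.
#
#     tar = tarfile.open("backup.tar.gz", "r:gz")          # seekable archive
#     tar = tarfile.open(fileobj=sys.stdin, mode="r|*")    # non-seekable stream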
1655 if not name
and not fileobj
:
1656 raise ValueError("nothing to open")
1658 if mode
in ("r", "r:*"):
1659 # Find out which *open() is appropriate for opening the file.
1660 for comptype
in cls
.OPEN_METH
:
1661 func
= getattr(cls
, cls
.OPEN_METH
[comptype
])
1662 if fileobj
is not None:
1663 saved_pos
= fileobj
.tell()
1665 return func(name
, "r", fileobj
, **kwargs
)
1666 except (ReadError
, CompressionError
), e
:
1667 if fileobj
is not None:
1668 fileobj
.seek(saved_pos
)
1670 raise ReadError("file could not be opened successfully")
1673 filemode
, comptype
= mode
.split(":", 1)
1674 filemode
= filemode
or "r"
1675 comptype
= comptype
or "tar"
1677 # Select the *open() function according to
1678 # given compression.
1679 if comptype
in cls
.OPEN_METH
:
1680 func
= getattr(cls
, cls
.OPEN_METH
[comptype
])
1682 raise CompressionError("unknown compression type %r" % comptype
)
1683 return func(name
, filemode
, fileobj
, **kwargs
)
1686 filemode
, comptype
= mode
.split("|", 1)
1687 filemode
= filemode
or "r"
1688 comptype
= comptype
or "tar"
1690 if filemode
not in "rw":
1691 raise ValueError("mode must be 'r' or 'w'")
1693 t
= cls(name
, filemode
,
1694 _Stream(name
, filemode
, comptype
, fileobj
, bufsize
),
1696 t
._extfileobj
= False
1700 return cls
.taropen(name
, mode
, fileobj
, **kwargs
)
1702 raise ValueError("undiscernible mode")
1705 def taropen(cls
, name
, mode
="r", fileobj
=None, **kwargs
):
1706 """Open uncompressed tar archive name for reading or writing.
1708 if len(mode
) > 1 or mode
not in "raw":
1709 raise ValueError("mode must be 'r', 'a' or 'w'")
1710 return cls(name
, mode
, fileobj
, **kwargs
)
1713 def gzopen(cls
, name
, mode
="r", fileobj
=None, compresslevel
=9, **kwargs
):
1714 """Open gzip compressed tar archive name for reading or writing.
1715 Appending is not allowed.
1717 if len(mode
) > 1 or mode
not in "rw":
1718 raise ValueError("mode must be 'r' or 'w'")
1723 except (ImportError, AttributeError):
1724 raise CompressionError("gzip module is not available")
1727 fileobj
= bltn_open(name
, mode
+ "b")
1730 t
= cls
.taropen(name
, mode
,
1731 gzip
.GzipFile(name
, mode
, compresslevel
, fileobj
),
1734 raise ReadError("not a gzip file")
1735 t
._extfileobj
= False
1739 def bz2open(cls
, name
, mode
="r", fileobj
=None, compresslevel
=9, **kwargs
):
1740 """Open bzip2 compressed tar archive name for reading or writing.
1741 Appending is not allowed.
1743 if len(mode
) > 1 or mode
not in "rw":
1744 raise ValueError("mode must be 'r' or 'w'.")
1749 raise CompressionError("bz2 module is not available")
1751 if fileobj
is not None:
1752 fileobj
= _BZ2Proxy(fileobj
, mode
)
1754 fileobj
= bz2
.BZ2File(name
, mode
, compresslevel
=compresslevel
)
1757 t
= cls
.taropen(name
, mode
, fileobj
, **kwargs
)
1758 except (IOError, EOFError):
1759 raise ReadError("not a bzip2 file")
1760 t
._extfileobj
= False
1763 # All *open() methods are registered here.
1765 "tar": "taropen", # uncompressed tar
1766 "gz": "gzopen", # gzip compressed tar
1767 "bz2": "bz2open" # bzip2 compressed tar
1770 #--------------------------------------------------------------------------
1771 # The public methods which TarFile provides:
1774 """Close the TarFile. In write-mode, two finishing zero blocks are
1775 appended to the archive.
1780 if self
.mode
in "aw":
1781 self
.fileobj
.write(NUL
* (BLOCKSIZE
* 2))
1782 self
.offset
+= (BLOCKSIZE
* 2)
1783 # fill up the end with zero-blocks
1784 # (like option -b20 for tar does)
1785 blocks
, remainder
= divmod(self
.offset
, RECORDSIZE
)
1787 self
.fileobj
.write(NUL
* (RECORDSIZE
- remainder
))
1789 if not self
._extfileobj
:
1790 self
.fileobj
.close()
1793 def getmember(self
, name
):
1794 """Return a TarInfo object for member `name'. If `name' can not be
1795 found in the archive, KeyError is raised. If a member occurs more
1796 than once in the archive, its last occurrence is assumed to be the
1797 most up-to-date version.
1799 tarinfo
= self
._getmember
(name
)
1801 raise KeyError("filename %r not found" % name
)
1804 def getmembers(self
):
1805 """Return the members of the archive as a list of TarInfo objects. The
1806 list has the same order as the members in the archive.
1809 if not self
._loaded
: # if we want to obtain a list of
1810 self
._load
() # all members, we first have to
1811 # scan the whole archive.
1815 """Return the members of the archive as a list of their names. It has
1816 the same order as the list returned by getmembers().
1818 return [tarinfo
.name
for tarinfo
in self
.getmembers()]
1820 def gettarinfo(self
, name
=None, arcname
=None, fileobj
=None):
1821 """Create a TarInfo object for either the file `name' or the file
1822 object `fileobj' (using os.fstat on its file descriptor). You can
1823 modify some of the TarInfo's attributes before you add it using
1824 addfile(). If given, `arcname' specifies an alternative name for the
1825 file in the archive.
1829 # When fileobj is given, replace name by
1830 # fileobj's real name.
1831 if fileobj
is not None:
1834 # Building the name of the member in the archive.
1835 # Backward slashes are converted to forward slashes,
1836 # Absolute paths are turned to relative paths.
1839 drv
, arcname
= os
.path
.splitdrive(arcname
)
1840 arcname
= arcname
.replace(os
.sep
, "/")
1841 arcname
= arcname
.lstrip("/")
1843 # Now, fill the TarInfo object with
1844 # information specific for the file.
1845 tarinfo
= self
.tarinfo()
1846 tarinfo
.tarfile
= self
1848 # Use os.stat or os.lstat, depending on platform
1849 # and if symlinks shall be resolved.
1851 if hasattr(os
, "lstat") and not self
.dereference
:
1852 statres
= os
.lstat(name
)
1854 statres
= os
.stat(name
)
1856 statres
= os
.fstat(fileobj
.fileno())
1859 stmd
= statres
.st_mode
1860 if stat
.S_ISREG(stmd
):
1861 inode
= (statres
.st_ino
, statres
.st_dev
)
1862 if not self
.dereference
and statres
.st_nlink
> 1 and \
1863 inode
in self
.inodes
and arcname
!= self
.inodes
[inode
]:
1864 # Is it a hardlink to an already
1867 linkname
= self
.inodes
[inode
]
# The inode is added only if it's valid.
1870 # For win32 it is always 0.
1873 self
.inodes
[inode
] = arcname
1874 elif stat
.S_ISDIR(stmd
):
1876 elif stat
.S_ISFIFO(stmd
):
1878 elif stat
.S_ISLNK(stmd
):
1880 linkname
= os
.readlink(name
)
1881 elif stat
.S_ISCHR(stmd
):
1883 elif stat
.S_ISBLK(stmd
):
1888 # Fill the TarInfo object with all
1889 # information we can get.
1890 tarinfo
.name
= arcname
1892 tarinfo
.uid
= statres
.st_uid
1893 tarinfo
.gid
= statres
.st_gid
1894 if stat
.S_ISREG(stmd
):
1895 tarinfo
.size
= statres
.st_size
1898 tarinfo
.mtime
= statres
.st_mtime
1900 tarinfo
.linkname
= linkname
1903 tarinfo
.uname
= pwd
.getpwuid(tarinfo
.uid
)[0]
1908 tarinfo
.gname
= grp
.getgrgid(tarinfo
.gid
)[0]
1912 if type in (CHRTYPE
, BLKTYPE
):
1913 if hasattr(os
, "major") and hasattr(os
, "minor"):
1914 tarinfo
.devmajor
= os
.major(statres
.st_rdev
)
1915 tarinfo
.devminor
= os
.minor(statres
.st_rdev
)
1918 def list(self
, verbose
=True):
1919 """Print a table of contents to sys.stdout. If `verbose' is False, only
1920 the names of the members are printed. If it is True, an `ls -l'-like
1925 for tarinfo
in self
:
1927 print filemode(tarinfo
.mode
),
1928 print "%s/%s" % (tarinfo
.uname
or tarinfo
.uid
,
1929 tarinfo
.gname
or tarinfo
.gid
),
1930 if tarinfo
.ischr() or tarinfo
.isblk():
1931 print "%10s" % ("%d,%d" \
1932 % (tarinfo
.devmajor
, tarinfo
.devminor
)),
1934 print "%10d" % tarinfo
.size
,
1935 print "%d-%02d-%02d %02d:%02d:%02d" \
1936 % time
.localtime(tarinfo
.mtime
)[:6],
1938 print tarinfo
.name
+ ("/" if tarinfo
.isdir() else ""),
1942 print "->", tarinfo
.linkname
,
1944 print "link to", tarinfo
.linkname
,
1947 def add(self
, name
, arcname
=None, recursive
=True, exclude
=None, filter=None):
1948 """Add the file `name' to the archive. `name' may be any type of file
1949 (directory, fifo, symbolic link, etc.). If given, `arcname'
1950 specifies an alternative name for the file in the archive.
1951 Directories are added recursively by default. This can be avoided by
1952 setting `recursive' to False. `exclude' is a function that should
1953 return True for each filename to be excluded. `filter' is a function
1954 that expects a TarInfo object argument and returns the changed
1955 TarInfo object, if it returns None the TarInfo object will be
1956 excluded from the archive.
1963 # Exclude pathnames.
1964 if exclude
is not None:
1966 warnings
.warn("use the filter argument instead",
1967 DeprecationWarning, 2)
1969 self
._dbg
(2, "tarfile: Excluded %r" % name
)
1972 # Skip if somebody tries to archive the archive...
1973 if self
.name
is not None and os
.path
.abspath(name
) == self
.name
:
1974 self
._dbg
(2, "tarfile: Skipped %r" % name
)
1979 # Create a TarInfo object from the file.
1980 tarinfo
= self
.gettarinfo(name
, arcname
)
1983 self
._dbg
(1, "tarfile: Unsupported type %r" % name
)
1986 # Change or exclude the TarInfo object.
1987 if filter is not None:
1988 tarinfo
= filter(tarinfo
)
1990 self
._dbg
(2, "tarfile: Excluded %r" % name
)
1993 # Append the tar header and data to the archive.
1995 f
= bltn_open(name
, "rb")
1996 self
.addfile(tarinfo
, f
)
1999 elif tarinfo
.isdir():
2000 self
.addfile(tarinfo
)
2002 for f
in os
.listdir(name
):
2003 self
.add(os
.path
.join(name
, f
), os
.path
.join(arcname
, f
),
2004 recursive
, exclude
, filter)
2007 self
.addfile(tarinfo
)
2009 def addfile(self
, tarinfo
, fileobj
=None):
2010 """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
2011 given, tarinfo.size bytes are read from it and added to the archive.
2012 You can create TarInfo objects using gettarinfo().
2013 On Windows platforms, `fileobj' should always be opened with mode
2014 'rb' to avoid irritation about the file size.
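# Illustrative sketch (not from the original source): adding in-memory data
# with addfile().  Assumes `tar' is a TarFile opened for writing; the member
# name and payload are made up.
#
#     import StringIO
#     data = "Hello, world!\n"
#     info = TarInfo("hello.txt")
#     info.size = len(data)
#     tar.addfile(info, StringIO.StringIO(data))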
2018 tarinfo
= copy
.copy(tarinfo
)
2020 buf
= tarinfo
.tobuf(self
.format
, self
.encoding
, self
.errors
)
2021 self
.fileobj
.write(buf
)
2022 self
.offset
+= len(buf
)
2024 # If there's data to follow, append it.
2025 if fileobj
is not None:
2026 copyfileobj(fileobj
, self
.fileobj
, tarinfo
.size
)
2027 blocks
, remainder
= divmod(tarinfo
.size
, BLOCKSIZE
)
2029 self
.fileobj
.write(NUL
* (BLOCKSIZE
- remainder
))
2031 self
.offset
+= blocks
* BLOCKSIZE
2033 self
.members
.append(tarinfo
)
2035 def extractall(self
, path
=".", members
=None):
2036 """Extract all members from the archive to the current working
2037 directory and set owner, modification time and permissions on
2038 directories afterwards. `path' specifies a different directory
2039 to extract to. `members' is optional and must be a subset of the
2040 list returned by getmembers().
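# Illustrative usage (not from the original source); the archive name and
# target directory are made up.
#
#     tar = tarfile.open("backup.tar.gz", "r:gz")
#     tar.extractall(path="/tmp/restore")
#     tar.close()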
2047 for tarinfo
in members
:
2049 # Extract directories with a safe mode.
2050 directories
.append(tarinfo
)
2051 tarinfo
= copy
.copy(tarinfo
)
2053 self
.extract(tarinfo
, path
)
2055 # Reverse sort directories.
2056 directories
.sort(key
=operator
.attrgetter('name'))
2057 directories
.reverse()
2059 # Set correct owner, mtime and filemode on directories.
2060 for tarinfo
in directories
:
2061 dirpath
= os
.path
.join(path
, tarinfo
.name
)
2063 self
.chown(tarinfo
, dirpath
)
2064 self
.utime(tarinfo
, dirpath
)
2065 self
.chmod(tarinfo
, dirpath
)
2066 except ExtractError
, e
:
2067 if self
.errorlevel
> 1:
2070 self
._dbg
(1, "tarfile: %s" % e
)
2072 def extract(self
, member
, path
=""):
2073 """Extract a member from the archive to the current working directory,
2074 using its full name. Its file information is extracted as accurately
2075 as possible. `member' may be a filename or a TarInfo object. You can
2076 specify a different directory using `path'.
2080 if isinstance(member
, basestring
):
2081 tarinfo
= self
.getmember(member
)
2085 # Prepare the link target for makelink().
2087 tarinfo
._link
_target
= os
.path
.join(path
, tarinfo
.linkname
)
2090 self
._extract
_member
(tarinfo
, os
.path
.join(path
, tarinfo
.name
))
2091 except EnvironmentError, e
:
2092 if self
.errorlevel
> 0:
2095 if e
.filename
is None:
2096 self
._dbg
(1, "tarfile: %s" % e
.strerror
)
2098 self
._dbg
(1, "tarfile: %s %r" % (e
.strerror
, e
.filename
))
2099 except ExtractError
, e
:
2100 if self
.errorlevel
> 1:
2103 self
._dbg
(1, "tarfile: %s" % e
)
2105 def extractfile(self
, member
):
2106 """Extract a member from the archive as a file object. `member' may be
2107 a filename or a TarInfo object. If `member' is a regular file, a
2108 file-like object is returned. If `member' is a link, a file-like
2109 object is constructed from the link's target. If `member' is none of
2110 the above, None is returned.
2111 The file-like object is read-only and provides the following
2112 methods: read(), readline(), readlines(), seek() and tell()
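# Illustrative usage (not from the original source); the member name is
# made up and `tar' is assumed to be an open TarFile.
#
#     f = tar.extractfile("etc/motd")
#     if f is not None:
#         print f.read()
#         f.close()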
2116 if isinstance(member
, basestring
):
2117 tarinfo
= self
.getmember(member
)
2122 return self
.fileobject(self
, tarinfo
)
2124 elif tarinfo
.type not in SUPPORTED_TYPES
:
2125 # If a member's type is unknown, it is treated as a
2127 return self
.fileobject(self
, tarinfo
)
2129 elif tarinfo
.islnk() or tarinfo
.issym():
2130 if isinstance(self
.fileobj
, _Stream
):
2131 # A small but ugly workaround for the case that someone tries
2132 # to extract a (sym)link as a file-object from a non-seekable
2133 # stream of tar blocks.
2134 raise StreamError("cannot extract (sym)link as file object")
2136 # A (sym)link's file object is its target's file object.
2137 return self
.extractfile(self
._getmember
(tarinfo
.linkname
,
2140 # If there's no data associated with the member (directory, chrdev,
2141 # blkdev, etc.), return None instead of a file object.
2144 def _extract_member(self
, tarinfo
, targetpath
):
2145 """Extract the TarInfo object tarinfo to a physical
2146 file called targetpath.
2148 # Fetch the TarInfo object for the given name
2149 # and build the destination pathname, replacing
2150 # forward slashes to platform specific separators.
2151 targetpath
= targetpath
.rstrip("/")
2152 targetpath
= targetpath
.replace("/", os
.sep
)
2154 # Create all upper directories.
2155 upperdirs
= os
.path
.dirname(targetpath
)
2156 if upperdirs
and not os
.path
.exists(upperdirs
):
2157 # Create directories that are not part of the archive with
2158 # default permissions.
2159 os
.makedirs(upperdirs
)
2161 if tarinfo
.islnk() or tarinfo
.issym():
2162 self
._dbg
(1, "%s -> %s" % (tarinfo
.name
, tarinfo
.linkname
))
2164 self
._dbg
(1, tarinfo
.name
)
2167 self
.makefile(tarinfo
, targetpath
)
2168 elif tarinfo
.isdir():
2169 self
.makedir(tarinfo
, targetpath
)
2170 elif tarinfo
.isfifo():
2171 self
.makefifo(tarinfo
, targetpath
)
2172 elif tarinfo
.ischr() or tarinfo
.isblk():
2173 self
.makedev(tarinfo
, targetpath
)
2174 elif tarinfo
.islnk() or tarinfo
.issym():
2175 self
.makelink(tarinfo
, targetpath
)
2176 elif tarinfo
.type not in SUPPORTED_TYPES
:
2177 self
.makeunknown(tarinfo
, targetpath
)
2179 self
.makefile(tarinfo
, targetpath
)
2181 self
.chown(tarinfo
, targetpath
)
2182 if not tarinfo
.issym():
2183 self
.chmod(tarinfo
, targetpath
)
2184 self
.utime(tarinfo
, targetpath
)
2186 #--------------------------------------------------------------------------
2187 # Below are the different file methods. They are called via
2188 # _extract_member() when extract() is called. They can be replaced in a
2189 # subclass to implement other functionality.
2191 def makedir(self
, tarinfo
, targetpath
):
2192 """Make a directory called targetpath.
2195 # Use a safe mode for the directory, the real mode is set
2196 # later in _extract_member().
2197 os
.mkdir(targetpath
, 0700)
2198 except EnvironmentError, e
:
2199 if e
.errno
!= errno
.EEXIST
:
2202 def makefile(self
, tarinfo
, targetpath
):
2203 """Make a file called targetpath.
2205 source
= self
.extractfile(tarinfo
)
2206 target
= bltn_open(targetpath
, "wb")
2207 copyfileobj(source
, target
)
2211 def makeunknown(self
, tarinfo
, targetpath
):
2212 """Make a file from a TarInfo object with an unknown type
2215 self
.makefile(tarinfo
, targetpath
)
2216 self
._dbg
(1, "tarfile: Unknown file type %r, " \
2217 "extracted as regular file." % tarinfo
.type)
2219 def makefifo(self
, tarinfo
, targetpath
):
2220 """Make a fifo called targetpath.
2222 if hasattr(os
, "mkfifo"):
2223 os
.mkfifo(targetpath
)
2225 raise ExtractError("fifo not supported by system")
    def makedev(self, tarinfo, targetpath):
        """Make a character or block device called targetpath.
        """
        if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
            raise ExtractError("special devices not supported by system")

        mode = tarinfo.mode
        if tarinfo.isblk():
            mode |= stat.S_IFBLK
        else:
            mode |= stat.S_IFCHR

        os.mknod(targetpath, mode,
                 os.makedev(tarinfo.devmajor, tarinfo.devminor))
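    # Sketch of the values makedev() passes to os.mknod(), assuming a block
    # device member with mode 0660, devmajor 8 and devminor 1 (all values are
    # illustrative):
    #
    #   mode = 0660 | stat.S_IFBLK            # add the file type bits
    #   device = os.makedev(8, 1)             # combine major and minor number
    #   os.mknod("/tmp/sda1", mode, device)   # needs sufficient privileges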
    def makelink(self, tarinfo, targetpath):
        """Make a (symbolic) link called targetpath. If it cannot be created
           (platform limitation), we try to make a copy of the referenced file
           instead of a link.
        """
        try:
            if tarinfo.issym():
                os.symlink(tarinfo.linkname, targetpath)
            else:
                # See extract().
                os.link(tarinfo._link_target, targetpath)
        except AttributeError:
            if tarinfo.issym():
                linkpath = os.path.dirname(tarinfo.name) + "/" + \
                           tarinfo.linkname
            else:
                linkpath = tarinfo.linkname

            try:
                self._extract_member(self.getmember(linkpath), targetpath)
            except (EnvironmentError, KeyError), e:
                linkpath = linkpath.replace("/", os.sep)
                try:
                    shutil.copy2(linkpath, targetpath)
                except EnvironmentError, e:
                    raise IOError("link could not be created")
    def chown(self, tarinfo, targetpath):
        """Set owner of targetpath according to tarinfo.
        """
        if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
            # We have to be root to do so.
            try:
                g = grp.getgrnam(tarinfo.gname)[2]
            except KeyError:
                try:
                    g = grp.getgrgid(tarinfo.gid)[2]
                except KeyError:
                    g = os.getgid()
            try:
                u = pwd.getpwnam(tarinfo.uname)[2]
            except KeyError:
                try:
                    u = pwd.getpwuid(tarinfo.uid)[2]
                except KeyError:
                    u = os.getuid()
            try:
                if tarinfo.issym() and hasattr(os, "lchown"):
                    os.lchown(targetpath, u, g)
                else:
                    if sys.platform != "os2emx":
                        os.chown(targetpath, u, g)
            except EnvironmentError, e:
                raise ExtractError("could not change owner")
    def chmod(self, tarinfo, targetpath):
        """Set file permissions of targetpath according to tarinfo.
        """
        if hasattr(os, 'chmod'):
            try:
                os.chmod(targetpath, tarinfo.mode)
            except EnvironmentError, e:
                raise ExtractError("could not change mode")
    def utime(self, tarinfo, targetpath):
        """Set modification time of targetpath according to tarinfo.
        """
        if not hasattr(os, 'utime'):
            return
        try:
            os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
        except EnvironmentError, e:
            raise ExtractError("could not change modification time")
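    # chown(), chmod() and utime() can likewise be overridden, e.g. to extract
    # an archive without restoring any metadata.  A minimal sketch (the name
    # PlainTarFile is made up for illustration):
    #
    #   class PlainTarFile(TarFile):
    #       def chown(self, tarinfo, targetpath):
    #           pass            # keep the extracting user's ownership
    #       def utime(self, tarinfo, targetpath):
    #           pass            # keep the current timestamps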
    #--------------------------------------------------------------------------
    def next(self):
        """Return the next member of the archive as a TarInfo object, when
           TarFile is opened for reading. Return None if there is no more
           available.
        """
        self._check("ra")
        if self.firstmember is not None:
            m = self.firstmember
            self.firstmember = None
            return m

        # Read the next block.
        self.fileobj.seek(self.offset)
        tarinfo = None
        while True:
            try:
                tarinfo = self.tarinfo.fromtarfile(self)
            except EOFHeaderError, e:
                if self.ignore_zeros:
                    self._dbg(2, "0x%X: %s" % (self.offset, e))
                    self.offset += BLOCKSIZE
                    continue
            except InvalidHeaderError, e:
                if self.ignore_zeros:
                    self._dbg(2, "0x%X: %s" % (self.offset, e))
                    self.offset += BLOCKSIZE
                    continue
                elif self.offset == 0:
                    raise ReadError(str(e))
            except EmptyHeaderError:
                if self.offset == 0:
                    raise ReadError("empty file")
            except TruncatedHeaderError, e:
                if self.offset == 0:
                    raise ReadError(str(e))
            except SubsequentHeaderError, e:
                raise ReadError(str(e))
            break

        if tarinfo is not None:
            self.members.append(tarinfo)
        else:
            self._loaded = True

        return tarinfo
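    # next() is usually driven by iteration or getmembers(), but it can be
    # called directly.  With ignore_zeros=True the loop above skips empty and
    # invalid blocks instead of stopping at the first one.  Sketch
    # ("damaged.tar" is a made-up archive name):
    #
    #   tar = TarFile.open("damaged.tar", ignore_zeros=True)
    #   while True:
    #       tarinfo = tar.next()
    #       if tarinfo is None:
    #           break
    #       print tarinfo.name
    #   tar.close()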
    #--------------------------------------------------------------------------
    # Little helper methods:

    def _getmember(self, name, tarinfo=None):
        """Find an archive member by name from bottom to top.
           If tarinfo is given, it is used as the starting point.
        """
        # Ensure that all members have been loaded.
        members = self.getmembers()

        if tarinfo is None:
            end = len(members)
        else:
            end = members.index(tarinfo)

        for i in xrange(end - 1, -1, -1):
            if name == members[i].name:
                return members[i]
2383 """Read through the entire archive file and look for readable
2387 tarinfo
= self
.next()
    def _check(self, mode=None):
        """Check if TarFile is still open, and if the operation's mode
           corresponds to TarFile's mode.
        """
        if self.closed:
            raise IOError("%s is closed" % self.__class__.__name__)
        if mode is not None and self.mode not in mode:
            raise IOError("bad operation for mode %r" % self.mode)
2402 """Provide an iterator object.
2405 return iter(self
.members
)
2407 return TarIter(self
)
    def _dbg(self, level, msg):
        """Write debugging output to sys.stderr.
        """
        if level <= self.debug:
            print >> sys.stderr, msg
# class TarFile

class TarIter:
    """Iterator Class.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Construct a TarIter object.
        """
        self.tarfile = tarfile
        self.index = 0
2429 """Return iterator object.
2433 """Return the next item using TarFile's next() method.
2434 When all members have been read, set TarFile as _loaded.
2436 # Fix for SF #1100429: Under rare circumstances it can
2437 # happen that getmembers() is called during iteration,
2438 # which will cause TarIter to stop prematurely.
2439 if not self
.tarfile
._loaded
:
2440 tarinfo
= self
.tarfile
.next()
2442 self
.tarfile
._loaded
= True
2446 tarinfo
= self
.tarfile
.members
[self
.index
]
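# TarIter is what makes "for tarinfo in TarFile.open(...)" work lazily: members
# are read from the archive on demand until it has been scanned completely.
# A minimal sketch ("backup.tar.gz" is a made-up archive name):
#
#   for tarinfo in TarFile.open("backup.tar.gz"):
#       print tarinfo.name, tarinfo.size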
# Helper classes for sparse file support
class _section:
    """Base class for _data and _hole.
    """
    def __init__(self, offset, size):
        self.offset = offset
        self.size = size
    def __contains__(self, offset):
        return self.offset <= offset < self.offset + self.size

class _data(_section):
    """Represent a data section in a sparse file.
    """
    def __init__(self, offset, size, realpos):
        _section.__init__(self, offset, size)
        self.realpos = realpos

class _hole(_section):
    """Represent a hole section in a sparse file.
    """
    pass
class _ringbuffer(list):
    """Ringbuffer class which increases performance
       over a regular list.
    """
    def __init__(self):
        self.idx = 0
    def find(self, offset):
        idx = self.idx
        while True:
            item = self[idx]
            if offset in item:
                break
            idx += 1
            if idx == len(self):
                idx = 0
            if idx == self.idx:
                # End of File
                return None
        self.idx = idx
        return item
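# Sketch of how the sparse file helpers work together (offsets and sizes are
# illustrative): a sparse member is described by alternating data and hole
# sections, and _ringbuffer.find() returns the section that contains a given
# logical offset.
#
#   sections = _ringbuffer()
#   sections.append(_data(0, 512, realpos=1024))  # 512 data bytes stored at archive position 1024
#   sections.append(_hole(512, 4096))             # followed by a 4096 byte hole
#   section = sections.find(600)                  # -> the _hole instance, since 512 <= 600 < 4608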
#---------------------------------------------
# zipfile compatible TarFile class
#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
       ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        from warnings import warnpy3k
        warnpy3k("the TarFileCompat class has been removed in Python 3.0",
                 stacklevel=2)
        if compression == TAR_PLAIN:
            self.tarfile = TarFile.taropen(file, mode)
        elif compression == TAR_GZIPPED:
            self.tarfile = TarFile.gzopen(file, mode)
        else:
            raise ValueError("unknown compression constant")
        if mode[0:1] == "r":
            members = self.tarfile.getmembers()
            for m in members:
                m.filename = m.name
                m.file_size = m.size
                m.date_time = time.gmtime(m.mtime)[:6]
    def namelist(self):
        return map(lambda m: m.name, self.infolist())
    def infolist(self):
        return filter(lambda m: m.type in REGULAR_TYPES,
                      self.tarfile.getmembers())
    def printdir(self):
        self.tarfile.list()
    def testzip(self):
        return
    def getinfo(self, name):
        return self.tarfile.getmember(name)
    def read(self, name):
        return self.tarfile.extractfile(self.tarfile.getmember(name)).read()
    def write(self, filename, arcname=None, compress_type=None):
        self.tarfile.add(filename, arcname)
    def writestr(self, zinfo, bytes):
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        tinfo = TarInfo(zinfo.filename)
        tinfo.size = len(bytes)
        tinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(tinfo, StringIO(bytes))
    def close(self):
        self.tarfile.close()
#class TarFileCompat
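# TarFileCompat mimics a small part of zipfile.ZipFile's interface and is
# deprecated (it emits a Py3k warning on construction).  A minimal sketch
# ("legacy.tar" and the member name are made up):
#
#   archive = TarFileCompat("legacy.tar", mode="r", compression=TAR_PLAIN)
#   print archive.namelist()
#   data = archive.read("some/member.txt")
#   archive.close()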
#--------------------
# exported functions
#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.