Lib/tarfile.py

   1 #!/usr/bin/env python
   2 # -*- coding: iso-8859-1 -*-
   3 #-------------------------------------------------------------------
   4 # tarfile.py
   5 #-------------------------------------------------------------------
   6 # Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
   7 # All rights reserved.
   8 #
   9 # Permission  is  hereby granted,  free  of charge,  to  any person
  10 # obtaining a  copy of  this software  and associated documentation
  11 # files  (the  "Software"),  to   deal  in  the  Software   without
  12 # restriction,  including  without limitation  the  rights to  use,
  13 # copy, modify, merge, publish, distribute, sublicense, and/or sell
  14 # copies  of  the  Software,  and to  permit  persons  to  whom the
  15 # Software  is  furnished  to  do  so,  subject  to  the  following
  16 # conditions:
  17 #
  18 # The above copyright  notice and this  permission notice shall  be
  19 # included in all copies or substantial portions of the Software.
  20 #
  21 # THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
  22 # EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
  23 # OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
  24 # NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
  25 # HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
  26 # WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
  27 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  28 # OTHER DEALINGS IN THE SOFTWARE.
  29 #
  30 """Read from and write to tar format archives.
  31 """
  32
# Module metadata.  The "$...$" values look like revision-control
# keyword placeholders (presumably expanded on checkout -- TODO confirm).
__version__ = "$Revision$"
# $Source$

# Version string of the tarfile module itself.
version     = "0.9.0"
__author__  = "Lars Gustäbel (lars@gustaebel.de)"
__date__    = "$Date$"
__cvsid__   = "$Id$"
__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
  41
  42 #---------
  43 # Imports
  44 #---------
  45 import sys
  46 import os
  47 import shutil
  48 import stat
  49 import errno
  50 import time
  51 import struct
  52 import copy
  53 import re
  54 import operator
  55
  56 try:
  57     import grp, pwd
  58 except ImportError:
  59     grp = pwd = None
  60
# Public names, i.e. what "from tarfile import *" brings into scope.
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
  63
  64 #---------------------------------------------------------
  65 # tar constants
  66 #---------------------------------------------------------
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records (20 blocks)
GNU_MAGIC = "ustar  \0"         # magic gnu tar string
POSIX_MAGIC = "ustar\x0000"     # magic posix tar string: "ustar", NUL, "00"

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

# Values of the type field of a header block.
REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file (type field is a NUL byte)
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar longname
GNUTYPE_LONGLINK = "K"          # GNU tar longlink
GNUTYPE_SPARSE = "S"            # GNU tar sparse file

XHDTYPE = "x"                   # POSIX.1-2001 extended header
XGLTYPE = "g"                   # POSIX.1-2001 global header
SOLARIS_XHDTYPE = "X"           # Solaris extended header

# Identifiers of the archive formats.
USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT     # format used when none is given (see itn())
  99
 100 #---------------------------------------------------------
 101 # tarfile constants
 102 #---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields in a pax header that are numbers, all other fields
# are treated as strings.  Maps the field name to the callable that
# converts its textual value.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}
 132
 133 #---------------------------------------------------------
 134 # Bits used in the mode field, values in octal.
 135 #---------------------------------------------------------
# File type bits; filemode() maps them to the first character of its
# result.
S_IFLNK = 0120000        # symbolic link
S_IFREG = 0100000        # regular file
S_IFBLK = 0060000        # block device
S_IFDIR = 0040000        # directory
S_IFCHR = 0020000        # character device
S_IFIFO = 0010000        # fifo

TSUID   = 04000          # set UID on execution
TSGID   = 02000          # set GID on execution
TSVTX   = 01000          # reserved (shown as "t"/"T" by filemode())

# Permission bits for owner, group and others.
TUREAD  = 0400           # read by owner
TUWRITE = 0200           # write by owner
TUEXEC  = 0100           # execute/search by owner
TGREAD  = 0040           # read by group
TGWRITE = 0020           # write by group
TGEXEC  = 0010           # execute/search by group
TOREAD  = 0004           # read by other
TOWRITE = 0002           # write by other
TOEXEC  = 0001           # execute/search by other
 156
 157 #---------------------------------------------------------
 158 # initialization
 159 #---------------------------------------------------------
# Module-wide default encoding: the filesystem encoding, or the
# interpreter's default encoding when the former is not available.
ENCODING = sys.getfilesystemencoding()
if ENCODING is None:
    ENCODING = sys.getdefaultencoding()
 163
 164 #---------------------------------------------------------
 165 # Some useful functions
 166 #---------------------------------------------------------
 167
 168 def stn(s, length):
 169     """Convert a python string to a null-terminated string buffer.
 170     """
 171     return s[:length] + (length - len(s)) * NUL
 172
 173 def nts(s):
 174     """Convert a null-terminated string field to a python string.
 175     """
 176     # Use the string up to the first null char.
 177     p = s.find("\0")
 178     if p == -1:
 179         return s
 180     return s[:p]
 181
 182 def nti(s):
 183     """Convert a number field to a python number.
 184     """
 185     # There are two possible encodings for a number field, see
 186     # itn() below.
 187     if s[0] != chr(0200):
 188         try:
 189             n = int(nts(s) or "0", 8)
 190         except ValueError:
 191             raise InvalidHeaderError("invalid header")
 192     else:
 193         n = 0L
 194         for i in xrange(len(s) - 1):
 195             n <<= 8
 196             n += ord(s[i + 1])
 197     return n
 198
 199 def itn(n, digits=8, format=DEFAULT_FORMAT):
 200     """Convert a python number to a number field.
 201     """
 202     # POSIX 1003.1-1988 requires numbers to be encoded as a string of
 203     # octal digits followed by a null-byte, this allows values up to
 204     # (8**(digits-1))-1. GNU tar allows storing numbers greater than
 205     # that if necessary. A leading 0200 byte indicates this particular
 206     # encoding, the following digits-1 bytes are a big-endian
 207     # representation. This allows values up to (256**(digits-1))-1.
 208     if 0 <= n < 8 ** (digits - 1):
 209         s = "%0*o" % (digits - 1, n) + NUL
 210     else:
 211         if format != GNU_FORMAT or n >= 256 ** (digits - 1):
 212             raise ValueError("overflow in number field")
 213
 214         if n < 0:
 215             # XXX We mimic GNU tar's behaviour with negative numbers,
 216             # this could raise OverflowError.
 217             n = struct.unpack("L", struct.pack("l", n))[0]
 218
 219         s = ""
 220         for i in xrange(digits - 1):
 221             s = chr(n & 0377) + s
 222             n >>= 8
 223         s = chr(0200) + s
 224     return s
 225
 226 def uts(s, encoding, errors):
 227     """Convert a unicode object to a string.
 228     """
 229     if errors == "utf-8":
 230         # An extra error handler similar to the -o invalid=UTF-8 option
 231         # in POSIX.1-2001. Replace untranslatable characters with their
 232         # UTF-8 representation.
 233         try:
 234             return s.encode(encoding, "strict")
 235         except UnicodeEncodeError:
 236             x = []
 237             for c in s:
 238                 try:
 239                     x.append(c.encode(encoding, "strict"))
 240                 except UnicodeEncodeError:
 241                     x.append(c.encode("utf8"))
 242             return "".join(x)
 243     else:
 244         return s.encode(encoding, errors)
 245
 246 def calc_chksums(buf):
 247     """Calculate the checksum for a member's header by summing up all
 248        characters except for the chksum field which is treated as if
 249        it was filled with spaces. According to the GNU tar sources,
 250        some tars (Sun and NeXT) calculate chksum with signed char,
 251        which will be different if there are chars in the buffer with
 252        the high bit set. So we calculate two checksums, unsigned and
 253        signed.
 254     """
 255     unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512]))
 256     signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512]))
 257     return unsigned_chksum, signed_chksum
 258
 259 def copyfileobj(src, dst, length=None):
 260     """Copy length bytes from fileobj src to fileobj dst.
 261        If length is None, copy the entire content.
 262     """
 263     if length == 0:
 264         return
 265     if length is None:
 266         shutil.copyfileobj(src, dst)
 267         return
 268
 269     BUFSIZE = 16 * 1024
 270     blocks, remainder = divmod(length, BUFSIZE)
 271     for b in xrange(blocks):
 272         buf = src.read(BUFSIZE)
 273         if len(buf) < BUFSIZE:
 274             raise IOError("end of file reached")
 275         dst.write(buf)
 276
 277     if remainder != 0:
 278         buf = src.read(remainder)
 279         if len(buf) < remainder:
 280             raise IOError("end of file reached")
 281         dst.write(buf)
 282     return
 283
# Lookup table for filemode(): one inner tuple per character of the
# resulting string, in output order.  Every inner tuple lists
# (bitmask, character) alternatives.  filemode() takes the first
# alternative whose bits are all set in the mode and falls back to "-"
# if there is none, so a combined mask has to come before the masks
# it contains.
filemode_table = (
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)
 310
 311 def filemode(mode):
 312     """Convert a file's mode to a string of the form
 313        -rwxrwxrwx.
 314        Used by TarFile.list()
 315     """
 316     perm = []
 317     for table in filemode_table:
 318         for bit, char in table:
 319             if mode & bit == bit:
 320                 perm.append(char)
 321                 break
 322         else:
 323             perm.append("-")
 324     return "".join(perm)
 325
 326 class TarError(Exception):
 327     """Base exception."""
 328     pass
 329 class ExtractError(TarError):
 330     """General exception for extract errors."""
 331     pass
 332 class ReadError(TarError):
 333     """Exception for unreadble tar archives."""
 334     pass
 335 class CompressionError(TarError):
 336     """Exception for unavailable compression methods."""
 337     pass
 338 class StreamError(TarError):
 339     """Exception for unsupported operations on stream-like TarFiles."""
 340     pass
 341 class HeaderError(TarError):
 342     """Base exception for header errors."""
 343     pass
 344 class EmptyHeaderError(HeaderError):
 345     """Exception for empty headers."""
 346     pass
 347 class TruncatedHeaderError(HeaderError):
 348     """Exception for truncated headers."""
 349     pass
 350 class EOFHeaderError(HeaderError):
 351     """Exception for end of file headers."""
 352     pass
 353 class InvalidHeaderError(HeaderError):
 354     """Exception for invalid headers."""
 355     pass
 356 class SubsequentHeaderError(HeaderError):
 357     """Exception for missing and invalid extended headers."""
 358     pass
 359
 360 #---------------------------
 361 # internal stream interface
 362 #---------------------------
 363 class _LowLevelFile:
 364     """Low-level file object. Supports reading and writing.
 365        It is used instead of a regular file object for streaming
 366        access.
 367     """
 368
 369     def __init__(self, name, mode):
 370         mode = {
 371             "r": os.O_RDONLY,
 372             "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
 373         }[mode]
 374         if hasattr(os, "O_BINARY"):
 375             mode |= os.O_BINARY
 376         self.fd = os.open(name, mode, 0666)
 377
 378     def close(self):
 379         os.close(self.fd)
 380
 381     def read(self, size):
 382         return os.read(self.fd, size)
 383
 384     def write(self, s):
 385         os.write(self.fd, s)
 386
 387 class _Stream:
 388     """Class that serves as an adapter between TarFile and
 389        a stream-like object.  The stream-like object only
 390        needs to have a read() or write() method and is accessed
 391        blockwise.  Use of gzip or bzip2 compression is possible.
 392        A stream-like object could be for example: sys.stdin,
 393        sys.stdout, a socket, a tape device etc.
 394
 395        _Stream is intended to be used only internally.
 396     """
 397
 398     def __init__(self, name, mode, comptype, fileobj, bufsize):
 399         """Construct a _Stream object.
 400         """
 401         self._extfileobj = True
 402         if fileobj is None:
 403             fileobj = _LowLevelFile(name, mode)
 404             self._extfileobj = False
 405
 406         if comptype == '*':
 407             # Enable transparent compression detection for the
 408             # stream interface
 409             fileobj = _StreamProxy(fileobj)
 410             comptype = fileobj.getcomptype()
 411
 412         self.name     = name or ""
 413         self.mode     = mode
 414         self.comptype = comptype
 415         self.fileobj  = fileobj
 416         self.bufsize  = bufsize
 417         self.buf      = ""
 418         self.pos      = 0L
 419         self.closed   = False
 420
 421         if comptype == "gz":
 422             try:
 423                 import zlib
 424             except ImportError:
 425                 raise CompressionError("zlib module is not available")
 426             self.zlib = zlib
 427             self.crc = zlib.crc32("") & 0xffffffffL
 428             if mode == "r":
 429                 self._init_read_gz()
 430             else:
 431                 self._init_write_gz()
 432
 433         if comptype == "bz2":
 434             try:
 435                 import bz2
 436             except ImportError:
 437                 raise CompressionError("bz2 module is not available")
 438             if mode == "r":
 439                 self.dbuf = ""
 440                 self.cmp = bz2.BZ2Decompressor()
 441             else:
 442                 self.cmp = bz2.BZ2Compressor()
 443
 444     def __del__(self):
 445         if hasattr(self, "closed") and not self.closed:
 446             self.close()
 447
 448     def _init_write_gz(self):
 449         """Initialize for writing with gzip compression.
 450         """
 451         self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
 452                                             -self.zlib.MAX_WBITS,
 453                                             self.zlib.DEF_MEM_LEVEL,
 454                                             0)
 455         timestamp = struct.pack("<L", long(time.time()))
 456         self.__write("\037\213\010\010%s\002\377" % timestamp)
 457         if self.name.endswith(".gz"):
 458             self.name = self.name[:-3]
 459         self.__write(self.name + NUL)
 460
 461     def write(self, s):
 462         """Write string s to the stream.
 463         """
 464         if self.comptype == "gz":
 465             self.crc = self.zlib.crc32(s, self.crc) & 0xffffffffL
 466         self.pos += len(s)
 467         if self.comptype != "tar":
 468             s = self.cmp.compress(s)
 469         self.__write(s)
 470
 471     def __write(self, s):
 472         """Write string s to the stream if a whole new block
 473            is ready to be written.
 474         """
 475         self.buf += s
 476         while len(self.buf) > self.bufsize:
 477             self.fileobj.write(self.buf[:self.bufsize])
 478             self.buf = self.buf[self.bufsize:]
 479
 480     def close(self):
 481         """Close the _Stream object. No operation should be
 482            done on it afterwards.
 483         """
 484         if self.closed:
 485             return
 486
 487         if self.mode == "w" and self.comptype != "tar":
 488             self.buf += self.cmp.flush()
 489
 490         if self.mode == "w" and self.buf:
 491             self.fileobj.write(self.buf)
 492             self.buf = ""
 493             if self.comptype == "gz":
 494                 # The native zlib crc is an unsigned 32-bit integer, but
 495                 # the Python wrapper implicitly casts that to a signed C
 496                 # long.  So, on a 32-bit box self.crc may "look negative",
 497                 # while the same crc on a 64-bit box may "look positive".
 498                 # To avoid irksome warnings from the `struct` module, force
 499                 # it to look positive on all boxes.
 500                 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
 501                 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
 502
 503         if not self._extfileobj:
 504             self.fileobj.close()
 505
 506         self.closed = True
 507
 508     def _init_read_gz(self):
 509         """Initialize for reading a gzip compressed fileobj.
 510         """
 511         self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
 512         self.dbuf = ""
 513
 514         # taken from gzip.GzipFile with some alterations
 515         if self.__read(2) != "\037\213":
 516             raise ReadError("not a gzip file")
 517         if self.__read(1) != "\010":
 518             raise CompressionError("unsupported compression method")
 519
 520         flag = ord(self.__read(1))
 521         self.__read(6)
 522
 523         if flag & 4:
 524             xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
 525             self.read(xlen)
 526         if flag & 8:
 527             while True:
 528                 s = self.__read(1)
 529                 if not s or s == NUL:
 530                     break
 531         if flag & 16:
 532             while True:
 533                 s = self.__read(1)
 534                 if not s or s == NUL:
 535                     break
 536         if flag & 2:
 537             self.__read(2)
 538
 539     def tell(self):
 540         """Return the stream's file pointer position.
 541         """
 542         return self.pos
 543
 544     def seek(self, pos=0):
 545         """Set the stream's file pointer to pos. Negative seeking
 546            is forbidden.
 547         """
 548         if pos - self.pos >= 0:
 549             blocks, remainder = divmod(pos - self.pos, self.bufsize)
 550             for i in xrange(blocks):
 551                 self.read(self.bufsize)
 552             self.read(remainder)
 553         else:
 554             raise StreamError("seeking backwards is not allowed")
 555         return self.pos
 556
 557     def read(self, size=None):
 558         """Return the next size number of bytes from the stream.
 559            If size is not defined, return all bytes of the stream
 560            up to EOF.
 561         """
 562         if size is None:
 563             t = []
 564             while True:
 565                 buf = self._read(self.bufsize)
 566                 if not buf:
 567                     break
 568                 t.append(buf)
 569             buf = "".join(t)
 570         else:
 571             buf = self._read(size)
 572         self.pos += len(buf)
 573         return buf
 574
 575     def _read(self, size):
 576         """Return size bytes from the stream.
 577         """
 578         if self.comptype == "tar":
 579             return self.__read(size)
 580
 581         c = len(self.dbuf)
 582         t = [self.dbuf]
 583         while c < size:
 584             buf = self.__read(self.bufsize)
 585             if not buf:
 586                 break
 587             try:
 588                 buf = self.cmp.decompress(buf)
 589             except IOError:
 590                 raise ReadError("invalid compressed data")
 591             t.append(buf)
 592             c += len(buf)
 593         t = "".join(t)
 594         self.dbuf = t[size:]
 595         return t[:size]
 596
 597     def __read(self, size):
 598         """Return size bytes from stream. If internal buffer is empty,
 599            read another block from the stream.
 600         """
 601         c = len(self.buf)
 602         t = [self.buf]
 603         while c < size:
 604             buf = self.fileobj.read(self.bufsize)
 605             if not buf:
 606                 break
 607             t.append(buf)
 608             c += len(buf)
 609         t = "".join(t)
 610         self.buf = t[size:]
 611         return t[:size]
 612 # class _Stream
 613
 614 class _StreamProxy(object):
 615     """Small proxy class that enables transparent compression
 616        detection for the Stream interface (mode 'r|*').
 617     """
 618
 619     def __init__(self, fileobj):
 620         self.fileobj = fileobj
 621         self.buf = self.fileobj.read(BLOCKSIZE)
 622
 623     def read(self, size):
 624         self.read = self.fileobj.read
 625         return self.buf
 626
 627     def getcomptype(self):
 628         if self.buf.startswith("\037\213\010"):
 629             return "gz"
 630         if self.buf.startswith("BZh91"):
 631             return "bz2"
 632         return "tar"
 633
 634     def close(self):
 635         self.fileobj.close()
 636 # class StreamProxy
 637
 638 class _BZ2Proxy(object):
 639     """Small proxy class that enables external file object
 640        support for "r:bz2" and "w:bz2" modes. This is actually
 641        a workaround for a limitation in bz2 module's BZ2File
 642        class which (unlike gzip.GzipFile) has no support for
 643        a file object argument.
 644     """
 645
 646     blocksize = 16 * 1024
 647
 648     def __init__(self, fileobj, mode):
 649         self.fileobj = fileobj
 650         self.mode = mode
 651         self.name = getattr(self.fileobj, "name", None)
 652         self.init()
 653
 654     def init(self):
 655         import bz2
 656         self.pos = 0
 657         if self.mode == "r":
 658             self.bz2obj = bz2.BZ2Decompressor()
 659             self.fileobj.seek(0)
 660             self.buf = ""
 661         else:
 662             self.bz2obj = bz2.BZ2Compressor()
 663
 664     def read(self, size):
 665         b = [self.buf]
 666         x = len(self.buf)
 667         while x < size:
 668             raw = self.fileobj.read(self.blocksize)
 669             if not raw:
 670                 break
 671             data = self.bz2obj.decompress(raw)
 672             b.append(data)
 673             x += len(data)
 674         self.buf = "".join(b)
 675
 676         buf = self.buf[:size]
 677         self.buf = self.buf[size:]
 678         self.pos += len(buf)
 679         return buf
 680
 681     def seek(self, pos):
 682         if pos < self.pos:
 683             self.init()
 684         self.read(pos - self.pos)
 685
 686     def tell(self):
 687         return self.pos
 688
 689     def write(self, data):
 690         self.pos += len(data)
 691         raw = self.bz2obj.compress(data)
 692         self.fileobj.write(raw)
 693
 694     def close(self):
 695         if self.mode == "w":
 696             raw = self.bz2obj.flush()
 697             self.fileobj.write(raw)
 698 # class _BZ2Proxy
 699
 700 #------------------------
 701 # Extraction file object
 702 #------------------------
 703 class _FileInFile(object):
 704     """A thin wrapper around an existing file object that
 705        provides a part of its data as an individual file
 706        object.
 707     """
 708
 709     def __init__(self, fileobj, offset, size, sparse=None):
 710         self.fileobj = fileobj
 711         self.offset = offset
 712         self.size = size
 713         self.sparse = sparse
 714         self.position = 0
 715
 716     def tell(self):
 717         """Return the current file position.
 718         """
 719         return self.position
 720
 721     def seek(self, position):
 722         """Seek to a position in the file.
 723         """
 724         self.position = position
 725
 726     def read(self, size=None):
 727         """Read data from the file.
 728         """
 729         if size is None:
 730             size = self.size - self.position
 731         else:
 732             size = min(size, self.size - self.position)
 733
 734         if self.sparse is None:
 735             return self.readnormal(size)
 736         else:
 737             return self.readsparse(size)
 738
 739     def readnormal(self, size):
 740         """Read operation for regular files.
 741         """
 742         self.fileobj.seek(self.offset + self.position)
 743         self.position += size
 744         return self.fileobj.read(size)
 745
 746     def readsparse(self, size):
 747         """Read operation for sparse files.
 748         """
 749         data = []
 750         while size > 0:
 751             buf = self.readsparsesection(size)
 752             if not buf:
 753                 break
 754             size -= len(buf)
 755             data.append(buf)
 756         return "".join(data)
 757
 758     def readsparsesection(self, size):
 759         """Read a single section of a sparse file.
 760         """
 761         section = self.sparse.find(self.position)
 762
 763         if section is None:
 764             return ""
 765
 766         size = min(size, section.offset + section.size - self.position)
 767
 768         if isinstance(section, _data):
 769             realpos = section.realpos + self.position - section.offset
 770             self.fileobj.seek(self.offset + realpos)
 771             self.position += size
 772             return self.fileobj.read(size)
 773         else:
 774             self.position += size
 775             return NUL * size
 776 #class _FileInFile
 777
 778
 779 class ExFileObject(object):
 780     """File-like object for reading an archive member.
 781        Is returned by TarFile.extractfile().
 782     """
 783     blocksize = 1024
 784
 785     def __init__(self, tarfile, tarinfo):
 786         self.fileobj = _FileInFile(tarfile.fileobj,
 787                                    tarinfo.offset_data,
 788                                    tarinfo.size,
 789                                    getattr(tarinfo, "sparse", None))
 790         self.name = tarinfo.name
 791         self.mode = "r"
 792         self.closed = False
 793         self.size = tarinfo.size
 794
 795         self.position = 0
 796         self.buffer = ""
 797
 798     def read(self, size=None):
 799         """Read at most size bytes from the file. If size is not
 800            present or None, read all data until EOF is reached.
 801         """
 802         if self.closed:
 803             raise ValueError("I/O operation on closed file")
 804
 805         buf = ""
 806         if self.buffer:
 807             if size is None:
 808                 buf = self.buffer
 809                 self.buffer = ""
 810             else:
 811                 buf = self.buffer[:size]
 812                 self.buffer = self.buffer[size:]
 813
 814         if size is None:
 815             buf += self.fileobj.read()
 816         else:
 817             buf += self.fileobj.read(size - len(buf))
 818
 819         self.position += len(buf)
 820         return buf
 821
 822     def readline(self, size=-1):
 823         """Read one entire line from the file. If size is present
 824            and non-negative, return a string with at most that
 825            size, which may be an incomplete line.
 826         """
 827         if self.closed:
 828             raise ValueError("I/O operation on closed file")
 829
 830         if "\n" in self.buffer:
 831             pos = self.buffer.find("\n") + 1
 832         else:
 833             buffers = [self.buffer]
 834             while True:
 835                 buf = self.fileobj.read(self.blocksize)
 836                 buffers.append(buf)
 837                 if not buf or "\n" in buf:
 838                     self.buffer = "".join(buffers)
 839                     pos = self.buffer.find("\n") + 1
 840                     if pos == 0:
 841                         # no newline found.
 842                         pos = len(self.buffer)
 843                     break
 844
 845         if size != -1:
 846             pos = min(size, pos)
 847
 848         buf = self.buffer[:pos]
 849         self.buffer = self.buffer[pos:]
 850         self.position += len(buf)
 851         return buf
 852
 853     def readlines(self):
 854         """Return a list with all remaining lines.
 855         """
 856         result = []
 857         while True:
 858             line = self.readline()
 859             if not line: break
 860             result.append(line)
 861         return result
 862
 863     def tell(self):
 864         """Return the current file position.
 865         """
 866         if self.closed:
 867             raise ValueError("I/O operation on closed file")
 868
 869         return self.position
 870
 871     def seek(self, pos, whence=os.SEEK_SET):
 872         """Seek to a position in the file.
 873         """
 874         if self.closed:
 875             raise ValueError("I/O operation on closed file")
 876
 877         if whence == os.SEEK_SET:
 878             self.position = min(max(pos, 0), self.size)
 879         elif whence == os.SEEK_CUR:
 880             if pos < 0:
 881                 self.position = max(self.position + pos, 0)
 882             else:
 883                 self.position = min(self.position + pos, self.size)
 884         elif whence == os.SEEK_END:
 885             self.position = max(min(self.size + pos, self.size), 0)
 886         else:
 887             raise ValueError("Invalid argument")
 888
 889         self.buffer = ""
 890         self.fileobj.seek(self.position)
 891
 892     def close(self):
 893         """Close the file object.
 894         """
 895         self.closed = True
 896
 897     def __iter__(self):
 898         """Get an iterator over the file's lines.
 899         """
 900         while True:
 901             line = self.readline()
 902             if not line:
 903                 break
 904             yield line
 905 #class ExFileObject
 906
 907 #------------------
 908 # Exported Classes
 909 #------------------
 910 class TarInfo(object):
 911     """Informational class which holds the details about an
 912        archive member given by a tar header block.
 913        TarInfo objects are returned by TarFile.getmember(),
 914        TarFile.getmembers() and TarFile.gettarinfo() and are
 915        usually created internally.
 916     """
 917
 918     def __init__(self, name=""):
 919         """Construct a TarInfo object. name is the optional name
 920            of the member.
 921         """
 922         self.name = name        # member name
 923         self.mode = 0644        # file permissions
 924         self.uid = 0            # user id
 925         self.gid = 0            # group id
 926         self.size = 0           # file size
 927         self.mtime = 0          # modification time
 928         self.chksum = 0         # header checksum
 929         self.type = REGTYPE     # member type
 930         self.linkname = ""      # link name
 931         self.uname = "root"     # user name
 932         self.gname = "root"     # group name
 933         self.devmajor = 0       # device major number
 934         self.devminor = 0       # device minor number
 935
 936         self.offset = 0         # the tar header starts here
 937         self.offset_data = 0    # the file's data starts here
 938
 939         self.pax_headers = {}   # pax header information
 940
 941     # In pax headers the "name" and "linkname" field are called
 942     # "path" and "linkpath".
 943     def _getpath(self):
 944         return self.name
 945     def _setpath(self, name):
 946         self.name = name
 947     path = property(_getpath, _setpath)
 948
 949     def _getlinkpath(self):
 950         return self.linkname
 951     def _setlinkpath(self, linkname):
 952         self.linkname = linkname
 953     linkpath = property(_getlinkpath, _setlinkpath)
 954
 955     def __repr__(self):
 956         return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
 957
 958     def get_info(self, encoding, errors):
 959         """Return the TarInfo's attributes as a dictionary.
 960         """
 961         info = {
 962             "name":     self.name,
 963             "mode":     self.mode & 07777,
 964             "uid":      self.uid,
 965             "gid":      self.gid,
 966             "size":     self.size,
 967             "mtime":    self.mtime,
 968             "chksum":   self.chksum,
 969             "type":     self.type,
 970             "linkname": self.linkname,
 971             "uname":    self.uname,
 972             "gname":    self.gname,
 973             "devmajor": self.devmajor,
 974             "devminor": self.devminor
 975         }
 976
 977         if info["type"] == DIRTYPE and not info["name"].endswith("/"):
 978             info["name"] += "/"
 979
 980         for key in ("name", "linkname", "uname", "gname"):
 981             if type(info[key]) is unicode:
 982                 info[key] = info[key].encode(encoding, errors)
 983
 984         return info
 985
 986     def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
 987         """Return a tar header as a string of 512 byte blocks.
 988         """
 989         info = self.get_info(encoding, errors)
 990
 991         if format == USTAR_FORMAT:
 992             return self.create_ustar_header(info)
 993         elif format == GNU_FORMAT:
 994             return self.create_gnu_header(info)
 995         elif format == PAX_FORMAT:
 996             return self.create_pax_header(info, encoding, errors)
 997         else:
 998             raise ValueError("invalid format")
 999
1000     def create_ustar_header(self, info):
1001         """Return the object as a ustar header block.
1002         """
1003         info["magic"] = POSIX_MAGIC
1004
1005         if len(info["linkname"]) > LENGTH_LINK:
1006             raise ValueError("linkname is too long")
1007
1008         if len(info["name"]) > LENGTH_NAME:
1009             info["prefix"], info["name"] = self._posix_split_name(info["name"])
1010
1011         return self._create_header(info, USTAR_FORMAT)
1012
1013     def create_gnu_header(self, info):
1014         """Return the object as a GNU header block sequence.
1015         """
1016         info["magic"] = GNU_MAGIC
1017
1018         buf = ""
1019         if len(info["linkname"]) > LENGTH_LINK:
1020             buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK)
1021
1022         if len(info["name"]) > LENGTH_NAME:
1023             buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME)
1024
1025         return buf + self._create_header(info, GNU_FORMAT)
1026
1027     def create_pax_header(self, info, encoding, errors):
1028         """Return the object as a ustar header block. If it cannot be
1029            represented this way, prepend a pax extended header sequence
1030            with supplement information.
1031         """
1032         info["magic"] = POSIX_MAGIC
1033         pax_headers = self.pax_headers.copy()
1034
1035         # Test string fields for values that exceed the field length or cannot
1036         # be represented in ASCII encoding.
1037         for name, hname, length in (
1038                 ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
1039                 ("uname", "uname", 32), ("gname", "gname", 32)):
1040
1041             if hname in pax_headers:
1042                 # The pax header has priority.
1043                 continue
1044
1045             val = info[name].decode(encoding, errors)
1046
1047             # Try to encode the string as ASCII.
1048             try:
1049                 val.encode("ascii")
1050             except UnicodeEncodeError:
1051                 pax_headers[hname] = val
1052                 continue
1053
1054             if len(info[name]) > length:
1055                 pax_headers[hname] = val
1056
1057         # Test number fields for values that exceed the field limit or values
1058         # that like to be stored as float.
1059         for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
1060             if name in pax_headers:
1061                 # The pax header has priority. Avoid overflow.
1062                 info[name] = 0
1063                 continue
1064
1065             val = info[name]
1066             if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
1067                 pax_headers[name] = unicode(val)
1068                 info[name] = 0
1069
1070         # Create a pax extended header if necessary.
1071         if pax_headers:
1072             buf = self._create_pax_generic_header(pax_headers)
1073         else:
1074             buf = ""
1075
1076         return buf + self._create_header(info, USTAR_FORMAT)
1077
1078     @classmethod
1079     def create_pax_global_header(cls, pax_headers):
1080         """Return the object as a pax global header block sequence.
1081         """
1082         return cls._create_pax_generic_header(pax_headers, type=XGLTYPE)
1083
1084     def _posix_split_name(self, name):
1085         """Split a name longer than 100 chars into a prefix
1086            and a name part.
1087         """
1088         prefix = name[:LENGTH_PREFIX + 1]
1089         while prefix and prefix[-1] != "/":
1090             prefix = prefix[:-1]
1091
1092         name = name[len(prefix):]
1093         prefix = prefix[:-1]
1094
1095         if not prefix or len(name) > LENGTH_NAME:
1096             raise ValueError("name is too long")
1097         return prefix, name
1098
1099     @staticmethod
1100     def _create_header(info, format):
1101         """Return a header block. info is a dictionary with file
1102            information, format must be one of the *_FORMAT constants.
1103         """
1104         parts = [
1105             stn(info.get("name", ""), 100),
1106             itn(info.get("mode", 0) & 07777, 8, format),
1107             itn(info.get("uid", 0), 8, format),
1108             itn(info.get("gid", 0), 8, format),
1109             itn(info.get("size", 0), 12, format),
1110             itn(info.get("mtime", 0), 12, format),
1111             "        ", # checksum field
1112             info.get("type", REGTYPE),
1113             stn(info.get("linkname", ""), 100),
1114             stn(info.get("magic", POSIX_MAGIC), 8),
1115             stn(info.get("uname", "root"), 32),
1116             stn(info.get("gname", "root"), 32),
1117             itn(info.get("devmajor", 0), 8, format),
1118             itn(info.get("devminor", 0), 8, format),
1119             stn(info.get("prefix", ""), 155)
1120         ]
1121
1122         buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
1123         chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
1124         buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
1125         return buf
1126
1127     @staticmethod
1128     def _create_payload(payload):
1129         """Return the string payload filled with zero bytes
1130            up to the next 512 byte border.
1131         """
1132         blocks, remainder = divmod(len(payload), BLOCKSIZE)
1133         if remainder > 0:
1134             payload += (BLOCKSIZE - remainder) * NUL
1135         return payload
1136
1137     @classmethod
1138     def _create_gnu_long_header(cls, name, type):
1139         """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
1140            for name.
1141         """
1142         name += NUL
1143
1144         info = {}
1145         info["name"] = "././@LongLink"
1146         info["type"] = type
1147         info["size"] = len(name)
1148         info["magic"] = GNU_MAGIC
1149
1150         # create extended header + name blocks.
1151         return cls._create_header(info, USTAR_FORMAT) + \
1152                 cls._create_payload(name)
1153
1154     @classmethod
1155     def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
1156         """Return a POSIX.1-2001 extended or global header sequence
1157            that contains a list of keyword, value pairs. The values
1158            must be unicode objects.
1159         """
1160         records = []
1161         for keyword, value in pax_headers.iteritems():
1162             keyword = keyword.encode("utf8")
1163             value = value.encode("utf8")
1164             l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
1165             n = p = 0
1166             while True:
1167                 n = l + len(str(p))
1168                 if n == p:
1169                     break
1170                 p = n
1171             records.append("%d %s=%s\n" % (p, keyword, value))
1172         records = "".join(records)
1173
1174         # We use a hardcoded "././@PaxHeader" name like star does
1175         # instead of the one that POSIX recommends.
1176         info = {}
1177         info["name"] = "././@PaxHeader"
1178         info["type"] = type
1179         info["size"] = len(records)
1180         info["magic"] = POSIX_MAGIC
1181
1182         # Create pax header + record blocks.
1183         return cls._create_header(info, USTAR_FORMAT) + \
1184                 cls._create_payload(records)
1185
1186     @classmethod
1187     def frombuf(cls, buf):
1188         """Construct a TarInfo object from a 512 byte string buffer.
1189         """
1190         if len(buf) == 0:
1191             raise EmptyHeaderError("empty header")
1192         if len(buf) != BLOCKSIZE:
1193             raise TruncatedHeaderError("truncated header")
1194         if buf.count(NUL) == BLOCKSIZE:
1195             raise EOFHeaderError("end of file header")
1196
1197         chksum = nti(buf[148:156])
1198         if chksum not in calc_chksums(buf):
1199             raise InvalidHeaderError("bad checksum")
1200
1201         obj = cls()
1202         obj.buf = buf
1203         obj.name = nts(buf[0:100])
1204         obj.mode = nti(buf[100:108])
1205         obj.uid = nti(buf[108:116])
1206         obj.gid = nti(buf[116:124])
1207         obj.size = nti(buf[124:136])
1208         obj.mtime = nti(buf[136:148])
1209         obj.chksum = chksum
1210         obj.type = buf[156:157]
1211         obj.linkname = nts(buf[157:257])
1212         obj.uname = nts(buf[265:297])
1213         obj.gname = nts(buf[297:329])
1214         obj.devmajor = nti(buf[329:337])
1215         obj.devminor = nti(buf[337:345])
1216         prefix = nts(buf[345:500])
1217
1218         # Old V7 tar format represents a directory as a regular
1219         # file with a trailing slash.
1220         if obj.type == AREGTYPE and obj.name.endswith("/"):
1221             obj.type = DIRTYPE
1222
1223         # Remove redundant slashes from directories.
1224         if obj.isdir():
1225             obj.name = obj.name.rstrip("/")
1226
1227         # Reconstruct a ustar longname.
1228         if prefix and obj.type not in GNU_TYPES:
1229             obj.name = prefix + "/" + obj.name
1230         return obj
1231
1232     @classmethod
1233     def fromtarfile(cls, tarfile):
1234         """Return the next TarInfo object from TarFile object
1235            tarfile.
1236         """
1237         buf = tarfile.fileobj.read(BLOCKSIZE)
1238         obj = cls.frombuf(buf)
1239         obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1240         return obj._proc_member(tarfile)
1241
1242     #--------------------------------------------------------------------------
1243     # The following are methods that are called depending on the type of a
1244     # member. The entry point is _proc_member() which can be overridden in a
    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following operations:
1248     # 1. Set self.offset_data to the position where the data blocks begin,
1249     #    if there is data that follows.
1250     # 2. Set tarfile.offset to the position where the next member's header will
1251     #    begin.
1252     # 3. Return self or another valid TarInfo object.
1253     def _proc_member(self, tarfile):
1254         """Choose the right processing method depending on
1255            the type and call it.
1256         """
1257         if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1258             return self._proc_gnulong(tarfile)
1259         elif self.type == GNUTYPE_SPARSE:
1260             return self._proc_sparse(tarfile)
1261         elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1262             return self._proc_pax(tarfile)
1263         else:
1264             return self._proc_builtin(tarfile)
1265
1266     def _proc_builtin(self, tarfile):
1267         """Process a builtin type or an unknown type which
1268            will be treated as a regular file.
1269         """
1270         self.offset_data = tarfile.fileobj.tell()
1271         offset = self.offset_data
1272         if self.isreg() or self.type not in SUPPORTED_TYPES:
1273             # Skip the following data blocks.
1274             offset += self._block(self.size)
1275         tarfile.offset = offset
1276
1277         # Patch the TarInfo object with saved global
1278         # header information.
1279         self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
1280
1281         return self
1282
1283     def _proc_gnulong(self, tarfile):
1284         """Process the blocks that hold a GNU longname
1285            or longlink member.
1286         """
1287         buf = tarfile.fileobj.read(self._block(self.size))
1288
1289         # Fetch the next header and process it.
1290         try:
1291             next = self.fromtarfile(tarfile)
1292         except HeaderError:
1293             raise SubsequentHeaderError("missing or bad subsequent header")
1294
1295         # Patch the TarInfo object from the next header with
1296         # the longname information.
1297         next.offset = self.offset
1298         if self.type == GNUTYPE_LONGNAME:
1299             next.name = nts(buf)
1300         elif self.type == GNUTYPE_LONGLINK:
1301             next.linkname = nts(buf)
1302
1303         return next
1304
1305     def _proc_sparse(self, tarfile):
1306         """Process a GNU sparse header plus extra headers.
1307         """
1308         buf = self.buf
1309         sp = _ringbuffer()
1310         pos = 386
1311         lastpos = 0L
1312         realpos = 0L
1313         # There are 4 possible sparse structs in the
1314         # first header.
1315         for i in xrange(4):
1316             try:
1317                 offset = nti(buf[pos:pos + 12])
1318                 numbytes = nti(buf[pos + 12:pos + 24])
1319             except ValueError:
1320                 break
1321             if offset > lastpos:
1322                 sp.append(_hole(lastpos, offset - lastpos))
1323             sp.append(_data(offset, numbytes, realpos))
1324             realpos += numbytes
1325             lastpos = offset + numbytes
1326             pos += 24
1327
1328         isextended = ord(buf[482])
1329         origsize = nti(buf[483:495])
1330
1331         # If the isextended flag is given,
1332         # there are extra headers to process.
1333         while isextended == 1:
1334             buf = tarfile.fileobj.read(BLOCKSIZE)
1335             pos = 0
1336             for i in xrange(21):
1337                 try:
1338                     offset = nti(buf[pos:pos + 12])
1339                     numbytes = nti(buf[pos + 12:pos + 24])
1340                 except ValueError:
1341                     break
1342                 if offset > lastpos:
1343                     sp.append(_hole(lastpos, offset - lastpos))
1344                 sp.append(_data(offset, numbytes, realpos))
1345                 realpos += numbytes
1346                 lastpos = offset + numbytes
1347                 pos += 24
1348             isextended = ord(buf[504])
1349
1350         if lastpos < origsize:
1351             sp.append(_hole(lastpos, origsize - lastpos))
1352
1353         self.sparse = sp
1354
1355         self.offset_data = tarfile.fileobj.tell()
1356         tarfile.offset = self.offset_data + self._block(self.size)
1357         self.size = origsize
1358
1359         return self
1360
1361     def _proc_pax(self, tarfile):
1362         """Process an extended or global header as described in
1363            POSIX.1-2001.
1364         """
1365         # Read the header information.
1366         buf = tarfile.fileobj.read(self._block(self.size))
1367
1368         # A pax header stores supplemental information for either
1369         # the following file (extended) or all following files
1370         # (global).
1371         if self.type == XGLTYPE:
1372             pax_headers = tarfile.pax_headers
1373         else:
1374             pax_headers = tarfile.pax_headers.copy()
1375
1376         # Parse pax header information. A record looks like that:
1377         # "%d %s=%s\n" % (length, keyword, value). length is the size
1378         # of the complete record including the length field itself and
1379         # the newline. keyword and value are both UTF-8 encoded strings.
1380         regex = re.compile(r"(\d+) ([^=]+)=", re.U)
1381         pos = 0
1382         while True:
1383             match = regex.match(buf, pos)
1384             if not match:
1385                 break
1386
1387             length, keyword = match.groups()
1388             length = int(length)
1389             value = buf[match.end(2) + 1:match.start(1) + length - 1]
1390
1391             keyword = keyword.decode("utf8")
1392             value = value.decode("utf8")
1393
1394             pax_headers[keyword] = value
1395             pos += length
1396
1397         # Fetch the next header.
1398         try:
1399             next = self.fromtarfile(tarfile)
1400         except HeaderError:
1401             raise SubsequentHeaderError("missing or bad subsequent header")
1402
1403         if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
1404             # Patch the TarInfo object with the extended header info.
1405             next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
1406             next.offset = self.offset
1407
1408             if "size" in pax_headers:
1409                 # If the extended header replaces the size field,
1410                 # we need to recalculate the offset where the next
1411                 # header starts.
1412                 offset = next.offset_data
1413                 if next.isreg() or next.type not in SUPPORTED_TYPES:
1414                     offset += next._block(next.size)
1415                 tarfile.offset = offset
1416
1417         return next
1418
1419     def _apply_pax_info(self, pax_headers, encoding, errors):
1420         """Replace fields with supplemental information from a previous
1421            pax extended or global header.
1422         """
1423         for keyword, value in pax_headers.iteritems():
1424             if keyword not in PAX_FIELDS:
1425                 continue
1426
1427             if keyword == "path":
1428                 value = value.rstrip("/")
1429
1430             if keyword in PAX_NUMBER_FIELDS:
1431                 try:
1432                     value = PAX_NUMBER_FIELDS[keyword](value)
1433                 except ValueError:
1434                     value = 0
1435             else:
1436                 value = uts(value, encoding, errors)
1437
1438             setattr(self, keyword, value)
1439
1440         self.pax_headers = pax_headers.copy()
1441
1442     def _block(self, count):
1443         """Round up a byte count by BLOCKSIZE and return it,
1444            e.g. _block(834) => 1024.
1445         """
1446         blocks, remainder = divmod(count, BLOCKSIZE)
1447         if remainder:
1448             blocks += 1
1449         return blocks * BLOCKSIZE
1450
    def isreg(self):
        """Return True if the member's type is one of REGULAR_TYPES."""
        return self.type in REGULAR_TYPES
    def isfile(self):
        """Return the result of isreg()."""
        return self.isreg()
    def isdir(self):
        """Return True if the member's type is DIRTYPE."""
        return self.type == DIRTYPE
    def issym(self):
        """Return True if the member's type is SYMTYPE."""
        return self.type == SYMTYPE
    def islnk(self):
        """Return True if the member's type is LNKTYPE."""
        return self.type == LNKTYPE
    def ischr(self):
        """Return True if the member's type is CHRTYPE."""
        return self.type == CHRTYPE
    def isblk(self):
        """Return True if the member's type is BLKTYPE."""
        return self.type == BLKTYPE
    def isfifo(self):
        """Return True if the member's type is FIFOTYPE."""
        return self.type == FIFOTYPE
    def issparse(self):
        """Return True if the member's type is GNUTYPE_SPARSE."""
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        """Return True if the member's type is CHRTYPE, BLKTYPE or
           FIFOTYPE.
        """
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1471 # class TarInfo
1472
class TarFile(object):
    """The TarFile Class provides an interface to tar archives.
    """

    # Class-level defaults. For debug, dereference, ignore_zeros,
    # errorlevel, format, encoding and tarinfo, __init__() stores the
    # matching constructor argument on the instance only when it is not
    # None, so the values below apply otherwise.

    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

    dereference = False         # If true, add content of linked file to the
                                # tar file, else the link.

    ignore_zeros = False        # If true, skips empty or invalid blocks and
                                # continues processing.

    errorlevel = 1              # If 0, fatal errors only appear in debug
                                # messages (if debug >= 0). If > 0, errors
                                # are passed to the caller as exceptions.

    format = DEFAULT_FORMAT     # The format to use when creating an archive.

    encoding = ENCODING         # Encoding for 8-bit character strings.

    errors = None               # Error handler for unicode conversion.
                                # __init__() always sets it per instance:
                                # the errors argument if given, else "utf-8"
                                # for mode "r" and "strict" otherwise.

    tarinfo = TarInfo           # The default TarInfo class to use.

    fileobject = ExFileObject   # The default ExFileObject class to use.
1498
1499     def __init__(self, name=None, mode="r", fileobj=None, format=None,
1500             tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
1501             errors=None, pax_headers=None, debug=None, errorlevel=None):
1502         """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1503            read from an existing archive, 'a' to append data to an existing
1504            file or 'w' to create a new file overwriting an existing one. `mode'
1505            defaults to 'r'.
1506            If `fileobj' is given, it is used for reading or writing data. If it
1507            can be determined, `mode' is overridden by `fileobj's mode.
1508            `fileobj' is not closed, when TarFile is closed.
1509         """
1510         if len(mode) > 1 or mode not in "raw":
1511             raise ValueError("mode must be 'r', 'a' or 'w'")
1512         self.mode = mode
1513         self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
1514
1515         if not fileobj:
1516             if self.mode == "a" and not os.path.exists(name):
1517                 # Create nonexistent files in append mode.
1518                 self.mode = "w"
1519                 self._mode = "wb"
1520             fileobj = bltn_open(name, self._mode)
1521             self._extfileobj = False
1522         else:
1523             if name is None and hasattr(fileobj, "name"):
1524                 name = fileobj.name
1525             if hasattr(fileobj, "mode"):
1526                 self._mode = fileobj.mode
1527             self._extfileobj = True
1528         self.name = os.path.abspath(name) if name else None
1529         self.fileobj = fileobj
1530
1531         # Init attributes.
1532         if format is not None:
1533             self.format = format
1534         if tarinfo is not None:
1535             self.tarinfo = tarinfo
1536         if dereference is not None:
1537             self.dereference = dereference
1538         if ignore_zeros is not None:
1539             self.ignore_zeros = ignore_zeros
1540         if encoding is not None:
1541             self.encoding = encoding
1542
1543         if errors is not None:
1544             self.errors = errors
1545         elif mode == "r":
1546             self.errors = "utf-8"
1547         else:
1548             self.errors = "strict"
1549
1550         if pax_headers is not None and self.format == PAX_FORMAT:
1551             self.pax_headers = pax_headers
1552         else:
1553             self.pax_headers = {}
1554
1555         if debug is not None:
1556             self.debug = debug
1557         if errorlevel is not None:
1558             self.errorlevel = errorlevel
1559
1560         # Init datastructures.
1561         self.closed = False
1562         self.members = []       # list of members as TarInfo objects
1563         self._loaded = False    # flag if all members have been read
1564         self.offset = self.fileobj.tell()
1565                                 # current position in the archive file
1566         self.inodes = {}        # dictionary caching the inodes of
1567                                 # archive members already added
1568
1569         try:
1570             if self.mode == "r":
1571                 self.firstmember = None
1572                 self.firstmember = self.next()
1573
1574             if self.mode == "a":
1575                 # Move to the end of the archive,
1576                 # before the first empty block.
1577                 while True:
1578                     self.fileobj.seek(self.offset)
1579                     try:
1580                         tarinfo = self.tarinfo.fromtarfile(self)
1581                         self.members.append(tarinfo)
1582                     except EOFHeaderError:
1583                         self.fileobj.seek(self.offset)
1584                         break
1585                     except HeaderError, e:
1586                         raise ReadError(str(e))
1587
1588             if self.mode in "aw":
1589                 self._loaded = True
1590
1591                 if self.pax_headers:
1592                     buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
1593                     self.fileobj.write(buf)
1594                     self.offset += len(buf)
1595         except:
1596             if not self._extfileobj:
1597                 self.fileobj.close()
1598             self.closed = True
1599             raise
1600
1601     def _getposix(self):
1602         return self.format == USTAR_FORMAT
1603     def _setposix(self, value):
1604         import warnings
1605         warnings.warn("use the format attribute instead", DeprecationWarning,
1606                       2)
1607         if value:
1608             self.format = USTAR_FORMAT
1609         else:
1610             self.format = GNU_FORMAT
1611     posix = property(_getposix, _setposix)
1612
1613     #--------------------------------------------------------------------------
1614     # Below are the classmethods which act as alternate constructors to the
1615     # TarFile class. The open() method is the only one that is needed for
1616     # public use; it is the "super"-constructor and is able to select an
1617     # adequate "sub"-constructor for a particular compression using the mapping
1618     # from OPEN_METH.
1619     #
1620     # This concept allows one to subclass TarFile without losing the comfort of
1621     # the super-constructor. A sub-constructor is registered and made available
1622     # by adding it to the mapping in OPEN_METH.
1623
1624     @classmethod
1625     def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
1626         """Open a tar archive for reading, writing or appending. Return
1627            an appropriate TarFile class.
1628
1629            mode:
1630            'r' or 'r:*' open for reading with transparent compression
1631            'r:'         open for reading exclusively uncompressed
1632            'r:gz'       open for reading with gzip compression
1633            'r:bz2'      open for reading with bzip2 compression
1634            'a' or 'a:'  open for appending, creating the file if necessary
1635            'w' or 'w:'  open for writing without compression
1636            'w:gz'       open for writing with gzip compression
1637            'w:bz2'      open for writing with bzip2 compression
1638
1639            'r|*'        open a stream of tar blocks with transparent compression
1640            'r|'         open an uncompressed stream of tar blocks for reading
1641            'r|gz'       open a gzip compressed stream of tar blocks
1642            'r|bz2'      open a bzip2 compressed stream of tar blocks
1643            'w|'         open an uncompressed stream for writing
1644            'w|gz'       open a gzip compressed stream for writing
1645            'w|bz2'      open a bzip2 compressed stream for writing
1646         """
1647
1648         if not name and not fileobj:
1649             raise ValueError("nothing to open")
1650
1651         if mode in ("r", "r:*"):
1652             # Find out which *open() is appropriate for opening the file.
1653             for comptype in cls.OPEN_METH:
1654                 func = getattr(cls, cls.OPEN_METH[comptype])
1655                 if fileobj is not None:
1656                     saved_pos = fileobj.tell()
1657                 try:
1658                     return func(name, "r", fileobj, **kwargs)
1659                 except (ReadError, CompressionError), e:
1660                     if fileobj is not None:
1661                         fileobj.seek(saved_pos)
1662                     continue
1663             raise ReadError("file could not be opened successfully")
1664
1665         elif ":" in mode:
1666             filemode, comptype = mode.split(":", 1)
1667             filemode = filemode or "r"
1668             comptype = comptype or "tar"
1669
1670             # Select the *open() function according to
1671             # given compression.
1672             if comptype in cls.OPEN_METH:
1673                 func = getattr(cls, cls.OPEN_METH[comptype])
1674             else:
1675                 raise CompressionError("unknown compression type %r" % comptype)
1676             return func(name, filemode, fileobj, **kwargs)
1677
1678         elif "|" in mode:
1679             filemode, comptype = mode.split("|", 1)
1680             filemode = filemode or "r"
1681             comptype = comptype or "tar"
1682
1683             if filemode not in "rw":
1684                 raise ValueError("mode must be 'r' or 'w'")
1685
1686             t = cls(name, filemode,
1687                     _Stream(name, filemode, comptype, fileobj, bufsize),
1688                     **kwargs)
1689             t._extfileobj = False
1690             return t
1691
1692         elif mode in "aw":
1693             return cls.taropen(name, mode, fileobj, **kwargs)
1694
1695         raise ValueError("undiscernible mode")
1696
1697     @classmethod
1698     def taropen(cls, name, mode="r", fileobj=None, **kwargs):
1699         """Open uncompressed tar archive name for reading or writing.
1700         """
1701         if len(mode) > 1 or mode not in "raw":
1702             raise ValueError("mode must be 'r', 'a' or 'w'")
1703         return cls(name, mode, fileobj, **kwargs)
1704
1705     @classmethod
1706     def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1707         """Open gzip compressed tar archive name for reading or writing.
1708            Appending is not allowed.
1709         """
1710         if len(mode) > 1 or mode not in "rw":
1711             raise ValueError("mode must be 'r' or 'w'")
1712
1713         try:
1714             import gzip
1715             gzip.GzipFile
1716         except (ImportError, AttributeError):
1717             raise CompressionError("gzip module is not available")
1718
1719         if fileobj is None:
1720             fileobj = bltn_open(name, mode + "b")
1721
1722         try:
1723             t = cls.taropen(name, mode,
1724                 gzip.GzipFile(name, mode, compresslevel, fileobj),
1725                 **kwargs)
1726         except IOError:
1727             raise ReadError("not a gzip file")
1728         t._extfileobj = False
1729         return t
1730
1731     @classmethod
1732     def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1733         """Open bzip2 compressed tar archive name for reading or writing.
1734            Appending is not allowed.
1735         """
1736         if len(mode) > 1 or mode not in "rw":
1737             raise ValueError("mode must be 'r' or 'w'.")
1738
1739         try:
1740             import bz2
1741         except ImportError:
1742             raise CompressionError("bz2 module is not available")
1743
1744         if fileobj is not None:
1745             fileobj = _BZ2Proxy(fileobj, mode)
1746         else:
1747             fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
1748
1749         try:
1750             t = cls.taropen(name, mode, fileobj, **kwargs)
1751         except (IOError, EOFError):
1752             raise ReadError("not a bzip2 file")
1753         t._extfileobj = False
1754         return t
1755
    # All *open() methods are registered here. Each key is a compression
    # type ("tar" stands for no compression) and each value is the name of
    # the classmethod that opens archives of that type; open() resolves
    # the name with getattr(cls, ...).
    OPEN_METH = {
        "tar": "taropen",   # uncompressed tar
        "gz":  "gzopen",    # gzip compressed tar
        "bz2": "bz2open"    # bzip2 compressed tar
    }
1762
1763     #--------------------------------------------------------------------------
1764     # The public methods which TarFile provides:
1765
1766     def close(self):
1767         """Close the TarFile. In write-mode, two finishing zero blocks are
1768            appended to the archive.
1769         """
1770         if self.closed:
1771             return
1772
1773         if self.mode in "aw":
1774             self.fileobj.write(NUL * (BLOCKSIZE * 2))
1775             self.offset += (BLOCKSIZE * 2)
1776             # fill up the end with zero-blocks
1777             # (like option -b20 for tar does)
1778             blocks, remainder = divmod(self.offset, RECORDSIZE)
1779             if remainder > 0:
1780                 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1781
1782         if not self._extfileobj:
1783             self.fileobj.close()
1784         self.closed = True
1785
1786     def getmember(self, name):
1787         """Return a TarInfo object for member `name'. If `name' can not be
1788            found in the archive, KeyError is raised. If a member occurs more
1789            than once in the archive, its last occurrence is assumed to be the
1790            most up-to-date version.
1791         """
1792         tarinfo = self._getmember(name)
1793         if tarinfo is None:
1794             raise KeyError("filename %r not found" % name)
1795         return tarinfo
1796
1797     def getmembers(self):
1798         """Return the members of the archive as a list of TarInfo objects. The
1799            list has the same order as the members in the archive.
1800         """
1801         self._check()
1802         if not self._loaded:    # if we want to obtain a list of
1803             self._load()        # all members, we first have to
1804                                 # scan the whole archive.
1805         return self.members
1806
1807     def getnames(self):
1808         """Return the members of the archive as a list of their names. It has
1809            the same order as the list returned by getmembers().
1810         """
1811         return [tarinfo.name for tarinfo in self.getmembers()]
1812
1813     def gettarinfo(self, name=None, arcname=None, fileobj=None):
1814         """Create a TarInfo object for either the file `name' or the file
1815            object `fileobj' (using os.fstat on its file descriptor). You can
1816            modify some of the TarInfo's attributes before you add it using
1817            addfile(). If given, `arcname' specifies an alternative name for the
1818            file in the archive.
1819         """
1820         self._check("aw")
1821
1822         # When fileobj is given, replace name by
1823         # fileobj's real name.
1824         if fileobj is not None:
1825             name = fileobj.name
1826
1827         # Building the name of the member in the archive.
1828         # Backward slashes are converted to forward slashes,
1829         # Absolute paths are turned to relative paths.
1830         if arcname is None:
1831             arcname = name
1832         drv, arcname = os.path.splitdrive(arcname)
1833         arcname = arcname.replace(os.sep, "/")
1834         arcname = arcname.lstrip("/")
1835
1836         # Now, fill the TarInfo object with
1837         # information specific for the file.
1838         tarinfo = self.tarinfo()
1839         tarinfo.tarfile = self
1840
1841         # Use os.stat or os.lstat, depending on platform
1842         # and if symlinks shall be resolved.
1843         if fileobj is None:
1844             if hasattr(os, "lstat") and not self.dereference:
1845                 statres = os.lstat(name)
1846             else:
1847                 statres = os.stat(name)
1848         else:
1849             statres = os.fstat(fileobj.fileno())
1850         linkname = ""
1851
1852         stmd = statres.st_mode
1853         if stat.S_ISREG(stmd):
1854             inode = (statres.st_ino, statres.st_dev)
1855             if not self.dereference and statres.st_nlink > 1 and \
1856                     inode in self.inodes and arcname != self.inodes[inode]:
1857                 # Is it a hardlink to an already
1858                 # archived file?
1859                 type = LNKTYPE
1860                 linkname = self.inodes[inode]
1861             else:
1862                 # The inode is added only if its valid.
1863                 # For win32 it is always 0.
1864                 type = REGTYPE
1865                 if inode[0]:
1866                     self.inodes[inode] = arcname
1867         elif stat.S_ISDIR(stmd):
1868             type = DIRTYPE
1869         elif stat.S_ISFIFO(stmd):
1870             type = FIFOTYPE
1871         elif stat.S_ISLNK(stmd):
1872             type = SYMTYPE
1873             linkname = os.readlink(name)
1874         elif stat.S_ISCHR(stmd):
1875             type = CHRTYPE
1876         elif stat.S_ISBLK(stmd):
1877             type = BLKTYPE
1878         else:
1879             return None
1880
1881         # Fill the TarInfo object with all
1882         # information we can get.
1883         tarinfo.name = arcname
1884         tarinfo.mode = stmd
1885         tarinfo.uid = statres.st_uid
1886         tarinfo.gid = statres.st_gid
1887         if type == REGTYPE:
1888             tarinfo.size = statres.st_size
1889         else:
1890             tarinfo.size = 0L
1891         tarinfo.mtime = statres.st_mtime
1892         tarinfo.type = type
1893         tarinfo.linkname = linkname
1894         if pwd:
1895             try:
1896                 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1897             except KeyError:
1898                 pass
1899         if grp:
1900             try:
1901                 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1902             except KeyError:
1903                 pass
1904
1905         if type in (CHRTYPE, BLKTYPE):
1906             if hasattr(os, "major") and hasattr(os, "minor"):
1907                 tarinfo.devmajor = os.major(statres.st_rdev)
1908                 tarinfo.devminor = os.minor(statres.st_rdev)
1909         return tarinfo
1910
    def list(self, verbose=True):
        """Print a table of contents to sys.stdout. If `verbose' is False, only
           the names of the members are printed. If it is True, an `ls -l'-like
           output is produced.

           Each member is printed on a line of its own. In verbose mode the
           name is preceded by the mode string, owner/group, size (or
           "major,minor" for devices) and modification time, and followed by
           the link target for symbolic and hard links.
        """
        self._check()

        for tarinfo in self:
            # Every print statement below ends with a comma, so all fields
            # of one member end up on the same output line.
            if verbose:
                print filemode(tarinfo.mode),
                # Fall back to the numeric ids when no names are stored.
                print "%s/%s" % (tarinfo.uname or tarinfo.uid,
                                 tarinfo.gname or tarinfo.gid),
                if tarinfo.ischr() or tarinfo.isblk():
                    # Devices show "major,minor" in place of a size.
                    print "%10s" % ("%d,%d" \
                                    % (tarinfo.devmajor, tarinfo.devminor)),
                else:
                    print "%10d" % tarinfo.size,
                # Modification time as local time: year to second.
                print "%d-%02d-%02d %02d:%02d:%02d" \
                      % time.localtime(tarinfo.mtime)[:6],

            # Directories are marked with a trailing slash.
            print tarinfo.name + ("/" if tarinfo.isdir() else ""),

            if verbose:
                if tarinfo.issym():
                    print "->", tarinfo.linkname,
                if tarinfo.islnk():
                    print "link to", tarinfo.linkname,
            # Terminate the line for this member.
            print
1939
1940     def add(self, name, arcname=None, recursive=True, exclude=None, filter=None):
1941         """Add the file `name' to the archive. `name' may be any type of file
1942            (directory, fifo, symbolic link, etc.). If given, `arcname'
1943            specifies an alternative name for the file in the archive.
1944            Directories are added recursively by default. This can be avoided by
1945            setting `recursive' to False. `exclude' is a function that should
1946            return True for each filename to be excluded. `filter' is a function
1947            that expects a TarInfo object argument and returns the changed
1948            TarInfo object, if it returns None the TarInfo object will be
1949            excluded from the archive.
1950         """
1951         self._check("aw")
1952
1953         if arcname is None:
1954             arcname = name
1955
1956         # Exclude pathnames.
1957         if exclude is not None:
1958             import warnings
1959             warnings.warn("use the filter argument instead",
1960                     DeprecationWarning, 2)
1961             if exclude(name):
1962                 self._dbg(2, "tarfile: Excluded %r" % name)
1963                 return
1964
1965         # Skip if somebody tries to archive the archive...
1966         if self.name is not None and os.path.abspath(name) == self.name:
1967             self._dbg(2, "tarfile: Skipped %r" % name)
1968             return
1969
1970         self._dbg(1, name)
1971
1972         # Create a TarInfo object from the file.
1973         tarinfo = self.gettarinfo(name, arcname)
1974
1975         if tarinfo is None:
1976             self._dbg(1, "tarfile: Unsupported type %r" % name)
1977             return
1978
1979         # Change or exclude the TarInfo object.
1980         if filter is not None:
1981             tarinfo = filter(tarinfo)
1982             if tarinfo is None:
1983                 self._dbg(2, "tarfile: Excluded %r" % name)
1984                 return
1985
1986         # Append the tar header and data to the archive.
1987         if tarinfo.isreg():
1988             f = bltn_open(name, "rb")
1989             self.addfile(tarinfo, f)
1990             f.close()
1991
1992         elif tarinfo.isdir():
1993             self.addfile(tarinfo)
1994             if recursive:
1995                 for f in os.listdir(name):
1996                     self.add(os.path.join(name, f), os.path.join(arcname, f),
1997                             recursive, exclude, filter)
1998
1999         else:
2000             self.addfile(tarinfo)
2001
2002     def addfile(self, tarinfo, fileobj=None):
2003         """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
2004            given, tarinfo.size bytes are read from it and added to the archive.
2005            You can create TarInfo objects using gettarinfo().
2006            On Windows platforms, `fileobj' should always be opened with mode
2007            'rb' to avoid irritation about the file size.
2008         """
2009         self._check("aw")
2010
2011         tarinfo = copy.copy(tarinfo)
2012
2013         buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
2014         self.fileobj.write(buf)
2015         self.offset += len(buf)
2016
2017         # If there's data to follow, append it.
2018         if fileobj is not None:
2019             copyfileobj(fileobj, self.fileobj, tarinfo.size)
2020             blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
2021             if remainder > 0:
2022                 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
2023                 blocks += 1
2024             self.offset += blocks * BLOCKSIZE
2025
2026         self.members.append(tarinfo)
2027
2028     def extractall(self, path=".", members=None):
2029         """Extract all members from the archive to the current working
2030            directory and set owner, modification time and permissions on
2031            directories afterwards. `path' specifies a different directory
2032            to extract to. `members' is optional and must be a subset of the
2033            list returned by getmembers().
2034         """
2035         directories = []
2036
2037         if members is None:
2038             members = self
2039
2040         for tarinfo in members:
2041             if tarinfo.isdir():
2042                 # Extract directories with a safe mode.
2043                 directories.append(tarinfo)
2044                 tarinfo = copy.copy(tarinfo)
2045                 tarinfo.mode = 0700
2046             self.extract(tarinfo, path)
2047
2048         # Reverse sort directories.
2049         directories.sort(key=operator.attrgetter('name'))
2050         directories.reverse()
2051
2052         # Set correct owner, mtime and filemode on directories.
2053         for tarinfo in directories:
2054             dirpath = os.path.join(path, tarinfo.name)
2055             try:
2056                 self.chown(tarinfo, dirpath)
2057                 self.utime(tarinfo, dirpath)
2058                 self.chmod(tarinfo, dirpath)
2059             except ExtractError, e:
2060                 if self.errorlevel > 1:
2061                     raise
2062                 else:
2063                     self._dbg(1, "tarfile: %s" % e)
2064
2065     def extract(self, member, path=""):
2066         """Extract a member from the archive to the current working directory,
2067            using its full name. Its file information is extracted as accurately
2068            as possible. `member' may be a filename or a TarInfo object. You can
2069            specify a different directory using `path'.
2070         """
2071         self._check("r")
2072
2073         if isinstance(member, basestring):
2074             tarinfo = self.getmember(member)
2075         else:
2076             tarinfo = member
2077
2078         # Prepare the link target for makelink().
2079         if tarinfo.islnk():
2080             tarinfo._link_target = os.path.join(path, tarinfo.linkname)
2081
2082         try:
2083             self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
2084         except EnvironmentError, e:
2085             if self.errorlevel > 0:
2086                 raise
2087             else:
2088                 if e.filename is None:
2089                     self._dbg(1, "tarfile: %s" % e.strerror)
2090                 else:
2091                     self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
2092         except ExtractError, e:
2093             if self.errorlevel > 1:
2094                 raise
2095             else:
2096                 self._dbg(1, "tarfile: %s" % e)
2097
2098     def extractfile(self, member):
2099         """Extract a member from the archive as a file object. `member' may be
2100            a filename or a TarInfo object. If `member' is a regular file, a
2101            file-like object is returned. If `member' is a link, a file-like
2102            object is constructed from the link's target. If `member' is none of
2103            the above, None is returned.
2104            The file-like object is read-only and provides the following
2105            methods: read(), readline(), readlines(), seek() and tell()
2106         """
2107         self._check("r")
2108
2109         if isinstance(member, basestring):
2110             tarinfo = self.getmember(member)
2111         else:
2112             tarinfo = member
2113
2114         if tarinfo.isreg():
2115             return self.fileobject(self, tarinfo)
2116
2117         elif tarinfo.type not in SUPPORTED_TYPES:
2118             # If a member's type is unknown, it is treated as a
2119             # regular file.
2120             return self.fileobject(self, tarinfo)
2121
2122         elif tarinfo.islnk() or tarinfo.issym():
2123             if isinstance(self.fileobj, _Stream):
2124                 # A small but ugly workaround for the case that someone tries
2125                 # to extract a (sym)link as a file-object from a non-seekable
2126                 # stream of tar blocks.
2127                 raise StreamError("cannot extract (sym)link as file object")
2128             else:
2129                 # A (sym)link's file object is its target's file object.
2130                 return self.extractfile(self._find_link_target(tarinfo))
2131         else:
2132             # If there's no data associated with the member (directory, chrdev,
2133             # blkdev, etc.), return None instead of a file object.
2134             return None
2135
2136     def _extract_member(self, tarinfo, targetpath):
2137         """Extract the TarInfo object tarinfo to a physical
2138            file called targetpath.
2139         """
2140         # Fetch the TarInfo object for the given name
2141         # and build the destination pathname, replacing
2142         # forward slashes to platform specific separators.
2143         targetpath = targetpath.rstrip("/")
2144         targetpath = targetpath.replace("/", os.sep)
2145
2146         # Create all upper directories.
2147         upperdirs = os.path.dirname(targetpath)
2148         if upperdirs and not os.path.exists(upperdirs):
2149             # Create directories that are not part of the archive with
2150             # default permissions.
2151             os.makedirs(upperdirs)
2152
2153         if tarinfo.islnk() or tarinfo.issym():
2154             self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2155         else:
2156             self._dbg(1, tarinfo.name)
2157
2158         if tarinfo.isreg():
2159             self.makefile(tarinfo, targetpath)
2160         elif tarinfo.isdir():
2161             self.makedir(tarinfo, targetpath)
2162         elif tarinfo.isfifo():
2163             self.makefifo(tarinfo, targetpath)
2164         elif tarinfo.ischr() or tarinfo.isblk():
2165             self.makedev(tarinfo, targetpath)
2166         elif tarinfo.islnk() or tarinfo.issym():
2167             self.makelink(tarinfo, targetpath)
2168         elif tarinfo.type not in SUPPORTED_TYPES:
2169             self.makeunknown(tarinfo, targetpath)
2170         else:
2171             self.makefile(tarinfo, targetpath)
2172
2173         self.chown(tarinfo, targetpath)
2174         if not tarinfo.issym():
2175             self.chmod(tarinfo, targetpath)
2176             self.utime(tarinfo, targetpath)
2177
2178     #--------------------------------------------------------------------------
2179     # Below are the different file methods. They are called via
2180     # _extract_member() when extract() is called. They can be replaced in a
2181     # subclass to implement other functionality.
2182
2183     def makedir(self, tarinfo, targetpath):
2184         """Make a directory called targetpath.
2185         """
2186         try:
2187             # Use a safe mode for the directory, the real mode is set
2188             # later in _extract_member().
2189             os.mkdir(targetpath, 0700)
2190         except EnvironmentError, e:
2191             if e.errno != errno.EEXIST:
2192                 raise
2193
2194     def makefile(self, tarinfo, targetpath):
2195         """Make a file called targetpath.
2196         """
2197         source = self.extractfile(tarinfo)
2198         target = bltn_open(targetpath, "wb")
2199         copyfileobj(source, target)
2200         source.close()
2201         target.close()
2202
2203     def makeunknown(self, tarinfo, targetpath):
2204         """Make a file from a TarInfo object with an unknown type
2205            at targetpath.
2206         """
2207         self.makefile(tarinfo, targetpath)
2208         self._dbg(1, "tarfile: Unknown file type %r, " \
2209                      "extracted as regular file." % tarinfo.type)
2210
2211     def makefifo(self, tarinfo, targetpath):
2212         """Make a fifo called targetpath.
2213         """
2214         if hasattr(os, "mkfifo"):
2215             os.mkfifo(targetpath)
2216         else:
2217             raise ExtractError("fifo not supported by system")
2218
2219     def makedev(self, tarinfo, targetpath):
2220         """Make a character or block device called targetpath.
2221         """
2222         if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
2223             raise ExtractError("special devices not supported by system")
2224
2225         mode = tarinfo.mode
2226         if tarinfo.isblk():
2227             mode |= stat.S_IFBLK
2228         else:
2229             mode |= stat.S_IFCHR
2230
2231         os.mknod(targetpath, mode,
2232                  os.makedev(tarinfo.devmajor, tarinfo.devminor))
2233
2234     def makelink(self, tarinfo, targetpath):
2235         """Make a (symbolic) link called targetpath. If it cannot be created
2236           (platform limitation), we try to make a copy of the referenced file
2237           instead of a link.
2238         """
2239         if hasattr(os, "symlink") and hasattr(os, "link"):
2240             # For systems that support symbolic and hard links.
2241             if tarinfo.issym():
2242                 os.symlink(tarinfo.linkname, targetpath)
2243             else:
2244                 # See extract().
2245                 if os.path.exists(tarinfo._link_target):
2246                     os.link(tarinfo._link_target, targetpath)
2247                 else:
2248                     self._extract_member(self._find_link_target(tarinfo), targetpath)
2249         else:
2250             try:
2251                 self._extract_member(self._find_link_target(tarinfo), targetpath)
2252             except KeyError:
2253                 raise ExtractError("unable to resolve link inside archive")
2254
2255     def chown(self, tarinfo, targetpath):
2256         """Set owner of targetpath according to tarinfo.
2257         """
2258         if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
2259             # We have to be root to do so.
2260             try:
2261                 g = grp.getgrnam(tarinfo.gname)[2]
2262             except KeyError:
2263                 try:
2264                     g = grp.getgrgid(tarinfo.gid)[2]
2265                 except KeyError:
2266                     g = os.getgid()
2267             try:
2268                 u = pwd.getpwnam(tarinfo.uname)[2]
2269             except KeyError:
2270                 try:
2271                     u = pwd.getpwuid(tarinfo.uid)[2]
2272                 except KeyError:
2273                     u = os.getuid()
2274             try:
2275                 if tarinfo.issym() and hasattr(os, "lchown"):
2276                     os.lchown(targetpath, u, g)
2277                 else:
2278                     if sys.platform != "os2emx":
2279                         os.chown(targetpath, u, g)
2280             except EnvironmentError, e:
2281                 raise ExtractError("could not change owner")
2282
2283     def chmod(self, tarinfo, targetpath):
2284         """Set file permissions of targetpath according to tarinfo.
2285         """
2286         if hasattr(os, 'chmod'):
2287             try:
2288                 os.chmod(targetpath, tarinfo.mode)
2289             except EnvironmentError, e:
2290                 raise ExtractError("could not change mode")
2291
2292     def utime(self, tarinfo, targetpath):
2293         """Set modification time of targetpath according to tarinfo.
2294         """
2295         if not hasattr(os, 'utime'):
2296             return
2297         try:
2298             os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
2299         except EnvironmentError, e:
2300             raise ExtractError("could not change modification time")
2301
2302     #--------------------------------------------------------------------------
    def next(self):
        """Return the next member of the archive as a TarInfo object, when
           TarFile is opened for reading. Return None if there is no more
           available.

           Every member that is read is appended to self.members; once no
           further member can be read, the TarFile is marked as fully
           loaded. ReadError is raised for a header problem at the very
           start of the file and for a broken subsequent header.
        """
        self._check("ra")
        # A member that has been stored in firstmember is handed out
        # exactly once before anything else is read.
        if self.firstmember is not None:
            m = self.firstmember
            self.firstmember = None
            return m

        # Read the next block.
        self.fileobj.seek(self.offset)
        tarinfo = None
        # The loop only repeats through the `continue' statements below,
        # i.e. when ignore_zeros asks for a bad block to be skipped. Every
        # handler that neither continues nor raises falls through to the
        # final `break' with tarinfo still None, which ends the archive.
        while True:
            try:
                tarinfo = self.tarinfo.fromtarfile(self)
            except EOFHeaderError, e:
                if self.ignore_zeros:
                    # Step over the block and try again.
                    self._dbg(2, "0x%X: %s" % (self.offset, e))
                    self.offset += BLOCKSIZE
                    continue
            except InvalidHeaderError, e:
                if self.ignore_zeros:
                    # Step over the block and try again.
                    self._dbg(2, "0x%X: %s" % (self.offset, e))
                    self.offset += BLOCKSIZE
                    continue
                elif self.offset == 0:
                    # An invalid header at the very beginning is an error.
                    raise ReadError(str(e))
            except EmptyHeaderError:
                if self.offset == 0:
                    raise ReadError("empty file")
            except TruncatedHeaderError, e:
                if self.offset == 0:
                    raise ReadError(str(e))
            except SubsequentHeaderError, e:
                # Always an error, regardless of the current offset.
                raise ReadError(str(e))
            break

        if tarinfo is not None:
            self.members.append(tarinfo)
        else:
            # Nothing more to read: the member list is complete.
            self._loaded = True

        return tarinfo
2348
2349     #--------------------------------------------------------------------------
2350     # Little helper methods:
2351
2352     def _getmember(self, name, tarinfo=None, normalize=False):
2353         """Find an archive member by name from bottom to top.
2354            If tarinfo is given, it is used as the starting point.
2355         """
2356         # Ensure that all members have been loaded.
2357         members = self.getmembers()
2358
2359         # Limit the member search list up to tarinfo.
2360         if tarinfo is not None:
2361             members = members[:members.index(tarinfo)]
2362
2363         if normalize:
2364             name = os.path.normpath(name)
2365
2366         for member in reversed(members):
2367             if normalize:
2368                 member_name = os.path.normpath(member.name)
2369             else:
2370                 member_name = member.name
2371
2372             if name == member_name:
2373                 return member
2374
2375     def _load(self):
2376         """Read through the entire archive file and look for readable
2377            members.
2378         """
2379         while True:
2380             tarinfo = self.next()
2381             if tarinfo is None:
2382                 break
2383         self._loaded = True
2384
2385     def _check(self, mode=None):
2386         """Check if TarFile is still open, and if the operation's mode
2387            corresponds to TarFile's mode.
2388         """
2389         if self.closed:
2390             raise IOError("%s is closed" % self.__class__.__name__)
2391         if mode is not None and self.mode not in mode:
2392             raise IOError("bad operation for mode %r" % self.mode)
2393
2394     def _find_link_target(self, tarinfo):
2395         """Find the target member of a symlink or hardlink member in the
2396            archive.
2397         """
2398         if tarinfo.issym():
2399             # Always search the entire archive.
2400             linkname = os.path.dirname(tarinfo.name) + "/" + tarinfo.linkname
2401             limit = None
2402         else:
2403             # Search the archive before the link, because a hard link is
2404             # just a reference to an already archived file.
2405             linkname = tarinfo.linkname
2406             limit = tarinfo
2407
2408         member = self._getmember(linkname, tarinfo=limit, normalize=True)
2409         if member is None:
2410             raise KeyError("linkname %r not found" % linkname)
2411         return member
2412
2413     def __iter__(self):
2414         """Provide an iterator object.
2415         """
2416         if self._loaded:
2417             return iter(self.members)
2418         else:
2419             return TarIter(self)
2420
2421     def _dbg(self, level, msg):
2422         """Write debugging output to sys.stderr.
2423         """
2424         if level <= self.debug:
2425             print >> sys.stderr, msg
2426
    def __enter__(self):
        """Enter a `with' block: make sure the TarFile has not been closed
           (IOError is raised otherwise) and return the TarFile itself.
        """
        self._check()
        return self
2430
2431     def __exit__(self, type, value, traceback):
2432         if type is None:
2433             self.close()
2434         else:
2435             # An exception occurred. We must not call close() because
2436             # it would try to write end-of-archive blocks and padding.
2437             if not self._extfileobj:
2438                 self.fileobj.close()
2439             self.closed = True
2440 # class TarFile
2441
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Bind the iterator to tarfile and start counting at zero.
        """
        self.tarfile = tarfile
        # Number of members handed out so far; also used as the index
        # into tarfile.members once the archive is fully loaded.
        self.index = 0
    def __iter__(self):
        """Return self, as the iterator protocol requires.
        """
        return self
    def next(self):
        """Return the next member.

           While the TarFile is not marked _loaded, members come from
           its next() method; a false result marks the TarFile as
           _loaded and ends the iteration. Once it is _loaded, members
           are taken from TarFile.members by index instead.
        """
        archive = self.tarfile
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely.
        if archive._loaded:
            try:
                entry = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            entry = archive.next()
            if not entry:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return entry
2477
2478 # Helper classes for sparse file support
2479 class _section:
2480     """Base class for _data and _hole.
2481     """
2482     def __init__(self, offset, size):
2483         self.offset = offset
2484         self.size = size
2485     def __contains__(self, offset):
2486         return self.offset <= offset < self.offset + self.size
2487
class _data(_section):
    """Section of a sparse file that carries data.

       Besides the offset and size handled by _section, it records
       realpos (presumably where the data is actually stored -- confirm
       against the code that builds these objects).
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
2494
class _hole(_section):
    """Section of a sparse file that is a hole.

       Adds nothing to _section; the distinct class only lets code
       tell holes apart from _data sections.
    """
2499
2500 class _ringbuffer(list):
2501     """Ringbuffer class which increases performance
2502        over a regular list.
2503     """
2504     def __init__(self):
2505         self.idx = 0
2506     def find(self, offset):
2507         idx = self.idx
2508         while True:
2509             item = self[idx]
2510             if offset in item:
2511                 break
2512             idx += 1
2513             if idx == len(self):
2514                 idx = 0
2515             if idx == self.idx:
2516                 # End of File
2517                 return None
2518         self.idx = idx
2519         return item
2520
2521 #---------------------------------------------
2522 # zipfile compatible TarFile class
2523 #---------------------------------------------
# Values accepted for the `compression` argument of TarFileCompat. Each
# one carries the value of the zipfile constant named in its trailing
# comment.
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """Wrapper that gives a TarFile an interface compatible with the
       standard module zipfile's ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open file through the TarFile opener chosen by compression.

           compression must be TAR_PLAIN or TAR_GZIPPED; any other value
           raises ValueError. A py3k warning is issued on every
           construction. When mode starts with "r", each member
           additionally gets the attributes filename, file_size and
           date_time, derived from its name, size and mtime.
        """
        from warnings import warnpy3k
        warnpy3k("the TarFileCompat class has been removed in Python 3.0",
                stacklevel=2)
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)
        if mode[:1] == "r":
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]
    def namelist(self):
        """Return the names of the members reported by infolist().
        """
        return [member.name for member in self.infolist()]
    def infolist(self):
        """Return the members whose type is one of REGULAR_TYPES.
        """
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]
    def printdir(self):
        """Delegate to the wrapped TarFile's list() method.
        """
        self.tarfile.list()
    def testzip(self):
        """Do nothing and return None.
        """
        return
    def getinfo(self, name):
        """Return the wrapped TarFile's member called name.
        """
        return self.tarfile.getmember(name)
    def read(self, name):
        """Return everything read from the extracted file object of the
           member called name.
        """
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()
    def write(self, filename, arcname=None, compress_type=None):
        """Add filename to the archive under arcname.

           compress_type is accepted but not used.
        """
        self.tarfile.add(filename, arcname)
    def writestr(self, zinfo, bytes):
        """Add the string bytes as a member described by zinfo.

           The member takes its name from zinfo.filename and its mtime
           from zinfo.date_time, converted with calendar.timegm().
        """
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        info = TarInfo(zinfo.filename)
        info.size = len(bytes)
        info.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(info, StringIO(bytes))
    def close(self):
        """Close the wrapped TarFile.
        """
        self.tarfile.close()
2573 #class TarFileCompat
2574
2575 #--------------------
2576 # exported functions
2577 #--------------------
def is_tarfile(name):
    """Tell whether name can be opened as a tar archive.

       Returns True if open(name) followed by close() succeeds and
       False if either raises TarError. Other exceptions propagate.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2588
# Keep a reference to whatever `open` was bound to up to this point
# (presumably the builtin, unless the module rebinds it earlier), because
# the next line replaces the module-level name.
bltn_open = open
# From here on the module-level name `open` refers to TarFile.open.
open = TarFile.open