1 #!/usr/bin/env python
2 # -*- coding: iso-8859-1 -*-
3 #-------------------------------------------------------------------
4 # tarfile.py
5 #-------------------------------------------------------------------
6 # Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7 # All rights reserved.
9 # Permission is hereby granted, free of charge, to any person
10 # obtaining a copy of this software and associated documentation
11 # files (the "Software"), to deal in the Software without
12 # restriction, including without limitation the rights to use,
13 # copy, modify, merge, publish, distribute, sublicense, and/or sell
14 # copies of the Software, and to permit persons to whom the
15 # Software is furnished to do so, subject to the following
16 # conditions:
18 # The above copyright notice and this permission notice shall be
19 # included in all copies or substantial portions of the Software.
21 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23 # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25 # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 # OTHER DEALINGS IN THE SOFTWARE.
30 """Read from and write to tar format archives.
31 """
33 __version__ = "$Revision$"
34 # $Source$
36 version = "0.9.0"
37 __author__ = "Lars Gustäbel (lars@gustaebel.de)"
38 __date__ = "$Date$"
39 __cvsid__ = "$Id$"
40 __credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
42 #---------
43 # Imports
44 #---------
45 import sys
46 import os
47 import shutil
48 import stat
49 import errno
50 import time
51 import struct
52 import copy
53 import re
54 import operator
56 if sys.platform == 'mac':
57 # This module needs work for MacOS9, especially in the area of pathname
58 # handling. In many places it is assumed a simple substitution of / by the
59 # local os.path.sep is good enough to convert pathnames, but this does not
60 # work with the mac rooted:path:name versus :nonrooted:path:name syntax
61 raise ImportError, "tarfile does not work for platform==mac"
63 try:
64 import grp, pwd
65 except ImportError:
66 grp = pwd = None
68 # from tarfile import *
69 __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
71 #---------------------------------------------------------
72 # tar constants
73 #---------------------------------------------------------
74 NUL = "\0" # the null character
75 BLOCKSIZE = 512 # length of processing blocks
76 RECORDSIZE = BLOCKSIZE * 20 # length of records
77 GNU_MAGIC = "ustar  \0"        # magic gnu tar string (two spaces before the NUL)
78 POSIX_MAGIC = "ustar\x0000" # magic posix tar string
80 LENGTH_NAME = 100 # maximum length of a filename
81 LENGTH_LINK = 100 # maximum length of a linkname
82 LENGTH_PREFIX = 155 # maximum length of the prefix field
84 REGTYPE = "0" # regular file
85 AREGTYPE = "\0" # regular file
86 LNKTYPE = "1" # link (inside tarfile)
87 SYMTYPE = "2" # symbolic link
88 CHRTYPE = "3" # character special device
89 BLKTYPE = "4" # block special device
90 DIRTYPE = "5" # directory
91 FIFOTYPE = "6" # fifo special device
92 CONTTYPE = "7" # contiguous file
94 GNUTYPE_LONGNAME = "L" # GNU tar longname
95 GNUTYPE_LONGLINK = "K" # GNU tar longlink
96 GNUTYPE_SPARSE = "S" # GNU tar sparse file
98 XHDTYPE = "x" # POSIX.1-2001 extended header
99 XGLTYPE = "g" # POSIX.1-2001 global header
100 SOLARIS_XHDTYPE = "X" # Solaris extended header
102 USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format
103 GNU_FORMAT = 1 # GNU tar format
104 PAX_FORMAT = 2 # POSIX.1-2001 (pax) format
105 DEFAULT_FORMAT = GNU_FORMAT
107 #---------------------------------------------------------
108 # tarfile constants
109 #---------------------------------------------------------
110 # File types that tarfile supports:
111 SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
112 SYMTYPE, DIRTYPE, FIFOTYPE,
113 CONTTYPE, CHRTYPE, BLKTYPE,
114 GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
115 GNUTYPE_SPARSE)
117 # File types that will be treated as a regular file.
118 REGULAR_TYPES = (REGTYPE, AREGTYPE,
119 CONTTYPE, GNUTYPE_SPARSE)
121 # File types that are part of the GNU tar format.
122 GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
123 GNUTYPE_SPARSE)
125 # Fields from a pax header that override a TarInfo attribute.
126 PAX_FIELDS = ("path", "linkpath", "size", "mtime",
127 "uid", "gid", "uname", "gname")
129 # Fields in a pax header that are numbers, all other fields
130 # are treated as strings.
131 PAX_NUMBER_FIELDS = {
132 "atime": float,
133 "ctime": float,
134 "mtime": float,
135 "uid": int,
136 "gid": int,
137 "size": int
138 }
140 #---------------------------------------------------------
141 # Bits used in the mode field, values in octal.
142 #---------------------------------------------------------
143 S_IFLNK = 0120000 # symbolic link
144 S_IFREG = 0100000 # regular file
145 S_IFBLK = 0060000 # block device
146 S_IFDIR = 0040000 # directory
147 S_IFCHR = 0020000 # character device
148 S_IFIFO = 0010000 # fifo
150 TSUID = 04000 # set UID on execution
151 TSGID = 02000 # set GID on execution
152 TSVTX = 01000 # reserved
154 TUREAD = 0400 # read by owner
155 TUWRITE = 0200 # write by owner
156 TUEXEC = 0100 # execute/search by owner
157 TGREAD = 0040 # read by group
158 TGWRITE = 0020 # write by group
159 TGEXEC = 0010 # execute/search by group
160 TOREAD = 0004 # read by other
161 TOWRITE = 0002 # write by other
162 TOEXEC = 0001 # execute/search by other
164 #---------------------------------------------------------
165 # initialization
166 #---------------------------------------------------------
167 ENCODING = sys.getfilesystemencoding()
168 if ENCODING is None:
169 ENCODING = sys.getdefaultencoding()
171 #---------------------------------------------------------
172 # Some useful functions
173 #---------------------------------------------------------
175 def stn(s, length):
176 """Convert a python string to a null-terminated string buffer.
177 """
178 return s[:length] + (length - len(s)) * NUL
180 def nts(s):
181 """Convert a null-terminated string field to a python string.
182 """
183 # Use the string up to the first null char.
184 p = s.find("\0")
185 if p == -1:
186 return s
187 return s[:p]
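# Quick round-trip sketch for stn()/nts() (illustrative only; the values shown
# are what the two helpers above compute):
#   >>> stn("foo", 8)
#   'foo\x00\x00\x00\x00\x00'
#   >>> nts('foo\x00\x00\x00\x00\x00')
#   'foo'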
189 def nti(s):
190 """Convert a number field to a python number.
191 """
192 # There are two possible encodings for a number field, see
193 # itn() below.
194 if s[0] != chr(0200):
195 try:
196 n = int(nts(s) or "0", 8)
197 except ValueError:
198 raise HeaderError("invalid header")
199 else:
200 n = 0L
201 for i in xrange(len(s) - 1):
202 n <<= 8
203 n += ord(s[i + 1])
204 return n
206 def itn(n, digits=8, format=DEFAULT_FORMAT):
207 """Convert a python number to a number field.
208 """
209 # POSIX 1003.1-1988 requires numbers to be encoded as a string of
210 # octal digits followed by a null-byte, this allows values up to
211 # (8**(digits-1))-1. GNU tar allows storing numbers greater than
212 # that if necessary. A leading 0200 byte indicates this particular
213 # encoding, the following digits-1 bytes are a big-endian
214 # representation. This allows values up to (256**(digits-1))-1.
215 if 0 <= n < 8 ** (digits - 1):
216 s = "%0*o" % (digits - 1, n) + NUL
217 else:
218 if format != GNU_FORMAT or n >= 256 ** (digits - 1):
219 raise ValueError("overflow in number field")
221 if n < 0:
222 # XXX We mimic GNU tar's behaviour with negative numbers,
223 # this could raise OverflowError.
224 n = struct.unpack("L", struct.pack("l", n))[0]
226 s = ""
227 for i in xrange(digits - 1):
228 s = chr(n & 0377) + s
229 n >>= 8
230 s = chr(0200) + s
231 return s
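# Illustrative examples of the two number-field encodings handled by itn() and
# nti() above (octal for values that fit, GNU base-256 otherwise):
#   >>> itn(0644)
#   '0000644\x00'
#   >>> nti(itn(0644))
#   420
#   >>> nti(itn(8 ** 7, format=GNU_FORMAT)) == 8 ** 7
#   True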
233 def uts(s, encoding, errors):
234 """Convert a unicode object to a string.
235 """
236 if errors == "utf-8":
237 # An extra error handler similar to the -o invalid=UTF-8 option
238 # in POSIX.1-2001. Replace untranslatable characters with their
239 # UTF-8 representation.
240 try:
241 return s.encode(encoding, "strict")
242 except UnicodeEncodeError:
243 x = []
244 for c in s:
245 try:
246 x.append(c.encode(encoding, "strict"))
247 except UnicodeEncodeError:
248 x.append(c.encode("utf8"))
249 return "".join(x)
250 else:
251 return s.encode(encoding, errors)
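# Illustrative sketch of the extra "utf-8" error handler implemented above: an
# untranslatable character survives as its UTF-8 byte sequence.
#   >>> uts(u"abc", "ascii", "strict")
#   'abc'
#   >>> uts(u"\xe4", "ascii", "utf-8")
#   '\xc3\xa4'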
253 def calc_chksums(buf):
254 """Calculate the checksum for a member's header by summing up all
255 characters except for the chksum field which is treated as if
256 it was filled with spaces. According to the GNU tar sources,
257 some tars (Sun and NeXT) calculate chksum with signed char,
258 which will be different if there are chars in the buffer with
259 the high bit set. So we calculate two checksums, unsigned and
260 signed.
261 """
262 unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512]))
263 signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512]))
264 return unsigned_chksum, signed_chksum
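# Usage sketch (hypothetical variable names): both sums are computed over a
# full 512-byte header block and compared with the stored value, as done in
# TarInfo.frombuf():
#   unsigned_sum, signed_sum = calc_chksums(buf)
#   header_ok = nti(buf[148:156]) in (unsigned_sum, signed_sum)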
266 def copyfileobj(src, dst, length=None):
267 """Copy length bytes from fileobj src to fileobj dst.
268 If length is None, copy the entire content.
269 """
270 if length == 0:
271 return
272 if length is None:
273 shutil.copyfileobj(src, dst)
274 return
276 BUFSIZE = 16 * 1024
277 blocks, remainder = divmod(length, BUFSIZE)
278 for b in xrange(blocks):
279 buf = src.read(BUFSIZE)
280 if len(buf) < BUFSIZE:
281 raise IOError("end of file reached")
282 dst.write(buf)
284 if remainder != 0:
285 buf = src.read(remainder)
286 if len(buf) < remainder:
287 raise IOError("end of file reached")
288 dst.write(buf)
289 return
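# Usage sketch (hypothetical file names): copy an exact number of bytes
# between two already opened binary file objects.
#   src = open("member.dat", "rb")
#   dst = open("copy.dat", "wb")
#   copyfileobj(src, dst, 1024)   # raises IOError if src ends early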
291 filemode_table = (
292 ((S_IFLNK, "l"),
293 (S_IFREG, "-"),
294 (S_IFBLK, "b"),
295 (S_IFDIR, "d"),
296 (S_IFCHR, "c"),
297 (S_IFIFO, "p")),
299 ((TUREAD, "r"),),
300 ((TUWRITE, "w"),),
301 ((TUEXEC|TSUID, "s"),
302 (TSUID, "S"),
303 (TUEXEC, "x")),
305 ((TGREAD, "r"),),
306 ((TGWRITE, "w"),),
307 ((TGEXEC|TSGID, "s"),
308 (TSGID, "S"),
309 (TGEXEC, "x")),
311 ((TOREAD, "r"),),
312 ((TOWRITE, "w"),),
313 ((TOEXEC|TSVTX, "t"),
314 (TSVTX, "T"),
315 (TOEXEC, "x"))
316 )
318 def filemode(mode):
319 """Convert a file's mode to a string of the form
320 -rwxrwxrwx.
321 Used by TarFile.list()
322 """
323 perm = []
324 for table in filemode_table:
325 for bit, char in table:
326 if mode & bit == bit:
327 perm.append(char)
328 break
329 else:
330 perm.append("-")
331 return "".join(perm)
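# Illustrative values produced by filemode():
#   >>> filemode(0644)
#   '-rw-r--r--'
#   >>> filemode(040755)
#   'drwxr-xr-x'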
333 if os.sep != "/":
334 normpath = lambda path: os.path.normpath(path).replace(os.sep, "/")
335 else:
336 normpath = os.path.normpath
338 class TarError(Exception):
339 """Base exception."""
340 pass
341 class ExtractError(TarError):
342 """General exception for extract errors."""
343 pass
344 class ReadError(TarError):
345 """Exception for unreadable tar archives."""
346 pass
347 class CompressionError(TarError):
348 """Exception for unavailable compression methods."""
349 pass
350 class StreamError(TarError):
351 """Exception for unsupported operations on stream-like TarFiles."""
352 pass
353 class HeaderError(TarError):
354 """Exception for invalid headers."""
355 pass
357 #---------------------------
358 # internal stream interface
359 #---------------------------
360 class _LowLevelFile:
361 """Low-level file object. Supports reading and writing.
362 It is used instead of a regular file object for streaming
363 access.
364 """
366 def __init__(self, name, mode):
367 mode = {
368 "r": os.O_RDONLY,
369 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
370 }[mode]
371 if hasattr(os, "O_BINARY"):
372 mode |= os.O_BINARY
373 self.fd = os.open(name, mode)
375 def close(self):
376 os.close(self.fd)
378 def read(self, size):
379 return os.read(self.fd, size)
381 def write(self, s):
382 os.write(self.fd, s)
384 class _Stream:
385 """Class that serves as an adapter between TarFile and
386 a stream-like object. The stream-like object only
387 needs to have a read() or write() method and is accessed
388 blockwise. Use of gzip or bzip2 compression is possible.
389 A stream-like object could be for example: sys.stdin,
390 sys.stdout, a socket, a tape device etc.
392 _Stream is intended to be used only internally.
393 """
395 def __init__(self, name, mode, comptype, fileobj, bufsize):
396 """Construct a _Stream object.
397 """
398 self._extfileobj = True
399 if fileobj is None:
400 fileobj = _LowLevelFile(name, mode)
401 self._extfileobj = False
403 if comptype == '*':
404 # Enable transparent compression detection for the
405 # stream interface
406 fileobj = _StreamProxy(fileobj)
407 comptype = fileobj.getcomptype()
409 self.name = name or ""
410 self.mode = mode
411 self.comptype = comptype
412 self.fileobj = fileobj
413 self.bufsize = bufsize
414 self.buf = ""
415 self.pos = 0L
416 self.closed = False
418 if comptype == "gz":
419 try:
420 import zlib
421 except ImportError:
422 raise CompressionError("zlib module is not available")
423 self.zlib = zlib
424 self.crc = zlib.crc32("") & 0xffffffffL
425 if mode == "r":
426 self._init_read_gz()
427 else:
428 self._init_write_gz()
430 if comptype == "bz2":
431 try:
432 import bz2
433 except ImportError:
434 raise CompressionError("bz2 module is not available")
435 if mode == "r":
436 self.dbuf = ""
437 self.cmp = bz2.BZ2Decompressor()
438 else:
439 self.cmp = bz2.BZ2Compressor()
441 def __del__(self):
442 if hasattr(self, "closed") and not self.closed:
443 self.close()
445 def _init_write_gz(self):
446 """Initialize for writing with gzip compression.
447 """
448 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
449 -self.zlib.MAX_WBITS,
450 self.zlib.DEF_MEM_LEVEL,
451 0)
452 timestamp = struct.pack("<L", long(time.time()))
453 self.__write("\037\213\010\010%s\002\377" % timestamp)
454 if self.name.endswith(".gz"):
455 self.name = self.name[:-3]
456 self.__write(self.name + NUL)
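# Note on the gzip header written by _init_write_gz() above: "\037\213" is the
# gzip magic number, the first "\010" selects the deflate method, the second
# "\010" sets the FNAME flag, the packed timestamp is the 4-byte mtime,
# "\002" is the XFL byte and "\377" marks the operating system as unknown
# (cf. RFC 1952).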
458 def write(self, s):
459 """Write string s to the stream.
460 """
461 if self.comptype == "gz":
462 self.crc = self.zlib.crc32(s, self.crc) & 0xffffffffL
463 self.pos += len(s)
464 if self.comptype != "tar":
465 s = self.cmp.compress(s)
466 self.__write(s)
468 def __write(self, s):
469 """Write string s to the stream if a whole new block
470 is ready to be written.
471 """
472 self.buf += s
473 while len(self.buf) > self.bufsize:
474 self.fileobj.write(self.buf[:self.bufsize])
475 self.buf = self.buf[self.bufsize:]
477 def close(self):
478 """Close the _Stream object. No operation should be
479 done on it afterwards.
480 """
481 if self.closed:
482 return
484 if self.mode == "w" and self.comptype != "tar":
485 self.buf += self.cmp.flush()
487 if self.mode == "w" and self.buf:
488 self.fileobj.write(self.buf)
489 self.buf = ""
490 if self.comptype == "gz":
491 # The native zlib crc is an unsigned 32-bit integer, but
492 # the Python wrapper implicitly casts that to a signed C
493 # long. So, on a 32-bit box self.crc may "look negative",
494 # while the same crc on a 64-bit box may "look positive".
495 # To avoid irksome warnings from the `struct` module, force
496 # it to look positive on all boxes.
497 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
498 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
500 if not self._extfileobj:
501 self.fileobj.close()
503 self.closed = True
505 def _init_read_gz(self):
506 """Initialize for reading a gzip compressed fileobj.
507 """
508 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
509 self.dbuf = ""
511 # taken from gzip.GzipFile with some alterations
512 if self.__read(2) != "\037\213":
513 raise ReadError("not a gzip file")
514 if self.__read(1) != "\010":
515 raise CompressionError("unsupported compression method")
517 flag = ord(self.__read(1))
518 self.__read(6)
520 if flag & 4:
521 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
522 self.read(xlen)
523 if flag & 8:
524 while True:
525 s = self.__read(1)
526 if not s or s == NUL:
527 break
528 if flag & 16:
529 while True:
530 s = self.__read(1)
531 if not s or s == NUL:
532 break
533 if flag & 2:
534 self.__read(2)
536 def tell(self):
537 """Return the stream's file pointer position.
538 """
539 return self.pos
541 def seek(self, pos=0):
542 """Set the stream's file pointer to pos. Negative seeking
543 is forbidden.
544 """
545 if pos - self.pos >= 0:
546 blocks, remainder = divmod(pos - self.pos, self.bufsize)
547 for i in xrange(blocks):
548 self.read(self.bufsize)
549 self.read(remainder)
550 else:
551 raise StreamError("seeking backwards is not allowed")
552 return self.pos
554 def read(self, size=None):
555 """Return the next size number of bytes from the stream.
556 If size is not defined, return all bytes of the stream
557 up to EOF.
558 """
559 if size is None:
560 t = []
561 while True:
562 buf = self._read(self.bufsize)
563 if not buf:
564 break
565 t.append(buf)
566 buf = "".join(t)
567 else:
568 buf = self._read(size)
569 self.pos += len(buf)
570 return buf
572 def _read(self, size):
573 """Return size bytes from the stream.
574 """
575 if self.comptype == "tar":
576 return self.__read(size)
578 c = len(self.dbuf)
579 t = [self.dbuf]
580 while c < size:
581 buf = self.__read(self.bufsize)
582 if not buf:
583 break
584 try:
585 buf = self.cmp.decompress(buf)
586 except IOError:
587 raise ReadError("invalid compressed data")
588 t.append(buf)
589 c += len(buf)
590 t = "".join(t)
591 self.dbuf = t[size:]
592 return t[:size]
594 def __read(self, size):
595 """Return size bytes from stream. If internal buffer is empty,
596 read another block from the stream.
597 """
598 c = len(self.buf)
599 t = [self.buf]
600 while c < size:
601 buf = self.fileobj.read(self.bufsize)
602 if not buf:
603 break
604 t.append(buf)
605 c += len(buf)
606 t = "".join(t)
607 self.buf = t[size:]
608 return t[:size]
609 # class _Stream
611 class _StreamProxy(object):
612 """Small proxy class that enables transparent compression
613 detection for the Stream interface (mode 'r|*').
614 """
616 def __init__(self, fileobj):
617 self.fileobj = fileobj
618 self.buf = self.fileobj.read(BLOCKSIZE)
620 def read(self, size):
621 self.read = self.fileobj.read
622 return self.buf
624 def getcomptype(self):
625 if self.buf.startswith("\037\213\010"):
626 return "gz"
627 if self.buf.startswith("BZh91"):
628 return "bz2"
629 return "tar"
631 def close(self):
632 self.fileobj.close()
633 # class StreamProxy
635 class _BZ2Proxy(object):
636 """Small proxy class that enables external file object
637 support for "r:bz2" and "w:bz2" modes. This is actually
638 a workaround for a limitation in bz2 module's BZ2File
639 class which (unlike gzip.GzipFile) has no support for
640 a file object argument.
641 """
643 blocksize = 16 * 1024
645 def __init__(self, fileobj, mode):
646 self.fileobj = fileobj
647 self.mode = mode
648 self.name = getattr(self.fileobj, "name", None)
649 self.init()
651 def init(self):
652 import bz2
653 self.pos = 0
654 if self.mode == "r":
655 self.bz2obj = bz2.BZ2Decompressor()
656 self.fileobj.seek(0)
657 self.buf = ""
658 else:
659 self.bz2obj = bz2.BZ2Compressor()
661 def read(self, size):
662 b = [self.buf]
663 x = len(self.buf)
664 while x < size:
665 raw = self.fileobj.read(self.blocksize)
666 if not raw:
667 break
668 data = self.bz2obj.decompress(raw)
669 b.append(data)
670 x += len(data)
671 self.buf = "".join(b)
673 buf = self.buf[:size]
674 self.buf = self.buf[size:]
675 self.pos += len(buf)
676 return buf
678 def seek(self, pos):
679 if pos < self.pos:
680 self.init()
681 self.read(pos - self.pos)
683 def tell(self):
684 return self.pos
686 def write(self, data):
687 self.pos += len(data)
688 raw = self.bz2obj.compress(data)
689 self.fileobj.write(raw)
691 def close(self):
692 if self.mode == "w":
693 raw = self.bz2obj.flush()
694 self.fileobj.write(raw)
695 # class _BZ2Proxy
697 #------------------------
698 # Extraction file object
699 #------------------------
700 class _FileInFile(object):
701 """A thin wrapper around an existing file object that
702 provides a part of its data as an individual file
703 object.
704 """
706 def __init__(self, fileobj, offset, size, sparse=None):
707 self.fileobj = fileobj
708 self.offset = offset
709 self.size = size
710 self.sparse = sparse
711 self.position = 0
713 def tell(self):
714 """Return the current file position.
715 """
716 return self.position
718 def seek(self, position):
719 """Seek to a position in the file.
720 """
721 self.position = position
723 def read(self, size=None):
724 """Read data from the file.
725 """
726 if size is None:
727 size = self.size - self.position
728 else:
729 size = min(size, self.size - self.position)
731 if self.sparse is None:
732 return self.readnormal(size)
733 else:
734 return self.readsparse(size)
736 def readnormal(self, size):
737 """Read operation for regular files.
738 """
739 self.fileobj.seek(self.offset + self.position)
740 self.position += size
741 return self.fileobj.read(size)
743 def readsparse(self, size):
744 """Read operation for sparse files.
745 """
746 data = []
747 while size > 0:
748 buf = self.readsparsesection(size)
749 if not buf:
750 break
751 size -= len(buf)
752 data.append(buf)
753 return "".join(data)
755 def readsparsesection(self, size):
756 """Read a single section of a sparse file.
757 """
758 section = self.sparse.find(self.position)
760 if section is None:
761 return ""
763 size = min(size, section.offset + section.size - self.position)
765 if isinstance(section, _data):
766 realpos = section.realpos + self.position - section.offset
767 self.fileobj.seek(self.offset + realpos)
768 self.position += size
769 return self.fileobj.read(size)
770 else:
771 self.position += size
772 return NUL * size
773 #class _FileInFile
776 class ExFileObject(object):
777 """File-like object for reading an archive member.
778 It is returned by TarFile.extractfile().
779 """
780 blocksize = 1024
782 def __init__(self, tarfile, tarinfo):
783 self.fileobj = _FileInFile(tarfile.fileobj,
784 tarinfo.offset_data,
785 tarinfo.size,
786 getattr(tarinfo, "sparse", None))
787 self.name = tarinfo.name
788 self.mode = "r"
789 self.closed = False
790 self.size = tarinfo.size
792 self.position = 0
793 self.buffer = ""
795 def read(self, size=None):
796 """Read at most size bytes from the file. If size is not
797 present or None, read all data until EOF is reached.
798 """
799 if self.closed:
800 raise ValueError("I/O operation on closed file")
802 buf = ""
803 if self.buffer:
804 if size is None:
805 buf = self.buffer
806 self.buffer = ""
807 else:
808 buf = self.buffer[:size]
809 self.buffer = self.buffer[size:]
811 if size is None:
812 buf += self.fileobj.read()
813 else:
814 buf += self.fileobj.read(size - len(buf))
816 self.position += len(buf)
817 return buf
819 def readline(self, size=-1):
820 """Read one entire line from the file. If size is present
821 and non-negative, return a string with at most that
822 size, which may be an incomplete line.
823 """
824 if self.closed:
825 raise ValueError("I/O operation on closed file")
827 if "\n" in self.buffer:
828 pos = self.buffer.find("\n") + 1
829 else:
830 buffers = [self.buffer]
831 while True:
832 buf = self.fileobj.read(self.blocksize)
833 buffers.append(buf)
834 if not buf or "\n" in buf:
835 self.buffer = "".join(buffers)
836 pos = self.buffer.find("\n") + 1
837 if pos == 0:
838 # no newline found.
839 pos = len(self.buffer)
840 break
842 if size != -1:
843 pos = min(size, pos)
845 buf = self.buffer[:pos]
846 self.buffer = self.buffer[pos:]
847 self.position += len(buf)
848 return buf
850 def readlines(self):
851 """Return a list with all remaining lines.
852 """
853 result = []
854 while True:
855 line = self.readline()
856 if not line: break
857 result.append(line)
858 return result
860 def tell(self):
861 """Return the current file position.
862 """
863 if self.closed:
864 raise ValueError("I/O operation on closed file")
866 return self.position
868 def seek(self, pos, whence=os.SEEK_SET):
869 """Seek to a position in the file.
870 """
871 if self.closed:
872 raise ValueError("I/O operation on closed file")
874 if whence == os.SEEK_SET:
875 self.position = min(max(pos, 0), self.size)
876 elif whence == os.SEEK_CUR:
877 if pos < 0:
878 self.position = max(self.position + pos, 0)
879 else:
880 self.position = min(self.position + pos, self.size)
881 elif whence == os.SEEK_END:
882 self.position = max(min(self.size + pos, self.size), 0)
883 else:
884 raise ValueError("Invalid argument")
886 self.buffer = ""
887 self.fileobj.seek(self.position)
889 def close(self):
890 """Close the file object.
891 """
892 self.closed = True
894 def __iter__(self):
895 """Get an iterator over the file's lines.
896 """
897 while True:
898 line = self.readline()
899 if not line:
900 break
901 yield line
902 #class ExFileObject
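# Usage sketch (hypothetical archive name): ExFileObject instances are what
# TarFile.extractfile() returns for reading member data.
#   tar = TarFile.open("sample.tar")
#   member = tar.getmembers()[0]
#   data = tar.extractfile(member).read()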
904 #------------------
905 # Exported Classes
906 #------------------
907 class TarInfo(object):
908 """Informational class which holds the details about an
909 archive member given by a tar header block.
910 TarInfo objects are returned by TarFile.getmember(),
911 TarFile.getmembers() and TarFile.gettarinfo() and are
912 usually created internally.
913 """
915 def __init__(self, name=""):
916 """Construct a TarInfo object. name is the optional name
917 of the member.
918 """
919 self.name = name # member name
920 self.mode = 0644 # file permissions
921 self.uid = 0 # user id
922 self.gid = 0 # group id
923 self.size = 0 # file size
924 self.mtime = 0 # modification time
925 self.chksum = 0 # header checksum
926 self.type = REGTYPE # member type
927 self.linkname = "" # link name
928 self.uname = "root" # user name
929 self.gname = "root" # group name
930 self.devmajor = 0 # device major number
931 self.devminor = 0 # device minor number
933 self.offset = 0 # the tar header starts here
934 self.offset_data = 0 # the file's data starts here
936 self.pax_headers = {} # pax header information
938 # In pax headers the "name" and "linkname" field are called
939 # "path" and "linkpath".
940 def _getpath(self):
941 return self.name
942 def _setpath(self, name):
943 self.name = name
944 path = property(_getpath, _setpath)
946 def _getlinkpath(self):
947 return self.linkname
948 def _setlinkpath(self, linkname):
949 self.linkname = linkname
950 linkpath = property(_getlinkpath, _setlinkpath)
952 def __repr__(self):
953 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
955 def get_info(self, encoding, errors):
956 """Return the TarInfo's attributes as a dictionary.
957 """
958 info = {
959 "name": normpath(self.name),
960 "mode": self.mode & 07777,
961 "uid": self.uid,
962 "gid": self.gid,
963 "size": self.size,
964 "mtime": self.mtime,
965 "chksum": self.chksum,
966 "type": self.type,
967 "linkname": normpath(self.linkname) if self.linkname else "",
968 "uname": self.uname,
969 "gname": self.gname,
970 "devmajor": self.devmajor,
971 "devminor": self.devminor
972 }
974 if info["type"] == DIRTYPE and not info["name"].endswith("/"):
975 info["name"] += "/"
977 for key in ("name", "linkname", "uname", "gname"):
978 if type(info[key]) is unicode:
979 info[key] = info[key].encode(encoding, errors)
981 return info
983 def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
984 """Return a tar header as a string of 512 byte blocks.
985 """
986 info = self.get_info(encoding, errors)
988 if format == USTAR_FORMAT:
989 return self.create_ustar_header(info)
990 elif format == GNU_FORMAT:
991 return self.create_gnu_header(info)
992 elif format == PAX_FORMAT:
993 return self.create_pax_header(info, encoding, errors)
994 else:
995 raise ValueError("invalid format")
997 def create_ustar_header(self, info):
998 """Return the object as a ustar header block.
999 """
1000 info["magic"] = POSIX_MAGIC
1002 if len(info["linkname"]) > LENGTH_LINK:
1003 raise ValueError("linkname is too long")
1005 if len(info["name"]) > LENGTH_NAME:
1006 info["prefix"], info["name"] = self._posix_split_name(info["name"])
1008 return self._create_header(info, USTAR_FORMAT)
1010 def create_gnu_header(self, info):
1011 """Return the object as a GNU header block sequence.
1012 """
1013 info["magic"] = GNU_MAGIC
1015 buf = ""
1016 if len(info["linkname"]) > LENGTH_LINK:
1017 buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK)
1019 if len(info["name"]) > LENGTH_NAME:
1020 buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME)
1022 return buf + self._create_header(info, GNU_FORMAT)
1024 def create_pax_header(self, info, encoding, errors):
1025 """Return the object as a ustar header block. If it cannot be
1026 represented this way, prepend a pax extended header sequence
1027 with supplementary information.
1028 """
1029 info["magic"] = POSIX_MAGIC
1030 pax_headers = self.pax_headers.copy()
1032 # Test string fields for values that exceed the field length or cannot
1033 # be represented in ASCII encoding.
1034 for name, hname, length in (
1035 ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
1036 ("uname", "uname", 32), ("gname", "gname", 32)):
1038 if hname in pax_headers:
1039 # The pax header has priority.
1040 continue
1042 val = info[name].decode(encoding, errors)
1044 # Try to encode the string as ASCII.
1045 try:
1046 val.encode("ascii")
1047 except UnicodeEncodeError:
1048 pax_headers[hname] = val
1049 continue
1051 if len(info[name]) > length:
1052 pax_headers[hname] = val
1054 # Test number fields for values that exceed the field limit or values
1055 # that need to be stored as float.
1056 for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
1057 if name in pax_headers:
1058 # The pax header has priority. Avoid overflow.
1059 info[name] = 0
1060 continue
1062 val = info[name]
1063 if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
1064 pax_headers[name] = unicode(val)
1065 info[name] = 0
1067 # Create a pax extended header if necessary.
1068 if pax_headers:
1069 buf = self._create_pax_generic_header(pax_headers)
1070 else:
1071 buf = ""
1073 return buf + self._create_header(info, USTAR_FORMAT)
1075 @classmethod
1076 def create_pax_global_header(cls, pax_headers):
1077 """Return the object as a pax global header block sequence.
1078 """
1079 return cls._create_pax_generic_header(pax_headers, type=XGLTYPE)
1081 def _posix_split_name(self, name):
1082 """Split a name longer than 100 chars into a prefix
1083 and a name part.
1084 """
1085 prefix = name[:LENGTH_PREFIX + 1]
1086 while prefix and prefix[-1] != "/":
1087 prefix = prefix[:-1]
1089 name = name[len(prefix):]
1090 prefix = prefix[:-1]
1092 if not prefix or len(name) > LENGTH_NAME:
1093 raise ValueError("name is too long")
1094 return prefix, name
1096 @staticmethod
1097 def _create_header(info, format):
1098 """Return a header block. info is a dictionary with file
1099 information, format must be one of the *_FORMAT constants.
1100 """
1101 parts = [
1102 stn(info.get("name", ""), 100),
1103 itn(info.get("mode", 0) & 07777, 8, format),
1104 itn(info.get("uid", 0), 8, format),
1105 itn(info.get("gid", 0), 8, format),
1106 itn(info.get("size", 0), 12, format),
1107 itn(info.get("mtime", 0), 12, format),
1108 "        ",        # checksum field (eight spaces while the sum is computed)
1109 info.get("type", REGTYPE),
1110 stn(info.get("linkname", ""), 100),
1111 stn(info.get("magic", POSIX_MAGIC), 8),
1112 stn(info.get("uname", "root"), 32),
1113 stn(info.get("gname", "root"), 32),
1114 itn(info.get("devmajor", 0), 8, format),
1115 itn(info.get("devminor", 0), 8, format),
1116 stn(info.get("prefix", ""), 155)
1117 ]
1119 buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
1120 chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
1121 buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
1122 return buf
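# Note on the checksum patch in _create_header() above: bytes 148-155 form the
# chksum field; it is rewritten as six octal digits followed by a NUL, with the
# trailing space of the placeholder left in place, which is the convention GNU
# tar uses.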
1124 @staticmethod
1125 def _create_payload(payload):
1126 """Return the string payload filled with zero bytes
1127 up to the next 512 byte border.
1128 """
1129 blocks, remainder = divmod(len(payload), BLOCKSIZE)
1130 if remainder > 0:
1131 payload += (BLOCKSIZE - remainder) * NUL
1132 return payload
1134 @classmethod
1135 def _create_gnu_long_header(cls, name, type):
1136 """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
1137 for name.
1138 """
1139 name += NUL
1141 info = {}
1142 info["name"] = "././@LongLink"
1143 info["type"] = type
1144 info["size"] = len(name)
1145 info["magic"] = GNU_MAGIC
1147 # create extended header + name blocks.
1148 return cls._create_header(info, USTAR_FORMAT) + \
1149 cls._create_payload(name)
1151 @classmethod
1152 def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
1153 """Return a POSIX.1-2001 extended or global header sequence
1154 that contains a list of keyword, value pairs. The values
1155 must be unicode objects.
1156 """
1157 records = []
1158 for keyword, value in pax_headers.iteritems():
1159 keyword = keyword.encode("utf8")
1160 value = value.encode("utf8")
1161 l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n'
1162 n = p = 0
1163 while True:
1164 n = l + len(str(p))
1165 if n == p:
1166 break
1167 p = n
1168 records.append("%d %s=%s\n" % (p, keyword, value))
1169 records = "".join(records)
1171 # We use a hardcoded "././@PaxHeader" name like star does
1172 # instead of the one that POSIX recommends.
1173 info = {}
1174 info["name"] = "././@PaxHeader"
1175 info["type"] = type
1176 info["size"] = len(records)
1177 info["magic"] = POSIX_MAGIC
1179 # Create pax header + record blocks.
1180 return cls._create_header(info, USTAR_FORMAT) + \
1181 cls._create_payload(records)
1183 @classmethod
1184 def frombuf(cls, buf):
1185 """Construct a TarInfo object from a 512 byte string buffer.
1186 """
1187 if len(buf) != BLOCKSIZE:
1188 raise HeaderError("truncated header")
1189 if buf.count(NUL) == BLOCKSIZE:
1190 raise HeaderError("empty header")
1192 chksum = nti(buf[148:156])
1193 if chksum not in calc_chksums(buf):
1194 raise HeaderError("bad checksum")
1196 obj = cls()
1197 obj.buf = buf
1198 obj.name = nts(buf[0:100])
1199 obj.mode = nti(buf[100:108])
1200 obj.uid = nti(buf[108:116])
1201 obj.gid = nti(buf[116:124])
1202 obj.size = nti(buf[124:136])
1203 obj.mtime = nti(buf[136:148])
1204 obj.chksum = chksum
1205 obj.type = buf[156:157]
1206 obj.linkname = nts(buf[157:257])
1207 obj.uname = nts(buf[265:297])
1208 obj.gname = nts(buf[297:329])
1209 obj.devmajor = nti(buf[329:337])
1210 obj.devminor = nti(buf[337:345])
1211 prefix = nts(buf[345:500])
1213 # Old V7 tar format represents a directory as a regular
1214 # file with a trailing slash.
1215 if obj.type == AREGTYPE and obj.name.endswith("/"):
1216 obj.type = DIRTYPE
1218 # Remove redundant slashes from directories.
1219 if obj.isdir():
1220 obj.name = obj.name.rstrip("/")
1222 # Reconstruct a ustar longname.
1223 if prefix and obj.type not in GNU_TYPES:
1224 obj.name = prefix + "/" + obj.name
1225 return obj
1227 @classmethod
1228 def fromtarfile(cls, tarfile):
1229 """Return the next TarInfo object from TarFile object
1230 tarfile.
1231 """
1232 buf = tarfile.fileobj.read(BLOCKSIZE)
1233 if not buf:
1234 return
1235 obj = cls.frombuf(buf)
1236 obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1237 return obj._proc_member(tarfile)
1239 #--------------------------------------------------------------------------
1240 # The following are methods that are called depending on the type of a
1241 # member. The entry point is _proc_member() which can be overridden in a
1242 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1243 # implement the following
1244 # operations:
1245 # 1. Set self.offset_data to the position where the data blocks begin,
1246 # if there is data that follows.
1247 # 2. Set tarfile.offset to the position where the next member's header will
1248 # begin.
1249 # 3. Return self or another valid TarInfo object.
1250 def _proc_member(self, tarfile):
1251 """Choose the right processing method depending on
1252 the type and call it.
1253 """
1254 if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1255 return self._proc_gnulong(tarfile)
1256 elif self.type == GNUTYPE_SPARSE:
1257 return self._proc_sparse(tarfile)
1258 elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1259 return self._proc_pax(tarfile)
1260 else:
1261 return self._proc_builtin(tarfile)
1263 def _proc_builtin(self, tarfile):
1264 """Process a builtin type or an unknown type which
1265 will be treated as a regular file.
1266 """
1267 self.offset_data = tarfile.fileobj.tell()
1268 offset = self.offset_data
1269 if self.isreg() or self.type not in SUPPORTED_TYPES:
1270 # Skip the following data blocks.
1271 offset += self._block(self.size)
1272 tarfile.offset = offset
1274 # Patch the TarInfo object with saved global
1275 # header information.
1276 self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
1278 return self
1280 def _proc_gnulong(self, tarfile):
1281 """Process the blocks that hold a GNU longname
1282 or longlink member.
1283 """
1284 buf = tarfile.fileobj.read(self._block(self.size))
1286 # Fetch the next header and process it.
1287 next = self.fromtarfile(tarfile)
1288 if next is None:
1289 raise HeaderError("missing subsequent header")
1291 # Patch the TarInfo object from the next header with
1292 # the longname information.
1293 next.offset = self.offset
1294 if self.type == GNUTYPE_LONGNAME:
1295 next.name = nts(buf)
1296 elif self.type == GNUTYPE_LONGLINK:
1297 next.linkname = nts(buf)
1299 return next
1301 def _proc_sparse(self, tarfile):
1302 """Process a GNU sparse header plus extra headers.
1303 """
1304 buf = self.buf
1305 sp = _ringbuffer()
1306 pos = 386
1307 lastpos = 0L
1308 realpos = 0L
1309 # There are 4 possible sparse structs in the
1310 # first header.
1311 for i in xrange(4):
1312 try:
1313 offset = nti(buf[pos:pos + 12])
1314 numbytes = nti(buf[pos + 12:pos + 24])
1315 except ValueError:
1316 break
1317 if offset > lastpos:
1318 sp.append(_hole(lastpos, offset - lastpos))
1319 sp.append(_data(offset, numbytes, realpos))
1320 realpos += numbytes
1321 lastpos = offset + numbytes
1322 pos += 24
1324 isextended = ord(buf[482])
1325 origsize = nti(buf[483:495])
1327 # If the isextended flag is given,
1328 # there are extra headers to process.
1329 while isextended == 1:
1330 buf = tarfile.fileobj.read(BLOCKSIZE)
1331 pos = 0
1332 for i in xrange(21):
1333 try:
1334 offset = nti(buf[pos:pos + 12])
1335 numbytes = nti(buf[pos + 12:pos + 24])
1336 except ValueError:
1337 break
1338 if offset > lastpos:
1339 sp.append(_hole(lastpos, offset - lastpos))
1340 sp.append(_data(offset, numbytes, realpos))
1341 realpos += numbytes
1342 lastpos = offset + numbytes
1343 pos += 24
1344 isextended = ord(buf[504])
1346 if lastpos < origsize:
1347 sp.append(_hole(lastpos, origsize - lastpos))
1349 self.sparse = sp
1351 self.offset_data = tarfile.fileobj.tell()
1352 tarfile.offset = self.offset_data + self._block(self.size)
1353 self.size = origsize
1355 return self
1357 def _proc_pax(self, tarfile):
1358 """Process an extended or global header as described in
1359 POSIX.1-2001.
1360 """
1361 # Read the header information.
1362 buf = tarfile.fileobj.read(self._block(self.size))
1364 # A pax header stores supplemental information for either
1365 # the following file (extended) or all following files
1366 # (global).
1367 if self.type == XGLTYPE:
1368 pax_headers = tarfile.pax_headers
1369 else:
1370 pax_headers = tarfile.pax_headers.copy()
1372 # Parse pax header information. A record looks like this:
1373 # "%d %s=%s\n" % (length, keyword, value). length is the size
1374 # of the complete record including the length field itself and
1375 # the newline. keyword and value are both UTF-8 encoded strings.
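# Worked example of one record: for keyword "mtime" and value "1234567.89"
# the record is "20 mtime=1234567.89\n" -- the leading 20 is the length of
# the whole record, counting the length digits and the trailing newline.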
1376 regex = re.compile(r"(\d+) ([^=]+)=", re.U)
1377 pos = 0
1378 while True:
1379 match = regex.match(buf, pos)
1380 if not match:
1381 break
1383 length, keyword = match.groups()
1384 length = int(length)
1385 value = buf[match.end(2) + 1:match.start(1) + length - 1]
1387 keyword = keyword.decode("utf8")
1388 value = value.decode("utf8")
1390 pax_headers[keyword] = value
1391 pos += length
1393 # Fetch the next header.
1394 next = self.fromtarfile(tarfile)
1396 if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
1397 if next is None:
1398 raise HeaderError("missing subsequent header")
1400 # Patch the TarInfo object with the extended header info.
1401 next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
1402 next.offset = self.offset
1404 if "size" in pax_headers:
1405 # If the extended header replaces the size field,
1406 # we need to recalculate the offset where the next
1407 # header starts.
1408 offset = next.offset_data
1409 if next.isreg() or next.type not in SUPPORTED_TYPES:
1410 offset += next._block(next.size)
1411 tarfile.offset = offset
1413 return next
1415 def _apply_pax_info(self, pax_headers, encoding, errors):
1416 """Replace fields with supplemental information from a previous
1417 pax extended or global header.
1418 """
1419 for keyword, value in pax_headers.iteritems():
1420 if keyword not in PAX_FIELDS:
1421 continue
1423 if keyword == "path":
1424 value = value.rstrip("/")
1426 if keyword in PAX_NUMBER_FIELDS:
1427 try:
1428 value = PAX_NUMBER_FIELDS[keyword](value)
1429 except ValueError:
1430 value = 0
1431 else:
1432 value = uts(value, encoding, errors)
1434 setattr(self, keyword, value)
1436 self.pax_headers = pax_headers.copy()
1438 def _block(self, count):
1439 """Round up a byte count by BLOCKSIZE and return it,
1440 e.g. _block(834) => 1024.
1441 """
1442 blocks, remainder = divmod(count, BLOCKSIZE)
1443 if remainder:
1444 blocks += 1
1445 return blocks * BLOCKSIZE
1447 def isreg(self):
1448 return self.type in REGULAR_TYPES
1449 def isfile(self):
1450 return self.isreg()
1451 def isdir(self):
1452 return self.type == DIRTYPE
1453 def issym(self):
1454 return self.type == SYMTYPE
1455 def islnk(self):
1456 return self.type == LNKTYPE
1457 def ischr(self):
1458 return self.type == CHRTYPE
1459 def isblk(self):
1460 return self.type == BLKTYPE
1461 def isfifo(self):
1462 return self.type == FIFOTYPE
1463 def issparse(self):
1464 return self.type == GNUTYPE_SPARSE
1465 def isdev(self):
1466 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1467 # class TarInfo
1469 class TarFile(object):
1470 """The TarFile Class provides an interface to tar archives.
1471 """
1473 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
1475 dereference = False # If true, add content of linked file to the
1476 # tar file, else the link.
1478 ignore_zeros = False # If true, skips empty or invalid blocks and
1479 # continues processing.
1481 errorlevel = 0 # If 0, fatal errors only appear in debug
1482 # messages (if debug >= 0). If > 0, errors
1483 # are passed to the caller as exceptions.
1485 format = DEFAULT_FORMAT # The format to use when creating an archive.
1487 encoding = ENCODING # Encoding for 8-bit character strings.
1489 errors = None # Error handler for unicode conversion.
1491 tarinfo = TarInfo # The default TarInfo class to use.
1493 fileobject = ExFileObject # The default ExFileObject class to use.
1495 def __init__(self, name=None, mode="r", fileobj=None, format=None,
1496 tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
1497 errors=None, pax_headers=None, debug=None, errorlevel=None):
1498 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1499 read from an existing archive, 'a' to append data to an existing
1500 file or 'w' to create a new file overwriting an existing one. `mode'
1501 defaults to 'r'.
1502 If `fileobj' is given, it is used for reading or writing data. If it
1503 can be determined, `mode' is overridden by `fileobj's mode.
1504 `fileobj' is not closed when the TarFile is closed.
1505 """
1506 if len(mode) > 1 or mode not in "raw":
1507 raise ValueError("mode must be 'r', 'a' or 'w'")
1508 self.mode = mode
1509 self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
1511 if not fileobj:
1512 if self.mode == "a" and not os.path.exists(name):
1513 # Create nonexistent files in append mode.
1514 self.mode = "w"
1515 self._mode = "wb"
1516 fileobj = bltn_open(name, self._mode)
1517 self._extfileobj = False
1518 else:
1519 if name is None and hasattr(fileobj, "name"):
1520 name = fileobj.name
1521 if hasattr(fileobj, "mode"):
1522 self._mode = fileobj.mode
1523 self._extfileobj = True
1524 self.name = os.path.abspath(name) if name else None
1525 self.fileobj = fileobj
1527 # Init attributes.
1528 if format is not None:
1529 self.format = format
1530 if tarinfo is not None:
1531 self.tarinfo = tarinfo
1532 if dereference is not None:
1533 self.dereference = dereference
1534 if ignore_zeros is not None:
1535 self.ignore_zeros = ignore_zeros
1536 if encoding is not None:
1537 self.encoding = encoding
1539 if errors is not None:
1540 self.errors = errors
1541 elif mode == "r":
1542 self.errors = "utf-8"
1543 else:
1544 self.errors = "strict"
1546 if pax_headers is not None and self.format == PAX_FORMAT:
1547 self.pax_headers = pax_headers
1548 else:
1549 self.pax_headers = {}
1551 if debug is not None:
1552 self.debug = debug
1553 if errorlevel is not None:
1554 self.errorlevel = errorlevel
1556 # Init datastructures.
1557 self.closed = False
1558 self.members = [] # list of members as TarInfo objects
1559 self._loaded = False # flag if all members have been read
1560 self.offset = self.fileobj.tell()
1561 # current position in the archive file
1562 self.inodes = {} # dictionary caching the inodes of
1563 # archive members already added
1565 if self.mode == "r":
1566 self.firstmember = None
1567 self.firstmember = self.next()
1569 if self.mode == "a":
1570 # Move to the end of the archive,
1571 # before the first empty block.
1572 self.firstmember = None
1573 while True:
1574 if self.next() is None:
1575 if self.offset > 0:
1576 self.fileobj.seek(- BLOCKSIZE, 1)
1577 break
1579 if self.mode in "aw":
1580 self._loaded = True
1582 if self.pax_headers:
1583 buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
1584 self.fileobj.write(buf)
1585 self.offset += len(buf)
1587 def _getposix(self):
1588 return self.format == USTAR_FORMAT
1589 def _setposix(self, value):
1590 import warnings
1591 warnings.warn("use the format attribute instead", DeprecationWarning)
1592 if value:
1593 self.format = USTAR_FORMAT
1594 else:
1595 self.format = GNU_FORMAT
1596 posix = property(_getposix, _setposix)
1598 #--------------------------------------------------------------------------
1599 # Below are the classmethods which act as alternate constructors to the
1600 # TarFile class. The open() method is the only one that is needed for
1601 # public use; it is the "super"-constructor and is able to select an
1602 # adequate "sub"-constructor for a particular compression using the mapping
1603 # from OPEN_METH.
1605 # This concept allows one to subclass TarFile without losing the comfort of
1606 # the super-constructor. A sub-constructor is registered and made available
1607 # by adding it to the mapping in OPEN_METH.
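# Usage sketch for the open() super-constructor below (hypothetical names;
# `pipe` stands for any object with a suitable read() method):
#   tar = TarFile.open("sample.tar.gz", "r:gz")    # read, explicit gzip
#   tar = TarFile.open("backup.tar", "w")          # write, uncompressed
#   tar = TarFile.open(fileobj=pipe, mode="r|*")   # stream, detect compression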
1609 @classmethod
1610 def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
1611 """Open a tar archive for reading, writing or appending. Return
1612 an appropriate TarFile class.
1614 mode:
1615 'r' or 'r:*' open for reading with transparent compression
1616 'r:' open for reading exclusively uncompressed
1617 'r:gz' open for reading with gzip compression
1618 'r:bz2' open for reading with bzip2 compression
1619 'a' or 'a:' open for appending, creating the file if necessary
1620 'w' or 'w:' open for writing without compression
1621 'w:gz' open for writing with gzip compression
1622 'w:bz2' open for writing with bzip2 compression
1624 'r|*' open a stream of tar blocks with transparent compression
1625 'r|' open an uncompressed stream of tar blocks for reading
1626 'r|gz' open a gzip compressed stream of tar blocks
1627 'r|bz2' open a bzip2 compressed stream of tar blocks
1628 'w|' open an uncompressed stream for writing
1629 'w|gz' open a gzip compressed stream for writing
1630 'w|bz2' open a bzip2 compressed stream for writing
1631 """
1633 if not name and not fileobj:
1634 raise ValueError("nothing to open")
1636 if mode in ("r", "r:*"):
1637 # Find out which *open() is appropriate for opening the file.
1638 for comptype in cls.OPEN_METH:
1639 func = getattr(cls, cls.OPEN_METH[comptype])
1640 if fileobj is not None:
1641 saved_pos = fileobj.tell()
1642 try:
1643 return func(name, "r", fileobj, **kwargs)
1644 except (ReadError, CompressionError), e:
1645 if fileobj is not None:
1646 fileobj.seek(saved_pos)
1647 continue
1648 raise ReadError("file could not be opened successfully")
1650 elif ":" in mode:
1651 filemode, comptype = mode.split(":", 1)
1652 filemode = filemode or "r"
1653 comptype = comptype or "tar"
1655 # Select the *open() function according to
1656 # given compression.
1657 if comptype in cls.OPEN_METH:
1658 func = getattr(cls, cls.OPEN_METH[comptype])
1659 else:
1660 raise CompressionError("unknown compression type %r" % comptype)
1661 return func(name, filemode, fileobj, **kwargs)
1663 elif "|" in mode:
1664 filemode, comptype = mode.split("|", 1)
1665 filemode = filemode or "r"
1666 comptype = comptype or "tar"
1668 if filemode not in "rw":
1669 raise ValueError("mode must be 'r' or 'w'")
1671 t = cls(name, filemode,
1672 _Stream(name, filemode, comptype, fileobj, bufsize),
1673 **kwargs)
1674 t._extfileobj = False
1675 return t
1677 elif mode in "aw":
1678 return cls.taropen(name, mode, fileobj, **kwargs)
1680 raise ValueError("undiscernible mode")
1682 @classmethod
1683 def taropen(cls, name, mode="r", fileobj=None, **kwargs):
1684 """Open uncompressed tar archive name for reading or writing.
1685 """
1686 if len(mode) > 1 or mode not in "raw":
1687 raise ValueError("mode must be 'r', 'a' or 'w'")
1688 return cls(name, mode, fileobj, **kwargs)
1690 @classmethod
1691 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1692 """Open gzip compressed tar archive name for reading or writing.
1693 Appending is not allowed.
1694 """
1695 if len(mode) > 1 or mode not in "rw":
1696 raise ValueError("mode must be 'r' or 'w'")
1698 try:
1699 import gzip
1700 gzip.GzipFile
1701 except (ImportError, AttributeError):
1702 raise CompressionError("gzip module is not available")
1704 if fileobj is None:
1705 fileobj = bltn_open(name, mode + "b")
1707 try:
1708 t = cls.taropen(name, mode,
1709 gzip.GzipFile(name, mode, compresslevel, fileobj),
1710 **kwargs)
1711 except IOError:
1712 raise ReadError("not a gzip file")
1713 t._extfileobj = False
1714 return t
1716 @classmethod
1717 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1718 """Open bzip2 compressed tar archive name for reading or writing.
1719 Appending is not allowed.
1720 """
1721 if len(mode) > 1 or mode not in "rw":
1722 raise ValueError("mode must be 'r' or 'w'.")
1724 try:
1725 import bz2
1726 except ImportError:
1727 raise CompressionError("bz2 module is not available")
1729 if fileobj is not None:
1730 fileobj = _BZ2Proxy(fileobj, mode)
1731 else:
1732 fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
1734 try:
1735 t = cls.taropen(name, mode, fileobj, **kwargs)
1736 except IOError:
1737 raise ReadError("not a bzip2 file")
1738 t._extfileobj = False
1739 return t
1741 # All *open() methods are registered here.
1742 OPEN_METH = {
1743 "tar": "taropen", # uncompressed tar
1744 "gz": "gzopen", # gzip compressed tar
1745 "bz2": "bz2open"   # bzip2 compressed tar
1746 }
1748 #--------------------------------------------------------------------------
1749 # The public methods which TarFile provides:
1751 def close(self):
1752 """Close the TarFile. In write-mode, two finishing zero blocks are
1753 appended to the archive.
1754 """
1755 if self.closed:
1756 return
1758 if self.mode in "aw":
1759 self.fileobj.write(NUL * (BLOCKSIZE * 2))
1760 self.offset += (BLOCKSIZE * 2)
1761 # fill up the end with zero-blocks
1762 # (like option -b20 for tar does)
1763 blocks, remainder = divmod(self.offset, RECORDSIZE)
1764 if remainder > 0:
1765 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1767 if not self._extfileobj:
1768 self.fileobj.close()
1769 self.closed = True
1771 def getmember(self, name):
1772 """Return a TarInfo object for member `name'. If `name' can not be
1773 found in the archive, KeyError is raised. If a member occurs more
1774 than once in the archive, its last occurrence is assumed to be the
1775 most up-to-date version.
1776 """
1777 tarinfo = self._getmember(name)
1778 if tarinfo is None:
1779 raise KeyError("filename %r not found" % name)
1780 return tarinfo
1782 def getmembers(self):
1783 """Return the members of the archive as a list of TarInfo objects. The
1784 list has the same order as the members in the archive.
1785 """
1786 self._check()
1787 if not self._loaded: # if we want to obtain a list of
1788 self._load() # all members, we first have to
1789 # scan the whole archive.
1790 return self.members
1792 def getnames(self):
1793 """Return the members of the archive as a list of their names. It has
1794 the same order as the list returned by getmembers().
1795 """
1796 return [tarinfo.name for tarinfo in self.getmembers()]
1798 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1799 """Create a TarInfo object for either the file `name' or the file
1800 object `fileobj' (using os.fstat on its file descriptor). You can
1801 modify some of the TarInfo's attributes before you add it using
1802 addfile(). If given, `arcname' specifies an alternative name for the
1803 file in the archive.
1804 """
1805 self._check("aw")
1807 # When fileobj is given, replace name by
1808 # fileobj's real name.
1809 if fileobj is not None:
1810 name = fileobj.name
1812 # Building the name of the member in the archive.
1813 # Backward slashes are converted to forward slashes,
1814 # Absolute paths are turned to relative paths.
1815 if arcname is None:
1816 arcname = name
1817 arcname = normpath(arcname)
1818 drv, arcname = os.path.splitdrive(arcname)
1819 while arcname[0:1] == "/":
1820 arcname = arcname[1:]
1822 # Now, fill the TarInfo object with
1823 # information specific for the file.
1824 tarinfo = self.tarinfo()
1825 tarinfo.tarfile = self
1827 # Use os.stat or os.lstat, depending on platform
1828 # and if symlinks shall be resolved.
1829 if fileobj is None:
1830 if hasattr(os, "lstat") and not self.dereference:
1831 statres = os.lstat(name)
1832 else:
1833 statres = os.stat(name)
1834 else:
1835 statres = os.fstat(fileobj.fileno())
1836 linkname = ""
1838 stmd = statres.st_mode
1839 if stat.S_ISREG(stmd):
1840 inode = (statres.st_ino, statres.st_dev)
1841 if not self.dereference and statres.st_nlink > 1 and \
1842 inode in self.inodes and arcname != self.inodes[inode]:
1843 # Is it a hardlink to an already
1844 # archived file?
1845 type = LNKTYPE
1846 linkname = self.inodes[inode]
1847 else:
1848 # The inode is added only if it's valid.
1849 # For win32 it is always 0.
1850 type = REGTYPE
1851 if inode[0]:
1852 self.inodes[inode] = arcname
1853 elif stat.S_ISDIR(stmd):
1854 type = DIRTYPE
1855 elif stat.S_ISFIFO(stmd):
1856 type = FIFOTYPE
1857 elif stat.S_ISLNK(stmd):
1858 type = SYMTYPE
1859 linkname = os.readlink(name)
1860 elif stat.S_ISCHR(stmd):
1861 type = CHRTYPE
1862 elif stat.S_ISBLK(stmd):
1863 type = BLKTYPE
1864 else:
1865 return None
1867 # Fill the TarInfo object with all
1868 # information we can get.
1869 tarinfo.name = arcname
1870 tarinfo.mode = stmd
1871 tarinfo.uid = statres.st_uid
1872 tarinfo.gid = statres.st_gid
1873 if stat.S_ISREG(stmd):
1874 tarinfo.size = statres.st_size
1875 else:
1876 tarinfo.size = 0L
1877 tarinfo.mtime = statres.st_mtime
1878 tarinfo.type = type
1879 tarinfo.linkname = linkname
1880 if pwd:
1881 try:
1882 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1883 except KeyError:
1884 pass
1885 if grp:
1886 try:
1887 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1888 except KeyError:
1889 pass
1891 if type in (CHRTYPE, BLKTYPE):
1892 if hasattr(os, "major") and hasattr(os, "minor"):
1893 tarinfo.devmajor = os.major(statres.st_rdev)
1894 tarinfo.devminor = os.minor(statres.st_rdev)
1895 return tarinfo
1897 def list(self, verbose=True):
1898 """Print a table of contents to sys.stdout. If `verbose' is False, only
1899 the names of the members are printed. If it is True, an `ls -l'-like
1900 output is produced.
1901 """
1902 self._check()
1904 for tarinfo in self:
1905 if verbose:
1906 print filemode(tarinfo.mode),
1907 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1908 tarinfo.gname or tarinfo.gid),
1909 if tarinfo.ischr() or tarinfo.isblk():
1910 print "%10s" % ("%d,%d" \
1911 % (tarinfo.devmajor, tarinfo.devminor)),
1912 else:
1913 print "%10d" % tarinfo.size,
1914 print "%d-%02d-%02d %02d:%02d:%02d" \
1915 % time.localtime(tarinfo.mtime)[:6],
1917 print tarinfo.name + ("/" if tarinfo.isdir() else ""),
1919 if verbose:
1920 if tarinfo.issym():
1921 print "->", tarinfo.linkname,
1922 if tarinfo.islnk():
1923 print "link to", tarinfo.linkname,
1924 print
1926 def add(self, name, arcname=None, recursive=True, exclude=None):
1927 """Add the file `name' to the archive. `name' may be any type of file
1928 (directory, fifo, symbolic link, etc.). If given, `arcname'
1929 specifies an alternative name for the file in the archive.
1930 Directories are added recursively by default. This can be avoided by
1931 setting `recursive' to False. `exclude' is a function that should
1932 return True for each filename to be excluded.
1933 """
1934 self._check("aw")
1936 if arcname is None:
1937 arcname = name
1939 # Exclude pathnames.
1940 if exclude is not None and exclude(name):
1941 self._dbg(2, "tarfile: Excluded %r" % name)
1942 return
1944 # Skip if somebody tries to archive the archive...
1945 if self.name is not None and os.path.abspath(name) == self.name:
1946 self._dbg(2, "tarfile: Skipped %r" % name)
1947 return
1949 # Special case: The user wants to add the current
1950 # working directory.
1951 if name == ".":
1952 if recursive:
1953 if arcname == ".":
1954 arcname = ""
1955 for f in os.listdir(name):
1956 self.add(f, os.path.join(arcname, f), recursive, exclude)
1957 return
1959 self._dbg(1, name)
1961 # Create a TarInfo object from the file.
1962 tarinfo = self.gettarinfo(name, arcname)
1964 if tarinfo is None:
1965 self._dbg(1, "tarfile: Unsupported type %r" % name)
1966 return
1968 # Append the tar header and data to the archive.
1969 if tarinfo.isreg():
1970 f = bltn_open(name, "rb")
1971 self.addfile(tarinfo, f)
1972 f.close()
1974 elif tarinfo.isdir():
1975 self.addfile(tarinfo)
1976 if recursive:
1977 for f in os.listdir(name):
1978 self.add(os.path.join(name, f), os.path.join(arcname, f), recursive, exclude)
1980 else:
1981 self.addfile(tarinfo)
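# Usage sketch (illustrative comment only; the directory name and the
# exclude callback are made-up examples): add() walks directories
# recursively and consults `exclude' for every pathname it visits.
#
#   import tarfile
#
#   def is_backup(name):
#       return name.endswith("~")
#
#   tar = tarfile.open("sample.tar", "w")
#   tar.add("project", exclude=is_backup)
#   tar.close()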
1983 def addfile(self, tarinfo, fileobj=None):
1984 """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
1985 given, tarinfo.size bytes are read from it and added to the archive.
1986 You can create TarInfo objects using gettarinfo().
1987 On Windows platforms, `fileobj' should always be opened with mode
1988 'rb'; otherwise newline translation can make fewer bytes available than tarinfo.size.
1989 """
1990 self._check("aw")
1992 tarinfo = copy.copy(tarinfo)
1994 buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
1995 self.fileobj.write(buf)
1996 self.offset += len(buf)
1998 # If there's data to follow, append it.
1999 if fileobj is not None:
2000 copyfileobj(fileobj, self.fileobj, tarinfo.size)
2001 blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
2002 if remainder > 0:
2003 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
2004 blocks += 1
2005 self.offset += blocks * BLOCKSIZE
2007 self.members.append(tarinfo)
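# Usage sketch (illustrative comment only; the member name and payload are
# made-up, and `tar' is assumed to be a TarFile opened for writing):
# addfile() also accepts an in-memory file object, as long as tarinfo.size
# matches the number of bytes that will be read from it.
#
#   import tarfile, time
#   from cStringIO import StringIO
#   data = "hello world\n"
#   tarinfo = tarfile.TarInfo("greeting.txt")
#   tarinfo.size = len(data)
#   tarinfo.mtime = time.time()
#   tar.addfile(tarinfo, StringIO(data))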
2009 def extractall(self, path=".", members=None):
2010 """Extract all members from the archive to the current working
2011 directory and set owner, modification time and permissions on
2012 directories afterwards. `path' specifies a different directory
2013 to extract to. `members' is optional and must be a subset of the
2014 list returned by getmembers().
2015 """
2016 directories = []
2018 if members is None:
2019 members = self
2021 for tarinfo in members:
2022 if tarinfo.isdir():
2023 # Extract directories with a safe mode.
2024 directories.append(tarinfo)
2025 tarinfo = copy.copy(tarinfo)
2026 tarinfo.mode = 0700
2027 self.extract(tarinfo, path)
2029 # Reverse sort directories.
2030 directories.sort(key=operator.attrgetter('name'))
2031 directories.reverse()
2033 # Set correct owner, mtime and filemode on directories.
2034 for tarinfo in directories:
2035 dirpath = os.path.join(path, tarinfo.name)
2036 try:
2037 self.chown(tarinfo, dirpath)
2038 self.utime(tarinfo, dirpath)
2039 self.chmod(tarinfo, dirpath)
2040 except ExtractError, e:
2041 if self.errorlevel > 1:
2042 raise
2043 else:
2044 self._dbg(1, "tarfile: %s" % e)
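# Usage sketch (illustrative comment only; the target directory is a
# made-up example). As the module documentation warns, archives from
# untrusted sources may contain members with absolute paths or ".."
# components, so inspect getmembers() before extracting such archives.
#
#   import tarfile
#   tar = tarfile.open("sample.tar", "r")
#   tar.extractall(path="/tmp/unpacked")
#   tar.close()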
2046 def extract(self, member, path=""):
2047 """Extract a member from the archive to the current working directory,
2048 using its full name. Its file information is extracted as accurately
2049 as possible. `member' may be a filename or a TarInfo object. You can
2050 specify a different directory using `path'.
2051 """
2052 self._check("r")
2054 if isinstance(member, basestring):
2055 tarinfo = self.getmember(member)
2056 else:
2057 tarinfo = member
2059 # Prepare the link target for makelink().
2060 if tarinfo.islnk():
2061 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
2063 try:
2064 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
2065 except EnvironmentError, e:
2066 if self.errorlevel > 0:
2067 raise
2068 else:
2069 if e.filename is None:
2070 self._dbg(1, "tarfile: %s" % e.strerror)
2071 else:
2072 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
2073 except ExtractError, e:
2074 if self.errorlevel > 1:
2075 raise
2076 else:
2077 self._dbg(1, "tarfile: %s" % e)
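# Usage sketch (illustrative comment only; member and directory names are
# made-up examples): extract a single member, keeping its archive path
# below the chosen directory.
#
#   import tarfile
#   tar = tarfile.open("sample.tar", "r")
#   member = tar.getmember("docs/example.txt")
#   tar.extract(member, path="/tmp/unpacked")
#   tar.close()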
2079 def extractfile(self, member):
2080 """Extract a member from the archive as a file object. `member' may be
2081 a filename or a TarInfo object. If `member' is a regular file, a
2082 file-like object is returned. If `member' is a link, a file-like
2083 object is constructed from the link's target. If `member' is none of
2084 the above, None is returned.
2085 The file-like object is read-only and provides the following
2086 methods: read(), readline(), readlines(), seek() and tell()
2087 """
2088 self._check("r")
2090 if isinstance(member, basestring):
2091 tarinfo = self.getmember(member)
2092 else:
2093 tarinfo = member
2095 if tarinfo.isreg():
2096 return self.fileobject(self, tarinfo)
2098 elif tarinfo.type not in SUPPORTED_TYPES:
2099 # If a member's type is unknown, it is treated as a
2100 # regular file.
2101 return self.fileobject(self, tarinfo)
2103 elif tarinfo.islnk() or tarinfo.issym():
2104 if isinstance(self.fileobj, _Stream):
2105 # A small but ugly workaround for the case that someone tries
2106 # to extract a (sym)link as a file-object from a non-seekable
2107 # stream of tar blocks.
2108 raise StreamError("cannot extract (sym)link as file object")
2109 else:
2110 # A (sym)link's file object is its target's file object.
2111 return self.extractfile(self._getmember(tarinfo.linkname,
2112 tarinfo))
2113 else:
2114 # If there's no data associated with the member (directory, chrdev,
2115 # blkdev, etc.), return None instead of a file object.
2116 return None
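# Usage sketch (illustrative comment only; the member name is a made-up
# example): extractfile() returns None for members that carry no data,
# so check the result before reading.
#
#   import tarfile
#   tar = tarfile.open("sample.tar", "r")
#   f = tar.extractfile("docs/example.txt")
#   if f is not None:
#       data = f.read()
#       f.close()
#   tar.close()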
2118 def _extract_member(self, tarinfo, targetpath):
2119 """Extract the TarInfo object tarinfo to a physical
2120 file called targetpath.
2121 """
2122 # Build the destination pathname, replacing
2123 # forward slashes with platform-specific
2124 # separators.
2125 if targetpath[-1:] == "/":
2126 targetpath = targetpath[:-1]
2127 targetpath = os.path.normpath(targetpath)
2129 # Create all upper directories.
2130 upperdirs = os.path.dirname(targetpath)
2131 if upperdirs and not os.path.exists(upperdirs):
2132 # Create directories that are not part of the archive with
2133 # default permissions.
2134 os.makedirs(upperdirs)
2136 if tarinfo.islnk() or tarinfo.issym():
2137 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2138 else:
2139 self._dbg(1, tarinfo.name)
2141 if tarinfo.isreg():
2142 self.makefile(tarinfo, targetpath)
2143 elif tarinfo.isdir():
2144 self.makedir(tarinfo, targetpath)
2145 elif tarinfo.isfifo():
2146 self.makefifo(tarinfo, targetpath)
2147 elif tarinfo.ischr() or tarinfo.isblk():
2148 self.makedev(tarinfo, targetpath)
2149 elif tarinfo.islnk() or tarinfo.issym():
2150 self.makelink(tarinfo, targetpath)
2151 elif tarinfo.type not in SUPPORTED_TYPES:
2152 self.makeunknown(tarinfo, targetpath)
2153 else:
2154 self.makefile(tarinfo, targetpath)
2156 self.chown(tarinfo, targetpath)
2157 if not tarinfo.issym():
2158 self.chmod(tarinfo, targetpath)
2159 self.utime(tarinfo, targetpath)
2161 #--------------------------------------------------------------------------
2162 # Below are the different file methods. They are called via
2163 # _extract_member() when extract() is called. They can be replaced in a
2164 # subclass to implement other functionality.
2166 def makedir(self, tarinfo, targetpath):
2167 """Make a directory called targetpath.
2168 """
2169 try:
2170 # Use a safe mode for the directory, the real mode is set
2171 # later in _extract_member().
2172 os.mkdir(targetpath, 0700)
2173 except EnvironmentError, e:
2174 if e.errno != errno.EEXIST:
2175 raise
2177 def makefile(self, tarinfo, targetpath):
2178 """Make a file called targetpath.
2179 """
2180 source = self.extractfile(tarinfo)
2181 target = bltn_open(targetpath, "wb")
2182 copyfileobj(source, target)
2183 source.close()
2184 target.close()
2186 def makeunknown(self, tarinfo, targetpath):
2187 """Make a file from a TarInfo object with an unknown type
2188 at targetpath.
2189 """
2190 self.makefile(tarinfo, targetpath)
2191 self._dbg(1, "tarfile: Unknown file type %r, " \
2192 "extracted as regular file." % tarinfo.type)
2194 def makefifo(self, tarinfo, targetpath):
2195 """Make a fifo called targetpath.
2196 """
2197 if hasattr(os, "mkfifo"):
2198 os.mkfifo(targetpath)
2199 else:
2200 raise ExtractError("fifo not supported by system")
2202 def makedev(self, tarinfo, targetpath):
2203 """Make a character or block device called targetpath.
2204 """
2205 if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
2206 raise ExtractError("special devices not supported by system")
2208 mode = tarinfo.mode
2209 if tarinfo.isblk():
2210 mode |= stat.S_IFBLK
2211 else:
2212 mode |= stat.S_IFCHR
2214 os.mknod(targetpath, mode,
2215 os.makedev(tarinfo.devmajor, tarinfo.devminor))
2217 def makelink(self, tarinfo, targetpath):
2218 """Make a (symbolic) link called targetpath. If it cannot be created
2219 (platform limitation), we try to make a copy of the referenced file
2220 instead of a link.
2221 """
2222 linkpath = tarinfo.linkname
2223 try:
2224 if tarinfo.issym():
2225 os.symlink(linkpath, targetpath)
2226 else:
2227 # See extract().
2228 os.link(tarinfo._link_target, targetpath)
2229 except AttributeError:
2230 if tarinfo.issym():
2231 linkpath = os.path.join(os.path.dirname(tarinfo.name),
2232 linkpath)
2233 linkpath = os.path.normpath(linkpath)
2235 try:
2236 self._extract_member(self.getmember(linkpath), targetpath)
2237 except (EnvironmentError, KeyError), e:
2238 linkpath = os.path.normpath(linkpath)
2239 try:
2240 shutil.copy2(linkpath, targetpath)
2241 except EnvironmentError, e:
2242 raise IOError("link could not be created")
2244 def chown(self, tarinfo, targetpath):
2245 """Set owner of targetpath according to tarinfo.
2246 """
2247 if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
2248 # We have to be root to do so.
2249 try:
2250 g = grp.getgrnam(tarinfo.gname)[2]
2251 except KeyError:
2252 try:
2253 g = grp.getgrgid(tarinfo.gid)[2]
2254 except KeyError:
2255 g = os.getgid()
2256 try:
2257 u = pwd.getpwnam(tarinfo.uname)[2]
2258 except KeyError:
2259 try:
2260 u = pwd.getpwuid(tarinfo.uid)[2]
2261 except KeyError:
2262 u = os.getuid()
2263 try:
2264 if tarinfo.issym() and hasattr(os, "lchown"):
2265 os.lchown(targetpath, u, g)
2266 else:
2267 if sys.platform != "os2emx":
2268 os.chown(targetpath, u, g)
2269 except EnvironmentError, e:
2270 raise ExtractError("could not change owner")
2272 def chmod(self, tarinfo, targetpath):
2273 """Set file permissions of targetpath according to tarinfo.
2274 """
2275 if hasattr(os, 'chmod'):
2276 try:
2277 os.chmod(targetpath, tarinfo.mode)
2278 except EnvironmentError, e:
2279 raise ExtractError("could not change mode")
2281 def utime(self, tarinfo, targetpath):
2282 """Set modification time of targetpath according to tarinfo.
2283 """
2284 if not hasattr(os, 'utime'):
2285 return
2286 try:
2287 os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
2288 except EnvironmentError, e:
2289 raise ExtractError("could not change modification time")
2291 #--------------------------------------------------------------------------
2292 def next(self):
2293 """Return the next member of the archive as a TarInfo object, when
2294 TarFile is opened for reading. Return None if there are no more
2295 members available.
2296 """
2297 self._check("ra")
2298 if self.firstmember is not None:
2299 m = self.firstmember
2300 self.firstmember = None
2301 return m
2303 # Read the next block.
2304 self.fileobj.seek(self.offset)
2305 while True:
2306 try:
2307 tarinfo = self.tarinfo.fromtarfile(self)
2308 if tarinfo is None:
2309 return
2310 self.members.append(tarinfo)
2312 except HeaderError, e:
2313 if self.ignore_zeros:
2314 self._dbg(2, "0x%X: %s" % (self.offset, e))
2315 self.offset += BLOCKSIZE
2316 continue
2317 else:
2318 if self.offset == 0:
2319 raise ReadError(str(e))
2320 return None
2321 break
2323 return tarinfo
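# Usage sketch (illustrative comment only; the archive name is a made-up
# example): next() reads headers one at a time, which is what TarIter
# below builds on, and it avoids loading the whole table of contents first.
#
#   import tarfile
#   tar = tarfile.open("sample.tar", "r")
#   tarinfo = tar.next()
#   while tarinfo is not None:
#       print tarinfo.name
#       tarinfo = tar.next()
#   tar.close()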
2325 #--------------------------------------------------------------------------
2326 # Little helper methods:
2328 def _getmember(self, name, tarinfo=None):
2329 """Find an archive member by name from bottom to top.
2330 If tarinfo is given, it is used as the starting point.
2331 """
2332 # Ensure that all members have been loaded.
2333 members = self.getmembers()
2335 if tarinfo is None:
2336 end = len(members)
2337 else:
2338 end = members.index(tarinfo)
2340 for i in xrange(end - 1, -1, -1):
2341 if name == members[i].name:
2342 return members[i]
2344 def _load(self):
2345 """Read through the entire archive file and look for readable
2346 members.
2347 """
2348 while True:
2349 tarinfo = self.next()
2350 if tarinfo is None:
2351 break
2352 self._loaded = True
2354 def _check(self, mode=None):
2355 """Check if TarFile is still open, and if the operation's mode
2356 corresponds to TarFile's mode.
2357 """
2358 if self.closed:
2359 raise IOError("%s is closed" % self.__class__.__name__)
2360 if mode is not None and self.mode not in mode:
2361 raise IOError("bad operation for mode %r" % self.mode)
2363 def __iter__(self):
2364 """Provide an iterator object.
2365 """
2366 if self._loaded:
2367 return iter(self.members)
2368 else:
2369 return TarIter(self)
2371 def _dbg(self, level, msg):
2372 """Write debugging output to sys.stderr.
2373 """
2374 if level <= self.debug:
2375 print >> sys.stderr, msg
2376 # class TarFile
2378 class TarIter:
2379 """Iterator Class.
2381 for tarinfo in TarFile(...):
2382 suite...
2383 """
2385 def __init__(self, tarfile):
2386 """Construct a TarIter object.
2387 """
2388 self.tarfile = tarfile
2389 self.index = 0
2390 def __iter__(self):
2391 """Return iterator object.
2392 """
2393 return self
2394 def next(self):
2395 """Return the next item using TarFile's next() method.
2396 When all members have been read, set TarFile as _loaded.
2397 """
2398 # Fix for SF #1100429: Under rare circumstances it can
2399 # happen that getmembers() is called during iteration,
2400 # which will cause TarIter to stop prematurely.
2401 if not self.tarfile._loaded:
2402 tarinfo = self.tarfile.next()
2403 if not tarinfo:
2404 self.tarfile._loaded = True
2405 raise StopIteration
2406 else:
2407 try:
2408 tarinfo = self.tarfile.members[self.index]
2409 except IndexError:
2410 raise StopIteration
2411 self.index += 1
2412 return tarinfo
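# Usage sketch (illustrative comment only; the archive name is a made-up
# example): TarIter is what the for-loop over an open TarFile uses.
#
#   import tarfile
#   for tarinfo in tarfile.open("sample.tar"):
#       print tarinfo.name, tarinfo.size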
2414 # Helper classes for sparse file support
2415 class _section:
2416 """Base class for _data and _hole.
2417 """
2418 def __init__(self, offset, size):
2419 self.offset = offset
2420 self.size = size
2421 def __contains__(self, offset):
2422 return self.offset <= offset < self.offset + self.size
2424 class _data(_section):
2425 """Represent a data section in a sparse file.
2426 """
2427 def __init__(self, offset, size, realpos):
2428 _section.__init__(self, offset, size)
2429 self.realpos = realpos
2431 class _hole(_section):
2432 """Represent a hole section in a sparse file.
2433 """
2434 pass
2436 class _ringbuffer(list):
2437 """Ring buffer which remembers the index of the last hit and thereby
2438 speeds up the mostly sequential lookups done for sparse members.
2439 """
2440 def __init__(self):
2441 self.idx = 0
2442 def find(self, offset):
2443 idx = self.idx
2444 while True:
2445 item = self[idx]
2446 if offset in item:
2447 break
2448 idx += 1
2449 if idx == len(self):
2450 idx = 0
2451 if idx == self.idx:
2452 # End of File
2453 return None
2454 self.idx = idx
2455 return item
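# Illustrative comment (the offsets are made-up examples): the section map
# of a sparse member might look like
#   [_data(0, 512, realpos), _hole(512, 1024), _data(1536, 512, realpos)]
# and find() resumes scanning at the previously hit index, which pays off
# because reads of a sparse member usually advance through the file.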
2457 #---------------------------------------------
2458 # zipfile compatible TarFile class
2459 #---------------------------------------------
2460 TAR_PLAIN = 0 # zipfile.ZIP_STORED
2461 TAR_GZIPPED = 8 # zipfile.ZIP_DEFLATED
2462 class TarFileCompat:
2463 """TarFile class compatible with standard module zipfile's
2464 ZipFile class.
2465 """
2466 def __init__(self, file, mode="r", compression=TAR_PLAIN):
2467 from warnings import warnpy3k
2468 warnpy3k("the TarFileCompat class has been removed in Python 3.0",
2469 stacklevel=2)
2470 if compression == TAR_PLAIN:
2471 self.tarfile = TarFile.taropen(file, mode)
2472 elif compression == TAR_GZIPPED:
2473 self.tarfile = TarFile.gzopen(file, mode)
2474 else:
2475 raise ValueError("unknown compression constant")
2476 if mode[0:1] == "r":
2477 members = self.tarfile.getmembers()
2478 for m in members:
2479 m.filename = m.name
2480 m.file_size = m.size
2481 m.date_time = time.gmtime(m.mtime)[:6]
2482 def namelist(self):
2483 return map(lambda m: m.name, self.infolist())
2484 def infolist(self):
2485 return filter(lambda m: m.type in REGULAR_TYPES,
2486 self.tarfile.getmembers())
2487 def printdir(self):
2488 self.tarfile.list()
2489 def testzip(self):
2490 return
2491 def getinfo(self, name):
2492 return self.tarfile.getmember(name)
2493 def read(self, name):
2494 return self.tarfile.extractfile(self.tarfile.getmember(name)).read()
2495 def write(self, filename, arcname=None, compress_type=None):
2496 self.tarfile.add(filename, arcname)
2497 def writestr(self, zinfo, bytes):
2498 try:
2499 from cStringIO import StringIO
2500 except ImportError:
2501 from StringIO import StringIO
2502 import calendar
2503 tinfo = TarInfo(zinfo.filename)
2504 tinfo.size = len(bytes)
2505 tinfo.mtime = calendar.timegm(zinfo.date_time)
2506 self.tarfile.addfile(tinfo, StringIO(bytes))
2507 def close(self):
2508 self.tarfile.close()
2509 #class TarFileCompat
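# Usage sketch (illustrative comment only; the archive name is a made-up
# example). Note the warnpy3k call above: TarFileCompat is deprecated and
# does not exist in Python 3, so new code should use TarFile directly.
#
#   z = TarFileCompat("sample.tar", "r")
#   print z.namelist()
#   z.close()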
2511 #--------------------
2512 # exported functions
2513 #--------------------
2514 def is_tarfile(name):
2515 """Return True if name points to a tar archive that we
2516 are able to handle, else return False.
2517 """
2518 try:
2519 t = open(name)
2520 t.close()
2521 return True
2522 except TarError:
2523 return False
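# Usage sketch (illustrative comment only; the path is a made-up example):
#
#   import tarfile
#   if tarfile.is_tarfile("sample.tar"):
#       tar = tarfile.open("sample.tar")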
2525 bltn_open = open
2526 open = TarFile.open