Lib/tarfile.py

   1 #!/usr/bin/env python
   2 # -*- coding: iso-8859-1 -*-
   3 #-------------------------------------------------------------------
   4 # tarfile.py
   5 #-------------------------------------------------------------------
   6 # Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
   7 # All rights reserved.
   8 #
   9 # Permission  is  hereby granted,  free  of charge,  to  any person
  10 # obtaining a  copy of  this software  and associated documentation
  11 # files  (the  "Software"),  to   deal  in  the  Software   without
  12 # restriction,  including  without limitation  the  rights to  use,
  13 # copy, modify, merge, publish, distribute, sublicense, and/or sell
  14 # copies  of  the  Software,  and to  permit  persons  to  whom the
  15 # Software  is  furnished  to  do  so,  subject  to  the  following
  16 # conditions:
  17 #
  18 # The above copyright  notice and this  permission notice shall  be
  19 # included in all copies or substantial portions of the Software.
  20 #
  21 # THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
  22 # EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
  23 # OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
  24 # NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
  25 # HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
  26 # WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
  27 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  28 # OTHER DEALINGS IN THE SOFTWARE.
  29 #
  30 """Read from and write to tar format archives.
  31 """
  32
  33 __version__ = "$Revision$"
  34 # $Source$
  35
  36 version     = "0.9.0"
  37 __author__  = "Lars Gustäbel (lars@gustaebel.de)"
  38 __date__    = "$Date$"
  39 __cvsid__   = "$Id$"
  40 __credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
  41
  42 #---------
  43 # Imports
  44 #---------
  45 import sys
  46 import os
  47 import shutil
  48 import stat
  49 import errno
  50 import time
  51 import struct
  52 import copy
  53 import re
  54 import operator
  55
  56 if sys.platform == 'mac':
  57     # This module needs work for MacOS9, especially in the area of pathname
  58     # handling. In many places it is assumed a simple substitution of / by the
  59     # local os.path.sep is good enough to convert pathnames, but this does not
  60     # work with the mac rooted:path:name versus :nonrooted:path:name syntax
  61     raise ImportError, "tarfile does not work for platform==mac"
  62
  63 try:
  64     import grp, pwd
  65 except ImportError:
  66     grp = pwd = None
  67
  68 # from tarfile import *
  69 __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
  70
  71 #---------------------------------------------------------
  72 # tar constants
  73 #---------------------------------------------------------
# Block geometry and magic strings.
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records (20 blocks)
GNU_MAGIC = "ustar  \0"         # magic gnu tar string: "ustar", two spaces, NUL
POSIX_MAGIC = "ustar\x0000"     # magic posix tar string: "ustar", NUL, "00"

# Maximum lengths of the string fields named below.
LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

# Member type flags; each one is a single-character string.
REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file (NUL type flag)
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

# Type flags that are GNU tar extensions (collected in GNU_TYPES below).
GNUTYPE_LONGNAME = "L"          # GNU tar longname
GNUTYPE_LONGLINK = "K"          # GNU tar longlink
GNUTYPE_SPARSE = "S"            # GNU tar sparse file

# Type flags of extended (pax-style) headers.
XHDTYPE = "x"                   # POSIX.1-2001 extended header
XGLTYPE = "g"                   # POSIX.1-2001 global header
SOLARIS_XHDTYPE = "X"           # Solaris extended header

# Identifiers for the archive formats; DEFAULT_FORMAT is also the
# default value of itn()'s `format` parameter.
USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT
 106
 107 #---------------------------------------------------------
 108 # tarfile constants
 109 #---------------------------------------------------------
# File types that tarfile supports:
# (every type flag defined above except the extended-header types
# XHDTYPE, XGLTYPE and SOLARIS_XHDTYPE)
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields in a pax header that are numbers, all other fields
# are treated as strings.
# Maps the field name to the callable used to convert its value.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}
 139
 140 #---------------------------------------------------------
 141 # Bits used in the mode field, values in octal.
 142 #---------------------------------------------------------
# File type bits.  Note that S_IFLNK equals S_IFREG | S_IFCHR and
# S_IFBLK equals S_IFDIR | S_IFCHR, so code testing these masks with
# "mode & bit == bit" has to try the combined values first.
S_IFLNK = 0120000        # symbolic link
S_IFREG = 0100000        # regular file
S_IFBLK = 0060000        # block device
S_IFDIR = 0040000        # directory
S_IFCHR = 0020000        # character device
S_IFIFO = 0010000        # fifo

# Special permission bits.
TSUID   = 04000          # set UID on execution
TSGID   = 02000          # set GID on execution
TSVTX   = 01000          # reserved

# Permission bits for owner (U), group (G) and other (O).
TUREAD  = 0400           # read by owner
TUWRITE = 0200           # write by owner
TUEXEC  = 0100           # execute/search by owner
TGREAD  = 0040           # read by group
TGWRITE = 0020           # write by group
TGEXEC  = 0010           # execute/search by group
TOREAD  = 0004           # read by other
TOWRITE = 0002           # write by other
TOEXEC  = 0001           # execute/search by other
 163
 164 #---------------------------------------------------------
 165 # initialization
 166 #---------------------------------------------------------
# Module-wide encoding name: the filesystem encoding reported by the
# interpreter, or the interpreter's default encoding if none is reported.
ENCODING = sys.getfilesystemencoding()
if ENCODING is None:
    ENCODING = sys.getdefaultencoding()
 170
 171 #---------------------------------------------------------
 172 # Some useful functions
 173 #---------------------------------------------------------
 174
 175 def stn(s, length):
 176     """Convert a python string to a null-terminated string buffer.
 177     """
 178     return s[:length] + (length - len(s)) * NUL
 179
 180 def nts(s):
 181     """Convert a null-terminated string field to a python string.
 182     """
 183     # Use the string up to the first null char.
 184     p = s.find("\0")
 185     if p == -1:
 186         return s
 187     return s[:p]
 188
 189 def nti(s):
 190     """Convert a number field to a python number.
 191     """
 192     # There are two possible encodings for a number field, see
 193     # itn() below.
 194     if s[0] != chr(0200):
 195         try:
 196             n = int(nts(s) or "0", 8)
 197         except ValueError:
 198             raise HeaderError("invalid header")
 199     else:
 200         n = 0L
 201         for i in xrange(len(s) - 1):
 202             n <<= 8
 203             n += ord(s[i + 1])
 204     return n
 205
 206 def itn(n, digits=8, format=DEFAULT_FORMAT):
 207     """Convert a python number to a number field.
 208     """
 209     # POSIX 1003.1-1988 requires numbers to be encoded as a string of
 210     # octal digits followed by a null-byte, this allows values up to
 211     # (8**(digits-1))-1. GNU tar allows storing numbers greater than
 212     # that if necessary. A leading 0200 byte indicates this particular
 213     # encoding, the following digits-1 bytes are a big-endian
 214     # representation. This allows values up to (256**(digits-1))-1.
 215     if 0 <= n < 8 ** (digits - 1):
 216         s = "%0*o" % (digits - 1, n) + NUL
 217     else:
 218         if format != GNU_FORMAT or n >= 256 ** (digits - 1):
 219             raise ValueError("overflow in number field")
 220
 221         if n < 0:
 222             # XXX We mimic GNU tar's behaviour with negative numbers,
 223             # this could raise OverflowError.
 224             n = struct.unpack("L", struct.pack("l", n))[0]
 225
 226         s = ""
 227         for i in xrange(digits - 1):
 228             s = chr(n & 0377) + s
 229             n >>= 8
 230         s = chr(0200) + s
 231     return s
 232
 233 def uts(s, encoding, errors):
 234     """Convert a unicode object to a string.
 235     """
 236     if errors == "utf-8":
 237         # An extra error handler similar to the -o invalid=UTF-8 option
 238         # in POSIX.1-2001. Replace untranslatable characters with their
 239         # UTF-8 representation.
 240         try:
 241             return s.encode(encoding, "strict")
 242         except UnicodeEncodeError:
 243             x = []
 244             for c in s:
 245                 try:
 246                     x.append(c.encode(encoding, "strict"))
 247                 except UnicodeEncodeError:
 248                     x.append(c.encode("utf8"))
 249             return "".join(x)
 250     else:
 251         return s.encode(encoding, errors)
 252
 253 def calc_chksums(buf):
 254     """Calculate the checksum for a member's header by summing up all
 255        characters except for the chksum field which is treated as if
 256        it was filled with spaces. According to the GNU tar sources,
 257        some tars (Sun and NeXT) calculate chksum with signed char,
 258        which will be different if there are chars in the buffer with
 259        the high bit set. So we calculate two checksums, unsigned and
 260        signed.
 261     """
 262     unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512]))
 263     signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512]))
 264     return unsigned_chksum, signed_chksum
 265
 266 def copyfileobj(src, dst, length=None):
 267     """Copy length bytes from fileobj src to fileobj dst.
 268        If length is None, copy the entire content.
 269     """
 270     if length == 0:
 271         return
 272     if length is None:
 273         shutil.copyfileobj(src, dst)
 274         return
 275
 276     BUFSIZE = 16 * 1024
 277     blocks, remainder = divmod(length, BUFSIZE)
 278     for b in xrange(blocks):
 279         buf = src.read(BUFSIZE)
 280         if len(buf) < BUFSIZE:
 281             raise IOError("end of file reached")
 282         dst.write(buf)
 283
 284     if remainder != 0:
 285         buf = src.read(remainder)
 286         if len(buf) < remainder:
 287             raise IOError("end of file reached")
 288         dst.write(buf)
 289     return
 290
# Lookup table used by filemode().  It has one entry per character of
# the resulting string: the file type first, followed by the read, write
# and execute positions for owner, group and other.  Each entry is a
# tuple of (bits, char) pairs; filemode() takes the char of the first
# pair whose bits are all set in the mode, or "-" if no pair matches.
# Within an entry the order therefore matters: combined values such as
# TUEXEC|TSUID have to come before the single bits they contain.
filemode_table = (
    # file type
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    # owner: read, write, execute/set-uid
    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    # group: read, write, execute/set-gid
    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    # other: read, write, execute/TSVTX
    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)
 317
 318 def filemode(mode):
 319     """Convert a file's mode to a string of the form
 320        -rwxrwxrwx.
 321        Used by TarFile.list()
 322     """
 323     perm = []
 324     for table in filemode_table:
 325         for bit, char in table:
 326             if mode & bit == bit:
 327                 perm.append(char)
 328                 break
 329         else:
 330             perm.append("-")
 331     return "".join(perm)
 332
 333 if os.sep != "/":
 334     normpath = lambda path: os.path.normpath(path).replace(os.sep, "/")
 335 else:
 336     normpath = os.path.normpath
 337
 338 class TarError(Exception):
 339     """Base exception."""
 340     pass
 341 class ExtractError(TarError):
 342     """General exception for extract errors."""
 343     pass
 344 class ReadError(TarError):
 345     """Exception for unreadble tar archives."""
 346     pass
 347 class CompressionError(TarError):
 348     """Exception for unavailable compression methods."""
 349     pass
 350 class StreamError(TarError):
 351     """Exception for unsupported operations on stream-like TarFiles."""
 352     pass
 353 class HeaderError(TarError):
 354     """Exception for invalid headers."""
 355     pass
 356
 357 #---------------------------
 358 # internal stream interface
 359 #---------------------------
 360 class _LowLevelFile:
 361     """Low-level file object. Supports reading and writing.
 362        It is used instead of a regular file object for streaming
 363        access.
 364     """
 365
 366     def __init__(self, name, mode):
 367         mode = {
 368             "r": os.O_RDONLY,
 369             "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
 370         }[mode]
 371         if hasattr(os, "O_BINARY"):
 372             mode |= os.O_BINARY
 373         self.fd = os.open(name, mode)
 374
 375     def close(self):
 376         os.close(self.fd)
 377
 378     def read(self, size):
 379         return os.read(self.fd, size)
 380
 381     def write(self, s):
 382         os.write(self.fd, s)
 383
 384 class _Stream:
 385     """Class that serves as an adapter between TarFile and
 386        a stream-like object.  The stream-like object only
 387        needs to have a read() or write() method and is accessed
 388        blockwise.  Use of gzip or bzip2 compression is possible.
 389        A stream-like object could be for example: sys.stdin,
 390        sys.stdout, a socket, a tape device etc.
 391
 392        _Stream is intended to be used only internally.
 393     """
 394
 395     def __init__(self, name, mode, comptype, fileobj, bufsize):
 396         """Construct a _Stream object.
 397         """
 398         self._extfileobj = True
 399         if fileobj is None:
 400             fileobj = _LowLevelFile(name, mode)
 401             self._extfileobj = False
 402
 403         if comptype == '*':
 404             # Enable transparent compression detection for the
 405             # stream interface
 406             fileobj = _StreamProxy(fileobj)
 407             comptype = fileobj.getcomptype()
 408
 409         self.name     = name or ""
 410         self.mode     = mode
 411         self.comptype = comptype
 412         self.fileobj  = fileobj
 413         self.bufsize  = bufsize
 414         self.buf      = ""
 415         self.pos      = 0L
 416         self.closed   = False
 417
 418         if comptype == "gz":
 419             try:
 420                 import zlib
 421             except ImportError:
 422                 raise CompressionError("zlib module is not available")
 423             self.zlib = zlib
 424             self.crc = zlib.crc32("") & 0xffffffffL
 425             if mode == "r":
 426                 self._init_read_gz()
 427             else:
 428                 self._init_write_gz()
 429
 430         if comptype == "bz2":
 431             try:
 432                 import bz2
 433             except ImportError:
 434                 raise CompressionError("bz2 module is not available")
 435             if mode == "r":
 436                 self.dbuf = ""
 437                 self.cmp = bz2.BZ2Decompressor()
 438             else:
 439                 self.cmp = bz2.BZ2Compressor()
 440
 441     def __del__(self):
 442         if hasattr(self, "closed") and not self.closed:
 443             self.close()
 444
 445     def _init_write_gz(self):
 446         """Initialize for writing with gzip compression.
 447         """
 448         self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
 449                                             -self.zlib.MAX_WBITS,
 450                                             self.zlib.DEF_MEM_LEVEL,
 451                                             0)
 452         timestamp = struct.pack("<L", long(time.time()))
 453         self.__write("\037\213\010\010%s\002\377" % timestamp)
 454         if self.name.endswith(".gz"):
 455             self.name = self.name[:-3]
 456         self.__write(self.name + NUL)
 457
 458     def write(self, s):
 459         """Write string s to the stream.
 460         """
 461         if self.comptype == "gz":
 462             self.crc = self.zlib.crc32(s, self.crc) & 0xffffffffL
 463         self.pos += len(s)
 464         if self.comptype != "tar":
 465             s = self.cmp.compress(s)
 466         self.__write(s)
 467
 468     def __write(self, s):
 469         """Write string s to the stream if a whole new block
 470            is ready to be written.
 471         """
 472         self.buf += s
 473         while len(self.buf) > self.bufsize:
 474             self.fileobj.write(self.buf[:self.bufsize])
 475             self.buf = self.buf[self.bufsize:]
 476
 477     def close(self):
 478         """Close the _Stream object. No operation should be
 479            done on it afterwards.
 480         """
 481         if self.closed:
 482             return
 483
 484         if self.mode == "w" and self.comptype != "tar":
 485             self.buf += self.cmp.flush()
 486
 487         if self.mode == "w" and self.buf:
 488             self.fileobj.write(self.buf)
 489             self.buf = ""
 490             if self.comptype == "gz":
 491                 # The native zlib crc is an unsigned 32-bit integer, but
 492                 # the Python wrapper implicitly casts that to a signed C
 493                 # long.  So, on a 32-bit box self.crc may "look negative",
 494                 # while the same crc on a 64-bit box may "look positive".
 495                 # To avoid irksome warnings from the `struct` module, force
 496                 # it to look positive on all boxes.
 497                 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
 498                 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
 499
 500         if not self._extfileobj:
 501             self.fileobj.close()
 502
 503         self.closed = True
 504
 505     def _init_read_gz(self):
 506         """Initialize for reading a gzip compressed fileobj.
 507         """
 508         self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
 509         self.dbuf = ""
 510
 511         # taken from gzip.GzipFile with some alterations
 512         if self.__read(2) != "\037\213":
 513             raise ReadError("not a gzip file")
 514         if self.__read(1) != "\010":
 515             raise CompressionError("unsupported compression method")
 516
 517         flag = ord(self.__read(1))
 518         self.__read(6)
 519
 520         if flag & 4:
 521             xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
 522             self.read(xlen)
 523         if flag & 8:
 524             while True:
 525                 s = self.__read(1)
 526                 if not s or s == NUL:
 527                     break
 528         if flag & 16:
 529             while True:
 530                 s = self.__read(1)
 531                 if not s or s == NUL:
 532                     break
 533         if flag & 2:
 534             self.__read(2)
 535
 536     def tell(self):
 537         """Return the stream's file pointer position.
 538         """
 539         return self.pos
 540
 541     def seek(self, pos=0):
 542         """Set the stream's file pointer to pos. Negative seeking
 543            is forbidden.
 544         """
 545         if pos - self.pos >= 0:
 546             blocks, remainder = divmod(pos - self.pos, self.bufsize)
 547             for i in xrange(blocks):
 548                 self.read(self.bufsize)
 549             self.read(remainder)
 550         else:
 551             raise StreamError("seeking backwards is not allowed")
 552         return self.pos
 553
 554     def read(self, size=None):
 555         """Return the next size number of bytes from the stream.
 556            If size is not defined, return all bytes of the stream
 557            up to EOF.
 558         """
 559         if size is None:
 560             t = []
 561             while True:
 562                 buf = self._read(self.bufsize)
 563                 if not buf:
 564                     break
 565                 t.append(buf)
 566             buf = "".join(t)
 567         else:
 568             buf = self._read(size)
 569         self.pos += len(buf)
 570         return buf
 571
 572     def _read(self, size):
 573         """Return size bytes from the stream.
 574         """
 575         if self.comptype == "tar":
 576             return self.__read(size)
 577
 578         c = len(self.dbuf)
 579         t = [self.dbuf]
 580         while c < size:
 581             buf = self.__read(self.bufsize)
 582             if not buf:
 583                 break
 584             try:
 585                 buf = self.cmp.decompress(buf)
 586             except IOError:
 587                 raise ReadError("invalid compressed data")
 588             t.append(buf)
 589             c += len(buf)
 590         t = "".join(t)
 591         self.dbuf = t[size:]
 592         return t[:size]
 593
 594     def __read(self, size):
 595         """Return size bytes from stream. If internal buffer is empty,
 596            read another block from the stream.
 597         """
 598         c = len(self.buf)
 599         t = [self.buf]
 600         while c < size:
 601             buf = self.fileobj.read(self.bufsize)
 602             if not buf:
 603                 break
 604             t.append(buf)
 605             c += len(buf)
 606         t = "".join(t)
 607         self.buf = t[size:]
 608         return t[:size]
 609 # class _Stream
 610
 611 class _StreamProxy(object):
 612     """Small proxy class that enables transparent compression
 613        detection for the Stream interface (mode 'r|*').
 614     """
 615
 616     def __init__(self, fileobj):
 617         self.fileobj = fileobj
 618         self.buf = self.fileobj.read(BLOCKSIZE)
 619
 620     def read(self, size):
 621         self.read = self.fileobj.read
 622         return self.buf
 623
 624     def getcomptype(self):
 625         if self.buf.startswith("\037\213\010"):
 626             return "gz"
 627         if self.buf.startswith("BZh91"):
 628             return "bz2"
 629         return "tar"
 630
 631     def close(self):
 632         self.fileobj.close()
 633 # class StreamProxy
 634
 635 class _BZ2Proxy(object):
 636     """Small proxy class that enables external file object
 637        support for "r:bz2" and "w:bz2" modes. This is actually
 638        a workaround for a limitation in bz2 module's BZ2File
 639        class which (unlike gzip.GzipFile) has no support for
 640        a file object argument.
 641     """
 642
 643     blocksize = 16 * 1024
 644
 645     def __init__(self, fileobj, mode):
 646         self.fileobj = fileobj
 647         self.mode = mode
 648         self.name = getattr(self.fileobj, "name", None)
 649         self.init()
 650
 651     def init(self):
 652         import bz2
 653         self.pos = 0
 654         if self.mode == "r":
 655             self.bz2obj = bz2.BZ2Decompressor()
 656             self.fileobj.seek(0)
 657             self.buf = ""
 658         else:
 659             self.bz2obj = bz2.BZ2Compressor()
 660
 661     def read(self, size):
 662         b = [self.buf]
 663         x = len(self.buf)
 664         while x < size:
 665             raw = self.fileobj.read(self.blocksize)
 666             if not raw:
 667                 break
 668             data = self.bz2obj.decompress(raw)
 669             b.append(data)
 670             x += len(data)
 671         self.buf = "".join(b)
 672
 673         buf = self.buf[:size]
 674         self.buf = self.buf[size:]
 675         self.pos += len(buf)
 676         return buf
 677
 678     def seek(self, pos):
 679         if pos < self.pos:
 680             self.init()
 681         self.read(pos - self.pos)
 682
 683     def tell(self):
 684         return self.pos
 685
 686     def write(self, data):
 687         self.pos += len(data)
 688         raw = self.bz2obj.compress(data)
 689         self.fileobj.write(raw)
 690
 691     def close(self):
 692         if self.mode == "w":
 693             raw = self.bz2obj.flush()
 694             self.fileobj.write(raw)
 695 # class _BZ2Proxy
 696
 697 #------------------------
 698 # Extraction file object
 699 #------------------------
 700 class _FileInFile(object):
 701     """A thin wrapper around an existing file object that
 702        provides a part of its data as an individual file
 703        object.
 704     """
 705
 706     def __init__(self, fileobj, offset, size, sparse=None):
 707         self.fileobj = fileobj
 708         self.offset = offset
 709         self.size = size
 710         self.sparse = sparse
 711         self.position = 0
 712
 713     def tell(self):
 714         """Return the current file position.
 715         """
 716         return self.position
 717
 718     def seek(self, position):
 719         """Seek to a position in the file.
 720         """
 721         self.position = position
 722
 723     def read(self, size=None):
 724         """Read data from the file.
 725         """
 726         if size is None:
 727             size = self.size - self.position
 728         else:
 729             size = min(size, self.size - self.position)
 730
 731         if self.sparse is None:
 732             return self.readnormal(size)
 733         else:
 734             return self.readsparse(size)
 735
 736     def readnormal(self, size):
 737         """Read operation for regular files.
 738         """
 739         self.fileobj.seek(self.offset + self.position)
 740         self.position += size
 741         return self.fileobj.read(size)
 742
 743     def readsparse(self, size):
 744         """Read operation for sparse files.
 745         """
 746         data = []
 747         while size > 0:
 748             buf = self.readsparsesection(size)
 749             if not buf:
 750                 break
 751             size -= len(buf)
 752             data.append(buf)
 753         return "".join(data)
 754
 755     def readsparsesection(self, size):
 756         """Read a single section of a sparse file.
 757         """
 758         section = self.sparse.find(self.position)
 759
 760         if section is None:
 761             return ""
 762
 763         size = min(size, section.offset + section.size - self.position)
 764
 765         if isinstance(section, _data):
 766             realpos = section.realpos + self.position - section.offset
 767             self.fileobj.seek(self.offset + realpos)
 768             self.position += size
 769             return self.fileobj.read(size)
 770         else:
 771             self.position += size
 772             return NUL * size
 773 #class _FileInFile
 774
 775
 776 class ExFileObject(object):
 777     """File-like object for reading an archive member.
 778        Is returned by TarFile.extractfile().
 779     """
 780     blocksize = 1024
 781
 782     def __init__(self, tarfile, tarinfo):
 783         self.fileobj = _FileInFile(tarfile.fileobj,
 784                                    tarinfo.offset_data,
 785                                    tarinfo.size,
 786                                    getattr(tarinfo, "sparse", None))
 787         self.name = tarinfo.name
 788         self.mode = "r"
 789         self.closed = False
 790         self.size = tarinfo.size
 791
 792         self.position = 0
 793         self.buffer = ""
 794
 795     def read(self, size=None):
 796         """Read at most size bytes from the file. If size is not
 797            present or None, read all data until EOF is reached.
 798         """
 799         if self.closed:
 800             raise ValueError("I/O operation on closed file")
 801
 802         buf = ""
 803         if self.buffer:
 804             if size is None:
 805                 buf = self.buffer
 806                 self.buffer = ""
 807             else:
 808                 buf = self.buffer[:size]
 809                 self.buffer = self.buffer[size:]
 810
 811         if size is None:
 812             buf += self.fileobj.read()
 813         else:
 814             buf += self.fileobj.read(size - len(buf))
 815
 816         self.position += len(buf)
 817         return buf
 818
 819     def readline(self, size=-1):
 820         """Read one entire line from the file. If size is present
 821            and non-negative, return a string with at most that
 822            size, which may be an incomplete line.
 823         """
 824         if self.closed:
 825             raise ValueError("I/O operation on closed file")
 826
 827         if "\n" in self.buffer:
 828             pos = self.buffer.find("\n") + 1
 829         else:
 830             buffers = [self.buffer]
 831             while True:
 832                 buf = self.fileobj.read(self.blocksize)
 833                 buffers.append(buf)
 834                 if not buf or "\n" in buf:
 835                     self.buffer = "".join(buffers)
 836                     pos = self.buffer.find("\n") + 1
 837                     if pos == 0:
 838                         # no newline found.
 839                         pos = len(self.buffer)
 840                     break
 841
 842         if size != -1:
 843             pos = min(size, pos)
 844
 845         buf = self.buffer[:pos]
 846         self.buffer = self.buffer[pos:]
 847         self.position += len(buf)
 848         return buf
 849
 850     def readlines(self):
 851         """Return a list with all remaining lines.
 852         """
 853         result = []
 854         while True:
 855             line = self.readline()
 856             if not line: break
 857             result.append(line)
 858         return result
 859
 860     def tell(self):
 861         """Return the current file position.
 862         """
 863         if self.closed:
 864             raise ValueError("I/O operation on closed file")
 865
 866         return self.position
 867
 868     def seek(self, pos, whence=os.SEEK_SET):
 869         """Seek to a position in the file.
 870         """
 871         if self.closed:
 872             raise ValueError("I/O operation on closed file")
 873
 874         if whence == os.SEEK_SET:
 875             self.position = min(max(pos, 0), self.size)
 876         elif whence == os.SEEK_CUR:
 877             if pos < 0:
 878                 self.position = max(self.position + pos, 0)
 879             else:
 880                 self.position = min(self.position + pos, self.size)
 881         elif whence == os.SEEK_END:
 882             self.position = max(min(self.size + pos, self.size), 0)
 883         else:
 884             raise ValueError("Invalid argument")
 885
 886         self.buffer = ""
 887         self.fileobj.seek(self.position)
 888
 889     def close(self):
 890         """Close the file object.
 891         """
 892         self.closed = True
 893
 894     def __iter__(self):
 895         """Get an iterator over the file's lines.
 896         """
 897         while True:
 898             line = self.readline()
 899             if not line:
 900                 break
 901             yield line
 902 #class ExFileObject
 903
 904 #------------------
 905 # Exported Classes
 906 #------------------
 907 class TarInfo(object):
 908     """Informational class which holds the details about an
 909        archive member given by a tar header block.
 910        TarInfo objects are returned by TarFile.getmember(),
 911        TarFile.getmembers() and TarFile.gettarinfo() and are
 912        usually created internally.
 913     """
 914
 915     def __init__(self, name=""):
 916         """Construct a TarInfo object. name is the optional name
 917            of the member.
 918         """
 919         self.name = name        # member name
 920         self.mode = 0644        # file permissions
 921         self.uid = 0            # user id
 922         self.gid = 0            # group id
 923         self.size = 0           # file size
 924         self.mtime = 0          # modification time
 925         self.chksum = 0         # header checksum
 926         self.type = REGTYPE     # member type
 927         self.linkname = ""      # link name
 928         self.uname = "root"     # user name
 929         self.gname = "root"     # group name
 930         self.devmajor = 0       # device major number
 931         self.devminor = 0       # device minor number
 932
 933         self.offset = 0         # the tar header starts here
 934         self.offset_data = 0    # the file's data starts here
 935
 936         self.pax_headers = {}   # pax header information
 937
    # In pax headers the "name" and "linkname" field are called
    # "path" and "linkpath". The two properties below expose the
    # attributes under these names as well; reading or writing a
    # property reads or writes the underlying attribute directly.
    def _getpath(self):
        return self.name
    def _setpath(self, name):
        self.name = name
    # Alias for the name attribute.
    path = property(_getpath, _setpath)

    def _getlinkpath(self):
        return self.linkname
    def _setlinkpath(self, linkname):
        self.linkname = linkname
    # Alias for the linkname attribute.
    linkpath = property(_getlinkpath, _setlinkpath)
 951
 952     def __repr__(self):
 953         return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
 954
 955     def get_info(self, encoding, errors):
 956         """Return the TarInfo's attributes as a dictionary.
 957         """
 958         info = {
 959             "name":     normpath(self.name),
 960             "mode":     self.mode & 07777,
 961             "uid":      self.uid,
 962             "gid":      self.gid,
 963             "size":     self.size,
 964             "mtime":    self.mtime,
 965             "chksum":   self.chksum,
 966             "type":     self.type,
 967             "linkname": normpath(self.linkname) if self.linkname else "",
 968             "uname":    self.uname,
 969             "gname":    self.gname,
 970             "devmajor": self.devmajor,
 971             "devminor": self.devminor
 972         }
 973
 974         if info["type"] == DIRTYPE and not info["name"].endswith("/"):
 975             info["name"] += "/"
 976
 977         for key in ("name", "linkname", "uname", "gname"):
 978             if type(info[key]) is unicode:
 979                 info[key] = info[key].encode(encoding, errors)
 980
 981         return info
 982
 983     def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
 984         """Return a tar header as a string of 512 byte blocks.
 985         """
 986         info = self.get_info(encoding, errors)
 987
 988         if format == USTAR_FORMAT:
 989             return self.create_ustar_header(info)
 990         elif format == GNU_FORMAT:
 991             return self.create_gnu_header(info)
 992         elif format == PAX_FORMAT:
 993             return self.create_pax_header(info, encoding, errors)
 994         else:
 995             raise ValueError("invalid format")
 996
 997     def create_ustar_header(self, info):
 998         """Return the object as a ustar header block.
 999         """
1000         info["magic"] = POSIX_MAGIC
1001
1002         if len(info["linkname"]) > LENGTH_LINK:
1003             raise ValueError("linkname is too long")
1004
1005         if len(info["name"]) > LENGTH_NAME:
1006             info["prefix"], info["name"] = self._posix_split_name(info["name"])
1007
1008         return self._create_header(info, USTAR_FORMAT)
1009
1010     def create_gnu_header(self, info):
1011         """Return the object as a GNU header block sequence.
1012         """
1013         info["magic"] = GNU_MAGIC
1014
1015         buf = ""
1016         if len(info["linkname"]) > LENGTH_LINK:
1017             buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK)
1018
1019         if len(info["name"]) > LENGTH_NAME:
1020             buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME)
1021
1022         return buf + self._create_header(info, GNU_FORMAT)
1023
    def create_pax_header(self, info, encoding, errors):
        """Return the object as a ustar header block. If it cannot be
           represented this way, prepend a pax extended header sequence
           with supplemental information.

           info is the dictionary of header fields; it is modified in
           place. encoding and errors are used to decode the 8-bit
           string fields of info to unicode. Entries that already
           exist in self.pax_headers are never overwritten.
        """
        info["magic"] = POSIX_MAGIC
        # Work on a copy, so that self.pax_headers stays untouched.
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            val = info[name].decode(encoding, errors)

            # Try to encode the string as ASCII.
            try:
                val.encode("ascii")
            except UnicodeEncodeError:
                # Non-ASCII values always go into the pax header.
                pax_headers[hname] = val
                continue

            # The length test uses the encoded string from info, not
            # the decoded value.
            if len(info[name]) > length:
                pax_headers[hname] = val

        # Test number fields for values that exceed the field limit or
        # values that are floats.
        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
            if name in pax_headers:
                # The pax header has priority. Avoid overflow.
                info[name] = 0
                continue

            val = info[name]
            # A field of the given width holds digits - 1 octal digits.
            # Values outside of that range and floats are moved to the
            # pax header and replaced by 0 in the ustar header.
            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
                pax_headers[name] = unicode(val)
                info[name] = 0

        # Create a pax extended header if necessary.
        if pax_headers:
            buf = self._create_pax_generic_header(pax_headers)
        else:
            buf = ""

        return buf + self._create_header(info, USTAR_FORMAT)
1074
1075     @classmethod
1076     def create_pax_global_header(cls, pax_headers):
1077         """Return the object as a pax global header block sequence.
1078         """
1079         return cls._create_pax_generic_header(pax_headers, type=XGLTYPE)
1080
1081     def _posix_split_name(self, name):
1082         """Split a name longer than 100 chars into a prefix
1083            and a name part.
1084         """
1085         prefix = name[:LENGTH_PREFIX + 1]
1086         while prefix and prefix[-1] != "/":
1087             prefix = prefix[:-1]
1088
1089         name = name[len(prefix):]
1090         prefix = prefix[:-1]
1091
1092         if not prefix or len(name) > LENGTH_NAME:
1093             raise ValueError("name is too long")
1094         return prefix, name
1095
    @staticmethod
    def _create_header(info, format):
        """Return a header block. info is a dictionary with file
           information, format must be one of the *_FORMAT constants.

           Keys missing from info are replaced by default values. The
           checksum is calculated over the block with a blank checksum
           field and then written into that field.
        """
        # The fields in the order in which they appear in the block.
        # The widths of the fields before the checksum add up to 148.
        parts = [
            stn(info.get("name", ""), 100),
            itn(info.get("mode", 0) & 07777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            "        ", # checksum field
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100),
            stn(info.get("magic", POSIX_MAGIC), 8),
            stn(info.get("uname", "root"), 32),
            stn(info.get("gname", "root"), 32),
            itn(info.get("devmajor", 0), 8, format),
            itn(info.get("devminor", 0), 8, format),
            stn(info.get("prefix", ""), 155)
        ]

        # struct.pack() with an "s" format pads the joined fields with
        # null bytes up to BLOCKSIZE.
        buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        # Replace the first 7 bytes of the checksum field with six octal
        # digits and a null byte; the last blank of the field is kept.
        # (364 and 357 count from the end of the block; presumably
        # BLOCKSIZE is 512, which puts the field at offset 148.)
        buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
        return buf
1123
1124     @staticmethod
1125     def _create_payload(payload):
1126         """Return the string payload filled with zero bytes
1127            up to the next 512 byte border.
1128         """
1129         blocks, remainder = divmod(len(payload), BLOCKSIZE)
1130         if remainder > 0:
1131             payload += (BLOCKSIZE - remainder) * NUL
1132         return payload
1133
1134     @classmethod
1135     def _create_gnu_long_header(cls, name, type):
1136         """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
1137            for name.
1138         """
1139         name += NUL
1140
1141         info = {}
1142         info["name"] = "././@LongLink"
1143         info["type"] = type
1144         info["size"] = len(name)
1145         info["magic"] = GNU_MAGIC
1146
1147         # create extended header + name blocks.
1148         return cls._create_header(info, USTAR_FORMAT) + \
1149                 cls._create_payload(name)
1150
1151     @classmethod
1152     def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
1153         """Return a POSIX.1-2001 extended or global header sequence
1154            that contains a list of keyword, value pairs. The values
1155            must be unicode objects.
1156         """
1157         records = []
1158         for keyword, value in pax_headers.iteritems():
1159             keyword = keyword.encode("utf8")
1160             value = value.encode("utf8")
1161             l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
1162             n = p = 0
1163             while True:
1164                 n = l + len(str(p))
1165                 if n == p:
1166                     break
1167                 p = n
1168             records.append("%d %s=%s\n" % (p, keyword, value))
1169         records = "".join(records)
1170
1171         # We use a hardcoded "././@PaxHeader" name like star does
1172         # instead of the one that POSIX recommends.
1173         info = {}
1174         info["name"] = "././@PaxHeader"
1175         info["type"] = type
1176         info["size"] = len(records)
1177         info["magic"] = POSIX_MAGIC
1178
1179         # Create pax header + record blocks.
1180         return cls._create_header(info, USTAR_FORMAT) + \
1181                 cls._create_payload(records)
1182
1183     @classmethod
1184     def frombuf(cls, buf):
1185         """Construct a TarInfo object from a 512 byte string buffer.
1186         """
1187         if len(buf) != BLOCKSIZE:
1188             raise HeaderError("truncated header")
1189         if buf.count(NUL) == BLOCKSIZE:
1190             raise HeaderError("empty header")
1191
1192         chksum = nti(buf[148:156])
1193         if chksum not in calc_chksums(buf):
1194             raise HeaderError("bad checksum")
1195
1196         obj = cls()
1197         obj.buf = buf
1198         obj.name = nts(buf[0:100])
1199         obj.mode = nti(buf[100:108])
1200         obj.uid = nti(buf[108:116])
1201         obj.gid = nti(buf[116:124])
1202         obj.size = nti(buf[124:136])
1203         obj.mtime = nti(buf[136:148])
1204         obj.chksum = chksum
1205         obj.type = buf[156:157]
1206         obj.linkname = nts(buf[157:257])
1207         obj.uname = nts(buf[265:297])
1208         obj.gname = nts(buf[297:329])
1209         obj.devmajor = nti(buf[329:337])
1210         obj.devminor = nti(buf[337:345])
1211         prefix = nts(buf[345:500])
1212
1213         # Old V7 tar format represents a directory as a regular
1214         # file with a trailing slash.
1215         if obj.type == AREGTYPE and obj.name.endswith("/"):
1216             obj.type = DIRTYPE
1217
1218         # Remove redundant slashes from directories.
1219         if obj.isdir():
1220             obj.name = obj.name.rstrip("/")
1221
1222         # Reconstruct a ustar longname.
1223         if prefix and obj.type not in GNU_TYPES:
1224             obj.name = prefix + "/" + obj.name
1225         return obj
1226
1227     @classmethod
1228     def fromtarfile(cls, tarfile):
1229         """Return the next TarInfo object from TarFile object
1230            tarfile.
1231         """
1232         buf = tarfile.fileobj.read(BLOCKSIZE)
1233         if not buf:
1234             return
1235         obj = cls.frombuf(buf)
1236         obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1237         return obj._proc_member(tarfile)
1238
1239     #--------------------------------------------------------------------------
1240     # The following are methods that are called depending on the type of a
1241     # member. The entry point is _proc_member() which can be overridden in a
    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following operations:
1245     # 1. Set self.offset_data to the position where the data blocks begin,
1246     #    if there is data that follows.
1247     # 2. Set tarfile.offset to the position where the next member's header will
1248     #    begin.
1249     # 3. Return self or another valid TarInfo object.
1250     def _proc_member(self, tarfile):
1251         """Choose the right processing method depending on
1252            the type and call it.
1253         """
1254         if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1255             return self._proc_gnulong(tarfile)
1256         elif self.type == GNUTYPE_SPARSE:
1257             return self._proc_sparse(tarfile)
1258         elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1259             return self._proc_pax(tarfile)
1260         else:
1261             return self._proc_builtin(tarfile)
1262
1263     def _proc_builtin(self, tarfile):
1264         """Process a builtin type or an unknown type which
1265            will be treated as a regular file.
1266         """
1267         self.offset_data = tarfile.fileobj.tell()
1268         offset = self.offset_data
1269         if self.isreg() or self.type not in SUPPORTED_TYPES:
1270             # Skip the following data blocks.
1271             offset += self._block(self.size)
1272         tarfile.offset = offset
1273
1274         # Patch the TarInfo object with saved global
1275         # header information.
1276         self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
1277
1278         return self
1279
1280     def _proc_gnulong(self, tarfile):
1281         """Process the blocks that hold a GNU longname
1282            or longlink member.
1283         """
1284         buf = tarfile.fileobj.read(self._block(self.size))
1285
1286         # Fetch the next header and process it.
1287         next = self.fromtarfile(tarfile)
1288         if next is None:
1289             raise HeaderError("missing subsequent header")
1290
1291         # Patch the TarInfo object from the next header with
1292         # the longname information.
1293         next.offset = self.offset
1294         if self.type == GNUTYPE_LONGNAME:
1295             next.name = nts(buf)
1296         elif self.type == GNUTYPE_LONGLINK:
1297             next.linkname = nts(buf)
1298
1299         return next
1300
    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.

           Build a _ringbuffer of _hole and _data objects that
           describes the layout of the file and store it in
           self.sparse. self.size is replaced by the original size
           read from the header. Return self.
        """
        buf = self.buf
        sp = _ringbuffer()
        pos = 386               # first sparse struct in the header
        lastpos = 0L            # end of the previous data section
        realpos = 0L            # sum of the data sizes seen so far
        # There are 4 possible sparse structs in the
        # first header.
        for i in xrange(4):
            # Each struct is made up of two 12 byte number fields.
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                break
            # A gap between two data sections is a hole.
            if offset > lastpos:
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24

        isextended = ord(buf[482])
        origsize = nti(buf[483:495])

        # If the isextended flag is given,
        # there are extra headers to process.
        while isextended == 1:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            # An extra header holds up to 21 sparse structs.
            for i in xrange(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset > lastpos:
                    sp.append(_hole(lastpos, offset - lastpos))
                sp.append(_data(offset, numbytes, realpos))
                realpos += numbytes
                lastpos = offset + numbytes
                pos += 24
            # The flag of an extra header follows its sparse structs.
            isextended = ord(buf[504])

        # The file ends with a hole, if the last data section does not
        # reach the original size.
        if lastpos < origsize:
            sp.append(_hole(lastpos, origsize - lastpos))

        self.sparse = sp

        self.offset_data = tarfile.fileobj.tell()
        # self.size still is the size from the header at this point; it
        # is used to find the next header before it is replaced.
        tarfile.offset = self.offset_data + self._block(self.size)
        self.size = origsize

        return self
1356
1357     def _proc_pax(self, tarfile):
1358         """Process an extended or global header as described in
1359            POSIX.1-2001.
1360         """
1361         # Read the header information.
1362         buf = tarfile.fileobj.read(self._block(self.size))
1363
1364         # A pax header stores supplemental information for either
1365         # the following file (extended) or all following files
1366         # (global).
1367         if self.type == XGLTYPE:
1368             pax_headers = tarfile.pax_headers
1369         else:
1370             pax_headers = tarfile.pax_headers.copy()
1371
1372         # Parse pax header information. A record looks like that:
1373         # "%d %s=%s\n" % (length, keyword, value). length is the size
1374         # of the complete record including the length field itself and
1375         # the newline. keyword and value are both UTF-8 encoded strings.
1376         regex = re.compile(r"(\d+) ([^=]+)=", re.U)
1377         pos = 0
1378         while True:
1379             match = regex.match(buf, pos)
1380             if not match:
1381                 break
1382
1383             length, keyword = match.groups()
1384             length = int(length)
1385             value = buf[match.end(2) + 1:match.start(1) + length - 1]
1386
1387             keyword = keyword.decode("utf8")
1388             value = value.decode("utf8")
1389
1390             pax_headers[keyword] = value
1391             pos += length
1392
1393         # Fetch the next header.
1394         next = self.fromtarfile(tarfile)
1395
1396         if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
1397             if next is None:
1398                 raise HeaderError("missing subsequent header")
1399
1400             # Patch the TarInfo object with the extended header info.
1401             next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
1402             next.offset = self.offset
1403
1404             if "size" in pax_headers:
1405                 # If the extended header replaces the size field,
1406                 # we need to recalculate the offset where the next
1407                 # header starts.
1408                 offset = next.offset_data
1409                 if next.isreg() or next.type not in SUPPORTED_TYPES:
1410                     offset += next._block(next.size)
1411                 tarfile.offset = offset
1412
1413         return next
1414
1415     def _apply_pax_info(self, pax_headers, encoding, errors):
1416         """Replace fields with supplemental information from a previous
1417            pax extended or global header.
1418         """
1419         for keyword, value in pax_headers.iteritems():
1420             if keyword not in PAX_FIELDS:
1421                 continue
1422
1423             if keyword == "path":
1424                 value = value.rstrip("/")
1425
1426             if keyword in PAX_NUMBER_FIELDS:
1427                 try:
1428                     value = PAX_NUMBER_FIELDS[keyword](value)
1429                 except ValueError:
1430                     value = 0
1431             else:
1432                 value = uts(value, encoding, errors)
1433
1434             setattr(self, keyword, value)
1435
1436         self.pax_headers = pax_headers.copy()
1437
1438     def _block(self, count):
1439         """Round up a byte count by BLOCKSIZE and return it,
1440            e.g. _block(834) => 1024.
1441         """
1442         blocks, remainder = divmod(count, BLOCKSIZE)
1443         if remainder:
1444             blocks += 1
1445         return blocks * BLOCKSIZE
1446
    # Predicates on the type of the member. Each one compares
    # self.type with the corresponding type constant(s).
    def isreg(self):
        # True if the type is one of REGULAR_TYPES.
        return self.type in REGULAR_TYPES
    def isfile(self):
        # Same as isreg().
        return self.isreg()
    def isdir(self):
        # True if the type is DIRTYPE.
        return self.type == DIRTYPE
    def issym(self):
        # True if the type is SYMTYPE.
        return self.type == SYMTYPE
    def islnk(self):
        # True if the type is LNKTYPE.
        return self.type == LNKTYPE
    def ischr(self):
        # True if the type is CHRTYPE.
        return self.type == CHRTYPE
    def isblk(self):
        # True if the type is BLKTYPE.
        return self.type == BLKTYPE
    def isfifo(self):
        # True if the type is FIFOTYPE.
        return self.type == FIFOTYPE
    def issparse(self):
        # True if the type is GNUTYPE_SPARSE.
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        # True if the type is CHRTYPE, BLKTYPE or FIFOTYPE.
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1467 # class TarInfo
1468
1469 class TarFile(object):
1470     """The TarFile Class provides an interface to tar archives.
1471     """
1472
1473     debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)
1474
1475     dereference = False         # If true, add content of linked file to the
1476                                 # tar file, else the link.
1477
1478     ignore_zeros = False        # If true, skips empty or invalid blocks and
1479                                 # continues processing.
1480
1481     errorlevel = 0              # If 0, fatal errors only appear in debug
1482                                 # messages (if debug >= 0). If > 0, errors
1483                                 # are passed to the caller as exceptions.
1484
1485     format = DEFAULT_FORMAT     # The format to use when creating an archive.
1486
1487     encoding = ENCODING         # Encoding for 8-bit character strings.
1488
1489     errors = None               # Error handler for unicode conversion.
1490
1491     tarinfo = TarInfo           # The default TarInfo class to use.
1492
1493     fileobject = ExFileObject   # The default ExFileObject class to use.
1494
1495     def __init__(self, name=None, mode="r", fileobj=None, format=None,
1496             tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
1497             errors=None, pax_headers=None, debug=None, errorlevel=None):
1498         """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1499            read from an existing archive, 'a' to append data to an existing
1500            file or 'w' to create a new file overwriting an existing one. `mode'
1501            defaults to 'r'.
1502            If `fileobj' is given, it is used for reading or writing data. If it
1503            can be determined, `mode' is overridden by `fileobj's mode.
1504            `fileobj' is not closed, when TarFile is closed.
1505         """
1506         if len(mode) > 1 or mode not in "raw":
1507             raise ValueError("mode must be 'r', 'a' or 'w'")
1508         self.mode = mode
1509         self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
1510
1511         if not fileobj:
1512             if self.mode == "a" and not os.path.exists(name):
1513                 # Create nonexistent files in append mode.
1514                 self.mode = "w"
1515                 self._mode = "wb"
1516             fileobj = bltn_open(name, self._mode)
1517             self._extfileobj = False
1518         else:
1519             if name is None and hasattr(fileobj, "name"):
1520                 name = fileobj.name
1521             if hasattr(fileobj, "mode"):
1522                 self._mode = fileobj.mode
1523             self._extfileobj = True
1524         self.name = os.path.abspath(name) if name else None
1525         self.fileobj = fileobj
1526
1527         # Init attributes.
1528         if format is not None:
1529             self.format = format
1530         if tarinfo is not None:
1531             self.tarinfo = tarinfo
1532         if dereference is not None:
1533             self.dereference = dereference
1534         if ignore_zeros is not None:
1535             self.ignore_zeros = ignore_zeros
1536         if encoding is not None:
1537             self.encoding = encoding
1538
1539         if errors is not None:
1540             self.errors = errors
1541         elif mode == "r":
1542             self.errors = "utf-8"
1543         else:
1544             self.errors = "strict"
1545
1546         if pax_headers is not None and self.format == PAX_FORMAT:
1547             self.pax_headers = pax_headers
1548         else:
1549             self.pax_headers = {}
1550
1551         if debug is not None:
1552             self.debug = debug
1553         if errorlevel is not None:
1554             self.errorlevel = errorlevel
1555
1556         # Init datastructures.
1557         self.closed = False
1558         self.members = []       # list of members as TarInfo objects
1559         self._loaded = False    # flag if all members have been read
1560         self.offset = self.fileobj.tell()
1561                                 # current position in the archive file
1562         self.inodes = {}        # dictionary caching the inodes of
1563                                 # archive members already added
1564
1565         if self.mode == "r":
1566             self.firstmember = None
1567             self.firstmember = self.next()
1568
1569         if self.mode == "a":
1570             # Move to the end of the archive,
1571             # before the first empty block.
1572             self.firstmember = None
1573             while True:
1574                 if self.next() is None:
1575                     if self.offset > 0:
1576                         self.fileobj.seek(- BLOCKSIZE, 1)
1577                     break
1578
1579         if self.mode in "aw":
1580             self._loaded = True
1581
1582             if self.pax_headers:
1583                 buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
1584                 self.fileobj.write(buf)
1585                 self.offset += len(buf)
1586
1587     def _getposix(self):
1588         return self.format == USTAR_FORMAT
1589     def _setposix(self, value):
1590         import warnings
1591         warnings.warn("use the format attribute instead", DeprecationWarning,
1592                       2)
1593         if value:
1594             self.format = USTAR_FORMAT
1595         else:
1596             self.format = GNU_FORMAT
1597     posix = property(_getposix, _setposix)
1598
1599     #--------------------------------------------------------------------------
1600     # Below are the classmethods which act as alternate constructors to the
1601     # TarFile class. The open() method is the only one that is needed for
1602     # public use; it is the "super"-constructor and is able to select an
1603     # adequate "sub"-constructor for a particular compression using the mapping
1604     # from OPEN_METH.
1605     #
1606     # This concept allows one to subclass TarFile without losing the comfort of
1607     # the super-constructor. A sub-constructor is registered and made available
1608     # by adding it to the mapping in OPEN_METH.
1609
1610     @classmethod
1611     def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
1612         """Open a tar archive for reading, writing or appending. Return
1613            an appropriate TarFile class.
1614
1615            mode:
1616            'r' or 'r:*' open for reading with transparent compression
1617            'r:'         open for reading exclusively uncompressed
1618            'r:gz'       open for reading with gzip compression
1619            'r:bz2'      open for reading with bzip2 compression
1620            'a' or 'a:'  open for appending, creating the file if necessary
1621            'w' or 'w:'  open for writing without compression
1622            'w:gz'       open for writing with gzip compression
1623            'w:bz2'      open for writing with bzip2 compression
1624
1625            'r|*'        open a stream of tar blocks with transparent compression
1626            'r|'         open an uncompressed stream of tar blocks for reading
1627            'r|gz'       open a gzip compressed stream of tar blocks
1628            'r|bz2'      open a bzip2 compressed stream of tar blocks
1629            'w|'         open an uncompressed stream for writing
1630            'w|gz'       open a gzip compressed stream for writing
1631            'w|bz2'      open a bzip2 compressed stream for writing
1632         """
1633
1634         if not name and not fileobj:
1635             raise ValueError("nothing to open")
1636
1637         if mode in ("r", "r:*"):
1638             # Find out which *open() is appropriate for opening the file.
1639             for comptype in cls.OPEN_METH:
1640                 func = getattr(cls, cls.OPEN_METH[comptype])
1641                 if fileobj is not None:
1642                     saved_pos = fileobj.tell()
1643                 try:
1644                     return func(name, "r", fileobj, **kwargs)
1645                 except (ReadError, CompressionError), e:
1646                     if fileobj is not None:
1647                         fileobj.seek(saved_pos)
1648                     continue
1649             raise ReadError("file could not be opened successfully")
1650
1651         elif ":" in mode:
1652             filemode, comptype = mode.split(":", 1)
1653             filemode = filemode or "r"
1654             comptype = comptype or "tar"
1655
1656             # Select the *open() function according to
1657             # given compression.
1658             if comptype in cls.OPEN_METH:
1659                 func = getattr(cls, cls.OPEN_METH[comptype])
1660             else:
1661                 raise CompressionError("unknown compression type %r" % comptype)
1662             return func(name, filemode, fileobj, **kwargs)
1663
1664         elif "|" in mode:
1665             filemode, comptype = mode.split("|", 1)
1666             filemode = filemode or "r"
1667             comptype = comptype or "tar"
1668
1669             if filemode not in "rw":
1670                 raise ValueError("mode must be 'r' or 'w'")
1671
1672             t = cls(name, filemode,
1673                     _Stream(name, filemode, comptype, fileobj, bufsize),
1674                     **kwargs)
1675             t._extfileobj = False
1676             return t
1677
1678         elif mode in "aw":
1679             return cls.taropen(name, mode, fileobj, **kwargs)
1680
1681         raise ValueError("undiscernible mode")
1682
1683     @classmethod
1684     def taropen(cls, name, mode="r", fileobj=None, **kwargs):
1685         """Open uncompressed tar archive name for reading or writing.
1686         """
1687         if len(mode) > 1 or mode not in "raw":
1688             raise ValueError("mode must be 'r', 'a' or 'w'")
1689         return cls(name, mode, fileobj, **kwargs)
1690
1691     @classmethod
1692     def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1693         """Open gzip compressed tar archive name for reading or writing.
1694            Appending is not allowed.
1695         """
1696         if len(mode) > 1 or mode not in "rw":
1697             raise ValueError("mode must be 'r' or 'w'")
1698
1699         try:
1700             import gzip
1701             gzip.GzipFile
1702         except (ImportError, AttributeError):
1703             raise CompressionError("gzip module is not available")
1704
1705         if fileobj is None:
1706             fileobj = bltn_open(name, mode + "b")
1707
1708         try:
1709             t = cls.taropen(name, mode,
1710                 gzip.GzipFile(name, mode, compresslevel, fileobj),
1711                 **kwargs)
1712         except IOError:
1713             raise ReadError("not a gzip file")
1714         t._extfileobj = False
1715         return t
1716
1717     @classmethod
1718     def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1719         """Open bzip2 compressed tar archive name for reading or writing.
1720            Appending is not allowed.
1721         """
1722         if len(mode) > 1 or mode not in "rw":
1723             raise ValueError("mode must be 'r' or 'w'.")
1724
1725         try:
1726             import bz2
1727         except ImportError:
1728             raise CompressionError("bz2 module is not available")
1729
1730         if fileobj is not None:
1731             fileobj = _BZ2Proxy(fileobj, mode)
1732         else:
1733             fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
1734
1735         try:
1736             t = cls.taropen(name, mode, fileobj, **kwargs)
1737         except IOError:
1738             raise ReadError("not a bzip2 file")
1739         t._extfileobj = False
1740         return t
1741
    # All *open() methods are registered here.
    # Each key is a compression type name, each value the name of the
    # classmethod (looked up with getattr() on the class) that opens
    # archives of that type.
    OPEN_METH = {
        "tar": "taropen",   # uncompressed tar
        "gz":  "gzopen",    # gzip compressed tar
        "bz2": "bz2open"    # bzip2 compressed tar
    }
1748
1749     #--------------------------------------------------------------------------
1750     # The public methods which TarFile provides:
1751
1752     def close(self):
1753         """Close the TarFile. In write-mode, two finishing zero blocks are
1754            appended to the archive.
1755         """
1756         if self.closed:
1757             return
1758
1759         if self.mode in "aw":
1760             self.fileobj.write(NUL * (BLOCKSIZE * 2))
1761             self.offset += (BLOCKSIZE * 2)
1762             # fill up the end with zero-blocks
1763             # (like option -b20 for tar does)
1764             blocks, remainder = divmod(self.offset, RECORDSIZE)
1765             if remainder > 0:
1766                 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1767
1768         if not self._extfileobj:
1769             self.fileobj.close()
1770         self.closed = True
1771
1772     def getmember(self, name):
1773         """Return a TarInfo object for member `name'. If `name' can not be
1774            found in the archive, KeyError is raised. If a member occurs more
1775            than once in the archive, its last occurrence is assumed to be the
1776            most up-to-date version.
1777         """
1778         tarinfo = self._getmember(name)
1779         if tarinfo is None:
1780             raise KeyError("filename %r not found" % name)
1781         return tarinfo
1782
1783     def getmembers(self):
1784         """Return the members of the archive as a list of TarInfo objects. The
1785            list has the same order as the members in the archive.
1786         """
1787         self._check()
1788         if not self._loaded:    # if we want to obtain a list of
1789             self._load()        # all members, we first have to
1790                                 # scan the whole archive.
1791         return self.members
1792
1793     def getnames(self):
1794         """Return the members of the archive as a list of their names. It has
1795            the same order as the list returned by getmembers().
1796         """
1797         return [tarinfo.name for tarinfo in self.getmembers()]
1798
1799     def gettarinfo(self, name=None, arcname=None, fileobj=None):
1800         """Create a TarInfo object for either the file `name' or the file
1801            object `fileobj' (using os.fstat on its file descriptor). You can
1802            modify some of the TarInfo's attributes before you add it using
1803            addfile(). If given, `arcname' specifies an alternative name for the
1804            file in the archive.
1805         """
1806         self._check("aw")
1807
1808         # When fileobj is given, replace name by
1809         # fileobj's real name.
1810         if fileobj is not None:
1811             name = fileobj.name
1812
1813         # Building the name of the member in the archive.
1814         # Backward slashes are converted to forward slashes,
1815         # Absolute paths are turned to relative paths.
1816         if arcname is None:
1817             arcname = name
1818         arcname = normpath(arcname)
1819         drv, arcname = os.path.splitdrive(arcname)
1820         while arcname[0:1] == "/":
1821             arcname = arcname[1:]
1822
1823         # Now, fill the TarInfo object with
1824         # information specific for the file.
1825         tarinfo = self.tarinfo()
1826         tarinfo.tarfile = self
1827
1828         # Use os.stat or os.lstat, depending on platform
1829         # and if symlinks shall be resolved.
1830         if fileobj is None:
1831             if hasattr(os, "lstat") and not self.dereference:
1832                 statres = os.lstat(name)
1833             else:
1834                 statres = os.stat(name)
1835         else:
1836             statres = os.fstat(fileobj.fileno())
1837         linkname = ""
1838
1839         stmd = statres.st_mode
1840         if stat.S_ISREG(stmd):
1841             inode = (statres.st_ino, statres.st_dev)
1842             if not self.dereference and statres.st_nlink > 1 and \
1843                     inode in self.inodes and arcname != self.inodes[inode]:
1844                 # Is it a hardlink to an already
1845                 # archived file?
1846                 type = LNKTYPE
1847                 linkname = self.inodes[inode]
1848             else:
1849                 # The inode is added only if its valid.
1850                 # For win32 it is always 0.
1851                 type = REGTYPE
1852                 if inode[0]:
1853                     self.inodes[inode] = arcname
1854         elif stat.S_ISDIR(stmd):
1855             type = DIRTYPE
1856         elif stat.S_ISFIFO(stmd):
1857             type = FIFOTYPE
1858         elif stat.S_ISLNK(stmd):
1859             type = SYMTYPE
1860             linkname = os.readlink(name)
1861         elif stat.S_ISCHR(stmd):
1862             type = CHRTYPE
1863         elif stat.S_ISBLK(stmd):
1864             type = BLKTYPE
1865         else:
1866             return None
1867
1868         # Fill the TarInfo object with all
1869         # information we can get.
1870         tarinfo.name = arcname
1871         tarinfo.mode = stmd
1872         tarinfo.uid = statres.st_uid
1873         tarinfo.gid = statres.st_gid
1874         if stat.S_ISREG(stmd):
1875             tarinfo.size = statres.st_size
1876         else:
1877             tarinfo.size = 0L
1878         tarinfo.mtime = statres.st_mtime
1879         tarinfo.type = type
1880         tarinfo.linkname = linkname
1881         if pwd:
1882             try:
1883                 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1884             except KeyError:
1885                 pass
1886         if grp:
1887             try:
1888                 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1889             except KeyError:
1890                 pass
1891
1892         if type in (CHRTYPE, BLKTYPE):
1893             if hasattr(os, "major") and hasattr(os, "minor"):
1894                 tarinfo.devmajor = os.major(statres.st_rdev)
1895                 tarinfo.devminor = os.minor(statres.st_rdev)
1896         return tarinfo
1897
1898     def list(self, verbose=True):
1899         """Print a table of contents to sys.stdout. If `verbose' is False, only
1900            the names of the members are printed. If it is True, an `ls -l'-like
1901            output is produced.
1902         """
1903         self._check()
1904
1905         for tarinfo in self:
1906             if verbose:
1907                 print filemode(tarinfo.mode),
1908                 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1909                                  tarinfo.gname or tarinfo.gid),
1910                 if tarinfo.ischr() or tarinfo.isblk():
1911                     print "%10s" % ("%d,%d" \
1912                                     % (tarinfo.devmajor, tarinfo.devminor)),
1913                 else:
1914                     print "%10d" % tarinfo.size,
1915                 print "%d-%02d-%02d %02d:%02d:%02d" \
1916                       % time.localtime(tarinfo.mtime)[:6],
1917
1918             print tarinfo.name + ("/" if tarinfo.isdir() else ""),
1919
1920             if verbose:
1921                 if tarinfo.issym():
1922                     print "->", tarinfo.linkname,
1923                 if tarinfo.islnk():
1924                     print "link to", tarinfo.linkname,
1925             print
1926
1927     def add(self, name, arcname=None, recursive=True, exclude=None):
1928         """Add the file `name' to the archive. `name' may be any type of file
1929            (directory, fifo, symbolic link, etc.). If given, `arcname'
1930            specifies an alternative name for the file in the archive.
1931            Directories are added recursively by default. This can be avoided by
1932            setting `recursive' to False. `exclude' is a function that should
1933            return True for each filename to be excluded.
1934         """
1935         self._check("aw")
1936
1937         if arcname is None:
1938             arcname = name
1939
1940         # Exclude pathnames.
1941         if exclude is not None and exclude(name):
1942             self._dbg(2, "tarfile: Excluded %r" % name)
1943             return
1944
1945         # Skip if somebody tries to archive the archive...
1946         if self.name is not None and os.path.abspath(name) == self.name:
1947             self._dbg(2, "tarfile: Skipped %r" % name)
1948             return
1949
1950         # Special case: The user wants to add the current
1951         # working directory.
1952         if name == ".":
1953             if recursive:
1954                 if arcname == ".":
1955                     arcname = ""
1956                 for f in os.listdir(name):
1957                     self.add(f, os.path.join(arcname, f), recursive, exclude)
1958             return
1959
1960         self._dbg(1, name)
1961
1962         # Create a TarInfo object from the file.
1963         tarinfo = self.gettarinfo(name, arcname)
1964
1965         if tarinfo is None:
1966             self._dbg(1, "tarfile: Unsupported type %r" % name)
1967             return
1968
1969         # Append the tar header and data to the archive.
1970         if tarinfo.isreg():
1971             f = bltn_open(name, "rb")
1972             self.addfile(tarinfo, f)
1973             f.close()
1974
1975         elif tarinfo.isdir():
1976             self.addfile(tarinfo)
1977             if recursive:
1978                 for f in os.listdir(name):
1979                     self.add(os.path.join(name, f), os.path.join(arcname, f), recursive, exclude)
1980
1981         else:
1982             self.addfile(tarinfo)
1983
1984     def addfile(self, tarinfo, fileobj=None):
1985         """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
1986            given, tarinfo.size bytes are read from it and added to the archive.
1987            You can create TarInfo objects using gettarinfo().
1988            On Windows platforms, `fileobj' should always be opened with mode
1989            'rb' to avoid irritation about the file size.
1990         """
1991         self._check("aw")
1992
1993         tarinfo = copy.copy(tarinfo)
1994
1995         buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
1996         self.fileobj.write(buf)
1997         self.offset += len(buf)
1998
1999         # If there's data to follow, append it.
2000         if fileobj is not None:
2001             copyfileobj(fileobj, self.fileobj, tarinfo.size)
2002             blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
2003             if remainder > 0:
2004                 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
2005                 blocks += 1
2006             self.offset += blocks * BLOCKSIZE
2007
2008         self.members.append(tarinfo)
2009
2010     def extractall(self, path=".", members=None):
2011         """Extract all members from the archive to the current working
2012            directory and set owner, modification time and permissions on
2013            directories afterwards. `path' specifies a different directory
2014            to extract to. `members' is optional and must be a subset of the
2015            list returned by getmembers().
2016         """
2017         directories = []
2018
2019         if members is None:
2020             members = self
2021
2022         for tarinfo in members:
2023             if tarinfo.isdir():
2024                 # Extract directories with a safe mode.
2025                 directories.append(tarinfo)
2026                 tarinfo = copy.copy(tarinfo)
2027                 tarinfo.mode = 0700
2028             self.extract(tarinfo, path)
2029
2030         # Reverse sort directories.
2031         directories.sort(key=operator.attrgetter('name'))
2032         directories.reverse()
2033
2034         # Set correct owner, mtime and filemode on directories.
2035         for tarinfo in directories:
2036             dirpath = os.path.join(path, tarinfo.name)
2037             try:
2038                 self.chown(tarinfo, dirpath)
2039                 self.utime(tarinfo, dirpath)
2040                 self.chmod(tarinfo, dirpath)
2041             except ExtractError, e:
2042                 if self.errorlevel > 1:
2043                     raise
2044                 else:
2045                     self._dbg(1, "tarfile: %s" % e)
2046
2047     def extract(self, member, path=""):
2048         """Extract a member from the archive to the current working directory,
2049            using its full name. Its file information is extracted as accurately
2050            as possible. `member' may be a filename or a TarInfo object. You can
2051            specify a different directory using `path'.
2052         """
2053         self._check("r")
2054
2055         if isinstance(member, basestring):
2056             tarinfo = self.getmember(member)
2057         else:
2058             tarinfo = member
2059
2060         # Prepare the link target for makelink().
2061         if tarinfo.islnk():
2062             tarinfo._link_target = os.path.join(path, tarinfo.linkname)
2063
2064         try:
2065             self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
2066         except EnvironmentError, e:
2067             if self.errorlevel > 0:
2068                 raise
2069             else:
2070                 if e.filename is None:
2071                     self._dbg(1, "tarfile: %s" % e.strerror)
2072                 else:
2073                     self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
2074         except ExtractError, e:
2075             if self.errorlevel > 1:
2076                 raise
2077             else:
2078                 self._dbg(1, "tarfile: %s" % e)
2079
2080     def extractfile(self, member):
2081         """Extract a member from the archive as a file object. `member' may be
2082            a filename or a TarInfo object. If `member' is a regular file, a
2083            file-like object is returned. If `member' is a link, a file-like
2084            object is constructed from the link's target. If `member' is none of
2085            the above, None is returned.
2086            The file-like object is read-only and provides the following
2087            methods: read(), readline(), readlines(), seek() and tell()
2088         """
2089         self._check("r")
2090
2091         if isinstance(member, basestring):
2092             tarinfo = self.getmember(member)
2093         else:
2094             tarinfo = member
2095
2096         if tarinfo.isreg():
2097             return self.fileobject(self, tarinfo)
2098
2099         elif tarinfo.type not in SUPPORTED_TYPES:
2100             # If a member's type is unknown, it is treated as a
2101             # regular file.
2102             return self.fileobject(self, tarinfo)
2103
2104         elif tarinfo.islnk() or tarinfo.issym():
2105             if isinstance(self.fileobj, _Stream):
2106                 # A small but ugly workaround for the case that someone tries
2107                 # to extract a (sym)link as a file-object from a non-seekable
2108                 # stream of tar blocks.
2109                 raise StreamError("cannot extract (sym)link as file object")
2110             else:
2111                 # A (sym)link's file object is its target's file object.
2112                 return self.extractfile(self._getmember(tarinfo.linkname,
2113                                                         tarinfo))
2114         else:
2115             # If there's no data associated with the member (directory, chrdev,
2116             # blkdev, etc.), return None instead of a file object.
2117             return None
2118
2119     def _extract_member(self, tarinfo, targetpath):
2120         """Extract the TarInfo object tarinfo to a physical
2121            file called targetpath.
2122         """
2123         # Fetch the TarInfo object for the given name
2124         # and build the destination pathname, replacing
2125         # forward slashes to platform specific separators.
2126         if targetpath[-1:] == "/":
2127             targetpath = targetpath[:-1]
2128         targetpath = os.path.normpath(targetpath)
2129
2130         # Create all upper directories.
2131         upperdirs = os.path.dirname(targetpath)
2132         if upperdirs and not os.path.exists(upperdirs):
2133             # Create directories that are not part of the archive with
2134             # default permissions.
2135             os.makedirs(upperdirs)
2136
2137         if tarinfo.islnk() or tarinfo.issym():
2138             self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2139         else:
2140             self._dbg(1, tarinfo.name)
2141
2142         if tarinfo.isreg():
2143             self.makefile(tarinfo, targetpath)
2144         elif tarinfo.isdir():
2145             self.makedir(tarinfo, targetpath)
2146         elif tarinfo.isfifo():
2147             self.makefifo(tarinfo, targetpath)
2148         elif tarinfo.ischr() or tarinfo.isblk():
2149             self.makedev(tarinfo, targetpath)
2150         elif tarinfo.islnk() or tarinfo.issym():
2151             self.makelink(tarinfo, targetpath)
2152         elif tarinfo.type not in SUPPORTED_TYPES:
2153             self.makeunknown(tarinfo, targetpath)
2154         else:
2155             self.makefile(tarinfo, targetpath)
2156
2157         self.chown(tarinfo, targetpath)
2158         if not tarinfo.issym():
2159             self.chmod(tarinfo, targetpath)
2160             self.utime(tarinfo, targetpath)
2161
2162     #--------------------------------------------------------------------------
2163     # Below are the different file methods. They are called via
2164     # _extract_member() when extract() is called. They can be replaced in a
2165     # subclass to implement other functionality.
2166
2167     def makedir(self, tarinfo, targetpath):
2168         """Make a directory called targetpath.
2169         """
2170         try:
2171             # Use a safe mode for the directory, the real mode is set
2172             # later in _extract_member().
2173             os.mkdir(targetpath, 0700)
2174         except EnvironmentError, e:
2175             if e.errno != errno.EEXIST:
2176                 raise
2177
2178     def makefile(self, tarinfo, targetpath):
2179         """Make a file called targetpath.
2180         """
2181         source = self.extractfile(tarinfo)
2182         target = bltn_open(targetpath, "wb")
2183         copyfileobj(source, target)
2184         source.close()
2185         target.close()
2186
2187     def makeunknown(self, tarinfo, targetpath):
2188         """Make a file from a TarInfo object with an unknown type
2189            at targetpath.
2190         """
2191         self.makefile(tarinfo, targetpath)
2192         self._dbg(1, "tarfile: Unknown file type %r, " \
2193                      "extracted as regular file." % tarinfo.type)
2194
2195     def makefifo(self, tarinfo, targetpath):
2196         """Make a fifo called targetpath.
2197         """
2198         if hasattr(os, "mkfifo"):
2199             os.mkfifo(targetpath)
2200         else:
2201             raise ExtractError("fifo not supported by system")
2202
2203     def makedev(self, tarinfo, targetpath):
2204         """Make a character or block device called targetpath.
2205         """
2206         if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
2207             raise ExtractError("special devices not supported by system")
2208
2209         mode = tarinfo.mode
2210         if tarinfo.isblk():
2211             mode |= stat.S_IFBLK
2212         else:
2213             mode |= stat.S_IFCHR
2214
2215         os.mknod(targetpath, mode,
2216                  os.makedev(tarinfo.devmajor, tarinfo.devminor))
2217
2218     def makelink(self, tarinfo, targetpath):
2219         """Make a (symbolic) link called targetpath. If it cannot be created
2220           (platform limitation), we try to make a copy of the referenced file
2221           instead of a link.
2222         """
2223         linkpath = tarinfo.linkname
2224         try:
2225             if tarinfo.issym():
2226                 os.symlink(linkpath, targetpath)
2227             else:
2228                 # See extract().
2229                 os.link(tarinfo._link_target, targetpath)
2230         except AttributeError:
2231             if tarinfo.issym():
2232                 linkpath = os.path.join(os.path.dirname(tarinfo.name),
2233                                         linkpath)
2234                 linkpath = normpath(linkpath)
2235
2236             try:
2237                 self._extract_member(self.getmember(linkpath), targetpath)
2238             except (EnvironmentError, KeyError), e:
2239                 linkpath = os.path.normpath(linkpath)
2240                 try:
2241                     shutil.copy2(linkpath, targetpath)
2242                 except EnvironmentError, e:
2243                     raise IOError("link could not be created")
2244
2245     def chown(self, tarinfo, targetpath):
2246         """Set owner of targetpath according to tarinfo.
2247         """
2248         if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
2249             # We have to be root to do so.
2250             try:
2251                 g = grp.getgrnam(tarinfo.gname)[2]
2252             except KeyError:
2253                 try:
2254                     g = grp.getgrgid(tarinfo.gid)[2]
2255                 except KeyError:
2256                     g = os.getgid()
2257             try:
2258                 u = pwd.getpwnam(tarinfo.uname)[2]
2259             except KeyError:
2260                 try:
2261                     u = pwd.getpwuid(tarinfo.uid)[2]
2262                 except KeyError:
2263                     u = os.getuid()
2264             try:
2265                 if tarinfo.issym() and hasattr(os, "lchown"):
2266                     os.lchown(targetpath, u, g)
2267                 else:
2268                     if sys.platform != "os2emx":
2269                         os.chown(targetpath, u, g)
2270             except EnvironmentError, e:
2271                 raise ExtractError("could not change owner")
2272
2273     def chmod(self, tarinfo, targetpath):
2274         """Set file permissions of targetpath according to tarinfo.
2275         """
2276         if hasattr(os, 'chmod'):
2277             try:
2278                 os.chmod(targetpath, tarinfo.mode)
2279             except EnvironmentError, e:
2280                 raise ExtractError("could not change mode")
2281
2282     def utime(self, tarinfo, targetpath):
2283         """Set modification time of targetpath according to tarinfo.
2284         """
2285         if not hasattr(os, 'utime'):
2286             return
2287         try:
2288             os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
2289         except EnvironmentError, e:
2290             raise ExtractError("could not change modification time")
2291
2292     #--------------------------------------------------------------------------
2293     def next(self):
2294         """Return the next member of the archive as a TarInfo object, when
2295            TarFile is opened for reading. Return None if there is no more
2296            available.
2297         """
2298         self._check("ra")
2299         if self.firstmember is not None:
2300             m = self.firstmember
2301             self.firstmember = None
2302             return m
2303
2304         # Read the next block.
2305         self.fileobj.seek(self.offset)
2306         while True:
2307             try:
2308                 tarinfo = self.tarinfo.fromtarfile(self)
2309                 if tarinfo is None:
2310                     return
2311                 self.members.append(tarinfo)
2312
2313             except HeaderError, e:
2314                 if self.ignore_zeros:
2315                     self._dbg(2, "0x%X: %s" % (self.offset, e))
2316                     self.offset += BLOCKSIZE
2317                     continue
2318                 else:
2319                     if self.offset == 0:
2320                         raise ReadError(str(e))
2321                     return None
2322             break
2323
2324         return tarinfo
2325
2326     #--------------------------------------------------------------------------
2327     # Little helper methods:
2328
2329     def _getmember(self, name, tarinfo=None):
2330         """Find an archive member by name from bottom to top.
2331            If tarinfo is given, it is used as the starting point.
2332         """
2333         # Ensure that all members have been loaded.
2334         members = self.getmembers()
2335
2336         if tarinfo is None:
2337             end = len(members)
2338         else:
2339             end = members.index(tarinfo)
2340
2341         for i in xrange(end - 1, -1, -1):
2342             if name == members[i].name:
2343                 return members[i]
2344
2345     def _load(self):
2346         """Read through the entire archive file and look for readable
2347            members.
2348         """
2349         while True:
2350             tarinfo = self.next()
2351             if tarinfo is None:
2352                 break
2353         self._loaded = True
2354
2355     def _check(self, mode=None):
2356         """Check if TarFile is still open, and if the operation's mode
2357            corresponds to TarFile's mode.
2358         """
2359         if self.closed:
2360             raise IOError("%s is closed" % self.__class__.__name__)
2361         if mode is not None and self.mode not in mode:
2362             raise IOError("bad operation for mode %r" % self.mode)
2363
2364     def __iter__(self):
2365         """Provide an iterator object.
2366         """
2367         if self._loaded:
2368             return iter(self.members)
2369         else:
2370             return TarIter(self)
2371
2372     def _dbg(self, level, msg):
2373         """Write debugging output to sys.stderr.
2374         """
2375         if level <= self.debug:
2376             print >> sys.stderr, msg
2377 # class TarFile
2378
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Remember the TarFile to iterate over and start at index 0.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """An iterator is its own iterator object.
        """
        return self

    def next(self):
        """Return the next member. While the archive is not fully loaded
           members come from TarFile's next() method and the TarFile is
           flagged as _loaded once that is exhausted; afterwards they are
           taken from the member list by index.
        """
        archive = self.tarfile
        if archive._loaded:
            # Fix for SF #1100429: getmembers() may have been called during
            # iteration and loaded everything; carry on from the member
            # list instead of stopping prematurely.
            try:
                tarinfo = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            tarinfo = archive.next()
            if not tarinfo:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return tarinfo
2414
2415 # Helper classes for sparse file support
class _section:
    """Common base of _data and _hole: a range of `size' bytes starting
       at `offset'.
    """
    def __init__(self, offset, size):
        self.offset, self.size = offset, size

    def __contains__(self, offset):
        # True if `offset' lies within [self.offset, self.offset + size).
        start = self.offset
        return start <= offset < start + self.size
2424
class _data(_section):
    """A data section of a sparse file. Besides the offset and size kept
       by _section, `realpos' is stored.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
2431
class _hole(_section):
    """A hole section of a sparse file. It adds nothing to the offset and
       size inherited from _section.
    """
2436
class _ringbuffer(list):
    """List that remembers the index of the last successful lookup, so
       that the next search starts there instead of at the beginning.
    """
    def __init__(self):
        self.idx = 0

    def find(self, offset):
        """Return the first item containing `offset' (tested with `in'),
           searching from the last hit and wrapping around once. None is
           returned if no item matches or the buffer is empty.
        """
        if not self:
            # Nothing to search; indexing below would raise IndexError.
            return None
        idx = self.idx
        while True:
            item = self[idx]
            if offset in item:
                break
            idx += 1
            if idx == len(self):
                idx = 0
            if idx == self.idx:
                # End of File
                return None
        self.idx = idx
        return item
2457
2458 #---------------------------------------------
2459 # zipfile compatible TarFile class
2460 #---------------------------------------------
# Compression constants accepted by TarFileCompat. Each one mirrors the
# zipfile constant named in its trailing comment.
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """Wrapper that gives a TarFile a subset of the interface of the
       standard module zipfile's ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open file as a plain (TAR_PLAIN) or gzipped (TAR_GZIPPED)
           archive. Any other compression value raises ValueError.
        """
        from warnings import warnpy3k
        warnpy3k("the TarFileCompat class has been removed in Python 3.0",
                stacklevel=2)
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)
        if mode[:1] != "r":
            return
        # Reading: give every member the attribute names that callers
        # written against zipfile expect to find.
        for member in self.tarfile.getmembers():
            member.filename = member.name
            member.file_size = member.size
            member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        """Return the names of the members reported by infolist()."""
        return [member.name for member in self.infolist()]

    def infolist(self):
        """Return the members whose type is one of REGULAR_TYPES."""
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]

    def printdir(self):
        """Print the archive's table of contents via TarFile.list()."""
        self.tarfile.list()

    def testzip(self):
        """Do nothing and return None; no check is performed."""
        return None

    def getinfo(self, name):
        """Return the member object for name."""
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the complete contents of the member called name."""
        member = self.tarfile.getmember(name)
        stream = self.tarfile.extractfile(member)
        return stream.read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add filename to the archive under arcname. compress_type
           is accepted for signature compatibility and ignored.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add the string bytes as a member named zinfo.filename whose
           mtime is derived from zinfo.date_time.
        """
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        member = TarInfo(zinfo.filename)
        member.size = len(bytes)
        member.mtime = calendar.timegm(zinfo.date_time)
        payload = StringIO(bytes)
        self.tarfile.addfile(member, payload)

    def close(self):
        """Close the underlying TarFile."""
        self.tarfile.close()
2510 #class TarFileCompat
2511
2512 #--------------------
2513 # exported functions
2514 #--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.

       The check is an open-and-close attempt; only TarError is
       translated into False, any other exception propagates.
    """
    try:
        # By the time this runs, "open" is the module-level name that
        # is rebound to TarFile.open at the bottom of the file.
        archive = open(name)
        archive.close()
    except TarError:
        return False
    return True
2525
# Save the current binding of open under another name, then make the
# module-level name open refer to TarFile.open. The order matters:
# once the second assignment has run, "open" inside this module no
# longer reaches the function saved as bltn_open.
# NOTE(review): bltn_open is presumably the builtin open() -- confirm
# that nothing earlier in the module rebinds the name.
bltn_open = open
open = TarFile.open