Lib/tarfile.py

   1 #!/usr/bin/env python
   2 # -*- coding: iso-8859-1 -*-
   3 #-------------------------------------------------------------------
   4 # tarfile.py
   5 #-------------------------------------------------------------------
   6 # Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
   7 # All rights reserved.
   8 #
   9 # Permission  is  hereby granted,  free  of charge,  to  any person
  10 # obtaining a  copy of  this software  and associated documentation
  11 # files  (the  "Software"),  to   deal  in  the  Software   without
  12 # restriction,  including  without limitation  the  rights to  use,
  13 # copy, modify, merge, publish, distribute, sublicense, and/or sell
  14 # copies  of  the  Software,  and to  permit  persons  to  whom the
  15 # Software  is  furnished  to  do  so,  subject  to  the  following
  16 # conditions:
  17 #
  18 # The above copyright  notice and this  permission notice shall  be
  19 # included in all copies or substantial portions of the Software.
  20 #
  21 # THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
  22 # EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
  23 # OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
  24 # NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
  25 # HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
  26 # WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
  27 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  28 # OTHER DEALINGS IN THE SOFTWARE.
  29 #
  30 """Read from and write to tar format archives.
  31 """
  32
  33 __version__ = "$Revision$"
  34 # $Source$
  35
  36 version     = "0.9.0"
  37 __author__  = "Lars Gustäbel (lars@gustaebel.de)"
  38 __date__    = "$Date$"
  39 __cvsid__   = "$Id$"
  40 __credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
  41
  42 #---------
  43 # Imports
  44 #---------
  45 import sys
  46 import os
  47 import shutil
  48 import stat
  49 import errno
  50 import time
  51 import struct
  52 import copy
  53 import re
  54
  55 if sys.platform == 'mac':
  56     # This module needs work for MacOS9, especially in the area of pathname
  57     # handling. In many places it is assumed a simple substitution of / by the
  58     # local os.path.sep is good enough to convert pathnames, but this does not
  59     # work with the mac rooted:path:name versus :nonrooted:path:name syntax
  60     raise ImportError, "tarfile does not work for platform==mac"
  61
  62 try:
  63     import grp, pwd
  64 except ImportError:
  65     grp = pwd = None
  66
  67 # from tarfile import *
  68 __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
  69
  70 #---------------------------------------------------------
  71 # tar constants
  72 #---------------------------------------------------------
  73 NUL = "\0"                      # the null character
  74 BLOCKSIZE = 512                 # length of processing blocks
  75 RECORDSIZE = BLOCKSIZE * 20     # length of records
  76 GNU_MAGIC = "ustar  \0"         # magic gnu tar string
  77 POSIX_MAGIC = "ustar\x0000"     # magic posix tar string
  78
  79 LENGTH_NAME = 100               # maximum length of a filename
  80 LENGTH_LINK = 100               # maximum length of a linkname
  81 LENGTH_PREFIX = 155             # maximum length of the prefix field
  82
  83 REGTYPE = "0"                   # regular file
  84 AREGTYPE = "\0"                 # regular file
  85 LNKTYPE = "1"                   # link (inside tarfile)
  86 SYMTYPE = "2"                   # symbolic link
  87 CHRTYPE = "3"                   # character special device
  88 BLKTYPE = "4"                   # block special device
  89 DIRTYPE = "5"                   # directory
  90 FIFOTYPE = "6"                  # fifo special device
  91 CONTTYPE = "7"                  # contiguous file
  92
  93 GNUTYPE_LONGNAME = "L"          # GNU tar longname
  94 GNUTYPE_LONGLINK = "K"          # GNU tar longlink
  95 GNUTYPE_SPARSE = "S"            # GNU tar sparse file
  96
  97 XHDTYPE = "x"                   # POSIX.1-2001 extended header
  98 XGLTYPE = "g"                   # POSIX.1-2001 global header
  99 SOLARIS_XHDTYPE = "X"           # Solaris extended header
 100
 101 USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
 102 GNU_FORMAT = 1                  # GNU tar format
 103 PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
 104 DEFAULT_FORMAT = GNU_FORMAT
 105
 106 #---------------------------------------------------------
 107 # tarfile constants
 108 #---------------------------------------------------------
 109 # File types that tarfile supports:
 110 SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
 111                    SYMTYPE, DIRTYPE, FIFOTYPE,
 112                    CONTTYPE, CHRTYPE, BLKTYPE,
 113                    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
 114                    GNUTYPE_SPARSE)
 115
 116 # File types that will be treated as a regular file.
 117 REGULAR_TYPES = (REGTYPE, AREGTYPE,
 118                  CONTTYPE, GNUTYPE_SPARSE)
 119
 120 # File types that are part of the GNU tar format.
 121 GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
 122              GNUTYPE_SPARSE)
 123
 124 # Fields from a pax header that override a TarInfo attribute.
 125 PAX_FIELDS = ("path", "linkpath", "size", "mtime",
 126               "uid", "gid", "uname", "gname")
 127
 128 # Fields in a pax header that are numbers, all other fields
 129 # are treated as strings.
 130 PAX_NUMBER_FIELDS = {
 131     "atime": float,
 132     "ctime": float,
 133     "mtime": float,
 134     "uid": int,
 135     "gid": int,
 136     "size": int
 137 }
 138
 139 #---------------------------------------------------------
 140 # Bits used in the mode field, values in octal.
 141 #---------------------------------------------------------
 142 S_IFLNK = 0120000        # symbolic link
 143 S_IFREG = 0100000        # regular file
 144 S_IFBLK = 0060000        # block device
 145 S_IFDIR = 0040000        # directory
 146 S_IFCHR = 0020000        # character device
 147 S_IFIFO = 0010000        # fifo
 148
 149 TSUID   = 04000          # set UID on execution
 150 TSGID   = 02000          # set GID on execution
 151 TSVTX   = 01000          # reserved
 152
 153 TUREAD  = 0400           # read by owner
 154 TUWRITE = 0200           # write by owner
 155 TUEXEC  = 0100           # execute/search by owner
 156 TGREAD  = 0040           # read by group
 157 TGWRITE = 0020           # write by group
 158 TGEXEC  = 0010           # execute/search by group
 159 TOREAD  = 0004           # read by other
 160 TOWRITE = 0002           # write by other
 161 TOEXEC  = 0001           # execute/search by other
 162
 163 #---------------------------------------------------------
 164 # initialization
 165 #---------------------------------------------------------
 166 ENCODING = sys.getfilesystemencoding()
 167 if ENCODING is None:
 168     ENCODING = sys.getdefaultencoding()
 169
 170 #---------------------------------------------------------
 171 # Some useful functions
 172 #---------------------------------------------------------
 173
 174 def stn(s, length):
 175     """Convert a python string to a null-terminated string buffer.
 176     """
 177     return s[:length] + (length - len(s)) * NUL
 178
 179 def nts(s):
 180     """Convert a null-terminated string field to a python string.
 181     """
 182     # Use the string up to the first null char.
 183     p = s.find("\0")
 184     if p == -1:
 185         return s
 186     return s[:p]
 187
 188 def nti(s):
 189     """Convert a number field to a python number.
 190     """
 191     # There are two possible encodings for a number field, see
 192     # itn() below.
 193     if s[0] != chr(0200):
 194         try:
 195             n = int(nts(s) or "0", 8)
 196         except ValueError:
 197             raise HeaderError("invalid header")
 198     else:
 199         n = 0L
 200         for i in xrange(len(s) - 1):
 201             n <<= 8
 202             n += ord(s[i + 1])
 203     return n
 204
 205 def itn(n, digits=8, format=DEFAULT_FORMAT):
 206     """Convert a python number to a number field.
 207     """
 208     # POSIX 1003.1-1988 requires numbers to be encoded as a string of
 209     # octal digits followed by a null-byte, this allows values up to
 210     # (8**(digits-1))-1. GNU tar allows storing numbers greater than
 211     # that if necessary. A leading 0200 byte indicates this particular
 212     # encoding, the following digits-1 bytes are a big-endian
 213     # representation. This allows values up to (256**(digits-1))-1.
 214     if 0 <= n < 8 ** (digits - 1):
 215         s = "%0*o" % (digits - 1, n) + NUL
 216     else:
 217         if format != GNU_FORMAT or n >= 256 ** (digits - 1):
 218             raise ValueError("overflow in number field")
 219
 220         if n < 0:
 221             # XXX We mimic GNU tar's behaviour with negative numbers,
 222             # this could raise OverflowError.
 223             n = struct.unpack("L", struct.pack("l", n))[0]
 224
 225         s = ""
 226         for i in xrange(digits - 1):
 227             s = chr(n & 0377) + s
 228             n >>= 8
 229         s = chr(0200) + s
 230     return s
 231
 232 def uts(s, encoding, errors):
 233     """Convert a unicode object to a string.
 234     """
 235     if errors == "utf-8":
 236         # An extra error handler similar to the -o invalid=UTF-8 option
 237         # in POSIX.1-2001. Replace untranslatable characters with their
 238         # UTF-8 representation.
 239         try:
 240             return s.encode(encoding, "strict")
 241         except UnicodeEncodeError:
 242             x = []
 243             for c in s:
 244                 try:
 245                     x.append(c.encode(encoding, "strict"))
 246                 except UnicodeEncodeError:
 247                     x.append(c.encode("utf8"))
 248             return "".join(x)
 249     else:
 250         return s.encode(encoding, errors)
 251
 252 def calc_chksums(buf):
 253     """Calculate the checksum for a member's header by summing up all
 254        characters except for the chksum field which is treated as if
 255        it was filled with spaces. According to the GNU tar sources,
 256        some tars (Sun and NeXT) calculate chksum with signed char,
 257        which will be different if there are chars in the buffer with
 258        the high bit set. So we calculate two checksums, unsigned and
 259        signed.
 260     """
 261     unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512]))
 262     signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512]))
 263     return unsigned_chksum, signed_chksum
 264
 265 def copyfileobj(src, dst, length=None):
 266     """Copy length bytes from fileobj src to fileobj dst.
 267        If length is None, copy the entire content.
 268     """
 269     if length == 0:
 270         return
 271     if length is None:
 272         shutil.copyfileobj(src, dst)
 273         return
 274
 275     BUFSIZE = 16 * 1024
 276     blocks, remainder = divmod(length, BUFSIZE)
 277     for b in xrange(blocks):
 278         buf = src.read(BUFSIZE)
 279         if len(buf) < BUFSIZE:
 280             raise IOError("end of file reached")
 281         dst.write(buf)
 282
 283     if remainder != 0:
 284         buf = src.read(remainder)
 285         if len(buf) < remainder:
 286             raise IOError("end of file reached")
 287         dst.write(buf)
 288     return
 289
 290 filemode_table = (
 291     ((S_IFLNK,      "l"),
 292      (S_IFREG,      "-"),
 293      (S_IFBLK,      "b"),
 294      (S_IFDIR,      "d"),
 295      (S_IFCHR,      "c"),
 296      (S_IFIFO,      "p")),
 297
 298     ((TUREAD,       "r"),),
 299     ((TUWRITE,      "w"),),
 300     ((TUEXEC|TSUID, "s"),
 301      (TSUID,        "S"),
 302      (TUEXEC,       "x")),
 303
 304     ((TGREAD,       "r"),),
 305     ((TGWRITE,      "w"),),
 306     ((TGEXEC|TSGID, "s"),
 307      (TSGID,        "S"),
 308      (TGEXEC,       "x")),
 309
 310     ((TOREAD,       "r"),),
 311     ((TOWRITE,      "w"),),
 312     ((TOEXEC|TSVTX, "t"),
 313      (TSVTX,        "T"),
 314      (TOEXEC,       "x"))
 315 )
 316
 317 def filemode(mode):
 318     """Convert a file's mode to a string of the form
 319        -rwxrwxrwx.
 320        Used by TarFile.list()
 321     """
 322     perm = []
 323     for table in filemode_table:
 324         for bit, char in table:
 325             if mode & bit == bit:
 326                 perm.append(char)
 327                 break
 328         else:
 329             perm.append("-")
 330     return "".join(perm)
 331
 332 if os.sep != "/":
 333     normpath = lambda path: os.path.normpath(path).replace(os.sep, "/")
 334 else:
 335     normpath = os.path.normpath
 336
 337 class TarError(Exception):
 338     """Base exception."""
 339     pass
 340 class ExtractError(TarError):
 341     """General exception for extract errors."""
 342     pass
 343 class ReadError(TarError):
 344     """Exception for unreadble tar archives."""
 345     pass
 346 class CompressionError(TarError):
 347     """Exception for unavailable compression methods."""
 348     pass
 349 class StreamError(TarError):
 350     """Exception for unsupported operations on stream-like TarFiles."""
 351     pass
 352 class HeaderError(TarError):
 353     """Exception for invalid headers."""
 354     pass
 355
 356 #---------------------------
 357 # internal stream interface
 358 #---------------------------
 359 class _LowLevelFile:
 360     """Low-level file object. Supports reading and writing.
 361        It is used instead of a regular file object for streaming
 362        access.
 363     """
 364
 365     def __init__(self, name, mode):
 366         mode = {
 367             "r": os.O_RDONLY,
 368             "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
 369         }[mode]
 370         if hasattr(os, "O_BINARY"):
 371             mode |= os.O_BINARY
 372         self.fd = os.open(name, mode)
 373
 374     def close(self):
 375         os.close(self.fd)
 376
 377     def read(self, size):
 378         return os.read(self.fd, size)
 379
 380     def write(self, s):
 381         os.write(self.fd, s)
 382
 383 class _Stream:
 384     """Class that serves as an adapter between TarFile and
 385        a stream-like object.  The stream-like object only
 386        needs to have a read() or write() method and is accessed
 387        blockwise.  Use of gzip or bzip2 compression is possible.
 388        A stream-like object could be for example: sys.stdin,
 389        sys.stdout, a socket, a tape device etc.
 390
 391        _Stream is intended to be used only internally.
 392     """
 393
 394     def __init__(self, name, mode, comptype, fileobj, bufsize):
 395         """Construct a _Stream object.
 396         """
 397         self._extfileobj = True
 398         if fileobj is None:
 399             fileobj = _LowLevelFile(name, mode)
 400             self._extfileobj = False
 401
 402         if comptype == '*':
 403             # Enable transparent compression detection for the
 404             # stream interface
 405             fileobj = _StreamProxy(fileobj)
 406             comptype = fileobj.getcomptype()
 407
 408         self.name     = name or ""
 409         self.mode     = mode
 410         self.comptype = comptype
 411         self.fileobj  = fileobj
 412         self.bufsize  = bufsize
 413         self.buf      = ""
 414         self.pos      = 0L
 415         self.closed   = False
 416
 417         if comptype == "gz":
 418             try:
 419                 import zlib
 420             except ImportError:
 421                 raise CompressionError("zlib module is not available")
 422             self.zlib = zlib
 423             self.crc = zlib.crc32("")
 424             if mode == "r":
 425                 self._init_read_gz()
 426             else:
 427                 self._init_write_gz()
 428
 429         if comptype == "bz2":
 430             try:
 431                 import bz2
 432             except ImportError:
 433                 raise CompressionError("bz2 module is not available")
 434             if mode == "r":
 435                 self.dbuf = ""
 436                 self.cmp = bz2.BZ2Decompressor()
 437             else:
 438                 self.cmp = bz2.BZ2Compressor()
 439
 440     def __del__(self):
 441         if hasattr(self, "closed") and not self.closed:
 442             self.close()
 443
 444     def _init_write_gz(self):
 445         """Initialize for writing with gzip compression.
 446         """
 447         self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
 448                                             -self.zlib.MAX_WBITS,
 449                                             self.zlib.DEF_MEM_LEVEL,
 450                                             0)
 451         timestamp = struct.pack("<L", long(time.time()))
 452         self.__write("\037\213\010\010%s\002\377" % timestamp)
 453         if self.name.endswith(".gz"):
 454             self.name = self.name[:-3]
 455         self.__write(self.name + NUL)
 456
 457     def write(self, s):
 458         """Write string s to the stream.
 459         """
 460         if self.comptype == "gz":
 461             self.crc = self.zlib.crc32(s, self.crc)
 462         self.pos += len(s)
 463         if self.comptype != "tar":
 464             s = self.cmp.compress(s)
 465         self.__write(s)
 466
 467     def __write(self, s):
 468         """Write string s to the stream if a whole new block
 469            is ready to be written.
 470         """
 471         self.buf += s
 472         while len(self.buf) > self.bufsize:
 473             self.fileobj.write(self.buf[:self.bufsize])
 474             self.buf = self.buf[self.bufsize:]
 475
 476     def close(self):
 477         """Close the _Stream object. No operation should be
 478            done on it afterwards.
 479         """
 480         if self.closed:
 481             return
 482
 483         if self.mode == "w" and self.comptype != "tar":
 484             self.buf += self.cmp.flush()
 485
 486         if self.mode == "w" and self.buf:
 487             self.fileobj.write(self.buf)
 488             self.buf = ""
 489             if self.comptype == "gz":
 490                 # The native zlib crc is an unsigned 32-bit integer, but
 491                 # the Python wrapper implicitly casts that to a signed C
 492                 # long.  So, on a 32-bit box self.crc may "look negative",
 493                 # while the same crc on a 64-bit box may "look positive".
 494                 # To avoid irksome warnings from the `struct` module, force
 495                 # it to look positive on all boxes.
 496                 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
 497                 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
 498
 499         if not self._extfileobj:
 500             self.fileobj.close()
 501
 502         self.closed = True
 503
 504     def _init_read_gz(self):
 505         """Initialize for reading a gzip compressed fileobj.
 506         """
 507         self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
 508         self.dbuf = ""
 509
 510         # taken from gzip.GzipFile with some alterations
 511         if self.__read(2) != "\037\213":
 512             raise ReadError("not a gzip file")
 513         if self.__read(1) != "\010":
 514             raise CompressionError("unsupported compression method")
 515
 516         flag = ord(self.__read(1))
 517         self.__read(6)
 518
 519         if flag & 4:
 520             xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
 521             self.read(xlen)
 522         if flag & 8:
 523             while True:
 524                 s = self.__read(1)
 525                 if not s or s == NUL:
 526                     break
 527         if flag & 16:
 528             while True:
 529                 s = self.__read(1)
 530                 if not s or s == NUL:
 531                     break
 532         if flag & 2:
 533             self.__read(2)
 534
 535     def tell(self):
 536         """Return the stream's file pointer position.
 537         """
 538         return self.pos
 539
 540     def seek(self, pos=0):
 541         """Set the stream's file pointer to pos. Negative seeking
 542            is forbidden.
 543         """
 544         if pos - self.pos >= 0:
 545             blocks, remainder = divmod(pos - self.pos, self.bufsize)
 546             for i in xrange(blocks):
 547                 self.read(self.bufsize)
 548             self.read(remainder)
 549         else:
 550             raise StreamError("seeking backwards is not allowed")
 551         return self.pos
 552
 553     def read(self, size=None):
 554         """Return the next size number of bytes from the stream.
 555            If size is not defined, return all bytes of the stream
 556            up to EOF.
 557         """
 558         if size is None:
 559             t = []
 560             while True:
 561                 buf = self._read(self.bufsize)
 562                 if not buf:
 563                     break
 564                 t.append(buf)
 565             buf = "".join(t)
 566         else:
 567             buf = self._read(size)
 568         self.pos += len(buf)
 569         return buf
 570
 571     def _read(self, size):
 572         """Return size bytes from the stream.
 573         """
 574         if self.comptype == "tar":
 575             return self.__read(size)
 576
 577         c = len(self.dbuf)
 578         t = [self.dbuf]
 579         while c < size:
 580             buf = self.__read(self.bufsize)
 581             if not buf:
 582                 break
 583             try:
 584                 buf = self.cmp.decompress(buf)
 585             except IOError:
 586                 raise ReadError("invalid compressed data")
 587             t.append(buf)
 588             c += len(buf)
 589         t = "".join(t)
 590         self.dbuf = t[size:]
 591         return t[:size]
 592
 593     def __read(self, size):
 594         """Return size bytes from stream. If internal buffer is empty,
 595            read another block from the stream.
 596         """
 597         c = len(self.buf)
 598         t = [self.buf]
 599         while c < size:
 600             buf = self.fileobj.read(self.bufsize)
 601             if not buf:
 602                 break
 603             t.append(buf)
 604             c += len(buf)
 605         t = "".join(t)
 606         self.buf = t[size:]
 607         return t[:size]
 608 # class _Stream
 609
 610 class _StreamProxy(object):
 611     """Small proxy class that enables transparent compression
 612        detection for the Stream interface (mode 'r|*').
 613     """
 614
 615     def __init__(self, fileobj):
 616         self.fileobj = fileobj
 617         self.buf = self.fileobj.read(BLOCKSIZE)
 618
 619     def read(self, size):
 620         self.read = self.fileobj.read
 621         return self.buf
 622
 623     def getcomptype(self):
 624         if self.buf.startswith("\037\213\010"):
 625             return "gz"
 626         if self.buf.startswith("BZh91"):
 627             return "bz2"
 628         return "tar"
 629
 630     def close(self):
 631         self.fileobj.close()
 632 # class StreamProxy
 633
 634 class _BZ2Proxy(object):
 635     """Small proxy class that enables external file object
 636        support for "r:bz2" and "w:bz2" modes. This is actually
 637        a workaround for a limitation in bz2 module's BZ2File
 638        class which (unlike gzip.GzipFile) has no support for
 639        a file object argument.
 640     """
 641
 642     blocksize = 16 * 1024
 643
 644     def __init__(self, fileobj, mode):
 645         self.fileobj = fileobj
 646         self.mode = mode
 647         self.name = getattr(self.fileobj, "name", None)
 648         self.init()
 649
 650     def init(self):
 651         import bz2
 652         self.pos = 0
 653         if self.mode == "r":
 654             self.bz2obj = bz2.BZ2Decompressor()
 655             self.fileobj.seek(0)
 656             self.buf = ""
 657         else:
 658             self.bz2obj = bz2.BZ2Compressor()
 659
 660     def read(self, size):
 661         b = [self.buf]
 662         x = len(self.buf)
 663         while x < size:
 664             try:
 665                 raw = self.fileobj.read(self.blocksize)
 666                 data = self.bz2obj.decompress(raw)
 667                 b.append(data)
 668             except EOFError:
 669                 break
 670             x += len(data)
 671         self.buf = "".join(b)
 672
 673         buf = self.buf[:size]
 674         self.buf = self.buf[size:]
 675         self.pos += len(buf)
 676         return buf
 677
 678     def seek(self, pos):
 679         if pos < self.pos:
 680             self.init()
 681         self.read(pos - self.pos)
 682
 683     def tell(self):
 684         return self.pos
 685
 686     def write(self, data):
 687         self.pos += len(data)
 688         raw = self.bz2obj.compress(data)
 689         self.fileobj.write(raw)
 690
 691     def close(self):
 692         if self.mode == "w":
 693             raw = self.bz2obj.flush()
 694             self.fileobj.write(raw)
 695         self.fileobj.close()
 696 # class _BZ2Proxy
 697
 698 #------------------------
 699 # Extraction file object
 700 #------------------------
 701 class _FileInFile(object):
 702     """A thin wrapper around an existing file object that
 703        provides a part of its data as an individual file
 704        object.
 705     """
 706
 707     def __init__(self, fileobj, offset, size, sparse=None):
 708         self.fileobj = fileobj
 709         self.offset = offset
 710         self.size = size
 711         self.sparse = sparse
 712         self.position = 0
 713
 714     def tell(self):
 715         """Return the current file position.
 716         """
 717         return self.position
 718
 719     def seek(self, position):
 720         """Seek to a position in the file.
 721         """
 722         self.position = position
 723
 724     def read(self, size=None):
 725         """Read data from the file.
 726         """
 727         if size is None:
 728             size = self.size - self.position
 729         else:
 730             size = min(size, self.size - self.position)
 731
 732         if self.sparse is None:
 733             return self.readnormal(size)
 734         else:
 735             return self.readsparse(size)
 736
 737     def readnormal(self, size):
 738         """Read operation for regular files.
 739         """
 740         self.fileobj.seek(self.offset + self.position)
 741         self.position += size
 742         return self.fileobj.read(size)
 743
 744     def readsparse(self, size):
 745         """Read operation for sparse files.
 746         """
 747         data = []
 748         while size > 0:
 749             buf = self.readsparsesection(size)
 750             if not buf:
 751                 break
 752             size -= len(buf)
 753             data.append(buf)
 754         return "".join(data)
 755
 756     def readsparsesection(self, size):
 757         """Read a single section of a sparse file.
 758         """
 759         section = self.sparse.find(self.position)
 760
 761         if section is None:
 762             return ""
 763
 764         size = min(size, section.offset + section.size - self.position)
 765
 766         if isinstance(section, _data):
 767             realpos = section.realpos + self.position - section.offset
 768             self.fileobj.seek(self.offset + realpos)
 769             self.position += size
 770             return self.fileobj.read(size)
 771         else:
 772             self.position += size
 773             return NUL * size
 774 #class _FileInFile
 775
 776
 777 class ExFileObject(object):
 778     """File-like object for reading an archive member.
 779        Is returned by TarFile.extractfile().
 780     """
 781     blocksize = 1024
 782
 783     def __init__(self, tarfile, tarinfo):
 784         self.fileobj = _FileInFile(tarfile.fileobj,
 785                                    tarinfo.offset_data,
 786                                    tarinfo.size,
 787                                    getattr(tarinfo, "sparse", None))
 788         self.name = tarinfo.name
 789         self.mode = "r"
 790         self.closed = False
 791         self.size = tarinfo.size
 792
 793         self.position = 0
 794         self.buffer = ""
 795
 796     def read(self, size=None):
 797         """Read at most size bytes from the file. If size is not
 798            present or None, read all data until EOF is reached.
 799         """
 800         if self.closed:
 801             raise ValueError("I/O operation on closed file")
 802
 803         buf = ""
 804         if self.buffer:
 805             if size is None:
 806                 buf = self.buffer
 807                 self.buffer = ""
 808             else:
 809                 buf = self.buffer[:size]
 810                 self.buffer = self.buffer[size:]
 811
 812         if size is None:
 813             buf += self.fileobj.read()
 814         else:
 815             buf += self.fileobj.read(size - len(buf))
 816
 817         self.position += len(buf)
 818         return buf
 819
 820     def readline(self, size=-1):
 821         """Read one entire line from the file. If size is present
 822            and non-negative, return a string with at most that
 823            size, which may be an incomplete line.
 824         """
 825         if self.closed:
 826             raise ValueError("I/O operation on closed file")
 827
 828         if "\n" in self.buffer:
 829             pos = self.buffer.find("\n") + 1
 830         else:
 831             buffers = [self.buffer]
 832             while True:
 833                 buf = self.fileobj.read(self.blocksize)
 834                 buffers.append(buf)
 835                 if not buf or "\n" in buf:
 836                     self.buffer = "".join(buffers)
 837                     pos = self.buffer.find("\n") + 1
 838                     if pos == 0:
 839                         # no newline found.
 840                         pos = len(self.buffer)
 841                     break
 842
 843         if size != -1:
 844             pos = min(size, pos)
 845
 846         buf = self.buffer[:pos]
 847         self.buffer = self.buffer[pos:]
 848         self.position += len(buf)
 849         return buf
 850
 851     def readlines(self):
 852         """Return a list with all remaining lines.
 853         """
 854         result = []
 855         while True:
 856             line = self.readline()
 857             if not line: break
 858             result.append(line)
 859         return result
 860
 861     def tell(self):
 862         """Return the current file position.
 863         """
 864         if self.closed:
 865             raise ValueError("I/O operation on closed file")
 866
 867         return self.position
 868
 869     def seek(self, pos, whence=os.SEEK_SET):
 870         """Seek to a position in the file.
 871         """
 872         if self.closed:
 873             raise ValueError("I/O operation on closed file")
 874
 875         if whence == os.SEEK_SET:
 876             self.position = min(max(pos, 0), self.size)
 877         elif whence == os.SEEK_CUR:
 878             if pos < 0:
 879                 self.position = max(self.position + pos, 0)
 880             else:
 881                 self.position = min(self.position + pos, self.size)
 882         elif whence == os.SEEK_END:
 883             self.position = max(min(self.size + pos, self.size), 0)
 884         else:
 885             raise ValueError("Invalid argument")
 886
 887         self.buffer = ""
 888         self.fileobj.seek(self.position)
 889
 890     def close(self):
 891         """Close the file object.
 892         """
 893         self.closed = True
 894
 895     def __iter__(self):
 896         """Get an iterator over the file's lines.
 897         """
 898         while True:
 899             line = self.readline()
 900             if not line:
 901                 break
 902             yield line
 903 #class ExFileObject
 904
 905 #------------------
 906 # Exported Classes
 907 #------------------
 908 class TarInfo(object):
 909     """Informational class which holds the details about an
 910        archive member given by a tar header block.
 911        TarInfo objects are returned by TarFile.getmember(),
 912        TarFile.getmembers() and TarFile.gettarinfo() and are
 913        usually created internally.
 914     """
 915
 916     def __init__(self, name=""):
 917         """Construct a TarInfo object. name is the optional name
 918            of the member.
 919         """
 920         self.name = name        # member name
 921         self.mode = 0644        # file permissions
 922         self.uid = 0            # user id
 923         self.gid = 0            # group id
 924         self.size = 0           # file size
 925         self.mtime = 0          # modification time
 926         self.chksum = 0         # header checksum
 927         self.type = REGTYPE     # member type
 928         self.linkname = ""      # link name
 929         self.uname = "root"     # user name
 930         self.gname = "root"     # group name
 931         self.devmajor = 0       # device major number
 932         self.devminor = 0       # device minor number
 933
 934         self.offset = 0         # the tar header starts here
 935         self.offset_data = 0    # the file's data starts here
 936
 937         self.pax_headers = {}   # pax header information
 938
 939     # In pax headers the "name" and "linkname" field are called
 940     # "path" and "linkpath".
 941     def _getpath(self):
 942         return self.name
 943     def _setpath(self, name):
 944         self.name = name
 945     path = property(_getpath, _setpath)
 946
 947     def _getlinkpath(self):
 948         return self.linkname
 949     def _setlinkpath(self, linkname):
 950         self.linkname = linkname
 951     linkpath = property(_getlinkpath, _setlinkpath)
 952
 953     def __repr__(self):
 954         return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
 955
 956     def get_info(self, encoding, errors):
 957         """Return the TarInfo's attributes as a dictionary.
 958         """
 959         info = {
 960             "name":     normpath(self.name),
 961             "mode":     self.mode & 07777,
 962             "uid":      self.uid,
 963             "gid":      self.gid,
 964             "size":     self.size,
 965             "mtime":    self.mtime,
 966             "chksum":   self.chksum,
 967             "type":     self.type,
 968             "linkname": normpath(self.linkname) if self.linkname else "",
 969             "uname":    self.uname,
 970             "gname":    self.gname,
 971             "devmajor": self.devmajor,
 972             "devminor": self.devminor
 973         }
 974
 975         if info["type"] == DIRTYPE and not info["name"].endswith("/"):
 976             info["name"] += "/"
 977
 978         for key in ("name", "linkname", "uname", "gname"):
 979             if type(info[key]) is unicode:
 980                 info[key] = info[key].encode(encoding, errors)
 981
 982         return info
 983
 984     def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
 985         """Return a tar header as a string of 512 byte blocks.
 986         """
 987         info = self.get_info(encoding, errors)
 988
 989         if format == USTAR_FORMAT:
 990             return self.create_ustar_header(info)
 991         elif format == GNU_FORMAT:
 992             return self.create_gnu_header(info)
 993         elif format == PAX_FORMAT:
 994             return self.create_pax_header(info, encoding, errors)
 995         else:
 996             raise ValueError("invalid format")
 997
 998     def create_ustar_header(self, info):
 999         """Return the object as a ustar header block.
1000         """
1001         info["magic"] = POSIX_MAGIC
1002
1003         if len(info["linkname"]) > LENGTH_LINK:
1004             raise ValueError("linkname is too long")
1005
1006         if len(info["name"]) > LENGTH_NAME:
1007             info["prefix"], info["name"] = self._posix_split_name(info["name"])
1008
1009         return self._create_header(info, USTAR_FORMAT)
1010
1011     def create_gnu_header(self, info):
1012         """Return the object as a GNU header block sequence.
1013         """
1014         info["magic"] = GNU_MAGIC
1015
1016         buf = ""
1017         if len(info["linkname"]) > LENGTH_LINK:
1018             buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK)
1019
1020         if len(info["name"]) > LENGTH_NAME:
1021             buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME)
1022
1023         return buf + self._create_header(info, GNU_FORMAT)
1024
1025     def create_pax_header(self, info, encoding, errors):
1026         """Return the object as a ustar header block. If it cannot be
1027            represented this way, prepend a pax extended header sequence
1028            with supplement information.
1029         """
1030         info["magic"] = POSIX_MAGIC
1031         pax_headers = self.pax_headers.copy()
1032
1033         # Test string fields for values that exceed the field length or cannot
1034         # be represented in ASCII encoding.
1035         for name, hname, length in (
1036                 ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
1037                 ("uname", "uname", 32), ("gname", "gname", 32)):
1038
1039             if hname in pax_headers:
1040                 # The pax header has priority.
1041                 continue
1042
1043             val = info[name].decode(encoding, errors)
1044
1045             # Try to encode the string as ASCII.
1046             try:
1047                 val.encode("ascii")
1048             except UnicodeEncodeError:
1049                 pax_headers[hname] = val
1050                 continue
1051
1052             if len(info[name]) > length:
1053                 pax_headers[hname] = val
1054
1055         # Test number fields for values that exceed the field limit or values
1056         # that like to be stored as float.
1057         for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
1058             if name in pax_headers:
1059                 # The pax header has priority. Avoid overflow.
1060                 info[name] = 0
1061                 continue
1062
1063             val = info[name]
1064             if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
1065                 pax_headers[name] = unicode(val)
1066                 info[name] = 0
1067
1068         # Create a pax extended header if necessary.
1069         if pax_headers:
1070             buf = self._create_pax_generic_header(pax_headers)
1071         else:
1072             buf = ""
1073
1074         return buf + self._create_header(info, USTAR_FORMAT)
1075
1076     @classmethod
1077     def create_pax_global_header(cls, pax_headers):
1078         """Return the object as a pax global header block sequence.
1079         """
1080         return cls._create_pax_generic_header(pax_headers, type=XGLTYPE)
1081
1082     def _posix_split_name(self, name):
1083         """Split a name longer than 100 chars into a prefix
1084            and a name part.
1085         """
1086         prefix = name[:LENGTH_PREFIX + 1]
1087         while prefix and prefix[-1] != "/":
1088             prefix = prefix[:-1]
1089
1090         name = name[len(prefix):]
1091         prefix = prefix[:-1]
1092
1093         if not prefix or len(name) > LENGTH_NAME:
1094             raise ValueError("name is too long")
1095         return prefix, name
1096
1097     @staticmethod
1098     def _create_header(info, format):
1099         """Return a header block. info is a dictionary with file
1100            information, format must be one of the *_FORMAT constants.
1101         """
1102         parts = [
1103             stn(info.get("name", ""), 100),
1104             itn(info.get("mode", 0) & 07777, 8, format),
1105             itn(info.get("uid", 0), 8, format),
1106             itn(info.get("gid", 0), 8, format),
1107             itn(info.get("size", 0), 12, format),
1108             itn(info.get("mtime", 0), 12, format),
1109             "        ", # checksum field
1110             info.get("type", REGTYPE),
1111             stn(info.get("linkname", ""), 100),
1112             stn(info.get("magic", POSIX_MAGIC), 8),
1113             stn(info.get("uname", "root"), 32),
1114             stn(info.get("gname", "root"), 32),
1115             itn(info.get("devmajor", 0), 8, format),
1116             itn(info.get("devminor", 0), 8, format),
1117             stn(info.get("prefix", ""), 155)
1118         ]
1119
1120         buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
1121         chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
1122         buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
1123         return buf
1124
1125     @staticmethod
1126     def _create_payload(payload):
1127         """Return the string payload filled with zero bytes
1128            up to the next 512 byte border.
1129         """
1130         blocks, remainder = divmod(len(payload), BLOCKSIZE)
1131         if remainder > 0:
1132             payload += (BLOCKSIZE - remainder) * NUL
1133         return payload
1134
1135     @classmethod
1136     def _create_gnu_long_header(cls, name, type):
1137         """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
1138            for name.
1139         """
1140         name += NUL
1141
1142         info = {}
1143         info["name"] = "././@LongLink"
1144         info["type"] = type
1145         info["size"] = len(name)
1146         info["magic"] = GNU_MAGIC
1147
1148         # create extended header + name blocks.
1149         return cls._create_header(info, USTAR_FORMAT) + \
1150                 cls._create_payload(name)
1151
1152     @classmethod
1153     def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
1154         """Return a POSIX.1-2001 extended or global header sequence
1155            that contains a list of keyword, value pairs. The values
1156            must be unicode objects.
1157         """
1158         records = []
1159         for keyword, value in pax_headers.iteritems():
1160             keyword = keyword.encode("utf8")
1161             value = value.encode("utf8")
1162             l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
1163             n = p = 0
1164             while True:
1165                 n = l + len(str(p))
1166                 if n == p:
1167                     break
1168                 p = n
1169             records.append("%d %s=%s\n" % (p, keyword, value))
1170         records = "".join(records)
1171
1172         # We use a hardcoded "././@PaxHeader" name like star does
1173         # instead of the one that POSIX recommends.
1174         info = {}
1175         info["name"] = "././@PaxHeader"
1176         info["type"] = type
1177         info["size"] = len(records)
1178         info["magic"] = POSIX_MAGIC
1179
1180         # Create pax header + record blocks.
1181         return cls._create_header(info, USTAR_FORMAT) + \
1182                 cls._create_payload(records)
1183
1184     @classmethod
1185     def frombuf(cls, buf):
1186         """Construct a TarInfo object from a 512 byte string buffer.
1187         """
1188         if len(buf) != BLOCKSIZE:
1189             raise HeaderError("truncated header")
1190         if buf.count(NUL) == BLOCKSIZE:
1191             raise HeaderError("empty header")
1192
1193         chksum = nti(buf[148:156])
1194         if chksum not in calc_chksums(buf):
1195             raise HeaderError("bad checksum")
1196
1197         obj = cls()
1198         obj.buf = buf
1199         obj.name = nts(buf[0:100])
1200         obj.mode = nti(buf[100:108])
1201         obj.uid = nti(buf[108:116])
1202         obj.gid = nti(buf[116:124])
1203         obj.size = nti(buf[124:136])
1204         obj.mtime = nti(buf[136:148])
1205         obj.chksum = chksum
1206         obj.type = buf[156:157]
1207         obj.linkname = nts(buf[157:257])
1208         obj.uname = nts(buf[265:297])
1209         obj.gname = nts(buf[297:329])
1210         obj.devmajor = nti(buf[329:337])
1211         obj.devminor = nti(buf[337:345])
1212         prefix = nts(buf[345:500])
1213
1214         # Old V7 tar format represents a directory as a regular
1215         # file with a trailing slash.
1216         if obj.type == AREGTYPE and obj.name.endswith("/"):
1217             obj.type = DIRTYPE
1218
1219         # Remove redundant slashes from directories.
1220         if obj.isdir():
1221             obj.name = obj.name.rstrip("/")
1222
1223         # Reconstruct a ustar longname.
1224         if prefix and obj.type not in GNU_TYPES:
1225             obj.name = prefix + "/" + obj.name
1226         return obj
1227
1228     @classmethod
1229     def fromtarfile(cls, tarfile):
1230         """Return the next TarInfo object from TarFile object
1231            tarfile.
1232         """
1233         buf = tarfile.fileobj.read(BLOCKSIZE)
1234         if not buf:
1235             return
1236         obj = cls.frombuf(buf)
1237         obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1238         return obj._proc_member(tarfile)
1239
1240     #--------------------------------------------------------------------------
1241     # The following are methods that are called depending on the type of a
1242     # member. The entry point is _proc_member() which can be overridden in a
1243     # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1244     # implement the following
1245     # operations:
1246     # 1. Set self.offset_data to the position where the data blocks begin,
1247     #    if there is data that follows.
1248     # 2. Set tarfile.offset to the position where the next member's header will
1249     #    begin.
1250     # 3. Return self or another valid TarInfo object.
1251     def _proc_member(self, tarfile):
1252         """Choose the right processing method depending on
1253            the type and call it.
1254         """
1255         if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1256             return self._proc_gnulong(tarfile)
1257         elif self.type == GNUTYPE_SPARSE:
1258             return self._proc_sparse(tarfile)
1259         elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1260             return self._proc_pax(tarfile)
1261         else:
1262             return self._proc_builtin(tarfile)
1263
1264     def _proc_builtin(self, tarfile):
1265         """Process a builtin type or an unknown type which
1266            will be treated as a regular file.
1267         """
1268         self.offset_data = tarfile.fileobj.tell()
1269         offset = self.offset_data
1270         if self.isreg() or self.type not in SUPPORTED_TYPES:
1271             # Skip the following data blocks.
1272             offset += self._block(self.size)
1273         tarfile.offset = offset
1274
1275         # Patch the TarInfo object with saved global
1276         # header information.
1277         self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
1278
1279         return self
1280
1281     def _proc_gnulong(self, tarfile):
1282         """Process the blocks that hold a GNU longname
1283            or longlink member.
1284         """
1285         buf = tarfile.fileobj.read(self._block(self.size))
1286
1287         # Fetch the next header and process it.
1288         next = self.fromtarfile(tarfile)
1289         if next is None:
1290             raise HeaderError("missing subsequent header")
1291
1292         # Patch the TarInfo object from the next header with
1293         # the longname information.
1294         next.offset = self.offset
1295         if self.type == GNUTYPE_LONGNAME:
1296             next.name = nts(buf)
1297         elif self.type == GNUTYPE_LONGLINK:
1298             next.linkname = nts(buf)
1299
1300         return next
1301
1302     def _proc_sparse(self, tarfile):
1303         """Process a GNU sparse header plus extra headers.
1304         """
1305         buf = self.buf
1306         sp = _ringbuffer()
1307         pos = 386
1308         lastpos = 0L
1309         realpos = 0L
1310         # There are 4 possible sparse structs in the
1311         # first header.
1312         for i in xrange(4):
1313             try:
1314                 offset = nti(buf[pos:pos + 12])
1315                 numbytes = nti(buf[pos + 12:pos + 24])
1316             except ValueError:
1317                 break
1318             if offset > lastpos:
1319                 sp.append(_hole(lastpos, offset - lastpos))
1320             sp.append(_data(offset, numbytes, realpos))
1321             realpos += numbytes
1322             lastpos = offset + numbytes
1323             pos += 24
1324
1325         isextended = ord(buf[482])
1326         origsize = nti(buf[483:495])
1327
1328         # If the isextended flag is given,
1329         # there are extra headers to process.
1330         while isextended == 1:
1331             buf = tarfile.fileobj.read(BLOCKSIZE)
1332             pos = 0
1333             for i in xrange(21):
1334                 try:
1335                     offset = nti(buf[pos:pos + 12])
1336                     numbytes = nti(buf[pos + 12:pos + 24])
1337                 except ValueError:
1338                     break
1339                 if offset > lastpos:
1340                     sp.append(_hole(lastpos, offset - lastpos))
1341                 sp.append(_data(offset, numbytes, realpos))
1342                 realpos += numbytes
1343                 lastpos = offset + numbytes
1344                 pos += 24
1345             isextended = ord(buf[504])
1346
1347         if lastpos < origsize:
1348             sp.append(_hole(lastpos, origsize - lastpos))
1349
1350         self.sparse = sp
1351
1352         self.offset_data = tarfile.fileobj.tell()
1353         tarfile.offset = self.offset_data + self._block(self.size)
1354         self.size = origsize
1355
1356         return self
1357
1358     def _proc_pax(self, tarfile):
1359         """Process an extended or global header as described in
1360            POSIX.1-2001.
1361         """
1362         # Read the header information.
1363         buf = tarfile.fileobj.read(self._block(self.size))
1364
1365         # A pax header stores supplemental information for either
1366         # the following file (extended) or all following files
1367         # (global).
1368         if self.type == XGLTYPE:
1369             pax_headers = tarfile.pax_headers
1370         else:
1371             pax_headers = tarfile.pax_headers.copy()
1372
1373         # Parse pax header information. A record looks like that:
1374         # "%d %s=%s\n" % (length, keyword, value). length is the size
1375         # of the complete record including the length field itself and
1376         # the newline. keyword and value are both UTF-8 encoded strings.
1377         regex = re.compile(r"(\d+) ([^=]+)=", re.U)
1378         pos = 0
1379         while True:
1380             match = regex.match(buf, pos)
1381             if not match:
1382                 break
1383
1384             length, keyword = match.groups()
1385             length = int(length)
1386             value = buf[match.end(2) + 1:match.start(1) + length - 1]
1387
1388             keyword = keyword.decode("utf8")
1389             value = value.decode("utf8")
1390
1391             pax_headers[keyword] = value
1392             pos += length
1393
1394         # Fetch the next header.
1395         next = self.fromtarfile(tarfile)
1396
1397         if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
1398             if next is None:
1399                 raise HeaderError("missing subsequent header")
1400
1401             # Patch the TarInfo object with the extended header info.
1402             next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
1403             next.offset = self.offset
1404
1405             if pax_headers.has_key("size"):
1406                 # If the extended header replaces the size field,
1407                 # we need to recalculate the offset where the next
1408                 # header starts.
1409                 offset = next.offset_data
1410                 if next.isreg() or next.type not in SUPPORTED_TYPES:
1411                     offset += next._block(next.size)
1412                 tarfile.offset = offset
1413
1414         return next
1415
1416     def _apply_pax_info(self, pax_headers, encoding, errors):
1417         """Replace fields with supplemental information from a previous
1418            pax extended or global header.
1419         """
1420         for keyword, value in pax_headers.iteritems():
1421             if keyword not in PAX_FIELDS:
1422                 continue
1423
1424             if keyword == "path":
1425                 value = value.rstrip("/")
1426
1427             if keyword in PAX_NUMBER_FIELDS:
1428                 try:
1429                     value = PAX_NUMBER_FIELDS[keyword](value)
1430                 except ValueError:
1431                     value = 0
1432             else:
1433                 value = uts(value, encoding, errors)
1434
1435             setattr(self, keyword, value)
1436
1437         self.pax_headers = pax_headers.copy()
1438
1439     def _block(self, count):
1440         """Round up a byte count by BLOCKSIZE and return it,
1441            e.g. _block(834) => 1024.
1442         """
1443         blocks, remainder = divmod(count, BLOCKSIZE)
1444         if remainder:
1445             blocks += 1
1446         return blocks * BLOCKSIZE
1447
1448     def isreg(self):
1449         return self.type in REGULAR_TYPES
1450     def isfile(self):
1451         return self.isreg()
1452     def isdir(self):
1453         return self.type == DIRTYPE
1454     def issym(self):
1455         return self.type == SYMTYPE
1456     def islnk(self):
1457         return self.type == LNKTYPE
1458     def ischr(self):
1459         return self.type == CHRTYPE
1460     def isblk(self):
1461         return self.type == BLKTYPE
1462     def isfifo(self):
1463         return self.type == FIFOTYPE
1464     def issparse(self):
1465         return self.type == GNUTYPE_SPARSE
1466     def isdev(self):
1467         return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1468 # class TarInfo
1469
1470 class TarFile(object):
1471     """The TarFile Class provides an interface to tar archives.
1472     """
1473
1474     debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)
1475
1476     dereference = False         # If true, add content of linked file to the
1477                                 # tar file, else the link.
1478
1479     ignore_zeros = False        # If true, skips empty or invalid blocks and
1480                                 # continues processing.
1481
1482     errorlevel = 0              # If 0, fatal errors only appear in debug
1483                                 # messages (if debug >= 0). If > 0, errors
1484                                 # are passed to the caller as exceptions.
1485
1486     format = DEFAULT_FORMAT     # The format to use when creating an archive.
1487
1488     encoding = ENCODING         # Encoding for 8-bit character strings.
1489
1490     errors = None               # Error handler for unicode conversion.
1491
1492     tarinfo = TarInfo           # The default TarInfo class to use.
1493
1494     fileobject = ExFileObject   # The default ExFileObject class to use.
1495
1496     def __init__(self, name=None, mode="r", fileobj=None, format=None,
1497             tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
1498             errors=None, pax_headers=None, debug=None, errorlevel=None):
1499         """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1500            read from an existing archive, 'a' to append data to an existing
1501            file or 'w' to create a new file overwriting an existing one. `mode'
1502            defaults to 'r'.
1503            If `fileobj' is given, it is used for reading or writing data. If it
1504            can be determined, `mode' is overridden by `fileobj's mode.
1505            `fileobj' is not closed, when TarFile is closed.
1506         """
1507         if len(mode) > 1 or mode not in "raw":
1508             raise ValueError("mode must be 'r', 'a' or 'w'")
1509         self.mode = mode
1510         self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
1511
1512         if not fileobj:
1513             if self.mode == "a" and not os.path.exists(name):
1514                 # Create nonexistent files in append mode.
1515                 self.mode = "w"
1516                 self._mode = "wb"
1517             fileobj = bltn_open(name, self._mode)
1518             self._extfileobj = False
1519         else:
1520             if name is None and hasattr(fileobj, "name"):
1521                 name = fileobj.name
1522             if hasattr(fileobj, "mode"):
1523                 self._mode = fileobj.mode
1524             self._extfileobj = True
1525         self.name = os.path.abspath(name) if name else None
1526         self.fileobj = fileobj
1527
1528         # Init attributes.
1529         if format is not None:
1530             self.format = format
1531         if tarinfo is not None:
1532             self.tarinfo = tarinfo
1533         if dereference is not None:
1534             self.dereference = dereference
1535         if ignore_zeros is not None:
1536             self.ignore_zeros = ignore_zeros
1537         if encoding is not None:
1538             self.encoding = encoding
1539
1540         if errors is not None:
1541             self.errors = errors
1542         elif mode == "r":
1543             self.errors = "utf-8"
1544         else:
1545             self.errors = "strict"
1546
1547         if pax_headers is not None and self.format == PAX_FORMAT:
1548             self.pax_headers = pax_headers
1549         else:
1550             self.pax_headers = {}
1551
1552         if debug is not None:
1553             self.debug = debug
1554         if errorlevel is not None:
1555             self.errorlevel = errorlevel
1556
1557         # Init datastructures.
1558         self.closed = False
1559         self.members = []       # list of members as TarInfo objects
1560         self._loaded = False    # flag if all members have been read
1561         self.offset = 0L        # current position in the archive file
1562         self.inodes = {}        # dictionary caching the inodes of
1563                                 # archive members already added
1564
1565         if self.mode == "r":
1566             self.firstmember = None
1567             self.firstmember = self.next()
1568
1569         if self.mode == "a":
1570             # Move to the end of the archive,
1571             # before the first empty block.
1572             self.firstmember = None
1573             while True:
1574                 if self.next() is None:
1575                     if self.offset > 0:
1576                         self.fileobj.seek(- BLOCKSIZE, 1)
1577                     break
1578
1579         if self.mode in "aw":
1580             self._loaded = True
1581
1582             if self.pax_headers:
1583                 buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
1584                 self.fileobj.write(buf)
1585                 self.offset += len(buf)
1586
1587     def _getposix(self):
1588         return self.format == USTAR_FORMAT
1589     def _setposix(self, value):
1590         import warnings
1591         warnings.warn("use the format attribute instead", DeprecationWarning)
1592         if value:
1593             self.format = USTAR_FORMAT
1594         else:
1595             self.format = GNU_FORMAT
1596     posix = property(_getposix, _setposix)
1597
1598     #--------------------------------------------------------------------------
1599     # Below are the classmethods which act as alternate constructors to the
1600     # TarFile class. The open() method is the only one that is needed for
1601     # public use; it is the "super"-constructor and is able to select an
1602     # adequate "sub"-constructor for a particular compression using the mapping
1603     # from OPEN_METH.
1604     #
1605     # This concept allows one to subclass TarFile without losing the comfort of
1606     # the super-constructor. A sub-constructor is registered and made available
1607     # by adding it to the mapping in OPEN_METH.
1608
1609     @classmethod
1610     def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
1611         """Open a tar archive for reading, writing or appending. Return
1612            an appropriate TarFile class.
1613
1614            mode:
1615            'r' or 'r:*' open for reading with transparent compression
1616            'r:'         open for reading exclusively uncompressed
1617            'r:gz'       open for reading with gzip compression
1618            'r:bz2'      open for reading with bzip2 compression
1619            'a' or 'a:'  open for appending, creating the file if necessary
1620            'w' or 'w:'  open for writing without compression
1621            'w:gz'       open for writing with gzip compression
1622            'w:bz2'      open for writing with bzip2 compression
1623
1624            'r|*'        open a stream of tar blocks with transparent compression
1625            'r|'         open an uncompressed stream of tar blocks for reading
1626            'r|gz'       open a gzip compressed stream of tar blocks
1627            'r|bz2'      open a bzip2 compressed stream of tar blocks
1628            'w|'         open an uncompressed stream for writing
1629            'w|gz'       open a gzip compressed stream for writing
1630            'w|bz2'      open a bzip2 compressed stream for writing
1631         """
1632
1633         if not name and not fileobj:
1634             raise ValueError("nothing to open")
1635
1636         if mode in ("r", "r:*"):
1637             # Find out which *open() is appropriate for opening the file.
1638             for comptype in cls.OPEN_METH:
1639                 func = getattr(cls, cls.OPEN_METH[comptype])
1640                 if fileobj is not None:
1641                     saved_pos = fileobj.tell()
1642                 try:
1643                     return func(name, "r", fileobj, **kwargs)
1644                 except (ReadError, CompressionError), e:
1645                     if fileobj is not None:
1646                         fileobj.seek(saved_pos)
1647                     continue
1648             raise ReadError("file could not be opened successfully")
1649
1650         elif ":" in mode:
1651             filemode, comptype = mode.split(":", 1)
1652             filemode = filemode or "r"
1653             comptype = comptype or "tar"
1654
1655             # Select the *open() function according to
1656             # given compression.
1657             if comptype in cls.OPEN_METH:
1658                 func = getattr(cls, cls.OPEN_METH[comptype])
1659             else:
1660                 raise CompressionError("unknown compression type %r" % comptype)
1661             return func(name, filemode, fileobj, **kwargs)
1662
1663         elif "|" in mode:
1664             filemode, comptype = mode.split("|", 1)
1665             filemode = filemode or "r"
1666             comptype = comptype or "tar"
1667
1668             if filemode not in "rw":
1669                 raise ValueError("mode must be 'r' or 'w'")
1670
1671             t = cls(name, filemode,
1672                     _Stream(name, filemode, comptype, fileobj, bufsize),
1673                     **kwargs)
1674             t._extfileobj = False
1675             return t
1676
1677         elif mode in "aw":
1678             return cls.taropen(name, mode, fileobj, **kwargs)
1679
1680         raise ValueError("undiscernible mode")
1681
1682     @classmethod
1683     def taropen(cls, name, mode="r", fileobj=None, **kwargs):
1684         """Open uncompressed tar archive name for reading or writing.
1685         """
1686         if len(mode) > 1 or mode not in "raw":
1687             raise ValueError("mode must be 'r', 'a' or 'w'")
1688         return cls(name, mode, fileobj, **kwargs)
1689
1690     @classmethod
1691     def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1692         """Open gzip compressed tar archive name for reading or writing.
1693            Appending is not allowed.
1694         """
1695         if len(mode) > 1 or mode not in "rw":
1696             raise ValueError("mode must be 'r' or 'w'")
1697
1698         try:
1699             import gzip
1700             gzip.GzipFile
1701         except (ImportError, AttributeError):
1702             raise CompressionError("gzip module is not available")
1703
1704         if fileobj is None:
1705             fileobj = bltn_open(name, mode + "b")
1706
1707         try:
1708             t = cls.taropen(name, mode,
1709                 gzip.GzipFile(name, mode, compresslevel, fileobj),
1710                 **kwargs)
1711         except IOError:
1712             raise ReadError("not a gzip file")
1713         t._extfileobj = False
1714         return t
1715
1716     @classmethod
1717     def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1718         """Open bzip2 compressed tar archive name for reading or writing.
1719            Appending is not allowed.
1720         """
1721         if len(mode) > 1 or mode not in "rw":
1722             raise ValueError("mode must be 'r' or 'w'.")
1723
1724         try:
1725             import bz2
1726         except ImportError:
1727             raise CompressionError("bz2 module is not available")
1728
1729         if fileobj is not None:
1730             fileobj = _BZ2Proxy(fileobj, mode)
1731         else:
1732             fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
1733
1734         try:
1735             t = cls.taropen(name, mode, fileobj, **kwargs)
1736         except IOError:
1737             raise ReadError("not a bzip2 file")
1738         t._extfileobj = False
1739         return t
1740
1741     # All *open() methods are registered here.
1742     OPEN_METH = {
1743         "tar": "taropen",   # uncompressed tar
1744         "gz":  "gzopen",    # gzip compressed tar
1745         "bz2": "bz2open"    # bzip2 compressed tar
1746     }
1747
1748     #--------------------------------------------------------------------------
1749     # The public methods which TarFile provides:
1750
1751     def close(self):
1752         """Close the TarFile. In write-mode, two finishing zero blocks are
1753            appended to the archive.
1754         """
1755         if self.closed:
1756             return
1757
1758         if self.mode in "aw":
1759             self.fileobj.write(NUL * (BLOCKSIZE * 2))
1760             self.offset += (BLOCKSIZE * 2)
1761             # fill up the end with zero-blocks
1762             # (like option -b20 for tar does)
1763             blocks, remainder = divmod(self.offset, RECORDSIZE)
1764             if remainder > 0:
1765                 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1766
1767         if not self._extfileobj:
1768             self.fileobj.close()
1769         self.closed = True
1770
1771     def getmember(self, name):
1772         """Return a TarInfo object for member `name'. If `name' can not be
1773            found in the archive, KeyError is raised. If a member occurs more
1774            than once in the archive, its last occurence is assumed to be the
1775            most up-to-date version.
1776         """
1777         tarinfo = self._getmember(name)
1778         if tarinfo is None:
1779             raise KeyError("filename %r not found" % name)
1780         return tarinfo
1781
1782     def getmembers(self):
1783         """Return the members of the archive as a list of TarInfo objects. The
1784            list has the same order as the members in the archive.
1785         """
1786         self._check()
1787         if not self._loaded:    # if we want to obtain a list of
1788             self._load()        # all members, we first have to
1789                                 # scan the whole archive.
1790         return self.members
1791
1792     def getnames(self):
1793         """Return the members of the archive as a list of their names. It has
1794            the same order as the list returned by getmembers().
1795         """
1796         return [tarinfo.name for tarinfo in self.getmembers()]
1797
1798     def gettarinfo(self, name=None, arcname=None, fileobj=None):
1799         """Create a TarInfo object for either the file `name' or the file
1800            object `fileobj' (using os.fstat on its file descriptor). You can
1801            modify some of the TarInfo's attributes before you add it using
1802            addfile(). If given, `arcname' specifies an alternative name for the
1803            file in the archive.
1804         """
1805         self._check("aw")
1806
1807         # When fileobj is given, replace name by
1808         # fileobj's real name.
1809         if fileobj is not None:
1810             name = fileobj.name
1811
1812         # Building the name of the member in the archive.
1813         # Backward slashes are converted to forward slashes,
1814         # Absolute paths are turned to relative paths.
1815         if arcname is None:
1816             arcname = name
1817         arcname = normpath(arcname)
1818         drv, arcname = os.path.splitdrive(arcname)
1819         while arcname[0:1] == "/":
1820             arcname = arcname[1:]
1821
1822         # Now, fill the TarInfo object with
1823         # information specific for the file.
1824         tarinfo = self.tarinfo()
1825         tarinfo.tarfile = self
1826
1827         # Use os.stat or os.lstat, depending on platform
1828         # and if symlinks shall be resolved.
1829         if fileobj is None:
1830             if hasattr(os, "lstat") and not self.dereference:
1831                 statres = os.lstat(name)
1832             else:
1833                 statres = os.stat(name)
1834         else:
1835             statres = os.fstat(fileobj.fileno())
1836         linkname = ""
1837
1838         stmd = statres.st_mode
1839         if stat.S_ISREG(stmd):
1840             inode = (statres.st_ino, statres.st_dev)
1841             if not self.dereference and statres.st_nlink > 1 and \
1842                     inode in self.inodes and arcname != self.inodes[inode]:
1843                 # Is it a hardlink to an already
1844                 # archived file?
1845                 type = LNKTYPE
1846                 linkname = self.inodes[inode]
1847             else:
1848                 # The inode is added only if its valid.
1849                 # For win32 it is always 0.
1850                 type = REGTYPE
1851                 if inode[0]:
1852                     self.inodes[inode] = arcname
1853         elif stat.S_ISDIR(stmd):
1854             type = DIRTYPE
1855         elif stat.S_ISFIFO(stmd):
1856             type = FIFOTYPE
1857         elif stat.S_ISLNK(stmd):
1858             type = SYMTYPE
1859             linkname = os.readlink(name)
1860         elif stat.S_ISCHR(stmd):
1861             type = CHRTYPE
1862         elif stat.S_ISBLK(stmd):
1863             type = BLKTYPE
1864         else:
1865             return None
1866
1867         # Fill the TarInfo object with all
1868         # information we can get.
1869         tarinfo.name = arcname
1870         tarinfo.mode = stmd
1871         tarinfo.uid = statres.st_uid
1872         tarinfo.gid = statres.st_gid
1873         if stat.S_ISREG(stmd):
1874             tarinfo.size = statres.st_size
1875         else:
1876             tarinfo.size = 0L
1877         tarinfo.mtime = statres.st_mtime
1878         tarinfo.type = type
1879         tarinfo.linkname = linkname
1880         if pwd:
1881             try:
1882                 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1883             except KeyError:
1884                 pass
1885         if grp:
1886             try:
1887                 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1888             except KeyError:
1889                 pass
1890
1891         if type in (CHRTYPE, BLKTYPE):
1892             if hasattr(os, "major") and hasattr(os, "minor"):
1893                 tarinfo.devmajor = os.major(statres.st_rdev)
1894                 tarinfo.devminor = os.minor(statres.st_rdev)
1895         return tarinfo
1896
1897     def list(self, verbose=True):
1898         """Print a table of contents to sys.stdout. If `verbose' is False, only
1899            the names of the members are printed. If it is True, an `ls -l'-like
1900            output is produced.
1901         """
1902         self._check()
1903
1904         for tarinfo in self:
1905             if verbose:
1906                 print filemode(tarinfo.mode),
1907                 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1908                                  tarinfo.gname or tarinfo.gid),
1909                 if tarinfo.ischr() or tarinfo.isblk():
1910                     print "%10s" % ("%d,%d" \
1911                                     % (tarinfo.devmajor, tarinfo.devminor)),
1912                 else:
1913                     print "%10d" % tarinfo.size,
1914                 print "%d-%02d-%02d %02d:%02d:%02d" \
1915                       % time.localtime(tarinfo.mtime)[:6],
1916
1917             print tarinfo.name + ("/" if tarinfo.isdir() else ""),
1918
1919             if verbose:
1920                 if tarinfo.issym():
1921                     print "->", tarinfo.linkname,
1922                 if tarinfo.islnk():
1923                     print "link to", tarinfo.linkname,
1924             print
1925
1926     def add(self, name, arcname=None, recursive=True, exclude=None):
1927         """Add the file `name' to the archive. `name' may be any type of file
1928            (directory, fifo, symbolic link, etc.). If given, `arcname'
1929            specifies an alternative name for the file in the archive.
1930            Directories are added recursively by default. This can be avoided by
1931            setting `recursive' to False. `exclude' is a function that should
1932            return True for each filename to be excluded.
1933         """
1934         self._check("aw")
1935
1936         if arcname is None:
1937             arcname = name
1938
1939         # Exclude pathnames.
1940         if exclude is not None and exclude(name):
1941             self._dbg(2, "tarfile: Excluded %r" % name)
1942             return
1943
1944         # Skip if somebody tries to archive the archive...
1945         if self.name is not None and os.path.abspath(name) == self.name:
1946             self._dbg(2, "tarfile: Skipped %r" % name)
1947             return
1948
1949         # Special case: The user wants to add the current
1950         # working directory.
1951         if name == ".":
1952             if recursive:
1953                 if arcname == ".":
1954                     arcname = ""
1955                 for f in os.listdir(name):
1956                     self.add(f, os.path.join(arcname, f), recursive, exclude)
1957             return
1958
1959         self._dbg(1, name)
1960
1961         # Create a TarInfo object from the file.
1962         tarinfo = self.gettarinfo(name, arcname)
1963
1964         if tarinfo is None:
1965             self._dbg(1, "tarfile: Unsupported type %r" % name)
1966             return
1967
1968         # Append the tar header and data to the archive.
1969         if tarinfo.isreg():
1970             f = bltn_open(name, "rb")
1971             self.addfile(tarinfo, f)
1972             f.close()
1973
1974         elif tarinfo.isdir():
1975             self.addfile(tarinfo)
1976             if recursive:
1977                 for f in os.listdir(name):
1978                     self.add(os.path.join(name, f), os.path.join(arcname, f), recursive, exclude)
1979
1980         else:
1981             self.addfile(tarinfo)
1982
1983     def addfile(self, tarinfo, fileobj=None):
1984         """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
1985            given, tarinfo.size bytes are read from it and added to the archive.
1986            You can create TarInfo objects using gettarinfo().
1987            On Windows platforms, `fileobj' should always be opened with mode
1988            'rb' to avoid irritation about the file size.
1989         """
1990         self._check("aw")
1991
1992         tarinfo = copy.copy(tarinfo)
1993
1994         buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
1995         self.fileobj.write(buf)
1996         self.offset += len(buf)
1997
1998         # If there's data to follow, append it.
1999         if fileobj is not None:
2000             copyfileobj(fileobj, self.fileobj, tarinfo.size)
2001             blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
2002             if remainder > 0:
2003                 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
2004                 blocks += 1
2005             self.offset += blocks * BLOCKSIZE
2006
2007         self.members.append(tarinfo)
2008
2009     def extractall(self, path=".", members=None):
2010         """Extract all members from the archive to the current working
2011            directory and set owner, modification time and permissions on
2012            directories afterwards. `path' specifies a different directory
2013            to extract to. `members' is optional and must be a subset of the
2014            list returned by getmembers().
2015         """
2016         directories = []
2017
2018         if members is None:
2019             members = self
2020
2021         for tarinfo in members:
2022             if tarinfo.isdir():
2023                 # Extract directory with a safe mode, so that
2024                 # all files below can be extracted as well.
2025                 try:
2026                     os.makedirs(os.path.join(path, tarinfo.name), 0700)
2027                 except EnvironmentError:
2028                     pass
2029                 directories.append(tarinfo)
2030             else:
2031                 self.extract(tarinfo, path)
2032
2033         # Reverse sort directories.
2034         directories.sort(lambda a, b: cmp(a.name, b.name))
2035         directories.reverse()
2036
2037         # Set correct owner, mtime and filemode on directories.
2038         for tarinfo in directories:
2039             path = os.path.join(path, tarinfo.name)
2040             try:
2041                 self.chown(tarinfo, path)
2042                 self.utime(tarinfo, path)
2043                 self.chmod(tarinfo, path)
2044             except ExtractError, e:
2045                 if self.errorlevel > 1:
2046                     raise
2047                 else:
2048                     self._dbg(1, "tarfile: %s" % e)
2049
2050     def extract(self, member, path=""):
2051         """Extract a member from the archive to the current working directory,
2052            using its full name. Its file information is extracted as accurately
2053            as possible. `member' may be a filename or a TarInfo object. You can
2054            specify a different directory using `path'.
2055         """
2056         self._check("r")
2057
2058         if isinstance(member, basestring):
2059             tarinfo = self.getmember(member)
2060         else:
2061             tarinfo = member
2062
2063         # Prepare the link target for makelink().
2064         if tarinfo.islnk():
2065             tarinfo._link_target = os.path.join(path, tarinfo.linkname)
2066
2067         try:
2068             self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
2069         except EnvironmentError, e:
2070             if self.errorlevel > 0:
2071                 raise
2072             else:
2073                 if e.filename is None:
2074                     self._dbg(1, "tarfile: %s" % e.strerror)
2075                 else:
2076                     self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
2077         except ExtractError, e:
2078             if self.errorlevel > 1:
2079                 raise
2080             else:
2081                 self._dbg(1, "tarfile: %s" % e)
2082
2083     def extractfile(self, member):
2084         """Extract a member from the archive as a file object. `member' may be
2085            a filename or a TarInfo object. If `member' is a regular file, a
2086            file-like object is returned. If `member' is a link, a file-like
2087            object is constructed from the link's target. If `member' is none of
2088            the above, None is returned.
2089            The file-like object is read-only and provides the following
2090            methods: read(), readline(), readlines(), seek() and tell()
2091         """
2092         self._check("r")
2093
2094         if isinstance(member, basestring):
2095             tarinfo = self.getmember(member)
2096         else:
2097             tarinfo = member
2098
2099         if tarinfo.isreg():
2100             return self.fileobject(self, tarinfo)
2101
2102         elif tarinfo.type not in SUPPORTED_TYPES:
2103             # If a member's type is unknown, it is treated as a
2104             # regular file.
2105             return self.fileobject(self, tarinfo)
2106
2107         elif tarinfo.islnk() or tarinfo.issym():
2108             if isinstance(self.fileobj, _Stream):
2109                 # A small but ugly workaround for the case that someone tries
2110                 # to extract a (sym)link as a file-object from a non-seekable
2111                 # stream of tar blocks.
2112                 raise StreamError("cannot extract (sym)link as file object")
2113             else:
2114                 # A (sym)link's file object is its target's file object.
2115                 return self.extractfile(self._getmember(tarinfo.linkname,
2116                                                         tarinfo))
2117         else:
2118             # If there's no data associated with the member (directory, chrdev,
2119             # blkdev, etc.), return None instead of a file object.
2120             return None
2121
2122     def _extract_member(self, tarinfo, targetpath):
2123         """Extract the TarInfo object tarinfo to a physical
2124            file called targetpath.
2125         """
2126         # Fetch the TarInfo object for the given name
2127         # and build the destination pathname, replacing
2128         # forward slashes to platform specific separators.
2129         if targetpath[-1:] == "/":
2130             targetpath = targetpath[:-1]
2131         targetpath = os.path.normpath(targetpath)
2132
2133         # Create all upper directories.
2134         upperdirs = os.path.dirname(targetpath)
2135         if upperdirs and not os.path.exists(upperdirs):
2136             os.makedirs(upperdirs)
2137
2138         if tarinfo.islnk() or tarinfo.issym():
2139             self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2140         else:
2141             self._dbg(1, tarinfo.name)
2142
2143         if tarinfo.isreg():
2144             self.makefile(tarinfo, targetpath)
2145         elif tarinfo.isdir():
2146             self.makedir(tarinfo, targetpath)
2147         elif tarinfo.isfifo():
2148             self.makefifo(tarinfo, targetpath)
2149         elif tarinfo.ischr() or tarinfo.isblk():
2150             self.makedev(tarinfo, targetpath)
2151         elif tarinfo.islnk() or tarinfo.issym():
2152             self.makelink(tarinfo, targetpath)
2153         elif tarinfo.type not in SUPPORTED_TYPES:
2154             self.makeunknown(tarinfo, targetpath)
2155         else:
2156             self.makefile(tarinfo, targetpath)
2157
2158         self.chown(tarinfo, targetpath)
2159         if not tarinfo.issym():
2160             self.chmod(tarinfo, targetpath)
2161             self.utime(tarinfo, targetpath)
2162
2163     #--------------------------------------------------------------------------
2164     # Below are the different file methods. They are called via
2165     # _extract_member() when extract() is called. They can be replaced in a
2166     # subclass to implement other functionality.
2167
2168     def makedir(self, tarinfo, targetpath):
2169         """Make a directory called targetpath.
2170         """
2171         try:
2172             os.mkdir(targetpath)
2173         except EnvironmentError, e:
2174             if e.errno != errno.EEXIST:
2175                 raise
2176
2177     def makefile(self, tarinfo, targetpath):
2178         """Make a file called targetpath.
2179         """
2180         source = self.extractfile(tarinfo)
2181         target = bltn_open(targetpath, "wb")
2182         copyfileobj(source, target)
2183         source.close()
2184         target.close()
2185
2186     def makeunknown(self, tarinfo, targetpath):
2187         """Make a file from a TarInfo object with an unknown type
2188            at targetpath.
2189         """
2190         self.makefile(tarinfo, targetpath)
2191         self._dbg(1, "tarfile: Unknown file type %r, " \
2192                      "extracted as regular file." % tarinfo.type)
2193
2194     def makefifo(self, tarinfo, targetpath):
2195         """Make a fifo called targetpath.
2196         """
2197         if hasattr(os, "mkfifo"):
2198             os.mkfifo(targetpath)
2199         else:
2200             raise ExtractError("fifo not supported by system")
2201
2202     def makedev(self, tarinfo, targetpath):
2203         """Make a character or block device called targetpath.
2204         """
2205         if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
2206             raise ExtractError("special devices not supported by system")
2207
2208         mode = tarinfo.mode
2209         if tarinfo.isblk():
2210             mode |= stat.S_IFBLK
2211         else:
2212             mode |= stat.S_IFCHR
2213
2214         os.mknod(targetpath, mode,
2215                  os.makedev(tarinfo.devmajor, tarinfo.devminor))
2216
2217     def makelink(self, tarinfo, targetpath):
2218         """Make a (symbolic) link called targetpath. If it cannot be created
2219           (platform limitation), we try to make a copy of the referenced file
2220           instead of a link.
2221         """
2222         linkpath = tarinfo.linkname
2223         try:
2224             if tarinfo.issym():
2225                 os.symlink(linkpath, targetpath)
2226             else:
2227                 # See extract().
2228                 os.link(tarinfo._link_target, targetpath)
2229         except AttributeError:
2230             if tarinfo.issym():
2231                 linkpath = os.path.join(os.path.dirname(tarinfo.name),
2232                                         linkpath)
2233                 linkpath = normpath(linkpath)
2234
2235             try:
2236                 self._extract_member(self.getmember(linkpath), targetpath)
2237             except (EnvironmentError, KeyError), e:
2238                 linkpath = os.path.normpath(linkpath)
2239                 try:
2240                     shutil.copy2(linkpath, targetpath)
2241                 except EnvironmentError, e:
2242                     raise IOError("link could not be created")
2243
2244     def chown(self, tarinfo, targetpath):
2245         """Set owner of targetpath according to tarinfo.
2246         """
2247         if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
2248             # We have to be root to do so.
2249             try:
2250                 g = grp.getgrnam(tarinfo.gname)[2]
2251             except KeyError:
2252                 try:
2253                     g = grp.getgrgid(tarinfo.gid)[2]
2254                 except KeyError:
2255                     g = os.getgid()
2256             try:
2257                 u = pwd.getpwnam(tarinfo.uname)[2]
2258             except KeyError:
2259                 try:
2260                     u = pwd.getpwuid(tarinfo.uid)[2]
2261                 except KeyError:
2262                     u = os.getuid()
2263             try:
2264                 if tarinfo.issym() and hasattr(os, "lchown"):
2265                     os.lchown(targetpath, u, g)
2266                 else:
2267                     if sys.platform != "os2emx":
2268                         os.chown(targetpath, u, g)
2269             except EnvironmentError, e:
2270                 raise ExtractError("could not change owner")
2271
2272     def chmod(self, tarinfo, targetpath):
2273         """Set file permissions of targetpath according to tarinfo.
2274         """
2275         if hasattr(os, 'chmod'):
2276             try:
2277                 os.chmod(targetpath, tarinfo.mode)
2278             except EnvironmentError, e:
2279                 raise ExtractError("could not change mode")
2280
2281     def utime(self, tarinfo, targetpath):
2282         """Set modification time of targetpath according to tarinfo.
2283         """
2284         if not hasattr(os, 'utime'):
2285             return
2286         if sys.platform == "win32" and tarinfo.isdir():
2287             # According to msdn.microsoft.com, it is an error (EACCES)
2288             # to use utime() on directories.
2289             return
2290         try:
2291             os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
2292         except EnvironmentError, e:
2293             raise ExtractError("could not change modification time")
2294
2295     #--------------------------------------------------------------------------
2296     def next(self):
2297         """Return the next member of the archive as a TarInfo object, when
2298            TarFile is opened for reading. Return None if there is no more
2299            available.
2300         """
2301         self._check("ra")
2302         if self.firstmember is not None:
2303             m = self.firstmember
2304             self.firstmember = None
2305             return m
2306
2307         # Read the next block.
2308         self.fileobj.seek(self.offset)
2309         while True:
2310             try:
2311                 tarinfo = self.tarinfo.fromtarfile(self)
2312                 if tarinfo is None:
2313                     return
2314                 self.members.append(tarinfo)
2315
2316             except HeaderError, e:
2317                 if self.ignore_zeros:
2318                     self._dbg(2, "0x%X: %s" % (self.offset, e))
2319                     self.offset += BLOCKSIZE
2320                     continue
2321                 else:
2322                     if self.offset == 0:
2323                         raise ReadError(str(e))
2324                     return None
2325             break
2326
2327         return tarinfo
2328
2329     #--------------------------------------------------------------------------
2330     # Little helper methods:
2331
2332     def _getmember(self, name, tarinfo=None):
2333         """Find an archive member by name from bottom to top.
2334            If tarinfo is given, it is used as the starting point.
2335         """
2336         # Ensure that all members have been loaded.
2337         members = self.getmembers()
2338
2339         if tarinfo is None:
2340             end = len(members)
2341         else:
2342             end = members.index(tarinfo)
2343
2344         for i in xrange(end - 1, -1, -1):
2345             if name == members[i].name:
2346                 return members[i]
2347
2348     def _load(self):
2349         """Read through the entire archive file and look for readable
2350            members.
2351         """
2352         while True:
2353             tarinfo = self.next()
2354             if tarinfo is None:
2355                 break
2356         self._loaded = True
2357
2358     def _check(self, mode=None):
2359         """Check if TarFile is still open, and if the operation's mode
2360            corresponds to TarFile's mode.
2361         """
2362         if self.closed:
2363             raise IOError("%s is closed" % self.__class__.__name__)
2364         if mode is not None and self.mode not in mode:
2365             raise IOError("bad operation for mode %r" % self.mode)
2366
2367     def __iter__(self):
2368         """Provide an iterator object.
2369         """
2370         if self._loaded:
2371             return iter(self.members)
2372         else:
2373             return TarIter(self)
2374
2375     def _dbg(self, level, msg):
2376         """Write debugging output to sys.stderr.
2377         """
2378         if level <= self.debug:
2379             print >> sys.stderr, msg
2380 # class TarFile
2381
2382 class TarIter:
2383     """Iterator Class.
2384
2385        for tarinfo in TarFile(...):
2386            suite...
2387     """
2388
2389     def __init__(self, tarfile):
2390         """Construct a TarIter object.
2391         """
2392         self.tarfile = tarfile
2393         self.index = 0
2394     def __iter__(self):
2395         """Return iterator object.
2396         """
2397         return self
2398     def next(self):
2399         """Return the next item using TarFile's next() method.
2400            When all members have been read, set TarFile as _loaded.
2401         """
2402         # Fix for SF #1100429: Under rare circumstances it can
2403         # happen that getmembers() is called during iteration,
2404         # which will cause TarIter to stop prematurely.
2405         if not self.tarfile._loaded:
2406             tarinfo = self.tarfile.next()
2407             if not tarinfo:
2408                 self.tarfile._loaded = True
2409                 raise StopIteration
2410         else:
2411             try:
2412                 tarinfo = self.tarfile.members[self.index]
2413             except IndexError:
2414                 raise StopIteration
2415         self.index += 1
2416         return tarinfo
2417
2418 # Helper classes for sparse file support
2419 class _section:
2420     """Base class for _data and _hole.
2421     """
2422     def __init__(self, offset, size):
2423         self.offset = offset
2424         self.size = size
2425     def __contains__(self, offset):
2426         return self.offset <= offset < self.offset + self.size
2427
2428 class _data(_section):
2429     """Represent a data section in a sparse file.
2430     """
2431     def __init__(self, offset, size, realpos):
2432         _section.__init__(self, offset, size)
2433         self.realpos = realpos
2434
2435 class _hole(_section):
2436     """Represent a hole section in a sparse file.
2437     """
2438     pass
2439
2440 class _ringbuffer(list):
2441     """Ringbuffer class which increases performance
2442        over a regular list.
2443     """
2444     def __init__(self):
2445         self.idx = 0
2446     def find(self, offset):
2447         idx = self.idx
2448         while True:
2449             item = self[idx]
2450             if offset in item:
2451                 break
2452             idx += 1
2453             if idx == len(self):
2454                 idx = 0
2455             if idx == self.idx:
2456                 # End of File
2457                 return None
2458         self.idx = idx
2459         return item
2460
2461 #---------------------------------------------
2462 # zipfile compatible TarFile class
2463 #---------------------------------------------
2464 TAR_PLAIN = 0           # zipfile.ZIP_STORED
2465 TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
2466 class TarFileCompat:
2467     """TarFile class compatible with standard module zipfile's
2468        ZipFile class.
2469     """
2470     def __init__(self, file, mode="r", compression=TAR_PLAIN):
2471         if compression == TAR_PLAIN:
2472             self.tarfile = TarFile.taropen(file, mode)
2473         elif compression == TAR_GZIPPED:
2474             self.tarfile = TarFile.gzopen(file, mode)
2475         else:
2476             raise ValueError("unknown compression constant")
2477         if mode[0:1] == "r":
2478             members = self.tarfile.getmembers()
2479             for m in members:
2480                 m.filename = m.name
2481                 m.file_size = m.size
2482                 m.date_time = time.gmtime(m.mtime)[:6]
2483     def namelist(self):
2484         return map(lambda m: m.name, self.infolist())
2485     def infolist(self):
2486         return filter(lambda m: m.type in REGULAR_TYPES,
2487                       self.tarfile.getmembers())
2488     def printdir(self):
2489         self.tarfile.list()
2490     def testzip(self):
2491         return
2492     def getinfo(self, name):
2493         return self.tarfile.getmember(name)
2494     def read(self, name):
2495         return self.tarfile.extractfile(self.tarfile.getmember(name)).read()
2496     def write(self, filename, arcname=None, compress_type=None):
2497         self.tarfile.add(filename, arcname)
2498     def writestr(self, zinfo, bytes):
2499         try:
2500             from cStringIO import StringIO
2501         except ImportError:
2502             from StringIO import StringIO
2503         import calendar
2504         zinfo.name = zinfo.filename
2505         zinfo.size = zinfo.file_size
2506         zinfo.mtime = calendar.timegm(zinfo.date_time)
2507         self.tarfile.addfile(zinfo, StringIO(bytes))
2508     def close(self):
2509         self.tarfile.close()
2510 #class TarFileCompat
2511
2512 #--------------------
2513 # exported functions
2514 #--------------------
2515 def is_tarfile(name):
2516     """Return True if name points to a tar archive that we
2517        are able to handle, else return False.
2518     """
2519     try:
2520         t = open(name)
2521         t.close()
2522         return True
2523     except TarError:
2524         return False
2525
2526 bltn_open = open
2527 open = TarFile.open