Lib/tarfile.py

   1 #!/usr/bin/env python
   2 #-------------------------------------------------------------------
   3 # tarfile.py
   4 #-------------------------------------------------------------------
   5 # Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
   6 # All rights reserved.
   7 #
   8 # Permission  is  hereby granted,  free  of charge,  to  any person
   9 # obtaining a  copy of  this software  and associated documentation
  10 # files  (the  "Software"),  to   deal  in  the  Software   without
  11 # restriction,  including  without limitation  the  rights to  use,
  12 # copy, modify, merge, publish, distribute, sublicense, and/or sell
  13 # copies  of  the  Software,  and to  permit  persons  to  whom the
  14 # Software  is  furnished  to  do  so,  subject  to  the  following
  15 # conditions:
  16 #
  17 # The above copyright  notice and this  permission notice shall  be
  18 # included in all copies or substantial portions of the Software.
  19 #
  20 # THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
  21 # EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
  22 # OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
  23 # NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
  24 # HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
  25 # WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
  26 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  27 # OTHER DEALINGS IN THE SOFTWARE.
  28 #
  29 """Read from and write to tar format archives.
  30 """
  31
# Module metadata.  The "$...$" values look like version-control keyword
# placeholders that are expanded on checkout (NOTE(review): confirm).
__version__ = "$Revision$"

version     = "0.9.0"
__author__  = "Lars Gust\u00e4bel (lars@gustaebel.de)"
__date__    = "$Date$"
__cvsid__   = "$Id$"
__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
  39
  40 #---------
  41 # Imports
  42 #---------
  43 import sys
  44 import os
  45 import shutil
  46 import stat
  47 import errno
  48 import time
  49 import struct
  50 import copy
  51 import re
  52
# Refuse to import on the classic "mac" platform instead of misbehaving.
if sys.platform == 'mac':
    # This module needs work for MacOS9, especially in the area of pathname
    # handling. In many places it is assumed a simple substitution of / by the
    # local os.path.sep is good enough to convert pathnames, but this does not
    # work with the mac rooted:path:name versus :nonrooted:path:name syntax
    raise ImportError("tarfile does not work for platform==mac")

# grp and pwd are optional; when they cannot be imported both names are
# bound to None so that later code can test for their availability.
try:
    import grp, pwd
except ImportError:
    grp = pwd = None

# Names exported by "from tarfile import *".
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]

# Keep a reference to the builtin open() under a private name.
from builtins import open as _open # Since 'open' is TarFile.open
  69
  70 #---------------------------------------------------------
  71 # tar constants
  72 #---------------------------------------------------------
  73 NUL = b"\0"                     # the null character
  74 BLOCKSIZE = 512                 # length of processing blocks
  75 RECORDSIZE = BLOCKSIZE * 20     # length of records
  76 GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
  77 POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string
  78
  79 LENGTH_NAME = 100               # maximum length of a filename
  80 LENGTH_LINK = 100               # maximum length of a linkname
  81 LENGTH_PREFIX = 155             # maximum length of the prefix field
  82
  83 REGTYPE = b"0"                  # regular file
  84 AREGTYPE = b"\0"                # regular file
  85 LNKTYPE = b"1"                  # link (inside tarfile)
  86 SYMTYPE = b"2"                  # symbolic link
  87 CHRTYPE = b"3"                  # character special device
  88 BLKTYPE = b"4"                  # block special device
  89 DIRTYPE = b"5"                  # directory
  90 FIFOTYPE = b"6"                 # fifo special device
  91 CONTTYPE = b"7"                 # contiguous file
  92
  93 GNUTYPE_LONGNAME = b"L"         # GNU tar longname
  94 GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
  95 GNUTYPE_SPARSE = b"S"           # GNU tar sparse file
  96
  97 XHDTYPE = b"x"                  # POSIX.1-2001 extended header
  98 XGLTYPE = b"g"                  # POSIX.1-2001 global header
  99 SOLARIS_XHDTYPE = b"X"          # Solaris extended header
 100
 101 USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
 102 GNU_FORMAT = 1                  # GNU tar format
 103 PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
 104 DEFAULT_FORMAT = GNU_FORMAT
 105
 106 #---------------------------------------------------------
 107 # tarfile constants
 108 #---------------------------------------------------------
 109 # File types that tarfile supports:
 110 SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
 111                    SYMTYPE, DIRTYPE, FIFOTYPE,
 112                    CONTTYPE, CHRTYPE, BLKTYPE,
 113                    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
 114                    GNUTYPE_SPARSE)
 115
 116 # File types that will be treated as a regular file.
 117 REGULAR_TYPES = (REGTYPE, AREGTYPE,
 118                  CONTTYPE, GNUTYPE_SPARSE)
 119
 120 # File types that are part of the GNU tar format.
 121 GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
 122              GNUTYPE_SPARSE)
 123
 124 # Fields from a pax header that override a TarInfo attribute.
 125 PAX_FIELDS = ("path", "linkpath", "size", "mtime",
 126               "uid", "gid", "uname", "gname")
 127
 128 # Fields in a pax header that are numbers, all other fields
 129 # are treated as strings.
 130 PAX_NUMBER_FIELDS = {
 131     "atime": float,
 132     "ctime": float,
 133     "mtime": float,
 134     "uid": int,
 135     "gid": int,
 136     "size": int
 137 }
 138
 139 #---------------------------------------------------------
 140 # Bits used in the mode field, values in octal.
 141 #---------------------------------------------------------
 142 S_IFLNK = 0o120000        # symbolic link
 143 S_IFREG = 0o100000        # regular file
 144 S_IFBLK = 0o060000        # block device
 145 S_IFDIR = 0o040000        # directory
 146 S_IFCHR = 0o020000        # character device
 147 S_IFIFO = 0o010000        # fifo
 148
 149 TSUID   = 0o4000          # set UID on execution
 150 TSGID   = 0o2000          # set GID on execution
 151 TSVTX   = 0o1000          # reserved
 152
 153 TUREAD  = 0o400           # read by owner
 154 TUWRITE = 0o200           # write by owner
 155 TUEXEC  = 0o100           # execute/search by owner
 156 TGREAD  = 0o040           # read by group
 157 TGWRITE = 0o020           # write by group
 158 TGEXEC  = 0o010           # execute/search by group
 159 TOREAD  = 0o004           # read by other
 160 TOWRITE = 0o002           # write by other
 161 TOEXEC  = 0o001           # execute/search by other
 162
 163 #---------------------------------------------------------
 164 # initialization
 165 #---------------------------------------------------------
 166 ENCODING = sys.getfilesystemencoding()
 167 if ENCODING is None:
 168     ENCODING = "ascii"
 169
 170 #---------------------------------------------------------
 171 # Some useful functions
 172 #---------------------------------------------------------
 173
 174 def stn(s, length, encoding, errors):
 175     """Convert a string to a null-terminated bytes object.
 176     """
 177     s = s.encode(encoding, errors)
 178     return s[:length] + (length - len(s)) * NUL
 179
 180 def nts(s, encoding, errors):
 181     """Convert a null-terminated bytes object to a string.
 182     """
 183     p = s.find(b"\0")
 184     if p != -1:
 185         s = s[:p]
 186     return s.decode(encoding, errors)
 187
 188 def nti(s):
 189     """Convert a number field to a python number.
 190     """
 191     # There are two possible encodings for a number field, see
 192     # itn() below.
 193     if s[0] != chr(0o200):
 194         try:
 195             n = int(nts(s, "ascii", "strict") or "0", 8)
 196         except ValueError:
 197             raise HeaderError("invalid header")
 198     else:
 199         n = 0
 200         for i in range(len(s) - 1):
 201             n <<= 8
 202             n += ord(s[i + 1])
 203     return n
 204
 205 def itn(n, digits=8, format=DEFAULT_FORMAT):
 206     """Convert a python number to a number field.
 207     """
 208     # POSIX 1003.1-1988 requires numbers to be encoded as a string of
 209     # octal digits followed by a null-byte, this allows values up to
 210     # (8**(digits-1))-1. GNU tar allows storing numbers greater than
 211     # that if necessary. A leading 0o200 byte indicates this particular
 212     # encoding, the following digits-1 bytes are a big-endian
 213     # representation. This allows values up to (256**(digits-1))-1.
 214     if 0 <= n < 8 ** (digits - 1):
 215         s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
 216     else:
 217         if format != GNU_FORMAT or n >= 256 ** (digits - 1):
 218             raise ValueError("overflow in number field")
 219
 220         if n < 0:
 221             # XXX We mimic GNU tar's behaviour with negative numbers,
 222             # this could raise OverflowError.
 223             n = struct.unpack("L", struct.pack("l", n))[0]
 224
 225         s = bytearray()
 226         for i in range(digits - 1):
 227             s.insert(0, n & 0o377)
 228             n >>= 8
 229         s.insert(0, 0o200)
 230     return s
 231
 232 def calc_chksums(buf):
 233     """Calculate the checksum for a member's header by summing up all
 234        characters except for the chksum field which is treated as if
 235        it was filled with spaces. According to the GNU tar sources,
 236        some tars (Sun and NeXT) calculate chksum with signed char,
 237        which will be different if there are chars in the buffer with
 238        the high bit set. So we calculate two checksums, unsigned and
 239        signed.
 240     """
 241     unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512]))
 242     signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512]))
 243     return unsigned_chksum, signed_chksum
 244
 245 def copyfileobj(src, dst, length=None):
 246     """Copy length bytes from fileobj src to fileobj dst.
 247        If length is None, copy the entire content.
 248     """
 249     if length == 0:
 250         return
 251     if length is None:
 252         shutil.copyfileobj(src, dst)
 253         return
 254
 255     BUFSIZE = 16 * 1024
 256     blocks, remainder = divmod(length, BUFSIZE)
 257     for b in range(blocks):
 258         buf = src.read(BUFSIZE)
 259         if len(buf) < BUFSIZE:
 260             raise IOError("end of file reached")
 261         dst.write(buf)
 262
 263     if remainder != 0:
 264         buf = src.read(remainder)
 265         if len(buf) < remainder:
 266             raise IOError("end of file reached")
 267         dst.write(buf)
 268     return
 269
# Lookup table used by filemode().  There is one inner tuple per output
# character: the file type followed by nine permission characters.  Each
# inner tuple lists (bits, character) candidates; the order matters
# because the first candidate whose bits are all set in the mode wins
# (e.g. execute+setuid "s" is tried before setuid-only "S" and plain "x").
filemode_table = (
    # file type
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    # owner
    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    # group
    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    # other
    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)
 296
 297 def filemode(mode):
 298     """Convert a file's mode to a string of the form
 299        -rwxrwxrwx.
 300        Used by TarFile.list()
 301     """
 302     perm = []
 303     for table in filemode_table:
 304         for bit, char in table:
 305             if mode & bit == bit:
 306                 perm.append(char)
 307                 break
 308         else:
 309             perm.append("-")
 310     return "".join(perm)
 311
 312 if os.sep != "/":
 313     normpath = lambda path: os.path.normpath(path).replace(os.sep, "/")
 314 else:
 315     normpath = os.path.normpath
 316
 317 class TarError(Exception):
 318     """Base exception."""
 319     pass
 320 class ExtractError(TarError):
 321     """General exception for extract errors."""
 322     pass
 323 class ReadError(TarError):
 324     """Exception for unreadble tar archives."""
 325     pass
 326 class CompressionError(TarError):
 327     """Exception for unavailable compression methods."""
 328     pass
 329 class StreamError(TarError):
 330     """Exception for unsupported operations on stream-like TarFiles."""
 331     pass
 332 class HeaderError(TarError):
 333     """Exception for invalid headers."""
 334     pass
 335
 336 #---------------------------
 337 # internal stream interface
 338 #---------------------------
 339 class _LowLevelFile:
 340     """Low-level file object. Supports reading and writing.
 341        It is used instead of a regular file object for streaming
 342        access.
 343     """
 344
 345     def __init__(self, name, mode):
 346         mode = {
 347             "r": os.O_RDONLY,
 348             "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
 349         }[mode]
 350         if hasattr(os, "O_BINARY"):
 351             mode |= os.O_BINARY
 352         self.fd = os.open(name, mode)
 353
 354     def close(self):
 355         os.close(self.fd)
 356
 357     def read(self, size):
 358         return os.read(self.fd, size)
 359
 360     def write(self, s):
 361         os.write(self.fd, s)
 362
 363 class _Stream:
 364     """Class that serves as an adapter between TarFile and
 365        a stream-like object.  The stream-like object only
 366        needs to have a read() or write() method and is accessed
 367        blockwise.  Use of gzip or bzip2 compression is possible.
 368        A stream-like object could be for example: sys.stdin,
 369        sys.stdout, a socket, a tape device etc.
 370
 371        _Stream is intended to be used only internally.
 372     """
 373
 374     def __init__(self, name, mode, comptype, fileobj, bufsize):
 375         """Construct a _Stream object.
 376         """
 377         self._extfileobj = True
 378         if fileobj is None:
 379             fileobj = _LowLevelFile(name, mode)
 380             self._extfileobj = False
 381
 382         if comptype == '*':
 383             # Enable transparent compression detection for the
 384             # stream interface
 385             fileobj = _StreamProxy(fileobj)
 386             comptype = fileobj.getcomptype()
 387
 388         self.name     = name or ""
 389         self.mode     = mode
 390         self.comptype = comptype
 391         self.fileobj  = fileobj
 392         self.bufsize  = bufsize
 393         self.buf      = b""
 394         self.pos      = 0
 395         self.closed   = False
 396
 397         if comptype == "gz":
 398             try:
 399                 import zlib
 400             except ImportError:
 401                 raise CompressionError("zlib module is not available")
 402             self.zlib = zlib
 403             self.crc = zlib.crc32("")
 404             if mode == "r":
 405                 self._init_read_gz()
 406             else:
 407                 self._init_write_gz()
 408
 409         if comptype == "bz2":
 410             try:
 411                 import bz2
 412             except ImportError:
 413                 raise CompressionError("bz2 module is not available")
 414             if mode == "r":
 415                 self.dbuf = b""
 416                 self.cmp = bz2.BZ2Decompressor()
 417             else:
 418                 self.cmp = bz2.BZ2Compressor()
 419
 420     def __del__(self):
 421         if hasattr(self, "closed") and not self.closed:
 422             self.close()
 423
 424     def _init_write_gz(self):
 425         """Initialize for writing with gzip compression.
 426         """
 427         self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
 428                                             -self.zlib.MAX_WBITS,
 429                                             self.zlib.DEF_MEM_LEVEL,
 430                                             0)
 431         timestamp = struct.pack("<L", int(time.time()))
 432         self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
 433         if self.name.endswith(".gz"):
 434             self.name = self.name[:-3]
 435         # RFC1952 says we must use ISO-8859-1 for the FNAME field.
 436         self.__write(self.name.encode("iso-8859-1", "replace") + NUL)
 437
 438     def write(self, s):
 439         """Write string s to the stream.
 440         """
 441         if self.comptype == "gz":
 442             self.crc = self.zlib.crc32(s, self.crc)
 443         self.pos += len(s)
 444         if self.comptype != "tar":
 445             s = self.cmp.compress(s)
 446         self.__write(s)
 447
 448     def __write(self, s):
 449         """Write string s to the stream if a whole new block
 450            is ready to be written.
 451         """
 452         self.buf += s
 453         while len(self.buf) > self.bufsize:
 454             self.fileobj.write(self.buf[:self.bufsize])
 455             self.buf = self.buf[self.bufsize:]
 456
 457     def close(self):
 458         """Close the _Stream object. No operation should be
 459            done on it afterwards.
 460         """
 461         if self.closed:
 462             return
 463
 464         if self.mode == "w" and self.comptype != "tar":
 465             self.buf += self.cmp.flush()
 466
 467         if self.mode == "w" and self.buf:
 468             self.fileobj.write(self.buf)
 469             self.buf = b""
 470             if self.comptype == "gz":
 471                 # The native zlib crc is an unsigned 32-bit integer, but
 472                 # the Python wrapper implicitly casts that to a signed C
 473                 # long.  So, on a 32-bit box self.crc may "look negative",
 474                 # while the same crc on a 64-bit box may "look positive".
 475                 # To avoid irksome warnings from the `struct` module, force
 476                 # it to look positive on all boxes.
 477                 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
 478                 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
 479
 480         if not self._extfileobj:
 481             self.fileobj.close()
 482
 483         self.closed = True
 484
 485     def _init_read_gz(self):
 486         """Initialize for reading a gzip compressed fileobj.
 487         """
 488         self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
 489         self.dbuf = b""
 490
 491         # taken from gzip.GzipFile with some alterations
 492         if self.__read(2) != b"\037\213":
 493             raise ReadError("not a gzip file")
 494         if self.__read(1) != b"\010":
 495             raise CompressionError("unsupported compression method")
 496
 497         flag = ord(self.__read(1))
 498         self.__read(6)
 499
 500         if flag & 4:
 501             xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
 502             self.read(xlen)
 503         if flag & 8:
 504             while True:
 505                 s = self.__read(1)
 506                 if not s or s == NUL:
 507                     break
 508         if flag & 16:
 509             while True:
 510                 s = self.__read(1)
 511                 if not s or s == NUL:
 512                     break
 513         if flag & 2:
 514             self.__read(2)
 515
 516     def tell(self):
 517         """Return the stream's file pointer position.
 518         """
 519         return self.pos
 520
 521     def seek(self, pos=0):
 522         """Set the stream's file pointer to pos. Negative seeking
 523            is forbidden.
 524         """
 525         if pos - self.pos >= 0:
 526             blocks, remainder = divmod(pos - self.pos, self.bufsize)
 527             for i in range(blocks):
 528                 self.read(self.bufsize)
 529             self.read(remainder)
 530         else:
 531             raise StreamError("seeking backwards is not allowed")
 532         return self.pos
 533
 534     def read(self, size=None):
 535         """Return the next size number of bytes from the stream.
 536            If size is not defined, return all bytes of the stream
 537            up to EOF.
 538         """
 539         if size is None:
 540             t = []
 541             while True:
 542                 buf = self._read(self.bufsize)
 543                 if not buf:
 544                     break
 545                 t.append(buf)
 546             buf = "".join(t)
 547         else:
 548             buf = self._read(size)
 549         self.pos += len(buf)
 550         return buf
 551
 552     def _read(self, size):
 553         """Return size bytes from the stream.
 554         """
 555         if self.comptype == "tar":
 556             return self.__read(size)
 557
 558         c = len(self.dbuf)
 559         while c < size:
 560             buf = self.__read(self.bufsize)
 561             if not buf:
 562                 break
 563             try:
 564                 buf = self.cmp.decompress(buf)
 565             except IOError:
 566                 raise ReadError("invalid compressed data")
 567             self.dbuf += buf
 568             c += len(buf)
 569         buf = self.dbuf[:size]
 570         self.dbuf = self.dbuf[size:]
 571         return buf
 572
 573     def __read(self, size):
 574         """Return size bytes from stream. If internal buffer is empty,
 575            read another block from the stream.
 576         """
 577         c = len(self.buf)
 578         while c < size:
 579             buf = self.fileobj.read(self.bufsize)
 580             if not buf:
 581                 break
 582             self.buf += buf
 583             c += len(buf)
 584         buf = self.buf[:size]
 585         self.buf = self.buf[size:]
 586         return buf
 587 # class _Stream
 588
 589 class _StreamProxy(object):
 590     """Small proxy class that enables transparent compression
 591        detection for the Stream interface (mode 'r|*').
 592     """
 593
 594     def __init__(self, fileobj):
 595         self.fileobj = fileobj
 596         self.buf = self.fileobj.read(BLOCKSIZE)
 597
 598     def read(self, size):
 599         self.read = self.fileobj.read
 600         return self.buf
 601
 602     def getcomptype(self):
 603         if self.buf.startswith(b"\037\213\010"):
 604             return "gz"
 605         if self.buf.startswith(b"BZh91"):
 606             return "bz2"
 607         return "tar"
 608
 609     def close(self):
 610         self.fileobj.close()
 611 # class StreamProxy
 612
 613 class _BZ2Proxy(object):
 614     """Small proxy class that enables external file object
 615        support for "r:bz2" and "w:bz2" modes. This is actually
 616        a workaround for a limitation in bz2 module's BZ2File
 617        class which (unlike gzip.GzipFile) has no support for
 618        a file object argument.
 619     """
 620
 621     blocksize = 16 * 1024
 622
 623     def __init__(self, fileobj, mode):
 624         self.fileobj = fileobj
 625         self.mode = mode
 626         self.name = getattr(self.fileobj, "name", None)
 627         self.init()
 628
 629     def init(self):
 630         import bz2
 631         self.pos = 0
 632         if self.mode == "r":
 633             self.bz2obj = bz2.BZ2Decompressor()
 634             self.fileobj.seek(0)
 635             self.buf = b""
 636         else:
 637             self.bz2obj = bz2.BZ2Compressor()
 638
 639     def read(self, size):
 640         x = len(self.buf)
 641         while x < size:
 642             raw = self.fileobj.read(self.blocksize)
 643             if not raw:
 644                 break
 645             data = self.bz2obj.decompress(raw)
 646             self.buf += data
 647             x += len(data)
 648
 649         buf = self.buf[:size]
 650         self.buf = self.buf[size:]
 651         self.pos += len(buf)
 652         return buf
 653
 654     def seek(self, pos):
 655         if pos < self.pos:
 656             self.init()
 657         self.read(pos - self.pos)
 658
 659     def tell(self):
 660         return self.pos
 661
 662     def write(self, data):
 663         self.pos += len(data)
 664         raw = self.bz2obj.compress(data)
 665         self.fileobj.write(raw)
 666
 667     def close(self):
 668         if self.mode == "w":
 669             raw = self.bz2obj.flush()
 670             self.fileobj.write(raw)
 671 # class _BZ2Proxy
 672
 673 #------------------------
 674 # Extraction file object
 675 #------------------------
 676 class _FileInFile(object):
 677     """A thin wrapper around an existing file object that
 678        provides a part of its data as an individual file
 679        object.
 680     """
 681
 682     def __init__(self, fileobj, offset, size, sparse=None):
 683         self.fileobj = fileobj
 684         self.offset = offset
 685         self.size = size
 686         self.sparse = sparse
 687         self.position = 0
 688
 689     def seekable(self):
 690         if not hasattr(self.fileobj, "seekable"):
 691             # XXX gzip.GzipFile and bz2.BZ2File
 692             return True
 693         return self.fileobj.seekable()
 694
 695     def tell(self):
 696         """Return the current file position.
 697         """
 698         return self.position
 699
 700     def seek(self, position):
 701         """Seek to a position in the file.
 702         """
 703         self.position = position
 704
 705     def read(self, size=None):
 706         """Read data from the file.
 707         """
 708         if size is None:
 709             size = self.size - self.position
 710         else:
 711             size = min(size, self.size - self.position)
 712
 713         if self.sparse is None:
 714             return self.readnormal(size)
 715         else:
 716             return self.readsparse(size)
 717
 718     def readnormal(self, size):
 719         """Read operation for regular files.
 720         """
 721         self.fileobj.seek(self.offset + self.position)
 722         self.position += size
 723         return self.fileobj.read(size)
 724
 725     def readsparse(self, size):
 726         """Read operation for sparse files.
 727         """
 728         data = b""
 729         while size > 0:
 730             buf = self.readsparsesection(size)
 731             if not buf:
 732                 break
 733             size -= len(buf)
 734             data += buf
 735         return data
 736
 737     def readsparsesection(self, size):
 738         """Read a single section of a sparse file.
 739         """
 740         section = self.sparse.find(self.position)
 741
 742         if section is None:
 743             return b""
 744
 745         size = min(size, section.offset + section.size - self.position)
 746
 747         if isinstance(section, _data):
 748             realpos = section.realpos + self.position - section.offset
 749             self.fileobj.seek(self.offset + realpos)
 750             self.position += size
 751             return self.fileobj.read(size)
 752         else:
 753             self.position += size
 754             return NUL * size
 755 #class _FileInFile
 756
 757
 758 class ExFileObject(object):
 759     """File-like object for reading an archive member.
 760        Is returned by TarFile.extractfile().
 761     """
 762     blocksize = 1024
 763
 764     def __init__(self, tarfile, tarinfo):
 765         self.fileobj = _FileInFile(tarfile.fileobj,
 766                                    tarinfo.offset_data,
 767                                    tarinfo.size,
 768                                    tarinfo.sparse)
 769         self.name = tarinfo.name
 770         self.mode = "r"
 771         self.closed = False
 772         self.size = tarinfo.size
 773
 774         self.position = 0
 775         self.buffer = b""
 776
 777     def readable(self):
 778         return True
 779
 780     def writable(self):
 781         return False
 782
 783     def seekable(self):
 784         return self.fileobj.seekable()
 785
 786     def read(self, size=None):
 787         """Read at most size bytes from the file. If size is not
 788            present or None, read all data until EOF is reached.
 789         """
 790         if self.closed:
 791             raise ValueError("I/O operation on closed file")
 792
 793         buf = b""
 794         if self.buffer:
 795             if size is None:
 796                 buf = self.buffer
 797                 self.buffer = b""
 798             else:
 799                 buf = self.buffer[:size]
 800                 self.buffer = self.buffer[size:]
 801
 802         if size is None:
 803             buf += self.fileobj.read()
 804         else:
 805             buf += self.fileobj.read(size - len(buf))
 806
 807         self.position += len(buf)
 808         return buf
 809
 810     # XXX TextIOWrapper uses the read1() method.
 811     read1 = read
 812
 813     def readline(self, size=-1):
 814         """Read one entire line from the file. If size is present
 815            and non-negative, return a string with at most that
 816            size, which may be an incomplete line.
 817         """
 818         if self.closed:
 819             raise ValueError("I/O operation on closed file")
 820
 821         pos = self.buffer.find(b"\n") + 1
 822         if pos == 0:
 823             # no newline found.
 824             while True:
 825                 buf = self.fileobj.read(self.blocksize)
 826                 self.buffer += buf
 827                 if not buf or b"\n" in buf:
 828                     pos = self.buffer.find(b"\n") + 1
 829                     if pos == 0:
 830                         # no newline found.
 831                         pos = len(self.buffer)
 832                     break
 833
 834         if size != -1:
 835             pos = min(size, pos)
 836
 837         buf = self.buffer[:pos]
 838         self.buffer = self.buffer[pos:]
 839         self.position += len(buf)
 840         return buf
 841
 842     def readlines(self):
 843         """Return a list with all remaining lines.
 844         """
 845         result = []
 846         while True:
 847             line = self.readline()
 848             if not line: break
 849             result.append(line)
 850         return result
 851
 852     def tell(self):
 853         """Return the current file position.
 854         """
 855         if self.closed:
 856             raise ValueError("I/O operation on closed file")
 857
 858         return self.position
 859
 860     def seek(self, pos, whence=os.SEEK_SET):
 861         """Seek to a position in the file.
 862         """
 863         if self.closed:
 864             raise ValueError("I/O operation on closed file")
 865
 866         if whence == os.SEEK_SET:
 867             self.position = min(max(pos, 0), self.size)
 868         elif whence == os.SEEK_CUR:
 869             if pos < 0:
 870                 self.position = max(self.position + pos, 0)
 871             else:
 872                 self.position = min(self.position + pos, self.size)
 873         elif whence == os.SEEK_END:
 874             self.position = max(min(self.size + pos, self.size), 0)
 875         else:
 876             raise ValueError("Invalid argument")
 877
 878         self.buffer = b""
 879         self.fileobj.seek(self.position)
 880
 881     def close(self):
 882         """Close the file object.
 883         """
 884         self.closed = True
 885
 886     def __iter__(self):
 887         """Get an iterator over the file's lines.
 888         """
 889         while True:
 890             line = self.readline()
 891             if not line:
 892                 break
 893             yield line
 894 #class ExFileObject
 895
 896 #------------------
 897 # Exported Classes
 898 #------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    # Only the attributes named here can be set on instances of this
    # class itself. "tarfile", "_sparse_structs" and "_link_target" are
    # not given a value by __init__(); "_sparse_structs" is set by
    # frombuf() for GNU sparse members and removed by _proc_sparse().
    __slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
                 "chksum", "type", "linkname", "uname", "gname",
                 "devmajor", "devminor",
                 "offset", "offset_data", "pax_headers", "sparse",
                 "tarfile", "_sparse_structs", "_link_target")
 912
 913     def __init__(self, name=""):
 914         """Construct a TarInfo object. name is the optional name
 915            of the member.
 916         """
 917         self.name = name        # member name
 918         self.mode = 0o644       # file permissions
 919         self.uid = 0            # user id
 920         self.gid = 0            # group id
 921         self.size = 0           # file size
 922         self.mtime = 0          # modification time
 923         self.chksum = 0         # header checksum
 924         self.type = REGTYPE     # member type
 925         self.linkname = ""      # link name
 926         self.uname = "root"     # user name
 927         self.gname = "root"     # group name
 928         self.devmajor = 0       # device major number
 929         self.devminor = 0       # device minor number
 930
 931         self.offset = 0         # the tar header starts here
 932         self.offset_data = 0    # the file's data starts here
 933
 934         self.sparse = None      # sparse member information
 935         self.pax_headers = {}   # pax header information
 936
 937     # In pax headers the "name" and "linkname" field are called
 938     # "path" and "linkpath".
 939     def _getpath(self):
 940         return self.name
 941     def _setpath(self, name):
 942         self.name = name
 943     path = property(_getpath, _setpath)
 944
 945     def _getlinkpath(self):
 946         return self.linkname
 947     def _setlinkpath(self, linkname):
 948         self.linkname = linkname
 949     linkpath = property(_getlinkpath, _setlinkpath)
 950
 951     def __repr__(self):
 952         return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
 953
 954     def get_info(self):
 955         """Return the TarInfo's attributes as a dictionary.
 956         """
 957         info = {
 958             "name":     normpath(self.name),
 959             "mode":     self.mode & 0o7777,
 960             "uid":      self.uid,
 961             "gid":      self.gid,
 962             "size":     self.size,
 963             "mtime":    self.mtime,
 964             "chksum":   self.chksum,
 965             "type":     self.type,
 966             "linkname": normpath(self.linkname) if self.linkname else "",
 967             "uname":    self.uname,
 968             "gname":    self.gname,
 969             "devmajor": self.devmajor,
 970             "devminor": self.devminor
 971         }
 972
 973         if info["type"] == DIRTYPE and not info["name"].endswith("/"):
 974             info["name"] += "/"
 975
 976         return info
 977
 978     def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
 979         """Return a tar header as a string of 512 byte blocks.
 980         """
 981         info = self.get_info()
 982
 983         if format == USTAR_FORMAT:
 984             return self.create_ustar_header(info, encoding, errors)
 985         elif format == GNU_FORMAT:
 986             return self.create_gnu_header(info, encoding, errors)
 987         elif format == PAX_FORMAT:
 988             return self.create_pax_header(info)
 989         else:
 990             raise ValueError("invalid format")
 991
 992     def create_ustar_header(self, info, encoding, errors):
 993         """Return the object as a ustar header block.
 994         """
 995         info["magic"] = POSIX_MAGIC
 996
 997         if len(info["linkname"]) > LENGTH_LINK:
 998             raise ValueError("linkname is too long")
 999
1000         if len(info["name"]) > LENGTH_NAME:
1001             info["prefix"], info["name"] = self._posix_split_name(info["name"])
1002
1003         return self._create_header(info, USTAR_FORMAT, encoding, errors)
1004
1005     def create_gnu_header(self, info, encoding, errors):
1006         """Return the object as a GNU header block sequence.
1007         """
1008         info["magic"] = GNU_MAGIC
1009
1010         buf = b""
1011         if len(info["linkname"]) > LENGTH_LINK:
1012             buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)
1013
1014         if len(info["name"]) > LENGTH_NAME:
1015             buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)
1016
1017         return buf + self._create_header(info, GNU_FORMAT, encoding, errors)
1018
1019     def create_pax_header(self, info):
1020         """Return the object as a ustar header block. If it cannot be
1021            represented this way, prepend a pax extended header sequence
1022            with supplement information.
1023         """
1024         info["magic"] = POSIX_MAGIC
1025         pax_headers = self.pax_headers.copy()
1026
1027         # Test string fields for values that exceed the field length or cannot
1028         # be represented in ASCII encoding.
1029         for name, hname, length in (
1030                 ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
1031                 ("uname", "uname", 32), ("gname", "gname", 32)):
1032
1033             if hname in pax_headers:
1034                 # The pax header has priority.
1035                 continue
1036
1037             # Try to encode the string as ASCII.
1038             try:
1039                 info[name].encode("ascii", "strict")
1040             except UnicodeEncodeError:
1041                 pax_headers[hname] = info[name]
1042                 continue
1043
1044             if len(info[name]) > length:
1045                 pax_headers[hname] = info[name]
1046
1047         # Test number fields for values that exceed the field limit or values
1048         # that like to be stored as float.
1049         for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
1050             if name in pax_headers:
1051                 # The pax header has priority. Avoid overflow.
1052                 info[name] = 0
1053                 continue
1054
1055             val = info[name]
1056             if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
1057                 pax_headers[name] = str(val)
1058                 info[name] = 0
1059
1060         # Create a pax extended header if necessary.
1061         if pax_headers:
1062             buf = self._create_pax_generic_header(pax_headers, XHDTYPE)
1063         else:
1064             buf = b""
1065
1066         return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
1067
1068     @classmethod
1069     def create_pax_global_header(cls, pax_headers):
1070         """Return the object as a pax global header block sequence.
1071         """
1072         return cls._create_pax_generic_header(pax_headers, XGLTYPE)
1073
1074     def _posix_split_name(self, name):
1075         """Split a name longer than 100 chars into a prefix
1076            and a name part.
1077         """
1078         prefix = name[:LENGTH_PREFIX + 1]
1079         while prefix and prefix[-1] != "/":
1080             prefix = prefix[:-1]
1081
1082         name = name[len(prefix):]
1083         prefix = prefix[:-1]
1084
1085         if not prefix or len(name) > LENGTH_NAME:
1086             raise ValueError("name is too long")
1087         return prefix, name
1088
    @staticmethod
    def _create_header(info, format, encoding, errors):
        """Return a header block. info is a dictionary with file
           information, format must be one of the *_FORMAT constants.
           encoding and errors are used for the string fields. Keys
           that are missing from info are replaced by the defaults
           given in the field list below.
        """
        # The fields in the order in which they are laid out in the block.
        parts = [
            stn(info.get("name", ""), 100, encoding, errors),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            b"        ", # checksum field
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100, encoding, errors),
            info.get("magic", POSIX_MAGIC),
            stn(info.get("uname", "root"), 32, encoding, errors),
            stn(info.get("gname", "root"), 32, encoding, errors),
            itn(info.get("devmajor", 0), 8, format),
            itn(info.get("devminor", 0), 8, format),
            stn(info.get("prefix", ""), 155, encoding, errors)
        ]

        # Pad the joined fields to BLOCKSIZE bytes. The checksum is
        # calculated while the checksum field still holds eight spaces.
        buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        # Put the octal checksum and a NUL over the first seven bytes of
        # the checksum field; its last byte stays a space. The offsets are
        # counted from the end of the block and match bytes 148 and 155
        # if BLOCKSIZE is 512 -- TODO confirm.
        buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
        return buf
1116
1117     @staticmethod
1118     def _create_payload(payload):
1119         """Return the string payload filled with zero bytes
1120            up to the next 512 byte border.
1121         """
1122         blocks, remainder = divmod(len(payload), BLOCKSIZE)
1123         if remainder > 0:
1124             payload += (BLOCKSIZE - remainder) * NUL
1125         return payload
1126
1127     @classmethod
1128     def _create_gnu_long_header(cls, name, type, encoding, errors):
1129         """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
1130            for name.
1131         """
1132         name = name.encode(encoding, errors) + NUL
1133
1134         info = {}
1135         info["name"] = "././@LongLink"
1136         info["type"] = type
1137         info["size"] = len(name)
1138         info["magic"] = GNU_MAGIC
1139
1140         # create extended header + name blocks.
1141         return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
1142                 cls._create_payload(name)
1143
1144     @classmethod
1145     def _create_pax_generic_header(cls, pax_headers, type):
1146         """Return a POSIX.1-2001 extended or global header sequence
1147            that contains a list of keyword, value pairs. The values
1148            must be strings.
1149         """
1150         records = b""
1151         for keyword, value in pax_headers.items():
1152             keyword = keyword.encode("utf8")
1153             value = value.encode("utf8")
1154             l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
1155             n = p = 0
1156             while True:
1157                 n = l + len(str(p))
1158                 if n == p:
1159                     break
1160                 p = n
1161             records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"
1162
1163         # We use a hardcoded "././@PaxHeader" name like star does
1164         # instead of the one that POSIX recommends.
1165         info = {}
1166         info["name"] = "././@PaxHeader"
1167         info["type"] = type
1168         info["size"] = len(records)
1169         info["magic"] = POSIX_MAGIC
1170
1171         # Create pax header + record blocks.
1172         return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
1173                 cls._create_payload(records)
1174
    @classmethod
    def frombuf(cls, buf, encoding, errors):
        """Construct a TarInfo object from a 512 byte bytes object.

           encoding and errors are used to decode the string fields.
           Raise HeaderError if buf is not BLOCKSIZE bytes long, if it
           consists only of NUL bytes or if the stored checksum is not
           among the values calculated for the block.
        """
        if len(buf) != BLOCKSIZE:
            raise HeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise HeaderError("empty header")

        chksum = nti(buf[148:156])
        if chksum not in calc_chksums(buf):
            raise HeaderError("bad checksum")

        # Read the fields from their fixed positions in the block. The
        # bytes between the linkname and the uname field (257-264) are
        # not evaluated here.
        obj = cls()
        obj.name = nts(buf[0:100], encoding, errors)
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257], encoding, errors)
        obj.uname = nts(buf[265:297], encoding, errors)
        obj.gname = nts(buf[297:329], encoding, errors)
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500], encoding, errors)

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # The old GNU sparse format occupies some of the unused
        # space in the buffer for up to 4 sparse structures.
        # Save them for later processing in _proc_sparse().
        if obj.type == GNUTYPE_SPARSE:
            pos = 386
            structs = []
            # Each structure is a 12 byte offset followed by a 12 byte
            # size. Stop at the first one that cannot be converted.
            for i in range(4):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                structs.append((offset, numbytes))
                pos += 24
            # Byte 482 tells _proc_sparse() whether more sparse structures
            # follow in extra blocks; bytes 483-494 hold the size that
            # _proc_sparse() stores as the member's size.
            isextended = bool(buf[482])
            origsize = nti(buf[483:495])
            obj._sparse_structs = (structs, isextended, origsize)

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj
1235
1236     @classmethod
1237     def fromtarfile(cls, tarfile):
1238         """Return the next TarInfo object from TarFile object
1239            tarfile.
1240         """
1241         buf = tarfile.fileobj.read(BLOCKSIZE)
1242         if not buf:
1243             return
1244         obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
1245         obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1246         return obj._proc_member(tarfile)
1247
    #--------------------------------------------------------------------------
    # The following are methods that are called depending on the type of a
    # member. The entry point is _proc_member() which can be overridden in a
    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following operations:
    # 1. Set self.offset_data to the position where the data blocks begin,
    #    if there is data that follows.
    # 2. Set tarfile.offset to the position where the next member's header will
    #    begin.
    # 3. Return self or another valid TarInfo object.
1259     def _proc_member(self, tarfile):
1260         """Choose the right processing method depending on
1261            the type and call it.
1262         """
1263         if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1264             return self._proc_gnulong(tarfile)
1265         elif self.type == GNUTYPE_SPARSE:
1266             return self._proc_sparse(tarfile)
1267         elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1268             return self._proc_pax(tarfile)
1269         else:
1270             return self._proc_builtin(tarfile)
1271
1272     def _proc_builtin(self, tarfile):
1273         """Process a builtin type or an unknown type which
1274            will be treated as a regular file.
1275         """
1276         self.offset_data = tarfile.fileobj.tell()
1277         offset = self.offset_data
1278         if self.isreg() or self.type not in SUPPORTED_TYPES:
1279             # Skip the following data blocks.
1280             offset += self._block(self.size)
1281         tarfile.offset = offset
1282
1283         # Patch the TarInfo object with saved global
1284         # header information.
1285         self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
1286
1287         return self
1288
1289     def _proc_gnulong(self, tarfile):
1290         """Process the blocks that hold a GNU longname
1291            or longlink member.
1292         """
1293         buf = tarfile.fileobj.read(self._block(self.size))
1294
1295         # Fetch the next header and process it.
1296         next = self.fromtarfile(tarfile)
1297         if next is None:
1298             raise HeaderError("missing subsequent header")
1299
1300         # Patch the TarInfo object from the next header with
1301         # the longname information.
1302         next.offset = self.offset
1303         if self.type == GNUTYPE_LONGNAME:
1304             next.name = nts(buf, tarfile.encoding, tarfile.errors)
1305         elif self.type == GNUTYPE_LONGLINK:
1306             next.linkname = nts(buf, tarfile.encoding, tarfile.errors)
1307
1308         return next
1309
1310     def _proc_sparse(self, tarfile):
1311         """Process a GNU sparse header plus extra headers.
1312         """
1313         # We already collected some sparse structures in frombuf().
1314         structs, isextended, origsize = self._sparse_structs
1315         del self._sparse_structs
1316
1317         # Collect sparse structures from extended header blocks.
1318         while isextended:
1319             buf = tarfile.fileobj.read(BLOCKSIZE)
1320             pos = 0
1321             for i in range(21):
1322                 try:
1323                     offset = nti(buf[pos:pos + 12])
1324                     numbytes = nti(buf[pos + 12:pos + 24])
1325                 except ValueError:
1326                     break
1327                 structs.append((offset, numbytes))
1328                 pos += 24
1329             isextended = bool(buf[504])
1330
1331         # Transform the sparse structures to something we can use
1332         # in ExFileObject.
1333         self.sparse = _ringbuffer()
1334         lastpos = 0
1335         realpos = 0
1336         for offset, numbytes in structs:
1337             if offset > lastpos:
1338                 self.sparse.append(_hole(lastpos, offset - lastpos))
1339             self.sparse.append(_data(offset, numbytes, realpos))
1340             realpos += numbytes
1341             lastpos = offset + numbytes
1342         if lastpos < origsize:
1343             self.sparse.append(_hole(lastpos, origsize - lastpos))
1344
1345         self.offset_data = tarfile.fileobj.tell()
1346         tarfile.offset = self.offset_data + self._block(self.size)
1347         self.size = origsize
1348
1349         return self
1350
1351     def _proc_pax(self, tarfile):
1352         """Process an extended or global header as described in
1353            POSIX.1-2001.
1354         """
1355         # Read the header information.
1356         buf = tarfile.fileobj.read(self._block(self.size))
1357
1358         # A pax header stores supplemental information for either
1359         # the following file (extended) or all following files
1360         # (global).
1361         if self.type == XGLTYPE:
1362             pax_headers = tarfile.pax_headers
1363         else:
1364             pax_headers = tarfile.pax_headers.copy()
1365
1366         # Parse pax header information. A record looks like that:
1367         # "%d %s=%s\n" % (length, keyword, value). length is the size
1368         # of the complete record including the length field itself and
1369         # the newline. keyword and value are both UTF-8 encoded strings.
1370         regex = re.compile(br"(\d+) ([^=]+)=")
1371         pos = 0
1372         while True:
1373             match = regex.match(buf, pos)
1374             if not match:
1375                 break
1376
1377             length, keyword = match.groups()
1378             length = int(length)
1379             value = buf[match.end(2) + 1:match.start(1) + length - 1]
1380
1381             keyword = keyword.decode("utf8")
1382             value = value.decode("utf8")
1383
1384             pax_headers[keyword] = value
1385             pos += length
1386
1387         # Fetch the next header.
1388         next = self.fromtarfile(tarfile)
1389
1390         if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
1391             if next is None:
1392                 raise HeaderError("missing subsequent header")
1393
1394             # Patch the TarInfo object with the extended header info.
1395             next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
1396             next.offset = self.offset
1397
1398             if "size" in pax_headers:
1399                 # If the extended header replaces the size field,
1400                 # we need to recalculate the offset where the next
1401                 # header starts.
1402                 offset = next.offset_data
1403                 if next.isreg() or next.type not in SUPPORTED_TYPES:
1404                     offset += next._block(next.size)
1405                 tarfile.offset = offset
1406
1407         return next
1408
1409     def _apply_pax_info(self, pax_headers, encoding, errors):
1410         """Replace fields with supplemental information from a previous
1411            pax extended or global header.
1412         """
1413         for keyword, value in pax_headers.items():
1414             if keyword not in PAX_FIELDS:
1415                 continue
1416
1417             if keyword == "path":
1418                 value = value.rstrip("/")
1419
1420             if keyword in PAX_NUMBER_FIELDS:
1421                 try:
1422                     value = PAX_NUMBER_FIELDS[keyword](value)
1423                 except ValueError:
1424                     value = 0
1425
1426             setattr(self, keyword, value)
1427
1428         self.pax_headers = pax_headers.copy()
1429
1430     def _block(self, count):
1431         """Round up a byte count by BLOCKSIZE and return it,
1432            e.g. _block(834) => 1024.
1433         """
1434         blocks, remainder = divmod(count, BLOCKSIZE)
1435         if remainder:
1436             blocks += 1
1437         return blocks * BLOCKSIZE
1438
    def isreg(self):
        """Return True if the type of the member is in REGULAR_TYPES."""
        return self.type in REGULAR_TYPES
    def isfile(self):
        """Return the result of isreg()."""
        return self.isreg()
    def isdir(self):
        """Return True if the type of the member is DIRTYPE."""
        return self.type == DIRTYPE
    def issym(self):
        """Return True if the type of the member is SYMTYPE."""
        return self.type == SYMTYPE
    def islnk(self):
        """Return True if the type of the member is LNKTYPE."""
        return self.type == LNKTYPE
    def ischr(self):
        """Return True if the type of the member is CHRTYPE."""
        return self.type == CHRTYPE
    def isblk(self):
        """Return True if the type of the member is BLKTYPE."""
        return self.type == BLKTYPE
    def isfifo(self):
        """Return True if the type of the member is FIFOTYPE."""
        return self.type == FIFOTYPE
    def issparse(self):
        """Return True if the type of the member is GNUTYPE_SPARSE."""
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        """Return True if the type of the member is CHRTYPE, BLKTYPE
           or FIFOTYPE.
        """
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1459 # class TarInfo
1460
class TarFile(object):
    """The TarFile Class provides an interface to tar archives.
    """

    # The attributes below are defaults on class level. Except for
    # fileobject, __init__() takes an argument of the same name for each
    # of them and overrides the default on the instance if the argument
    # is not None.

    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

    dereference = False         # If true, add content of linked file to the
                                # tar file, else the link.

    ignore_zeros = False        # If true, skips empty or invalid blocks and
                                # continues processing.

    errorlevel = 0              # If 0, fatal errors only appear in debug
                                # messages (if debug >= 0). If > 0, errors
                                # are passed to the caller as exceptions.

    format = DEFAULT_FORMAT     # The format to use when creating an archive.

    encoding = ENCODING         # Encoding for 8-bit character strings.

    errors = None               # Error handler for unicode conversion.
                                # If it is not passed to __init__(), it is
                                # set to "replace" for mode "r" and to
                                # "strict" for the other modes.

    tarinfo = TarInfo           # The default TarInfo class to use.

    fileobject = ExFileObject   # The default ExFileObject class to use.
1486
1487     def __init__(self, name=None, mode="r", fileobj=None, format=None,
1488             tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
1489             errors=None, pax_headers=None, debug=None, errorlevel=None):
1490         """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1491            read from an existing archive, 'a' to append data to an existing
1492            file or 'w' to create a new file overwriting an existing one. `mode'
1493            defaults to 'r'.
1494            If `fileobj' is given, it is used for reading or writing data. If it
1495            can be determined, `mode' is overridden by `fileobj's mode.
1496            `fileobj' is not closed, when TarFile is closed.
1497         """
1498         if len(mode) > 1 or mode not in "raw":
1499             raise ValueError("mode must be 'r', 'a' or 'w'")
1500         self.mode = mode
1501         self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
1502
1503         if not fileobj:
1504             if self.mode == "a" and not os.path.exists(name):
1505                 # Create nonexistent files in append mode.
1506                 self.mode = "w"
1507                 self._mode = "wb"
1508             fileobj = bltn_open(name, self._mode)
1509             self._extfileobj = False
1510         else:
1511             if name is None and hasattr(fileobj, "name"):
1512                 name = fileobj.name
1513             if hasattr(fileobj, "mode"):
1514                 self._mode = fileobj.mode
1515             self._extfileobj = True
1516         self.name = os.path.abspath(name) if name else None
1517         self.fileobj = fileobj
1518
1519         # Init attributes.
1520         if format is not None:
1521             self.format = format
1522         if tarinfo is not None:
1523             self.tarinfo = tarinfo
1524         if dereference is not None:
1525             self.dereference = dereference
1526         if ignore_zeros is not None:
1527             self.ignore_zeros = ignore_zeros
1528         if encoding is not None:
1529             self.encoding = encoding
1530
1531         if errors is not None:
1532             self.errors = errors
1533         elif mode == "r":
1534             self.errors = "replace"
1535         else:
1536             self.errors = "strict"
1537
1538         if pax_headers is not None and self.format == PAX_FORMAT:
1539             self.pax_headers = pax_headers
1540         else:
1541             self.pax_headers = {}
1542
1543         if debug is not None:
1544             self.debug = debug
1545         if errorlevel is not None:
1546             self.errorlevel = errorlevel
1547
1548         # Init datastructures.
1549         self.closed = False
1550         self.members = []       # list of members as TarInfo objects
1551         self._loaded = False    # flag if all members have been read
1552         self.offset = self.fileobj.tell()
1553                                 # current position in the archive file
1554         self.inodes = {}        # dictionary caching the inodes of
1555                                 # archive members already added
1556
1557         if self.mode == "r":
1558             self.firstmember = None
1559             self.firstmember = self.next()
1560
1561         if self.mode == "a":
1562             # Move to the end of the archive,
1563             # before the first empty block.
1564             self.firstmember = None
1565             while True:
1566                 if self.next() is None:
1567                     if self.offset > 0:
1568                         self.fileobj.seek(self.fileobj.tell() - BLOCKSIZE)
1569                     break
1570
1571         if self.mode in "aw":
1572             self._loaded = True
1573
1574             if self.pax_headers:
1575                 buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
1576                 self.fileobj.write(buf)
1577                 self.offset += len(buf)
1578
1579     #--------------------------------------------------------------------------
1580     # Below are the classmethods which act as alternate constructors to the
1581     # TarFile class. The open() method is the only one that is needed for
1582     # public use; it is the "super"-constructor and is able to select an
1583     # adequate "sub"-constructor for a particular compression using the mapping
1584     # from OPEN_METH.
1585     #
1586     # This concept allows one to subclass TarFile without losing the comfort of
1587     # the super-constructor. A sub-constructor is registered and made available
1588     # by adding it to the mapping in OPEN_METH.
1589
1590     @classmethod
1591     def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
1592         """Open a tar archive for reading, writing or appending. Return
1593            an appropriate TarFile class.
1594
1595            mode:
1596            'r' or 'r:*' open for reading with transparent compression
1597            'r:'         open for reading exclusively uncompressed
1598            'r:gz'       open for reading with gzip compression
1599            'r:bz2'      open for reading with bzip2 compression
1600            'a' or 'a:'  open for appending, creating the file if necessary
1601            'w' or 'w:'  open for writing without compression
1602            'w:gz'       open for writing with gzip compression
1603            'w:bz2'      open for writing with bzip2 compression
1604
1605            'r|*'        open a stream of tar blocks with transparent compression
1606            'r|'         open an uncompressed stream of tar blocks for reading
1607            'r|gz'       open a gzip compressed stream of tar blocks
1608            'r|bz2'      open a bzip2 compressed stream of tar blocks
1609            'w|'         open an uncompressed stream for writing
1610            'w|gz'       open a gzip compressed stream for writing
1611            'w|bz2'      open a bzip2 compressed stream for writing
1612         """
1613
1614         if not name and not fileobj:
1615             raise ValueError("nothing to open")
1616
1617         if mode in ("r", "r:*"):
1618             # Find out which *open() is appropriate for opening the file.
1619             for comptype in cls.OPEN_METH:
1620                 func = getattr(cls, cls.OPEN_METH[comptype])
1621                 if fileobj is not None:
1622                     saved_pos = fileobj.tell()
1623                 try:
1624                     return func(name, "r", fileobj, **kwargs)
1625                 except (ReadError, CompressionError) as e:
1626                     if fileobj is not None:
1627                         fileobj.seek(saved_pos)
1628                     continue
1629             raise ReadError("file could not be opened successfully")
1630
1631         elif ":" in mode:
1632             filemode, comptype = mode.split(":", 1)
1633             filemode = filemode or "r"
1634             comptype = comptype or "tar"
1635
1636             # Select the *open() function according to
1637             # given compression.
1638             if comptype in cls.OPEN_METH:
1639                 func = getattr(cls, cls.OPEN_METH[comptype])
1640             else:
1641                 raise CompressionError("unknown compression type %r" % comptype)
1642             return func(name, filemode, fileobj, **kwargs)
1643
1644         elif "|" in mode:
1645             filemode, comptype = mode.split("|", 1)
1646             filemode = filemode or "r"
1647             comptype = comptype or "tar"
1648
1649             if filemode not in "rw":
1650                 raise ValueError("mode must be 'r' or 'w'")
1651
1652             t = cls(name, filemode,
1653                     _Stream(name, filemode, comptype, fileobj, bufsize),
1654                     **kwargs)
1655             t._extfileobj = False
1656             return t
1657
1658         elif mode in "aw":
1659             return cls.taropen(name, mode, fileobj, **kwargs)
1660
1661         raise ValueError("undiscernible mode")
1662
1663     @classmethod
1664     def taropen(cls, name, mode="r", fileobj=None, **kwargs):
1665         """Open uncompressed tar archive name for reading or writing.
1666         """
1667         if len(mode) > 1 or mode not in "raw":
1668             raise ValueError("mode must be 'r', 'a' or 'w'")
1669         return cls(name, mode, fileobj, **kwargs)
1670
1671     @classmethod
1672     def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1673         """Open gzip compressed tar archive name for reading or writing.
1674            Appending is not allowed.
1675         """
1676         if len(mode) > 1 or mode not in "rw":
1677             raise ValueError("mode must be 'r' or 'w'")
1678
1679         try:
1680             import gzip
1681             gzip.GzipFile
1682         except (ImportError, AttributeError):
1683             raise CompressionError("gzip module is not available")
1684
1685         if fileobj is None:
1686             fileobj = bltn_open(name, mode + "b")
1687
1688         try:
1689             t = cls.taropen(name, mode,
1690                 gzip.GzipFile(name, mode, compresslevel, fileobj),
1691                 **kwargs)
1692         except IOError:
1693             raise ReadError("not a gzip file")
1694         t._extfileobj = False
1695         return t
1696
1697     @classmethod
1698     def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1699         """Open bzip2 compressed tar archive name for reading or writing.
1700            Appending is not allowed.
1701         """
1702         if len(mode) > 1 or mode not in "rw":
1703             raise ValueError("mode must be 'r' or 'w'.")
1704
1705         try:
1706             import bz2
1707         except ImportError:
1708             raise CompressionError("bz2 module is not available")
1709
1710         if fileobj is not None:
1711             fileobj = _BZ2Proxy(fileobj, mode)
1712         else:
1713             fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
1714
1715         try:
1716             t = cls.taropen(name, mode, fileobj, **kwargs)
1717         except IOError:
1718             raise ReadError("not a bzip2 file")
1719         t._extfileobj = False
1720         return t
1721
    # All *open() methods are registered here.
    # Each key is a compression type as it appears in a mode string, each
    # value the name of the classmethod that open() looks up with getattr()
    # to handle that type.
    OPEN_METH = {
        "tar": "taropen",   # uncompressed tar
        "gz":  "gzopen",    # gzip compressed tar
        "bz2": "bz2open"    # bzip2 compressed tar
    }
1728
1729     #--------------------------------------------------------------------------
1730     # The public methods which TarFile provides:
1731
1732     def close(self):
1733         """Close the TarFile. In write-mode, two finishing zero blocks are
1734            appended to the archive.
1735         """
1736         if self.closed:
1737             return
1738
1739         if self.mode in "aw":
1740             self.fileobj.write(NUL * (BLOCKSIZE * 2))
1741             self.offset += (BLOCKSIZE * 2)
1742             # fill up the end with zero-blocks
1743             # (like option -b20 for tar does)
1744             blocks, remainder = divmod(self.offset, RECORDSIZE)
1745             if remainder > 0:
1746                 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1747
1748         if not self._extfileobj:
1749             self.fileobj.close()
1750         self.closed = True
1751
1752     def getmember(self, name):
1753         """Return a TarInfo object for member `name'. If `name' can not be
1754            found in the archive, KeyError is raised. If a member occurs more
1755            than once in the archive, its last occurrence is assumed to be the
1756            most up-to-date version.
1757         """
1758         tarinfo = self._getmember(name)
1759         if tarinfo is None:
1760             raise KeyError("filename %r not found" % name)
1761         return tarinfo
1762
1763     def getmembers(self):
1764         """Return the members of the archive as a list of TarInfo objects. The
1765            list has the same order as the members in the archive.
1766         """
1767         self._check()
1768         if not self._loaded:    # if we want to obtain a list of
1769             self._load()        # all members, we first have to
1770                                 # scan the whole archive.
1771         return self.members
1772
1773     def getnames(self):
1774         """Return the members of the archive as a list of their names. It has
1775            the same order as the list returned by getmembers().
1776         """
1777         return [tarinfo.name for tarinfo in self.getmembers()]
1778
1779     def gettarinfo(self, name=None, arcname=None, fileobj=None):
1780         """Create a TarInfo object for either the file `name' or the file
1781            object `fileobj' (using os.fstat on its file descriptor). You can
1782            modify some of the TarInfo's attributes before you add it using
1783            addfile(). If given, `arcname' specifies an alternative name for the
1784            file in the archive.
1785         """
1786         self._check("aw")
1787
1788         # When fileobj is given, replace name by
1789         # fileobj's real name.
1790         if fileobj is not None:
1791             name = fileobj.name
1792
1793         # Building the name of the member in the archive.
1794         # Backward slashes are converted to forward slashes,
1795         # Absolute paths are turned to relative paths.
1796         if arcname is None:
1797             arcname = name
1798         arcname = normpath(arcname)
1799         drv, arcname = os.path.splitdrive(arcname)
1800         while arcname[0:1] == "/":
1801             arcname = arcname[1:]
1802
1803         # Now, fill the TarInfo object with
1804         # information specific for the file.
1805         tarinfo = self.tarinfo()
1806         tarinfo.tarfile = self
1807
1808         # Use os.stat or os.lstat, depending on platform
1809         # and if symlinks shall be resolved.
1810         if fileobj is None:
1811             if hasattr(os, "lstat") and not self.dereference:
1812                 statres = os.lstat(name)
1813             else:
1814                 statres = os.stat(name)
1815         else:
1816             statres = os.fstat(fileobj.fileno())
1817         linkname = ""
1818
1819         stmd = statres.st_mode
1820         if stat.S_ISREG(stmd):
1821             inode = (statres.st_ino, statres.st_dev)
1822             if not self.dereference and statres.st_nlink > 1 and \
1823                     inode in self.inodes and arcname != self.inodes[inode]:
1824                 # Is it a hardlink to an already
1825                 # archived file?
1826                 type = LNKTYPE
1827                 linkname = self.inodes[inode]
1828             else:
1829                 # The inode is added only if its valid.
1830                 # For win32 it is always 0.
1831                 type = REGTYPE
1832                 if inode[0]:
1833                     self.inodes[inode] = arcname
1834         elif stat.S_ISDIR(stmd):
1835             type = DIRTYPE
1836         elif stat.S_ISFIFO(stmd):
1837             type = FIFOTYPE
1838         elif stat.S_ISLNK(stmd):
1839             type = SYMTYPE
1840             linkname = os.readlink(name)
1841         elif stat.S_ISCHR(stmd):
1842             type = CHRTYPE
1843         elif stat.S_ISBLK(stmd):
1844             type = BLKTYPE
1845         else:
1846             return None
1847
1848         # Fill the TarInfo object with all
1849         # information we can get.
1850         tarinfo.name = arcname
1851         tarinfo.mode = stmd
1852         tarinfo.uid = statres.st_uid
1853         tarinfo.gid = statres.st_gid
1854         if stat.S_ISREG(stmd):
1855             tarinfo.size = statres.st_size
1856         else:
1857             tarinfo.size = 0
1858         tarinfo.mtime = statres.st_mtime
1859         tarinfo.type = type
1860         tarinfo.linkname = linkname
1861         if pwd:
1862             try:
1863                 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1864             except KeyError:
1865                 pass
1866         if grp:
1867             try:
1868                 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1869             except KeyError:
1870                 pass
1871
1872         if type in (CHRTYPE, BLKTYPE):
1873             if hasattr(os, "major") and hasattr(os, "minor"):
1874                 tarinfo.devmajor = os.major(statres.st_rdev)
1875                 tarinfo.devminor = os.minor(statres.st_rdev)
1876         return tarinfo
1877
1878     def list(self, verbose=True):
1879         """Print a table of contents to sys.stdout. If `verbose' is False, only
1880            the names of the members are printed. If it is True, an `ls -l'-like
1881            output is produced.
1882         """
1883         self._check()
1884
1885         for tarinfo in self:
1886             if verbose:
1887                 print(filemode(tarinfo.mode), end=' ')
1888                 print("%s/%s" % (tarinfo.uname or tarinfo.uid,
1889                                  tarinfo.gname or tarinfo.gid), end=' ')
1890                 if tarinfo.ischr() or tarinfo.isblk():
1891                     print("%10s" % ("%d,%d" \
1892                                     % (tarinfo.devmajor, tarinfo.devminor)), end=' ')
1893                 else:
1894                     print("%10d" % tarinfo.size, end=' ')
1895                 print("%d-%02d-%02d %02d:%02d:%02d" \
1896                       % time.localtime(tarinfo.mtime)[:6], end=' ')
1897
1898             print(tarinfo.name + ("/" if tarinfo.isdir() else ""), end=' ')
1899
1900             if verbose:
1901                 if tarinfo.issym():
1902                     print("->", tarinfo.linkname, end=' ')
1903                 if tarinfo.islnk():
1904                     print("link to", tarinfo.linkname, end=' ')
1905             print()
1906
1907     def add(self, name, arcname=None, recursive=True, exclude=None):
1908         """Add the file `name' to the archive. `name' may be any type of file
1909            (directory, fifo, symbolic link, etc.). If given, `arcname'
1910            specifies an alternative name for the file in the archive.
1911            Directories are added recursively by default. This can be avoided by
1912            setting `recursive' to False. `exclude' is a function that should
1913            return True for each filename to be excluded.
1914         """
1915         self._check("aw")
1916
1917         if arcname is None:
1918             arcname = name
1919
1920         # Exclude pathnames.
1921         if exclude is not None and exclude(name):
1922             self._dbg(2, "tarfile: Excluded %r" % name)
1923             return
1924
1925         # Skip if somebody tries to archive the archive...
1926         if self.name is not None and os.path.abspath(name) == self.name:
1927             self._dbg(2, "tarfile: Skipped %r" % name)
1928             return
1929
1930         # Special case: The user wants to add the current
1931         # working directory.
1932         if name == ".":
1933             if recursive:
1934                 if arcname == ".":
1935                     arcname = ""
1936                 for f in os.listdir(name):
1937                     self.add(f, os.path.join(arcname, f), recursive, exclude)
1938             return
1939
1940         self._dbg(1, name)
1941
1942         # Create a TarInfo object from the file.
1943         tarinfo = self.gettarinfo(name, arcname)
1944
1945         if tarinfo is None:
1946             self._dbg(1, "tarfile: Unsupported type %r" % name)
1947             return
1948
1949         # Append the tar header and data to the archive.
1950         if tarinfo.isreg():
1951             f = bltn_open(name, "rb")
1952             self.addfile(tarinfo, f)
1953             f.close()
1954
1955         elif tarinfo.isdir():
1956             self.addfile(tarinfo)
1957             if recursive:
1958                 for f in os.listdir(name):
1959                     self.add(os.path.join(name, f), os.path.join(arcname, f), recursive, exclude)
1960
1961         else:
1962             self.addfile(tarinfo)
1963
1964     def addfile(self, tarinfo, fileobj=None):
1965         """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
1966            given, tarinfo.size bytes are read from it and added to the archive.
1967            You can create TarInfo objects using gettarinfo().
1968            On Windows platforms, `fileobj' should always be opened with mode
1969            'rb' to avoid irritation about the file size.
1970         """
1971         self._check("aw")
1972
1973         tarinfo = copy.copy(tarinfo)
1974
1975         buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
1976         self.fileobj.write(buf)
1977         self.offset += len(buf)
1978
1979         # If there's data to follow, append it.
1980         if fileobj is not None:
1981             copyfileobj(fileobj, self.fileobj, tarinfo.size)
1982             blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
1983             if remainder > 0:
1984                 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
1985                 blocks += 1
1986             self.offset += blocks * BLOCKSIZE
1987
1988         self.members.append(tarinfo)
1989
1990     def extractall(self, path=".", members=None):
1991         """Extract all members from the archive to the current working
1992            directory and set owner, modification time and permissions on
1993            directories afterwards. `path' specifies a different directory
1994            to extract to. `members' is optional and must be a subset of the
1995            list returned by getmembers().
1996         """
1997         directories = []
1998
1999         if members is None:
2000             members = self
2001
2002         for tarinfo in members:
2003             if tarinfo.isdir():
2004                 # Extract directories with a safe mode.
2005                 directories.append(tarinfo)
2006                 tarinfo = copy.copy(tarinfo)
2007                 tarinfo.mode = 0o700
2008             self.extract(tarinfo, path)
2009
2010         # Reverse sort directories.
2011         directories.sort(key=lambda a: a.name)
2012         directories.reverse()
2013
2014         # Set correct owner, mtime and filemode on directories.
2015         for tarinfo in directories:
2016             dirpath = os.path.join(path, tarinfo.name)
2017             try:
2018                 self.chown(tarinfo, dirpath)
2019                 self.utime(tarinfo, dirpath)
2020                 self.chmod(tarinfo, dirpath)
2021             except ExtractError as e:
2022                 if self.errorlevel > 1:
2023                     raise
2024                 else:
2025                     self._dbg(1, "tarfile: %s" % e)
2026
2027     def extract(self, member, path=""):
2028         """Extract a member from the archive to the current working directory,
2029            using its full name. Its file information is extracted as accurately
2030            as possible. `member' may be a filename or a TarInfo object. You can
2031            specify a different directory using `path'.
2032         """
2033         self._check("r")
2034
2035         if isinstance(member, str):
2036             tarinfo = self.getmember(member)
2037         else:
2038             tarinfo = member
2039
2040         # Prepare the link target for makelink().
2041         if tarinfo.islnk():
2042             tarinfo._link_target = os.path.join(path, tarinfo.linkname)
2043
2044         try:
2045             self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
2046         except EnvironmentError as e:
2047             if self.errorlevel > 0:
2048                 raise
2049             else:
2050                 if e.filename is None:
2051                     self._dbg(1, "tarfile: %s" % e.strerror)
2052                 else:
2053                     self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
2054         except ExtractError as e:
2055             if self.errorlevel > 1:
2056                 raise
2057             else:
2058                 self._dbg(1, "tarfile: %s" % e)
2059
2060     def extractfile(self, member):
2061         """Extract a member from the archive as a file object. `member' may be
2062            a filename or a TarInfo object. If `member' is a regular file, a
2063            file-like object is returned. If `member' is a link, a file-like
2064            object is constructed from the link's target. If `member' is none of
2065            the above, None is returned.
2066            The file-like object is read-only and provides the following
2067            methods: read(), readline(), readlines(), seek() and tell()
2068         """
2069         self._check("r")
2070
2071         if isinstance(member, str):
2072             tarinfo = self.getmember(member)
2073         else:
2074             tarinfo = member
2075
2076         if tarinfo.isreg():
2077             return self.fileobject(self, tarinfo)
2078
2079         elif tarinfo.type not in SUPPORTED_TYPES:
2080             # If a member's type is unknown, it is treated as a
2081             # regular file.
2082             return self.fileobject(self, tarinfo)
2083
2084         elif tarinfo.islnk() or tarinfo.issym():
2085             if isinstance(self.fileobj, _Stream):
2086                 # A small but ugly workaround for the case that someone tries
2087                 # to extract a (sym)link as a file-object from a non-seekable
2088                 # stream of tar blocks.
2089                 raise StreamError("cannot extract (sym)link as file object")
2090             else:
2091                 # A (sym)link's file object is its target's file object.
2092                 return self.extractfile(self._getmember(tarinfo.linkname,
2093                                                         tarinfo))
2094         else:
2095             # If there's no data associated with the member (directory, chrdev,
2096             # blkdev, etc.), return None instead of a file object.
2097             return None
2098
2099     def _extract_member(self, tarinfo, targetpath):
2100         """Extract the TarInfo object tarinfo to a physical
2101            file called targetpath.
2102         """
2103         # Fetch the TarInfo object for the given name
2104         # and build the destination pathname, replacing
2105         # forward slashes to platform specific separators.
2106         if targetpath[-1:] == "/":
2107             targetpath = targetpath[:-1]
2108         targetpath = os.path.normpath(targetpath)
2109
2110         # Create all upper directories.
2111         upperdirs = os.path.dirname(targetpath)
2112         if upperdirs and not os.path.exists(upperdirs):
2113             # Create directories that are not part of the archive with
2114             # default permissions.
2115             os.makedirs(upperdirs)
2116
2117         if tarinfo.islnk() or tarinfo.issym():
2118             self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2119         else:
2120             self._dbg(1, tarinfo.name)
2121
2122         if tarinfo.isreg():
2123             self.makefile(tarinfo, targetpath)
2124         elif tarinfo.isdir():
2125             self.makedir(tarinfo, targetpath)
2126         elif tarinfo.isfifo():
2127             self.makefifo(tarinfo, targetpath)
2128         elif tarinfo.ischr() or tarinfo.isblk():
2129             self.makedev(tarinfo, targetpath)
2130         elif tarinfo.islnk() or tarinfo.issym():
2131             self.makelink(tarinfo, targetpath)
2132         elif tarinfo.type not in SUPPORTED_TYPES:
2133             self.makeunknown(tarinfo, targetpath)
2134         else:
2135             self.makefile(tarinfo, targetpath)
2136
2137         self.chown(tarinfo, targetpath)
2138         if not tarinfo.issym():
2139             self.chmod(tarinfo, targetpath)
2140             self.utime(tarinfo, targetpath)
2141
2142     #--------------------------------------------------------------------------
2143     # Below are the different file methods. They are called via
2144     # _extract_member() when extract() is called. They can be replaced in a
2145     # subclass to implement other functionality.
2146
2147     def makedir(self, tarinfo, targetpath):
2148         """Make a directory called targetpath.
2149         """
2150         try:
2151             # Use a safe mode for the directory, the real mode is set
2152             # later in _extract_member().
2153             os.mkdir(targetpath, 0o700)
2154         except EnvironmentError as e:
2155             if e.errno != errno.EEXIST:
2156                 raise
2157
2158     def makefile(self, tarinfo, targetpath):
2159         """Make a file called targetpath.
2160         """
2161         source = self.extractfile(tarinfo)
2162         target = bltn_open(targetpath, "wb")
2163         copyfileobj(source, target)
2164         source.close()
2165         target.close()
2166
2167     def makeunknown(self, tarinfo, targetpath):
2168         """Make a file from a TarInfo object with an unknown type
2169            at targetpath.
2170         """
2171         self.makefile(tarinfo, targetpath)
2172         self._dbg(1, "tarfile: Unknown file type %r, " \
2173                      "extracted as regular file." % tarinfo.type)
2174
2175     def makefifo(self, tarinfo, targetpath):
2176         """Make a fifo called targetpath.
2177         """
2178         if hasattr(os, "mkfifo"):
2179             os.mkfifo(targetpath)
2180         else:
2181             raise ExtractError("fifo not supported by system")
2182
2183     def makedev(self, tarinfo, targetpath):
2184         """Make a character or block device called targetpath.
2185         """
2186         if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
2187             raise ExtractError("special devices not supported by system")
2188
2189         mode = tarinfo.mode
2190         if tarinfo.isblk():
2191             mode |= stat.S_IFBLK
2192         else:
2193             mode |= stat.S_IFCHR
2194
2195         os.mknod(targetpath, mode,
2196                  os.makedev(tarinfo.devmajor, tarinfo.devminor))
2197
2198     def makelink(self, tarinfo, targetpath):
2199         """Make a (symbolic) link called targetpath. If it cannot be created
2200           (platform limitation), we try to make a copy of the referenced file
2201           instead of a link.
2202         """
2203         linkpath = tarinfo.linkname
2204         try:
2205             if tarinfo.issym():
2206                 os.symlink(linkpath, targetpath)
2207             else:
2208                 # See extract().
2209                 os.link(tarinfo._link_target, targetpath)
2210         except AttributeError:
2211             if tarinfo.issym():
2212                 linkpath = os.path.join(os.path.dirname(tarinfo.name),
2213                                         linkpath)
2214                 linkpath = normpath(linkpath)
2215
2216             try:
2217                 self._extract_member(self.getmember(linkpath), targetpath)
2218             except (EnvironmentError, KeyError) as e:
2219                 linkpath = os.path.normpath(linkpath)
2220                 try:
2221                     shutil.copy2(linkpath, targetpath)
2222                 except EnvironmentError as e:
2223                     raise IOError("link could not be created")
2224
2225     def chown(self, tarinfo, targetpath):
2226         """Set owner of targetpath according to tarinfo.
2227         """
2228         if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
2229             # We have to be root to do so.
2230             try:
2231                 g = grp.getgrnam(tarinfo.gname)[2]
2232             except KeyError:
2233                 try:
2234                     g = grp.getgrgid(tarinfo.gid)[2]
2235                 except KeyError:
2236                     g = os.getgid()
2237             try:
2238                 u = pwd.getpwnam(tarinfo.uname)[2]
2239             except KeyError:
2240                 try:
2241                     u = pwd.getpwuid(tarinfo.uid)[2]
2242                 except KeyError:
2243                     u = os.getuid()
2244             try:
2245                 if tarinfo.issym() and hasattr(os, "lchown"):
2246                     os.lchown(targetpath, u, g)
2247                 else:
2248                     if sys.platform != "os2emx":
2249                         os.chown(targetpath, u, g)
2250             except EnvironmentError as e:
2251                 raise ExtractError("could not change owner")
2252
2253     def chmod(self, tarinfo, targetpath):
2254         """Set file permissions of targetpath according to tarinfo.
2255         """
2256         if hasattr(os, 'chmod'):
2257             try:
2258                 os.chmod(targetpath, tarinfo.mode)
2259             except EnvironmentError as e:
2260                 raise ExtractError("could not change mode")
2261
2262     def utime(self, tarinfo, targetpath):
2263         """Set modification time of targetpath according to tarinfo.
2264         """
2265         if not hasattr(os, 'utime'):
2266             return
2267         try:
2268             os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
2269         except EnvironmentError as e:
2270             raise ExtractError("could not change modification time")
2271
2272     #--------------------------------------------------------------------------
2273     def next(self):
2274         """Return the next member of the archive as a TarInfo object, when
2275            TarFile is opened for reading. Return None if there is no more
2276            available.
2277         """
2278         self._check("ra")
2279         if self.firstmember is not None:
2280             m = self.firstmember
2281             self.firstmember = None
2282             return m
2283
2284         # Read the next block.
2285         self.fileobj.seek(self.offset)
2286         while True:
2287             try:
2288                 tarinfo = self.tarinfo.fromtarfile(self)
2289                 if tarinfo is None:
2290                     return
2291                 self.members.append(tarinfo)
2292
2293             except HeaderError as e:
2294                 if self.ignore_zeros:
2295                     self._dbg(2, "0x%X: %s" % (self.offset, e))
2296                     self.offset += BLOCKSIZE
2297                     continue
2298                 else:
2299                     if self.offset == 0:
2300                         raise ReadError(str(e))
2301                     return None
2302             break
2303
2304         return tarinfo
2305
2306     #--------------------------------------------------------------------------
2307     # Little helper methods:
2308
2309     def _getmember(self, name, tarinfo=None):
2310         """Find an archive member by name from bottom to top.
2311            If tarinfo is given, it is used as the starting point.
2312         """
2313         # Ensure that all members have been loaded.
2314         members = self.getmembers()
2315
2316         if tarinfo is None:
2317             end = len(members)
2318         else:
2319             end = members.index(tarinfo)
2320
2321         for i in range(end - 1, -1, -1):
2322             if name == members[i].name:
2323                 return members[i]
2324
2325     def _load(self):
2326         """Read through the entire archive file and look for readable
2327            members.
2328         """
2329         while True:
2330             tarinfo = self.next()
2331             if tarinfo is None:
2332                 break
2333         self._loaded = True
2334
2335     def _check(self, mode=None):
2336         """Check if TarFile is still open, and if the operation's mode
2337            corresponds to TarFile's mode.
2338         """
2339         if self.closed:
2340             raise IOError("%s is closed" % self.__class__.__name__)
2341         if mode is not None and self.mode not in mode:
2342             raise IOError("bad operation for mode %r" % self.mode)
2343
2344     def __iter__(self):
2345         """Provide an iterator object.
2346         """
2347         if self._loaded:
2348             return iter(self.members)
2349         else:
2350             return TarIter(self)
2351
2352     def _dbg(self, level, msg):
2353         """Write debugging output to sys.stderr.
2354         """
2355         if level <= self.debug:
2356             print(msg, file=sys.stderr)
2357 # class TarFile
2358
class TarIter:
    """Iterator Class.

       for tarinfo in TarFile(...):
           suite...

       Every iterator walks the archive from its first member, no matter
       how many members have been read before by next(), getmembers() or
       another iterator.
    """

    def __init__(self, tarfile):
        """Construct a TarIter object for the TarFile `tarfile'.
        """
        self.tarfile = tarfile
        # Position of the member that __next__() returns next.
        self.index = 0

    def __iter__(self):
        """Return iterator object.
        """
        return self

    def __next__(self):
        """Return the next member. Members that are in the TarFile's
           member list already are served from there, further ones are
           read with TarFile's next() method. When all members have been
           read, set TarFile as _loaded and raise StopIteration.
        """
        tarfile = self.tarfile
        if self.index == 0 and getattr(tarfile, "firstmember", None) is not None:
            # Let next() hand out (and thereby clear) the member that was
            # read ahead, so that it is not returned a second time later.
            tarinfo = tarfile.next()
        elif self.index < len(tarfile.members):
            # Read before, e.g. by getmembers() (SF #1100429), by a
            # direct call to next() or by another iterator.
            # NOTE(review): relies on next() adding every member it reads
            # to tarfile.members.
            tarinfo = tarfile.members[self.index]
        elif not tarfile._loaded:
            tarinfo = tarfile.next()
            if not tarinfo:
                tarfile._loaded = True
                raise StopIteration
        else:
            raise StopIteration
        self.index += 1
        return tarinfo
2394
2395 # Helper classes for sparse file support
2396 class _section:
2397     """Base class for _data and _hole.
2398     """
2399     def __init__(self, offset, size):
2400         self.offset = offset
2401         self.size = size
2402     def __contains__(self, offset):
2403         return self.offset <= offset < self.offset + self.size
2404
class _data(_section):
    """A section of a sparse file that holds data; `realpos' is stored
       alongside the offset and size of the section.
    """

    def __init__(self, offset, size, realpos):
        super().__init__(offset, size)
        self.realpos = realpos
2411
class _hole(_section):
    """A section of a sparse file that is a hole. It adds nothing to
       _section and only serves to tell holes from data sections.
    """
2416
2417 class _ringbuffer(list):
2418     """Ringbuffer class which increases performance
2419        over a regular list.
2420     """
2421     def __init__(self):
2422         self.idx = 0
2423     def find(self, offset):
2424         idx = self.idx
2425         while True:
2426             item = self[idx]
2427             if offset in item:
2428                 break
2429             idx += 1
2430             if idx == len(self):
2431                 idx = 0
2432             if idx == self.idx:
2433                 # End of File
2434                 return None
2435         self.idx = idx
2436         return item
2437
2438 #--------------------
2439 # exported functions
2440 #--------------------
def is_tarfile(name):
    """Return True if `name' can be opened as a tar archive by this
       module, and False if the attempt fails with a TarError.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2451
# Keep the open() that is in scope at this point reachable as bltn_open,
# because the module-level name `open' is rebound on the next line.
bltn_open = open
# Make TarFile.open available as the module-level open() function.
open = TarFile.open