Issue #7379: Fix incorrect doctest for Fraction.limit_denominator.
[python.git] / Lib / tarfile.py
blobf48b477bb6319caec86dd71f2710cf7c65d43b79
1 #!/usr/bin/env python
2 # -*- coding: iso-8859-1 -*-
3 #-------------------------------------------------------------------
4 # tarfile.py
5 #-------------------------------------------------------------------
6 # Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7 # All rights reserved.
9 # Permission is hereby granted, free of charge, to any person
10 # obtaining a copy of this software and associated documentation
11 # files (the "Software"), to deal in the Software without
12 # restriction, including without limitation the rights to use,
13 # copy, modify, merge, publish, distribute, sublicense, and/or sell
14 # copies of the Software, and to permit persons to whom the
15 # Software is furnished to do so, subject to the following
16 # conditions:
18 # The above copyright notice and this permission notice shall be
19 # included in all copies or substantial portions of the Software.
21 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23 # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25 # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 # OTHER DEALINGS IN THE SOFTWARE.
30 """Read from and write to tar format archives.
31 """
33 __version__ = "$Revision$"
34 # $Source$
36 version = "0.9.0"
37 __author__ = "Lars Gustäbel (lars@gustaebel.de)"
38 __date__ = "$Date$"
39 __cvsid__ = "$Id$"
40 __credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
42 #---------
43 # Imports
44 #---------
45 import sys
46 import os
47 import shutil
48 import stat
49 import errno
50 import time
51 import struct
52 import copy
53 import re
54 import operator
56 if sys.platform == 'mac':
57 # This module needs work for MacOS9, especially in the area of pathname
58 # handling. In many places it is assumed a simple substitution of / by the
59 # local os.path.sep is good enough to convert pathnames, but this does not
60 # work with the mac rooted:path:name versus :nonrooted:path:name syntax
61 raise ImportError, "tarfile does not work for platform==mac"
63 try:
64 import grp, pwd
65 except ImportError:
66 grp = pwd = None
68 # from tarfile import *
69 __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
71 #---------------------------------------------------------
72 # tar constants
73 #---------------------------------------------------------
74 NUL = "\0" # the null character
75 BLOCKSIZE = 512 # length of processing blocks
76 RECORDSIZE = BLOCKSIZE * 20 # length of records
77 GNU_MAGIC = "ustar \0" # magic gnu tar string
78 POSIX_MAGIC = "ustar\x0000" # magic posix tar string
80 LENGTH_NAME = 100 # maximum length of a filename
81 LENGTH_LINK = 100 # maximum length of a linkname
82 LENGTH_PREFIX = 155 # maximum length of the prefix field
84 REGTYPE = "0" # regular file
85 AREGTYPE = "\0" # regular file
86 LNKTYPE = "1" # link (inside tarfile)
87 SYMTYPE = "2" # symbolic link
88 CHRTYPE = "3" # character special device
89 BLKTYPE = "4" # block special device
90 DIRTYPE = "5" # directory
91 FIFOTYPE = "6" # fifo special device
92 CONTTYPE = "7" # contiguous file
94 GNUTYPE_LONGNAME = "L" # GNU tar longname
95 GNUTYPE_LONGLINK = "K" # GNU tar longlink
96 GNUTYPE_SPARSE = "S" # GNU tar sparse file
98 XHDTYPE = "x" # POSIX.1-2001 extended header
99 XGLTYPE = "g" # POSIX.1-2001 global header
100 SOLARIS_XHDTYPE = "X" # Solaris extended header
102 USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format
103 GNU_FORMAT = 1 # GNU tar format
104 PAX_FORMAT = 2 # POSIX.1-2001 (pax) format
105 DEFAULT_FORMAT = GNU_FORMAT
107 #---------------------------------------------------------
108 # tarfile constants
109 #---------------------------------------------------------
110 # File types that tarfile supports:
111 SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
112 SYMTYPE, DIRTYPE, FIFOTYPE,
113 CONTTYPE, CHRTYPE, BLKTYPE,
114 GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
115 GNUTYPE_SPARSE)
117 # File types that will be treated as a regular file.
118 REGULAR_TYPES = (REGTYPE, AREGTYPE,
119 CONTTYPE, GNUTYPE_SPARSE)
121 # File types that are part of the GNU tar format.
122 GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
123 GNUTYPE_SPARSE)
125 # Fields from a pax header that override a TarInfo attribute.
126 PAX_FIELDS = ("path", "linkpath", "size", "mtime",
127 "uid", "gid", "uname", "gname")
129 # Fields in a pax header that are numbers, all other fields
130 # are treated as strings.
131 PAX_NUMBER_FIELDS = {
132 "atime": float,
133 "ctime": float,
134 "mtime": float,
135 "uid": int,
136 "gid": int,
137 "size": int
140 #---------------------------------------------------------
141 # Bits used in the mode field, values in octal.
142 #---------------------------------------------------------
143 S_IFLNK = 0120000 # symbolic link
144 S_IFREG = 0100000 # regular file
145 S_IFBLK = 0060000 # block device
146 S_IFDIR = 0040000 # directory
147 S_IFCHR = 0020000 # character device
148 S_IFIFO = 0010000 # fifo
150 TSUID = 04000 # set UID on execution
151 TSGID = 02000 # set GID on execution
152 TSVTX = 01000 # reserved
154 TUREAD = 0400 # read by owner
155 TUWRITE = 0200 # write by owner
156 TUEXEC = 0100 # execute/search by owner
157 TGREAD = 0040 # read by group
158 TGWRITE = 0020 # write by group
159 TGEXEC = 0010 # execute/search by group
160 TOREAD = 0004 # read by other
161 TOWRITE = 0002 # write by other
162 TOEXEC = 0001 # execute/search by other
164 #---------------------------------------------------------
165 # initialization
166 #---------------------------------------------------------
167 ENCODING = sys.getfilesystemencoding()
168 if ENCODING is None:
169 ENCODING = sys.getdefaultencoding()
171 #---------------------------------------------------------
172 # Some useful functions
173 #---------------------------------------------------------
175 def stn(s, length):
176 """Convert a python string to a null-terminated string buffer.
178 return s[:length] + (length - len(s)) * NUL
180 def nts(s):
181 """Convert a null-terminated string field to a python string.
183 # Use the string up to the first null char.
184 p = s.find("\0")
185 if p == -1:
186 return s
187 return s[:p]
189 def nti(s):
190 """Convert a number field to a python number.
192 # There are two possible encodings for a number field, see
193 # itn() below.
194 if s[0] != chr(0200):
195 try:
196 n = int(nts(s) or "0", 8)
197 except ValueError:
198 raise InvalidHeaderError("invalid header")
199 else:
200 n = 0L
201 for i in xrange(len(s) - 1):
202 n <<= 8
203 n += ord(s[i + 1])
204 return n
206 def itn(n, digits=8, format=DEFAULT_FORMAT):
207 """Convert a python number to a number field.
209 # POSIX 1003.1-1988 requires numbers to be encoded as a string of
210 # octal digits followed by a null-byte, this allows values up to
211 # (8**(digits-1))-1. GNU tar allows storing numbers greater than
212 # that if necessary. A leading 0200 byte indicates this particular
213 # encoding, the following digits-1 bytes are a big-endian
214 # representation. This allows values up to (256**(digits-1))-1.
215 if 0 <= n < 8 ** (digits - 1):
216 s = "%0*o" % (digits - 1, n) + NUL
217 else:
218 if format != GNU_FORMAT or n >= 256 ** (digits - 1):
219 raise ValueError("overflow in number field")
221 if n < 0:
222 # XXX We mimic GNU tar's behaviour with negative numbers,
223 # this could raise OverflowError.
224 n = struct.unpack("L", struct.pack("l", n))[0]
226 s = ""
227 for i in xrange(digits - 1):
228 s = chr(n & 0377) + s
229 n >>= 8
230 s = chr(0200) + s
231 return s
233 def uts(s, encoding, errors):
234 """Convert a unicode object to a string.
236 if errors == "utf-8":
237 # An extra error handler similar to the -o invalid=UTF-8 option
238 # in POSIX.1-2001. Replace untranslatable characters with their
239 # UTF-8 representation.
240 try:
241 return s.encode(encoding, "strict")
242 except UnicodeEncodeError:
243 x = []
244 for c in s:
245 try:
246 x.append(c.encode(encoding, "strict"))
247 except UnicodeEncodeError:
248 x.append(c.encode("utf8"))
249 return "".join(x)
250 else:
251 return s.encode(encoding, errors)
253 def calc_chksums(buf):
254 """Calculate the checksum for a member's header by summing up all
255 characters except for the chksum field which is treated as if
256 it was filled with spaces. According to the GNU tar sources,
257 some tars (Sun and NeXT) calculate chksum with signed char,
258 which will be different if there are chars in the buffer with
259 the high bit set. So we calculate two checksums, unsigned and
260 signed.
262 unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512]))
263 signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512]))
264 return unsigned_chksum, signed_chksum
266 def copyfileobj(src, dst, length=None):
267 """Copy length bytes from fileobj src to fileobj dst.
268 If length is None, copy the entire content.
270 if length == 0:
271 return
272 if length is None:
273 shutil.copyfileobj(src, dst)
274 return
276 BUFSIZE = 16 * 1024
277 blocks, remainder = divmod(length, BUFSIZE)
278 for b in xrange(blocks):
279 buf = src.read(BUFSIZE)
280 if len(buf) < BUFSIZE:
281 raise IOError("end of file reached")
282 dst.write(buf)
284 if remainder != 0:
285 buf = src.read(remainder)
286 if len(buf) < remainder:
287 raise IOError("end of file reached")
288 dst.write(buf)
289 return
291 filemode_table = (
292 ((S_IFLNK, "l"),
293 (S_IFREG, "-"),
294 (S_IFBLK, "b"),
295 (S_IFDIR, "d"),
296 (S_IFCHR, "c"),
297 (S_IFIFO, "p")),
299 ((TUREAD, "r"),),
300 ((TUWRITE, "w"),),
301 ((TUEXEC|TSUID, "s"),
302 (TSUID, "S"),
303 (TUEXEC, "x")),
305 ((TGREAD, "r"),),
306 ((TGWRITE, "w"),),
307 ((TGEXEC|TSGID, "s"),
308 (TSGID, "S"),
309 (TGEXEC, "x")),
311 ((TOREAD, "r"),),
312 ((TOWRITE, "w"),),
313 ((TOEXEC|TSVTX, "t"),
314 (TSVTX, "T"),
315 (TOEXEC, "x"))
318 def filemode(mode):
319 """Convert a file's mode to a string of the form
320 -rwxrwxrwx.
321 Used by TarFile.list()
323 perm = []
324 for table in filemode_table:
325 for bit, char in table:
326 if mode & bit == bit:
327 perm.append(char)
328 break
329 else:
330 perm.append("-")
331 return "".join(perm)
333 class TarError(Exception):
334 """Base exception."""
335 pass
336 class ExtractError(TarError):
337 """General exception for extract errors."""
338 pass
339 class ReadError(TarError):
340 """Exception for unreadble tar archives."""
341 pass
342 class CompressionError(TarError):
343 """Exception for unavailable compression methods."""
344 pass
345 class StreamError(TarError):
346 """Exception for unsupported operations on stream-like TarFiles."""
347 pass
348 class HeaderError(TarError):
349 """Base exception for header errors."""
350 pass
351 class EmptyHeaderError(HeaderError):
352 """Exception for empty headers."""
353 pass
354 class TruncatedHeaderError(HeaderError):
355 """Exception for truncated headers."""
356 pass
357 class EOFHeaderError(HeaderError):
358 """Exception for end of file headers."""
359 pass
360 class InvalidHeaderError(HeaderError):
361 """Exception for invalid headers."""
362 pass
363 class SubsequentHeaderError(HeaderError):
364 """Exception for missing and invalid extended headers."""
365 pass
367 #---------------------------
368 # internal stream interface
369 #---------------------------
370 class _LowLevelFile:
371 """Low-level file object. Supports reading and writing.
372 It is used instead of a regular file object for streaming
373 access.
376 def __init__(self, name, mode):
377 mode = {
378 "r": os.O_RDONLY,
379 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
380 }[mode]
381 if hasattr(os, "O_BINARY"):
382 mode |= os.O_BINARY
383 self.fd = os.open(name, mode)
385 def close(self):
386 os.close(self.fd)
388 def read(self, size):
389 return os.read(self.fd, size)
391 def write(self, s):
392 os.write(self.fd, s)
394 class _Stream:
395 """Class that serves as an adapter between TarFile and
396 a stream-like object. The stream-like object only
397 needs to have a read() or write() method and is accessed
398 blockwise. Use of gzip or bzip2 compression is possible.
399 A stream-like object could be for example: sys.stdin,
400 sys.stdout, a socket, a tape device etc.
402 _Stream is intended to be used only internally.
405 def __init__(self, name, mode, comptype, fileobj, bufsize):
406 """Construct a _Stream object.
408 self._extfileobj = True
409 if fileobj is None:
410 fileobj = _LowLevelFile(name, mode)
411 self._extfileobj = False
413 if comptype == '*':
414 # Enable transparent compression detection for the
415 # stream interface
416 fileobj = _StreamProxy(fileobj)
417 comptype = fileobj.getcomptype()
419 self.name = name or ""
420 self.mode = mode
421 self.comptype = comptype
422 self.fileobj = fileobj
423 self.bufsize = bufsize
424 self.buf = ""
425 self.pos = 0L
426 self.closed = False
428 if comptype == "gz":
429 try:
430 import zlib
431 except ImportError:
432 raise CompressionError("zlib module is not available")
433 self.zlib = zlib
434 self.crc = zlib.crc32("") & 0xffffffffL
435 if mode == "r":
436 self._init_read_gz()
437 else:
438 self._init_write_gz()
440 if comptype == "bz2":
441 try:
442 import bz2
443 except ImportError:
444 raise CompressionError("bz2 module is not available")
445 if mode == "r":
446 self.dbuf = ""
447 self.cmp = bz2.BZ2Decompressor()
448 else:
449 self.cmp = bz2.BZ2Compressor()
451 def __del__(self):
452 if hasattr(self, "closed") and not self.closed:
453 self.close()
455 def _init_write_gz(self):
456 """Initialize for writing with gzip compression.
458 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
459 -self.zlib.MAX_WBITS,
460 self.zlib.DEF_MEM_LEVEL,
462 timestamp = struct.pack("<L", long(time.time()))
463 self.__write("\037\213\010\010%s\002\377" % timestamp)
464 if self.name.endswith(".gz"):
465 self.name = self.name[:-3]
466 self.__write(self.name + NUL)
468 def write(self, s):
469 """Write string s to the stream.
471 if self.comptype == "gz":
472 self.crc = self.zlib.crc32(s, self.crc) & 0xffffffffL
473 self.pos += len(s)
474 if self.comptype != "tar":
475 s = self.cmp.compress(s)
476 self.__write(s)
478 def __write(self, s):
479 """Write string s to the stream if a whole new block
480 is ready to be written.
482 self.buf += s
483 while len(self.buf) > self.bufsize:
484 self.fileobj.write(self.buf[:self.bufsize])
485 self.buf = self.buf[self.bufsize:]
487 def close(self):
488 """Close the _Stream object. No operation should be
489 done on it afterwards.
491 if self.closed:
492 return
494 if self.mode == "w" and self.comptype != "tar":
495 self.buf += self.cmp.flush()
497 if self.mode == "w" and self.buf:
498 self.fileobj.write(self.buf)
499 self.buf = ""
500 if self.comptype == "gz":
501 # The native zlib crc is an unsigned 32-bit integer, but
502 # the Python wrapper implicitly casts that to a signed C
503 # long. So, on a 32-bit box self.crc may "look negative",
504 # while the same crc on a 64-bit box may "look positive".
505 # To avoid irksome warnings from the `struct` module, force
506 # it to look positive on all boxes.
507 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
508 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
510 if not self._extfileobj:
511 self.fileobj.close()
513 self.closed = True
515 def _init_read_gz(self):
516 """Initialize for reading a gzip compressed fileobj.
518 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
519 self.dbuf = ""
521 # taken from gzip.GzipFile with some alterations
522 if self.__read(2) != "\037\213":
523 raise ReadError("not a gzip file")
524 if self.__read(1) != "\010":
525 raise CompressionError("unsupported compression method")
527 flag = ord(self.__read(1))
528 self.__read(6)
530 if flag & 4:
531 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
532 self.read(xlen)
533 if flag & 8:
534 while True:
535 s = self.__read(1)
536 if not s or s == NUL:
537 break
538 if flag & 16:
539 while True:
540 s = self.__read(1)
541 if not s or s == NUL:
542 break
543 if flag & 2:
544 self.__read(2)
546 def tell(self):
547 """Return the stream's file pointer position.
549 return self.pos
551 def seek(self, pos=0):
552 """Set the stream's file pointer to pos. Negative seeking
553 is forbidden.
555 if pos - self.pos >= 0:
556 blocks, remainder = divmod(pos - self.pos, self.bufsize)
557 for i in xrange(blocks):
558 self.read(self.bufsize)
559 self.read(remainder)
560 else:
561 raise StreamError("seeking backwards is not allowed")
562 return self.pos
564 def read(self, size=None):
565 """Return the next size number of bytes from the stream.
566 If size is not defined, return all bytes of the stream
567 up to EOF.
569 if size is None:
570 t = []
571 while True:
572 buf = self._read(self.bufsize)
573 if not buf:
574 break
575 t.append(buf)
576 buf = "".join(t)
577 else:
578 buf = self._read(size)
579 self.pos += len(buf)
580 return buf
582 def _read(self, size):
583 """Return size bytes from the stream.
585 if self.comptype == "tar":
586 return self.__read(size)
588 c = len(self.dbuf)
589 t = [self.dbuf]
590 while c < size:
591 buf = self.__read(self.bufsize)
592 if not buf:
593 break
594 try:
595 buf = self.cmp.decompress(buf)
596 except IOError:
597 raise ReadError("invalid compressed data")
598 t.append(buf)
599 c += len(buf)
600 t = "".join(t)
601 self.dbuf = t[size:]
602 return t[:size]
604 def __read(self, size):
605 """Return size bytes from stream. If internal buffer is empty,
606 read another block from the stream.
608 c = len(self.buf)
609 t = [self.buf]
610 while c < size:
611 buf = self.fileobj.read(self.bufsize)
612 if not buf:
613 break
614 t.append(buf)
615 c += len(buf)
616 t = "".join(t)
617 self.buf = t[size:]
618 return t[:size]
619 # class _Stream
621 class _StreamProxy(object):
622 """Small proxy class that enables transparent compression
623 detection for the Stream interface (mode 'r|*').
626 def __init__(self, fileobj):
627 self.fileobj = fileobj
628 self.buf = self.fileobj.read(BLOCKSIZE)
630 def read(self, size):
631 self.read = self.fileobj.read
632 return self.buf
634 def getcomptype(self):
635 if self.buf.startswith("\037\213\010"):
636 return "gz"
637 if self.buf.startswith("BZh91"):
638 return "bz2"
639 return "tar"
641 def close(self):
642 self.fileobj.close()
643 # class StreamProxy
645 class _BZ2Proxy(object):
646 """Small proxy class that enables external file object
647 support for "r:bz2" and "w:bz2" modes. This is actually
648 a workaround for a limitation in bz2 module's BZ2File
649 class which (unlike gzip.GzipFile) has no support for
650 a file object argument.
653 blocksize = 16 * 1024
655 def __init__(self, fileobj, mode):
656 self.fileobj = fileobj
657 self.mode = mode
658 self.name = getattr(self.fileobj, "name", None)
659 self.init()
661 def init(self):
662 import bz2
663 self.pos = 0
664 if self.mode == "r":
665 self.bz2obj = bz2.BZ2Decompressor()
666 self.fileobj.seek(0)
667 self.buf = ""
668 else:
669 self.bz2obj = bz2.BZ2Compressor()
671 def read(self, size):
672 b = [self.buf]
673 x = len(self.buf)
674 while x < size:
675 raw = self.fileobj.read(self.blocksize)
676 if not raw:
677 break
678 data = self.bz2obj.decompress(raw)
679 b.append(data)
680 x += len(data)
681 self.buf = "".join(b)
683 buf = self.buf[:size]
684 self.buf = self.buf[size:]
685 self.pos += len(buf)
686 return buf
688 def seek(self, pos):
689 if pos < self.pos:
690 self.init()
691 self.read(pos - self.pos)
693 def tell(self):
694 return self.pos
696 def write(self, data):
697 self.pos += len(data)
698 raw = self.bz2obj.compress(data)
699 self.fileobj.write(raw)
701 def close(self):
702 if self.mode == "w":
703 raw = self.bz2obj.flush()
704 self.fileobj.write(raw)
705 # class _BZ2Proxy
707 #------------------------
708 # Extraction file object
709 #------------------------
710 class _FileInFile(object):
711 """A thin wrapper around an existing file object that
712 provides a part of its data as an individual file
713 object.
716 def __init__(self, fileobj, offset, size, sparse=None):
717 self.fileobj = fileobj
718 self.offset = offset
719 self.size = size
720 self.sparse = sparse
721 self.position = 0
723 def tell(self):
724 """Return the current file position.
726 return self.position
728 def seek(self, position):
729 """Seek to a position in the file.
731 self.position = position
733 def read(self, size=None):
734 """Read data from the file.
736 if size is None:
737 size = self.size - self.position
738 else:
739 size = min(size, self.size - self.position)
741 if self.sparse is None:
742 return self.readnormal(size)
743 else:
744 return self.readsparse(size)
746 def readnormal(self, size):
747 """Read operation for regular files.
749 self.fileobj.seek(self.offset + self.position)
750 self.position += size
751 return self.fileobj.read(size)
753 def readsparse(self, size):
754 """Read operation for sparse files.
756 data = []
757 while size > 0:
758 buf = self.readsparsesection(size)
759 if not buf:
760 break
761 size -= len(buf)
762 data.append(buf)
763 return "".join(data)
765 def readsparsesection(self, size):
766 """Read a single section of a sparse file.
768 section = self.sparse.find(self.position)
770 if section is None:
771 return ""
773 size = min(size, section.offset + section.size - self.position)
775 if isinstance(section, _data):
776 realpos = section.realpos + self.position - section.offset
777 self.fileobj.seek(self.offset + realpos)
778 self.position += size
779 return self.fileobj.read(size)
780 else:
781 self.position += size
782 return NUL * size
783 #class _FileInFile
786 class ExFileObject(object):
787 """File-like object for reading an archive member.
788 Is returned by TarFile.extractfile().
790 blocksize = 1024
792 def __init__(self, tarfile, tarinfo):
793 self.fileobj = _FileInFile(tarfile.fileobj,
794 tarinfo.offset_data,
795 tarinfo.size,
796 getattr(tarinfo, "sparse", None))
797 self.name = tarinfo.name
798 self.mode = "r"
799 self.closed = False
800 self.size = tarinfo.size
802 self.position = 0
803 self.buffer = ""
805 def read(self, size=None):
806 """Read at most size bytes from the file. If size is not
807 present or None, read all data until EOF is reached.
809 if self.closed:
810 raise ValueError("I/O operation on closed file")
812 buf = ""
813 if self.buffer:
814 if size is None:
815 buf = self.buffer
816 self.buffer = ""
817 else:
818 buf = self.buffer[:size]
819 self.buffer = self.buffer[size:]
821 if size is None:
822 buf += self.fileobj.read()
823 else:
824 buf += self.fileobj.read(size - len(buf))
826 self.position += len(buf)
827 return buf
829 def readline(self, size=-1):
830 """Read one entire line from the file. If size is present
831 and non-negative, return a string with at most that
832 size, which may be an incomplete line.
834 if self.closed:
835 raise ValueError("I/O operation on closed file")
837 if "\n" in self.buffer:
838 pos = self.buffer.find("\n") + 1
839 else:
840 buffers = [self.buffer]
841 while True:
842 buf = self.fileobj.read(self.blocksize)
843 buffers.append(buf)
844 if not buf or "\n" in buf:
845 self.buffer = "".join(buffers)
846 pos = self.buffer.find("\n") + 1
847 if pos == 0:
848 # no newline found.
849 pos = len(self.buffer)
850 break
852 if size != -1:
853 pos = min(size, pos)
855 buf = self.buffer[:pos]
856 self.buffer = self.buffer[pos:]
857 self.position += len(buf)
858 return buf
860 def readlines(self):
861 """Return a list with all remaining lines.
863 result = []
864 while True:
865 line = self.readline()
866 if not line: break
867 result.append(line)
868 return result
870 def tell(self):
871 """Return the current file position.
873 if self.closed:
874 raise ValueError("I/O operation on closed file")
876 return self.position
878 def seek(self, pos, whence=os.SEEK_SET):
879 """Seek to a position in the file.
881 if self.closed:
882 raise ValueError("I/O operation on closed file")
884 if whence == os.SEEK_SET:
885 self.position = min(max(pos, 0), self.size)
886 elif whence == os.SEEK_CUR:
887 if pos < 0:
888 self.position = max(self.position + pos, 0)
889 else:
890 self.position = min(self.position + pos, self.size)
891 elif whence == os.SEEK_END:
892 self.position = max(min(self.size + pos, self.size), 0)
893 else:
894 raise ValueError("Invalid argument")
896 self.buffer = ""
897 self.fileobj.seek(self.position)
899 def close(self):
900 """Close the file object.
902 self.closed = True
904 def __iter__(self):
905 """Get an iterator over the file's lines.
907 while True:
908 line = self.readline()
909 if not line:
910 break
911 yield line
912 #class ExFileObject
914 #------------------
915 # Exported Classes
916 #------------------
917 class TarInfo(object):
918 """Informational class which holds the details about an
919 archive member given by a tar header block.
920 TarInfo objects are returned by TarFile.getmember(),
921 TarFile.getmembers() and TarFile.gettarinfo() and are
922 usually created internally.
925 def __init__(self, name=""):
926 """Construct a TarInfo object. name is the optional name
927 of the member.
929 self.name = name # member name
930 self.mode = 0644 # file permissions
931 self.uid = 0 # user id
932 self.gid = 0 # group id
933 self.size = 0 # file size
934 self.mtime = 0 # modification time
935 self.chksum = 0 # header checksum
936 self.type = REGTYPE # member type
937 self.linkname = "" # link name
938 self.uname = "root" # user name
939 self.gname = "root" # group name
940 self.devmajor = 0 # device major number
941 self.devminor = 0 # device minor number
943 self.offset = 0 # the tar header starts here
944 self.offset_data = 0 # the file's data starts here
946 self.pax_headers = {} # pax header information
948 # In pax headers the "name" and "linkname" field are called
949 # "path" and "linkpath".
950 def _getpath(self):
951 return self.name
952 def _setpath(self, name):
953 self.name = name
954 path = property(_getpath, _setpath)
956 def _getlinkpath(self):
957 return self.linkname
958 def _setlinkpath(self, linkname):
959 self.linkname = linkname
960 linkpath = property(_getlinkpath, _setlinkpath)
962 def __repr__(self):
963 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
965 def get_info(self, encoding, errors):
966 """Return the TarInfo's attributes as a dictionary.
968 info = {
969 "name": self.name,
970 "mode": self.mode & 07777,
971 "uid": self.uid,
972 "gid": self.gid,
973 "size": self.size,
974 "mtime": self.mtime,
975 "chksum": self.chksum,
976 "type": self.type,
977 "linkname": self.linkname,
978 "uname": self.uname,
979 "gname": self.gname,
980 "devmajor": self.devmajor,
981 "devminor": self.devminor
984 if info["type"] == DIRTYPE and not info["name"].endswith("/"):
985 info["name"] += "/"
987 for key in ("name", "linkname", "uname", "gname"):
988 if type(info[key]) is unicode:
989 info[key] = info[key].encode(encoding, errors)
991 return info
993 def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
994 """Return a tar header as a string of 512 byte blocks.
996 info = self.get_info(encoding, errors)
998 if format == USTAR_FORMAT:
999 return self.create_ustar_header(info)
1000 elif format == GNU_FORMAT:
1001 return self.create_gnu_header(info)
1002 elif format == PAX_FORMAT:
1003 return self.create_pax_header(info, encoding, errors)
1004 else:
1005 raise ValueError("invalid format")
1007 def create_ustar_header(self, info):
1008 """Return the object as a ustar header block.
1010 info["magic"] = POSIX_MAGIC
1012 if len(info["linkname"]) > LENGTH_LINK:
1013 raise ValueError("linkname is too long")
1015 if len(info["name"]) > LENGTH_NAME:
1016 info["prefix"], info["name"] = self._posix_split_name(info["name"])
1018 return self._create_header(info, USTAR_FORMAT)
1020 def create_gnu_header(self, info):
1021 """Return the object as a GNU header block sequence.
1023 info["magic"] = GNU_MAGIC
1025 buf = ""
1026 if len(info["linkname"]) > LENGTH_LINK:
1027 buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK)
1029 if len(info["name"]) > LENGTH_NAME:
1030 buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME)
1032 return buf + self._create_header(info, GNU_FORMAT)
1034 def create_pax_header(self, info, encoding, errors):
1035 """Return the object as a ustar header block. If it cannot be
1036 represented this way, prepend a pax extended header sequence
1037 with supplement information.
1039 info["magic"] = POSIX_MAGIC
1040 pax_headers = self.pax_headers.copy()
1042 # Test string fields for values that exceed the field length or cannot
1043 # be represented in ASCII encoding.
1044 for name, hname, length in (
1045 ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
1046 ("uname", "uname", 32), ("gname", "gname", 32)):
1048 if hname in pax_headers:
1049 # The pax header has priority.
1050 continue
1052 val = info[name].decode(encoding, errors)
1054 # Try to encode the string as ASCII.
1055 try:
1056 val.encode("ascii")
1057 except UnicodeEncodeError:
1058 pax_headers[hname] = val
1059 continue
1061 if len(info[name]) > length:
1062 pax_headers[hname] = val
1064 # Test number fields for values that exceed the field limit or values
1065 # that like to be stored as float.
1066 for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
1067 if name in pax_headers:
1068 # The pax header has priority. Avoid overflow.
1069 info[name] = 0
1070 continue
1072 val = info[name]
1073 if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
1074 pax_headers[name] = unicode(val)
1075 info[name] = 0
1077 # Create a pax extended header if necessary.
1078 if pax_headers:
1079 buf = self._create_pax_generic_header(pax_headers)
1080 else:
1081 buf = ""
1083 return buf + self._create_header(info, USTAR_FORMAT)
1085 @classmethod
1086 def create_pax_global_header(cls, pax_headers):
1087 """Return the object as a pax global header block sequence.
1089 return cls._create_pax_generic_header(pax_headers, type=XGLTYPE)
1091 def _posix_split_name(self, name):
1092 """Split a name longer than 100 chars into a prefix
1093 and a name part.
1095 prefix = name[:LENGTH_PREFIX + 1]
1096 while prefix and prefix[-1] != "/":
1097 prefix = prefix[:-1]
1099 name = name[len(prefix):]
1100 prefix = prefix[:-1]
1102 if not prefix or len(name) > LENGTH_NAME:
1103 raise ValueError("name is too long")
1104 return prefix, name
1106 @staticmethod
1107 def _create_header(info, format):
1108 """Return a header block. info is a dictionary with file
1109 information, format must be one of the *_FORMAT constants.
1111 parts = [
1112 stn(info.get("name", ""), 100),
1113 itn(info.get("mode", 0) & 07777, 8, format),
1114 itn(info.get("uid", 0), 8, format),
1115 itn(info.get("gid", 0), 8, format),
1116 itn(info.get("size", 0), 12, format),
1117 itn(info.get("mtime", 0), 12, format),
1118 " ", # checksum field
1119 info.get("type", REGTYPE),
1120 stn(info.get("linkname", ""), 100),
1121 stn(info.get("magic", POSIX_MAGIC), 8),
1122 stn(info.get("uname", "root"), 32),
1123 stn(info.get("gname", "root"), 32),
1124 itn(info.get("devmajor", 0), 8, format),
1125 itn(info.get("devminor", 0), 8, format),
1126 stn(info.get("prefix", ""), 155)
1129 buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
1130 chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
1131 buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
1132 return buf
1134 @staticmethod
1135 def _create_payload(payload):
1136 """Return the string payload filled with zero bytes
1137 up to the next 512 byte border.
1139 blocks, remainder = divmod(len(payload), BLOCKSIZE)
1140 if remainder > 0:
1141 payload += (BLOCKSIZE - remainder) * NUL
1142 return payload
1144 @classmethod
1145 def _create_gnu_long_header(cls, name, type):
1146 """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
1147 for name.
1149 name += NUL
1151 info = {}
1152 info["name"] = "././@LongLink"
1153 info["type"] = type
1154 info["size"] = len(name)
1155 info["magic"] = GNU_MAGIC
1157 # create extended header + name blocks.
1158 return cls._create_header(info, USTAR_FORMAT) + \
1159 cls._create_payload(name)
1161 @classmethod
1162 def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
1163 """Return a POSIX.1-2001 extended or global header sequence
1164 that contains a list of keyword, value pairs. The values
1165 must be unicode objects.
1167 records = []
1168 for keyword, value in pax_headers.iteritems():
1169 keyword = keyword.encode("utf8")
1170 value = value.encode("utf8")
1171 l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n'
1172 n = p = 0
1173 while True:
1174 n = l + len(str(p))
1175 if n == p:
1176 break
1177 p = n
1178 records.append("%d %s=%s\n" % (p, keyword, value))
1179 records = "".join(records)
1181 # We use a hardcoded "././@PaxHeader" name like star does
1182 # instead of the one that POSIX recommends.
1183 info = {}
1184 info["name"] = "././@PaxHeader"
1185 info["type"] = type
1186 info["size"] = len(records)
1187 info["magic"] = POSIX_MAGIC
1189 # Create pax header + record blocks.
1190 return cls._create_header(info, USTAR_FORMAT) + \
1191 cls._create_payload(records)
1193 @classmethod
1194 def frombuf(cls, buf):
1195 """Construct a TarInfo object from a 512 byte string buffer.
1197 if len(buf) == 0:
1198 raise EmptyHeaderError("empty header")
1199 if len(buf) != BLOCKSIZE:
1200 raise TruncatedHeaderError("truncated header")
1201 if buf.count(NUL) == BLOCKSIZE:
1202 raise EOFHeaderError("end of file header")
1204 chksum = nti(buf[148:156])
1205 if chksum not in calc_chksums(buf):
1206 raise InvalidHeaderError("bad checksum")
1208 obj = cls()
1209 obj.buf = buf
1210 obj.name = nts(buf[0:100])
1211 obj.mode = nti(buf[100:108])
1212 obj.uid = nti(buf[108:116])
1213 obj.gid = nti(buf[116:124])
1214 obj.size = nti(buf[124:136])
1215 obj.mtime = nti(buf[136:148])
1216 obj.chksum = chksum
1217 obj.type = buf[156:157]
1218 obj.linkname = nts(buf[157:257])
1219 obj.uname = nts(buf[265:297])
1220 obj.gname = nts(buf[297:329])
1221 obj.devmajor = nti(buf[329:337])
1222 obj.devminor = nti(buf[337:345])
1223 prefix = nts(buf[345:500])
1225 # Old V7 tar format represents a directory as a regular
1226 # file with a trailing slash.
1227 if obj.type == AREGTYPE and obj.name.endswith("/"):
1228 obj.type = DIRTYPE
1230 # Remove redundant slashes from directories.
1231 if obj.isdir():
1232 obj.name = obj.name.rstrip("/")
1234 # Reconstruct a ustar longname.
1235 if prefix and obj.type not in GNU_TYPES:
1236 obj.name = prefix + "/" + obj.name
1237 return obj
1239 @classmethod
1240 def fromtarfile(cls, tarfile):
1241 """Return the next TarInfo object from TarFile object
1242 tarfile.
1244 buf = tarfile.fileobj.read(BLOCKSIZE)
1245 obj = cls.frombuf(buf)
1246 obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1247 return obj._proc_member(tarfile)
1249 #--------------------------------------------------------------------------
1250 # The following are methods that are called depending on the type of a
1251 # member. The entry point is _proc_member() which can be overridden in a
1252 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1253 # implement the following
1254 # operations:
1255 # 1. Set self.offset_data to the position where the data blocks begin,
1256 # if there is data that follows.
1257 # 2. Set tarfile.offset to the position where the next member's header will
1258 # begin.
1259 # 3. Return self or another valid TarInfo object.
1260 def _proc_member(self, tarfile):
1261 """Choose the right processing method depending on
1262 the type and call it.
1264 if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1265 return self._proc_gnulong(tarfile)
1266 elif self.type == GNUTYPE_SPARSE:
1267 return self._proc_sparse(tarfile)
1268 elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1269 return self._proc_pax(tarfile)
1270 else:
1271 return self._proc_builtin(tarfile)
1273 def _proc_builtin(self, tarfile):
1274 """Process a builtin type or an unknown type which
1275 will be treated as a regular file.
1277 self.offset_data = tarfile.fileobj.tell()
1278 offset = self.offset_data
1279 if self.isreg() or self.type not in SUPPORTED_TYPES:
1280 # Skip the following data blocks.
1281 offset += self._block(self.size)
1282 tarfile.offset = offset
1284 # Patch the TarInfo object with saved global
1285 # header information.
1286 self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
1288 return self
1290 def _proc_gnulong(self, tarfile):
1291 """Process the blocks that hold a GNU longname
1292 or longlink member.
1294 buf = tarfile.fileobj.read(self._block(self.size))
1296 # Fetch the next header and process it.
1297 try:
1298 next = self.fromtarfile(tarfile)
1299 except HeaderError:
1300 raise SubsequentHeaderError("missing or bad subsequent header")
1302 # Patch the TarInfo object from the next header with
1303 # the longname information.
1304 next.offset = self.offset
1305 if self.type == GNUTYPE_LONGNAME:
1306 next.name = nts(buf)
1307 elif self.type == GNUTYPE_LONGLINK:
1308 next.linkname = nts(buf)
1310 return next
1312 def _proc_sparse(self, tarfile):
1313 """Process a GNU sparse header plus extra headers.
1315 buf = self.buf
1316 sp = _ringbuffer()
1317 pos = 386
1318 lastpos = 0L
1319 realpos = 0L
1320 # There are 4 possible sparse structs in the
1321 # first header.
1322 for i in xrange(4):
1323 try:
1324 offset = nti(buf[pos:pos + 12])
1325 numbytes = nti(buf[pos + 12:pos + 24])
1326 except ValueError:
1327 break
1328 if offset > lastpos:
1329 sp.append(_hole(lastpos, offset - lastpos))
1330 sp.append(_data(offset, numbytes, realpos))
1331 realpos += numbytes
1332 lastpos = offset + numbytes
1333 pos += 24
1335 isextended = ord(buf[482])
1336 origsize = nti(buf[483:495])
1338 # If the isextended flag is given,
1339 # there are extra headers to process.
1340 while isextended == 1:
1341 buf = tarfile.fileobj.read(BLOCKSIZE)
1342 pos = 0
1343 for i in xrange(21):
1344 try:
1345 offset = nti(buf[pos:pos + 12])
1346 numbytes = nti(buf[pos + 12:pos + 24])
1347 except ValueError:
1348 break
1349 if offset > lastpos:
1350 sp.append(_hole(lastpos, offset - lastpos))
1351 sp.append(_data(offset, numbytes, realpos))
1352 realpos += numbytes
1353 lastpos = offset + numbytes
1354 pos += 24
1355 isextended = ord(buf[504])
1357 if lastpos < origsize:
1358 sp.append(_hole(lastpos, origsize - lastpos))
1360 self.sparse = sp
1362 self.offset_data = tarfile.fileobj.tell()
1363 tarfile.offset = self.offset_data + self._block(self.size)
1364 self.size = origsize
1366 return self
1368 def _proc_pax(self, tarfile):
1369 """Process an extended or global header as described in
1370 POSIX.1-2001.
1372 # Read the header information.
1373 buf = tarfile.fileobj.read(self._block(self.size))
1375 # A pax header stores supplemental information for either
1376 # the following file (extended) or all following files
1377 # (global).
1378 if self.type == XGLTYPE:
1379 pax_headers = tarfile.pax_headers
1380 else:
1381 pax_headers = tarfile.pax_headers.copy()
1383 # Parse pax header information. A record looks like that:
1384 # "%d %s=%s\n" % (length, keyword, value). length is the size
1385 # of the complete record including the length field itself and
1386 # the newline. keyword and value are both UTF-8 encoded strings.
1387 regex = re.compile(r"(\d+) ([^=]+)=", re.U)
1388 pos = 0
1389 while True:
1390 match = regex.match(buf, pos)
1391 if not match:
1392 break
1394 length, keyword = match.groups()
1395 length = int(length)
1396 value = buf[match.end(2) + 1:match.start(1) + length - 1]
1398 keyword = keyword.decode("utf8")
1399 value = value.decode("utf8")
1401 pax_headers[keyword] = value
1402 pos += length
1404 # Fetch the next header.
1405 try:
1406 next = self.fromtarfile(tarfile)
1407 except HeaderError:
1408 raise SubsequentHeaderError("missing or bad subsequent header")
1410 if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
1411 # Patch the TarInfo object with the extended header info.
1412 next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
1413 next.offset = self.offset
1415 if "size" in pax_headers:
1416 # If the extended header replaces the size field,
1417 # we need to recalculate the offset where the next
1418 # header starts.
1419 offset = next.offset_data
1420 if next.isreg() or next.type not in SUPPORTED_TYPES:
1421 offset += next._block(next.size)
1422 tarfile.offset = offset
1424 return next
1426 def _apply_pax_info(self, pax_headers, encoding, errors):
1427 """Replace fields with supplemental information from a previous
1428 pax extended or global header.
1430 for keyword, value in pax_headers.iteritems():
1431 if keyword not in PAX_FIELDS:
1432 continue
1434 if keyword == "path":
1435 value = value.rstrip("/")
1437 if keyword in PAX_NUMBER_FIELDS:
1438 try:
1439 value = PAX_NUMBER_FIELDS[keyword](value)
1440 except ValueError:
1441 value = 0
1442 else:
1443 value = uts(value, encoding, errors)
1445 setattr(self, keyword, value)
1447 self.pax_headers = pax_headers.copy()
1449 def _block(self, count):
1450 """Round up a byte count by BLOCKSIZE and return it,
1451 e.g. _block(834) => 1024.
1453 blocks, remainder = divmod(count, BLOCKSIZE)
1454 if remainder:
1455 blocks += 1
1456 return blocks * BLOCKSIZE
1458 def isreg(self):
1459 return self.type in REGULAR_TYPES
1460 def isfile(self):
1461 return self.isreg()
1462 def isdir(self):
1463 return self.type == DIRTYPE
1464 def issym(self):
1465 return self.type == SYMTYPE
1466 def islnk(self):
1467 return self.type == LNKTYPE
1468 def ischr(self):
1469 return self.type == CHRTYPE
1470 def isblk(self):
1471 return self.type == BLKTYPE
1472 def isfifo(self):
1473 return self.type == FIFOTYPE
1474 def issparse(self):
1475 return self.type == GNUTYPE_SPARSE
1476 def isdev(self):
1477 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1478 # class TarInfo
1480 class TarFile(object):
1481 """The TarFile Class provides an interface to tar archives.
1484 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
1486 dereference = False # If true, add content of linked file to the
1487 # tar file, else the link.
1489 ignore_zeros = False # If true, skips empty or invalid blocks and
1490 # continues processing.
1492 errorlevel = 0 # If 0, fatal errors only appear in debug
1493 # messages (if debug >= 0). If > 0, errors
1494 # are passed to the caller as exceptions.
1496 format = DEFAULT_FORMAT # The format to use when creating an archive.
1498 encoding = ENCODING # Encoding for 8-bit character strings.
1500 errors = None # Error handler for unicode conversion.
1502 tarinfo = TarInfo # The default TarInfo class to use.
1504 fileobject = ExFileObject # The default ExFileObject class to use.
1506 def __init__(self, name=None, mode="r", fileobj=None, format=None,
1507 tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
1508 errors=None, pax_headers=None, debug=None, errorlevel=None):
1509 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1510 read from an existing archive, 'a' to append data to an existing
1511 file or 'w' to create a new file overwriting an existing one. `mode'
1512 defaults to 'r'.
1513 If `fileobj' is given, it is used for reading or writing data. If it
1514 can be determined, `mode' is overridden by `fileobj's mode.
1515 `fileobj' is not closed, when TarFile is closed.
1517 if len(mode) > 1 or mode not in "raw":
1518 raise ValueError("mode must be 'r', 'a' or 'w'")
1519 self.mode = mode
1520 self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
1522 if not fileobj:
1523 if self.mode == "a" and not os.path.exists(name):
1524 # Create nonexistent files in append mode.
1525 self.mode = "w"
1526 self._mode = "wb"
1527 fileobj = bltn_open(name, self._mode)
1528 self._extfileobj = False
1529 else:
1530 if name is None and hasattr(fileobj, "name"):
1531 name = fileobj.name
1532 if hasattr(fileobj, "mode"):
1533 self._mode = fileobj.mode
1534 self._extfileobj = True
1535 self.name = os.path.abspath(name) if name else None
1536 self.fileobj = fileobj
1538 # Init attributes.
1539 if format is not None:
1540 self.format = format
1541 if tarinfo is not None:
1542 self.tarinfo = tarinfo
1543 if dereference is not None:
1544 self.dereference = dereference
1545 if ignore_zeros is not None:
1546 self.ignore_zeros = ignore_zeros
1547 if encoding is not None:
1548 self.encoding = encoding
1550 if errors is not None:
1551 self.errors = errors
1552 elif mode == "r":
1553 self.errors = "utf-8"
1554 else:
1555 self.errors = "strict"
1557 if pax_headers is not None and self.format == PAX_FORMAT:
1558 self.pax_headers = pax_headers
1559 else:
1560 self.pax_headers = {}
1562 if debug is not None:
1563 self.debug = debug
1564 if errorlevel is not None:
1565 self.errorlevel = errorlevel
1567 # Init datastructures.
1568 self.closed = False
1569 self.members = [] # list of members as TarInfo objects
1570 self._loaded = False # flag if all members have been read
1571 self.offset = self.fileobj.tell()
1572 # current position in the archive file
1573 self.inodes = {} # dictionary caching the inodes of
1574 # archive members already added
1576 try:
1577 if self.mode == "r":
1578 self.firstmember = None
1579 self.firstmember = self.next()
1581 if self.mode == "a":
1582 # Move to the end of the archive,
1583 # before the first empty block.
1584 while True:
1585 self.fileobj.seek(self.offset)
1586 try:
1587 tarinfo = self.tarinfo.fromtarfile(self)
1588 self.members.append(tarinfo)
1589 except EOFHeaderError:
1590 self.fileobj.seek(self.offset)
1591 break
1592 except HeaderError, e:
1593 raise ReadError(str(e))
1595 if self.mode in "aw":
1596 self._loaded = True
1598 if self.pax_headers:
1599 buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
1600 self.fileobj.write(buf)
1601 self.offset += len(buf)
1602 except:
1603 if not self._extfileobj:
1604 self.fileobj.close()
1605 self.closed = True
1606 raise
1608 def _getposix(self):
1609 return self.format == USTAR_FORMAT
1610 def _setposix(self, value):
1611 import warnings
1612 warnings.warn("use the format attribute instead", DeprecationWarning,
1614 if value:
1615 self.format = USTAR_FORMAT
1616 else:
1617 self.format = GNU_FORMAT
1618 posix = property(_getposix, _setposix)
1620 #--------------------------------------------------------------------------
1621 # Below are the classmethods which act as alternate constructors to the
1622 # TarFile class. The open() method is the only one that is needed for
1623 # public use; it is the "super"-constructor and is able to select an
1624 # adequate "sub"-constructor for a particular compression using the mapping
1625 # from OPEN_METH.
1627 # This concept allows one to subclass TarFile without losing the comfort of
1628 # the super-constructor. A sub-constructor is registered and made available
1629 # by adding it to the mapping in OPEN_METH.
1631 @classmethod
1632 def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
1633 """Open a tar archive for reading, writing or appending. Return
1634 an appropriate TarFile class.
1636 mode:
1637 'r' or 'r:*' open for reading with transparent compression
1638 'r:' open for reading exclusively uncompressed
1639 'r:gz' open for reading with gzip compression
1640 'r:bz2' open for reading with bzip2 compression
1641 'a' or 'a:' open for appending, creating the file if necessary
1642 'w' or 'w:' open for writing without compression
1643 'w:gz' open for writing with gzip compression
1644 'w:bz2' open for writing with bzip2 compression
1646 'r|*' open a stream of tar blocks with transparent compression
1647 'r|' open an uncompressed stream of tar blocks for reading
1648 'r|gz' open a gzip compressed stream of tar blocks
1649 'r|bz2' open a bzip2 compressed stream of tar blocks
1650 'w|' open an uncompressed stream for writing
1651 'w|gz' open a gzip compressed stream for writing
1652 'w|bz2' open a bzip2 compressed stream for writing
1655 if not name and not fileobj:
1656 raise ValueError("nothing to open")
1658 if mode in ("r", "r:*"):
1659 # Find out which *open() is appropriate for opening the file.
1660 for comptype in cls.OPEN_METH:
1661 func = getattr(cls, cls.OPEN_METH[comptype])
1662 if fileobj is not None:
1663 saved_pos = fileobj.tell()
1664 try:
1665 return func(name, "r", fileobj, **kwargs)
1666 except (ReadError, CompressionError), e:
1667 if fileobj is not None:
1668 fileobj.seek(saved_pos)
1669 continue
1670 raise ReadError("file could not be opened successfully")
1672 elif ":" in mode:
1673 filemode, comptype = mode.split(":", 1)
1674 filemode = filemode or "r"
1675 comptype = comptype or "tar"
1677 # Select the *open() function according to
1678 # given compression.
1679 if comptype in cls.OPEN_METH:
1680 func = getattr(cls, cls.OPEN_METH[comptype])
1681 else:
1682 raise CompressionError("unknown compression type %r" % comptype)
1683 return func(name, filemode, fileobj, **kwargs)
1685 elif "|" in mode:
1686 filemode, comptype = mode.split("|", 1)
1687 filemode = filemode or "r"
1688 comptype = comptype or "tar"
1690 if filemode not in "rw":
1691 raise ValueError("mode must be 'r' or 'w'")
1693 t = cls(name, filemode,
1694 _Stream(name, filemode, comptype, fileobj, bufsize),
1695 **kwargs)
1696 t._extfileobj = False
1697 return t
1699 elif mode in "aw":
1700 return cls.taropen(name, mode, fileobj, **kwargs)
1702 raise ValueError("undiscernible mode")
1704 @classmethod
1705 def taropen(cls, name, mode="r", fileobj=None, **kwargs):
1706 """Open uncompressed tar archive name for reading or writing.
1708 if len(mode) > 1 or mode not in "raw":
1709 raise ValueError("mode must be 'r', 'a' or 'w'")
1710 return cls(name, mode, fileobj, **kwargs)
1712 @classmethod
1713 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1714 """Open gzip compressed tar archive name for reading or writing.
1715 Appending is not allowed.
1717 if len(mode) > 1 or mode not in "rw":
1718 raise ValueError("mode must be 'r' or 'w'")
1720 try:
1721 import gzip
1722 gzip.GzipFile
1723 except (ImportError, AttributeError):
1724 raise CompressionError("gzip module is not available")
1726 if fileobj is None:
1727 fileobj = bltn_open(name, mode + "b")
1729 try:
1730 t = cls.taropen(name, mode,
1731 gzip.GzipFile(name, mode, compresslevel, fileobj),
1732 **kwargs)
1733 except IOError:
1734 raise ReadError("not a gzip file")
1735 t._extfileobj = False
1736 return t
1738 @classmethod
1739 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1740 """Open bzip2 compressed tar archive name for reading or writing.
1741 Appending is not allowed.
1743 if len(mode) > 1 or mode not in "rw":
1744 raise ValueError("mode must be 'r' or 'w'.")
1746 try:
1747 import bz2
1748 except ImportError:
1749 raise CompressionError("bz2 module is not available")
1751 if fileobj is not None:
1752 fileobj = _BZ2Proxy(fileobj, mode)
1753 else:
1754 fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
1756 try:
1757 t = cls.taropen(name, mode, fileobj, **kwargs)
1758 except (IOError, EOFError):
1759 raise ReadError("not a bzip2 file")
1760 t._extfileobj = False
1761 return t
1763 # All *open() methods are registered here.
1764 OPEN_METH = {
1765 "tar": "taropen", # uncompressed tar
1766 "gz": "gzopen", # gzip compressed tar
1767 "bz2": "bz2open" # bzip2 compressed tar
1770 #--------------------------------------------------------------------------
1771 # The public methods which TarFile provides:
1773 def close(self):
1774 """Close the TarFile. In write-mode, two finishing zero blocks are
1775 appended to the archive.
1777 if self.closed:
1778 return
1780 if self.mode in "aw":
1781 self.fileobj.write(NUL * (BLOCKSIZE * 2))
1782 self.offset += (BLOCKSIZE * 2)
1783 # fill up the end with zero-blocks
1784 # (like option -b20 for tar does)
1785 blocks, remainder = divmod(self.offset, RECORDSIZE)
1786 if remainder > 0:
1787 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1789 if not self._extfileobj:
1790 self.fileobj.close()
1791 self.closed = True
1793 def getmember(self, name):
1794 """Return a TarInfo object for member `name'. If `name' can not be
1795 found in the archive, KeyError is raised. If a member occurs more
1796 than once in the archive, its last occurrence is assumed to be the
1797 most up-to-date version.
1799 tarinfo = self._getmember(name)
1800 if tarinfo is None:
1801 raise KeyError("filename %r not found" % name)
1802 return tarinfo
1804 def getmembers(self):
1805 """Return the members of the archive as a list of TarInfo objects. The
1806 list has the same order as the members in the archive.
1808 self._check()
1809 if not self._loaded: # if we want to obtain a list of
1810 self._load() # all members, we first have to
1811 # scan the whole archive.
1812 return self.members
1814 def getnames(self):
1815 """Return the members of the archive as a list of their names. It has
1816 the same order as the list returned by getmembers().
1818 return [tarinfo.name for tarinfo in self.getmembers()]
1820 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1821 """Create a TarInfo object for either the file `name' or the file
1822 object `fileobj' (using os.fstat on its file descriptor). You can
1823 modify some of the TarInfo's attributes before you add it using
1824 addfile(). If given, `arcname' specifies an alternative name for the
1825 file in the archive.
1827 self._check("aw")
1829 # When fileobj is given, replace name by
1830 # fileobj's real name.
1831 if fileobj is not None:
1832 name = fileobj.name
1834 # Building the name of the member in the archive.
1835 # Backward slashes are converted to forward slashes,
1836 # Absolute paths are turned to relative paths.
1837 if arcname is None:
1838 arcname = name
1839 drv, arcname = os.path.splitdrive(arcname)
1840 arcname = arcname.replace(os.sep, "/")
1841 arcname = arcname.lstrip("/")
1843 # Now, fill the TarInfo object with
1844 # information specific for the file.
1845 tarinfo = self.tarinfo()
1846 tarinfo.tarfile = self
1848 # Use os.stat or os.lstat, depending on platform
1849 # and if symlinks shall be resolved.
1850 if fileobj is None:
1851 if hasattr(os, "lstat") and not self.dereference:
1852 statres = os.lstat(name)
1853 else:
1854 statres = os.stat(name)
1855 else:
1856 statres = os.fstat(fileobj.fileno())
1857 linkname = ""
1859 stmd = statres.st_mode
1860 if stat.S_ISREG(stmd):
1861 inode = (statres.st_ino, statres.st_dev)
1862 if not self.dereference and statres.st_nlink > 1 and \
1863 inode in self.inodes and arcname != self.inodes[inode]:
1864 # Is it a hardlink to an already
1865 # archived file?
1866 type = LNKTYPE
1867 linkname = self.inodes[inode]
1868 else:
1869 # The inode is added only if its valid.
1870 # For win32 it is always 0.
1871 type = REGTYPE
1872 if inode[0]:
1873 self.inodes[inode] = arcname
1874 elif stat.S_ISDIR(stmd):
1875 type = DIRTYPE
1876 elif stat.S_ISFIFO(stmd):
1877 type = FIFOTYPE
1878 elif stat.S_ISLNK(stmd):
1879 type = SYMTYPE
1880 linkname = os.readlink(name)
1881 elif stat.S_ISCHR(stmd):
1882 type = CHRTYPE
1883 elif stat.S_ISBLK(stmd):
1884 type = BLKTYPE
1885 else:
1886 return None
1888 # Fill the TarInfo object with all
1889 # information we can get.
1890 tarinfo.name = arcname
1891 tarinfo.mode = stmd
1892 tarinfo.uid = statres.st_uid
1893 tarinfo.gid = statres.st_gid
1894 if stat.S_ISREG(stmd):
1895 tarinfo.size = statres.st_size
1896 else:
1897 tarinfo.size = 0L
1898 tarinfo.mtime = statres.st_mtime
1899 tarinfo.type = type
1900 tarinfo.linkname = linkname
1901 if pwd:
1902 try:
1903 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1904 except KeyError:
1905 pass
1906 if grp:
1907 try:
1908 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1909 except KeyError:
1910 pass
1912 if type in (CHRTYPE, BLKTYPE):
1913 if hasattr(os, "major") and hasattr(os, "minor"):
1914 tarinfo.devmajor = os.major(statres.st_rdev)
1915 tarinfo.devminor = os.minor(statres.st_rdev)
1916 return tarinfo
1918 def list(self, verbose=True):
1919 """Print a table of contents to sys.stdout. If `verbose' is False, only
1920 the names of the members are printed. If it is True, an `ls -l'-like
1921 output is produced.
1923 self._check()
1925 for tarinfo in self:
1926 if verbose:
1927 print filemode(tarinfo.mode),
1928 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1929 tarinfo.gname or tarinfo.gid),
1930 if tarinfo.ischr() or tarinfo.isblk():
1931 print "%10s" % ("%d,%d" \
1932 % (tarinfo.devmajor, tarinfo.devminor)),
1933 else:
1934 print "%10d" % tarinfo.size,
1935 print "%d-%02d-%02d %02d:%02d:%02d" \
1936 % time.localtime(tarinfo.mtime)[:6],
1938 print tarinfo.name + ("/" if tarinfo.isdir() else ""),
1940 if verbose:
1941 if tarinfo.issym():
1942 print "->", tarinfo.linkname,
1943 if tarinfo.islnk():
1944 print "link to", tarinfo.linkname,
1945 print
1947 def add(self, name, arcname=None, recursive=True, exclude=None, filter=None):
1948 """Add the file `name' to the archive. `name' may be any type of file
1949 (directory, fifo, symbolic link, etc.). If given, `arcname'
1950 specifies an alternative name for the file in the archive.
1951 Directories are added recursively by default. This can be avoided by
1952 setting `recursive' to False. `exclude' is a function that should
1953 return True for each filename to be excluded. `filter' is a function
1954 that expects a TarInfo object argument and returns the changed
1955 TarInfo object, if it returns None the TarInfo object will be
1956 excluded from the archive.
1958 self._check("aw")
1960 if arcname is None:
1961 arcname = name
1963 # Exclude pathnames.
1964 if exclude is not None:
1965 import warnings
1966 warnings.warn("use the filter argument instead",
1967 DeprecationWarning, 2)
1968 if exclude(name):
1969 self._dbg(2, "tarfile: Excluded %r" % name)
1970 return
1972 # Skip if somebody tries to archive the archive...
1973 if self.name is not None and os.path.abspath(name) == self.name:
1974 self._dbg(2, "tarfile: Skipped %r" % name)
1975 return
1977 self._dbg(1, name)
1979 # Create a TarInfo object from the file.
1980 tarinfo = self.gettarinfo(name, arcname)
1982 if tarinfo is None:
1983 self._dbg(1, "tarfile: Unsupported type %r" % name)
1984 return
1986 # Change or exclude the TarInfo object.
1987 if filter is not None:
1988 tarinfo = filter(tarinfo)
1989 if tarinfo is None:
1990 self._dbg(2, "tarfile: Excluded %r" % name)
1991 return
1993 # Append the tar header and data to the archive.
1994 if tarinfo.isreg():
1995 f = bltn_open(name, "rb")
1996 self.addfile(tarinfo, f)
1997 f.close()
1999 elif tarinfo.isdir():
2000 self.addfile(tarinfo)
2001 if recursive:
2002 for f in os.listdir(name):
2003 self.add(os.path.join(name, f), os.path.join(arcname, f),
2004 recursive, exclude, filter)
2006 else:
2007 self.addfile(tarinfo)
2009 def addfile(self, tarinfo, fileobj=None):
2010 """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
2011 given, tarinfo.size bytes are read from it and added to the archive.
2012 You can create TarInfo objects using gettarinfo().
2013 On Windows platforms, `fileobj' should always be opened with mode
2014 'rb' to avoid irritation about the file size.
2016 self._check("aw")
2018 tarinfo = copy.copy(tarinfo)
2020 buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
2021 self.fileobj.write(buf)
2022 self.offset += len(buf)
2024 # If there's data to follow, append it.
2025 if fileobj is not None:
2026 copyfileobj(fileobj, self.fileobj, tarinfo.size)
2027 blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
2028 if remainder > 0:
2029 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
2030 blocks += 1
2031 self.offset += blocks * BLOCKSIZE
2033 self.members.append(tarinfo)
2035 def extractall(self, path=".", members=None):
2036 """Extract all members from the archive to the current working
2037 directory and set owner, modification time and permissions on
2038 directories afterwards. `path' specifies a different directory
2039 to extract to. `members' is optional and must be a subset of the
2040 list returned by getmembers().
2042 directories = []
2044 if members is None:
2045 members = self
2047 for tarinfo in members:
2048 if tarinfo.isdir():
2049 # Extract directories with a safe mode.
2050 directories.append(tarinfo)
2051 tarinfo = copy.copy(tarinfo)
2052 tarinfo.mode = 0700
2053 self.extract(tarinfo, path)
2055 # Reverse sort directories.
2056 directories.sort(key=operator.attrgetter('name'))
2057 directories.reverse()
2059 # Set correct owner, mtime and filemode on directories.
2060 for tarinfo in directories:
2061 dirpath = os.path.join(path, tarinfo.name)
2062 try:
2063 self.chown(tarinfo, dirpath)
2064 self.utime(tarinfo, dirpath)
2065 self.chmod(tarinfo, dirpath)
2066 except ExtractError, e:
2067 if self.errorlevel > 1:
2068 raise
2069 else:
2070 self._dbg(1, "tarfile: %s" % e)
2072 def extract(self, member, path=""):
2073 """Extract a member from the archive to the current working directory,
2074 using its full name. Its file information is extracted as accurately
2075 as possible. `member' may be a filename or a TarInfo object. You can
2076 specify a different directory using `path'.
2078 self._check("r")
2080 if isinstance(member, basestring):
2081 tarinfo = self.getmember(member)
2082 else:
2083 tarinfo = member
2085 # Prepare the link target for makelink().
2086 if tarinfo.islnk():
2087 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
2089 try:
2090 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
2091 except EnvironmentError, e:
2092 if self.errorlevel > 0:
2093 raise
2094 else:
2095 if e.filename is None:
2096 self._dbg(1, "tarfile: %s" % e.strerror)
2097 else:
2098 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
2099 except ExtractError, e:
2100 if self.errorlevel > 1:
2101 raise
2102 else:
2103 self._dbg(1, "tarfile: %s" % e)
2105 def extractfile(self, member):
2106 """Extract a member from the archive as a file object. `member' may be
2107 a filename or a TarInfo object. If `member' is a regular file, a
2108 file-like object is returned. If `member' is a link, a file-like
2109 object is constructed from the link's target. If `member' is none of
2110 the above, None is returned.
2111 The file-like object is read-only and provides the following
2112 methods: read(), readline(), readlines(), seek() and tell()
2114 self._check("r")
2116 if isinstance(member, basestring):
2117 tarinfo = self.getmember(member)
2118 else:
2119 tarinfo = member
2121 if tarinfo.isreg():
2122 return self.fileobject(self, tarinfo)
2124 elif tarinfo.type not in SUPPORTED_TYPES:
2125 # If a member's type is unknown, it is treated as a
2126 # regular file.
2127 return self.fileobject(self, tarinfo)
2129 elif tarinfo.islnk() or tarinfo.issym():
2130 if isinstance(self.fileobj, _Stream):
2131 # A small but ugly workaround for the case that someone tries
2132 # to extract a (sym)link as a file-object from a non-seekable
2133 # stream of tar blocks.
2134 raise StreamError("cannot extract (sym)link as file object")
2135 else:
2136 # A (sym)link's file object is its target's file object.
2137 return self.extractfile(self._getmember(tarinfo.linkname,
2138 tarinfo))
2139 else:
2140 # If there's no data associated with the member (directory, chrdev,
2141 # blkdev, etc.), return None instead of a file object.
2142 return None
2144 def _extract_member(self, tarinfo, targetpath):
2145 """Extract the TarInfo object tarinfo to a physical
2146 file called targetpath.
2148 # Fetch the TarInfo object for the given name
2149 # and build the destination pathname, replacing
2150 # forward slashes to platform specific separators.
2151 targetpath = targetpath.rstrip("/")
2152 targetpath = targetpath.replace("/", os.sep)
2154 # Create all upper directories.
2155 upperdirs = os.path.dirname(targetpath)
2156 if upperdirs and not os.path.exists(upperdirs):
2157 # Create directories that are not part of the archive with
2158 # default permissions.
2159 os.makedirs(upperdirs)
2161 if tarinfo.islnk() or tarinfo.issym():
2162 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2163 else:
2164 self._dbg(1, tarinfo.name)
2166 if tarinfo.isreg():
2167 self.makefile(tarinfo, targetpath)
2168 elif tarinfo.isdir():
2169 self.makedir(tarinfo, targetpath)
2170 elif tarinfo.isfifo():
2171 self.makefifo(tarinfo, targetpath)
2172 elif tarinfo.ischr() or tarinfo.isblk():
2173 self.makedev(tarinfo, targetpath)
2174 elif tarinfo.islnk() or tarinfo.issym():
2175 self.makelink(tarinfo, targetpath)
2176 elif tarinfo.type not in SUPPORTED_TYPES:
2177 self.makeunknown(tarinfo, targetpath)
2178 else:
2179 self.makefile(tarinfo, targetpath)
2181 self.chown(tarinfo, targetpath)
2182 if not tarinfo.issym():
2183 self.chmod(tarinfo, targetpath)
2184 self.utime(tarinfo, targetpath)
2186 #--------------------------------------------------------------------------
2187 # Below are the different file methods. They are called via
2188 # _extract_member() when extract() is called. They can be replaced in a
2189 # subclass to implement other functionality.
2191 def makedir(self, tarinfo, targetpath):
2192 """Make a directory called targetpath.
2194 try:
2195 # Use a safe mode for the directory, the real mode is set
2196 # later in _extract_member().
2197 os.mkdir(targetpath, 0700)
2198 except EnvironmentError, e:
2199 if e.errno != errno.EEXIST:
2200 raise
2202 def makefile(self, tarinfo, targetpath):
2203 """Make a file called targetpath.
2205 source = self.extractfile(tarinfo)
2206 target = bltn_open(targetpath, "wb")
2207 copyfileobj(source, target)
2208 source.close()
2209 target.close()
2211 def makeunknown(self, tarinfo, targetpath):
2212 """Make a file from a TarInfo object with an unknown type
2213 at targetpath.
2215 self.makefile(tarinfo, targetpath)
2216 self._dbg(1, "tarfile: Unknown file type %r, " \
2217 "extracted as regular file." % tarinfo.type)
2219 def makefifo(self, tarinfo, targetpath):
2220 """Make a fifo called targetpath.
2222 if hasattr(os, "mkfifo"):
2223 os.mkfifo(targetpath)
2224 else:
2225 raise ExtractError("fifo not supported by system")
2227 def makedev(self, tarinfo, targetpath):
2228 """Make a character or block device called targetpath.
2230 if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
2231 raise ExtractError("special devices not supported by system")
2233 mode = tarinfo.mode
2234 if tarinfo.isblk():
2235 mode |= stat.S_IFBLK
2236 else:
2237 mode |= stat.S_IFCHR
2239 os.mknod(targetpath, mode,
2240 os.makedev(tarinfo.devmajor, tarinfo.devminor))
2242 def makelink(self, tarinfo, targetpath):
2243 """Make a (symbolic) link called targetpath. If it cannot be created
2244 (platform limitation), we try to make a copy of the referenced file
2245 instead of a link.
2247 try:
2248 if tarinfo.issym():
2249 os.symlink(tarinfo.linkname, targetpath)
2250 else:
2251 # See extract().
2252 os.link(tarinfo._link_target, targetpath)
2253 except AttributeError:
2254 if tarinfo.issym():
2255 linkpath = os.path.dirname(tarinfo.name) + "/" + \
2256 tarinfo.linkname
2257 else:
2258 linkpath = tarinfo.linkname
2260 try:
2261 self._extract_member(self.getmember(linkpath), targetpath)
2262 except (EnvironmentError, KeyError), e:
2263 linkpath = linkpath.replace("/", os.sep)
2264 try:
2265 shutil.copy2(linkpath, targetpath)
2266 except EnvironmentError, e:
2267 raise IOError("link could not be created")
2269 def chown(self, tarinfo, targetpath):
2270 """Set owner of targetpath according to tarinfo.
2272 if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
2273 # We have to be root to do so.
2274 try:
2275 g = grp.getgrnam(tarinfo.gname)[2]
2276 except KeyError:
2277 try:
2278 g = grp.getgrgid(tarinfo.gid)[2]
2279 except KeyError:
2280 g = os.getgid()
2281 try:
2282 u = pwd.getpwnam(tarinfo.uname)[2]
2283 except KeyError:
2284 try:
2285 u = pwd.getpwuid(tarinfo.uid)[2]
2286 except KeyError:
2287 u = os.getuid()
2288 try:
2289 if tarinfo.issym() and hasattr(os, "lchown"):
2290 os.lchown(targetpath, u, g)
2291 else:
2292 if sys.platform != "os2emx":
2293 os.chown(targetpath, u, g)
2294 except EnvironmentError, e:
2295 raise ExtractError("could not change owner")
2297 def chmod(self, tarinfo, targetpath):
2298 """Set file permissions of targetpath according to tarinfo.
2300 if hasattr(os, 'chmod'):
2301 try:
2302 os.chmod(targetpath, tarinfo.mode)
2303 except EnvironmentError, e:
2304 raise ExtractError("could not change mode")
2306 def utime(self, tarinfo, targetpath):
2307 """Set modification time of targetpath according to tarinfo.
2309 if not hasattr(os, 'utime'):
2310 return
2311 try:
2312 os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
2313 except EnvironmentError, e:
2314 raise ExtractError("could not change modification time")
2316 #--------------------------------------------------------------------------
2317 def next(self):
2318 """Return the next member of the archive as a TarInfo object, when
2319 TarFile is opened for reading. Return None if there is no more
2320 available.
2322 self._check("ra")
2323 if self.firstmember is not None:
2324 m = self.firstmember
2325 self.firstmember = None
2326 return m
2328 # Read the next block.
2329 self.fileobj.seek(self.offset)
2330 tarinfo = None
2331 while True:
2332 try:
2333 tarinfo = self.tarinfo.fromtarfile(self)
2334 except EOFHeaderError, e:
2335 if self.ignore_zeros:
2336 self._dbg(2, "0x%X: %s" % (self.offset, e))
2337 self.offset += BLOCKSIZE
2338 continue
2339 except InvalidHeaderError, e:
2340 if self.ignore_zeros:
2341 self._dbg(2, "0x%X: %s" % (self.offset, e))
2342 self.offset += BLOCKSIZE
2343 continue
2344 elif self.offset == 0:
2345 raise ReadError(str(e))
2346 except EmptyHeaderError:
2347 if self.offset == 0:
2348 raise ReadError("empty file")
2349 except TruncatedHeaderError, e:
2350 if self.offset == 0:
2351 raise ReadError(str(e))
2352 except SubsequentHeaderError, e:
2353 raise ReadError(str(e))
2354 break
2356 if tarinfo is not None:
2357 self.members.append(tarinfo)
2358 else:
2359 self._loaded = True
2361 return tarinfo
2363 #--------------------------------------------------------------------------
2364 # Little helper methods:
2366 def _getmember(self, name, tarinfo=None):
2367 """Find an archive member by name from bottom to top.
2368 If tarinfo is given, it is used as the starting point.
2370 # Ensure that all members have been loaded.
2371 members = self.getmembers()
2373 if tarinfo is None:
2374 end = len(members)
2375 else:
2376 end = members.index(tarinfo)
2378 for i in xrange(end - 1, -1, -1):
2379 if name == members[i].name:
2380 return members[i]
2382 def _load(self):
2383 """Read through the entire archive file and look for readable
2384 members.
2386 while True:
2387 tarinfo = self.next()
2388 if tarinfo is None:
2389 break
2390 self._loaded = True
2392 def _check(self, mode=None):
2393 """Check if TarFile is still open, and if the operation's mode
2394 corresponds to TarFile's mode.
2396 if self.closed:
2397 raise IOError("%s is closed" % self.__class__.__name__)
2398 if mode is not None and self.mode not in mode:
2399 raise IOError("bad operation for mode %r" % self.mode)
2401 def __iter__(self):
2402 """Provide an iterator object.
2404 if self._loaded:
2405 return iter(self.members)
2406 else:
2407 return TarIter(self)
2409 def _dbg(self, level, msg):
2410 """Write debugging output to sys.stderr.
2412 if level <= self.debug:
2413 print >> sys.stderr, msg
2414 # class TarFile
2416 class TarIter:
2417 """Iterator Class.
2419 for tarinfo in TarFile(...):
2420 suite...
2423 def __init__(self, tarfile):
2424 """Construct a TarIter object.
2426 self.tarfile = tarfile
2427 self.index = 0
2428 def __iter__(self):
2429 """Return iterator object.
2431 return self
2432 def next(self):
2433 """Return the next item using TarFile's next() method.
2434 When all members have been read, set TarFile as _loaded.
2436 # Fix for SF #1100429: Under rare circumstances it can
2437 # happen that getmembers() is called during iteration,
2438 # which will cause TarIter to stop prematurely.
2439 if not self.tarfile._loaded:
2440 tarinfo = self.tarfile.next()
2441 if not tarinfo:
2442 self.tarfile._loaded = True
2443 raise StopIteration
2444 else:
2445 try:
2446 tarinfo = self.tarfile.members[self.index]
2447 except IndexError:
2448 raise StopIteration
2449 self.index += 1
2450 return tarinfo
2452 # Helper classes for sparse file support
2453 class _section:
2454 """Base class for _data and _hole.
2456 def __init__(self, offset, size):
2457 self.offset = offset
2458 self.size = size
2459 def __contains__(self, offset):
2460 return self.offset <= offset < self.offset + self.size
2462 class _data(_section):
2463 """Represent a data section in a sparse file.
2465 def __init__(self, offset, size, realpos):
2466 _section.__init__(self, offset, size)
2467 self.realpos = realpos
2469 class _hole(_section):
2470 """Represent a hole section in a sparse file.
2472 pass
2474 class _ringbuffer(list):
2475 """Ringbuffer class which increases performance
2476 over a regular list.
2478 def __init__(self):
2479 self.idx = 0
2480 def find(self, offset):
2481 idx = self.idx
2482 while True:
2483 item = self[idx]
2484 if offset in item:
2485 break
2486 idx += 1
2487 if idx == len(self):
2488 idx = 0
2489 if idx == self.idx:
2490 # End of File
2491 return None
2492 self.idx = idx
2493 return item
2495 #---------------------------------------------
2496 # zipfile compatible TarFile class
2497 #---------------------------------------------
2498 TAR_PLAIN = 0 # zipfile.ZIP_STORED
2499 TAR_GZIPPED = 8 # zipfile.ZIP_DEFLATED
2500 class TarFileCompat:
2501 """TarFile class compatible with standard module zipfile's
2502 ZipFile class.
2504 def __init__(self, file, mode="r", compression=TAR_PLAIN):
2505 from warnings import warnpy3k
2506 warnpy3k("the TarFileCompat class has been removed in Python 3.0",
2507 stacklevel=2)
2508 if compression == TAR_PLAIN:
2509 self.tarfile = TarFile.taropen(file, mode)
2510 elif compression == TAR_GZIPPED:
2511 self.tarfile = TarFile.gzopen(file, mode)
2512 else:
2513 raise ValueError("unknown compression constant")
2514 if mode[0:1] == "r":
2515 members = self.tarfile.getmembers()
2516 for m in members:
2517 m.filename = m.name
2518 m.file_size = m.size
2519 m.date_time = time.gmtime(m.mtime)[:6]
2520 def namelist(self):
2521 return map(lambda m: m.name, self.infolist())
2522 def infolist(self):
2523 return filter(lambda m: m.type in REGULAR_TYPES,
2524 self.tarfile.getmembers())
2525 def printdir(self):
2526 self.tarfile.list()
2527 def testzip(self):
2528 return
2529 def getinfo(self, name):
2530 return self.tarfile.getmember(name)
2531 def read(self, name):
2532 return self.tarfile.extractfile(self.tarfile.getmember(name)).read()
2533 def write(self, filename, arcname=None, compress_type=None):
2534 self.tarfile.add(filename, arcname)
2535 def writestr(self, zinfo, bytes):
2536 try:
2537 from cStringIO import StringIO
2538 except ImportError:
2539 from StringIO import StringIO
2540 import calendar
2541 tinfo = TarInfo(zinfo.filename)
2542 tinfo.size = len(bytes)
2543 tinfo.mtime = calendar.timegm(zinfo.date_time)
2544 self.tarfile.addfile(tinfo, StringIO(bytes))
2545 def close(self):
2546 self.tarfile.close()
2547 #class TarFileCompat
2549 #--------------------
2550 # exported functions
2551 #--------------------
2552 def is_tarfile(name):
2553 """Return True if name points to a tar archive that we
2554 are able to handle, else return False.
2556 try:
2557 t = open(name)
2558 t.close()
2559 return True
2560 except TarError:
2561 return False
2563 bltn_open = open
2564 open = TarFile.open