1 #!/usr/bin/env python
2 # -*- coding: iso-8859-1 -*-
3 #-------------------------------------------------------------------
4 # tarfile.py
5 #-------------------------------------------------------------------
6 # Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7 # All rights reserved.
9 # Permission is hereby granted, free of charge, to any person
10 # obtaining a copy of this software and associated documentation
11 # files (the "Software"), to deal in the Software without
12 # restriction, including without limitation the rights to use,
13 # copy, modify, merge, publish, distribute, sublicense, and/or sell
14 # copies of the Software, and to permit persons to whom the
15 # Software is furnished to do so, subject to the following
16 # conditions:
18 # The above copyright notice and this permission notice shall be
19 # included in all copies or substantial portions of the Software.
21 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23 # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25 # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 # OTHER DEALINGS IN THE SOFTWARE.
30 """Read from and write to tar format archives.
31 """
33 __version__ = "$Revision$"
34 # $Source$
36 version = "0.9.0"
37 __author__ = "Lars Gustäbel (lars@gustaebel.de)"
38 __date__ = "$Date$"
39 __cvsid__ = "$Id$"
40 __credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
42 #---------
43 # Imports
44 #---------
45 import sys
46 import os
47 import shutil
48 import stat
49 import errno
50 import time
51 import struct
52 import copy
53 import re
54 import operator
56 if sys.platform == 'mac':
57 # This module needs work for MacOS9, especially in the area of pathname
58 # handling. In many places it is assumed a simple substitution of / by the
59 # local os.path.sep is good enough to convert pathnames, but this does not
60 # work with the mac rooted:path:name versus :nonrooted:path:name syntax
61 raise ImportError, "tarfile does not work for platform==mac"
63 try:
64 import grp, pwd
65 except ImportError:
66 grp = pwd = None
68 # from tarfile import *
69 __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
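# Illustrative usage sketch (editor's addition, not part of the original
# module; the archive and directory names are placeholders). The usual entry
# point is TarFile.open(), also exposed as the module-level open() alias:
#
#   import tarfile
#   tar = tarfile.open("example.tar.gz", "r:gz")   # or "r"/"r:*" to auto-detect
#   print tar.getnames()
#   tar.extractall("dest/")
#   tar.close()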
71 #---------------------------------------------------------
72 # tar constants
73 #---------------------------------------------------------
74 NUL = "\0" # the null character
75 BLOCKSIZE = 512 # length of processing blocks
76 RECORDSIZE = BLOCKSIZE * 20 # length of records
77 GNU_MAGIC = "ustar \0" # magic gnu tar string
78 POSIX_MAGIC = "ustar\x0000" # magic posix tar string
80 LENGTH_NAME = 100 # maximum length of a filename
81 LENGTH_LINK = 100 # maximum length of a linkname
82 LENGTH_PREFIX = 155 # maximum length of the prefix field
84 REGTYPE = "0" # regular file
85 AREGTYPE = "\0" # regular file
86 LNKTYPE = "1" # link (inside tarfile)
87 SYMTYPE = "2" # symbolic link
88 CHRTYPE = "3" # character special device
89 BLKTYPE = "4" # block special device
90 DIRTYPE = "5" # directory
91 FIFOTYPE = "6" # fifo special device
92 CONTTYPE = "7" # contiguous file
94 GNUTYPE_LONGNAME = "L" # GNU tar longname
95 GNUTYPE_LONGLINK = "K" # GNU tar longlink
96 GNUTYPE_SPARSE = "S" # GNU tar sparse file
98 XHDTYPE = "x" # POSIX.1-2001 extended header
99 XGLTYPE = "g" # POSIX.1-2001 global header
100 SOLARIS_XHDTYPE = "X" # Solaris extended header
102 USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format
103 GNU_FORMAT = 1 # GNU tar format
104 PAX_FORMAT = 2 # POSIX.1-2001 (pax) format
105 DEFAULT_FORMAT = GNU_FORMAT
107 #---------------------------------------------------------
108 # tarfile constants
109 #---------------------------------------------------------
110 # File types that tarfile supports:
111 SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
112 SYMTYPE, DIRTYPE, FIFOTYPE,
113 CONTTYPE, CHRTYPE, BLKTYPE,
114 GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
115 GNUTYPE_SPARSE)
117 # File types that will be treated as a regular file.
118 REGULAR_TYPES = (REGTYPE, AREGTYPE,
119 CONTTYPE, GNUTYPE_SPARSE)
121 # File types that are part of the GNU tar format.
122 GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
123 GNUTYPE_SPARSE)
125 # Fields from a pax header that override a TarInfo attribute.
126 PAX_FIELDS = ("path", "linkpath", "size", "mtime",
127 "uid", "gid", "uname", "gname")
129 # Fields in a pax header that are numbers, all other fields
130 # are treated as strings.
131 PAX_NUMBER_FIELDS = {
132 "atime": float,
133 "ctime": float,
134 "mtime": float,
135 "uid": int,
136 "gid": int,
137 "size": int
138 }
140 #---------------------------------------------------------
141 # Bits used in the mode field, values in octal.
142 #---------------------------------------------------------
143 S_IFLNK = 0120000 # symbolic link
144 S_IFREG = 0100000 # regular file
145 S_IFBLK = 0060000 # block device
146 S_IFDIR = 0040000 # directory
147 S_IFCHR = 0020000 # character device
148 S_IFIFO = 0010000 # fifo
150 TSUID = 04000 # set UID on execution
151 TSGID = 02000 # set GID on execution
152 TSVTX = 01000 # reserved
154 TUREAD = 0400 # read by owner
155 TUWRITE = 0200 # write by owner
156 TUEXEC = 0100 # execute/search by owner
157 TGREAD = 0040 # read by group
158 TGWRITE = 0020 # write by group
159 TGEXEC = 0010 # execute/search by group
160 TOREAD = 0004 # read by other
161 TOWRITE = 0002 # write by other
162 TOEXEC = 0001 # execute/search by other
164 #---------------------------------------------------------
165 # initialization
166 #---------------------------------------------------------
167 ENCODING = sys.getfilesystemencoding()
168 if ENCODING is None:
169 ENCODING = sys.getdefaultencoding()
171 #---------------------------------------------------------
172 # Some useful functions
173 #---------------------------------------------------------
175 def stn(s, length):
176 """Convert a python string to a null-terminated string buffer.
178 return s[:length] + (length - len(s)) * NUL
180 def nts(s):
181 """Convert a null-terminated string field to a python string.
183 # Use the string up to the first null char.
184 p = s.find("\0")
185 if p == -1:
186 return s
187 return s[:p]
189 def nti(s):
190 """Convert a number field to a python number.
192 # There are two possible encodings for a number field, see
193 # itn() below.
194 if s[0] != chr(0200):
195 try:
196 n = int(nts(s) or "0", 8)
197 except ValueError:
198 raise HeaderError("invalid header")
199 else:
200 n = 0L
201 for i in xrange(len(s) - 1):
202 n <<= 8
203 n += ord(s[i + 1])
204 return n
206 def itn(n, digits=8, format=DEFAULT_FORMAT):
207 """Convert a python number to a number field.
209 # POSIX 1003.1-1988 requires numbers to be encoded as a string of
210 # octal digits followed by a null-byte, this allows values up to
211 # (8**(digits-1))-1. GNU tar allows storing numbers greater than
212 # that if necessary. A leading 0200 byte indicates this particular
213 # encoding, the following digits-1 bytes are a big-endian
214 # representation. This allows values up to (256**(digits-1))-1.
215 if 0 <= n < 8 ** (digits - 1):
216 s = "%0*o" % (digits - 1, n) + NUL
217 else:
218 if format != GNU_FORMAT or n >= 256 ** (digits - 1):
219 raise ValueError("overflow in number field")
221 if n < 0:
222 # XXX We mimic GNU tar's behaviour with negative numbers,
223 # this could raise OverflowError.
224 n = struct.unpack("L", struct.pack("l", n))[0]
226 s = ""
227 for i in xrange(digits - 1):
228 s = chr(n & 0377) + s
229 n >>= 8
230 s = chr(0200) + s
231 return s
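# Illustrative sketch (editor's addition): behaviour of the two number-field
# encodings handled by itn() and nti() above. Small values use the POSIX
# octal form; values that do not fit fall back to GNU tar's base-256 form,
# marked by a leading \200 byte:
#
#   >>> itn(0644)                      # fits into 7 octal digits + NUL
#   '0000644\x00'
#   >>> itn(8 ** 7)[0] == chr(0200)    # too large for octal -> base-256
#   True
#   >>> nti(itn(8 ** 7)) == 8 ** 7     # nti() reverses either encoding
#   True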
233 def uts(s, encoding, errors):
234 """Convert a unicode object to a string.
236 if errors == "utf-8":
237 # An extra error handler similar to the -o invalid=UTF-8 option
238 # in POSIX.1-2001. Replace untranslatable characters with their
239 # UTF-8 representation.
240 try:
241 return s.encode(encoding, "strict")
242 except UnicodeEncodeError:
243 x = []
244 for c in s:
245 try:
246 x.append(c.encode(encoding, "strict"))
247 except UnicodeEncodeError:
248 x.append(c.encode("utf8"))
249 return "".join(x)
250 else:
251 return s.encode(encoding, errors)
253 def calc_chksums(buf):
254 """Calculate the checksum for a member's header by summing up all
255 characters except for the chksum field which is treated as if
256 it was filled with spaces. According to the GNU tar sources,
257 some tars (Sun and NeXT) calculate chksum with signed char,
258 which will be different if there are chars in the buffer with
259 the high bit set. So we calculate two checksums, unsigned and
260 signed.
262 unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512]))
263 signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512]))
264 return unsigned_chksum, signed_chksum
266 def copyfileobj(src, dst, length=None):
267 """Copy length bytes from fileobj src to fileobj dst.
268 If length is None, copy the entire content.
270 if length == 0:
271 return
272 if length is None:
273 shutil.copyfileobj(src, dst)
274 return
276 BUFSIZE = 16 * 1024
277 blocks, remainder = divmod(length, BUFSIZE)
278 for b in xrange(blocks):
279 buf = src.read(BUFSIZE)
280 if len(buf) < BUFSIZE:
281 raise IOError("end of file reached")
282 dst.write(buf)
284 if remainder != 0:
285 buf = src.read(remainder)
286 if len(buf) < remainder:
287 raise IOError("end of file reached")
288 dst.write(buf)
289 return
291 filemode_table = (
292 ((S_IFLNK, "l"),
293 (S_IFREG, "-"),
294 (S_IFBLK, "b"),
295 (S_IFDIR, "d"),
296 (S_IFCHR, "c"),
297 (S_IFIFO, "p")),
299 ((TUREAD, "r"),),
300 ((TUWRITE, "w"),),
301 ((TUEXEC|TSUID, "s"),
302 (TSUID, "S"),
303 (TUEXEC, "x")),
305 ((TGREAD, "r"),),
306 ((TGWRITE, "w"),),
307 ((TGEXEC|TSGID, "s"),
308 (TSGID, "S"),
309 (TGEXEC, "x")),
311 ((TOREAD, "r"),),
312 ((TOWRITE, "w"),),
313 ((TOEXEC|TSVTX, "t"),
314 (TSVTX, "T"),
315 (TOEXEC, "x"))
316 )
318 def filemode(mode):
319 """Convert a file's mode to a string of the form
320 -rwxrwxrwx.
321 Used by TarFile.list()
323 perm = []
324 for table in filemode_table:
325 for bit, char in table:
326 if mode & bit == bit:
327 perm.append(char)
328 break
329 else:
330 perm.append("-")
331 return "".join(perm)
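# Example (editor's addition): filemode() renders a mode value the way
# "ls -l" does, using filemode_table above:
#
#   >>> filemode(040755)
#   'drwxr-xr-x'
#   >>> filemode(0100644)
#   '-rw-r--r--'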
333 class TarError(Exception):
334 """Base exception."""
335 pass
336 class ExtractError(TarError):
337 """General exception for extract errors."""
338 pass
339 class ReadError(TarError):
340 """Exception for unreadable tar archives."""
341 pass
342 class CompressionError(TarError):
343 """Exception for unavailable compression methods."""
344 pass
345 class StreamError(TarError):
346 """Exception for unsupported operations on stream-like TarFiles."""
347 pass
348 class HeaderError(TarError):
349 """Exception for invalid headers."""
350 pass
352 #---------------------------
353 # internal stream interface
354 #---------------------------
355 class _LowLevelFile:
356 """Low-level file object. Supports reading and writing.
357 It is used instead of a regular file object for streaming
358 access.
361 def __init__(self, name, mode):
362 mode = {
363 "r": os.O_RDONLY,
364 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
365 }[mode]
366 if hasattr(os, "O_BINARY"):
367 mode |= os.O_BINARY
368 self.fd = os.open(name, mode)
370 def close(self):
371 os.close(self.fd)
373 def read(self, size):
374 return os.read(self.fd, size)
376 def write(self, s):
377 os.write(self.fd, s)
379 class _Stream:
380 """Class that serves as an adapter between TarFile and
381 a stream-like object. The stream-like object only
382 needs to have a read() or write() method and is accessed
383 blockwise. Use of gzip or bzip2 compression is possible.
384 A stream-like object could be for example: sys.stdin,
385 sys.stdout, a socket, a tape device etc.
387 _Stream is intended to be used only internally.
390 def __init__(self, name, mode, comptype, fileobj, bufsize):
391 """Construct a _Stream object.
393 self._extfileobj = True
394 if fileobj is None:
395 fileobj = _LowLevelFile(name, mode)
396 self._extfileobj = False
398 if comptype == '*':
399 # Enable transparent compression detection for the
400 # stream interface
401 fileobj = _StreamProxy(fileobj)
402 comptype = fileobj.getcomptype()
404 self.name = name or ""
405 self.mode = mode
406 self.comptype = comptype
407 self.fileobj = fileobj
408 self.bufsize = bufsize
409 self.buf = ""
410 self.pos = 0L
411 self.closed = False
413 if comptype == "gz":
414 try:
415 import zlib
416 except ImportError:
417 raise CompressionError("zlib module is not available")
418 self.zlib = zlib
419 self.crc = zlib.crc32("") & 0xffffffffL
420 if mode == "r":
421 self._init_read_gz()
422 else:
423 self._init_write_gz()
425 if comptype == "bz2":
426 try:
427 import bz2
428 except ImportError:
429 raise CompressionError("bz2 module is not available")
430 if mode == "r":
431 self.dbuf = ""
432 self.cmp = bz2.BZ2Decompressor()
433 else:
434 self.cmp = bz2.BZ2Compressor()
436 def __del__(self):
437 if hasattr(self, "closed") and not self.closed:
438 self.close()
440 def _init_write_gz(self):
441 """Initialize for writing with gzip compression.
443 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
444 -self.zlib.MAX_WBITS,
445 self.zlib.DEF_MEM_LEVEL,
446 0)
447 timestamp = struct.pack("<L", long(time.time()))
448 self.__write("\037\213\010\010%s\002\377" % timestamp)
449 if self.name.endswith(".gz"):
450 self.name = self.name[:-3]
451 self.__write(self.name + NUL)
453 def write(self, s):
454 """Write string s to the stream.
456 if self.comptype == "gz":
457 self.crc = self.zlib.crc32(s, self.crc) & 0xffffffffL
458 self.pos += len(s)
459 if self.comptype != "tar":
460 s = self.cmp.compress(s)
461 self.__write(s)
463 def __write(self, s):
464 """Write string s to the stream if a whole new block
465 is ready to be written.
467 self.buf += s
468 while len(self.buf) > self.bufsize:
469 self.fileobj.write(self.buf[:self.bufsize])
470 self.buf = self.buf[self.bufsize:]
472 def close(self):
473 """Close the _Stream object. No operation should be
474 done on it afterwards.
476 if self.closed:
477 return
479 if self.mode == "w" and self.comptype != "tar":
480 self.buf += self.cmp.flush()
482 if self.mode == "w" and self.buf:
483 self.fileobj.write(self.buf)
484 self.buf = ""
485 if self.comptype == "gz":
486 # The native zlib crc is an unsigned 32-bit integer, but
487 # the Python wrapper implicitly casts that to a signed C
488 # long. So, on a 32-bit box self.crc may "look negative",
489 # while the same crc on a 64-bit box may "look positive".
490 # To avoid irksome warnings from the `struct` module, force
491 # it to look positive on all boxes.
492 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
493 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
495 if not self._extfileobj:
496 self.fileobj.close()
498 self.closed = True
500 def _init_read_gz(self):
501 """Initialize for reading a gzip compressed fileobj.
503 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
504 self.dbuf = ""
506 # taken from gzip.GzipFile with some alterations
507 if self.__read(2) != "\037\213":
508 raise ReadError("not a gzip file")
509 if self.__read(1) != "\010":
510 raise CompressionError("unsupported compression method")
512 flag = ord(self.__read(1))
513 self.__read(6)
515 if flag & 4:
516 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
517 self.read(xlen)
518 if flag & 8:
519 while True:
520 s = self.__read(1)
521 if not s or s == NUL:
522 break
523 if flag & 16:
524 while True:
525 s = self.__read(1)
526 if not s or s == NUL:
527 break
528 if flag & 2:
529 self.__read(2)
531 def tell(self):
532 """Return the stream's file pointer position.
534 return self.pos
536 def seek(self, pos=0):
537 """Set the stream's file pointer to pos. Negative seeking
538 is forbidden.
540 if pos - self.pos >= 0:
541 blocks, remainder = divmod(pos - self.pos, self.bufsize)
542 for i in xrange(blocks):
543 self.read(self.bufsize)
544 self.read(remainder)
545 else:
546 raise StreamError("seeking backwards is not allowed")
547 return self.pos
549 def read(self, size=None):
550 """Return the next size number of bytes from the stream.
551 If size is not defined, return all bytes of the stream
552 up to EOF.
554 if size is None:
555 t = []
556 while True:
557 buf = self._read(self.bufsize)
558 if not buf:
559 break
560 t.append(buf)
561 buf = "".join(t)
562 else:
563 buf = self._read(size)
564 self.pos += len(buf)
565 return buf
567 def _read(self, size):
568 """Return size bytes from the stream.
570 if self.comptype == "tar":
571 return self.__read(size)
573 c = len(self.dbuf)
574 t = [self.dbuf]
575 while c < size:
576 buf = self.__read(self.bufsize)
577 if not buf:
578 break
579 try:
580 buf = self.cmp.decompress(buf)
581 except IOError:
582 raise ReadError("invalid compressed data")
583 t.append(buf)
584 c += len(buf)
585 t = "".join(t)
586 self.dbuf = t[size:]
587 return t[:size]
589 def __read(self, size):
590 """Return size bytes from stream. If internal buffer is empty,
591 read another block from the stream.
593 c = len(self.buf)
594 t = [self.buf]
595 while c < size:
596 buf = self.fileobj.read(self.bufsize)
597 if not buf:
598 break
599 t.append(buf)
600 c += len(buf)
601 t = "".join(t)
602 self.buf = t[size:]
603 return t[:size]
604 # class _Stream
606 class _StreamProxy(object):
607 """Small proxy class that enables transparent compression
608 detection for the Stream interface (mode 'r|*').
611 def __init__(self, fileobj):
612 self.fileobj = fileobj
613 self.buf = self.fileobj.read(BLOCKSIZE)
615 def read(self, size):
616 self.read = self.fileobj.read
617 return self.buf
619 def getcomptype(self):
620 if self.buf.startswith("\037\213\010"):
621 return "gz"
622 if self.buf.startswith("BZh91"):
623 return "bz2"
624 return "tar"
626 def close(self):
627 self.fileobj.close()
628 # class StreamProxy
630 class _BZ2Proxy(object):
631 """Small proxy class that enables external file object
632 support for "r:bz2" and "w:bz2" modes. This is actually
633 a workaround for a limitation in bz2 module's BZ2File
634 class which (unlike gzip.GzipFile) has no support for
635 a file object argument.
638 blocksize = 16 * 1024
640 def __init__(self, fileobj, mode):
641 self.fileobj = fileobj
642 self.mode = mode
643 self.name = getattr(self.fileobj, "name", None)
644 self.init()
646 def init(self):
647 import bz2
648 self.pos = 0
649 if self.mode == "r":
650 self.bz2obj = bz2.BZ2Decompressor()
651 self.fileobj.seek(0)
652 self.buf = ""
653 else:
654 self.bz2obj = bz2.BZ2Compressor()
656 def read(self, size):
657 b = [self.buf]
658 x = len(self.buf)
659 while x < size:
660 raw = self.fileobj.read(self.blocksize)
661 if not raw:
662 break
663 data = self.bz2obj.decompress(raw)
664 b.append(data)
665 x += len(data)
666 self.buf = "".join(b)
668 buf = self.buf[:size]
669 self.buf = self.buf[size:]
670 self.pos += len(buf)
671 return buf
673 def seek(self, pos):
674 if pos < self.pos:
675 self.init()
676 self.read(pos - self.pos)
678 def tell(self):
679 return self.pos
681 def write(self, data):
682 self.pos += len(data)
683 raw = self.bz2obj.compress(data)
684 self.fileobj.write(raw)
686 def close(self):
687 if self.mode == "w":
688 raw = self.bz2obj.flush()
689 self.fileobj.write(raw)
690 # class _BZ2Proxy
692 #------------------------
693 # Extraction file object
694 #------------------------
695 class _FileInFile(object):
696 """A thin wrapper around an existing file object that
697 provides a part of its data as an individual file
698 object.
701 def __init__(self, fileobj, offset, size, sparse=None):
702 self.fileobj = fileobj
703 self.offset = offset
704 self.size = size
705 self.sparse = sparse
706 self.position = 0
708 def tell(self):
709 """Return the current file position.
711 return self.position
713 def seek(self, position):
714 """Seek to a position in the file.
716 self.position = position
718 def read(self, size=None):
719 """Read data from the file.
721 if size is None:
722 size = self.size - self.position
723 else:
724 size = min(size, self.size - self.position)
726 if self.sparse is None:
727 return self.readnormal(size)
728 else:
729 return self.readsparse(size)
731 def readnormal(self, size):
732 """Read operation for regular files.
734 self.fileobj.seek(self.offset + self.position)
735 self.position += size
736 return self.fileobj.read(size)
738 def readsparse(self, size):
739 """Read operation for sparse files.
741 data = []
742 while size > 0:
743 buf = self.readsparsesection(size)
744 if not buf:
745 break
746 size -= len(buf)
747 data.append(buf)
748 return "".join(data)
750 def readsparsesection(self, size):
751 """Read a single section of a sparse file.
753 section = self.sparse.find(self.position)
755 if section is None:
756 return ""
758 size = min(size, section.offset + section.size - self.position)
760 if isinstance(section, _data):
761 realpos = section.realpos + self.position - section.offset
762 self.fileobj.seek(self.offset + realpos)
763 self.position += size
764 return self.fileobj.read(size)
765 else:
766 self.position += size
767 return NUL * size
768 #class _FileInFile
771 class ExFileObject(object):
772 """File-like object for reading an archive member.
773 Is returned by TarFile.extractfile().
775 blocksize = 1024
777 def __init__(self, tarfile, tarinfo):
778 self.fileobj = _FileInFile(tarfile.fileobj,
779 tarinfo.offset_data,
780 tarinfo.size,
781 getattr(tarinfo, "sparse", None))
782 self.name = tarinfo.name
783 self.mode = "r"
784 self.closed = False
785 self.size = tarinfo.size
787 self.position = 0
788 self.buffer = ""
790 def read(self, size=None):
791 """Read at most size bytes from the file. If size is not
792 present or None, read all data until EOF is reached.
794 if self.closed:
795 raise ValueError("I/O operation on closed file")
797 buf = ""
798 if self.buffer:
799 if size is None:
800 buf = self.buffer
801 self.buffer = ""
802 else:
803 buf = self.buffer[:size]
804 self.buffer = self.buffer[size:]
806 if size is None:
807 buf += self.fileobj.read()
808 else:
809 buf += self.fileobj.read(size - len(buf))
811 self.position += len(buf)
812 return buf
814 def readline(self, size=-1):
815 """Read one entire line from the file. If size is present
816 and non-negative, return a string with at most that
817 size, which may be an incomplete line.
819 if self.closed:
820 raise ValueError("I/O operation on closed file")
822 if "\n" in self.buffer:
823 pos = self.buffer.find("\n") + 1
824 else:
825 buffers = [self.buffer]
826 while True:
827 buf = self.fileobj.read(self.blocksize)
828 buffers.append(buf)
829 if not buf or "\n" in buf:
830 self.buffer = "".join(buffers)
831 pos = self.buffer.find("\n") + 1
832 if pos == 0:
833 # no newline found.
834 pos = len(self.buffer)
835 break
837 if size != -1:
838 pos = min(size, pos)
840 buf = self.buffer[:pos]
841 self.buffer = self.buffer[pos:]
842 self.position += len(buf)
843 return buf
845 def readlines(self):
846 """Return a list with all remaining lines.
848 result = []
849 while True:
850 line = self.readline()
851 if not line: break
852 result.append(line)
853 return result
855 def tell(self):
856 """Return the current file position.
858 if self.closed:
859 raise ValueError("I/O operation on closed file")
861 return self.position
863 def seek(self, pos, whence=os.SEEK_SET):
864 """Seek to a position in the file.
866 if self.closed:
867 raise ValueError("I/O operation on closed file")
869 if whence == os.SEEK_SET:
870 self.position = min(max(pos, 0), self.size)
871 elif whence == os.SEEK_CUR:
872 if pos < 0:
873 self.position = max(self.position + pos, 0)
874 else:
875 self.position = min(self.position + pos, self.size)
876 elif whence == os.SEEK_END:
877 self.position = max(min(self.size + pos, self.size), 0)
878 else:
879 raise ValueError("Invalid argument")
881 self.buffer = ""
882 self.fileobj.seek(self.position)
884 def close(self):
885 """Close the file object.
887 self.closed = True
889 def __iter__(self):
890 """Get an iterator over the file's lines.
892 while True:
893 line = self.readline()
894 if not line:
895 break
896 yield line
897 #class ExFileObject
899 #------------------
900 # Exported Classes
901 #------------------
902 class TarInfo(object):
903 """Informational class which holds the details about an
904 archive member given by a tar header block.
905 TarInfo objects are returned by TarFile.getmember(),
906 TarFile.getmembers() and TarFile.gettarinfo() and are
907 usually created internally.
910 def __init__(self, name=""):
911 """Construct a TarInfo object. name is the optional name
912 of the member.
914 self.name = name # member name
915 self.mode = 0644 # file permissions
916 self.uid = 0 # user id
917 self.gid = 0 # group id
918 self.size = 0 # file size
919 self.mtime = 0 # modification time
920 self.chksum = 0 # header checksum
921 self.type = REGTYPE # member type
922 self.linkname = "" # link name
923 self.uname = "root" # user name
924 self.gname = "root" # group name
925 self.devmajor = 0 # device major number
926 self.devminor = 0 # device minor number
928 self.offset = 0 # the tar header starts here
929 self.offset_data = 0 # the file's data starts here
931 self.pax_headers = {} # pax header information
933 # In pax headers the "name" and "linkname" field are called
934 # "path" and "linkpath".
935 def _getpath(self):
936 return self.name
937 def _setpath(self, name):
938 self.name = name
939 path = property(_getpath, _setpath)
941 def _getlinkpath(self):
942 return self.linkname
943 def _setlinkpath(self, linkname):
944 self.linkname = linkname
945 linkpath = property(_getlinkpath, _setlinkpath)
947 def __repr__(self):
948 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
950 def get_info(self, encoding, errors):
951 """Return the TarInfo's attributes as a dictionary.
953 info = {
954 "name": self.name,
955 "mode": self.mode & 07777,
956 "uid": self.uid,
957 "gid": self.gid,
958 "size": self.size,
959 "mtime": self.mtime,
960 "chksum": self.chksum,
961 "type": self.type,
962 "linkname": self.linkname,
963 "uname": self.uname,
964 "gname": self.gname,
965 "devmajor": self.devmajor,
966 "devminor": self.devminor
967 }
969 if info["type"] == DIRTYPE and not info["name"].endswith("/"):
970 info["name"] += "/"
972 for key in ("name", "linkname", "uname", "gname"):
973 if type(info[key]) is unicode:
974 info[key] = info[key].encode(encoding, errors)
976 return info
978 def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
979 """Return a tar header as a string of 512 byte blocks.
981 info = self.get_info(encoding, errors)
983 if format == USTAR_FORMAT:
984 return self.create_ustar_header(info)
985 elif format == GNU_FORMAT:
986 return self.create_gnu_header(info)
987 elif format == PAX_FORMAT:
988 return self.create_pax_header(info, encoding, errors)
989 else:
990 raise ValueError("invalid format")
992 def create_ustar_header(self, info):
993 """Return the object as a ustar header block.
995 info["magic"] = POSIX_MAGIC
997 if len(info["linkname"]) > LENGTH_LINK:
998 raise ValueError("linkname is too long")
1000 if len(info["name"]) > LENGTH_NAME:
1001 info["prefix"], info["name"] = self._posix_split_name(info["name"])
1003 return self._create_header(info, USTAR_FORMAT)
1005 def create_gnu_header(self, info):
1006 """Return the object as a GNU header block sequence.
1008 info["magic"] = GNU_MAGIC
1010 buf = ""
1011 if len(info["linkname"]) > LENGTH_LINK:
1012 buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK)
1014 if len(info["name"]) > LENGTH_NAME:
1015 buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME)
1017 return buf + self._create_header(info, GNU_FORMAT)
1019 def create_pax_header(self, info, encoding, errors):
1020 """Return the object as a ustar header block. If it cannot be
1021 represented this way, prepend a pax extended header sequence
1022 with supplement information.
1024 info["magic"] = POSIX_MAGIC
1025 pax_headers = self.pax_headers.copy()
1027 # Test string fields for values that exceed the field length or cannot
1028 # be represented in ASCII encoding.
1029 for name, hname, length in (
1030 ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
1031 ("uname", "uname", 32), ("gname", "gname", 32)):
1033 if hname in pax_headers:
1034 # The pax header has priority.
1035 continue
1037 val = info[name].decode(encoding, errors)
1039 # Try to encode the string as ASCII.
1040 try:
1041 val.encode("ascii")
1042 except UnicodeEncodeError:
1043 pax_headers[hname] = val
1044 continue
1046 if len(info[name]) > length:
1047 pax_headers[hname] = val
1049 # Test number fields for values that exceed the field limit or values
1050 # that need to be stored as a float.
1051 for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
1052 if name in pax_headers:
1053 # The pax header has priority. Avoid overflow.
1054 info[name] = 0
1055 continue
1057 val = info[name]
1058 if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
1059 pax_headers[name] = unicode(val)
1060 info[name] = 0
1062 # Create a pax extended header if necessary.
1063 if pax_headers:
1064 buf = self._create_pax_generic_header(pax_headers)
1065 else:
1066 buf = ""
1068 return buf + self._create_header(info, USTAR_FORMAT)
1070 @classmethod
1071 def create_pax_global_header(cls, pax_headers):
1072 """Return the object as a pax global header block sequence.
1074 return cls._create_pax_generic_header(pax_headers, type=XGLTYPE)
1076 def _posix_split_name(self, name):
1077 """Split a name longer than 100 chars into a prefix
1078 and a name part.
1080 prefix = name[:LENGTH_PREFIX + 1]
1081 while prefix and prefix[-1] != "/":
1082 prefix = prefix[:-1]
1084 name = name[len(prefix):]
1085 prefix = prefix[:-1]
1087 if not prefix or len(name) > LENGTH_NAME:
1088 raise ValueError("name is too long")
1089 return prefix, name
1091 @staticmethod
1092 def _create_header(info, format):
1093 """Return a header block. info is a dictionary with file
1094 information, format must be one of the *_FORMAT constants.
1096 parts = [
1097 stn(info.get("name", ""), 100),
1098 itn(info.get("mode", 0) & 07777, 8, format),
1099 itn(info.get("uid", 0), 8, format),
1100 itn(info.get("gid", 0), 8, format),
1101 itn(info.get("size", 0), 12, format),
1102 itn(info.get("mtime", 0), 12, format),
1103 " ", # checksum field
1104 info.get("type", REGTYPE),
1105 stn(info.get("linkname", ""), 100),
1106 stn(info.get("magic", POSIX_MAGIC), 8),
1107 stn(info.get("uname", "root"), 32),
1108 stn(info.get("gname", "root"), 32),
1109 itn(info.get("devmajor", 0), 8, format),
1110 itn(info.get("devminor", 0), 8, format),
1111 stn(info.get("prefix", ""), 155)
1112 ]
1114 buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
1115 chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
1116 buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
1117 return buf
1119 @staticmethod
1120 def _create_payload(payload):
1121 """Return the string payload filled with zero bytes
1122 up to the next 512 byte border.
1124 blocks, remainder = divmod(len(payload), BLOCKSIZE)
1125 if remainder > 0:
1126 payload += (BLOCKSIZE - remainder) * NUL
1127 return payload
1129 @classmethod
1130 def _create_gnu_long_header(cls, name, type):
1131 """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
1132 for name.
1134 name += NUL
1136 info = {}
1137 info["name"] = "././@LongLink"
1138 info["type"] = type
1139 info["size"] = len(name)
1140 info["magic"] = GNU_MAGIC
1142 # create extended header + name blocks.
1143 return cls._create_header(info, USTAR_FORMAT) + \
1144 cls._create_payload(name)
1146 @classmethod
1147 def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
1148 """Return a POSIX.1-2001 extended or global header sequence
1149 that contains a list of keyword, value pairs. The values
1150 must be unicode objects.
1152 records = []
1153 for keyword, value in pax_headers.iteritems():
1154 keyword = keyword.encode("utf8")
1155 value = value.encode("utf8")
1156 l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n'
1157 n = p = 0
1158 while True:
1159 n = l + len(str(p))
1160 if n == p:
1161 break
1162 p = n
1163 records.append("%d %s=%s\n" % (p, keyword, value))
1164 records = "".join(records)
1166 # We use a hardcoded "././@PaxHeader" name like star does
1167 # instead of the one that POSIX recommends.
1168 info = {}
1169 info["name"] = "././@PaxHeader"
1170 info["type"] = type
1171 info["size"] = len(records)
1172 info["magic"] = POSIX_MAGIC
1174 # Create pax header + record blocks.
1175 return cls._create_header(info, USTAR_FORMAT) + \
1176 cls._create_payload(records)
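# Worked example (editor's addition): each pax record has the form
# "%d %s=%s\n", where the leading decimal length counts the complete record
# including the length digits themselves; the loop above therefore iterates
# until the guess stabilizes. For keyword "path" and value "foo/bar":
#
#   "16 path=foo/bar\n"    ->    2 + 1 + 4 + 1 + 7 + 1 == 16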
1178 @classmethod
1179 def frombuf(cls, buf):
1180 """Construct a TarInfo object from a 512 byte string buffer.
1182 if len(buf) != BLOCKSIZE:
1183 raise HeaderError("truncated header")
1184 if buf.count(NUL) == BLOCKSIZE:
1185 raise HeaderError("empty header")
1187 chksum = nti(buf[148:156])
1188 if chksum not in calc_chksums(buf):
1189 raise HeaderError("bad checksum")
1191 obj = cls()
1192 obj.buf = buf
1193 obj.name = nts(buf[0:100])
1194 obj.mode = nti(buf[100:108])
1195 obj.uid = nti(buf[108:116])
1196 obj.gid = nti(buf[116:124])
1197 obj.size = nti(buf[124:136])
1198 obj.mtime = nti(buf[136:148])
1199 obj.chksum = chksum
1200 obj.type = buf[156:157]
1201 obj.linkname = nts(buf[157:257])
1202 obj.uname = nts(buf[265:297])
1203 obj.gname = nts(buf[297:329])
1204 obj.devmajor = nti(buf[329:337])
1205 obj.devminor = nti(buf[337:345])
1206 prefix = nts(buf[345:500])
1208 # Old V7 tar format represents a directory as a regular
1209 # file with a trailing slash.
1210 if obj.type == AREGTYPE and obj.name.endswith("/"):
1211 obj.type = DIRTYPE
1213 # Remove redundant slashes from directories.
1214 if obj.isdir():
1215 obj.name = obj.name.rstrip("/")
1217 # Reconstruct a ustar longname.
1218 if prefix and obj.type not in GNU_TYPES:
1219 obj.name = prefix + "/" + obj.name
1220 return obj
1222 @classmethod
1223 def fromtarfile(cls, tarfile):
1224 """Return the next TarInfo object from TarFile object
1225 tarfile.
1227 buf = tarfile.fileobj.read(BLOCKSIZE)
1228 if not buf:
1229 return
1230 obj = cls.frombuf(buf)
1231 obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1232 return obj._proc_member(tarfile)
1234 #--------------------------------------------------------------------------
1235 # The following are methods that are called depending on the type of a
1236 # member. The entry point is _proc_member() which can be overridden in a
1237 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1238 # implement the following
1239 # operations:
1240 # 1. Set self.offset_data to the position where the data blocks begin,
1241 # if there is data that follows.
1242 # 2. Set tarfile.offset to the position where the next member's header will
1243 # begin.
1244 # 3. Return self or another valid TarInfo object.
1245 def _proc_member(self, tarfile):
1246 """Choose the right processing method depending on
1247 the type and call it.
1249 if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1250 return self._proc_gnulong(tarfile)
1251 elif self.type == GNUTYPE_SPARSE:
1252 return self._proc_sparse(tarfile)
1253 elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1254 return self._proc_pax(tarfile)
1255 else:
1256 return self._proc_builtin(tarfile)
1258 def _proc_builtin(self, tarfile):
1259 """Process a builtin type or an unknown type which
1260 will be treated as a regular file.
1262 self.offset_data = tarfile.fileobj.tell()
1263 offset = self.offset_data
1264 if self.isreg() or self.type not in SUPPORTED_TYPES:
1265 # Skip the following data blocks.
1266 offset += self._block(self.size)
1267 tarfile.offset = offset
1269 # Patch the TarInfo object with saved global
1270 # header information.
1271 self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
1273 return self
1275 def _proc_gnulong(self, tarfile):
1276 """Process the blocks that hold a GNU longname
1277 or longlink member.
1279 buf = tarfile.fileobj.read(self._block(self.size))
1281 # Fetch the next header and process it.
1282 next = self.fromtarfile(tarfile)
1283 if next is None:
1284 raise HeaderError("missing subsequent header")
1286 # Patch the TarInfo object from the next header with
1287 # the longname information.
1288 next.offset = self.offset
1289 if self.type == GNUTYPE_LONGNAME:
1290 next.name = nts(buf)
1291 elif self.type == GNUTYPE_LONGLINK:
1292 next.linkname = nts(buf)
1294 return next
1296 def _proc_sparse(self, tarfile):
1297 """Process a GNU sparse header plus extra headers.
1299 buf = self.buf
1300 sp = _ringbuffer()
1301 pos = 386
1302 lastpos = 0L
1303 realpos = 0L
1304 # There are 4 possible sparse structs in the
1305 # first header.
1306 for i in xrange(4):
1307 try:
1308 offset = nti(buf[pos:pos + 12])
1309 numbytes = nti(buf[pos + 12:pos + 24])
1310 except ValueError:
1311 break
1312 if offset > lastpos:
1313 sp.append(_hole(lastpos, offset - lastpos))
1314 sp.append(_data(offset, numbytes, realpos))
1315 realpos += numbytes
1316 lastpos = offset + numbytes
1317 pos += 24
1319 isextended = ord(buf[482])
1320 origsize = nti(buf[483:495])
1322 # If the isextended flag is given,
1323 # there are extra headers to process.
1324 while isextended == 1:
1325 buf = tarfile.fileobj.read(BLOCKSIZE)
1326 pos = 0
1327 for i in xrange(21):
1328 try:
1329 offset = nti(buf[pos:pos + 12])
1330 numbytes = nti(buf[pos + 12:pos + 24])
1331 except ValueError:
1332 break
1333 if offset > lastpos:
1334 sp.append(_hole(lastpos, offset - lastpos))
1335 sp.append(_data(offset, numbytes, realpos))
1336 realpos += numbytes
1337 lastpos = offset + numbytes
1338 pos += 24
1339 isextended = ord(buf[504])
1341 if lastpos < origsize:
1342 sp.append(_hole(lastpos, origsize - lastpos))
1344 self.sparse = sp
1346 self.offset_data = tarfile.fileobj.tell()
1347 tarfile.offset = self.offset_data + self._block(self.size)
1348 self.size = origsize
1350 return self
1352 def _proc_pax(self, tarfile):
1353 """Process an extended or global header as described in
1354 POSIX.1-2001.
1356 # Read the header information.
1357 buf = tarfile.fileobj.read(self._block(self.size))
1359 # A pax header stores supplemental information for either
1360 # the following file (extended) or all following files
1361 # (global).
1362 if self.type == XGLTYPE:
1363 pax_headers = tarfile.pax_headers
1364 else:
1365 pax_headers = tarfile.pax_headers.copy()
1367 # Parse pax header information. A record looks like this:
1368 # "%d %s=%s\n" % (length, keyword, value). length is the size
1369 # of the complete record including the length field itself and
1370 # the newline. keyword and value are both UTF-8 encoded strings.
1371 regex = re.compile(r"(\d+) ([^=]+)=", re.U)
1372 pos = 0
1373 while True:
1374 match = regex.match(buf, pos)
1375 if not match:
1376 break
1378 length, keyword = match.groups()
1379 length = int(length)
1380 value = buf[match.end(2) + 1:match.start(1) + length - 1]
1382 keyword = keyword.decode("utf8")
1383 value = value.decode("utf8")
1385 pax_headers[keyword] = value
1386 pos += length
1388 # Fetch the next header.
1389 next = self.fromtarfile(tarfile)
1391 if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
1392 if next is None:
1393 raise HeaderError("missing subsequent header")
1395 # Patch the TarInfo object with the extended header info.
1396 next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
1397 next.offset = self.offset
1399 if "size" in pax_headers:
1400 # If the extended header replaces the size field,
1401 # we need to recalculate the offset where the next
1402 # header starts.
1403 offset = next.offset_data
1404 if next.isreg() or next.type not in SUPPORTED_TYPES:
1405 offset += next._block(next.size)
1406 tarfile.offset = offset
1408 return next
1410 def _apply_pax_info(self, pax_headers, encoding, errors):
1411 """Replace fields with supplemental information from a previous
1412 pax extended or global header.
1414 for keyword, value in pax_headers.iteritems():
1415 if keyword not in PAX_FIELDS:
1416 continue
1418 if keyword == "path":
1419 value = value.rstrip("/")
1421 if keyword in PAX_NUMBER_FIELDS:
1422 try:
1423 value = PAX_NUMBER_FIELDS[keyword](value)
1424 except ValueError:
1425 value = 0
1426 else:
1427 value = uts(value, encoding, errors)
1429 setattr(self, keyword, value)
1431 self.pax_headers = pax_headers.copy()
1433 def _block(self, count):
1434 """Round up a byte count by BLOCKSIZE and return it,
1435 e.g. _block(834) => 1024.
1437 blocks, remainder = divmod(count, BLOCKSIZE)
1438 if remainder:
1439 blocks += 1
1440 return blocks * BLOCKSIZE
1442 def isreg(self):
1443 return self.type in REGULAR_TYPES
1444 def isfile(self):
1445 return self.isreg()
1446 def isdir(self):
1447 return self.type == DIRTYPE
1448 def issym(self):
1449 return self.type == SYMTYPE
1450 def islnk(self):
1451 return self.type == LNKTYPE
1452 def ischr(self):
1453 return self.type == CHRTYPE
1454 def isblk(self):
1455 return self.type == BLKTYPE
1456 def isfifo(self):
1457 return self.type == FIFOTYPE
1458 def issparse(self):
1459 return self.type == GNUTYPE_SPARSE
1460 def isdev(self):
1461 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1462 # class TarInfo
1464 class TarFile(object):
1465 """The TarFile Class provides an interface to tar archives.
1468 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
1470 dereference = False # If true, add content of linked file to the
1471 # tar file, else the link.
1473 ignore_zeros = False # If true, skips empty or invalid blocks and
1474 # continues processing.
1476 errorlevel = 0 # If 0, fatal errors only appear in debug
1477 # messages (if debug >= 0). If > 0, errors
1478 # are passed to the caller as exceptions.
1480 format = DEFAULT_FORMAT # The format to use when creating an archive.
1482 encoding = ENCODING # Encoding for 8-bit character strings.
1484 errors = None # Error handler for unicode conversion.
1486 tarinfo = TarInfo # The default TarInfo class to use.
1488 fileobject = ExFileObject # The default ExFileObject class to use.
1490 def __init__(self, name=None, mode="r", fileobj=None, format=None,
1491 tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
1492 errors=None, pax_headers=None, debug=None, errorlevel=None):
1493 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1494 read from an existing archive, 'a' to append data to an existing
1495 file or 'w' to create a new file overwriting an existing one. `mode'
1496 defaults to 'r'.
1497 If `fileobj' is given, it is used for reading or writing data. If it
1498 can be determined, `mode' is overridden by `fileobj's mode.
1499 `fileobj' is not closed when TarFile is closed.
1501 if len(mode) > 1 or mode not in "raw":
1502 raise ValueError("mode must be 'r', 'a' or 'w'")
1503 self.mode = mode
1504 self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
1506 if not fileobj:
1507 if self.mode == "a" and not os.path.exists(name):
1508 # Create nonexistent files in append mode.
1509 self.mode = "w"
1510 self._mode = "wb"
1511 fileobj = bltn_open(name, self._mode)
1512 self._extfileobj = False
1513 else:
1514 if name is None and hasattr(fileobj, "name"):
1515 name = fileobj.name
1516 if hasattr(fileobj, "mode"):
1517 self._mode = fileobj.mode
1518 self._extfileobj = True
1519 self.name = os.path.abspath(name) if name else None
1520 self.fileobj = fileobj
1522 # Init attributes.
1523 if format is not None:
1524 self.format = format
1525 if tarinfo is not None:
1526 self.tarinfo = tarinfo
1527 if dereference is not None:
1528 self.dereference = dereference
1529 if ignore_zeros is not None:
1530 self.ignore_zeros = ignore_zeros
1531 if encoding is not None:
1532 self.encoding = encoding
1534 if errors is not None:
1535 self.errors = errors
1536 elif mode == "r":
1537 self.errors = "utf-8"
1538 else:
1539 self.errors = "strict"
1541 if pax_headers is not None and self.format == PAX_FORMAT:
1542 self.pax_headers = pax_headers
1543 else:
1544 self.pax_headers = {}
1546 if debug is not None:
1547 self.debug = debug
1548 if errorlevel is not None:
1549 self.errorlevel = errorlevel
1551 # Init datastructures.
1552 self.closed = False
1553 self.members = [] # list of members as TarInfo objects
1554 self._loaded = False # flag if all members have been read
1555 self.offset = self.fileobj.tell()
1556 # current position in the archive file
1557 self.inodes = {} # dictionary caching the inodes of
1558 # archive members already added
1560 if self.mode == "r":
1561 self.firstmember = None
1562 self.firstmember = self.next()
1564 if self.mode == "a":
1565 # Move to the end of the archive,
1566 # before the first empty block.
1567 self.firstmember = None
1568 while True:
1569 if self.next() is None:
1570 if self.offset > 0:
1571 self.fileobj.seek(- BLOCKSIZE, 1)
1572 break
1574 if self.mode in "aw":
1575 self._loaded = True
1577 if self.pax_headers:
1578 buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
1579 self.fileobj.write(buf)
1580 self.offset += len(buf)
1582 def _getposix(self):
1583 return self.format == USTAR_FORMAT
1584 def _setposix(self, value):
1585 import warnings
1586 warnings.warn("use the format attribute instead", DeprecationWarning,
1587 2)
1588 if value:
1589 self.format = USTAR_FORMAT
1590 else:
1591 self.format = GNU_FORMAT
1592 posix = property(_getposix, _setposix)
1594 #--------------------------------------------------------------------------
1595 # Below are the classmethods which act as alternate constructors to the
1596 # TarFile class. The open() method is the only one that is needed for
1597 # public use; it is the "super"-constructor and is able to select an
1598 # adequate "sub"-constructor for a particular compression using the mapping
1599 # from OPEN_METH.
1601 # This concept allows one to subclass TarFile without losing the comfort of
1602 # the super-constructor. A sub-constructor is registered and made available
1603 # by adding it to the mapping in OPEN_METH.
1605 @classmethod
1606 def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
1607 """Open a tar archive for reading, writing or appending. Return
1608 an appropriate TarFile class.
1610 mode:
1611 'r' or 'r:*' open for reading with transparent compression
1612 'r:' open for reading exclusively uncompressed
1613 'r:gz' open for reading with gzip compression
1614 'r:bz2' open for reading with bzip2 compression
1615 'a' or 'a:' open for appending, creating the file if necessary
1616 'w' or 'w:' open for writing without compression
1617 'w:gz' open for writing with gzip compression
1618 'w:bz2' open for writing with bzip2 compression
1620 'r|*' open a stream of tar blocks with transparent compression
1621 'r|' open an uncompressed stream of tar blocks for reading
1622 'r|gz' open a gzip compressed stream of tar blocks
1623 'r|bz2' open a bzip2 compressed stream of tar blocks
1624 'w|' open an uncompressed stream for writing
1625 'w|gz' open a gzip compressed stream for writing
1626 'w|bz2' open a bzip2 compressed stream for writing
1629 if not name and not fileobj:
1630 raise ValueError("nothing to open")
1632 if mode in ("r", "r:*"):
1633 # Find out which *open() is appropriate for opening the file.
1634 for comptype in cls.OPEN_METH:
1635 func = getattr(cls, cls.OPEN_METH[comptype])
1636 if fileobj is not None:
1637 saved_pos = fileobj.tell()
1638 try:
1639 return func(name, "r", fileobj, **kwargs)
1640 except (ReadError, CompressionError), e:
1641 if fileobj is not None:
1642 fileobj.seek(saved_pos)
1643 continue
1644 raise ReadError("file could not be opened successfully")
1646 elif ":" in mode:
1647 filemode, comptype = mode.split(":", 1)
1648 filemode = filemode or "r"
1649 comptype = comptype or "tar"
1651 # Select the *open() function according to
1652 # given compression.
1653 if comptype in cls.OPEN_METH:
1654 func = getattr(cls, cls.OPEN_METH[comptype])
1655 else:
1656 raise CompressionError("unknown compression type %r" % comptype)
1657 return func(name, filemode, fileobj, **kwargs)
1659 elif "|" in mode:
1660 filemode, comptype = mode.split("|", 1)
1661 filemode = filemode or "r"
1662 comptype = comptype or "tar"
1664 if filemode not in "rw":
1665 raise ValueError("mode must be 'r' or 'w'")
1667 t = cls(name, filemode,
1668 _Stream(name, filemode, comptype, fileobj, bufsize),
1669 **kwargs)
1670 t._extfileobj = False
1671 return t
1673 elif mode in "aw":
1674 return cls.taropen(name, mode, fileobj, **kwargs)
1676 raise ValueError("undiscernible mode")
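# Illustrative calls (editor's addition; archive names are placeholders):
#
#   TarFile.open("backup.tar")                    # uncompressed, read
#   TarFile.open("backup.tar.gz", "r:gz")         # seekable gzip archive
#   TarFile.open("backup.tar.bz2", "w:bz2")       # create a bzip2 archive
#   TarFile.open(fileobj=sys.stdin, mode="r|*")   # non-seekable stream, any compression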
1678 @classmethod
1679 def taropen(cls, name, mode="r", fileobj=None, **kwargs):
1680 """Open uncompressed tar archive name for reading or writing.
1682 if len(mode) > 1 or mode not in "raw":
1683 raise ValueError("mode must be 'r', 'a' or 'w'")
1684 return cls(name, mode, fileobj, **kwargs)
1686 @classmethod
1687 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1688 """Open gzip compressed tar archive name for reading or writing.
1689 Appending is not allowed.
1691 if len(mode) > 1 or mode not in "rw":
1692 raise ValueError("mode must be 'r' or 'w'")
1694 try:
1695 import gzip
1696 gzip.GzipFile
1697 except (ImportError, AttributeError):
1698 raise CompressionError("gzip module is not available")
1700 if fileobj is None:
1701 fileobj = bltn_open(name, mode + "b")
1703 try:
1704 t = cls.taropen(name, mode,
1705 gzip.GzipFile(name, mode, compresslevel, fileobj),
1706 **kwargs)
1707 except IOError:
1708 raise ReadError("not a gzip file")
1709 t._extfileobj = False
1710 return t
1712 @classmethod
1713 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1714 """Open bzip2 compressed tar archive name for reading or writing.
1715 Appending is not allowed.
1717 if len(mode) > 1 or mode not in "rw":
1718 raise ValueError("mode must be 'r' or 'w'.")
1720 try:
1721 import bz2
1722 except ImportError:
1723 raise CompressionError("bz2 module is not available")
1725 if fileobj is not None:
1726 fileobj = _BZ2Proxy(fileobj, mode)
1727 else:
1728 fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
1730 try:
1731 t = cls.taropen(name, mode, fileobj, **kwargs)
1732 except IOError:
1733 raise ReadError("not a bzip2 file")
1734 t._extfileobj = False
1735 return t
1737 # All *open() methods are registered here.
1738 OPEN_METH = {
1739 "tar": "taropen", # uncompressed tar
1740 "gz": "gzopen", # gzip compressed tar
1741 "bz2": "bz2open" # bzip2 compressed tar
1742 }
1744 #--------------------------------------------------------------------------
1745 # The public methods which TarFile provides:
1747 def close(self):
1748 """Close the TarFile. In write-mode, two finishing zero blocks are
1749 appended to the archive.
1751 if self.closed:
1752 return
1754 if self.mode in "aw":
1755 self.fileobj.write(NUL * (BLOCKSIZE * 2))
1756 self.offset += (BLOCKSIZE * 2)
1757 # fill up the end with zero-blocks
1758 # (like option -b20 for tar does)
1759 blocks, remainder = divmod(self.offset, RECORDSIZE)
1760 if remainder > 0:
1761 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1763 if not self._extfileobj:
1764 self.fileobj.close()
1765 self.closed = True
1767 def getmember(self, name):
1768 """Return a TarInfo object for member `name'. If `name' can not be
1769 found in the archive, KeyError is raised. If a member occurs more
1770 than once in the archive, its last occurrence is assumed to be the
1771 most up-to-date version.
1773 tarinfo = self._getmember(name)
1774 if tarinfo is None:
1775 raise KeyError("filename %r not found" % name)
1776 return tarinfo
1778 def getmembers(self):
1779 """Return the members of the archive as a list of TarInfo objects. The
1780 list has the same order as the members in the archive.
1782 self._check()
1783 if not self._loaded: # if we want to obtain a list of
1784 self._load() # all members, we first have to
1785 # scan the whole archive.
1786 return self.members
1788 def getnames(self):
1789 """Return the members of the archive as a list of their names. It has
1790 the same order as the list returned by getmembers().
1792 return [tarinfo.name for tarinfo in self.getmembers()]
1794 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1795 """Create a TarInfo object for either the file `name' or the file
1796 object `fileobj' (using os.fstat on its file descriptor). You can
1797 modify some of the TarInfo's attributes before you add it using
1798 addfile(). If given, `arcname' specifies an alternative name for the
1799 file in the archive.
1801 self._check("aw")
1803 # When fileobj is given, replace name by
1804 # fileobj's real name.
1805 if fileobj is not None:
1806 name = fileobj.name
1808 # Building the name of the member in the archive.
1809 # Backward slashes are converted to forward slashes,
1810 # Absolute paths are turned to relative paths.
1811 if arcname is None:
1812 arcname = name
1813 drv, arcname = os.path.splitdrive(arcname)
1814 arcname = arcname.replace(os.sep, "/")
1815 arcname = arcname.lstrip("/")
1817 # Now, fill the TarInfo object with
1818 # information specific for the file.
1819 tarinfo = self.tarinfo()
1820 tarinfo.tarfile = self
1822 # Use os.stat or os.lstat, depending on platform
1823 # and if symlinks shall be resolved.
1824 if fileobj is None:
1825 if hasattr(os, "lstat") and not self.dereference:
1826 statres = os.lstat(name)
1827 else:
1828 statres = os.stat(name)
1829 else:
1830 statres = os.fstat(fileobj.fileno())
1831 linkname = ""
1833 stmd = statres.st_mode
1834 if stat.S_ISREG(stmd):
1835 inode = (statres.st_ino, statres.st_dev)
1836 if not self.dereference and statres.st_nlink > 1 and \
1837 inode in self.inodes and arcname != self.inodes[inode]:
1838 # Is it a hardlink to an already
1839 # archived file?
1840 type = LNKTYPE
1841 linkname = self.inodes[inode]
1842 else:
1843 # The inode is added only if it's valid.
1844 # For win32 it is always 0.
1845 type = REGTYPE
1846 if inode[0]:
1847 self.inodes[inode] = arcname
1848 elif stat.S_ISDIR(stmd):
1849 type = DIRTYPE
1850 elif stat.S_ISFIFO(stmd):
1851 type = FIFOTYPE
1852 elif stat.S_ISLNK(stmd):
1853 type = SYMTYPE
1854 linkname = os.readlink(name)
1855 elif stat.S_ISCHR(stmd):
1856 type = CHRTYPE
1857 elif stat.S_ISBLK(stmd):
1858 type = BLKTYPE
1859 else:
1860 return None
1862 # Fill the TarInfo object with all
1863 # information we can get.
1864 tarinfo.name = arcname
1865 tarinfo.mode = stmd
1866 tarinfo.uid = statres.st_uid
1867 tarinfo.gid = statres.st_gid
1868 if stat.S_ISREG(stmd):
1869 tarinfo.size = statres.st_size
1870 else:
1871 tarinfo.size = 0L
1872 tarinfo.mtime = statres.st_mtime
1873 tarinfo.type = type
1874 tarinfo.linkname = linkname
1875 if pwd:
1876 try:
1877 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1878 except KeyError:
1879 pass
1880 if grp:
1881 try:
1882 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1883 except KeyError:
1884 pass
1886 if type in (CHRTYPE, BLKTYPE):
1887 if hasattr(os, "major") and hasattr(os, "minor"):
1888 tarinfo.devmajor = os.major(statres.st_rdev)
1889 tarinfo.devminor = os.minor(statres.st_rdev)
1890 return tarinfo
1892 def list(self, verbose=True):
1893 """Print a table of contents to sys.stdout. If `verbose' is False, only
1894 the names of the members are printed. If it is True, an `ls -l'-like
1895 output is produced.
1897 self._check()
1899 for tarinfo in self:
1900 if verbose:
1901 print filemode(tarinfo.mode),
1902 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1903 tarinfo.gname or tarinfo.gid),
1904 if tarinfo.ischr() or tarinfo.isblk():
1905 print "%10s" % ("%d,%d" \
1906 % (tarinfo.devmajor, tarinfo.devminor)),
1907 else:
1908 print "%10d" % tarinfo.size,
1909 print "%d-%02d-%02d %02d:%02d:%02d" \
1910 % time.localtime(tarinfo.mtime)[:6],
1912 print tarinfo.name + ("/" if tarinfo.isdir() else ""),
1914 if verbose:
1915 if tarinfo.issym():
1916 print "->", tarinfo.linkname,
1917 if tarinfo.islnk():
1918 print "link to", tarinfo.linkname,
1919 print
1921 def add(self, name, arcname=None, recursive=True, exclude=None, filter=None):
1922 """Add the file `name' to the archive. `name' may be any type of file
1923 (directory, fifo, symbolic link, etc.). If given, `arcname'
1924 specifies an alternative name for the file in the archive.
1925 Directories are added recursively by default. This can be avoided by
1926 setting `recursive' to False. `exclude' is a function that should
1927 return True for each filename to be excluded. `filter' is a function
1928 that expects a TarInfo object argument and returns the changed
1929 TarInfo object; if it returns None, the TarInfo object will be
1930 excluded from the archive.
1932 self._check("aw")
1934 if arcname is None:
1935 arcname = name
1937 # Exclude pathnames.
1938 if exclude is not None:
1939 import warnings
1940 warnings.warn("use the filter argument instead",
1941 DeprecationWarning, 2)
1942 if exclude(name):
1943 self._dbg(2, "tarfile: Excluded %r" % name)
1944 return
1946 # Skip if somebody tries to archive the archive...
1947 if self.name is not None and os.path.abspath(name) == self.name:
1948 self._dbg(2, "tarfile: Skipped %r" % name)
1949 return
1951 self._dbg(1, name)
1953 # Create a TarInfo object from the file.
1954 tarinfo = self.gettarinfo(name, arcname)
1956 if tarinfo is None:
1957 self._dbg(1, "tarfile: Unsupported type %r" % name)
1958 return
1960 # Change or exclude the TarInfo object.
1961 if filter is not None:
1962 tarinfo = filter(tarinfo)
1963 if tarinfo is None:
1964 self._dbg(2, "tarfile: Excluded %r" % name)
1965 return
1967 # Append the tar header and data to the archive.
1968 if tarinfo.isreg():
1969 f = bltn_open(name, "rb")
1970 self.addfile(tarinfo, f)
1971 f.close()
1973 elif tarinfo.isdir():
1974 self.addfile(tarinfo)
1975 if recursive:
1976 for f in os.listdir(name):
1977 self.add(os.path.join(name, f), os.path.join(arcname, f),
1978 recursive, exclude, filter)
1980 else:
1981 self.addfile(tarinfo)
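# A minimal sketch of add() with a filter callback; "project" is a placeholder
# directory and reset_owner() a hypothetical filter function:
#
#     import tarfile
#
#     def reset_owner(tarinfo):
#         tarinfo.uid = tarinfo.gid = 0
#         tarinfo.uname = tarinfo.gname = "root"
#         return tarinfo      # return None instead to drop the member
#
#     tar = tarfile.open("project.tar", "w")
#     tar.add("project", filter=reset_owner)
#     tar.close()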
1983 def addfile(self, tarinfo, fileobj=None):
1984 """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
1985 given, tarinfo.size bytes are read from it and added to the archive.
1986 You can create TarInfo objects using gettarinfo().
1987 On Windows platforms, `fileobj' should always be opened with mode
1988 'rb' so that newline translation cannot distort the file size.
1989 """
1990 self._check("aw")
1992 tarinfo = copy.copy(tarinfo)
1994 buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
1995 self.fileobj.write(buf)
1996 self.offset += len(buf)
1998 # If there's data to follow, append it.
1999 if fileobj is not None:
2000 copyfileobj(fileobj, self.fileobj, tarinfo.size)
2001 blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
2002 if remainder > 0:
2003 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
2004 blocks += 1
2005 self.offset += blocks * BLOCKSIZE
2007 self.members.append(tarinfo)
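# A minimal sketch of adding in-memory data with a hand-built TarInfo;
# "notes.txt" and "misc.tar" are placeholder names:
#
#     import tarfile
#     from cStringIO import StringIO
#
#     data = "hello world\n"
#     tarinfo = tarfile.TarInfo("notes.txt")
#     tarinfo.size = len(data)
#     tar = tarfile.open("misc.tar", "w")
#     tar.addfile(tarinfo, StringIO(data))
#     tar.close()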
2009 def extractall(self, path=".", members=None):
2010 """Extract all members from the archive to the current working
2011 directory and set owner, modification time and permissions on
2012 directories afterwards. `path' specifies a different directory
2013 to extract to. `members' is optional and must be a subset of the
2014 list returned by getmembers().
2015 """
2016 directories = []
2018 if members is None:
2019 members = self
2021 for tarinfo in members:
2022 if tarinfo.isdir():
2023 # Extract directories with a safe mode.
2024 directories.append(tarinfo)
2025 tarinfo = copy.copy(tarinfo)
2026 tarinfo.mode = 0700
2027 self.extract(tarinfo, path)
2029 # Reverse sort directories.
2030 directories.sort(key=operator.attrgetter('name'))
2031 directories.reverse()
2033 # Set correct owner, mtime and filemode on directories.
2034 for tarinfo in directories:
2035 dirpath = os.path.join(path, tarinfo.name)
2036 try:
2037 self.chown(tarinfo, dirpath)
2038 self.utime(tarinfo, dirpath)
2039 self.chmod(tarinfo, dirpath)
2040 except ExtractError, e:
2041 if self.errorlevel > 1:
2042 raise
2043 else:
2044 self._dbg(1, "tarfile: %s" % e)
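# A minimal sketch of extractall(); "release.tar.gz" and "/tmp/release" are
# placeholders. Archives from untrusted sources should be inspected first,
# since member names may contain absolute paths or "..":
#
#     import tarfile
#     tar = tarfile.open("release.tar.gz")
#     tar.extractall(path="/tmp/release")
#     tar.close()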
2046 def extract(self, member, path=""):
2047 """Extract a member from the archive to the current working directory,
2048 using its full name. Its file information is extracted as accurately
2049 as possible. `member' may be a filename or a TarInfo object. You can
2050 specify a different directory using `path'.
2051 """
2052 self._check("r")
2054 if isinstance(member, basestring):
2055 tarinfo = self.getmember(member)
2056 else:
2057 tarinfo = member
2059 # Prepare the link target for makelink().
2060 if tarinfo.islnk():
2061 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
2063 try:
2064 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
2065 except EnvironmentError, e:
2066 if self.errorlevel > 0:
2067 raise
2068 else:
2069 if e.filename is None:
2070 self._dbg(1, "tarfile: %s" % e.strerror)
2071 else:
2072 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
2073 except ExtractError, e:
2074 if self.errorlevel > 1:
2075 raise
2076 else:
2077 self._dbg(1, "tarfile: %s" % e)
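# A minimal sketch of extracting a single member by name; the member name
# "docs/README" and the archive name are placeholders:
#
#     import tarfile
#     tar = tarfile.open("release.tar.gz")
#     tar.extract("docs/README", path="/tmp")
#     tar.close()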
2079 def extractfile(self, member):
2080 """Extract a member from the archive as a file object. `member' may be
2081 a filename or a TarInfo object. If `member' is a regular file, a
2082 file-like object is returned. If `member' is a link, a file-like
2083 object is constructed from the link's target. If `member' is none of
2084 the above, None is returned.
2085 The file-like object is read-only and provides the following
2086 methods: read(), readline(), readlines(), seek() and tell().
2087 """
2088 self._check("r")
2090 if isinstance(member, basestring):
2091 tarinfo = self.getmember(member)
2092 else:
2093 tarinfo = member
2095 if tarinfo.isreg():
2096 return self.fileobject(self, tarinfo)
2098 elif tarinfo.type not in SUPPORTED_TYPES:
2099 # If a member's type is unknown, it is treated as a
2100 # regular file.
2101 return self.fileobject(self, tarinfo)
2103 elif tarinfo.islnk() or tarinfo.issym():
2104 if isinstance(self.fileobj, _Stream):
2105 # A small but ugly workaround for the case that someone tries
2106 # to extract a (sym)link as a file-object from a non-seekable
2107 # stream of tar blocks.
2108 raise StreamError("cannot extract (sym)link as file object")
2109 else:
2110 # A (sym)link's file object is its target's file object.
2111 return self.extractfile(self._getmember(tarinfo.linkname,
2112 tarinfo))
2113 else:
2114 # If there's no data associated with the member (directory, chrdev,
2115 # blkdev, etc.), return None instead of a file object.
2116 return None
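# A minimal sketch of reading a member's contents without writing it to disk;
# "docs/README" is a placeholder member name:
#
#     import tarfile
#     tar = tarfile.open("release.tar.gz")
#     f = tar.extractfile("docs/README")
#     if f is not None:           # None for members that carry no data
#         print f.read()
#     tar.close()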
2118 def _extract_member(self, tarinfo, targetpath):
2119 """Extract the TarInfo object tarinfo to a physical
2120 file called targetpath.
2121 """
2122 # Fetch the TarInfo object for the given name
2123 # and build the destination pathname, replacing
2124 # forward slashes with platform-specific separators.
2125 targetpath = targetpath.rstrip("/")
2126 targetpath = targetpath.replace("/", os.sep)
2128 # Create all upper directories.
2129 upperdirs = os.path.dirname(targetpath)
2130 if upperdirs and not os.path.exists(upperdirs):
2131 # Create directories that are not part of the archive with
2132 # default permissions.
2133 os.makedirs(upperdirs)
2135 if tarinfo.islnk() or tarinfo.issym():
2136 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2137 else:
2138 self._dbg(1, tarinfo.name)
2140 if tarinfo.isreg():
2141 self.makefile(tarinfo, targetpath)
2142 elif tarinfo.isdir():
2143 self.makedir(tarinfo, targetpath)
2144 elif tarinfo.isfifo():
2145 self.makefifo(tarinfo, targetpath)
2146 elif tarinfo.ischr() or tarinfo.isblk():
2147 self.makedev(tarinfo, targetpath)
2148 elif tarinfo.islnk() or tarinfo.issym():
2149 self.makelink(tarinfo, targetpath)
2150 elif tarinfo.type not in SUPPORTED_TYPES:
2151 self.makeunknown(tarinfo, targetpath)
2152 else:
2153 self.makefile(tarinfo, targetpath)
2155 self.chown(tarinfo, targetpath)
2156 if not tarinfo.issym():
2157 self.chmod(tarinfo, targetpath)
2158 self.utime(tarinfo, targetpath)
2160 #--------------------------------------------------------------------------
2161 # Below are the different file methods. They are called via
2162 # _extract_member() when extract() is called. They can be replaced in a
2163 # subclass to implement other functionality.
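# For instance (a minimal sketch, LoggingTarFile being a hypothetical name),
# a subclass could override makefile() to report every file it writes:
#
#     class LoggingTarFile(TarFile):
#         def makefile(self, tarinfo, targetpath):
#             print >> sys.stderr, "writing %s" % targetpath
#             TarFile.makefile(self, tarinfo, targetpath)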
2165 def makedir(self, tarinfo, targetpath):
2166 """Make a directory called targetpath.
2167 """
2168 try:
2169 # Use a safe mode for the directory; the real mode is set
2170 # later in _extract_member().
2171 os.mkdir(targetpath, 0700)
2172 except EnvironmentError, e:
2173 if e.errno != errno.EEXIST:
2174 raise
2176 def makefile(self, tarinfo, targetpath):
2177 """Make a file called targetpath.
2178 """
2179 source = self.extractfile(tarinfo)
2180 target = bltn_open(targetpath, "wb")
2181 copyfileobj(source, target)
2182 source.close()
2183 target.close()
2185 def makeunknown(self, tarinfo, targetpath):
2186 """Make a file from a TarInfo object with an unknown type
2187 at targetpath.
2188 """
2189 self.makefile(tarinfo, targetpath)
2190 self._dbg(1, "tarfile: Unknown file type %r, " \
2191 "extracted as regular file." % tarinfo.type)
2193 def makefifo(self, tarinfo, targetpath):
2194 """Make a fifo called targetpath.
2195 """
2196 if hasattr(os, "mkfifo"):
2197 os.mkfifo(targetpath)
2198 else:
2199 raise ExtractError("fifo not supported by system")
2201 def makedev(self, tarinfo, targetpath):
2202 """Make a character or block device called targetpath.
2203 """
2204 if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
2205 raise ExtractError("special devices not supported by system")
2207 mode = tarinfo.mode
2208 if tarinfo.isblk():
2209 mode |= stat.S_IFBLK
2210 else:
2211 mode |= stat.S_IFCHR
2213 os.mknod(targetpath, mode,
2214 os.makedev(tarinfo.devmajor, tarinfo.devminor))
2216 def makelink(self, tarinfo, targetpath):
2217 """Make a (symbolic) link called targetpath. If it cannot be created
2218 (platform limitation), we try to make a copy of the referenced file
2219 instead of a link.
2220 """
2221 try:
2222 if tarinfo.issym():
2223 os.symlink(tarinfo.linkname, targetpath)
2224 else:
2225 # See extract().
2226 os.link(tarinfo._link_target, targetpath)
2227 except AttributeError:
2228 if tarinfo.issym():
2229 linkpath = os.path.dirname(tarinfo.name) + "/" + \
2230 tarinfo.linkname
2231 else:
2232 linkpath = tarinfo.linkname
2234 try:
2235 self._extract_member(self.getmember(linkpath), targetpath)
2236 except (EnvironmentError, KeyError), e:
2237 linkpath = linkpath.replace("/", os.sep)
2238 try:
2239 shutil.copy2(linkpath, targetpath)
2240 except EnvironmentError, e:
2241 raise IOError("link could not be created")
2243 def chown(self, tarinfo, targetpath):
2244 """Set owner of targetpath according to tarinfo.
2245 """
2246 if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
2247 # We have to be root to do so.
2248 try:
2249 g = grp.getgrnam(tarinfo.gname)[2]
2250 except KeyError:
2251 try:
2252 g = grp.getgrgid(tarinfo.gid)[2]
2253 except KeyError:
2254 g = os.getgid()
2255 try:
2256 u = pwd.getpwnam(tarinfo.uname)[2]
2257 except KeyError:
2258 try:
2259 u = pwd.getpwuid(tarinfo.uid)[2]
2260 except KeyError:
2261 u = os.getuid()
2262 try:
2263 if tarinfo.issym() and hasattr(os, "lchown"):
2264 os.lchown(targetpath, u, g)
2265 else:
2266 if sys.platform != "os2emx":
2267 os.chown(targetpath, u, g)
2268 except EnvironmentError, e:
2269 raise ExtractError("could not change owner")
2271 def chmod(self, tarinfo, targetpath):
2272 """Set file permissions of targetpath according to tarinfo.
2273 """
2274 if hasattr(os, 'chmod'):
2275 try:
2276 os.chmod(targetpath, tarinfo.mode)
2277 except EnvironmentError, e:
2278 raise ExtractError("could not change mode")
2280 def utime(self, tarinfo, targetpath):
2281 """Set modification time of targetpath according to tarinfo.
2282 """
2283 if not hasattr(os, 'utime'):
2284 return
2285 try:
2286 os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
2287 except EnvironmentError, e:
2288 raise ExtractError("could not change modification time")
2290 #--------------------------------------------------------------------------
2291 def next(self):
2292 """Return the next member of the archive as a TarInfo object, when
2293 TarFile is opened for reading. Return None if there are no more
2294 members available.
2295 """
2296 self._check("ra")
2297 if self.firstmember is not None:
2298 m = self.firstmember
2299 self.firstmember = None
2300 return m
2302 # Read the next block.
2303 self.fileobj.seek(self.offset)
2304 while True:
2305 try:
2306 tarinfo = self.tarinfo.fromtarfile(self)
2307 if tarinfo is None:
2308 return
2309 self.members.append(tarinfo)
2311 except HeaderError, e:
2312 if self.ignore_zeros:
2313 self._dbg(2, "0x%X: %s" % (self.offset, e))
2314 self.offset += BLOCKSIZE
2315 continue
2316 else:
2317 if self.offset == 0:
2318 raise ReadError(str(e))
2319 return None
2320 break
2322 return tarinfo
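# A minimal sketch of walking an archive member by member with next();
# "sample.tar" is a placeholder archive name:
#
#     import tarfile
#     tar = tarfile.open("sample.tar")
#     while True:
#         tarinfo = tar.next()
#         if tarinfo is None:
#             break
#         print tarinfo.name
#     tar.close()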
2324 #--------------------------------------------------------------------------
2325 # Little helper methods:
2327 def _getmember(self, name, tarinfo=None):
2328 """Find an archive member by name from bottom to top.
2329 If tarinfo is given, it is used as the starting point.
2330 """
2331 # Ensure that all members have been loaded.
2332 members = self.getmembers()
2334 if tarinfo is None:
2335 end = len(members)
2336 else:
2337 end = members.index(tarinfo)
2339 for i in xrange(end - 1, -1, -1):
2340 if name == members[i].name:
2341 return members[i]
2343 def _load(self):
2344 """Read through the entire archive file and look for readable
2345 members.
2346 """
2347 while True:
2348 tarinfo = self.next()
2349 if tarinfo is None:
2350 break
2351 self._loaded = True
2353 def _check(self, mode=None):
2354 """Check if TarFile is still open, and if the operation's mode
2355 corresponds to TarFile's mode.
2356 """
2357 if self.closed:
2358 raise IOError("%s is closed" % self.__class__.__name__)
2359 if mode is not None and self.mode not in mode:
2360 raise IOError("bad operation for mode %r" % self.mode)
2362 def __iter__(self):
2363 """Provide an iterator object.
2364 """
2365 if self._loaded:
2366 return iter(self.members)
2367 else:
2368 return TarIter(self)
2370 def _dbg(self, level, msg):
2371 """Write debugging output to sys.stderr.
2372 """
2373 if level <= self.debug:
2374 print >> sys.stderr, msg
2375 # class TarFile
2377 class TarIter:
2378 """Iterator Class.
2380 for tarinfo in TarFile(...):
2381 suite...
2382 """
2384 def __init__(self, tarfile):
2385 """Construct a TarIter object.
2386 """
2387 self.tarfile = tarfile
2388 self.index = 0
2389 def __iter__(self):
2390 """Return iterator object.
2391 """
2392 return self
2393 def next(self):
2394 """Return the next item using TarFile's next() method.
2395 When all members have been read, set TarFile as _loaded.
2396 """
2397 # Fix for SF #1100429: Under rare circumstances it can
2398 # happen that getmembers() is called during iteration,
2399 # which will cause TarIter to stop prematurely.
2400 if not self.tarfile._loaded:
2401 tarinfo = self.tarfile.next()
2402 if not tarinfo:
2403 self.tarfile._loaded = True
2404 raise StopIteration
2405 else:
2406 try:
2407 tarinfo = self.tarfile.members[self.index]
2408 except IndexError:
2409 raise StopIteration
2410 self.index += 1
2411 return tarinfo
2413 # Helper classes for sparse file support
2414 class _section:
2415 """Base class for _data and _hole.
2416 """
2417 def __init__(self, offset, size):
2418 self.offset = offset
2419 self.size = size
2420 def __contains__(self, offset):
2421 return self.offset <= offset < self.offset + self.size
2423 class _data(_section):
2424 """Represent a data section in a sparse file.
2425 """
2426 def __init__(self, offset, size, realpos):
2427 _section.__init__(self, offset, size)
2428 self.realpos = realpos
2430 class _hole(_section):
2431 """Represent a hole section in a sparse file.
2432 """
2433 pass
2435 class _ringbuffer(list):
2436 """Ring buffer which speeds up sequential lookups by resuming
2437 each search at the position of the previous hit.
2438 """
2439 def __init__(self):
2440 self.idx = 0
2441 def find(self, offset):
2442 idx = self.idx
2443 while True:
2444 item = self[idx]
2445 if offset in item:
2446 break
2447 idx += 1
2448 if idx == len(self):
2449 idx = 0
2450 if idx == self.idx:
2451 # End of File
2452 return None
2453 self.idx = idx
2454 return item
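# A minimal sketch of how the sparse-file helpers fit together (the offsets
# and sizes are made up): find() returns the section containing an offset and
# remembers where it stopped, so consecutive lookups stay cheap.
#
#     buf = _ringbuffer()
#     buf.append(_data(0, 512, realpos=0))
#     buf.append(_hole(512, 1024))
#     buf.append(_data(1536, 512, realpos=512))
#     buf.find(600)       # -> the _hole section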
2456 #---------------------------------------------
2457 # zipfile compatible TarFile class
2458 #---------------------------------------------
2459 TAR_PLAIN = 0 # zipfile.ZIP_STORED
2460 TAR_GZIPPED = 8 # zipfile.ZIP_DEFLATED
2461 class TarFileCompat:
2462 """TarFile class compatible with standard module zipfile's
2463 ZipFile class.
2464 """
2465 def __init__(self, file, mode="r", compression=TAR_PLAIN):
2466 from warnings import warnpy3k
2467 warnpy3k("the TarFileCompat class has been removed in Python 3.0",
2468 stacklevel=2)
2469 if compression == TAR_PLAIN:
2470 self.tarfile = TarFile.taropen(file, mode)
2471 elif compression == TAR_GZIPPED:
2472 self.tarfile = TarFile.gzopen(file, mode)
2473 else:
2474 raise ValueError("unknown compression constant")
2475 if mode[0:1] == "r":
2476 members = self.tarfile.getmembers()
2477 for m in members:
2478 m.filename = m.name
2479 m.file_size = m.size
2480 m.date_time = time.gmtime(m.mtime)[:6]
2481 def namelist(self):
2482 return map(lambda m: m.name, self.infolist())
2483 def infolist(self):
2484 return filter(lambda m: m.type in REGULAR_TYPES,
2485 self.tarfile.getmembers())
2486 def printdir(self):
2487 self.tarfile.list()
2488 def testzip(self):
2489 return
2490 def getinfo(self, name):
2491 return self.tarfile.getmember(name)
2492 def read(self, name):
2493 return self.tarfile.extractfile(self.tarfile.getmember(name)).read()
2494 def write(self, filename, arcname=None, compress_type=None):
2495 self.tarfile.add(filename, arcname)
2496 def writestr(self, zinfo, bytes):
2497 try:
2498 from cStringIO import StringIO
2499 except ImportError:
2500 from StringIO import StringIO
2501 import calendar
2502 tinfo = TarInfo(zinfo.filename)
2503 tinfo.size = len(bytes)
2504 tinfo.mtime = calendar.timegm(zinfo.date_time)
2505 self.tarfile.addfile(tinfo, StringIO(bytes))
2506 def close(self):
2507 self.tarfile.close()
2508 #class TarFileCompat
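# A minimal sketch of the zipfile-style interface; "old.tar.gz" and
# "member.txt" are placeholder names:
#
#     zf = TarFileCompat("old.tar.gz", compression=TAR_GZIPPED)
#     print zf.namelist()
#     data = zf.read("member.txt")
#     zf.close()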
2510 #--------------------
2511 # exported functions
2512 #--------------------
2513 def is_tarfile(name):
2514 """Return True if name points to a tar archive that we
2515 are able to handle, else return False.
2516 """
2517 try:
2518 t = open(name)
2519 t.close()
2520 return True
2521 except TarError:
2522 return False
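# A minimal sketch of guarding tarfile.open() with is_tarfile();
# "download.bin" is a placeholder path:
#
#     import tarfile
#     if tarfile.is_tarfile("download.bin"):
#         tar = tarfile.open("download.bin")
#         print tar.getnames()
#         tar.close()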
2524 bltn_open = open
2525 open = TarFile.open