Patch by Jeremy Katz (SF #1609407)
[python.git] / Lib / tarfile.py
blob1b8f1408a79c8aa772bb3008ae1a271328cefb24
1 #!/usr/bin/env python
2 # -*- coding: iso-8859-1 -*-
3 #-------------------------------------------------------------------
4 # tarfile.py
5 #-------------------------------------------------------------------
6 # Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7 # All rights reserved.
9 # Permission is hereby granted, free of charge, to any person
10 # obtaining a copy of this software and associated documentation
11 # files (the "Software"), to deal in the Software without
12 # restriction, including without limitation the rights to use,
13 # copy, modify, merge, publish, distribute, sublicense, and/or sell
14 # copies of the Software, and to permit persons to whom the
15 # Software is furnished to do so, subject to the following
16 # conditions:
18 # The above copyright notice and this permission notice shall be
19 # included in all copies or substantial portions of the Software.
21 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23 # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25 # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 # OTHER DEALINGS IN THE SOFTWARE.
30 """Read from and write to tar format archives.
31 """
33 __version__ = "$Revision$"
34 # $Source$
36 version = "0.8.0"
37 __author__ = "Lars Gustäbel (lars@gustaebel.de)"
38 __date__ = "$Date$"
39 __cvsid__ = "$Id$"
40 __credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
42 #---------
43 # Imports
44 #---------
45 import sys
46 import os
47 import shutil
48 import stat
49 import errno
50 import time
51 import struct
52 import copy
54 if sys.platform == 'mac':
55 # This module needs work for MacOS9, especially in the area of pathname
56 # handling. In many places it is assumed a simple substitution of / by the
57 # local os.path.sep is good enough to convert pathnames, but this does not
58 # work with the mac rooted:path:name versus :nonrooted:path:name syntax
59 raise ImportError, "tarfile does not work for platform==mac"
61 try:
62 import grp, pwd
63 except ImportError:
64 grp = pwd = None
66 # from tarfile import *
67 __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
69 #---------------------------------------------------------
70 # tar constants
71 #---------------------------------------------------------
72 NUL = "\0" # the null character
73 BLOCKSIZE = 512 # length of processing blocks
74 RECORDSIZE = BLOCKSIZE * 20 # length of records
75 MAGIC = "ustar" # magic tar string
76 VERSION = "00" # version number
78 LENGTH_NAME = 100 # maximum length of a filename
79 LENGTH_LINK = 100 # maximum length of a linkname
80 LENGTH_PREFIX = 155 # maximum length of the prefix field
81 MAXSIZE_MEMBER = 077777777777L # maximum size of a file (11 octal digits)
83 REGTYPE = "0" # regular file
84 AREGTYPE = "\0" # regular file
85 LNKTYPE = "1" # link (inside tarfile)
86 SYMTYPE = "2" # symbolic link
87 CHRTYPE = "3" # character special device
88 BLKTYPE = "4" # block special device
89 DIRTYPE = "5" # directory
90 FIFOTYPE = "6" # fifo special device
91 CONTTYPE = "7" # contiguous file
93 GNUTYPE_LONGNAME = "L" # GNU tar extension for longnames
94 GNUTYPE_LONGLINK = "K" # GNU tar extension for longlink
95 GNUTYPE_SPARSE = "S" # GNU tar extension for sparse file
97 #---------------------------------------------------------
98 # tarfile constants
99 #---------------------------------------------------------
100 SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE, # file types that tarfile
101 SYMTYPE, DIRTYPE, FIFOTYPE, # can cope with.
102 CONTTYPE, CHRTYPE, BLKTYPE,
103 GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
104 GNUTYPE_SPARSE)
106 REGULAR_TYPES = (REGTYPE, AREGTYPE, # file types that somehow
107 CONTTYPE, GNUTYPE_SPARSE) # represent regular files
109 #---------------------------------------------------------
110 # Bits used in the mode field, values in octal.
111 #---------------------------------------------------------
112 S_IFLNK = 0120000 # symbolic link
113 S_IFREG = 0100000 # regular file
114 S_IFBLK = 0060000 # block device
115 S_IFDIR = 0040000 # directory
116 S_IFCHR = 0020000 # character device
117 S_IFIFO = 0010000 # fifo
119 TSUID = 04000 # set UID on execution
120 TSGID = 02000 # set GID on execution
121 TSVTX = 01000 # reserved
123 TUREAD = 0400 # read by owner
124 TUWRITE = 0200 # write by owner
125 TUEXEC = 0100 # execute/search by owner
126 TGREAD = 0040 # read by group
127 TGWRITE = 0020 # write by group
128 TGEXEC = 0010 # execute/search by group
129 TOREAD = 0004 # read by other
130 TOWRITE = 0002 # write by other
131 TOEXEC = 0001 # execute/search by other
133 #---------------------------------------------------------
134 # Some useful functions
135 #---------------------------------------------------------
def stn(s, length):
    """Return s as a fixed-width, NUL-padded header field.

    Strings longer than length are truncated to length characters;
    shorter ones are filled up with NUL characters.
    """
    padding = NUL * (length - len(s))
    return s[:length] + padding
142 def nti(s):
143 """Convert a number field to a python number.
145 # There are two possible encodings for a number field, see
146 # itn() below.
147 if s[0] != chr(0200):
148 n = int(s.rstrip(NUL + " ") or "0", 8)
149 else:
150 n = 0L
151 for i in xrange(len(s) - 1):
152 n <<= 8
153 n += ord(s[i + 1])
154 return n
156 def itn(n, digits=8, posix=False):
157 """Convert a python number to a number field.
159 # POSIX 1003.1-1988 requires numbers to be encoded as a string of
160 # octal digits followed by a null-byte, this allows values up to
161 # (8**(digits-1))-1. GNU tar allows storing numbers greater than
162 # that if necessary. A leading 0200 byte indicates this particular
163 # encoding, the following digits-1 bytes are a big-endian
164 # representation. This allows values up to (256**(digits-1))-1.
165 if 0 <= n < 8 ** (digits - 1):
166 s = "%0*o" % (digits - 1, n) + NUL
167 else:
168 if posix:
169 raise ValueError("overflow in number field")
171 if n < 0:
172 # XXX We mimic GNU tar's behaviour with negative numbers,
173 # this could raise OverflowError.
174 n = struct.unpack("L", struct.pack("l", n))[0]
176 s = ""
177 for i in xrange(digits - 1):
178 s = chr(n & 0377) + s
179 n >>= 8
180 s = chr(0200) + s
181 return s
def calc_chksums(buf):
    """Return the (unsigned, signed) checksums of a header block.

    All bytes of the 512 byte header are summed up, except for the
    chksum field (bytes 148-155) which counts as eight spaces (8 * 32
    = 256).  According to the GNU tar sources some tars (Sun and NeXT)
    sum the bytes as signed chars, which gives a different result when
    bytes with the high bit set are present, so both variants are
    computed.
    """
    before_chksum = buf[:148]
    after_chksum = buf[156:512]

    sums = []
    for code in ("B", "b"):
        values = (struct.unpack("148" + code, before_chksum) +
                  struct.unpack("356" + code, after_chksum))
        sums.append(256 + sum(values))
    return tuple(sums)
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.

    With length set to None everything up to the end of src is copied.
    Raises IOError if src ends before length bytes could be read.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    BUFSIZE = 16 * 1024
    full_blocks, tail = divmod(length, BUFSIZE)

    # Whole buffers first ...
    while full_blocks > 0:
        chunk = src.read(BUFSIZE)
        if len(chunk) < BUFSIZE:
            raise IOError("end of file reached")
        dst.write(chunk)
        full_blocks -= 1

    # ... then whatever is left over.
    if tail != 0:
        chunk = src.read(tail)
        if len(chunk) < tail:
            raise IOError("end of file reached")
        dst.write(chunk)
    return
# One entry per character of the mode string.  Each entry lists
# (bits, char) alternatives which are tried in order; the first one
# whose bits are all set in the mode wins.
filemode_table = (
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)

def filemode(mode):
    """Render a file's mode as a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for alternatives in filemode_table:
        # "-" stands for "none of the alternatives applies".
        symbol = "-"
        for bit, char in alternatives:
            if mode & bit == bit:
                symbol = char
                break
        chars.append(symbol)
    return "".join(chars)
if os.sep == "/":
    normpath = os.path.normpath
else:
    def normpath(path):
        """Normalize path and convert the local separator to "/"."""
        return os.path.normpath(path).replace(os.sep, "/")
class TarError(Exception):
    """Base exception."""

class ExtractError(TarError):
    """General exception for extract errors."""

class ReadError(TarError):
    """Exception for unreadable tar archives."""

class CompressionError(TarError):
    """Exception for unavailable compression methods."""

class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
284 #---------------------------
285 # internal stream interface
286 #---------------------------
287 class _LowLevelFile:
288 """Low-level file object. Supports reading and writing.
289 It is used instead of a regular file object for streaming
290 access.
293 def __init__(self, name, mode):
294 mode = {
295 "r": os.O_RDONLY,
296 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
297 }[mode]
298 if hasattr(os, "O_BINARY"):
299 mode |= os.O_BINARY
300 self.fd = os.open(name, mode)
302 def close(self):
303 os.close(self.fd)
305 def read(self, size):
306 return os.read(self.fd, size)
308 def write(self, s):
309 os.write(self.fd, s)
311 class _Stream:
312 """Class that serves as an adapter between TarFile and
313 a stream-like object. The stream-like object only
314 needs to have a read() or write() method and is accessed
315 blockwise. Use of gzip or bzip2 compression is possible.
316 A stream-like object could be for example: sys.stdin,
317 sys.stdout, a socket, a tape device etc.
319 _Stream is intended to be used only internally.
322 def __init__(self, name, mode, comptype, fileobj, bufsize):
323 """Construct a _Stream object.
325 self._extfileobj = True
326 if fileobj is None:
327 fileobj = _LowLevelFile(name, mode)
328 self._extfileobj = False
330 if comptype == '*':
331 # Enable transparent compression detection for the
332 # stream interface
333 fileobj = _StreamProxy(fileobj)
334 comptype = fileobj.getcomptype()
336 self.name = name or ""
337 self.mode = mode
338 self.comptype = comptype
339 self.fileobj = fileobj
340 self.bufsize = bufsize
341 self.buf = ""
342 self.pos = 0L
343 self.closed = False
345 if comptype == "gz":
346 try:
347 import zlib
348 except ImportError:
349 raise CompressionError("zlib module is not available")
350 self.zlib = zlib
351 self.crc = zlib.crc32("")
352 if mode == "r":
353 self._init_read_gz()
354 else:
355 self._init_write_gz()
357 if comptype == "bz2":
358 try:
359 import bz2
360 except ImportError:
361 raise CompressionError("bz2 module is not available")
362 if mode == "r":
363 self.dbuf = ""
364 self.cmp = bz2.BZ2Decompressor()
365 else:
366 self.cmp = bz2.BZ2Compressor()
368 def __del__(self):
369 if hasattr(self, "closed") and not self.closed:
370 self.close()
372 def _init_write_gz(self):
373 """Initialize for writing with gzip compression.
375 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
376 -self.zlib.MAX_WBITS,
377 self.zlib.DEF_MEM_LEVEL,
379 timestamp = struct.pack("<L", long(time.time()))
380 self.__write("\037\213\010\010%s\002\377" % timestamp)
381 if self.name.endswith(".gz"):
382 self.name = self.name[:-3]
383 self.__write(self.name + NUL)
385 def write(self, s):
386 """Write string s to the stream.
388 if self.comptype == "gz":
389 self.crc = self.zlib.crc32(s, self.crc)
390 self.pos += len(s)
391 if self.comptype != "tar":
392 s = self.cmp.compress(s)
393 self.__write(s)
395 def __write(self, s):
396 """Write string s to the stream if a whole new block
397 is ready to be written.
399 self.buf += s
400 while len(self.buf) > self.bufsize:
401 self.fileobj.write(self.buf[:self.bufsize])
402 self.buf = self.buf[self.bufsize:]
404 def close(self):
405 """Close the _Stream object. No operation should be
406 done on it afterwards.
408 if self.closed:
409 return
411 if self.mode == "w" and self.comptype != "tar":
412 self.buf += self.cmp.flush()
414 if self.mode == "w" and self.buf:
415 self.fileobj.write(self.buf)
416 self.buf = ""
417 if self.comptype == "gz":
418 # The native zlib crc is an unsigned 32-bit integer, but
419 # the Python wrapper implicitly casts that to a signed C
420 # long. So, on a 32-bit box self.crc may "look negative",
421 # while the same crc on a 64-bit box may "look positive".
422 # To avoid irksome warnings from the `struct` module, force
423 # it to look positive on all boxes.
424 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
425 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
427 if not self._extfileobj:
428 self.fileobj.close()
430 self.closed = True
432 def _init_read_gz(self):
433 """Initialize for reading a gzip compressed fileobj.
435 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
436 self.dbuf = ""
438 # taken from gzip.GzipFile with some alterations
439 if self.__read(2) != "\037\213":
440 raise ReadError("not a gzip file")
441 if self.__read(1) != "\010":
442 raise CompressionError("unsupported compression method")
444 flag = ord(self.__read(1))
445 self.__read(6)
447 if flag & 4:
448 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
449 self.read(xlen)
450 if flag & 8:
451 while True:
452 s = self.__read(1)
453 if not s or s == NUL:
454 break
455 if flag & 16:
456 while True:
457 s = self.__read(1)
458 if not s or s == NUL:
459 break
460 if flag & 2:
461 self.__read(2)
463 def tell(self):
464 """Return the stream's file pointer position.
466 return self.pos
468 def seek(self, pos=0):
469 """Set the stream's file pointer to pos. Negative seeking
470 is forbidden.
472 if pos - self.pos >= 0:
473 blocks, remainder = divmod(pos - self.pos, self.bufsize)
474 for i in xrange(blocks):
475 self.read(self.bufsize)
476 self.read(remainder)
477 else:
478 raise StreamError("seeking backwards is not allowed")
479 return self.pos
481 def read(self, size=None):
482 """Return the next size number of bytes from the stream.
483 If size is not defined, return all bytes of the stream
484 up to EOF.
486 if size is None:
487 t = []
488 while True:
489 buf = self._read(self.bufsize)
490 if not buf:
491 break
492 t.append(buf)
493 buf = "".join(t)
494 else:
495 buf = self._read(size)
496 self.pos += len(buf)
497 return buf
499 def _read(self, size):
500 """Return size bytes from the stream.
502 if self.comptype == "tar":
503 return self.__read(size)
505 c = len(self.dbuf)
506 t = [self.dbuf]
507 while c < size:
508 buf = self.__read(self.bufsize)
509 if not buf:
510 break
511 buf = self.cmp.decompress(buf)
512 t.append(buf)
513 c += len(buf)
514 t = "".join(t)
515 self.dbuf = t[size:]
516 return t[:size]
518 def __read(self, size):
519 """Return size bytes from stream. If internal buffer is empty,
520 read another block from the stream.
522 c = len(self.buf)
523 t = [self.buf]
524 while c < size:
525 buf = self.fileobj.read(self.bufsize)
526 if not buf:
527 break
528 t.append(buf)
529 c += len(buf)
530 t = "".join(t)
531 self.buf = t[size:]
532 return t[:size]
533 # class _Stream
535 class _StreamProxy(object):
536 """Small proxy class that enables transparent compression
537 detection for the Stream interface (mode 'r|*').
540 def __init__(self, fileobj):
541 self.fileobj = fileobj
542 self.buf = self.fileobj.read(BLOCKSIZE)
544 def read(self, size):
545 self.read = self.fileobj.read
546 return self.buf
548 def getcomptype(self):
549 if self.buf.startswith("\037\213\010"):
550 return "gz"
551 if self.buf.startswith("BZh91"):
552 return "bz2"
553 return "tar"
555 def close(self):
556 self.fileobj.close()
557 # class StreamProxy
559 class _BZ2Proxy(object):
560 """Small proxy class that enables external file object
561 support for "r:bz2" and "w:bz2" modes. This is actually
562 a workaround for a limitation in bz2 module's BZ2File
563 class which (unlike gzip.GzipFile) has no support for
564 a file object argument.
567 blocksize = 16 * 1024
569 def __init__(self, fileobj, mode):
570 self.fileobj = fileobj
571 self.mode = mode
572 self.init()
574 def init(self):
575 import bz2
576 self.pos = 0
577 if self.mode == "r":
578 self.bz2obj = bz2.BZ2Decompressor()
579 self.fileobj.seek(0)
580 self.buf = ""
581 else:
582 self.bz2obj = bz2.BZ2Compressor()
584 def read(self, size):
585 b = [self.buf]
586 x = len(self.buf)
587 while x < size:
588 try:
589 raw = self.fileobj.read(self.blocksize)
590 data = self.bz2obj.decompress(raw)
591 b.append(data)
592 except EOFError:
593 break
594 x += len(data)
595 self.buf = "".join(b)
597 buf = self.buf[:size]
598 self.buf = self.buf[size:]
599 self.pos += len(buf)
600 return buf
602 def seek(self, pos):
603 if pos < self.pos:
604 self.init()
605 self.read(pos - self.pos)
607 def tell(self):
608 return self.pos
610 def write(self, data):
611 self.pos += len(data)
612 raw = self.bz2obj.compress(data)
613 self.fileobj.write(raw)
615 def close(self):
616 if self.mode == "w":
617 raw = self.bz2obj.flush()
618 self.fileobj.write(raw)
619 self.fileobj.close()
620 # class _BZ2Proxy
622 #------------------------
623 # Extraction file object
624 #------------------------
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile(). Support for
       sparse files included.
    """

    def __init__(self, tarfile, tarinfo):
        """Create a reader for the member tarinfo of the archive
           tarfile.  The archive's file object is shared, it is
           repositioned before every read.
        """
        self.fileobj = tarfile.fileobj
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.offset = tarinfo.offset_data
        self.size = tarinfo.size
        self.pos = 0
        self.linebuffer = ""
        # read() is bound per instance, depending on the member type.
        if tarinfo.issparse():
            self.sparse = tarinfo.sparse
            self.read = self._readsparse
        else:
            self.read = self._readnormal

    def __read(self, size):
        """Overloadable read method.
        """
        return self.fileobj.read(size)

    def readline(self, size=-1):
        """Read a line with approx. size. If size is negative,
           read a whole line. readline() and read() must not
           be mixed up (!).

           Complete lines are returned with any carriage returns in
           front of the newline removed.  If the newline lies beyond
           size, only the first size buffered characters are returned
           and the rest of the line is kept for the next call.
        """
        if size < 0:
            size = sys.maxint

        nl = self.linebuffer.find("\n")
        if nl < 0:
            # No complete line buffered yet: read on in small pieces
            # until a newline shows up or the limit is used up.
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)
                nl = self.linebuffer.find("\n")
            if nl == -1:
                s = self.linebuffer
                self.linebuffer = ""
                return s
        elif nl > size:
            # The newline is further away than the caller allows.
            # Hand out a partial line; cutting at size and then
            # treating that position as the newline would swallow a
            # character and append a newline that is not in the data.
            s = self.linebuffer[:size]
            self.linebuffer = self.linebuffer[size:]
            return s

        buf = self.linebuffer[:nl]
        self.linebuffer = self.linebuffer[nl + 1:]
        while buf[-1:] == "\r":
            buf = buf[:-1]
        return buf + "\n"

    def readlines(self):
        """Return a list with all (following) lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def _readnormal(self, size=None):
        """Read operation for regular files.

           Returns at most size bytes (all remaining bytes if size is
           None), never reading beyond the end of the member.  Raises
           ValueError if the object has been closed.
        """
        if self.closed:
            raise ValueError("file is closed")
        self.fileobj.seek(self.offset + self.pos)
        bytesleft = self.size - self.pos
        if size is None:
            bytestoread = bytesleft
        else:
            bytestoread = min(size, bytesleft)
        self.pos += bytestoread
        return self.__read(bytestoread)

    def _readsparse(self, size=None):
        """Read operation for sparse files.

           The result is assembled section by section.  Raises
           ValueError if the object has been closed.
        """
        if self.closed:
            raise ValueError("file is closed")

        if size is None:
            size = self.size - self.pos

        data = []
        while size > 0:
            buf = self._readsparsesection(size)
            if not buf:
                break
            size -= len(buf)
            data.append(buf)
        return "".join(data)

    def _readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        section = self.sparse.find(self.pos)

        if section is None:
            return ""

        toread = min(size, section.offset + section.size - self.pos)
        # NOTE(review): _data is not defined in the visible part of
        # this file; presumably it marks sections that are physically
        # stored in the archive -- confirm.
        if isinstance(section, _data):
            realpos = section.realpos + self.pos - section.offset
            self.pos += toread
            self.fileobj.seek(self.offset + realpos)
            return self.__read(toread)
        else:
            # any other section is returned as a run of NULs
            self.pos += toread
            return NUL * toread

    def tell(self):
        """Return the current file position.
        """
        return self.pos

    def seek(self, pos, whence=0):
        """Seek to a position in the file.

           whence is 0 (from the start), 1 (relative to the current
           position) or 2 (relative to the end).  The result is
           clamped to the range 0..size and the line buffer is
           discarded.
        """
        self.linebuffer = ""
        if whence == 0:
            self.pos = min(max(pos, 0), self.size)
        if whence == 1:
            if pos < 0:
                self.pos = max(self.pos + pos, 0)
            else:
                self.pos = min(self.pos + pos, self.size)
        if whence == 2:
            self.pos = max(min(self.size + pos, self.size), 0)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file object.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")
        return self

    def next(self):
        """Get the next item from the file iterator.
        """
        result = self.readline()
        if not result:
            raise StopIteration
        return result

#class ExFileObject
782 #------------------
783 # Exported Classes
784 #------------------
785 class TarInfo(object):
786 """Informational class which holds the details about an
787 archive member given by a tar header block.
788 TarInfo objects are returned by TarFile.getmember(),
789 TarFile.getmembers() and TarFile.gettarinfo() and are
790 usually created internally.
793 def __init__(self, name=""):
794 """Construct a TarInfo object. name is the optional name
795 of the member.
797 self.name = name # member name (dirnames must end with '/')
798 self.mode = 0666 # file permissions
799 self.uid = 0 # user id
800 self.gid = 0 # group id
801 self.size = 0 # file size
802 self.mtime = 0 # modification time
803 self.chksum = 0 # header checksum
804 self.type = REGTYPE # member type
805 self.linkname = "" # link name
806 self.uname = "user" # user name
807 self.gname = "group" # group name
808 self.devmajor = 0 # device major number
809 self.devminor = 0 # device minor number
811 self.offset = 0 # the tar header starts here
812 self.offset_data = 0 # the file's data starts here
814 def __repr__(self):
815 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
817 @classmethod
818 def frombuf(cls, buf):
819 """Construct a TarInfo object from a 512 byte string buffer.
821 if len(buf) != BLOCKSIZE:
822 raise ValueError("truncated header")
823 if buf.count(NUL) == BLOCKSIZE:
824 raise ValueError("empty header")
826 tarinfo = cls()
827 tarinfo.buf = buf
828 tarinfo.name = buf[0:100].rstrip(NUL)
829 tarinfo.mode = nti(buf[100:108])
830 tarinfo.uid = nti(buf[108:116])
831 tarinfo.gid = nti(buf[116:124])
832 tarinfo.size = nti(buf[124:136])
833 tarinfo.mtime = nti(buf[136:148])
834 tarinfo.chksum = nti(buf[148:156])
835 tarinfo.type = buf[156:157]
836 tarinfo.linkname = buf[157:257].rstrip(NUL)
837 tarinfo.uname = buf[265:297].rstrip(NUL)
838 tarinfo.gname = buf[297:329].rstrip(NUL)
839 tarinfo.devmajor = nti(buf[329:337])
840 tarinfo.devminor = nti(buf[337:345])
841 prefix = buf[345:500].rstrip(NUL)
843 if prefix and not tarinfo.issparse():
844 tarinfo.name = prefix + "/" + tarinfo.name
846 if tarinfo.chksum not in calc_chksums(buf):
847 raise ValueError("invalid header")
848 return tarinfo
850 def tobuf(self, posix=False):
851 """Return a tar header as a string of 512 byte blocks.
853 buf = ""
854 type = self.type
855 prefix = ""
857 if self.name.endswith("/"):
858 type = DIRTYPE
860 if type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
861 # Prevent "././@LongLink" from being normalized.
862 name = self.name
863 else:
864 name = normpath(self.name)
866 if type == DIRTYPE:
867 # directories should end with '/'
868 name += "/"
870 linkname = self.linkname
871 if linkname:
872 # if linkname is empty we end up with a '.'
873 linkname = normpath(linkname)
875 if posix:
876 if self.size > MAXSIZE_MEMBER:
877 raise ValueError("file is too large (>= 8 GB)")
879 if len(self.linkname) > LENGTH_LINK:
880 raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
882 if len(name) > LENGTH_NAME:
883 prefix = name[:LENGTH_PREFIX + 1]
884 while prefix and prefix[-1] != "/":
885 prefix = prefix[:-1]
887 name = name[len(prefix):]
888 prefix = prefix[:-1]
890 if not prefix or len(name) > LENGTH_NAME:
891 raise ValueError("name is too long")
893 else:
894 if len(self.linkname) > LENGTH_LINK:
895 buf += self._create_gnulong(self.linkname, GNUTYPE_LONGLINK)
897 if len(name) > LENGTH_NAME:
898 buf += self._create_gnulong(name, GNUTYPE_LONGNAME)
900 parts = [
901 stn(name, 100),
902 itn(self.mode & 07777, 8, posix),
903 itn(self.uid, 8, posix),
904 itn(self.gid, 8, posix),
905 itn(self.size, 12, posix),
906 itn(self.mtime, 12, posix),
907 " ", # checksum field
908 type,
909 stn(self.linkname, 100),
910 stn(MAGIC, 6),
911 stn(VERSION, 2),
912 stn(self.uname, 32),
913 stn(self.gname, 32),
914 itn(self.devmajor, 8, posix),
915 itn(self.devminor, 8, posix),
916 stn(prefix, 155)
919 buf += struct.pack("%ds" % BLOCKSIZE, "".join(parts))
920 chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
921 buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
922 self.buf = buf
923 return buf
925 def _create_gnulong(self, name, type):
926 """Create a GNU longname/longlink header from name.
927 It consists of an extended tar header, with the length
928 of the longname as size, followed by data blocks,
929 which contain the longname as a null terminated string.
931 name += NUL
933 tarinfo = self.__class__()
934 tarinfo.name = "././@LongLink"
935 tarinfo.type = type
936 tarinfo.mode = 0
937 tarinfo.size = len(name)
939 # create extended header
940 buf = tarinfo.tobuf()
941 # create name blocks
942 buf += name
943 blocks, remainder = divmod(len(name), BLOCKSIZE)
944 if remainder > 0:
945 buf += (BLOCKSIZE - remainder) * NUL
946 return buf
948 def isreg(self):
949 return self.type in REGULAR_TYPES
950 def isfile(self):
951 return self.isreg()
952 def isdir(self):
953 return self.type == DIRTYPE
954 def issym(self):
955 return self.type == SYMTYPE
956 def islnk(self):
957 return self.type == LNKTYPE
958 def ischr(self):
959 return self.type == CHRTYPE
960 def isblk(self):
961 return self.type == BLKTYPE
962 def isfifo(self):
963 return self.type == FIFOTYPE
964 def issparse(self):
965 return self.type == GNUTYPE_SPARSE
966 def isdev(self):
967 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
968 # class TarInfo
970 class TarFile(object):
971 """The TarFile Class provides an interface to tar archives.
974 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
976 dereference = False # If true, add content of linked file to the
977 # tar file, else the link.
979 ignore_zeros = False # If true, skips empty or invalid blocks and
980 # continues processing.
982 errorlevel = 0 # If 0, fatal errors only appear in debug
983 # messages (if debug >= 0). If > 0, errors
984 # are passed to the caller as exceptions.
986 posix = False # If True, generates POSIX.1-1990-compliant
987 # archives (no GNU extensions!)
989 fileobject = ExFileObject
991 def __init__(self, name=None, mode="r", fileobj=None):
992 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
993 read from an existing archive, 'a' to append data to an existing
994 file or 'w' to create a new file overwriting an existing one. `mode'
995 defaults to 'r'.
996 If `fileobj' is given, it is used for reading or writing data. If it
997 can be determined, `mode' is overridden by `fileobj's mode.
998 `fileobj' is not closed, when TarFile is closed.
1000 self.name = name
1002 if len(mode) > 1 or mode not in "raw":
1003 raise ValueError("mode must be 'r', 'a' or 'w'")
1004 self._mode = mode
1005 self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
1007 if not fileobj:
1008 fileobj = file(self.name, self.mode)
1009 self._extfileobj = False
1010 else:
1011 if self.name is None and hasattr(fileobj, "name"):
1012 self.name = fileobj.name
1013 if hasattr(fileobj, "mode"):
1014 self.mode = fileobj.mode
1015 self._extfileobj = True
1016 self.fileobj = fileobj
1018 # Init datastructures
1019 self.closed = False
1020 self.members = [] # list of members as TarInfo objects
1021 self._loaded = False # flag if all members have been read
1022 self.offset = 0L # current position in the archive file
1023 self.inodes = {} # dictionary caching the inodes of
1024 # archive members already added
1026 if self._mode == "r":
1027 self.firstmember = None
1028 self.firstmember = self.next()
1030 if self._mode == "a":
1031 # Move to the end of the archive,
1032 # before the first empty block.
1033 self.firstmember = None
1034 while True:
1035 try:
1036 tarinfo = self.next()
1037 except ReadError:
1038 self.fileobj.seek(0)
1039 break
1040 if tarinfo is None:
1041 self.fileobj.seek(- BLOCKSIZE, 1)
1042 break
1044 if self._mode in "aw":
1045 self._loaded = True
1047 #--------------------------------------------------------------------------
1048 # Below are the classmethods which act as alternate constructors to the
1049 # TarFile class. The open() method is the only one that is needed for
1050 # public use; it is the "super"-constructor and is able to select an
1051 # adequate "sub"-constructor for a particular compression using the mapping
1052 # from OPEN_METH.
1054 # This concept allows one to subclass TarFile without losing the comfort of
1055 # the super-constructor. A sub-constructor is registered and made available
1056 # by adding it to the mapping in OPEN_METH.
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError if neither name nor fileobj is given or the
       mode is not understood, CompressionError for an unknown
       compression type and ReadError if no opener accepts the file.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Remember where a caller-supplied file object stands, so
        # that every detection attempt starts from the same place.
        # A failed attempt has usually consumed some data already.
        saved_pos = None
        if fileobj is not None:
            try:
                saved_pos = fileobj.tell()
            except (AttributeError, IOError):
                # not seekable; nothing to restore
                saved_pos = None

        # Find out which *open() is appropriate for opening the file.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            try:
                return func(name, "r", fileobj)
            except (ReadError, CompressionError):
                if saved_pos is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if filemode not in "rw":
            raise ValueError("mode must be 'r' or 'w'")

        t = cls(name, filemode,
                _Stream(name, filemode, comptype, fileobj, bufsize))
        # the _Stream object is closed together with the TarFile
        t._extfileobj = False
        return t

    elif mode in "aw":
        return cls.taropen(name, mode, fileobj)

    raise ValueError("undiscernible mode")
@classmethod
def taropen(cls, name, mode="r", fileobj=None):
    """Open uncompressed tar archive name for reading or writing.

       `mode' has to be exactly 'r', 'a' or 'w', otherwise ValueError is
       raised. The archive object is built by calling
       cls(name, mode, fileobj).
    """
    # Compare against the complete mode strings. A substring test like
    # `mode not in "raw"' lets the empty string through.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj)
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for a mode other than 'r' or 'w',
       CompressionError if the gzip module cannot be used and ReadError
       if an IOError occurs while the archive is being opened.
    """
    # Tuple membership, so that the empty string is rejected as well.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    # Derive the name of the contained tar archive from the file name.
    # `name' may be None when only a file object was passed.
    pre, ext = os.path.splitext(name or "")
    pre = os.path.basename(pre)
    if ext == ".tgz":
        ext = ".tar"
    if ext == ".gz":
        ext = ""
    tarname = pre + ext

    # Remember whether the underlying file is ours, so that it can be
    # closed again if opening the archive fails.
    opened_here = fileobj is None
    if opened_here:
        fileobj = file(name, mode + "b")

    if mode != "r":
        name = tarname

    try:
        t = cls.taropen(tarname, mode,
            gzip.GzipFile(name, mode, compresslevel, fileobj)
        )
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a gzip file")
    except:
        # Do not leak the file handle on any other failure either.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for a mode other than 'r' or 'w',
       CompressionError if the bz2 module is missing and ReadError if
       an IOError occurs while the archive is being opened.
    """
    # Tuple membership, so that the empty string is rejected as well.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # Derive the name of the contained tar archive from the file name.
    # `name' may be None when only a file object was passed.
    pre, ext = os.path.splitext(name or "")
    pre = os.path.basename(pre)
    if ext == ".tbz2":
        ext = ".tar"
    if ext == ".bz2":
        ext = ""
    tarname = pre + ext

    # Only a BZ2File created here is closed on failure; a file object
    # handed in by the caller stays under the caller's control.
    opened_here = fileobj is None
    if opened_here:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        fileobj = _BZ2Proxy(fileobj, mode)

    try:
        t = cls.taropen(tarname, mode, fileobj)
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a bzip2 file")
    except:
        # Do not leak the file handle on any other failure either.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
# All *open() methods are registered here.
# The keys are the compression types that may follow the ":" in a mode
# string; the values are the names of the classmethods that open() looks
# up with getattr() to do the actual work.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open"    # bzip2 compressed tar
}
1211 #--------------------------------------------------------------------------
1212 # The public methods which TarFile provides:
def close(self):
    """Close the TarFile. When the archive was opened for writing or
       appending, two zero blocks are written as end marker and the
       output is padded with zeros up to a multiple of RECORDSIZE.
       Calling close() on an already closed TarFile does nothing.
    """
    if self.closed:
        return

    if self._mode in "aw":
        # End-of-archive marker.
        marker_size = BLOCKSIZE * 2
        self.fileobj.write(NUL * marker_size)
        self.offset += marker_size
        # fill up the end with zero-blocks
        # (like option -b20 for tar does)
        leftover = divmod(self.offset, RECORDSIZE)[1]
        if leftover > 0:
            self.fileobj.write(NUL * (RECORDSIZE - leftover))

    # A file object supplied by the user is left open.
    if not self._extfileobj:
        self.fileobj.close()
    self.closed = True
def getmember(self, name):
    """Return the TarInfo object for member `name'. KeyError is raised
       if the archive contains no member of that name. For a name that
       occurs more than once, the last occurrence is taken to be the
       most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects,
       in the order in which they appear in the archive.
    """
    self._check()
    # A complete list is only available once the
    # whole archive has been scanned.
    already_scanned = self._loaded
    if not already_scanned:
        self._load()
    return self.members
def getnames(self):
    """Return the names of all archive members as a list, in the same
       order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object for either the file `name' or the file
       object `fileobj' (using os.fstat on its file descriptor). You can
       modify some of the TarInfo's attributes before you add it using
       addfile(). If given, `arcname' specifies an alternative name for the
       file in the archive.

       Returns None if the file is neither a regular file, directory,
       fifo, symbolic link, character device nor block device.
    """
    self._check("aw")

    # When fileobj is given, replace name by
    # fileobj's real name.
    if fileobj is not None:
        name = fileobj.name

    # Building the name of the member in the archive.
    # Backward slashes are converted to forward slashes,
    # Absolute paths are turned to relative paths.
    if arcname is None:
        arcname = name
    arcname = normpath(arcname)
    drv, arcname = os.path.splitdrive(arcname)
    while arcname[0:1] == "/":
        arcname = arcname[1:]

    # Now, fill the TarInfo object with
    # information specific for the file.
    tarinfo = TarInfo()

    # Use os.stat or os.lstat, depending on platform
    # and if symlinks shall be resolved.
    if fileobj is None:
        if hasattr(os, "lstat") and not self.dereference:
            statres = os.lstat(name)
        else:
            statres = os.stat(name)
    else:
        statres = os.fstat(fileobj.fileno())
    linkname = ""

    # Map the stat mode to a tar member type.
    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        # self.inodes maps (st_ino, st_dev) to the archive name under
        # which that inode was first stored.
        inode = (statres.st_ino, statres.st_dev)
        if not self.dereference and \
                statres.st_nlink > 1 and inode in self.inodes:
            # Is it a hardlink to an already
            # archived file?
            type = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # The inode is added only if its valid.
            # For win32 it is always 0.
            type = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(stmd):
        type = DIRTYPE
        # Directory members always carry a trailing slash.
        if arcname[-1:] != "/":
            arcname += "/"
    elif stat.S_ISFIFO(stmd):
        type = FIFOTYPE
    elif stat.S_ISLNK(stmd):
        type = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(stmd):
        type = CHRTYPE
    elif stat.S_ISBLK(stmd):
        type = BLKTYPE
    else:
        # Anything else cannot be represented; the caller gets None.
        return None

    # Fill the TarInfo object with all
    # information we can get.
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    # Only regular files get a size; every other type is stored
    # with size 0.
    if stat.S_ISREG(stmd):
        tarinfo.size = statres.st_size
    else:
        tarinfo.size = 0L
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = type
    tarinfo.linkname = linkname
    # Owner and group names are filled in when the pwd/grp modules are
    # usable and know the id; otherwise the attributes are left alone.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    # Device numbers are recorded where the platform can split st_rdev.
    if type in (CHRTYPE, BLKTYPE):
        if hasattr(os, "major") and hasattr(os, "minor"):
            tarinfo.devmajor = os.major(statres.st_rdev)
            tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
def list(self, verbose=True):
    """Print a table of contents to sys.stdout. If `verbose' is False, only
       the names of the members are printed. If it is True, an `ls -l'-like
       output is produced.
    """
    self._check()

    for tarinfo in self:
        if verbose:
            # Mode string, then owner/group (names preferred over
            # numeric ids).
            print filemode(tarinfo.mode),
            print "%s/%s" % (tarinfo.uname or tarinfo.uid,
                             tarinfo.gname or tarinfo.gid),
            # Devices show "major,minor" in place of a size.
            if tarinfo.ischr() or tarinfo.isblk():
                print "%10s" % ("%d,%d" \
                                % (tarinfo.devmajor, tarinfo.devminor)),
            else:
                print "%10d" % tarinfo.size,
            # Modification time in local time.
            print "%d-%02d-%02d %02d:%02d:%02d" \
                  % time.localtime(tarinfo.mtime)[:6],

        print tarinfo.name,

        if verbose:
            if tarinfo.issym():
                print "->", tarinfo.linkname,
            if tarinfo.islnk():
                print "link to", tarinfo.linkname,
        # Terminate the line for this member.
        print
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False.

       Files of a type that gettarinfo() cannot represent and the
       archive file itself are skipped.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None \
        and os.path.abspath(name) == os.path.abspath(self.name):
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory. Only its entries are added, there is no
    # member for "." itself.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir("."):
                self.add(f, os.path.join(arcname, f))
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = file(name, "rb")
        # Close the source file even if writing to the archive fails.
        try:
            self.addfile(tarinfo, f)
        finally:
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))

    else:
        self.addfile(tarinfo)
def addfile(self, tarinfo, fileobj=None):
    """Append the TarInfo object `tarinfo' to the archive. When `fileobj'
       is given, tarinfo.size bytes are read from it and stored as the
       member's data. TarInfo objects can be created with gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a copy of the caller's object.
    tarinfo = copy.copy(tarinfo)

    header = tarinfo.tobuf(self.posix)
    self.fileobj.write(header)
    self.offset += len(header)

    # If there's data to follow, append it and pad
    # the last block with zeros.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        nblocks, tail = divmod(tarinfo.size, BLOCKSIZE)
        if tail > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - tail))
            nblocks += 1
        self.offset += nblocks * BLOCKSIZE

    self.members.append(tarinfo)
1468 def extractall(self, path=".", members=None):
1469 """Extract all members from the archive to the current working
1470 directory and set owner, modification time and permissions on
1471 directories afterwards. `path' specifies a different directory
1472 to extract to. `members' is optional and must be a subset of the
1473 list returned by getmembers().
1475 directories = []
1477 if members is None:
1478 members = self
1480 for tarinfo in members:
1481 if tarinfo.isdir():
1482 # Extract directory with a safe mode, so that
1483 # all files below can be extracted as well.
1484 try:
1485 os.makedirs(os.path.join(path, tarinfo.name), 0777)
1486 except EnvironmentError:
1487 pass
1488 directories.append(tarinfo)
1489 else:
1490 self.extract(tarinfo, path)
1492 # Reverse sort directories.
1493 directories.sort(lambda a, b: cmp(a.name, b.name))
1494 directories.reverse()
1496 # Set correct owner, mtime and filemode on directories.
1497 for tarinfo in directories:
1498 path = os.path.join(path, tarinfo.name)
1499 try:
1500 self.chown(tarinfo, path)
1501 self.utime(tarinfo, path)
1502 self.chmod(tarinfo, path)
1503 except ExtractError, e:
1504 if self.errorlevel > 1:
1505 raise
1506 else:
1507 self._dbg(1, "tarfile: %s" % e)
1509 def extract(self, member, path=""):
1510 """Extract a member from the archive to the current working directory,
1511 using its full name. Its file information is extracted as accurately
1512 as possible. `member' may be a filename or a TarInfo object. You can
1513 specify a different directory using `path'.
1515 self._check("r")
1517 if isinstance(member, TarInfo):
1518 tarinfo = member
1519 else:
1520 tarinfo = self.getmember(member)
1522 # Prepare the link target for makelink().
1523 if tarinfo.islnk():
1524 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
1526 try:
1527 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
1528 except EnvironmentError, e:
1529 if self.errorlevel > 0:
1530 raise
1531 else:
1532 if e.filename is None:
1533 self._dbg(1, "tarfile: %s" % e.strerror)
1534 else:
1535 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
1536 except ExtractError, e:
1537 if self.errorlevel > 1:
1538 raise
1539 else:
1540 self._dbg(1, "tarfile: %s" % e)
def extractfile(self, member):
    """Return a file object for a member of the archive. `member' is
       either a filename or a TarInfo object. Regular files and members
       of unknown type yield a file-like object, links yield the
       file-like object of their target, and all other members yield
       None.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    if not isinstance(member, TarInfo):
        tarinfo = self.getmember(member)
    else:
        tarinfo = member

    # Regular files, and members whose type is unknown (these are
    # treated like regular files), carry data of their own.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # If there's no data associated with the member (directory, chrdev,
    # blkdev, etc.), return None instead of a file object.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    # A small but ugly workaround for the case that someone tries
    # to extract a (sym)link as a file-object from a non-seekable
    # stream of tar blocks.
    if isinstance(self.fileobj, _Stream):
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    target = self._getmember(tarinfo.linkname, tarinfo)
    return self.extractfile(target)
1581 def _extract_member(self, tarinfo, targetpath):
1582 """Extract the TarInfo object tarinfo to a physical
1583 file called targetpath.
1585 # Fetch the TarInfo object for the given name
1586 # and build the destination pathname, replacing
1587 # forward slashes to platform specific separators.
1588 if targetpath[-1:] == "/":
1589 targetpath = targetpath[:-1]
1590 targetpath = os.path.normpath(targetpath)
1592 # Create all upper directories.
1593 upperdirs = os.path.dirname(targetpath)
1594 if upperdirs and not os.path.exists(upperdirs):
1595 ti = TarInfo()
1596 ti.name = upperdirs
1597 ti.type = DIRTYPE
1598 ti.mode = 0777
1599 ti.mtime = tarinfo.mtime
1600 ti.uid = tarinfo.uid
1601 ti.gid = tarinfo.gid
1602 ti.uname = tarinfo.uname
1603 ti.gname = tarinfo.gname
1604 try:
1605 self._extract_member(ti, ti.name)
1606 except:
1607 pass
1609 if tarinfo.islnk() or tarinfo.issym():
1610 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
1611 else:
1612 self._dbg(1, tarinfo.name)
1614 if tarinfo.isreg():
1615 self.makefile(tarinfo, targetpath)
1616 elif tarinfo.isdir():
1617 self.makedir(tarinfo, targetpath)
1618 elif tarinfo.isfifo():
1619 self.makefifo(tarinfo, targetpath)
1620 elif tarinfo.ischr() or tarinfo.isblk():
1621 self.makedev(tarinfo, targetpath)
1622 elif tarinfo.islnk() or tarinfo.issym():
1623 self.makelink(tarinfo, targetpath)
1624 elif tarinfo.type not in SUPPORTED_TYPES:
1625 self.makeunknown(tarinfo, targetpath)
1626 else:
1627 self.makefile(tarinfo, targetpath)
1629 self.chown(tarinfo, targetpath)
1630 if not tarinfo.issym():
1631 self.chmod(tarinfo, targetpath)
1632 self.utime(tarinfo, targetpath)
1634 #--------------------------------------------------------------------------
1635 # Below are the different file methods. They are called via
1636 # _extract_member() when extract() is called. They can be replaced in a
1637 # subclass to implement other functionality.
1639 def makedir(self, tarinfo, targetpath):
1640 """Make a directory called targetpath.
1642 try:
1643 os.mkdir(targetpath)
1644 except EnvironmentError, e:
1645 if e.errno != errno.EEXIST:
1646 raise
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath.

       The member's data is obtained from extractfile() and copied to
       targetpath, which is opened in mode "wb".
    """
    source = self.extractfile(tarinfo)
    # Both file objects are closed even if opening
    # the target or copying the data fails.
    try:
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to targetpath as if it were a
       regular file and report that through _dbg().
    """
    self.makefile(tarinfo, targetpath)
    notice = "tarfile: Unknown file type %r, " \
             "extracted as regular file." % tarinfo.type
    self._dbg(1, notice)
def makefifo(self, tarinfo, targetpath):
    """Create a fifo at targetpath. ExtractError is raised on
       platforms where os.mkfifo does not exist.
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
def makedev(self, tarinfo, targetpath):
    """Create a character or block device node at targetpath.
       ExtractError is raised on platforms that lack os.mknod or
       os.makedev.
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits with the device type flag.
    mode = tarinfo.mode
    if tarinfo.isblk():
        devtype = stat.S_IFBLK
    else:
        devtype = stat.S_IFCHR
    mode |= devtype

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath. If it cannot be created
       (platform limitation), we try to make a copy of the referenced file
       instead of a link.

       IOError is raised if neither the link nor a copy can be made.
    """
    linkpath = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(linkpath, targetpath)
        else:
            # See extract().
            os.link(tarinfo._link_target, targetpath)
    except AttributeError:
        # Reached when an attribute lookup above fails, e.g. os.symlink
        # or os.link is missing on this platform.
        # A symlink target is interpreted relative to the directory of
        # the link member itself.
        if tarinfo.issym():
            linkpath = os.path.join(os.path.dirname(tarinfo.name),
                                    linkpath)
            linkpath = normpath(linkpath)

        # First try to extract the referenced archive member under the
        # link's name; if that fails, fall back to copying the
        # referenced file from the filesystem.
        try:
            self._extract_member(self.getmember(linkpath), targetpath)
        except (EnvironmentError, KeyError), e:
            linkpath = os.path.normpath(linkpath)
            try:
                shutil.copy2(linkpath, targetpath)
            except EnvironmentError, e:
                raise IOError("link could not be created")
def chown(self, tarinfo, targetpath):
    """Set the owner of targetpath as recorded in tarinfo. Nothing is
       done unless the pwd module is available and the effective user
       id is 0. ExtractError is raised if changing the owner fails.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Group: look up by name first, then by the stored id,
    # and finally use the group of the current process.
    try:
        g = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            g = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            g = os.getgid()

    # User: same order of lookups.
    try:
        u = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            u = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            u = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, u, g)
        elif sys.platform != "os2emx":
            os.chown(targetpath, u, g)
    except EnvironmentError:
        raise ExtractError("could not change owner")
def chmod(self, tarinfo, targetpath):
    """Apply the permission bits stored in tarinfo to targetpath.
       Does nothing on platforms without os.chmod; ExtractError is
       raised if the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
def utime(self, tarinfo, targetpath):
    """Set access and modification time of targetpath to the mtime
       stored in tarinfo. ExtractError is raised if that fails.
    """
    if not hasattr(os, 'utime'):
        return
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    on_windows = sys.platform == "win32"
    if on_windows and tarinfo.isdir():
        return
    try:
        os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
1766 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       ReadError is raised if the very first block (offset 0) cannot be
       parsed and ignore_zeros is not set.
    """
    self._check("ra")
    # A member stored in self.firstmember is handed out first,
    # and only once.
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            # Nothing left to read.
            return None

        try:
            tarinfo = TarInfo.frombuf(buf)

            # Set the TarInfo object's offset to the current position of the
            # TarFile and set self.offset to the position where the data blocks
            # should begin.
            tarinfo.offset = self.offset
            self.offset += BLOCKSIZE

            tarinfo = self.proc_member(tarinfo)

        except ValueError, e:
            if self.ignore_zeros:
                # Skip the bad block and try the following one.
                self._dbg(2, "0x%X: empty or invalid block: %s" %
                          (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            else:
                # A bad block at the very start is an error, later on
                # it is taken as the end of the archive.
                if self.offset == 0:
                    raise ReadError("empty, unreadable or compressed "
                                    "file: %s" % e)
                return None
        break

    # Some old tar programs represent a directory as a regular
    # file with a trailing slash.
    if tarinfo.isreg() and tarinfo.name.endswith("/"):
        tarinfo.type = DIRTYPE

    # Directory names should have a '/' at the end.
    if tarinfo.isdir():
        tarinfo.name += "/"

    self.members.append(tarinfo)
    return tarinfo
1821 #--------------------------------------------------------------------------
1822 # The following are methods that are called depending on the type of a
1823 # member. The entry point is proc_member() which is called with a TarInfo
1824 # object created from the header block from the current offset. The
1825 # proc_member() method can be overridden in a subclass to add custom
1826 # proc_*() methods. A proc_*() method MUST implement the following
1827 # operations:
1828 # 1. Set tarinfo.offset_data to the position where the data blocks begin,
1829 # if there is data that follows.
1830 # 2. Set self.offset to the position where the next member's header will
1831 # begin.
1832 # 3. Return tarinfo or another valid TarInfo object.
def proc_member(self, tarinfo):
    """Dispatch tarinfo to the proc_*() method that handles its type
       and return that method's result.
    """
    if tarinfo.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        handler = self.proc_gnulong
    elif tarinfo.type == GNUTYPE_SPARSE:
        handler = self.proc_sparse
    else:
        handler = self.proc_builtin
    return handler(tarinfo)
def proc_builtin(self, tarinfo):
    """Handle a member of a builtin type, or of an unknown type (which
       is treated like a regular file). Returns tarinfo.
    """
    tarinfo.offset_data = self.offset
    has_data = tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES
    if has_data:
        # Skip the following data blocks.
        self.offset += self._block(tarinfo.size)
    return tarinfo
def proc_gnulong(self, tarinfo):
    """Handle a GNU longname or longlink member: read the long string
       from the data blocks, process the header that follows and return
       its TarInfo object with the name (or linkname) replaced.
    """
    # Collect the data blocks holding the long string.
    pieces = []
    remaining = tarinfo.size
    while remaining > 0:
        pieces.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        remaining -= BLOCKSIZE
    longstr = "".join(pieces)

    # Fetch the next header and process it.
    header = TarInfo.frombuf(self.fileobj.read(BLOCKSIZE))
    header.offset = self.offset
    self.offset += BLOCKSIZE
    member = self.proc_member(header)

    # Patch the TarInfo object from the next header with
    # the longname information.
    member.offset = tarinfo.offset
    if tarinfo.type == GNUTYPE_LONGNAME:
        member.name = longstr.rstrip(NUL)
    elif tarinfo.type == GNUTYPE_LONGLINK:
        member.linkname = longstr.rstrip(NUL)

    return member
def proc_sparse(self, tarinfo):
    """Process a GNU sparse header plus extra headers.

       Builds a list of _data and _hole sections describing the file,
       stores it in tarinfo.sparse, and sets tarinfo.size to the
       original size read from the header. Returns tarinfo.
    """
    buf = tarinfo.buf
    sp = _ringbuffer()
    pos = 386       # position of the first sparse struct in the header
    lastpos = 0L    # end of the last section seen so far
    realpos = 0L    # running total of data bytes seen so far
    # There are 4 possible sparse structs in the
    # first header.
    for i in xrange(4):
        # Each struct is 24 bytes: a 12 byte offset
        # followed by a 12 byte length.
        try:
            offset = nti(buf[pos:pos + 12])
            numbytes = nti(buf[pos + 12:pos + 24])
        except ValueError:
            break
        # A gap between the previous section and this one is a hole.
        if offset > lastpos:
            sp.append(_hole(lastpos, offset - lastpos))
        sp.append(_data(offset, numbytes, realpos))
        realpos += numbytes
        lastpos = offset + numbytes
        pos += 24

    isextended = ord(buf[482])
    origsize = nti(buf[483:495])

    # If the isextended flag is given,
    # there are extra headers to process.
    while isextended == 1:
        buf = self.fileobj.read(BLOCKSIZE)
        self.offset += BLOCKSIZE
        pos = 0
        # An extra header holds up to 21 structs.
        for i in xrange(21):
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                break
            if offset > lastpos:
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24
        isextended = ord(buf[504])

    # Anything between the last section and the
    # original size is a trailing hole.
    if lastpos < origsize:
        sp.append(_hole(lastpos, origsize - lastpos))

    tarinfo.sparse = sp

    # tarinfo.size still holds the size from the header here; it is
    # used to skip the data blocks before being replaced by origsize.
    tarinfo.offset_data = self.offset
    self.offset += self._block(tarinfo.size)
    tarinfo.size = origsize

    return tarinfo
1940 #--------------------------------------------------------------------------
1941 # Little helper methods:
def _block(self, count):
    """Return count rounded up to the next multiple of BLOCKSIZE,
       e.g. _block(834) => 1024.
    """
    remainder = count % BLOCKSIZE
    if remainder:
        return count - remainder + BLOCKSIZE
    return count
def _getmember(self, name, tarinfo=None):
    """Search the archive members for `name', starting at the end and
       moving towards the beginning. If tarinfo is given, only the
       members in front of it are searched. Returns None if nothing
       matches.
    """
    # Ensure that all members have been loaded.
    members = self.getmembers()

    if tarinfo is None:
        candidates = members
    else:
        candidates = members[:members.index(tarinfo)]

    for member in reversed(candidates):
        if name == member.name:
            return member
    return None
def _load(self):
    """Call next() until it returns None, so that all readable members
       of the archive have been visited, then mark the archive as
       loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
def _check(self, mode=None):
    """Raise IOError if the TarFile is closed or, when `mode' is given,
       if the TarFile's own mode is not contained in it.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None:
        return
    if self._mode not in mode:
        raise IOError("bad operation for mode %r" % self._mode)
def __iter__(self):
    """Return an iterator over the members: a TarIter while the archive
       has not been read completely, a plain list iterator afterwards.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
def _dbg(self, level, msg):
    """Write debugging output to sys.stderr.

       `msg' is only printed if `level' does not exceed self.debug.
    """
    if level <= self.debug:
        print >> sys.stderr, msg
2000 # class TarFile
class TarIter:
    """Iterator Class.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Set up iteration over `tarfile', starting at the first
           member.
        """
        self.index = 0
        self.tarfile = tarfile

    def __iter__(self):
        """The object is its own iterator.
        """
        return self

    def next(self):
        """Return the next member. As long as the TarFile has not been
           read completely, members come from its next() method; when
           that is exhausted the TarFile is marked as _loaded.
        """
        archive = self.tarfile
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely.
        if archive._loaded:
            # Everything has been read already, so continue
            # from the member list at our own position.
            try:
                tarinfo = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            tarinfo = archive.next()
            if not tarinfo:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return tarinfo
2038 # Helper classes for sparse file support
class _section:
    """Common base of _data and _hole: a range of `size' bytes that
       starts at `offset'.
    """
    def __init__(self, offset, size):
        self.size = size
        self.offset = offset

    def __contains__(self, offset):
        # True for offsets from the start of the section
        # up to, but not including, its end.
        start = self.offset
        return start <= offset and offset < start + self.size
class _data(_section):
    """A section of a sparse file that holds data. In addition to
       offset and size it records `realpos'.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
class _hole(_section):
    """Represent a hole section in a sparse file.

       Adds nothing to _section; the class only serves to tell holes
       apart from _data sections.
    """
    pass
class _ringbuffer(list):
    """Ringbuffer class which increases performance
       over a regular list.

       find() starts searching at the position of the previous hit and
       wraps around at the end of the list.
    """
    def __init__(self):
        # Index of the item returned by the last successful find().
        self.idx = 0

    def find(self, offset):
        """Return the first item, searching from the last hit onwards,
           that contains `offset', or None if there is no such item.
        """
        # Nothing to search: without this guard the first
        # lookup below would raise IndexError.
        if not self:
            return None

        idx = self.idx
        while True:
            item = self[idx]
            if offset in item:
                break
            idx += 1
            if idx == len(self):
                idx = 0
            if idx == self.idx:
                # End of File
                return None
        self.idx = idx
        return item
2081 #---------------------------------------------
2082 # zipfile compatible TarFile class
2083 #---------------------------------------------
# Values accepted by TarFileCompat's `compression' argument; the
# trailing comments name the zipfile constants they correspond to.
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """Wrapper around TarFile that offers the interface of the
       standard module zipfile's ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        # Pick the opener that belongs to the compression constant.
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)

        # When reading, give every member the attribute
        # names a ZipInfo object would have.
        if mode[0:1] == "r":
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        return [member.name for member in self.infolist()]

    def infolist(self):
        # Only members of a regular file type are reported.
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]

    def printdir(self):
        self.tarfile.list()

    def testzip(self):
        return

    def getinfo(self, name):
        return self.tarfile.getmember(name)

    def read(self, name):
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        # compress_type is accepted but not used.
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        # Copy the ZipInfo style attributes to the names
        # that addfile() works with.
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO(bytes))

    def close(self):
        self.tarfile.close()
#class TarFileCompat
2132 #--------------------
2133 # exported functions
2134 #--------------------
def is_tarfile(name):
    """Return True if `name' can be opened as a tar archive by this
       module, False if the attempt fails with a TarError.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
# Module-level shortcut for TarFile.open. Note that this rebinds the
# name `open' for the whole module, so code in this file that calls
# open() gets TarFile.open and not the builtin.
open = TarFile.open