Update functools section
[python.git] / Lib / tarfile.py
blob061d0f55b9521f081e1233ff1aa89905ab1679f8
1 #!/usr/bin/env python
2 # -*- coding: iso-8859-1 -*-
3 #-------------------------------------------------------------------
4 # tarfile.py
5 #-------------------------------------------------------------------
6 # Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7 # All rights reserved.
9 # Permission is hereby granted, free of charge, to any person
10 # obtaining a copy of this software and associated documentation
11 # files (the "Software"), to deal in the Software without
12 # restriction, including without limitation the rights to use,
13 # copy, modify, merge, publish, distribute, sublicense, and/or sell
14 # copies of the Software, and to permit persons to whom the
15 # Software is furnished to do so, subject to the following
16 # conditions:
18 # The above copyright notice and this permission notice shall be
19 # included in all copies or substantial portions of the Software.
21 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23 # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25 # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 # OTHER DEALINGS IN THE SOFTWARE.
30 """Read from and write to tar format archives.
31 """
33 __version__ = "$Revision$"
34 # $Source$
36 version = "0.8.0"
37 __author__ = "Lars Gustäbel (lars@gustaebel.de)"
38 __date__ = "$Date$"
39 __cvsid__ = "$Id$"
40 __credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
42 #---------
43 # Imports
44 #---------
45 import sys
46 import os
47 import shutil
48 import stat
49 import errno
50 import time
51 import struct
if sys.platform == "mac":
    # Classic MacOS (9 and earlier) distinguishes "rooted:path:name" from
    # ":nonrooted:path:name".  This module converts pathnames by simply
    # substituting "/" with os.path.sep in many places, which is not good
    # enough for that syntax, so importing is refused on that platform.
    raise ImportError("tarfile does not work for platform==mac")
60 try:
61 import grp, pwd
62 except ImportError:
63 grp = pwd = None
65 # from tarfile import *
66 __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
68 #---------------------------------------------------------
69 # tar constants
70 #---------------------------------------------------------
71 NUL = "\0" # the null character
72 BLOCKSIZE = 512 # length of processing blocks
73 RECORDSIZE = BLOCKSIZE * 20 # length of records
74 MAGIC = "ustar" # magic tar string
75 VERSION = "00" # version number
77 LENGTH_NAME = 100 # maximum length of a filename
78 LENGTH_LINK = 100 # maximum length of a linkname
79 LENGTH_PREFIX = 155 # maximum length of the prefix field
80 MAXSIZE_MEMBER = 077777777777L # maximum size of a file (11 octal digits)
82 REGTYPE = "0" # regular file
83 AREGTYPE = "\0" # regular file
84 LNKTYPE = "1" # link (inside tarfile)
85 SYMTYPE = "2" # symbolic link
86 CHRTYPE = "3" # character special device
87 BLKTYPE = "4" # block special device
88 DIRTYPE = "5" # directory
89 FIFOTYPE = "6" # fifo special device
90 CONTTYPE = "7" # contiguous file
92 GNUTYPE_LONGNAME = "L" # GNU tar extension for longnames
93 GNUTYPE_LONGLINK = "K" # GNU tar extension for longlink
94 GNUTYPE_SPARSE = "S" # GNU tar extension for sparse file
96 #---------------------------------------------------------
97 # tarfile constants
98 #---------------------------------------------------------
99 SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE, # file types that tarfile
100 SYMTYPE, DIRTYPE, FIFOTYPE, # can cope with.
101 CONTTYPE, CHRTYPE, BLKTYPE,
102 GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
103 GNUTYPE_SPARSE)
105 REGULAR_TYPES = (REGTYPE, AREGTYPE, # file types that somehow
106 CONTTYPE, GNUTYPE_SPARSE) # represent regular files
108 #---------------------------------------------------------
109 # Bits used in the mode field, values in octal.
110 #---------------------------------------------------------
111 S_IFLNK = 0120000 # symbolic link
112 S_IFREG = 0100000 # regular file
113 S_IFBLK = 0060000 # block device
114 S_IFDIR = 0040000 # directory
115 S_IFCHR = 0020000 # character device
116 S_IFIFO = 0010000 # fifo
118 TSUID = 04000 # set UID on execution
119 TSGID = 02000 # set GID on execution
120 TSVTX = 01000 # reserved
122 TUREAD = 0400 # read by owner
123 TUWRITE = 0200 # write by owner
124 TUEXEC = 0100 # execute/search by owner
125 TGREAD = 0040 # read by group
126 TGWRITE = 0020 # write by group
127 TGEXEC = 0010 # execute/search by group
128 TOREAD = 0004 # read by other
129 TOWRITE = 0002 # write by other
130 TOEXEC = 0001 # execute/search by other
132 #---------------------------------------------------------
133 # Some useful functions
134 #---------------------------------------------------------
def stn(s, length):
    """Convert a python string to a null-terminated string buffer.

    `s' is cut down to at most length-1 characters, filled up with NUL
    characters and finally terminated by one more NUL.
    """
    head = s[:length - 1]
    filler = NUL * (length - len(s) - 1)
    return head + filler + NUL
def nti(s):
    """Convert a number field to a python number.

    Two encodings are understood (see itn() below):

    * a string of octal digits, optionally padded with blanks and
      terminated by a NUL; everything after the first NUL is ignored and
      an empty or all-blank field counts as 0,
    * a leading 0200 byte followed by a big-endian base-256 number.

    Raises ValueError if the octal form contains anything but octal
    digits.
    """
    if s[:1] != "\200":
        # Cut at the first NUL and drop blank padding: some archivers
        # terminate fields with a space, or leave stale bytes behind
        # the terminator.
        digits = s.split(NUL, 1)[0].strip()
        return int(digits or "0", 8)

    n = 0
    for char in s[1:]:
        n = (n << 8) + ord(char)
    return n
def itn(n, digits=8, posix=False):
    """Convert a python number to a number field.

    n      -- the number to encode
    digits -- width of the field in bytes
    posix  -- if true, refuse values that need the GNU encoding and
              raise ValueError instead
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0200 byte indicates this particular
    # encoding, the following digits-1 bytes are a big-endian
    # representation. This allows values up to (256**(digits-1))-1.
    if 0 <= n < 8 ** (digits - 1):
        return "%0*o" % (digits - 1, n) + NUL

    if posix:
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    payload = ""
    for _ in xrange(digits - 1):
        payload = chr(n & 0xFF) + payload
        n >>= 8
    return chr(0x80) + payload
def calc_chksums(buf):
    """Calculate the checksum for a member's header by summing up all
       characters except for the chksum field which is treated as if
       it was filled with spaces. According to the GNU tar sources,
       some tars (Sun and NeXT) calculate chksum with signed char,
       which will be different if there are chars in the buffer with
       the high bit set. So we calculate two checksums, unsigned and
       signed.

       Returns the tuple (unsigned_chksum, signed_chksum).
    """
    header = buf[:512]
    # "8x" skips the chksum field (bytes 148-155); its eight blanks are
    # accounted for by the constant 256 (8 * 0x20).
    unsigned_chksum = 256 + sum(struct.unpack("148B8x356B", header))
    signed_chksum = 256 + sum(struct.unpack("148b8x356b", header))
    return unsigned_chksum, signed_chksum
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.

       Raises IOError if src runs out of data before length bytes
       have been copied.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    BUFSIZE = 16 * 1024

    def transfer(count):
        # Move exactly `count' bytes; a short read means premature EOF.
        chunk = src.read(count)
        if len(chunk) < count:
            raise IOError("end of file reached")
        dst.write(chunk)

    full_blocks, remainder = divmod(length, BUFSIZE)
    while full_blocks > 0:
        transfer(BUFSIZE)
        full_blocks -= 1

    if remainder != 0:
        transfer(remainder)
    return
# One entry per character of the mode string.  Within an entry the
# alternatives are tried in order and the first one whose bits are all
# set wins.
filemode_table = (
    # file type
    ((S_IFLNK, "l"),
     (S_IFREG, "-"),
     (S_IFBLK, "b"),
     (S_IFDIR, "d"),
     (S_IFCHR, "c"),
     (S_IFIFO, "p")),

    # owner
    ((TUREAD, "r"),),
    ((TUWRITE, "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID, "S"),
     (TUEXEC, "x")),

    # group
    ((TGREAD, "r"),),
    ((TGWRITE, "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID, "S"),
     (TGEXEC, "x")),

    # other
    ((TOREAD, "r"),),
    ((TOWRITE, "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX, "T"),
     (TOEXEC, "x"))
)

def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    def pick(alternatives):
        # First alternative whose bits are all present in mode, else "-".
        for bit, char in alternatives:
            if mode & bit == bit:
                return char
        return "-"

    return "".join([pick(alternatives) for alternatives in filemode_table])
if os.sep == "/":
    normpath = os.path.normpath
else:
    def normpath(path):
        """Normalize path and return it with "/" as the separator."""
        return os.path.normpath(path).replace(os.sep, "/")
class TarError(Exception):
    """Base exception."""

class ExtractError(TarError):
    """General exception for extract errors."""

class ReadError(TarError):
    """Exception for unreadable tar archives."""

class CompressionError(TarError):
    """Exception for unavailable compression methods."""

class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
283 #---------------------------
284 # internal stream interface
285 #---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.
    """

    def __init__(self, name, mode):
        """Open the file `name' with os.open().

        `mode' is "r" (read only) or "w" (write only; the file is
        created or truncated).  Any other value raises KeyError.
        """
        flags = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        if hasattr(os, "O_BINARY"):
            flags |= os.O_BINARY
        # Ask for rw-rw-rw- (0666), further restricted by the umask.
        # os.open() defaults to 0777, which would mark every newly
        # created archive as executable.
        perms = (stat.S_IRUSR | stat.S_IWUSR |
                 stat.S_IRGRP | stat.S_IWGRP |
                 stat.S_IROTH | stat.S_IWOTH)
        self.fd = os.open(name, flags, perms)

    def close(self):
        """Close the underlying file descriptor."""
        os.close(self.fd)

    def read(self, size):
        """Return what os.read() delivers for a request of `size' bytes."""
        return os.read(self.fd, size)

    def write(self, s):
        """Hand the string `s' to os.write()."""
        os.write(self.fd, s)
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip or bzip2 compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.

        name     -- file name; opened through _LowLevelFile if fileobj is
                    None, and stored in the gzip header when writing
        mode     -- "r" selects reading, anything else is set up for
                    writing
        comptype -- "tar", "gz", "bz2", or "*" to detect the compression
                    from the first block read
        fileobj  -- object to read from or write to, or None
        bufsize  -- size of the chunks exchanged with fileobj

        Raises CompressionError if the module needed for comptype
        cannot be imported.
        """
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name     = name or ""
        self.mode     = mode
        self.comptype = comptype
        self.fileobj  = fileobj
        self.bufsize  = bufsize
        self.buf      = ""
        self.pos      = 0
        self.closed   = False

        if comptype == "gz":
            try:
                import zlib
            except ImportError:
                raise CompressionError("zlib module is not available")
            self.zlib = zlib
            self.crc = zlib.crc32("")
            if mode == "r":
                self._init_read_gz()
            else:
                self._init_write_gz()

        if comptype == "bz2":
            try:
                import bz2
            except ImportError:
                raise CompressionError("bz2 module is not available")
            if mode == "r":
                self.dbuf = ""
                self.cmp = bz2.BZ2Decompressor()
            else:
                self.cmp = bz2.BZ2Compressor()

    def __del__(self):
        # "closed" is missing if __init__ failed before setting it.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        # Negative wbits: produce a raw deflate stream, the gzip header
        # and trailer are written by hand.
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                            -self.zlib.MAX_WBITS,
                                            self.zlib.DEF_MEM_LEVEL,
                                            0)
        timestamp = struct.pack("<L", long(time.time()))
        self.__write("\037\213\010\010%s\002\377" % timestamp)
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        self.__write(self.name + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        if self.mode == "w" and self.comptype != "tar":
            self.buf += self.cmp.flush()

        if self.mode == "w" and self.buf:
            if self.comptype == "tar":
                # Only an uncompressed stream is filled up to a whole
                # buffer.  Padding a compressed stream would put NULs
                # between the compressed data and the gzip trailer
                # (or behind the bzip2 stream).
                remainder = len(self.buf) % self.bufsize
                if remainder > 0:
                    self.buf += NUL * (self.bufsize - remainder)
            self.fileobj.write(self.buf)
            self.buf = ""
            if self.comptype == "gz":
                # gzip trailer: CRC32 and uncompressed size, both as
                # unsigned 32 bit little-endian.  The masks make this
                # independent of whether crc32() returned a signed
                # value.
                self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
                self.fileobj.write(struct.pack("<L", self.pos & 0xffffffff))

        if not self._extfileobj:
            self.fileobj.close()

        self.closed = True

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.

        Raises ReadError if the gzip magic is missing and
        CompressionError for a compression method other than deflate.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = ""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != "\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != "\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)              # mtime, extra flags, OS

        if flag & 4:
            # FEXTRA: the field belongs to the uncompressed header, so
            # it is skipped with the raw __read(); read() would run it
            # through the decompressor and advance self.pos.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # FNAME: NUL-terminated original file name
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # FCOMMENT: NUL-terminated comment
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # FHCRC: header checksum
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.

           Raises StreamError if pos lies before the current position.
        """
        if pos - self.pos >= 0:
            # Seeking forward means reading and discarding.
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in xrange(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            t = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                t.append(buf)
            buf = "".join(t)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.
        """
        if self.comptype == "tar":
            return self.__read(size)

        # Decompress until at least size bytes are available; the
        # surplus is kept in self.dbuf for the next call.
        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            buf = self.cmp.decompress(buf)
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.buf = t[size:]
        return t[:size]
# class _Stream
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').
    """

    def __init__(self, fileobj):
        """Wrap fileobj and read its first block for inspection."""
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        """Return the block that was read ahead in __init__.

        `size' is ignored for this first call.  The method replaces
        itself with fileobj.read, so all further reads go straight to
        the wrapped object.
        """
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        """Return "gz", "bz2" or "tar" depending on the first block."""
        if self.buf.startswith("\037\213\010"):
            return "gz"
        # A bzip2 stream starts with "BZh", one digit for the block
        # size (compression level 1-9) and the block magic "1AY&SY".
        # The digit is skipped so that every level is recognized, not
        # just level 9.
        if self.buf[0:3] == "BZh" and self.buf[4:10] == "1AY&SY":
            return "bz2"
        return "tar"

    def close(self):
        """Close the wrapped file object."""
        self.fileobj.close()
# class StreamProxy
class _BZ2Proxy(object):
    """Small proxy class that enables external file object
       support for "r:bz2" and "w:bz2" modes. This is actually
       a workaround for a limitation in bz2 module's BZ2File
       class which (unlike gzip.GzipFile) has no support for
       a file object argument.
    """

    blocksize = 16 * 1024

    def __init__(self, fileobj, mode):
        """Wrap fileobj; mode "r" decompresses, any other compresses."""
        self.fileobj = fileobj
        self.mode = mode
        self.init()

    def init(self):
        """(Re)start the stream at position 0.

        In read mode the wrapped file object is rewound as well.
        """
        import bz2
        self.pos = 0
        if self.mode == "r":
            self.bz2obj = bz2.BZ2Decompressor()
            self.fileobj.seek(0)
            self.buf = ""
        else:
            self.bz2obj = bz2.BZ2Compressor()

    def read(self, size):
        """Return up to size bytes of decompressed data.

        Fewer bytes are returned when the compressed stream ends or the
        underlying file runs out of data.
        """
        have = len(self.buf)
        while have < size:
            raw = self.fileobj.read(self.blocksize)
            if not raw:
                # The file is exhausted.  Without this check a
                # truncated archive would make the loop spin forever,
                # since decompressing nothing yields nothing.
                break
            try:
                data = self.bz2obj.decompress(raw)
            except EOFError:
                # the end of the bzip2 stream was already reached
                break
            self.buf += data
            have += len(data)

        buf = self.buf[:size]
        self.buf = self.buf[size:]
        self.pos += len(buf)
        return buf

    def seek(self, pos):
        """Move to the uncompressed offset pos.

        Going backwards restarts decompression from the beginning.
        """
        if pos < self.pos:
            self.init()
        self.read(pos - self.pos)

    def tell(self):
        """Return the current uncompressed offset."""
        return self.pos

    def write(self, data):
        """Compress data and write the result to the file object."""
        self.pos += len(data)
        raw = self.bz2obj.compress(data)
        self.fileobj.write(raw)

    def close(self):
        """Flush pending compressed data (write mode) and close the
           wrapped file object.
        """
        if self.mode == "w":
            raw = self.bz2obj.flush()
            self.fileobj.write(raw)
        self.fileobj.close()
# class _BZ2Proxy
618 #------------------------
619 # Extraction file object
620 #------------------------
621 class ExFileObject(object):
622 """File-like object for reading an archive member.
623 Is returned by TarFile.extractfile(). Support for
624 sparse files included.
627 def __init__(self, tarfile, tarinfo):
628 self.fileobj = tarfile.fileobj
629 self.name = tarinfo.name
630 self.mode = "r"
631 self.closed = False
632 self.offset = tarinfo.offset_data
633 self.size = tarinfo.size
634 self.pos = 0L
635 self.linebuffer = ""
636 if tarinfo.issparse():
637 self.sparse = tarinfo.sparse
638 self.read = self._readsparse
639 else:
640 self.read = self._readnormal
642 def __read(self, size):
643 """Overloadable read method.
645 return self.fileobj.read(size)
647 def readline(self, size=-1):
648 """Read a line with approx. size. If size is negative,
649 read a whole line. readline() and read() must not
650 be mixed up (!).
652 if size < 0:
653 size = sys.maxint
655 nl = self.linebuffer.find("\n")
656 if nl >= 0:
657 nl = min(nl, size)
658 else:
659 size -= len(self.linebuffer)
660 while (nl < 0 and size > 0):
661 buf = self.read(min(size, 100))
662 if not buf:
663 break
664 self.linebuffer += buf
665 size -= len(buf)
666 nl = self.linebuffer.find("\n")
667 if nl == -1:
668 s = self.linebuffer
669 self.linebuffer = ""
670 return s
671 buf = self.linebuffer[:nl]
672 self.linebuffer = self.linebuffer[nl + 1:]
673 while buf[-1:] == "\r":
674 buf = buf[:-1]
675 return buf + "\n"
677 def readlines(self):
678 """Return a list with all (following) lines.
680 result = []
681 while True:
682 line = self.readline()
683 if not line: break
684 result.append(line)
685 return result
687 def _readnormal(self, size=None):
688 """Read operation for regular files.
690 if self.closed:
691 raise ValueError("file is closed")
692 self.fileobj.seek(self.offset + self.pos)
693 bytesleft = self.size - self.pos
694 if size is None:
695 bytestoread = bytesleft
696 else:
697 bytestoread = min(size, bytesleft)
698 self.pos += bytestoread
699 return self.__read(bytestoread)
701 def _readsparse(self, size=None):
702 """Read operation for sparse files.
704 if self.closed:
705 raise ValueError("file is closed")
707 if size is None:
708 size = self.size - self.pos
710 data = []
711 while size > 0:
712 buf = self._readsparsesection(size)
713 if not buf:
714 break
715 size -= len(buf)
716 data.append(buf)
717 return "".join(data)
719 def _readsparsesection(self, size):
720 """Read a single section of a sparse file.
722 section = self.sparse.find(self.pos)
724 if section is None:
725 return ""
727 toread = min(size, section.offset + section.size - self.pos)
728 if isinstance(section, _data):
729 realpos = section.realpos + self.pos - section.offset
730 self.pos += toread
731 self.fileobj.seek(self.offset + realpos)
732 return self.__read(toread)
733 else:
734 self.pos += toread
735 return NUL * toread
737 def tell(self):
738 """Return the current file position.
740 return self.pos
742 def seek(self, pos, whence=0):
743 """Seek to a position in the file.
745 self.linebuffer = ""
746 if whence == 0:
747 self.pos = min(max(pos, 0), self.size)
748 if whence == 1:
749 if pos < 0:
750 self.pos = max(self.pos + pos, 0)
751 else:
752 self.pos = min(self.pos + pos, self.size)
753 if whence == 2:
754 self.pos = max(min(self.size + pos, self.size), 0)
756 def close(self):
757 """Close the file object.
759 self.closed = True
761 def __iter__(self):
762 """Get an iterator over the file object.
764 if self.closed:
765 raise ValueError("I/O operation on closed file")
766 return self
768 def next(self):
769 """Get the next item from the file iterator.
771 result = self.readline()
772 if not result:
773 raise StopIteration
774 return result
776 #class ExFileObject
778 #------------------
779 # Exported Classes
780 #------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """
        self.name     = name       # member name (dirnames must end with '/')
        # file permissions, rw-rw-rw- (octal 666)
        self.mode     = TUREAD | TUWRITE | TGREAD | TGWRITE | TOREAD | TOWRITE
        self.uid      = 0          # user id
        self.gid      = 0          # group id
        self.size     = 0          # file size
        self.mtime    = 0          # modification time
        self.chksum   = 0          # header checksum
        self.type     = REGTYPE    # member type
        self.linkname = ""         # link name
        self.uname    = "user"     # user name
        self.gname    = "group"    # group name
        self.devmajor = 0          # device major number
        self.devminor = 0          # device minor number
        self.prefix   = ""         # prefix to filename or information
                                   # about sparse files

        self.offset   = 0          # the tar header starts here
        self.offset_data = 0       # the file's data starts here

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    @classmethod
    def frombuf(cls, buf):
        """Construct a TarInfo object from a 512 byte string buffer.

        Raises ValueError if buf does not have exactly BLOCKSIZE bytes,
        consists only of NULs, or if the stored checksum matches
        neither the unsigned nor the signed checksum of buf.
        """
        if len(buf) != BLOCKSIZE:
            raise ValueError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise ValueError("empty header")

        tarinfo = cls()
        tarinfo.buf      = buf
        tarinfo.name     = buf[0:100].rstrip(NUL)
        tarinfo.mode     = nti(buf[100:108])
        tarinfo.uid      = nti(buf[108:116])
        tarinfo.gid      = nti(buf[116:124])
        tarinfo.size     = nti(buf[124:136])
        tarinfo.mtime    = nti(buf[136:148])
        tarinfo.chksum   = nti(buf[148:156])
        tarinfo.type     = buf[156:157]
        tarinfo.linkname = buf[157:257].rstrip(NUL)
        # bytes 257-264 (magic and version) are not evaluated
        tarinfo.uname    = buf[265:297].rstrip(NUL)
        tarinfo.gname    = buf[297:329].rstrip(NUL)
        tarinfo.devmajor = nti(buf[329:337])
        tarinfo.devminor = nti(buf[337:345])
        tarinfo.prefix   = buf[345:500]

        if tarinfo.chksum not in calc_chksums(buf):
            raise ValueError("invalid header")
        return tarinfo

    def tobuf(self, posix=False):
        """Return a tar header block as a 512 byte string.

        posix is passed on to itn() for every number field.
        """
        parts = [
            stn(self.name, 100),
            itn(self.mode & 0xFFF, 8, posix),   # 12 mode bits (octal 7777)
            itn(self.uid, 8, posix),
            itn(self.gid, 8, posix),
            itn(self.size, 12, posix),
            itn(self.mtime, 12, posix),
            # Checksum placeholder.  It has to fill the whole 8 byte
            # field (bytes 148-155), otherwise every following field
            # ends up at the wrong offset; calc_chksums() counts the
            # field as eight blanks as well.
            " " * 8,
            self.type,
            stn(self.linkname, 100),
            stn(MAGIC, 6),
            stn(VERSION, 2),
            stn(self.uname, 32),
            stn(self.gname, 32),
            itn(self.devmajor, 8, posix),
            itn(self.devminor, 8, posix),
            stn(self.prefix, 155)
        ]

        # struct.pack() fills the block up to BLOCKSIZE with NULs
        buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
        chksum = calc_chksums(buf)[0]
        # six octal digits plus NUL; the eighth byte stays a blank
        buf = buf[:148] + "%06o\0" % chksum + buf[155:]
        self.buf = buf
        return buf

    def isreg(self):
        """Return True if the member is one of REGULAR_TYPES."""
        return self.type in REGULAR_TYPES
    def isfile(self):
        """Same as isreg()."""
        return self.isreg()
    def isdir(self):
        """Return True if the member is a directory."""
        return self.type == DIRTYPE
    def issym(self):
        """Return True if the member is a symbolic link."""
        return self.type == SYMTYPE
    def islnk(self):
        """Return True if the member is a link inside the tarfile."""
        return self.type == LNKTYPE
    def ischr(self):
        """Return True if the member is a character device."""
        return self.type == CHRTYPE
    def isblk(self):
        """Return True if the member is a block device."""
        return self.type == BLKTYPE
    def isfifo(self):
        """Return True if the member is a fifo."""
        return self.type == FIFOTYPE
    def issparse(self):
        """Return True if the member is a GNU sparse file."""
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        """Return True if the member is a character device, a block
           device or a fifo.
        """
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo
896 class TarFile(object):
897 """The TarFile Class provides an interface to tar archives.
900 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
902 dereference = False # If true, add content of linked file to the
903 # tar file, else the link.
905 ignore_zeros = False # If true, skips empty or invalid blocks and
906 # continues processing.
908 errorlevel = 0 # If 0, fatal errors only appear in debug
909 # messages (if debug >= 0). If > 0, errors
910 # are passed to the caller as exceptions.
912 posix = False # If True, generates POSIX.1-1990-compliant
913 # archives (no GNU extensions!)
915 fileobject = ExFileObject
def __init__(self, name=None, mode="r", fileobj=None):
    """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
       read from an existing archive, 'a' to append data to an existing
       file or 'w' to create a new file overwriting an existing one. `mode'
       defaults to 'r'.
       If `fileobj' is given, it is used for reading or writing data. If it
       can be determined, `mode' is overridden by `fileobj's mode.
       `fileobj' is not closed, when TarFile is closed.

       Raises ValueError for any other `mode'.
    """
    self.name = name

    # Compare against the exact values: a substring test such as
    # `mode in "raw"' lets the empty string through, which then fails
    # with a KeyError in the lookup below.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    self._mode = mode
    self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]

    if not fileobj:
        fileobj = file(self.name, self.mode)
        self._extfileobj = False
    else:
        if self.name is None and hasattr(fileobj, "name"):
            self.name = fileobj.name
        if hasattr(fileobj, "mode"):
            self.mode = fileobj.mode
        self._extfileobj = True
    self.fileobj = fileobj

    # Init datastructures
    self.closed      = False
    self.members     = []       # list of members as TarInfo objects
    self._loaded     = False    # flag if all members have been read
    self.offset      = 0        # current position in the archive file
    self.inodes      = {}       # dictionary caching the inodes of
                                # archive members already added

    if self._mode == "r":
        self.firstmember = None
        self.firstmember = self.next()

    if self._mode == "a":
        # Move to the end of the archive,
        # before the first empty block.
        self.firstmember = None
        while True:
            try:
                tarinfo = self.next()
            except ReadError:
                self.fileobj.seek(0)
                break
            if tarinfo is None:
                self.fileobj.seek(- BLOCKSIZE, 1)
                break

    if self._mode in ("a", "w"):
        # nothing is left to be read from the archive
        self._loaded = True
973 #--------------------------------------------------------------------------
974 # Below are the classmethods which act as alternate constructors to the
975 # TarFile class. The open() method is the only one that is needed for
976 # public use; it is the "super"-constructor and is able to select an
977 # adequate "sub"-constructor for a particular compression using the mapping
978 # from OPEN_METH.
980 # This concept allows one to subclass TarFile without losing the comfort of
981 # the super-constructor. A sub-constructor is registered and made available
982 # by adding it to the mapping in OPEN_METH.
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError if neither name nor fileobj is given or if the
       mode is not one of the above, CompressionError for an unknown
       compression type and ReadError if no registered method is able
       to read the archive.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Every method is tried on the same fileobj, so remember where
        # it stands and go back there after a failed attempt.
        # Otherwise the next method would start behind the bytes the
        # previous one consumed.
        saved_pos = None
        if fileobj is not None:
            try:
                saved_pos = fileobj.tell()
            except (AttributeError, IOError):
                saved_pos = None

        # Find out which *open() is appropriate for opening the file.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            try:
                return func(name, "r", fileobj)
            except (ReadError, CompressionError):
                if saved_pos is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        t = cls(name, filemode,
                _Stream(name, filemode, comptype, fileobj, bufsize))
        # the _Stream was created here, so it is closed with the TarFile
        t._extfileobj = False
        return t

    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj)

    raise ValueError("undiscernible mode")
@classmethod
def taropen(cls, name, mode="r", fileobj=None):
    """Open uncompressed tar archive name for reading or writing.

    Raises ValueError unless mode is 'r', 'a' or 'w'.
    """
    # exact comparison; a substring test would accept the empty string
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj)
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError unless mode is 'r' or 'w', CompressionError if
       the gzip module is not usable and ReadError if opening fails
       with an IOError.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    if name is None:
        # Only a file object was given; there is no name to derive the
        # archive name from.
        tarname = None
    else:
        pre, ext = os.path.splitext(name)
        pre = os.path.basename(pre)
        if ext == ".tgz":
            ext = ".tar"
        if ext == ".gz":
            ext = ""
        tarname = pre + ext

    opened_here = fileobj is None
    if opened_here:
        fileobj = file(name, mode + "b")

    if mode != "r":
        # when writing, the gzip header carries the name of the tar
        name = tarname

    try:
        t = cls.taropen(tarname, mode,
            gzip.GzipFile(name, mode, compresslevel, fileobj)
        )
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a gzip file")
    except:
        # do not leak the file opened above; a file object passed in
        # by the caller is left alone
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError unless mode is 'r' or 'w', CompressionError if
       the bz2 module cannot be imported and ReadError if opening fails
       with an IOError.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    if name is None:
        # Only a file object was given; there is no name to derive the
        # archive name from.
        tarname = None
    else:
        pre, ext = os.path.splitext(name)
        pre = os.path.basename(pre)
        if ext == ".tbz2":
            ext = ".tar"
        if ext == ".bz2":
            ext = ""
        tarname = pre + ext

    opened_here = fileobj is None
    if opened_here:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        fileobj = _BZ2Proxy(fileobj, mode)

    try:
        t = cls.taropen(tarname, mode, fileobj)
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a bzip2 file")
    except:
        # do not leak the BZ2File opened above; the proxy is not closed
        # because that would close the caller's file object
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
1130 # All *open() methods are registered here.
# Maps a compression type as used in the mode string of open() to the
# name of the classmethod that opens such an archive.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open"    # bzip2 compressed tar
}
1137 #--------------------------------------------------------------------------
1138 # The public methods which TarFile provides:
def close(self):
    """Close the TarFile. Calling it on a closed TarFile does nothing.

       In modes 'a' and 'w' two NUL blocks are written as end marker and
       the output is then padded with NULs up to a multiple of
       RECORDSIZE. The underlying file object is closed unless
       `_extfileobj' is set.
    """
    if not self.closed:
        if self._mode in "aw":
            trailer = BLOCKSIZE * 2
            self.fileobj.write(NUL * trailer)
            self.offset += trailer
            # Pad the last record with zeros
            # (like option -b20 for tar does).
            leftover = divmod(self.offset, RECORDSIZE)[1]
            if leftover > 0:
                self.fileobj.write(NUL * (RECORDSIZE - leftover))

        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
def getmember(self, name):
    """Return the TarInfo object of the member called `name'.

       The lookup is delegated to _getmember(), so for a name that
       occurs several times the last occurrence in the archive is
       returned. KeyError is raised if no member has that name.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
def getmembers(self):
    """Return the list of TarInfo objects collected in self.members.

       The whole archive is scanned first if that has not happened
       yet, because a complete list cannot be known otherwise.
    """
    self._check()
    if self._loaded:
        return self.members
    self._load()
    return self.members
def getnames(self):
    """Return the names of all members as a list, in the same order
       as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
1187 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1188 """Create a TarInfo object for either the file `name' or the file
1189 object `fileobj' (using os.fstat on its file descriptor). You can
1190 modify some of the TarInfo's attributes before you add it using
1191 addfile(). If given, `arcname' specifies an alternative name for the
1192 file in the archive.
1194 self._check("aw")
1196 # When fileobj is given, replace name by
1197 # fileobj's real name.
1198 if fileobj is not None:
1199 name = fileobj.name
1201 # Building the name of the member in the archive.
1202 # Backward slashes are converted to forward slashes,
1203 # Absolute paths are turned to relative paths.
1204 if arcname is None:
1205 arcname = name
1206 arcname = normpath(arcname)
1207 drv, arcname = os.path.splitdrive(arcname)
1208 while arcname[0:1] == "/":
1209 arcname = arcname[1:]
1211 # Now, fill the TarInfo object with
1212 # information specific for the file.
1213 tarinfo = TarInfo()
1215 # Use os.stat or os.lstat, depending on platform
1216 # and if symlinks shall be resolved.
1217 if fileobj is None:
1218 if hasattr(os, "lstat") and not self.dereference:
1219 statres = os.lstat(name)
1220 else:
1221 statres = os.stat(name)
1222 else:
1223 statres = os.fstat(fileobj.fileno())
1224 linkname = ""
1226 stmd = statres.st_mode
1227 if stat.S_ISREG(stmd):
1228 inode = (statres.st_ino, statres.st_dev)
1229 if not self.dereference and \
1230 statres.st_nlink > 1 and inode in self.inodes:
1231 # Is it a hardlink to an already
1232 # archived file?
1233 type = LNKTYPE
1234 linkname = self.inodes[inode]
1235 else:
1236 # The inode is added only if its valid.
1237 # For win32 it is always 0.
1238 type = REGTYPE
1239 if inode[0]:
1240 self.inodes[inode] = arcname
1241 elif stat.S_ISDIR(stmd):
1242 type = DIRTYPE
1243 if arcname[-1:] != "/":
1244 arcname += "/"
1245 elif stat.S_ISFIFO(stmd):
1246 type = FIFOTYPE
1247 elif stat.S_ISLNK(stmd):
1248 type = SYMTYPE
1249 linkname = os.readlink(name)
1250 elif stat.S_ISCHR(stmd):
1251 type = CHRTYPE
1252 elif stat.S_ISBLK(stmd):
1253 type = BLKTYPE
1254 else:
1255 return None
1257 # Fill the TarInfo object with all
1258 # information we can get.
1259 tarinfo.name = arcname
1260 tarinfo.mode = stmd
1261 tarinfo.uid = statres.st_uid
1262 tarinfo.gid = statres.st_gid
1263 if stat.S_ISREG(stmd):
1264 tarinfo.size = statres.st_size
1265 else:
1266 tarinfo.size = 0L
1267 tarinfo.mtime = statres.st_mtime
1268 tarinfo.type = type
1269 tarinfo.linkname = linkname
1270 if pwd:
1271 try:
1272 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1273 except KeyError:
1274 pass
1275 if grp:
1276 try:
1277 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1278 except KeyError:
1279 pass
1281 if type in (CHRTYPE, BLKTYPE):
1282 if hasattr(os, "major") and hasattr(os, "minor"):
1283 tarinfo.devmajor = os.major(statres.st_rdev)
1284 tarinfo.devminor = os.minor(statres.st_rdev)
1285 return tarinfo
1287 def list(self, verbose=True):
1288 """Print a table of contents to sys.stdout. If `verbose' is False, only
1289 the names of the members are printed. If it is True, an `ls -l'-like
1290 output is produced.
1292 self._check()
1294 for tarinfo in self:
1295 if verbose:
1296 print filemode(tarinfo.mode),
1297 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1298 tarinfo.gname or tarinfo.gid),
1299 if tarinfo.ischr() or tarinfo.isblk():
1300 print "%10s" % ("%d,%d" \
1301 % (tarinfo.devmajor, tarinfo.devminor)),
1302 else:
1303 print "%10d" % tarinfo.size,
1304 print "%d-%02d-%02d %02d:%02d:%02d" \
1305 % time.localtime(tarinfo.mtime)[:6],
1307 print tarinfo.name,
1309 if verbose:
1310 if tarinfo.issym():
1311 print "->", tarinfo.linkname,
1312 if tarinfo.islnk():
1313 print "link to", tarinfo.linkname,
1314 print
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False.

       The archive file itself and files of an unsupported type are
       skipped; both cases are only reported through _dbg().
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None \
            and os.path.abspath(name) == os.path.abspath(self.name):
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory. Its entries are added directly, so that no
    # member for "." itself is created.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir("."):
                self.add(f, os.path.join(arcname, f))
            return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        # file() and not open(): this module rebinds the name open.
        f = file(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Release the descriptor even if writing the member fails.
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))

    else:
        self.addfile(tarinfo)
def addfile(self, tarinfo, fileobj=None):
    """Write the header for `tarinfo' to the archive and, if `fileobj'
       is given, copy tarinfo.size bytes from it as the member's data.
       TarInfo objects can be created with gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.

       `tarinfo' is modified in place (normalized and possibly
       shortened name and linkname) and appended to self.members. In
       posix mode ValueError is raised for a size above MAXSIZE_MEMBER
       and for a name or linkname that does not fit into the header.
    """
    self._check("aw")

    tarinfo.name = normpath(tarinfo.name)
    if tarinfo.isdir():
        # a directory name carries a trailing '/'
        tarinfo.name += "/"

    if tarinfo.linkname:
        tarinfo.linkname = normpath(tarinfo.linkname)

    if tarinfo.size > MAXSIZE_MEMBER:
        if self.posix:
            raise ValueError("file is too large (>= 8 GB)")
        self._dbg(2, "tarfile: Created GNU tar largefile header")

    if len(tarinfo.linkname) > LENGTH_LINK:
        if self.posix:
            raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
        # GNU mode: the full linkname goes into an extra member.
        self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
        tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK - 1]
        self._dbg(2, "tarfile: Created GNU tar extension LONGLINK")

    if len(tarinfo.name) > LENGTH_NAME:
        if not self.posix:
            # GNU mode: the full name goes into an extra member.
            self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
            tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGNAME")
        else:
            # Split the name at the last '/' found within its first
            # LENGTH_PREFIX + 1 characters.
            head = tarinfo.name[:LENGTH_PREFIX + 1]
            cut = head.rfind("/") + 1
            name = tarinfo.name[cut:]
            prefix = head[:cut][:-1]

            if not prefix or len(name) > LENGTH_NAME:
                raise ValueError("name is too long (>%d)" % (LENGTH_NAME))

            tarinfo.name = name
            tarinfo.prefix = prefix

    self.fileobj.write(tarinfo.tobuf(self.posix))
    self.offset += BLOCKSIZE

    # Member data follows the header, padded to a full block.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        full, rest = divmod(tarinfo.size, BLOCKSIZE)
        if rest > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - rest))
            full += 1
        self.offset += full * BLOCKSIZE

    self.members.append(tarinfo)
def extractall(self, path=".", members=None):
    """Extract all members from the archive to the current working
       directory and set owner, modification time and permissions on
       directories afterwards. `path' specifies a different directory
       to extract to. `members' is optional and must be a subset of the
       list returned by getmembers().

       NOTE: member names are joined onto `path' exactly as stored in
       the archive; nothing in this method rejects absolute names or
       names containing '..'.
    """
    directories = []

    if members is None:
        members = self

    for tarinfo in members:
        if tarinfo.isdir():
            # Extract directory with a safe mode, so that
            # all files below can be extracted as well.
            try:
                os.makedirs(os.path.join(path, tarinfo.name),
                            stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
            except EnvironmentError:
                pass
            directories.append(tarinfo)
        else:
            self.extract(tarinfo, path)

    # Reverse sort directories.
    directories.sort(key=lambda d: d.name)
    directories.reverse()

    # Set correct owner, mtime and filemode on directories.
    for tarinfo in directories:
        # Use a separate variable: overwriting `path' here would make
        # every following directory resolve below the previous one.
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError:
            if self.errorlevel > 1:
                raise
            else:
                self._dbg(1, "tarfile: %s" % sys.exc_info()[1])
1474 def extract(self, member, path=""):
1475 """Extract a member from the archive to the current working directory,
1476 using its full name. Its file information is extracted as accurately
1477 as possible. `member' may be a filename or a TarInfo object. You can
1478 specify a different directory using `path'.
1480 self._check("r")
1482 if isinstance(member, TarInfo):
1483 tarinfo = member
1484 else:
1485 tarinfo = self.getmember(member)
1487 # Prepare the link target for makelink().
1488 if tarinfo.islnk():
1489 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
1491 try:
1492 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
1493 except EnvironmentError, e:
1494 if self.errorlevel > 0:
1495 raise
1496 else:
1497 if e.filename is None:
1498 self._dbg(1, "tarfile: %s" % e.strerror)
1499 else:
1500 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
1501 except ExtractError, e:
1502 if self.errorlevel > 1:
1503 raise
1504 else:
1505 self._dbg(1, "tarfile: %s" % e)
def extractfile(self, member):
    """Return a file-like object for a member of the archive. `member'
       may be a filename or a TarInfo object.

       For a regular file, and for a member whose type is not in
       SUPPORTED_TYPES, an instance of self.fileobject is returned. For
       a hard or symbolic link the file object of the link's target is
       returned; on a _Stream this raises StreamError instead. For all
       other members (directories, devices, ...) None is returned.
    """
    self._check("r")

    if isinstance(member, TarInfo):
        tarinfo = member
    else:
        tarinfo = self.getmember(member)

    # Members of unknown type are treated like regular files.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # No data is associated with directories, devices and the like.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    # A small but ugly workaround for the case that someone tries
    # to extract a (sym)link as a file-object from a non-seekable
    # stream of tar blocks.
    if isinstance(self.fileobj, _Stream):
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    target = self._getmember(tarinfo.linkname, tarinfo)
    return self.extractfile(target)
1546 def _extract_member(self, tarinfo, targetpath):
1547 """Extract the TarInfo object tarinfo to a physical
1548 file called targetpath.
1550 # Fetch the TarInfo object for the given name
1551 # and build the destination pathname, replacing
1552 # forward slashes to platform specific separators.
1553 if targetpath[-1:] == "/":
1554 targetpath = targetpath[:-1]
1555 targetpath = os.path.normpath(targetpath)
1557 # Create all upper directories.
1558 upperdirs = os.path.dirname(targetpath)
1559 if upperdirs and not os.path.exists(upperdirs):
1560 ti = TarInfo()
1561 ti.name = upperdirs
1562 ti.type = DIRTYPE
1563 ti.mode = 0777
1564 ti.mtime = tarinfo.mtime
1565 ti.uid = tarinfo.uid
1566 ti.gid = tarinfo.gid
1567 ti.uname = tarinfo.uname
1568 ti.gname = tarinfo.gname
1569 try:
1570 self._extract_member(ti, ti.name)
1571 except:
1572 pass
1574 if tarinfo.islnk() or tarinfo.issym():
1575 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
1576 else:
1577 self._dbg(1, tarinfo.name)
1579 if tarinfo.isreg():
1580 self.makefile(tarinfo, targetpath)
1581 elif tarinfo.isdir():
1582 self.makedir(tarinfo, targetpath)
1583 elif tarinfo.isfifo():
1584 self.makefifo(tarinfo, targetpath)
1585 elif tarinfo.ischr() or tarinfo.isblk():
1586 self.makedev(tarinfo, targetpath)
1587 elif tarinfo.islnk() or tarinfo.issym():
1588 self.makelink(tarinfo, targetpath)
1589 elif tarinfo.type not in SUPPORTED_TYPES:
1590 self.makeunknown(tarinfo, targetpath)
1591 else:
1592 self.makefile(tarinfo, targetpath)
1594 self.chown(tarinfo, targetpath)
1595 if not tarinfo.issym():
1596 self.chmod(tarinfo, targetpath)
1597 self.utime(tarinfo, targetpath)
1599 #--------------------------------------------------------------------------
1600 # Below are the different file methods. They are called via
1601 # _extract_member() when extract() is called. They can be replaced in a
1602 # subclass to implement other functionality.
def makedir(self, tarinfo, targetpath):
    """Create the directory `targetpath'. A directory that exists
       already is not treated as an error.
    """
    try:
        os.mkdir(targetpath)
    except EnvironmentError:
        # Only EEXIST is tolerated, everything else is passed on.
        if sys.exc_info()[1].errno != errno.EEXIST:
            raise
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath and fill it with the data of the
       member `tarinfo'.

       Both the member's file object and the target file are closed
       even if opening the target or copying the data fails.
    """
    source = self.extractfile(tarinfo)
    try:
        # file() and not open(): this module rebinds the name open.
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to `targetpath' the way a
       regular file is extracted and report that through _dbg().
    """
    self.makefile(tarinfo, targetpath)
    message = "tarfile: Unknown file type %r, extracted as regular file."
    self._dbg(1, message % tarinfo.type)
def makefifo(self, tarinfo, targetpath):
    """Create a fifo at `targetpath'. ExtractError is raised if the os
       module has no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
def makedev(self, tarinfo, targetpath):
    """Create a character or block device node at `targetpath'.
       ExtractError is raised if the os module lacks mknod() or
       makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the stored mode bits with the device kind.
    if tarinfo.isblk():
        kind = stat.S_IFBLK
    else:
        kind = stat.S_IFCHR
    mode = tarinfo.mode | kind

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
def makelink(self, tarinfo, targetpath):
    """Create a symbolic or hard link at `targetpath'.

       If that fails with AttributeError (os.symlink()/os.link()
       missing), the referenced member is extracted to `targetpath'
       instead, and if that fails as well the referenced file is copied
       from the file system. IOError is raised when nothing works.
    """
    linkpath = tarinfo.linkname
    try:
        if not tarinfo.issym():
            # _link_target is prepared in extract().
            os.link(tarinfo._link_target, targetpath)
        else:
            os.symlink(linkpath, targetpath)
    except AttributeError:
        # A symlink target is relative to the directory of the link.
        if tarinfo.issym():
            basedir = os.path.dirname(tarinfo.name)
            linkpath = normpath(os.path.join(basedir, linkpath))

        try:
            self._extract_member(self.getmember(linkpath), targetpath)
        except (EnvironmentError, KeyError):
            linkpath = os.path.normpath(linkpath)
            try:
                shutil.copy2(linkpath, targetpath)
            except EnvironmentError:
                raise IOError("link could not be created")
def chown(self, tarinfo, targetpath):
    """Set the owner of `targetpath' according to `tarinfo'.

       Nothing is done unless the pwd module is available and the
       effective user id is 0. Group and user are looked up by name
       first, then by numeric id; the ids of the current process are
       the last resort. ExtractError is raised if changing the owner
       fails.
    """
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        # We have to be root to change ownership.
        return

    try:
        g = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            g = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            g = os.getgid()

    try:
        u = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            u = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            u = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, u, g)
        elif sys.platform != "os2emx":
            os.chown(targetpath, u, g)
    except EnvironmentError:
        raise ExtractError("could not change owner")
def chmod(self, tarinfo, targetpath):
    """Apply tarinfo.mode to `targetpath'. Nothing is done if the os
       module has no chmod(); ExtractError is raised if the call fails.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
def utime(self, tarinfo, targetpath):
    """Set access and modification time of `targetpath' to
       tarinfo.mtime. Nothing is done if the os module has no utime(),
       or for directories on win32. ExtractError is raised if the call
       fails.
    """
    if not hasattr(os, 'utime'):
        return
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    if sys.platform == "win32" and tarinfo.isdir():
        return
    stamp = (tarinfo.mtime, tarinfo.mtime)
    try:
        os.utime(targetpath, stamp)
    except EnvironmentError:
        raise ExtractError("could not change modification time")
1731 #--------------------------------------------------------------------------
1732 def next(self):
1733 """Return the next member of the archive as a TarInfo object, when
1734 TarFile is opened for reading. Return None if there is no more
1735 available.
1737 self._check("ra")
1738 if self.firstmember is not None:
1739 m = self.firstmember
1740 self.firstmember = None
1741 return m
1743 # Read the next block.
1744 self.fileobj.seek(self.offset)
1745 while True:
1746 buf = self.fileobj.read(BLOCKSIZE)
1747 if not buf:
1748 return None
1750 try:
1751 tarinfo = TarInfo.frombuf(buf)
1753 # We shouldn't rely on this checksum, because some tar programs
1754 # calculate it differently and it is merely validating the
1755 # header block. We could just as well skip this part, which would
1756 # have a slight effect on performance...
1757 if tarinfo.chksum not in calc_chksums(buf):
1758 self._dbg(1, "tarfile: Bad Checksum %r" % tarinfo.name)
1760 # Set the TarInfo object's offset to the current position of the
1761 # TarFile and set self.offset to the position where the data blocks
1762 # should begin.
1763 tarinfo.offset = self.offset
1764 self.offset += BLOCKSIZE
1766 tarinfo = self.proc_member(tarinfo)
1768 except ValueError, e:
1769 if self.ignore_zeros:
1770 self._dbg(2, "0x%X: empty or invalid block: %s" %
1771 (self.offset, e))
1772 self.offset += BLOCKSIZE
1773 continue
1774 else:
1775 if self.offset == 0:
1776 raise ReadError("empty, unreadable or compressed "
1777 "file: %s" % e)
1778 return None
1779 break
1781 # Some old tar programs represent a directory as a regular
1782 # file with a trailing slash.
1783 if tarinfo.isreg() and tarinfo.name.endswith("/"):
1784 tarinfo.type = DIRTYPE
1786 # The prefix field is used for filenames > 100 in
1787 # the POSIX standard.
1788 # name = prefix + '/' + name
1789 tarinfo.name = normpath(os.path.join(tarinfo.prefix.rstrip(NUL),
1790 tarinfo.name))
1792 # Directory names should have a '/' at the end.
1793 if tarinfo.isdir():
1794 tarinfo.name += "/"
1796 self.members.append(tarinfo)
1797 return tarinfo
1799 #--------------------------------------------------------------------------
1800 # The following are methods that are called depending on the type of a
1801 # member. The entry point is proc_member() which is called with a TarInfo
1802 # object created from the header block from the current offset. The
1803 # proc_member() method can be overridden in a subclass to add custom
1804 # proc_*() methods. A proc_*() method MUST implement the following
1805 # operations:
1806 # 1. Set tarinfo.offset_data to the position where the data blocks begin,
1807 # if there is data that follows.
1808 # 2. Set self.offset to the position where the next member's header will
1809 # begin.
1810 # 3. Return tarinfo or another valid TarInfo object.
def proc_member(self, tarinfo):
    """Dispatch `tarinfo' to the proc_*() method that handles its type
       and return that method's result.
    """
    if tarinfo.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        handler = self.proc_gnulong
    elif tarinfo.type == GNUTYPE_SPARSE:
        handler = self.proc_sparse
    else:
        handler = self.proc_builtin
    return handler(tarinfo)
def proc_builtin(self, tarinfo):
    """Process a member of a builtin type, or of an unknown type that
       is treated as a regular file. Sets tarinfo.offset_data and moves
       self.offset behind the member's data blocks.
    """
    tarinfo.offset_data = self.offset
    has_data = tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES
    if has_data:
        # Skip the following data blocks.
        self.offset += self._block(tarinfo.size)
    return tarinfo
def proc_gnulong(self, tarinfo):
    """Process a GNU longname or longlink member: read the blocks that
       hold the long string, process the header that follows and patch
       the resulting TarInfo object with the long name or linkname.
    """
    chunks = []
    remaining = tarinfo.size
    while remaining > 0:
        chunks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        remaining -= BLOCKSIZE
    buf = "".join(chunks)

    # Fetch the next header and process it.
    header = TarInfo.frombuf(self.fileobj.read(BLOCKSIZE))
    header.offset = self.offset
    self.offset += BLOCKSIZE
    member = self.proc_member(header)

    # The patched member starts where the longname member started.
    member.offset = tarinfo.offset
    if tarinfo.type == GNUTYPE_LONGNAME:
        member.name = buf.rstrip(NUL)
    elif tarinfo.type == GNUTYPE_LONGLINK:
        member.linkname = buf.rstrip(NUL)

    return member
1861 def proc_sparse(self, tarinfo):
1862 """Process a GNU sparse header plus extra headers.
1864 buf = tarinfo.buf
1865 sp = _ringbuffer()
1866 pos = 386
1867 lastpos = 0L
1868 realpos = 0L
1869 # There are 4 possible sparse structs in the
1870 # first header.
1871 for i in xrange(4):
1872 try:
1873 offset = nti(buf[pos:pos + 12])
1874 numbytes = nti(buf[pos + 12:pos + 24])
1875 except ValueError:
1876 break
1877 if offset > lastpos:
1878 sp.append(_hole(lastpos, offset - lastpos))
1879 sp.append(_data(offset, numbytes, realpos))
1880 realpos += numbytes
1881 lastpos = offset + numbytes
1882 pos += 24
1884 isextended = ord(buf[482])
1885 origsize = nti(buf[483:495])
1887 # If the isextended flag is given,
1888 # there are extra headers to process.
1889 while isextended == 1:
1890 buf = self.fileobj.read(BLOCKSIZE)
1891 self.offset += BLOCKSIZE
1892 pos = 0
1893 for i in xrange(21):
1894 try:
1895 offset = nti(buf[pos:pos + 12])
1896 numbytes = nti(buf[pos + 12:pos + 24])
1897 except ValueError:
1898 break
1899 if offset > lastpos:
1900 sp.append(_hole(lastpos, offset - lastpos))
1901 sp.append(_data(offset, numbytes, realpos))
1902 realpos += numbytes
1903 lastpos = offset + numbytes
1904 pos += 24
1905 isextended = ord(buf[504])
1907 if lastpos < origsize:
1908 sp.append(_hole(lastpos, origsize - lastpos))
1910 tarinfo.sparse = sp
1912 tarinfo.offset_data = self.offset
1913 self.offset += self._block(tarinfo.size)
1914 tarinfo.size = origsize
1916 # Clear the prefix field so that it is not used
1917 # as a pathname in next().
1918 tarinfo.prefix = ""
1920 return tarinfo
1922 #--------------------------------------------------------------------------
1923 # Little helper methods:
def _block(self, count):
    """Return `count' rounded up to the next multiple of BLOCKSIZE,
       e.g. _block(834) => 1024.
    """
    blocks = count // BLOCKSIZE
    if count % BLOCKSIZE:
        # A partly filled block still occupies a whole block.
        blocks += 1
    return blocks * BLOCKSIZE
def _getmember(self, name, tarinfo=None):
    """Search the members backwards for one called `name' and return
       it, or None if there is none. If `tarinfo' is given, only the
       members in front of it are searched.
    """
    # getmembers() makes sure that the whole archive has been read.
    members = self.getmembers()

    if tarinfo is None:
        stop = len(members)
    else:
        stop = members.index(tarinfo)

    for candidate in reversed(members[:stop]):
        if name == candidate.name:
            return candidate
    return None
def _load(self):
    """Call next() until it returns None, i.e. read through the whole
       archive, and mark the TarFile as loaded afterwards.
    """
    while self.next() is not None:
        pass
    self._loaded = True
def _check(self, mode=None):
    """Raise IOError if the TarFile is closed, or if `mode' is given
       and the TarFile's own mode is not contained in it.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None:
        return
    if self._mode not in mode:
        raise IOError("bad operation for mode %r" % self._mode)
def __iter__(self):
    """Return an iterator over the members: a TarIter as long as the
       archive has not been read completely, a plain iterator over
       self.members afterwards.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
def _create_gnulong(self, name, type):
    """Write a GNU longname/longlink member to the TarFile.
       It consists of a header called "././@LongLink" whose size is the
       length of the NUL terminated `name', followed by the data blocks
       holding that string.
    """
    payload = name + NUL

    header = TarInfo()
    header.name = "././@LongLink"
    header.type = type
    header.mode = 0
    header.size = len(payload)

    # write extended header
    self.fileobj.write(header.tobuf())
    self.offset += BLOCKSIZE
    # write name blocks, the last one padded with NULs
    self.fileobj.write(payload)
    full, rest = divmod(header.size, BLOCKSIZE)
    if rest > 0:
        self.fileobj.write(NUL * (BLOCKSIZE - rest))
        full += 1
    self.offset += full * BLOCKSIZE
def _dbg(self, level, msg):
    """Print `msg' to sys.stderr if `level' does not exceed the
       TarFile's debug level.
    """
    if not level <= self.debug:
        return
    print >> sys.stderr, msg
2007 # class TarFile
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Remember the TarFile to iterate over and start at index 0.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """An iterator is its own iterator object.
        """
        return self

    def next(self):
        """Return the next member, raise StopIteration at the end.
           Members come from the TarFile's next() method until it
           returns nothing more; the TarFile is then marked as _loaded.
        """
        archive = self.tarfile
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely. Once the
        # archive is loaded, continue from the members list by index.
        if archive._loaded:
            try:
                member = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            member = archive.next()
            if not member:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return member
2045 # Helper classes for sparse file support
class _section:
    """Common base of _data and _hole: a range of `size' bytes that
       starts at `offset'.
    """
    def __init__(self, offset, size):
        self.offset = offset
        self.size = size

    def __contains__(self, offset):
        # The range is half-open: its end does not belong to it.
        start = self.offset
        return start <= offset < start + self.size
class _data(_section):
    """A section of a sparse file that holds data. `realpos' is stored
       in addition to the offset and size of the section.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
class _hole(_section):
    """A section of a sparse file that is a hole. It adds nothing to
       _section and only serves to tell holes from data.
    """
class _ringbuffer(list):
    """List that remembers the index of the last item found, so that
       a search for a nearby offset starts there instead of at the
       beginning.
    """
    def __init__(self):
        self.idx = 0

    def find(self, offset):
        """Return the first item that contains `offset', searching
           circularly from the last hit, or None if no item (or an
           empty buffer) contains it. The hit becomes the new start.
        """
        # Without this guard self[idx] raises IndexError on an empty
        # buffer instead of reporting "nothing found".
        if not self:
            return None
        idx = self.idx
        while True:
            item = self[idx]
            if offset in item:
                break
            idx += 1
            if idx == len(self):
                idx = 0
            if idx == self.idx:
                # End of File
                return None
        self.idx = idx
        return item
2088 #---------------------------------------------
2089 # zipfile compatible TarFile class
2090 #---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """Wrapper around TarFile that offers the method names of the
       standard module zipfile's ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        if compression == TAR_GZIPPED:
            self.tarfile = TarFile.gzopen(file, mode)
        elif compression == TAR_PLAIN:
            self.tarfile = TarFile.taropen(file, mode)
        else:
            raise ValueError("unknown compression constant")
        if mode[0:1] == "r":
            # Give every member the attribute names of a ZipInfo.
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        return [info.name for info in self.infolist()]

    def infolist(self):
        # Only members of a regular type are listed.
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]

    def printdir(self):
        self.tarfile.list()

    def testzip(self):
        return

    def getinfo(self, name):
        return self.tarfile.getmember(name)

    def read(self, name):
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        # Translate the ZipInfo style attributes to TarInfo names.
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO(bytes))

    def close(self):
        self.tarfile.close()
2137 #class TarFileCompat
2139 #--------------------
2140 # exported functions
2141 #--------------------
def is_tarfile(name):
    """Return True if `name' can be opened (and closed again) with this
       module's open(), False if that raises TarError.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2153 open = TarFile.open