Typo fix
[python.git] / Lib / tarfile.py
blobc185fbd49eb0bc577c58fa46f60bd2d6f4652843
1 #!/usr/bin/env python
2 # -*- coding: iso-8859-1 -*-
3 #-------------------------------------------------------------------
4 # tarfile.py
5 #-------------------------------------------------------------------
6 # Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7 # All rights reserved.
9 # Permission is hereby granted, free of charge, to any person
10 # obtaining a copy of this software and associated documentation
11 # files (the "Software"), to deal in the Software without
12 # restriction, including without limitation the rights to use,
13 # copy, modify, merge, publish, distribute, sublicense, and/or sell
14 # copies of the Software, and to permit persons to whom the
15 # Software is furnished to do so, subject to the following
16 # conditions:
18 # The above copyright notice and this permission notice shall be
19 # included in all copies or substantial portions of the Software.
21 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23 # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25 # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 # OTHER DEALINGS IN THE SOFTWARE.
30 """Read from and write to tar format archives.
31 """
33 __version__ = "$Revision$"
34 # $Source$
36 version = "0.8.0"
37 __author__ = "Lars Gustäbel (lars@gustaebel.de)"
38 __date__ = "$Date$"
39 __cvsid__ = "$Id$"
40 __credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
42 #---------
43 # Imports
44 #---------
45 import sys
46 import os
47 import shutil
48 import stat
49 import errno
50 import time
51 import struct
# Refuse to load on classic MacOS: importing the module raises ImportError.
if sys.platform == 'mac':
    # This module needs work for MacOS9, especially in the area of pathname
    # handling. In many places it is assumed a simple substitution of / by the
    # local os.path.sep is good enough to convert pathnames, but this does not
    # work with the mac rooted:path:name versus :nonrooted:path:name syntax
    raise ImportError, "tarfile does not work for platform==mac"

# grp and pwd are optional: when either import fails, both names are
# bound to None instead.
try:
    import grp, pwd
except ImportError:
    grp = pwd = None

# from tarfile import *
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
68 #---------------------------------------------------------
69 # tar constants
70 #---------------------------------------------------------
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
MAGIC = "ustar"                 # magic tar string
VERSION = "00"                  # version number

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field
MAXSIZE_MEMBER = 077777777777L  # maximum size of a file (11 octal digits,
                                # i.e. 8**11 - 1)

# Values of the one-character type field of a header block.
REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
GNUTYPE_SPARSE = "S"            # GNU tar extension for sparse file

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,  # file types that tarfile
                   SYMTYPE, DIRTYPE, FIFOTYPE,  # can cope with.
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

REGULAR_TYPES = (REGTYPE, AREGTYPE,             # file types that somehow
                 CONTTYPE, GNUTYPE_SPARSE)      # represent regular files

#---------------------------------------------------------
# Bits used in the mode field, values in octal.
#---------------------------------------------------------
# File-type bits (tested first by filemode()).
S_IFLNK = 0120000               # symbolic link
S_IFREG = 0100000               # regular file
S_IFBLK = 0060000               # block device
S_IFDIR = 0040000               # directory
S_IFCHR = 0020000               # character device
S_IFIFO = 0010000               # fifo

# Special permission bits.
TSUID = 04000                   # set UID on execution
TSGID = 02000                   # set GID on execution
TSVTX = 01000                   # reserved

# Ordinary permission bits for owner, group and other.
TUREAD = 0400                   # read by owner
TUWRITE = 0200                  # write by owner
TUEXEC = 0100                   # execute/search by owner
TGREAD = 0040                   # read by group
TGWRITE = 0020                  # write by group
TGEXEC = 0010                   # execute/search by group
TOREAD = 0004                   # read by other
TOWRITE = 0002                  # write by other
TOEXEC = 0001                   # execute/search by other
132 #---------------------------------------------------------
133 # Some useful functions
134 #---------------------------------------------------------
def stn(s, length):
    """Return s as a NUL-terminated field that is `length' characters
       long: s is cut to at most length-1 characters, padded with NULs
       when shorter, and always followed by one closing NUL.
    """
    text = s[:length - 1]
    padding = NUL * (length - len(s) - 1)
    return text + padding + NUL
141 def nti(s):
142 """Convert a number field to a python number.
144 # There are two possible encodings for a number field, see
145 # itn() below.
146 if s[0] != chr(0200):
147 n = int(s.rstrip(NUL) or "0", 8)
148 else:
149 n = 0L
150 for i in xrange(len(s) - 1):
151 n <<= 8
152 n += ord(s[i + 1])
153 return n
155 def itn(n, digits=8, posix=False):
156 """Convert a python number to a number field.
158 # POSIX 1003.1-1988 requires numbers to be encoded as a string of
159 # octal digits followed by a null-byte, this allows values up to
160 # (8**(digits-1))-1. GNU tar allows storing numbers greater than
161 # that if necessary. A leading 0200 byte indicates this particular
162 # encoding, the following digits-1 bytes are a big-endian
163 # representation. This allows values up to (256**(digits-1))-1.
164 if 0 <= n < 8 ** (digits - 1):
165 s = "%0*o" % (digits - 1, n) + NUL
166 else:
167 if posix:
168 raise ValueError("overflow in number field")
170 if n < 0:
171 # XXX We mimic GNU tar's behaviour with negative numbers,
172 # this could raise OverflowError.
173 n = struct.unpack("L", struct.pack("l", n))[0]
175 s = ""
176 for i in xrange(digits - 1):
177 s = chr(n & 0377) + s
178 n >>= 8
179 s = chr(0200) + s
180 return s
def calc_chksums(buf):
    """Return the pair (unsigned_chksum, signed_chksum) for the header
       block buf.  All bytes are summed up except the chksum field
       (bytes 148 to 155), which counts as eight spaces (8 * 32 = 256).
       According to the GNU tar sources, some tars (Sun and NeXT)
       calculate chksum with signed char, which gives a different sum
       when bytes with the high bit set are present, hence both
       variants are computed.
    """
    head = buf[:148]
    tail = buf[156:512]
    unsigned_chksum = 256 + sum(struct.unpack("148B", head)
                                + struct.unpack("356B", tail))
    signed_chksum = 256 + sum(struct.unpack("148b", head)
                              + struct.unpack("356b", tail))
    return unsigned_chksum, signed_chksum
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.
       Raises IOError when src runs out of data before length bytes
       have been copied.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    BUFSIZE = 16 * 1024
    pending, tail = divmod(length, BUFSIZE)

    # First the full-sized chunks ...
    while pending > 0:
        chunk = src.read(BUFSIZE)
        if len(chunk) < BUFSIZE:
            raise IOError("end of file reached")
        dst.write(chunk)
        pending -= 1

    # ... then whatever is left over.
    if tail != 0:
        chunk = src.read(tail)
        if len(chunk) < tail:
            raise IOError("end of file reached")
        dst.write(chunk)
    return
# One inner tuple per output character of filemode().  Within an inner
# tuple the (bits, char) pairs are tried in order, so combined bit
# patterns have to be listed before the patterns they contain.
filemode_table = (
    # file type
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    # owner
    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    # group
    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    # other
    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)

def filemode(mode):
    """Render the mode bits of a file as a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for alternatives in filemode_table:
        symbol = "-"                    # shown when no entry matches
        for bit, char in alternatives:
            if mode & bit == bit:
                symbol = char
                break
        chars.append(symbol)
    return "".join(chars)
# normpath(path) normalizes path with os.path.normpath(); where the local
# separator is not "/", the separators of the result are replaced by "/".
if os.sep != "/":
    normpath = lambda path: os.path.normpath(path).replace(os.sep, "/")
else:
    normpath = os.path.normpath
class TarError(Exception):
    """Base exception."""
    pass
class ExtractError(TarError):
    """General exception for extract errors."""
    pass
class ReadError(TarError):
    """Exception for unreadable tar archives."""
    pass
class CompressionError(TarError):
    """Exception for unavailable compression methods."""
    pass
class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
    pass
283 #---------------------------
284 # internal stream interface
285 #---------------------------
286 class _LowLevelFile:
287 """Low-level file object. Supports reading and writing.
288 It is used instead of a regular file object for streaming
289 access.
292 def __init__(self, name, mode):
293 mode = {
294 "r": os.O_RDONLY,
295 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
296 }[mode]
297 if hasattr(os, "O_BINARY"):
298 mode |= os.O_BINARY
299 self.fd = os.open(name, mode)
301 def close(self):
302 os.close(self.fd)
304 def read(self, size):
305 return os.read(self.fd, size)
307 def write(self, s):
308 os.write(self.fd, s)
310 class _Stream:
311 """Class that serves as an adapter between TarFile and
312 a stream-like object. The stream-like object only
313 needs to have a read() or write() method and is accessed
314 blockwise. Use of gzip or bzip2 compression is possible.
315 A stream-like object could be for example: sys.stdin,
316 sys.stdout, a socket, a tape device etc.
318 _Stream is intended to be used only internally.
321 def __init__(self, name, mode, comptype, fileobj, bufsize):
322 """Construct a _Stream object.
324 self._extfileobj = True
325 if fileobj is None:
326 fileobj = _LowLevelFile(name, mode)
327 self._extfileobj = False
329 if comptype == '*':
330 # Enable transparent compression detection for the
331 # stream interface
332 fileobj = _StreamProxy(fileobj)
333 comptype = fileobj.getcomptype()
335 self.name = name or ""
336 self.mode = mode
337 self.comptype = comptype
338 self.fileobj = fileobj
339 self.bufsize = bufsize
340 self.buf = ""
341 self.pos = 0L
342 self.closed = False
344 if comptype == "gz":
345 try:
346 import zlib
347 except ImportError:
348 raise CompressionError("zlib module is not available")
349 self.zlib = zlib
350 self.crc = zlib.crc32("")
351 if mode == "r":
352 self._init_read_gz()
353 else:
354 self._init_write_gz()
356 if comptype == "bz2":
357 try:
358 import bz2
359 except ImportError:
360 raise CompressionError("bz2 module is not available")
361 if mode == "r":
362 self.dbuf = ""
363 self.cmp = bz2.BZ2Decompressor()
364 else:
365 self.cmp = bz2.BZ2Compressor()
367 def __del__(self):
368 if hasattr(self, "closed") and not self.closed:
369 self.close()
371 def _init_write_gz(self):
372 """Initialize for writing with gzip compression.
374 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
375 -self.zlib.MAX_WBITS,
376 self.zlib.DEF_MEM_LEVEL,
378 timestamp = struct.pack("<L", long(time.time()))
379 self.__write("\037\213\010\010%s\002\377" % timestamp)
380 if self.name.endswith(".gz"):
381 self.name = self.name[:-3]
382 self.__write(self.name + NUL)
384 def write(self, s):
385 """Write string s to the stream.
387 if self.comptype == "gz":
388 self.crc = self.zlib.crc32(s, self.crc)
389 self.pos += len(s)
390 if self.comptype != "tar":
391 s = self.cmp.compress(s)
392 self.__write(s)
394 def __write(self, s):
395 """Write string s to the stream if a whole new block
396 is ready to be written.
398 self.buf += s
399 while len(self.buf) > self.bufsize:
400 self.fileobj.write(self.buf[:self.bufsize])
401 self.buf = self.buf[self.bufsize:]
403 def close(self):
404 """Close the _Stream object. No operation should be
405 done on it afterwards.
407 if self.closed:
408 return
410 if self.mode == "w" and self.comptype != "tar":
411 self.buf += self.cmp.flush()
413 if self.mode == "w" and self.buf:
414 blocks, remainder = divmod(len(self.buf), self.bufsize)
415 if remainder > 0:
416 self.buf += NUL * (self.bufsize - remainder)
417 self.fileobj.write(self.buf)
418 self.buf = ""
419 if self.comptype == "gz":
420 # The native zlib crc is an unsigned 32-bit integer, but
421 # the Python wrapper implicitly casts that to a signed C
422 # long. So, on a 32-bit box self.crc may "look negative",
423 # while the same crc on a 64-bit box may "look positive".
424 # To avoid irksome warnings from the `struct` module, force
425 # it to look positive on all boxes.
426 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
427 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
429 if not self._extfileobj:
430 self.fileobj.close()
432 self.closed = True
434 def _init_read_gz(self):
435 """Initialize for reading a gzip compressed fileobj.
437 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
438 self.dbuf = ""
440 # taken from gzip.GzipFile with some alterations
441 if self.__read(2) != "\037\213":
442 raise ReadError("not a gzip file")
443 if self.__read(1) != "\010":
444 raise CompressionError("unsupported compression method")
446 flag = ord(self.__read(1))
447 self.__read(6)
449 if flag & 4:
450 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
451 self.read(xlen)
452 if flag & 8:
453 while True:
454 s = self.__read(1)
455 if not s or s == NUL:
456 break
457 if flag & 16:
458 while True:
459 s = self.__read(1)
460 if not s or s == NUL:
461 break
462 if flag & 2:
463 self.__read(2)
465 def tell(self):
466 """Return the stream's file pointer position.
468 return self.pos
470 def seek(self, pos=0):
471 """Set the stream's file pointer to pos. Negative seeking
472 is forbidden.
474 if pos - self.pos >= 0:
475 blocks, remainder = divmod(pos - self.pos, self.bufsize)
476 for i in xrange(blocks):
477 self.read(self.bufsize)
478 self.read(remainder)
479 else:
480 raise StreamError("seeking backwards is not allowed")
481 return self.pos
483 def read(self, size=None):
484 """Return the next size number of bytes from the stream.
485 If size is not defined, return all bytes of the stream
486 up to EOF.
488 if size is None:
489 t = []
490 while True:
491 buf = self._read(self.bufsize)
492 if not buf:
493 break
494 t.append(buf)
495 buf = "".join(t)
496 else:
497 buf = self._read(size)
498 self.pos += len(buf)
499 return buf
501 def _read(self, size):
502 """Return size bytes from the stream.
504 if self.comptype == "tar":
505 return self.__read(size)
507 c = len(self.dbuf)
508 t = [self.dbuf]
509 while c < size:
510 buf = self.__read(self.bufsize)
511 if not buf:
512 break
513 buf = self.cmp.decompress(buf)
514 t.append(buf)
515 c += len(buf)
516 t = "".join(t)
517 self.dbuf = t[size:]
518 return t[:size]
520 def __read(self, size):
521 """Return size bytes from stream. If internal buffer is empty,
522 read another block from the stream.
524 c = len(self.buf)
525 t = [self.buf]
526 while c < size:
527 buf = self.fileobj.read(self.bufsize)
528 if not buf:
529 break
530 t.append(buf)
531 c += len(buf)
532 t = "".join(t)
533 self.buf = t[size:]
534 return t[:size]
535 # class _Stream
537 class _StreamProxy(object):
538 """Small proxy class that enables transparent compression
539 detection for the Stream interface (mode 'r|*').
542 def __init__(self, fileobj):
543 self.fileobj = fileobj
544 self.buf = self.fileobj.read(BLOCKSIZE)
546 def read(self, size):
547 self.read = self.fileobj.read
548 return self.buf
550 def getcomptype(self):
551 if self.buf.startswith("\037\213\010"):
552 return "gz"
553 if self.buf.startswith("BZh91"):
554 return "bz2"
555 return "tar"
557 def close(self):
558 self.fileobj.close()
559 # class StreamProxy
561 class _BZ2Proxy(object):
562 """Small proxy class that enables external file object
563 support for "r:bz2" and "w:bz2" modes. This is actually
564 a workaround for a limitation in bz2 module's BZ2File
565 class which (unlike gzip.GzipFile) has no support for
566 a file object argument.
569 blocksize = 16 * 1024
571 def __init__(self, fileobj, mode):
572 self.fileobj = fileobj
573 self.mode = mode
574 self.init()
576 def init(self):
577 import bz2
578 self.pos = 0
579 if self.mode == "r":
580 self.bz2obj = bz2.BZ2Decompressor()
581 self.fileobj.seek(0)
582 self.buf = ""
583 else:
584 self.bz2obj = bz2.BZ2Compressor()
586 def read(self, size):
587 b = [self.buf]
588 x = len(self.buf)
589 while x < size:
590 try:
591 raw = self.fileobj.read(self.blocksize)
592 data = self.bz2obj.decompress(raw)
593 b.append(data)
594 except EOFError:
595 break
596 x += len(data)
597 self.buf = "".join(b)
599 buf = self.buf[:size]
600 self.buf = self.buf[size:]
601 self.pos += len(buf)
602 return buf
604 def seek(self, pos):
605 if pos < self.pos:
606 self.init()
607 self.read(pos - self.pos)
609 def tell(self):
610 return self.pos
612 def write(self, data):
613 self.pos += len(data)
614 raw = self.bz2obj.compress(data)
615 self.fileobj.write(raw)
617 def close(self):
618 if self.mode == "w":
619 raw = self.bz2obj.flush()
620 self.fileobj.write(raw)
621 self.fileobj.close()
622 # class _BZ2Proxy
624 #------------------------
625 # Extraction file object
626 #------------------------
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile(). Support for
       sparse files included.
    """

    def __init__(self, tarfile, tarinfo):
        self.fileobj = tarfile.fileobj
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.offset = tarinfo.offset_data   # start of the member's data
        self.size = tarinfo.size
        self.pos = 0                        # position inside the member
        self.linebuffer = ""                # read-ahead used by readline()
        # read() is bound per instance to the matching implementation.
        if tarinfo.issparse():
            self.sparse = tarinfo.sparse
            self.read = self._readsparse
        else:
            self.read = self._readnormal

    def __read(self, size):
        """Overloadable read method.
        """
        return self.fileobj.read(size)

    def readline(self, size=-1):
        """Read a line with approx. size. If size is negative,
           read a whole line. readline() and read() must not
           be mixed up (!).

           Carriage returns directly in front of the newline are
           removed from the returned line.
        """
        if size < 0:
            size = sys.maxint

        nl = self.linebuffer.find("\n")
        if nl > size:
            # The buffered line is longer than the limit.  Hand out the
            # first size characters and keep everything else buffered;
            # cutting at size and then treating that position as the
            # newline would drop one character of data and add a
            # newline that is not in the file.
            s = self.linebuffer[:size]
            self.linebuffer = self.linebuffer[size:]
            return s

        if nl < 0:
            # No complete line buffered yet: read on in small pieces
            # until a newline shows up or the limit is used up.
            size -= len(self.linebuffer)
            while (nl < 0 and size > 0):
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)
                nl = self.linebuffer.find("\n")
            if nl == -1:
                s = self.linebuffer
                self.linebuffer = ""
                return s

        buf = self.linebuffer[:nl]
        self.linebuffer = self.linebuffer[nl + 1:]
        while buf[-1:] == "\r":
            buf = buf[:-1]
        return buf + "\n"

    def readlines(self):
        """Return a list with all (following) lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def _readnormal(self, size=None):
        """Read operation for regular files.
        """
        if self.closed:
            raise ValueError("file is closed")
        self.fileobj.seek(self.offset + self.pos)
        bytesleft = self.size - self.pos
        if size is None:
            bytestoread = bytesleft
        else:
            # never read beyond the end of the member
            bytestoread = min(size, bytesleft)
        self.pos += bytestoread
        return self.__read(bytestoread)

    def _readsparse(self, size=None):
        """Read operation for sparse files.
        """
        if self.closed:
            raise ValueError("file is closed")

        if size is None:
            size = self.size - self.pos

        data = []
        while size > 0:
            buf = self._readsparsesection(size)
            if not buf:
                break
            size -= len(buf)
            data.append(buf)
        return "".join(data)

    def _readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        section = self.sparse.find(self.pos)

        if section is None:
            return ""

        toread = min(size, section.offset + section.size - self.pos)
        if isinstance(section, _data):
            # Data sections are read from the archive, starting at the
            # position recorded for the section.
            realpos = section.realpos + self.pos - section.offset
            self.pos += toread
            self.fileobj.seek(self.offset + realpos)
            return self.__read(toread)
        else:
            # Every other section is returned as a run of NULs.
            self.pos += toread
            return NUL * toread

    def tell(self):
        """Return the current file position.
        """
        return self.pos

    def seek(self, pos, whence=0):
        """Seek to a position in the file.
           whence is 0 (from the start), 1 (relative to the current
           position) or 2 (relative to the end).  The resulting position
           is kept within 0..size, and the readline() buffer is dropped.
        """
        self.linebuffer = ""
        if whence == 0:
            self.pos = min(max(pos, 0), self.size)
        if whence == 1:
            if pos < 0:
                self.pos = max(self.pos + pos, 0)
            else:
                self.pos = min(self.pos + pos, self.size)
        if whence == 2:
            self.pos = max(min(self.size + pos, self.size), 0)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file object.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")
        return self

    def next(self):
        """Get the next item from the file iterator.
        """
        result = self.readline()
        if not result:
            raise StopIteration
        return result

#class ExFileObject
784 #------------------
785 # Exported Classes
786 #------------------
787 class TarInfo(object):
788 """Informational class which holds the details about an
789 archive member given by a tar header block.
790 TarInfo objects are returned by TarFile.getmember(),
791 TarFile.getmembers() and TarFile.gettarinfo() and are
792 usually created internally.
795 def __init__(self, name=""):
796 """Construct a TarInfo object. name is the optional name
797 of the member.
800 self.name = name # member name (dirnames must end with '/')
801 self.mode = 0666 # file permissions
802 self.uid = 0 # user id
803 self.gid = 0 # group id
804 self.size = 0 # file size
805 self.mtime = 0 # modification time
806 self.chksum = 0 # header checksum
807 self.type = REGTYPE # member type
808 self.linkname = "" # link name
809 self.uname = "user" # user name
810 self.gname = "group" # group name
811 self.devmajor = 0 # device major number
812 self.devminor = 0 # device minor number
813 self.prefix = "" # prefix to filename or information
814 # about sparse files
816 self.offset = 0 # the tar header starts here
817 self.offset_data = 0 # the file's data starts here
819 def __repr__(self):
820 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
822 @classmethod
823 def frombuf(cls, buf):
824 """Construct a TarInfo object from a 512 byte string buffer.
826 if len(buf) != BLOCKSIZE:
827 raise ValueError("truncated header")
828 if buf.count(NUL) == BLOCKSIZE:
829 raise ValueError("empty header")
831 tarinfo = cls()
832 tarinfo.buf = buf
833 tarinfo.name = buf[0:100].rstrip(NUL)
834 tarinfo.mode = nti(buf[100:108])
835 tarinfo.uid = nti(buf[108:116])
836 tarinfo.gid = nti(buf[116:124])
837 tarinfo.size = nti(buf[124:136])
838 tarinfo.mtime = nti(buf[136:148])
839 tarinfo.chksum = nti(buf[148:156])
840 tarinfo.type = buf[156:157]
841 tarinfo.linkname = buf[157:257].rstrip(NUL)
842 tarinfo.uname = buf[265:297].rstrip(NUL)
843 tarinfo.gname = buf[297:329].rstrip(NUL)
844 tarinfo.devmajor = nti(buf[329:337])
845 tarinfo.devminor = nti(buf[337:345])
846 tarinfo.prefix = buf[345:500]
848 if tarinfo.chksum not in calc_chksums(buf):
849 raise ValueError("invalid header")
850 return tarinfo
852 def tobuf(self, posix=False):
853 """Return a tar header block as a 512 byte string.
855 parts = [
856 stn(self.name, 100),
857 itn(self.mode & 07777, 8, posix),
858 itn(self.uid, 8, posix),
859 itn(self.gid, 8, posix),
860 itn(self.size, 12, posix),
861 itn(self.mtime, 12, posix),
862 " ", # checksum field
863 self.type,
864 stn(self.linkname, 100),
865 stn(MAGIC, 6),
866 stn(VERSION, 2),
867 stn(self.uname, 32),
868 stn(self.gname, 32),
869 itn(self.devmajor, 8, posix),
870 itn(self.devminor, 8, posix),
871 stn(self.prefix, 155)
874 buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
875 chksum = calc_chksums(buf)[0]
876 buf = buf[:148] + "%06o\0" % chksum + buf[155:]
877 self.buf = buf
878 return buf
880 def isreg(self):
881 return self.type in REGULAR_TYPES
882 def isfile(self):
883 return self.isreg()
884 def isdir(self):
885 return self.type == DIRTYPE
886 def issym(self):
887 return self.type == SYMTYPE
888 def islnk(self):
889 return self.type == LNKTYPE
890 def ischr(self):
891 return self.type == CHRTYPE
892 def isblk(self):
893 return self.type == BLKTYPE
894 def isfifo(self):
895 return self.type == FIFOTYPE
896 def issparse(self):
897 return self.type == GNUTYPE_SPARSE
898 def isdev(self):
899 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
900 # class TarInfo
class TarFile(object):
    """The TarFile Class provides an interface to tar archives.
    """

    # Class-level defaults.

    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

    dereference = False         # If true, add content of linked file to the
                                # tar file, else the link.

    ignore_zeros = False        # If true, skips empty or invalid blocks and
                                # continues processing.

    errorlevel = 0              # If 0, fatal errors only appear in debug
                                # messages (if debug >= 0). If > 0, errors
                                # are passed to the caller as exceptions.

    posix = False               # If True, generates POSIX.1-1990-compliant
                                # archives (no GNU extensions!)

    fileobject = ExFileObject   # class of the file-like objects for members
923 def __init__(self, name=None, mode="r", fileobj=None):
924 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
925 read from an existing archive, 'a' to append data to an existing
926 file or 'w' to create a new file overwriting an existing one. `mode'
927 defaults to 'r'.
928 If `fileobj' is given, it is used for reading or writing data. If it
929 can be determined, `mode' is overridden by `fileobj's mode.
930 `fileobj' is not closed, when TarFile is closed.
932 self.name = name
934 if len(mode) > 1 or mode not in "raw":
935 raise ValueError("mode must be 'r', 'a' or 'w'")
936 self._mode = mode
937 self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
939 if not fileobj:
940 fileobj = file(self.name, self.mode)
941 self._extfileobj = False
942 else:
943 if self.name is None and hasattr(fileobj, "name"):
944 self.name = fileobj.name
945 if hasattr(fileobj, "mode"):
946 self.mode = fileobj.mode
947 self._extfileobj = True
948 self.fileobj = fileobj
950 # Init datastructures
951 self.closed = False
952 self.members = [] # list of members as TarInfo objects
953 self._loaded = False # flag if all members have been read
954 self.offset = 0L # current position in the archive file
955 self.inodes = {} # dictionary caching the inodes of
956 # archive members already added
958 if self._mode == "r":
959 self.firstmember = None
960 self.firstmember = self.next()
962 if self._mode == "a":
963 # Move to the end of the archive,
964 # before the first empty block.
965 self.firstmember = None
966 while True:
967 try:
968 tarinfo = self.next()
969 except ReadError:
970 self.fileobj.seek(0)
971 break
972 if tarinfo is None:
973 self.fileobj.seek(- BLOCKSIZE, 1)
974 break
976 if self._mode in "aw":
977 self._loaded = True
979 #--------------------------------------------------------------------------
980 # Below are the classmethods which act as alternate constructors to the
981 # TarFile class. The open() method is the only one that is needed for
982 # public use; it is the "super"-constructor and is able to select an
983 # adequate "sub"-constructor for a particular compression using the mapping
984 # from OPEN_METH.
986 # This concept allows one to subclass TarFile without losing the comfort of
987 # the super-constructor. A sub-constructor is registered and made available
988 # by adding it to the mapping in OPEN_METH.
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError when neither name nor fileobj is given or the
       mode is not understood, CompressionError for an unknown
       compression type and ReadError when no reader accepts the file.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        if fileobj is not None:
            # Every attempt has to start at the same position.
            saved_pos = fileobj.tell()
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            try:
                return func(name, "r", fileobj)
            except (ReadError, CompressionError):
                if fileobj is not None:
                    # Undo whatever the failed attempt has consumed.
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Exact comparison: a substring test against "rw" would also
        # accept "rw" itself.
        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        try:
            t = cls(name, filemode, stream)
        except:
            # Do not leave the stream open when the archive cannot be
            # set up; the exception is passed on.
            stream.close()
            raise
        t._extfileobj = False
        return t

    # Exact comparison again: "" is a substring of every string.
    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj)

    raise ValueError("undiscernible mode")
1058 @classmethod
1059 def taropen(cls, name, mode="r", fileobj=None):
1060 """Open uncompressed tar archive name for reading or writing.
1062 if len(mode) > 1 or mode not in "raw":
1063 raise ValueError("mode must be 'r', 'a' or 'w'")
1064 return cls(name, mode, fileobj)
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for a mode other than 'r' or 'w',
       CompressionError when the gzip module cannot be used and
       ReadError when setting up the archive fails with an IOError.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    # Derive the name of the tar archive inside the gzip file.  name
    # may be None when only a file object was given; there is nothing
    # to derive then.
    if name is None:
        tarname = None
    else:
        pre, ext = os.path.splitext(name)
        pre = os.path.basename(pre)
        if ext == ".tgz":
            ext = ".tar"
        if ext == ".gz":
            ext = ""
        tarname = pre + ext

    # Remember whether the file is opened here, so that it can be
    # closed again if the archive turns out to be unusable.
    opened_here = fileobj is None
    if opened_here:
        fileobj = file(name, mode + "b")

    if mode != "r":
        name = tarname

    try:
        t = cls.taropen(tarname, mode,
            gzip.GzipFile(name, mode, compresslevel, fileobj)
        )
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a gzip file")
    t._extfileobj = False
    return t
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for a mode other than 'r' or 'w',
       CompressionError when the bz2 module is missing and ReadError
       when setting up the archive fails with an IOError.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # Derive the name of the tar archive inside the bzip2 file.  name
    # may be None when only a file object was given; there is nothing
    # to derive then.
    if name is None:
        tarname = None
    else:
        pre, ext = os.path.splitext(name)
        pre = os.path.basename(pre)
        if ext == ".tbz2":
            ext = ".tar"
        if ext == ".bz2":
            ext = ""
        tarname = pre + ext

    # Remember whether the file is opened here, so that it can be
    # closed again if the archive turns out to be unusable.
    opened_here = fileobj is None
    if opened_here:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        fileobj = _BZ2Proxy(fileobj, mode)

    try:
        t = cls.taropen(tarname, mode, fileobj)
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a bzip2 file")
    t._extfileobj = False
    return t
# All *open() methods are registered here.
# Each key is a short format name, each value is the name of the
# classmethod in this class that opens archives of that format.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open"    # bzip2 compressed tar
}
1143 #--------------------------------------------------------------------------
1144 # The public methods which TarFile provides:
def close(self):
    """Finish and close the archive. When it was opened for writing or
       appending, the two terminating zero blocks are written first and
       the output is padded up to a full record. Closing twice is a
       no-op.
    """
    if self.closed:
        return

    if self._mode in "aw":
        trailer = BLOCKSIZE * 2
        self.fileobj.write(NUL * trailer)
        self.offset += trailer
        # Pad the archive to a multiple of RECORDSIZE
        # (like option -b20 for tar does).
        leftover = self.offset % RECORDSIZE
        if leftover > 0:
            self.fileobj.write(NUL * (RECORDSIZE - leftover))

    # A file object handed in by the caller stays open.
    if not self._extfileobj:
        self.fileobj.close()
    self.closed = True
def getmember(self, name):
    """Look up the member called `name' and return its TarInfo object.
       KeyError is raised when no such member exists. When a name
       occurs more than once in the archive, the last occurrence wins
       because it is assumed to be the most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
def getmembers(self):
    """Return all members of the archive as a list of TarInfo objects,
       in the order in which they appear in the archive.
    """
    self._check()
    if self._loaded:
        return self.members
    # The member list is only complete after the whole
    # archive has been scanned once.
    self._load()
    return self.members
def getnames(self):
    """Return the names of all members as a list, in the same order
       as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object for either the file `name' or the file
       object `fileobj' (using os.fstat on its file descriptor). You can
       modify some of the TarInfo's attributes before you add it using
       addfile(). If given, `arcname' specifies an alternative name for the
       file in the archive.

       Returns None when the file is of a type that is not handled here
       (neither regular file, directory, fifo, symlink nor device).
       The archive must be open for writing or appending.
    """
    self._check("aw")

    # When fileobj is given, replace name by
    # fileobj's real name.
    if fileobj is not None:
        name = fileobj.name

    # Building the name of the member in the archive.
    # Backward slashes are converted to forward slashes,
    # Absolute paths are turned to relative paths.
    if arcname is None:
        arcname = name
    arcname = normpath(arcname)
    drv, arcname = os.path.splitdrive(arcname)
    while arcname[0:1] == "/":
        arcname = arcname[1:]

    # Now, fill the TarInfo object with
    # information specific for the file.
    tarinfo = TarInfo()

    # Use os.stat or os.lstat, depending on platform
    # and if symlinks shall be resolved.
    if fileobj is None:
        if hasattr(os, "lstat") and not self.dereference:
            statres = os.lstat(name)
        else:
            statres = os.stat(name)
    else:
        statres = os.fstat(fileobj.fileno())
    linkname = ""

    # Map the stat mode onto one of the tar member types.
    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        inode = (statres.st_ino, statres.st_dev)
        if not self.dereference and \
                statres.st_nlink > 1 and inode in self.inodes:
            # Is it a hardlink to an already
            # archived file?
            type = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # The inode is added only if it's valid.
            # For win32 it is always 0.
            type = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(stmd):
        type = DIRTYPE
        if arcname[-1:] != "/":
            arcname += "/"
    elif stat.S_ISFIFO(stmd):
        type = FIFOTYPE
    elif stat.S_ISLNK(stmd):
        type = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(stmd):
        type = CHRTYPE
    elif stat.S_ISBLK(stmd):
        type = BLKTYPE
    else:
        # Unsupported file type.
        return None

    # Fill the TarInfo object with all
    # information we can get.
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    # Only regular files carry data in the archive.
    if stat.S_ISREG(stmd):
        tarinfo.size = statres.st_size
    else:
        tarinfo.size = 0L
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = type
    tarinfo.linkname = linkname
    # Symbolic owner/group names are optional; an id without a
    # database entry simply leaves the name unset.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if type in (CHRTYPE, BLKTYPE):
        if hasattr(os, "major") and hasattr(os, "minor"):
            tarinfo.devmajor = os.major(statres.st_rdev)
            tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
def list(self, verbose=True):
    """Print a table of contents to sys.stdout. If `verbose' is False, only
       the names of the members are printed. If it is True, an `ls -l'-like
       output is produced.
    """
    self._check()

    for tarinfo in self:
        if verbose:
            # Each trailing comma keeps the output on the same line.
            print filemode(tarinfo.mode),
            print "%s/%s" % (tarinfo.uname or tarinfo.uid,
                             tarinfo.gname or tarinfo.gid),
            # Devices show "major,minor" in place of a size.
            if tarinfo.ischr() or tarinfo.isblk():
                print "%10s" % ("%d,%d" \
                                % (tarinfo.devmajor, tarinfo.devminor)),
            else:
                print "%10d" % tarinfo.size,
            print "%d-%02d-%02d %02d:%02d:%02d" \
                  % time.localtime(tarinfo.mtime)[:6],

        print tarinfo.name,

        if verbose:
            if tarinfo.issym():
                print "->", tarinfo.linkname,
            if tarinfo.islnk():
                print "link to", tarinfo.linkname,
        # Terminate the line for this member.
        print
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False.

       The archive file itself and files of unsupported types are
       skipped; both cases are only reported via _dbg().
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None \
        and os.path.abspath(name) == os.path.abspath(self.name):
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir("."):
                self.add(f, os.path.join(arcname, f))
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = file(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Close the input file even if addfile() fails.
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))

    else:
        self.addfile(tarinfo)
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
       given, tarinfo.size bytes are read from it and added to the archive.
       You can create TarInfo objects using gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.

       In posix mode ValueError is raised for members that are too
       large or whose name or linkname is too long; otherwise GNU
       extensions are used for these cases.
    """
    self._check("aw")

    tarinfo.name = normpath(tarinfo.name)
    if tarinfo.isdir():
        # directories should end with '/'
        tarinfo.name += "/"

    if tarinfo.linkname:
        tarinfo.linkname = normpath(tarinfo.linkname)

    if tarinfo.size > MAXSIZE_MEMBER:
        if self.posix:
            raise ValueError("file is too large (>= 8 GB)")
        else:
            self._dbg(2, "tarfile: Created GNU tar largefile header")

    if len(tarinfo.linkname) > LENGTH_LINK:
        if self.posix:
            raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
        else:
            # Store the full linkname in a separate GNU longlink
            # member and keep a truncated copy in the header.
            self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
            tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK -1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGLINK")

    if len(tarinfo.name) > LENGTH_NAME:
        if self.posix:
            # Split the name at a '/' into a prefix and a name part.
            prefix = tarinfo.name[:LENGTH_PREFIX + 1]
            while prefix and prefix[-1] != "/":
                prefix = prefix[:-1]

            name = tarinfo.name[len(prefix):]
            prefix = prefix[:-1]

            if not prefix or len(name) > LENGTH_NAME:
                raise ValueError("name is too long (>%d)" % (LENGTH_NAME))

            tarinfo.name = name
            tarinfo.prefix = prefix
        else:
            # Store the full name in a separate GNU longname
            # member and keep a truncated copy in the header.
            self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
            tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
            self._dbg(2, "tarfile: Created GNU tar extension LONGNAME")

    self.fileobj.write(tarinfo.tobuf(self.posix))
    self.offset += BLOCKSIZE

    # If there's data to follow, append it.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        # Pad the data up to a full block.
        blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
        if remainder > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - remainder))
            blocks += 1
        self.offset += blocks * BLOCKSIZE

    self.members.append(tarinfo)
1439 def extractall(self, path=".", members=None):
1440 """Extract all members from the archive to the current working
1441 directory and set owner, modification time and permissions on
1442 directories afterwards. `path' specifies a different directory
1443 to extract to. `members' is optional and must be a subset of the
1444 list returned by getmembers().
1446 directories = []
1448 if members is None:
1449 members = self
1451 for tarinfo in members:
1452 if tarinfo.isdir():
1453 # Extract directory with a safe mode, so that
1454 # all files below can be extracted as well.
1455 try:
1456 os.makedirs(os.path.join(path, tarinfo.name), 0777)
1457 except EnvironmentError:
1458 pass
1459 directories.append(tarinfo)
1460 else:
1461 self.extract(tarinfo, path)
1463 # Reverse sort directories.
1464 directories.sort(lambda a, b: cmp(a.name, b.name))
1465 directories.reverse()
1467 # Set correct owner, mtime and filemode on directories.
1468 for tarinfo in directories:
1469 path = os.path.join(path, tarinfo.name)
1470 try:
1471 self.chown(tarinfo, path)
1472 self.utime(tarinfo, path)
1473 self.chmod(tarinfo, path)
1474 except ExtractError, e:
1475 if self.errorlevel > 1:
1476 raise
1477 else:
1478 self._dbg(1, "tarfile: %s" % e)
1480 def extract(self, member, path=""):
1481 """Extract a member from the archive to the current working directory,
1482 using its full name. Its file information is extracted as accurately
1483 as possible. `member' may be a filename or a TarInfo object. You can
1484 specify a different directory using `path'.
1486 self._check("r")
1488 if isinstance(member, TarInfo):
1489 tarinfo = member
1490 else:
1491 tarinfo = self.getmember(member)
1493 # Prepare the link target for makelink().
1494 if tarinfo.islnk():
1495 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
1497 try:
1498 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
1499 except EnvironmentError, e:
1500 if self.errorlevel > 0:
1501 raise
1502 else:
1503 if e.filename is None:
1504 self._dbg(1, "tarfile: %s" % e.strerror)
1505 else:
1506 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
1507 except ExtractError, e:
1508 if self.errorlevel > 1:
1509 raise
1510 else:
1511 self._dbg(1, "tarfile: %s" % e)
def extractfile(self, member):
    """Return a read-only file-like object for `member', which may be
       a member name or a TarInfo object. Regular files and members of
       unknown type yield a file object for their own data, links yield
       the file object of their target. For everything else None is
       returned.
       The file-like object provides the following methods:
       read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    if isinstance(member, TarInfo):
        tarinfo = member
    else:
        tarinfo = self.getmember(member)

    # Regular files, and unknown types which are treated as such.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # No data is associated with the member (directory, chrdev,
    # blkdev, etc.), so there is no file object to return.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    # A small but ugly workaround for the case that someone tries
    # to extract a (sym)link as a file-object from a non-seekable
    # stream of tar blocks.
    if isinstance(self.fileobj, _Stream):
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    target = self._getmember(tarinfo.linkname, tarinfo)
    return self.extractfile(target)
def _extract_member(self, tarinfo, targetpath):
    """Extract the TarInfo object tarinfo to a physical
       file called targetpath.
    """
    # Fetch the TarInfo object for the given name
    # and build the destination pathname, replacing
    # forward slashes to platform specific separators.
    if targetpath[-1:] == "/":
        targetpath = targetpath[:-1]
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        # Build a directory TarInfo that borrows ownership and
        # mtime from the member being extracted.
        ti = TarInfo()
        ti.name = upperdirs
        ti.type = DIRTYPE
        ti.mode = 0777
        ti.mtime = tarinfo.mtime
        ti.uid = tarinfo.uid
        ti.gid = tarinfo.gid
        ti.uname = tarinfo.uname
        ti.gname = tarinfo.gname
        # Best effort: any failure here is ignored.
        try:
            self._extract_member(ti, ti.name)
        except:
            pass

    if tarinfo.islnk() or tarinfo.issym():
        self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
    else:
        self._dbg(1, tarinfo.name)

    # Dispatch to the make*() method matching the member type.
    if tarinfo.isreg():
        self.makefile(tarinfo, targetpath)
    elif tarinfo.isdir():
        self.makedir(tarinfo, targetpath)
    elif tarinfo.isfifo():
        self.makefifo(tarinfo, targetpath)
    elif tarinfo.ischr() or tarinfo.isblk():
        self.makedev(tarinfo, targetpath)
    elif tarinfo.islnk() or tarinfo.issym():
        self.makelink(tarinfo, targetpath)
    elif tarinfo.type not in SUPPORTED_TYPES:
        self.makeunknown(tarinfo, targetpath)
    else:
        self.makefile(tarinfo, targetpath)

    # Restore ownership first; mode and mtime are skipped
    # for symbolic links.
    self.chown(tarinfo, targetpath)
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
1605 #--------------------------------------------------------------------------
1606 # Below are the different file methods. They are called via
1607 # _extract_member() when extract() is called. They can be replaced in a
1608 # subclass to implement other functionality.
def makedir(self, tarinfo, targetpath):
    """Create the directory targetpath. A directory that already
       exists is not an error.
    """
    try:
        os.mkdir(targetpath)
    except EnvironmentError:
        problem = sys.exc_info()[1]
        if problem.errno == errno.EEXIST:
            return
        raise
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath and fill it with the data of
       the member described by tarinfo.
    """
    source = self.extractfile(tarinfo)
    try:
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            # Close the output file even if copying fails.
            target.close()
    finally:
        # Likewise release the member's file object on any error.
        source.close()
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to targetpath as if it
       were a regular file and report that via _dbg().
    """
    self.makefile(tarinfo, targetpath)
    note = "tarfile: Unknown file type %r, extracted as regular file."
    self._dbg(1, note % tarinfo.type)
def makefifo(self, tarinfo, targetpath):
    """Create a fifo at targetpath. ExtractError is raised on
       platforms without os.mkfifo.
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
def makedev(self, tarinfo, targetpath):
    """Create the character or block device described by tarinfo at
       targetpath. ExtractError is raised when the platform lacks
       os.mknod or os.makedev.
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Anything that is not a block device is created as a
    # character device.
    if tarinfo.isblk():
        kind = stat.S_IFBLK
    else:
        kind = stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, tarinfo.mode | kind, device)
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath. If it cannot be created
       (platform limitation), we try to make a copy of the referenced file
       instead of a link.

       Raises IOError when neither a link nor a copy can be made.
    """
    linkpath = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(linkpath, targetpath)
        else:
            # See extract().
            os.link(tarinfo._link_target, targetpath)
    except AttributeError:
        # This branch is the fallback for a missing attribute, e.g.
        # os.symlink or os.link not existing on this platform.
        if tarinfo.issym():
            # A symlink target is relative to the directory
            # of the link itself.
            linkpath = os.path.join(os.path.dirname(tarinfo.name),
                                    linkpath)
            linkpath = normpath(linkpath)

        # First try to extract the referenced member a second time
        # under the new name, then fall back to copying the file
        # from the file system.
        try:
            self._extract_member(self.getmember(linkpath), targetpath)
        except (EnvironmentError, KeyError), e:
            linkpath = os.path.normpath(linkpath)
            try:
                shutil.copy2(linkpath, targetpath)
            except EnvironmentError, e:
                raise IOError("link could not be created")
def chown(self, tarinfo, targetpath):
    """Set owner of targetpath according to tarinfo.

       Nothing is done unless the effective user id is 0.
       Raises ExtractError when the owner cannot be changed.
    """
    if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
        # We have to be root to do so.
        # Resolve the group: by name, then by numeric id, and
        # finally fall back to the group of this process.
        try:
            g = grp.getgrnam(tarinfo.gname)[2]
        except KeyError:
            try:
                g = grp.getgrgid(tarinfo.gid)[2]
            except KeyError:
                g = os.getgid()
        # Resolve the user the same way.
        try:
            u = pwd.getpwnam(tarinfo.uname)[2]
        except KeyError:
            try:
                u = pwd.getpwuid(tarinfo.uid)[2]
            except KeyError:
                u = os.getuid()
        try:
            # Prefer lchown for symlinks where it is available.
            if tarinfo.issym() and hasattr(os, "lchown"):
                os.lchown(targetpath, u, g)
            else:
                if sys.platform != "os2emx":
                    os.chown(targetpath, u, g)
        except EnvironmentError, e:
            raise ExtractError("could not change owner")
def chmod(self, tarinfo, targetpath):
    """Apply the permission bits stored in tarinfo to targetpath.
       Does nothing on platforms without os.chmod; a failing chmod
       is reported as ExtractError.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
def utime(self, tarinfo, targetpath):
    """Apply the modification time stored in tarinfo to targetpath
       (used for both access and modification time). A failing utime
       is reported as ExtractError.
    """
    if not hasattr(os, 'utime'):
        return
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    if sys.platform == "win32" and tarinfo.isdir():
        return
    try:
        os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
1737 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       Raises ReadError when the very first block cannot be parsed
       and ignore_zeros is not set.
    """
    self._check("ra")
    # Hand out a pending first member before reading any further.
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            return None

        try:
            tarinfo = TarInfo.frombuf(buf)

            # Set the TarInfo object's offset to the current position of the
            # TarFile and set self.offset to the position where the data blocks
            # should begin.
            tarinfo.offset = self.offset
            self.offset += BLOCKSIZE

            tarinfo = self.proc_member(tarinfo)

        except ValueError, e:
            if self.ignore_zeros:
                # Skip the bad block and try the following one.
                self._dbg(2, "0x%X: empty or invalid block: %s" %
                          (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            else:
                if self.offset == 0:
                    raise ReadError("empty, unreadable or compressed "
                                    "file: %s" % e)
                return None
        break

    # Some old tar programs represent a directory as a regular
    # file with a trailing slash.
    if tarinfo.isreg() and tarinfo.name.endswith("/"):
        tarinfo.type = DIRTYPE

    # The prefix field is used for filenames > 100 in
    # the POSIX standard.
    # name = prefix + '/' + name
    tarinfo.name = normpath(os.path.join(tarinfo.prefix.rstrip(NUL),
                                         tarinfo.name))

    # Directory names should have a '/' at the end.
    if tarinfo.isdir():
        tarinfo.name += "/"

    self.members.append(tarinfo)
    return tarinfo
1798 #--------------------------------------------------------------------------
1799 # The following are methods that are called depending on the type of a
1800 # member. The entry point is proc_member() which is called with a TarInfo
1801 # object created from the header block from the current offset. The
1802 # proc_member() method can be overridden in a subclass to add custom
1803 # proc_*() methods. A proc_*() method MUST implement the following
1804 # operations:
1805 # 1. Set tarinfo.offset_data to the position where the data blocks begin,
1806 # if there is data that follows.
1807 # 2. Set self.offset to the position where the next member's header will
1808 # begin.
1809 # 3. Return tarinfo or another valid TarInfo object.
def proc_member(self, tarinfo):
    """Dispatch tarinfo to the proc_*() method that is responsible
       for its type and return that method's result.
    """
    handler = self.proc_builtin
    if tarinfo.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        handler = self.proc_gnulong
    elif tarinfo.type == GNUTYPE_SPARSE:
        handler = self.proc_sparse
    return handler(tarinfo)
def proc_builtin(self, tarinfo):
    """Handle a member of a builtin type, or of an unknown type
       which is treated like a regular file.
    """
    tarinfo.offset_data = self.offset
    has_data = tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES
    if has_data:
        # Skip the following data blocks.
        self.offset += self._block(tarinfo.size)
    return tarinfo
def proc_gnulong(self, tarinfo):
    """Read the data blocks of a GNU longname or longlink member and
       apply the name they contain to the member that follows.
    """
    chunks = []
    remaining = tarinfo.size
    while remaining > 0:
        chunks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        remaining -= BLOCKSIZE
    longname = "".join(chunks)

    # Fetch the next header and process it.
    header = TarInfo.frombuf(self.fileobj.read(BLOCKSIZE))
    header.offset = self.offset
    self.offset += BLOCKSIZE
    member = self.proc_member(header)

    # Patch the TarInfo object from the next header with
    # the longname information.
    member.offset = tarinfo.offset
    if tarinfo.type == GNUTYPE_LONGNAME:
        member.name = longname.rstrip(NUL)
    elif tarinfo.type == GNUTYPE_LONGLINK:
        member.linkname = longname.rstrip(NUL)

    return member
def proc_sparse(self, tarinfo):
    """Process a GNU sparse header plus extra headers.

       Builds a list of _data and _hole sections, stores it in
       tarinfo.sparse and replaces tarinfo.size by the original file
       size read from the header.
    """
    buf = tarinfo.buf
    sp = _ringbuffer()
    pos = 386
    lastpos = 0L    # end of the last section seen so far
    realpos = 0L    # running total of data bytes, passed to _data()
    # There are 4 possible sparse structs in the
    # first header.
    for i in xrange(4):
        try:
            offset = nti(buf[pos:pos + 12])
            numbytes = nti(buf[pos + 12:pos + 24])
        except ValueError:
            break
        # A gap before this data section is recorded as a hole.
        if offset > lastpos:
            sp.append(_hole(lastpos, offset - lastpos))
        sp.append(_data(offset, numbytes, realpos))
        realpos += numbytes
        lastpos = offset + numbytes
        pos += 24

    isextended = ord(buf[482])
    origsize = nti(buf[483:495])

    # If the isextended flag is given,
    # there are extra headers to process.
    while isextended == 1:
        buf = self.fileobj.read(BLOCKSIZE)
        self.offset += BLOCKSIZE
        pos = 0
        # Up to 21 sparse structs are read from each extra header.
        for i in xrange(21):
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                break
            if offset > lastpos:
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24
        isextended = ord(buf[504])

    # A trailing hole reaches up to the original file size.
    if lastpos < origsize:
        sp.append(_hole(lastpos, origsize - lastpos))

    tarinfo.sparse = sp

    tarinfo.offset_data = self.offset
    self.offset += self._block(tarinfo.size)
    tarinfo.size = origsize

    # Clear the prefix field so that it is not used
    # as a pathname in next().
    tarinfo.prefix = ""

    return tarinfo
1921 #--------------------------------------------------------------------------
1922 # Little helper methods:
def _block(self, count):
    """Return `count' rounded up to the next multiple of BLOCKSIZE,
       e.g. _block(834) => 1024.
    """
    full, rest = divmod(count, BLOCKSIZE)
    if not rest:
        return full * BLOCKSIZE
    return (full + 1) * BLOCKSIZE
def _getmember(self, name, tarinfo=None):
    """Search the member list backwards for `name' and return the
       first match, or None. If tarinfo is given, only the members
       in front of it are searched.
    """
    # Ensure that all members have been loaded.
    members = self.getmembers()

    if tarinfo is None:
        idx = len(members)
    else:
        idx = members.index(tarinfo)

    while idx > 0:
        idx -= 1
        candidate = members[idx]
        if name == candidate.name:
            return candidate
    return None
def _load(self):
    """Scan the whole archive by calling next() until it returns
       None, then mark the member list as complete.
    """
    while self.next() is not None:
        pass
    self._loaded = True
def _check(self, mode=None):
    """Raise IOError if the TarFile is already closed or, when
       `mode' is given, if the TarFile's own mode is not one of
       the characters in `mode'.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None:
        return
    if self._mode not in mode:
        raise IOError("bad operation for mode %r" % self._mode)
def __iter__(self):
    """Return an iterator over the members: a plain list iterator
       once all members are loaded, a TarIter otherwise.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
def _create_gnulong(self, name, type):
    """Write a GNU longname/longlink member to the TarFile: an
       extended tar header whose size is the length of the null
       terminated `name', followed by the data blocks holding it.
    """
    payload = name + NUL

    header = TarInfo()
    header.name = "././@LongLink"
    header.type = type
    header.mode = 0
    header.size = len(payload)

    # write extended header
    self.fileobj.write(header.tobuf())
    self.offset += BLOCKSIZE
    # write name blocks, padded up to a full block
    self.fileobj.write(payload)
    nblocks, rest = divmod(header.size, BLOCKSIZE)
    if rest > 0:
        self.fileobj.write(NUL * (BLOCKSIZE - rest))
        nblocks = nblocks + 1
    self.offset += nblocks * BLOCKSIZE
def _dbg(self, level, msg):
    """Print `msg' to sys.stderr unless `level' is above the
       TarFile's debug setting.
    """
    if level > self.debug:
        return
    print >> sys.stderr, msg
2006 # class TarFile
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Set up iteration over `tarfile', starting at the first member.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """Return the iterator itself.
        """
        return self

    def next(self):
        """Return the next member. While the TarFile is not fully
           loaded, members come from its next() method; when that is
           exhausted the TarFile is marked as _loaded.
        """
        archive = self.tarfile
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely.
        if archive._loaded:
            try:
                member = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            member = archive.next()
            if not member:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return member
2044 # Helper classes for sparse file support
class _section:
    """Common base of _data and _hole: a range of `size' bytes
       starting at `offset'.
    """

    def __init__(self, offset, size):
        self.offset = offset
        self.size = size

    def __contains__(self, offset):
        start = self.offset
        return start <= offset and offset < start + self.size
class _data(_section):
    """A section of a sparse file that holds data; `realpos' is
       stored next to the section's offset and size.
    """

    def __init__(self, offset, size, realpos):
        _section.__init__(self, offset, size)
        self.realpos = realpos
class _hole(_section):
    """A section of a sparse file that is a hole. It adds nothing
       to _section.
    """
class _ringbuffer(list):
    """Ringbuffer class which increases performance
       over a regular list.

       The search in find() starts at the position of the previous
       hit instead of at the beginning of the list.
    """

    def __init__(self):
        self.idx = 0

    def find(self, offset):
        """Return the first item containing `offset', searching
           circularly from the last hit, or None if there is none.
        """
        # An empty buffer cannot contain anything; without this
        # guard self[idx] below would raise IndexError.
        if not self:
            return None
        idx = self.idx
        while True:
            item = self[idx]
            if offset in item:
                break
            idx += 1
            if idx == len(self):
                idx = 0
            if idx == self.idx:
                # End of File
                return None
        self.idx = idx
        return item
2087 #---------------------------------------------
2088 # zipfile compatible TarFile class
2089 #---------------------------------------------
# Values for the `compression' argument of TarFileCompat; each one
# equals the zipfile constant named next to it.
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """Wrapper around TarFile that offers the interface of the
       ZipFile class from the standard module zipfile.
    """

    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)
        if mode[0:1] == "r":
            # Give every member the attribute names zipfile uses.
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        return [member.name for member in self.infolist()]

    def infolist(self):
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]

    def printdir(self):
        self.tarfile.list()

    def testzip(self):
        return None

    def getinfo(self, name):
        return self.tarfile.getmember(name)

    def read(self, name):
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        # Translate the zipfile attribute names into the ones
        # addfile() works with.
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO(bytes))

    def close(self):
        self.tarfile.close()
2136 #class TarFileCompat
2138 #--------------------
2139 # exported functions
2140 #--------------------
def is_tarfile(name):
    """Tell whether `name' can be opened as a tar archive by this
       module: True if opening and closing it works, False if that
       raises TarError.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
# Module-level alias for TarFile.open. From here on the name `open'
# in this module refers to it rather than to the builtin.
open = TarFile.open