2 # -*- coding: iso-8859-1 -*-
3 #-------------------------------------------------------------------
5 #-------------------------------------------------------------------
6 # Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
9 # Permission is hereby granted, free of charge, to any person
10 # obtaining a copy of this software and associated documentation
11 # files (the "Software"), to deal in the Software without
12 # restriction, including without limitation the rights to use,
13 # copy, modify, merge, publish, distribute, sublicense, and/or sell
14 # copies of the Software, and to permit persons to whom the
15 # Software is furnished to do so, subject to the following
18 # The above copyright notice and this permission notice shall be
19 # included in all copies or substantial portions of the Software.
21 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23 # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25 # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 # OTHER DEALINGS IN THE SOFTWARE.
30 """Read from and write to tar format archives.
33 __version__
= "$Revision$"
37 __author__
= "Lars Gustäbel (lars@gustaebel.de)"
40 __credits__
= "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
54 if sys
.platform
== 'mac':
55 # This module needs work for MacOS9, especially in the area of pathname
56 # handling. In many places it is assumed a simple substitution of / by the
57 # local os.path.sep is good enough to convert pathnames, but this does not
58 # work with the mac rooted:path:name versus :nonrooted:path:name syntax
59 raise ImportError, "tarfile does not work for platform==mac"
66 # from tarfile import *
67 __all__
= ["TarFile", "TarInfo", "is_tarfile", "TarError"]
69 #---------------------------------------------------------
71 #---------------------------------------------------------
# Basic constants of the tar format.

# The null character.
NUL = "\0"

# Length of a processing block, in bytes.
BLOCKSIZE = 512

# Length of a record: twenty processing blocks.
RECORDSIZE = 20 * BLOCKSIZE

# Magic tar string.
MAGIC = "ustar"

# Version number.
VERSION = "00"
78 LENGTH_NAME
= 100 # maximum length of a filename
79 LENGTH_LINK
= 100 # maximum length of a linkname
80 LENGTH_PREFIX
= 155 # maximum length of the prefix field
81 MAXSIZE_MEMBER
= 077777777777L # maximum size of a file (11 octal digits)
# One-character member type flags.

# Regular file.
REGTYPE = "0"
# Regular file (alternative flag: a null character).
AREGTYPE = "\0"
# Link (inside tarfile).
LNKTYPE = "1"
# Symbolic link.
SYMTYPE = "2"
# Character special device.
CHRTYPE = "3"
# Block special device.
BLKTYPE = "4"
# Directory.
DIRTYPE = "5"
# Fifo special device.
FIFOTYPE = "6"
# Contiguous file.
CONTTYPE = "7"

# GNU tar extension for longnames.
GNUTYPE_LONGNAME = "L"
# GNU tar extension for longlink.
GNUTYPE_LONGLINK = "K"
# GNU tar extension for sparse file.
GNUTYPE_SPARSE = "S"
97 #---------------------------------------------------------
99 #---------------------------------------------------------
100 SUPPORTED_TYPES
= (REGTYPE
, AREGTYPE
, LNKTYPE
, # file types that tarfile
101 SYMTYPE
, DIRTYPE
, FIFOTYPE
, # can cope with.
102 CONTTYPE
, CHRTYPE
, BLKTYPE
,
103 GNUTYPE_LONGNAME
, GNUTYPE_LONGLINK
,
# File types that somehow represent regular files.
REGULAR_TYPES = (
    REGTYPE,
    AREGTYPE,
    CONTTYPE,
    GNUTYPE_SPARSE,
)
109 #---------------------------------------------------------
110 # Bits used in the mode field, values in octal.
111 #---------------------------------------------------------
112 S_IFLNK
= 0120000 # symbolic link
113 S_IFREG
= 0100000 # regular file
114 S_IFBLK
= 0060000 # block device
115 S_IFDIR
= 0040000 # directory
116 S_IFCHR
= 0020000 # character device
117 S_IFIFO
= 0010000 # fifo
119 TSUID
= 04000 # set UID on execution
120 TSGID
= 02000 # set GID on execution
121 TSVTX
= 01000 # reserved
123 TUREAD
= 0400 # read by owner
124 TUWRITE
= 0200 # write by owner
125 TUEXEC
= 0100 # execute/search by owner
126 TGREAD
= 0040 # read by group
127 TGWRITE
= 0020 # write by group
128 TGEXEC
= 0010 # execute/search by group
129 TOREAD
= 0004 # read by other
130 TOWRITE
= 0002 # write by other
131 TOEXEC
= 0001 # execute/search by other
133 #---------------------------------------------------------
134 # Some useful functions
135 #---------------------------------------------------------
138 """Convert a python string to a null-terminated string buffer.
140 return s
[:length
] + (length
- len(s
)) * NUL
143 """Convert a number field to a python number.
145 # There are two possible encodings for a number field, see
147 if s
[0] != chr(0200):
148 n
= int(s
.rstrip(NUL
+ " ") or "0", 8)
151 for i
in xrange(len(s
) - 1):
156 def itn(n
, digits
=8, posix
=False):
157 """Convert a python number to a number field.
159 # POSIX 1003.1-1988 requires numbers to be encoded as a string of
160 # octal digits followed by a null-byte, this allows values up to
161 # (8**(digits-1))-1. GNU tar allows storing numbers greater than
162 # that if necessary. A leading 0200 byte indicates this particular
163 # encoding, the following digits-1 bytes are a big-endian
164 # representation. This allows values up to (256**(digits-1))-1.
165 if 0 <= n
< 8 ** (digits
- 1):
166 s
= "%0*o" % (digits
- 1, n
) + NUL
169 raise ValueError("overflow in number field")
172 # XXX We mimic GNU tar's behaviour with negative numbers,
173 # this could raise OverflowError.
174 n
= struct
.unpack("L", struct
.pack("l", n
))[0]
177 for i
in xrange(digits
- 1):
178 s
= chr(n
& 0377) + s
def calc_chksums(buf):
    """Compute both checksum variants for a member's header block.

    All characters of the header are summed up, except for the chksum
    field (offsets 148-155), which is treated as if it was filled with
    spaces. According to the GNU tar sources, some tars (Sun and NeXT)
    calculate the checksum with signed chars, which gives a different
    result if there are chars in the buffer with the high bit set, so
    both interpretations are calculated.

    Returns the tuple (unsigned_chksum, signed_chksum).
    """
    # Eight spaces (8 * 0x20) stand in for the chksum field itself.
    chksum_field = 256

    # The header bytes on either side of the chksum field.
    before = buf[:148]
    after = buf[156:512]

    as_unsigned = struct.unpack("148B", before) + struct.unpack("356B", after)
    as_signed = struct.unpack("148b", before) + struct.unpack("356b", after)

    return chksum_field + sum(as_unsigned), chksum_field + sum(as_signed)
196 def copyfileobj(src
, dst
, length
=None):
197 """Copy length bytes from fileobj src to fileobj dst.
198 If length is None, copy the entire content.
203 shutil
.copyfileobj(src
, dst
)
207 blocks
, remainder
= divmod(length
, BUFSIZE
)
208 for b
in xrange(blocks
):
209 buf
= src
.read(BUFSIZE
)
210 if len(buf
) < BUFSIZE
:
211 raise IOError("end of file reached")
215 buf
= src
.read(remainder
)
216 if len(buf
) < remainder
:
217 raise IOError("end of file reached")
231 ((TUEXEC|TSUID
, "s"),
237 ((TGEXEC|TSGID
, "s"),
243 ((TOEXEC|TSVTX
, "t"),
249 """Convert a file's mode to a string of the form
251 Used by TarFile.list()
254 for table
in filemode_table
:
255 for bit
, char
in table
:
256 if mode
& bit
== bit
:
264 normpath
= lambda path
: os
.path
.normpath(path
).replace(os
.sep
, "/")
266 normpath
= os
.path
.normpath
class TarError(Exception):
    """Base class of the exceptions defined in this module."""
class ExtractError(TarError):
    """General exception type for extraction errors."""
class ReadError(TarError):
    """Exception type for unreadable tar archives."""
class CompressionError(TarError):
    """Exception type for unavailable compression methods."""
class StreamError(TarError):
    """Exception type for unsupported operations on stream-like TarFiles."""
284 #---------------------------
285 # internal stream interface
286 #---------------------------
288 """Low-level file object. Supports reading and writing.
289 It is used instead of a regular file object for streaming
293 def __init__(self
, name
, mode
):
296 "w": os
.O_WRONLY | os
.O_CREAT | os
.O_TRUNC
,
298 if hasattr(os
, "O_BINARY"):
300 self
.fd
= os
.open(name
, mode
)
def read(self, size):
    """Read from the file descriptor self.fd.

    Passes `size` straight to os.read() and returns whatever it returns.
    """
    data = os.read(self.fd, size)
    return data
312 """Class that serves as an adapter between TarFile and
313 a stream-like object. The stream-like object only
314 needs to have a read() or write() method and is accessed
315 blockwise. Use of gzip or bzip2 compression is possible.
316 A stream-like object could be for example: sys.stdin,
317 sys.stdout, a socket, a tape device etc.
319 _Stream is intended to be used only internally.
322 def __init__(self
, name
, mode
, comptype
, fileobj
, bufsize
):
323 """Construct a _Stream object.
325 self
._extfileobj
= True
327 fileobj
= _LowLevelFile(name
, mode
)
328 self
._extfileobj
= False
331 # Enable transparent compression detection for the
333 fileobj
= _StreamProxy(fileobj
)
334 comptype
= fileobj
.getcomptype()
336 self
.name
= name
or ""
338 self
.comptype
= comptype
339 self
.fileobj
= fileobj
340 self
.bufsize
= bufsize
349 raise CompressionError("zlib module is not available")
351 self
.crc
= zlib
.crc32("")
355 self
._init
_write
_gz
()
357 if comptype
== "bz2":
361 raise CompressionError("bz2 module is not available")
364 self
.cmp = bz2
.BZ2Decompressor()
366 self
.cmp = bz2
.BZ2Compressor()
369 if hasattr(self
, "closed") and not self
.closed
:
372 def _init_write_gz(self
):
373 """Initialize for writing with gzip compression.
375 self
.cmp = self
.zlib
.compressobj(9, self
.zlib
.DEFLATED
,
376 -self
.zlib
.MAX_WBITS
,
377 self
.zlib
.DEF_MEM_LEVEL
,
379 timestamp
= struct
.pack("<L", long(time
.time()))
380 self
.__write
("\037\213\010\010%s\002\377" % timestamp
)
381 if self
.name
.endswith(".gz"):
382 self
.name
= self
.name
[:-3]
383 self
.__write
(self
.name
+ NUL
)
386 """Write string s to the stream.
388 if self
.comptype
== "gz":
389 self
.crc
= self
.zlib
.crc32(s
, self
.crc
)
391 if self
.comptype
!= "tar":
392 s
= self
.cmp.compress(s
)
395 def __write(self
, s
):
396 """Write string s to the stream if a whole new block
397 is ready to be written.
400 while len(self
.buf
) > self
.bufsize
:
401 self
.fileobj
.write(self
.buf
[:self
.bufsize
])
402 self
.buf
= self
.buf
[self
.bufsize
:]
405 """Close the _Stream object. No operation should be
406 done on it afterwards.
411 if self
.mode
== "w" and self
.comptype
!= "tar":
412 self
.buf
+= self
.cmp.flush()
414 if self
.mode
== "w" and self
.buf
:
415 self
.fileobj
.write(self
.buf
)
417 if self
.comptype
== "gz":
418 # The native zlib crc is an unsigned 32-bit integer, but
419 # the Python wrapper implicitly casts that to a signed C
420 # long. So, on a 32-bit box self.crc may "look negative",
421 # while the same crc on a 64-bit box may "look positive".
422 # To avoid irksome warnings from the `struct` module, force
423 # it to look positive on all boxes.
424 self
.fileobj
.write(struct
.pack("<L", self
.crc
& 0xffffffffL
))
425 self
.fileobj
.write(struct
.pack("<L", self
.pos
& 0xffffFFFFL
))
427 if not self
._extfileobj
:
432 def _init_read_gz(self
):
433 """Initialize for reading a gzip compressed fileobj.
435 self
.cmp = self
.zlib
.decompressobj(-self
.zlib
.MAX_WBITS
)
438 # taken from gzip.GzipFile with some alterations
439 if self
.__read
(2) != "\037\213":
440 raise ReadError("not a gzip file")
441 if self
.__read
(1) != "\010":
442 raise CompressionError("unsupported compression method")
444 flag
= ord(self
.__read
(1))
448 xlen
= ord(self
.__read
(1)) + 256 * ord(self
.__read
(1))
453 if not s
or s
== NUL
:
458 if not s
or s
== NUL
:
464 """Return the stream's file pointer position.
468 def seek(self
, pos
=0):
469 """Set the stream's file pointer to pos. Negative seeking
472 if pos
- self
.pos
>= 0:
473 blocks
, remainder
= divmod(pos
- self
.pos
, self
.bufsize
)
474 for i
in xrange(blocks
):
475 self
.read(self
.bufsize
)
478 raise StreamError("seeking backwards is not allowed")
481 def read(self
, size
=None):
482 """Return the next size number of bytes from the stream.
483 If size is not defined, return all bytes of the stream
489 buf
= self
._read
(self
.bufsize
)
495 buf
= self
._read
(size
)
499 def _read(self
, size
):
500 """Return size bytes from the stream.
502 if self
.comptype
== "tar":
503 return self
.__read
(size
)
508 buf
= self
.__read
(self
.bufsize
)
511 buf
= self
.cmp.decompress(buf
)
518 def __read(self
, size
):
519 """Return size bytes from stream. If internal buffer is empty,
520 read another block from the stream.
525 buf
= self
.fileobj
.read(self
.bufsize
)
535 class _StreamProxy(object):
536 """Small proxy class that enables transparent compression
537 detection for the Stream interface (mode 'r|*').
def __init__(self, fileobj):
    """Wrap `fileobj` and buffer its first block.

    The first BLOCKSIZE bytes are read immediately and kept in self.buf.
    """
    self.fileobj = fileobj
    first_block = fileobj.read(BLOCKSIZE)
    self.buf = first_block
544 def read(self
, size
):
545 self
.read
= self
.fileobj
.read
548 def getcomptype(self
):
549 if self
.buf
.startswith("\037\213\010"):
551 if self
.buf
.startswith("BZh91"):
559 class _BZ2Proxy(object):
560 """Small proxy class that enables external file object
561 support for "r:bz2" and "w:bz2" modes. This is actually
562 a workaround for a limitation in bz2 module's BZ2File
563 class which (unlike gzip.GzipFile) has no support for
564 a file object argument.
567 blocksize
= 16 * 1024
569 def __init__(self
, fileobj
, mode
):
570 self
.fileobj
= fileobj
578 self
.bz2obj
= bz2
.BZ2Decompressor()
582 self
.bz2obj
= bz2
.BZ2Compressor()
584 def read(self
, size
):
589 raw
= self
.fileobj
.read(self
.blocksize
)
590 data
= self
.bz2obj
.decompress(raw
)
595 self
.buf
= "".join(b
)
597 buf
= self
.buf
[:size
]
598 self
.buf
= self
.buf
[size
:]
605 self
.read(pos
- self
.pos
)
def write(self, data):
    """Compress `data` and hand the result to the wrapped file object.

    self.pos is advanced by the length of the uncompressed `data`.
    """
    self.pos = self.pos + len(data)
    compressed = self.bz2obj.compress(data)
    self.fileobj.write(compressed)
617 raw
= self
.bz2obj
.flush()
618 self
.fileobj
.write(raw
)
622 #------------------------
623 # Extraction file object
624 #------------------------
625 class ExFileObject(object):
626 """File-like object for reading an archive member.
627 Is returned by TarFile.extractfile(). Support for
628 sparse files included.
631 def __init__(self
, tarfile
, tarinfo
):
632 self
.fileobj
= tarfile
.fileobj
633 self
.name
= tarinfo
.name
636 self
.offset
= tarinfo
.offset_data
637 self
.size
= tarinfo
.size
640 if tarinfo
.issparse():
641 self
.sparse
= tarinfo
.sparse
642 self
.read
= self
._readsparse
644 self
.read
= self
._readnormal
646 def __read(self
, size
):
647 """Overloadable read method.
649 return self
.fileobj
.read(size
)
651 def readline(self
, size
=-1):
652 """Read a line with approx. size. If size is negative,
653 read a whole line. readline() and read() must not
659 nl
= self
.linebuffer
.find("\n")
663 size
-= len(self
.linebuffer
)
664 while (nl
< 0 and size
> 0):
665 buf
= self
.read(min(size
, 100))
668 self
.linebuffer
+= buf
670 nl
= self
.linebuffer
.find("\n")
675 buf
= self
.linebuffer
[:nl
]
676 self
.linebuffer
= self
.linebuffer
[nl
+ 1:]
677 while buf
[-1:] == "\r":
682 """Return a list with all (following) lines.
686 line
= self
.readline()
691 def _readnormal(self
, size
=None):
692 """Read operation for regular files.
695 raise ValueError("file is closed")
696 self
.fileobj
.seek(self
.offset
+ self
.pos
)
697 bytesleft
= self
.size
- self
.pos
699 bytestoread
= bytesleft
701 bytestoread
= min(size
, bytesleft
)
702 self
.pos
+= bytestoread
703 return self
.__read
(bytestoread
)
705 def _readsparse(self
, size
=None):
706 """Read operation for sparse files.
709 raise ValueError("file is closed")
712 size
= self
.size
- self
.pos
716 buf
= self
._readsparsesection
(size
)
723 def _readsparsesection(self
, size
):
724 """Read a single section of a sparse file.
726 section
= self
.sparse
.find(self
.pos
)
731 toread
= min(size
, section
.offset
+ section
.size
- self
.pos
)
732 if isinstance(section
, _data
):
733 realpos
= section
.realpos
+ self
.pos
- section
.offset
735 self
.fileobj
.seek(self
.offset
+ realpos
)
736 return self
.__read
(toread
)
742 """Return the current file position.
746 def seek(self
, pos
, whence
=0):
747 """Seek to a position in the file.
751 self
.pos
= min(max(pos
, 0), self
.size
)
754 self
.pos
= max(self
.pos
+ pos
, 0)
756 self
.pos
= min(self
.pos
+ pos
, self
.size
)
758 self
.pos
= max(min(self
.size
+ pos
, self
.size
), 0)
761 """Close the file object.
766 """Get an iterator over the file object.
769 raise ValueError("I/O operation on closed file")
773 """Get the next item from the file iterator.
775 result
= self
.readline()
785 class TarInfo(object):
786 """Informational class which holds the details about an
787 archive member given by a tar header block.
788 TarInfo objects are returned by TarFile.getmember(),
789 TarFile.getmembers() and TarFile.gettarinfo() and are
790 usually created internally.
793 def __init__(self
, name
=""):
794 """Construct a TarInfo object. name is the optional name
797 self
.name
= name
# member name (dirnames must end with '/')
798 self
.mode
= 0666 # file permissions
799 self
.uid
= 0 # user id
800 self
.gid
= 0 # group id
801 self
.size
= 0 # file size
802 self
.mtime
= 0 # modification time
803 self
.chksum
= 0 # header checksum
804 self
.type = REGTYPE
# member type
805 self
.linkname
= "" # link name
806 self
.uname
= "user" # user name
807 self
.gname
= "group" # group name
808 self
.devmajor
= 0 # device major number
809 self
.devminor
= 0 # device minor number
811 self
.offset
= 0 # the tar header starts here
812 self
.offset_data
= 0 # the file's data starts here
815 return "<%s %r at %#x>" % (self
.__class
__.__name
__,self
.name
,id(self
))
818 def frombuf(cls
, buf
):
819 """Construct a TarInfo object from a 512 byte string buffer.
821 if len(buf
) != BLOCKSIZE
:
822 raise ValueError("truncated header")
823 if buf
.count(NUL
) == BLOCKSIZE
:
824 raise ValueError("empty header")
828 tarinfo
.name
= buf
[0:100].rstrip(NUL
)
829 tarinfo
.mode
= nti(buf
[100:108])
830 tarinfo
.uid
= nti(buf
[108:116])
831 tarinfo
.gid
= nti(buf
[116:124])
832 tarinfo
.size
= nti(buf
[124:136])
833 tarinfo
.mtime
= nti(buf
[136:148])
834 tarinfo
.chksum
= nti(buf
[148:156])
835 tarinfo
.type = buf
[156:157]
836 tarinfo
.linkname
= buf
[157:257].rstrip(NUL
)
837 tarinfo
.uname
= buf
[265:297].rstrip(NUL
)
838 tarinfo
.gname
= buf
[297:329].rstrip(NUL
)
839 tarinfo
.devmajor
= nti(buf
[329:337])
840 tarinfo
.devminor
= nti(buf
[337:345])
841 prefix
= buf
[345:500].rstrip(NUL
)
843 if prefix
and not tarinfo
.issparse():
844 tarinfo
.name
= prefix
+ "/" + tarinfo
.name
846 if tarinfo
.chksum
not in calc_chksums(buf
):
847 raise ValueError("invalid header")
850 def tobuf(self
, posix
=False):
851 """Return a tar header as a string of 512 byte blocks.
857 if self
.name
.endswith("/"):
860 if type in (GNUTYPE_LONGNAME
, GNUTYPE_LONGLINK
):
861 # Prevent "././@LongLink" from being normalized.
864 name
= normpath(self
.name
)
867 # directories should end with '/'
870 linkname
= self
.linkname
872 # if linkname is empty we end up with a '.'
873 linkname
= normpath(linkname
)
876 if self
.size
> MAXSIZE_MEMBER
:
877 raise ValueError("file is too large (>= 8 GB)")
879 if len(self
.linkname
) > LENGTH_LINK
:
880 raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK
))
882 if len(name
) > LENGTH_NAME
:
883 prefix
= name
[:LENGTH_PREFIX
+ 1]
884 while prefix
and prefix
[-1] != "/":
887 name
= name
[len(prefix
):]
890 if not prefix
or len(name
) > LENGTH_NAME
:
891 raise ValueError("name is too long")
894 if len(self
.linkname
) > LENGTH_LINK
:
895 buf
+= self
._create
_gnulong
(self
.linkname
, GNUTYPE_LONGLINK
)
897 if len(name
) > LENGTH_NAME
:
898 buf
+= self
._create
_gnulong
(name
, GNUTYPE_LONGNAME
)
902 itn(self
.mode
& 07777, 8, posix
),
903 itn(self
.uid
, 8, posix
),
904 itn(self
.gid
, 8, posix
),
905 itn(self
.size
, 12, posix
),
906 itn(self
.mtime
, 12, posix
),
907 " ", # checksum field
909 stn(self
.linkname
, 100),
914 itn(self
.devmajor
, 8, posix
),
915 itn(self
.devminor
, 8, posix
),
919 buf
+= struct
.pack("%ds" % BLOCKSIZE
, "".join(parts
))
920 chksum
= calc_chksums(buf
[-BLOCKSIZE
:])[0]
921 buf
= buf
[:-364] + "%06o\0" % chksum
+ buf
[-357:]
925 def _create_gnulong(self
, name
, type):
926 """Create a GNU longname/longlink header from name.
927 It consists of an extended tar header, with the length
928 of the longname as size, followed by data blocks,
929 which contain the longname as a null terminated string.
933 tarinfo
= self
.__class
__()
934 tarinfo
.name
= "././@LongLink"
937 tarinfo
.size
= len(name
)
939 # create extended header
940 buf
= tarinfo
.tobuf()
943 blocks
, remainder
= divmod(len(name
), BLOCKSIZE
)
945 buf
+= (BLOCKSIZE
- remainder
) * NUL
949 return self
.type in REGULAR_TYPES
953 return self
.type == DIRTYPE
955 return self
.type == SYMTYPE
957 return self
.type == LNKTYPE
959 return self
.type == CHRTYPE
961 return self
.type == BLKTYPE
963 return self
.type == FIFOTYPE
965 return self
.type == GNUTYPE_SPARSE
967 return self
.type in (CHRTYPE
, BLKTYPE
, FIFOTYPE
)
970 class TarFile(object):
971 """The TarFile Class provides an interface to tar archives.
974 debug
= 0 # May be set from 0 (no msgs) to 3 (all msgs)
976 dereference
= False # If true, add content of linked file to the
977 # tar file, else the link.
979 ignore_zeros
= False # If true, skips empty or invalid blocks and
980 # continues processing.
982 errorlevel
= 0 # If 0, fatal errors only appear in debug
983 # messages (if debug >= 0). If > 0, errors
984 # are passed to the caller as exceptions.
986 posix
= False # If True, generates POSIX.1-1990-compliant
987 # archives (no GNU extensions!)
989 fileobject
= ExFileObject
991 def __init__(self
, name
=None, mode
="r", fileobj
=None):
992 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
993 read from an existing archive, 'a' to append data to an existing
994 file or 'w' to create a new file overwriting an existing one. `mode'
996 If `fileobj' is given, it is used for reading or writing data. If it
997 can be determined, `mode' is overridden by `fileobj's mode.
998 `fileobj' is not closed, when TarFile is closed.
1002 if len(mode
) > 1 or mode
not in "raw":
1003 raise ValueError("mode must be 'r', 'a' or 'w'")
1005 self
.mode
= {"r": "rb", "a": "r+b", "w": "wb"}[mode
]
1008 fileobj
= file(self
.name
, self
.mode
)
1009 self
._extfileobj
= False
1011 if self
.name
is None and hasattr(fileobj
, "name"):
1012 self
.name
= fileobj
.name
1013 if hasattr(fileobj
, "mode"):
1014 self
.mode
= fileobj
.mode
1015 self
._extfileobj
= True
1016 self
.fileobj
= fileobj
1018 # Init datastructures
1020 self
.members
= [] # list of members as TarInfo objects
1021 self
._loaded
= False # flag if all members have been read
1022 self
.offset
= 0L # current position in the archive file
1023 self
.inodes
= {} # dictionary caching the inodes of
1024 # archive members already added
1026 if self
._mode
== "r":
1027 self
.firstmember
= None
1028 self
.firstmember
= self
.next()
1030 if self
._mode
== "a":
1031 # Move to the end of the archive,
1032 # before the first empty block.
1033 self
.firstmember
= None
1036 tarinfo
= self
.next()
1038 self
.fileobj
.seek(0)
1041 self
.fileobj
.seek(- BLOCKSIZE
, 1)
1044 if self
._mode
in "aw":
1047 #--------------------------------------------------------------------------
1048 # Below are the classmethods which act as alternate constructors to the
1049 # TarFile class. The open() method is the only one that is needed for
1050 # public use; it is the "super"-constructor and is able to select an
1051 # adequate "sub"-constructor for a particular compression using the mapping
1054 # This concept allows one to subclass TarFile without losing the comfort of
1055 # the super-constructor. A sub-constructor is registered and made available
1056 # by adding it to the mapping in OPEN_METH.
1059 def open(cls
, name
=None, mode
="r", fileobj
=None, bufsize
=20*512):
1060 """Open a tar archive for reading, writing or appending. Return
1061 an appropriate TarFile class.
1064 'r' or 'r:*' open for reading with transparent compression
1065 'r:' open for reading exclusively uncompressed
1066 'r:gz' open for reading with gzip compression
1067 'r:bz2' open for reading with bzip2 compression
1068 'a' or 'a:' open for appending
1069 'w' or 'w:' open for writing without compression
1070 'w:gz' open for writing with gzip compression
1071 'w:bz2' open for writing with bzip2 compression
1073 'r|*' open a stream of tar blocks with transparent compression
1074 'r|' open an uncompressed stream of tar blocks for reading
1075 'r|gz' open a gzip compressed stream of tar blocks
1076 'r|bz2' open a bzip2 compressed stream of tar blocks
1077 'w|' open an uncompressed stream for writing
1078 'w|gz' open a gzip compressed stream for writing
1079 'w|bz2' open a bzip2 compressed stream for writing
1082 if not name
and not fileobj
:
1083 raise ValueError("nothing to open")
1085 if mode
in ("r", "r:*"):
1086 # Find out which *open() is appropriate for opening the file.
1087 for comptype
in cls
.OPEN_METH
:
1088 func
= getattr(cls
, cls
.OPEN_METH
[comptype
])
1090 return func(name
, "r", fileobj
)
1091 except (ReadError
, CompressionError
):
1093 raise ReadError("file could not be opened successfully")
1096 filemode
, comptype
= mode
.split(":", 1)
1097 filemode
= filemode
or "r"
1098 comptype
= comptype
or "tar"
1100 # Select the *open() function according to
1101 # given compression.
1102 if comptype
in cls
.OPEN_METH
:
1103 func
= getattr(cls
, cls
.OPEN_METH
[comptype
])
1105 raise CompressionError("unknown compression type %r" % comptype
)
1106 return func(name
, filemode
, fileobj
)
1109 filemode
, comptype
= mode
.split("|", 1)
1110 filemode
= filemode
or "r"
1111 comptype
= comptype
or "tar"
1113 if filemode
not in "rw":
1114 raise ValueError("mode must be 'r' or 'w'")
1116 t
= cls(name
, filemode
,
1117 _Stream(name
, filemode
, comptype
, fileobj
, bufsize
))
1118 t
._extfileobj
= False
1122 return cls
.taropen(name
, mode
, fileobj
)
1124 raise ValueError("undiscernible mode")
def taropen(cls, name, mode="r", fileobj=None):
    """Open the uncompressed tar archive `name` for reading or writing.

    A `mode` longer than one character, or one that is not found in
    "raw", raises ValueError. Otherwise the archive object is created
    by calling `cls` with (name, mode, fileobj).
    """
    acceptable = len(mode) <= 1 and mode in "raw"
    if not acceptable:
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj)
1135 def gzopen(cls
, name
, mode
="r", fileobj
=None, compresslevel
=9):
1136 """Open gzip compressed tar archive name for reading or writing.
1137 Appending is not allowed.
1139 if len(mode
) > 1 or mode
not in "rw":
1140 raise ValueError("mode must be 'r' or 'w'")
1145 except (ImportError, AttributeError):
1146 raise CompressionError("gzip module is not available")
1148 pre
, ext
= os
.path
.splitext(name
)
1149 pre
= os
.path
.basename(pre
)
1157 fileobj
= file(name
, mode
+ "b")
1163 t
= cls
.taropen(tarname
, mode
,
1164 gzip
.GzipFile(name
, mode
, compresslevel
, fileobj
)
1167 raise ReadError("not a gzip file")
1168 t
._extfileobj
= False
1172 def bz2open(cls
, name
, mode
="r", fileobj
=None, compresslevel
=9):
1173 """Open bzip2 compressed tar archive name for reading or writing.
1174 Appending is not allowed.
1176 if len(mode
) > 1 or mode
not in "rw":
1177 raise ValueError("mode must be 'r' or 'w'.")
1182 raise CompressionError("bz2 module is not available")
1184 pre
, ext
= os
.path
.splitext(name
)
1185 pre
= os
.path
.basename(pre
)
1192 if fileobj
is not None:
1193 fileobj
= _BZ2Proxy(fileobj
, mode
)
1195 fileobj
= bz2
.BZ2File(name
, mode
, compresslevel
=compresslevel
)
1198 t
= cls
.taropen(tarname
, mode
, fileobj
)
1200 raise ReadError("not a bzip2 file")
1201 t
._extfileobj
= False
1204 # All *open() methods are registered here.
1206 "tar": "taropen", # uncompressed tar
1207 "gz": "gzopen", # gzip compressed tar
1208 "bz2": "bz2open" # bzip2 compressed tar
1211 #--------------------------------------------------------------------------
1212 # The public methods which TarFile provides:
1215 """Close the TarFile. In write-mode, two finishing zero blocks are
1216 appended to the archive.
1221 if self
._mode
in "aw":
1222 self
.fileobj
.write(NUL
* (BLOCKSIZE
* 2))
1223 self
.offset
+= (BLOCKSIZE
* 2)
1224 # fill up the end with zero-blocks
1225 # (like option -b20 for tar does)
1226 blocks
, remainder
= divmod(self
.offset
, RECORDSIZE
)
1228 self
.fileobj
.write(NUL
* (RECORDSIZE
- remainder
))
1230 if not self
._extfileobj
:
1231 self
.fileobj
.close()
1234 def getmember(self
, name
):
1235 """Return a TarInfo object for member `name'. If `name' can not be
1236 found in the archive, KeyError is raised. If a member occurs more
1237 than once in the archive, its last occurence is assumed to be the
1238 most up-to-date version.
1240 tarinfo
= self
._getmember
(name
)
1242 raise KeyError("filename %r not found" % name
)
1245 def getmembers(self
):
1246 """Return the members of the archive as a list of TarInfo objects. The
1247 list has the same order as the members in the archive.
1250 if not self
._loaded
: # if we want to obtain a list of
1251 self
._load
() # all members, we first have to
1252 # scan the whole archive.
1256 """Return the members of the archive as a list of their names. It has
1257 the same order as the list returned by getmembers().
1259 return [tarinfo
.name
for tarinfo
in self
.getmembers()]
1261 def gettarinfo(self
, name
=None, arcname
=None, fileobj
=None):
1262 """Create a TarInfo object for either the file `name' or the file
1263 object `fileobj' (using os.fstat on its file descriptor). You can
1264 modify some of the TarInfo's attributes before you add it using
1265 addfile(). If given, `arcname' specifies an alternative name for the
1266 file in the archive.
1270 # When fileobj is given, replace name by
1271 # fileobj's real name.
1272 if fileobj
is not None:
1275 # Building the name of the member in the archive.
1276 # Backward slashes are converted to forward slashes,
1277 # Absolute paths are turned to relative paths.
1280 arcname
= normpath(arcname
)
1281 drv
, arcname
= os
.path
.splitdrive(arcname
)
1282 while arcname
[0:1] == "/":
1283 arcname
= arcname
[1:]
1285 # Now, fill the TarInfo object with
1286 # information specific for the file.
1289 # Use os.stat or os.lstat, depending on platform
1290 # and if symlinks shall be resolved.
1292 if hasattr(os
, "lstat") and not self
.dereference
:
1293 statres
= os
.lstat(name
)
1295 statres
= os
.stat(name
)
1297 statres
= os
.fstat(fileobj
.fileno())
1300 stmd
= statres
.st_mode
1301 if stat
.S_ISREG(stmd
):
1302 inode
= (statres
.st_ino
, statres
.st_dev
)
1303 if not self
.dereference
and \
1304 statres
.st_nlink
> 1 and inode
in self
.inodes
:
1305 # Is it a hardlink to an already
1308 linkname
= self
.inodes
[inode
]
1310 # The inode is added only if its valid.
1311 # For win32 it is always 0.
1314 self
.inodes
[inode
] = arcname
1315 elif stat
.S_ISDIR(stmd
):
1317 if arcname
[-1:] != "/":
1319 elif stat
.S_ISFIFO(stmd
):
1321 elif stat
.S_ISLNK(stmd
):
1323 linkname
= os
.readlink(name
)
1324 elif stat
.S_ISCHR(stmd
):
1326 elif stat
.S_ISBLK(stmd
):
1331 # Fill the TarInfo object with all
1332 # information we can get.
1333 tarinfo
.name
= arcname
1335 tarinfo
.uid
= statres
.st_uid
1336 tarinfo
.gid
= statres
.st_gid
1337 if stat
.S_ISREG(stmd
):
1338 tarinfo
.size
= statres
.st_size
1341 tarinfo
.mtime
= statres
.st_mtime
1343 tarinfo
.linkname
= linkname
1346 tarinfo
.uname
= pwd
.getpwuid(tarinfo
.uid
)[0]
1351 tarinfo
.gname
= grp
.getgrgid(tarinfo
.gid
)[0]
1355 if type in (CHRTYPE
, BLKTYPE
):
1356 if hasattr(os
, "major") and hasattr(os
, "minor"):
1357 tarinfo
.devmajor
= os
.major(statres
.st_rdev
)
1358 tarinfo
.devminor
= os
.minor(statres
.st_rdev
)
1361 def list(self
, verbose
=True):
1362 """Print a table of contents to sys.stdout. If `verbose' is False, only
1363 the names of the members are printed. If it is True, an `ls -l'-like
1368 for tarinfo
in self
:
1370 print filemode(tarinfo
.mode
),
1371 print "%s/%s" % (tarinfo
.uname
or tarinfo
.uid
,
1372 tarinfo
.gname
or tarinfo
.gid
),
1373 if tarinfo
.ischr() or tarinfo
.isblk():
1374 print "%10s" % ("%d,%d" \
1375 % (tarinfo
.devmajor
, tarinfo
.devminor
)),
1377 print "%10d" % tarinfo
.size
,
1378 print "%d-%02d-%02d %02d:%02d:%02d" \
1379 % time
.localtime(tarinfo
.mtime
)[:6],
1385 print "->", tarinfo
.linkname
,
1387 print "link to", tarinfo
.linkname
,
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None \
        and os.path.abspath(name) == os.path.abspath(self.name):
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir("."):
                self.add(f, os.path.join(arcname, f))
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = file(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Close the input even if writing the member fails.
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))

    else:
        # Links, fifos and devices consist of a header only.
        self.addfile(tarinfo)
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
       given, tarinfo.size bytes are read from it and added to the archive.
       You can create TarInfo objects using gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a copy so the stored member is independent of the caller's
    # object.
    tarinfo = copy.copy(tarinfo)

    buf = tarinfo.tobuf(self.posix)
    self.fileobj.write(buf)
    self.offset += len(buf)

    # If there's data to follow, append it.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, tarinfo.size)
        blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
        if remainder > 0:
            # Pad the last, partially filled block with NULs. Nothing is
            # written when the data already ends on a block boundary.
            self.fileobj.write(NUL * (BLOCKSIZE - remainder))
            blocks += 1
        self.offset += blocks * BLOCKSIZE

    self.members.append(tarinfo)
1468 def extractall(self
, path
=".", members
=None):
1469 """Extract all members from the archive to the current working
1470 directory and set owner, modification time and permissions on
1471 directories afterwards. `path' specifies a different directory
1472 to extract to. `members' is optional and must be a subset of the
1473 list returned by getmembers().
1480 for tarinfo
in members
:
1482 # Extract directory with a safe mode, so that
1483 # all files below can be extracted as well.
1485 os
.makedirs(os
.path
.join(path
, tarinfo
.name
), 0777)
1486 except EnvironmentError:
1488 directories
.append(tarinfo
)
1490 self
.extract(tarinfo
, path
)
1492 # Reverse sort directories.
1493 directories
.sort(lambda a
, b
: cmp(a
.name
, b
.name
))
1494 directories
.reverse()
1496 # Set correct owner, mtime and filemode on directories.
1497 for tarinfo
in directories
:
1498 path
= os
.path
.join(path
, tarinfo
.name
)
1500 self
.chown(tarinfo
, path
)
1501 self
.utime(tarinfo
, path
)
1502 self
.chmod(tarinfo
, path
)
1503 except ExtractError
, e
:
1504 if self
.errorlevel
> 1:
1507 self
._dbg
(1, "tarfile: %s" % e
)
1509 def extract(self
, member
, path
=""):
1510 """Extract a member from the archive to the current working directory,
1511 using its full name. Its file information is extracted as accurately
1512 as possible. `member' may be a filename or a TarInfo object. You can
1513 specify a different directory using `path'.
1517 if isinstance(member
, TarInfo
):
1520 tarinfo
= self
.getmember(member
)
1522 # Prepare the link target for makelink().
1524 tarinfo
._link
_target
= os
.path
.join(path
, tarinfo
.linkname
)
1527 self
._extract
_member
(tarinfo
, os
.path
.join(path
, tarinfo
.name
))
1528 except EnvironmentError, e
:
1529 if self
.errorlevel
> 0:
1532 if e
.filename
is None:
1533 self
._dbg
(1, "tarfile: %s" % e
.strerror
)
1535 self
._dbg
(1, "tarfile: %s %r" % (e
.strerror
, e
.filename
))
1536 except ExtractError
, e
:
1537 if self
.errorlevel
> 1:
1540 self
._dbg
(1, "tarfile: %s" % e
)
def extractfile(self, member):
    """Return a read-only file-like object for `member', or None.

       `member' may be a filename or a TarInfo object. Regular files and
       members of unknown type yield a file object for their data. For a
       (sym)link the file object of the link's target is returned. Any
       other member (directory, device, ...) has no data, so None is
       returned. The file object provides read(), readline(), readlines(),
       seek() and tell().
    """
    self._check("r")

    if isinstance(member, TarInfo):
        tarinfo = member
    else:
        tarinfo = self.getmember(member)

    # A member of unknown type is treated like a regular file.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    if tarinfo.islnk() or tarinfo.issym():
        if isinstance(self.fileobj, _Stream):
            # A small but ugly workaround for the case that someone tries
            # to extract a (sym)link as a file-object from a non-seekable
            # stream of tar blocks.
            raise StreamError("cannot extract (sym)link as file object")
        # A (sym)link's file object is its target's file object. The
        # target is searched among the members preceding the link.
        target = self._getmember(tarinfo.linkname, tarinfo)
        return self.extractfile(target)

    # No data is associated with the member.
    return None
1581 def _extract_member(self
, tarinfo
, targetpath
):
1582 """Extract the TarInfo object tarinfo to a physical
1583 file called targetpath.
1585 # Fetch the TarInfo object for the given name
1586 # and build the destination pathname, replacing
1587 # forward slashes to platform specific separators.
1588 if targetpath
[-1:] == "/":
1589 targetpath
= targetpath
[:-1]
1590 targetpath
= os
.path
.normpath(targetpath
)
1592 # Create all upper directories.
1593 upperdirs
= os
.path
.dirname(targetpath
)
1594 if upperdirs
and not os
.path
.exists(upperdirs
):
1599 ti
.mtime
= tarinfo
.mtime
1600 ti
.uid
= tarinfo
.uid
1601 ti
.gid
= tarinfo
.gid
1602 ti
.uname
= tarinfo
.uname
1603 ti
.gname
= tarinfo
.gname
1605 self
._extract
_member
(ti
, ti
.name
)
1609 if tarinfo
.islnk() or tarinfo
.issym():
1610 self
._dbg
(1, "%s -> %s" % (tarinfo
.name
, tarinfo
.linkname
))
1612 self
._dbg
(1, tarinfo
.name
)
1615 self
.makefile(tarinfo
, targetpath
)
1616 elif tarinfo
.isdir():
1617 self
.makedir(tarinfo
, targetpath
)
1618 elif tarinfo
.isfifo():
1619 self
.makefifo(tarinfo
, targetpath
)
1620 elif tarinfo
.ischr() or tarinfo
.isblk():
1621 self
.makedev(tarinfo
, targetpath
)
1622 elif tarinfo
.islnk() or tarinfo
.issym():
1623 self
.makelink(tarinfo
, targetpath
)
1624 elif tarinfo
.type not in SUPPORTED_TYPES
:
1625 self
.makeunknown(tarinfo
, targetpath
)
1627 self
.makefile(tarinfo
, targetpath
)
1629 self
.chown(tarinfo
, targetpath
)
1630 if not tarinfo
.issym():
1631 self
.chmod(tarinfo
, targetpath
)
1632 self
.utime(tarinfo
, targetpath
)
1634 #--------------------------------------------------------------------------
1635 # Below are the different file methods. They are called via
1636 # _extract_member() when extract() is called. They can be replaced in a
1637 # subclass to implement other functionality.
1639 def makedir(self
, tarinfo
, targetpath
):
1640 """Make a directory called targetpath.
1643 os
.mkdir(targetpath
)
1644 except EnvironmentError, e
:
1645 if e
.errno
!= errno
.EEXIST
:
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath.

       The member's data is obtained through extractfile() and copied
       into a newly created file. Both file objects are closed even when
       opening the target or copying fails.
    """
    source = self.extractfile(tarinfo)
    try:
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to targetpath.

       The member is written like a regular file via makefile(), and a
       level-1 debug message records the substitution.
    """
    self.makefile(tarinfo, targetpath)
    notice = "tarfile: Unknown file type %r, " \
             "extracted as regular file." % tarinfo.type
    self._dbg(1, notice)
def makefifo(self, tarinfo, targetpath):
    """Create a fifo at targetpath.

       Raises ExtractError when the os module has no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
def makedev(self, tarinfo, targetpath):
    """Create a character or block device node at targetpath.

       Raises ExtractError when the os module lacks mknod() or makedev().
    """
    supported = hasattr(os, "mknod") and hasattr(os, "makedev")
    if not supported:
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits with the device type flag.
    if tarinfo.isblk():
        kind = stat.S_IFBLK
    else:
        kind = stat.S_IFCHR
    mode = tarinfo.mode | kind

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
def makelink(self, tarinfo, targetpath):
    """Create a symbolic or hard link at targetpath.

       If the link cannot be created (an AttributeError, presumably
       because the platform's os module lacks symlink()/link()), the
       referenced member is extracted in its place, and as a last resort
       the referenced file is copied from the filesystem. IOError is
       raised when that copy fails as well.
    """
    linkpath = tarinfo.linkname
    is_symlink = tarinfo.issym()
    try:
        if is_symlink:
            os.symlink(linkpath, targetpath)
        else:
            # _link_target is prepared by extract().
            os.link(tarinfo._link_target, targetpath)
    except AttributeError:
        if is_symlink:
            # A symlink target is relative to the link's own directory.
            linkpath = os.path.join(os.path.dirname(tarinfo.name),
                                    linkpath)
            linkpath = normpath(linkpath)

        try:
            self._extract_member(self.getmember(linkpath), targetpath)
        except (EnvironmentError, KeyError):
            linkpath = os.path.normpath(linkpath)
            try:
                shutil.copy2(linkpath, targetpath)
            except EnvironmentError:
                raise IOError("link could not be created")
def chown(self, tarinfo, targetpath):
    """Set owner of targetpath according to tarinfo.

       Nothing is done unless the pwd module is available and the
       effective user id is 0. Group and user are looked up by name
       first, then by numeric id, and finally fall back to the ids of
       the current process. Raises ExtractError when changing the owner
       fails.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    try:
        g = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            g = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            g = os.getgid()

    try:
        u = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            u = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            u = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            # Change the link itself, not the file it points to.
            os.lchown(targetpath, u, g)
        elif sys.platform != "os2emx":
            os.chown(targetpath, u, g)
    except EnvironmentError:
        raise ExtractError("could not change owner")
def chmod(self, tarinfo, targetpath):
    """Apply tarinfo.mode to targetpath.

       Does nothing when the os module has no chmod(). Raises
       ExtractError when the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
def utime(self, tarinfo, targetpath):
    """Set access and modification time of targetpath to tarinfo.mtime.

       Does nothing when the os module has no utime(), or for
       directories on win32. Raises ExtractError when the time cannot be
       changed.
    """
    if not hasattr(os, 'utime'):
        return
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    on_windows_dir = sys.platform == "win32" and tarinfo.isdir()
    if on_windows_dir:
        return
    stamp = (tarinfo.mtime, tarinfo.mtime)
    try:
        os.utime(targetpath, stamp)
    except EnvironmentError:
        raise ExtractError("could not change modification time")
1766 #--------------------------------------------------------------------------
1768 """Return the next member of the archive as a TarInfo object, when
1769 TarFile is opened for reading. Return None if there is no more
1773 if self
.firstmember
is not None:
1774 m
= self
.firstmember
1775 self
.firstmember
= None
1778 # Read the next block.
1779 self
.fileobj
.seek(self
.offset
)
1781 buf
= self
.fileobj
.read(BLOCKSIZE
)
1786 tarinfo
= TarInfo
.frombuf(buf
)
1788 # Set the TarInfo object's offset to the current position of the
1789 # TarFile and set self.offset to the position where the data blocks
1791 tarinfo
.offset
= self
.offset
1792 self
.offset
+= BLOCKSIZE
1794 tarinfo
= self
.proc_member(tarinfo
)
1796 except ValueError, e
:
1797 if self
.ignore_zeros
:
1798 self
._dbg
(2, "0x%X: empty or invalid block: %s" %
1800 self
.offset
+= BLOCKSIZE
1803 if self
.offset
== 0:
1804 raise ReadError("empty, unreadable or compressed "
1809 # Some old tar programs represent a directory as a regular
1810 # file with a trailing slash.
1811 if tarinfo
.isreg() and tarinfo
.name
.endswith("/"):
1812 tarinfo
.type = DIRTYPE
1814 # Directory names should have a '/' at the end.
1818 self
.members
.append(tarinfo
)
1821 #--------------------------------------------------------------------------
# The following are methods that are called depending on the type of a
# member. The entry point is proc_member() which is called with a TarInfo
# object created from the header block from the current offset. The
# proc_member() method can be overridden in a subclass to add custom
# proc_*() methods. A proc_*() method MUST do the following:
# 1. Set tarinfo.offset_data to the position where the data blocks begin,
#    if there is data that follows.
# 2. Set self.offset to the position where the next member's header will
#    begin.
# 3. Return tarinfo or another valid TarInfo object.
def proc_member(self, tarinfo):
    """Dispatch tarinfo to the proc_*() method matching its type and
       return that method's result.
    """
    member_type = tarinfo.type
    if member_type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        return self.proc_gnulong(tarinfo)
    if member_type == GNUTYPE_SPARSE:
        return self.proc_sparse(tarinfo)
    # Everything else, including unknown types.
    return self.proc_builtin(tarinfo)
def proc_builtin(self, tarinfo):
    """Process a member of a builtin type, or of an unknown type which
       is treated like a regular file. Returns tarinfo.
    """
    tarinfo.offset_data = self.offset
    has_data = tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES
    if has_data:
        # Advance past the data blocks, rounded up to whole blocks.
        self.offset += self._block(tarinfo.size)
    return tarinfo
def proc_gnulong(self, tarinfo):
    """Process the blocks that hold a GNU longname
       or longlink member.

       The data blocks following the header hold the long name. The
       member described by the next header is processed, patched with
       that name (or link name) and returned.
    """
    # Collect the name block by block and join once at the end.
    chunks = []
    count = tarinfo.size
    while count > 0:
        chunks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        count -= BLOCKSIZE
    buf = "".join(chunks)

    # Fetch the next header and process it.
    b = self.fileobj.read(BLOCKSIZE)
    t = TarInfo.frombuf(b)
    t.offset = self.offset
    self.offset += BLOCKSIZE
    member = self.proc_member(t)

    # Patch the TarInfo object from the next header with
    # the longname information. Its offset is that of the longname
    # header, so both headers count as one member.
    member.offset = tarinfo.offset
    if tarinfo.type == GNUTYPE_LONGNAME:
        member.name = buf.rstrip(NUL)
    elif tarinfo.type == GNUTYPE_LONGLINK:
        member.linkname = buf.rstrip(NUL)

    return member
def proc_sparse(self, tarinfo):
    """Process a GNU sparse header plus extra headers.

       Builds the list of _data and _hole sections that describes the
       sparse file, stores it on tarinfo, advances self.offset past the
       stored data and sets tarinfo.size to the file's original size.
       Returns tarinfo.
    """
    sp = _ringbuffer()

    def read_structs(buf, pos, count, lastpos, realpos):
        # Parse up to `count' 24-byte (offset, numbytes) structs starting
        # at buf[pos], appending sections to sp. Parsing stops at the
        # first field nti() rejects with ValueError. Returns the updated
        # (lastpos, realpos) pair.
        for i in xrange(count):
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                break
            if offset > lastpos:
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24
        return lastpos, realpos

    # NOTE(review): assumes the raw header block is kept on tarinfo.buf
    # by TarInfo.frombuf() -- confirm.
    buf = tarinfo.buf

    # There are 4 possible sparse structs in the
    # first header.
    lastpos, realpos = read_structs(buf, 386, 4, 0, 0)

    isextended = ord(buf[482])
    origsize = nti(buf[483:495])

    # If the isextended flag is given,
    # there are extra headers to process, each holding up to 21 structs.
    while isextended == 1:
        buf = self.fileobj.read(BLOCKSIZE)
        self.offset += BLOCKSIZE
        lastpos, realpos = read_structs(buf, 0, 21, lastpos, realpos)
        isextended = ord(buf[504])

    # A trailing hole extends the file up to its original size.
    if lastpos < origsize:
        sp.append(_hole(lastpos, origsize - lastpos))

    tarinfo.sparse = sp

    tarinfo.offset_data = self.offset
    self.offset += self._block(tarinfo.size)
    tarinfo.size = origsize

    return tarinfo
1940 #--------------------------------------------------------------------------
1941 # Little helper methods:
def _block(self, count):
    """Round up a byte count by BLOCKSIZE and return it,
       e.g. _block(834) => 1024.

       A count that already is a multiple of BLOCKSIZE is returned
       unchanged.
    """
    blocks, remainder = divmod(count, BLOCKSIZE)
    if remainder:
        # A partially filled block still occupies a whole block.
        blocks += 1
    return blocks * BLOCKSIZE
def _getmember(self, name, tarinfo=None):
    """Find an archive member by name, searching from bottom to top.

       If tarinfo is given, only the members located before it are
       searched. Returns the matching member closest to the end, or None
       if there is no match.
    """
    # Ensure that all members have been loaded.
    members = self.getmembers()

    if tarinfo is None:
        end = len(members)
    else:
        end = members.index(tarinfo)

    for candidate in reversed(members[:end]):
        if name == candidate.name:
            return candidate
    return None
1969 """Read through the entire archive file and look for readable
1973 tarinfo
= self
.next()
def _check(self, mode=None):
    """Check if TarFile is still open, and if the operation's mode
       corresponds to TarFile's mode.

       `mode' is a container (e.g. a string such as "aw") of the modes
       for which the operation is allowed; None skips the mode check.
       Raises IOError if the TarFile is closed or its mode is not in
       `mode'.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is not None and self._mode not in mode:
        raise IOError("bad operation for mode %r" % self._mode)
1988 """Provide an iterator object.
1991 return iter(self
.members
)
1993 return TarIter(self
)
def _dbg(self, level, msg):
    """Write debugging output to sys.stderr.

       `msg' is printed only when `level' does not exceed self.debug.
    """
    if level > self.debug:
        return
    print >> sys.stderr, msg
2005 for tarinfo in TarFile(...):
def __init__(self, tarfile):
    """Construct a TarIter object.

       `tarfile' is the TarFile whose members are iterated.
    """
    self.tarfile = tarfile
    # Position in tarfile.members, used once the archive is fully loaded.
    self.index = 0
2015 """Return iterator object.
2019 """Return the next item using TarFile's next() method.
2020 When all members have been read, set TarFile as _loaded.
2022 # Fix for SF #1100429: Under rare circumstances it can
2023 # happen that getmembers() is called during iteration,
2024 # which will cause TarIter to stop prematurely.
2025 if not self
.tarfile
._loaded
:
2026 tarinfo
= self
.tarfile
.next()
2028 self
.tarfile
._loaded
= True
2032 tarinfo
= self
.tarfile
.members
[self
.index
]
2038 # Helper classes for sparse file support
2040 """Base class for _data and _hole.
def __init__(self, offset, size):
    # Start position of the section and its length; both are read by
    # __contains__().
    self.offset = offset
    self.size = size
def __contains__(self, offset):
    # True if offset lies in the half-open range
    # [self.offset, self.offset + self.size).
    start = self.offset
    end = start + self.size
    return start <= offset < end
class _data(_section):
    """A section of a sparse file that is backed by stored data.

       `realpos' is kept next to the offset and size handled by
       _section; proc_sparse() passes the running total of the sizes of
       all preceding data sections.
    """
    def __init__(self, offset, size, realpos):
        _section.__init__(self, offset, size)
        self.realpos = realpos
class _hole(_section):
    """Represent a hole section in a sparse file.

       A hole adds nothing to _section; the separate class only lets it
       be told apart from a _data section.
    """
    pass
class _ringbuffer(list):
    """Ringbuffer class which increases performance
       over a regular list.

       find() resumes at the position of the previous hit instead of
       always starting at the beginning of the list.
    """
    def __init__(self):
        # Index of the item returned by the last successful find().
        self.idx = 0

    def find(self, offset):
        """Return the first item that contains `offset', searching
           circularly from the last hit, or None if no item does.
        """
        if not self:
            # Nothing to search; avoid indexing into an empty list.
            return None
        idx = self.idx
        while True:
            item = self[idx]
            if offset in item:
                break
            idx += 1
            if idx == len(self):
                # Wrap around to the beginning.
                idx = 0
            if idx == self.idx:
                # Back at the starting point: End of File.
                return None
        self.idx = idx
        return item
2081 #---------------------------------------------
2082 # zipfile compatible TarFile class
2083 #---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """Wrapper around TarFile that offers the interface of the standard
       module zipfile's ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        # Pick the opener that matches the zipfile-style constant.
        if compression == TAR_PLAIN:
            self.tarfile = TarFile.taropen(file, mode)
        elif compression == TAR_GZIPPED:
            self.tarfile = TarFile.gzopen(file, mode)
        else:
            raise ValueError("unknown compression constant")
        if mode[0:1] == "r":
            # Give every member the ZipInfo-style attribute names.
            for m in self.tarfile.getmembers():
                m.filename = m.name
                m.file_size = m.size
                m.date_time = time.gmtime(m.mtime)[:6]

    def namelist(self):
        # Names of the members reported by infolist().
        return [m.name for m in self.infolist()]

    def infolist(self):
        # Only members of a regular type are reported.
        return [m for m in self.tarfile.getmembers()
                if m.type in REGULAR_TYPES]

    def printdir(self):
        self.tarfile.list()

    def testzip(self):
        # No check is performed; always returns None.
        return

    def getinfo(self, name):
        return self.tarfile.getmember(name)

    def read(self, name):
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        # compress_type is accepted but not used.
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        # Map the ZipInfo-style attributes onto the names addfile() uses
        # and add `bytes' as the member's data.
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO(bytes))

    def close(self):
        self.tarfile.close()
2130 #class TarFileCompat
2132 #--------------------
2133 # exported functions
2134 #--------------------
2135 def is_tarfile(name
):
2136 """Return True if name points to a tar archive that we
2137 are able to handle, else return False.