2 # -*- coding: iso-8859-1 -*-
3 #-------------------------------------------------------------------
5 #-------------------------------------------------------------------
6 # Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
9 # Permission is hereby granted, free of charge, to any person
10 # obtaining a copy of this software and associated documentation
11 # files (the "Software"), to deal in the Software without
12 # restriction, including without limitation the rights to use,
13 # copy, modify, merge, publish, distribute, sublicense, and/or sell
14 # copies of the Software, and to permit persons to whom the
15 # Software is furnished to do so, subject to the following
18 # The above copyright notice and this permission notice shall be
19 # included in all copies or substantial portions of the Software.
21 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23 # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25 # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 # OTHER DEALINGS IN THE SOFTWARE.
30 """Read from and write to tar format archives.
33 __version__
= "$Revision$"
37 __author__
= "Lars Gustäbel (lars@gustaebel.de)"
40 __credits__
= "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
if sys.platform == 'mac':
    # This module needs work for MacOS9, especially in the area of pathname
    # handling. In many places it is assumed a simple substitution of / by the
    # local os.path.sep is good enough to convert pathnames, but this does not
    # work with the mac rooted:path:name versus :nonrooted:path:name syntax.
    #
    # Refuse to import rather than silently mishandle paths. The exception is
    # raised in call form, matching every other raise in this module.
    raise ImportError("tarfile does not work for platform==mac")
66 # from tarfile import *
67 __all__
= ["TarFile", "TarInfo", "is_tarfile", "TarError"]
69 #---------------------------------------------------------
71 #---------------------------------------------------------
# Archive geometry and header identification strings.
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks (bytes)
RECORDSIZE = BLOCKSIZE * 20     # length of records (20 blocks = 10240 bytes)
MAGIC = "ustar"                 # magic tar string
VERSION = "00"                  # version number

# Limits of the fixed-width header fields.
LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field
MAXSIZE_MEMBER = 077777777777L  # maximum size of a file (11 octal digits,
                                # i.e. 8**11 - 1 bytes)

# One-character type flags stored in a member's header.
REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file (NUL type flag)
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

# Type flags introduced by GNU tar.
GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
GNUTYPE_SPARSE = "S"            # GNU tar extension for sparse file
97 #---------------------------------------------------------
99 #---------------------------------------------------------
100 SUPPORTED_TYPES
= (REGTYPE
, AREGTYPE
, LNKTYPE
, # file types that tarfile
101 SYMTYPE
, DIRTYPE
, FIFOTYPE
, # can cope with.
102 CONTTYPE
, CHRTYPE
, BLKTYPE
,
103 GNUTYPE_LONGNAME
, GNUTYPE_LONGLINK
,
# Type flags that count as "regular file"; a member's type is tested
# for membership in this tuple (`self.type in REGULAR_TYPES`).
REGULAR_TYPES = (REGTYPE, AREGTYPE,             # file types that somehow
                 CONTTYPE, GNUTYPE_SPARSE)      # represent regular files
109 #---------------------------------------------------------
110 # Bits used in the mode field, values in octal.
111 #---------------------------------------------------------
# File-type bits.
S_IFLNK = 0120000        # symbolic link
S_IFREG = 0100000        # regular file
S_IFBLK = 0060000        # block device
S_IFDIR = 0040000        # directory
S_IFCHR = 0020000        # character device
S_IFIFO = 0010000        # fifo

# Special-purpose bits.
TSUID   = 04000          # set UID on execution
TSGID   = 02000          # set GID on execution
TSVTX   = 01000          # reserved

# Permission bits: owner, then group, then other.
TUREAD  = 0400           # read by owner
TUWRITE = 0200           # write by owner
TUEXEC  = 0100           # execute/search by owner
TGREAD  = 0040           # read by group
TGWRITE = 0020           # write by group
TGEXEC  = 0010           # execute/search by group
TOREAD  = 0004           # read by other
TOWRITE = 0002           # write by other
TOEXEC  = 0001           # execute/search by other
133 #---------------------------------------------------------
134 # Some useful functions
135 #---------------------------------------------------------
138 """Convert a python string to a null-terminated string buffer.
140 return s
[:length
] + (length
- len(s
)) * NUL
143 """Convert a number field to a python number.
145 # There are two possible encodings for a number field, see
147 if s
[0] != chr(0200):
149 n
= int(s
.rstrip(NUL
+ " ") or "0", 8)
151 raise HeaderError("invalid header")
154 for i
in xrange(len(s
) - 1):
159 def itn(n
, digits
=8, posix
=False):
160 """Convert a python number to a number field.
162 # POSIX 1003.1-1988 requires numbers to be encoded as a string of
163 # octal digits followed by a null-byte, this allows values up to
164 # (8**(digits-1))-1. GNU tar allows storing numbers greater than
165 # that if necessary. A leading 0200 byte indicates this particular
166 # encoding, the following digits-1 bytes are a big-endian
167 # representation. This allows values up to (256**(digits-1))-1.
168 if 0 <= n
< 8 ** (digits
- 1):
169 s
= "%0*o" % (digits
- 1, n
) + NUL
172 raise ValueError("overflow in number field")
175 # XXX We mimic GNU tar's behaviour with negative numbers,
176 # this could raise OverflowError.
177 n
= struct
.unpack("L", struct
.pack("l", n
))[0]
180 for i
in xrange(digits
- 1):
181 s
= chr(n
& 0377) + s
def calc_chksums(buf):
    """Compute both checksum variants for a member's 512-byte header.

    All bytes of the header are summed except the eight bytes of the
    chksum field (offsets 148-155), which are counted as if they were
    filled with spaces (8 * 0x20 == 256).  According to the GNU tar
    sources, some tars (Sun and NeXT) sum the bytes as signed chars,
    which gives a different total when bytes with the high bit set are
    present, so both the unsigned and the signed total are produced.

    Returns the tuple (unsigned_chksum, signed_chksum).
    """
    # "8x" steps over the chksum field so it never enters the sum.
    as_unsigned = struct.unpack_from("148B8x356B", buf)
    as_signed = struct.unpack_from("148b8x356b", buf)
    return 256 + sum(as_unsigned), 256 + sum(as_signed)
199 def copyfileobj(src
, dst
, length
=None):
200 """Copy length bytes from fileobj src to fileobj dst.
201 If length is None, copy the entire content.
206 shutil
.copyfileobj(src
, dst
)
210 blocks
, remainder
= divmod(length
, BUFSIZE
)
211 for b
in xrange(blocks
):
212 buf
= src
.read(BUFSIZE
)
213 if len(buf
) < BUFSIZE
:
214 raise IOError("end of file reached")
218 buf
= src
.read(remainder
)
219 if len(buf
) < remainder
:
220 raise IOError("end of file reached")
234 ((TUEXEC|TSUID
, "s"),
240 ((TGEXEC|TSGID
, "s"),
246 ((TOEXEC|TSVTX
, "t"),
252 """Convert a file's mode to a string of the form
254 Used by TarFile.list()
257 for table
in filemode_table
:
258 for bit
, char
in table
:
259 if mode
& bit
== bit
:
267 normpath
= lambda path
: os
.path
.normpath(path
).replace(os
.sep
, "/")
269 normpath
= os
.path
.normpath
class TarError(Exception):
    """Common base class for the exceptions defined by this module."""
class ExtractError(TarError):
    """Raised for general errors while extracting a member."""
class ReadError(TarError):
    """Raised when a tar archive is unreadable."""
class CompressionError(TarError):
    """Raised when a compression method is unavailable."""
class StreamError(TarError):
    """Raised for operations that stream-like TarFiles do not support."""
class HeaderError(TarError):
    """Raised when a member header is invalid."""
290 #---------------------------
291 # internal stream interface
292 #---------------------------
294 """Low-level file object. Supports reading and writing.
295 It is used instead of a regular file object for streaming
299 def __init__(self
, name
, mode
):
302 "w": os
.O_WRONLY | os
.O_CREAT | os
.O_TRUNC
,
304 if hasattr(os
, "O_BINARY"):
306 self
.fd
= os
.open(name
, mode
)
def read(self, size):
    """Read at most `size` bytes from the descriptor held in self.fd
    and return them.
    """
    descriptor = self.fd
    return os.read(descriptor, size)
318 """Class that serves as an adapter between TarFile and
319 a stream-like object. The stream-like object only
320 needs to have a read() or write() method and is accessed
321 blockwise. Use of gzip or bzip2 compression is possible.
322 A stream-like object could be for example: sys.stdin,
323 sys.stdout, a socket, a tape device etc.
325 _Stream is intended to be used only internally.
328 def __init__(self
, name
, mode
, comptype
, fileobj
, bufsize
):
329 """Construct a _Stream object.
331 self
._extfileobj
= True
333 fileobj
= _LowLevelFile(name
, mode
)
334 self
._extfileobj
= False
337 # Enable transparent compression detection for the
339 fileobj
= _StreamProxy(fileobj
)
340 comptype
= fileobj
.getcomptype()
342 self
.name
= name
or ""
344 self
.comptype
= comptype
345 self
.fileobj
= fileobj
346 self
.bufsize
= bufsize
355 raise CompressionError("zlib module is not available")
357 self
.crc
= zlib
.crc32("")
361 self
._init
_write
_gz
()
363 if comptype
== "bz2":
367 raise CompressionError("bz2 module is not available")
370 self
.cmp = bz2
.BZ2Decompressor()
372 self
.cmp = bz2
.BZ2Compressor()
375 if hasattr(self
, "closed") and not self
.closed
:
378 def _init_write_gz(self
):
379 """Initialize for writing with gzip compression.
381 self
.cmp = self
.zlib
.compressobj(9, self
.zlib
.DEFLATED
,
382 -self
.zlib
.MAX_WBITS
,
383 self
.zlib
.DEF_MEM_LEVEL
,
385 timestamp
= struct
.pack("<L", long(time
.time()))
386 self
.__write
("\037\213\010\010%s\002\377" % timestamp
)
387 if self
.name
.endswith(".gz"):
388 self
.name
= self
.name
[:-3]
389 self
.__write
(self
.name
+ NUL
)
392 """Write string s to the stream.
394 if self
.comptype
== "gz":
395 self
.crc
= self
.zlib
.crc32(s
, self
.crc
)
397 if self
.comptype
!= "tar":
398 s
= self
.cmp.compress(s
)
401 def __write(self
, s
):
402 """Write string s to the stream if a whole new block
403 is ready to be written.
406 while len(self
.buf
) > self
.bufsize
:
407 self
.fileobj
.write(self
.buf
[:self
.bufsize
])
408 self
.buf
= self
.buf
[self
.bufsize
:]
411 """Close the _Stream object. No operation should be
412 done on it afterwards.
417 if self
.mode
== "w" and self
.comptype
!= "tar":
418 self
.buf
+= self
.cmp.flush()
420 if self
.mode
== "w" and self
.buf
:
421 self
.fileobj
.write(self
.buf
)
423 if self
.comptype
== "gz":
424 # The native zlib crc is an unsigned 32-bit integer, but
425 # the Python wrapper implicitly casts that to a signed C
426 # long. So, on a 32-bit box self.crc may "look negative",
427 # while the same crc on a 64-bit box may "look positive".
428 # To avoid irksome warnings from the `struct` module, force
429 # it to look positive on all boxes.
430 self
.fileobj
.write(struct
.pack("<L", self
.crc
& 0xffffffffL
))
431 self
.fileobj
.write(struct
.pack("<L", self
.pos
& 0xffffFFFFL
))
433 if not self
._extfileobj
:
438 def _init_read_gz(self
):
439 """Initialize for reading a gzip compressed fileobj.
441 self
.cmp = self
.zlib
.decompressobj(-self
.zlib
.MAX_WBITS
)
444 # taken from gzip.GzipFile with some alterations
445 if self
.__read
(2) != "\037\213":
446 raise ReadError("not a gzip file")
447 if self
.__read
(1) != "\010":
448 raise CompressionError("unsupported compression method")
450 flag
= ord(self
.__read
(1))
454 xlen
= ord(self
.__read
(1)) + 256 * ord(self
.__read
(1))
459 if not s
or s
== NUL
:
464 if not s
or s
== NUL
:
470 """Return the stream's file pointer position.
474 def seek(self
, pos
=0):
475 """Set the stream's file pointer to pos. Negative seeking
478 if pos
- self
.pos
>= 0:
479 blocks
, remainder
= divmod(pos
- self
.pos
, self
.bufsize
)
480 for i
in xrange(blocks
):
481 self
.read(self
.bufsize
)
484 raise StreamError("seeking backwards is not allowed")
487 def read(self
, size
=None):
488 """Return the next size number of bytes from the stream.
489 If size is not defined, return all bytes of the stream
495 buf
= self
._read
(self
.bufsize
)
501 buf
= self
._read
(size
)
505 def _read(self
, size
):
506 """Return size bytes from the stream.
508 if self
.comptype
== "tar":
509 return self
.__read
(size
)
514 buf
= self
.__read
(self
.bufsize
)
517 buf
= self
.cmp.decompress(buf
)
524 def __read(self
, size
):
525 """Return size bytes from stream. If internal buffer is empty,
526 read another block from the stream.
531 buf
= self
.fileobj
.read(self
.bufsize
)
541 class _StreamProxy(object):
542 """Small proxy class that enables transparent compression
543 detection for the Stream interface (mode 'r|*').
546 def __init__(self
, fileobj
):
547 self
.fileobj
= fileobj
548 self
.buf
= self
.fileobj
.read(BLOCKSIZE
)
550 def read(self
, size
):
551 self
.read
= self
.fileobj
.read
554 def getcomptype(self
):
555 if self
.buf
.startswith("\037\213\010"):
557 if self
.buf
.startswith("BZh91"):
565 class _BZ2Proxy(object):
566 """Small proxy class that enables external file object
567 support for "r:bz2" and "w:bz2" modes. This is actually
568 a workaround for a limitation in bz2 module's BZ2File
569 class which (unlike gzip.GzipFile) has no support for
570 a file object argument.
573 blocksize
= 16 * 1024
575 def __init__(self
, fileobj
, mode
):
576 self
.fileobj
= fileobj
584 self
.bz2obj
= bz2
.BZ2Decompressor()
588 self
.bz2obj
= bz2
.BZ2Compressor()
590 def read(self
, size
):
595 raw
= self
.fileobj
.read(self
.blocksize
)
596 data
= self
.bz2obj
.decompress(raw
)
601 self
.buf
= "".join(b
)
603 buf
= self
.buf
[:size
]
604 self
.buf
= self
.buf
[size
:]
611 self
.read(pos
- self
.pos
)
def write(self, data):
    """Compress `data` and pass the result on to the wrapped file object.

    self.pos is advanced by the length of the uncompressed input before
    the data is handed to the compressor.
    """
    self.pos += len(data)
    self.fileobj.write(self.bz2obj.compress(data))
623 raw
= self
.bz2obj
.flush()
624 self
.fileobj
.write(raw
)
628 #------------------------
629 # Extraction file object
630 #------------------------
631 class _FileInFile(object):
632 """A thin wrapper around an existing file object that
633 provides a part of its data as an individual file
637 def __init__(self
, fileobj
, offset
, size
, sparse
=None):
638 self
.fileobj
= fileobj
645 """Return the current file position.
def seek(self, position):
    """Set the current file position to `position`.

    Only the stored position is updated here; no bounds check is made
    and the underlying file object is not touched.
    """
    self.position = position
654 def read(self
, size
=None):
655 """Read data from the file.
658 size
= self
.size
- self
.position
660 size
= min(size
, self
.size
- self
.position
)
662 if self
.sparse
is None:
663 return self
.readnormal(size
)
665 return self
.readsparse(size
)
def readnormal(self, size):
    """Read `size` bytes of a regular (non-sparse) member.

    The underlying file object is positioned at self.offset plus the
    current position, the position is advanced by `size`, and the result
    of the underlying read is returned.
    """
    absolute = self.offset + self.position
    self.fileobj.seek(absolute)
    self.position += size
    return self.fileobj.read(size)
674 def readsparse(self
, size
):
675 """Read operation for sparse files.
679 buf
= self
.readsparsesection(size
)
686 def readsparsesection(self
, size
):
687 """Read a single section of a sparse file.
689 section
= self
.sparse
.find(self
.position
)
694 size
= min(size
, section
.offset
+ section
.size
- self
.position
)
696 if isinstance(section
, _data
):
697 realpos
= section
.realpos
+ self
.position
- section
.offset
698 self
.fileobj
.seek(self
.offset
+ realpos
)
699 self
.position
+= size
700 return self
.fileobj
.read(size
)
702 self
.position
+= size
707 class ExFileObject(object):
708 """File-like object for reading an archive member.
709 Is returned by TarFile.extractfile().
713 def __init__(self
, tarfile
, tarinfo
):
714 self
.fileobj
= _FileInFile(tarfile
.fileobj
,
717 getattr(tarinfo
, "sparse", None))
718 self
.name
= tarinfo
.name
721 self
.size
= tarinfo
.size
726 def read(self
, size
=None):
727 """Read at most size bytes from the file. If size is not
728 present or None, read all data until EOF is reached.
731 raise ValueError("I/O operation on closed file")
739 buf
= self
.buffer[:size
]
740 self
.buffer = self
.buffer[size
:]
743 buf
+= self
.fileobj
.read()
745 buf
+= self
.fileobj
.read(size
- len(buf
))
747 self
.position
+= len(buf
)
750 def readline(self
, size
=-1):
751 """Read one entire line from the file. If size is present
752 and non-negative, return a string with at most that
753 size, which may be an incomplete line.
756 raise ValueError("I/O operation on closed file")
758 if "\n" in self
.buffer:
759 pos
= self
.buffer.find("\n") + 1
761 buffers
= [self
.buffer]
763 buf
= self
.fileobj
.read(self
.blocksize
)
765 if not buf
or "\n" in buf
:
766 self
.buffer = "".join(buffers
)
767 pos
= self
.buffer.find("\n") + 1
770 pos
= len(self
.buffer)
776 buf
= self
.buffer[:pos
]
777 self
.buffer = self
.buffer[pos
:]
778 self
.position
+= len(buf
)
782 """Return a list with all remaining lines.
786 line
= self
.readline()
792 """Return the current file position.
795 raise ValueError("I/O operation on closed file")
799 def seek(self
, pos
, whence
=os
.SEEK_SET
):
800 """Seek to a position in the file.
803 raise ValueError("I/O operation on closed file")
805 if whence
== os
.SEEK_SET
:
806 self
.position
= min(max(pos
, 0), self
.size
)
807 elif whence
== os
.SEEK_CUR
:
809 self
.position
= max(self
.position
+ pos
, 0)
811 self
.position
= min(self
.position
+ pos
, self
.size
)
812 elif whence
== os
.SEEK_END
:
813 self
.position
= max(min(self
.size
+ pos
, self
.size
), 0)
815 raise ValueError("Invalid argument")
818 self
.fileobj
.seek(self
.position
)
821 """Close the file object.
826 """Get an iterator over the file's lines.
829 line
= self
.readline()
838 class TarInfo(object):
839 """Informational class which holds the details about an
840 archive member given by a tar header block.
841 TarInfo objects are returned by TarFile.getmember(),
842 TarFile.getmembers() and TarFile.gettarinfo() and are
843 usually created internally.
846 def __init__(self
, name
=""):
847 """Construct a TarInfo object. name is the optional name
850 self
.name
= name
# member name (dirnames must end with '/')
851 self
.mode
= 0666 # file permissions
852 self
.uid
= 0 # user id
853 self
.gid
= 0 # group id
854 self
.size
= 0 # file size
855 self
.mtime
= 0 # modification time
856 self
.chksum
= 0 # header checksum
857 self
.type = REGTYPE
# member type
858 self
.linkname
= "" # link name
859 self
.uname
= "user" # user name
860 self
.gname
= "group" # group name
861 self
.devmajor
= 0 # device major number
862 self
.devminor
= 0 # device minor number
864 self
.offset
= 0 # the tar header starts here
865 self
.offset_data
= 0 # the file's data starts here
868 return "<%s %r at %#x>" % (self
.__class
__.__name
__,self
.name
,id(self
))
871 def frombuf(cls
, buf
):
872 """Construct a TarInfo object from a 512 byte string buffer.
874 if len(buf
) != BLOCKSIZE
:
875 raise HeaderError("truncated header")
876 if buf
.count(NUL
) == BLOCKSIZE
:
877 raise HeaderError("empty header")
879 chksum
= nti(buf
[148:156])
880 if chksum
not in calc_chksums(buf
):
881 raise HeaderError("bad checksum")
885 tarinfo
.name
= buf
[0:100].rstrip(NUL
)
886 tarinfo
.mode
= nti(buf
[100:108])
887 tarinfo
.uid
= nti(buf
[108:116])
888 tarinfo
.gid
= nti(buf
[116:124])
889 tarinfo
.size
= nti(buf
[124:136])
890 tarinfo
.mtime
= nti(buf
[136:148])
891 tarinfo
.chksum
= chksum
892 tarinfo
.type = buf
[156:157]
893 tarinfo
.linkname
= buf
[157:257].rstrip(NUL
)
894 tarinfo
.uname
= buf
[265:297].rstrip(NUL
)
895 tarinfo
.gname
= buf
[297:329].rstrip(NUL
)
896 tarinfo
.devmajor
= nti(buf
[329:337])
897 tarinfo
.devminor
= nti(buf
[337:345])
898 prefix
= buf
[345:500].rstrip(NUL
)
900 if prefix
and not tarinfo
.issparse():
901 tarinfo
.name
= prefix
+ "/" + tarinfo
.name
905 def tobuf(self
, posix
=False):
906 """Return a tar header as a string of 512 byte blocks.
912 if self
.name
.endswith("/"):
915 if type in (GNUTYPE_LONGNAME
, GNUTYPE_LONGLINK
):
916 # Prevent "././@LongLink" from being normalized.
919 name
= normpath(self
.name
)
922 # directories should end with '/'
925 linkname
= self
.linkname
927 # if linkname is empty we end up with a '.'
928 linkname
= normpath(linkname
)
931 if self
.size
> MAXSIZE_MEMBER
:
932 raise ValueError("file is too large (>= 8 GB)")
934 if len(self
.linkname
) > LENGTH_LINK
:
935 raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK
))
937 if len(name
) > LENGTH_NAME
:
938 prefix
= name
[:LENGTH_PREFIX
+ 1]
939 while prefix
and prefix
[-1] != "/":
942 name
= name
[len(prefix
):]
945 if not prefix
or len(name
) > LENGTH_NAME
:
946 raise ValueError("name is too long")
949 if len(self
.linkname
) > LENGTH_LINK
:
950 buf
+= self
._create
_gnulong
(self
.linkname
, GNUTYPE_LONGLINK
)
952 if len(name
) > LENGTH_NAME
:
953 buf
+= self
._create
_gnulong
(name
, GNUTYPE_LONGNAME
)
957 itn(self
.mode
& 07777, 8, posix
),
958 itn(self
.uid
, 8, posix
),
959 itn(self
.gid
, 8, posix
),
960 itn(self
.size
, 12, posix
),
961 itn(self
.mtime
, 12, posix
),
962 " ", # checksum field
964 stn(self
.linkname
, 100),
969 itn(self
.devmajor
, 8, posix
),
970 itn(self
.devminor
, 8, posix
),
974 buf
+= struct
.pack("%ds" % BLOCKSIZE
, "".join(parts
))
975 chksum
= calc_chksums(buf
[-BLOCKSIZE
:])[0]
976 buf
= buf
[:-364] + "%06o\0" % chksum
+ buf
[-357:]
980 def _create_gnulong(self
, name
, type):
981 """Create a GNU longname/longlink header from name.
982 It consists of an extended tar header, with the length
983 of the longname as size, followed by data blocks,
984 which contain the longname as a null terminated string.
988 tarinfo
= self
.__class
__()
989 tarinfo
.name
= "././@LongLink"
992 tarinfo
.size
= len(name
)
994 # create extended header
995 buf
= tarinfo
.tobuf()
998 blocks
, remainder
= divmod(len(name
), BLOCKSIZE
)
1000 buf
+= (BLOCKSIZE
- remainder
) * NUL
1004 return self
.type in REGULAR_TYPES
1008 return self
.type == DIRTYPE
1010 return self
.type == SYMTYPE
1012 return self
.type == LNKTYPE
1014 return self
.type == CHRTYPE
1016 return self
.type == BLKTYPE
1018 return self
.type == FIFOTYPE
1020 return self
.type == GNUTYPE_SPARSE
1022 return self
.type in (CHRTYPE
, BLKTYPE
, FIFOTYPE
)
1025 class TarFile(object):
1026 """The TarFile Class provides an interface to tar archives.
1029 debug
= 0 # May be set from 0 (no msgs) to 3 (all msgs)
1031 dereference
= False # If true, add content of linked file to the
1032 # tar file, else the link.
1034 ignore_zeros
= False # If true, skips empty or invalid blocks and
1035 # continues processing.
1037 errorlevel
= 0 # If 0, fatal errors only appear in debug
1038 # messages (if debug >= 0). If > 0, errors
1039 # are passed to the caller as exceptions.
1041 posix
= False # If True, generates POSIX.1-1990-compliant
1042 # archives (no GNU extensions!)
1044 fileobject
= ExFileObject
1046 def __init__(self
, name
=None, mode
="r", fileobj
=None):
1047 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1048 read from an existing archive, 'a' to append data to an existing
1049 file or 'w' to create a new file overwriting an existing one. `mode'
1051 If `fileobj' is given, it is used for reading or writing data. If it
1052 can be determined, `mode' is overridden by `fileobj's mode.
1053 `fileobj' is not closed, when TarFile is closed.
1055 self
.name
= os
.path
.abspath(name
)
1057 if len(mode
) > 1 or mode
not in "raw":
1058 raise ValueError("mode must be 'r', 'a' or 'w'")
1060 self
.mode
= {"r": "rb", "a": "r+b", "w": "wb"}[mode
]
1063 fileobj
= file(self
.name
, self
.mode
)
1064 self
._extfileobj
= False
1066 if self
.name
is None and hasattr(fileobj
, "name"):
1067 self
.name
= os
.path
.abspath(fileobj
.name
)
1068 if hasattr(fileobj
, "mode"):
1069 self
.mode
= fileobj
.mode
1070 self
._extfileobj
= True
1071 self
.fileobj
= fileobj
1073 # Init datastructures
1075 self
.members
= [] # list of members as TarInfo objects
1076 self
._loaded
= False # flag if all members have been read
1077 self
.offset
= 0L # current position in the archive file
1078 self
.inodes
= {} # dictionary caching the inodes of
1079 # archive members already added
1081 if self
._mode
== "r":
1082 self
.firstmember
= None
1083 self
.firstmember
= self
.next()
1085 if self
._mode
== "a":
1086 # Move to the end of the archive,
1087 # before the first empty block.
1088 self
.firstmember
= None
1091 tarinfo
= self
.next()
1093 self
.fileobj
.seek(0)
1096 self
.fileobj
.seek(- BLOCKSIZE
, 1)
1099 if self
._mode
in "aw":
1102 #--------------------------------------------------------------------------
1103 # Below are the classmethods which act as alternate constructors to the
1104 # TarFile class. The open() method is the only one that is needed for
1105 # public use; it is the "super"-constructor and is able to select an
1106 # adequate "sub"-constructor for a particular compression using the mapping
1109 # This concept allows one to subclass TarFile without losing the comfort of
1110 # the super-constructor. A sub-constructor is registered and made available
1111 # by adding it to the mapping in OPEN_METH.
1114 def open(cls
, name
=None, mode
="r", fileobj
=None, bufsize
=20*512):
1115 """Open a tar archive for reading, writing or appending. Return
1116 an appropriate TarFile class.
1119 'r' or 'r:*' open for reading with transparent compression
1120 'r:' open for reading exclusively uncompressed
1121 'r:gz' open for reading with gzip compression
1122 'r:bz2' open for reading with bzip2 compression
1123 'a' or 'a:' open for appending
1124 'w' or 'w:' open for writing without compression
1125 'w:gz' open for writing with gzip compression
1126 'w:bz2' open for writing with bzip2 compression
1128 'r|*' open a stream of tar blocks with transparent compression
1129 'r|' open an uncompressed stream of tar blocks for reading
1130 'r|gz' open a gzip compressed stream of tar blocks
1131 'r|bz2' open a bzip2 compressed stream of tar blocks
1132 'w|' open an uncompressed stream for writing
1133 'w|gz' open a gzip compressed stream for writing
1134 'w|bz2' open a bzip2 compressed stream for writing
1137 if not name
and not fileobj
:
1138 raise ValueError("nothing to open")
1140 if mode
in ("r", "r:*"):
1141 # Find out which *open() is appropriate for opening the file.
1142 for comptype
in cls
.OPEN_METH
:
1143 func
= getattr(cls
, cls
.OPEN_METH
[comptype
])
1144 if fileobj
is not None:
1145 saved_pos
= fileobj
.tell()
1147 return func(name
, "r", fileobj
)
1148 except (ReadError
, CompressionError
):
1149 if fileobj
is not None:
1150 fileobj
.seek(saved_pos
)
1152 raise ReadError("file could not be opened successfully")
1155 filemode
, comptype
= mode
.split(":", 1)
1156 filemode
= filemode
or "r"
1157 comptype
= comptype
or "tar"
1159 # Select the *open() function according to
1160 # given compression.
1161 if comptype
in cls
.OPEN_METH
:
1162 func
= getattr(cls
, cls
.OPEN_METH
[comptype
])
1164 raise CompressionError("unknown compression type %r" % comptype
)
1165 return func(name
, filemode
, fileobj
)
1168 filemode
, comptype
= mode
.split("|", 1)
1169 filemode
= filemode
or "r"
1170 comptype
= comptype
or "tar"
1172 if filemode
not in "rw":
1173 raise ValueError("mode must be 'r' or 'w'")
1175 t
= cls(name
, filemode
,
1176 _Stream(name
, filemode
, comptype
, fileobj
, bufsize
))
1177 t
._extfileobj
= False
1181 return cls
.taropen(name
, mode
, fileobj
)
1183 raise ValueError("undiscernible mode")
def taropen(cls, name, mode="r", fileobj=None):
    """Open uncompressed tar archive name for reading or writing.

    mode must be exactly 'r', 'a' or 'w'; any other value raises
    ValueError.  Returns the result of cls(name, mode, fileobj).
    """
    # Compare against the complete mode strings. The former substring
    # test (`mode not in "raw"`) accepted the empty string, because ""
    # is a substring of every string.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj)
1194 def gzopen(cls
, name
, mode
="r", fileobj
=None, compresslevel
=9):
1195 """Open gzip compressed tar archive name for reading or writing.
1196 Appending is not allowed.
1198 if len(mode
) > 1 or mode
not in "rw":
1199 raise ValueError("mode must be 'r' or 'w'")
1204 except (ImportError, AttributeError):
1205 raise CompressionError("gzip module is not available")
1208 fileobj
= file(name
, mode
+ "b")
1211 t
= cls
.taropen(name
, mode
,
1212 gzip
.GzipFile(name
, mode
, compresslevel
, fileobj
))
1214 raise ReadError("not a gzip file")
1215 t
._extfileobj
= False
1219 def bz2open(cls
, name
, mode
="r", fileobj
=None, compresslevel
=9):
1220 """Open bzip2 compressed tar archive name for reading or writing.
1221 Appending is not allowed.
1223 if len(mode
) > 1 or mode
not in "rw":
1224 raise ValueError("mode must be 'r' or 'w'.")
1229 raise CompressionError("bz2 module is not available")
1231 if fileobj
is not None:
1232 fileobj
= _BZ2Proxy(fileobj
, mode
)
1234 fileobj
= bz2
.BZ2File(name
, mode
, compresslevel
=compresslevel
)
1237 t
= cls
.taropen(name
, mode
, fileobj
)
1239 raise ReadError("not a bzip2 file")
1240 t
._extfileobj
= False
1243 # All *open() methods are registered here.
1245 "tar": "taropen", # uncompressed tar
1246 "gz": "gzopen", # gzip compressed tar
1247 "bz2": "bz2open" # bzip2 compressed tar
1250 #--------------------------------------------------------------------------
1251 # The public methods which TarFile provides:
1254 """Close the TarFile. In write-mode, two finishing zero blocks are
1255 appended to the archive.
1260 if self
._mode
in "aw":
1261 self
.fileobj
.write(NUL
* (BLOCKSIZE
* 2))
1262 self
.offset
+= (BLOCKSIZE
* 2)
1263 # fill up the end with zero-blocks
1264 # (like option -b20 for tar does)
1265 blocks
, remainder
= divmod(self
.offset
, RECORDSIZE
)
1267 self
.fileobj
.write(NUL
* (RECORDSIZE
- remainder
))
1269 if not self
._extfileobj
:
1270 self
.fileobj
.close()
1273 def getmember(self
, name
):
1274 """Return a TarInfo object for member `name'. If `name' can not be
1275 found in the archive, KeyError is raised. If a member occurs more
1276 than once in the archive, its last occurrence is assumed to be the
1277 most up-to-date version.
1279 tarinfo
= self
._getmember
(name
)
1281 raise KeyError("filename %r not found" % name
)
1284 def getmembers(self
):
1285 """Return the members of the archive as a list of TarInfo objects. The
1286 list has the same order as the members in the archive.
1289 if not self
._loaded
: # if we want to obtain a list of
1290 self
._load
() # all members, we first have to
1291 # scan the whole archive.
1295 """Return the members of the archive as a list of their names. It has
1296 the same order as the list returned by getmembers().
1298 return [tarinfo
.name
for tarinfo
in self
.getmembers()]
1300 def gettarinfo(self
, name
=None, arcname
=None, fileobj
=None):
1301 """Create a TarInfo object for either the file `name' or the file
1302 object `fileobj' (using os.fstat on its file descriptor). You can
1303 modify some of the TarInfo's attributes before you add it using
1304 addfile(). If given, `arcname' specifies an alternative name for the
1305 file in the archive.
1309 # When fileobj is given, replace name by
1310 # fileobj's real name.
1311 if fileobj
is not None:
1314 # Building the name of the member in the archive.
1315 # Backward slashes are converted to forward slashes,
1316 # Absolute paths are turned to relative paths.
1319 arcname
= normpath(arcname
)
1320 drv
, arcname
= os
.path
.splitdrive(arcname
)
1321 while arcname
[0:1] == "/":
1322 arcname
= arcname
[1:]
1324 # Now, fill the TarInfo object with
1325 # information specific for the file.
1328 # Use os.stat or os.lstat, depending on platform
1329 # and if symlinks shall be resolved.
1331 if hasattr(os
, "lstat") and not self
.dereference
:
1332 statres
= os
.lstat(name
)
1334 statres
= os
.stat(name
)
1336 statres
= os
.fstat(fileobj
.fileno())
1339 stmd
= statres
.st_mode
1340 if stat
.S_ISREG(stmd
):
1341 inode
= (statres
.st_ino
, statres
.st_dev
)
1342 if not self
.dereference
and \
1343 statres
.st_nlink
> 1 and inode
in self
.inodes
:
1344 # Is it a hardlink to an already
1347 linkname
= self
.inodes
[inode
]
1349 # The inode is added only if its valid.
1350 # For win32 it is always 0.
1353 self
.inodes
[inode
] = arcname
1354 elif stat
.S_ISDIR(stmd
):
1356 if arcname
[-1:] != "/":
1358 elif stat
.S_ISFIFO(stmd
):
1360 elif stat
.S_ISLNK(stmd
):
1362 linkname
= os
.readlink(name
)
1363 elif stat
.S_ISCHR(stmd
):
1365 elif stat
.S_ISBLK(stmd
):
1370 # Fill the TarInfo object with all
1371 # information we can get.
1372 tarinfo
.name
= arcname
1374 tarinfo
.uid
= statres
.st_uid
1375 tarinfo
.gid
= statres
.st_gid
1376 if stat
.S_ISREG(stmd
):
1377 tarinfo
.size
= statres
.st_size
1380 tarinfo
.mtime
= statres
.st_mtime
1382 tarinfo
.linkname
= linkname
1385 tarinfo
.uname
= pwd
.getpwuid(tarinfo
.uid
)[0]
1390 tarinfo
.gname
= grp
.getgrgid(tarinfo
.gid
)[0]
1394 if type in (CHRTYPE
, BLKTYPE
):
1395 if hasattr(os
, "major") and hasattr(os
, "minor"):
1396 tarinfo
.devmajor
= os
.major(statres
.st_rdev
)
1397 tarinfo
.devminor
= os
.minor(statres
.st_rdev
)
1400 def list(self
, verbose
=True):
1401 """Print a table of contents to sys.stdout. If `verbose' is False, only
1402 the names of the members are printed. If it is True, an `ls -l'-like
1407 for tarinfo
in self
:
1409 print filemode(tarinfo
.mode
),
1410 print "%s/%s" % (tarinfo
.uname
or tarinfo
.uid
,
1411 tarinfo
.gname
or tarinfo
.gid
),
1412 if tarinfo
.ischr() or tarinfo
.isblk():
1413 print "%10s" % ("%d,%d" \
1414 % (tarinfo
.devmajor
, tarinfo
.devminor
)),
1416 print "%10d" % tarinfo
.size
,
1417 print "%d-%02d-%02d %02d:%02d:%02d" \
1418 % time
.localtime(tarinfo
.mtime
)[:6],
1424 print "->", tarinfo
.linkname
,
1426 print "link to", tarinfo
.linkname
,
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive.

       `name' may be any type of file (directory, fifo, symbolic link,
       etc.). `arcname', if given, is the name used for the file inside
       the archive. Directories are added recursively unless `recursive'
       is False. Files whose type gettarinfo() cannot handle and the
       archive file itself are skipped with a debug message.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: the user wants to add the current working
    # directory. Only its entries are added, not "." itself.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir("."):
                self.add(f, os.path.join(arcname, f))
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = open(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Close the source even if writing the member failed.
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))

    else:
        self.addfile(tarinfo)
def addfile(self, tarinfo, fileobj=None):
    """Append the member described by `tarinfo' to the archive.

       A shallow copy of `tarinfo' is what gets written and recorded in
       self.members. If `fileobj' is given, tarinfo.size bytes are read
       from it and stored after the header, padded with NUL bytes to a
       multiple of BLOCKSIZE. TarInfo objects can be created with
       gettarinfo(). On Windows platforms, `fileobj' should always be
       opened with mode 'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    member = copy.copy(tarinfo)

    # Header first; self.offset tracks the bytes written so far.
    header = member.tobuf(self.posix)
    self.fileobj.write(header)
    self.offset += len(header)

    # Then the payload, if there is one.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, member.size)
        blocks, remainder = divmod(member.size, BLOCKSIZE)
        if remainder > 0:
            # Fill up the last, partially used block.
            self.fileobj.write(NUL * (BLOCKSIZE - remainder))
            blocks += 1
        self.offset += blocks * BLOCKSIZE

    self.members.append(member)
def extractall(self, path=".", members=None):
    """Extract all members from the archive.

       Members are extracted below `path' (the current working directory
       by default). `members' is optional and must be a subset of the
       list returned by getmembers(); if omitted, the whole archive is
       extracted. Owner, modification time and permissions of
       directories are set only after everything else was extracted.
       An ExtractError raised while doing so is re-raised if
       self.errorlevel > 1 and reported through _dbg() otherwise.
    """
    directories = []

    if members is None:
        members = self

    for tarinfo in members:
        if tarinfo.isdir():
            # Create the directory with a permissive mode (0777), so
            # that all files below it can be extracted as well.
            try:
                os.makedirs(os.path.join(path, tarinfo.name),
                            stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
            except EnvironmentError:
                pass
            directories.append(tarinfo)
        else:
            self.extract(tarinfo, path)

    # Reverse sort directories, so that entries of a directory are
    # handled before the directory itself.
    directories.sort(key=lambda member: member.name)
    directories.reverse()

    # Set correct owner, mtime and filemode on directories.
    for tarinfo in directories:
        # Use a separate variable here: rebinding `path' would make every
        # following directory resolve below the previous one.
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError:
            if self.errorlevel > 1:
                raise
            self._dbg(1, "tarfile: %s" % sys.exc_info()[1])
1547 def extract(self
, member
, path
=""):
1548 """Extract a member from the archive to the current working directory,
1549 using its full name. Its file information is extracted as accurately
1550 as possible. `member' may be a filename or a TarInfo object. You can
1551 specify a different directory using `path'.
1555 if isinstance(member
, TarInfo
):
1558 tarinfo
= self
.getmember(member
)
1560 # Prepare the link target for makelink().
1562 tarinfo
._link
_target
= os
.path
.join(path
, tarinfo
.linkname
)
1565 self
._extract
_member
(tarinfo
, os
.path
.join(path
, tarinfo
.name
))
1566 except EnvironmentError, e
:
1567 if self
.errorlevel
> 0:
1570 if e
.filename
is None:
1571 self
._dbg
(1, "tarfile: %s" % e
.strerror
)
1573 self
._dbg
(1, "tarfile: %s %r" % (e
.strerror
, e
.filename
))
1574 except ExtractError
, e
:
1575 if self
.errorlevel
> 1:
1578 self
._dbg
(1, "tarfile: %s" % e
)
def extractfile(self, member):
    """Return a file-like object for a member of the archive.

       `member' may be a filename or a TarInfo object. For a regular
       file, or a member of a type not listed in SUPPORTED_TYPES, a
       self.fileobject instance is returned. For a (sym)link the file
       object of the link's target is returned. For everything else
       (directory, chrdev, blkdev, etc.) the result is None.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    tarinfo = member
    if not isinstance(tarinfo, TarInfo):
        tarinfo = self.getmember(member)

    # Regular files carry data; members of unknown type are treated
    # the same way.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # No data is associated with the remaining non-link types.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    if isinstance(self.fileobj, _Stream):
        # A small but ugly workaround for the case that someone tries
        # to extract a (sym)link as a file-object from a non-seekable
        # stream of tar blocks.
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    target = self._getmember(tarinfo.linkname, tarinfo)
    return self.extractfile(target)
1619 def _extract_member(self
, tarinfo
, targetpath
):
1620 """Extract the TarInfo object tarinfo to a physical
1621 file called targetpath.
1623 # Fetch the TarInfo object for the given name
1624 # and build the destination pathname, replacing
1625 # forward slashes to platform specific separators.
1626 if targetpath
[-1:] == "/":
1627 targetpath
= targetpath
[:-1]
1628 targetpath
= os
.path
.normpath(targetpath
)
1630 # Create all upper directories.
1631 upperdirs
= os
.path
.dirname(targetpath
)
1632 if upperdirs
and not os
.path
.exists(upperdirs
):
1637 ti
.mtime
= tarinfo
.mtime
1638 ti
.uid
= tarinfo
.uid
1639 ti
.gid
= tarinfo
.gid
1640 ti
.uname
= tarinfo
.uname
1641 ti
.gname
= tarinfo
.gname
1643 self
._extract
_member
(ti
, ti
.name
)
1647 if tarinfo
.islnk() or tarinfo
.issym():
1648 self
._dbg
(1, "%s -> %s" % (tarinfo
.name
, tarinfo
.linkname
))
1650 self
._dbg
(1, tarinfo
.name
)
1653 self
.makefile(tarinfo
, targetpath
)
1654 elif tarinfo
.isdir():
1655 self
.makedir(tarinfo
, targetpath
)
1656 elif tarinfo
.isfifo():
1657 self
.makefifo(tarinfo
, targetpath
)
1658 elif tarinfo
.ischr() or tarinfo
.isblk():
1659 self
.makedev(tarinfo
, targetpath
)
1660 elif tarinfo
.islnk() or tarinfo
.issym():
1661 self
.makelink(tarinfo
, targetpath
)
1662 elif tarinfo
.type not in SUPPORTED_TYPES
:
1663 self
.makeunknown(tarinfo
, targetpath
)
1665 self
.makefile(tarinfo
, targetpath
)
1667 self
.chown(tarinfo
, targetpath
)
1668 if not tarinfo
.issym():
1669 self
.chmod(tarinfo
, targetpath
)
1670 self
.utime(tarinfo
, targetpath
)
1672 #--------------------------------------------------------------------------
1673 # Below are the different file methods. They are called via
1674 # _extract_member() when extract() is called. They can be replaced in a
1675 # subclass to implement other functionality.
def makedir(self, tarinfo, targetpath):
    """Create the directory `targetpath'. An already existing
       `targetpath' (errno EEXIST) is not treated as an error; every
       other EnvironmentError is passed on to the caller.
    """
    try:
        os.mkdir(targetpath)
    except EnvironmentError:
        if sys.exc_info()[1].errno != errno.EEXIST:
            raise
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath.

       The data is taken from the file object that extractfile() returns
       for `tarinfo' and copied to `targetpath', which is opened in
       binary mode. Both file objects are closed again even if opening
       the target or copying the data fails.
    """
    source = self.extractfile(tarinfo)
    try:
        target = open(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to `targetpath' the same way
       makefile() does and report the type through _dbg().
    """
    self.makefile(tarinfo, targetpath)
    note = ("tarfile: Unknown file type %r, "
            "extracted as regular file.") % tarinfo.type
    self._dbg(1, note)
def makefifo(self, tarinfo, targetpath):
    """Create a fifo named `targetpath' with os.mkfifo(). Raises
       ExtractError if the os module does not provide mkfifo.
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
def makedev(self, tarinfo, targetpath):
    """Create the character or block device `targetpath' with
       os.mknod(). Raises ExtractError if the os module lacks mknod
       or makedev.
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Add the file type bit to the permission bits from the header.
    if tarinfo.isblk():
        kind = stat.S_IFBLK
    else:
        kind = stat.S_IFCHR
    mode = tarinfo.mode | kind

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
def makelink(self, tarinfo, targetpath):
    """Create the symbolic or hard link `targetpath'.

       If linking raises AttributeError (platform limitation), the
       referenced member is extracted to `targetpath' instead; if that
       fails too, the referenced file is copied with shutil.copy2().
       IOError is raised when even the copy fails.
    """
    linkpath = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(linkpath, targetpath)
        else:
            # The link target was prepared by extract().
            os.link(tarinfo._link_target, targetpath)
        return
    except AttributeError:
        pass

    # Fallback: a symlink target is resolved against the directory of
    # the link member itself.
    if tarinfo.issym():
        linkpath = os.path.join(os.path.dirname(tarinfo.name), linkpath)
        linkpath = normpath(linkpath)

    try:
        self._extract_member(self.getmember(linkpath), targetpath)
    except (EnvironmentError, KeyError):
        linkpath = os.path.normpath(linkpath)
        try:
            shutil.copy2(linkpath, targetpath)
        except EnvironmentError:
            raise IOError("link could not be created")
def chown(self, tarinfo, targetpath):
    """Set the owner of `targetpath' according to `tarinfo'.

       Nothing is done unless the pwd module is available and the
       effective user id is 0. Group and user are looked up by name
       first, then by numeric id; if both lookups fail the ids of the
       current process are used. Raises ExtractError if changing the
       owner fails.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    try:
        group = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            group = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            group = os.getgid()

    try:
        user = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            user = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            user = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            # Change the link itself, not the file it points to.
            os.lchown(targetpath, user, group)
        elif sys.platform != "os2emx":
            os.chown(targetpath, user, group)
    except EnvironmentError:
        raise ExtractError("could not change owner")
def chmod(self, tarinfo, targetpath):
    """Apply tarinfo.mode to `targetpath' with os.chmod(), if the os
       module provides it. Raises ExtractError if that fails.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
def utime(self, tarinfo, targetpath):
    """Set access and modification time of `targetpath' to
       tarinfo.mtime. Nothing is done if os.utime is missing or for
       directories on win32. Raises ExtractError if os.utime() fails.
    """
    if not hasattr(os, 'utime'):
        return
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    if sys.platform == "win32" and tarinfo.isdir():
        return

    stamp = (tarinfo.mtime, tarinfo.mtime)
    try:
        os.utime(targetpath, stamp)
    except EnvironmentError:
        raise ExtractError("could not change modification time")
1804 #--------------------------------------------------------------------------
1806 """Return the next member of the archive as a TarInfo object, when
1807 TarFile is opened for reading. Return None if there is no more
1811 if self
.firstmember
is not None:
1812 m
= self
.firstmember
1813 self
.firstmember
= None
1816 # Read the next block.
1817 self
.fileobj
.seek(self
.offset
)
1819 buf
= self
.fileobj
.read(BLOCKSIZE
)
1824 tarinfo
= TarInfo
.frombuf(buf
)
1826 # Set the TarInfo object's offset to the current position of the
1827 # TarFile and set self.offset to the position where the data blocks
1829 tarinfo
.offset
= self
.offset
1830 self
.offset
+= BLOCKSIZE
1832 tarinfo
= self
.proc_member(tarinfo
)
1834 except HeaderError
, e
:
1835 if self
.ignore_zeros
:
1836 self
._dbg
(2, "0x%X: %s" % (self
.offset
, e
))
1837 self
.offset
+= BLOCKSIZE
1840 if self
.offset
== 0:
1841 raise ReadError(str(e
))
1845 # Some old tar programs represent a directory as a regular
1846 # file with a trailing slash.
1847 if tarinfo
.isreg() and tarinfo
.name
.endswith("/"):
1848 tarinfo
.type = DIRTYPE
1850 # Directory names should have a '/' at the end.
1854 self
.members
.append(tarinfo
)
1857 #--------------------------------------------------------------------------
1858 # The following are methods that are called depending on the type of a
1859 # member. The entry point is proc_member() which is called with a TarInfo
1860 # object created from the header block from the current offset. The
1861 # proc_member() method can be overridden in a subclass to add custom
1862 # proc_*() methods. A proc_*() method MUST implement the following
1864 # 1. Set tarinfo.offset_data to the position where the data blocks begin,
1865 # if there is data that follows.
1866 # 2. Set self.offset to the position where the next member's header will
1868 # 3. Return tarinfo or another valid TarInfo object.
def proc_member(self, tarinfo):
    """Dispatch `tarinfo' to the proc_*() method responsible for its
       type and return that method's result.
    """
    if tarinfo.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        return self.proc_gnulong(tarinfo)
    if tarinfo.type == GNUTYPE_SPARSE:
        return self.proc_sparse(tarinfo)
    # Everything else is handled as a builtin type.
    return self.proc_builtin(tarinfo)
def proc_builtin(self, tarinfo):
    """Handle a member of a builtin type, or of an unknown type that
       is treated like a regular file. Returns `tarinfo'.
    """
    tarinfo.offset_data = self.offset
    has_data = tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES
    if has_data:
        # Move past the data blocks to the next header.
        self.offset += self._block(tarinfo.size)
    return tarinfo
def proc_gnulong(self, tarinfo):
    """Handle a GNU longname or longlink member.

       The blocks following the header hold the long name. They are
       read, then the next header is processed through proc_member()
       and its name (or linkname) is replaced by the long one. The
       resulting TarInfo object is returned.
    """
    # Read the blocks that carry the name.
    chunks = []
    remaining = tarinfo.size
    while remaining > 0:
        chunks.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        remaining -= BLOCKSIZE
    longname = "".join(chunks)

    # Fetch the next header and process it.
    header = TarInfo.frombuf(self.fileobj.read(BLOCKSIZE))
    header.offset = self.offset
    self.offset += BLOCKSIZE
    member = self.proc_member(header)

    # The patched member starts where the longname header started.
    member.offset = tarinfo.offset
    if tarinfo.type == GNUTYPE_LONGNAME:
        member.name = longname.rstrip(NUL)
    elif tarinfo.type == GNUTYPE_LONGLINK:
        member.linkname = longname.rstrip(NUL)

    return member
1919 def proc_sparse(self
, tarinfo
):
1920 """Process a GNU sparse header plus extra headers.
1927 # There are 4 possible sparse structs in the
1931 offset
= nti(buf
[pos
:pos
+ 12])
1932 numbytes
= nti(buf
[pos
+ 12:pos
+ 24])
1935 if offset
> lastpos
:
1936 sp
.append(_hole(lastpos
, offset
- lastpos
))
1937 sp
.append(_data(offset
, numbytes
, realpos
))
1939 lastpos
= offset
+ numbytes
1942 isextended
= ord(buf
[482])
1943 origsize
= nti(buf
[483:495])
1945 # If the isextended flag is given,
1946 # there are extra headers to process.
1947 while isextended
== 1:
1948 buf
= self
.fileobj
.read(BLOCKSIZE
)
1949 self
.offset
+= BLOCKSIZE
1951 for i
in xrange(21):
1953 offset
= nti(buf
[pos
:pos
+ 12])
1954 numbytes
= nti(buf
[pos
+ 12:pos
+ 24])
1957 if offset
> lastpos
:
1958 sp
.append(_hole(lastpos
, offset
- lastpos
))
1959 sp
.append(_data(offset
, numbytes
, realpos
))
1961 lastpos
= offset
+ numbytes
1963 isextended
= ord(buf
[504])
1965 if lastpos
< origsize
:
1966 sp
.append(_hole(lastpos
, origsize
- lastpos
))
1970 tarinfo
.offset_data
= self
.offset
1971 self
.offset
+= self
._block
(tarinfo
.size
)
1972 tarinfo
.size
= origsize
1976 #--------------------------------------------------------------------------
1977 # Little helper methods:
def _block(self, count):
    """Return `count' rounded up to the next multiple of BLOCKSIZE,
       e.g. _block(834) => 1024.
    """
    blocks = count // BLOCKSIZE
    if count % BLOCKSIZE:
        # A partially filled block still takes a whole block.
        blocks += 1
    return blocks * BLOCKSIZE
1988 def _getmember(self
, name
, tarinfo
=None):
1989 """Find an archive member by name from bottom to top.
1990 If tarinfo is given, it is used as the starting point.
1992 # Ensure that all members have been loaded.
1993 members
= self
.getmembers()
1998 end
= members
.index(tarinfo
)
2000 for i
in xrange(end
- 1, -1, -1):
2001 if name
== members
[i
].name
:
2005 """Read through the entire archive file and look for readable
2009 tarinfo
= self
.next()
2014 def _check(self
, mode
=None):
2015 """Check if TarFile is still open, and if the operation's mode
2016 corresponds to TarFile's mode.
2019 raise IOError("%s is closed" % self
.__class
__.__name
__)
2020 if mode
is not None and self
._mode
not in mode
:
2021 raise IOError("bad operation for mode %r" % self
._mode
)
2024 """Provide an iterator object.
2027 return iter(self
.members
)
2029 return TarIter(self
)
2031 def _dbg(self
, level
, msg
):
2032 """Write debugging output to sys.stderr.
2034 if level
<= self
.debug
:
2035 print >> sys
.stderr
, msg
2041 for tarinfo in TarFile(...):
2045 def __init__(self
, tarfile
):
2046 """Construct a TarIter object.
2048 self
.tarfile
= tarfile
2051 """Return iterator object.
2055 """Return the next item using TarFile's next() method.
2056 When all members have been read, set TarFile as _loaded.
2058 # Fix for SF #1100429: Under rare circumstances it can
2059 # happen that getmembers() is called during iteration,
2060 # which will cause TarIter to stop prematurely.
2061 if not self
.tarfile
._loaded
:
2062 tarinfo
= self
.tarfile
.next()
2064 self
.tarfile
._loaded
= True
2068 tarinfo
= self
.tarfile
.members
[self
.index
]
2074 # Helper classes for sparse file support
2076 """Base class for _data and _hole.
2078 def __init__(self
, offset
, size
):
2079 self
.offset
= offset
2081 def __contains__(self
, offset
):
2082 return self
.offset
<= offset
< self
.offset
+ self
.size
class _data(_section):
    """A section of a sparse file that holds data. `realpos' is
       stored alongside the offset and size kept by _section.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
class _hole(_section):
    """A section of a sparse file that is a hole; it adds nothing
       to _section.
    """
2096 class _ringbuffer(list):
2097 """Ringbuffer class which increases performance
2098 over a regular list.
2102 def find(self
, offset
):
2109 if idx
== len(self
):
2117 #---------------------------------------------
2118 # zipfile compatible TarFile class
2119 #---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """Wrapper around TarFile that offers the method names of the
       standard module zipfile's ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        # Open plain or gzip-compressed, depending on the constant.
        if compression == TAR_PLAIN:
            self.tarfile = TarFile.taropen(file, mode)
        elif compression == TAR_GZIPPED:
            self.tarfile = TarFile.gzopen(file, mode)
        else:
            raise ValueError("unknown compression constant")
        # When reading, give every member the attribute names that
        # zipfile's info objects use.
        if mode[0:1] == "r":
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]
    def namelist(self):
        # Names of the members reported by infolist().
        return [member.name for member in self.infolist()]
    def infolist(self):
        # Only members whose type is in REGULAR_TYPES are listed.
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]
    def printdir(self):
        self.tarfile.list()
    def testzip(self):
        # No check is performed; always returns None.
        return
    def getinfo(self, name):
        return self.tarfile.getmember(name)
    def read(self, name):
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()
    def write(self, filename, arcname=None, compress_type=None):
        # `compress_type' is accepted but not used.
        self.tarfile.add(filename, arcname)
    def writestr(self, zinfo, bytes):
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        # Copy the zipfile-style attributes to the names addfile()
        # works with.
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO(bytes))
    def close(self):
        self.tarfile.close()
#class TarFileCompat
2168 #--------------------
2169 # exported functions
2170 #--------------------
2171 def is_tarfile(name
):
2172 """Return True if name points to a tar archive that we
2173 are able to handle, else return False.