2 # -*- coding: iso-8859-1 -*-
3 #-------------------------------------------------------------------
5 #-------------------------------------------------------------------
6 # Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
9 # Permission is hereby granted, free of charge, to any person
10 # obtaining a copy of this software and associated documentation
11 # files (the "Software"), to deal in the Software without
12 # restriction, including without limitation the rights to use,
13 # copy, modify, merge, publish, distribute, sublicense, and/or sell
14 # copies of the Software, and to permit persons to whom the
15 # Software is furnished to do so, subject to the following
18 # The above copyright notice and this permission notice shall be
19 # included in all copies or substantial portions of the Software.
21 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23 # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25 # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 # OTHER DEALINGS IN THE SOFTWARE.
30 """Read from and write to tar format archives.
33 __version__
= "$Revision$"
37 __author__
= "Lars Gustäbel (lars@gustaebel.de)"
40 __credits__
= "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
if sys.platform == 'mac':
    # This module needs work for MacOS9, especially in the area of pathname
    # handling. In many places it is assumed a simple substitution of / by the
    # local os.path.sep is good enough to convert pathnames, but this does not
    # work with the mac rooted:path:name versus :nonrooted:path:name syntax.
    #
    # Refuse to import at all on that platform. The exception is raised with
    # the call form, like every other raise statement in this module.
    raise ImportError("tarfile does not work for platform==mac")
65 # from tarfile import *
66 __all__
= ["TarFile", "TarInfo", "is_tarfile", "TarError"]
68 #---------------------------------------------------------
70 #---------------------------------------------------------
# Basic layout constants of the tar format.
NUL = "\0"                      # the null character (used for padding)
BLOCKSIZE = 512                 # length of processing blocks, in bytes
RECORDSIZE = BLOCKSIZE * 20     # length of records (20 blocks = 10240 bytes)
MAGIC = "ustar"                 # magic tar string
VERSION = "00"                  # version number

# Limits of the fixed-width header fields.
LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field
MAXSIZE_MEMBER = 077777777777L  # maximum size of a file (11 octal digits,
                                # i.e. 8**11 - 1 bytes)

# Values of the one-character type field of a member header.
REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file (type field is a NUL byte)
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

# Type field values introduced by GNU tar.
GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
GNUTYPE_SPARSE = "S"            # GNU tar extension for sparse file
96 #---------------------------------------------------------
98 #---------------------------------------------------------
99 SUPPORTED_TYPES
= (REGTYPE
, AREGTYPE
, LNKTYPE
, # file types that tarfile
100 SYMTYPE
, DIRTYPE
, FIFOTYPE
, # can cope with.
101 CONTTYPE
, CHRTYPE
, BLKTYPE
,
102 GNUTYPE_LONGNAME
, GNUTYPE_LONGLINK
,
# Member types that somehow represent regular files: both regular-file
# type values, contiguous files and GNU sparse files.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)
108 #---------------------------------------------------------
109 # Bits used in the mode field, values in octal.
110 #---------------------------------------------------------
# File type bits.
S_IFLNK = 0120000        # symbolic link
S_IFREG = 0100000        # regular file
S_IFBLK = 0060000        # block device
S_IFDIR = 0040000        # directory
S_IFCHR = 0020000        # character device
S_IFIFO = 0010000        # fifo

# Special permission bits.
TSUID = 04000            # set UID on execution
TSGID = 02000            # set GID on execution
TSVTX = 01000            # reserved

# Permission bits for owner, group and other.
TUREAD = 0400            # read by owner
TUWRITE = 0200           # write by owner
TUEXEC = 0100            # execute/search by owner
TGREAD = 0040            # read by group
TGWRITE = 0020           # write by group
TGEXEC = 0010            # execute/search by group
TOREAD = 0004            # read by other
TOWRITE = 0002           # write by other
TOEXEC = 0001            # execute/search by other
132 #---------------------------------------------------------
133 # Some useful functions
134 #---------------------------------------------------------
137 """Convert a python string to a null-terminated string buffer.
139 return s
[:length
-1] + (length
- len(s
) - 1) * NUL
+ NUL
142 """Convert a number field to a python number.
144 # There are two possible encodings for a number field, see
146 if s
[0] != chr(0200):
147 n
= int(s
.rstrip(NUL
) or "0", 8)
150 for i
in xrange(len(s
) - 1):
155 def itn(n
, digits
=8, posix
=False):
156 """Convert a python number to a number field.
158 # POSIX 1003.1-1988 requires numbers to be encoded as a string of
159 # octal digits followed by a null-byte, this allows values up to
160 # (8**(digits-1))-1. GNU tar allows storing numbers greater than
161 # that if necessary. A leading 0200 byte indicates this particular
162 # encoding, the following digits-1 bytes are a big-endian
163 # representation. This allows values up to (256**(digits-1))-1.
164 if 0 <= n
< 8 ** (digits
- 1):
165 s
= "%0*o" % (digits
- 1, n
) + NUL
168 raise ValueError("overflow in number field")
171 # XXX We mimic GNU tar's behaviour with negative numbers,
172 # this could raise OverflowError.
173 n
= struct
.unpack("L", struct
.pack("l", n
))[0]
176 for i
in xrange(digits
- 1):
177 s
= chr(n
& 0377) + s
def calc_chksums(buf):
    """Return the checksums of a member's header as a tuple
       (unsigned_chksum, signed_chksum).

       All characters of the first 512 bytes of `buf' are summed up,
       except for the 8 byte chksum field at offset 148, which is treated
       as if it was filled with spaces. According to the GNU tar sources,
       some tars (Sun and NeXT) calculate chksum with signed char, which
       gives a different result if there are chars in the buffer with the
       high bit set. That is why both variants are calculated.
    """
    header = buf[:512]
    # The "8x" pad code skips the chksum field without producing values;
    # the constant 256 accounts for the 8 spaces (8 * 0x20) it stands for.
    as_unsigned = struct.unpack("148B8x356B", header)
    as_signed = struct.unpack("148b8x356b", header)
    return 256 + sum(as_unsigned), 256 + sum(as_signed)
195 def copyfileobj(src
, dst
, length
=None):
196 """Copy length bytes from fileobj src to fileobj dst.
197 If length is None, copy the entire content.
202 shutil
.copyfileobj(src
, dst
)
206 blocks
, remainder
= divmod(length
, BUFSIZE
)
207 for b
in xrange(blocks
):
208 buf
= src
.read(BUFSIZE
)
209 if len(buf
) < BUFSIZE
:
210 raise IOError("end of file reached")
214 buf
= src
.read(remainder
)
215 if len(buf
) < remainder
:
216 raise IOError("end of file reached")
230 ((TUEXEC|TSUID
, "s"),
236 ((TGEXEC|TSGID
, "s"),
242 ((TOEXEC|TSVTX
, "t"),
248 """Convert a file's mode to a string of the form
250 Used by TarFile.list()
253 for table
in filemode_table
:
254 for bit
, char
in table
:
255 if mode
& bit
== bit
:
263 normpath
= lambda path
: os
.path
.normpath(path
).replace(os
.sep
, "/")
265 normpath
= os
.path
.normpath
class TarError(Exception):
    """Base exception of this module; the more specific tarfile
       exceptions are derived from it.
    """
class ExtractError(TarError):
    """General exception for errors that occur while extracting."""
class ReadError(TarError):
    """Exception for unreadable tar archives."""
class CompressionError(TarError):
    """Exception for unavailable, unknown or unsupported compression
       methods.
    """
class StreamError(TarError):
    """Exception for operations that are not supported on stream-like
       TarFiles.
    """
283 #---------------------------
284 # internal stream interface
285 #---------------------------
287 """Low-level file object. Supports reading and writing.
288 It is used instead of a regular file object for streaming
292 def __init__(self
, name
, mode
):
295 "w": os
.O_WRONLY | os
.O_CREAT | os
.O_TRUNC
,
297 if hasattr(os
, "O_BINARY"):
299 self
.fd
= os
.open(name
, mode
)
304 def read(self
, size
):
305 return os
.read(self
.fd
, size
)
311 """Class that serves as an adapter between TarFile and
312 a stream-like object. The stream-like object only
313 needs to have a read() or write() method and is accessed
314 blockwise. Use of gzip or bzip2 compression is possible.
315 A stream-like object could be for example: sys.stdin,
316 sys.stdout, a socket, a tape device etc.
318 _Stream is intended to be used only internally.
321 def __init__(self
, name
, mode
, comptype
, fileobj
, bufsize
):
322 """Construct a _Stream object.
324 self
._extfileobj
= True
326 fileobj
= _LowLevelFile(name
, mode
)
327 self
._extfileobj
= False
330 # Enable transparent compression detection for the
332 fileobj
= _StreamProxy(fileobj
)
333 comptype
= fileobj
.getcomptype()
335 self
.name
= name
or ""
337 self
.comptype
= comptype
338 self
.fileobj
= fileobj
339 self
.bufsize
= bufsize
348 raise CompressionError("zlib module is not available")
350 self
.crc
= zlib
.crc32("")
354 self
._init
_write
_gz
()
356 if comptype
== "bz2":
360 raise CompressionError("bz2 module is not available")
363 self
.cmp = bz2
.BZ2Decompressor()
365 self
.cmp = bz2
.BZ2Compressor()
368 if hasattr(self
, "closed") and not self
.closed
:
371 def _init_write_gz(self
):
372 """Initialize for writing with gzip compression.
374 self
.cmp = self
.zlib
.compressobj(9, self
.zlib
.DEFLATED
,
375 -self
.zlib
.MAX_WBITS
,
376 self
.zlib
.DEF_MEM_LEVEL
,
378 timestamp
= struct
.pack("<L", long(time
.time()))
379 self
.__write
("\037\213\010\010%s\002\377" % timestamp
)
380 if self
.name
.endswith(".gz"):
381 self
.name
= self
.name
[:-3]
382 self
.__write
(self
.name
+ NUL
)
385 """Write string s to the stream.
387 if self
.comptype
== "gz":
388 self
.crc
= self
.zlib
.crc32(s
, self
.crc
)
390 if self
.comptype
!= "tar":
391 s
= self
.cmp.compress(s
)
394 def __write(self
, s
):
395 """Write string s to the stream if a whole new block
396 is ready to be written.
399 while len(self
.buf
) > self
.bufsize
:
400 self
.fileobj
.write(self
.buf
[:self
.bufsize
])
401 self
.buf
= self
.buf
[self
.bufsize
:]
404 """Close the _Stream object. No operation should be
405 done on it afterwards.
410 if self
.mode
== "w" and self
.comptype
!= "tar":
411 self
.buf
+= self
.cmp.flush()
413 if self
.mode
== "w" and self
.buf
:
414 blocks
, remainder
= divmod(len(self
.buf
), self
.bufsize
)
416 self
.buf
+= NUL
* (self
.bufsize
- remainder
)
417 self
.fileobj
.write(self
.buf
)
419 if self
.comptype
== "gz":
420 # The native zlib crc is an unsigned 32-bit integer, but
421 # the Python wrapper implicitly casts that to a signed C
422 # long. So, on a 32-bit box self.crc may "look negative",
423 # while the same crc on a 64-bit box may "look positive".
424 # To avoid irksome warnings from the `struct` module, force
425 # it to look positive on all boxes.
426 self
.fileobj
.write(struct
.pack("<L", self
.crc
& 0xffffffffL
))
427 self
.fileobj
.write(struct
.pack("<L", self
.pos
& 0xffffFFFFL
))
429 if not self
._extfileobj
:
434 def _init_read_gz(self
):
435 """Initialize for reading a gzip compressed fileobj.
437 self
.cmp = self
.zlib
.decompressobj(-self
.zlib
.MAX_WBITS
)
440 # taken from gzip.GzipFile with some alterations
441 if self
.__read
(2) != "\037\213":
442 raise ReadError("not a gzip file")
443 if self
.__read
(1) != "\010":
444 raise CompressionError("unsupported compression method")
446 flag
= ord(self
.__read
(1))
450 xlen
= ord(self
.__read
(1)) + 256 * ord(self
.__read
(1))
455 if not s
or s
== NUL
:
460 if not s
or s
== NUL
:
466 """Return the stream's file pointer position.
470 def seek(self
, pos
=0):
471 """Set the stream's file pointer to pos. Negative seeking
474 if pos
- self
.pos
>= 0:
475 blocks
, remainder
= divmod(pos
- self
.pos
, self
.bufsize
)
476 for i
in xrange(blocks
):
477 self
.read(self
.bufsize
)
480 raise StreamError("seeking backwards is not allowed")
483 def read(self
, size
=None):
484 """Return the next size number of bytes from the stream.
485 If size is not defined, return all bytes of the stream
491 buf
= self
._read
(self
.bufsize
)
497 buf
= self
._read
(size
)
501 def _read(self
, size
):
502 """Return size bytes from the stream.
504 if self
.comptype
== "tar":
505 return self
.__read
(size
)
510 buf
= self
.__read
(self
.bufsize
)
513 buf
= self
.cmp.decompress(buf
)
520 def __read(self
, size
):
521 """Return size bytes from stream. If internal buffer is empty,
522 read another block from the stream.
527 buf
= self
.fileobj
.read(self
.bufsize
)
537 class _StreamProxy(object):
538 """Small proxy class that enables transparent compression
539 detection for the Stream interface (mode 'r|*').
542 def __init__(self
, fileobj
):
543 self
.fileobj
= fileobj
544 self
.buf
= self
.fileobj
.read(BLOCKSIZE
)
546 def read(self
, size
):
547 self
.read
= self
.fileobj
.read
550 def getcomptype(self
):
551 if self
.buf
.startswith("\037\213\010"):
553 if self
.buf
.startswith("BZh91"):
561 class _BZ2Proxy(object):
562 """Small proxy class that enables external file object
563 support for "r:bz2" and "w:bz2" modes. This is actually
564 a workaround for a limitation in bz2 module's BZ2File
565 class which (unlike gzip.GzipFile) has no support for
566 a file object argument.
569 blocksize
= 16 * 1024
571 def __init__(self
, fileobj
, mode
):
572 self
.fileobj
= fileobj
580 self
.bz2obj
= bz2
.BZ2Decompressor()
584 self
.bz2obj
= bz2
.BZ2Compressor()
586 def read(self
, size
):
591 raw
= self
.fileobj
.read(self
.blocksize
)
592 data
= self
.bz2obj
.decompress(raw
)
597 self
.buf
= "".join(b
)
599 buf
= self
.buf
[:size
]
600 self
.buf
= self
.buf
[size
:]
607 self
.read(pos
- self
.pos
)
612 def write(self
, data
):
613 self
.pos
+= len(data
)
614 raw
= self
.bz2obj
.compress(data
)
615 self
.fileobj
.write(raw
)
619 raw
= self
.bz2obj
.flush()
620 self
.fileobj
.write(raw
)
624 #------------------------
625 # Extraction file object
626 #------------------------
627 class ExFileObject(object):
628 """File-like object for reading an archive member.
629 Is returned by TarFile.extractfile(). Support for
630 sparse files included.
633 def __init__(self
, tarfile
, tarinfo
):
634 self
.fileobj
= tarfile
.fileobj
635 self
.name
= tarinfo
.name
638 self
.offset
= tarinfo
.offset_data
639 self
.size
= tarinfo
.size
642 if tarinfo
.issparse():
643 self
.sparse
= tarinfo
.sparse
644 self
.read
= self
._readsparse
646 self
.read
= self
._readnormal
648 def __read(self
, size
):
649 """Overloadable read method.
651 return self
.fileobj
.read(size
)
653 def readline(self
, size
=-1):
654 """Read a line with approx. size. If size is negative,
655 read a whole line. readline() and read() must not
661 nl
= self
.linebuffer
.find("\n")
665 size
-= len(self
.linebuffer
)
666 while (nl
< 0 and size
> 0):
667 buf
= self
.read(min(size
, 100))
670 self
.linebuffer
+= buf
672 nl
= self
.linebuffer
.find("\n")
677 buf
= self
.linebuffer
[:nl
]
678 self
.linebuffer
= self
.linebuffer
[nl
+ 1:]
679 while buf
[-1:] == "\r":
684 """Return a list with all (following) lines.
688 line
= self
.readline()
693 def _readnormal(self
, size
=None):
694 """Read operation for regular files.
697 raise ValueError("file is closed")
698 self
.fileobj
.seek(self
.offset
+ self
.pos
)
699 bytesleft
= self
.size
- self
.pos
701 bytestoread
= bytesleft
703 bytestoread
= min(size
, bytesleft
)
704 self
.pos
+= bytestoread
705 return self
.__read
(bytestoread
)
707 def _readsparse(self
, size
=None):
708 """Read operation for sparse files.
711 raise ValueError("file is closed")
714 size
= self
.size
- self
.pos
718 buf
= self
._readsparsesection
(size
)
725 def _readsparsesection(self
, size
):
726 """Read a single section of a sparse file.
728 section
= self
.sparse
.find(self
.pos
)
733 toread
= min(size
, section
.offset
+ section
.size
- self
.pos
)
734 if isinstance(section
, _data
):
735 realpos
= section
.realpos
+ self
.pos
- section
.offset
737 self
.fileobj
.seek(self
.offset
+ realpos
)
738 return self
.__read
(toread
)
744 """Return the current file position.
748 def seek(self
, pos
, whence
=0):
749 """Seek to a position in the file.
753 self
.pos
= min(max(pos
, 0), self
.size
)
756 self
.pos
= max(self
.pos
+ pos
, 0)
758 self
.pos
= min(self
.pos
+ pos
, self
.size
)
760 self
.pos
= max(min(self
.size
+ pos
, self
.size
), 0)
763 """Close the file object.
768 """Get an iterator over the file object.
771 raise ValueError("I/O operation on closed file")
775 """Get the next item from the file iterator.
777 result
= self
.readline()
787 class TarInfo(object):
788 """Informational class which holds the details about an
789 archive member given by a tar header block.
790 TarInfo objects are returned by TarFile.getmember(),
791 TarFile.getmembers() and TarFile.gettarinfo() and are
792 usually created internally.
795 def __init__(self
, name
=""):
796 """Construct a TarInfo object. name is the optional name
800 self
.name
= name
# member name (dirnames must end with '/')
801 self
.mode
= 0666 # file permissions
802 self
.uid
= 0 # user id
803 self
.gid
= 0 # group id
804 self
.size
= 0 # file size
805 self
.mtime
= 0 # modification time
806 self
.chksum
= 0 # header checksum
807 self
.type = REGTYPE
# member type
808 self
.linkname
= "" # link name
809 self
.uname
= "user" # user name
810 self
.gname
= "group" # group name
811 self
.devmajor
= 0 # device major number
812 self
.devminor
= 0 # device minor number
813 self
.prefix
= "" # prefix to filename or information
816 self
.offset
= 0 # the tar header starts here
817 self
.offset_data
= 0 # the file's data starts here
820 return "<%s %r at %#x>" % (self
.__class
__.__name
__,self
.name
,id(self
))
823 def frombuf(cls
, buf
):
824 """Construct a TarInfo object from a 512 byte string buffer.
826 if len(buf
) != BLOCKSIZE
:
827 raise ValueError("truncated header")
828 if buf
.count(NUL
) == BLOCKSIZE
:
829 raise ValueError("empty header")
833 tarinfo
.name
= buf
[0:100].rstrip(NUL
)
834 tarinfo
.mode
= nti(buf
[100:108])
835 tarinfo
.uid
= nti(buf
[108:116])
836 tarinfo
.gid
= nti(buf
[116:124])
837 tarinfo
.size
= nti(buf
[124:136])
838 tarinfo
.mtime
= nti(buf
[136:148])
839 tarinfo
.chksum
= nti(buf
[148:156])
840 tarinfo
.type = buf
[156:157]
841 tarinfo
.linkname
= buf
[157:257].rstrip(NUL
)
842 tarinfo
.uname
= buf
[265:297].rstrip(NUL
)
843 tarinfo
.gname
= buf
[297:329].rstrip(NUL
)
844 tarinfo
.devmajor
= nti(buf
[329:337])
845 tarinfo
.devminor
= nti(buf
[337:345])
846 tarinfo
.prefix
= buf
[345:500]
848 if tarinfo
.chksum
not in calc_chksums(buf
):
849 raise ValueError("invalid header")
852 def tobuf(self
, posix
=False):
853 """Return a tar header block as a 512 byte string.
857 itn(self
.mode
& 07777, 8, posix
),
858 itn(self
.uid
, 8, posix
),
859 itn(self
.gid
, 8, posix
),
860 itn(self
.size
, 12, posix
),
861 itn(self
.mtime
, 12, posix
),
862 " ", # checksum field
864 stn(self
.linkname
, 100),
869 itn(self
.devmajor
, 8, posix
),
870 itn(self
.devminor
, 8, posix
),
871 stn(self
.prefix
, 155)
874 buf
= struct
.pack("%ds" % BLOCKSIZE
, "".join(parts
))
875 chksum
= calc_chksums(buf
)[0]
876 buf
= buf
[:148] + "%06o\0" % chksum
+ buf
[155:]
881 return self
.type in REGULAR_TYPES
885 return self
.type == DIRTYPE
887 return self
.type == SYMTYPE
889 return self
.type == LNKTYPE
891 return self
.type == CHRTYPE
893 return self
.type == BLKTYPE
895 return self
.type == FIFOTYPE
897 return self
.type == GNUTYPE_SPARSE
899 return self
.type in (CHRTYPE
, BLKTYPE
, FIFOTYPE
)
902 class TarFile(object):
903 """The TarFile Class provides an interface to tar archives.
906 debug
= 0 # May be set from 0 (no msgs) to 3 (all msgs)
908 dereference
= False # If true, add content of linked file to the
909 # tar file, else the link.
911 ignore_zeros
= False # If true, skips empty or invalid blocks and
912 # continues processing.
914 errorlevel
= 0 # If 0, fatal errors only appear in debug
915 # messages (if debug >= 0). If > 0, errors
916 # are passed to the caller as exceptions.
918 posix
= False # If True, generates POSIX.1-1990-compliant
919 # archives (no GNU extensions!)
921 fileobject
= ExFileObject
923 def __init__(self
, name
=None, mode
="r", fileobj
=None):
924 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
925 read from an existing archive, 'a' to append data to an existing
926 file or 'w' to create a new file overwriting an existing one. `mode'
928 If `fileobj' is given, it is used for reading or writing data. If it
929 can be determined, `mode' is overridden by `fileobj's mode.
930 `fileobj' is not closed, when TarFile is closed.
934 if len(mode
) > 1 or mode
not in "raw":
935 raise ValueError("mode must be 'r', 'a' or 'w'")
937 self
.mode
= {"r": "rb", "a": "r+b", "w": "wb"}[mode
]
940 fileobj
= file(self
.name
, self
.mode
)
941 self
._extfileobj
= False
943 if self
.name
is None and hasattr(fileobj
, "name"):
944 self
.name
= fileobj
.name
945 if hasattr(fileobj
, "mode"):
946 self
.mode
= fileobj
.mode
947 self
._extfileobj
= True
948 self
.fileobj
= fileobj
950 # Init datastructures
952 self
.members
= [] # list of members as TarInfo objects
953 self
._loaded
= False # flag if all members have been read
954 self
.offset
= 0L # current position in the archive file
955 self
.inodes
= {} # dictionary caching the inodes of
956 # archive members already added
958 if self
._mode
== "r":
959 self
.firstmember
= None
960 self
.firstmember
= self
.next()
962 if self
._mode
== "a":
963 # Move to the end of the archive,
964 # before the first empty block.
965 self
.firstmember
= None
968 tarinfo
= self
.next()
973 self
.fileobj
.seek(- BLOCKSIZE
, 1)
976 if self
._mode
in "aw":
979 #--------------------------------------------------------------------------
980 # Below are the classmethods which act as alternate constructors to the
981 # TarFile class. The open() method is the only one that is needed for
982 # public use; it is the "super"-constructor and is able to select an
983 # adequate "sub"-constructor for a particular compression using the mapping
986 # This concept allows one to subclass TarFile without losing the comfort of
987 # the super-constructor. A sub-constructor is registered and made available
988 # by adding it to the mapping in OPEN_METH.
991 def open(cls
, name
=None, mode
="r", fileobj
=None, bufsize
=20*512):
992 """Open a tar archive for reading, writing or appending. Return
993 an appropriate TarFile class.
996 'r' or 'r:*' open for reading with transparent compression
997 'r:' open for reading exclusively uncompressed
998 'r:gz' open for reading with gzip compression
999 'r:bz2' open for reading with bzip2 compression
1000 'a' or 'a:' open for appending
1001 'w' or 'w:' open for writing without compression
1002 'w:gz' open for writing with gzip compression
1003 'w:bz2' open for writing with bzip2 compression
1005 'r|*' open a stream of tar blocks with transparent compression
1006 'r|' open an uncompressed stream of tar blocks for reading
1007 'r|gz' open a gzip compressed stream of tar blocks
1008 'r|bz2' open a bzip2 compressed stream of tar blocks
1009 'w|' open an uncompressed stream for writing
1010 'w|gz' open a gzip compressed stream for writing
1011 'w|bz2' open a bzip2 compressed stream for writing
1014 if not name
and not fileobj
:
1015 raise ValueError("nothing to open")
1017 if mode
in ("r", "r:*"):
1018 # Find out which *open() is appropriate for opening the file.
1019 for comptype
in cls
.OPEN_METH
:
1020 func
= getattr(cls
, cls
.OPEN_METH
[comptype
])
1022 return func(name
, "r", fileobj
)
1023 except (ReadError
, CompressionError
):
1025 raise ReadError("file could not be opened successfully")
1028 filemode
, comptype
= mode
.split(":", 1)
1029 filemode
= filemode
or "r"
1030 comptype
= comptype
or "tar"
1032 # Select the *open() function according to
1033 # given compression.
1034 if comptype
in cls
.OPEN_METH
:
1035 func
= getattr(cls
, cls
.OPEN_METH
[comptype
])
1037 raise CompressionError("unknown compression type %r" % comptype
)
1038 return func(name
, filemode
, fileobj
)
1041 filemode
, comptype
= mode
.split("|", 1)
1042 filemode
= filemode
or "r"
1043 comptype
= comptype
or "tar"
1045 if filemode
not in "rw":
1046 raise ValueError("mode must be 'r' or 'w'")
1048 t
= cls(name
, filemode
,
1049 _Stream(name
, filemode
, comptype
, fileobj
, bufsize
))
1050 t
._extfileobj
= False
1054 return cls
.taropen(name
, mode
, fileobj
)
1056 raise ValueError("undiscernible mode")
def taropen(cls, name, mode="r", fileobj=None):
    """Open uncompressed tar archive name for reading or writing.

       `mode' must be exactly 'r', 'a' or 'w', otherwise ValueError is
       raised. The result is whatever cls(name, mode, fileobj) returns.
    """
    # An exact membership test is needed here: the empty string is a
    # substring of "raw", so a substring test would let mode="" through
    # to the constructor.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj)
1067 def gzopen(cls
, name
, mode
="r", fileobj
=None, compresslevel
=9):
1068 """Open gzip compressed tar archive name for reading or writing.
1069 Appending is not allowed.
1071 if len(mode
) > 1 or mode
not in "rw":
1072 raise ValueError("mode must be 'r' or 'w'")
1077 except (ImportError, AttributeError):
1078 raise CompressionError("gzip module is not available")
1080 pre
, ext
= os
.path
.splitext(name
)
1081 pre
= os
.path
.basename(pre
)
1089 fileobj
= file(name
, mode
+ "b")
1095 t
= cls
.taropen(tarname
, mode
,
1096 gzip
.GzipFile(name
, mode
, compresslevel
, fileobj
)
1099 raise ReadError("not a gzip file")
1100 t
._extfileobj
= False
1104 def bz2open(cls
, name
, mode
="r", fileobj
=None, compresslevel
=9):
1105 """Open bzip2 compressed tar archive name for reading or writing.
1106 Appending is not allowed.
1108 if len(mode
) > 1 or mode
not in "rw":
1109 raise ValueError("mode must be 'r' or 'w'.")
1114 raise CompressionError("bz2 module is not available")
1116 pre
, ext
= os
.path
.splitext(name
)
1117 pre
= os
.path
.basename(pre
)
1124 if fileobj
is not None:
1125 fileobj
= _BZ2Proxy(fileobj
, mode
)
1127 fileobj
= bz2
.BZ2File(name
, mode
, compresslevel
=compresslevel
)
1130 t
= cls
.taropen(tarname
, mode
, fileobj
)
1132 raise ReadError("not a bzip2 file")
1133 t
._extfileobj
= False
1136 # All *open() methods are registered here.
1138 "tar": "taropen", # uncompressed tar
1139 "gz": "gzopen", # gzip compressed tar
1140 "bz2": "bz2open" # bzip2 compressed tar
1143 #--------------------------------------------------------------------------
1144 # The public methods which TarFile provides:
1147 """Close the TarFile. In write-mode, two finishing zero blocks are
1148 appended to the archive.
1153 if self
._mode
in "aw":
1154 self
.fileobj
.write(NUL
* (BLOCKSIZE
* 2))
1155 self
.offset
+= (BLOCKSIZE
* 2)
1156 # fill up the end with zero-blocks
1157 # (like option -b20 for tar does)
1158 blocks
, remainder
= divmod(self
.offset
, RECORDSIZE
)
1160 self
.fileobj
.write(NUL
* (RECORDSIZE
- remainder
))
1162 if not self
._extfileobj
:
1163 self
.fileobj
.close()
1166 def getmember(self
, name
):
1167 """Return a TarInfo object for member `name'. If `name' can not be
1168 found in the archive, KeyError is raised. If a member occurs more
1169 than once in the archive, its last occurrence is assumed to be the
1170 most up-to-date version.
1172 tarinfo
= self
._getmember
(name
)
1174 raise KeyError("filename %r not found" % name
)
1177 def getmembers(self
):
1178 """Return the members of the archive as a list of TarInfo objects. The
1179 list has the same order as the members in the archive.
1182 if not self
._loaded
: # if we want to obtain a list of
1183 self
._load
() # all members, we first have to
1184 # scan the whole archive.
1188 """Return the members of the archive as a list of their names. It has
1189 the same order as the list returned by getmembers().
1191 return [tarinfo
.name
for tarinfo
in self
.getmembers()]
1193 def gettarinfo(self
, name
=None, arcname
=None, fileobj
=None):
1194 """Create a TarInfo object for either the file `name' or the file
1195 object `fileobj' (using os.fstat on its file descriptor). You can
1196 modify some of the TarInfo's attributes before you add it using
1197 addfile(). If given, `arcname' specifies an alternative name for the
1198 file in the archive.
1202 # When fileobj is given, replace name by
1203 # fileobj's real name.
1204 if fileobj
is not None:
1207 # Building the name of the member in the archive.
1208 # Backward slashes are converted to forward slashes,
1209 # Absolute paths are turned to relative paths.
1212 arcname
= normpath(arcname
)
1213 drv
, arcname
= os
.path
.splitdrive(arcname
)
1214 while arcname
[0:1] == "/":
1215 arcname
= arcname
[1:]
1217 # Now, fill the TarInfo object with
1218 # information specific for the file.
1221 # Use os.stat or os.lstat, depending on platform
1222 # and if symlinks shall be resolved.
1224 if hasattr(os
, "lstat") and not self
.dereference
:
1225 statres
= os
.lstat(name
)
1227 statres
= os
.stat(name
)
1229 statres
= os
.fstat(fileobj
.fileno())
1232 stmd
= statres
.st_mode
1233 if stat
.S_ISREG(stmd
):
1234 inode
= (statres
.st_ino
, statres
.st_dev
)
1235 if not self
.dereference
and \
1236 statres
.st_nlink
> 1 and inode
in self
.inodes
:
1237 # Is it a hardlink to an already
1240 linkname
= self
.inodes
[inode
]
1242 # The inode is added only if it's valid.
1243 # For win32 it is always 0.
1246 self
.inodes
[inode
] = arcname
1247 elif stat
.S_ISDIR(stmd
):
1249 if arcname
[-1:] != "/":
1251 elif stat
.S_ISFIFO(stmd
):
1253 elif stat
.S_ISLNK(stmd
):
1255 linkname
= os
.readlink(name
)
1256 elif stat
.S_ISCHR(stmd
):
1258 elif stat
.S_ISBLK(stmd
):
1263 # Fill the TarInfo object with all
1264 # information we can get.
1265 tarinfo
.name
= arcname
1267 tarinfo
.uid
= statres
.st_uid
1268 tarinfo
.gid
= statres
.st_gid
1269 if stat
.S_ISREG(stmd
):
1270 tarinfo
.size
= statres
.st_size
1273 tarinfo
.mtime
= statres
.st_mtime
1275 tarinfo
.linkname
= linkname
1278 tarinfo
.uname
= pwd
.getpwuid(tarinfo
.uid
)[0]
1283 tarinfo
.gname
= grp
.getgrgid(tarinfo
.gid
)[0]
1287 if type in (CHRTYPE
, BLKTYPE
):
1288 if hasattr(os
, "major") and hasattr(os
, "minor"):
1289 tarinfo
.devmajor
= os
.major(statres
.st_rdev
)
1290 tarinfo
.devminor
= os
.minor(statres
.st_rdev
)
1293 def list(self
, verbose
=True):
1294 """Print a table of contents to sys.stdout. If `verbose' is False, only
1295 the names of the members are printed. If it is True, an `ls -l'-like
1300 for tarinfo
in self
:
1302 print filemode(tarinfo
.mode
),
1303 print "%s/%s" % (tarinfo
.uname
or tarinfo
.uid
,
1304 tarinfo
.gname
or tarinfo
.gid
),
1305 if tarinfo
.ischr() or tarinfo
.isblk():
1306 print "%10s" % ("%d,%d" \
1307 % (tarinfo
.devmajor
, tarinfo
.devminor
)),
1309 print "%10d" % tarinfo
.size
,
1310 print "%d-%02d-%02d %02d:%02d:%02d" \
1311 % time
.localtime(tarinfo
.mtime
)[:6],
1317 print "->", tarinfo
.linkname
,
1319 print "link to", tarinfo
.linkname
,
1322 def add(self
, name
, arcname
=None, recursive
=True):
1323 """Add the file `name' to the archive. `name' may be any type of file
1324 (directory, fifo, symbolic link, etc.). If given, `arcname'
1325 specifies an alternative name for the file in the archive.
1326 Directories are added recursively by default. This can be avoided by
1327 setting `recursive' to False.
1334 # Skip if somebody tries to archive the archive...
1335 if self
.name
is not None \
1336 and os
.path
.abspath(name
) == os
.path
.abspath(self
.name
):
1337 self
._dbg
(2, "tarfile: Skipped %r" % name
)
1340 # Special case: The user wants to add the current
1341 # working directory.
1346 for f
in os
.listdir("."):
1347 self
.add(f
, os
.path
.join(arcname
, f
))
1352 # Create a TarInfo object from the file.
1353 tarinfo
= self
.gettarinfo(name
, arcname
)
1356 self
._dbg
(1, "tarfile: Unsupported type %r" % name
)
1359 # Append the tar header and data to the archive.
1361 f
= file(name
, "rb")
1362 self
.addfile(tarinfo
, f
)
1365 elif tarinfo
.isdir():
1366 self
.addfile(tarinfo
)
1368 for f
in os
.listdir(name
):
1369 self
.add(os
.path
.join(name
, f
), os
.path
.join(arcname
, f
))
1372 self
.addfile(tarinfo
)
1374 def addfile(self
, tarinfo
, fileobj
=None):
1375 """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
1376 given, tarinfo.size bytes are read from it and added to the archive.
1377 You can create TarInfo objects using gettarinfo().
1378 On Windows platforms, `fileobj' should always be opened with mode
1379 'rb' to avoid irritation about the file size.
1383 tarinfo
.name
= normpath(tarinfo
.name
)
1385 # directories should end with '/'
1388 if tarinfo
.linkname
:
1389 tarinfo
.linkname
= normpath(tarinfo
.linkname
)
1391 if tarinfo
.size
> MAXSIZE_MEMBER
:
1393 raise ValueError("file is too large (>= 8 GB)")
1395 self
._dbg
(2, "tarfile: Created GNU tar largefile header")
1398 if len(tarinfo
.linkname
) > LENGTH_LINK
:
1400 raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK
))
1402 self
._create
_gnulong
(tarinfo
.linkname
, GNUTYPE_LONGLINK
)
1403 tarinfo
.linkname
= tarinfo
.linkname
[:LENGTH_LINK
-1]
1404 self
._dbg
(2, "tarfile: Created GNU tar extension LONGLINK")
1406 if len(tarinfo
.name
) > LENGTH_NAME
:
1408 prefix
= tarinfo
.name
[:LENGTH_PREFIX
+ 1]
1409 while prefix
and prefix
[-1] != "/":
1410 prefix
= prefix
[:-1]
1412 name
= tarinfo
.name
[len(prefix
):]
1413 prefix
= prefix
[:-1]
1415 if not prefix
or len(name
) > LENGTH_NAME
:
1416 raise ValueError("name is too long (>%d)" % (LENGTH_NAME
))
1419 tarinfo
.prefix
= prefix
1421 self
._create
_gnulong
(tarinfo
.name
, GNUTYPE_LONGNAME
)
1422 tarinfo
.name
= tarinfo
.name
[:LENGTH_NAME
- 1]
1423 self
._dbg
(2, "tarfile: Created GNU tar extension LONGNAME")
1425 self
.fileobj
.write(tarinfo
.tobuf(self
.posix
))
1426 self
.offset
+= BLOCKSIZE
1428 # If there's data to follow, append it.
1429 if fileobj
is not None:
1430 copyfileobj(fileobj
, self
.fileobj
, tarinfo
.size
)
1431 blocks
, remainder
= divmod(tarinfo
.size
, BLOCKSIZE
)
1433 self
.fileobj
.write(NUL
* (BLOCKSIZE
- remainder
))
1435 self
.offset
+= blocks
* BLOCKSIZE
1437 self
.members
.append(tarinfo
)
1439 def extractall(self
, path
=".", members
=None):
1440 """Extract all members from the archive to the current working
1441 directory and set owner, modification time and permissions on
1442 directories afterwards. `path' specifies a different directory
1443 to extract to. `members' is optional and must be a subset of the
1444 list returned by getmembers().
1451 for tarinfo
in members
:
1453 # Extract directory with a safe mode, so that
1454 # all files below can be extracted as well.
1456 os
.makedirs(os
.path
.join(path
, tarinfo
.name
), 0777)
1457 except EnvironmentError:
1459 directories
.append(tarinfo
)
1461 self
.extract(tarinfo
, path
)
1463 # Reverse sort directories.
1464 directories
.sort(lambda a
, b
: cmp(a
.name
, b
.name
))
1465 directories
.reverse()
1467 # Set correct owner, mtime and filemode on directories.
1468 for tarinfo
in directories
:
1469 path
= os
.path
.join(path
, tarinfo
.name
)
1471 self
.chown(tarinfo
, path
)
1472 self
.utime(tarinfo
, path
)
1473 self
.chmod(tarinfo
, path
)
1474 except ExtractError
, e
:
1475 if self
.errorlevel
> 1:
1478 self
._dbg
(1, "tarfile: %s" % e
)
1480 def extract(self
, member
, path
=""):
1481 """Extract a member from the archive to the current working directory,
1482 using its full name. Its file information is extracted as accurately
1483 as possible. `member' may be a filename or a TarInfo object. You can
1484 specify a different directory using `path'.
1488 if isinstance(member
, TarInfo
):
1491 tarinfo
= self
.getmember(member
)
1493 # Prepare the link target for makelink().
1495 tarinfo
._link
_target
= os
.path
.join(path
, tarinfo
.linkname
)
1498 self
._extract
_member
(tarinfo
, os
.path
.join(path
, tarinfo
.name
))
1499 except EnvironmentError, e
:
1500 if self
.errorlevel
> 0:
1503 if e
.filename
is None:
1504 self
._dbg
(1, "tarfile: %s" % e
.strerror
)
1506 self
._dbg
(1, "tarfile: %s %r" % (e
.strerror
, e
.filename
))
1507 except ExtractError
, e
:
1508 if self
.errorlevel
> 1:
1511 self
._dbg
(1, "tarfile: %s" % e
)
1513 def extractfile(self
, member
):
1514 """Extract a member from the archive as a file object. `member' may be
1515 a filename or a TarInfo object. If `member' is a regular file, a
1516 file-like object is returned. If `member' is a link, a file-like
1517 object is constructed from the link's target. If `member' is none of
1518 the above, None is returned.
1519 The file-like object is read-only and provides the following
1520 methods: read(), readline(), readlines(), seek() and tell()
1524 if isinstance(member
, TarInfo
):
1527 tarinfo
= self
.getmember(member
)
1530 return self
.fileobject(self
, tarinfo
)
1532 elif tarinfo
.type not in SUPPORTED_TYPES
:
1533 # If a member's type is unknown, it is treated as a
1535 return self
.fileobject(self
, tarinfo
)
1537 elif tarinfo
.islnk() or tarinfo
.issym():
1538 if isinstance(self
.fileobj
, _Stream
):
1539 # A small but ugly workaround for the case that someone tries
1540 # to extract a (sym)link as a file-object from a non-seekable
1541 # stream of tar blocks.
1542 raise StreamError("cannot extract (sym)link as file object")
1544 # A (sym)link's file object is its target's file object.
1545 return self
.extractfile(self
._getmember
(tarinfo
.linkname
,
1548 # If there's no data associated with the member (directory, chrdev,
1549 # blkdev, etc.), return None instead of a file object.
1552 def _extract_member(self
, tarinfo
, targetpath
):
1553 """Extract the TarInfo object tarinfo to a physical
1554 file called targetpath.
1556 # Fetch the TarInfo object for the given name
1557 # and build the destination pathname, replacing
1558 # forward slashes to platform specific separators.
1559 if targetpath
[-1:] == "/":
1560 targetpath
= targetpath
[:-1]
1561 targetpath
= os
.path
.normpath(targetpath
)
1563 # Create all upper directories.
1564 upperdirs
= os
.path
.dirname(targetpath
)
1565 if upperdirs
and not os
.path
.exists(upperdirs
):
1570 ti
.mtime
= tarinfo
.mtime
1571 ti
.uid
= tarinfo
.uid
1572 ti
.gid
= tarinfo
.gid
1573 ti
.uname
= tarinfo
.uname
1574 ti
.gname
= tarinfo
.gname
1576 self
._extract
_member
(ti
, ti
.name
)
1580 if tarinfo
.islnk() or tarinfo
.issym():
1581 self
._dbg
(1, "%s -> %s" % (tarinfo
.name
, tarinfo
.linkname
))
1583 self
._dbg
(1, tarinfo
.name
)
1586 self
.makefile(tarinfo
, targetpath
)
1587 elif tarinfo
.isdir():
1588 self
.makedir(tarinfo
, targetpath
)
1589 elif tarinfo
.isfifo():
1590 self
.makefifo(tarinfo
, targetpath
)
1591 elif tarinfo
.ischr() or tarinfo
.isblk():
1592 self
.makedev(tarinfo
, targetpath
)
1593 elif tarinfo
.islnk() or tarinfo
.issym():
1594 self
.makelink(tarinfo
, targetpath
)
1595 elif tarinfo
.type not in SUPPORTED_TYPES
:
1596 self
.makeunknown(tarinfo
, targetpath
)
1598 self
.makefile(tarinfo
, targetpath
)
1600 self
.chown(tarinfo
, targetpath
)
1601 if not tarinfo
.issym():
1602 self
.chmod(tarinfo
, targetpath
)
1603 self
.utime(tarinfo
, targetpath
)
1605 #--------------------------------------------------------------------------
1606 # Below are the different file methods. They are called via
1607 # _extract_member() when extract() is called. They can be replaced in a
1608 # subclass to implement other functionality.
1610 def makedir(self
, tarinfo
, targetpath
):
1611 """Make a directory called targetpath.
1614 os
.mkdir(targetpath
)
1615 except EnvironmentError, e
:
1616 if e
.errno
!= errno
.EEXIST
:
1619 def makefile(self
, tarinfo
, targetpath
):
1620 """Make a file called targetpath.
1622 source
= self
.extractfile(tarinfo
)
1623 target
= file(targetpath
, "wb")
1624 copyfileobj(source
, target
)
1628 def makeunknown(self
, tarinfo
, targetpath
):
1629 """Make a file from a TarInfo object with an unknown type
1632 self
.makefile(tarinfo
, targetpath
)
1633 self
._dbg
(1, "tarfile: Unknown file type %r, " \
1634 "extracted as regular file." % tarinfo
.type)
1636 def makefifo(self
, tarinfo
, targetpath
):
1637 """Make a fifo called targetpath.
1639 if hasattr(os
, "mkfifo"):
1640 os
.mkfifo(targetpath
)
1642 raise ExtractError("fifo not supported by system")
1644 def makedev(self
, tarinfo
, targetpath
):
1645 """Make a character or block device called targetpath.
1647 if not hasattr(os
, "mknod") or not hasattr(os
, "makedev"):
1648 raise ExtractError("special devices not supported by system")
1652 mode |
= stat
.S_IFBLK
1654 mode |
= stat
.S_IFCHR
1656 os
.mknod(targetpath
, mode
,
1657 os
.makedev(tarinfo
.devmajor
, tarinfo
.devminor
))
1659 def makelink(self
, tarinfo
, targetpath
):
1660 """Make a (symbolic) link called targetpath. If it cannot be created
1661 (platform limitation), we try to make a copy of the referenced file
1664 linkpath
= tarinfo
.linkname
1667 os
.symlink(linkpath
, targetpath
)
1670 os
.link(tarinfo
._link
_target
, targetpath
)
1671 except AttributeError:
1673 linkpath
= os
.path
.join(os
.path
.dirname(tarinfo
.name
),
1675 linkpath
= normpath(linkpath
)
1678 self
._extract
_member
(self
.getmember(linkpath
), targetpath
)
1679 except (EnvironmentError, KeyError), e
:
1680 linkpath
= os
.path
.normpath(linkpath
)
1682 shutil
.copy2(linkpath
, targetpath
)
1683 except EnvironmentError, e
:
1684 raise IOError("link could not be created")
1686 def chown(self
, tarinfo
, targetpath
):
1687 """Set owner of targetpath according to tarinfo.
1689 if pwd
and hasattr(os
, "geteuid") and os
.geteuid() == 0:
1690 # We have to be root to do so.
1692 g
= grp
.getgrnam(tarinfo
.gname
)[2]
1695 g
= grp
.getgrgid(tarinfo
.gid
)[2]
1699 u
= pwd
.getpwnam(tarinfo
.uname
)[2]
1702 u
= pwd
.getpwuid(tarinfo
.uid
)[2]
1706 if tarinfo
.issym() and hasattr(os
, "lchown"):
1707 os
.lchown(targetpath
, u
, g
)
1709 if sys
.platform
!= "os2emx":
1710 os
.chown(targetpath
, u
, g
)
1711 except EnvironmentError, e
:
1712 raise ExtractError("could not change owner")
1714 def chmod(self
, tarinfo
, targetpath
):
1715 """Set file permissions of targetpath according to tarinfo.
1717 if hasattr(os
, 'chmod'):
1719 os
.chmod(targetpath
, tarinfo
.mode
)
1720 except EnvironmentError, e
:
1721 raise ExtractError("could not change mode")
1723 def utime(self
, tarinfo
, targetpath
):
1724 """Set modification time of targetpath according to tarinfo.
1726 if not hasattr(os
, 'utime'):
1728 if sys
.platform
== "win32" and tarinfo
.isdir():
1729 # According to msdn.microsoft.com, it is an error (EACCES)
1730 # to use utime() on directories.
1733 os
.utime(targetpath
, (tarinfo
.mtime
, tarinfo
.mtime
))
1734 except EnvironmentError, e
:
1735 raise ExtractError("could not change modification time")
1737 #--------------------------------------------------------------------------
1739 """Return the next member of the archive as a TarInfo object, when
1740 TarFile is opened for reading. Return None if there is no more
1744 if self
.firstmember
is not None:
1745 m
= self
.firstmember
1746 self
.firstmember
= None
1749 # Read the next block.
1750 self
.fileobj
.seek(self
.offset
)
1752 buf
= self
.fileobj
.read(BLOCKSIZE
)
1757 tarinfo
= TarInfo
.frombuf(buf
)
1759 # Set the TarInfo object's offset to the current position of the
1760 # TarFile and set self.offset to the position where the data blocks
1762 tarinfo
.offset
= self
.offset
1763 self
.offset
+= BLOCKSIZE
1765 tarinfo
= self
.proc_member(tarinfo
)
1767 except ValueError, e
:
1768 if self
.ignore_zeros
:
1769 self
._dbg
(2, "0x%X: empty or invalid block: %s" %
1771 self
.offset
+= BLOCKSIZE
1774 if self
.offset
== 0:
1775 raise ReadError("empty, unreadable or compressed "
1780 # Some old tar programs represent a directory as a regular
1781 # file with a trailing slash.
1782 if tarinfo
.isreg() and tarinfo
.name
.endswith("/"):
1783 tarinfo
.type = DIRTYPE
1785 # The prefix field is used for filenames > 100 in
1786 # the POSIX standard.
1787 # name = prefix + '/' + name
1788 tarinfo
.name
= normpath(os
.path
.join(tarinfo
.prefix
.rstrip(NUL
),
1791 # Directory names should have a '/' at the end.
1795 self
.members
.append(tarinfo
)
1798 #--------------------------------------------------------------------------
1799 # The following are methods that are called depending on the type of a
1800 # member. The entry point is proc_member() which is called with a TarInfo
1801 # object created from the header block from the current offset. The
1802 # proc_member() method can be overridden in a subclass to add custom
1803 # proc_*() methods. A proc_*() method MUST implement the following
1805 # 1. Set tarinfo.offset_data to the position where the data blocks begin,
1806 # if there is data that follows.
1807 # 2. Set self.offset to the position where the next member's header will
1809 # 3. Return tarinfo or another valid TarInfo object.
1810 def proc_member(self
, tarinfo
):
1811 """Choose the right processing method for tarinfo depending
1812 on its type and call it.
1814 if tarinfo
.type in (GNUTYPE_LONGNAME
, GNUTYPE_LONGLINK
):
1815 return self
.proc_gnulong(tarinfo
)
1816 elif tarinfo
.type == GNUTYPE_SPARSE
:
1817 return self
.proc_sparse(tarinfo
)
1819 return self
.proc_builtin(tarinfo
)
1821 def proc_builtin(self
, tarinfo
):
1822 """Process a builtin type member or an unknown member
1823 which will be treated as a regular file.
1825 tarinfo
.offset_data
= self
.offset
1826 if tarinfo
.isreg() or tarinfo
.type not in SUPPORTED_TYPES
:
1827 # Skip the following data blocks.
1828 self
.offset
+= self
._block
(tarinfo
.size
)
1831 def proc_gnulong(self
, tarinfo
):
1832 """Process the blocks that hold a GNU longname
1836 count
= tarinfo
.size
1838 block
= self
.fileobj
.read(BLOCKSIZE
)
1840 self
.offset
+= BLOCKSIZE
1843 # Fetch the next header and process it.
1844 b
= self
.fileobj
.read(BLOCKSIZE
)
1845 t
= TarInfo
.frombuf(b
)
1846 t
.offset
= self
.offset
1847 self
.offset
+= BLOCKSIZE
1848 next
= self
.proc_member(t
)
1850 # Patch the TarInfo object from the next header with
1851 # the longname information.
1852 next
.offset
= tarinfo
.offset
1853 if tarinfo
.type == GNUTYPE_LONGNAME
:
1854 next
.name
= buf
.rstrip(NUL
)
1855 elif tarinfo
.type == GNUTYPE_LONGLINK
:
1856 next
.linkname
= buf
.rstrip(NUL
)
1860 def proc_sparse(self
, tarinfo
):
1861 """Process a GNU sparse header plus extra headers.
1868 # There are 4 possible sparse structs in the
1872 offset
= nti(buf
[pos
:pos
+ 12])
1873 numbytes
= nti(buf
[pos
+ 12:pos
+ 24])
1876 if offset
> lastpos
:
1877 sp
.append(_hole(lastpos
, offset
- lastpos
))
1878 sp
.append(_data(offset
, numbytes
, realpos
))
1880 lastpos
= offset
+ numbytes
1883 isextended
= ord(buf
[482])
1884 origsize
= nti(buf
[483:495])
1886 # If the isextended flag is given,
1887 # there are extra headers to process.
1888 while isextended
== 1:
1889 buf
= self
.fileobj
.read(BLOCKSIZE
)
1890 self
.offset
+= BLOCKSIZE
1892 for i
in xrange(21):
1894 offset
= nti(buf
[pos
:pos
+ 12])
1895 numbytes
= nti(buf
[pos
+ 12:pos
+ 24])
1898 if offset
> lastpos
:
1899 sp
.append(_hole(lastpos
, offset
- lastpos
))
1900 sp
.append(_data(offset
, numbytes
, realpos
))
1902 lastpos
= offset
+ numbytes
1904 isextended
= ord(buf
[504])
1906 if lastpos
< origsize
:
1907 sp
.append(_hole(lastpos
, origsize
- lastpos
))
1911 tarinfo
.offset_data
= self
.offset
1912 self
.offset
+= self
._block
(tarinfo
.size
)
1913 tarinfo
.size
= origsize
1915 # Clear the prefix field so that it is not used
1916 # as a pathname in next().
1921 #--------------------------------------------------------------------------
1922 # Little helper methods:
1924 def _block(self
, count
):
1925 """Round up a byte count by BLOCKSIZE and return it,
1926 e.g. _block(834) => 1024.
1928 blocks
, remainder
= divmod(count
, BLOCKSIZE
)
1931 return blocks
* BLOCKSIZE
1933 def _getmember(self
, name
, tarinfo
=None):
1934 """Find an archive member by name from bottom to top.
1935 If tarinfo is given, it is used as the starting point.
1937 # Ensure that all members have been loaded.
1938 members
= self
.getmembers()
1943 end
= members
.index(tarinfo
)
1945 for i
in xrange(end
- 1, -1, -1):
1946 if name
== members
[i
].name
:
1950 """Read through the entire archive file and look for readable
1954 tarinfo
= self
.next()
1959 def _check(self
, mode
=None):
1960 """Check if TarFile is still open, and if the operation's mode
1961 corresponds to TarFile's mode.
1964 raise IOError("%s is closed" % self
.__class
__.__name
__)
1965 if mode
is not None and self
._mode
not in mode
:
1966 raise IOError("bad operation for mode %r" % self
._mode
)
1969 """Provide an iterator object.
1972 return iter(self
.members
)
1974 return TarIter(self
)
1976 def _create_gnulong(self
, name
, type):
1977 """Write a GNU longname/longlink member to the TarFile.
1978 It consists of an extended tar header, with the length
1979 of the longname as size, followed by data blocks,
1980 which contain the longname as a null terminated string.
1985 tarinfo
.name
= "././@LongLink"
1988 tarinfo
.size
= len(name
)
1990 # write extended header
1991 self
.fileobj
.write(tarinfo
.tobuf())
1992 self
.offset
+= BLOCKSIZE
1994 self
.fileobj
.write(name
)
1995 blocks
, remainder
= divmod(tarinfo
.size
, BLOCKSIZE
)
1997 self
.fileobj
.write(NUL
* (BLOCKSIZE
- remainder
))
1999 self
.offset
+= blocks
* BLOCKSIZE
2001 def _dbg(self
, level
, msg
):
2002 """Write debugging output to sys.stderr.
2004 if level
<= self
.debug
:
2005 print >> sys
.stderr
, msg
2011 for tarinfo in TarFile(...):
2015 def __init__(self
, tarfile
):
2016 """Construct a TarIter object.
2018 self
.tarfile
= tarfile
2021 """Return iterator object.
2025 """Return the next item using TarFile's next() method.
2026 When all members have been read, set TarFile as _loaded.
2028 # Fix for SF #1100429: Under rare circumstances it can
2029 # happen that getmembers() is called during iteration,
2030 # which will cause TarIter to stop prematurely.
2031 if not self
.tarfile
._loaded
:
2032 tarinfo
= self
.tarfile
.next()
2034 self
.tarfile
._loaded
= True
2038 tarinfo
= self
.tarfile
.members
[self
.index
]
2044 # Helper classes for sparse file support
2046 """Base class for _data and _hole.
2048 def __init__(self
, offset
, size
):
2049 self
.offset
= offset
def __contains__(self, offset):
    """Tell whether the position `offset' falls inside this section.

    The section starts at self.offset and is self.size long; the start
    position belongs to it, the position right after its end does not.
    """
    start = self.offset
    if not start <= offset:
        return False
    return offset < start + self.size
2054 class _data(_section
):
2055 """Represent a data section in a sparse file.
2057 def __init__(self
, offset
, size
, realpos
):
2058 _section
.__init
__(self
, offset
, size
)
2059 self
.realpos
= realpos
2061 class _hole(_section
):
2062 """Represent a hole section in a sparse file.
2066 class _ringbuffer(list):
2067 """Ringbuffer class which increases performance
2068 over a regular list.
2072 def find(self
, offset
):
2079 if idx
== len(self
):
2087 #---------------------------------------------
2088 # zipfile compatible TarFile class
2089 #---------------------------------------------
# Values accepted for the `compression' argument of TarFileCompat.
# The trailing comment on each line names the zipfile constant it
# corresponds to.
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
2092 class TarFileCompat
:
2093 """TarFile class compatible with standard module zipfile's
2096 def __init__(self
, file, mode
="r", compression
=TAR_PLAIN
):
2097 if compression
== TAR_PLAIN
:
2098 self
.tarfile
= TarFile
.taropen(file, mode
)
2099 elif compression
== TAR_GZIPPED
:
2100 self
.tarfile
= TarFile
.gzopen(file, mode
)
2102 raise ValueError("unknown compression constant")
2103 if mode
[0:1] == "r":
2104 members
= self
.tarfile
.getmembers()
2107 m
.file_size
= m
.size
2108 m
.date_time
= time
.gmtime(m
.mtime
)[:6]
2110 return map(lambda m
: m
.name
, self
.infolist())
2112 return filter(lambda m
: m
.type in REGULAR_TYPES
,
2113 self
.tarfile
.getmembers())
def getinfo(self, name):
    """Return the archive member called `name'.

    The lookup is handed to getmember() of the wrapped TarFile and its
    result is returned unchanged.
    """
    archive = self.tarfile
    return archive.getmember(name)
def read(self, name):
    """Return the complete contents of the archive member called `name'.

    The member is looked up with getmember() and opened with
    extractfile() on the wrapped TarFile.  extractfile() returns None
    for members that have no data associated with them (directories,
    devices, etc.); for those an empty string is returned instead of
    failing with an AttributeError on None.
    """
    archive = self.tarfile
    fileobj = archive.extractfile(archive.getmember(name))
    if fileobj is None:
        # Member carries no data, so there is nothing to read.
        return ""
    return fileobj.read()
def write(self, filename, arcname=None, compress_type=None):
    """Add the file `filename' to the archive, stored as `arcname' if given.

    The work is done by add() of the wrapped TarFile.  `compress_type'
    is accepted but not used by this method.
    """
    archive = self.tarfile
    archive.add(filename, arcname)
2124 def writestr(self
, zinfo
, bytes
):
2126 from cStringIO
import StringIO
2128 from StringIO
import StringIO
2130 zinfo
.name
= zinfo
.filename
2131 zinfo
.size
= zinfo
.file_size
2132 zinfo
.mtime
= calendar
.timegm(zinfo
.date_time
)
2133 self
.tarfile
.addfile(zinfo
, StringIO(bytes
))
2135 self
.tarfile
.close()
2136 #class TarFileCompat
2138 #--------------------
2139 # exported functions
2140 #--------------------
2141 def is_tarfile(name
):
2142 """Return True if name points to a tar archive that we
2143 are able to handle, else return False.