# -*- coding: iso-8859-1 -*-
#-------------------------------------------------------------------
# tarfile.py
#-------------------------------------------------------------------
# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
# All rights reserved.
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
30 """Read from and write to tar format archives.
33 __version__
= "$Revision$"
37 __author__
= "Lars Gustäbel (lars@gustaebel.de)"
40 __credits__
= "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
import sys
import os
import shutil
import stat
import errno
import time
import struct
import copy
import re
import operator

if sys.platform == 'mac':
    # This module needs work for MacOS9, especially in the area of pathname
    # handling. In many places it is assumed a simple substitution of / by the
    # local os.path.sep is good enough to convert pathnames, but this does not
    # work with the mac rooted:path:name versus :nonrooted:path:name syntax
    raise ImportError, "tarfile does not work for platform==mac"

try:
    import grp, pwd
except ImportError:
    grp = pwd = None
# from tarfile import *
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
#---------------------------------------------------------
# tar constants
#---------------------------------------------------------
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
GNU_MAGIC = "ustar  \0"         # magic gnu tar string
POSIX_MAGIC = "ustar\x0000"     # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar longname
GNUTYPE_LONGLINK = "K"          # GNU tar longlink
GNUTYPE_SPARSE = "S"            # GNU tar sparse file

XHDTYPE = "x"                   # POSIX.1-2001 extended header
XGLTYPE = "g"                   # POSIX.1-2001 global header
SOLARIS_XHDTYPE = "X"           # Solaris extended header

USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields in a pax header that are numbers, all other fields
# are treated as strings.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}
#---------------------------------------------------------
# Bits used in the mode field, values in octal.
#---------------------------------------------------------
S_IFLNK = 0120000        # symbolic link
S_IFREG = 0100000        # regular file
S_IFBLK = 0060000        # block device
S_IFDIR = 0040000        # directory
S_IFCHR = 0020000        # character device
S_IFIFO = 0010000        # fifo

TSUID   = 04000          # set UID on execution
TSGID   = 02000          # set GID on execution
TSVTX   = 01000          # reserved

TUREAD  = 0400           # read by owner
TUWRITE = 0200           # write by owner
TUEXEC  = 0100           # execute/search by owner
TGREAD  = 0040           # read by group
TGWRITE = 0020           # write by group
TGEXEC  = 0010           # execute/search by group
TOREAD  = 0004           # read by other
TOWRITE = 0002           # write by other
TOEXEC  = 0001           # execute/search by other
#---------------------------------------------------------
# initialization
#---------------------------------------------------------
ENCODING = sys.getfilesystemencoding()
if ENCODING is None:
    ENCODING = sys.getdefaultencoding()
#---------------------------------------------------------
# Some useful functions
#---------------------------------------------------------

def stn(s, length):
    """Convert a python string to a null-terminated string buffer.
    """
    return s[:length] + (length - len(s)) * NUL

def nts(s):
    """Convert a null-terminated string field to a python string.
    """
    # Use the string up to the first null char.
    p = s.find("\0")
    if p == -1:
        return s
    return s[:p]

def nti(s):
    """Convert a number field to a python number.
    """
    # There are two possible encodings for a number field, see
    # itn() below.
    if s[0] != chr(0200):
        try:
            n = int(nts(s) or "0", 8)
        except ValueError:
            raise InvalidHeaderError("invalid header")
    else:
        n = 0L
        for i in xrange(len(s) - 1):
            n <<= 8
            n += ord(s[i + 1])
    return n
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0200 byte indicates this particular
    # encoding, the following digits-1 bytes are a big-endian
    # representation. This allows values up to (256**(digits-1))-1.
    if 0 <= n < 8 ** (digits - 1):
        s = "%0*o" % (digits - 1, n) + NUL
    else:
        if format != GNU_FORMAT or n >= 256 ** (digits - 1):
            raise ValueError("overflow in number field")

        if n < 0:
            # XXX We mimic GNU tar's behaviour with negative numbers,
            # this could raise OverflowError.
            n = struct.unpack("L", struct.pack("l", n))[0]

        s = ""
        for i in xrange(digits - 1):
            s = chr(n & 0377) + s
            n >>= 8
        s = chr(0200) + s
    return s
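# Illustrative sketch (not from the original source): how the two number
# encodings above round-trip through itn() and nti(); the values are made
# up for the example.
#
#     itn(511)                          == "0000777\0"   # plain octal form
#     nti(itn(511))                     == 511
#     nti(itn(8**11, 12, GNU_FORMAT))   == 8**11         # falls back to the base-256 form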
def uts(s, encoding, errors):
    """Convert a unicode object to a string.
    """
    if errors == "utf-8":
        # An extra error handler similar to the -o invalid=UTF-8 option
        # in POSIX.1-2001. Replace untranslatable characters with their
        # UTF-8 representation.
        try:
            return s.encode(encoding, "strict")
        except UnicodeEncodeError:
            x = []
            for c in s:
                try:
                    x.append(c.encode(encoding, "strict"))
                except UnicodeEncodeError:
                    x.append(c.encode("utf8"))
            return "".join(x)
    else:
        return s.encode(encoding, errors)
def calc_chksums(buf):
    """Calculate the checksum for a member's header by summing up all
       characters except for the chksum field which is treated as if
       it was filled with spaces. According to the GNU tar sources,
       some tars (Sun and NeXT) calculate chksum with signed char,
       which will be different if there are chars in the buffer with
       the high bit set. So we calculate two checksums, unsigned and
       signed.
    """
    unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512]))
    signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512]))
    return unsigned_chksum, signed_chksum
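# Illustrative sketch (not from the original source): TarInfo.frombuf()
# below validates a 512-byte header block against the checksum stored in
# its chksum field exactly like this.
#
#     chksum = nti(buf[148:156])
#     if chksum not in calc_chksums(buf):
#         raise InvalidHeaderError("bad checksum")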
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    BUFSIZE = 16 * 1024
    blocks, remainder = divmod(length, BUFSIZE)
    for b in xrange(blocks):
        buf = src.read(BUFSIZE)
        if len(buf) < BUFSIZE:
            raise IOError("end of file reached")
        dst.write(buf)

    if remainder != 0:
        buf = src.read(remainder)
        if len(buf) < remainder:
            raise IOError("end of file reached")
        dst.write(buf)
    return
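# Illustrative usage (not from the original source): copy an exact number
# of bytes between two file objects; the file names are made up.
#
#     src = open("member.data", "rb")
#     dst = open("member.copy", "wb")
#     copyfileobj(src, dst, 512)    # raises IOError if src has fewer bytes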
filemode_table = (
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)

def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    perm = []
    for table in filemode_table:
        for bit, char in table:
            if mode & bit == bit:
                perm.append(char)
                break
        else:
            perm.append("-")
    return "".join(perm)
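# Illustrative sketch (not from the original source): filemode() turns a
# numeric mode into the familiar `ls -l' permission string.
#
#     filemode(0755)    == "-rwxr-xr-x"
#     filemode(0100644) == "-rw-r--r--"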
class TarError(Exception):
    """Base exception."""
    pass
class ExtractError(TarError):
    """General exception for extract errors."""
    pass
class ReadError(TarError):
    """Exception for unreadable tar archives."""
    pass
class CompressionError(TarError):
    """Exception for unavailable compression methods."""
    pass
class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
    pass
class HeaderError(TarError):
    """Base exception for header errors."""
    pass
class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""
    pass
class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""
    pass
class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""
    pass
class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""
    pass
class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
    pass
#---------------------------
# internal stream interface
#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.
    """

    def __init__(self, name, mode):
        mode = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        if hasattr(os, "O_BINARY"):
            mode |= os.O_BINARY
        self.fd = os.open(name, mode)

    def close(self):
        os.close(self.fd)

    def read(self, size):
        return os.read(self.fd, size)

    def write(self, s):
        os.write(self.fd, s)

class _Stream:
395 """Class that serves as an adapter between TarFile and
396 a stream-like object. The stream-like object only
397 needs to have a read() or write() method and is accessed
398 blockwise. Use of gzip or bzip2 compression is possible.
399 A stream-like object could be for example: sys.stdin,
400 sys.stdout, a socket, a tape device etc.
402 _Stream is intended to be used only internally.
405 def __init__(self
, name
, mode
, comptype
, fileobj
, bufsize
):
406 """Construct a _Stream object.
408 self
._extfileobj
= True
410 fileobj
= _LowLevelFile(name
, mode
)
411 self
._extfileobj
= False
414 # Enable transparent compression detection for the
416 fileobj
= _StreamProxy(fileobj
)
417 comptype
= fileobj
.getcomptype()
419 self
.name
= name
or ""
421 self
.comptype
= comptype
422 self
.fileobj
= fileobj
423 self
.bufsize
= bufsize
432 raise CompressionError("zlib module is not available")
434 self
.crc
= zlib
.crc32("") & 0xffffffffL
438 self
._init
_write
_gz
()
440 if comptype
== "bz2":
444 raise CompressionError("bz2 module is not available")
447 self
.cmp = bz2
.BZ2Decompressor()
449 self
.cmp = bz2
.BZ2Compressor()
452 if hasattr(self
, "closed") and not self
.closed
:
455 def _init_write_gz(self
):
456 """Initialize for writing with gzip compression.
458 self
.cmp = self
.zlib
.compressobj(9, self
.zlib
.DEFLATED
,
459 -self
.zlib
.MAX_WBITS
,
460 self
.zlib
.DEF_MEM_LEVEL
,
462 timestamp
= struct
.pack("<L", long(time
.time()))
463 self
.__write
("\037\213\010\010%s\002\377" % timestamp
)
464 if self
.name
.endswith(".gz"):
465 self
.name
= self
.name
[:-3]
466 self
.__write
(self
.name
+ NUL
)
469 """Write string s to the stream.
471 if self
.comptype
== "gz":
472 self
.crc
= self
.zlib
.crc32(s
, self
.crc
) & 0xffffffffL
474 if self
.comptype
!= "tar":
475 s
= self
.cmp.compress(s
)
478 def __write(self
, s
):
479 """Write string s to the stream if a whole new block
480 is ready to be written.
483 while len(self
.buf
) > self
.bufsize
:
484 self
.fileobj
.write(self
.buf
[:self
.bufsize
])
485 self
.buf
= self
.buf
[self
.bufsize
:]
488 """Close the _Stream object. No operation should be
489 done on it afterwards.
494 if self
.mode
== "w" and self
.comptype
!= "tar":
495 self
.buf
+= self
.cmp.flush()
497 if self
.mode
== "w" and self
.buf
:
498 self
.fileobj
.write(self
.buf
)
500 if self
.comptype
== "gz":
501 # The native zlib crc is an unsigned 32-bit integer, but
502 # the Python wrapper implicitly casts that to a signed C
503 # long. So, on a 32-bit box self.crc may "look negative",
504 # while the same crc on a 64-bit box may "look positive".
505 # To avoid irksome warnings from the `struct` module, force
506 # it to look positive on all boxes.
507 self
.fileobj
.write(struct
.pack("<L", self
.crc
& 0xffffffffL
))
508 self
.fileobj
.write(struct
.pack("<L", self
.pos
& 0xffffFFFFL
))
510 if not self
._extfileobj
:
515 def _init_read_gz(self
):
516 """Initialize for reading a gzip compressed fileobj.
518 self
.cmp = self
.zlib
.decompressobj(-self
.zlib
.MAX_WBITS
)
521 # taken from gzip.GzipFile with some alterations
522 if self
.__read
(2) != "\037\213":
523 raise ReadError("not a gzip file")
524 if self
.__read
(1) != "\010":
525 raise CompressionError("unsupported compression method")
527 flag
= ord(self
.__read
(1))
531 xlen
= ord(self
.__read
(1)) + 256 * ord(self
.__read
(1))
536 if not s
or s
== NUL
:
541 if not s
or s
== NUL
:
547 """Return the stream's file pointer position.
551 def seek(self
, pos
=0):
552 """Set the stream's file pointer to pos. Negative seeking
555 if pos
- self
.pos
>= 0:
556 blocks
, remainder
= divmod(pos
- self
.pos
, self
.bufsize
)
557 for i
in xrange(blocks
):
558 self
.read(self
.bufsize
)
561 raise StreamError("seeking backwards is not allowed")
564 def read(self
, size
=None):
565 """Return the next size number of bytes from the stream.
566 If size is not defined, return all bytes of the stream
572 buf
= self
._read
(self
.bufsize
)
578 buf
= self
._read
(size
)
582 def _read(self
, size
):
583 """Return size bytes from the stream.
585 if self
.comptype
== "tar":
586 return self
.__read
(size
)
591 buf
= self
.__read
(self
.bufsize
)
595 buf
= self
.cmp.decompress(buf
)
597 raise ReadError("invalid compressed data")
604 def __read(self
, size
):
605 """Return size bytes from stream. If internal buffer is empty,
606 read another block from the stream.
611 buf
= self
.fileobj
.read(self
.bufsize
)
621 class _StreamProxy(object):
622 """Small proxy class that enables transparent compression
623 detection for the Stream interface (mode 'r|*').
626 def __init__(self
, fileobj
):
627 self
.fileobj
= fileobj
628 self
.buf
= self
.fileobj
.read(BLOCKSIZE
)
630 def read(self
, size
):
631 self
.read
= self
.fileobj
.read
634 def getcomptype(self
):
635 if self
.buf
.startswith("\037\213\010"):
637 if self
.buf
.startswith("BZh91"):
645 class _BZ2Proxy(object):
646 """Small proxy class that enables external file object
647 support for "r:bz2" and "w:bz2" modes. This is actually
648 a workaround for a limitation in bz2 module's BZ2File
649 class which (unlike gzip.GzipFile) has no support for
650 a file object argument.
653 blocksize
= 16 * 1024
655 def __init__(self
, fileobj
, mode
):
656 self
.fileobj
= fileobj
658 self
.name
= getattr(self
.fileobj
, "name", None)
665 self
.bz2obj
= bz2
.BZ2Decompressor()
669 self
.bz2obj
= bz2
.BZ2Compressor()
671 def read(self
, size
):
675 raw
= self
.fileobj
.read(self
.blocksize
)
678 data
= self
.bz2obj
.decompress(raw
)
681 self
.buf
= "".join(b
)
683 buf
= self
.buf
[:size
]
684 self
.buf
= self
.buf
[size
:]
691 self
.read(pos
- self
.pos
)
696 def write(self
, data
):
697 self
.pos
+= len(data
)
698 raw
= self
.bz2obj
.compress(data
)
699 self
.fileobj
.write(raw
)
703 raw
= self
.bz2obj
.flush()
704 self
.fileobj
.write(raw
)
707 #------------------------
708 # Extraction file object
709 #------------------------
710 class _FileInFile(object):
711 """A thin wrapper around an existing file object that
712 provides a part of its data as an individual file
716 def __init__(self
, fileobj
, offset
, size
, sparse
=None):
717 self
.fileobj
= fileobj
724 """Return the current file position.
728 def seek(self
, position
):
729 """Seek to a position in the file.
731 self
.position
= position
733 def read(self
, size
=None):
734 """Read data from the file.
737 size
= self
.size
- self
.position
739 size
= min(size
, self
.size
- self
.position
)
741 if self
.sparse
is None:
742 return self
.readnormal(size
)
744 return self
.readsparse(size
)
746 def readnormal(self
, size
):
747 """Read operation for regular files.
749 self
.fileobj
.seek(self
.offset
+ self
.position
)
750 self
.position
+= size
751 return self
.fileobj
.read(size
)
753 def readsparse(self
, size
):
754 """Read operation for sparse files.
758 buf
= self
.readsparsesection(size
)
765 def readsparsesection(self
, size
):
766 """Read a single section of a sparse file.
768 section
= self
.sparse
.find(self
.position
)
773 size
= min(size
, section
.offset
+ section
.size
- self
.position
)
775 if isinstance(section
, _data
):
776 realpos
= section
.realpos
+ self
.position
- section
.offset
777 self
.fileobj
.seek(self
.offset
+ realpos
)
778 self
.position
+= size
779 return self
.fileobj
.read(size
)
781 self
.position
+= size
786 class ExFileObject(object):
787 """File-like object for reading an archive member.
788 Is returned by TarFile.extractfile().
792 def __init__(self
, tarfile
, tarinfo
):
793 self
.fileobj
= _FileInFile(tarfile
.fileobj
,
796 getattr(tarinfo
, "sparse", None))
797 self
.name
= tarinfo
.name
800 self
.size
= tarinfo
.size
805 def read(self
, size
=None):
806 """Read at most size bytes from the file. If size is not
807 present or None, read all data until EOF is reached.
810 raise ValueError("I/O operation on closed file")
818 buf
= self
.buffer[:size
]
819 self
.buffer = self
.buffer[size
:]
822 buf
+= self
.fileobj
.read()
824 buf
+= self
.fileobj
.read(size
- len(buf
))
826 self
.position
+= len(buf
)
829 def readline(self
, size
=-1):
830 """Read one entire line from the file. If size is present
831 and non-negative, return a string with at most that
832 size, which may be an incomplete line.
835 raise ValueError("I/O operation on closed file")
837 if "\n" in self
.buffer:
838 pos
= self
.buffer.find("\n") + 1
840 buffers
= [self
.buffer]
842 buf
= self
.fileobj
.read(self
.blocksize
)
844 if not buf
or "\n" in buf
:
845 self
.buffer = "".join(buffers
)
846 pos
= self
.buffer.find("\n") + 1
849 pos
= len(self
.buffer)
855 buf
= self
.buffer[:pos
]
856 self
.buffer = self
.buffer[pos
:]
857 self
.position
+= len(buf
)
861 """Return a list with all remaining lines.
865 line
= self
.readline()
871 """Return the current file position.
874 raise ValueError("I/O operation on closed file")
878 def seek(self
, pos
, whence
=os
.SEEK_SET
):
879 """Seek to a position in the file.
882 raise ValueError("I/O operation on closed file")
884 if whence
== os
.SEEK_SET
:
885 self
.position
= min(max(pos
, 0), self
.size
)
886 elif whence
== os
.SEEK_CUR
:
888 self
.position
= max(self
.position
+ pos
, 0)
890 self
.position
= min(self
.position
+ pos
, self
.size
)
891 elif whence
== os
.SEEK_END
:
892 self
.position
= max(min(self
.size
+ pos
, self
.size
), 0)
894 raise ValueError("Invalid argument")
897 self
.fileobj
.seek(self
.position
)
900 """Close the file object.
905 """Get an iterator over the file's lines.
908 line
= self
.readline()
917 class TarInfo(object):
918 """Informational class which holds the details about an
919 archive member given by a tar header block.
920 TarInfo objects are returned by TarFile.getmember(),
921 TarFile.getmembers() and TarFile.gettarinfo() and are
922 usually created internally.
925 def __init__(self
, name
=""):
926 """Construct a TarInfo object. name is the optional name
929 self
.name
= name
# member name
930 self
.mode
= 0644 # file permissions
931 self
.uid
= 0 # user id
932 self
.gid
= 0 # group id
933 self
.size
= 0 # file size
934 self
.mtime
= 0 # modification time
935 self
.chksum
= 0 # header checksum
936 self
.type = REGTYPE
# member type
937 self
.linkname
= "" # link name
938 self
.uname
= "root" # user name
939 self
.gname
= "root" # group name
940 self
.devmajor
= 0 # device major number
941 self
.devminor
= 0 # device minor number
943 self
.offset
= 0 # the tar header starts here
944 self
.offset_data
= 0 # the file's data starts here
946 self
.pax_headers
= {} # pax header information
948 # In pax headers the "name" and "linkname" field are called
949 # "path" and "linkpath".
952 def _setpath(self
, name
):
954 path
= property(_getpath
, _setpath
)
956 def _getlinkpath(self
):
958 def _setlinkpath(self
, linkname
):
959 self
.linkname
= linkname
960 linkpath
= property(_getlinkpath
, _setlinkpath
)
963 return "<%s %r at %#x>" % (self
.__class
__.__name
__,self
.name
,id(self
))
965 def get_info(self
, encoding
, errors
):
966 """Return the TarInfo's attributes as a dictionary.
970 "mode": self
.mode
& 07777,
975 "chksum": self
.chksum
,
977 "linkname": self
.linkname
,
980 "devmajor": self
.devmajor
,
981 "devminor": self
.devminor
984 if info
["type"] == DIRTYPE
and not info
["name"].endswith("/"):
987 for key
in ("name", "linkname", "uname", "gname"):
988 if type(info
[key
]) is unicode:
989 info
[key
] = info
[key
].encode(encoding
, errors
)
993 def tobuf(self
, format
=DEFAULT_FORMAT
, encoding
=ENCODING
, errors
="strict"):
994 """Return a tar header as a string of 512 byte blocks.
996 info
= self
.get_info(encoding
, errors
)
998 if format
== USTAR_FORMAT
:
999 return self
.create_ustar_header(info
)
1000 elif format
== GNU_FORMAT
:
1001 return self
.create_gnu_header(info
)
1002 elif format
== PAX_FORMAT
:
1003 return self
.create_pax_header(info
, encoding
, errors
)
1005 raise ValueError("invalid format")
1007 def create_ustar_header(self
, info
):
1008 """Return the object as a ustar header block.
1010 info
["magic"] = POSIX_MAGIC
1012 if len(info
["linkname"]) > LENGTH_LINK
:
1013 raise ValueError("linkname is too long")
1015 if len(info
["name"]) > LENGTH_NAME
:
1016 info
["prefix"], info
["name"] = self
._posix
_split
_name
(info
["name"])
1018 return self
._create
_header
(info
, USTAR_FORMAT
)
1020 def create_gnu_header(self
, info
):
1021 """Return the object as a GNU header block sequence.
1023 info
["magic"] = GNU_MAGIC
1026 if len(info
["linkname"]) > LENGTH_LINK
:
1027 buf
+= self
._create
_gnu
_long
_header
(info
["linkname"], GNUTYPE_LONGLINK
)
1029 if len(info
["name"]) > LENGTH_NAME
:
1030 buf
+= self
._create
_gnu
_long
_header
(info
["name"], GNUTYPE_LONGNAME
)
1032 return buf
+ self
._create
_header
(info
, GNU_FORMAT
)
1034 def create_pax_header(self
, info
, encoding
, errors
):
1035 """Return the object as a ustar header block. If it cannot be
1036 represented this way, prepend a pax extended header sequence
1037 with supplement information.
1039 info
["magic"] = POSIX_MAGIC
1040 pax_headers
= self
.pax_headers
.copy()
1042 # Test string fields for values that exceed the field length or cannot
1043 # be represented in ASCII encoding.
1044 for name
, hname
, length
in (
1045 ("name", "path", LENGTH_NAME
), ("linkname", "linkpath", LENGTH_LINK
),
1046 ("uname", "uname", 32), ("gname", "gname", 32)):
1048 if hname
in pax_headers
:
1049 # The pax header has priority.
1052 val
= info
[name
].decode(encoding
, errors
)
1054 # Try to encode the string as ASCII.
1057 except UnicodeEncodeError:
1058 pax_headers
[hname
] = val
1061 if len(info
[name
]) > length
:
1062 pax_headers
[hname
] = val
# Test number fields for values that exceed the field limit or values
# that need to be stored as float.
1066 for name
, digits
in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
1067 if name
in pax_headers
:
1068 # The pax header has priority. Avoid overflow.
1073 if not 0 <= val
< 8 ** (digits
- 1) or isinstance(val
, float):
1074 pax_headers
[name
] = unicode(val
)
1077 # Create a pax extended header if necessary.
1079 buf
= self
._create
_pax
_generic
_header
(pax_headers
)
1083 return buf
+ self
._create
_header
(info
, USTAR_FORMAT
)
1086 def create_pax_global_header(cls
, pax_headers
):
1087 """Return the object as a pax global header block sequence.
1089 return cls
._create
_pax
_generic
_header
(pax_headers
, type=XGLTYPE
)
1091 def _posix_split_name(self
, name
):
1092 """Split a name longer than 100 chars into a prefix
1095 prefix
= name
[:LENGTH_PREFIX
+ 1]
1096 while prefix
and prefix
[-1] != "/":
1097 prefix
= prefix
[:-1]
1099 name
= name
[len(prefix
):]
1100 prefix
= prefix
[:-1]
1102 if not prefix
or len(name
) > LENGTH_NAME
:
1103 raise ValueError("name is too long")
1107 def _create_header(info
, format
):
1108 """Return a header block. info is a dictionary with file
1109 information, format must be one of the *_FORMAT constants.
1112 stn(info
.get("name", ""), 100),
1113 itn(info
.get("mode", 0) & 07777, 8, format
),
1114 itn(info
.get("uid", 0), 8, format
),
1115 itn(info
.get("gid", 0), 8, format
),
1116 itn(info
.get("size", 0), 12, format
),
1117 itn(info
.get("mtime", 0), 12, format
),
1118 " ", # checksum field
1119 info
.get("type", REGTYPE
),
1120 stn(info
.get("linkname", ""), 100),
1121 stn(info
.get("magic", POSIX_MAGIC
), 8),
1122 stn(info
.get("uname", "root"), 32),
1123 stn(info
.get("gname", "root"), 32),
1124 itn(info
.get("devmajor", 0), 8, format
),
1125 itn(info
.get("devminor", 0), 8, format
),
1126 stn(info
.get("prefix", ""), 155)
1129 buf
= struct
.pack("%ds" % BLOCKSIZE
, "".join(parts
))
1130 chksum
= calc_chksums(buf
[-BLOCKSIZE
:])[0]
1131 buf
= buf
[:-364] + "%06o\0" % chksum
+ buf
[-357:]
1135 def _create_payload(payload
):
1136 """Return the string payload filled with zero bytes
1137 up to the next 512 byte border.
1139 blocks
, remainder
= divmod(len(payload
), BLOCKSIZE
)
1141 payload
+= (BLOCKSIZE
- remainder
) * NUL
1145 def _create_gnu_long_header(cls
, name
, type):
1146 """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
1152 info
["name"] = "././@LongLink"
1154 info
["size"] = len(name
)
1155 info
["magic"] = GNU_MAGIC
1157 # create extended header + name blocks.
1158 return cls
._create
_header
(info
, USTAR_FORMAT
) + \
1159 cls
._create
_payload
(name
)
1162 def _create_pax_generic_header(cls
, pax_headers
, type=XHDTYPE
):
1163 """Return a POSIX.1-2001 extended or global header sequence
1164 that contains a list of keyword, value pairs. The values
1165 must be unicode objects.
1168 for keyword
, value
in pax_headers
.iteritems():
1169 keyword
= keyword
.encode("utf8")
1170 value
= value
.encode("utf8")
1171 l
= len(keyword
) + len(value
) + 3 # ' ' + '=' + '\n'
1178 records
.append("%d %s=%s\n" % (p
, keyword
, value
))
1179 records
= "".join(records
)
1181 # We use a hardcoded "././@PaxHeader" name like star does
1182 # instead of the one that POSIX recommends.
1184 info
["name"] = "././@PaxHeader"
1186 info
["size"] = len(records
)
1187 info
["magic"] = POSIX_MAGIC
1189 # Create pax header + record blocks.
1190 return cls
._create
_header
(info
, USTAR_FORMAT
) + \
1191 cls
._create
_payload
(records
)
1194 def frombuf(cls
, buf
):
1195 """Construct a TarInfo object from a 512 byte string buffer.
1198 raise EmptyHeaderError("empty header")
1199 if len(buf
) != BLOCKSIZE
:
1200 raise TruncatedHeaderError("truncated header")
1201 if buf
.count(NUL
) == BLOCKSIZE
:
1202 raise EOFHeaderError("end of file header")
1204 chksum
= nti(buf
[148:156])
1205 if chksum
not in calc_chksums(buf
):
1206 raise InvalidHeaderError("bad checksum")
1210 obj
.name
= nts(buf
[0:100])
1211 obj
.mode
= nti(buf
[100:108])
1212 obj
.uid
= nti(buf
[108:116])
1213 obj
.gid
= nti(buf
[116:124])
1214 obj
.size
= nti(buf
[124:136])
1215 obj
.mtime
= nti(buf
[136:148])
1217 obj
.type = buf
[156:157]
1218 obj
.linkname
= nts(buf
[157:257])
1219 obj
.uname
= nts(buf
[265:297])
1220 obj
.gname
= nts(buf
[297:329])
1221 obj
.devmajor
= nti(buf
[329:337])
1222 obj
.devminor
= nti(buf
[337:345])
1223 prefix
= nts(buf
[345:500])
1225 # Old V7 tar format represents a directory as a regular
1226 # file with a trailing slash.
1227 if obj
.type == AREGTYPE
and obj
.name
.endswith("/"):
1230 # Remove redundant slashes from directories.
1232 obj
.name
= obj
.name
.rstrip("/")
1234 # Reconstruct a ustar longname.
1235 if prefix
and obj
.type not in GNU_TYPES
:
1236 obj
.name
= prefix
+ "/" + obj
.name
1240 def fromtarfile(cls
, tarfile
):
1241 """Return the next TarInfo object from TarFile object
1244 buf
= tarfile
.fileobj
.read(BLOCKSIZE
)
1245 obj
= cls
.frombuf(buf
)
1246 obj
.offset
= tarfile
.fileobj
.tell() - BLOCKSIZE
1247 return obj
._proc
_member
(tarfile
)
1249 #--------------------------------------------------------------------------
1250 # The following are methods that are called depending on the type of a
1251 # member. The entry point is _proc_member() which can be overridden in a
1252 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1253 # implement the following
1255 # 1. Set self.offset_data to the position where the data blocks begin,
1256 # if there is data that follows.
1257 # 2. Set tarfile.offset to the position where the next member's header will
1259 # 3. Return self or another valid TarInfo object.
1260 def _proc_member(self
, tarfile
):
1261 """Choose the right processing method depending on
1262 the type and call it.
1264 if self
.type in (GNUTYPE_LONGNAME
, GNUTYPE_LONGLINK
):
1265 return self
._proc
_gnulong
(tarfile
)
1266 elif self
.type == GNUTYPE_SPARSE
:
1267 return self
._proc
_sparse
(tarfile
)
1268 elif self
.type in (XHDTYPE
, XGLTYPE
, SOLARIS_XHDTYPE
):
1269 return self
._proc
_pax
(tarfile
)
1271 return self
._proc
_builtin
(tarfile
)
1273 def _proc_builtin(self
, tarfile
):
1274 """Process a builtin type or an unknown type which
1275 will be treated as a regular file.
1277 self
.offset_data
= tarfile
.fileobj
.tell()
1278 offset
= self
.offset_data
1279 if self
.isreg() or self
.type not in SUPPORTED_TYPES
:
1280 # Skip the following data blocks.
1281 offset
+= self
._block
(self
.size
)
1282 tarfile
.offset
= offset
1284 # Patch the TarInfo object with saved global
1285 # header information.
1286 self
._apply
_pax
_info
(tarfile
.pax_headers
, tarfile
.encoding
, tarfile
.errors
)
1290 def _proc_gnulong(self
, tarfile
):
1291 """Process the blocks that hold a GNU longname
1294 buf
= tarfile
.fileobj
.read(self
._block
(self
.size
))
1296 # Fetch the next header and process it.
1298 next
= self
.fromtarfile(tarfile
)
1300 raise SubsequentHeaderError("missing or bad subsequent header")
1302 # Patch the TarInfo object from the next header with
1303 # the longname information.
1304 next
.offset
= self
.offset
1305 if self
.type == GNUTYPE_LONGNAME
:
1306 next
.name
= nts(buf
)
1307 elif self
.type == GNUTYPE_LONGLINK
:
1308 next
.linkname
= nts(buf
)
1312 def _proc_sparse(self
, tarfile
):
1313 """Process a GNU sparse header plus extra headers.
1320 # There are 4 possible sparse structs in the
1324 offset
= nti(buf
[pos
:pos
+ 12])
1325 numbytes
= nti(buf
[pos
+ 12:pos
+ 24])
1328 if offset
> lastpos
:
1329 sp
.append(_hole(lastpos
, offset
- lastpos
))
1330 sp
.append(_data(offset
, numbytes
, realpos
))
1332 lastpos
= offset
+ numbytes
1335 isextended
= ord(buf
[482])
1336 origsize
= nti(buf
[483:495])
1338 # If the isextended flag is given,
1339 # there are extra headers to process.
1340 while isextended
== 1:
1341 buf
= tarfile
.fileobj
.read(BLOCKSIZE
)
1343 for i
in xrange(21):
1345 offset
= nti(buf
[pos
:pos
+ 12])
1346 numbytes
= nti(buf
[pos
+ 12:pos
+ 24])
1349 if offset
> lastpos
:
1350 sp
.append(_hole(lastpos
, offset
- lastpos
))
1351 sp
.append(_data(offset
, numbytes
, realpos
))
1353 lastpos
= offset
+ numbytes
1355 isextended
= ord(buf
[504])
1357 if lastpos
< origsize
:
1358 sp
.append(_hole(lastpos
, origsize
- lastpos
))
1362 self
.offset_data
= tarfile
.fileobj
.tell()
1363 tarfile
.offset
= self
.offset_data
+ self
._block
(self
.size
)
1364 self
.size
= origsize
1368 def _proc_pax(self
, tarfile
):
1369 """Process an extended or global header as described in
1372 # Read the header information.
1373 buf
= tarfile
.fileobj
.read(self
._block
(self
.size
))
1375 # A pax header stores supplemental information for either
1376 # the following file (extended) or all following files
1378 if self
.type == XGLTYPE
:
1379 pax_headers
= tarfile
.pax_headers
1381 pax_headers
= tarfile
.pax_headers
.copy()
1383 # Parse pax header information. A record looks like that:
1384 # "%d %s=%s\n" % (length, keyword, value). length is the size
1385 # of the complete record including the length field itself and
1386 # the newline. keyword and value are both UTF-8 encoded strings.
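# Illustrative example (not from the original source): storing the keyword
# "path" with the value "foo" yields the record "12 path=foo\n"; the
# leading 12 counts every byte of the record, including the digits of the
# length field itself and the trailing newline.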
1387 regex
= re
.compile(r
"(\d+) ([^=]+)=", re
.U
)
1390 match
= regex
.match(buf
, pos
)
1394 length
, keyword
= match
.groups()
1395 length
= int(length
)
1396 value
= buf
[match
.end(2) + 1:match
.start(1) + length
- 1]
1398 keyword
= keyword
.decode("utf8")
1399 value
= value
.decode("utf8")
1401 pax_headers
[keyword
] = value
1404 # Fetch the next header.
1406 next
= self
.fromtarfile(tarfile
)
1408 raise SubsequentHeaderError("missing or bad subsequent header")
1410 if self
.type in (XHDTYPE
, SOLARIS_XHDTYPE
):
1411 # Patch the TarInfo object with the extended header info.
1412 next
._apply
_pax
_info
(pax_headers
, tarfile
.encoding
, tarfile
.errors
)
1413 next
.offset
= self
.offset
1415 if "size" in pax_headers
:
1416 # If the extended header replaces the size field,
1417 # we need to recalculate the offset where the next
1419 offset
= next
.offset_data
1420 if next
.isreg() or next
.type not in SUPPORTED_TYPES
:
1421 offset
+= next
._block
(next
.size
)
1422 tarfile
.offset
= offset
1426 def _apply_pax_info(self
, pax_headers
, encoding
, errors
):
1427 """Replace fields with supplemental information from a previous
1428 pax extended or global header.
1430 for keyword
, value
in pax_headers
.iteritems():
1431 if keyword
not in PAX_FIELDS
:
1434 if keyword
== "path":
1435 value
= value
.rstrip("/")
1437 if keyword
in PAX_NUMBER_FIELDS
:
1439 value
= PAX_NUMBER_FIELDS
[keyword
](value
)
1443 value
= uts(value
, encoding
, errors
)
1445 setattr(self
, keyword
, value
)
1447 self
.pax_headers
= pax_headers
.copy()
1449 def _block(self
, count
):
1450 """Round up a byte count by BLOCKSIZE and return it,
1451 e.g. _block(834) => 1024.
1453 blocks
, remainder
= divmod(count
, BLOCKSIZE
)
1456 return blocks
* BLOCKSIZE
1459 return self
.type in REGULAR_TYPES
1463 return self
.type == DIRTYPE
1465 return self
.type == SYMTYPE
1467 return self
.type == LNKTYPE
1469 return self
.type == CHRTYPE
1471 return self
.type == BLKTYPE
1473 return self
.type == FIFOTYPE
1475 return self
.type == GNUTYPE_SPARSE
1477 return self
.type in (CHRTYPE
, BLKTYPE
, FIFOTYPE
)
1480 class TarFile(object):
1481 """The TarFile Class provides an interface to tar archives.
1484 debug
= 0 # May be set from 0 (no msgs) to 3 (all msgs)
1486 dereference
= False # If true, add content of linked file to the
1487 # tar file, else the link.
1489 ignore_zeros
= False # If true, skips empty or invalid blocks and
1490 # continues processing.
1492 errorlevel
= 1 # If 0, fatal errors only appear in debug
1493 # messages (if debug >= 0). If > 0, errors
1494 # are passed to the caller as exceptions.
1496 format
= DEFAULT_FORMAT
# The format to use when creating an archive.
1498 encoding
= ENCODING
# Encoding for 8-bit character strings.
1500 errors
= None # Error handler for unicode conversion.
1502 tarinfo
= TarInfo
# The default TarInfo class to use.
1504 fileobject
= ExFileObject
# The default ExFileObject class to use.
1506 def __init__(self
, name
=None, mode
="r", fileobj
=None, format
=None,
1507 tarinfo
=None, dereference
=None, ignore_zeros
=None, encoding
=None,
1508 errors
=None, pax_headers
=None, debug
=None, errorlevel
=None):
1509 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1510 read from an existing archive, 'a' to append data to an existing
1511 file or 'w' to create a new file overwriting an existing one. `mode'
1513 If `fileobj' is given, it is used for reading or writing data. If it
1514 can be determined, `mode' is overridden by `fileobj's mode.
`fileobj' is not closed when TarFile is closed.
1517 if len(mode
) > 1 or mode
not in "raw":
1518 raise ValueError("mode must be 'r', 'a' or 'w'")
1520 self
._mode
= {"r": "rb", "a": "r+b", "w": "wb"}[mode
]
1523 if self
.mode
== "a" and not os
.path
.exists(name
):
1524 # Create nonexistent files in append mode.
1527 fileobj
= bltn_open(name
, self
._mode
)
1528 self
._extfileobj
= False
1530 if name
is None and hasattr(fileobj
, "name"):
1532 if hasattr(fileobj
, "mode"):
1533 self
._mode
= fileobj
.mode
1534 self
._extfileobj
= True
1535 self
.name
= os
.path
.abspath(name
) if name
else None
1536 self
.fileobj
= fileobj
1539 if format
is not None:
1540 self
.format
= format
1541 if tarinfo
is not None:
1542 self
.tarinfo
= tarinfo
1543 if dereference
is not None:
1544 self
.dereference
= dereference
1545 if ignore_zeros
is not None:
1546 self
.ignore_zeros
= ignore_zeros
1547 if encoding
is not None:
1548 self
.encoding
= encoding
1550 if errors
is not None:
1551 self
.errors
= errors
1553 self
.errors
= "utf-8"
1555 self
.errors
= "strict"
1557 if pax_headers
is not None and self
.format
== PAX_FORMAT
:
1558 self
.pax_headers
= pax_headers
1560 self
.pax_headers
= {}
1562 if debug
is not None:
1564 if errorlevel
is not None:
1565 self
.errorlevel
= errorlevel
1567 # Init datastructures.
1569 self
.members
= [] # list of members as TarInfo objects
1570 self
._loaded
= False # flag if all members have been read
1571 self
.offset
= self
.fileobj
.tell()
1572 # current position in the archive file
1573 self
.inodes
= {} # dictionary caching the inodes of
1574 # archive members already added
1577 if self
.mode
== "r":
1578 self
.firstmember
= None
1579 self
.firstmember
= self
.next()
1581 if self
.mode
== "a":
1582 # Move to the end of the archive,
1583 # before the first empty block.
1585 self
.fileobj
.seek(self
.offset
)
1587 tarinfo
= self
.tarinfo
.fromtarfile(self
)
1588 self
.members
.append(tarinfo
)
1589 except EOFHeaderError
:
1590 self
.fileobj
.seek(self
.offset
)
1592 except HeaderError
, e
:
1593 raise ReadError(str(e
))
1595 if self
.mode
in "aw":
1598 if self
.pax_headers
:
1599 buf
= self
.tarinfo
.create_pax_global_header(self
.pax_headers
.copy())
1600 self
.fileobj
.write(buf
)
1601 self
.offset
+= len(buf
)
1603 if not self
._extfileobj
:
1604 self
.fileobj
.close()
1608 def _getposix(self
):
1609 return self
.format
== USTAR_FORMAT
1610 def _setposix(self
, value
):
1612 warnings
.warn("use the format attribute instead", DeprecationWarning,
1615 self
.format
= USTAR_FORMAT
1617 self
.format
= GNU_FORMAT
1618 posix
= property(_getposix
, _setposix
)
1620 #--------------------------------------------------------------------------
1621 # Below are the classmethods which act as alternate constructors to the
1622 # TarFile class. The open() method is the only one that is needed for
1623 # public use; it is the "super"-constructor and is able to select an
1624 # adequate "sub"-constructor for a particular compression using the mapping
1627 # This concept allows one to subclass TarFile without losing the comfort of
1628 # the super-constructor. A sub-constructor is registered and made available
1629 # by adding it to the mapping in OPEN_METH.
1632 def open(cls
, name
=None, mode
="r", fileobj
=None, bufsize
=RECORDSIZE
, **kwargs
):
1633 """Open a tar archive for reading, writing or appending. Return
1634 an appropriate TarFile class.
1637 'r' or 'r:*' open for reading with transparent compression
1638 'r:' open for reading exclusively uncompressed
1639 'r:gz' open for reading with gzip compression
1640 'r:bz2' open for reading with bzip2 compression
1641 'a' or 'a:' open for appending, creating the file if necessary
1642 'w' or 'w:' open for writing without compression
1643 'w:gz' open for writing with gzip compression
1644 'w:bz2' open for writing with bzip2 compression
1646 'r|*' open a stream of tar blocks with transparent compression
1647 'r|' open an uncompressed stream of tar blocks for reading
1648 'r|gz' open a gzip compressed stream of tar blocks
1649 'r|bz2' open a bzip2 compressed stream of tar blocks
1650 'w|' open an uncompressed stream for writing
1651 'w|gz' open a gzip compressed stream for writing
1652 'w|bz2' open a bzip2 compressed stream for writing
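# Illustrative usage (not from the original source); the archive name is
# made up.
#
#     tar = tarfile.open("backup.tar.gz", "r:gz")          # seekable archive
#     tar = tarfile.open(fileobj=sys.stdin, mode="r|*")    # non-seekable stream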
1655 if not name
and not fileobj
:
1656 raise ValueError("nothing to open")
1658 if mode
in ("r", "r:*"):
1659 # Find out which *open() is appropriate for opening the file.
1660 for comptype
in cls
.OPEN_METH
:
1661 func
= getattr(cls
, cls
.OPEN_METH
[comptype
])
1662 if fileobj
is not None:
1663 saved_pos
= fileobj
.tell()
1665 return func(name
, "r", fileobj
, **kwargs
)
1666 except (ReadError
, CompressionError
), e
:
1667 if fileobj
is not None:
1668 fileobj
.seek(saved_pos
)
1670 raise ReadError("file could not be opened successfully")
1673 filemode
, comptype
= mode
.split(":", 1)
1674 filemode
= filemode
or "r"
1675 comptype
= comptype
or "tar"
1677 # Select the *open() function according to
1678 # given compression.
1679 if comptype
in cls
.OPEN_METH
:
1680 func
= getattr(cls
, cls
.OPEN_METH
[comptype
])
1682 raise CompressionError("unknown compression type %r" % comptype
)
1683 return func(name
, filemode
, fileobj
, **kwargs
)
1686 filemode
, comptype
= mode
.split("|", 1)
1687 filemode
= filemode
or "r"
1688 comptype
= comptype
or "tar"
1690 if filemode
not in "rw":
1691 raise ValueError("mode must be 'r' or 'w'")
1693 t
= cls(name
, filemode
,
1694 _Stream(name
, filemode
, comptype
, fileobj
, bufsize
),
1696 t
._extfileobj
= False
1700 return cls
.taropen(name
, mode
, fileobj
, **kwargs
)
1702 raise ValueError("undiscernible mode")
1705 def taropen(cls
, name
, mode
="r", fileobj
=None, **kwargs
):
1706 """Open uncompressed tar archive name for reading or writing.
1708 if len(mode
) > 1 or mode
not in "raw":
1709 raise ValueError("mode must be 'r', 'a' or 'w'")
1710 return cls(name
, mode
, fileobj
, **kwargs
)
1713 def gzopen(cls
, name
, mode
="r", fileobj
=None, compresslevel
=9, **kwargs
):
1714 """Open gzip compressed tar archive name for reading or writing.
1715 Appending is not allowed.
1717 if len(mode
) > 1 or mode
not in "rw":
1718 raise ValueError("mode must be 'r' or 'w'")
1723 except (ImportError, AttributeError):
1724 raise CompressionError("gzip module is not available")
1727 fileobj
= bltn_open(name
, mode
+ "b")
1730 t
= cls
.taropen(name
, mode
,
1731 gzip
.GzipFile(name
, mode
, compresslevel
, fileobj
),
1734 raise ReadError("not a gzip file")
1735 t
._extfileobj
= False
1739 def bz2open(cls
, name
, mode
="r", fileobj
=None, compresslevel
=9, **kwargs
):
1740 """Open bzip2 compressed tar archive name for reading or writing.
1741 Appending is not allowed.
1743 if len(mode
) > 1 or mode
not in "rw":
1744 raise ValueError("mode must be 'r' or 'w'.")
1749 raise CompressionError("bz2 module is not available")
1751 if fileobj
is not None:
1752 fileobj
= _BZ2Proxy(fileobj
, mode
)
1754 fileobj
= bz2
.BZ2File(name
, mode
, compresslevel
=compresslevel
)
1757 t
= cls
.taropen(name
, mode
, fileobj
, **kwargs
)
1758 except (IOError, EOFError):
1759 raise ReadError("not a bzip2 file")
1760 t
._extfileobj
= False
1763 # All *open() methods are registered here.
1765 "tar": "taropen", # uncompressed tar
1766 "gz": "gzopen", # gzip compressed tar
1767 "bz2": "bz2open" # bzip2 compressed tar
1770 #--------------------------------------------------------------------------
1771 # The public methods which TarFile provides:
1774 """Close the TarFile. In write-mode, two finishing zero blocks are
1775 appended to the archive.
1780 if self
.mode
in "aw":
1781 self
.fileobj
.write(NUL
* (BLOCKSIZE
* 2))
1782 self
.offset
+= (BLOCKSIZE
* 2)
1783 # fill up the end with zero-blocks
1784 # (like option -b20 for tar does)
1785 blocks
, remainder
= divmod(self
.offset
, RECORDSIZE
)
1787 self
.fileobj
.write(NUL
* (RECORDSIZE
- remainder
))
1789 if not self
._extfileobj
:
1790 self
.fileobj
.close()
1793 def getmember(self
, name
):
1794 """Return a TarInfo object for member `name'. If `name' can not be
1795 found in the archive, KeyError is raised. If a member occurs more
1796 than once in the archive, its last occurrence is assumed to be the
1797 most up-to-date version.
1799 tarinfo
= self
._getmember
(name
)
1801 raise KeyError("filename %r not found" % name
)
1804 def getmembers(self
):
1805 """Return the members of the archive as a list of TarInfo objects. The
1806 list has the same order as the members in the archive.
1809 if not self
._loaded
: # if we want to obtain a list of
1810 self
._load
() # all members, we first have to
1811 # scan the whole archive.
1815 """Return the members of the archive as a list of their names. It has
1816 the same order as the list returned by getmembers().
1818 return [tarinfo
.name
for tarinfo
in self
.getmembers()]
1820 def gettarinfo(self
, name
=None, arcname
=None, fileobj
=None):
1821 """Create a TarInfo object for either the file `name' or the file
1822 object `fileobj' (using os.fstat on its file descriptor). You can
1823 modify some of the TarInfo's attributes before you add it using
1824 addfile(). If given, `arcname' specifies an alternative name for the
1825 file in the archive.
1829 # When fileobj is given, replace name by
1830 # fileobj's real name.
1831 if fileobj
is not None:
1834 # Building the name of the member in the archive.
1835 # Backward slashes are converted to forward slashes,
1836 # Absolute paths are turned to relative paths.
1839 drv
, arcname
= os
.path
.splitdrive(arcname
)
1840 arcname
= arcname
.replace(os
.sep
, "/")
1841 arcname
= arcname
.lstrip("/")
1843 # Now, fill the TarInfo object with
1844 # information specific for the file.
1845 tarinfo
= self
.tarinfo()
1846 tarinfo
.tarfile
= self
1848 # Use os.stat or os.lstat, depending on platform
1849 # and if symlinks shall be resolved.
1851 if hasattr(os
, "lstat") and not self
.dereference
:
1852 statres
= os
.lstat(name
)
1854 statres
= os
.stat(name
)
1856 statres
= os
.fstat(fileobj
.fileno())
1859 stmd
= statres
.st_mode
1860 if stat
.S_ISREG(stmd
):
1861 inode
= (statres
.st_ino
, statres
.st_dev
)
1862 if not self
.dereference
and statres
.st_nlink
> 1 and \
1863 inode
in self
.inodes
and arcname
!= self
.inodes
[inode
]:
1864 # Is it a hardlink to an already
1867 linkname
= self
.inodes
[inode
]
# The inode is added only if it's valid.
1870 # For win32 it is always 0.
1873 self
.inodes
[inode
] = arcname
1874 elif stat
.S_ISDIR(stmd
):
1876 elif stat
.S_ISFIFO(stmd
):
1878 elif stat
.S_ISLNK(stmd
):
1880 linkname
= os
.readlink(name
)
1881 elif stat
.S_ISCHR(stmd
):
1883 elif stat
.S_ISBLK(stmd
):
1888 # Fill the TarInfo object with all
1889 # information we can get.
1890 tarinfo
.name
= arcname
1892 tarinfo
.uid
= statres
.st_uid
1893 tarinfo
.gid
= statres
.st_gid
1894 if stat
.S_ISREG(stmd
):
1895 tarinfo
.size
= statres
.st_size
1898 tarinfo
.mtime
= statres
.st_mtime
1900 tarinfo
.linkname
= linkname
1903 tarinfo
.uname
= pwd
.getpwuid(tarinfo
.uid
)[0]
1908 tarinfo
.gname
= grp
.getgrgid(tarinfo
.gid
)[0]
1912 if type in (CHRTYPE
, BLKTYPE
):
1913 if hasattr(os
, "major") and hasattr(os
, "minor"):
1914 tarinfo
.devmajor
= os
.major(statres
.st_rdev
)
1915 tarinfo
.devminor
= os
.minor(statres
.st_rdev
)
1918 def list(self
, verbose
=True):
1919 """Print a table of contents to sys.stdout. If `verbose' is False, only
1920 the names of the members are printed. If it is True, an `ls -l'-like
1925 for tarinfo
in self
:
1927 print filemode(tarinfo
.mode
),
1928 print "%s/%s" % (tarinfo
.uname
or tarinfo
.uid
,
1929 tarinfo
.gname
or tarinfo
.gid
),
1930 if tarinfo
.ischr() or tarinfo
.isblk():
1931 print "%10s" % ("%d,%d" \
1932 % (tarinfo
.devmajor
, tarinfo
.devminor
)),
1934 print "%10d" % tarinfo
.size
,
1935 print "%d-%02d-%02d %02d:%02d:%02d" \
1936 % time
.localtime(tarinfo
.mtime
)[:6],
1938 print tarinfo
.name
+ ("/" if tarinfo
.isdir() else ""),
1942 print "->", tarinfo
.linkname
,
1944 print "link to", tarinfo
.linkname
,
1947 def add(self
, name
, arcname
=None, recursive
=True, exclude
=None, filter=None):
1948 """Add the file `name' to the archive. `name' may be any type of file
1949 (directory, fifo, symbolic link, etc.). If given, `arcname'
1950 specifies an alternative name for the file in the archive.
1951 Directories are added recursively by default. This can be avoided by
1952 setting `recursive' to False. `exclude' is a function that should
1953 return True for each filename to be excluded. `filter' is a function
1954 that expects a TarInfo object argument and returns the changed
1955 TarInfo object, if it returns None the TarInfo object will be
1956 excluded from the archive.
1963 # Exclude pathnames.
1964 if exclude
is not None:
1966 warnings
.warn("use the filter argument instead",
1967 DeprecationWarning, 2)
1969 self
._dbg
(2, "tarfile: Excluded %r" % name
)
1972 # Skip if somebody tries to archive the archive...
1973 if self
.name
is not None and os
.path
.abspath(name
) == self
.name
:
1974 self
._dbg
(2, "tarfile: Skipped %r" % name
)
1979 # Create a TarInfo object from the file.
1980 tarinfo
= self
.gettarinfo(name
, arcname
)
1983 self
._dbg
(1, "tarfile: Unsupported type %r" % name
)
1986 # Change or exclude the TarInfo object.
1987 if filter is not None:
1988 tarinfo
= filter(tarinfo
)
1990 self
._dbg
(2, "tarfile: Excluded %r" % name
)
1993 # Append the tar header and data to the archive.
1995 f
= bltn_open(name
, "rb")
1996 self
.addfile(tarinfo
, f
)
1999 elif tarinfo
.isdir():
2000 self
.addfile(tarinfo
)
2002 for f
in os
.listdir(name
):
2003 self
.add(os
.path
.join(name
, f
), os
.path
.join(arcname
, f
),
2004 recursive
, exclude
, filter)
2007 self
.addfile(tarinfo
)
2009 def addfile(self
, tarinfo
, fileobj
=None):
2010 """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
2011 given, tarinfo.size bytes are read from it and added to the archive.
2012 You can create TarInfo objects using gettarinfo().
2013 On Windows platforms, `fileobj' should always be opened with mode
2014 'rb' to avoid irritation about the file size.
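# Illustrative sketch (not from the original source): adding in-memory data
# with addfile().  Assumes `tar' is a TarFile opened for writing; the member
# name and payload are made up.
#
#     import StringIO
#     data = "Hello, world!\n"
#     info = TarInfo("hello.txt")
#     info.size = len(data)
#     tar.addfile(info, StringIO.StringIO(data))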
2018 tarinfo
= copy
.copy(tarinfo
)
2020 buf
= tarinfo
.tobuf(self
.format
, self
.encoding
, self
.errors
)
2021 self
.fileobj
.write(buf
)
2022 self
.offset
+= len(buf
)
2024 # If there's data to follow, append it.
2025 if fileobj
is not None:
2026 copyfileobj(fileobj
, self
.fileobj
, tarinfo
.size
)
2027 blocks
, remainder
= divmod(tarinfo
.size
, BLOCKSIZE
)
2029 self
.fileobj
.write(NUL
* (BLOCKSIZE
- remainder
))
2031 self
.offset
+= blocks
* BLOCKSIZE
2033 self
.members
.append(tarinfo
)
2035 def extractall(self
, path
=".", members
=None):
2036 """Extract all members from the archive to the current working
2037 directory and set owner, modification time and permissions on
2038 directories afterwards. `path' specifies a different directory
2039 to extract to. `members' is optional and must be a subset of the
2040 list returned by getmembers().
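# Illustrative usage (not from the original source); the archive name and
# target directory are made up.
#
#     tar = tarfile.open("backup.tar.gz", "r:gz")
#     tar.extractall(path="/tmp/restore")
#     tar.close()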
2047 for tarinfo
in members
:
2049 # Extract directories with a safe mode.
2050 directories
.append(tarinfo
)
2051 tarinfo
= copy
.copy(tarinfo
)
2053 self
.extract(tarinfo
, path
)
2055 # Reverse sort directories.
2056 directories
.sort(key
=operator
.attrgetter('name'))
2057 directories
.reverse()
2059 # Set correct owner, mtime and filemode on directories.
2060 for tarinfo
in directories
:
2061 dirpath
= os
.path
.join(path
, tarinfo
.name
)
2063 self
.chown(tarinfo
, dirpath
)
2064 self
.utime(tarinfo
, dirpath
)
2065 self
.chmod(tarinfo
, dirpath
)
2066 except ExtractError
, e
:
2067 if self
.errorlevel
> 1:
2070 self
._dbg
(1, "tarfile: %s" % e
)
2072 def extract(self
, member
, path
=""):
2073 """Extract a member from the archive to the current working directory,
2074 using its full name. Its file information is extracted as accurately
2075 as possible. `member' may be a filename or a TarInfo object. You can
2076 specify a different directory using `path'.
2080 if isinstance(member
, basestring
):
2081 tarinfo
= self
.getmember(member
)
2085 # Prepare the link target for makelink().
2087 tarinfo
._link
_target
= os
.path
.join(path
, tarinfo
.linkname
)
2090 self
._extract
_member
(tarinfo
, os
.path
.join(path
, tarinfo
.name
))
2091 except EnvironmentError, e
:
2092 if self
.errorlevel
> 0:
2095 if e
.filename
is None:
2096 self
._dbg
(1, "tarfile: %s" % e
.strerror
)
2098 self
._dbg
(1, "tarfile: %s %r" % (e
.strerror
, e
.filename
))
2099 except ExtractError
, e
:
2100 if self
.errorlevel
> 1:
2103 self
._dbg
(1, "tarfile: %s" % e
)
2105 def extractfile(self
, member
):
2106 """Extract a member from the archive as a file object. `member' may be
2107 a filename or a TarInfo object. If `member' is a regular file, a
2108 file-like object is returned. If `member' is a link, a file-like
2109 object is constructed from the link's target. If `member' is none of
2110 the above, None is returned.
2111 The file-like object is read-only and provides the following
2112 methods: read(), readline(), readlines(), seek() and tell()
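# Illustrative usage (not from the original source); the member name is
# made up and `tar' is assumed to be an open TarFile.
#
#     f = tar.extractfile("etc/motd")
#     if f is not None:
#         print f.read()
#         f.close()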
2116 if isinstance(member
, basestring
):
2117 tarinfo
= self
.getmember(member
)
2122 return self
.fileobject(self
, tarinfo
)
2124 elif tarinfo
.type not in SUPPORTED_TYPES
:
2125 # If a member's type is unknown, it is treated as a
2127 return self
.fileobject(self
, tarinfo
)
2129 elif tarinfo
.islnk() or tarinfo
.issym():
2130 if isinstance(self
.fileobj
, _Stream
):
2131 # A small but ugly workaround for the case that someone tries
2132 # to extract a (sym)link as a file-object from a non-seekable
2133 # stream of tar blocks.
2134 raise StreamError("cannot extract (sym)link as file object")
2136 # A (sym)link's file object is its target's file object.
2137 return self
.extractfile(self
._getmember
(tarinfo
.linkname
,
2140 # If there's no data associated with the member (directory, chrdev,
2141 # blkdev, etc.), return None instead of a file object.
2144 def _extract_member(self
, tarinfo
, targetpath
):
2145 """Extract the TarInfo object tarinfo to a physical
2146 file called targetpath.
2148 # Fetch the TarInfo object for the given name
2149 # and build the destination pathname, replacing
2150 # forward slashes to platform specific separators.
2151 targetpath
= targetpath
.rstrip("/")
2152 targetpath
= targetpath
.replace("/", os
.sep
)
2154 # Create all upper directories.
2155 upperdirs
= os
.path
.dirname(targetpath
)
2156 if upperdirs
and not os
.path
.exists(upperdirs
):
2157 # Create directories that are not part of the archive with
2158 # default permissions.
2159 os
.makedirs(upperdirs
)
2161 if tarinfo
.islnk() or tarinfo
.issym():
2162 self
._dbg
(1, "%s -> %s" % (tarinfo
.name
, tarinfo
.linkname
))
2164 self
._dbg
(1, tarinfo
.name
)
2167 self
.makefile(tarinfo
, targetpath
)
2168 elif tarinfo
.isdir():
2169 self
.makedir(tarinfo
, targetpath
)
2170 elif tarinfo
.isfifo():
2171 self
.makefifo(tarinfo
, targetpath
)
2172 elif tarinfo
.ischr() or tarinfo
.isblk():
2173 self
.makedev(tarinfo
, targetpath
)
2174 elif tarinfo
.islnk() or tarinfo
.issym():
2175 self
.makelink(tarinfo
, targetpath
)
2176 elif tarinfo
.type not in SUPPORTED_TYPES
:
2177 self
.makeunknown(tarinfo
, targetpath
)
2179 self
.makefile(tarinfo
, targetpath
)
2181 self
.chown(tarinfo
, targetpath
)
2182 if not tarinfo
.issym():
2183 self
.chmod(tarinfo
, targetpath
)
2184 self
.utime(tarinfo
, targetpath
)
2186 #--------------------------------------------------------------------------
2187 # Below are the different file methods. They are called via
2188 # _extract_member() when extract() is called. They can be replaced in a
2189 # subclass to implement other functionality.
2191 def makedir(self
, tarinfo
, targetpath
):
2192 """Make a directory called targetpath.
2195 # Use a safe mode for the directory, the real mode is set
2196 # later in _extract_member().
2197 os
.mkdir(targetpath
, 0700)
2198 except EnvironmentError, e
:
2199 if e
.errno
!= errno
.EEXIST
:
2202 def makefile(self
, tarinfo
, targetpath
):
2203 """Make a file called targetpath.
2205 source
= self
.extractfile(tarinfo
)
2206 target
= bltn_open(targetpath
, "wb")
2207 copyfileobj(source
, target
)
2211 def makeunknown(self
, tarinfo
, targetpath
):
2212 """Make a file from a TarInfo object with an unknown type
2215 self
.makefile(tarinfo
, targetpath
)
2216 self
._dbg
(1, "tarfile: Unknown file type %r, " \
2217 "extracted as regular file." % tarinfo
.type)
2219 def makefifo(self
, tarinfo
, targetpath
):
2220 """Make a fifo called targetpath.
2222 if hasattr(os
, "mkfifo"):
2223 os
.mkfifo(targetpath
)
2225 raise ExtractError("fifo not supported by system")
    def makedev(self, tarinfo, targetpath):
        """Make a character or block device called targetpath.
        """
        if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
            raise ExtractError("special devices not supported by system")

        mode = tarinfo.mode
        if tarinfo.isblk():
            mode |= stat.S_IFBLK
        else:
            mode |= stat.S_IFCHR

        os.mknod(targetpath, mode,
                 os.makedev(tarinfo.devmajor, tarinfo.devminor))
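    # Sketch of the values makedev() passes to os.mknod(), assuming a block
    # device member with mode 0660, devmajor 8 and devminor 1 (all values are
    # illustrative):
    #
    #   mode = 0660 | stat.S_IFBLK            # add the file type bits
    #   device = os.makedev(8, 1)             # combine major and minor number
    #   os.mknod("/tmp/sda1", mode, device)   # needs sufficient privileges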
    def makelink(self, tarinfo, targetpath):
        """Make a (symbolic) link called targetpath. If it cannot be created
           (platform limitation), we try to make a copy of the referenced file
           instead of a link.
        """
        try:
            if tarinfo.issym():
                os.symlink(tarinfo.linkname, targetpath)
            else:
                # See extract().
                os.link(tarinfo._link_target, targetpath)
        except AttributeError:
            if tarinfo.issym():
                linkpath = os.path.dirname(tarinfo.name) + "/" + \
                           tarinfo.linkname
            else:
                linkpath = tarinfo.linkname

            try:
                self._extract_member(self.getmember(linkpath), targetpath)
            except (EnvironmentError, KeyError), e:
                linkpath = linkpath.replace("/", os.sep)
                try:
                    shutil.copy2(linkpath, targetpath)
                except EnvironmentError, e:
                    raise IOError("link could not be created")
    def chown(self, tarinfo, targetpath):
        """Set owner of targetpath according to tarinfo.
        """
        if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
            # We have to be root to do so.
            try:
                g = grp.getgrnam(tarinfo.gname)[2]
            except KeyError:
                try:
                    g = grp.getgrgid(tarinfo.gid)[2]
                except KeyError:
                    g = os.getgid()
            try:
                u = pwd.getpwnam(tarinfo.uname)[2]
            except KeyError:
                try:
                    u = pwd.getpwuid(tarinfo.uid)[2]
                except KeyError:
                    u = os.getuid()
            try:
                if tarinfo.issym() and hasattr(os, "lchown"):
                    os.lchown(targetpath, u, g)
                else:
                    if sys.platform != "os2emx":
                        os.chown(targetpath, u, g)
            except EnvironmentError, e:
                raise ExtractError("could not change owner")
    def chmod(self, tarinfo, targetpath):
        """Set file permissions of targetpath according to tarinfo.
        """
        if hasattr(os, 'chmod'):
            try:
                os.chmod(targetpath, tarinfo.mode)
            except EnvironmentError, e:
                raise ExtractError("could not change mode")
    def utime(self, tarinfo, targetpath):
        """Set modification time of targetpath according to tarinfo.
        """
        if not hasattr(os, 'utime'):
            return
        try:
            os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
        except EnvironmentError, e:
            raise ExtractError("could not change modification time")
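    # chown(), chmod() and utime() can likewise be overridden, e.g. to extract
    # an archive without restoring any metadata.  A minimal sketch (the name
    # PlainTarFile is made up for illustration):
    #
    #   class PlainTarFile(TarFile):
    #       def chown(self, tarinfo, targetpath):
    #           pass            # keep the extracting user's ownership
    #       def utime(self, tarinfo, targetpath):
    #           pass            # keep the current timestamps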
    #--------------------------------------------------------------------------
    def next(self):
        """Return the next member of the archive as a TarInfo object, when
           TarFile is opened for reading. Return None if there is no more
           available.
        """
        self._check("ra")
        if self.firstmember is not None:
            m = self.firstmember
            self.firstmember = None
            return m

        # Read the next block.
        self.fileobj.seek(self.offset)
        tarinfo = None
        while True:
            try:
                tarinfo = self.tarinfo.fromtarfile(self)
            except EOFHeaderError, e:
                if self.ignore_zeros:
                    self._dbg(2, "0x%X: %s" % (self.offset, e))
                    self.offset += BLOCKSIZE
                    continue
            except InvalidHeaderError, e:
                if self.ignore_zeros:
                    self._dbg(2, "0x%X: %s" % (self.offset, e))
                    self.offset += BLOCKSIZE
                    continue
                elif self.offset == 0:
                    raise ReadError(str(e))
            except EmptyHeaderError:
                if self.offset == 0:
                    raise ReadError("empty file")
            except TruncatedHeaderError, e:
                if self.offset == 0:
                    raise ReadError(str(e))
            except SubsequentHeaderError, e:
                raise ReadError(str(e))
            break

        if tarinfo is not None:
            self.members.append(tarinfo)
        else:
            self._loaded = True

        return tarinfo
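    # next() is usually driven by iteration or getmembers(), but it can be
    # called directly.  With ignore_zeros=True the loop above skips empty and
    # invalid blocks instead of stopping at the first one.  Sketch
    # ("damaged.tar" is a made-up archive name):
    #
    #   tar = TarFile.open("damaged.tar", ignore_zeros=True)
    #   while True:
    #       tarinfo = tar.next()
    #       if tarinfo is None:
    #           break
    #       print tarinfo.name
    #   tar.close()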
    #--------------------------------------------------------------------------
    # Little helper methods:

    def _getmember(self, name, tarinfo=None):
        """Find an archive member by name from bottom to top.
           If tarinfo is given, it is used as the starting point.
        """
        # Ensure that all members have been loaded.
        members = self.getmembers()

        if tarinfo is None:
            end = len(members)
        else:
            end = members.index(tarinfo)

        for i in xrange(end - 1, -1, -1):
            if name == members[i].name:
                return members[i]
2383 """Read through the entire archive file and look for readable
2387 tarinfo
= self
.next()
    def _check(self, mode=None):
        """Check if TarFile is still open, and if the operation's mode
           corresponds to TarFile's mode.
        """
        if self.closed:
            raise IOError("%s is closed" % self.__class__.__name__)
        if mode is not None and self.mode not in mode:
            raise IOError("bad operation for mode %r" % self.mode)
2402 """Provide an iterator object.
2405 return iter(self
.members
)
2407 return TarIter(self
)
    def _dbg(self, level, msg):
        """Write debugging output to sys.stderr.
        """
        if level <= self.debug:
            print >> sys.stderr, msg
# class TarFile

class TarIter:
    """Iterator Class.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Construct a TarIter object.
        """
        self.tarfile = tarfile
        self.index = 0
2429 """Return iterator object.
2433 """Return the next item using TarFile's next() method.
2434 When all members have been read, set TarFile as _loaded.
2436 # Fix for SF #1100429: Under rare circumstances it can
2437 # happen that getmembers() is called during iteration,
2438 # which will cause TarIter to stop prematurely.
2439 if not self
.tarfile
._loaded
:
2440 tarinfo
= self
.tarfile
.next()
2442 self
.tarfile
._loaded
= True
2446 tarinfo
= self
.tarfile
.members
[self
.index
]
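# TarIter is what makes "for tarinfo in TarFile.open(...)" work lazily: members
# are read from the archive on demand until it has been scanned completely.
# A minimal sketch ("backup.tar.gz" is a made-up archive name):
#
#   for tarinfo in TarFile.open("backup.tar.gz"):
#       print tarinfo.name, tarinfo.size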
# Helper classes for sparse file support
class _section:
    """Base class for _data and _hole.
    """
    def __init__(self, offset, size):
        self.offset = offset
        self.size = size
    def __contains__(self, offset):
        return self.offset <= offset < self.offset + self.size

class _data(_section):
    """Represent a data section in a sparse file.
    """
    def __init__(self, offset, size, realpos):
        _section.__init__(self, offset, size)
        self.realpos = realpos

class _hole(_section):
    """Represent a hole section in a sparse file.
    """
    pass
class _ringbuffer(list):
    """Ringbuffer class which increases performance
       over a regular list.
    """
    def __init__(self):
        self.idx = 0
    def find(self, offset):
        idx = self.idx
        while True:
            item = self[idx]
            if offset in item:
                break
            idx += 1
            if idx == len(self):
                idx = 0
            if idx == self.idx:
                # End of File
                return None
        self.idx = idx
        return item
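# Sketch of how the sparse file helpers work together (offsets and sizes are
# illustrative): a sparse member is described by alternating data and hole
# sections, and _ringbuffer.find() returns the section that contains a given
# logical offset.
#
#   sections = _ringbuffer()
#   sections.append(_data(0, 512, realpos=1024))  # 512 data bytes stored at archive position 1024
#   sections.append(_hole(512, 4096))             # followed by a 4096 byte hole
#   section = sections.find(600)                  # -> the _hole instance, since 512 <= 600 < 4608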
#---------------------------------------------
# zipfile compatible TarFile class
#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
       ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        from warnings import warnpy3k
        warnpy3k("the TarFileCompat class has been removed in Python 3.0",
                 stacklevel=2)
        if compression == TAR_PLAIN:
            self.tarfile = TarFile.taropen(file, mode)
        elif compression == TAR_GZIPPED:
            self.tarfile = TarFile.gzopen(file, mode)
        else:
            raise ValueError("unknown compression constant")
        if mode[0:1] == "r":
            members = self.tarfile.getmembers()
            for m in members:
                m.filename = m.name
                m.file_size = m.size
                m.date_time = time.gmtime(m.mtime)[:6]
    def namelist(self):
        return map(lambda m: m.name, self.infolist())
    def infolist(self):
        return filter(lambda m: m.type in REGULAR_TYPES,
                      self.tarfile.getmembers())
    def printdir(self):
        self.tarfile.list()
    def testzip(self):
        return
    def getinfo(self, name):
        return self.tarfile.getmember(name)
    def read(self, name):
        return self.tarfile.extractfile(self.tarfile.getmember(name)).read()
    def write(self, filename, arcname=None, compress_type=None):
        self.tarfile.add(filename, arcname)
    def writestr(self, zinfo, bytes):
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        tinfo = TarInfo(zinfo.filename)
        tinfo.size = len(bytes)
        tinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(tinfo, StringIO(bytes))
    def close(self):
        self.tarfile.close()
#class TarFileCompat
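# TarFileCompat mimics a small part of zipfile.ZipFile's interface and is
# deprecated (it emits a Py3k warning on construction).  A minimal sketch
# ("legacy.tar" and the member name are made up):
#
#   archive = TarFileCompat("legacy.tar", mode="r", compression=TAR_PLAIN)
#   print archive.namelist()
#   data = archive.read("some/member.txt")
#   archive.close()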
#--------------------
# exported functions
#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.