# -*- coding: iso-8859-1 -*-
#-------------------------------------------------------------------
# tarfile.py
#-------------------------------------------------------------------
# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
30 """Read from and write to tar format archives.
33 __version__
= "$Revision$"
37 __author__
= "Lars Gustäbel (lars@gustaebel.de)"
40 __credits__
= "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
import sys
import os
import shutil
import stat
import errno
import time
import struct
import copy
import re
import operator

try:
    import grp, pwd
except ImportError:
    grp = pwd = None

if sys.platform == 'mac':
    # This module needs work for MacOS9, especially in the area of pathname
    # handling. In many places it is assumed a simple substitution of / by the
    # local os.path.sep is good enough to convert pathnames, but this does not
    # work with the mac rooted:path:name versus :nonrooted:path:name syntax
    raise ImportError, "tarfile does not work for platform==mac"
# from tarfile import *
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
#---------------------------------------------------------
# tar constants
#---------------------------------------------------------
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
GNU_MAGIC = "ustar  \0"         # magic gnu tar string
POSIX_MAGIC = "ustar\x0000"     # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar longname
GNUTYPE_LONGLINK = "K"          # GNU tar longlink
GNUTYPE_SPARSE = "S"            # GNU tar sparse file

XHDTYPE = "x"                   # POSIX.1-2001 extended header
XGLTYPE = "g"                   # POSIX.1-2001 global header
SOLARIS_XHDTYPE = "X"           # Solaris extended header

USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT
#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields in a pax header that are numbers, all other fields
# are treated as strings.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}
#---------------------------------------------------------
# Bits used in the mode field, values in octal.
#---------------------------------------------------------
S_IFLNK = 0120000        # symbolic link
S_IFREG = 0100000        # regular file
S_IFBLK = 0060000        # block device
S_IFDIR = 0040000        # directory
S_IFCHR = 0020000        # character device
S_IFIFO = 0010000        # fifo

TSUID   = 04000          # set UID on execution
TSGID   = 02000          # set GID on execution
TSVTX   = 01000          # reserved

TUREAD  = 0400           # read by owner
TUWRITE = 0200           # write by owner
TUEXEC  = 0100           # execute/search by owner
TGREAD  = 0040           # read by group
TGWRITE = 0020           # write by group
TGEXEC  = 0010           # execute/search by group
TOREAD  = 0004           # read by other
TOWRITE = 0002           # write by other
TOEXEC  = 0001           # execute/search by other
#---------------------------------------------------------
# initialization
#---------------------------------------------------------
ENCODING = sys.getfilesystemencoding()
if ENCODING is None:
    ENCODING = sys.getdefaultencoding()
#---------------------------------------------------------
# Some useful functions
#---------------------------------------------------------
def stn(s, length):
    """Convert a python string to a null-terminated string buffer.
    """
    return s[:length] + (length - len(s)) * NUL

def nts(s):
    """Convert a null-terminated string field to a python string.
    """
    # Use the string up to the first null char.
    p = s.find("\0")
    if p == -1:
        return s
    return s[:p]
190 """Convert a number field to a python number.
192 # There are two possible encodings for a number field, see
194 if s
[0] != chr(0200):
196 n
= int(nts(s
) or "0", 8)
198 raise HeaderError("invalid header")
201 for i
in xrange(len(s
) - 1):
206 def itn(n
, digits
=8, format
=DEFAULT_FORMAT
):
207 """Convert a python number to a number field.
209 # POSIX 1003.1-1988 requires numbers to be encoded as a string of
210 # octal digits followed by a null-byte, this allows values up to
211 # (8**(digits-1))-1. GNU tar allows storing numbers greater than
212 # that if necessary. A leading 0200 byte indicates this particular
213 # encoding, the following digits-1 bytes are a big-endian
214 # representation. This allows values up to (256**(digits-1))-1.
215 if 0 <= n
< 8 ** (digits
- 1):
216 s
= "%0*o" % (digits
- 1, n
) + NUL
218 if format
!= GNU_FORMAT
or n
>= 256 ** (digits
- 1):
219 raise ValueError("overflow in number field")
222 # XXX We mimic GNU tar's behaviour with negative numbers,
223 # this could raise OverflowError.
224 n
= struct
.unpack("L", struct
.pack("l", n
))[0]
227 for i
in xrange(digits
- 1):
228 s
= chr(n
& 0377) + s
def uts(s, encoding, errors):
    """Convert a unicode object to a string.
    """
    if errors == "utf-8":
        # An extra error handler similar to the -o invalid=UTF-8 option
        # in POSIX.1-2001. Replace untranslatable characters with their
        # UTF-8 representation.
        try:
            return s.encode(encoding, "strict")
        except UnicodeEncodeError:
            x = []
            for c in s:
                try:
                    x.append(c.encode(encoding, "strict"))
                except UnicodeEncodeError:
                    x.append(c.encode("utf8"))
            return "".join(x)
    else:
        return s.encode(encoding, errors)
def calc_chksums(buf):
    """Calculate the checksum for a member's header by summing up all
       characters except for the chksum field which is treated as if
       it was filled with spaces. According to the GNU tar sources,
       some tars (Sun and NeXT) calculate chksum with signed char,
       which will be different if there are chars in the buffer with
       the high bit set. So we calculate two checksums, unsigned and
       signed.
    """
    unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512]))
    signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512]))
    return unsigned_chksum, signed_chksum
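
# Illustrative sketch (not part of the original module): the checksum of an
# all-NUL 512-byte block is just the 256 contributed by the blanked-out
# chksum field (eight spaces at offset 148).
def _example_header_checksum():
    unsigned, signed = calc_chksums(NUL * BLOCKSIZE)
    assert unsigned == signed == 256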
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    BUFSIZE = 16 * 1024
    blocks, remainder = divmod(length, BUFSIZE)
    for b in xrange(blocks):
        buf = src.read(BUFSIZE)
        if len(buf) < BUFSIZE:
            raise IOError("end of file reached")
        dst.write(buf)

    if remainder != 0:
        buf = src.read(remainder)
        if len(buf) < remainder:
            raise IOError("end of file reached")
        dst.write(buf)
    return
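
# Illustrative sketch (not part of the original module): copying an exact
# byte count between two file objects; a short read raises IOError.
def _example_copyfileobj():
    from cStringIO import StringIO
    src = StringIO("x" * 1000)
    dst = StringIO()
    copyfileobj(src, dst, 512)
    assert len(dst.getvalue()) == 512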
filemode_table = (
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x")))

def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    perm = []
    for table in filemode_table:
        for bit, char in table:
            if mode & bit == bit:
                perm.append(char)
                break
        else:
            perm.append("-")
    return "".join(perm)
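
# Illustrative sketch (not part of the original module): filemode() renders
# mode bits the way `ls -l` does.
def _example_filemode():
    assert filemode(S_IFREG | 0755) == "-rwxr-xr-x"
    assert filemode(S_IFDIR | 0700) == "drwx------"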
class TarError(Exception):
    """Base exception."""
    pass
class ExtractError(TarError):
    """General exception for extract errors."""
    pass
class ReadError(TarError):
    """Exception for unreadable tar archives."""
    pass
class CompressionError(TarError):
    """Exception for unavailable compression methods."""
    pass
class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
    pass
class HeaderError(TarError):
    """Exception for invalid headers."""
    pass
#---------------------------
# internal stream interface
#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.
    """

    def __init__(self, name, mode):
        mode = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        if hasattr(os, "O_BINARY"):
            mode |= os.O_BINARY
        self.fd = os.open(name, mode)

    def close(self):
        os.close(self.fd)

    def read(self, size):
        return os.read(self.fd, size)

    def write(self, s):
        os.write(self.fd, s)
380 """Class that serves as an adapter between TarFile and
381 a stream-like object. The stream-like object only
382 needs to have a read() or write() method and is accessed
383 blockwise. Use of gzip or bzip2 compression is possible.
384 A stream-like object could be for example: sys.stdin,
385 sys.stdout, a socket, a tape device etc.
387 _Stream is intended to be used only internally.
390 def __init__(self
, name
, mode
, comptype
, fileobj
, bufsize
):
391 """Construct a _Stream object.
393 self
._extfileobj
= True
395 fileobj
= _LowLevelFile(name
, mode
)
396 self
._extfileobj
= False
399 # Enable transparent compression detection for the
401 fileobj
= _StreamProxy(fileobj
)
402 comptype
= fileobj
.getcomptype()
404 self
.name
= name
or ""
406 self
.comptype
= comptype
407 self
.fileobj
= fileobj
408 self
.bufsize
= bufsize
417 raise CompressionError("zlib module is not available")
419 self
.crc
= zlib
.crc32("") & 0xffffffffL
423 self
._init
_write
_gz
()
425 if comptype
== "bz2":
429 raise CompressionError("bz2 module is not available")
432 self
.cmp = bz2
.BZ2Decompressor()
434 self
.cmp = bz2
.BZ2Compressor()
437 if hasattr(self
, "closed") and not self
.closed
:
    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                         -self.zlib.MAX_WBITS,
                                         self.zlib.DEF_MEM_LEVEL,
                                         0)
        timestamp = struct.pack("<L", long(time.time()))
        self.__write("\037\213\010\010%s\002\377" % timestamp)
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        self.__write(self.name + NUL)
454 """Write string s to the stream.
456 if self
.comptype
== "gz":
457 self
.crc
= self
.zlib
.crc32(s
, self
.crc
) & 0xffffffffL
459 if self
.comptype
!= "tar":
460 s
= self
.cmp.compress(s
)
463 def __write(self
, s
):
464 """Write string s to the stream if a whole new block
465 is ready to be written.
468 while len(self
.buf
) > self
.bufsize
:
469 self
.fileobj
.write(self
.buf
[:self
.bufsize
])
470 self
.buf
= self
.buf
[self
.bufsize
:]
473 """Close the _Stream object. No operation should be
474 done on it afterwards.
479 if self
.mode
== "w" and self
.comptype
!= "tar":
480 self
.buf
+= self
.cmp.flush()
482 if self
.mode
== "w" and self
.buf
:
483 self
.fileobj
.write(self
.buf
)
485 if self
.comptype
== "gz":
486 # The native zlib crc is an unsigned 32-bit integer, but
487 # the Python wrapper implicitly casts that to a signed C
488 # long. So, on a 32-bit box self.crc may "look negative",
489 # while the same crc on a 64-bit box may "look positive".
490 # To avoid irksome warnings from the `struct` module, force
491 # it to look positive on all boxes.
492 self
.fileobj
.write(struct
.pack("<L", self
.crc
& 0xffffffffL
))
493 self
.fileobj
.write(struct
.pack("<L", self
.pos
& 0xffffFFFFL
))
495 if not self
._extfileobj
:
    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = ""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != "\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != "\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)

        if flag & 4:
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.read(xlen)
        if flag & 8:
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            self.__read(2)
532 """Return the stream's file pointer position.
536 def seek(self
, pos
=0):
537 """Set the stream's file pointer to pos. Negative seeking
540 if pos
- self
.pos
>= 0:
541 blocks
, remainder
= divmod(pos
- self
.pos
, self
.bufsize
)
542 for i
in xrange(blocks
):
543 self
.read(self
.bufsize
)
546 raise StreamError("seeking backwards is not allowed")
    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            t = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                t.append(buf)
            buf = "".join(t)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.
        """
        if self.comptype == "tar":
            return self.__read(size)

        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            try:
                buf = self.cmp.decompress(buf)
            except IOError:
                raise ReadError("invalid compressed data")
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.buf = t[size:]
        return t[:size]
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').
    """

    def __init__(self, fileobj):
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        if self.buf.startswith("\037\213\010"):
            return "gz"
        if self.buf.startswith("BZh91"):
            return "bz2"
        return "tar"
class _BZ2Proxy(object):
    """Small proxy class that enables external file object
       support for "r:bz2" and "w:bz2" modes. This is actually
       a workaround for a limitation in bz2 module's BZ2File
       class which (unlike gzip.GzipFile) has no support for
       a file object argument.
    """

    blocksize = 16 * 1024

    def __init__(self, fileobj, mode):
        self.fileobj = fileobj
        self.mode = mode
        self.name = getattr(self.fileobj, "name", None)
        self.init()

    def init(self):
        import bz2
        self.pos = 0
        if self.mode == "r":
            self.bz2obj = bz2.BZ2Decompressor()
            self.fileobj.seek(0)
            self.buf = ""
        else:
            self.bz2obj = bz2.BZ2Compressor()

    def read(self, size):
        b = [self.buf]
        x = len(self.buf)
        while x < size:
            raw = self.fileobj.read(self.blocksize)
            if not raw:
                break
            data = self.bz2obj.decompress(raw)
            b.append(data)
            x += len(data)
        self.buf = "".join(b)

        buf = self.buf[:size]
        self.buf = self.buf[size:]
        self.pos += len(buf)
        return buf

    def seek(self, pos):
        if pos < self.pos:
            self.init()
        self.read(pos - self.pos)

    def tell(self):
        return self.pos

    def write(self, data):
        self.pos += len(data)
        raw = self.bz2obj.compress(data)
        self.fileobj.write(raw)

    def close(self):
        if self.mode == "w":
            raw = self.bz2obj.flush()
            self.fileobj.write(raw)
#------------------------
# Extraction file object
#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.
    """

    def __init__(self, fileobj, offset, size, sparse=None):
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.sparse = sparse
        self.position = 0

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position):
        """Seek to a position in the file.
        """
        self.position = position

    def read(self, size=None):
        """Read data from the file.
        """
        if size is None:
            size = self.size - self.position
        else:
            size = min(size, self.size - self.position)

        if self.sparse is None:
            return self.readnormal(size)
        else:
            return self.readsparse(size)

    def readnormal(self, size):
        """Read operation for regular files.
        """
        self.fileobj.seek(self.offset + self.position)
        self.position += size
        return self.fileobj.read(size)

    def readsparse(self, size):
        """Read operation for sparse files.
        """
        data = []
        while size > 0:
            buf = self.readsparsesection(size)
            if not buf:
                break
            size -= len(buf)
            data.append(buf)
        return "".join(data)

    def readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        section = self.sparse.find(self.position)

        if section is None:
            return ""

        size = min(size, section.offset + section.size - self.position)

        if isinstance(section, _data):
            realpos = section.realpos + self.position - section.offset
            self.fileobj.seek(self.offset + realpos)
            self.position += size
            return self.fileobj.read(size)
        else:
            self.position += size
            return NUL * size
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().
    """
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   getattr(tarinfo, "sparse", None))
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0
        self.buffer = ""

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present or None, read all data until EOF is reached.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        buf = ""
        if self.buffer:
            if size is None:
                buf = self.buffer
                self.buffer = ""
            else:
                buf = self.buffer[:size]
                self.buffer = self.buffer[size:]

        if size is None:
            buf += self.fileobj.read()
        else:
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if "\n" in self.buffer:
            pos = self.buffer.find("\n") + 1
        else:
            buffers = [self.buffer]
            while True:
                buf = self.fileobj.read(self.blocksize)
                buffers.append(buf)
                if not buf or "\n" in buf:
                    self.buffer = "".join(buffers)
                    pos = self.buffer.find("\n") + 1
                    if pos == 0:
                        # no newline found.
                        pos = len(self.buffer)
                    break

        if size != -1:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line:
                break
            result.append(line)
        return result

    def tell(self):
        """Return the current file position.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            self.position = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            if pos < 0:
                self.position = max(self.position + pos, 0)
            else:
                self.position = min(self.position + pos, self.size)
        elif whence == os.SEEK_END:
            self.position = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        self.buffer = ""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
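
# Illustrative sketch (not part of the original module): reading a member
# through the file-like object returned by TarFile.extractfile().  `tar` is
# assumed to be an already opened TarFile and `membername` a regular file
# inside it.
def _example_read_member(tar, membername):
    fobj = tar.extractfile(membername)
    first_line = fobj.readline()
    rest = fobj.read()
    fobj.close()
    return first_line, rest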
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """
        self.name = name        # member name
        self.mode = 0644        # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = "root"     # user name
        self.gname = "root"     # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.pax_headers = {}   # pax header information
    # In pax headers the "name" and "linkname" field are called
    # "path" and "linkpath".
    def _getpath(self):
        return self.name
    def _setpath(self, name):
        self.name = name
    path = property(_getpath, _setpath)

    def _getlinkpath(self):
        return self.linkname
    def _setlinkpath(self, linkname):
        self.linkname = linkname
    linkpath = property(_getlinkpath, _setlinkpath)

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__, self.name, id(self))
    def get_info(self, encoding, errors):
        """Return the TarInfo's attributes as a dictionary.
        """
        info = {
            "name":     self.name,
            "mode":     self.mode & 07777,
            "uid":      self.uid,
            "gid":      self.gid,
            "size":     self.size,
            "mtime":    self.mtime,
            "chksum":   self.chksum,
            "type":     self.type,
            "linkname": self.linkname,
            "uname":    self.uname,
            "gname":    self.gname,
            "devmajor": self.devmajor,
            "devminor": self.devminor
        }

        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
            info["name"] += "/"

        for key in ("name", "linkname", "uname", "gname"):
            if type(info[key]) is unicode:
                info[key] = info[key].encode(encoding, errors)

        return info
    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
        """Return a tar header as a string of 512 byte blocks.
        """
        info = self.get_info(encoding, errors)

        if format == USTAR_FORMAT:
            return self.create_ustar_header(info)
        elif format == GNU_FORMAT:
            return self.create_gnu_header(info)
        elif format == PAX_FORMAT:
            return self.create_pax_header(info, encoding, errors)
        else:
            raise ValueError("invalid format")
    def create_ustar_header(self, info):
        """Return the object as a ustar header block.
        """
        info["magic"] = POSIX_MAGIC

        if len(info["linkname"]) > LENGTH_LINK:
            raise ValueError("linkname is too long")

        if len(info["name"]) > LENGTH_NAME:
            info["prefix"], info["name"] = self._posix_split_name(info["name"])

        return self._create_header(info, USTAR_FORMAT)
    def create_gnu_header(self, info):
        """Return the object as a GNU header block sequence.
        """
        info["magic"] = GNU_MAGIC

        buf = ""
        if len(info["linkname"]) > LENGTH_LINK:
            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK)

        if len(info["name"]) > LENGTH_NAME:
            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME)

        return buf + self._create_header(info, GNU_FORMAT)
    def create_pax_header(self, info, encoding, errors):
        """Return the object as a ustar header block. If it cannot be
           represented this way, prepend a pax extended header sequence
           with supplement information.
        """
        info["magic"] = POSIX_MAGIC
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            val = info[name].decode(encoding, errors)

            # Try to encode the string as ASCII.
            try:
                val.encode("ascii")
            except UnicodeEncodeError:
                pax_headers[hname] = val
                continue

            if len(info[name]) > length:
                pax_headers[hname] = val

        # Test number fields for values that exceed the field limit or values
        # that like to be stored as float.
        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
            if name in pax_headers:
                # The pax header has priority. Avoid overflow.
                info[name] = 0
                continue

            val = info[name]
            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
                pax_headers[name] = unicode(val)
                info[name] = 0

        # Create a pax extended header if necessary.
        if pax_headers:
            buf = self._create_pax_generic_header(pax_headers)
        else:
            buf = ""

        return buf + self._create_header(info, USTAR_FORMAT)
    @classmethod
    def create_pax_global_header(cls, pax_headers):
        """Return the object as a pax global header block sequence.
        """
        return cls._create_pax_generic_header(pax_headers, type=XGLTYPE)
    def _posix_split_name(self, name):
        """Split a name longer than 100 chars into a prefix
           and a name part.
        """
        prefix = name[:LENGTH_PREFIX + 1]
        while prefix and prefix[-1] != "/":
            prefix = prefix[:-1]

        name = name[len(prefix):]
        prefix = prefix[:-1]

        if not prefix or len(name) > LENGTH_NAME:
            raise ValueError("name is too long")
        return prefix, name
    @staticmethod
    def _create_header(info, format):
        """Return a header block. info is a dictionary with file
           information, format must be one of the *_FORMAT constants.
        """
        parts = [
            stn(info.get("name", ""), 100),
            itn(info.get("mode", 0) & 07777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            "        ", # checksum field
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100),
            stn(info.get("magic", POSIX_MAGIC), 8),
            stn(info.get("uname", "root"), 32),
            stn(info.get("gname", "root"), 32),
            itn(info.get("devmajor", 0), 8, format),
            itn(info.get("devminor", 0), 8, format),
            stn(info.get("prefix", ""), 155)
        ]

        buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
        return buf
    @staticmethod
    def _create_payload(payload):
        """Return the string payload filled with zero bytes
           up to the next 512 byte border.
        """
        blocks, remainder = divmod(len(payload), BLOCKSIZE)
        if remainder > 0:
            payload += (BLOCKSIZE - remainder) * NUL
        return payload
    @classmethod
    def _create_gnu_long_header(cls, name, type):
        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
           for name.
        """
        name += NUL

        info = {}
        info["name"] = "././@LongLink"
        info["type"] = type
        info["size"] = len(name)
        info["magic"] = GNU_MAGIC

        # create extended header + name blocks.
        return cls._create_header(info, USTAR_FORMAT) + \
               cls._create_payload(name)
    @classmethod
    def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
        """Return a POSIX.1-2001 extended or global header sequence
           that contains a list of keyword, value pairs. The values
           must be unicode objects.
        """
        records = []
        for keyword, value in pax_headers.iteritems():
            keyword = keyword.encode("utf8")
            value = value.encode("utf8")
            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
            n = p = 0
            while True:
                n = l + len(str(p))
                if n == p:
                    break
                p = n
            records.append("%d %s=%s\n" % (p, keyword, value))
        records = "".join(records)

        # We use a hardcoded "././@PaxHeader" name like star does
        # instead of the one that POSIX recommends.
        info = {}
        info["name"] = "././@PaxHeader"
        info["type"] = type
        info["size"] = len(records)
        info["magic"] = POSIX_MAGIC

        # Create pax header + record blocks.
        return cls._create_header(info, USTAR_FORMAT) + \
               cls._create_payload(records)
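
    # Illustrative note (not part of the original module): each pax record is
    # "<length> <keyword>=<value>\n", where <length> counts the whole record,
    # including the length digits themselves, e.g.:
    #
    #     "20 path=foo/bar.txt\n"      (2 digits + 18 other characters = 20)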
    @classmethod
    def frombuf(cls, buf):
        """Construct a TarInfo object from a 512 byte string buffer.
        """
        if len(buf) != BLOCKSIZE:
            raise HeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise HeaderError("empty header")

        chksum = nti(buf[148:156])
        if chksum not in calc_chksums(buf):
            raise HeaderError("bad checksum")

        obj = cls()
        obj.buf = buf
        obj.name = nts(buf[0:100])
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257])
        obj.uname = nts(buf[265:297])
        obj.gname = nts(buf[297:329])
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500])

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj
    @classmethod
    def fromtarfile(cls, tarfile):
        """Return the next TarInfo object from TarFile object
           tarfile.
        """
        buf = tarfile.fileobj.read(BLOCKSIZE)
        if not buf:
            return
        obj = cls.frombuf(buf)
        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
        return obj._proc_member(tarfile)
    #--------------------------------------------------------------------------
    # The following are methods that are called depending on the type of a
    # member. The entry point is _proc_member() which can be overridden in a
    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following operations:
    #
    # 1. Set self.offset_data to the position where the data blocks begin,
    #    if there is data that follows.
    # 2. Set tarfile.offset to the position where the next member's header will
    #    begin.
    # 3. Return self or another valid TarInfo object.
    def _proc_member(self, tarfile):
        """Choose the right processing method depending on
           the type and call it.
        """
        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
            return self._proc_gnulong(tarfile)
        elif self.type == GNUTYPE_SPARSE:
            return self._proc_sparse(tarfile)
        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
            return self._proc_pax(tarfile)
        else:
            return self._proc_builtin(tarfile)
    def _proc_builtin(self, tarfile):
        """Process a builtin type or an unknown type which
           will be treated as a regular file.
        """
        self.offset_data = tarfile.fileobj.tell()
        offset = self.offset_data
        if self.isreg() or self.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            offset += self._block(self.size)
        tarfile.offset = offset

        # Patch the TarInfo object with saved global
        # header information.
        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

        return self
    def _proc_gnulong(self, tarfile):
        """Process the blocks that hold a GNU longname
           or longlink member.
        """
        buf = tarfile.fileobj.read(self._block(self.size))

        # Fetch the next header and process it.
        next = self.fromtarfile(tarfile)
        if next is None:
            raise HeaderError("missing subsequent header")

        # Patch the TarInfo object from the next header with
        # the longname information.
        next.offset = self.offset
        if self.type == GNUTYPE_LONGNAME:
            next.name = nts(buf)
        elif self.type == GNUTYPE_LONGLINK:
            next.linkname = nts(buf)

        return next
    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.
        """
        buf = self.buf
        sp = _ringbuffer()
        pos = 386
        lastpos = 0L
        realpos = 0L
        # There are 4 possible sparse structs in the
        # first header.
        for i in xrange(4):
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                break
            if offset > lastpos:
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24

        isextended = ord(buf[482])
        origsize = nti(buf[483:495])

        # If the isextended flag is given,
        # there are extra headers to process.
        while isextended == 1:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            for i in xrange(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset > lastpos:
                    sp.append(_hole(lastpos, offset - lastpos))
                sp.append(_data(offset, numbytes, realpos))
                realpos += numbytes
                lastpos = offset + numbytes
                pos += 24
            isextended = ord(buf[504])

        if lastpos < origsize:
            sp.append(_hole(lastpos, origsize - lastpos))

        self.sparse = sp

        self.offset_data = tarfile.fileobj.tell()
        tarfile.offset = self.offset_data + self._block(self.size)
        self.size = origsize

        return self
    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
           POSIX.1-2001.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global).
        if self.type == XGLTYPE:
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Parse pax header information. A record looks like that:
        # "%d %s=%s\n" % (length, keyword, value). length is the size
        # of the complete record including the length field itself and
        # the newline. keyword and value are both UTF-8 encoded strings.
        regex = re.compile(r"(\d+) ([^=]+)=", re.U)
        pos = 0
        while True:
            match = regex.match(buf, pos)
            if not match:
                break

            length, keyword = match.groups()
            length = int(length)
            value = buf[match.end(2) + 1:match.start(1) + length - 1]

            keyword = keyword.decode("utf8")
            value = value.decode("utf8")

            pax_headers[keyword] = value
            pos += length

        # Fetch the next header.
        next = self.fromtarfile(tarfile)

        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
            if next is None:
                raise HeaderError("missing subsequent header")

            # Patch the TarInfo object with the extended header info.
            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
            next.offset = self.offset

            if "size" in pax_headers:
                # If the extended header replaces the size field,
                # we need to recalculate the offset where the next
                # header starts.
                offset = next.offset_data
                if next.isreg() or next.type not in SUPPORTED_TYPES:
                    offset += next._block(next.size)
                tarfile.offset = offset

        return next
    def _apply_pax_info(self, pax_headers, encoding, errors):
        """Replace fields with supplemental information from a previous
           pax extended or global header.
        """
        for keyword, value in pax_headers.iteritems():
            if keyword not in PAX_FIELDS:
                continue

            if keyword == "path":
                value = value.rstrip("/")

            if keyword in PAX_NUMBER_FIELDS:
                try:
                    value = PAX_NUMBER_FIELDS[keyword](value)
                except ValueError:
                    value = 0
            else:
                value = uts(value, encoding, errors)

            setattr(self, keyword, value)

        self.pax_headers = pax_headers.copy()
    def _block(self, count):
        """Round up a byte count by BLOCKSIZE and return it,
           e.g. _block(834) => 1024.
        """
        blocks, remainder = divmod(count, BLOCKSIZE)
        if remainder:
            blocks += 1
        return blocks * BLOCKSIZE

    def isreg(self):
        return self.type in REGULAR_TYPES
    def isdir(self):
        return self.type == DIRTYPE
    def issym(self):
        return self.type == SYMTYPE
    def islnk(self):
        return self.type == LNKTYPE
    def ischr(self):
        return self.type == CHRTYPE
    def isblk(self):
        return self.type == BLKTYPE
    def isfifo(self):
        return self.type == FIFOTYPE
    def issparse(self):
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
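
# Illustrative sketch (not part of the original module): building a header
# block by hand.  tobuf() always returns whole 512-byte blocks.
def _example_tarinfo_header():
    ti = TarInfo("example.txt")
    ti.size = 42
    ti.mtime = 0
    buf = ti.tobuf(format=USTAR_FORMAT)
    assert len(buf) % BLOCKSIZE == 0
    assert TarInfo.frombuf(buf[:BLOCKSIZE]).name == "example.txt"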
class TarFile(object):
    """The TarFile Class provides an interface to tar archives.
    """

    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

    dereference = False         # If true, add content of linked file to the
                                # tar file, else the link.

    ignore_zeros = False        # If true, skips empty or invalid blocks and
                                # continues processing.

    errorlevel = 0              # If 0, fatal errors only appear in debug
                                # messages (if debug >= 0). If > 0, errors
                                # are passed to the caller as exceptions.

    format = DEFAULT_FORMAT     # The format to use when creating an archive.

    encoding = ENCODING         # Encoding for 8-bit character strings.

    errors = None               # Error handler for unicode conversion.

    tarinfo = TarInfo           # The default TarInfo class to use.

    fileobject = ExFileObject   # The default ExFileObject class to use.
    def __init__(self, name=None, mode="r", fileobj=None, format=None,
            tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
            errors=None, pax_headers=None, debug=None, errorlevel=None):
        """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
           read from an existing archive, 'a' to append data to an existing
           file or 'w' to create a new file overwriting an existing one. `mode'
           defaults to 'r'.
           If `fileobj' is given, it is used for reading or writing data. If it
           can be determined, `mode' is overridden by `fileobj's mode.
           `fileobj' is not closed, when TarFile is closed.
        """
        if len(mode) > 1 or mode not in "raw":
            raise ValueError("mode must be 'r', 'a' or 'w'")
        self.mode = mode
        self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]

        if not fileobj:
            if self.mode == "a" and not os.path.exists(name):
                # Create nonexistent files in append mode.
                self.mode = "w"
                self._mode = "wb"
            fileobj = bltn_open(name, self._mode)
            self._extfileobj = False
        else:
            if name is None and hasattr(fileobj, "name"):
                name = fileobj.name
            if hasattr(fileobj, "mode"):
                self._mode = fileobj.mode
            self._extfileobj = True
        self.name = os.path.abspath(name) if name else None
        self.fileobj = fileobj

        # Init attributes.
        if format is not None:
            self.format = format
        if tarinfo is not None:
            self.tarinfo = tarinfo
        if dereference is not None:
            self.dereference = dereference
        if ignore_zeros is not None:
            self.ignore_zeros = ignore_zeros
        if encoding is not None:
            self.encoding = encoding

        if errors is not None:
            self.errors = errors
        elif mode == "r":
            self.errors = "utf-8"
        else:
            self.errors = "strict"

        if pax_headers is not None and self.format == PAX_FORMAT:
            self.pax_headers = pax_headers
        else:
            self.pax_headers = {}

        if debug is not None:
            self.debug = debug
        if errorlevel is not None:
            self.errorlevel = errorlevel

        # Init datastructures.
        self.closed = False
        self.members = []       # list of members as TarInfo objects
        self._loaded = False    # flag if all members have been read
        self.offset = self.fileobj.tell()
                                # current position in the archive file
        self.inodes = {}        # dictionary caching the inodes of
                                # archive members already added

        if self.mode == "r":
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            self.firstmember = None
            while True:
                if self.next() is None:
                    if self.offset > 0:
                        self.fileobj.seek(- BLOCKSIZE, 1)
                    break

        if self.mode in "aw":
            self._loaded = True

            if self.pax_headers:
                buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                self.fileobj.write(buf)
                self.offset += len(buf)
    def _getposix(self):
        return self.format == USTAR_FORMAT
    def _setposix(self, value):
        import warnings
        warnings.warn("use the format attribute instead", DeprecationWarning,
                      2)
        if value:
            self.format = USTAR_FORMAT
        else:
            self.format = GNU_FORMAT
    posix = property(_getposix, _setposix)
    #--------------------------------------------------------------------------
    # Below are the classmethods which act as alternate constructors to the
    # TarFile class. The open() method is the only one that is needed for
    # public use; it is the "super"-constructor and is able to select an
    # adequate "sub"-constructor for a particular compression using the mapping
    # from OPEN_METH.
    #
    # This concept allows one to subclass TarFile without losing the comfort of
    # the super-constructor. A sub-constructor is registered and made available
    # by adding it to the mapping in OPEN_METH.

    @classmethod
    def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
        """Open a tar archive for reading, writing or appending. Return
           an appropriate TarFile class.

           mode:
           'r' or 'r:*' open for reading with transparent compression
           'r:'         open for reading exclusively uncompressed
           'r:gz'       open for reading with gzip compression
           'r:bz2'      open for reading with bzip2 compression
           'a' or 'a:'  open for appending, creating the file if necessary
           'w' or 'w:'  open for writing without compression
           'w:gz'       open for writing with gzip compression
           'w:bz2'      open for writing with bzip2 compression

           'r|*'        open a stream of tar blocks with transparent compression
           'r|'         open an uncompressed stream of tar blocks for reading
           'r|gz'       open a gzip compressed stream of tar blocks
           'r|bz2'      open a bzip2 compressed stream of tar blocks
           'w|'         open an uncompressed stream for writing
           'w|gz'       open a gzip compressed stream for writing
           'w|bz2'      open a bzip2 compressed stream for writing
        """
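
        # Illustrative examples (not part of the original module):
        #   TarFile.open("archive.tar.gz", "r:gz")   # seekable, gzip-compressed file
        #   TarFile.open(mode="r|*", fileobj=pipe)   # non-seekable stream, compression
        #                                            # detected transparently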
        if not name and not fileobj:
            raise ValueError("nothing to open")

        if mode in ("r", "r:*"):
            # Find out which *open() is appropriate for opening the file.
            for comptype in cls.OPEN_METH:
                func = getattr(cls, cls.OPEN_METH[comptype])
                if fileobj is not None:
                    saved_pos = fileobj.tell()
                try:
                    return func(name, "r", fileobj, **kwargs)
                except (ReadError, CompressionError), e:
                    if fileobj is not None:
                        fileobj.seek(saved_pos)
                    continue
            raise ReadError("file could not be opened successfully")

        elif ":" in mode:
            filemode, comptype = mode.split(":", 1)
            filemode = filemode or "r"
            comptype = comptype or "tar"

            # Select the *open() function according to
            # given compression.
            if comptype in cls.OPEN_METH:
                func = getattr(cls, cls.OPEN_METH[comptype])
            else:
                raise CompressionError("unknown compression type %r" % comptype)
            return func(name, filemode, fileobj, **kwargs)

        elif "|" in mode:
            filemode, comptype = mode.split("|", 1)
            filemode = filemode or "r"
            comptype = comptype or "tar"

            if filemode not in "rw":
                raise ValueError("mode must be 'r' or 'w'")

            t = cls(name, filemode,
                    _Stream(name, filemode, comptype, fileobj, bufsize),
                    **kwargs)
            t._extfileobj = False
            return t

        elif mode in "aw":
            return cls.taropen(name, mode, fileobj, **kwargs)

        raise ValueError("undiscernible mode")
    @classmethod
    def taropen(cls, name, mode="r", fileobj=None, **kwargs):
        """Open uncompressed tar archive name for reading or writing.
        """
        if len(mode) > 1 or mode not in "raw":
            raise ValueError("mode must be 'r', 'a' or 'w'")
        return cls(name, mode, fileobj, **kwargs)
    @classmethod
    def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
        """Open gzip compressed tar archive name for reading or writing.
           Appending is not allowed.
        """
        if len(mode) > 1 or mode not in "rw":
            raise ValueError("mode must be 'r' or 'w'")

        try:
            import gzip
            gzip.GzipFile
        except (ImportError, AttributeError):
            raise CompressionError("gzip module is not available")

        if fileobj is None:
            fileobj = bltn_open(name, mode + "b")

        try:
            t = cls.taropen(name, mode,
                gzip.GzipFile(name, mode, compresslevel, fileobj),
                **kwargs)
        except IOError:
            raise ReadError("not a gzip file")
        t._extfileobj = False
        return t
    @classmethod
    def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
        """Open bzip2 compressed tar archive name for reading or writing.
           Appending is not allowed.
        """
        if len(mode) > 1 or mode not in "rw":
            raise ValueError("mode must be 'r' or 'w'.")

        try:
            import bz2
        except ImportError:
            raise CompressionError("bz2 module is not available")

        if fileobj is not None:
            fileobj = _BZ2Proxy(fileobj, mode)
        else:
            fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)

        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except IOError:
            raise ReadError("not a bzip2 file")
        t._extfileobj = False
        return t

    # All *open() methods are registered here.
    OPEN_METH = {
        "tar": "taropen",   # uncompressed tar
        "gz":  "gzopen",    # gzip compressed tar
        "bz2": "bz2open"    # bzip2 compressed tar
    }
    #--------------------------------------------------------------------------
    # The public methods which TarFile provides:

    def close(self):
        """Close the TarFile. In write-mode, two finishing zero blocks are
           appended to the archive.
        """
        if self.closed:
            return

        if self.mode in "aw":
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            blocks, remainder = divmod(self.offset, RECORDSIZE)
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))

        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
    def getmember(self, name):
        """Return a TarInfo object for member `name'. If `name' can not be
           found in the archive, KeyError is raised. If a member occurs more
           than once in the archive, its last occurrence is assumed to be the
           most up-to-date version.
        """
        tarinfo = self._getmember(name)
        if tarinfo is None:
            raise KeyError("filename %r not found" % name)
        return tarinfo
    def getmembers(self):
        """Return the members of the archive as a list of TarInfo objects. The
           list has the same order as the members in the archive.
        """
        self._check()
        if not self._loaded:    # if we want to obtain a list of
            self._load()        # all members, we first have to
                                # scan the whole archive.
        return self.members

    def getnames(self):
        """Return the members of the archive as a list of their names. It has
           the same order as the list returned by getmembers().
        """
        return [tarinfo.name for tarinfo in self.getmembers()]
    def gettarinfo(self, name=None, arcname=None, fileobj=None):
        """Create a TarInfo object for either the file `name' or the file
           object `fileobj' (using os.fstat on its file descriptor). You can
           modify some of the TarInfo's attributes before you add it using
           addfile(). If given, `arcname' specifies an alternative name for the
           file in the archive.
        """
        self._check("aw")

        # When fileobj is given, replace name by
        # fileobj's real name.
        if fileobj is not None:
            name = fileobj.name

        # Building the name of the member in the archive.
        # Backward slashes are converted to forward slashes,
        # Absolute paths are turned to relative paths.
        if arcname is None:
            arcname = name
        drv, arcname = os.path.splitdrive(arcname)
        arcname = arcname.replace(os.sep, "/")
        arcname = arcname.lstrip("/")

        # Now, fill the TarInfo object with
        # information specific for the file.
        tarinfo = self.tarinfo()
        tarinfo.tarfile = self

        # Use os.stat or os.lstat, depending on platform
        # and if symlinks shall be resolved.
        if fileobj is None:
            if hasattr(os, "lstat") and not self.dereference:
                statres = os.lstat(name)
            else:
                statres = os.stat(name)
        else:
            statres = os.fstat(fileobj.fileno())
        linkname = ""

        stmd = statres.st_mode
        if stat.S_ISREG(stmd):
            inode = (statres.st_ino, statres.st_dev)
            if not self.dereference and statres.st_nlink > 1 and \
                    inode in self.inodes and arcname != self.inodes[inode]:
                # Is it a hardlink to an already
                # archived file?
                type = LNKTYPE
                linkname = self.inodes[inode]
            else:
                # The inode is added only if its valid.
                # For win32 it is always 0.
                type = REGTYPE
                if inode[0]:
                    self.inodes[inode] = arcname
        elif stat.S_ISDIR(stmd):
            type = DIRTYPE
        elif stat.S_ISFIFO(stmd):
            type = FIFOTYPE
        elif stat.S_ISLNK(stmd):
            type = SYMTYPE
            linkname = os.readlink(name)
        elif stat.S_ISCHR(stmd):
            type = CHRTYPE
        elif stat.S_ISBLK(stmd):
            type = BLKTYPE
        else:
            return None

        # Fill the TarInfo object with all
        # information we can get.
        tarinfo.name = arcname
        tarinfo.mode = stmd
        tarinfo.uid = statres.st_uid
        tarinfo.gid = statres.st_gid
        if stat.S_ISREG(stmd):
            tarinfo.size = statres.st_size
        else:
            tarinfo.size = 0L
        tarinfo.mtime = statres.st_mtime
        tarinfo.type = type
        tarinfo.linkname = linkname
        if pwd:
            try:
                tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
            except KeyError:
                pass
        if grp:
            try:
                tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
            except KeyError:
                pass

        if type in (CHRTYPE, BLKTYPE):
            if hasattr(os, "major") and hasattr(os, "minor"):
                tarinfo.devmajor = os.major(statres.st_rdev)
                tarinfo.devminor = os.minor(statres.st_rdev)
        return tarinfo
    def list(self, verbose=True):
        """Print a table of contents to sys.stdout. If `verbose' is False, only
           the names of the members are printed. If it is True, an `ls -l'-like
           output is produced.
        """
        self._check()

        for tarinfo in self:
            if verbose:
                print filemode(tarinfo.mode),
                print "%s/%s" % (tarinfo.uname or tarinfo.uid,
                                 tarinfo.gname or tarinfo.gid),
                if tarinfo.ischr() or tarinfo.isblk():
                    print "%10s" % ("%d,%d" \
                                    % (tarinfo.devmajor, tarinfo.devminor)),
                else:
                    print "%10d" % tarinfo.size,
                print "%d-%02d-%02d %02d:%02d:%02d" \
                      % time.localtime(tarinfo.mtime)[:6],

            print tarinfo.name + ("/" if tarinfo.isdir() else ""),

            if verbose:
                if tarinfo.issym():
                    print "->", tarinfo.linkname,
                if tarinfo.islnk():
                    print "link to", tarinfo.linkname,
            print
    def add(self, name, arcname=None, recursive=True, exclude=None, filter=None):
        """Add the file `name' to the archive. `name' may be any type of file
           (directory, fifo, symbolic link, etc.). If given, `arcname'
           specifies an alternative name for the file in the archive.
           Directories are added recursively by default. This can be avoided by
           setting `recursive' to False. `exclude' is a function that should
           return True for each filename to be excluded. `filter' is a function
           that expects a TarInfo object argument and returns the changed
           TarInfo object, if it returns None the TarInfo object will be
           excluded from the archive.
        """
        self._check("aw")

        if arcname is None:
            arcname = name

        # Exclude pathnames.
        if exclude is not None:
            import warnings
            warnings.warn("use the filter argument instead",
                          DeprecationWarning, 2)
            if exclude(name):
                self._dbg(2, "tarfile: Excluded %r" % name)
                return

        # Skip if somebody tries to archive the archive...
        if self.name is not None and os.path.abspath(name) == self.name:
            self._dbg(2, "tarfile: Skipped %r" % name)
            return

        self._dbg(1, name)

        # Create a TarInfo object from the file.
        tarinfo = self.gettarinfo(name, arcname)

        if tarinfo is None:
            self._dbg(1, "tarfile: Unsupported type %r" % name)
            return

        # Change or exclude the TarInfo object.
        if filter is not None:
            tarinfo = filter(tarinfo)
            if tarinfo is None:
                self._dbg(2, "tarfile: Excluded %r" % name)
                return

        # Append the tar header and data to the archive.
        if tarinfo.isreg():
            f = bltn_open(name, "rb")
            self.addfile(tarinfo, f)
            f.close()

        elif tarinfo.isdir():
            self.addfile(tarinfo)
            if recursive:
                for f in os.listdir(name):
                    self.add(os.path.join(name, f), os.path.join(arcname, f),
                            recursive, exclude, filter)

        else:
            self.addfile(tarinfo)
    def addfile(self, tarinfo, fileobj=None):
        """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
           given, tarinfo.size bytes are read from it and added to the archive.
           You can create TarInfo objects using gettarinfo().
           On Windows platforms, `fileobj' should always be opened with mode
           'rb' to avoid irritation about the file size.
        """
        self._check("aw")

        tarinfo = copy.copy(tarinfo)

        buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
        self.fileobj.write(buf)
        self.offset += len(buf)

        # If there's data to follow, append it.
        if fileobj is not None:
            copyfileobj(fileobj, self.fileobj, tarinfo.size)
            blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
            if remainder > 0:
                self.fileobj.write(NUL * (BLOCKSIZE - remainder))
                blocks += 1
            self.offset += blocks * BLOCKSIZE

        self.members.append(tarinfo)
    def extractall(self, path=".", members=None):
        """Extract all members from the archive to the current working
           directory and set owner, modification time and permissions on
           directories afterwards. `path' specifies a different directory
           to extract to. `members' is optional and must be a subset of the
           list returned by getmembers().
        """
        directories = []

        if members is None:
            members = self

        for tarinfo in members:
            if tarinfo.isdir():
                # Extract directories with a safe mode.
                directories.append(tarinfo)
                tarinfo = copy.copy(tarinfo)
                tarinfo.mode = 0700
            self.extract(tarinfo, path)

        # Reverse sort directories.
        directories.sort(key=operator.attrgetter('name'))
        directories.reverse()

        # Set correct owner, mtime and filemode on directories.
        for tarinfo in directories:
            dirpath = os.path.join(path, tarinfo.name)
            try:
                self.chown(tarinfo, dirpath)
                self.utime(tarinfo, dirpath)
                self.chmod(tarinfo, dirpath)
            except ExtractError, e:
                if self.errorlevel > 1:
                    raise
                else:
                    self._dbg(1, "tarfile: %s" % e)
    def extract(self, member, path=""):
        """Extract a member from the archive to the current working directory,
           using its full name. Its file information is extracted as accurately
           as possible. `member' may be a filename or a TarInfo object. You can
           specify a different directory using `path'.
        """
        self._check("r")

        if isinstance(member, basestring):
            tarinfo = self.getmember(member)
        else:
            tarinfo = member

        # Prepare the link target for makelink().
        if tarinfo.islnk():
            tarinfo._link_target = os.path.join(path, tarinfo.linkname)

        try:
            self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
        except EnvironmentError, e:
            if self.errorlevel > 0:
                raise
            else:
                if e.filename is None:
                    self._dbg(1, "tarfile: %s" % e.strerror)
                else:
                    self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
        except ExtractError, e:
            if self.errorlevel > 1:
                raise
            else:
                self._dbg(1, "tarfile: %s" % e)
    def extractfile(self, member):
        """Extract a member from the archive as a file object. `member' may be
           a filename or a TarInfo object. If `member' is a regular file, a
           file-like object is returned. If `member' is a link, a file-like
           object is constructed from the link's target. If `member' is none of
           the above, None is returned.
           The file-like object is read-only and provides the following
           methods: read(), readline(), readlines(), seek() and tell()
        """
        self._check("r")

        if isinstance(member, basestring):
            tarinfo = self.getmember(member)
        else:
            tarinfo = member

        if tarinfo.isreg():
            return self.fileobject(self, tarinfo)

        elif tarinfo.type not in SUPPORTED_TYPES:
            # If a member's type is unknown, it is treated as a
            # regular file.
            return self.fileobject(self, tarinfo)

        elif tarinfo.islnk() or tarinfo.issym():
            if isinstance(self.fileobj, _Stream):
                # A small but ugly workaround for the case that someone tries
                # to extract a (sym)link as a file-object from a non-seekable
                # stream of tar blocks.
                raise StreamError("cannot extract (sym)link as file object")
            else:
                # A (sym)link's file object is its target's file object.
                return self.extractfile(self._getmember(tarinfo.linkname,
                                                         tarinfo))
        else:
            # If there's no data associated with the member (directory, chrdev,
            # blkdev, etc.), return None instead of a file object.
            return None
    def _extract_member(self, tarinfo, targetpath):
        """Extract the TarInfo object tarinfo to a physical
           file called targetpath.
        """
        # Fetch the TarInfo object for the given name
        # and build the destination pathname, replacing
        # forward slashes to platform specific separators.
        targetpath = targetpath.rstrip("/")
        targetpath = targetpath.replace("/", os.sep)

        # Create all upper directories.
        upperdirs = os.path.dirname(targetpath)
        if upperdirs and not os.path.exists(upperdirs):
            # Create directories that are not part of the archive with
            # default permissions.
            os.makedirs(upperdirs)

        if tarinfo.islnk() or tarinfo.issym():
            self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
        else:
            self._dbg(1, tarinfo.name)

        if tarinfo.isreg():
            self.makefile(tarinfo, targetpath)
        elif tarinfo.isdir():
            self.makedir(tarinfo, targetpath)
        elif tarinfo.isfifo():
            self.makefifo(tarinfo, targetpath)
        elif tarinfo.ischr() or tarinfo.isblk():
            self.makedev(tarinfo, targetpath)
        elif tarinfo.islnk() or tarinfo.issym():
            self.makelink(tarinfo, targetpath)
        elif tarinfo.type not in SUPPORTED_TYPES:
            self.makeunknown(tarinfo, targetpath)
        else:
            self.makefile(tarinfo, targetpath)

        self.chown(tarinfo, targetpath)
        if not tarinfo.issym():
            self.chmod(tarinfo, targetpath)
            self.utime(tarinfo, targetpath)
    #--------------------------------------------------------------------------
    # Below are the different file methods. They are called via
    # _extract_member() when extract() is called. They can be replaced in a
    # subclass to implement other functionality.

    def makedir(self, tarinfo, targetpath):
        """Make a directory called targetpath.
        """
        try:
            # Use a safe mode for the directory, the real mode is set
            # later in _extract_member().
            os.mkdir(targetpath, 0700)
        except EnvironmentError, e:
            if e.errno != errno.EEXIST:
                raise

    def makefile(self, tarinfo, targetpath):
        """Make a file called targetpath.
        """
        source = self.extractfile(tarinfo)
        target = bltn_open(targetpath, "wb")
        copyfileobj(source, target)
        source.close()
        target.close()

    def makeunknown(self, tarinfo, targetpath):
        """Make a file from a TarInfo object with an unknown type
           at targetpath.
        """
        self.makefile(tarinfo, targetpath)
        self._dbg(1, "tarfile: Unknown file type %r, " \
                     "extracted as regular file." % tarinfo.type)

    def makefifo(self, tarinfo, targetpath):
        """Make a fifo called targetpath.
        """
        if hasattr(os, "mkfifo"):
            os.mkfifo(targetpath)
        else:
            raise ExtractError("fifo not supported by system")
    def makedev(self, tarinfo, targetpath):
        """Make a character or block device called targetpath.
        """
        if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
            raise ExtractError("special devices not supported by system")

        mode = tarinfo.mode
        if tarinfo.isblk():
            mode |= stat.S_IFBLK
        else:
            mode |= stat.S_IFCHR

        os.mknod(targetpath, mode,
                 os.makedev(tarinfo.devmajor, tarinfo.devminor))
    def makelink(self, tarinfo, targetpath):
        """Make a (symbolic) link called targetpath. If it cannot be created
           (platform limitation), we try to make a copy of the referenced file
           instead of a link.
        """
        try:
            if tarinfo.issym():
                os.symlink(tarinfo.linkname, targetpath)
            else:
                # See extract().
                os.link(tarinfo._link_target, targetpath)
        except AttributeError:
            if tarinfo.issym():
                linkpath = os.path.dirname(tarinfo.name) + "/" + \
                           tarinfo.linkname
            else:
                linkpath = tarinfo.linkname

            try:
                self._extract_member(self.getmember(linkpath), targetpath)
            except (EnvironmentError, KeyError), e:
                linkpath = linkpath.replace("/", os.sep)
                try:
                    shutil.copy2(linkpath, targetpath)
                except EnvironmentError, e:
                    raise IOError("link could not be created")
    def chown(self, tarinfo, targetpath):
        """Set owner of targetpath according to tarinfo.
        """
        if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
            # We have to be root to do so.
            try:
                g = grp.getgrnam(tarinfo.gname)[2]
            except KeyError:
                try:
                    g = grp.getgrgid(tarinfo.gid)[2]
                except KeyError:
                    g = os.getgid()
            try:
                u = pwd.getpwnam(tarinfo.uname)[2]
            except KeyError:
                try:
                    u = pwd.getpwuid(tarinfo.uid)[2]
                except KeyError:
                    u = os.getuid()
            try:
                if tarinfo.issym() and hasattr(os, "lchown"):
                    os.lchown(targetpath, u, g)
                else:
                    if sys.platform != "os2emx":
                        os.chown(targetpath, u, g)
            except EnvironmentError, e:
                raise ExtractError("could not change owner")
    def chmod(self, tarinfo, targetpath):
        """Set file permissions of targetpath according to tarinfo.
        """
        if hasattr(os, 'chmod'):
            try:
                os.chmod(targetpath, tarinfo.mode)
            except EnvironmentError, e:
                raise ExtractError("could not change mode")

    def utime(self, tarinfo, targetpath):
        """Set modification time of targetpath according to tarinfo.
        """
        if not hasattr(os, 'utime'):
            return
        try:
            os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
        except EnvironmentError, e:
            raise ExtractError("could not change modification time")
    #--------------------------------------------------------------------------
    def next(self):
        """Return the next member of the archive as a TarInfo object, when
           TarFile is opened for reading. Return None if there is no more
           available.
        """
        self._check("ra")
        if self.firstmember is not None:
            m = self.firstmember
            self.firstmember = None
            return m

        # Read the next block.
        self.fileobj.seek(self.offset)
        while True:
            try:
                tarinfo = self.tarinfo.fromtarfile(self)
                if tarinfo is None:
                    return
                self.members.append(tarinfo)

            except HeaderError, e:
                if self.ignore_zeros:
                    self._dbg(2, "0x%X: %s" % (self.offset, e))
                    self.offset += BLOCKSIZE
                    continue
                else:
                    if self.offset == 0:
                        raise ReadError(str(e))
                    return None
            break

        return tarinfo
    #--------------------------------------------------------------------------
    # Little helper methods:

    def _getmember(self, name, tarinfo=None):
        """Find an archive member by name from bottom to top.
           If tarinfo is given, it is used as the starting point.
        """
        # Ensure that all members have been loaded.
        members = self.getmembers()

        if tarinfo is None:
            end = len(members)
        else:
            end = members.index(tarinfo)

        for i in xrange(end - 1, -1, -1):
            if name == members[i].name:
                return members[i]

    def _load(self):
        """Read through the entire archive file and look for readable
           members.
        """
        while True:
            tarinfo = self.next()
            if tarinfo is None:
                break
        self._loaded = True
    def _check(self, mode=None):
        """Check if TarFile is still open, and if the operation's mode
           corresponds to TarFile's mode.
        """
        if self.closed:
            raise IOError("%s is closed" % self.__class__.__name__)
        if mode is not None and self.mode not in mode:
            raise IOError("bad operation for mode %r" % self.mode)

    def __iter__(self):
        """Provide an iterator object.
        """
        if self._loaded:
            return iter(self.members)
        else:
            return TarIter(self)

    def _dbg(self, level, msg):
        """Write debugging output to sys.stderr.
        """
        if level <= self.debug:
            print >> sys.stderr, msg
class TarIter:
    """Iterator Class.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Construct a TarIter object.
        """
        self.tarfile = tarfile
        self.index = 0
    def __iter__(self):
        """Return iterator object.
        """
        return self
    def next(self):
        """Return the next item using TarFile's next() method.
           When all members have been read, set TarFile as _loaded.
        """
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely.
        if not self.tarfile._loaded:
            tarinfo = self.tarfile.next()
            if not tarinfo:
                self.tarfile._loaded = True
                raise StopIteration
        else:
            try:
                tarinfo = self.tarfile.members[self.index]
            except IndexError:
                raise StopIteration
        self.index += 1
        return tarinfo
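
# Illustrative sketch (not part of the original module): iterating a TarFile
# yields TarInfo objects lazily via TarIter, so even large archives can be
# walked without loading the full member list first.  `tar` is assumed to be
# an open TarFile.
def _example_iterate(tar):
    names = []
    for tarinfo in tar:
        if tarinfo.isreg():
            names.append(tarinfo.name)
    return names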
# Helper classes for sparse file support
class _section:
    """Base class for _data and _hole.
    """
    def __init__(self, offset, size):
        self.offset = offset
        self.size = size
    def __contains__(self, offset):
        return self.offset <= offset < self.offset + self.size

class _data(_section):
    """Represent a data section in a sparse file.
    """
    def __init__(self, offset, size, realpos):
        _section.__init__(self, offset, size)
        self.realpos = realpos

class _hole(_section):
    """Represent a hole section in a sparse file.
    """
    pass

class _ringbuffer(list):
    """Ringbuffer class which increases performance
       over a regular list.
    """
    def __init__(self):
        self.idx = 0
    def find(self, offset):
        idx = self.idx
        while True:
            item = self[idx]
            if offset in item:
                break
            idx += 1
            if idx == len(self):
                idx = 0
            if idx == self.idx:
                # End of File
                return None
        self.idx = idx
        return item
#---------------------------------------------
# zipfile compatible TarFile class
#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
       ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        from warnings import warnpy3k
        warnpy3k("the TarFileCompat class has been removed in Python 3.0",
                 stacklevel=2)
        if compression == TAR_PLAIN:
            self.tarfile = TarFile.taropen(file, mode)
        elif compression == TAR_GZIPPED:
            self.tarfile = TarFile.gzopen(file, mode)
        else:
            raise ValueError("unknown compression constant")
        if mode[0:1] == "r":
            members = self.tarfile.getmembers()
            for m in members:
                m.filename = m.name
                m.file_size = m.size
                m.date_time = time.gmtime(m.mtime)[:6]
    def namelist(self):
        return map(lambda m: m.name, self.infolist())
    def infolist(self):
        return filter(lambda m: m.type in REGULAR_TYPES,
                      self.tarfile.getmembers())
    def getinfo(self, name):
        return self.tarfile.getmember(name)
    def read(self, name):
        return self.tarfile.extractfile(self.tarfile.getmember(name)).read()
    def write(self, filename, arcname=None, compress_type=None):
        self.tarfile.add(filename, arcname)
    def writestr(self, zinfo, bytes):
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        tinfo = TarInfo(zinfo.filename)
        tinfo.size = len(bytes)
        tinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(tinfo, StringIO(bytes))
    def close(self):
        self.tarfile.close()
#class TarFileCompat
#--------------------
# exported functions
#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.
    """
    try:
        t = open(name)
        t.close()
        return True
    except TarError:
        return False

bltn_open = open
open = TarFile.open
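
# Illustrative sketch (not part of the original module): a typical round trip
# with the public API.  "example.tar.gz", "somefile.txt" and the output
# directory are assumptions made only for the sake of the example.
def _example_roundtrip():
    # create a gzip-compressed archive
    tar = TarFile.open("example.tar.gz", "w:gz")
    tar.add("somefile.txt", arcname="docs/somefile.txt")
    tar.close()

    # list and extract it again
    if is_tarfile("example.tar.gz"):
        tar = TarFile.open("example.tar.gz", "r:*")
        print tar.getnames()
        tar.extractall(path="unpacked")
        tar.close()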