3 /// API for reading and writing sequentially from compressed tar files.
7 Copyright (C) 2007-2013, Chris Frey <cdfrey@foursquare.net>
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
18 See the GNU General Public License in the COPYING file at the
19 root directory of this project for more details.
/// Constructor: opens the tar archive named by filename with tar_open(),
/// handing compress_ops, the computed open flags and the permission mode
/// straight through to it.  always_throw is stored in m_throw.
/// Throws TarError, with the strerror(errno) text appended to the message,
/// when tar_open() returns -1.
/// NOTE(review): parts of the parameter list, the initializer list and the
/// logic that selects the flags are not visible in this excerpt.
33 TarFile::TarFile(const char *filename
,
35 tartype_t
*compress_ops
,
38 m_throw(always_throw
),
41 // figure out how to handle the file flags/modes
// permission bits: read/write for the owner, read-only for group and
// others (0644)
43 mode_t mode
= S_IRUSR
| S_IWUSR
| S_IRGRP
| S_IROTH
;
// write-only, create the file, and fail if it already exists (O_EXCL)
// NOTE(review): the condition under which these flags are chosen is not
// visible here -- confirm
46 flags
= O_WRONLY
| O_CREAT
| O_EXCL
;
52 // open... throw on error, as we are in the constructor
53 if( tar_open(&m_tar
, const_cast<char*>(filename
),
54 compress_ops
, flags
, mode
, TAR_VERBOSE
| TAR_GNU
) == -1 ) {
55 throw TarError(std::string(_("Unable to open tar file: ")) + strerror(errno
));
// A TarError raised by the preceding try body (not visible in this
// excerpt) is caught here and ignored: the handler is empty.
63 } catch( TarError
&te
) {}
/// Error-reporting helper: takes a message and returns bool.  It is the
/// function the other methods return through on failure.
/// NOTE(review): the body is not visible in this excerpt -- presumably it
/// records the message and either throws or returns false depending on
/// m_throw; confirm.
66 bool TarFile::False(const char *msg
)
/// Overload of False() taking a message plus an error code in err
/// (callers in this file pass errno).
/// NOTE(review): only the first statement is visible here -- presumably
/// the strerror(err) text is appended to str before it is forwarded to
/// the single-argument False(); confirm.
75 bool TarFile::False(const std::string
&msg
, int err
)
// start from a private, modifiable copy of the caller's message
77 std::string str
= msg
;
// Finalize the archive: write the EOF marker with tar_append_eof(), then
// release the handle with tar_close().  A non-zero result from either
// call is reported through False() together with errno.
// NOTE(review): the enclosing function's signature and any guarding
// conditions are not visible in this excerpt.
87 if( tar_append_eof(m_tar
) != 0 )
88 return False(_("Unable to write eof"), errno
);
91 if( tar_close(m_tar
) != 0 ) {
92 return False(_("Unable to close file"), errno
);
99 /// Appends a new file to the current tarfile, using tarpath as
100 /// its internal filename, and data as the complete file contents.
101 /// Uses current date and time as file mtime.
/// The entry is recorded as a regular file with mode 0644, user 0 and
/// group 0, and its size is data.size().
/// A failure of th_write() or tar_block_write() is reported through
/// False() together with errno.
102 bool TarFile::AppendFile(const char *tarpath
, const std::string
&data
)
104 // write standard file header
105 th_set_type(m_tar
, REGTYPE
);
106 th_set_mode(m_tar
, 0644);
// const_cast: th_set_path() is called with a non-const char*
107 th_set_path(m_tar
, const_cast<char*>(tarpath
));
108 th_set_user(m_tar
, 0);
109 th_set_group(m_tar
, 0);
110 th_set_size(m_tar
, data
.size());
111 th_set_mtime(m_tar
, time(NULL
));
112 if( th_write(m_tar
) != 0 ) {
113 return False(_("Unable to write tar header"), errno
);
116 // write the data in blocks until finished
117 char block
[T_BLOCKSIZE
];
118 for( size_t pos
= 0; pos
< data
.size(); pos
+= T_BLOCKSIZE
) {
// zero the buffer first, so that a short final chunk ends up
// zero-padded to a full block
119 memset(block
, 0, T_BLOCKSIZE
);
// number of payload bytes for this block: a full block, or whatever
// remains of data on the last iteration
121 size_t size
= T_BLOCKSIZE
;
122 if( data
.size() - pos
< T_BLOCKSIZE
)
123 size
= data
.size() - pos
;
125 memcpy(block
, data
.data() + pos
, size
);
// a whole block is always written; anything other than T_BLOCKSIZE
// coming back is treated as failure
127 if( tar_block_write(m_tar
, block
) != T_BLOCKSIZE
) {
128 return False(_("Unable to write block"), errno
);
135 /// Reads next available file into data, filling tarpath with
136 /// internal filename from tarball.
/// Directory entries are skipped.  Any other entry that is not a regular
/// file is reported through False().  A failing th_read() is treated as a
/// possible end of archive rather than as an error.
/// A short tar_block_read() is reported through False() with errno.
/// NOTE(review): the statements that reset the outputs and copy pathname
/// into tarpath are not visible in this excerpt -- confirm.
137 bool TarFile::ReadNextFile(std::string
&tarpath
, std::string
&data
)
143 // read next tar file header... skip all directories
145 if( th_read(m_tar
) != 0 ) {
146 // this is not necessarily an error, as it could just
147 // be the end of file, so a simple false is good here,
148 // don't throw an exception
152 } while( TH_ISDIR(m_tar
) );
154 // only regular files are supported; reject any other entry type
155 if( !TH_ISREG(m_tar
) ) {
156 return False(_("Only regular files are supported inside a tarball."));
159 char *pathname
= th_get_pathname(m_tar
);
162 // FIXME (leak) - someday, when all distros use a patched version of
163 // libtar, we may be able to avoid this memory leak, but
164 // th_get_pathname() does not consistently return a user-freeable
165 // string on all distros.
167 // See the following links for more information:
168 // https://bugs.launchpad.net/ubuntu/+source/libtar/+bug/41804
169 // https://lists.feep.net:8080/pipermail/libtar/2006-April/000222.html
// payload size in bytes, as recorded in the header just read
172 size_t size
= th_get_size(m_tar
);
174 // read the data in blocks until finished
175 char block
[T_BLOCKSIZE
];
176 for( size_t pos
= 0; pos
< size
; pos
+= T_BLOCKSIZE
) {
177 memset(block
, 0, T_BLOCKSIZE
);
// the last block may be only partly payload: keep just the bytes that
// belong to the file and drop the rest of the block
179 size_t readsize
= T_BLOCKSIZE
;
180 if( size
- pos
< T_BLOCKSIZE
)
181 readsize
= size
- pos
;
// blocks are always read whole; anything other than T_BLOCKSIZE coming
// back is treated as failure
183 if( tar_block_read(m_tar
, block
) != T_BLOCKSIZE
) {
184 return False(_("Unable to read block"), errno
);
187 data
.append(block
, readsize
);
193 // FIXME - yes, this is blatant copying of code, but this is
194 // specific to Barry, to use a Barry::Data object instead of std::string
195 // in order to reduce copies.
// Same contract as the std::string overload: directory entries are
// skipped, other non-regular entries are reported through False(), and a
// failing th_read() is treated as a possible end of archive.  The payload
// is added to data with Barry::Data::Append().
// NOTE(review): the statements that reset the outputs and copy pathname
// into tarpath are not visible in this excerpt -- confirm.
196 bool TarFile::ReadNextFile(std::string
&tarpath
, Barry::Data
&data
)
202 // read next tar file header... skip all directories
204 if( th_read(m_tar
) != 0 ) {
205 // this is not necessarily an error, as it could just
206 // be the end of file, so a simple false is good here,
207 // don't throw an exception
211 } while( TH_ISDIR(m_tar
) );
213 // only regular files are supported; reject any other entry type
214 if( !TH_ISREG(m_tar
) ) {
215 return False(_("Only regular files are supported inside a tarball."));
218 char *pathname
= th_get_pathname(m_tar
);
221 // FIXME (leak) - someday, when all distros use a patched version of
222 // libtar, we may be able to avoid this memory leak, but
223 // th_get_pathname() does not consistently return a user-freeable
224 // string on all distros.
226 // See the following links for more information:
227 // https://bugs.launchpad.net/ubuntu/+source/libtar/+bug/41804
228 // https://lists.feep.net:8080/pipermail/libtar/2006-April/000222.html
// payload size in bytes, as recorded in the header just read
231 size_t size
= th_get_size(m_tar
);
233 // read the data in blocks until finished
234 char block
[T_BLOCKSIZE
];
235 for( size_t pos
= 0; pos
< size
; pos
+= T_BLOCKSIZE
) {
236 memset(block
, 0, T_BLOCKSIZE
);
// the last block may be only partly payload: keep just the bytes that
// belong to the file and drop the rest of the block
238 size_t readsize
= T_BLOCKSIZE
;
239 if( size
- pos
< T_BLOCKSIZE
)
240 readsize
= size
- pos
;
// blocks are always read whole; anything other than T_BLOCKSIZE coming
// back is treated as failure
242 if( tar_block_read(m_tar
, block
) != T_BLOCKSIZE
) {
243 return False(_("Unable to read block"), errno
);
246 data
.Append(block
, readsize
);
252 /// Read next available filename, skipping over the entry's data
/// with tar_skip_regfile() instead of reading it.
/// Directory entries are skipped.  Any other entry that is not a regular
/// file is reported through False().  A failing th_read() is treated as a
/// possible end of archive rather than as an error.
/// NOTE(review): the statement that copies pathname into tarpath is not
/// visible in this excerpt -- confirm.
254 bool TarFile::ReadNextFilenameOnly(std::string
&tarpath
)
259 // read next tar file header... skip all directories
261 if( th_read(m_tar
) != 0 ) {
262 // this is not necessarily an error, as it could just
263 // be the end of file, so a simple false is good here,
264 // don't throw an exception
268 } while( TH_ISDIR(m_tar
) );
270 // only regular files are supported; reject any other entry type
271 if( !TH_ISREG(m_tar
) ) {
272 return False(_("Only regular files are supported inside a tarball."));
275 char *pathname
= th_get_pathname(m_tar
);
277 // See the FIXME (leak) comments in ReadNextFile() about th_get_pathname()
// advance past this entry's data without reading it
280 if( tar_skip_regfile(m_tar
) != 0 ) {
281 return False(_("Unable to skip tar file"), errno
);
// Round-trip self-test: writes tartest.tar.gz containing two entries with
// the same payload, reads the archive back while comparing every entry
// against that payload, then deletes the file.  A TarError is printed to
// cerr.
// NOTE(review): the enclosing function and the declaration of data are
// not visible in this excerpt.
// NOTE(review): both constructor calls pass `true` in the argument region
// where the visible constructor signature has `tartype_t *compress_ops`;
// these calls may be stale relative to the current signature -- confirm.
301 cout
<< "Writing test file..." << endl
;
302 reuse::TarFile
output("tartest.tar.gz", true, true, true);
// each pass of this loop appends 10 bytes to the payload
304 for( int i
= 0; i
< 60; i
++ ) {
305 data
.append("0123456789", 10);
308 output
.AppendFile("path1/test1.txt", data
);
309 output
.AppendFile("path2/test2.txt", data
);
313 cout
<< "Reading test file..." << endl
;
314 reuse::TarFile
input("tartest.tar.gz", false, true, true);
315 std::string path
, incoming
;
// every entry read back is compared against the payload that was written
317 while( input
.ReadNextFile(path
, incoming
) ) {
318 cout
<< "Read: " << path
320 << (( data
== incoming
) ? "equal" : "different")
// remove the temporary archive
326 unlink("tartest.tar.gz");
328 } catch( reuse::TarFile::TarError
&te
) {
329 cerr
<< te
.what() << endl
;