From 4335e050773343dd67ba6f80926f9123b8549614 Mon Sep 17 00:00:00 2001 From: Chris Frey Date: Tue, 30 Nov 2010 21:17:58 -0500 Subject: [PATCH] lib: added Data version of TarFile::ReadNextFile() (optimization) This is in an effort to reduce data copies in the code, but it causes code duplication. It is probably better to turn TarFile into a complete Barry namespace class someday, using Data objects instead of std::string, instead of leaving it as a generic reuse library class. See FIXME notes in the code. --- ChangeLog | 1 + src/tarfile.cc | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/tarfile.h | 5 +++++ 3 files changed, 64 insertions(+) diff --git a/ChangeLog b/ChangeLog index 8d22a30c..cd3a4dd9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -3,6 +3,7 @@ Release: version 0.17.0 - 2010/01/?? 2010/11/30 - lib: fixed but in Messages parser, which used CheckSize() inappropriately + - lib: added Data version of TarFile::ReadNextFile() (optimization) 2010/11/16 - doc: clarified btool -X in its man page 2010/11/09 diff --git a/src/tarfile.cc b/src/tarfile.cc index da0d07d8..8d3a943d 100644 --- a/src/tarfile.cc +++ b/src/tarfile.cc @@ -20,6 +20,7 @@ */ #include "tarfile.h" +#include "data.h" #include #include @@ -186,6 +187,63 @@ bool TarFile::ReadNextFile(std::string &tarpath, std::string &data) return true; } +// FIXME - yes, this is blatant copying of code, but this is +// specific to Barry, to use a Barry::Data object instead of std::string +// in order to reduce copies. +bool TarFile::ReadNextFile(std::string &tarpath, Barry::Data &data) +{ + // start fresh + tarpath.clear(); + data.QuickZap(); + + // read next tar file header + if( th_read(m_tar) != 0 ) { + // this is not necessarily an error, as it could just + // be the end of file, so a simple false is good here, + // don't throw an exception + m_last_error = ""; + return false; + } + + // write standard file header + if( !TH_ISREG(m_tar) ) { + return False("Only regular files are supported inside a tarball."); + } + + char *pathname = th_get_pathname(m_tar); + tarpath = pathname; + // + // FIXME (leak) - someday, when all distros use a patched version of + // libtar, we may be able to avoid this memory leak, but + // th_get_pathname() does not consistently return a user-freeable + // string on all distros. + // + // See the following links for more information: + // https://bugs.launchpad.net/ubuntu/+source/libtar/+bug/41804 + // https://lists.feep.net:8080/pipermail/libtar/2006-April/000222.html + // +// free(pathname); + size_t size = th_get_size(m_tar); + + // read the data in blocks until finished + char block[T_BLOCKSIZE]; + for( size_t pos = 0; pos < size; pos += T_BLOCKSIZE ) { + memset(block, 0, T_BLOCKSIZE); + + size_t readsize = T_BLOCKSIZE; + if( size - pos < T_BLOCKSIZE ) + readsize = size - pos; + + if( tar_block_read(m_tar, block) != T_BLOCKSIZE ) { + return False("Unable to read block", errno); + } + + data.Append(block, readsize); + } + + return true; +} + /// Read next available filename, skipping the data if it is /// a regular file bool TarFile::ReadNextFilenameOnly(std::string &tarpath) diff --git a/src/tarfile.h b/src/tarfile.h index f48a4611..da945e52 100644 --- a/src/tarfile.h +++ b/src/tarfile.h @@ -27,6 +27,10 @@ #include #include +namespace Barry { + class Data; +} + namespace reuse { // @@ -74,6 +78,7 @@ public: /// internal filename from tarball. /// Returns false on end of archive. bool ReadNextFile(std::string &tarpath, std::string &data); + bool ReadNextFile(std::string &tarpath, Barry::Data &data); /// Read next available filename, skipping the data if it is /// a regular file -- 2.11.4.GIT