Barry debian version 0.18.5-1
[barry.git] / tools / btarcmp.cc
blob5f08eb8e8f22a77de85f0cdc4b9cc912852b3068
1 ///
2 /// \file btarcmp.cc
3 /// Compare / diff tool to analyze Barry backup tarballs
4 ///
/*
    Copyright (C) 2012-2013, Net Direct Inc. (http://www.netdirect.ca/)

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

    See the GNU General Public License in the COPYING file at the
    root directory of this project for more details.
*/
22 #include <barry/barry.h>
23 #include <barry/barrybackup.h>
25 #include <iostream>
26 #include <iomanip>
27 #include <tr1/memory>
28 #include <string>
29 #include <vector>
30 #include <map>
31 #include <algorithm>
32 #include <stdexcept>
34 #include "barrygetopt.h"
35 #include "util.h"
36 #include "i18n.h"
38 using namespace std;
39 using namespace std::tr1;
40 using namespace Barry;
/*
   Still TODO: should have the ability to copy all differing records
               into another tarball, to function as a "patch", so the
               user can write it to their device to update only the
               differing records, or store it as a record of changes, etc.
*/
48 void Usage()
50 int logical, major, minor;
51 const char *Version = Barry::Version(logical, major, minor);
53 cerr << string_vprintf(
54 // TRANSLATORS: the Using: string is followed by the Barry library version
55 // string.
56 _("btarcmp - Compare Barry backup tarballs\n"
57 " Copyright 2012-2013, Net Direct Inc. (http://www.netdirect.ca/)\n"
58 " Using: %s\n"
59 "\n"
60 " Usage: btarcmp [options...] tarball_0 tarball_1\n"
61 "\n"
62 " -b Use brief filename output\n"
63 " -d db Specify a specific database to compare. Can be used\n"
64 " multiple times. If not used at all, all databases are\n"
65 " compared.\n"
66 " -D db Specify a database name to skip. If both -d and -D are\n"
67 " used for the same database name, it will be skipped.\n"
68 " -h This help\n"
69 " -I cs International charset for string conversions\n"
70 " Valid values here are available with 'iconv --list'\n"
71 " -P Only compare records that can be parsed\n"
72 " This is the same as specifying -d for each database\n"
73 " listed with -S.\n"
74 " -S Show list of supported database parsers. Use twice\n"
75 " to show field names as well.\n"
76 " -v Show verbose diff output (twice to force hex output)\n"
77 "\n"),
78 Version)
79 << endl;
83 //////////////////////////////////////////////////////////////////////////////
84 // Utility functions and functors
86 bool DBDataCmp(const DBData &a, const DBData &b)
88 return a.GetUniqueId() < b.GetUniqueId();
91 bool UnknownCmp(const UnknownField &a, const UnknownField &b)
93 return a.type < b.type;
96 class DBDataIdCmp
98 uint32_t m_id;
100 public:
101 explicit DBDataIdCmp(uint32_t id)
102 : m_id(id)
106 bool operator()(const DBData &data) const
108 return data.GetUniqueId() == m_id;
112 void ChecksumDBData(const DBData &data, bool include_ids, std::string &sum)
114 Barry::SHA_CTX m_ctx;
116 SHA1_Init(&m_ctx);
118 if( include_ids ) {
119 SHA1_Update(&m_ctx, data.GetDBName().c_str(),
120 data.GetDBName().size());
122 uint8_t recType = data.GetRecType();
123 SHA1_Update(&m_ctx, &recType, sizeof(recType));
125 uint32_t uniqueId = data.GetUniqueId();
126 SHA1_Update(&m_ctx, &uniqueId, sizeof(uniqueId));
129 int len = data.GetData().GetSize() - data.GetOffset();
130 SHA1_Update(&m_ctx,
131 data.GetData().GetData() + data.GetOffset(), len);
133 unsigned char sha1[SHA_DIGEST_LENGTH];
134 SHA1_Final(sha1, &m_ctx);
136 ostringstream oss;
137 for( int i = 0; i < SHA_DIGEST_LENGTH; i++ ) {
138 oss << hex << setfill('0') << setw(2)
139 << (unsigned int) sha1[i];
141 sum = oss.str();
145 //////////////////////////////////////////////////////////////////////////////
146 // Parsed Compare class
148 class ParsedCompare
150 private:
151 const DBData &m_one, &m_two;
152 const IConverter *m_ic;
153 bool m_known_record;
154 std::string m_first_description;
156 public:
157 ParsedCompare(const DBData &one, const DBData &two,
158 const IConverter *ic = 0);
160 bool CanParse() const { return m_known_record; }
161 const std::string& GetDescription() const { return m_first_description;}
163 /// Returns true if differing fields found and displayed.
164 /// False if no differences found.
165 bool ShowDifferingFields();
168 ParsedCompare::ParsedCompare(const DBData &one,
169 const DBData &two,
170 const IConverter *ic)
171 : m_one(one)
172 , m_two(two)
173 , m_ic(ic)
174 , m_known_record(false)
176 #undef HANDLE_PARSER
177 #define HANDLE_PARSER(tname) \
178 else if( tname::GetDBName() == one.GetDBName() ) { \
179 m_known_record = true; \
180 tname a; \
181 ParseDBData(m_one, a, m_ic); \
182 m_first_description = a.GetDescription(); \
185 if( one.GetDBName() != two.GetDBName() ) {
186 throw logic_error(_("Different database types in ParsedCompare ctor!"));
188 // fall through and use the else's
189 ALL_KNOWN_PARSER_TYPES
192 template <class RecordT>
193 class FieldHandler
195 private:
196 const RecordT &m_one, &m_two;
197 mutable bool m_found_difference;
199 public:
200 FieldHandler(const RecordT &one, const RecordT &two)
201 : m_one(one)
202 , m_two(two)
203 , m_found_difference(false)
207 bool Differing() const { return m_found_difference; }
209 void operator()(EnumFieldBase<RecordT> *ep,
210 const FieldIdentity &id) const
212 if( ep->GetValue(m_one) == ep->GetValue(m_two) )
213 return;
215 m_found_difference = true;
216 cout << " " << id.Name << ":\n"
217 << " tar[0] = "
218 << ep->GetName(ep->GetValue(m_one))
219 << " (" << ep->GetValue(m_one) << ")\n"
220 << " tar[1] = "
221 << ep->GetName(ep->GetValue(m_two))
222 << " (" << ep->GetValue(m_two) << ")"
223 << endl;
226 void operator()(typename FieldHandle<RecordT>::PostalPointer pp,
227 const FieldIdentity &id) const
229 const std::string
230 &a = m_one.*(pp.m_PostalAddress).*(pp.m_PostalField),
231 &b = m_two.*(pp.m_PostalAddress).*(pp.m_PostalField);
233 if( a == b )
234 return;
236 m_found_difference = true;
237 cout << " " << id.Name << ":\n"
238 << " tar[0] = '" << a << "'\n"
239 << " tar[1] = '" << b << "'"
240 << endl;
243 void operator()(std::string RecordT::* mp, const FieldIdentity &id) const
245 if( m_one.*mp == m_two.*mp )
246 return;
248 m_found_difference = true;
249 cout << " " << id.Name << ":\n"
250 << " tar[0] = '"
251 << Cr2LfWrapper(m_one.*mp) << "'\n"
252 << " tar[1] = '"
253 << Cr2LfWrapper(m_two.*mp) << "'"
254 << endl;
257 void operator()(UnknownsType RecordT::* mp, const FieldIdentity &id) const
259 UnknownsType a = m_one.*mp, b = m_two.*mp;
261 sort(a.begin(), a.end(), UnknownCmp);
262 sort(b.begin(), b.end(), UnknownCmp);
264 if( a == b )
265 return;
267 m_found_difference = true;
268 cout << " " << id.Name << ":\n"
269 << " tar[0] = '" << a << "'\n"
270 << " tar[1] = '" << b << "'"
271 << endl;
274 template <class TypeT>
275 void operator()(TypeT RecordT::* mp, const FieldIdentity &id) const
277 if( m_one.*mp == m_two.*mp )
278 return;
280 m_found_difference = true;
281 cout << " " << id.Name << ":\n"
282 << " tar[0] = '" << m_one.*mp << "'\n"
283 << " tar[1] = '" << m_two.*mp << "'"
284 << endl;
288 template <class RecordT>
289 bool DoParsedCompare(const RecordT &a, const RecordT &b)
291 FieldHandler<RecordT> handler(a, b);
292 ForEachField(RecordT::GetFieldHandles(), handler);
293 return handler.Differing();
296 /// Returns true if differing fields found and displayed.
297 /// False if no differences found.
298 bool ParsedCompare::ShowDifferingFields()
300 #undef HANDLE_PARSER
301 #define HANDLE_PARSER(tname) \
302 else if( tname::GetDBName() == m_one.GetDBName() ) { \
303 tname a, b; \
304 ParseDBData(m_one, a, m_ic); \
305 ParseDBData(m_two, b, m_ic); \
306 return DoParsedCompare<tname>(a, b); \
309 if( !m_known_record ) {
310 return false;
313 ALL_KNOWN_PARSER_TYPES
315 else {
316 return false;
321 //////////////////////////////////////////////////////////////////////////////
322 // Main application class
324 class App
326 public:
327 typedef Barry::ConfigFile::DBListType DBListType;
328 typedef std::vector<Barry::DBData> DBDataList;
329 typedef std::map<std::string, DBDataList> DatabaseMap;
331 private:
332 DBListType m_compare_list;
333 DBListType m_skip_list;
334 DBListType m_valid_list; // this list is created during the
335 // database name compare... it holds
336 // all the names that exist in both
337 // maps, in sorted order
338 DatabaseMap m_tars[2];
339 std::string m_tarpaths[2]; // full filename with path
340 std::string m_tarfiles[2]; // just filename, no path; or brief mark
341 auto_ptr<IConverter> m_ic;
343 int m_main_return; // 0 - success
344 // 1 - low level error or logic error
345 // 2 - databases lists not the same
346 // 3 - a record was added or deleted
347 bool m_verbose;
348 bool m_always_hex;
349 bool m_sort_on_load; // if true, sort each database by
350 // Unique ID after loading from tarball
351 bool m_include_ids; // if true, include DBData IDs in SHA1
353 std::string m_last_dbname;
355 public:
356 App();
358 void LoadTarballs();
359 void CompareDatabaseNames();
360 void CompareData();
361 void Compare(const std::string &dbname);
362 void Compare(const std::string &dbname,
363 const DBDataList &one, const DBDataList &two);
364 void Compare(const DBData &one, const DBData &two);
366 bool Alike(DBDataList::const_iterator b1, DBDataList::const_iterator b2,
367 DBDataList::const_iterator e1, DBDataList::const_iterator e2);
368 void SearchCheck(DBDataList::const_iterator &b,
369 DBDataList::const_iterator &e, const DBDataList &opposite_list,
370 const std::string &action);
372 void ShowRecordDiff(const DBData &one, const DBData &two,
373 ParsedCompare &pc);
374 void DumpRecord(const DBData &data);
375 void ShowDatabaseHeader(const std::string &dbname);
376 void AddParsersToCompare();
378 // returns true if any of the items in Outputs needs a probe
379 int main(int argc, char *argv[]);
382 //////////////////////////////////////////////////////////////////////////////
383 // Memory storage parser
385 class StoreParser : public Barry::Parser
387 App::DatabaseMap &m_map;
389 public:
390 explicit StoreParser(App::DatabaseMap &map)
391 : m_map(map)
395 virtual void ParseRecord(const DBData &data, const IConverter *ic)
397 m_map[data.GetDBName()].push_back(data);
402 //////////////////////////////////////////////////////////////////////////////
403 // Misc helpers dependent on App
405 bool IdExists(const App::DBDataList &list, uint32_t id)
407 return find_if(list.begin(), list.end(), DBDataIdCmp(id)) != list.end();
410 //////////////////////////////////////////////////////////////////////////////
411 // Member function definitions
413 App::App()
414 : m_main_return(0)
415 , m_verbose(false)
416 , m_always_hex(false)
417 , m_sort_on_load(true)
418 , m_include_ids(true)
422 void App::AddParsersToCompare()
424 #undef HANDLE_PARSER
425 #define HANDLE_PARSER(tname) \
426 m_compare_list.push_back(tname::GetDBName());
428 ALL_KNOWN_PARSER_TYPES
431 void App::LoadTarballs()
433 for( int i = 0; i < 2; i++ ) {
434 // load data into memory
435 Restore builder(m_tarpaths[i]);
436 StoreParser parser(m_tars[i]);
438 Pipe pipe(builder);
439 pipe.PumpFile(parser, m_ic.get());
441 // sort each database's record data by UniqueId
442 for( DatabaseMap::iterator b = m_tars[i].begin();
443 b != m_tars[i].end();
444 ++b )
446 if( m_sort_on_load )
447 sort(b->second.begin(), b->second.end(), DBDataCmp);
452 void App::CompareDatabaseNames()
454 for( int i = 1; i >= 0; i-- ) {
455 int other = i == 0 ? 1 : 0;
457 DatabaseMap::const_iterator b = m_tars[i].begin(), match;
458 for( ; b != m_tars[i].end(); ++b ) {
459 match = m_tars[other].find(b->first);
460 if( match == m_tars[other].end() ) {
461 cout << m_tarfiles[other] << _(": has no database '") << b->first << "'" << endl;
462 m_main_return = 2;
464 else {
465 if( !m_valid_list.IsSelected(b->first) ) {
466 m_valid_list.push_back(b->first);
472 // sort the valid list
473 sort(m_valid_list.begin(), m_valid_list.end());
474 // cout << m_valid_list.size() << " valid database names found." << endl;
477 void App::CompareData()
479 DBListType::const_iterator valid = m_valid_list.begin();
480 for( ; valid != m_valid_list.end(); ++valid ) {
481 // if m_compare_list contains items, then only compare
482 // if this database is present in the list
483 if( m_compare_list.size() && !m_compare_list.IsSelected(*valid) )
484 continue;
486 // check if we should skip this database
487 if( m_skip_list.IsSelected(*valid) )
488 continue;
490 // all's well so far... compare!
491 Compare(*valid);
495 void App::Compare(const std::string &dbname)
497 DatabaseMap::const_iterator tar[2];
498 tar[0] = m_tars[0].find(dbname);
499 tar[1] = m_tars[1].find(dbname);
501 if( tar[0] == m_tars[0].end() || tar[1] == m_tars[1].end() )
502 throw logic_error(_("Comparing non-existant database!") + dbname);
504 Compare(dbname, tar[0]->second, tar[1]->second);
507 void App::Compare(const std::string &dbname,
508 const DBDataList &one,
509 const DBDataList &two)
511 DBDataList::const_iterator
512 b1 = one.begin(), e1 = one.end(), // begin/end for one
513 b2 = two.begin(), e2 = two.end(), // begin/end for two
514 s1, s2; // search markers
516 // if IDs are alike, compare
517 // if not alike, then for each b1 and b2, do:
518 // search for id in opposite list
519 // if id found in opposite list, we're done, leave for next match
520 // if id not found, then entry has either been deleted or added
522 // NOTE: this algorithm assumes that both one and two are sorted!
523 while( b1 != e1 || b2 != e2 ) {
524 if( Alike(b1, b2, e1, e2 ) ) {
525 Compare(*b1, *b2);
526 ++b1;
527 ++b2;
528 continue;
530 else {
531 // SearchCheck increments iterators if needed
532 SearchCheck(b1, e1, two, _("record has been deleted in "));
533 SearchCheck(b2, e2, one, _("record has been added in "));
538 void App::Compare(const DBData &one, const DBData &two)
540 // make sure one and two are of the same database, or throw
541 if( one.GetDBName() != two.GetDBName() )
542 throw logic_error(_("Tried to compare records from different databases: ") + one.GetDBName() + " & " + two.GetDBName());
544 // always compare the sums of the data first, and if match, done
545 string sum1, sum2;
546 ChecksumDBData(one, m_include_ids, sum1);
547 ChecksumDBData(two, m_include_ids, sum2);
548 if( sum1 == sum2 )
549 return; // done
551 // records are different, print concise report
552 ShowDatabaseHeader(one.GetDBName());
554 // if different, check if there's a parser available for this data
555 // if not, display that these records differ, dump verbose if
556 // needed, and done
557 ParsedCompare pc(one, two, m_ic.get());
558 ShowRecordDiff(one, two, pc);
561 void App::ShowRecordDiff(const DBData &one,
562 const DBData &two,
563 ParsedCompare &pc)
565 if( !pc.CanParse() ) {
566 // if can't parse, print:
567 // UniqueID: sizes (one vs. two), X bytes differ
568 // then the differing fields
569 cout << " 0x" << hex << one.GetUniqueId() << _(": differs: ")
570 << dec
571 << _("sizes (") << one.GetData().GetSize()
572 << " vs. " << two.GetData().GetSize()
573 << _("), SHA1 sums differ")
574 << endl;
576 else {
577 // otherwise, print:
578 // UniqueID: sizes (one vs. two), (custom display name)
579 cout << " 0x" << hex << one.GetUniqueId() << _(": differs: ")
580 << dec
581 << _("sizes (") << one.GetData().GetSize()
582 << " vs. " << two.GetData().GetSize()
583 << "), "
584 << pc.GetDescription()
585 << endl;
587 if( !pc.ShowDifferingFields() ) {
588 // no difference found...
589 cout << _("No differences found in parsed records, but SHA1 sums differ.") << endl;
593 // if verbose and parser is null, or if always_hex,
594 // then display a (messy?) hex diff of the raw data
595 if( (m_verbose && !pc.CanParse()) || m_always_hex ) {
596 cout << _(" Hex diff of record:") << endl;
597 cout << Diff(one.GetData(), two.GetData()) << endl;
601 bool App::Alike(DBDataList::const_iterator b1,
602 DBDataList::const_iterator b2,
603 DBDataList::const_iterator e1,
604 DBDataList::const_iterator e2)
606 if( b1 == e1 || b2 == e2 )
607 return false;
608 return b1->GetUniqueId() == b2->GetUniqueId();
611 std::string GetDBDescription(const DBData &data, const IConverter *ic)
613 string desc;
615 // try to parse it
616 #undef HANDLE_PARSER
617 #define HANDLE_PARSER(tname) \
618 if( data.GetDBName() == tname::GetDBName() ) { \
619 tname rec; \
620 ParseDBData(data, rec, ic); \
621 return rec.GetDescription(); \
624 ALL_KNOWN_PARSER_TYPES
626 return desc;
629 void App::SearchCheck(DBDataList::const_iterator &b,
630 DBDataList::const_iterator &e,
631 const DBDataList &opposite_list,
632 const std::string &action)
634 // nothing to do if we're at end of list
635 if( b == e )
636 return;
638 // if id is found in opposite list, we're done!
639 // leave the iterator as-is for the next cycle's match
640 if( IdExists(opposite_list, b->GetUniqueId()) )
641 return;
643 // id not found, so set return value
644 m_main_return = 3;
646 // if id not found, then entry has either been deleted or added
647 // (action says which one), and we need to display the diff
648 // and advance the iterator
649 ShowDatabaseHeader(b->GetDBName());
650 cout << " 0x" << hex << b->GetUniqueId() << ": " << action << "tar[1]";
651 string desc = GetDBDescription(*b, m_ic.get());
652 if( desc.size() ) {
653 cout << ": " << desc << endl;
655 else {
656 cout << endl;
658 if( m_verbose ) {
659 DumpRecord(*b);
662 // advance!
663 ++b;
666 void App::DumpRecord(const DBData &data)
668 #undef HANDLE_PARSER
669 #define HANDLE_PARSER(tname) \
670 if( data.GetDBName() == tname::GetDBName() ) { \
671 tname rec; \
672 ParseDBData(data, rec, m_ic.get()); \
673 cout << rec << endl; \
674 return; \
677 ALL_KNOWN_PARSER_TYPES
679 // if we get here, it's not a known record, so just dump the hex
680 cout << data.GetData() << endl;
683 void App::ShowDatabaseHeader(const std::string &dbname)
685 if( dbname != m_last_dbname ) {
686 m_last_dbname = dbname;
687 cout << _("In database: ") << dbname << endl;
692 int App::main(int argc, char *argv[])
694 bool brief = false;
695 bool show_parsers = false, show_fields = false;
696 string iconvCharset;
698 // process command line options
699 for(;;) {
700 int cmd = getopt(argc, argv, "bd:D:hI:PSv");
701 if( cmd == -1 )
702 break;
704 switch( cmd )
706 case 'b': // use brief output
707 brief = true;
708 break;
710 case 'd': // database name to compare
711 m_compare_list.push_back(optarg);
712 break;
714 case 'D': // skip database to compare
715 m_skip_list.push_back(optarg);
716 break;
718 case 'P': // only compare parseable records
719 AddParsersToCompare();
720 break;
722 case 'S': // show parsers and builders
723 if( show_parsers )
724 show_fields = true;
725 else
726 show_parsers = true;
727 break;
729 case 'I': // international charset (iconv)
730 iconvCharset = optarg;
731 break;
733 case 'v': // verbose
734 if( !m_verbose )
735 m_verbose = true;
736 else
737 m_always_hex = true;
738 break;
740 case 'h': // help
741 default:
742 Usage();
743 return 0;
747 if( show_parsers ) {
748 ShowParsers(show_fields, false);
749 return 0;
752 if( (optind + 2) > argc ) {
753 Usage();
754 return 0;
757 // save the tarball filenames for later processing
758 // start out assuming both arguments are simple, no path filenames
759 m_tarpaths[0] = m_tarfiles[0] = argv[optind];
760 m_tarpaths[1] = m_tarfiles[1] = argv[optind+1];
762 if( brief ) {
763 // user wants brief markers... filenames must be huge! :-)
764 m_tarfiles[0] = "tar[0]";
765 m_tarfiles[1] = "tar[1]";
767 else {
768 // attempt to trim paths to filenames only
769 if( m_tarpaths[0].find('/') != string::npos )
770 m_tarfiles[0] = m_tarpaths[0].substr(m_tarpaths[0].rfind('/') + 1);
771 if( m_tarpaths[1].find('/') != string::npos )
772 m_tarfiles[1] = m_tarpaths[1].substr(m_tarpaths[1].rfind('/') + 1);
774 // double check... don't want both markers the same:
775 if( m_tarfiles[0] == m_tarfiles[1] ) {
776 // doh... back to where we started
777 m_tarfiles[0] = m_tarpaths[0];
778 m_tarfiles[1] = m_tarpaths[1];
782 // display key for user
783 cout << "tar[0] = " << m_tarpaths[0] << endl;
784 cout << "tar[1] = " << m_tarpaths[1] << endl;
786 // initialize the Barry library
787 Barry::Init(false);
789 // create an IConverter object if needed
790 if( iconvCharset.size() ) {
791 m_ic.reset( new IConverter(iconvCharset.c_str(), true) );
794 // load both tarballs into memory for easy comparisons
795 LoadTarballs();
797 // compare plain list of database names first
798 CompareDatabaseNames();
800 // compare the actual data
801 CompareData();
803 return m_main_return;
806 int main(int argc, char *argv[])
808 INIT_I18N(PACKAGE);
810 try {
811 App app;
812 return app.main(argc, argv);
814 catch( std::exception &e ) {
815 cerr << _("Exception: ") << e.what() << endl;
816 return 1;