tools: added some optional bools in btarcmp's App for future functionality
[barry.git] / tools / btarcmp.cc
blob8c9e88638401ca5d5b20052f601065dd873753b6
1 ///
2 /// \file btarcmp.cc
3 /// Compare / diff tool to analyze Barry backup tarballs
4 ///
6 /*
7 Copyright (C) 2012, Net Direct Inc. (http://www.netdirect.ca/)
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
18 See the GNU General Public License in the COPYING file at the
19 root directory of this project for more details.
22 #include <barry/barry.h>
23 #include <barry/barrybackup.h>
25 #include <iostream>
26 #include <iomanip>
27 #include <tr1/memory>
28 #include <string>
29 #include <vector>
30 #include <map>
31 #include <algorithm>
32 #include <stdexcept>
34 #include "barrygetopt.h"
36 using namespace std;
37 using namespace std::tr1;
38 using namespace Barry;
/*
Still TODO: should have the ability to copy all differing records
            into another tarball, to function as a "patch", so the
            user can write it to their device to update only the
            differing records, or store it as a record of changes, etc.
*/
46 void Usage()
48 int logical, major, minor;
49 const char *Version = Barry::Version(logical, major, minor);
51 cerr
52 << "btarcmp - Compare Barry backup tarballs\n"
53 << " Copyright 2012, Net Direct Inc. (http://www.netdirect.ca/)\n"
54 << " Using: " << Version << "\n"
55 << "\n"
56 << " Usage: btarcmp [options...] tarball_0 tarball_1\n"
57 << "\n"
58 << " -b Use brief filename output\n"
59 << " -d db Specify a specific database to compare. Can be used\n"
60 << " multiple times. If not used at all, all databases are\n"
61 << " compared.\n"
62 << " -D db Specify a database name to skip. If both -d and -D are\n"
63 << " used for the same database name, it will be skipped.\n"
64 << " -h This help\n"
65 << " -I cs International charset for string conversions\n"
66 << " Valid values here are available with 'iconv --list'\n"
67 << " -P Only compare records that can be parsed\n"
68 << " This is the same as specifying -d for each database\n"
69 << " listed with -S.\n"
70 << " -S Show list of supported database parsers\n"
71 << " -v Show verbose diff output (twice to force hex output)\n"
72 << "\n"
73 << endl;
77 //////////////////////////////////////////////////////////////////////////////
78 // Utility functions and functors
80 bool DBDataCmp(const DBData &a, const DBData &b)
82 return a.GetUniqueId() < b.GetUniqueId();
85 bool UnknownCmp(const UnknownField &a, const UnknownField &b)
87 return a.type < b.type;
90 class DBDataIdCmp
92 uint32_t m_id;
94 public:
95 explicit DBDataIdCmp(uint32_t id)
96 : m_id(id)
100 bool operator()(const DBData &data) const
102 return data.GetUniqueId() == m_id;
106 void ChecksumDBData(const DBData &data, bool include_ids, std::string &sum)
108 Barry::SHA_CTX m_ctx;
110 SHA1_Init(&m_ctx);
112 if( include_ids ) {
113 SHA1_Update(&m_ctx, data.GetDBName().c_str(),
114 data.GetDBName().size());
116 uint8_t recType = data.GetRecType();
117 SHA1_Update(&m_ctx, &recType, sizeof(recType));
119 uint32_t uniqueId = data.GetUniqueId();
120 SHA1_Update(&m_ctx, &uniqueId, sizeof(uniqueId));
123 int len = data.GetData().GetSize() - data.GetOffset();
124 SHA1_Update(&m_ctx,
125 data.GetData().GetData() + data.GetOffset(), len);
127 unsigned char sha1[SHA_DIGEST_LENGTH];
128 SHA1_Final(sha1, &m_ctx);
130 ostringstream oss;
131 for( int i = 0; i < SHA_DIGEST_LENGTH; i++ ) {
132 oss << hex << setfill('0') << setw(2)
133 << (unsigned int) sha1[i];
135 sum = oss.str();
139 //////////////////////////////////////////////////////////////////////////////
140 // Parsed Compare class
142 class ParsedCompare
144 private:
145 const DBData &m_one, &m_two;
146 const IConverter *m_ic;
147 bool m_known_record;
148 std::string m_first_description;
150 public:
151 ParsedCompare(const DBData &one, const DBData &two,
152 const IConverter *ic = 0);
154 bool CanParse() const { return m_known_record; }
155 const std::string& GetDescription() const { return m_first_description;}
157 /// Returns true if differing fields found and displayed.
158 /// False if no differences found.
159 bool ShowDifferingFields();
162 ParsedCompare::ParsedCompare(const DBData &one,
163 const DBData &two,
164 const IConverter *ic)
165 : m_one(one)
166 , m_two(two)
167 , m_ic(ic)
168 , m_known_record(false)
170 if( one.GetDBName() != two.GetDBName() )
171 throw logic_error("Different database types in ParsedCompare ctor!");
173 #undef HANDLE_PARSER
174 #define HANDLE_PARSER(tname) \
175 else if( tname::GetDBName() == one.GetDBName() ) { \
176 m_known_record = true; \
177 tname a; \
178 ParseDBData(m_one, a, m_ic); \
179 m_first_description = a.GetDescription(); \
182 if( m_known_record ) {
184 ALL_KNOWN_PARSER_TYPES
187 template <class RecordT>
188 class FieldHandler
190 private:
191 const RecordT &m_one, &m_two;
192 mutable bool m_found_difference;
194 public:
195 FieldHandler(const RecordT &one, const RecordT &two)
196 : m_one(one)
197 , m_two(two)
198 , m_found_difference(false)
202 bool Differing() const { return m_found_difference; }
204 void operator()(EnumFieldBase<RecordT> *ep,
205 const FieldIdentity &id) const
207 if( ep->GetValue(m_one) == ep->GetValue(m_two) )
208 return;
210 m_found_difference = true;
211 cout << " " << id.Name << ":\n"
212 << " tar[0] = "
213 << ep->GetName(ep->GetValue(m_one))
214 << " (" << ep->GetValue(m_one) << ")\n"
215 << " tar[1] = "
216 << ep->GetName(ep->GetValue(m_two))
217 << " (" << ep->GetValue(m_two) << ")"
218 << endl;
221 void operator()(typename FieldHandle<RecordT>::PostalPointer pp,
222 const FieldIdentity &id) const
224 const std::string
225 &a = m_one.*(pp.m_PostalAddress).*(pp.m_PostalField),
226 &b = m_two.*(pp.m_PostalAddress).*(pp.m_PostalField);
228 if( a == b )
229 return;
231 m_found_difference = true;
232 cout << " " << id.Name << ":\n"
233 << " tar[0] = '" << a << "'\n"
234 << " tar[1] = '" << b << "'"
235 << endl;
238 void operator()(std::string RecordT::* mp, const FieldIdentity &id) const
240 if( m_one.*mp == m_two.*mp )
241 return;
243 m_found_difference = true;
244 cout << " " << id.Name << ":\n"
245 << " tar[0] = '"
246 << Cr2LfWrapper(m_one.*mp) << "'\n"
247 << " tar[1] = '"
248 << Cr2LfWrapper(m_two.*mp) << "'"
249 << endl;
252 void operator()(UnknownsType RecordT::* mp, const FieldIdentity &id) const
254 UnknownsType a = m_one.*mp, b = m_two.*mp;
256 sort(a.begin(), a.end(), UnknownCmp);
257 sort(b.begin(), b.end(), UnknownCmp);
259 if( a == b )
260 return;
262 m_found_difference = true;
263 cout << " " << id.Name << ":\n"
264 << " tar[0] = '" << a << "'\n"
265 << " tar[1] = '" << b << "'"
266 << endl;
269 template <class TypeT>
270 void operator()(TypeT RecordT::* mp, const FieldIdentity &id) const
272 if( m_one.*mp == m_two.*mp )
273 return;
275 m_found_difference = true;
276 cout << " " << id.Name << ":\n"
277 << " tar[0] = '" << m_one.*mp << "'\n"
278 << " tar[1] = '" << m_two.*mp << "'"
279 << endl;
283 template <class RecordT>
284 bool DoParsedCompare(const RecordT &a, const RecordT &b)
286 FieldHandler<RecordT> handler(a, b);
287 ForEachField(RecordT::GetFieldHandles(), handler);
288 return handler.Differing();
291 /// Returns true if differing fields found and displayed.
292 /// False if no differences found.
293 bool ParsedCompare::ShowDifferingFields()
295 #undef HANDLE_PARSER
296 #define HANDLE_PARSER(tname) \
297 else if( tname::GetDBName() == m_one.GetDBName() ) { \
298 tname a, b; \
299 ParseDBData(m_one, a, m_ic); \
300 ParseDBData(m_two, b, m_ic); \
301 return DoParsedCompare<tname>(a, b); \
304 if( !m_known_record ) {
305 return false;
308 ALL_KNOWN_PARSER_TYPES
310 else {
311 return false;
316 //////////////////////////////////////////////////////////////////////////////
317 // Main application class
319 class App
321 public:
322 typedef Barry::ConfigFile::DBListType DBListType;
323 typedef std::vector<Barry::DBData> DBDataList;
324 typedef std::map<std::string, DBDataList> DatabaseMap;
326 private:
327 DBListType m_compare_list;
328 DBListType m_skip_list;
329 DBListType m_valid_list; // this list is created during the
330 // database name compare... it holds
331 // all the names that exist in both
332 // maps, in sorted order
333 DatabaseMap m_tars[2];
334 std::string m_tarpaths[2]; // full filename with path
335 std::string m_tarfiles[2]; // just filename, no path; or brief mark
336 auto_ptr<IConverter> m_ic;
338 int m_main_return; // 0 - success
339 // 1 - low level error or logic error
340 // 2 - databases lists not the same
341 // 3 - a record was added or deleted
342 bool m_verbose;
343 bool m_always_hex;
344 bool m_sort_on_load; // if true, sort each database by
345 // Unique ID after loading from tarball
346 bool m_include_ids; // if true, include DBData IDs in SHA1
348 std::string m_last_dbname;
350 public:
351 App();
353 void LoadTarballs();
354 void CompareDatabaseNames();
355 void CompareData();
356 void Compare(const std::string &dbname);
357 void Compare(const std::string &dbname,
358 const DBDataList &one, const DBDataList &two);
359 void Compare(const DBData &one, const DBData &two);
361 bool Alike(DBDataList::const_iterator b1, DBDataList::const_iterator b2,
362 DBDataList::const_iterator e1, DBDataList::const_iterator e2);
363 void SearchCheck(DBDataList::const_iterator &b,
364 DBDataList::const_iterator &e, const DBDataList &opposite_list,
365 const std::string &action);
367 void ShowRecordDiff(const DBData &one, const DBData &two,
368 ParsedCompare &pc);
369 void DumpRecord(const DBData &data);
370 void ShowDatabaseHeader(const std::string &dbname);
371 void AddParsersToCompare();
373 // returns true if any of the items in Outputs needs a probe
374 int main(int argc, char *argv[]);
376 static void ShowParsers();
379 //////////////////////////////////////////////////////////////////////////////
380 // Memory storage parser
382 class StoreParser : public Barry::Parser
384 App::DatabaseMap &m_map;
386 public:
387 explicit StoreParser(App::DatabaseMap &map)
388 : m_map(map)
392 virtual void ParseRecord(const DBData &data, const IConverter *ic)
394 m_map[data.GetDBName()].push_back(data);
399 //////////////////////////////////////////////////////////////////////////////
400 // Misc helpers dependent on App
402 bool IdExists(const App::DBDataList &list, uint32_t id)
404 return find_if(list.begin(), list.end(), DBDataIdCmp(id)) != list.end();
407 //////////////////////////////////////////////////////////////////////////////
408 // Member function definitions
410 App::App()
411 : m_main_return(0)
412 , m_verbose(false)
413 , m_always_hex(false)
414 , m_sort_on_load(true)
415 , m_include_ids(true)
419 void App::ShowParsers()
421 cout << "Supported Database parsers:\n";
423 #undef HANDLE_PARSER
424 #define HANDLE_PARSER(tname) \
426 cout << " " << tname::GetDBName() << "\n "; \
427 std::vector<Barry::FieldHandle<tname> >::const_iterator \
428 fhi = tname::GetFieldHandles().begin(), \
429 fhe = tname::GetFieldHandles().end(); \
430 for( int count = 0, len = 6; fhi != fhe; ++fhi, ++count ) { \
431 if( count ) { \
432 cout << ", "; \
433 len += 2; \
435 std::string name = fhi->GetIdentity().Name; \
436 if( len + name.size() >= 75 ) { \
437 cout << "\n "; \
438 len = 6; \
440 cout << name; \
441 len += name.size(); \
443 cout << "\n"; \
446 ALL_KNOWN_PARSER_TYPES
448 cout << endl;
451 void App::AddParsersToCompare()
453 #undef HANDLE_PARSER
454 #define HANDLE_PARSER(tname) \
455 m_compare_list.push_back(tname::GetDBName());
457 ALL_KNOWN_PARSER_TYPES
460 void App::LoadTarballs()
462 for( int i = 0; i < 2; i++ ) {
463 // load data into memory
464 Restore builder(m_tarpaths[i]);
465 StoreParser parser(m_tars[i]);
467 Pipe pipe(builder);
468 pipe.PumpFile(parser, m_ic.get());
470 // sort each database's record data by UniqueId
471 for( DatabaseMap::iterator b = m_tars[i].begin();
472 b != m_tars[i].end();
473 ++b )
475 if( m_sort_on_load )
476 sort(b->second.begin(), b->second.end(), DBDataCmp);
481 void App::CompareDatabaseNames()
483 for( int i = 1; i >= 0; i-- ) {
484 int other = i == 0 ? 1 : 0;
486 DatabaseMap::const_iterator b = m_tars[i].begin(), match;
487 for( ; b != m_tars[i].end(); ++b ) {
488 match = m_tars[other].find(b->first);
489 if( match == m_tars[other].end() ) {
490 cout << m_tarfiles[other] << ": has no database '" << b->first << "'" << endl;
491 m_main_return = 2;
493 else {
494 if( !m_valid_list.IsSelected(b->first) ) {
495 m_valid_list.push_back(b->first);
501 // sort the valid list
502 sort(m_valid_list.begin(), m_valid_list.end());
503 // cout << m_valid_list.size() << " valid database names found." << endl;
506 void App::CompareData()
508 DBListType::const_iterator valid = m_valid_list.begin();
509 for( ; valid != m_valid_list.end(); ++valid ) {
510 // if m_compare_list contains items, then only compare
511 // if this database is present in the list
512 if( m_compare_list.size() && !m_compare_list.IsSelected(*valid) )
513 continue;
515 // check if we should skip this database
516 if( m_skip_list.IsSelected(*valid) )
517 continue;
519 // all's well so far... compare!
520 Compare(*valid);
524 void App::Compare(const std::string &dbname)
526 DatabaseMap::const_iterator tar[2];
527 tar[0] = m_tars[0].find(dbname);
528 tar[1] = m_tars[1].find(dbname);
530 if( tar[0] == m_tars[0].end() || tar[1] == m_tars[1].end() )
531 throw logic_error("Comparing non-existant database!" + dbname);
533 Compare(dbname, tar[0]->second, tar[1]->second);
536 void App::Compare(const std::string &dbname,
537 const DBDataList &one,
538 const DBDataList &two)
540 DBDataList::const_iterator
541 b1 = one.begin(), e1 = one.end(), // begin/end for one
542 b2 = two.begin(), e2 = two.end(), // begin/end for two
543 s1, s2; // search markers
545 // if IDs are alike, compare
546 // if not alike, then for each b1 and b2, do:
547 // search for id in opposite list
548 // if id found in opposite list, we're done, leave for next match
549 // if id not found, then entry has either been deleted or added
551 // NOTE: this algorithm assumes that both one and two are sorted!
552 while( b1 != e1 || b2 != e2 ) {
553 if( Alike(b1, b2, e1, e2 ) ) {
554 Compare(*b1, *b2);
555 ++b1;
556 ++b2;
557 continue;
559 else {
560 // SearchCheck increments iterators if needed
561 SearchCheck(b1, e1, two, "deleted");
562 SearchCheck(b2, e2, one, "added");
567 void App::Compare(const DBData &one, const DBData &two)
569 // make sure one and two are of the same database, or throw
570 if( one.GetDBName() != two.GetDBName() )
571 throw logic_error("Tried to compare records from different databases: " + one.GetDBName() + ", and " + two.GetDBName());
573 // always compare the sums of the data first, and if match, done
574 string sum1, sum2;
575 ChecksumDBData(one, m_include_ids, sum1);
576 ChecksumDBData(two, m_include_ids, sum2);
577 if( sum1 == sum2 )
578 return; // done
580 // records are different, print concise report
581 ShowDatabaseHeader(one.GetDBName());
583 // if different, check if there's a parser available for this data
584 // if not, display that these records differ, dump verbose if
585 // needed, and done
586 ParsedCompare pc(one, two, m_ic.get());
587 ShowRecordDiff(one, two, pc);
590 void App::ShowRecordDiff(const DBData &one,
591 const DBData &two,
592 ParsedCompare &pc)
594 if( !pc.CanParse() ) {
595 // if can't parse, print:
596 // UniqueID: sizes (one vs. two), X bytes differ
597 // then the differing fields
598 cout << " 0x" << hex << one.GetUniqueId() << ": differs: "
599 << dec
600 << "sizes (" << one.GetData().GetSize()
601 << " vs. " << two.GetData().GetSize()
602 << "), SHA1 sums differ"
603 << endl;
605 else {
606 // otherwise, print:
607 // UniqueID: sizes (one vs. two), (custom display name)
608 cout << " 0x" << hex << one.GetUniqueId() << ": differs: "
609 << dec
610 << "sizes (" << one.GetData().GetSize()
611 << " vs. " << two.GetData().GetSize()
612 << "), "
613 << pc.GetDescription()
614 << endl;
616 if( !pc.ShowDifferingFields() ) {
617 // no difference found...
618 cout << "No differences found in parsed records, but SHA1 sums differ." << endl;
622 // if verbose and parser is null, or if always_hex,
623 // then display a (messy?) hex diff of the raw data
624 if( (m_verbose && !pc.CanParse()) || m_always_hex ) {
625 cout << " Hex diff of record:" << endl;
626 cout << Diff(one.GetData(), two.GetData()) << endl;
630 bool App::Alike(DBDataList::const_iterator b1,
631 DBDataList::const_iterator b2,
632 DBDataList::const_iterator e1,
633 DBDataList::const_iterator e2)
635 if( b1 == e1 || b2 == e2 )
636 return false;
637 return b1->GetUniqueId() == b2->GetUniqueId();
640 std::string GetDBDescription(const DBData &data, const IConverter *ic)
642 string desc;
644 // try to parse it
645 #undef HANDLE_PARSER
646 #define HANDLE_PARSER(tname) \
647 if( data.GetDBName() == tname::GetDBName() ) { \
648 tname rec; \
649 ParseDBData(data, rec, ic); \
650 return rec.GetDescription(); \
653 ALL_KNOWN_PARSER_TYPES
655 return desc;
658 void App::SearchCheck(DBDataList::const_iterator &b,
659 DBDataList::const_iterator &e,
660 const DBDataList &opposite_list,
661 const std::string &action)
663 // nothing to do if we're at end of list
664 if( b == e )
665 return;
667 // if id is found in opposite list, we're done!
668 // leave the iterator as-is for the next cycle's match
669 if( IdExists(opposite_list, b->GetUniqueId()) )
670 return;
672 // id not found, so set return value
673 m_main_return = 3;
675 // if id not found, then entry has either been deleted or added
676 // (action says which one), and we need to display the diff
677 // and advance the iterator
678 ShowDatabaseHeader(b->GetDBName());
679 cout << " 0x" << hex << b->GetUniqueId() << ": record has been "
680 << action << " in " << "tar[1]";
681 string desc = GetDBDescription(*b, m_ic.get());
682 if( desc.size() ) {
683 cout << ": " << desc << endl;
685 else {
686 cout << endl;
688 if( m_verbose ) {
689 DumpRecord(*b);
692 // advance!
693 ++b;
696 void App::DumpRecord(const DBData &data)
698 #undef HANDLE_PARSER
699 #define HANDLE_PARSER(tname) \
700 if( data.GetDBName() == tname::GetDBName() ) { \
701 tname rec; \
702 ParseDBData(data, rec, m_ic.get()); \
703 cout << rec << endl; \
704 return; \
707 ALL_KNOWN_PARSER_TYPES
709 // if we get here, it's not a known record, so just dump the hex
710 cout << data.GetData() << endl;
713 void App::ShowDatabaseHeader(const std::string &dbname)
715 if( dbname != m_last_dbname ) {
716 m_last_dbname = dbname;
717 cout << "In database: " << dbname << endl;
722 int App::main(int argc, char *argv[])
724 bool brief = false;
725 string iconvCharset;
727 // process command line options
728 for(;;) {
729 int cmd = getopt(argc, argv, "bd:D:hI:PSv");
730 if( cmd == -1 )
731 break;
733 switch( cmd )
735 case 'b': // use brief output
736 brief = true;
737 break;
739 case 'd': // database name to compare
740 m_compare_list.push_back(optarg);
741 break;
743 case 'D': // skip database to compare
744 m_skip_list.push_back(optarg);
745 break;
747 case 'P': // only compare parseable records
748 AddParsersToCompare();
749 break;
751 case 'S': // show parsers and builders
752 ShowParsers();
753 return 0;
755 case 'I': // international charset (iconv)
756 iconvCharset = optarg;
757 break;
759 case 'v': // verbose
760 if( !m_verbose )
761 m_verbose = true;
762 else
763 m_always_hex = true;
764 break;
766 case 'h': // help
767 default:
768 Usage();
769 return 0;
773 if( (optind + 2) > argc ) {
774 Usage();
775 return 0;
778 // save the tarball filenames for later processing
779 // start out assuming both arguments are simple, no path filenames
780 m_tarpaths[0] = m_tarfiles[0] = argv[optind];
781 m_tarpaths[1] = m_tarfiles[1] = argv[optind+1];
783 if( brief ) {
784 // user wants brief markers... filenames must be huge! :-)
785 m_tarfiles[0] = "tar[0]";
786 m_tarfiles[1] = "tar[1]";
788 else {
789 // attempt to trim paths to filenames only
790 if( m_tarpaths[0].find('/') != string::npos )
791 m_tarfiles[0] = m_tarpaths[0].substr(m_tarpaths[0].rfind('/') + 1);
792 if( m_tarpaths[1].find('/') != string::npos )
793 m_tarfiles[1] = m_tarpaths[1].substr(m_tarpaths[1].rfind('/') + 1);
795 // double check... don't want both markers the same:
796 if( m_tarfiles[0] == m_tarfiles[1] ) {
797 // doh... back to where we started
798 m_tarfiles[0] = m_tarpaths[0];
799 m_tarfiles[1] = m_tarpaths[1];
803 // display key for user
804 cout << "tar[0] = " << m_tarpaths[0] << endl;
805 cout << "tar[1] = " << m_tarpaths[1] << endl;
807 // initialize the Barry library
808 Barry::Init(false);
810 // create an IConverter object if needed
811 if( iconvCharset.size() ) {
812 m_ic.reset( new IConverter(iconvCharset.c_str(), true) );
815 // load both tarballs into memory for easy comparisons
816 LoadTarballs();
818 // compare plain list of database names first
819 CompareDatabaseNames();
821 // compare the actual data
822 CompareData();
824 return m_main_return;
827 int main(int argc, char *argv[])
829 try {
830 App app;
831 return app.main(argc, argv);
833 catch( std::exception &e ) {
834 cerr << "Exception: " << e.what() << endl;
835 return 1;