3 /// Compare / diff tool to analyze Barry backup tarballs
7 Copyright (C) 2012, Net Direct Inc. (http://www.netdirect.ca/)
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
18 See the GNU General Public License in the COPYING file at the
19 root directory of this project for more details.
22 #include <barry/barry.h>
23 #include <barry/barrybackup.h>
34 #include "barrygetopt.h"
37 using namespace std::tr1
;
38 using namespace Barry
;
41 Still TODO: should have the ability to copy all differing records
42 into another tarball, to function as a "patch", so
43 user can write it to his device to update only differing
44 records, or to store as a record of changes, etc.
// Usage/help text fragment: passes logical/major/minor to Barry::Version()
// and keeps the returned string for the "Using:" line of the banner below.
// NOTE(review): the enclosing function header and the stream object that
// receives this << chain are not visible in this view -- confirm against
// the full file.
48 int logical
, major
, minor
;
49 const char *Version
= Barry::Version(logical
, major
, minor
);
// Program banner, library version, then the command line synopsis and the
// description of each option.
52 << "btarcmp - Compare Barry backup tarballs\n"
53 << " Copyright 2012, Net Direct Inc. (http://www.netdirect.ca/)\n"
54 << " Using: " << Version
<< "\n"
56 << " Usage: btarcmp [options...] tarball_0 tarball_1\n"
58 << " -b Use brief filename output\n"
59 << " -d db Specify a specific database to compare. Can be used\n"
60 << " multiple times. If not used at all, all databases are\n"
62 << " -D db Specify a database name to skip. If both -d and -D are\n"
63 << " used for the same database name, it will be skipped.\n"
65 << " -I cs International charset for string conversions\n"
66 << " Valid values here are available with 'iconv --list'\n"
67 << " -P Only compare records that can be parsed\n"
68 << " This is the same as specifying -d for each database\n"
69 << " listed with -S.\n"
70 << " -S Show list of supported database parsers\n"
71 << " -v Show verbose diff output (twice to force hex output)\n"
77 //////////////////////////////////////////////////////////////////////////////
78 // Utility functions and functors
80 bool DBDataCmp(const DBData
&a
, const DBData
&b
)
82 return a
.GetUniqueId() < b
.GetUniqueId();
85 bool UnknownCmp(const UnknownField
&a
, const UnknownField
&b
)
87 return a
.type
< b
.type
;
// Unary predicate functor used with find_if(): operator() is true when the
// given record's unique ID equals m_id.
// NOTE(review): the class header, the m_id declaration and the constructor
// initialiser are not visible in this view; presumably the constructor
// stores `id` in m_id -- confirm.
95 explicit DBDataIdCmp(uint32_t id
)
// Match test: compares the record's unique ID against the stored ID.
100 bool operator()(const DBData
&data
) const
102 return data
.GetUniqueId() == m_id
;
// Computes a SHA1 digest for one record and renders it as hex text.
//
// data        - record to hash
// include_ids - NOTE(review): not referenced on any visible line; presumably
//               gates the database-name / record-type / unique-ID updates
//               below (the statement that tests it is missing from this
//               view) -- confirm
// sum         - output string; NOTE(review): the assignment to it is not
//               visible here, presumably it receives the text built in oss
106 void ChecksumDBData(const DBData
&data
, bool include_ids
, std::string
&sum
)
108 Barry::SHA_CTX m_ctx
;
// Identity part of the digest: database name bytes ...
113 SHA1_Update(&m_ctx
, data
.GetDBName().c_str(),
114 data
.GetDBName().size());
// ... the one-byte record type ...
116 uint8_t recType
= data
.GetRecType();
117 SHA1_Update(&m_ctx
, &recType
, sizeof(recType
));
// ... and the 32-bit unique ID, hashed as its raw in-memory bytes.
119 uint32_t uniqueId
= data
.GetUniqueId();
120 SHA1_Update(&m_ctx
, &uniqueId
, sizeof(uniqueId
));
// Payload part: everything from GetOffset() to the end of the data buffer.
123 int len
= data
.GetData().GetSize() - data
.GetOffset();
125 data
.GetData().GetData() + data
.GetOffset(), len
);
// Finish the digest into a fixed-size byte array.
127 unsigned char sha1
[SHA_DIGEST_LENGTH
];
128 SHA1_Final(sha1
, &m_ctx
);
// Render each digest byte as two zero-padded hex digits.
131 for( int i
= 0; i
< SHA_DIGEST_LENGTH
; i
++ ) {
132 oss
<< hex
<< setfill('0') << setw(2)
133 << (unsigned int) sha1
[i
];
139 //////////////////////////////////////////////////////////////////////////////
140 // Parsed Compare class
// ParsedCompare members (class header not visible in this view).
// Holds references to the two raw records being compared, so both must
// outlive this object.
145 const DBData
&m_one
, &m_two
;
// Optional string converter handed to ParseDBData(); may be null (the
// constructor's default argument is 0).
146 const IConverter
*m_ic
;
// Description taken from the parsed form of the first record (set in the
// constructor's HANDLE_PARSER expansion).
148 std::string m_first_description
;
// Throws logic_error when the two records come from different databases.
151 ParsedCompare(const DBData
&one
, const DBData
&two
,
152 const IConverter
*ic
= 0);
// True when a parser matching the records' database name was found.
154 bool CanParse() const { return m_known_record
; }
// Description of the first record as captured by the constructor.
155 const std::string
& GetDescription() const { return m_first_description
;}
157 /// Returns true if differing fields found and displayed.
158 /// False if no differences found.
159 bool ShowDifferingFields();
// Constructor: checks that both records belong to the same database, then
// tries each known parser type; on a database-name match it marks the
// record as known and captures the first record's description.
// NOTE(review): parts of the signature, the initialiser list and the macro
// body are missing from this view.
162 ParsedCompare::ParsedCompare(const DBData
&one
,
164 const IConverter
*ic
)
168 , m_known_record(false)
// Comparing records of different databases is a caller bug.
170 if( one
.GetDBName() != two
.GetDBName() )
171 throw logic_error("Different database types in ParsedCompare ctor!");
// One else-if per parser type; ALL_KNOWN_PARSER_TYPES expands HANDLE_PARSER
// once for each type.
174 #define HANDLE_PARSER(tname) \
175 else if( tname::GetDBName() == one.GetDBName() ) { \
176 m_known_record = true; \
178 ParseDBData(m_one, a, m_ic); \
179 m_first_description = a.GetDescription(); \
182 if( m_known_record
) {
184 ALL_KNOWN_PARSER_TYPES
// FieldHandler<RecordT>: per-field comparison functor for two parsed
// records of the same type.  An instance is handed to ForEachField() (see
// DoParsedCompare); each operator() overload below handles one kind of
// field handle, prints the field name and both values to cout, and records
// that a difference was seen.
// NOTE(review): the class header, initialiser list and the statement under
// each equality test are missing from this view; presumably each test is
// followed by an early return so only differing fields are printed.
187 template <class RecordT
>
191 const RecordT
&m_one
, &m_two
;
// mutable because every operator() is const yet must record a difference.
192 mutable bool m_found_difference
;
195 FieldHandler(const RecordT
&one
, const RecordT
&two
)
198 , m_found_difference(false)
// True once any overload has reported a differing field.
202 bool Differing() const { return m_found_difference
; }
// Enum fields: compared by value, shown as name plus numeric value.
204 void operator()(EnumFieldBase
<RecordT
> *ep
,
205 const FieldIdentity
&id
) const
207 if( ep
->GetValue(m_one
) == ep
->GetValue(m_two
) )
210 m_found_difference
= true;
211 cout
<< " " << id
.Name
<< ":\n"
213 << ep
->GetName(ep
->GetValue(m_one
))
214 << " (" << ep
->GetValue(m_one
) << ")\n"
216 << ep
->GetName(ep
->GetValue(m_two
))
217 << " (" << ep
->GetValue(m_two
) << ")"
// Postal address sub-fields: reached through two member pointers
// (address member, then field within the address).
221 void operator()(typename FieldHandle
<RecordT
>::PostalPointer pp
,
222 const FieldIdentity
&id
) const
225 &a
= m_one
.*(pp
.m_PostalAddress
).*(pp
.m_PostalField
),
226 &b
= m_two
.*(pp
.m_PostalAddress
).*(pp
.m_PostalField
);
231 m_found_difference
= true;
232 cout
<< " " << id
.Name
<< ":\n"
233 << " tar[0] = '" << a
<< "'\n"
234 << " tar[1] = '" << b
<< "'"
// std::string fields: values are passed through Cr2LfWrapper for display.
238 void operator()(std::string
RecordT::* mp
, const FieldIdentity
&id
) const
240 if( m_one
.*mp
== m_two
.*mp
)
243 m_found_difference
= true;
244 cout
<< " " << id
.Name
<< ":\n"
246 << Cr2LfWrapper(m_one
.*mp
) << "'\n"
248 << Cr2LfWrapper(m_two
.*mp
) << "'"
// Unknown-field lists: copied and sorted by type before being compared, so
// the order of the unknown fields does not matter.
252 void operator()(UnknownsType
RecordT::* mp
, const FieldIdentity
&id
) const
254 UnknownsType a
= m_one
.*mp
, b
= m_two
.*mp
;
256 sort(a
.begin(), a
.end(), UnknownCmp
);
257 sort(b
.begin(), b
.end(), UnknownCmp
);
262 m_found_difference
= true;
263 cout
<< " " << id
.Name
<< ":\n"
264 << " tar[0] = '" << a
<< "'\n"
265 << " tar[1] = '" << b
<< "'"
// Catch-all for any other member type that supports == and operator<<.
269 template <class TypeT
>
270 void operator()(TypeT
RecordT::* mp
, const FieldIdentity
&id
) const
272 if( m_one
.*mp
== m_two
.*mp
)
275 m_found_difference
= true;
276 cout
<< " " << id
.Name
<< ":\n"
277 << " tar[0] = '" << m_one
.*mp
<< "'\n"
278 << " tar[1] = '" << m_two
.*mp
<< "'"
283 template <class RecordT
>
284 bool DoParsedCompare(const RecordT
&a
, const RecordT
&b
)
286 FieldHandler
<RecordT
> handler(a
, b
);
287 ForEachField(RecordT::GetFieldHandles(), handler
);
288 return handler
.Differing();
// Parses both records with the parser whose database name matches m_one
// and delegates the field-by-field comparison to DoParsedCompare<>.
// NOTE(review): the body of the !m_known_record branch and the record
// declarations inside the macro are missing from this view.
291 /// Returns true if differing fields found and displayed.
292 /// False if no differences found.
293 bool ParsedCompare::ShowDifferingFields()
// One else-if per parser type; ALL_KNOWN_PARSER_TYPES expands HANDLE_PARSER
// once for each type.
296 #define HANDLE_PARSER(tname) \
297 else if( tname::GetDBName() == m_one.GetDBName() ) { \
299 ParseDBData(m_one, a, m_ic); \
300 ParseDBData(m_two, b, m_ic); \
301 return DoParsedCompare<tname>(a, b); \
304 if( !m_known_record
) {
308 ALL_KNOWN_PARSER_TYPES
316 //////////////////////////////////////////////////////////////////////////////
317 // Main application class
// App members (class header not visible in this view).
// Container aliases: list of database names, list of records, and a map
// from database name to that database's records.
322 typedef Barry::ConfigFile::DBListType DBListType
;
323 typedef std::vector
<Barry::DBData
> DBDataList
;
324 typedef std::map
<std::string
, DBDataList
> DatabaseMap
;
// Names given with -d (or added by -P): when non-empty, only these are
// compared.
327 DBListType m_compare_list
;
// Names given with -D: never compared.
328 DBListType m_skip_list
;
329 DBListType m_valid_list
; // this list is created during the
330 // database name compare... it holds
331 // all the names that exist in both
332 // maps, in sorted order
// Records loaded from each tarball, indexed 0 and 1.
333 DatabaseMap m_tars
[2];
334 std::string m_tarpaths
[2]; // full filename with path
335 std::string m_tarfiles
[2]; // just filename, no path; or brief mark
// Optional charset converter; only allocated when -I is given.
336 auto_ptr
<IConverter
> m_ic
;
338 int m_main_return
; // 0 - success
339 // 1 - low level error or logic error
340 // 2 - database lists not the same
341 // 3 - a record was added or deleted
344 bool m_sort_on_load
; // if true, sort each database by
345 // Unique ID after loading from tarball
346 bool m_include_ids
; // if true, include DBData IDs in SHA1
// Database name most recently printed by ShowDatabaseHeader().
348 std::string m_last_dbname
;
354 void CompareDatabaseNames();
// Compare() overloads: by database name, by pair of record lists, and by
// pair of single records.
356 void Compare(const std::string
&dbname
);
357 void Compare(const std::string
&dbname
,
358 const DBDataList
&one
, const DBDataList
&two
);
359 void Compare(const DBData
&one
, const DBData
&two
);
361 bool Alike(DBDataList::const_iterator b1
, DBDataList::const_iterator b2
,
362 DBDataList::const_iterator e1
, DBDataList::const_iterator e2
);
363 void SearchCheck(DBDataList::const_iterator
&b
,
364 DBDataList::const_iterator
&e
, const DBDataList
&opposite_list
,
365 const std::string
&action
);
367 void ShowRecordDiff(const DBData
&one
, const DBData
&two
,
369 void DumpRecord(const DBData
&data
);
370 void ShowDatabaseHeader(const std::string
&dbname
);
371 void AddParsersToCompare();
373 // application entry point, called by the global main(); returns m_main_return
374 int main(int argc
, char *argv
[]);
376 static void ShowParsers();
379 //////////////////////////////////////////////////////////////////////////////
380 // Memory storage parser
// Parser that keeps every record it is given in memory: each record is
// appended to the list stored under its database name in the referenced
// map.
// NOTE(review): the constructor initialiser and the remaining members are
// missing from this view; presumably the constructor binds m_map to `map`.
382 class StoreParser
: public Barry::Parser
384 App::DatabaseMap
&m_map
;
387 explicit StoreParser(App::DatabaseMap
&map
)
// Called once per record; `ic` is not used on the visible line.
392 virtual void ParseRecord(const DBData
&data
, const IConverter
*ic
)
394 m_map
[data
.GetDBName()].push_back(data
);
399 //////////////////////////////////////////////////////////////////////////////
400 // Misc helpers dependent on App
402 bool IdExists(const App::DBDataList
&list
, uint32_t id
)
404 return find_if(list
.begin(), list
.end(), DBDataIdCmp(id
)) != list
.end();
407 //////////////////////////////////////////////////////////////////////////////
408 // Member function definitions
// Tail of a constructor initialiser list (presumably App::App -- the header
// is not visible): hex dumps are off by default, databases are sorted on
// load, and record IDs are included in the SHA1 sums.
413 , m_always_hex(false)
414 , m_sort_on_load(true)
415 , m_include_ids(true)
// Prints the heading, then for every known parser type its database name
// followed by its field names.  `len` tracks the current output width and
// is tested against 75 columns.
// NOTE(review): the wrap handling and separator lines of the macro are
// missing from this view.
419 void App::ShowParsers()
421 cout
<< "Supported Database parsers:\n";
// ALL_KNOWN_PARSER_TYPES expands HANDLE_PARSER once per parser type.
424 #define HANDLE_PARSER(tname) \
426 cout << " " << tname::GetDBName() << "\n "; \
427 std::vector<Barry::FieldHandle<tname> >::const_iterator \
428 fhi = tname::GetFieldHandles().begin(), \
429 fhe = tname::GetFieldHandles().end(); \
430 for( int count = 0, len = 6; fhi != fhe; ++fhi, ++count ) { \
435 std::string name = fhi->GetIdentity().Name; \
436 if( len + name.size() >= 75 ) { \
441 len += name.size(); \
446 ALL_KNOWN_PARSER_TYPES
// Implements -P: appends the database name of every known parser type to
// m_compare_list, so only parseable databases are compared.
451 void App::AddParsersToCompare()
454 #define HANDLE_PARSER(tname) \
455 m_compare_list.push_back(tname::GetDBName());
// Expands HANDLE_PARSER once per parser type.
457 ALL_KNOWN_PARSER_TYPES
// Loads both tarballs into m_tars[0] and m_tars[1], then sorts every
// database's records by unique ID.
// NOTE(review): the declaration of `pipe`, the inner loop's increment and
// any m_sort_on_load test are missing from this view.
460 void App::LoadTarballs()
462 for( int i
= 0; i
< 2; i
++ ) {
463 // load data into memory
464 Restore
builder(m_tarpaths
[i
]);
465 StoreParser
parser(m_tars
[i
]);
// Pump every record through the StoreParser, which files it in m_tars[i].
468 pipe
.PumpFile(parser
, m_ic
.get());
470 // sort each database's record data by UniqueId
471 for( DatabaseMap::iterator b
= m_tars
[i
].begin();
472 b
!= m_tars
[i
].end();
476 sort(b
->second
.begin(), b
->second
.end(), DBDataCmp
);
// Cross-checks the database names of the two tarballs in both directions.
// A name missing from the other tarball is reported on cout; otherwise the
// name is added once to m_valid_list, which is sorted at the end.
// NOTE(review): the lines between the report and the m_valid_list test
// (presumably setting m_main_return and skipping to the next name) are
// missing from this view.
481 void App::CompareDatabaseNames()
483 for( int i
= 1; i
>= 0; i
-- ) {
// Index of the tarball being checked against.
484 int other
= i
== 0 ? 1 : 0;
486 DatabaseMap::const_iterator b
= m_tars
[i
].begin(), match
;
487 for( ; b
!= m_tars
[i
].end(); ++b
) {
488 match
= m_tars
[other
].find(b
->first
);
489 if( match
== m_tars
[other
].end() ) {
490 cout
<< m_tarfiles
[other
] << ": has no database '" << b
->first
<< "'" << endl
;
// Both passes visit shared names, so guard against adding a name twice.
494 if( !m_valid_list
.IsSelected(b
->first
) ) {
495 m_valid_list
.push_back(b
->first
);
501 // sort the valid list
502 sort(m_valid_list
.begin(), m_valid_list
.end());
503 // cout << m_valid_list.size() << " valid database names found." << endl;
// Walks m_valid_list (names present in both tarballs) and applies the
// -d / -D filters to each name.
// NOTE(review): the statements under both tests and the final compare call
// are missing from this view; presumably `continue` and Compare(*valid).
506 void App::CompareData()
508 DBListType::const_iterator valid
= m_valid_list
.begin();
509 for( ; valid
!= m_valid_list
.end(); ++valid
) {
510 // if m_compare_list contains items, then only compare
511 // if this database is present in the list
512 if( m_compare_list
.size() && !m_compare_list
.IsSelected(*valid
) )
515 // check if we should skip this database
516 if( m_skip_list
.IsSelected(*valid
) )
519 // all's well so far... compare!
524 void App::Compare(const std::string
&dbname
)
526 DatabaseMap::const_iterator tar
[2];
527 tar
[0] = m_tars
[0].find(dbname
);
528 tar
[1] = m_tars
[1].find(dbname
);
530 if( tar
[0] == m_tars
[0].end() || tar
[1] == m_tars
[1].end() )
531 throw logic_error("Comparing non-existant database!" + dbname
);
533 Compare(dbname
, tar
[0]->second
, tar
[1]->second
);
// List-level compare: walks the two record lists of one database in
// parallel.  Records whose IDs line up are compared; otherwise each side is
// checked against the opposite list to decide whether its current record
// was deleted or added.
// NOTE(review): the body of the Alike() branch is missing from this view.
// s1/s2 are declared but not used on any visible line.  If neither
// SearchCheck() call advances its iterator the loop makes no progress;
// presumably the sortedness assumption below rules that out -- confirm.
536 void App::Compare(const std::string
&dbname
,
537 const DBDataList
&one
,
538 const DBDataList
&two
)
540 DBDataList::const_iterator
541 b1
= one
.begin(), e1
= one
.end(), // begin/end for one
542 b2
= two
.begin(), e2
= two
.end(), // begin/end for two
543 s1
, s2
; // search markers
545 // if IDs are alike, compare
546 // if not alike, then for each b1 and b2, do:
547 // search for id in opposite list
548 // if id found in opposite list, we're done, leave for next match
549 // if id not found, then entry has either been deleted or added
551 // NOTE: this algorithm assumes that both one and two are sorted!
552 while( b1
!= e1
|| b2
!= e2
) {
553 if( Alike(b1
, b2
, e1
, e2
) ) {
560 // SearchCheck increments iterators if needed
561 SearchCheck(b1
, e1
, two
, "deleted");
562 SearchCheck(b2
, e2
, one
, "added");
// Record-level compare: checksums both records and, for differing records,
// prints the database header and a diff via ParsedCompare/ShowRecordDiff.
// Throws logic_error when the records come from different databases.
// NOTE(review): the sum1/sum2 declarations and the comparison of the two
// sums (presumably an early return when equal) are missing from this view.
567 void App::Compare(const DBData
&one
, const DBData
&two
)
569 // make sure one and two are of the same database, or throw
570 if( one
.GetDBName() != two
.GetDBName() )
571 throw logic_error("Tried to compare records from different databases: " + one
.GetDBName() + ", and " + two
.GetDBName());
573 // always compare the sums of the data first, and if match, done
575 ChecksumDBData(one
, m_include_ids
, sum1
);
576 ChecksumDBData(two
, m_include_ids
, sum2
);
580 // records are different, print concise report
581 ShowDatabaseHeader(one
.GetDBName());
583 // if different, check if there's a parser available for this data
584 // if not, display that these records differ, dump verbose if
586 ParsedCompare
pc(one
, two
, m_ic
.get());
587 ShowRecordDiff(one
, two
, pc
);
// Prints the report for one pair of differing records.  Unparseable
// records get a one-line summary; parseable records get a summary with the
// record description followed by the differing fields.  A raw hex diff is
// appended when requested.
// NOTE(review): the rest of the signature (the ParsedCompare parameter
// `pc`), the else branch structure and several << continuation lines are
// missing from this view.
590 void App::ShowRecordDiff(const DBData
&one
,
594 if( !pc
.CanParse() ) {
595 // if can't parse, print:
596 // UniqueID: sizes (one vs. two), SHA1 sums differ
597 // then the differing fields
598 cout
<< " 0x" << hex
<< one
.GetUniqueId() << ": differs: "
600 << "sizes (" << one
.GetData().GetSize()
601 << " vs. " << two
.GetData().GetSize()
602 << "), SHA1 sums differ"
607 // UniqueID: sizes (one vs. two), (custom display name)
608 cout
<< " 0x" << hex
<< one
.GetUniqueId() << ": differs: "
610 << "sizes (" << one
.GetData().GetSize()
611 << " vs. " << two
.GetData().GetSize()
613 << pc
.GetDescription()
// The sums differed, yet every parsed field may still compare equal.
616 if( !pc
.ShowDifferingFields() ) {
617 // no difference found...
618 cout
<< "No differences found in parsed records, but SHA1 sums differ." << endl
;
622 // if verbose and parser is null, or if always_hex,
623 // then display a (messy?) hex diff of the raw data
624 if( (m_verbose
&& !pc
.CanParse()) || m_always_hex
) {
625 cout
<< " Hex diff of record:" << endl
;
626 cout
<< Diff(one
.GetData(), two
.GetData()) << endl
;
// Returns whether the records at b1 and b2 have the same unique ID.
// e1/e2 are the end iterators of the respective lists.
// NOTE(review): the statement under the end-of-list test is missing from
// this view; presumably `return false;`, since dereferencing an end
// iterator below would be invalid.
630 bool App::Alike(DBDataList::const_iterator b1
,
631 DBDataList::const_iterator b2
,
632 DBDataList::const_iterator e1
,
633 DBDataList::const_iterator e2
)
635 if( b1
== e1
|| b2
== e2
)
637 return b1
->GetUniqueId() == b2
->GetUniqueId();
// Returns the description of a record: for the parser type whose database
// name matches, the record is parsed and rec.GetDescription() is returned.
// NOTE(review): the declaration of `rec` and the fallback return for
// unknown databases are missing from this view.
640 std::string
GetDBDescription(const DBData
&data
, const IConverter
*ic
)
// ALL_KNOWN_PARSER_TYPES expands HANDLE_PARSER once per parser type.
646 #define HANDLE_PARSER(tname) \
647 if( data.GetDBName() == tname::GetDBName() ) { \
649 ParseDBData(data, rec, ic); \
650 return rec.GetDescription(); \
653 ALL_KNOWN_PARSER_TYPES
// Handles one side of a mismatch between the two record lists.
//
// b, e          - current position and end of this side's list; passed by
//                 reference so the position can be advanced
// opposite_list - the other tarball's records for the same database
// action        - word used in the report ("deleted" or "added" at the
//                 visible call sites)
//
// The report always names tar[1]: a record only in tar[0] was deleted in
// tar[1], and a record only in tar[1] was added in tar[1].
// NOTE(review): the end-of-list test, the early returns, the m_main_return
// update and the iterator advance are missing from this view.
658 void App::SearchCheck(DBDataList::const_iterator
&b
,
659 DBDataList::const_iterator
&e
,
660 const DBDataList
&opposite_list
,
661 const std::string
&action
)
663 // nothing to do if we're at end of list
667 // if id is found in opposite list, we're done!
668 // leave the iterator as-is for the next cycle's match
669 if( IdExists(opposite_list
, b
->GetUniqueId()) )
672 // id not found, so set return value
675 // if id not found, then entry has either been deleted or added
676 // (action says which one), and we need to display the diff
677 // and advance the iterator
678 ShowDatabaseHeader(b
->GetDBName());
679 cout
<< " 0x" << hex
<< b
->GetUniqueId() << ": record has been "
680 << action
<< " in " << "tar[1]";
// Append the record's description to the report line.
681 string desc
= GetDBDescription(*b
, m_ic
.get());
683 cout
<< ": " << desc
<< endl
;
// Prints one record: parsed form when a parser type matches its database
// name, raw data otherwise.
// NOTE(review): the declaration of `rec` and the return after the parsed
// output are missing from this view.
696 void App::DumpRecord(const DBData
&data
)
// ALL_KNOWN_PARSER_TYPES expands HANDLE_PARSER once per parser type.
699 #define HANDLE_PARSER(tname) \
700 if( data.GetDBName() == tname::GetDBName() ) { \
702 ParseDBData(data, rec, m_ic.get()); \
703 cout << rec << endl; \
707 ALL_KNOWN_PARSER_TYPES
709 // if we get here, it's not a known record, so just dump the hex
710 cout
<< data
.GetData() << endl
;
713 void App::ShowDatabaseHeader(const std::string
&dbname
)
715 if( dbname
!= m_last_dbname
) {
716 m_last_dbname
= dbname
;
717 cout
<< "In database: " << dbname
<< endl
;
// Application entry point: parses the command line, works out the display
// names for the two tarballs, optionally creates the charset converter,
// then compares database names and data.  Returns m_main_return.
// NOTE(review): the option loop, the switch header, break statements, the
// remaining option cases and the Barry init / load / compare calls are
// missing from this view.
722 int App::main(int argc
, char *argv
[])
727 // process command line options
729 int cmd
= getopt(argc
, argv
, "bd:D:hI:PSv");
735 case 'b': // use brief output
739 case 'd': // database name to compare
740 m_compare_list
.push_back(optarg
);
743 case 'D': // skip database to compare
744 m_skip_list
.push_back(optarg
);
747 case 'P': // only compare parseable records
748 AddParsersToCompare();
751 case 'S': // show list of supported database parsers
755 case 'I': // international charset (iconv)
756 iconvCharset
= optarg
;
// Two positional arguments (the tarball paths) are required.
773 if( (optind
+ 2) > argc
) {
778 // save the tarball filenames for later processing
779 // start out assuming both arguments are simple, no path filenames
780 m_tarpaths
[0] = m_tarfiles
[0] = argv
[optind
];
781 m_tarpaths
[1] = m_tarfiles
[1] = argv
[optind
+1];
784 // user wants brief markers... filenames must be huge! :-)
785 m_tarfiles
[0] = "tar[0]";
786 m_tarfiles
[1] = "tar[1]";
789 // attempt to trim paths to filenames only
790 if( m_tarpaths
[0].find('/') != string::npos
)
791 m_tarfiles
[0] = m_tarpaths
[0].substr(m_tarpaths
[0].rfind('/') + 1);
792 if( m_tarpaths
[1].find('/') != string::npos
)
793 m_tarfiles
[1] = m_tarpaths
[1].substr(m_tarpaths
[1].rfind('/') + 1);
795 // double check... don't want both markers the same:
796 if( m_tarfiles
[0] == m_tarfiles
[1] ) {
797 // doh... back to where we started
798 m_tarfiles
[0] = m_tarpaths
[0];
799 m_tarfiles
[1] = m_tarpaths
[1];
803 // display key for user
804 cout
<< "tar[0] = " << m_tarpaths
[0] << endl
;
805 cout
<< "tar[1] = " << m_tarpaths
[1] << endl
;
807 // initialize the Barry library
810 // create an IConverter object if needed
811 if( iconvCharset
.size() ) {
812 m_ic
.reset( new IConverter(iconvCharset
.c_str(), true) );
815 // load both tarballs into memory for easy comparisons
818 // compare plain list of database names first
819 CompareDatabaseNames();
821 // compare the actual data
824 return m_main_return
;
// Process entry point: forwards to App::main() and reports any
// std::exception on cerr.
// NOTE(review): the try opener, the App object declaration and the value
// returned after an exception are missing from this view.
827 int main(int argc
, char *argv
[])
831 return app
.main(argc
, argv
);
// Last-resort handler: print the exception message.
833 catch( std::exception
&e
) {
834 cerr
<< "Exception: " << e
.what() << endl
;