lib: fixed parsing of recurring VEVENTS: DAILY and interval support
[barry/progweb.git] / tools / btarcmp.cc
blob2f173e7c8fbc55708829e2d700c659169a612379
1 ///
2 /// \file btarcmp.cc
3 /// Compare / diff tool to analyze Barry backup tarballs
4 ///
6 /*
7 Copyright (C) 2012, Net Direct Inc. (http://www.netdirect.ca/)
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
18 See the GNU General Public License in the COPYING file at the
19 root directory of this project for more details.
22 #include <barry/barry.h>
23 #include <barry/barrybackup.h>
25 #include <iostream>
26 #include <iomanip>
27 #include <tr1/memory>
28 #include <string>
29 #include <vector>
30 #include <map>
31 #include <algorithm>
32 #include <stdexcept>
34 #include "barrygetopt.h"
35 #include "util.h"
37 using namespace std;
38 using namespace std::tr1;
39 using namespace Barry;
/*
Still TODO: should have the ability to copy all differing records
	into another tarball, to function as a "patch", so that the
	user can write it to their device to update only the differing
	records, or store it as a record of changes, etc.
*/
47 void Usage()
49 int logical, major, minor;
50 const char *Version = Barry::Version(logical, major, minor);
52 cerr
53 << "btarcmp - Compare Barry backup tarballs\n"
54 << " Copyright 2012, Net Direct Inc. (http://www.netdirect.ca/)\n"
55 << " Using: " << Version << "\n"
56 << "\n"
57 << " Usage: btarcmp [options...] tarball_0 tarball_1\n"
58 << "\n"
59 << " -b Use brief filename output\n"
60 << " -d db Specify a specific database to compare. Can be used\n"
61 << " multiple times. If not used at all, all databases are\n"
62 << " compared.\n"
63 << " -D db Specify a database name to skip. If both -d and -D are\n"
64 << " used for the same database name, it will be skipped.\n"
65 << " -h This help\n"
66 << " -I cs International charset for string conversions\n"
67 << " Valid values here are available with 'iconv --list'\n"
68 << " -P Only compare records that can be parsed\n"
69 << " This is the same as specifying -d for each database\n"
70 << " listed with -S.\n"
71 << " -S Show list of supported database parsers. Use twice\n"
72 << " to show field names as well.\n"
73 << " -v Show verbose diff output (twice to force hex output)\n"
74 << "\n"
75 << endl;
79 //////////////////////////////////////////////////////////////////////////////
80 // Utility functions and functors
82 bool DBDataCmp(const DBData &a, const DBData &b)
84 return a.GetUniqueId() < b.GetUniqueId();
87 bool UnknownCmp(const UnknownField &a, const UnknownField &b)
89 return a.type < b.type;
92 class DBDataIdCmp
94 uint32_t m_id;
96 public:
97 explicit DBDataIdCmp(uint32_t id)
98 : m_id(id)
102 bool operator()(const DBData &data) const
104 return data.GetUniqueId() == m_id;
108 void ChecksumDBData(const DBData &data, bool include_ids, std::string &sum)
110 Barry::SHA_CTX m_ctx;
112 SHA1_Init(&m_ctx);
114 if( include_ids ) {
115 SHA1_Update(&m_ctx, data.GetDBName().c_str(),
116 data.GetDBName().size());
118 uint8_t recType = data.GetRecType();
119 SHA1_Update(&m_ctx, &recType, sizeof(recType));
121 uint32_t uniqueId = data.GetUniqueId();
122 SHA1_Update(&m_ctx, &uniqueId, sizeof(uniqueId));
125 int len = data.GetData().GetSize() - data.GetOffset();
126 SHA1_Update(&m_ctx,
127 data.GetData().GetData() + data.GetOffset(), len);
129 unsigned char sha1[SHA_DIGEST_LENGTH];
130 SHA1_Final(sha1, &m_ctx);
132 ostringstream oss;
133 for( int i = 0; i < SHA_DIGEST_LENGTH; i++ ) {
134 oss << hex << setfill('0') << setw(2)
135 << (unsigned int) sha1[i];
137 sum = oss.str();
141 //////////////////////////////////////////////////////////////////////////////
142 // Parsed Compare class
144 class ParsedCompare
146 private:
147 const DBData &m_one, &m_two;
148 const IConverter *m_ic;
149 bool m_known_record;
150 std::string m_first_description;
152 public:
153 ParsedCompare(const DBData &one, const DBData &two,
154 const IConverter *ic = 0);
156 bool CanParse() const { return m_known_record; }
157 const std::string& GetDescription() const { return m_first_description;}
159 /// Returns true if differing fields found and displayed.
160 /// False if no differences found.
161 bool ShowDifferingFields();
164 ParsedCompare::ParsedCompare(const DBData &one,
165 const DBData &two,
166 const IConverter *ic)
167 : m_one(one)
168 , m_two(two)
169 , m_ic(ic)
170 , m_known_record(false)
172 #undef HANDLE_PARSER
173 #define HANDLE_PARSER(tname) \
174 else if( tname::GetDBName() == one.GetDBName() ) { \
175 m_known_record = true; \
176 tname a; \
177 ParseDBData(m_one, a, m_ic); \
178 m_first_description = a.GetDescription(); \
181 if( one.GetDBName() != two.GetDBName() ) {
182 throw logic_error("Different database types in ParsedCompare ctor!");
184 // fall through and use the else's
185 ALL_KNOWN_PARSER_TYPES
188 template <class RecordT>
189 class FieldHandler
191 private:
192 const RecordT &m_one, &m_two;
193 mutable bool m_found_difference;
195 public:
196 FieldHandler(const RecordT &one, const RecordT &two)
197 : m_one(one)
198 , m_two(two)
199 , m_found_difference(false)
203 bool Differing() const { return m_found_difference; }
205 void operator()(EnumFieldBase<RecordT> *ep,
206 const FieldIdentity &id) const
208 if( ep->GetValue(m_one) == ep->GetValue(m_two) )
209 return;
211 m_found_difference = true;
212 cout << " " << id.Name << ":\n"
213 << " tar[0] = "
214 << ep->GetName(ep->GetValue(m_one))
215 << " (" << ep->GetValue(m_one) << ")\n"
216 << " tar[1] = "
217 << ep->GetName(ep->GetValue(m_two))
218 << " (" << ep->GetValue(m_two) << ")"
219 << endl;
222 void operator()(typename FieldHandle<RecordT>::PostalPointer pp,
223 const FieldIdentity &id) const
225 const std::string
226 &a = m_one.*(pp.m_PostalAddress).*(pp.m_PostalField),
227 &b = m_two.*(pp.m_PostalAddress).*(pp.m_PostalField);
229 if( a == b )
230 return;
232 m_found_difference = true;
233 cout << " " << id.Name << ":\n"
234 << " tar[0] = '" << a << "'\n"
235 << " tar[1] = '" << b << "'"
236 << endl;
239 void operator()(std::string RecordT::* mp, const FieldIdentity &id) const
241 if( m_one.*mp == m_two.*mp )
242 return;
244 m_found_difference = true;
245 cout << " " << id.Name << ":\n"
246 << " tar[0] = '"
247 << Cr2LfWrapper(m_one.*mp) << "'\n"
248 << " tar[1] = '"
249 << Cr2LfWrapper(m_two.*mp) << "'"
250 << endl;
253 void operator()(UnknownsType RecordT::* mp, const FieldIdentity &id) const
255 UnknownsType a = m_one.*mp, b = m_two.*mp;
257 sort(a.begin(), a.end(), UnknownCmp);
258 sort(b.begin(), b.end(), UnknownCmp);
260 if( a == b )
261 return;
263 m_found_difference = true;
264 cout << " " << id.Name << ":\n"
265 << " tar[0] = '" << a << "'\n"
266 << " tar[1] = '" << b << "'"
267 << endl;
270 template <class TypeT>
271 void operator()(TypeT RecordT::* mp, const FieldIdentity &id) const
273 if( m_one.*mp == m_two.*mp )
274 return;
276 m_found_difference = true;
277 cout << " " << id.Name << ":\n"
278 << " tar[0] = '" << m_one.*mp << "'\n"
279 << " tar[1] = '" << m_two.*mp << "'"
280 << endl;
284 template <class RecordT>
285 bool DoParsedCompare(const RecordT &a, const RecordT &b)
287 FieldHandler<RecordT> handler(a, b);
288 ForEachField(RecordT::GetFieldHandles(), handler);
289 return handler.Differing();
292 /// Returns true if differing fields found and displayed.
293 /// False if no differences found.
294 bool ParsedCompare::ShowDifferingFields()
296 #undef HANDLE_PARSER
297 #define HANDLE_PARSER(tname) \
298 else if( tname::GetDBName() == m_one.GetDBName() ) { \
299 tname a, b; \
300 ParseDBData(m_one, a, m_ic); \
301 ParseDBData(m_two, b, m_ic); \
302 return DoParsedCompare<tname>(a, b); \
305 if( !m_known_record ) {
306 return false;
309 ALL_KNOWN_PARSER_TYPES
311 else {
312 return false;
317 //////////////////////////////////////////////////////////////////////////////
318 // Main application class
320 class App
322 public:
323 typedef Barry::ConfigFile::DBListType DBListType;
324 typedef std::vector<Barry::DBData> DBDataList;
325 typedef std::map<std::string, DBDataList> DatabaseMap;
327 private:
328 DBListType m_compare_list;
329 DBListType m_skip_list;
330 DBListType m_valid_list; // this list is created during the
331 // database name compare... it holds
332 // all the names that exist in both
333 // maps, in sorted order
334 DatabaseMap m_tars[2];
335 std::string m_tarpaths[2]; // full filename with path
336 std::string m_tarfiles[2]; // just filename, no path; or brief mark
337 auto_ptr<IConverter> m_ic;
339 int m_main_return; // 0 - success
340 // 1 - low level error or logic error
341 // 2 - databases lists not the same
342 // 3 - a record was added or deleted
343 bool m_verbose;
344 bool m_always_hex;
345 bool m_sort_on_load; // if true, sort each database by
346 // Unique ID after loading from tarball
347 bool m_include_ids; // if true, include DBData IDs in SHA1
349 std::string m_last_dbname;
351 public:
352 App();
354 void LoadTarballs();
355 void CompareDatabaseNames();
356 void CompareData();
357 void Compare(const std::string &dbname);
358 void Compare(const std::string &dbname,
359 const DBDataList &one, const DBDataList &two);
360 void Compare(const DBData &one, const DBData &two);
362 bool Alike(DBDataList::const_iterator b1, DBDataList::const_iterator b2,
363 DBDataList::const_iterator e1, DBDataList::const_iterator e2);
364 void SearchCheck(DBDataList::const_iterator &b,
365 DBDataList::const_iterator &e, const DBDataList &opposite_list,
366 const std::string &action);
368 void ShowRecordDiff(const DBData &one, const DBData &two,
369 ParsedCompare &pc);
370 void DumpRecord(const DBData &data);
371 void ShowDatabaseHeader(const std::string &dbname);
372 void AddParsersToCompare();
374 // returns true if any of the items in Outputs needs a probe
375 int main(int argc, char *argv[]);
378 //////////////////////////////////////////////////////////////////////////////
379 // Memory storage parser
381 class StoreParser : public Barry::Parser
383 App::DatabaseMap &m_map;
385 public:
386 explicit StoreParser(App::DatabaseMap &map)
387 : m_map(map)
391 virtual void ParseRecord(const DBData &data, const IConverter *ic)
393 m_map[data.GetDBName()].push_back(data);
398 //////////////////////////////////////////////////////////////////////////////
399 // Misc helpers dependent on App
401 bool IdExists(const App::DBDataList &list, uint32_t id)
403 return find_if(list.begin(), list.end(), DBDataIdCmp(id)) != list.end();
406 //////////////////////////////////////////////////////////////////////////////
407 // Member function definitions
409 App::App()
410 : m_main_return(0)
411 , m_verbose(false)
412 , m_always_hex(false)
413 , m_sort_on_load(true)
414 , m_include_ids(true)
418 void App::AddParsersToCompare()
420 #undef HANDLE_PARSER
421 #define HANDLE_PARSER(tname) \
422 m_compare_list.push_back(tname::GetDBName());
424 ALL_KNOWN_PARSER_TYPES
427 void App::LoadTarballs()
429 for( int i = 0; i < 2; i++ ) {
430 // load data into memory
431 Restore builder(m_tarpaths[i]);
432 StoreParser parser(m_tars[i]);
434 Pipe pipe(builder);
435 pipe.PumpFile(parser, m_ic.get());
437 // sort each database's record data by UniqueId
438 for( DatabaseMap::iterator b = m_tars[i].begin();
439 b != m_tars[i].end();
440 ++b )
442 if( m_sort_on_load )
443 sort(b->second.begin(), b->second.end(), DBDataCmp);
448 void App::CompareDatabaseNames()
450 for( int i = 1; i >= 0; i-- ) {
451 int other = i == 0 ? 1 : 0;
453 DatabaseMap::const_iterator b = m_tars[i].begin(), match;
454 for( ; b != m_tars[i].end(); ++b ) {
455 match = m_tars[other].find(b->first);
456 if( match == m_tars[other].end() ) {
457 cout << m_tarfiles[other] << ": has no database '" << b->first << "'" << endl;
458 m_main_return = 2;
460 else {
461 if( !m_valid_list.IsSelected(b->first) ) {
462 m_valid_list.push_back(b->first);
468 // sort the valid list
469 sort(m_valid_list.begin(), m_valid_list.end());
470 // cout << m_valid_list.size() << " valid database names found." << endl;
473 void App::CompareData()
475 DBListType::const_iterator valid = m_valid_list.begin();
476 for( ; valid != m_valid_list.end(); ++valid ) {
477 // if m_compare_list contains items, then only compare
478 // if this database is present in the list
479 if( m_compare_list.size() && !m_compare_list.IsSelected(*valid) )
480 continue;
482 // check if we should skip this database
483 if( m_skip_list.IsSelected(*valid) )
484 continue;
486 // all's well so far... compare!
487 Compare(*valid);
491 void App::Compare(const std::string &dbname)
493 DatabaseMap::const_iterator tar[2];
494 tar[0] = m_tars[0].find(dbname);
495 tar[1] = m_tars[1].find(dbname);
497 if( tar[0] == m_tars[0].end() || tar[1] == m_tars[1].end() )
498 throw logic_error("Comparing non-existant database!" + dbname);
500 Compare(dbname, tar[0]->second, tar[1]->second);
503 void App::Compare(const std::string &dbname,
504 const DBDataList &one,
505 const DBDataList &two)
507 DBDataList::const_iterator
508 b1 = one.begin(), e1 = one.end(), // begin/end for one
509 b2 = two.begin(), e2 = two.end(), // begin/end for two
510 s1, s2; // search markers
512 // if IDs are alike, compare
513 // if not alike, then for each b1 and b2, do:
514 // search for id in opposite list
515 // if id found in opposite list, we're done, leave for next match
516 // if id not found, then entry has either been deleted or added
518 // NOTE: this algorithm assumes that both one and two are sorted!
519 while( b1 != e1 || b2 != e2 ) {
520 if( Alike(b1, b2, e1, e2 ) ) {
521 Compare(*b1, *b2);
522 ++b1;
523 ++b2;
524 continue;
526 else {
527 // SearchCheck increments iterators if needed
528 SearchCheck(b1, e1, two, "deleted");
529 SearchCheck(b2, e2, one, "added");
534 void App::Compare(const DBData &one, const DBData &two)
536 // make sure one and two are of the same database, or throw
537 if( one.GetDBName() != two.GetDBName() )
538 throw logic_error("Tried to compare records from different databases: " + one.GetDBName() + ", and " + two.GetDBName());
540 // always compare the sums of the data first, and if match, done
541 string sum1, sum2;
542 ChecksumDBData(one, m_include_ids, sum1);
543 ChecksumDBData(two, m_include_ids, sum2);
544 if( sum1 == sum2 )
545 return; // done
547 // records are different, print concise report
548 ShowDatabaseHeader(one.GetDBName());
550 // if different, check if there's a parser available for this data
551 // if not, display that these records differ, dump verbose if
552 // needed, and done
553 ParsedCompare pc(one, two, m_ic.get());
554 ShowRecordDiff(one, two, pc);
557 void App::ShowRecordDiff(const DBData &one,
558 const DBData &two,
559 ParsedCompare &pc)
561 if( !pc.CanParse() ) {
562 // if can't parse, print:
563 // UniqueID: sizes (one vs. two), X bytes differ
564 // then the differing fields
565 cout << " 0x" << hex << one.GetUniqueId() << ": differs: "
566 << dec
567 << "sizes (" << one.GetData().GetSize()
568 << " vs. " << two.GetData().GetSize()
569 << "), SHA1 sums differ"
570 << endl;
572 else {
573 // otherwise, print:
574 // UniqueID: sizes (one vs. two), (custom display name)
575 cout << " 0x" << hex << one.GetUniqueId() << ": differs: "
576 << dec
577 << "sizes (" << one.GetData().GetSize()
578 << " vs. " << two.GetData().GetSize()
579 << "), "
580 << pc.GetDescription()
581 << endl;
583 if( !pc.ShowDifferingFields() ) {
584 // no difference found...
585 cout << "No differences found in parsed records, but SHA1 sums differ." << endl;
589 // if verbose and parser is null, or if always_hex,
590 // then display a (messy?) hex diff of the raw data
591 if( (m_verbose && !pc.CanParse()) || m_always_hex ) {
592 cout << " Hex diff of record:" << endl;
593 cout << Diff(one.GetData(), two.GetData()) << endl;
597 bool App::Alike(DBDataList::const_iterator b1,
598 DBDataList::const_iterator b2,
599 DBDataList::const_iterator e1,
600 DBDataList::const_iterator e2)
602 if( b1 == e1 || b2 == e2 )
603 return false;
604 return b1->GetUniqueId() == b2->GetUniqueId();
607 std::string GetDBDescription(const DBData &data, const IConverter *ic)
609 string desc;
611 // try to parse it
612 #undef HANDLE_PARSER
613 #define HANDLE_PARSER(tname) \
614 if( data.GetDBName() == tname::GetDBName() ) { \
615 tname rec; \
616 ParseDBData(data, rec, ic); \
617 return rec.GetDescription(); \
620 ALL_KNOWN_PARSER_TYPES
622 return desc;
625 void App::SearchCheck(DBDataList::const_iterator &b,
626 DBDataList::const_iterator &e,
627 const DBDataList &opposite_list,
628 const std::string &action)
630 // nothing to do if we're at end of list
631 if( b == e )
632 return;
634 // if id is found in opposite list, we're done!
635 // leave the iterator as-is for the next cycle's match
636 if( IdExists(opposite_list, b->GetUniqueId()) )
637 return;
639 // id not found, so set return value
640 m_main_return = 3;
642 // if id not found, then entry has either been deleted or added
643 // (action says which one), and we need to display the diff
644 // and advance the iterator
645 ShowDatabaseHeader(b->GetDBName());
646 cout << " 0x" << hex << b->GetUniqueId() << ": record has been "
647 << action << " in " << "tar[1]";
648 string desc = GetDBDescription(*b, m_ic.get());
649 if( desc.size() ) {
650 cout << ": " << desc << endl;
652 else {
653 cout << endl;
655 if( m_verbose ) {
656 DumpRecord(*b);
659 // advance!
660 ++b;
663 void App::DumpRecord(const DBData &data)
665 #undef HANDLE_PARSER
666 #define HANDLE_PARSER(tname) \
667 if( data.GetDBName() == tname::GetDBName() ) { \
668 tname rec; \
669 ParseDBData(data, rec, m_ic.get()); \
670 cout << rec << endl; \
671 return; \
674 ALL_KNOWN_PARSER_TYPES
676 // if we get here, it's not a known record, so just dump the hex
677 cout << data.GetData() << endl;
680 void App::ShowDatabaseHeader(const std::string &dbname)
682 if( dbname != m_last_dbname ) {
683 m_last_dbname = dbname;
684 cout << "In database: " << dbname << endl;
689 int App::main(int argc, char *argv[])
691 bool brief = false;
692 bool show_parsers = false, show_fields = false;
693 string iconvCharset;
695 // process command line options
696 for(;;) {
697 int cmd = getopt(argc, argv, "bd:D:hI:PSv");
698 if( cmd == -1 )
699 break;
701 switch( cmd )
703 case 'b': // use brief output
704 brief = true;
705 break;
707 case 'd': // database name to compare
708 m_compare_list.push_back(optarg);
709 break;
711 case 'D': // skip database to compare
712 m_skip_list.push_back(optarg);
713 break;
715 case 'P': // only compare parseable records
716 AddParsersToCompare();
717 break;
719 case 'S': // show parsers and builders
720 if( show_parsers )
721 show_fields = true;
722 else
723 show_parsers = true;
724 break;
726 case 'I': // international charset (iconv)
727 iconvCharset = optarg;
728 break;
730 case 'v': // verbose
731 if( !m_verbose )
732 m_verbose = true;
733 else
734 m_always_hex = true;
735 break;
737 case 'h': // help
738 default:
739 Usage();
740 return 0;
744 if( show_parsers ) {
745 ShowParsers(show_fields, false);
746 return 0;
749 if( (optind + 2) > argc ) {
750 Usage();
751 return 0;
754 // save the tarball filenames for later processing
755 // start out assuming both arguments are simple, no path filenames
756 m_tarpaths[0] = m_tarfiles[0] = argv[optind];
757 m_tarpaths[1] = m_tarfiles[1] = argv[optind+1];
759 if( brief ) {
760 // user wants brief markers... filenames must be huge! :-)
761 m_tarfiles[0] = "tar[0]";
762 m_tarfiles[1] = "tar[1]";
764 else {
765 // attempt to trim paths to filenames only
766 if( m_tarpaths[0].find('/') != string::npos )
767 m_tarfiles[0] = m_tarpaths[0].substr(m_tarpaths[0].rfind('/') + 1);
768 if( m_tarpaths[1].find('/') != string::npos )
769 m_tarfiles[1] = m_tarpaths[1].substr(m_tarpaths[1].rfind('/') + 1);
771 // double check... don't want both markers the same:
772 if( m_tarfiles[0] == m_tarfiles[1] ) {
773 // doh... back to where we started
774 m_tarfiles[0] = m_tarpaths[0];
775 m_tarfiles[1] = m_tarpaths[1];
779 // display key for user
780 cout << "tar[0] = " << m_tarpaths[0] << endl;
781 cout << "tar[1] = " << m_tarpaths[1] << endl;
783 // initialize the Barry library
784 Barry::Init(false);
786 // create an IConverter object if needed
787 if( iconvCharset.size() ) {
788 m_ic.reset( new IConverter(iconvCharset.c_str(), true) );
791 // load both tarballs into memory for easy comparisons
792 LoadTarballs();
794 // compare plain list of database names first
795 CompareDatabaseNames();
797 // compare the actual data
798 CompareData();
800 return m_main_return;
803 int main(int argc, char *argv[])
805 try {
806 App app;
807 return app.main(argc, argv);
809 catch( std::exception &e ) {
810 cerr << "Exception: " << e.what() << endl;
811 return 1;