lib: show offset and rectype in HexDumpParser
[barry.git] / src / data.cc
blob878d79c08f70387d3ae7d0ca31fd03a99bf99d05
///
/// \file	data.cc
///		Classes to help manage pre-determined data files.
///
/*
    Copyright (C) 2005-2010, Net Direct Inc. (http://www.netdirect.ca/)

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

    See the GNU General Public License in the COPYING file at the
    root directory of this project for more details.
*/
22 #include "data.h"
23 #include <fstream>
24 #include <sstream>
25 #include <iomanip>
26 #include <string>
27 #include <stdexcept>
28 #include <string.h>
29 #include <stdlib.h>
31 //#define __DEBUG_MODE__
32 #include "debug.h"
35 using namespace std;
38 namespace Barry {
40 inline bool IsHexData(const std::string &s)
42 const char *str = s.c_str();
43 for( int i = 0; i < 4 && *str; str++, i++ )
44 if( *str != ' ' )
45 return false;
47 for( int i = 0; i < 8 && *str; str++, i++ ) {
48 const char *hexchars = "0123456789abcdef";
49 if( strchr(hexchars, *str) == NULL )
50 return false;
53 if( *str != ':' )
54 return false;
56 return true;
61 ///////////////////////////////////////////////////////////////////////////////
62 // Data class
64 bool Data::bPrintAscii = true;
66 Data::Data()
67 : m_data(new unsigned char[0x4000]),
68 m_bufsize(0x4000),
69 m_datasize(0),
70 m_endpoint(-1),
71 m_externalData(0),
72 m_external(false)
74 memset(m_data, 0, m_bufsize);
77 Data::Data(int endpoint, size_t startsize)
78 : m_data(new unsigned char[startsize]),
79 m_bufsize(startsize),
80 m_datasize(0),
81 m_endpoint(endpoint),
82 m_externalData(0),
83 m_external(false)
85 memset(m_data, 0, m_bufsize);
88 Data::Data(const void *ValidData, size_t size)
89 : m_data(0),
90 m_bufsize(0),
91 m_datasize(size),
92 m_endpoint(-1),
93 m_externalData((const unsigned char*)ValidData),
94 m_external(true)
98 Data::Data(const Data &other)
99 : m_data(other.m_bufsize ? new unsigned char[other.m_bufsize] : 0),
100 m_bufsize(other.m_bufsize),
101 m_datasize(other.m_datasize),
102 m_endpoint(other.m_endpoint),
103 m_externalData(other.m_externalData),
104 m_external(other.m_external)
106 // copy over the raw data
107 if( !m_external )
108 memcpy(m_data, other.m_data, other.m_bufsize);
111 Data::~Data()
113 delete [] m_data;
116 void Data::MakeSpace(size_t desiredsize)
118 if( m_bufsize < desiredsize ) {
119 desiredsize += 1024; // get a proper chunk
120 unsigned char *newbuf = new unsigned char[desiredsize];
121 memcpy(newbuf, m_data, m_bufsize);
122 memset(newbuf + m_bufsize, 0, desiredsize - m_bufsize);
123 delete [] m_data;
124 m_data = newbuf;
125 m_bufsize = desiredsize;
129 // perform the copy on write operation if needed
130 void Data::CopyOnWrite(size_t desiredsize)
132 if( m_external ) {
133 // make room
134 MakeSpace(std::max(desiredsize, m_datasize));
136 // copy it over
137 memcpy(m_data, m_externalData, m_datasize);
139 // not external anymore
140 m_external = false;
144 void Data::InputHexLine(istream &is)
146 unsigned int values[16];
147 size_t index = 0;
149 size_t address;
150 is >> setbase(16) >> address;
151 if( !is )
152 return; // nothing to do
154 is.ignore(); // eat the ':'
156 while( is && index < 16 ) {
157 is >> setbase(16) >> values[index];
158 if( is )
159 index++;
162 dout("InputHexLine: read " << index << " bytes");
164 CopyOnWrite(address + index);
165 MakeSpace(address + index); // make space for the new
166 m_datasize = std::max(address + index, m_datasize);
167 while( index-- )
168 m_data[address + index] = (unsigned char) values[index];
169 return;
172 void Data::DumpHexLine(ostream &os, size_t index, size_t size) const
174 ios::fmtflags oldflags = os.setf(ios::right);
176 // index
177 os << " ";
178 os << setbase(16) << setfill('0') << setw(8)
179 << index << ": ";
181 // hex byte data
182 for( size_t i = 0; i < size; i++ ) {
183 if( (index+i) < GetSize() ) {
184 os << setbase(16) << setfill('0')
185 << setw(2) << setprecision(2)
186 << (unsigned int) GetData()[index + i] << ' ';
188 else {
189 os << " ";
193 // printable data
194 if( bPrintAscii ) {
195 locale loc = os.getloc();
196 os << ' ';
197 for( size_t i = 0; i < size && (index+i) < GetSize(); i++ ) {
198 ostream::traits_type::char_type c = GetData()[index + i];
199 os << setbase(10) << (char) (isprint(c, loc) ? c : '.');
203 os << "\n";
204 os.flags(oldflags);
207 void Data::DumpHex(ostream &os) const
209 for( size_t address = 0; address < GetSize(); address += 16 ) {
210 DumpHexLine(os, address, 16);
214 unsigned char * Data::GetBuffer(size_t requiredsize)
216 CopyOnWrite(requiredsize);
217 if( requiredsize > 0 )
218 MakeSpace(requiredsize);
219 return m_data;
222 void Data::ReleaseBuffer(int datasize)
224 assert( datasize >= 0 || datasize == -1 );
225 assert( datasize == -1 || (unsigned int)datasize <= m_bufsize );
226 assert( !m_external );
228 if( m_external )
229 return;
230 if( datasize >= 0 && (unsigned int)datasize > m_bufsize ) {
231 dout("ReleaseBuffer called with datasize("
232 << std::dec << datasize << ") > m_bufsize("
233 << m_bufsize << ")");
234 return;
237 if( datasize >= 0 ) {
238 m_datasize = datasize;
240 else {
241 // search for last non-zero value in buffer
242 m_datasize = m_bufsize - 1;
243 while( m_datasize && m_data[m_datasize] == 0 )
244 --m_datasize;
248 /// Append bytes of data based on str
249 void Data::AppendHexString(const char *str)
251 CopyOnWrite(m_datasize + 512);
253 std::istringstream iss(str);
254 unsigned int byte;
255 while( iss >> hex >> byte ) {
256 MakeSpace(m_datasize + 1);
257 m_data[m_datasize] = (unsigned char) byte;
258 m_datasize++;
262 /// set buffer to 0 and remove all data
263 void Data::Zap()
265 if( !m_external )
266 memset(m_data, 0, m_bufsize);
267 m_datasize = 0;
270 Data & Data::operator=(const Data &other)
272 if( this == &other )
273 return *this;
275 // don't remove our current buffer, only grow it if needed
276 MakeSpace(other.m_bufsize);
277 memcpy(m_data, other.m_data, other.m_bufsize);
279 // then copy over the data state
280 m_datasize = other.m_datasize;
281 m_endpoint = other.m_endpoint;
282 m_externalData = other.m_externalData;
283 m_external = other.m_external;
284 return *this;
287 void Data::MemCpy(size_t &offset, const void *src, size_t size)
289 unsigned char *pd = GetBuffer(offset + size) + offset;
290 memcpy(pd, src, size);
291 offset += size;
294 void Data::Append(const void *buf, size_t size)
296 // MemCpy updates m_datasize via the offset reference
297 MemCpy(m_datasize, buf, size);
300 istream& operator>> (istream &is, Data &data)
302 data.InputHexLine(is);
303 return is;
306 ostream& operator<< (ostream &os, const Data &data)
308 data.DumpHex(os);
309 return os;
313 ///////////////////////////////////////////////////////////////////////////////
314 // Diff class
316 Diff::Diff(const Data &old, const Data &new_)
317 : m_old(old), m_new(new_)
321 void Diff::Compare(ostream &os, size_t index, size_t size) const
323 size_t min = std::min(m_old.GetSize(), m_new.GetSize());
325 // index
326 os << "> ";
327 os << setbase(16) << setfill('0') << setw(8)
328 << index << ": ";
330 // diff data
331 for( size_t i = 0; i < size; i++ ) {
332 size_t address = index + i;
334 // if data is available, print the diff
335 if( address < min ) {
336 if( m_old.GetData()[address] != m_new.GetData()[address] ) {
337 // differ, print hex
338 os << setbase(16) << setfill('0')
339 << setw(2) << setprecision(2)
340 << (unsigned int) m_new.GetData()[address] << ' ';
342 else {
343 // same, just print spaces
344 os << " ";
347 else {
348 // one of the buffers is shorter...
349 if( address < m_new.GetSize() ) {
350 // new still has data, print it
351 os << setbase(16) << setfill('0')
352 << setw(2) << setprecision(2)
353 << (unsigned int) m_new.GetData()[address]
354 << ' ';
356 else if( address < m_old.GetSize() ) {
357 // new is out of data and old still has some
358 os << "XX ";
360 else {
361 // no more data, just print spaces
362 os << " ";
367 // printable data, just dump new
368 if( Data::PrintAscii() ) {
369 os << ' ';
370 for( size_t i = 0; i < size && (index+i) < m_new.GetSize(); i++ ) {
371 int c = m_new.GetData()[index + i];
372 os << setbase(10) << (char) (isprint(c) ? c : '.');
376 os << "\n";
379 void Diff::Dump(std::ostream &os) const
381 if( m_old.GetSize() != m_new.GetSize() )
382 os << "sizes differ: "
383 << m_old.GetSize() << " != " << m_new.GetSize() << endl;
385 size_t max = std::max(m_old.GetSize(), m_new.GetSize());
386 for( size_t i = 0; i < max; i += 16 ) {
387 m_old.DumpHexLine(os, i, 16);
388 Compare(os, i, 16);
392 ostream& operator<< (ostream &os, const Diff &diff)
394 diff.Dump(os);
395 return os;
399 ///////////////////////////////////////////////////////////////////////////////
400 // DBData class
402 /// Default constructor, constructs an empty local Data object
403 DBData::DBData()
404 : m_version(REC_VERSION_1) // a reasonable default for now
405 , m_localData(new Data)
406 , m_data(*m_localData)
411 /// Constructs a local Data object that points to external memory
412 DBData::DBData(const void *ValidData, size_t size)
413 : m_version(REC_VERSION_1) // a reasonable default for now
414 , m_localData(new Data)
415 , m_data(*m_localData)
419 DBData::DBData(RecordFormatVersion ver,
420 const std::string &dbName,
421 uint8_t recType,
422 uint32_t uniqueId,
423 size_t offset,
424 const void *ValidData,
425 size_t size)
426 : m_version(ver)
427 , m_dbName(dbName)
428 , m_recType(recType)
429 , m_uniqueId(uniqueId)
430 , m_offset(offset)
431 , m_localData(new Data(ValidData, size))
432 , m_data(*m_localData)
436 /// If copy == false, constructs an external Data object, no local.
437 /// If copy == true, constructs an internal Data object copy
438 DBData::DBData(Data &externalData, bool copy)
439 : m_version(REC_VERSION_1) // a reasonable default for now
440 , m_localData(copy ? new Data(externalData) : 0)
441 , m_data(copy ? *m_localData : externalData)
445 DBData::DBData(RecordFormatVersion ver,
446 const std::string &dbName,
447 uint8_t recType,
448 uint32_t uniqueId,
449 size_t offset,
450 Data &externalData,
451 bool copy)
452 : m_version(ver)
453 , m_dbName(dbName)
454 , m_recType(recType)
455 , m_uniqueId(uniqueId)
456 , m_offset(offset)
457 , m_localData(copy ? new Data(externalData) : 0)
458 , m_data(copy ? *m_localData : externalData)
462 DBData::~DBData()
464 delete m_localData;
467 Data& DBData::UseData()
469 // make sure m_data is not external anymore
470 m_data.GetBuffer();
471 return m_data; // return it
474 // Note: this copy operator does not change what m_data references...
475 // whatever m_data references in the constructor is what will be changed
476 // in this copy.
477 // Note also that the copy *will* involve a memcpy, and maybe a memory
478 // allocation as well.
479 DBData& DBData::operator=(const DBData &other)
481 if( this == &other )
482 return *this;
484 // copy the data block
485 m_data = other.m_data;
487 // copy the metadata
488 CopyMeta(other);
490 return *this;
493 ///////////////////////////////////////////////////////////////////////////////
494 // Utility functions
/// Checks whether line opens a new endpoint block, i.e. begins with
/// "sep: " or "rep: ".  On a match, endpoint receives the number that
/// follows the prefix (as converted by atoi) and true is returned.
/// Otherwise endpoint is left untouched and false is returned.
static bool IsEndpointStart(const std::string &line, int &endpoint)
{
	const char *text = line.c_str();

	const bool hasSepTag = strncmp(text, "sep: ", 5) == 0;
	const bool hasRepTag = !hasSepTag && strncmp(text, "rep: ", 5) == 0;
	if( !hasSepTag && !hasRepTag )
		return false;

	endpoint = atoi(text + 5);
	return true;
}
507 bool LoadDataArray(const string &filename, std::vector<Data> &array)
509 ifstream in(filename.c_str());
510 return ReadDataArray(in, array);
513 bool ReadDataArray(std::istream &is, std::vector<Data> &array)
515 if( !is )
516 return false;
518 bool bInEndpoint = false;
519 unsigned int nCurrent = 0;
520 size_t nLargestSize = 0x100;
521 while( is ) {
522 string line;
523 getline(is, line);
524 int endpoint;
525 if( bInEndpoint ) {
526 if( IsHexData(line) ) {
527 istringstream sline(line);
528 sline >> array[nCurrent];
529 continue;
531 else {
532 nLargestSize = std::max(nLargestSize,
533 array[nCurrent].GetBufSize());
534 bInEndpoint = false;
538 // check if this line starts a new endpoint
539 if( IsEndpointStart(line, endpoint) ) {
540 bInEndpoint = true;
541 Data chunk(endpoint, nLargestSize);
542 array.push_back(chunk);
543 nCurrent = array.size() - 1;
546 return true;
549 } // namespace Barry
552 #ifdef __TEST_MODE__
554 #include <iostream>
555 #include <iomanip>
556 #include "data.h"
558 using namespace std;
560 int main()
562 typedef std::vector<Data> DataVec;
563 DataVec array;
564 if( !LoadDataArray("data/parsed.log", array) ) {
565 cout << "Can't load file" << endl;
566 return 1;
569 DataVec::iterator i = array.begin();
570 Data::PrintAscii(false);
571 for( ; i != array.end(); i++ ) {
572 cout << "Endpoint: " << i->GetEndpoint() << endl;
573 cout << *i;
574 cout << "\n\n";
578 Data one, two;
579 one.GetBuffer()[0] = 0x01;
580 one.ReleaseBuffer(1);
581 two.GetBuffer()[0] = 0x02;
582 two.ReleaseBuffer(2);
584 cout << Diff(one, two) << endl;
585 cout << Diff(two, one) << endl;
587 two.GetBuffer();
588 two.ReleaseBuffer(32);
589 cout << Diff(one, two) << endl;
592 #endif