lib: data: added efficient Prepend() support to the Data class
[barry/progweb.git] / src / data.cc
blob a71f702e6c753cd08b9b5a2a6c1a2f5b60d1aca9
///
/// \file	data.cc
///		Classes to help manage pre-determined data files.
///

/*
    Copyright (C) 2005-2011, Net Direct Inc. (http://www.netdirect.ca/)

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

    See the GNU General Public License in the COPYING file at the
    root directory of this project for more details.
*/
22 #include "data.h"
23 #include <fstream>
24 #include <sstream>
25 #include <iomanip>
26 #include <string>
27 #include <stdexcept>
28 #include <string.h>
29 #include <stdlib.h>
30 #include <locale>
32 //#define __DEBUG_MODE__
33 #include "debug.h"
36 using namespace std;
39 namespace Barry {
/// Returns true if s has the shape of one hex dump line: four leading
/// spaces, then eight lowercase hex digits, then a ':'.  Whatever follows
/// the colon is not examined.
inline bool IsHexData(const std::string &s)
{
	static const char digits[] = "0123456789abcdef";
	const char *p = s.c_str();

	// leading indent: every character consumed here must be a space
	// (a string that ends early falls through and fails the ':' test)
	for( int n = 0; n < 4 && *p; ++n, ++p ) {
		if( *p != ' ' )
			return false;
	}

	// address field: lowercase hex only
	for( int n = 0; n < 8 && *p; ++n, ++p ) {
		if( strchr(digits, *p) == NULL )
			return false;
	}

	// the address must be terminated by a colon
	return *p == ':';
}
62 ///////////////////////////////////////////////////////////////////////////////
63 // Data class
65 bool Data::bPrintAscii = true;
67 Data::Data()
68 : m_memBlock(new unsigned char[0x4100])
69 , m_blockSize(0x4100)
70 , m_dataStart(m_memBlock + 0x100)
71 , m_dataSize(0)
72 , m_externalData(0)
73 , m_external(false)
74 , m_endpoint(-1)
76 memset(m_memBlock, 0, m_blockSize);
79 Data::Data(int endpoint, size_t startsize, size_t prependsize)
80 : m_memBlock(new unsigned char[startsize + prependsize])
81 , m_blockSize(startsize + prependsize)
82 , m_dataStart(m_memBlock + prependsize)
83 , m_dataSize(0)
84 , m_externalData(0)
85 , m_external(false)
86 , m_endpoint(endpoint)
88 memset(m_memBlock, 0, m_blockSize);
91 Data::Data(const void *ValidData, size_t size)
92 : m_memBlock(0)
93 , m_blockSize(0)
94 , m_dataStart(0)
95 , m_dataSize(size)
96 , m_externalData((const unsigned char*)ValidData)
97 , m_external(true)
98 , m_endpoint(-1)
102 Data::Data(const Data &other)
103 : m_memBlock(other.m_blockSize ? new unsigned char[other.m_blockSize] : 0)
104 , m_blockSize(other.m_blockSize)
105 , m_dataStart(m_memBlock + other.AvailablePrependSpace())
106 , m_dataSize(other.m_dataSize)
107 , m_externalData(other.m_externalData)
108 , m_external(other.m_external)
109 , m_endpoint(other.m_endpoint)
111 // copy over the raw data
112 if( !m_external )
113 memcpy(m_memBlock, other.m_memBlock, other.m_blockSize);
116 Data::~Data()
118 delete [] m_memBlock;
122 // MakeSpace
124 /// Reallocates buffers so that it is safe to write desiredsize data
125 /// to m_dataStart after it returns. All existing data is preserved.
127 /// This function also performs any copy on write needed.
129 /// If desiredprepend is nonzero, then at least desiredprepend bytes
130 /// of prepend space will exist in the buffer after return.
131 /// If desiredprepend is zero, defaults will be used if needed.
133 void Data::MakeSpace(size_t desiredsize, size_t desiredprepend)
135 // use a default prepend size if none currently available
136 size_t prepend = std::max(AvailablePrependSpace(), desiredprepend);
137 if( !prepend )
138 prepend = 0x100;
140 // GetBufSize() returns 0 if m_external is true
141 if( GetBufSize() < (desiredsize + prepend) ||
142 (desiredprepend && AvailablePrependSpace() < desiredprepend) )
144 // get a proper chunk to avoid future resizes
145 desiredsize += 1024 + prepend;
147 // desired size must be at least the size of our current
148 // data (in case of external data), as well as the size
149 // of our desired prepend space
150 if( desiredsize < (m_dataSize + prepend) )
151 desiredsize = m_dataSize + prepend;
153 // setup new zeroed buffer... reuse m_memBlock if it
154 // exists (see operator=())
155 unsigned char *newbuf = 0;
156 if( m_memBlock && m_blockSize >= desiredsize ) {
157 newbuf = m_memBlock;
159 else {
160 newbuf = new unsigned char[desiredsize];
161 memset(newbuf, 0, desiredsize);
164 // copy valid data over
165 if( m_external ) {
166 memcpy(newbuf + prepend, m_externalData, m_dataSize);
168 // not external anymore
169 m_external = false;
171 else {
172 memcpy(newbuf + prepend, m_dataStart, m_dataSize);
175 // install new buffer if we've allocated a new one
176 if( m_memBlock != newbuf ) {
177 delete [] m_memBlock;
178 m_memBlock = newbuf;
179 m_blockSize = desiredsize;
182 // update m_dataStart
183 m_dataStart = m_memBlock + prepend;
187 size_t Data::AvailablePrependSpace() const
189 if( m_external )
190 return 0;
191 else
192 return m_dataStart - m_memBlock;
195 void Data::InputHexLine(istream &is)
197 unsigned int values[16];
198 size_t index = 0;
200 size_t address;
201 is >> setbase(16) >> address;
202 if( !is )
203 return; // nothing to do
205 is.ignore(); // eat the ':'
207 while( is && index < 16 ) {
208 is >> setbase(16) >> values[index];
209 if( is )
210 index++;
213 dout("InputHexLine: read " << index << " bytes");
215 MakeSpace(address + index); // make space for the new
216 m_dataSize = std::max(address + index, m_dataSize);
217 while( index-- )
218 m_dataStart[address + index] = (unsigned char) values[index];
219 return;
222 void Data::DumpHexLine(ostream &os, size_t index, size_t size) const
224 ios::fmtflags oldflags = os.setf(ios::right);
226 // index
227 os << " ";
228 os << setbase(16) << setfill('0') << setw(8)
229 << index << ": ";
231 // hex byte data
232 for( size_t i = 0; i < size; i++ ) {
233 if( (index+i) < GetSize() ) {
234 os << setbase(16) << setfill('0')
235 << setw(2) << setprecision(2)
236 << (unsigned int) GetData()[index + i] << ' ';
238 else {
239 os << " ";
243 // printable data
244 if( bPrintAscii ) {
245 locale loc = os.getloc();
246 os << ' ';
247 for( size_t i = 0; i < size && (index+i) < GetSize(); i++ ) {
248 ostream::traits_type::char_type c = GetData()[index + i];
249 os << setbase(10) << (char) (std::isprint(c, loc) ? c : '.');
253 os << "\n";
254 os.flags(oldflags);
257 void Data::DumpHex(ostream &os) const
259 for( size_t address = 0; address < GetSize(); address += 16 ) {
260 DumpHexLine(os, address, 16);
264 unsigned char * Data::GetBuffer(size_t requiredsize)
266 if( requiredsize == 0 ) {
267 // handle default, use data size
268 requiredsize = m_dataSize;
271 MakeSpace(requiredsize);
272 return m_dataStart;
275 /// Returns size of buffer returned by GetBuffer(). Note that this does not
276 /// include available prepend space.
277 size_t Data::GetBufSize() const
279 if( m_external )
280 return 0;
281 else
282 return m_blockSize - (m_dataStart - m_memBlock);
285 void Data::ReleaseBuffer(int datasize)
287 if( datasize < 0 && datasize != -1)
288 throw std::logic_error("Data::ReleaseBuffer() argument must be -1 or >= 0");
289 if( m_external )
290 throw std::logic_error("Data::ReleaseBuffer() must be called after GetBuffer()");
291 if( !(datasize == -1 || (unsigned int)datasize <= GetBufSize()) )
292 throw std::logic_error("Data::ReleaseBuffer() must be called with a size smaller than the original buffer requested");
294 if( datasize >= 0 ) {
295 m_dataSize = datasize;
297 else {
298 // search for last non-zero value in buffer
299 m_dataSize = GetBufSize() - 1;
300 while( m_dataSize && m_dataStart[m_dataSize] == 0 )
301 --m_dataSize;
305 /// Append bytes of data based on str
306 void Data::AppendHexString(const char *str)
308 MakeSpace(m_dataSize + 512);
310 std::istringstream iss(str);
311 unsigned int byte;
312 while( iss >> hex >> byte ) {
313 MakeSpace(m_dataSize + 1);
314 m_dataStart[m_dataSize] = (unsigned char) byte;
315 m_dataSize++;
319 /// set buffer to 0 and remove all data
320 void Data::Zap()
322 if( !m_external )
323 memset(m_memBlock, 0, m_blockSize);
324 m_dataSize = 0;
327 Data & Data::operator=(const Data &other)
329 if( this == &other )
330 return *this;
332 if( other.m_external ) {
333 // just copy over the pointers
334 m_externalData = other.m_externalData;
335 m_external = other.m_external;
336 m_dataSize = other.m_dataSize;
337 m_endpoint = other.m_endpoint;
339 else {
340 // don't remove our current buffer, only grow it if needed
341 MakeSpace(other.m_dataSize);
342 memcpy(m_dataStart, other.m_dataStart, other.m_dataSize);
344 // then copy over the data state
345 m_dataSize = other.m_dataSize;
346 m_endpoint = other.m_endpoint;
349 return *this;
352 void Data::MemCpy(size_t &offset, const void *src, size_t size)
354 unsigned char *pd = GetBuffer(offset + size) + offset;
355 memcpy(pd, src, size);
356 offset += size;
358 // if the new end of data is larger than m_dataSize, bump it
359 if( offset > m_dataSize )
360 m_dataSize = offset;
363 void Data::Append(const void *buf, size_t size)
365 // MemCpy updates m_datasize via the offset reference
366 MemCpy(m_dataSize, buf, size);
369 void Data::Prepend(const void *buf, size_t size)
371 MakeSpace(0, size);
372 m_dataStart -= size;
373 m_dataSize += size;
374 memcpy(m_dataStart, (const unsigned char*) buf, size);
377 /// Removes size bytes from the beginning of the buffer.
378 /// If GetSize() is less than size, then all bytes will be chopped
379 /// and GetSize() will end up 0.
380 void Data::Prechop(size_t size)
382 // chopping all the bytes that we have?
383 if( size >= GetSize() ) {
384 QuickZap();
385 return;
388 if( m_external ) {
389 m_externalData += size;
390 m_dataSize -= size;
392 else {
393 m_dataStart += size;
394 m_dataSize -= size;
398 istream& operator>> (istream &is, Data &data)
400 data.InputHexLine(is);
401 return is;
404 ostream& operator<< (ostream &os, const Data &data)
406 data.DumpHex(os);
407 return os;
411 ///////////////////////////////////////////////////////////////////////////////
412 // Diff class
414 Diff::Diff(const Data &old, const Data &new_)
415 : m_old(old), m_new(new_)
419 void Diff::Compare(ostream &os, size_t index, size_t size) const
421 size_t min = std::min(m_old.GetSize(), m_new.GetSize());
423 // index
424 os << "> ";
425 os << setbase(16) << setfill('0') << setw(8)
426 << index << ": ";
428 // diff data
429 for( size_t i = 0; i < size; i++ ) {
430 size_t address = index + i;
432 // if data is available, print the diff
433 if( address < min ) {
434 if( m_old.GetData()[address] != m_new.GetData()[address] ) {
435 // differ, print hex
436 os << setbase(16) << setfill('0')
437 << setw(2) << setprecision(2)
438 << (unsigned int) m_new.GetData()[address] << ' ';
440 else {
441 // same, just print spaces
442 os << " ";
445 else {
446 // one of the buffers is shorter...
447 if( address < m_new.GetSize() ) {
448 // new still has data, print it
449 os << setbase(16) << setfill('0')
450 << setw(2) << setprecision(2)
451 << (unsigned int) m_new.GetData()[address]
452 << ' ';
454 else if( address < m_old.GetSize() ) {
455 // new is out of data and old still has some
456 os << "XX ";
458 else {
459 // no more data, just print spaces
460 os << " ";
465 // printable data, just dump new
466 if( Data::PrintAscii() ) {
467 os << ' ';
468 for( size_t i = 0; i < size && (index+i) < m_new.GetSize(); i++ ) {
469 int c = m_new.GetData()[index + i];
470 os << setbase(10) << (char) (isprint(c) ? c : '.');
474 os << "\n";
477 void Diff::Dump(std::ostream &os) const
479 if( m_old.GetSize() != m_new.GetSize() )
480 os << "sizes differ: "
481 << m_old.GetSize() << " != " << m_new.GetSize() << endl;
483 size_t max = std::max(m_old.GetSize(), m_new.GetSize());
484 for( size_t i = 0; i < max; i += 16 ) {
485 m_old.DumpHexLine(os, i, 16);
486 Compare(os, i, 16);
490 ostream& operator<< (ostream &os, const Diff &diff)
492 diff.Dump(os);
493 return os;
497 ///////////////////////////////////////////////////////////////////////////////
498 // DBData class
500 /// Default constructor, constructs an empty local Data object
501 DBData::DBData()
502 : m_version(REC_VERSION_1) // a reasonable default for now
503 , m_localData(new Data)
504 , m_data(*m_localData)
509 /// Constructs a local Data object that points to external memory
510 DBData::DBData(const void *ValidData, size_t size)
511 : m_version(REC_VERSION_1) // a reasonable default for now
512 , m_localData(new Data)
513 , m_data(*m_localData)
517 DBData::DBData(RecordFormatVersion ver,
518 const std::string &dbName,
519 uint8_t recType,
520 uint32_t uniqueId,
521 size_t offset,
522 const void *ValidData,
523 size_t size)
524 : m_version(ver)
525 , m_dbName(dbName)
526 , m_recType(recType)
527 , m_uniqueId(uniqueId)
528 , m_offset(offset)
529 , m_localData(new Data(ValidData, size))
530 , m_data(*m_localData)
534 /// If copy == false, constructs an external Data object, no local.
535 /// If copy == true, constructs an internal Data object copy
536 DBData::DBData(Data &externalData, bool copy)
537 : m_version(REC_VERSION_1) // a reasonable default for now
538 , m_localData(copy ? new Data(externalData) : 0)
539 , m_data(copy ? *m_localData : externalData)
543 DBData::DBData(RecordFormatVersion ver,
544 const std::string &dbName,
545 uint8_t recType,
546 uint32_t uniqueId,
547 size_t offset,
548 Data &externalData,
549 bool copy)
550 : m_version(ver)
551 , m_dbName(dbName)
552 , m_recType(recType)
553 , m_uniqueId(uniqueId)
554 , m_offset(offset)
555 , m_localData(copy ? new Data(externalData) : 0)
556 , m_data(copy ? *m_localData : externalData)
560 DBData::~DBData()
562 delete m_localData;
565 Data& DBData::UseData()
567 // make sure m_data is not external anymore
568 m_data.GetBuffer();
569 return m_data; // return it
572 // Note: this copy operator does not change what m_data references...
573 // whatever m_data references in the constructor is what will be changed
574 // in this copy.
575 // Note also that the copy *will* involve a memcpy, and maybe a memory
576 // allocation as well.
577 DBData& DBData::operator=(const DBData &other)
579 if( this == &other )
580 return *this;
582 // copy the data block
583 m_data = other.m_data;
585 // copy the metadata
586 CopyMeta(other);
588 return *this;
///////////////////////////////////////////////////////////////////////////////
// Utility functions

/// Checks whether line opens a new endpoint section, i.e. begins with
/// "sep: " or "rep: ".  If so, the text after the five-character tag is
/// converted with atoi() and stored in endpoint, and true is returned.
/// Otherwise endpoint is left unchanged and false is returned.
static bool IsEndpointStart(const std::string &line, int &endpoint)
{
	const char *text = line.c_str();

	const bool hasSepTag = strncmp(text, "sep: ", 5) == 0;
	const bool hasRepTag = !hasSepTag && strncmp(text, "rep: ", 5) == 0;
	if( !hasSepTag && !hasRepTag )
		return false;

	endpoint = atoi(text + 5);
	return true;
}
605 bool LoadDataArray(const string &filename, std::vector<Data> &array)
607 ifstream in(filename.c_str());
608 return ReadDataArray(in, array);
611 bool ReadDataArray(std::istream &is, std::vector<Data> &array)
613 if( !is )
614 return false;
616 bool bInEndpoint = false;
617 unsigned int nCurrent = 0;
618 size_t nLargestSize = 0x100;
619 while( is ) {
620 string line;
621 getline(is, line);
622 int endpoint;
623 if( bInEndpoint ) {
624 if( IsHexData(line) ) {
625 istringstream sline(line);
626 sline >> array[nCurrent];
627 continue;
629 else {
630 nLargestSize = std::max(nLargestSize,
631 array[nCurrent].GetBufSize());
632 bInEndpoint = false;
636 // check if this line starts a new endpoint
637 if( IsEndpointStart(line, endpoint) ) {
638 bInEndpoint = true;
639 Data chunk(endpoint, nLargestSize);
640 array.push_back(chunk);
641 nCurrent = array.size() - 1;
644 return true;
647 } // namespace Barry
#ifdef __TEST_MODE__

#include <iostream>
#include <iomanip>
#include "data.h"

using namespace std;
// Data, Diff and LoadDataArray() are defined inside namespace Barry in
// this file, so they must be brought into scope for the unqualified
// uses below
using namespace Barry;

/// Self-test driver, only built when __TEST_MODE__ is defined.
/// Loads data/parsed.log and dumps every endpoint block in it, then
/// prints a few diffs between small hand-made buffers.
/// Returns 1 if the log file cannot be loaded, 0 otherwise.
int main()
{
	typedef std::vector<Data> DataVec;
	DataVec array;
	if( !LoadDataArray("data/parsed.log", array) ) {
		cout << "Can't load file" << endl;
		return 1;
	}

	// dump everything that was loaded, without the ASCII column
	DataVec::iterator i = array.begin();
	Data::PrintAscii(false);
	for( ; i != array.end(); ++i ) {
		cout << "Endpoint: " << i->GetEndpoint() << endl;
		cout << *i;
		cout << "\n\n";
	}

	// two buffers differing in content and in length
	Data one, two;
	one.GetBuffer()[0] = 0x01;
	one.ReleaseBuffer(1);
	two.GetBuffer()[0] = 0x02;
	two.ReleaseBuffer(2);

	cout << Diff(one, two) << endl;
	cout << Diff(two, one) << endl;

	// stretch the second buffer over more than one dump line
	two.GetBuffer();
	two.ReleaseBuffer(32);
	cout << Diff(one, two) << endl;

	return 0;
}

#endif