lsnes rr2-β24
[lsnes.git] / src / library / rrdata.cpp
bloba4c7e686d3546b0bdd162d6c20a149d1d8777423
1 #include "rrdata.hpp"
2 #include "hex.hpp"
3 #include <cstring>
4 #include <limits>
5 #include <functional>
6 #include <cassert>
8 #define MAXRUN 16843009
10 rrdata_set::instance::instance() throw()
12 memset(bytes, 0, RRDATA_BYTES);
15 rrdata_set::instance::instance(const unsigned char* b) throw()
17 memcpy(bytes, b, RRDATA_BYTES);
20 rrdata_set::instance::instance(const std::string& id) throw()
22 memset(bytes, 0, RRDATA_BYTES);
23 for(unsigned i = 0; i < id.length() && i < 2 * RRDATA_BYTES; i++) {
24 unsigned h = 0;
25 char ch = id[i];
26 if(ch >= '0' && ch <= '9')
27 h = ch - '0';
28 else if(ch >= 'A' && ch <= 'F')
29 h = ch - 'A' + 10;
30 else if(ch >= 'a' && ch <= 'f')
31 h = ch - 'a' + 10;
32 bytes[i / 2] = bytes[i / 2] * 16 + h;
36 bool rrdata_set::instance::operator<(const struct instance& i) const throw()
38 for(unsigned j = 0; j < RRDATA_BYTES; j++)
39 if(bytes[j] < i.bytes[j])
40 return true;
41 else if(bytes[j] > i.bytes[j])
42 return false;
43 return false;
46 bool rrdata_set::instance::operator==(const struct instance& i) const throw()
48 for(unsigned j = 0; j < RRDATA_BYTES; j++)
49 if(bytes[j] != i.bytes[j])
50 return false;
51 return true;
54 const struct rrdata_set::instance rrdata_set::instance::operator++(int) throw()
56 instance i = *this;
57 ++*this;
58 return i;
61 struct rrdata_set::instance& rrdata_set::instance::operator++() throw()
63 unsigned carry = 1;
64 for(unsigned i = RRDATA_BYTES - 1; i < RRDATA_BYTES; i--) {
65 unsigned newcarry = (bytes[i] == 255 && carry);
66 bytes[i] += carry;
67 carry = newcarry;
69 return *this;
72 struct rrdata_set::instance rrdata_set::instance::operator+(unsigned inc) const throw()
74 rrdata_set::instance n = *this;
75 unsigned carry = inc;
76 for(unsigned i = RRDATA_BYTES - 1; i < RRDATA_BYTES; i--) {
77 unsigned newcarry = ((unsigned)n.bytes[i] + carry) >> 8;
78 if(newcarry == 0 && carry > 255)
79 newcarry = (1U << (8 * sizeof(unsigned) - 8));
80 n.bytes[i] += carry;
81 carry = newcarry;
83 return n;
86 unsigned rrdata_set::instance::operator-(const struct instance& m) const throw()
88 unsigned result = 0;
89 uint8_t diff[RRDATA_BYTES] = {0};
90 unsigned borrow = 0;
91 for(unsigned i = RRDATA_BYTES - 1; i < RRDATA_BYTES; i--) {
92 diff[i] = bytes[i] - m.bytes[i] - borrow;
93 borrow = ((unsigned)m.bytes[i] + borrow > (unsigned)bytes[i]) ? 1 : 0;
95 for(unsigned i = 0; i < RRDATA_BYTES; i++) {
96 if((result << 8 >> 8) != result)
97 return std::numeric_limits<unsigned>::max();
98 result <<= 8;
99 result |= diff[i];
101 return result;
104 rrdata_set::rrdata_set() throw()
106 rcount = 0;
107 lazy_mode = false;
108 handle_open = false;
111 void rrdata_set::read_base(const std::string& projectfile, bool lazy) throw(std::bad_alloc)
113 if(projectfile == current_projectfile && (!lazy_mode || lazy))
114 return;
115 if(lazy) {
116 std::set<std::pair<instance, instance>> new_rrset;
117 data = new_rrset;
118 current_projectfile = projectfile;
119 rcount = 0;
120 lazy_mode = true;
121 if(handle_open)
122 ohandle.close();
123 handle_open = false;
124 return;
126 std::set<std::pair<instance, instance>> new_rrset;
127 uint64_t new_count = 0;
128 if(projectfile == current_projectfile) {
129 new_rrset = data;
130 new_count = rcount;
132 std::string filename = projectfile;
133 if(handle_open) {
134 ohandle.close();
135 handle_open = false;
137 std::ifstream ihandle(filename.c_str(), std::ios_base::in | std::ios_base::binary);
138 while(ihandle) {
139 unsigned char bytes[RRDATA_BYTES];
140 ihandle.read(reinterpret_cast<char*>(bytes), RRDATA_BYTES);
141 instance k(bytes);
142 //std::cerr << "Loaded symbol: " << k << std::endl;
143 _add(k, k + 1, new_rrset, new_count);
145 ihandle.close();
146 ohandle.open(filename.c_str(), std::ios_base::out | std::ios_base::app | std::ios_base::binary);
147 if(ohandle)
148 handle_open = true;
149 if(projectfile == current_projectfile && lazy_mode && !lazy) {
150 //Finish the project creation, write all.
151 for(auto i : data) {
152 instance tmp = i.first;
153 while(tmp != i.second) {
154 ohandle.write(reinterpret_cast<const char*>(tmp.bytes), RRDATA_BYTES);
155 ++tmp;
157 ohandle.flush();
160 data = new_rrset;
161 rcount = new_count;
162 current_projectfile = projectfile;
163 lazy_mode = lazy;
166 void rrdata_set::close() throw()
168 current_projectfile = "";
169 if(handle_open)
170 ohandle.close();
171 handle_open = false;
174 void rrdata_set::add(const struct rrdata_set::instance& i) throw(std::bad_alloc)
176 if(_add(i) && handle_open) {
177 //std::cerr << "New symbol: " << i << std::endl;
178 ohandle.write(reinterpret_cast<const char*>(i.bytes), RRDATA_BYTES);
179 ohandle.flush();
183 namespace
185 size_t _flush_symbol(char* buf1, const rrdata_set::instance& base, const rrdata_set::instance& predicted,
186 unsigned count)
188 char opcode;
189 char buf2[3];
190 unsigned bias;
191 if(count == 1) {
192 opcode = 0x00;
193 bias = 1;
194 } else if(count < 258) {
195 opcode = 0x20;
196 bias = 2;
197 } else if(count < 65794) {
198 opcode = 0x40;
199 bias = 258;
200 } else {
201 opcode = 0x60;
202 bias = 65794;
204 unsigned j;
205 for(j = 0; j < 31; j++)
206 if(base.bytes[j] != predicted.bytes[j])
207 break;
208 opcode += j;
209 buf1[0] = opcode;
210 memcpy(buf1 + 1, base.bytes + j, RRDATA_BYTES - j);
211 buf2[0] = (count - bias) >> 16;
212 buf2[1] = (count - bias) >> 8;
213 buf2[2] = (count - bias);
214 memcpy(buf1 + (RRDATA_BYTES - j + 1), buf2 + (3 - (opcode >> 5)), opcode >> 5);
215 return (RRDATA_BYTES - j + 1) + (opcode >> 5);
218 uint64_t symbols_in_interval(const rrdata_set::instance& b, const rrdata_set::instance& e) throw()
220 uint64_t c = 0;
221 rrdata_set::instance x = b;
222 while(x != e) {
223 unsigned diff = e - x;
224 x = x + diff;
225 c = c + diff;
227 return c;
231 uint64_t rrdata_set::emerg_action(struct rrdata_set::esave_state& state, char* buf, size_t bufsize, uint64_t& scount)
232 const
234 uint64_t rsize = 0;
235 size_t lbytes;
236 state.init(data);
237 while(!state.finished() || state.segptr != state.segend) {
238 if(state.segptr == state.segend) {
239 auto i = state.next();
240 state.segptr = i->first;
241 state.segend = i->second;
243 unsigned syms = state.segend - state.segptr;
244 if(syms > MAXRUN)
245 syms = MAXRUN;
246 char tmp[RRDATA_BYTES + 4];
247 rsize += lbytes = _flush_symbol(tmp, state.segptr, state.pred, syms);
248 if(buf) {
249 if(bufsize < lbytes) break;
250 memcpy(buf, tmp, lbytes);
251 buf += lbytes;
252 bufsize -= lbytes;
254 scount += syms;
255 state.segptr = state.segptr + syms;
256 state.pred = state.segptr;
258 return rsize;
261 uint64_t rrdata_set::write(std::vector<char>& strm) throw(std::bad_alloc)
263 uint64_t scount = 0;
264 esave_state cstate;
265 size_t ssize = emerg_action(cstate, NULL, 0, scount);
266 cstate.reset();
267 strm.resize(ssize);
268 uint64_t scount2 = 0;
269 size_t ssize2 = emerg_action(cstate, &strm[0], ssize, scount2);
270 if(ssize != ssize2 || scount != scount2) {
271 std::cerr << "RRDATA mismatch!" << std::endl;
272 std::cerr << "Length: Prepare: " << ssize << " Write: " << ssize2 << std::endl;
273 std::cerr << "Scount: Prepare: " << scount << " Write: " << scount2 << std::endl;
275 if(scount)
276 return scount - 1;
277 else
278 return 0;
281 namespace
283 uint64_t read_set(std::vector<char>& strm, std::function<void(rrdata_set::instance& d, unsigned rep)> fn)
284 throw(std::bad_alloc)
286 uint64_t scount = 0;
287 rrdata_set::instance decoding;
288 uint64_t ptr = 0;
289 memset(decoding.bytes, 0, RRDATA_BYTES);
290 while(ptr < strm.size()) {
291 char opcode;
292 unsigned char buf1[RRDATA_BYTES];
293 unsigned char buf2[3];
294 opcode = strm[ptr++];
295 unsigned validbytes = (opcode & 0x1F);
296 unsigned lengthbytes = (opcode & 0x60) >> 5;
297 unsigned repeat = 1;
298 memcpy(buf1, &strm[ptr], RRDATA_BYTES - validbytes);
299 ptr += (RRDATA_BYTES - validbytes);
300 memcpy(decoding.bytes + validbytes, buf1, RRDATA_BYTES - validbytes);
301 if(lengthbytes > 0) {
302 memcpy(buf2, &strm[ptr], lengthbytes);
303 ptr += lengthbytes;
305 if(lengthbytes == 1)
306 repeat = 2 + static_cast<unsigned>(buf2[0]);
307 if(lengthbytes == 2)
308 repeat = 258 + static_cast<unsigned>(buf2[0]) * 256 + buf2[1];
309 if(lengthbytes == 3)
310 repeat = 65794 + static_cast<unsigned>(buf2[0]) * 65536 +
311 static_cast<unsigned>(buf2[1]) * 256 + buf2[2];
313 fn(decoding, repeat);
314 decoding = decoding + repeat;
315 scount += repeat;
317 if(scount)
318 return scount - 1;
319 else
320 return 0;
324 uint64_t rrdata_set::read(std::vector<char>& strm) throw(std::bad_alloc)
326 return read_set(strm, [this](instance& d, unsigned rep) {
327 bool any = false;
328 if(handle_open && !_in_set(d, d + rep))
329 for(unsigned i = 0; i < rep; i++) {
330 //TODO: Optimize this.
331 instance n = d + i;
332 if(!_in_set(n)) {
333 ohandle.write(reinterpret_cast<const char*>(n.bytes), RRDATA_BYTES);
334 any = true;
337 if(any)
338 ohandle.flush();
339 _add(d, d + rep);
343 uint64_t rrdata_set::count(std::vector<char>& strm) throw(std::bad_alloc)
345 return read_set(strm, [](instance& d, unsigned rep) {});
348 uint64_t rrdata_set::count() throw()
350 uint64_t c = rcount;
351 if(c)
352 return c - 1;
353 else
354 return 0;
357 std::ostream& operator<<(std::ostream& os, const struct rrdata_set::instance& j)
359 os << hex::b_to(j.bytes, 32, true);
360 return os;
363 bool rrdata_set::_add(const instance& b)
365 uint64_t c = rcount;
366 _add(b, b + 1, data, rcount);
367 return (c != rcount);
370 void rrdata_set::_add(const instance& b, const instance& e)
372 _add(b, e, data, rcount);
375 void rrdata_set::_add(const instance& b, const instance& e, std::set<std::pair<instance, instance>>& set,
376 uint64_t& cnt)
378 //Special case: Nothing.
379 if(set.empty()) {
380 set.insert(std::make_pair(b, e));
381 cnt += symbols_in_interval(b, e);
382 return;
384 //Just insert it.
385 auto itr = set.lower_bound(std::make_pair(b, e));
386 if(itr != set.end() && itr->first == b && itr->second == e)
387 return;
388 set.insert(std::make_pair(b, e));
389 cnt += symbols_in_interval(b, e);
390 itr = set.lower_bound(std::make_pair(b, e));
391 auto itr1 = itr;
392 auto itr2 = itr;
393 if(itr1 != set.begin()) itr1--;
394 itr2++;
395 bool have1 = (itr1 != itr);
396 instance rangebase = b;
397 //If the thing is entierely in itr1, undo the add.
398 if(have1 && b >= itr1->first && e <= itr1->second) {
399 cnt -= symbols_in_interval(b, e);
400 set.erase(itr);
401 return;
403 //Attach the thing to itr1 if appropriate.
404 if(have1 && b <= itr1->second) {
405 cnt -= symbols_in_interval(b, itr1->second);
406 rangebase = itr1->first;
407 set.insert(std::make_pair(itr1->first, e));
408 auto tmp = set.lower_bound(std::make_pair(itr1->first, e));
409 set.erase(itr1);
410 set.erase(itr);
411 itr = tmp;
412 have1 = false;
414 while(itr2 != set.end()) {
415 if(e < itr2->first)
416 break; //Nothing to merge anymore.
417 if(e >= itr2->second && (rangebase != itr2->first || e != itr2->second)) {
418 //This entiere range is subsumed.
419 cnt -= symbols_in_interval(itr2->first, itr2->second);
420 auto tmp = itr2;
421 itr2++;
422 set.erase(tmp);
423 } else if(e < itr2->second) {
424 //Combines with range.
425 cnt -= symbols_in_interval(itr2->first, e);
426 if(rangebase != itr2->first) {
427 set.insert(std::make_pair(rangebase, itr2->second));
428 set.erase(itr2);
430 set.erase(itr);
431 break;
436 bool rrdata_set::_in_set(const instance& b, const instance& e)
438 if(b == e)
439 return true;
440 if(data.empty())
441 return false;
442 auto itr = data.lower_bound(std::make_pair(b, e));
443 if(itr == data.end()) {
444 //If there is anything, it must be the last node.
445 auto r = *data.rbegin();
446 return (r.first <= b && r.second >= e);
447 } else {
448 //It may be this node or the previous one.
449 if(itr->first <= b && itr->second >= e)
450 return true;
451 itr--;
452 return (itr->first <= b && itr->second >= e);
456 std::string rrdata_set::debug_dump()
458 std::ostringstream x;
459 x << rcount << "[";
460 for(auto i : data)
461 x << "{" << i.first << "," << i.second << "}";
462 x << "]";
463 return x.str();
466 uint64_t rrdata_set::debug_nodecount(std::set<std::pair<instance, instance>>& set)
468 uint64_t x = 0;
469 for(auto i : set)
470 x += symbols_in_interval(i.first, i.second);
471 return x;
474 namespace
478 uint64_t rrdata_set::size_emerg() const throw()
480 esave_state s;
481 uint64_t dummy;
482 return emerg_action(s, NULL, 0, dummy);
485 size_t rrdata_set::write_emerg(struct esave_state& state, char* buf, size_t bufsize) const throw()
487 uint64_t dummy;
488 return emerg_action(state, buf, bufsize, dummy);