Actually call on_reset callback
[lsnes.git] / src / library / rrdata.cpp
blob78463b96a35a8ec813ce773ef2a5e98c1fb89b0b
1 #include "rrdata.hpp"
2 #include "hex.hpp"
3 #include <cstring>
4 #include <limits>
5 #include <functional>
6 #include <cassert>
8 #define MAXRUN 16843009
10 rrdata_set::instance::instance() throw()
12 memset(bytes, 0, RRDATA_BYTES);
15 rrdata_set::instance::instance(const unsigned char* b) throw()
17 memcpy(bytes, b, RRDATA_BYTES);
20 rrdata_set::instance::instance(const std::string& id) throw()
22 memset(bytes, 0, RRDATA_BYTES);
23 for(unsigned i = 0; i < id.length() && i < 2 * RRDATA_BYTES; i++) {
24 unsigned h = 0;
25 char ch = id[i];
26 if(ch >= '0' && ch <= '9')
27 h = ch - '0';
28 else if(ch >= 'A' && ch <= 'F')
29 h = ch - 'A' + 10;
30 else if(ch >= 'a' && ch <= 'f')
31 h = ch - 'a' + 10;
32 bytes[i / 2] = bytes[i / 2] * 16 + h;
36 bool rrdata_set::instance::operator<(const struct instance& i) const throw()
38 for(unsigned j = 0; j < RRDATA_BYTES; j++)
39 if(bytes[j] < i.bytes[j])
40 return true;
41 else if(bytes[j] > i.bytes[j])
42 return false;
43 return false;
46 bool rrdata_set::instance::operator==(const struct instance& i) const throw()
48 for(unsigned j = 0; j < RRDATA_BYTES; j++)
49 if(bytes[j] != i.bytes[j])
50 return false;
51 return true;
54 const struct rrdata_set::instance rrdata_set::instance::operator++(int) throw()
56 instance i = *this;
57 ++*this;
58 return i;
61 struct rrdata_set::instance& rrdata_set::instance::operator++() throw()
63 unsigned carry = 1;
64 for(unsigned i = RRDATA_BYTES - 1; i < RRDATA_BYTES; i--) {
65 unsigned newcarry = (bytes[i] == 255 && carry);
66 bytes[i] += carry;
67 carry = newcarry;
69 return *this;
72 struct rrdata_set::instance rrdata_set::instance::operator+(unsigned inc) const throw()
74 rrdata_set::instance n = *this;
75 unsigned carry = inc;
76 for(unsigned i = RRDATA_BYTES - 1; i < RRDATA_BYTES; i--) {
77 unsigned newcarry = ((unsigned)n.bytes[i] + carry) >> 8;
78 if(newcarry == 0 && carry > 255)
79 newcarry = (1U << (8 * sizeof(unsigned) - 8));
80 n.bytes[i] += carry;
81 carry = newcarry;
83 return n;
86 unsigned rrdata_set::instance::operator-(const struct instance& m) const throw()
88 unsigned result = 0;
89 uint8_t diff[RRDATA_BYTES] = {0};
90 unsigned borrow = 0;
91 for(unsigned i = RRDATA_BYTES - 1; i < RRDATA_BYTES; i--) {
92 diff[i] = bytes[i] - m.bytes[i] - borrow;
93 borrow = ((unsigned)m.bytes[i] + borrow > (unsigned)bytes[i]) ? 1 : 0;
95 for(unsigned i = 0; i < RRDATA_BYTES; i++) {
96 if((result << 8 >> 8) != result)
97 return std::numeric_limits<unsigned>::max();
98 result <<= 8;
99 result |= diff[i];
101 return result;
104 rrdata_set::rrdata_set() throw()
106 rcount = 0;
107 lazy_mode = false;
108 handle_open = false;
111 void rrdata_set::read_base(const std::string& projectfile, bool lazy)
113 if(projectfile == current_projectfile && (!lazy_mode || lazy))
114 return;
115 if(lazy) {
116 std::set<std::pair<instance, instance>> new_rrset;
117 data = new_rrset;
118 current_projectfile = projectfile;
119 rcount = 0;
120 lazy_mode = true;
121 if(handle_open)
122 ohandle.close();
123 handle_open = false;
124 return;
126 std::set<std::pair<instance, instance>> new_rrset;
127 uint64_t new_count = 0;
128 if(projectfile == current_projectfile) {
129 new_rrset = data;
130 new_count = rcount;
132 std::string filename = projectfile;
133 if(handle_open) {
134 ohandle.close();
135 handle_open = false;
137 std::ifstream ihandle(filename.c_str(), std::ios_base::in | std::ios_base::binary);
138 while(ihandle) {
139 unsigned char bytes[RRDATA_BYTES];
140 ihandle.read(reinterpret_cast<char*>(bytes), RRDATA_BYTES);
141 instance k(bytes);
142 //std::cerr << "Loaded symbol: " << k << std::endl;
143 _add(k, k + 1, new_rrset, new_count);
145 ihandle.close();
146 ohandle.open(filename.c_str(), std::ios_base::out | std::ios_base::app | std::ios_base::binary);
147 if(ohandle)
148 handle_open = true;
149 if(projectfile == current_projectfile && lazy_mode && !lazy) {
150 //Finish the project creation, write all.
151 for(auto i : data) {
152 instance tmp = i.first;
153 while(tmp != i.second) {
154 ohandle.write(reinterpret_cast<const char*>(tmp.bytes), RRDATA_BYTES);
155 ++tmp;
157 ohandle.flush();
160 data = new_rrset;
161 rcount = new_count;
162 current_projectfile = projectfile;
163 lazy_mode = lazy;
166 void rrdata_set::close() throw()
168 current_projectfile = "";
169 if(handle_open)
170 ohandle.close();
171 handle_open = false;
174 void rrdata_set::add(const struct rrdata_set::instance& i)
176 if(_add(i) && handle_open) {
177 //std::cerr << "New symbol: " << i << std::endl;
178 ohandle.write(reinterpret_cast<const char*>(i.bytes), RRDATA_BYTES);
179 ohandle.flush();
183 namespace
185 size_t _flush_symbol(char* buf1, const rrdata_set::instance& base, const rrdata_set::instance& predicted,
186 unsigned count)
188 char opcode;
189 char buf2[3];
190 unsigned bias;
191 if(count == 1) {
192 opcode = 0x00;
193 bias = 1;
194 } else if(count < 258) {
195 opcode = 0x20;
196 bias = 2;
197 } else if(count < 65794) {
198 opcode = 0x40;
199 bias = 258;
200 } else {
201 opcode = 0x60;
202 bias = 65794;
204 unsigned j;
205 for(j = 0; j < 31; j++)
206 if(base.bytes[j] != predicted.bytes[j])
207 break;
208 opcode += j;
209 buf1[0] = opcode;
210 memcpy(buf1 + 1, base.bytes + j, RRDATA_BYTES - j);
211 buf2[0] = (count - bias) >> 16;
212 buf2[1] = (count - bias) >> 8;
213 buf2[2] = (count - bias);
214 memcpy(buf1 + (RRDATA_BYTES - j + 1), buf2 + (3 - (opcode >> 5)), opcode >> 5);
215 return (RRDATA_BYTES - j + 1) + (opcode >> 5);
218 uint64_t symbols_in_interval(const rrdata_set::instance& b, const rrdata_set::instance& e) throw()
220 uint64_t c = 0;
221 rrdata_set::instance x = b;
222 while(x != e) {
223 unsigned diff = e - x;
224 x = x + diff;
225 c = c + diff;
227 return c;
231 uint64_t rrdata_set::emerg_action(struct rrdata_set::esave_state& state, char* buf, size_t bufsize, uint64_t& scount)
232 const
234 uint64_t rsize = 0;
235 size_t lbytes;
236 state.init(data);
237 while(!state.finished() || state.segptr != state.segend) {
238 if(state.segptr == state.segend) {
239 auto i = state.next();
240 state.segptr = i->first;
241 state.segend = i->second;
243 unsigned syms = state.segend - state.segptr;
244 if(syms > MAXRUN)
245 syms = MAXRUN;
246 char tmp[RRDATA_BYTES + 4];
247 rsize += lbytes = _flush_symbol(tmp, state.segptr, state.pred, syms);
248 if(buf) {
249 if(bufsize < lbytes) break;
250 memcpy(buf, tmp, lbytes);
251 buf += lbytes;
252 bufsize -= lbytes;
254 scount += syms;
255 state.segptr = state.segptr + syms;
256 state.pred = state.segptr;
258 return rsize;
261 uint64_t rrdata_set::write(std::vector<char>& strm)
263 uint64_t scount = 0;
264 esave_state cstate;
265 size_t ssize = emerg_action(cstate, NULL, 0, scount);
266 cstate.reset();
267 strm.resize(ssize);
268 uint64_t scount2 = 0;
269 size_t ssize2 = emerg_action(cstate, &strm[0], ssize, scount2);
270 if(ssize != ssize2 || scount != scount2) {
271 std::cerr << "RRDATA mismatch!" << std::endl;
272 std::cerr << "Length: Prepare: " << ssize << " Write: " << ssize2 << std::endl;
273 std::cerr << "Scount: Prepare: " << scount << " Write: " << scount2 << std::endl;
275 if(scount)
276 return scount - 1;
277 else
278 return 0;
281 namespace
283 uint64_t read_set(std::vector<char>& strm, std::function<void(rrdata_set::instance& d, unsigned rep)> fn)
285 uint64_t scount = 0;
286 rrdata_set::instance decoding;
287 uint64_t ptr = 0;
288 memset(decoding.bytes, 0, RRDATA_BYTES);
289 while(ptr < strm.size()) {
290 char opcode;
291 unsigned char buf1[RRDATA_BYTES];
292 unsigned char buf2[3];
293 opcode = strm[ptr++];
294 unsigned validbytes = (opcode & 0x1F);
295 unsigned lengthbytes = (opcode & 0x60) >> 5;
296 unsigned repeat = 1;
297 memcpy(buf1, &strm[ptr], RRDATA_BYTES - validbytes);
298 ptr += (RRDATA_BYTES - validbytes);
299 memcpy(decoding.bytes + validbytes, buf1, RRDATA_BYTES - validbytes);
300 if(lengthbytes > 0) {
301 memcpy(buf2, &strm[ptr], lengthbytes);
302 ptr += lengthbytes;
304 if(lengthbytes == 1)
305 repeat = 2 + static_cast<unsigned>(buf2[0]);
306 if(lengthbytes == 2)
307 repeat = 258 + static_cast<unsigned>(buf2[0]) * 256 + buf2[1];
308 if(lengthbytes == 3)
309 repeat = 65794 + static_cast<unsigned>(buf2[0]) * 65536 +
310 static_cast<unsigned>(buf2[1]) * 256 + buf2[2];
312 fn(decoding, repeat);
313 decoding = decoding + repeat;
314 scount += repeat;
316 if(scount)
317 return scount - 1;
318 else
319 return 0;
323 uint64_t rrdata_set::read(std::vector<char>& strm)
325 return read_set(strm, [this](instance& d, unsigned rep) {
326 bool any = false;
327 if(handle_open && !_in_set(d, d + rep))
328 for(unsigned i = 0; i < rep; i++) {
329 //TODO: Optimize this.
330 instance n = d + i;
331 if(!_in_set(n)) {
332 ohandle.write(reinterpret_cast<const char*>(n.bytes), RRDATA_BYTES);
333 any = true;
336 if(any)
337 ohandle.flush();
338 _add(d, d + rep);
342 uint64_t rrdata_set::count(std::vector<char>& strm)
344 return read_set(strm, [](instance& d, unsigned rep) {});
347 uint64_t rrdata_set::count() throw()
349 uint64_t c = rcount;
350 if(c)
351 return c - 1;
352 else
353 return 0;
356 std::ostream& operator<<(std::ostream& os, const struct rrdata_set::instance& j)
358 os << hex::b_to(j.bytes, 32, true);
359 return os;
362 bool rrdata_set::_add(const instance& b)
364 uint64_t c = rcount;
365 _add(b, b + 1, data, rcount);
366 return (c != rcount);
369 void rrdata_set::_add(const instance& b, const instance& e)
371 _add(b, e, data, rcount);
374 void rrdata_set::_add(const instance& b, const instance& e, std::set<std::pair<instance, instance>>& set,
375 uint64_t& cnt)
377 //Special case: Nothing.
378 if(set.empty()) {
379 set.insert(std::make_pair(b, e));
380 cnt += symbols_in_interval(b, e);
381 return;
383 //Just insert it.
384 auto itr = set.lower_bound(std::make_pair(b, e));
385 if(itr != set.end() && itr->first == b && itr->second == e)
386 return;
387 set.insert(std::make_pair(b, e));
388 cnt += symbols_in_interval(b, e);
389 itr = set.lower_bound(std::make_pair(b, e));
390 auto itr1 = itr;
391 auto itr2 = itr;
392 if(itr1 != set.begin()) itr1--;
393 itr2++;
394 bool have1 = (itr1 != itr);
395 instance rangebase = b;
396 //If the thing is entierely in itr1, undo the add.
397 if(have1 && b >= itr1->first && e <= itr1->second) {
398 cnt -= symbols_in_interval(b, e);
399 set.erase(itr);
400 return;
402 //Attach the thing to itr1 if appropriate.
403 if(have1 && b <= itr1->second) {
404 cnt -= symbols_in_interval(b, itr1->second);
405 rangebase = itr1->first;
406 set.insert(std::make_pair(itr1->first, e));
407 auto tmp = set.lower_bound(std::make_pair(itr1->first, e));
408 set.erase(itr1);
409 set.erase(itr);
410 itr = tmp;
411 have1 = false;
413 while(itr2 != set.end()) {
414 if(e < itr2->first)
415 break; //Nothing to merge anymore.
416 if(e >= itr2->second && (rangebase != itr2->first || e != itr2->second)) {
417 //This entiere range is subsumed.
418 cnt -= symbols_in_interval(itr2->first, itr2->second);
419 auto tmp = itr2;
420 itr2++;
421 set.erase(tmp);
422 } else if(e < itr2->second) {
423 //Combines with range.
424 cnt -= symbols_in_interval(itr2->first, e);
425 if(rangebase != itr2->first) {
426 set.insert(std::make_pair(rangebase, itr2->second));
427 set.erase(itr2);
429 set.erase(itr);
430 break;
435 bool rrdata_set::_in_set(const instance& b, const instance& e)
437 if(b == e)
438 return true;
439 if(data.empty())
440 return false;
441 auto itr = data.lower_bound(std::make_pair(b, e));
442 if(itr == data.end()) {
443 //If there is anything, it must be the last node.
444 auto r = *data.rbegin();
445 return (r.first <= b && r.second >= e);
446 } else {
447 //It may be this node or the previous one.
448 if(itr->first <= b && itr->second >= e)
449 return true;
450 itr--;
451 return (itr->first <= b && itr->second >= e);
455 std::string rrdata_set::debug_dump()
457 std::ostringstream x;
458 x << rcount << "[";
459 for(auto i : data)
460 x << "{" << i.first << "," << i.second << "}";
461 x << "]";
462 return x.str();
465 uint64_t rrdata_set::debug_nodecount(std::set<std::pair<instance, instance>>& set)
467 uint64_t x = 0;
468 for(auto i : set)
469 x += symbols_in_interval(i.first, i.second);
470 return x;
473 namespace
477 uint64_t rrdata_set::size_emerg() const throw()
479 esave_state s;
480 uint64_t dummy;
481 return emerg_action(s, NULL, 0, dummy);
484 size_t rrdata_set::write_emerg(struct esave_state& state, char* buf, size_t bufsize) const throw()
486 uint64_t dummy;
487 return emerg_action(state, buf, bufsize, dummy);