Upload UI
[lsnes.git] / src / library / rrdata.cpp
blob979e80c76230f329a2ec1514ba302395833ce0c6
1 #include "rrdata.hpp"
2 #include <cstring>
3 #include <limits>
4 #include <cassert>
6 #define MAXRUN 16843009
8 namespace
10 const char* hexes = "0123456789ABCDEF";
13 rrdata_set::instance::instance() throw()
15 memset(bytes, 0, RRDATA_BYTES);
18 rrdata_set::instance::instance(const unsigned char* b) throw()
20 memcpy(bytes, b, RRDATA_BYTES);
23 rrdata_set::instance::instance(const std::string& id) throw()
25 memset(bytes, 0, RRDATA_BYTES);
26 for(unsigned i = 0; i < id.length() && i < 2 * RRDATA_BYTES; i++) {
27 unsigned h;
28 char ch = id[i];
29 if(ch >= '0' && ch <= '9')
30 h = ch - '0';
31 else if(ch >= 'A' && ch <= 'F')
32 h = ch - 'A' + 10;
33 else if(ch >= 'a' && ch <= 'f')
34 h = ch - 'a' + 10;
35 bytes[i / 2] = bytes[i / 2] * 16 + h;
39 bool rrdata_set::instance::operator<(const struct instance& i) const throw()
41 for(unsigned j = 0; j < RRDATA_BYTES; j++)
42 if(bytes[j] < i.bytes[j])
43 return true;
44 else if(bytes[j] > i.bytes[j])
45 return false;
46 return false;
49 bool rrdata_set::instance::operator==(const struct instance& i) const throw()
51 for(unsigned j = 0; j < RRDATA_BYTES; j++)
52 if(bytes[j] != i.bytes[j])
53 return false;
54 return true;
57 const struct rrdata_set::instance rrdata_set::instance::operator++(int) throw()
59 instance i = *this;
60 ++*this;
61 return i;
64 struct rrdata_set::instance& rrdata_set::instance::operator++() throw()
66 unsigned carry = 1;
67 for(unsigned i = RRDATA_BYTES - 1; i < RRDATA_BYTES; i--) {
68 unsigned newcarry = (bytes[i] == 255 && carry);
69 bytes[i] += carry;
70 carry = newcarry;
72 return *this;
75 struct rrdata_set::instance rrdata_set::instance::operator+(unsigned inc) const throw()
77 rrdata_set::instance n = *this;
78 unsigned carry = inc;
79 for(unsigned i = RRDATA_BYTES - 1; i < RRDATA_BYTES; i--) {
80 unsigned newcarry = ((unsigned)n.bytes[i] + carry) >> 8;
81 if(newcarry == 0 && carry > 255)
82 newcarry = (1U << (8 * sizeof(unsigned) - 8));
83 n.bytes[i] += carry;
84 carry = newcarry;
86 return n;
89 unsigned rrdata_set::instance::operator-(const struct instance& m) const throw()
91 unsigned result = 0;
92 uint8_t diff[RRDATA_BYTES] = {0};
93 unsigned borrow = 0;
94 for(unsigned i = RRDATA_BYTES - 1; i < RRDATA_BYTES; i--) {
95 diff[i] = bytes[i] - m.bytes[i] - borrow;
96 borrow = ((unsigned)m.bytes[i] + borrow > (unsigned)bytes[i]) ? 1 : 0;
98 for(unsigned i = 0; i < RRDATA_BYTES; i++) {
99 if((result << 8 >> 8) != result)
100 return std::numeric_limits<unsigned>::max();
101 result <<= 8;
102 result |= diff[i];
104 return result;
107 rrdata_set::rrdata_set() throw()
109 rcount = 0;
110 lazy_mode = false;
111 handle_open = false;
114 void rrdata_set::read_base(const std::string& projectfile, bool lazy) throw(std::bad_alloc)
116 if(projectfile == current_projectfile && (!lazy_mode || lazy))
117 return;
118 if(lazy) {
119 std::set<std::pair<instance, instance>> new_rrset;
120 data = new_rrset;
121 current_projectfile = projectfile;
122 lazy_mode = true;
123 if(handle_open)
124 ohandle.close();
125 handle_open = false;
126 return;
128 std::set<std::pair<instance, instance>> new_rrset;
129 uint64_t new_count = 0;
130 if(projectfile == current_projectfile) {
131 new_rrset = data;
132 new_count = rcount;
134 std::string filename = projectfile;
135 if(handle_open) {
136 ohandle.close();
137 handle_open = false;
139 std::ifstream ihandle(filename.c_str(), std::ios_base::in | std::ios_base::binary);
140 while(ihandle) {
141 unsigned char bytes[RRDATA_BYTES];
142 ihandle.read(reinterpret_cast<char*>(bytes), RRDATA_BYTES);
143 instance k(bytes);
144 //std::cerr << "Loaded symbol: " << k << std::endl;
145 _add(k, k + 1, new_rrset, new_count);
147 ihandle.close();
148 ohandle.open(filename.c_str(), std::ios_base::out | std::ios_base::app | std::ios_base::binary);
149 if(ohandle)
150 handle_open = true;
151 if(projectfile == current_projectfile && lazy_mode && !lazy) {
152 //Finish the project creation, write all.
153 for(auto i : data) {
154 instance tmp = i.first;
155 while(tmp != i.second) {
156 ohandle.write(reinterpret_cast<const char*>(tmp.bytes), RRDATA_BYTES);
157 ++tmp;
159 ohandle.flush();
162 data = new_rrset;
163 rcount = new_count;
164 current_projectfile = projectfile;
165 lazy_mode = lazy;
168 void rrdata_set::close() throw()
170 current_projectfile = "";
171 if(handle_open)
172 ohandle.close();
173 handle_open = false;
176 void rrdata_set::add(const struct rrdata_set::instance& i) throw(std::bad_alloc)
178 if(_add(i) && handle_open) {
179 //std::cerr << "New symbol: " << i << std::endl;
180 ohandle.write(reinterpret_cast<const char*>(i.bytes), RRDATA_BYTES);
181 ohandle.flush();
185 void rrdata_set::add_internal() throw(std::bad_alloc)
187 add(internal++);
190 namespace
192 void flush_symbol(std::vector<char>& strm, const rrdata_set::instance& base,
193 const rrdata_set::instance& predicted, unsigned count)
195 char opcode;
196 char buf1[RRDATA_BYTES + 4];
197 char buf2[3];
198 unsigned bias;
199 if(count == 1) {
200 opcode = 0x00;
201 bias = 1;
202 } else if(count < 258) {
203 opcode = 0x20;
204 bias = 2;
205 } else if(count < 65794) {
206 opcode = 0x40;
207 bias = 258;
208 } else {
209 opcode = 0x60;
210 bias = 65794;
212 unsigned j;
213 for(j = 0; j < 31; j++)
214 if(base.bytes[j] != predicted.bytes[j])
215 break;
216 opcode += j;
217 buf1[0] = opcode;
218 memcpy(buf1 + 1, base.bytes + j, RRDATA_BYTES - j);
219 buf2[0] = (count - bias) >> 16;
220 buf2[1] = (count - bias) >> 8;
221 buf2[2] = (count - bias);
222 memcpy(buf1 + (RRDATA_BYTES - j + 1), buf2 + (3 - (opcode >> 5)), opcode >> 5);
223 for(size_t s = 0; s < (RRDATA_BYTES - j + 1) + (opcode >> 5); s++)
224 strm.push_back(buf1[s]);
225 //std::cerr << "Encoding " << count << " symbols starting from " << base << std::endl;
228 uint64_t symbols_in_interval(const rrdata_set::instance& b, const rrdata_set::instance& e) throw()
230 uint64_t c = 0;
231 rrdata_set::instance x = b;
232 while(x != e) {
233 unsigned diff = e - x;
234 x = x + diff;
235 c = c + diff;
237 return c;
241 uint64_t rrdata_set::write(std::vector<char>& strm) throw(std::bad_alloc)
243 strm.clear();
244 uint64_t scount = 0;
245 instance last_encode_end;
246 memset(last_encode_end.bytes, 0, RRDATA_BYTES);
248 instance predicted;
249 instance encode_base;
250 unsigned encode_count = 0;
251 for(auto i : data) {
252 //std::cerr << "Considering " << *i << std::endl;
253 encode_base = i.first;
254 while(encode_base != i.second) {
255 unsigned syms = i.second - encode_base;
256 if(syms > MAXRUN)
257 syms = MAXRUN;
258 flush_symbol(strm, encode_base, predicted, syms);
259 scount += syms;
260 encode_base = encode_base + syms;
261 predicted = encode_base;
264 if(scount)
265 return scount - 1;
266 else
267 return 0;
270 uint64_t rrdata_set::read(std::vector<char>& strm, bool dummy) throw(std::bad_alloc)
272 uint64_t scount = 0;
273 instance decoding;
274 uint64_t ptr = 0;
275 memset(decoding.bytes, 0, RRDATA_BYTES);
276 while(ptr < strm.size()) {
277 char opcode;
278 unsigned char buf1[RRDATA_BYTES];
279 unsigned char buf2[3];
280 opcode = strm[ptr++];
281 unsigned validbytes = (opcode & 0x1F);
282 unsigned lengthbytes = (opcode & 0x60) >> 5;
283 unsigned repeat = 1;
284 memcpy(buf1, &strm[ptr], RRDATA_BYTES - validbytes);
285 ptr += (RRDATA_BYTES - validbytes);
286 memcpy(decoding.bytes + validbytes, buf1, RRDATA_BYTES - validbytes);
287 if(lengthbytes > 0) {
288 memcpy(buf2, &strm[ptr], lengthbytes);
289 ptr += lengthbytes;
291 if(lengthbytes == 1)
292 repeat = 2 + static_cast<unsigned>(buf2[0]);
293 if(lengthbytes == 2)
294 repeat = 258 + static_cast<unsigned>(buf2[0]) * 256 + buf2[1];
295 if(lengthbytes == 3)
296 repeat = 65794 + static_cast<unsigned>(buf2[0]) * 65536 + static_cast<unsigned>(buf2[1]) *
297 256 + buf2[2];
298 //std::cerr << "Decoding " << repeat << " symbols starting from " << decoding << std::endl;
299 if(!dummy) {
300 bool any = false;
301 if(!_in_set(decoding, decoding + repeat))
302 for(unsigned i = 0; i < repeat; i++) {
303 //TODO: Optimize this.
304 instance n = decoding + i;
305 if(!_in_set(n) && handle_open) {
306 ohandle.write(reinterpret_cast<const char*>(n.bytes), RRDATA_BYTES);
307 any = true;
310 if(any)
311 ohandle.flush();
312 rrdata_set::_add(decoding, decoding + repeat);
314 decoding = decoding + repeat;
315 scount += repeat;
317 if(scount)
318 return scount - 1;
319 else
320 return 0;
323 uint64_t rrdata_set::count(std::vector<char>& strm) throw(std::bad_alloc)
325 return read(strm, true);
328 uint64_t rrdata_set::count() throw()
330 uint64_t c = rcount;
331 if(c)
332 return c - 1;
333 else
334 return 0;
337 void rrdata_set::set_internal(const instance& b) throw()
339 internal = b;
342 std::ostream& operator<<(std::ostream& os, const struct rrdata_set::instance& j)
344 for(unsigned i = 0; i < 32; i++) {
345 os << hexes[j.bytes[i] / 16] << hexes[j.bytes[i] % 16];
347 return os;
350 bool rrdata_set::_add(const instance& b)
352 uint64_t c = rcount;
353 _add(b, b + 1);
354 return (c != rcount);
357 void rrdata_set::_add(const instance& b, const instance& e)
359 _add(b, e, data, rcount);
362 void rrdata_set::_add(const instance& b, const instance& e, std::set<std::pair<instance, instance>>& set,
363 uint64_t& cnt)
365 //Special case: Nothing.
366 if(set.empty()) {
367 set.insert(std::make_pair(b, e));
368 cnt += symbols_in_interval(b, e);
369 return;
371 //Just insert it.
372 auto itr = set.lower_bound(std::make_pair(b, e));
373 if(itr != set.end() && itr->first == b && itr->second == e)
374 return;
375 set.insert(std::make_pair(b, e));
376 cnt += symbols_in_interval(b, e);
377 itr = set.lower_bound(std::make_pair(b, e));
378 auto itr1 = itr;
379 auto itr2 = itr;
380 if(itr1 != set.begin()) itr1--;
381 itr2++;
382 bool have1 = (itr1 != itr);
383 instance rangebase = b;
384 //If the thing is entierely in itr1, undo the add.
385 if(have1 && b >= itr1->first && e <= itr1->second) {
386 cnt -= symbols_in_interval(b, e);
387 set.erase(itr);
388 return;
390 //Attach the thing to itr1 if appropriate.
391 if(have1 && b <= itr1->second) {
392 cnt -= symbols_in_interval(b, itr1->second);
393 rangebase = itr1->first;
394 set.insert(std::make_pair(itr1->first, e));
395 auto tmp = set.lower_bound(std::make_pair(itr1->first, e));
396 set.erase(itr1);
397 set.erase(itr);
398 itr = tmp;
399 have1 = false;
401 while(itr2 != set.end()) {
402 if(e < itr2->first)
403 break; //Nothing to merge anymore.
404 if(e >= itr2->second && (rangebase != itr2->first || e != itr2->second)) {
405 //This entiere range is subsumed.
406 cnt -= symbols_in_interval(itr2->first, itr2->second);
407 auto tmp = itr2;
408 itr2++;
409 set.erase(tmp);
410 } else if(e < itr2->second) {
411 //Combines with range.
412 cnt -= symbols_in_interval(itr2->first, e);
413 if(rangebase != itr2->first) {
414 set.insert(std::make_pair(rangebase, itr2->second));
415 set.erase(itr2);
417 set.erase(itr);
418 break;
423 bool rrdata_set::_in_set(const instance& b, const instance& e)
425 if(b == e)
426 return true;
427 if(data.empty())
428 return false;
429 auto itr = data.lower_bound(std::make_pair(b, e));
430 if(itr == data.end()) {
431 //If there is anything, it must be the last node.
432 auto r = *data.rbegin();
433 return (r.first <= b && r.second >= e);
434 } else {
435 //It may be this node or the previous one.
436 if(itr->first <= b && itr->second >= e)
437 return true;
438 itr--;
439 return (itr->first <= b && itr->second >= e);
443 std::string rrdata_set::debug_dump()
445 std::ostringstream x;
446 x << rcount << "[";
447 for(auto i : data)
448 x << "{" << i.first << "," << i.second << "}";
449 x << "]";
450 return x.str();
453 uint64_t rrdata_set::debug_nodecount(std::set<std::pair<instance, instance>>& set)
455 uint64_t x = 0;
456 for(auto i : set)
457 x += symbols_in_interval(i.first, i.second);
458 return x;