bsnes: redump sprite/palette functions
[lsnes.git] / src / library / rrdata.cpp
blob194c2f30a18677d98f6782f738dd069f87cb8cf1
1 #include "rrdata.hpp"
2 #include "hex.hpp"
3 #include <cstring>
4 #include <limits>
5 #include <functional>
6 #include <cassert>
8 #define MAXRUN 16843009
10 rrdata_set::instance::instance() throw()
12 memset(bytes, 0, RRDATA_BYTES);
15 rrdata_set::instance::instance(const unsigned char* b) throw()
17 memcpy(bytes, b, RRDATA_BYTES);
20 rrdata_set::instance::instance(const std::string& id) throw()
22 memset(bytes, 0, RRDATA_BYTES);
23 for(unsigned i = 0; i < id.length() && i < 2 * RRDATA_BYTES; i++) {
24 unsigned h;
25 char ch = id[i];
26 if(ch >= '0' && ch <= '9')
27 h = ch - '0';
28 else if(ch >= 'A' && ch <= 'F')
29 h = ch - 'A' + 10;
30 else if(ch >= 'a' && ch <= 'f')
31 h = ch - 'a' + 10;
32 bytes[i / 2] = bytes[i / 2] * 16 + h;
36 bool rrdata_set::instance::operator<(const struct instance& i) const throw()
38 for(unsigned j = 0; j < RRDATA_BYTES; j++)
39 if(bytes[j] < i.bytes[j])
40 return true;
41 else if(bytes[j] > i.bytes[j])
42 return false;
43 return false;
46 bool rrdata_set::instance::operator==(const struct instance& i) const throw()
48 for(unsigned j = 0; j < RRDATA_BYTES; j++)
49 if(bytes[j] != i.bytes[j])
50 return false;
51 return true;
54 const struct rrdata_set::instance rrdata_set::instance::operator++(int) throw()
56 instance i = *this;
57 ++*this;
58 return i;
61 struct rrdata_set::instance& rrdata_set::instance::operator++() throw()
63 unsigned carry = 1;
64 for(unsigned i = RRDATA_BYTES - 1; i < RRDATA_BYTES; i--) {
65 unsigned newcarry = (bytes[i] == 255 && carry);
66 bytes[i] += carry;
67 carry = newcarry;
69 return *this;
72 struct rrdata_set::instance rrdata_set::instance::operator+(unsigned inc) const throw()
74 rrdata_set::instance n = *this;
75 unsigned carry = inc;
76 for(unsigned i = RRDATA_BYTES - 1; i < RRDATA_BYTES; i--) {
77 unsigned newcarry = ((unsigned)n.bytes[i] + carry) >> 8;
78 if(newcarry == 0 && carry > 255)
79 newcarry = (1U << (8 * sizeof(unsigned) - 8));
80 n.bytes[i] += carry;
81 carry = newcarry;
83 return n;
86 unsigned rrdata_set::instance::operator-(const struct instance& m) const throw()
88 unsigned result = 0;
89 uint8_t diff[RRDATA_BYTES] = {0};
90 unsigned borrow = 0;
91 for(unsigned i = RRDATA_BYTES - 1; i < RRDATA_BYTES; i--) {
92 diff[i] = bytes[i] - m.bytes[i] - borrow;
93 borrow = ((unsigned)m.bytes[i] + borrow > (unsigned)bytes[i]) ? 1 : 0;
95 for(unsigned i = 0; i < RRDATA_BYTES; i++) {
96 if((result << 8 >> 8) != result)
97 return std::numeric_limits<unsigned>::max();
98 result <<= 8;
99 result |= diff[i];
101 return result;
104 rrdata_set::rrdata_set() throw()
106 rcount = 0;
107 lazy_mode = false;
108 handle_open = false;
111 void rrdata_set::read_base(const std::string& projectfile, bool lazy) throw(std::bad_alloc)
113 if(projectfile == current_projectfile && (!lazy_mode || lazy))
114 return;
115 if(lazy) {
116 std::set<std::pair<instance, instance>> new_rrset;
117 data = new_rrset;
118 current_projectfile = projectfile;
119 rcount = 0;
120 lazy_mode = true;
121 if(handle_open)
122 ohandle.close();
123 handle_open = false;
124 return;
126 std::set<std::pair<instance, instance>> new_rrset;
127 uint64_t new_count = 0;
128 if(projectfile == current_projectfile) {
129 new_rrset = data;
130 new_count = rcount;
132 std::string filename = projectfile;
133 if(handle_open) {
134 ohandle.close();
135 handle_open = false;
137 std::ifstream ihandle(filename.c_str(), std::ios_base::in | std::ios_base::binary);
138 while(ihandle) {
139 unsigned char bytes[RRDATA_BYTES];
140 ihandle.read(reinterpret_cast<char*>(bytes), RRDATA_BYTES);
141 instance k(bytes);
142 //std::cerr << "Loaded symbol: " << k << std::endl;
143 _add(k, k + 1, new_rrset, new_count);
145 ihandle.close();
146 ohandle.open(filename.c_str(), std::ios_base::out | std::ios_base::app | std::ios_base::binary);
147 if(ohandle)
148 handle_open = true;
149 if(projectfile == current_projectfile && lazy_mode && !lazy) {
150 //Finish the project creation, write all.
151 for(auto i : data) {
152 instance tmp = i.first;
153 while(tmp != i.second) {
154 ohandle.write(reinterpret_cast<const char*>(tmp.bytes), RRDATA_BYTES);
155 ++tmp;
157 ohandle.flush();
160 data = new_rrset;
161 rcount = new_count;
162 current_projectfile = projectfile;
163 lazy_mode = lazy;
166 void rrdata_set::close() throw()
168 current_projectfile = "";
169 if(handle_open)
170 ohandle.close();
171 handle_open = false;
174 void rrdata_set::add(const struct rrdata_set::instance& i) throw(std::bad_alloc)
176 if(_add(i) && handle_open) {
177 //std::cerr << "New symbol: " << i << std::endl;
178 ohandle.write(reinterpret_cast<const char*>(i.bytes), RRDATA_BYTES);
179 ohandle.flush();
183 namespace
185 size_t _flush_symbol(char* buf1, const rrdata_set::instance& base, const rrdata_set::instance& predicted,
186 unsigned count)
188 char opcode;
189 char buf2[3];
190 unsigned bias;
191 if(count == 1) {
192 opcode = 0x00;
193 bias = 1;
194 } else if(count < 258) {
195 opcode = 0x20;
196 bias = 2;
197 } else if(count < 65794) {
198 opcode = 0x40;
199 bias = 258;
200 } else {
201 opcode = 0x60;
202 bias = 65794;
204 unsigned j;
205 for(j = 0; j < 31; j++)
206 if(base.bytes[j] != predicted.bytes[j])
207 break;
208 opcode += j;
209 buf1[0] = opcode;
210 memcpy(buf1 + 1, base.bytes + j, RRDATA_BYTES - j);
211 buf2[0] = (count - bias) >> 16;
212 buf2[1] = (count - bias) >> 8;
213 buf2[2] = (count - bias);
214 memcpy(buf1 + (RRDATA_BYTES - j + 1), buf2 + (3 - (opcode >> 5)), opcode >> 5);
215 return (RRDATA_BYTES - j + 1) + (opcode >> 5);
218 uint64_t symbols_in_interval(const rrdata_set::instance& b, const rrdata_set::instance& e) throw()
220 uint64_t c = 0;
221 rrdata_set::instance x = b;
222 while(x != e) {
223 unsigned diff = e - x;
224 x = x + diff;
225 c = c + diff;
227 return c;
231 uint64_t rrdata_set::emerg_action(struct rrdata_set::esave_state& state, char* buf, size_t bufsize, uint64_t& scount)
232 const
234 uint64_t rsize = 0;
235 size_t lbytes;
236 unsigned encode_count = 0;
237 state.init(data);
238 while(!state.finished() || state.segptr != state.segend) {
239 if(state.segptr == state.segend) {
240 auto i = state.next();
241 state.segptr = i->first;
242 state.segend = i->second;
244 unsigned syms = state.segend - state.segptr;
245 if(syms > MAXRUN)
246 syms = MAXRUN;
247 char tmp[RRDATA_BYTES + 4];
248 rsize += lbytes = _flush_symbol(tmp, state.segptr, state.pred, syms);
249 if(buf) {
250 if(bufsize < lbytes) break;
251 memcpy(buf, tmp, lbytes);
252 buf += lbytes;
253 bufsize -= lbytes;
255 scount += syms;
256 state.segptr = state.segptr + syms;
257 state.pred = state.segptr;
259 return rsize;
262 uint64_t rrdata_set::write(std::vector<char>& strm) throw(std::bad_alloc)
264 uint64_t scount = 0;
265 esave_state cstate;
266 size_t ssize = emerg_action(cstate, NULL, 0, scount);
267 cstate.reset();
268 strm.resize(ssize);
269 uint64_t scount2 = 0;
270 size_t ssize2 = emerg_action(cstate, &strm[0], ssize, scount2);
271 if(ssize != ssize2 || scount != scount2) {
272 std::cerr << "RRDATA mismatch!" << std::endl;
273 std::cerr << "Length: Prepare: " << ssize << " Write: " << ssize2 << std::endl;
274 std::cerr << "Scount: Prepare: " << scount << " Write: " << scount2 << std::endl;
276 if(scount)
277 return scount - 1;
278 else
279 return 0;
282 namespace
284 uint64_t read_set(std::vector<char>& strm, std::function<void(rrdata_set::instance& d, unsigned rep)> fn)
285 throw(std::bad_alloc)
287 uint64_t scount = 0;
288 rrdata_set::instance decoding;
289 uint64_t ptr = 0;
290 memset(decoding.bytes, 0, RRDATA_BYTES);
291 while(ptr < strm.size()) {
292 char opcode;
293 unsigned char buf1[RRDATA_BYTES];
294 unsigned char buf2[3];
295 opcode = strm[ptr++];
296 unsigned validbytes = (opcode & 0x1F);
297 unsigned lengthbytes = (opcode & 0x60) >> 5;
298 unsigned repeat = 1;
299 memcpy(buf1, &strm[ptr], RRDATA_BYTES - validbytes);
300 ptr += (RRDATA_BYTES - validbytes);
301 memcpy(decoding.bytes + validbytes, buf1, RRDATA_BYTES - validbytes);
302 if(lengthbytes > 0) {
303 memcpy(buf2, &strm[ptr], lengthbytes);
304 ptr += lengthbytes;
306 if(lengthbytes == 1)
307 repeat = 2 + static_cast<unsigned>(buf2[0]);
308 if(lengthbytes == 2)
309 repeat = 258 + static_cast<unsigned>(buf2[0]) * 256 + buf2[1];
310 if(lengthbytes == 3)
311 repeat = 65794 + static_cast<unsigned>(buf2[0]) * 65536 +
312 static_cast<unsigned>(buf2[1]) * 256 + buf2[2];
314 fn(decoding, repeat);
315 decoding = decoding + repeat;
316 scount += repeat;
318 if(scount)
319 return scount - 1;
320 else
321 return 0;
325 uint64_t rrdata_set::read(std::vector<char>& strm) throw(std::bad_alloc)
327 return read_set(strm, [this](instance& d, unsigned rep) {
328 bool any = false;
329 if(handle_open && !_in_set(d, d + rep))
330 for(unsigned i = 0; i < rep; i++) {
331 //TODO: Optimize this.
332 instance n = d + i;
333 if(!_in_set(n)) {
334 ohandle.write(reinterpret_cast<const char*>(n.bytes), RRDATA_BYTES);
335 any = true;
338 if(any)
339 ohandle.flush();
340 _add(d, d + rep);
344 uint64_t rrdata_set::count(std::vector<char>& strm) throw(std::bad_alloc)
346 return read_set(strm, [](instance& d, unsigned rep) {});
349 uint64_t rrdata_set::count() throw()
351 uint64_t c = rcount;
352 if(c)
353 return c - 1;
354 else
355 return 0;
358 std::ostream& operator<<(std::ostream& os, const struct rrdata_set::instance& j)
360 os << hex::b_to(j.bytes, 32, true);
363 bool rrdata_set::_add(const instance& b)
365 uint64_t c = rcount;
366 _add(b, b + 1, data, rcount);
367 return (c != rcount);
370 void rrdata_set::_add(const instance& b, const instance& e)
372 _add(b, e, data, rcount);
375 void rrdata_set::_add(const instance& b, const instance& e, std::set<std::pair<instance, instance>>& set,
376 uint64_t& cnt)
378 //Special case: Nothing.
379 if(set.empty()) {
380 set.insert(std::make_pair(b, e));
381 cnt += symbols_in_interval(b, e);
382 return;
384 //Just insert it.
385 auto itr = set.lower_bound(std::make_pair(b, e));
386 if(itr != set.end() && itr->first == b && itr->second == e)
387 return;
388 set.insert(std::make_pair(b, e));
389 cnt += symbols_in_interval(b, e);
390 itr = set.lower_bound(std::make_pair(b, e));
391 auto itr1 = itr;
392 auto itr2 = itr;
393 if(itr1 != set.begin()) itr1--;
394 itr2++;
395 bool have1 = (itr1 != itr);
396 instance rangebase = b;
397 //If the thing is entierely in itr1, undo the add.
398 if(have1 && b >= itr1->first && e <= itr1->second) {
399 cnt -= symbols_in_interval(b, e);
400 set.erase(itr);
401 return;
403 //Attach the thing to itr1 if appropriate.
404 if(have1 && b <= itr1->second) {
405 cnt -= symbols_in_interval(b, itr1->second);
406 rangebase = itr1->first;
407 set.insert(std::make_pair(itr1->first, e));
408 auto tmp = set.lower_bound(std::make_pair(itr1->first, e));
409 set.erase(itr1);
410 set.erase(itr);
411 itr = tmp;
412 have1 = false;
414 while(itr2 != set.end()) {
415 if(e < itr2->first)
416 break; //Nothing to merge anymore.
417 if(e >= itr2->second && (rangebase != itr2->first || e != itr2->second)) {
418 //This entiere range is subsumed.
419 cnt -= symbols_in_interval(itr2->first, itr2->second);
420 auto tmp = itr2;
421 itr2++;
422 set.erase(tmp);
423 } else if(e < itr2->second) {
424 //Combines with range.
425 cnt -= symbols_in_interval(itr2->first, e);
426 if(rangebase != itr2->first) {
427 set.insert(std::make_pair(rangebase, itr2->second));
428 set.erase(itr2);
430 set.erase(itr);
431 break;
436 bool rrdata_set::_in_set(const instance& b, const instance& e)
438 if(b == e)
439 return true;
440 if(data.empty())
441 return false;
442 auto itr = data.lower_bound(std::make_pair(b, e));
443 if(itr == data.end()) {
444 //If there is anything, it must be the last node.
445 auto r = *data.rbegin();
446 return (r.first <= b && r.second >= e);
447 } else {
448 //It may be this node or the previous one.
449 if(itr->first <= b && itr->second >= e)
450 return true;
451 itr--;
452 return (itr->first <= b && itr->second >= e);
456 std::string rrdata_set::debug_dump()
458 std::ostringstream x;
459 x << rcount << "[";
460 for(auto i : data)
461 x << "{" << i.first << "," << i.second << "}";
462 x << "]";
463 return x.str();
466 uint64_t rrdata_set::debug_nodecount(std::set<std::pair<instance, instance>>& set)
468 uint64_t x = 0;
469 for(auto i : set)
470 x += symbols_in_interval(i.first, i.second);
471 return x;
474 namespace
478 uint64_t rrdata_set::size_emerg() const throw()
480 esave_state s;
481 uint64_t dummy;
482 return emerg_action(s, NULL, 0, dummy);
485 size_t rrdata_set::write_emerg(struct esave_state& state, char* buf, size_t bufsize) const throw()
487 uint64_t dummy;
488 return emerg_action(state, buf, bufsize, dummy);