Merge branch 'rr1-maint'
[lsnes.git] / src / core / rrdata.cpp
blobc5286950cfc6599ba4a440df0613970bc4abac88
1 #include "core/misc.hpp"
2 #include "core/rrdata.hpp"
4 #include <set>
5 #include <cstring>
6 #include <string>
7 #include <iostream>
8 #include <fstream>
9 #include <sstream>
12 // XABCDEFXXXXXXXXX
13 // 0123456789XXXXXX
15 // ABCDEF0123456789XXXXXX
17 rrdata::instance::instance() throw(std::bad_alloc)
19 std::string rnd = get_random_hexstring(2 * RRDATA_BYTES);
20 memset(bytes, 0, RRDATA_BYTES);
21 for(unsigned i = 0; i < 2 * RRDATA_BYTES; i++) {
22 unsigned x = rnd[i];
23 x = x & 0x1F;
24 x = x - x / 16 * 9 - 1;
25 bytes[i / 2] = 16 * bytes[i / 2] + x;
29 rrdata::instance::instance(unsigned char* b) throw()
31 memcpy(bytes, b, RRDATA_BYTES);
34 bool rrdata::instance::operator<(const struct instance& i) const throw()
36 for(unsigned j = 0; j < RRDATA_BYTES; j++)
37 if(bytes[j] < i.bytes[j])
38 return true;
39 else if(bytes[j] > i.bytes[j])
40 return false;
41 return false;
44 bool rrdata::instance::operator==(const struct instance& i) const throw()
46 for(unsigned j = 0; j < RRDATA_BYTES; j++)
47 if(bytes[j] != i.bytes[j])
48 return false;
49 return true;
52 const struct rrdata::instance rrdata::instance::operator++(int) throw()
54 instance i = *this;
55 ++*this;
56 return i;
59 struct rrdata::instance& rrdata::instance::operator++() throw()
61 unsigned carry = 1;
62 for(unsigned i = 31; i < 32; i--) {
63 unsigned newcarry = (bytes[i] == 255 && carry);
64 bytes[i] += carry;
65 carry = newcarry;
67 return *this;
70 namespace
72 std::set<rrdata::instance> rrset;
73 std::ifstream ihandle;
74 std::ofstream ohandle;
75 bool handle_open;
76 std::string current_project;
77 bool lazy_mode;
78 const char* hexes = "0123456789ABCDEF";
81 void rrdata::read_base(const std::string& project, bool lazy) throw(std::bad_alloc)
83 if(project == current_project && (!lazy_mode || lazy))
84 return;
85 if(lazy) {
86 std::set<rrdata::instance> new_rrset;
87 rrset = new_rrset;
88 current_project = project;
89 lazy_mode = true;
90 if(handle_open)
91 ohandle.close();
92 handle_open = false;
93 return;
95 std::set<rrdata::instance> new_rrset;
96 if(project == current_project)
97 new_rrset = rrset;
98 std::string filename = get_config_path() + "/" + safe_filename(project) + ".rr";
99 if(handle_open) {
100 ohandle.close();
101 handle_open = false;
103 ihandle.open(filename.c_str(), std::ios_base::in | std::ios_base::binary);
104 while(ihandle) {
105 unsigned char bytes[RRDATA_BYTES];
106 ihandle.read(reinterpret_cast<char*>(bytes), RRDATA_BYTES);
107 instance k(bytes);
108 //std::cerr << "Loaded symbol: " << k << std::endl;
109 new_rrset.insert(k);
111 ihandle.close();
112 ohandle.open(filename.c_str(), std::ios_base::out | std::ios_base::app | std::ios_base::binary);
113 if(ohandle)
114 handle_open = true;
115 if(project == current_project && lazy_mode && !lazy) {
116 //Finish the project creation, write all.
117 for(auto i : rrset) {
118 ohandle.write(reinterpret_cast<const char*>(i.bytes), RRDATA_BYTES);
119 ohandle.flush();
122 rrset = new_rrset;
123 current_project = project;
124 lazy_mode = lazy;
127 void rrdata::close() throw()
129 current_project = "";
130 if(handle_open)
131 ohandle.close();
132 handle_open = false;
135 void rrdata::add(const struct rrdata::instance& i) throw(std::bad_alloc)
137 if(rrset.insert(i).second && handle_open) {
138 //std::cerr << "New symbol: " << i << std::endl;
139 ohandle.write(reinterpret_cast<const char*>(i.bytes), RRDATA_BYTES);
140 ohandle.flush();
144 void rrdata::add_internal() throw(std::bad_alloc)
146 if(!internal)
147 internal = new instance();
148 add((*internal)++);
151 namespace
153 void flush_symbol(std::vector<char>& strm, const rrdata::instance& base, const rrdata::instance& predicted,
154 unsigned count)
156 char opcode;
157 char buf1[RRDATA_BYTES + 4];
158 char buf2[3];
159 unsigned bias;
160 if(count == 1) {
161 opcode = 0x00;
162 bias = 1;
163 } else if(count < 258) {
164 opcode = 0x20;
165 bias = 2;
166 } else if(count < 65794) {
167 opcode = 0x40;
168 bias = 258;
169 } else {
170 opcode = 0x60;
171 bias = 65794;
173 unsigned j;
174 for(j = 0; j < 31; j++)
175 if(base.bytes[j] != predicted.bytes[j])
176 break;
177 opcode += j;
178 buf1[0] = opcode;
179 memcpy(buf1 + 1, base.bytes + j, RRDATA_BYTES - j);
180 buf2[0] = (count - bias) >> 16;
181 buf2[1] = (count - bias) >> 8;
182 buf2[2] = (count - bias);
183 memcpy(buf1 + (RRDATA_BYTES - j + 1), buf2 + (3 - (opcode >> 5)), opcode >> 5);
184 for(size_t s = 0; s < (RRDATA_BYTES - j + 1) + (opcode >> 5); s++)
185 strm.push_back(buf1[s]);
186 //std::cerr << "Encoding " << count << " symbols starting from " << base << std::endl;
190 uint64_t rrdata::write(std::vector<char>& strm) throw(std::bad_alloc)
192 strm.clear();
193 uint64_t count = 0;
194 instance last_encode_end;
195 memset(last_encode_end.bytes, 0, RRDATA_BYTES);
197 instance predicted;
198 instance encode_base;
199 unsigned encode_count = 0;
200 for(auto i : rrset) {
201 //std::cerr << "Considering " << *i << std::endl;
202 count++;
203 if(encode_count == 0) {
204 //This is the first symbol.
205 encode_base = i;
206 encode_count = 1;
207 } else if(predicted == i && encode_count < 16843009) {
208 //Correct prediction.
209 encode_count++;
210 } else {
211 //Failed prediction
212 flush_symbol(strm, encode_base, last_encode_end, encode_count);
213 last_encode_end = predicted;
214 encode_base = i;
215 encode_count = 1;
217 predicted = i;
218 ++predicted;
220 if(encode_count > 0)
221 flush_symbol(strm, encode_base, last_encode_end, encode_count);
222 if(count)
223 return count - 1;
224 else
225 return 0;
228 uint64_t rrdata::read(std::vector<char>& strm, bool dummy) throw(std::bad_alloc)
230 uint64_t count = 0;
231 instance decoding;
232 uint64_t ptr = 0;
233 memset(decoding.bytes, 0, RRDATA_BYTES);
234 while(ptr < strm.size()) {
235 char opcode;
236 unsigned char buf1[RRDATA_BYTES];
237 unsigned char buf2[3];
238 opcode = strm[ptr++];
239 unsigned validbytes = (opcode & 0x1F);
240 unsigned lengthbytes = (opcode & 0x60) >> 5;
241 unsigned repeat = 1;
242 memcpy(buf1, &strm[ptr], RRDATA_BYTES - validbytes);
243 ptr += (RRDATA_BYTES - validbytes);
244 memcpy(decoding.bytes + validbytes, buf1, RRDATA_BYTES - validbytes);
245 if(lengthbytes > 0) {
246 memcpy(buf2, &strm[ptr], lengthbytes);
247 ptr += lengthbytes;
249 if(lengthbytes == 1)
250 repeat = 2 + static_cast<unsigned>(buf2[0]);
251 if(lengthbytes == 2)
252 repeat = 258 + static_cast<unsigned>(buf2[0]) * 256 + buf2[1];
253 if(lengthbytes == 3)
254 repeat = 65794 + static_cast<unsigned>(buf2[0]) * 65536 + static_cast<unsigned>(buf2[1]) *
255 256 + buf2[2];
256 //std::cerr << "Decoding " << repeat << " symbols starting from " << decoding << std::endl;
257 if(!dummy)
258 for(unsigned i = 0; i < repeat; i++)
259 rrdata::add(decoding++);
260 count += repeat;
262 if(count)
263 return count - 1;
264 else
265 return 0;
268 uint64_t rrdata::count(std::vector<char>& strm) throw(std::bad_alloc)
270 return read(strm, true);
273 uint64_t rrdata::count() throw()
275 uint64_t c = rrset.size();
276 if(c)
277 return c - 1;
278 else
279 return 0;
283 std::ostream& operator<<(std::ostream& os, const struct rrdata::instance& j)
285 for(unsigned i = 0; i < 32; i++) {
286 os << hexes[j.bytes[i] / 16] << hexes[j.bytes[i] % 16];
288 return os;
291 rrdata::instance* rrdata::internal;
294 //DBC0AB8CBAAC6ED4B7781E34057891E8B9D93AAE733DEF764C06957FF705DE00
295 //DBC0AB8CBAAC6ED4B7781E34057891E8B9D93AAE733DEF764C06957FF705DDF3