//Run-length constant. NOTE(review): 16843009 == 65794 + 0xFFFFFF, which is the largest
//repeat count the three-length-byte record form decoded in read_set() can yield;
//presumably this caps the length of a single encoded run - confirm at the use sites.
8 #define MAXRUN 16843009
//Default constructor: fills all RRDATA_BYTES bytes of `bytes` with zero.
10 rrdata_set::instance::instance() throw()
12 memset(bytes
, 0, RRDATA_BYTES
);
//Constructs an instance from raw bytes: copies exactly RRDATA_BYTES bytes from b into
//`bytes`. b therefore has to point to at least RRDATA_BYTES readable bytes.
15 rrdata_set::instance::instance(const unsigned char* b
) throw()
17 memcpy(bytes
, b
, RRDATA_BYTES
);
//Constructs an instance from a hexadecimal string.
//`bytes` is zeroed first, so a string shorter than 2 * RRDATA_BYTES characters leaves the
//remaining bytes zero; characters beyond 2 * RRDATA_BYTES are ignored by the loop bound.
//NOTE(review): the statements that assign ch and h are not visible in this listing, so
//the handling of characters outside 0-9 / A-F / a-f cannot be confirmed from here.
20 rrdata_set::instance::instance(const std::string
& id
) throw()
22 memset(bytes
, 0, RRDATA_BYTES
);
//Two input characters are consumed per output byte (index i / 2).
23 for(unsigned i
= 0; i
< id
.length() && i
< 2 * RRDATA_BYTES
; i
++) {
26 if(ch
>= '0' && ch
<= '9')
28 else if(ch
>= 'A' && ch
<= 'F')
30 else if(ch
>= 'a' && ch
<= 'f')
//Shift the nibble already stored up by four bits and append h: the first character of
//each pair ends up as the high nibble.
32 bytes
[i
/ 2] = bytes
[i
/ 2] * 16 + h
;
//Ordering comparison against i. Bytes are compared pairwise starting at index 0, so the
//first differing byte decides the result.
//NOTE(review): the return statements are not visible in this listing; presumably the
//"<" branch returns true and the ">" branch (and full equality) returns false.
36 bool rrdata_set::instance::operator<(const struct instance
& i
) const throw()
38 for(unsigned j
= 0; j
< RRDATA_BYTES
; j
++)
39 if(bytes
[j
] < i
.bytes
[j
])
41 else if(bytes
[j
] > i
.bytes
[j
])
//Equality comparison against i: scans all RRDATA_BYTES bytes looking for a mismatch.
//NOTE(review): the return statements are not visible in this listing; presumably false
//on the first mismatch and true when the loop completes.
46 bool rrdata_set::instance::operator==(const struct instance
& i
) const throw()
48 for(unsigned j
= 0; j
< RRDATA_BYTES
; j
++)
49 if(bytes
[j
] != i
.bytes
[j
])
//Postfix increment (the unnamed int parameter selects the postfix form). Returns a const
//instance by value. The body is not visible in this listing.
54 const struct rrdata_set::instance
rrdata_set::instance::operator++(int) throw()
//Prefix increment; returns a reference to an instance.
//NOTE(review): the initialisation of carry and the statements that update bytes[i] and
//carry are not visible in this listing.
61 struct rrdata_set::instance
& rrdata_set::instance::operator++() throw()
//Walks from the last byte towards index 0. i is unsigned, so decrementing it past 0 wraps
//to a huge value and the "i < RRDATA_BYTES" test ends the loop.
64 for(unsigned i
= RRDATA_BYTES
- 1; i
< RRDATA_BYTES
; i
--) {
//A carry leaves this byte only if it is already 255 and a carry is coming in.
65 unsigned newcarry
= (bytes
[i
] == 255 && carry
);
//Returns a copy of *this advanced by inc; *this itself is not modified (const member,
//work is done on the local copy n).
//NOTE(review): the declaration of carry (presumably initialised from inc) and the
//statements that store the new byte and update carry are not visible in this listing.
72 struct rrdata_set::instance
rrdata_set::instance::operator+(unsigned inc
) const throw()
74 rrdata_set::instance n
= *this;
//Walks from the last byte towards index 0; the unsigned index wraps after 0, which
//terminates the loop.
76 for(unsigned i
= RRDATA_BYTES
- 1; i
< RRDATA_BYTES
; i
--) {
//Whatever exceeds eight bits of (byte + carry) is the amount carried to the next byte.
77 unsigned newcarry
= ((unsigned)n
.bytes
[i
] + carry
) >> 8;
//If the unsigned addition above wrapped around, the shifted result is 0 even though
//carry was larger than 255. The lost top bit, shifted right by 8, is restored here.
78 if(newcarry
== 0 && carry
> 255)
79 newcarry
= (1U << (8 * sizeof(unsigned) - 8));
//Computes the distance *this - m as an unsigned value.
//Returns std::numeric_limits<unsigned>::max() when the visible overflow test fires, i.e.
//the result is capped rather than wrapped.
//NOTE(review): the declarations of borrow and result and the statement that folds diff[i]
//into result are not visible in this listing; behaviour for m > *this cannot be confirmed.
86 unsigned rrdata_set::instance::operator-(const struct instance
& m
) const throw()
89 uint8_t diff
[RRDATA_BYTES
] = {0};
//Byte-wise subtraction with borrow, from the last byte towards index 0 (the unsigned
//index wraps after 0 and ends the loop).
91 for(unsigned i
= RRDATA_BYTES
- 1; i
< RRDATA_BYTES
; i
--) {
92 diff
[i
] = bytes
[i
] - m
.bytes
[i
] - borrow
;
//A borrow is needed from the next byte when subtrahend plus incoming borrow exceeds the
//minuend byte.
93 borrow
= ((unsigned)m
.bytes
[i
] + borrow
> (unsigned)bytes
[i
]) ? 1 : 0;
95 for(unsigned i
= 0; i
< RRDATA_BYTES
; i
++) {
//If the top eight bits of result are already in use, another byte cannot be shifted in
//without losing data, so give up and report the maximum value.
96 if((result
<< 8 >> 8) != result
)
97 return std::numeric_limits
<unsigned>::max();
//Default constructor of rrdata_set, declared non-throwing. The body is not visible in this
//listing.
104 rrdata_set::rrdata_set() throw()
//Attaches the set to the project file named by projectfile; lazy selects lazy mode.
//Steps visible here:
// - a test for "same project file, and either not currently lazy or lazy was requested";
// - loading RRDATA_BYTES-sized records from a binary input stream and adding each one as
//   the single-element range [k, k + 1) to a fresh set / counter;
// - opening ohandle on the same filename for binary append;
// - when the same project switches from lazy to non-lazy, writing the instances of the
//   stored ranges to ohandle, one RRDATA_BYTES record each.
//Declared to throw only std::bad_alloc.
//NOTE(review): many statements of this function (branch bodies, how filename is derived
//from projectfile, the read loop, what i iterates over) are not visible in this listing;
//treat the summary above as a description of the visible fragments only.
111 void rrdata_set::read_base(const std::string
& projectfile
, bool lazy
) throw(std::bad_alloc
)
113 if(projectfile
== current_projectfile
&& (!lazy_mode
|| lazy
))
116 std::set
<std::pair
<instance
, instance
>> new_rrset
;
118 current_projectfile
= projectfile
;
126 std::set
<std::pair
<instance
, instance
>> new_rrset
;
127 uint64_t new_count
= 0;
128 if(projectfile
== current_projectfile
) {
132 std::string filename
= projectfile
;
137 std::ifstream
ihandle(filename
.c_str(), std::ios_base::in
| std::ios_base::binary
);
139 unsigned char bytes
[RRDATA_BYTES
];
140 ihandle
.read(reinterpret_cast<char*>(bytes
), RRDATA_BYTES
);
142 //std::cerr << "Loaded symbol: " << k << std::endl;
143 _add(k
, k
+ 1, new_rrset
, new_count
);
//Append mode: existing records in the file are kept, new ones go to the end.
146 ohandle
.open(filename
.c_str(), std::ios_base::out
| std::ios_base::app
| std::ios_base::binary
);
149 if(projectfile
== current_projectfile
&& lazy_mode
&& !lazy
) {
150 //Finish the project creation, write all.
//The second member of a range is exclusive: writing stops when tmp reaches it.
152 instance tmp
= i
.first
;
153 while(tmp
!= i
.second
) {
154 ohandle
.write(reinterpret_cast<const char*>(tmp
.bytes
), RRDATA_BYTES
);
162 current_projectfile
= projectfile
;
//Detaches from the current project: current_projectfile is reset to the empty string.
//NOTE(review): any further steps (e.g. closing ohandle) are not visible in this listing.
166 void rrdata_set::close() throw()
168 current_projectfile
= "";
//Adds the single instance i to the set. When _add(i) reports that the set changed and
//handle_open is set, the RRDATA_BYTES raw bytes of i are also written to ohandle.
//Declared to throw only std::bad_alloc.
174 void rrdata_set::add(const struct rrdata_set::instance
& i
) throw(std::bad_alloc
)
176 if(_add(i
) && handle_open
) {
177 //std::cerr << "New symbol: " << i << std::endl;
178 ohandle
.write(reinterpret_cast<const char*>(i
.bytes
), RRDATA_BYTES
);
//Encodes one run starting at base into buf1 and returns the number of bytes produced.
//Record layout as written by the visible statements:
//  buf1[0]                      opcode byte (its assignment is not visible here)
//  buf1[1 ..]                   the RRDATA_BYTES - j trailing bytes of base, where j is the
//                               number of leading bytes base shares with predicted
//  then (opcode >> 5) bytes     the low-order bytes of (count - bias), most significant
//                               first
//The return value is therefore 1 + (RRDATA_BYTES - j) + (opcode >> 5).
//NOTE(review): the remaining parameter(s), the declarations of opcode / bias / buf2 / j,
//the first branch of the count chain and the loop's exit statement are not visible in
//this listing. The prefix scan is bounded by the literal 31 rather than RRDATA_BYTES;
//presumably RRDATA_BYTES is 32 - confirm.
185 size_t _flush_symbol(char* buf1
, const rrdata_set::instance
& base
, const rrdata_set::instance
& predicted
,
//Thresholds 258 and 65794 separate the run-length ranges that need a different number of
//length bytes.
194 } else if(count
< 258) {
197 } else if(count
< 65794) {
//Find the first byte where base departs from the predicted value.
205 for(j
= 0; j
< 31; j
++)
206 if(base
.bytes
[j
] != predicted
.bytes
[j
])
210 memcpy(buf1
+ 1, base
.bytes
+ j
, RRDATA_BYTES
- j
);
//Lay out (count - bias) as three big-endian bytes; only the last (opcode >> 5) of them
//are copied into the record below.
211 buf2
[0] = (count
- bias
) >> 16;
212 buf2
[1] = (count
- bias
) >> 8;
213 buf2
[2] = (count
- bias
);
214 memcpy(buf1
+ (RRDATA_BYTES
- j
+ 1), buf2
+ (3 - (opcode
>> 5)), opcode
>> 5);
215 return (RRDATA_BYTES
- j
+ 1) + (opcode
>> 5);
//Returns, as a uint64_t, a count for the interval delimited by b and e (per the name, the
//number of symbols in it).
//The difference e - x is taken through instance::operator-, which yields an unsigned.
//NOTE(review): the rest of the body is not visible in this listing; presumably x is
//advanced in steps so that intervals wider than an unsigned can be counted - confirm.
218 uint64_t symbols_in_interval(const rrdata_set::instance
& b
, const rrdata_set::instance
& e
) throw()
221 rrdata_set::instance x
= b
;
223 unsigned diff
= e
- x
;
//Serialises the ranges supplied by state into buf as run-length records.
//state carries the iteration position: segptr / segend delimit the part of the current
//range still to be emitted, pred is the value the next record is encoded against.
//NOTE(review): the declarations of rsize and lbytes, the updates of buf / bufsize /
//scount, any guard around the buffer copy and the return statement are not visible in
//this listing.
231 uint64_t rrdata_set::emerg_action(struct rrdata_set::esave_state
& state
, char* buf
, size_t bufsize
, uint64_t& scount
)
//Keep going while there are more ranges to fetch or the current one is not used up.
237 while(!state
.finished() || state
.segptr
!= state
.segend
) {
//Current range exhausted: load the next one from state.
238 if(state
.segptr
== state
.segend
) {
239 auto i
= state
.next();
240 state
.segptr
= i
->first
;
241 state
.segend
= i
->second
;
243 unsigned syms
= state
.segend
- state
.segptr
;
//Scratch buffer for one record, RRDATA_BYTES + 4 bytes.
246 char tmp
[RRDATA_BYTES
+ 4];
//NOTE(review): rsize is advanced before the size check below, so when the loop breaks
//rsize already includes the record that did not fit - confirm callers expect that.
247 rsize
+= lbytes
= _flush_symbol(tmp
, state
.segptr
, state
.pred
, syms
);
249 if(bufsize
< lbytes
) break;
250 memcpy(buf
, tmp
, lbytes
);
//Advance past the emitted run; the next record is predicted from where this one ended.
255 state
.segptr
= state
.segptr
+ syms
;
256 state
.pred
= state
.segptr
;
//Serialises the set into strm using two passes over emerg_action():
// 1. with a NULL buffer of size 0, to obtain the required size (ssize) and the symbol
//    count (scount);
// 2. with strm's storage and capacity ssize, to produce the data (ssize2 / scount2).
//If the two passes disagree on size or count, a diagnostic is printed to std::cerr.
//Declared to throw only std::bad_alloc.
//NOTE(review): the resizing of strm, the (re)initialisation of cstate between the passes
//and the return statement are not visible in this listing. &strm[0] needs strm to be
//non-empty at that point.
261 uint64_t rrdata_set::write(std::vector
<char>& strm
) throw(std::bad_alloc
)
265 size_t ssize
= emerg_action(cstate
, NULL
, 0, scount
);
268 uint64_t scount2
= 0;
269 size_t ssize2
= emerg_action(cstate
, &strm
[0], ssize
, scount2
);
270 if(ssize
!= ssize2
|| scount
!= scount2
) {
271 std::cerr
<< "RRDATA mismatch!" << std::endl
;
272 std::cerr
<< "Length: Prepare: " << ssize
<< " Write: " << ssize2
<< std::endl
;
273 std::cerr
<< "Scount: Prepare: " << scount
<< " Write: " << scount2
<< std::endl
;
//Decodes a run-length encoded stream and calls fn(decoding, repeat) once per record.
//Record layout as read by the visible statements:
//  1 byte                         opcode; bits 0-4 = validbytes, bits 5-6 = lengthbytes
//  RRDATA_BYTES - validbytes      new trailing bytes of the value; the first validbytes
//                                 bytes are kept from the previously decoded value
//  lengthbytes bytes              run length, most significant byte first
//Run length by form: 2 + b0, 258 + 16-bit value, 65794 + 24-bit value.
//After the callback, decoding is advanced by repeat so the next record is relative to the
//end of this run. decoding starts out as all zero bytes.
//Declared to throw only std::bad_alloc.
//NOTE(review): the declarations of ptr / opcode / repeat, the branch selecting the repeat
//formula (presumably on lengthbytes, with 1 for lengthbytes == 0) and the return
//statement are not visible in this listing.
//NOTE(review): no check is visible that a whole record lies within strm before the
//memcpy calls; a truncated or malformed stream would read past the end - confirm.
283 uint64_t read_set(std::vector
<char>& strm
, std::function
<void(rrdata_set::instance
& d
, unsigned rep
)> fn
)
284 throw(std::bad_alloc
)
287 rrdata_set::instance decoding
;
289 memset(decoding
.bytes
, 0, RRDATA_BYTES
);
290 while(ptr
< strm
.size()) {
292 unsigned char buf1
[RRDATA_BYTES
];
293 unsigned char buf2
[3];
294 opcode
= strm
[ptr
++];
295 unsigned validbytes
= (opcode
& 0x1F);
296 unsigned lengthbytes
= (opcode
& 0x60) >> 5;
298 memcpy(buf1
, &strm
[ptr
], RRDATA_BYTES
- validbytes
);
299 ptr
+= (RRDATA_BYTES
- validbytes
);
//Overwrite only the tail; the leading validbytes bytes carry over from the last value.
300 memcpy(decoding
.bytes
+ validbytes
, buf1
, RRDATA_BYTES
- validbytes
);
301 if(lengthbytes
> 0) {
302 memcpy(buf2
, &strm
[ptr
], lengthbytes
);
//Each form is biased by the first count the shorter form cannot express.
306 repeat
= 2 + static_cast<unsigned>(buf2
[0]);
308 repeat
= 258 + static_cast<unsigned>(buf2
[0]) * 256 + buf2
[1];
310 repeat
= 65794 + static_cast<unsigned>(buf2
[0]) * 65536 +
311 static_cast<unsigned>(buf2
[1]) * 256 + buf2
[2];
313 fn(decoding
, repeat
);
314 decoding
= decoding
+ repeat
;
//Loads an encoded stream into this set by running read_set() with a callback that
//captures this. Returns whatever read_set() returns.
//Visible part of the callback: when handle_open is set and the run [d, d + rep) is not
//already in the set according to _in_set(), the run's instances are written to ohandle,
//one RRDATA_BYTES record per instance.
//Declared to throw only std::bad_alloc.
//NOTE(review): the statement that adds the run to the set and the definition of n
//(presumably d + i) are not visible in this listing.
324 uint64_t rrdata_set::read(std::vector
<char>& strm
) throw(std::bad_alloc
)
326 return read_set(strm
, [this](instance
& d
, unsigned rep
) {
328 if(handle_open
&& !_in_set(d
, d
+ rep
))
329 for(unsigned i
= 0; i
< rep
; i
++) {
330 //TODO: Optimize this.
333 ohandle
.write(reinterpret_cast<const char*>(n
.bytes
), RRDATA_BYTES
);
//Decodes strm with a callback that does nothing and returns read_set()'s result, so the
//stream is walked without touching this set. Declared to throw only std::bad_alloc.
343 uint64_t rrdata_set::count(std::vector
<char>& strm
) throw(std::bad_alloc
)
345 return read_set(strm
, [](instance
& d
, unsigned rep
) {});
//Overload without a stream argument; returns a uint64_t and is declared non-throwing.
//NOTE(review): the body is not visible in this listing; presumably it reports the number
//of symbols currently held by the set - confirm.
348 uint64_t rrdata_set::count() throw()
//Stream insertion for an instance: writes the result of hex::b_to(j.bytes, 32, true) to
//os.
//NOTE(review): the length is the literal 32 rather than RRDATA_BYTES; the meaning of the
//third argument of hex::b_to is not visible here. The return statement is not visible.
357 std::ostream
& operator<<(std::ostream
& os
, const struct rrdata_set::instance
& j
)
359 os
<< hex::b_to(j
.bytes
, 32, true);
//Adds the single-element range [b, b + 1) to data / rcount and returns true when rcount
//differs from c afterwards.
//NOTE(review): the declaration of c is not visible in this listing; presumably it is a
//snapshot of rcount taken before the call, making the result "b was newly added".
363 bool rrdata_set::_add(const instance
& b
)
366 _add(b
, b
+ 1, data
, rcount
);
367 return (c
!= rcount
);
//Adds the range delimited by b and e to this object's own set: forwards to the four
//argument overload with data and rcount.
370 void rrdata_set::_add(const instance
& b
, const instance
& e
)
372 _add(b
, e
, data
, rcount
);
//Inserts the range delimited by b and e into set, merging it with stored ranges it
//overlaps, and keeps cnt in step: cnt is first increased by the size of the new range and
//then decreased for every part that turns out to be covered by an existing range.
//NOTE(review): the last parameter (the counter used as cnt), several conditions, the
//erase calls and the declarations of itr1 / itr2 are not visible in this listing, so the
//comments below describe the visible fragments only.
375 void rrdata_set::_add(const instance
& b
, const instance
& e
, std::set
<std::pair
<instance
, instance
>>& set
,
378 //Special case: Nothing.
380 set
.insert(std::make_pair(b
, e
));
381 cnt
+= symbols_in_interval(b
, e
);
//Look for an identical range that is already stored.
385 auto itr
= set
.lower_bound(std::make_pair(b
, e
));
386 if(itr
!= set
.end() && itr
->first
== b
&& itr
->second
== e
)
388 set
.insert(std::make_pair(b
, e
));
389 cnt
+= symbols_in_interval(b
, e
);
390 itr
= set
.lower_bound(std::make_pair(b
, e
));
//Step back to the predecessor, if there is one.
393 if(itr1
!= set
.begin()) itr1
--;
//have1 is false when no step back happened (itr1 still equals itr).
395 bool have1
= (itr1
!= itr
);
396 instance rangebase
= b
;
397 //If the thing is entirely in itr1, undo the add.
398 if(have1
&& b
>= itr1
->first
&& e
<= itr1
->second
) {
399 cnt
-= symbols_in_interval(b
, e
);
403 //Attach the thing to itr1 if appropriate.
404 if(have1
&& b
<= itr1
->second
) {
//The part from b up to the end of itr1 was already counted once.
405 cnt
-= symbols_in_interval(b
, itr1
->second
);
406 rangebase
= itr1
->first
;
407 set
.insert(std::make_pair(itr1
->first
, e
));
408 auto tmp
= set
.lower_bound(std::make_pair(itr1
->first
, e
));
414 while(itr2
!= set
.end()) {
416 break; //Nothing to merge anymore.
//The inequality test keeps the merged range itself from being treated as subsumed.
417 if(e
>= itr2
->second
&& (rangebase
!= itr2
->first
|| e
!= itr2
->second
)) {
418 //This entire range is subsumed.
419 cnt
-= symbols_in_interval(itr2
->first
, itr2
->second
);
423 } else if(e
< itr2
->second
) {
424 //Combines with range.
425 cnt
-= symbols_in_interval(itr2
->first
, e
);
426 if(rangebase
!= itr2
->first
) {
427 set
.insert(std::make_pair(rangebase
, itr2
->second
));
//Tests whether the range delimited by b and e is covered by a single range stored in
//data, i.e. some stored pair has first <= b and second >= e.
//NOTE(review): the statements before the lookup are not visible in this listing. The
//rbegin() dereference below needs data to be non-empty; presumably an earlier emptiness
//check guarantees that - confirm. The step to the previous node is also not visible.
436 bool rrdata_set::_in_set(const instance
& b
, const instance
& e
)
442 auto itr
= data
.lower_bound(std::make_pair(b
, e
));
443 if(itr
== data
.end()) {
444 //If there is anything, it must be the last node.
445 auto r
= *data
.rbegin();
446 return (r
.first
<= b
&& r
.second
>= e
);
448 //It may be this node or the previous one.
449 if(itr
->first
<= b
&& itr
->second
>= e
)
452 return (itr
->first
<= b
&& itr
->second
>= e
);
//Builds a textual dump in a string stream: each range is written as "{first,second}"
//using the instance stream-insertion operator.
//NOTE(review): the loop header (what i iterates over) and the return statement are not
//visible in this listing.
456 std::string
rrdata_set::debug_dump()
458 std::ostringstream x
;
461 x
<< "{" << i
.first
<< "," << i
.second
<< "}";
//Accumulates symbols_in_interval(first, second) over range entries into x.
//NOTE(review): the declaration of x, the loop header (presumably iterating over set) and
//the return statement are not visible in this listing.
466 uint64_t rrdata_set::debug_nodecount(std::set
<std::pair
<instance
, instance
>>& set
)
470 x
+= symbols_in_interval(i
.first
, i
.second
);
//Returns the result of emerg_action() run with a NULL buffer of size 0, the same
//arguments write() uses for its sizing pass. The symbol count goes to dummy and is
//discarded.
//NOTE(review): the declaration / initialisation of s and dummy is not visible in this
//listing.
478 uint64_t rrdata_set::size_emerg() const throw()
482 return emerg_action(s
, NULL
, 0, dummy
);
//Continues serialisation from state into buf (capacity bufsize) by forwarding to
//emerg_action() and returns its result. The symbol count goes to dummy and is discarded.
//NOTE(review): the declaration of dummy is not visible in this listing.
485 size_t rrdata_set::write_emerg(struct esave_state
& state
, char* buf
, size_t bufsize
) const throw()
488 return emerg_action(state
, buf
, bufsize
, dummy
);