1 /** @file honey_cursor.h
2 * @brief HoneyCursor class
4 /* Copyright (C) 2017 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #ifndef XAPIAN_INCLUDED_HONEY_CURSOR_H
22 #define XAPIAN_INCLUDED_HONEY_CURSOR_H
24 #include "honey_table.h"
26 #define DEBUGGING false
30 fh
.set_pos(offset
); // FIXME root
31 current_key
= last_key
= std::string();
39 std::string current_key
, current_tag
;
40 mutable size_t val_size
= 0;
41 bool current_compressed
= false;
42 mutable CompressionStream comp_stream
;
43 bool is_at_end
= false;
44 mutable std::string last_key
;
46 // File offset to start of index and to current position in index.
49 // File offset to start of table (zero except for single-file DB).
52 // Forward to next constructor form.
53 explicit HoneyCursor(const HoneyTable
* table
)
54 : HoneyCursor(table
->fh
, table
->get_root(), table
->get_offset()) {}
56 HoneyCursor(const BufferedFile
& fh_
, off_t root_
, off_t offset_
)
58 comp_stream(Z_DEFAULT_STRATEGY
),
63 fh
.set_pos(offset
); // FIXME root
66 HoneyCursor(const HoneyCursor
& o
)
68 current_key(o
.current_key
),
69 current_tag(o
.current_tag
), // FIXME really copy?
71 current_compressed(o
.current_compressed
),
72 comp_stream(Z_DEFAULT_STRATEGY
),
73 is_at_end(o
.is_at_end
),
79 fh
.set_pos(o
.fh
.get_pos());
82 bool after_end() const { return is_at_end
; }
91 // Skip val data we've not looked at.
96 if (fh
.get_pos() == root
) {
108 if (!last_key
.empty()) {
111 if (ch
== EOF
) throw Xapian::DatabaseError("EOF/error while reading key length", errno
);
113 size_t key_size
= ch
;
115 if (!fh
.read(buf
, key_size
))
116 throw Xapian::DatabaseError("read of " + str(key_size
) + " bytes of key data failed", errno
);
117 current_key
.assign(last_key
, 0, reuse
);
118 current_key
.append(buf
, key_size
);
119 last_key
= current_key
;
123 description_append(esc
, current_key
);
124 std::cerr
<< "K:" << esc
<< std::endl
;
129 // FIXME: rework to take advantage of buffering that's happening anyway?
131 for (int i
= 0; i
< 8; ++i
) {
137 if (ch2
< 128) break;
142 const char* end
= p
+ r
;
143 if (!unpack_uint(&p
, end
, &val_size
)) {
144 throw Xapian::DatabaseError("val_size unpack_uint invalid");
146 if (p
!= end
) std::abort();
147 current_compressed
= val_size
& 1;
150 // FIXME: Always resize to 0? Not doing so avoids always having to
151 // clear all the data before reading it.
152 if (true && val_size
== 0)
153 current_tag
.resize(0);
159 bool read_tag(bool keep_compressed
= false) {
161 current_tag
.resize(val_size
);
162 if (!fh
.read(&(current_tag
[0]), val_size
))
163 throw Xapian::DatabaseError("read of " + str(val_size
) + " bytes of value data failed", errno
);
165 std::cerr
<< "read " << val_size
<< " bytes of value data ending @" << fh
.get_pos() << std::endl
;
170 description_append(esc
, current_tag
);
171 std::cerr
<< "V:" << esc
<< std::endl
;
174 if (!keep_compressed
&& current_compressed
) {
175 // Need to decompress.
176 comp_stream
.decompress_start();
178 if (!comp_stream
.decompress_chunk(current_tag
.data(), current_tag
.size(), new_tag
)) {
179 // Decompression didn't complete.
182 swap(current_tag
, new_tag
);
183 current_compressed
= false;
185 std::cerr
<< "decompressed to " << current_tag
.size() << " bytes of value data" << std::endl
;
188 return current_compressed
;
191 bool find_exact(const std::string
& key
) {
194 description_append(esc
, key
);
195 std::cerr
<< "find_exact(" << esc
<< ") @" << fh
.get_pos() << std::endl
;
201 int cmp0
= current_key
.compare(key
);
202 if (cmp0
== 0) return true;
209 int cmp
= current_key
.compare(key
);
210 if (cmp
== 0) return true;
216 bool find_entry_ge(const std::string
& key
) {
219 description_append(esc
, key
);
220 std::cerr
<< "find_entry_ge(" << esc
<< ") @" << fh
.get_pos() << std::endl
;
226 int cmp0
= current_key
.compare(key
);
227 if (cmp0
== 0) return true;
234 int cmp
= current_key
.compare(key
);
235 if (cmp
== 0) return true;
236 if (cmp
> 0) return false;
242 bool find_entry(const std::string
& key
) {
245 description_append(desc
, key
);
246 std::cerr
<< "find_entry(" << desc
<< ") [LE]" << std::endl
;
253 int cmp0
= current_key
.compare(key
);
254 if (cmp0
== 0) return true;
264 while (pos
= fh
.get_pos(), k
= current_key
, vs
= val_size
, compressed
= current_compressed
, next()) {
265 int cmp
= current_key
.compare(key
);
266 if (cmp
== 0) return true;
270 current_key
= last_key
= k
;
272 current_compressed
= compressed
;
278 int cmp0
= current_key
.compare(key
);
280 if (DEBUGGING
) std::cerr
<< " already on it" << std::endl
;
288 // FIXME: If "close" just seek forwards? Or consider seeking from current index pos?
289 //off_t pos = fh.get_pos();
291 std::string index_key
, prev_index_key
;
292 std::make_unsigned
<off_t
>::type ptr
= 0;
294 int reuse
= fh
.read();
295 if (reuse
== -1) break;
297 if (len
== -1) std::abort(); // FIXME
298 prev_index_key
= index_key
;
299 index_key
.resize(reuse
+ len
);
300 fh
.read(&index_key
[reuse
], len
);
304 description_append(desc
, index_key
);
305 std::cerr
<< "Index key: " << desc
<< std::endl
;
308 cmp0
= index_key
.compare(key
);
310 last_key
= ptr
? index_key
: std::string(); // for now (a lie, but the reuse part is correct).
316 if ((b
& 0x80) == 0) break;
319 if (!unpack_uint(&p
, e
, &ptr
) || p
!= e
) std::abort(); // FIXME
320 if (DEBUGGING
) std::cerr
<< " -> " << ptr
<< std::endl
;
322 if (DEBUGGING
) std::cerr
<< " hit straight from index" << std::endl
;
324 current_key
= index_key
;
327 // FIXME: rework to take advantage of buffering that's happening anyway?
329 for (int i
= 0; i
< 8; ++i
) {
335 if (ch2
< 128) break;
340 const char* end
= p
+ r
;
341 if (!unpack_uint(&p
, end
, &val_size
)) {
342 throw Xapian::DatabaseError("val_size unpack_uint invalid");
344 bool& compressed
= current_compressed
;
345 compressed
= val_size
& 1;
347 if (p
!= end
) std::abort();
351 if (DEBUGGING
) std::cerr
<< " cmp0 = " << cmp0
<< ", going to " << ptr
<< std::endl
;
354 // FIXME: crude for now
356 current_key
= prev_index_key
;
360 // FIXME: rework to take advantage of buffering that's happening anyway?
362 for (int i
= 0; i
< 8; ++i
) {
368 if (ch2
< 128) break;
373 const char* end
= p
+ r
;
374 if (!unpack_uint(&p
, end
, &val_size
)) {
375 throw Xapian::DatabaseError("val_size unpack_uint invalid");
377 bool& compressed
= current_compressed
;
378 auto& val
= current_tag
;
379 compressed
= val_size
& 1;
382 if (p
!= end
) std::abort();
383 val_size
-= (end
- p
);
385 size_t n
= std::min(val_size
, sizeof(buf
));
386 if (!fh
.read(buf
, n
))
387 throw Xapian::DatabaseError("read of " + str(n
) + "/" + str(val_size
) + " bytes of value data failed", errno
);
399 description_append(desc
, current_key
);
400 std::cerr
<< "Dropped to data layer on key: " << desc
<< std::endl
;
403 // FIXME: need to put us in the "read key not tag" state but persist that more?
404 // if (cmp0 == 0) this is an exact hit from the index...
416 compressed
= current_compressed
;
420 description_append(desc
, current_key
);
421 std::cerr
<< "@ " << pos
<< " key: " << desc
<< std::endl
;
426 int cmp
= current_key
.compare(key
);
428 if (DEBUGGING
) std::cerr
<< " found it" << std::endl
;
436 description_append(desc
, current_key
);
437 std::cerr
<< " NOT found - reached " << desc
<< std::endl
;
439 // No match so back up to previous entry.
441 last_key
= current_key
= k
;
443 current_compressed
= compressed
;
447 description_append(desc
, current_key
);
448 std::cerr
<< " NOT found - leaving us on " << desc
<< std::endl
;
454 void find_entry_lt(const std::string
& key
) {
457 description_append(esc
, key
);
458 std::cerr
<< "find_entry_lt(" << esc
<< ") @" << fh
.get_pos() << std::endl
;
462 if (is_at_end
|| (cmp
= key
.compare(current_key
)) <= 0) {
463 if (cmp
== 0 && rare(&key
== ¤t_key
)) {
464 // Avoid bug with this (which should step back one entry):
465 // cursor.find_entry_lt(cursor.current_key);
466 std::string copy
= current_key
;
481 compressed
= current_compressed
;
482 } while (next() && current_key
< key
);
484 // Back up to previous entry.
486 last_key
= current_key
= k
;
488 current_compressed
= compressed
;
492 HoneyCursor
* clone() const {
493 return new HoneyCursor(*this);
496 bool del() { return false; }
499 class MutableHoneyCursor
: public HoneyCursor
{
501 MutableHoneyCursor(HoneyTable
* table_
)
502 : HoneyCursor(table_
->fh
, table_
->get_root(), table_
->get_offset()) { }
505 #endif // XAPIAN_INCLUDED_HONEY_CURSOR_H