HoneyCursor: Handle finding entry < current entry
[xapian.git] / xapian-core / backends / honey / honey_cursor.h
blobfd8b86a73335189e90a5e96196da78c78010e7bc
1 /** @file honey_cursor.h
2 * @brief HoneyCursor class
3 */
4 /* Copyright (C) 2017 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #ifndef XAPIAN_INCLUDED_HONEY_CURSOR_H
22 #define XAPIAN_INCLUDED_HONEY_CURSOR_H
24 #include "honey_table.h"
26 #define DEBUGGING false
28 class HoneyCursor {
29 void rewind() {
30 fh.set_pos(offset); // FIXME root
31 current_key = last_key = std::string();
32 is_at_end = false;
33 index = root;
34 val_size = 0;
37 public:
38 BufferedFile fh;
39 std::string current_key, current_tag;
40 mutable size_t val_size = 0;
41 bool current_compressed = false;
42 mutable CompressionStream comp_stream;
43 bool is_at_end = false;
44 mutable std::string last_key;
46 // File offset to start of index and to current position in index.
47 off_t root, index;
49 // File offset to start of table (zero except for single-file DB).
50 off_t offset;
52 // Forward to next constructor form.
53 explicit HoneyCursor(const HoneyTable* table)
54 : HoneyCursor(table->fh, table->get_root(), table->get_offset()) {}
56 HoneyCursor(const BufferedFile& fh_, off_t root_, off_t offset_)
57 : fh(fh_),
58 comp_stream(Z_DEFAULT_STRATEGY),
59 root(root_),
60 index(root_),
61 offset(offset_)
63 fh.set_pos(offset); // FIXME root
66 HoneyCursor(const HoneyCursor& o)
67 : fh(o.fh),
68 current_key(o.current_key),
69 current_tag(o.current_tag), // FIXME really copy?
70 val_size(o.val_size),
71 current_compressed(o.current_compressed),
72 comp_stream(Z_DEFAULT_STRATEGY),
73 is_at_end(o.is_at_end),
74 last_key(o.last_key),
75 root(o.root),
76 index(o.index),
77 offset(o.offset)
79 fh.set_pos(o.fh.get_pos());
82 bool after_end() const { return is_at_end; }
84 bool next() {
85 if (is_at_end) {
86 Assert(false);
87 return false;
90 if (val_size) {
91 // Skip val data we've not looked at.
92 fh.skip(val_size);
93 val_size = 0;
96 if (fh.get_pos() == root) {
97 is_at_end = true;
98 return false;
101 int ch = fh.read();
102 if (ch == EOF) {
103 is_at_end = true;
104 return false;
107 size_t reuse = 0;
108 if (!last_key.empty()) {
109 reuse = ch;
110 ch = fh.read();
111 if (ch == EOF) throw Xapian::DatabaseError("EOF/error while reading key length", errno);
113 size_t key_size = ch;
114 char buf[256];
115 if (!fh.read(buf, key_size))
116 throw Xapian::DatabaseError("read of " + str(key_size) + " bytes of key data failed", errno);
117 current_key.assign(last_key, 0, reuse);
118 current_key.append(buf, key_size);
119 last_key = current_key;
121 if (DEBUGGING) {
122 std::string esc;
123 description_append(esc, current_key);
124 std::cerr << "K:" << esc << std::endl;
127 int r;
129 // FIXME: rework to take advantage of buffering that's happening anyway?
130 char * p = buf;
131 for (int i = 0; i < 8; ++i) {
132 int ch2 = fh.read();
133 if (ch2 == EOF) {
134 break;
136 *p++ = ch2;
137 if (ch2 < 128) break;
139 r = p - buf;
141 const char* p = buf;
142 const char* end = p + r;
143 if (!unpack_uint(&p, end, &val_size)) {
144 throw Xapian::DatabaseError("val_size unpack_uint invalid");
146 if (p != end) std::abort();
147 current_compressed = val_size & 1;
148 val_size >>= 1;
150 // FIXME: Always resize to 0? Not doing so avoids always having to
151 // clear all the data before reading it.
152 if (true && val_size == 0)
153 current_tag.resize(0);
155 is_at_end = false;
156 return true;
159 bool read_tag(bool keep_compressed = false) {
160 if (val_size) {
161 current_tag.resize(val_size);
162 if (!fh.read(&(current_tag[0]), val_size))
163 throw Xapian::DatabaseError("read of " + str(val_size) + " bytes of value data failed", errno);
164 if (DEBUGGING) {
165 std::cerr << "read " << val_size << " bytes of value data ending @" << fh.get_pos() << std::endl;
167 val_size = 0;
168 if (DEBUGGING) {
169 std::string esc;
170 description_append(esc, current_tag);
171 std::cerr << "V:" << esc << std::endl;
174 if (!keep_compressed && current_compressed) {
175 // Need to decompress.
176 comp_stream.decompress_start();
177 std::string new_tag;
178 if (!comp_stream.decompress_chunk(current_tag.data(), current_tag.size(), new_tag)) {
179 // Decompression didn't complete.
180 abort();
182 swap(current_tag, new_tag);
183 current_compressed = false;
184 if (DEBUGGING) {
185 std::cerr << "decompressed to " << current_tag.size() << " bytes of value data" << std::endl;
188 return current_compressed;
191 bool find_exact(const std::string& key) {
192 if (DEBUGGING) {
193 std::string esc;
194 description_append(esc, key);
195 std::cerr << "find_exact(" << esc << ") @" << fh.get_pos() << std::endl;
197 if (is_at_end) {
198 rewind();
199 } else {
200 // FIXME: use index
201 int cmp0 = current_key.compare(key);
202 if (cmp0 == 0) return true;
203 if (cmp0 > 0) {
204 rewind();
208 while (next()) {
209 int cmp = current_key.compare(key);
210 if (cmp == 0) return true;
211 if (cmp > 0) break;
213 return false;
216 bool find_entry_ge(const std::string& key) {
217 if (DEBUGGING) {
218 std::string esc;
219 description_append(esc, key);
220 std::cerr << "find_entry_ge(" << esc << ") @" << fh.get_pos() << std::endl;
222 if (is_at_end) {
223 rewind();
224 } else {
225 // FIXME: use index
226 int cmp0 = current_key.compare(key);
227 if (cmp0 == 0) return true;
228 if (cmp0 > 0) {
229 rewind();
233 while (next()) {
234 int cmp = current_key.compare(key);
235 if (cmp == 0) return true;
236 if (cmp > 0) return false;
238 is_at_end = true;
239 return false;
242 bool find_entry(const std::string& key) {
243 if (DEBUGGING) {
244 std::string desc;
245 description_append(desc, key);
246 std::cerr << "find_entry(" << desc << ") [LE]" << std::endl;
248 #if 0
249 if (is_at_end) {
250 rewind();
251 } else {
252 // FIXME: use index
253 int cmp0 = current_key.compare(key);
254 if (cmp0 == 0) return true;
255 if (cmp0 > 0) {
256 rewind();
260 off_t pos;
261 std::string k;
262 size_t vs;
263 bool compressed;
264 while (pos = fh.get_pos(), k = current_key, vs = val_size, compressed = current_compressed, next()) {
265 int cmp = current_key.compare(key);
266 if (cmp == 0) return true;
267 if (cmp > 0) break;
269 fh.set_pos(pos);
270 current_key = last_key = k;
271 val_size = vs;
272 current_compressed = compressed;
273 return false;
274 #else
275 if (is_at_end) {
276 rewind();
277 } else {
278 int cmp0 = current_key.compare(key);
279 if (cmp0 == 0) {
280 if (DEBUGGING) std::cerr << " already on it" << std::endl;
281 return true;
283 #if 1
284 if (cmp0 > 0) {
285 rewind();
287 #else
288 // FIXME: If "close" just seek forwards? Or consider seeking from current index pos?
289 //off_t pos = fh.get_pos();
290 fh.set_pos(root);
291 std::string index_key, prev_index_key;
292 std::make_unsigned<off_t>::type ptr = 0;
293 while (true) {
294 int reuse = fh.read();
295 if (reuse == -1) break;
296 int len = fh.read();
297 if (len == -1) std::abort(); // FIXME
298 prev_index_key = index_key;
299 index_key.resize(reuse + len);
300 fh.read(&index_key[reuse], len);
302 if (DEBUGGING) {
303 std::string desc;
304 description_append(desc, index_key);
305 std::cerr << "Index key: " << desc << std::endl;
308 cmp0 = index_key.compare(key);
309 if (cmp0 > 0) break;
310 last_key = ptr ? index_key : std::string(); // for now (a lie, but the reuse part is correct).
311 char buf[8];
312 char* e = buf;
313 while (true) {
314 int b = fh.read();
315 *e++ = b;
316 if ((b & 0x80) == 0) break;
318 const char* p = buf;
319 if (!unpack_uint(&p, e, &ptr) || p != e) std::abort(); // FIXME
320 if (DEBUGGING) std::cerr << " -> " << ptr << std::endl;
321 if (cmp0 == 0) {
322 if (DEBUGGING) std::cerr << " hit straight from index" << std::endl;
323 fh.set_pos(ptr);
324 current_key = index_key;
325 int r;
327 // FIXME: rework to take advantage of buffering that's happening anyway?
328 char * q = buf;
329 for (int i = 0; i < 8; ++i) {
330 int ch2 = fh.read();
331 if (ch2 == EOF) {
332 break;
334 *q++ = ch2;
335 if (ch2 < 128) break;
337 r = q - buf;
339 p = buf;
340 const char* end = p + r;
341 if (!unpack_uint(&p, end, &val_size)) {
342 throw Xapian::DatabaseError("val_size unpack_uint invalid");
344 bool& compressed = current_compressed;
345 compressed = val_size & 1;
346 val_size >>= 1;
347 if (p != end) std::abort();
348 return true;
351 if (DEBUGGING) std::cerr << " cmp0 = " << cmp0 << ", going to " << ptr << std::endl;
352 fh.set_pos(ptr);
354 // FIXME: crude for now
355 if (ptr != 0) {
356 current_key = prev_index_key;
357 char buf[4096];
358 int r;
360 // FIXME: rework to take advantage of buffering that's happening anyway?
361 char * p = buf;
362 for (int i = 0; i < 8; ++i) {
363 int ch2 = fh.read();
364 if (ch2 == EOF) {
365 break;
367 *p++ = ch2;
368 if (ch2 < 128) break;
370 r = p - buf;
372 const char* p = buf;
373 const char* end = p + r;
374 if (!unpack_uint(&p, end, &val_size)) {
375 throw Xapian::DatabaseError("val_size unpack_uint invalid");
377 bool& compressed = current_compressed;
378 auto& val = current_tag;
379 compressed = val_size & 1;
380 val_size >>= 1;
381 val.assign(p, end);
382 if (p != end) std::abort();
383 val_size -= (end - p);
384 while (val_size) {
385 size_t n = std::min(val_size, sizeof(buf));
386 if (!fh.read(buf, n))
387 throw Xapian::DatabaseError("read of " + str(n) + "/" + str(val_size) + " bytes of value data failed", errno);
388 val.append(buf, n);
389 val_size -= n;
391 } else {
392 if (!next()) {
393 std::abort();
397 if (DEBUGGING) {
398 std::string desc;
399 description_append(desc, current_key);
400 std::cerr << "Dropped to data layer on key: " << desc << std::endl;
403 // FIXME: need to put us in the "read key not tag" state but persist that more?
404 // if (cmp0 == 0) this is an exact hit from the index...
405 #endif
408 off_t pos;
409 std::string k;
410 size_t vs;
411 bool compressed;
412 while (true) {
413 pos = fh.get_pos();
414 k = current_key;
415 vs = val_size;
416 compressed = current_compressed;
418 if (DEBUGGING) {
419 std::string desc;
420 description_append(desc, current_key);
421 std::cerr << "@ " << pos << " key: " << desc << std::endl;
424 if (!next()) break;
426 int cmp = current_key.compare(key);
427 if (cmp == 0) {
428 if (DEBUGGING) std::cerr << " found it" << std::endl;
429 return true;
431 if (cmp > 0) break;
434 if (DEBUGGING) {
435 std::string desc;
436 description_append(desc, current_key);
437 std::cerr << " NOT found - reached " << desc << std::endl;
439 // No match so back up to previous entry.
440 is_at_end = false;
441 last_key = current_key = k;
442 val_size = vs;
443 current_compressed = compressed;
444 fh.set_pos(pos);
445 if (DEBUGGING) {
446 std::string desc;
447 description_append(desc, current_key);
448 std::cerr << " NOT found - leaving us on " << desc << std::endl;
450 return false;
451 #endif
454 void find_entry_lt(const std::string& key) {
455 if (DEBUGGING) {
456 std::string esc;
457 description_append(esc, key);
458 std::cerr << "find_entry_lt(" << esc << ") @" << fh.get_pos() << std::endl;
460 // FIXME: use index
461 int cmp = -1;
462 if (is_at_end || (cmp = key.compare(current_key)) <= 0) {
463 if (cmp == 0 && rare(&key == &current_key)) {
464 // Avoid bug with this (which should step back one entry):
465 // cursor.find_entry_lt(cursor.current_key);
466 std::string copy = current_key;
467 find_entry_lt(copy);
468 return;
470 rewind();
473 off_t pos;
474 std::string k;
475 size_t vs;
476 bool compressed;
477 do {
478 pos = fh.get_pos();
479 k = current_key;
480 vs = val_size;
481 compressed = current_compressed;
482 } while (next() && current_key < key);
484 // Back up to previous entry.
485 is_at_end = false;
486 last_key = current_key = k;
487 val_size = vs;
488 current_compressed = compressed;
489 fh.set_pos(pos);
492 HoneyCursor * clone() const {
493 return new HoneyCursor(*this);
496 bool del() { return false; }
499 class MutableHoneyCursor : public HoneyCursor {
500 public:
501 MutableHoneyCursor(HoneyTable* table_)
502 : HoneyCursor(table_->fh, table_->get_root(), table_->get_offset()) { }
505 #endif // XAPIAN_INCLUDED_HONEY_CURSOR_H