posting list iterating now works
[xapian.git] / xapian-core / backends / honey / honey_cursor.h
blobe743e7b43f03d8421ad363492498ba81b00b6e08
1 #ifndef XAPIAN_INCLUDED_HONEY_CURSOR_H
2 #define XAPIAN_INCLUDED_HONEY_CURSOR_H
4 #include "honey_table.h"
6 class HoneyCursor {
7 public:
8 BufferedFile fh;
9 std::string current_key, current_tag;
10 mutable size_t val_size = 0;
11 bool current_compressed;
12 mutable CompressionStream comp_stream;
13 bool is_at_end = false;
14 bool is_after_end = false;
15 mutable std::string last_key;
16 off_t root, index;
18 explicit HoneyCursor(const HoneyTable* table)
19 : HoneyCursor(table->fh, table->get_root())
21 fh.set_pos(0 * root);
24 HoneyCursor(const BufferedFile& fh_, off_t root_)
25 : fh(fh_),
26 comp_stream(Z_DEFAULT_STRATEGY),
27 root(root_),
28 index(root_)
30 fh.set_pos(0 * root);
33 HoneyCursor(const HoneyCursor& o)
34 : fh(o.fh),
35 comp_stream(Z_DEFAULT_STRATEGY),
36 root(o.root),
37 index(o.root)
41 void rewind() {
42 fh.set_pos(0 * root);
43 last_key = std::string();
44 is_at_end = is_after_end = false;
45 index = root;
48 bool after_end() const { return is_after_end; }
50 bool next() {
51 if (is_at_end) {
52 is_after_end = true;
53 return false;
56 if (val_size) {
57 // Skip val data we've not looked at.
58 fh.skip(val_size);
59 val_size = 0;
62 int ch = fh.read();
63 if (ch == EOF) {
64 is_at_end = true;
65 return false;
68 size_t reuse = 0;
69 if (!last_key.empty()) {
70 reuse = ch;
71 ch = fh.read();
72 if (ch == EOF) throw Xapian::DatabaseError("EOF/error while reading key length", errno);
74 size_t key_size = ch;
75 char buf[256];
76 if (!fh.read(buf, key_size))
77 throw Xapian::DatabaseError("read of " + str(key_size) + " bytes of key data failed", errno);
78 current_key.assign(last_key, 0, reuse);
79 current_key.append(buf, key_size);
80 last_key = current_key;
82 if (DEBUGGING) {
83 std::string esc;
84 description_append(esc, current_key);
85 std::cout << "K:" << esc << std::endl;
88 int r;
90 // FIXME: rework to take advantage of buffering that's happening anyway?
91 char * p = buf;
92 for (int i = 0; i < 8; ++i) {
93 int ch2 = fh.read();
94 if (ch2 == EOF) {
95 break;
97 *p++ = ch2;
98 if (ch2 < 128) break;
100 r = p - buf;
102 const char* p = buf;
103 const char* end = p + r;
104 if (!unpack_uint(&p, end, &val_size)) {
105 throw Xapian::DatabaseError("val_size unpack_uint invalid");
107 if (p != end) std::abort();
108 current_compressed = val_size & 1;
109 val_size >>= 1;
111 // FIXME: Always resize to 0? Not doing so avoids always having to
112 // clear all the data before reading it.
113 if (true && val_size == 0)
114 current_tag.resize(0);
116 is_at_end = false;
117 return true;
120 bool read_tag(bool keep_compressed = false) {
121 if (val_size) {
122 current_tag.resize(val_size);
123 if (!fh.read(&(current_tag[0]), val_size))
124 throw Xapian::DatabaseError("read of " + str(val_size) + " bytes of value data failed", errno);
125 if (DEBUGGING) {
126 std::cerr << "read " << val_size << " bytes of value data ending @" << fh.get_pos() << std::endl;
128 val_size = 0;
129 if (DEBUGGING) {
130 std::string esc;
131 description_append(esc, current_tag);
132 std::cout << "V:" << esc << std::endl;
135 if (!keep_compressed && current_compressed) {
136 // Need to decompress.
137 comp_stream.decompress_start();
138 std::string new_tag;
139 if (!comp_stream.decompress_chunk(current_tag.data(), current_tag.size(), new_tag)) {
140 // Decompression didn't complete.
141 abort();
143 swap(current_tag, new_tag);
144 current_compressed = false;
145 if (DEBUGGING) {
146 std::cerr << "decompressed to " << current_tag.size() << " bytes of value data" << std::endl;
149 return current_compressed;
152 bool find_exact(const std::string& key) {
153 // std::cerr << "EQ" << std::endl;
154 // FIXME: use index
155 int cmp0 = current_key.compare(key);
156 if (cmp0 == 0) return true;
157 if (cmp0 > 0) {
158 rewind();
161 while (next()) {
162 int cmp = current_key.compare(key);
163 if (cmp == 0) return true;
164 if (cmp > 0) break;
166 return false;
169 bool find_entry_ge(const std::string& key) {
170 // std::cerr << "GE" << std::endl;
171 // FIXME: use index
172 int cmp0 = current_key.compare(key);
173 if (cmp0 == 0) return true;
174 if (cmp0 > 0) {
175 rewind();
178 while (next()) {
179 int cmp = current_key.compare(key);
180 if (cmp == 0) return true;
181 if (cmp > 0) break;
183 return false;
186 bool find_entry(const std::string& key) {
187 if (DEBUGGING) {
188 std::string desc;
189 description_append(desc, key);
190 std::cerr << "find_entry(" << desc << ") [LE]" << std::endl;
192 int cmp0 = current_key.compare(key);
193 if (cmp0 == 0) {
194 if (DEBUGGING) std::cerr << " already on it" << std::endl;
195 return true;
197 #if 0
198 if (cmp0 > 0) {
199 rewind();
201 #else
202 // FIXME: If "close" just seek forwards? Or consider seeking from current index pos?
203 //off_t pos = fh.get_pos();
204 fh.set_pos(root);
205 std::string index_key, prev_index_key;
206 std::make_unsigned<off_t>::type ptr = 0;
207 while (true) {
208 int reuse = fh.read();
209 if (reuse == -1) break;
210 int len = fh.read();
211 if (len == -1) std::abort(); // FIXME
212 prev_index_key = index_key;
213 index_key.resize(reuse + len);
214 fh.read(&index_key[reuse], len);
216 if (DEBUGGING) {
217 std::string desc;
218 description_append(desc, index_key);
219 std::cerr << "Index key: " << desc << std::endl;
222 cmp0 = index_key.compare(key);
223 if (cmp0 > 0) break;
224 last_key = ptr ? index_key : std::string(); // for now (a lie, but the reuse part is correct).
225 char buf[8];
226 char* e = buf;
227 while (true) {
228 int b = fh.read();
229 *e++ = b;
230 if ((b & 0x80) == 0) break;
232 const char* p = buf;
233 if (!unpack_uint(&p, e, &ptr) || p != e) std::abort(); // FIXME
234 if (DEBUGGING) std::cerr << " -> " << ptr << std::endl;
235 if (cmp0 == 0) {
236 if (DEBUGGING) std::cerr << " hit straight from index" << std::endl;
237 fh.set_pos(ptr);
238 current_key = index_key;
239 int r;
241 // FIXME: rework to take advantage of buffering that's happening anyway?
242 char * q = buf;
243 for (int i = 0; i < 8; ++i) {
244 int ch2 = fh.read();
245 if (ch2 == EOF) {
246 break;
248 *q++ = ch2;
249 if (ch2 < 128) break;
251 r = q - buf;
253 p = buf;
254 const char* end = p + r;
255 if (!unpack_uint(&p, end, &val_size)) {
256 throw Xapian::DatabaseError("val_size unpack_uint invalid");
258 bool& compressed = current_compressed;
259 compressed = val_size & 1;
260 val_size >>= 1;
261 if (p != end) std::abort();
262 return true;
265 if (DEBUGGING) std::cerr << " cmp0 = " << cmp0 << ", going to " << ptr << std::endl;
266 fh.set_pos(ptr);
268 // FIXME: crude for now
269 if (ptr != 0) {
270 current_key = prev_index_key;
271 char buf[4096];
272 int r;
274 // FIXME: rework to take advantage of buffering that's happening anyway?
275 char * p = buf;
276 for (int i = 0; i < 8; ++i) {
277 int ch2 = fh.read();
278 if (ch2 == EOF) {
279 break;
281 *p++ = ch2;
282 if (ch2 < 128) break;
284 r = p - buf;
286 const char* p = buf;
287 const char* end = p + r;
288 if (!unpack_uint(&p, end, &val_size)) {
289 throw Xapian::DatabaseError("val_size unpack_uint invalid");
291 bool& compressed = current_compressed;
292 auto& val = current_tag;
293 compressed = val_size & 1;
294 val_size >>= 1;
295 val.assign(p, end);
296 if (p != end) std::abort();
297 val_size -= (end - p);
298 while (val_size) {
299 size_t n = std::min(val_size, sizeof(buf));
300 if (!fh.read(buf, n))
301 throw Xapian::DatabaseError("read of " + str(n) + "/" + str(val_size) + " bytes of value data failed", errno);
302 val.append(buf, n);
303 val_size -= n;
305 } else {
306 if (!next()) {
307 std::abort();
311 if (DEBUGGING) {
312 std::string desc;
313 description_append(desc, current_key);
314 std::cerr << "Dropped to data layer on key: " << desc << std::endl;
317 // FIXME: need to put us in the "read key not tag" state but persist that more?
318 // if (cmp0 == 0) this is an exact hit from the index...
319 #endif
321 off_t pos;
322 std::string k;
323 size_t vs;
324 bool compressed;
325 while (true) {
326 pos = fh.get_pos();
327 k = current_key;
328 vs = val_size;
329 compressed = current_compressed;
331 if (DEBUGGING) {
332 std::string desc;
333 description_append(desc, current_key);
334 std::cerr << "@ " << pos << " key: " << desc << std::endl;
337 if (!next()) break;
339 int cmp = current_key.compare(key);
340 if (cmp == 0) {
341 if (DEBUGGING) std::cerr << " found it" << std::endl;
342 return true;
344 if (cmp > 0) break;
347 if (DEBUGGING) {
348 std::string desc;
349 description_append(desc, current_key);
350 std::cerr << " NOT found - reached " << desc << std::endl;
352 // No match so back up to previous entry.
353 is_at_end = is_after_end = false;
354 last_key = current_key = k;
355 val_size = vs;
356 current_compressed = compressed;
357 fh.set_pos(pos);
358 if (DEBUGGING) {
359 std::string desc;
360 description_append(desc, current_key);
361 std::cerr << " NOT found - leaving us on " << desc << std::endl;
363 return false;
366 void find_entry_lt(const std::string& key) {
367 // std::cerr << "LT" << std::endl;
368 // FIXME: use index
369 if (key < current_key) {
370 rewind();
371 current_key.resize(0);
374 off_t pos;
375 std::string k;
376 size_t vs;
377 bool compressed;
378 do {
379 pos = fh.get_pos();
380 k = current_key;
381 vs = val_size;
382 compressed = current_compressed;
383 } while (next() && current_key < key);
385 // Back up to previous entry.
386 is_at_end = is_after_end = false;
387 last_key = current_key = k;
388 val_size = vs;
389 current_compressed = compressed;
390 fh.set_pos(pos);
393 HoneyCursor * clone() const {
394 return new HoneyCursor(*this);
397 bool del() { return false; }
400 class MutableHoneyCursor : public HoneyCursor {
401 public:
402 MutableHoneyCursor(HoneyTable* table_)
403 : HoneyCursor(table_->fh, table_->get_root()) { }
406 #endif // XAPIAN_INCLUDED_HONEY_CURSOR_H