[honey] Don't include <iostream> by default
[xapian.git] / xapian-core / backends / honey / honey_cursor.cc
blob32af77041bd7c6b3dbd250bd7b789199f9e6ca80
1 /** @file honey_cursor.cc
2 * @brief HoneyCursor class
3 */
4 /* Copyright (C) 2017,2018 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #include <config.h>
23 #include "honey_cursor.h"
25 #include <string>
27 #if 1
28 # define DEBUGGING false
29 #else
30 # define DEBUGGING true
31 # include <iostream>
32 #endif
34 using namespace std;
36 bool
37 HoneyCursor::next()
39 if (is_at_end) {
40 Assert(false);
41 return false;
44 if (val_size) {
45 // Skip val data we've not looked at.
46 fh.skip(val_size);
47 val_size = 0;
50 if (fh.get_pos() >= root) {
51 AssertEq(fh.get_pos(), root);
52 is_at_end = true;
53 return false;
56 int ch = fh.read();
57 if (ch == EOF) {
58 // The root check above should mean this can't legitimately happen.
59 throw Xapian::DatabaseCorruptError("EOF reading key");
62 size_t reuse = 0;
63 if (!last_key.empty()) {
64 reuse = ch;
65 ch = fh.read();
66 if (ch == EOF) {
67 throw Xapian::DatabaseError("EOF/error while reading key length",
68 errno);
71 size_t key_size = ch;
72 char buf[256];
73 fh.read(buf, key_size);
74 current_key.assign(last_key, 0, reuse);
75 current_key.append(buf, key_size);
76 last_key = current_key;
78 if (DEBUGGING) {
79 string esc;
80 description_append(esc, current_key);
81 cerr << "K:" << esc << endl;
84 return next_from_index();
87 bool
88 HoneyCursor::next_from_index()
90 char buf[8];
91 int r;
93 // FIXME: rework to take advantage of buffering that's happening
94 // anyway?
95 char * p = buf;
96 for (int i = 0; i < 8; ++i) {
97 int ch2 = fh.read();
98 if (ch2 == EOF) {
99 break;
101 *p++ = ch2;
102 if (ch2 < 128) break;
104 r = p - buf;
106 const char* p = buf;
107 const char* end = p + r;
108 if (!unpack_uint(&p, end, &val_size)) {
109 throw Xapian::DatabaseError("val_size unpack_uint invalid");
111 if (p != end) abort();
112 current_compressed = val_size & 1;
113 val_size >>= 1;
115 // FIXME: Always resize to 0? Not doing so avoids always having to clear
116 // all the data before reading it.
117 if (true && val_size == 0)
118 current_tag.resize(0);
120 is_at_end = false;
121 return true;
124 bool
125 HoneyCursor::read_tag(bool keep_compressed)
127 if (val_size) {
128 current_tag.resize(val_size);
129 fh.read(&(current_tag[0]), val_size);
130 if (DEBUGGING) {
131 cerr << "read " << val_size << " bytes of value data ending @"
132 << fh.get_pos() << endl;
134 val_size = 0;
135 if (DEBUGGING) {
136 string esc;
137 description_append(esc, current_tag);
138 cerr << "V:" << esc << endl;
141 if (!keep_compressed && current_compressed) {
142 // Need to decompress.
143 comp_stream.decompress_start();
144 string new_tag;
145 if (!comp_stream.decompress_chunk(current_tag.data(),
146 current_tag.size(),
147 new_tag)) {
148 // Decompression didn't complete.
149 abort();
151 swap(current_tag, new_tag);
152 current_compressed = false;
153 if (DEBUGGING) {
154 cerr << "decompressed to " << current_tag.size()
155 << "bytes of value data" << endl;
158 return current_compressed;
161 bool
162 HoneyCursor::do_find(const string& key, bool greater_than)
164 // FIXME: Actually use this!
165 (void)greater_than;
167 if (DEBUGGING) {
168 string esc;
169 description_append(esc, key);
170 cerr << "do_find(" << esc << ", " << greater_than << ") @" << fh.get_pos() << endl;
173 Assert(!key.empty());
175 bool use_index = true;
176 if (!is_at_end && !last_key.empty() && last_key[0] == key[0]) {
177 int cmp0 = last_key.compare(key);
178 if (cmp0 == 0) {
179 current_key = last_key;
180 return true;
182 if (cmp0 < 0) {
183 // We're going forwards to a key with the same first character, so
184 // an array index won't help us.
185 use_index = false;
189 if (use_index) {
190 fh.rewind(root);
191 unsigned index_type = fh.read();
192 switch (index_type) {
193 case 0x00: {
194 unsigned char first = key[0] - fh.read();
195 unsigned char range = fh.read();
196 if (first > range) {
197 is_at_end = true;
198 return false;
200 fh.skip(first * 4); // FIXME: pointer width
201 off_t jump = fh.read() << 24;
202 jump |= fh.read() << 16;
203 jump |= fh.read() << 8;
204 jump |= fh.read();
205 fh.rewind(jump);
206 // The jump point will be an entirely new key (because it is the first
207 // key with that initial character), and we drop in as if this was the
208 // first key so set last_key to be empty.
209 last_key = string();
210 break;
212 case 0x01: {
213 size_t j = fh.read() << 24;
214 j |= fh.read() << 16;
215 j |= fh.read() << 8;
216 j |= fh.read();
217 if (j == 0) {
218 is_at_end = true;
219 return false;
221 off_t base = fh.get_pos();
222 char kkey[SSINDEX_BINARY_CHOP_KEY_SIZE];
223 size_t kkey_len = 0;
224 size_t i = 0;
225 while (j - i > 1) {
226 size_t k = i + (j - i) / 2;
227 fh.set_pos(base + k * (SSINDEX_BINARY_CHOP_KEY_SIZE + 4));
228 fh.read(kkey, SSINDEX_BINARY_CHOP_KEY_SIZE);
229 kkey_len = 4;
230 while (kkey_len > 0 && kkey[kkey_len - 1] == '\0') --kkey_len;
231 int r = key.compare(0, SSINDEX_BINARY_CHOP_KEY_SIZE, kkey, kkey_len);
232 if (r < 0) {
233 j = k;
234 } else {
235 i = k;
236 if (r == 0) {
237 break;
241 fh.set_pos(base + i * (SSINDEX_BINARY_CHOP_KEY_SIZE + 4));
242 fh.read(kkey, SSINDEX_BINARY_CHOP_KEY_SIZE);
243 kkey_len = 4;
244 while (kkey_len > 0 && kkey[kkey_len - 1] == '\0') --kkey_len;
245 off_t jump = fh.read() << 24;
246 jump |= fh.read() << 16;
247 jump |= fh.read() << 8;
248 jump |= fh.read();
249 fh.rewind(jump);
250 // The jump point is to the first key with prefix kkey, so will
251 // work if we set last key to kkey. Unless we're jumping to the
252 // start of the table, in which case last_key needs to be empty.
253 last_key.assign(kkey, jump == 0 ? 0 : kkey_len);
254 break;
256 case 0x02: {
257 // FIXME: If "close" just seek forwards? Or consider seeking from
258 // current index pos?
259 // off_t pos = fh.get_pos();
260 string index_key, prev_index_key;
261 make_unsigned<off_t>::type ptr = 0;
262 int cmp0 = 1;
263 if (DEBUGGING) {
264 cerr << "Using skiplist index\n";
266 while (true) {
267 int reuse = fh.read();
268 if (reuse == EOF) break;
269 int len = fh.read();
270 if (len == EOF) abort(); // FIXME
271 if (DEBUGGING) {
272 cerr << "reuse = " << reuse << " len = " << len << endl;
274 index_key.resize(reuse + len);
275 fh.read(&index_key[reuse], len);
277 if (DEBUGGING) {
278 string desc;
279 description_append(desc, index_key);
280 cerr << "Index key: " << desc << endl;
283 cmp0 = index_key.compare(key);
284 if (cmp0 > 0) {
285 index_key = prev_index_key;
286 break;
288 char buf[8];
289 char* e = buf;
290 while (true) {
291 int b = fh.read();
292 *e++ = b;
293 if ((b & 0x80) == 0) break;
295 const char* p = buf;
296 if (!unpack_uint(&p, e, &ptr) || p != e) abort(); // FIXME
297 if (DEBUGGING) cerr << " -> " << ptr << endl;
298 if (cmp0 == 0)
299 break;
300 prev_index_key = index_key;
301 if (DEBUGGING) {
302 string desc;
303 description_append(desc, prev_index_key);
304 cerr << "prev_index_key -> " << desc << endl;
307 if (DEBUGGING) {
308 string desc;
309 description_append(desc, index_key);
310 cerr << " index_key = " << desc << ", cmp0 = " << cmp0 << ", going to " << ptr << endl;
312 fh.set_pos(ptr);
314 if (ptr != 0) {
315 last_key = current_key = index_key;
316 bool res = next_from_index();
317 (void)res;
318 Assert(res);
319 if (cmp0 == 0) {
320 Assert(ptr != 0);
321 return true;
323 fh.skip(val_size);
324 } else {
325 last_key = current_key = string();
328 if (DEBUGGING) {
329 string desc;
330 description_append(desc, current_key);
331 cerr << "cmp0 was " << cmp0 << ", Dropped to data layer on key: " << desc << endl;
334 break;
336 default:
337 throw Xapian::DatabaseCorruptError("Unknown index type");
339 is_at_end = false;
340 val_size = 0;
343 while (next()) {
344 int cmp = current_key.compare(key);
345 if (cmp == 0) return true;
346 if (cmp > 0) break;
348 return false;
351 bool
352 HoneyCursor::prev()
354 string key;
355 if (is_at_end) {
356 // To position on the last key we just do a < search for a key greater
357 // than any possible key - one longer than the longest possible length
358 // and consisting entirely of the highest sorting byte value.
359 key.assign(HONEY_MAX_KEY_LEN + 1, '\xff');
360 } else {
361 if (current_key.empty())
362 return false;
363 key = current_key;
366 // FIXME: use index - for an array index we can look at index points for
367 // first characters starting with key[0] and working down; for a binary
368 // chop index we can start at the entry including the current key, or the
369 // one before if this is the first key for that index entry; for a skiplist
370 // index we can find the previous entry at the index level above.
371 rewind();
373 off_t pos;
374 string k;
375 size_t vs;
376 bool compressed;
377 do {
378 pos = fh.get_pos();
379 k = current_key;
380 vs = val_size;
381 compressed = current_compressed;
382 } while (next() && current_key < key);
384 // Back up to previous entry.
385 is_at_end = false;
386 last_key = current_key = k;
387 val_size = vs;
388 current_compressed = compressed;
389 fh.set_pos(pos);
391 return true;