1 /** @file honey_cursor.cc
2 * @brief HoneyCursor class
4 /* Copyright (C) 2017,2018 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 #include "honey_cursor.h"
28 # define DEBUGGING false
30 # define DEBUGGING true
45 // Skip val data we've not looked at.
50 if (fh
.get_pos() >= root
) {
51 AssertEq(fh
.get_pos(), root
);
58 // The root check above should mean this can't legitimately happen.
59 throw Xapian::DatabaseCorruptError("EOF reading key");
63 if (!last_key
.empty()) {
67 throw Xapian::DatabaseError("EOF/error while reading key length",
73 fh
.read(buf
, key_size
);
74 current_key
.assign(last_key
, 0, reuse
);
75 current_key
.append(buf
, key_size
);
76 last_key
= current_key
;
80 description_append(esc
, current_key
);
81 cerr
<< "K:" << esc
<< endl
;
84 return next_from_index();
88 HoneyCursor::next_from_index()
93 // FIXME: rework to take advantage of buffering that's happening
96 for (int i
= 0; i
< 8; ++i
) {
102 if (ch2
< 128) break;
107 const char* end
= p
+ r
;
108 if (!unpack_uint(&p
, end
, &val_size
)) {
109 throw Xapian::DatabaseError("val_size unpack_uint invalid");
111 if (p
!= end
) abort();
112 current_compressed
= val_size
& 1;
115 // FIXME: Always resize to 0? Not doing so avoids always having to clear
116 // all the data before reading it.
117 if (true && val_size
== 0)
118 current_tag
.resize(0);
125 HoneyCursor::read_tag(bool keep_compressed
)
128 current_tag
.resize(val_size
);
129 fh
.read(&(current_tag
[0]), val_size
);
131 cerr
<< "read " << val_size
<< " bytes of value data ending @"
132 << fh
.get_pos() << endl
;
137 description_append(esc
, current_tag
);
138 cerr
<< "V:" << esc
<< endl
;
141 if (!keep_compressed
&& current_compressed
) {
142 // Need to decompress.
143 comp_stream
.decompress_start();
145 if (!comp_stream
.decompress_chunk(current_tag
.data(),
148 // Decompression didn't complete.
151 swap(current_tag
, new_tag
);
152 current_compressed
= false;
154 cerr
<< "decompressed to " << current_tag
.size()
155 << "bytes of value data" << endl
;
158 return current_compressed
;
162 HoneyCursor::do_find(const string
& key
, bool greater_than
)
164 // FIXME: Actually use this!
169 description_append(esc
, key
);
170 cerr
<< "do_find(" << esc
<< ", " << greater_than
<< ") @" << fh
.get_pos() << endl
;
173 Assert(!key
.empty());
175 bool use_index
= true;
176 if (!is_at_end
&& !last_key
.empty() && last_key
[0] == key
[0]) {
177 int cmp0
= last_key
.compare(key
);
179 current_key
= last_key
;
183 // We're going forwards to a key with the same first character, so
184 // an array index won't help us.
191 unsigned index_type
= fh
.read();
192 switch (index_type
) {
194 unsigned char first
= key
[0] - fh
.read();
195 unsigned char range
= fh
.read();
200 fh
.skip(first
* 4); // FIXME: pointer width
201 off_t jump
= fh
.read() << 24;
202 jump
|= fh
.read() << 16;
203 jump
|= fh
.read() << 8;
206 // The jump point will be an entirely new key (because it is the first
207 // key with that initial character), and we drop in as if this was the
208 // first key so set last_key to be empty.
213 size_t j
= fh
.read() << 24;
214 j
|= fh
.read() << 16;
221 off_t base
= fh
.get_pos();
222 char kkey
[SSINDEX_BINARY_CHOP_KEY_SIZE
];
226 size_t k
= i
+ (j
- i
) / 2;
227 fh
.set_pos(base
+ k
* (SSINDEX_BINARY_CHOP_KEY_SIZE
+ 4));
228 fh
.read(kkey
, SSINDEX_BINARY_CHOP_KEY_SIZE
);
230 while (kkey_len
> 0 && kkey
[kkey_len
- 1] == '\0') --kkey_len
;
231 int r
= key
.compare(0, SSINDEX_BINARY_CHOP_KEY_SIZE
, kkey
, kkey_len
);
241 fh
.set_pos(base
+ i
* (SSINDEX_BINARY_CHOP_KEY_SIZE
+ 4));
242 fh
.read(kkey
, SSINDEX_BINARY_CHOP_KEY_SIZE
);
244 while (kkey_len
> 0 && kkey
[kkey_len
- 1] == '\0') --kkey_len
;
245 off_t jump
= fh
.read() << 24;
246 jump
|= fh
.read() << 16;
247 jump
|= fh
.read() << 8;
250 // The jump point is to the first key with prefix kkey, so will
251 // work if we set last key to kkey. Unless we're jumping to the
252 // start of the table, in which case last_key needs to be empty.
253 last_key
.assign(kkey
, jump
== 0 ? 0 : kkey_len
);
257 // FIXME: If "close" just seek forwards? Or consider seeking from
258 // current index pos?
259 // off_t pos = fh.get_pos();
260 string index_key
, prev_index_key
;
261 make_unsigned
<off_t
>::type ptr
= 0;
264 cerr
<< "Using skiplist index\n";
267 int reuse
= fh
.read();
268 if (reuse
== EOF
) break;
270 if (len
== EOF
) abort(); // FIXME
272 cerr
<< "reuse = " << reuse
<< " len = " << len
<< endl
;
274 index_key
.resize(reuse
+ len
);
275 fh
.read(&index_key
[reuse
], len
);
279 description_append(desc
, index_key
);
280 cerr
<< "Index key: " << desc
<< endl
;
283 cmp0
= index_key
.compare(key
);
285 index_key
= prev_index_key
;
293 if ((b
& 0x80) == 0) break;
296 if (!unpack_uint(&p
, e
, &ptr
) || p
!= e
) abort(); // FIXME
297 if (DEBUGGING
) cerr
<< " -> " << ptr
<< endl
;
300 prev_index_key
= index_key
;
303 description_append(desc
, prev_index_key
);
304 cerr
<< "prev_index_key -> " << desc
<< endl
;
309 description_append(desc
, index_key
);
310 cerr
<< " index_key = " << desc
<< ", cmp0 = " << cmp0
<< ", going to " << ptr
<< endl
;
315 last_key
= current_key
= index_key
;
316 bool res
= next_from_index();
325 last_key
= current_key
= string();
330 description_append(desc
, current_key
);
331 cerr
<< "cmp0 was " << cmp0
<< ", Dropped to data layer on key: " << desc
<< endl
;
337 throw Xapian::DatabaseCorruptError("Unknown index type");
344 int cmp
= current_key
.compare(key
);
345 if (cmp
== 0) return true;
356 // To position on the last key we just do a < search for a key greater
357 // than any possible key - one longer than the longest possible length
358 // and consisting entirely of the highest sorting byte value.
359 key
.assign(HONEY_MAX_KEY_LEN
+ 1, '\xff');
361 if (current_key
.empty())
366 // FIXME: use index - for an array index we can look at index points for
367 // first characters starting with key[0] and working down; for a binary
368 // chop index we can start at the entry including the current key, or the
369 // one before if this is the first key for that index entry; for a skiplist
370 // index we can find the previous entry at the index level above.
381 compressed
= current_compressed
;
382 } while (next() && current_key
< key
);
384 // Back up to previous entry.
386 last_key
= current_key
= k
;
388 current_compressed
= compressed
;