[honey] Clean up unused values
[xapian.git] / xapian-core / backends / honey / honey_table.cc
blob9649b0b596b5c765bb76c29488d4fb5b4c39deab
1 /** @file honey_table.cc
2 * @brief HoneyTable class
3 */
4 /* Copyright (C) 2017,2018 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #include <config.h>
23 //#define DEBUGGING
25 #include "honey_table.h"
27 #include "honey_cursor.h"
28 #include "stringutils.h"
30 #include "unicode/description_append.h"
32 #ifdef DEBUGGING
33 # include <iostream>
34 #endif
36 using Honey::RootInfo;
38 using namespace std;
40 void
41 HoneyTable::create_and_open(int flags_, const RootInfo& root_info)
43 Assert(!single_file());
44 flags = flags_;
45 compress_min = root_info.get_compress_min();
46 if (read_only) {
47 num_entries = root_info.get_num_entries();
48 root = root_info.get_root();
49 // FIXME: levels
51 if (!store.open(path, read_only))
52 throw Xapian::DatabaseOpeningError("Failed to open HoneyTable", errno);
55 void
56 HoneyTable::open(int flags_, const RootInfo& root_info, honey_revision_number_t)
58 flags = flags_;
59 compress_min = root_info.get_compress_min();
60 num_entries = root_info.get_num_entries();
61 offset = root_info.get_offset();
62 root = root_info.get_root();
63 if (!single_file() && !store.open(path, read_only)) {
64 if (!lazy)
65 throw Xapian::DatabaseOpeningError("Failed to open HoneyTable",
66 errno);
68 store.set_pos(offset);
71 void
72 HoneyTable::add(const std::string& key,
73 const char* val,
74 size_t val_size,
75 bool compressed)
77 if (!compressed && compress_min > 0 && val_size > compress_min) {
78 size_t compressed_size = val_size;
79 CompressionStream comp_stream; // FIXME: reuse
80 const char* p = comp_stream.compress(val, &compressed_size);
81 if (p) {
82 add(key, p, compressed_size, true);
83 return;
87 if (read_only)
88 throw Xapian::InvalidOperationError("add() on read-only HoneyTable");
89 if (key.size() == 0 || key.size() > HONEY_MAX_KEY_LEN)
90 throw Xapian::InvalidArgumentError("Invalid key size: " +
91 str(key.size()));
92 if (key <= last_key)
93 throw Xapian::InvalidOperationError("New key <= previous key");
94 size_t reuse = common_prefix_length(last_key, key);
96 #ifdef SSINDEX_ARRAY
97 if (reuse == 0) {
98 index.maybe_add_entry(key, store.get_pos());
100 #elif defined SSINDEX_BINARY_CHOP
101 // For a binary chop index, the index point is before the key info - the
102 // index key must have the same N first bytes as the previous key, where
103 // N >= the keep length.
104 index.maybe_add_entry(key, store.get_pos());
105 #elif defined SSINDEX_SKIPLIST
106 // Handled below.
107 #else
108 # error "SSINDEX type not specified"
109 #endif
111 store.write(static_cast<unsigned char>(reuse));
112 store.write(static_cast<unsigned char>(key.size() - reuse));
113 store.write(key.data() + reuse, key.size() - reuse);
114 ++num_entries;
116 #ifdef SSINDEX_SKIPLIST
117 // For a skiplist index, the index provides the full key, so the index
118 // point is after the key at the level below.
119 index.maybe_add_entry(key, store.get_pos());
120 #endif
122 // Encode "compressed?" flag in bottom bit.
123 // FIXME: Don't do this if a table is uncompressed? That saves a byte
124 // for each item where the extra bit pushes the length up by a byte.
125 size_t val_size_enc = (val_size << 1) | compressed;
126 std::string val_len;
127 pack_uint(val_len, val_size_enc);
128 // FIXME: pass together so we can potentially writev() both?
129 store.write(val_len.data(), val_len.size());
130 store.write(val, val_size);
131 last_key = key;
134 void
135 HoneyTable::commit(honey_revision_number_t, RootInfo* root_info)
137 if (root < 0)
138 throw Xapian::InvalidOperationError("root not set");
140 root_info->set_num_entries(num_entries);
141 // offset should already be set.
142 root_info->set_root(root);
143 // Not really meaningful.
144 // root_info->set_free_list(std::string());
146 read_only = true;
147 store.rewind(offset);
148 last_key = string();
151 bool
152 HoneyTable::read_key(std::string& key,
153 size_t& val_size,
154 bool& compressed) const
156 #ifdef DEBUGGING
158 string desc;
159 description_append(desc, key);
160 cerr << "HoneyTable::read_key(" << desc << ", ...) for path=" << path << endl;
162 #endif
163 if (!read_only) {
164 return false;
167 AssertRel(store.get_pos(), >=, offset);
168 if (store.get_pos() >= root) {
169 AssertEq(store.get_pos(), root);
170 return false;
172 int ch = store.read();
173 if (ch == EOF) return false;
175 size_t reuse = ch;
176 if (reuse > last_key.size()) {
177 throw Xapian::DatabaseCorruptError("Reuse > previous key size");
179 ch = store.read();
180 if (ch == EOF) {
181 throw Xapian::DatabaseError("EOF/error while reading key length",
182 errno);
184 size_t key_size = ch;
185 char buf[256];
186 store.read(buf, key_size);
187 key.assign(last_key, 0, reuse);
188 key.append(buf, key_size);
189 last_key = key;
191 #ifdef DEBUGGING
192 if (false) {
193 std::string esc;
194 description_append(esc, key);
195 std::cout << "K:" << esc << std::endl;
197 #endif
199 int r;
201 // FIXME: rework to take advantage of buffering that's happening anyway?
202 char * p = buf;
203 for (int i = 0; i < 8; ++i) {
204 int ch2 = store.read();
205 if (ch2 == EOF) {
206 break;
208 *p++ = ch2;
209 if (ch2 < 128) break;
211 r = p - buf;
213 const char* p = buf;
214 const char* end = p + r;
215 if (!unpack_uint(&p, end, &val_size)) {
216 throw Xapian::DatabaseError("val_size unpack_uint invalid");
218 compressed = val_size & 1;
219 val_size >>= 1;
220 Assert(p == end);
221 return true;
224 void
225 HoneyTable::read_val(std::string& val, size_t val_size) const
227 AssertRel(store.get_pos() + val_size, <=, size_t(root));
228 val.resize(val_size);
229 store.read(&(val[0]), val_size);
230 #ifdef DEBUGGING
231 if (false) {
232 std::string esc;
233 description_append(esc, val);
234 std::cout << "V:" << esc << std::endl;
236 #endif
239 bool
240 HoneyTable::get_exact_entry(const std::string& key, std::string* tag) const
242 if (!read_only) std::abort();
243 if (!store.is_open()) {
244 if (store.was_forced_closed())
245 throw_database_closed();
246 return false;
248 store.rewind(root);
249 if (rare(key.empty()))
250 return false;
251 bool exact_match = false;
252 bool compressed = false;
253 size_t val_size = 0;
254 int index_type = store.read();
255 switch (index_type) {
256 case EOF:
257 return false;
258 case 0x00: {
259 unsigned char first = key[0] - store.read();
260 unsigned char range = store.read();
261 if (first > range)
262 return false;
263 store.skip(first * 4); // FIXME: pointer width
264 off_t jump = store.read_uint4_be();
265 store.rewind(jump);
266 // The jump point will be an entirely new key (because it is the
267 // first key with that initial character), and we drop in as if
268 // this was the first key so set last_key to be empty.
269 last_key = string();
270 break;
272 case 0x01: {
273 size_t j = store.read_uint4_be();
274 if (j == 0)
275 return false;
276 off_t base = store.get_pos();
277 char kkey[SSINDEX_BINARY_CHOP_KEY_SIZE];
278 size_t kkey_len = 0;
279 size_t i = 0;
280 while (j - i > 1) {
281 size_t k = i + (j - i) / 2;
282 store.set_pos(base + k * SSINDEX_BINARY_CHOP_ENTRY_SIZE);
283 store.read(kkey, SSINDEX_BINARY_CHOP_KEY_SIZE);
284 kkey_len = 4;
285 while (kkey_len > 0 && kkey[kkey_len - 1] == '\0') --kkey_len;
286 int r = key.compare(0, SSINDEX_BINARY_CHOP_KEY_SIZE, kkey, kkey_len);
287 if (r < 0) {
288 j = k;
289 } else {
290 i = k;
291 if (r == 0) {
292 break;
296 store.set_pos(base + i * SSINDEX_BINARY_CHOP_ENTRY_SIZE);
297 store.read(kkey, SSINDEX_BINARY_CHOP_KEY_SIZE);
298 kkey_len = 4;
299 while (kkey_len > 0 && kkey[kkey_len - 1] == '\0') --kkey_len;
300 off_t jump = store.read_uint4_be();
301 store.rewind(jump);
302 // The jump point is to the first key with prefix kkey, so will
303 // work if we set last key to kkey. Unless we're jumping to the
304 // start of the table, in which case last_key needs to be empty.
305 last_key.assign(kkey, jump == 0 ? 0 : kkey_len);
306 break;
308 case 0x02: {
309 // FIXME: If "close" just seek forwards? Or consider seeking from
310 // current index pos?
311 // off_t pos = store.get_pos();
312 string index_key, prev_index_key;
313 make_unsigned<off_t>::type ptr = 0;
314 int cmp0 = 1;
315 while (true) {
316 int reuse = store.read();
317 if (reuse == EOF) break;
318 int len = store.read();
319 if (len == EOF) abort(); // FIXME
320 index_key.resize(reuse + len);
321 store.read(&index_key[reuse], len);
323 #ifdef DEBUGGING
325 string desc;
326 description_append(desc, index_key);
327 cerr << "Index key: " << desc << endl;
329 #endif
331 cmp0 = index_key.compare(key);
332 if (cmp0 > 0) {
333 index_key = prev_index_key;
334 break;
336 char buf[8];
337 char* e = buf;
338 while (true) {
339 int b = store.read();
340 *e++ = b;
341 if ((b & 0x80) == 0) break;
343 const char* p = buf;
344 if (!unpack_uint(&p, e, &ptr) || p != e) abort(); // FIXME
345 #ifdef DEBUGGING
347 cerr << " -> " << ptr << endl;
349 #endif
350 if (cmp0 == 0)
351 break;
352 prev_index_key = index_key;
354 #ifdef DEBUGGING
356 cerr << " cmp0 = " << cmp0 << ", going to " << ptr << endl;
358 #endif
359 store.set_pos(ptr);
361 if (ptr != 0) {
362 last_key = index_key;
363 char buf[8];
364 int r;
366 // FIXME: rework to take advantage of buffering that's happening anyway?
367 char * p = buf;
368 for (int i = 0; i < 8; ++i) {
369 int ch2 = store.read();
370 if (ch2 == EOF) {
371 break;
373 *p++ = ch2;
374 if (ch2 < 128) break;
376 r = p - buf;
378 const char* p = buf;
379 const char* end = p + r;
380 if (!unpack_uint(&p, end, &val_size)) {
381 throw Xapian::DatabaseError("val_size unpack_uint invalid");
383 compressed = val_size & 1;
384 val_size >>= 1;
385 Assert(p == end);
386 } else {
387 last_key = string();
390 if (cmp0 == 0) {
391 exact_match = true;
392 break;
395 #ifdef DEBUGGING
397 string desc;
398 description_append(desc, last_key);
399 cerr << "Dropped to data layer on key: " << desc << endl;
401 #endif
403 break;
405 default: {
406 string m = "HoneyTable: Unknown index type ";
407 m += str(index_type);
408 throw Xapian::DatabaseCorruptError(m);
412 std::string k;
413 int cmp;
414 if (!exact_match) {
415 do {
416 if (val_size) {
417 // Skip val data we've not looked at.
418 store.skip(val_size);
419 val_size = 0;
421 if (!read_key(k, val_size, compressed)) return false;
422 cmp = k.compare(key);
423 } while (cmp < 0);
424 if (cmp > 0) return false;
426 if (tag != NULL) {
427 if (compressed) {
428 std::string v;
429 read_val(v, val_size);
430 CompressionStream comp_stream;
431 comp_stream.decompress_start();
432 tag->resize(0);
433 if (!comp_stream.decompress_chunk(v.data(), v.size(), *tag)) {
434 // Decompression didn't complete.
435 abort();
437 } else {
438 read_val(*tag, val_size);
441 return true;
444 HoneyCursor*
445 HoneyTable::cursor_get() const
447 return new HoneyCursor(store, root, offset);