[honey] Fixes for not including <iostream> by default
[xapian.git] / xapian-core / backends / honey / honey_table.cc
blob884b943f84c0511ad93eb895835582ada91b8594
1 /** @file honey_table.cc
2 * @brief HoneyTable class
3 */
4 /* Copyright (C) 2017,2018 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #include <config.h>
24 //#define DEBUGGING
26 #include "honey_table.h"
28 #include "honey_cursor.h"
29 #include "stringutils.h"
31 #include "unicode/description_append.h"
33 #ifdef DEBUGGING
34 # include <iostream>
35 #endif
37 using Honey::RootInfo;
39 using namespace std;
41 size_t HoneyTable::total_index_size = 0;
43 void
44 HoneyTable::create_and_open(int flags_, const RootInfo& root_info)
46 Assert(!single_file());
47 flags = flags_;
48 compress_min = root_info.get_compress_min();
49 if (read_only) {
50 num_entries = root_info.get_num_entries();
51 root = root_info.get_root();
52 // FIXME: levels
54 if (!fh.open(path, read_only))
55 throw Xapian::DatabaseOpeningError("Failed to open HoneyTable", errno);
58 void
59 HoneyTable::open(int flags_, const RootInfo& root_info, honey_revision_number_t)
61 flags = flags_;
62 compress_min = root_info.get_compress_min();
63 num_entries = root_info.get_num_entries();
64 offset = root_info.get_offset();
65 root = root_info.get_root();
66 if (!single_file() && !fh.open(path, read_only)) {
67 if (!lazy)
68 throw Xapian::DatabaseOpeningError("Failed to open HoneyTable",
69 errno);
71 fh.set_pos(offset);
74 void
75 HoneyTable::add(const std::string& key,
76 const char* val,
77 size_t val_size,
78 bool compressed)
80 if (!compressed && compress_min > 0 && val_size > compress_min) {
81 size_t compressed_size = val_size;
82 CompressionStream comp_stream; // FIXME: reuse
83 const char* p = comp_stream.compress(val, &compressed_size);
84 if (p) {
85 add(key, p, compressed_size, true);
86 return;
90 if (read_only)
91 throw Xapian::InvalidOperationError("add() on read-only HoneyTable");
92 if (key.size() == 0 || key.size() > HONEY_MAX_KEY_LEN)
93 throw Xapian::InvalidArgumentError("Invalid key size: " +
94 str(key.size()));
95 if (key <= last_key)
96 throw Xapian::InvalidOperationError("New key <= previous key");
97 off_t index_pos = fh.get_pos();
98 if (!last_key.empty()) {
99 size_t reuse = common_prefix_length(last_key, key);
100 fh.write(static_cast<unsigned char>(reuse));
101 fh.write(static_cast<unsigned char>(key.size() - reuse));
102 fh.write(key.data() + reuse, key.size() - reuse);
103 } else {
104 fh.write(static_cast<unsigned char>(key.size()));
105 fh.write(key.data(), key.size());
107 ++num_entries;
108 #ifdef SSINDEX_ARRAY
109 // For an array index, the index point is right before the complete key.
110 if (!last_key.empty()) ++index_pos;
111 #elif defined SSINDEX_BINARY_CHOP
112 // For a binary chop index, the index point is before the key info - the
113 // index key must have the same N first bytes as the previous key, where N
114 // >= the keep length.
115 #elif defined SSINDEX_SKIPLIST
116 // For a skiplist index, the index provides the full key, so the index
117 // point is after the key at the level below.
118 index_pos = fh.get_pos();
119 #else
120 # error "SSINDEX type not specified"
121 #endif
122 index.maybe_add_entry(key, index_pos);
124 // Encode "compressed?" flag in bottom bit.
125 // FIXME: Don't do this if a table is uncompressed? That saves a byte
126 // for each item where the extra bit pushes the length up by a byte.
127 size_t val_size_enc = (val_size << 1) | compressed;
128 std::string val_len;
129 pack_uint(val_len, val_size_enc);
130 // FIXME: pass together so we can potentially writev() both?
131 fh.write(val_len.data(), val_len.size());
132 fh.write(val, val_size);
133 last_key = key;
136 void
137 HoneyTable::commit(honey_revision_number_t, RootInfo* root_info)
139 if (root < 0)
140 throw Xapian::InvalidOperationError("root not set");
142 root_info->set_level(1); // FIXME: number of index levels
143 root_info->set_num_entries(num_entries);
144 root_info->set_root_is_fake(false);
145 // Not really meaningful.
146 root_info->set_sequential(true);
147 // offset should already be set.
148 root_info->set_root(root);
149 // Not really meaningful.
150 root_info->set_blocksize(2048);
151 // Not really meaningful.
152 // root_info->set_free_list(std::string());
154 read_only = true;
155 fh.rewind(offset);
156 last_key = string();
159 bool
160 HoneyTable::read_key(std::string& key,
161 size_t& val_size,
162 bool& compressed) const
164 #ifdef DEBUGGING
166 string desc;
167 description_append(desc, key);
168 cerr << "HoneyTable::read_key(" << desc << ", ...) for path=" << path << endl;
170 #endif
171 if (!read_only) {
172 return false;
175 AssertRel(fh.get_pos(), >=, offset);
176 if (fh.get_pos() >= root) {
177 AssertEq(fh.get_pos(), root);
178 return false;
180 int ch = fh.read();
181 if (ch == EOF) return false;
183 size_t reuse = 0;
184 if (!last_key.empty()) {
185 reuse = ch;
186 ch = fh.read();
187 if (ch == EOF) {
188 throw Xapian::DatabaseError("EOF/error while reading key length",
189 errno);
192 size_t key_size = ch;
193 char buf[256];
194 fh.read(buf, key_size);
195 key.assign(last_key, 0, reuse);
196 key.append(buf, key_size);
197 last_key = key;
199 #ifdef DEBUGGING
200 if (false) {
201 std::string esc;
202 description_append(esc, key);
203 std::cout << "K:" << esc << std::endl;
205 #endif
207 int r;
209 // FIXME: rework to take advantage of buffering that's happening anyway?
210 char * p = buf;
211 for (int i = 0; i < 8; ++i) {
212 int ch2 = fh.read();
213 if (ch2 == EOF) {
214 break;
216 *p++ = ch2;
217 if (ch2 < 128) break;
219 r = p - buf;
221 const char* p = buf;
222 const char* end = p + r;
223 if (!unpack_uint(&p, end, &val_size)) {
224 throw Xapian::DatabaseError("val_size unpack_uint invalid");
226 compressed = val_size & 1;
227 val_size >>= 1;
228 Assert(p == end);
229 return true;
232 void
233 HoneyTable::read_val(std::string& val, size_t val_size) const
235 AssertRel(fh.get_pos() + val_size, <=, size_t(root));
236 val.resize(val_size);
237 fh.read(&(val[0]), val_size);
238 #ifdef DEBUGGING
239 if (false) {
240 std::string esc;
241 description_append(esc, val);
242 std::cout << "V:" << esc << std::endl;
244 #endif
247 bool
248 HoneyTable::get_exact_entry(const std::string& key, std::string* tag) const
250 if (!read_only) std::abort();
251 if (!fh.is_open()) {
252 if (fh.was_forced_closed())
253 throw_database_closed();
254 return false;
256 fh.rewind(root);
257 if (rare(key.empty()))
258 return false;
259 bool exact_match = false;
260 bool compressed;
261 size_t val_size = 0;
262 int index_type = fh.read();
263 switch (index_type) {
264 case EOF:
265 return false;
266 case 0x00: {
267 unsigned char first = key[0] - fh.read();
268 unsigned char range = fh.read();
269 if (first > range)
270 return false;
271 fh.skip(first * 4); // FIXME: pointer width
272 off_t jump = fh.read() << 24;
273 jump |= fh.read() << 16;
274 jump |= fh.read() << 8;
275 jump |= fh.read();
276 fh.rewind(jump);
277 // The jump point will be an entirely new key (because it is the
278 // first key with that initial character), and we drop in as if
279 // this was the first key so set last_key to be empty.
280 last_key = string();
281 break;
283 case 0x01: {
284 size_t j = fh.read() << 24;
285 j |= fh.read() << 16;
286 j |= fh.read() << 8;
287 j |= fh.read();
288 if (j == 0)
289 return false;
290 off_t base = fh.get_pos();
291 char kkey[SSINDEX_BINARY_CHOP_KEY_SIZE];
292 size_t kkey_len = 0;
293 size_t i = 0;
294 while (j - i > 1) {
295 size_t k = i + (j - i) / 2;
296 fh.set_pos(base + k * (SSINDEX_BINARY_CHOP_KEY_SIZE + 4));
297 fh.read(kkey, SSINDEX_BINARY_CHOP_KEY_SIZE);
298 kkey_len = 4;
299 while (kkey_len > 0 && kkey[kkey_len - 1] == '\0') --kkey_len;
300 int r = key.compare(0, SSINDEX_BINARY_CHOP_KEY_SIZE, kkey, kkey_len);
301 if (r < 0) {
302 j = k;
303 } else {
304 i = k;
305 if (r == 0) {
306 break;
310 fh.set_pos(base + i * (SSINDEX_BINARY_CHOP_KEY_SIZE + 4));
311 fh.read(kkey, SSINDEX_BINARY_CHOP_KEY_SIZE);
312 kkey_len = 4;
313 while (kkey_len > 0 && kkey[kkey_len - 1] == '\0') --kkey_len;
314 off_t jump = fh.read() << 24;
315 jump |= fh.read() << 16;
316 jump |= fh.read() << 8;
317 jump |= fh.read();
318 fh.rewind(jump);
319 // The jump point is to the first key with prefix kkey, so will
320 // work if we set last key to kkey. Unless we're jumping to the
321 // start of the table, in which case last_key needs to be empty.
322 last_key.assign(kkey, jump == 0 ? 0 : kkey_len);
323 break;
325 case 0x02: {
326 // FIXME: If "close" just seek forwards? Or consider seeking from
327 // current index pos?
328 // off_t pos = fh.get_pos();
329 string index_key, prev_index_key;
330 make_unsigned<off_t>::type ptr = 0;
331 int cmp0 = 1;
332 while (true) {
333 int reuse = fh.read();
334 if (reuse == EOF) break;
335 int len = fh.read();
336 if (len == EOF) abort(); // FIXME
337 index_key.resize(reuse + len);
338 fh.read(&index_key[reuse], len);
340 #ifdef DEBUGGING
342 string desc;
343 description_append(desc, index_key);
344 cerr << "Index key: " << desc << endl;
346 #endif
348 cmp0 = index_key.compare(key);
349 if (cmp0 > 0) {
350 index_key = prev_index_key;
351 break;
353 char buf[8];
354 char* e = buf;
355 while (true) {
356 int b = fh.read();
357 *e++ = b;
358 if ((b & 0x80) == 0) break;
360 const char* p = buf;
361 if (!unpack_uint(&p, e, &ptr) || p != e) abort(); // FIXME
362 #ifdef DEBUGGING
364 cerr << " -> " << ptr << endl;
366 #endif
367 if (cmp0 == 0)
368 break;
369 prev_index_key = index_key;
371 #ifdef DEBUGGING
373 cerr << " cmp0 = " << cmp0 << ", going to " << ptr << endl;
375 #endif
376 fh.set_pos(ptr);
378 if (ptr != 0) {
379 last_key = index_key;
380 char buf[8];
381 int r;
383 // FIXME: rework to take advantage of buffering that's happening anyway?
384 char * p = buf;
385 for (int i = 0; i < 8; ++i) {
386 int ch2 = fh.read();
387 if (ch2 == EOF) {
388 break;
390 *p++ = ch2;
391 if (ch2 < 128) break;
393 r = p - buf;
395 const char* p = buf;
396 const char* end = p + r;
397 if (!unpack_uint(&p, end, &val_size)) {
398 throw Xapian::DatabaseError("val_size unpack_uint invalid");
400 compressed = val_size & 1;
401 val_size >>= 1;
402 Assert(p == end);
403 } else {
404 last_key = string();
407 if (cmp0 == 0) {
408 exact_match = true;
409 break;
412 #ifdef DEBUGGING
414 string desc;
415 description_append(desc, last_key);
416 cerr << "Dropped to data layer on key: " << desc << endl;
418 #endif
420 break;
422 default:
423 throw Xapian::DatabaseCorruptError("Unknown index type");
426 std::string k;
427 int cmp;
428 if (!exact_match) {
429 do {
430 if (val_size) {
431 // Skip val data we've not looked at.
432 fh.skip(val_size);
433 val_size = 0;
435 if (!read_key(k, val_size, compressed)) return false;
436 cmp = k.compare(key);
437 } while (cmp < 0);
438 if (cmp > 0) return false;
440 if (tag != NULL) {
441 if (compressed) {
442 std::string v;
443 read_val(v, val_size);
444 CompressionStream comp_stream;
445 comp_stream.decompress_start();
446 tag->resize(0);
447 if (!comp_stream.decompress_chunk(v.data(), v.size(), *tag)) {
448 // Decompression didn't complete.
449 abort();
451 } else {
452 read_val(*tag, val_size);
455 return true;
458 HoneyCursor*
459 HoneyTable::cursor_get() const
461 return new HoneyCursor(fh, root, offset);