1 /** @file honey_table.cc
2 * @brief HoneyTable class
4 /* Copyright (C) 2017,2018 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "honey_table.h"
27 #include "honey_cursor.h"
28 #include "stringutils.h"
30 #include "unicode/description_append.h"
36 using Honey::RootInfo
;
41 HoneyTable::create_and_open(int flags_
, const RootInfo
& root_info
)
43 Assert(!single_file());
45 compress_min
= root_info
.get_compress_min();
47 num_entries
= root_info
.get_num_entries();
48 root
= root_info
.get_root();
51 if (!store
.open(path
, read_only
))
52 throw Xapian::DatabaseOpeningError("Failed to open HoneyTable", errno
);
56 HoneyTable::open(int flags_
, const RootInfo
& root_info
, honey_revision_number_t
)
59 compress_min
= root_info
.get_compress_min();
60 num_entries
= root_info
.get_num_entries();
61 offset
= root_info
.get_offset();
62 root
= root_info
.get_root();
63 if (!single_file() && !store
.open(path
, read_only
)) {
65 throw Xapian::DatabaseOpeningError("Failed to open HoneyTable",
68 store
.set_pos(offset
);
72 HoneyTable::add(const std::string
& key
,
77 if (!compressed
&& compress_min
> 0 && val_size
> compress_min
) {
78 size_t compressed_size
= val_size
;
79 CompressionStream comp_stream
; // FIXME: reuse
80 const char* p
= comp_stream
.compress(val
, &compressed_size
);
82 add(key
, p
, compressed_size
, true);
88 throw Xapian::InvalidOperationError("add() on read-only HoneyTable");
89 if (key
.size() == 0 || key
.size() > HONEY_MAX_KEY_LEN
)
90 throw Xapian::InvalidArgumentError("Invalid key size: " +
93 throw Xapian::InvalidOperationError("New key <= previous key");
94 size_t reuse
= common_prefix_length(last_key
, key
);
98 index
.maybe_add_entry(key
, store
.get_pos());
100 #elif defined SSINDEX_BINARY_CHOP
101 // For a binary chop index, the index point is before the key info - the
102 // index key must have the same N first bytes as the previous key, where
103 // N >= the keep length.
104 index
.maybe_add_entry(key
, store
.get_pos());
105 #elif defined SSINDEX_SKIPLIST
108 # error "SSINDEX type not specified"
111 store
.write(static_cast<unsigned char>(reuse
));
112 store
.write(static_cast<unsigned char>(key
.size() - reuse
));
113 store
.write(key
.data() + reuse
, key
.size() - reuse
);
116 #ifdef SSINDEX_SKIPLIST
117 // For a skiplist index, the index provides the full key, so the index
118 // point is after the key at the level below.
119 index
.maybe_add_entry(key
, store
.get_pos());
122 // Encode "compressed?" flag in bottom bit.
123 // FIXME: Don't do this if a table is uncompressed? That saves a byte
124 // for each item where the extra bit pushes the length up by a byte.
125 size_t val_size_enc
= (val_size
<< 1) | compressed
;
127 pack_uint(val_len
, val_size_enc
);
128 // FIXME: pass together so we can potentially writev() both?
129 store
.write(val_len
.data(), val_len
.size());
130 store
.write(val
, val_size
);
135 HoneyTable::commit(honey_revision_number_t
, RootInfo
* root_info
)
138 throw Xapian::InvalidOperationError("root not set");
140 root_info
->set_num_entries(num_entries
);
141 // offset should already be set.
142 root_info
->set_root(root
);
143 // Not really meaningful.
144 // root_info->set_free_list(std::string());
147 store
.rewind(offset
);
152 HoneyTable::read_key(std::string
& key
,
154 bool& compressed
) const
159 description_append(desc
, key
);
160 cerr
<< "HoneyTable::read_key(" << desc
<< ", ...) for path=" << path
<< endl
;
167 AssertRel(store
.get_pos(), >=, offset
);
168 if (store
.get_pos() >= root
) {
169 AssertEq(store
.get_pos(), root
);
172 int ch
= store
.read();
173 if (ch
== EOF
) return false;
176 if (reuse
> last_key
.size()) {
177 throw Xapian::DatabaseCorruptError("Reuse > previous key size");
181 throw Xapian::DatabaseError("EOF/error while reading key length",
184 size_t key_size
= ch
;
186 store
.read(buf
, key_size
);
187 key
.assign(last_key
, 0, reuse
);
188 key
.append(buf
, key_size
);
194 description_append(esc
, key
);
195 std::cout
<< "K:" << esc
<< std::endl
;
201 // FIXME: rework to take advantage of buffering that's happening anyway?
203 for (int i
= 0; i
< 8; ++i
) {
204 int ch2
= store
.read();
209 if (ch2
< 128) break;
214 const char* end
= p
+ r
;
215 if (!unpack_uint(&p
, end
, &val_size
)) {
216 throw Xapian::DatabaseError("val_size unpack_uint invalid");
218 compressed
= val_size
& 1;
225 HoneyTable::read_val(std::string
& val
, size_t val_size
) const
227 AssertRel(store
.get_pos() + val_size
, <=, size_t(root
));
228 val
.resize(val_size
);
229 store
.read(&(val
[0]), val_size
);
233 description_append(esc
, val
);
234 std::cout
<< "V:" << esc
<< std::endl
;
240 HoneyTable::get_exact_entry(const std::string
& key
, std::string
* tag
) const
242 if (!read_only
) std::abort();
243 if (!store
.is_open()) {
244 if (store
.was_forced_closed())
245 throw_database_closed();
249 if (rare(key
.empty()))
251 bool exact_match
= false;
252 bool compressed
= false;
254 int index_type
= store
.read();
255 switch (index_type
) {
259 unsigned char first
= key
[0] - store
.read();
260 unsigned char range
= store
.read();
263 store
.skip(first
* 4); // FIXME: pointer width
264 off_t jump
= store
.read_uint4_be();
266 // The jump point will be an entirely new key (because it is the
267 // first key with that initial character), and we drop in as if
268 // this was the first key so set last_key to be empty.
273 size_t j
= store
.read_uint4_be();
276 off_t base
= store
.get_pos();
277 char kkey
[SSINDEX_BINARY_CHOP_KEY_SIZE
];
281 size_t k
= i
+ (j
- i
) / 2;
282 store
.set_pos(base
+ k
* SSINDEX_BINARY_CHOP_ENTRY_SIZE
);
283 store
.read(kkey
, SSINDEX_BINARY_CHOP_KEY_SIZE
);
285 while (kkey_len
> 0 && kkey
[kkey_len
- 1] == '\0') --kkey_len
;
286 int r
= key
.compare(0, SSINDEX_BINARY_CHOP_KEY_SIZE
, kkey
, kkey_len
);
296 store
.set_pos(base
+ i
* SSINDEX_BINARY_CHOP_ENTRY_SIZE
);
297 store
.read(kkey
, SSINDEX_BINARY_CHOP_KEY_SIZE
);
299 while (kkey_len
> 0 && kkey
[kkey_len
- 1] == '\0') --kkey_len
;
300 off_t jump
= store
.read_uint4_be();
302 // The jump point is to the first key with prefix kkey, so will
303 // work if we set last key to kkey. Unless we're jumping to the
304 // start of the table, in which case last_key needs to be empty.
305 last_key
.assign(kkey
, jump
== 0 ? 0 : kkey_len
);
309 // FIXME: If "close" just seek forwards? Or consider seeking from
310 // current index pos?
311 // off_t pos = store.get_pos();
312 string index_key
, prev_index_key
;
313 make_unsigned
<off_t
>::type ptr
= 0;
316 int reuse
= store
.read();
317 if (reuse
== EOF
) break;
318 int len
= store
.read();
319 if (len
== EOF
) abort(); // FIXME
320 index_key
.resize(reuse
+ len
);
321 store
.read(&index_key
[reuse
], len
);
326 description_append(desc
, index_key
);
327 cerr
<< "Index key: " << desc
<< endl
;
331 cmp0
= index_key
.compare(key
);
333 index_key
= prev_index_key
;
339 int b
= store
.read();
341 if ((b
& 0x80) == 0) break;
344 if (!unpack_uint(&p
, e
, &ptr
) || p
!= e
) abort(); // FIXME
347 cerr
<< " -> " << ptr
<< endl
;
352 prev_index_key
= index_key
;
356 cerr
<< " cmp0 = " << cmp0
<< ", going to " << ptr
<< endl
;
362 last_key
= index_key
;
366 // FIXME: rework to take advantage of buffering that's happening anyway?
368 for (int i
= 0; i
< 8; ++i
) {
369 int ch2
= store
.read();
374 if (ch2
< 128) break;
379 const char* end
= p
+ r
;
380 if (!unpack_uint(&p
, end
, &val_size
)) {
381 throw Xapian::DatabaseError("val_size unpack_uint invalid");
383 compressed
= val_size
& 1;
398 description_append(desc
, last_key
);
399 cerr
<< "Dropped to data layer on key: " << desc
<< endl
;
406 string m
= "HoneyTable: Unknown index type ";
407 m
+= str(index_type
);
408 throw Xapian::DatabaseCorruptError(m
);
417 // Skip val data we've not looked at.
418 store
.skip(val_size
);
421 if (!read_key(k
, val_size
, compressed
)) return false;
422 cmp
= k
.compare(key
);
424 if (cmp
> 0) return false;
429 read_val(v
, val_size
);
430 CompressionStream comp_stream
;
431 comp_stream
.decompress_start();
433 if (!comp_stream
.decompress_chunk(v
.data(), v
.size(), *tag
)) {
434 // Decompression didn't complete.
438 read_val(*tag
, val_size
);
445 HoneyTable::cursor_get() const
447 return new HoneyCursor(store
, root
, offset
);