1 /** @file honey_table.cc
2 * @brief HoneyTable class
4 /* Copyright (C) 2017,2018 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "honey_table.h"
28 #include "honey_cursor.h"
29 #include "stringutils.h"
31 #include "unicode/description_append.h"
37 using Honey::RootInfo
;
41 size_t HoneyTable::total_index_size
= 0;
44 HoneyTable::create_and_open(int flags_
, const RootInfo
& root_info
)
46 Assert(!single_file());
48 compress_min
= root_info
.get_compress_min();
50 num_entries
= root_info
.get_num_entries();
51 root
= root_info
.get_root();
54 if (!fh
.open(path
, read_only
))
55 throw Xapian::DatabaseOpeningError("Failed to open HoneyTable", errno
);
59 HoneyTable::open(int flags_
, const RootInfo
& root_info
, honey_revision_number_t
)
62 compress_min
= root_info
.get_compress_min();
63 num_entries
= root_info
.get_num_entries();
64 offset
= root_info
.get_offset();
65 root
= root_info
.get_root();
66 if (!single_file() && !fh
.open(path
, read_only
)) {
68 throw Xapian::DatabaseOpeningError("Failed to open HoneyTable",
75 HoneyTable::add(const std::string
& key
,
80 if (!compressed
&& compress_min
> 0 && val_size
> compress_min
) {
81 size_t compressed_size
= val_size
;
82 CompressionStream comp_stream
; // FIXME: reuse
83 const char* p
= comp_stream
.compress(val
, &compressed_size
);
85 add(key
, p
, compressed_size
, true);
91 throw Xapian::InvalidOperationError("add() on read-only HoneyTable");
92 if (key
.size() == 0 || key
.size() > HONEY_MAX_KEY_LEN
)
93 throw Xapian::InvalidArgumentError("Invalid key size: " +
96 throw Xapian::InvalidOperationError("New key <= previous key");
97 off_t index_pos
= fh
.get_pos();
98 if (!last_key
.empty()) {
99 size_t reuse
= common_prefix_length(last_key
, key
);
100 fh
.write(static_cast<unsigned char>(reuse
));
101 fh
.write(static_cast<unsigned char>(key
.size() - reuse
));
102 fh
.write(key
.data() + reuse
, key
.size() - reuse
);
104 fh
.write(static_cast<unsigned char>(key
.size()));
105 fh
.write(key
.data(), key
.size());
109 // For an array index, the index point is right before the complete key.
110 if (!last_key
.empty()) ++index_pos
;
111 #elif defined SSINDEX_BINARY_CHOP
112 // For a binary chop index, the index point is before the key info - the
113 // index key must have the same N first bytes as the previous key, where N
114 // >= the keep length.
115 #elif defined SSINDEX_SKIPLIST
116 // For a skiplist index, the index provides the full key, so the index
117 // point is after the key at the level below.
118 index_pos
= fh
.get_pos();
120 # error "SSINDEX type not specified"
122 index
.maybe_add_entry(key
, index_pos
);
124 // Encode "compressed?" flag in bottom bit.
125 // FIXME: Don't do this if a table is uncompressed? That saves a byte
126 // for each item where the extra bit pushes the length up by a byte.
127 size_t val_size_enc
= (val_size
<< 1) | compressed
;
129 pack_uint(val_len
, val_size_enc
);
130 // FIXME: pass together so we can potentially writev() both?
131 fh
.write(val_len
.data(), val_len
.size());
132 fh
.write(val
, val_size
);
137 HoneyTable::commit(honey_revision_number_t
, RootInfo
* root_info
)
140 throw Xapian::InvalidOperationError("root not set");
142 root_info
->set_level(1); // FIXME: number of index levels
143 root_info
->set_num_entries(num_entries
);
144 root_info
->set_root_is_fake(false);
145 // Not really meaningful.
146 root_info
->set_sequential(true);
147 // offset should already be set.
148 root_info
->set_root(root
);
149 // Not really meaningful.
150 root_info
->set_blocksize(2048);
151 // Not really meaningful.
152 // root_info->set_free_list(std::string());
160 HoneyTable::read_key(std::string
& key
,
162 bool& compressed
) const
167 description_append(desc
, key
);
168 cerr
<< "HoneyTable::read_key(" << desc
<< ", ...) for path=" << path
<< endl
;
175 AssertRel(fh
.get_pos(), >=, offset
);
176 if (fh
.get_pos() >= root
) {
177 AssertEq(fh
.get_pos(), root
);
181 if (ch
== EOF
) return false;
184 if (!last_key
.empty()) {
188 throw Xapian::DatabaseError("EOF/error while reading key length",
192 size_t key_size
= ch
;
194 fh
.read(buf
, key_size
);
195 key
.assign(last_key
, 0, reuse
);
196 key
.append(buf
, key_size
);
202 description_append(esc
, key
);
203 std::cout
<< "K:" << esc
<< std::endl
;
209 // FIXME: rework to take advantage of buffering that's happening anyway?
211 for (int i
= 0; i
< 8; ++i
) {
217 if (ch2
< 128) break;
222 const char* end
= p
+ r
;
223 if (!unpack_uint(&p
, end
, &val_size
)) {
224 throw Xapian::DatabaseError("val_size unpack_uint invalid");
226 compressed
= val_size
& 1;
233 HoneyTable::read_val(std::string
& val
, size_t val_size
) const
235 AssertRel(fh
.get_pos() + val_size
, <=, size_t(root
));
236 val
.resize(val_size
);
237 fh
.read(&(val
[0]), val_size
);
241 description_append(esc
, val
);
242 std::cout
<< "V:" << esc
<< std::endl
;
248 HoneyTable::get_exact_entry(const std::string
& key
, std::string
* tag
) const
250 if (!read_only
) std::abort();
252 if (fh
.was_forced_closed())
253 throw_database_closed();
257 if (rare(key
.empty()))
259 bool exact_match
= false;
262 int index_type
= fh
.read();
263 switch (index_type
) {
267 unsigned char first
= key
[0] - fh
.read();
268 unsigned char range
= fh
.read();
271 fh
.skip(first
* 4); // FIXME: pointer width
272 off_t jump
= fh
.read() << 24;
273 jump
|= fh
.read() << 16;
274 jump
|= fh
.read() << 8;
277 // The jump point will be an entirely new key (because it is the
278 // first key with that initial character), and we drop in as if
279 // this was the first key so set last_key to be empty.
284 size_t j
= fh
.read() << 24;
285 j
|= fh
.read() << 16;
290 off_t base
= fh
.get_pos();
291 char kkey
[SSINDEX_BINARY_CHOP_KEY_SIZE
];
295 size_t k
= i
+ (j
- i
) / 2;
296 fh
.set_pos(base
+ k
* (SSINDEX_BINARY_CHOP_KEY_SIZE
+ 4));
297 fh
.read(kkey
, SSINDEX_BINARY_CHOP_KEY_SIZE
);
299 while (kkey_len
> 0 && kkey
[kkey_len
- 1] == '\0') --kkey_len
;
300 int r
= key
.compare(0, SSINDEX_BINARY_CHOP_KEY_SIZE
, kkey
, kkey_len
);
310 fh
.set_pos(base
+ i
* (SSINDEX_BINARY_CHOP_KEY_SIZE
+ 4));
311 fh
.read(kkey
, SSINDEX_BINARY_CHOP_KEY_SIZE
);
313 while (kkey_len
> 0 && kkey
[kkey_len
- 1] == '\0') --kkey_len
;
314 off_t jump
= fh
.read() << 24;
315 jump
|= fh
.read() << 16;
316 jump
|= fh
.read() << 8;
319 // The jump point is to the first key with prefix kkey, so will
320 // work if we set last key to kkey. Unless we're jumping to the
321 // start of the table, in which case last_key needs to be empty.
322 last_key
.assign(kkey
, jump
== 0 ? 0 : kkey_len
);
326 // FIXME: If "close" just seek forwards? Or consider seeking from
327 // current index pos?
328 // off_t pos = fh.get_pos();
329 string index_key
, prev_index_key
;
330 make_unsigned
<off_t
>::type ptr
= 0;
333 int reuse
= fh
.read();
334 if (reuse
== EOF
) break;
336 if (len
== EOF
) abort(); // FIXME
337 index_key
.resize(reuse
+ len
);
338 fh
.read(&index_key
[reuse
], len
);
343 description_append(desc
, index_key
);
344 cerr
<< "Index key: " << desc
<< endl
;
348 cmp0
= index_key
.compare(key
);
350 index_key
= prev_index_key
;
358 if ((b
& 0x80) == 0) break;
361 if (!unpack_uint(&p
, e
, &ptr
) || p
!= e
) abort(); // FIXME
364 cerr
<< " -> " << ptr
<< endl
;
369 prev_index_key
= index_key
;
373 cerr
<< " cmp0 = " << cmp0
<< ", going to " << ptr
<< endl
;
379 last_key
= index_key
;
383 // FIXME: rework to take advantage of buffering that's happening anyway?
385 for (int i
= 0; i
< 8; ++i
) {
391 if (ch2
< 128) break;
396 const char* end
= p
+ r
;
397 if (!unpack_uint(&p
, end
, &val_size
)) {
398 throw Xapian::DatabaseError("val_size unpack_uint invalid");
400 compressed
= val_size
& 1;
415 description_append(desc
, last_key
);
416 cerr
<< "Dropped to data layer on key: " << desc
<< endl
;
423 throw Xapian::DatabaseCorruptError("Unknown index type");
431 // Skip val data we've not looked at.
435 if (!read_key(k
, val_size
, compressed
)) return false;
436 cmp
= k
.compare(key
);
438 if (cmp
> 0) return false;
443 read_val(v
, val_size
);
444 CompressionStream comp_stream
;
445 comp_stream
.decompress_start();
447 if (!comp_stream
.decompress_chunk(v
.data(), v
.size(), *tag
)) {
448 // Decompression didn't complete.
452 read_val(*tag
, val_size
);
459 HoneyTable::cursor_get() const
461 return new HoneyCursor(fh
, root
, offset
);