[honey] Fix reading doclength off end of db
[xapian.git] / xapian-core / backends / honey / honey_database.cc
blobdf6a642d6454f161d58c523b8233e7fe76fea779
1 /** @file honey_database.cc
2 * @brief Honey backend database class
3 */
4 /* Copyright 2015,2017,2018 Olly Betts
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of the
9 * License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #include <config.h>
23 #include "honey_database.h"
25 #include "honey_alltermslist.h"
26 #include "honey_document.h"
27 #include "honey_metadata.h"
28 #include "honey_termlist.h"
29 #include "honey_spellingwordslist.h"
30 #include "honey_valuelist.h"
32 #include "api/leafpostlist.h"
33 #include "backends/backends.h"
34 #include "xapian/error.h"
36 using namespace std;
38 void
39 HoneyDatabase::throw_termlist_table_close_exception() const
41 // Either the database has been closed, or else there's no termlist table.
42 // Check if the postlist table is open to determine which is the case.
43 if (!postlist_table.is_open())
44 HoneyTable::throw_database_closed();
45 throw Xapian::FeatureUnavailableError("Database has no termlist");
48 // Relied on below - opening to read should allow the termlist to be missing.
49 static_assert(Xapian::DB_READONLY_ & Xapian::DB_NO_TERMLIST,
50 "Xapian::DB_READONLY_ should imply Xapian::DB_NO_TERMLIST");
52 HoneyDatabase::HoneyDatabase(const std::string& path_, int flags)
53 : Xapian::Database::Internal(TRANSACTION_READONLY),
54 path(path_),
55 version_file(path_),
56 docdata_table(path_, true),
57 postlist_table(path_, true),
58 position_table(path_, true),
59 spelling_table(path_, true),
60 synonym_table(path_, true),
61 // Note: (Xapian::DB_READONLY_ & Xapian::DB_NO_TERMLIST) is true, so
62 // opening to read we always allow the termlist to be missing.
63 termlist_table(path_, true, (flags & Xapian::DB_NO_TERMLIST)),
64 value_manager(postlist_table, termlist_table)
66 version_file.read();
67 auto rev = version_file.get_revision();
68 docdata_table.open(flags, version_file.get_root(Honey::DOCDATA), rev);
69 postlist_table.open(flags, version_file.get_root(Honey::POSTLIST), rev);
70 position_table.open(flags, version_file.get_root(Honey::POSITION), rev);
71 spelling_table.open(flags, version_file.get_root(Honey::SPELLING), rev);
72 synonym_table.open(flags, version_file.get_root(Honey::SYNONYM), rev);
73 termlist_table.open(flags, version_file.get_root(Honey::TERMLIST), rev);
76 HoneyDatabase::HoneyDatabase(int fd, int flags)
77 : Xapian::Database::Internal(TRANSACTION_READONLY),
78 version_file(fd),
79 docdata_table(fd, version_file.get_offset(), true),
80 postlist_table(fd, version_file.get_offset(), true),
81 position_table(fd, version_file.get_offset(), true),
82 spelling_table(fd, version_file.get_offset(), true),
83 synonym_table(fd, version_file.get_offset(), true),
84 // Note: (Xapian::DB_READONLY_ & Xapian::DB_NO_TERMLIST) is true, so
85 // opening to read we always allow the termlist to be missing.
86 termlist_table(fd, version_file.get_offset(), true,
87 (flags & Xapian::DB_NO_TERMLIST)),
88 value_manager(postlist_table, termlist_table)
90 version_file.read();
91 auto rev = version_file.get_revision();
92 docdata_table.open(flags, version_file.get_root(Honey::DOCDATA), rev);
93 postlist_table.open(flags, version_file.get_root(Honey::POSTLIST), rev);
94 position_table.open(flags, version_file.get_root(Honey::POSITION), rev);
95 spelling_table.open(flags, version_file.get_root(Honey::SPELLING), rev);
96 synonym_table.open(flags, version_file.get_root(Honey::SYNONYM), rev);
97 termlist_table.open(flags, version_file.get_root(Honey::TERMLIST), rev);
100 HoneyDatabase::~HoneyDatabase()
102 delete doclen_cursor;
105 void
106 HoneyDatabase::readahead_for_query(const Xapian::Query& query) const
108 (void)query;
109 // FIXME: Implement - pre-read the start of the postlist table?
112 Xapian::doccount
113 HoneyDatabase::get_doccount() const
115 return version_file.get_doccount();
118 Xapian::docid
119 HoneyDatabase::get_lastdocid() const
121 return version_file.get_last_docid();
124 Xapian::totallength
125 HoneyDatabase::get_total_length() const
127 return version_file.get_total_doclen();
130 Xapian::termcount
131 HoneyDatabase::get_doclength(Xapian::docid did) const
133 Assert(did != 0);
134 if (usual(did <= version_file.get_last_docid())) {
135 if (doclen_cursor == NULL) {
136 doclen_cursor = get_postlist_cursor();
137 } else {
138 if (doclen_chunk_reader.find_doclength(did)) {
139 return doclen_chunk_reader.get_doclength();
143 // If exact is true, the desired docid is the last in this chunk.
144 bool exact =
145 doclen_cursor->find_entry_ge(Honey::make_doclenchunk_key(did));
146 if (doclen_chunk_reader.update(doclen_cursor)) {
147 if (exact)
148 return doclen_chunk_reader.back();
149 if (doclen_chunk_reader.find_doclength(did)) {
150 return doclen_chunk_reader.get_doclength();
155 string message = "Document ID not in use: ";
156 message += str(did);
157 throw Xapian::DocNotFoundError(message);
160 Xapian::termcount
161 HoneyDatabase::get_unique_terms(Xapian::docid did) const
163 Assert(did != 0);
164 return HoneyTermList(this, did).get_unique_terms();
167 void
168 HoneyDatabase::get_freqs(const string& term,
169 Xapian::doccount* termfreq_ptr,
170 Xapian::termcount* collfreq_ptr) const
172 postlist_table.get_freqs(term, termfreq_ptr, collfreq_ptr);
175 Xapian::doccount
176 HoneyDatabase::get_value_freq(Xapian::valueno slot) const
178 return value_manager.get_value_freq(slot);
181 string
182 HoneyDatabase::get_value_lower_bound(Xapian::valueno slot) const
184 return value_manager.get_value_lower_bound(slot);
187 string
188 HoneyDatabase::get_value_upper_bound(Xapian::valueno slot) const
190 return value_manager.get_value_upper_bound(slot);
193 Xapian::termcount
194 HoneyDatabase::get_doclength_lower_bound() const
196 return version_file.get_doclength_lower_bound();
199 Xapian::termcount
200 HoneyDatabase::get_doclength_upper_bound() const
202 return version_file.get_doclength_upper_bound();
205 Xapian::termcount
206 HoneyDatabase::get_wdf_upper_bound(const string& term) const
208 // We don't store per-term wdf upper bounds currently, only a per-database
209 // wdf bound. However, the collection frequency of the term provides a
210 // second upper bound (since collection frequency is the sum of the wdf and
211 // wdf >= 0), so pick the tighter of these bounds.
212 Xapian::termcount wdf_bound = version_file.get_wdf_upper_bound();
213 // It's unlikely wdf is always 0, but when it is there's no need to check
214 // the collection frequency.
215 if (usual(wdf_bound != 0)) {
216 Xapian::termcount coll_freq;
217 get_freqs(term, NULL, &coll_freq);
218 if (coll_freq < wdf_bound) {
219 wdf_bound = coll_freq;
222 return wdf_bound;
225 bool
226 HoneyDatabase::term_exists(const string& term) const
228 if (term.empty())
229 return HoneyDatabase::get_doccount() != 0;
230 return postlist_table.term_exists(term);
233 bool
234 HoneyDatabase::has_positions() const
236 return !position_table.empty();
239 PostList*
240 HoneyDatabase::open_post_list(const string& term) const
242 return HoneyDatabase::open_leaf_post_list(term, false);
245 LeafPostList*
246 HoneyDatabase::open_leaf_post_list(const string& term, bool need_pos) const
248 if (term.empty()) {
249 Assert(!need_pos);
250 return new HoneyAllDocsPostList(this, get_doccount());
253 return postlist_table.open_post_list(this, term, need_pos);
256 ValueList*
257 HoneyDatabase::open_value_list(Xapian::valueno slot) const
259 return new HoneyValueList(slot, this);
262 TermList*
263 HoneyDatabase::open_term_list(Xapian::docid did) const
265 Assert(did != 0);
266 if (!termlist_table.is_open())
267 throw_termlist_table_close_exception();
268 HoneyTermList* tl = new HoneyTermList(this, did);
269 if (tl->size() == 0) {
270 // It could be the document has no terms, but maybe it doesn't exist -
271 // in the latter case we ought to throw DocNotFoundError. FIXME: If
272 // the document has no terms, but does have values, we should be able
273 // to avoid this check.
275 // Put the pointer in a unique_ptr so it gets released if an exception
276 // is thrown.
277 unique_ptr<TermList> tl_ptr(tl);
279 // This will throw DocNotFoundError if did isn't in use.
280 (void)HoneyDatabase::get_doclength(did);
281 tl_ptr.release();
283 return tl;
286 TermList*
287 HoneyDatabase::open_term_list_direct(Xapian::docid did) const
289 // Same as open_term_list() except for MultiDatabase.
290 return HoneyDatabase::open_term_list(did);
293 TermList*
294 HoneyDatabase::open_allterms(const string& prefix) const
296 return new HoneyAllTermsList(this, prefix);
299 PositionList*
300 HoneyDatabase::open_position_list(Xapian::docid did, const string& term) const
302 return new HoneyPositionList(position_table, did, term);
305 Xapian::Document::Internal*
306 HoneyDatabase::open_document(Xapian::docid did, bool lazy) const
308 Assert(did != 0);
309 if (!lazy) {
310 // This will throw DocNotFoundError if did isn't in use.
311 (void)HoneyDatabase::get_doclength(did);
313 return new HoneyDocument(this, did, &value_manager, &docdata_table);
316 TermList*
317 HoneyDatabase::open_spelling_termlist(const string& word) const
319 return spelling_table.open_termlist(word);
322 TermList*
323 HoneyDatabase::open_spelling_wordlist() const
325 auto cursor = spelling_table.cursor_get();
326 if (rare(cursor == NULL)) {
327 // No spelling table.
328 return NULL;
330 return new HoneySpellingWordsList(this, cursor);
333 Xapian::doccount
334 HoneyDatabase::get_spelling_frequency(const string& word) const
336 return spelling_table.get_word_frequency(word);
339 void
340 HoneyDatabase::add_spelling(const string& word, Xapian::termcount freqinc) const
342 (void)word;
343 (void)freqinc;
344 throw Xapian::UnimplementedError("Honey backend doesn't support update");
347 Xapian::termcount
348 HoneyDatabase::remove_spelling(const string& word,
349 Xapian::termcount freqdec) const
351 (void)word;
352 (void)freqdec;
353 throw Xapian::UnimplementedError("Honey backend doesn't support update");
356 TermList*
357 HoneyDatabase::open_synonym_termlist(const string& term) const
359 return synonym_table.open_termlist(term);
362 TermList*
363 HoneyDatabase::open_synonym_keylist(const string& prefix) const
365 auto cursor = synonym_table.cursor_get();
366 if (rare(cursor == NULL)) {
367 // No synonym table.
368 return NULL;
370 return new HoneySynonymTermList(this, cursor, prefix);
373 void
374 HoneyDatabase::add_synonym(const string& term, const string& synonym) const
376 (void)term;
377 (void)synonym;
378 throw Xapian::UnimplementedError("Honey backend doesn't support update");
381 void
382 HoneyDatabase::remove_synonym(const string& term, const string& synonym) const
384 (void)term;
385 (void)synonym;
386 throw Xapian::UnimplementedError("Honey backend doesn't support update");
389 void
390 HoneyDatabase::clear_synonyms(const string& term) const
392 (void)term;
393 throw Xapian::UnimplementedError("Honey backend doesn't support update");
396 string
397 HoneyDatabase::get_metadata(const string& key) const
399 return postlist_table.get_metadata(key);
402 TermList*
403 HoneyDatabase::open_metadata_keylist(const string& prefix) const
405 auto cursor = postlist_table.cursor_get();
406 Assert(cursor != NULL);
407 return new HoneyMetadataTermList(this, cursor, prefix);
410 void
411 HoneyDatabase::set_metadata(const string& key, const string& value)
413 (void)key;
414 (void)value;
415 throw Xapian::UnimplementedError("Honey backend doesn't support update");
418 bool
419 HoneyDatabase::reopen()
421 if (!postlist_table.is_open())
422 HoneyTable::throw_database_closed();
423 return false;
426 void
427 HoneyDatabase::close()
429 docdata_table.close(true);
430 postlist_table.close(true);
431 position_table.close(true);
432 spelling_table.close(true);
433 synonym_table.close(true);
434 termlist_table.close(true);
437 void
438 HoneyDatabase::request_document(Xapian::docid did) const
440 Assert(did != 0);
441 (void)did; // FIXME
444 Xapian::rev
445 HoneyDatabase::get_revision() const
447 return version_file.get_revision();
450 string
451 HoneyDatabase::get_uuid() const
453 return version_file.get_uuid_string();
457 HoneyDatabase::get_backend_info(string* path_ptr) const
459 if (path_ptr)
460 *path_ptr = path;
461 return BACKEND_HONEY;
464 void
465 HoneyDatabase::get_used_docid_range(Xapian::docid& first,
466 Xapian::docid& last) const
468 auto doccount = version_file.get_doccount();
469 if (doccount == 0) {
470 // Empty database.
471 first = last = 0;
472 return;
474 auto last_docid = version_file.get_last_docid();
475 if (last_docid == doccount) {
476 // Contiguous range starting at 1.
477 first = 1;
478 last = last_docid;
479 return;
481 postlist_table.get_used_docid_range(doccount, first, last);
484 string
485 HoneyDatabase::get_description() const
487 string desc = "Honey(";
488 desc += path;
489 desc += ')';
490 return desc;