1 /** @file honey_database.cc
2 * @brief Honey backend database class
 */
4 /* Copyright 2015,2017,2018 Olly Betts
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of the
9 * License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
23 #include "honey_database.h"
25 #include "honey_alltermslist.h"
26 #include "honey_document.h"
27 #include "honey_metadata.h"
28 #include "honey_termlist.h"
29 #include "honey_spellingwordslist.h"
30 #include "honey_valuelist.h"
32 #include "api/leafpostlist.h"
33 #include "backends/backends.h"
34 #include "xapian/error.h"
// Report why the termlist can't be used, for callers which found that the
// termlist table isn't open (open_term_list() calls this in that case).
// Never returns normally: HoneyTable::throw_database_closed() is called when
// the postlist table isn't open either, otherwise
// Xapian::FeatureUnavailableError is thrown.
39 HoneyDatabase::throw_termlist_table_close_exception() const
41 // Either the database has been closed, or else there's no termlist table.
42 // Check if the postlist table is open to determine which is the case.
43 if (!postlist_table
.is_open())
44 HoneyTable::throw_database_closed();
45 throw Xapian::FeatureUnavailableError("Database has no termlist");
48 // Relied on below - opening to read should allow the termlist to be missing.
// Compile-time check that the two flag constants have at least one bit in
// common, so (flags & Xapian::DB_NO_TERMLIST) is non-zero whenever flags
// contains all the bits of Xapian::DB_READONLY_.
49 static_assert(Xapian::DB_READONLY_
& Xapian::DB_NO_TERMLIST
,
50 "Xapian::DB_READONLY_ should imply Xapian::DB_NO_TERMLIST");
// Construct a HoneyDatabase from the database at path_.
//
// The base class is initialised with TRANSACTION_READONLY.  Every table is
// constructed from path_ and then opened with flags, the root recorded for
// that table in version_file, and the revision reported by version_file.
//
// NOTE(review): the meaning of the `true` passed to each table constructor
// isn't visible here (presumably "read-only") - confirm against HoneyTable.
// NOTE(review): the initialisation of version_file isn't visible in this
// listing.
52 HoneyDatabase::HoneyDatabase(const std::string
& path_
, int flags
)
53 : Xapian::Database::Internal(TRANSACTION_READONLY
),
56 docdata_table(path_
, true),
57 postlist_table(path_
, true),
58 position_table(path_
, true),
59 spelling_table(path_
, true),
60 synonym_table(path_
, true),
61 // Note: (Xapian::DB_READONLY_ & Xapian::DB_NO_TERMLIST) is true, so
62 // opening to read we always allow the termlist to be missing.
63 termlist_table(path_
, true, (flags
& Xapian::DB_NO_TERMLIST
)),
// The value manager is handed the postlist and termlist tables.
64 value_manager(postlist_table
, termlist_table
)
// All six tables are opened at the same revision.
67 auto rev
= version_file
.get_revision();
68 docdata_table
.open(flags
, version_file
.get_root(Honey::DOCDATA
), rev
);
69 postlist_table
.open(flags
, version_file
.get_root(Honey::POSTLIST
), rev
);
70 position_table
.open(flags
, version_file
.get_root(Honey::POSITION
), rev
);
71 spelling_table
.open(flags
, version_file
.get_root(Honey::SPELLING
), rev
);
72 synonym_table
.open(flags
, version_file
.get_root(Honey::SYNONYM
), rev
);
73 termlist_table
.open(flags
, version_file
.get_root(Honey::TERMLIST
), rev
);
// Construct a HoneyDatabase from an already-open file descriptor.
//
// Mirrors the path-based constructor, except that every table is constructed
// from fd plus the offset reported by version_file.get_offset().  The tables
// are then opened with flags, the per-table root from version_file, and the
// revision from version_file.
//
// NOTE(review): the meaning of the `true` passed to each table constructor
// isn't visible here (presumably "read-only") - confirm against HoneyTable.
// NOTE(review): the initialisation of version_file isn't visible in this
// listing; the table initialisers rely on it already being usable.
76 HoneyDatabase::HoneyDatabase(int fd
, int flags
)
77 : Xapian::Database::Internal(TRANSACTION_READONLY
),
79 docdata_table(fd
, version_file
.get_offset(), true),
80 postlist_table(fd
, version_file
.get_offset(), true),
81 position_table(fd
, version_file
.get_offset(), true),
82 spelling_table(fd
, version_file
.get_offset(), true),
83 synonym_table(fd
, version_file
.get_offset(), true),
84 // Note: (Xapian::DB_READONLY_ & Xapian::DB_NO_TERMLIST) is true, so
85 // opening to read we always allow the termlist to be missing.
86 termlist_table(fd
, version_file
.get_offset(), true,
87 (flags
& Xapian::DB_NO_TERMLIST
)),
// The value manager is handed the postlist and termlist tables.
88 value_manager(postlist_table
, termlist_table
)
// All six tables are opened at the same revision.
91 auto rev
= version_file
.get_revision();
92 docdata_table
.open(flags
, version_file
.get_root(Honey::DOCDATA
), rev
);
93 postlist_table
.open(flags
, version_file
.get_root(Honey::POSTLIST
), rev
);
94 position_table
.open(flags
, version_file
.get_root(Honey::POSITION
), rev
);
95 spelling_table
.open(flags
, version_file
.get_root(Honey::SPELLING
), rev
);
96 synonym_table
.open(flags
, version_file
.get_root(Honey::SYNONYM
), rev
);
97 termlist_table
.open(flags
, version_file
.get_root(Honey::TERMLIST
), rev
);
// Destructor: frees the cursor which get_doclength() creates on first use.
// If get_doclength() was never called the pointer is presumably still null
// (its initialiser isn't visible here), in which case delete is a no-op.
100 HoneyDatabase::~HoneyDatabase()
102 delete doclen_cursor
;
// Hook for pre-reading data which query is going to need.  No readahead is
// implemented in the lines visible here - see the FIXME.
106 HoneyDatabase::readahead_for_query(const Xapian::Query
& query
) const
109 // FIXME: Implement - pre-read the start of the postlist table?
// Return the document count recorded in the version file.
113 HoneyDatabase::get_doccount() const
115 return version_file
.get_doccount();
// Return the last document id recorded in the version file.
119 HoneyDatabase::get_lastdocid() const
121 return version_file
.get_last_docid();
// Return the total document length recorded in the version file.
125 HoneyDatabase::get_total_length() const
127 return version_file
.get_total_doclen();
// Look up the length of document did.
//
// Only docids up to version_file.get_last_docid() are looked up; otherwise,
// or if no length is found, Xapian::DocNotFoundError is thrown at the end.
// Other methods here (open_term_list(), open_document()) call this purely to
// get that exception for an unused docid.
131 HoneyDatabase::get_doclength(Xapian::docid did
) const
134 if (usual(did
<= version_file
.get_last_docid())) {
// The cursor used for doclen chunks is created on first use and kept in
// doclen_cursor (the destructor deletes it).
135 if (doclen_cursor
== NULL
) {
136 doclen_cursor
= get_postlist_cursor();
// Ask the chunk reader for did before repositioning the cursor.
138 if (doclen_chunk_reader
.find_doclength(did
)) {
139 return doclen_chunk_reader
.get_doclength();
143 // If exact is true, the desired docid is the last in this chunk.
// NOTE(review): the declaration of `exact` isn't visible in this listing -
// presumably it holds the result of find_entry_ge() below; confirm.
145 doclen_cursor
->find_entry_ge(Honey::make_doclenchunk_key(did
));
// Hand the repositioned cursor to the chunk reader; if that succeeds the
// length is taken from the reader (either its last entry or via a search).
146 if (doclen_chunk_reader
.update(doclen_cursor
)) {
148 return doclen_chunk_reader
.back();
149 if (doclen_chunk_reader
.find_doclength(did
)) {
150 return doclen_chunk_reader
.get_doclength();
// Reached when none of the lookups above returned a length.
// NOTE(review): the line appending did to the message isn't visible here.
155 string message
= "Document ID not in use: ";
157 throw Xapian::DocNotFoundError(message
);
// Return the unique term count for document did, obtained by constructing a
// temporary HoneyTermList for it and asking that.
161 HoneyDatabase::get_unique_terms(Xapian::docid did
) const
164 return HoneyTermList(this, did
).get_unique_terms();
// Fetch the frequencies for term by forwarding to postlist_table.get_freqs().
// The results are written through termfreq_ptr and collfreq_ptr;
// get_wdf_upper_bound() passes NULL for termfreq_ptr, so the table is
// expected to accept a null pointer for a value the caller doesn't want.
168 HoneyDatabase::get_freqs(const string
& term
,
169 Xapian::doccount
* termfreq_ptr
,
170 Xapian::termcount
* collfreq_ptr
) const
172 postlist_table
.get_freqs(term
, termfreq_ptr
, collfreq_ptr
);
// Return the frequency of value slot `slot`, as reported by value_manager.
176 HoneyDatabase::get_value_freq(Xapian::valueno slot
) const
178 return value_manager
.get_value_freq(slot
);
// Return the lower bound for value slot `slot`, as reported by value_manager.
182 HoneyDatabase::get_value_lower_bound(Xapian::valueno slot
) const
184 return value_manager
.get_value_lower_bound(slot
);
// Return the upper bound for value slot `slot`, as reported by value_manager.
188 HoneyDatabase::get_value_upper_bound(Xapian::valueno slot
) const
190 return value_manager
.get_value_upper_bound(slot
);
// Return the document length lower bound recorded in the version file.
194 HoneyDatabase::get_doclength_lower_bound() const
196 return version_file
.get_doclength_lower_bound();
// Return the document length upper bound recorded in the version file.
200 HoneyDatabase::get_doclength_upper_bound() const
202 return version_file
.get_doclength_upper_bound();
// Work out an upper bound on the wdf of term: the smaller of the
// database-wide wdf bound from the version file and the term's collection
// frequency.
// NOTE(review): the return statement isn't visible in this listing;
// presumably wdf_bound is returned - confirm.
206 HoneyDatabase::get_wdf_upper_bound(const string
& term
) const
208 // We don't store per-term wdf upper bounds currently, only a per-database
209 // wdf bound. However, the collection frequency of the term provides a
210 // second upper bound (since collection frequency is the sum of the wdf and
211 // wdf >= 0), so pick the tighter of these bounds.
212 Xapian::termcount wdf_bound
= version_file
.get_wdf_upper_bound();
213 // It's unlikely wdf is always 0, but when it is there's no need to check
214 // the collection frequency.
215 if (usual(wdf_bound
!= 0)) {
216 Xapian::termcount coll_freq
;
// Only the collection frequency is wanted, so NULL is passed for the term
// frequency.
217 get_freqs(term
, NULL
, &coll_freq
);
218 if (coll_freq
< wdf_bound
) {
219 wdf_bound
= coll_freq
;
// Test whether term exists in the database.
//
// There are two outcomes: one reports whether the database contains any
// documents at all, the other asks the postlist table about term.
// NOTE(review): the condition selecting the first return isn't visible in
// this listing; presumably it is the empty-term special case - confirm.
226 HoneyDatabase::term_exists(const string
& term
) const
229 return HoneyDatabase::get_doccount() != 0;
230 return postlist_table
.term_exists(term
);
// Report whether positional information is present, i.e. whether the
// position table is non-empty.
234 HoneyDatabase::has_positions() const
236 return !position_table
.empty();
// Open a posting list for term.  Forwards to open_leaf_post_list() with
// need_pos set to false.
240 HoneyDatabase::open_post_list(const string
& term
) const
242 return HoneyDatabase::open_leaf_post_list(term
, false);
// Open a leaf posting list for term.
//
// Returns either a newly allocated HoneyAllDocsPostList (built from this
// database and its document count), or whatever postlist_table.open_post_list()
// returns for term, with need_pos passed through.
// NOTE(review): the condition selecting the all-documents case isn't visible
// in this listing; presumably it is an empty term - confirm.
246 HoneyDatabase::open_leaf_post_list(const string
& term
, bool need_pos
) const
250 return new HoneyAllDocsPostList(this, get_doccount());
253 return postlist_table
.open_post_list(this, term
, need_pos
);
// Return a newly allocated HoneyValueList for value slot `slot` of this
// database.
257 HoneyDatabase::open_value_list(Xapian::valueno slot
) const
259 return new HoneyValueList(slot
, this);
// Open the term list for document did.
//
// If the termlist table isn't open, throw_termlist_table_close_exception()
// raises the appropriate error.  If the term list turns out to be empty,
// get_doclength() is called so that Xapian::DocNotFoundError is thrown when
// did isn't in use.
// NOTE(review): the return statement (and any release of the unique_ptr
// before it) isn't visible in this listing.
263 HoneyDatabase::open_term_list(Xapian::docid did
) const
266 if (!termlist_table
.is_open())
267 throw_termlist_table_close_exception();
268 HoneyTermList
* tl
= new HoneyTermList(this, did
);
269 if (tl
->size() == 0) {
270 // It could be the document has no terms, but maybe it doesn't exist -
271 // in the latter case we ought to throw DocNotFoundError. FIXME: If
272 // the document has no terms, but does have values, we should be able
273 // to avoid this check.
275 // Put the pointer in a unique_ptr so it gets released if an exception
277 unique_ptr
<TermList
> tl_ptr(tl
);
279 // This will throw DocNotFoundError if did isn't in use.
280 (void)HoneyDatabase::get_doclength(did
);
// Open the term list for document did; for this backend it simply forwards
// to open_term_list().
287 HoneyDatabase::open_term_list_direct(Xapian::docid did
) const
289 // Same as open_term_list() except for MultiDatabase.
290 return HoneyDatabase::open_term_list(did
);
// Return a newly allocated HoneyAllTermsList for this database, constructed
// with prefix.
294 HoneyDatabase::open_allterms(const string
& prefix
) const
296 return new HoneyAllTermsList(this, prefix
);
// Return a newly allocated HoneyPositionList built from the position table
// for the given document id and term.
300 HoneyDatabase::open_position_list(Xapian::docid did
, const string
& term
) const
302 return new HoneyPositionList(position_table
, did
, term
);
// Return a newly allocated HoneyDocument for did, wired up to this database's
// value manager and document data table.
//
// get_doclength() is called first purely for its side effect of throwing
// Xapian::DocNotFoundError when did isn't in use.
// NOTE(review): `lazy` isn't used in the lines visible here; presumably the
// existence check is skipped when lazy is true - confirm.
305 Xapian::Document::Internal
*
306 HoneyDatabase::open_document(Xapian::docid did
, bool lazy
) const
310 // This will throw DocNotFoundError if did isn't in use.
311 (void)HoneyDatabase::get_doclength(did
);
313 return new HoneyDocument(this, did
, &value_manager
, &docdata_table
);
// Return the term list which the spelling table opens for word.
317 HoneyDatabase::open_spelling_termlist(const string
& word
) const
319 return spelling_table
.open_termlist(word
);
// Return a newly allocated HoneySpellingWordsList built from a cursor on the
// spelling table.
// NOTE(review): what happens when the cursor is NULL (no spelling table)
// isn't visible in this listing - presumably an early return; confirm.
323 HoneyDatabase::open_spelling_wordlist() const
325 auto cursor
= spelling_table
.cursor_get();
326 if (rare(cursor
== NULL
)) {
327 // No spelling table.
330 return new HoneySpellingWordsList(this, cursor
);
// Return the frequency which the spelling table reports for word.
334 HoneyDatabase::get_spelling_frequency(const string
& word
) const
336 return spelling_table
.get_word_frequency(word
);
// Not supported by this backend: throws Xapian::UnimplementedError.
340 HoneyDatabase::add_spelling(const string
& word
, Xapian::termcount freqinc
) const
344 throw Xapian::UnimplementedError("Honey backend doesn't support update");
// Not supported by this backend: throws Xapian::UnimplementedError.
348 HoneyDatabase::remove_spelling(const string
& word
,
349 Xapian::termcount freqdec
) const
353 throw Xapian::UnimplementedError("Honey backend doesn't support update");
// Return the term list which the synonym table opens for term.
357 HoneyDatabase::open_synonym_termlist(const string
& term
) const
359 return synonym_table
.open_termlist(term
);
// Return a newly allocated HoneySynonymTermList built from a cursor on the
// synonym table and prefix.
// NOTE(review): what happens when the cursor is NULL isn't visible in this
// listing - presumably an early return; confirm.
363 HoneyDatabase::open_synonym_keylist(const string
& prefix
) const
365 auto cursor
= synonym_table
.cursor_get();
366 if (rare(cursor
== NULL
)) {
370 return new HoneySynonymTermList(this, cursor
, prefix
);
// Not supported by this backend: throws Xapian::UnimplementedError.
374 HoneyDatabase::add_synonym(const string
& term
, const string
& synonym
) const
378 throw Xapian::UnimplementedError("Honey backend doesn't support update");
// Not supported by this backend: throws Xapian::UnimplementedError.
382 HoneyDatabase::remove_synonym(const string
& term
, const string
& synonym
) const
386 throw Xapian::UnimplementedError("Honey backend doesn't support update");
// Not supported by this backend: throws Xapian::UnimplementedError.
390 HoneyDatabase::clear_synonyms(const string
& term
) const
393 throw Xapian::UnimplementedError("Honey backend doesn't support update");
// Return the user metadata stored under key; it is read from the postlist
// table.
397 HoneyDatabase::get_metadata(const string
& key
) const
399 return postlist_table
.get_metadata(key
);
// Return a newly allocated HoneyMetadataTermList built from a cursor on the
// postlist table and prefix.
403 HoneyDatabase::open_metadata_keylist(const string
& prefix
) const
405 auto cursor
= postlist_table
.cursor_get();
// Unlike the spelling and synonym tables, a NULL cursor isn't handled here -
// it is only asserted against.
406 Assert(cursor
!= NULL
);
407 return new HoneyMetadataTermList(this, cursor
, prefix
);
// Not supported by this backend: throws Xapian::UnimplementedError.
411 HoneyDatabase::set_metadata(const string
& key
, const string
& value
)
415 throw Xapian::UnimplementedError("Honey backend doesn't support update");
// Reopen the database.  If the postlist table isn't open,
// HoneyTable::throw_database_closed() is called.
// NOTE(review): the rest of the body (including the return value) isn't
// visible in this listing.
419 HoneyDatabase::reopen()
421 if (!postlist_table
.is_open())
422 HoneyTable::throw_database_closed();
// Close all six tables.
// NOTE(review): the meaning of the `true` argument to HoneyTable::close()
// isn't visible here (presumably "permanent" - later operations on a closed
// table raise the database-closed error); confirm against HoneyTable.
427 HoneyDatabase::close()
429 docdata_table
.close(true);
430 postlist_table
.close(true);
431 position_table
.close(true);
432 spelling_table
.close(true);
433 synonym_table
.close(true);
434 termlist_table
.close(true);
// NOTE(review): the body of this method isn't visible in this listing, so its
// behaviour can't be described from here.
438 HoneyDatabase::request_document(Xapian::docid did
) const
// Return the revision recorded in the version file.
445 HoneyDatabase::get_revision() const
447 return version_file
.get_revision();
// Return the database UUID in string form, as provided by the version file.
451 HoneyDatabase::get_uuid() const
453 return version_file
.get_uuid_string();
// Identify the backend: returns BACKEND_HONEY.
// NOTE(review): how path_ptr is used isn't visible in this listing.
457 HoneyDatabase::get_backend_info(string
* path_ptr
) const
461 return BACKEND_HONEY
;
// Report the range of document ids in use through the first and last
// reference parameters.
//
// When the last docid equals the document count the ids form a contiguous
// range starting at 1; otherwise the postlist table is asked to work out the
// range.
// NOTE(review): several lines of this body (the handling between the two
// version_file reads, and the assignments in the contiguous case) aren't
// visible in this listing.
465 HoneyDatabase::get_used_docid_range(Xapian::docid
& first
,
466 Xapian::docid
& last
) const
468 auto doccount
= version_file
.get_doccount();
474 auto last_docid
= version_file
.get_last_docid();
475 if (last_docid
== doccount
) {
476 // Contiguous range starting at 1.
481 postlist_table
.get_used_docid_range(doccount
, first
, last
);
485 HoneyDatabase::get_description() const
487 string desc
= "Honey(";