1 /** @file multi_database.cc
2 * @brief Sharded database backend
4 /* Copyright (C) 2017 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #include "multi_database.h"
23 #include "backends/backends.h"
24 #include "backends/multi.h"
25 #include "expand/ortermlist.h"
26 #include "expand/termlistmerger.h"
27 #include "multi_alltermslist.h"
28 #include "multi_postlist.h"
29 #include "multi_termlist.h"
30 #include "multi_valuelist.h"
36 MultiDatabase::size_type
37 MultiDatabase::size() const
43 MultiDatabase::reopen()
46 for (auto&& shard
: shards
) {
47 if (shard
->reopen()) {
55 MultiDatabase::close()
57 for (auto&& shard
: shards
) {
63 MultiDatabase::open_post_list(const string
& term
) const
65 PostList
** postlists
= new PostList
*[shards
.size()];
68 for (auto&& shard
: shards
) {
69 postlists
[count
] = shard
->open_post_list(term
);
72 return new MultiPostList(count
, postlists
);
75 delete postlists
[--count
];
82 MultiDatabase::open_leaf_post_list(const string
&, bool) const
84 // This should never get called.
90 MultiDatabase::open_term_list(Xapian::docid did
) const
92 return new MultiTermList(this, MultiDatabase::open_term_list_direct(did
));
96 MultiDatabase::open_term_list_direct(Xapian::docid did
) const
98 size_t n_shards
= shards
.size();
99 auto shard
= shards
[shard_number(did
, n_shards
)];
100 Xapian::docid shard_did
= shard_docid(did
, n_shards
);
101 return shard
->open_term_list(shard_did
);
105 MultiDatabase::open_allterms(const string
& prefix
) const
108 TermList
** termlists
= new TermList
*[shards
.size()];
110 for (auto&& shard
: shards
) {
111 termlists
[count
] = shard
->open_allterms(prefix
);
114 return new MultiAllTermsList(count
, termlists
);
117 delete termlists
[--count
];
124 MultiDatabase::has_positions() const
126 for (auto&& shard
: shards
) {
127 if (shard
->has_positions()) {
135 MultiDatabase::open_position_list(Xapian::docid did
, const string
& term
) const
137 auto n_shards
= shards
.size();
138 auto shard
= shards
[shard_number(did
, n_shards
)];
139 auto shard_did
= shard_docid(did
, n_shards
);
140 return shard
->open_position_list(shard_did
, term
);
144 MultiDatabase::get_doccount() const
146 Xapian::doccount result
= 0;
147 for (auto&& shard
: shards
) {
148 auto old_result
= result
;
149 result
+= shard
->get_doccount();
150 if (result
< old_result
)
151 throw Xapian::DatabaseError("doccount overflowed!");
157 MultiDatabase::get_lastdocid() const
159 Xapian::docid result
= 0;
160 auto n_shards
= shards
.size();
161 for (size_t shard
= 0; shard
!= n_shards
; ++shard
) {
162 Xapian::docid shard_lastdocid
= shards
[shard
]->get_lastdocid();
163 if (shard_lastdocid
== 0) {
164 // This shard is empty, so doesn't influence lastdocid for the
165 // combined database.
168 result
= max(result
, unshard(shard_lastdocid
, shard
, n_shards
));
174 MultiDatabase::get_total_length() const
176 Xapian::totallength result
= 0;
177 for (auto&& shard
: shards
) {
178 auto old_result
= result
;
179 result
+= shard
->get_total_length();
180 if (result
< old_result
)
181 throw Xapian::DatabaseError("Total document length overflowed!");
187 MultiDatabase::get_freqs(const string
& term
,
188 Xapian::doccount
* tf_ptr
,
189 Xapian::termcount
* cf_ptr
) const
191 Assert(!term
.empty());
193 Xapian::doccount shard_tf
;
194 Xapian::doccount
* shard_tf_ptr
= tf_ptr
? &shard_tf
: NULL
;
195 Xapian::doccount total_tf
= 0;
197 Xapian::termcount shard_cf
;
198 Xapian::termcount
* shard_cf_ptr
= cf_ptr
? &shard_cf
: NULL
;
199 Xapian::termcount total_cf
= 0;
201 for (auto&& shard
: shards
) {
202 shard
->get_freqs(term
, shard_tf_ptr
, shard_cf_ptr
);
204 auto old_tf
= total_tf
;
205 total_tf
+= *shard_tf_ptr
;
206 if (total_tf
< old_tf
)
207 throw Xapian::DatabaseError("termfreq overflowed!");
210 auto old_cf
= total_cf
;
211 total_cf
+= *shard_cf_ptr
;
212 if (total_cf
< old_cf
)
213 throw Xapian::DatabaseError("Collection freq overflowed!");
225 MultiDatabase::get_value_freq(Xapian::valueno slot
) const
227 Xapian::termcount result
= 0;
228 for (auto&& shard
: shards
) {
229 auto old_result
= result
;
230 result
+= shard
->get_value_freq(slot
);
231 if (result
< old_result
)
232 throw Xapian::DatabaseError("Value freq overflowed!");
238 MultiDatabase::get_value_lower_bound(Xapian::valueno slot
) const
241 for (auto&& shard
: shards
) {
242 string shard_result
= shard
->get_value_lower_bound(slot
);
243 if (shard_result
.empty())
245 if (result
.empty() || shard_result
< result
)
246 result
= std::move(shard_result
);
252 MultiDatabase::get_value_upper_bound(Xapian::valueno slot
) const
255 for (auto&& shard
: shards
) {
256 string shard_result
= shard
->get_value_upper_bound(slot
);
257 if (shard_result
> result
)
258 result
= std::move(shard_result
);
264 MultiDatabase::get_doclength_lower_bound() const
266 // We want the smallest answer from amongst the shards, except that 0 means
267 // that all documents have length 0 (including the special case of there
268 // being no documents), so any non-zero answer should "beat" 0. To achieve
269 // this we find the *maximum* after negating each of the values (which
270 // since Xapian::termcount is an unsigned type leaves 0 alone but flips the
271 // order of all other values), then negate the answer again at the end.
272 static_assert(std::is_unsigned
<Xapian::termcount
>::value
,
273 "Unsigned type required");
274 Xapian::termcount result
= 0;
275 for (auto&& shard
: shards
) {
276 Xapian::termcount shard_result
= -shard
->get_doclength_lower_bound();
277 result
= max(result
, shard_result
);
283 MultiDatabase::get_doclength_upper_bound() const
285 Xapian::termcount result
= 0;
286 for (auto&& shard
: shards
) {
287 result
= max(result
, shard
->get_doclength_upper_bound());
293 MultiDatabase::get_wdf_upper_bound(const string
& term
) const
295 Assert(!term
.empty());
297 Xapian::termcount result
= 0;
298 for (auto&& shard
: shards
) {
299 result
= max(result
, shard
->get_wdf_upper_bound(term
));
305 MultiDatabase::open_value_list(Xapian::valueno slot
) const
307 SubValueList
** valuelists
= new SubValueList
*[shards
.size()];
310 for (auto&& shard
: shards
) {
311 ValueList
* vl
= shard
->open_value_list(slot
);
312 valuelists
[count
] = new SubValueList(vl
, count
);
315 return new MultiValueList(count
, valuelists
, slot
);
318 delete valuelists
[--count
];
319 delete [] valuelists
;
325 MultiDatabase::get_doclength(Xapian::docid did
) const
329 auto n_shards
= shards
.size();
330 auto shard
= shards
[shard_number(did
, n_shards
)];
331 auto shard_did
= shard_docid(did
, n_shards
);
332 return shard
->get_doclength(shard_did
);
336 MultiDatabase::get_unique_terms(Xapian::docid did
) const
340 auto n_shards
= shards
.size();
341 auto shard
= shards
[shard_number(did
, n_shards
)];
342 auto shard_did
= shard_docid(did
, n_shards
);
343 return shard
->get_unique_terms(shard_did
);
346 Xapian::Document::Internal
*
347 MultiDatabase::open_document(Xapian::docid did
, bool lazy
) const
351 auto n_shards
= shards
.size();
352 auto shard
= shards
[shard_number(did
, n_shards
)];
353 auto shard_did
= shard_docid(did
, n_shards
);
354 return shard
->open_document(shard_did
, lazy
);
358 MultiDatabase::term_exists(const string
& term
) const
360 for (auto&& shard
: shards
) {
361 if (shard
->term_exists(term
))
368 MultiDatabase::keep_alive()
370 for (auto&& shard
: shards
) {
376 MultiDatabase::open_spelling_termlist(const string
& word
) const
378 vector
<TermList
*> termlists
;
379 termlists
.reserve(shards
.size());
382 for (auto&& shard
: shards
) {
383 TermList
* termlist
= shard
->open_spelling_termlist(word
);
386 termlists
.push_back(termlist
);
389 return make_termlist_merger(termlists
);
391 for (auto&& termlist
: termlists
)
398 MultiDatabase::open_spelling_wordlist() const
400 vector
<TermList
*> termlists
;
401 termlists
.reserve(shards
.size());
404 for (auto&& shard
: shards
) {
405 TermList
* termlist
= shard
->open_spelling_wordlist();
408 termlists
.push_back(termlist
);
411 return make_termlist_merger
<FreqAdderOrTermList
>(termlists
);
413 for (auto&& termlist
: termlists
)
420 MultiDatabase::get_spelling_frequency(const string
& word
) const
422 Xapian::doccount result
= 0;
423 for (auto&& shard
: shards
) {
424 auto old_result
= result
;
425 result
+= shard
->get_spelling_frequency(word
);
426 if (result
< old_result
)
427 throw Xapian::DatabaseError("Spelling frequency overflowed!");
433 MultiDatabase::open_synonym_termlist(const string
& term
) const
435 vector
<TermList
*> termlists
;
436 termlists
.reserve(shards
.size());
439 for (auto&& shard
: shards
) {
440 TermList
* termlist
= shard
->open_synonym_termlist(term
);
443 termlists
.push_back(termlist
);
446 return make_termlist_merger(termlists
);
448 for (auto&& termlist
: termlists
)
455 MultiDatabase::open_synonym_keylist(const string
& prefix
) const
457 vector
<TermList
*> termlists
;
458 termlists
.reserve(shards
.size());
461 for (auto&& shard
: shards
) {
462 TermList
* termlist
= shard
->open_synonym_keylist(prefix
);
465 termlists
.push_back(termlist
);
468 return make_termlist_merger(termlists
);
470 for (auto&& termlist
: termlists
)
477 MultiDatabase::get_metadata(const string
& key
) const
479 return shards
[0]->get_metadata(key
);
483 MultiDatabase::open_metadata_keylist(const string
& prefix
) const
485 return shards
[0]->open_metadata_keylist(prefix
);
489 MultiDatabase::get_uuid() const
492 for (auto&& shard
: shards
) {
493 const string
& sub_uuid
= shard
->get_uuid();
494 // If any of the sub-databases have no uuid, we can't make a uuid for
495 // the combined database.
496 if (sub_uuid
.empty())
506 MultiDatabase::locked() const
508 for (auto&& shard
: shards
) {
509 if (shard
->locked()) {
517 MultiDatabase::write_changesets_to_fd(int,
520 Xapian::ReplicationInfo
*)
522 throw Xapian::InvalidOperationError("write_changesets_to_fd() with "
523 "more than one subdatabase");
527 MultiDatabase::get_revision() const
529 throw Xapian::InvalidOperationError("Database::get_revision() with "
530 "more than one subdatabase");
534 MultiDatabase::invalidate_doc_object(Xapian::Document::Internal
*) const
536 // This method should only be called on a single shard.
541 MultiDatabase::get_backend_info(string
*) const
543 // This method should only be called on a single shard.
545 return BACKEND_UNKNOWN
;
549 MultiDatabase::commit()
551 for (auto&& shard
: shards
) {
557 MultiDatabase::cancel()
559 for (auto&& shard
: shards
) {
565 MultiDatabase::begin_transaction(bool flushed
)
567 for (auto&& shard
: shards
) {
568 shard
->begin_transaction(flushed
);
573 MultiDatabase::end_transaction_(bool do_commit
)
575 for (auto&& shard
: shards
) {
576 shard
->end_transaction(do_commit
);
581 MultiDatabase::add_document(const Xapian::Document
& doc
)
583 // With a single shard, add_document() uses docid (get_lastdocid() + 1)
584 // which seems a sensible invariant to preserve with multiple shards.
585 Xapian::docid did
= get_lastdocid() + 1;
586 if (rare(did
== 0)) {
587 throw Xapian::DatabaseError("Run out of docids - you'll have to use "
588 "copydatabase to eliminate any gaps "
589 "before you can add more documents");
592 auto n_shards
= shards
.size();
593 auto shard
= shards
[shard_number(did
, n_shards
)];
594 shard
->replace_document(shard_docid(did
, n_shards
), doc
);
599 MultiDatabase::delete_document(Xapian::docid did
)
601 auto n_shards
= shards
.size();
602 auto shard
= shards
[shard_number(did
, n_shards
)];
603 shard
->delete_document(shard_docid(did
, n_shards
));
607 MultiDatabase::delete_document(const string
& term
)
609 for (auto&& shard
: shards
) {
610 shard
->delete_document(term
);
615 MultiDatabase::replace_document(Xapian::docid did
, const Xapian::Document
& doc
)
617 auto n_shards
= shards
.size();
618 auto shard
= shards
[shard_number(did
, n_shards
)];
619 shard
->replace_document(shard_docid(did
, n_shards
), doc
);
623 MultiDatabase::replace_document(const string
& term
, const Xapian::Document
& doc
)
625 auto n_shards
= shards
.size();
626 unique_ptr
<PostList
> pl(open_post_list(term
));
628 // If no unique_term in the database, this is just an add_document().
630 // Which database will the next never used docid be in?
631 Xapian::docid did
= get_lastdocid() + 1;
632 if (rare(did
== 0)) {
633 throw Xapian::DatabaseError("Run out of docids - you'll have to "
634 "use copydatabase to eliminate any "
635 "gaps before you can add more "
638 auto shard
= shards
[shard_number(did
, n_shards
)];
639 return shard
->add_document(doc
);
642 Xapian::docid result
= pl
->get_docid();
643 auto replacing_shard
= shards
[shard_number(result
, n_shards
)];
644 replacing_shard
->replace_document(shard_docid(result
, n_shards
), doc
);
646 // Delete any other occurrences of the unique term.
647 while (pl
->next(), !pl
->at_end()) {
648 Xapian::docid did
= pl
->get_docid();
649 auto shard
= shards
[shard_number(did
, n_shards
)];
650 shard
->delete_document(shard_docid(did
, n_shards
));
657 MultiDatabase::request_document(Xapian::docid did
) const
661 auto n_shards
= shards
.size();
662 auto shard
= shards
[shard_number(did
, n_shards
)];
663 auto shard_did
= shard_docid(did
, n_shards
);
664 shard
->request_document(shard_did
);
668 MultiDatabase::add_spelling(const string
& word
,
669 Xapian::termcount freqinc
) const
671 shards
[0]->add_spelling(word
, freqinc
);
675 MultiDatabase::remove_spelling(const string
& word
,
676 Xapian::termcount freqdec
) const
678 for (auto&& shard
: shards
) {
679 freqdec
= shard
->remove_spelling(word
, freqdec
);
687 MultiDatabase::add_synonym(const string
& term
,
688 const string
& synonym
) const
690 shards
[0]->add_synonym(term
, synonym
);
694 MultiDatabase::remove_synonym(const string
& term
,
695 const string
& synonym
) const
697 for (auto&& shard
: shards
) {
698 shard
->remove_synonym(term
, synonym
);
703 MultiDatabase::clear_synonyms(const string
& term
) const
705 for (auto&& shard
: shards
) {
706 shard
->clear_synonyms(term
);
711 MultiDatabase::set_metadata(const string
& key
, const string
& value
)
713 shards
[0]->set_metadata(key
, value
);
717 MultiDatabase::get_description() const
720 for (auto&& shard
: shards
) {
724 desc
+= shard
->get_description();