Reduce overheads of PostList positional data support
[xapian.git] / xapian-core / backends / multi / multi_database.cc
blob3f8bb89ce0f5f1a87ef521c930425380eddec2d9
/** @file multi_database.cc
 * @brief Sharded database backend
 */
/* Copyright (C) 2017 Olly Betts
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
19 #include <config.h>
21 #include "multi_database.h"
23 #include "backends/backends.h"
24 #include "backends/multi.h"
25 #include "expand/ortermlist.h"
26 #include "expand/termlistmerger.h"
27 #include "multi_alltermslist.h"
28 #include "multi_postlist.h"
29 #include "multi_termlist.h"
30 #include "multi_valuelist.h"
32 #include <memory>
34 using namespace std;
36 MultiDatabase::size_type
37 MultiDatabase::size() const
39 return shards.size();
42 bool
43 MultiDatabase::reopen()
45 bool result = false;
46 for (auto&& shard : shards) {
47 if (shard->reopen()) {
48 result = true;
51 return result;
54 void
55 MultiDatabase::close()
57 for (auto&& shard : shards) {
58 shard->close();
62 PostList*
63 MultiDatabase::open_post_list(const string& term) const
65 PostList** postlists = new PostList*[shards.size()];
66 size_t count = 0;
67 try {
68 for (auto&& shard : shards) {
69 postlists[count] = shard->open_post_list(term);
70 ++count;
72 return new MultiPostList(count, postlists);
73 } catch (...) {
74 while (count)
75 delete postlists[--count];
76 delete [] postlists;
77 throw;
81 LeafPostList*
82 MultiDatabase::open_leaf_post_list(const string&, bool) const
84 // This should never get called.
85 Assert(false);
86 return NULL;
89 TermList*
90 MultiDatabase::open_term_list(Xapian::docid did) const
92 return new MultiTermList(this, MultiDatabase::open_term_list_direct(did));
95 TermList*
96 MultiDatabase::open_term_list_direct(Xapian::docid did) const
98 size_t n_shards = shards.size();
99 auto shard = shards[shard_number(did, n_shards)];
100 Xapian::docid shard_did = shard_docid(did, n_shards);
101 return shard->open_term_list(shard_did);
104 TermList*
105 MultiDatabase::open_allterms(const string& prefix) const
107 size_t count = 0;
108 TermList** termlists = new TermList*[shards.size()];
109 try {
110 for (auto&& shard : shards) {
111 termlists[count] = shard->open_allterms(prefix);
112 ++count;
114 return new MultiAllTermsList(count, termlists);
115 } catch (...) {
116 while (count)
117 delete termlists[--count];
118 delete [] termlists;
119 throw;
123 bool
124 MultiDatabase::has_positions() const
126 for (auto&& shard : shards) {
127 if (shard->has_positions()) {
128 return true;
131 return false;
134 PositionList*
135 MultiDatabase::open_position_list(Xapian::docid did, const string& term) const
137 auto n_shards = shards.size();
138 auto shard = shards[shard_number(did, n_shards)];
139 auto shard_did = shard_docid(did, n_shards);
140 return shard->open_position_list(shard_did, term);
143 Xapian::doccount
144 MultiDatabase::get_doccount() const
146 Xapian::doccount result = 0;
147 for (auto&& shard : shards) {
148 auto old_result = result;
149 result += shard->get_doccount();
150 if (result < old_result)
151 throw Xapian::DatabaseError("doccount overflowed!");
153 return result;
156 Xapian::docid
157 MultiDatabase::get_lastdocid() const
159 Xapian::docid result = 0;
160 auto n_shards = shards.size();
161 for (size_t shard = 0; shard != n_shards; ++shard) {
162 Xapian::docid shard_lastdocid = shards[shard]->get_lastdocid();
163 if (shard_lastdocid == 0) {
164 // This shard is empty, so doesn't influence lastdocid for the
165 // combined database.
166 continue;
168 result = max(result, unshard(shard_lastdocid, shard, n_shards));
170 return result;
173 Xapian::totallength
174 MultiDatabase::get_total_length() const
176 Xapian::totallength result = 0;
177 for (auto&& shard : shards) {
178 auto old_result = result;
179 result += shard->get_total_length();
180 if (result < old_result)
181 throw Xapian::DatabaseError("Total document length overflowed!");
183 return result;
186 void
187 MultiDatabase::get_freqs(const string& term,
188 Xapian::doccount* tf_ptr,
189 Xapian::termcount* cf_ptr) const
191 Assert(!term.empty());
193 Xapian::doccount shard_tf;
194 Xapian::doccount* shard_tf_ptr = tf_ptr ? &shard_tf : NULL;
195 Xapian::doccount total_tf = 0;
197 Xapian::termcount shard_cf;
198 Xapian::termcount* shard_cf_ptr = cf_ptr ? &shard_cf : NULL;
199 Xapian::termcount total_cf = 0;
201 for (auto&& shard : shards) {
202 shard->get_freqs(term, shard_tf_ptr, shard_cf_ptr);
203 if (shard_tf_ptr) {
204 auto old_tf = total_tf;
205 total_tf += *shard_tf_ptr;
206 if (total_tf < old_tf)
207 throw Xapian::DatabaseError("termfreq overflowed!");
209 if (shard_cf_ptr) {
210 auto old_cf = total_cf;
211 total_cf += *shard_cf_ptr;
212 if (total_cf < old_cf)
213 throw Xapian::DatabaseError("Collection freq overflowed!");
216 if (tf_ptr) {
217 *tf_ptr = total_tf;
219 if (cf_ptr) {
220 *cf_ptr = total_cf;
224 Xapian::doccount
225 MultiDatabase::get_value_freq(Xapian::valueno slot) const
227 Xapian::termcount result = 0;
228 for (auto&& shard : shards) {
229 auto old_result = result;
230 result += shard->get_value_freq(slot);
231 if (result < old_result)
232 throw Xapian::DatabaseError("Value freq overflowed!");
234 return result;
237 string
238 MultiDatabase::get_value_lower_bound(Xapian::valueno slot) const
240 string result;
241 for (auto&& shard : shards) {
242 string shard_result = shard->get_value_lower_bound(slot);
243 if (shard_result.empty())
244 continue;
245 if (result.empty() || shard_result < result)
246 result = std::move(shard_result);
248 return result;
251 string
252 MultiDatabase::get_value_upper_bound(Xapian::valueno slot) const
254 string result;
255 for (auto&& shard : shards) {
256 string shard_result = shard->get_value_upper_bound(slot);
257 if (shard_result > result)
258 result = std::move(shard_result);
260 return result;
263 Xapian::termcount
264 MultiDatabase::get_doclength_lower_bound() const
266 // We want the smallest answer from amongst the shards, except that 0 means
267 // that all documents have length 0 (including the special case of there
268 // being no documents), so any non-zero answer should "beat" 0. To achieve
269 // this we find the *maximum* after negating each of the values (which
270 // since Xapian::termcount is an unsigned type leaves 0 alone but flips the
271 // order of all other values), then negate the answer again at the end.
272 static_assert(std::is_unsigned<Xapian::termcount>::value,
273 "Unsigned type required");
274 Xapian::termcount result = 0;
275 for (auto&& shard : shards) {
276 Xapian::termcount shard_result = -shard->get_doclength_lower_bound();
277 result = max(result, shard_result);
279 return -result;
282 Xapian::termcount
283 MultiDatabase::get_doclength_upper_bound() const
285 Xapian::termcount result = 0;
286 for (auto&& shard : shards) {
287 result = max(result, shard->get_doclength_upper_bound());
289 return result;
292 Xapian::termcount
293 MultiDatabase::get_wdf_upper_bound(const string& term) const
295 Assert(!term.empty());
297 Xapian::termcount result = 0;
298 for (auto&& shard : shards) {
299 result = max(result, shard->get_wdf_upper_bound(term));
301 return result;
304 ValueList*
305 MultiDatabase::open_value_list(Xapian::valueno slot) const
307 SubValueList** valuelists = new SubValueList*[shards.size()];
308 size_t count = 0;
309 try {
310 for (auto&& shard : shards) {
311 ValueList* vl = shard->open_value_list(slot);
312 valuelists[count] = new SubValueList(vl, count);
313 ++count;
315 return new MultiValueList(count, valuelists, slot);
316 } catch (...) {
317 while (count)
318 delete valuelists[--count];
319 delete [] valuelists;
320 throw;
324 Xapian::termcount
325 MultiDatabase::get_doclength(Xapian::docid did) const
327 Assert(did != 0);
329 auto n_shards = shards.size();
330 auto shard = shards[shard_number(did, n_shards)];
331 auto shard_did = shard_docid(did, n_shards);
332 return shard->get_doclength(shard_did);
335 Xapian::termcount
336 MultiDatabase::get_unique_terms(Xapian::docid did) const
338 Assert(did != 0);
340 auto n_shards = shards.size();
341 auto shard = shards[shard_number(did, n_shards)];
342 auto shard_did = shard_docid(did, n_shards);
343 return shard->get_unique_terms(shard_did);
346 Xapian::Document::Internal*
347 MultiDatabase::open_document(Xapian::docid did, bool lazy) const
349 Assert(did != 0);
351 auto n_shards = shards.size();
352 auto shard = shards[shard_number(did, n_shards)];
353 auto shard_did = shard_docid(did, n_shards);
354 return shard->open_document(shard_did, lazy);
357 bool
358 MultiDatabase::term_exists(const string& term) const
360 for (auto&& shard : shards) {
361 if (shard->term_exists(term))
362 return true;
364 return false;
367 void
368 MultiDatabase::keep_alive()
370 for (auto&& shard : shards) {
371 shard->keep_alive();
375 TermList*
376 MultiDatabase::open_spelling_termlist(const string& word) const
378 vector<TermList*> termlists;
379 termlists.reserve(shards.size());
381 try {
382 for (auto&& shard : shards) {
383 TermList* termlist = shard->open_spelling_termlist(word);
384 if (!termlist)
385 continue;
386 termlists.push_back(termlist);
389 return make_termlist_merger(termlists);
390 } catch (...) {
391 for (auto&& termlist : termlists)
392 delete termlist;
393 throw;
397 TermList*
398 MultiDatabase::open_spelling_wordlist() const
400 vector<TermList*> termlists;
401 termlists.reserve(shards.size());
403 try {
404 for (auto&& shard : shards) {
405 TermList* termlist = shard->open_spelling_wordlist();
406 if (!termlist)
407 continue;
408 termlists.push_back(termlist);
411 return make_termlist_merger<FreqAdderOrTermList>(termlists);
412 } catch (...) {
413 for (auto&& termlist : termlists)
414 delete termlist;
415 throw;
419 Xapian::doccount
420 MultiDatabase::get_spelling_frequency(const string& word) const
422 Xapian::doccount result = 0;
423 for (auto&& shard : shards) {
424 auto old_result = result;
425 result += shard->get_spelling_frequency(word);
426 if (result < old_result)
427 throw Xapian::DatabaseError("Spelling frequency overflowed!");
429 return result;
432 TermList*
433 MultiDatabase::open_synonym_termlist(const string& term) const
435 vector<TermList*> termlists;
436 termlists.reserve(shards.size());
438 try {
439 for (auto&& shard : shards) {
440 TermList* termlist = shard->open_synonym_termlist(term);
441 if (!termlist)
442 continue;
443 termlists.push_back(termlist);
446 return make_termlist_merger(termlists);
447 } catch (...) {
448 for (auto&& termlist : termlists)
449 delete termlist;
450 throw;
454 TermList*
455 MultiDatabase::open_synonym_keylist(const string& prefix) const
457 vector<TermList*> termlists;
458 termlists.reserve(shards.size());
460 try {
461 for (auto&& shard : shards) {
462 TermList* termlist = shard->open_synonym_keylist(prefix);
463 if (!termlist)
464 continue;
465 termlists.push_back(termlist);
468 return make_termlist_merger(termlists);
469 } catch (...) {
470 for (auto&& termlist : termlists)
471 delete termlist;
472 throw;
476 string
477 MultiDatabase::get_metadata(const string& key) const
479 return shards[0]->get_metadata(key);
482 TermList*
483 MultiDatabase::open_metadata_keylist(const string& prefix) const
485 return shards[0]->open_metadata_keylist(prefix);
488 string
489 MultiDatabase::get_uuid() const
491 string uuid;
492 for (auto&& shard : shards) {
493 const string& sub_uuid = shard->get_uuid();
494 // If any of the sub-databases have no uuid, we can't make a uuid for
495 // the combined database.
496 if (sub_uuid.empty())
497 return sub_uuid;
498 if (!uuid.empty())
499 uuid += ':';
500 uuid += sub_uuid;
502 return uuid;
505 bool
506 MultiDatabase::locked() const
508 for (auto&& shard : shards) {
509 if (shard->locked()) {
510 return true;
513 return false;
516 void
517 MultiDatabase::write_changesets_to_fd(int,
518 const std::string&,
519 bool,
520 Xapian::ReplicationInfo*)
522 throw Xapian::InvalidOperationError("write_changesets_to_fd() with "
523 "more than one subdatabase");
526 Xapian::rev
527 MultiDatabase::get_revision() const
529 throw Xapian::InvalidOperationError("Database::get_revision() with "
530 "more than one subdatabase");
533 void
534 MultiDatabase::invalidate_doc_object(Xapian::Document::Internal*) const
536 // This method should only be called on a single shard.
537 Assert(false);
541 MultiDatabase::get_backend_info(string*) const
543 // This method should only be called on a single shard.
544 Assert(false);
545 return BACKEND_UNKNOWN;
548 void
549 MultiDatabase::commit()
551 for (auto&& shard : shards) {
552 shard->commit();
556 void
557 MultiDatabase::cancel()
559 for (auto&& shard : shards) {
560 shard->cancel();
564 void
565 MultiDatabase::begin_transaction(bool flushed)
567 for (auto&& shard : shards) {
568 shard->begin_transaction(flushed);
572 void
573 MultiDatabase::end_transaction_(bool do_commit)
575 for (auto&& shard : shards) {
576 shard->end_transaction(do_commit);
580 Xapian::docid
581 MultiDatabase::add_document(const Xapian::Document& doc)
583 // With a single shard, add_document() uses docid (get_lastdocid() + 1)
584 // which seems a sensible invariant to preserve with multiple shards.
585 Xapian::docid did = get_lastdocid() + 1;
586 if (rare(did == 0)) {
587 throw Xapian::DatabaseError("Run out of docids - you'll have to use "
588 "copydatabase to eliminate any gaps "
589 "before you can add more documents");
592 auto n_shards = shards.size();
593 auto shard = shards[shard_number(did, n_shards)];
594 shard->replace_document(shard_docid(did, n_shards), doc);
595 return did;
598 void
599 MultiDatabase::delete_document(Xapian::docid did)
601 auto n_shards = shards.size();
602 auto shard = shards[shard_number(did, n_shards)];
603 shard->delete_document(shard_docid(did, n_shards));
606 void
607 MultiDatabase::delete_document(const string& term)
609 for (auto&& shard : shards) {
610 shard->delete_document(term);
614 void
615 MultiDatabase::replace_document(Xapian::docid did, const Xapian::Document& doc)
617 auto n_shards = shards.size();
618 auto shard = shards[shard_number(did, n_shards)];
619 shard->replace_document(shard_docid(did, n_shards), doc);
622 Xapian::docid
623 MultiDatabase::replace_document(const string& term, const Xapian::Document& doc)
625 auto n_shards = shards.size();
626 unique_ptr<PostList> pl(open_post_list(term));
627 pl->next();
628 // If no unique_term in the database, this is just an add_document().
629 if (pl->at_end()) {
630 // Which database will the next never used docid be in?
631 Xapian::docid did = get_lastdocid() + 1;
632 if (rare(did == 0)) {
633 throw Xapian::DatabaseError("Run out of docids - you'll have to "
634 "use copydatabase to eliminate any "
635 "gaps before you can add more "
636 "documents");
638 auto shard = shards[shard_number(did, n_shards)];
639 return shard->add_document(doc);
642 Xapian::docid result = pl->get_docid();
643 auto replacing_shard = shards[shard_number(result, n_shards)];
644 replacing_shard->replace_document(shard_docid(result, n_shards), doc);
646 // Delete any other occurrences of the unique term.
647 while (pl->next(), !pl->at_end()) {
648 Xapian::docid did = pl->get_docid();
649 auto shard = shards[shard_number(did, n_shards)];
650 shard->delete_document(shard_docid(did, n_shards));
653 return result;
656 void
657 MultiDatabase::request_document(Xapian::docid did) const
659 Assert(did != 0);
661 auto n_shards = shards.size();
662 auto shard = shards[shard_number(did, n_shards)];
663 auto shard_did = shard_docid(did, n_shards);
664 shard->request_document(shard_did);
667 void
668 MultiDatabase::add_spelling(const string& word,
669 Xapian::termcount freqinc) const
671 shards[0]->add_spelling(word, freqinc);
674 Xapian::termcount
675 MultiDatabase::remove_spelling(const string& word,
676 Xapian::termcount freqdec) const
678 for (auto&& shard : shards) {
679 freqdec = shard->remove_spelling(word, freqdec);
680 if (freqdec == 0)
681 break;
683 return freqdec;
686 void
687 MultiDatabase::add_synonym(const string& term,
688 const string& synonym) const
690 shards[0]->add_synonym(term, synonym);
693 void
694 MultiDatabase::remove_synonym(const string& term,
695 const string& synonym) const
697 for (auto&& shard : shards) {
698 shard->remove_synonym(term, synonym);
702 void
703 MultiDatabase::clear_synonyms(const string& term) const
705 for (auto&& shard : shards) {
706 shard->clear_synonyms(term);
710 void
711 MultiDatabase::set_metadata(const string& key, const string& value)
713 shards[0]->set_metadata(key, value);
716 string
717 MultiDatabase::get_description() const
719 string desc;
720 for (auto&& shard : shards) {
721 if (!desc.empty()) {
722 desc += ", ";
724 desc += shard->get_description();
726 desc += ')';
727 return desc;