Add Xapian::DOC_ASSUME_VALID flag
[xapian.git] / xapian-core / api / omdatabase.cc
blob3075e4772ef51c986e5beff3ec85075f39e8b7e5
1 /* omdatabase.cc: External interface for running queries
3 * Copyright 1999,2000,2001 BrightStation PLC
4 * Copyright 2001,2002 Ananova Ltd
5 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2013,2014,2016 Olly Betts
6 * Copyright 2006,2008 Lemur Consulting Ltd
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License, or (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
21 * USA
24 #include <config.h>
26 #include "autoptr.h"
28 #include <xapian/constants.h>
29 #include <xapian/error.h>
30 #include <xapian/positioniterator.h>
31 #include <xapian/postingiterator.h>
32 #include <xapian/termiterator.h>
33 #include <xapian/unicode.h>
35 #include "omassert.h"
36 #include "debuglog.h"
37 #include "backends/alltermslist.h"
38 #include "backends/multi/multi_alltermslist.h"
39 #include "backends/multi/multi_postlist.h"
40 #include "backends/multi/multi_termlist.h"
41 #include "backends/multivaluelist.h"
42 #include "backends/database.h"
43 #include "editdistance.h"
44 #include "expand/ortermlist.h"
45 #include "internaltypes.h"
46 #include "noreturn.h"
47 #include "pack.h"
49 #include <algorithm>
50 #include <cstdlib> // For abs().
51 #include <cstring>
52 #include <vector>
54 using namespace std;
55 using Xapian::Internal::intrusive_ptr;
57 XAPIAN_NORETURN(static void docid_zero_invalid());
58 static void docid_zero_invalid()
60 throw Xapian::InvalidArgumentError("Document ID 0 is invalid");
63 XAPIAN_NORETURN(static void no_subdatabases());
64 static void no_subdatabases()
66 throw Xapian::InvalidOperationError("No subdatabases");
69 XAPIAN_NORETURN(static void empty_metadata_key());
70 static void empty_metadata_key()
72 throw Xapian::InvalidArgumentError("Empty metadata keys are invalid");
75 inline size_t
76 sub_db(Xapian::docid did, size_t n_dbs)
78 return (did - 1) % n_dbs;
81 inline size_t
82 sub_docid(Xapian::docid did, size_t n_dbs)
84 return (did - 1) / n_dbs + 1;
87 namespace Xapian {
89 Database::Database()
91 LOGCALL_CTOR(API, "Database", NO_ARGS);
94 Database::Database(Database::Internal *internal_)
96 LOGCALL_CTOR(API, "Database", internal_);
97 intrusive_ptr<Database::Internal> newi(internal_);
98 internal.push_back(newi);
101 Database::Database(const Database &other)
103 LOGCALL_CTOR(API, "Database", other);
104 internal = other.internal;
107 void
108 Database::operator=(const Database &other)
110 LOGCALL_VOID(API, "Database::operator=", other);
111 internal = other.internal;
114 Database::~Database()
116 LOGCALL_DTOR(API, "Database");
119 bool
120 Database::reopen()
122 LOGCALL(API, bool, "Database::reopen", NO_ARGS);
123 bool maybe_changed = false;
124 vector<intrusive_ptr<Database::Internal> >::iterator i;
125 for (i = internal.begin(); i != internal.end(); ++i) {
126 if ((*i)->reopen())
127 maybe_changed = true;
129 RETURN(maybe_changed);
132 void
133 Database::close()
135 LOGCALL_VOID(API, "Database::close", NO_ARGS);
136 vector<intrusive_ptr<Database::Internal> >::iterator i;
137 for (i = internal.begin(); i != internal.end(); ++i) {
138 (*i)->close();
142 void
143 Database::add_database(const Database & database)
145 LOGCALL_VOID(API, "Database::add_database", database);
146 if (this == &database) {
147 LOGLINE(API, "Database added to itself");
148 throw Xapian::InvalidArgumentError("Can't add a Database to itself");
150 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
151 for (i = database.internal.begin(); i != database.internal.end(); ++i) {
152 internal.push_back(*i);
156 PostingIterator
157 Database::postlist_begin(const string &tname) const
159 LOGCALL(API, PostingIterator, "Database::postlist_begin", tname);
161 // Don't bother checking that the term exists first. If it does, we
162 // just end up doing more work, and if it doesn't, we save very little
163 // work.
165 // Handle the common case of a single database specially.
166 if (internal.size() == 1)
167 RETURN(PostingIterator(internal[0]->open_post_list(tname)));
169 if (rare(internal.empty()))
170 RETURN(PostingIterator());
172 vector<LeafPostList *> pls;
173 try {
174 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
175 for (i = internal.begin(); i != internal.end(); ++i) {
176 pls.push_back((*i)->open_post_list(tname));
177 pls.back()->next();
179 Assert(pls.begin() != pls.end());
180 } catch (...) {
181 vector<LeafPostList *>::iterator i;
182 for (i = pls.begin(); i != pls.end(); ++i) {
183 delete *i;
184 *i = 0;
186 throw;
189 RETURN(PostingIterator(new MultiPostList(pls, *this)));
192 TermIterator
193 Database::termlist_begin(Xapian::docid did) const
195 LOGCALL(API, TermIterator, "Database::termlist_begin", did);
196 if (did == 0)
197 docid_zero_invalid();
199 unsigned int multiplier = internal.size();
200 if (rare(multiplier == 0))
201 no_subdatabases();
202 TermList *tl;
203 if (multiplier == 1) {
204 // There's no need for the MultiTermList wrapper in the common case
205 // where we're only dealing with a single database.
206 tl = internal[0]->open_term_list(did);
207 } else {
208 Assert(multiplier != 0);
209 Xapian::doccount n = (did - 1) % multiplier; // which actual database
210 Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
212 tl = new MultiTermList(internal[n]->open_term_list(m), *this, n);
214 RETURN(TermIterator(tl));
217 TermIterator
218 Database::allterms_begin(const std::string & prefix) const
220 LOGCALL(API, TermIterator, "Database::allterms_begin", NO_ARGS);
221 TermList * tl;
222 if (rare(internal.size() == 0)) {
223 tl = NULL;
224 } else if (internal.size() == 1) {
225 tl = internal[0]->open_allterms(prefix);
226 } else {
227 tl = new MultiAllTermsList(internal, prefix);
229 RETURN(TermIterator(tl));
232 bool
233 Database::has_positions() const
235 LOGCALL(API, bool, "Database::has_positions", NO_ARGS);
236 // If any sub-database has positions, the combined database does.
237 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
238 for (i = internal.begin(); i != internal.end(); ++i) {
239 if ((*i)->has_positions()) RETURN(true);
241 RETURN(false);
244 PositionIterator
245 Database::positionlist_begin(Xapian::docid did, const string &tname) const
247 LOGCALL(API, PositionIterator, "Database::positionlist_begin", did | tname);
248 if (tname.empty())
249 throw InvalidArgumentError("Zero length terms are invalid");
250 if (did == 0)
251 docid_zero_invalid();
253 unsigned int multiplier = internal.size();
254 if (rare(multiplier == 0))
255 no_subdatabases();
256 Xapian::doccount n = (did - 1) % multiplier; // which actual database
257 Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
258 RETURN(PositionIterator(internal[n]->open_position_list(m, tname)));
261 Xapian::doccount
262 Database::get_doccount() const
264 LOGCALL(API, Xapian::doccount, "Database::get_doccount", NO_ARGS);
265 Xapian::doccount docs = 0;
266 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
267 for (i = internal.begin(); i != internal.end(); ++i) {
268 docs += (*i)->get_doccount();
270 RETURN(docs);
273 Xapian::docid
274 Database::get_lastdocid() const
276 LOGCALL(API, Xapian::docid, "Database::get_lastdocid", NO_ARGS);
277 Xapian::docid did = 0;
279 unsigned int multiplier = internal.size();
280 for (Xapian::doccount i = 0; i < multiplier; ++i) {
281 Xapian::docid did_i = internal[i]->get_lastdocid();
282 if (did_i) did = std::max(did, (did_i - 1) * multiplier + i + 1);
284 RETURN(did);
287 Xapian::doclength
288 Database::get_avlength() const
290 LOGCALL(API, Xapian::doclength, "Database::get_avlength", NO_ARGS);
291 Xapian::doccount docs = 0;
292 totlen_t totlen = 0;
294 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
295 for (i = internal.begin(); i != internal.end(); ++i) {
296 docs += (*i)->get_doccount();
297 totlen += (*i)->get_total_length();
299 LOGLINE(UNKNOWN, "get_avlength() = " << totlen << " / " << docs <<
300 " (from " << internal.size() << " dbs)");
302 if (docs == 0) RETURN(0.0);
303 RETURN(totlen / double(docs));
306 Xapian::doccount
307 Database::get_termfreq(const string & tname) const
309 LOGCALL(API, Xapian::doccount, "Database::get_termfreq", tname);
310 if (tname.empty()) RETURN(get_doccount());
312 Xapian::doccount tf = 0;
313 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
314 for (i = internal.begin(); i != internal.end(); ++i) {
315 Xapian::doccount sub_tf;
316 (*i)->get_freqs(tname, &sub_tf, NULL);
317 tf += sub_tf;
319 RETURN(tf);
322 Xapian::termcount
323 Database::get_collection_freq(const string & tname) const
325 LOGCALL(API, Xapian::termcount, "Database::get_collection_freq", tname);
326 if (tname.empty()) RETURN(get_doccount());
328 Xapian::termcount cf = 0;
329 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
330 for (i = internal.begin(); i != internal.end(); ++i) {
331 Xapian::termcount sub_cf;
332 (*i)->get_freqs(tname, NULL, &sub_cf);
333 cf += sub_cf;
335 RETURN(cf);
338 Xapian::doccount
339 Database::get_value_freq(Xapian::valueno slot) const
341 LOGCALL(API, Xapian::doccount, "Database::get_value_freq", slot);
343 Xapian::doccount vf = 0;
344 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
345 for (i = internal.begin(); i != internal.end(); ++i) {
346 vf += (*i)->get_value_freq(slot);
348 RETURN(vf);
351 string
352 Database::get_value_lower_bound(Xapian::valueno slot) const
354 LOGCALL(API, string, "Database::get_value_lower_bound", slot);
356 if (rare(internal.empty())) RETURN(string());
358 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
359 i = internal.begin();
360 string full_lb = (*i)->get_value_lower_bound(slot);
361 while (++i != internal.end()) {
362 string lb = (*i)->get_value_lower_bound(slot);
363 if (lb < full_lb) full_lb = lb;
365 RETURN(full_lb);
368 std::string
369 Database::get_value_upper_bound(Xapian::valueno slot) const
371 LOGCALL(API, std::string, "Database::get_value_upper_bound", slot);
373 std::string full_ub;
374 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
375 for (i = internal.begin(); i != internal.end(); ++i) {
376 std::string ub = (*i)->get_value_upper_bound(slot);
377 if (ub > full_ub)
378 full_ub = ub;
380 RETURN(full_ub);
383 Xapian::termcount
384 Database::get_doclength_lower_bound() const
386 LOGCALL(API, Xapian::termcount, "Database::get_doclength_lower_bound", NO_ARGS);
388 if (rare(internal.empty())) RETURN(0);
390 Xapian::termcount full_lb = 0;
391 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
392 for (i = internal.begin(); i != internal.end(); ++i) {
393 // Skip sub-databases which are empty or only contain documents with
394 // doclen==0.
395 if ((*i)->get_total_length() != 0) {
396 Xapian::termcount lb = (*i)->get_doclength_lower_bound();
397 if (full_lb == 0 || lb < full_lb) full_lb = lb;
400 RETURN(full_lb);
403 Xapian::termcount
404 Database::get_doclength_upper_bound() const
406 LOGCALL(API, Xapian::termcount, "Database::get_doclength_upper_bound", NO_ARGS);
408 Xapian::termcount full_ub = 0;
409 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
410 for (i = internal.begin(); i != internal.end(); ++i) {
411 Xapian::termcount ub = (*i)->get_doclength_upper_bound();
412 if (ub > full_ub) full_ub = ub;
414 RETURN(full_ub);
417 Xapian::termcount
418 Database::get_wdf_upper_bound(const string & term) const
420 LOGCALL(API, Xapian::termcount, "Database::get_wdf_upper_bound", term);
421 if (term.empty()) RETURN(0);
423 Xapian::termcount full_ub = 0;
424 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
425 for (i = internal.begin(); i != internal.end(); ++i) {
426 Xapian::termcount ub = (*i)->get_wdf_upper_bound(term);
427 if (ub > full_ub) full_ub = ub;
429 RETURN(full_ub);
432 ValueIterator
433 Database::valuestream_begin(Xapian::valueno slot) const
435 LOGCALL(API, ValueIterator, "Database::valuestream_begin", slot);
436 if (internal.size() == 0)
437 RETURN(ValueIterator());
438 if (internal.size() != 1)
439 RETURN(ValueIterator(new MultiValueList(internal, slot)));
440 RETURN(ValueIterator(internal[0]->open_value_list(slot)));
443 Xapian::termcount
444 Database::get_doclength(Xapian::docid did) const
446 LOGCALL(API, Xapian::termcount, "Database::get_doclength", did);
447 if (did == 0)
448 docid_zero_invalid();
450 unsigned int multiplier = internal.size();
451 if (rare(multiplier == 0))
452 no_subdatabases();
453 Xapian::doccount n = (did - 1) % multiplier; // which actual database
454 Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
455 RETURN(internal[n]->get_doclength(m));
458 Xapian::termcount
459 Database::get_unique_terms(Xapian::docid did) const
461 LOGCALL(API, Xapian::termcount, "Database::get_unique_terms", did);
462 if (did == 0)
463 docid_zero_invalid();
464 unsigned int multiplier = internal.size();
465 if (rare(multiplier == 0))
466 no_subdatabases();
467 Xapian::doccount n = (did - 1) % multiplier; // which actual database
468 Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
469 RETURN(internal[n]->get_unique_terms(m));
472 Document
473 Database::get_document(Xapian::docid did) const
475 LOGCALL(API, Document, "Database::get_document", did);
476 if (did == 0)
477 docid_zero_invalid();
479 unsigned int multiplier = internal.size();
480 if (rare(multiplier == 0))
481 no_subdatabases();
482 Xapian::doccount n = (did - 1) % multiplier; // which actual database
483 Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
485 // Open non-lazily so we throw DocNotFoundError if the doc doesn't exist.
486 RETURN(Document(internal[n]->open_document(m, false)));
489 Document
490 Database::get_document(Xapian::docid did, unsigned flags) const
492 LOGCALL(API, Document, "Database::get_document", did|flags);
493 if (did == 0)
494 docid_zero_invalid();
496 unsigned int multiplier = internal.size();
497 if (rare(multiplier == 0))
498 no_subdatabases();
499 Xapian::doccount n = (did - 1) % multiplier; // which actual database
500 Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
502 bool assume_valid = flags & Xapian::DOC_ASSUME_VALID;
503 RETURN(Document(internal[n]->open_document(m, assume_valid)));
506 bool
507 Database::term_exists(const string & tname) const
509 LOGCALL(API, bool, "Database::term_exists", tname);
510 if (tname.empty()) {
511 RETURN(get_doccount() != 0);
513 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
514 for (i = internal.begin(); i != internal.end(); ++i) {
515 if ((*i)->term_exists(tname)) RETURN(true);
517 RETURN(false);
520 void
521 Database::keep_alive()
523 LOGCALL_VOID(API, "Database::keep_alive", NO_ARGS);
524 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
525 for (i = internal.begin(); i != internal.end(); ++i) {
526 (*i)->keep_alive();
530 string
531 Database::get_description() const
533 /// @todo display contents of the database
534 return "Database()";
// We sum the character frequency histogram absolute differences to compute a
// lower bound on the edit distance.  Rather than counting each Unicode code
// point uniquely, we use an array with VEC_SIZE elements and tally code points
// modulo VEC_SIZE which can only reduce the bound we calculate.
//
// There will be a trade-off between how good the bound is and how large an
// array is used (a larger array takes more time to clear and sum over).  The
// value 64 is somewhat arbitrary - it works as well as 128 for the testsuite
// but that may not reflect real world performance.  FIXME: profile and tune.

#define VEC_SIZE 64

/** Compute a cheap lower bound on the edit distance between two sequences.
 *
 *  @param a  First sequence of code points.
 *  @param b  Second sequence of code points.
 *
 *  @return Half (rounded up) of the summed absolute differences between the
 *          two sequences' code point histograms, where code points are
 *          bucketed modulo VEC_SIZE.
 */
static int
freq_edit_lower_bound(const std::vector<unsigned> & a,
                      const std::vector<unsigned> & b)
{
    // Signed tallies: +1 for each code point in a, -1 for each in b.
    int tally[VEC_SIZE];
    std::memset(tally, 0, sizeof(tally));

    for (size_t k = 0; k != a.size(); ++k) {
        ++tally[a[k] % VEC_SIZE];
    }
    for (size_t k = 0; k != b.size(); ++k) {
        --tally[b[k] % VEC_SIZE];
    }

    unsigned int mismatch = 0;
    for (size_t bucket = 0; bucket != VEC_SIZE; ++bucket) {
        mismatch += std::abs(tally[bucket]);
    }

    // Each insertion or deletion adds at most 1 to the total.  Each
    // transposition doesn't change it at all.  But each substitution can
    // change it by 2 so we need to divide it by 2.  Rounding up is OK, since
    // the odd change must be due to an actual edit.
    return (mismatch + 1) / 2;
}
572 // Word must have a trigram score at least this close to the best score seen
573 // so far.
574 #define TRIGRAM_SCORE_THRESHOLD 2
576 string
577 Database::get_spelling_suggestion(const string &word,
578 unsigned max_edit_distance) const
580 LOGCALL(API, string, "Database::get_spelling_suggestion", word | max_edit_distance);
581 if (word.size() <= 1) return string();
582 AutoPtr<TermList> merger;
583 for (size_t i = 0; i < internal.size(); ++i) {
584 TermList * tl = internal[i]->open_spelling_termlist(word);
585 LOGLINE(SPELLING, "Sub db " << i << " tl = " << (void*)tl);
586 if (tl) {
587 if (merger.get()) {
588 merger.reset(new OrTermList(merger.release(), tl));
589 } else {
590 merger.reset(tl);
594 if (!merger.get()) RETURN(string());
596 // Convert word to UTF-32.
597 // Extra brackets needed to avoid this being misparsed as a function
598 // prototype.
599 vector<unsigned> utf32_word((Utf8Iterator(word)), Utf8Iterator());
601 vector<unsigned> utf32_term;
603 Xapian::termcount best = 1;
604 string result;
605 int edist_best = max_edit_distance;
606 Xapian::doccount freq_best = 0;
607 Xapian::doccount freq_exact = 0;
608 while (true) {
609 TermList *ret = merger->next();
610 if (ret) merger.reset(ret);
612 if (merger->at_end()) break;
614 string term = merger->get_termname();
615 Xapian::termcount score = merger->get_wdf();
617 LOGLINE(SPELLING, "Term \"" << term << "\" ngram score " << score);
618 if (score + TRIGRAM_SCORE_THRESHOLD >= best) {
619 if (score > best) best = score;
621 // There's no point considering a word where the difference
622 // in length is greater than the smallest number of edits we've
623 // found so far.
625 // First check the length of the encoded UTF-8 version of term.
626 // Each UTF-32 character is 1-4 bytes in UTF-8.
627 if (abs(long(term.size()) - long(word.size())) > edist_best * 4) {
628 LOGLINE(SPELLING, "Lengths much too different");
629 continue;
632 // Now convert to UTF-32, and compare the true lengths more
633 // strictly.
634 utf32_term.assign(Utf8Iterator(term), Utf8Iterator());
636 if (abs(long(utf32_term.size()) - long(utf32_word.size()))
637 > edist_best) {
638 LOGLINE(SPELLING, "Lengths too different");
639 continue;
642 if (freq_edit_lower_bound(utf32_term, utf32_word) > edist_best) {
643 LOGLINE(SPELLING, "Rejected by character frequency test");
644 continue;
647 int edist = edit_distance_unsigned(&utf32_term[0],
648 int(utf32_term.size()),
649 &utf32_word[0],
650 int(utf32_word.size()),
651 edist_best);
652 LOGLINE(SPELLING, "Edit distance " << edist);
654 if (edist <= edist_best) {
655 Xapian::doccount freq = 0;
656 for (size_t j = 0; j < internal.size(); ++j)
657 freq += internal[j]->get_spelling_frequency(term);
659 LOGLINE(SPELLING, "Freq " << freq << " best " << freq_best);
660 // Even if we have an exact match, there may be a much more
661 // frequent potential correction which will still be
662 // interesting.
663 if (edist == 0) {
664 freq_exact = freq;
665 continue;
668 if (edist < edist_best || freq > freq_best) {
669 LOGLINE(SPELLING, "Best so far: \"" << term <<
670 "\" edist " << edist << " freq " << freq);
671 result = term;
672 edist_best = edist;
673 freq_best = freq;
678 if (freq_best < freq_exact)
679 RETURN(string());
680 RETURN(result);
683 TermIterator
684 Database::spellings_begin() const
686 LOGCALL(API, TermIterator, "Database::spellings_begin", NO_ARGS);
687 AutoPtr<TermList> merger;
688 for (size_t i = 0; i < internal.size(); ++i) {
689 TermList * tl = internal[i]->open_spelling_wordlist();
690 if (tl) {
691 if (merger.get()) {
692 merger.reset(new FreqAdderOrTermList(merger.release(), tl));
693 } else {
694 merger.reset(tl);
698 RETURN(TermIterator(merger.release()));
701 TermIterator
702 Database::synonyms_begin(const std::string &term) const
704 LOGCALL(API, TermIterator, "Database::synonyms_begin", term);
705 AutoPtr<TermList> merger;
706 for (size_t i = 0; i < internal.size(); ++i) {
707 TermList * tl = internal[i]->open_synonym_termlist(term);
708 if (tl) {
709 if (merger.get()) {
710 merger.reset(new OrTermList(merger.release(), tl));
711 } else {
712 merger.reset(tl);
716 RETURN(TermIterator(merger.release()));
719 TermIterator
720 Database::synonym_keys_begin(const std::string &prefix) const
722 LOGCALL(API, TermIterator, "Database::synonym_keys_begin", prefix);
723 AutoPtr<TermList> merger;
724 for (size_t i = 0; i < internal.size(); ++i) {
725 TermList * tl = internal[i]->open_synonym_keylist(prefix);
726 if (tl) {
727 if (merger.get()) {
728 merger.reset(new OrTermList(merger.release(), tl));
729 } else {
730 merger.reset(tl);
734 RETURN(TermIterator(merger.release()));
737 string
738 Database::get_metadata(const string & key) const
740 LOGCALL(API, string, "Database::get_metadata", key);
741 if (rare(key.empty()))
742 empty_metadata_key();
743 if (internal.empty()) RETURN(std::string());
744 RETURN(internal[0]->get_metadata(key));
747 Xapian::TermIterator
748 Database::metadata_keys_begin(const std::string &prefix) const
750 LOGCALL(API, Xapian::TermIterator, "Database::metadata_keys_begin", NO_ARGS);
751 if (internal.empty()) RETURN(TermIterator());
752 RETURN(TermIterator(internal[0]->open_metadata_keylist(prefix)));
755 std::string
756 Database::get_uuid() const
758 LOGCALL(API, std::string, "Database::get_uuid", NO_ARGS);
759 string uuid;
760 for (size_t i = 0; i < internal.size(); ++i) {
761 string sub_uuid = internal[i]->get_uuid();
762 // If any of the sub-databases have no uuid, we can't make a uuid for
763 // the combined database.
764 if (sub_uuid.empty())
765 RETURN(sub_uuid);
766 if (!uuid.empty()) uuid += ':';
767 uuid += sub_uuid;
769 RETURN(uuid);
772 Xapian::rev
773 Database::get_revision() const
775 LOGCALL(API, Xapian::rev, "Database::get_revision", NO_ARGS);
776 size_t n_dbs = internal.size();
777 if (rare(n_dbs != 1))
778 throw Xapian::InvalidOperationError("Database::get_revision() requires "
779 "exactly one subdatabase");
780 const string& s = internal[0]->get_revision_info();
781 const char* p = s.data();
782 const char* end = p + s.size();
783 Xapian::rev revision;
784 if (!unpack_uint(&p, end, &revision))
785 throw Xapian::UnimplementedError("Database::get_revision() only "
786 "supported for chert and glass");
787 return revision;
790 ///////////////////////////////////////////////////////////////////////////
792 WritableDatabase::WritableDatabase() : Database()
794 LOGCALL_CTOR(API, "WritableDatabase", NO_ARGS);
797 WritableDatabase::WritableDatabase(Database::Internal *internal_)
798 : Database(internal_)
800 LOGCALL_CTOR(API, "WritableDatabase", internal_);
803 WritableDatabase::WritableDatabase(const WritableDatabase &other)
804 : Database(other)
806 LOGCALL_CTOR(API, "WritableDatabase", other);
809 void
810 WritableDatabase::operator=(const WritableDatabase &other)
812 LOGCALL_VOID(API, "WritableDatabase::operator=", other);
813 Database::operator=(other);
816 WritableDatabase::~WritableDatabase()
818 LOGCALL_DTOR(API, "WritableDatabase");
821 void
822 WritableDatabase::commit()
824 LOGCALL_VOID(API, "WritableDatabase::commit", NO_ARGS);
825 size_t n_dbs = internal.size();
826 if (rare(n_dbs == 0))
827 no_subdatabases();
828 for (size_t i = 0; i != n_dbs; ++i)
829 internal[i]->commit();
832 void
833 WritableDatabase::begin_transaction(bool flushed)
835 LOGCALL_VOID(API, "WritableDatabase::begin_transaction", flushed);
836 size_t n_dbs = internal.size();
837 if (rare(n_dbs == 0))
838 no_subdatabases();
839 for (size_t i = 0; i != n_dbs; ++i)
840 internal[i]->begin_transaction(flushed);
843 void
844 WritableDatabase::commit_transaction()
846 LOGCALL_VOID(API, "WritableDatabase::commit_transaction", NO_ARGS);
847 size_t n_dbs = internal.size();
848 if (rare(n_dbs == 0))
849 no_subdatabases();
850 for (size_t i = 0; i != n_dbs; ++i)
851 internal[i]->commit_transaction();
854 void
855 WritableDatabase::cancel_transaction()
857 LOGCALL_VOID(API, "WritableDatabase::cancel_transaction", NO_ARGS);
858 size_t n_dbs = internal.size();
859 if (rare(n_dbs == 0))
860 no_subdatabases();
861 for (size_t i = 0; i != n_dbs; ++i)
862 internal[i]->cancel_transaction();
866 Xapian::docid
867 WritableDatabase::add_document(const Document & document)
869 LOGCALL(API, Xapian::docid, "WritableDatabase::add_document", document);
870 size_t n_dbs = internal.size();
871 if (rare(n_dbs == 0))
872 no_subdatabases();
873 if (n_dbs == 1)
874 RETURN(internal[0]->add_document(document));
876 // Which database will the next never used docid be in?
877 Xapian::docid did = get_lastdocid() + 1;
878 if (rare(did == 0)) {
879 throw Xapian::DatabaseError("Run out of docids - you'll have to use copydatabase to eliminate any gaps before you can add more documents");
881 // We want exactly did to be used, not a lower docid if that subdb isn't
882 // using the docid before it, so call replace_document() not
883 // add_document().
884 size_t i = sub_db(did, n_dbs);
885 internal[i]->replace_document(sub_docid(did, n_dbs), document);
886 RETURN(did);
889 void
890 WritableDatabase::delete_document(Xapian::docid did)
892 LOGCALL_VOID(API, "WritableDatabase::delete_document", did);
893 if (rare(did == 0))
894 docid_zero_invalid();
896 size_t n_dbs = internal.size();
897 if (rare(n_dbs == 0))
898 no_subdatabases();
899 size_t i = sub_db(did, n_dbs);
900 internal[i]->delete_document(sub_docid(did, n_dbs));
903 void
904 WritableDatabase::delete_document(const std::string & unique_term)
906 LOGCALL_VOID(API, "WritableDatabase::delete_document", unique_term);
907 if (unique_term.empty())
908 throw InvalidArgumentError("Empty termnames are invalid");
909 size_t n_dbs = internal.size();
910 if (rare(n_dbs == 0))
911 no_subdatabases();
912 for (size_t i = 0; i != n_dbs; ++i)
913 internal[i]->delete_document(unique_term);
916 void
917 WritableDatabase::replace_document(Xapian::docid did, const Document & document)
919 LOGCALL_VOID(API, "WritableDatabase::replace_document", did | document);
920 if (did == 0)
921 docid_zero_invalid();
922 size_t n_dbs = internal.size();
923 if (rare(n_dbs == 0))
924 no_subdatabases();
925 size_t i = sub_db(did, n_dbs);
926 internal[i]->replace_document(sub_docid(did, n_dbs), document);
929 Xapian::docid
930 WritableDatabase::replace_document(const std::string & unique_term,
931 const Document & document)
933 LOGCALL(API, Xapian::docid, "WritableDatabase::replace_document", unique_term | document);
934 if (unique_term.empty())
935 throw InvalidArgumentError("Empty termnames are invalid");
936 size_t n_dbs = internal.size();
937 if (rare(n_dbs == 0))
938 no_subdatabases();
939 if (n_dbs == 1)
940 RETURN(internal[0]->replace_document(unique_term, document));
942 Xapian::PostingIterator postit = postlist_begin(unique_term);
943 // If no unique_term in the database, this is just an add_document().
944 if (postit == postlist_end(unique_term)) {
945 // Which database will the next never used docid be in?
946 size_t i = sub_db(get_lastdocid() + 1, n_dbs);
947 RETURN(internal[i]->add_document(document));
950 Xapian::docid retval = *postit;
951 size_t i = sub_db(retval, n_dbs);
952 internal[i]->replace_document(sub_docid(retval, n_dbs), document);
954 // Delete any other occurrences of unique_term.
955 while (++postit != postlist_end(unique_term)) {
956 Xapian::docid did = *postit;
957 i = sub_db(did, n_dbs);
958 internal[i]->delete_document(sub_docid(did, n_dbs));
961 return retval;
964 void
965 WritableDatabase::add_spelling(const std::string & word,
966 Xapian::termcount freqinc) const
968 LOGCALL_VOID(API, "WritableDatabase::add_spelling", word | freqinc);
969 if (rare(internal.empty()))
970 no_subdatabases();
971 // FIXME: Is adding to the first subdatabase sensible?
972 internal[0]->add_spelling(word, freqinc);
975 void
976 WritableDatabase::remove_spelling(const std::string & word,
977 Xapian::termcount freqdec) const
979 LOGCALL_VOID(API, "WritableDatabase::remove_spelling", word | freqdec);
980 size_t n_dbs = internal.size();
981 if (rare(n_dbs == 0))
982 no_subdatabases();
983 for (size_t i = 0; i < n_dbs; ++i) {
984 internal[i]->remove_spelling(word, freqdec);
988 void
989 WritableDatabase::add_synonym(const std::string & term,
990 const std::string & synonym) const
992 LOGCALL_VOID(API, "WritableDatabase::add_synonym", term | synonym);
993 if (rare(internal.empty()))
994 no_subdatabases();
995 // FIXME: Is adding to the first subdatabase sensible?
996 internal[0]->add_synonym(term, synonym);
999 void
1000 WritableDatabase::remove_synonym(const std::string & term,
1001 const std::string & synonym) const
1003 LOGCALL_VOID(API, "WritableDatabase::remove_synonym", term | synonym);
1004 size_t n_dbs = internal.size();
1005 if (rare(n_dbs == 0))
1006 no_subdatabases();
1007 for (size_t i = 0; i < n_dbs; ++i) {
1008 internal[i]->remove_synonym(term, synonym);
1012 void
1013 WritableDatabase::clear_synonyms(const std::string & term) const
1015 LOGCALL_VOID(API, "WritableDatabase::clear_synonyms", term);
1016 size_t n_dbs = internal.size();
1017 if (rare(n_dbs == 0))
1018 no_subdatabases();
1019 for (size_t i = 0; i < n_dbs; ++i) {
1020 internal[i]->clear_synonyms(term);
1024 void
1025 WritableDatabase::set_metadata(const string & key, const string & value)
1027 LOGCALL_VOID(API, "WritableDatabase::set_metadata", key | value);
1028 if (rare(key.empty()))
1029 empty_metadata_key();
1030 if (rare(internal.empty()))
1031 no_subdatabases();
1032 internal[0]->set_metadata(key, value);
1035 string
1036 WritableDatabase::get_description() const
1038 /// @todo display contents of the writable database
1039 return "WritableDatabase()";