Website now in git not CVS
[xapian.git] / xapian-core / api / omdatabase.cc
blob876e3ee057189c5aca8b8430d95bb0060737a832
/* omdatabase.cc: External interface for running queries
 *
 * Copyright 1999,2000,2001 BrightStation PLC
 * Copyright 2001,2002 Ananova Ltd
 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2013,2014 Olly Betts
 * Copyright 2006,2008 Lemur Consulting Ltd
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */
24 #include <config.h>
26 #include "autoptr.h"
28 #include <xapian/error.h>
29 #include <xapian/positioniterator.h>
30 #include <xapian/postingiterator.h>
31 #include <xapian/termiterator.h>
32 #include <xapian/unicode.h>
34 #include "omassert.h"
35 #include "debuglog.h"
36 #include "backends/alltermslist.h"
37 #include "backends/multi/multi_alltermslist.h"
38 #include "backends/multi/multi_postlist.h"
39 #include "backends/multi/multi_termlist.h"
40 #include "backends/multivaluelist.h"
41 #include "backends/database.h"
42 #include "editdistance.h"
43 #include "expand/ortermlist.h"
44 #include "noreturn.h"
46 #include <algorithm>
47 #include <cstdlib> // For abs().
48 #include <cstring>
49 #include <vector>
51 using namespace std;
52 using Xapian::Internal::intrusive_ptr;
54 XAPIAN_NORETURN(static void docid_zero_invalid());
55 static void docid_zero_invalid()
57 throw Xapian::InvalidArgumentError("Document ID 0 is invalid");
60 XAPIAN_NORETURN(static void no_subdatabases());
61 static void no_subdatabases()
63 throw Xapian::InvalidOperationError("No subdatabases");
66 XAPIAN_NORETURN(static void empty_metadata_key());
67 static void empty_metadata_key()
69 throw Xapian::InvalidArgumentError("Empty metadata keys are invalid");
72 inline size_t
73 sub_db(Xapian::docid did, size_t n_dbs)
75 return (did - 1) % n_dbs;
78 inline size_t
79 sub_docid(Xapian::docid did, size_t n_dbs)
81 return (did - 1) / n_dbs + 1;
84 namespace Xapian {
86 Database::Database()
88 LOGCALL_CTOR(API, "Database", NO_ARGS);
91 Database::Database(Database::Internal *internal_)
93 LOGCALL_CTOR(API, "Database", internal_);
94 intrusive_ptr<Database::Internal> newi(internal_);
95 internal.push_back(newi);
98 Database::Database(const Database &other)
100 LOGCALL_CTOR(API, "Database", other);
101 internal = other.internal;
104 void
105 Database::operator=(const Database &other)
107 LOGCALL_VOID(API, "Database::operator=", other);
108 internal = other.internal;
111 Database::~Database()
113 LOGCALL_DTOR(API, "Database");
116 bool
117 Database::reopen()
119 LOGCALL(API, bool, "Database::reopen", NO_ARGS);
120 bool maybe_changed = false;
121 vector<intrusive_ptr<Database::Internal> >::iterator i;
122 for (i = internal.begin(); i != internal.end(); ++i) {
123 if ((*i)->reopen())
124 maybe_changed = true;
126 RETURN(maybe_changed);
129 void
130 Database::close()
132 LOGCALL_VOID(API, "Database::close", NO_ARGS);
133 vector<intrusive_ptr<Database::Internal> >::iterator i;
134 for (i = internal.begin(); i != internal.end(); ++i) {
135 (*i)->close();
139 void
140 Database::add_database(const Database & database)
142 LOGCALL_VOID(API, "Database::add_database", database);
143 if (this == &database) {
144 LOGLINE(API, "Database added to itself");
145 throw Xapian::InvalidArgumentError("Can't add a Database to itself");
147 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
148 for (i = database.internal.begin(); i != database.internal.end(); ++i) {
149 internal.push_back(*i);
153 PostingIterator
154 Database::postlist_begin(const string &tname) const
156 LOGCALL(API, PostingIterator, "Database::postlist_begin", tname);
158 // Don't bother checking that the term exists first. If it does, we
159 // just end up doing more work, and if it doesn't, we save very little
160 // work.
162 // Handle the common case of a single database specially.
163 if (internal.size() == 1)
164 RETURN(PostingIterator(internal[0]->open_post_list(tname)));
166 if (rare(internal.empty()))
167 RETURN(PostingIterator());
169 vector<LeafPostList *> pls;
170 try {
171 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
172 for (i = internal.begin(); i != internal.end(); ++i) {
173 pls.push_back((*i)->open_post_list(tname));
174 pls.back()->next();
176 Assert(pls.begin() != pls.end());
177 } catch (...) {
178 vector<LeafPostList *>::iterator i;
179 for (i = pls.begin(); i != pls.end(); ++i) {
180 delete *i;
181 *i = 0;
183 throw;
186 RETURN(PostingIterator(new MultiPostList(pls, *this)));
189 TermIterator
190 Database::termlist_begin(Xapian::docid did) const
192 LOGCALL(API, TermIterator, "Database::termlist_begin", did);
193 if (did == 0)
194 docid_zero_invalid();
196 unsigned int multiplier = internal.size();
197 if (rare(multiplier == 0))
198 no_subdatabases();
199 TermList *tl;
200 if (multiplier == 1) {
201 // There's no need for the MultiTermList wrapper in the common case
202 // where we're only dealing with a single database.
203 tl = internal[0]->open_term_list(did);
204 } else {
205 Assert(multiplier != 0);
206 Xapian::doccount n = (did - 1) % multiplier; // which actual database
207 Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
209 tl = new MultiTermList(internal[n]->open_term_list(m), *this, n);
211 RETURN(TermIterator(tl));
214 TermIterator
215 Database::allterms_begin(const std::string & prefix) const
217 LOGCALL(API, TermIterator, "Database::allterms_begin", NO_ARGS);
218 TermList * tl;
219 if (rare(internal.size() == 0)) {
220 tl = NULL;
221 } else if (internal.size() == 1) {
222 tl = internal[0]->open_allterms(prefix);
223 } else {
224 tl = new MultiAllTermsList(internal, prefix);
226 RETURN(TermIterator(tl));
229 bool
230 Database::has_positions() const
232 LOGCALL(API, bool, "Database::has_positions", NO_ARGS);
233 // If any sub-database has positions, the combined database does.
234 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
235 for (i = internal.begin(); i != internal.end(); ++i) {
236 if ((*i)->has_positions()) RETURN(true);
238 RETURN(false);
241 PositionIterator
242 Database::positionlist_begin(Xapian::docid did, const string &tname) const
244 LOGCALL(API, PositionIterator, "Database::positionlist_begin", did | tname);
245 if (tname.empty())
246 throw InvalidArgumentError("Zero length terms are invalid");
247 if (did == 0)
248 docid_zero_invalid();
250 unsigned int multiplier = internal.size();
251 if (rare(multiplier == 0))
252 no_subdatabases();
253 Xapian::doccount n = (did - 1) % multiplier; // which actual database
254 Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
255 RETURN(PositionIterator(internal[n]->open_position_list(m, tname)));
258 Xapian::doccount
259 Database::get_doccount() const
261 LOGCALL(API, Xapian::doccount, "Database::get_doccount", NO_ARGS);
262 Xapian::doccount docs = 0;
263 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
264 for (i = internal.begin(); i != internal.end(); ++i) {
265 docs += (*i)->get_doccount();
267 RETURN(docs);
270 Xapian::docid
271 Database::get_lastdocid() const
273 LOGCALL(API, Xapian::docid, "Database::get_lastdocid", NO_ARGS);
274 Xapian::docid did = 0;
276 unsigned int multiplier = internal.size();
277 for (Xapian::doccount i = 0; i < multiplier; ++i) {
278 Xapian::docid did_i = internal[i]->get_lastdocid();
279 if (did_i) did = std::max(did, (did_i - 1) * multiplier + i + 1);
281 RETURN(did);
284 Xapian::doclength
285 Database::get_avlength() const
287 LOGCALL(API, Xapian::doclength, "Database::get_avlength", NO_ARGS);
288 Xapian::doccount docs = 0;
289 Xapian::doclength totlen = 0;
291 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
292 for (i = internal.begin(); i != internal.end(); ++i) {
293 Xapian::doccount db_doccount = (*i)->get_doccount();
294 docs += db_doccount;
295 totlen += (*i)->get_avlength() * db_doccount;
297 LOGLINE(UNKNOWN, "get_avlength() = " << totlen << " / " << docs <<
298 " (from " << internal.size() << " dbs)");
300 if (docs == 0) RETURN(0.0);
301 RETURN(totlen / docs);
304 Xapian::doccount
305 Database::get_termfreq(const string & tname) const
307 LOGCALL(API, Xapian::doccount, "Database::get_termfreq", tname);
308 if (tname.empty()) RETURN(get_doccount());
310 Xapian::doccount tf = 0;
311 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
312 for (i = internal.begin(); i != internal.end(); ++i) {
313 Xapian::doccount sub_tf;
314 (*i)->get_freqs(tname, &sub_tf, NULL);
315 tf += sub_tf;
317 RETURN(tf);
320 Xapian::termcount
321 Database::get_collection_freq(const string & tname) const
323 LOGCALL(API, Xapian::termcount, "Database::get_collection_freq", tname);
324 if (tname.empty()) RETURN(get_doccount());
326 Xapian::termcount cf = 0;
327 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
328 for (i = internal.begin(); i != internal.end(); ++i) {
329 Xapian::termcount sub_cf;
330 (*i)->get_freqs(tname, NULL, &sub_cf);
331 cf += sub_cf;
333 RETURN(cf);
336 Xapian::doccount
337 Database::get_value_freq(Xapian::valueno slot) const
339 LOGCALL(API, Xapian::doccount, "Database::get_value_freq", slot);
341 Xapian::doccount vf = 0;
342 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
343 for (i = internal.begin(); i != internal.end(); ++i) {
344 vf += (*i)->get_value_freq(slot);
346 RETURN(vf);
349 string
350 Database::get_value_lower_bound(Xapian::valueno slot) const
352 LOGCALL(API, string, "Database::get_value_lower_bound", slot);
354 if (rare(internal.empty())) RETURN(string());
356 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
357 i = internal.begin();
358 string full_lb = (*i)->get_value_lower_bound(slot);
359 while (++i != internal.end()) {
360 string lb = (*i)->get_value_lower_bound(slot);
361 if (lb < full_lb) full_lb = lb;
363 RETURN(full_lb);
366 std::string
367 Database::get_value_upper_bound(Xapian::valueno slot) const
369 LOGCALL(API, std::string, "Database::get_value_upper_bound", slot);
371 std::string full_ub;
372 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
373 for (i = internal.begin(); i != internal.end(); ++i) {
374 std::string ub = (*i)->get_value_upper_bound(slot);
375 if (ub > full_ub)
376 full_ub = ub;
378 RETURN(full_ub);
381 Xapian::termcount
382 Database::get_doclength_lower_bound() const
384 LOGCALL(API, Xapian::termcount, "Database::get_doclength_lower_bound", NO_ARGS);
386 if (rare(internal.empty())) RETURN(0);
388 Xapian::termcount full_lb = 0;
389 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
390 for (i = internal.begin(); i != internal.end(); ++i) {
391 // Skip sub-databases which are empty or only contain documents with
392 // doclen==0.
393 if ((*i)->get_total_length() != 0) {
394 Xapian::termcount lb = (*i)->get_doclength_lower_bound();
395 if (full_lb == 0 || lb < full_lb) full_lb = lb;
398 RETURN(full_lb);
401 Xapian::termcount
402 Database::get_doclength_upper_bound() const
404 LOGCALL(API, Xapian::termcount, "Database::get_doclength_upper_bound", NO_ARGS);
406 Xapian::termcount full_ub = 0;
407 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
408 for (i = internal.begin(); i != internal.end(); ++i) {
409 Xapian::termcount ub = (*i)->get_doclength_upper_bound();
410 if (ub > full_ub) full_ub = ub;
412 RETURN(full_ub);
415 Xapian::termcount
416 Database::get_wdf_upper_bound(const string & term) const
418 LOGCALL(API, Xapian::termcount, "Database::get_wdf_upper_bound", term);
419 if (term.empty()) RETURN(0);
421 Xapian::termcount full_ub = 0;
422 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
423 for (i = internal.begin(); i != internal.end(); ++i) {
424 Xapian::termcount ub = (*i)->get_wdf_upper_bound(term);
425 if (ub > full_ub) full_ub = ub;
427 RETURN(full_ub);
430 ValueIterator
431 Database::valuestream_begin(Xapian::valueno slot) const
433 LOGCALL(API, ValueIterator, "Database::valuestream_begin", slot);
434 if (internal.size() == 0)
435 RETURN(ValueIterator());
436 if (internal.size() != 1)
437 RETURN(ValueIterator(new MultiValueList(internal, slot)));
438 RETURN(ValueIterator(internal[0]->open_value_list(slot)));
441 Xapian::termcount
442 Database::get_doclength(Xapian::docid did) const
444 LOGCALL(API, Xapian::termcount, "Database::get_doclength", did);
445 if (did == 0)
446 docid_zero_invalid();
448 unsigned int multiplier = internal.size();
449 if (rare(multiplier == 0))
450 no_subdatabases();
451 Xapian::doccount n = (did - 1) % multiplier; // which actual database
452 Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
453 RETURN(internal[n]->get_doclength(m));
456 Xapian::termcount
457 Database::get_unique_terms(Xapian::docid did) const
459 LOGCALL(API, Xapian::termcount, "Database::get_unique_terms", did);
460 if (did == 0)
461 docid_zero_invalid();
462 unsigned int multiplier = internal.size();
463 if (rare(multiplier == 0))
464 no_subdatabases();
465 Xapian::doccount n = (did - 1) % multiplier; // which actual database
466 Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
467 RETURN(internal[n]->get_unique_terms(m));
470 Document
471 Database::get_document(Xapian::docid did) const
473 LOGCALL(API, Document, "Database::get_document", did);
474 if (did == 0)
475 docid_zero_invalid();
477 unsigned int multiplier = internal.size();
478 if (rare(multiplier == 0))
479 no_subdatabases();
480 Xapian::doccount n = (did - 1) % multiplier; // which actual database
481 Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
483 // Open non-lazily so we throw DocNotFoundError if the doc doesn't exist.
484 RETURN(Document(internal[n]->open_document(m, false)));
487 bool
488 Database::term_exists(const string & tname) const
490 LOGCALL(API, bool, "Database::term_exists", tname);
491 if (tname.empty()) {
492 RETURN(get_doccount() != 0);
494 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
495 for (i = internal.begin(); i != internal.end(); ++i) {
496 if ((*i)->term_exists(tname)) RETURN(true);
498 RETURN(false);
501 void
502 Database::keep_alive()
504 LOGCALL_VOID(API, "Database::keep_alive", NO_ARGS);
505 vector<intrusive_ptr<Database::Internal> >::const_iterator i;
506 for (i = internal.begin(); i != internal.end(); ++i) {
507 (*i)->keep_alive();
511 string
512 Database::get_description() const
514 /// @todo display contents of the database
515 return "Database()";
// A lower bound on the edit distance is computed by summing the absolute
// differences between the two strings' character frequency histograms.
// Instead of tallying every Unicode code point separately, code points are
// tallied modulo VEC_SIZE into an array of VEC_SIZE elements, which can only
// reduce the bound we calculate.
//
// There will be a trade-off between how good the bound is and how large an
// array is used (a larger array takes more time to clear and sum over).  The
// value 64 is somewhat arbitrary - it works as well as 128 for the testsuite
// but that may not reflect real world performance.  FIXME: profile and tune.

#define VEC_SIZE 64

/// Return a lower bound on the edit distance between code point sequences
/// @a a and @a b, based on their (folded) character frequency histograms.
static int
freq_edit_lower_bound(const std::vector<unsigned> & a,
                      const std::vector<unsigned> & b)
{
    // Signed tallies: characters of a count up, characters of b count down.
    int histogram[VEC_SIZE] = { 0 };
    for (size_t k = 0; k != a.size(); ++k) {
        ++histogram[a[k] % VEC_SIZE];
    }
    for (size_t k = 0; k != b.size(); ++k) {
        --histogram[b[k] % VEC_SIZE];
    }

    unsigned int total = 0;
    for (const int * p = histogram; p != histogram + VEC_SIZE; ++p) {
        total += std::abs(*p);
    }

    // An insertion or deletion adds at most 1 to total and a transposition
    // leaves it unchanged, but a substitution can change it by 2, hence the
    // division by 2.  Rounding up is OK, since the odd change must be due to
    // an actual edit.
    return (total + 1) / 2;
}
553 // Word must have a trigram score at least this close to the best score seen
554 // so far.
555 #define TRIGRAM_SCORE_THRESHOLD 2
557 string
558 Database::get_spelling_suggestion(const string &word,
559 unsigned max_edit_distance) const
561 LOGCALL(API, string, "Database::get_spelling_suggestion", word | max_edit_distance);
562 if (word.size() <= 1) return string();
563 AutoPtr<TermList> merger;
564 for (size_t i = 0; i < internal.size(); ++i) {
565 TermList * tl = internal[i]->open_spelling_termlist(word);
566 LOGLINE(SPELLING, "Sub db " << i << " tl = " << (void*)tl);
567 if (tl) {
568 if (merger.get()) {
569 merger.reset(new OrTermList(merger.release(), tl));
570 } else {
571 merger.reset(tl);
575 if (!merger.get()) RETURN(string());
577 // Convert word to UTF-32.
578 // Extra brackets needed to avoid this being misparsed as a function
579 // prototype.
580 vector<unsigned> utf32_word((Utf8Iterator(word)), Utf8Iterator());
582 vector<unsigned> utf32_term;
584 Xapian::termcount best = 1;
585 string result;
586 int edist_best = max_edit_distance;
587 Xapian::doccount freq_best = 0;
588 Xapian::doccount freq_exact = 0;
589 while (true) {
590 TermList *ret = merger->next();
591 if (ret) merger.reset(ret);
593 if (merger->at_end()) break;
595 string term = merger->get_termname();
596 Xapian::termcount score = merger->get_wdf();
598 LOGLINE(SPELLING, "Term \"" << term << "\" ngram score " << score);
599 if (score + TRIGRAM_SCORE_THRESHOLD >= best) {
600 if (score > best) best = score;
602 // There's no point considering a word where the difference
603 // in length is greater than the smallest number of edits we've
604 // found so far.
606 // First check the length of the encoded UTF-8 version of term.
607 // Each UTF-32 character is 1-4 bytes in UTF-8.
608 if (abs(long(term.size()) - long(word.size())) > edist_best * 4) {
609 LOGLINE(SPELLING, "Lengths much too different");
610 continue;
613 // Now convert to UTF-32, and compare the true lengths more
614 // strictly.
615 utf32_term.assign(Utf8Iterator(term), Utf8Iterator());
617 if (abs(long(utf32_term.size()) - long(utf32_word.size()))
618 > edist_best) {
619 LOGLINE(SPELLING, "Lengths too different");
620 continue;
623 if (freq_edit_lower_bound(utf32_term, utf32_word) > edist_best) {
624 LOGLINE(SPELLING, "Rejected by character frequency test");
625 continue;
628 int edist = edit_distance_unsigned(&utf32_term[0],
629 int(utf32_term.size()),
630 &utf32_word[0],
631 int(utf32_word.size()),
632 edist_best);
633 LOGLINE(SPELLING, "Edit distance " << edist);
635 if (edist <= edist_best) {
636 Xapian::doccount freq = 0;
637 for (size_t j = 0; j < internal.size(); ++j)
638 freq += internal[j]->get_spelling_frequency(term);
640 LOGLINE(SPELLING, "Freq " << freq << " best " << freq_best);
641 // Even if we have an exact match, there may be a much more
642 // frequent potential correction which will still be
643 // interesting.
644 if (edist == 0) {
645 freq_exact = freq;
646 continue;
649 if (edist < edist_best || freq > freq_best) {
650 LOGLINE(SPELLING, "Best so far: \"" << term <<
651 "\" edist " << edist << " freq " << freq);
652 result = term;
653 edist_best = edist;
654 freq_best = freq;
659 if (freq_best < freq_exact)
660 RETURN(string());
661 RETURN(result);
664 TermIterator
665 Database::spellings_begin() const
667 LOGCALL(API, TermIterator, "Database::spellings_begin", NO_ARGS);
668 AutoPtr<TermList> merger;
669 for (size_t i = 0; i < internal.size(); ++i) {
670 TermList * tl = internal[i]->open_spelling_wordlist();
671 if (tl) {
672 if (merger.get()) {
673 merger.reset(new FreqAdderOrTermList(merger.release(), tl));
674 } else {
675 merger.reset(tl);
679 RETURN(TermIterator(merger.release()));
682 TermIterator
683 Database::synonyms_begin(const std::string &term) const
685 LOGCALL(API, TermIterator, "Database::synonyms_begin", term);
686 AutoPtr<TermList> merger;
687 for (size_t i = 0; i < internal.size(); ++i) {
688 TermList * tl = internal[i]->open_synonym_termlist(term);
689 if (tl) {
690 if (merger.get()) {
691 merger.reset(new OrTermList(merger.release(), tl));
692 } else {
693 merger.reset(tl);
697 RETURN(TermIterator(merger.release()));
700 TermIterator
701 Database::synonym_keys_begin(const std::string &prefix) const
703 LOGCALL(API, TermIterator, "Database::synonym_keys_begin", prefix);
704 AutoPtr<TermList> merger;
705 for (size_t i = 0; i < internal.size(); ++i) {
706 TermList * tl = internal[i]->open_synonym_keylist(prefix);
707 if (tl) {
708 if (merger.get()) {
709 merger.reset(new OrTermList(merger.release(), tl));
710 } else {
711 merger.reset(tl);
715 RETURN(TermIterator(merger.release()));
718 string
719 Database::get_metadata(const string & key) const
721 LOGCALL(API, string, "Database::get_metadata", key);
722 if (rare(key.empty()))
723 empty_metadata_key();
724 if (internal.empty()) RETURN(std::string());
725 RETURN(internal[0]->get_metadata(key));
728 Xapian::TermIterator
729 Database::metadata_keys_begin(const std::string &prefix) const
731 LOGCALL(API, Xapian::TermIterator, "Database::metadata_keys_begin", NO_ARGS);
732 if (internal.empty()) RETURN(TermIterator());
733 RETURN(TermIterator(internal[0]->open_metadata_keylist(prefix)));
736 std::string
737 Database::get_uuid() const
739 LOGCALL(API, std::string, "Database::get_uuid", NO_ARGS);
740 string uuid;
741 for (size_t i = 0; i < internal.size(); ++i) {
742 string sub_uuid = internal[i]->get_uuid();
743 // If any of the sub-databases have no uuid, we can't make a uuid for
744 // the combined database.
745 if (sub_uuid.empty())
746 RETURN(sub_uuid);
747 if (!uuid.empty()) uuid += ':';
748 uuid += sub_uuid;
750 RETURN(uuid);
753 ///////////////////////////////////////////////////////////////////////////
755 WritableDatabase::WritableDatabase() : Database()
757 LOGCALL_CTOR(API, "WritableDatabase", NO_ARGS);
760 WritableDatabase::WritableDatabase(Database::Internal *internal_)
761 : Database(internal_)
763 LOGCALL_CTOR(API, "WritableDatabase", internal_);
766 WritableDatabase::WritableDatabase(const WritableDatabase &other)
767 : Database(other)
769 LOGCALL_CTOR(API, "WritableDatabase", other);
772 void
773 WritableDatabase::operator=(const WritableDatabase &other)
775 LOGCALL_VOID(API, "WritableDatabase::operator=", other);
776 Database::operator=(other);
779 WritableDatabase::~WritableDatabase()
781 LOGCALL_DTOR(API, "WritableDatabase");
784 void
785 WritableDatabase::commit()
787 LOGCALL_VOID(API, "WritableDatabase::commit", NO_ARGS);
788 size_t n_dbs = internal.size();
789 if (rare(n_dbs == 0))
790 no_subdatabases();
791 for (size_t i = 0; i != n_dbs; ++i)
792 internal[i]->commit();
795 void
796 WritableDatabase::begin_transaction(bool flushed)
798 LOGCALL_VOID(API, "WritableDatabase::begin_transaction", flushed);
799 size_t n_dbs = internal.size();
800 if (rare(n_dbs == 0))
801 no_subdatabases();
802 for (size_t i = 0; i != n_dbs; ++i)
803 internal[i]->begin_transaction(flushed);
806 void
807 WritableDatabase::commit_transaction()
809 LOGCALL_VOID(API, "WritableDatabase::commit_transaction", NO_ARGS);
810 size_t n_dbs = internal.size();
811 if (rare(n_dbs == 0))
812 no_subdatabases();
813 for (size_t i = 0; i != n_dbs; ++i)
814 internal[i]->commit_transaction();
817 void
818 WritableDatabase::cancel_transaction()
820 LOGCALL_VOID(API, "WritableDatabase::cancel_transaction", NO_ARGS);
821 size_t n_dbs = internal.size();
822 if (rare(n_dbs == 0))
823 no_subdatabases();
824 for (size_t i = 0; i != n_dbs; ++i)
825 internal[i]->cancel_transaction();
829 Xapian::docid
830 WritableDatabase::add_document(const Document & document)
832 LOGCALL(API, Xapian::docid, "WritableDatabase::add_document", document);
833 size_t n_dbs = internal.size();
834 if (rare(n_dbs == 0))
835 no_subdatabases();
836 if (n_dbs == 1)
837 RETURN(internal[0]->add_document(document));
839 // Which database will the next never used docid be in?
840 Xapian::docid did = get_lastdocid() + 1;
841 if (rare(did == 0)) {
842 throw Xapian::DatabaseError("Run out of docids - you'll have to use copydatabase to eliminate any gaps before you can add more documents");
844 // We want exactly did to be used, not a lower docid if that subdb isn't
845 // using the docid before it, so call replace_document() not
846 // add_document().
847 size_t i = sub_db(did, n_dbs);
848 internal[i]->replace_document(sub_docid(did, n_dbs), document);
849 RETURN(did);
852 void
853 WritableDatabase::delete_document(Xapian::docid did)
855 LOGCALL_VOID(API, "WritableDatabase::delete_document", did);
856 if (rare(did == 0))
857 docid_zero_invalid();
859 size_t n_dbs = internal.size();
860 if (rare(n_dbs == 0))
861 no_subdatabases();
862 size_t i = sub_db(did, n_dbs);
863 internal[i]->delete_document(sub_docid(did, n_dbs));
866 void
867 WritableDatabase::delete_document(const std::string & unique_term)
869 LOGCALL_VOID(API, "WritableDatabase::delete_document", unique_term);
870 if (unique_term.empty())
871 throw InvalidArgumentError("Empty termnames are invalid");
872 size_t n_dbs = internal.size();
873 if (rare(n_dbs == 0))
874 no_subdatabases();
875 for (size_t i = 0; i != n_dbs; ++i)
876 internal[i]->delete_document(unique_term);
879 void
880 WritableDatabase::replace_document(Xapian::docid did, const Document & document)
882 LOGCALL_VOID(API, "WritableDatabase::replace_document", did | document);
883 if (did == 0)
884 docid_zero_invalid();
885 size_t n_dbs = internal.size();
886 if (rare(n_dbs == 0))
887 no_subdatabases();
888 size_t i = sub_db(did, n_dbs);
889 internal[i]->replace_document(sub_docid(did, n_dbs), document);
892 Xapian::docid
893 WritableDatabase::replace_document(const std::string & unique_term,
894 const Document & document)
896 LOGCALL(API, Xapian::docid, "WritableDatabase::replace_document", unique_term | document);
897 if (unique_term.empty())
898 throw InvalidArgumentError("Empty termnames are invalid");
899 size_t n_dbs = internal.size();
900 if (rare(n_dbs == 0))
901 no_subdatabases();
902 if (n_dbs == 1)
903 RETURN(internal[0]->replace_document(unique_term, document));
905 Xapian::PostingIterator postit = postlist_begin(unique_term);
906 // If no unique_term in the database, this is just an add_document().
907 if (postit == postlist_end(unique_term)) {
908 // Which database will the next never used docid be in?
909 size_t i = sub_db(get_lastdocid() + 1, n_dbs);
910 RETURN(internal[i]->add_document(document));
913 Xapian::docid retval = *postit;
914 size_t i = sub_db(retval, n_dbs);
915 internal[i]->replace_document(sub_docid(retval, n_dbs), document);
917 // Delete any other occurrences of unique_term.
918 while (++postit != postlist_end(unique_term)) {
919 Xapian::docid did = *postit;
920 i = sub_db(did, n_dbs);
921 internal[i]->delete_document(sub_docid(did, n_dbs));
924 return retval;
927 void
928 WritableDatabase::add_spelling(const std::string & word,
929 Xapian::termcount freqinc) const
931 LOGCALL_VOID(API, "WritableDatabase::add_spelling", word | freqinc);
932 if (rare(internal.empty()))
933 no_subdatabases();
934 // FIXME: Is adding to the first subdatabase sensible?
935 internal[0]->add_spelling(word, freqinc);
938 void
939 WritableDatabase::remove_spelling(const std::string & word,
940 Xapian::termcount freqdec) const
942 LOGCALL_VOID(API, "WritableDatabase::remove_spelling", word | freqdec);
943 size_t n_dbs = internal.size();
944 if (rare(n_dbs == 0))
945 no_subdatabases();
946 for (size_t i = 0; i < n_dbs; ++i) {
947 internal[i]->remove_spelling(word, freqdec);
951 void
952 WritableDatabase::add_synonym(const std::string & term,
953 const std::string & synonym) const
955 LOGCALL_VOID(API, "WritableDatabase::add_synonym", term | synonym);
956 if (rare(internal.empty()))
957 no_subdatabases();
958 // FIXME: Is adding to the first subdatabase sensible?
959 internal[0]->add_synonym(term, synonym);
962 void
963 WritableDatabase::remove_synonym(const std::string & term,
964 const std::string & synonym) const
966 LOGCALL_VOID(API, "WritableDatabase::remove_synonym", term | synonym);
967 size_t n_dbs = internal.size();
968 if (rare(n_dbs == 0))
969 no_subdatabases();
970 for (size_t i = 0; i < n_dbs; ++i) {
971 internal[i]->remove_synonym(term, synonym);
975 void
976 WritableDatabase::clear_synonyms(const std::string & term) const
978 LOGCALL_VOID(API, "WritableDatabase::clear_synonyms", term);
979 size_t n_dbs = internal.size();
980 if (rare(n_dbs == 0))
981 no_subdatabases();
982 for (size_t i = 0; i < n_dbs; ++i) {
983 internal[i]->clear_synonyms(term);
987 void
988 WritableDatabase::set_metadata(const string & key, const string & value)
990 LOGCALL_VOID(API, "WritableDatabase::set_metadata", key | value);
991 if (rare(key.empty()))
992 empty_metadata_key();
993 if (rare(internal.empty()))
994 no_subdatabases();
995 internal[0]->set_metadata(key, value);
998 string
999 WritableDatabase::get_description() const
1001 /// @todo display contents of the writable database
1002 return "WritableDatabase()";