1 /* omdatabase.cc: External interface for running queries
3 * Copyright 1999,2000,2001 BrightStation PLC
4 * Copyright 2001,2002 Ananova Ltd
5 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2013,2014,2016 Olly Betts
6 * Copyright 2006,2008 Lemur Consulting Ltd
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License, or (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
28 #include <xapian/constants.h>
29 #include <xapian/error.h>
30 #include <xapian/positioniterator.h>
31 #include <xapian/postingiterator.h>
32 #include <xapian/termiterator.h>
33 #include <xapian/unicode.h>
37 #include "backends/alltermslist.h"
38 #include "backends/multi/multi_alltermslist.h"
39 #include "backends/multi/multi_postlist.h"
40 #include "backends/multi/multi_termlist.h"
41 #include "backends/multivaluelist.h"
42 #include "backends/database.h"
43 #include "editdistance.h"
44 #include "expand/ortermlist.h"
45 #include "internaltypes.h"
50 #include <cstdlib> // For abs().
55 using Xapian::Internal::intrusive_ptr
;
// File-local helper, declared through XAPIAN_NORETURN and then defined.
// Unconditionally throws Xapian::InvalidArgumentError to reject document ID 0.
57 XAPIAN_NORETURN(static void docid_zero_invalid());
58 static void docid_zero_invalid()
60 throw Xapian::InvalidArgumentError("Document ID 0 is invalid");
// File-local helper, declared through XAPIAN_NORETURN and then defined.
// Unconditionally throws Xapian::InvalidOperationError ("No subdatabases").
63 XAPIAN_NORETURN(static void no_subdatabases());
64 static void no_subdatabases()
66 throw Xapian::InvalidOperationError("No subdatabases");
// File-local helper, declared through XAPIAN_NORETURN and then defined.
// Unconditionally throws Xapian::InvalidArgumentError for an empty metadata
// key.
69 XAPIAN_NORETURN(static void empty_metadata_key());
70 static void empty_metadata_key()
72 throw Xapian::InvalidArgumentError("Empty metadata keys are invalid");
// Map a docid of the combined database to the index of the sub-database it
// belongs to: (did - 1) modulo the number of sub-databases.
// NOTE(review): divides by n_dbs, so n_dbs must be non-zero; the callers
// visible in this file test n_dbs == 0 before calling.
// NOTE(review): the return-type line is not visible in this extract.
76 sub_db(Xapian::docid did
, size_t n_dbs
)
78 return (did
- 1) % n_dbs
;
// Map a docid of the combined database to the docid used inside its
// sub-database: (did - 1) / n_dbs + 1.
// NOTE(review): divides by n_dbs, so n_dbs must be non-zero.
// NOTE(review): the return-type line is not visible in this extract.
82 sub_docid(Xapian::docid did
, size_t n_dbs
)
84 return (did
- 1) / n_dbs
+ 1;
// NOTE(review): the signature line is not visible in this extract; the log
// name and NO_ARGS suggest this is the body of the default Database
// constructor. The only visible statement logs the construction.
91 LOGCALL_CTOR(API
, "Database", NO_ARGS
);
// Construct a Database from a single backend object: the raw pointer is
// wrapped in an intrusive_ptr and appended to `internal`.
94 Database::Database(Database::Internal
*internal_
)
96 LOGCALL_CTOR(API
, "Database", internal_
);
97 intrusive_ptr
<Database::Internal
> newi(internal_
);
98 internal
.push_back(newi
);
// Copy constructor: copies the other object's `internal` container.
101 Database::Database(const Database
&other
)
103 LOGCALL_CTOR(API
, "Database", other
);
104 internal
= other
.internal
;
// Assignment: replaces this object's `internal` container with a copy of
// the other object's.
// NOTE(review): the return-type line is not visible in this extract; the
// call is logged with LOGCALL_VOID.
108 Database::operator=(const Database
&other
)
110 LOGCALL_VOID(API
, "Database::operator=", other
);
111 internal
= other
.internal
;
// Destructor: the only visible statement logs the destruction.
114 Database::~Database()
116 LOGCALL_DTOR(API
, "Database");
// Body logged as Database::reopen (signature line not visible in this
// extract). Walks every sub-database in `internal` and returns a bool that
// starts false and is set to true inside the loop.
// NOTE(review): the statement guarding `maybe_changed = true` is not visible
// here - presumably a per-sub-database reopen call; confirm.
122 LOGCALL(API
, bool, "Database::reopen", NO_ARGS
);
123 bool maybe_changed
= false;
124 vector
<intrusive_ptr
<Database::Internal
> >::iterator i
;
125 for (i
= internal
.begin(); i
!= internal
.end(); ++i
) {
127 maybe_changed
= true;
129 RETURN(maybe_changed
);
// Body logged as Database::close (signature line not visible in this
// extract). Iterates over every sub-database in `internal`.
// NOTE(review): the loop body is not visible here - presumably it closes
// each sub-database; confirm.
135 LOGCALL_VOID(API
, "Database::close", NO_ARGS
);
136 vector
<intrusive_ptr
<Database::Internal
> >::iterator i
;
137 for (i
= internal
.begin(); i
!= internal
.end(); ++i
) {
// Append every sub-database of `database` to this object's `internal` list.
// Throws Xapian::InvalidArgumentError if `database` is this very object.
143 Database::add_database(const Database
& database
)
145 LOGCALL_VOID(API
, "Database::add_database", database
);
// Self-addition is detected by address comparison.
146 if (this == &database
) {
147 LOGLINE(API
, "Database added to itself");
148 throw Xapian::InvalidArgumentError("Can't add a Database to itself");
150 vector
<intrusive_ptr
<Database::Internal
> >::const_iterator i
;
151 for (i
= database
.internal
.begin(); i
!= database
.internal
.end(); ++i
) {
152 internal
.push_back(*i
);
// Return a PostingIterator for term `tname`.
//  - exactly one sub-database: wraps that sub-database's post list directly;
//  - no sub-databases: returns a default-constructed PostingIterator;
//  - otherwise: opens a LeafPostList in every sub-database and wraps the
//    collection in a MultiPostList.
157 Database::postlist_begin(const string
&tname
) const
159 LOGCALL(API
, PostingIterator
, "Database::postlist_begin", tname
);
161 // Don't bother checking that the term exists first. If it does, we
162 // just end up doing more work, and if it doesn't, we save very little
165 // Handle the common case of a single database specially.
166 if (internal
.size() == 1)
167 RETURN(PostingIterator(internal
[0]->open_post_list(tname
)));
169 if (rare(internal
.empty()))
170 RETURN(PostingIterator());
172 vector
<LeafPostList
*> pls
;
174 vector
<intrusive_ptr
<Database::Internal
> >::const_iterator i
;
175 for (i
= internal
.begin(); i
!= internal
.end(); ++i
) {
176 pls
.push_back((*i
)->open_post_list(tname
));
179 Assert(pls
.begin() != pls
.end());
// NOTE(review): the body of this second loop over `pls`, and whatever
// construct encloses it, are not visible in this extract - presumably
// cleanup of the raw pointers on failure; confirm.
181 vector
<LeafPostList
*>::iterator i
;
182 for (i
= pls
.begin(); i
!= pls
.end(); ++i
) {
189 RETURN(PostingIterator(new MultiPostList(pls
, *this)));
// Return a TermIterator over the terms of document `did`.
// With one sub-database its term list is used directly; otherwise `did` is
// split into a sub-database index and a local docid, and the term list is
// wrapped in a MultiTermList.
// NOTE(review): the conditions guarding docid_zero_invalid(), the action for
// multiplier == 0, and the declaration of `tl` are not visible here.
193 Database::termlist_begin(Xapian::docid did
) const
195 LOGCALL(API
, TermIterator
, "Database::termlist_begin", did
);
197 docid_zero_invalid();
// NOTE(review): internal.size() is narrowed to unsigned int here.
199 unsigned int multiplier
= internal
.size();
200 if (rare(multiplier
== 0))
203 if (multiplier
== 1) {
204 // There's no need for the MultiTermList wrapper in the common case
205 // where we're only dealing with a single database.
206 tl
= internal
[0]->open_term_list(did
);
208 Assert(multiplier
!= 0);
209 Xapian::doccount n
= (did
- 1) % multiplier
; // which actual database
210 Xapian::docid m
= (did
- 1) / multiplier
+ 1; // real docid in that database
212 tl
= new MultiTermList(internal
[n
]->open_term_list(m
), *this, n
);
214 RETURN(TermIterator(tl
));
// Return a TermIterator over all terms starting with `prefix`.
// One sub-database: uses its open_allterms(prefix) directly; several: builds
// a MultiAllTermsList over all of them.
// NOTE(review): the zero-sub-database branch body and the declaration of
// `tl` are not visible in this extract.
// NOTE(review): the log call passes NO_ARGS although the function takes
// `prefix`; confirm whether `prefix` should be logged.
218 Database::allterms_begin(const std::string
& prefix
) const
220 LOGCALL(API
, TermIterator
, "Database::allterms_begin", NO_ARGS
);
222 if (rare(internal
.size() == 0)) {
224 } else if (internal
.size() == 1) {
225 tl
= internal
[0]->open_allterms(prefix
);
227 tl
= new MultiAllTermsList(internal
, prefix
);
229 RETURN(TermIterator(tl
));
// Report whether positional information is available: returns true as soon
// as any sub-database reports has_positions().
// NOTE(review): the final return for the "none found" case is not visible
// in this extract.
233 Database::has_positions() const
235 LOGCALL(API
, bool, "Database::has_positions", NO_ARGS
);
236 // If any sub-database has positions, the combined database does.
237 vector
<intrusive_ptr
<Database::Internal
> >::const_iterator i
;
238 for (i
= internal
.begin(); i
!= internal
.end(); ++i
) {
239 if ((*i
)->has_positions()) RETURN(true);
// Return a PositionIterator for term `tname` in document `did`.
// `did` is split into a sub-database index (n) and a local docid (m), and
// that sub-database's position list is opened.
// NOTE(review): the conditions guarding the two error paths below
// (zero-length term, docid 0) and the action for multiplier == 0 are not
// visible in this extract.
245 Database::positionlist_begin(Xapian::docid did
, const string
&tname
) const
247 LOGCALL(API
, PositionIterator
, "Database::positionlist_begin", did
| tname
);
249 throw InvalidArgumentError("Zero length terms are invalid");
251 docid_zero_invalid();
253 unsigned int multiplier
= internal
.size();
254 if (rare(multiplier
== 0))
256 Xapian::doccount n
= (did
- 1) % multiplier
; // which actual database
257 Xapian::docid m
= (did
- 1) / multiplier
+ 1; // real docid in that database
258 RETURN(PositionIterator(internal
[n
]->open_position_list(m
, tname
)));
// Sum get_doccount() over every sub-database into `docs`.
// NOTE(review): the statement returning `docs` is not visible in this
// extract.
262 Database::get_doccount() const
264 LOGCALL(API
, Xapian::doccount
, "Database::get_doccount", NO_ARGS
);
265 Xapian::doccount docs
= 0;
266 vector
<intrusive_ptr
<Database::Internal
> >::const_iterator i
;
267 for (i
= internal
.begin(); i
!= internal
.end(); ++i
) {
268 docs
+= (*i
)->get_doccount();
// Compute the highest docid in use in the combined database.
// For each sub-database i with a non-zero last docid did_i, the combined
// docid is (did_i - 1) * multiplier + i + 1; the maximum over all
// sub-databases is kept in `did` (0 if none has any documents).
// NOTE(review): the statement returning `did` is not visible here.
// NOTE(review): the multiplication is done in unsigned arithmetic and could
// wrap for very large docids - confirm whether that is handled elsewhere.
274 Database::get_lastdocid() const
276 LOGCALL(API
, Xapian::docid
, "Database::get_lastdocid", NO_ARGS
);
277 Xapian::docid did
= 0;
279 unsigned int multiplier
= internal
.size();
280 for (Xapian::doccount i
= 0; i
< multiplier
; ++i
) {
281 Xapian::docid did_i
= internal
[i
]->get_lastdocid();
282 if (did_i
) did
= std::max(did
, (did_i
- 1) * multiplier
+ i
+ 1);
// Average document length across all sub-databases: total length divided by
// total document count, or 0.0 when there are no documents.
// NOTE(review): the declaration of `totlen` is not visible in this extract.
288 Database::get_avlength() const
290 LOGCALL(API
, Xapian::doclength
, "Database::get_avlength", NO_ARGS
);
291 Xapian::doccount docs
= 0;
294 vector
<intrusive_ptr
<Database::Internal
> >::const_iterator i
;
295 for (i
= internal
.begin(); i
!= internal
.end(); ++i
) {
296 docs
+= (*i
)->get_doccount();
297 totlen
+= (*i
)->get_total_length();
299 LOGLINE(UNKNOWN
, "get_avlength() = " << totlen
<< " / " << docs
<<
300 " (from " << internal
.size() << " dbs)");
// Guard against division by zero for an empty database.
302 if (docs
== 0) RETURN(0.0);
303 RETURN(totlen
/ double(docs
));
// Term frequency of `tname`. An empty term yields get_doccount().
// Otherwise each sub-database is asked for its term frequency via
// get_freqs(tname, &sub_tf, NULL).
// NOTE(review): the accumulation of sub_tf into `tf` and the final return
// are not visible in this extract.
307 Database::get_termfreq(const string
& tname
) const
309 LOGCALL(API
, Xapian::doccount
, "Database::get_termfreq", tname
);
310 if (tname
.empty()) RETURN(get_doccount());
312 Xapian::doccount tf
= 0;
313 vector
<intrusive_ptr
<Database::Internal
> >::const_iterator i
;
314 for (i
= internal
.begin(); i
!= internal
.end(); ++i
) {
315 Xapian::doccount sub_tf
;
316 (*i
)->get_freqs(tname
, &sub_tf
, NULL
);
// Collection frequency of `tname`. An empty term yields get_doccount().
// Otherwise each sub-database is asked for its collection frequency via
// get_freqs(tname, NULL, &sub_cf).
// NOTE(review): the accumulation of sub_cf into `cf` and the final return
// are not visible in this extract.
323 Database::get_collection_freq(const string
& tname
) const
325 LOGCALL(API
, Xapian::termcount
, "Database::get_collection_freq", tname
);
326 if (tname
.empty()) RETURN(get_doccount());
328 Xapian::termcount cf
= 0;
329 vector
<intrusive_ptr
<Database::Internal
> >::const_iterator i
;
330 for (i
= internal
.begin(); i
!= internal
.end(); ++i
) {
331 Xapian::termcount sub_cf
;
332 (*i
)->get_freqs(tname
, NULL
, &sub_cf
);
// Sum get_value_freq(slot) over every sub-database into `vf`.
// NOTE(review): the statement returning `vf` is not visible in this extract.
339 Database::get_value_freq(Xapian::valueno slot
) const
341 LOGCALL(API
, Xapian::doccount
, "Database::get_value_freq", slot
);
343 Xapian::doccount vf
= 0;
344 vector
<intrusive_ptr
<Database::Internal
> >::const_iterator i
;
345 for (i
= internal
.begin(); i
!= internal
.end(); ++i
) {
346 vf
+= (*i
)->get_value_freq(slot
);
// Lower bound on values in `slot`: an empty string when there are no
// sub-databases; otherwise the smallest (by string comparison) of the
// per-sub-database lower bounds, seeded from the first sub-database.
// NOTE(review): the statement returning `full_lb` is not visible here.
352 Database::get_value_lower_bound(Xapian::valueno slot
) const
354 LOGCALL(API
, string
, "Database::get_value_lower_bound", slot
);
356 if (rare(internal
.empty())) RETURN(string());
358 vector
<intrusive_ptr
<Database::Internal
> >::const_iterator i
;
359 i
= internal
.begin();
360 string full_lb
= (*i
)->get_value_lower_bound(slot
);
361 while (++i
!= internal
.end()) {
362 string lb
= (*i
)->get_value_lower_bound(slot
);
363 if (lb
< full_lb
) full_lb
= lb
;
// Upper bound on values in `slot`: fetches get_value_upper_bound(slot) from
// each sub-database.
// NOTE(review): the accumulator declaration, the comparison that keeps the
// largest bound, and the final return are not visible in this extract.
369 Database::get_value_upper_bound(Xapian::valueno slot
) const
371 LOGCALL(API
, std::string
, "Database::get_value_upper_bound", slot
);
374 vector
<intrusive_ptr
<Database::Internal
> >::const_iterator i
;
375 for (i
= internal
.begin(); i
!= internal
.end(); ++i
) {
376 std::string ub
= (*i
)->get_value_upper_bound(slot
);
// Lower bound on document length: 0 when there are no sub-databases;
// otherwise the minimum of the per-sub-database bounds, ignoring
// sub-databases whose total length is 0.
// NOTE(review): the statement returning `full_lb` is not visible here.
384 Database::get_doclength_lower_bound() const
386 LOGCALL(API
, Xapian::termcount
, "Database::get_doclength_lower_bound", NO_ARGS
);
388 if (rare(internal
.empty())) RETURN(0);
390 Xapian::termcount full_lb
= 0;
391 vector
<intrusive_ptr
<Database::Internal
> >::const_iterator i
;
392 for (i
= internal
.begin(); i
!= internal
.end(); ++i
) {
393 // Skip sub-databases which are empty or only contain documents with
395 if ((*i
)->get_total_length() != 0) {
396 Xapian::termcount lb
= (*i
)->get_doclength_lower_bound();
// full_lb == 0 doubles as "no bound recorded yet".
397 if (full_lb
== 0 || lb
< full_lb
) full_lb
= lb
;
// Upper bound on document length: the maximum of the per-sub-database
// bounds (0 when there are no sub-databases).
// NOTE(review): the statement returning `full_ub` is not visible here.
404 Database::get_doclength_upper_bound() const
406 LOGCALL(API
, Xapian::termcount
, "Database::get_doclength_upper_bound", NO_ARGS
);
408 Xapian::termcount full_ub
= 0;
409 vector
<intrusive_ptr
<Database::Internal
> >::const_iterator i
;
410 for (i
= internal
.begin(); i
!= internal
.end(); ++i
) {
411 Xapian::termcount ub
= (*i
)->get_doclength_upper_bound();
412 if (ub
> full_ub
) full_ub
= ub
;
// Upper bound on the wdf of `term`: 0 for an empty term; otherwise the
// maximum of the per-sub-database bounds.
// NOTE(review): the statement returning `full_ub` is not visible here.
418 Database::get_wdf_upper_bound(const string
& term
) const
420 LOGCALL(API
, Xapian::termcount
, "Database::get_wdf_upper_bound", term
);
421 if (term
.empty()) RETURN(0);
423 Xapian::termcount full_ub
= 0;
424 vector
<intrusive_ptr
<Database::Internal
> >::const_iterator i
;
425 for (i
= internal
.begin(); i
!= internal
.end(); ++i
) {
426 Xapian::termcount ub
= (*i
)->get_wdf_upper_bound(term
);
427 if (ub
> full_ub
) full_ub
= ub
;
// Return a ValueIterator over `slot`:
//  - no sub-databases: a default-constructed ValueIterator;
//  - more than one: a MultiValueList spanning all of them;
//  - exactly one: that sub-database's own value list.
433 Database::valuestream_begin(Xapian::valueno slot
) const
435 LOGCALL(API
, ValueIterator
, "Database::valuestream_begin", slot
);
436 if (internal
.size() == 0)
437 RETURN(ValueIterator());
438 if (internal
.size() != 1)
439 RETURN(ValueIterator(new MultiValueList(internal
, slot
)));
440 RETURN(ValueIterator(internal
[0]->open_value_list(slot
)));
// Length of document `did`: splits `did` into a sub-database index (n) and
// a local docid (m) and forwards to that sub-database.
// NOTE(review): the condition guarding docid_zero_invalid() and the action
// for multiplier == 0 are not visible in this extract.
444 Database::get_doclength(Xapian::docid did
) const
446 LOGCALL(API
, Xapian::termcount
, "Database::get_doclength", did
);
448 docid_zero_invalid();
450 unsigned int multiplier
= internal
.size();
451 if (rare(multiplier
== 0))
453 Xapian::doccount n
= (did
- 1) % multiplier
; // which actual database
454 Xapian::docid m
= (did
- 1) / multiplier
+ 1; // real docid in that database
455 RETURN(internal
[n
]->get_doclength(m
));
// Number of unique terms in document `did`: splits `did` into a
// sub-database index (n) and a local docid (m) and forwards to that
// sub-database.
// NOTE(review): the condition guarding docid_zero_invalid() and the action
// for multiplier == 0 are not visible in this extract.
459 Database::get_unique_terms(Xapian::docid did
) const
461 LOGCALL(API
, Xapian::termcount
, "Database::get_unique_terms", did
);
463 docid_zero_invalid();
464 unsigned int multiplier
= internal
.size();
465 if (rare(multiplier
== 0))
467 Xapian::doccount n
= (did
- 1) % multiplier
; // which actual database
468 Xapian::docid m
= (did
- 1) / multiplier
+ 1; // real docid in that database
469 RETURN(internal
[n
]->get_unique_terms(m
));
// Fetch document `did`: splits `did` into a sub-database index (n) and a
// local docid (m) and opens the document there, passing `false` as the
// second argument of open_document().
// NOTE(review): the condition guarding docid_zero_invalid() and the action
// for multiplier == 0 are not visible in this extract.
473 Database::get_document(Xapian::docid did
) const
475 LOGCALL(API
, Document
, "Database::get_document", did
);
477 docid_zero_invalid();
479 unsigned int multiplier
= internal
.size();
480 if (rare(multiplier
== 0))
482 Xapian::doccount n
= (did
- 1) % multiplier
; // which actual database
483 Xapian::docid m
= (did
- 1) / multiplier
+ 1; // real docid in that database
485 // Open non-lazily so we throw DocNotFoundError if the doc doesn't exist.
486 RETURN(Document(internal
[n
]->open_document(m
, false)));
// Fetch document `did` with option flags: as the single-argument overload,
// but the second argument of open_document() is true when
// Xapian::DOC_ASSUME_VALID is set in `flags`.
// NOTE(review): the condition guarding docid_zero_invalid() and the action
// for multiplier == 0 are not visible in this extract.
490 Database::get_document(Xapian::docid did
, unsigned flags
) const
492 LOGCALL(API
, Document
, "Database::get_document", did
|flags
);
494 docid_zero_invalid();
496 unsigned int multiplier
= internal
.size();
497 if (rare(multiplier
== 0))
499 Xapian::doccount n
= (did
- 1) % multiplier
; // which actual database
500 Xapian::docid m
= (did
- 1) / multiplier
+ 1; // real docid in that database
502 bool assume_valid
= flags
& Xapian::DOC_ASSUME_VALID
;
503 RETURN(Document(internal
[n
]->open_document(m
, assume_valid
)));
// Report whether `tname` exists: returns true as soon as any sub-database
// reports term_exists(tname).
// NOTE(review): the condition guarding the early
// `RETURN(get_doccount() != 0)` is not visible here - presumably the
// empty-term case; confirm. The final "not found" return is also not
// visible.
507 Database::term_exists(const string
& tname
) const
509 LOGCALL(API
, bool, "Database::term_exists", tname
);
511 RETURN(get_doccount() != 0);
513 vector
<intrusive_ptr
<Database::Internal
> >::const_iterator i
;
514 for (i
= internal
.begin(); i
!= internal
.end(); ++i
) {
515 if ((*i
)->term_exists(tname
)) RETURN(true);
// Iterates over every sub-database in `internal`.
// NOTE(review): the loop body is not visible in this extract - presumably a
// per-sub-database keep_alive() call; confirm.
521 Database::keep_alive()
523 LOGCALL_VOID(API
, "Database::keep_alive", NO_ARGS
);
524 vector
<intrusive_ptr
<Database::Internal
> >::const_iterator i
;
525 for (i
= internal
.begin(); i
!= internal
.end(); ++i
) {
// Human-readable description of this Database.
// NOTE(review): the return statement is not visible in this extract.
531 Database::get_description() const
533 /// @todo display contents of the database
537 // We sum the character frequency histogram absolute differences to compute a
538 // lower bound on the edit distance. Rather than counting each Unicode code
539 // point uniquely, we use an array with VEC_SIZE elements and tally code points
540 // modulo VEC_SIZE which can only reduce the bound we calculate.
542 // There will be a trade-off between how good the bound is and how large an
543 // array is used (a larger array takes more time to clear and sum over). The
544 // value 64 is somewhat arbitrary - it works as well as 128 for the testsuite
545 // but that may not reflect real world performance. FIXME: profile and tune.
// Cheap lower bound on the edit distance between code point sequences `a`
// and `b`: tally code points modulo VEC_SIZE (incrementing for `a`,
// decrementing for `b`), sum the absolute bucket differences, then halve
// the sum rounding up.
// NOTE(review): the declaration of `vec` and the definition of VEC_SIZE are
// not visible in this extract; abs() implies a signed element type.
550 freq_edit_lower_bound(const vector
<unsigned> & a
, const vector
<unsigned> & b
)
553 memset(vec
, 0, sizeof(vec
));
554 vector
<unsigned>::const_iterator i
;
555 for (i
= a
.begin(); i
!= a
.end(); ++i
) {
556 ++vec
[(*i
) % VEC_SIZE
];
558 for (i
= b
.begin(); i
!= b
.end(); ++i
) {
559 --vec
[(*i
) % VEC_SIZE
];
561 unsigned int total
= 0;
562 for (size_t j
= 0; j
< VEC_SIZE
; ++j
) {
563 total
+= abs(vec
[j
]);
565 // Each insertion or deletion adds at most 1 to total. Each transposition
566 // doesn't change it at all. But each substitution can change it by 2 so
567 // we need to divide it by 2. Rounding up is OK, since the odd change must
568 // be due to an actual edit.
569 return (total
+ 1) / 2;
// NOTE(review): the end of the following comment is not visible in this
// extract. In get_spelling_suggestion() a candidate is examined only when
// score + TRIGRAM_SCORE_THRESHOLD >= best.
572 // Word must have a trigram score at least this close to the best score seen
574 #define TRIGRAM_SCORE_THRESHOLD 2
// Suggest a spelling correction for `word` within `max_edit_distance` edits.
//
// Outline of the visible code:
//  1. Words of at most one byte yield an empty string.
//  2. A candidate term list is opened in every sub-database and merged with
//     OrTermList; with no candidates the result is an empty string.
//  3. Each candidate whose n-gram score (its wdf) is within
//     TRIGRAM_SCORE_THRESHOLD of the best score seen is filtered by
//     successively stricter tests: UTF-8 length difference, UTF-32 length
//     difference, character-frequency bound, then the real edit distance.
//  4. For survivors, spelling frequency is summed across sub-databases and
//     compared with the best candidate so far.
//
// NOTE(review): many lines are not visible in this extract, including the
// enclosing loop header, the `continue` statements after each rejection, the
// bookkeeping that records the best candidate and `freq_exact`, and the final
// returns. Do not rely on this listing for exact control flow.
577 Database::get_spelling_suggestion(const string
&word
,
578 unsigned max_edit_distance
) const
580 LOGCALL(API
, string
, "Database::get_spelling_suggestion", word
| max_edit_distance
);
// NOTE(review): this early exit uses plain `return`, unlike the RETURN()
// macro used elsewhere in this function.
581 if (word
.size() <= 1) return string();
582 AutoPtr
<TermList
> merger
;
583 for (size_t i
= 0; i
< internal
.size(); ++i
) {
584 TermList
* tl
= internal
[i
]->open_spelling_termlist(word
);
585 LOGLINE(SPELLING
, "Sub db " << i
<< " tl = " << (void*)tl
);
588 merger
.reset(new OrTermList(merger
.release(), tl
));
594 if (!merger
.get()) RETURN(string());
596 // Convert word to UTF-32.
597 // Extra brackets needed to avoid this being misparsed as a function
599 vector
<unsigned> utf32_word((Utf8Iterator(word
)), Utf8Iterator());
601 vector
<unsigned> utf32_term
;
603 Xapian::termcount best
= 1;
// The edit-distance budget starts at the caller's maximum.
605 int edist_best
= max_edit_distance
;
606 Xapian::doccount freq_best
= 0;
607 Xapian::doccount freq_exact
= 0;
// next() may hand back a replacement term list, which merger then adopts.
609 TermList
*ret
= merger
->next();
610 if (ret
) merger
.reset(ret
);
612 if (merger
->at_end()) break;
614 string term
= merger
->get_termname();
615 Xapian::termcount score
= merger
->get_wdf();
617 LOGLINE(SPELLING
, "Term \"" << term
<< "\" ngram score " << score
);
618 if (score
+ TRIGRAM_SCORE_THRESHOLD
>= best
) {
619 if (score
> best
) best
= score
;
621 // There's no point considering a word where the difference
622 // in length is greater than the smallest number of edits we've
625 // First check the length of the encoded UTF-8 version of term.
626 // Each UTF-32 character is 1-4 bytes in UTF-8.
627 if (abs(long(term
.size()) - long(word
.size())) > edist_best
* 4) {
628 LOGLINE(SPELLING
, "Lengths much too different");
632 // Now convert to UTF-32, and compare the true lengths more
634 utf32_term
.assign(Utf8Iterator(term
), Utf8Iterator());
636 if (abs(long(utf32_term
.size()) - long(utf32_word
.size()))
638 LOGLINE(SPELLING
, "Lengths too different");
642 if (freq_edit_lower_bound(utf32_term
, utf32_word
) > edist_best
) {
643 LOGLINE(SPELLING
, "Rejected by character frequency test");
647 int edist
= edit_distance_unsigned(&utf32_term
[0],
648 int(utf32_term
.size()),
650 int(utf32_word
.size()),
652 LOGLINE(SPELLING
, "Edit distance " << edist
);
654 if (edist
<= edist_best
) {
655 Xapian::doccount freq
= 0;
656 for (size_t j
= 0; j
< internal
.size(); ++j
)
657 freq
+= internal
[j
]->get_spelling_frequency(term
);
659 LOGLINE(SPELLING
, "Freq " << freq
<< " best " << freq_best
);
660 // Even if we have an exact match, there may be a much more
661 // frequent potential correction which will still be
668 if (edist
< edist_best
|| freq
> freq_best
) {
669 LOGLINE(SPELLING
, "Best so far: \"" << term
<<
670 "\" edist " << edist
<< " freq " << freq
);
678 if (freq_best
< freq_exact
)
// Return a TermIterator over the spelling word lists of all sub-databases,
// merged with FreqAdderOrTermList; ownership of the merged list passes to
// the TermIterator via release().
// NOTE(review): the conditions around the merge (e.g. handling a null `tl`
// or the first list) are not visible in this extract.
684 Database::spellings_begin() const
686 LOGCALL(API
, TermIterator
, "Database::spellings_begin", NO_ARGS
);
687 AutoPtr
<TermList
> merger
;
688 for (size_t i
= 0; i
< internal
.size(); ++i
) {
689 TermList
* tl
= internal
[i
]->open_spelling_wordlist();
692 merger
.reset(new FreqAdderOrTermList(merger
.release(), tl
));
698 RETURN(TermIterator(merger
.release()));
// Return a TermIterator over the synonyms of `term` from all sub-databases,
// merged with OrTermList; ownership of the merged list passes to the
// TermIterator via release().
// NOTE(review): the conditions around the merge (e.g. handling a null `tl`
// or the first list) are not visible in this extract.
702 Database::synonyms_begin(const std::string
&term
) const
704 LOGCALL(API
, TermIterator
, "Database::synonyms_begin", term
);
705 AutoPtr
<TermList
> merger
;
706 for (size_t i
= 0; i
< internal
.size(); ++i
) {
707 TermList
* tl
= internal
[i
]->open_synonym_termlist(term
);
710 merger
.reset(new OrTermList(merger
.release(), tl
));
716 RETURN(TermIterator(merger
.release()));
// Return a TermIterator over synonym keys starting with `prefix` from all
// sub-databases, merged with OrTermList; ownership of the merged list passes
// to the TermIterator via release().
// NOTE(review): the conditions around the merge (e.g. handling a null `tl`
// or the first list) are not visible in this extract.
720 Database::synonym_keys_begin(const std::string
&prefix
) const
722 LOGCALL(API
, TermIterator
, "Database::synonym_keys_begin", prefix
);
723 AutoPtr
<TermList
> merger
;
724 for (size_t i
= 0; i
< internal
.size(); ++i
) {
725 TermList
* tl
= internal
[i
]->open_synonym_keylist(prefix
);
728 merger
.reset(new OrTermList(merger
.release(), tl
));
734 RETURN(TermIterator(merger
.release()));
// Look up user metadata for `key`.
// An empty key is rejected via empty_metadata_key() (which throws). With no
// sub-databases an empty string is returned; otherwise only the first
// sub-database is consulted.
738 Database::get_metadata(const string
& key
) const
740 LOGCALL(API
, string
, "Database::get_metadata", key
);
741 if (rare(key
.empty()))
742 empty_metadata_key();
743 if (internal
.empty()) RETURN(std::string());
744 RETURN(internal
[0]->get_metadata(key
));
// Return a TermIterator over metadata keys starting with `prefix`.
// With no sub-databases a default TermIterator is returned; otherwise only
// the first sub-database is consulted.
// NOTE(review): the log call passes NO_ARGS although the function takes
// `prefix`; confirm whether `prefix` should be logged.
748 Database::metadata_keys_begin(const std::string
&prefix
) const
750 LOGCALL(API
, Xapian::TermIterator
, "Database::metadata_keys_begin", NO_ARGS
);
751 if (internal
.empty()) RETURN(TermIterator());
752 RETURN(TermIterator(internal
[0]->open_metadata_keylist(prefix
)));
// Build a UUID for the combined database from the sub-database UUIDs, with
// ':' inserted before each one after the first.
// NOTE(review): the declaration of `uuid`, the action taken when a
// sub-database has an empty UUID, the append of `sub_uuid`, and the final
// return are not visible in this extract.
756 Database::get_uuid() const
758 LOGCALL(API
, std::string
, "Database::get_uuid", NO_ARGS
);
760 for (size_t i
= 0; i
< internal
.size(); ++i
) {
761 string sub_uuid
= internal
[i
]->get_uuid();
762 // If any of the sub-databases have no uuid, we can't make a uuid for
763 // the combined database.
764 if (sub_uuid
.empty())
766 if (!uuid
.empty()) uuid
+= ':';
// Return the revision of the single underlying database.
// Throws Xapian::InvalidOperationError unless there is exactly one
// sub-database, and Xapian::UnimplementedError when the revision info
// string cannot be decoded by unpack_uint().
// NOTE(review): the statement returning `revision` is not visible here.
773 Database::get_revision() const
775 LOGCALL(API
, Xapian::rev
, "Database::get_revision", NO_ARGS
);
776 size_t n_dbs
= internal
.size();
777 if (rare(n_dbs
!= 1))
778 throw Xapian::InvalidOperationError("Database::get_revision() requires "
779 "exactly one subdatabase");
780 const string
& s
= internal
[0]->get_revision_info();
// Decode an unsigned integer from the start of the revision info.
781 const char* p
= s
.data();
782 const char* end
= p
+ s
.size();
783 Xapian::rev revision
;
784 if (!unpack_uint(&p
, end
, &revision
))
785 throw Xapian::UnimplementedError("Database::get_revision() only "
786 "supported for chert and glass");
790 ///////////////////////////////////////////////////////////////////////////
// Default constructor: delegates to Database() and logs the construction.
792 WritableDatabase::WritableDatabase() : Database()
794 LOGCALL_CTOR(API
, "WritableDatabase", NO_ARGS
);
// Construct from a single backend object by delegating to
// Database(internal_), then log the construction.
797 WritableDatabase::WritableDatabase(Database::Internal
*internal_
)
798 : Database(internal_
)
800 LOGCALL_CTOR(API
, "WritableDatabase", internal_
);
// Copy constructor; the visible body only logs the construction.
// NOTE(review): the base-class initialiser line is not visible in this
// extract.
803 WritableDatabase::WritableDatabase(const WritableDatabase
&other
)
806 LOGCALL_CTOR(API
, "WritableDatabase", other
);
// Assignment: forwards to Database::operator=.
// NOTE(review): the return-type line is not visible in this extract.
810 WritableDatabase::operator=(const WritableDatabase
&other
)
812 LOGCALL_VOID(API
, "WritableDatabase::operator=", other
);
813 Database::operator=(other
);
// Destructor: the only visible statement logs the destruction.
816 WritableDatabase::~WritableDatabase()
818 LOGCALL_DTOR(API
, "WritableDatabase");
// Call commit() on every sub-database in order.
// NOTE(review): the action for n_dbs == 0 is not visible in this extract -
// presumably no_subdatabases(); confirm.
822 WritableDatabase::commit()
824 LOGCALL_VOID(API
, "WritableDatabase::commit", NO_ARGS
);
825 size_t n_dbs
= internal
.size();
826 if (rare(n_dbs
== 0))
828 for (size_t i
= 0; i
!= n_dbs
; ++i
)
829 internal
[i
]->commit();
// Call begin_transaction(flushed) on every sub-database in order.
// NOTE(review): the action for n_dbs == 0 is not visible in this extract -
// presumably no_subdatabases(); confirm.
833 WritableDatabase::begin_transaction(bool flushed
)
835 LOGCALL_VOID(API
, "WritableDatabase::begin_transaction", flushed
);
836 size_t n_dbs
= internal
.size();
837 if (rare(n_dbs
== 0))
839 for (size_t i
= 0; i
!= n_dbs
; ++i
)
840 internal
[i
]->begin_transaction(flushed
);
// Call commit_transaction() on every sub-database in order.
// NOTE(review): the action for n_dbs == 0 is not visible in this extract -
// presumably no_subdatabases(); confirm.
844 WritableDatabase::commit_transaction()
846 LOGCALL_VOID(API
, "WritableDatabase::commit_transaction", NO_ARGS
);
847 size_t n_dbs
= internal
.size();
848 if (rare(n_dbs
== 0))
850 for (size_t i
= 0; i
!= n_dbs
; ++i
)
851 internal
[i
]->commit_transaction();
// Call cancel_transaction() on every sub-database in order.
// NOTE(review): the action for n_dbs == 0 is not visible in this extract -
// presumably no_subdatabases(); confirm.
855 WritableDatabase::cancel_transaction()
857 LOGCALL_VOID(API
, "WritableDatabase::cancel_transaction", NO_ARGS
);
858 size_t n_dbs
= internal
.size();
859 if (rare(n_dbs
== 0))
861 for (size_t i
= 0; i
!= n_dbs
; ++i
)
862 internal
[i
]->cancel_transaction();
// Add `document` under the next never-used docid of the combined database.
// The new docid is get_lastdocid() + 1; if that wraps round to 0 a
// Xapian::DatabaseError is thrown. The document is stored with
// replace_document() on the owning sub-database so that exactly that docid
// is used.
// NOTE(review): the action for n_dbs == 0, the condition guarding the
// direct internal[0]->add_document() path (presumably n_dbs == 1), and the
// final return are not visible in this extract.
867 WritableDatabase::add_document(const Document
& document
)
869 LOGCALL(API
, Xapian::docid
, "WritableDatabase::add_document", document
);
870 size_t n_dbs
= internal
.size();
871 if (rare(n_dbs
== 0))
874 RETURN(internal
[0]->add_document(document
));
876 // Which database will the next never used docid be in?
877 Xapian::docid did
= get_lastdocid() + 1;
878 if (rare(did
== 0)) {
879 throw Xapian::DatabaseError("Run out of docids - you'll have to use copydatabase to eliminate any gaps before you can add more documents");
881 // We want exactly did to be used, not a lower docid if that subdb isn't
882 // using the docid before it, so call replace_document() not
884 size_t i
= sub_db(did
, n_dbs
);
885 internal
[i
]->replace_document(sub_docid(did
, n_dbs
), document
);
// Delete document `did`: resolves the owning sub-database with sub_db() and
// deletes the local docid given by sub_docid().
// NOTE(review): the condition guarding docid_zero_invalid() and the action
// for n_dbs == 0 are not visible in this extract.
890 WritableDatabase::delete_document(Xapian::docid did
)
892 LOGCALL_VOID(API
, "WritableDatabase::delete_document", did
);
894 docid_zero_invalid();
896 size_t n_dbs
= internal
.size();
897 if (rare(n_dbs
== 0))
899 size_t i
= sub_db(did
, n_dbs
);
900 internal
[i
]->delete_document(sub_docid(did
, n_dbs
));
// Delete every document indexed by `unique_term` by forwarding the call to
// each sub-database. Throws InvalidArgumentError for an empty term.
// NOTE(review): the action for n_dbs == 0 is not visible in this extract.
904 WritableDatabase::delete_document(const std::string
& unique_term
)
906 LOGCALL_VOID(API
, "WritableDatabase::delete_document", unique_term
);
907 if (unique_term
.empty())
908 throw InvalidArgumentError("Empty termnames are invalid");
909 size_t n_dbs
= internal
.size();
910 if (rare(n_dbs
== 0))
912 for (size_t i
= 0; i
!= n_dbs
; ++i
)
913 internal
[i
]->delete_document(unique_term
);
// Replace (or create) document `did` with `document` in the sub-database
// that owns that docid.
// NOTE(review): the condition guarding docid_zero_invalid() and the action
// for n_dbs == 0 are not visible in this extract.
917 WritableDatabase::replace_document(Xapian::docid did
, const Document
& document
)
919 LOGCALL_VOID(API
, "WritableDatabase::replace_document", did
| document
);
921 docid_zero_invalid();
922 size_t n_dbs
= internal
.size();
923 if (rare(n_dbs
== 0))
925 size_t i
= sub_db(did
, n_dbs
);
926 internal
[i
]->replace_document(sub_docid(did
, n_dbs
), document
);
// Replace the document(s) indexed by `unique_term` with `document`.
//  - No posting for the term: the document is added to the sub-database
//    that would hold the next never-used docid.
//  - Otherwise the first matching docid is replaced and every further
//    match is deleted.
// Throws InvalidArgumentError for an empty term.
// NOTE(review): the action for n_dbs == 0, the condition guarding the
// direct internal[0] path (presumably n_dbs == 1), and the final return of
// `retval` are not visible in this extract.
// NOTE(review): in the "no posting" branch the value returned is the
// sub-database's add_document() result, not a combined-database docid
// computed here - confirm this is intended.
930 WritableDatabase::replace_document(const std::string
& unique_term
,
931 const Document
& document
)
933 LOGCALL(API
, Xapian::docid
, "WritableDatabase::replace_document", unique_term
| document
);
934 if (unique_term
.empty())
935 throw InvalidArgumentError("Empty termnames are invalid");
936 size_t n_dbs
= internal
.size();
937 if (rare(n_dbs
== 0))
940 RETURN(internal
[0]->replace_document(unique_term
, document
));
942 Xapian::PostingIterator postit
= postlist_begin(unique_term
);
943 // If no unique_term in the database, this is just an add_document().
944 if (postit
== postlist_end(unique_term
)) {
945 // Which database will the next never used docid be in?
946 size_t i
= sub_db(get_lastdocid() + 1, n_dbs
);
947 RETURN(internal
[i
]->add_document(document
));
950 Xapian::docid retval
= *postit
;
951 size_t i
= sub_db(retval
, n_dbs
);
952 internal
[i
]->replace_document(sub_docid(retval
, n_dbs
), document
);
954 // Delete any other occurrences of unique_term.
955 while (++postit
!= postlist_end(unique_term
)) {
956 Xapian::docid did
= *postit
;
957 i
= sub_db(did
, n_dbs
);
958 internal
[i
]->delete_document(sub_docid(did
, n_dbs
));
// Add `freqinc` to the spelling frequency of `word`; only the first
// sub-database is updated.
// NOTE(review): the action for an empty `internal` is not visible in this
// extract.
965 WritableDatabase::add_spelling(const std::string
& word
,
966 Xapian::termcount freqinc
) const
968 LOGCALL_VOID(API
, "WritableDatabase::add_spelling", word
| freqinc
);
969 if (rare(internal
.empty()))
971 // FIXME: Is adding to the first subdatabase sensible?
972 internal
[0]->add_spelling(word
, freqinc
);
// Call remove_spelling(word, freqdec) on every sub-database.
// NOTE(review): the same `freqdec` is passed to each sub-database; whether
// it should be shared out across them cannot be told from this extract.
// NOTE(review): the action for n_dbs == 0 is not visible in this extract.
976 WritableDatabase::remove_spelling(const std::string
& word
,
977 Xapian::termcount freqdec
) const
979 LOGCALL_VOID(API
, "WritableDatabase::remove_spelling", word
| freqdec
);
980 size_t n_dbs
= internal
.size();
981 if (rare(n_dbs
== 0))
983 for (size_t i
= 0; i
< n_dbs
; ++i
) {
984 internal
[i
]->remove_spelling(word
, freqdec
);
// Record `synonym` as a synonym of `term`; only the first sub-database is
// updated.
// NOTE(review): the action for an empty `internal` is not visible in this
// extract.
989 WritableDatabase::add_synonym(const std::string
& term
,
990 const std::string
& synonym
) const
992 LOGCALL_VOID(API
, "WritableDatabase::add_synonym", term
| synonym
);
993 if (rare(internal
.empty()))
995 // FIXME: Is adding to the first subdatabase sensible?
996 internal
[0]->add_synonym(term
, synonym
);
// Call remove_synonym(term, synonym) on every sub-database.
// NOTE(review): the action for n_dbs == 0 is not visible in this extract.
1000 WritableDatabase::remove_synonym(const std::string
& term
,
1001 const std::string
& synonym
) const
1003 LOGCALL_VOID(API
, "WritableDatabase::remove_synonym", term
| synonym
);
1004 size_t n_dbs
= internal
.size();
1005 if (rare(n_dbs
== 0))
1007 for (size_t i
= 0; i
< n_dbs
; ++i
) {
1008 internal
[i
]->remove_synonym(term
, synonym
);
// Call clear_synonyms(term) on every sub-database.
// NOTE(review): the action for n_dbs == 0 is not visible in this extract.
1013 WritableDatabase::clear_synonyms(const std::string
& term
) const
1015 LOGCALL_VOID(API
, "WritableDatabase::clear_synonyms", term
);
1016 size_t n_dbs
= internal
.size();
1017 if (rare(n_dbs
== 0))
1019 for (size_t i
= 0; i
< n_dbs
; ++i
) {
1020 internal
[i
]->clear_synonyms(term
);
// Store user metadata `value` under `key`; only the first sub-database is
// updated. An empty key is rejected via empty_metadata_key() (which throws).
// NOTE(review): the action for an empty `internal` is not visible in this
// extract.
1025 WritableDatabase::set_metadata(const string
& key
, const string
& value
)
1027 LOGCALL_VOID(API
, "WritableDatabase::set_metadata", key
| value
);
1028 if (rare(key
.empty()))
1029 empty_metadata_key();
1030 if (rare(internal
.empty()))
1032 internal
[0]->set_metadata(key
, value
);
// Human-readable description of this object; currently the fixed string
// "WritableDatabase()".
1036 WritableDatabase::get_description() const
1038 /// @todo display contents of the writable database
1039 return "WritableDatabase()";