Reduce overheads of PostList positional data support
[xapian.git] / xapian-core / backends / inmemory / inmemory_database.cc
blobc1c6f02ec561fb74224ee58b08e5852c713589ee
/* inmemory_database.cc
 *
 * Copyright 1999,2000,2001 BrightStation PLC
 * Copyright 2002 Ananova Ltd
 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2014,2017 Olly Betts
 * Copyright 2006,2009 Lemur Consulting Ltd
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */
24 #include <config.h>
26 #include "inmemory_database.h"
28 #include "debuglog.h"
30 #include "expand/expandweight.h"
31 #include "inmemory_document.h"
32 #include "inmemory_alltermslist.h"
33 #include "str.h"
34 #include "backends/valuestats.h"
36 #include <algorithm>
37 #include <string>
38 #include <vector>
39 #include <map>
41 #include <xapian/error.h>
42 #include <xapian/valueiterator.h>
44 using std::make_pair;
45 using Xapian::Internal::intrusive_ptr;
47 inline void
48 InMemoryTerm::add_posting(InMemoryPosting&& post)
50 // Add document to right place in list
51 vector<InMemoryPosting>::iterator p;
52 p = lower_bound(docs.begin(), docs.end(),
53 post, InMemoryPostingLessThan());
54 if (p == docs.end() || InMemoryPostingLessThan()(post, *p)) {
55 docs.insert(p, std::move(post));
56 } else if (!p->valid) {
57 *p = std::move(post);
58 } else {
59 (*p).merge(post);
63 inline void
64 InMemoryDoc::add_posting(InMemoryTermEntry&& post)
66 // Add document to right place in list
67 vector<InMemoryTermEntry>::iterator p;
68 p = lower_bound(terms.begin(), terms.end(),
69 post, InMemoryTermEntryLessThan());
70 if (p == terms.end() || InMemoryTermEntryLessThan()(post, *p)) {
71 terms.insert(p, std::move(post));
72 } else {
73 (*p).merge(post);
//////////////
// Postlist //
//////////////
81 InMemoryPostList::InMemoryPostList(intrusive_ptr<const InMemoryDatabase> db_,
82 const InMemoryTerm & imterm,
83 const std::string & term_)
84 : LeafPostList(term_),
85 pos(imterm.docs.begin()),
86 end(imterm.docs.end()),
87 termfreq(imterm.term_freq),
88 started(false),
89 db(db_)
91 while (pos != end && !pos->valid) ++pos;
94 Xapian::doccount
95 InMemoryPostList::get_termfreq() const
97 return termfreq;
100 Xapian::docid
101 InMemoryPostList::get_docid() const
103 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
104 Assert(started);
105 Assert(!at_end());
106 return (*pos).did;
109 PostList *
110 InMemoryPostList::next(double /*w_min*/)
112 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
113 if (started) {
114 Assert(!at_end());
115 ++pos;
116 while (pos != end && !pos->valid) ++pos;
117 } else {
118 started = true;
120 return NULL;
123 PostList *
124 InMemoryPostList::skip_to(Xapian::docid did, double w_min)
126 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
127 // FIXME - see if we can make more efficient, perhaps using better
128 // data structure. Note, though, that a binary search of
129 // the remaining list may NOT be a good idea (search time is then
130 // O(log {length of list}), as opposed to O(distance we want to skip)
131 // Since we will frequently only be skipping a short distance, this
132 // could well be worse.
134 // If we've not started, it's OK to call skip_to().
135 Assert(!at_end() || !started);
136 started = true;
137 while (!at_end() && (*pos).did < did) {
138 (void) next(w_min);
140 return NULL;
143 bool
144 InMemoryPostList::at_end() const
146 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
147 return (pos == end);
150 string
151 InMemoryPostList::get_description() const
153 return "InMemoryPostList " + str(termfreq);
156 PositionList *
157 InMemoryPostList::read_position_list()
159 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
160 mypositions.assign(pos->positions.copy());
161 return &mypositions;
164 PositionList *
165 InMemoryPostList::open_position_list() const
167 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
168 return new InMemoryPositionList(pos->positions.copy());
171 Xapian::termcount
172 InMemoryPostList::get_wdf() const
174 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
175 return (*pos).wdf;
//////////////
// Termlist //
//////////////
182 InMemoryTermList::InMemoryTermList(intrusive_ptr<const InMemoryDatabase> db_,
183 Xapian::docid did_,
184 const InMemoryDoc & doc,
185 Xapian::termcount len)
186 : pos(doc.terms.begin()), end(doc.terms.end()), terms(doc.terms.size()),
187 started(false), db(db_), did(did_), document_length(len)
189 LOGLINE(DB, "InMemoryTermList::InMemoryTermList(): " <<
190 terms << " terms starting from " << pos->tname);
193 Xapian::termcount
194 InMemoryTermList::get_wdf() const
196 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
197 Assert(started);
198 Assert(!at_end());
199 return (*pos).wdf;
202 Xapian::doccount
203 InMemoryTermList::get_termfreq() const
205 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
206 Assert(started);
207 Assert(!at_end());
209 Xapian::doccount tf;
210 db->get_freqs((*pos).tname, &tf, NULL);
211 return tf;
214 Xapian::termcount
215 InMemoryTermList::get_approx_size() const
217 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
218 return terms;
221 void
222 InMemoryTermList::accumulate_stats(Xapian::Internal::ExpandStats & stats) const
224 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
225 Assert(started);
226 Assert(!at_end());
227 stats.accumulate(InMemoryTermList::get_wdf(), document_length,
228 InMemoryTermList::get_termfreq(),
229 db->get_doccount());
232 string
233 InMemoryTermList::get_termname() const
235 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
236 Assert(started);
237 Assert(!at_end());
238 return (*pos).tname;
241 TermList *
242 InMemoryTermList::next()
244 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
245 if (started) {
246 Assert(!at_end());
247 ++pos;
248 } else {
249 started = true;
251 return NULL;
254 TermList *
255 InMemoryTermList::skip_to(const string & term)
257 if (rare(db->is_closed()))
258 InMemoryDatabase::throw_database_closed();
260 while (pos != end && pos->tname < term) {
261 ++pos;
264 started = true;
265 return NULL;
268 bool
269 InMemoryTermList::at_end() const
271 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
272 Assert(started);
273 return (pos == end);
276 Xapian::termcount
277 InMemoryTermList::positionlist_count() const
279 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
280 return db->positionlist_count(did, (*pos).tname);
283 PositionList*
284 InMemoryTermList::positionlist_begin() const
286 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
287 return db->open_position_list(did, (*pos).tname);
/////////////////////////////
// InMemoryAllDocsPostList //
/////////////////////////////
294 InMemoryAllDocsPostList::InMemoryAllDocsPostList(intrusive_ptr<const InMemoryDatabase> db_)
295 : LeafPostList(std::string()), did(0), db(db_)
299 Xapian::doccount
300 InMemoryAllDocsPostList::get_termfreq() const
302 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
303 return db->totdocs;
306 Xapian::docid
307 InMemoryAllDocsPostList::get_docid() const
309 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
310 Assert(did > 0);
311 Assert(did <= db->termlists.size());
312 Assert(db->termlists[did - 1].is_valid);
313 return did;
316 Xapian::termcount
317 InMemoryAllDocsPostList::get_wdf() const
319 return 1;
322 PositionList *
323 InMemoryAllDocsPostList::read_position_list()
325 throw Xapian::UnimplementedError("Can't open position list for all docs iterator");
328 PositionList *
329 InMemoryAllDocsPostList::open_position_list() const
331 throw Xapian::UnimplementedError("Can't open position list for all docs iterator");
334 PostList *
335 InMemoryAllDocsPostList::next(double /*w_min*/)
337 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
338 Assert(!at_end());
339 do {
340 ++did;
341 } while (did <= db->termlists.size() && !db->termlists[did - 1].is_valid);
342 return NULL;
345 PostList *
346 InMemoryAllDocsPostList::skip_to(Xapian::docid did_, double /*w_min*/)
348 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
349 Assert(!at_end());
350 if (did <= did_) {
351 did = did_;
352 while (did <= db->termlists.size() && !db->termlists[did - 1].is_valid) {
353 ++did;
356 return NULL;
359 bool
360 InMemoryAllDocsPostList::at_end() const
362 if (db->is_closed()) InMemoryDatabase::throw_database_closed();
363 return (did > db->termlists.size());
366 string
367 InMemoryAllDocsPostList::get_description() const
369 return "InMemoryAllDocsPostList " + str(did);
///////////////////////////
// Actual database class //
///////////////////////////
376 // Updates are applied immediately so we can't support transactions.
377 InMemoryDatabase::InMemoryDatabase()
378 : Xapian::Database::Internal(TRANSACTION_UNIMPLEMENTED),
379 totdocs(0), totlen(0), positions_present(false), closed(false)
381 // We keep an empty entry in postlists for convenience of implementing
382 // allterms iteration and returning a PostList for an absent term.
383 postlists.insert(make_pair(string(), InMemoryTerm()));
386 InMemoryDatabase::~InMemoryDatabase()
388 dtor_called();
391 bool
392 InMemoryDatabase::reopen()
394 if (closed) InMemoryDatabase::throw_database_closed();
395 return false;
398 void
399 InMemoryDatabase::close()
401 // Free all the resources, and mark the db as closed.
402 postlists.clear();
403 termlists.clear();
404 doclists.clear();
405 valuelists.clear();
406 valuestats.clear();
407 doclengths.clear();
408 metadata.clear();
409 closed = true;
412 PostList*
413 InMemoryDatabase::open_post_list(const string& term) const
415 return InMemoryDatabase::open_leaf_post_list(term, false);
418 LeafPostList*
419 InMemoryDatabase::open_leaf_post_list(const string& term, bool need_pos) const
421 (void)need_pos;
422 if (closed) InMemoryDatabase::throw_database_closed();
423 if (term.empty()) {
424 Assert(!need_pos);
425 intrusive_ptr<const InMemoryDatabase> ptrtothis(this);
426 return new InMemoryAllDocsPostList(ptrtothis);
428 map<string, InMemoryTerm>::const_iterator i = postlists.find(term);
429 if (i == postlists.end() || i->second.term_freq == 0) {
430 i = postlists.begin();
431 // Check that our dummy entry for string() is present.
432 Assert(i->first.empty());
434 intrusive_ptr<const InMemoryDatabase> ptrtothis(this);
435 return new InMemoryPostList(ptrtothis, i->second, term);
438 bool
439 InMemoryDatabase::doc_exists(Xapian::docid did) const
441 if (closed) InMemoryDatabase::throw_database_closed();
442 return (did > 0 && did <= termlists.size() && termlists[did - 1].is_valid);
445 void
446 InMemoryDatabase::get_freqs(const string & term,
447 Xapian::doccount * termfreq_ptr,
448 Xapian::termcount * collfreq_ptr) const
450 if (closed) InMemoryDatabase::throw_database_closed();
451 map<string, InMemoryTerm>::const_iterator i = postlists.find(term);
452 if (i != postlists.end()) {
453 if (termfreq_ptr)
454 *termfreq_ptr = i->second.term_freq;
455 if (collfreq_ptr)
456 *collfreq_ptr = i->second.collection_freq;
457 } else {
458 if (termfreq_ptr)
459 *termfreq_ptr = 0;
460 if (collfreq_ptr)
461 *collfreq_ptr = 0;
465 Xapian::doccount
466 InMemoryDatabase::get_value_freq(Xapian::valueno slot) const
468 if (closed) InMemoryDatabase::throw_database_closed();
469 map<Xapian::valueno, ValueStats>::const_iterator i = valuestats.find(slot);
470 if (i == valuestats.end()) return 0;
471 return i->second.freq;
474 std::string
475 InMemoryDatabase::get_value_lower_bound(Xapian::valueno slot) const
477 if (closed) InMemoryDatabase::throw_database_closed();
478 map<Xapian::valueno, ValueStats>::const_iterator i = valuestats.find(slot);
479 if (i == valuestats.end()) return string();
480 return i->second.lower_bound;
483 std::string
484 InMemoryDatabase::get_value_upper_bound(Xapian::valueno slot) const
486 if (closed) InMemoryDatabase::throw_database_closed();
487 map<Xapian::valueno, ValueStats>::const_iterator i = valuestats.find(slot);
488 if (i == valuestats.end()) return string();
489 return i->second.upper_bound;
492 Xapian::doccount
493 InMemoryDatabase::get_doccount() const
495 if (closed) InMemoryDatabase::throw_database_closed();
496 return totdocs;
499 Xapian::docid
500 InMemoryDatabase::get_lastdocid() const
502 if (closed) InMemoryDatabase::throw_database_closed();
503 return termlists.size();
506 Xapian::totallength
507 InMemoryDatabase::get_total_length() const
509 return totlen;
512 Xapian::termcount
513 InMemoryDatabase::get_doclength(Xapian::docid did) const
515 if (closed) InMemoryDatabase::throw_database_closed();
516 if (!doc_exists(did)) {
517 throw Xapian::DocNotFoundError(string("Docid ") + str(did) +
518 string(" not found"));
520 return doclengths[did - 1];
523 Xapian::termcount
524 InMemoryDatabase::get_unique_terms(Xapian::docid did) const
526 if (closed) InMemoryDatabase::throw_database_closed();
527 if (did == 0 || did > termlists.size() || !termlists[did - 1].is_valid)
528 throw Xapian::DocNotFoundError(string("Docid ") + str(did) +
529 string(" not found"));
530 // get_unique_terms() really ought to only count terms with wdf > 0, but
531 // that's expensive to calculate on demand, so for now let's just ensure
532 // unique_terms <= doclen.
533 Xapian::termcount terms = termlists[did - 1].terms.size();
534 return std::min(terms, Xapian::termcount(doclengths[did - 1]));
537 TermList *
538 InMemoryDatabase::open_term_list(Xapian::docid did) const
540 if (closed) InMemoryDatabase::throw_database_closed();
541 Assert(did != 0);
542 if (!doc_exists(did)) {
543 // FIXME: the docid in this message will be local, not global
544 throw Xapian::DocNotFoundError(string("Docid ") + str(did) +
545 string(" not found"));
547 return new InMemoryTermList(intrusive_ptr<const InMemoryDatabase>(this), did,
548 termlists[did - 1], doclengths[did - 1]);
551 TermList *
552 InMemoryDatabase::open_term_list_direct(Xapian::docid did) const
554 return InMemoryDatabase::open_term_list(did);
557 Xapian::Document::Internal *
558 InMemoryDatabase::open_document(Xapian::docid did, bool lazy) const
560 if (closed) InMemoryDatabase::throw_database_closed();
561 Assert(did != 0);
562 if (!lazy && !doc_exists(did)) {
563 // FIXME: the docid in this message will be local, not global
564 throw Xapian::DocNotFoundError(string("Docid ") + str(did) +
565 string(" not found"));
567 return new InMemoryDocument(this, did);
570 std::string
571 InMemoryDatabase::get_metadata(const std::string & key) const
573 if (closed) InMemoryDatabase::throw_database_closed();
574 map<string, string>::const_iterator i = metadata.find(key);
575 if (i == metadata.end())
576 return string();
577 return i->second;
580 TermList *
581 InMemoryDatabase::open_metadata_keylist(const string &) const
583 if (closed) InMemoryDatabase::throw_database_closed();
584 if (metadata.empty()) return NULL;
585 // FIXME: nobody implemented this yet...
586 throw Xapian::UnimplementedError("InMemory backend doesn't currently implement Database::metadata_keys_begin()");
589 void
590 InMemoryDatabase::set_metadata(const std::string & key,
591 const std::string & value)
593 if (closed) InMemoryDatabase::throw_database_closed();
594 if (!value.empty()) {
595 metadata[key] = value;
596 } else {
597 metadata.erase(key);
601 Xapian::termcount
602 InMemoryDatabase::positionlist_count(Xapian::docid did,
603 const string & tname) const
605 if (closed) InMemoryDatabase::throw_database_closed();
606 if (!doc_exists(did)) {
607 return 0;
609 const InMemoryDoc &doc = termlists[did - 1];
611 InMemoryTermEntry temp;
612 temp.tname = tname;
613 auto t = lower_bound(doc.terms.begin(), doc.terms.end(),
614 temp, InMemoryTermEntryLessThan());
615 if (t != doc.terms.end() && t->tname == tname) {
616 return t->positions.size();
618 return 0;
621 PositionList *
622 InMemoryDatabase::open_position_list(Xapian::docid did,
623 const string & tname) const
625 if (closed) InMemoryDatabase::throw_database_closed();
626 if (usual(doc_exists(did))) {
627 const InMemoryDoc &doc = termlists[did - 1];
629 InMemoryTermEntry temp;
630 temp.tname = tname;
631 auto t = lower_bound(doc.terms.begin(), doc.terms.end(),
632 temp, InMemoryTermEntryLessThan());
633 if (t != doc.terms.end() && t->tname == tname) {
634 return new InMemoryPositionList(t->positions);
637 return new InMemoryPositionList();
640 void
641 InMemoryDatabase::add_values(Xapian::docid did,
642 const map<Xapian::valueno, string> &values_)
644 if (closed) InMemoryDatabase::throw_database_closed();
645 if (did > valuelists.size()) {
646 valuelists.resize(did);
648 valuelists[did - 1] = values_;
650 // Update the statistics.
651 map<Xapian::valueno, string>::const_iterator j;
652 for (j = values_.begin(); j != values_.end(); ++j) {
653 std::pair<map<Xapian::valueno, ValueStats>::iterator, bool> i;
654 i = valuestats.insert(make_pair(j->first, ValueStats()));
656 // Now, modify the stored statistics.
657 if ((i.first->second.freq)++ == 0) {
658 // If the value count was previously zero, set the upper and lower
659 // bounds to the newly added value.
660 i.first->second.lower_bound = j->second;
661 i.first->second.upper_bound = j->second;
662 } else {
663 // Otherwise, simply make sure they reflect the new value.
664 if (j->second < i.first->second.lower_bound) {
665 i.first->second.lower_bound = j->second;
667 if (j->second > i.first->second.upper_bound) {
668 i.first->second.upper_bound = j->second;
674 // We implicitly commit each modification right away, so nothing to do here.
675 void
676 InMemoryDatabase::commit()
680 // We implicitly commit each modification right away, so nothing to do here.
681 void
682 InMemoryDatabase::cancel()
686 void
687 InMemoryDatabase::delete_document(Xapian::docid did)
689 if (closed) InMemoryDatabase::throw_database_closed();
690 if (!doc_exists(did)) {
691 throw Xapian::DocNotFoundError(string("Docid ") + str(did) +
692 string(" not found"));
694 termlists[did - 1].is_valid = false;
695 doclists[did - 1] = string();
696 map<Xapian::valueno, string>::const_iterator j;
697 for (j = valuelists[did - 1].begin(); j != valuelists[did - 1].end(); ++j) {
698 map<Xapian::valueno, ValueStats>::iterator i;
699 i = valuestats.find(j->first);
700 if (--(i->second.freq) == 0) {
701 i->second.lower_bound.resize(0);
702 i->second.upper_bound.resize(0);
705 valuelists[did - 1].clear();
707 totlen -= doclengths[did - 1];
708 doclengths[did - 1] = 0;
709 totdocs--;
710 // A crude check, but it's hard to be more precise with the current
711 // InMemory structure without being very inefficient.
712 if (totdocs == 0) positions_present = false;
714 vector<InMemoryTermEntry>::const_iterator i;
715 for (i = termlists[did - 1].terms.begin();
716 i != termlists[did - 1].terms.end();
717 ++i) {
718 map<string, InMemoryTerm>::iterator t = postlists.find(i->tname);
719 Assert(t != postlists.end());
720 t->second.collection_freq -= i->wdf;
721 --t->second.term_freq;
723 // Just invalidate erased doc ids - otherwise we need to erase
724 // in a vector (inefficient) and we break any posting lists
725 // iterating over this posting list.
726 InMemoryPosting temp;
727 temp.did = did;
728 auto p = lower_bound(t->second.docs.begin(), t->second.docs.end(),
729 temp, InMemoryPostingLessThan());
730 if (p != t->second.docs.end() && p->did == did) {
731 p->valid = false;
734 termlists[did - 1].terms.clear();
737 void
738 InMemoryDatabase::replace_document(Xapian::docid did,
739 const Xapian::Document & document)
741 LOGCALL_VOID(DB, "InMemoryDatabase::replace_document", did | document);
743 if (closed) InMemoryDatabase::throw_database_closed();
745 if (doc_exists(did)) {
746 map<Xapian::valueno, string>::const_iterator j;
747 for (j = valuelists[did - 1].begin(); j != valuelists[did - 1].end(); ++j) {
748 map<Xapian::valueno, ValueStats>::iterator i;
749 i = valuestats.find(j->first);
750 if (--(i->second.freq) == 0) {
751 i->second.lower_bound.resize(0);
752 i->second.upper_bound.resize(0);
756 totlen -= doclengths[did - 1];
757 totdocs--;
758 } else if (did > termlists.size()) {
759 termlists.resize(did);
760 termlists[did - 1].is_valid = true;
761 doclengths.resize(did);
762 doclists.resize(did);
763 valuelists.resize(did);
764 } else {
765 termlists[did - 1].is_valid = true;
768 vector<InMemoryTermEntry>::const_iterator i;
769 for (i = termlists[did - 1].terms.begin();
770 i != termlists[did - 1].terms.end();
771 ++i) {
772 map<string, InMemoryTerm>::iterator t = postlists.find(i->tname);
773 Assert(t != postlists.end());
774 t->second.collection_freq -= i->wdf;
775 --t->second.term_freq;
777 // Just invalidate erased doc ids - otherwise we need to erase
778 // in a vector (inefficient) and we break any posting lists
779 // iterating over this posting list.
780 InMemoryPosting temp;
781 temp.did = did;
782 auto p = lower_bound(t->second.docs.begin(), t->second.docs.end(),
783 temp, InMemoryPostingLessThan());
784 if (p != t->second.docs.end() && p->did == did) {
785 p->valid = false;
789 doclengths[did - 1] = 0;
790 doclists[did - 1] = document.get_data();
792 finish_add_doc(did, document);
795 Xapian::docid
796 InMemoryDatabase::add_document(const Xapian::Document & document)
798 LOGCALL(DB, Xapian::docid, "InMemoryDatabase::add_document", document);
799 if (closed) InMemoryDatabase::throw_database_closed();
801 Xapian::docid did = make_doc(document.get_data());
803 finish_add_doc(did, document);
805 RETURN(did);
808 void
809 InMemoryDatabase::finish_add_doc(Xapian::docid did, const Xapian::Document &document)
812 map<Xapian::valueno, string> values;
813 Xapian::ValueIterator k = document.values_begin();
814 for ( ; k != document.values_end(); ++k) {
815 values.insert(make_pair(k.get_valueno(), *k));
816 LOGLINE(DB, "InMemoryDatabase::finish_add_doc(): adding value " <<
817 k.get_valueno() << " -> " << *k);
819 add_values(did, values);
822 InMemoryDoc doc(true);
823 Xapian::TermIterator i = document.termlist_begin();
824 for ( ; i != document.termlist_end(); ++i) {
825 make_term(*i);
827 LOGLINE(DB, "InMemoryDatabase::finish_add_doc(): adding term " << *i);
828 Xapian::PositionIterator j = i.positionlist_begin();
829 if (j == i.positionlist_end()) {
830 /* Make sure the posting exists, even without a position. */
831 make_posting(&doc, *i, did, 0, i.get_wdf(), false);
832 } else {
833 positions_present = true;
834 for ( ; j != i.positionlist_end(); ++j) {
835 make_posting(&doc, *i, did, *j, i.get_wdf());
839 Assert(did > 0 && did <= doclengths.size());
840 doclengths[did - 1] += i.get_wdf();
841 totlen += i.get_wdf();
842 postlists[*i].collection_freq += i.get_wdf();
843 ++postlists[*i].term_freq;
845 swap(termlists[did - 1], doc);
847 totdocs++;
850 void
851 InMemoryDatabase::make_term(const string & tname)
853 postlists[tname]; // Initialise, if not already there.
856 Xapian::docid
857 InMemoryDatabase::make_doc(const string & docdata)
859 termlists.push_back(InMemoryDoc(true));
860 doclengths.push_back(0);
861 doclists.push_back(docdata);
863 AssertEqParanoid(termlists.size(), doclengths.size());
865 return termlists.size();
868 void InMemoryDatabase::make_posting(InMemoryDoc * doc,
869 const string & tname,
870 Xapian::docid did,
871 Xapian::termpos position,
872 Xapian::termcount wdf,
873 bool use_position)
875 Assert(doc);
876 Assert(postlists.find(tname) != postlists.end());
877 Assert(did > 0 && did <= termlists.size());
878 Assert(did > 0 && did <= doclengths.size());
879 Assert(doc_exists(did));
881 // Make the posting
882 InMemoryPosting posting;
883 posting.did = did;
884 if (use_position) {
885 posting.positions.push_back(position);
887 posting.wdf = wdf;
888 posting.valid = true;
890 // Now record the posting
891 postlists[tname].add_posting(std::move(posting));
893 // Make the termentry
894 InMemoryTermEntry termentry;
895 termentry.tname = tname;
896 if (use_position) {
897 termentry.positions.push_back(position);
899 termentry.wdf = wdf;
901 // Now record the termentry
902 doc->add_posting(std::move(termentry));
905 bool
906 InMemoryDatabase::term_exists(const string & tname) const
908 if (closed) InMemoryDatabase::throw_database_closed();
909 if (tname.empty()) {
910 return totdocs != 0;
912 map<string, InMemoryTerm>::const_iterator i = postlists.find(tname);
913 if (i == postlists.end()) return false;
914 return (i->second.term_freq != 0);
917 bool
918 InMemoryDatabase::has_positions() const
920 if (closed) InMemoryDatabase::throw_database_closed();
921 return positions_present;
924 TermList *
925 InMemoryDatabase::open_allterms(const string & prefix) const
927 if (closed) InMemoryDatabase::throw_database_closed();
928 return new InMemoryAllTermsList(&postlists,
929 intrusive_ptr<const InMemoryDatabase>(this),
930 prefix);
933 void
934 InMemoryDatabase::throw_database_closed()
936 throw Xapian::DatabaseError("Database has been closed");
939 string
940 InMemoryDatabase::get_description() const
942 return "InMemory";
// Fail the build if this file is compiled while DISABLE_GPL_LIBXAPIAN is
// defined.
#if defined(DISABLE_GPL_LIBXAPIAN)
# error GPL source we cannot relicense included in libxapian
#endif