TermList::positionlist_begin() now returns PositionList*
[xapian.git] / xapian-core / backends / inmemory / inmemory_database.h
blobf9a4e5c3861bcc9ebd93f1a0c5fb23f471446399
1 /** @file inmemory_database.h
2 * @brief C++ class definition for inmemory database access
3 */
4 /* Copyright 1999,2000,2001 BrightStation PLC
5 * Copyright 2002 Ananova Ltd
6 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2014,2015 Olly Betts
7 * Copyright 2006,2009 Lemur Consulting Ltd
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License as
11 * published by the Free Software Foundation; either version 2 of the
12 * License, or (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22 * USA
25 #ifndef OM_HGUARD_INMEMORY_DATABASE_H
26 #define OM_HGUARD_INMEMORY_DATABASE_H
28 #include "api/leafpostlist.h"
29 #include "api/smallvector.h"
30 #include "api/termlist.h"
31 #include "backends/backends.h"
32 #include "backends/databaseinternal.h"
33 #include "backends/valuestats.h"
34 #include <map>
35 #include <vector>
36 #include <algorithm>
37 #include <xapian/document.h>
38 #include "inmemory_positionlist.h"
39 #include "internaltypes.h"
40 #include "omassert.h"
42 using namespace std;
44 // Class representing a posting (a term/doc pair, and
45 // all the relevant positional information, is a single posting)
46 class InMemoryPosting {
47 public:
48 Xapian::docid did;
49 bool valid;
50 Xapian::VecCOW<Xapian::termpos> positions; // Sorted vector of positions
51 Xapian::termcount wdf;
53 // Merge two postings (same term/doc pair, new positional info)
54 void merge(const InMemoryPosting & post) {
55 Assert(did == post.did);
57 positions.reserve(positions.size() + post.positions.size());
58 for (auto&& pos : post.positions) {
59 positions.push_back(pos);
61 inplace_merge(positions.begin(),
62 positions.begin() + post.positions.size(),
63 positions.end());
67 class InMemoryTermEntry {
68 public:
69 string tname;
70 Xapian::VecCOW<Xapian::termpos> positions; // Sorted vector of positions
71 Xapian::termcount wdf;
73 // Merge two postings (same term/doc pair, new positional info)
74 void merge(const InMemoryTermEntry & post) {
75 Assert(tname == post.tname);
77 positions.reserve(positions.size() + post.positions.size());
78 for (auto&& pos : post.positions) {
79 positions.push_back(pos);
81 inplace_merge(positions.begin(),
82 positions.begin() + post.positions.size(),
83 positions.end());
87 // Compare by document ID
88 class InMemoryPostingLessThan {
89 public:
90 int operator() (const InMemoryPosting &p1,
91 const InMemoryPosting &p2) const
93 return p1.did < p2.did;
97 // Compare by termname
98 class InMemoryTermEntryLessThan {
99 public:
100 int operator() (const InMemoryTermEntry&p1,
101 const InMemoryTermEntry&p2) const
103 return p1.tname < p2.tname;
107 // Class representing a term and the documents indexing it
108 class InMemoryTerm {
109 public:
110 // Sorted list of documents indexing this term.
111 vector<InMemoryPosting> docs;
113 Xapian::termcount term_freq;
114 Xapian::termcount collection_freq;
116 InMemoryTerm() : term_freq(0), collection_freq(0) {}
118 void add_posting(InMemoryPosting&& post);
121 /// Class representing a document and the terms indexing it.
122 class InMemoryDoc {
123 public:
124 bool is_valid;
125 // Sorted list of terms indexing this document.
126 vector<InMemoryTermEntry> terms;
128 /* Initialise invalid by default, so that resizing the termlist array
129 * doesn't create valid documents. */
130 InMemoryDoc() : is_valid(false) {}
132 // Initialise specifying validity.
133 explicit InMemoryDoc(bool is_valid_) : is_valid(is_valid_) {}
135 void add_posting(InMemoryTermEntry&& post);
138 class InMemoryDatabase;
140 /** A PostList in an inmemory database.
142 class InMemoryPostList : public LeafPostList {
143 friend class InMemoryDatabase;
144 private:
145 vector<InMemoryPosting>::const_iterator pos;
146 vector<InMemoryPosting>::const_iterator end;
147 Xapian::doccount termfreq;
148 bool started;
150 /** List of positions of the current term.
151 * This list is populated when read_position_list() is called.
153 InMemoryPositionList mypositions;
155 Xapian::Internal::intrusive_ptr<const InMemoryDatabase> db;
157 InMemoryPostList(Xapian::Internal::intrusive_ptr<const InMemoryDatabase> db,
158 const InMemoryTerm & imterm, const std::string & term_);
159 public:
160 Xapian::doccount get_termfreq() const;
162 Xapian::docid get_docid() const; // Gets current docid
163 Xapian::termcount get_wdf() const; // Within Document Frequency
164 PositionList * read_position_list();
165 PositionList * open_position_list() const;
167 PostList *next(double w_min); // Moves to next docid
169 PostList *skip_to(Xapian::docid did, double w_min); // Moves to next docid >= specified docid
171 // True if we're off the end of the list.
172 bool at_end() const;
174 string get_description() const;
177 /** A PostList over all docs in an inmemory database.
179 class InMemoryAllDocsPostList : public LeafPostList {
180 friend class InMemoryDatabase;
181 private:
182 Xapian::docid did;
184 Xapian::Internal::intrusive_ptr<const InMemoryDatabase> db;
186 InMemoryAllDocsPostList(Xapian::Internal::intrusive_ptr<const InMemoryDatabase> db);
187 public:
188 Xapian::doccount get_termfreq() const;
190 Xapian::docid get_docid() const; // Gets current docid
191 Xapian::termcount get_doclength() const; // Length of current document
192 Xapian::termcount get_unique_terms() const; // number of terms in current document
193 Xapian::termcount get_wdf() const; // Within Document Frequency
194 PositionList * read_position_list();
195 PositionList * open_position_list() const;
197 PostList *next(double w_min); // Moves to next docid
199 PostList *skip_to(Xapian::docid did, double w_min); // Moves to next docid >= specified docid
201 // True if we're off the end of the list
202 bool at_end() const;
204 string get_description() const;
207 // Term List
208 class InMemoryTermList : public TermList {
209 friend class InMemoryDatabase;
210 private:
211 vector<InMemoryTermEntry>::const_iterator pos;
212 vector<InMemoryTermEntry>::const_iterator end;
213 Xapian::termcount terms;
214 bool started;
216 Xapian::Internal::intrusive_ptr<const InMemoryDatabase> db;
217 Xapian::docid did;
218 Xapian::termcount document_length;
220 InMemoryTermList(Xapian::Internal::intrusive_ptr<const InMemoryDatabase> db,
221 Xapian::docid did,
222 const InMemoryDoc & doc,
223 Xapian::termcount len);
224 public:
225 Xapian::termcount get_approx_size() const;
227 /// Collate weighting information for the current term.
228 void accumulate_stats(Xapian::Internal::ExpandStats & stats) const;
230 string get_termname() const;
231 Xapian::termcount get_wdf() const; // Number of occurrences of term in current doc
232 Xapian::doccount get_termfreq() const; // Number of docs indexed by term
233 TermList * next();
234 TermList * skip_to(const std::string & term);
235 bool at_end() const;
236 Xapian::termcount positionlist_count() const;
237 PositionList* positionlist_begin() const;
240 class InMemoryDocument;
242 /** A database held entirely in memory.
244 * This is a prototype database, mainly used for debugging and testing.
246 class InMemoryDatabase : public Xapian::Database::Internal {
247 friend class InMemoryAllDocsPostList;
248 friend class InMemoryDocument;
250 map<string, InMemoryTerm> postlists;
251 vector<InMemoryDoc> termlists;
252 vector<std::string> doclists;
253 vector<std::map<Xapian::valueno, string> > valuelists;
254 std::map<Xapian::valueno, ValueStats> valuestats;
256 vector<Xapian::termcount> doclengths;
258 std::map<string, string> metadata;
260 Xapian::doccount totdocs;
262 Xapian::totallength totlen;
264 bool positions_present;
266 // Flag, true if the db has been closed.
267 bool closed;
269 // Stop copy / assignment being allowed
270 InMemoryDatabase& operator=(const InMemoryDatabase &);
271 InMemoryDatabase(const InMemoryDatabase &);
273 void make_term(const string & tname);
275 bool doc_exists(Xapian::docid did) const;
276 Xapian::docid make_doc(const string & docdata);
278 /* The common parts of add_doc and replace_doc */
279 void finish_add_doc(Xapian::docid did, const Xapian::Document &document);
280 void add_values(Xapian::docid did, const map<Xapian::valueno, string> &values_);
282 void make_posting(InMemoryDoc * doc,
283 const string & tname,
284 Xapian::docid did,
285 Xapian::termpos position,
286 Xapian::termcount wdf,
287 bool use_position = true);
289 //@{
290 /** Implementation of virtual methods: see Database for details.
292 void commit();
293 void cancel();
295 Xapian::docid add_document(const Xapian::Document & document);
296 // Stop the default implementation of delete_document(term) and
297 // replace_document(term) from being hidden. This isn't really
298 // a problem as we only try to call them through the base class
299 // (where they aren't hidden) but some compilers generate a warning
300 // about the hiding.
301 using Xapian::Database::Internal::delete_document;
302 using Xapian::Database::Internal::replace_document;
303 void delete_document(Xapian::docid did);
304 void replace_document(Xapian::docid did, const Xapian::Document & document);
305 //@}
307 public:
308 /** Create and open an in-memory database.
310 * @exception Xapian::DatabaseOpeningError thrown if database can't be opened.
312 InMemoryDatabase();
314 ~InMemoryDatabase();
316 bool reopen();
317 void close();
318 bool is_closed() const { return closed; }
320 Xapian::doccount get_doccount() const;
322 Xapian::docid get_lastdocid() const;
324 Xapian::totallength get_total_length() const;
325 Xapian::termcount get_doclength(Xapian::docid did) const;
326 Xapian::termcount get_unique_terms(Xapian::docid did) const;
328 void get_freqs(const string & term,
329 Xapian::doccount * termfreq_ptr,
330 Xapian::termcount * collfreq_ptr) const;
331 Xapian::doccount get_value_freq(Xapian::valueno slot) const;
332 std::string get_value_lower_bound(Xapian::valueno slot) const;
333 std::string get_value_upper_bound(Xapian::valueno slot) const;
334 bool term_exists(const string & tname) const;
335 bool has_positions() const;
337 PostList * open_post_list(const string & tname) const;
338 LeafPostList* open_leaf_post_list(const string& term) const;
339 TermList * open_term_list(Xapian::docid did) const;
340 TermList * open_term_list_direct(Xapian::docid did) const;
341 Xapian::Document::Internal * open_document(Xapian::docid did, bool lazy) const;
343 std::string get_metadata(const std::string & key) const;
344 TermList * open_metadata_keylist(const std::string &prefix) const;
345 void set_metadata(const std::string & key, const std::string & value);
347 Xapian::termcount positionlist_count(Xapian::docid did,
348 const string & tname) const;
349 PositionList * open_position_list(Xapian::docid did,
350 const string & tname) const;
351 TermList * open_allterms(const string & prefix) const;
353 [[noreturn]]
354 static void throw_database_closed();
356 int get_backend_info(string * path) const {
357 if (path) *path = string();
358 return BACKEND_INMEMORY;
361 std::string get_description() const;
364 #endif /* OM_HGUARD_INMEMORY_DATABASE_H */