Drop workaround for older MSVC
[xapian.git] / xapian-core / backends / inmemory / inmemory_database.h
blobbade060890459758f0ccbd236b02fbb496b2b577
1 /** @file inmemory_database.h
2 * @brief C++ class definition for inmemory database access
3 */
4 /* Copyright 1999,2000,2001 BrightStation PLC
5 * Copyright 2002 Ananova Ltd
6 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2014,2015 Olly Betts
7 * Copyright 2006,2009 Lemur Consulting Ltd
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License as
11 * published by the Free Software Foundation; either version 2 of the
12 * License, or (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22 * USA
25 #ifndef OM_HGUARD_INMEMORY_DATABASE_H
26 #define OM_HGUARD_INMEMORY_DATABASE_H
28 #include "api/leafpostlist.h"
29 #include "api/smallvector.h"
30 #include "api/termlist.h"
31 #include "backends/backends.h"
32 #include "backends/databaseinternal.h"
33 #include "backends/valuestats.h"
34 #include <map>
35 #include <vector>
36 #include <algorithm>
37 #include <xapian/document.h>
38 #include "inmemory_positionlist.h"
39 #include "internaltypes.h"
40 #include "omassert.h"
42 using namespace std;
44 // Class representing a posting (a term/doc pair, and
45 // all the relevant positional information, is a single posting)
46 class InMemoryPosting {
47 public:
48 Xapian::docid did;
49 bool valid;
50 Xapian::VecCOW<Xapian::termpos> positions; // Sorted vector of positions
51 Xapian::termcount wdf;
53 // Merge two postings (same term/doc pair, new positional info)
54 void merge(const InMemoryPosting & post) {
55 Assert(did == post.did);
57 positions.reserve(positions.size() + post.positions.size());
58 for (auto&& pos : post.positions) {
59 positions.push_back(pos);
61 inplace_merge(positions.begin(),
62 positions.begin() + post.positions.size(),
63 positions.end());
67 class InMemoryTermEntry {
68 public:
69 string tname;
70 Xapian::VecCOW<Xapian::termpos> positions; // Sorted vector of positions
71 Xapian::termcount wdf;
73 // Merge two postings (same term/doc pair, new positional info)
74 void merge(const InMemoryTermEntry & post) {
75 Assert(tname == post.tname);
77 positions.reserve(positions.size() + post.positions.size());
78 for (auto&& pos : post.positions) {
79 positions.push_back(pos);
81 inplace_merge(positions.begin(),
82 positions.begin() + post.positions.size(),
83 positions.end());
87 // Compare by document ID
88 class InMemoryPostingLessThan {
89 public:
90 int operator() (const InMemoryPosting &p1,
91 const InMemoryPosting &p2) const
93 return p1.did < p2.did;
97 // Compare by termname
98 class InMemoryTermEntryLessThan {
99 public:
100 int operator() (const InMemoryTermEntry&p1,
101 const InMemoryTermEntry&p2) const
103 return p1.tname < p2.tname;
107 // Class representing a term and the documents indexing it
108 class InMemoryTerm {
109 public:
110 // Sorted list of documents indexing this term.
111 vector<InMemoryPosting> docs;
113 Xapian::termcount term_freq;
114 Xapian::termcount collection_freq;
116 InMemoryTerm() : term_freq(0), collection_freq(0) {}
118 void add_posting(InMemoryPosting&& post);
121 /// Class representing a document and the terms indexing it.
122 class InMemoryDoc {
123 public:
124 bool is_valid;
125 // Sorted list of terms indexing this document.
126 vector<InMemoryTermEntry> terms;
128 /* Initialise invalid by default, so that resizing the termlist array
129 * doesn't create valid documents. */
130 InMemoryDoc() : is_valid(false) {}
132 // Initialise specifying validity.
133 explicit InMemoryDoc(bool is_valid_) : is_valid(is_valid_) {}
135 void add_posting(InMemoryTermEntry&& post);
138 class InMemoryDatabase;
140 /** A PostList in an inmemory database.
142 class InMemoryPostList : public LeafPostList {
143 friend class InMemoryDatabase;
144 private:
145 vector<InMemoryPosting>::const_iterator pos;
146 vector<InMemoryPosting>::const_iterator end;
147 Xapian::doccount termfreq;
148 bool started;
150 /** List of positions of the current term.
151 * This list is populated when read_position_list() is called.
153 InMemoryPositionList mypositions;
155 Xapian::Internal::intrusive_ptr<const InMemoryDatabase> db;
157 InMemoryPostList(Xapian::Internal::intrusive_ptr<const InMemoryDatabase> db,
158 const InMemoryTerm & imterm, const std::string & term_);
159 public:
160 Xapian::doccount get_termfreq() const;
162 Xapian::docid get_docid() const; // Gets current docid
163 Xapian::termcount get_doclength() const; // Length of current document
164 Xapian::termcount get_unique_terms() const; // number of terms in current document
165 Xapian::termcount get_wdf() const; // Within Document Frequency
166 PositionList * read_position_list();
167 PositionList * open_position_list() const;
169 PostList *next(double w_min); // Moves to next docid
171 PostList *skip_to(Xapian::docid did, double w_min); // Moves to next docid >= specified docid
173 // True if we're off the end of the list.
174 bool at_end() const;
176 string get_description() const;
179 /** A PostList over all docs in an inmemory database.
181 class InMemoryAllDocsPostList : public LeafPostList {
182 friend class InMemoryDatabase;
183 private:
184 Xapian::docid did;
186 Xapian::Internal::intrusive_ptr<const InMemoryDatabase> db;
188 InMemoryAllDocsPostList(Xapian::Internal::intrusive_ptr<const InMemoryDatabase> db);
189 public:
190 Xapian::doccount get_termfreq() const;
192 Xapian::docid get_docid() const; // Gets current docid
193 Xapian::termcount get_doclength() const; // Length of current document
194 Xapian::termcount get_unique_terms() const; // number of terms in current document
195 Xapian::termcount get_wdf() const; // Within Document Frequency
196 PositionList * read_position_list();
197 PositionList * open_position_list() const;
199 PostList *next(double w_min); // Moves to next docid
201 PostList *skip_to(Xapian::docid did, double w_min); // Moves to next docid >= specified docid
203 // True if we're off the end of the list
204 bool at_end() const;
206 string get_description() const;
209 // Term List
210 class InMemoryTermList : public TermList {
211 friend class InMemoryDatabase;
212 private:
213 vector<InMemoryTermEntry>::const_iterator pos;
214 vector<InMemoryTermEntry>::const_iterator end;
215 Xapian::termcount terms;
216 bool started;
218 Xapian::Internal::intrusive_ptr<const InMemoryDatabase> db;
219 Xapian::docid did;
220 Xapian::termcount document_length;
222 InMemoryTermList(Xapian::Internal::intrusive_ptr<const InMemoryDatabase> db,
223 Xapian::docid did,
224 const InMemoryDoc & doc,
225 Xapian::termcount len);
226 public:
227 Xapian::termcount get_approx_size() const;
229 /// Collate weighting information for the current term.
230 void accumulate_stats(Xapian::Internal::ExpandStats & stats) const;
232 string get_termname() const;
233 Xapian::termcount get_wdf() const; // Number of occurrences of term in current doc
234 Xapian::doccount get_termfreq() const; // Number of docs indexed by term
235 TermList * next();
236 TermList * skip_to(const std::string & term);
237 bool at_end() const;
238 Xapian::termcount positionlist_count() const;
239 Xapian::PositionIterator positionlist_begin() const;
242 class InMemoryDocument;
244 /** A database held entirely in memory.
246 * This is a prototype database, mainly used for debugging and testing.
248 class InMemoryDatabase : public Xapian::Database::Internal {
249 friend class InMemoryAllDocsPostList;
250 friend class InMemoryDocument;
252 map<string, InMemoryTerm> postlists;
253 vector<InMemoryDoc> termlists;
254 vector<std::string> doclists;
255 vector<std::map<Xapian::valueno, string> > valuelists;
256 std::map<Xapian::valueno, ValueStats> valuestats;
258 vector<Xapian::termcount> doclengths;
260 std::map<string, string> metadata;
262 Xapian::doccount totdocs;
264 Xapian::totallength totlen;
266 bool positions_present;
268 // Flag, true if the db has been closed.
269 bool closed;
271 // Stop copy / assignment being allowed
272 InMemoryDatabase& operator=(const InMemoryDatabase &);
273 InMemoryDatabase(const InMemoryDatabase &);
275 void make_term(const string & tname);
277 bool doc_exists(Xapian::docid did) const;
278 Xapian::docid make_doc(const string & docdata);
280 /* The common parts of add_doc and replace_doc */
281 void finish_add_doc(Xapian::docid did, const Xapian::Document &document);
282 void add_values(Xapian::docid did, const map<Xapian::valueno, string> &values_);
284 void make_posting(InMemoryDoc * doc,
285 const string & tname,
286 Xapian::docid did,
287 Xapian::termpos position,
288 Xapian::termcount wdf,
289 bool use_position = true);
291 //@{
292 /** Implementation of virtual methods: see Database for details.
294 void commit();
295 void cancel();
297 Xapian::docid add_document(const Xapian::Document & document);
298 // Stop the default implementation of delete_document(term) and
299 // replace_document(term) from being hidden. This isn't really
300 // a problem as we only try to call them through the base class
301 // (where they aren't hidden) but some compilers generate a warning
302 // about the hiding.
303 using Xapian::Database::Internal::delete_document;
304 using Xapian::Database::Internal::replace_document;
305 void delete_document(Xapian::docid did);
306 void replace_document(Xapian::docid did, const Xapian::Document & document);
307 //@}
309 public:
310 /** Create and open an in-memory database.
312 * @exception Xapian::DatabaseOpeningError thrown if database can't be opened.
314 InMemoryDatabase();
316 ~InMemoryDatabase();
318 bool reopen();
319 void close();
320 bool is_closed() const { return closed; }
322 Xapian::doccount get_doccount() const;
324 Xapian::docid get_lastdocid() const;
326 Xapian::totallength get_total_length() const;
327 Xapian::termcount get_doclength(Xapian::docid did) const;
328 Xapian::termcount get_unique_terms(Xapian::docid did) const;
330 void get_freqs(const string & term,
331 Xapian::doccount * termfreq_ptr,
332 Xapian::termcount * collfreq_ptr) const;
333 Xapian::doccount get_value_freq(Xapian::valueno slot) const;
334 std::string get_value_lower_bound(Xapian::valueno slot) const;
335 std::string get_value_upper_bound(Xapian::valueno slot) const;
336 bool term_exists(const string & tname) const;
337 bool has_positions() const;
339 PostList * open_post_list(const string & tname) const;
340 LeafPostList* open_leaf_post_list(const string& term) const;
341 TermList * open_term_list(Xapian::docid did) const;
342 TermList * open_term_list_direct(Xapian::docid did) const;
343 Xapian::Document::Internal * open_document(Xapian::docid did, bool lazy) const;
345 std::string get_metadata(const std::string & key) const;
346 TermList * open_metadata_keylist(const std::string &prefix) const;
347 void set_metadata(const std::string & key, const std::string & value);
349 Xapian::termcount positionlist_count(Xapian::docid did,
350 const string & tname) const;
351 PositionList * open_position_list(Xapian::docid did,
352 const string & tname) const;
353 TermList * open_allterms(const string & prefix) const;
355 [[noreturn]]
356 static void throw_database_closed();
358 int get_backend_info(string * path) const {
359 if (path) *path = string();
360 return BACKEND_INMEMORY;
363 std::string get_description() const;
366 #endif /* OM_HGUARD_INMEMORY_DATABASE_H */