1 /** @file inmemory_database.h
2 * @brief C++ class definition for inmemory database access
4 /* Copyright 1999,2000,2001 BrightStation PLC
5 * Copyright 2002 Ananova Ltd
6 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2014,2015 Olly Betts
7 * Copyright 2006,2009 Lemur Consulting Ltd
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License as
11 * published by the Free Software Foundation; either version 2 of the
12 * License, or (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
25 #ifndef OM_HGUARD_INMEMORY_DATABASE_H
26 #define OM_HGUARD_INMEMORY_DATABASE_H
28 #include "api/leafpostlist.h"
29 #include "api/smallvector.h"
30 #include "api/termlist.h"
31 #include "backends/backends.h"
32 #include "backends/databaseinternal.h"
33 #include "backends/valuestats.h"
37 #include <xapian/document.h>
38 #include "inmemory_positionlist.h"
39 #include "internaltypes.h"
// NOTE(review): this excerpt omits some lines of the class, including the
// declaration of `did` (read by merge() below) and the closing braces.
44 // Class representing a posting (a term/doc pair, and
45 // all the relevant positional information, is a single posting)
46 class InMemoryPosting
{
// Positions recorded for this posting; merge() appends to this vector and
// then re-merges it.
50 Xapian::VecCOW
<Xapian::termpos
> positions
; // Sorted vector of positions
// wdf stored with this posting; the visible part of merge() does not touch it.
51 Xapian::termcount wdf
;
53 // Merge two postings (same term/doc pair, new positional info)
// Steps visible here: assert that both postings carry the same `did`,
// reserve room for the combined position count, append every position of
// `post`, then call inplace_merge() (the end of that call is not visible).
// NOTE(review): after the append, the two sorted runs meet at the OLD size of
// `positions`, yet the midpoint passed to inplace_merge() is
// begin() + post.positions.size(). The two coincide only when both position
// lists have the same length -- confirm whether end() - post.positions.size()
// was intended.
54 void merge(const InMemoryPosting
& post
) {
55 Assert(did
== post
.did
);
57 positions
.reserve(positions
.size() + post
.positions
.size());
58 for (auto&& pos
: post
.positions
) {
59 positions
.push_back(pos
);
// NOTE(review): presumably this call follows the loop; the closing brace of
// the loop is not visible in this excerpt.
61 inplace_merge(positions
.begin(),
62 positions
.begin() + post
.positions
.size(),
// Entry holding the positions and wdf for one term, as stored in the term
// list of a document (see the `terms` vector of InMemoryTermEntry elements).
// NOTE(review): this excerpt omits some lines of the class, including the
// declaration of `tname` (read by merge() below) and the closing braces.
67 class InMemoryTermEntry
{
// Positions recorded for this entry; merge() appends to this vector and then
// re-merges it.
70 Xapian::VecCOW
<Xapian::termpos
> positions
; // Sorted vector of positions
// wdf stored with this entry; the visible part of merge() does not touch it.
71 Xapian::termcount wdf
;
73 // Merge two postings (same term/doc pair, new positional info)
// Steps visible here: assert that both entries carry the same `tname`,
// reserve room for the combined position count, append every position of
// `post`, then call inplace_merge() (the end of that call is not visible).
// NOTE(review): after the append, the two sorted runs meet at the OLD size of
// `positions`, yet the midpoint passed to inplace_merge() is
// begin() + post.positions.size(). The two coincide only when both position
// lists have the same length -- confirm whether end() - post.positions.size()
// was intended.
74 void merge(const InMemoryTermEntry
& post
) {
75 Assert(tname
== post
.tname
);
77 positions
.reserve(positions
.size() + post
.positions
.size());
78 for (auto&& pos
: post
.positions
) {
79 positions
.push_back(pos
);
// NOTE(review): presumably this call follows the loop; the closing brace of
// the loop is not visible in this excerpt.
81 inplace_merge(positions
.begin(),
82 positions
.begin() + post
.positions
.size(),
// Function object ordering InMemoryPosting objects by their `did` member.
// NOTE(review): the access specifier and braces of this class are not
// visible in this excerpt.
87 // Compare by document ID
88 class InMemoryPostingLessThan
{
// Yields the result of p1.did < p2.did. The declared return type is int
// although the comparison itself produces a bool.
90 int operator() (const InMemoryPosting
&p1
,
91 const InMemoryPosting
&p2
) const
93 return p1
.did
< p2
.did
;
// Function object ordering InMemoryTermEntry objects by their `tname` member.
// NOTE(review): the access specifier and braces of this class are not
// visible in this excerpt.
97 // Compare by termname
98 class InMemoryTermEntryLessThan
{
// Yields the result of p1.tname < p2.tname. The declared return type is int
// although the comparison itself produces a bool.
100 int operator() (const InMemoryTermEntry
&p1
,
101 const InMemoryTermEntry
&p2
) const
103 return p1
.tname
< p2
.tname
;
// NOTE(review): the class header line and closing brace are not visible in
// this excerpt; the constructor below shows that the class is InMemoryTerm.
107 // Class representing a term and the documents indexing it
110 // Sorted list of documents indexing this term.
111 vector
<InMemoryPosting
> docs
;
// Frequency counters for this term; both start at zero (see constructor).
// NOTE(review): presumably term_freq counts documents and collection_freq
// sums wdf over them -- the code maintaining them is not visible here.
113 Xapian::termcount term_freq
;
114 Xapian::termcount collection_freq
;
// Default constructor: zero-initialises both counters; `docs` starts empty.
116 InMemoryTerm() : term_freq(0), collection_freq(0) {}
// Takes a posting by rvalue reference. Declared only; the definition is not
// part of this excerpt.
118 void add_posting(InMemoryPosting
&& post
);
// NOTE(review): the class header line, the declaration of `is_valid` and the
// closing brace are not visible in this excerpt; the constructors below show
// that the class is InMemoryDoc.
121 /// Class representing a document and the terms indexing it.
125 // Sorted list of terms indexing this document.
126 vector
<InMemoryTermEntry
> terms
;
128 /* Initialise invalid by default, so that resizing the termlist array
129 * doesn't create valid documents. */
130 InMemoryDoc() : is_valid(false) {}
132 // Initialise specifying validity.
// Marked explicit, so a bool does not convert implicitly to InMemoryDoc.
133 explicit InMemoryDoc(bool is_valid_
) : is_valid(is_valid_
) {}
// Takes a term entry by rvalue reference. Declared only; the definition is
// not part of this excerpt.
135 void add_posting(InMemoryTermEntry
&& post
);
// Forward declaration: the posting list classes below hold a pointer to the
// database before its full definition appears.
138 class InMemoryDatabase
;
// NOTE(review): this excerpt omits some lines of the class that follows
// (access specifiers, at least one declaration after the final comments, and
// the closing brace). All member functions are declared only.
140 /** A PostList in an inmemory database.
142 class InMemoryPostList
: public LeafPostList
{
143 friend class InMemoryDatabase
;
// Iterator pair over a vector of InMemoryPosting.
// NOTE(review): presumably `pos` is the current posting and `end` marks the
// end of the list -- the code that sets them is not visible here.
145 vector
<InMemoryPosting
>::const_iterator pos
;
146 vector
<InMemoryPosting
>::const_iterator end
;
// Value of type Xapian::doccount kept alongside the iterators.
// NOTE(review): presumably what get_termfreq() returns -- confirm in the
// implementation file.
147 Xapian::doccount termfreq
;
150 /** List of positions of the current term.
151 * This list is populated when read_position_list() is called.
153 InMemoryPositionList mypositions
;
// Reference-counted pointer to the (const) database this list belongs to.
155 Xapian::Internal::intrusive_ptr
<const InMemoryDatabase
> db
;
// Constructed from the owning database, the InMemoryTerm to iterate and the
// term name.
157 InMemoryPostList(Xapian::Internal::intrusive_ptr
<const InMemoryDatabase
> db
,
158 const InMemoryTerm
& imterm
, const std::string
& term_
);
160 Xapian::doccount
get_termfreq() const;
162 Xapian::docid
get_docid() const; // Gets current docid
163 Xapian::termcount
get_wdf() const; // Within Document Frequency
// Non-const variant; per the comment on `mypositions`, calling it populates
// that member.
164 PositionList
* read_position_list();
// Const variant returning a PositionList pointer.
// NOTE(review): ownership of the returned pointer is not stated here.
165 PositionList
* open_position_list() const;
167 PostList
*next(double w_min
); // Moves to next docid
169 PostList
*skip_to(Xapian::docid did
, double w_min
); // Moves to next docid >= specified docid
// NOTE(review): the declaration that the next comment describes is not
// visible in this excerpt.
171 // True if we're off the end of the list.
174 string
get_description() const;
// NOTE(review): this excerpt omits some lines of the class (access
// specifiers, any members between the friend declaration and `db`, the
// declaration described by the final comment, and the closing brace). All
// member functions are declared only.
177 /** A PostList over all docs in an inmemory database.
179 class InMemoryAllDocsPostList
: public LeafPostList
{
180 friend class InMemoryDatabase
;
// Reference-counted pointer to the (const) database being iterated.
184 Xapian::Internal::intrusive_ptr
<const InMemoryDatabase
> db
;
// Constructed from the database alone; no term is involved.
186 InMemoryAllDocsPostList(Xapian::Internal::intrusive_ptr
<const InMemoryDatabase
> db
);
188 Xapian::doccount
get_termfreq() const;
190 Xapian::docid
get_docid() const; // Gets current docid
191 Xapian::termcount
get_doclength() const; // Length of current document
192 Xapian::termcount
get_unique_terms() const; // number of terms in current document
193 Xapian::termcount
get_wdf() const; // Within Document Frequency
// Same pair of position list accessors as declared on InMemoryPostList.
// NOTE(review): what they return for an all-documents list is not visible.
194 PositionList
* read_position_list();
195 PositionList
* open_position_list() const;
197 PostList
*next(double w_min
); // Moves to next docid
199 PostList
*skip_to(Xapian::docid did
, double w_min
); // Moves to next docid >= specified docid
// NOTE(review): the declaration that the next comment describes is not
// visible in this excerpt.
201 // True if we're off the end of the list
204 string
get_description() const;
// TermList implementation whose iterators range over a vector of
// InMemoryTermEntry (the element type of the `terms` vector of InMemoryDoc).
// NOTE(review): this excerpt omits some lines of the class (access
// specifiers, part of the constructor parameter list, and the closing
// brace). All member functions are declared only.
208 class InMemoryTermList
: public TermList
{
209 friend class InMemoryDatabase
;
// Iterator pair over a vector of InMemoryTermEntry.
// NOTE(review): presumably `pos` is the current entry and `end` marks the end
// of the list -- the code that sets them is not visible here.
211 vector
<InMemoryTermEntry
>::const_iterator pos
;
212 vector
<InMemoryTermEntry
>::const_iterator end
;
// Count of type Xapian::termcount.
// NOTE(review): presumably the number of entries, as reported by
// get_approx_size() -- confirm in the implementation file.
213 Xapian::termcount terms
;
// Reference-counted pointer to the (const) database this list belongs to.
216 Xapian::Internal::intrusive_ptr
<const InMemoryDatabase
> db
;
// NOTE(review): presumably initialised from the `len` constructor argument.
218 Xapian::termcount document_length
;
// Constructed from the owning database, the InMemoryDoc to iterate and a
// length; at least one parameter line is missing from this excerpt.
220 InMemoryTermList(Xapian::Internal::intrusive_ptr
<const InMemoryDatabase
> db
,
222 const InMemoryDoc
& doc
,
223 Xapian::termcount len
);
225 Xapian::termcount
get_approx_size() const;
227 /// Collate weighting information for the current term.
228 void accumulate_stats(Xapian::Internal::ExpandStats
& stats
) const;
230 string
get_termname() const;
231 Xapian::termcount
get_wdf() const; // Number of occurrences of term in current doc
232 Xapian::doccount
get_termfreq() const; // Number of docs indexed by term
// Takes a term name and returns a TermList pointer.
// NOTE(review): the meaning of the return value is not visible here.
234 TermList
* skip_to(const std::string
& term
);
// Position accessors for the current entry: a count and a PositionList.
236 Xapian::termcount
positionlist_count() const;
237 PositionList
* positionlist_begin() const;
// Forward declaration so that InMemoryDocument can be named as a friend.
240 class InMemoryDocument
;
242 /** A database held entirely in memory.
244 * This is a prototype database, mainly used for debugging and testing.
246 class InMemoryDatabase
: public Xapian::Database::Internal
{
// These two classes are granted access to the private members below.
247 friend class InMemoryAllDocsPostList
;
248 friend class InMemoryDocument
;
// Term name -> InMemoryTerm (the per-term list of postings and counters).
250 map
<string
, InMemoryTerm
> postlists
;
// One InMemoryDoc per slot; default-constructed slots are invalid documents
// (see the InMemoryDoc default constructor).
// NOTE(review): presumably indexed by docid -- confirm the offset used.
251 vector
<InMemoryDoc
> termlists
;
// One string per slot.
// NOTE(review): presumably the document data passed to make_doc().
252 vector
<std::string
> doclists
;
// One map of value slot number -> value string per slot.
253 vector
<std::map
<Xapian::valueno
, string
> > valuelists
;
// Per value slot statistics (ValueStats), keyed by slot number.
254 std::map
<Xapian::valueno
, ValueStats
> valuestats
;
// One Xapian::termcount per slot.
// NOTE(review): presumably the length of each document.
256 vector
<Xapian::termcount
> doclengths
;
// String key -> string value store, see get_metadata() and set_metadata().
258 std::map
<string
, string
> metadata
;
// NOTE(review): presumably the number of valid documents and the sum of their
// lengths, as reported by get_doccount() and get_total_length().
260 Xapian::doccount totdocs
;
262 Xapian::totallength totlen
;
// NOTE(review): presumably what has_positions() reports.
264 bool positions_present
;
// NOTE(review): the declaration that the next comment describes is not
// visible in this excerpt; is_closed() further down returns a member named
// `closed`.
266 // Flag, true if the db has been closed.
269 // Stop copy / assignment being allowed
// Declared without a definition in this header.
// NOTE(review): presumably in a private section, which is the pre-C++11 way
// of disabling copying -- the access specifier is not visible.
270 InMemoryDatabase
& operator=(const InMemoryDatabase
&);
271 InMemoryDatabase(const InMemoryDatabase
&);
// Helpers used when adding documents. All are declared only; their
// definitions are not part of this excerpt.
273 void make_term(const string
& tname
);
275 bool doc_exists(Xapian::docid did
) const;
// Takes the document data string and returns a docid.
276 Xapian::docid
make_doc(const string
& docdata
);
278 /* The common parts of add_doc and replace_doc */
279 void finish_add_doc(Xapian::docid did
, const Xapian::Document
&document
);
// Takes a docid and a map of value slot number -> value string.
280 void add_values(Xapian::docid did
, const map
<Xapian::valueno
, string
> &values_
);
// Takes the document, term name, position and wdf; `use_position` defaults to
// true. At least one parameter line is missing from this excerpt.
282 void make_posting(InMemoryDoc
* doc
,
283 const string
& tname
,
285 Xapian::termpos position
,
286 Xapian::termcount wdf
,
287 bool use_position
= true);
// Document modification methods. All are declared only; their definitions
// are not part of this excerpt, and some lines of the original comments are
// missing here.
290 /** Implementation of virtual methods: see Database for details.
// Takes a document and returns a docid.
295 Xapian::docid
add_document(const Xapian::Document
& document
);
296 // Stop the default implementation of delete_document(term) and
297 // replace_document(term) from being hidden. This isn't really
298 // a problem as we only try to call them through the base class
299 // (where they aren't hidden) but some compilers generate a warning
// The two using-declarations bring the base class overloads into scope
// alongside the docid-based overloads declared right after them.
301 using Xapian::Database::Internal::delete_document
;
302 using Xapian::Database::Internal::replace_document
;
303 void delete_document(Xapian::docid did
);
304 void replace_document(Xapian::docid did
, const Xapian::Document
& document
);
// NOTE(review): the declaration documented by the next comment (presumably
// the constructor) is not visible in this excerpt. Apart from is_closed() and
// get_backend_info(), everything below is declared only.
308 /** Create and open an in-memory database.
310 * @exception Xapian::DatabaseOpeningError thrown if database can't be opened.
// Inline accessor returning the `closed` member.
318 bool is_closed() const { return closed
; }
// Database-wide statistics.
320 Xapian::doccount
get_doccount() const;
322 Xapian::docid
get_lastdocid() const;
324 Xapian::totallength
get_total_length() const;
// Per-document statistics, looked up by docid.
325 Xapian::termcount
get_doclength(Xapian::docid did
) const;
326 Xapian::termcount
get_unique_terms(Xapian::docid did
) const;
// Reports frequencies for `term` through two pointer out-parameters.
// NOTE(review): whether null pointers are accepted is not visible here.
328 void get_freqs(const string
& term
,
329 Xapian::doccount
* termfreq_ptr
,
330 Xapian::termcount
* collfreq_ptr
) const;
// Per value slot statistics (see the `valuestats` member).
331 Xapian::doccount
get_value_freq(Xapian::valueno slot
) const;
332 std::string
get_value_lower_bound(Xapian::valueno slot
) const;
333 std::string
get_value_upper_bound(Xapian::valueno slot
) const;
334 bool term_exists(const string
& tname
) const;
335 bool has_positions() const;
// Factories returning raw pointers to list and document objects.
// NOTE(review): ownership of the returned pointers is not stated here.
337 PostList
* open_post_list(const string
& tname
) const;
338 LeafPostList
* open_leaf_post_list(const string
& term
) const;
339 TermList
* open_term_list(Xapian::docid did
) const;
340 TermList
* open_term_list_direct(Xapian::docid did
) const;
341 Xapian::Document::Internal
* open_document(Xapian::docid did
, bool lazy
) const;
// Metadata access (see the `metadata` member); only set_metadata is
// non-const.
343 std::string
get_metadata(const std::string
& key
) const;
344 TermList
* open_metadata_keylist(const std::string
&prefix
) const;
345 void set_metadata(const std::string
& key
, const std::string
& value
);
// Position information for term `tname` in document `did`.
347 Xapian::termcount
positionlist_count(Xapian::docid did
,
348 const string
& tname
) const;
349 PositionList
* open_position_list(Xapian::docid did
,
350 const string
& tname
) const;
// Returns a TermList for the given prefix.
351 TermList
* open_allterms(const string
& prefix
) const;
// Static helper taking no arguments.
// NOTE(review): presumably throws to report use of a closed database; the
// definition is not visible here.
354 static void throw_database_closed();
// Returns BACKEND_INMEMORY. When `path` is non-null, the string it points to
// is set to the empty string first.
356 int get_backend_info(string
* path
) const {
357 if (path
) *path
= string();
358 return BACKEND_INMEMORY
;
361 std::string
get_description() const;
364 #endif /* OM_HGUARD_INMEMORY_DATABASE_H */