Split PostList and PostingIterator::Internal
[xapian.git] / xapian-core / backends / databaseinternal.h
blobec0ea7960ebd87c9699991783065dfa38f6e4aa6
1 /** @file databaseinternal.h
2 * @brief Virtual base class for Database internals
3 */
4 /* Copyright 2004,2006,2007,2008,2009,2011,2014,2015,2016,2017 Olly Betts
5 * Copyright 2007,2008 Lemur Consulting Ltd
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #ifndef XAPIAN_INCLUDED_DATABASEINTERNAL_H
23 #define XAPIAN_INCLUDED_DATABASEINTERNAL_H
25 #include "internaltypes.h"
27 #include <xapian/database.h>
28 #include <xapian/document.h>
29 #include <xapian/intrusive_ptr.h>
30 #include <xapian/positioniterator.h>
31 #include <xapian/postingiterator.h>
32 #include <xapian/termiterator.h>
33 #include <xapian/types.h>
34 #include <xapian/valueiterator.h>
36 #include <string>
38 typedef Xapian::TermIterator::Internal TermList;
39 typedef Xapian::PositionIterator::Internal PositionList;
40 typedef Xapian::ValueIterator::Internal ValueList;
42 class LeafPostList;
44 namespace Xapian {
45 namespace Internal {
46 class PostList;
49 using Xapian::Internal::PostList;
51 namespace Xapian {
53 class Query;
54 struct ReplicationInfo;
56 /// Virtual base class for Database internals
57 class Database::Internal : public Xapian::Internal::intrusive_base {
58 friend class Database;
60 /// Don't allow assignment.
61 Internal& operator=(const Internal&) = delete;
63 /// Don't allow copying.
64 Internal(const Internal&) = delete;
66 /// The "action required" helper for the dtor_called() helper.
67 void dtor_called_();
69 protected:
70 /// Transaction state enum.
71 enum transaction_state {
72 TRANSACTION_READONLY = -2, // Not a writable database shard.
73 TRANSACTION_UNIMPLEMENTED = -1, // Used by InMemory.
74 TRANSACTION_NONE = 0,
75 TRANSACTION_UNFLUSHED = 1,
76 TRANSACTION_FLUSHED = 2
79 /** Only constructable as a base class for derived classes.
81 * @param transaction_support One of:
82 * * TRANSACTION_READONLY - read-only shard
83 * * TRANSACTION_UNIMPLEMENTED - writable but no transaction support
84 * * TRANSACTION_NONE - writable with transaction support
86 Internal(transaction_state transaction_support)
87 : state(transaction_support) {}
89 /// Current transaction state.
90 transaction_state state;
92 /// Test if this shard is read-only.
93 bool is_read_only() const {
94 return state == TRANSACTION_READONLY;
97 /// Test if a transaction is currently active.
98 bool transaction_active() const { return state > 0; }
100 /** Helper to process uncommitted changes when a writable db is destroyed.
102 * The destructor of a derived writable database class needs to call this
103 * method - we can't call it from our own destructor because we need to
104 * be able to call methods in the derived class, but that's no longer
105 * valid by the time our destructor runs, as that happens after the
106 * destructor of the derived class has run.
108 * If a transaction is active, it is cancelled. Otherwise we attempt to
109 * commit uncommitted changes, but because it is not safe to throw
110 * exceptions from destructors, this method will catch and discard any
111 * exceptions.
113 void dtor_called() {
114 // Inline the check to exclude no-op cases (read-only and unimplemented).
115 if (state >= 0)
116 dtor_called_();
119 public:
120 /** We have virtual methods and want to be able to delete derived classes
121 * using a pointer to the base class, so we need a virtual destructor.
123 virtual ~Internal() {}
125 typedef size_t size_type;
127 virtual size_type size() const;
129 virtual void keep_alive();
131 virtual void readahead_for_query(const Query& query) const;
133 virtual doccount get_doccount() const = 0;
135 /** Return the last used document id of this (sub) database. */
136 virtual docid get_lastdocid() const = 0;
138 /** Return the total length of all documents in this database. */
139 virtual totallength get_total_length() const = 0;
141 virtual termcount get_doclength(docid did) const = 0;
143 /** Get the number of unique terms in document.
145 * @param did The document id of the document to return this value for.
147 virtual termcount get_unique_terms(docid did) const = 0;
149 /** Returns frequencies for a term.
151 * @param term The term to get frequencies for
152 * @param termfreq_ptr Point to return number of docs indexed by @a
153 * term (or NULL not to return)
154 * @param collfreq_ptr Point to return number of occurrences of @a
155 * term in the database (or NULL not to return)
157 virtual void get_freqs(const std::string& term,
158 doccount* termfreq_ptr,
159 termcount* collfreq_ptr) const = 0;
161 /** Return the frequency of a given value slot.
163 * This is the number of documents which have a (non-empty) value
164 * stored in the slot.
166 * @param slot The value slot to examine.
168 * @exception UnimplementedError The frequency of the value isn't
169 * available for this database type.
171 virtual doccount get_value_freq(valueno slot) const;
173 /** Get a lower bound on the values stored in the given value slot.
175 * If the lower bound isn't available for the given database type,
176 * this will return the lowest possible bound - the empty string.
178 * @param slot The value slot to examine.
180 virtual std::string get_value_lower_bound(valueno slot) const;
182 /** Get an upper bound on the values stored in the given value slot.
184 * @param slot The value slot to examine.
186 * @exception UnimplementedError The upper bound of the values isn't
187 * available for this database type.
189 virtual std::string get_value_upper_bound(valueno slot) const;
191 /// Get a lower bound on the length of a document in this DB.
192 virtual termcount get_doclength_lower_bound() const;
194 /// Get an upper bound on the length of a document in this DB.
195 virtual termcount get_doclength_upper_bound() const;
197 /// Get an upper bound on the wdf of term @a term.
198 virtual termcount get_wdf_upper_bound(const std::string& term) const;
200 virtual bool term_exists(const std::string& term) const = 0;
202 /** Check whether this database contains any positional information. */
203 virtual bool has_positions() const = 0;
205 virtual PostList* open_post_list(const std::string& term) const = 0;
207 virtual LeafPostList* open_leaf_post_list(const std::string& term) const = 0;
209 /** Open a value stream.
211 * This returns the value in a particular slot for each document.
213 * @param slot The value slot.
215 * @return Pointer to a new ValueList object which should be deleted by
216 * the caller once it is no longer needed.
218 virtual ValueList* open_value_list(valueno slot) const;
220 virtual TermList* open_term_list(docid did) const = 0;
222 /** Like open_term_list() but without MultiTermList wrapper.
224 * MultiDatabase::open_term_list() wraps the returns TermList in a
225 * MultiTermList, but we don't want that for query expansion.
227 virtual TermList* open_term_list_direct(docid did) const = 0;
229 virtual TermList* open_allterms(const std::string& prefix) const = 0;
231 virtual PositionList* open_position_list(docid did,
232 const std::string& term) const = 0;
234 /** Open a handle on a document.
236 * The returned handle provides access to document data and document
237 * values.
239 * @param did The document id to open.
241 * @param lazy If true, there's no need to check that this document
242 * actually exists (only a hint - the backend may still
243 * check). Used to avoid unnecessary work when we already
244 * know that the requested document exists.
246 * @return A new document object, owned by the caller.
248 virtual Document::Internal* open_document(docid did, bool lazy) const = 0;
250 /** Create a termlist tree from trigrams of @a word.
252 * You can assume word.size() > 1.
254 * If there are no trigrams, returns NULL.
256 virtual TermList* open_spelling_termlist(const std::string& word) const;
258 /** Return a termlist which returns the words which are spelling
259 * correction targets.
261 * If there are no spelling correction targets, returns NULL.
263 virtual TermList* open_spelling_wordlist() const;
265 /** Return the number of times @a word was added as a spelling. */
266 virtual doccount get_spelling_frequency(const std::string& word) const;
268 /** Add a word to the spelling dictionary.
270 * If the word is already present, its frequency is increased.
272 * @param word The word to add.
273 * @param freqinc How much to increase its frequency by.
275 virtual void add_spelling(const std::string& word,
276 termcount freqinc) const;
278 /** Remove a word from the spelling dictionary.
280 * The word's frequency is decreased, and if would become zero or less
281 * then the word is removed completely.
283 * @param word The word to remove.
284 * @param freqdec How much to decrease its frequency by.
286 * @return Any freqdec not "used up".
288 virtual termcount remove_spelling(const std::string& word,
289 termcount freqdec) const;
291 /** Open a termlist returning synonyms for a term.
293 * If @a term has no synonyms, returns NULL.
295 virtual TermList* open_synonym_termlist(const std::string& term) const;
297 /** Open a termlist returning each term which has synonyms.
299 * @param prefix If non-empty, only terms with this prefix are
300 * returned.
302 virtual TermList* open_synonym_keylist(const std::string& prefix) const;
304 /** Add a synonym for a term.
306 * If @a synonym is already a synonym for @a term, then no action is
307 * taken.
309 virtual void add_synonym(const std::string& term,
310 const std::string& synonym) const;
312 /** Remove a synonym for a term.
314 * If @a synonym isn't a synonym for @a term, then no action is taken.
316 virtual void remove_synonym(const std::string& term,
317 const std::string& synonym) const;
319 /** Clear all synonyms for a term.
321 * If @a term has no synonyms, no action is taken.
323 virtual void clear_synonyms(const std::string& term) const;
325 /** Get the metadata associated with a given key.
327 * See Database::get_metadata() for more information.
329 virtual std::string get_metadata(const std::string& key) const;
331 /** Open a termlist returning each metadata key.
333 * Only metadata keys which are associated with a non-empty value will
334 * be returned.
336 * @param prefix If non-empty, only keys with this prefix are returned.
338 virtual TermList* open_metadata_keylist(const std::string& prefix) const;
340 /** Set the metadata associated with a given key.
342 * See WritableDatabase::set_metadata() for more information.
344 virtual void set_metadata(const std::string& key, const std::string& value);
346 /** Reopen the database to the latest available revision.
348 * Database backends which don't support simultaneous update and
349 * reading probably don't need to do anything here.
351 virtual bool reopen();
353 /** Close the database */
354 virtual void close() = 0;
356 /** Commit pending modifications to the database. */
357 virtual void commit();
359 /** Cancel pending modifications to the database. */
360 virtual void cancel();
362 /** Begin transaction. */
363 virtual void begin_transaction(bool flushed);
365 /** End transaction.
367 * @param do_commit If true, commits the transaction; if false,
368 * cancels the transaction.
370 virtual void end_transaction(bool do_commit);
372 virtual docid add_document(const Document& document);
374 virtual void delete_document(docid did);
376 /** Delete any documents indexed by a term from the database. */
377 virtual void delete_document(const std::string& unique_term);
379 virtual void replace_document(docid did,
380 const Document& document);
382 /** Replace any documents matching a term. */
383 virtual docid replace_document(const std::string& unique_term,
384 const Document& document);
386 /** Request a document.
388 * This tells the database that we're going to want a particular
389 * document soon. It's just a hint which the backend may ignore,
390 * but for glass it issues a preread hint on the file with the
391 * document data in, and for the remote backend it might cause
392 * the document to be fetched asynchronously (this isn't currently
393 * implemented though).
395 * It can be called for multiple documents in turn, and a common usage
396 * pattern would be to iterate over an MSet and request the documents,
397 * then iterate over it again to actually get and display them.
399 * The default implementation is a no-op.
401 virtual void request_document(docid did) const;
403 /** Write a set of changesets to a file descriptor.
405 * This call may reopen the database, leaving it pointing to a more
406 * recent version of the database.
408 virtual void write_changesets_to_fd(int fd,
409 const std::string& start_revision,
410 bool need_whole_db,
411 ReplicationInfo* info);
413 /// Get a string describing the current revision of the database.
414 virtual std::string get_revision_info() const;
416 /** Get a UUID for the database.
418 * The UUID will persist for the lifetime of the database.
420 * Replicas (eg, made with the replication protocol, or by copying all
421 * the database files) will have the same UUID. However, copies (made
422 * with copydatabase, or xapian-compact) will have different UUIDs.
424 * If the backend does not support UUIDs the empty string is returned.
426 virtual std::string get_uuid() const;
428 /** Notify the database that document is no longer valid.
430 * This is used to invalidate references to a document kept by a
431 * database for doing lazy updates. If we moved to using a weak_ptr
432 * instead we wouldn't need a special method for this, but it would
433 * involve a fair bit of reorganising of other parts of the code.
435 virtual void invalidate_doc_object(Document::Internal* obj) const;
437 /** Get backend information about this database.
439 * @param path If non-NULL, and set the pointed to string to the file
440 * path of this database (or if to some string describing
441 * the database in a backend-specified format if "path"
442 * isn't a concept which make sense).
444 * @return A constant indicating the backend type.
446 virtual int get_backend_info(std::string* path) const = 0;
448 /** Find lowest and highest docids actually in use.
450 * Only used by compaction, so only needs to be implemented by
451 * backends which support compaction.
453 virtual void get_used_docid_range(docid& first,
454 docid& last) const;
456 /** Return true if the database is open for writing.
458 * If this is a WritableDatabase, always returns true.
460 * For a Database, test if there's a writer holding the lock (or if
461 * we can't test for a lock without taking it on the current platform,
462 * throw Xapian::UnimplementedError).
464 virtual bool locked() const;
466 /// Return a string describing this object.
467 virtual std::string get_description() const = 0;
472 #endif // XAPIAN_INCLUDED_DATABASEINTERNAL_H