Fix whitespace irregularities in code
[xapian.git] / xapian-core / backends / chert / chert_database.h
blob55160f424892fa8591b49d7f81f4c0d3a209ff9c
1 /** @file chert_database.h
2 * @brief C++ class definition for chert database
3 */
4 /* Copyright 1999,2000,2001 BrightStation PLC
5 * Copyright 2002 Ananova Ltd
6 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016 Olly Betts
7 * Copyright 2008 Lemur Consulting Ltd
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License as
11 * published by the Free Software Foundation; either version 2 of the
12 * License, or (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22 * USA
25 #ifndef OM_HGUARD_CHERT_DATABASE_H
26 #define OM_HGUARD_CHERT_DATABASE_H
28 #include "backends/backends.h"
29 #include "backends/database.h"
30 #include "chert_dbstats.h"
31 #include "chert_positionlist.h"
32 #include "chert_postlist.h"
33 #include "chert_record.h"
34 #include "chert_spelling.h"
35 #include "chert_synonym.h"
36 #include "chert_termlisttable.h"
37 #include "chert_values.h"
38 #include "chert_version.h"
39 #include "../flint_lock.h"
40 #include "chert_types.h"
41 #include "backends/valuestats.h"
43 #include "noreturn.h"
45 #include "xapian/compactor.h"
46 #include "xapian/constants.h"
48 #include <map>
49 #include <vector>
50 #include <string>
52 class ChertTermList;
53 class ChertAllDocsPostList;
54 class RemoteConnection;
56 /** A backend designed for efficient indexing and retrieval, using
57 * compressed posting lists and a btree storage scheme.
59 class ChertDatabase : public Xapian::Database::Internal {
60 friend class ChertWritableDatabase;
61 friend class ChertTermList;
62 friend class ChertPostList;
63 friend class ChertAllTermsList;
64 friend class ChertAllDocsPostList;
65 private:
66 /** Directory to store databases in.
68 std::string db_dir;
70 /** Whether the database is readonly.
72 bool readonly;
74 /** The file describing the Chert database.
75 * This file has information about the format of the database
76 * which can't easily be stored in any of the individual tables.
78 ChertVersion version_file;
80 /** Table storing posting lists.
82 * Whenever an update is performed, this table is the first to be
83 * updated: therefore, its most recent revision number is the most
84 * recent anywhere in the database.
86 mutable ChertPostListTable postlist_table;
88 /** Table storing position lists.
90 ChertPositionListTable position_table;
92 /** Table storing term lists.
94 ChertTermListTable termlist_table;
96 /** Value manager. */
97 mutable ChertValueManager value_manager;
99 /** Table storing synonym data.
101 mutable ChertSynonymTable synonym_table;
103 /** Table storing spelling correction data.
105 mutable ChertSpellingTable spelling_table;
107 /** Table storing records.
109 * Whenever an update is performed, this table is the last to be
110 * updated: therefore, its most recent revision number is the most
111 * recent consistent revision available. If this table's most
112 * recent revision number is not available for all tables, there
113 * is no consistent revision available, and the database is corrupt.
115 ChertRecordTable record_table;
117 /// Lock object.
118 FlintLock lock;
120 /** The maximum number of changesets which should be kept in the
121 * database. */
122 unsigned int max_changesets;
124 /// Database statistics.
125 ChertDatabaseStats stats;
127 /** Return true if a database exists at the path specified for this
128 * database.
130 bool database_exists();
132 /** Create new tables, and open them.
133 * Any existing tables will be removed first.
135 void create_and_open_tables(unsigned int blocksize);
137 /** Open all tables at most recent consistent revision.
139 * @return true if the tables were reopened; false if we could
140 * tell they were already open at the latest revision.
142 * @exception Xapian::DatabaseCorruptError is thrown if there is no
143 * consistent revision available.
145 bool open_tables_consistent();
147 /** Get a write lock on the database, or throw an
148 * Xapian::DatabaseLockError if failure.
150 * @param flags Bitwise OR of zero or more Xapian::DB_* constants
152 * @param creating true if the database is in the process of being
153 * created - if false, will throw a DatabaseOpeningError if the lock
154 * can't be acquired and the database doesn't exist.
156 void get_database_write_lock(int flags, bool creating);
158 /** Open tables at specified revision number.
160 * @exception Xapian::InvalidArgumentError is thrown if the specified
161 * revision is not available.
163 void open_tables(chert_revision_number_t revision);
165 /** Get an object holding the next revision number which should be
166 * used in the tables.
168 * @return the next revision number.
170 chert_revision_number_t get_next_revision_number() const;
172 /** Set the revision number in the tables.
174 * This updates the disk tables so that the currently open revision
175 * becomes the specified revision number.
177 * @param new_revision The new revision number to store. This must
178 * be greater than the latest revision number (see
179 * get_latest_revision_number()), or undefined behaviour will
180 * result.
182 void set_revision_number(chert_revision_number_t new_revision);
184 /** Re-open tables to recover from an overwritten condition,
185 * or just get most up-to-date version.
187 bool reopen();
189 /** Close all the tables permanently.
191 void close();
193 /** Called if modifications fail.
195 * @param msg is a string description of the exception that was
196 * raised when the modifications failed.
198 void modifications_failed(chert_revision_number_t old_revision,
199 chert_revision_number_t new_revision,
200 const std::string & msg);
202 /** Apply any outstanding changes to the tables.
204 * If an error occurs during this operation, this will be signalled
205 * by an exception being thrown. In this case the contents of the
206 * tables on disk will be left in an unmodified state (though possibly
207 * with increased revision numbers), and the outstanding changes will
208 * be lost.
210 void apply();
212 /** Cancel any outstanding changes to the tables.
214 void cancel();
216 /** Send a set of messages which transfer the whole database.
218 void send_whole_database(RemoteConnection & conn, double end_time);
220 /** Get the revisions stored in a changeset (returned via startrev and
 * endrev).
222 void get_changeset_revisions(const string & path,
223 chert_revision_number_t * startrev,
224 chert_revision_number_t * endrev) const;
225 public:
226 /** Create and open a chert database.
228 * @exception Xapian::DatabaseCorruptError is thrown if there is no
229 * consistent revision available.
231 * @exception Xapian::DatabaseOpeningError thrown if database can't
232 * be opened.
234 * @exception Xapian::DatabaseVersionError thrown if database is in an
235 * unsupported format. This implies that the database was
236 * created by an older or newer version of Xapian.
238 * @param db_dir_ directory holding chert tables
240 * @param block_size Block size, in bytes, to use when creating
241 * tables. This is only important, and has the
242 * correct value, when the database is being
243 * created.
 * @param action How to open the database (defaults to
 * Xapian::DB_READONLY_). NOTE(review): the accepted
 * values are not visible in this header - confirm.
245 ChertDatabase(const string &db_dir_, int action = Xapian::DB_READONLY_,
246 unsigned int block_size = 0u);
248 ~ChertDatabase();
250 /// Get a postlist table cursor (used by ChertValueList).
251 ChertCursor * get_postlist_cursor() const {
252 return postlist_table.cursor_get();
255 /** Get an object holding the revision number which the tables are
256 * opened at.
258 * @return the current revision number.
260 chert_revision_number_t get_revision_number() const;
262 /** Virtual methods of Database::Internal. */
263 //@{
264 Xapian::doccount get_doccount() const;
265 Xapian::docid get_lastdocid() const;
266 totlen_t get_total_length() const;
267 Xapian::termcount get_doclength(Xapian::docid did) const;
268 Xapian::termcount get_unique_terms(Xapian::docid did) const;
269 void get_freqs(const string & term,
270 Xapian::doccount * termfreq_ptr,
271 Xapian::termcount * collfreq_ptr) const;
272 Xapian::doccount get_value_freq(Xapian::valueno slot) const;
273 std::string get_value_lower_bound(Xapian::valueno slot) const;
274 std::string get_value_upper_bound(Xapian::valueno slot) const;
275 Xapian::termcount get_doclength_lower_bound() const;
276 Xapian::termcount get_doclength_upper_bound() const;
277 Xapian::termcount get_wdf_upper_bound(const string & term) const;
278 bool term_exists(const string & tname) const;
279 bool has_positions() const;
281 LeafPostList * open_post_list(const string & tname) const;
282 ValueList * open_value_list(Xapian::valueno slot) const;
283 Xapian::Document::Internal * open_document(Xapian::docid did, bool lazy) const;
285 PositionList * open_position_list(Xapian::docid did, const string & term) const;
286 TermList * open_term_list(Xapian::docid did) const;
287 TermList * open_allterms(const string & prefix) const;
289 TermList * open_spelling_termlist(const string & word) const;
290 TermList * open_spelling_wordlist() const;
291 Xapian::doccount get_spelling_frequency(const string & word) const;
293 TermList * open_synonym_termlist(const string & term) const;
294 TermList * open_synonym_keylist(const string & prefix) const;
296 string get_metadata(const string & key) const;
297 TermList * open_metadata_keylist(const std::string &prefix) const;
298 void write_changesets_to_fd(int fd,
299 const string & start_revision,
300 bool need_whole_db,
301 Xapian::ReplicationInfo * info);
302 string get_revision_info() const;
303 string get_uuid() const;
305 void request_document(Xapian::docid /*did*/) const;
306 void readahead_for_query(const Xapian::Query &query);
307 //@}
/// Wrapped in XAPIAN_NORETURN. NOTE(review): presumably this always throws
/// rather than returning - confirm against noreturn.h and the definition.
309 XAPIAN_NORETURN(void throw_termlist_table_close_exception() const);
/// Report the backend type (BACKEND_CHERT); if path is non-NULL, db_dir is
/// also stored in *path.
311 int get_backend_info(string * path) const {
312 if (path) *path = db_dir;
313 return BACKEND_CHERT;
316 void get_used_docid_range(Xapian::docid & first,
317 Xapian::docid & last) const;
319 static void compact(Xapian::Compactor * compactor,
320 const char * destdir,
321 const std::vector<Xapian::Database::Internal *> & sources,
322 const std::vector<Xapian::docid> & offset,
323 size_t block_size,
324 Xapian::Compactor::compaction_level compaction,
325 unsigned flags,
326 Xapian::docid last_docid);
329 /** A writable chert database.
331 class ChertWritableDatabase : public ChertDatabase {
332 /** Unflushed changes to term frequencies and collection frequencies. */
333 mutable map<string, pair<Xapian::termcount_diff, Xapian::termcount_diff> >
334 freq_deltas;
336 /** Document lengths of new and modified documents which haven't been flushed yet. */
337 mutable map<Xapian::docid, Xapian::termcount> doclens;
339 /// Modifications to posting lists.
340 mutable map<string, map<Xapian::docid,
341 pair<char, Xapian::termcount> > > mod_plists;
/// Value statistics, keyed by value slot number.
343 mutable map<Xapian::valueno, ValueStats> value_stats;
345 /** The number of documents added, deleted, or replaced since the last
346 * flush.
348 mutable Xapian::doccount change_count;
350 /// If change_count reaches this threshold we automatically flush.
351 Xapian::doccount flush_threshold;
353 /** A pointer to the last document which was returned by
354 * open_document(), or NULL if there is no such valid document. This
355 * is used purely for comparing with a supplied document to help with
356 * optimising replace_document. When the document internals are
357 * deleted, this pointer gets set to NULL.
359 mutable Xapian::Document::Internal * modify_shortcut_document;
361 /** The document ID for the last document returned by open_document().
363 mutable Xapian::docid modify_shortcut_docid;
365 /** Check if we should autoflush.
367 * Called at the end of each document changing operation.
369 void check_flush_threshold();
371 /// Flush any unflushed postlist changes, but don't commit them.
372 void flush_postlist_changes() const;
374 /// Close all the tables permanently.
375 void close();
377 /// Apply changes.
378 void apply();
380 /** Add or modify an entry in freq_deltas.
382 * @param tname The term to modify the entry for.
383 * @param tf_delta The change in the term frequency delta.
384 * @param cf_delta The change in the collection frequency delta.
386 void add_freq_delta(const string & tname,
387 Xapian::termcount_diff tf_delta,
388 Xapian::termcount_diff cf_delta);
390 /** Insert modifications for a new document to the postlists.
392 * @param did The document ID to insert the entry for.
393 * @param tname The term to insert the entry for.
394 * @param wdf The new wdf value to store.
396 void insert_mod_plist(Xapian::docid did,
397 const string & tname,
398 Xapian::termcount wdf);
400 /** Update the stored modifications to the postlists.
402 * @param did The document ID to modify the entry for.
403 * @param tname The term to modify the entry for.
404 * @param type The type of change to the postlist.
405 * @param wdf The new wdf value to store.
407 * If type is 'A', and an existing entry is in the stored
408 * modifications, the stored type will be set to 'M'. In all other
409 * cases, the stored type is simply the value supplied.
411 void update_mod_plist(Xapian::docid did,
412 const string & tname,
413 char type,
414 Xapian::termcount wdf);
416 //@{
417 /** Implementation of virtual methods: see Database::Internal for
418 * details.
420 void commit();
422 /** Cancel pending modifications to the database. */
423 void cancel();
425 Xapian::docid add_document(const Xapian::Document & document);
426 Xapian::docid add_document_(Xapian::docid did, const Xapian::Document & document);
427 // Stop the default implementation of delete_document(term) and
428 // replace_document(term) from being hidden. This isn't really
429 // a problem as we only try to call them through the base class
430 // (where they aren't hidden) but some compilers generate a warning
431 // about the hiding.
432 #ifndef _MSC_VER
433 using Xapian::Database::Internal::delete_document;
434 using Xapian::Database::Internal::replace_document;
435 #endif
436 void delete_document(Xapian::docid did);
437 void replace_document(Xapian::docid did, const Xapian::Document & document);
439 Xapian::Document::Internal * open_document(Xapian::docid did,
440 bool lazy) const;
442 //@}
444 public:
445 /** Create and open a writable chert database.
447 * @exception Xapian::DatabaseOpeningError thrown if database can't
448 * be opened.
450 * @exception Xapian::DatabaseVersionError thrown if database is in an
451 * unsupported format. This implies that the database was
452 * created by an older or newer version of Xapian.
454 * @param dir directory holding chert tables
 * @param action NOTE(review): undocumented here; presumably the same
 * open/create action taken by ChertDatabase's
 * constructor - confirm.
 * @param block_size NOTE(review): declared as int here but as
 * unsigned int in ChertDatabase's constructor -
 * confirm the intended type.
456 ChertWritableDatabase(const string &dir, int action, int block_size);
458 ~ChertWritableDatabase();
460 /** Virtual methods of Database::Internal. */
461 //@{
462 Xapian::termcount get_doclength(Xapian::docid did) const;
463 void get_freqs(const string & term,
464 Xapian::doccount * termfreq_ptr,
465 Xapian::termcount * collfreq_ptr) const;
466 Xapian::doccount get_value_freq(Xapian::valueno slot) const;
467 std::string get_value_lower_bound(Xapian::valueno slot) const;
468 std::string get_value_upper_bound(Xapian::valueno slot) const;
469 bool term_exists(const string & tname) const;
471 LeafPostList * open_post_list(const string & tname) const;
472 ValueList * open_value_list(Xapian::valueno slot) const;
473 TermList * open_allterms(const string & prefix) const;
475 void add_spelling(const string & word, Xapian::termcount freqinc) const;
476 void remove_spelling(const string & word, Xapian::termcount freqdec) const;
477 TermList * open_spelling_wordlist() const;
479 TermList * open_synonym_keylist(const string & prefix) const;
480 void add_synonym(const string & word, const string & synonym) const;
481 void remove_synonym(const string & word, const string & synonym) const;
482 void clear_synonyms(const string & word) const;
484 void set_metadata(const string & key, const string & value);
485 void invalidate_doc_object(Xapian::Document::Internal * obj) const;
486 //@}
489 #endif /* OM_HGUARD_CHERT_DATABASE_H */