Fix whitespace irregularities in code
[xapian.git] / xapian-core / include / xapian / database.h
blobf011b85311afeef64be214d601886103fd705e1d
1 /** @file database.h
2 * @brief API for working with Xapian databases
3 */
4 /* Copyright 1999,2000,2001 BrightStation PLC
5 * Copyright 2002 Ananova Ltd
6 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2011,2012,2013,2014,2015,2016 Olly Betts
7 * Copyright 2006,2008 Lemur Consulting Ltd
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License as
11 * published by the Free Software Foundation; either version 2 of the
12 * License, or (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22 * USA
25 #ifndef XAPIAN_INCLUDED_DATABASE_H
26 #define XAPIAN_INCLUDED_DATABASE_H
28 #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD
29 # error "Never use <xapian/database.h> directly; include <xapian.h> instead."
30 #endif
32 #include <iosfwd>
33 #include <string>
34 #include <vector>
36 #include <xapian/attributes.h>
37 #include <xapian/deprecated.h>
38 #include <xapian/intrusive_ptr.h>
39 #include <xapian/types.h>
40 #include <xapian/positioniterator.h>
41 #include <xapian/postingiterator.h>
42 #include <xapian/termiterator.h>
43 #include <xapian/valueiterator.h>
44 #include <xapian/visibility.h>
46 namespace Xapian {
48 class Compactor;
49 class Document;
51 /** This class is used to access a database, or a group of databases.
53 * For searching, this class is used in conjunction with an Enquire object.
55 * @exception InvalidArgumentError will be thrown if an invalid
56 * argument is supplied, for example, an unknown database type.
58 * @exception DatabaseOpeningError may be thrown if the database cannot
59 * be opened (for example, a required file cannot be found).
61 * @exception DatabaseVersionError may be thrown if the database is in an
62 * unsupported format (for example, created by a newer version of Xapian
63 * which uses an incompatible format).
65 class XAPIAN_VISIBILITY_DEFAULT Database {
66 /// @internal Implementation behind check() static methods.
67 static size_t check_(const std::string * path_ptr, int fd, int opts,
68 std::ostream *out);
70 /// Internal helper behind public compact() methods.
71 void compact_(const std::string * output_ptr,
72 int fd,
73 unsigned flags,
74 int block_size,
75 Xapian::Compactor * compactor) const;
77 public:
78 class Internal;
79 /// @private @internal Reference counted internals.
80 std::vector<Xapian::Internal::intrusive_ptr<Internal> > internal;
82 /** Add an existing database (or group of databases) to those
83 * accessed by this object.
85 * @param database the database(s) to add.
87 void add_database(const Database & database);
89 /** Create a Database with no databases in.
91 Database();
93 /** Open a Database, automatically determining the database
94 * backend to use.
96 * @param path directory that the database is stored in.
97 * @param flags Bitwise-or of Xapian::DB_* constants.
99 explicit Database(const std::string &path, int flags = 0);
101 /** Open a single-file Database.
103 * This method opens a single-file Database given a file descriptor
104 * open on it. Xapian looks starting at the current file offset,
105 * allowing a single file database to be easily embedded within
106 * another file.
108 * @param fd file descriptor for the file. Xapian takes ownership of
109 * this and will close it when the database is closed.
110 * @param flags Bitwise-or of Xapian::DB_* constants.
112 explicit Database(int fd, int flags = 0);
114 /** @private @internal Create a Database from its internals.
116 explicit Database(Internal *internal);
118 /** Destroy this handle on the database.
120 * If there are no copies of this object remaining, the database(s)
121 * will be closed.
123 virtual ~Database();
125 /** Copying is allowed. The internals are reference counted, so
126 * copying is cheap.
128 * @param other The object to copy.
130 Database(const Database &other);
132 /** Assignment is allowed. The internals are reference counted,
133 * so assignment is cheap.
135 * @param other The object to copy.
137 void operator=(const Database &other);
139 /** Re-open the database.
141 * This re-opens the database(s) to the latest available version(s).
142 * It can be used either to make sure the latest results are returned,
143 * or to recover from a Xapian::DatabaseModifiedError.
145 * Calling reopen() on a database which has been closed (with @a
146 * close()) will always raise a Xapian::DatabaseError.
148 * @return true if the database might have been reopened (if false
149 * is returned, the database definitely hasn't been
150 * reopened, which applications may find useful when
151 * caching results, etc). In Xapian < 1.3.0, this method
152 * did not return a value.
154 bool reopen();
156 /** Close the database.
158 * This closes the database and closes all its file handles.
160 * For a WritableDatabase, if a transaction is active it will be
161 * aborted, while if no transaction is active commit() will be
162 * implicitly called. Also the write lock is released.
164 * Closing a database cannot be undone - in particular, calling
165 * reopen() after close() will not reopen it, but will instead throw a
166 * Xapian::DatabaseError exception.
168 * Calling close() again on a database which has already been closed
169 * has no effect (and doesn't raise an exception).
171 * After close() has been called, calls to other methods of the
172 * database, and to methods of other objects associated with the
173 * database, will either:
175 * - behave exactly as they would have done if the database had not
176 * been closed (this can only happen if all the required data is
177 * cached)
179 * - raise a Xapian::DatabaseError exception indicating that the
180 * database is closed.
182 * The reason for this behaviour is that otherwise we'd have to check
183 * that the database is still open on every method call on every
184 * object associated with a Database, when in many cases they are
185 * working on data which has already been loaded and so they are able
186 * to just behave correctly.
188 * This method was added in Xapian 1.1.0.
190 virtual void close();
192 /// Return a string describing this object.
193 virtual std::string get_description() const;
195 /** An iterator pointing to the start of the postlist
196 * for a given term.
198 * @param tname The termname to iterate postings for. If the
199 * term name is the empty string, the iterator
200 * returned will list all the documents in the
201 * database. Such an iterator will always return
202 * a WDF value of 1, since there is no obvious
203 * meaning for this quantity in this case.
205 PostingIterator postlist_begin(const std::string &tname) const;
207 /** Corresponding end iterator to postlist_begin(). The term name argument is unnamed and unused; a default-constructed PostingIterator is returned.
209 PostingIterator XAPIAN_NOTHROW(postlist_end(const std::string &) const) {
210 return PostingIterator();
213 /** An iterator pointing to the start of the termlist
214 * for a given document.
216 * @param did The document id of the document to iterate terms for.
218 TermIterator termlist_begin(Xapian::docid did) const;
220 /** Corresponding end iterator to termlist_begin(). The document id argument is unnamed and unused; a default-constructed TermIterator is returned.
222 TermIterator XAPIAN_NOTHROW(termlist_end(Xapian::docid) const) {
223 return TermIterator();
226 /** Does this database have any positional information? */
227 bool has_positions() const;
229 /** An iterator pointing to the start of the position list
230 * for a given term in a given document.
232 PositionIterator positionlist_begin(Xapian::docid did, const std::string &tname) const;
234 /** Corresponding end iterator to positionlist_begin(). Both arguments are unnamed and unused; a default-constructed PositionIterator is returned.
236 PositionIterator XAPIAN_NOTHROW(positionlist_end(Xapian::docid, const std::string &) const) {
237 return PositionIterator();
240 /** An iterator which runs across all terms with a given prefix.
242 * @param prefix The prefix to restrict the returned terms to (default:
243 * iterate all terms)
245 TermIterator allterms_begin(const std::string & prefix = std::string()) const;
247 /** Corresponding end iterator to allterms_begin(prefix). The prefix argument (default: empty string) is unnamed and unused; a default-constructed TermIterator is returned.
249 TermIterator XAPIAN_NOTHROW(allterms_end(const std::string & = std::string()) const) {
250 return TermIterator();
253 /// Get the number of documents in the database.
254 Xapian::doccount get_doccount() const;
256 /// Get the highest document id which has been used in the database.
257 Xapian::docid get_lastdocid() const;
259 /// Get the average length of the documents in the database.
260 Xapian::doclength get_avlength() const;
262 /// Get the number of documents in the database indexed by a given term.
263 Xapian::doccount get_termfreq(const std::string & tname) const;
265 /** Check if a given term exists in the database.
267 * @param tname The term to test the existence of.
269 * @return true if and only if the term exists in the database.
270 * This is the same as (get_termfreq(tname) != 0), but
271 * will often be more efficient.
273 bool term_exists(const std::string & tname) const;
275 /** Return the total number of occurrences of the given term.
277 * This is the sum of the number of occurrences of the term in each
278 * document it indexes: i.e., the sum of the within document
279 * frequencies of the term.
281 * @param tname The term whose collection frequency is being
282 * requested.
284 Xapian::termcount get_collection_freq(const std::string & tname) const;
286 /** Return the frequency of a given value slot.
288 * This is the number of documents which have a (non-empty) value
289 * stored in the slot.
291 * @param slot The value slot to examine.
293 * @exception UnimplementedError The frequency of the value isn't
294 * available for this database type.
296 Xapian::doccount get_value_freq(Xapian::valueno slot) const;
298 /** Get a lower bound on the values stored in the given value slot.
300 * If there are no values stored in the given value slot, this will
301 * return an empty string.
303 * If the lower bound isn't available for the given database type,
304 * this will return the lowest possible bound - the empty string.
306 * @param slot The value slot to examine.
308 std::string get_value_lower_bound(Xapian::valueno slot) const;
310 /** Get an upper bound on the values stored in the given value slot.
312 * If there are no values stored in the given value slot, this will
313 * return an empty string.
315 * @param slot The value slot to examine.
317 * @exception UnimplementedError The upper bound of the values isn't
318 * available for this database type.
320 std::string get_value_upper_bound(Xapian::valueno slot) const;
322 /** Get a lower bound on the length of a document in this DB.
324 * This bound does not include any zero-length documents.
326 Xapian::termcount get_doclength_lower_bound() const;
328 /// Get an upper bound on the length of a document in this DB.
329 Xapian::termcount get_doclength_upper_bound() const;
331 /// Get an upper bound on the wdf of term @a term.
332 Xapian::termcount get_wdf_upper_bound(const std::string & term) const;
334 /// Return an iterator over the value in slot @a slot for each document.
335 ValueIterator valuestream_begin(Xapian::valueno slot) const;
337 /// Return end iterator corresponding to valuestream_begin(). The slot argument is unnamed and unused; a default-constructed ValueIterator is returned.
338 ValueIterator XAPIAN_NOTHROW(valuestream_end(Xapian::valueno) const) {
339 return ValueIterator();
342 /// Get the length of a document.
343 Xapian::termcount get_doclength(Xapian::docid did) const;
345 /// Get the number of unique terms in document.
346 Xapian::termcount get_unique_terms(Xapian::docid did) const;
348 /** Send a "keep-alive" to remote databases to stop them timing out.
350 * Has no effect on non-remote databases.
352 void keep_alive();
354 /** Get a document from the database, given its document id.
356 * This method returns a Xapian::Document object which provides the
357 * information about a document.
359 * @param did The document id of the document to retrieve.
361 * @return A Xapian::Document object containing the document data
363 * @exception Xapian::DocNotFoundError The document specified
364 * could not be found in the database.
366 * @exception Xapian::InvalidArgumentError did was 0, which is not
367 * a valid document id.
369 Xapian::Document get_document(Xapian::docid did) const;
371 /** Suggest a spelling correction.
373 * @param word The potentially misspelled word.
374 * @param max_edit_distance Only consider words which are at most
375 * @a max_edit_distance edits from @a word. An edit is a
376 * character insertion, deletion, or the transposition of two
377 * adjacent characters (default is 2).
379 std::string get_spelling_suggestion(const std::string &word,
380 unsigned max_edit_distance = 2) const;
382 /** An iterator which returns all the spelling correction targets.
384 * This returns all the words which are considered as targets for the
385 * spelling correction algorithm. The frequency of each word is
386 * available as the term frequency of each entry in the returned
387 * iterator.
389 Xapian::TermIterator spellings_begin() const;
391 /// Corresponding end iterator to spellings_begin(). Returns a default-constructed Xapian::TermIterator.
392 Xapian::TermIterator XAPIAN_NOTHROW(spellings_end() const) {
393 return Xapian::TermIterator();
396 /** An iterator which returns all the synonyms for a given term.
398 * @param term The term to return synonyms for.
400 Xapian::TermIterator synonyms_begin(const std::string &term) const;
402 /// Corresponding end iterator to synonyms_begin(term). The term argument is unnamed and unused; a default-constructed Xapian::TermIterator is returned.
403 Xapian::TermIterator XAPIAN_NOTHROW(synonyms_end(const std::string &) const) {
404 return Xapian::TermIterator();
407 /** An iterator which returns all terms which have synonyms.
409 * @param prefix If non-empty, only terms with this prefix are
410 * returned.
412 Xapian::TermIterator synonym_keys_begin(const std::string &prefix = std::string()) const;
414 /// Corresponding end iterator to synonym_keys_begin(prefix). The prefix argument (default: empty string) is unnamed and unused; a default-constructed Xapian::TermIterator is returned.
415 Xapian::TermIterator XAPIAN_NOTHROW(synonym_keys_end(const std::string & = std::string()) const) {
416 return Xapian::TermIterator();
419 /** Get the user-specified metadata associated with a given key.
421 * User-specified metadata allows you to store arbitrary information
422 * in the form of (key,tag) pairs. See @a
423 * WritableDatabase::set_metadata() for more information.
425 * When invoked on a Xapian::Database object representing multiple
426 * databases, currently only the metadata for the first is considered
427 * but this behaviour may change in the future.
429 * If there is no piece of metadata associated with the specified
430 * key, an empty string is returned (this applies even for backends
431 * which don't support metadata).
433 * Empty keys are not valid, and specifying one will cause an
434 * exception.
436 * @param key The key of the metadata item to access.
438 * @return The retrieved metadata item's value.
440 * @exception Xapian::InvalidArgumentError will be thrown if the
441 * key supplied is empty.
443 std::string get_metadata(const std::string & key) const;
445 /** An iterator which returns all user-specified metadata keys.
447 * When invoked on a Xapian::Database object representing multiple
448 * databases, currently only the metadata for the first is considered
449 * but this behaviour may change in the future.
451 * If the backend doesn't support metadata, then this method returns
452 * an iterator which compares equal to that returned by
453 * metadata_keys_end().
455 * @param prefix If non-empty, only keys with this prefix are
456 * returned.
458 * @exception Xapian::UnimplementedError will be thrown if the
459 * backend implements user-specified metadata, but
460 * doesn't implement iterating its keys (currently
461 * this happens for the InMemory backend).
463 Xapian::TermIterator metadata_keys_begin(const std::string &prefix = std::string()) const;
465 /// Corresponding end iterator to metadata_keys_begin(). The prefix argument (default: empty string) is unnamed and unused; a default-constructed Xapian::TermIterator is returned.
466 Xapian::TermIterator XAPIAN_NOTHROW(metadata_keys_end(const std::string & = std::string()) const) {
467 return Xapian::TermIterator();
470 /** Get a UUID for the database.
472 * The UUID will persist for the lifetime of the database.
474 * Replicas (e.g., made with the replication protocol, or by copying all
475 * the database files) will have the same UUID. However, copies (made
476 * with copydatabase, or xapian-compact) will have different UUIDs.
478 * If the backend does not support UUIDs or this database has no
479 * subdatabases, the UUID will be empty.
481 * If this database has multiple sub-databases, the UUID string will
482 * contain the UUIDs of all the sub-databases.
484 std::string get_uuid() const;
486 /** Get the revision of the database.
488 * The revision is an unsigned integer which increases with each
489 * commit.
491 * The database must have exactly one sub-database, which must be of
492 * type chert or glass. Otherwise an exception will be thrown.
494 * Experimental - see
495 * https://xapian.org/docs/deprecation#experimental-features
497 Xapian::rev get_revision() const;
499 /** Check the integrity of a database or database table. Forwards to the internal check_() helper with the address of @a path and a file descriptor of 0, and returns its result unchanged.
501 * @param path Path to database or table
502 * @param opts Options to use for check (default: 0)
503 * @param out std::ostream to write output to (NULL, the default, for no output)
505 static size_t check(const std::string & path, int opts = 0,
506 std::ostream *out = NULL) {
507 return check_(&path, 0, opts, out);
510 /** Check the integrity of a single file database. Forwards to the internal check_() helper with a NULL path and the given descriptor, and returns its result unchanged.
512 * @param fd file descriptor for the database. The current file
513 * offset is used, allowing checking a single file
514 * database which is embedded within another file. Xapian
515 * takes ownership of the file descriptor and will close
516 * it before returning.
517 * @param opts Options to use for check (default: 0)
518 * @param out std::ostream to write output to (NULL, the default, for no output)
520 static size_t check(int fd, int opts = 0, std::ostream *out = NULL) {
521 return check_(NULL, fd, opts, out);
524 /** Produce a compact version of this database, written to a path. No Compactor functor is involved: compact_() is called with a NULL compactor.
526 * New 1.3.4. Various methods of the Compactor class were deprecated
527 * in 1.3.4.
529 * @param output Path to write the compact version to.
530 * This can be the same as an input if that input is a
531 * stub database (in which case the database(s) listed
532 * in the stub will be compacted to a new database and
533 * then the stub will be atomically updated to point to
534 * this new database).
536 * @param flags Any of the following combined using bitwise-or (| in
537 * C++):
538 * - Xapian::DBCOMPACT_NO_RENUMBER By default the document ids will
539 * be renumbered in the output - currently by applying the
540 * same offset to all the document ids in a particular
541 * source database. If this flag is specified, then this
542 * renumbering doesn't happen, but all the document ids
543 * must be unique over all source databases. Currently
544 * the ranges of document ids in each source must not
545 * overlap either, though this restriction may be removed
546 * in the future.
547 * - Xapian::DBCOMPACT_MULTIPASS
548 * If merging more than 3 databases, merge the postlists
549 * in multiple passes, which is generally faster but
550 * requires more disk space for temporary files.
551 * - Xapian::DBCOMPACT_SINGLE_FILE
552 * Produce a single-file database (only supported for
553 * glass currently).
554 * - At most one of:
555 * - Xapian::Compactor::STANDARD - Don't split items unnecessarily.
556 * - Xapian::Compactor::FULL - Split items whenever it saves
557 * space (the default).
558 * - Xapian::Compactor::FULLER - Allow oversize items to save
559 * more space (not recommended if you ever plan to update the
560 * compacted database).
562 * @param block_size This specifies the block size (in bytes)
563 * to use for the output. For glass, the block size must
564 * be a power of 2 between 2048 and 65536 (inclusive), and
565 * the default (also used if an invalid value is passed)
566 * is 8192 bytes.
568 void compact(const std::string & output,
569 unsigned flags = 0,
570 int block_size = 0) {
571 compact_(&output, 0, flags, block_size, NULL);
574 /** Produce a compact version of this database, written to a file descriptor. No Compactor functor is involved: compact_() is called with a NULL output path and a NULL compactor.
576 * New 1.3.4. Various methods of the Compactor class were deprecated
577 * in 1.3.4.
579 * This variant writes a single-file database to the specified file
580 * descriptor. Only the glass backend supports such databases, so
581 * this form is only supported for this backend.
583 * @param fd File descriptor to write the compact version to. The
584 * descriptor needs to be readable and writable (open with
585 * O_RDWR) and seekable. The current file offset is used,
586 * allowing compacting to a single file database embedded
587 * within another file. Xapian takes ownership of the
588 * file descriptor and will close it before returning.
590 * @param flags Any of the following combined using bitwise-or (| in
591 * C++):
592 * - Xapian::DBCOMPACT_NO_RENUMBER By default the document ids will
593 * be renumbered in the output - currently by applying the
594 * same offset to all the document ids in a particular
595 * source database. If this flag is specified, then this
596 * renumbering doesn't happen, but all the document ids
597 * must be unique over all source databases. Currently
598 * the ranges of document ids in each source must not
599 * overlap either, though this restriction may be removed
600 * in the future.
601 * - Xapian::DBCOMPACT_MULTIPASS
602 * If merging more than 3 databases, merge the postlists
603 * in multiple passes, which is generally faster but
604 * requires more disk space for temporary files.
605 * - Xapian::DBCOMPACT_SINGLE_FILE
606 * Produce a single-file database (only supported for
607 * glass currently) - this flag is implied in this form
608 * and need not be specified explicitly.
610 * @param block_size This specifies the block size (in bytes)
611 * to use for the output. For glass, the block size must
612 * be a power of 2 between 2048 and 65536 (inclusive), and
613 * the default (also used if an invalid value is passed)
614 * is 8192 bytes.
616 void compact(int fd,
617 unsigned flags = 0,
618 int block_size = 0) {
619 compact_(NULL, fd, flags, block_size, NULL);
622 /** Produce a compact version of this database, written to a path, using a caller-supplied Compactor functor.
624 * New 1.3.4. Various methods of the Compactor class were deprecated
625 * in 1.3.4.
627 * The @a compactor functor allows handling progress output and
628 * specifying how user metadata is merged.
630 * @param output Path to write the compact version to.
631 * This can be the same as an input if that input is a
632 * stub database (in which case the database(s) listed
633 * in the stub will be compacted to a new database and
634 * then the stub will be atomically updated to point to
635 * this new database).
637 * @param flags Any of the following combined using bitwise-or (| in
638 * C++):
639 * - Xapian::DBCOMPACT_NO_RENUMBER By default the document ids will
640 * be renumbered in the output - currently by applying the
641 * same offset to all the document ids in a particular
642 * source database. If this flag is specified, then this
643 * renumbering doesn't happen, but all the document ids
644 * must be unique over all source databases. Currently
645 * the ranges of document ids in each source must not
646 * overlap either, though this restriction may be removed
647 * in the future.
648 * - Xapian::DBCOMPACT_MULTIPASS
649 * If merging more than 3 databases, merge the postlists
650 * in multiple passes, which is generally faster but
651 * requires more disk space for temporary files.
652 * - Xapian::DBCOMPACT_SINGLE_FILE
653 * Produce a single-file database (only supported for
654 * glass currently).
656 * @param block_size This specifies the block size (in bytes)
657 * to use for the output. For glass, the block size must
658 * be a power of 2 between 2048 and 65536 (inclusive), and
659 * the default (also used if an invalid value is passed)
660 * is 8192 bytes.
662 * @param compactor Functor used for progress output and for merging user metadata; its address is passed on to compact_().
664 void compact(const std::string & output,
665 unsigned flags,
666 int block_size,
667 Xapian::Compactor & compactor)
669 compact_(&output, 0, flags, block_size, &compactor);
672 /** Produce a compact version of this database, written to a file descriptor, using a caller-supplied Compactor functor.
674 * New 1.3.4. Various methods of the Compactor class were deprecated
675 * in 1.3.4.
677 * The @a compactor functor allows handling progress output and
678 * specifying how user metadata is merged.
680 * This variant writes a single-file database to the specified file
681 * descriptor. Only the glass backend supports such databases, so
682 * this form is only supported for this backend.
684 * @param fd File descriptor to write the compact version to. The
685 * descriptor needs to be readable and writable (open with
686 * O_RDWR) and seekable. The current file offset is used,
687 * allowing compacting to a single file database embedded
688 * within another file. Xapian takes ownership of the
689 * file descriptor and will close it before returning.
691 * @param flags Any of the following combined using bitwise-or (| in
692 * C++):
693 * - Xapian::DBCOMPACT_NO_RENUMBER By default the document ids will
694 * be renumbered in the output - currently by applying the
695 * same offset to all the document ids in a particular
696 * source database. If this flag is specified, then this
697 * renumbering doesn't happen, but all the document ids
698 * must be unique over all source databases. Currently
699 * the ranges of document ids in each source must not
700 * overlap either, though this restriction may be removed
701 * in the future.
702 * - Xapian::DBCOMPACT_MULTIPASS
703 * If merging more than 3 databases, merge the postlists
704 * in multiple passes, which is generally faster but
705 * requires more disk space for temporary files.
706 * - Xapian::DBCOMPACT_SINGLE_FILE
707 * Produce a single-file database (only supported for
708 * glass currently) - this flag is implied in this form
709 * and need not be specified explicitly.
711 * @param block_size This specifies the block size (in bytes)
712 * to use for the output. For glass, the block size must
713 * be a power of 2 between 2048 and 65536 (inclusive), and
714 * the default (also used if an invalid value is passed)
715 * is 8192 bytes.
717 * @param compactor Functor used for progress output and for merging user metadata; its address is passed on to compact_().
719 void compact(int fd,
720 unsigned flags,
721 int block_size,
722 Xapian::Compactor & compactor)
724 compact_(NULL, fd, flags, block_size, &compactor);
728 /** This class provides read/write access to a database.
730 class XAPIAN_VISIBILITY_DEFAULT WritableDatabase : public Database {
731 public:
732 /** Destroy this handle on the database.
734 * If no other handles to this database remain, the database will be
735 * closed.
737 * If a transaction is active cancel_transaction() will be implicitly
738 * called; if no transaction is active commit() will be implicitly
739 * called, but any exception will be swallowed (because throwing
740 * exceptions in C++ destructors is problematic). If you aren't using
741 * transactions and want to know about any failure to commit changes,
742 * call commit() explicitly before the destructor gets called.
744 virtual ~WritableDatabase();
746 /** Create a WritableDatabase with no subdatabases.
748 * The created object isn't very useful in this state - it's intended
749 * as a placeholder value.
751 WritableDatabase();
753 /** Open a database for update, automatically determining the database
754 * backend to use.
756 * If the database is to be created, Xapian will try
757 * to create the directory indicated by path if it doesn't already
758 * exist (but only the leaf directory, not recursively).
760 * @param path directory that the database is stored in.
761 * @param flags one of:
762 * - Xapian::DB_CREATE_OR_OPEN open for read/write; create if no db
763 * exists (the default if flags isn't specified)
764 * - Xapian::DB_CREATE create new database; fail if db exists
765 * - Xapian::DB_CREATE_OR_OVERWRITE overwrite existing db; create if
766 * none exists
767 * - Xapian::DB_OPEN open for read/write; fail if no db exists
769 * Additionally, the following flags can be combined with action
770 * using bitwise-or (| in C++):
772 * - Xapian::DB_NO_SYNC don't call fsync() or similar
773 * - Xapian::DB_DANGEROUS don't be crash-safe, no concurrent readers
774 * - Xapian::DB_RETRY_LOCK to wait to get a write lock
776 * @param block_size If a new database is created, this specifies
777 * the block size (in bytes) for backends which
778 * have such a concept. For chert and glass, the
779 * block size must be a power of 2 between 2048 and
780 * 65536 (inclusive), and the default (also used if
781 * an invalid value is passed) is 8192 bytes.
783 * @exception Xapian::DatabaseCorruptError will be thrown if the
784 * database is in a corrupt state.
786 * @exception Xapian::DatabaseLockError will be thrown if a lock
787 * couldn't be acquired on the database.
789 explicit WritableDatabase(const std::string &path,
790 int flags = 0,
791 int block_size = 0);
793 /** @private @internal Create a WritableDatabase given its internals.
795 explicit WritableDatabase(Database::Internal *internal);
797 /** Copying is allowed. The internals are reference counted, so
798 * copying is cheap.
800 * @param other The object to copy.
802 WritableDatabase(const WritableDatabase &other);
804 /** Assignment is allowed. The internals are reference counted,
805 * so assignment is cheap.
807 * Note that only a WritableDatabase may be assigned to a
808 * WritableDatabase: an attempt to assign a Database is caught
809 * at compile-time.
811 * @param other The object to copy.
813 void operator=(const WritableDatabase &other);
815 /** Commit any pending modifications made to the database.
817 * For efficiency reasons, when performing multiple updates to a
818 * database it is best (indeed, almost essential) to make as many
819 * modifications as memory will permit in a single pass through
820 * the database. To ensure this, Xapian batches up modifications.
822 * This method may be called at any time to commit any pending
823 * modifications to the database.
825 * If any of the modifications fail, an exception will be thrown and
826 * the database will be left in a state in which each separate
827 * addition, replacement or deletion operation has either been fully
828 * performed or not performed at all: it is then up to the
829 * application to work out which operations need to be repeated.
831 * It's not valid to call commit() within a transaction.
833 * Beware of calling commit() too frequently: this will make indexing
834 * take much longer.
836 * Note that commit() need not be called explicitly: it will be called
837 * automatically when the database is closed, or when a sufficient
838 * number of modifications have been made. By default, this is every
839 * 10000 documents added, deleted, or modified. This value is rather
840 * conservative, and if you have a machine with plenty of memory,
841 * you can improve indexing throughput dramatically by setting
842 * XAPIAN_FLUSH_THRESHOLD in the environment to a larger value.
844 * This method was new in Xapian 1.1.0 - in earlier versions it was
845 * called flush().
847 * @exception Xapian::DatabaseError will be thrown if a problem occurs
848 * while modifying the database.
850 * @exception Xapian::DatabaseCorruptError will be thrown if the
851 * database is in a corrupt state.
853 void commit();
855 /** Pre-1.1.0 name for commit(); this deprecated inline wrapper simply calls commit().
857 * Use commit() instead.
859 XAPIAN_DEPRECATED(void flush()) { commit(); }
861 /** Begin a transaction.
863 * In Xapian a transaction is a group of modifications to the database
864 * which are linked such that either all will be applied
865 * simultaneously or none will be applied at all. Even in the case of
866 * a power failure, this characteristic should be preserved (as long
867 * as the filesystem isn't corrupted, etc).
869 * A transaction is started with begin_transaction() and can
870 * either be committed by calling commit_transaction() or aborted
871 * by calling cancel_transaction().
873 * By default, a transaction implicitly calls commit() before and
874 * after so that the modifications stand and fall without affecting
875 * modifications before or after.
877 * The downside of these implicit calls to commit() is that small
878 * transactions can harm indexing performance in the same way that
879 * explicitly calling commit() frequently can.
881 * If you're applying atomic groups of changes and only wish to
882 * ensure that each group is either applied or not applied, then
883 * you can prevent the automatic commit() before and after the
884 * transaction by starting the transaction with
885 * begin_transaction(false). However, if cancel_transaction() is
886 * called (or if commit_transaction() isn't called before the
887 * WritableDatabase object is destroyed) then any changes which
888 * were pending before the transaction began will also be discarded.
890 * Transactions aren't currently supported by the InMemory backend.
892 * @param flushed Is this a flushed transaction? By default
893 * transactions are "flushed", which means that
894 * committing a transaction will ensure those
895 * changes are permanently written to the
896 * database. By contrast, unflushed transactions
897 * only ensure that changes within the transaction
898 * are either all applied or none are.
900 * @exception Xapian::UnimplementedError will be thrown if transactions
901 * are not available for this database type.
903 * @exception Xapian::InvalidOperationError will be thrown if this is
904 * called at an invalid time, such as when a transaction
905 * is already in progress.
907 void begin_transaction(bool flushed = true);
909 /** Complete the transaction currently in progress.
911 * If this method completes successfully and this is a flushed
912 * transaction, all the database modifications
913 * made during the transaction will have been committed to the
914 * database.
916 * If an error occurs, an exception will be thrown, and none of
917 * the modifications made to the database during the transaction
918 * will have been applied to the database.
920 * In all cases the transaction will no longer be in progress.
922 * @exception Xapian::DatabaseError will be thrown if a problem occurs
923 * while modifying the database.
925 * @exception Xapian::DatabaseCorruptError will be thrown if the
926 * database is in a corrupt state.
928 * @exception Xapian::InvalidOperationError will be thrown if a
929 * transaction is not currently in progress.
931 * @exception Xapian::UnimplementedError will be thrown if transactions
932 * are not available for this database type.
934 void commit_transaction();
936 /** Abort the transaction currently in progress, discarding the
937 * pending modifications made to the database.
939 * If an error occurs in this method, an exception will be thrown,
940 * but the transaction will be cancelled anyway.
942 * @exception Xapian::DatabaseError will be thrown if a problem occurs
943 * while modifying the database.
945 * @exception Xapian::DatabaseCorruptError will be thrown if the
946 * database is in a corrupt state.
948 * @exception Xapian::InvalidOperationError will be thrown if a
949 * transaction is not currently in progress.
951 * @exception Xapian::UnimplementedError will be thrown if transactions
952 * are not available for this database type.
954 void cancel_transaction();
956 /** Add a new document to the database.
958 * This method adds the specified document to the database,
959 * returning a newly allocated document ID. Automatically allocated
960 * document IDs come from a per-database monotonically increasing
961 * counter, so IDs from deleted documents won't be reused.
963 * If you want to specify the document ID to be used, you should
964 * call replace_document() instead.
966 * Note that changes to the database won't be immediately committed to
967 * disk; see commit() for more details.
969 * As with all database modification operations, the effect is
970 * atomic: the document will either be fully added, or the document
971 * fails to be added and an exception is thrown (possibly at a
972 * later time when commit() is called or the database is closed).
974 * @param document The new document to be added.
976 * @return The document ID of the newly added document.
978 * @exception Xapian::DatabaseError will be thrown if a problem occurs
979 * while writing to the database.
981 * @exception Xapian::DatabaseCorruptError will be thrown if the
982 * database is in a corrupt state.
984 Xapian::docid add_document(const Xapian::Document & document);
986 /** Delete a document from the database.
988 * This method removes the document with the specified document ID
989 * from the database.
991 * Note that changes to the database won't be immediately committed to
992 * disk; see commit() for more details.
994 * As with all database modification operations, the effect is
995 * atomic: the document will either be fully removed, or the document
996 * fails to be removed and an exception is thrown (possibly at a
997 * later time when commit() is called or the database is closed).
999 * @param did The document ID of the document to be removed.
1001 * @exception Xapian::DatabaseError will be thrown if a problem occurs
1002 * while writing to the database.
1004 * @exception Xapian::DatabaseCorruptError will be thrown if the
1005 * database is in a corrupt state.
1007 void delete_document(Xapian::docid did);
1009 /** Delete any documents indexed by a term from the database.
1011 * This method removes any documents indexed by the specified term
1012 * from the database.
1014 * A major use is for convenience when UIDs from another system are
1015 * mapped to terms in Xapian, although this method has other uses
1016 * (for example, you could add a "deletion date" term to documents at
1017 * index time and use this method to delete all documents due for
1018 * deletion on a particular date).
1020 * @param unique_term The term to remove references to.
1022 * @exception Xapian::DatabaseError will be thrown if a problem occurs
1023 * while writing to the database.
1025 * @exception Xapian::DatabaseCorruptError will be thrown if the
1026 * database is in a corrupt state.
1028 void delete_document(const std::string & unique_term);
1030 /** Replace a given document in the database.
1032 * This method replaces the document with the specified document ID.
1033 * If document ID @a did isn't currently used, the document will be
1034 * added with document ID @a did.
1036 * The monotonic counter used for automatically allocating document
1037 * IDs is increased so that the next automatically allocated document
1038 * ID will be did + 1. Be aware that if you use this method to
1039 * specify a high document ID for a new document, and also use
1040 * WritableDatabase::add_document(), Xapian may get to a state where
1041 * this counter wraps around and will be unable to automatically
1042 * allocate document IDs!
1044 * Note that changes to the database won't be immediately committed to
1045 * disk; see commit() for more details.
1047 * As with all database modification operations, the effect is
1048 * atomic: the document will either be fully replaced, or the document
1049 * fails to be replaced and an exception is thrown (possibly at a
1050 * later time when commit() is called or the database is closed).
1052 * @param did The document ID of the document to be replaced.
1053 * @param document The new document.
1055 * @exception Xapian::DatabaseError will be thrown if a problem occurs
1056 * while writing to the database.
1058 * @exception Xapian::DatabaseCorruptError will be thrown if the
1059 * database is in a corrupt state.
1061 void replace_document(Xapian::docid did,
1062 const Xapian::Document & document);
1064 /** Replace any documents matching a term.
1066 * This method replaces any documents indexed by the specified term
1067 * with the specified document. If any documents are indexed by the
1068 * term, the lowest document ID will be used for the document,
1069 * otherwise a new document ID will be generated as for add_document().
1071 * One common use is to allow UIDs from another system to easily be
1072 * mapped to terms in Xapian. Note that this method doesn't
1073 * automatically add unique_term as a term, so you'll need to call
1074 * document.add_term(unique_term) first when using replace_document()
1075 * in this way.
1077 * Note that changes to the database won't be immediately committed to
1078 * disk; see commit() for more details.
1080 * As with all database modification operations, the effect is
1081 * atomic: the document(s) will either be fully replaced, or the
1082 * document(s) fail to be replaced and an exception is thrown
1083 * (possibly at a
1084 * later time when commit() is called or the database is closed).
1086 * @param unique_term The "unique" term.
1087 * @param document The new document.
1089 * @return The document ID that document was given.
1091 * @exception Xapian::DatabaseError will be thrown if a problem occurs
1092 * while writing to the database.
1094 * @exception Xapian::DatabaseCorruptError will be thrown if the
1095 * database is in a corrupt state.
1097 Xapian::docid replace_document(const std::string & unique_term,
1098 const Xapian::Document & document);
1100 /** Add a word to the spelling dictionary.
1102 * If the word is already present, its frequency is increased.
1104 * @param word The word to add.
1105 * @param freqinc How much to increase its frequency by (default 1).
1107 void add_spelling(const std::string & word,
1108 Xapian::termcount freqinc = 1) const;
1110 /** Remove a word from the spelling dictionary.
1112 * The word's frequency is decreased, and if it would become zero or less
1113 * then the word is removed completely.
1115 * @param word The word to remove.
1116 * @param freqdec How much to decrease its frequency by (default 1).
1118 void remove_spelling(const std::string & word,
1119 Xapian::termcount freqdec = 1) const;
1121 /** Add a synonym for a term.
1123 * @param term The term to add a synonym for.
1124 * @param synonym The synonym to add. If this is already a
1125 * synonym for @a term, then no action is taken.
1127 void add_synonym(const std::string & term,
1128 const std::string & synonym) const;
1130 /** Remove a synonym for a term.
1132 * @param term The term to remove a synonym for.
1133 * @param synonym The synonym to remove. If this isn't currently
1134 * a synonym for @a term, then no action is taken.
1136 void remove_synonym(const std::string & term,
1137 const std::string & synonym) const;
1139 /** Remove all synonyms for a term.
1141 * @param term The term to remove all synonyms for. If the
1142 * term has no synonyms, no action is taken.
1144 void clear_synonyms(const std::string & term) const;
1146 /** Set the user-specified metadata associated with a given key.
1148 * This method sets the metadata value associated with a given key.
1149 * If there is already a metadata value stored in the database with
1150 * the same key, the old value is replaced. If you want to delete an
1151 * existing item of metadata, just set its value to the empty string.
1153 * User-specified metadata allows you to store arbitrary information
1154 * in the form of (key, value) pairs.
1156 * There's no hard limit on the number of metadata items, or the size
1157 * of the metadata values. Metadata keys have a limited length, which
1158 * depends on the backend. We recommend limiting them to 200 bytes.
1159 * Empty keys are not valid, and specifying one will cause an
1160 * exception.
1162 * Metadata modifications are committed to disk in the same way as
1163 * modifications to the documents in the database are: i.e.,
1164 * modifications are atomic, and won't be committed to disk
1165 * immediately (see commit() for more details). This allows metadata
1166 * to be used to link databases with versioned external resources
1167 * by storing the appropriate version number in a metadata item.
1169 * You can also use the metadata to store arbitrary extra information
1170 * associated with terms, documents, or postings by encoding the
1171 * termname and/or document id into the metadata key.
1173 * @param key The key of the metadata item to set.
1175 * @param value The value of the metadata item to set.
1177 * @exception Xapian::DatabaseError will be thrown if a problem occurs
1178 * while writing to the database.
1180 * @exception Xapian::DatabaseCorruptError will be thrown if the
1181 * database is in a corrupt state.
1183 * @exception Xapian::InvalidArgumentError will be thrown if the
1184 * key supplied is empty.
1186 * @exception Xapian::UnimplementedError will be thrown if the
1187 * database backend in use doesn't support user-specified
1188 * metadata.
1190 void set_metadata(const std::string & key, const std::string & value);
1192 /// Return a string describing this object.
1193 std::string get_description() const;
1198 #endif /* XAPIAN_INCLUDED_DATABASE_H */