Hook up honey single file databases
[xapian.git] / xapian-core / backends / honey / honey_version.h
blob86aaead9b5f29ba7c35f14ef044441718d4f7cd7
1 /** @file honey_version.h
2 * @brief HoneyVersion class
3 */
4 /* Copyright (C) 2006,2007,2008,2009,2010,2013,2014,2015,2016,2018 Olly Betts
5 * Copyright (C) 2011 Dan Colish
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #ifndef XAPIAN_INCLUDED_HONEY_VERSION_H
23 #define XAPIAN_INCLUDED_HONEY_VERSION_H
25 #include "honey_changes.h"
26 #include "honey_defs.h"
28 #include "omassert.h"
30 #include <cstring>
31 #include <string>
33 #include "common/safeuuid.h"
34 #include "internaltypes.h"
35 #include "xapian/types.h"
37 namespace Honey {
39 class RootInfo {
40 off_t offset;
41 honey_block_t root;
42 unsigned level;
43 honey_tablesize_t num_entries;
44 bool root_is_fake;
45 bool sequential;
46 unsigned blocksize;
47 /// Should be >= 4 or 0 for no compression.
48 uint4 compress_min;
49 std::string fl_serialised;
51 public:
52 void init(unsigned blocksize_, uint4 compress_min_);
54 void serialise(std::string &s) const;
56 bool unserialise(const char ** p, const char * end);
58 off_t get_offset() const { return offset; }
59 honey_block_t get_root() const { return root; }
60 int get_level() const { return int(level); }
61 honey_tablesize_t get_num_entries() const { return num_entries; }
62 bool get_root_is_fake() const { return root_is_fake; }
63 bool get_sequential() const { return sequential; }
64 unsigned get_blocksize() const {
65 AssertRel(blocksize,>=,HONEY_MIN_BLOCKSIZE);
66 AssertRel(blocksize,<=,HONEY_MAX_BLOCKSIZE);
67 return blocksize;
69 uint4 get_compress_min() const { return compress_min; }
70 const std::string & get_free_list() const { return fl_serialised; }
72 void set_level(int level_) { level = unsigned(level_); }
73 void set_num_entries(honey_tablesize_t n) { num_entries = n; }
74 void set_root_is_fake(bool f) { root_is_fake = f; }
75 void set_sequential(bool f) { sequential = f; }
76 void set_offset(off_t offset_) { offset = offset_; }
77 void set_root(honey_block_t root_) { root = root_; }
78 void set_blocksize(unsigned b) {
79 AssertRel(b,>=,HONEY_MIN_BLOCKSIZE);
80 AssertRel(b,<=,HONEY_MAX_BLOCKSIZE);
81 blocksize = b;
83 void set_free_list(const std::string & s) { fl_serialised = s; }
/** Maximum size to allow for honey version file data in single file DB.
 *
 *  NOTE(review): presumably measured in bytes - confirm against the code
 *  which reads and writes the version data.
 */
#define HONEY_VERSION_MAX_SIZE 1024
91 /** The HoneyVersion class manages the revision files.
93 * The "iamhoney" file (currently) contains a "magic" string identifying
94 * that this is a honey database, a database format version number, the UUID
95 * of the database, the revision of the database, and the root block info for
96 * each table.
98 class HoneyVersion {
99 honey_revision_number_t rev;
101 Honey::RootInfo root[Honey::MAX_];
102 Honey::RootInfo old_root[Honey::MAX_];
104 /** The UUID of this database.
106 * This is mutable for older uuid libraries which take non-const uuid_t.
108 mutable uuid_t uuid;
110 /** File descriptor.
112 * When committing, this hold the file descriptor of the new changes file
113 * between the call to the write() and sync() methods.
115 * For a single-file database (when db_dir.empty()), this holds the fd of
116 * that file for use in read().
118 int fd;
120 /** Offset into the file at which the version data starts.
122 * Will be 0, except for an embedded multi-file database.
124 off_t offset;
126 /// The database directory.
127 std::string db_dir;
129 HoneyChanges * changes;
131 /// The number of documents in the database.
132 Xapian::doccount doccount;
134 /// The total of the lengths of all documents in the database.
135 Xapian::totallength total_doclen;
137 /// Greatest document id ever used in this database.
138 Xapian::docid last_docid;
140 /// A lower bound on the smallest document length in this database.
141 Xapian::termcount doclen_lbound;
143 /// An upper bound on the greatest document length in this database.
144 Xapian::termcount doclen_ubound;
146 /// An upper bound on the greatest wdf in this database.
147 Xapian::termcount wdf_ubound;
149 /// An upper bound on the spelling wordfreq in this database.
150 Xapian::termcount spelling_wordfreq_ubound;
152 /// Oldest changeset removed when max_changesets is set
153 mutable honey_revision_number_t oldest_changeset;
155 /// The serialised database stats.
156 std::string serialised_stats;
158 // Serialise the database stats.
159 void serialise_stats();
161 // Unserialise the database stats.
162 void unserialise_stats();
164 public:
165 explicit HoneyVersion(const std::string & db_dir_ = std::string())
166 : rev(0), fd(-1), offset(0), db_dir(db_dir_), changes(NULL),
167 doccount(0), total_doclen(0), last_docid(0),
168 doclen_lbound(0), doclen_ubound(0),
169 wdf_ubound(0), spelling_wordfreq_ubound(0),
170 oldest_changeset(0) { }
172 explicit HoneyVersion(int fd_);
174 ~HoneyVersion();
176 /** Create the version file. */
177 void create(unsigned blocksize);
179 void set_changes(HoneyChanges * changes_) { changes = changes_; }
181 /** Read the version file and check it's a version we understand.
183 * On failure, an exception is thrown.
185 void read();
187 void cancel();
189 const std::string write(honey_revision_number_t new_rev, int flags);
191 bool sync(const std::string & tmpfile,
192 honey_revision_number_t new_rev, int flags);
194 honey_revision_number_t get_revision() const { return rev; }
196 const Honey::RootInfo& get_root(Honey::table_type tbl) const {
197 return root[tbl];
200 Honey::RootInfo* root_to_set(Honey::table_type tbl) {
201 return &root[tbl];
204 /// Return pointer to 16 byte UUID.
205 const char * get_uuid() const {
206 // uuid is unsigned char[].
207 return reinterpret_cast<const char *>(uuid);
210 /// Return UUID in the standard 36 character string format.
211 std::string get_uuid_string() const {
212 char buf[37];
213 uuid_unparse_lower(uuid, buf);
214 return std::string(buf, 36);
217 #if 0 // Unused currently.
218 /// Set the UUID from 16 byte binary value @a data.
219 void set_uuid(const void * data) {
220 std::memcpy(uuid, data, 16);
223 /** Set the UUID from the standard 36 character string format.
225 * @return true if @a s was successfully parsed; false otherwise.
227 bool set_uuid_string(const std::string & s) {
228 return uuid_parse(s.c_str(), uuid);
230 #endif
232 Xapian::doccount get_doccount() const { return doccount; }
234 Xapian::totallength get_total_doclen() const { return total_doclen; }
236 Xapian::docid get_last_docid() const { return last_docid; }
238 Xapian::termcount get_doclength_lower_bound() const {
239 return doclen_lbound;
242 Xapian::termcount get_doclength_upper_bound() const {
243 return doclen_ubound;
246 Xapian::termcount get_wdf_upper_bound() const { return wdf_ubound; }
248 Xapian::termcount get_spelling_wordfreq_upper_bound() const {
249 return spelling_wordfreq_ubound;
252 honey_revision_number_t get_oldest_changeset() const {
253 return oldest_changeset;
256 void set_last_docid(Xapian::docid did) { last_docid = did; }
258 void set_oldest_changeset(honey_revision_number_t changeset) const {
259 oldest_changeset = changeset;
262 void set_spelling_wordfreq_upper_bound(Xapian::termcount ub) {
263 spelling_wordfreq_ubound = ub;
266 void add_document(Xapian::termcount doclen) {
267 ++doccount;
268 if (total_doclen == 0 || (doclen && doclen < doclen_lbound))
269 doclen_lbound = doclen;
270 if (doclen > doclen_ubound)
271 doclen_ubound = doclen;
272 total_doclen += doclen;
275 void delete_document(Xapian::termcount doclen) {
276 --doccount;
277 total_doclen -= doclen;
278 // If the database no longer contains any postings, we can reset
279 // doclen_lbound, doclen_ubound and wdf_ubound.
280 if (total_doclen == 0) {
281 doclen_lbound = 0;
282 doclen_ubound = 0;
283 wdf_ubound = 0;
287 void check_wdf(Xapian::termcount wdf) {
288 if (wdf > wdf_ubound) wdf_ubound = wdf;
291 Xapian::docid get_next_docid() { return ++last_docid; }
293 /** Merge the database stats.
295 * Used by compaction.
297 void merge_stats(const HoneyVersion & o);
299 void merge_stats(Xapian::doccount o_doccount,
300 Xapian::termcount o_doclen_lbound,
301 Xapian::termcount o_doclen_ubound,
302 Xapian::termcount o_wdf_ubound,
303 Xapian::totallength o_total_doclen,
304 Xapian::termcount o_spelling_wordfreq_ubound);
306 bool single_file() const { return db_dir.empty(); }
308 off_t get_offset() const { return offset; }
311 #endif // XAPIAN_INCLUDED_HONEY_VERSION_H