[honey] Improve spelling table key encoding
[xapian.git] / xapian-core / backends / glass / glass_version.cc
blobd7014cf0d8b76c19c7a5db22bd122ce71a66b997
1 /** @file glass_version.cc
2 * @brief GlassVersion class
3 */
4 /* Copyright (C) 2006,2007,2008,2009,2010,2013,2014,2015,2016,2017 Olly Betts
5 * Copyright (C) 2011 Dan Colish
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #include <config.h>
24 #include "glass_version.h"
26 #include "debuglog.h"
27 #include "fd.h"
28 #include "glass_defs.h"
29 #include "io_utils.h"
30 #include "omassert.h"
31 #include "pack.h"
32 #include "posixy_wrapper.h"
33 #include "stringutils.h" // For STRINGIZE() and CONST_STRLEN().
35 #include <cstring> // For memcmp().
36 #include <string>
37 #include "safeerrno.h"
38 #include <sys/types.h>
39 #include "safesysstat.h"
40 #include "safefcntl.h"
41 #include "safeunistd.h"
42 #include "str.h"
43 #include "stringutils.h"
45 #include "common/safeuuid.h"
47 #include "xapian/constants.h"
48 #include "xapian/error.h"
50 using namespace std;
/// Convert date <-> version number.
///
/// The year (as an offset from 2014) is stored from bit 9 upwards, the month
/// in bits 5-8 and the day in bits 0-4.  Dates up to 2141-12-31 fit in 2
/// bytes.
#define DATE_TO_VERSION(Y,M,D) \
    ((unsigned(Y) - 2014) << 9 | unsigned(M) << 5 | unsigned(D))
#define VERSION_TO_YEAR(V) ((unsigned(V) >> 9) + 2014)
#define VERSION_TO_MONTH(V) ((unsigned(V) >> 5) & 0x0f)
#define VERSION_TO_DAY(V) (unsigned(V) & 0x1f)

/// Glass format version (date of change):
///
/// NB: A leading zero makes an integer literal octal, so "08" and "09" can't
/// be used for the month or day here.
#define GLASS_FORMAT_VERSION DATE_TO_VERSION(2016,03,14)
// 2016,03,14 1.3.5 compress_min in version file; partly eliminate component_of
// 2015,12,24 1.3.4 2 bytes "components_of" per item eliminated, and much more
// 2014,11,21 1.3.2 Brass renamed to Glass

/// Length of the magic string alone.
#define GLASS_VERSION_MAGIC_LEN 14
/// Length of the magic string plus the two format version bytes after it.
#define GLASS_VERSION_MAGIC_AND_VERSION_LEN 16

/// Magic string followed by the format version (most significant byte first).
static const char GLASS_VERSION_MAGIC[GLASS_VERSION_MAGIC_AND_VERSION_LEN] = {
    '\x0f', '\x0d',
    'X', 'a', 'p', 'i', 'a', 'n', ' ',
    'G', 'l', 'a', 's', 's',
    char((GLASS_FORMAT_VERSION >> 8) & 0xff),
    char(GLASS_FORMAT_VERSION & 0xff)
};
73 GlassVersion::GlassVersion(int fd_)
74 : rev(0), fd(fd_), offset(0), db_dir(), changes(NULL),
75 doccount(0), total_doclen(0), last_docid(0),
76 doclen_lbound(0), doclen_ubound(0),
77 wdf_ubound(0), spelling_wordfreq_ubound(0),
78 oldest_changeset(0)
80 offset = lseek(fd, 0, SEEK_CUR);
81 if (rare(offset < 0)) {
82 string msg = "lseek failed on file descriptor ";
83 msg += str(fd);
84 throw Xapian::DatabaseOpeningError(msg, errno);
88 GlassVersion::~GlassVersion()
90 // Either this is a single-file database, or this fd is from opening a new
91 // version file in write(), but sync() was never called.
92 if (fd != -1)
93 (void)::close(fd);
96 void
97 GlassVersion::read()
99 LOGCALL_VOID(DB, "GlassVersion::read", NO_ARGS);
100 FD close_fd(-1);
101 int fd_in;
102 if (single_file()) {
103 if (rare(lseek(fd, offset, SEEK_SET) < 0)) {
104 string msg = "Failed to rewind file descriptor ";
105 msg += str(fd);
106 throw Xapian::DatabaseOpeningError(msg, errno);
108 fd_in = fd;
109 } else {
110 string filename = db_dir;
111 filename += "/iamglass";
112 fd_in = posixy_open(filename.c_str(), O_RDONLY|O_BINARY);
113 if (rare(fd_in < 0)) {
114 string msg = filename;
115 msg += ": Failed to open glass revision file for reading";
116 throw Xapian::DatabaseOpeningError(msg, errno);
118 close_fd = fd_in;
121 char buf[256];
123 const char * p = buf;
124 const char * end = p + io_read(fd_in, buf, sizeof(buf), 33);
126 if (memcmp(buf, GLASS_VERSION_MAGIC, GLASS_VERSION_MAGIC_LEN) != 0)
127 throw Xapian::DatabaseCorruptError("Rev file magic incorrect");
129 unsigned version;
130 version = static_cast<unsigned char>(buf[GLASS_VERSION_MAGIC_LEN]);
131 version <<= 8;
132 version |= static_cast<unsigned char>(buf[GLASS_VERSION_MAGIC_LEN + 1]);
133 if (version != GLASS_FORMAT_VERSION) {
134 string msg;
135 if (!single_file()) {
136 msg = db_dir;
137 msg += ": ";
139 msg += "Database is format version ";
140 msg += str(VERSION_TO_YEAR(version) * 10000 +
141 VERSION_TO_MONTH(version) * 100 +
142 VERSION_TO_DAY(version));
143 msg += " but I only understand ";
144 msg += str(VERSION_TO_YEAR(GLASS_FORMAT_VERSION) * 10000 +
145 VERSION_TO_MONTH(GLASS_FORMAT_VERSION) * 100 +
146 VERSION_TO_DAY(GLASS_FORMAT_VERSION));
147 throw Xapian::DatabaseVersionError(msg);
150 p += GLASS_VERSION_MAGIC_AND_VERSION_LEN;
151 memcpy(uuid, p, 16);
152 p += 16;
154 if (!unpack_uint(&p, end, &rev))
155 throw Xapian::DatabaseCorruptError("Rev file failed to decode revision");
157 for (unsigned table_no = 0; table_no < Glass::MAX_; ++table_no) {
158 if (!root[table_no].unserialise(&p, end)) {
159 throw Xapian::DatabaseCorruptError("Rev file root_info missing");
161 old_root[table_no] = root[table_no];
164 // For a single-file database, this will assign extra data. We read
165 // sizeof(buf) above, then skip GLASS_VERSION_MAGIC_AND_VERSION_LEN,
166 // then 16, then the size of the serialised root info.
167 serialised_stats.assign(p, end);
168 unserialise_stats();
171 void
172 GlassVersion::serialise_stats()
174 serialised_stats.resize(0);
175 pack_uint(serialised_stats, doccount);
176 // last_docid must always be >= doccount.
177 pack_uint(serialised_stats, last_docid - doccount);
178 pack_uint(serialised_stats, doclen_lbound);
179 pack_uint(serialised_stats, wdf_ubound);
180 // doclen_ubound should always be >= wdf_ubound, so we store the
181 // difference as it may encode smaller. wdf_ubound is likely to
182 // be larger than doclen_lbound.
183 pack_uint(serialised_stats, doclen_ubound - wdf_ubound);
184 pack_uint(serialised_stats, oldest_changeset);
185 pack_uint(serialised_stats, total_doclen);
186 pack_uint(serialised_stats, spelling_wordfreq_ubound);
189 void
190 GlassVersion::unserialise_stats()
192 const char * p = serialised_stats.data();
193 const char * end = p + serialised_stats.size();
194 if (p == end) {
195 doccount = 0;
196 total_doclen = 0;
197 last_docid = 0;
198 doclen_lbound = 0;
199 doclen_ubound = 0;
200 wdf_ubound = 0;
201 oldest_changeset = 0;
202 spelling_wordfreq_ubound = 0;
203 return;
206 if (!unpack_uint(&p, end, &doccount) ||
207 !unpack_uint(&p, end, &last_docid) ||
208 !unpack_uint(&p, end, &doclen_lbound) ||
209 !unpack_uint(&p, end, &wdf_ubound) ||
210 !unpack_uint(&p, end, &doclen_ubound) ||
211 !unpack_uint(&p, end, &oldest_changeset) ||
212 !unpack_uint(&p, end, &total_doclen) ||
213 !unpack_uint(&p, end, &spelling_wordfreq_ubound)) {
214 const char * m = p ?
215 "Bad serialised DB stats (overflowed)" :
216 "Bad serialised DB stats (out of data)";
217 throw Xapian::DatabaseCorruptError(m);
220 // In the single-file DB case, there will be extra data in
221 // serialised_stats, so suppress this check.
222 if (p != end && !single_file())
223 throw Xapian::DatabaseCorruptError("Rev file has junk at end");
225 // last_docid must always be >= doccount.
226 last_docid += doccount;
227 // doclen_ubound should always be >= wdf_ubound, so we store the
228 // difference as it may encode smaller. wdf_ubound is likely to
229 // be larger than doclen_lbound.
230 doclen_ubound += wdf_ubound;
233 void
234 GlassVersion::merge_stats(const GlassVersion & o)
236 doccount += o.get_doccount();
237 if (doccount < o.get_doccount()) {
238 throw Xapian::DatabaseError("doccount overflowed!");
241 Xapian::termcount o_doclen_lbound = o.get_doclength_lower_bound();
242 if (o_doclen_lbound > 0) {
243 if (doclen_lbound == 0 || o_doclen_lbound < doclen_lbound)
244 doclen_lbound = o_doclen_lbound;
247 doclen_ubound = max(doclen_ubound, o.get_doclength_upper_bound());
248 wdf_ubound = max(wdf_ubound, o.get_wdf_upper_bound());
249 total_doclen += o.get_total_doclen();
250 if (total_doclen < o.get_total_doclen()) {
251 throw Xapian::DatabaseError("Total document length overflowed!");
254 // The upper bounds might be on the same word, so we must sum them.
255 spelling_wordfreq_ubound += o.get_spelling_wordfreq_upper_bound();
258 void
259 GlassVersion::cancel()
261 LOGCALL_VOID(DB, "GlassVersion::cancel", NO_ARGS);
262 for (unsigned table_no = 0; table_no < Glass::MAX_; ++table_no) {
263 root[table_no] = old_root[table_no];
265 unserialise_stats();
268 const string
269 GlassVersion::write(glass_revision_number_t new_rev, int flags)
271 LOGCALL(DB, const string, "GlassVersion::write", new_rev|flags);
273 string s(GLASS_VERSION_MAGIC, GLASS_VERSION_MAGIC_AND_VERSION_LEN);
274 s.append(reinterpret_cast<const char *>(uuid), 16);
276 pack_uint(s, new_rev);
278 for (unsigned table_no = 0; table_no < Glass::MAX_; ++table_no) {
279 root[table_no].serialise(s);
282 // Serialise database statistics.
283 serialise_stats();
284 s += serialised_stats;
286 string tmpfile;
287 if (!single_file()) {
288 tmpfile = db_dir;
289 // In dangerous mode, just write the new version file in place.
290 if (flags & Xapian::DB_DANGEROUS)
291 tmpfile += "/iamglass";
292 else
293 tmpfile += "/v.tmp";
295 fd = posixy_open(tmpfile.c_str(), O_CREAT|O_TRUNC|O_WRONLY|O_BINARY, 0666);
296 if (rare(fd < 0))
297 throw Xapian::DatabaseOpeningError("Couldn't write new rev file: " + tmpfile,
298 errno);
300 if (flags & Xapian::DB_DANGEROUS)
301 tmpfile = string();
304 try {
305 io_write(fd, s.data(), s.size());
306 } catch (...) {
307 if (!single_file())
308 (void)close(fd);
309 throw;
312 if (changes) {
313 string changes_buf;
314 changes_buf += '\xfe';
315 pack_uint(changes_buf, new_rev);
316 pack_uint(changes_buf, s.size());
317 changes->write_block(changes_buf);
318 changes->write_block(s);
321 RETURN(tmpfile);
324 bool
325 GlassVersion::sync(const string & tmpfile,
326 glass_revision_number_t new_rev, int flags)
328 Assert(new_rev > rev || rev == 0);
330 if (single_file()) {
331 if ((flags & Xapian::DB_NO_SYNC) == 0 &&
332 ((flags & Xapian::DB_FULL_SYNC) ?
333 !io_full_sync(fd) :
334 !io_sync(fd))) {
335 // FIXME what to do?
337 } else {
338 int fd_to_close = fd;
339 fd = -1;
340 if ((flags & Xapian::DB_NO_SYNC) == 0 &&
341 ((flags & Xapian::DB_FULL_SYNC) ?
342 !io_full_sync(fd_to_close) :
343 !io_sync(fd_to_close))) {
344 int save_errno = errno;
345 (void)close(fd_to_close);
346 if (!tmpfile.empty())
347 (void)unlink(tmpfile.c_str());
348 errno = save_errno;
349 return false;
352 if (close(fd_to_close) != 0) {
353 if (!tmpfile.empty()) {
354 int save_errno = errno;
355 (void)unlink(tmpfile.c_str());
356 errno = save_errno;
358 return false;
361 if (!tmpfile.empty()) {
362 if (!io_tmp_rename(tmpfile, db_dir + "/iamglass")) {
363 return false;
368 for (unsigned table_no = 0; table_no < Glass::MAX_; ++table_no) {
369 old_root[table_no] = root[table_no];
372 rev = new_rev;
373 return true;
376 // Only try to compress tags longer than this many bytes.
377 const size_t COMPRESS_MIN = 4;
379 static const uint4 compress_min_tab[] = {
380 0, // POSTLIST
381 COMPRESS_MIN, // DOCDATA
382 COMPRESS_MIN, // TERMLIST
383 0, // POSITION
384 COMPRESS_MIN, // SPELLING
385 COMPRESS_MIN // SYNONYM
388 void
389 GlassVersion::create(unsigned blocksize)
391 AssertRel(blocksize,>=,GLASS_MIN_BLOCKSIZE);
392 uuid_generate(uuid);
393 for (unsigned table_no = 0; table_no < Glass::MAX_; ++table_no) {
394 root[table_no].init(blocksize, compress_min_tab[table_no]);
398 namespace Glass {
400 void
401 RootInfo::init(unsigned blocksize_, uint4 compress_min_)
403 AssertRel(blocksize_,>=,GLASS_MIN_BLOCKSIZE);
404 root = 0;
405 level = 0;
406 num_entries = 0;
407 root_is_fake = true;
408 sequential = true;
409 blocksize = blocksize_;
410 compress_min = compress_min_;
411 fl_serialised.resize(0);
414 void
415 RootInfo::serialise(string &s) const
417 pack_uint(s, root);
418 unsigned val = level << 2;
419 if (sequential) val |= 0x02;
420 if (root_is_fake) val |= 0x01;
421 pack_uint(s, val);
422 pack_uint(s, num_entries);
423 pack_uint(s, blocksize >> 11);
424 pack_uint(s, compress_min);
425 pack_string(s, fl_serialised);
428 bool
429 RootInfo::unserialise(const char ** p, const char * end)
431 unsigned val;
432 if (!unpack_uint(p, end, &root) ||
433 !unpack_uint(p, end, &val) ||
434 !unpack_uint(p, end, &num_entries) ||
435 !unpack_uint(p, end, &blocksize) ||
436 !unpack_uint(p, end, &compress_min) ||
437 !unpack_string(p, end, fl_serialised)) return false;
438 level = val >> 2;
439 sequential = val & 0x02;
440 root_is_fake = val & 0x01;
441 blocksize <<= 11;
442 AssertRel(blocksize,>=,GLASS_MIN_BLOCKSIZE);
443 return true;