1 /** @file glass_version.cc
2 * @brief GlassVersion class
4 /* Copyright (C) 2006,2007,2008,2009,2010,2013,2014,2015,2016,2017 Olly Betts
5 * Copyright (C) 2011 Dan Colish
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
24 #include "glass_version.h"
28 #include "glass_defs.h"
32 #include "posixy_wrapper.h"
33 #include "stringutils.h" // For STRINGIZE() and CONST_STRLEN().
35 #include <cstring> // For memcmp().
37 #include "safeerrno.h"
38 #include <sys/types.h>
39 #include "safesysstat.h"
40 #include "safefcntl.h"
41 #include "safeunistd.h"
43 #include "stringutils.h"
45 #include "common/safeuuid.h"
47 #include "xapian/constants.h"
48 #include "xapian/error.h"
// Format version handling. A version is a date packed into an unsigned
// value by DATE_TO_VERSION(): (year - 2014) from bit 9 upwards, the month
// in bits 5-8 and the day of the month in bits 0-4. The VERSION_TO_*()
// macros extract the individual fields again.
52 /// Glass format version (date of change):
53 #define GLASS_FORMAT_VERSION DATE_TO_VERSION(2016,03,14)
54 // 2016,03,14 1.3.5 compress_min in version file; partly eliminate component_of
55 // 2015,12,24 1.3.4 2 bytes "components_of" per item eliminated, and much more
56 // 2014,11,21 1.3.2 Brass renamed to Glass
58 /// Convert date <-> version number. Dates up to 2141-12-31 fit in 2 bytes.
59 #define DATE_TO_VERSION(Y,M,D) \
60 ((unsigned(Y) - 2014) << 9 | unsigned(M) << 5 | unsigned(D))
61 #define VERSION_TO_YEAR(V) ((unsigned(V) >> 9) + 2014)
62 #define VERSION_TO_MONTH(V) ((unsigned(V) >> 5) & 0x0f)
63 #define VERSION_TO_DAY(V) (unsigned(V) & 0x1f)
// Length in bytes of the magic string on its own, and of the magic string
// followed by the two format version bytes.
65 #define GLASS_VERSION_MAGIC_LEN 14
66 #define GLASS_VERSION_MAGIC_AND_VERSION_LEN 16
// Header bytes of a version file: the 14 byte magic string followed by
// GLASS_FORMAT_VERSION as two bytes, most significant byte first.
// read() checks the magic part with memcmp() and write() emits the whole
// array at the start of the data it builds.
68 static const char GLASS_VERSION_MAGIC
[GLASS_VERSION_MAGIC_AND_VERSION_LEN
] = {
69 '\x0f', '\x0d', 'X', 'a', 'p', 'i', 'a', 'n', ' ', 'G', 'l', 'a', 's', 's',
70 char((GLASS_FORMAT_VERSION
>> 8) & 0xff), char(GLASS_FORMAT_VERSION
& 0xff)
// Construct a GlassVersion around the file descriptor fd_.
//
// The revision, the statistics members and offset start at zero, db_dir
// starts empty and changes starts as NULL.
//
// Throws Xapian::DatabaseOpeningError (carrying errno) if lseek() fails.
73 GlassVersion::GlassVersion(int fd_
)
74 : rev(0), fd(fd_
), offset(0), db_dir(), changes(NULL
),
75 doccount(0), total_doclen(0), last_docid(0),
76 doclen_lbound(0), doclen_ubound(0),
77 wdf_ubound(0), spelling_wordfreq_ubound(0),
// Record the descriptor's current position in offset; read() later seeks
// back to this position.
80 offset
= lseek(fd
, 0, SEEK_CUR
);
81 if (rare(offset
< 0)) {
82 string msg
= "lseek failed on file descriptor ";
84 throw Xapian::DatabaseOpeningError(msg
, errno
);
// Destructor.
// NOTE(review): the statements the comment below refers to are not visible
// in this excerpt; presumably they release fd when it is still held --
// confirm against the full file.
88 GlassVersion::~GlassVersion()
90 // Either this is a single-file database, or this fd is from opening a new
91 // version file in write(), but sync() was never called.
// GlassVersion::read(): load the version data.
//
// Reads the header from either the existing descriptor (after seeking back
// to offset) or from db_dir + "/iamglass", validates the magic string and
// format version, then decodes the revision and the per-table root info.
// Whatever follows the root info is kept in serialised_stats.
//
// Throws Xapian::DatabaseOpeningError if seeking or opening fails,
// Xapian::DatabaseCorruptError if the magic, revision or root info is bad,
// and Xapian::DatabaseVersionError on a format version mismatch.
99 LOGCALL_VOID(DB
, "GlassVersion::read", NO_ARGS
);
// Reposition fd at the offset recorded by the constructor.
// NOTE(review): the condition selecting this path is not visible in this
// excerpt; presumably it is the single-file case -- confirm.
103 if (rare(lseek(fd
, offset
, SEEK_SET
) < 0)) {
104 string msg
= "Failed to rewind file descriptor ";
106 throw Xapian::DatabaseOpeningError(msg
, errno
);
// Open the version file inside the database directory read-only.
110 string filename
= db_dir
;
111 filename
+= "/iamglass";
112 fd_in
= posixy_open(filename
.c_str(), O_RDONLY
|O_BINARY
);
113 if (rare(fd_in
< 0)) {
114 string msg
= filename
;
115 msg
+= ": Failed to open glass revision file for reading";
116 throw Xapian::DatabaseOpeningError(msg
, errno
);
// Read into buf; end is p plus the value io_read() returns.
// NOTE(review): the final argument (33) looks like the minimum number of
// bytes io_read() must obtain -- confirm against io_read()'s declaration.
123 const char * p
= buf
;
124 const char * end
= p
+ io_read(fd_in
, buf
, sizeof(buf
), 33);
// Only the first GLASS_VERSION_MAGIC_LEN bytes are compared here; the two
// version bytes after them are checked separately below.
126 if (memcmp(buf
, GLASS_VERSION_MAGIC
, GLASS_VERSION_MAGIC_LEN
) != 0)
127 throw Xapian::DatabaseCorruptError("Rev file magic incorrect");
// Assemble the format version from the two bytes after the magic. The casts
// to unsigned char stop a negative char value from sign-extending.
130 version
= static_cast<unsigned char>(buf
[GLASS_VERSION_MAGIC_LEN
]);
132 version
|= static_cast<unsigned char>(buf
[GLASS_VERSION_MAGIC_LEN
+ 1]);
// On a mismatch, report both versions as numbers of the form YYYYMMDD.
133 if (version
!= GLASS_FORMAT_VERSION
) {
135 if (!single_file()) {
139 msg
+= "Database is format version ";
140 msg
+= str(VERSION_TO_YEAR(version
) * 10000 +
141 VERSION_TO_MONTH(version
) * 100 +
142 VERSION_TO_DAY(version
));
143 msg
+= " but I only understand ";
144 msg
+= str(VERSION_TO_YEAR(GLASS_FORMAT_VERSION
) * 10000 +
145 VERSION_TO_MONTH(GLASS_FORMAT_VERSION
) * 100 +
146 VERSION_TO_DAY(GLASS_FORMAT_VERSION
));
147 throw Xapian::DatabaseVersionError(msg
);
// Step over the magic string and version bytes.
150 p
+= GLASS_VERSION_MAGIC_AND_VERSION_LEN
;
// Decode the revision number into rev.
154 if (!unpack_uint(&p
, end
, &rev
))
155 throw Xapian::DatabaseCorruptError("Rev file failed to decode revision");
// Decode the root info for each table, and keep a copy in old_root which
// cancel() later restores from.
157 for (unsigned table_no
= 0; table_no
< Glass::MAX_
; ++table_no
) {
158 if (!root
[table_no
].unserialise(&p
, end
)) {
159 throw Xapian::DatabaseCorruptError("Rev file root_info missing");
161 old_root
[table_no
] = root
[table_no
];
164 // For a single-file database, this will assign extra data. We read
165 // sizeof(buf) above, then skip GLASS_VERSION_MAGIC_AND_VERSION_LEN,
166 // then 16, then the size of the serialised root info.
167 serialised_stats
.assign(p
, end
);
// Rebuild serialised_stats from the current statistics members.
//
// The string is emptied and then the values are appended with pack_uint()
// in this order: doccount, last_docid - doccount, doclen_lbound,
// wdf_ubound, doclen_ubound - wdf_ubound, oldest_changeset, total_doclen,
// spelling_wordfreq_ubound. unserialise_stats() decodes the same order.
172 GlassVersion::serialise_stats()
174 serialised_stats
.resize(0);
175 pack_uint(serialised_stats
, doccount
);
176 // last_docid must always be >= doccount.
177 pack_uint(serialised_stats
, last_docid
- doccount
);
178 pack_uint(serialised_stats
, doclen_lbound
);
179 pack_uint(serialised_stats
, wdf_ubound
);
180 // doclen_ubound should always be >= wdf_ubound, so we store the
181 // difference as it may encode smaller. wdf_ubound is likely to
182 // be larger than doclen_lbound.
183 pack_uint(serialised_stats
, doclen_ubound
- wdf_ubound
);
184 pack_uint(serialised_stats
, oldest_changeset
);
185 pack_uint(serialised_stats
, total_doclen
);
186 pack_uint(serialised_stats
, spelling_wordfreq_ubound
);
// Decode serialised_stats back into the statistics members.
//
// Values are read with unpack_uint() in the order serialise_stats() writes
// them. last_docid and doclen_ubound are stored as differences and are
// turned back into absolute values at the end.
//
// Throws Xapian::DatabaseCorruptError if decoding fails, or if data is left
// over and this is not a single-file database.
190 GlassVersion::unserialise_stats()
192 const char * p
= serialised_stats
.data();
193 const char * end
= p
+ serialised_stats
.size();
// NOTE(review): the condition guarding these resets is not visible in this
// excerpt; presumably they belong to the "no stats stored" case -- confirm.
201 oldest_changeset
= 0;
202 spelling_wordfreq_ubound
= 0;
// The || chain stops at the first value that fails to decode.
206 if (!unpack_uint(&p
, end
, &doccount
) ||
207 !unpack_uint(&p
, end
, &last_docid
) ||
208 !unpack_uint(&p
, end
, &doclen_lbound
) ||
209 !unpack_uint(&p
, end
, &wdf_ubound
) ||
210 !unpack_uint(&p
, end
, &doclen_ubound
) ||
211 !unpack_uint(&p
, end
, &oldest_changeset
) ||
212 !unpack_uint(&p
, end
, &total_doclen
) ||
213 !unpack_uint(&p
, end
, &spelling_wordfreq_ubound
)) {
// One of two messages is chosen for the exception.
// NOTE(review): the expression that selects between them is not visible in
// this excerpt.
215 "Bad serialised DB stats (overflowed)" :
216 "Bad serialised DB stats (out of data)";
217 throw Xapian::DatabaseCorruptError(m
);
220 // In the single-file DB case, there will be extra data in
221 // serialised_stats, so suppress this check.
222 if (p
!= end
&& !single_file())
223 throw Xapian::DatabaseCorruptError("Rev file has junk at end");
225 // last_docid must always be >= doccount.
226 last_docid
+= doccount
;
227 // doclen_ubound should always be >= wdf_ubound, so we store the
228 // difference as it may encode smaller. wdf_ubound is likely to
229 // be larger than doclen_lbound.
230 doclen_ubound
+= wdf_ubound
;
// Fold the statistics of another GlassVersion, o, into this one.
//
// doccount and total_doclen are summed, doclen_lbound takes the smaller
// non-zero value, doclen_ubound and wdf_ubound take the larger value, and
// spelling_wordfreq_ubound is summed.
//
// Throws Xapian::DatabaseError if doccount or total_doclen wraps around.
234 GlassVersion::merge_stats(const GlassVersion
& o
)
236 doccount
+= o
.get_doccount();
// A sum smaller than one of its operands means the addition wrapped.
237 if (doccount
< o
.get_doccount()) {
238 throw Xapian::DatabaseError("doccount overflowed!");
// A lower bound of zero is skipped on either side: a zero in o leaves ours
// untouched, and a zero of ours is replaced by o's non-zero value.
241 Xapian::termcount o_doclen_lbound
= o
.get_doclength_lower_bound();
242 if (o_doclen_lbound
> 0) {
243 if (doclen_lbound
== 0 || o_doclen_lbound
< doclen_lbound
)
244 doclen_lbound
= o_doclen_lbound
;
247 doclen_ubound
= max(doclen_ubound
, o
.get_doclength_upper_bound());
248 wdf_ubound
= max(wdf_ubound
, o
.get_wdf_upper_bound());
249 total_doclen
+= o
.get_total_doclen();
// Same wrap-around test as for doccount above.
250 if (total_doclen
< o
.get_total_doclen()) {
251 throw Xapian::DatabaseError("Total document length overflowed!");
254 // The upper bounds might be on the same word, so we must sum them.
255 spelling_wordfreq_ubound
+= o
.get_spelling_wordfreq_upper_bound();
// Discard pending root info changes: every table's entry in root is
// overwritten with the copy held in old_root (which read() and sync()
// refresh).
259 GlassVersion::cancel()
261 LOGCALL_VOID(DB
, "GlassVersion::cancel", NO_ARGS
);
262 for (unsigned table_no
= 0; table_no
< Glass::MAX_
; ++table_no
) {
263 root
[table_no
] = old_root
[table_no
];
// Serialise the version data for revision new_rev and write it out.
//
// The data written is: GLASS_VERSION_MAGIC (magic plus format version),
// 16 bytes of uuid, new_rev, the serialised root info of every table, then
// serialised_stats.
//
// new_rev: revision number to record.
// flags:   Xapian::DB_DANGEROUS is tested here to select writing the
//          version file in place.
//
// Throws Xapian::DatabaseOpeningError if the new file can't be opened.
// NOTE(review): the return statement is not visible in this excerpt; the
// LOGCALL type suggests a string is returned -- confirm.
269 GlassVersion::write(glass_revision_number_t new_rev
, int flags
)
271 LOGCALL(DB
, const string
, "GlassVersion::write", new_rev
|flags
);
273 string
s(GLASS_VERSION_MAGIC
, GLASS_VERSION_MAGIC_AND_VERSION_LEN
);
274 s
.append(reinterpret_cast<const char *>(uuid
), 16);
276 pack_uint(s
, new_rev
);
278 for (unsigned table_no
= 0; table_no
< Glass::MAX_
; ++table_no
) {
279 root
[table_no
].serialise(s
);
282 // Serialise database statistics.
284 s
+= serialised_stats
;
// For a database which isn't a single file, a version file is opened for
// writing (created if missing, truncated if present).
287 if (!single_file()) {
289 // In dangerous mode, just write the new version file in place.
290 if (flags
& Xapian::DB_DANGEROUS
)
291 tmpfile
+= "/iamglass";
295 fd
= posixy_open(tmpfile
.c_str(), O_CREAT
|O_TRUNC
|O_WRONLY
|O_BINARY
, 0666);
297 throw Xapian::DatabaseOpeningError("Couldn't write new rev file: " + tmpfile
,
300 if (flags
& Xapian::DB_DANGEROUS
)
305 io_write(fd
, s
.data(), s
.size());
// Also record the new version data via changes: a header block holding the
// byte 0xfe, new_rev and the size of s, followed by s itself.
// NOTE(review): the check guarding this section is not visible in this
// excerpt; the constructor sets changes to NULL, so presumably it is
// tested first -- confirm.
314 changes_buf
+= '\xfe';
315 pack_uint(changes_buf
, new_rev
);
316 pack_uint(changes_buf
, s
.size());
317 changes
->write_block(changes_buf
);
318 changes
->write_block(s
);
// Make the version data written by write() durable and current.
//
// tmpfile: name of the file write() wrote to; when non-empty it is unlinked
//          if syncing or closing fails, and otherwise renamed to
//          db_dir + "/iamglass".
// new_rev: the revision written; asserted to be greater than rev unless rev
//          is 0.
// flags:   Xapian::DB_NO_SYNC skips syncing; Xapian::DB_FULL_SYNC selects
//          io_full_sync() instead of io_sync().
//
// NOTE(review): the return statements and the error reporting on the
// failure paths are not visible in this excerpt.
325 GlassVersion::sync(const string
& tmpfile
,
326 glass_revision_number_t new_rev
, int flags
)
328 Assert(new_rev
> rev
|| rev
== 0);
// NOTE(review): this first sync test is cut short in this excerpt;
// presumably it handles the single-file case -- confirm.
331 if ((flags
& Xapian::DB_NO_SYNC
) == 0 &&
332 ((flags
& Xapian::DB_FULL_SYNC
) ?
// The descriptor is copied to a local so the close() calls below work on
// the local value.
338 int fd_to_close
= fd
;
340 if ((flags
& Xapian::DB_NO_SYNC
) == 0 &&
341 ((flags
& Xapian::DB_FULL_SYNC
) ?
342 !io_full_sync(fd_to_close
) :
343 !io_sync(fd_to_close
))) {
// Sync failed: save errno before close() and unlink() can overwrite it.
344 int save_errno
= errno
;
345 (void)close(fd_to_close
);
346 if (!tmpfile
.empty())
347 (void)unlink(tmpfile
.c_str());
// A failing close() also removes the temporary file, again saving errno
// before unlink() can overwrite it.
352 if (close(fd_to_close
) != 0) {
353 if (!tmpfile
.empty()) {
354 int save_errno
= errno
;
355 (void)unlink(tmpfile
.c_str());
// Move the new file into place as the database's version file.
361 if (!tmpfile
.empty()) {
362 if (!io_tmp_rename(tmpfile
, db_dir
+ "/iamglass")) {
// The current root info becomes what cancel() will restore.
368 for (unsigned table_no
= 0; table_no
< Glass::MAX_
; ++table_no
) {
369 old_root
[table_no
] = root
[table_no
];
// Per-table compress_min values, indexed by table number.
// GlassVersion::create() passes compress_min_tab[table_no] to
// RootInfo::init() for each table.
// NOTE(review): only some of the table entries are visible in this excerpt.
376 // Only try to compress tags longer than this many bytes.
377 const size_t COMPRESS_MIN
= 4;
379 static const uint4 compress_min_tab
[] = {
381 COMPRESS_MIN
, // DOCDATA
382 COMPRESS_MIN
, // TERMLIST
384 COMPRESS_MIN
, // SPELLING
385 COMPRESS_MIN
// SYNONYM
// Set up the version data for a new database.
//
// blocksize: block size used to initialise every table's root info;
//            asserted to be at least GLASS_MIN_BLOCKSIZE.
//
// Each table's RootInfo is initialised with blocksize and that table's
// entry from compress_min_tab.
389 GlassVersion::create(unsigned blocksize
)
391 AssertRel(blocksize
,>=,GLASS_MIN_BLOCKSIZE
);
393 for (unsigned table_no
= 0; table_no
< Glass::MAX_
; ++table_no
) {
394 root
[table_no
].init(blocksize
, compress_min_tab
[table_no
]);
// Initialise this RootInfo.
//
// blocksize_:    block size to record; asserted to be at least
//                GLASS_MIN_BLOCKSIZE.
// compress_min_: value stored in compress_min.
//
// fl_serialised is emptied.
// NOTE(review): any other members set here are not visible in this excerpt.
401 RootInfo::init(unsigned blocksize_
, uint4 compress_min_
)
403 AssertRel(blocksize_
,>=,GLASS_MIN_BLOCKSIZE
);
409 blocksize
= blocksize_
;
410 compress_min
= compress_min_
;
411 fl_serialised
.resize(0);
// Append the serialised form of this RootInfo to s.
//
// unserialise() reads the fields in this order: root, the flag word,
// num_entries, blocksize, compress_min and fl_serialised.
// NOTE(review): the lines appending root and the flag word are not visible
// in this excerpt.
415 RootInfo::serialise(string
&s
) const
// Flag word: level from bit 2 upwards, bit 1 = sequential,
// bit 0 = root_is_fake.
418 unsigned val
= level
<< 2;
419 if (sequential
) val
|= 0x02;
420 if (root_is_fake
) val
|= 0x01;
422 pack_uint(s
, num_entries
);
// The block size is stored shifted right by 11 bits, i.e. in units of 2048
// bytes.
423 pack_uint(s
, blocksize
>> 11);
424 pack_uint(s
, compress_min
);
425 pack_string(s
, fl_serialised
);
// Decode a RootInfo from the bytes in [*p, end), passing p on to each
// unpack call.
//
// Fields are read in the order: root, flag word, num_entries, blocksize,
// compress_min, fl_serialised.
//
// Returns false as soon as any field fails to decode.
// NOTE(review): the success return is not visible in this excerpt.
429 RootInfo::unserialise(const char ** p
, const char * end
)
432 if (!unpack_uint(p
, end
, &root
) ||
433 !unpack_uint(p
, end
, &val
) ||
434 !unpack_uint(p
, end
, &num_entries
) ||
435 !unpack_uint(p
, end
, &blocksize
) ||
436 !unpack_uint(p
, end
, &compress_min
) ||
437 !unpack_string(p
, end
, fl_serialised
)) return false;
// Unpack the flag bits: bit 1 = sequential, bit 0 = root_is_fake.
// NOTE(review): serialise() puts level in the bits above these; where level
// is recovered is not visible in this excerpt.
439 sequential
= val
& 0x02;
440 root_is_fake
= val
& 0x01;
// NOTE(review): serialise() stores blocksize >> 11; the matching rescale is
// not visible in this excerpt -- confirm it happens before this assertion.
442 AssertRel(blocksize
,>=,GLASS_MIN_BLOCKSIZE
);