Fix whitespace irregularities in code
[xapian.git] / xapian-core / backends / chert / chert_check.cc
blobbb7918f4a03414576a4bc70d2177a820c1995d00
1 /* chert_check.cc: Btree checking
3 * Copyright 1999,2000,2001 BrightStation PLC
4 * Copyright 2002 Ananova Ltd
5 * Copyright 2002,2004,2005,2008,2011,2012,2013,2014,2015 Olly Betts
6 * Copyright 2008 Lemur Consulting Ltd
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License, or (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
21 * USA
24 #include <config.h>
26 #include "chert_check.h"
28 #include "filetests.h"
29 #include "io_utils.h"
30 #include "unicode/description_append.h"
31 #include "xapian/constants.h"
33 #include <climits>
34 #include <ostream>
35 #include "safefcntl.h"
37 using namespace std;
39 void ChertTableCheck::print_spaces(int n) const
41 while (n--) out->put(' ');
44 void ChertTableCheck::print_bytes(int n, const byte * p) const
46 out->write(reinterpret_cast<const char *>(p), n);
49 void ChertTableCheck::print_key(const byte * p, int c, int j) const
51 Item item(p, c);
52 string key;
53 if (item.key().length() >= 0)
54 item.key().read(&key);
55 string escaped;
56 description_append(escaped, key);
57 *out << escaped;
58 if (j == 0) {
59 *out << ' ' << item.component_of();
63 void ChertTableCheck::print_tag(const byte * p, int c, int j) const
65 Item item(p, c);
66 if (j == 0) {
67 string tag;
68 item.append_chunk(&tag);
69 string escaped;
70 description_append(escaped, tag);
71 *out << '/' << item.components_of() << ' ' << escaped;
72 } else {
73 *out << "--> [" << item.block_given_by() << ']';
77 void ChertTableCheck::report_block_full(int m, int n, const byte * p) const
79 int j = GET_LEVEL(p);
80 int dir_end = DIR_END(p);
81 *out << '\n';
82 print_spaces(m);
83 *out << "Block [" << n << "] level " << j << ", revision *" << REVISION(p)
84 << " items (" << (dir_end - DIR_START) / D2 << ") usage "
85 << block_usage(p) << "%:\n";
86 for (int c = DIR_START; c < dir_end; c += D2) {
87 print_spaces(m);
88 print_key(p, c, j);
89 *out << ' ';
90 print_tag(p, c, j);
91 *out << '\n';
95 int ChertTableCheck::block_usage(const byte * p) const
97 int space = block_size - DIR_END(p);
98 int free = TOTAL_FREE(p);
99 return (space - free) * 100 / space; /* a percentage */
102 /** ChertTableCheck::report_block(m, n, p) prints the block at p, block number n,
103 * indented by m spaces.
105 void ChertTableCheck::report_block(int m, int n, const byte * p) const
107 int j = GET_LEVEL(p);
108 int dir_end = DIR_END(p);
109 int c;
110 print_spaces(m);
111 *out << "[" << n << "] *" << REVISION(p) << " ("
112 << (dir_end - DIR_START) / D2 << ") " << block_usage(p) << "% ";
114 for (c = DIR_START; c < dir_end; c += D2) {
115 if (c >= DIR_START + 6 && c < dir_end - 6) {
116 if (c == DIR_START + 6) *out << "... ";
117 continue;
120 print_key(p, c, j);
121 *out << ' ';
123 *out << endl;
126 void ChertTableCheck::failure(const char * msg) const
128 throw Xapian::DatabaseError(msg);
131 void
132 ChertTableCheck::block_check(Cursor * C_, int j, int opts)
134 byte * p = C_[j].p;
135 uint4 n = C_[j].n;
136 int c;
137 int significant_c = j == 0 ? DIR_START : DIR_START + D2;
138 /* the first key in an index block is dummy, remember */
140 size_t max_free = MAX_FREE(p);
141 int dir_end = DIR_END(p);
142 int total_free = block_size - dir_end;
144 if (opts & Xapian::DBCHECK_FIX) {
145 base.mark_block(n);
146 } else {
147 if (base.block_free_at_start(n))
148 failure("Block was free at start");
149 if (base.block_free_now(n))
150 failure("Block is free now");
151 base.free_block(n);
154 if (j != GET_LEVEL(p))
155 failure("Block has wrong level");
156 // dir_end must be > DIR_START, fit within the block, and be odd.
157 if (dir_end <= DIR_START || dir_end > int(block_size) || (dir_end & 1) != 1)
158 failure("directory end pointer invalid");
160 if (opts & Xapian::DBCHECK_SHORT_TREE)
161 report_block(3*(level - j), n, p);
163 if (opts & Xapian::DBCHECK_FULL_TREE)
164 report_block_full(3*(level - j), n, p);
166 for (c = DIR_START; c < dir_end; c += D2) {
167 Item item(p, c);
168 int o = item.get_address() - p;
169 if (o > int(block_size))
170 failure("Item starts outside block");
171 if (o - dir_end < int(max_free))
172 failure("Item overlaps directory");
174 int kt_len = item.size();
175 if (o + kt_len > int(block_size))
176 failure("Item ends outside block");
177 total_free -= kt_len;
179 if (c > significant_c && Item(p, c - D2).key() >= item.key())
180 failure("Items not in sorted order");
182 if (j == 0 && item.component_of() == 1)
183 ++check_item_count;
185 if (total_free != TOTAL_FREE(p))
186 failure("Stored total free space value wrong");
188 if (j == 0) {
189 // Leaf block.
190 if (check_sequential) {
191 if (n >= last_sequential_block) {
192 last_sequential_block = n;
193 } else {
194 check_sequential = false;
197 return;
200 for (c = DIR_START; c < dir_end; c += D2) {
201 C_[j].c = c;
202 block_to_cursor(C_, j - 1, Item(p, c).block_given_by());
204 block_check(C_, j - 1, opts);
206 byte * q = C_[j - 1].p;
207 /* if j == 1, and c > DIR_START, the first key of level j - 1 must be
208 * >= the key of p, c: */
210 if (j == 1 && c > DIR_START)
211 if (Item(q, DIR_START).key() < Item(p, c).key())
212 failure("Leaf key < left dividing key in level above");
214 /* if j > 1, and c > DIR_START, the second key of level j - 1 must be
215 * >= the key of p, c: */
217 if (j > 1 && c > DIR_START && DIR_END(q) > DIR_START + D2 &&
218 Item(q, DIR_START + D2).key() < Item(p, c).key())
219 failure("Key < left dividing key in level above");
221 /* the last key of level j - 1 must be < the key of p, c + D2, if c +
222 * D2 < dir_end: */
224 if (c + D2 < dir_end &&
225 (j == 1 || DIR_START + D2 < DIR_END(q)) &&
226 Item(q, DIR_END(q) - D2).key() >= Item(p, c + D2).key())
227 failure("Key >= right dividing key in level above");
229 if (REVISION(q) > REVISION(p))
230 failure("Child block has greater revision than parent");
234 void
235 ChertTableCheck::check(const char * tablename, const string & path,
236 chert_revision_number_t * rev_ptr, int opts,
237 ostream *out)
239 string faked_base;
241 ChertTableCheck B(tablename, path, false, out);
242 try {
243 if (rev_ptr && *rev_ptr) {
244 // On failure, fake exception to be caught below.
245 if (!B.open(*rev_ptr)) {
246 string msg = "Failed to open ";
247 msg += tablename;
248 msg += " table at revision ";
249 msg += str(*rev_ptr);
250 throw Xapian::DatabaseOpeningError(msg);
252 } else {
253 // open() throws an exception if it fails.
254 B.open();
255 if (rev_ptr)
256 *rev_ptr = B.get_open_revision_number();
258 } catch (const Xapian::DatabaseOpeningError &) {
259 if ((opts & Xapian::DBCHECK_FIX) == 0 ||
260 file_size(path + "baseA") > 0 ||
261 file_size(path + "baseB") > 0) {
262 // Just propagate the exception.
263 throw;
266 uint4 root = 0;
267 uint4 revision = 0;
268 int level = -1;
269 uint4 blk_no = 0;
271 // Fake up a base file with no bitmap first, then fill it in when we
272 // scan the tree below.
273 int fd = ::open((path + "DB").c_str(), O_RDONLY | O_BINARY | O_CLOEXEC);
274 if (fd < 0) throw;
275 unsigned char buf[65536];
276 uint4 blocksize = 8192; // Default.
277 size_t read = io_read(fd, reinterpret_cast<char*>(buf), sizeof(buf));
278 if (read > 0) {
279 int dir_end = DIR_END(buf);
280 blocksize = dir_end + TOTAL_FREE(buf);
281 for (int c = DIR_START; c < dir_end; c += D2) {
282 Item item(buf, c);
283 blocksize += item.size();
285 if (out)
286 *out << "Block size deduced as " << blocksize << endl;
288 if (lseek(fd, 0, SEEK_SET) < 0) {
289 B.failure("Failed to seek to start of table");
291 // Scan for root block.
292 bool found = false;
293 for (blk_no = 0;
294 io_read(fd, reinterpret_cast<char*>(buf), blocksize) == blocksize;
295 ++blk_no) {
296 uint4 rev = REVISION(buf);
297 if (rev_ptr && *rev_ptr) {
298 // We have a specified revision to look for, but we still need
299 // to scan to find the block with the highest level in that
300 // revision.
302 // Note: We could have more than one root block with the same
303 // revision if one is written but not committed and then
304 // another is written and committed. We go for the lowest
305 // block number, which will probably pick the right one with
306 // the current freespace reallocation strategy.
307 if (rev != *rev_ptr)
308 continue;
309 } else {
310 // FIXME: this isn't smart enough - it will happily pick a new
311 // revision which was partly written but never committed. And
312 // it suffers from the issue of multiple roots mentioned above.
313 if (rev < revision)
314 continue;
316 int blk_level = int(GET_LEVEL(buf));
317 if (blk_level <= level)
318 continue;
319 found = true;
320 root = blk_no;
321 revision = rev;
322 level = blk_level;
323 if (out)
324 *out << "Root guess -> blk " << root << " rev " << revision
325 << " level " << level << endl;
327 ::close(fd);
329 // Check that we actually found a candidate root block.
330 if (!found) {
331 if (out)
332 *out << "Failed to find a suitable root block with revision "
333 << *rev_ptr << endl;
334 throw;
336 } else {
337 if (!rev_ptr) {
338 if (out)
339 *out << "Empty table, but revision number not yet known" << endl;
340 throw;
342 revision = *rev_ptr;
343 level = 0;
344 if (out)
345 *out << "Empty table, assuming default block size of "
346 << blocksize << endl;
349 ChertTable_base fake_base;
350 fake_base.set_revision(revision);
351 fake_base.set_block_size(blocksize);
352 fake_base.set_root(root);
353 fake_base.set_level(uint4(level));
354 fake_base.set_item_count(0); // Will get filled in later.
355 fake_base.set_sequential(false); // Will get filled in later.
356 if (blk_no) {
357 // Mark the last block as in use so that if assertions are enabled,
358 // we don't get a failure in ChertTable::read_block() when we try
359 // to read blocks. We clear the bitmap before we regenerate it
360 // below, so the last block will still end up correctly marked.
361 fake_base.mark_block(blk_no - 1);
362 } else {
363 fake_base.set_have_fakeroot(true);
365 faked_base = path;
366 faked_base += "baseA";
367 fake_base.write_to_file(faked_base, 'A', string(), -1, NULL);
369 // Remove the other base if there was one - it's an empty file anyway.
370 (void)unlink((path + "baseB").c_str());
372 // And retry the open.
373 if (!B.open(revision)) {
374 string msg = "Root guess of blk ";
375 msg += str(root);
376 msg += " rev ";
377 msg += str(revision);
378 msg += " didn't work";
379 throw Xapian::DatabaseOpeningError(msg);
382 if (rev_ptr && !*rev_ptr)
383 *rev_ptr = revision;
386 Cursor * C = B.C;
388 if (opts & Xapian::DBCHECK_SHOW_STATS) {
389 *out << "base" << char(B.base_letter)
390 << " blocksize=" << B.block_size / 1024 << "K"
391 " items=" << B.item_count
392 << " lastblock=" << B.base.get_last_block()
393 << " revision=" << B.revision_number
394 << " levels=" << B.level
395 << " root=";
396 if (B.faked_root_block)
397 *out << "(faked)";
398 else
399 *out << C[B.level].n;
400 *out << endl;
403 if (opts & Xapian::DBCHECK_FIX) {
404 // Clear the bitmap before we start. If we're regenerating it, it'll
405 // likely have some bits set already, and if we're starting from a
406 // fake, the last block will have been marked as in use above.
407 B.base.clear_bit_map();
410 if (opts & Xapian::DBCHECK_SHOW_FREELIST) {
411 int limit = B.base.get_bit_map_size() - 1;
413 limit = limit * CHAR_BIT + CHAR_BIT - 1;
415 for (int j = 0; j <= limit; j++) {
416 *out << (B.base.block_free_at_start(j) ? '.' : '*');
417 if (j > 0) {
418 if ((j + 1) % 100 == 0) {
419 *out << '\n';
420 } else if ((j + 1) % 10 == 0) {
421 *out << ' ';
425 *out << '\n' << endl;
428 if (B.faked_root_block) {
429 if (out && opts)
430 *out << "void ";
431 } else {
432 try {
433 B.block_check(C, B.level, opts);
434 } catch (...) {
435 if (!faked_base.empty())
436 unlink(faked_base.c_str());
437 throw;
440 // Allow for the dummy entry with the empty key.
441 if (B.check_item_count)
442 --B.check_item_count;
444 if (opts & Xapian::DBCHECK_FIX) {
445 if (out) {
446 *out << "Counted " << B.check_item_count << " entries in the "
447 "Btree" << endl;
448 *out << (B.check_sequential ? "Sequential" : "Non-sequential")
449 << endl;
451 B.base.set_item_count(B.check_item_count);
452 B.base.set_sequential(B.check_sequential);
453 string base_name = path;
454 base_name += "base";
455 base_name += B.base_letter;
456 B.base.write_to_file(base_name, B.base_letter, string(), -1, NULL);
457 } else {
458 /* the bit map should now be entirely clear: */
459 B.base.calculate_last_block();
460 if (B.base.get_bit_map_size() != 0) {
461 B.failure("Unused block(s) marked used in bitmap");
464 if (B.check_item_count != B.get_entry_count()) {
465 string err = "Table entry count says ";
466 err += str(B.get_entry_count());
467 err += " but actually counted ";
468 err += str(B.check_item_count);
469 B.failure(err.c_str());
472 if (B.sequential && !B.check_sequential) {
473 B.failure("Btree flagged as sequential but isn't");
475 if (!B.sequential && B.check_sequential && out) {
476 *out << "Note: Btree not flagged as sequential, but is "
477 "(not an error)" << endl;
481 if (out && opts)
482 *out << "B-tree checked okay" << endl;
485 void ChertTableCheck::report_cursor(int N, const Cursor * C_) const
487 *out << N << ")\n";
488 for (int i = 0; i <= level; i++)
489 *out << "p=" << C_[i].p << ", c=" << C_[i].c << ", n=[" << C_[i].n
490 << "], rewrite=" << C_[i].rewrite << endl;