Add test harness backend flag "compact"
[xapian.git] / xapian-core / tests / api_compact.cc
blob7b0155cc31567af91a8118dd49fbe54d15deb53c
1 /** @file api_compact.cc
2 * @brief Tests of Database::compact()
3 */
4 /* Copyright (C) 2009,2010,2011,2012,2013,2015,2016,2017,2018 Olly Betts
5 * Copyright (C) 2010 Richard Boulton
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20 * USA
23 #include <config.h>
25 #include "api_compact.h"
27 #include <xapian.h>
29 #include "apitest.h"
30 #include "dbcheck.h"
31 #include "filetests.h"
32 #include "msvcignoreinvalidparam.h"
33 #include "str.h"
34 #include "testsuite.h"
35 #include "testutils.h"
37 #include <cstdlib>
38 #include <fstream>
40 #include <sys/types.h>
41 #include "safesysstat.h"
42 #include "safefcntl.h"
43 #include "safeunistd.h"
45 #include "unixcmds.h"
47 using namespace std;
49 static void
50 make_sparse_db(Xapian::WritableDatabase &db, const string & s)
52 // Need non-const pointer for strtoul(), but data isn't modified.
53 char * p = const_cast<char *>(s.c_str());
55 while (*p) {
56 bool del = (*p == '!');
57 if (del) ++p;
58 Xapian::docid first = strtoul(p, &p, 10);
59 Xapian::docid last = first;
60 if (*p == '-') {
61 last = strtoul(p + 1, &p, 10);
63 if (*p && *p != ' ') {
64 tout << p - s.c_str() << endl;
65 FAIL_TEST("Bad sparse db spec (expected space): " << s);
67 if (first > last) {
68 FAIL_TEST("Bad sparse db spec (first > last): " << s);
71 do {
72 if (del) {
73 db.delete_document(first);
74 } else {
75 Xapian::Document doc;
76 string id = str(first);
77 doc.set_data(id);
78 doc.add_term("Q" + str(first));
79 doc.add_term(string(first % 7 + 1, char((first % 26) + 'a')));
80 db.replace_document(first, doc);
82 } while (first++ < last);
84 if (*p == '\0') break;
85 ++p;
88 db.commit();
91 static void
92 check_sparse_uid_terms(const string & path)
94 Xapian::Database db(path);
95 Xapian::TermIterator t;
96 for (t = db.allterms_begin("Q"); t != db.allterms_end("Q"); ++t) {
97 Xapian::docid did = atoi((*t).c_str() + 1);
98 Xapian::PostingIterator p = db.postlist_begin(*t);
99 TEST_EQUAL(*p, did);
103 DEFINE_TESTCASE(compactnorenumber1, compact && generated) {
104 string a = get_database_path("compactnorenumber1a", make_sparse_db,
105 "5-7 24 76 987 1023-1027 9999 !9999");
106 string a_uuid;
108 Xapian::Database db(a);
109 a_uuid = db.get_uuid();
111 string b = get_database_path("compactnorenumber1b", make_sparse_db,
112 "1027-1030");
113 string c = get_database_path("compactnorenumber1c", make_sparse_db,
114 "1028-1040");
115 string d = get_database_path("compactnorenumber1d", make_sparse_db,
116 "3000 999999 !999999");
118 string out = get_named_writable_database_path("compactnorenumber1out");
120 rm_rf(out);
122 Xapian::Database db(a);
123 db.compact(out, Xapian::DBCOMPACT_NO_RENUMBER);
126 check_sparse_uid_terms(out);
129 TEST(!dir_exists(out + "/donor"));
130 Xapian::Database db(out);
131 // xapian-compact should change the UUID of the database, but didn't
132 // prior to 1.0.18/1.1.4.
133 string out_uuid = db.get_uuid();
134 TEST_NOT_EQUAL(a_uuid, out_uuid);
135 TEST_EQUAL(out_uuid.size(), 36);
136 TEST_NOT_EQUAL(out_uuid, "00000000-0000-0000-0000-000000000000");
138 // White box test - ensure that the donor database is removed.
139 TEST(!dir_exists(out + "/donor"));
142 rm_rf(out);
144 Xapian::Database db;
145 db.add_database(Xapian::Database(a));
146 db.add_database(Xapian::Database(c));
147 db.compact(out, Xapian::DBCOMPACT_NO_RENUMBER);
149 check_sparse_uid_terms(out);
151 // Check that xapian-compact is producing a consistent database. Also,
152 // regression test - xapian 1.1.4 set lastdocid to 0 in the output
153 // database.
154 Xapian::Database outdb(out);
155 dbcheck(outdb, 24, 9999);
158 rm_rf(out);
160 Xapian::Database db;
161 db.add_database(Xapian::Database(d));
162 db.add_database(Xapian::Database(a));
163 db.add_database(Xapian::Database(c));
164 db.compact(out, Xapian::DBCOMPACT_NO_RENUMBER);
166 check_sparse_uid_terms(out);
168 rm_rf(out);
170 Xapian::Database db;
171 db.add_database(Xapian::Database(c));
172 db.add_database(Xapian::Database(a));
173 db.add_database(Xapian::Database(d));
174 db.compact(out, Xapian::DBCOMPACT_NO_RENUMBER);
176 check_sparse_uid_terms(out);
178 // Should fail.
179 rm_rf(out);
181 Xapian::Database db;
182 db.add_database(Xapian::Database(a));
183 db.add_database(Xapian::Database(b));
184 TEST_EXCEPTION(Xapian::InvalidOperationError,
185 db.compact(out, Xapian::DBCOMPACT_NO_RENUMBER)
189 // Should fail.
190 rm_rf(out);
192 Xapian::Database db;
193 db.add_database(Xapian::Database(b));
194 db.add_database(Xapian::Database(a));
195 TEST_EXCEPTION(Xapian::InvalidOperationError,
196 db.compact(out, Xapian::DBCOMPACT_NO_RENUMBER)
200 // Should fail.
201 rm_rf(out);
203 Xapian::Database db;
204 db.add_database(Xapian::Database(a));
205 db.add_database(Xapian::Database(b));
206 db.add_database(Xapian::Database(d));
207 TEST_EXCEPTION(Xapian::InvalidOperationError,
208 db.compact(out, Xapian::DBCOMPACT_NO_RENUMBER)
212 // Should fail.
213 rm_rf(out);
215 Xapian::Database db;
216 db.add_database(Xapian::Database(d));
217 db.add_database(Xapian::Database(b));
218 db.add_database(Xapian::Database(a));
219 TEST_EXCEPTION(Xapian::InvalidOperationError,
220 db.compact(out, Xapian::DBCOMPACT_NO_RENUMBER)
224 // Should fail.
225 rm_rf(out);
227 Xapian::Database db;
228 db.add_database(Xapian::Database(b));
229 db.add_database(Xapian::Database(a));
230 db.add_database(Xapian::Database(d));
231 TEST_EXCEPTION(Xapian::InvalidOperationError,
232 db.compact(out, Xapian::DBCOMPACT_NO_RENUMBER)
236 return true;
239 // Test use of compact to merge two databases.
240 DEFINE_TESTCASE(compactmerge1, compact) {
241 string indbpath = get_database_path("apitest_simpledata");
242 string outdbpath = get_named_writable_database_path("compactmerge1out");
243 rm_rf(outdbpath);
246 Xapian::Database db;
247 db.add_database(Xapian::Database(indbpath));
248 db.add_database(Xapian::Database(indbpath));
249 db.compact(outdbpath);
252 Xapian::Database indb(get_database("apitest_simpledata"));
253 Xapian::Database outdb(outdbpath);
255 TEST_EQUAL(indb.get_doccount() * 2, outdb.get_doccount());
256 dbcheck(outdb, outdb.get_doccount(), outdb.get_doccount());
258 if (file_exists(outdbpath)) {
259 // Single file case.
260 TEST_EQUAL(Xapian::Database::check(outdbpath, 0, &tout), 0);
261 } else {
262 static const char* const suffixes[] = {
263 "", "/postlist", "/termlist.", nullptr
265 for (auto s : suffixes) {
266 string suffix;
267 if (s) {
268 suffix = s;
269 } else {
270 suffix = "/docdata." + get_dbtype();
272 tout.str(string());
273 tout << "Trying suffix '" << suffix << "'" << endl;
274 string arg = outdbpath;
275 arg += suffix;
276 TEST_EQUAL(Xapian::Database::check(arg, 0, &tout), 0);
280 return true;
283 static void
284 make_multichunk_db(Xapian::WritableDatabase &db, const string &)
286 int count = 10000;
288 Xapian::Document doc;
289 doc.add_term("a");
290 while (count) {
291 db.add_document(doc);
292 --count;
295 db.commit();
298 // Test use of compact on a database which has multiple chunks for a term.
299 // This is a regression test for ticket #427
300 DEFINE_TESTCASE(compactmultichunks1, compact && generated) {
301 string indbpath = get_database_path("compactmultichunks1in",
302 make_multichunk_db, "");
303 string outdbpath = get_named_writable_database_path("compactmultichunks1out");
304 rm_rf(outdbpath);
307 Xapian::Database db(indbpath);
308 db.compact(outdbpath);
311 Xapian::Database indb(indbpath);
312 Xapian::Database outdb(outdbpath);
314 TEST_EQUAL(indb.get_doccount(), outdb.get_doccount());
315 dbcheck(outdb, outdb.get_doccount(), outdb.get_doccount());
317 return true;
320 // Test compacting from a stub database directory.
321 DEFINE_TESTCASE(compactstub1, compact) {
322 const char * stubpath = ".stub/compactstub1";
323 const char * stubpathfile = ".stub/compactstub1/XAPIANDB";
324 mkdir(".stub", 0755);
325 mkdir(stubpath, 0755);
326 ofstream stub(stubpathfile);
327 TEST(stub.is_open());
328 stub << "auto ../../" << get_database_path("apitest_simpledata") << endl;
329 stub << "auto ../../" << get_database_path("apitest_simpledata2") << endl;
330 stub.close();
332 string outdbpath = get_named_writable_database_path("compactstub1out");
333 rm_rf(outdbpath);
336 Xapian::Database db(stubpath);
337 db.compact(outdbpath);
340 Xapian::Database indb(stubpath);
341 Xapian::Database outdb(outdbpath);
343 TEST_EQUAL(indb.get_doccount(), outdb.get_doccount());
344 dbcheck(outdb, outdb.get_doccount(), outdb.get_doccount());
346 return true;
349 // Test compacting from a stub database file.
350 DEFINE_TESTCASE(compactstub2, compact) {
351 const char * stubpath = ".stub/compactstub2";
352 mkdir(".stub", 0755);
353 ofstream stub(stubpath);
354 TEST(stub.is_open());
355 stub << "auto ../" << get_database_path("apitest_simpledata") << endl;
356 stub << "auto ../" << get_database_path("apitest_simpledata2") << endl;
357 stub.close();
359 string outdbpath = get_named_writable_database_path("compactstub2out");
360 rm_rf(outdbpath);
363 Xapian::Database db(stubpath);
364 db.compact(outdbpath);
367 Xapian::Database indb(stubpath);
368 Xapian::Database outdb(outdbpath);
370 TEST_EQUAL(indb.get_doccount(), outdb.get_doccount());
371 dbcheck(outdb, outdb.get_doccount(), outdb.get_doccount());
373 return true;
376 // Test compacting a stub database file to itself.
377 DEFINE_TESTCASE(compactstub3, compact) {
378 const char * stubpath = ".stub/compactstub3";
379 mkdir(".stub", 0755);
380 ofstream stub(stubpath);
381 TEST(stub.is_open());
382 stub << "auto ../" << get_database_path("apitest_simpledata") << endl;
383 stub << "auto ../" << get_database_path("apitest_simpledata2") << endl;
384 stub.close();
386 Xapian::doccount in_docs;
388 Xapian::Database indb(stubpath);
389 in_docs = indb.get_doccount();
390 indb.compact(stubpath);
393 Xapian::Database outdb(stubpath);
395 TEST_EQUAL(in_docs, outdb.get_doccount());
396 dbcheck(outdb, outdb.get_doccount(), outdb.get_doccount());
398 return true;
401 // Test compacting a stub database directory to itself.
402 DEFINE_TESTCASE(compactstub4, compact) {
403 const char * stubpath = ".stub/compactstub4";
404 const char * stubpathfile = ".stub/compactstub4/XAPIANDB";
405 mkdir(".stub", 0755);
406 mkdir(stubpath, 0755);
407 ofstream stub(stubpathfile);
408 TEST(stub.is_open());
409 stub << "auto ../../" << get_database_path("apitest_simpledata") << endl;
410 stub << "auto ../../" << get_database_path("apitest_simpledata2") << endl;
411 stub.close();
413 Xapian::doccount in_docs;
415 Xapian::Database indb(stubpath);
416 in_docs = indb.get_doccount();
417 indb.compact(stubpath);
420 Xapian::Database outdb(stubpath);
422 TEST_EQUAL(in_docs, outdb.get_doccount());
423 dbcheck(outdb, outdb.get_doccount(), outdb.get_doccount());
425 return true;
428 static void
429 make_all_tables(Xapian::WritableDatabase &db, const string &)
431 Xapian::Document doc;
432 doc.add_term("foo");
433 db.add_document(doc);
434 db.add_spelling("foo");
435 db.add_synonym("bar", "pub");
436 db.add_synonym("foobar", "foo");
438 db.commit();
441 static void
442 make_missing_tables(Xapian::WritableDatabase &db, const string &)
444 Xapian::Document doc;
445 doc.add_term("foo");
446 db.add_document(doc);
448 db.commit();
451 DEFINE_TESTCASE(compactmissingtables1, compact && generated) {
452 string a = get_database_path("compactmissingtables1a",
453 make_all_tables);
454 string b = get_database_path("compactmissingtables1b",
455 make_missing_tables);
457 string out = get_named_writable_database_path("compactmissingtables1out");
458 rm_rf(out);
461 Xapian::Database db;
462 db.add_database(Xapian::Database(a));
463 db.add_database(Xapian::Database(b));
464 db.compact(out);
468 Xapian::Database db(out);
469 TEST_NOT_EQUAL(db.spellings_begin(), db.spellings_end());
470 TEST_NOT_EQUAL(db.synonym_keys_begin(), db.synonym_keys_end());
471 // FIXME: arrange for input b to not have a termlist table.
472 // TEST_EXCEPTION(Xapian::FeatureUnavailableError, db.termlist_begin(1));
475 return true;
478 static void
479 make_all_tables2(Xapian::WritableDatabase &db, const string &)
481 Xapian::Document doc;
482 doc.add_term("bar");
483 db.add_document(doc);
484 db.add_spelling("bar");
485 db.add_synonym("bar", "baa");
486 db.add_synonym("barfoo", "barbar");
487 db.add_synonym("foofoo", "barfoo");
489 db.commit();
492 /// Adds coverage for merging synonym table.
493 DEFINE_TESTCASE(compactmergesynonym1, compact && generated) {
494 string a = get_database_path("compactmergesynonym1a",
495 make_all_tables);
496 string b = get_database_path("compactmergesynonym1b",
497 make_all_tables2);
499 string out = get_named_writable_database_path("compactmergesynonym1out");
500 rm_rf(out);
503 Xapian::Database db;
504 db.add_database(Xapian::Database(a));
505 db.add_database(Xapian::Database(b));
506 db.compact(out);
510 Xapian::Database db(out);
512 Xapian::TermIterator i = db.spellings_begin();
513 TEST_NOT_EQUAL(i, db.spellings_end());
514 TEST_EQUAL(*i, "bar");
515 ++i;
516 TEST_NOT_EQUAL(i, db.spellings_end());
517 TEST_EQUAL(*i, "foo");
518 ++i;
519 TEST_EQUAL(i, db.spellings_end());
521 i = db.synonym_keys_begin();
522 TEST_NOT_EQUAL(i, db.synonym_keys_end());
523 TEST_EQUAL(*i, "bar");
524 ++i;
525 TEST_NOT_EQUAL(i, db.synonym_keys_end());
526 TEST_EQUAL(*i, "barfoo");
527 ++i;
528 TEST_NOT_EQUAL(i, db.synonym_keys_end());
529 TEST_EQUAL(*i, "foobar");
530 ++i;
531 TEST_NOT_EQUAL(i, db.synonym_keys_end());
532 TEST_EQUAL(*i, "foofoo");
533 ++i;
534 TEST_EQUAL(i, db.synonym_keys_end());
537 return true;
540 DEFINE_TESTCASE(compactempty1, compact) {
541 string empty_dbpath = get_database_path(string());
542 string outdbpath = get_named_writable_database_path("compactempty1out");
543 rm_rf(outdbpath);
546 // Compacting an empty database tried to divide by zero in 1.3.0.
547 Xapian::Database db;
548 db.add_database(Xapian::Database(empty_dbpath));
549 db.compact(outdbpath);
551 Xapian::Database outdb(outdbpath);
552 TEST_EQUAL(outdb.get_doccount(), 0);
553 dbcheck(outdb, 0, 0);
557 // Check compacting two empty databases together.
558 Xapian::Database db;
559 db.add_database(Xapian::Database(empty_dbpath));
560 db.add_database(Xapian::Database(empty_dbpath));
561 db.compact(outdbpath);
563 Xapian::Database outdb(outdbpath);
564 TEST_EQUAL(outdb.get_doccount(), 0);
565 dbcheck(outdb, 0, 0);
568 return true;
571 DEFINE_TESTCASE(compactmultipass1, compact && generated) {
572 string outdbpath = get_named_writable_database_path("compactmultipass1");
573 rm_rf(outdbpath);
575 string a = get_database_path("compactnorenumber1a", make_sparse_db,
576 "5-7 24 76 987 1023-1027 9999 !9999");
577 string b = get_database_path("compactnorenumber1b", make_sparse_db,
578 "1027-1030");
579 string c = get_database_path("compactnorenumber1c", make_sparse_db,
580 "1028-1040");
581 string d = get_database_path("compactnorenumber1d", make_sparse_db,
582 "3000 999999 !999999");
585 Xapian::Database db;
586 db.add_database(Xapian::Database(a));
587 db.add_database(Xapian::Database(b));
588 db.add_database(Xapian::Database(c));
589 db.add_database(Xapian::Database(d));
590 db.compact(outdbpath, Xapian::DBCOMPACT_MULTIPASS);
593 Xapian::Database outdb(outdbpath);
594 dbcheck(outdb, 29, 1041);
596 return true;
599 // Test compacting to an fd.
600 DEFINE_TESTCASE(compacttofd1, compact) {
601 Xapian::Database indb(get_database("apitest_simpledata"));
602 string outdbpath = get_named_writable_database_path("compacttofd1out");
603 rm_rf(outdbpath);
605 int fd = open(outdbpath.c_str(), O_CREAT|O_RDWR|O_BINARY, 0666);
606 TEST(fd != -1);
607 indb.compact(fd);
609 // Confirm that the fd was closed by Xapian. Set errno first to workaround
610 // a bug in Wine's msvcrt.dll which fails to set errno in this case:
611 // https://bugs.winehq.org/show_bug.cgi?id=43902
612 errno = EBADF;
614 MSVCIgnoreInvalidParameter invalid_fd_in_close_is_expected;
615 TEST(close(fd) == -1);
616 TEST_EQUAL(errno, EBADF);
619 Xapian::Database outdb(outdbpath);
621 TEST_EQUAL(indb.get_doccount(), outdb.get_doccount());
622 dbcheck(outdb, outdb.get_doccount(), outdb.get_doccount());
624 return true;
627 // Test compacting to an fd at at offset.
628 DEFINE_TESTCASE(compacttofd2, compact) {
629 Xapian::Database indb(get_database("apitest_simpledata"));
630 string outdbpath = get_named_writable_database_path("compacttofd2out");
631 rm_rf(outdbpath);
633 int fd = open(outdbpath.c_str(), O_CREAT|O_RDWR|O_BINARY, 0666);
634 TEST(fd != -1);
635 TEST(lseek(fd, 8192, SEEK_SET) == 8192);
636 indb.compact(fd);
638 // Confirm that the fd was closed by Xapian. Set errno first to workaround
639 // a bug in Wine's msvcrt.dll which fails to set errno in this case:
640 // https://bugs.winehq.org/show_bug.cgi?id=43902
641 errno = EBADF;
643 MSVCIgnoreInvalidParameter invalid_fd_in_close_is_expected;
644 TEST(close(fd) == -1);
645 TEST_EQUAL(errno, EBADF);
648 fd = open(outdbpath.c_str(), O_RDONLY|O_BINARY, 0666);
649 TEST(fd != -1);
651 // Test that the database wasn't just written to the start of the file.
652 char buf[8192];
653 size_t n = sizeof(buf);
654 while (n) {
655 ssize_t c = read(fd, buf, n);
656 TEST(c > 0);
657 for (const char * p = buf; p != buf + c; ++p) {
658 TEST(*p == 0);
660 n -= c;
663 TEST(lseek(fd, 8192, SEEK_SET) == 8192);
664 Xapian::Database outdb(fd);
666 TEST_EQUAL(indb.get_doccount(), outdb.get_doccount());
667 dbcheck(outdb, outdb.get_doccount(), outdb.get_doccount());
669 return true;
672 // Regression test for bug fixed in 1.3.5. If you compact a WritableDatabase
673 // with uncommitted changes, you get an inconsistent output.
674 DEFINE_TESTCASE(compactsingle1, compact) {
675 Xapian::WritableDatabase db = get_writable_database();
676 Xapian::Document doc;
677 doc.add_term("foo");
678 doc.add_term("bar");
679 doc.add_term("baz");
680 db.add_document(doc);
682 string output = get_named_writable_database_path("compactsingle1-out");
683 // In 1.3.4, we would hang if the output file already existed, so check
684 // that works.
685 touch(output);
687 TEST_EXCEPTION(Xapian::InvalidOperationError,
688 db.compact(output, Xapian::DBCOMPACT_SINGLE_FILE));
690 // Check the file wasn't removed by the failed attempt.
691 TEST(file_exists(output));
693 db.commit();
694 db.compact(output, Xapian::DBCOMPACT_SINGLE_FILE);
695 db.close();
697 TEST_EQUAL(Xapian::Database::check(output, 0, &tout), 0);
699 return true;
702 // Regression test for bug fixed in 1.4.6. Same as above, except not with
703 // a single file database!
704 DEFINE_TESTCASE(compact1, compact) {
705 Xapian::WritableDatabase db = get_writable_database();
706 Xapian::Document doc;
707 doc.add_term("foo");
708 doc.add_term("bar");
709 doc.add_term("baz");
710 db.add_document(doc);
712 string output = get_named_writable_database_path("compact1-out");
713 rm_rf(output);
715 TEST_EXCEPTION(Xapian::InvalidOperationError,
716 db.compact(output));
718 db.commit();
719 db.compact(output);
720 db.close();
722 TEST_EQUAL(Xapian::Database::check(output, 0, &tout), 0);
724 return true;