[python3] Simplify generated wrapper post-processing
[xapian.git] / xapian-core / api / compactor.cc
blob925e5e43f3b6df1ba9680ea6e66a383fac76e4e2
1 /** @file compactor.cc
2 * @brief Compact a database, or merge and compact several.
3 */
4 /* Copyright (C) 2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2015,2016,2017,2018 Olly Betts
5 * Copyright (C) 2008 Lemur Consulting Ltd
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20 * USA
23 #include <config.h>
25 #include <xapian/compactor.h>
27 #include "safeerrno.h"
29 #include <algorithm>
30 #include <fstream>
31 #include <vector>
33 #include <cstring>
34 #include <ctime>
35 #include "safesysstat.h"
36 #include <sys/types.h>
38 #include "safeunistd.h"
39 #include "safefcntl.h"
41 #include "backends/backends.h"
42 #include "backends/databaseinternal.h"
43 #include "debuglog.h"
44 #include "leafpostlist.h"
45 #include "omassert.h"
46 #include "filetests.h"
47 #include "fileutils.h"
48 #include "io_utils.h"
49 #include "stringutils.h"
50 #include "str.h"
52 #ifdef XAPIAN_HAS_GLASS_BACKEND
53 #include "backends/glass/glass_database.h"
54 #include "backends/glass/glass_version.h"
55 #endif
57 #ifdef XAPIAN_HAS_HONEY_BACKEND
58 #include "backends/honey/honey_database.h"
59 #include "backends/honey/honey_version.h"
60 #endif
62 #include "backends/multi/multi_database.h"
64 #include <xapian/constants.h>
65 #include <xapian/database.h>
66 #include <xapian/error.h>
68 using namespace std;
70 class CmpByFirstUsed {
71 const vector<pair<Xapian::docid, Xapian::docid> > & used_ranges;
73 public:
74 explicit
75 CmpByFirstUsed(const vector<pair<Xapian::docid, Xapian::docid> > & ur)
76 : used_ranges(ur) { }
78 bool operator()(size_t a, size_t b) const {
79 return used_ranges[a].first < used_ranges[b].first;
83 namespace Xapian {
85 Compactor::~Compactor() { }
87 void
88 Compactor::set_status(const string & table, const string & status)
90 (void)table;
91 (void)status;
94 string
95 Compactor::resolve_duplicate_metadata(const string & key,
96 size_t num_tags, const std::string tags[])
98 (void)key;
99 (void)num_tags;
100 return tags[0];
105 [[noreturn]]
106 static void
107 backend_mismatch(const Xapian::Database::Internal* db, int backend1,
108 const string &dbpath2, int backend2)
110 string dbpath1;
111 db->get_backend_info(&dbpath1);
112 string msg = "All databases must be the same type ('";
113 msg += dbpath1;
114 msg += "' is ";
115 msg += backend_name(backend1);
116 msg += ", but '";
117 msg += dbpath2;
118 msg += "' is ";
119 msg += backend_name(backend2);
120 msg += ')';
121 throw Xapian::InvalidArgumentError(msg);
124 namespace Xapian {
126 void
127 Database::compact_(const string * output_ptr, int fd, unsigned flags,
128 int block_size,
129 Xapian::Compactor * compactor) const
131 LOGCALL_VOID(API, "Database::compact_", output_ptr | fd | flags | block_size | compactor);
133 bool renumber = !(flags & DBCOMPACT_NO_RENUMBER);
135 enum { STUB_NO, STUB_FILE, STUB_DIR } compact_to_stub = STUB_NO;
136 string destdir;
137 if (output_ptr) {
138 // We need a modifiable destdir in this function.
139 destdir = *output_ptr;
140 if (!(flags & DBCOMPACT_SINGLE_FILE)) {
141 if (file_exists(destdir)) {
142 // Stub file.
143 compact_to_stub = STUB_FILE;
144 } else if (file_exists(destdir + "/XAPIANDB")) {
145 // Stub directory.
146 compact_to_stub = STUB_DIR;
149 } else {
150 // Single file is implied when writing to a file descriptor.
151 flags |= DBCOMPACT_SINGLE_FILE;
154 auto n_shards = internal->size();
155 Xapian::docid tot_off = 0;
156 Xapian::docid last_docid = 0;
158 vector<Xapian::docid> offset;
159 vector<pair<Xapian::docid, Xapian::docid> > used_ranges;
160 vector<const Xapian::Database::Internal*> internals;
161 offset.reserve(n_shards);
162 used_ranges.reserve(n_shards);
163 internals.reserve(n_shards);
165 if (n_shards > 1) {
166 auto multi_db = static_cast<MultiDatabase*>(internal.get());
167 for (auto&& db : multi_db->shards) {
168 internals.push_back(db);
170 } else {
171 internals.push_back(internal.get());
174 int backend = BACKEND_UNKNOWN;
175 for (auto&& shard : internals) {
176 string srcdir;
177 int type = shard->get_backend_info(&srcdir);
178 // Check destdir isn't the same as any source directory, unless it
179 // is a stub database or we're compacting to an fd.
180 if (!compact_to_stub && !destdir.empty() && srcdir == destdir) {
181 throw InvalidArgumentError("destination may not be the same as "
182 "any source database, unless it is a "
183 "stub database");
185 switch (type) {
186 case BACKEND_GLASS:
187 if (backend != type && backend != BACKEND_UNKNOWN) {
188 backend_mismatch(internals[0], backend, srcdir, type);
190 backend = type;
191 break;
192 case BACKEND_HONEY:
193 if (backend != type && backend != BACKEND_UNKNOWN) {
194 backend_mismatch(internals[0], backend, srcdir, type);
196 backend = type;
197 break;
198 default:
199 throw DatabaseError("Only glass and honey databases can be "
200 "compacted");
203 Xapian::docid first = 0, last = 0;
205 // "Empty" databases might have spelling or synonym data so can't
206 // just be completely ignored.
207 Xapian::doccount num_docs = shard->get_doccount();
208 if (num_docs != 0) {
209 shard->get_used_docid_range(first, last);
211 if (renumber && first) {
212 // Prune any unused docids off the start of this source
213 // database.
215 // tot_off could wrap here, but it's unsigned, so that's
216 // OK.
217 tot_off -= (first - 1);
220 #ifdef XAPIAN_ASSERTIONS
221 PostList* pl = shard->open_post_list(string());
222 pl->next();
223 // This test should never fail, since shard->get_doccount() is
224 // non-zero!
225 Assert(!pl->at_end());
226 AssertEq(pl->get_docid(), first);
227 AssertRel(last,>=,first);
228 pl->skip_to(last);
229 Assert(!pl->at_end());
230 AssertEq(pl->get_docid(), last);
231 pl->next();
232 Assert(pl->at_end());
233 delete pl;
234 #endif
237 offset.push_back(tot_off);
238 if (renumber)
239 tot_off += last;
240 else if (last_docid < shard->get_lastdocid())
241 last_docid = shard->get_lastdocid();
242 used_ranges.push_back(make_pair(first, last));
245 if (renumber)
246 last_docid = tot_off;
248 if (!renumber && n_shards > 1) {
249 // We want to process the sources in ascending order of first
250 // docid. So we create a vector "order" with ascending integers
251 // and then sort so the indirected order is right.
252 vector<size_t> order;
253 order.reserve(n_shards);
254 for (size_t i = 0; i < n_shards; ++i)
255 order.push_back(i);
257 sort(order.begin(), order.end(), CmpByFirstUsed(used_ranges));
259 // Now use order to reorder internals to be in ascending order by first
260 // docid, and while we're at it check the ranges are disjoint.
261 vector<const Xapian::Database::Internal*> internals_;
262 internals_.reserve(n_shards);
263 vector<pair<Xapian::docid, Xapian::docid> > used_ranges_;
264 used_ranges_.reserve(n_shards);
266 Xapian::docid last_start = 0, last_end = 0;
267 for (size_t j = 0; j != order.size(); ++j) {
268 size_t n = order[j];
270 internals_.push_back(internals[n]);
271 used_ranges_.push_back(used_ranges[n]);
273 const pair<Xapian::docid, Xapian::docid> p = used_ranges[n];
274 // Skip empty databases.
275 if (p.first == 0 && p.second == 0)
276 continue;
277 // Check for overlap with the previous database's range.
278 if (p.first <= last_end) {
279 string tmp;
280 string msg = "when merging databases, --no-renumber is only currently supported if the databases have disjoint ranges of used document ids: ";
281 internals_[j - 1]->get_backend_info(&tmp);
282 msg += tmp;
283 msg += " has range ";
284 msg += str(last_start);
285 msg += '-';
286 msg += str(last_end);
287 msg += ", ";
288 internals_[j]->get_backend_info(&tmp);
289 msg += tmp;
290 msg += " has range ";
291 msg += str(p.first);
292 msg += '-';
293 msg += str(p.second);
294 throw Xapian::InvalidOperationError(msg);
296 last_start = p.first;
297 last_end = p.second;
300 swap(internals, internals_);
301 swap(used_ranges, used_ranges_);
304 string stub_file;
305 if (compact_to_stub) {
306 stub_file = destdir;
307 if (compact_to_stub == STUB_DIR) {
308 stub_file += "/XAPIANDB";
309 destdir += '/';
310 } else {
311 destdir += '_';
313 size_t sfx = destdir.size();
314 time_t now = time(NULL);
315 while (true) {
316 destdir.resize(sfx);
317 destdir += str(now++);
318 if (mkdir(destdir.c_str(), 0755) == 0)
319 break;
320 if (errno != EEXIST) {
321 string msg = destdir;
322 msg += ": mkdir failed";
323 throw Xapian::DatabaseError(msg, errno);
326 } else if (!(flags & Xapian::DBCOMPACT_SINGLE_FILE)) {
327 // If the destination database directory doesn't exist, create it.
328 if (mkdir(destdir.c_str(), 0755) < 0) {
329 // Check why mkdir failed. It's ok if the directory already
330 // exists, but we also get EEXIST if there's an existing file with
331 // that name.
332 if (errno == EEXIST) {
333 if (dir_exists(destdir))
334 errno = 0;
335 else
336 errno = EEXIST; // dir_exists() might have changed it
338 if (errno) {
339 string msg = destdir;
340 msg += ": cannot create directory";
341 throw Xapian::DatabaseError(msg, errno);
346 #if defined XAPIAN_HAS_GLASS_BACKEND
347 Xapian::Compactor::compaction_level compaction =
348 static_cast<Xapian::Compactor::compaction_level>(flags & (Xapian::Compactor::STANDARD|Xapian::Compactor::FULL|Xapian::Compactor::FULLER));
349 #else
350 (void)compactor;
351 (void)block_size;
352 #endif
354 auto output_backend = flags & Xapian::DB_BACKEND_MASK_;
355 if (backend == BACKEND_GLASS) {
356 switch (output_backend) {
357 case 0:
358 case Xapian::DB_BACKEND_GLASS:
359 #ifdef XAPIAN_HAS_GLASS_BACKEND
360 if (output_ptr) {
361 GlassDatabase::compact(compactor, destdir.c_str(), 0,
362 internals, offset,
363 block_size, compaction, flags,
364 last_docid);
365 } else {
366 GlassDatabase::compact(compactor, NULL, fd,
367 internals, offset,
368 block_size, compaction, flags,
369 last_docid);
371 break;
372 #else
373 (void)fd;
374 (void)last_docid;
375 throw Xapian::FeatureUnavailableError("Glass backend disabled "
376 "at build time");
377 #endif
378 case Xapian::DB_BACKEND_HONEY:
379 #ifdef XAPIAN_HAS_HONEY_BACKEND
380 if (output_ptr) {
381 HoneyDatabase::compact(compactor, destdir.c_str(), 0,
382 Xapian::DB_BACKEND_GLASS,
383 internals, offset,
384 block_size, compaction, flags,
385 last_docid);
386 } else {
387 HoneyDatabase::compact(compactor, NULL, fd,
388 Xapian::DB_BACKEND_GLASS,
389 internals, offset,
390 block_size, compaction, flags,
391 last_docid);
393 break;
394 #else
395 (void)fd;
396 (void)last_docid;
397 throw Xapian::FeatureUnavailableError("Honey backend disabled "
398 "at build time");
399 #endif
400 default:
401 throw Xapian::UnimplementedError("Glass can only be "
402 "compacted to itself or "
403 "honey");
405 } else if (backend == BACKEND_HONEY) {
406 switch (output_backend) {
407 case 0:
408 case Xapian::DB_BACKEND_HONEY:
409 #ifdef XAPIAN_HAS_HONEY_BACKEND
410 if (output_ptr) {
411 HoneyDatabase::compact(compactor, destdir.c_str(), 0,
412 Xapian::DB_BACKEND_HONEY,
413 internals, offset,
414 block_size, compaction, flags,
415 last_docid);
416 } else {
417 HoneyDatabase::compact(compactor, NULL, fd,
418 Xapian::DB_BACKEND_HONEY,
419 internals, offset,
420 block_size, compaction, flags,
421 last_docid);
423 break;
424 #else
425 (void)fd;
426 (void)last_docid;
427 throw Xapian::FeatureUnavailableError("Honey backend disabled "
428 "at build time");
429 #endif
430 default:
431 throw Xapian::UnimplementedError("Honey can only be "
432 "compacted to itself");
436 if (compact_to_stub) {
437 string new_stub_file = destdir;
438 new_stub_file += "/new_stub.tmp";
440 ofstream new_stub(new_stub_file.c_str());
441 size_t slash = destdir.find_last_of(DIR_SEPS);
442 new_stub << "auto " << destdir.substr(slash + 1) << '\n';
444 if (!io_tmp_rename(new_stub_file, stub_file)) {
445 string msg = "Cannot rename '";
446 msg += new_stub_file;
447 msg += "' to '";
448 msg += stub_file;
449 msg += '\'';
450 throw Xapian::DatabaseError(msg, errno);