Factor out directory separator knowledge
[xapian.git] / xapian-core / api / compactor.cc
blob18788045768887a5aeeb77725e3d2eeaff8bb421
1 /** @file compactor.cc
2 * @brief Compact a database, or merge and compact several.
3 */
4 /* Copyright (C) 2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2015,2016,2017 Olly Betts
5 * Copyright (C) 2008 Lemur Consulting Ltd
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20 * USA
21 */
23 #include <config.h>
25 #include <xapian/compactor.h>
27 #include "safeerrno.h"
29 #include <algorithm>
30 #include <fstream>
31 #include <vector>
33 #include <cstring>
34 #include <ctime>
35 #include "safesysstat.h"
36 #include <sys/types.h>
38 #include "safeunistd.h"
39 #include "safefcntl.h"
41 #include "backends/backends.h"
42 #include "backends/databaseinternal.h"
43 #include "debuglog.h"
44 #include "leafpostlist.h"
45 #include "omassert.h"
46 #include "filetests.h"
47 #include "fileutils.h"
48 #include "io_utils.h"
49 #include "stringutils.h"
50 #include "str.h"
52 #ifdef XAPIAN_HAS_GLASS_BACKEND
53 #include "backends/glass/glass_database.h"
54 #include "backends/glass/glass_version.h"
55 #endif
57 #include "backends/multi/multi_database.h"
59 #include <xapian/constants.h>
60 #include <xapian/database.h>
61 #include <xapian/error.h>
63 using namespace std;
65 class CmpByFirstUsed {
66 const vector<pair<Xapian::docid, Xapian::docid> > & used_ranges;
68 public:
69 explicit
70 CmpByFirstUsed(const vector<pair<Xapian::docid, Xapian::docid> > & ur)
71 : used_ranges(ur) { }
73 bool operator()(size_t a, size_t b) const {
74 return used_ranges[a].first < used_ranges[b].first;
78 namespace Xapian {
80 Compactor::~Compactor() { }
82 void
83 Compactor::set_status(const string & table, const string & status)
85 (void)table;
86 (void)status;
89 string
90 Compactor::resolve_duplicate_metadata(const string & key,
91 size_t num_tags, const std::string tags[])
93 (void)key;
94 (void)num_tags;
95 return tags[0];
100 [[noreturn]]
101 static void
102 backend_mismatch(const Xapian::Database::Internal* db, int backend1,
103 const string &dbpath2, int backend2)
105 string dbpath1;
106 db->get_backend_info(&dbpath1);
107 string msg = "All databases must be the same type ('";
108 msg += dbpath1;
109 msg += "' is ";
110 msg += backend_name(backend1);
111 msg += ", but '";
112 msg += dbpath2;
113 msg += "' is ";
114 msg += backend_name(backend2);
115 msg += ')';
116 throw Xapian::InvalidArgumentError(msg);
119 namespace Xapian {
/** Compact the shard(s) of this database into a new database.
 *
 *  @param output_ptr	If non-NULL, points to the destination path.  Unless
 *			DBCOMPACT_SINGLE_FILE is set, a destination for which
 *			file_exists() is true is treated as a stub file, and
 *			one containing a file "XAPIANDB" as a stub directory;
 *			in either case the data is written to a newly created
 *			directory with a numeric suffix and the stub is then
 *			rewritten to name it.
 *  @param fd		Handed to GlassDatabase::compact() when @a output_ptr
 *			is NULL (DBCOMPACT_SINGLE_FILE is then forced on).
 *  @param flags	Bit flags.  DBCOMPACT_NO_RENUMBER and
 *			DBCOMPACT_SINGLE_FILE are tested here; the
 *			Compactor::STANDARD/FULL/FULLER bits are masked out as
 *			the compaction level; the whole value is also passed
 *			on to GlassDatabase::compact().
 *  @param block_size	Passed on to GlassDatabase::compact().
 *  @param compactor	Passed on to GlassDatabase::compact().
 *
 *  @exception InvalidArgumentError	The destination equals a source path
 *					(and isn't a stub), or the shards
 *					don't all have the same backend type.
 *  @exception DatabaseError		A shard isn't a glass database, or
 *					creating a directory or renaming the
 *					new stub failed.
 *  @exception InvalidOperationError	Renumbering is off and the shards'
 *					used docid ranges overlap.
 *  @exception FeatureUnavailableError	The glass backend was disabled at
 *					build time.
 */
121 void
122 Database::compact_(const string * output_ptr, int fd, unsigned flags,
123 int block_size,
124 Xapian::Compactor * compactor) const
126 LOGCALL_VOID(API, "Database::compact_", output_ptr | fd | flags | block_size | compactor);
// Renumbering is on unless DBCOMPACT_NO_RENUMBER is set in flags.
128 bool renumber = !(flags & DBCOMPACT_NO_RENUMBER);
// STUB_NO is the first enumerator (so zero), which lets compact_to_stub be
// tested as a boolean further down.
130 enum { STUB_NO, STUB_FILE, STUB_DIR } compact_to_stub = STUB_NO;
131 string destdir;
132 if (output_ptr) {
133 // We need a modifiable destdir in this function.
134 destdir = *output_ptr;
135 if (!(flags & DBCOMPACT_SINGLE_FILE)) {
136 if (file_exists(destdir)) {
137 // Stub file.
138 compact_to_stub = STUB_FILE;
139 } else if (file_exists(destdir + "/XAPIANDB")) {
140 // Stub directory.
141 compact_to_stub = STUB_DIR;
144 } else {
145 // Single file is implied when writing to a file descriptor.
146 flags |= DBCOMPACT_SINGLE_FILE;
// Per-shard bookkeeping filled in below: offset gets the value of tot_off
// recorded for each shard, and used_ranges gets its (first, last) used docid
// pair, which stays (0, 0) for a shard with no documents.
149 auto n_shards = internal->size();
150 Xapian::docid tot_off = 0;
151 Xapian::docid last_docid = 0;
153 vector<Xapian::docid> offset;
154 vector<pair<Xapian::docid, Xapian::docid> > used_ranges;
155 vector<const Xapian::Database::Internal*> internals;
156 offset.reserve(n_shards);
157 used_ranges.reserve(n_shards);
158 internals.reserve(n_shards);
// Gather the shards to compact: each shard of a multi-database when there is
// more than one, otherwise just this database's own internal.
160 if (n_shards > 1) {
161 auto multi_db = static_cast<MultiDatabase*>(internal.get());
162 for (auto&& db : multi_db->shards) {
163 internals.push_back(db);
165 } else {
166 internals.push_back(internal.get());
// backend remembers the type of the shards seen so far, so that a shard of a
// different type can be rejected.
169 int backend = BACKEND_UNKNOWN;
170 for (auto&& shard : internals) {
171 string srcdir;
172 int type = shard->get_backend_info(&srcdir);
173 // Check destdir isn't the same as any source directory, unless it
174 // is a stub database.
175 if (!compact_to_stub && srcdir == destdir) {
176 throw InvalidArgumentError("destination may not be the same as "
177 "any source database, unless it is a "
178 "stub database");
// BACKEND_GLASS is the only type accepted; anything else throws.
180 switch (type) {
181 case BACKEND_GLASS:
182 if (backend != type && backend != BACKEND_UNKNOWN) {
183 backend_mismatch(internals[0], backend, srcdir, type);
185 backend = type;
186 break;
187 default:
188 throw DatabaseError("Only glass databases can be compacted");
191 Xapian::docid first = 0, last = 0;
193 // "Empty" databases might have spelling or synonym data so can't
194 // just be completely ignored.
195 Xapian::doccount num_docs = shard->get_doccount();
196 if (num_docs != 0) {
197 shard->get_used_docid_range(first, last);
199 if (renumber && first) {
200 // Prune any unused docids off the start of this source
201 // database.
203 // tot_off could wrap here, but it's unsigned, so that's
204 // OK.
205 tot_off -= (first - 1);
// In assertion-enabled builds, cross-check the reported used docid range
// against the postlist opened with an empty term.
208 #ifdef XAPIAN_ASSERTIONS
209 PostList* pl = shard->open_post_list(string());
210 pl->next();
211 // This test should never fail, since shard->get_doccount() is
212 // non-zero!
213 Assert(!pl->at_end());
214 AssertEq(pl->get_docid(), first);
215 AssertRel(last,>=,first);
216 pl->skip_to(last);
217 Assert(!pl->at_end());
218 AssertEq(pl->get_docid(), last);
219 pl->next();
220 Assert(pl->at_end());
221 delete pl;
222 #endif
// Record this shard's offset.  With renumbering the running total advances
// by this shard's last used docid; without it, just track the highest
// get_lastdocid() seen over the shards.
225 offset.push_back(tot_off);
226 if (renumber)
227 tot_off += last;
228 else if (last_docid < shard->get_lastdocid())
229 last_docid = shard->get_lastdocid();
230 used_ranges.push_back(make_pair(first, last));
// When renumbering, the last docid of the output is the running total.
233 if (renumber)
234 last_docid = tot_off;
236 if (!renumber && n_shards > 1) {
237 // We want to process the sources in ascending order of first
238 // docid. So we create a vector "order" with ascending integers
239 // and then sort so the indirected order is right.
240 vector<size_t> order;
241 order.reserve(n_shards);
242 for (size_t i = 0; i < n_shards; ++i)
243 order.push_back(i);
245 sort(order.begin(), order.end(), CmpByFirstUsed(used_ranges));
247 // Now use order to reorder internals to be in ascending order by first
248 // docid, and while we're at it check the ranges are disjoint.
249 vector<const Xapian::Database::Internal*> internals_;
250 internals_.reserve(n_shards);
251 vector<pair<Xapian::docid, Xapian::docid> > used_ranges_;
252 used_ranges_.reserve(n_shards);
// last_start and last_end hold the range of the most recent shard which
// wasn't skipped as empty.
254 Xapian::docid last_start = 0, last_end = 0;
255 for (size_t j = 0; j != order.size(); ++j) {
256 size_t n = order[j];
258 internals_.push_back(internals[n]);
259 used_ranges_.push_back(used_ranges[n]);
261 const pair<Xapian::docid, Xapian::docid> p = used_ranges[n];
262 // Skip empty databases.
263 if (p.first == 0 && p.second == 0)
264 continue;
265 // Check for overlap with the previous database's range.
266 if (p.first <= last_end) {
267 string tmp;
268 string msg = "when merging databases, --no-renumber is only currently supported if the databases have disjoint ranges of used document ids: ";
269 internals_[j - 1]->get_backend_info(&tmp);
270 msg += tmp;
271 msg += " has range ";
272 msg += str(last_start);
273 msg += '-';
274 msg += str(last_end);
275 msg += ", ";
276 internals_[j]->get_backend_info(&tmp);
277 msg += tmp;
278 msg += " has range ";
279 msg += str(p.first);
280 msg += '-';
281 msg += str(p.second);
282 throw Xapian::InvalidOperationError(msg);
284 last_start = p.first;
285 last_end = p.second;
// Switch over to the reordered copies.
288 swap(internals, internals_);
289 swap(used_ranges, used_ranges_);
// When compacting to a stub, the data goes into a newly created directory
// whose name ends in a number: inside the stub directory, or alongside the
// stub file using the stub's path plus '_' as the prefix.
292 string stub_file;
293 if (compact_to_stub) {
294 stub_file = destdir;
295 if (compact_to_stub == STUB_DIR) {
296 stub_file += "/XAPIANDB";
297 destdir += '/';
298 } else {
299 destdir += '_';
// sfx marks where the numeric suffix starts so that each retry can replace
// it.  Suffixes start at the current time and count upwards until mkdir()
// succeeds; any failure other than EEXIST is fatal.
301 size_t sfx = destdir.size();
302 time_t now = time(NULL);
303 while (true) {
304 destdir.resize(sfx);
305 destdir += str(now++);
306 if (mkdir(destdir.c_str(), 0755) == 0)
307 break;
308 if (errno != EEXIST) {
309 string msg = destdir;
310 msg += ": mkdir failed";
311 throw Xapian::DatabaseError(msg, errno);
314 } else if (!(flags & Xapian::DBCOMPACT_SINGLE_FILE)) {
315 // If the destination database directory doesn't exist, create it.
316 if (mkdir(destdir.c_str(), 0755) < 0) {
317 // Check why mkdir failed. It's ok if the directory already
318 // exists, but we also get EEXIST if there's an existing file with
319 // that name.
320 if (errno == EEXIST) {
321 if (dir_exists(destdir))
322 errno = 0;
323 else
324 errno = EEXIST; // dir_exists() might have changed it
326 if (errno) {
327 string msg = destdir;
328 msg += ": cannot create directory";
329 throw Xapian::DatabaseError(msg, errno);
// The compaction level is whatever flags has set among the STANDARD, FULL
// and FULLER bits.  Without the glass backend the parameters are otherwise
// unused, hence the void casts.
334 #if defined XAPIAN_HAS_GLASS_BACKEND
335 Xapian::Compactor::compaction_level compaction =
336 static_cast<Xapian::Compactor::compaction_level>(flags & (Xapian::Compactor::STANDARD|Xapian::Compactor::FULL|Xapian::Compactor::FULLER));
337 #else
338 (void)compactor;
339 (void)block_size;
340 #endif
342 if (backend == BACKEND_GLASS) {
343 #ifdef XAPIAN_HAS_GLASS_BACKEND
// Write to the destination path if one was given, otherwise to fd.
344 if (output_ptr) {
345 GlassDatabase::compact(compactor, destdir.c_str(), 0,
346 internals, offset,
347 block_size, compaction, flags, last_docid);
348 } else {
349 GlassDatabase::compact(compactor, NULL, fd,
350 internals, offset,
351 block_size, compaction, flags, last_docid);
353 #else
354 (void)fd;
355 (void)last_docid;
356 throw Xapian::FeatureUnavailableError("Glass backend disabled at build time");
357 #endif
// Finally point the stub at the new database: write a replacement stub
// inside the new directory, then rename it to stub_file.
360 if (compact_to_stub) {
361 string new_stub_file = destdir;
362 new_stub_file += "/new_stub.tmp";
// The stub names the new directory by the part of destdir after the last
// directory separator.  If there is no separator, slash is npos and
// slash + 1 wraps round to 0, so the whole of destdir is used.
364 ofstream new_stub(new_stub_file.c_str());
365 size_t slash = destdir.find_last_of(DIR_SEPS);
366 new_stub << "auto " << destdir.substr(slash + 1) << '\n';
368 if (!io_tmp_rename(new_stub_file, stub_file)) {
369 string msg = "Cannot rename '";
370 msg += new_stub_file;
371 msg += "' to '";
372 msg += stub_file;
373 msg += '\'';
374 throw Xapian::DatabaseError(msg, errno);