[honey] Improve spelling table key encoding
[xapian.git] / xapian-core / backends / dbfactory.cc
blob04bd95b6f9eb4fb8eee70b9f418d021d4b7b1aec
1 /** @file dbfactory.cc
2 * @brief Database factories for non-remote databases.
3 */
4 /* Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2011,2012,2013,2014,2015,2016,2017 Olly Betts
5 * Copyright 2008 Lemur Consulting Ltd
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20 * USA
23 #include <config.h>
25 #include "xapian/dbfactory.h"
27 #include "xapian/constants.h"
28 #include "xapian/database.h"
29 #include "xapian/error.h"
30 #include "xapian/version.h" // For XAPIAN_HAS_XXX_BACKEND.
32 #include "backends.h"
33 #include "debuglog.h"
34 #include "filetests.h"
35 #include "fileutils.h"
36 #include "posixy_wrapper.h"
37 #include "str.h"
39 #include "safeerrno.h"
40 #include <cstdlib> // For atoi().
42 #ifdef XAPIAN_HAS_GLASS_BACKEND
43 # include "glass/glass_database.h"
44 # include "glass/glass_defs.h"
45 #endif
46 #ifdef XAPIAN_HAS_HONEY_BACKEND
47 # include "honey/honey_database.h"
48 # include "honey/honey_defs.h"
49 #endif
50 #ifdef XAPIAN_HAS_INMEMORY_BACKEND
51 # include "inmemory/inmemory_database.h"
52 #endif
53 // Even if none of the above get included, we still need a definition of
54 // Database::Internal.
55 #include "backends/databaseinternal.h"
57 #include <fstream>
58 #include <string>
60 using namespace std;
62 /** Return a BACKEND_* constant from backends.h.
64 * BACKEND_UNKNOWN : stub file
65 * BACKEND_GLASS : glass single file
66 * BACKEND_HONEY : honey single file
68 static int
69 check_if_single_file_db(const struct stat & sb, const string & path,
70 int * fd_ptr = NULL)
72 #if defined XAPIAN_HAS_GLASS_BACKEND || \
73 defined XAPIAN_HAS_HONEY_BACKEND
74 if (!S_ISREG(sb.st_mode)) return BACKEND_UNKNOWN;
75 // Look at the size as a clue - if it's 0 or not a multiple of
76 // GLASS_MIN_BLOCKSIZE, then it's not a single-file glass database, and
77 // not a honey single-file database either, as we pad those to a multiple
78 // of GLASS_MIN_BLOCKSIZE too. Otherwise peek at the start of the file to
79 // determine which it is.
80 if (sb.st_size == 0 || sb.st_size % GLASS_MIN_BLOCKSIZE != 0)
81 // Look at the size as a clue - if it's less than GLASS_MIN_BLOCKSIZE, then
82 // it's not a single-file glass database and too small to be a honey one
83 // If it is, peek at the start of the file to determine what it is.
84 if (sb.st_size < GLASS_MIN_BLOCKSIZE)
85 return false;
86 int fd = posixy_open(path.c_str(), O_RDONLY|O_BINARY);
87 if (fd != -1) {
88 char magic_buf[14];
89 // FIXME: Don't duplicate magic check here...
90 if (io_read(fd, magic_buf, 14) == 14 &&
91 lseek(fd, 0, SEEK_SET) == 0 &&
92 memcmp(magic_buf, "\x0f\x0dXapian ", 9) == 0) {
93 if (!fd_ptr)
94 ::close(fd);
95 switch (magic_buf[9]) {
96 case 'G':
97 if (memcmp(magic_buf + 10, "lass", 4) == 0) {
98 if (fd_ptr)
99 *fd_ptr = fd;
100 return BACKEND_GLASS;
102 break;
103 case 'H':
104 if (memcmp(magic_buf + 10, "oney", 4) == 0) {
105 if (fd_ptr)
106 *fd_ptr = fd;
107 return BACKEND_HONEY;
109 break;
111 if (fd_ptr)
112 ::close(fd);
113 return BACKEND_UNKNOWN;
115 ::close(fd);
117 #else
118 (void)sb;
119 (void)path;
120 (void)fd_ptr;
121 #endif
122 return BACKEND_UNKNOWN;
125 namespace Xapian {
127 static void
128 open_stub(Database &db, const string &file)
130 // A stub database is a text file with one or more lines of this format:
131 // <dbtype> <serialised db object>
133 // Lines which start with a "#" character are ignored.
135 // Any paths specified in stub database files which are relative will be
136 // considered to be relative to the directory containing the stub database.
137 ifstream stub(file.c_str());
138 if (!stub) {
139 string msg = "Couldn't open stub database file: ";
140 msg += file;
141 throw Xapian::DatabaseOpeningError(msg, errno);
143 string line;
144 unsigned int line_no = 0;
145 while (getline(stub, line)) {
146 ++line_no;
147 if (line.empty() || line[0] == '#')
148 continue;
149 string::size_type space = line.find(' ');
150 if (space == string::npos) space = line.size();
152 string type(line, 0, space);
153 line.erase(0, space + 1);
155 if (type == "auto") {
156 resolve_relative_path(line, file);
157 db.add_database(Database(line));
158 continue;
161 if (type == "glass") {
162 #ifdef XAPIAN_HAS_GLASS_BACKEND
163 resolve_relative_path(line, file);
164 db.add_database(Database(new GlassDatabase(line)));
165 continue;
166 #else
167 throw FeatureUnavailableError("Glass backend disabled");
168 #endif
171 if (type == "honey") {
172 #ifdef XAPIAN_HAS_HONEY_BACKEND
173 resolve_relative_path(line, file);
174 db.add_database(Database(new HoneyDatabase(line)));
175 continue;
176 #else
177 throw FeatureUnavailableError("Honey backend disabled");
178 #endif
181 if (type == "remote" && !line.empty()) {
182 #ifdef XAPIAN_HAS_REMOTE_BACKEND
183 if (line[0] == ':') {
184 // prog
185 // FIXME: timeouts
186 // Is it a security risk?
187 space = line.find(' ');
188 string args;
189 if (space != string::npos) {
190 args.assign(line, space + 1, string::npos);
191 line.assign(line, 1, space - 1);
192 } else {
193 line.erase(0, 1);
195 db.add_database(Remote::open(line, args));
196 continue;
198 string::size_type colon = line.rfind(':');
199 if (colon != string::npos) {
200 // tcp
201 // FIXME: timeouts
202 // Avoid misparsing an IPv6 address without a port number. The
203 // port number is required, so just leave that case to the
204 // error handling further below.
205 if (!(line[0] == '[' && line.back() == ']')) {
206 unsigned int port = atoi(line.c_str() + colon + 1);
207 line.erase(colon);
208 if (line[0] == '[' && line.back() == ']') {
209 line.erase(line.size() - 1, 1);
210 line.erase(0, 1);
212 db.add_database(Remote::open(line, port));
213 continue;
216 #else
217 throw FeatureUnavailableError("Remote backend disabled");
218 #endif
221 if (type == "inmemory" && line.empty()) {
222 #ifdef XAPIAN_HAS_INMEMORY_BACKEND
223 db.add_database(Database(string(), DB_BACKEND_INMEMORY));
224 continue;
225 #else
226 throw FeatureUnavailableError("Inmemory backend disabled");
227 #endif
230 if (type == "chert") {
231 throw FeatureUnavailableError("Chert backend no longer supported");
234 if (type == "flint") {
235 throw FeatureUnavailableError("Flint backend no longer supported");
238 // Don't include the line itself - that might help an attacker
239 // by revealing part of a sensitive file's contents if they can
240 // arrange for it to be read as a stub database via infelicities in
241 // an application which uses Xapian. The line number is enough
242 // information to identify the problem line.
243 throw DatabaseOpeningError(file + ':' + str(line_no) + ": Bad line");
246 // Allowing a stub database with no databases listed allows things like
247 // a "search all databases" feature to be implemented by generating a
248 // stub database file without having to special case there not being any
249 // databases yet.
251 // 1.0.x throws DatabaseOpeningError here, but with a "Bad line" message
252 // with the line number just past the end of the file, which is a bit odd.
255 static void
256 open_stub(WritableDatabase &db, const string &file, int flags)
258 // A stub database is a text file with one or more lines of this format:
259 // <dbtype> <serialised db object>
261 // Lines which start with a "#" character, and lines which have no spaces
262 // in them, are ignored.
264 // Any paths specified in stub database files which are relative will be
265 // considered to be relative to the directory containing the stub database.
266 ifstream stub(file.c_str());
267 if (!stub) {
268 string msg = "Couldn't open stub database file: ";
269 msg += file;
270 throw Xapian::DatabaseOpeningError(msg, errno);
272 string line;
273 unsigned int line_no = 0;
274 while (true) {
275 if (!getline(stub, line)) break;
277 ++line_no;
278 if (line.empty() || line[0] == '#')
279 continue;
280 string::size_type space = line.find(' ');
281 if (space == string::npos) space = line.size();
283 string type(line, 0, space);
284 line.erase(0, space + 1);
286 if (type == "auto") {
287 resolve_relative_path(line, file);
288 db.add_database(WritableDatabase(line, flags));
289 continue;
292 if (type == "glass") {
293 #ifdef XAPIAN_HAS_GLASS_BACKEND
294 resolve_relative_path(line, file);
295 db.add_database(WritableDatabase(line, flags|DB_BACKEND_GLASS));
296 continue;
297 #else
298 throw FeatureUnavailableError("Glass backend disabled");
299 #endif
302 if (type == "remote" && !line.empty()) {
303 #ifdef XAPIAN_HAS_REMOTE_BACKEND
304 if (line[0] == ':') {
305 // prog
306 // FIXME: timeouts
307 // Is it a security risk?
308 space = line.find(' ');
309 string args;
310 if (space != string::npos) {
311 args.assign(line, space + 1, string::npos);
312 line.assign(line, 1, space - 1);
313 } else {
314 line.erase(0, 1);
316 db.add_database(Remote::open_writable(line, args, 0, flags));
317 continue;
319 string::size_type colon = line.rfind(':');
320 if (colon != string::npos) {
321 // tcp
322 // FIXME: timeouts
323 // Avoid misparsing an IPv6 address without a port number. The
324 // port number is required, so just leave that case to the
325 // error handling further below.
326 if (!(line[0] == '[' && line.back() == ']')) {
327 unsigned int port = atoi(line.c_str() + colon + 1);
328 line.erase(colon);
329 if (line[0] == '[' && line.back() == ']') {
330 line.erase(line.size() - 1, 1);
331 line.erase(0, 1);
333 db.add_database(Remote::open_writable(line, port, 0, 10000, flags));
334 continue;
337 #else
338 throw FeatureUnavailableError("Remote backend disabled");
339 #endif
342 if (type == "inmemory" && line.empty()) {
343 #ifdef XAPIAN_HAS_INMEMORY_BACKEND
344 db.add_database(WritableDatabase(string(), DB_BACKEND_INMEMORY));
345 continue;
346 #else
347 throw FeatureUnavailableError("Inmemory backend disabled");
348 #endif
351 if (type == "chert") {
352 throw FeatureUnavailableError("Chert backend no longer supported");
355 if (type == "flint") {
356 throw FeatureUnavailableError("Flint backend no longer supported");
359 // Don't include the line itself - that might help an attacker
360 // by revealing part of a sensitive file's contents if they can
361 // arrange for it to be read as a stub database via infelicities in
362 // an application which uses Xapian. The line number is enough
363 // information to identify the problem line.
364 throw DatabaseOpeningError(file + ':' + str(line_no) + ": Bad line");
367 if (db.internal->size() == 0) {
368 throw DatabaseOpeningError(file + ": No databases listed");
372 Database::Database(const string& path, int flags)
373 : Database()
375 LOGCALL_CTOR(API, "Database", path|flags);
377 int type = flags & DB_BACKEND_MASK_;
378 switch (type) {
379 case DB_BACKEND_CHERT:
380 throw FeatureUnavailableError("Chert backend no longer supported");
381 case DB_BACKEND_GLASS:
382 #ifdef XAPIAN_HAS_GLASS_BACKEND
383 internal = new GlassDatabase(path);
384 return;
385 #else
386 throw FeatureUnavailableError("Glass backend disabled");
387 #endif
388 case DB_BACKEND_HONEY:
389 #ifdef XAPIAN_HAS_HONEY_BACKEND
390 internal = new HoneyDatabase(path);
391 return;
392 #else
393 throw FeatureUnavailableError("Honey backend disabled");
394 #endif
395 case DB_BACKEND_STUB:
396 open_stub(*this, path);
397 return;
398 case DB_BACKEND_INMEMORY:
399 #ifdef XAPIAN_HAS_INMEMORY_BACKEND
400 internal = new InMemoryDatabase();
401 return;
402 #else
403 throw FeatureUnavailableError("Inmemory backend disabled");
404 #endif
407 struct stat statbuf;
408 if (stat(path.c_str(), &statbuf) == -1) {
409 throw DatabaseOpeningError("Couldn't stat '" + path + "'", errno);
412 if (S_ISREG(statbuf.st_mode)) {
413 // Could be a stub database file, or a single file glass database.
414 int fd;
415 switch (check_if_single_file_db(statbuf, path, &fd)) {
416 case BACKEND_GLASS:
417 #ifdef XAPIAN_HAS_GLASS_BACKEND
418 // Single file glass format.
419 internal = new GlassDatabase(fd);
420 return;
421 #else
422 throw FeatureUnavailableError("Glass backend disabled");
423 #endif
424 case BACKEND_HONEY:
425 #ifdef XAPIAN_HAS_HONEY_BACKEND
426 // Single file honey format.
427 internal = new HoneyDatabase(fd);
428 return;
429 #else
430 throw FeatureUnavailableError("Honey backend disabled");
431 #endif
434 open_stub(*this, path);
435 return;
438 if (rare(!S_ISDIR(statbuf.st_mode))) {
439 throw DatabaseOpeningError("Not a regular file or directory: '" + path + "'");
442 #ifdef XAPIAN_HAS_GLASS_BACKEND
443 if (file_exists(path + "/iamglass")) {
444 internal = new GlassDatabase(path);
445 return;
447 #endif
449 #ifdef XAPIAN_HAS_HONEY_BACKEND
450 if (file_exists(path + "/iamhoney")) {
451 internal = new HoneyDatabase(path);
452 return;
454 #endif
456 // Check for "stub directories".
457 string stub_file = path;
458 stub_file += "/XAPIANDB";
459 if (usual(file_exists(stub_file))) {
460 open_stub(*this, stub_file);
461 return;
464 #ifndef XAPIAN_HAS_GLASS_BACKEND
465 if (file_exists(path + "/iamglass")) {
466 throw FeatureUnavailableError("Glass backend disabled");
468 #endif
469 #ifndef XAPIAN_HAS_HONEY_BACKEND
470 if (file_exists(path + "/iamhoney")) {
471 throw FeatureUnavailableError("Honey backend disabled");
473 #endif
474 if (file_exists(path + "/iamchert")) {
475 throw FeatureUnavailableError("Chert backend no longer supported");
477 if (file_exists(path + "/iamflint")) {
478 throw FeatureUnavailableError("Flint backend no longer supported");
481 throw DatabaseOpeningError("Couldn't detect type of database");
484 /** Helper factory function.
486 * This allows us to initialise Database::internal via the constructor's
487 * initialiser list, which we want to be able to do as Database::internal
488 * is an intrusive_ptr_nonnull, so we can't set it to NULL in the initialiser
489 * list and then fill it in later in the constructor body.
491 static Database::Internal*
492 database_factory(int fd, int flags)
494 if (rare(fd < 0))
495 throw InvalidArgumentError("fd < 0");
497 #ifdef XAPIAN_HAS_GLASS_BACKEND
498 int type = flags & DB_BACKEND_MASK_;
499 switch (type) {
500 case 0:
501 case DB_BACKEND_GLASS:
502 return new GlassDatabase(fd);
504 #else
505 (void)flags;
506 #endif
508 (void)::close(fd);
509 throw DatabaseOpeningError("Couldn't detect type of database");
512 Database::Database(int fd, int flags)
513 : internal(database_factory(fd, flags))
515 LOGCALL_CTOR(API, "Database", fd|flags);
518 #if defined XAPIAN_HAS_GLASS_BACKEND
519 #define HAVE_DISK_BACKEND
520 #endif
522 WritableDatabase::WritableDatabase(const std::string &path, int flags, int block_size)
523 : Database()
525 LOGCALL_CTOR(API, "WritableDatabase", path|flags|block_size);
526 // Avoid warning if all disk-based backends are disabled.
527 (void)block_size;
528 int type = flags & DB_BACKEND_MASK_;
529 // Clear the backend bits, so we just pass on other flags to open_stub, etc.
530 flags &= ~DB_BACKEND_MASK_;
531 if (type == 0) {
532 struct stat statbuf;
533 if (stat(path.c_str(), &statbuf) == -1) {
534 // ENOENT probably just means that we need to create the directory.
535 if (errno != ENOENT)
536 throw DatabaseOpeningError("Couldn't stat '" + path + "'", errno);
537 } else {
538 // File or directory already exists.
540 if (S_ISREG(statbuf.st_mode)) {
541 // The path is a file, so assume it is a stub database file.
542 open_stub(*this, path, flags);
543 return;
546 if (rare(!S_ISDIR(statbuf.st_mode))) {
547 throw DatabaseOpeningError("Not a regular file or directory: '" + path + "'");
550 if (file_exists(path + "/iamglass")) {
551 // Existing glass DB.
552 #ifdef XAPIAN_HAS_GLASS_BACKEND
553 type = DB_BACKEND_GLASS;
554 #else
555 throw FeatureUnavailableError("Glass backend disabled");
556 #endif
557 } else if (file_exists(path + "/iamhoney")) {
558 // Existing honey DB.
559 throw InvalidOperationError("Honey backend doesn't support "
560 "updating existing databases");
561 } else if (file_exists(path + "/iamchert")) {
562 // Existing chert DB.
563 throw FeatureUnavailableError("Chert backend no longer supported");
564 } else if (file_exists(path + "/iamflint")) {
565 // Existing flint DB.
566 throw FeatureUnavailableError("Flint backend no longer supported");
567 } else {
568 // Check for "stub directories".
569 string stub_file = path;
570 stub_file += "/XAPIANDB";
571 if (usual(file_exists(stub_file))) {
572 open_stub(*this, stub_file, flags);
573 return;
579 switch (type) {
580 case DB_BACKEND_STUB:
581 open_stub(*this, path, flags);
582 return;
583 case 0:
584 // Fall through to first enabled case, so order the remaining cases
585 // by preference.
586 #ifdef XAPIAN_HAS_GLASS_BACKEND
587 case DB_BACKEND_GLASS:
588 internal = new GlassWritableDatabase(path, flags, block_size);
589 return;
590 #endif
591 case DB_BACKEND_HONEY:
592 throw InvalidArgumentError("Honey backend doesn't support "
593 "updating existing databases");
594 case DB_BACKEND_CHERT:
595 throw FeatureUnavailableError("Chert backend no longer supported");
596 case DB_BACKEND_INMEMORY:
597 #ifdef XAPIAN_HAS_INMEMORY_BACKEND
598 internal = new InMemoryDatabase();
599 return;
600 #else
601 throw FeatureUnavailableError("Inmemory backend disabled");
602 #endif
604 #ifndef HAVE_DISK_BACKEND
605 throw FeatureUnavailableError("No disk-based writable backend is enabled");
606 #endif