Factor out directory separator knowledge
[xapian.git] / xapian-core / examples / copydatabase.cc
blobf30e902238d685ba24f17684e2079360a20ba2c3
1 /** @file copydatabase.cc
2 * @brief Perform a document-by-document copy of one or more Xapian databases.
3 */
4 /* Copyright (C) 2006,2007,2008,2009,2010,2011 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #include <config.h>
23 #include <xapian.h>
25 #include <initializer_list>
26 #include <iomanip>
27 #include <iostream>
29 #include <cmath> // For log10().
30 #include <cstdlib> // For exit().
31 #include <cstring> // For strcmp() and strrchr().
33 using namespace std;
35 #define PROG_NAME "copydatabase"
36 #define PROG_DESC "Perform a document-by-document copy of one or more Xapian databases"
38 static void
39 show_usage(int rc)
41 cout << "Usage: " PROG_NAME " SOURCE_DATABASE... DESTINATION_DATABASE\n\n"
42 "Options:\n"
43 " --no-renumber Preserve the numbering of document ids (useful if you have\n"
44 " external references to them, or have set them to match\n"
45 " unique ids from an external source). If multiple source\n"
46 " databases are specified and the same docid occurs in more\n"
47 " one, the last occurrence will be the one which ends up in\n"
48 " the destination database.\n"
49 " --help display this help and exit\n"
50 " --version output version information and exit" << endl;
51 exit(rc);
54 int
55 main(int argc, char **argv)
56 try {
57 bool renumber = true;
58 if (argc > 1 && argv[1][0] == '-') {
59 if (strcmp(argv[1], "--help") == 0) {
60 cout << PROG_NAME " - " PROG_DESC "\n\n";
61 show_usage(0);
63 if (strcmp(argv[1], "--version") == 0) {
64 cout << PROG_NAME " - " PACKAGE_STRING << endl;
65 exit(0);
67 if (strcmp(argv[1], "--no-renumber") == 0) {
68 renumber = false;
69 argv[1] = argv[0];
70 ++argv;
71 --argc;
75 // We expect two or more arguments: at least one source database path
76 // followed by the destination database path.
77 if (argc < 3) show_usage(1);
79 // Create the destination database, using DB_CREATE so that we don't
80 // try to overwrite or update an existing database in case the user
81 // got the command line argument order wrong.
82 const char *dest = argv[argc - 1];
83 Xapian::WritableDatabase db_out(dest, Xapian::DB_CREATE);
85 for (int i = 1; i < argc - 1; ++i) {
86 string src = argv[i];
87 if (!src.empty()) {
88 // Remove any trailing directory separator.
89 char& ch = src.back();
90 for (char dir_sep : DIR_SEPS_LIST) {
91 if (ch == dir_sep) {
92 ch = '\0';
93 break;
98 // Open the source database.
99 Xapian::Database db_in(src);
101 // Find the leaf-name of the database path for reporting progress.
103 // If we found a directory separator, + 1 advances to the next
104 // character; If we didn't, incrementing string::npos will give us 0,
105 // so we use the whole of src as the leaf-name.
106 const char * leaf = src.c_str() + (src.find_last_of(DIR_SEPS) + 1);
108 // Iterate over all the documents in db_in, copying each to db_out.
109 Xapian::doccount dbsize = db_in.get_doccount();
110 if (dbsize == 0) {
111 cout << leaf << ": empty!" << endl;
112 } else {
113 // Calculate how many decimal digits there are in dbsize.
114 int width = static_cast<int>(log10(double(dbsize))) + 1;
116 Xapian::doccount c = 0;
117 Xapian::PostingIterator it = db_in.postlist_begin(string());
118 while (it != db_in.postlist_end(string())) {
119 Xapian::docid did = *it;
120 if (renumber) {
121 db_out.add_document(db_in.get_document(did));
122 } else {
123 db_out.replace_document(did, db_in.get_document(did));
126 // Update for the first 10, and then every 13th document
127 // counting back from the end (this means that all the
128 // digits "rotate" and the counter ends up on the exact
129 // total.
130 ++c;
131 if (c <= 10 || (dbsize - c) % 13 == 0) {
132 cout << '\r' << leaf << ": ";
133 cout << setw(width) << c << '/' << dbsize << flush;
136 ++it;
139 cout << endl;
142 cout << "Copying spelling data..." << flush;
143 Xapian::TermIterator spellword = db_in.spellings_begin();
144 while (spellword != db_in.spellings_end()) {
145 db_out.add_spelling(*spellword, spellword.get_termfreq());
146 ++spellword;
148 cout << " done." << endl;
150 cout << "Copying synonym data..." << flush;
151 Xapian::TermIterator synkey = db_in.synonym_keys_begin();
152 while (synkey != db_in.synonym_keys_end()) {
153 string key = *synkey;
154 Xapian::TermIterator syn = db_in.synonyms_begin(key);
155 while (syn != db_in.synonyms_end(key)) {
156 db_out.add_synonym(key, *syn);
157 ++syn;
159 ++synkey;
161 cout << " done." << endl;
163 cout << "Copying user metadata..." << flush;
164 Xapian::TermIterator metakey = db_in.metadata_keys_begin();
165 while (metakey != db_in.metadata_keys_end()) {
166 string key = *metakey;
167 db_out.set_metadata(key, db_in.get_metadata(key));
168 ++metakey;
170 cout << " done." << endl;
173 cout << "Committing..." << flush;
174 // Commit explicitly so that any error is reported.
175 db_out.commit();
176 cout << " done." << endl;
177 } catch (const Xapian::Error & e) {
178 cerr << '\n' << argv[0] << ": " << e.get_description() << endl;
179 exit(1);