Make all read-only data arrays static and const
[xapian.git] / xapian-core / examples / quest.cc
blob440adb025227caadf068d1f90b80f357e1eda9fa
1 /* quest.cc - Command line search tool using Xapian::QueryParser.
3 * Copyright (C) 2004,2005,2006,2007,2008,2009,2010,2012,2013,2014,2016 Olly Betts
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation; either version 2 of the
8 * License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
18 * USA
21 #include <config.h>
23 #include <xapian.h>
25 #include <cstdlib>
26 #include <cstring>
28 #include <algorithm>
29 #include <iostream>
31 #include "gnu_getopt.h"
33 using namespace std;
35 #define PROG_NAME "quest"
36 #define PROG_DESC "Xapian command line search tool"
// Stopwords: common English words which main() hands to
// Xapian::SimpleStopper as the iterator range [sw, sw + N).
// The list is kept in alphabetical order.
static const char * const sw[] = {
    "a", "about", "an", "and", "are", "as", "at",
    "be", "by",
    "en",
    "for", "from",
    "how",
    "i", "in", "is", "it",
    "of", "on", "or",
    "that", "the", "this", "to",
    "was", "what", "when", "where", "which", "who", "why", "will", "with"
};
51 struct qp_flag { const char * s; unsigned f; };
52 static const qp_flag flag_tab[] = {
53 { "auto_multiword_synonyms", Xapian::QueryParser::FLAG_AUTO_MULTIWORD_SYNONYMS },
54 { "auto_synonyms", Xapian::QueryParser::FLAG_AUTO_SYNONYMS },
55 { "boolean", Xapian::QueryParser::FLAG_BOOLEAN },
56 { "boolean_any_case", Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE },
57 { "cjk_ngram", Xapian::QueryParser::FLAG_CJK_NGRAM },
58 { "default", Xapian::QueryParser::FLAG_DEFAULT },
59 { "lovehate", Xapian::QueryParser::FLAG_LOVEHATE },
60 { "partial", Xapian::QueryParser::FLAG_PARTIAL },
61 { "phrase", Xapian::QueryParser::FLAG_PHRASE },
62 { "pure_not", Xapian::QueryParser::FLAG_PURE_NOT },
63 { "spelling_correction", Xapian::QueryParser::FLAG_SPELLING_CORRECTION },
64 { "synonym", Xapian::QueryParser::FLAG_SYNONYM },
65 { "wildcard", Xapian::QueryParser::FLAG_WILDCARD }
67 const int n_flag_tab = sizeof(flag_tab) / sizeof(flag_tab[0]);
69 inline bool operator<(const qp_flag & f1, const qp_flag & f2) {
70 return strcmp(f1.s, f2.s) < 0;
73 struct qp_op { const char * s; unsigned f; };
74 static const qp_op op_tab[] = {
75 { "and", Xapian::Query::OP_AND },
76 { "elite_set", Xapian::Query::OP_ELITE_SET },
77 { "max", Xapian::Query::OP_MAX },
78 { "near", Xapian::Query::OP_NEAR },
79 { "or", Xapian::Query::OP_OR },
80 { "phrase", Xapian::Query::OP_PHRASE },
81 { "synonym", Xapian::Query::OP_SYNONYM }
83 const int n_op_tab = sizeof(op_tab) / sizeof(op_tab[0]);
85 inline bool operator<(const qp_op & f1, const qp_op & f2) {
86 return strcmp(f1.s, f2.s) < 0;
// Identifiers for the weighting schemes selectable with --weight.
// main() switches on these to construct the matching Xapian weight object.
enum {
    WEIGHT_BB2,
    WEIGHT_BM25,
    WEIGHT_BM25PLUS,
    WEIGHT_BOOL,
    WEIGHT_DLH,
    WEIGHT_DPH,
    WEIGHT_IFB2,
    WEIGHT_INEB2,
    WEIGHT_INL2,
    WEIGHT_LM,
    WEIGHT_PL2,
    WEIGHT_PL2PLUS,
    WEIGHT_TFIDF,
    WEIGHT_TRAD
};

// One entry of the --weight lookup table: the name accepted on the command
// line (s) and the WEIGHT_* identifier it selects (f).
struct wt { const char * s; int f; };

// Table of weighting scheme names accepted by --weight.
//
// Entries MUST stay sorted by name in strcmp() order, because decode_wt()
// finds entries with a binary search (lower_bound()).
static const wt wt_tab[] = {
    { "bb2", WEIGHT_BB2 },
    { "bm25", WEIGHT_BM25 },
    { "bm25+", WEIGHT_BM25PLUS },
    { "bool", WEIGHT_BOOL },
    { "dlh", WEIGHT_DLH },
    { "dph", WEIGHT_DPH },
    { "ifb2", WEIGHT_IFB2 },
    { "ineb2", WEIGHT_INEB2 },
    { "inl2", WEIGHT_INL2 },
    { "lm", WEIGHT_LM },
    { "pl2", WEIGHT_PL2 },
    { "pl2+", WEIGHT_PL2PLUS },
    { "tfidf", WEIGHT_TFIDF },
    { "trad", WEIGHT_TRAD }
};

// Number of entries in wt_tab.
const int n_wt_tab = sizeof(wt_tab) / sizeof(wt_tab[0]);

// Order entries by name only; the scheme identifier takes no part in the
// comparison, so a probe with just the name filled in can be searched for.
inline bool operator<(const wt & f1, const wt & f2) {
    return strcmp(f1.s, f2.s) < 0;
}
// Write the usage message to cout: the fixed option summary, followed by the
// names from flag_tab, op_tab and wt_tab (under --flags, --default-op and
// --weight respectively) as comma-separated, line-wrapped lists.
129 static void show_usage() {
130 cout << "Usage: " PROG_NAME " [OPTIONS] 'QUERY'\n"
131 "NB: QUERY should be quoted to protect it from the shell.\n\n"
132 "Options:\n"
133 " -d, --db=DIRECTORY database to search (multiple databases may\n"
134 " be specified)\n"
135 " -m, --msize=MSIZE maximum number of matches to return\n"
136 " -c, --check-at-least=HOWMANY minimum number of matches to check\n"
137 " -s, --stemmer=LANG set the stemming language, the default is\n"
138 " 'english' (pass 'none' to disable stemming)\n"
139 " -p, --prefix=PFX:TERMPFX add a prefix\n"
140 " -b, --boolean-prefix=PFX:TERMPFX add a boolean prefix\n"
141 " -f, --flags=FLAG1[,FLAG2]... specify QueryParser flags. Valid flags:";
// NOTE(review): the continuation line holding INDENT's replacement text is
// not visible in this listing - presumably a string literal of spaces which
// lines wrapped names up with the description column; confirm against
// upstream before relying on this block.
142 #define INDENT \
144 int pos = 256;
// pos tracks the current output column.  It starts at 256, which is both
// ">= 78" (so the first name always begins a fresh INDENT-prefixed line) and
// the sentinel tested by "pos < 256" (so no comma is written before the first
// name).  Later names wrap once pos + len reaches 78.
145 for (const qp_flag * i = flag_tab; i - flag_tab < n_flag_tab; ++i) {
146 size_t len = strlen(i->s);
147 if (pos < 256) cout << ',';
148 if (pos + len >= 78) {
149 cout << "\n" INDENT;
// sizeof(INDENT) counts the string's terminating NUL as well.
150 pos = sizeof(INDENT) - 2;
151 } else {
152 cout << ' ';
154 cout << i->s;
155 pos += len + 2;
157 cout << "\n"
158 " -o, --default-op=OP specify QueryParser default operator\n"
159 " (default: or). Valid operators:";
// Reset to the sentinel and list the operator names the same way.
160 pos = 256;
161 for (const qp_op * i = op_tab; i - op_tab < n_op_tab; ++i) {
162 size_t len = strlen(i->s);
163 if (pos < 256) cout << ',';
164 if (pos + len >= 78) {
165 cout << "\n" INDENT;
166 pos = sizeof(INDENT) - 2;
167 } else {
168 cout << ' ';
170 cout << i->s;
171 pos += len + 2;
173 cout << "\n"
174 " -w, --weight=SCHEME specify weighting scheme to use\n"
175 " (default: bm25). Valid schemes:";
// Reset to the sentinel and list the weighting scheme names the same way.
176 pos = 256;
177 for (const wt * i = wt_tab; i - wt_tab < n_wt_tab; ++i) {
178 size_t len = strlen(i->s);
179 if (pos < 256) cout << ',';
180 if (pos + len >= 78) {
181 cout << "\n" INDENT;
182 pos = sizeof(INDENT) - 2;
183 } else {
184 cout << ' ';
186 cout << i->s;
187 pos += len + 2;
189 cout << "\n"
190 " -h, --help display this help and exit\n"
191 " -v, --version output version information and exit\n";
194 static unsigned
195 decode_qp_flag(const char * s)
197 qp_flag f;
198 f.s = s;
199 const qp_flag * p = lower_bound(flag_tab, flag_tab + n_flag_tab, f);
200 if (p == flag_tab + n_flag_tab || f < *p)
201 return 0;
202 return p->f;
205 static int
206 decode_qp_op(const char * s)
208 qp_op f;
209 f.s = s;
210 const qp_op * p = lower_bound(op_tab, op_tab + n_op_tab, f);
211 if (p == op_tab + n_op_tab || f < *p)
212 return -1;
213 return p->f;
216 static int
217 decode_wt(const char * s)
219 wt f;
220 f.s = s;
221 const wt * p = lower_bound(wt_tab, wt_tab + n_wt_tab, f);
222 if (p == wt_tab + n_wt_tab || f < *p)
223 return -1;
224 return p->f;
228 main(int argc, char **argv)
229 try {
230 const char * opts = "d:m:c:s:p:b:f:o:w:hv";
231 static const struct option long_opts[] = {
232 { "db", required_argument, 0, 'd' },
233 { "msize", required_argument, 0, 'm' },
234 { "check-at-least", required_argument, 0, 'c' },
235 { "stemmer", required_argument, 0, 's' },
236 { "prefix", required_argument, 0, 'p' },
237 { "boolean-prefix", required_argument, 0, 'b' },
238 { "flags", required_argument, 0, 'f' },
239 { "default-op", required_argument, 0, 'o' },
240 { "weight", required_argument, 0, 'w' },
241 { "help", no_argument, 0, 'h' },
242 { "version", no_argument, 0, 'v' },
243 { NULL, 0, 0, 0}
246 Xapian::SimpleStopper mystopper(sw, sw + sizeof(sw) / sizeof(sw[0]));
247 Xapian::Stem stemmer("english");
248 Xapian::doccount msize = 10;
249 Xapian::doccount check_at_least = 0;
251 bool have_database = false;
253 Xapian::Database db;
254 Xapian::QueryParser parser;
255 unsigned flags = parser.FLAG_DEFAULT|parser.FLAG_SPELLING_CORRECTION;
256 int weight = -1;
258 int c;
259 while ((c = gnu_getopt_long(argc, argv, opts, long_opts, 0)) != -1) {
260 switch (c) {
261 case 'm': {
262 char * p;
263 unsigned long v = strtoul(optarg, &p, 10);
264 msize = static_cast<Xapian::doccount>(v);
265 if (*p || v != msize) {
266 cerr << PROG_NAME": Bad value '" << optarg
267 << "' passed for msize" << endl;
268 exit(1);
270 break;
272 case 'c': {
273 char * p;
274 unsigned long v = strtoul(optarg, &p, 10);
275 check_at_least = static_cast<Xapian::doccount>(v);
276 if (*p || v != check_at_least) {
277 cerr << PROG_NAME": Bad value '" << optarg
278 << "' passed for check_at_least " << endl;
279 exit(1);
281 break;
283 case 'd':
284 db.add_database(Xapian::Database(optarg));
285 have_database = true;
286 break;
287 case 's':
288 try {
289 stemmer = Xapian::Stem(optarg);
290 } catch (const Xapian::InvalidArgumentError &) {
291 cerr << "Unknown stemming language '" << optarg << "'.\n"
292 "Available language names are: "
293 << Xapian::Stem::get_available_languages() << endl;
294 exit(1);
296 break;
297 case 'b': case 'p': {
298 const char * colon = strchr(optarg, ':');
299 if (colon == NULL) {
300 cerr << argv[0] << ": need ':' when setting prefix" << endl;
301 exit(1);
303 string prefix(optarg, colon - optarg);
304 string termprefix(colon + 1);
305 if (c == 'b') {
306 parser.add_boolean_prefix(prefix, termprefix);
307 } else {
308 parser.add_prefix(prefix, termprefix);
310 break;
312 case 'f':
313 flags = 0;
314 do {
315 char * comma = strchr(optarg, ',');
316 if (comma)
317 *comma++ = '\0';
318 unsigned flag = decode_qp_flag(optarg);
319 if (flag == 0) {
320 cerr << "Unknown flag '" << optarg << "'" << endl;
321 exit(1);
323 flags |= flag;
324 optarg = comma;
325 } while (optarg);
326 break;
327 case 'o': {
328 int op = decode_qp_op(optarg);
329 if (op < 0) {
330 cerr << "Unknown op '" << optarg << "'" << endl;
331 exit(1);
333 parser.set_default_op(static_cast<Xapian::Query::op>(op));
334 break;
336 case 'w': {
337 weight = decode_wt(optarg);
338 if (weight < 0) {
339 cerr << "Unknown weighting scheme '" << optarg << "'" << endl;
340 exit(1);
342 break;
344 case 'v':
345 cout << PROG_NAME " - " PACKAGE_STRING << endl;
346 exit(0);
347 case 'h':
348 cout << PROG_NAME " - " PROG_DESC "\n\n";
349 show_usage();
350 exit(0);
351 case ':': // missing parameter
352 case '?': // unknown option
353 show_usage();
354 exit(1);
358 if (argc - optind != 1) {
359 show_usage();
360 exit(1);
363 parser.set_database(db);
364 parser.set_stemmer(stemmer);
365 parser.set_stemming_strategy(Xapian::QueryParser::STEM_SOME);
366 parser.set_stopper(&mystopper);
368 Xapian::Query query = parser.parse_query(argv[optind], flags);
369 const string & correction = parser.get_corrected_query_string();
370 if (!correction.empty())
371 cout << "Did you mean: " << correction << "\n\n";
373 cout << "Parsed Query: " << query.get_description() << endl;
375 if (!have_database) {
376 cout << "No database specified so not running the query." << endl;
377 exit(0);
380 Xapian::Enquire enquire(db);
381 enquire.set_query(query);
383 switch (weight) {
384 case WEIGHT_BB2:
385 enquire.set_weighting_scheme(Xapian::BB2Weight());
386 break;
387 case WEIGHT_BOOL:
388 enquire.set_weighting_scheme(Xapian::BoolWeight());
389 break;
390 case WEIGHT_BM25:
391 enquire.set_weighting_scheme(Xapian::BM25Weight());
392 break;
393 case WEIGHT_BM25PLUS:
394 enquire.set_weighting_scheme(Xapian::BM25PlusWeight());
395 break;
396 case WEIGHT_DLH:
397 enquire.set_weighting_scheme(Xapian::DLHWeight());
398 break;
399 case WEIGHT_DPH:
400 enquire.set_weighting_scheme(Xapian::DPHWeight());
401 break;
402 case WEIGHT_IFB2:
403 enquire.set_weighting_scheme(Xapian::IfB2Weight());
404 break;
405 case WEIGHT_INEB2:
406 enquire.set_weighting_scheme(Xapian::IneB2Weight());
407 break;
408 case WEIGHT_INL2:
409 enquire.set_weighting_scheme(Xapian::InL2Weight());
410 break;
411 case WEIGHT_LM:
412 enquire.set_weighting_scheme(Xapian::LMWeight());
413 break;
414 case WEIGHT_PL2:
415 enquire.set_weighting_scheme(Xapian::PL2Weight());
416 break;
417 case WEIGHT_PL2PLUS:
418 enquire.set_weighting_scheme(Xapian::PL2PlusWeight());
419 break;
420 case WEIGHT_TFIDF:
421 enquire.set_weighting_scheme(Xapian::TfIdfWeight());
422 break;
423 case WEIGHT_TRAD:
424 enquire.set_weighting_scheme(Xapian::TradWeight());
425 break;
428 Xapian::MSet mset = enquire.get_mset(0, msize, check_at_least);
430 cout << "MSet:" << endl;
431 for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); ++i) {
432 Xapian::Document doc = i.get_document();
433 string data = doc.get_data();
434 cout << *i << ": [" << i.get_weight() << "]\n" << data << "\n";
436 cout << flush;
437 } catch (const Xapian::QueryParserError & e) {
438 cout << "Couldn't parse query: " << e.get_msg() << endl;
439 exit(1);
440 } catch (const Xapian::Error & err) {
441 cout << err.get_description() << endl;
442 exit(1);