1 /* quest.cc - Command line search tool using Xapian::QueryParser.
3 * Copyright (C) 2004,2005,2006,2007,2008,2009,2010,2012,2013,2014,2016 Olly Betts
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation; either version 2 of the
8 * License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
31 #include "gnu_getopt.h"
// Program name and one-line description.  These stay as macros (rather than
// constants) because the usage, version and error messages build their text
// by adjacent string-literal concatenation, e.g. PROG_NAME ": Bad value '".
#define PROG_NAME "quest"
#define PROG_DESC "Xapian command line search tool"
39 static const char * const sw
[] = {
40 "a", "about", "an", "and", "are", "as", "at",
45 "i", "in", "is", "it",
47 "that", "the", "this", "to",
48 "was", "what", "when", "where", "which", "who", "why", "will", "with"
/** One row of flag_tab: a QueryParser flag name and its value.
 *
 *  Kept as a plain aggregate so the table can be brace-initialised.
 */
struct qp_flag {
    /// Flag name as the user spells it on the command line.
    const char * s;
    /// Corresponding Xapian::QueryParser::FLAG_* value.
    unsigned f;
};
52 static const qp_flag flag_tab
[] = {
53 { "auto_multiword_synonyms", Xapian::QueryParser::FLAG_AUTO_MULTIWORD_SYNONYMS
},
54 { "auto_synonyms", Xapian::QueryParser::FLAG_AUTO_SYNONYMS
},
55 { "boolean", Xapian::QueryParser::FLAG_BOOLEAN
},
56 { "boolean_any_case", Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE
},
57 { "cjk_ngram", Xapian::QueryParser::FLAG_CJK_NGRAM
},
58 { "default", Xapian::QueryParser::FLAG_DEFAULT
},
59 { "lovehate", Xapian::QueryParser::FLAG_LOVEHATE
},
60 { "partial", Xapian::QueryParser::FLAG_PARTIAL
},
61 { "phrase", Xapian::QueryParser::FLAG_PHRASE
},
62 { "pure_not", Xapian::QueryParser::FLAG_PURE_NOT
},
63 { "spelling_correction", Xapian::QueryParser::FLAG_SPELLING_CORRECTION
},
64 { "synonym", Xapian::QueryParser::FLAG_SYNONYM
},
65 { "wildcard", Xapian::QueryParser::FLAG_WILDCARD
}
67 const int n_flag_tab
= sizeof(flag_tab
) / sizeof(flag_tab
[0]);
69 inline bool operator<(const qp_flag
& f1
, const qp_flag
& f2
) {
70 return strcmp(f1
.s
, f2
.s
) < 0;
/** One row of op_tab: a query operator name and its value.
 *
 *  Kept as a plain aggregate so the table can be brace-initialised.
 */
struct qp_op {
    /// Operator name as the user spells it on the command line.
    const char * s;
    /// Corresponding Xapian::Query::OP_* value, stored as unsigned.
    unsigned f;
};
74 static const qp_op op_tab
[] = {
75 { "and", Xapian::Query::OP_AND
},
76 { "elite_set", Xapian::Query::OP_ELITE_SET
},
77 { "max", Xapian::Query::OP_MAX
},
78 { "near", Xapian::Query::OP_NEAR
},
79 { "or", Xapian::Query::OP_OR
},
80 { "phrase", Xapian::Query::OP_PHRASE
},
81 { "synonym", Xapian::Query::OP_SYNONYM
}
83 const int n_op_tab
= sizeof(op_tab
) / sizeof(op_tab
[0]);
85 inline bool operator<(const qp_op
& f1
, const qp_op
& f2
) {
86 return strcmp(f1
.s
, f2
.s
) < 0;
/** One row of wt_tab: a weighting scheme name and its WEIGHT_* code.
 *
 *  Kept as a plain aggregate so the table can be brace-initialised.
 */
struct wt {
    /// Scheme name as the user spells it on the command line.
    const char * s;
    /// Corresponding WEIGHT_* constant.
    int f;
};
107 static const wt wt_tab
[] = {
108 { "bb2", WEIGHT_BB2
},
109 { "bm25", WEIGHT_BM25
},
110 { "bm25+", WEIGHT_BM25PLUS
},
111 { "bool", WEIGHT_BOOL
},
112 { "dlh", WEIGHT_DLH
},
113 { "dph", WEIGHT_DPH
},
114 { "ifb2", WEIGHT_IFB2
},
115 { "ineb2", WEIGHT_INEB2
},
116 { "inl2", WEIGHT_INL2
},
118 { "pl2", WEIGHT_PL2
},
119 { "pl2+", WEIGHT_PL2PLUS
},
120 { "tfidf", WEIGHT_TFIDF
},
121 { "trad", WEIGHT_TRAD
}
123 const int n_wt_tab
= sizeof(wt_tab
) / sizeof(wt_tab
[0]);
125 inline bool operator<(const wt
& f1
, const wt
& f2
) {
126 return strcmp(f1
.s
, f2
.s
) < 0;
129 static void show_usage() {
130 cout
<< "Usage: " PROG_NAME
" [OPTIONS] 'QUERY'\n"
131 "NB: QUERY should be quoted to protect it from the shell.\n\n"
133 " -d, --db=DIRECTORY database to search (multiple databases may\n"
135 " -m, --msize=MSIZE maximum number of matches to return\n"
136 " -c, --check-at-least=HOWMANY minimum number of matches to check\n"
137 " -s, --stemmer=LANG set the stemming language, the default is\n"
138 " 'english' (pass 'none' to disable stemming)\n"
139 " -p, --prefix=PFX:TERMPFX add a prefix\n"
140 " -b, --boolean-prefix=PFX:TERMPFX add a boolean prefix\n"
141 " -f, --flags=FLAG1[,FLAG2]... specify QueryParser flags. Valid flags:";
145 for (const qp_flag
* i
= flag_tab
; i
- flag_tab
< n_flag_tab
; ++i
) {
146 size_t len
= strlen(i
->s
);
147 if (pos
< 256) cout
<< ',';
148 if (pos
+ len
>= 78) {
150 pos
= sizeof(INDENT
) - 2;
158 " -o, --default-op=OP specify QueryParser default operator\n"
159 " (default: or). Valid operators:";
161 for (const qp_op
* i
= op_tab
; i
- op_tab
< n_op_tab
; ++i
) {
162 size_t len
= strlen(i
->s
);
163 if (pos
< 256) cout
<< ',';
164 if (pos
+ len
>= 78) {
166 pos
= sizeof(INDENT
) - 2;
174 " -w, --weight=SCHEME specify weighting scheme to use\n"
175 " (default: bm25). Valid schemes:";
177 for (const wt
* i
= wt_tab
; i
- wt_tab
< n_wt_tab
; ++i
) {
178 size_t len
= strlen(i
->s
);
179 if (pos
< 256) cout
<< ',';
180 if (pos
+ len
>= 78) {
182 pos
= sizeof(INDENT
) - 2;
190 " -h, --help display this help and exit\n"
191 " -v, --version output version information and exit\n";
195 decode_qp_flag(const char * s
)
199 const qp_flag
* p
= lower_bound(flag_tab
, flag_tab
+ n_flag_tab
, f
);
200 if (p
== flag_tab
+ n_flag_tab
|| f
< *p
)
206 decode_qp_op(const char * s
)
210 const qp_op
* p
= lower_bound(op_tab
, op_tab
+ n_op_tab
, f
);
211 if (p
== op_tab
+ n_op_tab
|| f
< *p
)
217 decode_wt(const char * s
)
221 const wt
* p
= lower_bound(wt_tab
, wt_tab
+ n_wt_tab
, f
);
222 if (p
== wt_tab
+ n_wt_tab
|| f
< *p
)
228 main(int argc
, char **argv
)
230 const char * opts
= "d:m:c:s:p:b:f:o:w:hv";
231 static const struct option long_opts
[] = {
232 { "db", required_argument
, 0, 'd' },
233 { "msize", required_argument
, 0, 'm' },
234 { "check-at-least", required_argument
, 0, 'c' },
235 { "stemmer", required_argument
, 0, 's' },
236 { "prefix", required_argument
, 0, 'p' },
237 { "boolean-prefix", required_argument
, 0, 'b' },
238 { "flags", required_argument
, 0, 'f' },
239 { "default-op", required_argument
, 0, 'o' },
240 { "weight", required_argument
, 0, 'w' },
241 { "help", no_argument
, 0, 'h' },
242 { "version", no_argument
, 0, 'v' },
246 Xapian::SimpleStopper
mystopper(sw
, sw
+ sizeof(sw
) / sizeof(sw
[0]));
247 Xapian::Stem
stemmer("english");
248 Xapian::doccount msize
= 10;
249 Xapian::doccount check_at_least
= 0;
251 bool have_database
= false;
254 Xapian::QueryParser parser
;
255 unsigned flags
= parser
.FLAG_DEFAULT
|parser
.FLAG_SPELLING_CORRECTION
;
259 while ((c
= gnu_getopt_long(argc
, argv
, opts
, long_opts
, 0)) != -1) {
263 unsigned long v
= strtoul(optarg
, &p
, 10);
264 msize
= static_cast<Xapian::doccount
>(v
);
265 if (*p
|| v
!= msize
) {
266 cerr
<< PROG_NAME
": Bad value '" << optarg
267 << "' passed for msize" << endl
;
274 unsigned long v
= strtoul(optarg
, &p
, 10);
275 check_at_least
= static_cast<Xapian::doccount
>(v
);
276 if (*p
|| v
!= check_at_least
) {
277 cerr
<< PROG_NAME
": Bad value '" << optarg
278 << "' passed for check_at_least " << endl
;
284 db
.add_database(Xapian::Database(optarg
));
285 have_database
= true;
289 stemmer
= Xapian::Stem(optarg
);
290 } catch (const Xapian::InvalidArgumentError
&) {
291 cerr
<< "Unknown stemming language '" << optarg
<< "'.\n"
292 "Available language names are: "
293 << Xapian::Stem::get_available_languages() << endl
;
297 case 'b': case 'p': {
298 const char * colon
= strchr(optarg
, ':');
300 cerr
<< argv
[0] << ": need ':' when setting prefix" << endl
;
303 string
prefix(optarg
, colon
- optarg
);
304 string
termprefix(colon
+ 1);
306 parser
.add_boolean_prefix(prefix
, termprefix
);
308 parser
.add_prefix(prefix
, termprefix
);
315 char * comma
= strchr(optarg
, ',');
318 unsigned flag
= decode_qp_flag(optarg
);
320 cerr
<< "Unknown flag '" << optarg
<< "'" << endl
;
328 int op
= decode_qp_op(optarg
);
330 cerr
<< "Unknown op '" << optarg
<< "'" << endl
;
333 parser
.set_default_op(static_cast<Xapian::Query::op
>(op
));
337 weight
= decode_wt(optarg
);
339 cerr
<< "Unknown weighting scheme '" << optarg
<< "'" << endl
;
345 cout
<< PROG_NAME
" - " PACKAGE_STRING
<< endl
;
348 cout
<< PROG_NAME
" - " PROG_DESC
"\n\n";
351 case ':': // missing parameter
352 case '?': // unknown option
358 if (argc
- optind
!= 1) {
363 parser
.set_database(db
);
364 parser
.set_stemmer(stemmer
);
365 parser
.set_stemming_strategy(Xapian::QueryParser::STEM_SOME
);
366 parser
.set_stopper(&mystopper
);
368 Xapian::Query query
= parser
.parse_query(argv
[optind
], flags
);
369 const string
& correction
= parser
.get_corrected_query_string();
370 if (!correction
.empty())
371 cout
<< "Did you mean: " << correction
<< "\n\n";
373 cout
<< "Parsed Query: " << query
.get_description() << endl
;
375 if (!have_database
) {
376 cout
<< "No database specified so not running the query." << endl
;
380 Xapian::Enquire
enquire(db
);
381 enquire
.set_query(query
);
385 enquire
.set_weighting_scheme(Xapian::BB2Weight());
388 enquire
.set_weighting_scheme(Xapian::BoolWeight());
391 enquire
.set_weighting_scheme(Xapian::BM25Weight());
393 case WEIGHT_BM25PLUS
:
394 enquire
.set_weighting_scheme(Xapian::BM25PlusWeight());
397 enquire
.set_weighting_scheme(Xapian::DLHWeight());
400 enquire
.set_weighting_scheme(Xapian::DPHWeight());
403 enquire
.set_weighting_scheme(Xapian::IfB2Weight());
406 enquire
.set_weighting_scheme(Xapian::IneB2Weight());
409 enquire
.set_weighting_scheme(Xapian::InL2Weight());
412 enquire
.set_weighting_scheme(Xapian::LMWeight());
415 enquire
.set_weighting_scheme(Xapian::PL2Weight());
418 enquire
.set_weighting_scheme(Xapian::PL2PlusWeight());
421 enquire
.set_weighting_scheme(Xapian::TfIdfWeight());
424 enquire
.set_weighting_scheme(Xapian::TradWeight());
428 Xapian::MSet mset
= enquire
.get_mset(0, msize
, check_at_least
);
430 cout
<< "MSet:" << endl
;
431 for (Xapian::MSetIterator i
= mset
.begin(); i
!= mset
.end(); ++i
) {
432 Xapian::Document doc
= i
.get_document();
433 string data
= doc
.get_data();
434 cout
<< *i
<< ": [" << i
.get_weight() << "]\n" << data
<< "\n";
437 } catch (const Xapian::QueryParserError
& e
) {
438 cout
<< "Couldn't parse query: " << e
.get_msg() << endl
;
440 } catch (const Xapian::Error
& err
) {
441 cout
<< err
.get_description() << endl
;