1 /** @file xapian-rank.cc
2 * @brief Command line search tool using Xapian::QueryParser and Xapian::Letor
4 /* Copyright (C) 2004,2005,2006,2007,2008,2009,2010,2015 Olly Betts
5 * Copyright (C) 2011 Parth Gupta
6 * Copyright (C) 2016 Ayush Tomar
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License, or (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
27 #include <xapian-letor.h>
33 #include "gnu_getopt.h"
37 #define PROG_NAME "xapian-rank"
38 #define PROG_DESC "Xapian command line search tool with Learning to Rank Facility"
44 static const char * sw
[] = {
45 "a", "about", "an", "and", "are", "as", "at",
50 "i", "in", "is", "it",
52 "that", "the", "this", "to",
53 "was", "what", "when", "where", "which", "who", "why", "will", "with"
56 static void show_usage() {
57 cout
<< "Usage: " PROG_NAME
" [OPTIONS] MODEL_METADATA_KEY QUERY\n"
58 "NB: QUERY should be quoted to protect it from the shell.\n\n"
60 " -d, --db=DIRECTORY path to database to search\n"
61 " -m, --msize=MSIZE maximum number of matches to return\n"
62 " -s, --stemmer=LANG set the stemming language, the default is 'english'\n"
63 " (pass 'none' to disable stemming)\n"
64 " -p, --prefix=PFX:TERMPFX Add a prefix\n"
65 " -b, --boolean-prefix=PFX:TERMPFX Add a boolean prefix\n"
66 " --help display this help and exit\n"
67 " --version output version information and exit\n";
71 main(int argc
, char **argv
)
73 const char * opts
= "d:f:m:s:p:b:h:v";
74 static const struct option long_opts
[] = {
75 { "db", required_argument
, 0, 'd' },
76 { "msize", required_argument
, 0, 'm' },
77 { "stemmer", required_argument
, 0, 's' },
78 { "prefix", required_argument
, 0, 'p' },
79 { "boolean-prefix", required_argument
, 0, 'b' },
80 { "help", no_argument
, 0, OPT_HELP
},
81 { "version", no_argument
, 0, OPT_VERSION
},
85 Xapian::SimpleStopper
mystopper(sw
, sw
+ sizeof(sw
) / sizeof(sw
[0]));
86 Xapian::Stem
stemmer("english");
89 bool have_database
= false;
92 Xapian::QueryParser parser
;
93 parser
.add_prefix("title", "S");
94 parser
.add_prefix("subject", "S");
97 while ((c
= gnu_getopt_long(argc
, argv
, opts
, long_opts
, 0)) != -1) {
101 have_database
= true;
104 msize
= atoi(optarg
);
108 stemmer
= Xapian::Stem(optarg
);
109 } catch (const Xapian::InvalidArgumentError
&) {
110 cerr
<< "Unknown stemming language '" << optarg
<< "'.\n"
111 "Available language names are: "
112 << Xapian::Stem::get_available_languages() << endl
;
116 case 'p': case 'b': {
117 const char * colon
= strchr(optarg
, ':');
119 cerr
<< argv
[0] << ": need ':' when setting prefix" << endl
;
122 string
prefix(optarg
, colon
- optarg
);
123 string
termprefix(colon
+ 1);
125 parser
.add_boolean_prefix(prefix
, termprefix
);
127 parser
.add_prefix(prefix
, termprefix
);
132 cout
<< PROG_NAME
" - " PROG_DESC
"\n\n";
136 cout
<< PROG_NAME
" - " PACKAGE_STRING
<< endl
;
138 case ':': // missing parameter
139 case '?': // unknown option
145 if (argc
- optind
!= 2) {
150 string model_metadata_key
= argv
[optind
];
152 Xapian::Database
db(db_path
);
154 parser
.set_database(db
);
155 parser
.set_default_op(Xapian::Query::OP_OR
);
156 parser
.set_stemmer(stemmer
);
157 parser
.set_stemming_strategy(Xapian::QueryParser::STEM_SOME
);
158 parser
.set_stopper(&mystopper
);
160 string qq
= argv
[optind
+ 1];
162 Xapian::Query query_no_prefix
= parser
.parse_query(qq
,
164 parser
.FLAG_SPELLING_CORRECTION
);
165 // query with title as default prefix
166 Xapian::Query query_default_prefix
= parser
.parse_query(qq
,
168 parser
.FLAG_SPELLING_CORRECTION
,
171 Xapian::Query query
= Xapian::Query(Xapian::Query::OP_OR
, query_no_prefix
, query_default_prefix
);
173 const string
& correction
= parser
.get_corrected_query_string();
174 if (!correction
.empty())
175 cout
<< "Did you mean: " << correction
<< "\n\n";
177 cout
<< "Parsed Query: " << query
.get_description() << endl
;
179 if (!have_database
) {
180 cout
<< "No database specified so not running the query." << endl
;
184 Xapian::Enquire
enquire(db
);
185 enquire
.set_query(query
);
187 Xapian::MSet mset
= enquire
.get_mset(0, msize
);
190 cout
<< "Empty MSet. No documents could be retrieved with the given Query." << endl
;
194 cout
<< "Docids before re-ranking by LTR model:" << endl
;
195 for (Xapian::MSetIterator i
= mset
.begin(); i
!= mset
.end(); ++i
) {
196 Xapian::Document doc
= i
.get_document();
197 string data
= doc
.get_data();
198 cout
<< *i
<< ": [" << i
.get_weight() << "]\n" << data
<< "\n";
201 // Initialise Ranker object with ListNETRanker instance, db path and query.
202 // See Ranker documentation for available Ranker subclass options.
203 Xapian::Ranker
* ranker
= new Xapian::ListNETRanker();
204 ranker
->set_database_path(db_path
);
205 ranker
->set_query(query
);
207 // Get vector of re-ranked docids
208 ranker
->rank(mset
, model_metadata_key
);
210 cout
<< "Docids after re-ranking by LTR model:\n" << endl
;
212 for (Xapian::MSetIterator i
= mset
.begin(); i
!= mset
.end(); ++i
) {
213 Xapian::Document doc
= i
.get_document();
214 string data
= doc
.get_data();
215 cout
<< *i
<< ": [" << i
.get_weight() << "]\n" << data
<< "\n";
220 } catch (const Xapian::QueryParserError
& e
) {
221 cout
<< "Couldn't parse query: " << e
.get_msg() << endl
;
223 } catch (const Xapian::Error
& err
) {
224 cout
<< err
.get_description() << endl
;