[ci] Disable 32-bit cygwin build for now
[xapian.git] / xapian-letor / bin / xapian-rank.cc
blob6f61c32d426fa6c96bf89d15aee75809675b204b
1 /** @file xapian-rank.cc
2 * @brief Command line search tool using Xapian::QueryParser and Xapian::Letor
3 */
4 /* Copyright (C) 2004,2005,2006,2007,2008,2009,2010,2015 Olly Betts
5 * Copyright (C) 2011 Parth Gupta
6 * Copyright (C) 2016 Ayush Tomar
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License, or (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
21 * USA
24 #include <config.h>
26 #include <xapian.h>
27 #include <xapian-letor.h>
29 #include <iostream>
30 #include <sstream>
31 #include <string>
33 #include "gnu_getopt.h"
35 using namespace std;
// Program name and one-line description.  These stay as macros because they
// are spliced into adjacent string literals at compile time.
#define PROG_NAME "xapian-rank"
#define PROG_DESC "Xapian command line search tool with Learning to Rank Facility"

// Codes reported by getopt for the long-only options; the values are kept
// outside the printable range so they cannot clash with short option letters.
enum {
    OPT_HELP = 1,
    OPT_VERSION = 2
};
// Stopwords: common English words handed to the query parser's stopper.
// Both the strings and the pointers are const - the table is read-only.
static const char * const sw[] = {
    "a", "about", "an", "and", "are", "as", "at",
    "be", "by",
    "en",
    "for", "from",
    "how",
    "i", "in", "is", "it",
    "of", "on", "or",
    "that", "the", "this", "to",
    "was", "what", "when", "where", "which", "who", "why", "will", "with"
};
56 static void show_usage() {
57 cout << "Usage: " PROG_NAME " [OPTIONS] MODEL_METADATA_KEY QUERY\n"
58 "NB: QUERY should be quoted to protect it from the shell.\n\n"
59 "Options:\n"
60 " -d, --db=DIRECTORY path to database to search\n"
61 " -m, --msize=MSIZE maximum number of matches to return\n"
62 " -s, --stemmer=LANG set the stemming language, the default is 'english'\n"
63 " (pass 'none' to disable stemming)\n"
64 " -p, --prefix=PFX:TERMPFX Add a prefix\n"
65 " -b, --boolean-prefix=PFX:TERMPFX Add a boolean prefix\n"
66 " --help display this help and exit\n"
67 " --version output version information and exit\n";
70 int
71 main(int argc, char **argv)
72 try {
73 const char * opts = "d:f:m:s:p:b:h:v";
74 static const struct option long_opts[] = {
75 { "db", required_argument, 0, 'd' },
76 { "msize", required_argument, 0, 'm' },
77 { "stemmer", required_argument, 0, 's' },
78 { "prefix", required_argument, 0, 'p' },
79 { "boolean-prefix", required_argument, 0, 'b' },
80 { "help", no_argument, 0, OPT_HELP },
81 { "version", no_argument, 0, OPT_VERSION },
82 { NULL, 0, 0, 0}
85 Xapian::SimpleStopper mystopper(sw, sw + sizeof(sw) / sizeof(sw[0]));
86 Xapian::Stem stemmer("english");
87 int msize = 10;
89 bool have_database = false;
91 string db_path;
92 Xapian::QueryParser parser;
93 parser.add_prefix("title", "S");
94 parser.add_prefix("subject", "S");
96 int c;
97 while ((c = gnu_getopt_long(argc, argv, opts, long_opts, 0)) != -1) {
98 switch (c) {
99 case 'd':
100 db_path = optarg;
101 have_database = true;
102 break;
103 case 'm':
104 msize = atoi(optarg);
105 break;
106 case 's':
107 try {
108 stemmer = Xapian::Stem(optarg);
109 } catch (const Xapian::InvalidArgumentError &) {
110 cerr << "Unknown stemming language '" << optarg << "'.\n"
111 "Available language names are: "
112 << Xapian::Stem::get_available_languages() << endl;
113 exit(1);
115 break;
116 case 'p': case 'b': {
117 const char * colon = strchr(optarg, ':');
118 if (colon == NULL) {
119 cerr << argv[0] << ": need ':' when setting prefix" << endl;
120 exit(1);
122 string prefix(optarg, colon - optarg);
123 string termprefix(colon + 1);
124 if (c == 'b') {
125 parser.add_boolean_prefix(prefix, termprefix);
126 } else {
127 parser.add_prefix(prefix, termprefix);
129 break;
131 case OPT_HELP:
132 cout << PROG_NAME " - " PROG_DESC "\n\n";
133 show_usage();
134 exit(0);
135 case OPT_VERSION:
136 cout << PROG_NAME " - " PACKAGE_STRING << endl;
137 exit(0);
138 case ':': // missing parameter
139 case '?': // unknown option
140 show_usage();
141 exit(1);
145 if (argc - optind != 2) {
146 show_usage();
147 exit(1);
150 string model_metadata_key = argv[optind];
152 Xapian::Database db(db_path);
154 parser.set_database(db);
155 parser.set_default_op(Xapian::Query::OP_OR);
156 parser.set_stemmer(stemmer);
157 parser.set_stemming_strategy(Xapian::QueryParser::STEM_SOME);
158 parser.set_stopper(&mystopper);
160 string qq = argv[optind + 1];
162 Xapian::Query query_no_prefix = parser.parse_query(qq,
163 parser.FLAG_DEFAULT|
164 parser.FLAG_SPELLING_CORRECTION);
165 // query with title as default prefix
166 Xapian::Query query_default_prefix = parser.parse_query(qq,
167 parser.FLAG_DEFAULT|
168 parser.FLAG_SPELLING_CORRECTION,
169 "S");
170 // Combine queries
171 Xapian::Query query = Xapian::Query(Xapian::Query::OP_OR, query_no_prefix, query_default_prefix);
173 const string & correction = parser.get_corrected_query_string();
174 if (!correction.empty())
175 cout << "Did you mean: " << correction << "\n\n";
177 cout << "Parsed Query: " << query.get_description() << endl;
179 if (!have_database) {
180 cout << "No database specified so not running the query." << endl;
181 exit(0);
184 Xapian::Enquire enquire(db);
185 enquire.set_query(query);
187 Xapian::MSet mset = enquire.get_mset(0, msize);
189 if (mset.empty()) {
190 cout << "Empty MSet. No documents could be retrieved with the given Query." << endl;
191 exit(1);
194 cout << "Docids before re-ranking by LTR model:" << endl;
195 for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); ++i) {
196 Xapian::Document doc = i.get_document();
197 string data = doc.get_data();
198 cout << *i << ": [" << i.get_weight() << "]\n" << data << "\n";
201 // Initialise Ranker object with ListNETRanker instance, db path and query.
202 // See Ranker documentation for available Ranker subclass options.
203 Xapian::Ranker * ranker = new Xapian::ListNETRanker();
204 ranker->set_database_path(db_path);
205 ranker->set_query(query);
207 // Get vector of re-ranked docids
208 ranker->rank(mset, model_metadata_key);
210 cout << "Docids after re-ranking by LTR model:\n" << endl;
212 for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); ++i) {
213 Xapian::Document doc = i.get_document();
214 string data = doc.get_data();
215 cout << *i << ": [" << i.get_weight() << "]\n" << data << "\n";
217 delete ranker;
219 cout << flush;
220 } catch (const Xapian::QueryParserError & e) {
221 cout << "Couldn't parse query: " << e.get_msg() << endl;
222 exit(1);
223 } catch (const Xapian::Error & err) {
224 cout << err.get_description() << endl;
225 exit(1);