Fix masking of bits in serialised query
[xapian.git] / xapian-applications / omega / weight.cc
blob5d656a6be0b892777f4ddf6b0d8ea4bd4a0601fd
1 /** @file weight.cc
2 * @brief Set the weighting scheme for Omega
3 */
4 /* Copyright (C) 2009,2013 Olly Betts
5 * Copyright (C) 2013 Aarsh Shah
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 */
22 #include <config.h>
24 #include "weight.h"
26 #include "stringutils.h"
28 #include <cstdlib>
29 #include "safeerrno.h"
30 #include "common/noreturn.h"
32 #ifndef XAPIAN_AT_LEAST
33 #define XAPIAN_AT_LEAST(A,B,C) \
34 (XAPIAN_MAJOR_VERSION > (A) || \
35 (XAPIAN_MAJOR_VERSION == (A) && \
36 (XAPIAN_MINOR_VERSION > (B) || \
37 (XAPIAN_MINOR_VERSION == (B) && XAPIAN_REVISION >= (C)))))
38 #endif
40 using namespace std;
42 XAPIAN_NORETURN(static void
43 parameter_error(const char * param, const string & scheme));
45 static void
46 parameter_error(const char * msg, const string & scheme)
48 string m(msg);
49 m += ": '";
50 m += scheme;
51 m += "'";
52 throw m;
/** Parse a floating point number from the front of a C string.
 *
 *  The conversion is done by strtod(), so leading whitespace is skipped.
 *
 *  @param p        In: address of a pointer to the text to parse.  Out: on
 *                  success the pointer is advanced to just past the parsed
 *                  number; on failure it is left unchanged.
 *  @param ptr_val  Where to store the parsed value; only written on success.
 *
 *  @return true if a number was parsed without strtod() setting errno,
 *          false if nothing could be converted or errno was set.
 */
static bool
double_param(const char ** p, double * ptr_val)
{
    const char * const start = *p;
    char * stop = NULL;

    // Clear errno first so that a non-zero value afterwards can only have
    // come from this strtod() call.
    errno = 0;
    const double parsed = strtod(start, &stop);

    const bool nothing_converted = (stop == start);
    if (nothing_converted || errno != 0) {
        return false;
    }

    *p = stop;
    *ptr_val = parsed;
    return true;
}
67 #if XAPIAN_AT_LEAST(1,3,2)
68 static bool
69 type_smoothing_param(const char ** p, Xapian::Weight::type_smoothing * ptr_val)
71 char *end;
72 errno = 0;
73 int v = strtol(*p, &end, 10);
74 if (*p == end || errno || v < 1 || v > 4)
75 return false;
76 *p = end;
77 static const Xapian::Weight::type_smoothing smooth_tab[4] = {
78 Xapian::Weight::TWO_STAGE_SMOOTHING,
79 Xapian::Weight::DIRICHLET_SMOOTHING,
80 Xapian::Weight::ABSOLUTE_DISCOUNT_SMOOTHING,
81 Xapian::Weight::JELINEK_MERCER_SMOOTHING
83 *ptr_val = smooth_tab[v - 1];
84 return true;
86 #endif
88 void
89 set_weighting_scheme(Xapian::Enquire & enq, const map<string, string> & opt,
90 bool force_boolean)
92 if (!force_boolean) {
93 map<string, string>::const_iterator i = opt.find("weighting");
94 if (i == opt.end()) return;
96 const string & scheme = i->second;
97 if (scheme.empty()) return;
99 if (startswith(scheme, "bm25")) {
100 const char *p = scheme.c_str() + 4;
101 if (*p == '\0') {
102 enq.set_weighting_scheme(Xapian::BM25Weight());
103 return;
105 if (C_isspace((unsigned char)*p)) {
106 double k1 = 1;
107 double k2 = 0;
108 double k3 = 1;
109 double b = 0.5;
110 double min_normlen = 0.5;
111 if (!double_param(&p, &k1))
112 parameter_error("Parameter 1 (k1) is invalid", scheme);
113 if (*p && !double_param(&p, &k2))
114 parameter_error("Parameter 2 (k2) is invalid", scheme);
115 if (*p && !double_param(&p, &k3))
116 parameter_error("Parameter 3 (k3) is invalid", scheme);
117 if (*p && !double_param(&p, &b))
118 parameter_error("Parameter 4 (b) is invalid", scheme);
119 if (*p && !double_param(&p, &min_normlen))
120 parameter_error("Parameter 5 (min_normlen) is invalid", scheme);
121 if (*p)
122 parameter_error("Extra data after parameter 5", scheme);
123 Xapian::BM25Weight wt(k1, k2, k3, b, min_normlen);
124 enq.set_weighting_scheme(wt);
125 return;
129 if (startswith(scheme, "trad")) {
130 const char *p = scheme.c_str() + 4;
131 if (*p == '\0') {
132 enq.set_weighting_scheme(Xapian::TradWeight());
133 return;
135 if (C_isspace((unsigned char)*p)) {
136 double k;
137 if (!double_param(&p, &k))
138 parameter_error("Parameter is invalid", scheme);
139 if (*p)
140 parameter_error("Extra data after parameter", scheme);
141 enq.set_weighting_scheme(Xapian::TradWeight(k));
142 return;
146 #if XAPIAN_AT_LEAST(1,3,1)
147 if (startswith(scheme, "tfidf")) {
148 const char *p = scheme.c_str() + 5;
149 if (*p == '\0') {
150 enq.set_weighting_scheme(Xapian::TfIdfWeight());
151 return;
153 if (C_isspace((unsigned char)*p)) {
154 enq.set_weighting_scheme(Xapian::TfIdfWeight(p + 1));
155 return;
158 #endif
160 #if XAPIAN_AT_LEAST(1,3,2)
161 if (startswith(scheme, "inl2")) {
162 const char *p = scheme.c_str() + 4;
163 if (*p == '\0') {
164 enq.set_weighting_scheme(Xapian::InL2Weight());
165 return;
167 if (C_isspace((unsigned char)*p)) {
168 double k;
169 if (!double_param(&p, &k))
170 parameter_error("Parameter is invalid", scheme);
171 if (*p)
172 parameter_error("Extra data after parameter", scheme);
173 enq.set_weighting_scheme(Xapian::InL2Weight(k));
174 return;
178 if (startswith(scheme, "ifb2")) {
179 const char *p = scheme.c_str() + 4;
180 if (*p == '\0') {
181 enq.set_weighting_scheme(Xapian::IfB2Weight());
182 return;
184 if (C_isspace((unsigned char)*p)) {
185 double k;
186 if (!double_param(&p, &k))
187 parameter_error("Parameter is invalid", scheme);
188 if (*p)
189 parameter_error("Extra data after parameter", scheme);
190 enq.set_weighting_scheme(Xapian::IfB2Weight(k));
191 return;
195 if (startswith(scheme, "ineb2")) {
196 const char *p = scheme.c_str() + 5;
197 if (*p == '\0') {
198 enq.set_weighting_scheme(Xapian::IneB2Weight());
199 return;
201 if (C_isspace((unsigned char)*p)) {
202 double k;
203 if (!double_param(&p, &k))
204 parameter_error("Parameter is invalid", scheme);
205 if (*p)
206 parameter_error("Extra data after parameter", scheme);
207 enq.set_weighting_scheme(Xapian::IneB2Weight(k));
208 return;
212 if (startswith(scheme, "bb2")) {
213 const char *p = scheme.c_str() + 3;
214 if (*p == '\0') {
215 enq.set_weighting_scheme(Xapian::BB2Weight());
216 return;
218 if (C_isspace((unsigned char)*p)) {
219 double k;
220 if (!double_param(&p, &k))
221 parameter_error("Parameter is invalid", scheme);
222 if (*p)
223 parameter_error("Extra data after parameter", scheme);
224 enq.set_weighting_scheme(Xapian::BB2Weight(k));
225 return;
229 if (startswith(scheme, "dlh")) {
230 const char *p = scheme.c_str() + 3;
231 if (*p == '\0') {
232 enq.set_weighting_scheme(Xapian::DLHWeight());
233 return;
235 if (C_isspace((unsigned char)*p)) {
236 throw "No parameters are required for DLH";
240 if (startswith(scheme, "pl2")) {
241 const char *p = scheme.c_str() + 3;
242 if (*p == '\0') {
243 enq.set_weighting_scheme(Xapian::PL2Weight());
244 return;
246 if (C_isspace((unsigned char)*p)) {
247 double k;
248 if (!double_param(&p, &k))
249 parameter_error("Parameter is invalid", scheme);
250 if (*p)
251 parameter_error("Extra data after parameter", scheme);
252 enq.set_weighting_scheme(Xapian::PL2Weight(k));
253 return;
257 if (startswith(scheme, "dph")) {
258 const char *p = scheme.c_str() + 3;
259 if (*p == '\0') {
260 enq.set_weighting_scheme(Xapian::DPHWeight());
261 return;
263 if (C_isspace((unsigned char)*p)) {
264 throw "No parameters are required for DPH";
267 #endif
269 #if XAPIAN_AT_LEAST(1,3,2)
270 if (startswith(scheme, "lm")) {
271 const char *p = scheme.c_str() + 2;
272 if (*p == '\0') {
273 enq.set_weighting_scheme(Xapian::LMWeight());
274 return;
276 if (C_isspace((unsigned char)*p)) {
277 double param_log = 0;
278 Xapian::Weight::type_smoothing type = Xapian::Weight::TWO_STAGE_SMOOTHING;
279 double smoothing1 = 0.7;
280 double smoothing2 = 2000;
281 if (!double_param(&p, &param_log))
282 parameter_error("Parameter 1 (log) is invalid", scheme);
283 if (*p && !type_smoothing_param(&p, &type))
284 parameter_error("Parameter 2 (smoothing_type) is invalid", scheme);
285 if (*p && !double_param(&p, &smoothing1))
286 parameter_error("Parameter 3 (smoothing1) is invalid", scheme);
287 if (*p && !double_param(&p, &smoothing2))
288 parameter_error("Parameter 4 (smoothing2) is invalid", scheme);
289 if (*p)
290 parameter_error("Extra data after parameter 4", scheme);
291 Xapian::LMWeight wt(param_log, type, smoothing1, smoothing2);
292 enq.set_weighting_scheme(wt);
293 return;
296 #endif
298 if (scheme != "bool") {
299 throw "Unknown $opt{weighting} setting: " + scheme;
303 enq.set_weighting_scheme(Xapian::BoolWeight());