Make glass the default backend
[xapian.git] / xapian-core / tests / api_percentages.cc
blobe83fa96643f6b28829b36a1e591a2ab041dc769c
1 /** @file api_percentages.cc
2 * @brief Tests of percentage calculations.
3 */
4 /* Copyright (C) 2008,2009 Lemur Consulting Ltd
5 * Copyright (C) 2008,2009,2010,2011,2012,2014 Olly Betts
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #include <config.h>
24 #include "api_percentages.h"
26 #include <xapian.h>
28 #include "apitest.h"
29 #include "backendmanager_local.h"
30 #include "str.h"
31 #include "testutils.h"
33 #include <cfloat>
35 using namespace std;
37 // Test that percentages reported are the same regardless of which part of the
38 // mset is returned, for sort-by-value search. Regression test for bug#216 in
39 // 1.0.10 and earlier with returned percentages.
40 DEFINE_TESTCASE(consistency3, backend) {
41 Xapian::Database db(get_database("apitest_sortconsist"));
42 Xapian::Enquire enquire(db);
43 enquire.set_query(Xapian::Query("foo"));
44 enquire.set_sort_by_value(1, 0);
45 Xapian::doccount lots = 3;
46 Xapian::MSet bigmset = enquire.get_mset(0, lots);
47 TEST_EQUAL(bigmset.size(), lots);
48 for (Xapian::doccount start = 0; start < lots; ++start) {
49 tout << *bigmset[start] << ":" << bigmset[start].get_weight() << ":"
50 << bigmset[start].get_percent() << "%" << endl;
51 for (Xapian::doccount size = 0; size < lots - start; ++size) {
52 Xapian::MSet mset = enquire.get_mset(start, size);
53 if (mset.size()) {
54 TEST_EQUAL(start + mset.size(),
55 min(start + size, bigmset.size()));
56 } else if (size) {
57 TEST(start >= bigmset.size());
59 for (Xapian::doccount i = 0; i < mset.size(); ++i) {
60 TEST_EQUAL(*mset[i], *bigmset[start + i]);
61 TEST_EQUAL_DOUBLE(mset[i].get_weight(),
62 bigmset[start + i].get_weight());
63 TEST_EQUAL_DOUBLE(mset[i].get_percent(),
64 bigmset[start + i].get_percent());
68 return true;
71 class MyPostingSource : public Xapian::PostingSource {
72 vector<pair<Xapian::docid, double> > weights;
73 vector<pair<Xapian::docid, double> >::const_iterator i;
74 bool started;
76 MyPostingSource(const vector<pair<Xapian::docid, double> > &weights_,
77 double max_wt)
78 : weights(weights_), started(false)
80 set_maxweight(max_wt);
83 public:
84 MyPostingSource() : started(false) { }
86 PostingSource * clone() const
88 return new MyPostingSource(weights, get_maxweight());
91 void append_docweight(Xapian::docid did, double wt) {
92 weights.push_back(make_pair(did, wt));
93 if (wt > get_maxweight()) set_maxweight(wt);
96 void init(const Xapian::Database &) { started = false; }
98 double get_weight() const { return i->second; }
100 Xapian::doccount get_termfreq_min() const { return weights.size(); }
101 Xapian::doccount get_termfreq_est() const { return weights.size(); }
102 Xapian::doccount get_termfreq_max() const { return weights.size(); }
104 void next(double /*wt*/) {
105 if (!started) {
106 i = weights.begin();
107 started = true;
108 } else {
109 ++i;
113 bool at_end() const {
114 return (i == weights.end());
117 Xapian::docid get_docid() const { return i->first; }
119 string get_description() const {
120 return "MyPostingSource";
125 /// Test for rounding errors in percentage weight calculations and cutoffs.
126 DEFINE_TESTCASE(pctcutoff4, backend && !remote && !multi) {
127 // Find the number of DBL_EPSILONs to subtract which result in the
128 // percentage of the second hit being 49% instead of 50%.
129 int epsilons = 0;
130 Xapian::Database db(get_database("apitest_simpledata"));
131 Xapian::Enquire enquire(db);
132 while (true) {
133 MyPostingSource source;
134 source.append_docweight(1, 100);
135 source.append_docweight(2, 50 - epsilons * DBL_EPSILON);
136 enquire.set_query(Xapian::Query(&source));
137 Xapian::MSet mset = enquire.get_mset(0, 10);
138 TEST_EQUAL(mset.size(), 2);
139 if (mset[1].get_percent() != 50) break;
140 ++epsilons;
143 // Make a set of document weights including ones on either side of the
144 // 49% / 50% boundary.
145 MyPostingSource source;
146 source.append_docweight(1, 100);
147 source.append_docweight(2, 50);
148 source.append_docweight(3, 50 - (epsilons - 1) * DBL_EPSILON);
149 source.append_docweight(4, 50 - epsilons * DBL_EPSILON);
150 source.append_docweight(5, 25);
152 enquire.set_query(Xapian::Query(&source));
153 Xapian::MSet mset1 = enquire.get_mset(0, 10);
154 TEST_EQUAL(mset1.size(), 5);
155 TEST_EQUAL(mset1[2].get_percent(), 50);
156 TEST_EQUAL(mset1[3].get_percent(), 49);
158 // Use various different percentage cutoffs, and check that the values
159 // returned are as expected.
160 int percent = 100;
161 for (Xapian::MSetIterator i = mset1.begin(); i != mset1.end(); ++i) {
162 int new_percent = mset1.convert_to_percent(i);
163 tout << "mset1 item = " << i.get_percent() << "%\n";
164 if (new_percent != percent) {
165 enquire.set_cutoff(percent);
166 Xapian::MSet mset2 = enquire.get_mset(0, 10);
167 tout << "cutoff = " << percent << "%, "
168 "mset size = " << mset2.size() << "\n";
169 TEST_EQUAL(mset2.size(), i.get_rank());
170 percent = new_percent;
174 return true;
177 /// Check we throw for a percentage cutoff while sorting primarily by value.
178 DEFINE_TESTCASE(pctcutoff5, backend) {
179 Xapian::Database db(get_database("apitest_simpledata"));
180 Xapian::Enquire enquire(db);
181 enquire.set_query(Xapian::Query("test"));
182 enquire.set_cutoff(42);
183 Xapian::MSet mset;
185 enquire.set_sort_by_value(0, false);
186 TEST_EXCEPTION(Xapian::UnimplementedError, mset = enquire.get_mset(0, 10));
188 enquire.set_sort_by_value(0, true);
189 TEST_EXCEPTION(Xapian::UnimplementedError, mset = enquire.get_mset(0, 10));
191 enquire.set_sort_by_value_then_relevance(0, false);
192 TEST_EXCEPTION(Xapian::UnimplementedError, mset = enquire.get_mset(0, 10));
194 enquire.set_sort_by_value_then_relevance(0, true);
195 TEST_EXCEPTION(Xapian::UnimplementedError, mset = enquire.get_mset(0, 10));
197 return true;
200 // Regression test for bug fixed in 1.0.14.
201 DEFINE_TESTCASE(topercent3, remote) {
202 BackendManagerLocal local_manager;
203 local_manager.set_datadir(test_driver::get_srcdir() + "/testdata/");
204 Xapian::Database db;
205 db.add_database(get_database("apitest_simpledata"));
206 db.add_database(local_manager.get_database("apitest_simpledata"));
208 Xapian::Enquire enquire(db);
209 enquire.set_sort_by_value(1, false);
211 const char * terms[] = { "paragraph", "banana" };
212 enquire.set_query(Xapian::Query(Xapian::Query::OP_OR, terms, terms + 2));
214 Xapian::MSet mset = enquire.get_mset(0, 20);
216 Xapian::MSetIterator i;
217 for (i = mset.begin(); i != mset.end(); ++i) {
218 // We should never achieve 100%.
219 TEST_REL(i.get_percent(),<,100);
222 return true;
225 // Regression test for bug introduced temporarily by the "percent without
226 // termlist" patch.
227 DEFINE_TESTCASE(topercent4, backend) {
228 Xapian::Enquire enquire(get_database("apitest_simpledata"));
230 Xapian::Query query(Xapian::Query::OP_FILTER,
231 Xapian::Query("paragraph"),
232 Xapian::Query("queri"));
233 query = Xapian::Query(Xapian::Query::OP_XOR,
234 query, Xapian::Query("rubbish"));
236 enquire.set_query(query);
237 Xapian::MSet mset = enquire.get_mset(0, 10);
239 // We should get 50% not 33%.
240 TEST(!mset.empty());
241 TEST_EQUAL(mset[0].get_percent(), 50);
243 return true;
246 /// Test that a search with a non-existent term doesn't get 100%.
247 DEFINE_TESTCASE(topercent5, backend) {
248 Xapian::Enquire enquire(get_database("apitest_simpledata"));
249 Xapian::Query q(Xapian::Query::OP_OR,
250 Xapian::Query("paragraph"), Xapian::Query("xyzzy"));
251 enquire.set_query(q);
252 Xapian::MSet mset = enquire.get_mset(0, 10);
253 TEST(!mset.empty());
254 TEST(mset[0].get_percent() < 100);
255 // It would be odd if the non-existent term was worth more, but in 1.0.x
256 // the top hit got 4% in this testcase. In 1.2.x it gets 50%, which is
257 // better, but >50% would be more natural.
258 TEST(mset[0].get_percent() >= 50);
259 return true;
262 /// Test that OP_FILTER doesn't affect percentages.
263 // Regression test for bug#590 fixed in 1.3.1 and 1.2.10.
264 DEFINE_TESTCASE(topercent6, backend) {
265 Xapian::Enquire enquire(get_database("apitest_simpledata"));
266 Xapian::Query q(Xapian::Query::OP_OR,
267 Xapian::Query("rubbish"), Xapian::Query("letter"));
268 enquire.set_query(q);
269 Xapian::MSet mset = enquire.get_mset(0, 10);
270 TEST(!mset.empty());
271 TEST(mset[0].get_percent() < 100);
273 q = Xapian::Query(q.OP_FILTER, q, Xapian::Query("this"));
274 enquire.set_query(q);
275 Xapian::MSet mset2 = enquire.get_mset(0, 10);
276 TEST(!mset2.empty());
277 TEST_EQUAL(mset[0].get_percent(), mset2[0].get_percent());
278 return true;
281 static void
282 make_topercent7_db(Xapian::WritableDatabase &db, const string &)
284 for (int i = 1; i <= 6; ++i) {
285 Xapian::Document d;
286 d.set_data(str(i));
287 d.add_term("boom", 2 + (i - 4)*(i - 2));
288 if (i != 5)
289 d.add_boolean_term("XCAT122");
290 db.add_document(d);
292 db.commit();
295 /// Test that a term with wdf always = 0 gets counted.
296 // Regression test for bug introduced in 1.2.10 by the original fix for #590,
297 // and fixed in 1.2.13 (and in trunk before 1.3.1 was released).
298 DEFINE_TESTCASE(topercent7, generated) {
299 Xapian::Database db(get_database("topercent7", make_topercent7_db));
301 Xapian::Query q;
302 q = Xapian::Query(q.OP_OR, Xapian::Query("tomb"), Xapian::Query("boom"));
303 q = Xapian::Query(q.OP_AND, q, Xapian::Query("XCAT122"));
305 Xapian::Enquire enq(db);
306 enq.set_query(q);
307 Xapian::MSet m = enq.get_mset(0, 10);
308 TEST(!m.empty());
309 TEST_REL(m[0].get_percent(),>,60);
310 return true;
313 class ZWeight : public Xapian::Weight {
314 public:
315 ZWeight() { }
317 void init(double) { }
319 Weight * clone() const {
320 return new ZWeight();
323 double get_sumpart(Xapian::termcount,
324 Xapian::termcount,
325 Xapian::termcount) const {
326 return 0.0;
329 double get_maxpart() const {
330 return 0.0;
333 double get_sumextra(Xapian::termcount doclen,
334 Xapian::termcount) const {
335 return 1.0 / doclen;
338 double get_maxextra() const {
339 return 1.0;
343 /// Regression test for bug introduced in 1.3.1 and fixed in 1.3.2.
344 DEFINE_TESTCASE(checkzeromaxpartopt1, backend && !remote) {
345 Xapian::Database db = get_database("apitest_simpledata");
346 Xapian::Enquire enquire(db);
347 // "this" indexes all documents, so will get replaced with MatchAll
348 // internally.
349 const char * terms[] = { "this", "spoken", "blank" };
350 enquire.set_query(Xapian::Query(Xapian::Query::OP_OR, terms, terms + 3));
351 ZWeight wt;
352 enquire.set_weighting_scheme(wt);
353 Xapian::MSet mset = enquire.get_mset(0, db.get_doccount());
354 // No documents match all 3 terms, so the score shouldn't be 100%.
355 TEST(mset[0].get_percent() != 100);
356 // Make sure the percentage score isn't 0 or 1 though.
357 TEST_REL(mset[0].get_percent(), >, 1);
358 return true;