1 /** @file api_percentages.cc
2 * @brief Tests of percentage calculations.
4 /* Copyright (C) 2008,2009 Lemur Consulting Ltd
5 * Copyright (C) 2008,2009,2010,2011,2012,2014 Olly Betts
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
24 #include "api_percentages.h"
29 #include "backendmanager_local.h"
31 #include "testutils.h"
37 // Test that percentages reported are the same regardless of which part of the
38 // mset is returned, for sort-by-value search. Regression test for bug#216 in
39 // 1.0.10 and earlier with returned percentages.
// Checks that hits fetched through smaller get_mset(start, size) windows
// agree with the corresponding entries of one larger MSet when results are
// sorted by value slot 1.
// NOTE(review): judging by the embedded original line numbers, lines 53, 56,
// 58 and everything after 64 are absent from this view, so the conditionals
// around the size checks and all closing braces are not visible here.
40 DEFINE_TESTCASE(consistency3
, backend
) {
41 Xapian::Database
db(get_database("apitest_sortconsist"));
42 Xapian::Enquire
enquire(db
);
43 enquire
.set_query(Xapian::Query("foo"));
44 enquire
.set_sort_by_value(1, 0);
45 Xapian::doccount lots
= 3;
// Reference result: the first `lots` hits fetched in a single MSet, which
// must contain exactly `lots` entries.
46 Xapian::MSet bigmset
= enquire
.get_mset(0, lots
);
47 TEST_EQUAL(bigmset
.size(), lots
);
// For each start offset, log the reference hit (dereferenced iterator,
// weight and percentage) to the test output stream.
48 for (Xapian::doccount start
= 0; start
< lots
; ++start
) {
49 tout
<< *bigmset
[start
] << ":" << bigmset
[start
].get_weight() << ":"
50 << bigmset
[start
].get_percent() << "%" << endl
;
// Try every window size from 0 up to (but not including) lots - start.
51 for (Xapian::doccount size
= 0; size
< lots
- start
; ++size
) {
52 Xapian::MSet mset
= enquire
.get_mset(start
, size
);
// The window must end where the smaller of the requested end and the
// reference MSet's end lies.
// NOTE(review): presumably guarded by a condition on original line 53,
// which is not visible - confirm against the full file.
54 TEST_EQUAL(start
+ mset
.size(),
55 min(start
+ size
, bigmset
.size()));
// NOTE(review): presumably the alternative branch (original line 56 is not
// visible); taken literally alongside the check above it could not hold for
// start == 0 - confirm against the full file.
57 TEST(start
>= bigmset
.size());
// Every hit in the window must match the reference hit at the same absolute
// rank: same dereferenced value, same weight and same percentage.
59 for (Xapian::doccount i
= 0; i
< mset
.size(); ++i
) {
60 TEST_EQUAL(*mset
[i
], *bigmset
[start
+ i
]);
61 TEST_EQUAL_DOUBLE(mset
[i
].get_weight(),
62 bigmset
[start
+ i
].get_weight());
63 TEST_EQUAL_DOUBLE(mset
[i
].get_percent(),
64 bigmset
[start
+ i
].get_percent());
// Posting source used by the tests in this file: it serves an explicit list
// of (docid, weight) pairs in the order they were appended.
// NOTE(review): several original lines are absent from this view (the
// declaration of `started`, any access specifier, the tail of the first
// constructor's parameter list, and the body of next()), so the comments
// below only describe what is visible.
71 class MyPostingSource
: public Xapian::PostingSource
{
// The (docid, weight) pairs to serve.
72 vector
<pair
<Xapian::docid
, double> > weights
;
// Current position within `weights`.
73 vector
<pair
<Xapian::docid
, double> >::const_iterator i
;
// Constructor used by clone(): copies an existing weight list and sets the
// maximum weight from `max_wt`.
// NOTE(review): the declaration of the `max_wt` parameter is not visible.
76 MyPostingSource(const vector
<pair
<Xapian::docid
, double> > &weights_
,
78 : weights(weights_
), started(false)
80 set_maxweight(max_wt
);
// Default constructor: starts with an empty weight list.
84 MyPostingSource() : started(false) { }
// Returns a newly allocated copy carrying the same weights and the same
// maximum weight.
86 PostingSource
* clone() const
88 return new MyPostingSource(weights
, get_maxweight());
// Appends one (docid, weight) entry and raises the maximum weight if this
// weight exceeds the current maximum.
91 void append_docweight(Xapian::docid did
, double wt
) {
92 weights
.push_back(make_pair(did
, wt
));
93 if (wt
> get_maxweight()) set_maxweight(wt
);
// Resets the "started" flag; the database argument is ignored.
96 void init(const Xapian::Database
&) { started
= false; }
// Weight of the entry at the current position.
98 double get_weight() const { return i
->second
; }
// All three term frequency bounds are the number of entries appended.
100 Xapian::doccount
get_termfreq_min() const { return weights
.size(); }
101 Xapian::doccount
get_termfreq_est() const { return weights
.size(); }
102 Xapian::doccount
get_termfreq_max() const { return weights
.size(); }
// Advances the source; the minimum-weight argument is unused.
// NOTE(review): the body (original lines 105-111) is not visible here;
// presumably it uses `started` to position `i` on the first call - confirm.
104 void next(double /*wt*/) {
// True once the position has moved past the last entry.
113 bool at_end() const {
114 return (i
== weights
.end());
// Document id of the entry at the current position.
117 Xapian::docid
get_docid() const { return i
->first
; }
119 string
get_description() const {
120 return "MyPostingSource";
125 /// Test for rounding errors in percentage weight calculations and cutoffs.
// Two-phase test. Phase one searches for the value of `epsilons` at which a
// weight of 50 - epsilons * DBL_EPSILON (against a top weight of 100) stops
// being reported as 50%. Phase two builds weights on both sides of that
// boundary and checks that percentage cutoffs return the expected number
// of hits.
// NOTE(review): the declarations of `epsilons` and `percent`, the loop that
// the `break` below leaves, and all closing braces are on original lines
// that are absent from this view.
126 DEFINE_TESTCASE(pctcutoff4
, backend
&& !remote
&& !multi
) {
127 // Find the number of DBL_EPSILONs to subtract which result in the
128 // percentage of the second hit being 49% instead of 50%.
130 Xapian::Database
db(get_database("apitest_simpledata"));
131 Xapian::Enquire
enquire(db
);
// Probe: document 1 has weight 100, document 2 sits `epsilons` epsilons
// below 50.
// NOTE(review): presumably the body of a loop that increments `epsilons`
// on each pass - the loop header and increment are not visible; confirm.
133 MyPostingSource source
;
134 source
.append_docweight(1, 100);
135 source
.append_docweight(2, 50 - epsilons
* DBL_EPSILON
);
136 enquire
.set_query(Xapian::Query(&source
));
137 Xapian::MSet mset
= enquire
.get_mset(0, 10);
138 TEST_EQUAL(mset
.size(), 2);
// Stop as soon as the second hit is no longer reported as 50%.
139 if (mset
[1].get_percent() != 50) break;
143 // Make a set of document weights including ones on either side of the
144 // 49% / 50% boundary.
145 MyPostingSource source
;
146 source
.append_docweight(1, 100);
147 source
.append_docweight(2, 50);
// Document 3 is one epsilon short of the boundary found above (expected
// 50%); document 4 is exactly on it (expected 49%).
148 source
.append_docweight(3, 50 - (epsilons
- 1) * DBL_EPSILON
);
149 source
.append_docweight(4, 50 - epsilons
* DBL_EPSILON
);
150 source
.append_docweight(5, 25);
152 enquire
.set_query(Xapian::Query(&source
));
153 Xapian::MSet mset1
= enquire
.get_mset(0, 10);
154 TEST_EQUAL(mset1
.size(), 5);
155 TEST_EQUAL(mset1
[2].get_percent(), 50);
156 TEST_EQUAL(mset1
[3].get_percent(), 49);
158 // Use various different percentage cutoffs, and check that the values
159 // returned are as expected.
// NOTE(review): `percent` is declared on an original line not visible here
// (line 160); its initial value cannot be confirmed from this view.
161 for (Xapian::MSetIterator i
= mset1
.begin(); i
!= mset1
.end(); ++i
) {
162 int new_percent
= mset1
.convert_to_percent(i
);
163 tout
<< "mset1 item = " << i
.get_percent() << "%\n";
// Each time the percentage changes, re-run the search with the previous
// percentage as the cutoff: the result size must equal the rank of the
// current hit.
164 if (new_percent
!= percent
) {
165 enquire
.set_cutoff(percent
);
166 Xapian::MSet mset2
= enquire
.get_mset(0, 10);
167 tout
<< "cutoff = " << percent
<< "%, "
168 "mset size = " << mset2
.size() << "\n";
169 TEST_EQUAL(mset2
.size(), i
.get_rank());
170 percent
= new_percent
;
177 /// Check we throw for a percentage cutoff while sorting primarily by value.
178 DEFINE_TESTCASE(pctcutoff5
, backend
) {
179 Xapian::Database
db(get_database("apitest_simpledata"));
180 Xapian::Enquire
enquire(db
);
181 enquire
.set_query(Xapian::Query("test"));
182 enquire
.set_cutoff(42);
185 enquire
.set_sort_by_value(0, false);
186 TEST_EXCEPTION(Xapian::UnimplementedError
, mset
= enquire
.get_mset(0, 10));
188 enquire
.set_sort_by_value(0, true);
189 TEST_EXCEPTION(Xapian::UnimplementedError
, mset
= enquire
.get_mset(0, 10));
191 enquire
.set_sort_by_value_then_relevance(0, false);
192 TEST_EXCEPTION(Xapian::UnimplementedError
, mset
= enquire
.get_mset(0, 10));
194 enquire
.set_sort_by_value_then_relevance(0, true);
195 TEST_EXCEPTION(Xapian::UnimplementedError
, mset
= enquire
.get_mset(0, 10));
200 // Regression test for bug fixed in 1.0.14.
201 DEFINE_TESTCASE(topercent3
, remote
) {
202 BackendManagerLocal local_manager
;
203 local_manager
.set_datadir(test_driver::get_srcdir() + "/testdata/");
205 db
.add_database(get_database("apitest_simpledata"));
206 db
.add_database(local_manager
.get_database("apitest_simpledata"));
208 Xapian::Enquire
enquire(db
);
209 enquire
.set_sort_by_value(1, false);
211 const char * terms
[] = { "paragraph", "banana" };
212 enquire
.set_query(Xapian::Query(Xapian::Query::OP_OR
, terms
, terms
+ 2));
214 Xapian::MSet mset
= enquire
.get_mset(0, 20);
216 Xapian::MSetIterator i
;
217 for (i
= mset
.begin(); i
!= mset
.end(); ++i
) {
218 // We should never achieve 100%.
219 TEST_REL(i
.get_percent(),<,100);
225 // Regression test for bug introduced temporarily by the "percent without
227 DEFINE_TESTCASE(topercent4
, backend
) {
228 Xapian::Enquire
enquire(get_database("apitest_simpledata"));
230 Xapian::Query
query(Xapian::Query::OP_FILTER
,
231 Xapian::Query("paragraph"),
232 Xapian::Query("queri"));
233 query
= Xapian::Query(Xapian::Query::OP_XOR
,
234 query
, Xapian::Query("rubbish"));
236 enquire
.set_query(query
);
237 Xapian::MSet mset
= enquire
.get_mset(0, 10);
239 // We should get 50% not 33%.
241 TEST_EQUAL(mset
[0].get_percent(), 50);
246 /// Test that a search with a non-existent term doesn't get 100%.
247 DEFINE_TESTCASE(topercent5
, backend
) {
248 Xapian::Enquire
enquire(get_database("apitest_simpledata"));
249 Xapian::Query
q(Xapian::Query::OP_OR
,
250 Xapian::Query("paragraph"), Xapian::Query("xyzzy"));
251 enquire
.set_query(q
);
252 Xapian::MSet mset
= enquire
.get_mset(0, 10);
254 TEST(mset
[0].get_percent() < 100);
255 // It would be odd if the non-existent term was worth more, but in 1.0.x
256 // the top hit got 4% in this testcase. In 1.2.x it gets 50%, which is
257 // better, but >50% would be more natural.
258 TEST(mset
[0].get_percent() >= 50);
262 /// Test that OP_FILTER doesn't affect percentages.
263 // Regression test for bug#590 fixed in 1.3.1 and 1.2.10.
264 DEFINE_TESTCASE(topercent6
, backend
) {
265 Xapian::Enquire
enquire(get_database("apitest_simpledata"));
266 Xapian::Query
q(Xapian::Query::OP_OR
,
267 Xapian::Query("rubbish"), Xapian::Query("letter"));
268 enquire
.set_query(q
);
269 Xapian::MSet mset
= enquire
.get_mset(0, 10);
271 TEST(mset
[0].get_percent() < 100);
273 q
= Xapian::Query(q
.OP_FILTER
, q
, Xapian::Query("this"));
274 enquire
.set_query(q
);
275 Xapian::MSet mset2
= enquire
.get_mset(0, 10);
276 TEST(!mset2
.empty());
277 TEST_EQUAL(mset
[0].get_percent(), mset2
[0].get_percent());
// Database generator passed to get_database() by the topercent7 testcase.
// The string parameter is unnamed and therefore unused.
// NOTE(review): the line carrying the return type (original line 281), the
// declaration of `d`, any other terms added to it, and the call that stores
// it in `db` are absent from this view.
282 make_topercent7_db(Xapian::WritableDatabase
&db
, const string
&)
// Six iterations, i = 1..6.
284 for (int i
= 1; i
<= 6; ++i
) {
// The second argument evaluates to 5, 2, 1, 2, 5, 10 for i = 1..6.
287 d
.add_term("boom", 2 + (i
- 4)*(i
- 2));
289 d
.add_boolean_term("XCAT122");
295 /// Test that a term with wdf always = 0 gets counted.
296 // Regression test for bug introduced in 1.2.10 by the original fix for #590,
297 // and fixed in 1.2.13 (and in trunk before 1.3.1 was released).
// Searches the generated "topercent7" database for
// ("tomb" OR "boom") AND "XCAT122" and requires the top hit to score more
// than 60%.
// NOTE(review): the declaration of `q`, the call that hands `q` to `enq`,
// and the end of the function are on original lines absent from this view.
298 DEFINE_TESTCASE(topercent7
, generated
) {
299 Xapian::Database
db(get_database("topercent7", make_topercent7_db
));
// Build the query in two steps: the OR of the two terms first, then AND
// the result with the boolean term.
302 q
= Xapian::Query(q
.OP_OR
, Xapian::Query("tomb"), Xapian::Query("boom"));
303 q
= Xapian::Query(q
.OP_AND
, q
, Xapian::Query("XCAT122"));
305 Xapian::Enquire
enq(db
);
307 Xapian::MSet m
= enq
.get_mset(0, 10);
309 TEST_REL(m
[0].get_percent(),>,60);
// Custom weighting scheme, used by the checkzeromaxpartopt1 testcase via
// set_weighting_scheme().
// NOTE(review): the method bodies, the constructor and any access specifier
// are on original lines absent from this view, so the values these methods
// return cannot be stated from here. The testcase name suggests the
// per-term maximum is zero - confirm against the full file.
313 class ZWeight
: public Xapian::Weight
{
// No initialisation work; the argument is ignored.
317 void init(double) { }
// Returns a newly allocated, default-constructed ZWeight.
319 Weight
* clone() const {
320 return new ZWeight();
// Per-term weight contribution; the parameters are unnamed.
323 double get_sumpart(Xapian::termcount
,
325 Xapian::termcount
) const {
// Upper bound for get_sumpart().
329 double get_maxpart() const {
// Term-independent contribution; only `doclen` is named, so presumably the
// result depends on it.
333 double get_sumextra(Xapian::termcount doclen
,
334 Xapian::termcount
) const {
// Upper bound for get_sumextra().
338 double get_maxextra() const {
343 /// Regression test for bug introduced in 1.3.1 and fixed in 1.3.2.
344 DEFINE_TESTCASE(checkzeromaxpartopt1
, backend
&& !remote
) {
345 Xapian::Database db
= get_database("apitest_simpledata");
346 Xapian::Enquire
enquire(db
);
347 // "this" indexes all documents, so will get replaced with MatchAll
349 const char * terms
[] = { "this", "spoken", "blank" };
350 enquire
.set_query(Xapian::Query(Xapian::Query::OP_OR
, terms
, terms
+ 3));
352 enquire
.set_weighting_scheme(wt
);
353 Xapian::MSet mset
= enquire
.get_mset(0, db
.get_doccount());
354 // No documents match all 3 terms, so the score shouldn't be 100%.
355 TEST(mset
[0].get_percent() != 100);
356 // Make sure the percentage score isn't 0 or 1 though.
357 TEST_REL(mset
[0].get_percent(), >, 1);