Don't force the user to specify the metric
[xapian.git] / xapian-core / tests / api_weight.cc
blob0e8231fff172b6f3ca5c5f3f9272a1237188a956
1 /** @file api_weight.cc
2 * @brief tests of Xapian::Weight subclasses
3 */
4 /* Copyright (C) 2012,2013 Olly Betts
5 * Copyright (C) 2013 Aarsh Shah
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #include <config.h>
24 #include "api_weight.h"
25 #include <cmath>
27 #include <xapian.h>
29 #include "apitest.h"
30 #include "testutils.h"
32 using namespace std;
34 // Test exception for junk after serialised weight.
35 DEFINE_TESTCASE(tradweight3, !backend) {
36 Xapian::TradWeight wt(42);
37 try {
38 Xapian::TradWeight t;
39 Xapian::TradWeight * t2 = t.unserialise(wt.serialise() + "X");
40 // Make sure we actually use the weight.
41 bool empty = t2->name().empty();
42 delete t2;
43 if (empty)
44 FAIL_TEST("Serialised TradWeight with junk appended unserialised to empty name!");
45 FAIL_TEST("Serialised TradWeight with junk appended unserialised OK");
46 } catch (const Xapian::SerialisationError &e) {
47 // Regression test for error in exception message fixed in 1.2.11 and
48 // 1.3.1.
49 TEST(e.get_msg().find("BM25") == string::npos);
51 return true;
54 // Test Exception for junk after serialised weight.
55 DEFINE_TESTCASE(unigramlmweight3, !backend) {
56 Xapian::LMWeight wt(79898.0, Xapian::Weight::JELINEK_MERCER_SMOOTHING, 0.5, 1.0);
57 try {
58 Xapian::LMWeight t;
59 Xapian::LMWeight * t2 = t.unserialise(wt.serialise() + "X");
60 // Make sure we actually use the weight.
61 bool empty = t2->name().empty();
62 delete t2;
63 if (empty)
64 FAIL_TEST("Serialised LMWeight with junk appended unserialised to empty name!");
65 FAIL_TEST("Serialised LMWeight with junk appended unserialised OK");
66 } catch (const Xapian::SerialisationError &e) {
67 // Good!
69 return true;
72 // Test exception for junk after serialised weight.
73 DEFINE_TESTCASE(bm25weight3, !backend) {
74 Xapian::BM25Weight wt(2.0, 0.5, 1.3, 0.6, 0.01);
75 try {
76 Xapian::BM25Weight b;
77 Xapian::BM25Weight * b2 = b.unserialise(wt.serialise() + "X");
78 // Make sure we actually use the weight.
79 bool empty = b2->name().empty();
80 delete b2;
81 if (empty)
82 FAIL_TEST("Serialised BM25Weight with junk appended unserialised to empty name!");
83 FAIL_TEST("Serialised BM25Weight with junk appended unserialised OK");
84 } catch (const Xapian::SerialisationError &) {
85 // Good!
87 return true;
90 // Test parameter combinations which should be unaffected by doclength.
91 DEFINE_TESTCASE(bm25weight4, backend) {
92 Xapian::Database db = get_database("apitest_simpledata");
93 Xapian::Enquire enquire(db);
94 enquire.set_query(Xapian::Query("paragraph"));
95 Xapian::MSet mset;
97 enquire.set_weighting_scheme(Xapian::BM25Weight(1, 0, 1, 0, 0.5));
98 mset = enquire.get_mset(0, 10);
99 TEST_EQUAL(mset.size(), 5);
100 // Expect: wdf has an effect on weight, but doclen doesn't.
101 TEST_REL(mset[0].get_weight(),>,mset[1].get_weight());
102 TEST_EQUAL_DOUBLE(mset[1].get_weight(), mset[2].get_weight());
103 TEST_REL(mset[2].get_weight(),>,mset[3].get_weight());
104 TEST_EQUAL_DOUBLE(mset[3].get_weight(), mset[4].get_weight());
106 enquire.set_weighting_scheme(Xapian::BM25Weight(0, 0, 1, 1, 0.5));
107 mset = enquire.get_mset(0, 10);
108 TEST_EQUAL(mset.size(), 5);
109 // Expect: neither wdf nor doclen affects weight.
110 TEST_EQUAL_DOUBLE(mset[0].get_weight(), mset[4].get_weight());
112 return true;
115 /// Test non-zero k2 with zero k1.
116 // Regression test for bug fixed in 1.2.17 and 1.3.2.
117 DEFINE_TESTCASE(bm25weight5, backend) {
118 Xapian::Database db = get_database("apitest_simpledata");
119 Xapian::Enquire enquire(db);
120 enquire.set_query(Xapian::Query("paragraph"));
121 Xapian::MSet mset;
123 enquire.set_weighting_scheme(Xapian::BM25Weight(0, 1, 1, 0.5, 0.5));
124 mset = enquire.get_mset(0, 10);
125 TEST_EQUAL(mset.size(), 5);
126 // Expect: wdf has no effect on weight; shorter docs rank higher.
127 mset_expect_order(mset, 3, 5, 1, 4, 2);
128 TEST_EQUAL_DOUBLE(mset[0].get_weight(), mset[1].get_weight());
129 TEST_REL(mset[1].get_weight(),>,mset[2].get_weight());
130 TEST_REL(mset[2].get_weight(),>,mset[3].get_weight());
131 TEST_REL(mset[3].get_weight(),>,mset[4].get_weight());
133 return true;
136 // Test exception for junk after serialised weight.
137 DEFINE_TESTCASE(inl2weight1, !backend) {
138 Xapian::InL2Weight wt(2.0);
139 try {
140 Xapian::InL2Weight b;
141 Xapian::InL2Weight * b2 = b.unserialise(wt.serialise() + "X");
142 // Make sure we actually use the weight.
143 bool empty = b2->name().empty();
144 delete b2;
145 if (empty)
146 FAIL_TEST("Serialised inl2weight with junk appended unserialised to empty name!");
147 FAIL_TEST("Serialised inl2weight with junk appended unserialised OK");
148 } catch (const Xapian::SerialisationError &) {
152 return true;
155 // Test for invalid values of c.
156 DEFINE_TESTCASE(inl2weight2, !backend) {
157 // InvalidArgumentError should be thrown if the parameter c is invalid.
158 TEST_EXCEPTION(Xapian::InvalidArgumentError,
159 Xapian::InL2Weight wt(-2.0));
161 TEST_EXCEPTION(Xapian::InvalidArgumentError,
162 Xapian::InL2Weight wt2(0.0));
164 /* Parameter c should be set to 1.0 by constructor if none is given. */
165 Xapian::InL2Weight weight2;
166 TEST_EQUAL(weight2.serialise(), Xapian::InL2Weight(1.0).serialise());
168 return true;
171 // Feature tests for Inl2Weight
172 DEFINE_TESTCASE(inl2weight3, backend) {
173 Xapian::Database db = get_database("apitest_simpledata");
174 Xapian::Enquire enquire(db);
175 Xapian::Query query("banana");
177 enquire.set_query(query);
178 enquire.set_weighting_scheme(Xapian::InL2Weight(2.0));
180 Xapian::MSet mset1;
181 mset1 = enquire.get_mset(0, 10);
182 TEST_EQUAL(mset1.size(), 1);
183 mset_expect_order(mset1, 6);
185 /* The value has been calculated in the python interpreter by looking at the
186 * database statistics. */
187 TEST_EQUAL_DOUBLE(mset1[0].get_weight(), 1.559711143842063);
189 // Test with OP_SCALE_WEIGHT.
190 enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 15.0));
191 enquire.set_weighting_scheme(Xapian::InL2Weight(2.0));
193 Xapian::MSet mset2;
194 mset2 = enquire.get_mset(0, 10);
195 TEST_EQUAL(mset2.size(), 1);
196 TEST_EQUAL_DOUBLE(15.0 * mset1[0].get_weight(), mset2[0].get_weight());
198 return true;
201 // Test exception for junk after serialised weight.
202 DEFINE_TESTCASE(ifb2weight1, !backend) {
203 Xapian::IfB2Weight wt(2.0);
204 try {
205 Xapian::IfB2Weight b;
206 Xapian::IfB2Weight * b2 = b.unserialise(wt.serialise() + "X");
207 // Make sure we actually use the weight.
208 bool empty = b2->name().empty();
209 delete b2;
210 if (empty)
211 FAIL_TEST("Serialised IfB2Weight with junk appended unserialised to empty name!");
212 FAIL_TEST("Serialised IfB2Weight with junk appended unserialised OK");
213 } catch (const Xapian::SerialisationError &) {
216 return true;
219 // Test for invalid values of c.
220 DEFINE_TESTCASE(ifb2weight2, !backend) {
221 // InvalidArgumentError should be thrown if the parameter c is invalid.
222 TEST_EXCEPTION(Xapian::InvalidArgumentError,
223 Xapian::IfB2Weight wt(-2.0));
225 TEST_EXCEPTION(Xapian::InvalidArgumentError,
226 Xapian::IfB2Weight wt2(0.0));
228 /* Parameter c should be set to 1.0 by constructor if none is given. */
229 Xapian::IfB2Weight weight2;
230 TEST_EQUAL(weight2.serialise(), Xapian::IfB2Weight(1.0).serialise());
232 return true;
235 // Feature test
236 DEFINE_TESTCASE(ifb2weight3, backend) {
237 Xapian::Database db = get_database("apitest_simpledata");
238 Xapian::Enquire enquire(db);
239 Xapian::Query query("banana");
241 enquire.set_query(query);
242 enquire.set_weighting_scheme(Xapian::IfB2Weight(2.0));
244 Xapian::MSet mset1;
245 mset1 = enquire.get_mset(0, 10);
246 TEST_EQUAL(mset1.size(), 1);
248 /* The value of the weight has been manually calculated using the statistics
249 * of the test database. */
250 TEST_EQUAL_DOUBLE(mset1[0].get_weight(), 3.119422287684126);
252 // Test with OP_SCALE_WEIGHT.
253 enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 15.0));
254 enquire.set_weighting_scheme(Xapian::IfB2Weight(2.0));
256 Xapian::MSet mset2;
257 mset2 = enquire.get_mset(0, 10);
258 TEST_EQUAL(mset2.size(), 1);
259 TEST_EQUAL_DOUBLE(15.0 * mset1[0].get_weight(), mset2[0].get_weight());
261 return true;
264 // Test exception for junk after serialised weight.
265 DEFINE_TESTCASE(ineb2weight1, !backend) {
266 Xapian::IneB2Weight wt(2.0);
267 try {
268 Xapian::IneB2Weight b;
269 Xapian::IneB2Weight * b2 = b.unserialise(wt.serialise() + "X");
270 // Make sure we actually use the weight.
271 bool empty = b2->name().empty();
272 delete b2;
273 if (empty)
274 FAIL_TEST("Serialised ineb2weight with junk appended unserialised to empty name!");
275 FAIL_TEST("Serialised ineb2weight with junk appended unserialised OK");
276 } catch (const Xapian::SerialisationError &) {
280 return true;
283 // Test for invalid values of c.
284 DEFINE_TESTCASE(ineb2weight2, !backend) {
285 // InvalidArgumentError should be thrown if parameter c is invalid.
286 TEST_EXCEPTION(Xapian::InvalidArgumentError,
287 Xapian::IneB2Weight wt(-2.0));
289 TEST_EXCEPTION(Xapian::InvalidArgumentError,
290 Xapian::IneB2Weight wt2(0.0));
292 /* Parameter c should be set to 1.0 by constructor if none is given. */
293 Xapian::IneB2Weight weight2;
294 TEST_EQUAL(weight2.serialise(), Xapian::IneB2Weight(1.0).serialise());
296 return true;
299 // Feature test.
300 DEFINE_TESTCASE(ineb2weight3, backend) {
301 Xapian::Database db = get_database("apitest_simpledata");
302 Xapian::Enquire enquire(db);
303 Xapian::Query query("paragraph");
304 enquire.set_query(query);
305 enquire.set_weighting_scheme(Xapian::IneB2Weight(2.0));
307 Xapian::MSet mset1;
308 mset1 = enquire.get_mset(0, 10);
309 TEST_EQUAL(mset1.size(), 5);
311 // The third document in the database is 4th in the ranking.
312 /* The weight value has been manually calculated by using the statistics
313 * of the test database. */
314 TEST_EQUAL_DOUBLE(mset1[4].get_weight(), 0.61709730297692400036);
316 // Test with OP_SCALE_WEIGHT.
317 enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 15.0));
318 enquire.set_weighting_scheme(Xapian::IneB2Weight(2.0));
320 Xapian::MSet mset2;
321 mset2 = enquire.get_mset(0, 10);
322 TEST_EQUAL(mset2.size(), 5);
324 for (int i = 0; i < 5; ++i) {
325 TEST_EQUAL_DOUBLE(15.0 * mset1[i].get_weight(), mset2[i].get_weight());
328 return true;
331 // Test exception for junk after serialised weight.
332 DEFINE_TESTCASE(bb2weight1, !backend) {
333 Xapian::BB2Weight wt(2.0);
334 try {
335 Xapian::BB2Weight b;
336 Xapian::BB2Weight * b2 = b.unserialise(wt.serialise() + "X");
337 // Make sure we actually use the weight.
338 bool empty = b2->name().empty();
339 delete b2;
340 if (empty)
341 FAIL_TEST("Serialised BB2Weight with junk appended unserialised to empty name!");
342 FAIL_TEST("Serialised BB2Weight with junk appended unserialised OK");
343 } catch (const Xapian::SerialisationError &) {
346 return true;
349 // Test for invalid values of c.
350 DEFINE_TESTCASE(bb2weight2, !backend) {
351 // InvalidArgumentError should be thrown if the parameter c is invalid.
352 TEST_EXCEPTION(Xapian::InvalidArgumentError,
353 Xapian::BB2Weight wt(-2.0));
355 TEST_EXCEPTION(Xapian::InvalidArgumentError,
356 Xapian::BB2Weight wt2(0.0));
358 /* Parameter c should be set to 1.0 by constructor if none is given. */
359 Xapian::BB2Weight weight2;
360 TEST_EQUAL(weight2.serialise(), Xapian::BB2Weight(1.0).serialise());
362 return true;
365 // Feature test
366 DEFINE_TESTCASE(bb2weight3, backend) {
367 Xapian::Database db = get_database("apitest_simpledata");
368 Xapian::Enquire enquire(db);
369 Xapian::Query query("paragraph");
371 enquire.set_query(query);
372 enquire.set_weighting_scheme(Xapian::BB2Weight(2.0));
374 Xapian::MSet mset1;
375 mset1 = enquire.get_mset(0, 10);
376 TEST_EQUAL(mset1.size(), 5);
377 /* The third document in the database has the highest weight and is the
378 * first in the mset. */
379 // Value calculated manually by using the statistics of the test database.
380 TEST_EQUAL_DOUBLE(mset1[0].get_weight(), 1.6823696969784483);
382 // Test with OP_SCALE_WEIGHT.
383 enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 15.0));
384 enquire.set_weighting_scheme(Xapian::BB2Weight(2.0));
386 Xapian::MSet mset2;
387 mset2 = enquire.get_mset(0, 10);
388 TEST_EQUAL(mset2.size(), 5);
390 for (int i = 0; i < 5; ++i) {
391 TEST_EQUAL_DOUBLE(15.0 * mset1[i].get_weight(), mset2[i].get_weight());
394 // Test with OP_SCALE_WEIGHT and a small factor (regression test, as we
395 // were applying the factor to the upper bound twice).
396 enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 1.0/1024));
397 enquire.set_weighting_scheme(Xapian::BB2Weight(2.0));
399 Xapian::MSet mset3;
400 mset3 = enquire.get_mset(0, 10);
401 TEST_EQUAL(mset3.size(), 5);
403 for (int i = 0; i < 5; ++i) {
404 TEST_EQUAL_DOUBLE(mset1[i].get_weight(), mset3[i].get_weight() * 1024);
407 return true;
410 // Regression test: we used to calculate log2(0) when there was only one doc.
411 DEFINE_TESTCASE(bb2weight4, backend) {
412 Xapian::Database db = get_database("apitest_onedoc");
413 Xapian::Enquire enquire(db);
414 Xapian::Query query("word");
416 enquire.set_query(query);
417 enquire.set_weighting_scheme(Xapian::BB2Weight());
419 Xapian::MSet mset1;
420 mset1 = enquire.get_mset(0, 10);
421 TEST_EQUAL(mset1.size(), 1);
422 // Zero weight is a bit bogus, but what we currently give.
423 TEST_EQUAL_DOUBLE(mset1[0].get_weight(), 0);
425 return true;
428 // Feature test.
429 DEFINE_TESTCASE(dlhweight1, backend) {
430 Xapian::Database db = get_database("apitest_simpledata");
431 Xapian::Enquire enquire(db);
432 Xapian::Query query("paragraph");
434 enquire.set_query(query);
435 enquire.set_weighting_scheme(Xapian::DLHWeight());
437 Xapian::MSet mset1;
438 mset1 = enquire.get_mset(0, 10);
439 TEST_EQUAL(mset1.size(), 5);
440 /* Weight has been calculated manually by obtaining the statistics from the
441 * database.*/
442 TEST_EQUAL_DOUBLE(mset1[0].get_weight() - mset1[4].get_weight(), 1.17790202016936130);
444 // Test with OP_SCALE_WEIGHT.
445 enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 15.0));
446 enquire.set_weighting_scheme(Xapian::DLHWeight());
448 Xapian::MSet mset2;
449 mset2 = enquire.get_mset(0, 10);
450 TEST_EQUAL(mset2.size(), 5);
452 for (int i = 0; i < 5; ++i) {
453 TEST_EQUAL_DOUBLE(15.0 * mset1[i].get_weight(), mset2[i].get_weight());
456 return true;
459 // Test exception for junk after serialised weight.
460 DEFINE_TESTCASE(pl2weight1, !backend) {
461 Xapian::PL2Weight wt(2.0);
462 try {
463 Xapian::PL2Weight b;
464 Xapian::PL2Weight * b2 = b.unserialise(wt.serialise() + "X");
465 // Make sure we actually use the weight.
466 bool empty = b2->name().empty();
467 delete b2;
468 if (empty)
469 FAIL_TEST("Serialised PL2Weight with junk appended unserialised to empty name!");
470 FAIL_TEST("Serialised PL2Weight with junk appended unserialised OK");
471 } catch (const Xapian::SerialisationError &) {
474 return true;
477 // Test for invalid values of c.
478 DEFINE_TESTCASE(pl2weight2, !backend) {
479 // InvalidArgumentError should be thrown if parameter c is invalid.
480 TEST_EXCEPTION(Xapian::InvalidArgumentError,
481 Xapian::PL2Weight wt(-2.0));
483 /* Parameter c should be set to 1.0 by constructor if none is given. */
484 Xapian::PL2Weight weight2;
485 TEST_EQUAL(weight2.serialise(), Xapian::PL2Weight(1.0).serialise());
487 return true;
490 // Feature Test.
491 DEFINE_TESTCASE(pl2weight3, backend) {
492 Xapian::Database db = get_database("apitest_simpledata");
493 Xapian::Enquire enquire(db);
494 enquire.set_query(Xapian::Query("paragraph"));
495 Xapian::MSet mset;
497 enquire.set_weighting_scheme(Xapian::PL2Weight(2.0));
498 mset = enquire.get_mset(0, 10);
499 TEST_EQUAL(mset.size(), 5);
500 // Expected weight difference calculated in extended precision using stats
501 // from the test database.
502 TEST_EQUAL_DOUBLE(mset[2].get_weight(),
503 mset[3].get_weight() + 0.0086861771701328694);
505 return true;
508 // Feature test
509 DEFINE_TESTCASE(dphweight1, backend) {
510 Xapian::Database db = get_database("apitest_simpledata");
511 Xapian::Enquire enquire(db);
512 Xapian::Query query("paragraph");
514 enquire.set_query(query);
515 enquire.set_weighting_scheme(Xapian::DPHWeight());
517 Xapian::MSet mset1;
518 mset1 = enquire.get_mset(0, 10);
519 TEST_EQUAL(mset1.size(), 5);
520 /* The weight has been calculated manually by using the statistics of the
521 * test database. */
522 TEST_EQUAL_DOUBLE(mset1[2].get_weight() - mset1[4].get_weight(), 0.542623617687990167);
524 // Test with OP_SCALE_WEIGHT.
525 enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 15.0));
526 enquire.set_weighting_scheme(Xapian::DPHWeight());
528 Xapian::MSet mset2;
529 mset2 = enquire.get_mset(0, 10);
530 TEST_EQUAL(mset2.size(), 5);
531 for (int i = 0; i < 5; ++i) {
532 TEST_EQUAL_DOUBLE(15.0 * mset1[i].get_weight(), mset2[i].get_weight());
535 return true;
538 // Test for various cases of normalization string.
539 DEFINE_TESTCASE(tfidfweight1, !backend) {
540 // InvalidArgumentError should be thrown if normalization string is invalid
541 TEST_EXCEPTION(Xapian::InvalidArgumentError,
542 Xapian::TfIdfWeight b("JOHN_LENNON"));
544 TEST_EXCEPTION(Xapian::InvalidArgumentError,
545 Xapian::TfIdfWeight b("LOL"));
547 /* Normalization string should be set to "ntn" by constructor if none is
548 given. */
549 Xapian::TfIdfWeight weight2;
550 TEST_EQUAL(weight2.serialise(), Xapian::TfIdfWeight("ntn").serialise());
552 return true;
555 // Test exception for junk after serialised weight.
556 DEFINE_TESTCASE(tfidfweight2, !backend) {
557 Xapian::TfIdfWeight wt("ntn");
558 try {
559 Xapian::TfIdfWeight b;
560 Xapian::TfIdfWeight * b2 = b.unserialise(wt.serialise() + "X");
561 // Make sure we actually use the weight.
562 bool empty = b2->name().empty();
563 delete b2;
564 if (empty)
565 FAIL_TEST("Serialised TfIdfWeight with junk appended unserialised to empty name!");
566 FAIL_TEST("Serialised TfIdfWeight with junk appended unserialised OK");
567 } catch (const Xapian::SerialisationError &) {
570 return true;
573 // Feature tests for various normalization functions.
574 DEFINE_TESTCASE(tfidfweight3, backend) {
575 Xapian::Database db = get_database("apitest_simpledata");
576 Xapian::Enquire enquire(db);
577 Xapian::Query query("word");
578 Xapian::MSet mset;
580 // Check for "ntn" when termfreq != N
581 enquire.set_query(query);
582 enquire.set_weighting_scheme(Xapian::TfIdfWeight("ntn"));
583 mset = enquire.get_mset(0, 10);
584 TEST_EQUAL(mset.size(), 2);
585 // doc 2 should have higher weight than 4 as only tf(wdf) will dominate.
586 mset_expect_order(mset, 2, 4);
587 TEST_EQUAL_DOUBLE(mset[0].get_weight(), 8.0 * log(6.0 / 2));
589 // Test with OP_SCALE_WEIGHT.
590 enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 15.0));
591 enquire.set_weighting_scheme(Xapian::TfIdfWeight("ntn"));
592 Xapian::MSet mset2;
593 mset2 = enquire.get_mset(0, 10);
594 TEST_EQUAL(mset2.size(), 2);
595 // doc 2 should have higher weight than 4 as only tf(wdf) will dominate.
596 mset_expect_order(mset2, 2, 4);
597 TEST_EQUAL_DOUBLE(15 * mset[0].get_weight(), mset2[0].get_weight());
599 // Check for "bnn" and for both branches of 'b'.
600 enquire.set_query(Xapian::Query("test"));
601 enquire.set_weighting_scheme(Xapian::TfIdfWeight("bnn"));
602 mset = enquire.get_mset(0, 10);
603 TEST_EQUAL(mset.size(), 1);
604 mset_expect_order(mset, 1);
605 TEST_EQUAL_DOUBLE(mset[0].get_weight(), 1.0);
607 // Check for "lnn" and for both branches of 'l'.
608 enquire.set_query(Xapian::Query("word"));
609 enquire.set_weighting_scheme(Xapian::TfIdfWeight("lnn"));
610 mset = enquire.get_mset(0, 10);
611 TEST_EQUAL(mset.size(), 2);
612 mset_expect_order(mset, 2, 4);
613 TEST_EQUAL_DOUBLE(mset[0].get_weight(), 1 + log(8.0)); // idfn=1 and so wt=tfn=1+log(tf)
614 TEST_EQUAL_DOUBLE(mset[1].get_weight(), 1.0); // idfn=1 and wt=tfn=1+log(tf)=1+log(1)=1
616 // Check for "snn"
617 enquire.set_query(Xapian::Query("paragraph"));
618 enquire.set_weighting_scheme(Xapian::TfIdfWeight("snn")); // idf=1 and tfn=tf*tf
619 mset = enquire.get_mset(0, 10);
620 TEST_EQUAL(mset.size(), 5);
621 mset_expect_order(mset, 2, 1, 4, 3, 5);
622 TEST_EQUAL_DOUBLE(mset[0].get_weight(), 9.0);
623 TEST_EQUAL_DOUBLE(mset[4].get_weight(), 1.0);
625 // Check for "ntn" when termfreq=N
626 enquire.set_query(Xapian::Query("this")); // N=termfreq amd so idfn=0 for "t"
627 enquire.set_weighting_scheme(Xapian::TfIdfWeight("ntn"));
628 mset = enquire.get_mset(0, 10);
629 TEST_EQUAL(mset.size(), 6);
630 mset_expect_order(mset, 1, 2, 3, 4, 5, 6);
631 for (int i = 0; i < 6; ++i) {
632 TEST_EQUAL_DOUBLE(mset[i].get_weight(), 0.0);
635 // Check for "npn" and for both branches of 'p'
636 enquire.set_query(Xapian::Query("this")); // N=termfreq and so idfn=0 for "p"
637 enquire.set_weighting_scheme(Xapian::TfIdfWeight("npn"));
638 mset = enquire.get_mset(0, 10);
639 TEST_EQUAL(mset.size(), 6);
640 mset_expect_order(mset, 1, 2, 3, 4, 5, 6);
641 for (int i = 0; i < 6; ++i) {
642 TEST_EQUAL_DOUBLE(mset[i].get_weight(), 0.0);
645 enquire.set_query(Xapian::Query("word"));
646 enquire.set_weighting_scheme(Xapian::TfIdfWeight("npn"));
647 mset = enquire.get_mset(0, 10);
648 TEST_EQUAL(mset.size(), 2);
649 mset_expect_order(mset, 2, 4);
650 TEST_EQUAL_DOUBLE(mset[0].get_weight(), 8 * log((6.0 - 2) / 2));
651 TEST_EQUAL_DOUBLE(mset[1].get_weight(), 1 * log((6.0 - 2) / 2));
653 return true;
656 class CheckInitWeight : public Xapian::Weight {
657 public:
658 double factor;
660 unsigned & zero_inits, & non_zero_inits;
662 CheckInitWeight(unsigned &z, unsigned &n)
663 : factor(-1.0), zero_inits(z), non_zero_inits(n) { }
665 void init(double factor_) {
666 factor = factor_;
667 if (factor == 0.0)
668 ++zero_inits;
669 else
670 ++non_zero_inits;
673 Weight * clone() const {
674 return new CheckInitWeight(zero_inits, non_zero_inits);
677 double get_sumpart(Xapian::termcount, Xapian::termcount,
678 Xapian::termcount) const {
679 return 1.0;
682 double get_maxpart() const { return 1.0; }
684 double get_sumextra(Xapian::termcount doclen, Xapian::termcount) const {
685 return 1.0 / doclen;
688 double get_maxextra() const { return 1.0; }
691 /// Regression test - check init() is called for the term-indep Weight obj.
692 DEFINE_TESTCASE(checkinitweight1, backend && !multi && !remote) {
693 Xapian::Database db = get_database("apitest_simpledata");
694 Xapian::Enquire enquire(db);
695 Xapian::Query q(Xapian::Query::OP_AND,
696 Xapian::Query("this"), Xapian::Query("paragraph"));
697 enquire.set_query(q);
698 unsigned zero_inits = 0, non_zero_inits = 0;
699 CheckInitWeight wt(zero_inits, non_zero_inits);
700 enquire.set_weighting_scheme(wt);
701 Xapian::MSet mset = enquire.get_mset(0, 3);
702 TEST_EQUAL(zero_inits, 1);
703 TEST_EQUAL(non_zero_inits, 2);
704 return true;
707 class CheckStatsWeight : public Xapian::Weight {
708 public:
709 double factor;
711 Xapian::Database db;
713 string term;
715 Xapian::termcount & sum;
716 Xapian::termcount & sum_squares;
718 mutable Xapian::termcount len_upper;
719 mutable Xapian::termcount len_lower;
720 mutable Xapian::termcount wdf_upper;
722 CheckStatsWeight(const Xapian::Database & db_,
723 const string & term_,
724 Xapian::termcount & sum_,
725 Xapian::termcount & sum_squares_)
726 : factor(-1.0), db(db_), term(term_),
727 sum(sum_), sum_squares(sum_squares_),
728 len_upper(0), len_lower(Xapian::termcount(-1)), wdf_upper(0)
730 need_stat(COLLECTION_SIZE);
731 need_stat(RSET_SIZE);
732 need_stat(AVERAGE_LENGTH);
733 need_stat(TERMFREQ);
734 need_stat(RELTERMFREQ);
735 need_stat(QUERY_LENGTH);
736 need_stat(WQF);
737 need_stat(WDF);
738 need_stat(DOC_LENGTH);
739 need_stat(DOC_LENGTH_MIN);
740 need_stat(DOC_LENGTH_MAX);
741 need_stat(WDF_MAX);
742 need_stat(COLLECTION_FREQ);
743 need_stat(UNIQUE_TERMS);
746 void init(double factor_) {
747 factor = factor_;
750 Weight * clone() const {
751 return new CheckStatsWeight(db, term, sum, sum_squares);
754 double get_sumpart(Xapian::termcount wdf, Xapian::termcount doclen,
755 Xapian::termcount uniqueterms) const {
756 TEST_EQUAL(get_collection_size(), db.get_doccount());
757 TEST_EQUAL(get_collection_freq(), db.get_collection_freq(term));
758 TEST_EQUAL(get_rset_size(), 0);
759 TEST_EQUAL(get_average_length(), db.get_avlength());
760 TEST_EQUAL(get_termfreq(), db.get_termfreq(term));
761 TEST_EQUAL(get_reltermfreq(), 0);
762 TEST_EQUAL(get_query_length(), 1);
763 TEST_EQUAL(get_wqf(), 1);
764 TEST_REL(doclen,>=,len_lower);
765 TEST_REL(doclen,<=,len_upper);
766 TEST_REL(uniqueterms,>=,1);
767 TEST_REL(uniqueterms,<=,doclen);
768 TEST_REL(wdf,<=,wdf_upper);
769 sum += wdf;
770 sum_squares += wdf * wdf;
771 return 1.0;
774 double get_maxpart() const {
775 if (len_upper == 0) {
776 len_lower = get_doclength_lower_bound();
777 len_upper = get_doclength_upper_bound();
778 wdf_upper = get_wdf_upper_bound();
780 return 1.0;
783 double get_sumextra(Xapian::termcount doclen, Xapian::termcount) const {
784 return 1.0 / doclen;
787 double get_maxextra() const { return 1.0; }
790 /// Check the weight subclass gets the correct stats.
791 DEFINE_TESTCASE(checkstatsweight1, backend && !remote) {
792 Xapian::Database db = get_database("apitest_simpledata");
793 Xapian::Enquire enquire(db);
794 Xapian::TermIterator a;
795 for (a = db.allterms_begin(); a != db.allterms_end(); ++a) {
796 const string & term = *a;
797 enquire.set_query(Xapian::Query(term));
798 Xapian::termcount sum = 0;
799 Xapian::termcount sum_squares = 0;
800 CheckStatsWeight wt(db, term, sum, sum_squares);
801 enquire.set_weighting_scheme(wt);
802 Xapian::MSet mset = enquire.get_mset(0, db.get_doccount());
804 // The document order in the multi-db case isn't the same as the
805 // postlist order on the combined DB, so it's hard to compare the
806 // wdf for each document in the Weight objects, so we can sum
807 // the wdfs and the squares of the wdfs which provides a decent
808 // check that we're not getting the wrong wdf values (it ensures
809 // they have the right mean and standard deviation).
810 Xapian::termcount expected_sum = 0;
811 Xapian::termcount expected_sum_squares = 0;
812 Xapian::PostingIterator i;
813 for (i = db.postlist_begin(term); i != db.postlist_end(term); ++i) {
814 Xapian::termcount wdf = i.get_wdf();
815 expected_sum += wdf;
816 expected_sum_squares += wdf * wdf;
818 TEST_EQUAL(sum, expected_sum);
819 TEST_EQUAL(sum_squares, expected_sum_squares);
821 return true;
824 // Two stage should perform same as Jelinek mercer if smoothing parameter for mercer is kept 1 in both.
825 DEFINE_TESTCASE(unigramlmweight4, backend) {
826 Xapian::Database db = get_database("apitest_simpledata");
827 Xapian::Enquire enquire1(db);
828 Xapian::Enquire enquire2(db);
829 enquire1.set_query(Xapian::Query("paragraph"));
830 Xapian::MSet mset1;
831 enquire2.set_query(Xapian::Query("paragraph"));
832 Xapian::MSet mset2;
833 // 5 documents available with term paragraph so mset size should be 5
834 enquire1.set_weighting_scheme(Xapian::LMWeight(0, Xapian::Weight::TWO_STAGE_SMOOTHING, 1, 0));
835 enquire2.set_weighting_scheme(Xapian::LMWeight(0, Xapian::Weight::JELINEK_MERCER_SMOOTHING, 1, 0));
836 mset1 = enquire1.get_mset(0, 10);
837 mset2 = enquire2.get_mset(0, 10);
839 TEST_EQUAL(mset1.size(), 5);
840 TEST_EQUAL_DOUBLE(mset1[1].get_weight(), mset2[1].get_weight());
841 return true;
844 /* Test for checking if we don't use smoothing all
845 * of them should give same result i.e wdf_double/len_double */
846 DEFINE_TESTCASE(unigramlmweight5, backend) {
847 Xapian::Database db = get_database("apitest_simpledata");
848 Xapian::Enquire enquire1(db);
849 Xapian::Enquire enquire2(db);
850 Xapian::Enquire enquire3(db);
851 Xapian::Enquire enquire4(db);
852 enquire1.set_query(Xapian::Query("paragraph"));
853 Xapian::MSet mset1;
854 enquire2.set_query(Xapian::Query("paragraph"));
855 Xapian::MSet mset2;
856 enquire3.set_query(Xapian::Query("paragraph"));
857 Xapian::MSet mset3;
858 enquire4.set_query(Xapian::Query("paragraph"));
859 Xapian::MSet mset4;
860 // 5 documents available with term paragraph so mset size should be 5
861 enquire1.set_weighting_scheme(Xapian::LMWeight(10000.0, Xapian::Weight::TWO_STAGE_SMOOTHING, 0, 0));
862 enquire2.set_weighting_scheme(Xapian::LMWeight(10000.0, Xapian::Weight::JELINEK_MERCER_SMOOTHING, 0, 0));
863 enquire3.set_weighting_scheme(Xapian::LMWeight(10000.0, Xapian::Weight::ABSOLUTE_DISCOUNT_SMOOTHING, 0, 0));
864 enquire4.set_weighting_scheme(Xapian::LMWeight(10000.0, Xapian::Weight::DIRICHLET_SMOOTHING, 0, 0));
866 mset1 = enquire1.get_mset(0, 10);
867 mset2 = enquire2.get_mset(0, 10);
868 mset3 = enquire3.get_mset(0, 10);
869 mset4 = enquire4.get_mset(0, 10);
871 TEST_EQUAL(mset1.size(), 5);
872 TEST_EQUAL(mset2.size(), 5);
873 TEST_EQUAL(mset3.size(), 5);
874 TEST_EQUAL(mset4.size(), 5);
875 for (size_t i = 0; i < 5; i++) {
876 TEST_EQUAL_DOUBLE(mset3[i].get_weight(), mset4[i].get_weight());
877 TEST_EQUAL_DOUBLE(mset2[i].get_weight(), mset4[i].get_weight());
878 TEST_EQUAL_DOUBLE(mset1[i].get_weight(), mset2[i].get_weight());
879 TEST_EQUAL_DOUBLE(mset3[i].get_weight(), mset2[i].get_weight());
880 TEST_EQUAL_DOUBLE(mset1[i].get_weight(), mset4[i].get_weight());
881 TEST_EQUAL_DOUBLE(mset1[i].get_weight(), mset3[i].get_weight());
883 return true;