/** @file api_weight.cc
 * @brief tests of Xapian::Weight subclasses
 */
/* Copyright (C) 2012,2013 Olly Betts
 * Copyright (C) 2013 Aarsh Shah
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
 */
#include "api_weight.h"

#include <cmath>
#include <string>

#include "testutils.h"
34 // Test exception for junk after serialised weight.
35 DEFINE_TESTCASE(tradweight3
, !backend
) {
36 Xapian::TradWeight
wt(42);
39 Xapian::TradWeight
* t2
= t
.unserialise(wt
.serialise() + "X");
40 // Make sure we actually use the weight.
41 bool empty
= t2
->name().empty();
44 FAIL_TEST("Serialised TradWeight with junk appended unserialised to empty name!");
45 FAIL_TEST("Serialised TradWeight with junk appended unserialised OK");
46 } catch (const Xapian::SerialisationError
&e
) {
47 // Regression test for error in exception message fixed in 1.2.11 and
49 TEST(e
.get_msg().find("BM25") == string::npos
);
54 // Test Exception for junk after serialised weight.
55 DEFINE_TESTCASE(unigramlmweight3
, !backend
) {
56 Xapian::LMWeight
wt(79898.0, Xapian::Weight::JELINEK_MERCER_SMOOTHING
, 0.5, 1.0);
59 Xapian::LMWeight
* t2
= t
.unserialise(wt
.serialise() + "X");
60 // Make sure we actually use the weight.
61 bool empty
= t2
->name().empty();
64 FAIL_TEST("Serialised LMWeight with junk appended unserialised to empty name!");
65 FAIL_TEST("Serialised LMWeight with junk appended unserialised OK");
66 } catch (const Xapian::SerialisationError
&e
) {
72 // Test exception for junk after serialised weight.
73 DEFINE_TESTCASE(bm25weight3
, !backend
) {
74 Xapian::BM25Weight
wt(2.0, 0.5, 1.3, 0.6, 0.01);
77 Xapian::BM25Weight
* b2
= b
.unserialise(wt
.serialise() + "X");
78 // Make sure we actually use the weight.
79 bool empty
= b2
->name().empty();
82 FAIL_TEST("Serialised BM25Weight with junk appended unserialised to empty name!");
83 FAIL_TEST("Serialised BM25Weight with junk appended unserialised OK");
84 } catch (const Xapian::SerialisationError
&) {
90 // Test parameter combinations which should be unaffected by doclength.
91 DEFINE_TESTCASE(bm25weight4
, backend
) {
92 Xapian::Database db
= get_database("apitest_simpledata");
93 Xapian::Enquire
enquire(db
);
94 enquire
.set_query(Xapian::Query("paragraph"));
97 enquire
.set_weighting_scheme(Xapian::BM25Weight(1, 0, 1, 0, 0.5));
98 mset
= enquire
.get_mset(0, 10);
99 TEST_EQUAL(mset
.size(), 5);
100 // Expect: wdf has an effect on weight, but doclen doesn't.
101 TEST_REL(mset
[0].get_weight(),>,mset
[1].get_weight());
102 TEST_EQUAL_DOUBLE(mset
[1].get_weight(), mset
[2].get_weight());
103 TEST_REL(mset
[2].get_weight(),>,mset
[3].get_weight());
104 TEST_EQUAL_DOUBLE(mset
[3].get_weight(), mset
[4].get_weight());
106 enquire
.set_weighting_scheme(Xapian::BM25Weight(0, 0, 1, 1, 0.5));
107 mset
= enquire
.get_mset(0, 10);
108 TEST_EQUAL(mset
.size(), 5);
109 // Expect: neither wdf nor doclen affects weight.
110 TEST_EQUAL_DOUBLE(mset
[0].get_weight(), mset
[4].get_weight());
115 /// Test non-zero k2 with zero k1.
116 // Regression test for bug fixed in 1.2.17 and 1.3.2.
117 DEFINE_TESTCASE(bm25weight5
, backend
) {
118 Xapian::Database db
= get_database("apitest_simpledata");
119 Xapian::Enquire
enquire(db
);
120 enquire
.set_query(Xapian::Query("paragraph"));
123 enquire
.set_weighting_scheme(Xapian::BM25Weight(0, 1, 1, 0.5, 0.5));
124 mset
= enquire
.get_mset(0, 10);
125 TEST_EQUAL(mset
.size(), 5);
126 // Expect: wdf has no effect on weight; shorter docs rank higher.
127 mset_expect_order(mset
, 3, 5, 1, 4, 2);
128 TEST_EQUAL_DOUBLE(mset
[0].get_weight(), mset
[1].get_weight());
129 TEST_REL(mset
[1].get_weight(),>,mset
[2].get_weight());
130 TEST_REL(mset
[2].get_weight(),>,mset
[3].get_weight());
131 TEST_REL(mset
[3].get_weight(),>,mset
[4].get_weight());
136 // Test exception for junk after serialised weight.
137 DEFINE_TESTCASE(inl2weight1
, !backend
) {
138 Xapian::InL2Weight
wt(2.0);
140 Xapian::InL2Weight b
;
141 Xapian::InL2Weight
* b2
= b
.unserialise(wt
.serialise() + "X");
142 // Make sure we actually use the weight.
143 bool empty
= b2
->name().empty();
146 FAIL_TEST("Serialised inl2weight with junk appended unserialised to empty name!");
147 FAIL_TEST("Serialised inl2weight with junk appended unserialised OK");
148 } catch (const Xapian::SerialisationError
&) {
155 // Test for invalid values of c.
156 DEFINE_TESTCASE(inl2weight2
, !backend
) {
157 // InvalidArgumentError should be thrown if the parameter c is invalid.
158 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
159 Xapian::InL2Weight
wt(-2.0));
161 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
162 Xapian::InL2Weight
wt2(0.0));
164 /* Parameter c should be set to 1.0 by constructor if none is given. */
165 Xapian::InL2Weight weight2
;
166 TEST_EQUAL(weight2
.serialise(), Xapian::InL2Weight(1.0).serialise());
171 // Feature tests for Inl2Weight
172 DEFINE_TESTCASE(inl2weight3
, backend
) {
173 Xapian::Database db
= get_database("apitest_simpledata");
174 Xapian::Enquire
enquire(db
);
175 Xapian::Query
query("banana");
177 enquire
.set_query(query
);
178 enquire
.set_weighting_scheme(Xapian::InL2Weight(2.0));
181 mset1
= enquire
.get_mset(0, 10);
182 TEST_EQUAL(mset1
.size(), 1);
183 mset_expect_order(mset1
, 6);
185 /* The value has been calculated in the python interpreter by looking at the
186 * database statistics. */
187 TEST_EQUAL_DOUBLE(mset1
[0].get_weight(), 1.559711143842063);
189 // Test with OP_SCALE_WEIGHT.
190 enquire
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, query
, 15.0));
191 enquire
.set_weighting_scheme(Xapian::InL2Weight(2.0));
194 mset2
= enquire
.get_mset(0, 10);
195 TEST_EQUAL(mset2
.size(), 1);
196 TEST_EQUAL_DOUBLE(15.0 * mset1
[0].get_weight(), mset2
[0].get_weight());
201 // Test exception for junk after serialised weight.
202 DEFINE_TESTCASE(ifb2weight1
, !backend
) {
203 Xapian::IfB2Weight
wt(2.0);
205 Xapian::IfB2Weight b
;
206 Xapian::IfB2Weight
* b2
= b
.unserialise(wt
.serialise() + "X");
207 // Make sure we actually use the weight.
208 bool empty
= b2
->name().empty();
211 FAIL_TEST("Serialised IfB2Weight with junk appended unserialised to empty name!");
212 FAIL_TEST("Serialised IfB2Weight with junk appended unserialised OK");
213 } catch (const Xapian::SerialisationError
&) {
219 // Test for invalid values of c.
220 DEFINE_TESTCASE(ifb2weight2
, !backend
) {
221 // InvalidArgumentError should be thrown if the parameter c is invalid.
222 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
223 Xapian::IfB2Weight
wt(-2.0));
225 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
226 Xapian::IfB2Weight
wt2(0.0));
228 /* Parameter c should be set to 1.0 by constructor if none is given. */
229 Xapian::IfB2Weight weight2
;
230 TEST_EQUAL(weight2
.serialise(), Xapian::IfB2Weight(1.0).serialise());
236 DEFINE_TESTCASE(ifb2weight3
, backend
) {
237 Xapian::Database db
= get_database("apitest_simpledata");
238 Xapian::Enquire
enquire(db
);
239 Xapian::Query
query("banana");
241 enquire
.set_query(query
);
242 enquire
.set_weighting_scheme(Xapian::IfB2Weight(2.0));
245 mset1
= enquire
.get_mset(0, 10);
246 TEST_EQUAL(mset1
.size(), 1);
248 /* The value of the weight has been manually calculated using the statistics
249 * of the test database. */
250 TEST_EQUAL_DOUBLE(mset1
[0].get_weight(), 3.119422287684126);
252 // Test with OP_SCALE_WEIGHT.
253 enquire
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, query
, 15.0));
254 enquire
.set_weighting_scheme(Xapian::IfB2Weight(2.0));
257 mset2
= enquire
.get_mset(0, 10);
258 TEST_EQUAL(mset2
.size(), 1);
259 TEST_EQUAL_DOUBLE(15.0 * mset1
[0].get_weight(), mset2
[0].get_weight());
264 // Test exception for junk after serialised weight.
265 DEFINE_TESTCASE(ineb2weight1
, !backend
) {
266 Xapian::IneB2Weight
wt(2.0);
268 Xapian::IneB2Weight b
;
269 Xapian::IneB2Weight
* b2
= b
.unserialise(wt
.serialise() + "X");
270 // Make sure we actually use the weight.
271 bool empty
= b2
->name().empty();
274 FAIL_TEST("Serialised ineb2weight with junk appended unserialised to empty name!");
275 FAIL_TEST("Serialised ineb2weight with junk appended unserialised OK");
276 } catch (const Xapian::SerialisationError
&) {
283 // Test for invalid values of c.
284 DEFINE_TESTCASE(ineb2weight2
, !backend
) {
285 // InvalidArgumentError should be thrown if parameter c is invalid.
286 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
287 Xapian::IneB2Weight
wt(-2.0));
289 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
290 Xapian::IneB2Weight
wt2(0.0));
292 /* Parameter c should be set to 1.0 by constructor if none is given. */
293 Xapian::IneB2Weight weight2
;
294 TEST_EQUAL(weight2
.serialise(), Xapian::IneB2Weight(1.0).serialise());
300 DEFINE_TESTCASE(ineb2weight3
, backend
) {
301 Xapian::Database db
= get_database("apitest_simpledata");
302 Xapian::Enquire
enquire(db
);
303 Xapian::Query
query("paragraph");
304 enquire
.set_query(query
);
305 enquire
.set_weighting_scheme(Xapian::IneB2Weight(2.0));
308 mset1
= enquire
.get_mset(0, 10);
309 TEST_EQUAL(mset1
.size(), 5);
311 // The third document in the database is 4th in the ranking.
312 /* The weight value has been manually calculated by using the statistics
313 * of the test database. */
314 TEST_EQUAL_DOUBLE(mset1
[4].get_weight(), 0.61709730297692400036);
316 // Test with OP_SCALE_WEIGHT.
317 enquire
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, query
, 15.0));
318 enquire
.set_weighting_scheme(Xapian::IneB2Weight(2.0));
321 mset2
= enquire
.get_mset(0, 10);
322 TEST_EQUAL(mset2
.size(), 5);
324 for (int i
= 0; i
< 5; ++i
) {
325 TEST_EQUAL_DOUBLE(15.0 * mset1
[i
].get_weight(), mset2
[i
].get_weight());
331 // Test exception for junk after serialised weight.
332 DEFINE_TESTCASE(bb2weight1
, !backend
) {
333 Xapian::BB2Weight
wt(2.0);
336 Xapian::BB2Weight
* b2
= b
.unserialise(wt
.serialise() + "X");
337 // Make sure we actually use the weight.
338 bool empty
= b2
->name().empty();
341 FAIL_TEST("Serialised BB2Weight with junk appended unserialised to empty name!");
342 FAIL_TEST("Serialised BB2Weight with junk appended unserialised OK");
343 } catch (const Xapian::SerialisationError
&) {
349 // Test for invalid values of c.
350 DEFINE_TESTCASE(bb2weight2
, !backend
) {
351 // InvalidArgumentError should be thrown if the parameter c is invalid.
352 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
353 Xapian::BB2Weight
wt(-2.0));
355 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
356 Xapian::BB2Weight
wt2(0.0));
358 /* Parameter c should be set to 1.0 by constructor if none is given. */
359 Xapian::BB2Weight weight2
;
360 TEST_EQUAL(weight2
.serialise(), Xapian::BB2Weight(1.0).serialise());
366 DEFINE_TESTCASE(bb2weight3
, backend
) {
367 Xapian::Database db
= get_database("apitest_simpledata");
368 Xapian::Enquire
enquire(db
);
369 Xapian::Query
query("paragraph");
371 enquire
.set_query(query
);
372 enquire
.set_weighting_scheme(Xapian::BB2Weight(2.0));
375 mset1
= enquire
.get_mset(0, 10);
376 TEST_EQUAL(mset1
.size(), 5);
377 /* The third document in the database has the highest weight and is the
378 * first in the mset. */
379 // Value calculated manually by using the statistics of the test database.
380 TEST_EQUAL_DOUBLE(mset1
[0].get_weight(), 1.6823696969784483);
382 // Test with OP_SCALE_WEIGHT.
383 enquire
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, query
, 15.0));
384 enquire
.set_weighting_scheme(Xapian::BB2Weight(2.0));
387 mset2
= enquire
.get_mset(0, 10);
388 TEST_EQUAL(mset2
.size(), 5);
390 for (int i
= 0; i
< 5; ++i
) {
391 TEST_EQUAL_DOUBLE(15.0 * mset1
[i
].get_weight(), mset2
[i
].get_weight());
394 // Test with OP_SCALE_WEIGHT and a small factor (regression test, as we
395 // were applying the factor to the upper bound twice).
396 enquire
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, query
, 1.0/1024));
397 enquire
.set_weighting_scheme(Xapian::BB2Weight(2.0));
400 mset3
= enquire
.get_mset(0, 10);
401 TEST_EQUAL(mset3
.size(), 5);
403 for (int i
= 0; i
< 5; ++i
) {
404 TEST_EQUAL_DOUBLE(mset1
[i
].get_weight(), mset3
[i
].get_weight() * 1024);
410 // Regression test: we used to calculate log2(0) when there was only one doc.
411 DEFINE_TESTCASE(bb2weight4
, backend
) {
412 Xapian::Database db
= get_database("apitest_onedoc");
413 Xapian::Enquire
enquire(db
);
414 Xapian::Query
query("word");
416 enquire
.set_query(query
);
417 enquire
.set_weighting_scheme(Xapian::BB2Weight());
420 mset1
= enquire
.get_mset(0, 10);
421 TEST_EQUAL(mset1
.size(), 1);
422 // Zero weight is a bit bogus, but what we currently give.
423 TEST_EQUAL_DOUBLE(mset1
[0].get_weight(), 0);
429 DEFINE_TESTCASE(dlhweight1
, backend
) {
430 Xapian::Database db
= get_database("apitest_simpledata");
431 Xapian::Enquire
enquire(db
);
432 Xapian::Query
query("paragraph");
434 enquire
.set_query(query
);
435 enquire
.set_weighting_scheme(Xapian::DLHWeight());
438 mset1
= enquire
.get_mset(0, 10);
439 TEST_EQUAL(mset1
.size(), 5);
440 /* Weight has been calculated manually by obtaining the statistics from the
442 TEST_EQUAL_DOUBLE(mset1
[0].get_weight() - mset1
[4].get_weight(), 1.17790202016936130);
444 // Test with OP_SCALE_WEIGHT.
445 enquire
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, query
, 15.0));
446 enquire
.set_weighting_scheme(Xapian::DLHWeight());
449 mset2
= enquire
.get_mset(0, 10);
450 TEST_EQUAL(mset2
.size(), 5);
452 for (int i
= 0; i
< 5; ++i
) {
453 TEST_EQUAL_DOUBLE(15.0 * mset1
[i
].get_weight(), mset2
[i
].get_weight());
459 // Test exception for junk after serialised weight.
460 DEFINE_TESTCASE(pl2weight1
, !backend
) {
461 Xapian::PL2Weight
wt(2.0);
464 Xapian::PL2Weight
* b2
= b
.unserialise(wt
.serialise() + "X");
465 // Make sure we actually use the weight.
466 bool empty
= b2
->name().empty();
469 FAIL_TEST("Serialised PL2Weight with junk appended unserialised to empty name!");
470 FAIL_TEST("Serialised PL2Weight with junk appended unserialised OK");
471 } catch (const Xapian::SerialisationError
&) {
477 // Test for invalid values of c.
478 DEFINE_TESTCASE(pl2weight2
, !backend
) {
479 // InvalidArgumentError should be thrown if parameter c is invalid.
480 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
481 Xapian::PL2Weight
wt(-2.0));
483 /* Parameter c should be set to 1.0 by constructor if none is given. */
484 Xapian::PL2Weight weight2
;
485 TEST_EQUAL(weight2
.serialise(), Xapian::PL2Weight(1.0).serialise());
491 DEFINE_TESTCASE(pl2weight3
, backend
) {
492 Xapian::Database db
= get_database("apitest_simpledata");
493 Xapian::Enquire
enquire(db
);
494 enquire
.set_query(Xapian::Query("paragraph"));
497 enquire
.set_weighting_scheme(Xapian::PL2Weight(2.0));
498 mset
= enquire
.get_mset(0, 10);
499 TEST_EQUAL(mset
.size(), 5);
500 // Expected weight difference calculated in extended precision using stats
501 // from the test database.
502 TEST_EQUAL_DOUBLE(mset
[2].get_weight(),
503 mset
[3].get_weight() + 0.0086861771701328694);
509 DEFINE_TESTCASE(dphweight1
, backend
) {
510 Xapian::Database db
= get_database("apitest_simpledata");
511 Xapian::Enquire
enquire(db
);
512 Xapian::Query
query("paragraph");
514 enquire
.set_query(query
);
515 enquire
.set_weighting_scheme(Xapian::DPHWeight());
518 mset1
= enquire
.get_mset(0, 10);
519 TEST_EQUAL(mset1
.size(), 5);
520 /* The weight has been calculated manually by using the statistics of the
522 TEST_EQUAL_DOUBLE(mset1
[2].get_weight() - mset1
[4].get_weight(), 0.542623617687990167);
524 // Test with OP_SCALE_WEIGHT.
525 enquire
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, query
, 15.0));
526 enquire
.set_weighting_scheme(Xapian::DPHWeight());
529 mset2
= enquire
.get_mset(0, 10);
530 TEST_EQUAL(mset2
.size(), 5);
531 for (int i
= 0; i
< 5; ++i
) {
532 TEST_EQUAL_DOUBLE(15.0 * mset1
[i
].get_weight(), mset2
[i
].get_weight());
538 // Test for various cases of normalization string.
539 DEFINE_TESTCASE(tfidfweight1
, !backend
) {
540 // InvalidArgumentError should be thrown if normalization string is invalid
541 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
542 Xapian::TfIdfWeight
b("JOHN_LENNON"));
544 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
545 Xapian::TfIdfWeight
b("LOL"));
547 /* Normalization string should be set to "ntn" by constructor if none is
549 Xapian::TfIdfWeight weight2
;
550 TEST_EQUAL(weight2
.serialise(), Xapian::TfIdfWeight("ntn").serialise());
555 // Test exception for junk after serialised weight.
556 DEFINE_TESTCASE(tfidfweight2
, !backend
) {
557 Xapian::TfIdfWeight
wt("ntn");
559 Xapian::TfIdfWeight b
;
560 Xapian::TfIdfWeight
* b2
= b
.unserialise(wt
.serialise() + "X");
561 // Make sure we actually use the weight.
562 bool empty
= b2
->name().empty();
565 FAIL_TEST("Serialised TfIdfWeight with junk appended unserialised to empty name!");
566 FAIL_TEST("Serialised TfIdfWeight with junk appended unserialised OK");
567 } catch (const Xapian::SerialisationError
&) {
573 // Feature tests for various normalization functions.
574 DEFINE_TESTCASE(tfidfweight3
, backend
) {
575 Xapian::Database db
= get_database("apitest_simpledata");
576 Xapian::Enquire
enquire(db
);
577 Xapian::Query
query("word");
580 // Check for "ntn" when termfreq != N
581 enquire
.set_query(query
);
582 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("ntn"));
583 mset
= enquire
.get_mset(0, 10);
584 TEST_EQUAL(mset
.size(), 2);
585 // doc 2 should have higher weight than 4 as only tf(wdf) will dominate.
586 mset_expect_order(mset
, 2, 4);
587 TEST_EQUAL_DOUBLE(mset
[0].get_weight(), 8.0 * log(6.0 / 2));
589 // Test with OP_SCALE_WEIGHT.
590 enquire
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, query
, 15.0));
591 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("ntn"));
593 mset2
= enquire
.get_mset(0, 10);
594 TEST_EQUAL(mset2
.size(), 2);
595 // doc 2 should have higher weight than 4 as only tf(wdf) will dominate.
596 mset_expect_order(mset2
, 2, 4);
597 TEST_EQUAL_DOUBLE(15 * mset
[0].get_weight(), mset2
[0].get_weight());
599 // Check for "bnn" and for both branches of 'b'.
600 enquire
.set_query(Xapian::Query("test"));
601 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("bnn"));
602 mset
= enquire
.get_mset(0, 10);
603 TEST_EQUAL(mset
.size(), 1);
604 mset_expect_order(mset
, 1);
605 TEST_EQUAL_DOUBLE(mset
[0].get_weight(), 1.0);
607 // Check for "lnn" and for both branches of 'l'.
608 enquire
.set_query(Xapian::Query("word"));
609 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("lnn"));
610 mset
= enquire
.get_mset(0, 10);
611 TEST_EQUAL(mset
.size(), 2);
612 mset_expect_order(mset
, 2, 4);
613 TEST_EQUAL_DOUBLE(mset
[0].get_weight(), 1 + log(8.0)); // idfn=1 and so wt=tfn=1+log(tf)
614 TEST_EQUAL_DOUBLE(mset
[1].get_weight(), 1.0); // idfn=1 and wt=tfn=1+log(tf)=1+log(1)=1
617 enquire
.set_query(Xapian::Query("paragraph"));
618 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("snn")); // idf=1 and tfn=tf*tf
619 mset
= enquire
.get_mset(0, 10);
620 TEST_EQUAL(mset
.size(), 5);
621 mset_expect_order(mset
, 2, 1, 4, 3, 5);
622 TEST_EQUAL_DOUBLE(mset
[0].get_weight(), 9.0);
623 TEST_EQUAL_DOUBLE(mset
[4].get_weight(), 1.0);
625 // Check for "ntn" when termfreq=N
626 enquire
.set_query(Xapian::Query("this")); // N=termfreq amd so idfn=0 for "t"
627 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("ntn"));
628 mset
= enquire
.get_mset(0, 10);
629 TEST_EQUAL(mset
.size(), 6);
630 mset_expect_order(mset
, 1, 2, 3, 4, 5, 6);
631 for (int i
= 0; i
< 6; ++i
) {
632 TEST_EQUAL_DOUBLE(mset
[i
].get_weight(), 0.0);
635 // Check for "npn" and for both branches of 'p'
636 enquire
.set_query(Xapian::Query("this")); // N=termfreq and so idfn=0 for "p"
637 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("npn"));
638 mset
= enquire
.get_mset(0, 10);
639 TEST_EQUAL(mset
.size(), 6);
640 mset_expect_order(mset
, 1, 2, 3, 4, 5, 6);
641 for (int i
= 0; i
< 6; ++i
) {
642 TEST_EQUAL_DOUBLE(mset
[i
].get_weight(), 0.0);
645 enquire
.set_query(Xapian::Query("word"));
646 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("npn"));
647 mset
= enquire
.get_mset(0, 10);
648 TEST_EQUAL(mset
.size(), 2);
649 mset_expect_order(mset
, 2, 4);
650 TEST_EQUAL_DOUBLE(mset
[0].get_weight(), 8 * log((6.0 - 2) / 2));
651 TEST_EQUAL_DOUBLE(mset
[1].get_weight(), 1 * log((6.0 - 2) / 2));
656 class CheckInitWeight
: public Xapian::Weight
{
660 unsigned & zero_inits
, & non_zero_inits
;
662 CheckInitWeight(unsigned &z
, unsigned &n
)
663 : factor(-1.0), zero_inits(z
), non_zero_inits(n
) { }
665 void init(double factor_
) {
673 Weight
* clone() const {
674 return new CheckInitWeight(zero_inits
, non_zero_inits
);
677 double get_sumpart(Xapian::termcount
, Xapian::termcount
,
678 Xapian::termcount
) const {
682 double get_maxpart() const { return 1.0; }
684 double get_sumextra(Xapian::termcount doclen
, Xapian::termcount
) const {
688 double get_maxextra() const { return 1.0; }
691 /// Regression test - check init() is called for the term-indep Weight obj.
692 DEFINE_TESTCASE(checkinitweight1
, backend
&& !multi
&& !remote
) {
693 Xapian::Database db
= get_database("apitest_simpledata");
694 Xapian::Enquire
enquire(db
);
695 Xapian::Query
q(Xapian::Query::OP_AND
,
696 Xapian::Query("this"), Xapian::Query("paragraph"));
697 enquire
.set_query(q
);
698 unsigned zero_inits
= 0, non_zero_inits
= 0;
699 CheckInitWeight
wt(zero_inits
, non_zero_inits
);
700 enquire
.set_weighting_scheme(wt
);
701 Xapian::MSet mset
= enquire
.get_mset(0, 3);
702 TEST_EQUAL(zero_inits
, 1);
703 TEST_EQUAL(non_zero_inits
, 2);
707 class CheckStatsWeight
: public Xapian::Weight
{
715 Xapian::termcount
& sum
;
716 Xapian::termcount
& sum_squares
;
718 mutable Xapian::termcount len_upper
;
719 mutable Xapian::termcount len_lower
;
720 mutable Xapian::termcount wdf_upper
;
722 CheckStatsWeight(const Xapian::Database
& db_
,
723 const string
& term_
,
724 Xapian::termcount
& sum_
,
725 Xapian::termcount
& sum_squares_
)
726 : factor(-1.0), db(db_
), term(term_
),
727 sum(sum_
), sum_squares(sum_squares_
),
728 len_upper(0), len_lower(Xapian::termcount(-1)), wdf_upper(0)
730 need_stat(COLLECTION_SIZE
);
731 need_stat(RSET_SIZE
);
732 need_stat(AVERAGE_LENGTH
);
734 need_stat(RELTERMFREQ
);
735 need_stat(QUERY_LENGTH
);
738 need_stat(DOC_LENGTH
);
739 need_stat(DOC_LENGTH_MIN
);
740 need_stat(DOC_LENGTH_MAX
);
742 need_stat(COLLECTION_FREQ
);
743 need_stat(UNIQUE_TERMS
);
746 void init(double factor_
) {
750 Weight
* clone() const {
751 return new CheckStatsWeight(db
, term
, sum
, sum_squares
);
754 double get_sumpart(Xapian::termcount wdf
, Xapian::termcount doclen
,
755 Xapian::termcount uniqueterms
) const {
756 TEST_EQUAL(get_collection_size(), db
.get_doccount());
757 TEST_EQUAL(get_collection_freq(), db
.get_collection_freq(term
));
758 TEST_EQUAL(get_rset_size(), 0);
759 TEST_EQUAL(get_average_length(), db
.get_avlength());
760 TEST_EQUAL(get_termfreq(), db
.get_termfreq(term
));
761 TEST_EQUAL(get_reltermfreq(), 0);
762 TEST_EQUAL(get_query_length(), 1);
763 TEST_EQUAL(get_wqf(), 1);
764 TEST_REL(doclen
,>=,len_lower
);
765 TEST_REL(doclen
,<=,len_upper
);
766 TEST_REL(uniqueterms
,>=,1);
767 TEST_REL(uniqueterms
,<=,doclen
);
768 TEST_REL(wdf
,<=,wdf_upper
);
770 sum_squares
+= wdf
* wdf
;
774 double get_maxpart() const {
775 if (len_upper
== 0) {
776 len_lower
= get_doclength_lower_bound();
777 len_upper
= get_doclength_upper_bound();
778 wdf_upper
= get_wdf_upper_bound();
783 double get_sumextra(Xapian::termcount doclen
, Xapian::termcount
) const {
787 double get_maxextra() const { return 1.0; }
790 /// Check the weight subclass gets the correct stats.
791 DEFINE_TESTCASE(checkstatsweight1
, backend
&& !remote
) {
792 Xapian::Database db
= get_database("apitest_simpledata");
793 Xapian::Enquire
enquire(db
);
794 Xapian::TermIterator a
;
795 for (a
= db
.allterms_begin(); a
!= db
.allterms_end(); ++a
) {
796 const string
& term
= *a
;
797 enquire
.set_query(Xapian::Query(term
));
798 Xapian::termcount sum
= 0;
799 Xapian::termcount sum_squares
= 0;
800 CheckStatsWeight
wt(db
, term
, sum
, sum_squares
);
801 enquire
.set_weighting_scheme(wt
);
802 Xapian::MSet mset
= enquire
.get_mset(0, db
.get_doccount());
804 // The document order in the multi-db case isn't the same as the
805 // postlist order on the combined DB, so it's hard to compare the
806 // wdf for each document in the Weight objects, so we can sum
807 // the wdfs and the squares of the wdfs which provides a decent
808 // check that we're not getting the wrong wdf values (it ensures
809 // they have the right mean and standard deviation).
810 Xapian::termcount expected_sum
= 0;
811 Xapian::termcount expected_sum_squares
= 0;
812 Xapian::PostingIterator i
;
813 for (i
= db
.postlist_begin(term
); i
!= db
.postlist_end(term
); ++i
) {
814 Xapian::termcount wdf
= i
.get_wdf();
816 expected_sum_squares
+= wdf
* wdf
;
818 TEST_EQUAL(sum
, expected_sum
);
819 TEST_EQUAL(sum_squares
, expected_sum_squares
);
824 // Two stage should perform same as Jelinek mercer if smoothing parameter for mercer is kept 1 in both.
825 DEFINE_TESTCASE(unigramlmweight4
, backend
) {
826 Xapian::Database db
= get_database("apitest_simpledata");
827 Xapian::Enquire
enquire1(db
);
828 Xapian::Enquire
enquire2(db
);
829 enquire1
.set_query(Xapian::Query("paragraph"));
831 enquire2
.set_query(Xapian::Query("paragraph"));
833 // 5 documents available with term paragraph so mset size should be 5
834 enquire1
.set_weighting_scheme(Xapian::LMWeight(0, Xapian::Weight::TWO_STAGE_SMOOTHING
, 1, 0));
835 enquire2
.set_weighting_scheme(Xapian::LMWeight(0, Xapian::Weight::JELINEK_MERCER_SMOOTHING
, 1, 0));
836 mset1
= enquire1
.get_mset(0, 10);
837 mset2
= enquire2
.get_mset(0, 10);
839 TEST_EQUAL(mset1
.size(), 5);
840 TEST_EQUAL_DOUBLE(mset1
[1].get_weight(), mset2
[1].get_weight());
844 /* Test for checking if we don't use smoothing all
845 * of them should give same result i.e wdf_double/len_double */
846 DEFINE_TESTCASE(unigramlmweight5
, backend
) {
847 Xapian::Database db
= get_database("apitest_simpledata");
848 Xapian::Enquire
enquire1(db
);
849 Xapian::Enquire
enquire2(db
);
850 Xapian::Enquire
enquire3(db
);
851 Xapian::Enquire
enquire4(db
);
852 enquire1
.set_query(Xapian::Query("paragraph"));
854 enquire2
.set_query(Xapian::Query("paragraph"));
856 enquire3
.set_query(Xapian::Query("paragraph"));
858 enquire4
.set_query(Xapian::Query("paragraph"));
860 // 5 documents available with term paragraph so mset size should be 5
861 enquire1
.set_weighting_scheme(Xapian::LMWeight(10000.0, Xapian::Weight::TWO_STAGE_SMOOTHING
, 0, 0));
862 enquire2
.set_weighting_scheme(Xapian::LMWeight(10000.0, Xapian::Weight::JELINEK_MERCER_SMOOTHING
, 0, 0));
863 enquire3
.set_weighting_scheme(Xapian::LMWeight(10000.0, Xapian::Weight::ABSOLUTE_DISCOUNT_SMOOTHING
, 0, 0));
864 enquire4
.set_weighting_scheme(Xapian::LMWeight(10000.0, Xapian::Weight::DIRICHLET_SMOOTHING
, 0, 0));
866 mset1
= enquire1
.get_mset(0, 10);
867 mset2
= enquire2
.get_mset(0, 10);
868 mset3
= enquire3
.get_mset(0, 10);
869 mset4
= enquire4
.get_mset(0, 10);
871 TEST_EQUAL(mset1
.size(), 5);
872 TEST_EQUAL(mset2
.size(), 5);
873 TEST_EQUAL(mset3
.size(), 5);
874 TEST_EQUAL(mset4
.size(), 5);
875 for (size_t i
= 0; i
< 5; i
++) {
876 TEST_EQUAL_DOUBLE(mset3
[i
].get_weight(), mset4
[i
].get_weight());
877 TEST_EQUAL_DOUBLE(mset2
[i
].get_weight(), mset4
[i
].get_weight());
878 TEST_EQUAL_DOUBLE(mset1
[i
].get_weight(), mset2
[i
].get_weight());
879 TEST_EQUAL_DOUBLE(mset3
[i
].get_weight(), mset2
[i
].get_weight());
880 TEST_EQUAL_DOUBLE(mset1
[i
].get_weight(), mset4
[i
].get_weight());
881 TEST_EQUAL_DOUBLE(mset1
[i
].get_weight(), mset3
[i
].get_weight());