1 /** @file api_weight.cc
2 * @brief tests of Xapian::Weight subclasses
4 /* Copyright (C) 2004,2012,2013,2016 Olly Betts
5 * Copyright (C) 2013 Aarsh Shah
6 * Copyright (C) 2016 Vivek Pal
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "api_weight.h"
31 #include "testutils.h"
35 // Test exception for junk after serialised weight.
36 DEFINE_TESTCASE(tradweight3
, !backend
) {
37 Xapian::TradWeight
wt(42);
40 Xapian::TradWeight
* t2
= t
.unserialise(wt
.serialise() + "X");
41 // Make sure we actually use the weight.
42 bool empty
= t2
->name().empty();
45 FAIL_TEST("Serialised TradWeight with junk appended unserialised to empty name!");
46 FAIL_TEST("Serialised TradWeight with junk appended unserialised OK");
47 } catch (const Xapian::SerialisationError
&e
) {
48 // Regression test for error in exception message fixed in 1.2.11 and
50 TEST(e
.get_msg().find("BM25") == string::npos
);
51 TEST(e
.get_msg().find("Trad") != string::npos
);
56 // Test Exception for junk after serialised weight.
57 DEFINE_TESTCASE(unigramlmweight3
, !backend
) {
58 Xapian::LMWeight
wt(79898.0, Xapian::Weight::JELINEK_MERCER_SMOOTHING
, 0.5, 1.0);
61 Xapian::LMWeight
* t2
= t
.unserialise(wt
.serialise() + "X");
62 // Make sure we actually use the weight.
63 bool empty
= t2
->name().empty();
66 FAIL_TEST("Serialised LMWeight with junk appended unserialised to empty name!");
67 FAIL_TEST("Serialised LMWeight with junk appended unserialised OK");
68 } catch (const Xapian::SerialisationError
&e
) {
69 TEST(e
.get_msg().find("LM") != string::npos
);
74 // Test exception for junk after serialised weight.
75 DEFINE_TESTCASE(bm25weight3
, !backend
) {
76 Xapian::BM25Weight
wt(2.0, 0.5, 1.3, 0.6, 0.01);
79 Xapian::BM25Weight
* b2
= b
.unserialise(wt
.serialise() + "X");
80 // Make sure we actually use the weight.
81 bool empty
= b2
->name().empty();
84 FAIL_TEST("Serialised BM25Weight with junk appended unserialised to empty name!");
85 FAIL_TEST("Serialised BM25Weight with junk appended unserialised OK");
86 } catch (const Xapian::SerialisationError
&e
) {
87 TEST(e
.get_msg().find("BM25") != string::npos
);
92 // Test parameter combinations which should be unaffected by doclength.
93 DEFINE_TESTCASE(bm25weight4
, backend
) {
94 Xapian::Database db
= get_database("apitest_simpledata");
95 Xapian::Enquire
enquire(db
);
96 enquire
.set_query(Xapian::Query("paragraph"));
99 enquire
.set_weighting_scheme(Xapian::BM25Weight(1, 0, 1, 0, 0.5));
100 mset
= enquire
.get_mset(0, 10);
101 TEST_EQUAL(mset
.size(), 5);
102 // Expect: wdf has an effect on weight, but doclen doesn't.
103 TEST_REL(mset
[0].get_weight(),>,mset
[1].get_weight());
104 TEST_EQUAL_DOUBLE(mset
[1].get_weight(), mset
[2].get_weight());
105 TEST_REL(mset
[2].get_weight(),>,mset
[3].get_weight());
106 TEST_EQUAL_DOUBLE(mset
[3].get_weight(), mset
[4].get_weight());
108 enquire
.set_weighting_scheme(Xapian::BM25Weight(0, 0, 1, 1, 0.5));
109 mset
= enquire
.get_mset(0, 10);
110 TEST_EQUAL(mset
.size(), 5);
111 // Expect: neither wdf nor doclen affects weight.
112 TEST_EQUAL_DOUBLE(mset
[0].get_weight(), mset
[4].get_weight());
117 /// Test non-zero k2 with zero k1.
118 // Regression test for bug fixed in 1.2.17 and 1.3.2.
119 DEFINE_TESTCASE(bm25weight5
, backend
) {
120 Xapian::Database db
= get_database("apitest_simpledata");
121 Xapian::Enquire
enquire(db
);
122 enquire
.set_query(Xapian::Query("paragraph"));
125 enquire
.set_weighting_scheme(Xapian::BM25Weight(0, 1, 1, 0.5, 0.5));
126 mset
= enquire
.get_mset(0, 10);
127 TEST_EQUAL(mset
.size(), 5);
128 // Expect: wdf has no effect on weight; shorter docs rank higher.
129 mset_expect_order(mset
, 3, 5, 1, 4, 2);
130 TEST_EQUAL_DOUBLE(mset
[0].get_weight(), mset
[1].get_weight());
131 TEST_REL(mset
[1].get_weight(),>,mset
[2].get_weight());
132 TEST_REL(mset
[2].get_weight(),>,mset
[3].get_weight());
133 TEST_REL(mset
[3].get_weight(),>,mset
[4].get_weight());
138 // Test exception for junk after serialised weight.
139 DEFINE_TESTCASE(bm25plusweight1
, !backend
) {
140 Xapian::BM25PlusWeight
wt(2.0, 0.1, 1.3, 0.6, 0.01, 0.5);
142 Xapian::BM25PlusWeight b
;
143 Xapian::BM25PlusWeight
* b2
= b
.unserialise(wt
.serialise() + "X");
144 // Make sure we actually use the weight.
145 bool empty
= b2
->name().empty();
148 FAIL_TEST("Serialised BM25PlusWeight with junk appended unserialised to empty name!");
149 FAIL_TEST("Serialised BM25PlusWeight with junk appended unserialised OK");
150 } catch (const Xapian::SerialisationError
&e
) {
151 TEST(e
.get_msg().find("BM25Plus") != string::npos
);
156 // Test parameter combinations which should be unaffected by doclength.
157 DEFINE_TESTCASE(bm25plusweight2
, backend
) {
158 Xapian::Database db
= get_database("apitest_simpledata");
159 Xapian::Enquire
enquire(db
);
160 enquire
.set_query(Xapian::Query("paragraph"));
163 enquire
.set_weighting_scheme(Xapian::BM25PlusWeight(1, 0, 1, 0, 0.5, 1));
164 mset
= enquire
.get_mset(0, 10);
165 TEST_EQUAL(mset
.size(), 5);
166 // Expect: wdf has an effect on weight, but doclen doesn't.
167 TEST_REL(mset
[0].get_weight(),>,mset
[1].get_weight());
168 TEST_EQUAL_DOUBLE(mset
[1].get_weight(), mset
[2].get_weight());
169 TEST_REL(mset
[2].get_weight(),>,mset
[3].get_weight());
170 TEST_EQUAL_DOUBLE(mset
[3].get_weight(), mset
[4].get_weight());
172 enquire
.set_weighting_scheme(Xapian::BM25PlusWeight(0, 0, 1, 1, 0.5, 1));
173 mset
= enquire
.get_mset(0, 10);
174 TEST_EQUAL(mset
.size(), 5);
175 // Expect: neither wdf nor doclen affects weight.
176 TEST_EQUAL_DOUBLE(mset
[0].get_weight(), mset
[4].get_weight());
181 // Regression test for a mistake corrected in the BM25+ implementation.
182 DEFINE_TESTCASE(bm25plusweight3
, backend
) {
183 Xapian::Database db
= get_database("apitest_simpledata");
184 Xapian::Enquire
enquire(db
);
185 enquire
.set_query(Xapian::Query("paragraph"));
188 enquire
.set_weighting_scheme(Xapian::BM25PlusWeight(1, 0, 1, 0.5, 0.5, 1));
189 mset
= enquire
.get_mset(0, 10);
190 TEST_EQUAL(mset
.size(), 5);
192 // The value of each doc weight calculated manually from the BM25+ formulae
193 // by using the respective document statistics.
194 TEST_EQUAL_DOUBLE(mset
[0].get_weight(), 0.7920796567487473);
195 TEST_EQUAL_DOUBLE(mset
[1].get_weight(), 0.7846980783848447);
196 TEST_EQUAL_DOUBLE(mset
[2].get_weight(), 0.7558817623365934);
197 TEST_EQUAL_DOUBLE(mset
[3].get_weight(), 0.7210119356168847);
198 TEST_EQUAL_DOUBLE(mset
[4].get_weight(), 0.7210119356168847);
203 // Test exception for junk after serialised weight.
204 DEFINE_TESTCASE(inl2weight1
, !backend
) {
205 Xapian::InL2Weight
wt(2.0);
207 Xapian::InL2Weight b
;
208 Xapian::InL2Weight
* b2
= b
.unserialise(wt
.serialise() + "X");
209 // Make sure we actually use the weight.
210 bool empty
= b2
->name().empty();
213 FAIL_TEST("Serialised inl2weight with junk appended unserialised to empty name!");
214 FAIL_TEST("Serialised inl2weight with junk appended unserialised OK");
215 } catch (const Xapian::SerialisationError
&e
) {
216 TEST(e
.get_msg().find("InL2") != string::npos
);
222 // Test for invalid values of c.
223 DEFINE_TESTCASE(inl2weight2
, !backend
) {
224 // InvalidArgumentError should be thrown if the parameter c is invalid.
225 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
226 Xapian::InL2Weight
wt(-2.0));
228 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
229 Xapian::InL2Weight
wt2(0.0));
231 /* Parameter c should be set to 1.0 by constructor if none is given. */
232 Xapian::InL2Weight weight2
;
233 TEST_EQUAL(weight2
.serialise(), Xapian::InL2Weight(1.0).serialise());
238 // Feature tests for Inl2Weight
239 DEFINE_TESTCASE(inl2weight3
, backend
) {
240 Xapian::Database db
= get_database("apitest_simpledata");
241 Xapian::Enquire
enquire(db
);
242 Xapian::Query
query("banana");
244 enquire
.set_query(query
);
245 enquire
.set_weighting_scheme(Xapian::InL2Weight(2.0));
248 mset1
= enquire
.get_mset(0, 10);
249 TEST_EQUAL(mset1
.size(), 1);
250 mset_expect_order(mset1
, 6);
252 /* The value has been calculated in the python interpreter by looking at the
253 * database statistics. */
254 TEST_EQUAL_DOUBLE(mset1
[0].get_weight(), 1.559711143842063);
256 // Test with OP_SCALE_WEIGHT.
257 enquire
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, query
, 15.0));
258 enquire
.set_weighting_scheme(Xapian::InL2Weight(2.0));
261 mset2
= enquire
.get_mset(0, 10);
262 TEST_EQUAL(mset2
.size(), 1);
263 TEST_NOT_EQUAL_DOUBLE(mset1
[0].get_weight(), 0.0);
264 TEST_EQUAL_DOUBLE(15.0 * mset1
[0].get_weight(), mset2
[0].get_weight());
269 // Test exception for junk after serialised weight.
270 DEFINE_TESTCASE(ifb2weight1
, !backend
) {
271 Xapian::IfB2Weight
wt(2.0);
273 Xapian::IfB2Weight b
;
274 Xapian::IfB2Weight
* b2
= b
.unserialise(wt
.serialise() + "X");
275 // Make sure we actually use the weight.
276 bool empty
= b2
->name().empty();
279 FAIL_TEST("Serialised IfB2Weight with junk appended unserialised to empty name!");
280 FAIL_TEST("Serialised IfB2Weight with junk appended unserialised OK");
281 } catch (const Xapian::SerialisationError
&e
) {
282 TEST(e
.get_msg().find("IfB2") != string::npos
);
287 // Test for invalid values of c.
288 DEFINE_TESTCASE(ifb2weight2
, !backend
) {
289 // InvalidArgumentError should be thrown if the parameter c is invalid.
290 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
291 Xapian::IfB2Weight
wt(-2.0));
293 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
294 Xapian::IfB2Weight
wt2(0.0));
296 /* Parameter c should be set to 1.0 by constructor if none is given. */
297 Xapian::IfB2Weight weight2
;
298 TEST_EQUAL(weight2
.serialise(), Xapian::IfB2Weight(1.0).serialise());
304 DEFINE_TESTCASE(ifb2weight3
, backend
) {
305 Xapian::Database db
= get_database("apitest_simpledata");
306 Xapian::Enquire
enquire(db
);
307 Xapian::Query
query("banana");
309 enquire
.set_query(query
);
310 enquire
.set_weighting_scheme(Xapian::IfB2Weight(2.0));
313 mset1
= enquire
.get_mset(0, 10);
314 TEST_EQUAL(mset1
.size(), 1);
316 /* The value of the weight has been manually calculated using the statistics
317 * of the test database. */
318 TEST_EQUAL_DOUBLE(mset1
[0].get_weight(), 3.119422287684126);
320 // Test with OP_SCALE_WEIGHT.
321 enquire
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, query
, 15.0));
322 enquire
.set_weighting_scheme(Xapian::IfB2Weight(2.0));
325 mset2
= enquire
.get_mset(0, 10);
326 TEST_EQUAL(mset2
.size(), 1);
327 TEST_NOT_EQUAL_DOUBLE(mset1
[0].get_weight(), 0.0);
328 TEST_EQUAL_DOUBLE(15.0 * mset1
[0].get_weight(), mset2
[0].get_weight());
333 // Test exception for junk after serialised weight.
334 DEFINE_TESTCASE(ineb2weight1
, !backend
) {
335 Xapian::IneB2Weight
wt(2.0);
337 Xapian::IneB2Weight b
;
338 Xapian::IneB2Weight
* b2
= b
.unserialise(wt
.serialise() + "X");
339 // Make sure we actually use the weight.
340 bool empty
= b2
->name().empty();
343 FAIL_TEST("Serialised ineb2weight with junk appended unserialised to empty name!");
344 FAIL_TEST("Serialised ineb2weight with junk appended unserialised OK");
345 } catch (const Xapian::SerialisationError
&e
) {
346 TEST(e
.get_msg().find("IneB2") != string::npos
);
352 // Test for invalid values of c.
353 DEFINE_TESTCASE(ineb2weight2
, !backend
) {
354 // InvalidArgumentError should be thrown if parameter c is invalid.
355 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
356 Xapian::IneB2Weight
wt(-2.0));
358 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
359 Xapian::IneB2Weight
wt2(0.0));
361 /* Parameter c should be set to 1.0 by constructor if none is given. */
362 Xapian::IneB2Weight weight2
;
363 TEST_EQUAL(weight2
.serialise(), Xapian::IneB2Weight(1.0).serialise());
369 DEFINE_TESTCASE(ineb2weight3
, backend
) {
370 Xapian::Database db
= get_database("apitest_simpledata");
371 Xapian::Enquire
enquire(db
);
372 Xapian::Query
query("paragraph");
373 enquire
.set_query(query
);
374 enquire
.set_weighting_scheme(Xapian::IneB2Weight(2.0));
377 mset1
= enquire
.get_mset(0, 10);
378 TEST_EQUAL(mset1
.size(), 5);
380 // The third document in the database is 4th in the ranking.
381 /* The weight value has been manually calculated by using the statistics
382 * of the test database. */
383 TEST_EQUAL_DOUBLE(mset1
[4].get_weight(), 0.61709730297692400036);
385 // Test with OP_SCALE_WEIGHT.
386 enquire
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, query
, 15.0));
387 enquire
.set_weighting_scheme(Xapian::IneB2Weight(2.0));
390 mset2
= enquire
.get_mset(0, 10);
391 TEST_EQUAL(mset2
.size(), 5);
393 TEST_NOT_EQUAL_DOUBLE(mset1
[0].get_weight(), 0.0);
394 for (int i
= 0; i
< 5; ++i
) {
395 TEST_EQUAL_DOUBLE(15.0 * mset1
[i
].get_weight(), mset2
[i
].get_weight());
401 // Test exception for junk after serialised weight.
402 DEFINE_TESTCASE(bb2weight1
, !backend
) {
403 Xapian::BB2Weight
wt(2.0);
406 Xapian::BB2Weight
* b2
= b
.unserialise(wt
.serialise() + "X");
407 // Make sure we actually use the weight.
408 bool empty
= b2
->name().empty();
411 FAIL_TEST("Serialised BB2Weight with junk appended unserialised to empty name!");
412 FAIL_TEST("Serialised BB2Weight with junk appended unserialised OK");
413 } catch (const Xapian::SerialisationError
&e
) {
414 TEST(e
.get_msg().find("BB2") != string::npos
);
419 // Test for invalid values of c.
420 DEFINE_TESTCASE(bb2weight2
, !backend
) {
421 // InvalidArgumentError should be thrown if the parameter c is invalid.
422 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
423 Xapian::BB2Weight
wt(-2.0));
425 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
426 Xapian::BB2Weight
wt2(0.0));
428 /* Parameter c should be set to 1.0 by constructor if none is given. */
429 Xapian::BB2Weight weight2
;
430 TEST_EQUAL(weight2
.serialise(), Xapian::BB2Weight(1.0).serialise());
436 DEFINE_TESTCASE(bb2weight3
, backend
) {
437 Xapian::Database db
= get_database("apitest_simpledata");
438 Xapian::Enquire
enquire(db
);
439 Xapian::Query
query("paragraph");
441 enquire
.set_query(query
);
442 enquire
.set_weighting_scheme(Xapian::BB2Weight(2.0));
445 mset1
= enquire
.get_mset(0, 10);
446 TEST_EQUAL(mset1
.size(), 5);
447 /* The third document in the database has the highest weight and is the
448 * first in the mset. */
449 // Value calculated manually by using the statistics of the test database.
450 TEST_EQUAL_DOUBLE(mset1
[0].get_weight(), 1.6823696969784483);
452 // Test with OP_SCALE_WEIGHT.
453 enquire
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, query
, 15.0));
454 enquire
.set_weighting_scheme(Xapian::BB2Weight(2.0));
457 mset2
= enquire
.get_mset(0, 10);
458 TEST_EQUAL(mset2
.size(), 5);
460 TEST_NOT_EQUAL_DOUBLE(mset1
[0].get_weight(), 0.0);
461 for (int i
= 0; i
< 5; ++i
) {
462 TEST_EQUAL_DOUBLE(15.0 * mset1
[i
].get_weight(), mset2
[i
].get_weight());
465 // Test with OP_SCALE_WEIGHT and a small factor (regression test, as we
466 // were applying the factor to the upper bound twice).
467 enquire
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, query
, 1.0 / 1024));
468 enquire
.set_weighting_scheme(Xapian::BB2Weight(2.0));
471 mset3
= enquire
.get_mset(0, 10);
472 TEST_EQUAL(mset3
.size(), 5);
474 for (int i
= 0; i
< 5; ++i
) {
475 TEST_EQUAL_DOUBLE(mset1
[i
].get_weight(), mset3
[i
].get_weight() * 1024);
481 // Regression test: we used to calculate log2(0) when there was only one doc.
482 DEFINE_TESTCASE(bb2weight4
, backend
) {
483 Xapian::Database db
= get_database("apitest_onedoc");
484 Xapian::Enquire
enquire(db
);
485 Xapian::Query
query("word");
487 enquire
.set_query(query
);
488 enquire
.set_weighting_scheme(Xapian::BB2Weight());
491 mset1
= enquire
.get_mset(0, 10);
492 TEST_EQUAL(mset1
.size(), 1);
493 // Zero weight is a bit bogus, but what we currently give.
494 TEST_EQUAL_DOUBLE(mset1
[0].get_weight(), 0);
500 DEFINE_TESTCASE(dlhweight1
, backend
) {
501 Xapian::Database db
= get_database("apitest_simpledata");
502 Xapian::Enquire
enquire(db
);
503 Xapian::Query
query("a");
505 enquire
.set_query(query
);
506 enquire
.set_weighting_scheme(Xapian::DLHWeight());
509 mset1
= enquire
.get_mset(0, 10);
510 TEST_EQUAL(mset1
.size(), 3);
511 mset_expect_order(mset1
, 3, 1, 2);
512 // Weights calculated manually using stats from the database.
513 TEST_EQUAL_DOUBLE(mset1
[0].get_weight(), 1.0046477754371292362);
514 TEST_EQUAL_DOUBLE(mset1
[1].get_weight(), 0.97621929514640352757);
515 // The following weight would be negative but gets clamped to 0.
516 TEST_EQUAL_DOUBLE(mset1
[2].get_weight(), 0.0);
518 // Test with OP_SCALE_WEIGHT.
519 enquire
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, query
, 15.0));
520 enquire
.set_weighting_scheme(Xapian::DLHWeight());
523 mset2
= enquire
.get_mset(0, 10);
524 TEST_EQUAL(mset2
.size(), 3);
526 TEST_NOT_EQUAL_DOUBLE(mset1
[0].get_weight(), 0.0);
527 for (Xapian::doccount i
= 0; i
< mset2
.size(); ++i
) {
528 TEST_EQUAL_DOUBLE(15.0 * mset1
[i
].get_weight(), mset2
[i
].get_weight());
534 // Test exception for junk after serialised weight.
535 DEFINE_TESTCASE(dlhweight2
, !backend
) {
536 Xapian::DLHWeight wt
;
539 Xapian::DLHWeight
* t2
= t
.unserialise(wt
.serialise() + "X");
540 // Make sure we actually use the weight.
541 bool empty
= t2
->name().empty();
544 FAIL_TEST("Serialised DLHWeight with junk appended unserialised to empty name!");
545 FAIL_TEST("Serialised DLHWeight with junk appended unserialised OK");
546 } catch (const Xapian::SerialisationError
&e
) {
547 TEST(e
.get_msg().find("DLH") != string::npos
);
553 gen_wdf_eq_doclen_db(Xapian::WritableDatabase
& db
, const string
&)
555 Xapian::Document doc
;
556 doc
.add_term("solo", 37);
557 db
.add_document(doc
);
560 // Test wdf == doclen.
561 DEFINE_TESTCASE(dlhweight3
, generated
) {
562 Xapian::Database db
= get_database("wdf_eq_doclen", gen_wdf_eq_doclen_db
);
563 Xapian::Enquire
enquire(db
);
564 Xapian::Query
query("solo");
566 enquire
.set_query(query
);
567 enquire
.set_weighting_scheme(Xapian::DLHWeight());
570 mset1
= enquire
.get_mset(0, 10);
571 TEST_EQUAL(mset1
.size(), 1);
572 // Weight gets clamped to zero.
573 TEST_EQUAL_DOUBLE(mset1
[0].get_weight(), 0.0);
578 // Test exception for junk after serialised weight.
579 DEFINE_TESTCASE(pl2weight1
, !backend
) {
580 Xapian::PL2Weight
wt(2.0);
583 Xapian::PL2Weight
* b2
= b
.unserialise(wt
.serialise() + "X");
584 // Make sure we actually use the weight.
585 bool empty
= b2
->name().empty();
588 FAIL_TEST("Serialised PL2Weight with junk appended unserialised to empty name!");
589 FAIL_TEST("Serialised PL2Weight with junk appended unserialised OK");
590 } catch (const Xapian::SerialisationError
&e
) {
591 TEST(e
.get_msg().find("PL2") != string::npos
);
596 // Test for invalid values of c.
597 DEFINE_TESTCASE(pl2weight2
, !backend
) {
598 // InvalidArgumentError should be thrown if parameter c is invalid.
599 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
600 Xapian::PL2Weight
wt(-2.0));
602 /* Parameter c should be set to 1.0 by constructor if none is given. */
603 Xapian::PL2Weight weight2
;
604 TEST_EQUAL(weight2
.serialise(), Xapian::PL2Weight(1.0).serialise());
610 DEFINE_TESTCASE(pl2weight3
, backend
) {
611 Xapian::Database db
= get_database("apitest_simpledata");
612 Xapian::Enquire
enquire(db
);
613 Xapian::Query
query("paragraph");
614 enquire
.set_query(query
);
617 enquire
.set_weighting_scheme(Xapian::PL2Weight(2.0));
618 mset
= enquire
.get_mset(0, 10);
619 TEST_EQUAL(mset
.size(), 5);
620 // Expected weight difference calculated in extended precision using stats
621 // from the test database.
622 TEST_EQUAL_DOUBLE(mset
[2].get_weight(),
623 mset
[3].get_weight() + 0.0086861771701328694);
625 // Test with OP_SCALE_WEIGHT.
626 enquire
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, query
, 15.0));
627 enquire
.set_weighting_scheme(Xapian::PL2Weight(2.0));
630 mset2
= enquire
.get_mset(0, 10);
631 TEST_EQUAL(mset2
.size(), 5);
632 TEST_NOT_EQUAL_DOUBLE(mset
[0].get_weight(), 0.0);
633 for (int i
= 0; i
< 5; ++i
) {
634 TEST_EQUAL_DOUBLE(15.0 * mset
[i
].get_weight(), mset2
[i
].get_weight());
640 // Test exception for junk after serialised weight.
641 DEFINE_TESTCASE(pl2plusweight1
, !backend
) {
642 Xapian::PL2PlusWeight
wt(2.0, 0.9);
644 Xapian::PL2PlusWeight b
;
645 Xapian::PL2PlusWeight
* b2
= b
.unserialise(wt
.serialise() + "X");
646 // Make sure we actually use the weight.
647 bool empty
= b2
->name().empty();
650 FAIL_TEST("Serialised PL2PlusWeight with junk appended unserialised to empty name!");
651 FAIL_TEST("Serialised PL2PlusWeight with junk appended unserialised OK");
652 } catch (const Xapian::SerialisationError
&e
) {
653 TEST(e
.get_msg().find("PL2Plus") != string::npos
);
658 // Test for invalid values of parameters, c and delta.
659 DEFINE_TESTCASE(pl2plusweight2
, !backend
) {
660 // InvalidArgumentError should be thrown if parameter c is invalid.
661 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
662 Xapian::PL2PlusWeight
wt(-2.0, 0.9));
664 // InvalidArgumentError should be thrown if parameter delta is invalid.
665 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
666 Xapian::PL2PlusWeight
wt(1.0, -1.9));
671 // Test for default values of parameters, c and delta.
672 DEFINE_TESTCASE(pl2plusweight3
, !backend
) {
673 Xapian::PL2PlusWeight weight2
;
675 /* Parameter c should be set to 1.0 by constructor if none is given. */
676 TEST_EQUAL(weight2
.serialise(), Xapian::PL2PlusWeight(1.0, 0.8).serialise());
678 /* Parameter delta should be set to 0.8 by constructor if none is given. */
679 TEST_EQUAL(weight2
.serialise(), Xapian::PL2PlusWeight(1.0, 0.8).serialise());
684 // Feature Test 1 for PL2PlusWeight.
685 DEFINE_TESTCASE(pl2plusweight4
, backend
) {
686 Xapian::Database db
= get_database("apitest_simpledata");
687 Xapian::Enquire
enquire(db
);
688 enquire
.set_query(Xapian::Query("paragraph"));
691 enquire
.set_weighting_scheme(Xapian::PL2PlusWeight(2.0, 0.8));
692 mset
= enquire
.get_mset(0, 10);
693 TEST_EQUAL(mset
.size(), 5);
694 // Expected weight difference calculated in extended precision using stats
695 // from the test database.
696 TEST_EQUAL_DOUBLE(mset
[2].get_weight(),
697 mset
[3].get_weight() + 0.0086861771701328694);
702 // Feature Test 2 for PL2PlusWeight
703 DEFINE_TESTCASE(pl2plusweight5
, backend
) {
704 Xapian::Database db
= get_database("apitest_simpledata");
705 Xapian::Enquire
enquire(db
);
706 Xapian::Query
query("word");
707 enquire
.set_query(query
);
710 enquire
.set_weighting_scheme(Xapian::PL2PlusWeight(1.0, 0.8));
711 mset
= enquire
.get_mset(0, 10);
712 // Expect MSet contains two documents having query "word".
713 TEST_EQUAL(mset
.size(), 2);
714 // Expect Document 2 has higher weight than document 4 because
715 // "word" appears more no. of times in document 2 than document 4.
716 mset_expect_order(mset
, 2, 4);
718 // Test with OP_SCALE_WEIGHT.
719 enquire
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, query
, 15.0));
720 enquire
.set_weighting_scheme(Xapian::PL2PlusWeight(1.0, 0.8));
723 mset2
= enquire
.get_mset(0, 10);
724 TEST_EQUAL(mset2
.size(), mset
.size());
725 TEST_NOT_EQUAL_DOUBLE(mset
[0].get_weight(), 0.0);
726 for (Xapian::doccount i
= 0; i
< mset
.size(); ++i
) {
727 TEST_EQUAL_DOUBLE(15.0 * mset
[i
].get_weight(), mset2
[i
].get_weight());
734 DEFINE_TESTCASE(dphweight1
, backend
) {
735 Xapian::Database db
= get_database("apitest_simpledata");
736 Xapian::Enquire
enquire(db
);
737 Xapian::Query
query("paragraph");
739 enquire
.set_query(query
);
740 enquire
.set_weighting_scheme(Xapian::DPHWeight());
743 mset1
= enquire
.get_mset(0, 10);
744 TEST_EQUAL(mset1
.size(), 5);
745 /* The weight has been calculated manually by using the statistics of the
747 TEST_EQUAL_DOUBLE(mset1
[2].get_weight() - mset1
[4].get_weight(), 0.542623617687990167);
749 // Test with OP_SCALE_WEIGHT.
750 enquire
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, query
, 15.0));
751 enquire
.set_weighting_scheme(Xapian::DPHWeight());
754 mset2
= enquire
.get_mset(0, 10);
755 TEST_EQUAL(mset2
.size(), 5);
756 TEST_NOT_EQUAL_DOUBLE(mset1
[0].get_weight(), 0.0);
757 for (int i
= 0; i
< 5; ++i
) {
758 TEST_EQUAL_DOUBLE(15.0 * mset1
[i
].get_weight(), mset2
[i
].get_weight());
764 // Test exception for junk after serialised weight.
765 DEFINE_TESTCASE(dphweight2
, !backend
) {
766 Xapian::DPHWeight wt
;
769 Xapian::DPHWeight
* t2
= t
.unserialise(wt
.serialise() + "X");
770 // Make sure we actually use the weight.
771 bool empty
= t2
->name().empty();
774 FAIL_TEST("Serialised DPHWeight with junk appended unserialised to empty name!");
775 FAIL_TEST("Serialised DPHWeight with junk appended unserialised OK");
776 } catch (const Xapian::SerialisationError
&e
) {
777 TEST(e
.get_msg().find("DPH") != string::npos
);
782 // Test wdf == doclen.
783 DEFINE_TESTCASE(dphweight3
, generated
) {
784 Xapian::Database db
= get_database("wdf_eq_doclen", gen_wdf_eq_doclen_db
);
785 Xapian::Enquire
enquire(db
);
786 Xapian::Query
query("solo");
788 enquire
.set_query(query
);
789 enquire
.set_weighting_scheme(Xapian::DPHWeight());
792 mset1
= enquire
.get_mset(0, 10);
793 TEST_EQUAL(mset1
.size(), 1);
794 // Weight gets clamped to zero.
795 TEST_EQUAL_DOUBLE(mset1
[0].get_weight(), 0.0);
800 // Test for various cases of normalization string.
801 DEFINE_TESTCASE(tfidfweight1
, !backend
) {
802 // InvalidArgumentError should be thrown if normalization string is invalid
803 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
804 Xapian::TfIdfWeight
b("JOHN_LENNON"));
806 TEST_EXCEPTION(Xapian::InvalidArgumentError
,
807 Xapian::TfIdfWeight
b("LOL"));
809 /* Normalization string should be set to "ntn" by constructor if none is
811 Xapian::TfIdfWeight weight2
;
812 TEST_EQUAL(weight2
.serialise(), Xapian::TfIdfWeight("ntn").serialise());
817 // Test exception for junk after serialised weight.
818 DEFINE_TESTCASE(tfidfweight2
, !backend
) {
819 Xapian::TfIdfWeight
wt("ntn");
821 Xapian::TfIdfWeight b
;
822 Xapian::TfIdfWeight
* b2
= b
.unserialise(wt
.serialise() + "X");
823 // Make sure we actually use the weight.
824 bool empty
= b2
->name().empty();
827 FAIL_TEST("Serialised TfIdfWeight with junk appended unserialised to empty name!");
828 FAIL_TEST("Serialised TfIdfWeight with junk appended unserialised OK");
829 } catch (const Xapian::SerialisationError
&e
) {
830 TEST(e
.get_msg().find("TfIdf") != string::npos
);
835 // Feature tests for various normalization functions.
836 DEFINE_TESTCASE(tfidfweight3
, backend
) {
837 Xapian::Database db
= get_database("apitest_simpledata");
838 Xapian::Enquire
enquire(db
);
839 Xapian::Query
query("word");
842 // Check for "ntn" when termfreq != N
843 enquire
.set_query(query
);
844 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("ntn"));
845 mset
= enquire
.get_mset(0, 10);
846 TEST_EQUAL(mset
.size(), 2);
847 // doc 2 should have higher weight than 4 as only tf(wdf) will dominate.
848 mset_expect_order(mset
, 2, 4);
849 TEST_EQUAL_DOUBLE(mset
[0].get_weight(), 8.0 * log(6.0 / 2));
851 // Check that wqf is taken into account.
852 enquire
.set_query(Xapian::Query("word", 2));
853 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("ntn"));
854 Xapian::MSet mset2
= enquire
.get_mset(0, 10);
855 TEST_EQUAL(mset2
.size(), 2);
856 // wqf is 2, so weights should be doubled.
857 TEST_EQUAL_DOUBLE(mset
[0].get_weight() * 2, mset2
[0].get_weight());
858 TEST_EQUAL_DOUBLE(mset
[1].get_weight() * 2, mset2
[1].get_weight());
860 // Test with OP_SCALE_WEIGHT.
861 enquire
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, query
, 15.0));
862 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("ntn"));
863 mset2
= enquire
.get_mset(0, 10);
864 TEST_EQUAL(mset2
.size(), 2);
865 // doc 2 should have higher weight than 4 as only tf(wdf) will dominate.
866 mset_expect_order(mset2
, 2, 4);
867 TEST_NOT_EQUAL_DOUBLE(mset
[0].get_weight(), 0.0);
868 TEST_EQUAL_DOUBLE(15 * mset
[0].get_weight(), mset2
[0].get_weight());
870 // check for "nfn" when termfreq != N
871 enquire
.set_query(query
);
872 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("nfn"));
873 mset
= enquire
.get_mset(0, 10);
874 TEST_EQUAL(mset
.size(), 2);
875 mset_expect_order(mset
, 2, 4);
876 TEST_EQUAL_DOUBLE(mset
[0].get_weight(), 8.0 / 2);
878 // check for "nsn" when termfreq != N
879 enquire
.set_query(query
);
880 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("nsn"));
881 mset
= enquire
.get_mset(0, 10);
882 TEST_EQUAL(mset
.size(), 2);
883 mset_expect_order(mset
, 2, 4);
884 TEST_EQUAL_DOUBLE(mset
[0].get_weight(), 8.0 * pow(log(6.0 / 2), 2.0));
886 // Check for "bnn" and for both branches of 'b'.
887 enquire
.set_query(Xapian::Query("test"));
888 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("bnn"));
889 mset
= enquire
.get_mset(0, 10);
890 TEST_EQUAL(mset
.size(), 1);
891 mset_expect_order(mset
, 1);
892 TEST_EQUAL_DOUBLE(mset
[0].get_weight(), 1.0);
894 // Check for "lnn" and for both branches of 'l'.
895 enquire
.set_query(Xapian::Query("word"));
896 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("lnn"));
897 mset
= enquire
.get_mset(0, 10);
898 TEST_EQUAL(mset
.size(), 2);
899 mset_expect_order(mset
, 2, 4);
900 TEST_EQUAL_DOUBLE(mset
[0].get_weight(), 1 + log(8.0)); // idfn=1 and so wt=tfn=1+log(tf)
901 TEST_EQUAL_DOUBLE(mset
[1].get_weight(), 1.0); // idfn=1 and wt=tfn=1+log(tf)=1+log(1)=1
904 enquire
.set_query(Xapian::Query("paragraph"));
905 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("snn")); // idf=1 and tfn=tf*tf
906 mset
= enquire
.get_mset(0, 10);
907 TEST_EQUAL(mset
.size(), 5);
908 mset_expect_order(mset
, 2, 1, 4, 3, 5);
909 TEST_EQUAL_DOUBLE(mset
[0].get_weight(), 9.0);
910 TEST_EQUAL_DOUBLE(mset
[4].get_weight(), 1.0);
912 // Check for "ntn" when termfreq=N
913 enquire
.set_query(Xapian::Query("this")); // N=termfreq amd so idfn=0 for "t"
914 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("ntn"));
915 mset
= enquire
.get_mset(0, 10);
916 TEST_EQUAL(mset
.size(), 6);
917 mset_expect_order(mset
, 1, 2, 3, 4, 5, 6);
918 for (int i
= 0; i
< 6; ++i
) {
919 TEST_EQUAL_DOUBLE(mset
[i
].get_weight(), 0.0);
922 // Check for "npn" and for both branches of 'p'
923 enquire
.set_query(Xapian::Query("this")); // N=termfreq and so idfn=0 for "p"
924 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("npn"));
925 mset
= enquire
.get_mset(0, 10);
926 TEST_EQUAL(mset
.size(), 6);
927 mset_expect_order(mset
, 1, 2, 3, 4, 5, 6);
928 for (int i
= 0; i
< 6; ++i
) {
929 TEST_EQUAL_DOUBLE(mset
[i
].get_weight(), 0.0);
932 enquire
.set_query(Xapian::Query("word"));
933 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("npn"));
934 mset
= enquire
.get_mset(0, 10);
935 TEST_EQUAL(mset
.size(), 2);
936 mset_expect_order(mset
, 2, 4);
937 TEST_EQUAL_DOUBLE(mset
[0].get_weight(), 8 * log((6.0 - 2) / 2));
938 TEST_EQUAL_DOUBLE(mset
[1].get_weight(), 1 * log((6.0 - 2) / 2));
943 // Feature tests for pivoted normalization functions.
944 DEFINE_TESTCASE(tfidfweight4
, backend
) {
945 Xapian::Database db
= get_database("apitest_simpledata");
946 Xapian::Enquire
enquire(db
);
947 Xapian::Query
query("paragraph");
950 // Check for "PPn" normalization string.
951 enquire
.set_query(query
);
952 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("PPn", 0.2, 1.0));
953 mset
= enquire
.get_mset(0, 10);
954 TEST_EQUAL(mset
.size(), 5);
955 // Shorter docs should ranker higher if wqf is equal among all the docs.
956 TEST_REL(mset
[0].get_weight(),>,mset
[1].get_weight());
957 TEST_REL(mset
[2].get_weight(),>,mset
[3].get_weight());
959 // Check that wqf is taken into account.
960 enquire
.set_query(Xapian::Query("paragraph", 2));
961 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("PPn", 0.2, 1.0));
962 Xapian::MSet mset2
= enquire
.get_mset(0, 10);
963 TEST_EQUAL(mset2
.size(), 5);
964 // wqf is 2, so weights should be doubled.
965 TEST_EQUAL_DOUBLE(mset
[0].get_weight() * 2, mset2
[0].get_weight());
966 TEST_EQUAL_DOUBLE(mset
[1].get_weight() * 2, mset2
[1].get_weight());
968 // check for "nPn" which represents "xPx"
969 enquire
.set_query(Xapian::Query("word"));
970 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("nPn", 0.2, 1.0));
971 mset
= enquire
.get_mset(0, 10);
972 TEST_EQUAL(mset
.size(), 2);
973 // Expect doc 2 with query "word" to have higher weight than doc 4.
974 mset_expect_order(mset
, 2, 4);
976 // check for "Ptn" which represents "Pxx"
977 enquire
.set_query(Xapian::Query("word"));
978 enquire
.set_weighting_scheme(Xapian::TfIdfWeight("Ptn", 0.2, 1.0));
979 mset
= enquire
.get_mset(0, 10);
980 TEST_EQUAL(mset
.size(), 2);
981 // Expect doc 2 with query "word" to have higher weight than doc 4.
982 mset_expect_order(mset
, 2, 4);
// Xapian::Weight subclass used by the checkinitweight1 testcase.  It holds
// references to two caller-owned counters (zero_inits, non_zero_inits) and
// returns fixed values from the weighting methods visible here.
// NOTE(review): the body of init() and the declaration of `factor` are not
// visible in this excerpt; presumably init() records factor_ and bumps one of
// the two counters depending on whether it is zero - confirm.
987 class CheckInitWeight
: public Xapian::Weight
{
// Counters owned by the caller; held by reference so that objects produced by
// clone() refer to the same counters.
991 unsigned & zero_inits
, & non_zero_inits
;
// Bind to the caller's counters; factor starts at -1.0.
993 CheckInitWeight(unsigned &z
, unsigned &n
)
994 : factor(-1.0), zero_inits(z
), non_zero_inits(n
) { }
996 void init(double factor_
) {
// Return a new heap-allocated CheckInitWeight bound to the same counters.
1004 Weight
* clone() const {
1005 return new CheckInitWeight(zero_inits
, non_zero_inits
);
// All three parameters are unnamed, so they are not used by this method.
1008 double get_sumpart(Xapian::termcount
, Xapian::termcount
,
1009 Xapian::termcount
) const {
1013 double get_maxpart() const { return 1.0; }
// The term-independent contribution is the reciprocal of the document length.
1015 double get_sumextra(Xapian::termcount doclen
, Xapian::termcount
) const {
1016 return 1.0 / doclen
;
1019 double get_maxextra() const { return 1.0; }
1022 /// Regression test - check init() is called for the term-indep Weight obj.
1023 DEFINE_TESTCASE(checkinitweight1
, backend
&& !multi
&& !remote
) {
1024 Xapian::Database db
= get_database("apitest_simpledata");
1025 Xapian::Enquire
enquire(db
);
1026 Xapian::Query
q(Xapian::Query::OP_AND
,
1027 Xapian::Query("this"), Xapian::Query("paragraph"));
1028 enquire
.set_query(q
);
1029 unsigned zero_inits
= 0, non_zero_inits
= 0;
1030 CheckInitWeight
wt(zero_inits
, non_zero_inits
);
1031 enquire
.set_weighting_scheme(wt
);
1032 Xapian::MSet mset
= enquire
.get_mset(0, 3);
1033 TEST_EQUAL(zero_inits
, 1);
1034 TEST_EQUAL(non_zero_inits
, 2);
// Xapian::Weight subclass used by the checkstatsweight* testcases.  Its
// get_sumpart() compares the statistics reported by the Weight accessors
// (get_collection_size(), get_termfreq(), ...) against values read directly
// from the Database it was constructed with, using TEST_EQUAL/TEST_REL.
// NOTE(review): several lines of this class (e.g. the `factor` member, the
// branch headers inside get_sumpart() and most return statements) are not
// visible in this excerpt.
1038 class CheckStatsWeight
: public Xapian::Weight
{
// Database the expected statistics are read from.
1042 Xapian::Database db
;
1044 // When testing OP_SYNONYM, term2 is also set.
1045 // When testing OP_WILDCARD, term2 == "*"
1046 string term1
, term2
;
// Caller-owned accumulators, held by reference so that clones share them.
1048 Xapian::termcount
& sum
;
1049 Xapian::termcount
& sum_squares
;
// Bounds filled in from the const method get_maxpart(), hence mutable.
1051 mutable Xapian::termcount len_upper
;
1052 mutable Xapian::termcount len_lower
;
1053 mutable Xapian::termcount wdf_upper
;
// Main constructor: stores the arguments, initialises the cached bounds
// (len_upper and wdf_upper to 0, len_lower to the largest termcount value)
// and requests the statistics listed below via need_stat().
1055 CheckStatsWeight(const Xapian::Database
& db_
,
1056 const string
& term1_
,
1057 const string
& term2_
,
1058 Xapian::termcount
& sum_
,
1059 Xapian::termcount
& sum_squares_
)
1060 : factor(-1.0), db(db_
), term1(term1_
), term2(term2_
),
1061 sum(sum_
), sum_squares(sum_squares_
),
1062 len_upper(0), len_lower(Xapian::termcount(-1)), wdf_upper(0)
1064 need_stat(COLLECTION_SIZE
);
1065 need_stat(RSET_SIZE
);
1066 need_stat(AVERAGE_LENGTH
);
1067 need_stat(TERMFREQ
);
1068 need_stat(RELTERMFREQ
);
1069 need_stat(QUERY_LENGTH
);
1072 need_stat(DOC_LENGTH
);
1073 need_stat(DOC_LENGTH_MIN
);
1074 need_stat(DOC_LENGTH_MAX
);
1076 need_stat(COLLECTION_FREQ
);
1077 need_stat(UNIQUE_TERMS
);
// Single-term convenience constructor: delegates with an empty term2.
1080 CheckStatsWeight(const Xapian::Database
& db_
,
1081 const string
& term_
,
1082 Xapian::termcount
& sum_
,
1083 Xapian::termcount
& sum_squares_
)
1084 : CheckStatsWeight(db_
, term_
, string(), sum_
, sum_squares_
) { }
1086 void init(double factor_
) {
// Return a new heap-allocated CheckStatsWeight sharing db, terms and the
// accumulator references.
1090 Weight
* clone() const {
1091 return new CheckStatsWeight(db
, term1
, term2
, sum
, sum_squares
);
// Check the statistics for one (term, document) pair and accumulate wdf data.
1094 double get_sumpart(Xapian::termcount wdf
, Xapian::termcount doclen
,
1095 Xapian::termcount uniqueterms
) const {
1096 Xapian::doccount num_docs
= db
.get_doccount();
1097 TEST_EQUAL(get_collection_size(), num_docs
);
1098 TEST_EQUAL(get_rset_size(), 0);
1099 TEST_EQUAL(get_average_length(), db
.get_avlength());
// Single-term case: frequencies must match the database exactly.
1100 if (term2
.empty()) {
1101 TEST_EQUAL(get_termfreq(), db
.get_termfreq(term1
));
1102 TEST_EQUAL(get_collection_freq(), db
.get_collection_freq(term1
));
1103 TEST_EQUAL(get_query_length(), 1);
// Multi-term cases: only bounds on the frequencies are checked, using the
// maximum and sum over the individual terms.
1105 Xapian::doccount tfmax
= 0, tfsum
= 0;
1106 Xapian::termcount cfmax
= 0, cfsum
= 0;
1108 // OP_WILDCARD case.
// Here term1 is used as the prefix passed to allterms_begin()/allterms_end().
1109 for (auto&& t
= db
.allterms_begin(term1
);
1110 t
!= db
.allterms_end(term1
); ++t
) {
1111 Xapian::doccount tf
= t
.get_termfreq();
1112 tout
<< "->" << *t
<< " " << tf
<< endl
;
1114 tfmax
= max(tfmax
, tf
);
1115 Xapian::termcount cf
= db
.get_collection_freq(*t
);
1117 cfmax
= max(cfmax
, cf
);
1119 TEST_EQUAL(get_query_length(), 1);
// Two explicit terms (the query length of 2 is checked below).
1122 Xapian::doccount tf1
= db
.get_termfreq(term1
);
1123 Xapian::doccount tf2
= db
.get_termfreq(term2
);
1125 tfmax
= max(tf1
, tf2
);
1126 Xapian::termcount cf1
= db
.get_collection_freq(term1
);
1127 Xapian::termcount cf2
= db
.get_collection_freq(term2
);
1129 cfmax
= max(cf1
, cf2
);
1130 TEST_EQUAL(get_query_length(), 2);
1132 // Synonym occurs at least as many times as any term.
1133 TEST_REL(get_termfreq(), >=, tfmax
);
1134 TEST_REL(get_collection_freq(), >=, cfmax
);
1135 // Synonym can't occur more times than the terms do.
// NOTE(review): the lines updating tfsum/cfsum are not visible in this
// excerpt - confirm they are accumulated in both branches above.
1136 TEST_REL(get_termfreq(), <=, tfsum
);
1137 TEST_REL(get_collection_freq(), <=, cfsum
);
1138 // Synonym can't occur more times than there are documents/terms.
1139 TEST_REL(get_termfreq(), <=, num_docs
);
1140 double total_term_occurences
= get_average_length() * num_docs
;
1141 TEST_REL(get_collection_freq(), <=, total_term_occurences
);
// Checks applied in every case.
1143 TEST_EQUAL(get_reltermfreq(), 0);
1144 TEST_EQUAL(get_wqf(), 1);
// len_lower, len_upper and wdf_upper are the values cached by get_maxpart().
1145 TEST_REL(doclen
,>=,len_lower
);
1146 TEST_REL(doclen
,<=,len_upper
);
1147 TEST_REL(uniqueterms
,>=,1);
1148 TEST_REL(uniqueterms
,<=,doclen
);
1149 TEST_REL(wdf
,<=,wdf_upper
);
// NOTE(review): the matching update of `sum` is not visible in this excerpt;
// presumably it is incremented by wdf next to this line - confirm.
1151 sum_squares
+= wdf
* wdf
;
// Caches the document length and wdf bounds while len_upper is still 0.
1155 double get_maxpart() const {
1156 if (len_upper
== 0) {
1157 len_lower
= get_doclength_lower_bound();
1158 len_upper
= get_doclength_upper_bound();
1159 wdf_upper
= get_wdf_upper_bound();
// The term-independent contribution is the reciprocal of the document length.
1164 double get_sumextra(Xapian::termcount doclen
, Xapian::termcount
) const {
1165 return 1.0 / doclen
;
1168 double get_maxextra() const { return 1.0; }
1171 /// Check the weight subclass gets the correct stats.
1172 DEFINE_TESTCASE(checkstatsweight1
, backend
&& !remote
) {
1173 Xapian::Database db
= get_database("apitest_simpledata");
1174 Xapian::Enquire
enquire(db
);
1175 Xapian::TermIterator a
;
1176 for (a
= db
.allterms_begin(); a
!= db
.allterms_end(); ++a
) {
1177 const string
& term
= *a
;
1178 enquire
.set_query(Xapian::Query(term
));
1179 Xapian::termcount sum
= 0;
1180 Xapian::termcount sum_squares
= 0;
1181 CheckStatsWeight
wt(db
, term
, sum
, sum_squares
);
1182 enquire
.set_weighting_scheme(wt
);
1183 Xapian::MSet mset
= enquire
.get_mset(0, db
.get_doccount());
1185 // The document order in the multi-db case isn't the same as the
1186 // postlist order on the combined DB, so it's hard to compare the
1187 // wdf for each document in the Weight objects, so we can sum
1188 // the wdfs and the squares of the wdfs which provides a decent
1189 // check that we're not getting the wrong wdf values (it ensures
1190 // they have the right mean and standard deviation).
1191 Xapian::termcount expected_sum
= 0;
1192 Xapian::termcount expected_sum_squares
= 0;
1193 Xapian::PostingIterator i
;
1194 for (i
= db
.postlist_begin(term
); i
!= db
.postlist_end(term
); ++i
) {
1195 Xapian::termcount wdf
= i
.get_wdf();
1196 expected_sum
+= wdf
;
1197 expected_sum_squares
+= wdf
* wdf
;
1199 TEST_EQUAL(sum
, expected_sum
);
1200 TEST_EQUAL(sum_squares
, expected_sum_squares
);
1205 /// Check the weight subclass gets the correct stats with OP_SYNONYM.
1206 // Regression test for bugs fixed in 1.4.1.
// NOTE(review): parts of the posting-list merge loop below are not visible in
// this excerpt, so only the visible steps are described.
1207 DEFINE_TESTCASE(checkstatsweight2
, backend
&& !remote
) {
1208 Xapian::Database db
= get_database("apitest_simpledata");
1209 Xapian::Enquire
enquire(db
);
1210 Xapian::TermIterator a
;
// Terms are consumed in consecutive pairs: the iterator is advanced once in
// the body (to fetch term2) and once more by the for statement.
1211 for (a
= db
.allterms_begin(); a
!= db
.allterms_end(); ++a
) {
1212 const string
& term1
= *a
;
// Stop if there is no second term left to form a pair with.
1213 if (++a
== db
.allterms_end()) break;
1214 const string
& term2
= *a
;
1215 Xapian::Query
q(Xapian::Query::OP_SYNONYM
,
1216 Xapian::Query(term1
), Xapian::Query(term2
));
1217 tout
<< q
.get_description() << endl
;
1218 enquire
.set_query(q
);
// Accumulators passed to the Weight object by reference.
1219 Xapian::termcount sum
= 0;
1220 Xapian::termcount sum_squares
= 0;
1221 CheckStatsWeight
wt(db
, term1
, term2
, sum
, sum_squares
);
1222 enquire
.set_weighting_scheme(wt
);
1223 Xapian::MSet mset
= enquire
.get_mset(0, db
.get_doccount());
1225 // The document order in the multi-db case isn't the same as the
1226 // postlist order on the combined DB, so it's hard to compare the
1227 // wdf for each document in the Weight objects, so we can sum
1228 // the wdfs and the squares of the wdfs which provides a decent
1229 // check that we're not getting the wrong wdf values (it ensures
1230 // they have the right mean and standard deviation).
1231 Xapian::termcount expected_sum
= 0;
1232 Xapian::termcount expected_sum_squares
= 0;
// Walk the posting lists of both terms; did1/did2 hold the current docid of
// each list.
1233 Xapian::PostingIterator i
= db
.postlist_begin(term1
);
1234 Xapian::PostingIterator j
= db
.postlist_begin(term2
);
1235 Xapian::docid did1
= *i
, did2
= *j
;
1237 // To calculate expected_sum_squares correctly we need to square
1238 // the sum per document.
1239 Xapian::termcount wdf
;
// Visible case: both wdfs are added together (presumably when did1 == did2 -
// the condition itself is not visible here).
1241 wdf
= i
.get_wdf() + j
.get_wdf();
1243 } else if (did1
< did2
) {
1250 expected_sum
+= wdf
;
1251 expected_sum_squares
+= wdf
* wdf
;
// Advance the term1 list; Xapian::docid(-1) marks a list that has run out,
// and the loop is left once both lists are marked.
1254 if (++i
!= db
.postlist_end(term1
)) {
1257 if (did2
== Xapian::docid(-1)) break;
1258 did1
= Xapian::docid(-1);
// Same handling for the term2 list.
1262 if (++j
!= db
.postlist_end(term2
)) {
1265 if (did1
== Xapian::docid(-1)) break;
1266 did2
= Xapian::docid(-1);
1270 // The OP_SYNONYM's wdf should be equal to the sum of the wdfs of
1271 // the individual terms.
1272 TEST_EQUAL(sum
, expected_sum
);
// Only a lower bound is asserted for the sum of squares.
1273 TEST_REL(sum_squares
, >=, expected_sum_squares
);
1278 /// Check the weight subclass gets the correct stats with OP_WILDCARD.
1279 // Regression test for bug fixed in 1.4.1.
1280 // Don't run with multi-database, as the termfreq checks don't work
1281 // there - FIXME: Investigate this - it smells like a bug.
// NOTE(review): the comparator's body and parts of the heap loop below are
// not visible in this excerpt, so only the visible steps are described.
1282 DEFINE_TESTCASE(checkstatsweight3
, backend
&& !remote
&& !multi
) {
// Comparison functor for PostingIterators; it is used below as PlCmp() with
// make_heap/pop_heap/push_heap.
1284 bool operator()(const Xapian::PostingIterator
& a
,
1285 const Xapian::PostingIterator
& b
) {
1290 Xapian::Database db
= get_database("apitest_simpledata");
1291 Xapian::Enquire
enquire(db
);
1292 Xapian::TermIterator a
;
// Wildcard prefixes to test.  The loop below stops at the first null entry;
// the terminating entry itself is not visible in this excerpt.
1293 static const char * testcases
[] = {
1294 "a", // a* matches all documents, but no term matches all.
1295 "pa", // Expands to only "paragraph", matching 5.
1296 "zulu", // No matches.
1297 "th", // Term "this" matches all documents.
1300 for (const char ** p
= testcases
; *p
; ++p
) {
1301 const char * pattern
= *p
;
1302 Xapian::Query
q(Xapian::Query::OP_WILDCARD
, pattern
);
1303 tout
<< q
.get_description() << endl
;
1304 enquire
.set_query(q
);
// Accumulators passed to the Weight object by reference.
1305 Xapian::termcount sum
= 0;
1306 Xapian::termcount sum_squares
= 0;
// term2 == "*" is how CheckStatsWeight is told this is the OP_WILDCARD case.
1307 CheckStatsWeight
wt(db
, pattern
, "*", sum
, sum_squares
);
1308 enquire
.set_weighting_scheme(wt
);
1309 Xapian::MSet mset
= enquire
.get_mset(0, db
.get_doccount());
1311 // The document order in the multi-db case isn't the same as the
1312 // postlist order on the combined DB, so it's hard to compare the
1313 // wdf for each document in the Weight objects, so we can sum
1314 // the wdfs and the squares of the wdfs which provides a decent
1315 // check that we're not getting the wrong wdf values (it ensures
1316 // they have the right mean and standard deviation).
1317 Xapian::termcount expected_sum
= 0;
1318 Xapian::termcount expected_sum_squares
= 0;
// Collect one posting list per term starting with the pattern.
1319 vector
<Xapian::PostingIterator
> postlists
;
1320 for (auto&& t
= db
.allterms_begin(pattern
);
1321 t
!= db
.allterms_end(pattern
); ++t
) {
1322 postlists
.emplace_back(db
.postlist_begin(*t
));
// Process the posting lists through a heap ordered by PlCmp.
1324 make_heap(postlists
.begin(), postlists
.end(), PlCmp());
// did/wdf track the document currently being accumulated.
1325 Xapian::docid did
= 0;
1326 Xapian::termcount wdf
= 0;
1327 while (!postlists
.empty()) {
1328 pop_heap(postlists
.begin(), postlists
.end(), PlCmp());
1329 Xapian::docid did_new
= *postlists
.back();
1330 Xapian::termcount wdf_new
= postlists
.back().get_wdf();
// Advance the popped list: drop it if exhausted, otherwise put it back on
// the heap.
1331 if (++(postlists
.back()) == Xapian::PostingIterator()) {
1332 postlists
.pop_back();
1334 push_heap(postlists
.begin(), postlists
.end(), PlCmp());
// A new docid means the previous document is complete, so add its total wdf.
1336 if (did_new
!= did
) {
1337 expected_sum
+= wdf
;
1338 expected_sum_squares
+= wdf
* wdf
;
// Add the total for the document still pending when the loop ends.
1344 expected_sum
+= wdf
;
1345 expected_sum_squares
+= wdf
* wdf
;
1346 // The OP_WILDCARD query's wdf should be equal to the sum of the wdfs
1347 // of the individual terms which the pattern expands to.
1348 TEST_EQUAL(sum
, expected_sum
);
// Only a lower bound is asserted for the sum of squares.
1349 TEST_REL(sum_squares
, >=, expected_sum_squares
);
1354 // Two stage should perform same as Jelinek mercer if smoothing parameter for mercer is kept 1 in both.
1355 DEFINE_TESTCASE(unigramlmweight4
, backend
) {
1356 Xapian::Database db
= get_database("apitest_simpledata");
1357 Xapian::Enquire
enquire1(db
);
1358 Xapian::Enquire
enquire2(db
);
1359 enquire1
.set_query(Xapian::Query("paragraph"));
1361 enquire2
.set_query(Xapian::Query("paragraph"));
1363 // 5 documents available with term paragraph so mset size should be 5
1364 enquire1
.set_weighting_scheme(Xapian::LMWeight(0, Xapian::Weight::TWO_STAGE_SMOOTHING
, 1, 0));
1365 enquire2
.set_weighting_scheme(Xapian::LMWeight(0, Xapian::Weight::JELINEK_MERCER_SMOOTHING
, 1, 0));
1366 mset1
= enquire1
.get_mset(0, 10);
1367 mset2
= enquire2
.get_mset(0, 10);
1369 TEST_EQUAL(mset1
.size(), 5);
1370 TEST_EQUAL_DOUBLE(mset1
[1].get_weight(), mset2
[1].get_weight());
1374 /* Test for checking if we don't use smoothing all
1375 * of them should give same result i.e wdf_double/len_double */
1376 DEFINE_TESTCASE(unigramlmweight5
, backend
) {
1377 Xapian::Database db
= get_database("apitest_simpledata");
1378 Xapian::Enquire
enquire1(db
);
1379 Xapian::Enquire
enquire2(db
);
1380 Xapian::Enquire
enquire3(db
);
1381 Xapian::Enquire
enquire4(db
);
1382 enquire1
.set_query(Xapian::Query("paragraph"));
1384 enquire2
.set_query(Xapian::Query("paragraph"));
1386 enquire3
.set_query(Xapian::Query("paragraph"));
1388 enquire4
.set_query(Xapian::Query("paragraph"));
1390 // 5 documents available with term paragraph so mset size should be 5
1391 enquire1
.set_weighting_scheme(Xapian::LMWeight(10000.0, Xapian::Weight::TWO_STAGE_SMOOTHING
, 0, 0));
1392 enquire2
.set_weighting_scheme(Xapian::LMWeight(10000.0, Xapian::Weight::JELINEK_MERCER_SMOOTHING
, 0, 0));
1393 enquire3
.set_weighting_scheme(Xapian::LMWeight(10000.0, Xapian::Weight::ABSOLUTE_DISCOUNT_SMOOTHING
, 0, 0));
1394 enquire4
.set_weighting_scheme(Xapian::LMWeight(10000.0, Xapian::Weight::DIRICHLET_SMOOTHING
, 0, 0));
1396 mset1
= enquire1
.get_mset(0, 10);
1397 mset2
= enquire2
.get_mset(0, 10);
1398 mset3
= enquire3
.get_mset(0, 10);
1399 mset4
= enquire4
.get_mset(0, 10);
1401 TEST_EQUAL(mset1
.size(), 5);
1402 TEST_EQUAL(mset2
.size(), 5);
1403 TEST_EQUAL(mset3
.size(), 5);
1404 TEST_EQUAL(mset4
.size(), 5);
1405 for (size_t i
= 0; i
< 5; i
++) {
1406 TEST_EQUAL_DOUBLE(mset3
[i
].get_weight(), mset4
[i
].get_weight());
1407 TEST_EQUAL_DOUBLE(mset2
[i
].get_weight(), mset4
[i
].get_weight());
1408 TEST_EQUAL_DOUBLE(mset1
[i
].get_weight(), mset2
[i
].get_weight());
1409 TEST_EQUAL_DOUBLE(mset3
[i
].get_weight(), mset2
[i
].get_weight());
1410 TEST_EQUAL_DOUBLE(mset1
[i
].get_weight(), mset4
[i
].get_weight());
1411 TEST_EQUAL_DOUBLE(mset1
[i
].get_weight(), mset3
[i
].get_weight());
1416 // Test Exception for junk after serialised weight (with Dir+ enabled).
// NOTE(review): the comment above says "Dir+", but the constructor below is
// passed DIRICHLET_SMOOTHING rather than DIRICHLET_PLUS_SMOOTHING (which
// unigramlmweight7 uses) - confirm which is intended.
// NOTE(review): the opening of the try block, the declaration of `d` and the
// lines around the FAIL_TEST calls are not visible in this excerpt.
1417 DEFINE_TESTCASE(unigramlmweight6
, !backend
) {
1418 Xapian::LMWeight
wt(0, Xapian::Weight::DIRICHLET_SMOOTHING
, 0.5, 1.0);
// Unserialise the serialised weight with a junk byte "X" appended; the catch
// clause below handles the expected SerialisationError.
1421 Xapian::LMWeight
* d2
= d
.unserialise(wt
.serialise() + "X");
1422 // Make sure we actually use the weight.
1423 bool empty
= d2
->name().empty();
// Getting this far means unserialise() did not throw, which is a failure.
1426 FAIL_TEST("Serialised LMWeight with junk appended unserialised to empty name!");
1427 FAIL_TEST("Serialised LMWeight with junk appended unserialised OK");
1428 } catch (const Xapian::SerialisationError
&e
) {
// The error message is expected to mention "LM".
1429 TEST(e
.get_msg().find("LM") != string::npos
);
1434 // Feature test for Dir+ function.
1435 DEFINE_TESTCASE(unigramlmweight7
, backend
) {
1436 Xapian::Database db
= get_database("apitest_simpledata");
1437 Xapian::Enquire
enquire1(db
);
1438 Xapian::Enquire
enquire2(db
);
1439 enquire1
.set_query(Xapian::Query("paragraph"));
1440 enquire2
.set_query(Xapian::Query("paragraph"));
1444 enquire1
.set_weighting_scheme(Xapian::LMWeight(0, Xapian::Weight::DIRICHLET_SMOOTHING
, 2000, 0));
1445 enquire2
.set_weighting_scheme(Xapian::LMWeight(0, Xapian::Weight::DIRICHLET_PLUS_SMOOTHING
, 2000, 0.05));
1447 mset1
= enquire1
.get_mset(0, 10);
1448 mset2
= enquire2
.get_mset(0, 10);
1450 // mset size should be 5
1451 TEST_EQUAL(mset1
.size(), 5);
1452 TEST_EQUAL(mset2
.size(), 5);
1454 // Expect mset weights associated with Dir+ more than mset weights by Dir
1455 // because of the presence of extra weight component in Dir+ function.
1456 TEST_REL(mset2
[0].get_weight(),>,mset1
[0].get_weight());
1457 TEST_REL(mset2
[1].get_weight(),>,mset1
[1].get_weight());
1458 TEST_REL(mset2
[2].get_weight(),>,mset1
[2].get_weight());
1459 TEST_REL(mset2
[3].get_weight(),>,mset1
[3].get_weight());
1460 TEST_REL(mset2
[4].get_weight(),>,mset1
[4].get_weight());
1465 // Regression test that OP_SCALE_WEIGHT works with LMWeight (fixed in 1.4.1).
1466 DEFINE_TESTCASE(unigramlmweight8
, backend
) {
1467 Xapian::Database db
= get_database("apitest_simpledata");
1468 Xapian::Enquire
enquire(db
);
1469 Xapian::Query
query("paragraph");
1471 enquire
.set_query(query
);
1472 enquire
.set_weighting_scheme(Xapian::LMWeight(0, Xapian::Weight::DIRICHLET_SMOOTHING
, 2000, 0));
1475 mset1
= enquire
.get_mset(0, 10);
1476 TEST_EQUAL(mset1
.size(), 5);
1478 enquire
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, query
, 15.0));
1479 enquire
.set_weighting_scheme(Xapian::LMWeight(0, Xapian::Weight::DIRICHLET_SMOOTHING
, 2000, 0));
1482 mset2
= enquire
.get_mset(0, 10);
1483 TEST_EQUAL(mset2
.size(), mset1
.size());
1484 TEST_NOT_EQUAL_DOUBLE(mset1
[0].get_weight(), 0.0);
1485 for (Xapian::doccount i
= 0; i
< mset1
.size(); ++i
) {
1486 TEST_EQUAL_DOUBLE(15.0 * mset1
[i
].get_weight(), mset2
[i
].get_weight());
1492 // Feature test for BoolWeight.
1493 // Test exception for junk after serialised weight.
// NOTE(review): the opening of the try block and the lines around the
// FAIL_TEST calls are not visible in this excerpt.
1494 DEFINE_TESTCASE(boolweight1
, !backend
) {
1495 Xapian::BoolWeight wt
;
1497 Xapian::BoolWeight t
;
// Unserialise the serialised weight with a junk byte "X" appended; the catch
// clause below handles the expected SerialisationError.
1498 Xapian::BoolWeight
* t2
= t
.unserialise(wt
.serialise() + "X");
1499 // Make sure we actually use the weight.
1500 bool empty
= t2
->name().empty();
// Getting this far means unserialise() did not throw, which is a failure.
1503 FAIL_TEST("Serialised BoolWeight with junk appended unserialised to empty name!");
1504 FAIL_TEST("Serialised BoolWeight with junk appended unserialised OK");
1505 } catch (const Xapian::SerialisationError
&e
) {
// The error message is expected to mention "Bool".
1506 TEST(e
.get_msg().find("Bool") != string::npos
);
1511 // Feature test for CoordWeight.
// NOTE(review): the body of the while loop which counts the matching terms is
// not visible in this excerpt.
1512 DEFINE_TESTCASE(coordweight1
, backend
) {
1513 Xapian::Enquire
enquire(get_database("apitest_simpledata"));
1514 enquire
.set_weighting_scheme(Xapian::CoordWeight());
// OR together all four terms, passed as an iterator range over the array.
1515 const char * terms
[] = { "this", "line", "paragraph", "rubbish" };
1516 Xapian::Query
query(Xapian::Query::OP_OR
,
1517 terms
, terms
+ sizeof(terms
) / sizeof(terms
[0]));
1518 enquire
.set_query(query
);
1519 Xapian::MSet mymset1
= enquire
.get_mset(0, 100);
1520 // CoordWeight scores 1 for each matching term, so the weight should equal
1521 // the number of matching terms.
1522 for (Xapian::MSetIterator i
= mymset1
.begin(); i
!= mymset1
.end(); ++i
) {
1523 Xapian::termcount matching_terms
= 0;
// Iterate over the query terms which match this document.
1524 Xapian::TermIterator t
= enquire
.get_matching_terms_begin(i
);
1525 while (t
!= enquire
.get_matching_terms_end(i
)) {
1529 TEST_EQUAL(i
.get_weight(), matching_terms
);
1532 // Test with OP_SCALE_WEIGHT.
1533 enquire
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, query
, 15.0));
1534 Xapian::MSet mymset2
= enquire
.get_mset(0, 100);
1535 TEST_EQUAL(mymset1
.size(), mymset2
.size());
// Each scaled weight must be exactly 15 times the unscaled weight at the
// same rank.
1536 for (Xapian::doccount i
= 0; i
!= mymset1
.size(); ++i
) {
1537 TEST_EQUAL(15.0 * mymset1
[i
].get_weight(), mymset2
[i
].get_weight());
1543 // Test exception for junk after serialised weight.
1544 DEFINE_TESTCASE(coordweight2
, !backend
) {
1545 Xapian::CoordWeight wt
;
1547 Xapian::CoordWeight t
;
1548 Xapian::CoordWeight
* t2
= t
.unserialise(wt
.serialise() + "X");
1549 // Make sure we actually use the weight.
1550 bool empty
= t2
->name().empty();
1553 FAIL_TEST("Serialised CoordWeight with junk appended unserialised to empty name!");
1554 FAIL_TEST("Serialised CoordWeight with junk appended unserialised OK");
1555 } catch (const Xapian::SerialisationError
&e
) {
1556 TEST(e
.get_msg().find("Coord") != string::npos
);