xapian-core/tests/api_weight.cc

   1 /** @file api_weight.cc
   2  * @brief tests of Xapian::Weight subclasses
   3  */
   4 /* Copyright (C) 2004,2012,2013,2016,2017 Olly Betts
   5  * Copyright (C) 2013 Aarsh Shah
   6  * Copyright (C) 2016 Vivek Pal
   7  *
   8  * This program is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation; either version 2 of the License, or
  11  * (at your option) any later version.
  12  *
  13  * This program is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with this program; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
  21  */
  22
  23 #include <config.h>
  24
  25 #include "api_weight.h"
  26 #include <cmath>
  27
  28 #include <xapian.h>
  29
  30 #include "apitest.h"
  31 #include "testutils.h"
  32
  33 using namespace std;
  34
  35 // Test exception for junk after serialised weight.
  36 DEFINE_TESTCASE(tradweight3, !backend) {
  37     Xapian::TradWeight wt(42);
  38     try {
  39         Xapian::TradWeight t;
  40         Xapian::TradWeight * t2 = t.unserialise(wt.serialise() + "X");
  41         // Make sure we actually use the weight.
  42         bool empty = t2->name().empty();
  43         delete t2;
  44         if (empty)
  45             FAIL_TEST("Serialised TradWeight with junk appended unserialised to empty name!");
  46         FAIL_TEST("Serialised TradWeight with junk appended unserialised OK");
  47     } catch (const Xapian::SerialisationError &e) {
  48         // Regression test for error in exception message fixed in 1.2.11 and
  49         // 1.3.1.
  50         TEST(e.get_msg().find("BM25") == string::npos);
  51         TEST(e.get_msg().find("Trad") != string::npos);
  52     }
  53     return true;
  54 }
  55
  56 // Test Exception for junk after serialised weight.
  57 DEFINE_TESTCASE(unigramlmweight3, !backend) {
  58     Xapian::LMWeight wt(79898.0, Xapian::Weight::JELINEK_MERCER_SMOOTHING, 0.5, 1.0);
  59     try {
  60         Xapian::LMWeight t;
  61         Xapian::LMWeight * t2 = t.unserialise(wt.serialise() + "X");
  62         // Make sure we actually use the weight.
  63         bool empty = t2->name().empty();
  64         delete t2;
  65         if (empty)
  66             FAIL_TEST("Serialised LMWeight with junk appended unserialised to empty name!");
  67         FAIL_TEST("Serialised LMWeight with junk appended unserialised OK");
  68     } catch (const Xapian::SerialisationError &e) {
  69         TEST(e.get_msg().find("LM") != string::npos);
  70     }
  71     return true;
  72 }
  73
  74 // Test exception for junk after serialised weight.
  75 DEFINE_TESTCASE(bm25weight3, !backend) {
  76     Xapian::BM25Weight wt(2.0, 0.5, 1.3, 0.6, 0.01);
  77     try {
  78         Xapian::BM25Weight b;
  79         Xapian::BM25Weight * b2 = b.unserialise(wt.serialise() + "X");
  80         // Make sure we actually use the weight.
  81         bool empty = b2->name().empty();
  82         delete b2;
  83         if (empty)
  84             FAIL_TEST("Serialised BM25Weight with junk appended unserialised to empty name!");
  85         FAIL_TEST("Serialised BM25Weight with junk appended unserialised OK");
  86     } catch (const Xapian::SerialisationError &e) {
  87         TEST(e.get_msg().find("BM25") != string::npos);
  88     }
  89     return true;
  90 }
  91
  92 // Test parameter combinations which should be unaffected by doclength.
  93 DEFINE_TESTCASE(bm25weight4, backend) {
  94     Xapian::Database db = get_database("apitest_simpledata");
  95     Xapian::Enquire enquire(db);
  96     enquire.set_query(Xapian::Query("paragraph"));
  97     Xapian::MSet mset;
  98
  99     enquire.set_weighting_scheme(Xapian::BM25Weight(1, 0, 1, 0, 0.5));
 100     mset = enquire.get_mset(0, 10);
 101     TEST_EQUAL(mset.size(), 5);
 102     // Expect: wdf has an effect on weight, but doclen doesn't.
 103     TEST_REL(mset[0].get_weight(),>,mset[1].get_weight());
 104     TEST_EQUAL_DOUBLE(mset[1].get_weight(), mset[2].get_weight());
 105     TEST_REL(mset[2].get_weight(),>,mset[3].get_weight());
 106     TEST_EQUAL_DOUBLE(mset[3].get_weight(), mset[4].get_weight());
 107
 108     enquire.set_weighting_scheme(Xapian::BM25Weight(0, 0, 1, 1, 0.5));
 109     mset = enquire.get_mset(0, 10);
 110     TEST_EQUAL(mset.size(), 5);
 111     // Expect: neither wdf nor doclen affects weight.
 112     TEST_EQUAL_DOUBLE(mset[0].get_weight(), mset[4].get_weight());
 113
 114     return true;
 115 }
 116
 117 /// Test non-zero k2 with zero k1.
 118 // Regression test for bug fixed in 1.2.17 and 1.3.2.
 119 DEFINE_TESTCASE(bm25weight5, backend) {
 120     Xapian::Database db = get_database("apitest_simpledata");
 121     Xapian::Enquire enquire(db);
 122     enquire.set_query(Xapian::Query("paragraph"));
 123     Xapian::MSet mset;
 124
 125     enquire.set_weighting_scheme(Xapian::BM25Weight(0, 1, 1, 0.5, 0.5));
 126     mset = enquire.get_mset(0, 10);
 127     TEST_EQUAL(mset.size(), 5);
 128     // Expect: wdf has no effect on weight; shorter docs rank higher.
 129     mset_expect_order(mset, 3, 5, 1, 4, 2);
 130     TEST_EQUAL_DOUBLE(mset[0].get_weight(), mset[1].get_weight());
 131     TEST_REL(mset[1].get_weight(),>,mset[2].get_weight());
 132     TEST_REL(mset[2].get_weight(),>,mset[3].get_weight());
 133     TEST_REL(mset[3].get_weight(),>,mset[4].get_weight());
 134
 135     return true;
 136 }
 137
 138 // Test exception for junk after serialised weight.
 139 DEFINE_TESTCASE(bm25plusweight1, !backend) {
 140     Xapian::BM25PlusWeight wt(2.0, 0.1, 1.3, 0.6, 0.01, 0.5);
 141     try {
 142         Xapian::BM25PlusWeight b;
 143         Xapian::BM25PlusWeight * b2 = b.unserialise(wt.serialise() + "X");
 144         // Make sure we actually use the weight.
 145         bool empty = b2->name().empty();
 146         delete b2;
 147         if (empty)
 148             FAIL_TEST("Serialised BM25PlusWeight with junk appended unserialised to empty name!");
 149         FAIL_TEST("Serialised BM25PlusWeight with junk appended unserialised OK");
 150     } catch (const Xapian::SerialisationError &e) {
 151         TEST(e.get_msg().find("BM25Plus") != string::npos);
 152     }
 153     return true;
 154 }
 155
 156 // Test parameter combinations which should be unaffected by doclength.
 157 DEFINE_TESTCASE(bm25plusweight2, backend) {
 158     Xapian::Database db = get_database("apitest_simpledata");
 159     Xapian::Enquire enquire(db);
 160     enquire.set_query(Xapian::Query("paragraph"));
 161     Xapian::MSet mset;
 162
 163     enquire.set_weighting_scheme(Xapian::BM25PlusWeight(1, 0, 1, 0, 0.5, 1));
 164     mset = enquire.get_mset(0, 10);
 165     TEST_EQUAL(mset.size(), 5);
 166     // Expect: wdf has an effect on weight, but doclen doesn't.
 167     TEST_REL(mset[0].get_weight(),>,mset[1].get_weight());
 168     TEST_EQUAL_DOUBLE(mset[1].get_weight(), mset[2].get_weight());
 169     TEST_REL(mset[2].get_weight(),>,mset[3].get_weight());
 170     TEST_EQUAL_DOUBLE(mset[3].get_weight(), mset[4].get_weight());
 171
 172     enquire.set_weighting_scheme(Xapian::BM25PlusWeight(0, 0, 1, 1, 0.5, 1));
 173     mset = enquire.get_mset(0, 10);
 174     TEST_EQUAL(mset.size(), 5);
 175     // Expect: neither wdf nor doclen affects weight.
 176     TEST_EQUAL_DOUBLE(mset[0].get_weight(), mset[4].get_weight());
 177
 178     return true;
 179 }
 180
 181 // Regression test for a mistake corrected in the BM25+ implementation.
 182 DEFINE_TESTCASE(bm25plusweight3, backend) {
 183     Xapian::Database db = get_database("apitest_simpledata");
 184     Xapian::Enquire enquire(db);
 185     enquire.set_query(Xapian::Query("paragraph"));
 186     Xapian::MSet mset;
 187
 188     enquire.set_weighting_scheme(Xapian::BM25PlusWeight(1, 0, 1, 0.5, 0.5, 1));
 189     mset = enquire.get_mset(0, 10);
 190     TEST_EQUAL(mset.size(), 5);
 191
 192     // The value of each doc weight calculated manually from the BM25+ formulae
 193     // by using the respective document statistics.
 194     TEST_EQUAL_DOUBLE(mset[0].get_weight(), 0.7920796567487473);
 195     TEST_EQUAL_DOUBLE(mset[1].get_weight(), 0.7846980783848447);
 196     TEST_EQUAL_DOUBLE(mset[2].get_weight(), 0.7558817623365934);
 197     TEST_EQUAL_DOUBLE(mset[3].get_weight(), 0.7210119356168847);
 198     TEST_EQUAL_DOUBLE(mset[4].get_weight(), 0.7210119356168847);
 199
 200     return true;
 201 }
 202
 203 // Test exception for junk after serialised weight.
 204 DEFINE_TESTCASE(inl2weight1, !backend) {
 205     Xapian::InL2Weight wt(2.0);
 206     try {
 207         Xapian::InL2Weight b;
 208         Xapian::InL2Weight * b2 = b.unserialise(wt.serialise() + "X");
 209         // Make sure we actually use the weight.
 210         bool empty = b2->name().empty();
 211         delete b2;
 212         if (empty)
 213             FAIL_TEST("Serialised inl2weight with junk appended unserialised to empty name!");
 214         FAIL_TEST("Serialised inl2weight with junk appended unserialised OK");
 215     } catch (const Xapian::SerialisationError &e) {
 216         TEST(e.get_msg().find("InL2") != string::npos);
 217     }
 218
 219     return true;
 220 }
 221
 222 // Test for invalid values of c.
 223 DEFINE_TESTCASE(inl2weight2, !backend) {
 224     // InvalidArgumentError should be thrown if the parameter c is invalid.
 225     TEST_EXCEPTION(Xapian::InvalidArgumentError,
 226         Xapian::InL2Weight wt(-2.0));
 227
 228     TEST_EXCEPTION(Xapian::InvalidArgumentError,
 229         Xapian::InL2Weight wt2(0.0));
 230
 231     /* Parameter c should be set to 1.0 by constructor if none is given. */
 232     Xapian::InL2Weight weight2;
 233     TEST_EQUAL(weight2.serialise(), Xapian::InL2Weight(1.0).serialise());
 234
 235     return true;
 236 }
 237
 238 // Feature tests for Inl2Weight
 239 DEFINE_TESTCASE(inl2weight3, backend) {
 240     Xapian::Database db = get_database("apitest_simpledata");
 241     Xapian::Enquire enquire(db);
 242     Xapian::Query query("banana");
 243
 244     enquire.set_query(query);
 245     enquire.set_weighting_scheme(Xapian::InL2Weight(2.0));
 246
 247     Xapian::MSet mset1;
 248     mset1 = enquire.get_mset(0, 10);
 249     TEST_EQUAL(mset1.size(), 1);
 250     mset_expect_order(mset1, 6);
 251
 252     /* The value has been calculated in the python interpreter by looking at the
 253      * database statistics. */
 254     TEST_EQUAL_DOUBLE(mset1[0].get_weight(), 1.559711143842063);
 255
 256     // Test with OP_SCALE_WEIGHT.
 257     enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 15.0));
 258     enquire.set_weighting_scheme(Xapian::InL2Weight(2.0));
 259
 260     Xapian::MSet mset2;
 261     mset2 = enquire.get_mset(0, 10);
 262     TEST_EQUAL(mset2.size(), 1);
 263     TEST_NOT_EQUAL_DOUBLE(mset1[0].get_weight(), 0.0);
 264     TEST_EQUAL_DOUBLE(15.0 * mset1[0].get_weight(), mset2[0].get_weight());
 265
 266     return true;
 267 }
 268
 269 // Test exception for junk after serialised weight.
 270 DEFINE_TESTCASE(ifb2weight1, !backend) {
 271     Xapian::IfB2Weight wt(2.0);
 272     try {
 273         Xapian::IfB2Weight b;
 274         Xapian::IfB2Weight * b2 = b.unserialise(wt.serialise() + "X");
 275         // Make sure we actually use the weight.
 276         bool empty = b2->name().empty();
 277         delete b2;
 278         if (empty)
 279             FAIL_TEST("Serialised IfB2Weight with junk appended unserialised to empty name!");
 280         FAIL_TEST("Serialised IfB2Weight with junk appended unserialised OK");
 281     } catch (const Xapian::SerialisationError &e) {
 282         TEST(e.get_msg().find("IfB2") != string::npos);
 283     }
 284     return true;
 285 }
 286
 287 // Test for invalid values of c.
 288 DEFINE_TESTCASE(ifb2weight2, !backend) {
 289     // InvalidArgumentError should be thrown if the parameter c is invalid.
 290     TEST_EXCEPTION(Xapian::InvalidArgumentError,
 291         Xapian::IfB2Weight wt(-2.0));
 292
 293     TEST_EXCEPTION(Xapian::InvalidArgumentError,
 294         Xapian::IfB2Weight wt2(0.0));
 295
 296     /* Parameter c should be set to 1.0 by constructor if none is given. */
 297     Xapian::IfB2Weight weight2;
 298     TEST_EQUAL(weight2.serialise(), Xapian::IfB2Weight(1.0).serialise());
 299
 300     return true;
 301 }
 302
 303 // Feature test
 304 DEFINE_TESTCASE(ifb2weight3, backend) {
 305     Xapian::Database db = get_database("apitest_simpledata");
 306     Xapian::Enquire enquire(db);
 307     Xapian::Query query("banana");
 308
 309     enquire.set_query(query);
 310     enquire.set_weighting_scheme(Xapian::IfB2Weight(2.0));
 311
 312     Xapian::MSet mset1;
 313     mset1 = enquire.get_mset(0, 10);
 314     TEST_EQUAL(mset1.size(), 1);
 315
 316     /* The value of the weight has been manually calculated using the statistics
 317      * of the test database. */
 318     TEST_EQUAL_DOUBLE(mset1[0].get_weight(), 3.119422287684126);
 319
 320     // Test with OP_SCALE_WEIGHT.
 321     enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 15.0));
 322     enquire.set_weighting_scheme(Xapian::IfB2Weight(2.0));
 323
 324     Xapian::MSet mset2;
 325     mset2 = enquire.get_mset(0, 10);
 326     TEST_EQUAL(mset2.size(), 1);
 327     TEST_NOT_EQUAL_DOUBLE(mset1[0].get_weight(), 0.0);
 328     TEST_EQUAL_DOUBLE(15.0 * mset1[0].get_weight(), mset2[0].get_weight());
 329
 330     return true;
 331 }
 332
 333 // Test exception for junk after serialised weight.
 334 DEFINE_TESTCASE(ineb2weight1, !backend) {
 335     Xapian::IneB2Weight wt(2.0);
 336     try {
 337         Xapian::IneB2Weight b;
 338         Xapian::IneB2Weight * b2 = b.unserialise(wt.serialise() + "X");
 339         // Make sure we actually use the weight.
 340         bool empty = b2->name().empty();
 341         delete b2;
 342         if (empty)
 343             FAIL_TEST("Serialised ineb2weight with junk appended unserialised to empty name!");
 344         FAIL_TEST("Serialised ineb2weight with junk appended unserialised OK");
 345     } catch (const Xapian::SerialisationError &e) {
 346         TEST(e.get_msg().find("IneB2") != string::npos);
 347     }
 348
 349     return true;
 350 }
 351
 352 // Test for invalid values of c.
 353 DEFINE_TESTCASE(ineb2weight2, !backend) {
 354     // InvalidArgumentError should be thrown if parameter c is invalid.
 355     TEST_EXCEPTION(Xapian::InvalidArgumentError,
 356         Xapian::IneB2Weight wt(-2.0));
 357
 358     TEST_EXCEPTION(Xapian::InvalidArgumentError,
 359         Xapian::IneB2Weight wt2(0.0));
 360
 361     /* Parameter c should be set to 1.0 by constructor if none is given. */
 362     Xapian::IneB2Weight weight2;
 363     TEST_EQUAL(weight2.serialise(), Xapian::IneB2Weight(1.0).serialise());
 364
 365     return true;
 366 }
 367
 368 // Feature test.
 369 DEFINE_TESTCASE(ineb2weight3, backend) {
 370     Xapian::Database db = get_database("apitest_simpledata");
 371     Xapian::Enquire enquire(db);
 372     Xapian::Query query("paragraph");
 373     enquire.set_query(query);
 374     enquire.set_weighting_scheme(Xapian::IneB2Weight(2.0));
 375
 376     Xapian::MSet mset1;
 377     mset1 = enquire.get_mset(0, 10);
 378     TEST_EQUAL(mset1.size(), 5);
 379
 380     // The third document in the database is 4th in the ranking.
 381     /* The weight value has been manually calculated by using the statistics
 382      * of the test database. */
 383     TEST_EQUAL_DOUBLE(mset1[4].get_weight(), 0.61709730297692400036);
 384
 385     // Test with OP_SCALE_WEIGHT.
 386     enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 15.0));
 387     enquire.set_weighting_scheme(Xapian::IneB2Weight(2.0));
 388
 389     Xapian::MSet mset2;
 390     mset2 = enquire.get_mset(0, 10);
 391     TEST_EQUAL(mset2.size(), 5);
 392
 393     TEST_NOT_EQUAL_DOUBLE(mset1[0].get_weight(), 0.0);
 394     for (int i = 0; i < 5; ++i) {
 395         TEST_EQUAL_DOUBLE(15.0 * mset1[i].get_weight(), mset2[i].get_weight());
 396     }
 397
 398     return true;
 399 }
 400
 401 // Test exception for junk after serialised weight.
 402 DEFINE_TESTCASE(bb2weight1, !backend) {
 403     Xapian::BB2Weight wt(2.0);
 404     try {
 405         Xapian::BB2Weight b;
 406         Xapian::BB2Weight * b2 = b.unserialise(wt.serialise() + "X");
 407         // Make sure we actually use the weight.
 408         bool empty = b2->name().empty();
 409         delete b2;
 410         if (empty)
 411             FAIL_TEST("Serialised BB2Weight with junk appended unserialised to empty name!");
 412         FAIL_TEST("Serialised BB2Weight with junk appended unserialised OK");
 413     } catch (const Xapian::SerialisationError &e) {
 414         TEST(e.get_msg().find("BB2") != string::npos);
 415     }
 416     return true;
 417 }
 418
 419 // Test for invalid values of c.
 420 DEFINE_TESTCASE(bb2weight2, !backend) {
 421     // InvalidArgumentError should be thrown if the parameter c is invalid.
 422     TEST_EXCEPTION(Xapian::InvalidArgumentError,
 423         Xapian::BB2Weight wt(-2.0));
 424
 425     TEST_EXCEPTION(Xapian::InvalidArgumentError,
 426         Xapian::BB2Weight wt2(0.0));
 427
 428     /* Parameter c should be set to 1.0 by constructor if none is given. */
 429     Xapian::BB2Weight weight2;
 430     TEST_EQUAL(weight2.serialise(), Xapian::BB2Weight(1.0).serialise());
 431
 432     return true;
 433 }
 434
 435 // Feature test
 436 DEFINE_TESTCASE(bb2weight3, backend) {
 437     Xapian::Database db = get_database("apitest_simpledata");
 438     Xapian::Enquire enquire(db);
 439     Xapian::Query query("paragraph");
 440
 441     enquire.set_query(query);
 442     enquire.set_weighting_scheme(Xapian::BB2Weight(2.0));
 443
 444     Xapian::MSet mset1;
 445     mset1 = enquire.get_mset(0, 10);
 446     TEST_EQUAL(mset1.size(), 5);
 447     /* The third document in the database has the highest weight and is the
 448      * first in the mset. */
 449     // Value calculated manually by using the statistics of the test database.
 450     TEST_EQUAL_DOUBLE(mset1[0].get_weight(), 1.6823696969784483);
 451
 452     // Test with OP_SCALE_WEIGHT.
 453     enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 15.0));
 454     enquire.set_weighting_scheme(Xapian::BB2Weight(2.0));
 455
 456     Xapian::MSet mset2;
 457     mset2 = enquire.get_mset(0, 10);
 458     TEST_EQUAL(mset2.size(), 5);
 459
 460     TEST_NOT_EQUAL_DOUBLE(mset1[0].get_weight(), 0.0);
 461     for (int i = 0; i < 5; ++i) {
 462         TEST_EQUAL_DOUBLE(15.0 * mset1[i].get_weight(), mset2[i].get_weight());
 463     }
 464
 465     // Test with OP_SCALE_WEIGHT and a small factor (regression test, as we
 466     // were applying the factor to the upper bound twice).
 467     enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 1.0 / 1024));
 468     enquire.set_weighting_scheme(Xapian::BB2Weight(2.0));
 469
 470     Xapian::MSet mset3;
 471     mset3 = enquire.get_mset(0, 10);
 472     TEST_EQUAL(mset3.size(), 5);
 473
 474     for (int i = 0; i < 5; ++i) {
 475         TEST_EQUAL_DOUBLE(mset1[i].get_weight(), mset3[i].get_weight() * 1024);
 476     }
 477
 478     return true;
 479 }
 480
 481 // Regression test: we used to calculate log2(0) when there was only one doc.
 482 DEFINE_TESTCASE(bb2weight4, backend) {
 483     Xapian::Database db = get_database("apitest_onedoc");
 484     Xapian::Enquire enquire(db);
 485     Xapian::Query query("word");
 486
 487     enquire.set_query(query);
 488     enquire.set_weighting_scheme(Xapian::BB2Weight());
 489
 490     Xapian::MSet mset1;
 491     mset1 = enquire.get_mset(0, 10);
 492     TEST_EQUAL(mset1.size(), 1);
 493     TEST_EQUAL_DOUBLE(mset1[0].get_weight(), 3.431020621347435);
 494
 495     return true;
 496 }
 497
 498 // Feature test.
 499 DEFINE_TESTCASE(dlhweight1, backend) {
 500     Xapian::Database db = get_database("apitest_simpledata");
 501     Xapian::Enquire enquire(db);
 502     Xapian::Query query("a");
 503
 504     enquire.set_query(query);
 505     enquire.set_weighting_scheme(Xapian::DLHWeight());
 506
 507     Xapian::MSet mset1;
 508     mset1 = enquire.get_mset(0, 10);
 509     TEST_EQUAL(mset1.size(), 3);
 510     mset_expect_order(mset1, 3, 1, 2);
 511     // Weights calculated manually using stats from the database.
 512     TEST_EQUAL_DOUBLE(mset1[0].get_weight(), 1.0046477754371292362);
 513     TEST_EQUAL_DOUBLE(mset1[1].get_weight(), 0.97621929514640352757);
 514     // The following weight would be negative but gets clamped to 0.
 515     TEST_EQUAL_DOUBLE(mset1[2].get_weight(), 0.0);
 516
 517     // Test with OP_SCALE_WEIGHT.
 518     enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 15.0));
 519     enquire.set_weighting_scheme(Xapian::DLHWeight());
 520
 521     Xapian::MSet mset2;
 522     mset2 = enquire.get_mset(0, 10);
 523     TEST_EQUAL(mset2.size(), 3);
 524
 525     TEST_NOT_EQUAL_DOUBLE(mset1[0].get_weight(), 0.0);
 526     for (Xapian::doccount i = 0; i < mset2.size(); ++i) {
 527         TEST_EQUAL_DOUBLE(15.0 * mset1[i].get_weight(), mset2[i].get_weight());
 528     }
 529
 530     return true;
 531 }
 532
 533 // Test exception for junk after serialised weight.
 534 DEFINE_TESTCASE(dlhweight2, !backend) {
 535     Xapian::DLHWeight wt;
 536     try {
 537         Xapian::DLHWeight t;
 538         Xapian::DLHWeight * t2 = t.unserialise(wt.serialise() + "X");
 539         // Make sure we actually use the weight.
 540         bool empty = t2->name().empty();
 541         delete t2;
 542         if (empty)
 543             FAIL_TEST("Serialised DLHWeight with junk appended unserialised to empty name!");
 544         FAIL_TEST("Serialised DLHWeight with junk appended unserialised OK");
 545     } catch (const Xapian::SerialisationError &e) {
 546         TEST(e.get_msg().find("DLH") != string::npos);
 547     }
 548     return true;
 549 }
 550
 551 static void
 552 gen_wdf_eq_doclen_db(Xapian::WritableDatabase& db, const string&)
 553 {
 554     Xapian::Document doc;
 555     doc.add_term("solo", 37);
 556     db.add_document(doc);
 557 }
 558
 559 // Test wdf == doclen.
 560 DEFINE_TESTCASE(dlhweight3, generated) {
 561     Xapian::Database db = get_database("wdf_eq_doclen", gen_wdf_eq_doclen_db);
 562     Xapian::Enquire enquire(db);
 563     Xapian::Query query("solo");
 564
 565     enquire.set_query(query);
 566     enquire.set_weighting_scheme(Xapian::DLHWeight());
 567
 568     Xapian::MSet mset1;
 569     mset1 = enquire.get_mset(0, 10);
 570     TEST_EQUAL(mset1.size(), 1);
 571     // Weight gets clamped to zero.
 572     TEST_EQUAL_DOUBLE(mset1[0].get_weight(), 0.0);
 573
 574     return true;
 575 }
 576
 577 // Test exception for junk after serialised weight.
 578 DEFINE_TESTCASE(pl2weight1, !backend) {
 579     Xapian::PL2Weight wt(2.0);
 580     try {
 581         Xapian::PL2Weight b;
 582         Xapian::PL2Weight * b2 = b.unserialise(wt.serialise() + "X");
 583         // Make sure we actually use the weight.
 584         bool empty = b2->name().empty();
 585         delete b2;
 586         if (empty)
 587             FAIL_TEST("Serialised PL2Weight with junk appended unserialised to empty name!");
 588         FAIL_TEST("Serialised PL2Weight with junk appended unserialised OK");
 589     } catch (const Xapian::SerialisationError &e) {
 590         TEST(e.get_msg().find("PL2") != string::npos);
 591     }
 592     return true;
 593 }
 594
 595 // Test for invalid values of c.
 596 DEFINE_TESTCASE(pl2weight2, !backend) {
 597     // InvalidArgumentError should be thrown if parameter c is invalid.
 598     TEST_EXCEPTION(Xapian::InvalidArgumentError,
 599         Xapian::PL2Weight wt(-2.0));
 600
 601     /* Parameter c should be set to 1.0 by constructor if none is given. */
 602     Xapian::PL2Weight weight2;
 603     TEST_EQUAL(weight2.serialise(), Xapian::PL2Weight(1.0).serialise());
 604
 605     return true;
 606 }
 607
 608 // Feature Test.
 609 DEFINE_TESTCASE(pl2weight3, backend) {
 610     Xapian::Database db = get_database("apitest_simpledata");
 611     Xapian::Enquire enquire(db);
 612     Xapian::Query query("paragraph");
 613     enquire.set_query(query);
 614     Xapian::MSet mset;
 615
 616     enquire.set_weighting_scheme(Xapian::PL2Weight(2.0));
 617     mset = enquire.get_mset(0, 10);
 618     TEST_EQUAL(mset.size(), 5);
 619     // Expected weight difference calculated in extended precision using stats
 620     // from the test database.
 621     TEST_EQUAL_DOUBLE(mset[2].get_weight(),
 622                       mset[3].get_weight() + 0.0086861771701328694);
 623
 624     // Test with OP_SCALE_WEIGHT.
 625     enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 15.0));
 626     enquire.set_weighting_scheme(Xapian::PL2Weight(2.0));
 627
 628     Xapian::MSet mset2;
 629     mset2 = enquire.get_mset(0, 10);
 630     TEST_EQUAL(mset2.size(), 5);
 631     TEST_NOT_EQUAL_DOUBLE(mset[0].get_weight(), 0.0);
 632     for (int i = 0; i < 5; ++i) {
 633         TEST_EQUAL_DOUBLE(15.0 * mset[i].get_weight(), mset2[i].get_weight());
 634     }
 635
 636     return true;
 637 }
 638
 639 // Test exception for junk after serialised weight.
 640 DEFINE_TESTCASE(pl2plusweight1, !backend) {
 641     Xapian::PL2PlusWeight wt(2.0, 0.9);
 642     try {
 643         Xapian::PL2PlusWeight b;
 644         Xapian::PL2PlusWeight * b2 = b.unserialise(wt.serialise() + "X");
 645         // Make sure we actually use the weight.
 646         bool empty = b2->name().empty();
 647         delete b2;
 648         if (empty)
 649             FAIL_TEST("Serialised PL2PlusWeight with junk appended unserialised to empty name!");
 650         FAIL_TEST("Serialised PL2PlusWeight with junk appended unserialised OK");
 651     } catch (const Xapian::SerialisationError &e) {
 652         TEST(e.get_msg().find("PL2Plus") != string::npos);
 653     }
 654     return true;
 655 }
 656
 657 // Test for invalid values of parameters, c and delta.
 658 DEFINE_TESTCASE(pl2plusweight2, !backend) {
 659     // InvalidArgumentError should be thrown if parameter c is invalid.
 660     TEST_EXCEPTION(Xapian::InvalidArgumentError,
 661         Xapian::PL2PlusWeight wt(-2.0, 0.9));
 662
 663     // InvalidArgumentError should be thrown if parameter delta is invalid.
 664     TEST_EXCEPTION(Xapian::InvalidArgumentError,
 665         Xapian::PL2PlusWeight wt(1.0, -1.9));
 666
 667     return true;
 668 }
 669
 670 // Test for default values of parameters, c and delta.
 671 DEFINE_TESTCASE(pl2plusweight3, !backend) {
 672     Xapian::PL2PlusWeight weight2;
 673
 674     /* Parameter c should be set to 1.0 by constructor if none is given. */
 675     TEST_EQUAL(weight2.serialise(), Xapian::PL2PlusWeight(1.0, 0.8).serialise());
 676
 677     /* Parameter delta should be set to 0.8 by constructor if none is given. */
 678     TEST_EQUAL(weight2.serialise(), Xapian::PL2PlusWeight(1.0, 0.8).serialise());
 679
 680     return true;
 681 }
 682
 683 // Feature Test 1 for PL2PlusWeight.
 684 DEFINE_TESTCASE(pl2plusweight4, backend) {
 685     Xapian::Database db = get_database("apitest_simpledata");
 686     Xapian::Enquire enquire(db);
 687     enquire.set_query(Xapian::Query("paragraph"));
 688     Xapian::MSet mset;
 689
 690     enquire.set_weighting_scheme(Xapian::PL2PlusWeight(2.0, 0.8));
 691     mset = enquire.get_mset(0, 10);
 692     TEST_EQUAL(mset.size(), 5);
 693     // Expected weight difference calculated in extended precision using stats
 694     // from the test database.
 695     TEST_EQUAL_DOUBLE(mset[2].get_weight(),
 696                       mset[3].get_weight() + 0.0086861771701328694);
 697
 698     return true;
 699 }
 700
 701 // Feature Test 2 for PL2PlusWeight
 702 DEFINE_TESTCASE(pl2plusweight5, backend) {
 703     Xapian::Database db = get_database("apitest_simpledata");
 704     Xapian::Enquire enquire(db);
 705     Xapian::Query query("word");
 706     enquire.set_query(query);
 707     Xapian::MSet mset;
 708
 709     enquire.set_weighting_scheme(Xapian::PL2PlusWeight(1.0, 0.8));
 710     mset = enquire.get_mset(0, 10);
 711     // Expect MSet contains two documents having query "word".
 712     TEST_EQUAL(mset.size(), 2);
 713     // Expect Document 2 has higher weight than document 4 because
 714     // "word" appears more no. of times in document 2 than document 4.
 715     mset_expect_order(mset, 2, 4);
 716
 717     // Test with OP_SCALE_WEIGHT.
 718     enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 15.0));
 719     enquire.set_weighting_scheme(Xapian::PL2PlusWeight(1.0, 0.8));
 720
 721     Xapian::MSet mset2;
 722     mset2 = enquire.get_mset(0, 10);
 723     TEST_EQUAL(mset2.size(), mset.size());
 724     TEST_NOT_EQUAL_DOUBLE(mset[0].get_weight(), 0.0);
 725     for (Xapian::doccount i = 0; i < mset.size(); ++i) {
 726         TEST_EQUAL_DOUBLE(15.0 * mset[i].get_weight(), mset2[i].get_weight());
 727     }
 728
 729     return true;
 730 }
 731
 732 // Feature test
 733 DEFINE_TESTCASE(dphweight1, backend) {
 734     Xapian::Database db = get_database("apitest_simpledata");
 735     Xapian::Enquire enquire(db);
 736     Xapian::Query query("paragraph");
 737
 738     enquire.set_query(query);
 739     enquire.set_weighting_scheme(Xapian::DPHWeight());
 740
 741     Xapian::MSet mset1;
 742     mset1 = enquire.get_mset(0, 10);
 743     TEST_EQUAL(mset1.size(), 5);
 744     /* The weight has been calculated manually by using the statistics of the
 745      * test database. */
 746     TEST_EQUAL_DOUBLE(mset1[2].get_weight() - mset1[4].get_weight(), 0.542623617687990167);
 747
 748     // Test with OP_SCALE_WEIGHT.
 749     enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 15.0));
 750     enquire.set_weighting_scheme(Xapian::DPHWeight());
 751
 752     Xapian::MSet mset2;
 753     mset2 = enquire.get_mset(0, 10);
 754     TEST_EQUAL(mset2.size(), 5);
 755     TEST_NOT_EQUAL_DOUBLE(mset1[0].get_weight(), 0.0);
 756     for (int i = 0; i < 5; ++i) {
 757         TEST_EQUAL_DOUBLE(15.0 * mset1[i].get_weight(), mset2[i].get_weight());
 758     }
 759
 760     return true;
 761 }
 762
 763 // Test exception for junk after serialised weight.
 764 DEFINE_TESTCASE(dphweight2, !backend) {
 765     Xapian::DPHWeight wt;
 766     try {
 767         Xapian::DPHWeight t;
 768         Xapian::DPHWeight * t2 = t.unserialise(wt.serialise() + "X");
 769         // Make sure we actually use the weight.
 770         bool empty = t2->name().empty();
 771         delete t2;
 772         if (empty)
 773             FAIL_TEST("Serialised DPHWeight with junk appended unserialised to empty name!");
 774         FAIL_TEST("Serialised DPHWeight with junk appended unserialised OK");
 775     } catch (const Xapian::SerialisationError &e) {
 776         TEST(e.get_msg().find("DPH") != string::npos);
 777     }
 778     return true;
 779 }
 780
 781 // Test wdf == doclen.
 782 DEFINE_TESTCASE(dphweight3, generated) {
 783     Xapian::Database db = get_database("wdf_eq_doclen", gen_wdf_eq_doclen_db);
 784     Xapian::Enquire enquire(db);
 785     Xapian::Query query("solo");
 786
 787     enquire.set_query(query);
 788     enquire.set_weighting_scheme(Xapian::DPHWeight());
 789
 790     Xapian::MSet mset1;
 791     mset1 = enquire.get_mset(0, 10);
 792     TEST_EQUAL(mset1.size(), 1);
 793     // Weight gets clamped to zero.
 794     TEST_EQUAL_DOUBLE(mset1[0].get_weight(), 0.0);
 795
 796     return true;
 797 }
 798
 799 // Test for various cases of normalization string.
 800 DEFINE_TESTCASE(tfidfweight1, !backend) {
 801     // InvalidArgumentError should be thrown if normalization string is invalid
 802     TEST_EXCEPTION(Xapian::InvalidArgumentError,
 803         Xapian::TfIdfWeight b("JOHN_LENNON"));
 804
 805     TEST_EXCEPTION(Xapian::InvalidArgumentError,
 806         Xapian::TfIdfWeight b("LOL"));
 807
 808     /* Normalization string should be set to "ntn" by constructor if none is
 809       given. */
 810     Xapian::TfIdfWeight weight2;
 811     TEST_EQUAL(weight2.serialise(), Xapian::TfIdfWeight("ntn").serialise());
 812
 813     return true;
 814 }
 815
 816 // Test exception for junk after serialised weight.
 817 DEFINE_TESTCASE(tfidfweight2, !backend) {
 818     Xapian::TfIdfWeight wt("ntn");
 819     try {
 820         Xapian::TfIdfWeight b;
 821         Xapian::TfIdfWeight * b2 = b.unserialise(wt.serialise() + "X");
 822         // Make sure we actually use the weight.
 823         bool empty = b2->name().empty();
 824         delete b2;
 825         if (empty)
 826             FAIL_TEST("Serialised TfIdfWeight with junk appended unserialised to empty name!");
 827         FAIL_TEST("Serialised TfIdfWeight with junk appended unserialised OK");
 828     } catch (const Xapian::SerialisationError &e) {
 829         TEST(e.get_msg().find("TfIdf") != string::npos);
 830     }
 831     return true;
 832 }
 833
 834 // Feature tests for various normalization functions.
 835 DEFINE_TESTCASE(tfidfweight3, backend) {
 836     Xapian::Database db = get_database("apitest_simpledata");
 837     Xapian::Enquire enquire(db);
 838     Xapian::Query query("word");
 839     Xapian::MSet mset;
 840
 841     // Check for "ntn" when termfreq != N
 842     enquire.set_query(query);
 843     enquire.set_weighting_scheme(Xapian::TfIdfWeight("ntn"));
 844     mset = enquire.get_mset(0, 10);
 845     TEST_EQUAL(mset.size(), 2);
 846     // doc 2 should have higher weight than 4 as only tf(wdf) will dominate.
 847     mset_expect_order(mset, 2, 4);
 848     TEST_EQUAL_DOUBLE(mset[0].get_weight(), 8.0 * log(6.0 / 2));
 849
 850     // Check that wqf is taken into account.
 851     enquire.set_query(Xapian::Query("word", 2));
 852     enquire.set_weighting_scheme(Xapian::TfIdfWeight("ntn"));
 853     Xapian::MSet mset2 = enquire.get_mset(0, 10);
 854     TEST_EQUAL(mset2.size(), 2);
 855     // wqf is 2, so weights should be doubled.
 856     TEST_EQUAL_DOUBLE(mset[0].get_weight() * 2, mset2[0].get_weight());
 857     TEST_EQUAL_DOUBLE(mset[1].get_weight() * 2, mset2[1].get_weight());
 858
 859     // Test with OP_SCALE_WEIGHT.
 860     enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 15.0));
 861     enquire.set_weighting_scheme(Xapian::TfIdfWeight("ntn"));
 862     mset2 = enquire.get_mset(0, 10);
 863     TEST_EQUAL(mset2.size(), 2);
 864     // doc 2 should have higher weight than 4 as only tf(wdf) will dominate.
 865     mset_expect_order(mset2, 2, 4);
 866     TEST_NOT_EQUAL_DOUBLE(mset[0].get_weight(), 0.0);
 867     TEST_EQUAL_DOUBLE(15 * mset[0].get_weight(), mset2[0].get_weight());
 868
 869     // check for "nfn" when termfreq != N
 870     enquire.set_query(query);
 871     enquire.set_weighting_scheme(Xapian::TfIdfWeight("nfn"));
 872     mset = enquire.get_mset(0, 10);
 873     TEST_EQUAL(mset.size(), 2);
 874     mset_expect_order(mset, 2, 4);
 875     TEST_EQUAL_DOUBLE(mset[0].get_weight(), 8.0 / 2);
 876
 877     // check for "nsn" when termfreq != N
 878     enquire.set_query(query);
 879     enquire.set_weighting_scheme(Xapian::TfIdfWeight("nsn"));
 880     mset = enquire.get_mset(0, 10);
 881     TEST_EQUAL(mset.size(), 2);
 882     mset_expect_order(mset, 2, 4);
 883     TEST_EQUAL_DOUBLE(mset[0].get_weight(), 8.0 * pow(log(6.0 / 2), 2.0));
 884
 885     // Check for "bnn" and for both branches of 'b'.
 886     enquire.set_query(Xapian::Query("test"));
 887     enquire.set_weighting_scheme(Xapian::TfIdfWeight("bnn"));
 888     mset = enquire.get_mset(0, 10);
 889     TEST_EQUAL(mset.size(), 1);
 890     mset_expect_order(mset, 1);
 891     TEST_EQUAL_DOUBLE(mset[0].get_weight(), 1.0);
 892
 893     // Check for "lnn" and for both branches of 'l'.
 894     enquire.set_query(Xapian::Query("word"));
 895     enquire.set_weighting_scheme(Xapian::TfIdfWeight("lnn"));
 896     mset = enquire.get_mset(0, 10);
 897     TEST_EQUAL(mset.size(), 2);
 898     mset_expect_order(mset, 2, 4);
 899     TEST_EQUAL_DOUBLE(mset[0].get_weight(), 1 + log(8.0)); // idfn=1 and so wt=tfn=1+log(tf)
 900     TEST_EQUAL_DOUBLE(mset[1].get_weight(), 1.0);         // idfn=1 and wt=tfn=1+log(tf)=1+log(1)=1
 901
 902     // Check for "snn"
 903     enquire.set_query(Xapian::Query("paragraph"));
 904     enquire.set_weighting_scheme(Xapian::TfIdfWeight("snn")); // idf=1 and tfn=tf*tf
 905     mset = enquire.get_mset(0, 10);
 906     TEST_EQUAL(mset.size(), 5);
 907     mset_expect_order(mset, 2, 1, 4, 3, 5);
 908     TEST_EQUAL_DOUBLE(mset[0].get_weight(), 9.0);
 909     TEST_EQUAL_DOUBLE(mset[4].get_weight(), 1.0);
 910
 911     // Check for "ntn" when termfreq=N
 912     enquire.set_query(Xapian::Query("this"));  // N=termfreq amd so idfn=0 for "t"
 913     enquire.set_weighting_scheme(Xapian::TfIdfWeight("ntn"));
 914     mset = enquire.get_mset(0, 10);
 915     TEST_EQUAL(mset.size(), 6);
 916     mset_expect_order(mset, 1, 2, 3, 4, 5, 6);
 917     for (int i = 0; i < 6; ++i) {
 918         TEST_EQUAL_DOUBLE(mset[i].get_weight(), 0.0);
 919     }
 920
 921     // Check for "npn" and for both branches of 'p'
 922     enquire.set_query(Xapian::Query("this"));  // N=termfreq and so idfn=0 for "p"
 923     enquire.set_weighting_scheme(Xapian::TfIdfWeight("npn"));
 924     mset = enquire.get_mset(0, 10);
 925     TEST_EQUAL(mset.size(), 6);
 926     mset_expect_order(mset, 1, 2, 3, 4, 5, 6);
 927     for (int i = 0; i < 6; ++i) {
 928         TEST_EQUAL_DOUBLE(mset[i].get_weight(), 0.0);
 929     }
 930
 931     // Check for "Lnn".
 932     enquire.set_query(Xapian::Query("word"));
 933     enquire.set_weighting_scheme(Xapian::TfIdfWeight("Lnn"));
 934     mset = enquire.get_mset(0, 10);
 935     TEST_EQUAL(mset.size(), 2);
 936     mset_expect_order(mset, 2, 4);
 937     TEST_EQUAL_DOUBLE(mset[0].get_weight(), (1 + log(8.0)) / (1 + log(81.0 / 56.0)));
 938     TEST_EQUAL_DOUBLE(mset[1].get_weight(), (1 + log(1.0)) / (1 + log(31.0 / 26.0)));
 939
 940     enquire.set_query(Xapian::Query("word"));
 941     enquire.set_weighting_scheme(Xapian::TfIdfWeight("npn"));
 942     mset = enquire.get_mset(0, 10);
 943     TEST_EQUAL(mset.size(), 2);
 944     mset_expect_order(mset, 2, 4);
 945     TEST_EQUAL_DOUBLE(mset[0].get_weight(), 8 * log((6.0 - 2) / 2));
 946     TEST_EQUAL_DOUBLE(mset[1].get_weight(), 1 * log((6.0 - 2) / 2));
 947
 948     return true;
 949 }
 950
 951 // Feature tests for pivoted normalization functions.
 952 DEFINE_TESTCASE(tfidfweight4, backend) {
 953     Xapian::Database db = get_database("apitest_simpledata");
 954     Xapian::Enquire enquire(db);
 955     Xapian::Query query("paragraph");
 956     Xapian::MSet mset;
 957
 958     // Check for "PPn" normalization string.
 959     enquire.set_query(query);
 960     enquire.set_weighting_scheme(Xapian::TfIdfWeight("PPn", 0.2, 1.0));
 961     mset = enquire.get_mset(0, 10);
 962     TEST_EQUAL(mset.size(), 5);
 963     // Shorter docs should ranker higher if wqf is equal among all the docs.
 964     TEST_REL(mset[0].get_weight(),>,mset[1].get_weight());
 965     TEST_REL(mset[2].get_weight(),>,mset[3].get_weight());
 966
 967     // Check that wqf is taken into account.
 968     enquire.set_query(Xapian::Query("paragraph", 2));
 969     enquire.set_weighting_scheme(Xapian::TfIdfWeight("PPn", 0.2, 1.0));
 970     Xapian::MSet mset2 = enquire.get_mset(0, 10);
 971     TEST_EQUAL(mset2.size(), 5);
 972     // wqf is 2, so weights should be doubled.
 973     TEST_EQUAL_DOUBLE(mset[0].get_weight() * 2, mset2[0].get_weight());
 974     TEST_EQUAL_DOUBLE(mset[1].get_weight() * 2, mset2[1].get_weight());
 975
 976     // check for "nPn" which represents "xPx"
 977     enquire.set_query(Xapian::Query("word"));
 978     enquire.set_weighting_scheme(Xapian::TfIdfWeight("nPn", 0.2, 1.0));
 979     mset = enquire.get_mset(0, 10);
 980     TEST_EQUAL(mset.size(), 2);
 981     // Expect doc 2 with query "word" to have higher weight than doc 4.
 982     mset_expect_order(mset, 2, 4);
 983
 984     // check for "Ptn" which represents "Pxx"
 985     enquire.set_query(Xapian::Query("word"));
 986     enquire.set_weighting_scheme(Xapian::TfIdfWeight("Ptn", 0.2, 1.0));
 987     mset = enquire.get_mset(0, 10);
 988     TEST_EQUAL(mset.size(), 2);
 989     // Expect doc 2 with query "word" to have higher weight than doc 4.
 990     mset_expect_order(mset, 2, 4);
 991
 992     return true;
 993 }
 994
 995 class CheckInitWeight : public Xapian::Weight {
 996   public:
 997     double factor;
 998
 999     unsigned & zero_inits, & non_zero_inits;
1000
1001     CheckInitWeight(unsigned &z, unsigned &n)
1002         : factor(-1.0), zero_inits(z), non_zero_inits(n) { }
1003
1004     void init(double factor_) {
1005         factor = factor_;
1006         if (factor == 0.0)
1007             ++zero_inits;
1008         else
1009             ++non_zero_inits;
1010     }
1011
1012     Weight * clone() const {
1013         return new CheckInitWeight(zero_inits, non_zero_inits);
1014     }
1015
1016     double get_sumpart(Xapian::termcount, Xapian::termcount,
1017                        Xapian::termcount) const {
1018         return 1.0;
1019     }
1020
1021     double get_maxpart() const { return 1.0; }
1022
1023     double get_sumextra(Xapian::termcount doclen, Xapian::termcount) const {
1024         return 1.0 / doclen;
1025     }
1026
1027     double get_maxextra() const { return 1.0; }
1028 };
1029
1030 /// Regression test - check init() is called for the term-indep Weight obj.
1031 DEFINE_TESTCASE(checkinitweight1, backend && !multi && !remote) {
1032     Xapian::Database db = get_database("apitest_simpledata");
1033     Xapian::Enquire enquire(db);
1034     Xapian::Query q(Xapian::Query::OP_AND,
1035                     Xapian::Query("this"), Xapian::Query("paragraph"));
1036     enquire.set_query(q);
1037     unsigned zero_inits = 0, non_zero_inits = 0;
1038     CheckInitWeight wt(zero_inits, non_zero_inits);
1039     enquire.set_weighting_scheme(wt);
1040     Xapian::MSet mset = enquire.get_mset(0, 3);
1041     TEST_EQUAL(zero_inits, 1);
1042     TEST_EQUAL(non_zero_inits, 2);
1043     return true;
1044 }
1045
/** Weight subclass which checks the statistics made available to it.
 *
 *  The constructor requests every statistic via need_stat().  get_sumpart()
 *  then compares the values reported through the Weight accessors against
 *  values read directly from the database, and adds the wdf it is passed
 *  (and its square) to two caller-owned counters so the test case can check
 *  them afterwards.
 */
class CheckStatsWeight : public Xapian::Weight {
  public:
    /// Factor from the most recent init() call (-1.0 until init() is called).
    double factor;

    /// Database which the reported statistics are checked against.
    Xapian::Database db;

    /// The term under test, or the prefix being expanded when term2 == "*".
    string term1;

    // Selects which kind of query is being checked:
    //
    // Empty for a plain single-term query.
    // When testing OP_SYNONYM, term2 is also set.
    // When testing OP_WILDCARD, term2 == "*".
    // When testing a repeated term, term2 == "=" for the first occurrence and
    // "_" for subsequent occurrences.
    //
    // This is mutable because clone() (which is const) flips "=" to "_".
    mutable string term2;

    /// Caller-owned running total of the wdf values seen by get_sumpart().
    Xapian::termcount & sum;
    /// Caller-owned running total of the squares of those wdf values.
    Xapian::termcount & sum_squares;

    // Bounds cached by get_maxpart() and used by the range checks in
    // get_sumpart().  len_upper == 0 doubles as the "not fetched yet" flag.
    mutable Xapian::termcount len_upper;
    mutable Xapian::termcount len_lower;
    mutable Xapian::termcount wdf_upper;

    /// Construct with an explicit term2 (see the comment on term2), and
    /// request every statistic.
    CheckStatsWeight(const Xapian::Database & db_,
                     const string & term1_,
                     const string & term2_,
                     Xapian::termcount & sum_,
                     Xapian::termcount & sum_squares_)
        : factor(-1.0), db(db_), term1(term1_), term2(term2_),
          sum(sum_), sum_squares(sum_squares_),
          len_upper(0), len_lower(Xapian::termcount(-1)), wdf_upper(0)
    {
        need_stat(COLLECTION_SIZE);
        need_stat(RSET_SIZE);
        need_stat(AVERAGE_LENGTH);
        need_stat(TERMFREQ);
        need_stat(RELTERMFREQ);
        need_stat(QUERY_LENGTH);
        need_stat(WQF);
        need_stat(WDF);
        need_stat(DOC_LENGTH);
        need_stat(DOC_LENGTH_MIN);
        need_stat(DOC_LENGTH_MAX);
        need_stat(WDF_MAX);
        need_stat(COLLECTION_FREQ);
        need_stat(UNIQUE_TERMS);
    }

    /// Construct for the plain single-term case (term2 left empty).
    CheckStatsWeight(const Xapian::Database & db_,
                     const string & term_,
                     Xapian::termcount & sum_,
                     Xapian::termcount & sum_squares_)
        : CheckStatsWeight(db_, term_, string(), sum_, sum_squares_) { }

    /// Just remember the factor passed.
    void init(double factor_) {
        factor = factor_;
    }

    /** Create a copy sharing the database, terms and counters.
     *
     *  The copy is made BEFORE term2 is updated, so when term2 is "=" the
     *  returned object keeps "=" and this object changes to "_".
     */
    Weight * clone() const {
        auto res = new CheckStatsWeight(db, term1, term2, sum, sum_squares);
        if (term2 == "=") {
            // The object passed to Enquire::set_weighting_scheme() is cloned
            // right away, and then cloned again for each term, and then
            // potentially once more for the term-independent weight
            // contribution.  In the repeated case, we want to handle the first
            // actual term specially, so we arrange for that to have "=" for
            // term2, and subsequent clones to have "_", so that we accumulate
            // sum and sum_squares on the first occurrence only.
            term2 = "_";
        }
        return res;
    }

    /** Check the statistics for one document and accumulate its wdf.
     *
     *  @param wdf          wdf passed in by the caller; added to the counters
     *  @param doclen       document length; checked against cached bounds
     *  @param uniqueterms  checked to lie between 1 and doclen
     *
     *  @return always 1.0
     *
     *  NOTE(review): the bound checks use len_lower, len_upper and wdf_upper,
     *  which are only filled in by get_maxpart() - presumably that is always
     *  called on this object first; confirm against the matcher.
     */
    double get_sumpart(Xapian::termcount wdf, Xapian::termcount doclen,
                       Xapian::termcount uniqueterms) const {
        Xapian::doccount num_docs = db.get_doccount();
        TEST_EQUAL(get_collection_size(), num_docs);
        TEST_EQUAL(get_rset_size(), 0);
        TEST_EQUAL(get_average_length(), db.get_avlength());
        if (term2.empty() || term2 == "=" || term2 == "_") {
            // Single real term (possibly repeated): the frequencies must
            // match the database exactly.
            TEST_EQUAL(get_termfreq(), db.get_termfreq(term1));
            TEST_EQUAL(get_collection_freq(), db.get_collection_freq(term1));
            if (term2.empty()) {
                TEST_EQUAL(get_query_length(), 1);
            } else {
                // Repeated term case: the term appears twice in the query.
                TEST_EQUAL(get_query_length(), 2);
            }
        } else {
            // Several terms are combined, so only bounds on the frequencies
            // can be checked: compute the max and the sum over the terms.
            Xapian::doccount tfmax = 0, tfsum = 0;
            Xapian::termcount cfmax = 0, cfsum = 0;
            if (term2 == "*") {
                // OP_WILDCARD case.
                for (auto&& t = db.allterms_begin(term1);
                     t != db.allterms_end(term1); ++t) {
                    Xapian::doccount tf = t.get_termfreq();
                    tout << "->" << *t << " " << tf << endl;
                    tfsum += tf;
                    tfmax = max(tfmax, tf);
                    Xapian::termcount cf = db.get_collection_freq(*t);
                    cfsum += cf;
                    cfmax = max(cfmax, cf);
                }
                TEST_EQUAL(get_query_length(), 1);
            } else {
                // OP_SYNONYM case.
                Xapian::doccount tf1 = db.get_termfreq(term1);
                Xapian::doccount tf2 = db.get_termfreq(term2);
                tfsum = tf1 + tf2;
                tfmax = max(tf1, tf2);
                Xapian::termcount cf1 = db.get_collection_freq(term1);
                Xapian::termcount cf2 = db.get_collection_freq(term2);
                cfsum = cf1 + cf2;
                cfmax = max(cf1, cf2);
                TEST_EQUAL(get_query_length(), 2);
            }
            // Synonym occurs at least as many times as any term.
            TEST_REL(get_termfreq(), >=, tfmax);
            TEST_REL(get_collection_freq(), >=, cfmax);
            // Synonym can't occur more times than the terms do.
            TEST_REL(get_termfreq(), <=, tfsum);
            TEST_REL(get_collection_freq(), <=, cfsum);
            // Synonym can't occur more times than there are documents/terms.
            TEST_REL(get_termfreq(), <=, num_docs);
            double total_term_occurences = get_average_length() * num_docs;
            TEST_REL(get_collection_freq(), <=, total_term_occurences);
        }
        TEST_EQUAL(get_reltermfreq(), 0);
        TEST_EQUAL(get_wqf(), 1);
        TEST_REL(doclen,>=,len_lower);
        TEST_REL(doclen,<=,len_upper);
        TEST_REL(uniqueterms,>=,1);
        TEST_REL(uniqueterms,<=,doclen);
        TEST_REL(wdf,<=,wdf_upper);
        // Objects marked "_" are later occurrences of a repeated term, and
        // must not be counted a second time.
        if (term2 != "_") {
            sum += wdf;
            sum_squares += wdf * wdf;
        }
        return 1.0;
    }

    /// Cache the length and wdf bounds on the first call; always returns 1.0.
    double get_maxpart() const {
        if (len_upper == 0) {
            len_lower = get_doclength_lower_bound();
            len_upper = get_doclength_upper_bound();
            wdf_upper = get_wdf_upper_bound();
        }
        return 1.0;
    }

    /// The term-independent contribution is the reciprocal of the doclen.
    double get_sumextra(Xapian::termcount doclen, Xapian::termcount) const {
        return 1.0 / doclen;
    }

    double get_maxextra() const { return 1.0; }
};
1199
1200 /// Check the weight subclass gets the correct stats.
1201 DEFINE_TESTCASE(checkstatsweight1, backend && !remote) {
1202     Xapian::Database db = get_database("apitest_simpledata");
1203     Xapian::Enquire enquire(db);
1204     Xapian::TermIterator a;
1205     for (a = db.allterms_begin(); a != db.allterms_end(); ++a) {
1206         const string & term = *a;
1207         enquire.set_query(Xapian::Query(term));
1208         Xapian::termcount sum = 0;
1209         Xapian::termcount sum_squares = 0;
1210         CheckStatsWeight wt(db, term, sum, sum_squares);
1211         enquire.set_weighting_scheme(wt);
1212         Xapian::MSet mset = enquire.get_mset(0, db.get_doccount());
1213
1214         // The document order in the multi-db case isn't the same as the
1215         // postlist order on the combined DB, so it's hard to compare the
1216         // wdf for each document in the Weight objects, so we can sum
1217         // the wdfs and the squares of the wdfs which provides a decent
1218         // check that we're not getting the wrong wdf values (it ensures
1219         // they have the right mean and standard deviation).
1220         Xapian::termcount expected_sum = 0;
1221         Xapian::termcount expected_sum_squares = 0;
1222         Xapian::PostingIterator i;
1223         for (i = db.postlist_begin(term); i != db.postlist_end(term); ++i) {
1224             Xapian::termcount wdf = i.get_wdf();
1225             expected_sum += wdf;
1226             expected_sum_squares += wdf * wdf;
1227         }
1228         TEST_EQUAL(sum, expected_sum);
1229         TEST_EQUAL(sum_squares, expected_sum_squares);
1230     }
1231     return true;
1232 }
1233
1234 /// Check the weight subclass gets the correct stats with OP_SYNONYM.
1235 // Regression test for bugs fixed in 1.4.1.
1236 DEFINE_TESTCASE(checkstatsweight2, backend && !remote) {
1237     Xapian::Database db = get_database("apitest_simpledata");
1238     Xapian::Enquire enquire(db);
1239     Xapian::TermIterator a;
1240     for (a = db.allterms_begin(); a != db.allterms_end(); ++a) {
1241         const string & term1 = *a;
1242         if (++a == db.allterms_end()) break;
1243         const string & term2 = *a;
1244         Xapian::Query q(Xapian::Query::OP_SYNONYM,
1245                         Xapian::Query(term1), Xapian::Query(term2));
1246         tout << q.get_description() << endl;
1247         enquire.set_query(q);
1248         Xapian::termcount sum = 0;
1249         Xapian::termcount sum_squares = 0;
1250         CheckStatsWeight wt(db, term1, term2, sum, sum_squares);
1251         enquire.set_weighting_scheme(wt);
1252         Xapian::MSet mset = enquire.get_mset(0, db.get_doccount());
1253
1254         // The document order in the multi-db case isn't the same as the
1255         // postlist order on the combined DB, so it's hard to compare the
1256         // wdf for each document in the Weight objects, so we can sum
1257         // the wdfs and the squares of the wdfs which provides a decent
1258         // check that we're not getting the wrong wdf values (it ensures
1259         // they have the right mean and standard deviation).
1260         Xapian::termcount expected_sum = 0;
1261         Xapian::termcount expected_sum_squares = 0;
1262         Xapian::PostingIterator i = db.postlist_begin(term1);
1263         Xapian::PostingIterator j = db.postlist_begin(term2);
1264         Xapian::docid did1 = *i, did2 = *j;
1265         while (true) {
1266             // To calculate expected_sum_squares correctly we need to square
1267             // the sum per document.
1268             Xapian::termcount wdf;
1269             if (did1 == did2) {
1270                 wdf = i.get_wdf() + j.get_wdf();
1271                 did1 = did2 = 0;
1272             } else if (did1 < did2) {
1273                 wdf = i.get_wdf();
1274                 did1 = 0;
1275             } else {
1276                 wdf = j.get_wdf();
1277                 did2 = 0;
1278             }
1279             expected_sum += wdf;
1280             expected_sum_squares += wdf * wdf;
1281
1282             if (did1 == 0) {
1283                 if (++i != db.postlist_end(term1)) {
1284                     did1 = *i;
1285                 } else {
1286                     if (did2 == Xapian::docid(-1)) break;
1287                     did1 = Xapian::docid(-1);
1288                 }
1289             }
1290             if (did2 == 0) {
1291                 if (++j != db.postlist_end(term2)) {
1292                     did2 = *j;
1293                 } else {
1294                     if (did1 == Xapian::docid(-1)) break;
1295                     did2 = Xapian::docid(-1);
1296                 }
1297             }
1298         }
1299         // The OP_SYNONYM's wdf should be equal to the sum of the wdfs of
1300         // the individual terms.
1301         TEST_EQUAL(sum, expected_sum);
1302         TEST_REL(sum_squares, >=, expected_sum_squares);
1303     }
1304     return true;
1305 }
1306
1307 /// Check the weight subclass gets the correct stats with OP_WILDCARD.
1308 // Regression test for bug fixed in 1.4.1.
1309 // Don't run with multi-database, as the termfreq checks don't work
1310 // there - FIXME: Investigate this - it smells like a bug.
1311 DEFINE_TESTCASE(checkstatsweight3, backend && !remote && !multi) {
1312     struct PlCmp {
1313         bool operator()(const Xapian::PostingIterator& a,
1314                         const Xapian::PostingIterator& b) {
1315             return *a < *b;
1316         }
1317     };
1318
1319     Xapian::Database db = get_database("apitest_simpledata");
1320     Xapian::Enquire enquire(db);
1321     Xapian::TermIterator a;
1322     static const char * const testcases[] = {
1323         "a", // a* matches all documents, but no term matches all.
1324         "pa", // Expands to only "paragraph", matching 5.
1325         "zulu", // No matches.
1326         "th", // Term "this" matches all documents.
1327     };
1328     for (auto pattern : testcases) {
1329         Xapian::Query q(Xapian::Query::OP_WILDCARD, pattern);
1330         tout << q.get_description() << endl;
1331         enquire.set_query(q);
1332         Xapian::termcount sum = 0;
1333         Xapian::termcount sum_squares = 0;
1334         CheckStatsWeight wt(db, pattern, "*", sum, sum_squares);
1335         enquire.set_weighting_scheme(wt);
1336         Xapian::MSet mset = enquire.get_mset(0, db.get_doccount());
1337
1338         // The document order in the multi-db case isn't the same as the
1339         // postlist order on the combined DB, so it's hard to compare the
1340         // wdf for each document in the Weight objects, so we can sum
1341         // the wdfs and the squares of the wdfs which provides a decent
1342         // check that we're not getting the wrong wdf values (it ensures
1343         // they have the right mean and standard deviation).
1344         Xapian::termcount expected_sum = 0;
1345         Xapian::termcount expected_sum_squares = 0;
1346         vector<Xapian::PostingIterator> postlists;
1347         for (auto&& t = db.allterms_begin(pattern);
1348              t != db.allterms_end(pattern); ++t) {
1349             postlists.emplace_back(db.postlist_begin(*t));
1350         }
1351         make_heap(postlists.begin(), postlists.end(), PlCmp());
1352         Xapian::docid did = 0;
1353         Xapian::termcount wdf = 0;
1354         while (!postlists.empty()) {
1355             pop_heap(postlists.begin(), postlists.end(), PlCmp());
1356             Xapian::docid did_new = *postlists.back();
1357             Xapian::termcount wdf_new = postlists.back().get_wdf();
1358             if (++(postlists.back()) == Xapian::PostingIterator()) {
1359                 postlists.pop_back();
1360             } else {
1361                 push_heap(postlists.begin(), postlists.end(), PlCmp());
1362             }
1363             if (did_new != did) {
1364                 expected_sum += wdf;
1365                 expected_sum_squares += wdf * wdf;
1366                 wdf = 0;
1367                 did = did_new;
1368             }
1369             wdf += wdf_new;
1370         }
1371         expected_sum += wdf;
1372         expected_sum_squares += wdf * wdf;
1373         // The OP_SYNONYM's wdf should be equal to the sum of the wdfs of
1374         // the individual terms.
1375         TEST_EQUAL(sum, expected_sum);
1376         TEST_REL(sum_squares, >=, expected_sum_squares);
1377     }
1378     return true;
1379 }
1380
1381 /// Check the stats for a repeated term are correct.
1382 // Regression test for bug fixed in 1.4.6.  Doesn't work with
1383 // multi as the weight object is cloned more times.
1384 DEFINE_TESTCASE(checkstatsweight4, backend && !remote && !multi) {
1385     Xapian::Database db = get_database("apitest_simpledata");
1386     Xapian::Enquire enquire(db);
1387     Xapian::TermIterator a;
1388     for (a = db.allterms_begin(); a != db.allterms_end(); ++a) {
1389         const string & term = *a;
1390         enquire.set_query(Xapian::Query(term, 1, 1) |
1391                           Xapian::Query(term, 1, 2));
1392         Xapian::termcount sum = 0;
1393         Xapian::termcount sum_squares = 0;
1394         CheckStatsWeight wt(db, term, "=", sum, sum_squares);
1395         enquire.set_weighting_scheme(wt);
1396         Xapian::MSet mset = enquire.get_mset(0, db.get_doccount());
1397
1398         // The document order in the multi-db case isn't the same as the
1399         // postlist order on the combined DB, so it's hard to compare the
1400         // wdf for each document in the Weight objects, so we can sum
1401         // the wdfs and the squares of the wdfs which provides a decent
1402         // check that we're not getting the wrong wdf values (it ensures
1403         // they have the right mean and standard deviation).
1404         Xapian::termcount expected_sum = 0;
1405         Xapian::termcount expected_sum_squares = 0;
1406         Xapian::PostingIterator i;
1407         for (i = db.postlist_begin(term); i != db.postlist_end(term); ++i) {
1408             Xapian::termcount wdf = i.get_wdf();
1409             expected_sum += wdf;
1410             expected_sum_squares += wdf * wdf;
1411         }
1412         TEST_EQUAL(sum, expected_sum);
1413         TEST_EQUAL(sum_squares, expected_sum_squares);
1414     }
1415     return true;
1416 }
1417
1418 // Two stage should perform same as Jelinek mercer if smoothing parameter for mercer is kept 1 in both.
1419 DEFINE_TESTCASE(unigramlmweight4, backend) {
1420     Xapian::Database db = get_database("apitest_simpledata");
1421     Xapian::Enquire enquire1(db);
1422     Xapian::Enquire enquire2(db);
1423     enquire1.set_query(Xapian::Query("paragraph"));
1424     Xapian::MSet mset1;
1425     enquire2.set_query(Xapian::Query("paragraph"));
1426     Xapian::MSet mset2;
1427     // 5 documents available with term paragraph so mset size should be 5
1428     enquire1.set_weighting_scheme(Xapian::LMWeight(0, Xapian::Weight::TWO_STAGE_SMOOTHING, 1, 0));
1429     enquire2.set_weighting_scheme(Xapian::LMWeight(0, Xapian::Weight::JELINEK_MERCER_SMOOTHING, 1, 0));
1430     mset1 = enquire1.get_mset(0, 10);
1431     mset2 = enquire2.get_mset(0, 10);
1432
1433     TEST_EQUAL(mset1.size(), 5);
1434     TEST_EQUAL_DOUBLE(mset1[1].get_weight(), mset2[1].get_weight());
1435     return true;
1436 }
1437
1438 /* Test for checking if we don't use smoothing all
1439  * of them should give same result i.e wdf_double/len_double */
1440 DEFINE_TESTCASE(unigramlmweight5, backend) {
1441     Xapian::Database db = get_database("apitest_simpledata");
1442     Xapian::Enquire enquire1(db);
1443     Xapian::Enquire enquire2(db);
1444     Xapian::Enquire enquire3(db);
1445     Xapian::Enquire enquire4(db);
1446     enquire1.set_query(Xapian::Query("paragraph"));
1447     Xapian::MSet mset1;
1448     enquire2.set_query(Xapian::Query("paragraph"));
1449     Xapian::MSet mset2;
1450     enquire3.set_query(Xapian::Query("paragraph"));
1451     Xapian::MSet mset3;
1452     enquire4.set_query(Xapian::Query("paragraph"));
1453     Xapian::MSet mset4;
1454     // 5 documents available with term paragraph so mset size should be 5
1455     enquire1.set_weighting_scheme(Xapian::LMWeight(10000.0, Xapian::Weight::TWO_STAGE_SMOOTHING, 0, 0));
1456     enquire2.set_weighting_scheme(Xapian::LMWeight(10000.0, Xapian::Weight::JELINEK_MERCER_SMOOTHING, 0, 0));
1457     enquire3.set_weighting_scheme(Xapian::LMWeight(10000.0, Xapian::Weight::ABSOLUTE_DISCOUNT_SMOOTHING, 0, 0));
1458     enquire4.set_weighting_scheme(Xapian::LMWeight(10000.0, Xapian::Weight::DIRICHLET_SMOOTHING, 0, 0));
1459
1460     mset1 = enquire1.get_mset(0, 10);
1461     mset2 = enquire2.get_mset(0, 10);
1462     mset3 = enquire3.get_mset(0, 10);
1463     mset4 = enquire4.get_mset(0, 10);
1464
1465     TEST_EQUAL(mset1.size(), 5);
1466     TEST_EQUAL(mset2.size(), 5);
1467     TEST_EQUAL(mset3.size(), 5);
1468     TEST_EQUAL(mset4.size(), 5);
1469     for (size_t i = 0; i < 5; ++i) {
1470         TEST_EQUAL_DOUBLE(mset3[i].get_weight(), mset4[i].get_weight());
1471         TEST_EQUAL_DOUBLE(mset2[i].get_weight(), mset4[i].get_weight());
1472         TEST_EQUAL_DOUBLE(mset1[i].get_weight(), mset2[i].get_weight());
1473         TEST_EQUAL_DOUBLE(mset3[i].get_weight(), mset2[i].get_weight());
1474         TEST_EQUAL_DOUBLE(mset1[i].get_weight(), mset4[i].get_weight());
1475         TEST_EQUAL_DOUBLE(mset1[i].get_weight(), mset3[i].get_weight());
1476     }
1477     return true;
1478 }
1479
1480 // Test Exception for junk after serialised weight (with Dir+ enabled).
1481 DEFINE_TESTCASE(unigramlmweight6, !backend) {
1482     Xapian::LMWeight wt(0, Xapian::Weight::DIRICHLET_SMOOTHING, 0.5, 1.0);
1483     try {
1484         Xapian::LMWeight d;
1485         Xapian::LMWeight * d2 = d.unserialise(wt.serialise() + "X");
1486         // Make sure we actually use the weight.
1487         bool empty = d2->name().empty();
1488         delete d2;
1489         if (empty)
1490             FAIL_TEST("Serialised LMWeight with junk appended unserialised to empty name!");
1491         FAIL_TEST("Serialised LMWeight with junk appended unserialised OK");
1492     } catch (const Xapian::SerialisationError &e) {
1493         TEST(e.get_msg().find("LM") != string::npos);
1494     }
1495     return true;
1496 }
1497
1498 // Feature test for Dir+ function.
1499 DEFINE_TESTCASE(unigramlmweight7, backend) {
1500     Xapian::Database db = get_database("apitest_simpledata");
1501     Xapian::Enquire enquire1(db);
1502     Xapian::Enquire enquire2(db);
1503     enquire1.set_query(Xapian::Query("paragraph"));
1504     enquire2.set_query(Xapian::Query("paragraph"));
1505     Xapian::MSet mset1;
1506     Xapian::MSet mset2;
1507
1508     enquire1.set_weighting_scheme(Xapian::LMWeight(0, Xapian::Weight::DIRICHLET_SMOOTHING, 2000, 0));
1509     enquire2.set_weighting_scheme(Xapian::LMWeight(0, Xapian::Weight::DIRICHLET_PLUS_SMOOTHING, 2000, 0.05));
1510
1511     mset1 = enquire1.get_mset(0, 10);
1512     mset2 = enquire2.get_mset(0, 10);
1513
1514     // mset size should be 5
1515     TEST_EQUAL(mset1.size(), 5);
1516     TEST_EQUAL(mset2.size(), 5);
1517
1518     // Expect mset weights associated with Dir+ more than mset weights by Dir
1519     // because of the presence of extra weight component in Dir+ function.
1520     TEST_REL(mset2[0].get_weight(),>,mset1[0].get_weight());
1521     TEST_REL(mset2[1].get_weight(),>,mset1[1].get_weight());
1522     TEST_REL(mset2[2].get_weight(),>,mset1[2].get_weight());
1523     TEST_REL(mset2[3].get_weight(),>,mset1[3].get_weight());
1524     TEST_REL(mset2[4].get_weight(),>,mset1[4].get_weight());
1525
1526     return true;
1527 }
1528
1529 // Regression test that OP_SCALE_WEIGHT works with LMWeight (fixed in 1.4.1).
1530 DEFINE_TESTCASE(unigramlmweight8, backend) {
1531     Xapian::Database db = get_database("apitest_simpledata");
1532     Xapian::Enquire enquire(db);
1533     Xapian::Query query("paragraph");
1534
1535     enquire.set_query(query);
1536     enquire.set_weighting_scheme(Xapian::LMWeight(0, Xapian::Weight::DIRICHLET_SMOOTHING, 2000, 0));
1537
1538     Xapian::MSet mset1;
1539     mset1 = enquire.get_mset(0, 10);
1540     TEST_EQUAL(mset1.size(), 5);
1541
1542     enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 15.0));
1543     enquire.set_weighting_scheme(Xapian::LMWeight(0, Xapian::Weight::DIRICHLET_SMOOTHING, 2000, 0));
1544
1545     Xapian::MSet mset2;
1546     mset2 = enquire.get_mset(0, 10);
1547     TEST_EQUAL(mset2.size(), mset1.size());
1548     TEST_NOT_EQUAL_DOUBLE(mset1[0].get_weight(), 0.0);
1549     for (Xapian::doccount i = 0; i < mset1.size(); ++i) {
1550         TEST_EQUAL_DOUBLE(15.0 * mset1[i].get_weight(), mset2[i].get_weight());
1551     }
1552
1553     return true;
1554 }
1555
1556 // Feature test for BoolWeight.
1557 // Test exception for junk after serialised weight.
1558 DEFINE_TESTCASE(boolweight1, !backend) {
1559     Xapian::BoolWeight wt;
1560     try {
1561         Xapian::BoolWeight t;
1562         Xapian::BoolWeight * t2 = t.unserialise(wt.serialise() + "X");
1563         // Make sure we actually use the weight.
1564         bool empty = t2->name().empty();
1565         delete t2;
1566         if (empty)
1567             FAIL_TEST("Serialised BoolWeight with junk appended unserialised to empty name!");
1568         FAIL_TEST("Serialised BoolWeight with junk appended unserialised OK");
1569     } catch (const Xapian::SerialisationError &e) {
1570         TEST(e.get_msg().find("Bool") != string::npos);
1571     }
1572     return true;
1573 }
1574
1575 // Feature test for CoordWeight.
1576 DEFINE_TESTCASE(coordweight1, backend) {
1577     Xapian::Enquire enquire(get_database("apitest_simpledata"));
1578     enquire.set_weighting_scheme(Xapian::CoordWeight());
1579     static const char * const terms[] = {
1580         "this", "line", "paragraph", "rubbish"
1581     };
1582     Xapian::Query query(Xapian::Query::OP_OR,
1583                         terms, terms + sizeof(terms) / sizeof(terms[0]));
1584     enquire.set_query(query);
1585     Xapian::MSet mymset1 = enquire.get_mset(0, 100);
1586     // CoordWeight scores 1 for each matching term, so the weight should equal
1587     // the number of matching terms.
1588     for (Xapian::MSetIterator i = mymset1.begin(); i != mymset1.end(); ++i) {
1589         Xapian::termcount matching_terms = 0;
1590         Xapian::TermIterator t = enquire.get_matching_terms_begin(i);
1591         while (t != enquire.get_matching_terms_end(i)) {
1592             ++matching_terms;
1593             ++t;
1594         }
1595         TEST_EQUAL(i.get_weight(), matching_terms);
1596     }
1597
1598     // Test with OP_SCALE_WEIGHT.
1599     enquire.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, query, 15.0));
1600     Xapian::MSet mymset2 = enquire.get_mset(0, 100);
1601     TEST_EQUAL(mymset1.size(), mymset2.size());
1602     for (Xapian::doccount i = 0; i != mymset1.size(); ++i) {
1603         TEST_EQUAL(15.0 * mymset1[i].get_weight(), mymset2[i].get_weight());
1604     }
1605
1606     return true;
1607 }
1608
1609 // Test exception for junk after serialised weight.
1610 DEFINE_TESTCASE(coordweight2, !backend) {
1611     Xapian::CoordWeight wt;
1612     try {
1613         Xapian::CoordWeight t;
1614         Xapian::CoordWeight * t2 = t.unserialise(wt.serialise() + "X");
1615         // Make sure we actually use the weight.
1616         bool empty = t2->name().empty();
1617         delete t2;
1618         if (empty)
1619             FAIL_TEST("Serialised CoordWeight with junk appended unserialised to empty name!");
1620         FAIL_TEST("Serialised CoordWeight with junk appended unserialised OK");
1621     } catch (const Xapian::SerialisationError &e) {
1622         TEST(e.get_msg().find("Coord") != string::npos);
1623     }
1624     return true;
1625 }