1 /* api_postingsource.cc: tests of posting sources
3 * Copyright 2008,2009,2011,2015,2016 Olly Betts
4 * Copyright 2008,2009 Lemur Consulting Ltd
5 * Copyright 2010 Richard Boulton
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
25 #include "api_postingsource.h"
30 #include "safeunistd.h"
33 #include "testutils.h"
// Test posting source whose visible next()/skip_to() code bumps an even
// docid up to the next odd one, so after either call `did` is odd.
// NOTE(review): this copy of the file has gaps (the embedded line numbers
// skip), so the declaration of `did`, any access specifiers, parts of the
// method bodies and the closing braces are not visible -- confirm against
// the complete file before relying on these notes.
38 class MyOddPostingSource
: public Xapian::PostingSource
{
// Document count; used for the termfreq estimate and upper bound below.
39 Xapian::doccount num_docs
;
// Highest docid; the end-of-list test compares `did` against it.
41 Xapian::doccount last_docid
;
// Builds a source directly from the two stored values (this is the
// constructor clone() calls); `did` starts at 0.
45 MyOddPostingSource(Xapian::doccount num_docs_
,
46 Xapian::doccount last_docid_
)
47 : num_docs(num_docs_
), last_docid(last_docid_
), did(0)
// Builds a source from a database, reading its document count and last
// docid; `did` starts at 0.
51 MyOddPostingSource(const Xapian::Database
&db
)
52 : num_docs(db
.get_doccount()), last_docid(db
.get_lastdocid()), did(0)
// Returns a newly allocated copy carrying the same num_docs/last_docid.
55 PostingSource
* clone() const { return new MyOddPostingSource(num_docs
, last_docid
); }
// Resets the current position; the database argument is unused.
57 void init(const Xapian::Database
&) { did
= 0; }
59 // These bounds could be better, but that's not important here.
60 Xapian::doccount
get_termfreq_min() const { return 0; }
62 Xapian::doccount
get_termfreq_est() const { return num_docs
/ 2; }
64 Xapian::doccount
get_termfreq_max() const { return num_docs
; }
// Advance. Only the "round an even docid up to odd" step is visible here;
// NOTE(review): the statement(s) that move `did` forward are missing from
// this copy.
66 void next(double wt
) {
69 if (did
% 2 == 0) ++did
;
// Skip forward. As with next(), only the round-up-to-odd step is visible;
// NOTE(review): presumably `did` is first set from to_did -- confirm.
72 void skip_to(Xapian::docid to_did
, double wt
) {
75 if (did
% 2 == 0) ++did
;
// End-of-list test: true once `did` has passed last_docid.
// NOTE(review): the enclosing function header is not visible in this copy.
79 // Doesn't work if last_docid is 2^32 - 1.
80 return did
> last_docid
;
// Current position.
83 Xapian::docid
get_docid() const { return did
; }
// Fixed human-readable name for this source.
85 string
get_description() const { return "MyOddPostingSource"; }
// Test case: uses MyOddPostingSource as a whole query and as the right-hand
// argument of OP_FILTER with the term "leav", checking the returned docid
// order each time. Also checks that a NULL PostingSource pointer is rejected.
88 DEFINE_TESTCASE(externalsource1
, backend
&& !remote
&& !multi
) {
89 // Doesn't work for remote without registering with the server.
90 // Doesn't work for multi because it checks the docid in the
// NOTE(review): the sentence above is cut off in this copy of the file.
92 Xapian::Database
db(get_database("apitest_phrase"));
93 Xapian::Enquire
enq(db
);
94 MyOddPostingSource
src(db
);
96 // Check that passing NULL is rejected as intended.
97 Xapian::PostingSource
* nullsrc
= NULL
;
98 TEST_EXCEPTION(Xapian::InvalidArgumentError
, Xapian::Query
bad(nullsrc
));
// The source on its own: expect the odd docids 1..17 in ascending order.
100 enq
.set_query(Xapian::Query(&src
));
102 Xapian::MSet mset
= enq
.get_mset(0, 10);
103 mset_expect_order(mset
, 1, 3, 5, 7, 9, 11, 13, 15, 17);
// "leav" filtered by the source.
// NOTE(review): the line that passes q to enq.set_query() is not visible
// in this copy; presumably it sits between here and the get_mset() call.
105 Xapian::Query
q(Xapian::Query::OP_FILTER
,
106 Xapian::Query("leav"),
107 Xapian::Query(&src
));
110 mset
= enq
.get_mset(0, 10);
111 mset_expect_order(mset
, 5, 7, 11, 13, 9);
116 // Test that trying to use PostingSource with the remote backend throws
117 // Xapian::UnimplementedError as expected (we need to register the class
118 // in xapian-tcpsrv/xapian-progsrv for this to work).
119 DEFINE_TESTCASE(externalsource2
, remote
) {
120 Xapian::Database
db(get_database("apitest_phrase"));
121 Xapian::Enquire
enq(db
);
122 MyOddPostingSource
src(db
);
// Case 1: the source as the whole query -- get_mset() must throw.
124 enq
.set_query(Xapian::Query(&src
));
126 TEST_EXCEPTION(Xapian::UnimplementedError
,
127 Xapian::MSet mset
= enq
.get_mset(0, 10));
// Case 2: the source as the filter side of OP_FILTER -- must also throw.
// NOTE(review): the line that passes q to enq.set_query() is not visible
// in this copy of the file.
129 Xapian::Query
q(Xapian::Query::OP_FILTER
,
130 Xapian::Query("leav"),
131 Xapian::Query(&src
));
134 TEST_EXCEPTION(Xapian::UnimplementedError
,
135 Xapian::MSet mset
= enq
.get_mset(0, 10));
// Test posting source that supplies a weight: get_weight() returns 1000
// when the current docid is odd and 0.001 when it is even.
// NOTE(review): this copy of the file has gaps (the embedded line numbers
// skip); the declaration of `did`, access specifiers, the bodies of next()
// and skip_to(), and the closing braces are not visible here.
140 class MyOddWeightingPostingSource
: public Xapian::PostingSource
{
// Document count; used for the termfreq estimate and upper bound below.
141 Xapian::doccount num_docs
;
// Highest docid; at_end() compares `did` against it.
143 Xapian::doccount last_docid
;
// Builds a source directly from the two stored values (this is the
// constructor clone() calls); `did` starts at 0.
147 MyOddWeightingPostingSource(Xapian::doccount num_docs_
,
148 Xapian::doccount last_docid_
)
149 : num_docs(num_docs_
), last_docid(last_docid_
), did(0)
// Builds a source from a database, reading its document count and last
// docid; `did` starts at 0.
155 MyOddWeightingPostingSource(const Xapian::Database
&db
)
156 : num_docs(db
.get_doccount()), last_docid(db
.get_lastdocid()), did(0)
// Returns a newly allocated copy carrying the same num_docs/last_docid.
159 PostingSource
* clone() const {
160 return new MyOddWeightingPostingSource(num_docs
, last_docid
);
// Resets the current position; the database argument is unused.
163 void init(const Xapian::Database
&) { did
= 0; }
// Weight of the current document: large for odd docids, tiny for even ones.
165 double get_weight() const {
166 return (did
% 2) ? 1000 : 0.001;
169 // These bounds could be better, but that's not important here.
170 Xapian::doccount
get_termfreq_min() const { return 0; }
172 Xapian::doccount
get_termfreq_est() const { return num_docs
/ 2; }
174 Xapian::doccount
get_termfreq_max() const { return num_docs
; }
// NOTE(review): the body of next() is missing from this copy.
176 void next(double wt
) {
// NOTE(review): the body of skip_to() is missing from this copy.
181 void skip_to(Xapian::docid to_did
, double wt
) {
// True once `did` has passed last_docid.
186 bool at_end() const {
187 // Doesn't work if last_docid is 2^32 - 1.
188 return did
> last_docid
;
// Current position.
191 Xapian::docid
get_docid() const { return did
; }
// Fixed human-readable name for this source.
193 string
get_description() const {
194 return "MyOddWeightingPostingSource";
198 // Like externalsource1, except we use the weight to favour odd documents.
// Also checks MSet::get_max_possible() for the plain source, with a cutoff
// set, and under OP_SCALE_WEIGHT with factors 0.5 and 2.
199 DEFINE_TESTCASE(externalsource3
, backend
&& !remote
&& !multi
) {
200 // Doesn't work for remote without registering with the server.
201 // Doesn't work for multi because it checks the docid in the
// NOTE(review): the sentence above is cut off in this copy of the file.
203 Xapian::Database
db(get_database("apitest_phrase"));
204 Xapian::Enquire
enq(db
);
205 MyOddWeightingPostingSource
src(db
);
// The source on its own: the nine odd docids come first, then docid 2.
207 enq
.set_query(Xapian::Query(&src
));
209 Xapian::MSet mset
= enq
.get_mset(0, 10);
210 mset_expect_order(mset
, 1, 3, 5, 7, 9, 11, 13, 15, 17, 2);
// "leav" ORed with the source.
// NOTE(review): the line that passes q to enq.set_query() is not visible
// in this copy of the file.
212 Xapian::Query
q(Xapian::Query::OP_OR
,
213 Xapian::Query("leav"),
214 Xapian::Query(&src
));
217 mset
= enq
.get_mset(0, 5);
218 mset_expect_order(mset
, 5, 7, 11, 13, 9);
220 tout
<< "max possible weight = " << mset
.get_max_possible() << endl
;
221 TEST(mset
.get_max_possible() > 1000);
// Same query with a cutoff set via set_cutoff(0, 1000.001): the same five
// documents are expected.
223 enq
.set_cutoff(0, 1000.001);
224 mset
= enq
.get_mset(0, 10);
225 mset_expect_order(mset
, 5, 7, 11, 13, 9);
227 tout
<< "max possible weight = " << mset
.get_max_possible() << endl
;
228 TEST(mset
.get_max_possible() > 1000);
// Scale the source's weight by 0.5: the maximum possible weight is
// expected to be exactly 500.
230 enq
.set_query(Xapian::Query(q
.OP_SCALE_WEIGHT
, Xapian::Query(&src
), 0.5));
231 mset
= enq
.get_mset(0, 10);
234 TEST_EQUAL(mset
.get_max_possible(), 500);
// Scale by 2: only the odd docids are expected, and the maximum possible
// weight is expected to be exactly 2000.
236 enq
.set_query(Xapian::Query(q
.OP_SCALE_WEIGHT
, Xapian::Query(&src
), 2));
237 mset
= enq
.get_mset(0, 10);
238 mset_expect_order(mset
, 1, 3, 5, 7, 9, 11, 13, 15, 17);
240 TEST_EQUAL(mset
.get_max_possible(), 2000);
// Test posting source whose get_weight() fails the test if it is ever
// called; used to verify that weight is not requested in boolean contexts.
// NOTE(review): this copy of the file has gaps (the embedded line numbers
// skip); the declaration of `did`, access specifiers, the bodies of next()
// and skip_to(), and the closing braces are not visible here.
245 class MyDontAskWeightPostingSource
: public Xapian::PostingSource
{
// Document count; returned by all three termfreq accessors below.
246 Xapian::doccount num_docs
;
// Highest docid; at_end() compares `did` against it.
248 Xapian::doccount last_docid
;
// Builds a source directly from the two stored values (this is the
// constructor clone() calls); `did` starts at 0.
252 MyDontAskWeightPostingSource(Xapian::doccount num_docs_
,
253 Xapian::doccount last_docid_
)
254 : num_docs(num_docs_
), last_docid(last_docid_
), did(0)
// Default constructor: leaves the members to be filled in by init().
258 MyDontAskWeightPostingSource() : Xapian::PostingSource() {}
// Returns a newly allocated copy carrying the same num_docs/last_docid.
260 PostingSource
* clone() const { return new MyDontAskWeightPostingSource(num_docs
, last_docid
); }
// Reads the document count and last docid from the database.
// NOTE(review): any further statements in init() are not visible here.
262 void init(const Xapian::Database
&db
) {
263 num_docs
= db
.get_doccount();
264 last_docid
= db
.get_lastdocid();
// Must never be reached: being called at all is the failure condition.
268 double get_weight() const {
269 FAIL_TEST("MyDontAskWeightPostingSource::get_weight() called");
272 // These bounds could be better, but that's not important here.
273 Xapian::doccount
get_termfreq_min() const { return num_docs
; }
275 Xapian::doccount
get_termfreq_est() const { return num_docs
; }
277 Xapian::doccount
get_termfreq_max() const { return num_docs
; }
// NOTE(review): the body of next() is missing from this copy.
279 void next(double wt
) {
// NOTE(review): the body of skip_to() is missing from this copy.
284 void skip_to(Xapian::docid to_did
, double wt
) {
// True once `did` has passed last_docid.
289 bool at_end() const {
290 // Doesn't work if last_docid is 2^32 - 1.
291 return did
> last_docid
;
// Current position.
294 Xapian::docid
get_docid() const { return did
; }
// Fixed human-readable name for this source.
296 string
get_description() const {
297 return "MyDontAskWeightPostingSource";
301 // Check that boolean use doesn't call get_weight().
// Uses MyDontAskWeightPostingSource, whose get_weight() fails the test if
// called, in three settings: scaled by 0, as an OP_FILTER argument, and
// with BoolWeight as the weighting scheme.
302 DEFINE_TESTCASE(externalsource4
, backend
&& !remote
) {
303 Xapian::Database
db(get_database("apitest_phrase"));
304 Xapian::Enquire
enq(db
);
305 MyDontAskWeightPostingSource src
;
// Setting 1: weight scaled by zero; the first five docids are expected in
// ascending order.
307 tout
<< "OP_SCALE_WEIGHT 0" << endl
;
308 enq
.set_query(Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT
, Xapian::Query(&src
), 0));
310 Xapian::MSet mset
= enq
.get_mset(0, 5);
311 mset_expect_order(mset
, 1, 2, 3, 4, 5);
// Setting 2: the source as the filter side of OP_FILTER.
// NOTE(review): the line that passes q to enq.set_query() is not visible
// in this copy of the file.
313 tout
<< "OP_FILTER" << endl
;
314 Xapian::Query
q(Xapian::Query::OP_FILTER
,
315 Xapian::Query("leav"),
316 Xapian::Query(&src
));
319 mset
= enq
.get_mset(0, 5);
320 mset_expect_order(mset
, 8, 6, 4, 5, 7);
// Setting 3: BoolWeight. The query and scheme are set, but the match and
// its check are commented out below, so nothing is actually verified here.
322 tout
<< "BoolWeight" << endl
;
323 enq
.set_query(Xapian::Query(&src
));
324 enq
.set_weighting_scheme(Xapian::BoolWeight());
326 // mset = enq.get_mset(0, 5);
327 // mset_expect_order(mset, 1, 2, 3, 4, 5);
332 // Check that valueweightsource works correctly.
// Runs a ValueWeightPostingSource over value slot 11 on its own and
// combined with the term "leav" via OP_FILTER, checking the result order.
333 DEFINE_TESTCASE(valueweightsource1
, backend
) {
334 Xapian::Database
db(get_database("apitest_phrase"));
335 Xapian::Enquire
enq(db
);
336 Xapian::ValueWeightPostingSource
src(11);
338 // Should be in descending order of length
339 tout
<< "RAW" << endl
;
340 enq
.set_query(Xapian::Query(&src
));
341 Xapian::MSet mset
= enq
.get_mset(0, 5);
342 mset_expect_order(mset
, 3, 1, 2, 8, 14);
344 // In relevance order
// NOTE(review): the line that passes q to enq.set_query() is not visible
// in this copy of the file.
345 tout
<< "OP_FILTER" << endl
;
346 Xapian::Query
q(Xapian::Query::OP_FILTER
,
347 Xapian::Query("leav"),
348 Xapian::Query(&src
));
350 mset
= enq
.get_mset(0, 5);
351 mset_expect_order(mset
, 8, 6, 4, 5, 7);
353 // Should be in descending order of length
// NOTE(review): the first operand of this OP_FILTER query and the
// following set_query() call are missing from this copy; given the "other
// way" label, the missing operand is presumably the posting source.
354 tout
<< "OP_FILTER other way" << endl
;
355 q
= Xapian::Query(Xapian::Query::OP_FILTER
,
357 Xapian::Query("leav"));
359 mset
= enq
.get_mset(0, 5);
360 mset_expect_order(mset
, 8, 14, 9, 13, 7);
365 // Check that valueweightsource gives the correct bounds for those databases
366 // which support value statistics.
// Expects all three termfreq bounds to be 17 and the maximum weight to be
// 135 for value slot 11 of the "apitest_phrase" database.
367 DEFINE_TESTCASE(valueweightsource2
, valuestats
) {
368 Xapian::Database
db(get_database("apitest_phrase"));
369 Xapian::ValueWeightPostingSource
src(11);
// NOTE(review): no call attaching src to db is visible before these
// checks; presumably it is on a line missing from this copy.
371 TEST_EQUAL(src
.get_termfreq_min(), 17);
372 TEST_EQUAL(src
.get_termfreq_est(), 17);
373 TEST_EQUAL(src
.get_termfreq_max(), 17);
374 TEST_EQUAL(src
.get_maxweight(), 135);
379 // Check that valueweightsource skip_to() can stay in the same position.
380 DEFINE_TESTCASE(valueweightsource3
, valuestats
&& !multi
) {
381 // FIXME: multi doesn't support iterating valuestreams yet.
382 Xapian::Database
db(get_database("apitest_phrase"));
383 Xapian::ValueWeightPostingSource
src(11);
// The source is expected to report docid 8 at two successive checkpoints.
// NOTE(review): the calls that position the source before each check are
// missing from this copy of the file.
388 TEST_EQUAL(src
.get_docid(), 8);
391 TEST_EQUAL(src
.get_docid(), 8);
396 // Check that fixedweightsource works correctly.
// First runs a FixedWeightPostingSource through Enquire and checks every
// hit carries the weight `wt`, then drives further instances directly.
// NOTE(review): the declaration of `wt`, the statements that position each
// source between checks, and the braces that presumably scope the repeated
// `src` declarations are all missing from this copy of the file.
397 DEFINE_TESTCASE(fixedweightsource1
, backend
) {
398 Xapian::Database
db(get_database("apitest_phrase"));
399 Xapian::Enquire
enq(db
);
403 Xapian::FixedWeightPostingSource
src(wt
);
405 // Should be in increasing order of docid.
406 enq
.set_query(Xapian::Query(&src
));
407 Xapian::MSet mset
= enq
.get_mset(0, 5);
408 mset_expect_order(mset
, 1, 2, 3, 4, 5);
// Every returned document must have exactly the fixed weight.
410 for (Xapian::MSetIterator i
= mset
.begin(); i
!= mset
.end(); ++i
) {
411 TEST_EQUAL(i
.get_weight(), wt
);
415 // Do some direct tests, to check the skip_to() and check() methods work.
417 // Check next and skip_to().
418 Xapian::FixedWeightPostingSource
src(wt
);
// Expected positions after successive moves: 1, then 2, then 5.
423 TEST_EQUAL(src
.get_docid(), 1);
426 TEST_EQUAL(src
.get_docid(), 2);
429 TEST_EQUAL(src
.get_docid(), 5);
434 // Check check() as the first operation, followed by next.
435 Xapian::FixedWeightPostingSource
src(wt
);
438 TEST_EQUAL(src
.check(5, 1.0), true);
440 TEST_EQUAL(src
.get_docid(), 5);
443 TEST_EQUAL(src
.get_docid(), 6);
446 // Check check() as the first operation, followed by skip_to().
447 Xapian::FixedWeightPostingSource
src(wt
);
450 TEST_EQUAL(src
.check(5, 1.0), true);
452 TEST_EQUAL(src
.get_docid(), 5);
455 TEST_EQUAL(src
.get_docid(), 6);
// Skip to docid 7 asking for a minimum weight of twice the fixed weight.
// NOTE(review): the assertion that follows this call is not visible here.
456 src
.skip_to(7, wt
* 2);
463 // A posting source which changes the maximum weight.
// The visible code calls set_maxweight(5 - did) in two places (once inside
// skip_to()), reports at_end() once did reaches 5, and fails the test if
// get_weight() is called for a docid above maxid_accessed.
// NOTE(review): the declaration of `did`, the return statement of
// get_weight(), the header and start of next(), part of skip_to(), and the
// closing braces are missing from this copy of the file.
464 class ChangeMaxweightPostingSource
: public Xapian::PostingSource
{
467 // Maximum docid that get_weight() should be called for.
468 Xapian::docid maxid_accessed
;
// Stores the access limit; `did` starts at 0.
471 ChangeMaxweightPostingSource(Xapian::docid maxid_accessed_
)
472 : did(0), maxid_accessed(maxid_accessed_
) { }
// Resets the current position; the database argument is unused.
474 void init(const Xapian::Database
&) { did
= 0; }
// Fails the test if weight is requested beyond maxid_accessed.
// NOTE(review): the value returned on the normal path is not visible here.
476 double get_weight() const {
477 if (did
> maxid_accessed
) {
478 FAIL_TEST("ChangeMaxweightPostingSource::get_weight() called "
479 "for docid " + str(did
) + ", max id accessed "
480 "should be " + str(maxid_accessed
));
// All three term frequency bounds are fixed at 4.
485 Xapian::doccount
get_termfreq_min() const { return 4; }
486 Xapian::doccount
get_termfreq_est() const { return 4; }
487 Xapian::doccount
get_termfreq_max() const { return 4; }
// Lower the advertised maximum weight as the position advances.
// NOTE(review): presumably this is inside next(); its header is missing.
491 set_maxweight(5 - did
);
// NOTE(review): the statement that updates `did` from to_did is not
// visible in this copy.
494 void skip_to(Xapian::docid to_did
, double) {
496 set_maxweight(5 - did
);
// The list ends once the position reaches docid 5.
499 bool at_end() const { return did
>= 5; }
500 Xapian::docid
get_docid() const { return did
; }
501 string
get_description() const { return "ChangeMaxweightPostingSource"; }
504 // Test a posting source with a variable maxweight.
// ANDs a ChangeMaxweightPostingSource with a FixedWeightPostingSource of
// weight 2.5 and checks each hit's weight equals 7.5 minus its docid.
// NOTE(review): src1/src2/q/mset are declared twice below; the braces that
// presumably scope the two cases, and the set_query() calls, are missing
// from this copy of the file.
505 DEFINE_TESTCASE(changemaxweightsource1
, backend
&& !remote
&& !multi
) {
506 // The ChangeMaxweightPostingSource doesn't work with multi or remote.
507 Xapian::Database
db(get_database("apitest_phrase"));
508 Xapian::Enquire
enq(db
);
// Case 1: get_weight() may be called for docids up to 5.
511 ChangeMaxweightPostingSource
src1(5);
512 Xapian::FixedWeightPostingSource
src2(2.5);
514 Xapian::Query
q(Xapian::Query::OP_AND
,
515 Xapian::Query(&src1
), Xapian::Query(&src2
));
517 // Set descending docid order so that the matcher isn't able to
518 // terminate early after 4 documents just because weight == maxweight.
519 enq
.set_docid_order(enq
.DESCENDING
);
521 Xapian::MSet mset
= enq
.get_mset(0, 4);
523 mset_expect_order(mset
, 1, 2, 3, 4);
524 for (Xapian::MSetIterator i
= mset
.begin(); i
!= mset
.end(); ++i
) {
525 TEST_EQUAL_DOUBLE(i
.get_weight(), 7.5 - *i
);
// Case 2: get_weight() may only be called for docids up to 3. After asking
// for two results, src1 is expected to be at docid 3, not at end, with a
// maximum weight of 2.0.
530 ChangeMaxweightPostingSource
src1(3);
531 Xapian::FixedWeightPostingSource
src2(2.5);
533 Xapian::Query
q(Xapian::Query::OP_AND
,
534 Xapian::Query(&src1
), Xapian::Query(&src2
));
537 Xapian::MSet mset
= enq
.get_mset(0, 2);
538 TEST(!src1
.at_end());
539 TEST_EQUAL(src1
.get_docid(), 3);
540 TEST_EQUAL_DOUBLE(src1
.get_maxweight(), 2.0);
541 mset_expect_order(mset
, 1, 2);
542 for (Xapian::MSetIterator i
= mset
.begin(); i
!= mset
.end(); ++i
) {
543 TEST_EQUAL_DOUBLE(i
.get_weight(), 7.5 - *i
);
550 // Test using a valueweightpostingsource which has no entries.
// Builds q = (q1 AND_MAYBE q2) OR q3, where q3 is a source over an empty
// value slot, and checks that ORing in the empty source yields the same
// number of results (size1) as the search without it.
// NOTE(review): the set_query() calls, the assignment to size1 and the
// braces that presumably scope the repeated `mset` declarations are
// missing from this copy of the file.
551 DEFINE_TESTCASE(emptyvalwtsource1
, backend
&& !remote
&& !multi
) {
552 Xapian::Database
db(get_database("apitest_phrase"));
553 Xapian::Enquire
enq(db
);
555 Xapian::ValueWeightPostingSource
src2(11); // A non-empty slot.
556 Xapian::ValueWeightPostingSource
src3(100); // An empty slot.
557 Xapian::Query
q1("leav");
558 Xapian::Query
q2(&src2
);
559 Xapian::Query
q3(&src3
);
560 Xapian::Query
q(Xapian::Query::OP_OR
, Xapian::Query(Xapian::Query::OP_AND_MAYBE
, q1
, q2
), q3
);
562 // Perform search without ORring with the posting source.
563 Xapian::doccount size1
;
566 Xapian::MSet mset
= enq
.get_mset(0, 10);
567 TEST_REL(mset
.get_max_possible(), >, 0.0);
569 TEST_REL(size1
, >, 0);
572 // Perform a search with just the non-empty posting source, checking it
573 // returns something.
576 Xapian::MSet mset
= enq
.get_mset(0, 10);
577 TEST_REL(mset
.get_max_possible(), >, 0.0);
578 TEST_REL(mset
.size(), >, 0);
581 // Perform a search with just the empty posting source, checking it returns
// NOTE(review): the sentence above is cut off in this copy of the file; the
// check below expects an empty MSet.
585 Xapian::MSet mset
= enq
.get_mset(0, 10);
587 // get_max_possible() returns 0 here for backends which track the upper
588 // bound on value slot entries, MAX_DBL for backends which don't.
590 TEST_REL(mset
.get_max_possible(), >=, 0.0);
592 TEST_EQUAL(mset
.size(), 0);
595 // Perform a search with the posting source ORred with the normal query.
596 // This is a regression test - it used to return nothing.
599 Xapian::MSet mset
= enq
.get_mset(0, 10);
600 TEST_REL(mset
.get_max_possible(), >, 0.0);
601 TEST_REL(mset
.size(), >, 0.0);
602 TEST_EQUAL(mset
.size(), size1
);
// DecreasingValueWeightPostingSource over value slot 0 that carries a
// caller-supplied counter; next() forwards to the base class.
// NOTE(review): the declaration of `count` and the statements in next()
// before the forwarding call are missing from this copy of the file. The
// name and the matchtimelimit1 test suggest next() sleeps and increments
// the counter -- confirm against the complete file.
608 class SlowDecreasingValueWeightPostingSource
609 : public Xapian::DecreasingValueWeightPostingSource
{
// Binds the counter and selects value slot 0 in the base class.
613 SlowDecreasingValueWeightPostingSource(int & count_
)
614 : Xapian::DecreasingValueWeightPostingSource(0), count(count_
) { }
// Returns a newly allocated copy constructed from the same `count`.
616 SlowDecreasingValueWeightPostingSource
* clone() const
618 return new SlowDecreasingValueWeightPostingSource(count
);
// Delegates the actual advance to the base class implementation.
621 void next(double min_wt
) {
624 return Xapian::DecreasingValueWeightPostingSource::next(min_wt
);
// Database generator for matchtimelimit1: adds 20 documents whose value
// slot 0 holds sortable_serialise() of 20, 19, ..., 1 in that order. The
// string argument is unnamed and unused.
// NOTE(review): the return type line and the closing braces are missing
// from this copy of the file.
629 make_matchtimelimit1_db(Xapian::WritableDatabase
&db
, const string
&)
631 for (int wt
= 20; wt
> 0; --wt
) {
632 Xapian::Document doc
;
633 doc
.add_value(0, Xapian::sortable_serialise(double(wt
)));
634 db
.add_document(doc
);
638 // FIXME: This doesn't run for remote databases (we'd need to register
639 // SlowDecreasingValueWeightPostingSource on the remote) or for multi
640 // databases (they don't support "generated" currently).
// Sets a 1.5 time limit on a match over the generated database and expects
// one result with the source's counter at 2.
// NOTE(review): the declaration of `count` and the #endif matching the
// #ifndef below are missing from this copy of the file.
641 DEFINE_TESTCASE(matchtimelimit1
, generated
&& !remote
)
643 #ifndef HAVE_TIMER_CREATE
644 SKIP_TEST("Enquire::set_time_limit() not implemented for this platform");
646 Xapian::Database db
= get_database("matchtimelimit1",
647 make_matchtimelimit1_db
);
650 SlowDecreasingValueWeightPostingSource
src(count
);
652 Xapian::Enquire
enquire(db
);
653 enquire
.set_query(Xapian::Query(&src
));
655 enquire
.set_time_limit(1.5);
// Ask for one result with check-at-least 1000.
657 Xapian::MSet mset
= enquire
.get_mset(0, 1, 1000);
658 TEST_EQUAL(mset
.size(), 1);
659 TEST_EQUAL(count
, 2);
// DecreasingValueWeightPostingSource over value slot 0 whose init() copies
// the database's document length lower and upper bounds into two
// caller-owned variables, then runs the base class init().
// NOTE(review): access specifiers and closing braces are missing from this
// copy of the file.
664 class CheckBoundsPostingSource
665 : public Xapian::DecreasingValueWeightPostingSource
{
// Reference to the caller's variable that receives the lower bound.
667 Xapian::doccount
& doclen_lb
;
// Reference to the caller's variable that receives the upper bound.
669 Xapian::doccount
& doclen_ub
;
// Binds both output references and selects value slot 0 in the base class.
671 CheckBoundsPostingSource(Xapian::doccount
& doclen_lb_
,
672 Xapian::doccount
& doclen_ub_
)
673 : Xapian::DecreasingValueWeightPostingSource(0),
674 doclen_lb(doclen_lb_
),
675 doclen_ub(doclen_ub_
) { }
// Returns a newly allocated copy bound to the same two variables.
677 CheckBoundsPostingSource
* clone() const
679 return new CheckBoundsPostingSource(doclen_lb
, doclen_ub
);
// Records the bounds reported by the database before delegating to the
// base class.
682 void init(const Xapian::Database
& database
) {
683 doclen_lb
= database
.get_doclength_lower_bound();
684 doclen_ub
= database
.get_doclength_upper_bound();
685 Xapian::DecreasingValueWeightPostingSource::init(database
);
689 // Test that doclength bounds are correct.
690 // Regression test for bug fixed in 1.2.25 and 1.4.1.
// Runs a match with CheckBoundsPostingSource and compares the bounds it
// recorded in init() with those reported by the database handle itself.
691 DEFINE_TESTCASE(postingsourcebounds1
, backend
&& !remote
)
693 Xapian::Database db
= get_database("apitest_simpledata");
// Both start at 0 and are overwritten by the posting source's init().
695 Xapian::doccount doclen_lb
= 0, doclen_ub
= 0;
696 CheckBoundsPostingSource
ps(doclen_lb
, doclen_ub
);
698 Xapian::Enquire
enquire(db
);
699 enquire
.set_query(Xapian::Query(&ps
));
// The MSet contents are not inspected; the match is run so that the
// source gets initialised.
701 Xapian::MSet mset
= enquire
.get_mset(0, 1);
703 TEST_EQUAL(doclen_lb
, db
.get_doclength_lower_bound());
704 TEST_EQUAL(doclen_ub
, db
.get_doclength_upper_bound());
709 // PostingSource which really just counts the clone() calls.
710 // Never actually matches anything, but pretends it might.
// NOTE(review): the declaration of `clone_count`, the statement in clone()
// that updates it, the body of at_end() and the closing braces are missing
// from this copy of the file.
711 class CloneTestPostingSource
: public Xapian::PostingSource
{
// Binds the caller's counter.
715 CloneTestPostingSource(int& clone_count_
)
716 : clone_count(clone_count_
)
// Returns a newly allocated copy constructed from the same `clone_count`.
719 PostingSource
* clone() const {
721 return new CloneTestPostingSource(clone_count
);
// No per-database state to set up.
724 void init(const Xapian::Database
&) { }
// Fixed bounds: minimum 0, estimate 1, maximum 2.
726 Xapian::doccount
get_termfreq_min() const { return 0; }
728 Xapian::doccount
get_termfreq_est() const { return 1; }
730 Xapian::doccount
get_termfreq_max() const { return 2; }
// Positioning calls do nothing.
732 void next(double) { }
734 void skip_to(Xapian::docid
, double) { }
736 bool at_end() const {
740 Xapian::docid
get_docid() const { return 0; }
742 string
get_description() const { return "CloneTestPostingSource"; }
745 /// Test cloning of initial object, which regressed in 1.3.5.
// Two cases: a stack-allocated source passed by address is expected to be
// cloned exactly once when a Query is built from it, while a heap-allocated
// source handed over via release() is expected not to be cloned at all.
// NOTE(review): the declarations of `clones` and the braces that presumably
// scope the two cases are missing from this copy of the file.
746 DEFINE_TESTCASE(postingsourceclone1
, !backend
)
748 // This fails with 1.3.5-1.4.0 inclusive.
751 CloneTestPostingSource
ps(clones
);
752 TEST_EQUAL(clones
, 0);
753 Xapian::Query
q(&ps
);
754 TEST_EQUAL(clones
, 1);
757 // Check that clone() isn't needlessly called if reference counting has
758 // been turned on for the PostingSource.
761 CloneTestPostingSource
* ps
= new CloneTestPostingSource(clones
);
762 TEST_EQUAL(clones
, 0);
763 Xapian::Query
q(ps
->release());
764 TEST_EQUAL(clones
, 0);