/* api_posdb.cc: tests which need a backend with positional information
 *
 * Copyright 1999,2000,2001 BrightStation PLC
 * Copyright 2002 Ananova Ltd
 * Copyright 2002,2003,2004,2005,2006,2007,2009,2016 Olly Betts
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
 * USA
 */
25 #include "api_posdb.h"
33 #include "testsuite.h"
34 #include "testutils.h"
38 /// Simple test of NEAR
39 DEFINE_TESTCASE(near1
, positional
) {
40 Xapian::Database
mydb(get_database("apitest_phrase"));
41 Xapian::Enquire
enquire(mydb
);
42 Xapian::Stem
stemmer("english");
43 enquire
.set_weighting_scheme(Xapian::BoolWeight());
46 vector
<Xapian::Query
> subqs
;
48 subqs
.push_back(Xapian::Query(stemmer("phrase")));
49 subqs
.push_back(Xapian::Query(stemmer("fridge")));
50 q
= Xapian::Query(Xapian::Query::OP_PHRASE
, subqs
.begin(), subqs
.end(), 2);
53 // retrieve the top ten results
54 Xapian::MSet mymset
= enquire
.get_mset(0, 10);
55 mset_expect_order(mymset
);
58 subqs
.push_back(Xapian::Query(stemmer("phrase")));
59 subqs
.push_back(Xapian::Query(stemmer("near")));
60 q
= Xapian::Query(Xapian::Query::OP_NEAR
, subqs
.begin(), subqs
.end(), 2);
63 // retrieve the top ten results
64 mymset
= enquire
.get_mset(0, 10);
65 mset_expect_order(mymset
, 3);
68 subqs
.push_back(Xapian::Query(stemmer("phrase")));
69 subqs
.push_back(Xapian::Query(stemmer("near")));
70 q
= Xapian::Query(Xapian::Query::OP_NEAR
, subqs
.begin(), subqs
.end(), 3);
73 // retrieve the top ten results
74 mymset
= enquire
.get_mset(0, 10);
75 mset_expect_order(mymset
, 1, 3);
78 subqs
.push_back(Xapian::Query(stemmer("phrase")));
79 subqs
.push_back(Xapian::Query(stemmer("near")));
80 q
= Xapian::Query(Xapian::Query::OP_NEAR
, subqs
.begin(), subqs
.end(), 5);
83 // retrieve the top ten results
84 mymset
= enquire
.get_mset(0, 10);
85 mset_expect_order(mymset
, 1, 3);
88 subqs
.push_back(Xapian::Query(stemmer("phrase")));
89 subqs
.push_back(Xapian::Query(stemmer("near")));
90 q
= Xapian::Query(Xapian::Query::OP_NEAR
, subqs
.begin(), subqs
.end(), 6);
93 // retrieve the top ten results
94 mymset
= enquire
.get_mset(0, 10);
95 mset_expect_order(mymset
, 1, 2, 3);
98 subqs
.push_back(Xapian::Query(stemmer("leave")));
99 subqs
.push_back(Xapian::Query(stemmer("fridge")));
100 subqs
.push_back(Xapian::Query(stemmer("on")));
101 q
= Xapian::Query(Xapian::Query::OP_NEAR
, subqs
.begin(), subqs
.end(), 3);
102 enquire
.set_query(q
);
104 // retrieve the top ten results
105 mymset
= enquire
.get_mset(0, 10);
106 mset_expect_order(mymset
, 4, 5, 6, 7, 8, 9);
109 subqs
.push_back(Xapian::Query(stemmer("leave")));
110 subqs
.push_back(Xapian::Query(stemmer("fridge")));
111 subqs
.push_back(Xapian::Query(stemmer("on")));
112 q
= Xapian::Query(Xapian::Query::OP_NEAR
, subqs
.begin(), subqs
.end(), 4);
113 enquire
.set_query(q
);
115 // retrieve the top ten results
116 mymset
= enquire
.get_mset(0, 10);
117 mset_expect_order(mymset
, 4, 5, 6, 7, 8, 9, 10);
120 subqs
.push_back(Xapian::Query(stemmer("leave")));
121 subqs
.push_back(Xapian::Query(stemmer("fridge")));
122 subqs
.push_back(Xapian::Query(stemmer("on")));
123 q
= Xapian::Query(Xapian::Query::OP_NEAR
, subqs
.begin(), subqs
.end(), 5);
124 enquire
.set_query(q
);
126 // retrieve the top ten results
127 mymset
= enquire
.get_mset(0, 10);
128 mset_expect_order(mymset
, 4, 5, 6, 7, 8, 9, 10, 11);
131 subqs
.push_back(Xapian::Query(stemmer("leave")));
132 subqs
.push_back(Xapian::Query(stemmer("fridge")));
133 subqs
.push_back(Xapian::Query(stemmer("on")));
134 q
= Xapian::Query(Xapian::Query::OP_NEAR
, subqs
.begin(), subqs
.end(), 6);
135 enquire
.set_query(q
);
137 // retrieve the top ten results
138 mymset
= enquire
.get_mset(0, 10);
139 mset_expect_order(mymset
, 4, 5, 6, 7, 8, 9, 10, 11, 12);
142 subqs
.push_back(Xapian::Query(stemmer("leave")));
143 subqs
.push_back(Xapian::Query(stemmer("fridge")));
144 subqs
.push_back(Xapian::Query(stemmer("on")));
145 q
= Xapian::Query(Xapian::Query::OP_NEAR
, subqs
.begin(), subqs
.end(), 7);
146 enquire
.set_query(q
);
148 // retrieve the top twenty results
149 mymset
= enquire
.get_mset(0, 20);
150 mset_expect_order(mymset
, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13);
153 subqs
.push_back(Xapian::Query(stemmer("leave")));
154 subqs
.push_back(Xapian::Query(stemmer("fridge")));
155 subqs
.push_back(Xapian::Query(stemmer("on")));
156 q
= Xapian::Query(Xapian::Query::OP_NEAR
, subqs
.begin(), subqs
.end(), 8);
157 enquire
.set_query(q
);
159 // retrieve the top twenty results
160 mymset
= enquire
.get_mset(0, 20);
161 mset_expect_order(mymset
, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14);
164 subqs
.push_back(Xapian::Query(stemmer("leave")));
165 subqs
.push_back(Xapian::Query(stemmer("fridge")));
166 subqs
.push_back(Xapian::Query(stemmer("on")));
167 // test really large window size
168 q
= Xapian::Query(Xapian::Query::OP_NEAR
, subqs
.begin(), subqs
.end(), 999999999);
169 enquire
.set_query(q
);
171 // retrieve the top twenty results
172 mymset
= enquire
.get_mset(0, 20);
173 mset_expect_order(mymset
, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14);
178 /// Test NEAR over operators
179 DEFINE_TESTCASE(near2
, positional
) {
180 Xapian::Database
mydb(get_database("apitest_phrase"));
181 Xapian::Enquire
enquire(mydb
);
182 Xapian::Stem
stemmer("english");
183 enquire
.set_weighting_scheme(Xapian::BoolWeight());
187 vector
<Xapian::Query
> subqs
;
189 subqs
.push_back(Xapian::Query(Xapian::Query::OP_AND
,
190 Xapian::Query(stemmer("phrase")),
191 Xapian::Query(stemmer("near"))));
192 subqs
.push_back(Xapian::Query(stemmer("and")));
193 TEST_EXCEPTION(Xapian::UnimplementedError
,
194 q
= Xapian::Query(q
.OP_NEAR
, subqs
.begin(), subqs
.end(), 2);
195 enquire
.set_query(q
);
197 // retrieve the top ten results
198 mymset
= enquire
.get_mset(0, 10)
200 #if 0 // Disable until we reimplement this.
201 mset_expect_order(mymset
, 1);
204 subqs
.push_back(Xapian::Query(Xapian::Query::OP_AND
,
205 Xapian::Query(stemmer("phrase")),
206 Xapian::Query(stemmer("near"))));
207 subqs
.push_back(Xapian::Query(stemmer("operator")));
208 q
= Xapian::Query(Xapian::Query::OP_NEAR
, subqs
.begin(), subqs
.end(), 2);
209 enquire
.set_query(q
);
211 // retrieve the top ten results
212 mymset
= enquire
.get_mset(0, 10);
213 mset_expect_order(mymset
, 2);
216 subqs
.push_back(Xapian::Query(stemmer("operator")));
217 subqs
.push_back(Xapian::Query(Xapian::Query::OP_AND
,
218 Xapian::Query(stemmer("phrase")),
219 Xapian::Query(stemmer("near"))));
220 q
= Xapian::Query(Xapian::Query::OP_NEAR
, subqs
.begin(), subqs
.end(), 2);
221 enquire
.set_query(q
);
223 // retrieve the top ten results
224 mymset
= enquire
.get_mset(0, 10);
225 mset_expect_order(mymset
, 2);
231 /// Simple test of PHRASE
232 DEFINE_TESTCASE(phrase1
, positional
) {
233 Xapian::Database
mydb(get_database("apitest_phrase"));
234 Xapian::Enquire
enquire(mydb
);
235 Xapian::Stem
stemmer("english");
236 enquire
.set_weighting_scheme(Xapian::BoolWeight());
239 vector
<Xapian::Query
> subqs
;
241 subqs
.push_back(Xapian::Query(stemmer("phrase")));
242 subqs
.push_back(Xapian::Query(stemmer("fridge")));
243 q
= Xapian::Query(Xapian::Query::OP_PHRASE
, subqs
.begin(), subqs
.end(), 2);
244 enquire
.set_query(q
);
246 // retrieve the top ten results
247 Xapian::MSet mymset
= enquire
.get_mset(0, 10);
248 mset_expect_order(mymset
);
251 subqs
.push_back(Xapian::Query(stemmer("phrase")));
252 subqs
.push_back(Xapian::Query(stemmer("near")));
253 q
= Xapian::Query(Xapian::Query::OP_PHRASE
, subqs
.begin(), subqs
.end(), 2);
254 enquire
.set_query(q
);
256 // retrieve the top ten results
257 mymset
= enquire
.get_mset(0, 10);
258 mset_expect_order(mymset
);
261 subqs
.push_back(Xapian::Query(stemmer("phrase")));
262 subqs
.push_back(Xapian::Query(stemmer("near")));
263 q
= Xapian::Query(Xapian::Query::OP_PHRASE
, subqs
.begin(), subqs
.end(), 3);
264 enquire
.set_query(q
);
266 // retrieve the top ten results
267 mymset
= enquire
.get_mset(0, 10);
268 mset_expect_order(mymset
, 1);
271 subqs
.push_back(Xapian::Query(stemmer("phrase")));
272 subqs
.push_back(Xapian::Query(stemmer("near")));
273 q
= Xapian::Query(Xapian::Query::OP_PHRASE
, subqs
.begin(), subqs
.end(), 5);
274 enquire
.set_query(q
);
276 // retrieve the top ten results
277 mymset
= enquire
.get_mset(0, 10);
278 mset_expect_order(mymset
, 1);
281 subqs
.push_back(Xapian::Query(stemmer("phrase")));
282 subqs
.push_back(Xapian::Query(stemmer("near")));
283 q
= Xapian::Query(Xapian::Query::OP_PHRASE
, subqs
.begin(), subqs
.end(), 6);
284 enquire
.set_query(q
);
286 // retrieve the top ten results
287 mymset
= enquire
.get_mset(0, 10);
288 mset_expect_order(mymset
, 1, 2);
291 subqs
.push_back(Xapian::Query(stemmer("leave")));
292 subqs
.push_back(Xapian::Query(stemmer("fridge")));
293 subqs
.push_back(Xapian::Query(stemmer("on")));
294 q
= Xapian::Query(Xapian::Query::OP_PHRASE
, subqs
.begin(), subqs
.end(), 3);
295 enquire
.set_query(q
);
297 // retrieve the top ten results
298 mymset
= enquire
.get_mset(0, 10);
299 mset_expect_order(mymset
, 4);
302 subqs
.push_back(Xapian::Query(stemmer("leave")));
303 subqs
.push_back(Xapian::Query(stemmer("fridge")));
304 subqs
.push_back(Xapian::Query(stemmer("on")));
305 q
= Xapian::Query(Xapian::Query::OP_PHRASE
, subqs
.begin(), subqs
.end(), 4);
306 enquire
.set_query(q
);
308 // retrieve the top ten results
309 mymset
= enquire
.get_mset(0, 10);
310 mset_expect_order(mymset
, 4);
313 subqs
.push_back(Xapian::Query(stemmer("leave")));
314 subqs
.push_back(Xapian::Query(stemmer("fridge")));
315 subqs
.push_back(Xapian::Query(stemmer("on")));
316 q
= Xapian::Query(Xapian::Query::OP_PHRASE
, subqs
.begin(), subqs
.end(), 5);
317 enquire
.set_query(q
);
319 // retrieve the top ten results
320 mymset
= enquire
.get_mset(0, 10);
321 mset_expect_order(mymset
, 4);
324 subqs
.push_back(Xapian::Query(stemmer("leave")));
325 subqs
.push_back(Xapian::Query(stemmer("fridge")));
326 subqs
.push_back(Xapian::Query(stemmer("on")));
327 q
= Xapian::Query(Xapian::Query::OP_PHRASE
, subqs
.begin(), subqs
.end(), 6);
328 enquire
.set_query(q
);
330 // retrieve the top ten results
331 mymset
= enquire
.get_mset(0, 10);
332 mset_expect_order(mymset
, 4);
335 subqs
.push_back(Xapian::Query(stemmer("leave")));
336 subqs
.push_back(Xapian::Query(stemmer("fridge")));
337 subqs
.push_back(Xapian::Query(stemmer("on")));
338 q
= Xapian::Query(Xapian::Query::OP_PHRASE
, subqs
.begin(), subqs
.end(), 7);
339 enquire
.set_query(q
);
341 // retrieve the top twenty results
342 mymset
= enquire
.get_mset(0, 20);
343 mset_expect_order(mymset
, 4);
346 subqs
.push_back(Xapian::Query(stemmer("leave")));
347 subqs
.push_back(Xapian::Query(stemmer("fridge")));
348 subqs
.push_back(Xapian::Query(stemmer("on")));
349 q
= Xapian::Query(Xapian::Query::OP_PHRASE
, subqs
.begin(), subqs
.end(), 8);
350 enquire
.set_query(q
);
352 // retrieve the top 20 results
353 mymset
= enquire
.get_mset(0, 20);
354 mset_expect_order(mymset
, 4);
356 // test really large window size
358 subqs
.push_back(Xapian::Query(stemmer("leave")));
359 subqs
.push_back(Xapian::Query(stemmer("fridge")));
360 subqs
.push_back(Xapian::Query(stemmer("on")));
361 q
= Xapian::Query(Xapian::Query::OP_PHRASE
, subqs
.begin(), subqs
.end(), 999999999);
362 enquire
.set_query(q
);
364 // retrieve the top 20 results
365 mymset
= enquire
.get_mset(0, 20);
366 mset_expect_order(mymset
, 4);
368 // regression test (was matching doc 15, should fail)
370 subqs
.push_back(Xapian::Query(stemmer("first")));
371 subqs
.push_back(Xapian::Query(stemmer("second")));
372 subqs
.push_back(Xapian::Query(stemmer("third")));
373 q
= Xapian::Query(Xapian::Query::OP_PHRASE
, subqs
.begin(), subqs
.end(), 9);
374 enquire
.set_query(q
);
376 // retrieve the top ten results
377 mymset
= enquire
.get_mset(0, 10);
378 mset_expect_order(mymset
);
380 // regression test (should match doc 15, make sure still does with fix)
382 subqs
.push_back(Xapian::Query(stemmer("first")));
383 subqs
.push_back(Xapian::Query(stemmer("second")));
384 subqs
.push_back(Xapian::Query(stemmer("third")));
385 q
= Xapian::Query(Xapian::Query::OP_PHRASE
, subqs
.begin(), subqs
.end(), 10);
386 enquire
.set_query(q
);
388 // retrieve the top ten results
389 mymset
= enquire
.get_mset(0, 10);
390 mset_expect_order(mymset
, 15);
392 // regression test (phrase matching was getting order wrong when
393 // build_and_tree reordered vector of PostLists)
395 subqs
.push_back(Xapian::Query(stemmer("milk")));
396 subqs
.push_back(Xapian::Query(stemmer("rare")));
397 q
= Xapian::Query(Xapian::Query::OP_PHRASE
, subqs
.begin(), subqs
.end(), 2);
398 enquire
.set_query(q
);
400 // retrieve the top ten results
401 mymset
= enquire
.get_mset(0, 10);
402 mset_expect_order(mymset
, 16);
404 // regression test (phrase matching was getting order wrong when
405 // build_and_tree reordered vector of PostLists)
407 subqs
.push_back(Xapian::Query(stemmer("rare")));
408 subqs
.push_back(Xapian::Query(stemmer("milk")));
409 q
= Xapian::Query(Xapian::Query::OP_PHRASE
, subqs
.begin(), subqs
.end(), 2);
410 enquire
.set_query(q
);
412 // retrieve the top ten results
413 mymset
= enquire
.get_mset(0, 10);
414 mset_expect_order(mymset
, 17);
419 /// Test PHRASE over operators
420 DEFINE_TESTCASE(phrase2
, positional
) {
421 Xapian::Database
mydb(get_database("apitest_phrase"));
422 Xapian::Enquire
enquire(mydb
);
423 Xapian::Stem
stemmer("english");
424 enquire
.set_weighting_scheme(Xapian::BoolWeight());
428 vector
<Xapian::Query
> subqs
;
430 subqs
.push_back(Xapian::Query(Xapian::Query::OP_AND
,
431 Xapian::Query(stemmer("phrase")),
432 Xapian::Query(stemmer("near"))));
433 subqs
.push_back(Xapian::Query(stemmer("and")));
434 TEST_EXCEPTION(Xapian::UnimplementedError
,
435 q
= Xapian::Query(q
.OP_PHRASE
, subqs
.begin(), subqs
.end(), 2);
436 enquire
.set_query(q
);
438 // retrieve the top ten results
439 mymset
= enquire
.get_mset(0, 10)
441 #if 0 // Disable until we reimplement this.
442 mset_expect_order(mymset
);
445 subqs
.push_back(Xapian::Query(Xapian::Query::OP_AND
,
446 Xapian::Query(stemmer("phrase")),
447 Xapian::Query(stemmer("near"))));
448 subqs
.push_back(Xapian::Query(stemmer("operator")));
449 q
= Xapian::Query(Xapian::Query::OP_PHRASE
, subqs
.begin(), subqs
.end(), 2);
450 enquire
.set_query(q
);
452 // retrieve the top ten results
453 mymset
= enquire
.get_mset(0, 10);
454 mset_expect_order(mymset
, 2);
457 subqs
.push_back(Xapian::Query(stemmer("operator")));
458 subqs
.push_back(Xapian::Query(Xapian::Query::OP_AND
,
459 Xapian::Query(stemmer("phrase")),
460 Xapian::Query(stemmer("near"))));
461 q
= Xapian::Query(Xapian::Query::OP_PHRASE
, subqs
.begin(), subqs
.end(), 2);
462 enquire
.set_query(q
);
464 // retrieve the top ten results
465 mymset
= enquire
.get_mset(0, 10);
466 mset_expect_order(mymset
);
472 /// Test getting position lists from databases
473 DEFINE_TESTCASE(poslist1
, positional
) {
474 Xapian::Database
mydb(get_database("apitest_poslist"));
476 Xapian::Stem
stemmer("english");
477 string term
= stemmer("sponge");
479 Xapian::PositionIterator pli
= mydb
.positionlist_begin(2, term
);
481 TEST(pli
!= mydb
.positionlist_end(2, term
));
484 TEST(pli
!= mydb
.positionlist_end(2, term
));
487 TEST(pli
!= mydb
.positionlist_end(2, term
));
490 TEST(pli
!= mydb
.positionlist_end(2, term
));
493 TEST(pli
!= mydb
.positionlist_end(2, term
));
496 TEST(pli
!= mydb
.positionlist_end(2, term
));
499 TEST(pli
!= mydb
.positionlist_end(2, term
));
502 TEST(pli
!= mydb
.positionlist_end(2, term
));
505 TEST(pli
== mydb
.positionlist_end(2, term
));
510 DEFINE_TESTCASE(poslist2
, positional
&& writable
) {
511 Xapian::WritableDatabase db
= get_writable_database();
513 Xapian::Document doc
;
514 doc
.add_term("nopos");
515 Xapian::docid did
= db
.add_document(doc
);
517 // Check what happens when term doesn't exist - should give an empty list.
518 // Threw RangeError in Xapian < 1.1.0.
519 TEST_EQUAL(db
.positionlist_begin(did
, "nosuchterm"),
520 db
.positionlist_end(did
, "nosuchterm"));
522 // Check what happens when the document doesn't even exist - should give
523 // an empty list. Threw DocNotFoundError in Xapian < 1.1.0.
524 TEST_EQUAL(db
.positionlist_begin(123, "nosuchterm"),
525 db
.positionlist_end(123, "nosuchterm"));
527 TEST_EQUAL(db
.positionlist_begin(did
, "nopos"),
528 db
.positionlist_end(did
, "nopos"));
530 Xapian::Document doc2
= db
.get_document(did
);
532 Xapian::TermIterator term
= doc2
.termlist_begin();
535 Xapian::PositionIterator i
= term
.positionlist_begin();
536 TEST_EQUAL(i
, term
.positionlist_end());
539 Xapian::Document doc3
;
540 doc3
.add_posting("hadpos", 1);
541 Xapian::docid did2
= db
.add_document(doc3
);
543 Xapian::Document doc4
= db
.get_document(did2
);
544 doc4
.remove_posting("hadpos", 1);
545 db
.replace_document(did2
, doc4
);
548 Xapian::PositionIterator i
= db
.positionlist_begin(did2
, "hadpos");
549 TEST_EQUAL(i
, db
.positionlist_end(did2
, "hadpos"));
552 db
.delete_document(did
);
553 // Check what happens when the document doesn't exist (but once did).
554 TEST_EQUAL(db
.positionlist_begin(did
, "nosuchterm"),
555 db
.positionlist_end(did
, "nosuchterm"));
560 /// Test playing with a positionlist, testing skip_to in particular.
561 /// (used to be quartztest's test_positionlist1).
562 DEFINE_TESTCASE(poslist3
, positional
&& writable
) {
563 Xapian::WritableDatabase db
= get_writable_database();
565 Xapian::Document document
;
566 document
.add_posting("foo", 5);
567 document
.add_posting("foo", 8);
568 document
.add_posting("foo", 10);
569 document
.add_posting("foo", 12);
570 db
.add_document(document
);
572 Xapian::PositionIterator pl
= db
.positionlist_begin(1, "foo");
573 Xapian::PositionIterator pl_end
= db
.positionlist_end(1, "foo");
589 pl
= db
.positionlist_begin(1, "foo");
612 // Regression test - in 0.9.4 (and many previous versions) you couldn't get a
613 // PositionIterator from a TermIterator from Database::termlist_begin().
615 // Also test that positionlist_count() is implemented for this case, which it
616 // wasn't in 1.0.2 and earlier.
617 DEFINE_TESTCASE(positfromtermit1
, positional
) {
618 Xapian::Database
db(get_database("apitest_phrase"));
619 Xapian::TermIterator
t(db
.termlist_begin(7));
620 TEST_NOT_EQUAL(t
, db
.termlist_end(7));
621 Xapian::PositionIterator p
= t
.positionlist_begin();
622 TEST_NOT_EQUAL(p
, t
.positionlist_end());
624 TEST_EQUAL(t
.positionlist_count(), 1);
626 TEST_NOT_EQUAL(t
, db
.termlist_end(7));
627 TEST_EQUAL(t
.positionlist_count(), 2);