Document xapian-compact --blocksize takes an argument
[xapian.git] / xapian-core / tests / api_query.cc
blob9a5f7743fbdc1f2800220d20ef06222b4d6e0175
/** @file api_query.cc
 * @brief Query-related tests.
 */
/* Copyright (C) 2008,2009,2012,2013,2015 Olly Betts
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
 * USA
 */
22 #include <config.h>
24 #include "api_query.h"
26 #include <xapian.h>
28 #include "testsuite.h"
29 #include "testutils.h"
31 #include "apitest.h"
33 using namespace std;
35 /// Regression test - in 1.0.10 and earlier "" was included in the list.
36 DEFINE_TESTCASE(queryterms1, !backend) {
37 Xapian::Query query = Xapian::Query::MatchAll;
38 TEST(query.get_terms_begin() == query.get_terms_end());
39 query = Xapian::Query(query.OP_AND_NOT, query, Xapian::Query("fair"));
40 TEST_EQUAL(*query.get_terms_begin(), "fair");
41 return true;
44 DEFINE_TESTCASE(matchall2, !backend) {
45 TEST_STRINGS_EQUAL(Xapian::Query::MatchAll.get_description(),
46 "Query(<alldocuments>)");
47 return true;
50 DEFINE_TESTCASE(matchnothing1, !backend) {
51 TEST_STRINGS_EQUAL(Xapian::Query::MatchNothing.get_description(),
52 "Query()");
53 vector<Xapian::Query> subqs;
54 subqs.push_back(Xapian::Query("foo"));
55 subqs.push_back(Xapian::Query::MatchNothing);
56 Xapian::Query q(Xapian::Query::OP_AND, subqs.begin(), subqs.end());
57 TEST_STRINGS_EQUAL(q.get_description(), "Query()");
59 Xapian::Query q2(Xapian::Query::OP_AND,
60 Xapian::Query("foo"), Xapian::Query::MatchNothing);
61 TEST_STRINGS_EQUAL(q2.get_description(), "Query()");
62 return true;
65 DEFINE_TESTCASE(overload1, !backend) {
66 Xapian::Query q;
67 q = Xapian::Query("foo") & Xapian::Query("bar");
68 TEST_STRINGS_EQUAL(q.get_description(), "Query((foo AND bar))");
69 q = Xapian::Query("foo") &~ Xapian::Query("bar");
70 TEST_STRINGS_EQUAL(q.get_description(), "Query((foo AND_NOT bar))");
71 q = ~Xapian::Query("bar");
72 TEST_STRINGS_EQUAL(q.get_description(), "Query((<alldocuments> AND_NOT bar))");
73 q = Xapian::Query("foo") & Xapian::Query::MatchNothing;
74 TEST_STRINGS_EQUAL(q.get_description(), "Query()");
75 q = Xapian::Query("foo") | Xapian::Query("bar");
76 TEST_STRINGS_EQUAL(q.get_description(), "Query((foo OR bar))");
77 q = Xapian::Query("foo") | Xapian::Query::MatchNothing;
78 TEST_STRINGS_EQUAL(q.get_description(), "Query(foo)");
79 q = Xapian::Query("foo") ^ Xapian::Query("bar");
80 TEST_STRINGS_EQUAL(q.get_description(), "Query((foo XOR bar))");
81 q = Xapian::Query("foo") ^ Xapian::Query::MatchNothing;
82 TEST_STRINGS_EQUAL(q.get_description(), "Query(foo)");
83 q = 1.25 * (Xapian::Query("one") | Xapian::Query("two"));
84 TEST_STRINGS_EQUAL(q.get_description(), "Query(1.25 * (one OR two))");
85 q = (Xapian::Query("one") & Xapian::Query("two")) * 42;
86 TEST_STRINGS_EQUAL(q.get_description(), "Query(42 * (one AND two))");
87 q = Xapian::Query("one") / 2.0;
88 TEST_STRINGS_EQUAL(q.get_description(), "Query(0.5 * one)");
89 return true;
92 /** Regression test and feature test.
94 * This threw AssertionError in 1.0.9 and earlier (bug#201) and gave valgrind
95 * errors in 1.0.11 and earlier (bug#349).
97 * Having two non-leaf subqueries with OP_NEAR used to be expected to throw
98 * UnimplementedError, but now actually works.
100 DEFINE_TESTCASE(nearsubqueries1, writable) {
101 Xapian::WritableDatabase db = get_writable_database();
102 Xapian::Document doc;
103 doc.add_posting("a", 1);
104 doc.add_posting("b", 2);
105 doc.add_posting("c", 3);
106 db.add_document(doc);
108 Xapian::Query a_or_b(Xapian::Query::OP_OR,
109 Xapian::Query("a"),
110 Xapian::Query("b"));
111 Xapian::Query near(Xapian::Query::OP_NEAR, a_or_b, a_or_b);
112 // As of 1.3.0, we no longer rearrange queries at this point, so check
113 // that we don't.
114 TEST_STRINGS_EQUAL(near.get_description(),
115 "Query(((a OR b) NEAR 2 (a OR b)))");
117 Xapian::Query a_near_b(Xapian::Query::OP_NEAR,
118 Xapian::Query("a"),
119 Xapian::Query("b"));
120 Xapian::Query x_phrs_y(Xapian::Query::OP_PHRASE,
121 Xapian::Query("a"),
122 Xapian::Query("b"));
124 // FIXME: These used to be rejected when the query was constructed, but
125 // now they're only rejected when Enquire::get_mset() is called and we
126 // actually try to get positional data from the subquery. The plan is to
127 // actually try to support such cases.
128 Xapian::Query q;
129 Xapian::Enquire enq(db);
130 q = Xapian::Query(Xapian::Query::OP_NEAR, a_near_b, Xapian::Query("c"));
131 TEST_EXCEPTION(Xapian::UnimplementedError,
132 enq.set_query(q); (void)enq.get_mset(0, 10));
134 q = Xapian::Query(Xapian::Query::OP_NEAR, x_phrs_y, Xapian::Query("c"));
135 TEST_EXCEPTION(Xapian::UnimplementedError,
136 enq.set_query(q); (void)enq.get_mset(0, 10));
138 q = Xapian::Query(Xapian::Query::OP_PHRASE, a_near_b, Xapian::Query("c"));
139 TEST_EXCEPTION(Xapian::UnimplementedError,
140 enq.set_query(q); (void)enq.get_mset(0, 10));
142 q = Xapian::Query(Xapian::Query::OP_PHRASE, x_phrs_y, Xapian::Query("c"));
143 TEST_EXCEPTION(Xapian::UnimplementedError,
144 enq.set_query(q); (void)enq.get_mset(0, 10));
146 return true;
149 /// Test that XOR handles all remaining subqueries running out at the same
150 // time.
151 DEFINE_TESTCASE(xor3, backend) {
152 Xapian::Database db = get_database("apitest_simpledata");
154 const char * subqs[] = {
155 "hack", "which", "paragraph", "is", "return"
157 // Document where the subqueries run out *does* match XOR:
158 Xapian::Query q(Xapian::Query::OP_XOR, subqs, subqs + 5);
159 Xapian::Enquire enq(db);
160 enq.set_query(q);
161 Xapian::MSet mset = enq.get_mset(0, 10);
163 TEST_EQUAL(mset.size(), 3);
164 TEST_EQUAL(*mset[0], 4);
165 TEST_EQUAL(*mset[1], 2);
166 TEST_EQUAL(*mset[2], 3);
168 // Document where the subqueries run out *does not* match XOR:
169 q = Xapian::Query(Xapian::Query::OP_XOR, subqs, subqs + 4);
170 enq.set_query(q);
171 mset = enq.get_mset(0, 10);
173 TEST_EQUAL(mset.size(), 4);
174 TEST_EQUAL(*mset[0], 5);
175 TEST_EQUAL(*mset[1], 4);
176 TEST_EQUAL(*mset[2], 2);
177 TEST_EQUAL(*mset[3], 3);
179 return true;
182 /// Check encoding of non-UTF8 terms in query descriptions.
183 DEFINE_TESTCASE(nonutf8termdesc1, !backend) {
184 TEST_EQUAL(Xapian::Query("\xc0\x80\xf5\x80\x80\x80\xfe\xff").get_description(),
185 "Query(\\xc0\\x80\\xf5\\x80\\x80\\x80\\xfe\\xff)");
186 TEST_EQUAL(Xapian::Query(string("\x00\x1f", 2)).get_description(),
187 "Query(\\x00\\x1f)");
188 // Check that backslashes are encoded so output isn't ambiguous.
189 TEST_EQUAL(Xapian::Query("back\\slash").get_description(),
190 "Query(back\\x5cslash)");
191 // Check that \x7f is escaped.
192 TEST_EQUAL(Xapian::Query("D\x7f_\x7f~").get_description(),
193 "Query(D\\x7f_\\x7f~)");
194 return true;
197 /// Test introspection on Query objects.
198 DEFINE_TESTCASE(queryintro1, !backend) {
199 TEST_EQUAL(Xapian::Query::MatchAll.get_type(), Xapian::Query::LEAF_MATCH_ALL);
200 TEST_EQUAL(Xapian::Query::MatchAll.get_num_subqueries(), 0);
201 TEST_EQUAL(Xapian::Query::MatchNothing.get_type(), Xapian::Query::LEAF_MATCH_NOTHING);
202 TEST_EQUAL(Xapian::Query::MatchNothing.get_num_subqueries(), 0);
204 Xapian::Query q;
205 q = Xapian::Query(q.OP_AND_NOT, Xapian::Query::MatchAll, Xapian::Query("fair"));
206 TEST_EQUAL(q.get_type(), q.OP_AND_NOT);
207 TEST_EQUAL(q.get_num_subqueries(), 2);
208 TEST_EQUAL(q.get_subquery(0).get_type(), q.LEAF_MATCH_ALL);
209 TEST_EQUAL(q.get_subquery(1).get_type(), q.LEAF_TERM);
211 q = Xapian::Query("foo") & Xapian::Query("bar");
212 TEST_EQUAL(q.get_type(), q.OP_AND);
214 q = Xapian::Query("foo") &~ Xapian::Query("bar");
215 TEST_EQUAL(q.get_type(), q.OP_AND_NOT);
217 q = ~Xapian::Query("bar");
218 TEST_EQUAL(q.get_type(), q.OP_AND_NOT);
220 q = Xapian::Query("foo") | Xapian::Query("bar");
221 TEST_EQUAL(q.get_type(), q.OP_OR);
223 q = Xapian::Query("foo") ^ Xapian::Query("bar");
224 TEST_EQUAL(q.get_type(), q.OP_XOR);
226 q = 1.25 * (Xapian::Query("one") | Xapian::Query("two"));
227 TEST_EQUAL(q.get_type(), q.OP_SCALE_WEIGHT);
228 TEST_EQUAL(q.get_num_subqueries(), 1);
229 TEST_EQUAL(q.get_subquery(0).get_type(), q.OP_OR);
231 q = Xapian::Query("one") / 2.0;
232 TEST_EQUAL(q.get_type(), q.OP_SCALE_WEIGHT);
233 TEST_EQUAL(q.get_num_subqueries(), 1);
234 TEST_EQUAL(q.get_subquery(0).get_type(), q.LEAF_TERM);
236 q = Xapian::Query(q.OP_NEAR, Xapian::Query("a"), Xapian::Query("b"));
237 TEST_EQUAL(q.get_type(), q.OP_NEAR);
238 TEST_EQUAL(q.get_num_subqueries(), 2);
239 TEST_EQUAL(q.get_subquery(0).get_type(), q.LEAF_TERM);
240 TEST_EQUAL(q.get_subquery(1).get_type(), q.LEAF_TERM);
242 q = Xapian::Query(q.OP_PHRASE, Xapian::Query("c"), Xapian::Query("d"));
243 TEST_EQUAL(q.get_type(), q.OP_PHRASE);
244 TEST_EQUAL(q.get_num_subqueries(), 2);
245 TEST_EQUAL(q.get_subquery(0).get_type(), q.LEAF_TERM);
246 TEST_EQUAL(q.get_subquery(1).get_type(), q.LEAF_TERM);
248 return true;
251 /// Regression test for bug introduced in 1.3.1 and fixed in 1.3.3.
252 // We were incorrectly converting a term which indexed all docs and was used
253 // in an unweighted phrase into an all docs postlist, so check that this
254 // case actually works.
255 DEFINE_TESTCASE(phrasealldocs1, backend) {
256 Xapian::Database db = get_database("apitest_declen");
257 Xapian::Query q;
258 const char * phrase[] = { "this", "is", "the" };
259 q = Xapian::Query(q.OP_AND_NOT,
260 Xapian::Query("paragraph"),
261 Xapian::Query(q.OP_PHRASE, phrase, phrase + 3));
262 Xapian::Enquire enq(db);
263 enq.set_query(q);
264 Xapian::MSet mset = enq.get_mset(0, 10);
265 TEST_EQUAL(mset.size(), 3);
267 return true;
270 struct wildcard_testcase {
271 const char * pattern;
272 Xapian::termcount max_expansion;
273 char max_type;
274 const char * terms[4];
277 #define WILDCARD_EXCEPTION { 0, 0, 0, "" }
278 static const
279 wildcard_testcase wildcard1_testcases[] = {
280 // Tries to expand to 7 terms.
281 { "th", 6, 'E', WILDCARD_EXCEPTION },
282 { "thou", 1, 'E', { "though", 0, 0, 0 } },
283 { "s", 2, 'F', { "say", "search", 0, 0 } },
284 { "s", 2, 'M', { "simpl", "so", 0, 0 } },
285 { 0, 0, 0, { 0, 0, 0, 0 } }
288 DEFINE_TESTCASE(wildcard1, backend) {
289 // FIXME: The counting of terms the wildcard expands to is per subdatabase,
290 // so the wildcard may expand to more terms than the limit if some aren't
291 // in all subdatabases. Also WILDCARD_LIMIT_MOST_FREQUENT uses the
292 // frequency from the subdatabase, and so may select different terms in
293 // each subdatabase.
294 SKIP_TEST_FOR_BACKEND("multi");
295 Xapian::Database db = get_database("apitest_simpledata");
296 Xapian::Enquire enq(db);
297 const Xapian::Query::op o = Xapian::Query::OP_WILDCARD;
299 const wildcard_testcase * p = wildcard1_testcases;
300 while (p->pattern) {
301 tout << p->pattern << endl;
302 const char * const * tend = p->terms + 4;
303 while (tend[-1] == NULL) --tend;
304 bool expect_exception = (tend - p->terms == 4 && tend[-1][0] == '\0');
305 Xapian::Query q;
306 if (p->max_type) {
307 int max_type;
308 switch (p->max_type) {
309 case 'E':
310 max_type = Xapian::Query::WILDCARD_LIMIT_ERROR;
311 break;
312 case 'F':
313 max_type = Xapian::Query::WILDCARD_LIMIT_FIRST;
314 break;
315 case 'M':
316 max_type = Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT;
317 break;
318 default:
319 return false;
321 q = Xapian::Query(o, p->pattern, p->max_expansion, max_type);
322 } else {
323 q = Xapian::Query(o, p->pattern, p->max_expansion);
325 enq.set_query(q);
326 try {
327 Xapian::MSet mset = enq.get_mset(0, 10);
328 TEST(!expect_exception);
329 q = Xapian::Query(q.OP_SYNONYM, p->terms, tend);
330 enq.set_query(q);
331 Xapian::MSet mset2 = enq.get_mset(0, 10);
332 TEST_EQUAL(mset.size(), mset2.size());
333 TEST(mset_range_is_same(mset, 0, mset2, 0, mset.size()));
334 } catch (const Xapian::WildcardError &) {
335 TEST(expect_exception);
337 ++p;
340 return true;
343 /// Regression test for #696, fixed in 1.3.4.
344 DEFINE_TESTCASE(wildcard2, backend) {
345 // FIXME: The counting of terms the wildcard expands to is per subdatabase,
346 // so the wildcard may expand to more terms than the limit if some aren't
347 // in all subdatabases. Also WILDCARD_LIMIT_MOST_FREQUENT uses the
348 // frequency from the subdatabase, and so may select different terms in
349 // each subdatabase.
350 SKIP_TEST_FOR_BACKEND("multi");
351 Xapian::Database db = get_database("apitest_simpledata");
352 Xapian::Enquire enq(db);
353 const Xapian::Query::op o = Xapian::Query::OP_WILDCARD;
355 const int max_type = Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT;
356 Xapian::Query q0(o, "w", 2, max_type);
357 Xapian::Query q(o, "s", 2, max_type);
358 Xapian::Query q2(o, "t", 2, max_type);
359 q = Xapian::Query(q.OP_OR, q0, q);
360 q = Xapian::Query(q.OP_OR, q, q2);
361 enq.set_query(q);
362 Xapian::MSet mset = enq.get_mset(0, 10);
363 TEST_EQUAL(mset.size(), 6);
365 return true;
368 DEFINE_TESTCASE(dualprefixwildcard1, backend) {
369 Xapian::Database db = get_database("apitest_simpledata");
370 Xapian::Query q(Xapian::Query::OP_SYNONYM,
371 Xapian::Query(Xapian::Query::OP_WILDCARD, "fo"),
372 Xapian::Query(Xapian::Query::OP_WILDCARD, "Sfo"));
373 tout << q.get_description() << endl;
374 Xapian::Enquire enq(db);
375 enq.set_query(q);
376 TEST_EQUAL(enq.get_mset(0, 5).size(), 2);
377 return true;
380 struct positional_testcase {
381 int window;
382 const char * terms[4];
383 Xapian::docid result;
386 static const
387 positional_testcase loosephrase1_testcases[] = {
388 { 5, { "expect", "to", "mset", 0 }, 0 },
389 { 5, { "word", "well", "the", 0 }, 2 },
390 { 5, { "if", "word", "doesnt", 0 }, 0 },
391 { 5, { "at", "line", "three", 0 }, 0 },
392 { 5, { "paragraph", "other", "the", 0 }, 0 },
393 { 5, { "other", "the", "with", 0 }, 0 },
394 { 0, { 0, 0, 0, 0 }, 0 }
397 /// Regression test for bug fixed in 1.3.3 and 1.2.21.
398 DEFINE_TESTCASE(loosephrase1, backend) {
399 Xapian::Database db = get_database("apitest_simpledata");
400 Xapian::Enquire enq(db);
402 const positional_testcase * p = loosephrase1_testcases;
403 while (p->window) {
404 const char * const * tend = p->terms + 4;
405 while (tend[-1] == NULL) --tend;
406 Xapian::Query q(Xapian::Query::OP_PHRASE, p->terms, tend, p->window);
407 enq.set_query(q);
408 Xapian::MSet mset = enq.get_mset(0, 10);
409 if (p->result == 0) {
410 TEST(mset.empty());
411 } else {
412 TEST_EQUAL(mset.size(), 1);
413 TEST_EQUAL(*mset[0], p->result);
415 ++p;
418 return true;
421 static const
422 positional_testcase loosenear1_testcases[] = {
423 { 4, { "test", "the", "with", 0 }, 1 },
424 { 4, { "expect", "word", "the", 0 }, 2 },
425 { 4, { "line", "be", "blank", 0 }, 1 },
426 { 2, { "banana", "banana", 0, 0 }, 0 },
427 { 3, { "banana", "banana", 0, 0 }, 0 },
428 { 2, { "word", "word", 0, 0 }, 2 },
429 { 4, { "work", "meant", "work", 0 }, 0 },
430 { 4, { "this", "one", "yet", "one" }, 0 },
431 { 0, { 0, 0, 0, 0 }, 0 }
434 /// Regression tests for bugs fixed in 1.3.3 and 1.2.21.
435 DEFINE_TESTCASE(loosenear1, backend) {
436 Xapian::Database db = get_database("apitest_simpledata");
437 Xapian::Enquire enq(db);
439 const positional_testcase * p = loosenear1_testcases;
440 while (p->window) {
441 const char * const * tend = p->terms + 4;
442 while (tend[-1] == NULL) --tend;
443 Xapian::Query q(Xapian::Query::OP_NEAR, p->terms, tend, p->window);
444 enq.set_query(q);
445 Xapian::MSet mset = enq.get_mset(0, 10);
446 if (p->result == 0) {
447 TEST(mset.empty());
448 } else {
449 TEST_EQUAL(mset.size(), 1);
450 TEST_EQUAL(*mset[0], p->result);
452 ++p;
455 return true;