Fix whitespace irregularities in code
[xapian.git] / xapian-core / tests / api_query.cc
blobd3b50d28365a068a83ef12b10c145bc6b3224a97
1 /** @file api_query.cc
2 * @brief Query-related tests.
3 */
/* Copyright (C) 2008,2009,2012,2013,2015,2016 Olly Betts
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */
22 #include <config.h>
24 #include "api_query.h"
26 #include <xapian.h>
28 #include "testsuite.h"
29 #include "testutils.h"
31 #include "apitest.h"
33 using namespace std;
35 /// Regression test - in 1.0.10 and earlier "" was included in the list.
36 DEFINE_TESTCASE(queryterms1, !backend) {
37 Xapian::Query query = Xapian::Query::MatchAll;
38 TEST(query.get_terms_begin() == query.get_terms_end());
39 query = Xapian::Query(query.OP_AND_NOT, query, Xapian::Query("fair"));
40 TEST_EQUAL(*query.get_terms_begin(), "fair");
41 return true;
44 DEFINE_TESTCASE(matchall2, !backend) {
45 TEST_STRINGS_EQUAL(Xapian::Query::MatchAll.get_description(),
46 "Query(<alldocuments>)");
47 return true;
50 DEFINE_TESTCASE(matchnothing1, !backend) {
51 TEST_STRINGS_EQUAL(Xapian::Query::MatchNothing.get_description(),
52 "Query()");
53 vector<Xapian::Query> subqs;
54 subqs.push_back(Xapian::Query("foo"));
55 subqs.push_back(Xapian::Query::MatchNothing);
56 Xapian::Query q(Xapian::Query::OP_AND, subqs.begin(), subqs.end());
57 TEST_STRINGS_EQUAL(q.get_description(), "Query()");
59 Xapian::Query q2(Xapian::Query::OP_AND,
60 Xapian::Query("foo"), Xapian::Query::MatchNothing);
61 TEST_STRINGS_EQUAL(q2.get_description(), "Query()");
62 return true;
65 DEFINE_TESTCASE(overload1, !backend) {
66 Xapian::Query q;
67 q = Xapian::Query("foo") & Xapian::Query("bar");
68 TEST_STRINGS_EQUAL(q.get_description(), "Query((foo AND bar))");
69 q = Xapian::Query("foo") &~ Xapian::Query("bar");
70 TEST_STRINGS_EQUAL(q.get_description(), "Query((foo AND_NOT bar))");
71 q = ~Xapian::Query("bar");
72 TEST_STRINGS_EQUAL(q.get_description(), "Query((<alldocuments> AND_NOT bar))");
73 q = Xapian::Query("foo") & Xapian::Query::MatchNothing;
74 TEST_STRINGS_EQUAL(q.get_description(), "Query()");
75 q = Xapian::Query("foo") | Xapian::Query("bar");
76 TEST_STRINGS_EQUAL(q.get_description(), "Query((foo OR bar))");
77 q = Xapian::Query("foo") | Xapian::Query::MatchNothing;
78 TEST_STRINGS_EQUAL(q.get_description(), "Query(foo)");
79 q = Xapian::Query("foo") ^ Xapian::Query("bar");
80 TEST_STRINGS_EQUAL(q.get_description(), "Query((foo XOR bar))");
81 q = Xapian::Query("foo") ^ Xapian::Query::MatchNothing;
82 TEST_STRINGS_EQUAL(q.get_description(), "Query(foo)");
83 q = 1.25 * (Xapian::Query("one") | Xapian::Query("two"));
84 TEST_STRINGS_EQUAL(q.get_description(), "Query(1.25 * (one OR two))");
85 q = (Xapian::Query("one") & Xapian::Query("two")) * 42;
86 TEST_STRINGS_EQUAL(q.get_description(), "Query(42 * (one AND two))");
87 q = Xapian::Query("one") / 2.0;
88 TEST_STRINGS_EQUAL(q.get_description(), "Query(0.5 * one)");
89 return true;
// FIXME: As of 1.3.6, we throw Xapian::UnimplementedError at an attempt
// to use OP_NEAR or OP_PHRASE with a complex subquery.  Once we actually
// implement this, these tests can be re-enabled.
#if 0
/** Regression test and feature test.
 *
 *  This threw AssertionError in 1.0.9 and earlier (bug#201) and gave valgrind
 *  errors in 1.0.11 and earlier (bug#349).
 *
 *  Having two non-leaf subqueries with OP_NEAR used to be expected to throw
 *  UnimplementedError, but now actually works.
 */
DEFINE_TESTCASE(nearsubqueries1, writable) {
    // Index a single document with "a", "b", "c" at positions 1, 2, 3.
    Xapian::WritableDatabase db = get_writable_database();
    Xapian::Document doc;
    doc.add_posting("a", 1);
    doc.add_posting("b", 2);
    doc.add_posting("c", 3);
    db.add_document(doc);

    Xapian::Query a_or_b(Xapian::Query::OP_OR,
                         Xapian::Query("a"),
                         Xapian::Query("b"));
    Xapian::Query near(Xapian::Query::OP_NEAR, a_or_b, a_or_b);
    // As of 1.3.0, we no longer rearrange queries at this point, so check
    // that we don't.
    TEST_STRINGS_EQUAL(near.get_description(),
                       "Query(((a OR b) NEAR 2 (a OR b)))");

    Xapian::Query a_near_b(Xapian::Query::OP_NEAR,
                           Xapian::Query("a"),
                           Xapian::Query("b"));
    Xapian::Query x_phrs_y(Xapian::Query::OP_PHRASE,
                           Xapian::Query("a"),
                           Xapian::Query("b"));

    // FIXME: These used to be rejected when the query was constructed, but
    // now they're only rejected when Enquire::get_mset() is called and we
    // actually try to get positional data from the subquery.  The plan is to
    // actually try to support such cases.
    Xapian::Query q;
    Xapian::Enquire enq(db);
    q = Xapian::Query(Xapian::Query::OP_NEAR, a_near_b, Xapian::Query("c"));
    TEST_EXCEPTION(Xapian::UnimplementedError,
                   enq.set_query(q); (void)enq.get_mset(0, 10));

    q = Xapian::Query(Xapian::Query::OP_NEAR, x_phrs_y, Xapian::Query("c"));
    TEST_EXCEPTION(Xapian::UnimplementedError,
                   enq.set_query(q); (void)enq.get_mset(0, 10));

    q = Xapian::Query(Xapian::Query::OP_PHRASE, a_near_b, Xapian::Query("c"));
    TEST_EXCEPTION(Xapian::UnimplementedError,
                   enq.set_query(q); (void)enq.get_mset(0, 10));

    q = Xapian::Query(Xapian::Query::OP_PHRASE, x_phrs_y, Xapian::Query("c"));
    TEST_EXCEPTION(Xapian::UnimplementedError,
                   enq.set_query(q); (void)enq.get_mset(0, 10));

    return true;
}
#endif
154 /// Test that XOR handles all remaining subqueries running out at the same
155 // time.
156 DEFINE_TESTCASE(xor3, backend) {
157 Xapian::Database db = get_database("apitest_simpledata");
159 const char * subqs[] = {
160 "hack", "which", "paragraph", "is", "return"
162 // Document where the subqueries run out *does* match XOR:
163 Xapian::Query q(Xapian::Query::OP_XOR, subqs, subqs + 5);
164 Xapian::Enquire enq(db);
165 enq.set_query(q);
166 Xapian::MSet mset = enq.get_mset(0, 10);
168 TEST_EQUAL(mset.size(), 3);
169 TEST_EQUAL(*mset[0], 4);
170 TEST_EQUAL(*mset[1], 2);
171 TEST_EQUAL(*mset[2], 3);
173 // Document where the subqueries run out *does not* match XOR:
174 q = Xapian::Query(Xapian::Query::OP_XOR, subqs, subqs + 4);
175 enq.set_query(q);
176 mset = enq.get_mset(0, 10);
178 TEST_EQUAL(mset.size(), 4);
179 TEST_EQUAL(*mset[0], 5);
180 TEST_EQUAL(*mset[1], 4);
181 TEST_EQUAL(*mset[2], 2);
182 TEST_EQUAL(*mset[3], 3);
184 return true;
187 /// Check encoding of non-UTF8 terms in query descriptions.
188 DEFINE_TESTCASE(nonutf8termdesc1, !backend) {
189 TEST_EQUAL(Xapian::Query("\xc0\x80\xf5\x80\x80\x80\xfe\xff").get_description(),
190 "Query(\\xc0\\x80\\xf5\\x80\\x80\\x80\\xfe\\xff)");
191 TEST_EQUAL(Xapian::Query(string("\x00\x1f", 2)).get_description(),
192 "Query(\\x00\\x1f)");
193 // Check that backslashes are encoded so output isn't ambiguous.
194 TEST_EQUAL(Xapian::Query("back\\slash").get_description(),
195 "Query(back\\x5cslash)");
196 // Check that \x7f is escaped.
197 TEST_EQUAL(Xapian::Query("D\x7f_\x7f~").get_description(),
198 "Query(D\\x7f_\\x7f~)");
199 return true;
202 /// Test introspection on Query objects.
203 DEFINE_TESTCASE(queryintro1, !backend) {
204 TEST_EQUAL(Xapian::Query::MatchAll.get_type(), Xapian::Query::LEAF_MATCH_ALL);
205 TEST_EQUAL(Xapian::Query::MatchAll.get_num_subqueries(), 0);
206 TEST_EQUAL(Xapian::Query::MatchNothing.get_type(), Xapian::Query::LEAF_MATCH_NOTHING);
207 TEST_EQUAL(Xapian::Query::MatchNothing.get_num_subqueries(), 0);
209 Xapian::Query q;
210 q = Xapian::Query(q.OP_AND_NOT, Xapian::Query::MatchAll, Xapian::Query("fair"));
211 TEST_EQUAL(q.get_type(), q.OP_AND_NOT);
212 TEST_EQUAL(q.get_num_subqueries(), 2);
213 TEST_EQUAL(q.get_subquery(0).get_type(), q.LEAF_MATCH_ALL);
214 TEST_EQUAL(q.get_subquery(1).get_type(), q.LEAF_TERM);
216 q = Xapian::Query("foo") & Xapian::Query("bar");
217 TEST_EQUAL(q.get_type(), q.OP_AND);
219 q = Xapian::Query("foo") &~ Xapian::Query("bar");
220 TEST_EQUAL(q.get_type(), q.OP_AND_NOT);
222 q = ~Xapian::Query("bar");
223 TEST_EQUAL(q.get_type(), q.OP_AND_NOT);
225 q = Xapian::Query("foo") | Xapian::Query("bar");
226 TEST_EQUAL(q.get_type(), q.OP_OR);
228 q = Xapian::Query("foo") ^ Xapian::Query("bar");
229 TEST_EQUAL(q.get_type(), q.OP_XOR);
231 q = 1.25 * (Xapian::Query("one") | Xapian::Query("two"));
232 TEST_EQUAL(q.get_type(), q.OP_SCALE_WEIGHT);
233 TEST_EQUAL(q.get_num_subqueries(), 1);
234 TEST_EQUAL(q.get_subquery(0).get_type(), q.OP_OR);
236 q = Xapian::Query("one") / 2.0;
237 TEST_EQUAL(q.get_type(), q.OP_SCALE_WEIGHT);
238 TEST_EQUAL(q.get_num_subqueries(), 1);
239 TEST_EQUAL(q.get_subquery(0).get_type(), q.LEAF_TERM);
241 q = Xapian::Query(q.OP_NEAR, Xapian::Query("a"), Xapian::Query("b"));
242 TEST_EQUAL(q.get_type(), q.OP_NEAR);
243 TEST_EQUAL(q.get_num_subqueries(), 2);
244 TEST_EQUAL(q.get_subquery(0).get_type(), q.LEAF_TERM);
245 TEST_EQUAL(q.get_subquery(1).get_type(), q.LEAF_TERM);
247 q = Xapian::Query(q.OP_PHRASE, Xapian::Query("c"), Xapian::Query("d"));
248 TEST_EQUAL(q.get_type(), q.OP_PHRASE);
249 TEST_EQUAL(q.get_num_subqueries(), 2);
250 TEST_EQUAL(q.get_subquery(0).get_type(), q.LEAF_TERM);
251 TEST_EQUAL(q.get_subquery(1).get_type(), q.LEAF_TERM);
253 return true;
256 /// Regression test for bug introduced in 1.3.1 and fixed in 1.3.3.
257 // We were incorrectly converting a term which indexed all docs and was used
258 // in an unweighted phrase into an all docs postlist, so check that this
259 // case actually works.
260 DEFINE_TESTCASE(phrasealldocs1, backend) {
261 Xapian::Database db = get_database("apitest_declen");
262 Xapian::Query q;
263 const char * phrase[] = { "this", "is", "the" };
264 q = Xapian::Query(q.OP_AND_NOT,
265 Xapian::Query("paragraph"),
266 Xapian::Query(q.OP_PHRASE, phrase, phrase + 3));
267 Xapian::Enquire enq(db);
268 enq.set_query(q);
269 Xapian::MSet mset = enq.get_mset(0, 10);
270 TEST_EQUAL(mset.size(), 3);
272 return true;
275 struct wildcard_testcase {
276 const char * pattern;
277 Xapian::termcount max_expansion;
278 char max_type;
279 const char * terms[4];
282 #define WILDCARD_EXCEPTION { 0, 0, 0, "" }
283 static const
284 wildcard_testcase wildcard1_testcases[] = {
285 // Tries to expand to 7 terms.
286 { "th", 6, 'E', WILDCARD_EXCEPTION },
287 { "thou", 1, 'E', { "though", 0, 0, 0 } },
288 { "s", 2, 'F', { "say", "search", 0, 0 } },
289 { "s", 2, 'M', { "simpl", "so", 0, 0 } },
290 { 0, 0, 0, { 0, 0, 0, 0 } }
293 DEFINE_TESTCASE(wildcard1, backend) {
294 // FIXME: The counting of terms the wildcard expands to is per subdatabase,
295 // so the wildcard may expand to more terms than the limit if some aren't
296 // in all subdatabases. Also WILDCARD_LIMIT_MOST_FREQUENT uses the
297 // frequency from the subdatabase, and so may select different terms in
298 // each subdatabase.
299 SKIP_TEST_FOR_BACKEND("multi");
300 Xapian::Database db = get_database("apitest_simpledata");
301 Xapian::Enquire enq(db);
302 const Xapian::Query::op o = Xapian::Query::OP_WILDCARD;
304 const wildcard_testcase * p = wildcard1_testcases;
305 while (p->pattern) {
306 tout << p->pattern << endl;
307 const char * const * tend = p->terms + 4;
308 while (tend[-1] == NULL) --tend;
309 bool expect_exception = (tend - p->terms == 4 && tend[-1][0] == '\0');
310 Xapian::Query q;
311 if (p->max_type) {
312 int max_type;
313 switch (p->max_type) {
314 case 'E':
315 max_type = Xapian::Query::WILDCARD_LIMIT_ERROR;
316 break;
317 case 'F':
318 max_type = Xapian::Query::WILDCARD_LIMIT_FIRST;
319 break;
320 case 'M':
321 max_type = Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT;
322 break;
323 default:
324 return false;
326 q = Xapian::Query(o, p->pattern, p->max_expansion, max_type);
327 } else {
328 q = Xapian::Query(o, p->pattern, p->max_expansion);
330 enq.set_query(q);
331 try {
332 Xapian::MSet mset = enq.get_mset(0, 10);
333 TEST(!expect_exception);
334 q = Xapian::Query(q.OP_SYNONYM, p->terms, tend);
335 enq.set_query(q);
336 Xapian::MSet mset2 = enq.get_mset(0, 10);
337 TEST_EQUAL(mset.size(), mset2.size());
338 TEST(mset_range_is_same(mset, 0, mset2, 0, mset.size()));
339 } catch (const Xapian::WildcardError &) {
340 TEST(expect_exception);
342 ++p;
345 return true;
348 /// Regression test for #696, fixed in 1.3.4.
349 DEFINE_TESTCASE(wildcard2, backend) {
350 // FIXME: The counting of terms the wildcard expands to is per subdatabase,
351 // so the wildcard may expand to more terms than the limit if some aren't
352 // in all subdatabases. Also WILDCARD_LIMIT_MOST_FREQUENT uses the
353 // frequency from the subdatabase, and so may select different terms in
354 // each subdatabase.
355 SKIP_TEST_FOR_BACKEND("multi");
356 Xapian::Database db = get_database("apitest_simpledata");
357 Xapian::Enquire enq(db);
358 const Xapian::Query::op o = Xapian::Query::OP_WILDCARD;
360 const int max_type = Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT;
361 Xapian::Query q0(o, "w", 2, max_type);
362 Xapian::Query q(o, "s", 2, max_type);
363 Xapian::Query q2(o, "t", 2, max_type);
364 q = Xapian::Query(q.OP_OR, q0, q);
365 q = Xapian::Query(q.OP_OR, q, q2);
366 enq.set_query(q);
367 Xapian::MSet mset = enq.get_mset(0, 10);
368 TEST_EQUAL(mset.size(), 6);
370 return true;
373 DEFINE_TESTCASE(dualprefixwildcard1, backend) {
374 Xapian::Database db = get_database("apitest_simpledata");
375 Xapian::Query q(Xapian::Query::OP_SYNONYM,
376 Xapian::Query(Xapian::Query::OP_WILDCARD, "fo"),
377 Xapian::Query(Xapian::Query::OP_WILDCARD, "Sfo"));
378 tout << q.get_description() << endl;
379 Xapian::Enquire enq(db);
380 enq.set_query(q);
381 TEST_EQUAL(enq.get_mset(0, 5).size(), 2);
382 return true;
385 struct positional_testcase {
386 int window;
387 const char * terms[4];
388 Xapian::docid result;
391 static const
392 positional_testcase loosephrase1_testcases[] = {
393 { 5, { "expect", "to", "mset", 0 }, 0 },
394 { 5, { "word", "well", "the", 0 }, 2 },
395 { 5, { "if", "word", "doesnt", 0 }, 0 },
396 { 5, { "at", "line", "three", 0 }, 0 },
397 { 5, { "paragraph", "other", "the", 0 }, 0 },
398 { 5, { "other", "the", "with", 0 }, 0 },
399 { 0, { 0, 0, 0, 0 }, 0 }
402 /// Regression test for bug fixed in 1.3.3 and 1.2.21.
403 DEFINE_TESTCASE(loosephrase1, backend) {
404 Xapian::Database db = get_database("apitest_simpledata");
405 Xapian::Enquire enq(db);
407 const positional_testcase * p = loosephrase1_testcases;
408 while (p->window) {
409 const char * const * tend = p->terms + 4;
410 while (tend[-1] == NULL) --tend;
411 Xapian::Query q(Xapian::Query::OP_PHRASE, p->terms, tend, p->window);
412 enq.set_query(q);
413 Xapian::MSet mset = enq.get_mset(0, 10);
414 if (p->result == 0) {
415 TEST(mset.empty());
416 } else {
417 TEST_EQUAL(mset.size(), 1);
418 TEST_EQUAL(*mset[0], p->result);
420 ++p;
423 return true;
426 static const
427 positional_testcase loosenear1_testcases[] = {
428 { 4, { "test", "the", "with", 0 }, 1 },
429 { 4, { "expect", "word", "the", 0 }, 2 },
430 { 4, { "line", "be", "blank", 0 }, 1 },
431 { 2, { "banana", "banana", 0, 0 }, 0 },
432 { 3, { "banana", "banana", 0, 0 }, 0 },
433 { 2, { "word", "word", 0, 0 }, 2 },
434 { 4, { "work", "meant", "work", 0 }, 0 },
435 { 4, { "this", "one", "yet", "one" }, 0 },
436 { 0, { 0, 0, 0, 0 }, 0 }
439 /// Regression tests for bugs fixed in 1.3.3 and 1.2.21.
440 DEFINE_TESTCASE(loosenear1, backend) {
441 Xapian::Database db = get_database("apitest_simpledata");
442 Xapian::Enquire enq(db);
444 const positional_testcase * p = loosenear1_testcases;
445 while (p->window) {
446 const char * const * tend = p->terms + 4;
447 while (tend[-1] == NULL) --tend;
448 Xapian::Query q(Xapian::Query::OP_NEAR, p->terms, tend, p->window);
449 enq.set_query(q);
450 Xapian::MSet mset = enq.get_mset(0, 10);
451 if (p->result == 0) {
452 TEST(mset.empty());
453 } else {
454 TEST_EQUAL(mset.size(), 1);
455 TEST_EQUAL(*mset[0], p->result);
457 ++p;
460 return true;
463 /// Regression test for bug fixed in 1.3.6 - this would segfault in 1.3.x.
464 DEFINE_TESTCASE(complexphrase1, backend) {
465 Xapian::Database db = get_database("apitest_simpledata");
466 Xapian::Enquire enq(db);
467 TEST_EXCEPTION(Xapian::UnimplementedError,
468 Xapian::Query query(Xapian::Query::OP_PHRASE,
469 Xapian::Query("a") | Xapian::Query("b"),
470 Xapian::Query("i"));
471 enq.set_query(query);
472 (void)enq.get_mset(0, 10););
473 return true;
476 /// Regression test for bug fixed in 1.3.6 - this would segfault in 1.3.x.
477 DEFINE_TESTCASE(complexnear1, backend) {
478 Xapian::Database db = get_database("apitest_simpledata");
479 Xapian::Enquire enq(db);
480 TEST_EXCEPTION(Xapian::UnimplementedError,
481 Xapian::Query query(Xapian::Query::OP_NEAR,
482 Xapian::Query("a") | Xapian::Query("b"),
483 Xapian::Query("i"));
484 enq.set_query(query);
485 (void)enq.get_mset(0, 10););
486 return true;
489 /// Check subqueries of MatchAll, MatchNothing and PostingSource are supported.
490 DEFINE_TESTCASE(complexphrase2, backend) {
491 Xapian::Database db = get_database("apitest_simpledata");
492 Xapian::Enquire enq(db);
493 Xapian::ValueWeightPostingSource ps(0);
494 Xapian::Query subqs[3] = {
495 Xapian::Query(Xapian::Query::OP_PHRASE,
496 Xapian::Query("a"),
497 Xapian::Query(&ps)),
498 Xapian::Query(Xapian::Query::OP_PHRASE,
499 Xapian::Query("and"),
500 Xapian::Query::MatchAll),
501 Xapian::Query(Xapian::Query::OP_PHRASE,
502 Xapian::Query("at"),
503 Xapian::Query::MatchNothing)
505 Xapian::Query query(Xapian::Query::OP_OR, subqs, subqs + 3);
506 enq.set_query(query);
507 (void)enq.get_mset(0, 10);
508 return true;
511 /// Check subqueries of MatchAll, MatchNothing and PostingSource are supported.
512 DEFINE_TESTCASE(complexnear2, backend) {
513 Xapian::Database db = get_database("apitest_simpledata");
514 Xapian::Enquire enq(db);
515 Xapian::ValueWeightPostingSource ps(0);
516 Xapian::Query subqs[3] = {
517 Xapian::Query(Xapian::Query::OP_NEAR,
518 Xapian::Query("a"),
519 Xapian::Query(&ps)),
520 Xapian::Query(Xapian::Query::OP_NEAR,
521 Xapian::Query("and"),
522 Xapian::Query::MatchAll),
523 Xapian::Query(Xapian::Query::OP_NEAR,
524 Xapian::Query("at"),
525 Xapian::Query::MatchNothing)
527 Xapian::Query query(Xapian::Query::OP_OR, subqs, subqs + 3);
528 enq.set_query(query);
529 (void)enq.get_mset(0, 10);
530 return true;