Update for 1.4.18
[xapian.git] / xapian-core / tests / api_matchspy.cc
blobbdb131fe3a79b7d58e246e1d2dbcba273d702234
/** @file
 * @brief tests of MatchSpy usage
 */
/* Copyright 2007,2009 Lemur Consulting Ltd
 * Copyright 2009,2011,2012,2015,2019 Olly Betts
 * Copyright 2010 Richard Boulton
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */

#include <config.h>

#include "api_matchspy.h"

#include <xapian.h>

#include <algorithm>
#include <cmath>
#include <map>
#include <string>
#include <vector>

#include "backendmanager.h"
#include "str.h"
#include "testsuite.h"
#include "testutils.h"
#include "apitest.h"

using namespace std;

// #######################################################################
// # Tests start here

45 class SimpleMatchSpy : public Xapian::MatchSpy {
46 public:
47 // Vector which will be filled with all the document contents seen.
48 std::vector<std::string> seen;
50 void operator()(const Xapian::Document &doc, double) {
51 // Note that this is not recommended usage of get_data() - you
52 // generally shouldn't call get_data() from inside a MatchSpy, because
53 // it is (likely to be) a slow operation resulting in considerable IO.
54 seen.push_back(doc.get_data());
58 // Basic test of a matchspy.
59 DEFINE_TESTCASE(matchspy1, backend && !remote) {
60 Xapian::Database db(get_database("apitest_simpledata"));
61 Xapian::Enquire enquire(db);
62 enquire.set_query(Xapian::Query("this"));
64 SimpleMatchSpy myspy;
66 Xapian::MSet nospymset = enquire.get_mset(0, 100);
67 enquire.add_matchspy(&myspy);
68 Xapian::MSet spymset = enquire.get_mset(0, 100);
70 // Check that the match estimates aren't affected by the matchspy.
71 TEST_EQUAL(nospymset, spymset);
73 vector<bool> docid_checked(db.get_lastdocid());
75 // Check that we get the expected number of matches, and that the stored
76 // document contents are right.
77 Xapian::MSetIterator i = spymset.begin();
78 TEST(i != spymset.end());
79 TEST_EQUAL(spymset.size(), 6);
80 TEST_EQUAL(myspy.seen.size(), spymset.size());
82 std::sort(myspy.seen.begin(), myspy.seen.end());
84 std::vector<std::string> seen2;
85 for ( ; i != spymset.end(); ++i) {
86 const Xapian::Document doc(i.get_document());
87 seen2.push_back(doc.get_data());
89 std::sort(seen2.begin(), seen2.end());
91 TEST_EQUAL(myspy.seen.size(), seen2.size());
92 std::vector<std::string>::const_iterator j = myspy.seen.begin();
93 std::vector<std::string>::const_iterator j2 = seen2.begin();
94 for (; j != myspy.seen.end(); ++j, ++j2) {
95 TEST_EQUAL(*j, *j2);
99 static string values_to_repr(const Xapian::ValueCountMatchSpy & spy) {
100 string resultrepr("|");
101 for (Xapian::TermIterator i = spy.values_begin();
102 i != spy.values_end();
103 ++i) {
104 resultrepr += *i;
105 resultrepr += ':';
106 resultrepr += str(i.get_termfreq());
107 resultrepr += '|';
109 return resultrepr;
112 static void
113 make_matchspy2_db(Xapian::WritableDatabase &db, const string &)
115 for (int c = 1; c <= 25; ++c) {
116 Xapian::Document doc;
117 doc.set_data("Document " + str(c));
118 int factors = 0;
119 for (int factor = 1; factor <= c; ++factor) {
120 doc.add_term("all");
121 if (c % factor == 0) {
122 doc.add_term("XFACT" + str(factor));
123 ++factors;
127 // Number of factors.
128 doc.add_value(0, str(factors));
129 // Units digits.
130 doc.add_value(1, str(c % 10));
131 // Constant.
132 doc.add_value(2, "fish");
133 // Number of digits.
134 doc.add_value(3, str(str(c).size()));
136 db.add_document(doc);
140 DEFINE_TESTCASE(matchspy2, generated)
142 Xapian::Database db = get_database("matchspy2", make_matchspy2_db);
144 Xapian::ValueCountMatchSpy spy0(0);
145 Xapian::ValueCountMatchSpy spy1(1);
146 Xapian::ValueCountMatchSpy spy3(3);
148 Xapian::Enquire enq(db);
150 enq.set_query(Xapian::Query("all"));
151 if (startswith(get_dbtype(), "multi")) {
152 // Without this, we short-cut on the second shard because we don't get
153 // the documents in ascending weight order.
154 enq.set_weighting_scheme(Xapian::CoordWeight());
157 enq.add_matchspy(&spy0);
158 enq.add_matchspy(&spy1);
159 enq.add_matchspy(&spy3);
160 Xapian::MSet mset = enq.get_mset(0, 10);
162 TEST_EQUAL(spy0.get_total(), 25);
163 TEST_EQUAL(spy1.get_total(), 25);
164 TEST_EQUAL(spy3.get_total(), 25);
166 static const char * const results[] = {
167 "|1:1|2:9|3:3|4:7|5:1|6:3|8:1|",
168 "|0:2|1:3|2:3|3:3|4:3|5:3|6:2|7:2|8:2|9:2|",
169 "|1:9|2:16|",
171 TEST_STRINGS_EQUAL(values_to_repr(spy0), results[0]);
172 TEST_STRINGS_EQUAL(values_to_repr(spy1), results[1]);
173 TEST_STRINGS_EQUAL(values_to_repr(spy3), results[2]);
176 DEFINE_TESTCASE(matchspy4, generated)
178 XFAIL_FOR_BACKEND("multi_remote",
179 "Matchspy counts hits on remote and locally");
180 XFAIL_FOR_BACKEND("multi_glass_remote",
181 "Matchspy counts hits on remote and locally");
183 Xapian::Database db = get_database("matchspy2", make_matchspy2_db);
185 // We're going to run the match twice - once sorted by relevance, and once
186 // sorted by a value. This is a regression test - the matcher used to fail
187 // to show some documents to the spy when sorting by non-pure-relevance.
188 Xapian::ValueCountMatchSpy spya0(0);
189 Xapian::ValueCountMatchSpy spya1(1);
190 Xapian::ValueCountMatchSpy spya3(3);
191 Xapian::ValueCountMatchSpy spyb0(0);
192 Xapian::ValueCountMatchSpy spyb1(1);
193 Xapian::ValueCountMatchSpy spyb3(3);
195 Xapian::Enquire enqa(db);
196 Xapian::Enquire enqb(db);
198 enqa.set_query(Xapian::Query("all"));
199 if (startswith(get_dbtype(), "multi")) {
200 // Without this, we short-cut on the second shard because we don't get
201 // the documents in ascending weight order.
202 enqa.set_weighting_scheme(Xapian::CoordWeight());
204 enqb.set_query(Xapian::Query("all"));
206 enqa.add_matchspy(&spya0);
207 enqa.add_matchspy(&spya1);
208 enqa.add_matchspy(&spya3);
209 enqb.add_matchspy(&spyb0);
210 enqb.add_matchspy(&spyb1);
211 enqb.add_matchspy(&spyb3);
213 Xapian::MSet mseta = enqa.get_mset(0, 10);
214 enqb.set_sort_by_value(0, false);
215 Xapian::MSet msetb = enqb.get_mset(0, 10, 100);
217 TEST_EQUAL(spya0.get_total(), 25);
218 TEST_EQUAL(spya1.get_total(), 25);
219 TEST_EQUAL(spya3.get_total(), 25);
220 TEST_EQUAL(spyb0.get_total(), 25);
221 TEST_EQUAL(spyb1.get_total(), 25);
222 TEST_EQUAL(spyb3.get_total(), 25);
224 static const char * const results[] = {
225 "|2:9|4:7|3:3|6:3|1:1|5:1|8:1|",
226 "|1:3|2:3|3:3|4:3|5:3|0:2|6:2|7:2|8:2|9:2|",
227 "|",
228 "|2:16|1:9|",
229 "|2:9|4:7|3:3|6:3|1:1|5:1|8:1|",
230 "|1:3|2:3|3:3|4:3|5:3|0:2|6:2|7:2|8:2|9:2|",
231 "|",
232 "|2:16|1:9|",
233 NULL
235 std::vector<Xapian::ValueCountMatchSpy *> spies;
236 spies.push_back(&spya0);
237 spies.push_back(&spya1);
238 spies.push_back(NULL);
239 spies.push_back(&spya3);
240 spies.push_back(&spyb0);
241 spies.push_back(&spyb1);
242 spies.push_back(NULL);
243 spies.push_back(&spyb3);
244 for (Xapian::valueno v = 0; results[v]; ++v) {
245 tout << "value " << v << endl;
246 Xapian::ValueCountMatchSpy * spy = spies[v];
247 string allvals_str("|");
248 if (spy != NULL) {
249 size_t allvals_size = 0;
250 for (Xapian::TermIterator i = spy->top_values_begin(100);
251 i != spy->top_values_end(100);
252 ++i, ++allvals_size) {
253 allvals_str += *i;
254 allvals_str += ':';
255 allvals_str += str(i.get_termfreq());
256 allvals_str += '|';
258 tout << allvals_str << endl;
259 TEST_STRINGS_EQUAL(allvals_str, results[v]);
261 for (size_t count = 0; count < allvals_size; ++count) {
262 tout << "count " << count << endl;
263 for (Xapian::TermIterator i = spy->top_values_begin(100),
264 j = spy->top_values_begin(count);
265 i != spy->top_values_end(100) &&
266 j != spy->top_values_end(count);
267 ++i, ++j) {
268 tout << "j " << j << endl;
269 TEST_EQUAL(*i, *j);
270 TEST_EQUAL(i.get_termfreq(), j.get_termfreq());
277 // Test builtin match spies
278 DEFINE_TESTCASE(matchspy5, backend)
280 Xapian::Database db(get_database("apitest_simpledata"));
281 Xapian::Enquire enquire(db);
282 enquire.set_query(Xapian::Query("this"));
284 Xapian::ValueCountMatchSpy myspy1(1);
285 Xapian::ValueCountMatchSpy myspy2(1);
287 enquire.add_matchspy(&myspy1);
288 enquire.add_matchspy(&myspy2);
289 Xapian::MSet mymset = enquire.get_mset(0, 100);
290 TEST_EQUAL(mymset.size(), 6);
292 Xapian::TermIterator i = myspy1.values_begin();
293 TEST(i != myspy1.values_end());
294 TEST(*i == "h");
295 TEST_EQUAL(i.get_termfreq(), 5);
296 ++i;
297 TEST(i != myspy1.values_end());
298 TEST(*i == "n");
299 TEST_EQUAL(i.get_termfreq(), 1);
300 ++i;
301 TEST(i == myspy1.values_end());
303 i = myspy2.values_begin();
304 TEST(i != myspy2.values_end());
305 TEST(*i == "h");
306 TEST_EQUAL(i.get_termfreq(), 5);
307 ++i;
308 TEST(i != myspy2.values_end());
309 TEST(*i == "n");
310 TEST_EQUAL(i.get_termfreq(), 1);
311 ++i;
312 TEST(i == myspy2.values_end());
315 class MySpy : public Xapian::MatchSpy {
316 void operator()(const Xapian::Document &, double) {
320 // Test exceptions from matchspy base class, and get_description method.
321 DEFINE_TESTCASE(matchspy6, !backend)
323 MySpy spy;
325 TEST_EXCEPTION(Xapian::UnimplementedError, spy.clone());
326 TEST_EXCEPTION(Xapian::UnimplementedError, spy.name());
327 TEST_EXCEPTION(Xapian::UnimplementedError, spy.serialise());
328 TEST_EXCEPTION(Xapian::UnimplementedError,
329 spy.unserialise(std::string(), Xapian::Registry()));
330 TEST_EXCEPTION(Xapian::UnimplementedError, spy.serialise_results());
331 TEST_EXCEPTION(Xapian::UnimplementedError,
332 spy.merge_results(std::string()));
333 TEST_EQUAL(spy.get_description(), "Xapian::MatchSpy()");
336 /// Regression test for bug fixed in 1.4.12.
337 DEFINE_TESTCASE(matchspy7, !backend)
339 Xapian::ValueCountMatchSpy myspy(1);
340 string s = myspy.serialise_results();
341 s += 'x';
342 // This merge_results() call used to enter an infinite loop.
343 TEST_EXCEPTION(Xapian::NetworkError, myspy.merge_results(s));