Document xapian-compact --blocksize takes an argument
[xapian.git] / xapian-core / tests / api_matchspy.cc
blobb03a1f3d48b67a456475a95f2efffde7d7842c0e
1 /** @file api_matchspy.cc
2 * @brief tests of MatchSpy usage
3 */
4 /* Copyright 2007,2009 Lemur Consulting Ltd
5 * Copyright 2009,2011,2012,2015 Olly Betts
6 * Copyright 2010 Richard Boulton
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License, or (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
21 * USA
24 #include <config.h>
26 #include "api_matchspy.h"
28 #include <xapian.h>
30 #include <cmath>
31 #include <map>
32 #include <vector>
34 #include "backendmanager.h"
35 #include "str.h"
36 #include "testsuite.h"
37 #include "testutils.h"
38 #include "apitest.h"
40 using namespace std;
42 // #######################################################################
43 // # Tests start here
45 class SimpleMatchSpy : public Xapian::MatchSpy {
46 public:
47 // Vector which will be filled with all the document contents seen.
48 std::vector<std::string> seen;
50 void operator()(const Xapian::Document &doc, double) {
51 // Note that this is not recommended usage of get_data() - you
52 // generally shouldn't call get_data() from inside a MatchSpy, because
53 // it is (likely to be) a slow operation resulting in considerable IO.
54 seen.push_back(doc.get_data());
58 // Basic test of a matchspy.
59 DEFINE_TESTCASE(matchspy1, backend && !remote) {
60 Xapian::Database db(get_database("apitest_simpledata"));
61 Xapian::Enquire enquire(db);
62 enquire.set_query(Xapian::Query("this"));
64 SimpleMatchSpy myspy;
66 Xapian::MSet nospymset = enquire.get_mset(0, 100);
67 enquire.add_matchspy(&myspy);
68 Xapian::MSet spymset = enquire.get_mset(0, 100);
70 // Check that the match estimates aren't affected by the matchspy.
71 TEST_EQUAL(nospymset, spymset);
73 vector<bool> docid_checked(db.get_lastdocid());
75 // Check that we get the expected number of matches, and that the stored
76 // document contents are right.
77 Xapian::MSetIterator i = spymset.begin();
78 TEST(i != spymset.end());
79 TEST_EQUAL(spymset.size(), 6);
80 TEST_EQUAL(myspy.seen.size(), spymset.size());
82 std::sort(myspy.seen.begin(), myspy.seen.end());
84 std::vector<std::string> seen2;
85 for ( ; i != spymset.end(); ++i) {
86 const Xapian::Document doc(i.get_document());
87 seen2.push_back(doc.get_data());
89 std::sort(seen2.begin(), seen2.end());
91 TEST_EQUAL(myspy.seen.size(), seen2.size());
92 std::vector<std::string>::const_iterator j = myspy.seen.begin();
93 std::vector<std::string>::const_iterator j2 = seen2.begin();
94 for (; j != myspy.seen.end(); ++j, ++j2) {
95 TEST_EQUAL(*j, *j2);
98 return true;
101 static string values_to_repr(const Xapian::ValueCountMatchSpy & spy) {
102 string resultrepr("|");
103 for (Xapian::TermIterator i = spy.values_begin();
104 i != spy.values_end();
105 ++i) {
106 resultrepr += *i;
107 resultrepr += ':';
108 resultrepr += str(i.get_termfreq());
109 resultrepr += '|';
111 return resultrepr;
114 static void
115 make_matchspy2_db(Xapian::WritableDatabase &db, const string &)
117 for (int c = 1; c <= 25; ++c) {
118 Xapian::Document doc;
119 doc.set_data("Document " + str(c));
120 int factors = 0;
121 for (int factor = 1; factor <= c; ++factor) {
122 doc.add_term("all");
123 if (c % factor == 0) {
124 doc.add_term("XFACT" + str(factor));
125 ++factors;
129 // Number of factors.
130 doc.add_value(0, str(factors));
131 // Units digits.
132 doc.add_value(1, str(c % 10));
133 // Constant.
134 doc.add_value(2, "fish");
135 // Number of digits.
136 doc.add_value(3, str(str(c).size()));
138 db.add_document(doc);
142 DEFINE_TESTCASE(matchspy2, generated)
144 Xapian::Database db = get_database("matchspy2", make_matchspy2_db);
146 Xapian::ValueCountMatchSpy spy0(0);
147 Xapian::ValueCountMatchSpy spy1(1);
148 Xapian::ValueCountMatchSpy spy3(3);
150 Xapian::Enquire enq(db);
152 enq.set_query(Xapian::Query("all"));
154 enq.add_matchspy(&spy0);
155 enq.add_matchspy(&spy1);
156 enq.add_matchspy(&spy3);
157 Xapian::MSet mset = enq.get_mset(0, 10);
159 TEST_EQUAL(spy0.get_total(), 25);
160 TEST_EQUAL(spy1.get_total(), 25);
161 TEST_EQUAL(spy3.get_total(), 25);
163 static const char * results[] = {
164 "|1:1|2:9|3:3|4:7|5:1|6:3|8:1|",
165 "|0:2|1:3|2:3|3:3|4:3|5:3|6:2|7:2|8:2|9:2|",
166 "|1:9|2:16|",
168 TEST_STRINGS_EQUAL(values_to_repr(spy0), results[0]);
169 TEST_STRINGS_EQUAL(values_to_repr(spy1), results[1]);
170 TEST_STRINGS_EQUAL(values_to_repr(spy3), results[2]);
172 return true;
175 DEFINE_TESTCASE(matchspy4, generated)
177 Xapian::Database db = get_database("matchspy2", make_matchspy2_db);
179 // We're going to run the match twice - once sorted by relevance, and once
180 // sorted by a value. This is a regression test - the matcher used to fail
181 // to show some documents to the spy when sorting by non-pure-relevance.
182 Xapian::ValueCountMatchSpy spya0(0);
183 Xapian::ValueCountMatchSpy spya1(1);
184 Xapian::ValueCountMatchSpy spya3(3);
185 Xapian::ValueCountMatchSpy spyb0(0);
186 Xapian::ValueCountMatchSpy spyb1(1);
187 Xapian::ValueCountMatchSpy spyb3(3);
189 Xapian::Enquire enqa(db);
190 Xapian::Enquire enqb(db);
192 enqa.set_query(Xapian::Query("all"));
193 enqb.set_query(Xapian::Query("all"));
195 enqa.add_matchspy(&spya0);
196 enqa.add_matchspy(&spya1);
197 enqa.add_matchspy(&spya3);
198 enqb.add_matchspy(&spyb0);
199 enqb.add_matchspy(&spyb1);
200 enqb.add_matchspy(&spyb3);
202 Xapian::MSet mseta = enqa.get_mset(0, 10);
203 enqb.set_sort_by_value(0, false);
204 Xapian::MSet msetb = enqb.get_mset(0, 10, 100);
206 TEST_EQUAL(spya0.get_total(), 25);
207 TEST_EQUAL(spya1.get_total(), 25);
208 TEST_EQUAL(spya3.get_total(), 25);
209 TEST_EQUAL(spyb0.get_total(), 25);
210 TEST_EQUAL(spyb1.get_total(), 25);
211 TEST_EQUAL(spyb3.get_total(), 25);
213 static const char * results[] = {
214 "|2:9|4:7|3:3|6:3|1:1|5:1|8:1|",
215 "|1:3|2:3|3:3|4:3|5:3|0:2|6:2|7:2|8:2|9:2|",
216 "|",
217 "|2:16|1:9|",
218 "|2:9|4:7|3:3|6:3|1:1|5:1|8:1|",
219 "|1:3|2:3|3:3|4:3|5:3|0:2|6:2|7:2|8:2|9:2|",
220 "|",
221 "|2:16|1:9|",
222 NULL
224 std::vector<Xapian::ValueCountMatchSpy *> spies;
225 spies.push_back(&spya0);
226 spies.push_back(&spya1);
227 spies.push_back(NULL);
228 spies.push_back(&spya3);
229 spies.push_back(&spyb0);
230 spies.push_back(&spyb1);
231 spies.push_back(NULL);
232 spies.push_back(&spyb3);
233 for (Xapian::valueno v = 0; results[v]; ++v) {
234 tout << "value " << v << endl;
235 Xapian::ValueCountMatchSpy * spy = spies[v];
236 string allvals_str("|");
237 if (spy != NULL) {
238 size_t allvals_size = 0;
239 for (Xapian::TermIterator i = spy->top_values_begin(100);
240 i != spy->top_values_end(100);
241 ++i, ++allvals_size) {
242 allvals_str += *i;
243 allvals_str += ':';
244 allvals_str += str(i.get_termfreq());
245 allvals_str += '|';
247 tout << allvals_str << endl;
248 TEST_STRINGS_EQUAL(allvals_str, results[v]);
250 for (size_t count = 0; count < allvals_size; ++count) {
251 tout << "count " << count << endl;
252 for (Xapian::TermIterator i = spy->top_values_begin(100),
253 j = spy->top_values_begin(count);
254 i != spy->top_values_end(100) &&
255 j != spy->top_values_end(count);
256 ++i, ++j) {
257 tout << "j " << j << endl;
258 TEST_EQUAL(*i, *j);
259 TEST_EQUAL(i.get_termfreq(), j.get_termfreq());
265 return true;
268 // Test builtin match spies
269 DEFINE_TESTCASE(matchspy5, backend)
271 Xapian::Database db(get_database("apitest_simpledata"));
272 Xapian::Enquire enquire(db);
273 enquire.set_query(Xapian::Query("this"));
275 Xapian::ValueCountMatchSpy myspy1(1);
276 Xapian::ValueCountMatchSpy myspy2(1);
278 enquire.add_matchspy(&myspy1);
279 enquire.add_matchspy(&myspy2);
280 Xapian::MSet mymset = enquire.get_mset(0, 100);
281 TEST_EQUAL(mymset.size(), 6);
283 Xapian::TermIterator i = myspy1.values_begin();
284 TEST(i != myspy1.values_end());
285 TEST(*i == "h");
286 TEST_EQUAL(i.get_termfreq(), 5);
287 ++i;
288 TEST(i != myspy1.values_end());
289 TEST(*i == "n");
290 TEST_EQUAL(i.get_termfreq(), 1);
291 ++i;
292 TEST(i == myspy1.values_end());
294 i = myspy2.values_begin();
295 TEST(i != myspy2.values_end());
296 TEST(*i == "h");
297 TEST_EQUAL(i.get_termfreq(), 5);
298 ++i;
299 TEST(i != myspy2.values_end());
300 TEST(*i == "n");
301 TEST_EQUAL(i.get_termfreq(), 1);
302 ++i;
303 TEST(i == myspy2.values_end());
305 return true;
308 class MySpy : public Xapian::MatchSpy {
309 void operator()(const Xapian::Document &, double) {
313 // Test exceptions from matchspy base class, and get_description method.
314 DEFINE_TESTCASE(matchspy6, !backend)
316 MySpy spy;
318 TEST_EXCEPTION(Xapian::UnimplementedError, spy.clone());
319 TEST_EXCEPTION(Xapian::UnimplementedError, spy.name());
320 TEST_EXCEPTION(Xapian::UnimplementedError, spy.serialise());
321 TEST_EXCEPTION(Xapian::UnimplementedError,
322 spy.unserialise(std::string(), Xapian::Registry()));
323 TEST_EXCEPTION(Xapian::UnimplementedError, spy.serialise_results());
324 TEST_EXCEPTION(Xapian::UnimplementedError,
325 spy.merge_results(std::string()));
326 TEST_EQUAL(spy.get_description(), "Xapian::MatchSpy()");
328 return true;