Support: quest -f cjk_ngram
[xapian.git] / xapian-core / tests / api_valuestats.cc
blob3e7a8545e556150c9f8e8ba1cf655647f68e9a1d
/* api_valuestats.cc: tests of the value statistics functions.
 *
 * Copyright 2008 Lemur Consulting Ltd
 * Copyright 2008,2009,2011 Olly Betts
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */
22 #include <config.h>
24 #include "api_valuestats.h"
26 #include <xapian.h>
27 #include "testsuite.h"
28 #include "testutils.h"
30 #include "apitest.h"
32 using namespace std;
// #######################################################################
// # Tests start here
37 /// Test of value statistics methods.
38 DEFINE_TESTCASE(valuestats1, writable && valuestats) {
39 Xapian::WritableDatabase db_w = get_writable_database();
41 // Check that counts are initially zero.
42 TEST_EQUAL(db_w.get_value_freq(0), 0);
43 TEST_EQUAL(db_w.get_value_lower_bound(0), "");
44 TEST_EQUAL(db_w.get_value_upper_bound(0), "");
45 TEST_EQUAL(db_w.get_value_freq(1), 0);
46 TEST_EQUAL(db_w.get_value_lower_bound(1), "");
47 TEST_EQUAL(db_w.get_value_upper_bound(1), "");
49 Xapian::Document doc;
50 doc.add_value(0, "hello");
52 // Check that statistics for the correct value slot increase when document
53 // is added. (Check slot 1 first, so that cache invalidation of the last
54 // slot read also gets checked.)
55 db_w.add_document(doc);
56 TEST_EQUAL(db_w.get_value_freq(1), 0);
57 TEST_EQUAL(db_w.get_value_lower_bound(1), "");
58 TEST_EQUAL(db_w.get_value_upper_bound(1), "");
59 TEST_EQUAL(db_w.get_value_freq(0), 1);
60 TEST_EQUAL(db_w.get_value_lower_bound(0), "hello");
61 TEST_EQUAL(db_w.get_value_upper_bound(0), "hello");
63 // Check that statistics work correctly when second document is added.
64 doc = Xapian::Document();
65 doc.add_value(0, "world");
66 doc.add_value(1, "cheese");
67 db_w.replace_document(2, doc);
68 TEST_EQUAL(db_w.get_value_freq(0), 2);
69 TEST_EQUAL(db_w.get_value_lower_bound(0), "hello");
70 TEST_EQUAL(db_w.get_value_upper_bound(0), "world");
71 TEST_EQUAL(db_w.get_value_freq(1), 1);
72 TEST_EQUAL(db_w.get_value_lower_bound(1), "cheese");
73 TEST_EQUAL(db_w.get_value_upper_bound(1), "cheese");
75 // Deleting a document affects the count, but not the bounds.
76 db_w.delete_document(1);
77 TEST_EQUAL(db_w.get_value_freq(1), 1);
78 TEST_EQUAL(db_w.get_value_lower_bound(1), "cheese");
79 TEST_EQUAL(db_w.get_value_upper_bound(1), "cheese");
80 TEST_EQUAL(db_w.get_value_freq(0), 1);
81 TEST_EQUAL(db_w.get_value_lower_bound(0), "hello");
82 TEST_EQUAL(db_w.get_value_upper_bound(0), "world");
84 // Deleting all the documents returns the bounds to their original value.
85 db_w.delete_document(2);
86 TEST_EQUAL(db_w.get_value_freq(0), 0);
87 TEST_EQUAL(db_w.get_value_lower_bound(0), "");
88 TEST_EQUAL(db_w.get_value_upper_bound(0), "");
89 TEST_EQUAL(db_w.get_value_freq(1), 0);
90 TEST_EQUAL(db_w.get_value_lower_bound(1), "");
91 TEST_EQUAL(db_w.get_value_upper_bound(1), "");
93 // Adding a document with a value in one of the old slots should still
94 // end up with tight bounds on it.
95 doc = Xapian::Document();
96 doc.add_value(1, "newval");
97 db_w.replace_document(2, doc);
98 TEST_EQUAL(db_w.get_value_freq(1), 1);
99 TEST_EQUAL(db_w.get_value_lower_bound(1), "newval");
100 TEST_EQUAL(db_w.get_value_upper_bound(1), "newval");
101 TEST_EQUAL(db_w.get_value_freq(0), 0);
102 TEST_EQUAL(db_w.get_value_lower_bound(0), "");
103 TEST_EQUAL(db_w.get_value_upper_bound(0), "");
105 return true;
108 /// Test that value statistics stuff obeys transactions.
109 DEFINE_TESTCASE(valuestats2, transactions && valuestats) {
110 Xapian::WritableDatabase db_w = get_writable_database();
111 Xapian::Database db = get_writable_database_as_database();
113 // Check that counts are initially zero.
114 TEST_EQUAL(db_w.get_value_freq(0), 0);
115 TEST_EQUAL(db_w.get_value_lower_bound(0), "");
116 TEST_EQUAL(db_w.get_value_upper_bound(0), "");
117 TEST_EQUAL(db_w.get_value_freq(1), 0);
118 TEST_EQUAL(db_w.get_value_lower_bound(1), "");
119 TEST_EQUAL(db_w.get_value_upper_bound(1), "");
120 TEST_EQUAL(db.get_value_freq(0), 0);
121 TEST_EQUAL(db.get_value_lower_bound(0), "");
122 TEST_EQUAL(db.get_value_upper_bound(0), "");
123 TEST_EQUAL(db.get_value_freq(1), 0);
124 TEST_EQUAL(db.get_value_lower_bound(1), "");
125 TEST_EQUAL(db.get_value_upper_bound(1), "");
127 Xapian::Document doc;
128 doc.add_value(0, "hello");
130 // Check that statistics for the correct value slot increase when document
131 // is added. (Check slot 1 first, so that cache invalidation of the last
132 // slot read also gets checked.)
133 db_w.add_document(doc);
134 TEST_EQUAL(db_w.get_value_freq(1), 0);
135 TEST_EQUAL(db_w.get_value_lower_bound(1), "");
136 TEST_EQUAL(db_w.get_value_upper_bound(1), "");
137 TEST_EQUAL(db_w.get_value_freq(0), 1);
138 TEST_EQUAL(db_w.get_value_lower_bound(0), "hello");
139 TEST_EQUAL(db_w.get_value_upper_bound(0), "hello");
141 // The readonly database shouldn't change, though.
142 TEST_EQUAL(db.get_value_freq(1), 0);
143 TEST_EQUAL(db.get_value_lower_bound(1), "");
144 TEST_EQUAL(db.get_value_upper_bound(1), "");
145 TEST_EQUAL(db.get_value_freq(0), 0);
146 TEST_EQUAL(db.get_value_lower_bound(0), "");
147 TEST_EQUAL(db.get_value_upper_bound(0), "");
149 // Check that statistics work correctly when second document is added.
150 doc = Xapian::Document();
151 doc.add_value(0, "world");
152 doc.add_value(1, "cheese");
153 db_w.replace_document(2, doc);
154 TEST_EQUAL(db_w.get_value_freq(0), 2);
155 TEST_EQUAL(db_w.get_value_lower_bound(0), "hello");
156 TEST_EQUAL(db_w.get_value_upper_bound(0), "world");
157 TEST_EQUAL(db_w.get_value_freq(1), 1);
158 TEST_EQUAL(db_w.get_value_lower_bound(1), "cheese");
159 TEST_EQUAL(db_w.get_value_upper_bound(1), "cheese");
161 // The readonly database shouldn't change, though.
162 TEST_EQUAL(db.get_value_freq(0), 0);
163 TEST_EQUAL(db.get_value_lower_bound(0), "");
164 TEST_EQUAL(db.get_value_upper_bound(0), "");
165 TEST_EQUAL(db.get_value_freq(1), 0);
166 TEST_EQUAL(db.get_value_lower_bound(1), "");
167 TEST_EQUAL(db.get_value_upper_bound(1), "");
169 // Check that readonly database catches up when a commit is done.
170 db_w.commit();
171 TEST(db.reopen());
172 TEST_EQUAL(db.get_value_freq(1), 1);
173 TEST_EQUAL(db.get_value_lower_bound(1), "cheese");
174 TEST_EQUAL(db.get_value_upper_bound(1), "cheese");
175 TEST_EQUAL(db.get_value_freq(0), 2);
176 TEST_EQUAL(db.get_value_lower_bound(0), "hello");
177 TEST_EQUAL(db.get_value_upper_bound(0), "world");
179 // Deleting a document affects the count, but not the bounds.
180 db_w.delete_document(1);
181 TEST_EQUAL(db_w.get_value_freq(1), 1);
182 TEST_EQUAL(db_w.get_value_lower_bound(1), "cheese");
183 TEST_EQUAL(db_w.get_value_upper_bound(1), "cheese");
184 TEST_EQUAL(db_w.get_value_freq(0), 1);
185 TEST_EQUAL(db_w.get_value_lower_bound(0), "hello");
186 TEST_EQUAL(db_w.get_value_upper_bound(0), "world");
188 // Deleting all the documents returns the bounds to their original value.
189 db_w.delete_document(2);
190 TEST_EQUAL(db_w.get_value_freq(0), 0);
191 TEST_EQUAL(db_w.get_value_lower_bound(0), "");
192 TEST_EQUAL(db_w.get_value_upper_bound(0), "");
193 TEST_EQUAL(db_w.get_value_freq(1), 0);
194 TEST_EQUAL(db_w.get_value_lower_bound(1), "");
195 TEST_EQUAL(db_w.get_value_upper_bound(1), "");
197 // Adding a document with a value in one of the old slots should still
198 // end up with tight bounds on it.
199 doc = Xapian::Document();
200 doc.add_value(1, "newval");
201 db_w.replace_document(2, doc);
202 TEST_EQUAL(db_w.get_value_freq(1), 1);
203 TEST_EQUAL(db_w.get_value_lower_bound(1), "newval");
204 TEST_EQUAL(db_w.get_value_upper_bound(1), "newval");
205 TEST_EQUAL(db_w.get_value_freq(0), 0);
206 TEST_EQUAL(db_w.get_value_lower_bound(0), "");
207 TEST_EQUAL(db_w.get_value_upper_bound(0), "");
209 // Check that a readonly database gets the right statistics, too.
210 db_w.commit();
211 TEST(db.reopen());
212 TEST_EQUAL(db.get_value_freq(0), 0);
213 TEST_EQUAL(db.get_value_lower_bound(0), "");
214 TEST_EQUAL(db.get_value_upper_bound(0), "");
215 TEST_EQUAL(db.get_value_freq(1), 1);
216 TEST_EQUAL(db.get_value_lower_bound(1), "newval");
217 TEST_EQUAL(db.get_value_upper_bound(1), "newval");
219 return true;
222 /// Test reading value statistics from prebuilt databases.
223 DEFINE_TESTCASE(valuestats3, valuestats) {
224 Xapian::Database db = get_database("apitest_simpledata");
226 TEST_EQUAL(db.get_value_freq(1), 6);
227 TEST_EQUAL(db.get_value_lower_bound(1), "h");
228 TEST_EQUAL(db.get_value_upper_bound(1), "n");
229 TEST_EQUAL(db.get_value_freq(2), 6);
230 TEST_EQUAL(db.get_value_lower_bound(2), "d");
231 TEST_EQUAL(db.get_value_upper_bound(2), "i");
232 TEST_EQUAL(db.get_value_freq(3), 6);
233 TEST_EQUAL(db.get_value_lower_bound(3), " ");
234 TEST_EQUAL(db.get_value_upper_bound(3), "s");
235 TEST_EQUAL(db.get_value_freq(4), 6);
236 TEST_EQUAL(db.get_value_lower_bound(4), " ");
237 TEST_EQUAL(db.get_value_upper_bound(4), "y");
238 TEST_EQUAL(db.get_value_freq(5), 6);
239 TEST_EQUAL(db.get_value_lower_bound(5), "e");
240 TEST_EQUAL(db.get_value_upper_bound(5), "p");
241 TEST_EQUAL(db.get_value_freq(6), 6);
242 TEST_EQUAL(db.get_value_lower_bound(6), "a");
243 TEST_EQUAL(db.get_value_upper_bound(6), "t");
244 TEST_EQUAL(db.get_value_freq(7), 6);
245 TEST_EQUAL(db.get_value_lower_bound(7), " ");
246 TEST_EQUAL(db.get_value_upper_bound(7), "r");
247 TEST_EQUAL(db.get_value_freq(8), 6);
248 TEST_EQUAL(db.get_value_lower_bound(8), "a");
249 TEST_EQUAL(db.get_value_upper_bound(8), "t");
250 TEST_EQUAL(db.get_value_freq(9), 6);
251 TEST_EQUAL(db.get_value_lower_bound(9), " ");
252 TEST_EQUAL(db.get_value_upper_bound(9), "n");
253 TEST_EQUAL(db.get_value_freq(10), 6);
254 TEST_EQUAL(db.get_value_lower_bound(10), "e");
255 TEST_EQUAL(db.get_value_upper_bound(10), "w");
256 TEST_EQUAL(db.get_value_freq(11), 6);
257 TEST_EQUAL(db.get_value_lower_bound(11), "\xb9P");
258 TEST_EQUAL(db.get_value_upper_bound(11), "\xc7\x04");
260 return true;
263 DEFINE_TESTCASE(valuestats4, transactions && valuestats) {
264 const size_t FLUSH_THRESHOLD = 10000;
266 Xapian::WritableDatabase db_w = get_writable_database();
267 Xapian::Document doc;
268 doc.add_value(1, "test");
269 for (size_t i = 0; i < FLUSH_THRESHOLD; ++i) {
270 db_w.add_document(doc);
273 Xapian::Database db = get_writable_database_as_database();
274 // Check that we had an automatic-commit.
275 TEST_EQUAL(db.get_doccount(), FLUSH_THRESHOLD);
276 // Check that the value stats are there.
277 TEST_EQUAL(db.get_value_freq(1), FLUSH_THRESHOLD);
278 TEST_EQUAL(db.get_value_lower_bound(1), "test");
279 TEST_EQUAL(db.get_value_upper_bound(1), "test");
281 db_w.begin_transaction();
282 doc.add_value(1, "umbrella");
283 db_w.cancel_transaction();
287 Xapian::Database db = get_writable_database_as_database();
288 // Check that we had an automatic-commit.
289 TEST_EQUAL(db.get_doccount(), FLUSH_THRESHOLD);
290 // Check that the value stats are there.
291 TEST_EQUAL(db.get_value_freq(1), FLUSH_THRESHOLD);
292 TEST_EQUAL(db.get_value_lower_bound(1), "test");
293 TEST_EQUAL(db.get_value_upper_bound(1), "test");
296 return true;
299 /// Regression test for bug fixed in 1.1.1 which led to incorrect valuestats.
300 DEFINE_TESTCASE(valuestats5, !backend) {
301 Xapian::Document doc;
302 doc.add_value(0, "zero");
303 doc.add_value(1, "one");
304 doc.add_value(2, "two");
305 doc.add_value(3, "three");
306 doc.add_value(4, "");
307 doc.add_value(5, "five");
308 doc.remove_value(3);
309 doc.add_value(1, "");
311 // Check that we don't have any empty values reported.
312 size_t c = 0;
313 Xapian::ValueIterator v = doc.values_begin();
314 while (v != doc.values_end()) {
315 TEST(!(*v).empty());
316 ++c;
317 ++v;
319 TEST_EQUAL(c, 3); // 0, 2, 5
321 return true;