/* api_valuestats.cc: tests of the value statistics functions.
 *
 * Copyright 2008 Lemur Consulting Ltd
 * Copyright 2008,2009,2011 Olly Betts
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
 * USA
 */
24 #include "api_valuestats.h"
27 #include "testsuite.h"
28 #include "testutils.h"
34 // #######################################################################
37 /// Test of value statistics methods.
38 DEFINE_TESTCASE(valuestats1
, writable
&& valuestats
) {
39 Xapian::WritableDatabase db_w
= get_writable_database();
41 // Check that counts are initially zero.
42 TEST_EQUAL(db_w
.get_value_freq(0), 0);
43 TEST_EQUAL(db_w
.get_value_lower_bound(0), "");
44 TEST_EQUAL(db_w
.get_value_upper_bound(0), "");
45 TEST_EQUAL(db_w
.get_value_freq(1), 0);
46 TEST_EQUAL(db_w
.get_value_lower_bound(1), "");
47 TEST_EQUAL(db_w
.get_value_upper_bound(1), "");
50 doc
.add_value(0, "hello");
52 // Check that statistics for the correct value slot increase when document
53 // is added. (Check slot 1 first, so that cache invalidation of the last
54 // slot read also gets checked.)
55 db_w
.add_document(doc
);
56 TEST_EQUAL(db_w
.get_value_freq(1), 0);
57 TEST_EQUAL(db_w
.get_value_lower_bound(1), "");
58 TEST_EQUAL(db_w
.get_value_upper_bound(1), "");
59 TEST_EQUAL(db_w
.get_value_freq(0), 1);
60 TEST_EQUAL(db_w
.get_value_lower_bound(0), "hello");
61 TEST_EQUAL(db_w
.get_value_upper_bound(0), "hello");
63 // Check that statistics work correctly when second document is added.
64 doc
= Xapian::Document();
65 doc
.add_value(0, "world");
66 doc
.add_value(1, "cheese");
67 db_w
.replace_document(2, doc
);
68 TEST_EQUAL(db_w
.get_value_freq(0), 2);
69 TEST_EQUAL(db_w
.get_value_lower_bound(0), "hello");
70 TEST_EQUAL(db_w
.get_value_upper_bound(0), "world");
71 TEST_EQUAL(db_w
.get_value_freq(1), 1);
72 TEST_EQUAL(db_w
.get_value_lower_bound(1), "cheese");
73 TEST_EQUAL(db_w
.get_value_upper_bound(1), "cheese");
75 // Deleting a document affects the count, but not the bounds.
76 db_w
.delete_document(1);
77 TEST_EQUAL(db_w
.get_value_freq(1), 1);
78 TEST_EQUAL(db_w
.get_value_lower_bound(1), "cheese");
79 TEST_EQUAL(db_w
.get_value_upper_bound(1), "cheese");
80 TEST_EQUAL(db_w
.get_value_freq(0), 1);
81 TEST_EQUAL(db_w
.get_value_lower_bound(0), "hello");
82 TEST_EQUAL(db_w
.get_value_upper_bound(0), "world");
84 // Deleting all the documents returns the bounds to their original value.
85 db_w
.delete_document(2);
86 TEST_EQUAL(db_w
.get_value_freq(0), 0);
87 TEST_EQUAL(db_w
.get_value_lower_bound(0), "");
88 TEST_EQUAL(db_w
.get_value_upper_bound(0), "");
89 TEST_EQUAL(db_w
.get_value_freq(1), 0);
90 TEST_EQUAL(db_w
.get_value_lower_bound(1), "");
91 TEST_EQUAL(db_w
.get_value_upper_bound(1), "");
93 // Adding a document with a value in one of the old slots should still
94 // end up with tight bounds on it.
95 doc
= Xapian::Document();
96 doc
.add_value(1, "newval");
97 db_w
.replace_document(2, doc
);
98 TEST_EQUAL(db_w
.get_value_freq(1), 1);
99 TEST_EQUAL(db_w
.get_value_lower_bound(1), "newval");
100 TEST_EQUAL(db_w
.get_value_upper_bound(1), "newval");
101 TEST_EQUAL(db_w
.get_value_freq(0), 0);
102 TEST_EQUAL(db_w
.get_value_lower_bound(0), "");
103 TEST_EQUAL(db_w
.get_value_upper_bound(0), "");
108 /// Test that value statistics stuff obeys transactions.
109 DEFINE_TESTCASE(valuestats2
, transactions
&& valuestats
) {
110 Xapian::WritableDatabase db_w
= get_writable_database();
111 Xapian::Database db
= get_writable_database_as_database();
113 // Check that counts are initially zero.
114 TEST_EQUAL(db_w
.get_value_freq(0), 0);
115 TEST_EQUAL(db_w
.get_value_lower_bound(0), "");
116 TEST_EQUAL(db_w
.get_value_upper_bound(0), "");
117 TEST_EQUAL(db_w
.get_value_freq(1), 0);
118 TEST_EQUAL(db_w
.get_value_lower_bound(1), "");
119 TEST_EQUAL(db_w
.get_value_upper_bound(1), "");
120 TEST_EQUAL(db
.get_value_freq(0), 0);
121 TEST_EQUAL(db
.get_value_lower_bound(0), "");
122 TEST_EQUAL(db
.get_value_upper_bound(0), "");
123 TEST_EQUAL(db
.get_value_freq(1), 0);
124 TEST_EQUAL(db
.get_value_lower_bound(1), "");
125 TEST_EQUAL(db
.get_value_upper_bound(1), "");
127 Xapian::Document doc
;
128 doc
.add_value(0, "hello");
130 // Check that statistics for the correct value slot increase when document
131 // is added. (Check slot 1 first, so that cache invalidation of the last
132 // slot read also gets checked.)
133 db_w
.add_document(doc
);
134 TEST_EQUAL(db_w
.get_value_freq(1), 0);
135 TEST_EQUAL(db_w
.get_value_lower_bound(1), "");
136 TEST_EQUAL(db_w
.get_value_upper_bound(1), "");
137 TEST_EQUAL(db_w
.get_value_freq(0), 1);
138 TEST_EQUAL(db_w
.get_value_lower_bound(0), "hello");
139 TEST_EQUAL(db_w
.get_value_upper_bound(0), "hello");
141 // The readonly database shouldn't change, though.
142 TEST_EQUAL(db
.get_value_freq(1), 0);
143 TEST_EQUAL(db
.get_value_lower_bound(1), "");
144 TEST_EQUAL(db
.get_value_upper_bound(1), "");
145 TEST_EQUAL(db
.get_value_freq(0), 0);
146 TEST_EQUAL(db
.get_value_lower_bound(0), "");
147 TEST_EQUAL(db
.get_value_upper_bound(0), "");
149 // Check that statistics work correctly when second document is added.
150 doc
= Xapian::Document();
151 doc
.add_value(0, "world");
152 doc
.add_value(1, "cheese");
153 db_w
.replace_document(2, doc
);
154 TEST_EQUAL(db_w
.get_value_freq(0), 2);
155 TEST_EQUAL(db_w
.get_value_lower_bound(0), "hello");
156 TEST_EQUAL(db_w
.get_value_upper_bound(0), "world");
157 TEST_EQUAL(db_w
.get_value_freq(1), 1);
158 TEST_EQUAL(db_w
.get_value_lower_bound(1), "cheese");
159 TEST_EQUAL(db_w
.get_value_upper_bound(1), "cheese");
161 // The readonly database shouldn't change, though.
162 TEST_EQUAL(db
.get_value_freq(0), 0);
163 TEST_EQUAL(db
.get_value_lower_bound(0), "");
164 TEST_EQUAL(db
.get_value_upper_bound(0), "");
165 TEST_EQUAL(db
.get_value_freq(1), 0);
166 TEST_EQUAL(db
.get_value_lower_bound(1), "");
167 TEST_EQUAL(db
.get_value_upper_bound(1), "");
169 // Check that readonly database catches up when a commit is done.
172 TEST_EQUAL(db
.get_value_freq(1), 1);
173 TEST_EQUAL(db
.get_value_lower_bound(1), "cheese");
174 TEST_EQUAL(db
.get_value_upper_bound(1), "cheese");
175 TEST_EQUAL(db
.get_value_freq(0), 2);
176 TEST_EQUAL(db
.get_value_lower_bound(0), "hello");
177 TEST_EQUAL(db
.get_value_upper_bound(0), "world");
179 // Deleting a document affects the count, but not the bounds.
180 db_w
.delete_document(1);
181 TEST_EQUAL(db_w
.get_value_freq(1), 1);
182 TEST_EQUAL(db_w
.get_value_lower_bound(1), "cheese");
183 TEST_EQUAL(db_w
.get_value_upper_bound(1), "cheese");
184 TEST_EQUAL(db_w
.get_value_freq(0), 1);
185 TEST_EQUAL(db_w
.get_value_lower_bound(0), "hello");
186 TEST_EQUAL(db_w
.get_value_upper_bound(0), "world");
188 // Deleting all the documents returns the bounds to their original value.
189 db_w
.delete_document(2);
190 TEST_EQUAL(db_w
.get_value_freq(0), 0);
191 TEST_EQUAL(db_w
.get_value_lower_bound(0), "");
192 TEST_EQUAL(db_w
.get_value_upper_bound(0), "");
193 TEST_EQUAL(db_w
.get_value_freq(1), 0);
194 TEST_EQUAL(db_w
.get_value_lower_bound(1), "");
195 TEST_EQUAL(db_w
.get_value_upper_bound(1), "");
197 // Adding a document with a value in one of the old slots should still
198 // end up with tight bounds on it.
199 doc
= Xapian::Document();
200 doc
.add_value(1, "newval");
201 db_w
.replace_document(2, doc
);
202 TEST_EQUAL(db_w
.get_value_freq(1), 1);
203 TEST_EQUAL(db_w
.get_value_lower_bound(1), "newval");
204 TEST_EQUAL(db_w
.get_value_upper_bound(1), "newval");
205 TEST_EQUAL(db_w
.get_value_freq(0), 0);
206 TEST_EQUAL(db_w
.get_value_lower_bound(0), "");
207 TEST_EQUAL(db_w
.get_value_upper_bound(0), "");
209 // Check that a readonly database gets the right statistics, too.
212 TEST_EQUAL(db
.get_value_freq(0), 0);
213 TEST_EQUAL(db
.get_value_lower_bound(0), "");
214 TEST_EQUAL(db
.get_value_upper_bound(0), "");
215 TEST_EQUAL(db
.get_value_freq(1), 1);
216 TEST_EQUAL(db
.get_value_lower_bound(1), "newval");
217 TEST_EQUAL(db
.get_value_upper_bound(1), "newval");
222 /// Test reading value statistics from prebuilt databases.
223 DEFINE_TESTCASE(valuestats3
, valuestats
) {
224 Xapian::Database db
= get_database("apitest_simpledata");
226 TEST_EQUAL(db
.get_value_freq(1), 6);
227 TEST_EQUAL(db
.get_value_lower_bound(1), "h");
228 TEST_EQUAL(db
.get_value_upper_bound(1), "n");
229 TEST_EQUAL(db
.get_value_freq(2), 6);
230 TEST_EQUAL(db
.get_value_lower_bound(2), "d");
231 TEST_EQUAL(db
.get_value_upper_bound(2), "i");
232 TEST_EQUAL(db
.get_value_freq(3), 6);
233 TEST_EQUAL(db
.get_value_lower_bound(3), " ");
234 TEST_EQUAL(db
.get_value_upper_bound(3), "s");
235 TEST_EQUAL(db
.get_value_freq(4), 6);
236 TEST_EQUAL(db
.get_value_lower_bound(4), " ");
237 TEST_EQUAL(db
.get_value_upper_bound(4), "y");
238 TEST_EQUAL(db
.get_value_freq(5), 6);
239 TEST_EQUAL(db
.get_value_lower_bound(5), "e");
240 TEST_EQUAL(db
.get_value_upper_bound(5), "p");
241 TEST_EQUAL(db
.get_value_freq(6), 6);
242 TEST_EQUAL(db
.get_value_lower_bound(6), "a");
243 TEST_EQUAL(db
.get_value_upper_bound(6), "t");
244 TEST_EQUAL(db
.get_value_freq(7), 6);
245 TEST_EQUAL(db
.get_value_lower_bound(7), " ");
246 TEST_EQUAL(db
.get_value_upper_bound(7), "r");
247 TEST_EQUAL(db
.get_value_freq(8), 6);
248 TEST_EQUAL(db
.get_value_lower_bound(8), "a");
249 TEST_EQUAL(db
.get_value_upper_bound(8), "t");
250 TEST_EQUAL(db
.get_value_freq(9), 6);
251 TEST_EQUAL(db
.get_value_lower_bound(9), " ");
252 TEST_EQUAL(db
.get_value_upper_bound(9), "n");
253 TEST_EQUAL(db
.get_value_freq(10), 6);
254 TEST_EQUAL(db
.get_value_lower_bound(10), "e");
255 TEST_EQUAL(db
.get_value_upper_bound(10), "w");
256 TEST_EQUAL(db
.get_value_freq(11), 6);
257 TEST_EQUAL(db
.get_value_lower_bound(11), "\xb9P");
258 TEST_EQUAL(db
.get_value_upper_bound(11), "\xc7\x04");
263 DEFINE_TESTCASE(valuestats4
, transactions
&& valuestats
) {
264 const size_t FLUSH_THRESHOLD
= 10000;
266 Xapian::WritableDatabase db_w
= get_writable_database();
267 Xapian::Document doc
;
268 doc
.add_value(1, "test");
269 for (size_t i
= 0; i
< FLUSH_THRESHOLD
; ++i
) {
270 db_w
.add_document(doc
);
273 Xapian::Database db
= get_writable_database_as_database();
274 // Check that we had an automatic-commit.
275 TEST_EQUAL(db
.get_doccount(), FLUSH_THRESHOLD
);
276 // Check that the value stats are there.
277 TEST_EQUAL(db
.get_value_freq(1), FLUSH_THRESHOLD
);
278 TEST_EQUAL(db
.get_value_lower_bound(1), "test");
279 TEST_EQUAL(db
.get_value_upper_bound(1), "test");
281 db_w
.begin_transaction();
282 doc
.add_value(1, "umbrella");
283 db_w
.cancel_transaction();
287 Xapian::Database db
= get_writable_database_as_database();
288 // Check that we had an automatic-commit.
289 TEST_EQUAL(db
.get_doccount(), FLUSH_THRESHOLD
);
290 // Check that the value stats are there.
291 TEST_EQUAL(db
.get_value_freq(1), FLUSH_THRESHOLD
);
292 TEST_EQUAL(db
.get_value_lower_bound(1), "test");
293 TEST_EQUAL(db
.get_value_upper_bound(1), "test");
299 /// Regression test for bug fixed in 1.1.1 which led to incorrect valuestats.
300 DEFINE_TESTCASE(valuestats5
, !backend
) {
301 Xapian::Document doc
;
302 doc
.add_value(0, "zero");
303 doc
.add_value(1, "one");
304 doc
.add_value(2, "two");
305 doc
.add_value(3, "three");
306 doc
.add_value(4, "");
307 doc
.add_value(5, "five");
309 doc
.add_value(1, "");
311 // Check that we don't have any empty values reported.
313 Xapian::ValueIterator v
= doc
.values_begin();
314 while (v
!= doc
.values_end()) {
319 TEST_EQUAL(c
, 3); // 0, 2, 5