[honey] New format for more keys in postlist table
[xapian.git] / xapian-core / backends / honey / honey_alldocspostlist.cc
blob4b4d0d23b3e2f7cd1361da54a83b9078c65d05b1
1 /** @file honey_alldocspostlist.cc
2 * @brief A PostList which iterates over all documents in a HoneyDatabase.
3 */
4 /* Copyright (C) 2006,2007,2008,2009,2018 Olly Betts
5 * Copyright (C) 2008 Lemur Consulting Ltd
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #include <config.h>
23 #include "honey_alldocspostlist.h"
25 #include "honey_database.h"
26 #include "honey_defs.h"
28 #include "debuglog.h"
29 #include "str.h"
30 #include "wordaccess.h"
32 #include <string>
34 using namespace Honey;
35 using namespace std;
37 HoneyAllDocsPostList::HoneyAllDocsPostList(const HoneyDatabase* db,
38 Xapian::doccount doccount_)
39 : LeafPostList(string()),
40 cursor(db->get_postlist_cursor()),
41 doccount(doccount_)
43 LOGCALL_CTOR(DB, "HoneyAllDocsPostList", db | doccount_);
44 static const char doclen_key_prefix[2] = {
45 0, char(Honey::KEY_DOCLEN_CHUNK)
47 cursor->find_entry_ge(string(doclen_key_prefix, 2));
50 HoneyAllDocsPostList::~HoneyAllDocsPostList()
52 delete cursor;
55 Xapian::doccount
56 HoneyAllDocsPostList::get_termfreq() const
58 LOGCALL(DB, Xapian::doccount, "HoneyAllDocsPostList::get_termfreq", NO_ARGS);
59 RETURN(doccount);
62 Xapian::termcount
63 HoneyAllDocsPostList::get_doclength() const
65 LOGCALL(DB, Xapian::termcount, "HoneyAllDocsPostList::get_doclength", NO_ARGS);
66 RETURN(reader.get_doclength());
69 Xapian::docid
70 HoneyAllDocsPostList::get_docid() const
72 return reader.get_docid();
75 Xapian::termcount
76 HoneyAllDocsPostList::get_wdf() const
78 LOGCALL(DB, Xapian::termcount, "HoneyAllDocsPostList::get_wdf", NO_ARGS);
79 AssertParanoid(!at_end());
80 RETURN(1);
83 bool
84 HoneyAllDocsPostList::at_end() const
86 return cursor == NULL;
89 PostList*
90 HoneyAllDocsPostList::next(double)
92 Assert(cursor);
93 if (!reader.at_end()) {
94 if (reader.next()) return NULL;
95 cursor->next();
98 if (!cursor->after_end()) {
99 if (reader.update(cursor)) {
100 if (!reader.at_end()) return NULL;
104 // We've reached the end.
105 delete cursor;
106 cursor = NULL;
107 return NULL;
110 PostList*
111 HoneyAllDocsPostList::skip_to(Xapian::docid did, double)
113 if (rare(!cursor)) {
114 // No-op if already at_end.
115 return NULL;
118 Assert(!reader.at_end());
120 if (reader.skip_to(did))
121 return NULL;
123 if (cursor->find_entry_ge(make_doclenchunk_key(did))) {
124 // Exact match.
125 if (rare(!reader.update(cursor))) {
126 // Shouldn't be possible.
127 Assert(false);
129 if (reader.skip_to(did)) return NULL;
130 // The chunk's last docid is did, so skip_to() should always succeed.
131 Assert(false);
132 } else if (!cursor->after_end()) {
133 if (reader.update(cursor)) {
134 if (reader.skip_to(did)) return NULL;
135 // The chunk's last docid is >= did, so skip_to() should always
136 // succeed.
137 Assert(false);
141 // We've reached the end.
142 delete cursor;
143 cursor = NULL;
144 return NULL;
147 PostList*
148 HoneyAllDocsPostList::check(Xapian::docid did, double, bool& valid)
150 if (rare(!cursor)) {
151 // Already at_end.
152 valid = true;
153 return NULL;
156 if (!reader.at_end()) {
157 // Check for the requested docid in the current block.
158 if (reader.skip_to(did)) {
159 valid = true;
160 return NULL;
164 // Try moving to the appropriate chunk.
165 if (!cursor->find_entry_ge(make_doclenchunk_key(did))) {
166 // We're in a chunk which might contain the docid.
167 if (reader.update(cursor)) {
168 if (reader.skip_to(did)) {
169 valid = true;
170 return NULL;
173 valid = false;
174 return NULL;
177 // We had an exact match for a chunk starting with specified docid.
178 Assert(!cursor->after_end());
179 if (!reader.update(cursor)) {
180 // We found the exact key we built so it must be a doclen chunk.
181 // Therefore reader.update() "can't possibly fail".
182 Assert(false);
185 valid = true;
186 return NULL;
189 string
190 HoneyAllDocsPostList::get_description() const
192 string desc = "HoneyAllDocsPostList(did=";
193 desc += str(get_docid());
194 desc += ",doccount=";
195 desc += str(doccount);
196 desc += ')';
197 return desc;
200 namespace Honey {
202 bool
203 DocLenChunkReader::read_doclen(const unsigned char* q)
205 switch (width) {
206 case 1:
207 doclen = *q;
208 return doclen != 0xff;
209 case 2:
210 doclen = unaligned_read2(q);
211 return doclen != 0xffff;
212 case 3:
213 // q - 1 is always a valid byte - either the leading byte holding
214 // the data width, or else the last byte of the previous value.
215 // unaligned_read4() uses bigendian order, so we just need to mask
216 // off the most significant byte.
217 doclen = unaligned_read4(q - 1) & 0xffffff;
218 return doclen != 0xffffff;
219 default:
220 doclen = unaligned_read4(q);
221 return doclen != 0xffffffff;
225 bool
226 DocLenChunkReader::update(HoneyCursor* cursor)
228 Xapian::docid last_did = docid_from_key(cursor->current_key);
229 if (!last_did) return false;
231 cursor->read_tag();
233 size_t len = cursor->current_tag.size();
234 if (rare(len == 0))
235 throw Xapian::DatabaseCorruptError("Doclen data chunk is empty");
237 p = reinterpret_cast<const unsigned char*>(cursor->current_tag.data());
238 end = p + len;
239 width = *p++;
240 if (((width - 8) &~ 0x18) != 0) {
241 throw Xapian::DatabaseCorruptError("Invalid doclen width - currently "
242 "8, 16, 24 and 32 are supported");
244 width /= 8;
245 if ((len - 1) % width != 0)
246 throw Xapian::DatabaseCorruptError("Doclen data chunk has junk at end");
247 Xapian::docid first_did = last_did - (len - 1) / width + 1;
249 did = first_did;
250 if (!read_doclen(p)) {
251 // The first doclen value shouldn't be missing.
252 throw Xapian::DatabaseCorruptError("Invalid first doclen value");
254 return true;
257 bool
258 DocLenChunkReader::next()
260 do {
261 p += width;
262 if (p == end) {
263 p = NULL;
264 return false;
267 ++did;
268 } while (!read_doclen(p));
269 return true;
272 bool
273 DocLenChunkReader::skip_to(Xapian::docid target)
275 if (p == NULL)
276 return false;
278 if (target <= did)
279 return true;
281 Xapian::docid delta = target - did;
282 if (delta >= Xapian::docid(end - p) / width) {
283 p = NULL;
284 return false;
287 did = target;
288 p += delta * width;
290 return read_doclen(p) || next();
293 // FIXME: Add check() method, which doesn't advance when read_doclen() returns
294 // false?
296 bool
297 DocLenChunkReader::find_doclength(Xapian::docid target)
299 if (target < did)
300 return false;
302 Xapian::docid delta = target - did;
303 Assert(width > 0);
304 if (delta >= Xapian::docid(end - p) / width) {
305 return false;
308 return read_doclen(p + delta * width);