posting list iterating now works
[xapian.git] / xapian-core / backends / honey / honey_postlist.cc
blob949cd08e2f2d4f1b5cb45b492739d5042d2742f4
2 #include <config.h>
4 #include "honey_postlist.h"
6 #include "honey_cursor.h"
7 #include "honey_database.h"
8 #include "honey_positionlist.h"
9 #include "honey_postlist_encodings.h"
10 #include "wordaccess.h"
12 #include <string>
14 using namespace Honey;
15 using namespace std;
17 bool
18 HoneyPostList::update_reader()
20 Xapian::docid first_did = docid_from_key(term, cursor->current_key);
21 if (!first_did) return false;
23 cursor->read_tag();
24 const string& tag = cursor->current_tag;
25 reader.assign(tag.data(), tag.size(), first_did);
26 return true;
29 HoneyPostList::HoneyPostList(const HoneyDatabase* db_,
30 const string& term_,
31 HoneyCursor* cursor_)
32 : LeafPostList(term_), cursor(cursor_), db(db_)
34 cursor->read_tag();
35 const string& chunk = cursor->current_tag;
37 const char* p = chunk.data();
38 const char* pend = p + chunk.size();
39 // FIXME: Make use of [first,last] ranges to calculate better estimates and
40 // potentially to spot subqueries that can't match anything.
41 Xapian::termcount cf;
42 Xapian::docid first_did;
43 if (!decode_initial_chunk_header(&p, pend, termfreq, cf,
44 first_did, last_did))
45 throw Xapian::DatabaseCorruptError("Postlist initial chunk header");
46 reader.assign(p, pend - p, first_did);
49 HoneyPostList::~HoneyPostList()
51 delete cursor;
52 delete position_list;
55 Xapian::doccount
56 HoneyPostList::get_termfreq() const
58 return termfreq;
61 LeafPostList*
62 HoneyPostList::open_nearby_postlist(const string& term_) const
64 Assert(!term_.empty());
65 // FIXME: Once Honey supports writing, we need to return NULL here if the DB is writable.
66 return new HoneyPostList(db, term_, new HoneyCursor(*cursor));
69 Xapian::docid
70 HoneyPostList::get_docid() const
72 return reader.get_docid();
75 Xapian::termcount
76 HoneyPostList::get_wdf() const
78 return reader.get_wdf();
81 bool
82 HoneyPostList::at_end() const
84 return cursor == NULL;
87 PositionList*
88 HoneyPostList::read_position_list()
90 if (rare(position_list == NULL))
91 position_list = new HoneyPositionList();
92 if (!position_list->read_data(db->position_table, get_docid(), term))
93 return NULL;
94 return position_list;
97 PositionList*
98 HoneyPostList::open_position_list() const
100 return new HoneyPositionList(db->position_table, get_docid(), term);
103 PostList*
104 HoneyPostList::next(double)
106 Assert(cursor);
107 if (!reader.at_end()) {
108 reader.next();
109 if (!reader.at_end()) return NULL;
110 cursor->next();
113 if (!cursor->after_end()) {
114 if (update_reader()) {
115 if (!reader.at_end()) return NULL;
119 // We've reached the end.
120 delete cursor;
121 cursor = NULL;
122 return NULL;
125 PostList*
126 HoneyPostList::skip_to(Xapian::docid did, double)
128 if (rare(!cursor)) {
129 // No-op if already at_end.
130 return NULL;
133 if (!reader.at_end()) {
134 reader.skip_to(did);
135 if (!reader.at_end()) return NULL;
138 if (did > last_did) goto set_at_end;
140 if (!cursor->find_entry(make_postingchunk_key(term, did))) {
141 if (update_reader()) {
142 reader.skip_to(did);
143 if (!reader.at_end()) return NULL;
145 // The requested docid is between two chunks.
146 cursor->next();
149 // Either an exact match, or in a gap before the start of a chunk.
150 if (!cursor->after_end()) {
151 if (update_reader()) {
152 if (!reader.at_end()) return NULL;
156 set_at_end:
157 // We've reached the end.
158 delete cursor;
159 cursor = NULL;
160 return NULL;
163 PostList*
164 HoneyPostList::check(Xapian::docid did, double, bool& valid)
166 if (rare(!cursor)) {
167 // Already at_end.
168 valid = true;
169 return NULL;
172 if (!reader.at_end()) {
173 // Check for the requested docid in the current block.
174 reader.skip_to(did);
175 if (!reader.at_end()) {
176 valid = true;
177 return NULL;
181 if (did > last_did) goto set_at_end;
183 // Try moving to the appropriate chunk.
184 if (!cursor->find_entry(make_postingchunk_key(term, did))) {
185 // We're in a chunk which might contain the docid.
186 if (update_reader()) {
187 reader.skip_to(did);
188 if (!reader.at_end()) {
189 valid = true;
190 return NULL;
194 set_at_end:
195 // We've reached the end.
196 delete cursor;
197 cursor = NULL;
198 valid = true;
199 return NULL;
202 // We had an exact match for a chunk starting with specified docid.
203 Assert(!cursor->after_end());
204 if (!update_reader()) {
205 // We found the exact key we built so it must be a posting chunk.
206 // Therefore update_reader() "can't possibly fail".
207 Assert(false);
210 valid = true;
211 return NULL;
214 string
215 HoneyPostList::get_description() const
217 string desc = "HoneyPostList(";
218 desc += term;
219 desc += ')';
220 return desc;
223 namespace Honey {
225 void
226 PostingChunkReader::assign(const char * p_, size_t len, Xapian::docid did_)
228 Xapian::docid last_did_in_chunk;
229 const char* pend = p_ + len;
230 if (!decode_delta_chunk_header(&p_, pend, did_, last_did_in_chunk)) {
231 throw Xapian::DatabaseCorruptError("Postlist delta chunk header");
233 if ((pend - p_) % 8 != 4)
234 throw Xapian::DatabaseCorruptError("Doclen data length not 4 more than a multiple of 8");
235 if (rare(p_ == pend)) {
236 p = NULL;
237 return;
239 p = reinterpret_cast<const unsigned char*>(p_);
240 end = reinterpret_cast<const unsigned char*>(pend);
241 did = did_;
242 last_did = last_did_in_chunk;
245 void
246 PostingChunkReader::next()
248 if ((end - p) % 8 != 0) {
249 // FIXME: Alignment guarantees? Hard with header.
250 wdf = unaligned_read4(p);
251 p += 4;
252 return;
255 if (p == end) {
256 p = NULL;
257 return;
260 // FIXME: Alignment guarantees? Hard with header.
261 did += unaligned_read4(p) + 1;
262 wdf = unaligned_read4(p + 4);
263 p += 8;
266 void
267 PostingChunkReader::skip_to(Xapian::docid target)
269 if (p == NULL || target <= did)
270 return;
272 if ((end - p) % 8 != 0) {
273 if (target == did) {
274 // FIXME: Alignment guarantees? Hard with header.
275 wdf = unaligned_read4(p);
276 p += 4;
277 return;
279 p += 4;
282 if (target > last_did) {
283 p = NULL;
284 return;
287 // FIXME: Special case target == last_did to just decode the wdf from the
288 // end?
290 do {
291 if (rare(p == end)) {
292 // FIXME: Shouldn't happen unless last_did was wrong.
293 p = NULL;
294 return;
297 // FIXME: Alignment guarantees? Hard with header.
298 did += unaligned_read4(p) + 1;
299 p += 8;
300 } while (target > did);
301 wdf = unaligned_read4(p - 4);