4 #include "honey_postlist.h"
6 #include "honey_cursor.h"
7 #include "honey_database.h"
8 #include "honey_positionlist.h"
9 #include "honey_postlist_encodings.h"
10 #include "wordaccess.h"
14 using namespace Honey
;
18 HoneyPostList::update_reader()
20 Xapian::docid first_did
= docid_from_key(term
, cursor
->current_key
);
21 if (!first_did
) return false;
24 const string
& tag
= cursor
->current_tag
;
25 reader
.assign(tag
.data(), tag
.size(), first_did
);
29 HoneyPostList::HoneyPostList(const HoneyDatabase
* db_
,
32 : LeafPostList(term_
), cursor(cursor_
), db(db_
)
35 // Term not present in db.
41 const string
& chunk
= cursor
->current_tag
;
43 const char* p
= chunk
.data();
44 const char* pend
= p
+ chunk
.size();
45 // FIXME: Make use of [first,last] ranges to calculate better estimates and
46 // potentially to spot subqueries that can't match anything.
48 Xapian::docid first_did
;
49 if (!decode_initial_chunk_header(&p
, pend
, termfreq
, cf
,
51 throw Xapian::DatabaseCorruptError("Postlist initial chunk header");
52 reader
.assign(p
, pend
- p
, first_did
);
55 HoneyPostList::~HoneyPostList()
62 HoneyPostList::get_termfreq() const
68 HoneyPostList::open_nearby_postlist(const string
& term_
) const
70 Assert(!term_
.empty());
71 return NULL
; // FIXME: reenable once fixed
72 // FIXME: Once Honey supports writing, we need to return NULL here if the DB is writable.
73 //return new HoneyPostList(db, term_, new HoneyCursor(*cursor));
77 HoneyPostList::get_docid() const
79 return reader
.get_docid();
83 HoneyPostList::get_wdf() const
85 return reader
.get_wdf();
89 HoneyPostList::at_end() const
91 return cursor
== NULL
;
95 HoneyPostList::read_position_list()
97 if (rare(position_list
== NULL
))
98 position_list
= new HoneyPositionList();
99 if (!position_list
->read_data(db
->position_table
, get_docid(), term
))
101 return position_list
;
105 HoneyPostList::open_position_list() const
107 return new HoneyPositionList(db
->position_table
, get_docid(), term
);
111 HoneyPostList::next(double)
114 // This happens for terms not present in db.
115 AssertEq(termfreq
, 0);
119 if (!reader
.at_end()) {
121 if (!reader
.at_end()) return NULL
;
125 if (!cursor
->after_end()) {
126 if (update_reader()) {
127 if (!reader
.at_end()) return NULL
;
131 // We've reached the end.
138 HoneyPostList::skip_to(Xapian::docid did
, double)
141 // No-op if already at_end.
145 if (!reader
.at_end()) {
147 if (!reader
.at_end()) return NULL
;
150 if (did
> last_did
) goto set_at_end
;
152 if (!cursor
->find_entry(make_postingchunk_key(term
, did
))) {
153 if (update_reader()) {
155 if (!reader
.at_end()) return NULL
;
157 // The requested docid is between two chunks.
161 // Either an exact match, or in a gap before the start of a chunk.
162 if (!cursor
->after_end()) {
163 if (update_reader()) {
164 if (!reader
.at_end()) return NULL
;
169 // We've reached the end.
176 HoneyPostList::check(Xapian::docid did
, double, bool& valid
)
184 if (!reader
.at_end()) {
185 // Check for the requested docid in the current block.
187 if (!reader
.at_end()) {
193 if (did
> last_did
) goto set_at_end
;
195 // Try moving to the appropriate chunk.
196 if (!cursor
->find_entry(make_postingchunk_key(term
, did
))) {
197 // We're in a chunk which might contain the docid.
198 if (update_reader()) {
200 if (!reader
.at_end()) {
207 // We've reached the end.
214 // We had an exact match for a chunk starting with specified docid.
215 Assert(!cursor
->after_end());
216 if (!update_reader()) {
217 // We found the exact key we built so it must be a posting chunk.
218 // Therefore update_reader() "can't possibly fail".
227 HoneyPostList::get_description() const
229 string desc
= "HoneyPostList(";
238 PostingChunkReader::assign(const char * p_
, size_t len
, Xapian::docid did_
)
240 Xapian::docid last_did_in_chunk
;
241 const char* pend
= p_
+ len
;
242 if (!decode_delta_chunk_header(&p_
, pend
, did_
, last_did_in_chunk
)) {
243 throw Xapian::DatabaseCorruptError("Postlist delta chunk header");
245 if ((pend
- p_
) % 8 != 4)
246 throw Xapian::DatabaseCorruptError("Doclen data length not 4 more than a multiple of 8");
247 if (rare(p_
== pend
)) {
251 p
= reinterpret_cast<const unsigned char*>(p_
);
252 end
= reinterpret_cast<const unsigned char*>(pend
);
254 last_did
= last_did_in_chunk
;
258 PostingChunkReader::next()
260 if ((end
- p
) % 8 != 0) {
261 // FIXME: Alignment guarantees? Hard with header.
262 wdf
= unaligned_read4(p
);
272 // FIXME: Alignment guarantees? Hard with header.
273 did
+= unaligned_read4(p
) + 1;
274 wdf
= unaligned_read4(p
+ 4);
279 PostingChunkReader::skip_to(Xapian::docid target
)
281 if (p
== NULL
|| target
<= did
)
284 if ((end
- p
) % 8 != 0) {
286 // FIXME: Alignment guarantees? Hard with header.
287 wdf
= unaligned_read4(p
);
294 if (target
> last_did
) {
299 // FIXME: Special case target == last_did to just decode the wdf from the
303 if (rare(p
== end
)) {
304 // FIXME: Shouldn't happen unless last_did was wrong.
309 // FIXME: Alignment guarantees? Hard with header.
310 did
+= unaligned_read4(p
) + 1;
312 } while (target
> did
);
313 wdf
= unaligned_read4(p
- 4);