/** @file honey_alldocspostlist.cc
 * @brief A PostList which iterates over all documents in a HoneyDatabase.
 */
/* Copyright (C) 2006,2007,2008,2009,2018 Olly Betts
 * Copyright (C) 2008 Lemur Consulting Ltd
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
23 #include "honey_alldocspostlist.h"
25 #include "honey_database.h"
26 #include "honey_defs.h"
30 #include "wordaccess.h"
34 using namespace Honey
;
// Construct a postlist which iterates over all documents in `db`.
//
// The LeafPostList base is given an empty term name, a postlist cursor is
// obtained from `db`, and the cursor is positioned with find_entry_ge() on the
// two-byte key prefix {0, KEY_DOCLEN_CHUNK}.
//
// NOTE(review): `doccount_` is presumably stored in the `doccount` member
// (get_description() reads it), but that initialiser is not visible in this
// listing - confirm against the full source.
37 HoneyAllDocsPostList::HoneyAllDocsPostList(const HoneyDatabase
* db
,
38 Xapian::doccount doccount_
)
39 : LeafPostList(string()),
40 cursor(db
->get_postlist_cursor()),
43 LOGCALL_CTOR(DB
, "HoneyAllDocsPostList", db
| doccount_
);
// Key prefix used to position the cursor: a zero byte followed by the
// KEY_DOCLEN_CHUNK marker.
44 static const char doclen_key_prefix
[2] = {
45 0, char(Honey::KEY_DOCLEN_CHUNK
)
47 cursor
->find_entry_ge(string(doclen_key_prefix
, 2));
// Destructor.
// NOTE(review): the body is not present in this listing; presumably it
// releases `cursor` - confirm against the full source.
50 HoneyAllDocsPostList::~HoneyAllDocsPostList()
// Term frequency accessor; LOGCALL declares the result type as
// Xapian::doccount.
// NOTE(review): the return statement is not visible in this listing.
56 HoneyAllDocsPostList::get_termfreq() const
58 LOGCALL(DB
, Xapian::doccount
, "HoneyAllDocsPostList::get_termfreq", NO_ARGS
);
// Return the document length reported by the chunk reader for the current
// position (logged as a Xapian::termcount).
63 HoneyAllDocsPostList::get_doclength() const
65 LOGCALL(DB
, Xapian::termcount
, "HoneyAllDocsPostList::get_doclength", NO_ARGS
);
66 RETURN(reader
.get_doclength());
// Return the document id the chunk reader is currently positioned on.
70 HoneyAllDocsPostList::get_docid() const
72 return reader
.get_docid();
// Within-document frequency accessor; LOGCALL declares the result type as
// Xapian::termcount.  Must not be called once the postlist is at_end()
// (checked by AssertParanoid).
// NOTE(review): the return statement is not visible in this listing.
76 HoneyAllDocsPostList::get_wdf() const
78 LOGCALL(DB
, Xapian::termcount
, "HoneyAllDocsPostList::get_wdf", NO_ARGS
);
79 AssertParanoid(!at_end());
// Report whether iteration has finished: a NULL `cursor` is the end marker.
84 HoneyAllDocsPostList::at_end() const
86 return cursor
== NULL
;
// Advance to the next document.  The (unnamed) double argument is ignored.
//
// Visible flow: first try to step within the chunk currently loaded in
// `reader`; if that succeeds, return NULL.  Otherwise, provided the cursor is
// not after_end(), load the chunk at the cursor into `reader` with update()
// and return NULL if the reader then has an entry.  If neither works the end
// has been reached.
//
// NOTE(review): several lines of this function (including the cursor advance
// between chunks and the end-of-list handling) are not visible in this
// listing.
90 HoneyAllDocsPostList::next(double)
93 if (!reader
.at_end()) {
94 if (reader
.next()) return NULL
;
98 if (!cursor
->after_end()) {
99 if (reader
.update(cursor
)) {
100 if (!reader
.at_end()) return NULL
;
104 // We've reached the end.
// Advance to the first document with id >= `did`.  The (unnamed) double
// argument is ignored.
//
// Visible flow:
//  1. Try reader.skip_to(did) within the chunk already loaded.
//  2. Otherwise look up the chunk key built by make_doclenchunk_key(did) with
//     find_entry_ge().  A true result is handled as a chunk whose last docid
//     is exactly `did`; a false result with the cursor not after_end() is
//     handled as a chunk whose last docid is >= `did`.  Either way the chunk
//     is loaded with reader.update() and reader.skip_to(did) is retried.
//  3. If none of that succeeds, the end has been reached.
//
// NOTE(review): several lines (early-return bodies, error handling and the
// end-of-list handling) are not visible in this listing.
111 HoneyAllDocsPostList::skip_to(Xapian::docid did
, double)
114 // No-op if already at_end.
118 Assert(!reader
.at_end());
120 if (reader
.skip_to(did
))
123 if (cursor
->find_entry_ge(make_doclenchunk_key(did
))) {
125 if (rare(!reader
.update(cursor
))) {
126 // Shouldn't be possible.
129 if (reader
.skip_to(did
)) return NULL
;
130 // The chunk's last docid is did, so skip_to() should always succeed.
132 } else if (!cursor
->after_end()) {
133 if (reader
.update(cursor
)) {
134 if (reader
.skip_to(did
)) return NULL
;
135 // The chunk's last docid is >= did, so skip_to() should always succeed.
141 // We've reached the end.
// Check for document `did`.  The (unnamed) double argument is ignored and
// `valid` is an output flag.
//
// Visible flow:
//  1. If the reader still has data, look for `did` in the current chunk with
//     reader.skip_to(did).
//  2. Otherwise position the cursor with find_entry_ge() on the key built by
//     make_doclenchunk_key(did).  A false result means the cursor is in a
//     chunk which might contain `did`; a true result means an exact match for
//     that key.  In both cases the chunk is loaded with reader.update().
//
// NOTE(review): the statements which set `valid` and the return statements
// are not visible in this listing - confirm the exact contract against the
// full source.
148 HoneyAllDocsPostList::check(Xapian::docid did
, double, bool& valid
)
156 if (!reader
.at_end()) {
157 // Check for the requested docid in the current block.
158 if (reader
.skip_to(did
)) {
164 // Try moving to the appropriate chunk.
165 if (!cursor
->find_entry_ge(make_doclenchunk_key(did
))) {
166 // We're in a chunk which might contain the docid.
167 if (reader
.update(cursor
)) {
168 if (reader
.skip_to(did
)) {
177 // We had an exact match for a chunk starting with specified docid.
178 Assert(!cursor
->after_end());
179 if (!reader
.update(cursor
)) {
180 // We found the exact key we built so it must be a doclen chunk.
181 // Therefore reader.update() "can't possibly fail".
// Build a human-readable description of this postlist, starting with
// "HoneyAllDocsPostList(did=<current docid>,doccount=<doccount>".
// NOTE(review): the remainder of the string and the return statement are not
// visible in this listing.
190 HoneyAllDocsPostList::get_description() const
192 string desc
= "HoneyAllDocsPostList(did=";
193 desc
+= str(get_docid());
194 desc
+= ",doccount=";
195 desc
+= str(doccount
);
// Decode the document length stored at `q` into the `doclen` member.
//
// Returns false when the decoded value is the all-ones value for its width
// (0xff, 0xffff, 0xffffff or 0xffffffff), true otherwise.  update() treats a
// false result for the first entry as a missing value, and next() keeps
// looping while this returns false.
//
// NOTE(review): the code selecting between the four cases (presumably a
// dispatch on `width`) and the 1-byte read are not visible in this listing.
203 DocLenChunkReader::read_doclen(const unsigned char* q
)
208 return doclen
!= 0xff;
210 doclen
= unaligned_read2(q
);
211 return doclen
!= 0xffff;
213 // q - 1 is always a valid byte - either the leading byte holding
214 // the data width, or else the last byte of the previous value.
215 // unaligned_read4() uses bigendian order, so we just need to mask
216 // off the most significant byte.
217 doclen
= unaligned_read4(q
- 1) & 0xffffff;
218 return doclen
!= 0xffffff;
220 doclen
= unaligned_read4(q
);
221 return doclen
!= 0xffffffff;
// Load the doclen chunk the cursor is positioned on into this reader.
//
// Returns false if docid_from_key() gives 0 for the cursor's current key
// (callers treat that as "not a doclen chunk").
//
// Throws Xapian::DatabaseCorruptError if the chunk is empty, has an
// unsupported width, has a payload which is not a whole number of entries, or
// has a missing first doclen value.
//
// NOTE(review): several statements (including where `width` is read and the
// final return) are not visible in this listing.
226 DocLenChunkReader::update(HoneyCursor
* cursor
)
// The key encodes the docid of the last entry in the chunk.
228 Xapian::docid last_did
= docid_from_key(cursor
->current_key
);
229 if (!last_did
) return false;
233 size_t len
= cursor
->current_tag
.size();
235 throw Xapian::DatabaseCorruptError("Doclen data chunk is empty");
237 p
= reinterpret_cast<const unsigned char*>(cursor
->current_tag
.data());
// (width - 8) may only have bits within 0x18 set, so this accepts exactly
// 8, 16, 24 and 32.
240 if (((width
- 8) &~ 0x18) != 0) {
241 throw Xapian::DatabaseCorruptError("Invalid doclen width - currently "
242 "8, 16, 24 and 32 are supported");
// (len - 1) excludes one byte of the tag - per the comment in read_doclen(),
// a leading byte holds the data width - so the rest must divide evenly into
// entries.
245 if ((len
- 1) % width
!= 0)
246 throw Xapian::DatabaseCorruptError("Doclen data chunk has junk at end");
// The chunk holds (len - 1) / width consecutive entries ending at last_did.
247 Xapian::docid first_did
= last_did
- (len
- 1) / width
+ 1;
250 if (!read_doclen(p
)) {
251 // The first doclen value shouldn't be missing.
252 throw Xapian::DatabaseCorruptError("Invalid first doclen value");
// Advance to the next entry in the chunk.  The visible loop tail repeats
// until read_doclen(p) returns true, i.e. entries for which read_doclen()
// reports a missing value are stepped over.
// NOTE(review): the loop body, the end-of-chunk handling and the return
// statements are not visible in this listing.
258 DocLenChunkReader::next()
268 } while (!read_doclen(p
));
// Move within the current chunk towards docid `target`.
//
// `delta` is the distance from the current docid (`did`) to `target`, and
// (end - p) / width is the number of entries left from the current position;
// a delta at or beyond that count means `target` is not in this chunk.
// Otherwise the entry at the new position is decoded, and if read_doclen()
// reports it as missing the reader advances with next().
//
// NOTE(review): the handling of targets at or before the current docid, the
// pointer/docid adjustment and the out-of-chunk branch body are not visible
// in this listing.
273 DocLenChunkReader::skip_to(Xapian::docid target
)
281 Xapian::docid delta
= target
- did
;
282 if (delta
>= Xapian::docid(end
- p
) / width
) {
290 return read_doclen(p
) || next();
293 // FIXME: Add check() method, which doesn't advance when read_doclen() returns false.
// Look up the document length for docid `target` in the current chunk.
//
// `delta` is the distance from the current docid (`did`) to `target`; if it
// is at or beyond the number of entries left from `p`, the target is outside
// this chunk.  Otherwise the entry at p + delta * width is decoded via
// read_doclen(), whose result (false for a missing value) is returned.  The
// visible code reads at an offset from `p` rather than assigning to `p`.
//
// NOTE(review): the handling of targets before the current docid and the
// out-of-chunk branch body are not visible in this listing.
297 DocLenChunkReader::find_doclength(Xapian::docid target
)
302 Xapian::docid delta
= target
- did
;
304 if (delta
>= Xapian::docid(end
- p
) / width
) {
308 return read_doclen(p
+ delta
* width
);