1 /** @file honey_alldocspostlist.cc
2 * @brief A PostList which iterates over all documents in a HoneyDatabase.
4 /* Copyright (C) 2006,2007,2008,2009,2018 Olly Betts
5 * Copyright (C) 2008 Lemur Consulting Ltd
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 #include "honey_alldocspostlist.h"
25 #include "honey_database.h"
29 #include "wordaccess.h"
33 using namespace Honey
;
// Construct a postlist over all documents.
//
// @param db         Database to iterate; a cursor is obtained from it via
//                   get_postlist_cursor().
// @param doccount_  Document count supplied by the caller (also logged).
//
// The base LeafPostList is initialised with an empty string.
36 HoneyAllDocsPostList::HoneyAllDocsPostList(const HoneyDatabase
* db
,
37 Xapian::doccount doccount_
)
38 : LeafPostList(string()),
39 cursor(db
->get_postlist_cursor()),
42 LOGCALL_CTOR(DB
, "HoneyAllDocsPostList", db
| doccount_
);
// Position the cursor at the first entry whose key is >= the two-byte key
// "\0\xe0".  The explicit length of 2 is needed because the key contains an
// embedded NUL byte.
// NOTE(review): presumably this is the prefix of doclen chunk keys - confirm.
43 cursor
->find_entry_ge(string("\0\xe0", 2));
46 HoneyAllDocsPostList::~HoneyAllDocsPostList()
// Term frequency accessor.  The call is logged with a declared return type
// of Xapian::doccount and no arguments.
52 HoneyAllDocsPostList::get_termfreq() const
54 LOGCALL(DB
, Xapian::doccount
, "HoneyAllDocsPostList::get_termfreq", NO_ARGS
);
// Return the document length reported by the doclen chunk reader.
59 HoneyAllDocsPostList::get_doclength() const
61 LOGCALL(DB
, Xapian::termcount
, "HoneyAllDocsPostList::get_doclength", NO_ARGS
);
// Delegates entirely to the reader; RETURN() is the logging-aware return.
62 RETURN(reader
.get_doclength());
// Return the docid reported by the doclen chunk reader.
66 HoneyAllDocsPostList::get_docid() const
68 return reader
.get_docid();
// Within-document frequency accessor.  Must not be called once the postlist
// has reached its end; this precondition is checked by AssertParanoid().
72 HoneyAllDocsPostList::get_wdf() const
74 LOGCALL(DB
, Xapian::termcount
, "HoneyAllDocsPostList::get_wdf", NO_ARGS
);
75 AssertParanoid(!at_end());
// Report whether iteration has finished.  The end state is represented by
// the cursor pointer being NULL.
80 HoneyAllDocsPostList::at_end() const
82 return cursor
== NULL
;
// Advance to the next document.  The double parameter is unnamed and so is
// not used.  NULL is returned whenever the reader ends up positioned on an
// entry.
86 HoneyAllDocsPostList::next(double)
// First try to step forward within the chunk the reader already holds.
89 if (!reader
.at_end()) {
90 if (reader
.next()) return NULL
;
// If the cursor is still on an entry, try to load that entry into the
// reader; success with a non-exhausted reader means we have a document.
94 if (!cursor
->after_end()) {
95 if (reader
.update(cursor
)) {
96 if (!reader
.at_end()) return NULL
;
100 // We've reached the end.
// Advance to the first document with docid >= did.  The double parameter is
// unnamed and so is not used.  NULL is returned whenever the reader ends up
// positioned on an entry.
107 HoneyAllDocsPostList::skip_to(Xapian::docid did
, double)
110 // No-op if already at_end.
114 Assert(!reader
.at_end());
// Fast path: the target may lie within the chunk the reader already holds.
116 if (reader
.skip_to(did
))
// Otherwise seek the cursor using the doclen chunk key built from did.
// A true result from find_entry_ge() means that exact key exists.
119 if (cursor
->find_entry_ge(make_doclenchunk_key(did
))) {
121 if (rare(!reader
.update(cursor
))) {
122 // Shouldn't be possible.
125 if (reader
.skip_to(did
)) return NULL
;
126 // The chunk's last docid is did, so skip_to() should always succeed.
// No exact match: the cursor is on some other entry, unless it has run off
// the end of the table.
128 } else if (!cursor
->after_end()) {
129 if (reader
.update(cursor
)) {
130 if (reader
.skip_to(did
)) return NULL
;
131 // The chunk's last docid is >= did, so skip_to() should always succeed.
137 // We've reached the end.
// Check for docid did.  The double parameter is unnamed and so is not used.
//
// @param did    The docid to look for.
// @param valid  Output flag passed by reference.
144 HoneyAllDocsPostList::check(Xapian::docid did
, double, bool& valid
)
152 if (!reader
.at_end()) {
153 // Check for the requested docid in the current block.
154 if (reader
.skip_to(did
)) {
160 // Try moving to the appropriate chunk.
// A false result from find_entry_ge() means there was no entry with exactly
// the key built from did.
161 if (!cursor
->find_entry_ge(make_doclenchunk_key(did
))) {
162 // We're in a chunk which might contain the docid.
163 if (reader
.update(cursor
)) {
164 if (reader
.skip_to(did
)) {
173 // We had an exact match for a chunk keyed by the specified docid.
174 Assert(!cursor
->after_end());
175 if (!reader
.update(cursor
)) {
176 // We found the exact key we built so it must be a doclen chunk.
177 // Therefore reader.update() "can't possibly fail".
// Build a human-readable description of this postlist.  The text starts
// "HoneyAllDocsPostList(did=<current docid>,doccount=<doccount>".
186 HoneyAllDocsPostList::get_description() const
188 string desc
= "HoneyAllDocsPostList(did=";
189 desc
+= str(get_docid());
190 desc
+= ",doccount=";
191 desc
+= str(doccount
);
// Decode the document length stored at q into doclen.
//
// @param q  Pointer to the first byte of the value to decode.
// @return   false if the decoded value is the all-ones pattern for its size
//           (0xff, 0xffff, 0xffffff or 0xffffffff), true otherwise.
199 DocLenChunkReader::read_doclen(const unsigned char* q
)
204 return doclen
!= 0xff;
// Two-byte value.
206 doclen
= unaligned_read2(q
);
207 return doclen
!= 0xffff;
// Three-byte value: read four bytes starting one byte early, then discard
// the extra leading byte.
209 // q - 1 is always a valid byte - either the leading byte holding
210 // the data width, or else the last byte of the previous value.
211 // unaligned_read4() uses bigendian order, so we just need to mask
212 // off the most significant byte.
213 doclen
= unaligned_read4(q
- 1) & 0xffffff;
214 return doclen
!= 0xffffff;
// Four-byte value.
216 doclen
= unaligned_read4(q
);
217 return doclen
!= 0xffffffff;
// Point the reader at the chunk the cursor is currently on.
//
// @param cursor  Cursor whose current_key and current_tag describe the chunk.
// @return        false if docid_from_key() yields 0 for the cursor's key.
// @throws Xapian::DatabaseCorruptError if the chunk data is malformed (empty
//         tag, unsupported width, trailing junk, or a first value which
//         read_doclen() rejects).
222 DocLenChunkReader::update(HoneyCursor
* cursor
)
// The key supplies the last docid covered by this chunk.
224 Xapian::docid last_did
= docid_from_key(cursor
->current_key
);
225 if (!last_did
) return false;
229 size_t len
= cursor
->current_tag
.size();
231 throw Xapian::DatabaseCorruptError("Doclen data chunk is empty");
233 p
= reinterpret_cast<const unsigned char*>(cursor
->current_tag
.data());
// (width - 8) & ~0x18 is zero only when width - 8 has no bits set outside
// 0x18, i.e. when width is one of 8, 16, 24 or 32.
236 if (((width
- 8) &~ 0x18) != 0) {
237 throw Xapian::DatabaseCorruptError("Invalid doclen width - currently "
238 "8, 16, 24 and 32 are supported");
// Excluding the first byte of the tag, the remainder must divide evenly
// into width-sized entries.
241 if ((len
- 1) % width
!= 0)
242 throw Xapian::DatabaseCorruptError("Doclen data chunk has junk at end");
// Count back from the last docid over the number of entries to find the
// first docid in the chunk.
243 Xapian::docid first_did
= last_did
- (len
- 1) / width
+ 1;
246 if (!read_doclen(p
)) {
247 // The first doclen value shouldn't be missing.
248 throw Xapian::DatabaseCorruptError("Invalid first doclen value");
// Advance the reader.  The loop repeats for as long as read_doclen() rejects
// the value at p.
254 DocLenChunkReader::next()
264 } while (!read_doclen(p
));
// Move the reader towards docid target within the current chunk.
//
// @param target  The docid to skip to.
269 DocLenChunkReader::skip_to(Xapian::docid target
)
// How far ahead of the current docid (did) the target is.
277 Xapian::docid delta
= target
- did
;
// (end - p) / width is the number of entries from p up to end; a delta at
// or beyond that cannot be satisfied from this chunk.
278 if (delta
>= Xapian::docid(end
- p
) / width
) {
// If read_doclen() rejects the value at p, fall forward with next().
286 return read_doclen(p
) || next();
289 // FIXME: Add check() method, which doesn't advance when read_doclen() returns
// Look up the document length for docid target within the current chunk.
//
// @param target  The docid whose length is wanted.
// @return        The result of read_doclen() on the target's entry, i.e.
//                false if the stored value is the all-ones pattern.
293 DocLenChunkReader::find_doclength(Xapian::docid target
)
// How far ahead of the current docid (did) the target is.
298 Xapian::docid delta
= target
- did
;
// (end - p) / width is the number of entries from p up to end; a delta at
// or beyond that lies outside this chunk.
300 if (delta
>= Xapian::docid(end
- p
) / width
) {
// Decode the entry delta positions past p directly.
304 return read_doclen(p
+ delta
* width
);