1 /** @file honey_postlisttable.cc
2 * @brief Subclass of HoneyTable which holds postlists.
4 /* Copyright (C) 2007,2008,2009,2010,2013,2014,2015,2016,2017,2018 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 #include "honey_postlisttable.h"
25 #include "honey_alldocspostlist.h"
26 #include "honey_cursor.h"
27 #include "honey_database.h"
28 #include "honey_defs.h"
29 #include "honey_postlist.h"
30 #include "honey_postlist_encodings.h"
34 using namespace Honey
;
/** Create a postlist object for @a term.
 *
 *  A cursor is positioned on the term's posting chunk key before any
 *  postlist object is built.
 *
 *  @param db    Database pointer, passed through to the postlist constructor.
 *  @param term  Term to open; asserted to be non-empty.
 *
 *  @return A postlist object allocated with `new`.  When the term has no
 *          entry the object is built with a NULL cursor; otherwise the
 *          positioned cursor is released into the returned object.
 */
38 HoneyPostListTable::open_post_list(const HoneyDatabase
* db
,
39 const std::string
& term
,
42 Assert(!term
.empty());
43 // Try to position cursor first so we avoid creating HoneyPostList objects
44 // for terms which don't exist.
45 unique_ptr
<HoneyCursor
> cursor(cursor_get());
46 if (!cursor
->find_exact(Honey::make_postingchunk_key(term
))) {
47 // FIXME: Return NULL here and handle that in Query::Internal
48 // postlist() methods as we build the PostList tree.
// No exact match for the term's key: the postlist gets a NULL cursor and
// the unique_ptr keeps ownership of the cursor obtained above.
50 return new HoneyPostList(db
, term
, NULL
);
// Term exists: release() hands the positioned cursor over to the new
// postlist object, so the unique_ptr no longer owns it.
54 return new HoneyPosPostList(db
, term
, cursor
.release());
55 return new HoneyPostList(db
, term
, cursor
.release());
/** Read the frequencies stored in the initial posting chunk for @a term.
 *
 *  @param term          Term whose posting chunk entry is looked up.
 *  @param termfreq_ptr  If non-null, receives the decoded term frequency,
 *                       or 0 when the term has no entry.
 *  @param collfreq_ptr  If non-null, receives the decoded collection
 *                       frequency, or 0 when the term has no entry.
 *
 *  @throws Xapian::DatabaseCorruptError if the initial chunk header cannot
 *          be decoded.
 */
59 HoneyPostListTable::get_freqs(const std::string
& term
,
60 Xapian::doccount
* termfreq_ptr
,
61 Xapian::termcount
* collfreq_ptr
) const
// No exact entry for this term: report zero for each frequency requested.
64 if (!get_exact_entry(Honey::make_postingchunk_key(term
), chunk
)) {
65 if (termfreq_ptr
) *termfreq_ptr
= 0;
66 if (collfreq_ptr
) *collfreq_ptr
= 0;
// [p, pend) spans the bytes of the fetched chunk; the decoder is given &p.
70 const char* p
= chunk
.data();
71 const char* pend
= p
+ chunk
.size();
74 if (!decode_initial_chunk_header_freqs(&p
, pend
, tf
, cf
))
75 throw Xapian::DatabaseCorruptError("Postlist initial chunk header");
// Either output pointer may be null, so each is checked before storing.
76 if (termfreq_ptr
) *termfreq_ptr
= tf
;
77 if (collfreq_ptr
) *collfreq_ptr
= cf
;
/** Work out the range of docids in use from the doclen chunk entries.
 *
 *  @param doccount  Document count; per the comments below, the caller is
 *                   expected to handle doccount == 0 itself.
 *  @param last      Receives the last docid.  It is first set to the lower
 *                   bound first - 1 + doccount, then the table is scanned
 *                   forwards from there.
 *
 *  `first` is also assigned here: the docid taken from the first doclen
 *  chunk's key minus the delta unpacked from the start of its tag.
 *
 *  @throws Xapian::DatabaseCorruptError if the first doclen chunk key yields
 *          docid 0, if the delta cannot be unpacked, or if the seek for the
 *          last docid runs off the end of the table.
 */
81 HoneyPostListTable::get_used_docid_range(Xapian::doccount doccount
,
83 Xapian::docid
& last
) const
// NOTE(review): cursor looks default-constructed (null) here and is
// dereferenced below with no visible assignment - confirm it is initialised
// before use (open_post_list initialises its cursor from cursor_get()).
85 unique_ptr
<HoneyCursor
> cursor
;
// Two-byte key prefix (0, KEY_DOCLEN_CHUNK) used to seek to the first
// doclen chunk entry.
86 static const char doclen_key_prefix
[2] = {
87 0, char(Honey::KEY_DOCLEN_CHUNK
)
89 if (cursor
->find_entry_ge(string(doclen_key_prefix
, 2))) {
92 // doccount == 0 should be handled by our caller.
93 Assert(!cursor
->after_end());
// The key of the first doclen chunk gives the last docid in that chunk.
94 Xapian::docid last_in_first_chunk
= docid_from_key(cursor
->current_key
);
95 if (last_in_first_chunk
== 0) {
96 // Note that our caller checks for doccount == 0 and handles that.
97 throw Xapian::DatabaseCorruptError("Bad first doclen chunk key");
// The chunk's tag starts with a packed unsigned delta; subtracting it from
// the chunk's last docid gives the first docid.
101 const char* p
= cursor
->current_tag
.data();
102 const char* pend
= p
+ cursor
->current_tag
.size();
103 if (!unpack_uint(&p
, pend
, &delta
)) {
104 throw Xapian::DatabaseCorruptError("Bad first doclen chunk delta");
106 first
= last_in_first_chunk
- delta
;
109 // We know the last docid is at least first - 1 + doccount, so seek
110 // to there and then scan forwards. If we match exactly, then that
111 // is exactly the last docid (our caller handles this case when
112 // first == 1, but not otherwise).
113 last
= first
- 1 + doccount
;
114 if (cursor
->find_entry_ge(make_doclenchunk_key(last
)))
// Running off the end of the table at this point means the doclen chunk
// that should cover `last` is absent.
117 if (cursor
->after_end())
118 throw Xapian::DatabaseCorruptError("Missing doclen chunk");
121 Xapian::docid new_last
= docid_from_key(cursor
->current_key
);
123 // We've hit a non-doclen item.
128 } while (!cursor
->after_end());
130 // We've reached the end of the table (only possible if there are no terms
135 HoneyPostListTable::get_wdf_upper_bound(const std::string
& term
) const
138 if (!get_exact_entry(Honey::make_postingchunk_key(term
), chunk
)) {
143 const char* p
= chunk
.data();
144 const char* pend
= p
+ chunk
.size();
146 Xapian::termcount cf
;
149 Xapian::docid chunk_last
;
150 Xapian::termcount first_wdf
;
151 Xapian::termcount wdf_max
;
152 if (!decode_initial_chunk_header(&p
, pend
, tf
, cf
, first
, last
, chunk_last
,
154 throw Xapian::DatabaseCorruptError("Postlist initial chunk header");