[honey] New format for more keys in postlist table
[xapian.git] / xapian-core / backends / honey / honey_postlisttable.cc
blob88076dc1dcde13b1c3c011e3b60b7a43e31e4bfb
/** @file honey_postlisttable.cc
 * @brief Subclass of HoneyTable which holds postlists.
 */
/* Copyright (C) 2007,2008,2009,2010,2013,2014,2015,2016,2017,2018 Olly Betts
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
21 #include <config.h>
23 #include "honey_postlisttable.h"
25 #include "honey_alldocspostlist.h"
26 #include "honey_cursor.h"
27 #include "honey_database.h"
28 #include "honey_defs.h"
29 #include "honey_postlist.h"
30 #include "honey_postlist_encodings.h"
32 #include <memory>
34 using namespace Honey;
35 using namespace std;
37 HoneyPostList*
38 HoneyPostListTable::open_post_list(const HoneyDatabase* db,
39 const std::string& term,
40 bool need_pos) const
42 Assert(!term.empty());
43 // Try to position cursor first so we avoid creating HoneyPostList objects
44 // for terms which don't exist.
45 unique_ptr<HoneyCursor> cursor(cursor_get());
46 if (!cursor->find_exact(Honey::make_postingchunk_key(term))) {
47 // FIXME: Return NULL here and handle that in Query::Internal
48 // postlist() methods as we build the PostList tree.
49 // return NULL;
50 return new HoneyPostList(db, term, NULL);
53 if (need_pos)
54 return new HoneyPosPostList(db, term, cursor.release());
55 return new HoneyPostList(db, term, cursor.release());
58 void
59 HoneyPostListTable::get_freqs(const std::string& term,
60 Xapian::doccount* termfreq_ptr,
61 Xapian::termcount* collfreq_ptr) const
63 string chunk;
64 if (!get_exact_entry(Honey::make_postingchunk_key(term), chunk)) {
65 if (termfreq_ptr) *termfreq_ptr = 0;
66 if (collfreq_ptr) *collfreq_ptr = 0;
67 return;
70 const char* p = chunk.data();
71 const char* pend = p + chunk.size();
72 Xapian::doccount tf;
73 Xapian::termcount cf;
74 if (!decode_initial_chunk_header_freqs(&p, pend, tf, cf))
75 throw Xapian::DatabaseCorruptError("Postlist initial chunk header");
76 if (termfreq_ptr) *termfreq_ptr = tf;
77 if (collfreq_ptr) *collfreq_ptr = cf;
80 void
81 HoneyPostListTable::get_used_docid_range(Xapian::doccount doccount,
82 Xapian::docid& first,
83 Xapian::docid& last) const
85 unique_ptr<HoneyCursor> cursor;
86 static const char doclen_key_prefix[2] = {
87 0, char(Honey::KEY_DOCLEN_CHUNK)
89 if (cursor->find_entry_ge(string(doclen_key_prefix, 2))) {
90 first = 1;
91 } else {
92 // doccount == 0 should be handled by our caller.
93 Assert(!cursor->after_end());
94 Xapian::docid last_in_first_chunk = docid_from_key(cursor->current_key);
95 if (last_in_first_chunk == 0) {
96 // Note that our caller checks for doccount == 0 and handles that.
97 throw Xapian::DatabaseCorruptError("Bad first doclen chunk key");
99 cursor->read_tag();
100 Xapian::docid delta;
101 const char* p = cursor->current_tag.data();
102 const char* pend = p + cursor->current_tag.size();
103 if (!unpack_uint(&p, pend, &delta)) {
104 throw Xapian::DatabaseCorruptError("Bad first doclen chunk delta");
106 first = last_in_first_chunk - delta;
109 // We know the last docid is at least first - 1 + doccount, so seek
110 // to there and then scan forwards. If we match exactly, then that
111 // is exactly the last docid (our caller handles this case when
112 // first == 1, but not otherwise).
113 last = first - 1 + doccount;
114 if (cursor->find_entry_ge(make_doclenchunk_key(last)))
115 return;
117 if (cursor->after_end())
118 throw Xapian::DatabaseCorruptError("Missing doclen chunk");
120 do {
121 Xapian::docid new_last = docid_from_key(cursor->current_key);
122 if (new_last == 0) {
123 // We've hit a non-doclen item.
124 return;
126 last = new_last;
127 cursor->next();
128 } while (!cursor->after_end());
130 // We've reached the end of the table (only possible if there are no terms
131 // at all!)
134 Xapian::termcount
135 HoneyPostListTable::get_wdf_upper_bound(const std::string& term) const
137 string chunk;
138 if (!get_exact_entry(Honey::make_postingchunk_key(term), chunk)) {
139 // Term not present.
140 return 0;
143 const char* p = chunk.data();
144 const char* pend = p + chunk.size();
145 Xapian::doccount tf;
146 Xapian::termcount cf;
147 Xapian::docid first;
148 Xapian::docid last;
149 Xapian::docid chunk_last;
150 Xapian::termcount first_wdf;
151 Xapian::termcount wdf_max;
152 if (!decode_initial_chunk_header(&p, pend, tf, cf, first, last, chunk_last,
153 first_wdf, wdf_max))
154 throw Xapian::DatabaseCorruptError("Postlist initial chunk header");
155 return wdf_max;