[honey] Use pack_uint() for postlist chunk data
[xapian.git] / xapian-core / backends / honey / honey_postlist.h
blob2bc2c242c510e3124a57054417b953c9822b2864
/** @file honey_postlist.h
 * @brief PostList in a honey database.
 */
/* Copyright (C) 2007,2009,2011,2013,2015,2016,2017 Olly Betts
 * Copyright (C) 2009 Lemur Consulting Ltd
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
 */
22 #ifndef XAPIAN_INCLUDED_HONEY_POSTLIST_H
23 #define XAPIAN_INCLUDED_HONEY_POSTLIST_H
25 #include "api/leafpostlist.h"
26 #include "honey_positionlist.h"
27 #include "pack.h"
29 #include <string>
31 class HoneyCursor;
32 class HoneyDatabase;
34 namespace Honey {
36 /** Generate a key for a posting initial chunk. */
37 inline std::string
38 make_postingchunk_key(const std::string& term)
40 std::string key;
41 pack_string_preserving_sort(key, term, true);
42 return key;
45 /** Generate a key for a posting continuation chunk. */
46 inline std::string
47 make_postingchunk_key(const std::string& term, Xapian::docid did)
49 std::string key;
50 pack_string_preserving_sort(key, term);
51 pack_uint_preserving_sort(key, did);
52 return key;
55 inline Xapian::docid
56 docid_from_key(const std::string& term, const std::string& key)
58 if (key.size() < term.size()) {
59 // A key can't be shorter than the term it contains.
60 return false;
62 const char * p = key.data();
63 const char * end = p + key.size();
64 // Most terms don't contain zero bytes, so we could optimise this.
65 std::string term_in_key;
66 // FIXME: the next key might not be for a postlist chunk...
67 if (!unpack_string_preserving_sort(&p, end, term_in_key))
68 throw Xapian::DatabaseCorruptError("bad postlist key");
69 if (term_in_key != term)
70 return false;
71 Xapian::docid did;
72 if (!unpack_uint_preserving_sort(&p, end, &did))
73 throw Xapian::DatabaseCorruptError("bad postlist key");
74 return did;
77 class PostingChunkReader {
78 const char *p;
79 const char *end;
81 Xapian::docid did;
83 Xapian::termcount wdf;
85 /// The last docid in this chunk.
86 Xapian::docid last_did;
88 Xapian::doccount termfreq;
90 Xapian::termcount collfreq;
92 public:
93 /// Create an uninitialised PostingChunkReader.
94 PostingChunkReader() : p(NULL) { }
96 /// Initialise already at_end().
97 void init() {
98 p = NULL;
99 termfreq = 0;
102 /// Initialise.
103 void init(Xapian::doccount tf, Xapian::termcount cf) {
104 p = NULL;
105 termfreq = tf;
106 collfreq = cf;
109 void assign(const char * p_, size_t len, Xapian::docid did);
111 void assign(const char * p_, size_t len, Xapian::docid did_,
112 Xapian::docid last_did_in_chunk,
113 Xapian::termcount wdf_);
115 bool at_end() const { return p == NULL; }
117 Xapian::doccount get_termfreq() const { return termfreq; }
119 Xapian::docid get_docid() const { return did; }
121 Xapian::termcount get_wdf() const { return wdf; }
123 /// Advance, returning false if we've run out of data.
124 bool next();
126 /// Skip ahead, returning false if we've run out of data.
127 bool skip_to(Xapian::docid target);
132 /** PostList in a honey database. */
133 class HoneyPostList : public LeafPostList {
134 /// Don't allow assignment.
135 HoneyPostList& operator=(const HoneyPostList&) = delete;
137 /// Don't allow copying.
138 HoneyPostList(const HoneyPostList&) = delete;
140 /// Cursor on the postlist table.
141 HoneyCursor* cursor;
143 Honey::PostingChunkReader reader;
145 /// The highest document id in this posting list.
146 Xapian::docid last_did;
148 /// HoneyDatabase to get position table object from.
149 const HoneyDatabase* db;
151 /** Needed so that first next() does nothing.
153 * FIXME: Can we arrange not to need this?
155 bool started = false;
157 /// Update @a reader to use the chunk currently pointed to by @a cursor.
158 bool update_reader();
160 public:
161 /// Create HoneyPostList from already positioned @a cursor_.
162 HoneyPostList(const HoneyDatabase* db_,
163 const std::string& term_,
164 HoneyCursor* cursor_);
166 ~HoneyPostList();
168 Xapian::doccount get_termfreq() const;
170 LeafPostList* open_nearby_postlist(const std::string& term_,
171 bool need_pos) const;
173 Xapian::docid get_docid() const;
175 Xapian::termcount get_wdf() const;
177 bool at_end() const;
179 PositionList* open_position_list() const;
181 PostList* next(double w_min);
183 PostList* skip_to(Xapian::docid did, double w_min);
185 std::string get_description() const;
188 /** PostList in a honey database with positions.
190 * Use a special subclass to avoid the size cost for the common case where we
191 * don't want positional data.
193 class HoneyPosPostList : public HoneyPostList {
194 /** PositionList object to reuse for OP_NEAR and OP_PHRASE.
196 * This saves the overhead of creating objects for every document
197 * considered.
199 HoneyRePositionList position_list;
201 public:
202 HoneyPosPostList(const HoneyDatabase* db_,
203 const std::string& term_,
204 HoneyCursor* cursor_);
206 PositionList* read_position_list();
208 std::string get_description() const;
211 #endif // XAPIAN_INCLUDED_HONEY_POSTLIST_H