1 /** @file honey_postlist.h
2 * @brief PostList in a honey database.
4 /* Copyright (C) 2007,2009,2011,2013,2015,2016,2017 Olly Betts
5 * Copyright (C) 2009 Lemur Consulting Ltd
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #ifndef XAPIAN_INCLUDED_HONEY_POSTLIST_H
23 #define XAPIAN_INCLUDED_HONEY_POSTLIST_H
25 #include "api/leafpostlist.h"
26 #include "honey_positionlist.h"
36 /** Generate a key for a posting initial chunk. */
// Overload taking only the term.  The one visible statement appends term to
// key via pack_string_preserving_sort(), passing true as the third argument.
// The continuation-chunk overload calls the same helper without that third
// argument and then appends a docid.
// NOTE(review): the return type, the declaration of key and the return
// statement are not visible in this view - presumably key is a local
// std::string which is returned; confirm against the full header.
// NOTE(review): the meaning of the true flag is defined by
// pack_string_preserving_sort(), which is declared elsewhere - confirm.
38 make_postingchunk_key(const std::string
& term
)
41 pack_string_preserving_sort(key
, term
, true);
45 /** Generate a key for a posting continuation chunk. */
// Overload taking the term and a docid.  The visible statements append term
// to key with pack_string_preserving_sort() (two-argument form) and then
// append did with pack_uint_preserving_sort(), so the docid follows the term
// in the key.
// NOTE(review): which docid of the chunk the caller is expected to pass as
// did is not stated in this view - confirm against the callers.
// NOTE(review): the return type, the declaration of key and the return
// statement are not visible in this view; confirm against the full header.
47 make_postingchunk_key(const std::string
& term
, Xapian::docid did
)
50 pack_string_preserving_sort(key
, term
);
51 pack_uint_preserving_sort(key
, did
);
// Extract the docid stored in a posting chunk key belonging to term.
//
// Visible steps: check whether key is shorter than term, unpack the term
// stored at the start of key, compare it with term, then unpack the docid
// into did.  A failure of either unpack call throws
// Xapian::DatabaseCorruptError.
// NOTE(review): the return type, the declaration of did, the bodies of the
// two if-branches which do not throw, and the final return statement are not
// visible in this view - confirm them against the full header.
56 docid_from_key(const std::string
& term
, const std::string
& key
)
58 if (key
.size() < term
.size()) {
59 // A key can't be shorter than the term it contains.
// p and end delimit the raw bytes of key for the unpack calls below; p is
// passed by address to both calls.
62 const char * p
= key
.data();
63 const char * end
= p
+ key
.size();
64 // Most terms don't contain zero bytes, so we could optimise this.
65 std::string term_in_key
;
66 // FIXME: the next key might not be for a postlist chunk...
67 if (!unpack_string_preserving_sort(&p
, end
, term_in_key
))
68 throw Xapian::DatabaseCorruptError("bad postlist key");
// A key which unpacks cleanly but holds a different term takes this branch;
// what the branch does is not visible in this view.
69 if (term_in_key
!= term
)
// The docid is unpacked with the same p and end as the term above.
72 if (!unpack_uint_preserving_sort(&p
, end
, &did
))
73 throw Xapian::DatabaseCorruptError("bad postlist key");
// Reader over one chunk of posting data (judging by the class name and the
// last_did comment below).
//
// Visible state: wdf, last_did, termfreq and collfreq.  The members p and did
// are used by the inline functions below but their declarations are not
// visible in this view.
// at_end() is defined as p == NULL and the default constructor sets p to
// NULL, so a default-constructed reader reports at_end().
// NOTE(review): termfreq and collfreq are presumably the term frequency and
// collection frequency of the posting list being read - confirm.
77 class PostingChunkReader
{
// Returned unchanged by get_wdf().
83 Xapian::termcount wdf
;
85 /// The last docid in this chunk.
86 Xapian::docid last_did
;
// Returned unchanged by get_termfreq().
88 Xapian::doccount termfreq
;
// No accessor for collfreq is visible in this view.
90 Xapian::termcount collfreq
;
93 /// Create an uninitialised PostingChunkReader.
// Only p is initialised here (to NULL); the other visible members are left
// without an initialiser.
94 PostingChunkReader() : p(NULL
) { }
96 /// Initialise already at_end().
// NOTE(review): the embedded numbering jumps from 96 to 103, so the comment
// above may document a declaration that is missing from this view rather than
// the two-argument init() below - confirm.  The body of init() is not visible.
103 void init(Xapian::doccount tf
, Xapian::termcount cf
) {
// Two assign() overloads are declared; their definitions are not in this view.
// NOTE(review): presumably p_ and len describe the encoded chunk data and the
// remaining arguments seed the reader state - confirm against the definitions.
109 void assign(const char * p_
, size_t len
, Xapian::docid did
);
111 void assign(const char * p_
, size_t len
, Xapian::docid did_
,
112 Xapian::docid last_did_in_chunk
,
113 Xapian::termcount wdf_
);
// True exactly when p is NULL.
115 bool at_end() const { return p
== NULL
; }
// Trivial accessors returning the stored members.
117 Xapian::doccount
get_termfreq() const { return termfreq
; }
119 Xapian::docid
get_docid() const { return did
; }
121 Xapian::termcount
get_wdf() const { return wdf
; }
123 /// Advance, returning false if we've run out of data.
// NOTE(review): the declaration documented by the comment above is not
// visible in this view (the embedded numbering skips 124 and 125).
126 /// Skip ahead, returning false if we've run out of data.
127 bool skip_to(Xapian::docid target
);
132 /** PostList in a honey database. */
// Derives publicly from LeafPostList.  Copy construction and copy assignment
// are both deleted, so instances cannot be copied.
// NOTE(review): access specifiers and the closing brace of the class are not
// visible in this view.
// NOTE(review): none of the member functions declared below carry override
// in this view; if they override virtuals of the base classes, marking them
// override would let the compiler check the signatures - confirm.
133 class HoneyPostList
: public LeafPostList
{
134 /// Don't allow assignment.
135 HoneyPostList
& operator=(const HoneyPostList
&) = delete;
137 /// Don't allow copying.
138 HoneyPostList(const HoneyPostList
&) = delete;
140 /// Cursor on the postlist table.
// NOTE(review): the cursor member documented above is not visible in this
// view (the embedded numbering skips 141 and 142); the comment does not
// describe the reader member which follows.
143 Honey::PostingChunkReader reader
;
145 /// The highest document id in this posting list.
146 Xapian::docid last_did
;
148 /// HoneyDatabase to get position table object from.
149 const HoneyDatabase
* db
;
151 /** Needed so that first next() does nothing.
153 * FIXME: Can we arrange not to need this?
// Defaults to false via the in-class initialiser.
155 bool started
= false;
157 /// Update @a reader to use the chunk currently pointed to by @a cursor.
// NOTE(review): the meaning of the bool result is not stated in this view.
158 bool update_reader();
161 /// Create HoneyPostList from already positioned @a cursor_.
// NOTE(review): whether the postlist takes ownership of cursor_ cannot be
// determined from this declaration - confirm against the definition.
162 HoneyPostList(const HoneyDatabase
* db_
,
163 const std::string
& term_
,
164 HoneyCursor
* cursor_
);
// Const accessors; their definitions are not in this view.
168 Xapian::doccount
get_termfreq() const;
170 LeafPostList
* open_nearby_postlist(const std::string
& term_
,
171 bool need_pos
) const;
173 Xapian::docid
get_docid() const;
175 Xapian::termcount
get_wdf() const;
179 PositionList
* open_position_list() const;
// Iteration entry points; both take a minimum weight w_min and return a
// PostList pointer.
// NOTE(review): the meaning of the returned pointer is defined by the
// PostList interface, which is declared elsewhere - confirm.
181 PostList
* next(double w_min
);
183 PostList
* skip_to(Xapian::docid did
, double w_min
);
185 std::string
get_description() const;
188 /** PostList in a honey database with positions.
190 * Use a special subclass to avoid the size cost for the common case where we
191 * don't want positional data.
// Derives publicly from HoneyPostList and adds one data member,
// position_list, plus read_position_list() and its own get_description().
// NOTE(review): the terminators of the block comments in this class, the
// access specifier and the closing brace are not visible in this view.
193 class HoneyPosPostList
: public HoneyPostList
{
194 /** PositionList object to reuse for OP_NEAR and OP_PHRASE.
196 * This saves the overhead of creating objects for every document
199 HoneyRePositionList position_list
;
// Takes the same parameters as the HoneyPostList constructor.
202 HoneyPosPostList(const HoneyDatabase
* db_
,
203 const std::string
& term_
,
204 HoneyCursor
* cursor_
);
// NOTE(review): presumably returns a pointer to the reused position_list
// member rather than a newly allocated object - confirm against the
// definition before deciding who owns the result.
206 PositionList
* read_position_list();
208 std::string
get_description() const;
211 #endif // XAPIAN_INCLUDED_HONEY_POSTLIST_H