[ci] Try to get .gitignore check to work
[xapian.git] / xapian-core / matcher / postlisttree.h
blobed2defadcba6b7dd4265baec5c2c50bf22e28ab5
1 /** @file postlisttree.h
2 * @brief Class for managing a tree of PostList objects
3 */
4 /* Copyright 2017 Olly Betts
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of the
9 * License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #ifndef XAPIAN_INCLUDED_POSTLISTTREE_H
22 #define XAPIAN_INCLUDED_POSTLISTTREE_H
24 #include "api/postlist.h"
25 #include "backends/multi.h"
26 #include "valuestreamdocument.h"
28 class PostListTree {
29 PostList* pl = NULL;
31 bool use_cached_max_weight = false;
33 bool need_doclength;
35 bool need_unique_terms;
37 double max_weight;
39 /// The current shard.
40 Xapian::doccount current_shard = 0;
42 /** The postlists for the shards.
44 * Entries corresponding to remote shards will be NULL - the results from
45 * remote shards are included by merging MSet objects, rather than via the
46 * PostList tree.
48 PostList** shard_pls = NULL;
50 /// The number of shards.
51 Xapian::doccount n_shards = 0;
53 /** Document proxy used for valuestream caching.
55 * Each time we move to a new shard we must notify this object so it can
56 * invalidate any cached valuestreams (which are specific to the shard).
58 ValueStreamDocument& vsdoc;
60 Xapian::Database& db;
62 public:
63 PostListTree(ValueStreamDocument& vsdoc_,
64 Xapian::Database& db_,
65 const Xapian::Weight& wtscheme)
66 : need_doclength(wtscheme.get_sumpart_needs_doclength_()),
67 need_unique_terms(wtscheme.get_sumpart_needs_uniqueterms_()),
68 vsdoc(vsdoc_),
69 db(db_) {}
71 ~PostListTree() {
72 for (Xapian::doccount i = 0; i != n_shards; ++i)
73 delete shard_pls[i];
76 void set_postlists(PostList** pls, Xapian::doccount n_shards_) {
77 shard_pls = pls;
78 n_shards = n_shards_;
79 while (shard_pls[current_shard] == NULL) {
80 ++current_shard;
81 Assert(current_shard != n_shards);
83 pl = shard_pls[current_shard];
84 if (current_shard > 0)
85 vsdoc.new_shard(current_shard);
88 double recalc_maxweight() {
89 if (!use_cached_max_weight) {
90 use_cached_max_weight = true;
91 double w = 0.0;
92 // Start at the current shard.
93 for (Xapian::doccount i = current_shard; i != n_shards; ++i) {
94 if (shard_pls[i])
95 w = max(w, shard_pls[i]->recalc_maxweight());
97 max_weight = w;
99 return max_weight;
102 void force_recalc() {
103 use_cached_max_weight = false;
106 Xapian::doccount get_termfreq_min() const {
107 Xapian::doccount result = 0;
108 for (Xapian::doccount i = 0; i != n_shards; ++i)
109 if (shard_pls[i])
110 result += shard_pls[i]->get_termfreq_min();
111 return result;
114 Xapian::doccount get_termfreq_max() const {
115 Xapian::doccount result = 0;
116 for (Xapian::doccount i = 0; i != n_shards; ++i)
117 if (shard_pls[i])
118 result += shard_pls[i]->get_termfreq_max();
119 return result;
122 Xapian::doccount get_termfreq_est() const {
123 Xapian::doccount result = 0;
124 for (Xapian::doccount i = 0; i != n_shards; ++i)
125 if (shard_pls[i])
126 result += shard_pls[i]->get_termfreq_est();
127 return result;
130 Xapian::docid get_docid() const {
131 return unshard(pl->get_docid(), current_shard, n_shards);
134 double get_weight() const {
135 Xapian::termcount doclen = 0, unique_terms = 0;
136 get_doc_stats(doclen, unique_terms);
137 return pl->get_weight(doclen, unique_terms);
140 /// Return false if we're done.
141 bool next(double w_min) {
142 if (w_min > 0.0 && recalc_maxweight() < w_min) {
143 // We can't now achieve w_min so we're done.
144 return false;
147 while (true) {
148 PostList* result = pl->next(w_min);
149 if (rare(result)) {
150 delete pl;
151 shard_pls[current_shard] = pl = result;
152 if (usual(!pl->at_end())) {
153 if (w_min > 0.0) {
154 use_cached_max_weight = false;
155 if (recalc_maxweight() < w_min) {
156 // We can't now achieve w_min so we're done.
157 return false;
160 return true;
162 } else {
163 if (usual(!pl->at_end())) {
164 return true;
168 do {
169 if (++current_shard == n_shards)
170 return false;
171 } while (shard_pls[current_shard] == NULL);
172 pl = shard_pls[current_shard];
173 vsdoc.new_shard(current_shard);
174 use_cached_max_weight = false;
178 void get_doc_stats(Xapian::termcount& doclen,
179 Xapian::termcount& unique_terms) const {
180 // Fetching the document length and number of unique terms is work we
181 // can avoid if the weighting scheme doesn't use them.
182 if (need_doclength || need_unique_terms) {
183 Xapian::docid did = get_docid();
184 if (need_doclength)
185 doclen = db.get_doclength(did);
186 if (need_unique_terms)
187 unique_terms = db.get_unique_terms(did);
191 Xapian::termcount count_matching_subqs() const {
192 return pl->count_matching_subqs();
195 std::string get_description() const {
196 string desc = "PostListTree(";
197 for (Xapian::doccount i = 0; i != n_shards; ++i) {
198 if (i == current_shard)
199 desc += '*';
200 if (shard_pls[i]) {
201 desc += shard_pls[i]->get_description();
202 desc += ',';
203 } else {
204 desc += "NULL,";
207 desc.back() = ')';
208 return desc;
212 #endif // XAPIAN_INCLUDED_POSTLISTTREE_H