Remove unused includes of multimatch.h
[xapian.git] / xapian-core / matcher / maxpostlist.cc
blobf0a0887172269c3de70be4901bf687282f4025ce
1 /** @file maxpostlist.cc
2 * @brief N-way OR postlist with wt=max(wt_i)
3 */
4 /* Copyright (C) 2007,2009,2010,2011,2012,2013,2014 Olly Betts
5 * Copyright (C) 2009 Lemur Consulting Ltd
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #include <config.h>
24 #include "maxpostlist.h"
26 #include "debuglog.h"
27 #include "omassert.h"
29 using namespace std;
31 MaxPostList::~MaxPostList()
33 if (plist) {
34 for (size_t i = 0; i < n_kids; ++i) {
35 delete plist[i];
37 delete [] plist;
41 Xapian::doccount
42 MaxPostList::get_termfreq_min() const
44 Xapian::doccount res = plist[0]->get_termfreq_min();
45 for (size_t i = 1; i < n_kids; ++i) {
46 res = std::max(res, plist[i]->get_termfreq_min());
48 return res;
51 Xapian::doccount
52 MaxPostList::get_termfreq_max() const
54 Xapian::doccount res = plist[0]->get_termfreq_max();
55 for (size_t i = 1; i < n_kids; ++i) {
56 Xapian::doccount c = plist[i]->get_termfreq_max();
57 if (db_size - res <= c)
58 return db_size;
59 res += c;
61 return res;
64 Xapian::doccount
65 MaxPostList::get_termfreq_est() const
67 if (rare(db_size == 0))
68 return 0;
70 // We calculate the estimate assuming independence. The simplest
71 // way to calculate this seems to be a series of (n_kids - 1) pairwise
72 // calculations, which gives the same answer regardless of the order.
73 double scale = 1.0 / db_size;
74 double P_est = plist[0]->get_termfreq_est() * scale;
75 for (size_t i = 1; i < n_kids; ++i) {
76 double P_i = plist[i]->get_termfreq_est() * scale;
77 P_est += P_i - P_est * P_i;
79 return static_cast<Xapian::doccount>(P_est * db_size + 0.5);
82 TermFreqs
83 MaxPostList::get_termfreq_est_using_stats(
84 const Xapian::Weight::Internal & stats) const
86 // We calculate the estimate assuming independence. The simplest
87 // way to calculate this seems to be a series of (n_kids - 1) pairwise
88 // calculations, which gives the same answer regardless of the order.
89 TermFreqs freqs(plist[0]->get_termfreq_est_using_stats(stats));
91 // Our caller should have ensured this.
92 Assert(stats.collection_size);
93 double scale = 1.0 / stats.collection_size;
94 double P_est = freqs.termfreq * scale;
95 double Pr_est = freqs.reltermfreq * scale;
96 double Pc_est = freqs.collfreq * scale;
98 for (size_t i = 1; i < n_kids; ++i) {
99 double P_i = freqs.termfreq * scale;
100 P_est += P_i - P_est * P_i;
101 double Pc_i = freqs.collfreq * scale;
102 Pc_est += Pc_i - Pc_est * Pc_i;
103 // If the rset is empty, Pr_est should be 0 already, so leave
104 // it alone.
105 if (stats.rset_size != 0) {
106 double Pr_i = freqs.reltermfreq / stats.rset_size;
107 Pr_est += Pr_i - Pr_est * Pr_i;
110 return TermFreqs(Xapian::doccount(P_est * stats.collection_size + 0.5),
111 Xapian::doccount(Pr_est * stats.rset_size + 0.5),
112 Xapian::termcount(Pc_est * stats.total_term_count));
115 Xapian::docid
116 MaxPostList::get_docid() const
118 return did;
121 double
122 MaxPostList::get_weight() const
124 Assert(did);
125 double res = 0.0;
126 for (size_t i = 0; i < n_kids; ++i) {
127 if (plist[i]->get_docid() == did)
128 res = std::max(res, plist[i]->get_weight());
130 return res;
133 bool
134 MaxPostList::at_end() const
136 return (did == 0);
139 double
140 MaxPostList::recalc_maxweight()
142 double result = plist[0]->recalc_maxweight();
143 for (size_t i = 1; i < n_kids; ++i) {
144 result = std::max(result, plist[i]->recalc_maxweight());
146 return result;
149 PostList *
150 MaxPostList::next(double w_min)
152 Xapian::docid old_did = did;
153 did = 0;
154 for (size_t i = 0; i < n_kids; ++i) {
155 Xapian::docid cur_did = 0;
156 if (old_did != 0)
157 cur_did = plist[i]->get_docid();
158 if (cur_did <= old_did) {
159 PostList * res;
160 if (old_did == 0 || cur_did == old_did) {
161 res = plist[i]->next(w_min);
162 } else {
163 res = plist[i]->skip_to(old_did + 1, w_min);
165 if (res) {
166 delete plist[i];
167 plist[i] = res;
170 if (plist[i]->at_end()) {
171 erase_sublist(i--);
172 continue;
175 if (res)
176 matcher->force_recalc();
178 cur_did = plist[i]->get_docid();
181 if (did == 0 || cur_did < did) {
182 did = cur_did;
186 if (n_kids == 1) {
187 n_kids = 0;
188 return plist[0];
191 return NULL;
194 PostList *
195 MaxPostList::skip_to(Xapian::docid did_min, double w_min)
197 Xapian::docid old_did = did;
198 did = 0;
199 for (size_t i = 0; i < n_kids; ++i) {
200 Xapian::docid cur_did = 0;
201 if (old_did != 0)
202 cur_did = plist[i]->get_docid();
203 if (cur_did < did_min) {
204 PostList * res = plist[i]->skip_to(did_min, w_min);
205 if (res) {
206 delete plist[i];
207 plist[i] = res;
210 if (plist[i]->at_end()) {
211 erase_sublist(i--);
212 continue;
215 if (res)
216 matcher->force_recalc();
218 cur_did = plist[i]->get_docid();
221 if (did == 0 || cur_did < did) {
222 did = cur_did;
226 if (n_kids == 1) {
227 n_kids = 0;
228 return plist[0];
231 return NULL;
234 string
235 MaxPostList::get_description() const
237 string desc("(");
238 desc += plist[0]->get_description();
239 for (size_t i = 1; i < n_kids; ++i) {
240 desc += " MAX ";
241 desc += plist[i]->get_description();
243 desc += ')';
244 return desc;
247 Xapian::termcount
248 MaxPostList::get_wdf() const
250 Xapian::termcount totwdf = 0;
251 for (size_t i = 0; i < n_kids; ++i) {
252 if (plist[i]->get_docid() == did)
253 totwdf += plist[i]->get_wdf();
255 return totwdf;
258 Xapian::termcount
259 MaxPostList::count_matching_subqs() const
261 return 1;