Remove some unnecessary std:: qualification
[xapian.git] / xapian-core / matcher / maxpostlist.cc
blob70a0913788f7f2839cdf5e017673e4b919967c36
1 /** @file maxpostlist.cc
2 * @brief N-way OR postlist with wt=max(wt_i)
3 */
4 /* Copyright (C) 2007,2009,2010,2011,2012,2013,2014,2017 Olly Betts
5 * Copyright (C) 2009 Lemur Consulting Ltd
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #include <config.h>
24 #include "maxpostlist.h"
26 #include "debuglog.h"
27 #include "omassert.h"
29 using namespace std;
31 MaxPostList::~MaxPostList()
33 if (plist) {
34 for (size_t i = 0; i < n_kids; ++i) {
35 delete plist[i];
37 delete [] plist;
41 Xapian::doccount
42 MaxPostList::get_termfreq_min() const
44 Xapian::doccount res = plist[0]->get_termfreq_min();
45 for (size_t i = 1; i < n_kids; ++i) {
46 res = max(res, plist[i]->get_termfreq_min());
48 return res;
51 Xapian::doccount
52 MaxPostList::get_termfreq_max() const
54 Xapian::doccount res = plist[0]->get_termfreq_max();
55 for (size_t i = 1; i < n_kids; ++i) {
56 Xapian::doccount c = plist[i]->get_termfreq_max();
57 if (db_size - res <= c)
58 return db_size;
59 res += c;
61 return res;
64 Xapian::doccount
65 MaxPostList::get_termfreq_est() const
67 if (rare(db_size == 0))
68 return 0;
70 // We calculate the estimate assuming independence. The simplest
71 // way to calculate this seems to be a series of (n_kids - 1) pairwise
72 // calculations, which gives the same answer regardless of the order.
73 double scale = 1.0 / db_size;
74 double P_est = plist[0]->get_termfreq_est() * scale;
75 for (size_t i = 1; i < n_kids; ++i) {
76 double P_i = plist[i]->get_termfreq_est() * scale;
77 P_est += P_i - P_est * P_i;
79 return static_cast<Xapian::doccount>(P_est * db_size + 0.5);
82 TermFreqs
83 MaxPostList::get_termfreq_est_using_stats(
84 const Xapian::Weight::Internal & stats) const
86 // We calculate the estimate assuming independence. The simplest
87 // way to calculate this seems to be a series of (n_kids - 1) pairwise
88 // calculations, which gives the same answer regardless of the order.
89 TermFreqs freqs(plist[0]->get_termfreq_est_using_stats(stats));
91 // Our caller should have ensured this.
92 Assert(stats.collection_size);
93 double scale = 1.0 / stats.collection_size;
94 double P_est = freqs.termfreq * scale;
95 double Pr_est = freqs.reltermfreq * scale;
96 double Pc_est = freqs.collfreq * scale;
98 for (size_t i = 1; i < n_kids; ++i) {
99 double P_i = freqs.termfreq * scale;
100 P_est += P_i - P_est * P_i;
101 double Pc_i = freqs.collfreq * scale;
102 Pc_est += Pc_i - Pc_est * Pc_i;
103 // If the rset is empty, Pr_est should be 0 already, so leave
104 // it alone.
105 if (stats.rset_size != 0) {
106 double Pr_i = freqs.reltermfreq / stats.rset_size;
107 Pr_est += Pr_i - Pr_est * Pr_i;
110 return TermFreqs(Xapian::doccount(P_est * stats.collection_size + 0.5),
111 Xapian::doccount(Pr_est * stats.rset_size + 0.5),
112 Xapian::termcount(Pc_est * stats.total_term_count));
115 Xapian::docid
116 MaxPostList::get_docid() const
118 return did;
121 double
122 MaxPostList::get_weight(Xapian::termcount doclen,
123 Xapian::termcount unique_terms) const
125 Assert(did);
126 double res = 0.0;
127 for (size_t i = 0; i < n_kids; ++i) {
128 if (plist[i]->get_docid() == did)
129 res = max(res, plist[i]->get_weight(doclen, unique_terms));
131 return res;
134 bool
135 MaxPostList::at_end() const
137 return (did == 0);
140 double
141 MaxPostList::recalc_maxweight()
143 double result = plist[0]->recalc_maxweight();
144 for (size_t i = 1; i < n_kids; ++i) {
145 result = max(result, plist[i]->recalc_maxweight());
147 return result;
150 PostList *
151 MaxPostList::next(double w_min)
153 Xapian::docid old_did = did;
154 did = 0;
155 for (size_t i = 0; i < n_kids; ++i) {
156 Xapian::docid cur_did = 0;
157 if (old_did != 0)
158 cur_did = plist[i]->get_docid();
159 if (cur_did <= old_did) {
160 PostList * res;
161 if (old_did == 0 || cur_did == old_did) {
162 res = plist[i]->next(w_min);
163 } else {
164 res = plist[i]->skip_to(old_did + 1, w_min);
166 if (res) {
167 delete plist[i];
168 plist[i] = res;
171 if (plist[i]->at_end()) {
172 erase_sublist(i--);
173 continue;
176 if (res)
177 matcher->force_recalc();
179 cur_did = plist[i]->get_docid();
182 if (did == 0 || cur_did < did) {
183 did = cur_did;
187 if (n_kids == 1) {
188 n_kids = 0;
189 return plist[0];
192 return NULL;
195 PostList *
196 MaxPostList::skip_to(Xapian::docid did_min, double w_min)
198 Xapian::docid old_did = did;
199 did = 0;
200 for (size_t i = 0; i < n_kids; ++i) {
201 Xapian::docid cur_did = 0;
202 if (old_did != 0)
203 cur_did = plist[i]->get_docid();
204 if (cur_did < did_min) {
205 PostList * res = plist[i]->skip_to(did_min, w_min);
206 if (res) {
207 delete plist[i];
208 plist[i] = res;
211 if (plist[i]->at_end()) {
212 erase_sublist(i--);
213 continue;
216 if (res)
217 matcher->force_recalc();
219 cur_did = plist[i]->get_docid();
222 if (did == 0 || cur_did < did) {
223 did = cur_did;
227 if (n_kids == 1) {
228 n_kids = 0;
229 return plist[0];
232 return NULL;
235 string
236 MaxPostList::get_description() const
238 string desc("(");
239 desc += plist[0]->get_description();
240 for (size_t i = 1; i < n_kids; ++i) {
241 desc += " MAX ";
242 desc += plist[i]->get_description();
244 desc += ')';
245 return desc;
248 Xapian::termcount
249 MaxPostList::get_wdf() const
251 Xapian::termcount totwdf = 0;
252 for (size_t i = 0; i < n_kids; ++i) {
253 if (plist[i]->get_docid() == did)
254 totwdf += plist[i]->get_wdf();
256 return totwdf;
259 Xapian::termcount
260 MaxPostList::count_matching_subqs() const
262 return 1;