1 /** @file maxpostlist.cc
2 * @brief N-way OR postlist with wt=max(wt_i)
4 /* Copyright (C) 2007,2009,2010,2011,2012,2013,2014 Olly Betts
5 * Copyright (C) 2009 Lemur Consulting Ltd
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
24 #include "maxpostlist.h"
31 MaxPostList::~MaxPostList()
34 for (size_t i
= 0; i
< n_kids
; ++i
) {
42 MaxPostList::get_termfreq_min() const
44 Xapian::doccount res
= plist
[0]->get_termfreq_min();
45 for (size_t i
= 1; i
< n_kids
; ++i
) {
46 res
= std::max(res
, plist
[i
]->get_termfreq_min());
52 MaxPostList::get_termfreq_max() const
54 Xapian::doccount res
= plist
[0]->get_termfreq_max();
55 for (size_t i
= 1; i
< n_kids
; ++i
) {
56 Xapian::doccount c
= plist
[i
]->get_termfreq_max();
57 if (db_size
- res
<= c
)
65 MaxPostList::get_termfreq_est() const
67 if (rare(db_size
== 0))
70 // We calculate the estimate assuming independence. The simplest
71 // way to calculate this seems to be a series of (n_kids - 1) pairwise
72 // calculations, which gives the same answer regardless of the order.
73 double scale
= 1.0 / db_size
;
74 double P_est
= plist
[0]->get_termfreq_est() * scale
;
75 for (size_t i
= 1; i
< n_kids
; ++i
) {
76 double P_i
= plist
[i
]->get_termfreq_est() * scale
;
77 P_est
+= P_i
- P_est
* P_i
;
79 return static_cast<Xapian::doccount
>(P_est
* db_size
+ 0.5);
83 MaxPostList::get_termfreq_est_using_stats(
84 const Xapian::Weight::Internal
& stats
) const
86 // We calculate the estimate assuming independence. The simplest
87 // way to calculate this seems to be a series of (n_kids - 1) pairwise
88 // calculations, which gives the same answer regardless of the order.
89 TermFreqs
freqs(plist
[0]->get_termfreq_est_using_stats(stats
));
91 // Our caller should have ensured this.
92 Assert(stats
.collection_size
);
93 double scale
= 1.0 / stats
.collection_size
;
94 double P_est
= freqs
.termfreq
* scale
;
95 double Pr_est
= freqs
.reltermfreq
* scale
;
96 double Pc_est
= freqs
.collfreq
* scale
;
98 for (size_t i
= 1; i
< n_kids
; ++i
) {
99 double P_i
= freqs
.termfreq
* scale
;
100 P_est
+= P_i
- P_est
* P_i
;
101 double Pc_i
= freqs
.collfreq
* scale
;
102 Pc_est
+= Pc_i
- Pc_est
* Pc_i
;
103 // If the rset is empty, Pr_est should be 0 already, so leave
105 if (stats
.rset_size
!= 0) {
106 double Pr_i
= freqs
.reltermfreq
/ stats
.rset_size
;
107 Pr_est
+= Pr_i
- Pr_est
* Pr_i
;
110 return TermFreqs(Xapian::doccount(P_est
* stats
.collection_size
+ 0.5),
111 Xapian::doccount(Pr_est
* stats
.rset_size
+ 0.5),
112 Xapian::termcount(Pc_est
* stats
.total_term_count
));
116 MaxPostList::get_docid() const
122 MaxPostList::get_weight() const
126 for (size_t i
= 0; i
< n_kids
; ++i
) {
127 if (plist
[i
]->get_docid() == did
)
128 res
= std::max(res
, plist
[i
]->get_weight());
134 MaxPostList::at_end() const
140 MaxPostList::recalc_maxweight()
142 double result
= plist
[0]->recalc_maxweight();
143 for (size_t i
= 1; i
< n_kids
; ++i
) {
144 result
= std::max(result
, plist
[i
]->recalc_maxweight());
150 MaxPostList::next(double w_min
)
152 Xapian::docid old_did
= did
;
154 for (size_t i
= 0; i
< n_kids
; ++i
) {
155 Xapian::docid cur_did
= 0;
157 cur_did
= plist
[i
]->get_docid();
158 if (cur_did
<= old_did
) {
160 if (old_did
== 0 || cur_did
== old_did
) {
161 res
= plist
[i
]->next(w_min
);
163 res
= plist
[i
]->skip_to(old_did
+ 1, w_min
);
170 if (plist
[i
]->at_end()) {
176 matcher
->force_recalc();
178 cur_did
= plist
[i
]->get_docid();
181 if (did
== 0 || cur_did
< did
) {
195 MaxPostList::skip_to(Xapian::docid did_min
, double w_min
)
197 Xapian::docid old_did
= did
;
199 for (size_t i
= 0; i
< n_kids
; ++i
) {
200 Xapian::docid cur_did
= 0;
202 cur_did
= plist
[i
]->get_docid();
203 if (cur_did
< did_min
) {
204 PostList
* res
= plist
[i
]->skip_to(did_min
, w_min
);
210 if (plist
[i
]->at_end()) {
216 matcher
->force_recalc();
218 cur_did
= plist
[i
]->get_docid();
221 if (did
== 0 || cur_did
< did
) {
235 MaxPostList::get_description() const
238 desc
+= plist
[0]->get_description();
239 for (size_t i
= 1; i
< n_kids
; ++i
) {
241 desc
+= plist
[i
]->get_description();
248 MaxPostList::get_wdf() const
250 Xapian::termcount totwdf
= 0;
251 for (size_t i
= 0; i
< n_kids
; ++i
) {
252 if (plist
[i
]->get_docid() == did
)
253 totwdf
+= plist
[i
]->get_wdf();
259 MaxPostList::count_matching_subqs() const