Sync arabic stemming algorithm change from Snowball
[xapian.git] / xapian-core / matcher / andnotpostlist.cc
blobab68eff946cc6861aca4bdecf289dfccb986df73
1 /** @file andnotpostlist.cc
2 * @brief PostList class implementing Query::OP_AND_NOT
3 */
4 /* Copyright 2017 Olly Betts
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of the
9 * License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #include <config.h>
23 #include "andnotpostlist.h"
25 #include <algorithm>
27 using namespace std;
29 Xapian::doccount
30 AndNotPostList::get_termfreq_min() const
32 Xapian::doccount l_tf_min = pl->get_termfreq_min();
33 Xapian::doccount r_tf_max = r->get_termfreq_max();
34 if (l_tf_min <= r_tf_max)
35 return 0;
36 return l_tf_min - r_tf_max;
39 Xapian::doccount
40 AndNotPostList::get_termfreq_max() const
42 // We can't match more documents than our left-side does.
43 Xapian::doccount l_tf_max = pl->get_termfreq_max();
44 // We also can't more documents than our right-side *doesn't*.
45 Xapian::doccount r_tf_min = r->get_termfreq_min();
46 return min(db_size - r_tf_min, l_tf_max);
49 Xapian::doccount
50 AndNotPostList::get_termfreq_est() const
52 if (rare(db_size == 0))
53 return 0;
54 // We calculate the estimate assuming independence. With this assumption,
55 // the estimate is the product of the estimates for the sub-postlists
56 // (for the right side this is inverted by subtracting from db_size),
57 // divided by db_size.
58 double result = pl->get_termfreq_est();
59 result = (result * (db_size - r->get_termfreq_est())) / db_size;
60 return static_cast<Xapian::doccount>(result + 0.5);
63 TermFreqs
64 AndNotPostList::get_termfreq_est_using_stats(const Xapian::Weight::Internal & stats) const
66 // We calculate the estimate assuming independence. With this assumption,
67 // the estimate is the product of the estimates for the sub-postlists
68 // (for the right side this is inverted by subtracting from db_size),
69 // divided by db_size.
70 TermFreqs freqs(pl->get_termfreq_est_using_stats(stats));
72 double freqest = double(freqs.termfreq);
73 double relfreqest = double(freqs.reltermfreq);
74 double collfreqest = double(freqs.collfreq);
76 freqs = r->get_termfreq_est_using_stats(stats);
78 // Our caller should have ensured this.
79 Assert(stats.collection_size);
81 freqs.termfreq = stats.collection_size - freqs.termfreq;
82 freqest = (freqest * freqs.termfreq) / stats.collection_size;
84 if (stats.total_term_count != 0) {
85 freqs.collfreq = stats.total_term_count - freqs.termfreq;
86 collfreqest = (collfreqest * freqs.collfreq) / stats.total_term_count;
89 // If the rset is empty, relfreqest should be 0 already, so leave
90 // it alone.
91 if (stats.rset_size != 0) {
92 freqs.reltermfreq = stats.rset_size - freqs.reltermfreq;
93 relfreqest = (relfreqest * freqs.reltermfreq) / stats.rset_size;
96 return TermFreqs(static_cast<Xapian::doccount>(freqest + 0.5),
97 static_cast<Xapian::doccount>(relfreqest + 0.5),
98 static_cast<Xapian::termcount>(collfreqest + 0.5));
101 PostList*
102 AndNotPostList::next(double w_min)
104 while (true) {
105 PostList* result = pl->next(w_min);
106 if (result) {
107 delete pl;
108 pl = result;
110 if (pl->at_end()) {
111 result = pl;
112 pl = NULL;
113 return result;
115 Xapian::docid l_did = pl->get_docid();
116 if (l_did > r_did) {
117 bool r_valid;
118 result = r->check(l_did, 0, r_valid);
119 if (result) {
120 delete r;
121 r = result;
123 if (!r_valid)
124 return NULL;
125 if (r->at_end()) {
126 result = pl;
127 pl = NULL;
128 return result;
130 r_did = r->get_docid();
132 if (l_did < r_did)
133 break;
135 return NULL;
138 PostList*
139 AndNotPostList::skip_to(Xapian::docid did, double w_min)
141 if (did > pl->get_docid()) {
142 PostList* result = pl->skip_to(did, w_min);
143 if (result) {
144 delete pl;
145 pl = result;
147 if (pl->at_end()) {
148 result = pl;
149 pl = NULL;
150 return result;
152 Xapian::docid l_did = pl->get_docid();
153 if (l_did > r_did) {
154 bool r_valid;
155 result = r->check(l_did, 0, r_valid);
156 if (result) {
157 delete r;
158 r = result;
160 if (!r_valid)
161 return NULL;
162 if (r->at_end()) {
163 result = pl;
164 pl = NULL;
165 return result;
167 r_did = r->get_docid();
169 if (l_did == r_did) {
170 // Advance to the next match.
171 return AndNotPostList::next(w_min);
174 return NULL;
177 PostList*
178 AndNotPostList::check(Xapian::docid did, double w_min, bool& valid)
180 PostList* result = pl->check(did, w_min, valid);
181 if (result) {
182 delete pl;
183 pl = result;
185 if (valid) {
186 if (pl->at_end()) {
187 result = pl;
188 pl = NULL;
189 return result;
191 Xapian::docid l_did = pl->get_docid();
192 if (l_did > r_did) {
193 bool r_valid;
194 result = r->check(l_did, 0, r_valid);
195 if (result) {
196 delete r;
197 r = result;
199 if (!r_valid)
200 return NULL;
201 if (r->at_end()) {
202 result = pl;
203 pl = NULL;
204 return result;
206 r_did = r->get_docid();
208 if (l_did == r_did) {
209 // For check() we can simply indicate !valid.
210 valid = false;
213 return NULL;
216 string
217 AndNotPostList::get_description() const
219 string desc = "AndNotPostList(";
220 desc += pl->get_description();
221 desc += ", ";
222 desc += r->get_description();
223 desc += ')';
224 return desc;