1 /** @file andnotpostlist.cc
2 * @brief PostList class implementing Query::OP_AND_NOT
4 /* Copyright 2017 Olly Betts
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of the
9 * License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 #include "andnotpostlist.h"
30 AndNotPostList::get_termfreq_min() const
32 Xapian::doccount l_tf_min
= pl
->get_termfreq_min();
33 Xapian::doccount r_tf_max
= r
->get_termfreq_max();
34 if (l_tf_min
<= r_tf_max
)
36 return l_tf_min
- r_tf_max
;
40 AndNotPostList::get_termfreq_max() const
42 // We can't match more documents than our left-side does.
43 Xapian::doccount l_tf_max
= pl
->get_termfreq_max();
44 // We also can't more documents than our right-side *doesn't*.
45 Xapian::doccount r_tf_min
= r
->get_termfreq_min();
46 return min(db_size
- r_tf_min
, l_tf_max
);
50 AndNotPostList::get_termfreq_est() const
52 if (rare(db_size
== 0))
54 // We calculate the estimate assuming independence. With this assumption,
55 // the estimate is the product of the estimates for the sub-postlists
56 // (for the right side this is inverted by subtracting from db_size),
57 // divided by db_size.
58 double result
= pl
->get_termfreq_est();
59 result
= (result
* (db_size
- r
->get_termfreq_est())) / db_size
;
60 return static_cast<Xapian::doccount
>(result
+ 0.5);
64 AndNotPostList::get_termfreq_est_using_stats(const Xapian::Weight::Internal
& stats
) const
66 // We calculate the estimate assuming independence. With this assumption,
67 // the estimate is the product of the estimates for the sub-postlists
68 // (for the right side this is inverted by subtracting from db_size),
69 // divided by db_size.
70 TermFreqs
freqs(pl
->get_termfreq_est_using_stats(stats
));
72 double freqest
= double(freqs
.termfreq
);
73 double relfreqest
= double(freqs
.reltermfreq
);
74 double collfreqest
= double(freqs
.collfreq
);
76 freqs
= r
->get_termfreq_est_using_stats(stats
);
78 // Our caller should have ensured this.
79 Assert(stats
.collection_size
);
81 freqs
.termfreq
= stats
.collection_size
- freqs
.termfreq
;
82 freqest
= (freqest
* freqs
.termfreq
) / stats
.collection_size
;
84 if (stats
.total_term_count
!= 0) {
85 freqs
.collfreq
= stats
.total_term_count
- freqs
.termfreq
;
86 collfreqest
= (collfreqest
* freqs
.collfreq
) / stats
.total_term_count
;
89 // If the rset is empty, relfreqest should be 0 already, so leave
91 if (stats
.rset_size
!= 0) {
92 freqs
.reltermfreq
= stats
.rset_size
- freqs
.reltermfreq
;
93 relfreqest
= (relfreqest
* freqs
.reltermfreq
) / stats
.rset_size
;
96 return TermFreqs(static_cast<Xapian::doccount
>(freqest
+ 0.5),
97 static_cast<Xapian::doccount
>(relfreqest
+ 0.5),
98 static_cast<Xapian::termcount
>(collfreqest
+ 0.5));
102 AndNotPostList::next(double w_min
)
105 PostList
* result
= pl
->next(w_min
);
115 Xapian::docid l_did
= pl
->get_docid();
118 result
= r
->check(l_did
, 0, r_valid
);
130 r_did
= r
->get_docid();
139 AndNotPostList::skip_to(Xapian::docid did
, double w_min
)
141 if (did
> pl
->get_docid()) {
142 PostList
* result
= pl
->skip_to(did
, w_min
);
152 Xapian::docid l_did
= pl
->get_docid();
155 result
= r
->check(l_did
, 0, r_valid
);
167 r_did
= r
->get_docid();
169 if (l_did
== r_did
) {
170 // Advance to the next match.
171 return AndNotPostList::next(w_min
);
178 AndNotPostList::check(Xapian::docid did
, double w_min
, bool& valid
)
180 PostList
* result
= pl
->check(did
, w_min
, valid
);
191 Xapian::docid l_did
= pl
->get_docid();
194 result
= r
->check(l_did
, 0, r_valid
);
206 r_did
= r
->get_docid();
208 if (l_did
== r_did
) {
209 // For check() we can simply indicate !valid.
217 AndNotPostList::get_description() const
219 string desc
= "AndNotPostList(";
220 desc
+= pl
->get_description();
222 desc
+= r
->get_description();