[python3] Simplify generated wrapper post-processing
[xapian.git] / xapian-core / api / enquire.cc
blob0e7b72f1468e1657fd02616367dada9fc8c52ca8
1 /** @file enquire.cc
2 * @brief Xapian::Enquire class
3 */
4 /* Copyright (C) 2009,2017 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #include <config.h>
23 #include "xapian/enquire.h"
24 #include "enquireinternal.h"
26 #include "expand/esetinternal.h"
27 #include "expand/expandweight.h"
28 #include "matcher/matcher.h"
29 #include "msetinternal.h"
30 #include "vectortermlist.h"
31 #include "weight/weightinternal.h"
32 #include "xapian/database.h"
33 #include "xapian/error.h"
34 #include "xapian/expanddecider.h"
35 #include "xapian/intrusive_ptr.h"
36 #include "xapian/keymaker.h"
37 #include "xapian/matchspy.h"
38 #include "xapian/query.h"
39 #include "xapian/rset.h"
40 #include "xapian/weight.h"
42 #include <memory>
43 #include <string>
44 #include <vector>
46 using namespace std;
48 [[noreturn]]
49 static void
50 throw_invalid_arg(const char* msg) {
51 throw Xapian::InvalidArgumentError(msg);
54 namespace Xapian {
56 Enquire::Enquire(const Enquire& o) : internal(o.internal) {}
58 Enquire&
59 Enquire::operator=(const Enquire& o)
61 internal = o.internal;
62 return *this;
65 Enquire::Enquire(const Database& db) : internal(new Enquire::Internal(db)) {}
67 Enquire::~Enquire() {}
69 void
70 Enquire::set_query(const Query& query, termcount query_length)
72 internal->query = query;
73 internal->query_length = query_length;
76 const Query&
77 Enquire::get_query() const
79 return internal->query;
82 void
83 Enquire::set_weighting_scheme(const Weight& weight)
85 internal->weight.reset(weight.clone());
88 void
89 Enquire::set_docid_order(docid_order order)
91 internal->order = order;
94 void
95 Enquire::set_sort_by_relevance()
97 internal->sort_by = Internal::REL;
100 void
101 Enquire::set_sort_by_value(valueno sort_key, bool reverse)
103 internal->sort_by = Internal::VAL;
104 internal->sort_functor = NULL;
105 internal->sort_key = sort_key;
106 internal->sort_val_reverse = reverse;
109 void
110 Enquire::set_sort_by_key(KeyMaker* sorter, bool reverse)
112 if (sorter == NULL) {
113 throw_invalid_arg("Enquire::set_sort_by_key(): sorter cannot be NULL");
115 internal->sort_by = Internal::VAL;
116 internal->sort_functor = sorter;
117 internal->sort_val_reverse = reverse;
120 void
121 Enquire::set_sort_by_value_then_relevance(valueno sort_key, bool reverse)
123 internal->sort_by = Internal::VAL_REL;
124 internal->sort_functor = NULL;
125 internal->sort_key = sort_key;
126 internal->sort_val_reverse = reverse;
129 void
130 Enquire::set_sort_by_key_then_relevance(KeyMaker* sorter, bool reverse)
132 if (sorter == NULL) {
133 throw_invalid_arg("Enquire::set_sort_by_key_then_relevance(): "
134 "sorter cannot be NULL");
136 internal->sort_by = Internal::VAL_REL;
137 internal->sort_functor = sorter;
138 internal->sort_val_reverse = reverse;
141 void
142 Enquire::set_sort_by_relevance_then_value(valueno sort_key, bool reverse)
144 internal->sort_by = Internal::REL_VAL;
145 internal->sort_functor = NULL;
146 internal->sort_key = sort_key;
147 internal->sort_val_reverse = reverse;
150 void
151 Enquire::set_sort_by_relevance_then_key(KeyMaker* sorter, bool reverse)
153 if (sorter == NULL) {
154 throw_invalid_arg("Enquire::set_sort_by_relevance_then_key(): "
155 "sorter cannot be NULL");
157 internal->sort_by = Internal::REL_VAL;
158 internal->sort_functor = sorter;
159 internal->sort_val_reverse = reverse;
162 void
163 Enquire::set_collapse_key(valueno collapse_key, doccount collapse_max)
165 internal->collapse_key = collapse_key;
166 internal->collapse_max = collapse_max;
169 void
170 Enquire::set_cutoff(int percent_threshold, double weight_threshold)
172 internal->percent_threshold = percent_threshold;
173 internal->weight_threshold = weight_threshold;
176 void
177 Enquire::add_matchspy(MatchSpy* spy)
179 using Xapian::Internal::opt_intrusive_ptr;
180 if (spy == NULL)
181 throw_invalid_arg("Enquire::add_matchspy(): spy cannot be NULL");
182 internal->matchspies.push_back(opt_intrusive_ptr<MatchSpy>(spy));
185 void
186 Enquire::clear_matchspies()
188 internal->matchspies.clear();
191 void
192 Enquire::set_time_limit(double time_limit)
194 internal->time_limit = time_limit;
197 MSet
198 Enquire::get_mset(doccount first,
199 doccount maxitems,
200 doccount checkatleast,
201 const RSet* rset,
202 const MatchDecider* mdecider) const
204 return internal->get_mset(first, maxitems, checkatleast, rset, mdecider);
207 TermIterator
208 Enquire::get_matching_terms_begin(docid did) const
210 return internal->get_matching_terms_begin(did);
213 void
214 Enquire::set_expansion_scheme(const std::string &eweightname, double expand_k) const
216 if (eweightname == "bo1") {
217 internal->eweight = Enquire::Internal::EXPAND_BO1;
218 } else if (eweightname == "trad") {
219 internal->eweight = Enquire::Internal::EXPAND_TRAD;
220 } else {
221 throw_invalid_arg("Enquire::set_expansion_scheme(): eweightname must "
222 "be 'bo1' or 'trad'");
224 internal->expand_k = expand_k;
227 ESet
228 Enquire::get_eset(termcount maxitems,
229 const RSet& rset,
230 int flags,
231 const ExpandDecider* edecider,
232 double min_weight) const
234 return internal->get_eset(maxitems, rset, flags, edecider, min_weight);
237 std::string
238 Enquire::get_description() const
240 string desc = "Enquire(db=";
241 desc += internal->db.get_description();
242 if (!internal->query.empty()) {
243 desc += ", query=";
244 desc += internal->query.get_description();
246 desc += ')';
247 return desc;
250 Enquire::Internal::Internal(const Database& db_)
251 : db(db_) {}
253 MSet
254 Enquire::Internal::get_mset(doccount first,
255 doccount maxitems,
256 doccount checkatleast,
257 const RSet* rset,
258 const MatchDecider* mdecider) const
260 if (query.empty()) {
261 MSet mset;
262 mset.internal->set_first(first);
263 return mset;
266 if (percent_threshold && (sort_by == VAL || sort_by == VAL_REL)) {
267 throw Xapian::UnimplementedError("Use of a percentage cutoff while "
268 "sorting primary by value isn't "
269 "currently supported");
272 // Lazily initialise weight to its default if necessary.
273 if (!weight.get())
274 weight.reset(new BM25Weight);
276 // Lazily initialise query_length if it wasn't explicitly specified.
277 if (query_length == 0) {
278 query_length = query.get_length();
281 Xapian::doccount first_orig = first;
283 Xapian::doccount docs = db.get_doccount();
284 first = min(first, docs);
285 maxitems = min(maxitems, docs - first);
286 checkatleast = min(checkatleast, docs);
287 checkatleast = max(checkatleast, first + maxitems);
290 unique_ptr<Xapian::Weight::Internal> stats(new Xapian::Weight::Internal);
291 ::Matcher match(db,
292 query,
293 query_length,
294 rset,
295 *stats,
296 *weight,
297 (sort_functor.get() != NULL),
298 (mdecider != NULL),
299 collapse_key,
300 collapse_max,
301 percent_threshold,
302 weight_threshold,
303 order,
304 sort_key,
305 sort_by,
306 sort_val_reverse,
307 time_limit,
308 matchspies);
310 MSet mset = match.get_mset(first,
311 maxitems,
312 checkatleast,
313 *stats,
314 *weight,
315 mdecider,
316 sort_functor.get(),
317 collapse_key,
318 collapse_max,
319 percent_threshold,
320 weight_threshold,
321 order,
322 sort_key,
323 sort_by,
324 sort_val_reverse,
325 time_limit,
326 matchspies);
328 if (first_orig != first && mset.internal.get()) {
329 mset.internal->set_first(first_orig);
332 mset.internal->set_enquire(this);
334 if (!mset.internal->get_stats()) {
335 mset.internal->set_stats(stats.release());
338 return mset;
341 TermIterator
342 Enquire::Internal::get_matching_terms_begin(docid did) const
344 if (query.empty())
345 return TermIterator();
347 struct term_and_pos {
348 string term;
349 Xapian::termpos pos;
351 term_and_pos(const string& term_, Xapian::termpos pos_)
352 : term(term_), pos(pos_) {}
355 vector<term_and_pos> query_terms;
356 Xapian::termpos pos = 1;
357 for (auto t = query.get_terms_begin(); t != query.get_terms_end(); ++t) {
358 query_terms.emplace_back(*t, pos++);
361 if (query_terms.empty())
362 return TermIterator();
364 // Reorder by term, secondary sort by position.
365 sort(query_terms.begin(), query_terms.end(),
366 [](const term_and_pos& a, const term_and_pos& b) {
367 int cmp = a.term.compare(b.term);
368 return cmp ? cmp < 0 : a.pos < b.pos;
371 // Loop through the query terms, skipping the document terms for each to
372 // see which match, and shuffling down the matching ones. Also discard
373 // repeats, keeping the smallest position.
374 size_t i = 0, j = 0;
375 auto t = db.termlist_begin(did);
376 do {
377 const string& term = query_terms[i].term;
378 if (j == 0 || term != query_terms[j - 1].term) {
379 t.skip_to(term);
380 if (t == db.termlist_end(did)) {
381 break;
384 if (*t == term) {
385 // Matched, so move down if necessary.
386 if (i != j)
387 query_terms[j] = std::move(query_terms[i]);
388 ++j;
391 } while (++i != query_terms.size());
393 // Truncate to leave just the matching terms.
394 query_terms.erase(query_terms.begin() + j, query_terms.end());
396 // Reorder by ascending query position.
397 sort(query_terms.begin(), query_terms.end(),
398 [](const term_and_pos& a, const term_and_pos& b) {
399 return a.pos < b.pos;
402 // Iterator adaptor to present query_terms as a container of just strings.
403 struct Itor {
404 vector<term_and_pos>::const_iterator it;
406 explicit
407 Itor(vector<term_and_pos>::const_iterator it_) : it(it_) {}
409 const std::string& operator*() const {
410 return it->term;
413 Itor& operator++() {
414 ++it;
415 return *this;
418 Itor operator++(int) {
419 Itor retval = *this;
420 ++it;
421 return retval;
424 bool operator!=(const Itor& o) { return it != o.it; }
427 return TermIterator(new VectorTermList(Itor(query_terms.cbegin()),
428 Itor(query_terms.cend())));
431 ESet
432 Enquire::Internal::get_eset(termcount maxitems,
433 const RSet& rset,
434 int flags,
435 const ExpandDecider* edecider_,
436 double min_weight) const
438 using Xapian::Internal::opt_intrusive_ptr;
439 opt_intrusive_ptr<const ExpandDecider> edecider(edecider_);
441 Xapian::ESet eset;
443 if (maxitems == 0 || rset.empty()) {
444 // Either we were asked for no results, or wouldn't produce any
445 // because no documents were marked as relevant.
446 return eset;
449 // Excluding query terms is a no-op without a query.
450 if ((flags & Enquire::INCLUDE_QUERY_TERMS) == 0 && !query.empty()) {
451 auto edft = new ExpandDeciderFilterTerms(query.get_terms_begin(),
452 query.get_terms_end());
453 if (edecider.get() == NULL) {
454 edecider = edft->release();
455 } else {
456 // Make sure ExpandDeciderFilterTerms doesn't leak if new throws.
457 opt_intrusive_ptr<const ExpandDecider> ptr(edft->release());
458 edecider = (new ExpandDeciderAnd(ptr.get(),
459 edecider.get()))->release();
463 bool use_exact_termfreq = flags & Enquire::USE_EXACT_TERMFREQ;
464 if (eweight == Enquire::Internal::EXPAND_BO1) {
465 using Xapian::Internal::Bo1EWeight;
466 Bo1EWeight bo1eweight(db, rset.size(), use_exact_termfreq);
467 eset.internal->expand(maxitems, db, rset, edecider.get(), bo1eweight,
468 min_weight);
469 } else {
470 AssertEq(eweight, Enquire::Internal::EXPAND_TRAD);
471 using Xapian::Internal::TradEWeight;
472 TradEWeight tradeweight(db, rset.size(), use_exact_termfreq, expand_k);
473 eset.internal->expand(maxitems, db, rset, edecider.get(), tradeweight,
474 min_weight);
477 return eset;