2 * @brief Xapian::Enquire class
4 /* Copyright (C) 2009,2017 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 #include "xapian/enquire.h"
24 #include "enquireinternal.h"
26 #include "expand/esetinternal.h"
27 #include "expand/expandweight.h"
28 #include "matcher/matcher.h"
29 #include "msetinternal.h"
30 #include "vectortermlist.h"
31 #include "weight/weightinternal.h"
32 #include "xapian/database.h"
33 #include "xapian/error.h"
34 #include "xapian/expanddecider.h"
35 #include "xapian/intrusive_ptr.h"
36 #include "xapian/keymaker.h"
37 #include "xapian/matchspy.h"
38 #include "xapian/query.h"
39 #include "xapian/rset.h"
40 #include "xapian/weight.h"
50 throw_invalid_arg(const char* msg
) {
51 throw Xapian::InvalidArgumentError(msg
);
56 Enquire::Enquire(const Enquire
& o
) : internal(o
.internal
) {}
59 Enquire::operator=(const Enquire
& o
)
61 internal
= o
.internal
;
65 Enquire::Enquire(const Database
& db
) : internal(new Enquire::Internal(db
)) {}
67 Enquire::~Enquire() {}
70 Enquire::set_query(const Query
& query
, termcount query_length
)
72 internal
->query
= query
;
73 internal
->query_length
= query_length
;
77 Enquire::get_query() const
79 return internal
->query
;
83 Enquire::set_weighting_scheme(const Weight
& weight
)
85 internal
->weight
.reset(weight
.clone());
89 Enquire::set_docid_order(docid_order order
)
91 internal
->order
= order
;
95 Enquire::set_sort_by_relevance()
97 internal
->sort_by
= Internal::REL
;
101 Enquire::set_sort_by_value(valueno sort_key
, bool reverse
)
103 internal
->sort_by
= Internal::VAL
;
104 internal
->sort_functor
= NULL
;
105 internal
->sort_key
= sort_key
;
106 internal
->sort_val_reverse
= reverse
;
110 Enquire::set_sort_by_key(KeyMaker
* sorter
, bool reverse
)
112 if (sorter
== NULL
) {
113 throw_invalid_arg("Enquire::set_sort_by_key(): sorter cannot be NULL");
115 internal
->sort_by
= Internal::VAL
;
116 internal
->sort_functor
= sorter
;
117 internal
->sort_val_reverse
= reverse
;
121 Enquire::set_sort_by_value_then_relevance(valueno sort_key
, bool reverse
)
123 internal
->sort_by
= Internal::VAL_REL
;
124 internal
->sort_functor
= NULL
;
125 internal
->sort_key
= sort_key
;
126 internal
->sort_val_reverse
= reverse
;
130 Enquire::set_sort_by_key_then_relevance(KeyMaker
* sorter
, bool reverse
)
132 if (sorter
== NULL
) {
133 throw_invalid_arg("Enquire::set_sort_by_key_then_relevance(): "
134 "sorter cannot be NULL");
136 internal
->sort_by
= Internal::VAL_REL
;
137 internal
->sort_functor
= sorter
;
138 internal
->sort_val_reverse
= reverse
;
142 Enquire::set_sort_by_relevance_then_value(valueno sort_key
, bool reverse
)
144 internal
->sort_by
= Internal::REL_VAL
;
145 internal
->sort_functor
= NULL
;
146 internal
->sort_key
= sort_key
;
147 internal
->sort_val_reverse
= reverse
;
151 Enquire::set_sort_by_relevance_then_key(KeyMaker
* sorter
, bool reverse
)
153 if (sorter
== NULL
) {
154 throw_invalid_arg("Enquire::set_sort_by_relevance_then_key(): "
155 "sorter cannot be NULL");
157 internal
->sort_by
= Internal::REL_VAL
;
158 internal
->sort_functor
= sorter
;
159 internal
->sort_val_reverse
= reverse
;
163 Enquire::set_collapse_key(valueno collapse_key
, doccount collapse_max
)
165 internal
->collapse_key
= collapse_key
;
166 internal
->collapse_max
= collapse_max
;
170 Enquire::set_cutoff(int percent_threshold
, double weight_threshold
)
172 internal
->percent_threshold
= percent_threshold
;
173 internal
->weight_threshold
= weight_threshold
;
177 Enquire::add_matchspy(MatchSpy
* spy
)
179 using Xapian::Internal::opt_intrusive_ptr
;
181 throw_invalid_arg("Enquire::add_matchspy(): spy cannot be NULL");
182 internal
->matchspies
.push_back(opt_intrusive_ptr
<MatchSpy
>(spy
));
186 Enquire::clear_matchspies()
188 internal
->matchspies
.clear();
192 Enquire::set_time_limit(double time_limit
)
194 internal
->time_limit
= time_limit
;
198 Enquire::get_mset(doccount first
,
200 doccount checkatleast
,
202 const MatchDecider
* mdecider
) const
204 return internal
->get_mset(first
, maxitems
, checkatleast
, rset
, mdecider
);
208 Enquire::get_matching_terms_begin(docid did
) const
210 return internal
->get_matching_terms_begin(did
);
214 Enquire::set_expansion_scheme(const std::string
&eweightname
, double expand_k
) const
216 if (eweightname
== "bo1") {
217 internal
->eweight
= Enquire::Internal::EXPAND_BO1
;
218 } else if (eweightname
== "trad") {
219 internal
->eweight
= Enquire::Internal::EXPAND_TRAD
;
221 throw_invalid_arg("Enquire::set_expansion_scheme(): eweightname must "
222 "be 'bo1' or 'trad'");
224 internal
->expand_k
= expand_k
;
228 Enquire::get_eset(termcount maxitems
,
231 const ExpandDecider
* edecider
,
232 double min_weight
) const
234 return internal
->get_eset(maxitems
, rset
, flags
, edecider
, min_weight
);
238 Enquire::get_description() const
240 string desc
= "Enquire(db=";
241 desc
+= internal
->db
.get_description();
242 if (!internal
->query
.empty()) {
244 desc
+= internal
->query
.get_description();
250 Enquire::Internal::Internal(const Database
& db_
)
254 Enquire::Internal::get_mset(doccount first
,
256 doccount checkatleast
,
258 const MatchDecider
* mdecider
) const
262 mset
.internal
->set_first(first
);
266 if (percent_threshold
&& (sort_by
== VAL
|| sort_by
== VAL_REL
)) {
267 throw Xapian::UnimplementedError("Use of a percentage cutoff while "
268 "sorting primary by value isn't "
269 "currently supported");
272 // Lazily initialise weight to its default if necessary.
274 weight
.reset(new BM25Weight
);
276 // Lazily initialise query_length if it wasn't explicitly specified.
277 if (query_length
== 0) {
278 query_length
= query
.get_length();
281 Xapian::doccount first_orig
= first
;
283 Xapian::doccount docs
= db
.get_doccount();
284 first
= min(first
, docs
);
285 maxitems
= min(maxitems
, docs
- first
);
286 checkatleast
= min(checkatleast
, docs
);
287 checkatleast
= max(checkatleast
, first
+ maxitems
);
290 unique_ptr
<Xapian::Weight::Internal
> stats(new Xapian::Weight::Internal
);
297 (sort_functor
.get() != NULL
),
310 MSet mset
= match
.get_mset(first
,
327 if (first_orig
!= first
&& mset
.internal
.get()) {
328 mset
.internal
->set_first(first_orig
);
331 mset
.internal
->set_enquire(this);
333 if (!mset
.internal
->get_stats()) {
334 mset
.internal
->set_stats(stats
.release());
341 Enquire::Internal::get_matching_terms_begin(docid did
) const
344 return TermIterator();
346 struct term_and_pos
{
350 term_and_pos(const string
& term_
, Xapian::termpos pos_
)
351 : term(term_
), pos(pos_
) {}
354 vector
<term_and_pos
> query_terms
;
355 Xapian::termpos pos
= 1;
356 for (auto t
= query
.get_terms_begin(); t
!= query
.get_terms_end(); ++t
) {
357 query_terms
.emplace_back(*t
, pos
++);
360 if (query_terms
.empty())
361 return TermIterator();
363 // Reorder by term, secondary sort by position.
364 sort(query_terms
.begin(), query_terms
.end(),
365 [](const term_and_pos
& a
, const term_and_pos
& b
) {
366 int cmp
= a
.term
.compare(b
.term
);
367 return cmp
? cmp
< 0 : a
.pos
< b
.pos
;
370 // Loop through the query terms, skipping the document terms for each to
371 // see which match, and shuffling down the matching ones. Also discard
372 // repeats, keeping the smallest position.
374 auto t
= db
.termlist_begin(did
);
376 const string
& term
= query_terms
[i
].term
;
377 if (j
== 0 || term
!= query_terms
[j
- 1].term
) {
379 if (t
== db
.termlist_end(did
)) {
384 // Matched, so move down if necessary.
386 query_terms
[j
] = std::move(query_terms
[i
]);
390 } while (++i
!= query_terms
.size());
392 // Truncate to leave just the matching terms.
393 query_terms
.erase(query_terms
.begin() + j
, query_terms
.end());
395 // Reorder by ascending query position.
396 sort(query_terms
.begin(), query_terms
.end(),
397 [](const term_and_pos
& a
, const term_and_pos
& b
) {
398 return a
.pos
< b
.pos
;
401 // Iterator adaptor to present query_terms as a container of just strings.
403 vector
<term_and_pos
>::const_iterator it
;
406 Itor(vector
<term_and_pos
>::const_iterator it_
) : it(it_
) {}
408 const std::string
& operator*() const {
417 Itor
operator++(int) {
423 bool operator!=(const Itor
& o
) { return it
!= o
.it
; }
426 return TermIterator(new VectorTermList(Itor(query_terms
.cbegin()),
427 Itor(query_terms
.cend())));
431 Enquire::Internal::get_eset(termcount maxitems
,
434 const ExpandDecider
* edecider_
,
435 double min_weight
) const
437 using Xapian::Internal::opt_intrusive_ptr
;
438 opt_intrusive_ptr
<const ExpandDecider
> edecider(edecider_
);
442 if (maxitems
== 0 || rset
.empty()) {
443 // Either we were asked for no results, or wouldn't produce any
444 // because no documents were marked as relevant.
448 // Excluding query terms is a no-op without a query.
449 if ((flags
& Enquire::INCLUDE_QUERY_TERMS
) == 0 && !query
.empty()) {
450 auto edft
= new ExpandDeciderFilterTerms(query
.get_terms_begin(),
451 query
.get_terms_end());
452 if (edecider
.get() == NULL
) {
453 edecider
= edft
->release();
455 // Make sure ExpandDeciderFilterTerms doesn't leak if new throws.
456 opt_intrusive_ptr
<const ExpandDecider
> ptr(edft
->release());
457 edecider
= (new ExpandDeciderAnd(ptr
.get(),
458 edecider
.get()))->release();
462 bool use_exact_termfreq
= flags
& Enquire::USE_EXACT_TERMFREQ
;
463 if (eweight
== Enquire::Internal::EXPAND_BO1
) {
464 using Xapian::Internal::Bo1EWeight
;
465 Bo1EWeight
bo1eweight(db
, rset
.size(), use_exact_termfreq
);
466 eset
.internal
->expand(maxitems
, db
, rset
, edecider
.get(), bo1eweight
,
469 AssertEq(eweight
, Enquire::Internal::EXPAND_TRAD
);
470 using Xapian::Internal::TradEWeight
;
471 TradEWeight
tradeweight(db
, rset
.size(), use_exact_termfreq
, expand_k
);
472 eset
.internal
->expand(maxitems
, db
, rset
, edecider
.get(), tradeweight
,