1 /** @file queryinternal.cc
2 * @brief Xapian::Query internals
4 /* Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018 Olly Betts
5 * Copyright (C) 2008,2009 Lemur Consulting Ltd
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
24 #include "queryinternal.h"
26 #include "xapian/error.h"
27 #include "xapian/postingsource.h"
28 #include "xapian/query.h"
31 #include "leafpostlist.h"
32 #include "matcher/andmaybepostlist.h"
33 #include "matcher/andnotpostlist.h"
34 #include "emptypostlist.h"
35 #include "matcher/exactphrasepostlist.h"
36 #include "matcher/externalpostlist.h"
37 #include "matcher/maxpostlist.h"
38 #include "matcher/multiandpostlist.h"
39 #include "matcher/multixorpostlist.h"
40 #include "matcher/nearpostlist.h"
41 #include "matcher/orpospostlist.h"
42 #include "matcher/orpostlist.h"
43 #include "matcher/phrasepostlist.h"
44 #include "matcher/queryoptimiser.h"
45 #include "matcher/valuerangepostlist.h"
46 #include "matcher/valuegepostlist.h"
47 #include "net/length.h"
48 #include "serialise-double.h"
49 #include "stringutils.h"
55 #include "unicode/description_append.h"
62 #include <unordered_set>
67 template<class CLASS
> struct delete_ptr
{
68 void operator()(CLASS
*p
) const { delete p
; }
71 using Xapian::Internal::AndContext
;
72 using Xapian::Internal::OrContext
;
73 using Xapian::Internal::XorContext
;
79 /** Class providing an operator which sorts postlists to select max or terms.
80 * This returns true if a has a (strictly) greater termweight than b.
82 * Using this comparator will tend to result in multiple calls to
83 * recalc_maxweight() for each of the subqueries (we use it with nth_element
84 * which should be O(n)) - perhaps it'd be better to call recalc_maxweight()
85 * once and then sort on that.
87 struct CmpMaxOrTerms
{
88 /** Return true if and only if a has a strictly greater termweight than b. */
89 bool operator()(PostList
*a
, PostList
*b
) {
90 #if (defined(__i386__) && !defined(__SSE_MATH__)) || \
91 defined(__mc68000__) || defined(__mc68010__) || \
92 defined(__mc68020__) || defined(__mc68030__)
93 // On some architectures, most common of which is x86, floating point
94 // values are calculated and stored in registers with excess precision.
95 // If the two recalc_maxweight() calls below return identical values in a
96 // register, the excess precision may be dropped for one of them but
97 // not the other (e.g. because the compiler saves the first calculated
98 // weight to memory while calculating the second, then reloads it to
99 // compare). This leads to both a > b and b > a being true, which
100 // violates the antisymmetry property of the strict weak ordering
101 // required by nth_element(). This can have serious consequences (e.g.
104 // Note that m68k only has excess precision in earlier models - 68040
106 // http://gcc.gnu.org/ml/gcc-patches/2008-11/msg00105.html
108 // To avoid this, we store each result in a volatile double prior to
109 // comparing them. This means that the result of this test should
110 // match that on other architectures with the same double format (which
111 // is desirable), and actually has less overhead than rounding both
112 // results to float (which is another approach which works).
113 volatile double a_max_wt
= a
->recalc_maxweight();
114 volatile double b_max_wt
= b
->recalc_maxweight();
115 return a_max_wt
> b_max_wt
;
117 return a
->recalc_maxweight() > b
->recalc_maxweight();
122 /// Comparison functor which orders PostList* by descending get_termfreq_est().
123 struct ComparePostListTermFreqAscending
{
124 /// Order by descending get_termfreq_est().
125 bool operator()(const PostList
*a
, const PostList
*b
) const {
126 return a
->get_termfreq_est() > b
->get_termfreq_est();
132 QueryOptimiser
* qopt
;
134 vector
<PostList
*> pls
;
137 Context(QueryOptimiser
* qopt_
, size_t reserve
);
141 void add_postlist(PostList
* pl
) {
149 size_t size() const {
153 void shrink(size_t new_size
);
156 Context::Context(QueryOptimiser
* qopt_
, size_t reserve
)
159 pls
.reserve(reserve
);
163 Context::shrink(size_t new_size
)
165 AssertRel(new_size
, <=, pls
.size());
166 if (new_size
>= pls
.size())
169 const PostList
* hint_pl
= qopt
->get_hint_postlist();
170 for (auto&& i
= pls
.begin() + new_size
; i
!= pls
.end(); ++i
) {
171 const PostList
* pl
= *i
;
172 if (rare(pl
== hint_pl
)) {
173 // We were about to delete qopt's hint - instead tell qopt to take
175 qopt
->take_hint_ownership();
181 pls
.resize(new_size
);
189 class OrContext
: public Context
{
191 OrContext(QueryOptimiser
* qopt_
, size_t reserve
)
192 : Context(qopt_
, reserve
) { }
194 /// Select the best set_size postlists from the last out_of added.
195 void select_elite_set(size_t set_size
, size_t out_of
);
197 /// Select the set_size postlists with the highest term frequency.
198 void select_most_frequent(size_t set_size
);
200 PostList
* postlist();
201 PostList
* postlist_max();
205 OrContext::select_elite_set(size_t set_size
, size_t out_of
)
207 vector
<PostList
*>::iterator begin
= pls
.begin() + pls
.size() - out_of
;
208 nth_element(begin
, begin
+ set_size
- 1, pls
.end(), CmpMaxOrTerms());
209 shrink(pls
.size() - out_of
+ set_size
);
213 OrContext::select_most_frequent(size_t set_size
)
215 vector
<PostList
*>::iterator begin
= pls
.begin();
216 nth_element(begin
, begin
+ set_size
- 1, pls
.end(),
217 ComparePostListTermFreqAscending());
222 OrContext::postlist()
224 Assert(!pls
.empty());
226 if (pls
.size() == 1) {
227 PostList
* pl
= pls
[0];
232 // Make postlists into a heap so that the postlist with the greatest term
233 // frequency is at the top of the heap.
234 Heap::make(pls
.begin(), pls
.end(), ComparePostListTermFreqAscending());
236 // Now build a tree of binary OrPostList objects.
238 // The algorithm used to build the tree is like that used to build an
239 // optimal Huffman coding tree. If we called next() repeatedly, this
240 // arrangement would minimise the number of method calls. Generally we
241 // don't actually do that, but this arrangement is still likely to be a
242 // good one, and it does minimise the work in the worst case.
244 // We build the tree such that at each branch:
246 // l.get_termfreq_est() >= r.get_termfreq_est()
248 // We do this so that the OrPostList class can be optimised assuming
249 // that this is the case.
250 PostList
* r
= pls
.front();
251 Heap::pop(pls
.begin(), pls
.end(), ComparePostListTermFreqAscending());
254 pl
= new OrPostList(pls
.front(), r
, qopt
->matcher
, qopt
->db_size
);
256 if (pls
.size() == 1) {
262 Heap::replace(pls
.begin(), pls
.end(),
263 ComparePostListTermFreqAscending());
268 OrContext::postlist_max()
270 Assert(!pls
.empty());
272 if (pls
.size() == 1) {
273 PostList
* pl
= pls
[0];
278 // Sort the postlists so that the postlist with the greatest term frequency
280 sort(pls
.begin(), pls
.end(), ComparePostListTermFreqAscending());
283 pl
= new MaxPostList(pls
.begin(), pls
.end(), qopt
->matcher
, qopt
->db_size
);
289 class XorContext
: public Context
{
291 XorContext(QueryOptimiser
* qopt_
, size_t reserve
)
292 : Context(qopt_
, reserve
) { }
294 PostList
* postlist();
298 XorContext::postlist()
300 Xapian::doccount db_size
= qopt
->db_size
;
302 pl
= new MultiXorPostList(pls
.begin(), pls
.end(), qopt
->matcher
, db_size
);
304 // Empty pls so our destructor doesn't delete them all!
309 class AndContext
: public Context
{
311 Xapian::Query::op op_
;
313 /// Start and end indices for the PostLists this positional filter uses.
316 Xapian::termcount window
;
319 PosFilter(Xapian::Query::op op__
, size_t begin_
, size_t end_
,
320 Xapian::termcount window_
)
321 : op_(op__
), begin(begin_
), end(end_
), window(window_
) { }
323 PostList
* postlist(PostList
* pl
,
324 const vector
<PostList
*>& pls
,
325 PostListTree
* pltree
) const;
328 list
<PosFilter
> pos_filters
;
331 AndContext(QueryOptimiser
* qopt_
, size_t reserve
)
332 : Context(qopt_
, reserve
) { }
334 void add_pos_filter(Query::op op_
,
336 Xapian::termcount window
);
338 PostList
* postlist();
342 AndContext::PosFilter::postlist(PostList
* pl
,
343 const vector
<PostList
*>& pls
,
344 PostListTree
* pltree
) const
346 vector
<PostList
*>::const_iterator terms_begin
= pls
.begin() + begin
;
347 vector
<PostList
*>::const_iterator terms_end
= pls
.begin() + end
;
349 if (op_
== Xapian::Query::OP_NEAR
) {
350 pl
= new NearPostList(pl
, window
, terms_begin
, terms_end
, pltree
);
351 } else if (window
== end
- begin
) {
352 AssertEq(op_
, Xapian::Query::OP_PHRASE
);
353 pl
= new ExactPhrasePostList(pl
, terms_begin
, terms_end
, pltree
);
355 AssertEq(op_
, Xapian::Query::OP_PHRASE
);
356 pl
= new PhrasePostList(pl
, window
, terms_begin
, terms_end
, pltree
);
365 AndContext::add_pos_filter(Query::op op_
,
367 Xapian::termcount window
)
370 size_t end
= pls
.size();
371 size_t begin
= end
- n_subqs
;
372 pos_filters
.push_back(PosFilter(op_
, begin
, end
, window
));
376 AndContext::postlist()
379 // This case only happens if this sub-database has no positional data
380 // (but another sub-database does).
381 Assert(pos_filters
.empty());
382 return new EmptyPostList
;
385 unique_ptr
<PostList
> pl(new MultiAndPostList(pls
.begin(), pls
.end(),
386 qopt
->matcher
, qopt
->db_size
));
388 // Sort the positional filters to try to apply them in an efficient order.
389 // FIXME: We need to figure out what that is! Try applying lowest cf/tf
392 // Apply any positional filters.
393 list
<PosFilter
>::const_iterator i
;
394 for (i
= pos_filters
.begin(); i
!= pos_filters
.end(); ++i
) {
395 const PosFilter
& filter
= *i
;
396 pl
.reset(filter
.postlist(pl
.release(), pls
, qopt
->matcher
));
399 // Empty pls so our destructor doesn't delete them all!
406 Query::Internal::~Internal() { }
409 Query::Internal::get_num_subqueries() const XAPIAN_NOEXCEPT
415 Query::Internal::get_subquery(size_t) const
417 throw Xapian::InvalidArgumentError("get_subquery() not meaningful for this Query object");
421 Query::Internal::get_wqf() const
423 throw Xapian::InvalidArgumentError("get_wqf() not meaningful for this Query object");
427 Query::Internal::get_pos() const
429 throw Xapian::InvalidArgumentError("get_pos() not meaningful for this Query object");
433 Query::Internal::gather_terms(void *) const
438 Query::Internal::get_length() const XAPIAN_NOEXCEPT
444 Query::Internal::unserialise(const char ** p
, const char * end
,
445 const Registry
& reg
)
449 unsigned char ch
= *(*p
)++;
451 case 4: case 5: case 6: case 7: {
455 // nnn -> n_subqs (0 means encoded value follows)
456 // cccc -> code (which OP_XXX)
457 size_t n_subqs
= ch
& 0x07;
459 decode_length(p
, end
, n_subqs
);
462 unsigned char code
= (ch
>> 3) & 0x0f;
463 Xapian::termcount parameter
= 0;
465 decode_length(p
, end
, parameter
);
466 Xapian::Internal::QueryBranch
* result
;
469 result
= new Xapian::Internal::QueryAnd(n_subqs
);
472 result
= new Xapian::Internal::QueryOr(n_subqs
);
474 case 2: // OP_AND_NOT
475 result
= new Xapian::Internal::QueryAndNot(n_subqs
);
478 result
= new Xapian::Internal::QueryXor(n_subqs
);
480 case 4: // OP_AND_MAYBE
481 result
= new Xapian::Internal::QueryAndMaybe(n_subqs
);
484 result
= new Xapian::Internal::QueryFilter(n_subqs
);
486 case 6: // OP_SYNONYM
487 result
= new Xapian::Internal::QuerySynonym(n_subqs
);
490 result
= new Xapian::Internal::QueryMax(n_subqs
);
492 case 13: // OP_ELITE_SET
493 result
= new Xapian::Internal::QueryEliteSet(n_subqs
,
497 result
= new Xapian::Internal::QueryNear(n_subqs
,
500 case 15: // OP_PHRASE
501 result
= new Xapian::Internal::QueryPhrase(n_subqs
,
505 // 8 to 12 are currently unused.
506 throw SerialisationError("Unknown multi-way branch Query operator");
509 result
->add_subquery(Xapian::Query(unserialise(p
, end
, reg
)));
514 case 2: case 3: { // Term
518 // LLLL -> length (0 means encoded value follows)
520 // 0: wqf = 0; pos = 0
521 // 1: wqf = 1; pos = 0
522 // 2: wqf = 1; pos -> encoded value follows
523 // 3: wqf -> encoded value follows; pos -> encoded value follows
524 size_t len
= ch
& 0x0f;
526 decode_length(p
, end
, len
);
529 if (size_t(end
- *p
) < len
)
530 throw SerialisationError("Not enough data");
531 string
term(*p
, len
);
534 int code
= ((ch
>> 4) & 0x03);
536 Xapian::termcount wqf
= static_cast<Xapian::termcount
>(code
> 0);
538 decode_length(p
, end
, wqf
);
540 Xapian::termpos pos
= 0;
542 decode_length(p
, end
, pos
);
544 return new Xapian::Internal::QueryTerm(term
, wqf
, pos
);
547 // OP_VALUE_RANGE or OP_VALUE_GE or OP_VALUE_LE
550 // ssss -> slot number (15 means encoded value follows)
552 // 0: OP_VALUE_RANGE (or OP_VALUE_LE if begin empty)
554 Xapian::valueno slot
= ch
& 15;
556 decode_length(p
, end
, slot
);
560 decode_length_and_check(p
, end
, len
);
561 string
begin(*p
, len
);
565 return new Xapian::Internal::QueryValueGE(slot
, begin
);
569 decode_length_and_check(p
, end
, len
);
570 string
end_(*p
, len
);
572 if (begin
.empty()) // FIXME: is this right?
573 return new Xapian::Internal::QueryValueLE(slot
, end_
);
574 return new Xapian::Internal::QueryValueRange(slot
, begin
, end_
);
580 // ttttt -> encodes which OP_XXX
582 case 0x00: // OP_INVALID
583 return new Xapian::Internal::QueryInvalid();
584 case 0x0b: { // Wildcard
586 throw SerialisationError("not enough data");
587 Xapian::termcount max_expansion
;
588 decode_length(p
, end
, max_expansion
);
590 throw SerialisationError("not enough data");
591 int max_type
= static_cast<unsigned char>(*(*p
)++);
592 op combiner
= static_cast<op
>(*(*p
)++);
594 decode_length_and_check(p
, end
, len
);
595 string
pattern(*p
, len
);
597 return new Xapian::Internal::QueryWildcard(pattern
,
602 case 0x0c: { // PostingSource
604 decode_length_and_check(p
, end
, len
);
605 string
name(*p
, len
);
608 const PostingSource
* reg_source
= reg
.get_posting_source(name
);
610 string m
= "PostingSource ";
612 m
+= " not registered";
613 throw SerialisationError(m
);
616 decode_length_and_check(p
, end
, len
);
617 PostingSource
* source
=
618 reg_source
->unserialise_with_registry(string(*p
, len
),
621 return new Xapian::Internal::QueryPostingSource(source
->release());
624 using Xapian::Internal::QueryScaleWeight
;
625 double scale_factor
= unserialise_double(p
, end
);
626 return new QueryScaleWeight(scale_factor
,
627 Query(unserialise(p
, end
, reg
)));
630 Xapian::termcount wqf
;
632 decode_length(p
, end
, wqf
);
633 decode_length(p
, end
, pos
);
634 return new Xapian::Internal::QueryTerm(string(), wqf
, pos
);
637 return new Xapian::Internal::QueryTerm();
638 default: // Others currently unused.
644 string msg
= "Unknown Query serialisation: ";
646 throw SerialisationError(msg
);
650 Query::Internal::postlist_sub_and_like(AndContext
& ctx
,
651 QueryOptimiser
* qopt
,
654 ctx
.add_postlist(postlist(qopt
, factor
));
658 Query::Internal::postlist_sub_or_like(OrContext
& ctx
,
659 QueryOptimiser
* qopt
,
662 ctx
.add_postlist(postlist(qopt
, factor
));
666 Query::Internal::postlist_sub_xor(XorContext
& ctx
,
667 QueryOptimiser
* qopt
,
670 ctx
.add_postlist(postlist(qopt
, factor
));
676 QueryTerm::get_type() const XAPIAN_NOEXCEPT
678 return term
.empty() ? Query::LEAF_MATCH_ALL
: Query::LEAF_TERM
;
682 QueryTerm::get_description() const
686 desc
= "<alldocuments>";
688 description_append(desc
, term
);
701 QueryPostingSource::QueryPostingSource(PostingSource
* source_
)
705 throw Xapian::InvalidArgumentError("source parameter can't be NULL");
706 if (source
->_refs
== 0) {
707 // source_ isn't reference counted, so try to clone it. If clone()
708 // isn't implemented, just use the object provided and it's the
709 // caller's responsibility to ensure it stays valid while in use.
710 PostingSource
* cloned_source
= source
->clone();
711 if (cloned_source
) source
= cloned_source
->release();
716 QueryPostingSource::get_type() const XAPIAN_NOEXCEPT
718 return Query::LEAF_POSTING_SOURCE
;
722 QueryPostingSource::get_description() const
724 string desc
= "PostingSource(";
725 desc
+= source
->get_description();
730 QueryScaleWeight::QueryScaleWeight(double factor
, const Query
& subquery_
)
731 : scale_factor(factor
), subquery(subquery_
)
733 if (rare(scale_factor
< 0.0))
734 throw Xapian::InvalidArgumentError("OP_SCALE_WEIGHT requires factor >= 0");
738 QueryScaleWeight::get_type() const XAPIAN_NOEXCEPT
740 return Query::OP_SCALE_WEIGHT
;
744 QueryScaleWeight::get_num_subqueries() const XAPIAN_NOEXCEPT
750 QueryScaleWeight::get_subquery(size_t) const
756 QueryScaleWeight::get_description() const
758 Assert(subquery
.internal
.get());
759 string desc
= str(scale_factor
);
761 desc
+= subquery
.internal
->get_description();
766 QueryTerm::postlist(QueryOptimiser
* qopt
, double factor
) const
768 LOGCALL(QUERY
, PostList
*, "QueryTerm::postlist", qopt
| factor
);
770 qopt
->inc_total_subqs();
771 RETURN(qopt
->open_post_list(term
, wqf
, factor
));
775 QueryPostingSource::postlist(QueryOptimiser
* qopt
, double factor
) const
777 LOGCALL(QUERY
, PostList
*, "QueryPostingSource::postlist", qopt
| factor
);
778 Assert(source
.get());
780 qopt
->inc_total_subqs();
781 // Casting away const on the Database::Internal here is OK, as we wrap
782 // them in a const Xapian::Database so non-const methods can't actually
783 // be called on the Database::Internal object.
784 const Xapian::Database
wrappeddb(
785 const_cast<Xapian::Database::Internal
*>(&(qopt
->db
)));
786 RETURN(new ExternalPostList(wrappeddb
, source
.get(), factor
, qopt
->matcher
));
790 QueryScaleWeight::postlist(QueryOptimiser
* qopt
, double factor
) const
792 LOGCALL(QUERY
, PostList
*, "QueryScaleWeight::postlist", qopt
| factor
);
793 RETURN(subquery
.internal
->postlist(qopt
, factor
* scale_factor
));
797 QueryTerm::gather_terms(void * void_terms
) const
799 // Skip Xapian::Query::MatchAll (aka Xapian::Query("")).
801 vector
<pair
<Xapian::termpos
, string
>> &terms
=
802 *static_cast<vector
<pair
<Xapian::termpos
, string
>>*>(void_terms
);
803 terms
.push_back(make_pair(pos
, term
));
808 QueryValueRange::postlist(QueryOptimiser
*qopt
, double factor
) const
810 LOGCALL(QUERY
, PostList
*, "QueryValueRange::postlist", qopt
| factor
);
812 qopt
->inc_total_subqs();
813 const Xapian::Database::Internal
& db
= qopt
->db
;
814 const string
& lb
= db
.get_value_lower_bound(slot
);
816 // This should only happen if there are no values in this slot (which
817 // could be because the backend just doesn't support values at all).
818 // If there were values in the slot, the backend should have a
819 // non-empty lower bound, even if it isn't a tight one.
820 AssertEq(db
.get_value_freq(slot
), 0);
821 RETURN(new EmptyPostList
);
824 RETURN(new EmptyPostList
);
826 const string
& ub
= db
.get_value_upper_bound(slot
);
828 RETURN(new EmptyPostList
);
831 // If begin <= lb too, then the range check isn't needed, but we do
832 // still need to consider which documents have a value set in this
833 // slot. If this value is set for all documents, we can replace it
834 // with the MatchAll postlist, which is especially efficient if
835 // there are no gaps in the docids.
836 if (begin
<= lb
&& db
.get_value_freq(slot
) == db
.get_doccount()) {
837 RETURN(db
.open_post_list(string()));
839 RETURN(new ValueGePostList(&db
, slot
, begin
));
841 RETURN(new ValueRangePostList(&db
, slot
, begin
, end
));
845 QueryValueRange::serialise(string
& result
) const
848 result
+= static_cast<char>(0x20 | slot
);
850 result
+= static_cast<char>(0x20 | 15);
851 result
+= encode_length(slot
- 15);
853 result
+= encode_length(begin
.size());
855 result
+= encode_length(end
.size());
860 QueryValueRange::get_type() const XAPIAN_NOEXCEPT
862 return Query::OP_VALUE_RANGE
;
866 QueryValueRange::get_description() const
868 string desc
= "VALUE_RANGE ";
871 description_append(desc
, begin
);
873 description_append(desc
, end
);
878 QueryValueLE::postlist(QueryOptimiser
*qopt
, double factor
) const
880 LOGCALL(QUERY
, PostList
*, "QueryValueLE::postlist", qopt
| factor
);
882 qopt
->inc_total_subqs();
883 const Xapian::Database::Internal
& db
= qopt
->db
;
884 const string
& lb
= db
.get_value_lower_bound(slot
);
885 // If lb.empty(), the backend doesn't provide value bounds.
888 RETURN(new EmptyPostList
);
890 if (limit
>= db
.get_value_upper_bound(slot
)) {
891 // The range check isn't needed, but we do still need to consider
892 // which documents have a value set in this slot. If this value is
893 // set for all documents, we can replace it with the MatchAll
894 // postlist, which is especially efficient if there are no gaps in
896 if (db
.get_value_freq(slot
) == db
.get_doccount()) {
897 RETURN(db
.open_post_list(string()));
901 RETURN(new ValueRangePostList(&db
, slot
, string(), limit
));
905 QueryValueLE::serialise(string
& result
) const
907 // Encode as a range with an empty start (which only takes a single byte to
910 result
+= static_cast<char>(0x20 | slot
);
912 result
+= static_cast<char>(0x20 | 15);
913 result
+= encode_length(slot
- 15);
915 result
+= encode_length(0);
916 result
+= encode_length(limit
.size());
921 QueryValueLE::get_type() const XAPIAN_NOEXCEPT
923 return Query::OP_VALUE_LE
;
927 QueryValueLE::get_description() const
929 string desc
= "VALUE_LE ";
932 description_append(desc
, limit
);
937 QueryValueGE::postlist(QueryOptimiser
*qopt
, double factor
) const
939 LOGCALL(QUERY
, PostList
*, "QueryValueGE::postlist", qopt
| factor
);
941 qopt
->inc_total_subqs();
942 const Xapian::Database::Internal
& db
= qopt
->db
;
943 const string
& lb
= db
.get_value_lower_bound(slot
);
944 // If lb.empty(), the backend doesn't provide value bounds.
946 if (limit
> db
.get_value_upper_bound(slot
)) {
947 RETURN(new EmptyPostList
);
950 // The range check isn't needed, but we do still need to consider
951 // which documents have a value set in this slot. If this value is
952 // set for all documents, we can replace it with the MatchAll
953 // postlist, which is especially efficient if there are no gaps in
955 if (db
.get_value_freq(slot
) == db
.get_doccount()) {
956 RETURN(db
.open_post_list(string()));
960 RETURN(new ValueGePostList(&db
, slot
, limit
));
964 QueryValueGE::serialise(string
& result
) const
967 result
+= static_cast<char>(0x20 | 0x10 | slot
);
969 result
+= static_cast<char>(0x20 | 0x10 | 15);
970 result
+= encode_length(slot
- 15);
972 result
+= encode_length(limit
.size());
977 QueryValueGE::get_type() const XAPIAN_NOEXCEPT
979 return Query::OP_VALUE_GE
;
983 QueryValueGE::get_description() const
985 string desc
= "VALUE_GE ";
988 description_append(desc
, limit
);
993 QueryWildcard::postlist(QueryOptimiser
* qopt
, double factor
) const
995 LOGCALL(QUERY
, PostList
*, "QueryWildcard::postlist", qopt
| factor
);
996 Query::op op
= combiner
;
997 double or_factor
= 0.0;
999 // If we have a factor of 0, we don't care about the weights, so
1000 // we're just like a normal OR query.
1002 } else if (op
!= Query::OP_SYNONYM
) {
1006 bool old_in_synonym
= qopt
->in_synonym
;
1007 if (!old_in_synonym
) {
1008 qopt
->in_synonym
= (op
== Query::OP_SYNONYM
);
1011 OrContext
ctx(qopt
, 0);
1012 unique_ptr
<TermList
> t(qopt
->db
.open_allterms(pattern
));
1013 Xapian::termcount expansions_left
= max_expansion
;
1014 // If there's no expansion limit, set expansions_left to the maximum
1015 // value Xapian::termcount can hold.
1016 if (expansions_left
== 0)
1022 if (max_type
< Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT
) {
1023 if (expansions_left
-- == 0) {
1024 if (max_type
== Xapian::Query::WILDCARD_LIMIT_FIRST
)
1026 string
msg("Wildcard ");
1028 msg
+= "* expands to more than ";
1029 msg
+= str(max_expansion
);
1031 throw Xapian::WildcardError(msg
);
1034 const string
& term
= t
->get_termname();
1035 ctx
.add_postlist(qopt
->open_lazy_post_list(term
, 1, or_factor
));
1038 if (max_type
== Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT
) {
1039 // FIXME: open_lazy_post_list() results in the term getting registered
1040 // for stats, so we still incur an avoidable cost from the full
1041 // expansion size of the wildcard, which is most likely to be visible
1042 // with the remote backend. Perhaps we should split creating the lazy
1043 // postlist from registering the term for stats.
1044 if (ctx
.size() > max_expansion
)
1045 ctx
.select_most_frequent(max_expansion
);
1048 if (factor
!= 0.0) {
1049 if (op
!= Query::OP_SYNONYM
) {
1050 qopt
->set_total_subqs(qopt
->get_total_subqs() + ctx
.size());
1052 qopt
->inc_total_subqs();
1056 qopt
->in_synonym
= old_in_synonym
;
1059 RETURN(new EmptyPostList
);
1061 if (op
== Query::OP_MAX
)
1062 RETURN(ctx
.postlist_max());
1064 PostList
* pl
= ctx
.postlist();
1065 if (op
== Query::OP_OR
)
1068 // We build an OP_OR tree for OP_SYNONYM and then wrap it in a
1069 // SynonymPostList, which supplies the weights.
1071 // We know the subqueries from a wildcard expansion are wdf-disjoint
1072 // (i.e. each wdf from the document contributes at most itself to the
1073 // wdf of the subquery).
1074 RETURN(qopt
->make_synonym_postlist(pl
, factor
, true));
1078 QueryWildcard::get_length() const XAPIAN_NOEXCEPT
1080 // We currently assume wqf is 1 for calculating the synonym's weight
1081 // since conceptually the synonym is one "virtual" term. If we were
1082 // to combine multiple occurrences of the same synonym expansion into
1083 // a single instance with wqf set, we would want to track the wqf.
1088 QueryWildcard::serialise(string
& result
) const
1090 result
+= static_cast<char>(0x0b);
1091 result
+= encode_length(max_expansion
);
1092 result
+= static_cast<unsigned char>(max_type
);
1093 result
+= static_cast<unsigned char>(combiner
);
1094 result
+= encode_length(pattern
.size());
1099 QueryWildcard::get_type() const XAPIAN_NOEXCEPT
1101 return Query::OP_WILDCARD
;
1105 QueryWildcard::get_description() const
1107 string desc
= "WILDCARD ";
1109 case Query::OP_SYNONYM
:
1122 description_append(desc
, pattern
);
1127 QueryBranch::get_length() const XAPIAN_NOEXCEPT
1129 // Sum results from all subqueries.
1130 Xapian::termcount result
= 0;
1131 QueryVector::const_iterator i
;
1132 for (i
= subqueries
.begin(); i
!= subqueries
.end(); ++i
) {
1133 // MatchNothing subqueries should have been removed by done(), but we
1134 // can't use Assert in a XAPIAN_NOEXCEPT function. But we'll get a
1136 result
+= (*i
).internal
->get_length();
1141 #define MULTIWAY(X) static_cast<unsigned char>(0x80 | (X) << 3)
1142 #define MISC(X) static_cast<unsigned char>(X)
1144 QueryBranch::serialise_(string
& result
, Xapian::termcount parameter
) const
1146 static const unsigned char first_byte
[] = {
1147 MULTIWAY(0), // OP_AND
1148 MULTIWAY(1), // OP_OR
1149 MULTIWAY(2), // OP_AND_NOT
1150 MULTIWAY(3), // OP_XOR
1151 MULTIWAY(4), // OP_AND_MAYBE
1152 MULTIWAY(5), // OP_FILTER
1153 MULTIWAY(14), // OP_NEAR
1154 MULTIWAY(15), // OP_PHRASE
1155 0, // OP_VALUE_RANGE
1156 MISC(3), // OP_SCALE_WEIGHT
1157 MULTIWAY(13), // OP_ELITE_SET
1160 MULTIWAY(6), // OP_SYNONYM
1161 MULTIWAY(7) // OP_MAX
1163 Xapian::Query::op op_
= get_op();
1164 AssertRel(size_t(op_
),<,sizeof(first_byte
));
1165 unsigned char ch
= first_byte
[op_
];
1167 // Multi-way operator.
1168 if (subqueries
.size() < 8)
1169 ch
|= subqueries
.size();
1171 if (subqueries
.size() >= 8)
1172 result
+= encode_length(subqueries
.size() - 8);
1173 if (ch
>= MULTIWAY(13))
1174 result
+= encode_length(parameter
);
1179 QueryVector::const_iterator i
;
1180 for (i
= subqueries
.begin(); i
!= subqueries
.end(); ++i
) {
1181 // MatchNothing subqueries should have been removed by done().
1182 Assert((*i
).internal
.get());
1183 (*i
).internal
->serialise(result
);
1186 // For OP_NEAR, OP_PHRASE, and OP_ELITE_SET, the window/set size gets
1187 // appended next by an overloaded serialise() method in the subclass.
1191 QueryBranch::serialise(string
& result
) const
1193 QueryBranch::serialise_(result
);
1197 QueryNear::serialise(string
& result
) const
1199 // FIXME: window - subqueries.size() ?
1200 QueryBranch::serialise_(result
, window
);
1204 QueryPhrase::serialise(string
& result
) const
1206 // FIXME: window - subqueries.size() ?
1207 QueryBranch::serialise_(result
, window
);
1211 QueryEliteSet::serialise(string
& result
) const
1213 // FIXME: set_size - subqueries.size() ?
1214 QueryBranch::serialise_(result
, set_size
);
1218 QueryBranch::gather_terms(void * void_terms
) const
1220 // Gather results from all subqueries.
1221 QueryVector::const_iterator i
;
1222 for (i
= subqueries
.begin(); i
!= subqueries
.end(); ++i
) {
1223 // MatchNothing subqueries should have been removed by done().
1224 Assert((*i
).internal
.get());
1225 (*i
).internal
->gather_terms(void_terms
);
1230 QueryBranch::do_or_like(OrContext
& ctx
, QueryOptimiser
* qopt
, double factor
,
1231 Xapian::termcount elite_set_size
, size_t first
) const
1233 LOGCALL_VOID(MATCH
, "QueryBranch::do_or_like", ctx
| qopt
| factor
| elite_set_size
);
1235 // FIXME: we could optimise by merging OP_ELITE_SET and OP_OR like we do
1236 // for AND-like operations.
1238 // OP_SYNONYM with a single subquery is only simplified by
1239 // QuerySynonym::done() if the single subquery is a term or MatchAll.
1240 Assert(subqueries
.size() >= 2 || get_op() == Query::OP_SYNONYM
);
1242 vector
<PostList
*> postlists
;
1243 postlists
.reserve(subqueries
.size() - first
);
1245 QueryVector::const_iterator q
;
1246 for (q
= subqueries
.begin() + first
; q
!= subqueries
.end(); ++q
) {
1247 // MatchNothing subqueries should have been removed by done().
1248 Assert((*q
).internal
.get());
1249 (*q
).internal
->postlist_sub_or_like(ctx
, qopt
, factor
);
1252 if (elite_set_size
&& elite_set_size
< subqueries
.size()) {
1253 ctx
.select_elite_set(elite_set_size
, subqueries
.size());
1254 // FIXME: not right!
1259 QueryBranch::do_synonym(QueryOptimiser
* qopt
, double factor
) const
1261 LOGCALL(MATCH
, PostList
*, "QueryBranch::do_synonym", qopt
| factor
);
1262 OrContext
ctx(qopt
, subqueries
.size());
1263 if (factor
== 0.0) {
1264 // If we have a factor of 0, we don't care about the weights, so
1265 // we're just like a normal OR query.
1266 do_or_like(ctx
, qopt
, 0.0);
1267 return ctx
.postlist();
1270 bool old_in_synonym
= qopt
->in_synonym
;
1271 qopt
->in_synonym
= true;
1272 do_or_like(ctx
, qopt
, 0.0);
1273 PostList
* pl
= ctx
.postlist();
1274 qopt
->in_synonym
= old_in_synonym
;
1276 bool wdf_disjoint
= false;
1277 Assert(!subqueries
.empty());
1278 auto type
= subqueries
.front().get_type();
1279 if (type
== Query::OP_WILDCARD
) {
1280 // Detect common easy case where all subqueries are OP_WILDCARD whose
1281 // constant prefixes form a prefix-free set.
1282 wdf_disjoint
= true;
1283 vector
<string
> prefixes
;
1284 for (auto&& q
: subqueries
) {
1285 if (q
.get_type() != Query::OP_WILDCARD
) {
1286 wdf_disjoint
= false;
1289 auto qw
= static_cast<const QueryWildcard
*>(q
.internal
.get());
1290 prefixes
.push_back(qw
->get_pattern());
1294 sort(prefixes
.begin(), prefixes
.end());
1295 const string
* prev
= nullptr;
1296 for (const auto& i
: prefixes
) {
1298 if (startswith(i
, *prev
)) {
1299 wdf_disjoint
= false;
1306 } else if (type
== Query::LEAF_TERM
) {
1307 // Detect common easy case where all subqueries are terms, none of
1308 // which are the same.
1309 wdf_disjoint
= true;
1310 unordered_set
<string
> terms
;
1311 for (auto&& q
: subqueries
) {
1312 if (q
.get_type() != Query::LEAF_TERM
) {
1313 wdf_disjoint
= false;
1316 auto qt
= static_cast<const QueryTerm
*>(q
.internal
.get());
1317 if (!terms
.insert(qt
->get_term()).second
) {
1318 wdf_disjoint
= false;
1324 // We currently assume wqf is 1 for calculating the synonym's weight
1325 // since conceptually the synonym is one "virtual" term. If we were
1326 // to combine multiple occurrences of the same synonym expansion into
1327 // a single instance with wqf set, we would want to track the wqf.
1329 // We build an OP_OR tree for OP_SYNONYM and then wrap it in a
1330 // SynonymPostList, which supplies the weights.
1331 RETURN(qopt
->make_synonym_postlist(pl
, factor
, wdf_disjoint
));
1335 QueryBranch::do_max(QueryOptimiser
* qopt
, double factor
) const
1337 LOGCALL(MATCH
, PostList
*, "QueryBranch::do_max", qopt
| factor
);
1338 OrContext
ctx(qopt
, subqueries
.size());
1339 do_or_like(ctx
, qopt
, factor
);
1340 if (factor
== 0.0) {
1341 // If we have a factor of 0, we don't care about the weights, so
1342 // we're just like a normal OR query.
1343 RETURN(ctx
.postlist());
1346 // We currently assume wqf is 1 for calculating the OP_MAX's weight
1347 // since conceptually the OP_MAX is one "virtual" term. If we were
1348 // to combine multiple occurrences of the same OP_MAX expansion into
1349 // a single instance with wqf set, we would want to track the wqf.
1350 RETURN(ctx
.postlist_max());
1354 QueryBranch::get_type() const XAPIAN_NOEXCEPT
1360 QueryBranch::get_num_subqueries() const XAPIAN_NOEXCEPT
1362 return subqueries
.size();
1366 QueryBranch::get_subquery(size_t n
) const
1368 return subqueries
[n
];
1372 QueryBranch::get_description_helper(const char * op
,
1373 Xapian::termcount parameter
) const
1376 QueryVector::const_iterator i
;
1377 for (i
= subqueries
.begin(); i
!= subqueries
.end(); ++i
) {
1378 if (desc
.size() > 1) {
1381 desc
+= str(parameter
);
1385 Assert((*i
).internal
.get());
1386 // MatchNothing subqueries should have been removed by done(), and we
1387 // shouldn't get called before done() is, since that happens at the
1388 // end of the Xapian::Query constructor.
1389 desc
+= (*i
).internal
->get_description();
1396 QueryWindowed::done()
1398 // If window size not specified, default it.
1400 window
= subqueries
.size();
1401 return QueryAndLike::done();
1405 QueryScaleWeight::gather_terms(void * void_terms
) const
1407 subquery
.internal
->gather_terms(void_terms
);
1410 void QueryTerm::serialise(string
& result
) const
1412 size_t len
= term
.size();
1414 if (wqf
== 1 && pos
== 0) {
1418 // Weird mutant versions of MatchAll
1420 result
+= encode_length(wqf
);
1421 result
+= encode_length(pos
);
1423 } else if (wqf
== 1) {
1425 // Single occurrence free-text term without position set.
1427 result
+= static_cast<char>(0x40 | 0x10);
1428 result
+= encode_length(term
.size() - 16);
1430 result
+= static_cast<char>(0x40 | 0x10 | len
);
1434 // Single occurrence free-text term with position set.
1436 result
+= static_cast<char>(0x40 | 0x20);
1437 result
+= encode_length(term
.size() - 16);
1439 result
+= static_cast<char>(0x40 | 0x20 | len
);
1442 result
+= encode_length(pos
);
1444 } else if (wqf
> 1 || pos
> 0) {
1447 result
+= static_cast<char>(0x40 | 0x30);
1448 result
+= encode_length(term
.size() - 16);
1450 result
+= static_cast<char>(0x40 | 0x30 | len
);
1453 result
+= encode_length(wqf
);
1454 result
+= encode_length(pos
);
1456 // Typical boolean term.
1460 result
+= static_cast<char>(0x40);
1461 result
+= encode_length(term
.size() - 16);
1463 result
+= static_cast<char>(0x40 | len
);
1469 void QueryPostingSource::serialise(string
& result
) const
1471 result
+= static_cast<char>(0x0c);
1473 const string
& n
= source
->name();
1474 result
+= encode_length(n
.size());
1477 const string
& s
= source
->serialise();
1478 result
+= encode_length(s
.size());
1482 void QueryScaleWeight::serialise(string
& result
) const
1484 Assert(subquery
.internal
.get());
1485 const string
& s
= serialise_double(scale_factor
);
1488 subquery
.internal
->serialise(result
);
1491 struct is_matchnothing
{
1492 bool operator()(const Xapian::Query
& q
) const {
1493 return q
.internal
.get() == NULL
;
1498 QueryAndLike::add_subquery(const Xapian::Query
& subquery
)
1500 // If the AndLike is already MatchNothing, do nothing.
1501 if (subqueries
.size() == 1 && subqueries
[0].internal
.get() == NULL
)
1503 // If we're adding MatchNothing, discard any previous subqueries.
1504 if (subquery
.internal
.get() == NULL
)
1506 subqueries
.push_back(subquery
);
1510 QueryAndLike::done()
1512 // Empty AndLike gives MatchNothing.
1513 if (subqueries
.empty())
1515 // We handle any subquery being MatchNothing in add_subquery() by leaving
1516 // a single MatchNothing subquery, and so this check results in AndLike
1517 // giving MatchNothing.
1518 if (subqueries
.size() == 1)
1519 return subqueries
[0].internal
.get();
1524 QueryAndLike::postlist(QueryOptimiser
* qopt
, double factor
) const
1526 LOGCALL(QUERY
, PostList
*, "QueryAndLike::postlist", qopt
| factor
);
1527 AndContext
ctx(qopt
, subqueries
.size());
1528 postlist_sub_and_like(ctx
, qopt
, factor
);
1529 RETURN(ctx
.postlist());
1533 QueryAndLike::postlist_sub_and_like(AndContext
& ctx
, QueryOptimiser
* qopt
, double factor
) const
1535 QueryVector::const_iterator i
;
1536 for (i
= subqueries
.begin(); i
!= subqueries
.end(); ++i
) {
1537 // MatchNothing subqueries should have been removed by done().
1538 Assert((*i
).internal
.get());
1539 (*i
).internal
->postlist_sub_and_like(ctx
, qopt
, factor
);
1544 QueryOrLike::add_subquery(const Xapian::Query
& subquery
)
1546 // Drop any subqueries which are MatchNothing.
1547 if (subquery
.internal
.get() != NULL
)
1548 subqueries
.push_back(subquery
);
1554 // An empty OrLike gives MatchNothing. Note that add_subquery() drops any
1555 // subqueries which are MatchNothing.
1556 if (subqueries
.empty())
1558 if (subqueries
.size() == 1)
1559 return subqueries
[0].internal
.get();
1564 QueryAndNot::add_subquery(const Xapian::Query
& subquery
)
1566 if (!subqueries
.empty()) {
1567 // We're adding the 2nd or subsequent subquery, so this subquery is
1569 if (subqueries
[0].internal
.get() == NULL
) {
1570 // The left side is already MatchNothing so drop any right side.
1572 // MatchNothing AND_NOT X == MatchNothing
1575 if (subquery
.internal
.get() == NULL
) {
1576 // Drop MatchNothing on the right of AndNot.
1578 // X AND_NOT MatchNothing == X
1581 if (subquery
.get_type() == subquery
.OP_SCALE_WEIGHT
) {
1582 // Strip OP_SCALE_WEIGHT wrapping from queries on the right of
1583 // AndNot as no weight is taken from them.
1584 subqueries
.push_back(subquery
.get_subquery(0));
1585 // The Query constructor for OP_SCALE_WEIGHT constructor should
1586 // eliminate OP_SCALE_WEIGHT applied to MatchNothing.
1587 Assert(subquery
.get_subquery(0).internal
.get() != NULL
);
1591 subqueries
.push_back(subquery
);
1597 // Any MatchNothing right subqueries get discarded by add_subquery() - if
1598 // that leaves just the left subquery, return that.
1600 // If left subquery is MatchNothing, then add_subquery() discards all right
1601 // subqueries, so this check also gives MatchNothing for this case.
1602 if (subqueries
.size() == 1)
1603 return subqueries
[0].internal
.get();
1608 QueryAndMaybe::add_subquery(const Xapian::Query
& subquery
)
1610 // If the left side of AndMaybe is already MatchNothing, do nothing.
1611 if (subqueries
.size() == 1 && subqueries
[0].internal
.get() == NULL
)
1613 // Drop any 2nd or subsequent subqueries which are MatchNothing.
1614 if (subquery
.internal
.get() != NULL
|| subqueries
.empty())
1615 subqueries
.push_back(subquery
);
1619 QueryAndMaybe::done()
1621 // Any MatchNothing right subqueries get discarded by add_subquery() - if
1622 // that leaves just the left subquery, return that.
1624 // If left subquery is MatchNothing, then add_subquery() discards all right
1625 // subqueries, so this check also gives MatchNothing for this case.
1626 if (subqueries
.size() == 1)
1627 return subqueries
[0].internal
.get();
1632 QueryOr::postlist(QueryOptimiser
* qopt
, double factor
) const
1634 LOGCALL(QUERY
, PostList
*, "QueryOr::postlist", qopt
| factor
);
1635 OrContext
ctx(qopt
, subqueries
.size());
1636 do_or_like(ctx
, qopt
, factor
);
1637 RETURN(ctx
.postlist());
1641 QueryOr::postlist_sub_or_like(OrContext
& ctx
, QueryOptimiser
* qopt
, double factor
) const
1643 do_or_like(ctx
, qopt
, factor
);
1647 QueryAndNot::postlist(QueryOptimiser
* qopt
, double factor
) const
1649 LOGCALL(QUERY
, PostList
*, "QueryAndNot::postlist", qopt
| factor
);
1650 // FIXME: Combine and-like side with and-like stuff above.
1651 unique_ptr
<PostList
> l(subqueries
[0].internal
->postlist(qopt
, factor
));
1652 OrContext
ctx(qopt
, subqueries
.size() - 1);
1653 do_or_like(ctx
, qopt
, 0.0, 0, 1);
1654 unique_ptr
<PostList
> r(ctx
.postlist());
1655 RETURN(new AndNotPostList(l
.release(), r
.release(),
1656 qopt
->matcher
, qopt
->db_size
));
1660 QueryXor::postlist(QueryOptimiser
* qopt
, double factor
) const
1662 LOGCALL(QUERY
, PostList
*, "QueryXor::postlist", qopt
| factor
);
1663 XorContext
ctx(qopt
, subqueries
.size());
1664 postlist_sub_xor(ctx
, qopt
, factor
);
1665 RETURN(ctx
.postlist());
1669 QueryXor::postlist_sub_xor(XorContext
& ctx
, QueryOptimiser
* qopt
, double factor
) const
1671 QueryVector::const_iterator i
;
1672 for (i
= subqueries
.begin(); i
!= subqueries
.end(); ++i
) {
1673 // MatchNothing subqueries should have been removed by done().
1674 Assert((*i
).internal
.get());
1675 (*i
).internal
->postlist_sub_xor(ctx
, qopt
, factor
);
1680 QueryAndMaybe::postlist(QueryOptimiser
* qopt
, double factor
) const
1682 LOGCALL(QUERY
, PostList
*, "QueryAndMaybe::postlist", qopt
| factor
);
1683 // FIXME: Combine and-like side with and-like stuff above.
1684 unique_ptr
<PostList
> l(subqueries
[0].internal
->postlist(qopt
, factor
));
1685 if (factor
== 0.0) {
1686 // An unweighted OP_AND_MAYBE can be replaced with its left branch.
1687 RETURN(l
.release());
1689 OrContext
ctx(qopt
, subqueries
.size() - 1);
1690 do_or_like(ctx
, qopt
, factor
, 0, 1);
1691 unique_ptr
<PostList
> r(ctx
.postlist());
1692 RETURN(new AndMaybePostList(l
.release(), r
.release(),
1693 qopt
->matcher
, qopt
->db_size
));
1697 QueryFilter::postlist(QueryOptimiser
* qopt
, double factor
) const
1699 LOGCALL(QUERY
, PostList
*, "QueryFilter::postlist", qopt
| factor
);
1700 // FIXME: Combine and-like stuff, like QueryOptimiser.
1701 AssertEq(subqueries
.size(), 2);
1703 unique_ptr
<PostList
> l(subqueries
[0].internal
->postlist(qopt
, factor
));
1704 pls
[1] = subqueries
[1].internal
->postlist(qopt
, 0.0);
1705 pls
[0] = l
.release();
1706 RETURN(new MultiAndPostList(pls
, pls
+ 2, qopt
->matcher
, qopt
->db_size
));
1710 QueryFilter::postlist_sub_and_like(AndContext
& ctx
, QueryOptimiser
* qopt
, double factor
) const
1712 QueryVector::const_iterator i
;
1713 for (i
= subqueries
.begin(); i
!= subqueries
.end(); ++i
) {
1714 // MatchNothing subqueries should have been removed by done().
1715 Assert((*i
).internal
.get());
1716 (*i
).internal
->postlist_sub_and_like(ctx
, qopt
, factor
);
1717 // Second and subsequent subqueries are unweighted.
1723 QueryWindowed::postlist_windowed(Query::op op
, AndContext
& ctx
, QueryOptimiser
* qopt
, double factor
) const
1725 if (!qopt
->full_db_has_positions
) {
1726 // No positional data anywhere, so just handle as AND.
1727 QueryAndLike::postlist_sub_and_like(ctx
, qopt
, factor
);
1731 if (!qopt
->db
.has_positions()) {
1732 // No positions in this subdatabase so this matches nothing, which
1733 // means the whole andcontext matches nothing.
1735 // Bailing out here means we don't recurse deeper and that means we
1736 // don't call QueryOptimiser::inc_total_subqs() for leaf postlists in
1737 // the phrase, but at least one shard will count them, and the matcher
1738 // takes the highest answer (since 1.4.6).
1743 bool old_need_positions
= qopt
->need_positions
;
1744 qopt
->need_positions
= true;
1746 QueryVector::const_iterator i
;
1747 for (i
= subqueries
.begin(); i
!= subqueries
.end(); ++i
) {
1748 // MatchNothing subqueries should have been removed by done().
1749 Assert((*i
).internal
.get());
1750 bool is_term
= ((*i
).internal
->get_type() == Query::LEAF_TERM
);
1751 PostList
* pl
= (*i
).internal
->postlist(qopt
, factor
);
1753 pl
= new OrPosPostList(pl
);
1754 ctx
.add_postlist(pl
);
1756 // Record the positional filter to apply higher up the tree.
1757 ctx
.add_pos_filter(op
, subqueries
.size(), window
);
1759 qopt
->need_positions
= old_need_positions
;
1763 QueryPhrase::postlist_sub_and_like(AndContext
& ctx
, QueryOptimiser
* qopt
, double factor
) const
1765 QueryWindowed::postlist_windowed(Query::OP_PHRASE
, ctx
, qopt
, factor
);
1769 QueryNear::postlist_sub_and_like(AndContext
& ctx
, QueryOptimiser
* qopt
, double factor
) const
1771 QueryWindowed::postlist_windowed(Query::OP_NEAR
, ctx
, qopt
, factor
);
1775 QueryEliteSet::postlist(QueryOptimiser
* qopt
, double factor
) const
1777 LOGCALL(QUERY
, PostList
*, "QueryEliteSet::postlist", qopt
| factor
);
1778 OrContext
ctx(qopt
, subqueries
.size());
1779 do_or_like(ctx
, qopt
, factor
, set_size
);
1780 RETURN(ctx
.postlist());
1784 QueryEliteSet::postlist_sub_or_like(OrContext
& ctx
, QueryOptimiser
* qopt
, double factor
) const
1786 do_or_like(ctx
, qopt
, factor
, set_size
);
1790 QuerySynonym::postlist(QueryOptimiser
* qopt
, double factor
) const
1792 LOGCALL(QUERY
, PostList
*, "QuerySynonym::postlist", qopt
| factor
);
1793 // Save and restore total_subqs so we only add one for the whole
1794 // OP_SYNONYM subquery (or none if we're not weighted).
1795 Xapian::termcount save_total_subqs
= qopt
->get_total_subqs();
1798 PostList
* pl
= do_synonym(qopt
, factor
);
1799 qopt
->set_total_subqs(save_total_subqs
);
1804 QuerySynonym::done()
1806 // An empty Synonym gives MatchNothing. Note that add_subquery() drops any
1807 // subqueries which are MatchNothing.
1808 if (subqueries
.empty())
1810 if (subqueries
.size() == 1) {
1811 Query::op sub_type
= subqueries
[0].get_type();
1812 // Synonym of a single subquery should only be simplified if that
1813 // subquery is a term (or MatchAll), or if it's also OP_SYNONYM. Note
1814 // that MatchNothing subqueries are dropped, so we'd never get here
1815 // with a single MatchNothing subquery.
1816 if (sub_type
== Query::LEAF_TERM
|| sub_type
== Query::LEAF_MATCH_ALL
||
1817 sub_type
== Query::OP_SYNONYM
) {
1818 return subqueries
[0].internal
.get();
1820 if (sub_type
== Query::OP_WILDCARD
) {
1821 auto q
= static_cast<QueryWildcard
*>(subqueries
[0].internal
.get());
1822 // SYNONYM over WILDCARD X -> WILDCARD SYNONYM for any combiner X.
1823 return q
->change_combiner(Query::OP_SYNONYM
);
1830 QueryMax::postlist(QueryOptimiser
* qopt
, double factor
) const
1832 LOGCALL(QUERY
, PostList
*, "QueryMax::postlist", qopt
| factor
);
1833 // Save and restore total_subqs so we only add one for the whole
1834 // OP_MAX subquery (or none if we're not weighted).
1835 Xapian::termcount save_total_subqs
= qopt
->get_total_subqs();
1838 PostList
* pl
= do_max(qopt
, factor
);
1839 qopt
->set_total_subqs(save_total_subqs
);
1844 QueryAnd::get_op() const
1846 return Xapian::Query::OP_AND
;
1850 QueryOr::get_op() const
1852 return Xapian::Query::OP_OR
;
1856 QueryAndNot::get_op() const
1858 return Xapian::Query::OP_AND_NOT
;
1862 QueryXor::get_op() const
1864 return Xapian::Query::OP_XOR
;
1868 QueryAndMaybe::get_op() const
1870 return Xapian::Query::OP_AND_MAYBE
;
1874 QueryFilter::get_op() const
1876 return Xapian::Query::OP_FILTER
;
1880 QueryNear::get_op() const
1882 return Xapian::Query::OP_NEAR
;
1886 QueryPhrase::get_op() const
1888 return Xapian::Query::OP_PHRASE
;
1892 QueryEliteSet::get_op() const
1894 return Xapian::Query::OP_ELITE_SET
;
1898 QuerySynonym::get_op() const
1900 return Xapian::Query::OP_SYNONYM
;
1904 QueryMax::get_op() const
1906 return Xapian::Query::OP_MAX
;
1910 QueryWildcard::get_op() const
1912 return Xapian::Query::OP_WILDCARD
;
1916 QueryAnd::get_description() const
1918 return get_description_helper(" AND ");
1922 QueryOr::get_description() const
1924 return get_description_helper(" OR ");
1928 QueryAndNot::get_description() const
1930 return get_description_helper(" AND_NOT ");
1934 QueryXor::get_description() const
1936 return get_description_helper(" XOR ");
1940 QueryAndMaybe::get_description() const
1942 return get_description_helper(" AND_MAYBE ");
1946 QueryFilter::get_description() const
1948 return get_description_helper(" FILTER ");
1952 QueryNear::get_description() const
1954 return get_description_helper(" NEAR ", window
);
1958 QueryPhrase::get_description() const
1960 return get_description_helper(" PHRASE ", window
);
1964 QueryEliteSet::get_description() const
1966 return get_description_helper(" ELITE_SET ", set_size
);
1970 QuerySynonym::get_description() const
1972 if (subqueries
.size() == 1) {
1973 string d
= "(SYNONYM ";
1974 d
+= subqueries
[0].internal
->get_description();
1978 return get_description_helper(" SYNONYM ");
1982 QueryMax::get_description() const
1984 return get_description_helper(" MAX ");
1988 QueryInvalid::get_type() const XAPIAN_NOEXCEPT
1990 return Xapian::Query::OP_INVALID
;
1994 QueryInvalid::postlist(QueryOptimiser
*, double) const
1996 throw Xapian::InvalidOperationError("Query is invalid");
2000 QueryInvalid::serialise(std::string
& result
) const
2002 result
+= static_cast<char>(0x00);
2006 QueryInvalid::get_description() const