1 /** @file queryinternal.cc
2 * @brief Xapian::Query internals
4 /* Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017 Olly Betts
5 * Copyright (C) 2008,2009 Lemur Consulting Ltd
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
24 #include "queryinternal.h"
26 #include "xapian/error.h"
27 #include "xapian/postingsource.h"
28 #include "xapian/query.h"
30 #include "leafpostlist.h"
31 #include "matcher/andmaybepostlist.h"
32 #include "matcher/andnotpostlist.h"
33 #include "emptypostlist.h"
34 #include "matcher/exactphrasepostlist.h"
35 #include "matcher/externalpostlist.h"
36 #include "matcher/maxpostlist.h"
37 #include "matcher/multiandpostlist.h"
38 #include "matcher/multixorpostlist.h"
39 #include "matcher/nearpostlist.h"
40 #include "matcher/orpospostlist.h"
41 #include "matcher/orpostlist.h"
42 #include "matcher/phrasepostlist.h"
43 #include "matcher/queryoptimiser.h"
44 #include "matcher/valuerangepostlist.h"
45 #include "matcher/valuegepostlist.h"
46 #include "net/length.h"
47 #include "serialise-double.h"
53 #include "unicode/description_append.h"
64 template<class CLASS
> struct delete_ptr
{
65 void operator()(CLASS
*p
) const { delete p
; }
68 using Xapian::Internal::AndContext
;
69 using Xapian::Internal::OrContext
;
70 using Xapian::Internal::XorContext
;
76 /** Class providing an operator which sorts postlists to select max or terms.
77 * This returns true if a has a (strictly) greater termweight than b.
79 * Using this comparator will tend to result in multiple calls to
80 * recalc_maxweight() for each of the subqueries (we use it with nth_element
81 * which should be O(n)) - perhaps it'd be better to call recalc_maxweight()
82 * once and then sort on that.
84 struct CmpMaxOrTerms
{
85 /** Return true if and only if a has a strictly greater termweight than b. */
86 bool operator()(PostList
*a
, PostList
*b
) {
87 #if (defined(__i386__) && !defined(__SSE_MATH__)) || \
88 defined(__mc68000__) || defined(__mc68010__) || \
89 defined(__mc68020__) || defined(__mc68030__)
90 // On some architectures, most common of which is x86, floating point
91 // values are calculated and stored in registers with excess precision.
92 // If the two recalc_maxweight() calls below return identical values in a
93 // register, the excess precision may be dropped for one of them but
94 // not the other (e.g. because the compiler saves the first calculated
95 // weight to memory while calculating the second, then reloads it to
96 // compare). This leads to both a > b and b > a being true, which
97 // violates the antisymmetry property of the strict weak ordering
98 // required by nth_element(). This can have serious consequences (e.g.
101 // Note that m68k only has excess precision in earlier models - 68040
103 // http://gcc.gnu.org/ml/gcc-patches/2008-11/msg00105.html
105 // To avoid this, we store each result in a volatile double prior to
106 // comparing them. This means that the result of this test should
107 // match that on other architectures with the same double format (which
108 // is desirable), and actually has less overhead than rounding both
109 // results to float (which is another approach which works).
110 volatile double a_max_wt
= a
->recalc_maxweight();
111 volatile double b_max_wt
= b
->recalc_maxweight();
112 return a_max_wt
> b_max_wt
;
114 return a
->recalc_maxweight() > b
->recalc_maxweight();
119 /// Comparison functor which orders PostList* by descending get_termfreq_est().
120 struct ComparePostListTermFreqAscending
{
121 /// Order by descending get_termfreq_est().
122 bool operator()(const PostList
*a
, const PostList
*b
) const {
123 return a
->get_termfreq_est() > b
->get_termfreq_est();
129 QueryOptimiser
* qopt
;
131 vector
<PostList
*> pls
;
134 Context(QueryOptimiser
* qopt_
, size_t reserve
);
138 void add_postlist(PostList
* pl
) {
146 size_t size() const {
150 void shrink(size_t new_size
);
153 Context::Context(QueryOptimiser
* qopt_
, size_t reserve
)
156 pls
.reserve(reserve
);
160 Context::shrink(size_t new_size
)
162 AssertRel(new_size
, <=, pls
.size());
163 if (new_size
>= pls
.size())
166 const PostList
* hint_pl
= qopt
->get_hint_postlist();
167 for (auto&& i
= pls
.begin() + new_size
; i
!= pls
.end(); ++i
) {
168 const PostList
* pl
= *i
;
169 if (rare(pl
== hint_pl
)) {
170 // We were about to delete qopt's hint - instead tell qopt to take
172 qopt
->take_hint_ownership();
178 pls
.resize(new_size
);
186 class OrContext
: public Context
{
188 OrContext(QueryOptimiser
* qopt_
, size_t reserve
)
189 : Context(qopt_
, reserve
) { }
191 /// Select the best set_size postlists from the last out_of added.
192 void select_elite_set(size_t set_size
, size_t out_of
);
194 /// Select the set_size postlists with the highest term frequency.
195 void select_most_frequent(size_t set_size
);
197 PostList
* postlist();
198 PostList
* postlist_max();
202 OrContext::select_elite_set(size_t set_size
, size_t out_of
)
204 vector
<PostList
*>::iterator begin
= pls
.begin() + pls
.size() - out_of
;
205 nth_element(begin
, begin
+ set_size
- 1, pls
.end(), CmpMaxOrTerms());
206 shrink(pls
.size() - out_of
+ set_size
);
210 OrContext::select_most_frequent(size_t set_size
)
212 vector
<PostList
*>::iterator begin
= pls
.begin();
213 nth_element(begin
, begin
+ set_size
- 1, pls
.end(),
214 ComparePostListTermFreqAscending());
219 OrContext::postlist()
221 Assert(!pls
.empty());
223 if (pls
.size() == 1) {
224 PostList
* pl
= pls
[0];
229 // Make postlists into a heap so that the postlist with the greatest term
230 // frequency is at the top of the heap.
231 make_heap(pls
.begin(), pls
.end(), ComparePostListTermFreqAscending());
233 // Now build a tree of binary OrPostList objects.
235 // The algorithm used to build the tree is like that used to build an
236 // optimal Huffman coding tree. If we called next() repeatedly, this
237 // arrangement would minimise the number of method calls. Generally we
238 // don't actually do that, but this arrangement is still likely to be a
239 // good one, and it does minimise the work in the worst case.
241 // We build the tree such that at each branch:
243 // l.get_termfreq_est() >= r.get_termfreq_est()
245 // We do this so that the OrPostList class can be optimised assuming
246 // that this is the case.
247 PostList
* r
= pls
.front();
248 pop_heap(pls
.begin(), pls
.end(), ComparePostListTermFreqAscending());
251 pl
= new OrPostList(pls
.front(), r
, qopt
->matcher
, qopt
->db_size
);
253 if (pls
.size() == 1) {
258 pop_heap(pls
.begin(), pls
.end(), ComparePostListTermFreqAscending());
260 push_heap(pls
.begin(), pls
.end(), ComparePostListTermFreqAscending());
265 OrContext::postlist_max()
267 Assert(!pls
.empty());
269 if (pls
.size() == 1) {
270 PostList
* pl
= pls
[0];
275 // Sort the postlists so that the postlist with the greatest term frequency
277 sort(pls
.begin(), pls
.end(), ComparePostListTermFreqAscending());
280 pl
= new MaxPostList(pls
.begin(), pls
.end(), qopt
->matcher
, qopt
->db_size
);
286 class XorContext
: public Context
{
288 XorContext(QueryOptimiser
* qopt_
, size_t reserve
)
289 : Context(qopt_
, reserve
) { }
291 PostList
* postlist();
295 XorContext::postlist()
297 Xapian::doccount db_size
= qopt
->db_size
;
299 pl
= new MultiXorPostList(pls
.begin(), pls
.end(), qopt
->matcher
, db_size
);
301 // Empty pls so our destructor doesn't delete them all!
306 class AndContext
: public Context
{
308 Xapian::Query::op op_
;
310 /// Start and end indices for the PostLists this positional filter uses.
313 Xapian::termcount window
;
316 PosFilter(Xapian::Query::op op__
, size_t begin_
, size_t end_
,
317 Xapian::termcount window_
)
318 : op_(op__
), begin(begin_
), end(end_
), window(window_
) { }
320 PostList
* postlist(PostList
* pl
, const vector
<PostList
*>& pls
) const;
323 list
<PosFilter
> pos_filters
;
326 AndContext(QueryOptimiser
* qopt_
, size_t reserve
)
327 : Context(qopt_
, reserve
) { }
329 void add_pos_filter(Query::op op_
,
331 Xapian::termcount window
);
333 PostList
* postlist();
337 AndContext::PosFilter::postlist(PostList
* pl
, const vector
<PostList
*>& pls
) const
339 vector
<PostList
*>::const_iterator terms_begin
= pls
.begin() + begin
;
340 vector
<PostList
*>::const_iterator terms_end
= pls
.begin() + end
;
342 if (op_
== Xapian::Query::OP_NEAR
) {
343 pl
= new NearPostList(pl
, window
, terms_begin
, terms_end
);
344 } else if (window
== end
- begin
) {
345 AssertEq(op_
, Xapian::Query::OP_PHRASE
);
346 pl
= new ExactPhrasePostList(pl
, terms_begin
, terms_end
);
348 AssertEq(op_
, Xapian::Query::OP_PHRASE
);
349 pl
= new PhrasePostList(pl
, window
, terms_begin
, terms_end
);
358 AndContext::add_pos_filter(Query::op op_
,
360 Xapian::termcount window
)
363 size_t end
= pls
.size();
364 size_t begin
= end
- n_subqs
;
365 pos_filters
.push_back(PosFilter(op_
, begin
, end
, window
));
369 AndContext::postlist()
372 // This case only happens if this sub-database has no positional data
373 // (but another sub-database does).
374 Assert(pos_filters
.empty());
375 return new EmptyPostList
;
378 unique_ptr
<PostList
> pl(new MultiAndPostList(pls
.begin(), pls
.end(),
379 qopt
->matcher
, qopt
->db_size
));
381 // Sort the positional filters to try to apply them in an efficient order.
382 // FIXME: We need to figure out what that is! Try applying lowest cf/tf
385 // Apply any positional filters.
386 list
<PosFilter
>::const_iterator i
;
387 for (i
= pos_filters
.begin(); i
!= pos_filters
.end(); ++i
) {
388 const PosFilter
& filter
= *i
;
389 pl
.reset(filter
.postlist(pl
.release(), pls
));
392 // Empty pls so our destructor doesn't delete them all!
399 Query::Internal::~Internal() { }
402 Query::Internal::get_num_subqueries() const XAPIAN_NOEXCEPT
408 Query::Internal::get_subquery(size_t) const
410 throw Xapian::InvalidArgumentError("get_subquery() not meaningful for this Query object");
414 Query::Internal::get_wqf() const
416 throw Xapian::InvalidArgumentError("get_wqf() not meaningful for this Query object");
420 Query::Internal::get_pos() const
422 throw Xapian::InvalidArgumentError("get_pos() not meaningful for this Query object");
426 Query::Internal::gather_terms(void *) const
431 Query::Internal::get_length() const XAPIAN_NOEXCEPT
437 Query::Internal::unserialise(const char ** p
, const char * end
,
438 const Registry
& reg
)
442 unsigned char ch
= *(*p
)++;
444 case 4: case 5: case 6: case 7: {
448 // nnn -> n_subqs (0 means encoded value follows)
449 // cccc -> code (which OP_XXX)
450 size_t n_subqs
= ch
& 0x07;
452 decode_length(p
, end
, n_subqs
);
455 unsigned char code
= (ch
>> 3) & 0x0f;
456 Xapian::termcount parameter
= 0;
458 decode_length(p
, end
, parameter
);
459 Xapian::Internal::QueryBranch
* result
;
462 result
= new Xapian::Internal::QueryAnd(n_subqs
);
465 result
= new Xapian::Internal::QueryOr(n_subqs
);
467 case 2: // OP_AND_NOT
468 result
= new Xapian::Internal::QueryAndNot(n_subqs
);
471 result
= new Xapian::Internal::QueryXor(n_subqs
);
473 case 4: // OP_AND_MAYBE
474 result
= new Xapian::Internal::QueryAndMaybe(n_subqs
);
477 result
= new Xapian::Internal::QueryFilter(n_subqs
);
479 case 6: // OP_SYNONYM
480 result
= new Xapian::Internal::QuerySynonym(n_subqs
);
483 result
= new Xapian::Internal::QueryMax(n_subqs
);
485 case 13: // OP_ELITE_SET
486 result
= new Xapian::Internal::QueryEliteSet(n_subqs
,
490 result
= new Xapian::Internal::QueryNear(n_subqs
,
493 case 15: // OP_PHRASE
494 result
= new Xapian::Internal::QueryPhrase(n_subqs
,
498 // 8 to 12 are currently unused.
499 throw SerialisationError("Unknown multi-way branch Query operator");
502 result
->add_subquery(Xapian::Query(unserialise(p
, end
, reg
)));
507 case 2: case 3: { // Term
511 // LLLL -> length (0 means encoded value follows)
513 // 0: wqf = 0; pos = 0
514 // 1: wqf = 1; pos = 0
515 // 2: wqf = 1; pos -> encoded value follows
516 // 3: wqf -> encoded value follows; pos -> encoded value follows
517 size_t len
= ch
& 0x0f;
519 decode_length(p
, end
, len
);
522 if (size_t(end
- *p
) < len
)
523 throw SerialisationError("Not enough data");
524 string
term(*p
, len
);
527 int code
= ((ch
>> 4) & 0x03);
529 Xapian::termcount wqf
= static_cast<Xapian::termcount
>(code
> 0);
531 decode_length(p
, end
, wqf
);
533 Xapian::termpos pos
= 0;
535 decode_length(p
, end
, pos
);
537 return new Xapian::Internal::QueryTerm(term
, wqf
, pos
);
540 // OP_VALUE_RANGE or OP_VALUE_GE or OP_VALUE_LE
543 // ssss -> slot number (15 means encoded value follows)
545 // 0: OP_VALUE_RANGE (or OP_VALUE_LE if begin empty)
547 Xapian::valueno slot
= ch
& 15;
549 decode_length(p
, end
, slot
);
553 decode_length_and_check(p
, end
, len
);
554 string
begin(*p
, len
);
558 return new Xapian::Internal::QueryValueGE(slot
, begin
);
562 decode_length_and_check(p
, end
, len
);
563 string
end_(*p
, len
);
565 if (begin
.empty()) // FIXME: is this right?
566 return new Xapian::Internal::QueryValueLE(slot
, end_
);
567 return new Xapian::Internal::QueryValueRange(slot
, begin
, end_
);
573 // ttttt -> encodes which OP_XXX
575 case 0x00: // OP_INVALID
576 return new Xapian::Internal::QueryInvalid();
577 case 0x0b: { // Wildcard
579 throw SerialisationError("not enough data");
580 Xapian::termcount max_expansion
;
581 decode_length(p
, end
, max_expansion
);
583 throw SerialisationError("not enough data");
584 int max_type
= static_cast<unsigned char>(*(*p
)++);
585 op combiner
= static_cast<op
>(*(*p
)++);
587 decode_length_and_check(p
, end
, len
);
588 string
pattern(*p
, len
);
590 return new Xapian::Internal::QueryWildcard(pattern
,
595 case 0x0c: { // PostingSource
597 decode_length_and_check(p
, end
, len
);
598 string
name(*p
, len
);
601 const PostingSource
* reg_source
= reg
.get_posting_source(name
);
603 string m
= "PostingSource ";
605 m
+= " not registered";
606 throw SerialisationError(m
);
609 decode_length_and_check(p
, end
, len
);
610 PostingSource
* source
=
611 reg_source
->unserialise_with_registry(string(*p
, len
),
614 return new Xapian::Internal::QueryPostingSource(source
->release());
617 using Xapian::Internal::QueryScaleWeight
;
618 double scale_factor
= unserialise_double(p
, end
);
619 return new QueryScaleWeight(scale_factor
,
620 Query(unserialise(p
, end
, reg
)));
623 Xapian::termcount wqf
;
625 decode_length(p
, end
, wqf
);
626 decode_length(p
, end
, pos
);
627 return new Xapian::Internal::QueryTerm(string(), wqf
, pos
);
630 return Query::MatchAll
.internal
.get();
631 default: // Others currently unused.
637 string msg
= "Unknown Query serialisation: ";
639 throw SerialisationError(msg
);
643 Query::Internal::postlist_sub_and_like(AndContext
& ctx
,
644 QueryOptimiser
* qopt
,
647 ctx
.add_postlist(postlist(qopt
, factor
));
651 Query::Internal::postlist_sub_or_like(OrContext
& ctx
,
652 QueryOptimiser
* qopt
,
655 ctx
.add_postlist(postlist(qopt
, factor
));
659 Query::Internal::postlist_sub_xor(XorContext
& ctx
,
660 QueryOptimiser
* qopt
,
663 ctx
.add_postlist(postlist(qopt
, factor
));
669 QueryTerm::get_type() const XAPIAN_NOEXCEPT
671 return term
.empty() ? Query::LEAF_MATCH_ALL
: Query::LEAF_TERM
;
675 QueryTerm::get_description() const
679 desc
= "<alldocuments>";
681 description_append(desc
, term
);
694 QueryPostingSource::QueryPostingSource(PostingSource
* source_
)
698 throw Xapian::InvalidArgumentError("source parameter can't be NULL");
699 if (source
->_refs
== 0) {
700 // source_ isn't reference counted, so try to clone it. If clone()
701 // isn't implemented, just use the object provided and it's the
702 // caller's responsibility to ensure it stays valid while in use.
703 PostingSource
* cloned_source
= source
->clone();
704 if (cloned_source
) source
= cloned_source
->release();
709 QueryPostingSource::get_type() const XAPIAN_NOEXCEPT
711 return Query::LEAF_POSTING_SOURCE
;
715 QueryPostingSource::get_description() const
717 string desc
= "PostingSource(";
718 desc
+= source
->get_description();
723 QueryScaleWeight::QueryScaleWeight(double factor
, const Query
& subquery_
)
724 : scale_factor(factor
), subquery(subquery_
)
726 if (rare(scale_factor
< 0.0))
727 throw Xapian::InvalidArgumentError("OP_SCALE_WEIGHT requires factor >= 0");
731 QueryScaleWeight::get_type() const XAPIAN_NOEXCEPT
733 return Query::OP_SCALE_WEIGHT
;
737 QueryScaleWeight::get_num_subqueries() const XAPIAN_NOEXCEPT
743 QueryScaleWeight::get_subquery(size_t) const
749 QueryScaleWeight::get_description() const
751 Assert(subquery
.internal
.get());
752 string desc
= str(scale_factor
);
754 desc
+= subquery
.internal
->get_description();
758 PostingIterator::Internal
*
759 QueryTerm::postlist(QueryOptimiser
* qopt
, double factor
) const
761 LOGCALL(QUERY
, PostingIterator::Internal
*, "QueryTerm::postlist", qopt
| factor
);
763 qopt
->inc_total_subqs();
764 RETURN(qopt
->open_post_list(term
, wqf
, factor
));
767 PostingIterator::Internal
*
768 QueryPostingSource::postlist(QueryOptimiser
* qopt
, double factor
) const
770 LOGCALL(QUERY
, PostingIterator::Internal
*, "QueryPostingSource::postlist", qopt
| factor
);
771 Assert(source
.get());
773 qopt
->inc_total_subqs();
774 // Casting away const on the Database::Internal here is OK, as we wrap
775 // them in a const Xapian::Database so non-const methods can't actually
776 // be called on the Database::Internal object.
777 const Xapian::Database
wrappeddb(
778 const_cast<Xapian::Database::Internal
*>(&(qopt
->db
)));
779 RETURN(new ExternalPostList(wrappeddb
, source
.get(), factor
, qopt
->matcher
));
782 PostingIterator::Internal
*
783 QueryScaleWeight::postlist(QueryOptimiser
* qopt
, double factor
) const
785 LOGCALL(QUERY
, PostingIterator::Internal
*, "QueryScaleWeight::postlist", qopt
| factor
);
786 RETURN(subquery
.internal
->postlist(qopt
, factor
* scale_factor
));
790 QueryTerm::gather_terms(void * void_terms
) const
792 // Skip Xapian::Query::MatchAll (aka Xapian::Query("")).
794 vector
<pair
<Xapian::termpos
, string
> > &terms
=
795 *static_cast<vector
<pair
<Xapian::termpos
, string
> >*>(void_terms
);
796 terms
.push_back(make_pair(pos
, term
));
800 PostingIterator::Internal
*
801 QueryValueRange::postlist(QueryOptimiser
*qopt
, double factor
) const
803 LOGCALL(QUERY
, PostingIterator::Internal
*, "QueryValueRange::postlist", qopt
| factor
);
805 qopt
->inc_total_subqs();
806 const Xapian::Database::Internal
& db
= qopt
->db
;
807 const string
& lb
= db
.get_value_lower_bound(slot
);
809 // This should only happen if there are no values in this slot (which
810 // could be because the backend just doesn't support values at all).
811 // If there were values in the slot, the backend should have a
812 // non-empty lower bound, even if it isn't a tight one.
813 AssertEq(db
.get_value_freq(slot
), 0);
814 RETURN(new EmptyPostList
);
817 RETURN(new EmptyPostList
);
819 const string
& ub
= db
.get_value_upper_bound(slot
);
821 RETURN(new EmptyPostList
);
824 // If begin <= lb too, then the range check isn't needed, but we do
825 // still need to consider which documents have a value set in this
826 // slot. If this value is set for all documents, we can replace it
827 // with the MatchAll postlist, which is especially efficient if
828 // there are no gaps in the docids.
829 if (begin
<= lb
&& db
.get_value_freq(slot
) == db
.get_doccount()) {
830 RETURN(db
.open_post_list(string()));
832 RETURN(new ValueGePostList(&db
, slot
, begin
));
834 RETURN(new ValueRangePostList(&db
, slot
, begin
, end
));
838 QueryValueRange::serialise(string
& result
) const
841 result
+= static_cast<char>(0x20 | slot
);
843 result
+= static_cast<char>(0x20 | 15);
844 result
+= encode_length(slot
- 15);
846 result
+= encode_length(begin
.size());
848 result
+= encode_length(end
.size());
853 QueryValueRange::get_type() const XAPIAN_NOEXCEPT
855 return Query::OP_VALUE_RANGE
;
859 QueryValueRange::get_description() const
861 string desc
= "VALUE_RANGE ";
864 description_append(desc
, begin
);
866 description_append(desc
, end
);
870 PostingIterator::Internal
*
871 QueryValueLE::postlist(QueryOptimiser
*qopt
, double factor
) const
873 LOGCALL(QUERY
, PostingIterator::Internal
*, "QueryValueLE::postlist", qopt
| factor
);
875 qopt
->inc_total_subqs();
876 const Xapian::Database::Internal
& db
= qopt
->db
;
877 const string
& lb
= db
.get_value_lower_bound(slot
);
878 // If lb.empty(), the backend doesn't provide value bounds.
881 RETURN(new EmptyPostList
);
883 if (limit
>= db
.get_value_upper_bound(slot
)) {
884 // The range check isn't needed, but we do still need to consider
885 // which documents have a value set in this slot. If this value is
886 // set for all documents, we can replace it with the MatchAll
887 // postlist, which is especially efficient if there are no gaps in
889 if (db
.get_value_freq(slot
) == db
.get_doccount()) {
890 RETURN(db
.open_post_list(string()));
894 RETURN(new ValueRangePostList(&db
, slot
, string(), limit
));
898 QueryValueLE::serialise(string
& result
) const
900 // Encode as a range with an empty start (which only takes a single byte to
903 result
+= static_cast<char>(0x20 | slot
);
905 result
+= static_cast<char>(0x20 | 15);
906 result
+= encode_length(slot
- 15);
908 result
+= encode_length(0);
909 result
+= encode_length(limit
.size());
914 QueryValueLE::get_type() const XAPIAN_NOEXCEPT
916 return Query::OP_VALUE_LE
;
920 QueryValueLE::get_description() const
922 string desc
= "VALUE_LE ";
925 description_append(desc
, limit
);
929 PostingIterator::Internal
*
930 QueryValueGE::postlist(QueryOptimiser
*qopt
, double factor
) const
932 LOGCALL(QUERY
, PostingIterator::Internal
*, "QueryValueGE::postlist", qopt
| factor
);
934 qopt
->inc_total_subqs();
935 const Xapian::Database::Internal
& db
= qopt
->db
;
936 const string
& lb
= db
.get_value_lower_bound(slot
);
937 // If lb.empty(), the backend doesn't provide value bounds.
939 if (limit
> db
.get_value_upper_bound(slot
)) {
940 RETURN(new EmptyPostList
);
943 // The range check isn't needed, but we do still need to consider
944 // which documents have a value set in this slot. If this value is
945 // set for all documents, we can replace it with the MatchAll
946 // postlist, which is especially efficient if there are no gaps in
948 if (db
.get_value_freq(slot
) == db
.get_doccount()) {
949 RETURN(db
.open_post_list(string()));
953 RETURN(new ValueGePostList(&db
, slot
, limit
));
957 QueryValueGE::serialise(string
& result
) const
960 result
+= static_cast<char>(0x20 | 0x10 | slot
);
962 result
+= static_cast<char>(0x20 | 0x10 | 15);
963 result
+= encode_length(slot
- 15);
965 result
+= encode_length(limit
.size());
970 QueryValueGE::get_type() const XAPIAN_NOEXCEPT
972 return Query::OP_VALUE_GE
;
976 QueryValueGE::get_description() const
978 string desc
= "VALUE_GE ";
981 description_append(desc
, limit
);
985 PostingIterator::Internal
*
986 QueryWildcard::postlist(QueryOptimiser
* qopt
, double factor
) const
988 LOGCALL(QUERY
, PostingIterator::Internal
*, "QueryWildcard::postlist", qopt
| factor
);
989 Query::op op
= combiner
;
990 double or_factor
= 0.0;
992 // If we have a factor of 0, we don't care about the weights, so
993 // we're just like a normal OR query.
995 } else if (op
!= Query::OP_SYNONYM
) {
999 bool old_in_synonym
= qopt
->in_synonym
;
1000 if (!old_in_synonym
) {
1001 qopt
->in_synonym
= (op
== Query::OP_SYNONYM
);
1004 OrContext
ctx(qopt
, 0);
1005 unique_ptr
<TermList
> t(qopt
->db
.open_allterms(pattern
));
1006 Xapian::termcount expansions_left
= max_expansion
;
1007 // If there's no expansion limit, set expansions_left to the maximum
1008 // value Xapian::termcount can hold.
1009 if (expansions_left
== 0)
1015 if (max_type
< Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT
) {
1016 if (expansions_left
-- == 0) {
1017 if (max_type
== Xapian::Query::WILDCARD_LIMIT_FIRST
)
1019 string
msg("Wildcard ");
1021 msg
+= "* expands to more than ";
1022 msg
+= str(max_expansion
);
1024 throw Xapian::WildcardError(msg
);
1027 const string
& term
= t
->get_termname();
1028 ctx
.add_postlist(qopt
->open_lazy_post_list(term
, 1, or_factor
));
1031 if (max_type
== Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT
) {
1032 // FIXME: open_lazy_post_list() results in the term getting registered
1033 // for stats, so we still incur an avoidable cost from the full
1034 // expansion size of the wildcard, which is most likely to be visible
1035 // with the remote backend. Perhaps we should split creating the lazy
1036 // postlist from registering the term for stats.
1037 if (ctx
.size() > max_expansion
)
1038 ctx
.select_most_frequent(max_expansion
);
1041 if (factor
!= 0.0) {
1042 if (op
!= Query::OP_SYNONYM
) {
1043 qopt
->set_total_subqs(qopt
->get_total_subqs() + ctx
.size());
1045 qopt
->inc_total_subqs();
1049 qopt
->in_synonym
= old_in_synonym
;
1052 RETURN(new EmptyPostList
);
1054 if (op
== Query::OP_MAX
)
1055 RETURN(ctx
.postlist_max());
1057 PostList
* pl
= ctx
.postlist();
1058 if (op
== Query::OP_OR
)
1061 // We build an OP_OR tree for OP_SYNONYM and then wrap it in a
1062 // SynonymPostList, which supplies the weights.
1063 PostingIterator::Internal
* r
= qopt
->make_synonym_postlist(pl
, factor
);
1068 QueryWildcard::get_length() const XAPIAN_NOEXCEPT
1070 // We currently assume wqf is 1 for calculating the synonym's weight
1071 // since conceptually the synonym is one "virtual" term. If we were
1072 // to combine multiple occurrences of the same synonym expansion into
1073 // a single instance with wqf set, we would want to track the wqf.
1078 QueryWildcard::serialise(string
& result
) const
1080 result
+= static_cast<char>(0x0b);
1081 result
+= encode_length(max_expansion
);
1082 result
+= static_cast<unsigned char>(max_type
);
1083 result
+= static_cast<unsigned char>(combiner
);
1084 result
+= encode_length(pattern
.size());
1089 QueryWildcard::get_type() const XAPIAN_NOEXCEPT
1091 return Query::OP_WILDCARD
;
1095 QueryWildcard::get_description() const
1097 string desc
= "WILDCARD ";
1099 case Query::OP_SYNONYM
:
1112 description_append(desc
, pattern
);
1117 QueryBranch::get_length() const XAPIAN_NOEXCEPT
1119 // Sum results from all subqueries.
1120 Xapian::termcount result
= 0;
1121 QueryVector::const_iterator i
;
1122 for (i
= subqueries
.begin(); i
!= subqueries
.end(); ++i
) {
1123 // MatchNothing subqueries should have been removed by done(), but we
1124 // can't use Assert in a XAPIAN_NOEXCEPT function. But we'll get a
1126 result
+= (*i
).internal
->get_length();
1131 #define MULTIWAY(X) static_cast<unsigned char>(0x80 | (X) << 3)
1132 #define MISC(X) static_cast<unsigned char>(X)
1134 QueryBranch::serialise_(string
& result
, Xapian::termcount parameter
) const
1136 static const unsigned char first_byte
[] = {
1137 MULTIWAY(0), // OP_AND
1138 MULTIWAY(1), // OP_OR
1139 MULTIWAY(2), // OP_AND_NOT
1140 MULTIWAY(3), // OP_XOR
1141 MULTIWAY(4), // OP_AND_MAYBE
1142 MULTIWAY(5), // OP_FILTER
1143 MULTIWAY(14), // OP_NEAR
1144 MULTIWAY(15), // OP_PHRASE
1145 0, // OP_VALUE_RANGE
1146 MISC(3), // OP_SCALE_WEIGHT
1147 MULTIWAY(13), // OP_ELITE_SET
1150 MULTIWAY(6), // OP_SYNONYM
1151 MULTIWAY(7) // OP_MAX
1153 Xapian::Query::op op_
= get_op();
1154 AssertRel(size_t(op_
),<,sizeof(first_byte
));
1155 unsigned char ch
= first_byte
[op_
];
1157 // Multi-way operator.
1158 if (subqueries
.size() < 8)
1159 ch
|= subqueries
.size();
1161 if (subqueries
.size() >= 8)
1162 result
+= encode_length(subqueries
.size() - 8);
1163 if (ch
>= MULTIWAY(13))
1164 result
+= encode_length(parameter
);
1169 QueryVector::const_iterator i
;
1170 for (i
= subqueries
.begin(); i
!= subqueries
.end(); ++i
) {
1171 // MatchNothing subqueries should have been removed by done().
1172 Assert((*i
).internal
.get());
1173 (*i
).internal
->serialise(result
);
1176 // For OP_NEAR, OP_PHRASE, and OP_ELITE_SET, the window/set size gets
1177 // appended next by an overloaded serialise() method in the subclass.
1181 QueryBranch::serialise(string
& result
) const
1183 QueryBranch::serialise_(result
);
1187 QueryNear::serialise(string
& result
) const
1189 // FIXME: window - subqueries.size() ?
1190 QueryBranch::serialise_(result
, window
);
1194 QueryPhrase::serialise(string
& result
) const
1196 // FIXME: window - subqueries.size() ?
1197 QueryBranch::serialise_(result
, window
);
1201 QueryEliteSet::serialise(string
& result
) const
1203 // FIXME: set_size - subqueries.size() ?
1204 QueryBranch::serialise_(result
, set_size
);
1208 QueryBranch::gather_terms(void * void_terms
) const
1210 // Gather results from all subqueries.
1211 QueryVector::const_iterator i
;
1212 for (i
= subqueries
.begin(); i
!= subqueries
.end(); ++i
) {
1213 // MatchNothing subqueries should have been removed by done().
1214 Assert((*i
).internal
.get());
1215 (*i
).internal
->gather_terms(void_terms
);
1220 QueryBranch::do_or_like(OrContext
& ctx
, QueryOptimiser
* qopt
, double factor
,
1221 Xapian::termcount elite_set_size
, size_t first
) const
1223 LOGCALL_VOID(MATCH
, "QueryBranch::do_or_like", ctx
| qopt
| factor
| elite_set_size
);
1225 // FIXME: we could optimise by merging OP_ELITE_SET and OP_OR like we do
1226 // for AND-like operations.
1228 // OP_SYNONYM with a single subquery is only simplified by
1229 // QuerySynonym::done() if the single subquery is a term or MatchAll.
1230 Assert(subqueries
.size() >= 2 || get_op() == Query::OP_SYNONYM
);
1232 vector
<PostList
*> postlists
;
1233 postlists
.reserve(subqueries
.size() - first
);
1235 QueryVector::const_iterator q
;
1236 for (q
= subqueries
.begin() + first
; q
!= subqueries
.end(); ++q
) {
1237 // MatchNothing subqueries should have been removed by done().
1238 Assert((*q
).internal
.get());
1239 (*q
).internal
->postlist_sub_or_like(ctx
, qopt
, factor
);
1242 if (elite_set_size
&& elite_set_size
< subqueries
.size()) {
1243 ctx
.select_elite_set(elite_set_size
, subqueries
.size());
1244 // FIXME: not right!
1249 QueryBranch::do_synonym(QueryOptimiser
* qopt
, double factor
) const
1251 LOGCALL(MATCH
, PostList
*, "QueryBranch::do_synonym", qopt
| factor
);
1252 OrContext
ctx(qopt
, subqueries
.size());
1253 if (factor
== 0.0) {
1254 // If we have a factor of 0, we don't care about the weights, so
1255 // we're just like a normal OR query.
1256 do_or_like(ctx
, qopt
, 0.0);
1257 return ctx
.postlist();
1260 bool old_in_synonym
= qopt
->in_synonym
;
1261 qopt
->in_synonym
= true;
1262 do_or_like(ctx
, qopt
, 0.0);
1263 PostList
* pl
= ctx
.postlist();
1264 qopt
->in_synonym
= old_in_synonym
;
1266 // We currently assume wqf is 1 for calculating the synonym's weight
1267 // since conceptually the synonym is one "virtual" term. If we were
1268 // to combine multiple occurrences of the same synonym expansion into
1269 // a single instance with wqf set, we would want to track the wqf.
1271 // We build an OP_OR tree for OP_SYNONYM and then wrap it in a
1272 // SynonymPostList, which supplies the weights.
1273 RETURN(qopt
->make_synonym_postlist(pl
, factor
));
1277 QueryBranch::do_max(QueryOptimiser
* qopt
, double factor
) const
1279 LOGCALL(MATCH
, PostList
*, "QueryBranch::do_max", qopt
| factor
);
1280 OrContext
ctx(qopt
, subqueries
.size());
1281 do_or_like(ctx
, qopt
, factor
);
1282 if (factor
== 0.0) {
1283 // If we have a factor of 0, we don't care about the weights, so
1284 // we're just like a normal OR query.
1285 RETURN(ctx
.postlist());
1288 // We currently assume wqf is 1 for calculating the OP_MAX's weight
1289 // since conceptually the OP_MAX is one "virtual" term. If we were
1290 // to combine multiple occurrences of the same OP_MAX expansion into
1291 // a single instance with wqf set, we would want to track the wqf.
1292 RETURN(ctx
.postlist_max());
1296 QueryBranch::get_type() const XAPIAN_NOEXCEPT
1302 QueryBranch::get_num_subqueries() const XAPIAN_NOEXCEPT
1304 return subqueries
.size();
1308 QueryBranch::get_subquery(size_t n
) const
1310 return subqueries
[n
];
1314 QueryBranch::get_description_helper(const char * op
,
1315 Xapian::termcount parameter
) const
1318 QueryVector::const_iterator i
;
1319 for (i
= subqueries
.begin(); i
!= subqueries
.end(); ++i
) {
1320 if (desc
.size() > 1) {
1323 desc
+= str(parameter
);
1327 Assert((*i
).internal
.get());
1328 // MatchNothing subqueries should have been removed by done(), and we
1329 // shouldn't get called before done() is, since that happens at the
1330 // end of the Xapian::Query constructor.
1331 desc
+= (*i
).internal
->get_description();
1338 QueryWindowed::done()
1340 // If window size not specified, default it.
1342 window
= subqueries
.size();
1343 return QueryAndLike::done();
1347 QueryScaleWeight::gather_terms(void * void_terms
) const
1349 subquery
.internal
->gather_terms(void_terms
);
1352 void QueryTerm::serialise(string
& result
) const
1354 size_t len
= term
.size();
1356 if (wqf
== 1 && pos
== 0) {
1360 // Weird mutant versions of MatchAll
1362 result
+= encode_length(wqf
);
1363 result
+= encode_length(pos
);
1365 } else if (wqf
== 1) {
1367 // Single occurrence probabilistic term without position set.
1369 result
+= static_cast<char>(0x40 | 0x10);
1370 result
+= encode_length(term
.size() - 16);
1372 result
+= static_cast<char>(0x40 | 0x10 | len
);
1376 // Single occurrence probabilistic term with position set.
1378 result
+= static_cast<char>(0x40 | 0x20);
1379 result
+= encode_length(term
.size() - 16);
1381 result
+= static_cast<char>(0x40 | 0x20 | len
);
1384 result
+= encode_length(pos
);
1386 } else if (wqf
> 1 || pos
> 0) {
1389 result
+= static_cast<char>(0x40 | 0x30);
1390 result
+= encode_length(term
.size() - 16);
1392 result
+= static_cast<char>(0x40 | 0x30 | len
);
1395 result
+= encode_length(wqf
);
1396 result
+= encode_length(pos
);
1398 // Typical boolean term.
1402 result
+= static_cast<char>(0x40);
1403 result
+= encode_length(term
.size() - 16);
1405 result
+= static_cast<char>(0x40 | len
);
1411 void QueryPostingSource::serialise(string
& result
) const
1413 result
+= static_cast<char>(0x0c);
1415 const string
& n
= source
->name();
1416 result
+= encode_length(n
.size());
1419 const string
& s
= source
->serialise();
1420 result
+= encode_length(s
.size());
1424 void QueryScaleWeight::serialise(string
& result
) const
1426 Assert(subquery
.internal
.get());
1427 const string
& s
= serialise_double(scale_factor
);
1430 subquery
.internal
->serialise(result
);
1433 struct is_matchnothing
{
1434 bool operator()(const Xapian::Query
& q
) const {
1435 return q
.internal
.get() == NULL
;
1440 QueryAndLike::add_subquery(const Xapian::Query
& subquery
)
1442 // If the AndLike is already MatchNothing, do nothing.
1443 if (subqueries
.size() == 1 && subqueries
[0].internal
.get() == NULL
)
1445 // If we're adding MatchNothing, discard any previous subqueries.
1446 if (subquery
.internal
.get() == NULL
)
1448 subqueries
.push_back(subquery
);
1452 QueryAndLike::done()
1454 // Empty AndLike gives MatchNothing.
1455 if (subqueries
.empty())
1457 // We handle any subquery being MatchNothing in add_subquery() by leaving
1458 // a single MatchNothing subquery, and so this check results in AndLike
1459 // giving MatchNothing.
1460 if (subqueries
.size() == 1)
1461 return subqueries
[0].internal
.get();
1465 PostingIterator::Internal
*
1466 QueryAndLike::postlist(QueryOptimiser
* qopt
, double factor
) const
1468 LOGCALL(QUERY
, PostingIterator::Internal
*, "QueryAndLike::postlist", qopt
| factor
);
1469 AndContext
ctx(qopt
, subqueries
.size());
1470 postlist_sub_and_like(ctx
, qopt
, factor
);
1471 RETURN(ctx
.postlist());
1475 QueryAndLike::postlist_sub_and_like(AndContext
& ctx
, QueryOptimiser
* qopt
, double factor
) const
1477 QueryVector::const_iterator i
;
1478 for (i
= subqueries
.begin(); i
!= subqueries
.end(); ++i
) {
1479 // MatchNothing subqueries should have been removed by done().
1480 Assert((*i
).internal
.get());
1481 (*i
).internal
->postlist_sub_and_like(ctx
, qopt
, factor
);
1486 QueryOrLike::add_subquery(const Xapian::Query
& subquery
)
1488 // Drop any subqueries which are MatchNothing.
1489 if (subquery
.internal
.get() != NULL
)
1490 subqueries
.push_back(subquery
);
1496 // An empty OrLike gives MatchNothing. Note that add_subquery() drops any
1497 // subqueries which are MatchNothing.
1498 if (subqueries
.empty())
1500 if (subqueries
.size() == 1)
1501 return subqueries
[0].internal
.get();
1506 QueryAndNot::add_subquery(const Xapian::Query
& subquery
)
1508 if (!subqueries
.empty()) {
1509 // We're adding the 2nd or subsequent subquery, so this subquery is
1511 if (subqueries
[0].internal
.get() == NULL
) {
1512 // The left side is already MatchNothing so drop any right side.
1514 // MatchNothing AND_NOT X == MatchNothing
1517 if (subquery
.internal
.get() == NULL
) {
1518 // Drop MatchNothing on the right of AndNot.
1520 // X AND_NOT MatchNothing == X
1523 if (subquery
.get_type() == subquery
.OP_SCALE_WEIGHT
) {
1524 // Strip OP_SCALE_WEIGHT wrapping from queries on the right of
1525 // AndNot as no weight is taken from them.
1526 subqueries
.push_back(subquery
.get_subquery(0));
1527 // The Query constructor for OP_SCALE_WEIGHT constructor should
1528 // eliminate OP_SCALE_WEIGHT applied to MatchNothing.
1529 Assert(subquery
.get_subquery(0).internal
.get() != NULL
);
1533 subqueries
.push_back(subquery
);
1539 // Any MatchNothing right subqueries get discarded by add_subquery() - if
1540 // that leaves just the left subquery, return that.
1542 // If left subquery is MatchNothing, then add_subquery() discards all right
1543 // subqueries, so this check also gives MatchNothing for this case.
1544 if (subqueries
.size() == 1)
1545 return subqueries
[0].internal
.get();
1550 QueryAndMaybe::add_subquery(const Xapian::Query
& subquery
)
1552 // If the left side of AndMaybe is already MatchNothing, do nothing.
1553 if (subqueries
.size() == 1 && subqueries
[0].internal
.get() == NULL
)
1555 // Drop any 2nd or subsequent subqueries which are MatchNothing.
1556 if (subquery
.internal
.get() != NULL
|| subqueries
.empty())
1557 subqueries
.push_back(subquery
);
1561 QueryAndMaybe::done()
1563 // Any MatchNothing right subqueries get discarded by add_subquery() - if
1564 // that leaves just the left subquery, return that.
1566 // If left subquery is MatchNothing, then add_subquery() discards all right
1567 // subqueries, so this check also gives MatchNothing for this case.
1568 if (subqueries
.size() == 1)
1569 return subqueries
[0].internal
.get();
1573 PostingIterator::Internal
*
1574 QueryOr::postlist(QueryOptimiser
* qopt
, double factor
) const
1576 LOGCALL(QUERY
, PostingIterator::Internal
*, "QueryOr::postlist", qopt
| factor
);
1577 OrContext
ctx(qopt
, subqueries
.size());
1578 do_or_like(ctx
, qopt
, factor
);
1579 RETURN(ctx
.postlist());
1583 QueryOr::postlist_sub_or_like(OrContext
& ctx
, QueryOptimiser
* qopt
, double factor
) const
1585 do_or_like(ctx
, qopt
, factor
);
1588 PostingIterator::Internal
*
1589 QueryAndNot::postlist(QueryOptimiser
* qopt
, double factor
) const
1591 LOGCALL(QUERY
, PostingIterator::Internal
*, "QueryAndNot::postlist", qopt
| factor
);
1592 // FIXME: Combine and-like side with and-like stuff above.
1593 unique_ptr
<PostList
> l(subqueries
[0].internal
->postlist(qopt
, factor
));
1594 OrContext
ctx(qopt
, subqueries
.size() - 1);
1595 do_or_like(ctx
, qopt
, 0.0, 0, 1);
1596 unique_ptr
<PostList
> r(ctx
.postlist());
1597 RETURN(new AndNotPostList(l
.release(), r
.release(),
1598 qopt
->matcher
, qopt
->db_size
));
1601 PostingIterator::Internal
*
1602 QueryXor::postlist(QueryOptimiser
* qopt
, double factor
) const
1604 LOGCALL(QUERY
, PostingIterator::Internal
*, "QueryXor::postlist", qopt
| factor
);
1605 XorContext
ctx(qopt
, subqueries
.size());
1606 postlist_sub_xor(ctx
, qopt
, factor
);
1607 RETURN(ctx
.postlist());
1611 QueryXor::postlist_sub_xor(XorContext
& ctx
, QueryOptimiser
* qopt
, double factor
) const
1613 QueryVector::const_iterator i
;
1614 for (i
= subqueries
.begin(); i
!= subqueries
.end(); ++i
) {
1615 // MatchNothing subqueries should have been removed by done().
1616 Assert((*i
).internal
.get());
1617 (*i
).internal
->postlist_sub_xor(ctx
, qopt
, factor
);
1621 PostingIterator::Internal
*
1622 QueryAndMaybe::postlist(QueryOptimiser
* qopt
, double factor
) const
1624 LOGCALL(QUERY
, PostingIterator::Internal
*, "QueryAndMaybe::postlist", qopt
| factor
);
1625 // FIXME: Combine and-like side with and-like stuff above.
1626 unique_ptr
<PostList
> l(subqueries
[0].internal
->postlist(qopt
, factor
));
1627 if (factor
== 0.0) {
1628 // An unweighted OP_AND_MAYBE can be replaced with its left branch.
1629 RETURN(l
.release());
1631 OrContext
ctx(qopt
, subqueries
.size() - 1);
1632 do_or_like(ctx
, qopt
, factor
, 0, 1);
1633 unique_ptr
<PostList
> r(ctx
.postlist());
1634 RETURN(new AndMaybePostList(l
.release(), r
.release(),
1635 qopt
->matcher
, qopt
->db_size
));
1638 PostingIterator::Internal
*
1639 QueryFilter::postlist(QueryOptimiser
* qopt
, double factor
) const
1641 LOGCALL(QUERY
, PostingIterator::Internal
*, "QueryFilter::postlist", qopt
| factor
);
1642 // FIXME: Combine and-like stuff, like QueryOptimiser.
1643 AssertEq(subqueries
.size(), 2);
1645 unique_ptr
<PostList
> l(subqueries
[0].internal
->postlist(qopt
, factor
));
1646 pls
[1] = subqueries
[1].internal
->postlist(qopt
, 0.0);
1647 pls
[0] = l
.release();
1648 RETURN(new MultiAndPostList(pls
, pls
+ 2, qopt
->matcher
, qopt
->db_size
));
1652 QueryFilter::postlist_sub_and_like(AndContext
& ctx
, QueryOptimiser
* qopt
, double factor
) const
1654 QueryVector::const_iterator i
;
1655 for (i
= subqueries
.begin(); i
!= subqueries
.end(); ++i
) {
1656 // MatchNothing subqueries should have been removed by done().
1657 Assert((*i
).internal
.get());
1658 (*i
).internal
->postlist_sub_and_like(ctx
, qopt
, factor
);
1659 // Second and subsequent subqueries are unweighted.
1665 QueryWindowed::postlist_windowed(Query::op op
, AndContext
& ctx
, QueryOptimiser
* qopt
, double factor
) const
1667 if (!qopt
->full_db_has_positions
) {
1668 // No positional data anywhere, so just handle as AND.
1669 QueryAndLike::postlist_sub_and_like(ctx
, qopt
, factor
);
1673 if (!qopt
->db
.has_positions()) {
1674 // No positions in this subdatabase so this matches nothing, which
1675 // means the whole andcontext matches nothing.
1677 // Bailing out here means we don't recurse deeper and that means we
1678 // don't call QueryOptimiser::inc_total_subqs() for leaf postlists in
1679 // the phrase, but at least one shard will count them, and the matcher
1680 // takes the highest answer (since 1.4.6).
1685 bool old_need_positions
= qopt
->need_positions
;
1686 qopt
->need_positions
= true;
1688 QueryVector::const_iterator i
;
1689 for (i
= subqueries
.begin(); i
!= subqueries
.end(); ++i
) {
1690 // MatchNothing subqueries should have been removed by done().
1691 Assert((*i
).internal
.get());
1692 bool is_term
= ((*i
).internal
->get_type() == Query::LEAF_TERM
);
1693 PostList
* pl
= (*i
).internal
->postlist(qopt
, factor
);
1695 pl
= new OrPosPostList(pl
);
1696 ctx
.add_postlist(pl
);
1698 // Record the positional filter to apply higher up the tree.
1699 ctx
.add_pos_filter(op
, subqueries
.size(), window
);
1701 qopt
->need_positions
= old_need_positions
;
1705 QueryPhrase::postlist_sub_and_like(AndContext
& ctx
, QueryOptimiser
* qopt
, double factor
) const
1707 QueryWindowed::postlist_windowed(Query::OP_PHRASE
, ctx
, qopt
, factor
);
1711 QueryNear::postlist_sub_and_like(AndContext
& ctx
, QueryOptimiser
* qopt
, double factor
) const
1713 QueryWindowed::postlist_windowed(Query::OP_NEAR
, ctx
, qopt
, factor
);
1716 PostingIterator::Internal
*
1717 QueryEliteSet::postlist(QueryOptimiser
* qopt
, double factor
) const
1719 LOGCALL(QUERY
, PostingIterator::Internal
*, "QueryEliteSet::postlist", qopt
| factor
);
1720 OrContext
ctx(qopt
, subqueries
.size());
1721 do_or_like(ctx
, qopt
, factor
, set_size
);
1722 RETURN(ctx
.postlist());
1726 QueryEliteSet::postlist_sub_or_like(OrContext
& ctx
, QueryOptimiser
* qopt
, double factor
) const
1728 do_or_like(ctx
, qopt
, factor
, set_size
);
1731 PostingIterator::Internal
*
1732 QuerySynonym::postlist(QueryOptimiser
* qopt
, double factor
) const
1734 LOGCALL(QUERY
, PostingIterator::Internal
*, "QuerySynonym::postlist", qopt
| factor
);
1735 // Save and restore total_subqs so we only add one for the whole
1736 // OP_SYNONYM subquery (or none if we're not weighted).
1737 Xapian::termcount save_total_subqs
= qopt
->get_total_subqs();
1740 PostList
* pl
= do_synonym(qopt
, factor
);
1741 qopt
->set_total_subqs(save_total_subqs
);
1746 QuerySynonym::done()
1748 // An empty Synonym gives MatchNothing. Note that add_subquery() drops any
1749 // subqueries which are MatchNothing.
1750 if (subqueries
.empty())
1752 // Synonym of a single subquery should only be simplified if that subquery
1753 // is a term (or MatchAll), or if it's also OP_SYNONYM. Note that
1754 // MatchNothing subqueries are dropped, so we'd never get here with a
1755 // single MatchNothing subquery.
1756 if (subqueries
.size() == 1) {
1757 Query::op sub_type
= subqueries
[0].get_type();
1758 if (sub_type
== Query::LEAF_TERM
|| sub_type
== Query::LEAF_MATCH_ALL
||
1759 sub_type
== Query::OP_SYNONYM
) {
1760 return subqueries
[0].internal
.get();
1766 PostingIterator::Internal
*
1767 QueryMax::postlist(QueryOptimiser
* qopt
, double factor
) const
1769 LOGCALL(QUERY
, PostingIterator::Internal
*, "QueryMax::postlist", qopt
| factor
);
1770 // Save and restore total_subqs so we only add one for the whole
1771 // OP_MAX subquery (or none if we're not weighted).
1772 Xapian::termcount save_total_subqs
= qopt
->get_total_subqs();
1775 PostList
* pl
= do_max(qopt
, factor
);
1776 qopt
->set_total_subqs(save_total_subqs
);
1781 QueryAnd::get_op() const
1783 return Xapian::Query::OP_AND
;
1787 QueryOr::get_op() const
1789 return Xapian::Query::OP_OR
;
1793 QueryAndNot::get_op() const
1795 return Xapian::Query::OP_AND_NOT
;
1799 QueryXor::get_op() const
1801 return Xapian::Query::OP_XOR
;
1805 QueryAndMaybe::get_op() const
1807 return Xapian::Query::OP_AND_MAYBE
;
1811 QueryFilter::get_op() const
1813 return Xapian::Query::OP_FILTER
;
1817 QueryNear::get_op() const
1819 return Xapian::Query::OP_NEAR
;
1823 QueryPhrase::get_op() const
1825 return Xapian::Query::OP_PHRASE
;
1829 QueryEliteSet::get_op() const
1831 return Xapian::Query::OP_ELITE_SET
;
1835 QuerySynonym::get_op() const
1837 return Xapian::Query::OP_SYNONYM
;
1841 QueryMax::get_op() const
1843 return Xapian::Query::OP_MAX
;
1847 QueryWildcard::get_op() const
1849 return Xapian::Query::OP_WILDCARD
;
1853 QueryAnd::get_description() const
1855 return get_description_helper(" AND ");
1859 QueryOr::get_description() const
1861 return get_description_helper(" OR ");
1865 QueryAndNot::get_description() const
1867 return get_description_helper(" AND_NOT ");
1871 QueryXor::get_description() const
1873 return get_description_helper(" XOR ");
1877 QueryAndMaybe::get_description() const
1879 return get_description_helper(" AND_MAYBE ");
1883 QueryFilter::get_description() const
1885 return get_description_helper(" FILTER ");
1889 QueryNear::get_description() const
1891 return get_description_helper(" NEAR ", window
);
1895 QueryPhrase::get_description() const
1897 return get_description_helper(" PHRASE ", window
);
1901 QueryEliteSet::get_description() const
1903 return get_description_helper(" ELITE_SET ", set_size
);
1907 QuerySynonym::get_description() const
1909 if (subqueries
.size() == 1) {
1910 string d
= "(SYNONYM ";
1911 d
+= subqueries
[0].internal
->get_description();
1915 return get_description_helper(" SYNONYM ");
1919 QueryMax::get_description() const
1921 return get_description_helper(" MAX ");
1925 QueryInvalid::get_type() const XAPIAN_NOEXCEPT
1927 return Xapian::Query::OP_INVALID
;
1930 PostingIterator::Internal
*
1931 QueryInvalid::postlist(QueryOptimiser
*, double) const
1933 throw Xapian::InvalidOperationError("Query is invalid");
1937 QueryInvalid::serialise(std::string
& result
) const
1939 result
+= static_cast<char>(0x00);
1943 QueryInvalid::get_description() const