Detect common wdf-disjoint OP_SYNONYM subqueries
[xapian.git] / xapian-core / api / queryinternal.cc
blobd33e1fc0e20fcb1a76304198c72126a4b96f19e2
1 /** @file queryinternal.cc
2 * @brief Xapian::Query internals
3 */
4 /* Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018 Olly Betts
5 * Copyright (C) 2008,2009 Lemur Consulting Ltd
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #include <config.h>
24 #include "queryinternal.h"
26 #include "xapian/error.h"
27 #include "xapian/postingsource.h"
28 #include "xapian/query.h"
30 #include "heap.h"
31 #include "leafpostlist.h"
32 #include "matcher/andmaybepostlist.h"
33 #include "matcher/andnotpostlist.h"
34 #include "emptypostlist.h"
35 #include "matcher/exactphrasepostlist.h"
36 #include "matcher/externalpostlist.h"
37 #include "matcher/maxpostlist.h"
38 #include "matcher/multiandpostlist.h"
39 #include "matcher/multixorpostlist.h"
40 #include "matcher/nearpostlist.h"
41 #include "matcher/orpospostlist.h"
42 #include "matcher/orpostlist.h"
43 #include "matcher/phrasepostlist.h"
44 #include "matcher/queryoptimiser.h"
45 #include "matcher/valuerangepostlist.h"
46 #include "matcher/valuegepostlist.h"
47 #include "net/length.h"
48 #include "serialise-double.h"
49 #include "stringutils.h"
50 #include "termlist.h"
52 #include "debuglog.h"
53 #include "omassert.h"
54 #include "str.h"
55 #include "unicode/description_append.h"
57 #include <algorithm>
58 #include <functional>
59 #include <list>
60 #include <memory>
61 #include <string>
62 #include <unordered_set>
63 #include <vector>
65 using namespace std;
67 template<class CLASS> struct delete_ptr {
68 void operator()(CLASS *p) const { delete p; }
71 using Xapian::Internal::AndContext;
72 using Xapian::Internal::OrContext;
73 using Xapian::Internal::XorContext;
75 namespace Xapian {
77 namespace Internal {
79 /** Class providing an operator which sorts postlists to select max or terms.
80 * This returns true if a has a (strictly) greater termweight than b.
82 * Using this comparator will tend to result in multiple calls to
83 * recalc_maxweight() for each of the subqueries (we use it with nth_element
84 * which should be O(n)) - perhaps it'd be better to call recalc_maxweight()
85 * once and then sort on that.
87 struct CmpMaxOrTerms {
88 /** Return true if and only if a has a strictly greater termweight than b. */
89 bool operator()(PostList *a, PostList *b) {
90 #if (defined(__i386__) && !defined(__SSE_MATH__)) || \
91 defined(__mc68000__) || defined(__mc68010__) || \
92 defined(__mc68020__) || defined(__mc68030__)
93 // On some architectures, most common of which is x86, floating point
94 // values are calculated and stored in registers with excess precision.
95 // If the two recalc_maxweight() calls below return identical values in a
96 // register, the excess precision may be dropped for one of them but
97 // not the other (e.g. because the compiler saves the first calculated
98 // weight to memory while calculating the second, then reloads it to
99 // compare). This leads to both a > b and b > a being true, which
100 // violates the antisymmetry property of the strict weak ordering
101 // required by nth_element(). This can have serious consequences (e.g.
102 // segfaults).
104 // Note that m68k only has excess precision in earlier models - 68040
105 // and later are OK:
106 // http://gcc.gnu.org/ml/gcc-patches/2008-11/msg00105.html
108 // To avoid this, we store each result in a volatile double prior to
109 // comparing them. This means that the result of this test should
110 // match that on other architectures with the same double format (which
111 // is desirable), and actually has less overhead than rounding both
112 // results to float (which is another approach which works).
113 volatile double a_max_wt = a->recalc_maxweight();
114 volatile double b_max_wt = b->recalc_maxweight();
115 return a_max_wt > b_max_wt;
116 #else
117 return a->recalc_maxweight() > b->recalc_maxweight();
118 #endif
122 /// Comparison functor which orders PostList* by descending get_termfreq_est().
123 struct ComparePostListTermFreqAscending {
124 /// Order by descending get_termfreq_est().
125 bool operator()(const PostList *a, const PostList *b) const {
126 return a->get_termfreq_est() > b->get_termfreq_est();
130 class Context {
131 protected:
132 QueryOptimiser* qopt;
134 vector<PostList*> pls;
136 public:
137 Context(QueryOptimiser* qopt_, size_t reserve);
139 ~Context();
141 void add_postlist(PostList * pl) {
142 pls.push_back(pl);
145 bool empty() const {
146 return pls.empty();
149 size_t size() const {
150 return pls.size();
153 void shrink(size_t new_size);
156 Context::Context(QueryOptimiser* qopt_, size_t reserve)
157 : qopt(qopt_)
159 pls.reserve(reserve);
162 void
163 Context::shrink(size_t new_size)
165 AssertRel(new_size, <=, pls.size());
166 if (new_size >= pls.size())
167 return;
169 const PostList * hint_pl = qopt->get_hint_postlist();
170 for (auto&& i = pls.begin() + new_size; i != pls.end(); ++i) {
171 const PostList * pl = *i;
172 if (rare(pl == hint_pl)) {
173 // We were about to delete qopt's hint - instead tell qopt to take
174 // ownership.
175 qopt->take_hint_ownership();
176 hint_pl = NULL;
177 } else {
178 delete pl;
181 pls.resize(new_size);
184 Context::~Context()
186 shrink(0);
189 class OrContext : public Context {
190 public:
191 OrContext(QueryOptimiser* qopt_, size_t reserve)
192 : Context(qopt_, reserve) { }
194 /// Select the best set_size postlists from the last out_of added.
195 void select_elite_set(size_t set_size, size_t out_of);
197 /// Select the set_size postlists with the highest term frequency.
198 void select_most_frequent(size_t set_size);
200 PostList * postlist();
201 PostList * postlist_max();
204 void
205 OrContext::select_elite_set(size_t set_size, size_t out_of)
207 vector<PostList*>::iterator begin = pls.begin() + pls.size() - out_of;
208 nth_element(begin, begin + set_size - 1, pls.end(), CmpMaxOrTerms());
209 shrink(pls.size() - out_of + set_size);
212 void
213 OrContext::select_most_frequent(size_t set_size)
215 vector<PostList*>::iterator begin = pls.begin();
216 nth_element(begin, begin + set_size - 1, pls.end(),
217 ComparePostListTermFreqAscending());
218 shrink(set_size);
221 PostList *
222 OrContext::postlist()
224 Assert(!pls.empty());
226 if (pls.size() == 1) {
227 PostList * pl = pls[0];
228 pls.clear();
229 return pl;
232 // Make postlists into a heap so that the postlist with the greatest term
233 // frequency is at the top of the heap.
234 Heap::make(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
236 // Now build a tree of binary OrPostList objects.
238 // The algorithm used to build the tree is like that used to build an
239 // optimal Huffman coding tree. If we called next() repeatedly, this
240 // arrangement would minimise the number of method calls. Generally we
241 // don't actually do that, but this arrangement is still likely to be a
242 // good one, and it does minimise the work in the worst case.
243 while (true) {
244 // We build the tree such that at each branch:
246 // l.get_termfreq_est() >= r.get_termfreq_est()
248 // We do this so that the OrPostList class can be optimised assuming
249 // that this is the case.
250 PostList * r = pls.front();
251 Heap::pop(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
252 pls.pop_back();
253 PostList * pl;
254 pl = new OrPostList(pls.front(), r, qopt->matcher, qopt->db_size);
256 if (pls.size() == 1) {
257 pls.clear();
258 return pl;
261 pls[0] = pl;
262 Heap::replace(pls.begin(), pls.end(),
263 ComparePostListTermFreqAscending());
267 PostList *
268 OrContext::postlist_max()
270 Assert(!pls.empty());
272 if (pls.size() == 1) {
273 PostList * pl = pls[0];
274 pls.clear();
275 return pl;
278 // Sort the postlists so that the postlist with the greatest term frequency
279 // is first.
280 sort(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
282 PostList * pl;
283 pl = new MaxPostList(pls.begin(), pls.end(), qopt->matcher, qopt->db_size);
285 pls.clear();
286 return pl;
289 class XorContext : public Context {
290 public:
291 XorContext(QueryOptimiser* qopt_, size_t reserve)
292 : Context(qopt_, reserve) { }
294 PostList * postlist();
297 PostList *
298 XorContext::postlist()
300 Xapian::doccount db_size = qopt->db_size;
301 PostList * pl;
302 pl = new MultiXorPostList(pls.begin(), pls.end(), qopt->matcher, db_size);
304 // Empty pls so our destructor doesn't delete them all!
305 pls.clear();
306 return pl;
309 class AndContext : public Context {
310 class PosFilter {
311 Xapian::Query::op op_;
313 /// Start and end indices for the PostLists this positional filter uses.
314 size_t begin, end;
316 Xapian::termcount window;
318 public:
319 PosFilter(Xapian::Query::op op__, size_t begin_, size_t end_,
320 Xapian::termcount window_)
321 : op_(op__), begin(begin_), end(end_), window(window_) { }
323 PostList * postlist(PostList* pl,
324 const vector<PostList*>& pls,
325 PostListTree* pltree) const;
328 list<PosFilter> pos_filters;
330 public:
331 AndContext(QueryOptimiser* qopt_, size_t reserve)
332 : Context(qopt_, reserve) { }
334 void add_pos_filter(Query::op op_,
335 size_t n_subqs,
336 Xapian::termcount window);
338 PostList * postlist();
341 PostList *
342 AndContext::PosFilter::postlist(PostList* pl,
343 const vector<PostList*>& pls,
344 PostListTree* pltree) const
345 try {
346 vector<PostList *>::const_iterator terms_begin = pls.begin() + begin;
347 vector<PostList *>::const_iterator terms_end = pls.begin() + end;
349 if (op_ == Xapian::Query::OP_NEAR) {
350 pl = new NearPostList(pl, window, terms_begin, terms_end, pltree);
351 } else if (window == end - begin) {
352 AssertEq(op_, Xapian::Query::OP_PHRASE);
353 pl = new ExactPhrasePostList(pl, terms_begin, terms_end, pltree);
354 } else {
355 AssertEq(op_, Xapian::Query::OP_PHRASE);
356 pl = new PhrasePostList(pl, window, terms_begin, terms_end, pltree);
358 return pl;
359 } catch (...) {
360 delete pl;
361 throw;
364 void
365 AndContext::add_pos_filter(Query::op op_,
366 size_t n_subqs,
367 Xapian::termcount window)
369 Assert(n_subqs > 1);
370 size_t end = pls.size();
371 size_t begin = end - n_subqs;
372 pos_filters.push_back(PosFilter(op_, begin, end, window));
375 PostList *
376 AndContext::postlist()
378 if (pls.empty()) {
379 // This case only happens if this sub-database has no positional data
380 // (but another sub-database does).
381 Assert(pos_filters.empty());
382 return new EmptyPostList;
385 unique_ptr<PostList> pl(new MultiAndPostList(pls.begin(), pls.end(),
386 qopt->matcher, qopt->db_size));
388 // Sort the positional filters to try to apply them in an efficient order.
389 // FIXME: We need to figure out what that is! Try applying lowest cf/tf
390 // first?
392 // Apply any positional filters.
393 list<PosFilter>::const_iterator i;
394 for (i = pos_filters.begin(); i != pos_filters.end(); ++i) {
395 const PosFilter & filter = *i;
396 pl.reset(filter.postlist(pl.release(), pls, qopt->matcher));
399 // Empty pls so our destructor doesn't delete them all!
400 pls.clear();
401 return pl.release();
406 Query::Internal::~Internal() { }
408 size_t
409 Query::Internal::get_num_subqueries() const XAPIAN_NOEXCEPT
411 return 0;
414 const Query
415 Query::Internal::get_subquery(size_t) const
417 throw Xapian::InvalidArgumentError("get_subquery() not meaningful for this Query object");
420 Xapian::termcount
421 Query::Internal::get_wqf() const
423 throw Xapian::InvalidArgumentError("get_wqf() not meaningful for this Query object");
426 Xapian::termpos
427 Query::Internal::get_pos() const
429 throw Xapian::InvalidArgumentError("get_pos() not meaningful for this Query object");
432 void
433 Query::Internal::gather_terms(void *) const
437 Xapian::termcount
438 Query::Internal::get_length() const XAPIAN_NOEXCEPT
440 return 0;
443 Query::Internal *
444 Query::Internal::unserialise(const char ** p, const char * end,
445 const Registry & reg)
447 if (*p == end)
448 return NULL;
449 unsigned char ch = *(*p)++;
450 switch (ch >> 5) {
451 case 4: case 5: case 6: case 7: {
452 // Multi-way branch
454 // 1ccccnnn where:
455 // nnn -> n_subqs (0 means encoded value follows)
456 // cccc -> code (which OP_XXX)
457 size_t n_subqs = ch & 0x07;
458 if (n_subqs == 0) {
459 decode_length(p, end, n_subqs);
460 n_subqs += 8;
462 unsigned char code = (ch >> 3) & 0x0f;
463 Xapian::termcount parameter = 0;
464 if (code >= 13)
465 decode_length(p, end, parameter);
466 Xapian::Internal::QueryBranch * result;
467 switch (code) {
468 case 0: // OP_AND
469 result = new Xapian::Internal::QueryAnd(n_subqs);
470 break;
471 case 1: // OP_OR
472 result = new Xapian::Internal::QueryOr(n_subqs);
473 break;
474 case 2: // OP_AND_NOT
475 result = new Xapian::Internal::QueryAndNot(n_subqs);
476 break;
477 case 3: // OP_XOR
478 result = new Xapian::Internal::QueryXor(n_subqs);
479 break;
480 case 4: // OP_AND_MAYBE
481 result = new Xapian::Internal::QueryAndMaybe(n_subqs);
482 break;
483 case 5: // OP_FILTER
484 result = new Xapian::Internal::QueryFilter(n_subqs);
485 break;
486 case 6: // OP_SYNONYM
487 result = new Xapian::Internal::QuerySynonym(n_subqs);
488 break;
489 case 7: // OP_MAX
490 result = new Xapian::Internal::QueryMax(n_subqs);
491 break;
492 case 13: // OP_ELITE_SET
493 result = new Xapian::Internal::QueryEliteSet(n_subqs,
494 parameter);
495 break;
496 case 14: // OP_NEAR
497 result = new Xapian::Internal::QueryNear(n_subqs,
498 parameter);
499 break;
500 case 15: // OP_PHRASE
501 result = new Xapian::Internal::QueryPhrase(n_subqs,
502 parameter);
503 break;
504 default:
505 // 8 to 12 are currently unused.
506 throw SerialisationError("Unknown multi-way branch Query operator");
508 do {
509 result->add_subquery(Xapian::Query(unserialise(p, end, reg)));
510 } while (--n_subqs);
511 result->done();
512 return result;
514 case 2: case 3: { // Term
515 // Term
517 // 01ccLLLL where:
518 // LLLL -> length (0 means encoded value follows)
519 // cc -> code:
520 // 0: wqf = 0; pos = 0
521 // 1: wqf = 1; pos = 0
522 // 2: wqf = 1; pos -> encoded value follows
523 // 3: wqf -> encoded value follows; pos -> encoded value follows
524 size_t len = ch & 0x0f;
525 if (len == 0) {
526 decode_length(p, end, len);
527 len += 16;
529 if (size_t(end - *p) < len)
530 throw SerialisationError("Not enough data");
531 string term(*p, len);
532 *p += len;
534 int code = ((ch >> 4) & 0x03);
536 Xapian::termcount wqf = static_cast<Xapian::termcount>(code > 0);
537 if (code == 3)
538 decode_length(p, end, wqf);
540 Xapian::termpos pos = 0;
541 if (code >= 2)
542 decode_length(p, end, pos);
544 return new Xapian::Internal::QueryTerm(term, wqf, pos);
546 case 1: {
547 // OP_VALUE_RANGE or OP_VALUE_GE or OP_VALUE_LE
549 // 001tssss where:
550 // ssss -> slot number (15 means encoded value follows)
551 // t -> op:
552 // 0: OP_VALUE_RANGE (or OP_VALUE_LE if begin empty)
553 // 1: OP_VALUE_GE
554 Xapian::valueno slot = ch & 15;
555 if (slot == 15) {
556 decode_length(p, end, slot);
557 slot += 15;
559 size_t len;
560 decode_length_and_check(p, end, len);
561 string begin(*p, len);
562 *p += len;
563 if (ch & 0x10) {
564 // OP_VALUE_GE
565 return new Xapian::Internal::QueryValueGE(slot, begin);
568 // OP_VALUE_RANGE
569 decode_length_and_check(p, end, len);
570 string end_(*p, len);
571 *p += len;
572 if (begin.empty()) // FIXME: is this right?
573 return new Xapian::Internal::QueryValueLE(slot, end_);
574 return new Xapian::Internal::QueryValueRange(slot, begin, end_);
576 case 0: {
577 // Other operators
579 // 000ttttt where:
580 // ttttt -> encodes which OP_XXX
581 switch (ch & 0x1f) {
582 case 0x00: // OP_INVALID
583 return new Xapian::Internal::QueryInvalid();
584 case 0x0b: { // Wildcard
585 if (*p == end)
586 throw SerialisationError("not enough data");
587 Xapian::termcount max_expansion;
588 decode_length(p, end, max_expansion);
589 if (end - *p < 2)
590 throw SerialisationError("not enough data");
591 int max_type = static_cast<unsigned char>(*(*p)++);
592 op combiner = static_cast<op>(*(*p)++);
593 size_t len;
594 decode_length_and_check(p, end, len);
595 string pattern(*p, len);
596 *p += len;
597 return new Xapian::Internal::QueryWildcard(pattern,
598 max_expansion,
599 max_type,
600 combiner);
602 case 0x0c: { // PostingSource
603 size_t len;
604 decode_length_and_check(p, end, len);
605 string name(*p, len);
606 *p += len;
608 const PostingSource * reg_source = reg.get_posting_source(name);
609 if (!reg_source) {
610 string m = "PostingSource ";
611 m += name;
612 m += " not registered";
613 throw SerialisationError(m);
616 decode_length_and_check(p, end, len);
617 PostingSource * source =
618 reg_source->unserialise_with_registry(string(*p, len),
619 reg);
620 *p += len;
621 return new Xapian::Internal::QueryPostingSource(source->release());
623 case 0x0d: {
624 using Xapian::Internal::QueryScaleWeight;
625 double scale_factor = unserialise_double(p, end);
626 return new QueryScaleWeight(scale_factor,
627 Query(unserialise(p, end, reg)));
629 case 0x0e: {
630 Xapian::termcount wqf;
631 Xapian::termpos pos;
632 decode_length(p, end, wqf);
633 decode_length(p, end, pos);
634 return new Xapian::Internal::QueryTerm(string(), wqf, pos);
636 case 0x0f:
637 return new Xapian::Internal::QueryTerm();
638 default: // Others currently unused.
639 break;
641 break;
644 string msg = "Unknown Query serialisation: ";
645 msg += str(ch);
646 throw SerialisationError(msg);
649 void
650 Query::Internal::postlist_sub_and_like(AndContext& ctx,
651 QueryOptimiser * qopt,
652 double factor) const
654 ctx.add_postlist(postlist(qopt, factor));
657 void
658 Query::Internal::postlist_sub_or_like(OrContext& ctx,
659 QueryOptimiser * qopt,
660 double factor) const
662 ctx.add_postlist(postlist(qopt, factor));
665 void
666 Query::Internal::postlist_sub_xor(XorContext& ctx,
667 QueryOptimiser * qopt,
668 double factor) const
670 ctx.add_postlist(postlist(qopt, factor));
673 namespace Internal {
675 Query::op
676 QueryTerm::get_type() const XAPIAN_NOEXCEPT
678 return term.empty() ? Query::LEAF_MATCH_ALL : Query::LEAF_TERM;
681 string
682 QueryTerm::get_description() const
684 string desc;
685 if (term.empty()) {
686 desc = "<alldocuments>";
687 } else {
688 description_append(desc, term);
690 if (wqf != 1) {
691 desc += '#';
692 desc += str(wqf);
694 if (pos) {
695 desc += '@';
696 desc += str(pos);
698 return desc;
701 QueryPostingSource::QueryPostingSource(PostingSource * source_)
702 : source(source_)
704 if (!source_)
705 throw Xapian::InvalidArgumentError("source parameter can't be NULL");
706 if (source->_refs == 0) {
707 // source_ isn't reference counted, so try to clone it. If clone()
708 // isn't implemented, just use the object provided and it's the
709 // caller's responsibility to ensure it stays valid while in use.
710 PostingSource * cloned_source = source->clone();
711 if (cloned_source) source = cloned_source->release();
715 Query::op
716 QueryPostingSource::get_type() const XAPIAN_NOEXCEPT
718 return Query::LEAF_POSTING_SOURCE;
721 string
722 QueryPostingSource::get_description() const
724 string desc = "PostingSource(";
725 desc += source->get_description();
726 desc += ')';
727 return desc;
730 QueryScaleWeight::QueryScaleWeight(double factor, const Query & subquery_)
731 : scale_factor(factor), subquery(subquery_)
733 if (rare(scale_factor < 0.0))
734 throw Xapian::InvalidArgumentError("OP_SCALE_WEIGHT requires factor >= 0");
737 Query::op
738 QueryScaleWeight::get_type() const XAPIAN_NOEXCEPT
740 return Query::OP_SCALE_WEIGHT;
743 size_t
744 QueryScaleWeight::get_num_subqueries() const XAPIAN_NOEXCEPT
746 return 1;
749 const Query
750 QueryScaleWeight::get_subquery(size_t) const
752 return subquery;
755 string
756 QueryScaleWeight::get_description() const
758 Assert(subquery.internal.get());
759 string desc = str(scale_factor);
760 desc += " * ";
761 desc += subquery.internal->get_description();
762 return desc;
765 PostList*
766 QueryTerm::postlist(QueryOptimiser * qopt, double factor) const
768 LOGCALL(QUERY, PostList*, "QueryTerm::postlist", qopt | factor);
769 if (factor != 0.0)
770 qopt->inc_total_subqs();
771 RETURN(qopt->open_post_list(term, wqf, factor));
774 PostList*
775 QueryPostingSource::postlist(QueryOptimiser * qopt, double factor) const
777 LOGCALL(QUERY, PostList*, "QueryPostingSource::postlist", qopt | factor);
778 Assert(source.get());
779 if (factor != 0.0)
780 qopt->inc_total_subqs();
781 // Casting away const on the Database::Internal here is OK, as we wrap
782 // them in a const Xapian::Database so non-const methods can't actually
783 // be called on the Database::Internal object.
784 const Xapian::Database wrappeddb(
785 const_cast<Xapian::Database::Internal*>(&(qopt->db)));
786 RETURN(new ExternalPostList(wrappeddb, source.get(), factor, qopt->matcher));
789 PostList*
790 QueryScaleWeight::postlist(QueryOptimiser * qopt, double factor) const
792 LOGCALL(QUERY, PostList*, "QueryScaleWeight::postlist", qopt | factor);
793 RETURN(subquery.internal->postlist(qopt, factor * scale_factor));
796 void
797 QueryTerm::gather_terms(void * void_terms) const
799 // Skip Xapian::Query::MatchAll (aka Xapian::Query("")).
800 if (!term.empty()) {
801 vector<pair<Xapian::termpos, string>> &terms =
802 *static_cast<vector<pair<Xapian::termpos, string>>*>(void_terms);
803 terms.push_back(make_pair(pos, term));
807 PostList*
808 QueryValueRange::postlist(QueryOptimiser *qopt, double factor) const
810 LOGCALL(QUERY, PostList*, "QueryValueRange::postlist", qopt | factor);
811 if (factor != 0.0)
812 qopt->inc_total_subqs();
813 const Xapian::Database::Internal & db = qopt->db;
814 const string & lb = db.get_value_lower_bound(slot);
815 if (lb.empty()) {
816 // This should only happen if there are no values in this slot (which
817 // could be because the backend just doesn't support values at all).
818 // If there were values in the slot, the backend should have a
819 // non-empty lower bound, even if it isn't a tight one.
820 AssertEq(db.get_value_freq(slot), 0);
821 RETURN(new EmptyPostList);
823 if (end < lb) {
824 RETURN(new EmptyPostList);
826 const string & ub = db.get_value_upper_bound(slot);
827 if (begin > ub) {
828 RETURN(new EmptyPostList);
830 if (end >= ub) {
831 // If begin <= lb too, then the range check isn't needed, but we do
832 // still need to consider which documents have a value set in this
833 // slot. If this value is set for all documents, we can replace it
834 // with the MatchAll postlist, which is especially efficient if
835 // there are no gaps in the docids.
836 if (begin <= lb && db.get_value_freq(slot) == db.get_doccount()) {
837 RETURN(db.open_post_list(string()));
839 RETURN(new ValueGePostList(&db, slot, begin));
841 RETURN(new ValueRangePostList(&db, slot, begin, end));
844 void
845 QueryValueRange::serialise(string & result) const
847 if (slot < 15) {
848 result += static_cast<char>(0x20 | slot);
849 } else {
850 result += static_cast<char>(0x20 | 15);
851 result += encode_length(slot - 15);
853 result += encode_length(begin.size());
854 result += begin;
855 result += encode_length(end.size());
856 result += end;
859 Query::op
860 QueryValueRange::get_type() const XAPIAN_NOEXCEPT
862 return Query::OP_VALUE_RANGE;
865 string
866 QueryValueRange::get_description() const
868 string desc = "VALUE_RANGE ";
869 desc += str(slot);
870 desc += ' ';
871 description_append(desc, begin);
872 desc += ' ';
873 description_append(desc, end);
874 return desc;
877 PostList*
878 QueryValueLE::postlist(QueryOptimiser *qopt, double factor) const
880 LOGCALL(QUERY, PostList*, "QueryValueLE::postlist", qopt | factor);
881 if (factor != 0.0)
882 qopt->inc_total_subqs();
883 const Xapian::Database::Internal & db = qopt->db;
884 const string & lb = db.get_value_lower_bound(slot);
885 // If lb.empty(), the backend doesn't provide value bounds.
886 if (!lb.empty()) {
887 if (limit < lb) {
888 RETURN(new EmptyPostList);
890 if (limit >= db.get_value_upper_bound(slot)) {
891 // The range check isn't needed, but we do still need to consider
892 // which documents have a value set in this slot. If this value is
893 // set for all documents, we can replace it with the MatchAll
894 // postlist, which is especially efficient if there are no gaps in
895 // the docids.
896 if (db.get_value_freq(slot) == db.get_doccount()) {
897 RETURN(db.open_post_list(string()));
901 RETURN(new ValueRangePostList(&db, slot, string(), limit));
904 void
905 QueryValueLE::serialise(string & result) const
907 // Encode as a range with an empty start (which only takes a single byte to
908 // encode).
909 if (slot < 15) {
910 result += static_cast<char>(0x20 | slot);
911 } else {
912 result += static_cast<char>(0x20 | 15);
913 result += encode_length(slot - 15);
915 result += encode_length(0);
916 result += encode_length(limit.size());
917 result += limit;
920 Query::op
921 QueryValueLE::get_type() const XAPIAN_NOEXCEPT
923 return Query::OP_VALUE_LE;
926 string
927 QueryValueLE::get_description() const
929 string desc = "VALUE_LE ";
930 desc += str(slot);
931 desc += ' ';
932 description_append(desc, limit);
933 return desc;
936 PostList*
937 QueryValueGE::postlist(QueryOptimiser *qopt, double factor) const
939 LOGCALL(QUERY, PostList*, "QueryValueGE::postlist", qopt | factor);
940 if (factor != 0.0)
941 qopt->inc_total_subqs();
942 const Xapian::Database::Internal & db = qopt->db;
943 const string & lb = db.get_value_lower_bound(slot);
944 // If lb.empty(), the backend doesn't provide value bounds.
945 if (!lb.empty()) {
946 if (limit > db.get_value_upper_bound(slot)) {
947 RETURN(new EmptyPostList);
949 if (limit < lb) {
950 // The range check isn't needed, but we do still need to consider
951 // which documents have a value set in this slot. If this value is
952 // set for all documents, we can replace it with the MatchAll
953 // postlist, which is especially efficient if there are no gaps in
954 // the docids.
955 if (db.get_value_freq(slot) == db.get_doccount()) {
956 RETURN(db.open_post_list(string()));
960 RETURN(new ValueGePostList(&db, slot, limit));
963 void
964 QueryValueGE::serialise(string & result) const
966 if (slot < 15) {
967 result += static_cast<char>(0x20 | 0x10 | slot);
968 } else {
969 result += static_cast<char>(0x20 | 0x10 | 15);
970 result += encode_length(slot - 15);
972 result += encode_length(limit.size());
973 result += limit;
976 Query::op
977 QueryValueGE::get_type() const XAPIAN_NOEXCEPT
979 return Query::OP_VALUE_GE;
982 string
983 QueryValueGE::get_description() const
985 string desc = "VALUE_GE ";
986 desc += str(slot);
987 desc += ' ';
988 description_append(desc, limit);
989 return desc;
992 PostList*
993 QueryWildcard::postlist(QueryOptimiser * qopt, double factor) const
995 LOGCALL(QUERY, PostList*, "QueryWildcard::postlist", qopt | factor);
996 Query::op op = combiner;
997 double or_factor = 0.0;
998 if (factor == 0.0) {
999 // If we have a factor of 0, we don't care about the weights, so
1000 // we're just like a normal OR query.
1001 op = Query::OP_OR;
1002 } else if (op != Query::OP_SYNONYM) {
1003 or_factor = factor;
1006 bool old_in_synonym = qopt->in_synonym;
1007 if (!old_in_synonym) {
1008 qopt->in_synonym = (op == Query::OP_SYNONYM);
1011 OrContext ctx(qopt, 0);
1012 unique_ptr<TermList> t(qopt->db.open_allterms(pattern));
1013 Xapian::termcount expansions_left = max_expansion;
1014 // If there's no expansion limit, set expansions_left to the maximum
1015 // value Xapian::termcount can hold.
1016 if (expansions_left == 0)
1017 --expansions_left;
1018 while (true) {
1019 t->next();
1020 if (t->at_end())
1021 break;
1022 if (max_type < Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT) {
1023 if (expansions_left-- == 0) {
1024 if (max_type == Xapian::Query::WILDCARD_LIMIT_FIRST)
1025 break;
1026 string msg("Wildcard ");
1027 msg += pattern;
1028 msg += "* expands to more than ";
1029 msg += str(max_expansion);
1030 msg += " terms";
1031 throw Xapian::WildcardError(msg);
1034 const string & term = t->get_termname();
1035 ctx.add_postlist(qopt->open_lazy_post_list(term, 1, or_factor));
1038 if (max_type == Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT) {
1039 // FIXME: open_lazy_post_list() results in the term getting registered
1040 // for stats, so we still incur an avoidable cost from the full
1041 // expansion size of the wildcard, which is most likely to be visible
1042 // with the remote backend. Perhaps we should split creating the lazy
1043 // postlist from registering the term for stats.
1044 if (ctx.size() > max_expansion)
1045 ctx.select_most_frequent(max_expansion);
1048 if (factor != 0.0) {
1049 if (op != Query::OP_SYNONYM) {
1050 qopt->set_total_subqs(qopt->get_total_subqs() + ctx.size());
1051 } else {
1052 qopt->inc_total_subqs();
1056 qopt->in_synonym = old_in_synonym;
1058 if (ctx.empty())
1059 RETURN(new EmptyPostList);
1061 if (op == Query::OP_MAX)
1062 RETURN(ctx.postlist_max());
1064 PostList * pl = ctx.postlist();
1065 if (op == Query::OP_OR)
1066 RETURN(pl);
1068 // We build an OP_OR tree for OP_SYNONYM and then wrap it in a
1069 // SynonymPostList, which supplies the weights.
1071 // We know the subqueries from a wildcard expansion are wdf-disjoint
1072 // (i.e. each wdf from the document contributes at most itself to the
1073 // wdf of the subquery).
1074 RETURN(qopt->make_synonym_postlist(pl, factor, true));
1077 termcount
1078 QueryWildcard::get_length() const XAPIAN_NOEXCEPT
1080 // We currently assume wqf is 1 for calculating the synonym's weight
1081 // since conceptually the synonym is one "virtual" term. If we were
1082 // to combine multiple occurrences of the same synonym expansion into
1083 // a single instance with wqf set, we would want to track the wqf.
1084 return 1;
1087 void
1088 QueryWildcard::serialise(string & result) const
1090 result += static_cast<char>(0x0b);
1091 result += encode_length(max_expansion);
1092 result += static_cast<unsigned char>(max_type);
1093 result += static_cast<unsigned char>(combiner);
1094 result += encode_length(pattern.size());
1095 result += pattern;
1098 Query::op
1099 QueryWildcard::get_type() const XAPIAN_NOEXCEPT
1101 return Query::OP_WILDCARD;
1104 string
1105 QueryWildcard::get_description() const
1107 string desc = "WILDCARD ";
1108 switch (combiner) {
1109 case Query::OP_SYNONYM:
1110 desc += "SYNONYM ";
1111 break;
1112 case Query::OP_MAX:
1113 desc += "MAX ";
1114 break;
1115 case Query::OP_OR:
1116 desc += "OR ";
1117 break;
1118 default:
1119 desc += "BAD ";
1120 break;
1122 description_append(desc, pattern);
1123 return desc;
1126 Xapian::termcount
1127 QueryBranch::get_length() const XAPIAN_NOEXCEPT
1129 // Sum results from all subqueries.
1130 Xapian::termcount result = 0;
1131 QueryVector::const_iterator i;
1132 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1133 // MatchNothing subqueries should have been removed by done(), but we
1134 // can't use Assert in a XAPIAN_NOEXCEPT function. But we'll get a
1135 // segfault anyway.
1136 result += (*i).internal->get_length();
1138 return result;
1141 #define MULTIWAY(X) static_cast<unsigned char>(0x80 | (X) << 3)
1142 #define MISC(X) static_cast<unsigned char>(X)
1143 void
1144 QueryBranch::serialise_(string & result, Xapian::termcount parameter) const
1146 static const unsigned char first_byte[] = {
1147 MULTIWAY(0), // OP_AND
1148 MULTIWAY(1), // OP_OR
1149 MULTIWAY(2), // OP_AND_NOT
1150 MULTIWAY(3), // OP_XOR
1151 MULTIWAY(4), // OP_AND_MAYBE
1152 MULTIWAY(5), // OP_FILTER
1153 MULTIWAY(14), // OP_NEAR
1154 MULTIWAY(15), // OP_PHRASE
1155 0, // OP_VALUE_RANGE
1156 MISC(3), // OP_SCALE_WEIGHT
1157 MULTIWAY(13), // OP_ELITE_SET
1158 0, // OP_VALUE_GE
1159 0, // OP_VALUE_LE
1160 MULTIWAY(6), // OP_SYNONYM
1161 MULTIWAY(7) // OP_MAX
1163 Xapian::Query::op op_ = get_op();
1164 AssertRel(size_t(op_),<,sizeof(first_byte));
1165 unsigned char ch = first_byte[op_];
1166 if (ch & 0x80) {
1167 // Multi-way operator.
1168 if (subqueries.size() < 8)
1169 ch |= subqueries.size();
1170 result += ch;
1171 if (subqueries.size() >= 8)
1172 result += encode_length(subqueries.size() - 8);
1173 if (ch >= MULTIWAY(13))
1174 result += encode_length(parameter);
1175 } else {
1176 result += ch;
1179 QueryVector::const_iterator i;
1180 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1181 // MatchNothing subqueries should have been removed by done().
1182 Assert((*i).internal.get());
1183 (*i).internal->serialise(result);
1186 // For OP_NEAR, OP_PHRASE, and OP_ELITE_SET, the window/set size gets
1187 // appended next by an overloaded serialise() method in the subclass.
1190 void
1191 QueryBranch::serialise(string & result) const
1193 QueryBranch::serialise_(result);
1196 void
1197 QueryNear::serialise(string & result) const
1199 // FIXME: window - subqueries.size() ?
1200 QueryBranch::serialise_(result, window);
1203 void
1204 QueryPhrase::serialise(string & result) const
1206 // FIXME: window - subqueries.size() ?
1207 QueryBranch::serialise_(result, window);
1210 void
1211 QueryEliteSet::serialise(string & result) const
1213 // FIXME: set_size - subqueries.size() ?
1214 QueryBranch::serialise_(result, set_size);
1217 void
1218 QueryBranch::gather_terms(void * void_terms) const
1220 // Gather results from all subqueries.
1221 QueryVector::const_iterator i;
1222 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1223 // MatchNothing subqueries should have been removed by done().
1224 Assert((*i).internal.get());
1225 (*i).internal->gather_terms(void_terms);
1229 void
1230 QueryBranch::do_or_like(OrContext& ctx, QueryOptimiser * qopt, double factor,
1231 Xapian::termcount elite_set_size, size_t first) const
1233 LOGCALL_VOID(MATCH, "QueryBranch::do_or_like", ctx | qopt | factor | elite_set_size);
1235 // FIXME: we could optimise by merging OP_ELITE_SET and OP_OR like we do
1236 // for AND-like operations.
1238 // OP_SYNONYM with a single subquery is only simplified by
1239 // QuerySynonym::done() if the single subquery is a term or MatchAll.
1240 Assert(subqueries.size() >= 2 || get_op() == Query::OP_SYNONYM);
1242 vector<PostList *> postlists;
1243 postlists.reserve(subqueries.size() - first);
1245 QueryVector::const_iterator q;
1246 for (q = subqueries.begin() + first; q != subqueries.end(); ++q) {
1247 // MatchNothing subqueries should have been removed by done().
1248 Assert((*q).internal.get());
1249 (*q).internal->postlist_sub_or_like(ctx, qopt, factor);
1252 if (elite_set_size && elite_set_size < subqueries.size()) {
1253 ctx.select_elite_set(elite_set_size, subqueries.size());
1254 // FIXME: not right!
1258 PostList *
1259 QueryBranch::do_synonym(QueryOptimiser * qopt, double factor) const
1261 LOGCALL(MATCH, PostList *, "QueryBranch::do_synonym", qopt | factor);
1262 OrContext ctx(qopt, subqueries.size());
1263 if (factor == 0.0) {
1264 // If we have a factor of 0, we don't care about the weights, so
1265 // we're just like a normal OR query.
1266 do_or_like(ctx, qopt, 0.0);
1267 return ctx.postlist();
1270 bool old_in_synonym = qopt->in_synonym;
1271 qopt->in_synonym = true;
1272 do_or_like(ctx, qopt, 0.0);
1273 PostList * pl = ctx.postlist();
1274 qopt->in_synonym = old_in_synonym;
1276 bool wdf_disjoint = false;
1277 Assert(!subqueries.empty());
1278 auto type = subqueries.front().get_type();
1279 if (type == Query::OP_WILDCARD) {
1280 // Detect common easy case where all subqueries are OP_WILDCARD whose
1281 // constant prefixes form a prefix-free set.
1282 wdf_disjoint = true;
1283 vector<string> prefixes;
1284 for (auto&& q : subqueries) {
1285 if (q.get_type() != Query::OP_WILDCARD) {
1286 wdf_disjoint = false;
1287 break;
1289 auto qw = static_cast<const QueryWildcard*>(q.internal.get());
1290 prefixes.push_back(qw->get_pattern());
1293 if (wdf_disjoint) {
1294 sort(prefixes.begin(), prefixes.end());
1295 const string* prev = nullptr;
1296 for (const auto& i : prefixes) {
1297 if (prev) {
1298 if (startswith(i, *prev)) {
1299 wdf_disjoint = false;
1300 break;
1303 prev = &i;
1306 } else if (type == Query::LEAF_TERM) {
1307 // Detect common easy case where all subqueries are terms, none of
1308 // which are the same.
1309 wdf_disjoint = true;
1310 unordered_set<string> terms;
1311 for (auto&& q : subqueries) {
1312 if (q.get_type() != Query::LEAF_TERM) {
1313 wdf_disjoint = false;
1314 break;
1316 auto qt = static_cast<const QueryTerm*>(q.internal.get());
1317 if (!terms.insert(qt->get_term()).second) {
1318 wdf_disjoint = false;
1319 break;
1324 // We currently assume wqf is 1 for calculating the synonym's weight
1325 // since conceptually the synonym is one "virtual" term. If we were
1326 // to combine multiple occurrences of the same synonym expansion into
1327 // a single instance with wqf set, we would want to track the wqf.
1329 // We build an OP_OR tree for OP_SYNONYM and then wrap it in a
1330 // SynonymPostList, which supplies the weights.
1331 RETURN(qopt->make_synonym_postlist(pl, factor, wdf_disjoint));
1334 PostList *
1335 QueryBranch::do_max(QueryOptimiser * qopt, double factor) const
1337 LOGCALL(MATCH, PostList *, "QueryBranch::do_max", qopt | factor);
1338 OrContext ctx(qopt, subqueries.size());
1339 do_or_like(ctx, qopt, factor);
1340 if (factor == 0.0) {
1341 // If we have a factor of 0, we don't care about the weights, so
1342 // we're just like a normal OR query.
1343 RETURN(ctx.postlist());
1346 // We currently assume wqf is 1 for calculating the OP_MAX's weight
1347 // since conceptually the OP_MAX is one "virtual" term. If we were
1348 // to combine multiple occurrences of the same OP_MAX expansion into
1349 // a single instance with wqf set, we would want to track the wqf.
1350 RETURN(ctx.postlist_max());
1353 Xapian::Query::op
1354 QueryBranch::get_type() const XAPIAN_NOEXCEPT
1356 return get_op();
1359 size_t
1360 QueryBranch::get_num_subqueries() const XAPIAN_NOEXCEPT
1362 return subqueries.size();
1365 const Query
1366 QueryBranch::get_subquery(size_t n) const
1368 return subqueries[n];
1371 const string
1372 QueryBranch::get_description_helper(const char * op,
1373 Xapian::termcount parameter) const
1375 string desc = "(";
1376 QueryVector::const_iterator i;
1377 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1378 if (desc.size() > 1) {
1379 desc += op;
1380 if (parameter) {
1381 desc += str(parameter);
1382 desc += ' ';
1385 Assert((*i).internal.get());
1386 // MatchNothing subqueries should have been removed by done(), and we
1387 // shouldn't get called before done() is, since that happens at the
1388 // end of the Xapian::Query constructor.
1389 desc += (*i).internal->get_description();
1391 desc += ')';
1392 return desc;
1395 Query::Internal *
1396 QueryWindowed::done()
1398 // If window size not specified, default it.
1399 if (window == 0)
1400 window = subqueries.size();
1401 return QueryAndLike::done();
1404 void
1405 QueryScaleWeight::gather_terms(void * void_terms) const
1407 subquery.internal->gather_terms(void_terms);
1410 void QueryTerm::serialise(string & result) const
1412 size_t len = term.size();
1413 if (len == 0) {
1414 if (wqf == 1 && pos == 0) {
1415 // Query::MatchAll
1416 result += '\x0f';
1417 } else {
1418 // Weird mutant versions of MatchAll
1419 result += '\x0e';
1420 result += encode_length(wqf);
1421 result += encode_length(pos);
1423 } else if (wqf == 1) {
1424 if (pos == 0) {
1425 // Single occurrence free-text term without position set.
1426 if (len >= 16) {
1427 result += static_cast<char>(0x40 | 0x10);
1428 result += encode_length(term.size() - 16);
1429 } else {
1430 result += static_cast<char>(0x40 | 0x10 | len);
1432 result += term;
1433 } else {
1434 // Single occurrence free-text term with position set.
1435 if (len >= 16) {
1436 result += static_cast<char>(0x40 | 0x20);
1437 result += encode_length(term.size() - 16);
1438 } else {
1439 result += static_cast<char>(0x40 | 0x20 | len);
1441 result += term;
1442 result += encode_length(pos);
1444 } else if (wqf > 1 || pos > 0) {
1445 // General case.
1446 if (len >= 16) {
1447 result += static_cast<char>(0x40 | 0x30);
1448 result += encode_length(term.size() - 16);
1449 } else if (len) {
1450 result += static_cast<char>(0x40 | 0x30 | len);
1452 result += term;
1453 result += encode_length(wqf);
1454 result += encode_length(pos);
1455 } else {
1456 // Typical boolean term.
1457 AssertEq(wqf, 0);
1458 AssertEq(pos, 0);
1459 if (len >= 16) {
1460 result += static_cast<char>(0x40);
1461 result += encode_length(term.size() - 16);
1462 } else {
1463 result += static_cast<char>(0x40 | len);
1465 result += term;
1469 void QueryPostingSource::serialise(string & result) const
1471 result += static_cast<char>(0x0c);
1473 const string & n = source->name();
1474 result += encode_length(n.size());
1475 result += n;
1477 const string & s = source->serialise();
1478 result += encode_length(s.size());
1479 result += s;
1482 void QueryScaleWeight::serialise(string & result) const
1484 Assert(subquery.internal.get());
1485 const string & s = serialise_double(scale_factor);
1486 result += '\x0d';
1487 result += s;
1488 subquery.internal->serialise(result);
1491 struct is_matchnothing {
1492 bool operator()(const Xapian::Query & q) const {
1493 return q.internal.get() == NULL;
1497 void
1498 QueryAndLike::add_subquery(const Xapian::Query & subquery)
1500 // If the AndLike is already MatchNothing, do nothing.
1501 if (subqueries.size() == 1 && subqueries[0].internal.get() == NULL)
1502 return;
1503 // If we're adding MatchNothing, discard any previous subqueries.
1504 if (subquery.internal.get() == NULL)
1505 subqueries.clear();
1506 subqueries.push_back(subquery);
1509 Query::Internal *
1510 QueryAndLike::done()
1512 // Empty AndLike gives MatchNothing.
1513 if (subqueries.empty())
1514 return NULL;
1515 // We handle any subquery being MatchNothing in add_subquery() by leaving
1516 // a single MatchNothing subquery, and so this check results in AndLike
1517 // giving MatchNothing.
1518 if (subqueries.size() == 1)
1519 return subqueries[0].internal.get();
1520 return this;
1523 PostList*
1524 QueryAndLike::postlist(QueryOptimiser * qopt, double factor) const
1526 LOGCALL(QUERY, PostList*, "QueryAndLike::postlist", qopt | factor);
1527 AndContext ctx(qopt, subqueries.size());
1528 postlist_sub_and_like(ctx, qopt, factor);
1529 RETURN(ctx.postlist());
1532 void
1533 QueryAndLike::postlist_sub_and_like(AndContext& ctx, QueryOptimiser * qopt, double factor) const
1535 QueryVector::const_iterator i;
1536 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1537 // MatchNothing subqueries should have been removed by done().
1538 Assert((*i).internal.get());
1539 (*i).internal->postlist_sub_and_like(ctx, qopt, factor);
1543 void
1544 QueryOrLike::add_subquery(const Xapian::Query & subquery)
1546 // Drop any subqueries which are MatchNothing.
1547 if (subquery.internal.get() != NULL)
1548 subqueries.push_back(subquery);
1551 Query::Internal *
1552 QueryOrLike::done()
1554 // An empty OrLike gives MatchNothing. Note that add_subquery() drops any
1555 // subqueries which are MatchNothing.
1556 if (subqueries.empty())
1557 return NULL;
1558 if (subqueries.size() == 1)
1559 return subqueries[0].internal.get();
1560 return this;
1563 void
1564 QueryAndNot::add_subquery(const Xapian::Query & subquery)
1566 if (!subqueries.empty()) {
1567 // We're adding the 2nd or subsequent subquery, so this subquery is
1568 // negated.
1569 if (subqueries[0].internal.get() == NULL) {
1570 // The left side is already MatchNothing so drop any right side.
1572 // MatchNothing AND_NOT X == MatchNothing
1573 return;
1575 if (subquery.internal.get() == NULL) {
1576 // Drop MatchNothing on the right of AndNot.
1578 // X AND_NOT MatchNothing == X
1579 return;
1581 if (subquery.get_type() == subquery.OP_SCALE_WEIGHT) {
1582 // Strip OP_SCALE_WEIGHT wrapping from queries on the right of
1583 // AndNot as no weight is taken from them.
1584 subqueries.push_back(subquery.get_subquery(0));
1585 // The Query constructor for OP_SCALE_WEIGHT constructor should
1586 // eliminate OP_SCALE_WEIGHT applied to MatchNothing.
1587 Assert(subquery.get_subquery(0).internal.get() != NULL);
1588 return;
1591 subqueries.push_back(subquery);
1594 Query::Internal *
1595 QueryAndNot::done()
1597 // Any MatchNothing right subqueries get discarded by add_subquery() - if
1598 // that leaves just the left subquery, return that.
1600 // If left subquery is MatchNothing, then add_subquery() discards all right
1601 // subqueries, so this check also gives MatchNothing for this case.
1602 if (subqueries.size() == 1)
1603 return subqueries[0].internal.get();
1604 return this;
1607 void
1608 QueryAndMaybe::add_subquery(const Xapian::Query & subquery)
1610 // If the left side of AndMaybe is already MatchNothing, do nothing.
1611 if (subqueries.size() == 1 && subqueries[0].internal.get() == NULL)
1612 return;
1613 // Drop any 2nd or subsequent subqueries which are MatchNothing.
1614 if (subquery.internal.get() != NULL || subqueries.empty())
1615 subqueries.push_back(subquery);
1618 Query::Internal *
1619 QueryAndMaybe::done()
1621 // Any MatchNothing right subqueries get discarded by add_subquery() - if
1622 // that leaves just the left subquery, return that.
1624 // If left subquery is MatchNothing, then add_subquery() discards all right
1625 // subqueries, so this check also gives MatchNothing for this case.
1626 if (subqueries.size() == 1)
1627 return subqueries[0].internal.get();
1628 return this;
1631 PostList*
1632 QueryOr::postlist(QueryOptimiser * qopt, double factor) const
1634 LOGCALL(QUERY, PostList*, "QueryOr::postlist", qopt | factor);
1635 OrContext ctx(qopt, subqueries.size());
1636 do_or_like(ctx, qopt, factor);
1637 RETURN(ctx.postlist());
1640 void
1641 QueryOr::postlist_sub_or_like(OrContext& ctx, QueryOptimiser * qopt, double factor) const
1643 do_or_like(ctx, qopt, factor);
1646 PostList*
1647 QueryAndNot::postlist(QueryOptimiser * qopt, double factor) const
1649 LOGCALL(QUERY, PostList*, "QueryAndNot::postlist", qopt | factor);
1650 // FIXME: Combine and-like side with and-like stuff above.
1651 unique_ptr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
1652 OrContext ctx(qopt, subqueries.size() - 1);
1653 do_or_like(ctx, qopt, 0.0, 0, 1);
1654 unique_ptr<PostList> r(ctx.postlist());
1655 RETURN(new AndNotPostList(l.release(), r.release(),
1656 qopt->matcher, qopt->db_size));
1659 PostList*
1660 QueryXor::postlist(QueryOptimiser * qopt, double factor) const
1662 LOGCALL(QUERY, PostList*, "QueryXor::postlist", qopt | factor);
1663 XorContext ctx(qopt, subqueries.size());
1664 postlist_sub_xor(ctx, qopt, factor);
1665 RETURN(ctx.postlist());
1668 void
1669 QueryXor::postlist_sub_xor(XorContext& ctx, QueryOptimiser * qopt, double factor) const
1671 QueryVector::const_iterator i;
1672 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1673 // MatchNothing subqueries should have been removed by done().
1674 Assert((*i).internal.get());
1675 (*i).internal->postlist_sub_xor(ctx, qopt, factor);
1679 PostList*
1680 QueryAndMaybe::postlist(QueryOptimiser * qopt, double factor) const
1682 LOGCALL(QUERY, PostList*, "QueryAndMaybe::postlist", qopt | factor);
1683 // FIXME: Combine and-like side with and-like stuff above.
1684 unique_ptr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
1685 if (factor == 0.0) {
1686 // An unweighted OP_AND_MAYBE can be replaced with its left branch.
1687 RETURN(l.release());
1689 OrContext ctx(qopt, subqueries.size() - 1);
1690 do_or_like(ctx, qopt, factor, 0, 1);
1691 unique_ptr<PostList> r(ctx.postlist());
1692 RETURN(new AndMaybePostList(l.release(), r.release(),
1693 qopt->matcher, qopt->db_size));
1696 PostList*
1697 QueryFilter::postlist(QueryOptimiser * qopt, double factor) const
1699 LOGCALL(QUERY, PostList*, "QueryFilter::postlist", qopt | factor);
1700 // FIXME: Combine and-like stuff, like QueryOptimiser.
1701 AssertEq(subqueries.size(), 2);
1702 PostList * pls[2];
1703 unique_ptr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
1704 pls[1] = subqueries[1].internal->postlist(qopt, 0.0);
1705 pls[0] = l.release();
1706 RETURN(new MultiAndPostList(pls, pls + 2, qopt->matcher, qopt->db_size));
1709 void
1710 QueryFilter::postlist_sub_and_like(AndContext& ctx, QueryOptimiser * qopt, double factor) const
1712 QueryVector::const_iterator i;
1713 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1714 // MatchNothing subqueries should have been removed by done().
1715 Assert((*i).internal.get());
1716 (*i).internal->postlist_sub_and_like(ctx, qopt, factor);
1717 // Second and subsequent subqueries are unweighted.
1718 factor = 0.0;
1722 void
1723 QueryWindowed::postlist_windowed(Query::op op, AndContext& ctx, QueryOptimiser * qopt, double factor) const
1725 if (!qopt->full_db_has_positions) {
1726 // No positional data anywhere, so just handle as AND.
1727 QueryAndLike::postlist_sub_and_like(ctx, qopt, factor);
1728 return;
1731 if (!qopt->db.has_positions()) {
1732 // No positions in this subdatabase so this matches nothing, which
1733 // means the whole andcontext matches nothing.
1735 // Bailing out here means we don't recurse deeper and that means we
1736 // don't call QueryOptimiser::inc_total_subqs() for leaf postlists in
1737 // the phrase, but at least one shard will count them, and the matcher
1738 // takes the highest answer (since 1.4.6).
1739 ctx.shrink(0);
1740 return;
1743 bool old_need_positions = qopt->need_positions;
1744 qopt->need_positions = true;
1746 QueryVector::const_iterator i;
1747 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1748 // MatchNothing subqueries should have been removed by done().
1749 Assert((*i).internal.get());
1750 bool is_term = ((*i).internal->get_type() == Query::LEAF_TERM);
1751 PostList* pl = (*i).internal->postlist(qopt, factor);
1752 if (!is_term)
1753 pl = new OrPosPostList(pl);
1754 ctx.add_postlist(pl);
1756 // Record the positional filter to apply higher up the tree.
1757 ctx.add_pos_filter(op, subqueries.size(), window);
1759 qopt->need_positions = old_need_positions;
1762 void
1763 QueryPhrase::postlist_sub_and_like(AndContext & ctx, QueryOptimiser * qopt, double factor) const
1765 QueryWindowed::postlist_windowed(Query::OP_PHRASE, ctx, qopt, factor);
1768 void
1769 QueryNear::postlist_sub_and_like(AndContext & ctx, QueryOptimiser * qopt, double factor) const
1771 QueryWindowed::postlist_windowed(Query::OP_NEAR, ctx, qopt, factor);
1774 PostList*
1775 QueryEliteSet::postlist(QueryOptimiser * qopt, double factor) const
1777 LOGCALL(QUERY, PostList*, "QueryEliteSet::postlist", qopt | factor);
1778 OrContext ctx(qopt, subqueries.size());
1779 do_or_like(ctx, qopt, factor, set_size);
1780 RETURN(ctx.postlist());
1783 void
1784 QueryEliteSet::postlist_sub_or_like(OrContext& ctx, QueryOptimiser * qopt, double factor) const
1786 do_or_like(ctx, qopt, factor, set_size);
1789 PostList*
1790 QuerySynonym::postlist(QueryOptimiser * qopt, double factor) const
1792 LOGCALL(QUERY, PostList*, "QuerySynonym::postlist", qopt | factor);
1793 // Save and restore total_subqs so we only add one for the whole
1794 // OP_SYNONYM subquery (or none if we're not weighted).
1795 Xapian::termcount save_total_subqs = qopt->get_total_subqs();
1796 if (factor != 0.0)
1797 ++save_total_subqs;
1798 PostList * pl = do_synonym(qopt, factor);
1799 qopt->set_total_subqs(save_total_subqs);
1800 RETURN(pl);
1803 Query::Internal *
1804 QuerySynonym::done()
1806 // An empty Synonym gives MatchNothing. Note that add_subquery() drops any
1807 // subqueries which are MatchNothing.
1808 if (subqueries.empty())
1809 return NULL;
1810 if (subqueries.size() == 1) {
1811 Query::op sub_type = subqueries[0].get_type();
1812 // Synonym of a single subquery should only be simplified if that
1813 // subquery is a term (or MatchAll), or if it's also OP_SYNONYM. Note
1814 // that MatchNothing subqueries are dropped, so we'd never get here
1815 // with a single MatchNothing subquery.
1816 if (sub_type == Query::LEAF_TERM || sub_type == Query::LEAF_MATCH_ALL ||
1817 sub_type == Query::OP_SYNONYM) {
1818 return subqueries[0].internal.get();
1820 if (sub_type == Query::OP_WILDCARD) {
1821 auto q = static_cast<QueryWildcard*>(subqueries[0].internal.get());
1822 // SYNONYM over WILDCARD X -> WILDCARD SYNONYM for any combiner X.
1823 return q->change_combiner(Query::OP_SYNONYM);
1826 return this;
1829 PostList*
1830 QueryMax::postlist(QueryOptimiser * qopt, double factor) const
1832 LOGCALL(QUERY, PostList*, "QueryMax::postlist", qopt | factor);
1833 // Save and restore total_subqs so we only add one for the whole
1834 // OP_MAX subquery (or none if we're not weighted).
1835 Xapian::termcount save_total_subqs = qopt->get_total_subqs();
1836 if (factor != 0.0)
1837 ++save_total_subqs;
1838 PostList * pl = do_max(qopt, factor);
1839 qopt->set_total_subqs(save_total_subqs);
1840 RETURN(pl);
1843 Xapian::Query::op
1844 QueryAnd::get_op() const
1846 return Xapian::Query::OP_AND;
1849 Xapian::Query::op
1850 QueryOr::get_op() const
1852 return Xapian::Query::OP_OR;
1855 Xapian::Query::op
1856 QueryAndNot::get_op() const
1858 return Xapian::Query::OP_AND_NOT;
1861 Xapian::Query::op
1862 QueryXor::get_op() const
1864 return Xapian::Query::OP_XOR;
1867 Xapian::Query::op
1868 QueryAndMaybe::get_op() const
1870 return Xapian::Query::OP_AND_MAYBE;
1873 Xapian::Query::op
1874 QueryFilter::get_op() const
1876 return Xapian::Query::OP_FILTER;
1879 Xapian::Query::op
1880 QueryNear::get_op() const
1882 return Xapian::Query::OP_NEAR;
1885 Xapian::Query::op
1886 QueryPhrase::get_op() const
1888 return Xapian::Query::OP_PHRASE;
1891 Xapian::Query::op
1892 QueryEliteSet::get_op() const
1894 return Xapian::Query::OP_ELITE_SET;
1897 Xapian::Query::op
1898 QuerySynonym::get_op() const
1900 return Xapian::Query::OP_SYNONYM;
1903 Xapian::Query::op
1904 QueryMax::get_op() const
1906 return Xapian::Query::OP_MAX;
1909 Xapian::Query::op
1910 QueryWildcard::get_op() const
1912 return Xapian::Query::OP_WILDCARD;
1915 string
1916 QueryAnd::get_description() const
1918 return get_description_helper(" AND ");
1921 string
1922 QueryOr::get_description() const
1924 return get_description_helper(" OR ");
1927 string
1928 QueryAndNot::get_description() const
1930 return get_description_helper(" AND_NOT ");
1933 string
1934 QueryXor::get_description() const
1936 return get_description_helper(" XOR ");
1939 string
1940 QueryAndMaybe::get_description() const
1942 return get_description_helper(" AND_MAYBE ");
1945 string
1946 QueryFilter::get_description() const
1948 return get_description_helper(" FILTER ");
1951 string
1952 QueryNear::get_description() const
1954 return get_description_helper(" NEAR ", window);
1957 string
1958 QueryPhrase::get_description() const
1960 return get_description_helper(" PHRASE ", window);
1963 string
1964 QueryEliteSet::get_description() const
1966 return get_description_helper(" ELITE_SET ", set_size);
1969 string
1970 QuerySynonym::get_description() const
1972 if (subqueries.size() == 1) {
1973 string d = "(SYNONYM ";
1974 d += subqueries[0].internal->get_description();
1975 d += ")";
1976 return d;
1978 return get_description_helper(" SYNONYM ");
1981 string
1982 QueryMax::get_description() const
1984 return get_description_helper(" MAX ");
1987 Xapian::Query::op
1988 QueryInvalid::get_type() const XAPIAN_NOEXCEPT
1990 return Xapian::Query::OP_INVALID;
1993 PostList*
1994 QueryInvalid::postlist(QueryOptimiser *, double) const
1996 throw Xapian::InvalidOperationError("Query is invalid");
1999 void
2000 QueryInvalid::serialise(std::string & result) const
2002 result += static_cast<char>(0x00);
2005 string
2006 QueryInvalid::get_description() const
2008 return "<INVALID>";