Fix assertion for value range on empty slot
[xapian.git] / xapian-core / api / queryinternal.cc
blobcd2bfcb25587d68e23b51944c372bbb8e88dd543
1 /** @file queryinternal.cc
2 * @brief Xapian::Query internals
3 */
4 /* Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017 Olly Betts
5 * Copyright (C) 2008,2009 Lemur Consulting Ltd
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #include <config.h>
24 #include "queryinternal.h"
26 #include "xapian/error.h"
27 #include "xapian/postingsource.h"
28 #include "xapian/query.h"
30 #include "leafpostlist.h"
31 #include "matcher/andmaybepostlist.h"
32 #include "matcher/andnotpostlist.h"
33 #include "emptypostlist.h"
34 #include "matcher/exactphrasepostlist.h"
35 #include "matcher/externalpostlist.h"
36 #include "matcher/maxpostlist.h"
37 #include "matcher/multiandpostlist.h"
38 #include "matcher/multixorpostlist.h"
39 #include "matcher/nearpostlist.h"
40 #include "matcher/orpospostlist.h"
41 #include "matcher/orpostlist.h"
42 #include "matcher/phrasepostlist.h"
43 #include "matcher/queryoptimiser.h"
44 #include "matcher/valuerangepostlist.h"
45 #include "matcher/valuegepostlist.h"
46 #include "net/length.h"
47 #include "serialise-double.h"
48 #include "termlist.h"
50 #include "debuglog.h"
51 #include "omassert.h"
52 #include "str.h"
53 #include "unicode/description_append.h"
55 #include <algorithm>
56 #include <functional>
57 #include <list>
58 #include <memory>
59 #include <string>
60 #include <vector>
62 using namespace std;
64 template<class CLASS> struct delete_ptr {
65 void operator()(CLASS *p) const { delete p; }
68 using Xapian::Internal::AndContext;
69 using Xapian::Internal::OrContext;
70 using Xapian::Internal::XorContext;
72 namespace Xapian {
74 namespace Internal {
76 /** Class providing an operator which sorts postlists to select max or terms.
77 * This returns true if a has a (strictly) greater termweight than b.
79 * Using this comparator will tend to result in multiple calls to
80 * recalc_maxweight() for each of the subqueries (we use it with nth_element
81 * which should be O(n)) - perhaps it'd be better to call recalc_maxweight()
82 * once and then sort on that.
84 struct CmpMaxOrTerms {
85 /** Return true if and only if a has a strictly greater termweight than b. */
86 bool operator()(PostList *a, PostList *b) {
87 #if (defined(__i386__) && !defined(__SSE_MATH__)) || \
88 defined(__mc68000__) || defined(__mc68010__) || \
89 defined(__mc68020__) || defined(__mc68030__)
90 // On some architectures, most common of which is x86, floating point
91 // values are calculated and stored in registers with excess precision.
92 // If the two recalc_maxweight() calls below return identical values in a
93 // register, the excess precision may be dropped for one of them but
94 // not the other (e.g. because the compiler saves the first calculated
95 // weight to memory while calculating the second, then reloads it to
96 // compare). This leads to both a > b and b > a being true, which
97 // violates the antisymmetry property of the strict weak ordering
98 // required by nth_element(). This can have serious consequences (e.g.
99 // segfaults).
101 // Note that m68k only has excess precision in earlier models - 68040
102 // and later are OK:
103 // http://gcc.gnu.org/ml/gcc-patches/2008-11/msg00105.html
105 // To avoid this, we store each result in a volatile double prior to
106 // comparing them. This means that the result of this test should
107 // match that on other architectures with the same double format (which
108 // is desirable), and actually has less overhead than rounding both
109 // results to float (which is another approach which works).
110 volatile double a_max_wt = a->recalc_maxweight();
111 volatile double b_max_wt = b->recalc_maxweight();
112 return a_max_wt > b_max_wt;
113 #else
114 return a->recalc_maxweight() > b->recalc_maxweight();
115 #endif
119 /// Comparison functor which orders PostList* by descending get_termfreq_est().
120 struct ComparePostListTermFreqAscending {
121 /// Order by descending get_termfreq_est().
122 bool operator()(const PostList *a, const PostList *b) const {
123 return a->get_termfreq_est() > b->get_termfreq_est();
127 class Context {
128 protected:
129 QueryOptimiser* qopt;
131 vector<PostList*> pls;
133 public:
134 Context(QueryOptimiser* qopt_, size_t reserve);
136 ~Context();
138 void add_postlist(PostList * pl) {
139 pls.push_back(pl);
142 bool empty() const {
143 return pls.empty();
146 size_t size() const {
147 return pls.size();
150 void shrink(size_t new_size);
153 Context::Context(QueryOptimiser* qopt_, size_t reserve)
154 : qopt(qopt_)
156 pls.reserve(reserve);
159 void
160 Context::shrink(size_t new_size)
162 AssertRel(new_size, <=, pls.size());
163 if (new_size >= pls.size())
164 return;
166 const PostList * hint_pl = qopt->get_hint_postlist();
167 for (auto&& i = pls.begin() + new_size; i != pls.end(); ++i) {
168 const PostList * pl = *i;
169 if (rare(pl == hint_pl)) {
170 // We were about to delete qopt's hint - instead tell qopt to take
171 // ownership.
172 qopt->take_hint_ownership();
173 hint_pl = NULL;
174 } else {
175 delete pl;
178 pls.resize(new_size);
181 Context::~Context()
183 shrink(0);
186 class OrContext : public Context {
187 public:
188 OrContext(QueryOptimiser* qopt_, size_t reserve)
189 : Context(qopt_, reserve) { }
191 /// Select the best set_size postlists from the last out_of added.
192 void select_elite_set(size_t set_size, size_t out_of);
194 /// Select the set_size postlists with the highest term frequency.
195 void select_most_frequent(size_t set_size);
197 PostList * postlist();
198 PostList * postlist_max();
201 void
202 OrContext::select_elite_set(size_t set_size, size_t out_of)
204 vector<PostList*>::iterator begin = pls.begin() + pls.size() - out_of;
205 nth_element(begin, begin + set_size - 1, pls.end(), CmpMaxOrTerms());
206 shrink(pls.size() - out_of + set_size);
209 void
210 OrContext::select_most_frequent(size_t set_size)
212 vector<PostList*>::iterator begin = pls.begin();
213 nth_element(begin, begin + set_size - 1, pls.end(),
214 ComparePostListTermFreqAscending());
215 shrink(set_size);
218 PostList *
219 OrContext::postlist()
221 Assert(!pls.empty());
223 if (pls.size() == 1) {
224 PostList * pl = pls[0];
225 pls.clear();
226 return pl;
229 // Make postlists into a heap so that the postlist with the greatest term
230 // frequency is at the top of the heap.
231 make_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
233 // Now build a tree of binary OrPostList objects.
235 // The algorithm used to build the tree is like that used to build an
236 // optimal Huffman coding tree. If we called next() repeatedly, this
237 // arrangement would minimise the number of method calls. Generally we
238 // don't actually do that, but this arrangement is still likely to be a
239 // good one, and it does minimise the work in the worst case.
240 while (true) {
241 // We build the tree such that at each branch:
243 // l.get_termfreq_est() >= r.get_termfreq_est()
245 // We do this so that the OrPostList class can be optimised assuming
246 // that this is the case.
247 PostList * r = pls.front();
248 pop_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
249 pls.pop_back();
250 PostList * pl;
251 pl = new OrPostList(pls.front(), r, qopt->matcher, qopt->db_size);
253 if (pls.size() == 1) {
254 pls.clear();
255 return pl;
258 pop_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
259 pls.back() = pl;
260 push_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
264 PostList *
265 OrContext::postlist_max()
267 Assert(!pls.empty());
269 if (pls.size() == 1) {
270 PostList * pl = pls[0];
271 pls.clear();
272 return pl;
275 // Sort the postlists so that the postlist with the greatest term frequency
276 // is first.
277 sort(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
279 PostList * pl;
280 pl = new MaxPostList(pls.begin(), pls.end(), qopt->matcher, qopt->db_size);
282 pls.clear();
283 return pl;
286 class XorContext : public Context {
287 public:
288 XorContext(QueryOptimiser* qopt_, size_t reserve)
289 : Context(qopt_, reserve) { }
291 PostList * postlist();
294 PostList *
295 XorContext::postlist()
297 Xapian::doccount db_size = qopt->db_size;
298 PostList * pl;
299 pl = new MultiXorPostList(pls.begin(), pls.end(), qopt->matcher, db_size);
301 // Empty pls so our destructor doesn't delete them all!
302 pls.clear();
303 return pl;
306 class AndContext : public Context {
307 class PosFilter {
308 Xapian::Query::op op_;
310 /// Start and end indices for the PostLists this positional filter uses.
311 size_t begin, end;
313 Xapian::termcount window;
315 public:
316 PosFilter(Xapian::Query::op op__, size_t begin_, size_t end_,
317 Xapian::termcount window_)
318 : op_(op__), begin(begin_), end(end_), window(window_) { }
320 PostList * postlist(PostList * pl, const vector<PostList*>& pls) const;
323 list<PosFilter> pos_filters;
325 public:
326 AndContext(QueryOptimiser* qopt_, size_t reserve)
327 : Context(qopt_, reserve) { }
329 void add_pos_filter(Query::op op_,
330 size_t n_subqs,
331 Xapian::termcount window);
333 PostList * postlist();
336 PostList *
337 AndContext::PosFilter::postlist(PostList * pl, const vector<PostList*>& pls) const
338 try {
339 vector<PostList *>::const_iterator terms_begin = pls.begin() + begin;
340 vector<PostList *>::const_iterator terms_end = pls.begin() + end;
342 if (op_ == Xapian::Query::OP_NEAR) {
343 pl = new NearPostList(pl, window, terms_begin, terms_end);
344 } else if (window == end - begin) {
345 AssertEq(op_, Xapian::Query::OP_PHRASE);
346 pl = new ExactPhrasePostList(pl, terms_begin, terms_end);
347 } else {
348 AssertEq(op_, Xapian::Query::OP_PHRASE);
349 pl = new PhrasePostList(pl, window, terms_begin, terms_end);
351 return pl;
352 } catch (...) {
353 delete pl;
354 throw;
357 void
358 AndContext::add_pos_filter(Query::op op_,
359 size_t n_subqs,
360 Xapian::termcount window)
362 Assert(n_subqs > 1);
363 size_t end = pls.size();
364 size_t begin = end - n_subqs;
365 pos_filters.push_back(PosFilter(op_, begin, end, window));
368 PostList *
369 AndContext::postlist()
371 if (pls.empty()) {
372 // This case only happens if this sub-database has no positional data
373 // (but another sub-database does).
374 Assert(pos_filters.empty());
375 return new EmptyPostList;
378 unique_ptr<PostList> pl(new MultiAndPostList(pls.begin(), pls.end(),
379 qopt->matcher, qopt->db_size));
381 // Sort the positional filters to try to apply them in an efficient order.
382 // FIXME: We need to figure out what that is! Try applying lowest cf/tf
383 // first?
385 // Apply any positional filters.
386 list<PosFilter>::const_iterator i;
387 for (i = pos_filters.begin(); i != pos_filters.end(); ++i) {
388 const PosFilter & filter = *i;
389 pl.reset(filter.postlist(pl.release(), pls));
392 // Empty pls so our destructor doesn't delete them all!
393 pls.clear();
394 return pl.release();
399 Query::Internal::~Internal() { }
401 size_t
402 Query::Internal::get_num_subqueries() const XAPIAN_NOEXCEPT
404 return 0;
407 const Query
408 Query::Internal::get_subquery(size_t) const
410 throw Xapian::InvalidArgumentError("get_subquery() not meaningful for this Query object");
413 Xapian::termcount
414 Query::Internal::get_wqf() const
416 throw Xapian::InvalidArgumentError("get_wqf() not meaningful for this Query object");
419 Xapian::termpos
420 Query::Internal::get_pos() const
422 throw Xapian::InvalidArgumentError("get_pos() not meaningful for this Query object");
425 void
426 Query::Internal::gather_terms(void *) const
430 Xapian::termcount
431 Query::Internal::get_length() const XAPIAN_NOEXCEPT
433 return 0;
436 Query::Internal *
437 Query::Internal::unserialise(const char ** p, const char * end,
438 const Registry & reg)
440 if (*p == end)
441 return NULL;
442 unsigned char ch = *(*p)++;
443 switch (ch >> 5) {
444 case 4: case 5: case 6: case 7: {
445 // Multi-way branch
447 // 1ccccnnn where:
448 // nnn -> n_subqs (0 means encoded value follows)
449 // cccc -> code (which OP_XXX)
450 size_t n_subqs = ch & 0x07;
451 if (n_subqs == 0) {
452 decode_length(p, end, n_subqs);
453 n_subqs += 8;
455 unsigned char code = (ch >> 3) & 0x0f;
456 Xapian::termcount parameter = 0;
457 if (code >= 13)
458 decode_length(p, end, parameter);
459 Xapian::Internal::QueryBranch * result;
460 switch (code) {
461 case 0: // OP_AND
462 result = new Xapian::Internal::QueryAnd(n_subqs);
463 break;
464 case 1: // OP_OR
465 result = new Xapian::Internal::QueryOr(n_subqs);
466 break;
467 case 2: // OP_AND_NOT
468 result = new Xapian::Internal::QueryAndNot(n_subqs);
469 break;
470 case 3: // OP_XOR
471 result = new Xapian::Internal::QueryXor(n_subqs);
472 break;
473 case 4: // OP_AND_MAYBE
474 result = new Xapian::Internal::QueryAndMaybe(n_subqs);
475 break;
476 case 5: // OP_FILTER
477 result = new Xapian::Internal::QueryFilter(n_subqs);
478 break;
479 case 6: // OP_SYNONYM
480 result = new Xapian::Internal::QuerySynonym(n_subqs);
481 break;
482 case 7: // OP_MAX
483 result = new Xapian::Internal::QueryMax(n_subqs);
484 break;
485 case 13: // OP_ELITE_SET
486 result = new Xapian::Internal::QueryEliteSet(n_subqs,
487 parameter);
488 break;
489 case 14: // OP_NEAR
490 result = new Xapian::Internal::QueryNear(n_subqs,
491 parameter);
492 break;
493 case 15: // OP_PHRASE
494 result = new Xapian::Internal::QueryPhrase(n_subqs,
495 parameter);
496 break;
497 default:
498 // 8 to 12 are currently unused.
499 throw SerialisationError("Unknown multi-way branch Query operator");
501 do {
502 result->add_subquery(Xapian::Query(unserialise(p, end, reg)));
503 } while (--n_subqs);
504 result->done();
505 return result;
507 case 2: case 3: { // Term
508 // Term
510 // 01ccLLLL where:
511 // LLLL -> length (0 means encoded value follows)
512 // cc -> code:
513 // 0: wqf = 0; pos = 0
514 // 1: wqf = 1; pos = 0
515 // 2: wqf = 1; pos -> encoded value follows
516 // 3: wqf -> encoded value follows; pos -> encoded value follows
517 size_t len = ch & 0x0f;
518 if (len == 0) {
519 decode_length(p, end, len);
520 len += 16;
522 if (size_t(end - *p) < len)
523 throw SerialisationError("Not enough data");
524 string term(*p, len);
525 *p += len;
527 int code = ((ch >> 4) & 0x03);
529 Xapian::termcount wqf = static_cast<Xapian::termcount>(code > 0);
530 if (code == 3)
531 decode_length(p, end, wqf);
533 Xapian::termpos pos = 0;
534 if (code >= 2)
535 decode_length(p, end, pos);
537 return new Xapian::Internal::QueryTerm(term, wqf, pos);
539 case 1: {
540 // OP_VALUE_RANGE or OP_VALUE_GE or OP_VALUE_LE
542 // 001tssss where:
543 // ssss -> slot number (15 means encoded value follows)
544 // t -> op:
545 // 0: OP_VALUE_RANGE (or OP_VALUE_LE if begin empty)
546 // 1: OP_VALUE_GE
547 Xapian::valueno slot = ch & 15;
548 if (slot == 15) {
549 decode_length(p, end, slot);
550 slot += 15;
552 size_t len;
553 decode_length_and_check(p, end, len);
554 string begin(*p, len);
555 *p += len;
556 if (ch & 0x10) {
557 // OP_VALUE_GE
558 return new Xapian::Internal::QueryValueGE(slot, begin);
561 // OP_VALUE_RANGE
562 decode_length_and_check(p, end, len);
563 string end_(*p, len);
564 *p += len;
565 if (begin.empty()) // FIXME: is this right?
566 return new Xapian::Internal::QueryValueLE(slot, end_);
567 return new Xapian::Internal::QueryValueRange(slot, begin, end_);
569 case 0: {
570 // Other operators
572 // 000ttttt where:
573 // ttttt -> encodes which OP_XXX
574 switch (ch & 0x1f) {
575 case 0x00: // OP_INVALID
576 return new Xapian::Internal::QueryInvalid();
577 case 0x0b: { // Wildcard
578 if (*p == end)
579 throw SerialisationError("not enough data");
580 Xapian::termcount max_expansion;
581 decode_length(p, end, max_expansion);
582 if (end - *p < 2)
583 throw SerialisationError("not enough data");
584 int max_type = static_cast<unsigned char>(*(*p)++);
585 op combiner = static_cast<op>(*(*p)++);
586 size_t len;
587 decode_length_and_check(p, end, len);
588 string pattern(*p, len);
589 *p += len;
590 return new Xapian::Internal::QueryWildcard(pattern,
591 max_expansion,
592 max_type,
593 combiner);
595 case 0x0c: { // PostingSource
596 size_t len;
597 decode_length_and_check(p, end, len);
598 string name(*p, len);
599 *p += len;
601 const PostingSource * reg_source = reg.get_posting_source(name);
602 if (!reg_source) {
603 string m = "PostingSource ";
604 m += name;
605 m += " not registered";
606 throw SerialisationError(m);
609 decode_length_and_check(p, end, len);
610 PostingSource * source =
611 reg_source->unserialise_with_registry(string(*p, len),
612 reg);
613 *p += len;
614 return new Xapian::Internal::QueryPostingSource(source->release());
616 case 0x0d: {
617 using Xapian::Internal::QueryScaleWeight;
618 double scale_factor = unserialise_double(p, end);
619 return new QueryScaleWeight(scale_factor,
620 Query(unserialise(p, end, reg)));
622 case 0x0e: {
623 Xapian::termcount wqf;
624 Xapian::termpos pos;
625 decode_length(p, end, wqf);
626 decode_length(p, end, pos);
627 return new Xapian::Internal::QueryTerm(string(), wqf, pos);
629 case 0x0f:
630 return Query::MatchAll.internal.get();
631 default: // Others currently unused.
632 break;
634 break;
637 string msg = "Unknown Query serialisation: ";
638 msg += str(ch);
639 throw SerialisationError(msg);
642 void
643 Query::Internal::postlist_sub_and_like(AndContext& ctx,
644 QueryOptimiser * qopt,
645 double factor) const
647 ctx.add_postlist(postlist(qopt, factor));
650 void
651 Query::Internal::postlist_sub_or_like(OrContext& ctx,
652 QueryOptimiser * qopt,
653 double factor) const
655 ctx.add_postlist(postlist(qopt, factor));
658 void
659 Query::Internal::postlist_sub_xor(XorContext& ctx,
660 QueryOptimiser * qopt,
661 double factor) const
663 ctx.add_postlist(postlist(qopt, factor));
666 namespace Internal {
668 Query::op
669 QueryTerm::get_type() const XAPIAN_NOEXCEPT
671 return term.empty() ? Query::LEAF_MATCH_ALL : Query::LEAF_TERM;
674 string
675 QueryTerm::get_description() const
677 string desc;
678 if (term.empty()) {
679 desc = "<alldocuments>";
680 } else {
681 description_append(desc, term);
683 if (wqf != 1) {
684 desc += '#';
685 desc += str(wqf);
687 if (pos) {
688 desc += '@';
689 desc += str(pos);
691 return desc;
694 QueryPostingSource::QueryPostingSource(PostingSource * source_)
695 : source(source_)
697 if (!source_)
698 throw Xapian::InvalidArgumentError("source parameter can't be NULL");
699 if (source->_refs == 0) {
700 // source_ isn't reference counted, so try to clone it. If clone()
701 // isn't implemented, just use the object provided and it's the
702 // caller's responsibility to ensure it stays valid while in use.
703 PostingSource * cloned_source = source->clone();
704 if (cloned_source) source = cloned_source->release();
708 Query::op
709 QueryPostingSource::get_type() const XAPIAN_NOEXCEPT
711 return Query::LEAF_POSTING_SOURCE;
714 string
715 QueryPostingSource::get_description() const
717 string desc = "PostingSource(";
718 desc += source->get_description();
719 desc += ')';
720 return desc;
723 QueryScaleWeight::QueryScaleWeight(double factor, const Query & subquery_)
724 : scale_factor(factor), subquery(subquery_)
726 if (rare(scale_factor < 0.0))
727 throw Xapian::InvalidArgumentError("OP_SCALE_WEIGHT requires factor >= 0");
730 Query::op
731 QueryScaleWeight::get_type() const XAPIAN_NOEXCEPT
733 return Query::OP_SCALE_WEIGHT;
736 size_t
737 QueryScaleWeight::get_num_subqueries() const XAPIAN_NOEXCEPT
739 return 1;
742 const Query
743 QueryScaleWeight::get_subquery(size_t) const
745 return subquery;
748 string
749 QueryScaleWeight::get_description() const
751 Assert(subquery.internal.get());
752 string desc = str(scale_factor);
753 desc += " * ";
754 desc += subquery.internal->get_description();
755 return desc;
758 PostingIterator::Internal *
759 QueryTerm::postlist(QueryOptimiser * qopt, double factor) const
761 LOGCALL(QUERY, PostingIterator::Internal *, "QueryTerm::postlist", qopt | factor);
762 if (factor != 0.0)
763 qopt->inc_total_subqs();
764 RETURN(qopt->open_post_list(term, wqf, factor));
767 PostingIterator::Internal *
768 QueryPostingSource::postlist(QueryOptimiser * qopt, double factor) const
770 LOGCALL(QUERY, PostingIterator::Internal *, "QueryPostingSource::postlist", qopt | factor);
771 Assert(source.get());
772 if (factor != 0.0)
773 qopt->inc_total_subqs();
774 // Casting away const on the Database::Internal here is OK, as we wrap
775 // them in a const Xapian::Database so non-const methods can't actually
776 // be called on the Database::Internal object.
777 const Xapian::Database wrappeddb(
778 const_cast<Xapian::Database::Internal*>(&(qopt->db)));
779 RETURN(new ExternalPostList(wrappeddb, source.get(), factor, qopt->matcher));
782 PostingIterator::Internal *
783 QueryScaleWeight::postlist(QueryOptimiser * qopt, double factor) const
785 LOGCALL(QUERY, PostingIterator::Internal *, "QueryScaleWeight::postlist", qopt | factor);
786 RETURN(subquery.internal->postlist(qopt, factor * scale_factor));
789 void
790 QueryTerm::gather_terms(void * void_terms) const
792 // Skip Xapian::Query::MatchAll (aka Xapian::Query("")).
793 if (!term.empty()) {
794 vector<pair<Xapian::termpos, string> > &terms =
795 *static_cast<vector<pair<Xapian::termpos, string> >*>(void_terms);
796 terms.push_back(make_pair(pos, term));
800 PostingIterator::Internal *
801 QueryValueRange::postlist(QueryOptimiser *qopt, double factor) const
803 LOGCALL(QUERY, PostingIterator::Internal *, "QueryValueRange::postlist", qopt | factor);
804 if (factor != 0.0)
805 qopt->inc_total_subqs();
806 const Xapian::Database::Internal & db = qopt->db;
807 const string & lb = db.get_value_lower_bound(slot);
808 if (lb.empty()) {
809 // This should only happen if there are no values in this slot (which
810 // could be because the backend just doesn't support values at all).
811 // If there were values in the slot, the backend should have a
812 // non-empty lower bound, even if it isn't a tight one.
813 AssertEq(db.get_value_freq(slot), 0);
814 RETURN(new EmptyPostList);
816 if (end < lb) {
817 RETURN(new EmptyPostList);
819 const string & ub = db.get_value_upper_bound(slot);
820 if (begin > ub) {
821 RETURN(new EmptyPostList);
823 if (end >= ub) {
824 // If begin <= lb too, then the range check isn't needed, but we do
825 // still need to consider which documents have a value set in this
826 // slot. If this value is set for all documents, we can replace it
827 // with the MatchAll postlist, which is especially efficient if
828 // there are no gaps in the docids.
829 if (begin <= lb && db.get_value_freq(slot) == db.get_doccount()) {
830 RETURN(db.open_post_list(string()));
832 RETURN(new ValueGePostList(&db, slot, begin));
834 RETURN(new ValueRangePostList(&db, slot, begin, end));
837 void
838 QueryValueRange::serialise(string & result) const
840 if (slot < 15) {
841 result += static_cast<char>(0x20 | slot);
842 } else {
843 result += static_cast<char>(0x20 | 15);
844 result += encode_length(slot - 15);
846 result += encode_length(begin.size());
847 result += begin;
848 result += encode_length(end.size());
849 result += end;
852 Query::op
853 QueryValueRange::get_type() const XAPIAN_NOEXCEPT
855 return Query::OP_VALUE_RANGE;
858 string
859 QueryValueRange::get_description() const
861 string desc = "VALUE_RANGE ";
862 desc += str(slot);
863 desc += ' ';
864 description_append(desc, begin);
865 desc += ' ';
866 description_append(desc, end);
867 return desc;
870 PostingIterator::Internal *
871 QueryValueLE::postlist(QueryOptimiser *qopt, double factor) const
873 LOGCALL(QUERY, PostingIterator::Internal *, "QueryValueLE::postlist", qopt | factor);
874 if (factor != 0.0)
875 qopt->inc_total_subqs();
876 const Xapian::Database::Internal & db = qopt->db;
877 const string & lb = db.get_value_lower_bound(slot);
878 // If lb.empty(), the backend doesn't provide value bounds.
879 if (!lb.empty()) {
880 if (limit < lb) {
881 RETURN(new EmptyPostList);
883 if (limit >= db.get_value_upper_bound(slot)) {
884 // The range check isn't needed, but we do still need to consider
885 // which documents have a value set in this slot. If this value is
886 // set for all documents, we can replace it with the MatchAll
887 // postlist, which is especially efficient if there are no gaps in
888 // the docids.
889 if (db.get_value_freq(slot) == db.get_doccount()) {
890 RETURN(db.open_post_list(string()));
894 RETURN(new ValueRangePostList(&db, slot, string(), limit));
897 void
898 QueryValueLE::serialise(string & result) const
900 // Encode as a range with an empty start (which only takes a single byte to
901 // encode).
902 if (slot < 15) {
903 result += static_cast<char>(0x20 | slot);
904 } else {
905 result += static_cast<char>(0x20 | 15);
906 result += encode_length(slot - 15);
908 result += encode_length(0);
909 result += encode_length(limit.size());
910 result += limit;
913 Query::op
914 QueryValueLE::get_type() const XAPIAN_NOEXCEPT
916 return Query::OP_VALUE_LE;
919 string
920 QueryValueLE::get_description() const
922 string desc = "VALUE_LE ";
923 desc += str(slot);
924 desc += ' ';
925 description_append(desc, limit);
926 return desc;
929 PostingIterator::Internal *
930 QueryValueGE::postlist(QueryOptimiser *qopt, double factor) const
932 LOGCALL(QUERY, PostingIterator::Internal *, "QueryValueGE::postlist", qopt | factor);
933 if (factor != 0.0)
934 qopt->inc_total_subqs();
935 const Xapian::Database::Internal & db = qopt->db;
936 const string & lb = db.get_value_lower_bound(slot);
937 // If lb.empty(), the backend doesn't provide value bounds.
938 if (!lb.empty()) {
939 if (limit > db.get_value_upper_bound(slot)) {
940 RETURN(new EmptyPostList);
942 if (limit < lb) {
943 // The range check isn't needed, but we do still need to consider
944 // which documents have a value set in this slot. If this value is
945 // set for all documents, we can replace it with the MatchAll
946 // postlist, which is especially efficient if there are no gaps in
947 // the docids.
948 if (db.get_value_freq(slot) == db.get_doccount()) {
949 RETURN(db.open_post_list(string()));
953 RETURN(new ValueGePostList(&db, slot, limit));
956 void
957 QueryValueGE::serialise(string & result) const
959 if (slot < 15) {
960 result += static_cast<char>(0x20 | 0x10 | slot);
961 } else {
962 result += static_cast<char>(0x20 | 0x10 | 15);
963 result += encode_length(slot - 15);
965 result += encode_length(limit.size());
966 result += limit;
969 Query::op
970 QueryValueGE::get_type() const XAPIAN_NOEXCEPT
972 return Query::OP_VALUE_GE;
975 string
976 QueryValueGE::get_description() const
978 string desc = "VALUE_GE ";
979 desc += str(slot);
980 desc += ' ';
981 description_append(desc, limit);
982 return desc;
985 PostingIterator::Internal *
986 QueryWildcard::postlist(QueryOptimiser * qopt, double factor) const
988 LOGCALL(QUERY, PostingIterator::Internal *, "QueryWildcard::postlist", qopt | factor);
989 Query::op op = combiner;
990 double or_factor = 0.0;
991 if (factor == 0.0) {
992 // If we have a factor of 0, we don't care about the weights, so
993 // we're just like a normal OR query.
994 op = Query::OP_OR;
995 } else if (op != Query::OP_SYNONYM) {
996 or_factor = factor;
999 bool old_in_synonym = qopt->in_synonym;
1000 if (!old_in_synonym) {
1001 qopt->in_synonym = (op == Query::OP_SYNONYM);
1004 OrContext ctx(qopt, 0);
1005 unique_ptr<TermList> t(qopt->db.open_allterms(pattern));
1006 Xapian::termcount expansions_left = max_expansion;
1007 // If there's no expansion limit, set expansions_left to the maximum
1008 // value Xapian::termcount can hold.
1009 if (expansions_left == 0)
1010 --expansions_left;
1011 while (true) {
1012 t->next();
1013 if (t->at_end())
1014 break;
1015 if (max_type < Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT) {
1016 if (expansions_left-- == 0) {
1017 if (max_type == Xapian::Query::WILDCARD_LIMIT_FIRST)
1018 break;
1019 string msg("Wildcard ");
1020 msg += pattern;
1021 msg += "* expands to more than ";
1022 msg += str(max_expansion);
1023 msg += " terms";
1024 throw Xapian::WildcardError(msg);
1027 const string & term = t->get_termname();
1028 ctx.add_postlist(qopt->open_lazy_post_list(term, 1, or_factor));
1031 if (max_type == Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT) {
1032 // FIXME: open_lazy_post_list() results in the term getting registered
1033 // for stats, so we still incur an avoidable cost from the full
1034 // expansion size of the wildcard, which is most likely to be visible
1035 // with the remote backend. Perhaps we should split creating the lazy
1036 // postlist from registering the term for stats.
1037 if (ctx.size() > max_expansion)
1038 ctx.select_most_frequent(max_expansion);
1041 if (factor != 0.0) {
1042 if (op != Query::OP_SYNONYM) {
1043 qopt->set_total_subqs(qopt->get_total_subqs() + ctx.size());
1044 } else {
1045 qopt->inc_total_subqs();
1049 qopt->in_synonym = old_in_synonym;
1051 if (ctx.empty())
1052 RETURN(new EmptyPostList);
1054 if (op == Query::OP_MAX)
1055 RETURN(ctx.postlist_max());
1057 PostList * pl = ctx.postlist();
1058 if (op == Query::OP_OR)
1059 RETURN(pl);
1061 // We build an OP_OR tree for OP_SYNONYM and then wrap it in a
1062 // SynonymPostList, which supplies the weights.
1063 PostingIterator::Internal * r = qopt->make_synonym_postlist(pl, factor);
1064 RETURN(r);
1067 termcount
1068 QueryWildcard::get_length() const XAPIAN_NOEXCEPT
1070 // We currently assume wqf is 1 for calculating the synonym's weight
1071 // since conceptually the synonym is one "virtual" term. If we were
1072 // to combine multiple occurrences of the same synonym expansion into
1073 // a single instance with wqf set, we would want to track the wqf.
1074 return 1;
1077 void
1078 QueryWildcard::serialise(string & result) const
1080 result += static_cast<char>(0x0b);
1081 result += encode_length(max_expansion);
1082 result += static_cast<unsigned char>(max_type);
1083 result += static_cast<unsigned char>(combiner);
1084 result += encode_length(pattern.size());
1085 result += pattern;
1088 Query::op
1089 QueryWildcard::get_type() const XAPIAN_NOEXCEPT
1091 return Query::OP_WILDCARD;
1094 string
1095 QueryWildcard::get_description() const
1097 string desc = "WILDCARD ";
1098 switch (combiner) {
1099 case Query::OP_SYNONYM:
1100 desc += "SYNONYM ";
1101 break;
1102 case Query::OP_MAX:
1103 desc += "MAX ";
1104 break;
1105 case Query::OP_OR:
1106 desc += "OR ";
1107 break;
1108 default:
1109 desc += "BAD ";
1110 break;
1112 description_append(desc, pattern);
1113 return desc;
1116 Xapian::termcount
1117 QueryBranch::get_length() const XAPIAN_NOEXCEPT
1119 // Sum results from all subqueries.
1120 Xapian::termcount result = 0;
1121 QueryVector::const_iterator i;
1122 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1123 // MatchNothing subqueries should have been removed by done(), but we
1124 // can't use Assert in a XAPIAN_NOEXCEPT function. But we'll get a
1125 // segfault anyway.
1126 result += (*i).internal->get_length();
1128 return result;
1131 #define MULTIWAY(X) static_cast<unsigned char>(0x80 | (X) << 3)
1132 #define MISC(X) static_cast<unsigned char>(X)
1133 void
1134 QueryBranch::serialise_(string & result, Xapian::termcount parameter) const
1136 static const unsigned char first_byte[] = {
1137 MULTIWAY(0), // OP_AND
1138 MULTIWAY(1), // OP_OR
1139 MULTIWAY(2), // OP_AND_NOT
1140 MULTIWAY(3), // OP_XOR
1141 MULTIWAY(4), // OP_AND_MAYBE
1142 MULTIWAY(5), // OP_FILTER
1143 MULTIWAY(14), // OP_NEAR
1144 MULTIWAY(15), // OP_PHRASE
1145 0, // OP_VALUE_RANGE
1146 MISC(3), // OP_SCALE_WEIGHT
1147 MULTIWAY(13), // OP_ELITE_SET
1148 0, // OP_VALUE_GE
1149 0, // OP_VALUE_LE
1150 MULTIWAY(6), // OP_SYNONYM
1151 MULTIWAY(7) // OP_MAX
1153 Xapian::Query::op op_ = get_op();
1154 AssertRel(size_t(op_),<,sizeof(first_byte));
1155 unsigned char ch = first_byte[op_];
1156 if (ch & 0x80) {
1157 // Multi-way operator.
1158 if (subqueries.size() < 8)
1159 ch |= subqueries.size();
1160 result += ch;
1161 if (subqueries.size() >= 8)
1162 result += encode_length(subqueries.size() - 8);
1163 if (ch >= MULTIWAY(13))
1164 result += encode_length(parameter);
1165 } else {
1166 result += ch;
1169 QueryVector::const_iterator i;
1170 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1171 // MatchNothing subqueries should have been removed by done().
1172 Assert((*i).internal.get());
1173 (*i).internal->serialise(result);
1176 // For OP_NEAR, OP_PHRASE, and OP_ELITE_SET, the window/set size gets
1177 // appended next by an overloaded serialise() method in the subclass.
1180 void
1181 QueryBranch::serialise(string & result) const
1183 QueryBranch::serialise_(result);
1186 void
1187 QueryNear::serialise(string & result) const
1189 // FIXME: window - subqueries.size() ?
1190 QueryBranch::serialise_(result, window);
1193 void
1194 QueryPhrase::serialise(string & result) const
1196 // FIXME: window - subqueries.size() ?
1197 QueryBranch::serialise_(result, window);
1200 void
1201 QueryEliteSet::serialise(string & result) const
1203 // FIXME: set_size - subqueries.size() ?
1204 QueryBranch::serialise_(result, set_size);
1207 void
1208 QueryBranch::gather_terms(void * void_terms) const
1210 // Gather results from all subqueries.
1211 QueryVector::const_iterator i;
1212 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1213 // MatchNothing subqueries should have been removed by done().
1214 Assert((*i).internal.get());
1215 (*i).internal->gather_terms(void_terms);
1219 void
1220 QueryBranch::do_or_like(OrContext& ctx, QueryOptimiser * qopt, double factor,
1221 Xapian::termcount elite_set_size, size_t first) const
1223 LOGCALL_VOID(MATCH, "QueryBranch::do_or_like", ctx | qopt | factor | elite_set_size);
1225 // FIXME: we could optimise by merging OP_ELITE_SET and OP_OR like we do
1226 // for AND-like operations.
1228 // OP_SYNONYM with a single subquery is only simplified by
1229 // QuerySynonym::done() if the single subquery is a term or MatchAll.
1230 Assert(subqueries.size() >= 2 || get_op() == Query::OP_SYNONYM);
1232 vector<PostList *> postlists;
1233 postlists.reserve(subqueries.size() - first);
1235 QueryVector::const_iterator q;
1236 for (q = subqueries.begin() + first; q != subqueries.end(); ++q) {
1237 // MatchNothing subqueries should have been removed by done().
1238 Assert((*q).internal.get());
1239 (*q).internal->postlist_sub_or_like(ctx, qopt, factor);
1242 if (elite_set_size && elite_set_size < subqueries.size()) {
1243 ctx.select_elite_set(elite_set_size, subqueries.size());
1244 // FIXME: not right!
1248 PostList *
1249 QueryBranch::do_synonym(QueryOptimiser * qopt, double factor) const
1251 LOGCALL(MATCH, PostList *, "QueryBranch::do_synonym", qopt | factor);
1252 OrContext ctx(qopt, subqueries.size());
1253 if (factor == 0.0) {
1254 // If we have a factor of 0, we don't care about the weights, so
1255 // we're just like a normal OR query.
1256 do_or_like(ctx, qopt, 0.0);
1257 return ctx.postlist();
1260 bool old_in_synonym = qopt->in_synonym;
1261 qopt->in_synonym = true;
1262 do_or_like(ctx, qopt, 0.0);
1263 PostList * pl = ctx.postlist();
1264 qopt->in_synonym = old_in_synonym;
1266 // We currently assume wqf is 1 for calculating the synonym's weight
1267 // since conceptually the synonym is one "virtual" term. If we were
1268 // to combine multiple occurrences of the same synonym expansion into
1269 // a single instance with wqf set, we would want to track the wqf.
1271 // We build an OP_OR tree for OP_SYNONYM and then wrap it in a
1272 // SynonymPostList, which supplies the weights.
1273 RETURN(qopt->make_synonym_postlist(pl, factor));
1276 PostList *
1277 QueryBranch::do_max(QueryOptimiser * qopt, double factor) const
1279 LOGCALL(MATCH, PostList *, "QueryBranch::do_max", qopt | factor);
1280 OrContext ctx(qopt, subqueries.size());
1281 do_or_like(ctx, qopt, factor);
1282 if (factor == 0.0) {
1283 // If we have a factor of 0, we don't care about the weights, so
1284 // we're just like a normal OR query.
1285 RETURN(ctx.postlist());
1288 // We currently assume wqf is 1 for calculating the OP_MAX's weight
1289 // since conceptually the OP_MAX is one "virtual" term. If we were
1290 // to combine multiple occurrences of the same OP_MAX expansion into
1291 // a single instance with wqf set, we would want to track the wqf.
1292 RETURN(ctx.postlist_max());
1295 Xapian::Query::op
1296 QueryBranch::get_type() const XAPIAN_NOEXCEPT
1298 return get_op();
1301 size_t
1302 QueryBranch::get_num_subqueries() const XAPIAN_NOEXCEPT
1304 return subqueries.size();
1307 const Query
1308 QueryBranch::get_subquery(size_t n) const
1310 return subqueries[n];
1313 const string
1314 QueryBranch::get_description_helper(const char * op,
1315 Xapian::termcount parameter) const
1317 string desc = "(";
1318 QueryVector::const_iterator i;
1319 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1320 if (desc.size() > 1) {
1321 desc += op;
1322 if (parameter) {
1323 desc += str(parameter);
1324 desc += ' ';
1327 Assert((*i).internal.get());
1328 // MatchNothing subqueries should have been removed by done(), and we
1329 // shouldn't get called before done() is, since that happens at the
1330 // end of the Xapian::Query constructor.
1331 desc += (*i).internal->get_description();
1333 desc += ')';
1334 return desc;
1337 Query::Internal *
1338 QueryWindowed::done()
1340 // If window size not specified, default it.
1341 if (window == 0)
1342 window = subqueries.size();
1343 return QueryAndLike::done();
1346 void
1347 QueryScaleWeight::gather_terms(void * void_terms) const
1349 subquery.internal->gather_terms(void_terms);
1352 void QueryTerm::serialise(string & result) const
1354 size_t len = term.size();
1355 if (len == 0) {
1356 if (wqf == 1 && pos == 0) {
1357 // Query::MatchAll
1358 result += '\x0f';
1359 } else {
1360 // Weird mutant versions of MatchAll
1361 result += '\x0e';
1362 result += encode_length(wqf);
1363 result += encode_length(pos);
1365 } else if (wqf == 1) {
1366 if (pos == 0) {
1367 // Single occurrence probabilistic term without position set.
1368 if (len >= 16) {
1369 result += static_cast<char>(0x40 | 0x10);
1370 result += encode_length(term.size() - 16);
1371 } else {
1372 result += static_cast<char>(0x40 | 0x10 | len);
1374 result += term;
1375 } else {
1376 // Single occurrence probabilistic term with position set.
1377 if (len >= 16) {
1378 result += static_cast<char>(0x40 | 0x20);
1379 result += encode_length(term.size() - 16);
1380 } else {
1381 result += static_cast<char>(0x40 | 0x20 | len);
1383 result += term;
1384 result += encode_length(pos);
1386 } else if (wqf > 1 || pos > 0) {
1387 // General case.
1388 if (len >= 16) {
1389 result += static_cast<char>(0x40 | 0x30);
1390 result += encode_length(term.size() - 16);
1391 } else if (len) {
1392 result += static_cast<char>(0x40 | 0x30 | len);
1394 result += term;
1395 result += encode_length(wqf);
1396 result += encode_length(pos);
1397 } else {
1398 // Typical boolean term.
1399 AssertEq(wqf, 0);
1400 AssertEq(pos, 0);
1401 if (len >= 16) {
1402 result += static_cast<char>(0x40);
1403 result += encode_length(term.size() - 16);
1404 } else {
1405 result += static_cast<char>(0x40 | len);
1407 result += term;
1411 void QueryPostingSource::serialise(string & result) const
1413 result += static_cast<char>(0x0c);
1415 const string & n = source->name();
1416 result += encode_length(n.size());
1417 result += n;
1419 const string & s = source->serialise();
1420 result += encode_length(s.size());
1421 result += s;
1424 void QueryScaleWeight::serialise(string & result) const
1426 Assert(subquery.internal.get());
1427 const string & s = serialise_double(scale_factor);
1428 result += '\x0d';
1429 result += s;
1430 subquery.internal->serialise(result);
1433 struct is_matchnothing {
1434 bool operator()(const Xapian::Query & q) const {
1435 return q.internal.get() == NULL;
1439 void
1440 QueryAndLike::add_subquery(const Xapian::Query & subquery)
1442 // If the AndLike is already MatchNothing, do nothing.
1443 if (subqueries.size() == 1 && subqueries[0].internal.get() == NULL)
1444 return;
1445 // If we're adding MatchNothing, discard any previous subqueries.
1446 if (subquery.internal.get() == NULL)
1447 subqueries.clear();
1448 subqueries.push_back(subquery);
1451 Query::Internal *
1452 QueryAndLike::done()
1454 // Empty AndLike gives MatchNothing.
1455 if (subqueries.empty())
1456 return NULL;
1457 // We handle any subquery being MatchNothing in add_subquery() by leaving
1458 // a single MatchNothing subquery, and so this check results in AndLike
1459 // giving MatchNothing.
1460 if (subqueries.size() == 1)
1461 return subqueries[0].internal.get();
1462 return this;
1465 PostingIterator::Internal *
1466 QueryAndLike::postlist(QueryOptimiser * qopt, double factor) const
1468 LOGCALL(QUERY, PostingIterator::Internal *, "QueryAndLike::postlist", qopt | factor);
1469 AndContext ctx(qopt, subqueries.size());
1470 postlist_sub_and_like(ctx, qopt, factor);
1471 RETURN(ctx.postlist());
1474 void
1475 QueryAndLike::postlist_sub_and_like(AndContext& ctx, QueryOptimiser * qopt, double factor) const
1477 QueryVector::const_iterator i;
1478 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1479 // MatchNothing subqueries should have been removed by done().
1480 Assert((*i).internal.get());
1481 (*i).internal->postlist_sub_and_like(ctx, qopt, factor);
1485 void
1486 QueryOrLike::add_subquery(const Xapian::Query & subquery)
1488 // Drop any subqueries which are MatchNothing.
1489 if (subquery.internal.get() != NULL)
1490 subqueries.push_back(subquery);
1493 Query::Internal *
1494 QueryOrLike::done()
1496 // An empty OrLike gives MatchNothing. Note that add_subquery() drops any
1497 // subqueries which are MatchNothing.
1498 if (subqueries.empty())
1499 return NULL;
1500 if (subqueries.size() == 1)
1501 return subqueries[0].internal.get();
1502 return this;
1505 void
1506 QueryAndNot::add_subquery(const Xapian::Query & subquery)
1508 if (!subqueries.empty()) {
1509 // We're adding the 2nd or subsequent subquery, so this subquery is
1510 // negated.
1511 if (subqueries[0].internal.get() == NULL) {
1512 // The left side is already MatchNothing so drop any right side.
1514 // MatchNothing AND_NOT X == MatchNothing
1515 return;
1517 if (subquery.internal.get() == NULL) {
1518 // Drop MatchNothing on the right of AndNot.
1520 // X AND_NOT MatchNothing == X
1521 return;
1523 if (subquery.get_type() == subquery.OP_SCALE_WEIGHT) {
1524 // Strip OP_SCALE_WEIGHT wrapping from queries on the right of
1525 // AndNot as no weight is taken from them.
1526 subqueries.push_back(subquery.get_subquery(0));
1527 // The Query constructor for OP_SCALE_WEIGHT constructor should
1528 // eliminate OP_SCALE_WEIGHT applied to MatchNothing.
1529 Assert(subquery.get_subquery(0).internal.get() != NULL);
1530 return;
1533 subqueries.push_back(subquery);
1536 Query::Internal *
1537 QueryAndNot::done()
1539 // Any MatchNothing right subqueries get discarded by add_subquery() - if
1540 // that leaves just the left subquery, return that.
1542 // If left subquery is MatchNothing, then add_subquery() discards all right
1543 // subqueries, so this check also gives MatchNothing for this case.
1544 if (subqueries.size() == 1)
1545 return subqueries[0].internal.get();
1546 return this;
1549 void
1550 QueryAndMaybe::add_subquery(const Xapian::Query & subquery)
1552 // If the left side of AndMaybe is already MatchNothing, do nothing.
1553 if (subqueries.size() == 1 && subqueries[0].internal.get() == NULL)
1554 return;
1555 // Drop any 2nd or subsequent subqueries which are MatchNothing.
1556 if (subquery.internal.get() != NULL || subqueries.empty())
1557 subqueries.push_back(subquery);
1560 Query::Internal *
1561 QueryAndMaybe::done()
1563 // Any MatchNothing right subqueries get discarded by add_subquery() - if
1564 // that leaves just the left subquery, return that.
1566 // If left subquery is MatchNothing, then add_subquery() discards all right
1567 // subqueries, so this check also gives MatchNothing for this case.
1568 if (subqueries.size() == 1)
1569 return subqueries[0].internal.get();
1570 return this;
1573 PostingIterator::Internal *
1574 QueryOr::postlist(QueryOptimiser * qopt, double factor) const
1576 LOGCALL(QUERY, PostingIterator::Internal *, "QueryOr::postlist", qopt | factor);
1577 OrContext ctx(qopt, subqueries.size());
1578 do_or_like(ctx, qopt, factor);
1579 RETURN(ctx.postlist());
1582 void
1583 QueryOr::postlist_sub_or_like(OrContext& ctx, QueryOptimiser * qopt, double factor) const
1585 do_or_like(ctx, qopt, factor);
1588 PostingIterator::Internal *
1589 QueryAndNot::postlist(QueryOptimiser * qopt, double factor) const
1591 LOGCALL(QUERY, PostingIterator::Internal *, "QueryAndNot::postlist", qopt | factor);
1592 // FIXME: Combine and-like side with and-like stuff above.
1593 unique_ptr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
1594 OrContext ctx(qopt, subqueries.size() - 1);
1595 do_or_like(ctx, qopt, 0.0, 0, 1);
1596 unique_ptr<PostList> r(ctx.postlist());
1597 RETURN(new AndNotPostList(l.release(), r.release(),
1598 qopt->matcher, qopt->db_size));
1601 PostingIterator::Internal *
1602 QueryXor::postlist(QueryOptimiser * qopt, double factor) const
1604 LOGCALL(QUERY, PostingIterator::Internal *, "QueryXor::postlist", qopt | factor);
1605 XorContext ctx(qopt, subqueries.size());
1606 postlist_sub_xor(ctx, qopt, factor);
1607 RETURN(ctx.postlist());
1610 void
1611 QueryXor::postlist_sub_xor(XorContext& ctx, QueryOptimiser * qopt, double factor) const
1613 QueryVector::const_iterator i;
1614 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1615 // MatchNothing subqueries should have been removed by done().
1616 Assert((*i).internal.get());
1617 (*i).internal->postlist_sub_xor(ctx, qopt, factor);
1621 PostingIterator::Internal *
1622 QueryAndMaybe::postlist(QueryOptimiser * qopt, double factor) const
1624 LOGCALL(QUERY, PostingIterator::Internal *, "QueryAndMaybe::postlist", qopt | factor);
1625 // FIXME: Combine and-like side with and-like stuff above.
1626 unique_ptr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
1627 if (factor == 0.0) {
1628 // An unweighted OP_AND_MAYBE can be replaced with its left branch.
1629 RETURN(l.release());
1631 OrContext ctx(qopt, subqueries.size() - 1);
1632 do_or_like(ctx, qopt, factor, 0, 1);
1633 unique_ptr<PostList> r(ctx.postlist());
1634 RETURN(new AndMaybePostList(l.release(), r.release(),
1635 qopt->matcher, qopt->db_size));
1638 PostingIterator::Internal *
1639 QueryFilter::postlist(QueryOptimiser * qopt, double factor) const
1641 LOGCALL(QUERY, PostingIterator::Internal *, "QueryFilter::postlist", qopt | factor);
1642 // FIXME: Combine and-like stuff, like QueryOptimiser.
1643 AssertEq(subqueries.size(), 2);
1644 PostList * pls[2];
1645 unique_ptr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
1646 pls[1] = subqueries[1].internal->postlist(qopt, 0.0);
1647 pls[0] = l.release();
1648 RETURN(new MultiAndPostList(pls, pls + 2, qopt->matcher, qopt->db_size));
1651 void
1652 QueryFilter::postlist_sub_and_like(AndContext& ctx, QueryOptimiser * qopt, double factor) const
1654 QueryVector::const_iterator i;
1655 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1656 // MatchNothing subqueries should have been removed by done().
1657 Assert((*i).internal.get());
1658 (*i).internal->postlist_sub_and_like(ctx, qopt, factor);
1659 // Second and subsequent subqueries are unweighted.
1660 factor = 0.0;
1664 void
1665 QueryWindowed::postlist_windowed(Query::op op, AndContext& ctx, QueryOptimiser * qopt, double factor) const
1667 if (!qopt->full_db_has_positions) {
1668 // No positional data anywhere, so just handle as AND.
1669 QueryAndLike::postlist_sub_and_like(ctx, qopt, factor);
1670 return;
1673 if (!qopt->db.has_positions()) {
1674 // No positions in this subdatabase so this matches nothing, which
1675 // means the whole andcontext matches nothing.
1677 // Bailing out here means we don't recurse deeper and that means we
1678 // don't call QueryOptimiser::inc_total_subqs() for leaf postlists in
1679 // the phrase, but at least one shard will count them, and the matcher
1680 // takes the highest answer (since 1.4.6).
1681 ctx.shrink(0);
1682 return;
1685 bool old_need_positions = qopt->need_positions;
1686 qopt->need_positions = true;
1688 QueryVector::const_iterator i;
1689 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1690 // MatchNothing subqueries should have been removed by done().
1691 Assert((*i).internal.get());
1692 bool is_term = ((*i).internal->get_type() == Query::LEAF_TERM);
1693 PostList* pl = (*i).internal->postlist(qopt, factor);
1694 if (!is_term)
1695 pl = new OrPosPostList(pl);
1696 ctx.add_postlist(pl);
1698 // Record the positional filter to apply higher up the tree.
1699 ctx.add_pos_filter(op, subqueries.size(), window);
1701 qopt->need_positions = old_need_positions;
1704 void
1705 QueryPhrase::postlist_sub_and_like(AndContext & ctx, QueryOptimiser * qopt, double factor) const
1707 QueryWindowed::postlist_windowed(Query::OP_PHRASE, ctx, qopt, factor);
1710 void
1711 QueryNear::postlist_sub_and_like(AndContext & ctx, QueryOptimiser * qopt, double factor) const
1713 QueryWindowed::postlist_windowed(Query::OP_NEAR, ctx, qopt, factor);
1716 PostingIterator::Internal *
1717 QueryEliteSet::postlist(QueryOptimiser * qopt, double factor) const
1719 LOGCALL(QUERY, PostingIterator::Internal *, "QueryEliteSet::postlist", qopt | factor);
1720 OrContext ctx(qopt, subqueries.size());
1721 do_or_like(ctx, qopt, factor, set_size);
1722 RETURN(ctx.postlist());
1725 void
1726 QueryEliteSet::postlist_sub_or_like(OrContext& ctx, QueryOptimiser * qopt, double factor) const
1728 do_or_like(ctx, qopt, factor, set_size);
1731 PostingIterator::Internal *
1732 QuerySynonym::postlist(QueryOptimiser * qopt, double factor) const
1734 LOGCALL(QUERY, PostingIterator::Internal *, "QuerySynonym::postlist", qopt | factor);
1735 // Save and restore total_subqs so we only add one for the whole
1736 // OP_SYNONYM subquery (or none if we're not weighted).
1737 Xapian::termcount save_total_subqs = qopt->get_total_subqs();
1738 if (factor != 0.0)
1739 ++save_total_subqs;
1740 PostList * pl = do_synonym(qopt, factor);
1741 qopt->set_total_subqs(save_total_subqs);
1742 RETURN(pl);
1745 Query::Internal *
1746 QuerySynonym::done()
1748 // An empty Synonym gives MatchNothing. Note that add_subquery() drops any
1749 // subqueries which are MatchNothing.
1750 if (subqueries.empty())
1751 return NULL;
1752 // Synonym of a single subquery should only be simplified if that subquery
1753 // is a term (or MatchAll), or if it's also OP_SYNONYM. Note that
1754 // MatchNothing subqueries are dropped, so we'd never get here with a
1755 // single MatchNothing subquery.
1756 if (subqueries.size() == 1) {
1757 Query::op sub_type = subqueries[0].get_type();
1758 if (sub_type == Query::LEAF_TERM || sub_type == Query::LEAF_MATCH_ALL ||
1759 sub_type == Query::OP_SYNONYM) {
1760 return subqueries[0].internal.get();
1763 return this;
1766 PostingIterator::Internal *
1767 QueryMax::postlist(QueryOptimiser * qopt, double factor) const
1769 LOGCALL(QUERY, PostingIterator::Internal *, "QueryMax::postlist", qopt | factor);
1770 // Save and restore total_subqs so we only add one for the whole
1771 // OP_MAX subquery (or none if we're not weighted).
1772 Xapian::termcount save_total_subqs = qopt->get_total_subqs();
1773 if (factor != 0.0)
1774 ++save_total_subqs;
1775 PostList * pl = do_max(qopt, factor);
1776 qopt->set_total_subqs(save_total_subqs);
1777 RETURN(pl);
1780 Xapian::Query::op
1781 QueryAnd::get_op() const
1783 return Xapian::Query::OP_AND;
1786 Xapian::Query::op
1787 QueryOr::get_op() const
1789 return Xapian::Query::OP_OR;
1792 Xapian::Query::op
1793 QueryAndNot::get_op() const
1795 return Xapian::Query::OP_AND_NOT;
1798 Xapian::Query::op
1799 QueryXor::get_op() const
1801 return Xapian::Query::OP_XOR;
1804 Xapian::Query::op
1805 QueryAndMaybe::get_op() const
1807 return Xapian::Query::OP_AND_MAYBE;
1810 Xapian::Query::op
1811 QueryFilter::get_op() const
1813 return Xapian::Query::OP_FILTER;
1816 Xapian::Query::op
1817 QueryNear::get_op() const
1819 return Xapian::Query::OP_NEAR;
1822 Xapian::Query::op
1823 QueryPhrase::get_op() const
1825 return Xapian::Query::OP_PHRASE;
1828 Xapian::Query::op
1829 QueryEliteSet::get_op() const
1831 return Xapian::Query::OP_ELITE_SET;
1834 Xapian::Query::op
1835 QuerySynonym::get_op() const
1837 return Xapian::Query::OP_SYNONYM;
1840 Xapian::Query::op
1841 QueryMax::get_op() const
1843 return Xapian::Query::OP_MAX;
1846 Xapian::Query::op
1847 QueryWildcard::get_op() const
1849 return Xapian::Query::OP_WILDCARD;
1852 string
1853 QueryAnd::get_description() const
1855 return get_description_helper(" AND ");
1858 string
1859 QueryOr::get_description() const
1861 return get_description_helper(" OR ");
1864 string
1865 QueryAndNot::get_description() const
1867 return get_description_helper(" AND_NOT ");
1870 string
1871 QueryXor::get_description() const
1873 return get_description_helper(" XOR ");
1876 string
1877 QueryAndMaybe::get_description() const
1879 return get_description_helper(" AND_MAYBE ");
1882 string
1883 QueryFilter::get_description() const
1885 return get_description_helper(" FILTER ");
1888 string
1889 QueryNear::get_description() const
1891 return get_description_helper(" NEAR ", window);
1894 string
1895 QueryPhrase::get_description() const
1897 return get_description_helper(" PHRASE ", window);
1900 string
1901 QueryEliteSet::get_description() const
1903 return get_description_helper(" ELITE_SET ", set_size);
1906 string
1907 QuerySynonym::get_description() const
1909 if (subqueries.size() == 1) {
1910 string d = "(SYNONYM ";
1911 d += subqueries[0].internal->get_description();
1912 d += ")";
1913 return d;
1915 return get_description_helper(" SYNONYM ");
1918 string
1919 QueryMax::get_description() const
1921 return get_description_helper(" MAX ");
1924 Xapian::Query::op
1925 QueryInvalid::get_type() const XAPIAN_NOEXCEPT
1927 return Xapian::Query::OP_INVALID;
1930 PostingIterator::Internal *
1931 QueryInvalid::postlist(QueryOptimiser *, double) const
1933 throw Xapian::InvalidOperationError("Query is invalid");
1936 void
1937 QueryInvalid::serialise(std::string & result) const
1939 result += static_cast<char>(0x00);
1942 string
1943 QueryInvalid::get_description() const
1945 return "<INVALID>";