Disable volatile workaround for excess precision segv for SSE
[xapian.git] / xapian-core / api / queryinternal.cc
blob1769e3f05e886957681122d0e9e861bbd5405361
1 /** @file queryinternal.cc
2 * @brief Xapian::Query internals
3 */
4 /* Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014,2015,2016 Olly Betts
5 * Copyright (C) 2008,2009 Lemur Consulting Ltd
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #include <config.h>
24 #include "queryinternal.h"
26 #include "xapian/error.h"
27 #include "xapian/postingsource.h"
28 #include "xapian/query.h"
30 #include "leafpostlist.h"
31 #include "matcher/andmaybepostlist.h"
32 #include "matcher/andnotpostlist.h"
33 #include "emptypostlist.h"
34 #include "matcher/exactphrasepostlist.h"
35 #include "matcher/externalpostlist.h"
36 #include "matcher/maxpostlist.h"
37 #include "matcher/multiandpostlist.h"
38 #include "matcher/multixorpostlist.h"
39 #include "matcher/nearpostlist.h"
40 #include "matcher/orpostlist.h"
41 #include "matcher/phrasepostlist.h"
42 #include "matcher/queryoptimiser.h"
43 #include "matcher/valuerangepostlist.h"
44 #include "matcher/valuegepostlist.h"
45 #include "net/length.h"
46 #include "serialise-double.h"
47 #include "termlist.h"
49 #include "autoptr.h"
50 #include "debuglog.h"
51 #include "omassert.h"
52 #include "str.h"
53 #include "unicode/description_append.h"
55 #include <algorithm>
56 #include <functional>
57 #include <list>
58 #include <string>
59 #include <vector>
61 using namespace std;
63 template<class CLASS> struct delete_ptr {
64 void operator()(CLASS *p) { delete p; }
67 using Xapian::Internal::AndContext;
68 using Xapian::Internal::OrContext;
69 using Xapian::Internal::XorContext;
71 namespace Xapian {
73 namespace Internal {
75 /** Class providing an operator which sorts postlists to select max or terms.
76 * This returns true if a has a (strictly) greater termweight than b,
77 * unless a or b contain no documents, in which case the other one is
78 * selected.
80 struct CmpMaxOrTerms {
81 /** Return true if and only if a has a strictly greater termweight than b. */
82 bool operator()(const PostList *a, const PostList *b) {
83 #if (defined(__i386__) && !defined(__SSE_MATH__)) || \
84 defined(__mc68000__) || defined(__mc68010__) || \
85 defined(__mc68020__) || defined(__mc68030__)
86 // On some architectures, most common of which is x86, floating point
87 // values are calculated and stored in registers with excess precision.
88 // If the two get_maxweight() calls below return identical values in a
89 // register, the excess precision may be dropped for one of them but
90 // not the other (e.g. because the compiler saves the first calculated
91 // weight to memory while calculating the second, then reloads it to
92 // compare). This leads to both a > b and b > a being true, which
93 // violates the antisymmetry property of the strict weak ordering
94 // required by nth_element(). This can have serious consequences (e.g.
95 // segfaults).
97 // Note that m68k only has excess precision in earlier models - 68040
98 // and later are OK:
99 // http://gcc.gnu.org/ml/gcc-patches/2008-11/msg00105.html
101 // To avoid this, we store each result in a volatile double prior to
102 // comparing them. This means that the result of this test should
103 // match that on other architectures with the same double format (which
104 // is desirable), and actually has less overhead than rounding both
105 // results to float (which is another approach which works).
106 volatile double a_max_wt = a->get_maxweight();
107 volatile double b_max_wt = b->get_maxweight();
108 return a_max_wt > b_max_wt;
109 #else
110 return a->get_maxweight() > b->get_maxweight();
111 #endif
115 /// Comparison functor which orders PostList* by descending get_termfreq_est().
116 struct ComparePostListTermFreqAscending {
117 /// Order by descending get_termfreq_est().
118 bool operator()(const PostList *a, const PostList *b) {
119 return a->get_termfreq_est() > b->get_termfreq_est();
123 class Context {
124 protected:
125 vector<PostList*> pls;
127 public:
128 explicit Context(size_t reserve);
130 ~Context();
132 void add_postlist(PostList * pl) {
133 pls.push_back(pl);
136 bool empty() const {
137 return pls.empty();
140 size_t size() const {
141 return pls.size();
145 Context::Context(size_t reserve) {
146 pls.reserve(reserve);
149 Context::~Context()
151 for_each(pls.begin(), pls.end(), delete_ptr<PostList>());
154 class OrContext : public Context {
155 public:
156 explicit OrContext(size_t reserve) : Context(reserve) { }
158 /// Select the best set_size postlists from the last out_of added.
159 void select_elite_set(QueryOptimiser * qopt,
160 size_t set_size, size_t out_of);
162 /// Select the set_size postlists with the highest term frequency.
163 void select_most_frequent(QueryOptimiser * qopt, size_t set_size);
165 PostList * postlist(QueryOptimiser* qopt);
166 PostList * postlist_max(QueryOptimiser* qopt);
169 void
170 OrContext::select_elite_set(QueryOptimiser * qopt,
171 size_t set_size, size_t out_of)
173 // Call recalc_maxweight() as otherwise get_maxweight()
174 // may not be valid before next() or skip_to()
175 vector<PostList*>::iterator begin = pls.begin() + pls.size() - out_of;
176 for_each(begin, pls.end(), mem_fun(&PostList::recalc_maxweight));
178 nth_element(begin, begin + set_size - 1, pls.end(), CmpMaxOrTerms());
179 const PostList * hint_pl = qopt->get_hint_postlist();
180 for_each(begin + set_size, pls.end(),
181 [&qopt, &hint_pl](const PostList * p) {
182 if (rare(p == hint_pl)) {
183 qopt->take_hint_ownership();
184 hint_pl = NULL;
185 } else {
186 delete p;
189 pls.resize(pls.size() - out_of + set_size);
192 void
193 OrContext::select_most_frequent(QueryOptimiser * qopt, size_t set_size)
195 vector<PostList*>::iterator begin = pls.begin();
196 nth_element(begin, begin + set_size - 1, pls.end(),
197 ComparePostListTermFreqAscending());
198 const PostList * hint_pl = qopt->get_hint_postlist();
199 for_each(begin + set_size, pls.end(),
200 [&qopt, &hint_pl](const PostList * p) {
201 if (rare(p == hint_pl)) {
202 qopt->take_hint_ownership();
203 hint_pl = NULL;
204 } else {
205 delete p;
208 pls.resize(set_size);
211 PostList *
212 OrContext::postlist(QueryOptimiser* qopt)
214 Assert(!pls.empty());
216 if (pls.size() == 1) {
217 PostList * pl = pls[0];
218 pls.clear();
219 return pl;
222 // Make postlists into a heap so that the postlist with the greatest term
223 // frequency is at the top of the heap.
224 make_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
226 // Now build a tree of binary OrPostList objects.
228 // The algorithm used to build the tree is like that used to build an
229 // optimal Huffman coding tree. If we called next() repeatedly, this
230 // arrangement would minimise the number of method calls. Generally we
231 // don't actually do that, but this arrangement is still likely to be a
232 // good one, and it does minimise the work in the worst case.
233 while (true) {
234 // We build the tree such that at each branch:
236 // l.get_termfreq_est() >= r.get_termfreq_est()
238 // We do this so that the OrPostList class can be optimised assuming
239 // that this is the case.
240 PostList * r = pls.front();
241 pop_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
242 pls.pop_back();
243 PostList * pl;
244 pl = new OrPostList(pls.front(), r, qopt->matcher, qopt->db_size);
246 if (pls.size() == 1) {
247 pls.clear();
248 return pl;
251 pop_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
252 pls.back() = pl;
253 push_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
257 PostList *
258 OrContext::postlist_max(QueryOptimiser* qopt)
260 Assert(!pls.empty());
262 if (pls.size() == 1) {
263 PostList * pl = pls[0];
264 pls.clear();
265 return pl;
268 // Sort the postlists so that the postlist with the greatest term frequency
269 // is first.
270 sort(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
272 PostList * pl;
273 pl = new MaxPostList(pls.begin(), pls.end(), qopt->matcher, qopt->db_size);
275 pls.clear();
276 return pl;
279 class XorContext : public Context {
280 public:
281 explicit XorContext(size_t reserve) : Context(reserve) { }
283 PostList * postlist(QueryOptimiser* qopt);
286 PostList *
287 XorContext::postlist(QueryOptimiser* qopt)
289 Xapian::doccount db_size = qopt->db_size;
290 PostList * pl;
291 pl = new MultiXorPostList(pls.begin(), pls.end(), qopt->matcher, db_size);
293 // Empty pls so our destructor doesn't delete them all!
294 pls.clear();
295 return pl;
298 class AndContext : public Context {
299 class PosFilter {
300 Xapian::Query::op op_;
302 /// Start and end indices for the PostLists this positional filter uses.
303 size_t begin, end;
305 Xapian::termcount window;
307 public:
308 PosFilter(Xapian::Query::op op__, size_t begin_, size_t end_,
309 Xapian::termcount window_)
310 : op_(op__), begin(begin_), end(end_), window(window_) { }
312 PostList * postlist(PostList * pl, const vector<PostList*>& pls) const;
315 list<PosFilter> pos_filters;
317 public:
318 explicit AndContext(size_t reserve) : Context(reserve) { }
320 void add_pos_filter(Query::op op_,
321 size_t n_subqs,
322 Xapian::termcount window);
324 PostList * postlist(QueryOptimiser* qopt);
327 PostList *
328 AndContext::PosFilter::postlist(PostList * pl, const vector<PostList*>& pls) const
329 try {
330 vector<PostList *>::const_iterator terms_begin = pls.begin() + begin;
331 vector<PostList *>::const_iterator terms_end = pls.begin() + end;
333 if (op_ == Xapian::Query::OP_NEAR) {
334 pl = new NearPostList(pl, window, terms_begin, terms_end);
335 } else if (window == end - begin) {
336 AssertEq(op_, Xapian::Query::OP_PHRASE);
337 pl = new ExactPhrasePostList(pl, terms_begin, terms_end);
338 } else {
339 AssertEq(op_, Xapian::Query::OP_PHRASE);
340 pl = new PhrasePostList(pl, window, terms_begin, terms_end);
342 return pl;
343 } catch (...) {
344 delete pl;
345 throw;
348 void
349 AndContext::add_pos_filter(Query::op op_,
350 size_t n_subqs,
351 Xapian::termcount window)
353 Assert(n_subqs > 1);
354 size_t end = pls.size();
355 size_t begin = end - n_subqs;
356 pos_filters.push_back(PosFilter(op_, begin, end, window));
359 PostList *
360 AndContext::postlist(QueryOptimiser* qopt)
362 AutoPtr<PostList> pl(new MultiAndPostList(pls.begin(), pls.end(),
363 qopt->matcher, qopt->db_size));
365 // Sort the positional filters to try to apply them in an efficient order.
366 // FIXME: We need to figure out what that is! Try applying lowest cf/tf
367 // first?
369 // Apply any positional filters.
370 list<PosFilter>::const_iterator i;
371 for (i = pos_filters.begin(); i != pos_filters.end(); ++i) {
372 const PosFilter & filter = *i;
373 pl.reset(filter.postlist(pl.release(), pls));
376 // Empty pls so our destructor doesn't delete them all!
377 pls.clear();
378 return pl.release();
383 Query::Internal::~Internal() { }
385 size_t
386 Query::Internal::get_num_subqueries() const XAPIAN_NOEXCEPT
388 return 0;
391 const Query
392 Query::Internal::get_subquery(size_t) const
394 throw Xapian::InvalidArgumentError("get_subquery() not meaningful for this Query object");
397 void
398 Query::Internal::gather_terms(void *) const
402 Xapian::termcount
403 Query::Internal::get_length() const XAPIAN_NOEXCEPT
405 return 0;
408 Query::Internal *
409 Query::Internal::unserialise(const char ** p, const char * end,
410 const Registry & reg)
412 if (*p == end)
413 return NULL;
414 unsigned char ch = *(*p)++;
415 switch (ch >> 5) {
416 case 4: case 5: case 6: case 7: {
417 // Multi-way branch
419 // 1ccccnnn where:
420 // nnn -> n_subqs (0 means encoded value follows)
421 // cccc -> code (which OP_XXX)
422 size_t n_subqs = ch & 0x07;
423 if (n_subqs == 0) {
424 decode_length(p, end, n_subqs);
425 n_subqs += 8;
427 unsigned char code = (ch >> 3) & 0x0f;
428 Xapian::termcount parameter = 0;
429 if (code >= 13)
430 decode_length(p, end, parameter);
431 Xapian::Internal::QueryBranch * result;
432 switch (code) {
433 case 0: // OP_AND
434 result = new Xapian::Internal::QueryAnd(n_subqs);
435 break;
436 case 1: // OP_OR
437 result = new Xapian::Internal::QueryOr(n_subqs);
438 break;
439 case 2: // OP_AND_NOT
440 result = new Xapian::Internal::QueryAndNot(n_subqs);
441 break;
442 case 3: // OP_XOR
443 result = new Xapian::Internal::QueryXor(n_subqs);
444 break;
445 case 4: // OP_AND_MAYBE
446 result = new Xapian::Internal::QueryAndMaybe(n_subqs);
447 break;
448 case 5: // OP_FILTER
449 result = new Xapian::Internal::QueryFilter(n_subqs);
450 break;
451 case 6: // OP_SYNONYM
452 result = new Xapian::Internal::QuerySynonym(n_subqs);
453 break;
454 case 7: // OP_MAX
455 result = new Xapian::Internal::QueryMax(n_subqs);
456 break;
457 case 13: // OP_ELITE_SET
458 result = new Xapian::Internal::QueryEliteSet(n_subqs,
459 parameter);
460 break;
461 case 14: // OP_NEAR
462 result = new Xapian::Internal::QueryNear(n_subqs,
463 parameter);
464 break;
465 case 15: // OP_PHRASE
466 result = new Xapian::Internal::QueryPhrase(n_subqs,
467 parameter);
468 break;
469 default:
470 // 8 to 12 are currently unused.
471 throw SerialisationError("Unknown multi-way branch Query operator");
473 do {
474 result->add_subquery(Xapian::Query(unserialise(p, end, reg)));
475 } while (--n_subqs);
476 result->done();
477 return result;
479 case 2: case 3: { // Term
480 // Term
482 // 01ccLLLL where:
483 // LLLL -> length (0 means encoded value follows)
484 // cc -> code:
485 // 0: wqf = 0; pos = 0
486 // 1: wqf = 1; pos = 0
487 // 2: wqf = 1; pos -> encoded value follows
488 // 3: wqf -> encoded value follows; pos -> encoded value follows
489 size_t len = ch & 0x0f;
490 if (len == 0) {
491 decode_length(p, end, len);
492 len += 16;
494 if (size_t(end - *p) < len)
495 throw SerialisationError("Not enough data");
496 string term(*p, len);
497 *p += len;
499 int code = ((ch >> 4) & 0x03);
501 Xapian::termcount wqf = static_cast<Xapian::termcount>(code > 0);
502 if (code == 3)
503 decode_length(p, end, wqf);
505 Xapian::termpos pos = 0;
506 if (code >= 2)
507 decode_length(p, end, pos);
509 return new Xapian::Internal::QueryTerm(term, wqf, pos);
511 case 1: {
512 // OP_VALUE_RANGE or OP_VALUE_GE or OP_VALUE_LE
514 // 001tssss where:
515 // ssss -> slot number (15 means encoded value follows)
516 // t -> op:
517 // 0: OP_VALUE_RANGE (or OP_VALUE_LE if begin empty)
518 // 1: OP_VALUE_GE
519 Xapian::valueno slot = ch & 15;
520 if (slot == 15) {
521 decode_length(p, end, slot);
522 slot += 15;
524 size_t len;
525 decode_length_and_check(p, end, len);
526 string begin(*p, len);
527 *p += len;
528 if (ch & 0x10) {
529 // OP_VALUE_GE
530 return new Xapian::Internal::QueryValueGE(slot, begin);
533 // OP_VALUE_RANGE
534 decode_length_and_check(p, end, len);
535 string end_(*p, len);
536 *p += len;
537 if (begin.empty()) // FIXME: is this right?
538 return new Xapian::Internal::QueryValueLE(slot, end_);
539 return new Xapian::Internal::QueryValueRange(slot, begin, end_);
541 case 0: {
542 // Other operators
544 // 000ttttt where:
545 // ttttt -> encodes which OP_XXX
546 switch (ch & 0x1f) {
547 case 0x00: // OP_INVALID
548 return new Xapian::Internal::QueryInvalid();
549 case 0x0b: { // Wildcard
550 if (*p == end)
551 throw SerialisationError("not enough data");
552 Xapian::termcount max_expansion;
553 decode_length(p, end, max_expansion);
554 if (end - *p < 2)
555 throw SerialisationError("not enough data");
556 int max_type = static_cast<unsigned char>(*(*p)++);
557 op combiner = static_cast<op>(*(*p)++);
558 size_t len;
559 decode_length_and_check(p, end, len);
560 string pattern(*p, len);
561 *p += len;
562 return new Xapian::Internal::QueryWildcard(pattern,
563 max_expansion,
564 max_type,
565 combiner);
567 case 0x0c: { // PostingSource
568 size_t len;
569 decode_length_and_check(p, end, len);
570 string name(*p, len);
571 *p += len;
573 const PostingSource * reg_source = reg.get_posting_source(name);
574 if (!reg_source) {
575 string m = "PostingSource ";
576 m += name;
577 m += " not registered";
578 throw SerialisationError(m);
581 decode_length_and_check(p, end, len);
582 PostingSource * source =
583 reg_source->unserialise_with_registry(string(*p, len),
584 reg);
585 *p += len;
586 return new Xapian::Internal::QueryPostingSource(source->release());
588 case 0x0d: {
589 using Xapian::Internal::QueryScaleWeight;
590 double scale_factor = unserialise_double(p, end);
591 return new QueryScaleWeight(scale_factor,
592 Query(unserialise(p, end, reg)));
594 case 0x0e: {
595 Xapian::termcount wqf;
596 Xapian::termpos pos;
597 decode_length(p, end, wqf);
598 decode_length(p, end, pos);
599 return new Xapian::Internal::QueryTerm(string(), wqf, pos);
601 case 0x0f:
602 return Query::MatchAll.internal.get();
603 default: // Others currently unused.
604 break;
606 break;
609 string msg = "Unknown Query serialisation: ";
610 msg += str(ch);
611 throw SerialisationError(msg);
614 void
615 Query::Internal::postlist_sub_and_like(AndContext& ctx,
616 QueryOptimiser * qopt,
617 double factor) const
619 ctx.add_postlist(postlist(qopt, factor));
622 void
623 Query::Internal::postlist_sub_or_like(OrContext& ctx,
624 QueryOptimiser * qopt,
625 double factor) const
627 ctx.add_postlist(postlist(qopt, factor));
630 void
631 Query::Internal::postlist_sub_xor(XorContext& ctx,
632 QueryOptimiser * qopt,
633 double factor) const
635 ctx.add_postlist(postlist(qopt, factor));
638 namespace Internal {
640 Query::op
641 QueryTerm::get_type() const XAPIAN_NOEXCEPT
643 return term.empty() ? Query::LEAF_MATCH_ALL : Query::LEAF_TERM;
646 string
647 QueryTerm::get_description() const
649 string desc;
650 if (term.empty()) {
651 desc = "<alldocuments>";
652 } else {
653 description_append(desc, term);
655 if (wqf != 1) {
656 desc += '#';
657 desc += str(wqf);
659 if (pos) {
660 desc += '@';
661 desc += str(pos);
663 return desc;
666 QueryPostingSource::QueryPostingSource(PostingSource * source_)
667 : source(source_)
669 if (!source_)
670 throw Xapian::InvalidArgumentError("source parameter can't be NULL");
671 if (source->_refs == 0) {
672 // source_ isn't reference counted, so try to clone it. If clone()
673 // isn't implemented, just use the object provided and it's the
674 // caller's responsibility to ensure it stays valid while in use.
675 PostingSource * cloned_source = source->clone();
676 if (cloned_source) source = cloned_source->release();
680 Query::op
681 QueryPostingSource::get_type() const XAPIAN_NOEXCEPT
683 return Query::LEAF_POSTING_SOURCE;
686 string
687 QueryPostingSource::get_description() const
689 string desc = "PostingSource(";
690 desc += source->get_description();
691 desc += ')';
692 return desc;
695 QueryScaleWeight::QueryScaleWeight(double factor, const Query & subquery_)
696 : scale_factor(factor), subquery(subquery_)
698 if (rare(scale_factor < 0.0))
699 throw Xapian::InvalidArgumentError("OP_SCALE_WEIGHT requires factor >= 0");
702 Query::op
703 QueryScaleWeight::get_type() const XAPIAN_NOEXCEPT
705 return Query::OP_SCALE_WEIGHT;
708 size_t
709 QueryScaleWeight::get_num_subqueries() const XAPIAN_NOEXCEPT
711 return 1;
714 const Query
715 QueryScaleWeight::get_subquery(size_t) const
717 return subquery;
720 string
721 QueryScaleWeight::get_description() const
723 Assert(subquery.internal.get());
724 string desc = str(scale_factor);
725 desc += " * ";
726 desc += subquery.internal->get_description();
727 return desc;
730 PostingIterator::Internal *
731 QueryTerm::postlist(QueryOptimiser * qopt, double factor) const
733 LOGCALL(QUERY, PostingIterator::Internal *, "QueryTerm::postlist", qopt | factor);
734 if (factor != 0.0)
735 qopt->inc_total_subqs();
736 RETURN(qopt->open_post_list(term, wqf, factor));
739 PostingIterator::Internal *
740 QueryPostingSource::postlist(QueryOptimiser * qopt, double factor) const
742 LOGCALL(QUERY, PostingIterator::Internal *, "QueryPostingSource::postlist", qopt | factor);
743 Assert(source.get());
744 if (factor != 0.0)
745 qopt->inc_total_subqs();
746 // Casting away const on the Database::Internal here is OK, as we wrap
747 // them in a const Xapian::Database so non-const methods can't actually
748 // be called on the Database::Internal object.
749 const Xapian::Database wrappeddb(
750 const_cast<Xapian::Database::Internal*>(&(qopt->db)));
751 RETURN(new ExternalPostList(wrappeddb, source.get(), factor, qopt->matcher));
754 PostingIterator::Internal *
755 QueryScaleWeight::postlist(QueryOptimiser * qopt, double factor) const
757 LOGCALL(QUERY, PostingIterator::Internal *, "QueryScaleWeight::postlist", qopt | factor);
758 RETURN(subquery.internal->postlist(qopt, factor * scale_factor));
761 void
762 QueryTerm::gather_terms(void * void_terms) const
764 // Skip Xapian::Query::MatchAll (aka Xapian::Query("")).
765 if (!term.empty()) {
766 vector<pair<Xapian::termpos, string> > &terms =
767 *static_cast<vector<pair<Xapian::termpos, string> >*>(void_terms);
768 terms.push_back(make_pair(pos, term));
772 PostingIterator::Internal *
773 QueryValueRange::postlist(QueryOptimiser *qopt, double factor) const
775 LOGCALL(QUERY, PostingIterator::Internal *, "QueryValueRange::postlist", qopt | factor);
776 if (factor != 0.0)
777 qopt->inc_total_subqs();
778 const Xapian::Database::Internal & db = qopt->db;
779 const string & lb = db.get_value_lower_bound(slot);
780 // If lb.empty(), the backend doesn't provide value bounds.
781 if (!lb.empty()) {
782 if (end < lb) {
783 RETURN(new EmptyPostList);
785 const string & ub = db.get_value_upper_bound(slot);
786 if (begin > ub) {
787 RETURN(new EmptyPostList);
789 if (end >= ub) {
790 // If begin <= lb too, then the range check isn't needed, but we do
791 // still need to consider which documents have a value set in this
792 // slot. If this value is set for all documents, we can replace it
793 // with the MatchAll postlist, which is especially efficient if
794 // there are no gaps in the docids.
795 if (begin <= lb && db.get_value_freq(slot) == db.get_doccount()) {
796 RETURN(db.open_post_list(string()));
798 RETURN(new ValueGePostList(&db, slot, begin));
801 RETURN(new ValueRangePostList(&db, slot, begin, end));
804 void
805 QueryValueRange::serialise(string & result) const
807 if (slot < 15) {
808 result += static_cast<char>(0x20 | slot);
809 } else {
810 result += static_cast<char>(0x20 | 15);
811 result += encode_length(slot - 15);
813 result += encode_length(begin.size());
814 result += begin;
815 result += encode_length(end.size());
816 result += end;
819 Query::op
820 QueryValueRange::get_type() const XAPIAN_NOEXCEPT
822 return Query::OP_VALUE_RANGE;
825 string
826 QueryValueRange::get_description() const
828 string desc = "VALUE_RANGE ";
829 desc += str(slot);
830 desc += ' ';
831 description_append(desc, begin);
832 desc += ' ';
833 description_append(desc, end);
834 return desc;
837 PostingIterator::Internal *
838 QueryValueLE::postlist(QueryOptimiser *qopt, double factor) const
840 LOGCALL(QUERY, PostingIterator::Internal *, "QueryValueLE::postlist", qopt | factor);
841 if (factor != 0.0)
842 qopt->inc_total_subqs();
843 const Xapian::Database::Internal & db = qopt->db;
844 const string & lb = db.get_value_lower_bound(slot);
845 // If lb.empty(), the backend doesn't provide value bounds.
846 if (!lb.empty()) {
847 if (limit < lb) {
848 RETURN(new EmptyPostList);
850 if (limit >= db.get_value_upper_bound(slot)) {
851 // The range check isn't needed, but we do still need to consider
852 // which documents have a value set in this slot. If this value is
853 // set for all documents, we can replace it with the MatchAll
854 // postlist, which is especially efficient if there are no gaps in
855 // the docids.
856 if (db.get_value_freq(slot) == db.get_doccount()) {
857 RETURN(db.open_post_list(string()));
861 RETURN(new ValueRangePostList(&db, slot, string(), limit));
864 void
865 QueryValueLE::serialise(string & result) const
867 // Encode as a range with an empty start (which only takes a single byte to
868 // encode).
869 if (slot < 15) {
870 result += static_cast<char>(0x20 | slot);
871 } else {
872 result += static_cast<char>(0x20 | 15);
873 result += encode_length(slot - 15);
875 result += encode_length(0);
876 result += encode_length(limit.size());
877 result += limit;
880 Query::op
881 QueryValueLE::get_type() const XAPIAN_NOEXCEPT
883 return Query::OP_VALUE_LE;
886 string
887 QueryValueLE::get_description() const
889 string desc = "VALUE_LE ";
890 desc += str(slot);
891 desc += ' ';
892 description_append(desc, limit);
893 return desc;
896 PostingIterator::Internal *
897 QueryValueGE::postlist(QueryOptimiser *qopt, double factor) const
899 LOGCALL(QUERY, PostingIterator::Internal *, "QueryValueGE::postlist", qopt | factor);
900 if (factor != 0.0)
901 qopt->inc_total_subqs();
902 const Xapian::Database::Internal & db = qopt->db;
903 const string & lb = db.get_value_lower_bound(slot);
904 // If lb.empty(), the backend doesn't provide value bounds.
905 if (!lb.empty()) {
906 if (limit > db.get_value_upper_bound(slot)) {
907 RETURN(new EmptyPostList);
909 if (limit < lb) {
910 // The range check isn't needed, but we do still need to consider
911 // which documents have a value set in this slot. If this value is
912 // set for all documents, we can replace it with the MatchAll
913 // postlist, which is especially efficient if there are no gaps in
914 // the docids.
915 if (db.get_value_freq(slot) == db.get_doccount()) {
916 RETURN(db.open_post_list(string()));
920 RETURN(new ValueGePostList(&db, slot, limit));
923 void
924 QueryValueGE::serialise(string & result) const
926 if (slot < 15) {
927 result += static_cast<char>(0x20 | 0x10 | slot);
928 } else {
929 result += static_cast<char>(0x20 | 0x10 | 15);
930 result += encode_length(slot - 15);
932 result += encode_length(limit.size());
933 result += limit;
936 Query::op
937 QueryValueGE::get_type() const XAPIAN_NOEXCEPT
939 return Query::OP_VALUE_GE;
942 string
943 QueryValueGE::get_description() const
945 string desc = "VALUE_GE ";
946 desc += str(slot);
947 desc += ' ';
948 description_append(desc, limit);
949 return desc;
952 PostingIterator::Internal *
953 QueryWildcard::postlist(QueryOptimiser * qopt, double factor) const
955 LOGCALL(QUERY, PostingIterator::Internal *, "QueryWildcard::postlist", qopt | factor);
956 Query::op op = combiner;
957 double or_factor = 0.0;
958 if (factor == 0.0) {
959 // If we have a factor of 0, we don't care about the weights, so
960 // we're just like a normal OR query.
961 op = Query::OP_OR;
962 } else if (op != Query::OP_SYNONYM) {
963 or_factor = factor;
966 bool old_in_synonym = qopt->in_synonym;
967 if (!old_in_synonym) {
968 qopt->in_synonym = (op == Query::OP_SYNONYM);
971 OrContext ctx(0);
972 AutoPtr<TermList> t(qopt->db.open_allterms(pattern));
973 Xapian::termcount expansions_left = max_expansion;
974 // If there's no expansion limit, set expansions_left to the maximum
975 // value Xapian::termcount can hold.
976 if (expansions_left == 0)
977 --expansions_left;
978 while (true) {
979 t->next();
980 if (t->at_end())
981 break;
982 if (max_type < Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT) {
983 if (expansions_left-- == 0) {
984 if (max_type == Xapian::Query::WILDCARD_LIMIT_FIRST)
985 break;
986 string msg("Wildcard ");
987 msg += pattern;
988 msg += "* expands to more than ";
989 msg += str(max_expansion);
990 msg += " terms";
991 throw Xapian::WildcardError(msg);
994 const string & term = t->get_termname();
995 ctx.add_postlist(qopt->open_lazy_post_list(term, 1, or_factor));
998 if (max_type == Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT) {
999 // FIXME: open_lazy_post_list() results in the term getting registered
1000 // for stats, so we still incur an avoidable cost from the full
1001 // expansion size of the wildcard, which is most likely to be visible
1002 // with the remote backend. Perhaps we should split creating the lazy
1003 // postlist from registering the term for stats.
1004 if (ctx.size() > max_expansion)
1005 ctx.select_most_frequent(qopt, max_expansion);
1008 if (factor != 0.0) {
1009 if (op != Query::OP_SYNONYM) {
1010 qopt->set_total_subqs(qopt->get_total_subqs() + ctx.size());
1011 } else {
1012 qopt->inc_total_subqs();
1016 qopt->in_synonym = old_in_synonym;
1018 if (ctx.empty())
1019 RETURN(new EmptyPostList);
1021 if (op == Query::OP_MAX)
1022 RETURN(ctx.postlist_max(qopt));
1024 PostList * pl = ctx.postlist(qopt);
1025 if (op == Query::OP_OR)
1026 RETURN(pl);
1028 // We build an OP_OR tree for OP_SYNONYM and then wrap it in a
1029 // SynonymPostList, which supplies the weights.
1030 PostingIterator::Internal * r = qopt->make_synonym_postlist(pl, factor);
1031 RETURN(r);
1034 termcount
1035 QueryWildcard::get_length() const XAPIAN_NOEXCEPT
1037 // We currently assume wqf is 1 for calculating the synonym's weight
1038 // since conceptually the synonym is one "virtual" term. If we were
1039 // to combine multiple occurrences of the same synonym expansion into
1040 // a single instance with wqf set, we would want to track the wqf.
1041 return 1;
1044 void
1045 QueryWildcard::serialise(string & result) const
1047 result += static_cast<char>(0x0b);
1048 result += encode_length(max_expansion);
1049 result += static_cast<unsigned char>(max_type);
1050 result += static_cast<unsigned char>(combiner);
1051 result += encode_length(pattern.size());
1052 result += pattern;
1055 Query::op
1056 QueryWildcard::get_type() const XAPIAN_NOEXCEPT
1058 return Query::OP_WILDCARD;
1061 string
1062 QueryWildcard::get_description() const
1064 string desc = "WILDCARD ";
1065 switch (combiner) {
1066 case Query::OP_SYNONYM:
1067 desc += "SYNONYM ";
1068 break;
1069 case Query::OP_MAX:
1070 desc += "MAX ";
1071 break;
1072 case Query::OP_OR:
1073 desc += "OR ";
1074 break;
1075 default:
1076 desc += "BAD ";
1077 break;
1079 description_append(desc, pattern);
1080 return desc;
1083 Xapian::termcount
1084 QueryBranch::get_length() const XAPIAN_NOEXCEPT
1086 // Sum results from all subqueries.
1087 Xapian::termcount result = 0;
1088 QueryVector::const_iterator i;
1089 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1090 // MatchNothing subqueries should have been removed by done(), but we
1091 // can't use Assert in a XAPIAN_NOEXCEPT function. But we'll get a
1092 // segfault anyway.
1093 result += (*i).internal->get_length();
1095 return result;
1098 #define MULTIWAY(X) static_cast<unsigned char>(0x80 | (X) << 3)
1099 #define MISC(X) static_cast<unsigned char>(X)
1100 void
1101 QueryBranch::serialise_(string & result, Xapian::termcount parameter) const
1103 static const unsigned char first_byte[] = {
1104 MULTIWAY(0), // OP_AND
1105 MULTIWAY(1), // OP_OR
1106 MULTIWAY(2), // OP_AND_NOT
1107 MULTIWAY(3), // OP_XOR
1108 MULTIWAY(4), // OP_AND_MAYBE
1109 MULTIWAY(5), // OP_FILTER
1110 MULTIWAY(14), // OP_NEAR
1111 MULTIWAY(15), // OP_PHRASE
1112 0, // OP_VALUE_RANGE
1113 MISC(3), // OP_SCALE_WEIGHT
1114 MULTIWAY(13), // OP_ELITE_SET
1115 0, // OP_VALUE_GE
1116 0, // OP_VALUE_LE
1117 MULTIWAY(6), // OP_SYNONYM
1118 MULTIWAY(7) // OP_MAX
1120 Xapian::Query::op op_ = get_op();
1121 AssertRel(size_t(op_),<,sizeof(first_byte));
1122 unsigned char ch = first_byte[op_];
1123 if (ch & 0x80) {
1124 // Multi-way operator.
1125 if (subqueries.size() < 8)
1126 ch |= subqueries.size();
1127 result += ch;
1128 if (subqueries.size() >= 8)
1129 result += encode_length(subqueries.size() - 8);
1130 if (ch >= MULTIWAY(13))
1131 result += encode_length(parameter);
1132 } else {
1133 result += ch;
1136 QueryVector::const_iterator i;
1137 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1138 // MatchNothing subqueries should have been removed by done().
1139 Assert((*i).internal.get());
1140 (*i).internal->serialise(result);
1143 // For OP_NEAR, OP_PHRASE, and OP_ELITE_SET, the window/set size gets
1144 // appended next by an overloaded serialise() method in the subclass.
1147 void
1148 QueryBranch::serialise(string & result) const
1150 QueryBranch::serialise_(result);
1153 void
1154 QueryNear::serialise(string & result) const
1156 // FIXME: window - subqueries.size() ?
1157 QueryBranch::serialise_(result, window);
1160 void
1161 QueryPhrase::serialise(string & result) const
1163 // FIXME: window - subqueries.size() ?
1164 QueryBranch::serialise_(result, window);
1167 void
1168 QueryEliteSet::serialise(string & result) const
1170 // FIXME: set_size - subqueries.size() ?
1171 QueryBranch::serialise_(result, set_size);
1174 void
1175 QueryBranch::gather_terms(void * void_terms) const
1177 // Gather results from all subqueries.
1178 QueryVector::const_iterator i;
1179 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1180 // MatchNothing subqueries should have been removed by done().
1181 Assert((*i).internal.get());
1182 (*i).internal->gather_terms(void_terms);
1186 void
1187 QueryBranch::do_or_like(OrContext& ctx, QueryOptimiser * qopt, double factor,
1188 Xapian::termcount elite_set_size, size_t first) const
1190 LOGCALL_VOID(MATCH, "QueryBranch::do_or_like", ctx | qopt | factor | elite_set_size);
1192 // FIXME: we could optimise by merging OP_ELITE_SET and OP_OR like we do
1193 // for AND-like operations.
1195 // OP_SYNONYM with a single subquery is only simplified by
1196 // QuerySynonym::done() if the single subquery is a term or MatchAll.
1197 Assert(subqueries.size() >= 2 || get_op() == Query::OP_SYNONYM);
1199 vector<PostList *> postlists;
1200 postlists.reserve(subqueries.size() - first);
1202 QueryVector::const_iterator q;
1203 for (q = subqueries.begin() + first; q != subqueries.end(); ++q) {
1204 // MatchNothing subqueries should have been removed by done().
1205 Assert((*q).internal.get());
1206 (*q).internal->postlist_sub_or_like(ctx, qopt, factor);
1209 if (elite_set_size && elite_set_size < subqueries.size()) {
1210 ctx.select_elite_set(qopt, elite_set_size, subqueries.size());
1211 // FIXME: not right!
1215 PostList *
1216 QueryBranch::do_synonym(QueryOptimiser * qopt, double factor) const
1218 LOGCALL(MATCH, PostList *, "QueryBranch::do_synonym", qopt | factor);
1219 OrContext ctx(subqueries.size());
1220 bool old_in_synonym;
1221 if (factor != 0.0) {
1222 old_in_synonym = qopt->in_synonym;
1223 qopt->in_synonym = true;
1225 do_or_like(ctx, qopt, 0.0);
1226 PostList * pl = ctx.postlist(qopt);
1227 if (factor == 0.0) {
1228 // If we have a factor of 0, we don't care about the weights, so
1229 // we're just like a normal OR query.
1230 return pl;
1233 qopt->in_synonym = old_in_synonym;
1235 // We currently assume wqf is 1 for calculating the synonym's weight
1236 // since conceptually the synonym is one "virtual" term. If we were
1237 // to combine multiple occurrences of the same synonym expansion into
1238 // a single instance with wqf set, we would want to track the wqf.
1240 // We build an OP_OR tree for OP_SYNONYM and then wrap it in a
1241 // SynonymPostList, which supplies the weights.
1242 RETURN(qopt->make_synonym_postlist(pl, factor));
1245 PostList *
1246 QueryBranch::do_max(QueryOptimiser * qopt, double factor) const
1248 LOGCALL(MATCH, PostList *, "QueryBranch::do_max", qopt | factor);
1249 OrContext ctx(subqueries.size());
1250 do_or_like(ctx, qopt, factor);
1251 if (factor == 0.0) {
1252 // If we have a factor of 0, we don't care about the weights, so
1253 // we're just like a normal OR query.
1254 RETURN(ctx.postlist(qopt));
1257 // We currently assume wqf is 1 for calculating the OP_MAX's weight
1258 // since conceptually the OP_MAX is one "virtual" term. If we were
1259 // to combine multiple occurrences of the same OP_MAX expansion into
1260 // a single instance with wqf set, we would want to track the wqf.
1261 RETURN(ctx.postlist_max(qopt));
1264 Xapian::Query::op
1265 QueryBranch::get_type() const XAPIAN_NOEXCEPT
1267 return get_op();
1270 size_t
1271 QueryBranch::get_num_subqueries() const XAPIAN_NOEXCEPT
1273 return subqueries.size();
1276 const Query
1277 QueryBranch::get_subquery(size_t n) const
1279 return subqueries[n];
1282 const string
1283 QueryBranch::get_description_helper(const char * op,
1284 Xapian::termcount parameter) const
1286 string desc = "(";
1287 QueryVector::const_iterator i;
1288 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1289 if (desc.size() > 1) {
1290 desc += op;
1291 if (parameter) {
1292 desc += str(parameter);
1293 desc += ' ';
1296 Assert((*i).internal.get());
1297 // MatchNothing subqueries should have been removed by done(), and we
1298 // shouldn't get called before done() is, since that happens at the
1299 // end of the Xapian::Query constructor.
1300 desc += (*i).internal->get_description();
1302 desc += ')';
1303 return desc;
1306 Query::Internal *
1307 QueryWindowed::done()
1309 // If window size not specified, default it.
1310 if (window == 0)
1311 window = subqueries.size();
1312 return QueryAndLike::done();
1315 void
1316 QueryScaleWeight::gather_terms(void * void_terms) const
1318 subquery.internal->gather_terms(void_terms);
1321 void QueryTerm::serialise(string & result) const
1323 size_t len = term.size();
1324 if (len == 0) {
1325 if (wqf == 1 && pos == 0) {
1326 // Query::MatchAll
1327 result += '\x0f';
1328 } else {
1329 // Weird mutant versions of MatchAll
1330 result += '\x0e';
1331 result += encode_length(wqf);
1332 result += encode_length(pos);
1334 } else if (wqf == 1) {
1335 if (pos == 0) {
1336 // Single occurrence probabilistic term without position set.
1337 if (len >= 16) {
1338 result += static_cast<char>(0x40 | 0x10);
1339 result += encode_length(term.size() - 16);
1340 } else {
1341 result += static_cast<char>(0x40 | 0x10 | len);
1343 result += term;
1344 } else {
1345 // Single occurrence probabilistic term with position set.
1346 if (len >= 16) {
1347 result += static_cast<char>(0x40 | 0x20);
1348 result += encode_length(term.size() - 16);
1349 } else {
1350 result += static_cast<char>(0x40 | 0x20 | len);
1352 result += term;
1353 result += encode_length(pos);
1355 } else if (wqf > 1 || pos > 0) {
1356 // General case.
1357 if (len >= 16) {
1358 result += static_cast<char>(0x40 | 0x30);
1359 result += encode_length(term.size() - 16);
1360 } else if (len) {
1361 result += static_cast<char>(0x40 | 0x30 | len);
1363 result += term;
1364 result += encode_length(wqf);
1365 result += encode_length(pos);
1366 } else {
1367 // Typical boolean term.
1368 AssertEq(wqf, 0);
1369 AssertEq(pos, 0);
1370 if (len >= 16) {
1371 result += static_cast<char>(0x40);
1372 result += encode_length(term.size() - 16);
1373 } else {
1374 result += static_cast<char>(0x40 | len);
1376 result += term;
1380 void QueryPostingSource::serialise(string & result) const
1382 result += static_cast<char>(0x0c);
1384 const string & n = source->name();
1385 result += encode_length(n.size());
1386 result += n;
1388 const string & s = source->serialise();
1389 result += encode_length(s.size());
1390 result += s;
1393 void QueryScaleWeight::serialise(string & result) const
1395 Assert(subquery.internal.get());
1396 const string & s = serialise_double(scale_factor);
1397 result += '\x0d';
1398 result += s;
1399 subquery.internal->serialise(result);
1402 struct is_matchnothing {
1403 bool operator()(const Xapian::Query & q) {
1404 return q.internal.get() == NULL;
1408 void
1409 QueryAndLike::add_subquery(const Xapian::Query & subquery)
1411 // If the AndLike is already MatchNothing, do nothing.
1412 if (subqueries.size() == 1 && subqueries[0].internal.get() == NULL)
1413 return;
1414 // If we're adding MatchNothing, discard any previous subqueries.
1415 if (subquery.internal.get() == NULL)
1416 subqueries.clear();
1417 subqueries.push_back(subquery);
1420 Query::Internal *
1421 QueryAndLike::done()
1423 // Empty AndLike gives MatchNothing.
1424 if (subqueries.empty())
1425 return NULL;
1426 // We handle any subquery being MatchNothing in add_subquery() by leaving
1427 // a single MatchNothing subquery, and so this check results in AndLike
1428 // giving MatchNothing.
1429 if (subqueries.size() == 1)
1430 return subqueries[0].internal.get();
1431 return this;
1434 PostingIterator::Internal *
1435 QueryAndLike::postlist(QueryOptimiser * qopt, double factor) const
1437 LOGCALL(QUERY, PostingIterator::Internal *, "QueryAndLike::postlist", qopt | factor);
1438 AndContext ctx(subqueries.size());
1439 postlist_sub_and_like(ctx, qopt, factor);
1440 RETURN(ctx.postlist(qopt));
1443 void
1444 QueryAndLike::postlist_sub_and_like(AndContext& ctx, QueryOptimiser * qopt, double factor) const
1446 QueryVector::const_iterator i;
1447 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1448 // MatchNothing subqueries should have been removed by done().
1449 Assert((*i).internal.get());
1450 (*i).internal->postlist_sub_and_like(ctx, qopt, factor);
1454 void
1455 QueryOrLike::add_subquery(const Xapian::Query & subquery)
1457 // Drop any subqueries which are MatchNothing.
1458 if (subquery.internal.get() != NULL)
1459 subqueries.push_back(subquery);
1462 Query::Internal *
1463 QueryOrLike::done()
1465 // An empty OrLike gives MatchNothing. Note that add_subquery() drops any
1466 // subqueries which are MatchNothing.
1467 if (subqueries.empty())
1468 return NULL;
1469 if (subqueries.size() == 1)
1470 return subqueries[0].internal.get();
1471 return this;
1474 void
1475 QueryAndNot::add_subquery(const Xapian::Query & subquery)
1477 // If the left side of AndNot is already MatchNothing, do nothing.
1478 if (subqueries.size() == 1 && subqueries[0].internal.get() == NULL)
1479 return;
1480 // Drop any 2nd or subsequent subqueries which are MatchNothing.
1481 if (subquery.internal.get() != NULL || subqueries.empty())
1482 subqueries.push_back(subquery);
1485 Query::Internal *
1486 QueryAndNot::done()
1488 // Any MatchNothing right subqueries get discarded by add_subquery() - if
1489 // that leaves just the left subquery, return that.
1491 // If left subquery is MatchNothing, then add_subquery() discards all right
1492 // subqueries, so this check also gives MatchNothing for this case.
1493 if (subqueries.size() == 1)
1494 return subqueries[0].internal.get();
1495 return this;
1498 void
1499 QueryAndMaybe::add_subquery(const Xapian::Query & subquery)
1501 // If the left side of AndMaybe is already MatchNothing, do nothing.
1502 if (subqueries.size() == 1 && subqueries[0].internal.get() == NULL)
1503 return;
1504 // Drop any 2nd or subsequent subqueries which are MatchNothing.
1505 if (subquery.internal.get() != NULL || subqueries.empty())
1506 subqueries.push_back(subquery);
1509 Query::Internal *
1510 QueryAndMaybe::done()
1512 // Any MatchNothing right subqueries get discarded by add_subquery() - if
1513 // that leaves just the left subquery, return that.
1515 // If left subquery is MatchNothing, then add_subquery() discards all right
1516 // subqueries, so this check also gives MatchNothing for this case.
1517 if (subqueries.size() == 1)
1518 return subqueries[0].internal.get();
1519 return this;
1522 PostingIterator::Internal *
1523 QueryOr::postlist(QueryOptimiser * qopt, double factor) const
1525 LOGCALL(QUERY, PostingIterator::Internal *, "QueryOr::postlist", qopt | factor);
1526 OrContext ctx(subqueries.size());
1527 do_or_like(ctx, qopt, factor);
1528 RETURN(ctx.postlist(qopt));
1531 void
1532 QueryOr::postlist_sub_or_like(OrContext& ctx, QueryOptimiser * qopt, double factor) const
1534 do_or_like(ctx, qopt, factor);
1537 PostingIterator::Internal *
1538 QueryAndNot::postlist(QueryOptimiser * qopt, double factor) const
1540 LOGCALL(QUERY, PostingIterator::Internal *, "QueryAndNot::postlist", qopt | factor);
1541 // FIXME: Combine and-like side with and-like stuff above.
1542 AutoPtr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
1543 OrContext ctx(subqueries.size() - 1);
1544 do_or_like(ctx, qopt, 0.0, 0, 1);
1545 AutoPtr<PostList> r(ctx.postlist(qopt));
1546 RETURN(new AndNotPostList(l.release(), r.release(),
1547 qopt->matcher, qopt->db_size));
1550 PostingIterator::Internal *
1551 QueryXor::postlist(QueryOptimiser * qopt, double factor) const
1553 LOGCALL(QUERY, PostingIterator::Internal *, "QueryXor::postlist", qopt | factor);
1554 XorContext ctx(subqueries.size());
1555 postlist_sub_xor(ctx, qopt, factor);
1556 RETURN(ctx.postlist(qopt));
1559 void
1560 QueryXor::postlist_sub_xor(XorContext& ctx, QueryOptimiser * qopt, double factor) const
1562 QueryVector::const_iterator i;
1563 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1564 // MatchNothing subqueries should have been removed by done().
1565 Assert((*i).internal.get());
1566 (*i).internal->postlist_sub_xor(ctx, qopt, factor);
1570 PostingIterator::Internal *
1571 QueryAndMaybe::postlist(QueryOptimiser * qopt, double factor) const
1573 LOGCALL(QUERY, PostingIterator::Internal *, "QueryAndMaybe::postlist", qopt | factor);
1574 // FIXME: Combine and-like side with and-like stuff above.
1575 AutoPtr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
1576 OrContext ctx(subqueries.size() - 1);
1577 do_or_like(ctx, qopt, factor, 0, 1);
1578 AutoPtr<PostList> r(ctx.postlist(qopt));
1579 RETURN(new AndMaybePostList(l.release(), r.release(),
1580 qopt->matcher, qopt->db_size));
1583 PostingIterator::Internal *
1584 QueryFilter::postlist(QueryOptimiser * qopt, double factor) const
1586 LOGCALL(QUERY, PostingIterator::Internal *, "QueryFilter::postlist", qopt | factor);
1587 // FIXME: Combine and-like stuff, like QueryOptimiser.
1588 AssertEq(subqueries.size(), 2);
1589 PostList * pls[2];
1590 AutoPtr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
1591 pls[1] = subqueries[1].internal->postlist(qopt, 0.0);
1592 pls[0] = l.release();
1593 RETURN(new MultiAndPostList(pls, pls + 2, qopt->matcher, qopt->db_size));
1596 void
1597 QueryFilter::postlist_sub_and_like(AndContext& ctx, QueryOptimiser * qopt, double factor) const
1599 QueryVector::const_iterator i;
1600 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1601 // MatchNothing subqueries should have been removed by done().
1602 Assert((*i).internal.get());
1603 (*i).internal->postlist_sub_and_like(ctx, qopt, factor);
1604 // Second and subsequent subqueries are unweighted.
1605 factor = 0.0;
1609 void
1610 QueryWindowed::postlist_windowed(Query::op op, AndContext& ctx, QueryOptimiser * qopt, double factor) const
1612 // FIXME: should has_positions() be on the combined DB (not this sub)?
1613 if (qopt->db.has_positions()) {
1614 bool old_need_positions = qopt->need_positions;
1615 qopt->need_positions = true;
1617 QueryVector::const_iterator i;
1618 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1619 // MatchNothing subqueries should have been removed by done().
1620 Assert((*i).internal.get());
1621 ctx.add_postlist((*i).internal->postlist(qopt, factor));
1623 // Record the positional filter to apply higher up the tree.
1624 ctx.add_pos_filter(op, subqueries.size(), window);
1626 qopt->need_positions = old_need_positions;
1627 } else {
1628 QueryAndLike::postlist_sub_and_like(ctx, qopt, factor);
1632 void
1633 QueryPhrase::postlist_sub_and_like(AndContext & ctx, QueryOptimiser * qopt, double factor) const
1635 QueryWindowed::postlist_windowed(Query::OP_PHRASE, ctx, qopt, factor);
1638 void
1639 QueryNear::postlist_sub_and_like(AndContext & ctx, QueryOptimiser * qopt, double factor) const
1641 QueryWindowed::postlist_windowed(Query::OP_NEAR, ctx, qopt, factor);
1644 PostingIterator::Internal *
1645 QueryEliteSet::postlist(QueryOptimiser * qopt, double factor) const
1647 LOGCALL(QUERY, PostingIterator::Internal *, "QueryEliteSet::postlist", qopt | factor);
1648 OrContext ctx(subqueries.size());
1649 do_or_like(ctx, qopt, factor, set_size);
1650 RETURN(ctx.postlist(qopt));
1653 void
1654 QueryEliteSet::postlist_sub_or_like(OrContext& ctx, QueryOptimiser * qopt, double factor) const
1656 do_or_like(ctx, qopt, factor, set_size);
1659 PostingIterator::Internal *
1660 QuerySynonym::postlist(QueryOptimiser * qopt, double factor) const
1662 LOGCALL(QUERY, PostingIterator::Internal *, "QuerySynonym::postlist", qopt | factor);
1663 // Save and restore total_subqs so we only add one for the whole
1664 // OP_SYNONYM subquery (or none if we're not weighted).
1665 Xapian::termcount save_total_subqs = qopt->get_total_subqs();
1666 if (factor != 0.0)
1667 ++save_total_subqs;
1668 PostList * pl = do_synonym(qopt, factor);
1669 qopt->set_total_subqs(save_total_subqs);
1670 RETURN(pl);
1673 Query::Internal *
1674 QuerySynonym::done()
1676 // An empty Synonym gives MatchNothing. Note that add_subquery() drops any
1677 // subqueries which are MatchNothing.
1678 if (subqueries.empty())
1679 return NULL;
1680 // Synonym of a single subquery should only be simplified if that subquery
1681 // is a term (or MatchAll), or if it's also OP_SYNONYM. Note that
1682 // MatchNothing subqueries are dropped, so we'd never get here with a
1683 // single MatchNothing subquery.
1684 if (subqueries.size() == 1) {
1685 Query::op sub_type = subqueries[0].get_type();
1686 if (sub_type == Query::LEAF_TERM || sub_type == Query::LEAF_MATCH_ALL ||
1687 sub_type == Query::OP_SYNONYM) {
1688 return subqueries[0].internal.get();
1691 return this;
1694 PostingIterator::Internal *
1695 QueryMax::postlist(QueryOptimiser * qopt, double factor) const
1697 LOGCALL(QUERY, PostingIterator::Internal *, "QueryMax::postlist", qopt | factor);
1698 // Save and restore total_subqs so we only add one for the whole
1699 // OP_MAX subquery (or none if we're not weighted).
1700 Xapian::termcount save_total_subqs = qopt->get_total_subqs();
1701 if (factor != 0.0)
1702 ++save_total_subqs;
1703 PostList * pl = do_max(qopt, factor);
1704 qopt->set_total_subqs(save_total_subqs);
1705 RETURN(pl);
1708 Xapian::Query::op
1709 QueryAnd::get_op() const
1711 return Xapian::Query::OP_AND;
1714 Xapian::Query::op
1715 QueryOr::get_op() const
1717 return Xapian::Query::OP_OR;
1720 Xapian::Query::op
1721 QueryAndNot::get_op() const
1723 return Xapian::Query::OP_AND_NOT;
1726 Xapian::Query::op
1727 QueryXor::get_op() const
1729 return Xapian::Query::OP_XOR;
1732 Xapian::Query::op
1733 QueryAndMaybe::get_op() const
1735 return Xapian::Query::OP_AND_MAYBE;
1738 Xapian::Query::op
1739 QueryFilter::get_op() const
1741 return Xapian::Query::OP_FILTER;
1744 Xapian::Query::op
1745 QueryNear::get_op() const
1747 return Xapian::Query::OP_NEAR;
1750 Xapian::Query::op
1751 QueryPhrase::get_op() const
1753 return Xapian::Query::OP_PHRASE;
1756 Xapian::Query::op
1757 QueryEliteSet::get_op() const
1759 return Xapian::Query::OP_ELITE_SET;
1762 Xapian::Query::op
1763 QuerySynonym::get_op() const
1765 return Xapian::Query::OP_SYNONYM;
1768 Xapian::Query::op
1769 QueryMax::get_op() const
1771 return Xapian::Query::OP_MAX;
1774 Xapian::Query::op
1775 QueryWildcard::get_op() const
1777 return Xapian::Query::OP_WILDCARD;
1780 string
1781 QueryAnd::get_description() const
1783 return get_description_helper(" AND ");
1786 string
1787 QueryOr::get_description() const
1789 return get_description_helper(" OR ");
1792 string
1793 QueryAndNot::get_description() const
1795 return get_description_helper(" AND_NOT ");
1798 string
1799 QueryXor::get_description() const
1801 return get_description_helper(" XOR ");
1804 string
1805 QueryAndMaybe::get_description() const
1807 return get_description_helper(" AND_MAYBE ");
1810 string
1811 QueryFilter::get_description() const
1813 return get_description_helper(" FILTER ");
1816 string
1817 QueryNear::get_description() const
1819 return get_description_helper(" NEAR ", window);
1822 string
1823 QueryPhrase::get_description() const
1825 return get_description_helper(" PHRASE ", window);
1828 string
1829 QueryEliteSet::get_description() const
1831 return get_description_helper(" ELITE_SET ", set_size);
1834 string
1835 QuerySynonym::get_description() const
1837 if (subqueries.size() == 1) {
1838 string d = "(SYNONYM ";
1839 d += subqueries[0].internal->get_description();
1840 d += ")";
1841 return d;
1843 return get_description_helper(" SYNONYM ");
1846 string
1847 QueryMax::get_description() const
1849 return get_description_helper(" MAX ");
1852 Xapian::Query::op
1853 QueryInvalid::get_type() const XAPIAN_NOEXCEPT
1855 return Xapian::Query::OP_INVALID;
1858 PostingIterator::Internal *
1859 QueryInvalid::postlist(QueryOptimiser *, double) const
1861 throw Xapian::InvalidOperationError("Query is invalid");
1864 void
1865 QueryInvalid::serialise(std::string & result) const
1867 result += static_cast<char>(0x00);
1870 string
1871 QueryInvalid::get_description() const
1873 return "<INVALID>";