Website now in git not CVS
[xapian.git] / xapian-core / api / queryinternal.cc
blob86b6d7050fa74c92176e48c2d3edacc3a0f04e97
1 /** @file queryinternal.cc
2 * @brief Xapian::Query internals
3 */
4 /* Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014,2015,2016 Olly Betts
5 * Copyright (C) 2008,2009 Lemur Consulting Ltd
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #include <config.h>
24 #include "queryinternal.h"
26 #include "xapian/postingsource.h"
27 #include "xapian/query.h"
29 #include "matcher/const_database_wrapper.h"
30 #include "leafpostlist.h"
31 #include "matcher/andmaybepostlist.h"
32 #include "matcher/andnotpostlist.h"
33 #include "emptypostlist.h"
34 #include "matcher/exactphrasepostlist.h"
35 #include "matcher/externalpostlist.h"
36 #include "matcher/maxpostlist.h"
37 #include "matcher/multiandpostlist.h"
38 #include "matcher/multixorpostlist.h"
39 #include "matcher/nearpostlist.h"
40 #include "matcher/orpostlist.h"
41 #include "matcher/phrasepostlist.h"
42 #include "matcher/queryoptimiser.h"
43 #include "matcher/valuerangepostlist.h"
44 #include "matcher/valuegepostlist.h"
45 #include "net/length.h"
46 #include "serialise-double.h"
47 #include "termlist.h"
49 #include "autoptr.h"
50 #include "debuglog.h"
51 #include "omassert.h"
52 #include "str.h"
53 #include "unicode/description_append.h"
55 #include <algorithm>
56 #include <functional>
57 #include <list>
58 #include <string>
59 #include <vector>
61 using namespace std;
63 template<class CLASS> struct delete_ptr {
64 void operator()(CLASS *p) { delete p; }
67 using Xapian::Internal::AndContext;
68 using Xapian::Internal::OrContext;
69 using Xapian::Internal::XorContext;
71 namespace Xapian {
73 namespace Internal {
75 /** Class providing an operator which sorts postlists to select max or terms.
76 * This returns true if a has a (strictly) greater termweight than b,
77 * unless a or b contain no documents, in which case the other one is
78 * selected.
80 struct CmpMaxOrTerms {
81 /** Return true if and only if a has a strictly greater termweight than b. */
82 bool operator()(const PostList *a, const PostList *b) {
83 #if (defined(__i386__) && !defined(__SSE2_MATH__)) || defined(__mc68000__) || defined(__mc68010__) || defined(__mc68020__) || defined(__mc68030__)
84 // On some architectures, most common of which is x86, floating point
85 // values are calculated and stored in registers with excess precision.
86 // If the two get_maxweight() calls below return identical values in a
87 // register, the excess precision may be dropped for one of them but
88 // not the other (e.g. because the compiler saves the first calculated
89 // weight to memory while calculating the second, then reloads it to
90 // compare). This leads to both a > b and b > a being true, which
91 // violates the antisymmetry property of the strict weak ordering
92 // required by nth_element(). This can have serious consequences (e.g.
93 // segfaults).
95 // Note that m68k only has excess precision in earlier models - 68040
96 // and later are OK:
97 // http://gcc.gnu.org/ml/gcc-patches/2008-11/msg00105.html
99 // To avoid this, we store each result in a volatile double prior to
100 // comparing them. This means that the result of this test should
101 // match that on other architectures with the same double format (which
102 // is desirable), and actually has less overhead than rounding both
103 // results to float (which is another approach which works).
104 volatile double a_max_wt = a->get_maxweight();
105 volatile double b_max_wt = b->get_maxweight();
106 return a_max_wt > b_max_wt;
107 #else
108 return a->get_maxweight() > b->get_maxweight();
109 #endif
113 /// Comparison functor which orders PostList* by descending get_termfreq_est().
114 struct ComparePostListTermFreqAscending {
115 /// Order by descending get_termfreq_est().
116 bool operator()(const PostList *a, const PostList *b) {
117 return a->get_termfreq_est() > b->get_termfreq_est();
121 class Context {
122 protected:
123 vector<PostList*> pls;
125 public:
126 explicit Context(size_t reserve);
128 ~Context();
130 void add_postlist(PostList * pl) {
131 pls.push_back(pl);
134 bool empty() const {
135 return pls.empty();
138 size_t size() const {
139 return pls.size();
143 Context::Context(size_t reserve) {
144 pls.reserve(reserve);
147 Context::~Context()
149 for_each(pls.begin(), pls.end(), delete_ptr<PostList>());
152 class OrContext : public Context {
153 public:
154 explicit OrContext(size_t reserve) : Context(reserve) { }
156 /// Select the best set_size postlists from the last out_of added.
157 void select_elite_set(QueryOptimiser * qopt,
158 size_t set_size, size_t out_of);
160 /// Select the set_size postlists with the highest term frequency.
161 void select_most_frequent(QueryOptimiser * qopt, size_t set_size);
163 PostList * postlist(QueryOptimiser* qopt);
164 PostList * postlist_max(QueryOptimiser* qopt);
167 void
168 OrContext::select_elite_set(QueryOptimiser * qopt,
169 size_t set_size, size_t out_of)
171 // Call recalc_maxweight() as otherwise get_maxweight()
172 // may not be valid before next() or skip_to()
173 vector<PostList*>::iterator begin = pls.begin() + pls.size() - out_of;
174 for_each(begin, pls.end(), mem_fun(&PostList::recalc_maxweight));
176 nth_element(begin, begin + set_size - 1, pls.end(), CmpMaxOrTerms());
177 const PostList * hint_pl = qopt->get_hint_postlist();
178 for_each(begin + set_size, pls.end(),
179 [&qopt, &hint_pl](const PostList * p) {
180 if (rare(p == hint_pl)) {
181 qopt->take_hint_ownership();
182 hint_pl = NULL;
183 } else {
184 delete p;
187 pls.resize(pls.size() - out_of + set_size);
190 void
191 OrContext::select_most_frequent(QueryOptimiser * qopt, size_t set_size)
193 vector<PostList*>::iterator begin = pls.begin();
194 nth_element(begin, begin + set_size - 1, pls.end(),
195 ComparePostListTermFreqAscending());
196 const PostList * hint_pl = qopt->get_hint_postlist();
197 for_each(begin + set_size, pls.end(),
198 [&qopt, &hint_pl](const PostList * p) {
199 if (rare(p == hint_pl)) {
200 qopt->take_hint_ownership();
201 hint_pl = NULL;
202 } else {
203 delete p;
206 pls.resize(set_size);
209 PostList *
210 OrContext::postlist(QueryOptimiser* qopt)
212 Assert(!pls.empty());
214 if (pls.size() == 1) {
215 PostList * pl = pls[0];
216 pls.clear();
217 return pl;
220 // Make postlists into a heap so that the postlist with the greatest term
221 // frequency is at the top of the heap.
222 make_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
224 // Now build a tree of binary OrPostList objects.
226 // The algorithm used to build the tree is like that used to build an
227 // optimal Huffman coding tree. If we called next() repeatedly, this
228 // arrangement would minimise the number of method calls. Generally we
229 // don't actually do that, but this arrangement is still likely to be a
230 // good one, and it does minimise the work in the worst case.
231 while (true) {
232 // We build the tree such that at each branch:
234 // l.get_termfreq_est() >= r.get_termfreq_est()
236 // We do this so that the OrPostList class can be optimised assuming
237 // that this is the case.
238 PostList * r = pls.front();
239 pop_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
240 pls.pop_back();
241 PostList * pl;
242 pl = new OrPostList(pls.front(), r, qopt->matcher, qopt->db_size);
244 if (pls.size() == 1) {
245 pls.clear();
246 return pl;
249 pop_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
250 pls.back() = pl;
251 push_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
255 PostList *
256 OrContext::postlist_max(QueryOptimiser* qopt)
258 Assert(!pls.empty());
260 if (pls.size() == 1) {
261 PostList * pl = pls[0];
262 pls.clear();
263 return pl;
266 // Sort the postlists so that the postlist with the greatest term frequency
267 // is first.
268 sort(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
270 PostList * pl;
271 pl = new MaxPostList(pls.begin(), pls.end(), qopt->matcher, qopt->db_size);
273 pls.clear();
274 return pl;
277 class XorContext : public Context {
278 public:
279 explicit XorContext(size_t reserve) : Context(reserve) { }
281 PostList * postlist(QueryOptimiser* qopt);
284 PostList *
285 XorContext::postlist(QueryOptimiser* qopt)
287 Xapian::doccount db_size = qopt->db_size;
288 PostList * pl;
289 pl = new MultiXorPostList(pls.begin(), pls.end(), qopt->matcher, db_size);
291 // Empty pls so our destructor doesn't delete them all!
292 pls.clear();
293 return pl;
296 class AndContext : public Context {
297 class PosFilter {
298 Xapian::Query::op op_;
300 /// Start and end indices for the PostLists this positional filter uses.
301 size_t begin, end;
303 Xapian::termcount window;
305 public:
306 PosFilter(Xapian::Query::op op__, size_t begin_, size_t end_,
307 Xapian::termcount window_)
308 : op_(op__), begin(begin_), end(end_), window(window_) { }
310 PostList * postlist(PostList * pl, const vector<PostList*>& pls) const;
313 list<PosFilter> pos_filters;
315 public:
316 explicit AndContext(size_t reserve) : Context(reserve) { }
318 void add_pos_filter(Query::op op_,
319 size_t n_subqs,
320 Xapian::termcount window);
322 PostList * postlist(QueryOptimiser* qopt);
325 PostList *
326 AndContext::PosFilter::postlist(PostList * pl, const vector<PostList*>& pls) const
327 try {
328 vector<PostList *>::const_iterator terms_begin = pls.begin() + begin;
329 vector<PostList *>::const_iterator terms_end = pls.begin() + end;
331 if (op_ == Xapian::Query::OP_NEAR) {
332 pl = new NearPostList(pl, window, terms_begin, terms_end);
333 } else if (window == end - begin) {
334 AssertEq(op_, Xapian::Query::OP_PHRASE);
335 pl = new ExactPhrasePostList(pl, terms_begin, terms_end);
336 } else {
337 AssertEq(op_, Xapian::Query::OP_PHRASE);
338 pl = new PhrasePostList(pl, window, terms_begin, terms_end);
340 return pl;
341 } catch (...) {
342 delete pl;
343 throw;
346 void
347 AndContext::add_pos_filter(Query::op op_,
348 size_t n_subqs,
349 Xapian::termcount window)
351 Assert(n_subqs > 1);
352 size_t end = pls.size();
353 size_t begin = end - n_subqs;
354 pos_filters.push_back(PosFilter(op_, begin, end, window));
357 PostList *
358 AndContext::postlist(QueryOptimiser* qopt)
360 AutoPtr<PostList> pl(new MultiAndPostList(pls.begin(), pls.end(),
361 qopt->matcher, qopt->db_size));
363 // Sort the positional filters to try to apply them in an efficient order.
364 // FIXME: We need to figure out what that is! Try applying lowest cf/tf
365 // first?
367 // Apply any positional filters.
368 list<PosFilter>::const_iterator i;
369 for (i = pos_filters.begin(); i != pos_filters.end(); ++i) {
370 const PosFilter & filter = *i;
371 pl.reset(filter.postlist(pl.release(), pls));
374 // Empty pls so our destructor doesn't delete them all!
375 pls.clear();
376 return pl.release();
381 Query::Internal::~Internal() { }
383 size_t
384 Query::Internal::get_num_subqueries() const XAPIAN_NOEXCEPT
386 return 0;
389 const Query
390 Query::Internal::get_subquery(size_t) const
392 throw Xapian::InvalidArgumentError("get_subquery() not meaningful for this Query object");
395 void
396 Query::Internal::gather_terms(void *) const
400 Xapian::termcount
401 Query::Internal::get_length() const XAPIAN_NOEXCEPT
403 return 0;
406 Query::Internal *
407 Query::Internal::unserialise(const char ** p, const char * end,
408 const Registry & reg)
410 if (*p == end)
411 return NULL;
412 unsigned char ch = *(*p)++;
413 switch (ch >> 5) {
414 case 4: case 5: case 6: case 7: { // Multi-way branch
415 size_t n_subqs = ch & 0x07;
416 if (n_subqs == 0) {
417 decode_length(p, end, n_subqs);
418 n_subqs += 8;
420 unsigned char code = (ch >> 3) & 0x0f;
421 Xapian::termcount parameter = 0;
422 if (code >= 13)
423 decode_length(p, end, parameter);
424 Xapian::Internal::QueryBranch * result;
425 switch (code) {
426 case 0: // OP_AND
427 result = new Xapian::Internal::QueryAnd(n_subqs);
428 break;
429 case 1: // OP_OR
430 result = new Xapian::Internal::QueryOr(n_subqs);
431 break;
432 case 2: // OP_AND_NOT
433 result = new Xapian::Internal::QueryAndNot(n_subqs);
434 break;
435 case 3: // OP_XOR
436 result = new Xapian::Internal::QueryXor(n_subqs);
437 break;
438 case 4: // OP_AND_MAYBE
439 result = new Xapian::Internal::QueryAndMaybe(n_subqs);
440 break;
441 case 5: // OP_FILTER
442 result = new Xapian::Internal::QueryFilter(n_subqs);
443 break;
444 case 6: // OP_SYNONYM
445 result = new Xapian::Internal::QuerySynonym(n_subqs);
446 break;
447 case 7: // OP_MAX
448 result = new Xapian::Internal::QueryMax(n_subqs);
449 break;
450 case 13: // OP_ELITE_SET
451 result = new Xapian::Internal::QueryEliteSet(n_subqs,
452 parameter);
453 break;
454 case 14: // OP_NEAR
455 result = new Xapian::Internal::QueryNear(n_subqs,
456 parameter);
457 break;
458 case 15: // OP_PHRASE
459 result = new Xapian::Internal::QueryPhrase(n_subqs,
460 parameter);
461 break;
462 default:
463 // 8 to 12 are currently unused.
464 throw SerialisationError("Unknown multi-way branch Query operator");
466 do {
467 result->add_subquery(Xapian::Query(unserialise(p, end, reg)));
468 } while (--n_subqs);
469 result->done();
470 return result;
472 case 2: case 3: { // Term
473 size_t len = ch & 0x0f;
474 if (len == 0) {
475 decode_length(p, end, len);
476 len += 16;
478 if (size_t(end - *p) < len)
479 throw SerialisationError("Not enough data");
480 string term(*p, len);
481 *p += len;
483 int code = ((ch >> 4) & 0x03);
485 Xapian::termcount wqf = static_cast<Xapian::termcount>(code > 0);
486 if (code == 3)
487 decode_length(p, end, wqf);
489 Xapian::termpos pos = 0;
490 if (code >= 2)
491 decode_length(p, end, pos);
493 return new Xapian::Internal::QueryTerm(term, wqf, pos);
495 case 1: { // OP_VALUE_RANGE or OP_VALUE_GE or OP_VALUE_LE
496 Xapian::valueno slot = ch & 15;
497 if (slot == 15) {
498 decode_length(p, end, slot);
499 slot += 15;
501 size_t len;
502 decode_length_and_check(p, end, len);
503 string begin(*p, len);
504 *p += len;
505 if (ch & 0x10) {
506 // OP_VALUE_GE
507 return new Xapian::Internal::QueryValueGE(slot, begin);
510 // OP_VALUE_RANGE
511 decode_length_and_check(p, end, len);
512 string end_(*p, len);
513 *p += len;
514 if (begin.empty()) // FIXME: is this right?
515 return new Xapian::Internal::QueryValueLE(slot, end_);
516 return new Xapian::Internal::QueryValueRange(slot, begin, end_);
518 case 0: {
519 switch (ch & 0x0f) {
520 case 0x0b: { // Wildcard
521 if (*p == end)
522 throw SerialisationError("not enough data");
523 Xapian::termcount max_expansion;
524 decode_length(p, end, max_expansion);
525 if (end - *p < 2)
526 throw SerialisationError("not enough data");
527 int max_type = static_cast<unsigned char>(*(*p)++);
528 op combiner = static_cast<op>(*(*p)++);
529 size_t len;
530 decode_length_and_check(p, end, len);
531 string pattern(*p, len);
532 *p += len;
533 return new Xapian::Internal::QueryWildcard(pattern,
534 max_expansion,
535 max_type,
536 combiner);
538 case 0x0c: { // PostingSource
539 size_t len;
540 decode_length_and_check(p, end, len);
541 string name(*p, len);
542 *p += len;
544 const PostingSource * reg_source = reg.get_posting_source(name);
545 if (!reg_source) {
546 string m = "PostingSource ";
547 m += name;
548 m += " not registered";
549 throw SerialisationError(m);
552 decode_length_and_check(p, end, len);
553 PostingSource * source =
554 reg_source->unserialise_with_registry(string(*p, len),
555 reg);
556 *p += len;
557 return new Xapian::Internal::QueryPostingSource(source->release());
559 case 0x0d: {
560 using Xapian::Internal::QueryScaleWeight;
561 double scale_factor = unserialise_double(p, end);
562 return new QueryScaleWeight(scale_factor,
563 Query(unserialise(p, end, reg)));
565 case 0x0e: {
566 Xapian::termcount wqf;
567 Xapian::termpos pos;
568 decode_length(p, end, wqf);
569 decode_length(p, end, pos);
570 return new Xapian::Internal::QueryTerm(string(), wqf, pos);
572 case 0x0f:
573 return Query::MatchAll.internal.get();
574 default: // Others currently unused.
575 break;
577 break;
580 string msg = "Unknown Query serialisation: ";
581 msg += str(ch);
582 throw SerialisationError(msg);
585 void
586 Query::Internal::postlist_sub_and_like(AndContext& ctx,
587 QueryOptimiser * qopt,
588 double factor) const
590 ctx.add_postlist(postlist(qopt, factor));
593 void
594 Query::Internal::postlist_sub_or_like(OrContext& ctx,
595 QueryOptimiser * qopt,
596 double factor) const
598 ctx.add_postlist(postlist(qopt, factor));
601 void
602 Query::Internal::postlist_sub_xor(XorContext& ctx,
603 QueryOptimiser * qopt,
604 double factor) const
606 ctx.add_postlist(postlist(qopt, factor));
609 namespace Internal {
611 Query::op
612 QueryTerm::get_type() const XAPIAN_NOEXCEPT
614 return term.empty() ? Query::LEAF_MATCH_ALL : Query::LEAF_TERM;
617 string
618 QueryTerm::get_description() const
620 string desc;
621 if (term.empty()) {
622 desc = "<alldocuments>";
623 } else {
624 description_append(desc, term);
626 if (wqf != 1) {
627 desc += '#';
628 desc += str(wqf);
630 if (pos) {
631 desc += '@';
632 desc += str(pos);
634 return desc;
637 QueryPostingSource::QueryPostingSource(PostingSource * source_)
638 : source(source_)
640 if (!source_)
641 throw Xapian::InvalidArgumentError("source parameter can't be NULL");
644 Query::op
645 QueryPostingSource::get_type() const XAPIAN_NOEXCEPT
647 return Query::LEAF_POSTING_SOURCE;
650 string
651 QueryPostingSource::get_description() const
653 string desc = "PostingSource(";
654 desc += source->get_description();
655 desc += ')';
656 return desc;
659 QueryScaleWeight::QueryScaleWeight(double factor, const Query & subquery_)
660 : scale_factor(factor), subquery(subquery_)
662 if (rare(scale_factor < 0.0))
663 throw Xapian::InvalidArgumentError("OP_SCALE_WEIGHT requires factor >= 0");
666 Query::op
667 QueryScaleWeight::get_type() const XAPIAN_NOEXCEPT
669 return Query::OP_SCALE_WEIGHT;
672 size_t
673 QueryScaleWeight::get_num_subqueries() const XAPIAN_NOEXCEPT
675 return 1;
678 const Query
679 QueryScaleWeight::get_subquery(size_t) const
681 return subquery;
684 string
685 QueryScaleWeight::get_description() const
687 Assert(subquery.internal.get());
688 string desc = str(scale_factor);
689 desc += " * ";
690 desc += subquery.internal->get_description();
691 return desc;
694 PostingIterator::Internal *
695 QueryTerm::postlist(QueryOptimiser * qopt, double factor) const
697 LOGCALL(QUERY, PostingIterator::Internal *, "QueryTerm::postlist", qopt | factor);
698 if (factor != 0.0)
699 qopt->inc_total_subqs();
700 RETURN(qopt->open_post_list(term, wqf, factor));
703 PostingIterator::Internal *
704 QueryPostingSource::postlist(QueryOptimiser * qopt, double factor) const
706 LOGCALL(QUERY, PostingIterator::Internal *, "QueryPostingSource::postlist", qopt | factor);
707 Assert(source.get());
708 if (factor != 0.0)
709 qopt->inc_total_subqs();
710 Xapian::Database wrappeddb(new ConstDatabaseWrapper(&(qopt->db)));
711 RETURN(new ExternalPostList(wrappeddb, source.get(), factor, qopt->matcher));
714 PostingIterator::Internal *
715 QueryScaleWeight::postlist(QueryOptimiser * qopt, double factor) const
717 LOGCALL(QUERY, PostingIterator::Internal *, "QueryScaleWeight::postlist", qopt | factor);
718 RETURN(subquery.internal->postlist(qopt, factor * scale_factor));
721 void
722 QueryTerm::gather_terms(void * void_terms) const
724 // Skip Xapian::Query::MatchAll (aka Xapian::Query("")).
725 if (!term.empty()) {
726 vector<pair<Xapian::termpos, string> > &terms =
727 *static_cast<vector<pair<Xapian::termpos, string> >*>(void_terms);
728 terms.push_back(make_pair(pos, term));
732 PostingIterator::Internal *
733 QueryValueRange::postlist(QueryOptimiser *qopt, double factor) const
735 LOGCALL(QUERY, PostingIterator::Internal *, "QueryValueRange::postlist", qopt | factor);
736 if (factor != 0.0)
737 qopt->inc_total_subqs();
738 const Xapian::Database::Internal & db = qopt->db;
739 const string & lb = db.get_value_lower_bound(slot);
740 // If lb.empty(), the backend doesn't provide value bounds.
741 if (!lb.empty()) {
742 if (end < lb) {
743 RETURN(new EmptyPostList);
745 const string & ub = db.get_value_upper_bound(slot);
746 if (begin > ub) {
747 RETURN(new EmptyPostList);
749 if (end >= ub) {
750 // If begin <= lb too, then the range check isn't needed, but we do
751 // still need to consider which documents have a value set in this
752 // slot. If this value is set for all documents, we can replace it
753 // with the MatchAll postlist, which is especially efficient if
754 // there are no gaps in the docids.
755 if (begin <= lb && db.get_value_freq(slot) == db.get_doccount()) {
756 RETURN(db.open_post_list(string()));
758 RETURN(new ValueGePostList(&db, slot, begin));
761 RETURN(new ValueRangePostList(&db, slot, begin, end));
764 void
765 QueryValueRange::serialise(string & result) const
767 if (slot < 15) {
768 result += static_cast<char>(0x20 | slot);
769 } else {
770 result += static_cast<char>(0x20 | 15);
771 result += encode_length(slot - 15);
773 result += encode_length(begin.size());
774 result += begin;
775 result += encode_length(end.size());
776 result += end;
779 Query::op
780 QueryValueRange::get_type() const XAPIAN_NOEXCEPT
782 return Query::OP_VALUE_RANGE;
785 string
786 QueryValueRange::get_description() const
788 string desc = "VALUE_RANGE ";
789 desc += str(slot);
790 desc += ' ';
791 description_append(desc, begin);
792 desc += ' ';
793 description_append(desc, end);
794 return desc;
797 PostingIterator::Internal *
798 QueryValueLE::postlist(QueryOptimiser *qopt, double factor) const
800 LOGCALL(QUERY, PostingIterator::Internal *, "QueryValueLE::postlist", qopt | factor);
801 if (factor != 0.0)
802 qopt->inc_total_subqs();
803 const Xapian::Database::Internal & db = qopt->db;
804 const string & lb = db.get_value_lower_bound(slot);
805 // If lb.empty(), the backend doesn't provide value bounds.
806 if (!lb.empty()) {
807 if (limit < lb) {
808 RETURN(new EmptyPostList);
810 if (limit >= db.get_value_upper_bound(slot)) {
811 // The range check isn't needed, but we do still need to consider
812 // which documents have a value set in this slot. If this value is
813 // set for all documents, we can replace it with the MatchAll
814 // postlist, which is especially efficient if there are no gaps in
815 // the docids.
816 if (db.get_value_freq(slot) == db.get_doccount()) {
817 RETURN(db.open_post_list(string()));
821 RETURN(new ValueRangePostList(&db, slot, string(), limit));
824 void
825 QueryValueLE::serialise(string & result) const
827 // Encode as a range with an empty start (which only takes a single byte to
828 // encode).
829 if (slot < 15) {
830 result += static_cast<char>(0x20 | slot);
831 } else {
832 result += static_cast<char>(0x20 | 15);
833 result += encode_length(slot - 15);
835 result += encode_length(0);
836 result += encode_length(limit.size());
837 result += limit;
840 Query::op
841 QueryValueLE::get_type() const XAPIAN_NOEXCEPT
843 return Query::OP_VALUE_LE;
846 string
847 QueryValueLE::get_description() const
849 string desc = "VALUE_LE ";
850 desc += str(slot);
851 desc += ' ';
852 description_append(desc, limit);
853 return desc;
856 PostingIterator::Internal *
857 QueryValueGE::postlist(QueryOptimiser *qopt, double factor) const
859 LOGCALL(QUERY, PostingIterator::Internal *, "QueryValueGE::postlist", qopt | factor);
860 if (factor != 0.0)
861 qopt->inc_total_subqs();
862 const Xapian::Database::Internal & db = qopt->db;
863 const string & lb = db.get_value_lower_bound(slot);
864 // If lb.empty(), the backend doesn't provide value bounds.
865 if (!lb.empty()) {
866 if (limit > db.get_value_upper_bound(slot)) {
867 RETURN(new EmptyPostList);
869 if (limit < lb) {
870 // The range check isn't needed, but we do still need to consider
871 // which documents have a value set in this slot. If this value is
872 // set for all documents, we can replace it with the MatchAll
873 // postlist, which is especially efficient if there are no gaps in
874 // the docids.
875 if (db.get_value_freq(slot) == db.get_doccount()) {
876 RETURN(db.open_post_list(string()));
880 RETURN(new ValueGePostList(&db, slot, limit));
883 void
884 QueryValueGE::serialise(string & result) const
886 if (slot < 15) {
887 result += static_cast<char>(0x20 | 0x10 | slot);
888 } else {
889 result += static_cast<char>(0x20 | 0x10 | 15);
890 result += encode_length(slot - 15);
892 result += encode_length(limit.size());
893 result += limit;
896 Query::op
897 QueryValueGE::get_type() const XAPIAN_NOEXCEPT
899 return Query::OP_VALUE_GE;
902 string
903 QueryValueGE::get_description() const
905 string desc = "VALUE_GE ";
906 desc += str(slot);
907 desc += ' ';
908 description_append(desc, limit);
909 return desc;
912 PostingIterator::Internal *
913 QueryWildcard::postlist(QueryOptimiser * qopt, double factor) const
915 LOGCALL(QUERY, PostingIterator::Internal *, "QueryWildcard::postlist", qopt | factor);
916 Query::op op = combiner;
917 double or_factor = 0.0;
918 if (factor == 0.0) {
919 // If we have a factor of 0, we don't care about the weights, so
920 // we're just like a normal OR query.
921 op = Query::OP_OR;
922 } else if (op != Query::OP_SYNONYM) {
923 or_factor = factor;
925 OrContext ctx(0);
926 AutoPtr<TermList> t(qopt->db.open_allterms(pattern));
927 Xapian::termcount expansions_left = max_expansion;
928 // If there's no expansion limit, set expansions_left to the maximum
929 // value Xapian::termcount can hold.
930 if (expansions_left == 0)
931 --expansions_left;
932 while (true) {
933 t->next();
934 if (t->at_end())
935 break;
936 if (max_type < Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT) {
937 if (expansions_left-- == 0) {
938 if (max_type == Xapian::Query::WILDCARD_LIMIT_FIRST)
939 break;
940 string msg("Wildcard ");
941 msg += pattern;
942 msg += "* expands to more than ";
943 msg += str(max_expansion);
944 msg += " terms";
945 throw Xapian::WildcardError(msg);
948 const string & term = t->get_termname();
949 ctx.add_postlist(qopt->open_lazy_post_list(term, 1, or_factor));
952 if (max_type == Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT) {
953 // FIXME: open_lazy_post_list() results in the term getting registered
954 // for stats, so we still incur an avoidable cost from the full
955 // expansion size of the wildcard, which is most likely to be visible
956 // with the remote backend. Perhaps we should split creating the lazy
957 // postlist from registering the term for stats.
958 if (ctx.size() > max_expansion)
959 ctx.select_most_frequent(qopt, max_expansion);
962 if (factor != 0.0) {
963 if (op != Query::OP_SYNONYM) {
964 qopt->set_total_subqs(qopt->get_total_subqs() + ctx.size());
965 } else {
966 qopt->inc_total_subqs();
970 if (ctx.empty())
971 RETURN(new EmptyPostList);
973 if (op == Query::OP_MAX)
974 RETURN(ctx.postlist_max(qopt));
976 PostList * pl = ctx.postlist(qopt);
977 if (op == Query::OP_OR)
978 RETURN(pl);
980 // We build an OP_OR tree for OP_SYNONYM and then wrap it in a
981 // SynonymPostList, which supplies the weights.
982 PostingIterator::Internal * r = qopt->make_synonym_postlist(pl, factor);
983 RETURN(r);
986 termcount
987 QueryWildcard::get_length() const XAPIAN_NOEXCEPT
989 // We currently assume wqf is 1 for calculating the synonym's weight
990 // since conceptually the synonym is one "virtual" term. If we were
991 // to combine multiple occurrences of the same synonym expansion into
992 // a single instance with wqf set, we would want to track the wqf.
993 return 1;
996 void
997 QueryWildcard::serialise(string & result) const
999 result += static_cast<char>(0x0b);
1000 result += encode_length(max_expansion);
1001 result += static_cast<unsigned char>(max_type);
1002 result += static_cast<unsigned char>(combiner);
1003 result += encode_length(pattern.size());
1004 result += pattern;
1007 Query::op
1008 QueryWildcard::get_type() const XAPIAN_NOEXCEPT
1010 return Query::OP_WILDCARD;
1013 string
1014 QueryWildcard::get_description() const
1016 string desc = "WILDCARD ";
1017 switch (combiner) {
1018 case Query::OP_SYNONYM:
1019 desc += "SYNONYM ";
1020 break;
1021 case Query::OP_MAX:
1022 desc += "MAX ";
1023 break;
1024 case Query::OP_OR:
1025 desc += "OR ";
1026 break;
1027 default:
1028 desc += "BAD ";
1029 break;
1031 description_append(desc, pattern);
1032 return desc;
1035 Xapian::termcount
1036 QueryBranch::get_length() const XAPIAN_NOEXCEPT
1038 // Sum results from all subqueries.
1039 Xapian::termcount result = 0;
1040 QueryVector::const_iterator i;
1041 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1042 // MatchNothing subqueries should have been removed by done(), but we
1043 // can't use Assert in a XAPIAN_NOEXCEPT function. But we'll get a
1044 // segfault anyway.
1045 result += (*i).internal->get_length();
1047 return result;
1050 #define MULTIWAY(X) static_cast<unsigned char>(0x80 | (X) << 3)
1051 #define MISC(X) static_cast<unsigned char>(X)
1052 void
1053 QueryBranch::serialise_(string & result, Xapian::termcount parameter) const
1055 static const unsigned char first_byte[] = {
1056 MULTIWAY(0), // OP_AND
1057 MULTIWAY(1), // OP_OR
1058 MULTIWAY(2), // OP_AND_NOT
1059 MULTIWAY(3), // OP_XOR
1060 MULTIWAY(4), // OP_AND_MAYBE
1061 MULTIWAY(5), // OP_FILTER
1062 MULTIWAY(14), // OP_NEAR
1063 MULTIWAY(15), // OP_PHRASE
1064 0, // OP_VALUE_RANGE
1065 MISC(3), // OP_SCALE_WEIGHT
1066 MULTIWAY(13), // OP_ELITE_SET
1067 0, // OP_VALUE_GE
1068 0, // OP_VALUE_LE
1069 MULTIWAY(6), // OP_SYNONYM
1070 MULTIWAY(7) // OP_MAX
1072 Xapian::Query::op op_ = get_op();
1073 AssertRel(size_t(op_),<,sizeof(first_byte));
1074 unsigned char ch = first_byte[op_];
1075 if (ch & 0x80) {
1076 // Multi-way operator.
1077 if (subqueries.size() < 8)
1078 ch |= subqueries.size();
1079 result += ch;
1080 if (subqueries.size() >= 8)
1081 result += encode_length(subqueries.size() - 8);
1082 if (ch >= MULTIWAY(13))
1083 result += encode_length(parameter);
1084 } else {
1085 result += ch;
1088 QueryVector::const_iterator i;
1089 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1090 // MatchNothing subqueries should have been removed by done().
1091 Assert((*i).internal.get());
1092 (*i).internal->serialise(result);
1095 // For OP_NEAR, OP_PHRASE, and OP_ELITE_SET, the window/set size gets
1096 // appended next by an overloaded serialise() method in the subclass.
1099 void
1100 QueryBranch::serialise(string & result) const
1102 QueryBranch::serialise_(result);
1105 void
1106 QueryNear::serialise(string & result) const
1108 // FIXME: window - subqueries.size() ?
1109 QueryBranch::serialise_(result, window);
1112 void
1113 QueryPhrase::serialise(string & result) const
1115 // FIXME: window - subqueries.size() ?
1116 QueryBranch::serialise_(result, window);
1119 void
1120 QueryEliteSet::serialise(string & result) const
1122 // FIXME: set_size - subqueries.size() ?
1123 QueryBranch::serialise_(result, set_size);
1126 void
1127 QueryBranch::gather_terms(void * void_terms) const
1129 // Gather results from all subqueries.
1130 QueryVector::const_iterator i;
1131 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1132 // MatchNothing subqueries should have been removed by done().
1133 Assert((*i).internal.get());
1134 (*i).internal->gather_terms(void_terms);
1138 void
1139 QueryBranch::do_or_like(OrContext& ctx, QueryOptimiser * qopt, double factor,
1140 Xapian::termcount elite_set_size, size_t first) const
1142 LOGCALL_VOID(MATCH, "QueryBranch::do_or_like", ctx | qopt | factor | elite_set_size);
1144 // FIXME: we could optimise by merging OP_ELITE_SET and OP_OR like we do
1145 // for AND-like operations.
1147 // OP_SYNONYM with a single subquery is only simplified by
1148 // QuerySynonym::done() if the single subquery is a term or MatchAll.
1149 Assert(subqueries.size() >= 2 || get_op() == Query::OP_SYNONYM);
1151 vector<PostList *> postlists;
1152 postlists.reserve(subqueries.size() - first);
1154 QueryVector::const_iterator q;
1155 for (q = subqueries.begin() + first; q != subqueries.end(); ++q) {
1156 // MatchNothing subqueries should have been removed by done().
1157 Assert((*q).internal.get());
1158 (*q).internal->postlist_sub_or_like(ctx, qopt, factor);
1161 if (elite_set_size && elite_set_size < subqueries.size()) {
1162 ctx.select_elite_set(qopt, elite_set_size, subqueries.size());
1163 // FIXME: not right!
1167 PostList *
1168 QueryBranch::do_synonym(QueryOptimiser * qopt, double factor) const
1170 LOGCALL(MATCH, PostList *, "QueryBranch::do_synonym", qopt | factor);
1171 OrContext ctx(subqueries.size());
1172 do_or_like(ctx, qopt, 0.0);
1173 PostList * pl = ctx.postlist(qopt);
1174 if (factor == 0.0) {
1175 // If we have a factor of 0, we don't care about the weights, so
1176 // we're just like a normal OR query.
1177 return pl;
1180 // We currently assume wqf is 1 for calculating the synonym's weight
1181 // since conceptually the synonym is one "virtual" term. If we were
1182 // to combine multiple occurrences of the same synonym expansion into
1183 // a single instance with wqf set, we would want to track the wqf.
1185 // We build an OP_OR tree for OP_SYNONYM and then wrap it in a
1186 // SynonymPostList, which supplies the weights.
1187 RETURN(qopt->make_synonym_postlist(pl, factor));
1190 PostList *
1191 QueryBranch::do_max(QueryOptimiser * qopt, double factor) const
1193 LOGCALL(MATCH, PostList *, "QueryBranch::do_max", qopt | factor);
1194 OrContext ctx(subqueries.size());
1195 do_or_like(ctx, qopt, factor);
1196 if (factor == 0.0) {
1197 // If we have a factor of 0, we don't care about the weights, so
1198 // we're just like a normal OR query.
1199 RETURN(ctx.postlist(qopt));
1202 // We currently assume wqf is 1 for calculating the OP_MAX's weight
1203 // since conceptually the OP_MAX is one "virtual" term. If we were
1204 // to combine multiple occurrences of the same OP_MAX expansion into
1205 // a single instance with wqf set, we would want to track the wqf.
1206 RETURN(ctx.postlist_max(qopt));
1209 Xapian::Query::op
1210 QueryBranch::get_type() const XAPIAN_NOEXCEPT
1212 return get_op();
1215 size_t
1216 QueryBranch::get_num_subqueries() const XAPIAN_NOEXCEPT
1218 return subqueries.size();
1221 const Query
1222 QueryBranch::get_subquery(size_t n) const
1224 return subqueries[n];
1227 const string
1228 QueryBranch::get_description_helper(const char * op,
1229 Xapian::termcount parameter) const
1231 string desc = "(";
1232 QueryVector::const_iterator i;
1233 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1234 if (desc.size() > 1) {
1235 desc += op;
1236 if (parameter) {
1237 desc += str(parameter);
1238 desc += ' ';
1241 Assert((*i).internal.get());
1242 // MatchNothing subqueries should have been removed by done(), and we
1243 // shouldn't get called before done() is, since that happens at the
1244 // end of the Xapian::Query constructor.
1245 desc += (*i).internal->get_description();
1247 desc += ')';
1248 return desc;
1251 Query::Internal *
1252 QueryWindowed::done()
1254 // If window size not specified, default it.
1255 if (window == 0)
1256 window = subqueries.size();
1257 return QueryAndLike::done();
1260 void
1261 QueryScaleWeight::gather_terms(void * void_terms) const
1263 subquery.internal->gather_terms(void_terms);
1266 void QueryTerm::serialise(string & result) const
1268 size_t len = term.size();
1269 if (len == 0) {
1270 if (wqf == 1 && pos == 0) {
1271 // Query::MatchAll
1272 result += '\x0f';
1273 } else {
1274 // Weird mutant versions of MatchAll
1275 result += '\x0e';
1276 result += encode_length(wqf);
1277 result += encode_length(pos);
1279 } else if (wqf == 1) {
1280 if (pos == 0) {
1281 // Single occurrence probabilistic term without position set.
1282 if (len >= 16) {
1283 result += static_cast<char>(0x40 | 0x10);
1284 result += encode_length(term.size() - 16);
1285 } else {
1286 result += static_cast<char>(0x40 | 0x10 | len);
1288 result += term;
1289 } else {
1290 // Single occurrence probabilistic term with position set.
1291 if (len >= 16) {
1292 result += static_cast<char>(0x40 | 0x20);
1293 result += encode_length(term.size() - 16);
1294 } else {
1295 result += static_cast<char>(0x40 | 0x20 | len);
1297 result += term;
1298 result += encode_length(pos);
1300 } else if (wqf > 1 || pos > 0) {
1301 // General case.
1302 if (len >= 16) {
1303 result += static_cast<char>(0x40 | 0x30);
1304 result += encode_length(term.size() - 16);
1305 } else if (len) {
1306 result += static_cast<char>(0x40 | 0x30 | len);
1308 result += term;
1309 result += encode_length(wqf);
1310 result += encode_length(pos);
1311 } else {
1312 // Typical boolean term.
1313 AssertEq(wqf, 0);
1314 AssertEq(pos, 0);
1315 if (len >= 16) {
1316 result += static_cast<char>(0x40);
1317 result += encode_length(term.size() - 16);
1318 } else {
1319 result += static_cast<char>(0x40 | len);
1321 result += term;
1325 void QueryPostingSource::serialise(string & result) const
1327 result += static_cast<char>(0x0c);
1329 const string & n = source->name();
1330 result += encode_length(n.size());
1331 result += n;
1333 const string & s = source->serialise();
1334 result += encode_length(s.size());
1335 result += s;
1338 void QueryScaleWeight::serialise(string & result) const
1340 Assert(subquery.internal.get());
1341 const string & s = serialise_double(scale_factor);
1342 result += '\x0d';
1343 result += s;
1344 subquery.internal->serialise(result);
1347 struct is_matchnothing {
1348 bool operator()(const Xapian::Query & q) {
1349 return q.internal.get() == NULL;
1353 void
1354 QueryAndLike::add_subquery(const Xapian::Query & subquery)
1356 // If the AndLike is already MatchNothing, do nothing.
1357 if (subqueries.size() == 1 && subqueries[0].internal.get() == NULL)
1358 return;
1359 // If we're adding MatchNothing, discard any previous subqueries.
1360 if (subquery.internal.get() == NULL)
1361 subqueries.clear();
1362 subqueries.push_back(subquery);
1365 Query::Internal *
1366 QueryAndLike::done()
1368 // Empty AndLike gives MatchNothing.
1369 if (subqueries.empty())
1370 return NULL;
1371 // We handle any subquery being MatchNothing in add_subquery() by leaving
1372 // a single MatchNothing subquery, and so this check results in AndLike
1373 // giving MatchNothing.
1374 if (subqueries.size() == 1)
1375 return subqueries[0].internal.get();
1376 return this;
1379 PostingIterator::Internal *
1380 QueryAndLike::postlist(QueryOptimiser * qopt, double factor) const
1382 LOGCALL(QUERY, PostingIterator::Internal *, "QueryAndLike::postlist", qopt | factor);
1383 AndContext ctx(subqueries.size());
1384 postlist_sub_and_like(ctx, qopt, factor);
1385 RETURN(ctx.postlist(qopt));
1388 void
1389 QueryAndLike::postlist_sub_and_like(AndContext& ctx, QueryOptimiser * qopt, double factor) const
1391 QueryVector::const_iterator i;
1392 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1393 // MatchNothing subqueries should have been removed by done().
1394 Assert((*i).internal.get());
1395 (*i).internal->postlist_sub_and_like(ctx, qopt, factor);
1399 void
1400 QueryOrLike::add_subquery(const Xapian::Query & subquery)
1402 // Drop any subqueries which are MatchNothing.
1403 if (subquery.internal.get() != NULL)
1404 subqueries.push_back(subquery);
1407 Query::Internal *
1408 QueryOrLike::done()
1410 // An empty OrLike gives MatchNothing. Note that add_subquery() drops any
1411 // subqueries which are MatchNothing.
1412 if (subqueries.empty())
1413 return NULL;
1414 if (subqueries.size() == 1)
1415 return subqueries[0].internal.get();
1416 return this;
1419 void
1420 QueryAndNot::add_subquery(const Xapian::Query & subquery)
1422 // If the left side of AndNot is already MatchNothing, do nothing.
1423 if (subqueries.size() == 1 && subqueries[0].internal.get() == NULL)
1424 return;
1425 // Drop any 2nd or subsequent subqueries which are MatchNothing.
1426 if (subquery.internal.get() != NULL || subqueries.empty())
1427 subqueries.push_back(subquery);
1430 Query::Internal *
1431 QueryAndNot::done()
1433 // Any MatchNothing right subqueries get discarded by add_subquery() - if
1434 // that leaves just the left subquery, return that.
1436 // If left subquery is MatchNothing, then add_subquery() discards all right
1437 // subqueries, so this check also gives MatchNothing for this case.
1438 if (subqueries.size() == 1)
1439 return subqueries[0].internal.get();
1440 return this;
1443 void
1444 QueryAndMaybe::add_subquery(const Xapian::Query & subquery)
1446 // If the left side of AndMaybe is already MatchNothing, do nothing.
1447 if (subqueries.size() == 1 && subqueries[0].internal.get() == NULL)
1448 return;
1449 // Drop any 2nd or subsequent subqueries which are MatchNothing.
1450 if (subquery.internal.get() != NULL || subqueries.empty())
1451 subqueries.push_back(subquery);
1454 Query::Internal *
1455 QueryAndMaybe::done()
1457 // Any MatchNothing right subqueries get discarded by add_subquery() - if
1458 // that leaves just the left subquery, return that.
1460 // If left subquery is MatchNothing, then add_subquery() discards all right
1461 // subqueries, so this check also gives MatchNothing for this case.
1462 if (subqueries.size() == 1)
1463 return subqueries[0].internal.get();
1464 return this;
1467 PostingIterator::Internal *
1468 QueryOr::postlist(QueryOptimiser * qopt, double factor) const
1470 LOGCALL(QUERY, PostingIterator::Internal *, "QueryOr::postlist", qopt | factor);
1471 OrContext ctx(subqueries.size());
1472 do_or_like(ctx, qopt, factor);
1473 RETURN(ctx.postlist(qopt));
1476 void
1477 QueryOr::postlist_sub_or_like(OrContext& ctx, QueryOptimiser * qopt, double factor) const
1479 do_or_like(ctx, qopt, factor);
1482 PostingIterator::Internal *
1483 QueryAndNot::postlist(QueryOptimiser * qopt, double factor) const
1485 LOGCALL(QUERY, PostingIterator::Internal *, "QueryAndNot::postlist", qopt | factor);
1486 // FIXME: Combine and-like side with and-like stuff above.
1487 AutoPtr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
1488 OrContext ctx(subqueries.size() - 1);
1489 do_or_like(ctx, qopt, 0.0, 0, 1);
1490 AutoPtr<PostList> r(ctx.postlist(qopt));
1491 RETURN(new AndNotPostList(l.release(), r.release(),
1492 qopt->matcher, qopt->db_size));
1495 PostingIterator::Internal *
1496 QueryXor::postlist(QueryOptimiser * qopt, double factor) const
1498 LOGCALL(QUERY, PostingIterator::Internal *, "QueryXor::postlist", qopt | factor);
1499 XorContext ctx(subqueries.size());
1500 postlist_sub_xor(ctx, qopt, factor);
1501 RETURN(ctx.postlist(qopt));
1504 void
1505 QueryXor::postlist_sub_xor(XorContext& ctx, QueryOptimiser * qopt, double factor) const
1507 QueryVector::const_iterator i;
1508 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1509 // MatchNothing subqueries should have been removed by done().
1510 Assert((*i).internal.get());
1511 (*i).internal->postlist_sub_xor(ctx, qopt, factor);
1515 PostingIterator::Internal *
1516 QueryAndMaybe::postlist(QueryOptimiser * qopt, double factor) const
1518 LOGCALL(QUERY, PostingIterator::Internal *, "QueryAndMaybe::postlist", qopt | factor);
1519 // FIXME: Combine and-like side with and-like stuff above.
1520 AutoPtr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
1521 OrContext ctx(subqueries.size() - 1);
1522 do_or_like(ctx, qopt, factor, 0, 1);
1523 AutoPtr<PostList> r(ctx.postlist(qopt));
1524 RETURN(new AndMaybePostList(l.release(), r.release(),
1525 qopt->matcher, qopt->db_size));
1528 PostingIterator::Internal *
1529 QueryFilter::postlist(QueryOptimiser * qopt, double factor) const
1531 LOGCALL(QUERY, PostingIterator::Internal *, "QueryFilter::postlist", qopt | factor);
1532 // FIXME: Combine and-like stuff, like QueryOptimiser.
1533 AssertEq(subqueries.size(), 2);
1534 PostList * pls[2];
1535 AutoPtr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
1536 pls[1] = subqueries[1].internal->postlist(qopt, 0.0);
1537 pls[0] = l.release();
1538 RETURN(new MultiAndPostList(pls, pls + 2, qopt->matcher, qopt->db_size));
1541 void
1542 QueryFilter::postlist_sub_and_like(AndContext& ctx, QueryOptimiser * qopt, double factor) const
1544 QueryVector::const_iterator i;
1545 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1546 // MatchNothing subqueries should have been removed by done().
1547 Assert((*i).internal.get());
1548 (*i).internal->postlist_sub_and_like(ctx, qopt, factor);
1549 // Second and subsequent subqueries are unweighted.
1550 factor = 0.0;
1554 void
1555 QueryWindowed::postlist_windowed(Query::op op, AndContext& ctx, QueryOptimiser * qopt, double factor) const
1557 // FIXME: should has_positions() be on the combined DB (not this sub)?
1558 if (qopt->db.has_positions()) {
1559 bool old_need_positions = qopt->need_positions;
1560 qopt->need_positions = true;
1562 QueryVector::const_iterator i;
1563 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1564 // MatchNothing subqueries should have been removed by done().
1565 Assert((*i).internal.get());
1566 ctx.add_postlist((*i).internal->postlist(qopt, factor));
1568 // Record the positional filter to apply higher up the tree.
1569 ctx.add_pos_filter(op, subqueries.size(), window);
1571 qopt->need_positions = old_need_positions;
1572 } else {
1573 QueryAndLike::postlist_sub_and_like(ctx, qopt, factor);
1577 void
1578 QueryPhrase::postlist_sub_and_like(AndContext & ctx, QueryOptimiser * qopt, double factor) const
1580 QueryWindowed::postlist_windowed(Query::OP_PHRASE, ctx, qopt, factor);
1583 void
1584 QueryNear::postlist_sub_and_like(AndContext & ctx, QueryOptimiser * qopt, double factor) const
1586 QueryWindowed::postlist_windowed(Query::OP_NEAR, ctx, qopt, factor);
1589 PostingIterator::Internal *
1590 QueryEliteSet::postlist(QueryOptimiser * qopt, double factor) const
1592 LOGCALL(QUERY, PostingIterator::Internal *, "QueryEliteSet::postlist", qopt | factor);
1593 OrContext ctx(subqueries.size());
1594 do_or_like(ctx, qopt, factor, set_size);
1595 RETURN(ctx.postlist(qopt));
1598 void
1599 QueryEliteSet::postlist_sub_or_like(OrContext& ctx, QueryOptimiser * qopt, double factor) const
1601 do_or_like(ctx, qopt, factor, set_size);
1604 PostingIterator::Internal *
1605 QuerySynonym::postlist(QueryOptimiser * qopt, double factor) const
1607 LOGCALL(QUERY, PostingIterator::Internal *, "QuerySynonym::postlist", qopt | factor);
1608 // Save and restore total_subqs so we only add one for the whole
1609 // OP_SYNONYM subquery (or none if we're not weighted).
1610 Xapian::termcount save_total_subqs = qopt->get_total_subqs();
1611 if (factor != 0.0)
1612 ++save_total_subqs;
1613 PostList * pl = do_synonym(qopt, factor);
1614 qopt->set_total_subqs(save_total_subqs);
1615 RETURN(pl);
1618 Query::Internal *
1619 QuerySynonym::done()
1621 // An empty Synonym gives MatchNothing. Note that add_subquery() drops any
1622 // subqueries which are MatchNothing.
1623 if (subqueries.empty())
1624 return NULL;
1625 // Synonym of a single subquery should only be simplified if that subquery
1626 // is a term (or MatchAll), or if it's also OP_SYNONYM. Note that
1627 // MatchNothing subqueries are dropped, so we'd never get here with a
1628 // single MatchNothing subquery.
1629 if (subqueries.size() == 1) {
1630 Query::op sub_type = subqueries[0].get_type();
1631 if (sub_type == Query::LEAF_TERM || sub_type == Query::LEAF_MATCH_ALL ||
1632 sub_type == Query::OP_SYNONYM) {
1633 return subqueries[0].internal.get();
1636 return this;
1639 PostingIterator::Internal *
1640 QueryMax::postlist(QueryOptimiser * qopt, double factor) const
1642 LOGCALL(QUERY, PostingIterator::Internal *, "QueryMax::postlist", qopt | factor);
1643 // Save and restore total_subqs so we only add one for the whole
1644 // OP_MAX subquery (or none if we're not weighted).
1645 Xapian::termcount save_total_subqs = qopt->get_total_subqs();
1646 if (factor != 0.0)
1647 ++save_total_subqs;
1648 PostList * pl = do_max(qopt, factor);
1649 qopt->set_total_subqs(save_total_subqs);
1650 RETURN(pl);
1653 Xapian::Query::op
1654 QueryAnd::get_op() const
1656 return Xapian::Query::OP_AND;
1659 Xapian::Query::op
1660 QueryOr::get_op() const
1662 return Xapian::Query::OP_OR;
1665 Xapian::Query::op
1666 QueryAndNot::get_op() const
1668 return Xapian::Query::OP_AND_NOT;
1671 Xapian::Query::op
1672 QueryXor::get_op() const
1674 return Xapian::Query::OP_XOR;
1677 Xapian::Query::op
1678 QueryAndMaybe::get_op() const
1680 return Xapian::Query::OP_AND_MAYBE;
1683 Xapian::Query::op
1684 QueryFilter::get_op() const
1686 return Xapian::Query::OP_FILTER;
1689 Xapian::Query::op
1690 QueryNear::get_op() const
1692 return Xapian::Query::OP_NEAR;
1695 Xapian::Query::op
1696 QueryPhrase::get_op() const
1698 return Xapian::Query::OP_PHRASE;
1701 Xapian::Query::op
1702 QueryEliteSet::get_op() const
1704 return Xapian::Query::OP_ELITE_SET;
1707 Xapian::Query::op
1708 QuerySynonym::get_op() const
1710 return Xapian::Query::OP_SYNONYM;
1713 Xapian::Query::op
1714 QueryMax::get_op() const
1716 return Xapian::Query::OP_MAX;
1719 Xapian::Query::op
1720 QueryWildcard::get_op() const
1722 return Xapian::Query::OP_WILDCARD;
1725 string
1726 QueryAnd::get_description() const
1728 return get_description_helper(" AND ");
1731 string
1732 QueryOr::get_description() const
1734 return get_description_helper(" OR ");
1737 string
1738 QueryAndNot::get_description() const
1740 return get_description_helper(" AND_NOT ");
1743 string
1744 QueryXor::get_description() const
1746 return get_description_helper(" XOR ");
1749 string
1750 QueryAndMaybe::get_description() const
1752 return get_description_helper(" AND_MAYBE ");
1755 string
1756 QueryFilter::get_description() const
1758 return get_description_helper(" FILTER ");
1761 string
1762 QueryNear::get_description() const
1764 return get_description_helper(" NEAR ", window);
1767 string
1768 QueryPhrase::get_description() const
1770 return get_description_helper(" PHRASE ", window);
1773 string
1774 QueryEliteSet::get_description() const
1776 return get_description_helper(" ELITE_SET ", set_size);
1779 string
1780 QuerySynonym::get_description() const
1782 if (subqueries.size() == 1) {
1783 string d = "(SYNONYM ";
1784 d += subqueries[0].internal->get_description();
1785 d += ")";
1786 return d;
1788 return get_description_helper(" SYNONYM ");
1791 string
1792 QueryMax::get_description() const
1794 return get_description_helper(" MAX ");