Document xapian-compact --blocksize takes an argument
[xapian.git] / xapian-core / api / queryinternal.cc
blob4dd106c6fe389aa1f60aa1224eeca09718e01349
1 /** @file queryinternal.cc
2 * @brief Xapian::Query internals
3 */
4 /* Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014,2015 Olly Betts
5 * Copyright (C) 2008,2009 Lemur Consulting Ltd
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #include <config.h>
24 #include "queryinternal.h"
26 #include "xapian/postingsource.h"
27 #include "xapian/query.h"
29 #include "matcher/const_database_wrapper.h"
30 #include "leafpostlist.h"
31 #include "matcher/andmaybepostlist.h"
32 #include "matcher/andnotpostlist.h"
33 #include "emptypostlist.h"
34 #include "matcher/exactphrasepostlist.h"
35 #include "matcher/externalpostlist.h"
36 #include "matcher/maxpostlist.h"
37 #include "matcher/multiandpostlist.h"
38 #include "matcher/multixorpostlist.h"
39 #include "matcher/nearpostlist.h"
40 #include "matcher/orpostlist.h"
41 #include "matcher/phrasepostlist.h"
42 #include "matcher/queryoptimiser.h"
43 #include "matcher/valuerangepostlist.h"
44 #include "matcher/valuegepostlist.h"
45 #include "net/length.h"
46 #include "serialise-double.h"
47 #include "termlist.h"
49 #include "autoptr.h"
50 #include "debuglog.h"
51 #include "omassert.h"
52 #include "str.h"
53 #include "unicode/description_append.h"
55 #include <algorithm>
56 #include <functional>
57 #include <list>
58 #include <string>
59 #include <vector>
61 using namespace std;
63 template<class CLASS> struct delete_ptr {
64 void operator()(CLASS *p) { delete p; }
67 using Xapian::Internal::AndContext;
68 using Xapian::Internal::OrContext;
69 using Xapian::Internal::XorContext;
71 namespace Xapian {
73 namespace Internal {
75 /** Class providing an operator which sorts postlists to select max or terms.
76 * This returns true if a has a (strictly) greater termweight than b,
77 * unless a or b contain no documents, in which case the other one is
78 * selected.
80 struct CmpMaxOrTerms {
81 /** Return true if and only if a has a strictly greater termweight than b;
82 * with the proviso that if the termfrequency of a or b is 0, then the
83 * termweight is considered to be 0.
85 * We use termfreq_max() because we really don't want to exclude a
86 * postlist which has a low but non-zero termfrequency: the estimate
87 * is quite likely to be zero in this case.
89 bool operator()(const PostList *a, const PostList *b) {
90 if (a->get_termfreq_max() == 0) return false;
91 if (b->get_termfreq_max() == 0) return true;
93 #if (defined(__i386__) && !defined(__SSE2_MATH__)) || defined(__mc68000__) || defined(__mc68010__) || defined(__mc68020__) || defined(__mc68030__)
94 // On some architectures, most common of which is x86, floating point
95 // values are calculated and stored in registers with excess precision.
96 // If the two get_maxweight() calls below return identical values in a
97 // register, the excess precision may be dropped for one of them but
98 // not the other (e.g. because the compiler saves the first calculated
99 // weight to memory while calculating the second, then reloads it to
100 // compare). This leads to both a > b and b > a being true, which
101 // violates the antisymmetry property of the strict weak ordering
102 // required by nth_element(). This can have serious consequences (e.g.
103 // segfaults).
105 // Note that m68k only has excess precision in earlier models - 68040
106 // and later are OK:
107 // http://gcc.gnu.org/ml/gcc-patches/2008-11/msg00105.html
109 // To avoid this, we store each result in a volatile double prior to
110 // comparing them. This means that the result of this test should
111 // match that on other architectures with the same double format (which
112 // is desirable), and actually has less overhead than rounding both
113 // results to float (which is another approach which works).
114 volatile double a_max_wt = a->get_maxweight();
115 volatile double b_max_wt = b->get_maxweight();
116 return a_max_wt > b_max_wt;
117 #else
118 return a->get_maxweight() > b->get_maxweight();
119 #endif
123 /// Comparison functor which orders PostList* by descending get_termfreq_est().
124 struct ComparePostListTermFreqAscending {
125 /// Order by descending get_termfreq_est().
126 bool operator()(const PostList *a, const PostList *b) {
127 return a->get_termfreq_est() > b->get_termfreq_est();
131 class Context {
132 protected:
133 vector<PostList*> pls;
135 public:
136 explicit Context(size_t reserve);
138 ~Context();
140 void add_postlist(PostList * pl) {
141 pls.push_back(pl);
144 bool empty() const {
145 return pls.empty();
148 size_t size() const {
149 return pls.size();
153 Context::Context(size_t reserve) {
154 pls.reserve(reserve);
157 Context::~Context()
159 for_each(pls.begin(), pls.end(), delete_ptr<PostList>());
162 class OrContext : public Context {
163 public:
164 explicit OrContext(size_t reserve) : Context(reserve) { }
166 /// Select the best set_size postlists from the last out_of added.
167 void select_elite_set(QueryOptimiser * qopt,
168 size_t set_size, size_t out_of);
170 /// Select the set_size postlists with the highest term frequency.
171 void select_most_frequent(QueryOptimiser * qopt, size_t set_size);
173 PostList * postlist(QueryOptimiser* qopt);
174 PostList * postlist_max(QueryOptimiser* qopt);
177 void
178 OrContext::select_elite_set(QueryOptimiser * qopt,
179 size_t set_size, size_t out_of)
181 // Call recalc_maxweight() as otherwise get_maxweight()
182 // may not be valid before next() or skip_to()
183 vector<PostList*>::iterator begin = pls.begin() + pls.size() - out_of;
184 for_each(begin, pls.end(), mem_fun(&PostList::recalc_maxweight));
186 nth_element(begin, begin + set_size - 1, pls.end(), CmpMaxOrTerms());
187 const PostList * hint_pl = qopt->get_hint_postlist();
188 for_each(begin + set_size, pls.end(),
189 [&qopt, &hint_pl](const PostList * p) {
190 if (rare(p == hint_pl)) {
191 qopt->take_hint_ownership();
192 hint_pl = NULL;
193 } else {
194 delete p;
197 pls.resize(pls.size() - out_of + set_size);
200 void
201 OrContext::select_most_frequent(QueryOptimiser * qopt, size_t set_size)
203 vector<PostList*>::iterator begin = pls.begin();
204 nth_element(begin, begin + set_size - 1, pls.end(),
205 ComparePostListTermFreqAscending());
206 const PostList * hint_pl = qopt->get_hint_postlist();
207 for_each(begin + set_size, pls.end(),
208 [&qopt, &hint_pl](const PostList * p) {
209 if (rare(p == hint_pl)) {
210 qopt->take_hint_ownership();
211 hint_pl = NULL;
212 } else {
213 delete p;
216 pls.resize(set_size);
219 PostList *
220 OrContext::postlist(QueryOptimiser* qopt)
222 Assert(!pls.empty());
224 if (pls.size() == 1) {
225 PostList * pl = pls[0];
226 pls.clear();
227 return pl;
230 // Make postlists into a heap so that the postlist with the greatest term
231 // frequency is at the top of the heap.
232 make_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
234 // Now build a tree of binary OrPostList objects.
236 // The algorithm used to build the tree is like that used to build an
237 // optimal Huffman coding tree. If we called next() repeatedly, this
238 // arrangement would minimise the number of method calls. Generally we
239 // don't actually do that, but this arrangement is still likely to be a
240 // good one, and it does minimise the work in the worst case.
241 while (true) {
242 // We build the tree such that at each branch:
244 // l.get_termfreq_est() >= r.get_termfreq_est()
246 // We do this so that the OrPostList class can be optimised assuming
247 // that this is the case.
248 PostList * r = pls.front();
249 pop_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
250 pls.pop_back();
251 PostList * pl;
252 pl = new OrPostList(pls.front(), r, qopt->matcher, qopt->db_size);
254 if (pls.size() == 1) {
255 pls.clear();
256 return pl;
259 pop_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
260 pls.back() = pl;
261 push_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
265 PostList *
266 OrContext::postlist_max(QueryOptimiser* qopt)
268 Assert(!pls.empty());
270 if (pls.size() == 1) {
271 PostList * pl = pls[0];
272 pls.clear();
273 return pl;
276 // Sort the postlists so that the postlist with the greatest term frequency
277 // is first.
278 sort(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
280 PostList * pl;
281 pl = new MaxPostList(pls.begin(), pls.end(), qopt->matcher, qopt->db_size);
283 pls.clear();
284 return pl;
287 class XorContext : public Context {
288 public:
289 explicit XorContext(size_t reserve) : Context(reserve) { }
291 PostList * postlist(QueryOptimiser* qopt);
294 PostList *
295 XorContext::postlist(QueryOptimiser* qopt)
297 Xapian::doccount db_size = qopt->db_size;
298 PostList * pl;
299 pl = new MultiXorPostList(pls.begin(), pls.end(), qopt->matcher, db_size);
301 // Empty pls so our destructor doesn't delete them all!
302 pls.clear();
303 return pl;
306 class AndContext : public Context {
307 class PosFilter {
308 Xapian::Query::op op_;
310 /// Start and end indices for the PostLists this positional filter uses.
311 size_t begin, end;
313 Xapian::termcount window;
315 public:
316 PosFilter(Xapian::Query::op op__, size_t begin_, size_t end_,
317 Xapian::termcount window_)
318 : op_(op__), begin(begin_), end(end_), window(window_) { }
320 PostList * postlist(PostList * pl, const vector<PostList*>& pls) const;
323 list<PosFilter> pos_filters;
325 public:
326 explicit AndContext(size_t reserve) : Context(reserve) { }
328 void add_pos_filter(Query::op op_,
329 size_t n_subqs,
330 Xapian::termcount window);
332 PostList * postlist(QueryOptimiser* qopt);
335 PostList *
336 AndContext::PosFilter::postlist(PostList * pl, const vector<PostList*>& pls) const
337 try {
338 vector<PostList *>::const_iterator terms_begin = pls.begin() + begin;
339 vector<PostList *>::const_iterator terms_end = pls.begin() + end;
341 if (op_ == Xapian::Query::OP_NEAR) {
342 pl = new NearPostList(pl, window, terms_begin, terms_end);
343 } else if (window == end - begin) {
344 AssertEq(op_, Xapian::Query::OP_PHRASE);
345 pl = new ExactPhrasePostList(pl, terms_begin, terms_end);
346 } else {
347 AssertEq(op_, Xapian::Query::OP_PHRASE);
348 pl = new PhrasePostList(pl, window, terms_begin, terms_end);
350 return pl;
351 } catch (...) {
352 delete pl;
353 throw;
356 void
357 AndContext::add_pos_filter(Query::op op_,
358 size_t n_subqs,
359 Xapian::termcount window)
361 Assert(n_subqs > 1);
362 size_t end = pls.size();
363 size_t begin = end - n_subqs;
364 pos_filters.push_back(PosFilter(op_, begin, end, window));
367 PostList *
368 AndContext::postlist(QueryOptimiser* qopt)
370 AutoPtr<PostList> pl(new MultiAndPostList(pls.begin(), pls.end(),
371 qopt->matcher, qopt->db_size));
373 // Sort the positional filters to try to apply them in an efficient order.
374 // FIXME: We need to figure out what that is! Try applying lowest cf/tf
375 // first?
377 // Apply any positional filters.
378 list<PosFilter>::const_iterator i;
379 for (i = pos_filters.begin(); i != pos_filters.end(); ++i) {
380 const PosFilter & filter = *i;
381 pl.reset(filter.postlist(pl.release(), pls));
384 // Empty pls so our destructor doesn't delete them all!
385 pls.clear();
386 return pl.release();
391 Query::Internal::~Internal() { }
393 size_t
394 Query::Internal::get_num_subqueries() const XAPIAN_NOEXCEPT
396 return 0;
399 const Query
400 Query::Internal::get_subquery(size_t) const
402 throw Xapian::InvalidArgumentError("get_subquery() not meaningful for this Query object");
405 void
406 Query::Internal::gather_terms(void *) const
410 Xapian::termcount
411 Query::Internal::get_length() const XAPIAN_NOEXCEPT
413 return 0;
416 Query::Internal *
417 Query::Internal::unserialise(const char ** p, const char * end,
418 const Registry & reg)
420 if (*p == end)
421 return NULL;
422 unsigned char ch = *(*p)++;
423 switch (ch >> 5) {
424 case 4: case 5: case 6: case 7: { // Multi-way branch
425 size_t n_subqs = ch & 0x07;
426 if (n_subqs == 0) {
427 decode_length(p, end, n_subqs);
428 n_subqs += 8;
430 unsigned char code = (ch >> 3) & 0x0f;
431 Xapian::termcount parameter = 0;
432 if (code >= 13)
433 decode_length(p, end, parameter);
434 Xapian::Internal::QueryBranch * result;
435 switch (code) {
436 case 0: // OP_AND
437 result = new Xapian::Internal::QueryAnd(n_subqs);
438 break;
439 case 1: // OP_OR
440 result = new Xapian::Internal::QueryOr(n_subqs);
441 break;
442 case 2: // OP_AND_NOT
443 result = new Xapian::Internal::QueryAndNot(n_subqs);
444 break;
445 case 3: // OP_XOR
446 result = new Xapian::Internal::QueryXor(n_subqs);
447 break;
448 case 4: // OP_AND_MAYBE
449 result = new Xapian::Internal::QueryAndMaybe(n_subqs);
450 break;
451 case 5: // OP_FILTER
452 result = new Xapian::Internal::QueryFilter(n_subqs);
453 break;
454 case 6: // OP_SYNONYM
455 result = new Xapian::Internal::QuerySynonym(n_subqs);
456 break;
457 case 7: // OP_MAX
458 result = new Xapian::Internal::QueryMax(n_subqs);
459 break;
460 case 13: // OP_ELITE_SET
461 result = new Xapian::Internal::QueryEliteSet(n_subqs,
462 parameter);
463 break;
464 case 14: // OP_NEAR
465 result = new Xapian::Internal::QueryNear(n_subqs,
466 parameter);
467 break;
468 case 15: // OP_PHRASE
469 result = new Xapian::Internal::QueryPhrase(n_subqs,
470 parameter);
471 break;
472 default:
473 // 8 to 12 are currently unused.
474 throw SerialisationError("Unknown multi-way branch Query operator");
476 do {
477 result->add_subquery(Xapian::Query(unserialise(p, end, reg)));
478 } while (--n_subqs);
479 result->done();
480 return result;
482 case 2: case 3: { // Term
483 size_t len = ch & 0x0f;
484 if (len == 0) {
485 decode_length(p, end, len);
486 len += 16;
488 if (size_t(end - *p) < len)
489 throw SerialisationError("Not enough data");
490 string term(*p, len);
491 *p += len;
493 int code = ((ch >> 4) & 0x03);
495 Xapian::termcount wqf = static_cast<Xapian::termcount>(code > 0);
496 if (code == 3)
497 decode_length(p, end, wqf);
499 Xapian::termpos pos = 0;
500 if (code >= 2)
501 decode_length(p, end, pos);
503 return new Xapian::Internal::QueryTerm(term, wqf, pos);
505 case 1: { // OP_VALUE_RANGE or OP_VALUE_GE or OP_VALUE_LE
506 Xapian::valueno slot = ch & 15;
507 if (slot == 15) {
508 decode_length(p, end, slot);
509 slot += 15;
511 size_t len;
512 decode_length_and_check(p, end, len);
513 string begin(*p, len);
514 *p += len;
515 if (ch & 0x10) {
516 // OP_VALUE_GE
517 return new Xapian::Internal::QueryValueGE(slot, begin);
520 // OP_VALUE_RANGE
521 decode_length_and_check(p, end, len);
522 string end_(*p, len);
523 *p += len;
524 if (begin.empty()) // FIXME: is this right?
525 return new Xapian::Internal::QueryValueLE(slot, end_);
526 return new Xapian::Internal::QueryValueRange(slot, begin, end_);
528 case 0: {
529 switch (ch & 0x0f) {
530 case 0x0b: { // Wildcard
531 if (*p == end)
532 throw SerialisationError("not enough data");
533 Xapian::termcount max_expansion;
534 decode_length(p, end, max_expansion);
535 if (end - *p < 2)
536 throw SerialisationError("not enough data");
537 int max_type = static_cast<unsigned char>(*(*p)++);
538 op combiner = static_cast<op>(*(*p)++);
539 size_t len;
540 decode_length_and_check(p, end, len);
541 string pattern(*p, len);
542 *p += len;
543 return new Xapian::Internal::QueryWildcard(pattern,
544 max_expansion,
545 max_type,
546 combiner);
548 case 0x0c: { // PostingSource
549 size_t len;
550 decode_length_and_check(p, end, len);
551 string name(*p, len);
552 *p += len;
554 const PostingSource * reg_source = reg.get_posting_source(name);
555 if (!reg_source) {
556 string m = "PostingSource ";
557 m += name;
558 m += " not registered";
559 throw SerialisationError(m);
562 decode_length_and_check(p, end, len);
563 PostingSource * source =
564 reg_source->unserialise_with_registry(string(*p, len),
565 reg);
566 *p += len;
567 return new Xapian::Internal::QueryPostingSource(source, true);
569 case 0x0d: {
570 using Xapian::Internal::QueryScaleWeight;
571 double scale_factor = unserialise_double(p, end);
572 return new QueryScaleWeight(scale_factor,
573 Query(unserialise(p, end, reg)));
575 case 0x0e: {
576 Xapian::termcount wqf;
577 Xapian::termpos pos;
578 decode_length(p, end, wqf);
579 decode_length(p, end, pos);
580 return new Xapian::Internal::QueryTerm(string(), wqf, pos);
582 case 0x0f:
583 return Query::MatchAll.internal.get();
584 default: // Others currently unused.
585 break;
587 break;
590 string msg = "Unknown Query serialisation: ";
591 msg += str(ch);
592 throw SerialisationError(msg);
595 void
596 Query::Internal::postlist_sub_and_like(AndContext& ctx,
597 QueryOptimiser * qopt,
598 double factor) const
600 ctx.add_postlist(postlist(qopt, factor));
603 void
604 Query::Internal::postlist_sub_or_like(OrContext& ctx,
605 QueryOptimiser * qopt,
606 double factor) const
608 ctx.add_postlist(postlist(qopt, factor));
611 void
612 Query::Internal::postlist_sub_xor(XorContext& ctx,
613 QueryOptimiser * qopt,
614 double factor) const
616 ctx.add_postlist(postlist(qopt, factor));
619 namespace Internal {
621 Query::op
622 QueryTerm::get_type() const XAPIAN_NOEXCEPT
624 return term.empty() ? Query::LEAF_MATCH_ALL : Query::LEAF_TERM;
627 string
628 QueryTerm::get_description() const
630 string desc;
631 if (term.empty()) {
632 desc = "<alldocuments>";
633 } else {
634 description_append(desc, term);
636 if (wqf != 1) {
637 desc += '#';
638 desc += str(wqf);
640 if (pos) {
641 desc += '@';
642 desc += str(pos);
644 return desc;
647 QueryPostingSource::QueryPostingSource(PostingSource * source_, bool owned_)
648 : source(source_), owned(owned_)
650 if (!source)
651 throw Xapian::InvalidArgumentError("source parameter can't be NULL");
652 if (!owned) {
653 PostingSource * cloned_source = source->clone();
654 if (cloned_source) {
655 source = cloned_source;
656 owned = true;
661 QueryPostingSource::~QueryPostingSource()
663 if (owned)
664 delete source;
667 Query::op
668 QueryPostingSource::get_type() const XAPIAN_NOEXCEPT
670 return Query::LEAF_POSTING_SOURCE;
673 string
674 QueryPostingSource::get_description() const
676 string desc = "PostingSource(";
677 desc += source->get_description();
678 desc += ')';
679 return desc;
682 QueryScaleWeight::QueryScaleWeight(double factor, const Query & subquery_)
683 : scale_factor(factor), subquery(subquery_)
685 if (rare(scale_factor < 0.0))
686 throw Xapian::InvalidArgumentError("OP_SCALE_WEIGHT requires factor >= 0");
689 Query::op
690 QueryScaleWeight::get_type() const XAPIAN_NOEXCEPT
692 return Query::OP_SCALE_WEIGHT;
695 size_t
696 QueryScaleWeight::get_num_subqueries() const XAPIAN_NOEXCEPT
698 return 1;
701 const Query
702 QueryScaleWeight::get_subquery(size_t) const
704 return subquery;
707 string
708 QueryScaleWeight::get_description() const
710 Assert(subquery.internal.get());
711 string desc = str(scale_factor);
712 desc += " * ";
713 desc += subquery.internal->get_description();
714 return desc;
717 PostingIterator::Internal *
718 QueryTerm::postlist(QueryOptimiser * qopt, double factor) const
720 LOGCALL(QUERY, PostingIterator::Internal *, "QueryTerm::postlist", qopt | factor);
721 if (factor != 0.0)
722 qopt->inc_total_subqs();
723 RETURN(qopt->open_post_list(term, wqf, factor));
726 PostingIterator::Internal *
727 QueryPostingSource::postlist(QueryOptimiser * qopt, double factor) const
729 LOGCALL(QUERY, PostingIterator::Internal *, "QueryPostingSource::postlist", qopt | factor);
730 Assert(source);
731 if (factor != 0.0)
732 qopt->inc_total_subqs();
733 Xapian::Database wrappeddb(new ConstDatabaseWrapper(&(qopt->db)));
734 RETURN(new ExternalPostList(wrappeddb, source, factor, qopt->matcher));
737 PostingIterator::Internal *
738 QueryScaleWeight::postlist(QueryOptimiser * qopt, double factor) const
740 LOGCALL(QUERY, PostingIterator::Internal *, "QueryScaleWeight::postlist", qopt | factor);
741 RETURN(subquery.internal->postlist(qopt, factor * scale_factor));
744 void
745 QueryTerm::gather_terms(void * void_terms) const
747 // Skip Xapian::Query::MatchAll (aka Xapian::Query("")).
748 if (!term.empty()) {
749 vector<pair<Xapian::termpos, string> > &terms =
750 *static_cast<vector<pair<Xapian::termpos, string> >*>(void_terms);
751 terms.push_back(make_pair(pos, term));
755 PostingIterator::Internal *
756 QueryValueRange::postlist(QueryOptimiser *qopt, double factor) const
758 LOGCALL(QUERY, PostingIterator::Internal *, "QueryValueRange::postlist", qopt | factor);
759 if (factor != 0.0)
760 qopt->inc_total_subqs();
761 const Xapian::Database::Internal & db = qopt->db;
762 const string & lb = db.get_value_lower_bound(slot);
763 // If lb.empty(), the backend doesn't provide value bounds.
764 if (!lb.empty()) {
765 if (end < lb) {
766 RETURN(new EmptyPostList);
768 const string & ub = db.get_value_upper_bound(slot);
769 if (begin > ub) {
770 RETURN(new EmptyPostList);
772 if (end >= ub) {
773 // If begin <= lb too, then the range check isn't needed, but we do
774 // still need to consider which documents have a value set in this
775 // slot. If this value is set for all documents, we can replace it
776 // with the MatchAll postlist, which is especially efficient if
777 // there are no gaps in the docids.
778 if (begin <= lb && db.get_value_freq(slot) == db.get_doccount()) {
779 RETURN(db.open_post_list(string()));
781 RETURN(new ValueGePostList(&db, slot, begin));
784 RETURN(new ValueRangePostList(&db, slot, begin, end));
787 void
788 QueryValueRange::serialise(string & result) const
790 if (slot < 15) {
791 result += static_cast<char>(0x20 | slot);
792 } else {
793 result += static_cast<char>(0x20 | 15);
794 result += encode_length(slot - 15);
796 result += encode_length(begin.size());
797 result += begin;
798 result += encode_length(end.size());
799 result += end;
802 Query::op
803 QueryValueRange::get_type() const XAPIAN_NOEXCEPT
805 return Query::OP_VALUE_RANGE;
808 string
809 QueryValueRange::get_description() const
811 string desc = "VALUE_RANGE ";
812 desc += str(slot);
813 desc += ' ';
814 description_append(desc, begin);
815 desc += ' ';
816 description_append(desc, end);
817 return desc;
820 PostingIterator::Internal *
821 QueryValueLE::postlist(QueryOptimiser *qopt, double factor) const
823 LOGCALL(QUERY, PostingIterator::Internal *, "QueryValueLE::postlist", qopt | factor);
824 if (factor != 0.0)
825 qopt->inc_total_subqs();
826 const Xapian::Database::Internal & db = qopt->db;
827 const string & lb = db.get_value_lower_bound(slot);
828 // If lb.empty(), the backend doesn't provide value bounds.
829 if (!lb.empty()) {
830 if (limit < lb) {
831 RETURN(new EmptyPostList);
833 if (limit >= db.get_value_upper_bound(slot)) {
834 // The range check isn't needed, but we do still need to consider
835 // which documents have a value set in this slot. If this value is
836 // set for all documents, we can replace it with the MatchAll
837 // postlist, which is especially efficient if there are no gaps in
838 // the docids.
839 if (db.get_value_freq(slot) == db.get_doccount()) {
840 RETURN(db.open_post_list(string()));
844 RETURN(new ValueRangePostList(&db, slot, string(), limit));
847 void
848 QueryValueLE::serialise(string & result) const
850 // Encode as a range with an empty start (which only takes a single byte to
851 // encode).
852 if (slot < 15) {
853 result += static_cast<char>(0x20 | slot);
854 } else {
855 result += static_cast<char>(0x20 | 15);
856 result += encode_length(slot - 15);
858 result += encode_length(0);
859 result += encode_length(limit.size());
860 result += limit;
863 Query::op
864 QueryValueLE::get_type() const XAPIAN_NOEXCEPT
866 return Query::OP_VALUE_LE;
869 string
870 QueryValueLE::get_description() const
872 string desc = "VALUE_LE ";
873 desc += str(slot);
874 desc += ' ';
875 description_append(desc, limit);
876 return desc;
879 PostingIterator::Internal *
880 QueryValueGE::postlist(QueryOptimiser *qopt, double factor) const
882 LOGCALL(QUERY, PostingIterator::Internal *, "QueryValueGE::postlist", qopt | factor);
883 if (factor != 0.0)
884 qopt->inc_total_subqs();
885 const Xapian::Database::Internal & db = qopt->db;
886 const string & lb = db.get_value_lower_bound(slot);
887 // If lb.empty(), the backend doesn't provide value bounds.
888 if (!lb.empty()) {
889 if (limit > db.get_value_upper_bound(slot)) {
890 RETURN(new EmptyPostList);
892 if (limit < lb) {
893 // The range check isn't needed, but we do still need to consider
894 // which documents have a value set in this slot. If this value is
895 // set for all documents, we can replace it with the MatchAll
896 // postlist, which is especially efficient if there are no gaps in
897 // the docids.
898 if (db.get_value_freq(slot) == db.get_doccount()) {
899 RETURN(db.open_post_list(string()));
903 RETURN(new ValueGePostList(&db, slot, limit));
906 void
907 QueryValueGE::serialise(string & result) const
909 if (slot < 15) {
910 result += static_cast<char>(0x20 | 0x10 | slot);
911 } else {
912 result += static_cast<char>(0x20 | 0x10 | 15);
913 result += encode_length(slot - 15);
915 result += encode_length(limit.size());
916 result += limit;
919 Query::op
920 QueryValueGE::get_type() const XAPIAN_NOEXCEPT
922 return Query::OP_VALUE_GE;
925 string
926 QueryValueGE::get_description() const
928 string desc = "VALUE_GE ";
929 desc += str(slot);
930 desc += ' ';
931 description_append(desc, limit);
932 return desc;
935 PostingIterator::Internal *
936 QueryWildcard::postlist(QueryOptimiser * qopt, double factor) const
938 LOGCALL(QUERY, PostingIterator::Internal *, "QueryWildcard::postlist", qopt | factor);
939 Query::op op = combiner;
940 double or_factor = 0.0;
941 if (factor == 0.0) {
942 // If we have a factor of 0, we don't care about the weights, so
943 // we're just like a normal OR query.
944 op = Query::OP_OR;
945 } else if (op != Query::OP_SYNONYM) {
946 or_factor = factor;
948 OrContext ctx(0);
949 AutoPtr<TermList> t(qopt->db.open_allterms(pattern));
950 Xapian::termcount expansions_left = max_expansion;
951 // If there's no expansion limit, set expansions_left to the maximum
952 // value Xapian::termcount can hold.
953 if (expansions_left == 0)
954 --expansions_left;
955 while (true) {
956 t->next();
957 if (t->at_end())
958 break;
959 if (max_type < Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT) {
960 if (expansions_left-- == 0) {
961 if (max_type == Xapian::Query::WILDCARD_LIMIT_FIRST)
962 break;
963 string msg("Wildcard ");
964 msg += pattern;
965 msg += "* expands to more than ";
966 msg += str(max_expansion);
967 msg += " terms";
968 throw Xapian::WildcardError(msg);
971 const string & term = t->get_termname();
972 ctx.add_postlist(qopt->open_lazy_post_list(term, 1, or_factor));
975 if (max_type == Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT) {
976 // FIXME: open_lazy_post_list() results in the term getting registered
977 // for stats, so we still incur an avoidable cost from the full
978 // expansion size of the wildcard, which is most likely to be visible
979 // with the remote backend. Perhaps we should split creating the lazy
980 // postlist from registering the term for stats.
981 if (ctx.size() > max_expansion)
982 ctx.select_most_frequent(qopt, max_expansion);
985 if (factor != 0.0) {
986 if (op != Query::OP_SYNONYM) {
987 qopt->set_total_subqs(qopt->get_total_subqs() + ctx.size());
988 } else {
989 qopt->inc_total_subqs();
993 if (ctx.empty())
994 RETURN(new EmptyPostList);
996 if (op == Query::OP_MAX)
997 RETURN(ctx.postlist_max(qopt));
999 PostList * pl = ctx.postlist(qopt);
1000 if (op == Query::OP_OR)
1001 RETURN(pl);
1003 // We build an OP_OR tree for OP_SYNONYM and then wrap it in a
1004 // SynonymPostList, which supplies the weights.
1005 PostingIterator::Internal * r = qopt->make_synonym_postlist(pl, factor);
1006 RETURN(r);
1009 termcount
1010 QueryWildcard::get_length() const XAPIAN_NOEXCEPT
1012 // We currently assume wqf is 1 for calculating the synonym's weight
1013 // since conceptually the synonym is one "virtual" term. If we were
1014 // to combine multiple occurrences of the same synonym expansion into
1015 // a single instance with wqf set, we would want to track the wqf.
1016 return 1;
1019 void
1020 QueryWildcard::serialise(string & result) const
1022 result += static_cast<char>(0x0b);
1023 result += encode_length(max_expansion);
1024 result += static_cast<unsigned char>(max_type);
1025 result += static_cast<unsigned char>(combiner);
1026 result += encode_length(pattern.size());
1027 result += pattern;
1030 Query::op
1031 QueryWildcard::get_type() const XAPIAN_NOEXCEPT
1033 return Query::OP_WILDCARD;
1036 string
1037 QueryWildcard::get_description() const
1039 string desc = "WILDCARD ";
1040 switch (combiner) {
1041 case Query::OP_SYNONYM:
1042 desc += "SYNONYM ";
1043 break;
1044 case Query::OP_MAX:
1045 desc += "MAX ";
1046 break;
1047 case Query::OP_OR:
1048 desc += "OR ";
1049 break;
1050 default:
1051 desc += "BAD ";
1052 break;
1054 description_append(desc, pattern);
1055 return desc;
1058 Xapian::termcount
1059 QueryBranch::get_length() const XAPIAN_NOEXCEPT
1061 // Sum results from all subqueries.
1062 Xapian::termcount result = 0;
1063 QueryVector::const_iterator i;
1064 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1065 // MatchNothing subqueries should have been removed by done().
1066 Assert((*i).internal.get());
1067 result += (*i).internal->get_length();
1069 return result;
1072 #define MULTIWAY(X) static_cast<unsigned char>(0x80 | (X) << 3)
1073 #define MISC(X) static_cast<unsigned char>(X)
1074 void
1075 QueryBranch::serialise_(string & result, Xapian::termcount parameter) const
1077 static const unsigned char first_byte[] = {
1078 MULTIWAY(0), // OP_AND
1079 MULTIWAY(1), // OP_OR
1080 MULTIWAY(2), // OP_AND_NOT
1081 MULTIWAY(3), // OP_XOR
1082 MULTIWAY(4), // OP_AND_MAYBE
1083 MULTIWAY(5), // OP_FILTER
1084 MULTIWAY(14), // OP_NEAR
1085 MULTIWAY(15), // OP_PHRASE
1086 0, // OP_VALUE_RANGE
1087 MISC(3), // OP_SCALE_WEIGHT
1088 MULTIWAY(13), // OP_ELITE_SET
1089 0, // OP_VALUE_GE
1090 0, // OP_VALUE_LE
1091 MULTIWAY(6), // OP_SYNONYM
1092 MULTIWAY(7) // OP_MAX
1094 Xapian::Query::op op_ = get_op();
1095 AssertRel(size_t(op_),<,sizeof(first_byte));
1096 unsigned char ch = first_byte[op_];
1097 if (ch & 0x80) {
1098 // Multi-way operator.
1099 if (subqueries.size() < 8)
1100 ch |= subqueries.size();
1101 result += ch;
1102 if (subqueries.size() >= 8)
1103 result += encode_length(subqueries.size() - 8);
1104 if (ch >= MULTIWAY(13))
1105 result += encode_length(parameter);
1106 } else {
1107 result += ch;
1110 QueryVector::const_iterator i;
1111 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1112 // MatchNothing subqueries should have been removed by done().
1113 Assert((*i).internal.get());
1114 (*i).internal->serialise(result);
1117 // For OP_NEAR, OP_PHRASE, and OP_ELITE_SET, the window/set size gets
1118 // appended next by an overloaded serialise() method in the subclass.
1121 void
1122 QueryBranch::serialise(string & result) const
1124 QueryBranch::serialise_(result);
1127 void
1128 QueryNear::serialise(string & result) const
1130 // FIXME: window - subqueries.size() ?
1131 QueryBranch::serialise_(result, window);
1134 void
1135 QueryPhrase::serialise(string & result) const
1137 // FIXME: window - subqueries.size() ?
1138 QueryBranch::serialise_(result, window);
1141 void
1142 QueryEliteSet::serialise(string & result) const
1144 // FIXME: set_size - subqueries.size() ?
1145 QueryBranch::serialise_(result, set_size);
1148 void
1149 QueryBranch::gather_terms(void * void_terms) const
1151 // Gather results from all subqueries.
1152 QueryVector::const_iterator i;
1153 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1154 // MatchNothing subqueries should have been removed by done().
1155 Assert((*i).internal.get());
1156 (*i).internal->gather_terms(void_terms);
1160 void
1161 QueryBranch::do_or_like(OrContext& ctx, QueryOptimiser * qopt, double factor,
1162 Xapian::termcount elite_set_size, size_t first) const
1164 LOGCALL_VOID(MATCH, "QueryBranch::do_or_like", ctx | qopt | factor | elite_set_size);
1166 // FIXME: we could optimise by merging OP_ELITE_SET and OP_OR like we do
1167 // for AND-like operations.
1169 // OP_SYNONYM with a single subquery is only simplified by
1170 // QuerySynonym::done() if the single subquery is a term or MatchAll.
1171 Assert(subqueries.size() >= 2 || get_op() == Query::OP_SYNONYM);
1173 vector<PostList *> postlists;
1174 postlists.reserve(subqueries.size() - first);
1176 QueryVector::const_iterator q;
1177 for (q = subqueries.begin() + first; q != subqueries.end(); ++q) {
1178 // MatchNothing subqueries should have been removed by done().
1179 Assert((*q).internal.get());
1180 (*q).internal->postlist_sub_or_like(ctx, qopt, factor);
1183 if (elite_set_size && elite_set_size < subqueries.size()) {
1184 ctx.select_elite_set(qopt, elite_set_size, subqueries.size());
1185 // FIXME: not right!
1189 PostList *
1190 QueryBranch::do_synonym(QueryOptimiser * qopt, double factor) const
1192 LOGCALL(MATCH, PostList *, "QueryBranch::do_synonym", qopt | factor);
1193 OrContext ctx(subqueries.size());
1194 do_or_like(ctx, qopt, 0.0);
1195 PostList * pl = ctx.postlist(qopt);
1196 if (factor == 0.0) {
1197 // If we have a factor of 0, we don't care about the weights, so
1198 // we're just like a normal OR query.
1199 return pl;
1202 // We currently assume wqf is 1 for calculating the synonym's weight
1203 // since conceptually the synonym is one "virtual" term. If we were
1204 // to combine multiple occurrences of the same synonym expansion into
1205 // a single instance with wqf set, we would want to track the wqf.
1207 // We build an OP_OR tree for OP_SYNONYM and then wrap it in a
1208 // SynonymPostList, which supplies the weights.
1209 RETURN(qopt->make_synonym_postlist(pl, factor));
1212 PostList *
1213 QueryBranch::do_max(QueryOptimiser * qopt, double factor) const
1215 LOGCALL(MATCH, PostList *, "QueryBranch::do_max", qopt | factor);
1216 OrContext ctx(subqueries.size());
1217 do_or_like(ctx, qopt, factor);
1218 if (factor == 0.0) {
1219 // If we have a factor of 0, we don't care about the weights, so
1220 // we're just like a normal OR query.
1221 RETURN(ctx.postlist(qopt));
1224 // We currently assume wqf is 1 for calculating the OP_MAX's weight
1225 // since conceptually the OP_MAX is one "virtual" term. If we were
1226 // to combine multiple occurrences of the same OP_MAX expansion into
1227 // a single instance with wqf set, we would want to track the wqf.
1228 RETURN(ctx.postlist_max(qopt));
1231 Xapian::Query::op
1232 QueryBranch::get_type() const XAPIAN_NOEXCEPT
1234 return get_op();
1237 size_t
1238 QueryBranch::get_num_subqueries() const XAPIAN_NOEXCEPT
1240 return subqueries.size();
1243 const Query
1244 QueryBranch::get_subquery(size_t n) const
1246 return subqueries[n];
1249 const string
1250 QueryBranch::get_description_helper(const char * op,
1251 Xapian::termcount parameter) const
1253 string desc = "(";
1254 QueryVector::const_iterator i;
1255 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1256 if (desc.size() > 1) {
1257 desc += op;
1258 if (parameter) {
1259 desc += str(parameter);
1260 desc += ' ';
1263 Assert((*i).internal.get());
1264 // MatchNothing subqueries should have been removed by done(), and we
1265 // shouldn't get called before done() is, since that happens at the
1266 // end of the Xapian::Query constructor.
1267 desc += (*i).internal->get_description();
1269 desc += ')';
1270 return desc;
1273 Query::Internal *
1274 QueryWindowed::done()
1276 // If window size not specified, default it.
1277 if (window == 0)
1278 window = subqueries.size();
1279 return QueryAndLike::done();
1282 void
1283 QueryScaleWeight::gather_terms(void * void_terms) const
1285 subquery.internal->gather_terms(void_terms);
1288 void QueryTerm::serialise(string & result) const
1290 size_t len = term.size();
1291 if (len == 0) {
1292 if (wqf == 1 && pos == 0) {
1293 // Query::MatchAll
1294 result += '\x0f';
1295 } else {
1296 // Weird mutant versions of MatchAll
1297 result += '\x0e';
1298 result += encode_length(wqf);
1299 result += encode_length(pos);
1301 } else if (wqf == 1) {
1302 if (pos == 0) {
1303 // Single occurrence probabilistic term without position set.
1304 if (len >= 16) {
1305 result += static_cast<char>(0x40 | 0x10);
1306 result += encode_length(term.size() - 16);
1307 } else {
1308 result += static_cast<char>(0x40 | 0x10 | len);
1310 result += term;
1311 } else {
1312 // Single occurrence probabilistic term with position set.
1313 if (len >= 16) {
1314 result += static_cast<char>(0x40 | 0x20);
1315 result += encode_length(term.size() - 16);
1316 } else {
1317 result += static_cast<char>(0x40 | 0x20 | len);
1319 result += term;
1320 result += encode_length(pos);
1322 } else if (wqf > 1 || pos > 0) {
1323 // General case.
1324 if (len >= 16) {
1325 result += static_cast<char>(0x40 | 0x30);
1326 result += encode_length(term.size() - 16);
1327 } else if (len) {
1328 result += static_cast<char>(0x40 | 0x30 | len);
1330 result += term;
1331 result += encode_length(wqf);
1332 result += encode_length(pos);
1333 } else {
1334 // Typical boolean term.
1335 AssertEq(wqf, 0);
1336 AssertEq(pos, 0);
1337 if (len >= 16) {
1338 result += static_cast<char>(0x40);
1339 result += encode_length(term.size() - 16);
1340 } else {
1341 result += static_cast<char>(0x40 | len);
1343 result += term;
1347 void QueryPostingSource::serialise(string & result) const
1349 result += static_cast<char>(0x0c);
1351 const string & n = source->name();
1352 result += encode_length(n.size());
1353 result += n;
1355 const string & s = source->serialise();
1356 result += encode_length(s.size());
1357 result += s;
1360 void QueryScaleWeight::serialise(string & result) const
1362 Assert(subquery.internal.get());
1363 const string & s = serialise_double(scale_factor);
1364 result += '\x0d';
1365 result += s;
1366 subquery.internal->serialise(result);
1369 struct is_matchnothing {
1370 bool operator()(const Xapian::Query & q) {
1371 return q.internal.get() == NULL;
1375 void
1376 QueryAndLike::add_subquery(const Xapian::Query & subquery)
1378 // If the AndLike is already MatchNothing, do nothing.
1379 if (subqueries.size() == 1 && subqueries[0].internal.get() == NULL)
1380 return;
1381 // If we're adding MatchNothing, discard any previous subqueries.
1382 if (subquery.internal.get() == NULL)
1383 subqueries.clear();
1384 subqueries.push_back(subquery);
1387 Query::Internal *
1388 QueryAndLike::done()
1390 // Empty AndLike gives MatchNothing.
1391 if (subqueries.empty())
1392 return NULL;
1393 // We handle any subquery being MatchNothing in add_subquery() by leaving
1394 // a single MatchNothing subquery, and so this check results in AndLike
1395 // giving MatchNothing.
1396 if (subqueries.size() == 1)
1397 return subqueries[0].internal.get();
1398 return this;
1401 PostingIterator::Internal *
1402 QueryAndLike::postlist(QueryOptimiser * qopt, double factor) const
1404 LOGCALL(QUERY, PostingIterator::Internal *, "QueryAndLike::postlist", qopt | factor);
1405 AndContext ctx(subqueries.size());
1406 postlist_sub_and_like(ctx, qopt, factor);
1407 RETURN(ctx.postlist(qopt));
1410 void
1411 QueryAndLike::postlist_sub_and_like(AndContext& ctx, QueryOptimiser * qopt, double factor) const
1413 QueryVector::const_iterator i;
1414 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1415 // MatchNothing subqueries should have been removed by done().
1416 Assert((*i).internal.get());
1417 (*i).internal->postlist_sub_and_like(ctx, qopt, factor);
1421 void
1422 QueryOrLike::add_subquery(const Xapian::Query & subquery)
1424 // Drop any subqueries which are MatchNothing.
1425 if (subquery.internal.get() != NULL)
1426 subqueries.push_back(subquery);
1429 Query::Internal *
1430 QueryOrLike::done()
1432 // An empty OrLike gives MatchNothing. Note that add_subquery() drops any
1433 // subqueries which are MatchNothing.
1434 if (subqueries.empty())
1435 return NULL;
1436 if (subqueries.size() == 1)
1437 return subqueries[0].internal.get();
1438 return this;
1441 void
1442 QueryAndNot::add_subquery(const Xapian::Query & subquery)
1444 // If the left side of AndNot is already MatchNothing, do nothing.
1445 if (subqueries.size() == 1 && subqueries[0].internal.get() == NULL)
1446 return;
1447 // Drop any 2nd or subsequent subqueries which are MatchNothing.
1448 if (subquery.internal.get() != NULL || subqueries.empty())
1449 subqueries.push_back(subquery);
1452 Query::Internal *
1453 QueryAndNot::done()
1455 // Any MatchNothing right subqueries get discarded by add_subquery() - if
1456 // that leaves just the left subquery, return that.
1458 // If left subquery is MatchNothing, then add_subquery() discards all right
1459 // subqueries, so this check also gives MatchNothing for this case.
1460 if (subqueries.size() == 1)
1461 return subqueries[0].internal.get();
1462 return this;
1465 void
1466 QueryAndMaybe::add_subquery(const Xapian::Query & subquery)
1468 // If the left side of AndMaybe is already MatchNothing, do nothing.
1469 if (subqueries.size() == 1 && subqueries[0].internal.get() == NULL)
1470 return;
1471 // Drop any 2nd or subsequent subqueries which are MatchNothing.
1472 if (subquery.internal.get() != NULL || subqueries.empty())
1473 subqueries.push_back(subquery);
1476 Query::Internal *
1477 QueryAndMaybe::done()
1479 // Any MatchNothing right subqueries get discarded by add_subquery() - if
1480 // that leaves just the left subquery, return that.
1482 // If left subquery is MatchNothing, then add_subquery() discards all right
1483 // subqueries, so this check also gives MatchNothing for this case.
1484 if (subqueries.size() == 1)
1485 return subqueries[0].internal.get();
1486 return this;
1489 PostingIterator::Internal *
1490 QueryOr::postlist(QueryOptimiser * qopt, double factor) const
1492 LOGCALL(QUERY, PostingIterator::Internal *, "QueryOr::postlist", qopt | factor);
1493 OrContext ctx(subqueries.size());
1494 do_or_like(ctx, qopt, factor);
1495 RETURN(ctx.postlist(qopt));
1498 void
1499 QueryOr::postlist_sub_or_like(OrContext& ctx, QueryOptimiser * qopt, double factor) const
1501 do_or_like(ctx, qopt, factor);
1504 PostingIterator::Internal *
1505 QueryAndNot::postlist(QueryOptimiser * qopt, double factor) const
1507 LOGCALL(QUERY, PostingIterator::Internal *, "QueryAndNot::postlist", qopt | factor);
1508 // FIXME: Combine and-like side with and-like stuff above.
1509 AutoPtr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
1510 OrContext ctx(subqueries.size() - 1);
1511 do_or_like(ctx, qopt, 0.0, 0, 1);
1512 AutoPtr<PostList> r(ctx.postlist(qopt));
1513 RETURN(new AndNotPostList(l.release(), r.release(),
1514 qopt->matcher, qopt->db_size));
1517 PostingIterator::Internal *
1518 QueryXor::postlist(QueryOptimiser * qopt, double factor) const
1520 LOGCALL(QUERY, PostingIterator::Internal *, "QueryXor::postlist", qopt | factor);
1521 XorContext ctx(subqueries.size());
1522 postlist_sub_xor(ctx, qopt, factor);
1523 RETURN(ctx.postlist(qopt));
1526 void
1527 QueryXor::postlist_sub_xor(XorContext& ctx, QueryOptimiser * qopt, double factor) const
1529 QueryVector::const_iterator i;
1530 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1531 // MatchNothing subqueries should have been removed by done().
1532 Assert((*i).internal.get());
1533 (*i).internal->postlist_sub_xor(ctx, qopt, factor);
1537 PostingIterator::Internal *
1538 QueryAndMaybe::postlist(QueryOptimiser * qopt, double factor) const
1540 LOGCALL(QUERY, PostingIterator::Internal *, "QueryAndMaybe::postlist", qopt | factor);
1541 // FIXME: Combine and-like side with and-like stuff above.
1542 AutoPtr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
1543 OrContext ctx(subqueries.size() - 1);
1544 do_or_like(ctx, qopt, factor, 0, 1);
1545 AutoPtr<PostList> r(ctx.postlist(qopt));
1546 RETURN(new AndMaybePostList(l.release(), r.release(),
1547 qopt->matcher, qopt->db_size));
1550 PostingIterator::Internal *
1551 QueryFilter::postlist(QueryOptimiser * qopt, double factor) const
1553 LOGCALL(QUERY, PostingIterator::Internal *, "QueryFilter::postlist", qopt | factor);
1554 // FIXME: Combine and-like stuff, like QueryOptimiser.
1555 AssertEq(subqueries.size(), 2);
1556 PostList * pls[2];
1557 AutoPtr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
1558 pls[1] = subqueries[1].internal->postlist(qopt, 0.0);
1559 pls[0] = l.release();
1560 RETURN(new MultiAndPostList(pls, pls + 2, qopt->matcher, qopt->db_size));
1563 void
1564 QueryFilter::postlist_sub_and_like(AndContext& ctx, QueryOptimiser * qopt, double factor) const
1566 QueryVector::const_iterator i;
1567 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1568 // MatchNothing subqueries should have been removed by done().
1569 Assert((*i).internal.get());
1570 (*i).internal->postlist_sub_and_like(ctx, qopt, factor);
1571 // Second and subsequent subqueries are unweighted.
1572 factor = 0.0;
1576 void
1577 QueryWindowed::postlist_windowed(Query::op op, AndContext& ctx, QueryOptimiser * qopt, double factor) const
1579 // FIXME: should has_positions() be on the combined DB (not this sub)?
1580 if (qopt->db.has_positions()) {
1581 bool old_need_positions = qopt->need_positions;
1582 qopt->need_positions = true;
1584 QueryVector::const_iterator i;
1585 for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1586 // MatchNothing subqueries should have been removed by done().
1587 Assert((*i).internal.get());
1588 ctx.add_postlist((*i).internal->postlist(qopt, factor));
1590 // Record the positional filter to apply higher up the tree.
1591 ctx.add_pos_filter(op, subqueries.size(), window);
1593 qopt->need_positions = old_need_positions;
1594 } else {
1595 QueryAndLike::postlist_sub_and_like(ctx, qopt, factor);
1599 void
1600 QueryPhrase::postlist_sub_and_like(AndContext & ctx, QueryOptimiser * qopt, double factor) const
1602 QueryWindowed::postlist_windowed(Query::OP_PHRASE, ctx, qopt, factor);
1605 void
1606 QueryNear::postlist_sub_and_like(AndContext & ctx, QueryOptimiser * qopt, double factor) const
1608 QueryWindowed::postlist_windowed(Query::OP_NEAR, ctx, qopt, factor);
1611 PostingIterator::Internal *
1612 QueryEliteSet::postlist(QueryOptimiser * qopt, double factor) const
1614 LOGCALL(QUERY, PostingIterator::Internal *, "QueryEliteSet::postlist", qopt | factor);
1615 OrContext ctx(subqueries.size());
1616 do_or_like(ctx, qopt, factor, set_size);
1617 RETURN(ctx.postlist(qopt));
1620 void
1621 QueryEliteSet::postlist_sub_or_like(OrContext& ctx, QueryOptimiser * qopt, double factor) const
1623 do_or_like(ctx, qopt, factor, set_size);
1626 PostingIterator::Internal *
1627 QuerySynonym::postlist(QueryOptimiser * qopt, double factor) const
1629 LOGCALL(QUERY, PostingIterator::Internal *, "QuerySynonym::postlist", qopt | factor);
1630 // Save and restore total_subqs so we only add one for the whole
1631 // OP_SYNONYM subquery (or none if we're not weighted).
1632 Xapian::termcount save_total_subqs = qopt->get_total_subqs();
1633 if (factor != 0.0)
1634 ++save_total_subqs;
1635 PostList * pl = do_synonym(qopt, factor);
1636 qopt->set_total_subqs(save_total_subqs);
1637 RETURN(pl);
1640 Query::Internal *
1641 QuerySynonym::done()
1643 // An empty Synonym gives MatchNothing. Note that add_subquery() drops any
1644 // subqueries which are MatchNothing.
1645 if (subqueries.empty())
1646 return NULL;
1647 // Synonym of a single subquery should only be simplified if that subquery
1648 // is a term (or MatchAll), or if it's also OP_SYNONYM. Note that
1649 // MatchNothing subqueries are dropped, so we'd never get here with a
1650 // single MatchNothing subquery.
1651 if (subqueries.size() == 1) {
1652 Query::op sub_type = subqueries[0].get_type();
1653 if (sub_type == Query::LEAF_TERM || sub_type == Query::LEAF_MATCH_ALL ||
1654 sub_type == Query::OP_SYNONYM) {
1655 return subqueries[0].internal.get();
1658 return this;
1661 PostingIterator::Internal *
1662 QueryMax::postlist(QueryOptimiser * qopt, double factor) const
1664 LOGCALL(QUERY, PostingIterator::Internal *, "QueryMax::postlist", qopt | factor);
1665 // Save and restore total_subqs so we only add one for the whole
1666 // OP_MAX subquery (or none if we're not weighted).
1667 Xapian::termcount save_total_subqs = qopt->get_total_subqs();
1668 if (factor != 0.0)
1669 ++save_total_subqs;
1670 PostList * pl = do_max(qopt, factor);
1671 qopt->set_total_subqs(save_total_subqs);
1672 RETURN(pl);
1675 Xapian::Query::op
1676 QueryAnd::get_op() const
1678 return Xapian::Query::OP_AND;
1681 Xapian::Query::op
1682 QueryOr::get_op() const
1684 return Xapian::Query::OP_OR;
1687 Xapian::Query::op
1688 QueryAndNot::get_op() const
1690 return Xapian::Query::OP_AND_NOT;
1693 Xapian::Query::op
1694 QueryXor::get_op() const
1696 return Xapian::Query::OP_XOR;
1699 Xapian::Query::op
1700 QueryAndMaybe::get_op() const
1702 return Xapian::Query::OP_AND_MAYBE;
1705 Xapian::Query::op
1706 QueryFilter::get_op() const
1708 return Xapian::Query::OP_FILTER;
1711 Xapian::Query::op
1712 QueryNear::get_op() const
1714 return Xapian::Query::OP_NEAR;
1717 Xapian::Query::op
1718 QueryPhrase::get_op() const
1720 return Xapian::Query::OP_PHRASE;
1723 Xapian::Query::op
1724 QueryEliteSet::get_op() const
1726 return Xapian::Query::OP_ELITE_SET;
1729 Xapian::Query::op
1730 QuerySynonym::get_op() const
1732 return Xapian::Query::OP_SYNONYM;
1735 Xapian::Query::op
1736 QueryMax::get_op() const
1738 return Xapian::Query::OP_MAX;
1741 Xapian::Query::op
1742 QueryWildcard::get_op() const
1744 return Xapian::Query::OP_WILDCARD;
1747 string
1748 QueryAnd::get_description() const
1750 return get_description_helper(" AND ");
1753 string
1754 QueryOr::get_description() const
1756 return get_description_helper(" OR ");
1759 string
1760 QueryAndNot::get_description() const
1762 return get_description_helper(" AND_NOT ");
1765 string
1766 QueryXor::get_description() const
1768 return get_description_helper(" XOR ");
1771 string
1772 QueryAndMaybe::get_description() const
1774 return get_description_helper(" AND_MAYBE ");
1777 string
1778 QueryFilter::get_description() const
1780 return get_description_helper(" FILTER ");
1783 string
1784 QueryNear::get_description() const
1786 return get_description_helper(" NEAR ", window);
1789 string
1790 QueryPhrase::get_description() const
1792 return get_description_helper(" PHRASE ", window);
1795 string
1796 QueryEliteSet::get_description() const
1798 return get_description_helper(" ELITE_SET ", set_size);
1801 string
1802 QuerySynonym::get_description() const
1804 if (subqueries.size() == 1) {
1805 string d = "(SYNONYM ";
1806 d += subqueries[0].internal->get_description();
1807 d += ")";
1808 return d;
1810 return get_description_helper(" SYNONYM ");
1813 string
1814 QueryMax::get_description() const
1816 return get_description_helper(" MAX ");