xapian-core/api/queryinternal.cc

   1 /** @file queryinternal.cc
   2  * @brief Xapian::Query internals
   3  */
   4 /* Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014,2015 Olly Betts
   5  * Copyright (C) 2008,2009 Lemur Consulting Ltd
   6  *
   7  * This program is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License as
   9  * published by the Free Software Foundation; either version 2 of the
  10  * License, or (at your option) any later version.
  11  *
  12  * This program is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  * GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with this program; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
  20  */
  21
  22 #include <config.h>
  23
  24 #include "queryinternal.h"
  25
  26 #include "xapian/postingsource.h"
  27 #include "xapian/query.h"
  28
  29 #include "matcher/const_database_wrapper.h"
  30 #include "leafpostlist.h"
  31 #include "matcher/andmaybepostlist.h"
  32 #include "matcher/andnotpostlist.h"
  33 #include "emptypostlist.h"
  34 #include "matcher/exactphrasepostlist.h"
  35 #include "matcher/externalpostlist.h"
  36 #include "matcher/maxpostlist.h"
  37 #include "matcher/multiandpostlist.h"
  38 #include "matcher/multixorpostlist.h"
  39 #include "matcher/nearpostlist.h"
  40 #include "matcher/orpostlist.h"
  41 #include "matcher/phrasepostlist.h"
  42 #include "matcher/queryoptimiser.h"
  43 #include "matcher/valuerangepostlist.h"
  44 #include "matcher/valuegepostlist.h"
  45 #include "net/length.h"
  46 #include "serialise-double.h"
  47 #include "termlist.h"
  48
  49 #include "autoptr.h"
  50 #include "debuglog.h"
  51 #include "omassert.h"
  52 #include "str.h"
  53 #include "unicode/description_append.h"
  54
  55 #include <algorithm>
  56 #include <functional>
  57 #include <list>
  58 #include <string>
  59 #include <vector>
  60
  61 using namespace std;
  62
  63 template<class CLASS> struct delete_ptr {
  64     void operator()(CLASS *p) { delete p; }
  65 };
  66
  67 using Xapian::Internal::AndContext;
  68 using Xapian::Internal::OrContext;
  69 using Xapian::Internal::XorContext;
  70
  71 namespace Xapian {
  72
  73 namespace Internal {
  74
  75 /** Class providing an operator which sorts postlists to select max or terms.
  76  *  This returns true if a has a (strictly) greater termweight than b,
  77  *  unless a or b contain no documents, in which case the other one is
  78  *  selected.
  79  */
  80 struct CmpMaxOrTerms {
  81     /** Return true if and only if a has a strictly greater termweight than b;
  82      *  with the proviso that if the termfrequency of a or b is 0, then the
  83      *  termweight is considered to be 0.
  84      *
  85      *  We use termfreq_max() because we really don't want to exclude a
  86      *  postlist which has a low but non-zero termfrequency: the estimate
  87      *  is quite likely to be zero in this case.
  88      */
  89     bool operator()(const PostList *a, const PostList *b) {
  90         if (a->get_termfreq_max() == 0) return false;
  91         if (b->get_termfreq_max() == 0) return true;
  92
  93 #if (defined(__i386__) && !defined(__SSE2_MATH__)) || defined(__mc68000__) || defined(__mc68010__) || defined(__mc68020__) || defined(__mc68030__)
  94         // On some architectures, most common of which is x86, floating point
  95         // values are calculated and stored in registers with excess precision.
  96         // If the two get_maxweight() calls below return identical values in a
  97         // register, the excess precision may be dropped for one of them but
  98         // not the other (e.g. because the compiler saves the first calculated
  99         // weight to memory while calculating the second, then reloads it to
 100         // compare).  This leads to both a > b and b > a being true, which
 101         // violates the antisymmetry property of the strict weak ordering
 102         // required by nth_element().  This can have serious consequences (e.g.
 103         // segfaults).
 104         //
 105         // Note that m68k only has excess precision in earlier models - 68040
 106         // and later are OK:
 107         // http://gcc.gnu.org/ml/gcc-patches/2008-11/msg00105.html
 108         //
 109         // To avoid this, we store each result in a volatile double prior to
 110         // comparing them.  This means that the result of this test should
 111         // match that on other architectures with the same double format (which
 112         // is desirable), and actually has less overhead than rounding both
 113         // results to float (which is another approach which works).
 114         volatile double a_max_wt = a->get_maxweight();
 115         volatile double b_max_wt = b->get_maxweight();
 116         return a_max_wt > b_max_wt;
 117 #else
 118         return a->get_maxweight() > b->get_maxweight();
 119 #endif
 120     }
 121 };
 122
 123 /// Comparison functor which orders PostList* by descending get_termfreq_est().
 124 struct ComparePostListTermFreqAscending {
 125     /// Order by descending get_termfreq_est().
 126     bool operator()(const PostList *a, const PostList *b) {
 127         return a->get_termfreq_est() > b->get_termfreq_est();
 128     }
 129 };
 130
 131 class Context {
 132   protected:
 133     vector<PostList*> pls;
 134
 135   public:
 136     explicit Context(size_t reserve);
 137
 138     ~Context();
 139
 140     void add_postlist(PostList * pl) {
 141         pls.push_back(pl);
 142     }
 143
 144     bool empty() const {
 145         return pls.empty();
 146     }
 147
 148     size_t size() const {
 149         return pls.size();
 150     }
 151 };
 152
 153 Context::Context(size_t reserve) {
 154     pls.reserve(reserve);
 155 }
 156
 157 Context::~Context()
 158 {
 159     for_each(pls.begin(), pls.end(), delete_ptr<PostList>());
 160 }
 161
 162 class OrContext : public Context {
 163   public:
 164     explicit OrContext(size_t reserve) : Context(reserve) { }
 165
 166     /// Select the best set_size postlists from the last out_of added.
 167     void select_elite_set(QueryOptimiser * qopt,
 168                           size_t set_size, size_t out_of);
 169
 170     /// Select the set_size postlists with the highest term frequency.
 171     void select_most_frequent(QueryOptimiser * qopt, size_t set_size);
 172
 173     PostList * postlist(QueryOptimiser* qopt);
 174     PostList * postlist_max(QueryOptimiser* qopt);
 175 };
 176
 177 void
 178 OrContext::select_elite_set(QueryOptimiser * qopt,
 179                             size_t set_size, size_t out_of)
 180 {
 181     // Call recalc_maxweight() as otherwise get_maxweight()
 182     // may not be valid before next() or skip_to()
 183     vector<PostList*>::iterator begin = pls.begin() + pls.size() - out_of;
 184     for_each(begin, pls.end(), mem_fun(&PostList::recalc_maxweight));
 185
 186     nth_element(begin, begin + set_size - 1, pls.end(), CmpMaxOrTerms());
 187     const PostList * hint_pl = qopt->get_hint_postlist();
 188     for_each(begin + set_size, pls.end(),
 189         [&qopt, &hint_pl](const PostList * p) {
 190             if (rare(p == hint_pl)) {
 191                 qopt->take_hint_ownership();
 192                 hint_pl = NULL;
 193             } else {
 194                 delete p;
 195             }
 196         });
 197     pls.resize(pls.size() - out_of + set_size);
 198 }
 199
 200 void
 201 OrContext::select_most_frequent(QueryOptimiser * qopt, size_t set_size)
 202 {
 203     vector<PostList*>::iterator begin = pls.begin();
 204     nth_element(begin, begin + set_size - 1, pls.end(),
 205                 ComparePostListTermFreqAscending());
 206     const PostList * hint_pl = qopt->get_hint_postlist();
 207     for_each(begin + set_size, pls.end(),
 208         [&qopt, &hint_pl](const PostList * p) {
 209             if (rare(p == hint_pl)) {
 210                 qopt->take_hint_ownership();
 211                 hint_pl = NULL;
 212             } else {
 213                 delete p;
 214             }
 215         });
 216     pls.resize(set_size);
 217 }
 218
 219 PostList *
 220 OrContext::postlist(QueryOptimiser* qopt)
 221 {
 222     Assert(!pls.empty());
 223
 224     if (pls.size() == 1) {
 225         PostList * pl = pls[0];
 226         pls.clear();
 227         return pl;
 228     }
 229
 230     // Make postlists into a heap so that the postlist with the greatest term
 231     // frequency is at the top of the heap.
 232     make_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
 233
 234     // Now build a tree of binary OrPostList objects.
 235     //
 236     // The algorithm used to build the tree is like that used to build an
 237     // optimal Huffman coding tree.  If we called next() repeatedly, this
 238     // arrangement would minimise the number of method calls.  Generally we
 239     // don't actually do that, but this arrangement is still likely to be a
 240     // good one, and it does minimise the work in the worst case.
 241     while (true) {
 242         // We build the tree such that at each branch:
 243         //
 244         //   l.get_termfreq_est() >= r.get_termfreq_est()
 245         //
 246         // We do this so that the OrPostList class can be optimised assuming
 247         // that this is the case.
 248         PostList * r = pls.front();
 249         pop_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
 250         pls.pop_back();
 251         PostList * pl;
 252         pl = new OrPostList(pls.front(), r, qopt->matcher, qopt->db_size);
 253
 254         if (pls.size() == 1) {
 255             pls.clear();
 256             return pl;
 257         }
 258
 259         pop_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
 260         pls.back() = pl;
 261         push_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
 262     }
 263 }
 264
 265 PostList *
 266 OrContext::postlist_max(QueryOptimiser* qopt)
 267 {
 268     Assert(!pls.empty());
 269
 270     if (pls.size() == 1) {
 271         PostList * pl = pls[0];
 272         pls.clear();
 273         return pl;
 274     }
 275
 276     // Sort the postlists so that the postlist with the greatest term frequency
 277     // is first.
 278     sort(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
 279
 280     PostList * pl;
 281     pl = new MaxPostList(pls.begin(), pls.end(), qopt->matcher, qopt->db_size);
 282
 283     pls.clear();
 284     return pl;
 285 }
 286
 287 class XorContext : public Context {
 288   public:
 289     explicit XorContext(size_t reserve) : Context(reserve) { }
 290
 291     PostList * postlist(QueryOptimiser* qopt);
 292 };
 293
 294 PostList *
 295 XorContext::postlist(QueryOptimiser* qopt)
 296 {
 297     Xapian::doccount db_size = qopt->db_size;
 298     PostList * pl;
 299     pl = new MultiXorPostList(pls.begin(), pls.end(), qopt->matcher, db_size);
 300
 301     // Empty pls so our destructor doesn't delete them all!
 302     pls.clear();
 303     return pl;
 304 }
 305
 306 class AndContext : public Context {
 307     class PosFilter {
 308         Xapian::Query::op op_;
 309
 310         /// Start and end indices for the PostLists this positional filter uses.
 311         size_t begin, end;
 312
 313         Xapian::termcount window;
 314
 315       public:
 316         PosFilter(Xapian::Query::op op__, size_t begin_, size_t end_,
 317                   Xapian::termcount window_)
 318             : op_(op__), begin(begin_), end(end_), window(window_) { }
 319
 320         PostList * postlist(PostList * pl, const vector<PostList*>& pls) const;
 321     };
 322
 323     list<PosFilter> pos_filters;
 324
 325   public:
 326     explicit AndContext(size_t reserve) : Context(reserve) { }
 327
 328     void add_pos_filter(Query::op op_,
 329                         size_t n_subqs,
 330                         Xapian::termcount window);
 331
 332     PostList * postlist(QueryOptimiser* qopt);
 333 };
 334
 335 PostList *
 336 AndContext::PosFilter::postlist(PostList * pl, const vector<PostList*>& pls) const
 337 try {
 338     vector<PostList *>::const_iterator terms_begin = pls.begin() + begin;
 339     vector<PostList *>::const_iterator terms_end = pls.begin() + end;
 340
 341     if (op_ == Xapian::Query::OP_NEAR) {
 342         pl = new NearPostList(pl, window, terms_begin, terms_end);
 343     } else if (window == end - begin) {
 344         AssertEq(op_, Xapian::Query::OP_PHRASE);
 345         pl = new ExactPhrasePostList(pl, terms_begin, terms_end);
 346     } else {
 347         AssertEq(op_, Xapian::Query::OP_PHRASE);
 348         pl = new PhrasePostList(pl, window, terms_begin, terms_end);
 349     }
 350     return pl;
 351 } catch (...) {
 352     delete pl;
 353     throw;
 354 }
 355
 356 void
 357 AndContext::add_pos_filter(Query::op op_,
 358                            size_t n_subqs,
 359                            Xapian::termcount window)
 360 {
 361     Assert(n_subqs > 1);
 362     size_t end = pls.size();
 363     size_t begin = end - n_subqs;
 364     pos_filters.push_back(PosFilter(op_, begin, end, window));
 365 }
 366
 367 PostList *
 368 AndContext::postlist(QueryOptimiser* qopt)
 369 {
 370     AutoPtr<PostList> pl(new MultiAndPostList(pls.begin(), pls.end(),
 371                                               qopt->matcher, qopt->db_size));
 372
 373     // Sort the positional filters to try to apply them in an efficient order.
 374     // FIXME: We need to figure out what that is!  Try applying lowest cf/tf
 375     // first?
 376
 377     // Apply any positional filters.
 378     list<PosFilter>::const_iterator i;
 379     for (i = pos_filters.begin(); i != pos_filters.end(); ++i) {
 380         const PosFilter & filter = *i;
 381         pl.reset(filter.postlist(pl.release(), pls));
 382     }
 383
 384     // Empty pls so our destructor doesn't delete them all!
 385     pls.clear();
 386     return pl.release();
 387 }
 388
 389 }
 390
 391 Query::Internal::~Internal() { }
 392
 393 size_t
 394 Query::Internal::get_num_subqueries() const XAPIAN_NOEXCEPT
 395 {
 396     return 0;
 397 }
 398
 399 const Query
 400 Query::Internal::get_subquery(size_t) const
 401 {
 402     throw Xapian::InvalidArgumentError("get_subquery() not meaningful for this Query object");
 403 }
 404
 405 void
 406 Query::Internal::gather_terms(void *) const
 407 {
 408 }
 409
 410 Xapian::termcount
 411 Query::Internal::get_length() const XAPIAN_NOEXCEPT
 412 {
 413     return 0;
 414 }
 415
 416 Query::Internal *
 417 Query::Internal::unserialise(const char ** p, const char * end,
 418                              const Registry & reg)
 419 {
 420     if (*p == end)
 421         return NULL;
 422     unsigned char ch = *(*p)++;
 423     switch (ch >> 5) {
 424         case 4: case 5: case 6: case 7: { // Multi-way branch
 425             size_t n_subqs = ch & 0x07;
 426             if (n_subqs == 0) {
 427                 decode_length(p, end, n_subqs);
 428                 n_subqs += 8;
 429             }
 430             unsigned char code = (ch >> 3) & 0x0f;
 431             Xapian::termcount parameter = 0;
 432             if (code >= 13)
 433                 decode_length(p, end, parameter);
 434             Xapian::Internal::QueryBranch * result;
 435             switch (code) {
 436                 case 0: // OP_AND
 437                     result = new Xapian::Internal::QueryAnd(n_subqs);
 438                     break;
 439                 case 1: // OP_OR
 440                     result = new Xapian::Internal::QueryOr(n_subqs);
 441                     break;
 442                 case 2: // OP_AND_NOT
 443                     result = new Xapian::Internal::QueryAndNot(n_subqs);
 444                     break;
 445                 case 3: // OP_XOR
 446                     result = new Xapian::Internal::QueryXor(n_subqs);
 447                     break;
 448                 case 4: // OP_AND_MAYBE
 449                     result = new Xapian::Internal::QueryAndMaybe(n_subqs);
 450                     break;
 451                 case 5: // OP_FILTER
 452                     result = new Xapian::Internal::QueryFilter(n_subqs);
 453                     break;
 454                 case 6: // OP_SYNONYM
 455                     result = new Xapian::Internal::QuerySynonym(n_subqs);
 456                     break;
 457                 case 7: // OP_MAX
 458                     result = new Xapian::Internal::QueryMax(n_subqs);
 459                     break;
 460                 case 13: // OP_ELITE_SET
 461                     result = new Xapian::Internal::QueryEliteSet(n_subqs,
 462                                                                  parameter);
 463                     break;
 464                 case 14: // OP_NEAR
 465                     result = new Xapian::Internal::QueryNear(n_subqs,
 466                                                              parameter);
 467                     break;
 468                 case 15: // OP_PHRASE
 469                     result = new Xapian::Internal::QueryPhrase(n_subqs,
 470                                                                parameter);
 471                     break;
 472                 default:
 473                     // 8 to 12 are currently unused.
 474                     throw SerialisationError("Unknown multi-way branch Query operator");
 475             }
 476             do {
 477                 result->add_subquery(Xapian::Query(unserialise(p, end, reg)));
 478             } while (--n_subqs);
 479             result->done();
 480             return result;
 481         }
 482         case 2: case 3: { // Term
 483             size_t len = ch & 0x0f;
 484             if (len == 0) {
 485                 decode_length(p, end, len);
 486                 len += 16;
 487             }
 488             if (size_t(end - *p) < len)
 489                 throw SerialisationError("Not enough data");
 490             string term(*p, len);
 491             *p += len;
 492
 493             int code = ((ch >> 4) & 0x03);
 494
 495             Xapian::termcount wqf = static_cast<Xapian::termcount>(code > 0);
 496             if (code == 3)
 497                 decode_length(p, end, wqf);
 498
 499             Xapian::termpos pos = 0;
 500             if (code >= 2)
 501                 decode_length(p, end, pos);
 502
 503             return new Xapian::Internal::QueryTerm(term, wqf, pos);
 504         }
 505         case 1: { // OP_VALUE_RANGE or OP_VALUE_GE or OP_VALUE_LE
 506             Xapian::valueno slot = ch & 15;
 507             if (slot == 15) {
 508                 decode_length(p, end, slot);
 509                 slot += 15;
 510             }
 511             size_t len;
 512             decode_length_and_check(p, end, len);
 513             string begin(*p, len);
 514             *p += len;
 515             if (ch & 0x10) {
 516                 // OP_VALUE_GE
 517                 return new Xapian::Internal::QueryValueGE(slot, begin);
 518             }
 519
 520             // OP_VALUE_RANGE
 521             decode_length_and_check(p, end, len);
 522             string end_(*p, len);
 523             *p += len;
 524             if (begin.empty()) // FIXME: is this right?
 525                 return new Xapian::Internal::QueryValueLE(slot, end_);
 526             return new Xapian::Internal::QueryValueRange(slot, begin, end_);
 527         }
 528         case 0: {
 529             switch (ch & 0x0f) {
 530                 case 0x0b: { // Wildcard
 531                     if (*p == end)
 532                         throw SerialisationError("not enough data");
 533                     Xapian::termcount max_expansion;
 534                     decode_length(p, end, max_expansion);
 535                     if (end - *p < 2)
 536                         throw SerialisationError("not enough data");
 537                     int max_type = static_cast<unsigned char>(*(*p)++);
 538                     op combiner = static_cast<op>(*(*p)++);
 539                     size_t len;
 540                     decode_length_and_check(p, end, len);
 541                     string pattern(*p, len);
 542                     *p += len;
 543                     return new Xapian::Internal::QueryWildcard(pattern,
 544                                                                max_expansion,
 545                                                                max_type,
 546                                                                combiner);
 547                 }
 548                 case 0x0c: { // PostingSource
 549                     size_t len;
 550                     decode_length_and_check(p, end, len);
 551                     string name(*p, len);
 552                     *p += len;
 553
 554                     const PostingSource * reg_source = reg.get_posting_source(name);
 555                     if (!reg_source) {
 556                         string m = "PostingSource ";
 557                         m += name;
 558                         m += " not registered";
 559                         throw SerialisationError(m);
 560                     }
 561
 562                     decode_length_and_check(p, end, len);
 563                     PostingSource * source =
 564                         reg_source->unserialise_with_registry(string(*p, len),
 565                                                               reg);
 566                     *p += len;
 567                     return new Xapian::Internal::QueryPostingSource(source, true);
 568                 }
 569                 case 0x0d: {
 570                     using Xapian::Internal::QueryScaleWeight;
 571                     double scale_factor = unserialise_double(p, end);
 572                     return new QueryScaleWeight(scale_factor,
 573                                                 Query(unserialise(p, end, reg)));
 574                 }
 575                 case 0x0e: {
 576                     Xapian::termcount wqf;
 577                     Xapian::termpos pos;
 578                     decode_length(p, end, wqf);
 579                     decode_length(p, end, pos);
 580                     return new Xapian::Internal::QueryTerm(string(), wqf, pos);
 581                 }
 582                 case 0x0f:
 583                     return Query::MatchAll.internal.get();
 584                 default: // Others currently unused.
 585                     break;
 586             }
 587             break;
 588         }
 589     }
 590     string msg = "Unknown Query serialisation: ";
 591     msg += str(ch);
 592     throw SerialisationError(msg);
 593 }
 594
 595 void
 596 Query::Internal::postlist_sub_and_like(AndContext& ctx,
 597                                        QueryOptimiser * qopt,
 598                                        double factor) const
 599 {
 600     ctx.add_postlist(postlist(qopt, factor));
 601 }
 602
 603 void
 604 Query::Internal::postlist_sub_or_like(OrContext& ctx,
 605                                       QueryOptimiser * qopt,
 606                                       double factor) const
 607 {
 608     ctx.add_postlist(postlist(qopt, factor));
 609 }
 610
 611 void
 612 Query::Internal::postlist_sub_xor(XorContext& ctx,
 613                                   QueryOptimiser * qopt,
 614                                   double factor) const
 615 {
 616     ctx.add_postlist(postlist(qopt, factor));
 617 }
 618
 619 namespace Internal {
 620
 621 Query::op
 622 QueryTerm::get_type() const XAPIAN_NOEXCEPT
 623 {
 624     return term.empty() ? Query::LEAF_MATCH_ALL : Query::LEAF_TERM;
 625 }
 626
 627 string
 628 QueryTerm::get_description() const
 629 {
 630     string desc;
 631     if (term.empty()) {
 632         desc = "<alldocuments>";
 633     } else {
 634         description_append(desc, term);
 635     }
 636     if (wqf != 1) {
 637         desc += '#';
 638         desc += str(wqf);
 639     }
 640     if (pos) {
 641         desc += '@';
 642         desc += str(pos);
 643     }
 644     return desc;
 645 }
 646
 647 QueryPostingSource::QueryPostingSource(PostingSource * source_, bool owned_)
 648     : source(source_), owned(owned_)
 649 {
 650     if (!source)
 651         throw Xapian::InvalidArgumentError("source parameter can't be NULL");
 652     if (!owned) {
 653         PostingSource * cloned_source = source->clone();
 654         if (cloned_source) {
 655             source = cloned_source;
 656             owned = true;
 657         }
 658     }
 659 }
 660
 661 QueryPostingSource::~QueryPostingSource()
 662 {
 663     if (owned)
 664         delete source;
 665 }
 666
 667 Query::op
 668 QueryPostingSource::get_type() const XAPIAN_NOEXCEPT
 669 {
 670     return Query::LEAF_POSTING_SOURCE;
 671 }
 672
 673 string
 674 QueryPostingSource::get_description() const
 675 {
 676     string desc = "PostingSource(";
 677     desc += source->get_description();
 678     desc += ')';
 679     return desc;
 680 }
 681
 682 QueryScaleWeight::QueryScaleWeight(double factor, const Query & subquery_)
 683     : scale_factor(factor), subquery(subquery_)
 684 {
 685     if (rare(scale_factor < 0.0))
 686         throw Xapian::InvalidArgumentError("OP_SCALE_WEIGHT requires factor >= 0");
 687 }
 688
 689 Query::op
 690 QueryScaleWeight::get_type() const XAPIAN_NOEXCEPT
 691 {
 692     return Query::OP_SCALE_WEIGHT;
 693 }
 694
 695 size_t
 696 QueryScaleWeight::get_num_subqueries() const XAPIAN_NOEXCEPT
 697 {
 698     return 1;
 699 }
 700
 701 const Query
 702 QueryScaleWeight::get_subquery(size_t) const
 703 {
 704     return subquery;
 705 }
 706
 707 string
 708 QueryScaleWeight::get_description() const
 709 {
 710     Assert(subquery.internal.get());
 711     string desc = str(scale_factor);
 712     desc += " * ";
 713     desc += subquery.internal->get_description();
 714     return desc;
 715 }
 716
 717 PostingIterator::Internal *
 718 QueryTerm::postlist(QueryOptimiser * qopt, double factor) const
 719 {
 720     LOGCALL(QUERY, PostingIterator::Internal *, "QueryTerm::postlist", qopt | factor);
 721     if (factor != 0.0)
 722         qopt->inc_total_subqs();
 723     RETURN(qopt->open_post_list(term, wqf, factor));
 724 }
 725
 726 PostingIterator::Internal *
 727 QueryPostingSource::postlist(QueryOptimiser * qopt, double factor) const
 728 {
 729     LOGCALL(QUERY, PostingIterator::Internal *, "QueryPostingSource::postlist", qopt | factor);
 730     Assert(source);
 731     if (factor != 0.0)
 732         qopt->inc_total_subqs();
 733     Xapian::Database wrappeddb(new ConstDatabaseWrapper(&(qopt->db)));
 734     RETURN(new ExternalPostList(wrappeddb, source, factor, qopt->matcher));
 735 }
 736
 737 PostingIterator::Internal *
 738 QueryScaleWeight::postlist(QueryOptimiser * qopt, double factor) const
 739 {
 740     LOGCALL(QUERY, PostingIterator::Internal *, "QueryScaleWeight::postlist", qopt | factor);
 741     RETURN(subquery.internal->postlist(qopt, factor * scale_factor));
 742 }
 743
 744 void
 745 QueryTerm::gather_terms(void * void_terms) const
 746 {
 747     // Skip Xapian::Query::MatchAll (aka Xapian::Query("")).
 748     if (!term.empty()) {
 749         vector<pair<Xapian::termpos, string> > &terms =
 750             *static_cast<vector<pair<Xapian::termpos, string> >*>(void_terms);
 751         terms.push_back(make_pair(pos, term));
 752     }
 753 }
 754
 755 PostingIterator::Internal *
 756 QueryValueRange::postlist(QueryOptimiser *qopt, double factor) const
 757 {
 758     LOGCALL(QUERY, PostingIterator::Internal *, "QueryValueRange::postlist", qopt | factor);
 759     if (factor != 0.0)
 760         qopt->inc_total_subqs();
 761     const Xapian::Database::Internal & db = qopt->db;
 762     const string & lb = db.get_value_lower_bound(slot);
 763     // If lb.empty(), the backend doesn't provide value bounds.
 764     if (!lb.empty()) {
 765         if (end < lb) {
 766             RETURN(new EmptyPostList);
 767         }
 768         const string & ub = db.get_value_upper_bound(slot);
 769         if (begin > ub) {
 770             RETURN(new EmptyPostList);
 771         }
 772         if (end >= ub) {
 773             // If begin <= lb too, then the range check isn't needed, but we do
 774             // still need to consider which documents have a value set in this
 775             // slot.  If this value is set for all documents, we can replace it
 776             // with the MatchAll postlist, which is especially efficient if
 777             // there are no gaps in the docids.
 778             if (begin <= lb && db.get_value_freq(slot) == db.get_doccount()) {
 779                 RETURN(db.open_post_list(string()));
 780             }
 781             RETURN(new ValueGePostList(&db, slot, begin));
 782         }
 783     }
 784     RETURN(new ValueRangePostList(&db, slot, begin, end));
 785 }
 786
 787 void
 788 QueryValueRange::serialise(string & result) const
 789 {
 790     if (slot < 15) {
 791         result += static_cast<char>(0x20 | slot);
 792     } else {
 793         result += static_cast<char>(0x20 | 15);
 794         result += encode_length(slot - 15);
 795     }
 796     result += encode_length(begin.size());
 797     result += begin;
 798     result += encode_length(end.size());
 799     result += end;
 800 }
 801
 802 Query::op
 803 QueryValueRange::get_type() const XAPIAN_NOEXCEPT
 804 {
 805     return Query::OP_VALUE_RANGE;
 806 }
 807
 808 string
 809 QueryValueRange::get_description() const
 810 {
 811     string desc = "VALUE_RANGE ";
 812     desc += str(slot);
 813     desc += ' ';
 814     description_append(desc, begin);
 815     desc += ' ';
 816     description_append(desc, end);
 817     return desc;
 818 }
 819
 820 PostingIterator::Internal *
 821 QueryValueLE::postlist(QueryOptimiser *qopt, double factor) const
 822 {
 823     LOGCALL(QUERY, PostingIterator::Internal *, "QueryValueLE::postlist", qopt | factor);
 824     if (factor != 0.0)
 825         qopt->inc_total_subqs();
 826     const Xapian::Database::Internal & db = qopt->db;
 827     const string & lb = db.get_value_lower_bound(slot);
 828     // If lb.empty(), the backend doesn't provide value bounds.
 829     if (!lb.empty()) {
 830         if (limit < lb) {
 831             RETURN(new EmptyPostList);
 832         }
 833         if (limit >= db.get_value_upper_bound(slot)) {
 834             // The range check isn't needed, but we do still need to consider
 835             // which documents have a value set in this slot.  If this value is
 836             // set for all documents, we can replace it with the MatchAll
 837             // postlist, which is especially efficient if there are no gaps in
 838             // the docids.
 839             if (db.get_value_freq(slot) == db.get_doccount()) {
 840                 RETURN(db.open_post_list(string()));
 841             }
 842         }
 843     }
 844     RETURN(new ValueRangePostList(&db, slot, string(), limit));
 845 }
 846
 847 void
 848 QueryValueLE::serialise(string & result) const
 849 {
 850     // Encode as a range with an empty start (which only takes a single byte to
 851     // encode).
 852     if (slot < 15) {
 853         result += static_cast<char>(0x20 | slot);
 854     } else {
 855         result += static_cast<char>(0x20 | 15);
 856         result += encode_length(slot - 15);
 857     }
 858     result += encode_length(0);
 859     result += encode_length(limit.size());
 860     result += limit;
 861 }
 862
 863 Query::op
 864 QueryValueLE::get_type() const XAPIAN_NOEXCEPT
 865 {
 866     return Query::OP_VALUE_LE;
 867 }
 868
 869 string
 870 QueryValueLE::get_description() const
 871 {
 872     string desc = "VALUE_LE ";
 873     desc += str(slot);
 874     desc += ' ';
 875     description_append(desc, limit);
 876     return desc;
 877 }
 878
 879 PostingIterator::Internal *
 880 QueryValueGE::postlist(QueryOptimiser *qopt, double factor) const
 881 {
 882     LOGCALL(QUERY, PostingIterator::Internal *, "QueryValueGE::postlist", qopt | factor);
 883     if (factor != 0.0)
 884         qopt->inc_total_subqs();
 885     const Xapian::Database::Internal & db = qopt->db;
 886     const string & lb = db.get_value_lower_bound(slot);
 887     // If lb.empty(), the backend doesn't provide value bounds.
 888     if (!lb.empty()) {
 889         if (limit > db.get_value_upper_bound(slot)) {
 890             RETURN(new EmptyPostList);
 891         }
 892         if (limit < lb) {
 893             // The range check isn't needed, but we do still need to consider
 894             // which documents have a value set in this slot.  If this value is
 895             // set for all documents, we can replace it with the MatchAll
 896             // postlist, which is especially efficient if there are no gaps in
 897             // the docids.
 898             if (db.get_value_freq(slot) == db.get_doccount()) {
 899                 RETURN(db.open_post_list(string()));
 900             }
 901         }
 902     }
 903     RETURN(new ValueGePostList(&db, slot, limit));
 904 }
 905
 906 void
 907 QueryValueGE::serialise(string & result) const
 908 {
 909     if (slot < 15) {
 910         result += static_cast<char>(0x20 | 0x10 | slot);
 911     } else {
 912         result += static_cast<char>(0x20 | 0x10 | 15);
 913         result += encode_length(slot - 15);
 914     }
 915     result += encode_length(limit.size());
 916     result += limit;
 917 }
 918
 919 Query::op
 920 QueryValueGE::get_type() const XAPIAN_NOEXCEPT
 921 {
 922     return Query::OP_VALUE_GE;
 923 }
 924
 925 string
 926 QueryValueGE::get_description() const
 927 {
 928     string desc = "VALUE_GE ";
 929     desc += str(slot);
 930     desc += ' ';
 931     description_append(desc, limit);
 932     return desc;
 933 }
 934
 935 PostingIterator::Internal *
 936 QueryWildcard::postlist(QueryOptimiser * qopt, double factor) const
 937 {
 938     LOGCALL(QUERY, PostingIterator::Internal *, "QueryWildcard::postlist", qopt | factor);
 939     Query::op op = combiner;
 940     double or_factor = 0.0;
 941     if (factor == 0.0) {
 942         // If we have a factor of 0, we don't care about the weights, so
 943         // we're just like a normal OR query.
 944         op = Query::OP_OR;
 945     } else if (op != Query::OP_SYNONYM) {
 946         or_factor = factor;
 947     }
 948     OrContext ctx(0);
 949     AutoPtr<TermList> t(qopt->db.open_allterms(pattern));
 950     Xapian::termcount expansions_left = max_expansion;
 951     // If there's no expansion limit, set expansions_left to the maximum
 952     // value Xapian::termcount can hold.
 953     if (expansions_left == 0)
 954         --expansions_left;
 955     while (true) {
 956         t->next();
 957         if (t->at_end())
 958             break;
 959         if (max_type < Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT) {
 960             if (expansions_left-- == 0) {
 961                 if (max_type == Xapian::Query::WILDCARD_LIMIT_FIRST)
 962                     break;
 963                 string msg("Wildcard ");
 964                 msg += pattern;
 965                 msg += "* expands to more than ";
 966                 msg += str(max_expansion);
 967                 msg += " terms";
 968                 throw Xapian::WildcardError(msg);
 969             }
 970         }
 971         const string & term = t->get_termname();
 972         ctx.add_postlist(qopt->open_lazy_post_list(term, 1, or_factor));
 973     }
 974
 975     if (max_type == Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT) {
 976         // FIXME: open_lazy_post_list() results in the term getting registered
 977         // for stats, so we still incur an avoidable cost from the full
 978         // expansion size of the wildcard, which is most likely to be visible
 979         // with the remote backend.  Perhaps we should split creating the lazy
 980         // postlist from registering the term for stats.
 981         if (ctx.size() > max_expansion)
 982             ctx.select_most_frequent(qopt, max_expansion);
 983     }
 984
 985     if (factor != 0.0) {
 986         if (op != Query::OP_SYNONYM) {
 987             qopt->set_total_subqs(qopt->get_total_subqs() + ctx.size());
 988         } else {
 989             qopt->inc_total_subqs();
 990         }
 991     }
 992
 993     if (ctx.empty())
 994         RETURN(new EmptyPostList);
 995
 996     if (op == Query::OP_MAX)
 997         RETURN(ctx.postlist_max(qopt));
 998
 999     PostList * pl = ctx.postlist(qopt);
1000     if (op == Query::OP_OR)
1001         RETURN(pl);
1002
1003     // We build an OP_OR tree for OP_SYNONYM and then wrap it in a
1004     // SynonymPostList, which supplies the weights.
1005     PostingIterator::Internal * r = qopt->make_synonym_postlist(pl, factor);
1006     RETURN(r);
1007 }
1008
1009 termcount
1010 QueryWildcard::get_length() const XAPIAN_NOEXCEPT
1011 {
1012     // We currently assume wqf is 1 for calculating the synonym's weight
1013     // since conceptually the synonym is one "virtual" term.  If we were
1014     // to combine multiple occurrences of the same synonym expansion into
1015     // a single instance with wqf set, we would want to track the wqf.
1016     return 1;
1017 }
1018
1019 void
1020 QueryWildcard::serialise(string & result) const
1021 {
1022     result += static_cast<char>(0x0b);
1023     result += encode_length(max_expansion);
1024     result += static_cast<unsigned char>(max_type);
1025     result += static_cast<unsigned char>(combiner);
1026     result += encode_length(pattern.size());
1027     result += pattern;
1028 }
1029
1030 Query::op
1031 QueryWildcard::get_type() const XAPIAN_NOEXCEPT
1032 {
1033     return Query::OP_WILDCARD;
1034 }
1035
1036 string
1037 QueryWildcard::get_description() const
1038 {
1039     string desc = "WILDCARD ";
1040     switch (combiner) {
1041         case Query::OP_SYNONYM:
1042             desc += "SYNONYM ";
1043             break;
1044         case Query::OP_MAX:
1045             desc += "MAX ";
1046             break;
1047         case Query::OP_OR:
1048             desc += "OR ";
1049             break;
1050         default:
1051             desc += "BAD ";
1052             break;
1053     }
1054     description_append(desc, pattern);
1055     return desc;
1056 }
1057
1058 Xapian::termcount
1059 QueryBranch::get_length() const XAPIAN_NOEXCEPT
1060 {
1061     // Sum results from all subqueries.
1062     Xapian::termcount result = 0;
1063     QueryVector::const_iterator i;
1064     for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1065         // MatchNothing subqueries should have been removed by done().
1066         Assert((*i).internal.get());
1067         result += (*i).internal->get_length();
1068     }
1069     return result;
1070 }
1071
1072 #define MULTIWAY(X) static_cast<unsigned char>(0x80 | (X) << 3)
1073 #define MISC(X) static_cast<unsigned char>(X)
1074 void
1075 QueryBranch::serialise_(string & result, Xapian::termcount parameter) const
1076 {
1077     static const unsigned char first_byte[] = {
1078         MULTIWAY(0),    // OP_AND
1079         MULTIWAY(1),    // OP_OR
1080         MULTIWAY(2),    // OP_AND_NOT
1081         MULTIWAY(3),    // OP_XOR
1082         MULTIWAY(4),    // OP_AND_MAYBE
1083         MULTIWAY(5),    // OP_FILTER
1084         MULTIWAY(14),   // OP_NEAR
1085         MULTIWAY(15),   // OP_PHRASE
1086         0,              // OP_VALUE_RANGE
1087         MISC(3),        // OP_SCALE_WEIGHT
1088         MULTIWAY(13),   // OP_ELITE_SET
1089         0,              // OP_VALUE_GE
1090         0,              // OP_VALUE_LE
1091         MULTIWAY(6),    // OP_SYNONYM
1092         MULTIWAY(7)     // OP_MAX
1093     };
1094     Xapian::Query::op op_ = get_op();
1095     AssertRel(size_t(op_),<,sizeof(first_byte));
1096     unsigned char ch = first_byte[op_];
1097     if (ch & 0x80) {
1098         // Multi-way operator.
1099         if (subqueries.size() < 8)
1100             ch |= subqueries.size();
1101         result += ch;
1102         if (subqueries.size() >= 8)
1103             result += encode_length(subqueries.size() - 8);
1104         if (ch >= MULTIWAY(13))
1105             result += encode_length(parameter);
1106     } else {
1107         result += ch;
1108     }
1109
1110     QueryVector::const_iterator i;
1111     for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1112         // MatchNothing subqueries should have been removed by done().
1113         Assert((*i).internal.get());
1114         (*i).internal->serialise(result);
1115     }
1116
1117     // For OP_NEAR, OP_PHRASE, and OP_ELITE_SET, the window/set size gets
1118     // appended next by an overloaded serialise() method in the subclass.
1119 }
1120
1121 void
1122 QueryBranch::serialise(string & result) const
1123 {
1124     QueryBranch::serialise_(result);
1125 }
1126
1127 void
1128 QueryNear::serialise(string & result) const
1129 {
1130     // FIXME: window - subqueries.size() ?
1131     QueryBranch::serialise_(result, window);
1132 }
1133
1134 void
1135 QueryPhrase::serialise(string & result) const
1136 {
1137     // FIXME: window - subqueries.size() ?
1138     QueryBranch::serialise_(result, window);
1139 }
1140
1141 void
1142 QueryEliteSet::serialise(string & result) const
1143 {
1144     // FIXME: set_size - subqueries.size() ?
1145     QueryBranch::serialise_(result, set_size);
1146 }
1147
1148 void
1149 QueryBranch::gather_terms(void * void_terms) const
1150 {
1151     // Gather results from all subqueries.
1152     QueryVector::const_iterator i;
1153     for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1154         // MatchNothing subqueries should have been removed by done().
1155         Assert((*i).internal.get());
1156         (*i).internal->gather_terms(void_terms);
1157     }
1158 }
1159
1160 void
1161 QueryBranch::do_or_like(OrContext& ctx, QueryOptimiser * qopt, double factor,
1162                         Xapian::termcount elite_set_size, size_t first) const
1163 {
1164     LOGCALL_VOID(MATCH, "QueryBranch::do_or_like", ctx | qopt | factor | elite_set_size);
1165
1166     // FIXME: we could optimise by merging OP_ELITE_SET and OP_OR like we do
1167     // for AND-like operations.
1168
1169     // OP_SYNONYM with a single subquery is only simplified by
1170     // QuerySynonym::done() if the single subquery is a term or MatchAll.
1171     Assert(subqueries.size() >= 2 || get_op() == Query::OP_SYNONYM);
1172
1173     vector<PostList *> postlists;
1174     postlists.reserve(subqueries.size() - first);
1175
1176     QueryVector::const_iterator q;
1177     for (q = subqueries.begin() + first; q != subqueries.end(); ++q) {
1178         // MatchNothing subqueries should have been removed by done().
1179         Assert((*q).internal.get());
1180         (*q).internal->postlist_sub_or_like(ctx, qopt, factor);
1181     }
1182
1183     if (elite_set_size && elite_set_size < subqueries.size()) {
1184         ctx.select_elite_set(qopt, elite_set_size, subqueries.size());
1185         // FIXME: not right!
1186     }
1187 }
1188
1189 PostList *
1190 QueryBranch::do_synonym(QueryOptimiser * qopt, double factor) const
1191 {
1192     LOGCALL(MATCH, PostList *, "QueryBranch::do_synonym", qopt | factor);
1193     OrContext ctx(subqueries.size());
1194     do_or_like(ctx, qopt, 0.0);
1195     PostList * pl = ctx.postlist(qopt);
1196     if (factor == 0.0) {
1197         // If we have a factor of 0, we don't care about the weights, so
1198         // we're just like a normal OR query.
1199         return pl;
1200     }
1201
1202     // We currently assume wqf is 1 for calculating the synonym's weight
1203     // since conceptually the synonym is one "virtual" term.  If we were
1204     // to combine multiple occurrences of the same synonym expansion into
1205     // a single instance with wqf set, we would want to track the wqf.
1206
1207     // We build an OP_OR tree for OP_SYNONYM and then wrap it in a
1208     // SynonymPostList, which supplies the weights.
1209     RETURN(qopt->make_synonym_postlist(pl, factor));
1210 }
1211
1212 PostList *
1213 QueryBranch::do_max(QueryOptimiser * qopt, double factor) const
1214 {
1215     LOGCALL(MATCH, PostList *, "QueryBranch::do_max", qopt | factor);
1216     OrContext ctx(subqueries.size());
1217     do_or_like(ctx, qopt, factor);
1218     if (factor == 0.0) {
1219         // If we have a factor of 0, we don't care about the weights, so
1220         // we're just like a normal OR query.
1221         RETURN(ctx.postlist(qopt));
1222     }
1223
1224     // We currently assume wqf is 1 for calculating the OP_MAX's weight
1225     // since conceptually the OP_MAX is one "virtual" term.  If we were
1226     // to combine multiple occurrences of the same OP_MAX expansion into
1227     // a single instance with wqf set, we would want to track the wqf.
1228     RETURN(ctx.postlist_max(qopt));
1229 }
1230
1231 Xapian::Query::op
1232 QueryBranch::get_type() const XAPIAN_NOEXCEPT
1233 {
1234     return get_op();
1235 }
1236
1237 size_t
1238 QueryBranch::get_num_subqueries() const XAPIAN_NOEXCEPT
1239 {
1240     return subqueries.size();
1241 }
1242
1243 const Query
1244 QueryBranch::get_subquery(size_t n) const
1245 {
1246     return subqueries[n];
1247 }
1248
1249 const string
1250 QueryBranch::get_description_helper(const char * op,
1251                                     Xapian::termcount parameter) const
1252 {
1253     string desc = "(";
1254     QueryVector::const_iterator i;
1255     for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1256         if (desc.size() > 1) {
1257             desc += op;
1258             if (parameter) {
1259                 desc += str(parameter);
1260                 desc += ' ';
1261             }
1262         }
1263         Assert((*i).internal.get());
1264         // MatchNothing subqueries should have been removed by done(), and we
1265         // shouldn't get called before done() is, since that happens at the
1266         // end of the Xapian::Query constructor.
1267         desc += (*i).internal->get_description();
1268     }
1269     desc += ')';
1270     return desc;
1271 }
1272
1273 Query::Internal *
1274 QueryWindowed::done()
1275 {
1276     // If window size not specified, default it.
1277     if (window == 0)
1278         window = subqueries.size();
1279     return QueryAndLike::done();
1280 }
1281
1282 void
1283 QueryScaleWeight::gather_terms(void * void_terms) const
1284 {
1285     subquery.internal->gather_terms(void_terms);
1286 }
1287
1288 void QueryTerm::serialise(string & result) const
1289 {
1290     size_t len = term.size();
1291     if (len == 0) {
1292         if (wqf == 1 && pos == 0) {
1293             // Query::MatchAll
1294             result += '\x0f';
1295         } else {
1296             // Weird mutant versions of MatchAll
1297             result += '\x0e';
1298             result += encode_length(wqf);
1299             result += encode_length(pos);
1300         }
1301     } else if (wqf == 1) {
1302         if (pos == 0) {
1303             // Single occurrence probabilistic term without position set.
1304             if (len >= 16) {
1305                 result += static_cast<char>(0x40 | 0x10);
1306                 result += encode_length(term.size() - 16);
1307             } else {
1308                 result += static_cast<char>(0x40 | 0x10 | len);
1309             }
1310             result += term;
1311         } else {
1312             // Single occurrence probabilistic term with position set.
1313             if (len >= 16) {
1314                 result += static_cast<char>(0x40 | 0x20);
1315                 result += encode_length(term.size() - 16);
1316             } else {
1317                 result += static_cast<char>(0x40 | 0x20 | len);
1318             }
1319             result += term;
1320             result += encode_length(pos);
1321         }
1322     } else if (wqf > 1 || pos > 0) {
1323         // General case.
1324         if (len >= 16) {
1325             result += static_cast<char>(0x40 | 0x30);
1326             result += encode_length(term.size() - 16);
1327         } else if (len) {
1328             result += static_cast<char>(0x40 | 0x30 | len);
1329         }
1330         result += term;
1331         result += encode_length(wqf);
1332         result += encode_length(pos);
1333     } else {
1334         // Typical boolean term.
1335         AssertEq(wqf, 0);
1336         AssertEq(pos, 0);
1337         if (len >= 16) {
1338             result += static_cast<char>(0x40);
1339             result += encode_length(term.size() - 16);
1340         } else {
1341             result += static_cast<char>(0x40 | len);
1342         }
1343         result += term;
1344     }
1345 }
1346
1347 void QueryPostingSource::serialise(string & result) const
1348 {
1349     result += static_cast<char>(0x0c);
1350
1351     const string & n = source->name();
1352     result += encode_length(n.size());
1353     result += n;
1354
1355     const string & s = source->serialise();
1356     result += encode_length(s.size());
1357     result += s;
1358 }
1359
1360 void QueryScaleWeight::serialise(string & result) const
1361 {
1362     Assert(subquery.internal.get());
1363     const string & s = serialise_double(scale_factor);
1364     result += '\x0d';
1365     result += s;
1366     subquery.internal->serialise(result);
1367 }
1368
1369 struct is_matchnothing {
1370     bool operator()(const Xapian::Query & q) {
1371         return q.internal.get() == NULL;
1372     }
1373 };
1374
1375 void
1376 QueryAndLike::add_subquery(const Xapian::Query & subquery)
1377 {
1378     // If the AndLike is already MatchNothing, do nothing.
1379     if (subqueries.size() == 1 && subqueries[0].internal.get() == NULL)
1380         return;
1381     // If we're adding MatchNothing, discard any previous subqueries.
1382     if (subquery.internal.get() == NULL)
1383         subqueries.clear();
1384     subqueries.push_back(subquery);
1385 }
1386
1387 Query::Internal *
1388 QueryAndLike::done()
1389 {
1390     // Empty AndLike gives MatchNothing.
1391     if (subqueries.empty())
1392         return NULL;
1393     // We handle any subquery being MatchNothing in add_subquery() by leaving
1394     // a single MatchNothing subquery, and so this check results in AndLike
1395     // giving MatchNothing.
1396     if (subqueries.size() == 1)
1397         return subqueries[0].internal.get();
1398     return this;
1399 }
1400
1401 PostingIterator::Internal *
1402 QueryAndLike::postlist(QueryOptimiser * qopt, double factor) const
1403 {
1404     LOGCALL(QUERY, PostingIterator::Internal *, "QueryAndLike::postlist", qopt | factor);
1405     AndContext ctx(subqueries.size());
1406     postlist_sub_and_like(ctx, qopt, factor);
1407     RETURN(ctx.postlist(qopt));
1408 }
1409
1410 void
1411 QueryAndLike::postlist_sub_and_like(AndContext& ctx, QueryOptimiser * qopt, double factor) const
1412 {
1413     QueryVector::const_iterator i;
1414     for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1415         // MatchNothing subqueries should have been removed by done().
1416         Assert((*i).internal.get());
1417         (*i).internal->postlist_sub_and_like(ctx, qopt, factor);
1418     }
1419 }
1420
1421 void
1422 QueryOrLike::add_subquery(const Xapian::Query & subquery)
1423 {
1424     // Drop any subqueries which are MatchNothing.
1425     if (subquery.internal.get() != NULL)
1426         subqueries.push_back(subquery);
1427 }
1428
1429 Query::Internal *
1430 QueryOrLike::done()
1431 {
1432     // An empty OrLike gives MatchNothing.  Note that add_subquery() drops any
1433     // subqueries which are MatchNothing.
1434     if (subqueries.empty())
1435         return NULL;
1436     if (subqueries.size() == 1)
1437         return subqueries[0].internal.get();
1438     return this;
1439 }
1440
1441 void
1442 QueryAndNot::add_subquery(const Xapian::Query & subquery)
1443 {
1444     // If the left side of AndNot is already MatchNothing, do nothing.
1445     if (subqueries.size() == 1 && subqueries[0].internal.get() == NULL)
1446         return;
1447     // Drop any 2nd or subsequent subqueries which are MatchNothing.
1448     if (subquery.internal.get() != NULL || subqueries.empty())
1449         subqueries.push_back(subquery);
1450 }
1451
1452 Query::Internal *
1453 QueryAndNot::done()
1454 {
1455     // Any MatchNothing right subqueries get discarded by add_subquery() - if
1456     // that leaves just the left subquery, return that.
1457     //
1458     // If left subquery is MatchNothing, then add_subquery() discards all right
1459     // subqueries, so this check also gives MatchNothing for this case.
1460     if (subqueries.size() == 1)
1461         return subqueries[0].internal.get();
1462     return this;
1463 }
1464
1465 void
1466 QueryAndMaybe::add_subquery(const Xapian::Query & subquery)
1467 {
1468     // If the left side of AndMaybe is already MatchNothing, do nothing.
1469     if (subqueries.size() == 1 && subqueries[0].internal.get() == NULL)
1470         return;
1471     // Drop any 2nd or subsequent subqueries which are MatchNothing.
1472     if (subquery.internal.get() != NULL || subqueries.empty())
1473         subqueries.push_back(subquery);
1474 }
1475
1476 Query::Internal *
1477 QueryAndMaybe::done()
1478 {
1479     // Any MatchNothing right subqueries get discarded by add_subquery() - if
1480     // that leaves just the left subquery, return that.
1481     //
1482     // If left subquery is MatchNothing, then add_subquery() discards all right
1483     // subqueries, so this check also gives MatchNothing for this case.
1484     if (subqueries.size() == 1)
1485         return subqueries[0].internal.get();
1486     return this;
1487 }
1488
1489 PostingIterator::Internal *
1490 QueryOr::postlist(QueryOptimiser * qopt, double factor) const
1491 {
1492     LOGCALL(QUERY, PostingIterator::Internal *, "QueryOr::postlist", qopt | factor);
1493     OrContext ctx(subqueries.size());
1494     do_or_like(ctx, qopt, factor);
1495     RETURN(ctx.postlist(qopt));
1496 }
1497
1498 void
1499 QueryOr::postlist_sub_or_like(OrContext& ctx, QueryOptimiser * qopt, double factor) const
1500 {
1501     do_or_like(ctx, qopt, factor);
1502 }
1503
1504 PostingIterator::Internal *
1505 QueryAndNot::postlist(QueryOptimiser * qopt, double factor) const
1506 {
1507     LOGCALL(QUERY, PostingIterator::Internal *, "QueryAndNot::postlist", qopt | factor);
1508     // FIXME: Combine and-like side with and-like stuff above.
1509     AutoPtr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
1510     OrContext ctx(subqueries.size() - 1);
1511     do_or_like(ctx, qopt, 0.0, 0, 1);
1512     AutoPtr<PostList> r(ctx.postlist(qopt));
1513     RETURN(new AndNotPostList(l.release(), r.release(),
1514                               qopt->matcher, qopt->db_size));
1515 }
1516
1517 PostingIterator::Internal *
1518 QueryXor::postlist(QueryOptimiser * qopt, double factor) const
1519 {
1520     LOGCALL(QUERY, PostingIterator::Internal *, "QueryXor::postlist", qopt | factor);
1521     XorContext ctx(subqueries.size());
1522     postlist_sub_xor(ctx, qopt, factor);
1523     RETURN(ctx.postlist(qopt));
1524 }
1525
1526 void
1527 QueryXor::postlist_sub_xor(XorContext& ctx, QueryOptimiser * qopt, double factor) const
1528 {
1529     QueryVector::const_iterator i;
1530     for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1531         // MatchNothing subqueries should have been removed by done().
1532         Assert((*i).internal.get());
1533         (*i).internal->postlist_sub_xor(ctx, qopt, factor);
1534     }
1535 }
1536
1537 PostingIterator::Internal *
1538 QueryAndMaybe::postlist(QueryOptimiser * qopt, double factor) const
1539 {
1540     LOGCALL(QUERY, PostingIterator::Internal *, "QueryAndMaybe::postlist", qopt | factor);
1541     // FIXME: Combine and-like side with and-like stuff above.
1542     AutoPtr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
1543     OrContext ctx(subqueries.size() - 1);
1544     do_or_like(ctx, qopt, factor, 0, 1);
1545     AutoPtr<PostList> r(ctx.postlist(qopt));
1546     RETURN(new AndMaybePostList(l.release(), r.release(),
1547                                 qopt->matcher, qopt->db_size));
1548 }
1549
1550 PostingIterator::Internal *
1551 QueryFilter::postlist(QueryOptimiser * qopt, double factor) const
1552 {
1553     LOGCALL(QUERY, PostingIterator::Internal *, "QueryFilter::postlist", qopt | factor);
1554     // FIXME: Combine and-like stuff, like QueryOptimiser.
1555     AssertEq(subqueries.size(), 2);
1556     PostList * pls[2];
1557     AutoPtr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
1558     pls[1] = subqueries[1].internal->postlist(qopt, 0.0);
1559     pls[0] = l.release();
1560     RETURN(new MultiAndPostList(pls, pls + 2, qopt->matcher, qopt->db_size));
1561 }
1562
1563 void
1564 QueryFilter::postlist_sub_and_like(AndContext& ctx, QueryOptimiser * qopt, double factor) const
1565 {
1566     QueryVector::const_iterator i;
1567     for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1568         // MatchNothing subqueries should have been removed by done().
1569         Assert((*i).internal.get());
1570         (*i).internal->postlist_sub_and_like(ctx, qopt, factor);
1571         // Second and subsequent subqueries are unweighted.
1572         factor = 0.0;
1573     }
1574 }
1575
1576 void
1577 QueryWindowed::postlist_windowed(Query::op op, AndContext& ctx, QueryOptimiser * qopt, double factor) const
1578 {
1579     // FIXME: should has_positions() be on the combined DB (not this sub)?
1580     if (qopt->db.has_positions()) {
1581         bool old_need_positions = qopt->need_positions;
1582         qopt->need_positions = true;
1583
1584         QueryVector::const_iterator i;
1585         for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1586             // MatchNothing subqueries should have been removed by done().
1587             Assert((*i).internal.get());
1588             ctx.add_postlist((*i).internal->postlist(qopt, factor));
1589         }
1590         // Record the positional filter to apply higher up the tree.
1591         ctx.add_pos_filter(op, subqueries.size(), window);
1592
1593         qopt->need_positions = old_need_positions;
1594     } else {
1595         QueryAndLike::postlist_sub_and_like(ctx, qopt, factor);
1596     }
1597 }
1598
1599 void
1600 QueryPhrase::postlist_sub_and_like(AndContext & ctx, QueryOptimiser * qopt, double factor) const
1601 {
1602     QueryWindowed::postlist_windowed(Query::OP_PHRASE, ctx, qopt, factor);
1603 }
1604
1605 void
1606 QueryNear::postlist_sub_and_like(AndContext & ctx, QueryOptimiser * qopt, double factor) const
1607 {
1608     QueryWindowed::postlist_windowed(Query::OP_NEAR, ctx, qopt, factor);
1609 }
1610
1611 PostingIterator::Internal *
1612 QueryEliteSet::postlist(QueryOptimiser * qopt, double factor) const
1613 {
1614     LOGCALL(QUERY, PostingIterator::Internal *, "QueryEliteSet::postlist", qopt | factor);
1615     OrContext ctx(subqueries.size());
1616     do_or_like(ctx, qopt, factor, set_size);
1617     RETURN(ctx.postlist(qopt));
1618 }
1619
1620 void
1621 QueryEliteSet::postlist_sub_or_like(OrContext& ctx, QueryOptimiser * qopt, double factor) const
1622 {
1623     do_or_like(ctx, qopt, factor, set_size);
1624 }
1625
1626 PostingIterator::Internal *
1627 QuerySynonym::postlist(QueryOptimiser * qopt, double factor) const
1628 {
1629     LOGCALL(QUERY, PostingIterator::Internal *, "QuerySynonym::postlist", qopt | factor);
1630     // Save and restore total_subqs so we only add one for the whole
1631     // OP_SYNONYM subquery (or none if we're not weighted).
1632     Xapian::termcount save_total_subqs = qopt->get_total_subqs();
1633     if (factor != 0.0)
1634         ++save_total_subqs;
1635     PostList * pl = do_synonym(qopt, factor);
1636     qopt->set_total_subqs(save_total_subqs);
1637     RETURN(pl);
1638 }
1639
1640 Query::Internal *
1641 QuerySynonym::done()
1642 {
1643     // An empty Synonym gives MatchNothing.  Note that add_subquery() drops any
1644     // subqueries which are MatchNothing.
1645     if (subqueries.empty())
1646         return NULL;
1647     // Synonym of a single subquery should only be simplified if that subquery
1648     // is a term (or MatchAll), or if it's also OP_SYNONYM.  Note that
1649     // MatchNothing subqueries are dropped, so we'd never get here with a
1650     // single MatchNothing subquery.
1651     if (subqueries.size() == 1) {
1652         Query::op sub_type = subqueries[0].get_type();
1653         if (sub_type == Query::LEAF_TERM || sub_type == Query::LEAF_MATCH_ALL ||
1654             sub_type == Query::OP_SYNONYM) {
1655             return subqueries[0].internal.get();
1656         }
1657     }
1658     return this;
1659 }
1660
1661 PostingIterator::Internal *
1662 QueryMax::postlist(QueryOptimiser * qopt, double factor) const
1663 {
1664     LOGCALL(QUERY, PostingIterator::Internal *, "QueryMax::postlist", qopt | factor);
1665     // Save and restore total_subqs so we only add one for the whole
1666     // OP_MAX subquery (or none if we're not weighted).
1667     Xapian::termcount save_total_subqs = qopt->get_total_subqs();
1668     if (factor != 0.0)
1669         ++save_total_subqs;
1670     PostList * pl = do_max(qopt, factor);
1671     qopt->set_total_subqs(save_total_subqs);
1672     RETURN(pl);
1673 }
1674
1675 Xapian::Query::op
1676 QueryAnd::get_op() const
1677 {
1678     return Xapian::Query::OP_AND;
1679 }
1680
1681 Xapian::Query::op
1682 QueryOr::get_op() const
1683 {
1684     return Xapian::Query::OP_OR;
1685 }
1686
1687 Xapian::Query::op
1688 QueryAndNot::get_op() const
1689 {
1690     return Xapian::Query::OP_AND_NOT;
1691 }
1692
1693 Xapian::Query::op
1694 QueryXor::get_op() const
1695 {
1696     return Xapian::Query::OP_XOR;
1697 }
1698
1699 Xapian::Query::op
1700 QueryAndMaybe::get_op() const
1701 {
1702     return Xapian::Query::OP_AND_MAYBE;
1703 }
1704
1705 Xapian::Query::op
1706 QueryFilter::get_op() const
1707 {
1708     return Xapian::Query::OP_FILTER;
1709 }
1710
1711 Xapian::Query::op
1712 QueryNear::get_op() const
1713 {
1714     return Xapian::Query::OP_NEAR;
1715 }
1716
1717 Xapian::Query::op
1718 QueryPhrase::get_op() const
1719 {
1720     return Xapian::Query::OP_PHRASE;
1721 }
1722
1723 Xapian::Query::op
1724 QueryEliteSet::get_op() const
1725 {
1726     return Xapian::Query::OP_ELITE_SET;
1727 }
1728
1729 Xapian::Query::op
1730 QuerySynonym::get_op() const
1731 {
1732     return Xapian::Query::OP_SYNONYM;
1733 }
1734
1735 Xapian::Query::op
1736 QueryMax::get_op() const
1737 {
1738     return Xapian::Query::OP_MAX;
1739 }
1740
1741 Xapian::Query::op
1742 QueryWildcard::get_op() const
1743 {
1744     return Xapian::Query::OP_WILDCARD;
1745 }
1746
1747 string
1748 QueryAnd::get_description() const
1749 {
1750     return get_description_helper(" AND ");
1751 }
1752
1753 string
1754 QueryOr::get_description() const
1755 {
1756     return get_description_helper(" OR ");
1757 }
1758
1759 string
1760 QueryAndNot::get_description() const
1761 {
1762     return get_description_helper(" AND_NOT ");
1763 }
1764
1765 string
1766 QueryXor::get_description() const
1767 {
1768     return get_description_helper(" XOR ");
1769 }
1770
1771 string
1772 QueryAndMaybe::get_description() const
1773 {
1774     return get_description_helper(" AND_MAYBE ");
1775 }
1776
1777 string
1778 QueryFilter::get_description() const
1779 {
1780     return get_description_helper(" FILTER ");
1781 }
1782
1783 string
1784 QueryNear::get_description() const
1785 {
1786     return get_description_helper(" NEAR ", window);
1787 }
1788
1789 string
1790 QueryPhrase::get_description() const
1791 {
1792     return get_description_helper(" PHRASE ", window);
1793 }
1794
1795 string
1796 QueryEliteSet::get_description() const
1797 {
1798     return get_description_helper(" ELITE_SET ", set_size);
1799 }
1800
1801 string
1802 QuerySynonym::get_description() const
1803 {
1804     if (subqueries.size() == 1) {
1805         string d = "(SYNONYM ";
1806         d += subqueries[0].internal->get_description();
1807         d += ")";
1808         return d;
1809     }
1810     return get_description_helper(" SYNONYM ");
1811 }
1812
1813 string
1814 QueryMax::get_description() const
1815 {
1816     return get_description_helper(" MAX ");
1817 }
1818
1819 }
1820 }