xapian-core/api/queryinternal.cc

   1 /** @file queryinternal.cc
   2  * @brief Xapian::Query internals
   3  */
   4 /* Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017 Olly Betts
   5  * Copyright (C) 2008,2009 Lemur Consulting Ltd
   6  *
   7  * This program is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License as
   9  * published by the Free Software Foundation; either version 2 of the
  10  * License, or (at your option) any later version.
  11  *
  12  * This program is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  * GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with this program; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
  20  */
  21
  22 #include <config.h>
  23
  24 #include "queryinternal.h"
  25
  26 #include "xapian/error.h"
  27 #include "xapian/postingsource.h"
  28 #include "xapian/query.h"
  29
  30 #include "leafpostlist.h"
  31 #include "matcher/andmaybepostlist.h"
  32 #include "matcher/andnotpostlist.h"
  33 #include "emptypostlist.h"
  34 #include "matcher/exactphrasepostlist.h"
  35 #include "matcher/externalpostlist.h"
  36 #include "matcher/maxpostlist.h"
  37 #include "matcher/multiandpostlist.h"
  38 #include "matcher/multixorpostlist.h"
  39 #include "matcher/nearpostlist.h"
  40 #include "matcher/orpospostlist.h"
  41 #include "matcher/orpostlist.h"
  42 #include "matcher/phrasepostlist.h"
  43 #include "matcher/queryoptimiser.h"
  44 #include "matcher/valuerangepostlist.h"
  45 #include "matcher/valuegepostlist.h"
  46 #include "net/length.h"
  47 #include "serialise-double.h"
  48 #include "termlist.h"
  49
  50 #include "debuglog.h"
  51 #include "omassert.h"
  52 #include "str.h"
  53 #include "unicode/description_append.h"
  54
  55 #include <algorithm>
  56 #include <functional>
  57 #include <list>
  58 #include <memory>
  59 #include <string>
  60 #include <vector>
  61
  62 using namespace std;
  63
  64 template<class CLASS> struct delete_ptr {
  65     void operator()(CLASS *p) const { delete p; }
  66 };
  67
  68 using Xapian::Internal::AndContext;
  69 using Xapian::Internal::OrContext;
  70 using Xapian::Internal::XorContext;
  71
  72 namespace Xapian {
  73
  74 namespace Internal {
  75
  76 /** Class providing an operator which sorts postlists to select max or terms.
  77  *  This returns true if a has a (strictly) greater termweight than b.
  78  *
  79  *  Using this comparator will tend to result in multiple calls to
  80  *  recalc_maxweight() for each of the subqueries (we use it with nth_element
  81  *  which should be O(n)) - perhaps it'd be better to call recalc_maxweight()
  82  *  once and then sort on that.
  83  */
  84 struct CmpMaxOrTerms {
  85     /** Return true if and only if a has a strictly greater termweight than b. */
  86     bool operator()(PostList *a, PostList *b) {
  87 #if (defined(__i386__) && !defined(__SSE_MATH__)) || \
  88     defined(__mc68000__) || defined(__mc68010__) || \
  89     defined(__mc68020__) || defined(__mc68030__)
  90         // On some architectures, most common of which is x86, floating point
  91         // values are calculated and stored in registers with excess precision.
  92         // If the two recalc_maxweight() calls below return identical values in a
  93         // register, the excess precision may be dropped for one of them but
  94         // not the other (e.g. because the compiler saves the first calculated
  95         // weight to memory while calculating the second, then reloads it to
  96         // compare).  This leads to both a > b and b > a being true, which
  97         // violates the antisymmetry property of the strict weak ordering
  98         // required by nth_element().  This can have serious consequences (e.g.
  99         // segfaults).
 100         //
 101         // Note that m68k only has excess precision in earlier models - 68040
 102         // and later are OK:
 103         // http://gcc.gnu.org/ml/gcc-patches/2008-11/msg00105.html
 104         //
 105         // To avoid this, we store each result in a volatile double prior to
 106         // comparing them.  This means that the result of this test should
 107         // match that on other architectures with the same double format (which
 108         // is desirable), and actually has less overhead than rounding both
 109         // results to float (which is another approach which works).
 110         volatile double a_max_wt = a->recalc_maxweight();
 111         volatile double b_max_wt = b->recalc_maxweight();
 112         return a_max_wt > b_max_wt;
 113 #else
 114         return a->recalc_maxweight() > b->recalc_maxweight();
 115 #endif
 116     }
 117 };
 118
 119 /// Comparison functor which orders PostList* by descending get_termfreq_est().
 120 struct ComparePostListTermFreqAscending {
 121     /// Order by descending get_termfreq_est().
 122     bool operator()(const PostList *a, const PostList *b) const {
 123         return a->get_termfreq_est() > b->get_termfreq_est();
 124     }
 125 };
 126
 127 class Context {
 128   protected:
 129     QueryOptimiser* qopt;
 130
 131     vector<PostList*> pls;
 132
 133   public:
 134     Context(QueryOptimiser* qopt_, size_t reserve);
 135
 136     ~Context();
 137
 138     void add_postlist(PostList * pl) {
 139         pls.push_back(pl);
 140     }
 141
 142     bool empty() const {
 143         return pls.empty();
 144     }
 145
 146     size_t size() const {
 147         return pls.size();
 148     }
 149
 150     void shrink(size_t new_size);
 151 };
 152
 153 Context::Context(QueryOptimiser* qopt_, size_t reserve)
 154     : qopt(qopt_)
 155 {
 156     pls.reserve(reserve);
 157 }
 158
 159 void
 160 Context::shrink(size_t new_size)
 161 {
 162     AssertRel(new_size, <=, pls.size());
 163     if (new_size >= pls.size())
 164         return;
 165
 166     const PostList * hint_pl = qopt->get_hint_postlist();
 167     for (auto&& i = pls.begin() + new_size; i != pls.end(); ++i) {
 168         const PostList * pl = *i;
 169         if (rare(pl == hint_pl)) {
 170             // We were about to delete qopt's hint - instead tell qopt to take
 171             // ownership.
 172             qopt->take_hint_ownership();
 173             hint_pl = NULL;
 174         } else {
 175             delete pl;
 176         }
 177     }
 178     pls.resize(new_size);
 179 }
 180
 181 Context::~Context()
 182 {
 183     shrink(0);
 184 }
 185
 186 class OrContext : public Context {
 187   public:
 188     OrContext(QueryOptimiser* qopt_, size_t reserve)
 189         : Context(qopt_, reserve) { }
 190
 191     /// Select the best set_size postlists from the last out_of added.
 192     void select_elite_set(size_t set_size, size_t out_of);
 193
 194     /// Select the set_size postlists with the highest term frequency.
 195     void select_most_frequent(size_t set_size);
 196
 197     PostList * postlist();
 198     PostList * postlist_max();
 199 };
 200
 201 void
 202 OrContext::select_elite_set(size_t set_size, size_t out_of)
 203 {
 204     vector<PostList*>::iterator begin = pls.begin() + pls.size() - out_of;
 205     nth_element(begin, begin + set_size - 1, pls.end(), CmpMaxOrTerms());
 206     shrink(pls.size() - out_of + set_size);
 207 }
 208
 209 void
 210 OrContext::select_most_frequent(size_t set_size)
 211 {
 212     vector<PostList*>::iterator begin = pls.begin();
 213     nth_element(begin, begin + set_size - 1, pls.end(),
 214                 ComparePostListTermFreqAscending());
 215     shrink(set_size);
 216 }
 217
 218 PostList *
 219 OrContext::postlist()
 220 {
 221     Assert(!pls.empty());
 222
 223     if (pls.size() == 1) {
 224         PostList * pl = pls[0];
 225         pls.clear();
 226         return pl;
 227     }
 228
 229     // Make postlists into a heap so that the postlist with the greatest term
 230     // frequency is at the top of the heap.
 231     make_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
 232
 233     // Now build a tree of binary OrPostList objects.
 234     //
 235     // The algorithm used to build the tree is like that used to build an
 236     // optimal Huffman coding tree.  If we called next() repeatedly, this
 237     // arrangement would minimise the number of method calls.  Generally we
 238     // don't actually do that, but this arrangement is still likely to be a
 239     // good one, and it does minimise the work in the worst case.
 240     while (true) {
 241         // We build the tree such that at each branch:
 242         //
 243         //   l.get_termfreq_est() >= r.get_termfreq_est()
 244         //
 245         // We do this so that the OrPostList class can be optimised assuming
 246         // that this is the case.
 247         PostList * r = pls.front();
 248         pop_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
 249         pls.pop_back();
 250         PostList * pl;
 251         pl = new OrPostList(pls.front(), r, qopt->matcher, qopt->db_size);
 252
 253         if (pls.size() == 1) {
 254             pls.clear();
 255             return pl;
 256         }
 257
 258         pop_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
 259         pls.back() = pl;
 260         push_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
 261     }
 262 }
 263
 264 PostList *
 265 OrContext::postlist_max()
 266 {
 267     Assert(!pls.empty());
 268
 269     if (pls.size() == 1) {
 270         PostList * pl = pls[0];
 271         pls.clear();
 272         return pl;
 273     }
 274
 275     // Sort the postlists so that the postlist with the greatest term frequency
 276     // is first.
 277     sort(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
 278
 279     PostList * pl;
 280     pl = new MaxPostList(pls.begin(), pls.end(), qopt->matcher, qopt->db_size);
 281
 282     pls.clear();
 283     return pl;
 284 }
 285
 286 class XorContext : public Context {
 287   public:
 288     XorContext(QueryOptimiser* qopt_, size_t reserve)
 289         : Context(qopt_, reserve) { }
 290
 291     PostList * postlist();
 292 };
 293
 294 PostList *
 295 XorContext::postlist()
 296 {
 297     Xapian::doccount db_size = qopt->db_size;
 298     PostList * pl;
 299     pl = new MultiXorPostList(pls.begin(), pls.end(), qopt->matcher, db_size);
 300
 301     // Empty pls so our destructor doesn't delete them all!
 302     pls.clear();
 303     return pl;
 304 }
 305
 306 class AndContext : public Context {
 307     class PosFilter {
 308         Xapian::Query::op op_;
 309
 310         /// Start and end indices for the PostLists this positional filter uses.
 311         size_t begin, end;
 312
 313         Xapian::termcount window;
 314
 315       public:
 316         PosFilter(Xapian::Query::op op__, size_t begin_, size_t end_,
 317                   Xapian::termcount window_)
 318             : op_(op__), begin(begin_), end(end_), window(window_) { }
 319
 320         PostList * postlist(PostList * pl, const vector<PostList*>& pls) const;
 321     };
 322
 323     list<PosFilter> pos_filters;
 324
 325   public:
 326     AndContext(QueryOptimiser* qopt_, size_t reserve)
 327         : Context(qopt_, reserve) { }
 328
 329     void add_pos_filter(Query::op op_,
 330                         size_t n_subqs,
 331                         Xapian::termcount window);
 332
 333     PostList * postlist();
 334 };
 335
 336 PostList *
 337 AndContext::PosFilter::postlist(PostList * pl, const vector<PostList*>& pls) const
 338 try {
 339     vector<PostList *>::const_iterator terms_begin = pls.begin() + begin;
 340     vector<PostList *>::const_iterator terms_end = pls.begin() + end;
 341
 342     if (op_ == Xapian::Query::OP_NEAR) {
 343         pl = new NearPostList(pl, window, terms_begin, terms_end);
 344     } else if (window == end - begin) {
 345         AssertEq(op_, Xapian::Query::OP_PHRASE);
 346         pl = new ExactPhrasePostList(pl, terms_begin, terms_end);
 347     } else {
 348         AssertEq(op_, Xapian::Query::OP_PHRASE);
 349         pl = new PhrasePostList(pl, window, terms_begin, terms_end);
 350     }
 351     return pl;
 352 } catch (...) {
 353     delete pl;
 354     throw;
 355 }
 356
 357 void
 358 AndContext::add_pos_filter(Query::op op_,
 359                            size_t n_subqs,
 360                            Xapian::termcount window)
 361 {
 362     Assert(n_subqs > 1);
 363     size_t end = pls.size();
 364     size_t begin = end - n_subqs;
 365     pos_filters.push_back(PosFilter(op_, begin, end, window));
 366 }
 367
 368 PostList *
 369 AndContext::postlist()
 370 {
 371     if (pls.empty()) {
 372         // This case only happens if this sub-database has no positional data
 373         // (but another sub-database does).
 374         Assert(pos_filters.empty());
 375         return new EmptyPostList;
 376     }
 377
 378     unique_ptr<PostList> pl(new MultiAndPostList(pls.begin(), pls.end(),
 379                                                  qopt->matcher, qopt->db_size));
 380
 381     // Sort the positional filters to try to apply them in an efficient order.
 382     // FIXME: We need to figure out what that is!  Try applying lowest cf/tf
 383     // first?
 384
 385     // Apply any positional filters.
 386     list<PosFilter>::const_iterator i;
 387     for (i = pos_filters.begin(); i != pos_filters.end(); ++i) {
 388         const PosFilter & filter = *i;
 389         pl.reset(filter.postlist(pl.release(), pls));
 390     }
 391
 392     // Empty pls so our destructor doesn't delete them all!
 393     pls.clear();
 394     return pl.release();
 395 }
 396
 397 }
 398
 399 Query::Internal::~Internal() { }
 400
 401 size_t
 402 Query::Internal::get_num_subqueries() const XAPIAN_NOEXCEPT
 403 {
 404     return 0;
 405 }
 406
 407 const Query
 408 Query::Internal::get_subquery(size_t) const
 409 {
 410     throw Xapian::InvalidArgumentError("get_subquery() not meaningful for this Query object");
 411 }
 412
 413 Xapian::termcount
 414 Query::Internal::get_wqf() const
 415 {
 416     throw Xapian::InvalidArgumentError("get_wqf() not meaningful for this Query object");
 417 }
 418
 419 Xapian::termpos
 420 Query::Internal::get_pos() const
 421 {
 422     throw Xapian::InvalidArgumentError("get_pos() not meaningful for this Query object");
 423 }
 424
 425 void
 426 Query::Internal::gather_terms(void *) const
 427 {
 428 }
 429
 430 Xapian::termcount
 431 Query::Internal::get_length() const XAPIAN_NOEXCEPT
 432 {
 433     return 0;
 434 }
 435
 436 Query::Internal *
 437 Query::Internal::unserialise(const char ** p, const char * end,
 438                              const Registry & reg)
 439 {
 440     if (*p == end)
 441         return NULL;
 442     unsigned char ch = *(*p)++;
 443     switch (ch >> 5) {
 444         case 4: case 5: case 6: case 7: {
 445             // Multi-way branch
 446             //
 447             // 1ccccnnn where:
 448             //   nnn -> n_subqs (0 means encoded value follows)
 449             //   cccc -> code (which OP_XXX)
 450             size_t n_subqs = ch & 0x07;
 451             if (n_subqs == 0) {
 452                 decode_length(p, end, n_subqs);
 453                 n_subqs += 8;
 454             }
 455             unsigned char code = (ch >> 3) & 0x0f;
 456             Xapian::termcount parameter = 0;
 457             if (code >= 13)
 458                 decode_length(p, end, parameter);
 459             Xapian::Internal::QueryBranch * result;
 460             switch (code) {
 461                 case 0: // OP_AND
 462                     result = new Xapian::Internal::QueryAnd(n_subqs);
 463                     break;
 464                 case 1: // OP_OR
 465                     result = new Xapian::Internal::QueryOr(n_subqs);
 466                     break;
 467                 case 2: // OP_AND_NOT
 468                     result = new Xapian::Internal::QueryAndNot(n_subqs);
 469                     break;
 470                 case 3: // OP_XOR
 471                     result = new Xapian::Internal::QueryXor(n_subqs);
 472                     break;
 473                 case 4: // OP_AND_MAYBE
 474                     result = new Xapian::Internal::QueryAndMaybe(n_subqs);
 475                     break;
 476                 case 5: // OP_FILTER
 477                     result = new Xapian::Internal::QueryFilter(n_subqs);
 478                     break;
 479                 case 6: // OP_SYNONYM
 480                     result = new Xapian::Internal::QuerySynonym(n_subqs);
 481                     break;
 482                 case 7: // OP_MAX
 483                     result = new Xapian::Internal::QueryMax(n_subqs);
 484                     break;
 485                 case 13: // OP_ELITE_SET
 486                     result = new Xapian::Internal::QueryEliteSet(n_subqs,
 487                                                                  parameter);
 488                     break;
 489                 case 14: // OP_NEAR
 490                     result = new Xapian::Internal::QueryNear(n_subqs,
 491                                                              parameter);
 492                     break;
 493                 case 15: // OP_PHRASE
 494                     result = new Xapian::Internal::QueryPhrase(n_subqs,
 495                                                                parameter);
 496                     break;
 497                 default:
 498                     // 8 to 12 are currently unused.
 499                     throw SerialisationError("Unknown multi-way branch Query operator");
 500             }
 501             do {
 502                 result->add_subquery(Xapian::Query(unserialise(p, end, reg)));
 503             } while (--n_subqs);
 504             result->done();
 505             return result;
 506         }
 507         case 2: case 3: { // Term
 508             // Term
 509             //
 510             // 01ccLLLL where:
 511             //   LLLL -> length (0 means encoded value follows)
 512             //   cc -> code:
 513             //     0: wqf = 0; pos = 0
 514             //     1: wqf = 1; pos = 0
 515             //     2: wqf = 1; pos -> encoded value follows
 516             //     3: wqf -> encoded value follows; pos -> encoded value follows
 517             size_t len = ch & 0x0f;
 518             if (len == 0) {
 519                 decode_length(p, end, len);
 520                 len += 16;
 521             }
 522             if (size_t(end - *p) < len)
 523                 throw SerialisationError("Not enough data");
 524             string term(*p, len);
 525             *p += len;
 526
 527             int code = ((ch >> 4) & 0x03);
 528
 529             Xapian::termcount wqf = static_cast<Xapian::termcount>(code > 0);
 530             if (code == 3)
 531                 decode_length(p, end, wqf);
 532
 533             Xapian::termpos pos = 0;
 534             if (code >= 2)
 535                 decode_length(p, end, pos);
 536
 537             return new Xapian::Internal::QueryTerm(term, wqf, pos);
 538         }
 539         case 1: {
 540             // OP_VALUE_RANGE or OP_VALUE_GE or OP_VALUE_LE
 541             //
 542             // 001tssss where:
 543             //   ssss -> slot number (15 means encoded value follows)
 544             //   t -> op:
 545             //     0: OP_VALUE_RANGE (or OP_VALUE_LE if begin empty)
 546             //     1: OP_VALUE_GE
 547             Xapian::valueno slot = ch & 15;
 548             if (slot == 15) {
 549                 decode_length(p, end, slot);
 550                 slot += 15;
 551             }
 552             size_t len;
 553             decode_length_and_check(p, end, len);
 554             string begin(*p, len);
 555             *p += len;
 556             if (ch & 0x10) {
 557                 // OP_VALUE_GE
 558                 return new Xapian::Internal::QueryValueGE(slot, begin);
 559             }
 560
 561             // OP_VALUE_RANGE
 562             decode_length_and_check(p, end, len);
 563             string end_(*p, len);
 564             *p += len;
 565             if (begin.empty()) // FIXME: is this right?
 566                 return new Xapian::Internal::QueryValueLE(slot, end_);
 567             return new Xapian::Internal::QueryValueRange(slot, begin, end_);
 568         }
 569         case 0: {
 570             // Other operators
 571             //
 572             //   000ttttt where:
 573             //     ttttt -> encodes which OP_XXX
 574             switch (ch & 0x1f) {
 575                 case 0x00: // OP_INVALID
 576                     return new Xapian::Internal::QueryInvalid();
 577                 case 0x0b: { // Wildcard
 578                     if (*p == end)
 579                         throw SerialisationError("not enough data");
 580                     Xapian::termcount max_expansion;
 581                     decode_length(p, end, max_expansion);
 582                     if (end - *p < 2)
 583                         throw SerialisationError("not enough data");
 584                     int max_type = static_cast<unsigned char>(*(*p)++);
 585                     op combiner = static_cast<op>(*(*p)++);
 586                     size_t len;
 587                     decode_length_and_check(p, end, len);
 588                     string pattern(*p, len);
 589                     *p += len;
 590                     return new Xapian::Internal::QueryWildcard(pattern,
 591                                                                max_expansion,
 592                                                                max_type,
 593                                                                combiner);
 594                 }
 595                 case 0x0c: { // PostingSource
 596                     size_t len;
 597                     decode_length_and_check(p, end, len);
 598                     string name(*p, len);
 599                     *p += len;
 600
 601                     const PostingSource * reg_source = reg.get_posting_source(name);
 602                     if (!reg_source) {
 603                         string m = "PostingSource ";
 604                         m += name;
 605                         m += " not registered";
 606                         throw SerialisationError(m);
 607                     }
 608
 609                     decode_length_and_check(p, end, len);
 610                     PostingSource * source =
 611                         reg_source->unserialise_with_registry(string(*p, len),
 612                                                               reg);
 613                     *p += len;
 614                     return new Xapian::Internal::QueryPostingSource(source->release());
 615                 }
 616                 case 0x0d: {
 617                     using Xapian::Internal::QueryScaleWeight;
 618                     double scale_factor = unserialise_double(p, end);
 619                     return new QueryScaleWeight(scale_factor,
 620                                                 Query(unserialise(p, end, reg)));
 621                 }
 622                 case 0x0e: {
 623                     Xapian::termcount wqf;
 624                     Xapian::termpos pos;
 625                     decode_length(p, end, wqf);
 626                     decode_length(p, end, pos);
 627                     return new Xapian::Internal::QueryTerm(string(), wqf, pos);
 628                 }
 629                 case 0x0f:
 630                     return Query::MatchAll.internal.get();
 631                 default: // Others currently unused.
 632                     break;
 633             }
 634             break;
 635         }
 636     }
 637     string msg = "Unknown Query serialisation: ";
 638     msg += str(ch);
 639     throw SerialisationError(msg);
 640 }
 641
 642 void
 643 Query::Internal::postlist_sub_and_like(AndContext& ctx,
 644                                        QueryOptimiser * qopt,
 645                                        double factor) const
 646 {
 647     ctx.add_postlist(postlist(qopt, factor));
 648 }
 649
 650 void
 651 Query::Internal::postlist_sub_or_like(OrContext& ctx,
 652                                       QueryOptimiser * qopt,
 653                                       double factor) const
 654 {
 655     ctx.add_postlist(postlist(qopt, factor));
 656 }
 657
 658 void
 659 Query::Internal::postlist_sub_xor(XorContext& ctx,
 660                                   QueryOptimiser * qopt,
 661                                   double factor) const
 662 {
 663     ctx.add_postlist(postlist(qopt, factor));
 664 }
 665
 666 namespace Internal {
 667
 668 Query::op
 669 QueryTerm::get_type() const XAPIAN_NOEXCEPT
 670 {
 671     return term.empty() ? Query::LEAF_MATCH_ALL : Query::LEAF_TERM;
 672 }
 673
 674 string
 675 QueryTerm::get_description() const
 676 {
 677     string desc;
 678     if (term.empty()) {
 679         desc = "<alldocuments>";
 680     } else {
 681         description_append(desc, term);
 682     }
 683     if (wqf != 1) {
 684         desc += '#';
 685         desc += str(wqf);
 686     }
 687     if (pos) {
 688         desc += '@';
 689         desc += str(pos);
 690     }
 691     return desc;
 692 }
 693
 694 QueryPostingSource::QueryPostingSource(PostingSource * source_)
 695     : source(source_)
 696 {
 697     if (!source_)
 698         throw Xapian::InvalidArgumentError("source parameter can't be NULL");
 699     if (source->_refs == 0) {
 700         // source_ isn't reference counted, so try to clone it.  If clone()
 701         // isn't implemented, just use the object provided and it's the
 702         // caller's responsibility to ensure it stays valid while in use.
 703         PostingSource * cloned_source = source->clone();
 704         if (cloned_source) source = cloned_source->release();
 705     }
 706 }
 707
 708 Query::op
 709 QueryPostingSource::get_type() const XAPIAN_NOEXCEPT
 710 {
 711     return Query::LEAF_POSTING_SOURCE;
 712 }
 713
 714 string
 715 QueryPostingSource::get_description() const
 716 {
 717     string desc = "PostingSource(";
 718     desc += source->get_description();
 719     desc += ')';
 720     return desc;
 721 }
 722
 723 QueryScaleWeight::QueryScaleWeight(double factor, const Query & subquery_)
 724     : scale_factor(factor), subquery(subquery_)
 725 {
 726     if (rare(scale_factor < 0.0))
 727         throw Xapian::InvalidArgumentError("OP_SCALE_WEIGHT requires factor >= 0");
 728 }
 729
 730 Query::op
 731 QueryScaleWeight::get_type() const XAPIAN_NOEXCEPT
 732 {
 733     return Query::OP_SCALE_WEIGHT;
 734 }
 735
 736 size_t
 737 QueryScaleWeight::get_num_subqueries() const XAPIAN_NOEXCEPT
 738 {
 739     return 1;
 740 }
 741
 742 const Query
 743 QueryScaleWeight::get_subquery(size_t) const
 744 {
 745     return subquery;
 746 }
 747
 748 string
 749 QueryScaleWeight::get_description() const
 750 {
 751     Assert(subquery.internal.get());
 752     string desc = str(scale_factor);
 753     desc += " * ";
 754     desc += subquery.internal->get_description();
 755     return desc;
 756 }
 757
 758 PostingIterator::Internal *
 759 QueryTerm::postlist(QueryOptimiser * qopt, double factor) const
 760 {
 761     LOGCALL(QUERY, PostingIterator::Internal *, "QueryTerm::postlist", qopt | factor);
 762     if (factor != 0.0)
 763         qopt->inc_total_subqs();
 764     RETURN(qopt->open_post_list(term, wqf, factor));
 765 }
 766
 767 PostingIterator::Internal *
 768 QueryPostingSource::postlist(QueryOptimiser * qopt, double factor) const
 769 {
 770     LOGCALL(QUERY, PostingIterator::Internal *, "QueryPostingSource::postlist", qopt | factor);
 771     Assert(source.get());
 772     if (factor != 0.0)
 773         qopt->inc_total_subqs();
 774     // Casting away const on the Database::Internal here is OK, as we wrap
 775     // them in a const Xapian::Database so non-const methods can't actually
 776     // be called on the Database::Internal object.
 777     const Xapian::Database wrappeddb(
 778             const_cast<Xapian::Database::Internal*>(&(qopt->db)));
 779     RETURN(new ExternalPostList(wrappeddb, source.get(), factor, qopt->matcher));
 780 }
 781
 782 PostingIterator::Internal *
 783 QueryScaleWeight::postlist(QueryOptimiser * qopt, double factor) const
 784 {
 785     LOGCALL(QUERY, PostingIterator::Internal *, "QueryScaleWeight::postlist", qopt | factor);
 786     RETURN(subquery.internal->postlist(qopt, factor * scale_factor));
 787 }
 788
 789 void
 790 QueryTerm::gather_terms(void * void_terms) const
 791 {
 792     // Skip Xapian::Query::MatchAll (aka Xapian::Query("")).
 793     if (!term.empty()) {
 794         vector<pair<Xapian::termpos, string> > &terms =
 795             *static_cast<vector<pair<Xapian::termpos, string> >*>(void_terms);
 796         terms.push_back(make_pair(pos, term));
 797     }
 798 }
 799
 800 PostingIterator::Internal *
 801 QueryValueRange::postlist(QueryOptimiser *qopt, double factor) const
 802 {
 803     LOGCALL(QUERY, PostingIterator::Internal *, "QueryValueRange::postlist", qopt | factor);
 804     if (factor != 0.0)
 805         qopt->inc_total_subqs();
 806     const Xapian::Database::Internal & db = qopt->db;
 807     const string & lb = db.get_value_lower_bound(slot);
 808     if (lb.empty()) {
 809         // This should only happen if there are no values in this slot (which
 810         // could be because the backend just doesn't support values at all).
 811         // If there were values in the slot, the backend should have a
 812         // non-empty lower bound, even if it isn't a tight one.
 813         AssertEq(db.get_value_freq(slot), 0);
 814         RETURN(new EmptyPostList);
 815     }
 816     if (end < lb) {
 817         RETURN(new EmptyPostList);
 818     }
 819     const string & ub = db.get_value_upper_bound(slot);
 820     if (begin > ub) {
 821         RETURN(new EmptyPostList);
 822     }
 823     if (end >= ub) {
 824         // If begin <= lb too, then the range check isn't needed, but we do
 825         // still need to consider which documents have a value set in this
 826         // slot.  If this value is set for all documents, we can replace it
 827         // with the MatchAll postlist, which is especially efficient if
 828         // there are no gaps in the docids.
 829         if (begin <= lb && db.get_value_freq(slot) == db.get_doccount()) {
 830             RETURN(db.open_post_list(string()));
 831         }
 832         RETURN(new ValueGePostList(&db, slot, begin));
 833     }
 834     RETURN(new ValueRangePostList(&db, slot, begin, end));
 835 }
 836
 837 void
 838 QueryValueRange::serialise(string & result) const
 839 {
 840     if (slot < 15) {
 841         result += static_cast<char>(0x20 | slot);
 842     } else {
 843         result += static_cast<char>(0x20 | 15);
 844         result += encode_length(slot - 15);
 845     }
 846     result += encode_length(begin.size());
 847     result += begin;
 848     result += encode_length(end.size());
 849     result += end;
 850 }
 851
 852 Query::op
 853 QueryValueRange::get_type() const XAPIAN_NOEXCEPT
 854 {
 855     return Query::OP_VALUE_RANGE;
 856 }
 857
 858 string
 859 QueryValueRange::get_description() const
 860 {
 861     string desc = "VALUE_RANGE ";
 862     desc += str(slot);
 863     desc += ' ';
 864     description_append(desc, begin);
 865     desc += ' ';
 866     description_append(desc, end);
 867     return desc;
 868 }
 869
 870 PostingIterator::Internal *
 871 QueryValueLE::postlist(QueryOptimiser *qopt, double factor) const
 872 {
 873     LOGCALL(QUERY, PostingIterator::Internal *, "QueryValueLE::postlist", qopt | factor);
 874     if (factor != 0.0)
 875         qopt->inc_total_subqs();
 876     const Xapian::Database::Internal & db = qopt->db;
 877     const string & lb = db.get_value_lower_bound(slot);
 878     // If lb.empty(), the backend doesn't provide value bounds.
 879     if (!lb.empty()) {
 880         if (limit < lb) {
 881             RETURN(new EmptyPostList);
 882         }
 883         if (limit >= db.get_value_upper_bound(slot)) {
 884             // The range check isn't needed, but we do still need to consider
 885             // which documents have a value set in this slot.  If this value is
 886             // set for all documents, we can replace it with the MatchAll
 887             // postlist, which is especially efficient if there are no gaps in
 888             // the docids.
 889             if (db.get_value_freq(slot) == db.get_doccount()) {
 890                 RETURN(db.open_post_list(string()));
 891             }
 892         }
 893     }
 894     RETURN(new ValueRangePostList(&db, slot, string(), limit));
 895 }
 896
 897 void
 898 QueryValueLE::serialise(string & result) const
 899 {
 900     // Encode as a range with an empty start (which only takes a single byte to
 901     // encode).
 902     if (slot < 15) {
 903         result += static_cast<char>(0x20 | slot);
 904     } else {
 905         result += static_cast<char>(0x20 | 15);
 906         result += encode_length(slot - 15);
 907     }
 908     result += encode_length(0);
 909     result += encode_length(limit.size());
 910     result += limit;
 911 }
 912
 913 Query::op
 914 QueryValueLE::get_type() const XAPIAN_NOEXCEPT
 915 {
 916     return Query::OP_VALUE_LE;
 917 }
 918
 919 string
 920 QueryValueLE::get_description() const
 921 {
 922     string desc = "VALUE_LE ";
 923     desc += str(slot);
 924     desc += ' ';
 925     description_append(desc, limit);
 926     return desc;
 927 }
 928
 929 PostingIterator::Internal *
 930 QueryValueGE::postlist(QueryOptimiser *qopt, double factor) const
 931 {
 932     LOGCALL(QUERY, PostingIterator::Internal *, "QueryValueGE::postlist", qopt | factor);
 933     if (factor != 0.0)
 934         qopt->inc_total_subqs();
 935     const Xapian::Database::Internal & db = qopt->db;
 936     const string & lb = db.get_value_lower_bound(slot);
 937     // If lb.empty(), the backend doesn't provide value bounds.
 938     if (!lb.empty()) {
 939         if (limit > db.get_value_upper_bound(slot)) {
 940             RETURN(new EmptyPostList);
 941         }
 942         if (limit < lb) {
 943             // The range check isn't needed, but we do still need to consider
 944             // which documents have a value set in this slot.  If this value is
 945             // set for all documents, we can replace it with the MatchAll
 946             // postlist, which is especially efficient if there are no gaps in
 947             // the docids.
 948             if (db.get_value_freq(slot) == db.get_doccount()) {
 949                 RETURN(db.open_post_list(string()));
 950             }
 951         }
 952     }
 953     RETURN(new ValueGePostList(&db, slot, limit));
 954 }
 955
 956 void
 957 QueryValueGE::serialise(string & result) const
 958 {
 959     if (slot < 15) {
 960         result += static_cast<char>(0x20 | 0x10 | slot);
 961     } else {
 962         result += static_cast<char>(0x20 | 0x10 | 15);
 963         result += encode_length(slot - 15);
 964     }
 965     result += encode_length(limit.size());
 966     result += limit;
 967 }
 968
 969 Query::op
 970 QueryValueGE::get_type() const XAPIAN_NOEXCEPT
 971 {
 972     return Query::OP_VALUE_GE;
 973 }
 974
 975 string
 976 QueryValueGE::get_description() const
 977 {
 978     string desc = "VALUE_GE ";
 979     desc += str(slot);
 980     desc += ' ';
 981     description_append(desc, limit);
 982     return desc;
 983 }
 984
 985 PostingIterator::Internal *
 986 QueryWildcard::postlist(QueryOptimiser * qopt, double factor) const
 987 {
 988     LOGCALL(QUERY, PostingIterator::Internal *, "QueryWildcard::postlist", qopt | factor);
 989     Query::op op = combiner;
 990     double or_factor = 0.0;
 991     if (factor == 0.0) {
 992         // If we have a factor of 0, we don't care about the weights, so
 993         // we're just like a normal OR query.
 994         op = Query::OP_OR;
 995     } else if (op != Query::OP_SYNONYM) {
 996         or_factor = factor;
 997     }
 998
 999     bool old_in_synonym = qopt->in_synonym;
1000     if (!old_in_synonym) {
1001         qopt->in_synonym = (op == Query::OP_SYNONYM);
1002     }
1003
1004     OrContext ctx(qopt, 0);
1005     unique_ptr<TermList> t(qopt->db.open_allterms(pattern));
1006     Xapian::termcount expansions_left = max_expansion;
1007     // If there's no expansion limit, set expansions_left to the maximum
1008     // value Xapian::termcount can hold.
1009     if (expansions_left == 0)
1010         --expansions_left;
1011     while (true) {
1012         t->next();
1013         if (t->at_end())
1014             break;
1015         if (max_type < Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT) {
1016             if (expansions_left-- == 0) {
1017                 if (max_type == Xapian::Query::WILDCARD_LIMIT_FIRST)
1018                     break;
1019                 string msg("Wildcard ");
1020                 msg += pattern;
1021                 msg += "* expands to more than ";
1022                 msg += str(max_expansion);
1023                 msg += " terms";
1024                 throw Xapian::WildcardError(msg);
1025             }
1026         }
1027         const string & term = t->get_termname();
1028         ctx.add_postlist(qopt->open_lazy_post_list(term, 1, or_factor));
1029     }
1030
1031     if (max_type == Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT) {
1032         // FIXME: open_lazy_post_list() results in the term getting registered
1033         // for stats, so we still incur an avoidable cost from the full
1034         // expansion size of the wildcard, which is most likely to be visible
1035         // with the remote backend.  Perhaps we should split creating the lazy
1036         // postlist from registering the term for stats.
1037         if (ctx.size() > max_expansion)
1038             ctx.select_most_frequent(max_expansion);
1039     }
1040
1041     if (factor != 0.0) {
1042         if (op != Query::OP_SYNONYM) {
1043             qopt->set_total_subqs(qopt->get_total_subqs() + ctx.size());
1044         } else {
1045             qopt->inc_total_subqs();
1046         }
1047     }
1048
1049     qopt->in_synonym = old_in_synonym;
1050
1051     if (ctx.empty())
1052         RETURN(new EmptyPostList);
1053
1054     if (op == Query::OP_MAX)
1055         RETURN(ctx.postlist_max());
1056
1057     PostList * pl = ctx.postlist();
1058     if (op == Query::OP_OR)
1059         RETURN(pl);
1060
1061     // We build an OP_OR tree for OP_SYNONYM and then wrap it in a
1062     // SynonymPostList, which supplies the weights.
1063     PostingIterator::Internal * r = qopt->make_synonym_postlist(pl, factor);
1064     RETURN(r);
1065 }
1066
1067 termcount
1068 QueryWildcard::get_length() const XAPIAN_NOEXCEPT
1069 {
1070     // We currently assume wqf is 1 for calculating the synonym's weight
1071     // since conceptually the synonym is one "virtual" term.  If we were
1072     // to combine multiple occurrences of the same synonym expansion into
1073     // a single instance with wqf set, we would want to track the wqf.
1074     return 1;
1075 }
1076
1077 void
1078 QueryWildcard::serialise(string & result) const
1079 {
1080     result += static_cast<char>(0x0b);
1081     result += encode_length(max_expansion);
1082     result += static_cast<unsigned char>(max_type);
1083     result += static_cast<unsigned char>(combiner);
1084     result += encode_length(pattern.size());
1085     result += pattern;
1086 }
1087
1088 Query::op
1089 QueryWildcard::get_type() const XAPIAN_NOEXCEPT
1090 {
1091     return Query::OP_WILDCARD;
1092 }
1093
1094 string
1095 QueryWildcard::get_description() const
1096 {
1097     string desc = "WILDCARD ";
1098     switch (combiner) {
1099         case Query::OP_SYNONYM:
1100             desc += "SYNONYM ";
1101             break;
1102         case Query::OP_MAX:
1103             desc += "MAX ";
1104             break;
1105         case Query::OP_OR:
1106             desc += "OR ";
1107             break;
1108         default:
1109             desc += "BAD ";
1110             break;
1111     }
1112     description_append(desc, pattern);
1113     return desc;
1114 }
1115
1116 Xapian::termcount
1117 QueryBranch::get_length() const XAPIAN_NOEXCEPT
1118 {
1119     // Sum results from all subqueries.
1120     Xapian::termcount result = 0;
1121     QueryVector::const_iterator i;
1122     for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1123         // MatchNothing subqueries should have been removed by done(), but we
1124         // can't use Assert in a XAPIAN_NOEXCEPT function.  But we'll get a
1125         // segfault anyway.
1126         result += (*i).internal->get_length();
1127     }
1128     return result;
1129 }
1130
1131 #define MULTIWAY(X) static_cast<unsigned char>(0x80 | (X) << 3)
1132 #define MISC(X) static_cast<unsigned char>(X)
1133 void
1134 QueryBranch::serialise_(string & result, Xapian::termcount parameter) const
1135 {
1136     static const unsigned char first_byte[] = {
1137         MULTIWAY(0),    // OP_AND
1138         MULTIWAY(1),    // OP_OR
1139         MULTIWAY(2),    // OP_AND_NOT
1140         MULTIWAY(3),    // OP_XOR
1141         MULTIWAY(4),    // OP_AND_MAYBE
1142         MULTIWAY(5),    // OP_FILTER
1143         MULTIWAY(14),   // OP_NEAR
1144         MULTIWAY(15),   // OP_PHRASE
1145         0,              // OP_VALUE_RANGE
1146         MISC(3),        // OP_SCALE_WEIGHT
1147         MULTIWAY(13),   // OP_ELITE_SET
1148         0,              // OP_VALUE_GE
1149         0,              // OP_VALUE_LE
1150         MULTIWAY(6),    // OP_SYNONYM
1151         MULTIWAY(7)     // OP_MAX
1152     };
1153     Xapian::Query::op op_ = get_op();
1154     AssertRel(size_t(op_),<,sizeof(first_byte));
1155     unsigned char ch = first_byte[op_];
1156     if (ch & 0x80) {
1157         // Multi-way operator.
1158         if (subqueries.size() < 8)
1159             ch |= subqueries.size();
1160         result += ch;
1161         if (subqueries.size() >= 8)
1162             result += encode_length(subqueries.size() - 8);
1163         if (ch >= MULTIWAY(13))
1164             result += encode_length(parameter);
1165     } else {
1166         result += ch;
1167     }
1168
1169     QueryVector::const_iterator i;
1170     for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1171         // MatchNothing subqueries should have been removed by done().
1172         Assert((*i).internal.get());
1173         (*i).internal->serialise(result);
1174     }
1175
1176     // For OP_NEAR, OP_PHRASE, and OP_ELITE_SET, the window/set size gets
1177     // appended next by an overloaded serialise() method in the subclass.
1178 }
1179
1180 void
1181 QueryBranch::serialise(string & result) const
1182 {
1183     QueryBranch::serialise_(result);
1184 }
1185
1186 void
1187 QueryNear::serialise(string & result) const
1188 {
1189     // FIXME: window - subqueries.size() ?
1190     QueryBranch::serialise_(result, window);
1191 }
1192
1193 void
1194 QueryPhrase::serialise(string & result) const
1195 {
1196     // FIXME: window - subqueries.size() ?
1197     QueryBranch::serialise_(result, window);
1198 }
1199
1200 void
1201 QueryEliteSet::serialise(string & result) const
1202 {
1203     // FIXME: set_size - subqueries.size() ?
1204     QueryBranch::serialise_(result, set_size);
1205 }
1206
1207 void
1208 QueryBranch::gather_terms(void * void_terms) const
1209 {
1210     // Gather results from all subqueries.
1211     QueryVector::const_iterator i;
1212     for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1213         // MatchNothing subqueries should have been removed by done().
1214         Assert((*i).internal.get());
1215         (*i).internal->gather_terms(void_terms);
1216     }
1217 }
1218
1219 void
1220 QueryBranch::do_or_like(OrContext& ctx, QueryOptimiser * qopt, double factor,
1221                         Xapian::termcount elite_set_size, size_t first) const
1222 {
1223     LOGCALL_VOID(MATCH, "QueryBranch::do_or_like", ctx | qopt | factor | elite_set_size);
1224
1225     // FIXME: we could optimise by merging OP_ELITE_SET and OP_OR like we do
1226     // for AND-like operations.
1227
1228     // OP_SYNONYM with a single subquery is only simplified by
1229     // QuerySynonym::done() if the single subquery is a term or MatchAll.
1230     Assert(subqueries.size() >= 2 || get_op() == Query::OP_SYNONYM);
1231
1232     vector<PostList *> postlists;
1233     postlists.reserve(subqueries.size() - first);
1234
1235     QueryVector::const_iterator q;
1236     for (q = subqueries.begin() + first; q != subqueries.end(); ++q) {
1237         // MatchNothing subqueries should have been removed by done().
1238         Assert((*q).internal.get());
1239         (*q).internal->postlist_sub_or_like(ctx, qopt, factor);
1240     }
1241
1242     if (elite_set_size && elite_set_size < subqueries.size()) {
1243         ctx.select_elite_set(elite_set_size, subqueries.size());
1244         // FIXME: not right!
1245     }
1246 }
1247
1248 PostList *
1249 QueryBranch::do_synonym(QueryOptimiser * qopt, double factor) const
1250 {
1251     LOGCALL(MATCH, PostList *, "QueryBranch::do_synonym", qopt | factor);
1252     OrContext ctx(qopt, subqueries.size());
1253     if (factor == 0.0) {
1254         // If we have a factor of 0, we don't care about the weights, so
1255         // we're just like a normal OR query.
1256         do_or_like(ctx, qopt, 0.0);
1257         return ctx.postlist();
1258     }
1259
1260     bool old_in_synonym = qopt->in_synonym;
1261     qopt->in_synonym = true;
1262     do_or_like(ctx, qopt, 0.0);
1263     PostList * pl = ctx.postlist();
1264     qopt->in_synonym = old_in_synonym;
1265
1266     // We currently assume wqf is 1 for calculating the synonym's weight
1267     // since conceptually the synonym is one "virtual" term.  If we were
1268     // to combine multiple occurrences of the same synonym expansion into
1269     // a single instance with wqf set, we would want to track the wqf.
1270
1271     // We build an OP_OR tree for OP_SYNONYM and then wrap it in a
1272     // SynonymPostList, which supplies the weights.
1273     RETURN(qopt->make_synonym_postlist(pl, factor));
1274 }
1275
1276 PostList *
1277 QueryBranch::do_max(QueryOptimiser * qopt, double factor) const
1278 {
1279     LOGCALL(MATCH, PostList *, "QueryBranch::do_max", qopt | factor);
1280     OrContext ctx(qopt, subqueries.size());
1281     do_or_like(ctx, qopt, factor);
1282     if (factor == 0.0) {
1283         // If we have a factor of 0, we don't care about the weights, so
1284         // we're just like a normal OR query.
1285         RETURN(ctx.postlist());
1286     }
1287
1288     // We currently assume wqf is 1 for calculating the OP_MAX's weight
1289     // since conceptually the OP_MAX is one "virtual" term.  If we were
1290     // to combine multiple occurrences of the same OP_MAX expansion into
1291     // a single instance with wqf set, we would want to track the wqf.
1292     RETURN(ctx.postlist_max());
1293 }
1294
1295 Xapian::Query::op
1296 QueryBranch::get_type() const XAPIAN_NOEXCEPT
1297 {
1298     return get_op();
1299 }
1300
1301 size_t
1302 QueryBranch::get_num_subqueries() const XAPIAN_NOEXCEPT
1303 {
1304     return subqueries.size();
1305 }
1306
1307 const Query
1308 QueryBranch::get_subquery(size_t n) const
1309 {
1310     return subqueries[n];
1311 }
1312
1313 const string
1314 QueryBranch::get_description_helper(const char * op,
1315                                     Xapian::termcount parameter) const
1316 {
1317     string desc = "(";
1318     QueryVector::const_iterator i;
1319     for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1320         if (desc.size() > 1) {
1321             desc += op;
1322             if (parameter) {
1323                 desc += str(parameter);
1324                 desc += ' ';
1325             }
1326         }
1327         Assert((*i).internal.get());
1328         // MatchNothing subqueries should have been removed by done(), and we
1329         // shouldn't get called before done() is, since that happens at the
1330         // end of the Xapian::Query constructor.
1331         desc += (*i).internal->get_description();
1332     }
1333     desc += ')';
1334     return desc;
1335 }
1336
1337 Query::Internal *
1338 QueryWindowed::done()
1339 {
1340     // If window size not specified, default it.
1341     if (window == 0)
1342         window = subqueries.size();
1343     return QueryAndLike::done();
1344 }
1345
1346 void
1347 QueryScaleWeight::gather_terms(void * void_terms) const
1348 {
1349     subquery.internal->gather_terms(void_terms);
1350 }
1351
1352 void QueryTerm::serialise(string & result) const
1353 {
1354     size_t len = term.size();
1355     if (len == 0) {
1356         if (wqf == 1 && pos == 0) {
1357             // Query::MatchAll
1358             result += '\x0f';
1359         } else {
1360             // Weird mutant versions of MatchAll
1361             result += '\x0e';
1362             result += encode_length(wqf);
1363             result += encode_length(pos);
1364         }
1365     } else if (wqf == 1) {
1366         if (pos == 0) {
1367             // Single occurrence probabilistic term without position set.
1368             if (len >= 16) {
1369                 result += static_cast<char>(0x40 | 0x10);
1370                 result += encode_length(term.size() - 16);
1371             } else {
1372                 result += static_cast<char>(0x40 | 0x10 | len);
1373             }
1374             result += term;
1375         } else {
1376             // Single occurrence probabilistic term with position set.
1377             if (len >= 16) {
1378                 result += static_cast<char>(0x40 | 0x20);
1379                 result += encode_length(term.size() - 16);
1380             } else {
1381                 result += static_cast<char>(0x40 | 0x20 | len);
1382             }
1383             result += term;
1384             result += encode_length(pos);
1385         }
1386     } else if (wqf > 1 || pos > 0) {
1387         // General case.
1388         if (len >= 16) {
1389             result += static_cast<char>(0x40 | 0x30);
1390             result += encode_length(term.size() - 16);
1391         } else if (len) {
1392             result += static_cast<char>(0x40 | 0x30 | len);
1393         }
1394         result += term;
1395         result += encode_length(wqf);
1396         result += encode_length(pos);
1397     } else {
1398         // Typical boolean term.
1399         AssertEq(wqf, 0);
1400         AssertEq(pos, 0);
1401         if (len >= 16) {
1402             result += static_cast<char>(0x40);
1403             result += encode_length(term.size() - 16);
1404         } else {
1405             result += static_cast<char>(0x40 | len);
1406         }
1407         result += term;
1408     }
1409 }
1410
1411 void QueryPostingSource::serialise(string & result) const
1412 {
1413     result += static_cast<char>(0x0c);
1414
1415     const string & n = source->name();
1416     result += encode_length(n.size());
1417     result += n;
1418
1419     const string & s = source->serialise();
1420     result += encode_length(s.size());
1421     result += s;
1422 }
1423
1424 void QueryScaleWeight::serialise(string & result) const
1425 {
1426     Assert(subquery.internal.get());
1427     const string & s = serialise_double(scale_factor);
1428     result += '\x0d';
1429     result += s;
1430     subquery.internal->serialise(result);
1431 }
1432
1433 struct is_matchnothing {
1434     bool operator()(const Xapian::Query & q) const {
1435         return q.internal.get() == NULL;
1436     }
1437 };
1438
1439 void
1440 QueryAndLike::add_subquery(const Xapian::Query & subquery)
1441 {
1442     // If the AndLike is already MatchNothing, do nothing.
1443     if (subqueries.size() == 1 && subqueries[0].internal.get() == NULL)
1444         return;
1445     // If we're adding MatchNothing, discard any previous subqueries.
1446     if (subquery.internal.get() == NULL)
1447         subqueries.clear();
1448     subqueries.push_back(subquery);
1449 }
1450
1451 Query::Internal *
1452 QueryAndLike::done()
1453 {
1454     // Empty AndLike gives MatchNothing.
1455     if (subqueries.empty())
1456         return NULL;
1457     // We handle any subquery being MatchNothing in add_subquery() by leaving
1458     // a single MatchNothing subquery, and so this check results in AndLike
1459     // giving MatchNothing.
1460     if (subqueries.size() == 1)
1461         return subqueries[0].internal.get();
1462     return this;
1463 }
1464
1465 PostingIterator::Internal *
1466 QueryAndLike::postlist(QueryOptimiser * qopt, double factor) const
1467 {
1468     LOGCALL(QUERY, PostingIterator::Internal *, "QueryAndLike::postlist", qopt | factor);
1469     AndContext ctx(qopt, subqueries.size());
1470     postlist_sub_and_like(ctx, qopt, factor);
1471     RETURN(ctx.postlist());
1472 }
1473
1474 void
1475 QueryAndLike::postlist_sub_and_like(AndContext& ctx, QueryOptimiser * qopt, double factor) const
1476 {
1477     QueryVector::const_iterator i;
1478     for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1479         // MatchNothing subqueries should have been removed by done().
1480         Assert((*i).internal.get());
1481         (*i).internal->postlist_sub_and_like(ctx, qopt, factor);
1482     }
1483 }
1484
1485 void
1486 QueryOrLike::add_subquery(const Xapian::Query & subquery)
1487 {
1488     // Drop any subqueries which are MatchNothing.
1489     if (subquery.internal.get() != NULL)
1490         subqueries.push_back(subquery);
1491 }
1492
1493 Query::Internal *
1494 QueryOrLike::done()
1495 {
1496     // An empty OrLike gives MatchNothing.  Note that add_subquery() drops any
1497     // subqueries which are MatchNothing.
1498     if (subqueries.empty())
1499         return NULL;
1500     if (subqueries.size() == 1)
1501         return subqueries[0].internal.get();
1502     return this;
1503 }
1504
1505 void
1506 QueryAndNot::add_subquery(const Xapian::Query & subquery)
1507 {
1508     if (!subqueries.empty()) {
1509         // We're adding the 2nd or subsequent subquery, so this subquery is
1510         // negated.
1511         if (subqueries[0].internal.get() == NULL) {
1512             // The left side is already MatchNothing so drop any right side.
1513             //
1514             // MatchNothing AND_NOT X == MatchNothing
1515             return;
1516         }
1517         if (subquery.internal.get() == NULL) {
1518             // Drop MatchNothing on the right of AndNot.
1519             //
1520             // X AND_NOT MatchNothing == X
1521             return;
1522         }
1523         if (subquery.get_type() == subquery.OP_SCALE_WEIGHT) {
1524             // Strip OP_SCALE_WEIGHT wrapping from queries on the right of
1525             // AndNot as no weight is taken from them.
1526             subqueries.push_back(subquery.get_subquery(0));
1527             // The Query constructor for OP_SCALE_WEIGHT constructor should
1528             // eliminate OP_SCALE_WEIGHT applied to MatchNothing.
1529             Assert(subquery.get_subquery(0).internal.get() != NULL);
1530             return;
1531         }
1532     }
1533     subqueries.push_back(subquery);
1534 }
1535
1536 Query::Internal *
1537 QueryAndNot::done()
1538 {
1539     // Any MatchNothing right subqueries get discarded by add_subquery() - if
1540     // that leaves just the left subquery, return that.
1541     //
1542     // If left subquery is MatchNothing, then add_subquery() discards all right
1543     // subqueries, so this check also gives MatchNothing for this case.
1544     if (subqueries.size() == 1)
1545         return subqueries[0].internal.get();
1546     return this;
1547 }
1548
1549 void
1550 QueryAndMaybe::add_subquery(const Xapian::Query & subquery)
1551 {
1552     // If the left side of AndMaybe is already MatchNothing, do nothing.
1553     if (subqueries.size() == 1 && subqueries[0].internal.get() == NULL)
1554         return;
1555     // Drop any 2nd or subsequent subqueries which are MatchNothing.
1556     if (subquery.internal.get() != NULL || subqueries.empty())
1557         subqueries.push_back(subquery);
1558 }
1559
1560 Query::Internal *
1561 QueryAndMaybe::done()
1562 {
1563     // Any MatchNothing right subqueries get discarded by add_subquery() - if
1564     // that leaves just the left subquery, return that.
1565     //
1566     // If left subquery is MatchNothing, then add_subquery() discards all right
1567     // subqueries, so this check also gives MatchNothing for this case.
1568     if (subqueries.size() == 1)
1569         return subqueries[0].internal.get();
1570     return this;
1571 }
1572
1573 PostingIterator::Internal *
1574 QueryOr::postlist(QueryOptimiser * qopt, double factor) const
1575 {
1576     LOGCALL(QUERY, PostingIterator::Internal *, "QueryOr::postlist", qopt | factor);
1577     OrContext ctx(qopt, subqueries.size());
1578     do_or_like(ctx, qopt, factor);
1579     RETURN(ctx.postlist());
1580 }
1581
1582 void
1583 QueryOr::postlist_sub_or_like(OrContext& ctx, QueryOptimiser * qopt, double factor) const
1584 {
1585     do_or_like(ctx, qopt, factor);
1586 }
1587
1588 PostingIterator::Internal *
1589 QueryAndNot::postlist(QueryOptimiser * qopt, double factor) const
1590 {
1591     LOGCALL(QUERY, PostingIterator::Internal *, "QueryAndNot::postlist", qopt | factor);
1592     // FIXME: Combine and-like side with and-like stuff above.
1593     unique_ptr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
1594     OrContext ctx(qopt, subqueries.size() - 1);
1595     do_or_like(ctx, qopt, 0.0, 0, 1);
1596     unique_ptr<PostList> r(ctx.postlist());
1597     RETURN(new AndNotPostList(l.release(), r.release(),
1598                               qopt->matcher, qopt->db_size));
1599 }
1600
1601 PostingIterator::Internal *
1602 QueryXor::postlist(QueryOptimiser * qopt, double factor) const
1603 {
1604     LOGCALL(QUERY, PostingIterator::Internal *, "QueryXor::postlist", qopt | factor);
1605     XorContext ctx(qopt, subqueries.size());
1606     postlist_sub_xor(ctx, qopt, factor);
1607     RETURN(ctx.postlist());
1608 }
1609
1610 void
1611 QueryXor::postlist_sub_xor(XorContext& ctx, QueryOptimiser * qopt, double factor) const
1612 {
1613     QueryVector::const_iterator i;
1614     for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1615         // MatchNothing subqueries should have been removed by done().
1616         Assert((*i).internal.get());
1617         (*i).internal->postlist_sub_xor(ctx, qopt, factor);
1618     }
1619 }
1620
1621 PostingIterator::Internal *
1622 QueryAndMaybe::postlist(QueryOptimiser * qopt, double factor) const
1623 {
1624     LOGCALL(QUERY, PostingIterator::Internal *, "QueryAndMaybe::postlist", qopt | factor);
1625     // FIXME: Combine and-like side with and-like stuff above.
1626     unique_ptr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
1627     if (factor == 0.0) {
1628         // An unweighted OP_AND_MAYBE can be replaced with its left branch.
1629         RETURN(l.release());
1630     }
1631     OrContext ctx(qopt, subqueries.size() - 1);
1632     do_or_like(ctx, qopt, factor, 0, 1);
1633     unique_ptr<PostList> r(ctx.postlist());
1634     RETURN(new AndMaybePostList(l.release(), r.release(),
1635                                 qopt->matcher, qopt->db_size));
1636 }
1637
1638 PostingIterator::Internal *
1639 QueryFilter::postlist(QueryOptimiser * qopt, double factor) const
1640 {
1641     LOGCALL(QUERY, PostingIterator::Internal *, "QueryFilter::postlist", qopt | factor);
1642     // FIXME: Combine and-like stuff, like QueryOptimiser.
1643     AssertEq(subqueries.size(), 2);
1644     PostList * pls[2];
1645     unique_ptr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
1646     pls[1] = subqueries[1].internal->postlist(qopt, 0.0);
1647     pls[0] = l.release();
1648     RETURN(new MultiAndPostList(pls, pls + 2, qopt->matcher, qopt->db_size));
1649 }
1650
1651 void
1652 QueryFilter::postlist_sub_and_like(AndContext& ctx, QueryOptimiser * qopt, double factor) const
1653 {
1654     QueryVector::const_iterator i;
1655     for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1656         // MatchNothing subqueries should have been removed by done().
1657         Assert((*i).internal.get());
1658         (*i).internal->postlist_sub_and_like(ctx, qopt, factor);
1659         // Second and subsequent subqueries are unweighted.
1660         factor = 0.0;
1661     }
1662 }
1663
1664 void
1665 QueryWindowed::postlist_windowed(Query::op op, AndContext& ctx, QueryOptimiser * qopt, double factor) const
1666 {
1667     if (!qopt->full_db_has_positions) {
1668         // No positional data anywhere, so just handle as AND.
1669         QueryAndLike::postlist_sub_and_like(ctx, qopt, factor);
1670         return;
1671     }
1672
1673     if (!qopt->db.has_positions()) {
1674         // No positions in this subdatabase so this matches nothing, which
1675         // means the whole andcontext matches nothing.
1676         //
1677         // Bailing out here means we don't recurse deeper and that means we
1678         // don't call QueryOptimiser::inc_total_subqs() for leaf postlists in
1679         // the phrase, but at least one shard will count them, and the matcher
1680         // takes the highest answer (since 1.4.6).
1681         ctx.shrink(0);
1682         return;
1683     }
1684
1685     bool old_need_positions = qopt->need_positions;
1686     qopt->need_positions = true;
1687
1688     QueryVector::const_iterator i;
1689     for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1690         // MatchNothing subqueries should have been removed by done().
1691         Assert((*i).internal.get());
1692         bool is_term = ((*i).internal->get_type() == Query::LEAF_TERM);
1693         PostList* pl = (*i).internal->postlist(qopt, factor);
1694         if (!is_term)
1695             pl = new OrPosPostList(pl);
1696         ctx.add_postlist(pl);
1697     }
1698     // Record the positional filter to apply higher up the tree.
1699     ctx.add_pos_filter(op, subqueries.size(), window);
1700
1701     qopt->need_positions = old_need_positions;
1702 }
1703
1704 void
1705 QueryPhrase::postlist_sub_and_like(AndContext & ctx, QueryOptimiser * qopt, double factor) const
1706 {
1707     QueryWindowed::postlist_windowed(Query::OP_PHRASE, ctx, qopt, factor);
1708 }
1709
1710 void
1711 QueryNear::postlist_sub_and_like(AndContext & ctx, QueryOptimiser * qopt, double factor) const
1712 {
1713     QueryWindowed::postlist_windowed(Query::OP_NEAR, ctx, qopt, factor);
1714 }
1715
1716 PostingIterator::Internal *
1717 QueryEliteSet::postlist(QueryOptimiser * qopt, double factor) const
1718 {
1719     LOGCALL(QUERY, PostingIterator::Internal *, "QueryEliteSet::postlist", qopt | factor);
1720     OrContext ctx(qopt, subqueries.size());
1721     do_or_like(ctx, qopt, factor, set_size);
1722     RETURN(ctx.postlist());
1723 }
1724
1725 void
1726 QueryEliteSet::postlist_sub_or_like(OrContext& ctx, QueryOptimiser * qopt, double factor) const
1727 {
1728     do_or_like(ctx, qopt, factor, set_size);
1729 }
1730
1731 PostingIterator::Internal *
1732 QuerySynonym::postlist(QueryOptimiser * qopt, double factor) const
1733 {
1734     LOGCALL(QUERY, PostingIterator::Internal *, "QuerySynonym::postlist", qopt | factor);
1735     // Save and restore total_subqs so we only add one for the whole
1736     // OP_SYNONYM subquery (or none if we're not weighted).
1737     Xapian::termcount save_total_subqs = qopt->get_total_subqs();
1738     if (factor != 0.0)
1739         ++save_total_subqs;
1740     PostList * pl = do_synonym(qopt, factor);
1741     qopt->set_total_subqs(save_total_subqs);
1742     RETURN(pl);
1743 }
1744
1745 Query::Internal *
1746 QuerySynonym::done()
1747 {
1748     // An empty Synonym gives MatchNothing.  Note that add_subquery() drops any
1749     // subqueries which are MatchNothing.
1750     if (subqueries.empty())
1751         return NULL;
1752     // Synonym of a single subquery should only be simplified if that subquery
1753     // is a term (or MatchAll), or if it's also OP_SYNONYM.  Note that
1754     // MatchNothing subqueries are dropped, so we'd never get here with a
1755     // single MatchNothing subquery.
1756     if (subqueries.size() == 1) {
1757         Query::op sub_type = subqueries[0].get_type();
1758         if (sub_type == Query::LEAF_TERM || sub_type == Query::LEAF_MATCH_ALL ||
1759             sub_type == Query::OP_SYNONYM) {
1760             return subqueries[0].internal.get();
1761         }
1762     }
1763     return this;
1764 }
1765
1766 PostingIterator::Internal *
1767 QueryMax::postlist(QueryOptimiser * qopt, double factor) const
1768 {
1769     LOGCALL(QUERY, PostingIterator::Internal *, "QueryMax::postlist", qopt | factor);
1770     // Save and restore total_subqs so we only add one for the whole
1771     // OP_MAX subquery (or none if we're not weighted).
1772     Xapian::termcount save_total_subqs = qopt->get_total_subqs();
1773     if (factor != 0.0)
1774         ++save_total_subqs;
1775     PostList * pl = do_max(qopt, factor);
1776     qopt->set_total_subqs(save_total_subqs);
1777     RETURN(pl);
1778 }
1779
1780 Xapian::Query::op
1781 QueryAnd::get_op() const
1782 {
1783     return Xapian::Query::OP_AND;
1784 }
1785
1786 Xapian::Query::op
1787 QueryOr::get_op() const
1788 {
1789     return Xapian::Query::OP_OR;
1790 }
1791
1792 Xapian::Query::op
1793 QueryAndNot::get_op() const
1794 {
1795     return Xapian::Query::OP_AND_NOT;
1796 }
1797
1798 Xapian::Query::op
1799 QueryXor::get_op() const
1800 {
1801     return Xapian::Query::OP_XOR;
1802 }
1803
1804 Xapian::Query::op
1805 QueryAndMaybe::get_op() const
1806 {
1807     return Xapian::Query::OP_AND_MAYBE;
1808 }
1809
1810 Xapian::Query::op
1811 QueryFilter::get_op() const
1812 {
1813     return Xapian::Query::OP_FILTER;
1814 }
1815
1816 Xapian::Query::op
1817 QueryNear::get_op() const
1818 {
1819     return Xapian::Query::OP_NEAR;
1820 }
1821
1822 Xapian::Query::op
1823 QueryPhrase::get_op() const
1824 {
1825     return Xapian::Query::OP_PHRASE;
1826 }
1827
1828 Xapian::Query::op
1829 QueryEliteSet::get_op() const
1830 {
1831     return Xapian::Query::OP_ELITE_SET;
1832 }
1833
1834 Xapian::Query::op
1835 QuerySynonym::get_op() const
1836 {
1837     return Xapian::Query::OP_SYNONYM;
1838 }
1839
1840 Xapian::Query::op
1841 QueryMax::get_op() const
1842 {
1843     return Xapian::Query::OP_MAX;
1844 }
1845
1846 Xapian::Query::op
1847 QueryWildcard::get_op() const
1848 {
1849     return Xapian::Query::OP_WILDCARD;
1850 }
1851
1852 string
1853 QueryAnd::get_description() const
1854 {
1855     return get_description_helper(" AND ");
1856 }
1857
1858 string
1859 QueryOr::get_description() const
1860 {
1861     return get_description_helper(" OR ");
1862 }
1863
1864 string
1865 QueryAndNot::get_description() const
1866 {
1867     return get_description_helper(" AND_NOT ");
1868 }
1869
1870 string
1871 QueryXor::get_description() const
1872 {
1873     return get_description_helper(" XOR ");
1874 }
1875
1876 string
1877 QueryAndMaybe::get_description() const
1878 {
1879     return get_description_helper(" AND_MAYBE ");
1880 }
1881
1882 string
1883 QueryFilter::get_description() const
1884 {
1885     return get_description_helper(" FILTER ");
1886 }
1887
1888 string
1889 QueryNear::get_description() const
1890 {
1891     return get_description_helper(" NEAR ", window);
1892 }
1893
1894 string
1895 QueryPhrase::get_description() const
1896 {
1897     return get_description_helper(" PHRASE ", window);
1898 }
1899
1900 string
1901 QueryEliteSet::get_description() const
1902 {
1903     return get_description_helper(" ELITE_SET ", set_size);
1904 }
1905
1906 string
1907 QuerySynonym::get_description() const
1908 {
1909     if (subqueries.size() == 1) {
1910         string d = "(SYNONYM ";
1911         d += subqueries[0].internal->get_description();
1912         d += ")";
1913         return d;
1914     }
1915     return get_description_helper(" SYNONYM ");
1916 }
1917
1918 string
1919 QueryMax::get_description() const
1920 {
1921     return get_description_helper(" MAX ");
1922 }
1923
1924 Xapian::Query::op
1925 QueryInvalid::get_type() const XAPIAN_NOEXCEPT
1926 {
1927     return Xapian::Query::OP_INVALID;
1928 }
1929
1930 PostingIterator::Internal *
1931 QueryInvalid::postlist(QueryOptimiser *, double) const
1932 {
1933     throw Xapian::InvalidOperationError("Query is invalid");
1934 }
1935
1936 void
1937 QueryInvalid::serialise(std::string & result) const
1938 {
1939     result += static_cast<char>(0x00);
1940 }
1941
1942 string
1943 QueryInvalid::get_description() const
1944 {
1945     return "<INVALID>";
1946 }
1947
1948 }
1949 }