xapian-core/api/queryinternal.cc

   1 /** @file queryinternal.cc
   2  * @brief Xapian::Query internals
   3  */
   4 /* Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018 Olly Betts
   5  * Copyright (C) 2008,2009 Lemur Consulting Ltd
   6  *
   7  * This program is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License as
   9  * published by the Free Software Foundation; either version 2 of the
  10  * License, or (at your option) any later version.
  11  *
  12  * This program is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  * GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with this program; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
  20  */
  21
  22 #include <config.h>
  23
  24 #include "queryinternal.h"
  25
  26 #include "xapian/error.h"
  27 #include "xapian/postingsource.h"
  28 #include "xapian/query.h"
  29
  30 #include "heap.h"
  31 #include "leafpostlist.h"
  32 #include "matcher/andmaybepostlist.h"
  33 #include "matcher/andnotpostlist.h"
  34 #include "emptypostlist.h"
  35 #include "matcher/exactphrasepostlist.h"
  36 #include "matcher/externalpostlist.h"
  37 #include "matcher/maxpostlist.h"
  38 #include "matcher/multiandpostlist.h"
  39 #include "matcher/multixorpostlist.h"
  40 #include "matcher/nearpostlist.h"
  41 #include "matcher/orpospostlist.h"
  42 #include "matcher/orpostlist.h"
  43 #include "matcher/phrasepostlist.h"
  44 #include "matcher/queryoptimiser.h"
  45 #include "matcher/valuerangepostlist.h"
  46 #include "matcher/valuegepostlist.h"
  47 #include "net/length.h"
  48 #include "serialise-double.h"
  49 #include "stringutils.h"
  50 #include "termlist.h"
  51
  52 #include "debuglog.h"
  53 #include "omassert.h"
  54 #include "str.h"
  55 #include "unicode/description_append.h"
  56
  57 #include <algorithm>
  58 #include <functional>
  59 #include <list>
  60 #include <memory>
  61 #include <string>
  62 #include <unordered_set>
  63 #include <vector>
  64
  65 using namespace std;
  66
  67 template<class CLASS> struct delete_ptr {
  68     void operator()(CLASS *p) const { delete p; }
  69 };
  70
  71 using Xapian::Internal::AndContext;
  72 using Xapian::Internal::OrContext;
  73 using Xapian::Internal::XorContext;
  74
  75 namespace Xapian {
  76
  77 namespace Internal {
  78
  79 /** Class providing an operator which sorts postlists to select max or terms.
  80  *  This returns true if a has a (strictly) greater termweight than b.
  81  *
  82  *  Using this comparator will tend to result in multiple calls to
  83  *  recalc_maxweight() for each of the subqueries (we use it with nth_element
  84  *  which should be O(n)) - perhaps it'd be better to call recalc_maxweight()
  85  *  once and then sort on that.
  86  */
  87 struct CmpMaxOrTerms {
  88     /** Return true if and only if a has a strictly greater termweight than b. */
  89     bool operator()(PostList *a, PostList *b) {
  90 #if (defined(__i386__) && !defined(__SSE_MATH__)) || \
  91     defined(__mc68000__) || defined(__mc68010__) || \
  92     defined(__mc68020__) || defined(__mc68030__)
  93         // On some architectures, most common of which is x86, floating point
  94         // values are calculated and stored in registers with excess precision.
  95         // If the two recalc_maxweight() calls below return identical values in a
  96         // register, the excess precision may be dropped for one of them but
  97         // not the other (e.g. because the compiler saves the first calculated
  98         // weight to memory while calculating the second, then reloads it to
  99         // compare).  This leads to both a > b and b > a being true, which
 100         // violates the antisymmetry property of the strict weak ordering
 101         // required by nth_element().  This can have serious consequences (e.g.
 102         // segfaults).
 103         //
 104         // Note that m68k only has excess precision in earlier models - 68040
 105         // and later are OK:
 106         // http://gcc.gnu.org/ml/gcc-patches/2008-11/msg00105.html
 107         //
 108         // To avoid this, we store each result in a volatile double prior to
 109         // comparing them.  This means that the result of this test should
 110         // match that on other architectures with the same double format (which
 111         // is desirable), and actually has less overhead than rounding both
 112         // results to float (which is another approach which works).
 113         volatile double a_max_wt = a->recalc_maxweight();
 114         volatile double b_max_wt = b->recalc_maxweight();
 115         return a_max_wt > b_max_wt;
 116 #else
 117         return a->recalc_maxweight() > b->recalc_maxweight();
 118 #endif
 119     }
 120 };
 121
 122 /// Comparison functor which orders PostList* by descending get_termfreq_est().
 123 struct ComparePostListTermFreqAscending {
 124     /// Order by descending get_termfreq_est().
 125     bool operator()(const PostList *a, const PostList *b) const {
 126         return a->get_termfreq_est() > b->get_termfreq_est();
 127     }
 128 };
 129
 130 class Context {
 131   protected:
 132     QueryOptimiser* qopt;
 133
 134     vector<PostList*> pls;
 135
 136   public:
 137     Context(QueryOptimiser* qopt_, size_t reserve);
 138
 139     ~Context();
 140
 141     void add_postlist(PostList * pl) {
 142         pls.push_back(pl);
 143     }
 144
 145     bool empty() const {
 146         return pls.empty();
 147     }
 148
 149     size_t size() const {
 150         return pls.size();
 151     }
 152
 153     void shrink(size_t new_size);
 154 };
 155
 156 Context::Context(QueryOptimiser* qopt_, size_t reserve)
 157     : qopt(qopt_)
 158 {
 159     pls.reserve(reserve);
 160 }
 161
 162 void
 163 Context::shrink(size_t new_size)
 164 {
 165     AssertRel(new_size, <=, pls.size());
 166     if (new_size >= pls.size())
 167         return;
 168
 169     const PostList * hint_pl = qopt->get_hint_postlist();
 170     for (auto&& i = pls.begin() + new_size; i != pls.end(); ++i) {
 171         const PostList * pl = *i;
 172         if (rare(pl == hint_pl)) {
 173             // We were about to delete qopt's hint - instead tell qopt to take
 174             // ownership.
 175             qopt->take_hint_ownership();
 176             hint_pl = NULL;
 177         } else {
 178             delete pl;
 179         }
 180     }
 181     pls.resize(new_size);
 182 }
 183
 184 Context::~Context()
 185 {
 186     shrink(0);
 187 }
 188
 189 class OrContext : public Context {
 190   public:
 191     OrContext(QueryOptimiser* qopt_, size_t reserve)
 192         : Context(qopt_, reserve) { }
 193
 194     /// Select the best set_size postlists from the last out_of added.
 195     void select_elite_set(size_t set_size, size_t out_of);
 196
 197     /// Select the set_size postlists with the highest term frequency.
 198     void select_most_frequent(size_t set_size);
 199
 200     PostList * postlist();
 201     PostList * postlist_max();
 202 };
 203
 204 void
 205 OrContext::select_elite_set(size_t set_size, size_t out_of)
 206 {
 207     vector<PostList*>::iterator begin = pls.begin() + pls.size() - out_of;
 208     nth_element(begin, begin + set_size - 1, pls.end(), CmpMaxOrTerms());
 209     shrink(pls.size() - out_of + set_size);
 210 }
 211
 212 void
 213 OrContext::select_most_frequent(size_t set_size)
 214 {
 215     vector<PostList*>::iterator begin = pls.begin();
 216     nth_element(begin, begin + set_size - 1, pls.end(),
 217                 ComparePostListTermFreqAscending());
 218     shrink(set_size);
 219 }
 220
 221 PostList *
 222 OrContext::postlist()
 223 {
 224     Assert(!pls.empty());
 225
 226     if (pls.size() == 1) {
 227         PostList * pl = pls[0];
 228         pls.clear();
 229         return pl;
 230     }
 231
 232     // Make postlists into a heap so that the postlist with the greatest term
 233     // frequency is at the top of the heap.
 234     Heap::make(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
 235
 236     // Now build a tree of binary OrPostList objects.
 237     //
 238     // The algorithm used to build the tree is like that used to build an
 239     // optimal Huffman coding tree.  If we called next() repeatedly, this
 240     // arrangement would minimise the number of method calls.  Generally we
 241     // don't actually do that, but this arrangement is still likely to be a
 242     // good one, and it does minimise the work in the worst case.
 243     while (true) {
 244         // We build the tree such that at each branch:
 245         //
 246         //   l.get_termfreq_est() >= r.get_termfreq_est()
 247         //
 248         // We do this so that the OrPostList class can be optimised assuming
 249         // that this is the case.
 250         PostList * r = pls.front();
 251         Heap::pop(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
 252         pls.pop_back();
 253         PostList * pl;
 254         pl = new OrPostList(pls.front(), r, qopt->matcher, qopt->db_size);
 255
 256         if (pls.size() == 1) {
 257             pls.clear();
 258             return pl;
 259         }
 260
 261         pls[0] = pl;
 262         Heap::replace(pls.begin(), pls.end(),
 263                       ComparePostListTermFreqAscending());
 264     }
 265 }
 266
 267 PostList *
 268 OrContext::postlist_max()
 269 {
 270     Assert(!pls.empty());
 271
 272     if (pls.size() == 1) {
 273         PostList * pl = pls[0];
 274         pls.clear();
 275         return pl;
 276     }
 277
 278     // Sort the postlists so that the postlist with the greatest term frequency
 279     // is first.
 280     sort(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
 281
 282     PostList * pl;
 283     pl = new MaxPostList(pls.begin(), pls.end(), qopt->matcher, qopt->db_size);
 284
 285     pls.clear();
 286     return pl;
 287 }
 288
 289 class XorContext : public Context {
 290   public:
 291     XorContext(QueryOptimiser* qopt_, size_t reserve)
 292         : Context(qopt_, reserve) { }
 293
 294     PostList * postlist();
 295 };
 296
 297 PostList *
 298 XorContext::postlist()
 299 {
 300     Xapian::doccount db_size = qopt->db_size;
 301     PostList * pl;
 302     pl = new MultiXorPostList(pls.begin(), pls.end(), qopt->matcher, db_size);
 303
 304     // Empty pls so our destructor doesn't delete them all!
 305     pls.clear();
 306     return pl;
 307 }
 308
 309 class AndContext : public Context {
 310     class PosFilter {
 311         Xapian::Query::op op_;
 312
 313         /// Start and end indices for the PostLists this positional filter uses.
 314         size_t begin, end;
 315
 316         Xapian::termcount window;
 317
 318       public:
 319         PosFilter(Xapian::Query::op op__, size_t begin_, size_t end_,
 320                   Xapian::termcount window_)
 321             : op_(op__), begin(begin_), end(end_), window(window_) { }
 322
 323         PostList * postlist(PostList* pl,
 324                             const vector<PostList*>& pls,
 325                             PostListTree* pltree) const;
 326     };
 327
 328     list<PosFilter> pos_filters;
 329
 330   public:
 331     AndContext(QueryOptimiser* qopt_, size_t reserve)
 332         : Context(qopt_, reserve) { }
 333
 334     void add_pos_filter(Query::op op_,
 335                         size_t n_subqs,
 336                         Xapian::termcount window);
 337
 338     PostList * postlist();
 339 };
 340
 341 PostList *
 342 AndContext::PosFilter::postlist(PostList* pl,
 343                                 const vector<PostList*>& pls,
 344                                 PostListTree* pltree) const
 345 try {
 346     vector<PostList *>::const_iterator terms_begin = pls.begin() + begin;
 347     vector<PostList *>::const_iterator terms_end = pls.begin() + end;
 348
 349     if (op_ == Xapian::Query::OP_NEAR) {
 350         pl = new NearPostList(pl, window, terms_begin, terms_end, pltree);
 351     } else if (window == end - begin) {
 352         AssertEq(op_, Xapian::Query::OP_PHRASE);
 353         pl = new ExactPhrasePostList(pl, terms_begin, terms_end, pltree);
 354     } else {
 355         AssertEq(op_, Xapian::Query::OP_PHRASE);
 356         pl = new PhrasePostList(pl, window, terms_begin, terms_end, pltree);
 357     }
 358     return pl;
 359 } catch (...) {
 360     delete pl;
 361     throw;
 362 }
 363
 364 void
 365 AndContext::add_pos_filter(Query::op op_,
 366                            size_t n_subqs,
 367                            Xapian::termcount window)
 368 {
 369     Assert(n_subqs > 1);
 370     size_t end = pls.size();
 371     size_t begin = end - n_subqs;
 372     pos_filters.push_back(PosFilter(op_, begin, end, window));
 373 }
 374
 375 PostList *
 376 AndContext::postlist()
 377 {
 378     if (pls.empty()) {
 379         // This case only happens if this sub-database has no positional data
 380         // (but another sub-database does).
 381         Assert(pos_filters.empty());
 382         return new EmptyPostList;
 383     }
 384
 385     unique_ptr<PostList> pl(new MultiAndPostList(pls.begin(), pls.end(),
 386                                                  qopt->matcher, qopt->db_size));
 387
 388     // Sort the positional filters to try to apply them in an efficient order.
 389     // FIXME: We need to figure out what that is!  Try applying lowest cf/tf
 390     // first?
 391
 392     // Apply any positional filters.
 393     list<PosFilter>::const_iterator i;
 394     for (i = pos_filters.begin(); i != pos_filters.end(); ++i) {
 395         const PosFilter & filter = *i;
 396         pl.reset(filter.postlist(pl.release(), pls, qopt->matcher));
 397     }
 398
 399     // Empty pls so our destructor doesn't delete them all!
 400     pls.clear();
 401     return pl.release();
 402 }
 403
 404 }
 405
 406 Query::Internal::~Internal() { }
 407
 408 size_t
 409 Query::Internal::get_num_subqueries() const XAPIAN_NOEXCEPT
 410 {
 411     return 0;
 412 }
 413
 414 const Query
 415 Query::Internal::get_subquery(size_t) const
 416 {
 417     throw Xapian::InvalidArgumentError("get_subquery() not meaningful for this Query object");
 418 }
 419
 420 Xapian::termcount
 421 Query::Internal::get_wqf() const
 422 {
 423     throw Xapian::InvalidArgumentError("get_wqf() not meaningful for this Query object");
 424 }
 425
 426 Xapian::termpos
 427 Query::Internal::get_pos() const
 428 {
 429     throw Xapian::InvalidArgumentError("get_pos() not meaningful for this Query object");
 430 }
 431
 432 void
 433 Query::Internal::gather_terms(void *) const
 434 {
 435 }
 436
 437 Xapian::termcount
 438 Query::Internal::get_length() const XAPIAN_NOEXCEPT
 439 {
 440     return 0;
 441 }
 442
 443 Query::Internal *
 444 Query::Internal::unserialise(const char ** p, const char * end,
 445                              const Registry & reg)
 446 {
 447     if (*p == end)
 448         return NULL;
 449     unsigned char ch = *(*p)++;
 450     switch (ch >> 5) {
 451         case 4: case 5: case 6: case 7: {
 452             // Multi-way branch
 453             //
 454             // 1ccccnnn where:
 455             //   nnn -> n_subqs (0 means encoded value follows)
 456             //   cccc -> code (which OP_XXX)
 457             size_t n_subqs = ch & 0x07;
 458             if (n_subqs == 0) {
 459                 decode_length(p, end, n_subqs);
 460                 n_subqs += 8;
 461             }
 462             unsigned char code = (ch >> 3) & 0x0f;
 463             Xapian::termcount parameter = 0;
 464             if (code >= 13)
 465                 decode_length(p, end, parameter);
 466             Xapian::Internal::QueryBranch * result;
 467             switch (code) {
 468                 case 0: // OP_AND
 469                     result = new Xapian::Internal::QueryAnd(n_subqs);
 470                     break;
 471                 case 1: // OP_OR
 472                     result = new Xapian::Internal::QueryOr(n_subqs);
 473                     break;
 474                 case 2: // OP_AND_NOT
 475                     result = new Xapian::Internal::QueryAndNot(n_subqs);
 476                     break;
 477                 case 3: // OP_XOR
 478                     result = new Xapian::Internal::QueryXor(n_subqs);
 479                     break;
 480                 case 4: // OP_AND_MAYBE
 481                     result = new Xapian::Internal::QueryAndMaybe(n_subqs);
 482                     break;
 483                 case 5: // OP_FILTER
 484                     result = new Xapian::Internal::QueryFilter(n_subqs);
 485                     break;
 486                 case 6: // OP_SYNONYM
 487                     result = new Xapian::Internal::QuerySynonym(n_subqs);
 488                     break;
 489                 case 7: // OP_MAX
 490                     result = new Xapian::Internal::QueryMax(n_subqs);
 491                     break;
 492                 case 13: // OP_ELITE_SET
 493                     result = new Xapian::Internal::QueryEliteSet(n_subqs,
 494                                                                  parameter);
 495                     break;
 496                 case 14: // OP_NEAR
 497                     result = new Xapian::Internal::QueryNear(n_subqs,
 498                                                              parameter);
 499                     break;
 500                 case 15: // OP_PHRASE
 501                     result = new Xapian::Internal::QueryPhrase(n_subqs,
 502                                                                parameter);
 503                     break;
 504                 default:
 505                     // 8 to 12 are currently unused.
 506                     throw SerialisationError("Unknown multi-way branch Query operator");
 507             }
 508             do {
 509                 result->add_subquery(Xapian::Query(unserialise(p, end, reg)));
 510             } while (--n_subqs);
 511             result->done();
 512             return result;
 513         }
 514         case 2: case 3: { // Term
 515             // Term
 516             //
 517             // 01ccLLLL where:
 518             //   LLLL -> length (0 means encoded value follows)
 519             //   cc -> code:
 520             //     0: wqf = 0; pos = 0
 521             //     1: wqf = 1; pos = 0
 522             //     2: wqf = 1; pos -> encoded value follows
 523             //     3: wqf -> encoded value follows; pos -> encoded value follows
 524             size_t len = ch & 0x0f;
 525             if (len == 0) {
 526                 decode_length(p, end, len);
 527                 len += 16;
 528             }
 529             if (size_t(end - *p) < len)
 530                 throw SerialisationError("Not enough data");
 531             string term(*p, len);
 532             *p += len;
 533
 534             int code = ((ch >> 4) & 0x03);
 535
 536             Xapian::termcount wqf = static_cast<Xapian::termcount>(code > 0);
 537             if (code == 3)
 538                 decode_length(p, end, wqf);
 539
 540             Xapian::termpos pos = 0;
 541             if (code >= 2)
 542                 decode_length(p, end, pos);
 543
 544             return new Xapian::Internal::QueryTerm(term, wqf, pos);
 545         }
 546         case 1: {
 547             // OP_VALUE_RANGE or OP_VALUE_GE or OP_VALUE_LE
 548             //
 549             // 001tssss where:
 550             //   ssss -> slot number (15 means encoded value follows)
 551             //   t -> op:
 552             //     0: OP_VALUE_RANGE (or OP_VALUE_LE if begin empty)
 553             //     1: OP_VALUE_GE
 554             Xapian::valueno slot = ch & 15;
 555             if (slot == 15) {
 556                 decode_length(p, end, slot);
 557                 slot += 15;
 558             }
 559             size_t len;
 560             decode_length_and_check(p, end, len);
 561             string begin(*p, len);
 562             *p += len;
 563             if (ch & 0x10) {
 564                 // OP_VALUE_GE
 565                 return new Xapian::Internal::QueryValueGE(slot, begin);
 566             }
 567
 568             // OP_VALUE_RANGE
 569             decode_length_and_check(p, end, len);
 570             string end_(*p, len);
 571             *p += len;
 572             if (begin.empty()) // FIXME: is this right?
 573                 return new Xapian::Internal::QueryValueLE(slot, end_);
 574             return new Xapian::Internal::QueryValueRange(slot, begin, end_);
 575         }
 576         case 0: {
 577             // Other operators
 578             //
 579             //   000ttttt where:
 580             //     ttttt -> encodes which OP_XXX
 581             switch (ch & 0x1f) {
 582                 case 0x00: // OP_INVALID
 583                     return new Xapian::Internal::QueryInvalid();
 584                 case 0x0b: { // Wildcard
 585                     if (*p == end)
 586                         throw SerialisationError("not enough data");
 587                     Xapian::termcount max_expansion;
 588                     decode_length(p, end, max_expansion);
 589                     if (end - *p < 2)
 590                         throw SerialisationError("not enough data");
 591                     int max_type = static_cast<unsigned char>(*(*p)++);
 592                     op combiner = static_cast<op>(*(*p)++);
 593                     size_t len;
 594                     decode_length_and_check(p, end, len);
 595                     string pattern(*p, len);
 596                     *p += len;
 597                     return new Xapian::Internal::QueryWildcard(pattern,
 598                                                                max_expansion,
 599                                                                max_type,
 600                                                                combiner);
 601                 }
 602                 case 0x0c: { // PostingSource
 603                     size_t len;
 604                     decode_length_and_check(p, end, len);
 605                     string name(*p, len);
 606                     *p += len;
 607
 608                     const PostingSource * reg_source = reg.get_posting_source(name);
 609                     if (!reg_source) {
 610                         string m = "PostingSource ";
 611                         m += name;
 612                         m += " not registered";
 613                         throw SerialisationError(m);
 614                     }
 615
 616                     decode_length_and_check(p, end, len);
 617                     PostingSource * source =
 618                         reg_source->unserialise_with_registry(string(*p, len),
 619                                                               reg);
 620                     *p += len;
 621                     return new Xapian::Internal::QueryPostingSource(source->release());
 622                 }
 623                 case 0x0d: {
 624                     using Xapian::Internal::QueryScaleWeight;
 625                     double scale_factor = unserialise_double(p, end);
 626                     return new QueryScaleWeight(scale_factor,
 627                                                 Query(unserialise(p, end, reg)));
 628                 }
 629                 case 0x0e: {
 630                     Xapian::termcount wqf;
 631                     Xapian::termpos pos;
 632                     decode_length(p, end, wqf);
 633                     decode_length(p, end, pos);
 634                     return new Xapian::Internal::QueryTerm(string(), wqf, pos);
 635                 }
 636                 case 0x0f:
 637                     return new Xapian::Internal::QueryTerm();
 638                 default: // Others currently unused.
 639                     break;
 640             }
 641             break;
 642         }
 643     }
 644     string msg = "Unknown Query serialisation: ";
 645     msg += str(ch);
 646     throw SerialisationError(msg);
 647 }
 648
 649 void
 650 Query::Internal::postlist_sub_and_like(AndContext& ctx,
 651                                        QueryOptimiser * qopt,
 652                                        double factor) const
 653 {
 654     ctx.add_postlist(postlist(qopt, factor));
 655 }
 656
 657 void
 658 Query::Internal::postlist_sub_or_like(OrContext& ctx,
 659                                       QueryOptimiser * qopt,
 660                                       double factor) const
 661 {
 662     ctx.add_postlist(postlist(qopt, factor));
 663 }
 664
 665 void
 666 Query::Internal::postlist_sub_xor(XorContext& ctx,
 667                                   QueryOptimiser * qopt,
 668                                   double factor) const
 669 {
 670     ctx.add_postlist(postlist(qopt, factor));
 671 }
 672
 673 namespace Internal {
 674
 675 Query::op
 676 QueryTerm::get_type() const XAPIAN_NOEXCEPT
 677 {
 678     return term.empty() ? Query::LEAF_MATCH_ALL : Query::LEAF_TERM;
 679 }
 680
 681 string
 682 QueryTerm::get_description() const
 683 {
 684     string desc;
 685     if (term.empty()) {
 686         desc = "<alldocuments>";
 687     } else {
 688         description_append(desc, term);
 689     }
 690     if (wqf != 1) {
 691         desc += '#';
 692         desc += str(wqf);
 693     }
 694     if (pos) {
 695         desc += '@';
 696         desc += str(pos);
 697     }
 698     return desc;
 699 }
 700
 701 QueryPostingSource::QueryPostingSource(PostingSource * source_)
 702     : source(source_)
 703 {
 704     if (!source_)
 705         throw Xapian::InvalidArgumentError("source parameter can't be NULL");
 706     if (source->_refs == 0) {
 707         // source_ isn't reference counted, so try to clone it.  If clone()
 708         // isn't implemented, just use the object provided and it's the
 709         // caller's responsibility to ensure it stays valid while in use.
 710         PostingSource * cloned_source = source->clone();
 711         if (cloned_source) source = cloned_source->release();
 712     }
 713 }
 714
 715 Query::op
 716 QueryPostingSource::get_type() const XAPIAN_NOEXCEPT
 717 {
 718     return Query::LEAF_POSTING_SOURCE;
 719 }
 720
 721 string
 722 QueryPostingSource::get_description() const
 723 {
 724     string desc = "PostingSource(";
 725     desc += source->get_description();
 726     desc += ')';
 727     return desc;
 728 }
 729
 730 QueryScaleWeight::QueryScaleWeight(double factor, const Query & subquery_)
 731     : scale_factor(factor), subquery(subquery_)
 732 {
 733     if (rare(scale_factor < 0.0))
 734         throw Xapian::InvalidArgumentError("OP_SCALE_WEIGHT requires factor >= 0");
 735 }
 736
 737 Query::op
 738 QueryScaleWeight::get_type() const XAPIAN_NOEXCEPT
 739 {
 740     return Query::OP_SCALE_WEIGHT;
 741 }
 742
 743 size_t
 744 QueryScaleWeight::get_num_subqueries() const XAPIAN_NOEXCEPT
 745 {
 746     return 1;
 747 }
 748
 749 const Query
 750 QueryScaleWeight::get_subquery(size_t) const
 751 {
 752     return subquery;
 753 }
 754
 755 string
 756 QueryScaleWeight::get_description() const
 757 {
 758     Assert(subquery.internal.get());
 759     string desc = str(scale_factor);
 760     desc += " * ";
 761     desc += subquery.internal->get_description();
 762     return desc;
 763 }
 764
 765 PostList*
 766 QueryTerm::postlist(QueryOptimiser * qopt, double factor) const
 767 {
 768     LOGCALL(QUERY, PostList*, "QueryTerm::postlist", qopt | factor);
 769     if (factor != 0.0)
 770         qopt->inc_total_subqs();
 771     RETURN(qopt->open_post_list(term, wqf, factor));
 772 }
 773
 774 PostList*
 775 QueryPostingSource::postlist(QueryOptimiser * qopt, double factor) const
 776 {
 777     LOGCALL(QUERY, PostList*, "QueryPostingSource::postlist", qopt | factor);
 778     Assert(source.get());
 779     if (factor != 0.0)
 780         qopt->inc_total_subqs();
 781     // Casting away const on the Database::Internal here is OK, as we wrap
 782     // them in a const Xapian::Database so non-const methods can't actually
 783     // be called on the Database::Internal object.
 784     const Xapian::Database wrappeddb(
 785             const_cast<Xapian::Database::Internal*>(&(qopt->db)));
 786     RETURN(new ExternalPostList(wrappeddb, source.get(), factor, qopt->matcher));
 787 }
 788
 789 PostList*
 790 QueryScaleWeight::postlist(QueryOptimiser * qopt, double factor) const
 791 {
 792     LOGCALL(QUERY, PostList*, "QueryScaleWeight::postlist", qopt | factor);
 793     RETURN(subquery.internal->postlist(qopt, factor * scale_factor));
 794 }
 795
 796 void
 797 QueryTerm::gather_terms(void * void_terms) const
 798 {
 799     // Skip Xapian::Query::MatchAll (aka Xapian::Query("")).
 800     if (!term.empty()) {
 801         vector<pair<Xapian::termpos, string>> &terms =
 802             *static_cast<vector<pair<Xapian::termpos, string>>*>(void_terms);
 803         terms.push_back(make_pair(pos, term));
 804     }
 805 }
 806
 807 PostList*
 808 QueryValueRange::postlist(QueryOptimiser *qopt, double factor) const
 809 {
 810     LOGCALL(QUERY, PostList*, "QueryValueRange::postlist", qopt | factor);
 811     if (factor != 0.0)
 812         qopt->inc_total_subqs();
 813     const Xapian::Database::Internal & db = qopt->db;
 814     const string & lb = db.get_value_lower_bound(slot);
 815     if (lb.empty()) {
 816         // This should only happen if there are no values in this slot (which
 817         // could be because the backend just doesn't support values at all).
 818         // If there were values in the slot, the backend should have a
 819         // non-empty lower bound, even if it isn't a tight one.
 820         AssertEq(db.get_value_freq(slot), 0);
 821         RETURN(new EmptyPostList);
 822     }
 823     if (end < lb) {
 824         RETURN(new EmptyPostList);
 825     }
 826     const string & ub = db.get_value_upper_bound(slot);
 827     if (begin > ub) {
 828         RETURN(new EmptyPostList);
 829     }
 830     if (end >= ub) {
 831         // If begin <= lb too, then the range check isn't needed, but we do
 832         // still need to consider which documents have a value set in this
 833         // slot.  If this value is set for all documents, we can replace it
 834         // with the MatchAll postlist, which is especially efficient if
 835         // there are no gaps in the docids.
 836         if (begin <= lb && db.get_value_freq(slot) == db.get_doccount()) {
 837             RETURN(db.open_post_list(string()));
 838         }
 839         RETURN(new ValueGePostList(&db, slot, begin));
 840     }
 841     RETURN(new ValueRangePostList(&db, slot, begin, end));
 842 }
 843
 844 void
 845 QueryValueRange::serialise(string & result) const
 846 {
 847     if (slot < 15) {
 848         result += static_cast<char>(0x20 | slot);
 849     } else {
 850         result += static_cast<char>(0x20 | 15);
 851         result += encode_length(slot - 15);
 852     }
 853     result += encode_length(begin.size());
 854     result += begin;
 855     result += encode_length(end.size());
 856     result += end;
 857 }
 858
 859 Query::op
 860 QueryValueRange::get_type() const XAPIAN_NOEXCEPT
 861 {
 862     return Query::OP_VALUE_RANGE;
 863 }
 864
 865 string
 866 QueryValueRange::get_description() const
 867 {
 868     string desc = "VALUE_RANGE ";
 869     desc += str(slot);
 870     desc += ' ';
 871     description_append(desc, begin);
 872     desc += ' ';
 873     description_append(desc, end);
 874     return desc;
 875 }
 876
 877 PostList*
 878 QueryValueLE::postlist(QueryOptimiser *qopt, double factor) const
 879 {
 880     LOGCALL(QUERY, PostList*, "QueryValueLE::postlist", qopt | factor);
 881     if (factor != 0.0)
 882         qopt->inc_total_subqs();
 883     const Xapian::Database::Internal & db = qopt->db;
 884     const string & lb = db.get_value_lower_bound(slot);
 885     // If lb.empty(), the backend doesn't provide value bounds.
 886     if (!lb.empty()) {
 887         if (limit < lb) {
 888             RETURN(new EmptyPostList);
 889         }
 890         if (limit >= db.get_value_upper_bound(slot)) {
 891             // The range check isn't needed, but we do still need to consider
 892             // which documents have a value set in this slot.  If this value is
 893             // set for all documents, we can replace it with the MatchAll
 894             // postlist, which is especially efficient if there are no gaps in
 895             // the docids.
 896             if (db.get_value_freq(slot) == db.get_doccount()) {
 897                 RETURN(db.open_post_list(string()));
 898             }
 899         }
 900     }
 901     RETURN(new ValueRangePostList(&db, slot, string(), limit));
 902 }
 903
 904 void
 905 QueryValueLE::serialise(string & result) const
 906 {
 907     // Encode as a range with an empty start (which only takes a single byte to
 908     // encode).
 909     if (slot < 15) {
 910         result += static_cast<char>(0x20 | slot);
 911     } else {
 912         result += static_cast<char>(0x20 | 15);
 913         result += encode_length(slot - 15);
 914     }
 915     result += encode_length(0);
 916     result += encode_length(limit.size());
 917     result += limit;
 918 }
 919
 920 Query::op
 921 QueryValueLE::get_type() const XAPIAN_NOEXCEPT
 922 {
 923     return Query::OP_VALUE_LE;
 924 }
 925
 926 string
 927 QueryValueLE::get_description() const
 928 {
 929     string desc = "VALUE_LE ";
 930     desc += str(slot);
 931     desc += ' ';
 932     description_append(desc, limit);
 933     return desc;
 934 }
 935
 936 PostList*
 937 QueryValueGE::postlist(QueryOptimiser *qopt, double factor) const
 938 {
 939     LOGCALL(QUERY, PostList*, "QueryValueGE::postlist", qopt | factor);
 940     if (factor != 0.0)
 941         qopt->inc_total_subqs();
 942     const Xapian::Database::Internal & db = qopt->db;
 943     const string & lb = db.get_value_lower_bound(slot);
 944     // If lb.empty(), the backend doesn't provide value bounds.
 945     if (!lb.empty()) {
 946         if (limit > db.get_value_upper_bound(slot)) {
 947             RETURN(new EmptyPostList);
 948         }
 949         if (limit < lb) {
 950             // The range check isn't needed, but we do still need to consider
 951             // which documents have a value set in this slot.  If this value is
 952             // set for all documents, we can replace it with the MatchAll
 953             // postlist, which is especially efficient if there are no gaps in
 954             // the docids.
 955             if (db.get_value_freq(slot) == db.get_doccount()) {
 956                 RETURN(db.open_post_list(string()));
 957             }
 958         }
 959     }
 960     RETURN(new ValueGePostList(&db, slot, limit));
 961 }
 962
 963 void
 964 QueryValueGE::serialise(string & result) const
 965 {
 966     if (slot < 15) {
 967         result += static_cast<char>(0x20 | 0x10 | slot);
 968     } else {
 969         result += static_cast<char>(0x20 | 0x10 | 15);
 970         result += encode_length(slot - 15);
 971     }
 972     result += encode_length(limit.size());
 973     result += limit;
 974 }
 975
 976 Query::op
 977 QueryValueGE::get_type() const XAPIAN_NOEXCEPT
 978 {
 979     return Query::OP_VALUE_GE;
 980 }
 981
 982 string
 983 QueryValueGE::get_description() const
 984 {
 985     string desc = "VALUE_GE ";
 986     desc += str(slot);
 987     desc += ' ';
 988     description_append(desc, limit);
 989     return desc;
 990 }
 991
 992 PostList*
 993 QueryWildcard::postlist(QueryOptimiser * qopt, double factor) const
 994 {
 995     LOGCALL(QUERY, PostList*, "QueryWildcard::postlist", qopt | factor);
 996     Query::op op = combiner;
 997     double or_factor = 0.0;
 998     if (factor == 0.0) {
 999         // If we have a factor of 0, we don't care about the weights, so
1000         // we're just like a normal OR query.
1001         op = Query::OP_OR;
1002     } else if (op != Query::OP_SYNONYM) {
1003         or_factor = factor;
1004     }
1005
1006     bool old_in_synonym = qopt->in_synonym;
1007     if (!old_in_synonym) {
1008         qopt->in_synonym = (op == Query::OP_SYNONYM);
1009     }
1010
1011     OrContext ctx(qopt, 0);
1012     unique_ptr<TermList> t(qopt->db.open_allterms(pattern));
1013     Xapian::termcount expansions_left = max_expansion;
1014     // If there's no expansion limit, set expansions_left to the maximum
1015     // value Xapian::termcount can hold.
1016     if (expansions_left == 0)
1017         --expansions_left;
1018     while (true) {
1019         t->next();
1020         if (t->at_end())
1021             break;
1022         if (max_type < Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT) {
1023             if (expansions_left-- == 0) {
1024                 if (max_type == Xapian::Query::WILDCARD_LIMIT_FIRST)
1025                     break;
1026                 string msg("Wildcard ");
1027                 msg += pattern;
1028                 msg += "* expands to more than ";
1029                 msg += str(max_expansion);
1030                 msg += " terms";
1031                 throw Xapian::WildcardError(msg);
1032             }
1033         }
1034         const string & term = t->get_termname();
1035         ctx.add_postlist(qopt->open_lazy_post_list(term, 1, or_factor));
1036     }
1037
1038     if (max_type == Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT) {
1039         // FIXME: open_lazy_post_list() results in the term getting registered
1040         // for stats, so we still incur an avoidable cost from the full
1041         // expansion size of the wildcard, which is most likely to be visible
1042         // with the remote backend.  Perhaps we should split creating the lazy
1043         // postlist from registering the term for stats.
1044         if (ctx.size() > max_expansion)
1045             ctx.select_most_frequent(max_expansion);
1046     }
1047
1048     if (factor != 0.0) {
1049         if (op != Query::OP_SYNONYM) {
1050             qopt->set_total_subqs(qopt->get_total_subqs() + ctx.size());
1051         } else {
1052             qopt->inc_total_subqs();
1053         }
1054     }
1055
1056     qopt->in_synonym = old_in_synonym;
1057
1058     if (ctx.empty())
1059         RETURN(new EmptyPostList);
1060
1061     if (op == Query::OP_MAX)
1062         RETURN(ctx.postlist_max());
1063
1064     PostList * pl = ctx.postlist();
1065     if (op == Query::OP_OR)
1066         RETURN(pl);
1067
1068     // We build an OP_OR tree for OP_SYNONYM and then wrap it in a
1069     // SynonymPostList, which supplies the weights.
1070     //
1071     // We know the subqueries from a wildcard expansion are wdf-disjoint
1072     // (i.e. each wdf from the document contributes at most itself to the
1073     // wdf of the subquery).
1074     RETURN(qopt->make_synonym_postlist(pl, factor, true));
1075 }
1076
1077 termcount
1078 QueryWildcard::get_length() const XAPIAN_NOEXCEPT
1079 {
1080     // We currently assume wqf is 1 for calculating the synonym's weight
1081     // since conceptually the synonym is one "virtual" term.  If we were
1082     // to combine multiple occurrences of the same synonym expansion into
1083     // a single instance with wqf set, we would want to track the wqf.
1084     return 1;
1085 }
1086
1087 void
1088 QueryWildcard::serialise(string & result) const
1089 {
1090     result += static_cast<char>(0x0b);
1091     result += encode_length(max_expansion);
1092     result += static_cast<unsigned char>(max_type);
1093     result += static_cast<unsigned char>(combiner);
1094     result += encode_length(pattern.size());
1095     result += pattern;
1096 }
1097
1098 Query::op
1099 QueryWildcard::get_type() const XAPIAN_NOEXCEPT
1100 {
1101     return Query::OP_WILDCARD;
1102 }
1103
1104 string
1105 QueryWildcard::get_description() const
1106 {
1107     string desc = "WILDCARD ";
1108     switch (combiner) {
1109         case Query::OP_SYNONYM:
1110             desc += "SYNONYM ";
1111             break;
1112         case Query::OP_MAX:
1113             desc += "MAX ";
1114             break;
1115         case Query::OP_OR:
1116             desc += "OR ";
1117             break;
1118         default:
1119             desc += "BAD ";
1120             break;
1121     }
1122     description_append(desc, pattern);
1123     return desc;
1124 }
1125
1126 Xapian::termcount
1127 QueryBranch::get_length() const XAPIAN_NOEXCEPT
1128 {
1129     // Sum results from all subqueries.
1130     Xapian::termcount result = 0;
1131     QueryVector::const_iterator i;
1132     for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1133         // MatchNothing subqueries should have been removed by done(), but we
1134         // can't use Assert in a XAPIAN_NOEXCEPT function.  But we'll get a
1135         // segfault anyway.
1136         result += (*i).internal->get_length();
1137     }
1138     return result;
1139 }
1140
1141 #define MULTIWAY(X) static_cast<unsigned char>(0x80 | (X) << 3)
1142 #define MISC(X) static_cast<unsigned char>(X)
1143 void
1144 QueryBranch::serialise_(string & result, Xapian::termcount parameter) const
1145 {
1146     static const unsigned char first_byte[] = {
1147         MULTIWAY(0),    // OP_AND
1148         MULTIWAY(1),    // OP_OR
1149         MULTIWAY(2),    // OP_AND_NOT
1150         MULTIWAY(3),    // OP_XOR
1151         MULTIWAY(4),    // OP_AND_MAYBE
1152         MULTIWAY(5),    // OP_FILTER
1153         MULTIWAY(14),   // OP_NEAR
1154         MULTIWAY(15),   // OP_PHRASE
1155         0,              // OP_VALUE_RANGE
1156         MISC(3),        // OP_SCALE_WEIGHT
1157         MULTIWAY(13),   // OP_ELITE_SET
1158         0,              // OP_VALUE_GE
1159         0,              // OP_VALUE_LE
1160         MULTIWAY(6),    // OP_SYNONYM
1161         MULTIWAY(7)     // OP_MAX
1162     };
1163     Xapian::Query::op op_ = get_op();
1164     AssertRel(size_t(op_),<,sizeof(first_byte));
1165     unsigned char ch = first_byte[op_];
1166     if (ch & 0x80) {
1167         // Multi-way operator.
1168         if (subqueries.size() < 8)
1169             ch |= subqueries.size();
1170         result += ch;
1171         if (subqueries.size() >= 8)
1172             result += encode_length(subqueries.size() - 8);
1173         if (ch >= MULTIWAY(13))
1174             result += encode_length(parameter);
1175     } else {
1176         result += ch;
1177     }
1178
1179     QueryVector::const_iterator i;
1180     for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1181         // MatchNothing subqueries should have been removed by done().
1182         Assert((*i).internal.get());
1183         (*i).internal->serialise(result);
1184     }
1185
1186     // For OP_NEAR, OP_PHRASE, and OP_ELITE_SET, the window/set size gets
1187     // appended next by an overloaded serialise() method in the subclass.
1188 }
1189
1190 void
1191 QueryBranch::serialise(string & result) const
1192 {
1193     QueryBranch::serialise_(result);
1194 }
1195
1196 void
1197 QueryNear::serialise(string & result) const
1198 {
1199     // FIXME: window - subqueries.size() ?
1200     QueryBranch::serialise_(result, window);
1201 }
1202
1203 void
1204 QueryPhrase::serialise(string & result) const
1205 {
1206     // FIXME: window - subqueries.size() ?
1207     QueryBranch::serialise_(result, window);
1208 }
1209
1210 void
1211 QueryEliteSet::serialise(string & result) const
1212 {
1213     // FIXME: set_size - subqueries.size() ?
1214     QueryBranch::serialise_(result, set_size);
1215 }
1216
1217 void
1218 QueryBranch::gather_terms(void * void_terms) const
1219 {
1220     // Gather results from all subqueries.
1221     QueryVector::const_iterator i;
1222     for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1223         // MatchNothing subqueries should have been removed by done().
1224         Assert((*i).internal.get());
1225         (*i).internal->gather_terms(void_terms);
1226     }
1227 }
1228
1229 void
1230 QueryBranch::do_or_like(OrContext& ctx, QueryOptimiser * qopt, double factor,
1231                         Xapian::termcount elite_set_size, size_t first) const
1232 {
1233     LOGCALL_VOID(MATCH, "QueryBranch::do_or_like", ctx | qopt | factor | elite_set_size);
1234
1235     // FIXME: we could optimise by merging OP_ELITE_SET and OP_OR like we do
1236     // for AND-like operations.
1237
1238     // OP_SYNONYM with a single subquery is only simplified by
1239     // QuerySynonym::done() if the single subquery is a term or MatchAll.
1240     Assert(subqueries.size() >= 2 || get_op() == Query::OP_SYNONYM);
1241
1242     vector<PostList *> postlists;
1243     postlists.reserve(subqueries.size() - first);
1244
1245     QueryVector::const_iterator q;
1246     for (q = subqueries.begin() + first; q != subqueries.end(); ++q) {
1247         // MatchNothing subqueries should have been removed by done().
1248         Assert((*q).internal.get());
1249         (*q).internal->postlist_sub_or_like(ctx, qopt, factor);
1250     }
1251
1252     if (elite_set_size && elite_set_size < subqueries.size()) {
1253         ctx.select_elite_set(elite_set_size, subqueries.size());
1254         // FIXME: not right!
1255     }
1256 }
1257
1258 PostList *
1259 QueryBranch::do_synonym(QueryOptimiser * qopt, double factor) const
1260 {
1261     LOGCALL(MATCH, PostList *, "QueryBranch::do_synonym", qopt | factor);
1262     OrContext ctx(qopt, subqueries.size());
1263     if (factor == 0.0) {
1264         // If we have a factor of 0, we don't care about the weights, so
1265         // we're just like a normal OR query.
1266         do_or_like(ctx, qopt, 0.0);
1267         return ctx.postlist();
1268     }
1269
1270     bool old_in_synonym = qopt->in_synonym;
1271     qopt->in_synonym = true;
1272     do_or_like(ctx, qopt, 0.0);
1273     PostList * pl = ctx.postlist();
1274     qopt->in_synonym = old_in_synonym;
1275
1276     bool wdf_disjoint = false;
1277     Assert(!subqueries.empty());
1278     auto type = subqueries.front().get_type();
1279     if (type == Query::OP_WILDCARD) {
1280         // Detect common easy case where all subqueries are OP_WILDCARD whose
1281         // constant prefixes form a prefix-free set.
1282         wdf_disjoint = true;
1283         vector<string> prefixes;
1284         for (auto&& q : subqueries) {
1285             if (q.get_type() != Query::OP_WILDCARD) {
1286                 wdf_disjoint = false;
1287                 break;
1288             }
1289             auto qw = static_cast<const QueryWildcard*>(q.internal.get());
1290             prefixes.push_back(qw->get_pattern());
1291         }
1292
1293         if (wdf_disjoint) {
1294             sort(prefixes.begin(), prefixes.end());
1295             const string* prev = nullptr;
1296             for (const auto& i : prefixes) {
1297                 if (prev) {
1298                     if (startswith(i, *prev)) {
1299                         wdf_disjoint = false;
1300                         break;
1301                     }
1302                 }
1303                 prev = &i;
1304             }
1305         }
1306     } else if (type == Query::LEAF_TERM) {
1307         // Detect common easy case where all subqueries are terms, none of
1308         // which are the same.
1309         wdf_disjoint = true;
1310         unordered_set<string> terms;
1311         for (auto&& q : subqueries) {
1312             if (q.get_type() != Query::LEAF_TERM) {
1313                 wdf_disjoint = false;
1314                 break;
1315             }
1316             auto qt = static_cast<const QueryTerm*>(q.internal.get());
1317             if (!terms.insert(qt->get_term()).second) {
1318                 wdf_disjoint = false;
1319                 break;
1320             }
1321         }
1322     }
1323
1324     // We currently assume wqf is 1 for calculating the synonym's weight
1325     // since conceptually the synonym is one "virtual" term.  If we were
1326     // to combine multiple occurrences of the same synonym expansion into
1327     // a single instance with wqf set, we would want to track the wqf.
1328
1329     // We build an OP_OR tree for OP_SYNONYM and then wrap it in a
1330     // SynonymPostList, which supplies the weights.
1331     RETURN(qopt->make_synonym_postlist(pl, factor, wdf_disjoint));
1332 }
1333
1334 PostList *
1335 QueryBranch::do_max(QueryOptimiser * qopt, double factor) const
1336 {
1337     LOGCALL(MATCH, PostList *, "QueryBranch::do_max", qopt | factor);
1338     OrContext ctx(qopt, subqueries.size());
1339     do_or_like(ctx, qopt, factor);
1340     if (factor == 0.0) {
1341         // If we have a factor of 0, we don't care about the weights, so
1342         // we're just like a normal OR query.
1343         RETURN(ctx.postlist());
1344     }
1345
1346     // We currently assume wqf is 1 for calculating the OP_MAX's weight
1347     // since conceptually the OP_MAX is one "virtual" term.  If we were
1348     // to combine multiple occurrences of the same OP_MAX expansion into
1349     // a single instance with wqf set, we would want to track the wqf.
1350     RETURN(ctx.postlist_max());
1351 }
1352
1353 Xapian::Query::op
1354 QueryBranch::get_type() const XAPIAN_NOEXCEPT
1355 {
1356     return get_op();
1357 }
1358
1359 size_t
1360 QueryBranch::get_num_subqueries() const XAPIAN_NOEXCEPT
1361 {
1362     return subqueries.size();
1363 }
1364
1365 const Query
1366 QueryBranch::get_subquery(size_t n) const
1367 {
1368     return subqueries[n];
1369 }
1370
1371 const string
1372 QueryBranch::get_description_helper(const char * op,
1373                                     Xapian::termcount parameter) const
1374 {
1375     string desc = "(";
1376     QueryVector::const_iterator i;
1377     for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1378         if (desc.size() > 1) {
1379             desc += op;
1380             if (parameter) {
1381                 desc += str(parameter);
1382                 desc += ' ';
1383             }
1384         }
1385         Assert((*i).internal.get());
1386         // MatchNothing subqueries should have been removed by done(), and we
1387         // shouldn't get called before done() is, since that happens at the
1388         // end of the Xapian::Query constructor.
1389         desc += (*i).internal->get_description();
1390     }
1391     desc += ')';
1392     return desc;
1393 }
1394
1395 Query::Internal *
1396 QueryWindowed::done()
1397 {
1398     // If window size not specified, default it.
1399     if (window == 0)
1400         window = subqueries.size();
1401     return QueryAndLike::done();
1402 }
1403
1404 void
1405 QueryScaleWeight::gather_terms(void * void_terms) const
1406 {
1407     subquery.internal->gather_terms(void_terms);
1408 }
1409
1410 void QueryTerm::serialise(string & result) const
1411 {
1412     size_t len = term.size();
1413     if (len == 0) {
1414         if (wqf == 1 && pos == 0) {
1415             // Query::MatchAll
1416             result += '\x0f';
1417         } else {
1418             // Weird mutant versions of MatchAll
1419             result += '\x0e';
1420             result += encode_length(wqf);
1421             result += encode_length(pos);
1422         }
1423     } else if (wqf == 1) {
1424         if (pos == 0) {
1425             // Single occurrence free-text term without position set.
1426             if (len >= 16) {
1427                 result += static_cast<char>(0x40 | 0x10);
1428                 result += encode_length(term.size() - 16);
1429             } else {
1430                 result += static_cast<char>(0x40 | 0x10 | len);
1431             }
1432             result += term;
1433         } else {
1434             // Single occurrence free-text term with position set.
1435             if (len >= 16) {
1436                 result += static_cast<char>(0x40 | 0x20);
1437                 result += encode_length(term.size() - 16);
1438             } else {
1439                 result += static_cast<char>(0x40 | 0x20 | len);
1440             }
1441             result += term;
1442             result += encode_length(pos);
1443         }
1444     } else if (wqf > 1 || pos > 0) {
1445         // General case.
1446         if (len >= 16) {
1447             result += static_cast<char>(0x40 | 0x30);
1448             result += encode_length(term.size() - 16);
1449         } else if (len) {
1450             result += static_cast<char>(0x40 | 0x30 | len);
1451         }
1452         result += term;
1453         result += encode_length(wqf);
1454         result += encode_length(pos);
1455     } else {
1456         // Typical boolean term.
1457         AssertEq(wqf, 0);
1458         AssertEq(pos, 0);
1459         if (len >= 16) {
1460             result += static_cast<char>(0x40);
1461             result += encode_length(term.size() - 16);
1462         } else {
1463             result += static_cast<char>(0x40 | len);
1464         }
1465         result += term;
1466     }
1467 }
1468
1469 void QueryPostingSource::serialise(string & result) const
1470 {
1471     result += static_cast<char>(0x0c);
1472
1473     const string & n = source->name();
1474     result += encode_length(n.size());
1475     result += n;
1476
1477     const string & s = source->serialise();
1478     result += encode_length(s.size());
1479     result += s;
1480 }
1481
1482 void QueryScaleWeight::serialise(string & result) const
1483 {
1484     Assert(subquery.internal.get());
1485     const string & s = serialise_double(scale_factor);
1486     result += '\x0d';
1487     result += s;
1488     subquery.internal->serialise(result);
1489 }
1490
1491 struct is_matchnothing {
1492     bool operator()(const Xapian::Query & q) const {
1493         return q.internal.get() == NULL;
1494     }
1495 };
1496
1497 void
1498 QueryAndLike::add_subquery(const Xapian::Query & subquery)
1499 {
1500     // If the AndLike is already MatchNothing, do nothing.
1501     if (subqueries.size() == 1 && subqueries[0].internal.get() == NULL)
1502         return;
1503     // If we're adding MatchNothing, discard any previous subqueries.
1504     if (subquery.internal.get() == NULL)
1505         subqueries.clear();
1506     subqueries.push_back(subquery);
1507 }
1508
1509 Query::Internal *
1510 QueryAndLike::done()
1511 {
1512     // Empty AndLike gives MatchNothing.
1513     if (subqueries.empty())
1514         return NULL;
1515     // We handle any subquery being MatchNothing in add_subquery() by leaving
1516     // a single MatchNothing subquery, and so this check results in AndLike
1517     // giving MatchNothing.
1518     if (subqueries.size() == 1)
1519         return subqueries[0].internal.get();
1520     return this;
1521 }
1522
1523 PostList*
1524 QueryAndLike::postlist(QueryOptimiser * qopt, double factor) const
1525 {
1526     LOGCALL(QUERY, PostList*, "QueryAndLike::postlist", qopt | factor);
1527     AndContext ctx(qopt, subqueries.size());
1528     postlist_sub_and_like(ctx, qopt, factor);
1529     RETURN(ctx.postlist());
1530 }
1531
1532 void
1533 QueryAndLike::postlist_sub_and_like(AndContext& ctx, QueryOptimiser * qopt, double factor) const
1534 {
1535     QueryVector::const_iterator i;
1536     for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1537         // MatchNothing subqueries should have been removed by done().
1538         Assert((*i).internal.get());
1539         (*i).internal->postlist_sub_and_like(ctx, qopt, factor);
1540     }
1541 }
1542
1543 void
1544 QueryOrLike::add_subquery(const Xapian::Query & subquery)
1545 {
1546     // Drop any subqueries which are MatchNothing.
1547     if (subquery.internal.get() != NULL)
1548         subqueries.push_back(subquery);
1549 }
1550
1551 Query::Internal *
1552 QueryOrLike::done()
1553 {
1554     // An empty OrLike gives MatchNothing.  Note that add_subquery() drops any
1555     // subqueries which are MatchNothing.
1556     if (subqueries.empty())
1557         return NULL;
1558     if (subqueries.size() == 1)
1559         return subqueries[0].internal.get();
1560     return this;
1561 }
1562
1563 void
1564 QueryAndNot::add_subquery(const Xapian::Query & subquery)
1565 {
1566     if (!subqueries.empty()) {
1567         // We're adding the 2nd or subsequent subquery, so this subquery is
1568         // negated.
1569         if (subqueries[0].internal.get() == NULL) {
1570             // The left side is already MatchNothing so drop any right side.
1571             //
1572             // MatchNothing AND_NOT X == MatchNothing
1573             return;
1574         }
1575         if (subquery.internal.get() == NULL) {
1576             // Drop MatchNothing on the right of AndNot.
1577             //
1578             // X AND_NOT MatchNothing == X
1579             return;
1580         }
1581         if (subquery.get_type() == subquery.OP_SCALE_WEIGHT) {
1582             // Strip OP_SCALE_WEIGHT wrapping from queries on the right of
1583             // AndNot as no weight is taken from them.
1584             subqueries.push_back(subquery.get_subquery(0));
1585             // The Query constructor for OP_SCALE_WEIGHT constructor should
1586             // eliminate OP_SCALE_WEIGHT applied to MatchNothing.
1587             Assert(subquery.get_subquery(0).internal.get() != NULL);
1588             return;
1589         }
1590     }
1591     subqueries.push_back(subquery);
1592 }
1593
1594 Query::Internal *
1595 QueryAndNot::done()
1596 {
1597     // Any MatchNothing right subqueries get discarded by add_subquery() - if
1598     // that leaves just the left subquery, return that.
1599     //
1600     // If left subquery is MatchNothing, then add_subquery() discards all right
1601     // subqueries, so this check also gives MatchNothing for this case.
1602     if (subqueries.size() == 1)
1603         return subqueries[0].internal.get();
1604     return this;
1605 }
1606
1607 void
1608 QueryAndMaybe::add_subquery(const Xapian::Query & subquery)
1609 {
1610     // If the left side of AndMaybe is already MatchNothing, do nothing.
1611     if (subqueries.size() == 1 && subqueries[0].internal.get() == NULL)
1612         return;
1613     // Drop any 2nd or subsequent subqueries which are MatchNothing.
1614     if (subquery.internal.get() != NULL || subqueries.empty())
1615         subqueries.push_back(subquery);
1616 }
1617
1618 Query::Internal *
1619 QueryAndMaybe::done()
1620 {
1621     // Any MatchNothing right subqueries get discarded by add_subquery() - if
1622     // that leaves just the left subquery, return that.
1623     //
1624     // If left subquery is MatchNothing, then add_subquery() discards all right
1625     // subqueries, so this check also gives MatchNothing for this case.
1626     if (subqueries.size() == 1)
1627         return subqueries[0].internal.get();
1628     return this;
1629 }
1630
1631 PostList*
1632 QueryOr::postlist(QueryOptimiser * qopt, double factor) const
1633 {
1634     LOGCALL(QUERY, PostList*, "QueryOr::postlist", qopt | factor);
1635     OrContext ctx(qopt, subqueries.size());
1636     do_or_like(ctx, qopt, factor);
1637     RETURN(ctx.postlist());
1638 }
1639
1640 void
1641 QueryOr::postlist_sub_or_like(OrContext& ctx, QueryOptimiser * qopt, double factor) const
1642 {
1643     do_or_like(ctx, qopt, factor);
1644 }
1645
1646 PostList*
1647 QueryAndNot::postlist(QueryOptimiser * qopt, double factor) const
1648 {
1649     LOGCALL(QUERY, PostList*, "QueryAndNot::postlist", qopt | factor);
1650     // FIXME: Combine and-like side with and-like stuff above.
1651     unique_ptr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
1652     OrContext ctx(qopt, subqueries.size() - 1);
1653     do_or_like(ctx, qopt, 0.0, 0, 1);
1654     unique_ptr<PostList> r(ctx.postlist());
1655     RETURN(new AndNotPostList(l.release(), r.release(),
1656                               qopt->matcher, qopt->db_size));
1657 }
1658
1659 PostList*
1660 QueryXor::postlist(QueryOptimiser * qopt, double factor) const
1661 {
1662     LOGCALL(QUERY, PostList*, "QueryXor::postlist", qopt | factor);
1663     XorContext ctx(qopt, subqueries.size());
1664     postlist_sub_xor(ctx, qopt, factor);
1665     RETURN(ctx.postlist());
1666 }
1667
1668 void
1669 QueryXor::postlist_sub_xor(XorContext& ctx, QueryOptimiser * qopt, double factor) const
1670 {
1671     QueryVector::const_iterator i;
1672     for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1673         // MatchNothing subqueries should have been removed by done().
1674         Assert((*i).internal.get());
1675         (*i).internal->postlist_sub_xor(ctx, qopt, factor);
1676     }
1677 }
1678
1679 PostList*
1680 QueryAndMaybe::postlist(QueryOptimiser * qopt, double factor) const
1681 {
1682     LOGCALL(QUERY, PostList*, "QueryAndMaybe::postlist", qopt | factor);
1683     // FIXME: Combine and-like side with and-like stuff above.
1684     unique_ptr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
1685     if (factor == 0.0) {
1686         // An unweighted OP_AND_MAYBE can be replaced with its left branch.
1687         RETURN(l.release());
1688     }
1689     OrContext ctx(qopt, subqueries.size() - 1);
1690     do_or_like(ctx, qopt, factor, 0, 1);
1691     unique_ptr<PostList> r(ctx.postlist());
1692     RETURN(new AndMaybePostList(l.release(), r.release(),
1693                                 qopt->matcher, qopt->db_size));
1694 }
1695
1696 PostList*
1697 QueryFilter::postlist(QueryOptimiser * qopt, double factor) const
1698 {
1699     LOGCALL(QUERY, PostList*, "QueryFilter::postlist", qopt | factor);
1700     // FIXME: Combine and-like stuff, like QueryOptimiser.
1701     AssertEq(subqueries.size(), 2);
1702     PostList * pls[2];
1703     unique_ptr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
1704     pls[1] = subqueries[1].internal->postlist(qopt, 0.0);
1705     pls[0] = l.release();
1706     RETURN(new MultiAndPostList(pls, pls + 2, qopt->matcher, qopt->db_size));
1707 }
1708
1709 void
1710 QueryFilter::postlist_sub_and_like(AndContext& ctx, QueryOptimiser * qopt, double factor) const
1711 {
1712     QueryVector::const_iterator i;
1713     for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1714         // MatchNothing subqueries should have been removed by done().
1715         Assert((*i).internal.get());
1716         (*i).internal->postlist_sub_and_like(ctx, qopt, factor);
1717         // Second and subsequent subqueries are unweighted.
1718         factor = 0.0;
1719     }
1720 }
1721
1722 void
1723 QueryWindowed::postlist_windowed(Query::op op, AndContext& ctx, QueryOptimiser * qopt, double factor) const
1724 {
1725     if (!qopt->full_db_has_positions) {
1726         // No positional data anywhere, so just handle as AND.
1727         QueryAndLike::postlist_sub_and_like(ctx, qopt, factor);
1728         return;
1729     }
1730
1731     if (!qopt->db.has_positions()) {
1732         // No positions in this subdatabase so this matches nothing, which
1733         // means the whole andcontext matches nothing.
1734         //
1735         // Bailing out here means we don't recurse deeper and that means we
1736         // don't call QueryOptimiser::inc_total_subqs() for leaf postlists in
1737         // the phrase, but at least one shard will count them, and the matcher
1738         // takes the highest answer (since 1.4.6).
1739         ctx.shrink(0);
1740         return;
1741     }
1742
1743     bool old_need_positions = qopt->need_positions;
1744     qopt->need_positions = true;
1745
1746     QueryVector::const_iterator i;
1747     for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1748         // MatchNothing subqueries should have been removed by done().
1749         Assert((*i).internal.get());
1750         bool is_term = ((*i).internal->get_type() == Query::LEAF_TERM);
1751         PostList* pl = (*i).internal->postlist(qopt, factor);
1752         if (!is_term)
1753             pl = new OrPosPostList(pl);
1754         ctx.add_postlist(pl);
1755     }
1756     // Record the positional filter to apply higher up the tree.
1757     ctx.add_pos_filter(op, subqueries.size(), window);
1758
1759     qopt->need_positions = old_need_positions;
1760 }
1761
1762 void
1763 QueryPhrase::postlist_sub_and_like(AndContext & ctx, QueryOptimiser * qopt, double factor) const
1764 {
1765     QueryWindowed::postlist_windowed(Query::OP_PHRASE, ctx, qopt, factor);
1766 }
1767
1768 void
1769 QueryNear::postlist_sub_and_like(AndContext & ctx, QueryOptimiser * qopt, double factor) const
1770 {
1771     QueryWindowed::postlist_windowed(Query::OP_NEAR, ctx, qopt, factor);
1772 }
1773
1774 PostList*
1775 QueryEliteSet::postlist(QueryOptimiser * qopt, double factor) const
1776 {
1777     LOGCALL(QUERY, PostList*, "QueryEliteSet::postlist", qopt | factor);
1778     OrContext ctx(qopt, subqueries.size());
1779     do_or_like(ctx, qopt, factor, set_size);
1780     RETURN(ctx.postlist());
1781 }
1782
1783 void
1784 QueryEliteSet::postlist_sub_or_like(OrContext& ctx, QueryOptimiser * qopt, double factor) const
1785 {
1786     do_or_like(ctx, qopt, factor, set_size);
1787 }
1788
1789 PostList*
1790 QuerySynonym::postlist(QueryOptimiser * qopt, double factor) const
1791 {
1792     LOGCALL(QUERY, PostList*, "QuerySynonym::postlist", qopt | factor);
1793     // Save and restore total_subqs so we only add one for the whole
1794     // OP_SYNONYM subquery (or none if we're not weighted).
1795     Xapian::termcount save_total_subqs = qopt->get_total_subqs();
1796     if (factor != 0.0)
1797         ++save_total_subqs;
1798     PostList * pl = do_synonym(qopt, factor);
1799     qopt->set_total_subqs(save_total_subqs);
1800     RETURN(pl);
1801 }
1802
1803 Query::Internal *
1804 QuerySynonym::done()
1805 {
1806     // An empty Synonym gives MatchNothing.  Note that add_subquery() drops any
1807     // subqueries which are MatchNothing.
1808     if (subqueries.empty())
1809         return NULL;
1810     if (subqueries.size() == 1) {
1811         Query::op sub_type = subqueries[0].get_type();
1812         // Synonym of a single subquery should only be simplified if that
1813         // subquery is a term (or MatchAll), or if it's also OP_SYNONYM.  Note
1814         // that MatchNothing subqueries are dropped, so we'd never get here
1815         // with a single MatchNothing subquery.
1816         if (sub_type == Query::LEAF_TERM || sub_type == Query::LEAF_MATCH_ALL ||
1817             sub_type == Query::OP_SYNONYM) {
1818             return subqueries[0].internal.get();
1819         }
1820         if (sub_type == Query::OP_WILDCARD) {
1821             auto q = static_cast<QueryWildcard*>(subqueries[0].internal.get());
1822             // SYNONYM over WILDCARD X -> WILDCARD SYNONYM for any combiner X.
1823             return q->change_combiner(Query::OP_SYNONYM);
1824         }
1825     }
1826     return this;
1827 }
1828
1829 PostList*
1830 QueryMax::postlist(QueryOptimiser * qopt, double factor) const
1831 {
1832     LOGCALL(QUERY, PostList*, "QueryMax::postlist", qopt | factor);
1833     // Save and restore total_subqs so we only add one for the whole
1834     // OP_MAX subquery (or none if we're not weighted).
1835     Xapian::termcount save_total_subqs = qopt->get_total_subqs();
1836     if (factor != 0.0)
1837         ++save_total_subqs;
1838     PostList * pl = do_max(qopt, factor);
1839     qopt->set_total_subqs(save_total_subqs);
1840     RETURN(pl);
1841 }
1842
1843 Xapian::Query::op
1844 QueryAnd::get_op() const
1845 {
1846     return Xapian::Query::OP_AND;
1847 }
1848
1849 Xapian::Query::op
1850 QueryOr::get_op() const
1851 {
1852     return Xapian::Query::OP_OR;
1853 }
1854
1855 Xapian::Query::op
1856 QueryAndNot::get_op() const
1857 {
1858     return Xapian::Query::OP_AND_NOT;
1859 }
1860
1861 Xapian::Query::op
1862 QueryXor::get_op() const
1863 {
1864     return Xapian::Query::OP_XOR;
1865 }
1866
1867 Xapian::Query::op
1868 QueryAndMaybe::get_op() const
1869 {
1870     return Xapian::Query::OP_AND_MAYBE;
1871 }
1872
1873 Xapian::Query::op
1874 QueryFilter::get_op() const
1875 {
1876     return Xapian::Query::OP_FILTER;
1877 }
1878
1879 Xapian::Query::op
1880 QueryNear::get_op() const
1881 {
1882     return Xapian::Query::OP_NEAR;
1883 }
1884
1885 Xapian::Query::op
1886 QueryPhrase::get_op() const
1887 {
1888     return Xapian::Query::OP_PHRASE;
1889 }
1890
1891 Xapian::Query::op
1892 QueryEliteSet::get_op() const
1893 {
1894     return Xapian::Query::OP_ELITE_SET;
1895 }
1896
1897 Xapian::Query::op
1898 QuerySynonym::get_op() const
1899 {
1900     return Xapian::Query::OP_SYNONYM;
1901 }
1902
1903 Xapian::Query::op
1904 QueryMax::get_op() const
1905 {
1906     return Xapian::Query::OP_MAX;
1907 }
1908
1909 Xapian::Query::op
1910 QueryWildcard::get_op() const
1911 {
1912     return Xapian::Query::OP_WILDCARD;
1913 }
1914
1915 string
1916 QueryAnd::get_description() const
1917 {
1918     return get_description_helper(" AND ");
1919 }
1920
1921 string
1922 QueryOr::get_description() const
1923 {
1924     return get_description_helper(" OR ");
1925 }
1926
1927 string
1928 QueryAndNot::get_description() const
1929 {
1930     return get_description_helper(" AND_NOT ");
1931 }
1932
1933 string
1934 QueryXor::get_description() const
1935 {
1936     return get_description_helper(" XOR ");
1937 }
1938
1939 string
1940 QueryAndMaybe::get_description() const
1941 {
1942     return get_description_helper(" AND_MAYBE ");
1943 }
1944
1945 string
1946 QueryFilter::get_description() const
1947 {
1948     return get_description_helper(" FILTER ");
1949 }
1950
1951 string
1952 QueryNear::get_description() const
1953 {
1954     return get_description_helper(" NEAR ", window);
1955 }
1956
1957 string
1958 QueryPhrase::get_description() const
1959 {
1960     return get_description_helper(" PHRASE ", window);
1961 }
1962
1963 string
1964 QueryEliteSet::get_description() const
1965 {
1966     return get_description_helper(" ELITE_SET ", set_size);
1967 }
1968
1969 string
1970 QuerySynonym::get_description() const
1971 {
1972     if (subqueries.size() == 1) {
1973         string d = "(SYNONYM ";
1974         d += subqueries[0].internal->get_description();
1975         d += ")";
1976         return d;
1977     }
1978     return get_description_helper(" SYNONYM ");
1979 }
1980
1981 string
1982 QueryMax::get_description() const
1983 {
1984     return get_description_helper(" MAX ");
1985 }
1986
1987 Xapian::Query::op
1988 QueryInvalid::get_type() const XAPIAN_NOEXCEPT
1989 {
1990     return Xapian::Query::OP_INVALID;
1991 }
1992
1993 PostList*
1994 QueryInvalid::postlist(QueryOptimiser *, double) const
1995 {
1996     throw Xapian::InvalidOperationError("Query is invalid");
1997 }
1998
1999 void
2000 QueryInvalid::serialise(std::string & result) const
2001 {
2002     result += static_cast<char>(0x00);
2003 }
2004
2005 string
2006 QueryInvalid::get_description() const
2007 {
2008     return "<INVALID>";
2009 }
2010
2011 }
2012 }