xapian-core/api/queryinternal.cc

   1 /** @file queryinternal.cc
   2  * @brief Xapian::Query internals
   3  */
   4 /* Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014,2015,2016 Olly Betts
   5  * Copyright (C) 2008,2009 Lemur Consulting Ltd
   6  *
   7  * This program is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU General Public License as
   9  * published by the Free Software Foundation; either version 2 of the
  10  * License, or (at your option) any later version.
  11  *
  12  * This program is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  * GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with this program; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
  20  */
  21
  22 #include <config.h>
  23
  24 #include "queryinternal.h"
  25
  26 #include "xapian/error.h"
  27 #include "xapian/postingsource.h"
  28 #include "xapian/query.h"
  29
  30 #include "leafpostlist.h"
  31 #include "matcher/andmaybepostlist.h"
  32 #include "matcher/andnotpostlist.h"
  33 #include "emptypostlist.h"
  34 #include "matcher/exactphrasepostlist.h"
  35 #include "matcher/externalpostlist.h"
  36 #include "matcher/maxpostlist.h"
  37 #include "matcher/multiandpostlist.h"
  38 #include "matcher/multixorpostlist.h"
  39 #include "matcher/nearpostlist.h"
  40 #include "matcher/orpostlist.h"
  41 #include "matcher/phrasepostlist.h"
  42 #include "matcher/queryoptimiser.h"
  43 #include "matcher/valuerangepostlist.h"
  44 #include "matcher/valuegepostlist.h"
  45 #include "net/length.h"
  46 #include "serialise-double.h"
  47 #include "termlist.h"
  48
  49 #include "autoptr.h"
  50 #include "debuglog.h"
  51 #include "omassert.h"
  52 #include "str.h"
  53 #include "unicode/description_append.h"
  54
  55 #include <algorithm>
  56 #include <functional>
  57 #include <list>
  58 #include <string>
  59 #include <vector>
  60
  61 using namespace std;
  62
  63 template<class CLASS> struct delete_ptr {
  64     void operator()(CLASS *p) { delete p; }
  65 };
  66
  67 using Xapian::Internal::AndContext;
  68 using Xapian::Internal::OrContext;
  69 using Xapian::Internal::XorContext;
  70
  71 namespace Xapian {
  72
  73 namespace Internal {
  74
  75 /** Class providing an operator which sorts postlists to select max or terms.
  76  *  This returns true if a has a (strictly) greater termweight than b,
  77  *  unless a or b contain no documents, in which case the other one is
  78  *  selected.
  79  */
  80 struct CmpMaxOrTerms {
  81     /** Return true if and only if a has a strictly greater termweight than b. */
  82     bool operator()(const PostList *a, const PostList *b) {
  83 #if (defined(__i386__) && !defined(__SSE_MATH__)) || \
  84     defined(__mc68000__) || defined(__mc68010__) || \
  85     defined(__mc68020__) || defined(__mc68030__)
  86         // On some architectures, most common of which is x86, floating point
  87         // values are calculated and stored in registers with excess precision.
  88         // If the two get_maxweight() calls below return identical values in a
  89         // register, the excess precision may be dropped for one of them but
  90         // not the other (e.g. because the compiler saves the first calculated
  91         // weight to memory while calculating the second, then reloads it to
  92         // compare).  This leads to both a > b and b > a being true, which
  93         // violates the antisymmetry property of the strict weak ordering
  94         // required by nth_element().  This can have serious consequences (e.g.
  95         // segfaults).
  96         //
  97         // Note that m68k only has excess precision in earlier models - 68040
  98         // and later are OK:
  99         // http://gcc.gnu.org/ml/gcc-patches/2008-11/msg00105.html
 100         //
 101         // To avoid this, we store each result in a volatile double prior to
 102         // comparing them.  This means that the result of this test should
 103         // match that on other architectures with the same double format (which
 104         // is desirable), and actually has less overhead than rounding both
 105         // results to float (which is another approach which works).
 106         volatile double a_max_wt = a->get_maxweight();
 107         volatile double b_max_wt = b->get_maxweight();
 108         return a_max_wt > b_max_wt;
 109 #else
 110         return a->get_maxweight() > b->get_maxweight();
 111 #endif
 112     }
 113 };
 114
 115 /// Comparison functor which orders PostList* by descending get_termfreq_est().
 116 struct ComparePostListTermFreqAscending {
 117     /// Order by descending get_termfreq_est().
 118     bool operator()(const PostList *a, const PostList *b) {
 119         return a->get_termfreq_est() > b->get_termfreq_est();
 120     }
 121 };
 122
 123 class Context {
 124   protected:
 125     vector<PostList*> pls;
 126
 127   public:
 128     explicit Context(size_t reserve);
 129
 130     ~Context();
 131
 132     void add_postlist(PostList * pl) {
 133         pls.push_back(pl);
 134     }
 135
 136     bool empty() const {
 137         return pls.empty();
 138     }
 139
 140     size_t size() const {
 141         return pls.size();
 142     }
 143 };
 144
 145 Context::Context(size_t reserve) {
 146     pls.reserve(reserve);
 147 }
 148
 149 Context::~Context()
 150 {
 151     for_each(pls.begin(), pls.end(), delete_ptr<PostList>());
 152 }
 153
 154 class OrContext : public Context {
 155   public:
 156     explicit OrContext(size_t reserve) : Context(reserve) { }
 157
 158     /// Select the best set_size postlists from the last out_of added.
 159     void select_elite_set(QueryOptimiser * qopt,
 160                           size_t set_size, size_t out_of);
 161
 162     /// Select the set_size postlists with the highest term frequency.
 163     void select_most_frequent(QueryOptimiser * qopt, size_t set_size);
 164
 165     PostList * postlist(QueryOptimiser* qopt);
 166     PostList * postlist_max(QueryOptimiser* qopt);
 167 };
 168
 169 void
 170 OrContext::select_elite_set(QueryOptimiser * qopt,
 171                             size_t set_size, size_t out_of)
 172 {
 173     // Call recalc_maxweight() as otherwise get_maxweight()
 174     // may not be valid before next() or skip_to()
 175     vector<PostList*>::iterator begin = pls.begin() + pls.size() - out_of;
 176     for_each(begin, pls.end(), mem_fun(&PostList::recalc_maxweight));
 177
 178     nth_element(begin, begin + set_size - 1, pls.end(), CmpMaxOrTerms());
 179     const PostList * hint_pl = qopt->get_hint_postlist();
 180     for_each(begin + set_size, pls.end(),
 181         [&qopt, &hint_pl](const PostList * p) {
 182             if (rare(p == hint_pl)) {
 183                 qopt->take_hint_ownership();
 184                 hint_pl = NULL;
 185             } else {
 186                 delete p;
 187             }
 188         });
 189     pls.resize(pls.size() - out_of + set_size);
 190 }
 191
 192 void
 193 OrContext::select_most_frequent(QueryOptimiser * qopt, size_t set_size)
 194 {
 195     vector<PostList*>::iterator begin = pls.begin();
 196     nth_element(begin, begin + set_size - 1, pls.end(),
 197                 ComparePostListTermFreqAscending());
 198     const PostList * hint_pl = qopt->get_hint_postlist();
 199     for_each(begin + set_size, pls.end(),
 200         [&qopt, &hint_pl](const PostList * p) {
 201             if (rare(p == hint_pl)) {
 202                 qopt->take_hint_ownership();
 203                 hint_pl = NULL;
 204             } else {
 205                 delete p;
 206             }
 207         });
 208     pls.resize(set_size);
 209 }
 210
 211 PostList *
 212 OrContext::postlist(QueryOptimiser* qopt)
 213 {
 214     Assert(!pls.empty());
 215
 216     if (pls.size() == 1) {
 217         PostList * pl = pls[0];
 218         pls.clear();
 219         return pl;
 220     }
 221
 222     // Make postlists into a heap so that the postlist with the greatest term
 223     // frequency is at the top of the heap.
 224     make_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
 225
 226     // Now build a tree of binary OrPostList objects.
 227     //
 228     // The algorithm used to build the tree is like that used to build an
 229     // optimal Huffman coding tree.  If we called next() repeatedly, this
 230     // arrangement would minimise the number of method calls.  Generally we
 231     // don't actually do that, but this arrangement is still likely to be a
 232     // good one, and it does minimise the work in the worst case.
 233     while (true) {
 234         // We build the tree such that at each branch:
 235         //
 236         //   l.get_termfreq_est() >= r.get_termfreq_est()
 237         //
 238         // We do this so that the OrPostList class can be optimised assuming
 239         // that this is the case.
 240         PostList * r = pls.front();
 241         pop_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
 242         pls.pop_back();
 243         PostList * pl;
 244         pl = new OrPostList(pls.front(), r, qopt->matcher, qopt->db_size);
 245
 246         if (pls.size() == 1) {
 247             pls.clear();
 248             return pl;
 249         }
 250
 251         pop_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
 252         pls.back() = pl;
 253         push_heap(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
 254     }
 255 }
 256
 257 PostList *
 258 OrContext::postlist_max(QueryOptimiser* qopt)
 259 {
 260     Assert(!pls.empty());
 261
 262     if (pls.size() == 1) {
 263         PostList * pl = pls[0];
 264         pls.clear();
 265         return pl;
 266     }
 267
 268     // Sort the postlists so that the postlist with the greatest term frequency
 269     // is first.
 270     sort(pls.begin(), pls.end(), ComparePostListTermFreqAscending());
 271
 272     PostList * pl;
 273     pl = new MaxPostList(pls.begin(), pls.end(), qopt->matcher, qopt->db_size);
 274
 275     pls.clear();
 276     return pl;
 277 }
 278
 279 class XorContext : public Context {
 280   public:
 281     explicit XorContext(size_t reserve) : Context(reserve) { }
 282
 283     PostList * postlist(QueryOptimiser* qopt);
 284 };
 285
 286 PostList *
 287 XorContext::postlist(QueryOptimiser* qopt)
 288 {
 289     Xapian::doccount db_size = qopt->db_size;
 290     PostList * pl;
 291     pl = new MultiXorPostList(pls.begin(), pls.end(), qopt->matcher, db_size);
 292
 293     // Empty pls so our destructor doesn't delete them all!
 294     pls.clear();
 295     return pl;
 296 }
 297
 298 class AndContext : public Context {
 299     class PosFilter {
 300         Xapian::Query::op op_;
 301
 302         /// Start and end indices for the PostLists this positional filter uses.
 303         size_t begin, end;
 304
 305         Xapian::termcount window;
 306
 307       public:
 308         PosFilter(Xapian::Query::op op__, size_t begin_, size_t end_,
 309                   Xapian::termcount window_)
 310             : op_(op__), begin(begin_), end(end_), window(window_) { }
 311
 312         PostList * postlist(PostList * pl, const vector<PostList*>& pls) const;
 313     };
 314
 315     list<PosFilter> pos_filters;
 316
 317   public:
 318     explicit AndContext(size_t reserve) : Context(reserve) { }
 319
 320     void add_pos_filter(Query::op op_,
 321                         size_t n_subqs,
 322                         Xapian::termcount window);
 323
 324     PostList * postlist(QueryOptimiser* qopt);
 325 };
 326
 327 PostList *
 328 AndContext::PosFilter::postlist(PostList * pl, const vector<PostList*>& pls) const
 329 try {
 330     vector<PostList *>::const_iterator terms_begin = pls.begin() + begin;
 331     vector<PostList *>::const_iterator terms_end = pls.begin() + end;
 332
 333     if (op_ == Xapian::Query::OP_NEAR) {
 334         pl = new NearPostList(pl, window, terms_begin, terms_end);
 335     } else if (window == end - begin) {
 336         AssertEq(op_, Xapian::Query::OP_PHRASE);
 337         pl = new ExactPhrasePostList(pl, terms_begin, terms_end);
 338     } else {
 339         AssertEq(op_, Xapian::Query::OP_PHRASE);
 340         pl = new PhrasePostList(pl, window, terms_begin, terms_end);
 341     }
 342     return pl;
 343 } catch (...) {
 344     delete pl;
 345     throw;
 346 }
 347
 348 void
 349 AndContext::add_pos_filter(Query::op op_,
 350                            size_t n_subqs,
 351                            Xapian::termcount window)
 352 {
 353     Assert(n_subqs > 1);
 354     size_t end = pls.size();
 355     size_t begin = end - n_subqs;
 356     pos_filters.push_back(PosFilter(op_, begin, end, window));
 357 }
 358
 359 PostList *
 360 AndContext::postlist(QueryOptimiser* qopt)
 361 {
 362     AutoPtr<PostList> pl(new MultiAndPostList(pls.begin(), pls.end(),
 363                                               qopt->matcher, qopt->db_size));
 364
 365     // Sort the positional filters to try to apply them in an efficient order.
 366     // FIXME: We need to figure out what that is!  Try applying lowest cf/tf
 367     // first?
 368
 369     // Apply any positional filters.
 370     list<PosFilter>::const_iterator i;
 371     for (i = pos_filters.begin(); i != pos_filters.end(); ++i) {
 372         const PosFilter & filter = *i;
 373         pl.reset(filter.postlist(pl.release(), pls));
 374     }
 375
 376     // Empty pls so our destructor doesn't delete them all!
 377     pls.clear();
 378     return pl.release();
 379 }
 380
 381 }
 382
 383 Query::Internal::~Internal() { }
 384
 385 size_t
 386 Query::Internal::get_num_subqueries() const XAPIAN_NOEXCEPT
 387 {
 388     return 0;
 389 }
 390
 391 const Query
 392 Query::Internal::get_subquery(size_t) const
 393 {
 394     throw Xapian::InvalidArgumentError("get_subquery() not meaningful for this Query object");
 395 }
 396
 397 void
 398 Query::Internal::gather_terms(void *) const
 399 {
 400 }
 401
 402 Xapian::termcount
 403 Query::Internal::get_length() const XAPIAN_NOEXCEPT
 404 {
 405     return 0;
 406 }
 407
 408 Query::Internal *
 409 Query::Internal::unserialise(const char ** p, const char * end,
 410                              const Registry & reg)
 411 {
 412     if (*p == end)
 413         return NULL;
 414     unsigned char ch = *(*p)++;
 415     switch (ch >> 5) {
 416         case 4: case 5: case 6: case 7: {
 417             // Multi-way branch
 418             //
 419             // 1ccccnnn where:
 420             //   nnn -> n_subqs (0 means encoded value follows)
 421             //   cccc -> code (which OP_XXX)
 422             size_t n_subqs = ch & 0x07;
 423             if (n_subqs == 0) {
 424                 decode_length(p, end, n_subqs);
 425                 n_subqs += 8;
 426             }
 427             unsigned char code = (ch >> 3) & 0x0f;
 428             Xapian::termcount parameter = 0;
 429             if (code >= 13)
 430                 decode_length(p, end, parameter);
 431             Xapian::Internal::QueryBranch * result;
 432             switch (code) {
 433                 case 0: // OP_AND
 434                     result = new Xapian::Internal::QueryAnd(n_subqs);
 435                     break;
 436                 case 1: // OP_OR
 437                     result = new Xapian::Internal::QueryOr(n_subqs);
 438                     break;
 439                 case 2: // OP_AND_NOT
 440                     result = new Xapian::Internal::QueryAndNot(n_subqs);
 441                     break;
 442                 case 3: // OP_XOR
 443                     result = new Xapian::Internal::QueryXor(n_subqs);
 444                     break;
 445                 case 4: // OP_AND_MAYBE
 446                     result = new Xapian::Internal::QueryAndMaybe(n_subqs);
 447                     break;
 448                 case 5: // OP_FILTER
 449                     result = new Xapian::Internal::QueryFilter(n_subqs);
 450                     break;
 451                 case 6: // OP_SYNONYM
 452                     result = new Xapian::Internal::QuerySynonym(n_subqs);
 453                     break;
 454                 case 7: // OP_MAX
 455                     result = new Xapian::Internal::QueryMax(n_subqs);
 456                     break;
 457                 case 13: // OP_ELITE_SET
 458                     result = new Xapian::Internal::QueryEliteSet(n_subqs,
 459                                                                  parameter);
 460                     break;
 461                 case 14: // OP_NEAR
 462                     result = new Xapian::Internal::QueryNear(n_subqs,
 463                                                              parameter);
 464                     break;
 465                 case 15: // OP_PHRASE
 466                     result = new Xapian::Internal::QueryPhrase(n_subqs,
 467                                                                parameter);
 468                     break;
 469                 default:
 470                     // 8 to 12 are currently unused.
 471                     throw SerialisationError("Unknown multi-way branch Query operator");
 472             }
 473             do {
 474                 result->add_subquery(Xapian::Query(unserialise(p, end, reg)));
 475             } while (--n_subqs);
 476             result->done();
 477             return result;
 478         }
 479         case 2: case 3: { // Term
 480             // Term
 481             //
 482             // 01ccLLLL where:
 483             //   LLLL -> length (0 means encoded value follows)
 484             //   cc -> code:
 485             //     0: wqf = 0; pos = 0
 486             //     1: wqf = 1; pos = 0
 487             //     2: wqf = 1; pos -> encoded value follows
 488             //     3: wqf -> encoded value follows; pos -> encoded value follows
 489             size_t len = ch & 0x0f;
 490             if (len == 0) {
 491                 decode_length(p, end, len);
 492                 len += 16;
 493             }
 494             if (size_t(end - *p) < len)
 495                 throw SerialisationError("Not enough data");
 496             string term(*p, len);
 497             *p += len;
 498
 499             int code = ((ch >> 4) & 0x03);
 500
 501             Xapian::termcount wqf = static_cast<Xapian::termcount>(code > 0);
 502             if (code == 3)
 503                 decode_length(p, end, wqf);
 504
 505             Xapian::termpos pos = 0;
 506             if (code >= 2)
 507                 decode_length(p, end, pos);
 508
 509             return new Xapian::Internal::QueryTerm(term, wqf, pos);
 510         }
 511         case 1: {
 512             // OP_VALUE_RANGE or OP_VALUE_GE or OP_VALUE_LE
 513             //
 514             // 001tssss where:
 515             //   ssss -> slot number (15 means encoded value follows)
 516             //   t -> op:
 517             //     0: OP_VALUE_RANGE (or OP_VALUE_LE if begin empty)
 518             //     1: OP_VALUE_GE
 519             Xapian::valueno slot = ch & 15;
 520             if (slot == 15) {
 521                 decode_length(p, end, slot);
 522                 slot += 15;
 523             }
 524             size_t len;
 525             decode_length_and_check(p, end, len);
 526             string begin(*p, len);
 527             *p += len;
 528             if (ch & 0x10) {
 529                 // OP_VALUE_GE
 530                 return new Xapian::Internal::QueryValueGE(slot, begin);
 531             }
 532
 533             // OP_VALUE_RANGE
 534             decode_length_and_check(p, end, len);
 535             string end_(*p, len);
 536             *p += len;
 537             if (begin.empty()) // FIXME: is this right?
 538                 return new Xapian::Internal::QueryValueLE(slot, end_);
 539             return new Xapian::Internal::QueryValueRange(slot, begin, end_);
 540         }
 541         case 0: {
 542             // Other operators
 543             //
 544             //   000ttttt where:
 545             //     ttttt -> encodes which OP_XXX
 546             switch (ch & 0x1f) {
 547                 case 0x00: // OP_INVALID
 548                     return new Xapian::Internal::QueryInvalid();
 549                 case 0x0b: { // Wildcard
 550                     if (*p == end)
 551                         throw SerialisationError("not enough data");
 552                     Xapian::termcount max_expansion;
 553                     decode_length(p, end, max_expansion);
 554                     if (end - *p < 2)
 555                         throw SerialisationError("not enough data");
 556                     int max_type = static_cast<unsigned char>(*(*p)++);
 557                     op combiner = static_cast<op>(*(*p)++);
 558                     size_t len;
 559                     decode_length_and_check(p, end, len);
 560                     string pattern(*p, len);
 561                     *p += len;
 562                     return new Xapian::Internal::QueryWildcard(pattern,
 563                                                                max_expansion,
 564                                                                max_type,
 565                                                                combiner);
 566                 }
 567                 case 0x0c: { // PostingSource
 568                     size_t len;
 569                     decode_length_and_check(p, end, len);
 570                     string name(*p, len);
 571                     *p += len;
 572
 573                     const PostingSource * reg_source = reg.get_posting_source(name);
 574                     if (!reg_source) {
 575                         string m = "PostingSource ";
 576                         m += name;
 577                         m += " not registered";
 578                         throw SerialisationError(m);
 579                     }
 580
 581                     decode_length_and_check(p, end, len);
 582                     PostingSource * source =
 583                         reg_source->unserialise_with_registry(string(*p, len),
 584                                                               reg);
 585                     *p += len;
 586                     return new Xapian::Internal::QueryPostingSource(source->release());
 587                 }
 588                 case 0x0d: {
 589                     using Xapian::Internal::QueryScaleWeight;
 590                     double scale_factor = unserialise_double(p, end);
 591                     return new QueryScaleWeight(scale_factor,
 592                                                 Query(unserialise(p, end, reg)));
 593                 }
 594                 case 0x0e: {
 595                     Xapian::termcount wqf;
 596                     Xapian::termpos pos;
 597                     decode_length(p, end, wqf);
 598                     decode_length(p, end, pos);
 599                     return new Xapian::Internal::QueryTerm(string(), wqf, pos);
 600                 }
 601                 case 0x0f:
 602                     return Query::MatchAll.internal.get();
 603                 default: // Others currently unused.
 604                     break;
 605             }
 606             break;
 607         }
 608     }
 609     string msg = "Unknown Query serialisation: ";
 610     msg += str(ch);
 611     throw SerialisationError(msg);
 612 }
 613
 614 void
 615 Query::Internal::postlist_sub_and_like(AndContext& ctx,
 616                                        QueryOptimiser * qopt,
 617                                        double factor) const
 618 {
 619     ctx.add_postlist(postlist(qopt, factor));
 620 }
 621
 622 void
 623 Query::Internal::postlist_sub_or_like(OrContext& ctx,
 624                                       QueryOptimiser * qopt,
 625                                       double factor) const
 626 {
 627     ctx.add_postlist(postlist(qopt, factor));
 628 }
 629
 630 void
 631 Query::Internal::postlist_sub_xor(XorContext& ctx,
 632                                   QueryOptimiser * qopt,
 633                                   double factor) const
 634 {
 635     ctx.add_postlist(postlist(qopt, factor));
 636 }
 637
 638 namespace Internal {
 639
 640 Query::op
 641 QueryTerm::get_type() const XAPIAN_NOEXCEPT
 642 {
 643     return term.empty() ? Query::LEAF_MATCH_ALL : Query::LEAF_TERM;
 644 }
 645
 646 string
 647 QueryTerm::get_description() const
 648 {
 649     string desc;
 650     if (term.empty()) {
 651         desc = "<alldocuments>";
 652     } else {
 653         description_append(desc, term);
 654     }
 655     if (wqf != 1) {
 656         desc += '#';
 657         desc += str(wqf);
 658     }
 659     if (pos) {
 660         desc += '@';
 661         desc += str(pos);
 662     }
 663     return desc;
 664 }
 665
 666 QueryPostingSource::QueryPostingSource(PostingSource * source_)
 667     : source(source_)
 668 {
 669     if (!source_)
 670         throw Xapian::InvalidArgumentError("source parameter can't be NULL");
 671     if (source->_refs == 0) {
 672         // source_ isn't reference counted, so try to clone it.  If clone()
 673         // isn't implemented, just use the object provided and it's the
 674         // caller's responsibility to ensure it stays valid while in use.
 675         PostingSource * cloned_source = source->clone();
 676         if (cloned_source) source = cloned_source->release();
 677     }
 678 }
 679
 680 Query::op
 681 QueryPostingSource::get_type() const XAPIAN_NOEXCEPT
 682 {
 683     return Query::LEAF_POSTING_SOURCE;
 684 }
 685
 686 string
 687 QueryPostingSource::get_description() const
 688 {
 689     string desc = "PostingSource(";
 690     desc += source->get_description();
 691     desc += ')';
 692     return desc;
 693 }
 694
 695 QueryScaleWeight::QueryScaleWeight(double factor, const Query & subquery_)
 696     : scale_factor(factor), subquery(subquery_)
 697 {
 698     if (rare(scale_factor < 0.0))
 699         throw Xapian::InvalidArgumentError("OP_SCALE_WEIGHT requires factor >= 0");
 700 }
 701
 702 Query::op
 703 QueryScaleWeight::get_type() const XAPIAN_NOEXCEPT
 704 {
 705     return Query::OP_SCALE_WEIGHT;
 706 }
 707
 708 size_t
 709 QueryScaleWeight::get_num_subqueries() const XAPIAN_NOEXCEPT
 710 {
 711     return 1;
 712 }
 713
 714 const Query
 715 QueryScaleWeight::get_subquery(size_t) const
 716 {
 717     return subquery;
 718 }
 719
 720 string
 721 QueryScaleWeight::get_description() const
 722 {
 723     Assert(subquery.internal.get());
 724     string desc = str(scale_factor);
 725     desc += " * ";
 726     desc += subquery.internal->get_description();
 727     return desc;
 728 }
 729
 730 PostingIterator::Internal *
 731 QueryTerm::postlist(QueryOptimiser * qopt, double factor) const
 732 {
 733     LOGCALL(QUERY, PostingIterator::Internal *, "QueryTerm::postlist", qopt | factor);
 734     if (factor != 0.0)
 735         qopt->inc_total_subqs();
 736     RETURN(qopt->open_post_list(term, wqf, factor));
 737 }
 738
 739 PostingIterator::Internal *
 740 QueryPostingSource::postlist(QueryOptimiser * qopt, double factor) const
 741 {
 742     LOGCALL(QUERY, PostingIterator::Internal *, "QueryPostingSource::postlist", qopt | factor);
 743     Assert(source.get());
 744     if (factor != 0.0)
 745         qopt->inc_total_subqs();
 746     // Casting away const on the Database::Internal here is OK, as we wrap
 747     // them in a const Xapian::Database so non-const methods can't actually
 748     // be called on the Database::Internal object.
 749     const Xapian::Database wrappeddb(
 750             const_cast<Xapian::Database::Internal*>(&(qopt->db)));
 751     RETURN(new ExternalPostList(wrappeddb, source.get(), factor, qopt->matcher));
 752 }
 753
 754 PostingIterator::Internal *
 755 QueryScaleWeight::postlist(QueryOptimiser * qopt, double factor) const
 756 {
 757     LOGCALL(QUERY, PostingIterator::Internal *, "QueryScaleWeight::postlist", qopt | factor);
 758     RETURN(subquery.internal->postlist(qopt, factor * scale_factor));
 759 }
 760
 761 void
 762 QueryTerm::gather_terms(void * void_terms) const
 763 {
 764     // Skip Xapian::Query::MatchAll (aka Xapian::Query("")).
 765     if (!term.empty()) {
 766         vector<pair<Xapian::termpos, string> > &terms =
 767             *static_cast<vector<pair<Xapian::termpos, string> >*>(void_terms);
 768         terms.push_back(make_pair(pos, term));
 769     }
 770 }
 771
 772 PostingIterator::Internal *
 773 QueryValueRange::postlist(QueryOptimiser *qopt, double factor) const
 774 {
 775     LOGCALL(QUERY, PostingIterator::Internal *, "QueryValueRange::postlist", qopt | factor);
 776     if (factor != 0.0)
 777         qopt->inc_total_subqs();
 778     const Xapian::Database::Internal & db = qopt->db;
 779     const string & lb = db.get_value_lower_bound(slot);
 780     // If lb.empty(), the backend doesn't provide value bounds.
 781     if (!lb.empty()) {
 782         if (end < lb) {
 783             RETURN(new EmptyPostList);
 784         }
 785         const string & ub = db.get_value_upper_bound(slot);
 786         if (begin > ub) {
 787             RETURN(new EmptyPostList);
 788         }
 789         if (end >= ub) {
 790             // If begin <= lb too, then the range check isn't needed, but we do
 791             // still need to consider which documents have a value set in this
 792             // slot.  If this value is set for all documents, we can replace it
 793             // with the MatchAll postlist, which is especially efficient if
 794             // there are no gaps in the docids.
 795             if (begin <= lb && db.get_value_freq(slot) == db.get_doccount()) {
 796                 RETURN(db.open_post_list(string()));
 797             }
 798             RETURN(new ValueGePostList(&db, slot, begin));
 799         }
 800     }
 801     RETURN(new ValueRangePostList(&db, slot, begin, end));
 802 }
 803
 804 void
 805 QueryValueRange::serialise(string & result) const
 806 {
 807     if (slot < 15) {
 808         result += static_cast<char>(0x20 | slot);
 809     } else {
 810         result += static_cast<char>(0x20 | 15);
 811         result += encode_length(slot - 15);
 812     }
 813     result += encode_length(begin.size());
 814     result += begin;
 815     result += encode_length(end.size());
 816     result += end;
 817 }
 818
 819 Query::op
 820 QueryValueRange::get_type() const XAPIAN_NOEXCEPT
 821 {
 822     return Query::OP_VALUE_RANGE;
 823 }
 824
 825 string
 826 QueryValueRange::get_description() const
 827 {
 828     string desc = "VALUE_RANGE ";
 829     desc += str(slot);
 830     desc += ' ';
 831     description_append(desc, begin);
 832     desc += ' ';
 833     description_append(desc, end);
 834     return desc;
 835 }
 836
 837 PostingIterator::Internal *
 838 QueryValueLE::postlist(QueryOptimiser *qopt, double factor) const
 839 {
 840     LOGCALL(QUERY, PostingIterator::Internal *, "QueryValueLE::postlist", qopt | factor);
 841     if (factor != 0.0)
 842         qopt->inc_total_subqs();
 843     const Xapian::Database::Internal & db = qopt->db;
 844     const string & lb = db.get_value_lower_bound(slot);
 845     // If lb.empty(), the backend doesn't provide value bounds.
 846     if (!lb.empty()) {
 847         if (limit < lb) {
 848             RETURN(new EmptyPostList);
 849         }
 850         if (limit >= db.get_value_upper_bound(slot)) {
 851             // The range check isn't needed, but we do still need to consider
 852             // which documents have a value set in this slot.  If this value is
 853             // set for all documents, we can replace it with the MatchAll
 854             // postlist, which is especially efficient if there are no gaps in
 855             // the docids.
 856             if (db.get_value_freq(slot) == db.get_doccount()) {
 857                 RETURN(db.open_post_list(string()));
 858             }
 859         }
 860     }
 861     RETURN(new ValueRangePostList(&db, slot, string(), limit));
 862 }
 863
 864 void
 865 QueryValueLE::serialise(string & result) const
 866 {
 867     // Encode as a range with an empty start (which only takes a single byte to
 868     // encode).
 869     if (slot < 15) {
 870         result += static_cast<char>(0x20 | slot);
 871     } else {
 872         result += static_cast<char>(0x20 | 15);
 873         result += encode_length(slot - 15);
 874     }
 875     result += encode_length(0);
 876     result += encode_length(limit.size());
 877     result += limit;
 878 }
 879
 880 Query::op
 881 QueryValueLE::get_type() const XAPIAN_NOEXCEPT
 882 {
 883     return Query::OP_VALUE_LE;
 884 }
 885
 886 string
 887 QueryValueLE::get_description() const
 888 {
 889     string desc = "VALUE_LE ";
 890     desc += str(slot);
 891     desc += ' ';
 892     description_append(desc, limit);
 893     return desc;
 894 }
 895
 896 PostingIterator::Internal *
 897 QueryValueGE::postlist(QueryOptimiser *qopt, double factor) const
 898 {
 899     LOGCALL(QUERY, PostingIterator::Internal *, "QueryValueGE::postlist", qopt | factor);
 900     if (factor != 0.0)
 901         qopt->inc_total_subqs();
 902     const Xapian::Database::Internal & db = qopt->db;
 903     const string & lb = db.get_value_lower_bound(slot);
 904     // If lb.empty(), the backend doesn't provide value bounds.
 905     if (!lb.empty()) {
 906         if (limit > db.get_value_upper_bound(slot)) {
 907             RETURN(new EmptyPostList);
 908         }
 909         if (limit < lb) {
 910             // The range check isn't needed, but we do still need to consider
 911             // which documents have a value set in this slot.  If this value is
 912             // set for all documents, we can replace it with the MatchAll
 913             // postlist, which is especially efficient if there are no gaps in
 914             // the docids.
 915             if (db.get_value_freq(slot) == db.get_doccount()) {
 916                 RETURN(db.open_post_list(string()));
 917             }
 918         }
 919     }
 920     RETURN(new ValueGePostList(&db, slot, limit));
 921 }
 922
 923 void
 924 QueryValueGE::serialise(string & result) const
 925 {
 926     if (slot < 15) {
 927         result += static_cast<char>(0x20 | 0x10 | slot);
 928     } else {
 929         result += static_cast<char>(0x20 | 0x10 | 15);
 930         result += encode_length(slot - 15);
 931     }
 932     result += encode_length(limit.size());
 933     result += limit;
 934 }
 935
 936 Query::op
 937 QueryValueGE::get_type() const XAPIAN_NOEXCEPT
 938 {
 939     return Query::OP_VALUE_GE;
 940 }
 941
 942 string
 943 QueryValueGE::get_description() const
 944 {
 945     string desc = "VALUE_GE ";
 946     desc += str(slot);
 947     desc += ' ';
 948     description_append(desc, limit);
 949     return desc;
 950 }
 951
 952 PostingIterator::Internal *
 953 QueryWildcard::postlist(QueryOptimiser * qopt, double factor) const
 954 {
 955     LOGCALL(QUERY, PostingIterator::Internal *, "QueryWildcard::postlist", qopt | factor);
 956     Query::op op = combiner;
 957     double or_factor = 0.0;
 958     if (factor == 0.0) {
 959         // If we have a factor of 0, we don't care about the weights, so
 960         // we're just like a normal OR query.
 961         op = Query::OP_OR;
 962     } else if (op != Query::OP_SYNONYM) {
 963         or_factor = factor;
 964     }
 965
 966     bool old_in_synonym = qopt->in_synonym;
 967     if (!old_in_synonym) {
 968         qopt->in_synonym = (op == Query::OP_SYNONYM);
 969     }
 970
 971     OrContext ctx(0);
 972     AutoPtr<TermList> t(qopt->db.open_allterms(pattern));
 973     Xapian::termcount expansions_left = max_expansion;
 974     // If there's no expansion limit, set expansions_left to the maximum
 975     // value Xapian::termcount can hold.
 976     if (expansions_left == 0)
 977         --expansions_left;
 978     while (true) {
 979         t->next();
 980         if (t->at_end())
 981             break;
 982         if (max_type < Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT) {
 983             if (expansions_left-- == 0) {
 984                 if (max_type == Xapian::Query::WILDCARD_LIMIT_FIRST)
 985                     break;
 986                 string msg("Wildcard ");
 987                 msg += pattern;
 988                 msg += "* expands to more than ";
 989                 msg += str(max_expansion);
 990                 msg += " terms";
 991                 throw Xapian::WildcardError(msg);
 992             }
 993         }
 994         const string & term = t->get_termname();
 995         ctx.add_postlist(qopt->open_lazy_post_list(term, 1, or_factor));
 996     }
 997
 998     if (max_type == Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT) {
 999         // FIXME: open_lazy_post_list() results in the term getting registered
1000         // for stats, so we still incur an avoidable cost from the full
1001         // expansion size of the wildcard, which is most likely to be visible
1002         // with the remote backend.  Perhaps we should split creating the lazy
1003         // postlist from registering the term for stats.
1004         if (ctx.size() > max_expansion)
1005             ctx.select_most_frequent(qopt, max_expansion);
1006     }
1007
1008     if (factor != 0.0) {
1009         if (op != Query::OP_SYNONYM) {
1010             qopt->set_total_subqs(qopt->get_total_subqs() + ctx.size());
1011         } else {
1012             qopt->inc_total_subqs();
1013         }
1014     }
1015
1016     qopt->in_synonym = old_in_synonym;
1017
1018     if (ctx.empty())
1019         RETURN(new EmptyPostList);
1020
1021     if (op == Query::OP_MAX)
1022         RETURN(ctx.postlist_max(qopt));
1023
1024     PostList * pl = ctx.postlist(qopt);
1025     if (op == Query::OP_OR)
1026         RETURN(pl);
1027
1028     // We build an OP_OR tree for OP_SYNONYM and then wrap it in a
1029     // SynonymPostList, which supplies the weights.
1030     PostingIterator::Internal * r = qopt->make_synonym_postlist(pl, factor);
1031     RETURN(r);
1032 }
1033
1034 termcount
1035 QueryWildcard::get_length() const XAPIAN_NOEXCEPT
1036 {
1037     // We currently assume wqf is 1 for calculating the synonym's weight
1038     // since conceptually the synonym is one "virtual" term.  If we were
1039     // to combine multiple occurrences of the same synonym expansion into
1040     // a single instance with wqf set, we would want to track the wqf.
1041     return 1;
1042 }
1043
1044 void
1045 QueryWildcard::serialise(string & result) const
1046 {
1047     result += static_cast<char>(0x0b);
1048     result += encode_length(max_expansion);
1049     result += static_cast<unsigned char>(max_type);
1050     result += static_cast<unsigned char>(combiner);
1051     result += encode_length(pattern.size());
1052     result += pattern;
1053 }
1054
1055 Query::op
1056 QueryWildcard::get_type() const XAPIAN_NOEXCEPT
1057 {
1058     return Query::OP_WILDCARD;
1059 }
1060
1061 string
1062 QueryWildcard::get_description() const
1063 {
1064     string desc = "WILDCARD ";
1065     switch (combiner) {
1066         case Query::OP_SYNONYM:
1067             desc += "SYNONYM ";
1068             break;
1069         case Query::OP_MAX:
1070             desc += "MAX ";
1071             break;
1072         case Query::OP_OR:
1073             desc += "OR ";
1074             break;
1075         default:
1076             desc += "BAD ";
1077             break;
1078     }
1079     description_append(desc, pattern);
1080     return desc;
1081 }
1082
1083 Xapian::termcount
1084 QueryBranch::get_length() const XAPIAN_NOEXCEPT
1085 {
1086     // Sum results from all subqueries.
1087     Xapian::termcount result = 0;
1088     QueryVector::const_iterator i;
1089     for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1090         // MatchNothing subqueries should have been removed by done(), but we
1091         // can't use Assert in a XAPIAN_NOEXCEPT function.  But we'll get a
1092         // segfault anyway.
1093         result += (*i).internal->get_length();
1094     }
1095     return result;
1096 }
1097
1098 #define MULTIWAY(X) static_cast<unsigned char>(0x80 | (X) << 3)
1099 #define MISC(X) static_cast<unsigned char>(X)
1100 void
1101 QueryBranch::serialise_(string & result, Xapian::termcount parameter) const
1102 {
1103     static const unsigned char first_byte[] = {
1104         MULTIWAY(0),    // OP_AND
1105         MULTIWAY(1),    // OP_OR
1106         MULTIWAY(2),    // OP_AND_NOT
1107         MULTIWAY(3),    // OP_XOR
1108         MULTIWAY(4),    // OP_AND_MAYBE
1109         MULTIWAY(5),    // OP_FILTER
1110         MULTIWAY(14),   // OP_NEAR
1111         MULTIWAY(15),   // OP_PHRASE
1112         0,              // OP_VALUE_RANGE
1113         MISC(3),        // OP_SCALE_WEIGHT
1114         MULTIWAY(13),   // OP_ELITE_SET
1115         0,              // OP_VALUE_GE
1116         0,              // OP_VALUE_LE
1117         MULTIWAY(6),    // OP_SYNONYM
1118         MULTIWAY(7)     // OP_MAX
1119     };
1120     Xapian::Query::op op_ = get_op();
1121     AssertRel(size_t(op_),<,sizeof(first_byte));
1122     unsigned char ch = first_byte[op_];
1123     if (ch & 0x80) {
1124         // Multi-way operator.
1125         if (subqueries.size() < 8)
1126             ch |= subqueries.size();
1127         result += ch;
1128         if (subqueries.size() >= 8)
1129             result += encode_length(subqueries.size() - 8);
1130         if (ch >= MULTIWAY(13))
1131             result += encode_length(parameter);
1132     } else {
1133         result += ch;
1134     }
1135
1136     QueryVector::const_iterator i;
1137     for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1138         // MatchNothing subqueries should have been removed by done().
1139         Assert((*i).internal.get());
1140         (*i).internal->serialise(result);
1141     }
1142
1143     // For OP_NEAR, OP_PHRASE, and OP_ELITE_SET, the window/set size gets
1144     // appended next by an overloaded serialise() method in the subclass.
1145 }
1146
1147 void
1148 QueryBranch::serialise(string & result) const
1149 {
1150     QueryBranch::serialise_(result);
1151 }
1152
1153 void
1154 QueryNear::serialise(string & result) const
1155 {
1156     // FIXME: window - subqueries.size() ?
1157     QueryBranch::serialise_(result, window);
1158 }
1159
1160 void
1161 QueryPhrase::serialise(string & result) const
1162 {
1163     // FIXME: window - subqueries.size() ?
1164     QueryBranch::serialise_(result, window);
1165 }
1166
1167 void
1168 QueryEliteSet::serialise(string & result) const
1169 {
1170     // FIXME: set_size - subqueries.size() ?
1171     QueryBranch::serialise_(result, set_size);
1172 }
1173
1174 void
1175 QueryBranch::gather_terms(void * void_terms) const
1176 {
1177     // Gather results from all subqueries.
1178     QueryVector::const_iterator i;
1179     for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1180         // MatchNothing subqueries should have been removed by done().
1181         Assert((*i).internal.get());
1182         (*i).internal->gather_terms(void_terms);
1183     }
1184 }
1185
1186 void
1187 QueryBranch::do_or_like(OrContext& ctx, QueryOptimiser * qopt, double factor,
1188                         Xapian::termcount elite_set_size, size_t first) const
1189 {
1190     LOGCALL_VOID(MATCH, "QueryBranch::do_or_like", ctx | qopt | factor | elite_set_size);
1191
1192     // FIXME: we could optimise by merging OP_ELITE_SET and OP_OR like we do
1193     // for AND-like operations.
1194
1195     // OP_SYNONYM with a single subquery is only simplified by
1196     // QuerySynonym::done() if the single subquery is a term or MatchAll.
1197     Assert(subqueries.size() >= 2 || get_op() == Query::OP_SYNONYM);
1198
1199     vector<PostList *> postlists;
1200     postlists.reserve(subqueries.size() - first);
1201
1202     QueryVector::const_iterator q;
1203     for (q = subqueries.begin() + first; q != subqueries.end(); ++q) {
1204         // MatchNothing subqueries should have been removed by done().
1205         Assert((*q).internal.get());
1206         (*q).internal->postlist_sub_or_like(ctx, qopt, factor);
1207     }
1208
1209     if (elite_set_size && elite_set_size < subqueries.size()) {
1210         ctx.select_elite_set(qopt, elite_set_size, subqueries.size());
1211         // FIXME: not right!
1212     }
1213 }
1214
1215 PostList *
1216 QueryBranch::do_synonym(QueryOptimiser * qopt, double factor) const
1217 {
1218     LOGCALL(MATCH, PostList *, "QueryBranch::do_synonym", qopt | factor);
1219     OrContext ctx(subqueries.size());
1220     bool old_in_synonym;
1221     if (factor != 0.0) {
1222         old_in_synonym = qopt->in_synonym;
1223         qopt->in_synonym = true;
1224     }
1225     do_or_like(ctx, qopt, 0.0);
1226     PostList * pl = ctx.postlist(qopt);
1227     if (factor == 0.0) {
1228         // If we have a factor of 0, we don't care about the weights, so
1229         // we're just like a normal OR query.
1230         return pl;
1231     }
1232
1233     qopt->in_synonym = old_in_synonym;
1234
1235     // We currently assume wqf is 1 for calculating the synonym's weight
1236     // since conceptually the synonym is one "virtual" term.  If we were
1237     // to combine multiple occurrences of the same synonym expansion into
1238     // a single instance with wqf set, we would want to track the wqf.
1239
1240     // We build an OP_OR tree for OP_SYNONYM and then wrap it in a
1241     // SynonymPostList, which supplies the weights.
1242     RETURN(qopt->make_synonym_postlist(pl, factor));
1243 }
1244
1245 PostList *
1246 QueryBranch::do_max(QueryOptimiser * qopt, double factor) const
1247 {
1248     LOGCALL(MATCH, PostList *, "QueryBranch::do_max", qopt | factor);
1249     OrContext ctx(subqueries.size());
1250     do_or_like(ctx, qopt, factor);
1251     if (factor == 0.0) {
1252         // If we have a factor of 0, we don't care about the weights, so
1253         // we're just like a normal OR query.
1254         RETURN(ctx.postlist(qopt));
1255     }
1256
1257     // We currently assume wqf is 1 for calculating the OP_MAX's weight
1258     // since conceptually the OP_MAX is one "virtual" term.  If we were
1259     // to combine multiple occurrences of the same OP_MAX expansion into
1260     // a single instance with wqf set, we would want to track the wqf.
1261     RETURN(ctx.postlist_max(qopt));
1262 }
1263
1264 Xapian::Query::op
1265 QueryBranch::get_type() const XAPIAN_NOEXCEPT
1266 {
1267     return get_op();
1268 }
1269
1270 size_t
1271 QueryBranch::get_num_subqueries() const XAPIAN_NOEXCEPT
1272 {
1273     return subqueries.size();
1274 }
1275
1276 const Query
1277 QueryBranch::get_subquery(size_t n) const
1278 {
1279     return subqueries[n];
1280 }
1281
1282 const string
1283 QueryBranch::get_description_helper(const char * op,
1284                                     Xapian::termcount parameter) const
1285 {
1286     string desc = "(";
1287     QueryVector::const_iterator i;
1288     for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1289         if (desc.size() > 1) {
1290             desc += op;
1291             if (parameter) {
1292                 desc += str(parameter);
1293                 desc += ' ';
1294             }
1295         }
1296         Assert((*i).internal.get());
1297         // MatchNothing subqueries should have been removed by done(), and we
1298         // shouldn't get called before done() is, since that happens at the
1299         // end of the Xapian::Query constructor.
1300         desc += (*i).internal->get_description();
1301     }
1302     desc += ')';
1303     return desc;
1304 }
1305
1306 Query::Internal *
1307 QueryWindowed::done()
1308 {
1309     // If window size not specified, default it.
1310     if (window == 0)
1311         window = subqueries.size();
1312     return QueryAndLike::done();
1313 }
1314
1315 void
1316 QueryScaleWeight::gather_terms(void * void_terms) const
1317 {
1318     subquery.internal->gather_terms(void_terms);
1319 }
1320
1321 void QueryTerm::serialise(string & result) const
1322 {
1323     size_t len = term.size();
1324     if (len == 0) {
1325         if (wqf == 1 && pos == 0) {
1326             // Query::MatchAll
1327             result += '\x0f';
1328         } else {
1329             // Weird mutant versions of MatchAll
1330             result += '\x0e';
1331             result += encode_length(wqf);
1332             result += encode_length(pos);
1333         }
1334     } else if (wqf == 1) {
1335         if (pos == 0) {
1336             // Single occurrence probabilistic term without position set.
1337             if (len >= 16) {
1338                 result += static_cast<char>(0x40 | 0x10);
1339                 result += encode_length(term.size() - 16);
1340             } else {
1341                 result += static_cast<char>(0x40 | 0x10 | len);
1342             }
1343             result += term;
1344         } else {
1345             // Single occurrence probabilistic term with position set.
1346             if (len >= 16) {
1347                 result += static_cast<char>(0x40 | 0x20);
1348                 result += encode_length(term.size() - 16);
1349             } else {
1350                 result += static_cast<char>(0x40 | 0x20 | len);
1351             }
1352             result += term;
1353             result += encode_length(pos);
1354         }
1355     } else if (wqf > 1 || pos > 0) {
1356         // General case.
1357         if (len >= 16) {
1358             result += static_cast<char>(0x40 | 0x30);
1359             result += encode_length(term.size() - 16);
1360         } else if (len) {
1361             result += static_cast<char>(0x40 | 0x30 | len);
1362         }
1363         result += term;
1364         result += encode_length(wqf);
1365         result += encode_length(pos);
1366     } else {
1367         // Typical boolean term.
1368         AssertEq(wqf, 0);
1369         AssertEq(pos, 0);
1370         if (len >= 16) {
1371             result += static_cast<char>(0x40);
1372             result += encode_length(term.size() - 16);
1373         } else {
1374             result += static_cast<char>(0x40 | len);
1375         }
1376         result += term;
1377     }
1378 }
1379
1380 void QueryPostingSource::serialise(string & result) const
1381 {
1382     result += static_cast<char>(0x0c);
1383
1384     const string & n = source->name();
1385     result += encode_length(n.size());
1386     result += n;
1387
1388     const string & s = source->serialise();
1389     result += encode_length(s.size());
1390     result += s;
1391 }
1392
1393 void QueryScaleWeight::serialise(string & result) const
1394 {
1395     Assert(subquery.internal.get());
1396     const string & s = serialise_double(scale_factor);
1397     result += '\x0d';
1398     result += s;
1399     subquery.internal->serialise(result);
1400 }
1401
1402 struct is_matchnothing {
1403     bool operator()(const Xapian::Query & q) {
1404         return q.internal.get() == NULL;
1405     }
1406 };
1407
1408 void
1409 QueryAndLike::add_subquery(const Xapian::Query & subquery)
1410 {
1411     // If the AndLike is already MatchNothing, do nothing.
1412     if (subqueries.size() == 1 && subqueries[0].internal.get() == NULL)
1413         return;
1414     // If we're adding MatchNothing, discard any previous subqueries.
1415     if (subquery.internal.get() == NULL)
1416         subqueries.clear();
1417     subqueries.push_back(subquery);
1418 }
1419
1420 Query::Internal *
1421 QueryAndLike::done()
1422 {
1423     // Empty AndLike gives MatchNothing.
1424     if (subqueries.empty())
1425         return NULL;
1426     // We handle any subquery being MatchNothing in add_subquery() by leaving
1427     // a single MatchNothing subquery, and so this check results in AndLike
1428     // giving MatchNothing.
1429     if (subqueries.size() == 1)
1430         return subqueries[0].internal.get();
1431     return this;
1432 }
1433
1434 PostingIterator::Internal *
1435 QueryAndLike::postlist(QueryOptimiser * qopt, double factor) const
1436 {
1437     LOGCALL(QUERY, PostingIterator::Internal *, "QueryAndLike::postlist", qopt | factor);
1438     AndContext ctx(subqueries.size());
1439     postlist_sub_and_like(ctx, qopt, factor);
1440     RETURN(ctx.postlist(qopt));
1441 }
1442
1443 void
1444 QueryAndLike::postlist_sub_and_like(AndContext& ctx, QueryOptimiser * qopt, double factor) const
1445 {
1446     QueryVector::const_iterator i;
1447     for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1448         // MatchNothing subqueries should have been removed by done().
1449         Assert((*i).internal.get());
1450         (*i).internal->postlist_sub_and_like(ctx, qopt, factor);
1451     }
1452 }
1453
1454 void
1455 QueryOrLike::add_subquery(const Xapian::Query & subquery)
1456 {
1457     // Drop any subqueries which are MatchNothing.
1458     if (subquery.internal.get() != NULL)
1459         subqueries.push_back(subquery);
1460 }
1461
1462 Query::Internal *
1463 QueryOrLike::done()
1464 {
1465     // An empty OrLike gives MatchNothing.  Note that add_subquery() drops any
1466     // subqueries which are MatchNothing.
1467     if (subqueries.empty())
1468         return NULL;
1469     if (subqueries.size() == 1)
1470         return subqueries[0].internal.get();
1471     return this;
1472 }
1473
1474 void
1475 QueryAndNot::add_subquery(const Xapian::Query & subquery)
1476 {
1477     // If the left side of AndNot is already MatchNothing, do nothing.
1478     if (subqueries.size() == 1 && subqueries[0].internal.get() == NULL)
1479         return;
1480     // Drop any 2nd or subsequent subqueries which are MatchNothing.
1481     if (subquery.internal.get() != NULL || subqueries.empty())
1482         subqueries.push_back(subquery);
1483 }
1484
1485 Query::Internal *
1486 QueryAndNot::done()
1487 {
1488     // Any MatchNothing right subqueries get discarded by add_subquery() - if
1489     // that leaves just the left subquery, return that.
1490     //
1491     // If left subquery is MatchNothing, then add_subquery() discards all right
1492     // subqueries, so this check also gives MatchNothing for this case.
1493     if (subqueries.size() == 1)
1494         return subqueries[0].internal.get();
1495     return this;
1496 }
1497
1498 void
1499 QueryAndMaybe::add_subquery(const Xapian::Query & subquery)
1500 {
1501     // If the left side of AndMaybe is already MatchNothing, do nothing.
1502     if (subqueries.size() == 1 && subqueries[0].internal.get() == NULL)
1503         return;
1504     // Drop any 2nd or subsequent subqueries which are MatchNothing.
1505     if (subquery.internal.get() != NULL || subqueries.empty())
1506         subqueries.push_back(subquery);
1507 }
1508
1509 Query::Internal *
1510 QueryAndMaybe::done()
1511 {
1512     // Any MatchNothing right subqueries get discarded by add_subquery() - if
1513     // that leaves just the left subquery, return that.
1514     //
1515     // If left subquery is MatchNothing, then add_subquery() discards all right
1516     // subqueries, so this check also gives MatchNothing for this case.
1517     if (subqueries.size() == 1)
1518         return subqueries[0].internal.get();
1519     return this;
1520 }
1521
1522 PostingIterator::Internal *
1523 QueryOr::postlist(QueryOptimiser * qopt, double factor) const
1524 {
1525     LOGCALL(QUERY, PostingIterator::Internal *, "QueryOr::postlist", qopt | factor);
1526     OrContext ctx(subqueries.size());
1527     do_or_like(ctx, qopt, factor);
1528     RETURN(ctx.postlist(qopt));
1529 }
1530
1531 void
1532 QueryOr::postlist_sub_or_like(OrContext& ctx, QueryOptimiser * qopt, double factor) const
1533 {
1534     do_or_like(ctx, qopt, factor);
1535 }
1536
1537 PostingIterator::Internal *
1538 QueryAndNot::postlist(QueryOptimiser * qopt, double factor) const
1539 {
1540     LOGCALL(QUERY, PostingIterator::Internal *, "QueryAndNot::postlist", qopt | factor);
1541     // FIXME: Combine and-like side with and-like stuff above.
1542     AutoPtr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
1543     OrContext ctx(subqueries.size() - 1);
1544     do_or_like(ctx, qopt, 0.0, 0, 1);
1545     AutoPtr<PostList> r(ctx.postlist(qopt));
1546     RETURN(new AndNotPostList(l.release(), r.release(),
1547                               qopt->matcher, qopt->db_size));
1548 }
1549
1550 PostingIterator::Internal *
1551 QueryXor::postlist(QueryOptimiser * qopt, double factor) const
1552 {
1553     LOGCALL(QUERY, PostingIterator::Internal *, "QueryXor::postlist", qopt | factor);
1554     XorContext ctx(subqueries.size());
1555     postlist_sub_xor(ctx, qopt, factor);
1556     RETURN(ctx.postlist(qopt));
1557 }
1558
1559 void
1560 QueryXor::postlist_sub_xor(XorContext& ctx, QueryOptimiser * qopt, double factor) const
1561 {
1562     QueryVector::const_iterator i;
1563     for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1564         // MatchNothing subqueries should have been removed by done().
1565         Assert((*i).internal.get());
1566         (*i).internal->postlist_sub_xor(ctx, qopt, factor);
1567     }
1568 }
1569
1570 PostingIterator::Internal *
1571 QueryAndMaybe::postlist(QueryOptimiser * qopt, double factor) const
1572 {
1573     LOGCALL(QUERY, PostingIterator::Internal *, "QueryAndMaybe::postlist", qopt | factor);
1574     // FIXME: Combine and-like side with and-like stuff above.
1575     AutoPtr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
1576     OrContext ctx(subqueries.size() - 1);
1577     do_or_like(ctx, qopt, factor, 0, 1);
1578     AutoPtr<PostList> r(ctx.postlist(qopt));
1579     RETURN(new AndMaybePostList(l.release(), r.release(),
1580                                 qopt->matcher, qopt->db_size));
1581 }
1582
1583 PostingIterator::Internal *
1584 QueryFilter::postlist(QueryOptimiser * qopt, double factor) const
1585 {
1586     LOGCALL(QUERY, PostingIterator::Internal *, "QueryFilter::postlist", qopt | factor);
1587     // FIXME: Combine and-like stuff, like QueryOptimiser.
1588     AssertEq(subqueries.size(), 2);
1589     PostList * pls[2];
1590     AutoPtr<PostList> l(subqueries[0].internal->postlist(qopt, factor));
1591     pls[1] = subqueries[1].internal->postlist(qopt, 0.0);
1592     pls[0] = l.release();
1593     RETURN(new MultiAndPostList(pls, pls + 2, qopt->matcher, qopt->db_size));
1594 }
1595
1596 void
1597 QueryFilter::postlist_sub_and_like(AndContext& ctx, QueryOptimiser * qopt, double factor) const
1598 {
1599     QueryVector::const_iterator i;
1600     for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1601         // MatchNothing subqueries should have been removed by done().
1602         Assert((*i).internal.get());
1603         (*i).internal->postlist_sub_and_like(ctx, qopt, factor);
1604         // Second and subsequent subqueries are unweighted.
1605         factor = 0.0;
1606     }
1607 }
1608
1609 void
1610 QueryWindowed::postlist_windowed(Query::op op, AndContext& ctx, QueryOptimiser * qopt, double factor) const
1611 {
1612     // FIXME: should has_positions() be on the combined DB (not this sub)?
1613     if (qopt->db.has_positions()) {
1614         bool old_need_positions = qopt->need_positions;
1615         qopt->need_positions = true;
1616
1617         QueryVector::const_iterator i;
1618         for (i = subqueries.begin(); i != subqueries.end(); ++i) {
1619             // MatchNothing subqueries should have been removed by done().
1620             Assert((*i).internal.get());
1621             ctx.add_postlist((*i).internal->postlist(qopt, factor));
1622         }
1623         // Record the positional filter to apply higher up the tree.
1624         ctx.add_pos_filter(op, subqueries.size(), window);
1625
1626         qopt->need_positions = old_need_positions;
1627     } else {
1628         QueryAndLike::postlist_sub_and_like(ctx, qopt, factor);
1629     }
1630 }
1631
1632 void
1633 QueryPhrase::postlist_sub_and_like(AndContext & ctx, QueryOptimiser * qopt, double factor) const
1634 {
1635     QueryWindowed::postlist_windowed(Query::OP_PHRASE, ctx, qopt, factor);
1636 }
1637
1638 void
1639 QueryNear::postlist_sub_and_like(AndContext & ctx, QueryOptimiser * qopt, double factor) const
1640 {
1641     QueryWindowed::postlist_windowed(Query::OP_NEAR, ctx, qopt, factor);
1642 }
1643
1644 PostingIterator::Internal *
1645 QueryEliteSet::postlist(QueryOptimiser * qopt, double factor) const
1646 {
1647     LOGCALL(QUERY, PostingIterator::Internal *, "QueryEliteSet::postlist", qopt | factor);
1648     OrContext ctx(subqueries.size());
1649     do_or_like(ctx, qopt, factor, set_size);
1650     RETURN(ctx.postlist(qopt));
1651 }
1652
1653 void
1654 QueryEliteSet::postlist_sub_or_like(OrContext& ctx, QueryOptimiser * qopt, double factor) const
1655 {
1656     do_or_like(ctx, qopt, factor, set_size);
1657 }
1658
1659 PostingIterator::Internal *
1660 QuerySynonym::postlist(QueryOptimiser * qopt, double factor) const
1661 {
1662     LOGCALL(QUERY, PostingIterator::Internal *, "QuerySynonym::postlist", qopt | factor);
1663     // Save and restore total_subqs so we only add one for the whole
1664     // OP_SYNONYM subquery (or none if we're not weighted).
1665     Xapian::termcount save_total_subqs = qopt->get_total_subqs();
1666     if (factor != 0.0)
1667         ++save_total_subqs;
1668     PostList * pl = do_synonym(qopt, factor);
1669     qopt->set_total_subqs(save_total_subqs);
1670     RETURN(pl);
1671 }
1672
1673 Query::Internal *
1674 QuerySynonym::done()
1675 {
1676     // An empty Synonym gives MatchNothing.  Note that add_subquery() drops any
1677     // subqueries which are MatchNothing.
1678     if (subqueries.empty())
1679         return NULL;
1680     // Synonym of a single subquery should only be simplified if that subquery
1681     // is a term (or MatchAll), or if it's also OP_SYNONYM.  Note that
1682     // MatchNothing subqueries are dropped, so we'd never get here with a
1683     // single MatchNothing subquery.
1684     if (subqueries.size() == 1) {
1685         Query::op sub_type = subqueries[0].get_type();
1686         if (sub_type == Query::LEAF_TERM || sub_type == Query::LEAF_MATCH_ALL ||
1687             sub_type == Query::OP_SYNONYM) {
1688             return subqueries[0].internal.get();
1689         }
1690     }
1691     return this;
1692 }
1693
1694 PostingIterator::Internal *
1695 QueryMax::postlist(QueryOptimiser * qopt, double factor) const
1696 {
1697     LOGCALL(QUERY, PostingIterator::Internal *, "QueryMax::postlist", qopt | factor);
1698     // Save and restore total_subqs so we only add one for the whole
1699     // OP_MAX subquery (or none if we're not weighted).
1700     Xapian::termcount save_total_subqs = qopt->get_total_subqs();
1701     if (factor != 0.0)
1702         ++save_total_subqs;
1703     PostList * pl = do_max(qopt, factor);
1704     qopt->set_total_subqs(save_total_subqs);
1705     RETURN(pl);
1706 }
1707
1708 Xapian::Query::op
1709 QueryAnd::get_op() const
1710 {
1711     return Xapian::Query::OP_AND;
1712 }
1713
1714 Xapian::Query::op
1715 QueryOr::get_op() const
1716 {
1717     return Xapian::Query::OP_OR;
1718 }
1719
1720 Xapian::Query::op
1721 QueryAndNot::get_op() const
1722 {
1723     return Xapian::Query::OP_AND_NOT;
1724 }
1725
1726 Xapian::Query::op
1727 QueryXor::get_op() const
1728 {
1729     return Xapian::Query::OP_XOR;
1730 }
1731
1732 Xapian::Query::op
1733 QueryAndMaybe::get_op() const
1734 {
1735     return Xapian::Query::OP_AND_MAYBE;
1736 }
1737
1738 Xapian::Query::op
1739 QueryFilter::get_op() const
1740 {
1741     return Xapian::Query::OP_FILTER;
1742 }
1743
1744 Xapian::Query::op
1745 QueryNear::get_op() const
1746 {
1747     return Xapian::Query::OP_NEAR;
1748 }
1749
1750 Xapian::Query::op
1751 QueryPhrase::get_op() const
1752 {
1753     return Xapian::Query::OP_PHRASE;
1754 }
1755
1756 Xapian::Query::op
1757 QueryEliteSet::get_op() const
1758 {
1759     return Xapian::Query::OP_ELITE_SET;
1760 }
1761
1762 Xapian::Query::op
1763 QuerySynonym::get_op() const
1764 {
1765     return Xapian::Query::OP_SYNONYM;
1766 }
1767
1768 Xapian::Query::op
1769 QueryMax::get_op() const
1770 {
1771     return Xapian::Query::OP_MAX;
1772 }
1773
1774 Xapian::Query::op
1775 QueryWildcard::get_op() const
1776 {
1777     return Xapian::Query::OP_WILDCARD;
1778 }
1779
1780 string
1781 QueryAnd::get_description() const
1782 {
1783     return get_description_helper(" AND ");
1784 }
1785
1786 string
1787 QueryOr::get_description() const
1788 {
1789     return get_description_helper(" OR ");
1790 }
1791
1792 string
1793 QueryAndNot::get_description() const
1794 {
1795     return get_description_helper(" AND_NOT ");
1796 }
1797
1798 string
1799 QueryXor::get_description() const
1800 {
1801     return get_description_helper(" XOR ");
1802 }
1803
1804 string
1805 QueryAndMaybe::get_description() const
1806 {
1807     return get_description_helper(" AND_MAYBE ");
1808 }
1809
1810 string
1811 QueryFilter::get_description() const
1812 {
1813     return get_description_helper(" FILTER ");
1814 }
1815
1816 string
1817 QueryNear::get_description() const
1818 {
1819     return get_description_helper(" NEAR ", window);
1820 }
1821
1822 string
1823 QueryPhrase::get_description() const
1824 {
1825     return get_description_helper(" PHRASE ", window);
1826 }
1827
1828 string
1829 QueryEliteSet::get_description() const
1830 {
1831     return get_description_helper(" ELITE_SET ", set_size);
1832 }
1833
1834 string
1835 QuerySynonym::get_description() const
1836 {
1837     if (subqueries.size() == 1) {
1838         string d = "(SYNONYM ";
1839         d += subqueries[0].internal->get_description();
1840         d += ")";
1841         return d;
1842     }
1843     return get_description_helper(" SYNONYM ");
1844 }
1845
1846 string
1847 QueryMax::get_description() const
1848 {
1849     return get_description_helper(" MAX ");
1850 }
1851
1852 Xapian::Query::op
1853 QueryInvalid::get_type() const XAPIAN_NOEXCEPT
1854 {
1855     return Xapian::Query::OP_INVALID;
1856 }
1857
1858 PostingIterator::Internal *
1859 QueryInvalid::postlist(QueryOptimiser *, double) const
1860 {
1861     throw Xapian::InvalidOperationError("Query is invalid");
1862 }
1863
1864 void
1865 QueryInvalid::serialise(std::string & result) const
1866 {
1867     result += static_cast<char>(0x00);
1868 }
1869
1870 string
1871 QueryInvalid::get_description() const
1872 {
1873     return "<INVALID>";
1874 }
1875
1876 }
1877 }