Split PostList and PostingIterator::Internal
[xapian.git] / xapian-core / include / xapian / query.h
blobdce38d5c76d7eeb4f746920dcd6d609428f976ba
1 /** @file query.h
2 * @brief Xapian::Query API class
3 */
4 /* Copyright (C) 2011,2012,2013,2014,2015,2016,2017 Olly Betts
5 * Copyright (C) 2008 Richard Boulton
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #ifndef XAPIAN_INCLUDED_QUERY_H
23 #define XAPIAN_INCLUDED_QUERY_H
25 #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD
26 # error "Never use <xapian/query.h> directly; include <xapian.h> instead."
27 #endif
29 #include <string>
31 #include <xapian/attributes.h>
32 #include <xapian/intrusive_ptr.h>
33 #include <xapian/postingiterator.h>
34 #include <xapian/registry.h>
35 #include <xapian/termiterator.h>
36 #include <xapian/types.h>
37 #include <xapian/visibility.h>
39 namespace Xapian {
41 class PostingSource;
43 /// Class representing a query.
44 class XAPIAN_VISIBILITY_DEFAULT Query {
45 public:
46 /// Class representing the query internals.
47 class Internal;
48 /// @private @internal Reference counted internals.
49 Xapian::Internal::intrusive_ptr<Internal> internal;
51 /** A query matching no documents.
53 * Exactly equivalent to Xapian::Query().
55 static const Xapian::Query MatchNothing;
57 /** A query matching all documents.
59 * Exactly equivalent to Xapian::Query(std::string()).
61 static const Xapian::Query MatchAll;
63 /** Query operators. */
64 enum op {
65 OP_AND = 0,
66 OP_OR = 1,
67 OP_AND_NOT = 2,
68 OP_XOR = 3,
69 OP_AND_MAYBE = 4,
70 OP_FILTER = 5,
71 OP_NEAR = 6,
72 OP_PHRASE = 7,
73 OP_VALUE_RANGE = 8,
74 OP_SCALE_WEIGHT = 9,
76 /** Pick the best N subqueries and combine with OP_OR.
78 * If you want to implement a feature which finds documents similar to
79 * a piece of text, an obvious approach is to build an "OR" query from
80 * all the terms in the text, and run this query against a database
81 * containing the documents. However such a query can contain a lot
82 * of terms and be quite slow to perform, yet many of these terms
83 * don't contribute usefully to the results.
85 * The OP_ELITE_SET operator can be used instead of OP_OR in this
86 * situation. OP_ELITE_SET selects the most important ''N'' terms and
87 * then acts as an OP_OR query with just these, ignoring any other
88 * terms. This will usually return results just as good as the full
89 * OP_OR query, but much faster.
91 * In general, the OP_ELITE_SET operator can be used when you have a
92 * large OR query, but it doesn't matter if the search completely
93 * ignores some of the less important terms in the query.
95 * The subqueries don't have to be terms, but if they aren't then
96 * OP_ELITE_SET will look at the estimated frequencies of the
97 * subqueries and so could pick a subset which don't actually
98 * match any documents even if the full OR would match some.
100 * You can specify a parameter to the query constructor which controls
101 * the number of terms which OP_ELITE_SET will pick. If not
102 * specified, this defaults to 10 (Xapian used to default to
103 * <code>ceil(sqrt(number_of_subqueries))</code> if there are more
104 * than 100 subqueries, but this rather arbitrary special case was
105 * dropped in 1.3.0). For example, this will pick the best 7 terms:
107 * <pre>
108 * Xapian::Query query(Xapian::Query::OP_ELITE_SET, subqs.begin(), subqs.end(), 7);
109 * </pre>
111 * If the number of subqueries is less than this threshold,
112 * OP_ELITE_SET behaves identically to OP_OR.
114 OP_ELITE_SET = 10,
115 OP_VALUE_GE = 11,
116 OP_VALUE_LE = 12,
117 OP_SYNONYM = 13,
118 /** Pick the maximum weight of any subquery.
120 * Matches the same documents as @a OP_OR, but the weight contributed
121 * is the maximum weight from any matching subquery (for OP_OR, it's
122 * the sum of the weights from the matching subqueries).
124 * Added in Xapian 1.3.2.
126 OP_MAX = 14,
127 /** Wildcard expansion.
129 * Added in Xapian 1.3.3.
131 OP_WILDCARD = 15,
133 OP_INVALID = 99,
135 LEAF_TERM = 100,
136 LEAF_POSTING_SOURCE,
137 LEAF_MATCH_ALL,
138 LEAF_MATCH_NOTHING
141 enum {
142 /** Throw an error if OP_WILDCARD exceeds its expansion limit.
144 * Xapian::WildcardError will be thrown when the query is actually
145 * run.
147 WILDCARD_LIMIT_ERROR,
148 /** Stop expanding when OP_WILDCARD reaches its expansion limit.
150 * This makes the wildcard expand to only the first N terms (sorted
151 * by byte order).
153 WILDCARD_LIMIT_FIRST,
154 /** Limit OP_WILDCARD expansion to the most frequent terms.
156 * If OP_WILDCARD would expand to more than its expansion limit, the
157 * most frequent terms are taken. This approach works well for cases
158 * such as expanding a partial term at the end of a query string which
159 * the user hasn't finished typing yet - as well as being less expensive
160 * to evaluate than the full expansion, using only the most frequent
161 * terms tends to give better results too.
163 WILDCARD_LIMIT_MOST_FREQUENT
166 /// Default constructor.
167 XAPIAN_NOTHROW(Query()) { }
169 /// Destructor.
170 ~Query() { }
172 /** Copying is allowed.
174 * The internals are reference counted, so copying is cheap.
176 Query(const Query & o) : internal(o.internal) { }
178 /** Copying is allowed.
180 * The internals are reference counted, so assignment is cheap.
182 Query & operator=(const Query & o) { internal = o.internal; return *this; }
184 /** Construct a Query object for a term. */
185 Query(const std::string & term,
186 Xapian::termcount wqf = 1,
187 Xapian::termpos pos = 0);
189 /** Construct a Query object for a PostingSource. */
190 explicit Query(Xapian::PostingSource * source);
192 /** Scale using OP_SCALE_WEIGHT.
194 * @param factor Non-negative real number to multiply weights by.
195 * @param subquery Query object to scale weights from.
197 Query(double factor, const Xapian::Query & subquery);
199 /** Scale using OP_SCALE_WEIGHT.
201 * In this form, the op_ parameter is totally redundant - use
202 * Query(factor, subquery) in preference.
204 * @param op_ Must be OP_SCALE_WEIGHT.
205 * @param factor Non-negative real number to multiply weights by.
206 * @param subquery Query object to scale weights from.
208 Query(op op_, const Xapian::Query & subquery, double factor);
210 /** Construct a Query object by combining two others.
212 * @param op_ The operator to combine the queries with.
213 * @param a First subquery.
214 * @param b Second subquery.
216 Query(op op_, const Xapian::Query & a, const Xapian::Query & b)
218 init(op_, 2);
219 bool positional = (op_ == OP_NEAR || op_ == OP_PHRASE);
220 add_subquery(positional, a);
221 add_subquery(positional, b);
222 done();
225 /** Construct a Query object by combining two terms.
227 * @param op_ The operator to combine the terms with.
228 * @param a First term.
229 * @param b Second term.
231 Query(op op_, const std::string & a, const std::string & b)
233 init(op_, 2);
234 add_subquery(false, a);
235 add_subquery(false, b);
236 done();
239 /** Construct a Query object for a single-ended value range.
241 * @param op_ Must be OP_VALUE_LE or OP_VALUE_GE currently.
242 * @param slot The value slot to work over.
243 * @param range_limit The limit of the range.
245 Query(op op_, Xapian::valueno slot, const std::string & range_limit);
247 /** Construct a Query object for a value range.
249 * @param op_ Must be OP_VALUE_RANGE currently.
250 * @param slot The value slot to work over.
251 * @param range_lower Lower end of the range.
252 * @param range_upper Upper end of the range.
254 Query(op op_, Xapian::valueno slot,
255 const std::string & range_lower, const std::string & range_upper);
257 /** Query constructor for OP_WILDCARD queries.
259 * @param op_ Must be OP_WILDCARD
260 * @param pattern The wildcard pattern - currently this is just a string
261 * and the wildcard expands to terms which start with
262 * exactly this string.
263 * @param max_expansion The maximum number of terms to expand to
264 * (default: 0, which means no limit)
265 * @param max_type How to enforce max_expansion - one of
266 * @a WILDCARD_LIMIT_ERROR (the default),
267 * @a WILDCARD_LIMIT_FIRST or
268 * @a WILDCARD_LIMIT_MOST_FREQUENT.
269 * When searching multiple databases, the expansion limit
270 * is currently applied independently for each database,
271 * so the total number of terms may be higher than the
272 * limit. This is arguably a bug, and may change in
273 * future versions.
274 * @param combiner The @a op_ to combine the terms with - one of
275 * @a OP_SYNONYM (the default), @a OP_OR or @a OP_MAX.
277 Query(op op_,
278 const std::string & pattern,
279 Xapian::termcount max_expansion = 0,
280 int max_type = WILDCARD_LIMIT_ERROR,
281 op combiner = OP_SYNONYM);
283 /** Construct a Query object from a begin/end iterator pair.
285 * Dereferencing the iterator should return a Xapian::Query, a non-NULL
286 * Xapian::Query*, a std::string or a type which converts to one of
287 * these (e.g. const char*).
289 * @param op_ The operator to combine the queries with.
290 * @param begin Begin iterator.
291 * @param end End iterator.
292 * @param window Window size for OP_NEAR and OP_PHRASE, or 0 to use the
293 * number of subqueries as the window size (default: 0).
295 template<typename I>
296 Query(op op_, I begin, I end, Xapian::termcount window = 0)
298 if (begin != end) {
299 typedef typename std::iterator_traits<I>::iterator_category iterator_category;
300 init(op_, window, begin, end, iterator_category());
301 bool positional = (op_ == OP_NEAR || op_ == OP_PHRASE);
302 for (I i = begin; i != end; ++i) {
303 add_subquery(positional, *i);
305 done();
309 #ifdef SWIG
310 // SWIG's %template doesn't seem to handle a templated ctor so we
311 // provide this fake specialised form of the above prototype.
312 Query(op op_, XapianSWIGQueryItor qbegin, XapianSWIGQueryItor qend,
313 Xapian::termcount parameter = 0);
315 # ifdef SWIGJAVA
316 Query(op op_, XapianSWIGStrItor qbegin, XapianSWIGStrItor qend,
317 Xapian::termcount parameter = 0);
318 # endif
319 #endif
321 /** Begin iterator for terms in the query object.
323 * The iterator returns terms in ascending query position order, and
324 * will return the same term in each unique position it occurs in.
325 * If you want the terms in sorted order and without duplicates, see
326 * get_unique_terms_begin().
328 const TermIterator get_terms_begin() const;
330 /// End iterator for terms in the query object.
331 const TermIterator XAPIAN_NOTHROW(get_terms_end() const) {
332 return TermIterator();
335 /** Begin iterator for unique terms in the query object.
337 * Terms are sorted and terms with the same name removed from the list.
339 * If you want the terms in ascending query position order, see
340 * get_terms_begin().
342 const TermIterator get_unique_terms_begin() const;
344 /** Return the length of this query object. */
345 Xapian::termcount XAPIAN_NOTHROW(get_length() const) XAPIAN_PURE_FUNCTION;
347 /** Check if this query is Xapian::Query::MatchNothing. */
348 bool XAPIAN_NOTHROW(empty() const) {
349 return internal.get() == 0;
352 /** Serialise this object into a string. */
353 std::string serialise() const;
355 /** Unserialise a string and return a Query object.
357 * @param serialised the string to unserialise.
358 * @param reg Xapian::Registry object to use to unserialise
359 * user-subclasses of Xapian::PostingSource
360 * (default: standard registry).
362 static const Query unserialise(const std::string & serialised,
363 const Registry & reg = Registry());
365 /** Get the type of the top level of the query. */
366 op XAPIAN_NOTHROW(get_type() const) XAPIAN_PURE_FUNCTION;
368 /** Get the number of subqueries of the top level query. */
369 size_t XAPIAN_NOTHROW(get_num_subqueries() const) XAPIAN_PURE_FUNCTION;
371 /** Get the wqf parameter of a leaf node. */
372 Xapian::termcount get_leaf_wqf() const;
374 /** Get the pos parameter of a leaf node. */
375 Xapian::termpos get_leaf_pos() const;
377 /** Read a top level subquery.
379 * @param n Return the n-th subquery (starting from 0) - only valid when
380 * 0 <= n < get_num_subqueries().
382 const Query get_subquery(size_t n) const;
384 /// Return a string describing this object.
385 std::string get_description() const;
387 /** Combine with another Xapian::Query object using OP_AND. */
388 const Query operator&=(const Query & o) {
389 return (*this = Query(OP_AND, *this, o));
392 /** Combine with another Xapian::Query object using OP_OR. */
393 const Query operator|=(const Query & o) {
394 return (*this = Query(OP_OR, *this, o));
397 /** Combine with another Xapian::Query object using OP_XOR. */
398 const Query operator^=(const Query & o) {
399 return (*this = Query(OP_XOR, *this, o));
402 /** Scale using OP_SCALE_WEIGHT.
404 * @param factor Non-negative real number to multiply weights by.
406 const Query operator*=(double factor) {
407 return (*this = Query(factor, *this));
410 /** Inverse scale using OP_SCALE_WEIGHT.
412 * @param factor Positive real number to divide weights by.
414 const Query operator/=(double factor) {
415 return (*this = Query(1.0 / factor, *this));
418 /// @private @internal Wrap an existing Internal.
419 explicit Query(Internal * internal_) : internal(internal_) { }
421 /** Construct with just an operator.
423 * @param op_ The operator to use - currently only OP_INVALID is useful.
425 explicit Query(Query::op op_) {
426 init(op_, 0);
427 if (op_ != Query::OP_INVALID) done();
430 private:
431 void init(Query::op op_, size_t n_subqueries, Xapian::termcount window = 0);
433 template<typename I>
434 void init(Query::op op_, Xapian::termcount window,
435 const I & begin, const I & end, std::random_access_iterator_tag)
437 init(op_, end - begin, window);
440 template<typename I>
441 void init(Query::op op_, Xapian::termcount window,
442 const I &, const I &, std::input_iterator_tag)
444 init(op_, 0, window);
447 void add_subquery(bool positional, const Xapian::Query & subquery);
449 void add_subquery(bool, const std::string & subquery) {
450 add_subquery(false, Xapian::Query(subquery));
453 void add_subquery(bool positional, const Xapian::Query * subquery) {
454 // FIXME: subquery NULL?
455 add_subquery(positional, *subquery);
458 void done();
461 /** Combine two Xapian::Query objects using OP_AND. */
462 inline const Query
463 operator&(const Query & a, const Query & b)
465 return Query(Query::OP_AND, a, b);
468 /** Combine two Xapian::Query objects using OP_OR. */
469 inline const Query
470 operator|(const Query & a, const Query & b)
472 return Query(Query::OP_OR, a, b);
475 /** Combine two Xapian::Query objects using OP_XOR. */
476 inline const Query
477 operator^(const Query & a, const Query & b)
479 return Query(Query::OP_XOR, a, b);
482 /** Scale a Xapian::Query object using OP_SCALE_WEIGHT.
484 * @param factor Non-negative real number to multiply weights by.
485 * @param q Xapian::Query object.
487 inline const Query
488 operator*(double factor, const Query & q)
490 return Query(factor, q);
493 /** Scale a Xapian::Query object using OP_SCALE_WEIGHT.
495 * @param q Xapian::Query object.
496 * @param factor Non-negative real number to multiply weights by.
498 inline const Query
499 operator*(const Query & q, double factor)
501 return Query(factor, q);
504 /** Inverse-scale a Xapian::Query object using OP_SCALE_WEIGHT.
506 * @param factor Positive real number to divide weights by.
507 * @param q Xapian::Query object.
509 inline const Query
510 operator/(const Query & q, double factor)
512 return Query(1.0 / factor, q);
515 /** @private @internal */
516 class InvertedQuery_ {
517 const Query & query;
519 void operator=(const InvertedQuery_ &);
521 explicit InvertedQuery_(const Query & query_) : query(query_) { }
523 public:
524 // GCC 4.2 seems to needs a copy ctor.
525 InvertedQuery_(const InvertedQuery_ & o) : query(o.query) { }
527 operator Query() const {
528 return Query(Query::OP_AND_NOT, Query::MatchAll, query);
531 friend const InvertedQuery_ operator~(const Query &q);
533 friend const Query operator&(const Query & a, const InvertedQuery_ & b);
536 /** Combine two Xapian::Query objects using OP_AND_NOT.
538 * E.g. Xapian::Query q = q1 &~ q2;
540 inline const Query
541 operator&(const Query & a, const InvertedQuery_ & b)
543 return Query(Query::OP_AND_NOT, a, b.query);
546 #ifndef DOXYGEN /* @internal doesn't seem to avoid a warning here. */
547 /** @internal Helper to allow q1 &~ q2 to work. */
548 inline const InvertedQuery_
549 operator~(const Query &q)
551 return InvertedQuery_(q);
553 #endif
555 namespace Internal {
556 class AndContext;
557 class OrContext;
558 class XorContext;
560 class PostList;
561 class QueryOptimiser;
564 /** @private @internal */
565 class Query::Internal : public Xapian::Internal::intrusive_base {
566 public:
567 XAPIAN_NOTHROW(Internal()) { }
569 virtual ~Internal();
571 virtual
572 Xapian::Internal::PostList* postlist(Xapian::Internal::QueryOptimiser* qopt,
573 double factor) const = 0;
575 virtual void postlist_sub_and_like(Xapian::Internal::AndContext& ctx,
576 Xapian::Internal::QueryOptimiser* qopt,
577 double factor) const;
579 virtual void postlist_sub_or_like(Xapian::Internal::OrContext& ctx,
580 Xapian::Internal::QueryOptimiser* qopt,
581 double factor) const;
583 virtual void postlist_sub_xor(Xapian::Internal::XorContext& ctx,
584 Xapian::Internal::QueryOptimiser* qopt,
585 double factor) const;
587 virtual termcount XAPIAN_NOTHROW(get_length() const) XAPIAN_PURE_FUNCTION;
589 virtual void serialise(std::string & result) const = 0;
591 static Query::Internal * unserialise(const char ** p, const char * end, const Registry & reg);
593 virtual Query::op XAPIAN_NOTHROW(get_type() const) XAPIAN_PURE_FUNCTION = 0;
594 virtual size_t XAPIAN_NOTHROW(get_num_subqueries() const) XAPIAN_PURE_FUNCTION;
595 virtual const Query get_subquery(size_t n) const;
596 virtual termcount get_wqf() const;
597 virtual termpos get_pos() const;
599 virtual std::string get_description() const = 0;
601 // Pass argument as void* to avoid need to include <vector>.
602 virtual void gather_terms(void * void_terms) const;
607 #endif // XAPIAN_INCLUDED_QUERY_H