Address MatchAll thread-safety issues
[xapian.git] / xapian-core / include / xapian / query.h
blobfa11e56399524643559f4ffd7d3954a7f34d22e5
1 /** @file query.h
2 * @brief Xapian::Query API class
3 */
4 /* Copyright (C) 2011,2012,2013,2014,2015,2016,2017,2018 Olly Betts
5 * Copyright (C) 2008 Richard Boulton
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #ifndef XAPIAN_INCLUDED_QUERY_H
23 #define XAPIAN_INCLUDED_QUERY_H
25 #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD
26 # error "Never use <xapian/query.h> directly; include <xapian.h> instead."
27 #endif
29 #include <string>
31 #include <xapian/attributes.h>
32 #include <xapian/intrusive_ptr.h>
33 #include <xapian/postingiterator.h>
34 #include <xapian/registry.h>
35 #include <xapian/termiterator.h>
36 #include <xapian/types.h>
37 #include <xapian/visibility.h>
39 namespace Xapian {
41 class PostingSource;
43 /// Class representing a query.
44 class XAPIAN_VISIBILITY_DEFAULT Query {
45 public:
46 /// Class representing the query internals.
47 class Internal;
48 /// @private @internal Reference counted internals.
49 Xapian::Internal::intrusive_ptr<Internal> internal;
51 /** A query matching no documents.
53 * This is a static instance of a default-constructed Xapian::Query
54 * object. It is safe to use concurrently from different threads,
55 * unlike @a MatchAll (this is because MatchNothing has a NULL
56 * internal object so there's no reference counting happening).
58 static const Xapian::Query MatchNothing;
60 /** A query matching all documents.
62 * This is a static instance of Xapian::Query(std::string()). If
63 * you are constructing Query objects in different threads, avoid
64 * using @a MatchAll as the reference counting of the static object
65 * can get messed up by concurrent access).
67 static const Xapian::Query MatchAll;
69 /** Query operators. */
70 enum op {
71 OP_AND = 0,
72 OP_OR = 1,
73 OP_AND_NOT = 2,
74 OP_XOR = 3,
75 OP_AND_MAYBE = 4,
76 OP_FILTER = 5,
77 OP_NEAR = 6,
78 OP_PHRASE = 7,
79 OP_VALUE_RANGE = 8,
80 OP_SCALE_WEIGHT = 9,
82 /** Pick the best N subqueries and combine with OP_OR.
84 * If you want to implement a feature which finds documents similar to
85 * a piece of text, an obvious approach is to build an "OR" query from
86 * all the terms in the text, and run this query against a database
87 * containing the documents. However such a query can contain a lots
88 * of terms and be quite slow to perform, yet many of these terms
89 * don't contribute usefully to the results.
91 * The OP_ELITE_SET operator can be used instead of OP_OR in this
92 * situation. OP_ELITE_SET selects the most important ''N'' terms and
93 * then acts as an OP_OR query with just these, ignoring any other
94 * terms. This will usually return results just as good as the full
95 * OP_OR query, but much faster.
97 * In general, the OP_ELITE_SET operator can be used when you have a
98 * large OR query, but it doesn't matter if the search completely
99 * ignores some of the less important terms in the query.
101 * The subqueries don't have to be terms, but if they aren't then
102 * OP_ELITE_SET will look at the estimated frequencies of the
103 * subqueries and so could pick a subset which don't actually
104 * match any documents even if the full OR would match some.
106 * You can specify a parameter to the query constructor which control
107 * the number of terms which OP_ELITE_SET will pick. If not
108 * specified, this defaults to 10 (Xapian used to default to
109 * <code>ceil(sqrt(number_of_subqueries))</code> if there are more
110 * than 100 subqueries, but this rather arbitrary special case was
111 * dropped in 1.3.0). For example, this will pick the best 7 terms:
113 * <pre>
114 * Xapian::Query query(Xapian::Query::OP_ELITE_SET, subqs.begin(), subqs.end(), 7);
115 * </pre>
117 * If the number of subqueries is less than this threshold,
118 * OP_ELITE_SET behaves identically to OP_OR.
120 OP_ELITE_SET = 10,
121 OP_VALUE_GE = 11,
122 OP_VALUE_LE = 12,
123 OP_SYNONYM = 13,
124 /** Pick the maximum weight of any subquery.
126 * Matches the same documents as @a OP_OR, but the weight contributed
127 * is the maximum weight from any matching subquery (for OP_OR, it's
128 * the sum of the weights from the matching subqueries).
130 * Added in Xapian 1.3.2.
132 OP_MAX = 14,
133 /** Wildcard expansion.
135 * Added in Xapian 1.3.3.
137 OP_WILDCARD = 15,
139 OP_INVALID = 99,
141 LEAF_TERM = 100,
142 LEAF_POSTING_SOURCE,
143 LEAF_MATCH_ALL,
144 LEAF_MATCH_NOTHING
147 enum {
148 /** Throw an error if OP_WILDCARD exceeds its expansion limit.
150 * Xapian::WildcardError will be thrown when the query is actually
151 * run.
153 WILDCARD_LIMIT_ERROR,
154 /** Stop expanding when OP_WILDCARD reaches its expansion limit.
156 * This makes the wildcard expand to only the first N terms (sorted
157 * by byte order).
159 WILDCARD_LIMIT_FIRST,
160 /** Limit OP_WILDCARD expansion to the most frequent terms.
162 * If OP_WILDCARD would expand to more than its expansion limit, the
163 * most frequent terms are taken. This approach works well for cases
164 * such as expanding a partial term at the end of a query string which
165 * the user hasn't finished typing yet - as well as being less expense
166 * to evaluate than the full expansion, using only the most frequent
167 * terms tends to give better results too.
169 WILDCARD_LIMIT_MOST_FREQUENT
172 /// Default constructor.
173 XAPIAN_NOTHROW(Query()) { }
175 /// Destructor.
176 ~Query() { }
178 /** Copying is allowed.
180 * The internals are reference counted, so copying is cheap.
182 Query(const Query & o) : internal(o.internal) { }
184 /** Copying is allowed.
186 * The internals are reference counted, so assignment is cheap.
188 Query & operator=(const Query & o) { internal = o.internal; return *this; }
190 /// Move constructor.
191 Query(Query &&) = default;
193 /// Move assignment operator.
194 Query & operator=(Query &&) = default;
196 /** Construct a Query object for a term. */
197 Query(const std::string & term,
198 Xapian::termcount wqf = 1,
199 Xapian::termpos pos = 0);
201 /** Construct a Query object for a PostingSource. */
202 explicit Query(Xapian::PostingSource * source);
204 /** Scale using OP_SCALE_WEIGHT.
206 * @param factor Non-negative real number to multiply weights by.
207 * @param subquery Query object to scale weights from.
209 Query(double factor, const Xapian::Query & subquery);
211 /** Scale using OP_SCALE_WEIGHT.
213 * In this form, the op_ parameter is totally redundant - use
214 * Query(factor, subquery) in preference.
216 * @param op_ Must be OP_SCALE_WEIGHT.
217 * @param factor Non-negative real number to multiply weights by.
218 * @param subquery Query object to scale weights from.
220 Query(op op_, const Xapian::Query & subquery, double factor);
222 /** Construct a Query object by combining two others.
224 * @param op_ The operator to combine the queries with.
225 * @param a First subquery.
226 * @param b Second subquery.
228 Query(op op_, const Xapian::Query & a, const Xapian::Query & b)
230 init(op_, 2);
231 bool positional = (op_ == OP_NEAR || op_ == OP_PHRASE);
232 add_subquery(positional, a);
233 add_subquery(positional, b);
234 done();
237 /** Construct a Query object by combining two terms.
239 * @param op_ The operator to combine the terms with.
240 * @param a First term.
241 * @param b Second term.
243 Query(op op_, const std::string & a, const std::string & b)
245 init(op_, 2);
246 add_subquery(false, a);
247 add_subquery(false, b);
248 done();
251 /** Construct a Query object for a single-ended value range.
253 * @param op_ Must be OP_VALUE_LE or OP_VALUE_GE currently.
254 * @param slot The value slot to work over.
255 * @param range_limit The limit of the range.
257 Query(op op_, Xapian::valueno slot, const std::string & range_limit);
259 /** Construct a Query object for a value range.
261 * @param op_ Must be OP_VALUE_RANGE currently.
262 * @param slot The value slot to work over.
263 * @param range_lower Lower end of the range.
264 * @param range_upper Upper end of the range.
266 Query(op op_, Xapian::valueno slot,
267 const std::string & range_lower, const std::string & range_upper);
269 /** Query constructor for OP_WILDCARD queries.
271 * @param op_ Must be OP_WILDCARD
272 * @param pattern The wildcard pattern - currently this is just a string
273 * and the wildcard expands to terms which start with
274 * exactly this string.
275 * @param max_expansion The maximum number of terms to expand to
276 * (default: 0, which means no limit)
277 * @param max_type How to enforce max_expansion - one of
278 * @a WILDCARD_LIMIT_ERROR (the default),
279 * @a WILDCARD_LIMIT_FIRST or
280 * @a WILDCARD_LIMIT_MOST_FREQUENT.
281 * When searching multiple databases, the expansion limit
282 * is currently applied independently for each database,
283 * so the total number of terms may be higher than the
284 * limit. This is arguably a bug, and may change in
285 * future versions.
286 * @param combiner The @a op_ to combine the terms with - one of
287 * @a OP_SYNONYM (the default), @a OP_OR or @a OP_MAX.
289 Query(op op_,
290 const std::string & pattern,
291 Xapian::termcount max_expansion = 0,
292 int max_type = WILDCARD_LIMIT_ERROR,
293 op combiner = OP_SYNONYM);
295 /** Construct a Query object from a begin/end iterator pair.
297 * Dereferencing the iterator should return a Xapian::Query, a non-NULL
298 * Xapian::Query*, a std::string or a type which converts to one of
299 * these (e.g. const char*).
301 * @param op_ The operator to combine the queries with.
302 * @param begin Begin iterator.
303 * @param end End iterator.
304 * @param window Window size for OP_NEAR and OP_PHRASE, or 0 to use the
305 * number of subqueries as the window size (default: 0).
307 template<typename I>
308 Query(op op_, I begin, I end, Xapian::termcount window = 0)
310 if (begin != end) {
311 typedef typename std::iterator_traits<I>::iterator_category iterator_category;
312 init(op_, window, begin, end, iterator_category());
313 bool positional = (op_ == OP_NEAR || op_ == OP_PHRASE);
314 for (I i = begin; i != end; ++i) {
315 add_subquery(positional, *i);
317 done();
321 #ifdef SWIG
322 // SWIG's %template doesn't seem to handle a templated ctor so we
323 // provide this fake specialised form of the above prototype.
324 Query(op op_, XapianSWIGQueryItor qbegin, XapianSWIGQueryItor qend,
325 Xapian::termcount parameter = 0);
327 # ifdef SWIGJAVA
328 Query(op op_, XapianSWIGStrItor qbegin, XapianSWIGStrItor qend,
329 Xapian::termcount parameter = 0);
330 # endif
331 #endif
333 /** Begin iterator for terms in the query object.
335 * The iterator returns terms in ascending query position order, and
336 * will return the same term in each unique position it occurs in.
337 * If you want the terms in sorted order and without duplicates, see
338 * get_unique_terms_begin().
340 const TermIterator get_terms_begin() const;
342 /// End iterator for terms in the query object.
343 const TermIterator XAPIAN_NOTHROW(get_terms_end() const) {
344 return TermIterator();
347 /** Begin iterator for unique terms in the query object.
349 * Terms are sorted and terms with the same name removed from the list.
351 * If you want the terms in ascending query position order, see
352 * get_terms_begin().
354 const TermIterator get_unique_terms_begin() const;
356 /** Return the length of this query object. */
357 Xapian::termcount XAPIAN_NOTHROW(get_length() const) XAPIAN_PURE_FUNCTION;
359 /** Check if this query is Xapian::Query::MatchNothing. */
360 bool XAPIAN_NOTHROW(empty() const) {
361 return internal.get() == 0;
364 /** Serialise this object into a string. */
365 std::string serialise() const;
367 /** Unserialise a string and return a Query object.
369 * @param serialised the string to unserialise.
370 * @param reg Xapian::Registry object to use to unserialise
371 * user-subclasses of Xapian::PostingSource
372 * (default: standard registry).
374 static const Query unserialise(const std::string & serialised,
375 const Registry & reg = Registry());
377 /** Get the type of the top level of the query. */
378 op XAPIAN_NOTHROW(get_type() const) XAPIAN_PURE_FUNCTION;
380 /** Get the number of subqueries of the top level query. */
381 size_t XAPIAN_NOTHROW(get_num_subqueries() const) XAPIAN_PURE_FUNCTION;
383 /** Get the wqf parameter of a leaf node. */
384 Xapian::termcount get_leaf_wqf() const;
386 /** Get the pos parameter of a leaf node. */
387 Xapian::termpos get_leaf_pos() const;
389 /** Read a top level subquery.
391 * @param n Return the n-th subquery (starting from 0) - only valid when
392 * 0 <= n < get_num_subqueries().
394 const Query get_subquery(size_t n) const;
396 /// Return a string describing this object.
397 std::string get_description() const;
399 /** Combine with another Xapian::Query object using OP_AND. */
400 const Query operator&=(const Query & o) {
401 return (*this = Query(OP_AND, *this, o));
404 /** Combine with another Xapian::Query object using OP_OR. */
405 const Query operator|=(const Query & o) {
406 return (*this = Query(OP_OR, *this, o));
409 /** Combine with another Xapian::Query object using OP_XOR. */
410 const Query operator^=(const Query & o) {
411 return (*this = Query(OP_XOR, *this, o));
414 /** Scale using OP_SCALE_WEIGHT.
416 * @param factor Non-negative real number to multiply weights by.
418 const Query operator*=(double factor) {
419 return (*this = Query(factor, *this));
422 /** Inverse scale using OP_SCALE_WEIGHT.
424 * @param factor Positive real number to divide weights by.
426 const Query operator/=(double factor) {
427 return (*this = Query(1.0 / factor, *this));
430 /// @private @internal Wrap an existing Internal.
431 explicit Query(Internal * internal_) : internal(internal_) { }
433 /** Construct with just an operator.
435 * @param op_ The operator to use - currently only OP_INVALID is useful.
437 explicit Query(Query::op op_) {
438 init(op_, 0);
439 if (op_ != Query::OP_INVALID) done();
442 private:
443 void init(Query::op op_, size_t n_subqueries, Xapian::termcount window = 0);
445 template<typename I>
446 void init(Query::op op_, Xapian::termcount window,
447 const I & begin, const I & end, std::random_access_iterator_tag)
449 init(op_, end - begin, window);
452 template<typename I>
453 void init(Query::op op_, Xapian::termcount window,
454 const I &, const I &, std::input_iterator_tag)
456 init(op_, 0, window);
459 void add_subquery(bool positional, const Xapian::Query & subquery);
461 void add_subquery(bool, const std::string & subquery) {
462 add_subquery(false, Xapian::Query(subquery));
465 void add_subquery(bool positional, const Xapian::Query * subquery) {
466 // FIXME: subquery NULL?
467 add_subquery(positional, *subquery);
470 void done();
473 /** Combine two Xapian::Query objects using OP_AND. */
474 inline const Query
475 operator&(const Query & a, const Query & b)
477 return Query(Query::OP_AND, a, b);
480 /** Combine two Xapian::Query objects using OP_OR. */
481 inline const Query
482 operator|(const Query & a, const Query & b)
484 return Query(Query::OP_OR, a, b);
487 /** Combine two Xapian::Query objects using OP_XOR. */
488 inline const Query
489 operator^(const Query & a, const Query & b)
491 return Query(Query::OP_XOR, a, b);
494 /** Scale a Xapian::Query object using OP_SCALE_WEIGHT.
496 * @param factor Non-negative real number to multiply weights by.
497 * @param q Xapian::Query object.
499 inline const Query
500 operator*(double factor, const Query & q)
502 return Query(factor, q);
505 /** Scale a Xapian::Query object using OP_SCALE_WEIGHT.
507 * @param q Xapian::Query object.
508 * @param factor Non-negative real number to multiply weights by.
510 inline const Query
511 operator*(const Query & q, double factor)
513 return Query(factor, q);
516 /** Inverse-scale a Xapian::Query object using OP_SCALE_WEIGHT.
518 * @param factor Positive real number to divide weights by.
519 * @param q Xapian::Query object.
521 inline const Query
522 operator/(const Query & q, double factor)
524 return Query(1.0 / factor, q);
527 /** @private @internal */
528 class InvertedQuery_ {
529 const Query & query;
531 void operator=(const InvertedQuery_ &);
533 explicit InvertedQuery_(const Query & query_) : query(query_) { }
535 public:
536 // GCC 4.2 seems to needs a copy ctor.
537 InvertedQuery_(const InvertedQuery_ & o) : query(o.query) { }
539 operator Query() const {
540 return Query(Query::OP_AND_NOT, Query(std::string()), query);
543 friend const InvertedQuery_ operator~(const Query &q);
545 friend const Query operator&(const Query & a, const InvertedQuery_ & b);
548 /** Combine two Xapian::Query objects using OP_AND_NOT.
550 * E.g. Xapian::Query q = q1 &~ q2;
552 inline const Query
553 operator&(const Query & a, const InvertedQuery_ & b)
555 return Query(Query::OP_AND_NOT, a, b.query);
558 #ifndef DOXYGEN /* @internal doesn't seem to avoid a warning here. */
559 /** @internal Helper to allow q1 &~ q2 to work. */
560 inline const InvertedQuery_
561 operator~(const Query &q)
563 return InvertedQuery_(q);
565 #endif
// Forward declarations of internal types which are only used by pointer or
// reference in the declarations that follow.
namespace Internal {
class AndContext;
class OrContext;
class XorContext;

class PostList;
class QueryOptimiser;
}
576 /** @private @internal */
577 class Query::Internal : public Xapian::Internal::intrusive_base {
578 public:
579 XAPIAN_NOTHROW(Internal()) { }
581 virtual ~Internal();
583 virtual
584 Xapian::Internal::PostList* postlist(Xapian::Internal::QueryOptimiser* qopt,
585 double factor) const = 0;
587 virtual void postlist_sub_and_like(Xapian::Internal::AndContext& ctx,
588 Xapian::Internal::QueryOptimiser* qopt,
589 double factor) const;
591 virtual void postlist_sub_or_like(Xapian::Internal::OrContext& ctx,
592 Xapian::Internal::QueryOptimiser* qopt,
593 double factor) const;
595 virtual void postlist_sub_xor(Xapian::Internal::XorContext& ctx,
596 Xapian::Internal::QueryOptimiser* qopt,
597 double factor) const;
599 virtual termcount XAPIAN_NOTHROW(get_length() const) XAPIAN_PURE_FUNCTION;
601 virtual void serialise(std::string & result) const = 0;
603 static Query::Internal * unserialise(const char ** p, const char * end, const Registry & reg);
605 virtual Query::op XAPIAN_NOTHROW(get_type() const) XAPIAN_PURE_FUNCTION = 0;
606 virtual size_t XAPIAN_NOTHROW(get_num_subqueries() const) XAPIAN_PURE_FUNCTION;
607 virtual const Query get_subquery(size_t n) const;
608 virtual termcount get_wqf() const;
609 virtual termpos get_pos() const;
611 virtual std::string get_description() const = 0;
613 // Pass argument as void* to avoid need to include <vector>.
614 virtual void gather_terms(void * void_terms) const;
619 #endif // XAPIAN_INCLUDED_QUERY_H