Remove unused header include
[xapian.git] / xapian-core / include / xapian / query.h
blob8c5c54318a930ef8616f471177e7ab7344c2f543
1 /** @file query.h
2 * @brief Xapian::Query API class
3 */
/* Copyright (C) 2011,2012,2013,2014,2015,2016 Olly Betts
 * Copyright (C) 2008 Richard Boulton
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
 */
22 #ifndef XAPIAN_INCLUDED_QUERY_H
23 #define XAPIAN_INCLUDED_QUERY_H
25 #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD
26 # error "Never use <xapian/query.h> directly; include <xapian.h> instead."
27 #endif
29 #include <string>
31 #include <xapian/attributes.h>
32 #include <xapian/intrusive_ptr.h>
33 #include <xapian/postingiterator.h>
34 #include <xapian/registry.h>
35 #include <xapian/termiterator.h>
36 #include <xapian/types.h>
37 #include <xapian/visibility.h>
39 class QueryOptimiser; // FIXME
41 namespace Xapian {
43 class PostingSource;
45 /// Class representing a query.
46 class XAPIAN_VISIBILITY_DEFAULT Query {
47 public:
48 /// Class representing the query internals.
49 class Internal;
50 /// @private @internal Reference counted internals.
51 Xapian::Internal::intrusive_ptr<Internal> internal;
53 static const Xapian::Query MatchNothing;
54 static const Xapian::Query MatchAll;
/** Query operators. */
enum op {
    OP_AND = 0,
    OP_OR = 1,
    OP_AND_NOT = 2,
    OP_XOR = 3,
    OP_AND_MAYBE = 4,
    OP_FILTER = 5,
    OP_NEAR = 6,
    OP_PHRASE = 7,
    OP_VALUE_RANGE = 8,
    OP_SCALE_WEIGHT = 9,

    /** Pick the best N subqueries and combine with OP_OR.
     *
     *  If you want to implement a feature which finds documents similar to
     *  a piece of text, an obvious approach is to build an "OR" query from
     *  all the terms in the text, and run this query against a database
     *  containing the documents.  However such a query can contain a lot
     *  of terms and be quite slow to perform, yet many of these terms
     *  don't contribute usefully to the results.
     *
     *  The OP_ELITE_SET operator can be used instead of OP_OR in this
     *  situation.  OP_ELITE_SET selects the most important ''N'' terms and
     *  then acts as an OP_OR query with just these, ignoring any other
     *  terms.  This will usually return results just as good as the full
     *  OP_OR query, but much faster.
     *
     *  In general, the OP_ELITE_SET operator can be used when you have a
     *  large OR query, but it doesn't matter if the search completely
     *  ignores some of the less important terms in the query.
     *
     *  The subqueries don't have to be terms, but if they aren't then
     *  OP_ELITE_SET will look at the estimated frequencies of the
     *  subqueries and so could pick a subset which don't actually
     *  match any documents even if the full OR would match some.
     *
     *  You can specify a parameter to the query constructor which controls
     *  the number of terms which OP_ELITE_SET will pick.  If not
     *  specified, this defaults to 10 (Xapian used to default to
     *  <code>ceil(sqrt(number_of_subqueries))</code> if there are more
     *  than 100 subqueries, but this rather arbitrary special case was
     *  dropped in 1.3.0).  For example, this will pick the best 7 terms:
     *
     *  <pre>
     *  Xapian::Query query(Xapian::Query::OP_ELITE_SET, subqs.begin(), subqs.end(), 7);
     *  </pre>
     *
     *  If the number of subqueries is less than this threshold,
     *  OP_ELITE_SET behaves identically to OP_OR.
     */
    OP_ELITE_SET = 10,
    OP_VALUE_GE = 11,
    OP_VALUE_LE = 12,
    OP_SYNONYM = 13,
    OP_MAX = 14,
    OP_WILDCARD = 15,

    OP_INVALID = 99,

    // Leaf node types: LEAF_TERM is pinned at 100 and the remaining three
    // take the consecutive values 101, 102 and 103.
    LEAF_TERM = 100,
    LEAF_POSTING_SOURCE,
    LEAF_MATCH_ALL,
    LEAF_MATCH_NOTHING
};
/** Ways of enforcing the expansion limit of an OP_WILDCARD query. */
enum {
    /** Throw an error if OP_WILDCARD exceeds its expansion limit.
     *
     *  Xapian::WildcardError will be thrown when the query is actually
     *  run.
     */
    WILDCARD_LIMIT_ERROR,
    /** Stop expanding when OP_WILDCARD reaches its expansion limit.
     *
     *  This makes the wildcard expand to only the first N terms (sorted
     *  by byte order).
     */
    WILDCARD_LIMIT_FIRST,
    /** Limit OP_WILDCARD expansion to the most frequent terms.
     *
     *  If OP_WILDCARD would expand to more than its expansion limit, the
     *  most frequent terms are taken.  This approach works well for cases
     *  such as expanding a partial term at the end of a query string which
     *  the user hasn't finished typing yet - as well as being less expensive
     *  to evaluate than the full expansion, using only the most frequent
     *  terms tends to give better results too.
     */
    WILDCARD_LIMIT_MOST_FREQUENT
};
147 /// Default constructor.
148 XAPIAN_NOTHROW(Query())
149 : internal(0) { }
151 /// Destructor.
152 ~Query() { }
154 /** Copying is allowed.
156 * The internals are reference counted, so copying is cheap.
158 Query(const Query & o) : internal(o.internal) { }
160 /** Copying is allowed.
162 * The internals are reference counted, so assignment is cheap.
164 Query & operator=(const Query & o) { internal = o.internal; return *this; }
166 /** Construct a Query object for a term. */
167 Query(const std::string & term,
168 Xapian::termcount wqf = 1,
169 Xapian::termpos pos = 0);
171 /** Construct a Query object for a PostingSource. */
172 explicit Query(Xapian::PostingSource * source);
174 // FIXME: new form for OP_SCALE_WEIGHT - do we want this?
175 Query(double factor, const Xapian::Query & subquery);
177 // FIXME: legacy form of above (assuming we want to add that...)
178 Query(op op_, const Xapian::Query & subquery, double factor);
180 // Pairwise.
181 Query(op op_, const Xapian::Query & a, const Xapian::Query & b)
183 init(op_, 2);
184 bool positional = (op_ == OP_NEAR || op_ == OP_PHRASE);
185 add_subquery(positional, a);
186 add_subquery(positional, b);
187 done();
190 // Pairwise with std::string.
191 Query(op op_, const std::string & a, const std::string & b)
193 init(op_, 2);
194 add_subquery(false, a);
195 add_subquery(false, b);
196 done();
199 // OP_VALUE_GE/OP_VALUE_LE
200 Query(op op_, Xapian::valueno slot, const std::string & limit);
202 // OP_VALUE_RANGE
203 Query(op op_, Xapian::valueno slot,
204 const std::string & begin, const std::string & end);
206 /** Query constructor for OP_WILDCARD queries.
208 * @param op Must be OP_WILDCARD
209 * @param pattern The wildcard pattern - currently this is just a string
210 * and the wildcard expands to terms which start with
211 * exactly this string.
212 * @param max_expansion The maximum number of terms to expand to
213 * (default: 0, which means no limit)
214 * @param max_type How to enforce max_expansion - one of
215 * @a WILDCARD_LIMIT_ERROR (the default),
216 * @a WILDCARD_LIMIT_FIRST or
217 * @a WILDCARD_LIMIT_MOST_FREQUENT.
218 * When searching multiple databases, the expansion limit
219 * is currently applied independently for each database,
220 * so the total number of terms may be higher than the
221 * limit. This is arguably a bug, and may change in
222 * future versions.
223 * @param combiner The @op to combine the terms with - one of
224 * @a OP_SYNONYM (the default), @a OP_OR or @a OP_MAX.
226 Query(op op_,
227 const std::string & pattern,
228 Xapian::termcount max_expansion = 0,
229 int max_type = WILDCARD_LIMIT_ERROR,
230 op combiner = OP_SYNONYM);
232 template<typename I>
233 Query(op op_, I begin, I end, Xapian::termcount window = 0)
235 if (begin != end) {
236 typedef typename std::iterator_traits<I>::iterator_category iterator_category;
237 init(op_, window, begin, end, iterator_category());
238 bool positional = (op_ == OP_NEAR || op_ == OP_PHRASE);
239 for (I i = begin; i != end; ++i) {
240 add_subquery(positional, *i);
242 done();
#ifdef SWIG
// SWIG's %template doesn't seem to handle a templated ctor so we
// provide this fake specialised form of the above prototype.
Query(op op_, XapianSWIGQueryItor qbegin, XapianSWIGQueryItor qend,
      Xapian::termcount parameter = 0);

# ifdef SWIGJAVA
// Same again for the string iterator type, only when SWIGJAVA is defined.
Query(op op_, XapianSWIGStrItor qbegin, XapianSWIGStrItor qend,
      Xapian::termcount parameter = 0);
# endif
#endif
258 const TermIterator get_terms_begin() const;
260 const TermIterator XAPIAN_NOTHROW(get_terms_end() const) {
261 return TermIterator();
264 const TermIterator get_unique_terms_begin() const;
266 Xapian::termcount XAPIAN_NOTHROW(get_length() const) XAPIAN_PURE_FUNCTION;
268 bool XAPIAN_NOTHROW(empty() const) {
269 return internal.get() == 0;
272 std::string serialise() const;
274 static const Query unserialise(const std::string & serialised,
275 const Registry & reg = Registry());
277 /** Get the type of the top level of the query. */
278 op XAPIAN_NOTHROW(get_type() const) XAPIAN_PURE_FUNCTION;
280 /** Get the number of subqueries of the top level query. */
281 size_t XAPIAN_NOTHROW(get_num_subqueries() const) XAPIAN_PURE_FUNCTION;
283 /** Read a top level subquery.
285 * @param n Return the n-th subquery (starting from 0) - only valid when
286 * 0 <= n < get_num_subqueries().
288 const Query get_subquery(size_t n) const;
290 std::string get_description() const;
292 const Query operator&=(const Query & o) {
293 return (*this = Query(OP_AND, *this, o));
296 const Query operator|=(const Query & o) {
297 return (*this = Query(OP_OR, *this, o));
300 const Query operator^=(const Query & o) {
301 return (*this = Query(OP_XOR, *this, o));
304 const Query operator*=(double factor) {
305 return (*this = Query(factor, *this));
308 const Query operator/=(double factor) {
309 return (*this = Query(1.0 / factor, *this));
312 /** @private @internal */
313 explicit Query(Internal * internal_) : internal(internal_) { }
315 explicit Query(Query::op op_) {
316 init(op_, 0);
317 if (op_ != Query::OP_INVALID) done();
320 private:
321 void init(Query::op op_, size_t n_subqueries, Xapian::termcount window = 0);
323 template<typename I>
324 void init(Query::op op_, Xapian::termcount window,
325 const I & begin, const I & end, std::random_access_iterator_tag)
327 init(op_, end - begin, window);
330 template<typename I>
331 void init(Query::op op_, Xapian::termcount window,
332 const I &, const I &, std::input_iterator_tag)
334 init(op_, 0, window);
337 void add_subquery(bool positional, const Xapian::Query & subquery);
339 void add_subquery(bool, const std::string & subquery) {
340 add_subquery(false, Xapian::Query(subquery));
343 void add_subquery(bool positional, const Xapian::Query * subquery) {
344 // FIXME: subquery NULL?
345 add_subquery(positional, *subquery);
/// Called by the constructors once all subqueries have been added
/// (defined out of line).
void done();
351 inline const Query
352 operator&(const Query & a, const Query & b)
354 return Query(Query::OP_AND, a, b);
357 inline const Query
358 operator|(const Query & a, const Query & b)
360 return Query(Query::OP_OR, a, b);
363 inline const Query
364 operator^(const Query & a, const Query & b)
366 return Query(Query::OP_XOR, a, b);
369 inline const Query
370 operator*(double factor, const Query & q)
372 return Query(factor, q);
375 inline const Query
376 operator*(const Query & q, double factor)
378 return Query(factor, q);
381 inline const Query
382 operator/(const Query & q, double factor)
384 return Query(1.0 / factor, q);
387 class InvertedQuery_ {
388 const Query & query;
390 void operator=(const InvertedQuery_ &);
392 explicit InvertedQuery_(const Query & query_) : query(query_) { }
394 public:
395 // GCC 4.2 seems to needs a copy ctor.
396 InvertedQuery_(const InvertedQuery_ & o) : query(o.query) { }
398 operator Query() const {
399 return Query(Query::OP_AND_NOT, Query::MatchAll, query);
402 friend const InvertedQuery_ operator~(const Query &q);
404 friend const Query operator&(const Query & a, const InvertedQuery_ & b);
407 inline const Query
408 operator&(const Query & a, const InvertedQuery_ & b)
410 return Query(Query::OP_AND_NOT, a, b.query);
413 inline const InvertedQuery_
414 operator~(const Query &q)
416 return InvertedQuery_(q);
// Forward declarations of the context classes that the
// Query::Internal::postlist_sub_* methods take by reference.
namespace Internal {
class AndContext;
class OrContext;
class XorContext;
}
425 class Query::Internal : public Xapian::Internal::intrusive_base {
426 public:
427 XAPIAN_NOTHROW(Internal()) { }
429 virtual ~Internal();
431 virtual PostingIterator::Internal * postlist(QueryOptimiser * qopt, double factor) const = 0;
433 virtual void postlist_sub_and_like(Xapian::Internal::AndContext& ctx,
434 QueryOptimiser * qopt,
435 double factor) const;
437 virtual void postlist_sub_or_like(Xapian::Internal::OrContext& ctx,
438 QueryOptimiser * qopt,
439 double factor) const;
441 virtual void postlist_sub_xor(Xapian::Internal::XorContext& ctx,
442 QueryOptimiser * qopt,
443 double factor) const;
445 virtual termcount XAPIAN_NOTHROW(get_length() const) XAPIAN_PURE_FUNCTION;
447 virtual void serialise(std::string & result) const = 0;
449 static Query::Internal * unserialise(const char ** p, const char * end, const Registry & reg);
451 virtual Query::op XAPIAN_NOTHROW(get_type() const) XAPIAN_PURE_FUNCTION = 0;
452 virtual size_t XAPIAN_NOTHROW(get_num_subqueries() const) XAPIAN_PURE_FUNCTION;
453 virtual const Query get_subquery(size_t n) const;
455 virtual std::string get_description() const = 0;
457 // Pass argument as void* to avoid need to include <vector>.
458 virtual void gather_terms(void * void_terms) const;
463 #endif // XAPIAN_INCLUDED_QUERY_H