2 * @brief Xapian::Query API class
4 /* Copyright (C) 2011,2012,2013,2014,2015,2016,2017 Olly Betts
5 * Copyright (C) 2008 Richard Boulton
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #ifndef XAPIAN_INCLUDED_QUERY_H
23 #define XAPIAN_INCLUDED_QUERY_H
25 #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD
26 # error "Never use <xapian/query.h> directly; include <xapian.h> instead."
31 #include <xapian/attributes.h>
32 #include <xapian/intrusive_ptr.h>
33 #include <xapian/postingiterator.h>
34 #include <xapian/registry.h>
35 #include <xapian/termiterator.h>
36 #include <xapian/types.h>
37 #include <xapian/visibility.h>
43 /// Class representing a query.
44 class XAPIAN_VISIBILITY_DEFAULT Query
{
46 /// Class representing the query internals.
48 /// @private @internal Reference counted internals.
49 Xapian::Internal::intrusive_ptr
<Internal
> internal
;
51 /** A query matching no documents.
53 * Exactly equivalent to Xapian::Query().
55 static const Xapian::Query MatchNothing
;
57 /** A query matching all documents.
59 * Exactly equivalent to Xapian::Query(std::string()).
61 static const Xapian::Query MatchAll
;
63 /** Query operators. */
76 /** Pick the best N subqueries and combine with OP_OR.
78 * If you want to implement a feature which finds documents similar to
79 * a piece of text, an obvious approach is to build an "OR" query from
80 * all the terms in the text, and run this query against a database
81 * containing the documents. However such a query can contain a lot
82 * of terms and be quite slow to perform, yet many of these terms
83 * don't contribute usefully to the results.
85 * The OP_ELITE_SET operator can be used instead of OP_OR in this
86 * situation. OP_ELITE_SET selects the most important ''N'' terms and
87 * then acts as an OP_OR query with just these, ignoring any other
88 * terms. This will usually return results just as good as the full
89 * OP_OR query, but much faster.
91 * In general, the OP_ELITE_SET operator can be used when you have a
92 * large OR query, but it doesn't matter if the search completely
93 * ignores some of the less important terms in the query.
95 * The subqueries don't have to be terms, but if they aren't then
96 * OP_ELITE_SET will look at the estimated frequencies of the
97 * subqueries and so could pick a subset which don't actually
98 * match any documents even if the full OR would match some.
100 * You can specify a parameter to the query constructor which controls
101 * the number of terms which OP_ELITE_SET will pick. If not
102 * specified, this defaults to 10 (Xapian used to default to
103 * <code>ceil(sqrt(number_of_subqueries))</code> if there are more
104 * than 100 subqueries, but this rather arbitrary special case was
105 * dropped in 1.3.0). For example, this will pick the best 7 terms:
108 * Xapian::Query query(Xapian::Query::OP_ELITE_SET, subqs.begin(), subqs.end(), 7);
111 * If the number of subqueries is less than this threshold,
112 * OP_ELITE_SET behaves identically to OP_OR.
118 /** Pick the maximum weight of any subquery.
120 * Matches the same documents as @a OP_OR, but the weight contributed
121 * is the maximum weight from any matching subquery (for OP_OR, it's
122 * the sum of the weights from the matching subqueries).
124 * Added in Xapian 1.3.2.
127 /** Wildcard expansion.
129 * Added in Xapian 1.3.3.
142 /** Throw an error if OP_WILDCARD exceeds its expansion limit.
144 * Xapian::WildcardError will be thrown when the query is actually
147 WILDCARD_LIMIT_ERROR
,
148 /** Stop expanding when OP_WILDCARD reaches its expansion limit.
150 * This makes the wildcard expand to only the first N terms (sorted
153 WILDCARD_LIMIT_FIRST
,
154 /** Limit OP_WILDCARD expansion to the most frequent terms.
156 * If OP_WILDCARD would expand to more than its expansion limit, the
157 * most frequent terms are taken. This approach works well for cases
158 * such as expanding a partial term at the end of a query string which
159 * the user hasn't finished typing yet - as well as being less expensive
160 * to evaluate than the full expansion, using only the most frequent
161 * terms tends to give better results too.
163 WILDCARD_LIMIT_MOST_FREQUENT
166 /// Default constructor.
167 XAPIAN_NOTHROW(Query()) { }
172 /** Copying is allowed.
174 * The internals are reference counted, so copying is cheap.
176 Query(const Query
& o
) : internal(o
.internal
) { }
178 /** Copying is allowed.
180 * The internals are reference counted, so assignment is cheap.
182 Query
& operator=(const Query
& o
) { internal
= o
.internal
; return *this; }
184 /** Construct a Query object for a term. */
185 Query(const std::string
& term
,
186 Xapian::termcount wqf
= 1,
187 Xapian::termpos pos
= 0);
189 /** Construct a Query object for a PostingSource. */
190 explicit Query(Xapian::PostingSource
* source
);
192 /** Scale using OP_SCALE_WEIGHT.
194 * @param factor Non-negative real number to multiply weights by.
195 * @param subquery Query object to scale weights from.
197 Query(double factor
, const Xapian::Query
& subquery
);
199 /** Scale using OP_SCALE_WEIGHT.
201 * In this form, the op_ parameter is totally redundant - use
202 * Query(factor, subquery) in preference.
204 * @param op_ Must be OP_SCALE_WEIGHT.
205 * @param factor Non-negative real number to multiply weights by.
206 * @param subquery Query object to scale weights from.
208 Query(op op_
, const Xapian::Query
& subquery
, double factor
);
210 /** Construct a Query object by combining two others.
212 * @param op_ The operator to combine the queries with.
213 * @param a First subquery.
214 * @param b Second subquery.
216 Query(op op_
, const Xapian::Query
& a
, const Xapian::Query
& b
)
219 bool positional
= (op_
== OP_NEAR
|| op_
== OP_PHRASE
);
220 add_subquery(positional
, a
);
221 add_subquery(positional
, b
);
225 /** Construct a Query object by combining two terms.
227 * @param op_ The operator to combine the terms with.
228 * @param a First term.
229 * @param b Second term.
231 Query(op op_
, const std::string
& a
, const std::string
& b
)
234 add_subquery(false, a
);
235 add_subquery(false, b
);
239 /** Construct a Query object for a single-ended value range.
241 * @param op_ Must be OP_VALUE_LE or OP_VALUE_GE currently.
242 * @param slot The value slot to work over.
243 * @param range_limit The limit of the range.
245 Query(op op_
, Xapian::valueno slot
, const std::string
& range_limit
);
247 /** Construct a Query object for a value range.
249 * @param op_ Must be OP_VALUE_RANGE currently.
250 * @param slot The value slot to work over.
251 * @param range_lower Lower end of the range.
252 * @param range_upper Upper end of the range.
254 Query(op op_
, Xapian::valueno slot
,
255 const std::string
& range_lower
, const std::string
& range_upper
);
257 /** Query constructor for OP_WILDCARD queries.
259 * @param op_ Must be OP_WILDCARD
260 * @param pattern The wildcard pattern - currently this is just a string
261 * and the wildcard expands to terms which start with
262 * exactly this string.
263 * @param max_expansion The maximum number of terms to expand to
264 * (default: 0, which means no limit)
265 * @param max_type How to enforce max_expansion - one of
266 * @a WILDCARD_LIMIT_ERROR (the default),
267 * @a WILDCARD_LIMIT_FIRST or
268 * @a WILDCARD_LIMIT_MOST_FREQUENT.
269 * When searching multiple databases, the expansion limit
270 * is currently applied independently for each database,
271 * so the total number of terms may be higher than the
272 * limit. This is arguably a bug, and may change in
274 * @param combiner The @a op_ to combine the terms with - one of
275 * @a OP_SYNONYM (the default), @a OP_OR or @a OP_MAX.
278 const std::string
& pattern
,
279 Xapian::termcount max_expansion
= 0,
280 int max_type
= WILDCARD_LIMIT_ERROR
,
281 op combiner
= OP_SYNONYM
);
283 /** Construct a Query object from a begin/end iterator pair.
285 * Dereferencing the iterator should return a Xapian::Query, a non-NULL
286 * Xapian::Query*, a std::string or a type which converts to one of
287 * these (e.g. const char*).
289 * @param op_ The operator to combine the queries with.
290 * @param begin Begin iterator.
291 * @param end End iterator.
292 * @param window Window size for OP_NEAR and OP_PHRASE, or 0 to use the
293 * number of subqueries as the window size (default: 0).
296 Query(op op_
, I begin
, I end
, Xapian::termcount window
= 0)
299 typedef typename
std::iterator_traits
<I
>::iterator_category iterator_category
;
300 init(op_
, window
, begin
, end
, iterator_category());
301 bool positional
= (op_
== OP_NEAR
|| op_
== OP_PHRASE
);
302 for (I i
= begin
; i
!= end
; ++i
) {
303 add_subquery(positional
, *i
);
310 // SWIG's %template doesn't seem to handle a templated ctor so we
311 // provide this fake specialised form of the above prototype.
312 Query(op op_
, XapianSWIGQueryItor qbegin
, XapianSWIGQueryItor qend
,
313 Xapian::termcount parameter
= 0);
316 Query(op op_
, XapianSWIGStrItor qbegin
, XapianSWIGStrItor qend
,
317 Xapian::termcount parameter
= 0);
321 /** Begin iterator for terms in the query object.
323 * The iterator returns terms in ascending query position order, and
324 * will return the same term in each unique position it occurs in.
325 * If you want the terms in sorted order and without duplicates, see
326 * get_unique_terms_begin().
328 const TermIterator
get_terms_begin() const;
330 /// End iterator for terms in the query object.
331 const TermIterator
XAPIAN_NOTHROW(get_terms_end() const) {
332 return TermIterator();
335 /** Begin iterator for unique terms in the query object.
337 * Terms are sorted and terms with the same name removed from the list.
339 * If you want the terms in ascending query position order, see
342 const TermIterator
get_unique_terms_begin() const;
344 /** Return the length of this query object. */
345 Xapian::termcount
XAPIAN_NOTHROW(get_length() const) XAPIAN_PURE_FUNCTION
;
347 /** Check if this query is Xapian::Query::MatchNothing. */
348 bool XAPIAN_NOTHROW(empty() const) {
349 return internal
.get() == 0;
352 /** Serialise this object into a string. */
353 std::string
serialise() const;
355 /** Unserialise a string and return a Query object.
357 * @param serialised the string to unserialise.
358 * @param reg Xapian::Registry object to use to unserialise
359 * user-subclasses of Xapian::PostingSource
360 * (default: standard registry).
362 static const Query
unserialise(const std::string
& serialised
,
363 const Registry
& reg
= Registry());
365 /** Get the type of the top level of the query. */
366 op
XAPIAN_NOTHROW(get_type() const) XAPIAN_PURE_FUNCTION
;
368 /** Get the number of subqueries of the top level query. */
369 size_t XAPIAN_NOTHROW(get_num_subqueries() const) XAPIAN_PURE_FUNCTION
;
371 /** Get the wqf parameter of a leaf node. */
372 Xapian::termcount
get_leaf_wqf() const;
374 /** Get the pos parameter of a leaf node. */
375 Xapian::termpos
get_leaf_pos() const;
377 /** Read a top level subquery.
379 * @param n Return the n-th subquery (starting from 0) - only valid when
380 * 0 <= n < get_num_subqueries().
382 const Query
get_subquery(size_t n
) const;
384 /// Return a string describing this object.
385 std::string
get_description() const;
387 /** Combine with another Xapian::Query object using OP_AND. */
388 const Query
operator&=(const Query
& o
) {
389 return (*this = Query(OP_AND
, *this, o
));
392 /** Combine with another Xapian::Query object using OP_OR. */
393 const Query
operator|=(const Query
& o
) {
394 return (*this = Query(OP_OR
, *this, o
));
397 /** Combine with another Xapian::Query object using OP_XOR. */
398 const Query
operator^=(const Query
& o
) {
399 return (*this = Query(OP_XOR
, *this, o
));
402 /** Scale using OP_SCALE_WEIGHT.
404 * @param factor Non-negative real number to multiply weights by.
406 const Query
operator*=(double factor
) {
407 return (*this = Query(factor
, *this));
410 /** Inverse scale using OP_SCALE_WEIGHT.
412 * @param factor Positive real number to divide weights by.
414 const Query
operator/=(double factor
) {
415 return (*this = Query(1.0 / factor
, *this));
418 /// @private @internal Wrap an existing Internal.
419 explicit Query(Internal
* internal_
) : internal(internal_
) { }
421 /** Construct with just an operator.
423 * @param op_ The operator to use - currently only OP_INVALID is useful.
425 explicit Query(Query::op op_
) {
427 if (op_
!= Query::OP_INVALID
) done();
431 void init(Query::op op_
, size_t n_subqueries
, Xapian::termcount window
= 0);
434 void init(Query::op op_
, Xapian::termcount window
,
435 const I
& begin
, const I
& end
, std::random_access_iterator_tag
)
437 init(op_
, end
- begin
, window
);
441 void init(Query::op op_
, Xapian::termcount window
,
442 const I
&, const I
&, std::input_iterator_tag
)
444 init(op_
, 0, window
);
447 void add_subquery(bool positional
, const Xapian::Query
& subquery
);
449 void add_subquery(bool, const std::string
& subquery
) {
450 add_subquery(false, Xapian::Query(subquery
));
453 void add_subquery(bool positional
, const Xapian::Query
* subquery
) {
454 // FIXME: subquery NULL?
455 add_subquery(positional
, *subquery
);
461 /** Combine two Xapian::Query objects using OP_AND. */
463 operator&(const Query
& a
, const Query
& b
)
465 return Query(Query::OP_AND
, a
, b
);
468 /** Combine two Xapian::Query objects using OP_OR. */
470 operator|(const Query
& a
, const Query
& b
)
472 return Query(Query::OP_OR
, a
, b
);
475 /** Combine two Xapian::Query objects using OP_XOR. */
477 operator^(const Query
& a
, const Query
& b
)
479 return Query(Query::OP_XOR
, a
, b
);
482 /** Scale a Xapian::Query object using OP_SCALE_WEIGHT.
484 * @param factor Non-negative real number to multiply weights by.
485 * @param q Xapian::Query object.
488 operator*(double factor
, const Query
& q
)
490 return Query(factor
, q
);
493 /** Scale a Xapian::Query object using OP_SCALE_WEIGHT.
495 * @param q Xapian::Query object.
496 * @param factor Non-negative real number to multiply weights by.
499 operator*(const Query
& q
, double factor
)
501 return Query(factor
, q
);
504 /** Inverse-scale a Xapian::Query object using OP_SCALE_WEIGHT.
506 * @param factor Positive real number to divide weights by.
507 * @param q Xapian::Query object.
510 operator/(const Query
& q
, double factor
)
512 return Query(1.0 / factor
, q
);
515 /** @private @internal */
516 class InvertedQuery_
{
519 void operator=(const InvertedQuery_
&);
521 explicit InvertedQuery_(const Query
& query_
) : query(query_
) { }
524 // GCC 4.2 seems to needs a copy ctor.
525 InvertedQuery_(const InvertedQuery_
& o
) : query(o
.query
) { }
527 operator Query() const {
528 return Query(Query::OP_AND_NOT
, Query::MatchAll
, query
);
531 friend const InvertedQuery_
operator~(const Query
&q
);
533 friend const Query
operator&(const Query
& a
, const InvertedQuery_
& b
);
536 /** Combine two Xapian::Query objects using OP_AND_NOT.
538 * E.g. Xapian::Query q = q1 &~ q2;
541 operator&(const Query
& a
, const InvertedQuery_
& b
)
543 return Query(Query::OP_AND_NOT
, a
, b
.query
);
546 #ifndef DOXYGEN /* @internal doesn't seem to avoid a warning here. */
547 /** @internal Helper to allow q1 &~ q2 to work. */
548 inline const InvertedQuery_
549 operator~(const Query
&q
)
551 return InvertedQuery_(q
);
561 class QueryOptimiser
;
564 /** @private @internal */
565 class Query::Internal
: public Xapian::Internal::intrusive_base
{
567 XAPIAN_NOTHROW(Internal()) { }
572 Xapian::Internal::PostList
* postlist(Xapian::Internal::QueryOptimiser
* qopt
,
573 double factor
) const = 0;
575 virtual void postlist_sub_and_like(Xapian::Internal::AndContext
& ctx
,
576 Xapian::Internal::QueryOptimiser
* qopt
,
577 double factor
) const;
579 virtual void postlist_sub_or_like(Xapian::Internal::OrContext
& ctx
,
580 Xapian::Internal::QueryOptimiser
* qopt
,
581 double factor
) const;
583 virtual void postlist_sub_xor(Xapian::Internal::XorContext
& ctx
,
584 Xapian::Internal::QueryOptimiser
* qopt
,
585 double factor
) const;
587 virtual termcount
XAPIAN_NOTHROW(get_length() const) XAPIAN_PURE_FUNCTION
;
589 virtual void serialise(std::string
& result
) const = 0;
591 static Query::Internal
* unserialise(const char ** p
, const char * end
, const Registry
& reg
);
593 virtual Query::op
XAPIAN_NOTHROW(get_type() const) XAPIAN_PURE_FUNCTION
= 0;
594 virtual size_t XAPIAN_NOTHROW(get_num_subqueries() const) XAPIAN_PURE_FUNCTION
;
595 virtual const Query
get_subquery(size_t n
) const;
596 virtual termcount
get_wqf() const;
597 virtual termpos
get_pos() const;
599 virtual std::string
get_description() const = 0;
601 // Pass argument as void* to avoid need to include <vector>.
602 virtual void gather_terms(void * void_terms
) const;
607 #endif // XAPIAN_INCLUDED_QUERY_H