2 * @brief Xapian::Query API class
4 /* Copyright (C) 2011,2012,2013,2014,2015,2016 Olly Betts
5 * Copyright (C) 2008 Richard Boulton
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #ifndef XAPIAN_INCLUDED_QUERY_H
23 #define XAPIAN_INCLUDED_QUERY_H
25 #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD
26 # error "Never use <xapian/query.h> directly; include <xapian.h> instead."
31 #include <xapian/attributes.h>
32 #include <xapian/intrusive_ptr.h>
33 #include <xapian/postingiterator.h>
34 #include <xapian/registry.h>
35 #include <xapian/termiterator.h>
36 #include <xapian/types.h>
37 #include <xapian/visibility.h>
// Forward declaration only: this header refers to QueryOptimiser solely
// through pointers (the qopt parameters of the Query::Internal postlist
// methods), so the complete type is not required here.
39 class QueryOptimiser
; // FIXME
45 /// Class representing a query.
46 class XAPIAN_VISIBILITY_DEFAULT Query
{
48 /// Class representing the query internals.
50 /// @private @internal Reference counted internals.
51 Xapian::Internal::intrusive_ptr
<Internal
> internal
;
53 static const Xapian::Query MatchNothing
;
54 static const Xapian::Query MatchAll
;
56 /** Query operators. */
69 /** Pick the best N subqueries and combine with OP_OR.
71 * If you want to implement a feature which finds documents similar to
72 * a piece of text, an obvious approach is to build an "OR" query from
73 * all the terms in the text, and run this query against a database
74 * containing the documents. However such a query can contain lots
75 * of terms and be quite slow to perform, yet many of these terms
76 * don't contribute usefully to the results.
78 * The OP_ELITE_SET operator can be used instead of OP_OR in this
79 * situation. OP_ELITE_SET selects the most important ''N'' terms and
80 * then acts as an OP_OR query with just these, ignoring any other
81 * terms. This will usually return results just as good as the full
82 * OP_OR query, but much faster.
84 * In general, the OP_ELITE_SET operator can be used when you have a
85 * large OR query, but it doesn't matter if the search completely
86 * ignores some of the less important terms in the query.
88 * The subqueries don't have to be terms, but if they aren't then
89 * OP_ELITE_SET will look at the estimated frequencies of the
90 * subqueries and so could pick a subset which don't actually
91 * match any documents even if the full OR would match some.
93 * You can specify a parameter to the query constructor which controls
94 * the number of terms which OP_ELITE_SET will pick. If not
95 * specified, this defaults to 10 (Xapian used to default to
96 * <code>ceil(sqrt(number_of_subqueries))</code> if there are more
97 * than 100 subqueries, but this rather arbitrary special case was
98 * dropped in 1.3.0). For example, this will pick the best 7 terms:
101 * Xapian::Query query(Xapian::Query::OP_ELITE_SET, subqs.begin(), subqs.end(), 7);
104 * If the number of subqueries is less than this threshold,
105 * OP_ELITE_SET behaves identically to OP_OR.
123 /** Throw an error if OP_WILDCARD exceeds its expansion limit.
125 * Xapian::WildcardError will be thrown when the query is actually run.
128 WILDCARD_LIMIT_ERROR
,
129 /** Stop expanding when OP_WILDCARD reaches its expansion limit.
131 * This makes the wildcard expand to only the first N terms (sorted
134 WILDCARD_LIMIT_FIRST
,
135 /** Limit OP_WILDCARD expansion to the most frequent terms.
137 * If OP_WILDCARD would expand to more than its expansion limit, the
138 * most frequent terms are taken. This approach works well for cases
139 * such as expanding a partial term at the end of a query string which
140 * the user hasn't finished typing yet - as well as being less expensive
141 * to evaluate than the full expansion, using only the most frequent
142 * terms tends to give better results too.
144 WILDCARD_LIMIT_MOST_FREQUENT
147 /// Default constructor.
148 XAPIAN_NOTHROW(Query())
154 /** Copying is allowed.
156 * The internals are reference counted, so copying is cheap.
// Copy constructor: initialises this object's `internal` pointer from
// o.internal; the body is otherwise empty.
158 Query(const Query
& o
) : internal(o
.internal
) { }
160 /** Assignment is allowed.
162 * The internals are reference counted, so assignment is cheap.
// Assignment operator: copies o.internal into this object's `internal`
// pointer and returns *this so that assignments can be chained.
164 Query
& operator=(const Query
& o
) { internal
= o
.internal
; return *this; }
166 /** Construct a Query object for a term. */
167 Query(const std::string
& term
,
168 Xapian::termcount wqf
= 1,
169 Xapian::termpos pos
= 0);
171 /** Construct a Query object for a PostingSource. */
172 explicit Query(Xapian::PostingSource
* source
);
174 // FIXME: new form for OP_SCALE_WEIGHT - do we want this?
// Construct a query from a scale factor and a subquery.  Within this header
// it is used by operator*=, by operator/= (which passes 1.0 / factor) and by
// the free operator* / operator/ helpers.
175 Query(double factor
, const Xapian::Query
& subquery
);
177 // FIXME: legacy form of above (assuming we want to add that...)
// Takes the same subquery and factor as the (factor, subquery) constructor,
// but with an explicit operator as the first argument.
// NOTE(review): presumably op_ must be OP_SCALE_WEIGHT here - confirm
// against the out-of-line implementation.
178 Query(op op_
, const Xapian::Query
& subquery
, double factor
);
181 Query(op op_
, const Xapian::Query
& a
, const Xapian::Query
& b
)
184 bool positional
= (op_
== OP_NEAR
|| op_
== OP_PHRASE
);
185 add_subquery(positional
, a
);
186 add_subquery(positional
, b
);
190 // Pairwise with std::string.
191 Query(op op_
, const std::string
& a
, const std::string
& b
)
194 add_subquery(false, a
);
195 add_subquery(false, b
);
199 // OP_VALUE_GE/OP_VALUE_LE
200 Query(op op_
, Xapian::valueno slot
, const std::string
& limit
);
203 Query(op op_
, Xapian::valueno slot
,
204 const std::string
& begin
, const std::string
& end
);
206 /** Query constructor for OP_WILDCARD queries.
208 * @param op Must be OP_WILDCARD
209 * @param pattern The wildcard pattern - currently this is just a string
210 * and the wildcard expands to terms which start with
211 * exactly this string.
212 * @param max_expansion The maximum number of terms to expand to
213 * (default: 0, which means no limit)
214 * @param max_type How to enforce max_expansion - one of
215 * @a WILDCARD_LIMIT_ERROR (the default),
216 * @a WILDCARD_LIMIT_FIRST or
217 * @a WILDCARD_LIMIT_MOST_FREQUENT.
218 * When searching multiple databases, the expansion limit
219 * is currently applied independently for each database,
220 * so the total number of terms may be higher than the
221 * limit. This is arguably a bug, and may change in future versions.
223 * @param combiner The Query::op to combine the terms with - one of
224 * @a OP_SYNONYM (the default), @a OP_OR or @a OP_MAX.
227 const std::string
& pattern
,
228 Xapian::termcount max_expansion
= 0,
229 int max_type
= WILDCARD_LIMIT_ERROR
,
230 op combiner
= OP_SYNONYM
);
233 Query(op op_
, I begin
, I end
, Xapian::termcount window
= 0)
236 typedef typename
std::iterator_traits
<I
>::iterator_category iterator_category
;
237 init(op_
, window
, begin
, end
, iterator_category());
238 bool positional
= (op_
== OP_NEAR
|| op_
== OP_PHRASE
);
239 for (I i
= begin
; i
!= end
; ++i
) {
240 add_subquery(positional
, *i
);
247 // SWIG's %template doesn't seem to handle a templated ctor so we
248 // provide this fake specialised form of the above prototype.
249 Query(op op_
, XapianSWIGQueryItor qbegin
, XapianSWIGQueryItor qend
,
250 Xapian::termcount parameter
= 0);
253 Query(op op_
, XapianSWIGStrItor qbegin
, XapianSWIGStrItor qend
,
254 Xapian::termcount parameter
= 0);
258 const TermIterator
get_terms_begin() const;
260 const TermIterator
XAPIAN_NOTHROW(get_terms_end() const) {
261 return TermIterator();
264 const TermIterator
get_unique_terms_begin() const;
266 Xapian::termcount
XAPIAN_NOTHROW(get_length() const) XAPIAN_PURE_FUNCTION
;
268 bool XAPIAN_NOTHROW(empty() const) {
269 return internal
.get() == 0;
272 std::string
serialise() const;
274 static const Query
unserialise(const std::string
& serialised
,
275 const Registry
& reg
= Registry());
277 /** Get the type of the top level of the query. */
278 op
XAPIAN_NOTHROW(get_type() const) XAPIAN_PURE_FUNCTION
;
280 /** Get the number of subqueries of the top level query. */
281 size_t XAPIAN_NOTHROW(get_num_subqueries() const) XAPIAN_PURE_FUNCTION
;
283 /** Read a top level subquery.
285 * @param n Return the n-th subquery (starting from 0) - only valid when
286 * 0 <= n < get_num_subqueries().
288 const Query
get_subquery(size_t n
) const;
290 std::string
get_description() const;
292 const Query
operator&=(const Query
& o
) {
293 return (*this = Query(OP_AND
, *this, o
));
296 const Query
operator|=(const Query
& o
) {
297 return (*this = Query(OP_OR
, *this, o
));
300 const Query
operator^=(const Query
& o
) {
301 return (*this = Query(OP_XOR
, *this, o
));
304 const Query
operator*=(double factor
) {
305 return (*this = Query(factor
, *this));
308 const Query
operator/=(double factor
) {
309 return (*this = Query(1.0 / factor
, *this));
312 /** @private @internal */
// Wrap an existing Internal object: internal_ is stored in the
// reference-counted `internal` pointer.  Declared explicit so that a raw
// Internal* is never implicitly converted to a Query.
313 explicit Query(Internal
* internal_
) : internal(internal_
) { }
315 explicit Query(Query::op op_
) {
317 if (op_
!= Query::OP_INVALID
) done();
// Non-template init() overload (defined out of line).  The iterator-tag
// overloads of init() in this class forward to it, passing `end - begin` as
// n_subqueries for random-access iterators and 0 for plain input iterators.
// NOTE(review): since 0 is passed when the count isn't known up front,
// n_subqueries looks like a size hint rather than an exact count - confirm
// in the implementation.
321 void init(Query::op op_
, size_t n_subqueries
, Xapian::termcount window
= 0);
324 void init(Query::op op_
, Xapian::termcount window
,
325 const I
& begin
, const I
& end
, std::random_access_iterator_tag
)
327 init(op_
, end
- begin
, window
);
331 void init(Query::op op_
, Xapian::termcount window
,
332 const I
&, const I
&, std::input_iterator_tag
)
334 init(op_
, 0, window
);
// Add one subquery (defined out of line).  The constructors in this header
// which take Query arguments compute `positional` as
// (op_ == OP_NEAR || op_ == OP_PHRASE); the std::string overload of
// add_subquery() always forwards here with positional == false.
337 void add_subquery(bool positional
, const Xapian::Query
& subquery
);
339 void add_subquery(bool, const std::string
& subquery
) {
340 add_subquery(false, Xapian::Query(subquery
));
343 void add_subquery(bool positional
, const Xapian::Query
* subquery
) {
344 // FIXME: subquery NULL?
345 add_subquery(positional
, *subquery
);
352 operator&(const Query
& a
, const Query
& b
)
354 return Query(Query::OP_AND
, a
, b
);
358 operator|(const Query
& a
, const Query
& b
)
360 return Query(Query::OP_OR
, a
, b
);
364 operator^(const Query
& a
, const Query
& b
)
366 return Query(Query::OP_XOR
, a
, b
);
370 operator*(double factor
, const Query
& q
)
372 return Query(factor
, q
);
376 operator*(const Query
& q
, double factor
)
378 return Query(factor
, q
);
382 operator/(const Query
& q
, double factor
)
384 return Query(1.0 / factor
, q
);
387 class InvertedQuery_
{
390 void operator=(const InvertedQuery_
&);
// Wrap query_: initialises the `query` member from it.  Declared explicit so
// that a Query is never implicitly converted to an InvertedQuery_.
392 explicit InvertedQuery_(const Query
& query_
) : query(query_
) { }
395 // GCC 4.2 seems to need a copy ctor.
// Copy constructor: initialises `query` from o.query.  It is spelled out
// explicitly because of the GCC 4.2 issue noted above.
396 InvertedQuery_(const InvertedQuery_
& o
) : query(o
.query
) { }
398 operator Query() const {
399 return Query(Query::OP_AND_NOT
, Query::MatchAll
, query
);
402 friend const InvertedQuery_
operator~(const Query
&q
);
404 friend const Query
operator&(const Query
& a
, const InvertedQuery_
& b
);
408 operator&(const Query
& a
, const InvertedQuery_
& b
)
410 return Query(Query::OP_AND_NOT
, a
, b
.query
);
413 inline const InvertedQuery_
414 operator~(const Query
&q
)
416 return InvertedQuery_(q
);
425 class Query::Internal
: public Xapian::Internal::intrusive_base
{
// Default constructor: declared through the XAPIAN_NOTHROW macro and has an
// empty body.
427 XAPIAN_NOTHROW(Internal()) { }
431 virtual PostingIterator::Internal
* postlist(QueryOptimiser
* qopt
, double factor
) const = 0;
433 virtual void postlist_sub_and_like(Xapian::Internal::AndContext
& ctx
,
434 QueryOptimiser
* qopt
,
435 double factor
) const;
437 virtual void postlist_sub_or_like(Xapian::Internal::OrContext
& ctx
,
438 QueryOptimiser
* qopt
,
439 double factor
) const;
441 virtual void postlist_sub_xor(Xapian::Internal::XorContext
& ctx
,
442 QueryOptimiser
* qopt
,
443 double factor
) const;
445 virtual termcount
XAPIAN_NOTHROW(get_length() const) XAPIAN_PURE_FUNCTION
;
447 virtual void serialise(std::string
& result
) const = 0;
449 static Query::Internal
* unserialise(const char ** p
, const char * end
, const Registry
& reg
);
451 virtual Query::op
XAPIAN_NOTHROW(get_type() const) XAPIAN_PURE_FUNCTION
= 0;
452 virtual size_t XAPIAN_NOTHROW(get_num_subqueries() const) XAPIAN_PURE_FUNCTION
;
453 virtual const Query
get_subquery(size_t n
) const;
455 virtual std::string
get_description() const = 0;
457 // Pass argument as void* to avoid need to include <vector>.
458 virtual void gather_terms(void * void_terms
) const;
463 #endif // XAPIAN_INCLUDED_QUERY_H