Remove unused header include
[xapian.git] / xapian-core / include / xapian / queryparser.h
blob3b272d03d8ac03bc780eb5105be45ebbcdd0e186
1 /** @file queryparser.h
2 * @brief parsing a user query string to build a Xapian::Query object
3 */
4 /* Copyright (C) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016 Olly Betts
5 * Copyright (C) 2010 Adam Sjøgren
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20 * USA
23 #ifndef XAPIAN_INCLUDED_QUERYPARSER_H
24 #define XAPIAN_INCLUDED_QUERYPARSER_H
26 #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD
27 # error "Never use <xapian/queryparser.h> directly; include <xapian.h> instead."
28 #endif
30 #include <xapian/attributes.h>
31 #include <xapian/intrusive_ptr.h>
32 #include <xapian/query.h>
33 #include <xapian/termiterator.h>
34 #include <xapian/visibility.h>
36 #include <set>
37 #include <string>
39 namespace Xapian {
41 class Database;
42 class Stem;
44 /// Base class for stop-word decision functor.
45 class XAPIAN_VISIBILITY_DEFAULT Stopper
46 : public Xapian::Internal::opt_intrusive_base {
47 /// Don't allow assignment.
48 void operator=(const Stopper &);
50 /// Don't allow copying.
51 Stopper(const Stopper &);
53 public:
54 /// Default constructor.
55 Stopper() { }
57 /** Is term a stop-word?
59 * @param term The term to test.
61 virtual bool operator()(const std::string & term) const = 0;
63 /// Class has virtual methods, so provide a virtual destructor.
64 virtual ~Stopper() { }
66 /// Return a string describing this object.
67 virtual std::string get_description() const;
69 Stopper * release() {
70 opt_intrusive_base::release();
71 return this;
74 const Stopper * release() const {
75 opt_intrusive_base::release();
76 return this;
80 /// Simple implementation of Stopper class - this will suit most users.
81 class XAPIAN_VISIBILITY_DEFAULT SimpleStopper : public Stopper {
82 std::set<std::string> stop_words;
84 public:
85 /// Default constructor.
86 SimpleStopper() { }
88 /** Initialise from a pair of iterators.
90 * Xapian includes stop list files for many languages. You can initialise from a file like this:
91 * @code
92 * ifstream inFile ("stopwords/english/stop.txt");
93 * Xapian::SimpleStopper stopper((istream_iterator<string>(inFile)), istream_iterator<string>());
94 * @endcode
97 template <class Iterator>
98 SimpleStopper(Iterator begin, Iterator end) : stop_words(begin, end) { }
100 /// Add a single stop word.
101 void add(const std::string & word) { stop_words.insert(word); }
103 virtual bool operator()(const std::string & term) const {
104 return stop_words.find(term) != stop_words.end();
107 virtual std::string get_description() const;
110 enum {
111 RP_SUFFIX = 1,
112 RP_REPEATED = 2,
113 RP_DATE_PREFER_MDY = 4
116 /// Base class for range processors.
117 class XAPIAN_VISIBILITY_DEFAULT RangeProcessor
118 : public Xapian::Internal::opt_intrusive_base {
119 /// Don't allow assignment.
120 void operator=(const RangeProcessor &);
122 /// Don't allow copying.
123 RangeProcessor(const RangeProcessor &);
125 protected:
126 Xapian::valueno slot;
128 std::string str;
130 unsigned flags;
132 public:
133 /** Default constructor. */
134 RangeProcessor() : slot(Xapian::BAD_VALUENO), flags(0) { }
136 /** Constructor.
138 * @param slot_ Which value slot to generate ranges over.
139 * @param str_ A string to look for to recognise values as belonging
140 * to this range (as a prefix by default, or as a suffix
141 * if flags Xapian::RP_SUFFIX is specified).
142 * @param flags_ Zero or more of the following flags, combined with
143 * bitwise-or:
144 * * Xapian::RP_SUFFIX - require @a str_ as a suffix
145 * instead of a prefix.
146 * * Xapian::RP_REPEATED - optionally allow @a str_
147 * on both ends of the range - e.g. $1..$10 or
148 * 5m..50m. By default a prefix is only checked for on
149 * the start (e.g. date:1/1/1980..31/12/1989), and a
150 * suffix only on the end (e.g. 2..12kg).
152 RangeProcessor(Xapian::valueno slot_,
153 const std::string& str_ = std::string(),
154 unsigned flags_ = 0)
155 : slot(slot_), str(str_), flags(flags_) { }
157 /// Destructor.
158 virtual ~RangeProcessor();
160 Xapian::Query check_range(const std::string& b, const std::string& e);
162 /** Check for a valid range of this type.
164 * @param begin The start of the range as specified in the query string
165 * by the user.
166 * @param end The end of the range as specified in the query string
167 * by the user (empty string for no upper limit).
169 * @return An OP_VALUE_RANGE Query object (or if end.empty(), an
170 * OP_VALUE_GE Query object).
172 virtual Xapian::Query
173 operator()(const std::string &begin, const std::string &end);
175 RangeProcessor * release() {
176 opt_intrusive_base::release();
177 return this;
180 const RangeProcessor * release() const {
181 opt_intrusive_base::release();
182 return this;
186 /** Handle a date range.
188 * Begin and end must be dates in a recognised format.
190 class XAPIAN_VISIBILITY_DEFAULT DateRangeProcessor : public RangeProcessor {
191 int epoch_year;
193 public:
194 /** Constructor.
196 * @param slot_ The value slot number to query.
198 * @param flags_ Zero or more of the following flags, combined with
199 * bitwise-or:
200 * * Xapian::RP_DATE_PREFER_MDY - interpret ambiguous
201 * dates as month/day/year rather than day/month/year.
203 * @param epoch_year_ Year to use as the epoch for dates with 2 digit
204 * years (default: 1970, so 1/1/69 is 2069 while
205 * 1/1/70 is 1970).
207 explicit DateRangeProcessor(Xapian::valueno slot_,
208 unsigned flags_ = 0,
209 int epoch_year_ = 1970)
210 : RangeProcessor(slot_, std::string(), flags_),
211 epoch_year(epoch_year_) { }
213 /** Constructor.
215 * @param slot_ The value slot number to query.
217 * @param str_ A string to look for to recognise values as belonging
218 * to this date range.
220 * @param flags_ Zero or more of the following flags, combined with
221 * bitwise-or:
222 * * Xapian::RP_SUFFIX - require @a str_ as a suffix
223 * instead of a prefix.
224 * * Xapian::RP_REPEATED - optionally allow @a str_
225 * on both ends of the range - e.g. $1..$10 or
226 * 5m..50m. By default a prefix is only checked for on
227 * the start (e.g. date:1/1/1980..31/12/1989), and a
228 * suffix only on the end (e.g. 2..12kg).
229 * * Xapian::RP_DATE_PREFER_MDY - interpret ambiguous
230 * dates as month/day/year rather than day/month/year.
232 * @param epoch_year_ Year to use as the epoch for dates with 2 digit
233 * years (default: 1970, so 1/1/69 is 2069 while
234 * 1/1/70 is 1970).
236 * The string supplied in str_ is used by @a operator() to decide whether
237 * the pair of strings supplied to it constitute a valid range. If
238 * Xapian::RP_SUFFIX is not specified, the first value in a range must begin
239 * with str_ (and, if Xapian::RP_REPEATED is specified, the second value may optionally begin with str_);
240 * if Xapian::RP_SUFFIX is specified, the second value in a range must end with str_
241 * (and, if Xapian::RP_REPEATED is specified, the first value may optionally end with str_).
243 * If str_ is empty, the Xapian::RP_SUFFIX and Xapian::RP_REPEATED flags are
244 * irrelevant, and no special strings are required at the start or end of
245 * the strings defining the range.
247 * The remainder of both strings defining the endpoints must be valid
248 * dates.
250 * For example, if str_ is "created:", Xapian::RP_SUFFIX is not specified,
251 * and the range processor has been added to the queryparser, the
252 * queryparser will accept "created:1/1/2000..31/12/2001".
254 DateRangeProcessor(Xapian::valueno slot_, const std::string &str_,
255 unsigned flags_ = 0, int epoch_year_ = 1970)
256 : RangeProcessor(slot_, str_, flags_),
257 epoch_year(epoch_year_) { }
259 /** Check for a valid date range.
261 * If any specified prefix is present, and the range looks like a
262 * date range, the dates are converted to the format YYYYMMDD and
263 * combined into a value range query.
265 * @param begin The start of the range as specified in the query string
266 * by the user.
267 * @param end The end of the range as specified in the query string
268 * by the user.
270 Xapian::Query operator()(const std::string& begin, const std::string& end);
273 /** Handle a number range.
275 * This class must be used on values which have been encoded using
276 * Xapian::sortable_serialise() which turns numbers into strings which
277 * will sort in the same order as the numbers (the same values can be
278 * used to implement a numeric sort).
280 class XAPIAN_VISIBILITY_DEFAULT NumberRangeProcessor : public RangeProcessor {
281 public:
282 /** Constructor.
284 * @param slot_ The value slot number to query.
286 * @param str_ A string to look for to recognise values as belonging
287 * to this numeric range.
289 * @param flags_ Zero or more of the following flags, combined with
290 * bitwise-or:
291 * * Xapian::RP_SUFFIX - require @a str_ as a suffix
292 * instead of a prefix.
293 * * Xapian::RP_REPEATED - optionally allow @a str_
294 * on both ends of the range - e.g. $1..$10 or
295 * 5m..50m. By default a prefix is only checked for on
296 * the start (e.g. date:1/1/1980..31/12/1989), and a
297 * suffix only on the end (e.g. 2..12kg).
299 * The string supplied in str_ is used by @a operator() to decide whether
300 * the pair of strings supplied to it constitute a valid range. If
301 * Xapian::RP_SUFFIX is not specified, the first value in a range must begin
302 * with str_ (and, if Xapian::RP_REPEATED is specified, the second value may optionally begin with str_);
303 * if Xapian::RP_SUFFIX is specified, the second value in a range must end with str_
304 * (and, if Xapian::RP_REPEATED is specified, the first value may optionally end with str_).
306 * If str_ is empty, the Xapian::RP_SUFFIX and Xapian::RP_REPEATED flags are irrelevant, and no special
307 * strings are required at the start or end of the strings defining the
308 * range.
310 * The remainder of both strings defining the endpoints must be valid
311 * floating point numbers. (FIXME: define format recognised).
313 * For example, if str_ is "$" and Xapian::RP_SUFFIX is not specified, and the range
314 * processor has been added to the queryparser, the queryparser will
315 * accept "$10..50" (and also "$10..$50" if Xapian::RP_REPEATED is specified), but not "10..50" or "10..$50" as valid
316 * ranges. If str_ is "kg" and Xapian::RP_SUFFIX is specified, the queryparser will
317 * accept "10..50kg" (and also "10kg..50kg" if Xapian::RP_REPEATED is specified), but not "10..50" or "10kg..50" as
318 * valid ranges.
320 NumberRangeProcessor(Xapian::valueno slot_,
321 const std::string &str_ = std::string(),
322 unsigned flags_ = 0)
323 : RangeProcessor(slot_, str_, flags_) { }
325 /** Check for a valid numeric range.
327 * If BEGIN..END is a valid numeric range with the specified prefix/suffix
328 * (if one was specified), the prefix/suffix is removed, the string
329 * converted to a number, and encoded with Xapian::sortable_serialise(),
330 * and a value range query is built.
332 * @param begin The start of the range as specified in the query string
333 * by the user.
334 * @param end The end of the range as specified in the query string
335 * by the user.
337 Xapian::Query operator()(const std::string& begin, const std::string& end);
340 /// Base class for value range processors.
341 class XAPIAN_VISIBILITY_DEFAULT ValueRangeProcessor
342 : public Xapian::Internal::opt_intrusive_base {
343 /// Don't allow assignment.
344 void operator=(const ValueRangeProcessor &);
346 /// Don't allow copying.
347 ValueRangeProcessor(const ValueRangeProcessor &);
349 public:
350 /// Default constructor.
351 ValueRangeProcessor() { }
353 /// Destructor.
354 virtual ~ValueRangeProcessor();
356 /** Check for a valid range of this type.
358 * @param[in,out] begin The start of the range as specified in the query
359 * string by the user. This parameter is a
360 * non-const reference so the ValueRangeProcessor
361 * can modify it to return the value to start the
362 * range with.
363 * @param[in,out] end The end of the range. This is also a non-const
364 * reference so it can be modified.
366 * @return If this ValueRangeProcessor recognises the range BEGIN..END it
367 * returns the value slot number to range filter on. Otherwise it
368 * returns Xapian::BAD_VALUENO.
370 virtual Xapian::valueno operator()(std::string &begin, std::string &end) = 0;
372 ValueRangeProcessor * release() {
373 opt_intrusive_base::release();
374 return this;
377 const ValueRangeProcessor * release() const {
378 opt_intrusive_base::release();
379 return this;
383 /** Handle a string range.
385 * The end points can be any strings.
387 * @deprecated Use Xapian::RangeProcessor instead (added in 1.3.6).
389 class XAPIAN_DEPRECATED_CLASS_EX XAPIAN_VISIBILITY_DEFAULT StringValueRangeProcessor : public ValueRangeProcessor {
390 protected:
391 Xapian::valueno valno;
393 bool prefix;
395 std::string str;
397 public:
398 /** Constructor.
400 * @param slot_ The value number to return from operator().
402 explicit StringValueRangeProcessor(Xapian::valueno slot_)
403 : valno(slot_), str() { }
405 /** Constructor.
407 * @param slot_ The value number to return from operator().
408 * @param str_ A string to look for to recognise values as belonging
409 * to this range.
410 * @param prefix_ Flag specifying whether to check for str_ as a prefix
411 * or a suffix.
413 StringValueRangeProcessor(Xapian::valueno slot_, const std::string &str_,
414 bool prefix_ = true)
415 : valno(slot_), prefix(prefix_), str(str_) { }
417 /** Check for a valid string range.
419 * @param[in,out] begin The start of the range as specified in the
420 * query string by the user. This parameter is a
421 * non-const reference so the ValueRangeProcessor
422 * can modify it to return the value to start the
423 * range with.
424 * @param[in,out] end The end of the range. This is also a non-const
425 * reference so it can be modified.
427 * @return A StringValueRangeProcessor always accepts a range it is
428 * offered, and returns the value of slot_ passed at construction
429 * time. It doesn't modify @a begin or @a end.
431 Xapian::valueno operator()(std::string &begin, std::string &end);
434 /** Handle a date range.
436 * Begin and end must be dates in a recognised format.
438 * @deprecated Use Xapian::DateRangeProcessor instead (added in 1.3.6).
440 class XAPIAN_DEPRECATED_CLASS_EX XAPIAN_VISIBILITY_DEFAULT DateValueRangeProcessor : public StringValueRangeProcessor {
441 bool prefer_mdy;
442 int epoch_year;
444 public:
445 /** Constructor.
447 * @param slot_ The value number to return from operator().
448 * @param prefer_mdy_ Should ambiguous dates be interpreted as
449 * month/day/year rather than day/month/year?
450 * (default: false)
451 * @param epoch_year_ Year to use as the epoch for dates with 2 digit
452 * years (default: 1970, so 1/1/69 is 2069 while
453 * 1/1/70 is 1970).
455 DateValueRangeProcessor(Xapian::valueno slot_, bool prefer_mdy_ = false,
456 int epoch_year_ = 1970)
457 : StringValueRangeProcessor(slot_),
458 prefer_mdy(prefer_mdy_), epoch_year(epoch_year_) { }
460 /** Constructor.
462 * @param slot_ The value number to return from operator().
464 * @param str_ A string to look for to recognise values as belonging
465 * to this date range.
467 * @param prefix_ Whether to look for the string at the start or end of
468 * the values. If true, the string is a prefix; if
469 * false, the string is a suffix (default: true).
471 * @param prefer_mdy_ Should ambiguous dates be interpreted as
472 * month/day/year rather than day/month/year?
473 * (default: false)
475 * @param epoch_year_ Year to use as the epoch for dates with 2 digit
476 * years (default: 1970, so 1/1/69 is 2069 while
477 * 1/1/70 is 1970).
479 * The string supplied in str_ is used by @a operator() to decide whether
480 * the pair of strings supplied to it constitute a valid range. If
481 * prefix_ is true, the first value in a range must begin with str_ (and
482 * the second value may optionally begin with str_);
483 * if prefix_ is false, the second value in a range must end with str_
484 * (and the first value may optionally end with str_).
486 * If str_ is empty, the setting of prefix_ is irrelevant, and no special
487 * strings are required at the start or end of the strings defining the
488 * range.
490 * The remainder of both strings defining the endpoints must be valid
491 * dates.
493 * For example, if str_ is "created:" and prefix_ is true, and the range
494 * processor has been added to the queryparser, the queryparser will
495 * accept "created:1/1/2000..31/12/2001".
497 DateValueRangeProcessor(Xapian::valueno slot_, const std::string &str_,
498 bool prefix_ = true,
499 bool prefer_mdy_ = false, int epoch_year_ = 1970)
500 : StringValueRangeProcessor(slot_, str_, prefix_),
501 prefer_mdy(prefer_mdy_), epoch_year(epoch_year_) { }
503 #ifndef SWIG
504 /** Constructor.
506 * This is like the previous version, but with const char * instead of
507 * std::string - we need this overload as otherwise
508 * DateValueRangeProcessor(1, "date:") quietly interprets the second
509 * argument as a boolean in preference to std::string. If you want to
510 * be compatible with 1.2.12 and earlier, then explicitly convert to
511 * std::string, i.e.: DateValueRangeProcessor(1, std::string("date:"))
513 * @param slot_ The value number to return from operator().
515 * @param str_ A string to look for to recognise values as belonging
516 * to this date range.
518 * @param prefix_ Whether to look for the string at the start or end of
519 * the values. If true, the string is a prefix; if
520 * false, the string is a suffix (default: true).
522 * @param prefer_mdy_ Should ambiguous dates be interpreted as
523 * month/day/year rather than day/month/year?
524 * (default: false)
526 * @param epoch_year_ Year to use as the epoch for dates with 2 digit
527 * years (default: 1970, so 1/1/69 is 2069 while
528 * 1/1/70 is 1970).
530 * The string supplied in str_ is used by @a operator() to decide whether
531 * the pair of strings supplied to it constitute a valid range. If
532 * prefix_ is true, the first value in a range must begin with str_ (and
533 * the second value may optionally begin with str_);
534 * if prefix_ is false, the second value in a range must end with str_
535 * (and the first value may optionally end with str_).
537 * If str_ is empty, the setting of prefix_ is irrelevant, and no special
538 * strings are required at the start or end of the strings defining the
539 * range.
541 * The remainder of both strings defining the endpoints must be valid
542 * dates.
544 * For example, if str_ is "created:" and prefix_ is true, and the range
545 * processor has been added to the queryparser, the queryparser will
546 * accept "created:1/1/2000..31/12/2001".
548 DateValueRangeProcessor(Xapian::valueno slot_, const char * str_,
549 bool prefix_ = true,
550 bool prefer_mdy_ = false, int epoch_year_ = 1970)
551 : StringValueRangeProcessor(slot_, str_, prefix_),
552 prefer_mdy(prefer_mdy_), epoch_year(epoch_year_) { }
553 #endif
555 /** Check for a valid date range.
557 * @param[in,out] begin The start of the range as specified in the
558 * query string by the user. This parameter is a
559 * non-const reference so the ValueRangeProcessor
560 * can modify it to return the value to start the
561 * range with.
562 * @param[in,out] end The end of the range. This is also a non-const
563 * reference so it can be modified.
565 * @return If BEGIN..END is a sensible date range, this method modifies
566 * them into the format YYYYMMDD and returns the value of slot_
567 * passed at construction time. Otherwise it returns
568 * Xapian::BAD_VALUENO.
570 Xapian::valueno operator()(std::string &begin, std::string &end);
573 /** Handle a number range.
575 * This class must be used on values which have been encoded using
576 * Xapian::sortable_serialise() which turns numbers into strings which
577 * will sort in the same order as the numbers (the same values can be
578 * used to implement a numeric sort).
580 * @deprecated Use Xapian::NumberRangeProcessor instead (added in 1.3.6).
582 class XAPIAN_DEPRECATED_CLASS_EX XAPIAN_VISIBILITY_DEFAULT NumberValueRangeProcessor : public StringValueRangeProcessor {
583 public:
584 /** Constructor.
586 * @param slot_ The value number to return from operator().
588 explicit NumberValueRangeProcessor(Xapian::valueno slot_)
589 : StringValueRangeProcessor(slot_) { }
591 /** Constructor.
593 * @param slot_ The value number to return from operator().
595 * @param str_ A string to look for to recognise values as belonging
596 * to this numeric range.
598 * @param prefix_ Whether to look for the string at the start or end of
599 * the values. If true, the string is a prefix; if
600 * false, the string is a suffix (default: true).
602 * The string supplied in str_ is used by @a operator() to decide whether
603 * the pair of strings supplied to it constitute a valid range. If
604 * prefix_ is true, the first value in a range must begin with str_ (and
605 * the second value may optionally begin with str_);
606 * if prefix_ is false, the second value in a range must end with str_
607 * (and the first value may optionally end with str_).
609 * If str_ is empty, the setting of prefix_ is irrelevant, and no special
610 * strings are required at the start or end of the strings defining the
611 * range.
613 * The remainder of both strings defining the endpoints must be valid
614 * floating point numbers. (FIXME: define format recognised).
616 * For example, if str_ is "$" and prefix_ is true, and the range
617 * processor has been added to the queryparser, the queryparser will
618 * accept "$10..50" or "$10..$50", but not "10..50" or "10..$50" as valid
619 * ranges. If str_ is "kg" and prefix_ is false, the queryparser will
620 * accept "10..50kg" or "10kg..50kg", but not "10..50" or "10kg..50" as
621 * valid ranges.
623 NumberValueRangeProcessor(Xapian::valueno slot_, const std::string &str_,
624 bool prefix_ = true)
625 : StringValueRangeProcessor(slot_, str_, prefix_) { }
627 /** Check for a valid numeric range.
629 * @param[in,out] begin The start of the range as specified in the
630 * query string by the user. This parameter is a
631 * non-const reference so the ValueRangeProcessor
632 * can modify it to return the value to start the
633 * range with.
634 * @param[in,out] end The end of the range. This is also a non-const
635 * reference so it can be modified.
637 * @return If BEGIN..END is a valid numeric range with the specified
638 * prefix/suffix (if one was specified), this method modifies
639 * them by removing the prefix/suffix, converting to a number,
640 * and encoding with Xapian::sortable_serialise(), and returns the
641 * value of slot_ passed at construction time. Otherwise it
642 * returns Xapian::BAD_VALUENO.
644 Xapian::valueno operator()(std::string &begin, std::string &end);
647 /** Base class for field processors.
649 class XAPIAN_VISIBILITY_DEFAULT FieldProcessor
650 : public Xapian::Internal::opt_intrusive_base {
651 /// Don't allow assignment.
652 void operator=(const FieldProcessor &);
654 /// Don't allow copying.
655 FieldProcessor(const FieldProcessor &);
657 public:
658 /// Default constructor.
659 FieldProcessor() { }
661 /// Destructor.
662 virtual ~FieldProcessor();
664 /** Convert a field-prefixed string to a Query object.
666 * @param str The string to convert.
668 * @return Query object corresponding to @a str.
670 virtual Xapian::Query operator()(const std::string &str) = 0;
672 FieldProcessor * release() {
673 opt_intrusive_base::release();
674 return this;
677 const FieldProcessor * release() const {
678 opt_intrusive_base::release();
679 return this;
683 /// Build a Xapian::Query object from a user query string.
684 class XAPIAN_VISIBILITY_DEFAULT QueryParser {
685 public:
686 /// Class representing the queryparser internals.
687 class Internal;
688 /// @private @internal Reference counted internals.
689 Xapian::Internal::intrusive_ptr<Internal> internal;
691 /// Enum of feature flags.
692 typedef enum {
693 /// Support AND, OR, etc and bracketed subexpressions.
694 FLAG_BOOLEAN = 1,
695 /// Support quoted phrases.
696 FLAG_PHRASE = 2,
697 /// Support + and -.
698 FLAG_LOVEHATE = 4,
699 /// Support AND, OR, etc even if they aren't in ALLCAPS.
700 FLAG_BOOLEAN_ANY_CASE = 8,
701 /** Support wildcards.
703 * At present only right truncation (e.g. Xap*) is supported.
705 * Currently you can't use wildcards with boolean filter prefixes,
706 * or in a phrase (either an explicitly quoted one, or one implicitly
707 * generated by hyphens or other punctuation).
709 * In Xapian 1.2.x, you needed to tell the QueryParser object which
710 * database to expand wildcards from by calling set_database(). In
711 * Xapian 1.3.3, OP_WILDCARD was added and wildcards are now
712 * expanded when Enquire::get_mset() is called, with the expansion
713 * using the database being searched.
715 FLAG_WILDCARD = 16,
716 /** Allow queries such as 'NOT apples'.
718 * These require the use of a list of all documents in the database
719 * which is potentially expensive, so this feature isn't enabled by
720 * default.
722 FLAG_PURE_NOT = 32,
723 /** Enable partial matching.
725 * Partial matching causes the parser to treat the query as a
726 * "partially entered" search. This will automatically treat the
727 * final word as a wildcarded match, unless it is followed by
728 * whitespace, to produce more stable results from interactive
729 * searches.
731 * Currently FLAG_PARTIAL doesn't do anything if the final word
732 * in the query has a boolean filter prefix, or if it is in a phrase
733 * (either an explicitly quoted one, or one implicitly generated by
734 * hyphens or other punctuation). It also doesn't do anything if
735 * the final word is part of a value range.
737 * In Xapian 1.2.x, you needed to tell the QueryParser object which
738 * database to expand wildcards from by calling set_database(). In
739 * Xapian 1.3.3, OP_WILDCARD was added and wildcards are now
740 * expanded when Enquire::get_mset() is called, with the expansion
741 * using the database being searched.
743 FLAG_PARTIAL = 64,
745 /** Enable spelling correction.
747 * For each word in the query which doesn't exist as a term in the
748 * database, Database::get_spelling_suggestion() will be called and if
749 * a suggestion is returned, a corrected version of the query string
750 * will be built up which can be read using
751 * QueryParser::get_corrected_query_string(). The query returned is
752 * based on the uncorrected query string however - if you want a
753 * parsed query based on the corrected query string, you must call
754 * QueryParser::parse_query() again.
756 * NB: You must also call set_database() for this to work.
758 FLAG_SPELLING_CORRECTION = 128,
760 /** Enable synonym operator '~'.
762 * NB: You must also call set_database() for this to work.
764 FLAG_SYNONYM = 256,
766 /** Enable automatic use of synonyms for single terms.
768 * NB: You must also call set_database() for this to work.
770 FLAG_AUTO_SYNONYMS = 512,
772 /** Enable automatic use of synonyms for single terms and groups of
773 * terms.
775 * NB: You must also call set_database() for this to work.
777 FLAG_AUTO_MULTIWORD_SYNONYMS = 1024,
779 /** Enable generation of n-grams from CJK text.
781 * With this enabled, spans of CJK characters are split into unigrams
782 * and bigrams, with the unigrams carrying positional information.
783 * Non-CJK characters are split into words as normal.
785 * The corresponding option needs to have been used at index time.
787 * Flag added in Xapian 1.3.4 and 1.2.22, but this mode can be
788 * enabled in 1.2.8 and later by setting environment variable
789 * XAPIAN_CJK_NGRAM.
791 FLAG_CJK_NGRAM = 2048,
793 /** The default flags.
795 * Used if you don't explicitly pass any to @a parse_query().
796 * The default flags are FLAG_PHRASE|FLAG_BOOLEAN|FLAG_LOVEHATE.
798 * Added in Xapian 1.0.11.
800 FLAG_DEFAULT = FLAG_PHRASE|FLAG_BOOLEAN|FLAG_LOVEHATE
801 } feature_flag;
803 /// Stemming strategies, for use with set_stemming_strategy().
804 typedef enum { STEM_NONE, STEM_SOME, STEM_ALL, STEM_ALL_Z } stem_strategy;
806 /// Copy constructor.
807 QueryParser(const QueryParser & o);
809 /// Assignment.
810 QueryParser & operator=(const QueryParser & o);
812 /// Default constructor.
813 QueryParser();
815 /// Destructor.
816 ~QueryParser();
818 /** Set the stemmer.
820 * This sets the stemming algorithm which will be used by the query
821 * parser. The stemming algorithm will be used according to the stemming
822 * strategy set by set_stemming_strategy(). As of 1.3.1, this defaults
823 * to STEM_SOME, but in earlier versions the default was STEM_NONE. If
824 * you want to work with older versions, you should explicitly set
825 * a stemming strategy as well as setting a stemmer, otherwise your
826 * stemmer won't actually be used.
828 * @param stemmer The Xapian::Stem object to set.
830 void set_stemmer(const Xapian::Stem & stemmer);
832 /** Set the stemming strategy.
834 * This controls how the query parser will apply the stemming algorithm.
835 * Note that the stemming algorithm is only applied to words in
836 * probabilistic fields - boolean filter terms are never stemmed.
838 * @param strategy The strategy to use - possible values are:
839 * - STEM_NONE: Don't perform any stemming. (default in Xapian <=
840 * 1.3.0)
841 * - STEM_SOME: Stem all terms except for those which start with a
842 * capital letter, or are followed by certain characters
843 * (currently: <code>(/\@<>=*[{"</code> ), or are used
844 * with operators which need positional information.
845 * Stemmed terms are prefixed with 'Z'. (default in
846 * Xapian >= 1.3.1)
847 * - STEM_ALL: Stem all terms (note: no 'Z' prefix is added).
848 * - STEM_ALL_Z: Stem all terms (note: 'Z' prefix is added). (new in
849 * Xapian 1.2.11 and 1.3.1)
851 void set_stemming_strategy(stem_strategy strategy);
853 /** Set the stopper.
855 * @param stop The Stopper object to set (default NULL, which means no
856 * stopwords).
858 void set_stopper(const Stopper *stop = NULL);
860 /** Set the default operator.
862 * @param default_op The operator to use to combine non-filter
863 * query items when no explicit operator is used.
865 * So for example, 'weather forecast' is parsed as
866 * if it were 'weather OR forecast' by default.
868 * The most useful values for this are OP_OR (the
869 * default) and OP_AND. OP_NEAR, OP_PHRASE,
870 * OP_ELITE_SET and OP_SYNONYM are also permitted.
871 * Passing other values will result in
872 * InvalidArgumentError being thrown.
874 void set_default_op(Query::op default_op);
876 /** Get the current default operator. */
877 Query::op get_default_op() const;
879 /** Specify the database being searched.
881 * @param db The database to use for spelling correction
882 * (FLAG_SPELLING_CORRECTION), and synonyms (FLAG_SYNONYM,
883 * FLAG_AUTO_SYNONYMS, and FLAG_AUTO_MULTIWORD_SYNONYMS).
885 void set_database(const Database &db);
887 /** Specify the maximum expansion of a wildcard and/or partial term.
889 * Note: you must also set FLAG_WILDCARD and/or FLAG_PARTIAL in the flags
890 * parameter to @a parse_query() for this setting to have any
891 * effect.
893 * If you don't call this method, the default settings are no limit on
894 * wildcard expansion, and partial terms expanding to the most frequent
895 * 100 terms - i.e. as if you'd called:
897 * set_max_expansion(0);
898 * set_max_expansion(100, Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT, Xapian::QueryParser::FLAG_PARTIAL);
900 * @param max_expansion The maximum number of terms each wildcard in the
901 * query can expand to, or 0 for no limit (which is the
902 * default).
903 * @param max_type @a Xapian::Query::WILDCARD_LIMIT_ERROR,
904 * @a Xapian::Query::WILDCARD_LIMIT_FIRST or
905 * @a Xapian::Query::WILDCARD_LIMIT_MOST_FREQUENT
906 * (default: Xapian::Query::WILDCARD_LIMIT_ERROR).
907 * @param flags What to set the limit for (default:
908 * FLAG_WILDCARD|FLAG_PARTIAL, setting the limit for both
909 * wildcards and partial terms).
911 void set_max_expansion(Xapian::termcount max_expansion,
912 int max_type = Xapian::Query::WILDCARD_LIMIT_ERROR,
913 unsigned flags = FLAG_WILDCARD|FLAG_PARTIAL);
915 /** Specify the maximum expansion of a wildcard.
917 * If any wildcard expands to more than @a max_expansion terms, an
918 * exception will be thrown.
920 * This method is provided for API compatibility with Xapian 1.2.x and is
921 * deprecated - replace it with:
923 * set_max_expansion(max_expansion,
924 * Xapian::Query::WILDCARD_LIMIT_ERROR,
925 * Xapian::QueryParser::FLAG_WILDCARD);
927 XAPIAN_DEPRECATED(void set_max_wildcard_expansion(Xapian::termcount));
929 /** Parse a query.
931 * @param query_string A free-text query as entered by a user
932 * @param flags Zero or more QueryParser::feature_flag specifying
933 * what features the QueryParser should support. Combine
934 * multiple values with bitwise-or (|) (default FLAG_DEFAULT).
935 * @param default_prefix The default term prefix to use (default none).
936 * For example, you can pass "A" when parsing an "Author" field.
938 * @exception If the query string can't be parsed, then
939 * Xapian::QueryParserError is thrown. You can get an English
940 * error message to report to the user by catching it and
941 * calling get_msg() on the caught exception. The current
942 * possible values (in case you want to translate them) are:
944 * @li Unknown range operation
945 * @li parse error
946 * @li Syntax: &lt;expression&gt; AND &lt;expression&gt;
947 * @li Syntax: &lt;expression&gt; AND NOT &lt;expression&gt;
948 * @li Syntax: &lt;expression&gt; NOT &lt;expression&gt;
949 * @li Syntax: &lt;expression&gt; OR &lt;expression&gt;
950 * @li Syntax: &lt;expression&gt; XOR &lt;expression&gt;
952 Query parse_query(const std::string &query_string,
953 unsigned flags = FLAG_DEFAULT,
954 const std::string &default_prefix = std::string());
956 /** Add a probabilistic term prefix.
958 * For example:
960 * @code
961 * qp.add_prefix("author", "A");
962 * @endcode
964 * This allows the user to search for author:Orwell which will be
965 * converted to a search for the term "Aorwell".
967 * Multiple fields can be mapped to the same prefix. For example, you
968 * can make title: and subject: aliases for each other.
970 * As of 1.0.4, you can call this method multiple times with the same
971 * value of field to allow a single field to be mapped to multiple
972 * prefixes. Multiple terms are generated for such a field, and
973 * combined with @c Xapian::Query::OP_OR.
975 * If any prefixes are specified for the empty field name (i.e. you
976 * call this method with an empty string as the first parameter)
977 * these prefixes will be used for terms without a field specifier.
978 * If you do this and also specify the @c default_prefix parameter to @c
979 * parse_query(), then the @c default_prefix parameter will override.
981 * If the prefix parameter is empty, then "field:word" will produce the
982 * term "word" (and this can be one of several prefixes for a particular
983 * field, or for terms without a field specifier).
985 * If you call @c add_prefix() and @c add_boolean_prefix() for the
986 * same value of @a field, a @c Xapian::InvalidOperationError exception
987 * will be thrown.
989 * In 1.0.3 and earlier, subsequent calls to this method with the same
990 * value of @a field had no effect.
992 * @param field The user visible field name
993 * @param prefix The term prefix to map this to
995 void add_prefix(const std::string& field, const std::string& prefix);
997 /** Register a FieldProcessor.
999 void add_prefix(const std::string& field, Xapian::FieldProcessor * proc);
1001 /** Add a boolean term prefix allowing the user to restrict a
1002 * search with a boolean filter specified in the free text query.
1004 * For example:
1006 * @code
1007 * qp.add_boolean_prefix("site", "H");
1008 * @endcode
1010 * This allows the user to restrict a search with site:xapian.org which
1011 * will be converted to Hxapian.org combined with any probabilistic
1012 * query with @c Xapian::Query::OP_FILTER.
1014 * If multiple boolean filters are specified in a query for the same
1015 * prefix, they will be combined with the @c Xapian::Query::OP_OR
1016 * operator. Then, if there are boolean filters for different prefixes,
1017 * they will be combined with the @c Xapian::Query::OP_AND operator.
1019 * Multiple fields can be mapped to the same prefix (so for example
1020 * you can make site: and domain: aliases for each other). Instances of
1021 * fields with different aliases but the same prefix will still be
1022 * combined with the OR operator.
1024 * For example, if "site" and "domain" map to "H", but author maps to "A",
1025 * a search for "site:foo domain:bar author:Fred" will map to
1026 * "(Hfoo OR Hbar) AND Afred".
1028 * As of 1.0.4, you can call this method multiple times with the same
1029 * value of field to allow a single field to be mapped to multiple
1030 * prefixes. Multiple terms are generated for such a field, and
1031 * combined with @c Xapian::Query::OP_OR.
1033 * Calling this method with an empty string for @a field will cause
1034 * a @c Xapian::InvalidArgumentError.
1036 * If you call @c add_prefix() and @c add_boolean_prefix() for the
1037 * same value of @a field, a @c Xapian::InvalidOperationError exception
1038 * will be thrown.
1040 * In 1.0.3 and earlier, subsequent calls to this method with the same
1041 * value of @a field had no effect.
1043 * @param field The user visible field name
1044 * @param prefix The term prefix to map this to
1045 * @param grouping Controls how multiple filters are combined - filters
1046 * with the same grouping value are combined with OP_OR,
1047 * then the resulting queries are combined with OP_AND.
1048 * If NULL, then @a field is used for grouping. If an
1049 * empty string, then a unique grouping is created for
1050 * each filter (this is sometimes useful when each
1051 * document can have multiple terms with this prefix).
1052 * [default: NULL]
1054 void add_boolean_prefix(const std::string &field, const std::string &prefix,
1055 const std::string* grouping = NULL);
1057 void add_boolean_prefix(const std::string &field, const std::string &prefix,
1058 bool exclusive) {
1059 if (exclusive) {
1060 add_boolean_prefix(field, prefix);
1061 } else {
1062 std::string empty_grouping;
1063 add_boolean_prefix(field, prefix, &empty_grouping);
1067 /** Register a FieldProcessor for a boolean prefix.
1069 void add_boolean_prefix(const std::string &field, Xapian::FieldProcessor *proc,
1070 const std::string* grouping = NULL);
1072 /** Register a FieldProcessor for a boolean prefix.
1074 void add_boolean_prefix(const std::string &field, Xapian::FieldProcessor *proc,
1075 bool exclusive) {
1076 if (exclusive) {
1077 add_boolean_prefix(field, proc);
1078 } else {
1079 std::string empty_grouping;
1080 add_boolean_prefix(field, proc, &empty_grouping);
1084 /// Iterate over terms omitted from the query as stopwords.
1085 TermIterator stoplist_begin() const;
/// Returns a default-constructed TermIterator (presumably the end marker paired with stoplist_begin()).
1086 TermIterator XAPIAN_NOTHROW(stoplist_end() const) {
1087 return TermIterator();
1090 /// Iterate over unstemmed forms of the given (stemmed) term used in the query.
1091 TermIterator unstem_begin(const std::string &term) const;
/// Returns a default-constructed TermIterator; the term argument is unused (presumably the end marker paired with unstem_begin()).
1092 TermIterator XAPIAN_NOTHROW(unstem_end(const std::string &) const) {
1093 return TermIterator();
1096 /// Register a RangeProcessor.
1097 void add_rangeprocessor(Xapian::RangeProcessor * range_proc,
1098 const std::string* grouping = NULL);
1100 /** Register a ValueRangeProcessor.
1102 * This method is provided for API compatibility with Xapian 1.2.x and is
1103 * deprecated - use @a add_rangeprocessor() with a RangeProcessor instead.
1105 XAPIAN_DEPRECATED(void add_valuerangeprocessor(Xapian::ValueRangeProcessor * vrproc)) {
1106 /// Compatibility shim.
1107 class ShimRangeProcessor : public RangeProcessor {
1108 Xapian::Internal::opt_intrusive_ptr<Xapian::ValueRangeProcessor> vrp;
1110 public:
1111 ShimRangeProcessor(Xapian::ValueRangeProcessor * vrp_)
1112 : RangeProcessor(Xapian::BAD_VALUENO), vrp(vrp_) { }
1114 Xapian::Query
1115 operator()(const std::string &begin, const std::string &end)
1117 std::string b = begin, e = end;
1118 slot = (*vrp)(b, e);
1119 if (slot == Xapian::BAD_VALUENO)
1120 return Xapian::Query(Xapian::Query::OP_INVALID);
1121 return RangeProcessor::operator()(b, e);
1125 add_rangeprocessor((new ShimRangeProcessor(vrproc))->release());
1128 /** Get the spelling-corrected query string.
1130 * This will only be set if FLAG_SPELLING_CORRECTION is specified when
1131 * QueryParser::parse_query() was last called.
1133 * If there were no corrections, an empty string is returned.
1135 std::string get_corrected_query_string() const;
1137 /// Return a string describing this object.
1138 std::string get_description() const;
1141 inline void
1142 QueryParser::set_max_wildcard_expansion(Xapian::termcount max_expansion)
1144 set_max_expansion(max_expansion,
1145 Xapian::Query::WILDCARD_LIMIT_ERROR,
1146 FLAG_WILDCARD);
1149 /// @private @internal Helper for sortable_serialise().
1150 XAPIAN_VISIBILITY_DEFAULT
1151 size_t XAPIAN_NOTHROW(sortable_serialise_(double value, char * buf));
1153 /** Convert a floating point number to a string, preserving sort order.
1155 * This method converts a floating point number to a string, suitable for
1156 * using as a value for numeric range restriction, or for use as a sort
1157 * key.
1159 * The conversion is platform independent.
1161 * The conversion attempts to ensure that, for any pair of values supplied
1162 * to the conversion algorithm, the result of comparing the original
1163 * values (with a numeric comparison operator) will be the same as the
1164 * result of comparing the resulting values (with a string comparison
1165 * operator). On platforms which represent doubles with the precisions
1166 * specified by IEEE_754, this will be the case: if the representation of
1167 * doubles is more precise, it is possible that two very close doubles
1168 * will be mapped to the same string, so will compare equal.
1170 * Note also that both zero and -zero will be converted to the same
1171 * representation: since these compare equal, this satisfies the
1172 * comparison constraint, but it's worth knowing this if you wish to use
1173 * the encoding in some situation where this distinction matters.
1175 * Handling of NaN isn't (currently) guaranteed to be sensible.
1177 * @param value The number to serialise.
1179 inline std::string sortable_serialise(double value) {
1180 char buf[9];
1181 return std::string(buf, sortable_serialise_(value, buf));
1184 /** Convert a string encoded using @a sortable_serialise back to a floating
1185 * point number.
1187 * This expects the input to be a string produced by @a sortable_serialise().
1188 * If the input is not such a string, the value returned is undefined (but
1189 * no error will be thrown).
1191 * The result of the conversion will be exactly the value which was
1192 * supplied to @a sortable_serialise() when making the string on platforms
1193 * which represent doubles with the precisions specified by IEEE_754, but
1194 * may be a different (nearby) value on other platforms.
1196 * @param serialised The serialised string to decode.
1198 XAPIAN_VISIBILITY_DEFAULT
1199 double XAPIAN_NOTHROW(sortable_unserialise(const std::string & serialised));
1203 #endif // XAPIAN_INCLUDED_QUERYPARSER_H