From cca8b4a5cf6f68bc44b26aff00a9a6ccb7925d74 Mon Sep 17 00:00:00 2001 From: Olly Betts Date: Fri, 6 May 2016 15:30:29 +1200 Subject: [PATCH] Add RangeProcessor class to replace ValueRangeProcessor ValueRangeProcessor is now deprecated. RangeProcessor() has an operator()() method returning Xapian::Query, whereas ValueRangeProcessor can only return a value range. Combining of ranges over the same quantity with OP_OR is now handled by an explicit "grouping" parameter, with a sensible default which works for value range queries. Boolean term prefixes and FieldProcessor now support "grouping" too, so ranges and other filters can now be grouped together. Fixes #663. --- xapian-applications/omega/query.cc | 11 +- xapian-bindings/csharp/Makefile.am | 3 + xapian-bindings/java/Makefile.am | 3 + xapian-bindings/perl/perl.i | 1 + xapian-bindings/xapian-headers.i | 5 + xapian-core/api/query.cc | 4 + xapian-core/api/queryinternal.cc | 26 ++ xapian-core/api/queryinternal.h | 38 ++- xapian-core/api/valuerangeproc.cc | 190 +++++++++++++- xapian-core/docs/deprecation.rst | 21 ++ xapian-core/include/xapian/query.h | 7 + xapian-core/include/xapian/queryparser.h | 322 +++++++++++++++++++++-- xapian-core/queryparser/queryparser.cc | 34 +-- xapian-core/queryparser/queryparser.lemony | 208 ++++++++------- xapian-core/queryparser/queryparser_internal.h | 37 ++- xapian-core/tests/api_none.cc | 126 +++++++-- xapian-core/tests/generate-api_generated | 7 + xapian-core/tests/queryparsertest.cc | 349 ++++++++++++++++++++++++- 18 files changed, 1212 insertions(+), 180 deletions(-) diff --git a/xapian-applications/omega/query.cc b/xapian-applications/omega/query.cc index f0042a70b..84d4c5fba 100644 --- a/xapian-applications/omega/query.cc +++ b/xapian-applications/omega/query.cc @@ -112,7 +112,7 @@ static Xapian::Query query; Xapian::Query::op default_op = Xapian::Query::OP_AND; // default matching mode static Xapian::QueryParser qp; -static Xapian::NumberValueRangeProcessor * size_vrp = NULL; 
+static Xapian::NumberRangeProcessor * size_rp = NULL; static Xapian::Stem *stemmer = NULL; static string eval_file(const string &fmtfile); @@ -319,11 +319,10 @@ set_probabilistic(const string &oldp) qp.set_stopper(new MyStopper()); qp.set_default_op(default_op); qp.set_database(db); - // FIXME: provide a custom VRP which handles size:10..20K, etc. - if (!size_vrp) - size_vrp = new Xapian::NumberValueRangeProcessor(VALUE_SIZE, "size:", - true); - qp.add_valuerangeprocessor(size_vrp); + // FIXME: provide a custom RP which handles size:10..20K, etc. + if (!size_rp) + size_rp = new Xapian::NumberRangeProcessor(VALUE_SIZE, "size:"); + qp.add_rangeprocessor(size_rp); map::const_iterator pfx = option.lower_bound("prefix,"); for (; pfx != option.end() && startswith(pfx->first, "prefix,"); ++pfx) { string user_prefix(pfx->first, 7); diff --git a/xapian-bindings/csharp/Makefile.am b/xapian-bindings/csharp/Makefile.am index 8ac3d1dd2..bac12a85c 100644 --- a/xapian-bindings/csharp/Makefile.am +++ b/xapian-bindings/csharp/Makefile.am @@ -16,6 +16,7 @@ XAPIAN_SWIG_CS_SRCS=\ generated-csharp/Chert.cs \ generated-csharp/Compactor.cs \ generated-csharp/Database.cs \ + generated-csharp/DateRangeProcessor.cs \ generated-csharp/DateValueRangeProcessor.cs \ generated-csharp/DecreasingValueWeightPostingSource.cs \ generated-csharp/DLHWeight.cs \ @@ -47,6 +48,7 @@ XAPIAN_SWIG_CS_SRCS=\ generated-csharp/MSet.cs \ generated-csharp/MSetIterator.cs \ generated-csharp/MultiValueKeyMaker.cs \ + generated-csharp/NumberRangeProcessor.cs \ generated-csharp/NumberValueRangeProcessor.cs \ generated-csharp/PL2Weight.cs \ generated-csharp/PositionIterator.cs \ @@ -57,6 +59,7 @@ XAPIAN_SWIG_CS_SRCS=\ generated-csharp/Remote.cs \ generated-csharp/RSet.cs \ generated-csharp/SWIGTYPE_p_std__string.cs \ + generated-csharp/RangeProcessor.cs \ generated-csharp/Registry.cs \ generated-csharp/SimpleStopper.cs \ generated-csharp/Stem.cs \ diff --git a/xapian-bindings/java/Makefile.am 
b/xapian-bindings/java/Makefile.am index 292d30852..250b9d2ef 100644 --- a/xapian-bindings/java/Makefile.am +++ b/xapian-bindings/java/Makefile.am @@ -31,6 +31,7 @@ XAPIAN_SWIG_JAVA_SRCS=\ org/xapian/Chert.java\ org/xapian/Compactor.java\ org/xapian/Database.java\ + org/xapian/DateRangeProcessor.java\ org/xapian/DateValueRangeProcessor.java\ org/xapian/DecreasingValueWeightPostingSource.java\ org/xapian/DLHWeight.java\ @@ -62,6 +63,7 @@ XAPIAN_SWIG_JAVA_SRCS=\ org/xapian/MSet.java\ org/xapian/MSetIterator.java\ org/xapian/MultiValueKeyMaker.java\ + org/xapian/NumberRangeProcessor.java\ org/xapian/NumberValueRangeProcessor.java\ org/xapian/PL2Weight.java\ org/xapian/PositionIterator.java\ @@ -69,6 +71,7 @@ XAPIAN_SWIG_JAVA_SRCS=\ org/xapian/PostingSource.java\ org/xapian/Query.java\ org/xapian/QueryParser.java\ + org/xapian/RangeProcessor.java\ org/xapian/Registry.java\ org/xapian/Remote.java\ org/xapian/RSet.java\ diff --git a/xapian-bindings/perl/perl.i b/xapian-bindings/perl/perl.i index 76179cf5f..27d166086 100644 --- a/xapian-bindings/perl/perl.i +++ b/xapian-bindings/perl/perl.i @@ -49,6 +49,7 @@ %constant int OP_MAX = Xapian::Query::OP_MAX; %constant int OP_WILDCARD = Xapian::Query::OP_WILDCARD; %constant int OP_VALUE_LE = Xapian::Query::OP_VALUE_LE; +%constant int OP_INVALID = Xapian::Query::OP_INVALID; %constant int FLAG_BOOLEAN = Xapian::QueryParser::FLAG_BOOLEAN; %constant int FLAG_PHRASE = Xapian::QueryParser::FLAG_PHRASE; %constant int FLAG_LOVEHATE = Xapian::QueryParser::FLAG_LOVEHATE; diff --git a/xapian-bindings/xapian-headers.i b/xapian-bindings/xapian-headers.i index 56fbeb468..a62070105 100644 --- a/xapian-bindings/xapian-headers.i +++ b/xapian-bindings/xapian-headers.i @@ -346,13 +346,18 @@ SUBCLASSABLE(Xapian, KeyMaker) SUBCLASSABLE(Xapian, FieldProcessor) // Suppress warning that Xapian::Internal::opt_intrusive_base is unknown. 
%warnfilter(SWIGWARN_TYPE_UNDEFINED_CLASS) Xapian::Stopper; +SUBCLASSABLE(Xapian, RangeProcessor) SUBCLASSABLE(Xapian, Stopper) SUBCLASSABLE(Xapian, ValueRangeProcessor) // Suppress warning that Xapian::Internal::opt_intrusive_base is unknown. +%warnfilter(SWIGWARN_TYPE_UNDEFINED_CLASS) Xapian::RangeProcessor; %warnfilter(SWIGWARN_TYPE_UNDEFINED_CLASS) Xapian::ValueRangeProcessor; %warnfilter(SWIGWARN_TYPE_UNDEFINED_CLASS) Xapian::FieldProcessor; STANDARD_IGNORES(Xapian, QueryParser) %ignore Xapian::QueryParser::QueryParser(const QueryParser &); +CONSTANT(int, Xapian, RP_SUFFIX); +CONSTANT(int, Xapian, RP_REPEATED); +CONSTANT(int, Xapian, RP_DATE_PREFER_MDY); %include %include diff --git a/xapian-core/api/query.cc b/xapian-core/api/query.cc index f22bc2e10..221d279e2 100644 --- a/xapian-core/api/query.cc +++ b/xapian-core/api/query.cc @@ -271,6 +271,10 @@ Query::init(op op_, size_t n_subqueries, Xapian::termcount parameter) internal = new Xapian::Internal::QueryMax(n_subqueries); break; default: + if (op_ == OP_INVALID && n_subqueries == 0) { + internal = new Xapian::Internal::QueryInvalid(); + break; + } throw InvalidArgumentError("op not valid with a list of subqueries"); } } diff --git a/xapian-core/api/queryinternal.cc b/xapian-core/api/queryinternal.cc index 33fca96f4..4247eb884 100644 --- a/xapian-core/api/queryinternal.cc +++ b/xapian-core/api/queryinternal.cc @@ -542,6 +542,8 @@ Query::Internal::unserialise(const char ** p, const char * end, // 000ttttt where: // ttttt -> encodes which OP_XXX switch (ch & 0x1f) { + case 0x00: // OP_INVALID + return new Xapian::Internal::QueryInvalid(); case 0x0b: { // Wildcard if (*p == end) throw SerialisationError("not enough data"); @@ -1819,5 +1821,29 @@ QueryMax::get_description() const return get_description_helper(" MAX "); } +Xapian::Query::op +QueryInvalid::get_type() const XAPIAN_NOEXCEPT +{ + return Xapian::Query::OP_INVALID; +} + +PostingIterator::Internal * +QueryInvalid::postlist(QueryOptimiser *, double) 
const +{ + throw Xapian::InvalidOperationError("Query is invalid"); +} + +void +QueryInvalid::serialise(std::string & result) const +{ + result += static_cast(0x00); +} + +string +QueryInvalid::get_description() const +{ + return ""; +} + } } diff --git a/xapian-core/api/queryinternal.h b/xapian-core/api/queryinternal.h index c9738e9b9..1e62de71f 100644 --- a/xapian-core/api/queryinternal.h +++ b/xapian-core/api/queryinternal.h @@ -104,16 +104,25 @@ class QueryScaleWeight : public Query::Internal { void gather_terms(void * void_terms) const; }; -class QueryValueRange : public Query::Internal { +class QueryValueBase : public Query::Internal { + protected: Xapian::valueno slot; + public: + QueryValueBase(Xapian::valueno slot_) + : slot(slot_) { } + + Xapian::valueno get_slot() const { return slot; } +}; + +class QueryValueRange : public QueryValueBase { std::string begin, end; public: QueryValueRange(Xapian::valueno slot_, const std::string &begin_, const std::string &end_) - : slot(slot_), begin(begin_), end(end_) { } + : QueryValueBase(slot_), begin(begin_), end(end_) { } PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const; @@ -124,14 +133,12 @@ class QueryValueRange : public Query::Internal { std::string get_description() const; }; -class QueryValueLE : public Query::Internal { - Xapian::valueno slot; - +class QueryValueLE : public QueryValueBase { std::string limit; public: QueryValueLE(Xapian::valueno slot_, const std::string &limit_) - : slot(slot_), limit(limit_) { } + : QueryValueBase(slot_), limit(limit_) { } PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const; @@ -142,14 +149,12 @@ class QueryValueLE : public Query::Internal { std::string get_description() const; }; -class QueryValueGE : public Query::Internal { - Xapian::valueno slot; - +class QueryValueGE : public QueryValueBase { std::string limit; public: QueryValueGE(Xapian::valueno slot_, const std::string &limit_) - : slot(slot_), limit(limit_) 
{ } + : QueryValueBase(slot_), limit(limit_) { } PostingIterator::Internal * postlist(QueryOptimiser *qopt, double factor) const; @@ -420,6 +425,19 @@ class QueryWildcard : public Query::Internal { std::string get_description() const; }; +class QueryInvalid : public Query::Internal { + public: + QueryInvalid() { } + + Xapian::Query::op get_type() const XAPIAN_NOEXCEPT XAPIAN_PURE_FUNCTION; + + PostingIterator::Internal * postlist(QueryOptimiser * qopt, double factor) const; + + void serialise(std::string & result) const; + + std::string get_description() const; +}; + } } diff --git a/xapian-core/api/valuerangeproc.cc b/xapian-core/api/valuerangeproc.cc index 86e803820..d1e9a69b8 100644 --- a/xapian-core/api/valuerangeproc.cc +++ b/xapian-core/api/valuerangeproc.cc @@ -1,7 +1,7 @@ /** @file valuerangeproc.cc * @brief Standard ValueRangeProcessor subclass implementations */ -/* Copyright (C) 2007,2008,2009,2010,2012 Olly Betts +/* Copyright (C) 2007,2008,2009,2010,2012,2016 Olly Betts * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -251,4 +251,192 @@ NumberValueRangeProcessor::operator()(string &begin, string &end) return valno; } +Xapian::Query +RangeProcessor::check_range(const string& b, const string& e) +{ + if (str.empty()) + return operator()(b, e); + + size_t off_b = 0, len_b = string::npos; + size_t off_e = 0, len_e = string::npos; + + bool prefix = !(flags & Xapian::RP_SUFFIX); + bool repeated = (flags & Xapian::RP_REPEATED); + + if (prefix) { + // If there's a prefix, require it on the start of the range. + if (!startswith(b, str)) { + // Prefix not given. + goto not_our_range; + } + off_b = str.size(); + // Optionally allow it on the end of the range, e.g. $10..50 + if (repeated && startswith(e, str)) { + off_e = off_b; + } + } else { + // If there's a suffix, require it on the end of the range. + if (!endswith(e, str)) { + // Suffix not given. 
+ goto not_our_range; + } + len_e = e.size() - str.size(); + // Optionally allow it on the start of the range, e.g. 10..50kg + if (repeated && endswith(b, str)) { + len_b = b.size() - str.size(); + } + } + + return operator()(string(b, off_b, len_b), string(e, off_e, len_e)); + +not_our_range: + return Xapian::Query(Xapian::Query::OP_INVALID); +} + +Xapian::Query +RangeProcessor::operator()(const string& b, const string& e) +{ + if (e.empty()) + return Xapian::Query(Xapian::Query::OP_VALUE_GE, slot, b); + return Xapian::Query(Xapian::Query::OP_VALUE_RANGE, slot, b, e); +} + +Xapian::Query +DateRangeProcessor::operator()(const string& b, const string& e) +{ + if ((b.size() == 8 || b.size() == 0) && + (e.size() == 8 || e.size() == 0) && + b.find_first_not_of("0123456789") == string::npos && + e.find_first_not_of("0123456789") == string::npos) { + // YYYYMMDD + return RangeProcessor::operator()(b, e); + } + if ((b.size() == 10 || b.size() == 0) && + (e.size() == 10 || e.size() == 0)) { + if ((b.empty() || is_yyyy_mm_dd(b)) && + (e.empty() || is_yyyy_mm_dd(e))) { + string begin = b, end = e; + // YYYY-MM-DD + if (!begin.empty()) { + begin.erase(7, 1); + begin.erase(4, 1); + } + if (!end.empty()) { + end.erase(7, 1); + end.erase(4, 1); + } + return RangeProcessor::operator()(begin, end); + } + } + + bool prefer_mdy = (flags & Xapian::RP_DATE_PREFER_MDY); + int b_d, b_m, b_y; + int e_d, e_m, e_y; + if (!decode_xxy(b, b_d, b_m, b_y) || !decode_xxy(e, e_d, e_m, e_y)) + goto not_our_range; + + // Check that the month and day are within range. Also assume "b" <= + // "e" to help decide ambiguous cases. + if (!prefer_mdy && vet_dm(b_d, b_m) && vet_dm(e_d, e_m) && + (b_y != e_y || b_m < e_m || (b_m == e_m && b_d <= e_d))) { + // OK. 
+ } else if (vet_dm(b_m, b_d) && vet_dm(e_m, e_d) && + (b_y != e_y || b_d < e_d || (b_d == e_d && b_m <= e_m))) { + swap(b_m, b_d); + swap(e_m, e_d); + } else if (prefer_mdy && vet_dm(b_d, b_m) && vet_dm(e_d, e_m) && + (b_y != e_y || b_m < e_m || (b_m == e_m && b_d <= e_d))) { + // OK. + } else { + goto not_our_range; + } + + if (b_y < 100) { + b_y += 1900; + if (b_y < epoch_year) b_y += 100; + } + if (e_y < 100) { + e_y += 1900; + if (e_y < epoch_year) e_y += 100; + } + + { +#ifdef SNPRINTF + char buf_b[9], buf_e[9]; + if (!b.empty()) { + SNPRINTF(buf_b, sizeof(buf_b), "%08d", b_y * 10000 + b_m * 100 + b_d); + } else { + *buf_b = '\0'; + } + if (!e.empty()) { + SNPRINTF(buf_e, sizeof(buf_e), "%08d", e_y * 10000 + e_m * 100 + e_d); + } else { + *buf_e = '\0'; + } +#else + char buf_b[100], buf_e[100]; + buf_b[sizeof(buf_b) - 1] = '\0'; + buf_e[sizeof(buf_e) - 1] = '\0'; + if (!b.empty()) { + sprintf(buf_b, "%08d", b_y * 10000 + b_m * 100 + b_d); + if (buf_b[sizeof(buf_b) - 1]) abort(); // Buffer overrun! + } else { + *buf_b = '\0'; + } + if (!e.empty()) { + sprintf(buf_e, "%08d", e_y * 10000 + e_m * 100 + e_d); + if (buf_e[sizeof(buf_e) - 1]) abort(); // Buffer overrun! + } else { + *buf_e = '\0'; + } +#endif + return RangeProcessor::operator()(buf_b, buf_e); + } + +not_our_range: + return Xapian::Query(Xapian::Query::OP_INVALID); +} + +Xapian::Query +NumberRangeProcessor::operator()(const string& b, const string& e) +{ + // Parse the numbers to floating point. + double num_b, num_e; + + if (!b.empty()) { + errno = 0; + const char * startptr = b.c_str(); + char * endptr; + num_b = strtod(startptr, &endptr); + if (endptr != startptr + b.size() || errno) { + // Invalid characters in string || overflow or underflow. + goto not_our_range; + } + } else { + // Silence GCC warning. 
+ num_b = 0.0; + } + + if (!e.empty()) { + errno = 0; + const char * startptr = e.c_str(); + char * endptr; + num_e = strtod(startptr, &endptr); + if (endptr != startptr + e.size() || errno) { + // Invalid characters in string || overflow or underflow. + goto not_our_range; + } + } else { + // Silence GCC warning. + num_e = 0.0; + } + + return RangeProcessor::operator()( + b.empty() ? b : Xapian::sortable_serialise(num_b), + e.empty() ? e : Xapian::sortable_serialise(num_e)); + +not_our_range: + return Xapian::Query(Xapian::Query::OP_INVALID); +} + } diff --git a/xapian-core/docs/deprecation.rst b/xapian-core/docs/deprecation.rst index 89c4845eb..bc7001831 100644 --- a/xapian-core/docs/deprecation.rst +++ b/xapian-core/docs/deprecation.rst @@ -200,6 +200,11 @@ Native C++ API .. |set_max_wildcard_expansion| replace:: ``Xapian::QueryParser::set_max_wildcard_expansion()`` .. |flush| replace:: ``Xapian::WritableDatabase::flush()`` +.. |VRP| replace:: ``Xapian::ValueRangeProcessor`` +.. |DateVRP| replace:: ``Xapian::DateValueRangeProcessor`` +.. |NumberVRP| replace:: ``Xapian::NumberValueRangeProcessor`` +.. |StringVRP| replace:: ``Xapian::StringValueRangeProcessor`` +.. |add_valuerangeprocessor| replace:: ``Xapian::QueryParser::add_valuerangeprocessor()`` .. Keep table width to <= 126 columns. @@ -238,6 +243,22 @@ Deprecated Remove Feature name Upgrade suggestion and com ---------- ------ ----------------------------------- ------------------------------------------------------------------------ 1.3.6 1.5.0 |flush| Use ``Xapian::WritableDatabase::commit()`` instead (available since 1.1.0). +---------- ------ ----------------------------------- ------------------------------------------------------------------------ +1.3.6 1.5.0 Subclassing |VRP| Subclass ``Xapian::RangeProcessor`` instead, and return a + ``Xapian::Query`` from ``operator()()``. 
+---------- ------ ----------------------------------- ------------------------------------------------------------------------ +1.3.6 1.5.0 Subclassing |DateVRP| Subclass ``Xapian::DateRangeProcessor`` instead, and return a + ``Xapian::Query`` from ``operator()()``. +---------- ------ ----------------------------------- ------------------------------------------------------------------------ +1.3.6 1.5.0 Subclassing |NumberVRP| Subclass ``Xapian::NumberRangeProcessor`` instead, and return a + ``Xapian::Query`` from ``operator()()``. +---------- ------ ----------------------------------- ------------------------------------------------------------------------ +1.3.6 1.5.0 Subclassing |StringVRP| Subclass ``Xapian::RangeProcessor`` instead (which includes equivalent + support for prefix/suffix checking), and return a ``Xapian::Query`` from + ``operator()()``. +---------- ------ ----------------------------------- ------------------------------------------------------------------------ +1.3.6 1.5.0 |add_valuerangeprocessor| Use ``Xapian::QueryParser::add_rangeprocessor()`` instead, with a + ``Xapian::RangeProcessor`` object instead of a |VRP| object. 
========== ====== =================================== ======================================================================== Bindings diff --git a/xapian-core/include/xapian/query.h b/xapian-core/include/xapian/query.h index f1efbc2d4..8c5c54318 100644 --- a/xapian-core/include/xapian/query.h +++ b/xapian-core/include/xapian/query.h @@ -111,6 +111,8 @@ class XAPIAN_VISIBILITY_DEFAULT Query { OP_MAX = 14, OP_WILDCARD = 15, + OP_INVALID = 99, + LEAF_TERM = 100, LEAF_POSTING_SOURCE, LEAF_MATCH_ALL, @@ -310,6 +312,11 @@ class XAPIAN_VISIBILITY_DEFAULT Query { /** @private @internal */ explicit Query(Internal * internal_) : internal(internal_) { } + explicit Query(Query::op op_) { + init(op_, 0); + if (op_ != Query::OP_INVALID) done(); + } + private: void init(Query::op op_, size_t n_subqueries, Xapian::termcount window = 0); diff --git a/xapian-core/include/xapian/queryparser.h b/xapian-core/include/xapian/queryparser.h index 08567bcef..3b272d03d 100644 --- a/xapian-core/include/xapian/queryparser.h +++ b/xapian-core/include/xapian/queryparser.h @@ -107,6 +107,236 @@ class XAPIAN_VISIBILITY_DEFAULT SimpleStopper : public Stopper { virtual std::string get_description() const; }; +enum { + RP_SUFFIX = 1, + RP_REPEATED = 2, + RP_DATE_PREFER_MDY = 4 +}; + +/// Base class for range processors. +class XAPIAN_VISIBILITY_DEFAULT RangeProcessor + : public Xapian::Internal::opt_intrusive_base { + /// Don't allow assignment. + void operator=(const RangeProcessor &); + + /// Don't allow copying. + RangeProcessor(const RangeProcessor &); + + protected: + Xapian::valueno slot; + + std::string str; + + unsigned flags; + + public: + /** Default constructor. */ + RangeProcessor() : slot(Xapian::BAD_VALUENO), flags(0) { } + + /** Constructor. + * + * @param slot_ Which value slot to generate ranges over. + * @param str_ A string to look for to recognise values as belonging + * to this range (as a prefix by default, or as a suffix + * if flags Xapian::RP_SUFFIX is specified). 
+ * @param flags_ Zero or more of the following flags, combined with + * bitwise-or: + * * Xapian::RP_SUFFIX - require @a str_ as a suffix + * instead of a prefix. + * * Xapian::RP_REPEATED - optionally allow @a str_ + * on both ends of the range - e.g. $1..$10 or + * 5m..50m. By default a prefix is only checked for on + * the start (e.g. date:1/1/1980..31/12/1989), and a + * suffix only on the end (e.g. 2..12kg). + */ + RangeProcessor(Xapian::valueno slot_, + const std::string& str_ = std::string(), + unsigned flags_ = 0) + : slot(slot_), str(str_), flags(flags_) { } + + /// Destructor. + virtual ~RangeProcessor(); + + Xapian::Query check_range(const std::string& b, const std::string& e); + + /** Check for a valid range of this type. + * + * @param begin The start of the range as specified in the query string + * by the user. + * @param end The end of the range as specified in the query string + * by the user (empty string for no upper limit). + * + * @return An OP_VALUE_RANGE Query object (or if end.empty(), an + * OP_VALUE_GE Query object). + */ + virtual Xapian::Query + operator()(const std::string &begin, const std::string &end); + + RangeProcessor * release() { + opt_intrusive_base::release(); + return this; + } + + const RangeProcessor * release() const { + opt_intrusive_base::release(); + return this; + } +}; + +/** Handle a date range. + * + * Begin and end must be dates in a recognised format. + */ +class XAPIAN_VISIBILITY_DEFAULT DateRangeProcessor : public RangeProcessor { + int epoch_year; + + public: + /** Constructor. + * + * @param slot_ The value slot number to query. + * + * @param flags_ Zero or more of the following flags, combined with + * bitwise-or: + * * Xapian::RP_DATE_PREFER_MDY - interpret ambiguous + * dates as month/day/year rather than day/month/year. + * + * @param epoch_year_ Year to use as the epoch for dates with 2 digit + * years (default: 1970, so 1/1/69 is 2069 while + * 1/1/70 is 1970). 
+ */ + explicit DateRangeProcessor(Xapian::valueno slot_, + unsigned flags_ = 0, + int epoch_year_ = 1970) + : RangeProcessor(slot_, std::string(), flags_), + epoch_year(epoch_year_) { } + + /** Constructor. + * + * @param slot_ The value slot number to query. + * + * @param str_ A string to look for to recognise values as belonging + * to this date range. + * + * @param flags_ Zero or more of the following flags, combined with + * bitwise-or: + * * Xapian::RP_SUFFIX - require @a str_ as a suffix + * instead of a prefix. + * * Xapian::RP_REPEATED - optionally allow @a str_ + * on both ends of the range - e.g. $1..$10 or + * 5m..50m. By default a prefix is only checked for on + * the start (e.g. date:1/1/1980..31/12/1989), and a + * suffix only on the end (e.g. 2..12kg). + * * Xapian::RP_DATE_PREFER_MDY - interpret ambiguous + * dates as month/day/year rather than day/month/year. + * + * @param epoch_year_ Year to use as the epoch for dates with 2 digit + * years (default: 1970, so 1/1/69 is 2069 while + * 1/1/70 is 1970). + * + * The string supplied in str_ is used by @a operator() to decide whether + * the pair of strings supplied to it constitute a valid range. If + * Xapian::RP_SUFFIX is not set, the first value in a range must begin with + * str_ (and with Xapian::RP_REPEATED the second value may also begin with + * str_); if Xapian::RP_SUFFIX is set, the second value in a range must end + * with str_ (and with Xapian::RP_REPEATED the first value may also end with it). + * + * If str_ is empty, the Xapian::RP_SUFFIX and Xapian::RP_REPEATED flags are + * irrelevant, and no special strings are required at the start or end of + * the strings defining the range. + * + * The remainder of both strings defining the endpoints must be valid + * dates. + * + * For example, if str_ is "created:", Xapian::RP_SUFFIX is not specified, + * and the range processor has been added to the queryparser, the + * queryparser will accept "created:1/1/2000..31/12/2001". 
+ */ + DateRangeProcessor(Xapian::valueno slot_, const std::string &str_, + unsigned flags_ = 0, int epoch_year_ = 1970) + : RangeProcessor(slot_, str_, flags_), + epoch_year(epoch_year_) { } + + /** Check for a valid date range. + * + * If any specified prefix is present, and the range looks like a + * date range, the dates are converted to the format YYYYMMDD and + * combined into a value range query. + * + * @param begin The start of the range as specified in the query string + * by the user. + * @param end The end of the range as specified in the query string + * by the user. + */ + Xapian::Query operator()(const std::string& begin, const std::string& end); +}; + +/** Handle a number range. + * + * This class must be used on values which have been encoded using + * Xapian::sortable_serialise() which turns numbers into strings which + * will sort in the same order as the numbers (the same values can be + * used to implement a numeric sort). + */ +class XAPIAN_VISIBILITY_DEFAULT NumberRangeProcessor : public RangeProcessor { + public: + /** Constructor. + * + * @param slot_ The value slot number to query. + * + * @param str_ A string to look for to recognise values as belonging + * to this numeric range. + * + * @param flags_ Zero or more of the following flags, combined with + * bitwise-or: + * * Xapian::RP_SUFFIX - require @a str_ as a suffix + * instead of a prefix. + * * Xapian::RP_REPEATED - optionally allow @a str_ + * on both ends of the range - e.g. $1..$10 or + * 5m..50m. By default a prefix is only checked for on + * the start (e.g. date:1/1/1980..31/12/1989), and a + * suffix only on the end (e.g. 2..12kg). + * + * The string supplied in str_ is used by @a operator() to decide whether + * the pair of strings supplied to it constitute a valid range. 
If + * Xapian::RP_SUFFIX is not set, the first value in a range must begin with + * str_ (and with Xapian::RP_REPEATED the second value may also begin with + * str_); if Xapian::RP_SUFFIX is set, the second value in a range must end + * with str_ (and with Xapian::RP_REPEATED the first value may also end with it). + * + * If str_ is empty, the Xapian::RP_SUFFIX and Xapian::RP_REPEATED flags are + * irrelevant, and no special strings are required at the start or end of + * the strings defining the range. + * + * The remainder of both strings defining the endpoints must be valid + * floating point numbers. (FIXME: define format recognised). + * + * For example, if str_ is "$" and flags_ is Xapian::RP_REPEATED, and the + * range processor has been added to the queryparser, the queryparser will + * accept "$10..50" or "$10..$50", but not "10..50" or "10..$50" as valid + * ranges. If str_ is "kg" and flags_ is Xapian::RP_SUFFIX|Xapian::RP_REPEATED, + * the queryparser will accept "10..50kg" or "10kg..50kg", but not "10..50" + * or "10kg..50" as valid ranges. + */ + NumberRangeProcessor(Xapian::valueno slot_, + const std::string &str_ = std::string(), + unsigned flags_ = 0) + : RangeProcessor(slot_, str_, flags_) { } + + /** Check for a valid numeric range. + * + * If BEGIN..END is a valid numeric range with the specified prefix/suffix + * (if one was specified), the prefix/suffix is removed, the string + * converted to a number, and encoded with Xapian::sortable_serialise(), + * and a value range query is built. + * + * @param begin The start of the range as specified in the query string + * by the user. + * @param end The end of the range as specified in the query string + * by the user. + */ + Xapian::Query operator()(const std::string& begin, const std::string& end); +}; + /// Base class for value range processors. class XAPIAN_VISIBILITY_DEFAULT ValueRangeProcessor : public Xapian::Internal::opt_intrusive_base { @@ -153,13 +383,15 @@ class XAPIAN_VISIBILITY_DEFAULT ValueRangeProcessor /** Handle a string range. * * The end points can be any strings. 
+ * + * @deprecated Use Xapian::RangeProcessor instead (added in 1.3.6). */ -class XAPIAN_VISIBILITY_DEFAULT StringValueRangeProcessor : public ValueRangeProcessor { +class XAPIAN_DEPRECATED_CLASS_EX XAPIAN_VISIBILITY_DEFAULT StringValueRangeProcessor : public ValueRangeProcessor { protected: Xapian::valueno valno; - private: bool prefix; + std::string str; public: @@ -202,8 +434,10 @@ class XAPIAN_VISIBILITY_DEFAULT StringValueRangeProcessor : public ValueRangePro /** Handle a date range. * * Begin and end must be dates in a recognised format. + * + * @deprecated Use Xapian::DateRangeProcessor instead (added in 1.3.6). */ -class XAPIAN_VISIBILITY_DEFAULT DateValueRangeProcessor : public StringValueRangeProcessor { +class XAPIAN_DEPRECATED_CLASS_EX XAPIAN_VISIBILITY_DEFAULT DateValueRangeProcessor : public StringValueRangeProcessor { bool prefer_mdy; int epoch_year; @@ -342,8 +576,10 @@ class XAPIAN_VISIBILITY_DEFAULT DateValueRangeProcessor : public StringValueRang * Xapian::sortable_serialise() which turns numbers into strings which * will sort in the same order as the numbers (the same values can be * used to implement a numeric sort). + * + * @deprecated Use Xapian::NumberRangeProcessor instead (added in 1.3.6). */ -class XAPIAN_VISIBILITY_DEFAULT NumberValueRangeProcessor : public StringValueRangeProcessor { +class XAPIAN_DEPRECATED_CLASS_EX XAPIAN_VISIBILITY_DEFAULT NumberValueRangeProcessor : public StringValueRangeProcessor { public: /** Constructor. * @@ -756,11 +992,11 @@ class XAPIAN_VISIBILITY_DEFAULT QueryParser { * @param field The user visible field name * @param prefix The term prefix to map this to */ - void add_prefix(const std::string &field, const std::string &prefix); + void add_prefix(const std::string& field, const std::string& prefix); /** Register a FieldProcessor. 
*/ - void add_prefix(const std::string &field, Xapian::FieldProcessor * proc); + void add_prefix(const std::string& field, Xapian::FieldProcessor * proc); /** Add a boolean term prefix allowing the user to restrict a * search with a boolean filter specified in the free text query. @@ -806,21 +1042,44 @@ class XAPIAN_VISIBILITY_DEFAULT QueryParser { * * @param field The user visible field name * @param prefix The term prefix to map this to - * @param exclusive If true, each document can have at most one term with - * this prefix, so multiple filters with this prefix - * should be combined with OP_OR. If false, each - * document can have multiple terms with this prefix, so - * multiple filters should be combined with OP_AND, like - * happens with filters with different prefixes. - * [default: true] + * @param grouping Controls how multiple filters are combined - filters + * with the same grouping value are combined with OP_OR, + * then the resulting queries are combined with OP_AND. + * If NULL, then @a field is used for grouping. If an + * empty string, then a unique grouping is created for + * each filter (this is sometimes useful when each + * document can have multiple terms with this prefix). + * [default: NULL] */ void add_boolean_prefix(const std::string &field, const std::string &prefix, - bool exclusive = true); + const std::string* grouping = NULL); + + void add_boolean_prefix(const std::string &field, const std::string &prefix, + bool exclusive) { + if (exclusive) { + add_boolean_prefix(field, prefix); + } else { + std::string empty_grouping; + add_boolean_prefix(field, prefix, &empty_grouping); + } + } /** Register a FieldProcessor for a boolean prefix. */ void add_boolean_prefix(const std::string &field, Xapian::FieldProcessor *proc, - bool exclusive = true); + const std::string* grouping = NULL); + + /** Register a FieldProcessor for a boolean prefix. 
+ */ + void add_boolean_prefix(const std::string &field, Xapian::FieldProcessor *proc, + bool exclusive) { + if (exclusive) { + add_boolean_prefix(field, proc); + } else { + std::string empty_grouping; + add_boolean_prefix(field, proc, &empty_grouping); + } + } /// Iterate over terms omitted from the query as stopwords. TermIterator stoplist_begin() const; @@ -834,8 +1093,37 @@ class XAPIAN_VISIBILITY_DEFAULT QueryParser { return TermIterator(); } - /// Register a ValueRangeProcessor. - void add_valuerangeprocessor(Xapian::ValueRangeProcessor * vrproc); + /// Register a RangeProcessor. + void add_rangeprocessor(Xapian::RangeProcessor * range_proc, + const std::string* grouping = NULL); + + /** Register a ValueRangeProcessor. + * + * This method is provided for API compatibility with Xapian 1.2.x and is + * deprecated - use @a add_rangeprocessor() with a RangeProcessor instead. + */ + XAPIAN_DEPRECATED(void add_valuerangeprocessor(Xapian::ValueRangeProcessor * vrproc)) { + /// Compatibility shim. + class ShimRangeProcessor : public RangeProcessor { + Xapian::Internal::opt_intrusive_ptr vrp; + + public: + ShimRangeProcessor(Xapian::ValueRangeProcessor * vrp_) + : RangeProcessor(Xapian::BAD_VALUENO), vrp(vrp_) { } + + Xapian::Query + operator()(const std::string &begin, const std::string &end) + { + std::string b = begin, e = end; + slot = (*vrp)(b, e); + if (slot == Xapian::BAD_VALUENO) + return Xapian::Query(Xapian::Query::OP_INVALID); + return RangeProcessor::operator()(b, e); + } + }; + + add_rangeprocessor((new ShimRangeProcessor(vrproc))->release()); + } /** Get the spelling-corrected query string. * diff --git a/xapian-core/queryparser/queryparser.cc b/xapian-core/queryparser/queryparser.cc index be830026d..e40ef764f 100644 --- a/xapian-core/queryparser/queryparser.cc +++ b/xapian-core/queryparser/queryparser.cc @@ -1,7 +1,7 @@ /* queryparser.cc: The non-lemon-generated parts of the QueryParser * class. 
* - * Copyright (C) 2005,2006,2007,2008,2010,2011,2012,2013,2015 Olly Betts + * Copyright (C) 2005,2006,2007,2008,2010,2011,2012,2013,2015,2016 Olly Betts * Copyright (C) 2010 Adam Sjøgren * * This program is free software; you can redistribute it and/or @@ -56,6 +56,8 @@ SimpleStopper::get_description() const return desc; } +RangeProcessor::~RangeProcessor() { } + ValueRangeProcessor::~ValueRangeProcessor() { } FieldProcessor::~FieldProcessor() { } @@ -174,40 +176,31 @@ void QueryParser::add_prefix(const string &field, const string &prefix) { Assert(internal.get()); - internal->add_prefix(field, prefix, NON_BOOLEAN); + internal->add_prefix(field, prefix); } void QueryParser::add_prefix(const string &field, Xapian::FieldProcessor * proc) { Assert(internal.get()); - internal->add_prefix(field, proc, NON_BOOLEAN); + internal->add_prefix(field, proc); } void QueryParser::add_boolean_prefix(const string &field, const string &prefix, - bool exclusive) + const string* grouping) { Assert(internal.get()); - // Don't allow the empty prefix to be set as boolean as it doesn't - // really make sense. - if (field.empty()) - throw Xapian::UnimplementedError("Can't set the empty prefix to be a boolean filter"); - filter_type type = (exclusive ? BOOLEAN_EXCLUSIVE : BOOLEAN); - internal->add_prefix(field, prefix, type); + internal->add_boolean_prefix(field, prefix, grouping); } void -QueryParser::add_boolean_prefix(const string &field, Xapian::FieldProcessor * proc, - bool exclusive) +QueryParser::add_boolean_prefix(const string &field, + Xapian::FieldProcessor * proc, + const string* grouping) { Assert(internal.get()); - // Don't allow the empty prefix to be set as boolean as it doesn't - // really make sense. - if (field.empty()) - throw Xapian::UnimplementedError("Can't set the empty prefix to be a boolean filter"); - filter_type type = (exclusive ? 
BOOLEAN_EXCLUSIVE : BOOLEAN); - internal->add_prefix(field, proc, type); + internal->add_boolean_prefix(field, proc, grouping); } TermIterator @@ -233,10 +226,11 @@ QueryParser::unstem_begin(const string &term) const } void -QueryParser::add_valuerangeprocessor(Xapian::ValueRangeProcessor * vrproc) +QueryParser::add_rangeprocessor(Xapian::RangeProcessor * range_proc, + const std::string* grouping) { Assert(internal.get()); - internal->valrangeprocs.push_back(vrproc); + internal->rangeprocs.push_back(RangeProc(range_proc, grouping)); } string diff --git a/xapian-core/queryparser/queryparser.lemony b/xapian-core/queryparser/queryparser.lemony index fd95d7d72..e12e46bb9 100644 --- a/xapian-core/queryparser/queryparser.lemony +++ b/xapian-core/queryparser/queryparser.lemony @@ -25,6 +25,7 @@ #include "queryparser_internal.h" +#include "api/queryinternal.h" #include "omassert.h" #include "str.h" #include "stringutils.h" @@ -114,44 +115,13 @@ is_positional(Xapian::Query::op op) return (op == Xapian::Query::OP_PHRASE || op == Xapian::Query::OP_NEAR); } -/// A structure identifying a group of filter terms or a value range. -struct filter_group_id { - /** The field info for boolean filter terms. - * - * This is NULL for a value range. - */ - const FieldInfo *field_info; - - /** The value number for a value range. - * - * This is used for value range terms. - */ - Xapian::valueno slot; - - /// Make a new filter_group_id for boolean filter terms. - explicit filter_group_id(const FieldInfo * field_info_) - : field_info(field_info_), slot(Xapian::BAD_VALUENO) {} - - /// Make a new filter_group_id for value range terms. - explicit filter_group_id(Xapian::valueno slot_) - : field_info(NULL), slot(slot_) {} - - /// Ordering needed to allow storage in a map. - bool operator<(const filter_group_id & other) const { - if (slot != other.slot) - return slot < other.slot; - // std::less provides a total order over pointers. 
- return less()(field_info, other.field_info); - } -}; - class Terms; /** Class used to pass information about a token from lexer to parser. * * Generally an instance of this class carries term information, but it can be - * used for the start or end of a value range, with some operators (e.g. the - * distance in NEAR/3 or ADJ/3, etc). + * used for a range query, and with some operators (e.g. the distance in + * NEAR/3 or ADJ/3, etc). */ class Term { State * state; @@ -162,6 +132,7 @@ class Term { string unstemmed; QueryParser::stem_strategy stem; termpos pos; + Query query; Term(const string &name_, termpos pos_) : name(name_), stem(QueryParser::STEM_NONE), pos(pos_) { } Term(const string &name_) : name(name_), stem(QueryParser::STEM_NONE), pos(0) { } @@ -176,8 +147,8 @@ class Term { : state(state_), name(name_), field_info(field_info_), unstemmed(unstemmed_), stem(stem_), pos(pos_) { } // For RANGE tokens. - Term(valueno slot, const string &a, const string &b) - : name(a), unstemmed(b), pos(slot) { } + Term(const Xapian::Query & q, const string & grouping) + : name(grouping), query(q) { } string make_term(const string & prefix) const; @@ -187,8 +158,8 @@ class Term { termpos get_termpos() const { return pos; } - filter_group_id get_filter_group_id() const { - return filter_group_id(field_info); + string get_grouping() const { + return field_info->grouping; } Query * as_wildcarded_query(State * state) const; @@ -209,8 +180,8 @@ class Term { /** Handle a CJK character string in a positional context. */ void as_positional_cjk_term(Terms * terms) const; - /// Value range query. - Query as_value_range_query() const; + /// Range query. 
+ Query as_range_query() const; Query get_query() const; @@ -243,13 +214,28 @@ class State { qpi->unstem.insert(make_pair(term, unstemmed)); } - Term * value_range(const string &a, const string &b) { - for (auto i : qpi->valrangeprocs) { - string start = a; - string end = b; - Xapian::valueno slot = (*i)(start, end); - if (slot != Xapian::BAD_VALUENO) { - return new Term(slot, start, end); + Term * range(const string &a, const string &b) { + for (auto i : qpi->rangeprocs) { + Xapian::Query range_query = (i.proc)->check_range(a, b); + Xapian::Query::op op = range_query.get_type(); + switch (op) { + case Xapian::Query::OP_INVALID: + break; + case Xapian::Query::OP_VALUE_RANGE: + case Xapian::Query::OP_VALUE_GE: + case Xapian::Query::OP_VALUE_LE: + if (i.default_grouping) { + Xapian::Internal::QueryValueBase * base = + static_cast( + range_query.internal.get()); + Xapian::valueno slot = base->get_slot(); + return new Term(range_query, str(slot)); + } + // FALLTHRU + case Xapian::Query::LEAF_TERM: + return new Term(range_query, i.grouping); + default: + return new Term(range_query, string()); } } return NULL; @@ -520,13 +506,9 @@ Term::as_cjk_query() const } Query -Term::as_value_range_query() const +Term::as_range_query() const { - Query q; - if (unstemmed.empty()) - q = Query(Query::OP_VALUE_GE, pos, name); - else - q = Query(Query::OP_VALUE_RANGE, pos, name, unstemmed); + Query q = query; delete this; return q; } @@ -612,15 +594,14 @@ static void Parse(yyParser *, int, Term *, State *); static void yy_parse_failed(yyParser *); void -QueryParser::Internal::add_prefix(const string &field, const string &prefix, - filter_type type) +QueryParser::Internal::add_prefix(const string &field, const string &prefix) { map::iterator p = field_map.find(field); if (p == field_map.end()) { - field_map.insert(make_pair(field, FieldInfo(type, prefix))); + field_map.insert(make_pair(field, FieldInfo(NON_BOOLEAN, prefix))); } else { // Check that this is the same type of filter as the 
existing one(s). - if (p->second.type != type) { + if (p->second.type != NON_BOOLEAN) { throw Xapian::InvalidOperationError("Can't use add_prefix() and add_boolean_prefix() on the same field name, or add_boolean_prefix() with different values of the 'exclusive' parameter"); } if (!p->second.procs.empty()) @@ -630,15 +611,14 @@ QueryParser::Internal::add_prefix(const string &field, const string &prefix, } void -QueryParser::Internal::add_prefix(const string &field, FieldProcessor *proc, - filter_type type) +QueryParser::Internal::add_prefix(const string &field, FieldProcessor *proc) { map::iterator p = field_map.find(field); if (p == field_map.end()) { - field_map.insert(make_pair(field, FieldInfo(type, proc))); + field_map.insert(make_pair(field, FieldInfo(NON_BOOLEAN, proc))); } else { // Check that this is the same type of filter as the existing one(s). - if (p->second.type != type) { + if (p->second.type != NON_BOOLEAN) { throw Xapian::InvalidOperationError("Can't use add_prefix() and add_boolean_prefix() on the same field name, or add_boolean_prefix() with different values of the 'exclusive' parameter"); } if (!p->second.prefixes.empty()) @@ -648,6 +628,57 @@ QueryParser::Internal::add_prefix(const string &field, FieldProcessor *proc, } } +void +QueryParser::Internal::add_boolean_prefix(const string &field, + const string &prefix, + const string* grouping) +{ + // Don't allow the empty prefix to be set as boolean as it doesn't + // really make sense. + if (field.empty()) + throw Xapian::UnimplementedError("Can't set the empty prefix to be a boolean filter"); + if (!grouping) grouping = &field; + filter_type type = grouping->empty() ? BOOLEAN : BOOLEAN_EXCLUSIVE; + map::iterator p = field_map.find(field); + if (p == field_map.end()) { + field_map.insert(make_pair(field, FieldInfo(type, prefix, *grouping))); + } else { + // Check that this is the same type of filter as the existing one(s). 
+ if (p->second.type != type) { + throw Xapian::InvalidOperationError("Can't use add_prefix() and add_boolean_prefix() on the same field name, or add_boolean_prefix() with different values of the 'exclusive' parameter"); // FIXME + } + if (!p->second.procs.empty()) + throw Xapian::FeatureUnavailableError("Mixing FieldProcessor objects and string prefixes currently not supported"); + p->second.prefixes.push_back(prefix); // FIXME grouping + } +} + +void +QueryParser::Internal::add_boolean_prefix(const string &field, + FieldProcessor *proc, + const string* grouping) +{ + // Don't allow the empty prefix to be set as boolean as it doesn't + // really make sense. + if (field.empty()) + throw Xapian::UnimplementedError("Can't set the empty prefix to be a boolean filter"); + if (!grouping) grouping = &field; + filter_type type = grouping->empty() ? BOOLEAN : BOOLEAN_EXCLUSIVE; + map::iterator p = field_map.find(field); + if (p == field_map.end()) { + field_map.insert(make_pair(field, FieldInfo(type, proc, *grouping))); + } else { + // Check that this is the same type of filter as the existing one(s). 
+ if (p->second.type != type) { + throw Xapian::InvalidOperationError("Can't use add_prefix() and add_boolean_prefix() on the same field name, or add_boolean_prefix() with different values of the 'exclusive' parameter"); // FIXME + } + if (!p->second.prefixes.empty()) + throw Xapian::FeatureUnavailableError("Mixing FieldProcessor objects and string prefixes currently not supported"); + throw Xapian::FeatureUnavailableError("Multiple FieldProcessor objects for the same prefix currently not supported"); + // p->second.procs.push_back(proc); + } +} + string QueryParser::Internal::parse_term(Utf8Iterator &it, const Utf8Iterator &end, bool cjk_ngram, bool & is_cjk_term, @@ -756,9 +787,8 @@ QueryParser::Internal::parse_query(const string &qs, unsigned flags, { bool cjk_ngram = (flags & FLAG_CJK_NGRAM) || CJK::is_cjk_enabled(); - // Set value_ranges if we may have to handle value ranges in the query. - bool value_ranges; - value_ranges = !valrangeprocs.empty() && (qs.find("..") != string::npos); + // Set ranges if we may have to handle ranges in the query. + bool ranges = !rangeprocs.empty() && (qs.find("..") != string::npos); termpos term_pos = 1; Utf8Iterator it(qs), end; @@ -819,7 +849,7 @@ just_had_operator_needing_term: if (it == end) break; } - if (value_ranges && + if (ranges && (mode == DEFAULT || mode == IN_GROUP || mode == IN_GROUP2)) { // Scan forward to see if this could be the "start of range" // token. Sadly this has O(n^2) tendencies, though at least @@ -846,7 +876,7 @@ just_had_operator_needing_term: while (p != end && *p > ' ' && *p != ')') { Unicode::append_utf8(b, *p++); } - Term * range = state.value_range(a, b); + Term * range = state.range(a, b); if (!range) { state.error = "Unknown range operation"; if (a.find(':', 1) == string::npos) { @@ -1055,7 +1085,7 @@ just_had_operator_needing_term: // Build the unstemmed form in field. field += ':'; field += name; - // Clear any pending value range error. + // Clear any pending range error. 
state.error = NULL; Term * token = new Term(&state, name, field_info, field); Parse(pParser, BOOLEAN_FILTER, token, &state); @@ -1364,7 +1394,7 @@ struct ProbQuery { // filter is a map from prefix to a query for that prefix. Queries with // the same prefix are combined with OR, and the results of this are // combined with AND to get the full filter. - map filter; + map filter; ProbQuery() : query(0), love(0), hate(0) { } ~ProbQuery() { @@ -1373,35 +1403,35 @@ struct ProbQuery { delete hate; } - void add_filter(const filter_group_id & id, const Query & q) { - filter[id] = q; + void add_filter(const string& grouping, const Query & q) { + filter[grouping] = q; } - void append_filter(const filter_group_id & id, const Query & qnew) { - map::iterator it = filter.find(id); + void append_filter(const string& grouping, const Query & qnew) { + auto it = filter.find(grouping); if (it == filter.end()) { - filter.insert(make_pair(id, qnew)); + filter.insert(make_pair(grouping, qnew)); } else { Query & q = it->second; // We OR multiple filters with the same prefix if they're // exclusive, otherwise we AND them. - bool exclusive = (id.field_info->type == BOOLEAN_EXCLUSIVE); + bool exclusive = !grouping.empty(); Query::op op = exclusive ? Query::OP_OR : Query::OP_AND; q = Query(op, q, qnew); } } - void add_filter_range(Xapian::valueno slot, const Query & range) { - filter[filter_group_id(slot)] = range; + void add_filter_range(const string& grouping, const Query & range) { + filter[grouping] = range; } - void append_filter_range(Xapian::valueno slot, const Query & range) { - Query & q = filter[filter_group_id(slot)]; + void append_filter_range(const string& grouping, const Query & range) { + Query & q = filter[grouping]; q = Query(Query::OP_OR, q, range); } Query merge_filters() const { - map::const_iterator i = filter.begin(); + auto i = filter.begin(); Assert(i != filter.end()); Query q = i->second; while (++i != filter.end()) { @@ -1897,7 +1927,7 @@ prob_expr(E) ::= term(T). 
{ } // prob - a probabilistic sub-expression consisting of stop_terms, "+" terms, -// "-" terms, boolean filters, and/or value ranges. +// "-" terms, boolean filters, and/or ranges. // // Note: stop_term can also be several other things other than a simple term! @@ -1905,17 +1935,17 @@ prob_expr(E) ::= term(T). { %destructor prob {delete $$;} prob(P) ::= RANGE(R). { - valueno slot = R->pos; - const Query & range = R->as_value_range_query(); + string grouping = R->name; + const Query & range = R->as_range_query(); P = new ProbQuery; - P->add_filter_range(slot, range); + P->add_filter_range(grouping, range); } prob(P) ::= stop_prob(Q) RANGE(R). { - valueno slot = R->pos; - const Query & range = R->as_value_range_query(); + string grouping = R->name; + const Query & range = R->as_range_query(); P = Q; - P->append_filter_range(slot, range); + P->append_filter_range(grouping, range); } prob(P) ::= stop_term(T) stop_term(U). { @@ -1987,20 +2017,20 @@ prob(P) ::= stop_prob(Q) HATE BOOLEAN_FILTER(T). { prob(P) ::= BOOLEAN_FILTER(T). { P = new ProbQuery; - P->add_filter(T->get_filter_group_id(), T->get_query()); + P->add_filter(T->get_grouping(), T->get_query()); delete T; } prob(P) ::= stop_prob(Q) BOOLEAN_FILTER(T). { P = Q; - P->append_filter(T->get_filter_group_id(), T->get_query()); + P->append_filter(T->get_grouping(), T->get_query()); delete T; } prob(P) ::= LOVE BOOLEAN_FILTER(T). { // LOVE BOOLEAN_FILTER(T) is just the same as BOOLEAN_FILTER P = new ProbQuery; - P->filter[T->get_filter_group_id()] = T->get_query(); + P->filter[T->get_grouping()] = T->get_query(); delete T; } @@ -2008,7 +2038,7 @@ prob(P) ::= stop_prob(Q) LOVE BOOLEAN_FILTER(T). { // LOVE BOOLEAN_FILTER(T) is just the same as BOOLEAN_FILTER P = Q; // We OR filters with the same prefix... 
- Query & q = P->filter[T->get_filter_group_id()]; + Query & q = P->filter[T->get_grouping()]; q = Query(Query::OP_OR, q, T->get_query()); delete T; } diff --git a/xapian-core/queryparser/queryparser_internal.h b/xapian-core/queryparser/queryparser_internal.h index 3f5240ecc..832453b95 100644 --- a/xapian-core/queryparser/queryparser_internal.h +++ b/xapian-core/queryparser/queryparser_internal.h @@ -43,20 +43,24 @@ struct FieldInfo { /// The type of this field. filter_type type; + string grouping; + /// Field prefix strings. list prefixes; /// Field processors. Currently only one is supported. list > procs; - FieldInfo(filter_type type_, const string & prefix) - : type(type_) + FieldInfo(filter_type type_, const string& prefix, + const string& grouping_ = string()) + : type(type_), grouping(grouping_) { prefixes.push_back(prefix); } - FieldInfo(filter_type type_, Xapian::FieldProcessor *proc) - : type(type_) + FieldInfo(filter_type type_, Xapian::FieldProcessor* proc, + const string& grouping_ = string()) + : type(type_), grouping(grouping_) { procs.push_back(proc); } @@ -66,6 +70,17 @@ namespace Xapian { class Utf8Iterator; +struct RangeProc { + Xapian::Internal::opt_intrusive_ptr proc; + std::string grouping; + bool default_grouping; + + RangeProc(RangeProcessor * range_proc, const std::string* grouping_) + : proc(range_proc), + grouping(grouping_ ? *grouping_ : std::string()), + default_grouping(grouping_ == NULL) { } +}; + class QueryParser::Internal : public Xapian::Internal::intrusive_base { friend class QueryParser; friend class ::State; @@ -82,7 +97,7 @@ class QueryParser::Internal : public Xapian::Internal::intrusive_base { // "foobar" -> "XFOO". FIXME: it does more than this now! 
map field_map; - list > valrangeprocs; + list rangeprocs; string corrected_query; @@ -94,11 +109,15 @@ class QueryParser::Internal : public Xapian::Internal::intrusive_base { int max_partial_type; - void add_prefix(const string &field, const string &prefix, - filter_type type); + void add_prefix(const string &field, const string &prefix); + + void add_prefix(const string &field, Xapian::FieldProcessor *proc); + + void add_boolean_prefix(const string &field, const string &prefix, + const string* grouping); - void add_prefix(const string &field, Xapian::FieldProcessor *proc, - filter_type type); + void add_boolean_prefix(const string &field, Xapian::FieldProcessor *proc, + const string* grouping); std::string parse_term(Utf8Iterator &it, const Utf8Iterator &end, bool cjk_ngram, bool &is_cjk_term, diff --git a/xapian-core/tests/api_none.cc b/xapian-core/tests/api_none.cc index 268b9f712..a3dbb608e 100644 --- a/xapian-core/tests/api_none.cc +++ b/xapian-core/tests/api_none.cc @@ -24,6 +24,7 @@ #include "api_none.h" +#define XAPIAN_DEPRECATED(D) D #include #include "apitest.h" @@ -191,14 +192,16 @@ class DestroyedFlag { } }; -class TestValueRangeProcessor : public Xapian::ValueRangeProcessor { +class TestRangeProcessor : public Xapian::RangeProcessor { DestroyedFlag destroyed; public: - TestValueRangeProcessor(bool & destroyed_) : destroyed(destroyed_) { } + TestRangeProcessor(bool & destroyed_) + : Xapian::RangeProcessor(0), destroyed(destroyed_) { } - Xapian::valueno operator()(std::string &, std::string &) { - return 42; + Xapian::Query operator()(const std::string&, const std::string&) + { + return Xapian::Query::MatchAll; } }; @@ -208,21 +211,21 @@ DEFINE_TESTCASE(subclassablerefcount1, !backend) { // Simple test of release(). 
{ - Xapian::ValueRangeProcessor * vrp = new TestValueRangeProcessor(gone); + Xapian::RangeProcessor * rp = new TestRangeProcessor(gone); TEST(!gone); Xapian::QueryParser qp; - qp.add_valuerangeprocessor(vrp->release()); + qp.add_rangeprocessor(rp->release()); TEST(!gone); } TEST(gone); // Check a second call to release() has no effect. { - Xapian::ValueRangeProcessor * vrp = new TestValueRangeProcessor(gone); + Xapian::RangeProcessor * rp = new TestRangeProcessor(gone); TEST(!gone); Xapian::QueryParser qp; - qp.add_valuerangeprocessor(vrp->release()); - vrp->release(); + qp.add_rangeprocessor(rp->release()); + rp->release(); TEST(!gone); } TEST(gone); @@ -230,20 +233,20 @@ DEFINE_TESTCASE(subclassablerefcount1, !backend) { // Test reference counting works, and that a VRP with automatic storage // works OK. { - TestValueRangeProcessor vrp_auto(gone_auto); + TestRangeProcessor rp_auto(gone_auto); TEST(!gone_auto); { Xapian::QueryParser qp1; { Xapian::QueryParser qp2; - Xapian::ValueRangeProcessor * vrp; - vrp = new TestValueRangeProcessor(gone); + Xapian::RangeProcessor * rp; + rp = new TestRangeProcessor(gone); TEST(!gone); - qp1.add_valuerangeprocessor(vrp->release()); + qp1.add_rangeprocessor(rp->release()); TEST(!gone); - qp2.add_valuerangeprocessor(vrp); + qp2.add_rangeprocessor(rp); TEST(!gone); - qp2.add_valuerangeprocessor(&vrp_auto); + qp2.add_rangeprocessor(&rp_auto); TEST(!gone); TEST(!gone_auto); } @@ -260,15 +263,14 @@ DEFINE_TESTCASE(subclassablerefcount1, !backend) { { Xapian::QueryParser qp; { - Xapian::ValueRangeProcessor * vrp = - new TestValueRangeProcessor(gone); + Xapian::RangeProcessor * rp = new TestRangeProcessor(gone); TEST(!gone); - qp.add_valuerangeprocessor(vrp); - delete vrp; + qp.add_rangeprocessor(rp); + delete rp; TEST(gone); } // At the end of this block, qp is destroyed, but mustn't dereference - // the pointer it has to vrp. If it does, that should get caught + // the pointer it has to rp. 
If it does, that should get caught // when tests are run under valgrind. } @@ -733,6 +735,90 @@ DEFINE_TESTCASE(subclassablerefcount7, backend) { return true; } +class TestValueRangeProcessor : public Xapian::ValueRangeProcessor { + DestroyedFlag destroyed; + + public: + TestValueRangeProcessor(bool & destroyed_) : destroyed(destroyed_) { } + + Xapian::valueno operator()(std::string &, std::string &) { + return 42; + } +}; + +/// Check reference counting of user-subclassable classes. +DEFINE_TESTCASE(subclassablerefcount8, !backend) { + bool gone_auto, gone; + + // Simple test of release(). + { + Xapian::ValueRangeProcessor * vrp = new TestValueRangeProcessor(gone); + TEST(!gone); + Xapian::QueryParser qp; + qp.add_valuerangeprocessor(vrp->release()); + TEST(!gone); + } + TEST(gone); + + // Check a second call to release() has no effect. + { + Xapian::ValueRangeProcessor * vrp = new TestValueRangeProcessor(gone); + TEST(!gone); + Xapian::QueryParser qp; + qp.add_valuerangeprocessor(vrp->release()); + vrp->release(); + TEST(!gone); + } + TEST(gone); + + // Test reference counting works, and that a VRP with automatic storage + // works OK. + { + TestValueRangeProcessor vrp_auto(gone_auto); + TEST(!gone_auto); + { + Xapian::QueryParser qp1; + { + Xapian::QueryParser qp2; + Xapian::ValueRangeProcessor * vrp; + vrp = new TestValueRangeProcessor(gone); + TEST(!gone); + qp1.add_valuerangeprocessor(vrp->release()); + TEST(!gone); + qp2.add_valuerangeprocessor(vrp); + TEST(!gone); + qp2.add_valuerangeprocessor(&vrp_auto); + TEST(!gone); + TEST(!gone_auto); + } + TEST(!gone); + } + TEST(gone); + TEST(!gone_auto); + } + TEST(gone_auto); + + // Regression test for initial implementation, where ~opt_instrusive_ptr() + // checked the reference of the object, which may have already been deleted + // if it wasn't been reference counted. 
+ { + Xapian::QueryParser qp; + { + Xapian::ValueRangeProcessor * vrp = + new TestValueRangeProcessor(gone); + TEST(!gone); + qp.add_valuerangeprocessor(vrp); + delete vrp; + TEST(gone); + } + // At the end of this block, qp is destroyed, but mustn't dereference + // the pointer it has to vrp. If it does, that should get caught + // when tests are run under valgrind. + } + + return true; +} + /// Check encoding of non-UTF8 document data. DEFINE_TESTCASE(nonutf8docdesc1, !backend) { Xapian::Document doc; diff --git a/xapian-core/tests/generate-api_generated b/xapian-core/tests/generate-api_generated index 78935ae0a..28fcd9d83 100755 --- a/xapian-core/tests/generate-api_generated +++ b/xapian-core/tests/generate-api_generated @@ -36,9 +36,12 @@ my %uncopyableclasses = ( 'ValueCountMatchSpy' => '0', 'ValueMapPostingSource' => '0', 'FixedWeightPostingSource' => '0', + 'DateRangeProcessor' => '0', 'DateValueRangeProcessor' => '0', 'MultiValueKeyMaker' => '', + 'NumberRangeProcessor' => '0, ""', 'NumberValueRangeProcessor' => '0, ""', + 'RangeProcessor' => '', 'SimpleStopper' => '', 'StringValueRangeProcessor' => '0' ); @@ -65,9 +68,12 @@ my %copyableclasses = ( ); my %no_get_description = ( + 'DateRangeProcessor' => 1, 'DateValueRangeProcessor' => 1, 'MultiValueKeyMaker' => 1, + 'NumberRangeProcessor' => 1, 'NumberValueRangeProcessor' => 1, + 'RangeProcessor' => 1, 'Registry' => 1, 'StringValueRangeProcessor' => 1, 'BB2Weight' => 1, @@ -109,6 +115,7 @@ print <<"END"; #include "api_generated.h" +#define XAPIAN_DEPRECATED(D) D #include #include "apitest.h" diff --git a/xapian-core/tests/queryparsertest.cc b/xapian-core/tests/queryparsertest.cc index a4f3ae901..8a77d756b 100644 --- a/xapian-core/tests/queryparsertest.cc +++ b/xapian-core/tests/queryparsertest.cc @@ -21,6 +21,7 @@ #include +#define XAPIAN_DEPRECATED(D) D #include #include "cputimer.h" @@ -659,8 +660,8 @@ static const test test_or_queries[] = { { "foo AND site:2", "(Zfoo@1 AND 0 * H2)" }, // Non-exclusive 
boolean prefixes feature tests (ticket#402): { "category:1 category:2", "0 * (XCAT1 AND XCAT2)" }, - { "category:1 site2:2", "0 * (J2 AND XCAT1)" }, - { "category:1 category:2 site2:2", "0 * (J2 AND (XCAT1 AND XCAT2))" }, + { "category:1 site2:2", "0 * (XCAT1 AND J2)" }, + { "category:1 category:2 site2:2", "0 * ((XCAT1 AND XCAT2) AND J2)" }, { "category:1 OR category:2", "(0 * XCAT1 OR 0 * XCAT2)" }, { "category:1 AND category:2", "(0 * XCAT1 AND 0 * XCAT2)" }, { "foo AND category:2", "(Zfoo@1 AND 0 * XCAT2)" }, @@ -688,7 +689,7 @@ static const test test_or_queries[] = { // { "authortitle:richard NEAR title:book", "((Arichard@1 OR XTrichard@1) NEAR 11 XTbook@2)" }, { "multisite:xapian.org", "0 * (Hxapian.org OR Jxapian.org)"}, { "authortitle:richard", "(ZArichard@1 OR ZXTrichard@1)"}, - { "multisite:xapian.org site:www.xapian.org author:richard authortitle:richard", "((ZArichard@1 OR (ZArichard@2 OR ZXTrichard@2)) FILTER (Hwww.xapian.org AND (Hxapian.org OR Jxapian.org)))" }, + { "multisite:xapian.org site:www.xapian.org author:richard authortitle:richard", "((ZArichard@1 OR (ZArichard@2 OR ZXTrichard@2)) FILTER ((Hxapian.org OR Jxapian.org) AND Hwww.xapian.org))" }, { "authortitle:richard-boulton", "((Arichard@1 PHRASE 2 Aboulton@2) OR (XTrichard@1 PHRASE 2 XTboulton@2))"}, { "authortitle:\"richard boulton\"", "((Arichard@1 PHRASE 2 Aboulton@2) OR (XTrichard@1 PHRASE 2 XTboulton@2))"}, // Test FLAG_CJK_NGRAM isn't on by default: @@ -1417,6 +1418,36 @@ static bool test_qp_value_range1() return true; } +// Simple test of RangeProcessor class. 
+static bool test_qp_range1() +{ + Xapian::QueryParser qp; + qp.add_boolean_prefix("test", "XTEST"); + Xapian::RangeProcessor rp(1); + qp.add_rangeprocessor(&rp); + for (const test *p = test_value_range1_queries; p->query; ++p) { + string expect, parsed; + if (p->expect) + expect = p->expect; + else + expect = "parse error"; + try { + Xapian::Query qobj = qp.parse_query(p->query); + parsed = qobj.get_description(); + expect = string("Query(") + expect + ')'; + } catch (const Xapian::QueryParserError &e) { + parsed = e.get_msg(); + } catch (const Xapian::Error &e) { + parsed = e.get_description(); + } catch (...) { + parsed = "Unknown exception!"; + } + tout << "Query: " << p->query << '\n'; + TEST_STRINGS_EQUAL(parsed, expect); + } + return true; +} + static const test test_value_range2_queries[] = { { "a..b", "0 * VALUE_RANGE 3 a b" }, { "1..12", "0 * VALUE_RANGE 2 \\xa0 \\xae" }, @@ -1488,6 +1519,46 @@ static bool test_qp_value_range2() return true; } +// Test chaining of RangeProcessor classes. 
+static bool test_qp_range2() +{ + using Xapian::RP_REPEATED; + using Xapian::RP_SUFFIX; + Xapian::QueryParser qp; + qp.add_boolean_prefix("test", "XTEST"); + Xapian::DateRangeProcessor rp_date(1); + Xapian::NumberRangeProcessor rp_num(2); + Xapian::RangeProcessor rp_str(3); + Xapian::NumberRangeProcessor rp_cash(4, "$", RP_REPEATED); + Xapian::NumberRangeProcessor rp_weight(5, "kg", RP_SUFFIX|RP_REPEATED); + qp.add_rangeprocessor(&rp_date); + qp.add_rangeprocessor(&rp_num); + qp.add_rangeprocessor(&rp_cash); + qp.add_rangeprocessor(&rp_weight); + qp.add_rangeprocessor(&rp_str); + for (const test *p = test_value_range2_queries; p->query; ++p) { + string expect, parsed; + if (p->expect) + expect = p->expect; + else + expect = "parse error"; + try { + Xapian::Query qobj = qp.parse_query(p->query); + parsed = qobj.get_description(); + expect = string("Query(") + expect + ')'; + } catch (const Xapian::QueryParserError &e) { + parsed = e.get_msg(); + } catch (const Xapian::Error &e) { + parsed = e.get_description(); + } catch (...) { + parsed = "Unknown exception!"; + } + tout << "Query: " << p->query << '\n'; + TEST_STRINGS_EQUAL(parsed, expect); + } + return true; +} + // Test NumberValueRangeProcessors with actual data. static bool test_qp_value_range3() { @@ -1536,6 +1607,54 @@ static bool test_qp_value_range3() #endif } +// Test NumberRangeProcessors with actual data. 
+static bool test_qp_range3() +{ +#ifndef XAPIAN_HAS_INMEMORY_BACKEND + SKIP_TEST("Testcase requires the InMemory backend which is disabled"); +#else + Xapian::WritableDatabase db(string(), Xapian::DB_BACKEND_INMEMORY); + double low = -10; + int steps = 60; + double step = 0.5; + + for (int i = 0; i <= steps; ++i) { + double v = low + i * step; + Xapian::Document doc; + doc.add_value(1, Xapian::sortable_serialise(v)); + db.add_document(doc); + } + + Xapian::NumberRangeProcessor rp_num(1); + Xapian::QueryParser qp; + qp.add_rangeprocessor(&rp_num); + + for (int j = 0; j <= steps; ++j) { + double start = low + j * step; + for (int k = 0; k <= steps; ++k) { + double end = low + k * step; + string query = str(start) + ".." + str(end); + tout << "Query: " << query << '\n'; + Xapian::Query qobj = qp.parse_query(query); + Xapian::Enquire enq(db); + enq.set_query(qobj); + Xapian::MSet mset = enq.get_mset(0, steps + 1); + if (end < start) { + TEST_EQUAL(mset.size(), 0); + } else { + TEST_EQUAL(mset.size(), 1u + (k - j)); + for (unsigned int m = 0; m != mset.size(); ++m) { + double v = start + m * step; + TEST_EQUAL(mset[m].get_document().get_value(1), + Xapian::sortable_serialise(v)); + } + } + } + } + return true; +#endif +} + static const test test_value_range4_queries[] = { { "id:19254@foo..example.com", "0 * Q19254@foo..example.com" }, { "hello:world", "0 * XHELLOworld" }, @@ -1579,6 +1698,41 @@ static bool test_qp_value_range4() return true; } +/** Test a boolean filter which happens to contain "..". + * + * Regression test for bug fixed in 1.2.3. + * + * Also test that the same prefix can be set for a range and filter. 
+ */ +static bool test_qp_range4() +{ + Xapian::QueryParser qp; + qp.add_boolean_prefix("id", "Q"); + qp.add_boolean_prefix("hello", "XHELLO"); + Xapian::RangeProcessor rp_str(1, "hello:"); + qp.add_rangeprocessor(&rp_str); + for (const test *p = test_value_range4_queries; p->query; ++p) { + string expect, parsed; + if (p->expect) + expect = p->expect; + else + expect = "parse error"; + try { + Xapian::Query qobj = qp.parse_query(p->query); + parsed = qobj.get_description(); + expect = string("Query(") + expect + ')'; + } catch (const Xapian::QueryParserError &e) { + parsed = e.get_msg(); + } catch (const Xapian::Error &e) { + parsed = e.get_description(); + } catch (...) { + parsed = "Unknown exception!"; + } + tout << "Query: " << p->query << '\n'; + TEST_STRINGS_EQUAL(parsed, expect); + } + return true; +} static const test test_value_daterange1_queries[] = { { "12/03/99..12/04/01", "0 * VALUE_RANGE 1 19991203 20011204" }, @@ -1619,6 +1773,35 @@ static bool test_qp_value_daterange1() return true; } +// Test DateRangeProcessor +static bool test_qp_daterange1() +{ + Xapian::QueryParser qp; + Xapian::DateRangeProcessor rp_date(1, Xapian::RP_DATE_PREFER_MDY, 1960); + qp.add_rangeprocessor(&rp_date); + for (const test *p = test_value_daterange1_queries; p->query; ++p) { + string expect, parsed; + if (p->expect) + expect = p->expect; + else + expect = "parse error"; + try { + Xapian::Query qobj = qp.parse_query(p->query); + parsed = qobj.get_description(); + expect = string("Query(") + expect + ')'; + } catch (const Xapian::QueryParserError &e) { + parsed = e.get_msg(); + } catch (const Xapian::Error &e) { + parsed = e.get_description(); + } catch (...) 
{ + parsed = "Unknown exception!"; + } + tout << "Query: " << p->query << '\n'; + TEST_STRINGS_EQUAL(parsed, expect); + } + return true; +} + static const test test_value_daterange2_queries[] = { { "created:12/03/99..12/04/01", "0 * VALUE_RANGE 1 19991203 20011204" }, { "modified:03-12-99..04-14-01", "0 * VALUE_RANGE 2 19990312 20010414" }, @@ -1672,6 +1855,45 @@ static bool test_qp_value_daterange2() return true; } +// Feature test DateRangeProcessor with prefixes (added in 1.1.2). +static bool test_qp_daterange2() +{ + using Xapian::RP_DATE_PREFER_MDY; + Xapian::QueryParser qp; + Xapian::DateRangeProcessor rp_cdate(1, "created:", RP_DATE_PREFER_MDY, 1970); + Xapian::DateRangeProcessor rp_mdate(2, "modified:", RP_DATE_PREFER_MDY, 1970); + Xapian::DateRangeProcessor rp_adate(3, "accessed:", RP_DATE_PREFER_MDY, 1970); + // Regression test - here a const char * was taken as a bool rather than a + // std::string when resolving the overloaded forms. Fixed in 1.2.13 and + // 1.3.1. + Xapian::DateRangeProcessor rp_ddate(4, "deleted:"); + qp.add_rangeprocessor(&rp_cdate); + qp.add_rangeprocessor(&rp_mdate); + qp.add_rangeprocessor(&rp_adate); + qp.add_rangeprocessor(&rp_ddate); + for (const test *p = test_value_daterange2_queries; p->query; ++p) { + string expect, parsed; + if (p->expect) + expect = p->expect; + else + expect = "parse error"; + try { + Xapian::Query qobj = qp.parse_query(p->query); + parsed = qobj.get_description(); + expect = string("Query(") + expect + ')'; + } catch (const Xapian::QueryParserError &e) { + parsed = e.get_msg(); + } catch (const Xapian::Error &e) { + parsed = e.get_description(); + } catch (...) 
{ + parsed = "Unknown exception!"; + } + tout << "Query: " << p->query << '\n'; + TEST_STRINGS_EQUAL(parsed, expect); + } + return true; +} + static const test test_value_stringrange1_queries[] = { { "tag:bar..foo", "0 * VALUE_RANGE 1 bar foo" }, { "bar..foo", "0 * VALUE_RANGE 0 bar foo" }, @@ -1709,6 +1931,42 @@ static bool test_qp_value_stringrange1() return true; } +// Feature test RangeProcessor with prefixes. +static bool test_qp_stringrange1() +{ + Xapian::QueryParser qp; + Xapian::RangeProcessor rp_default(0); + Xapian::RangeProcessor rp_tag(1, "tag:"); + qp.add_rangeprocessor(&rp_tag); + qp.add_rangeprocessor(&rp_default); + for (const test *p = test_value_stringrange1_queries; p->query; ++p) { + string expect, parsed; + if (p->expect) + expect = p->expect; + else + expect = "parse error"; + try { + Xapian::Query qobj = qp.parse_query(p->query); + parsed = qobj.get_description(); + expect = string("Query(") + expect + ')'; + } catch (const Xapian::QueryParserError &e) { + parsed = e.get_msg(); + } catch (const Xapian::Error &e) { + parsed = e.get_description(); + } catch (...) { + parsed = "Unknown exception!"; + } + tout << "Query: " << p->query << '\n'; + TEST_STRINGS_EQUAL(parsed, expect); + } + return true; +} + +static const test test_value_customrange1_queries[] = { + { "mars author:Asimov..Bradbury", "(mars@1 FILTER VALUE_RANGE 4 asimov bradbury)" }, + { NULL, NULL } +}; + struct AuthorValueRangeProcessor : public Xapian::ValueRangeProcessor { AuthorValueRangeProcessor() {} @@ -1722,11 +1980,6 @@ struct AuthorValueRangeProcessor : public Xapian::ValueRangeProcessor { } }; -static const test test_value_customrange1_queries[] = { - { "mars author:Asimov..Bradbury", "(mars@1 FILTER VALUE_RANGE 4 asimov bradbury)" }, - { NULL, NULL } -}; - // Test custom ValueRangeProcessor subclass. 
static bool test_qp_value_customrange1() { @@ -1756,6 +2009,46 @@ static bool test_qp_value_customrange1() return true; } +struct AuthorRangeProcessor : public Xapian::RangeProcessor { + AuthorRangeProcessor() : Xapian::RangeProcessor(4, "author:") { } + + Xapian::Query operator()(const std::string& b, const std::string& e) + { + string begin = Xapian::Unicode::tolower(b); + string end = Xapian::Unicode::tolower(e); + return Xapian::RangeProcessor::operator()(begin, end); + } +}; + +// Test custom RangeProcessor subclass. +static bool test_qp_customrange1() +{ + Xapian::QueryParser qp; + AuthorRangeProcessor rp_author; + qp.add_rangeprocessor(&rp_author); + for (const test *p = test_value_customrange1_queries; p->query; ++p) { + string expect, parsed; + if (p->expect) + expect = p->expect; + else + expect = "parse error"; + try { + Xapian::Query qobj = qp.parse_query(p->query); + parsed = qobj.get_description(); + expect = string("Query(") + expect + ')'; + } catch (const Xapian::QueryParserError &e) { + parsed = e.get_msg(); + } catch (const Xapian::Error &e) { + parsed = e.get_description(); + } catch (...) { + parsed = "Unknown exception!"; + } + tout << "Query: " << p->query << '\n'; + TEST_STRINGS_EQUAL(parsed, expect); + } + return true; +} + class TitleFieldProcessor : public Xapian::FieldProcessor { Xapian::Query operator()(const std::string & str) { if (str == "all") @@ -1872,6 +2165,37 @@ static bool test_qp_fieldproc2() return true; } +// Test using FieldProcessor and RangeProcessor together. 
+static bool test_qp_fieldproc3() +{ + Xapian::QueryParser qp; + DateRangeFieldProcessor date_fproc; + qp.add_boolean_prefix("date", &date_fproc); + Xapian::DateRangeProcessor rp_date(1, "date:"); + qp.add_rangeprocessor(&rp_date); + for (const test *p = test_fieldproc2_queries; p->query; ++p) { + string expect, parsed; + if (p->expect) + expect = p->expect; + else + expect = "parse error"; + try { + Xapian::Query qobj = qp.parse_query(p->query); + parsed = qobj.get_description(); + expect = string("Query(") + expect + ')'; + } catch (const Xapian::QueryParserError &e) { + parsed = e.get_msg(); + } catch (const Xapian::Error &e) { + parsed = e.get_description(); + } catch (...) { + parsed = "Unknown exception!"; + } + tout << "Query: " << p->query << '\n'; + TEST_STRINGS_EQUAL(parsed, expect); + } + return true; +} + static bool test_qp_stoplist1() { Xapian::QueryParser qp; @@ -2716,6 +3040,14 @@ static const test_desc tests[] = { TESTCASE(qp_unstem_boolean_prefix), TESTCASE(qp_default_prefix1), TESTCASE(qp_default_prefix2), + TESTCASE(qp_range1), + TESTCASE(qp_range2), + TESTCASE(qp_range3), + TESTCASE(qp_range4), + TESTCASE(qp_daterange1), + TESTCASE(qp_daterange2), + TESTCASE(qp_stringrange1), + TESTCASE(qp_customrange1), TESTCASE(qp_value_range1), TESTCASE(qp_value_range2), TESTCASE(qp_value_range3), @@ -2726,6 +3058,7 @@ static const test_desc tests[] = { TESTCASE(qp_value_customrange1), TESTCASE(qp_fieldproc1), TESTCASE(qp_fieldproc2), + TESTCASE(qp_fieldproc3), TESTCASE(qp_stoplist1), TESTCASE(qp_spell1), TESTCASE(qp_spell2), -- 2.11.4.GIT