From 7b56a4b280fd975aee54beca6dd1b01043c8e09d Mon Sep 17 00:00:00 2001 From: Olly Betts Date: Wed, 19 Sep 2018 13:52:20 +1200 Subject: [PATCH] Support YYYY/YYYYMM limits in term-based date ranges Previously value-based date ranges supported these as limits, but term-based date ranges gave an error. It's easy to support them and improves consistency so we now do. --- xapian-applications/omega/date.cc | 55 ++++++++++++++++++++++------ xapian-applications/omega/docs/cgiparams.rst | 26 ++++++++----- xapian-applications/omega/omegatest | 4 ++ 3 files changed, 64 insertions(+), 21 deletions(-) diff --git a/xapian-applications/omega/date.cc b/xapian-applications/omega/date.cc index 97beeeb03..df4a1b542 100644 --- a/xapian-applications/omega/date.cc +++ b/xapian-applications/omega/date.cc @@ -4,7 +4,7 @@ * Copyright 2001 James Aylett * Copyright 2001,2002 Ananova Ltd * Copyright 2002 Intercede 1749 Ltd - * Copyright 2002,2003,2006,2014,2016,2017 Olly Betts + * Copyright 2002,2003,2006,2014,2016,2017,2018 Olly Betts * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -141,14 +141,47 @@ whole_year_at_end: return Xapian::Query(Xapian::Query::OP_OR, v.begin(), v.end()); } +static int DIGIT(char ch) { return ch - '0'; } + +static int DIGIT2(const char *p) { + return DIGIT(p[0]) * 10 + DIGIT(p[1]); +} + +static int DIGIT4(const char *p) { + return DIGIT2(p) * 100 + DIGIT2(p + 2); +} + static void -parse_date(const string & date, int *y, int *m, int *d) +parse_date(const string & date, int *y, int *m, int *d, bool start) { - // FIXME: for now only support YYYYMMDD (e.g. 20011119) - // and don't error check - *y = atoi(date.substr(0, 4).c_str()); - *m = atoi(date.substr(4, 2).c_str()); - *d = atoi(date.substr(6, 2).c_str()); + // Support YYYYMMDD, YYYYMM and YYYY. 
+ if (date.size() < 4) { + // We default to the start of 1970 when START isn't specified, so it + // seems logical to do that here too. + *y = 1970; + } else { + *y = DIGIT4(date.c_str()); + } + if (date.size() < 6) { + if (start) { + *m = 1; + *d = 1; + } else { + *m = 12; + *d = 31; + } + return; + } + *m = DIGIT2(date.c_str() + 4); + if (date.size() < 8) { + if (start) { + *d = 1; + } else { + *d = last_day(*y, *m); + } + return; + } + *d = DIGIT2(date.c_str() + 6); } Xapian::Query @@ -159,7 +192,7 @@ date_range_filter(const string & date_start, const string & date_end, if (!date_span.empty()) { time_t secs = atoi(date_span.c_str()) * (24 * 60 * 60); if (!date_end.empty()) { - parse_date(date_end, &y2, &m2, &d2); + parse_date(date_end, &y2, &m2, &d2, false); struct tm t; t.tm_year = y2 - 1900; t.tm_mon = m2 - 1; @@ -173,7 +206,7 @@ date_range_filter(const string & date_start, const string & date_end, m1 = t2->tm_mon + 1; d1 = t2->tm_mday; } else if (!date_start.empty()) { - parse_date(date_start, &y1, &m1, &d1); + parse_date(date_start, &y1, &m1, &d1, true); struct tm t; t.tm_year = y1 - 1900; t.tm_mon = m1 - 1; @@ -204,7 +237,7 @@ date_range_filter(const string & date_start, const string & date_end, m1 = 1; d1 = 1; } else { - parse_date(date_start, &y1, &m1, &d1); + parse_date(date_start, &y1, &m1, &d1, true); } if (date_end.empty()) { time_t now = time(NULL); @@ -213,7 +246,7 @@ date_range_filter(const string & date_start, const string & date_end, m2 = t->tm_mon + 1; d2 = t->tm_mday; } else { - parse_date(date_end, &y2, &m2, &d2); + parse_date(date_end, &y2, &m2, &d2, false); } } return date_range_filter(y1, m1, d1, y2, m2, d2); diff --git a/xapian-applications/omega/docs/cgiparams.rst b/xapian-applications/omega/docs/cgiparams.rst index e19b3c7e9..454a8f0f4 100644 --- a/xapian-applications/omega/docs/cgiparams.rst +++ b/xapian-applications/omega/docs/cgiparams.rst @@ -133,9 +133,11 @@ START.\ *SLOT* END.\ *SLOT* SPAN.\ *SLOT* If `SPAN.`\ *SLOT* is not 
specified: * `START.`\ *SLOT* specifies the start of the range in the - format YYYYMMDD or YYYYMMDDHHMM. Default is the start of time. + format YYYY, YYYYMM, YYYYMMDD or YYYYMMDDHHMM. Default is the start + of time. * `END.`\ *SLOT* specifies the end of the range in the - format YYYYMMDD or YYYYMMDDHHMM. Default is the end of time. + format YYYY, YYYYMM, YYYYMMDD or YYYYMMDDHHMM. Default is the end of + time. Added in Xapian 1.4.8 - older versions will just ignore these parameters. @@ -148,19 +150,23 @@ DATEVALUE formats described above (YYYYMMDDHHMM, YYYYMMDD or a raw 4 byte big-endian time_t). + Don't mix `START.`\ *SLOT*, `END.`\ *SLOT* and/or `SPAN.`\ *SLOT* with + `DATEVALUE` on the same slot number. + If `DATEVALUE` isn't set then `START`, `END` and `SPAN` will perform date filtering using an older approach based on D-, M-, and Y-prefixed terms. This approach can only filter to a granularity of one day, so - only the `YYYYMMDD` part of `START` and `END` are used. Also instead - of `START`/`END` defaulting to the start and end of time, they instead - default to 1st January 1970 and today's date respectively. The - term-based date range filtering also includes a special `Dlatest` term, - which allows flagging a document as always current. There's no + only the `YYYYMMDD` part of `START` and `END` is used. Support for + `YYYY` and `YYYYMM` in `START` and `END` for term-based date filtering + was added in Xapian 1.4.8 - in earlier versions this failed with an + error. + + Also, rather than `START`/`END` defaulting to the start and end of time, + they instead default to 1st January 1970 and today's date respectively. + The term-based date range filtering also includes a special `Dlatest` + term, which allows flagging a document as always current. There's no equivalent to this for value-based date range filters. - Don't mix `START.`\ *SLOT*, `END.`\ *SLOT* and/or `SPAN.`\ *SLOT* with - `DATEVALUE` on the same slot number. 
- START END SPAN like `START.`\ *SLOT*, `END.`\ *SLOT* and `SPAN.`\ *SLOT* but for value slot `DATEVALUE`, or for term-based date range filtering if `DATEVALUE` diff --git a/xapian-applications/omega/omegatest b/xapian-applications/omega/omegatest index b14c81901..739baded2 100755 --- a/xapian-applications/omega/omegatest +++ b/xapian-applications/omega/omegatest @@ -313,6 +313,10 @@ qtestcase '((D20141103 OR D20141104 OR D20141105 OR D20141106 OR D20141107 OR D2 # Check that if START, END and SPAN are all passed, START is ignored: qtestcase '((D20151103 OR D20151104 OR D20151105 OR D20151106) OR Dlatest)' START=19700101 END=20151106 SPAN=3 +# Check that YYYYMM and YYYY are accepted and handled appropriately: +qtestcase '((Y1980 OR Y1981) OR Dlatest)' START=1980 END=1981 +qtestcase '((M198012 OR M198101 OR M198102) OR Dlatest)' START=198012 END=198102 + # Check .SLOT combined with term based date range filter: qtestcase '(VALUE_RANGE 0 201512 2015~ AND ((Y1970 OR Y1971 OR Y1972 OR Y1973 OR Y1974) OR Dlatest))' START.0=20151201 END.0=20151231 END=19741231 -- 2.11.4.GIT