Sync arabic stemming algorithm change from Snowball
[xapian.git] / xapian-core / matcher / orpositionlist.cc
bloba0cf9a2890c8e9baaa9080e4c78ae2db8f82f648
1 /** @file orpositionlist.cc
2 * @brief Merge two PositionList objects using an OR operation.
3 */
4 /* Copyright (C) 2007,2010,2016,2017 Olly Betts
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of the
9 * License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #include <config.h>
23 #include "orpositionlist.h"
25 #include "debuglog.h"
27 using namespace std;
29 Xapian::termcount
30 OrPositionList::get_approx_size() const
32 LOGCALL(EXPAND, Xapian::termcount, "OrPositionList::get_approx_size", NO_ARGS);
33 // This is actually the upper bound, but generally there's only one term
34 // at each position, so it'll usually be correct too.
35 Xapian::termcount size = 0;
36 for (auto pl : pls) size += pl->get_approx_size();
37 RETURN(size);
40 Xapian::termpos
41 OrPositionList::get_position() const
43 LOGCALL(EXPAND, Xapian::termpos, "OrPositionList::get_position", NO_ARGS);
44 RETURN(current_pos);
47 // PositionList::next() is actually rarely used - ExactPhrasePostList will
48 // never call it, while PhrasePostList will only call it once for the first
49 // subquery and NearPostList will call it to start subqueries if we're near
50 // the start of the document, and also if a candidate match has two subqueries
51 // at the same position.
52 bool
53 OrPositionList::next()
55 LOGCALL(EXPAND, bool, "OrPositionList::next", NO_ARGS);
56 bool first = current.empty();
57 if (first) current.resize(pls.size());
58 Xapian::termpos old_pos = current_pos;
59 current_pos = Xapian::termpos(-1);
60 size_t j = 0;
61 for (size_t i = 0; i != pls.size(); ++i) {
62 PositionList* pl = pls[i];
63 Xapian::termpos pos;
64 if (first || current[i] <= old_pos) {
65 if (!pl->next())
66 continue;
67 pos = pl->get_position();
68 } else {
69 pos = current[i];
71 current_pos = min(current_pos, pos);
72 current[j] = pos;
73 if (i != j) pls[j] = pls[i];
74 ++j;
76 pls.resize(j);
77 RETURN(j != 0);
80 // A min-heap seems like an obvious optimisation here, but is only useful when
81 // handling clumps of terms - in particular when skip_to() advances all the
82 // sublists, the heap doesn't help (but we have the cost of rebuilding it, or N
83 // pop+push calls which has a worse complexity than rebuilding).
84 bool
85 OrPositionList::skip_to(Xapian::termpos termpos)
87 LOGCALL(EXPAND, bool, "OrPositionList::skip_to", termpos);
88 bool first = current.empty();
89 if (!first && termpos <= current_pos)
90 RETURN(true);
91 if (first) current.resize(pls.size());
92 current_pos = Xapian::termpos(-1);
93 size_t j = 0;
94 for (size_t i = 0; i != pls.size(); ++i) {
95 PositionList* pl = pls[i];
96 Xapian::termpos pos;
97 if (first || termpos > current[i]) {
98 if (!pl->skip_to(termpos))
99 continue;
100 pos = pl->get_position();
101 } else {
102 pos = current[i];
104 current_pos = min(current_pos, pos);
105 current[j] = pos;
106 if (i != j) pls[j] = pls[i];
107 ++j;
109 pls.resize(j);
110 RETURN(j != 0);