Pass doclen and unique_terms down from PostListTree
[xapian.git] / xapian-core / matcher / phrasepostlist.cc
blob719e05d9c98d13abc48bc4d9acc428229a9194e5
1 /** @file phrasepostlist.cc
2 * @brief Return docs containing terms forming a particular phrase.
3 */
4 /* Copyright (C) 2006,2007,2009,2010,2011,2014,2015,2017 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #include <config.h>
23 #include "phrasepostlist.h"
25 #include "debuglog.h"
26 #include "backends/positionlist.h"
27 #include "omassert.h"
28 #include "str.h"
30 #include <algorithm>
31 #include <vector>
33 using namespace std;
35 PhrasePostList::PhrasePostList(PostList *source_,
36 Xapian::termpos window_,
37 const vector<PostList*>::const_iterator &terms_begin,
38 const vector<PostList*>::const_iterator &terms_end,
39 PostListTree* pltree_)
40 : SelectPostList(source_, pltree_),
41 window(window_),
42 terms(terms_begin, terms_end)
44 size_t n = terms.size();
45 Assert(n > 1);
46 poslists = new PositionList*[n];
49 PhrasePostList::~PhrasePostList()
51 delete [] poslists;
54 void
55 PhrasePostList::start_position_list(unsigned i)
57 poslists[i] = terms[i]->read_position_list();
60 bool
61 PhrasePostList::test_doc()
63 LOGCALL(MATCH, bool, "PhrasePostList::test_doc", NO_ARGS);
65 start_position_list(0);
66 if (!poslists[0]->next())
67 RETURN(false);
69 unsigned read_hwm = 0;
70 Xapian::termpos b;
71 do {
72 Xapian::termpos base = poslists[0]->get_position();
73 Xapian::termpos pos = base;
74 unsigned i = 0;
75 do {
76 if (++i == terms.size()) RETURN(true);
77 if (i > read_hwm) {
78 read_hwm = i;
79 start_position_list(i);
81 if (!poslists[i]->skip_to(pos + 1))
82 RETURN(false);
83 pos = poslists[i]->get_position();
84 b = pos + (terms.size() - i);
85 } while (b - base <= window);
86 // Advance the start of the window to the first position it could match
87 // in given the current position of term i.
88 } while (poslists[0]->skip_to(b - window));
89 RETURN(false);
92 Xapian::termcount
93 PhrasePostList::get_wdf() const
95 // Calculate an estimate for the wdf of a phrase postlist.
97 // We use the minimum wdf of a sub-postlist as our estimate. See the
98 // comment in NearPostList::get_wdf() for justification of this estimate.
99 vector<PostList *>::const_iterator i = terms.begin();
100 Xapian::termcount wdf = (*i)->get_wdf();
101 while (++i != terms.end()) {
102 wdf = min(wdf, (*i)->get_wdf());
104 return wdf;
107 Xapian::doccount
108 PhrasePostList::get_termfreq_est() const
110 // It's hard to estimate how many times the phrase will occur as
111 // it depends a lot on the phrase, but usually the phrase will
112 // occur significantly less often than the individual terms.
113 return pl->get_termfreq_est() / 3;
116 TermFreqs
117 PhrasePostList::get_termfreq_est_using_stats(
118 const Xapian::Weight::Internal & stats) const
120 LOGCALL(MATCH, TermFreqs, "PhrasePostList::get_termfreq_est_using_stats", stats);
121 // No idea how to estimate this - do the same as get_termfreq_est() for
122 // now.
123 TermFreqs result(pl->get_termfreq_est_using_stats(stats));
124 result.termfreq /= 3;
125 result.reltermfreq /= 3;
126 RETURN(result);
129 string
130 PhrasePostList::get_description() const
132 string m = "(Phrase ";
133 m += str(window);
134 m += ' ';
135 m += pl->get_description();
136 m += ")";
137 return m;