1 /** @file phrasepostlist.cc
2 * @brief Return docs containing terms forming a particular phrase.
4 /* Copyright (C) 2006,2007,2009,2010,2011,2014,2015,2017 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 #include "phrasepostlist.h"
26 #include "backends/positionlist.h"
// Constructor.
//
// Passes source_ and pltree_ on to the SelectPostList base class, copies the
// iterator range [terms_begin, terms_end) into the `terms` member, and
// allocates `poslists` as an array of PositionList pointers with one slot per
// term.  The slots are not assigned here; start_position_list() fills them in.
//
// NOTE(review): the initialisation of `window` from window_ is not visible in
// this excerpt (some lines appear to be missing) - confirm against the full
// file.
35 PhrasePostList::PhrasePostList(PostList
*source_
,
36 Xapian::termpos window_
,
37 const vector
<PostList
*>::const_iterator
&terms_begin
,
38 const vector
<PostList
*>::const_iterator
&terms_end
,
39 PostListTree
* pltree_
)
40 : SelectPostList(source_
, pltree_
),
42 terms(terms_begin
, terms_end
)
// One PositionList pointer per sub-postlist in `terms`.
44 size_t n
= terms
.size();
46 poslists
= new PositionList
*[n
];
// Destructor.
//
// NOTE(review): the body is not visible in this excerpt.  The constructor
// allocates `poslists` with new[], so confirm that array is released here.
49 PhrasePostList::~PhrasePostList()
// Store the result of terms[i]->read_position_list() in poslists[i].
//
// @param i  Index into both `terms` and `poslists`.  No bounds check is made
//           here, so the caller must pass an index below terms.size().
55 PhrasePostList::start_position_list(unsigned i
)
57 poslists
[i
] = terms
[i
]->read_position_list();
// Check the position lists of the sub-postlists for an in-order occurrence of
// all the terms that fits within `window`.  The LOGCALL below declares a bool
// result, and true is returned once every term has been placed.
//
// NOTE(review): several lines are missing from this excerpt - the
// declarations of `i` and `b`, the `do {` openers, the statements guarded by
// the two failure `if`s, any use of `read_hwm`, and the final return.  The
// comments below describe only what is visible.
61 PhrasePostList::test_doc()
63 LOGCALL(MATCH
, bool, "PhrasePostList::test_doc", NO_ARGS
);
// Open the position list for term 0 and call next() on it; the action taken
// when next() fails is not visible here.
65 start_position_list(0);
66 if (!poslists
[0]->next())
69 unsigned read_hwm
= 0;
// `base` is the current position of term 0; `pos` tracks the position of the
// most recently placed term, starting from `base`.
72 Xapian::termpos base
= poslists
[0]->get_position();
73 Xapian::termpos pos
= base
;
// Every term has been placed: report a match.
76 if (++i
== terms
.size()) RETURN(true);
79 start_position_list(i
);
// Term i must occur strictly after the previous term, hence pos + 1.
81 if (!poslists
[i
]->skip_to(pos
+ 1))
83 pos
= poslists
[i
]->get_position();
// b is pos plus the number of terms from index i to the end of `terms`; the
// inner loop keeps placing terms while b - base does not exceed `window`.
84 b
= pos
+ (terms
.size() - i
);
85 } while (b
- base
<= window
);
86 // Advance the start of the window to the first position it could match
87 // in given the current position of term i.
88 } while (poslists
[0]->skip_to(b
- window
));
// Compute the smallest get_wdf() value over all the sub-postlists in `terms`,
// used as the wdf estimate for the phrase.
//
// terms.begin() is dereferenced unconditionally, so `terms` must be non-empty.
//
// NOTE(review): the return statement is not visible in this excerpt;
// presumably `wdf` is returned - confirm.
93 PhrasePostList::get_wdf() const
95 // Calculate an estimate for the wdf of a phrase postlist.
97 // We use the minimum wdf of a sub-postlist as our estimate. See the
98 // comment in NearPostList::get_wdf() for justification of this estimate.
99 vector
<PostList
*>::const_iterator i
= terms
.begin();
// Start from the first sub-postlist's wdf, then fold in the remaining ones.
100 Xapian::termcount wdf
= (*i
)->get_wdf();
101 while (++i
!= terms
.end()) {
102 wdf
= min(wdf
, (*i
)->get_wdf());
// Return one third of pl->get_termfreq_est() as the term frequency estimate
// for the phrase.  The divisor 3 is a heuristic, per the comment below.
//
// NOTE(review): `pl` is not declared in this excerpt; presumably it is the
// source postlist handed to the SelectPostList base class - confirm.
108 PhrasePostList::get_termfreq_est() const
110 // It's hard to estimate how many times the phrase will occur as
111 // it depends a lot on the phrase, but usually the phrase will
112 // occur significantly less often than the individual terms.
113 return pl
->get_termfreq_est() / 3;
// Variant of get_termfreq_est() that works from the supplied statistics:
// takes the TermFreqs from pl->get_termfreq_est_using_stats(stats) and divides
// its termfreq and reltermfreq fields by 3.
//
// @param stats  Statistics object passed through unchanged to `pl`.
//
// NOTE(review): the return statement and any handling of other TermFreqs
// fields are not visible in this excerpt - confirm against the full file.
117 PhrasePostList::get_termfreq_est_using_stats(
118 const Xapian::Weight::Internal
& stats
) const
120 LOGCALL(MATCH
, TermFreqs
, "PhrasePostList::get_termfreq_est_using_stats", stats
);
121 // No idea how to estimate this - do the same as get_termfreq_est() for
123 TermFreqs
result(pl
->get_termfreq_est_using_stats(stats
));
// Same divide-by-3 heuristic as get_termfreq_est().
124 result
.termfreq
/= 3;
125 result
.reltermfreq
/= 3;
// Build a description string for this postlist: it starts with "(Phrase " and
// has pl->get_description() appended.
//
// NOTE(review): the remaining appends and the return are not visible in this
// excerpt - confirm the full format against the complete file.
130 PhrasePostList::get_description() const
132 string m
= "(Phrase ";
135 m
+= pl
->get_description();