/** @file
 * @brief Abstract base class for postlists.
 */
/* Copyright (C) 2007,2008,2009,2011,2015 Olly Betts
 * Copyright (C) 2009 Lemur Consulting Ltd
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
22 #ifndef XAPIAN_INCLUDED_POSTLIST_H
23 #define XAPIAN_INCLUDED_POSTLIST_H
27 #include "xapian/intrusive_ptr.h"
28 #include <xapian/types.h>
29 #include <xapian/postingiterator.h>
31 #include "backends/positionlist.h"
32 #include "weight/weightinternal.h"
34 /// Abstract base class for postlists.
35 class Xapian::PostingIterator::Internal
: public Xapian::Internal::intrusive_base
{
36 /// Don't allow assignment.
37 void operator=(const Internal
&);
39 /// Don't allow copying.
40 Internal(const Internal
&);
43 /// Only constructable as a base class for derived classes.
47 /** We have virtual methods and want to be able to delete derived classes
48 * using a pointer to the base class, so we need a virtual destructor.
52 /// Get a lower bound on the number of documents indexed by this term.
53 virtual Xapian::doccount
get_termfreq_min() const = 0;
55 /// Get an upper bound on the number of documents indexed by this term.
56 virtual Xapian::doccount
get_termfreq_max() const = 0;
58 /** Get an estimate of the number of documents indexed by this term.
60 * It should always be true that:
61 * get_termfreq_min() <= get_termfreq_est() <= get_termfreq_max()
63 virtual Xapian::doccount
get_termfreq_est() const = 0;
65 /** Get an estimate for the termfreq and reltermfreq, given the stats.
67 * The frequencies may be for a combination of databases, or for just the
68 * relevant documents, so the results need not lie in the bounds given by
69 * get_termfreq_min() and get_termfreq_max().
71 virtual TermFreqs
get_termfreq_est_using_stats(
72 const Xapian::Weight::Internal
& stats
) const;
74 /// Return an upper bound on what get_weight() can return.
75 virtual double get_maxweight() const = 0;
77 /// Return the current docid.
78 virtual Xapian::docid
get_docid() const = 0;
80 /// Return the length of current document.
81 virtual Xapian::termcount
get_doclength() const = 0;
82 /* FIXME: Once flint has been retired, we should probably strip out
83 * PostList::get_doclength() and just fetch it from the DB directly.
86 /// Return the number of unique terms in the current document.
87 virtual Xapian::termcount
get_unique_terms() const = 0;
89 /** Return the wdf for the document at the current position.
91 * The default implementation throws Xapian::UnimplementedError.
93 virtual Xapian::termcount
get_wdf() const;
95 /// Return the weight contribution for the current position.
96 virtual double get_weight() const = 0;
98 virtual const std::string
* get_sort_key() const;
100 /** If the collapse key is already known, return it.
102 * This is implemented by MSetPostList (and MergePostList). Other
103 * subclasses rely on the default implementation which just returns
106 virtual const std::string
* get_collapse_key() const;
108 /// Return true if the current position is past the last entry in this list.
109 virtual bool at_end() const = 0;
111 /** Recalculate the upper bound on what get_weight() can return.
113 * If the tree has pruned, get_maxweight() may use cached values. Calling
114 * this method instead forces a full recalculation.
116 * Note that this method may be called after the postlist has reached the
117 * end. In this situation, the method should return 0.
119 virtual double recalc_maxweight() = 0;
121 /** Read the position list for the term in the current document and
122 * return a pointer to it (owned by the PostList).
124 * The default implementation throws Xapian::UnimplementedError.
126 virtual PositionList
* read_position_list();
128 /** Read the position list for the term in the current document and
129 * return a pointer to it (not owned by the PostList).
131 * The default implementation throws Xapian::UnimplementedError.
133 virtual PositionList
* open_position_list() const;
135 /** Advance the current position to the next document in the postlist.
137 * The list starts before the first entry in the list, so next()
138 * must be called before any methods which need the context of
139 * the current position.
141 * @param w_min The minimum weight contribution that is needed (this is
142 * just a hint which PostList subclasses may ignore).
144 * @return If a non-NULL pointer is returned, then the caller should
145 * substitute the returned pointer for its pointer to us, and then
146 * delete us. This "pruning" can only happen for a non-leaf
147 * subclass of this class.
149 virtual Internal
* next(double w_min
) = 0;
151 /** Skip forward to the specified docid.
153 * If the specified docid isn't in the list, position ourselves on the
154 * first document after it (or at_end() if no greater docids are present).
156 * @param w_min The minimum weight contribution that is needed (this is
157 * just a hint which PostList subclasses may ignore).
159 * @return If a non-NULL pointer is returned, then the caller should
160 * substitute the returned pointer for its pointer to us, and then
161 * delete us. This "pruning" can only happen for a non-leaf
162 * subclass of this class.
164 virtual Internal
* skip_to(Xapian::docid
, double w_min
) = 0;
166 /** Check if the specified docid occurs in this postlist.
168 * The caller is required to ensure that the specified @a docid actually
169 * exists in the database.
171 * This method acts like skip_to() if that can be done at little extra
172 * cost, in which case it then sets @a valid to true.
174 * Otherwise it simply checks if a particular docid is present. If it
175 * is, @a valid is set to true. If it isn't, it sets @a valid to
176 * false, and leaves the position unspecified (and hence the result of
177 * calling methods which depends on the current position, such as
178 * get_docid(), are also unspecified). In this state, next() will
179 * advance to the first matching position after @a docid, and skip_to()
180 * will act as it would if the position was the first matching position
183 * The default implementation calls skip_to().
185 virtual Internal
* check(Xapian::docid did
, double w_min
, bool &valid
);
187 /** Advance the current position to the next document in the postlist.
189 * Any weight contribution is acceptable.
191 Internal
* next() { return next(0.0); }
193 /** Skip forward to the specified docid.
195 * Any weight contribution is acceptable.
197 Internal
* skip_to(Xapian::docid did
) { return skip_to(did
, 0.0); }
199 /// Count the number of leaf subqueries which match at the current position.
200 virtual Xapian::termcount
count_matching_subqs() const;
202 /// Return a string description of this object.
203 virtual std::string
get_description() const = 0;
206 // In the external API headers, this class is Xapian::PostingIterator::Internal,
207 // but in the library code it's still known as "PostList" in most places.
208 typedef Xapian::PostingIterator::Internal PostList
;
210 #endif // XAPIAN_INCLUDED_POSTLIST_H