// Document xapian-compact --blocksize takes an argument
// [xapian.git] / xapian-core / api / postlist.h
// blob 9bf4e63099d6d6a2729108083032ed60aa5f4e58
1 /** @file postlist.h
2 * @brief Abstract base class for postlists.
3 */
4 /* Copyright (C) 2007,2008,2009,2011,2015 Olly Betts
5 * Copyright (C) 2009 Lemur Consulting Ltd
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #ifndef XAPIAN_INCLUDED_POSTLIST_H
23 #define XAPIAN_INCLUDED_POSTLIST_H
25 #include <string>
27 #include "xapian/intrusive_ptr.h"
28 #include <xapian/types.h>
29 #include <xapian/postingiterator.h>
31 #include "backends/positionlist.h"
32 #include "weight/weightinternal.h"
34 /// Abstract base class for postlists.
35 class Xapian::PostingIterator::Internal : public Xapian::Internal::intrusive_base {
36 /// Don't allow assignment.
37 void operator=(const Internal &);
39 /// Don't allow copying.
40 Internal(const Internal &);
42 protected:
43 /// Only constructable as a base class for derived classes.
44 Internal() { }
46 public:
47 /** We have virtual methods and want to be able to delete derived classes
48 * using a pointer to the base class, so we need a virtual destructor.
50 virtual ~Internal();
52 /// Get a lower bound on the number of documents indexed by this term.
53 virtual Xapian::doccount get_termfreq_min() const = 0;
55 /// Get an upper bound on the number of documents indexed by this term.
56 virtual Xapian::doccount get_termfreq_max() const = 0;
58 /** Get an estimate of the number of documents indexed by this term.
60 * It should always be true that:
61 * get_termfreq_min() <= get_termfreq_est() <= get_termfreq_max()
63 virtual Xapian::doccount get_termfreq_est() const = 0;
65 /** Get an estimate for the termfreq and reltermfreq, given the stats.
67 * The frequencies may be for a combination of databases, or for just the
68 * relevant documents, so the results need not lie in the bounds given by
69 * get_termfreq_min() and get_termfreq_max().
71 virtual TermFreqs get_termfreq_est_using_stats(
72 const Xapian::Weight::Internal & stats) const;
74 /// Return an upper bound on what get_weight() can return.
75 virtual double get_maxweight() const = 0;
77 /// Return the current docid.
78 virtual Xapian::docid get_docid() const = 0;
80 /// Return the length of current document.
81 virtual Xapian::termcount get_doclength() const = 0;
82 /* FIXME: Once flint has been retired, we should probably strip out
83 * PostList::get_doclength() and just fetch it from the DB directly.
86 /// Return the number of unique terms in the current document.
87 virtual Xapian::termcount get_unique_terms() const = 0;
89 /** Return the wdf for the document at the current position.
91 * The default implementation throws Xapian::UnimplementedError.
93 virtual Xapian::termcount get_wdf() const;
95 /// Return the weight contribution for the current position.
96 virtual double get_weight() const = 0;
98 virtual const std::string * get_sort_key() const;
100 /** If the collapse key is already known, return it.
102 * This is implemented by MSetPostList (and MergePostList). Other
103 * subclasses rely on the default implementation which just returns
104 * NULL.
106 virtual const std::string * get_collapse_key() const;
108 /// Return true if the current position is past the last entry in this list.
109 virtual bool at_end() const = 0;
111 /** Recalculate the upper bound on what get_weight() can return.
113 * If the tree has pruned, get_maxweight() may use cached values. Calling
114 * this method instead forces a full recalculation.
116 * Note that this method may be called after the postlist has reached the
117 * end. In this situation, the method should return 0.
119 virtual double recalc_maxweight() = 0;
121 /** Read the position list for the term in the current document and
122 * return a pointer to it (owned by the PostList).
124 * The default implementation throws Xapian::UnimplementedError.
126 virtual PositionList * read_position_list();
128 /** Read the position list for the term in the current document and
129 * return a pointer to it (not owned by the PostList).
131 * The default implementation throws Xapian::UnimplementedError.
133 virtual PositionList * open_position_list() const;
135 /** Advance the current position to the next document in the postlist.
137 * The list starts before the first entry in the list, so next()
138 * must be called before any methods which need the context of
139 * the current position.
141 * @param w_min The minimum weight contribution that is needed (this is
142 * just a hint which PostList subclasses may ignore).
144 * @return If a non-NULL pointer is returned, then the caller should
145 * substitute the returned pointer for its pointer to us, and then
146 * delete us. This "pruning" can only happen for a non-leaf
147 * subclass of this class.
149 virtual Internal * next(double w_min) = 0;
151 /** Skip forward to the specified docid.
153 * If the specified docid isn't in the list, position ourselves on the
154 * first document after it (or at_end() if no greater docids are present).
156 * @param w_min The minimum weight contribution that is needed (this is
157 * just a hint which PostList subclasses may ignore).
159 * @return If a non-NULL pointer is returned, then the caller should
160 * substitute the returned pointer for its pointer to us, and then
161 * delete us. This "pruning" can only happen for a non-leaf
162 * subclass of this class.
164 virtual Internal * skip_to(Xapian::docid, double w_min) = 0;
166 /** Check if the specified docid occurs in this postlist.
168 * The caller is required to ensure that the specified @a docid actually
169 * exists in the database.
171 * This method acts like skip_to() if that can be done at little extra
172 * cost, in which case it then sets @a valid to true.
174 * Otherwise it simply checks if a particular docid is present. If it
175 * is, @a valid is set to true. If it isn't, it sets @a valid to
176 * false, and leaves the position unspecified (and hence the result of
177 * calling methods which depends on the current position, such as
178 * get_docid(), are also unspecified). In this state, next() will
179 * advance to the first matching position after @a docid, and skip_to()
180 * will act as it would if the position was the first matching position
181 * after @a docid.
183 * The default implementation calls skip_to().
185 virtual Internal * check(Xapian::docid did, double w_min, bool &valid);
187 /** Advance the current position to the next document in the postlist.
189 * Any weight contribution is acceptable.
191 Internal * next() { return next(0.0); }
193 /** Skip forward to the specified docid.
195 * Any weight contribution is acceptable.
197 Internal * skip_to(Xapian::docid did) { return skip_to(did, 0.0); }
199 /// Count the number of leaf subqueries which match at the current position.
200 virtual Xapian::termcount count_matching_subqs() const;
202 /// Return a string description of this object.
203 virtual std::string get_description() const = 0;
206 // In the external API headers, this class is Xapian::PostingIterator::Internal,
207 // but in the library code it's still known as "PostList" in most places.
208 typedef Xapian::PostingIterator::Internal PostList;
210 #endif // XAPIAN_INCLUDED_POSTLIST_H