Website now in git not CVS
[xapian.git] / xapian-core / api / omenquireinternal.h
blob4b4c90294d50d3a0453ecb38a7961f90a20b1f99
1 /** @file omenquireinternal.h
2 * @brief Internals
3 */
/* Copyright 1999,2000,2001 BrightStation PLC
 * Copyright 2001,2002 Ananova Ltd
 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2014,2015,2016 Olly Betts
 * Copyright 2009 Lemur Consulting Ltd
 * Copyright 2011 Action Without Borders
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */
26 #ifndef OM_HGUARD_OMENQUIREINTERNAL_H
27 #define OM_HGUARD_OMENQUIREINTERNAL_H
29 #include "xapian/database.h"
30 #include "xapian/document.h"
31 #include "xapian/enquire.h"
32 #include "xapian/query.h"
33 #include "xapian/keymaker.h"
35 #include <algorithm>
36 #include <cmath>
37 #include <map>
38 #include <set>
40 #include "weight/weightinternal.h"
42 using namespace std;
44 class OmExpand;
45 class MultiMatch;
47 namespace Xapian {
49 class TermIterator;
51 namespace Internal {
53 /** An item resulting from a query.
54 * This item contains the document id, and the weight calculated for
55 * the document.
57 class MSetItem {
58 public:
59 MSetItem(double wt_, Xapian::docid did_)
60 : wt(wt_), did(did_), collapse_count(0) {}
62 MSetItem(double wt_, Xapian::docid did_, const string &key_)
63 : wt(wt_), did(did_), collapse_key(key_), collapse_count(0) {}
65 MSetItem(double wt_, Xapian::docid did_, const string &key_,
66 Xapian::doccount collapse_count_)
67 : wt(wt_), did(did_), collapse_key(key_),
68 collapse_count(collapse_count_) {}
70 void swap(MSetItem & o) {
71 std::swap(wt, o.wt);
72 std::swap(did, o.did);
73 std::swap(collapse_key, o.collapse_key);
74 std::swap(collapse_count, o.collapse_count);
75 std::swap(sort_key, o.sort_key);
78 /** Weight calculated. */
79 double wt;
81 /** Document id. */
82 Xapian::docid did;
84 /** Value which was used to collapse upon.
86 * If the collapse option is not being used, this will always
87 * have a null value.
89 * If the collapse option is in use, this will contain the collapse
90 * key's value for this particular item. If the key is not present
91 * for this item, the value will be a null string. Only one instance
92 * of each key value (apart from the null string) will be present in
93 * the items in the returned Xapian::MSet.
95 string collapse_key;
97 /** Count of collapses done on collapse_key so far
99 * This is normally 0, and goes up for each collapse done
100 * It is not necessarily an indication of how many collapses
101 * might be done if an exhaustive match was done
103 Xapian::doccount collapse_count;
105 /** Used when sorting by value. */
106 string sort_key;
108 /// Return a string describing this object.
109 string get_description() const;
114 /** Internals of enquire system.
115 * This allows the implementation of Xapian::Enquire to be hidden and reference
116 * counted.
118 class Enquire::Internal : public Xapian::Internal::intrusive_base {
119 friend class MSet::Internal;
120 private:
121 /// The database which this enquire object uses.
122 const Xapian::Database db;
124 /// The user's query.
125 Query query;
127 /// The query length.
128 termcount qlen;
130 /// Copy not allowed
131 Internal(const Internal &);
132 /// Assignment not allowed
133 void operator=(const Internal &);
135 public:
136 typedef enum { REL, VAL, VAL_REL, REL_VAL } sort_setting;
138 Xapian::valueno collapse_key;
140 Xapian::doccount collapse_max;
142 Xapian::Enquire::docid_order order;
144 int percent_cutoff;
146 double weight_cutoff;
148 Xapian::valueno sort_key;
149 sort_setting sort_by;
150 bool sort_value_forward;
152 Xapian::Internal::opt_intrusive_ptr<KeyMaker> sorter;
154 double time_limit;
156 /** The weight to use for this query.
158 * This is mutable so that the default BM25Weight object can be
159 * created lazily when first required.
161 mutable Weight * weight;
163 /// The weighting scheme to use for query expansion.
164 std::string eweightname;
166 /// The parameter required for TradWeight query expansion.
167 double expand_k;
169 vector<Xapian::Internal::opt_intrusive_ptr<MatchSpy>> spies;
171 explicit Internal(const Xapian::Database &databases);
172 ~Internal();
174 /** Request a document from the database.
176 void request_doc(const Xapian::Internal::MSetItem &item) const;
178 /** Read a previously requested document from the database.
180 Xapian::Document read_doc(const Xapian::Internal::MSetItem &item) const;
182 Xapian::Document get_document(const Xapian::Internal::MSetItem &item) const;
184 void set_query(const Query & query_, termcount qlen_);
185 const Query & get_query() const;
186 MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
187 Xapian::doccount check_at_least,
188 const RSet *omrset,
189 const MatchDecider *mdecider) const;
191 ESet get_eset(Xapian::termcount maxitems, const RSet & omrset, int flags,
192 const ExpandDecider *edecider, double min_wt) const;
194 TermIterator get_matching_terms(Xapian::docid did) const;
195 TermIterator get_matching_terms(const Xapian::MSetIterator &it) const;
197 Xapian::doccount get_termfreq(const string &tname) const;
199 string get_description() const;
202 class MSet::Internal : public Xapian::Internal::intrusive_base {
203 public:
204 /// Factor to multiply weights by to convert them to percentages.
205 double percent_factor;
207 private:
208 /** The set of documents which have been requested but not yet
209 * collected.
211 mutable set<Xapian::doccount> requested_docs;
213 /// Cache of documents, indexed by MSet index.
214 mutable map<Xapian::doccount, Xapian::Document> indexeddocs;
216 /// Read and cache the documents so far requested.
217 void read_docs() const;
219 /// Copy not allowed
220 Internal(const Internal &);
221 /// Assignment not allowed
222 void operator=(const Internal &);
224 public:
225 /// Xapian::Enquire reference, for getting documents.
226 Xapian::Internal::intrusive_ptr<const Enquire::Internal> enquire;
228 /** Provides the term frequency and weight for each term in the query. */
229 Xapian::Weight::Internal * stats;
231 /// A list of items comprising the (selected part of the) MSet.
232 vector<Xapian::Internal::MSetItem> items;
234 /// Rank of first item in MSet.
235 Xapian::doccount firstitem;
237 Xapian::doccount matches_lower_bound;
239 Xapian::doccount matches_estimated;
241 Xapian::doccount matches_upper_bound;
243 Xapian::doccount uncollapsed_lower_bound;
245 Xapian::doccount uncollapsed_estimated;
247 Xapian::doccount uncollapsed_upper_bound;
249 double max_possible;
251 double max_attained;
253 Internal()
254 : percent_factor(0),
255 stats(NULL),
256 firstitem(0),
257 matches_lower_bound(0),
258 matches_estimated(0),
259 matches_upper_bound(0),
260 uncollapsed_lower_bound(0),
261 uncollapsed_estimated(0),
262 uncollapsed_upper_bound(0),
263 max_possible(0),
264 max_attained(0) {}
266 /// Note: destroys parameter items.
267 Internal(Xapian::doccount firstitem_,
268 Xapian::doccount matches_upper_bound_,
269 Xapian::doccount matches_lower_bound_,
270 Xapian::doccount matches_estimated_,
271 Xapian::doccount uncollapsed_upper_bound_,
272 Xapian::doccount uncollapsed_lower_bound_,
273 Xapian::doccount uncollapsed_estimated_,
274 double max_possible_,
275 double max_attained_,
276 vector<Xapian::Internal::MSetItem> &items_,
277 double percent_factor_)
278 : percent_factor(percent_factor_),
279 stats(NULL),
280 firstitem(firstitem_),
281 matches_lower_bound(matches_lower_bound_),
282 matches_estimated(matches_estimated_),
283 matches_upper_bound(matches_upper_bound_),
284 uncollapsed_lower_bound(uncollapsed_lower_bound_),
285 uncollapsed_estimated(uncollapsed_estimated_),
286 uncollapsed_upper_bound(uncollapsed_upper_bound_),
287 max_possible(max_possible_),
288 max_attained(max_attained_) {
289 std::swap(items, items_);
292 ~Internal() { delete stats; }
294 /// get a document by index in MSet, via the cache.
295 Xapian::Document get_doc_by_index(Xapian::doccount index) const;
297 /// Converts a weight to a percentage weight
298 int convert_to_percent_internal(double wt) const;
300 std::string snippet(const std::string & text, size_t length,
301 const Xapian::Stem & stemmer,
302 unsigned flags,
303 const std::string & hi_start,
304 const std::string & hi_end,
305 const std::string & omit) const;
307 /// Return a string describing this object.
308 string get_description() const;
310 /** Fetch items specified into the document cache.
312 void fetch_items(Xapian::doccount first, Xapian::doccount last) const;
315 class RSet::Internal : public Xapian::Internal::intrusive_base {
316 friend class Xapian::RSet;
318 private:
319 /// Items in the relevance set.
320 set<Xapian::docid> items;
322 public:
323 const set<Xapian::docid> & get_items() const { return items; }
325 /// Return a string describing this object.
326 string get_description() const;
331 #endif // OM_HGUARD_OMENQUIREINTERNAL_H