1 /** @file omenquireinternal.h
4 /* Copyright 1999,2000,2001 BrightStation PLC
5 * Copyright 2001,2002 Ananova Ltd
6 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2014,2015,2016 Olly Betts
7 * Copyright 2009 Lemur Consulting Ltd
8 * Copyright 2011 Action Without Borders
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License as
12 * published by the Free Software Foundation; either version 2 of the
13 * License, or (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
26 #ifndef OM_HGUARD_OMENQUIREINTERNAL_H
27 #define OM_HGUARD_OMENQUIREINTERNAL_H
29 #include "xapian/database.h"
30 #include "xapian/document.h"
31 #include "xapian/enquire.h"
32 #include "xapian/query.h"
33 #include "xapian/keymaker.h"
40 #include "weight/weightinternal.h"
53 /** An item resulting from a query.
54 * This item contains the document id, and the weight calculated for
59 MSetItem(double wt_
, Xapian::docid did_
)
60 : wt(wt_
), did(did_
), collapse_count(0) {}
62 MSetItem(double wt_
, Xapian::docid did_
, const string
&key_
)
63 : wt(wt_
), did(did_
), collapse_key(key_
), collapse_count(0) {}
65 MSetItem(double wt_
, Xapian::docid did_
, const string
&key_
,
66 Xapian::doccount collapse_count_
)
67 : wt(wt_
), did(did_
), collapse_key(key_
),
68 collapse_count(collapse_count_
) {}
70 void swap(MSetItem
& o
) {
72 std::swap(did
, o
.did
);
73 std::swap(collapse_key
, o
.collapse_key
);
74 std::swap(collapse_count
, o
.collapse_count
);
75 std::swap(sort_key
, o
.sort_key
);
78 /** Weight calculated. */
84 /** Value which was used to collapse upon.
86 * If the collapse option is not being used, this will always
89 * If the collapse option is in use, this will contain the collapse
90 * key's value for this particular item. If the key is not present
91 * for this item, the value will be a null string. Only one instance
92 * of each key value (apart from the null string) will be present in
93 * the items in the returned Xapian::MSet.
97 /** Count of collapses done on collapse_key so far
99 * This is normally 0, and goes up for each collapse done
100 * It is not necessarily an indication of how many collapses
101 * might be done if an exhaustive match was done
103 Xapian::doccount collapse_count
;
105 /** Used when sorting by value. */
108 /// Return a string describing this object.
109 string
get_description() const;
114 /** Internals of enquire system.
115 * This allows the implementation of Xapian::Enquire to be hidden and reference
118 class Enquire::Internal
: public Xapian::Internal::intrusive_base
{
119 friend class MSet::Internal
;
121 /// The database which this enquire object uses.
122 const Xapian::Database db
;
124 /// The user's query.
127 /// The query length.
131 Internal(const Internal
&);
132 /// Assignment not allowed
133 void operator=(const Internal
&);
136 typedef enum { REL
, VAL
, VAL_REL
, REL_VAL
} sort_setting
;
138 Xapian::valueno collapse_key
;
140 Xapian::doccount collapse_max
;
142 Xapian::Enquire::docid_order order
;
146 double weight_cutoff
;
148 Xapian::valueno sort_key
;
149 sort_setting sort_by
;
150 bool sort_value_forward
;
152 Xapian::Internal::opt_intrusive_ptr
<KeyMaker
> sorter
;
156 /** The weight to use for this query.
158 * This is mutable so that the default BM25Weight object can be
159 * created lazily when first required.
161 mutable Weight
* weight
;
163 /// The weighting scheme to use for query expansion.
164 std::string eweightname
;
166 /// The parameter required for TradWeight query expansion.
169 vector
<Xapian::Internal::opt_intrusive_ptr
<MatchSpy
>> spies
;
171 explicit Internal(const Xapian::Database
&databases
);
174 /** Request a document from the database.
176 void request_doc(const Xapian::Internal::MSetItem
&item
) const;
178 /** Read a previously requested document from the database.
180 Xapian::Document
read_doc(const Xapian::Internal::MSetItem
&item
) const;
182 Xapian::Document
get_document(const Xapian::Internal::MSetItem
&item
) const;
184 void set_query(const Query
& query_
, termcount qlen_
);
185 const Query
& get_query() const;
186 MSet
get_mset(Xapian::doccount first
, Xapian::doccount maxitems
,
187 Xapian::doccount check_at_least
,
189 const MatchDecider
*mdecider
) const;
191 ESet
get_eset(Xapian::termcount maxitems
, const RSet
& omrset
, int flags
,
192 const ExpandDecider
*edecider
, double min_wt
) const;
194 TermIterator
get_matching_terms(Xapian::docid did
) const;
195 TermIterator
get_matching_terms(const Xapian::MSetIterator
&it
) const;
197 Xapian::doccount
get_termfreq(const string
&tname
) const;
199 string
get_description() const;
202 class MSet::Internal
: public Xapian::Internal::intrusive_base
{
204 /// Factor to multiply weights by to convert them to percentages.
205 double percent_factor
;
208 /** The set of documents which have been requested but not yet
211 mutable set
<Xapian::doccount
> requested_docs
;
213 /// Cache of documents, indexed by MSet index.
214 mutable map
<Xapian::doccount
, Xapian::Document
> indexeddocs
;
216 /// Read and cache the documents so far requested.
217 void read_docs() const;
220 Internal(const Internal
&);
221 /// Assignment not allowed
222 void operator=(const Internal
&);
225 /// Xapian::Enquire reference, for getting documents.
226 Xapian::Internal::intrusive_ptr
<const Enquire::Internal
> enquire
;
228 /** Provides the term frequency and weight for each term in the query. */
229 Xapian::Weight::Internal
* stats
;
231 /// A list of items comprising the (selected part of the) MSet.
232 vector
<Xapian::Internal::MSetItem
> items
;
234 /// Rank of first item in MSet.
235 Xapian::doccount firstitem
;
237 Xapian::doccount matches_lower_bound
;
239 Xapian::doccount matches_estimated
;
241 Xapian::doccount matches_upper_bound
;
243 Xapian::doccount uncollapsed_lower_bound
;
245 Xapian::doccount uncollapsed_estimated
;
247 Xapian::doccount uncollapsed_upper_bound
;
257 matches_lower_bound(0),
258 matches_estimated(0),
259 matches_upper_bound(0),
260 uncollapsed_lower_bound(0),
261 uncollapsed_estimated(0),
262 uncollapsed_upper_bound(0),
266 /// Note: destroys parameter items.
267 Internal(Xapian::doccount firstitem_
,
268 Xapian::doccount matches_upper_bound_
,
269 Xapian::doccount matches_lower_bound_
,
270 Xapian::doccount matches_estimated_
,
271 Xapian::doccount uncollapsed_upper_bound_
,
272 Xapian::doccount uncollapsed_lower_bound_
,
273 Xapian::doccount uncollapsed_estimated_
,
274 double max_possible_
,
275 double max_attained_
,
276 vector
<Xapian::Internal::MSetItem
> &items_
,
277 double percent_factor_
)
278 : percent_factor(percent_factor_
),
280 firstitem(firstitem_
),
281 matches_lower_bound(matches_lower_bound_
),
282 matches_estimated(matches_estimated_
),
283 matches_upper_bound(matches_upper_bound_
),
284 uncollapsed_lower_bound(uncollapsed_lower_bound_
),
285 uncollapsed_estimated(uncollapsed_estimated_
),
286 uncollapsed_upper_bound(uncollapsed_upper_bound_
),
287 max_possible(max_possible_
),
288 max_attained(max_attained_
) {
289 std::swap(items
, items_
);
292 ~Internal() { delete stats
; }
294 /// get a document by index in MSet, via the cache.
295 Xapian::Document
get_doc_by_index(Xapian::doccount index
) const;
297 /// Converts a weight to a percentage weight
298 int convert_to_percent_internal(double wt
) const;
300 std::string
snippet(const std::string
& text
, size_t length
,
301 const Xapian::Stem
& stemmer
,
303 const std::string
& hi_start
,
304 const std::string
& hi_end
,
305 const std::string
& omit
) const;
307 /// Return a string describing this object.
308 string
get_description() const;
310 /** Fetch items specified into the document cache.
312 void fetch_items(Xapian::doccount first
, Xapian::doccount last
) const;
315 class RSet::Internal
: public Xapian::Internal::intrusive_base
{
316 friend class Xapian::RSet
;
319 /// Items in the relevance set.
320 set
<Xapian::docid
> items
;
323 const set
<Xapian::docid
> & get_items() const { return items
; }
325 /// Return a string describing this object.
326 string
get_description() const;
331 #endif // OM_HGUARD_OMENQUIREINTERNAL_H