Rename Collapser::entries() to get_entries()
[xapian.git] / xapian-core / matcher / collapser.h
blob3439fa9f2cff77e3d005acf94b7e0e5b1a8dac18
/** @file collapser.h
 * @brief Collapse documents with the same collapse key during the match.
 */
/* Copyright (C) 2009,2011,2017 Olly Betts
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
21 #ifndef XAPIAN_INCLUDED_COLLAPSER_H
22 #define XAPIAN_INCLUDED_COLLAPSER_H
24 #include "backends/documentinternal.h"
25 #include "msetcmp.h"
26 #include "api/postlist.h"
27 #include "api/result.h"
29 #include <unordered_map>
30 #include <vector>
/** Enumeration reporting how a document was handled by the Collapser.
 *
 *  Collapser::process() may return any of these values;
 *  CollapseData::add_item() returns ADDED, REJECTED or REPLACED.
 */
typedef enum {
    /// NOTE(review): presumably the document had no collapse key - confirm
    /// against the definition of Collapser::process().
    EMPTY,
    /// The document was kept.
    ADDED,
    /// The document was not kept.
    REJECTED,
    /// The document was kept and displaced a previously kept item.
    REPLACED
} collapse_result;
40 /// Class tracking information for a given value of the collapse key.
41 class CollapseData {
42 /** Currently kept MSet entries for this value of the collapse key.
44 * If collapse_max > 1, then this is a min-heap once collapse_count > 0.
46 * FIXME: We expect collapse_max to be small, so perhaps we should
47 * preallocate space for that many entries and/or allocate space in
48 * larger blocks to divvy up?
50 std::vector<Result> items;
52 /// The highest weight of a document we've rejected.
53 double next_best_weight;
55 /// The number of documents we've rejected.
56 Xapian::doccount collapse_count;
58 public:
59 /// Construct with the given Result @a item.
60 explicit CollapseData(const Result& item)
61 : items(1, item), next_best_weight(0), collapse_count(0) {
62 items[0].set_collapse_key(std::string());
65 /** Handle a new Result with this collapse key value.
67 * @param item The new item.
68 * @param collapse_max Max no. of items for each collapse key value.
69 * @param mcmp Result comparison functor.
70 * @param[out] old_item Replaced item (when REPLACED is returned).
72 * @return How @a item was handled: ADDED, REJECTED or REPLACED.
74 collapse_result add_item(const Result& item,
75 Xapian::doccount collapse_max,
76 const MSetCmp & mcmp,
77 Result& old_item);
79 /// The highest weight of a document we've rejected.
80 double get_next_best_weight() const { return next_best_weight; }
82 /// The number of documents we've rejected.
83 Xapian::doccount get_collapse_count() const { return collapse_count; }
86 /// The Collapser class tracks collapse keys and the documents they match.
87 class Collapser {
88 /// Map from collapse key values to the items we're keeping for them.
89 std::unordered_map<std::string, CollapseData> table;
91 /// How many items we're currently keeping in @a table.
92 Xapian::doccount entry_count = 0;
94 /** How many documents have we seen without a collapse key?
96 * We use this statistic to improve matches_lower_bound.
98 Xapian::doccount no_collapse_key = 0;
100 /** How many documents with duplicate collapse keys we have ignored.
102 * We use this statistic to improve matches_estimated (by considering
103 * the rate of collapsing) and matches_upper_bound.
105 Xapian::doccount dups_ignored = 0;
107 /** How many documents we've considered for collapsing.
109 * We use this statistic to improve matches_estimated (by considering
110 * the rate of collapsing).
112 Xapian::doccount docs_considered = 0;
114 /** The value slot we're getting collapse keys from. */
115 Xapian::valueno slot;
117 /** The maximum number of items to keep for each collapse key value. */
118 Xapian::doccount collapse_max;
120 public:
121 /// Replaced item when REPLACED is returned by @a collapse().
122 Result old_item;
124 Collapser(Xapian::valueno slot_, Xapian::doccount collapse_max_)
125 : slot(slot_), collapse_max(collapse_max_), old_item(0, 0) { }
127 /// Return true if collapsing is active for this match.
128 operator bool() const { return collapse_max != 0; }
130 /** Handle a new Result.
132 * @param item The new item.
133 * @param key_ptr If non-NULL, points to the collapse key (this happens
134 * for a remote match).
135 * @param doc Document for getting values.
136 * @param mcmp Result comparison functor.
138 * @return How @a item was handled: EMPTY, ADDED, REJECTED or REPLACED.
140 collapse_result process(Result& item,
141 const std::string* key_ptr,
142 Xapian::Document::Internal & vsdoc,
143 const MSetCmp & mcmp);
145 Xapian::doccount get_collapse_count(const std::string & collapse_key,
146 int percent_cutoff,
147 double min_weight) const;
149 Xapian::doccount get_docs_considered() const { return docs_considered; }
151 Xapian::doccount get_dups_ignored() const { return dups_ignored; }
153 Xapian::doccount get_entries() const { return entry_count; }
155 Xapian::doccount get_matches_lower_bound() const;
157 bool empty() const { return table.empty(); }
160 #endif // XAPIAN_INCLUDED_COLLAPSER_H