/** @file
 * @brief Collapse documents with the same collapse key during the match.
 */
4 /* Copyright (C) 2009,2011,2017 Olly Betts
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of the
9 * License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #ifndef XAPIAN_INCLUDED_COLLAPSER_H
22 #define XAPIAN_INCLUDED_COLLAPSER_H
24 #include "backends/documentinternal.h"
26 #include "api/postlist.h"
27 #include "api/result.h"
29 #include <unordered_map>
32 /// Enumeration reporting how a document was handled by the Collapser.
40 /// Class tracking information for a given value of the collapse key.
42 /** Currently kept MSet entries for this value of the collapse key.
44 * If collapse_max > 1, then this is a min-heap once collapse_count > 0.
46 * FIXME: We expect collapse_max to be small, so perhaps we should
47 * preallocate space for that many entries and/or allocate space in
48 * larger blocks to divvy up?
50 std::vector
<Result
> items
;
52 /// The highest weight of a document we've rejected.
53 double next_best_weight
;
55 /// The number of documents we've rejected.
56 Xapian::doccount collapse_count
;
59 /// Construct with the given Result @a item.
60 explicit CollapseData(const Result
& item
)
61 : items(1, item
), next_best_weight(0), collapse_count(0) {
62 items
[0].set_collapse_key(std::string());
/** Handle a new Result with this collapse key value.
 *
 *  @param item          The new item.
 *  @param collapse_max  Max no. of items for each collapse key value.
 *  @param mcmp          Result comparison functor.
 *  @param[out] old_item Replaced item (when REPLACED is returned).
 *
 *  @return How @a item was handled: ADDED, REJECTED or REPLACED.
 */
74 collapse_result
add_item(const Result
& item
,
75 Xapian::doccount collapse_max
,
79 /// The highest weight of a document we've rejected.
80 double get_next_best_weight() const { return next_best_weight
; }
82 /// The number of documents we've rejected.
83 Xapian::doccount
get_collapse_count() const { return collapse_count
; }
86 /// The Collapser class tracks collapse keys and the documents they match.
88 /// Map from collapse key values to the items we're keeping for them.
89 std::unordered_map
<std::string
, CollapseData
> table
;
91 /// How many items we're currently keeping in @a table.
92 Xapian::doccount entry_count
= 0;
94 /** How many documents have we seen without a collapse key?
96 * We use this statistic to improve matches_lower_bound.
98 Xapian::doccount no_collapse_key
= 0;
100 /** How many documents with duplicate collapse keys we have ignored.
102 * We use this statistic to improve matches_estimated (by considering
103 * the rate of collapsing) and matches_upper_bound.
105 Xapian::doccount dups_ignored
= 0;
107 /** How many documents we've considered for collapsing.
109 * We use this statistic to improve matches_estimated (by considering
110 * the rate of collapsing).
112 Xapian::doccount docs_considered
= 0;
114 /** The value slot we're getting collapse keys from. */
115 Xapian::valueno slot
;
117 /** The maximum number of items to keep for each collapse key value. */
118 Xapian::doccount collapse_max
;
/// Replaced item when REPLACED is returned by process().
124 Collapser(Xapian::valueno slot_
, Xapian::doccount collapse_max_
)
125 : slot(slot_
), collapse_max(collapse_max_
), old_item(0, 0) { }
127 /// Return true if collapsing is active for this match.
128 operator bool() const { return collapse_max
!= 0; }
130 /** Handle a new Result.
132 * @param item The new item.
133 * @param key_ptr If non-NULL, points to the collapse key (this happens
134 * for a remote match).
135 * @param doc Document for getting values.
136 * @param mcmp Result comparison functor.
138 * @return How @a item was handled: EMPTY, ADDED, REJECTED or REPLACED.
140 collapse_result
process(Result
& item
,
141 const std::string
* key_ptr
,
142 Xapian::Document::Internal
& vsdoc
,
143 const MSetCmp
& mcmp
);
145 Xapian::doccount
get_collapse_count(const std::string
& collapse_key
,
147 double min_weight
) const;
149 Xapian::doccount
get_docs_considered() const { return docs_considered
; }
151 Xapian::doccount
get_dups_ignored() const { return dups_ignored
; }
153 Xapian::doccount
get_entries() const { return entry_count
; }
155 Xapian::doccount
get_matches_lower_bound() const;
157 bool empty() const { return table
.empty(); }
160 #endif // XAPIAN_INCLUDED_COLLAPSER_H