2 * @brief Class representing a list of search results
4 /* Copyright (C) 2015,2016,2017 Olly Betts
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of the
9 * License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22 #ifndef XAPIAN_INCLUDED_MSET_H
23 #define XAPIAN_INCLUDED_MSET_H
25 #if !defined XAPIAN_IN_XAPIAN_H && !defined XAPIAN_LIB_BUILD
26 # error "Never use <xapian/mset.h> directly; include <xapian.h> instead."
32 #include <xapian/attributes.h>
33 #include <xapian/document.h>
34 #include <xapian/error.h>
35 #include <xapian/intrusive_ptr.h>
36 #include <xapian/stem.h>
37 #include <xapian/types.h>
38 #include <xapian/visibility.h>
44 /// Class representing a list of search results.
45 class XAPIAN_VISIBILITY_DEFAULT MSet
{
46 friend class MSetIterator
;
48 // Helper function for fetch() methods.
49 void fetch_(Xapian::doccount first
, Xapian::doccount last
) const;
51 /** Update the weight corresponding to the document indexed at
54 * The MSet's max_possible and max_attained are also updated.
56 * This method must be called to update the weight of every document in
57 * the MSet for i = 0 to mset.size() - 1 in ascending order to avoid
58 * miscalculation of max_attained and max_possible.
60 * @param i MSet index to update
61 * @param wt new weight to assign to the document at index @a i
63 void set_item_weight(Xapian::doccount i
, double wt
);
66 /// Class representing the MSet internals.
68 /// @private @internal Reference counted internals.
69 Xapian::Internal::intrusive_ptr_nonnull
<Internal
> internal
;
71 /** Copying is allowed.
73 * The internals are reference counted, so copying is cheap.
77 /** Copying is allowed.
79 * The internals are reference counted, so assignment is cheap.
81 MSet
& operator=(const MSet
& o
);
83 /** Default constructor.
85 * Creates an empty MSet, mostly useful as a placeholder.
89 /** @private @internal Wrap an existing Internal. */
90 XAPIAN_VISIBILITY_INTERNAL
91 explicit MSet(Internal
* internal_
);
96 /** Assigns new weights and updates MSet.
98 * Dereferencing the Iterator should return a double.
100 * The weights returned by the iterator are assigned to elements of
101 * the MSet in rank order.
103 * @param begin Begin iterator.
104 * @param end End iterator.
106 * @exception Xapian::InvalidArgument is thrown if the total number of
107 * elements in the input doesn't match the total number of
110 template <typename Iterator
>
111 void replace_weights(Iterator first
, Iterator last
)
113 if (last
- first
!= size()) {
114 throw Xapian::InvalidArgumentError("Number of weights assigned "
115 "doesn't match the number of "
118 Xapian::doccount i
= 0;
119 while (first
!= last
) {
120 set_item_weight(i
, *first
);
126 /** Sorts the list of documents in MSet according to their weights.
128 * Use after calling MSet::replace_weights.
130 void sort_by_relevance();
132 /** Convert a weight to a percentage.
134 * The matching document with the highest weight will get 100% if it
135 * matches all the weighted query terms, and proportionally less if it
136 * only matches some, and other weights are scaled by the same factor.
138 * Documents with a non-zero score will always score at least 1%.
140 * Note that these generally aren't percentages of anything meaningful
141 * (unless you use a custom weighting formula where they are!)
143 int convert_to_percent(double weight
) const;
145 /** Convert the weight of the current iterator position to a percentage.
147 * The matching document with the highest weight will get 100% if it
148 * matches all the weighted query terms, and proportionally less if it
149 * only matches some, and other weights are scaled by the same factor.
151 * Documents with a non-zero score will always score at least 1%.
153 * Note that these generally aren't percentages of anything meaningful
154 * (unless you use a custom weighting formula where they are!)
156 int convert_to_percent(const MSetIterator
& it
) const;
158 /** Get the termfreq of a term.
160 * @return The number of documents @a term occurs in.
162 * Since 1.5.0, this method returns 0 if called on an MSet which is
163 * not associated with a database (which is consistent with
164 * Database::get_termfreq() returning 0 when called on a Database
165 * with no sub-databases); in earlier versions,
166 * Xapian::InvalidOperationError was thrown in this case.
168 Xapian::doccount
get_termfreq(const std::string
& term
) const;
170 /** Get the term weight of a term.
172 * @return The maximum weight that @a term could have contributed to a
175 * Since 1.5.0, this method returns 0.0 if called on an MSet which is
176 * not associated with a database, or with a term which wasn't present
177 * in the query (since in both cases the term contributes no weight to any
178 * matching documents); in earlier versions, Xapian::InvalidOperationError
179 * was thrown for the first case, and Xapian::InvalidArgumentError for the
182 double get_termweight(const std::string
& term
) const;
184 /** Rank of first item in this MSet.
186 * This is the parameter `first` passed to Xapian::Enquire::get_mset().
188 Xapian::doccount
get_firstitem() const;
190 /** Lower bound on the total number of matching documents. */
191 Xapian::doccount
get_matches_lower_bound() const;
192 /** Estimate of the total number of matching documents. */
193 Xapian::doccount
get_matches_estimated() const;
194 /** Upper bound on the total number of matching documents. */
195 Xapian::doccount
get_matches_upper_bound() const;
197 /** Lower bound on the total number of matching documents before collapsing.
199 * Conceptually the same as get_matches_lower_bound() for the same query
200 * without any collapse part (though the actual value may differ).
202 Xapian::doccount
get_uncollapsed_matches_lower_bound() const;
203 /** Estimate of the total number of matching documents before collapsing.
205 * Conceptually the same as get_matches_estimated() for the same query
206 * without any collapse part (though the actual value may differ).
208 Xapian::doccount
get_uncollapsed_matches_estimated() const;
209 /** Upper bound on the total number of matching documents before collapsing.
211 * Conceptually the same as get_matches_upper_bound() for the same query
212 * without any collapse part (though the actual value may differ).
214 Xapian::doccount
get_uncollapsed_matches_upper_bound() const;
216 /** The maximum weight attained by any document. */
217 double get_max_attained() const;
218 /** The maximum possible weight any document could achieve. */
219 double get_max_possible() const;
222 /** Model the relevancy of non-query terms in MSet::snippet().
224 * Non-query terms will be assigned a small weight, and the snippet
225 * will tend to prefer snippets which contain a more interesting
226 * background (where the query term content is equivalent).
228 SNIPPET_BACKGROUND_MODEL
= 1,
229 /** Exhaustively evaluate candidate snippets in MSet::snippet().
231 * Without this flag, snippet generation will stop once it thinks
232 * it has found a "good enough" snippet, which will generally reduce
233 * the time taken to generate a snippet.
235 SNIPPET_EXHAUSTIVE
= 2,
236 /** Return the empty string if no term got matched.
238 * If enabled, snippet() returns an empty string if not a single match
239 * was found in text. If not enabled, snippet() returns a (sub)string
240 * of text without any highlighted terms.
242 SNIPPET_EMPTY_WITHOUT_MATCH
= 4
245 /** Generate a snippet.
247 * This method selects a continuous run of words of less than about @a
248 * length bytes from @a text, based mainly on where the query matches
249 * (currently terms, exact phrases and wildcards are taken into account),
250 * but also on the non-query terms in the text.
252 * The returned text can be escaped (by default, it is escaped to make it
253 * suitable for use in HTML), and matches with the query will be
254 * highlighted using @a hi_start and @a hi_end.
256 * If the snippet seems to start or end mid-sentence, then @a omit is
257 * prepended or append (respectively) to indicate this.
259 * The stemmer used to build the query should be specified in @a stemmer.
261 * And @a flags contains flags controlling behaviour.
265 std::string
snippet(const std::string
& text
,
267 const Xapian::Stem
& stemmer
= Xapian::Stem(),
268 unsigned flags
= SNIPPET_BACKGROUND_MODEL
|SNIPPET_EXHAUSTIVE
,
269 const std::string
& hi_start
= "<b>",
270 const std::string
& hi_end
= "</b>",
271 const std::string
& omit
= "...") const;
273 /** Prefetch hint a range of items.
275 * For a remote database, this may start a pipelined fetch of the
276 * requested documents from the remote server.
278 * For a disk-based database, this may send prefetch hints to the
279 * operating system such that the disk blocks the requested documents
280 * are stored in are more likely to be in the cache when we come to
281 * actually read them.
283 void fetch(const MSetIterator
&begin
, const MSetIterator
&end
) const;
285 /** Prefetch hint a single MSet item.
287 * For a remote database, this may start a pipelined fetch of the
288 * requested documents from the remote server.
290 * For a disk-based database, this may send prefetch hints to the
291 * operating system such that the disk blocks the requested documents
292 * are stored in are more likely to be in the cache when we come to
293 * actually read them.
295 void fetch(const MSetIterator
&item
) const;
297 /** Prefetch hint the whole MSet.
299 * For a remote database, this may start a pipelined fetch of the
300 * requested documents from the remote server.
302 * For a disk-based database, this may send prefetch hints to the
303 * operating system such that the disk blocks the requested documents
304 * are stored in are more likely to be in the cache when we come to
305 * actually read them.
307 void fetch() const { fetch_(0, Xapian::doccount(-1)); }
309 /** Return number of items in this MSet object. */
310 Xapian::doccount
size() const;
312 /** Return true if this MSet object is empty. */
313 bool empty() const { return size() == 0; }
315 /** Efficiently swap this MSet object with another. */
316 void swap(MSet
& o
) { internal
.swap(o
.internal
); }
318 /** Return iterator pointing to the first item in this MSet. */
319 MSetIterator
begin() const;
321 /** Return iterator pointing to just after the last item in this MSet. */
322 MSetIterator
end() const;
324 /** Return iterator pointing to the i-th object in this MSet. */
325 MSetIterator
operator[](Xapian::doccount i
) const;
327 /** Return iterator pointing to the last object in this MSet. */
328 MSetIterator
back() const;
330 /// Return a string describing this object.
331 std::string
get_description() const;
333 /** @private @internal MSet is what the C++ STL calls a container.
335 * The following typedefs allow the class to be used in templates in the
336 * same way the standard containers can be.
338 * These are deliberately hidden from the Doxygen-generated docs, as the
339 * machinery here isn't interesting to API users. They just need to know
340 * that Xapian container classes are compatible with the STL.
342 * See "The C++ Programming Language", 3rd ed. section 16.3.1:
346 typedef Xapian::MSetIterator value_type
;
348 typedef Xapian::doccount size_type
;
350 typedef Xapian::doccount_diff difference_type
;
352 typedef Xapian::MSetIterator iterator
;
354 typedef Xapian::MSetIterator const_iterator
;
356 typedef value_type
* pointer
;
358 typedef const value_type
* const_pointer
;
360 typedef value_type
& reference
;
362 typedef const value_type
& const_reference
;
365 /** @private @internal MSet is what the C++ STL calls a container.
367 * The following methods allow the class to be used in templates in the
368 * same way the standard containers can be.
370 * These are deliberately hidden from the Doxygen-generated docs, as the
371 * machinery here isn't interesting to API users. They just need to know
372 * that Xapian container classes are compatible with the STL.
375 // The size is fixed once created.
376 Xapian::doccount
max_size() const { return size(); }
380 /// Iterator over a Xapian::MSet.
381 class XAPIAN_VISIBILITY_DEFAULT MSetIterator
{
384 MSetIterator(const Xapian::MSet
& mset_
, Xapian::doccount off_from_end_
)
385 : mset(mset_
), off_from_end(off_from_end_
) { }
388 /** @private @internal The MSet we are iterating over. */
391 /** @private @internal The current position of the iterator.
393 * We store the offset from the end of @a mset, since that means
394 * MSet::end() just needs to set this member to 0.
396 Xapian::MSet::size_type off_from_end
;
398 /** Create an unpositioned MSetIterator. */
399 MSetIterator() : off_from_end(0) { }
401 /** Get the numeric document id for the current position. */
402 Xapian::docid
operator*() const;
404 /// Advance the iterator to the next position.
405 MSetIterator
& operator++() {
410 /// Advance the iterator to the next position (postfix version).
411 MSetIterator
operator++(int) {
412 MSetIterator retval
= *this;
417 /// Move the iterator to the previous position.
418 MSetIterator
& operator--() {
423 /// Move the iterator to the previous position (postfix version).
424 MSetIterator
operator--(int) {
425 MSetIterator retval
= *this;
430 /** @private @internal MSetIterator is what the C++ STL calls an
431 * random_access_iterator.
433 * The following typedefs allow std::iterator_traits<> to work so that
434 * this iterator can be used with the STL.
436 * These are deliberately hidden from the Doxygen-generated docs, as the
437 * machinery here isn't interesting to API users. They just need to know
438 * that Xapian iterator classes are compatible with the STL.
442 typedef std::random_access_iterator_tag iterator_category
;
444 typedef std::string value_type
;
446 typedef Xapian::termcount_diff difference_type
;
448 typedef std::string
* pointer
;
450 typedef std::string
& reference
;
453 /// Move the iterator forwards by n positions.
454 MSetIterator
& operator+=(difference_type n
) {
459 /// Move the iterator back by n positions.
460 MSetIterator
& operator-=(difference_type n
) {
465 /** Return the iterator incremented by @a n positions.
467 * If @a n is negative, decrements by (-n) positions.
469 MSetIterator
operator+(difference_type n
) const {
470 return MSetIterator(mset
, off_from_end
- n
);
473 /** Return the iterator decremented by @a n positions.
475 * If @a n is negative, increments by (-n) positions.
477 MSetIterator
operator-(difference_type n
) const {
478 return MSetIterator(mset
, off_from_end
+ n
);
481 /** Return the number of positions between @a o and this iterator. */
482 difference_type
operator-(const MSetIterator
& o
) const {
483 return difference_type(o
.off_from_end
) - difference_type(off_from_end
);
486 /** Return the MSet rank for the current position.
488 * The rank of mset[0] is mset.get_firstitem().
490 Xapian::doccount
get_rank() const {
491 return mset
.get_firstitem() + (mset
.size() - off_from_end
);
494 /** Get the Document object for the current position. */
495 Xapian::Document
get_document() const;
497 /** Get the weight for the current position. */
498 double get_weight() const;
500 /** Return the collapse key for the current position.
502 * If collapsing isn't in use, an empty string will be returned.
504 std::string
get_collapse_key() const;
506 /** Return a count of the number of collapses done onto the current key.
508 * This starts at 0, and is incremented each time an item is eliminated
509 * because its key is the same as that of the current item (as returned
510 * by get_collapse_key()).
512 * Note that this is NOT necessarily one less than the total number of
513 * matching documents with this collapse key due to various optimisations
514 * implemented in the matcher - for example, it can skip documents
515 * completely if it can prove their weight wouldn't be enough to make the
518 * You can say is that if get_collapse_count() > 0 then there are
519 * >= get_collapse_count() other documents with the current collapse
520 * key. But if get_collapse_count() == 0 then there may or may not be
521 * other such documents.
523 Xapian::doccount
get_collapse_count() const;
525 /** Convert the weight of the current iterator position to a percentage.
527 * The matching document with the highest weight will get 100% if it
528 * matches all the weighted query terms, and proportionally less if it
529 * only matches some, and other weights are scaled by the same factor.
531 * Documents with a non-zero score will always score at least 1%.
533 * Note that these generally aren't percentages of anything meaningful
534 * (unless you use a custom weighting formula where they are!)
536 int get_percent() const {
537 return mset
.convert_to_percent(get_weight());
540 /// Return a string describing this object.
541 std::string
get_description() const;
545 XAPIAN_NOTHROW(operator==(const MSetIterator
&a
, const MSetIterator
&b
));
547 /// Equality test for MSetIterator objects.
549 operator==(const MSetIterator
&a
, const MSetIterator
&b
) XAPIAN_NOEXCEPT
551 return a
.off_from_end
== b
.off_from_end
;
555 XAPIAN_NOTHROW(operator!=(const MSetIterator
&a
, const MSetIterator
&b
));
557 /// Inequality test for MSetIterator objects.
559 operator!=(const MSetIterator
&a
, const MSetIterator
&b
) XAPIAN_NOEXCEPT
565 XAPIAN_NOTHROW(operator<(const MSetIterator
&a
, const MSetIterator
&b
));
567 /// Inequality test for MSetIterator objects.
569 operator<(const MSetIterator
&a
, const MSetIterator
&b
) XAPIAN_NOEXCEPT
571 return a
.off_from_end
> b
.off_from_end
;
575 XAPIAN_NOTHROW(operator>(const MSetIterator
&a
, const MSetIterator
&b
));
577 /// Inequality test for MSetIterator objects.
579 operator>(const MSetIterator
&a
, const MSetIterator
&b
) XAPIAN_NOEXCEPT
585 XAPIAN_NOTHROW(operator>=(const MSetIterator
&a
, const MSetIterator
&b
));
587 /// Inequality test for MSetIterator objects.
589 operator>=(const MSetIterator
&a
, const MSetIterator
&b
) XAPIAN_NOEXCEPT
595 XAPIAN_NOTHROW(operator<=(const MSetIterator
&a
, const MSetIterator
&b
));
597 /// Inequality test for MSetIterator objects.
599 operator<=(const MSetIterator
&a
, const MSetIterator
&b
) XAPIAN_NOEXCEPT
604 /** Return MSetIterator @a it incremented by @a n positions.
606 * If @a n is negative, decrements by (-n) positions.
609 operator+(MSetIterator::difference_type n
, const MSetIterator
& it
)
614 // Inlined methods of MSet which need MSetIterator to have been defined:
617 MSet::fetch(const MSetIterator
&begin_it
, const MSetIterator
&end_it
) const
619 fetch_(begin_it
.off_from_end
, end_it
.off_from_end
);
623 MSet::fetch(const MSetIterator
&item
) const
625 fetch_(item
.off_from_end
, item
.off_from_end
);
629 MSet::begin() const {
630 return MSetIterator(*this, size());
635 // Decrementing the result of end() needs to work, so we must pass in
637 return MSetIterator(*this, 0);
641 MSet::operator[](Xapian::doccount i
) const {
642 return MSetIterator(*this, size() - i
);
647 return MSetIterator(*this, 1);
651 MSet::convert_to_percent(const MSetIterator
& it
) const {
652 return convert_to_percent(it
.get_weight());
657 #endif // XAPIAN_INCLUDED_MSET_H