Document xapian-compact --blocksize takes an argument
[xapian.git] / xapian-core / api / snipperinternal.h
blob49ce9e695bae49ce52ff0fedab02935e76abdb1a
/** @file snipperinternal.h
 * @brief Internals
 */
/* Copyright (C) 2012 Mihai Bivol
 * Copyright (C) 2014 Olly Betts
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */
23 #ifndef XAPIAN_INCLUDED_SNIPPERINTERNAL_H
24 #define XAPIAN_INCLUDED_SNIPPERINTERNAL_H
26 #include <xapian/snipper.h>
27 #include <xapian/stem.h>
29 #include <map>
30 #include <string>
31 #include <vector>
33 namespace Xapian {
35 class MSet;
37 class Snipper::Internal : public Xapian::Internal::intrusive_base {
39 private:
40 /** Checks if a term is marked as stemmed. */
41 bool is_stemmed(const std::string & term);
43 /// Copy not allowed
44 Internal(const Internal &);
45 /// Assignment not allowed
46 void operator=(const Internal &);
48 public:
49 typedef int rm_docid;
51 /** Holds information about a document in the relevance model.*/
52 struct RMDocumentInfo {
53 /** ID in the relevance model */
54 rm_docid rm_id;
55 /** Document size in terms */
56 int document_size;
57 /** Weight of the document */
58 double weight;
60 RMDocumentInfo(rm_docid rm_id_, int document_size_, double weight_) :
61 rm_id(rm_id_),
62 document_size(document_size_),
63 weight(weight_) { }
66 /** Holds information about a term in a document */
67 struct TermDocInfo {
68 /** Relevance model document id.*/
69 rm_docid docid;
70 /** Frequency of term in document */
71 Xapian::termcount freq;
73 TermDocInfo(rm_docid docid_, Xapian::termcount freq_) :
74 docid(docid_),
75 freq(freq_) { }
78 /** Holds information about a term in the relevance model */
79 struct RMTermInfo {
80 /** Documents that index the term in relevance model */
81 std::vector<TermDocInfo> indexed_docs_freq;
82 /** Occurrence in collection */
83 int coll_occurrence;
85 RMTermInfo() : coll_occurrence(0) { }
88 /** Holds information about a term and its position in a document */
89 struct TermPositionInfo {
90 std::string term;
91 Xapian::termpos position;
93 TermPositionInfo(std::string term_, Xapian::termpos position_) :
94 term(term_),
95 position(position_) { }
97 bool operator < (const TermPositionInfo & other) const
99 return position < other.position;
103 /** Stemmer used for generating text terms */
104 Stem stemmer;
106 /** Relevance Model documents. */
107 std::vector<RMDocumentInfo> rm_documents;
109 /** Relevance model data for each term */
110 std::map<std::string, RMTermInfo> rm_term_data;
112 /** Relevance model collection size */
113 Xapian::doccount rm_coll_size;
115 /** Relevance model total document weight */
116 double rm_total_weight;
118 Internal() : rm_coll_size(0),
119 rm_total_weight(0) { }
121 /** Return snippet generated from text using the precalculated relevance model */
122 std::string generate_snippet(const std::string & text,
123 size_t length,
124 Xapian::termcount window_size,
125 double smoothing);
127 /** Calculate relevance model based on a MSet.
129 * @param mset The MSet to base the model on
130 * @param rm_docno How many documents to use from @a mset
132 void calculate_rm(const MSet & mset, Xapian::doccount rm_docno);
137 #endif /* XAPIAN_INCLUDED_SNIPPERINTERNAL_H */