1 /** @file snipperinternal.h
4 /* Copyright (C) 2012 Mihai Bivol
5 * Copyright (C) 2014 Olly Betts
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23 #ifndef XAPIAN_INCLUDED_SNIPPERINTERNAL_H
24 #define XAPIAN_INCLUDED_SNIPPERINTERNAL_H
26 #include <xapian/snipper.h>
27 #include <xapian/stem.h>
/// Internal class of Snipper; derives from Xapian::Internal::intrusive_base.
37 class Snipper::Internal
: public Xapian::Internal::intrusive_base
{
40 /** Checks if a term is marked as stemmed.
 *
 *  Declaration only; the definition is not visible here.
 *
 *  @param term  The term to check.
 *  @return Presumably true when @a term is marked as stemmed - confirm
 *          against the definition.
 */
41 bool is_stemmed(const std::string
& term
);
/// Copy constructor, declared here without a body.
/// NOTE(review): presumably left undefined to forbid copying, mirroring the
/// assignment operator declared next - confirm.
44 Internal(const Internal
&);
45 /// Assignment not allowed (declared here without a body; returns void).
46 void operator=(const Internal
&);
51 /** Holds information about a document in the relevance model.*/
52 struct RMDocumentInfo
{
// NOTE(review): the member declarations that the next three comments
// describe are not present in this copy of the header.
53 /** ID in the relevance model */
55 /** Document size in terms */
57 /** Weight of the document */
/// Constructor taking the relevance model id, the document size and the
/// document weight; document_size is initialised from document_size_.
/// NOTE(review): the other initialisers and the constructor body are not
/// visible in this copy.
60 RMDocumentInfo(rm_docid rm_id_
, int document_size_
, double weight_
) :
62 document_size(document_size_
),
// NOTE(review): the struct header and the document id member are not
// present in this copy; only the comments, freq and the start of the
// constructor are visible.
66 /** Holds information about a term in a document */
68 /** Relevance model document id.*/
70 /** Frequency of term in document */
71 Xapian::termcount freq
;
/// Constructor taking the relevance model document id and the frequency of
/// the term in that document.
/// NOTE(review): the initialiser list and body are not visible in this copy.
73 TermDocInfo(rm_docid docid_
, Xapian::termcount freq_
) :
// NOTE(review): the struct header and the coll_occurrence declaration are
// not present in this copy.
78 /** Holds information about a term in the relevance model */
80 /** Documents that index the term in relevance model, as a vector of TermDocInfo */
81 std::vector
<TermDocInfo
> indexed_docs_freq
;
82 /** Occurrence in collection */
/// Default constructor: sets coll_occurrence to 0.
85 RMTermInfo() : coll_occurrence(0) { }
88 /** Holds information about a term and its position in a document */
89 struct TermPositionInfo
{
/// Position associated with the term.
91 Xapian::termpos position
;
/// Constructor; stores position_ in position.
/// NOTE(review): term_ is taken by value and the member it initialises is
/// not visible in this copy - confirm it is stored, and whether a const
/// reference would avoid a copy.
93 TermPositionInfo(std::string term_
, Xapian::termpos position_
) :
95 position(position_
) { }
/// Less-than comparison using position only, so objects order by position.
97 bool operator < (const TermPositionInfo
& other
) const
99 return position
< other
.position
;
103 /** Stemmer used for generating text terms */
106 /** Relevance model documents, stored as a vector of RMDocumentInfo. */
107 std::vector
<RMDocumentInfo
> rm_documents
;
109 /** Relevance model data for each term: a map from std::string
 *  (presumably the term itself - confirm) to RMTermInfo.
 */
110 std::map
<std::string
, RMTermInfo
> rm_term_data
;
112 /** Relevance model collection size (set to 0 by the default constructor) */
113 Xapian::doccount rm_coll_size
;
115 /** Relevance model total document weight (set to 0 by the default constructor) */
116 double rm_total_weight
;
/// Default constructor: sets rm_coll_size and rm_total_weight to 0; the
/// body is empty.
118 Internal() : rm_coll_size(0),
119 rm_total_weight(0) { }
121 /** Return snippet generated from text using the precalculated relevance model.
 *
 *  @param text         Text the snippet is generated from.
 *  @param window_size  NOTE(review): meaning and units are not visible
 *                      here - confirm against the definition.
 *  @return The generated snippet as a std::string.
 *
 *  NOTE(review): the declaration is cut off after window_size in this copy;
 *  any further parameters are not visible.
 */
122 std::string
generate_snippet(const std::string
& text
,
124 Xapian::termcount window_size
,
// Declaration only: calculate_rm() returns void and its definition is not
// visible here.
// NOTE(review): the doc comment that follows has no closing terminator in
// this copy - restore it before compiling.
127 /** Calculate relevance model based on a MSet.
129 * @param mset The MSet to base the model on
130 * @param rm_docno How many documents to use from @a mset
132 void calculate_rm(const MSet
& mset
, Xapian::doccount rm_docno
);
137 #endif /* XAPIAN_INCLUDED_SNIPPERINTERNAL_H */