/** @file glass_inverter.h
 * @brief Inverter class which "inverts the file".
 */
/* Copyright (C) 2009,2010,2013,2014 Olly Betts
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
21 #ifndef XAPIAN_INCLUDED_GLASS_INVERTER_H
22 #define XAPIAN_INCLUDED_GLASS_INVERTER_H
24 #include "xapian/types.h"
32 #include "xapian/error.h"
// Forward declarations: this header only needs these table classes by name
// (friend declarations and reference parameters), not their definitions.
class GlassPositionListTable;
class GlassPostListTable;
41 /** Magic wdf value used for a deleted posting. */
42 const Xapian::termcount DELETED_POSTING
= Xapian::termcount(-1);
44 /** Class which "inverts the file". */
46 friend class GlassPostListTable
;
48 /// Class for storing the changes in frequencies for a term.
49 class PostingChanges
{
50 friend class GlassPostListTable
;
52 /// Change in term frequency,
53 Xapian::termcount_diff tf_delta
;
55 /// Change in collection frequency.
56 Xapian::termcount_diff cf_delta
;
58 /// Changes to this term's postlist.
59 std::map
<Xapian::docid
, Xapian::termcount
> pl_changes
;
62 /// Constructor for an added posting.
63 PostingChanges(Xapian::docid did
, Xapian::termcount wdf
)
64 : tf_delta(1), cf_delta(Xapian::termcount_diff(wdf
))
66 pl_changes
.insert(std::make_pair(did
, wdf
));
69 /// Constructor for a removed posting.
70 PostingChanges(Xapian::docid did
, Xapian::termcount wdf
, bool)
71 : tf_delta(-1), cf_delta(-Xapian::termcount_diff(wdf
))
73 pl_changes
.insert(std::make_pair(did
, DELETED_POSTING
));
76 /// Constructor for an updated posting.
77 PostingChanges(Xapian::docid did
, Xapian::termcount old_wdf
,
78 Xapian::termcount new_wdf
)
79 : tf_delta(0), cf_delta(Xapian::termcount_diff(new_wdf
- old_wdf
))
81 pl_changes
.insert(std::make_pair(did
, new_wdf
));
85 void add_posting(Xapian::docid did
, Xapian::termcount wdf
) {
88 // Add did to term's postlist
89 pl_changes
[did
] = wdf
;
93 void remove_posting(Xapian::docid did
, Xapian::termcount wdf
) {
96 // Remove did from term's postlist.
97 pl_changes
[did
] = DELETED_POSTING
;
100 /// Update a posting.
101 void update_posting(Xapian::docid did
, Xapian::termcount old_wdf
,
102 Xapian::termcount new_wdf
) {
103 cf_delta
+= new_wdf
- old_wdf
;
104 pl_changes
[did
] = new_wdf
;
107 /// Get the term frequency delta.
108 Xapian::termcount_diff
get_tfdelta() const { return tf_delta
; }
110 /// Get the collection frequency delta.
111 Xapian::termcount_diff
get_cfdelta() const { return cf_delta
; }
/// Buffered changes to postlists.
///
/// Keyed by term: the posting methods of this class look entries up with
/// find(term) and insert a PostingChanges for a term seen for the first time.
std::map<std::string, PostingChanges> postlist_changes;

/// Buffered changes to positional data.
///
/// NOTE(review): presumably keyed by term, then by document id, with the
/// mapped string holding the encoded position list - confirm against the
/// out-of-line set_positionlist() definition.
std::map<std::string, std::map<Xapian::docid, std::string> > pos_changes;
// Declaration only; the definition is not in this header.
// NOTE(review): the parameter list is not terminated in the lines visible
// here, so further parameters may exist - confirm against the definition
// before relying on this signature.
void store_positions(const GlassPositionListTable & position_table,
                     const std::string & tname,
                     const std::vector<Xapian::termpos> & posvec,
// Declaration only; the definition is not in this header.
// NOTE(review): presumably buffers @a s as the position data for @a term in
// document @a did - confirm against the definition.
void set_positionlist(Xapian::docid did,
                      const std::string & term,
                      const std::string & s);
/// Buffered changes to document lengths.
///
/// Keyed by document id.  The mapped value is the document's new length, or
/// DELETED_POSTING once delete_doclength() has been called for that id.
std::map<Xapian::docid, Xapian::termcount> doclen_changes;
135 void add_posting(Xapian::docid did
, const std::string
& term
,
136 Xapian::doccount wdf
) {
137 std::map
<std::string
, PostingChanges
>::iterator i
;
138 i
= postlist_changes
.find(term
);
139 if (i
== postlist_changes
.end()) {
140 postlist_changes
.insert(
141 std::make_pair(term
, PostingChanges(did
, wdf
)));
143 i
->second
.add_posting(did
, wdf
);
147 void remove_posting(Xapian::docid did
, const std::string
& term
,
148 Xapian::doccount wdf
) {
149 std::map
<std::string
, PostingChanges
>::iterator i
;
150 i
= postlist_changes
.find(term
);
151 if (i
== postlist_changes
.end()) {
152 postlist_changes
.insert(
153 std::make_pair(term
, PostingChanges(did
, wdf
, false)));
155 i
->second
.remove_posting(did
, wdf
);
159 void update_posting(Xapian::docid did
, const std::string
& term
,
160 Xapian::termcount old_wdf
,
161 Xapian::termcount new_wdf
) {
162 std::map
<std::string
, PostingChanges
>::iterator i
;
163 i
= postlist_changes
.find(term
);
164 if (i
== postlist_changes
.end()) {
165 postlist_changes
.insert(
166 std::make_pair(term
, PostingChanges(did
, old_wdf
, new_wdf
)));
168 i
->second
.update_posting(did
, old_wdf
, new_wdf
);
// Declaration only; the definition is not in this header.
// @a modifying defaults to false.
// NOTE(review): confirm this parameter list against the definition - a
// parameter line may be missing from the lines visible here.
void set_positionlist(const GlassPositionListTable & position_table,
                      const std::string & tname,
                      const Xapian::TermIterator & term,
                      bool modifying = false);
// Declaration only; the definition is not in this header.
// NOTE(review): presumably buffers removal of the position data for @a term
// in document @a did - confirm against the definition.
void delete_positionlist(Xapian::docid did,
                         const std::string & term);
// Declaration only; the definition is not in this header.
// @a s is a non-const reference, so it can be written by the call.
// NOTE(review): presumably returns true and fills @a s when a buffered
// position list exists for (@a did, @a term) - confirm against the
// definition.
bool get_positionlist(Xapian::docid did,
                      const std::string & term,
                      std::string & s) const;
// Declaration only; the definition is not in this header.
// NOTE(review): presumably reports whether any positional data exists once
// buffered changes are taken into account - confirm against the definition.
bool has_positions(const GlassPositionListTable & position_table) const;
// Discards the buffered document length and postlist changes.
// NOTE(review): the header of the member function enclosing these statements
// is not among the lines visible here.  pos_changes is not cleared by the
// visible statements either - confirm whether that happens on a line not
// shown.
doclen_changes.clear();
postlist_changes.clear();
193 void set_doclength(Xapian::docid did
, Xapian::termcount doclen
, bool add
) {
195 Assert(doclen_changes
.find(did
) == doclen_changes
.end() || doclen_changes
[did
] == DELETED_POSTING
);
197 doclen_changes
[did
] = doclen
;
200 void delete_doclength(Xapian::docid did
) {
201 Assert(doclen_changes
.find(did
) == doclen_changes
.end() || doclen_changes
[did
] != DELETED_POSTING
);
202 doclen_changes
[did
] = DELETED_POSTING
;
205 bool get_doclength(Xapian::docid did
, Xapian::termcount
& doclen
) const {
206 std::map
<Xapian::docid
, Xapian::termcount
>::const_iterator i
;
207 i
= doclen_changes
.find(did
);
208 if (i
== doclen_changes
.end())
210 if (rare(i
->second
== DELETED_POSTING
))
211 throw Xapian::DocNotFoundError("Document not found: " + str(did
));
// The flush functions below are declarations only; their definitions are not
// in this header.

/// Flush document length changes.
void flush_doclengths(GlassPostListTable & table);

/// Flush postlist changes for @a term.
void flush_post_list(GlassPostListTable & table, const std::string & term);

/// Flush postlist changes for all terms.
void flush_all_post_lists(GlassPostListTable & table);

/// Flush postlist changes for all terms which start with @a pfx.
void flush_post_lists(GlassPostListTable & table, const std::string & pfx);

/// Flush all postlist table changes.
void flush(GlassPostListTable & table);

/// Flush position changes.
void flush_pos_lists(GlassPositionListTable & table);
234 bool get_deltas(const std::string
& term
,
235 Xapian::termcount_diff
& tf_delta
,
236 Xapian::termcount_diff
& cf_delta
) const {
237 std::map
<std::string
, PostingChanges
>::const_iterator i
;
238 i
= postlist_changes
.find(term
);
239 if (i
== postlist_changes
.end()) {
242 tf_delta
= i
->second
.get_tfdelta();
243 cf_delta
= i
->second
.get_cfdelta();
248 #endif // XAPIAN_INCLUDED_GLASS_INVERTER_H