2 * @brief functions to convert Xapian objects to strings and back
4 /* Copyright (C) 2006,2007,2008,2009,2010,2011,2014,2015,2017 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 #include <xapian/document.h>
24 #include <xapian/positioniterator.h>
25 #include <xapian/termiterator.h>
26 #include <xapian/valueiterator.h>
29 #include "api/rsetinternal.h"
31 #include "serialise.h"
32 #include "serialise-double.h"
33 #include "weight/weightinternal.h"
// serialise_stats(): appends encoded fields of a Xapian::Weight::Internal
// statistics object (`stats`) to `result`.
//
// NOTE(review): this extract is incomplete.  The leading numbers look like
// original line numbers and they skip values, so the return type, the
// declaration of `result`, the closing braces and the return statement are
// not visible here.  Confirm against the full file before changing anything.
42 serialise_stats(const Xapian::Weight::Internal
&stats
)
// Four collection-wide totals, each appended via encode_length(), in the
// order total_length, collection_size, rset_size, total_term_count.
46 result
+= encode_length(stats
.total_length
);
47 result
+= encode_length(stats
.collection_size
);
48 result
+= encode_length(stats
.rset_size
);
49 result
+= encode_length(stats
.total_term_count
);
// have_max_part is appended as a single char, not via encode_length().
50 result
+= static_cast<char>(stats
.have_max_part
);
// Number of entries in stats.termfreqs, followed by one record per entry.
52 result
+= encode_length(stats
.termfreqs
.size());
53 map
<string
, TermFreqs
>::const_iterator i
;
54 for (i
= stats
.termfreqs
.begin(); i
!= stats
.termfreqs
.end(); ++i
) {
// Size of the map key (i->first).
55 result
+= encode_length(i
->first
.size());
// NOTE(review): presumably the key bytes themselves are appended on a line
// that is not visible in this extract - confirm.
57 result
+= encode_length(i
->second
.termfreq
);
// reltermfreq is only written when rset_size is non-zero.
58 if (stats
.rset_size
!= 0)
59 result
+= encode_length(i
->second
.reltermfreq
);
60 result
+= encode_length(i
->second
.collfreq
);
// max_part is only written (via serialise_double()) when have_max_part is set.
61 if (stats
.have_max_part
)
62 result
+= serialise_double(i
->second
.max_part
);
// unserialise_stats(): decodes fields from the bytes of `s` into `stat`,
// reading through the cursor `p`, which is bounded by `p_end`.
//
// NOTE(review): this extract is incomplete.  The declarations of `n`, `len`
// and `term`, the loop that iterates over the entries, and the end of the
// final insert statement are not visible here.  Confirm against the full file.
69 unserialise_stats(const string
&s
, Xapian::Weight::Internal
& stat
)
// Cursor over the raw bytes of s.
71 const char * p
= s
.data();
72 const char * p_end
= p
+ s
.size();
// The four totals are decoded in the order total_length, collection_size,
// rset_size, total_term_count.
74 decode_length(&p
, p_end
, stat
.total_length
);
75 decode_length(&p
, p_end
, stat
.collection_size
);
76 decode_length(&p
, p_end
, stat
.rset_size
);
77 decode_length(&p
, p_end
, stat
.total_term_count
);
78 // If p == p_end, the next decode_length() will report it.
// Reads one byte as the have_max_part flag (false if no input remains) and
// advances p past it when a byte was read.
79 stat
.have_max_part
= (p
!= p_end
&& *p
++);
// NOTE(review): `n` is presumably the number of termfreqs entries that follow;
// its declaration and the loop using it are not visible - confirm.
82 decode_length(&p
, p_end
, n
);
// `len` is decoded with decode_length_and_check() rather than decode_length();
// presumably this also validates len against the remaining input - confirm.
85 decode_length_and_check(&p
, p_end
, len
);
88 Xapian::doccount termfreq
;
89 decode_length(&p
, p_end
, termfreq
);
// NOTE(review): lines between the `if` below and the following decode_length()
// are missing from this extract, so the branch structure cannot be read off
// the visible text.  Presumably reltermfreq is set to 0 when rset_size == 0 and
// decoded otherwise - confirm against the full file.
90 Xapian::doccount reltermfreq
;
91 if (stat
.rset_size
== 0) {
94 decode_length(&p
, p_end
, reltermfreq
);
96 Xapian::termcount collfreq
;
97 decode_length(&p
, p_end
, collfreq
);
// max_part starts at 0.0 and is only decoded when have_max_part is set.
98 double max_part
= 0.0;
99 if (stat
.have_max_part
)
100 max_part
= unserialise_double(&p
, p_end
);
// Inserts an entry keyed by `term` into stat.termfreqs.  The rest of this
// statement (the mapped value) is cut off in this extract.
101 stat
.termfreqs
.insert(make_pair(term
,
// serialise_rset(): when rset.internal is non-null, walks rset.internal->docs
// and appends encode_length(did - lastdid - 1) to `result` for each docid.
//
// NOTE(review): this extract is incomplete.  The declaration of `result`, any
// update of `lastdid` inside the loop, the closing braces and the return
// statement are not visible here.  Confirm against the full file.
110 serialise_rset(const Xapian::RSet
&rset
)
// Nothing is appended in the visible code when rset.internal is null.
113 if (rset
.internal
.get()) {
114 Xapian::docid lastdid
= 0;
115 for (Xapian::docid did
: rset
.internal
->docs
) {
// Encodes the difference from lastdid, minus one.  Presumably lastdid is set
// to did after this on a line not visible here - confirm.
116 result
+= encode_length(did
- lastdid
- 1);
// unserialise_rset(): reads the bytes of `s` through the cursor `p` (bounded
// by `p_end`), decoding `inc` values and calling rset.add_document(did).
//
// NOTE(review): this extract is incomplete.  The declarations of `rset` and
// `inc`, the loop header, the statement that updates `did` from `inc`, and the
// return statement are not visible here.  Confirm against the full file.
124 unserialise_rset(const string
&s
)
// Cursor over the raw bytes of s.
128 const char * p
= s
.data();
129 const char * p_end
= p
+ s
.size();
// did starts at 0.
131 Xapian::docid did
= 0;
134 decode_length(&p
, p_end
, inc
);
// NOTE(review): presumably did has been advanced using inc before this call,
// on a line not visible here - confirm.
136 rset
.add_document(did
);
// serialise_document(): appends to `result`, in this order: the number of
// values, a record per value, the number of terms, a record per term
// (including its positions), and finally the document data.
//
// NOTE(review): this extract is incomplete.  The declaration of `result`, the
// statements that append the value and term bytes, the use of `delta`, any
// update of `oldpos`, the closing braces and the return statement are not
// visible here.  Confirm against the full file.
143 serialise_document(const Xapian::Document
&doc
)
// Values section: count first, then per value its slot number and its size.
147 size_t n
= doc
.values_count();
148 result
+= encode_length(n
);
149 Xapian::ValueIterator value
;
150 for (value
= doc
.values_begin(); value
!= doc
.values_end(); ++value
) {
151 result
+= encode_length(value
.get_valueno());
152 result
+= encode_length((*value
).size());
// Terms section: `n` is reused to hold the term count.
158 n
= doc
.termlist_count();
159 result
+= encode_length(n
);
160 Xapian::TermIterator term
;
161 for (term
= doc
.termlist_begin(); term
!= doc
.termlist_end(); ++term
) {
// Per term: size of the term string, then its wdf.
162 result
+= encode_length((*term
).size());
164 result
+= encode_length(term
.get_wdf());
// Positions for this term: the count, then one delta per position.
166 size_t x
= term
.positionlist_count();
167 result
+= encode_length(x
);
168 Xapian::PositionIterator pos
;
169 Xapian::termpos oldpos
= 0;
170 for (pos
= term
.positionlist_begin(); pos
!= term
.positionlist_end(); ++pos
) {
// Each position is encoded as its difference from oldpos (initially 0).
// What happens to `delta` afterwards is not visible in this extract.
171 Xapian::termpos diff
= *pos
- oldpos
;
172 string delta
= encode_length(diff
);
// The document data is appended as-is, with no length prefix in the
// visible code.
182 result
+= doc
.get_data();
// unserialise_document(): builds a Xapian::Document from the bytes of `s`,
// reading through the cursor `p` (bounded by `p_end`): values first, then
// terms with their wdf and positions, then the document data.
//
// NOTE(review): this extract is incomplete.  The declarations of `n_values`,
// `n_terms`, `n_pos`, `len`, `term` and `inc`, the loop headers, the
// statements that advance `p` past string bytes and update `pos`, and the
// return statement are not visible here.  Confirm against the full file.
187 unserialise_document(const string
&s
)
189 Xapian::Document doc
;
// Cursor over the raw bytes of s.
190 const char * p
= s
.data();
191 const char * p_end
= p
+ s
.size();
// Values section: n_values is decoded, then for each value a slot number and
// a length.
194 decode_length(&p
, p_end
, n_values
);
196 Xapian::valueno slot
;
197 decode_length(&p
, p_end
, slot
);
// len comes from decode_length_and_check() (presumably validated against the
// remaining input - confirm) before string(p, len) is built from the cursor.
199 decode_length_and_check(&p
, p_end
, len
);
200 doc
.add_value(slot
, string(p
, len
));
// Terms section: n_terms is decoded, then for each term a length.
205 decode_length(&p
, p_end
, n_terms
);
208 decode_length_and_check(&p
, p_end
, len
);
212 // Set all the wdf using add_term, then pass wdf_inc 0 to add_posting.
213 Xapian::termcount wdf
;
214 decode_length(&p
, p_end
, wdf
);
215 doc
.add_term(term
, wdf
);
// Positions for this term: n_pos is decoded, then `inc` values; pos starts at 0.
218 decode_length(&p
, p_end
, n_pos
);
219 Xapian::termpos pos
= 0;
222 decode_length(&p
, p_end
, inc
);
// NOTE(review): presumably pos has been advanced by inc before this call, on a
// line not visible here - confirm.
224 doc
.add_posting(term
, pos
, 0);
// Whatever remains between p and p_end becomes the document data.
228 doc
.set_data(string(p
, p_end
- p
));