git-tag-release: Push just the new tag; fetch before retry
[xapian.git] / xapian-core / net / serialise.cc
blob3741fb1346f59cf999827de3a55937175020c05b
1 /** @file serialise.cc
2 * @brief functions to convert Xapian objects to strings and back
3 */
4 /* Copyright (C) 2006,2007,2008,2009,2010,2011,2014,2015 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #include <config.h>
23 #include <xapian/document.h>
24 #include <xapian/positioniterator.h>
25 #include <xapian/termiterator.h>
26 #include <xapian/valueiterator.h>
28 #include "omassert.h"
29 #include "api/omenquireinternal.h"
30 #include "length.h"
31 #include "serialise.h"
32 #include "serialise-double.h"
33 #include "weight/weightinternal.h"
35 #include "autoptr.h"
36 #include <set>
37 #include <string>
39 using namespace std;
41 string
42 serialise_stats(const Xapian::Weight::Internal &stats)
44 string result;
46 result += encode_length(stats.total_length);
47 result += encode_length(stats.collection_size);
48 result += encode_length(stats.rset_size);
49 result += encode_length(stats.total_term_count);
50 result += static_cast<char>(stats.have_max_part);
52 result += encode_length(stats.termfreqs.size());
53 map<string, TermFreqs>::const_iterator i;
54 for (i = stats.termfreqs.begin(); i != stats.termfreqs.end(); ++i) {
55 result += encode_length(i->first.size());
56 result += i->first;
57 result += encode_length(i->second.termfreq);
58 if (stats.rset_size != 0)
59 result += encode_length(i->second.reltermfreq);
60 result += encode_length(i->second.collfreq);
61 if (stats.have_max_part)
62 result += serialise_double(i->second.max_part);
65 return result;
68 void
69 unserialise_stats(const string &s, Xapian::Weight::Internal & stat)
71 const char * p = s.data();
72 const char * p_end = p + s.size();
74 decode_length(&p, p_end, stat.total_length);
75 decode_length(&p, p_end, stat.collection_size);
76 decode_length(&p, p_end, stat.rset_size);
77 decode_length(&p, p_end, stat.total_term_count);
78 // If p == p_end, the next decode_length() will report it.
79 stat.have_max_part = (p != p_end && *p++);
81 size_t n;
82 decode_length(&p, p_end, n);
83 while (n--) {
84 size_t len;
85 decode_length_and_check(&p, p_end, len);
86 string term(p, len);
87 p += len;
88 Xapian::doccount termfreq;
89 decode_length(&p, p_end, termfreq);
90 Xapian::doccount reltermfreq;
91 if (stat.rset_size == 0) {
92 reltermfreq = 0;
93 } else {
94 decode_length(&p, p_end, reltermfreq);
96 Xapian::termcount collfreq;
97 decode_length(&p, p_end, collfreq);
98 double max_part = 0.0;
99 if (stat.have_max_part)
100 max_part = unserialise_double(&p, p_end);
101 stat.termfreqs.insert(make_pair(term,
102 TermFreqs(termfreq,
103 reltermfreq,
104 collfreq,
105 max_part)));
109 string
110 serialise_mset(const Xapian::MSet &mset)
112 string result;
114 result += encode_length(mset.get_firstitem());
115 result += encode_length(mset.get_matches_lower_bound());
116 result += encode_length(mset.get_matches_estimated());
117 result += encode_length(mset.get_matches_upper_bound());
118 result += encode_length(mset.get_uncollapsed_matches_lower_bound());
119 result += encode_length(mset.get_uncollapsed_matches_estimated());
120 result += encode_length(mset.get_uncollapsed_matches_upper_bound());
121 result += serialise_double(mset.get_max_possible());
122 result += serialise_double(mset.get_max_attained());
124 result += serialise_double(mset.internal->percent_factor);
126 result += encode_length(mset.size());
127 for (size_t i = 0; i != mset.size(); ++i) {
128 const Xapian::Internal::MSetItem & item = mset.internal->items[i];
129 result += serialise_double(item.wt);
130 result += encode_length(item.did);
131 result += encode_length(item.sort_key.size());
132 result += item.sort_key;
133 result += encode_length(item.collapse_key.size());
134 result += item.collapse_key;
135 result += encode_length(item.collapse_count);
138 if (mset.internal->stats)
139 result += serialise_stats(*(mset.internal->stats));
141 return result;
144 Xapian::MSet
145 unserialise_mset(const char * p, const char * p_end)
147 Xapian::doccount firstitem;
148 decode_length(&p, p_end, firstitem);
149 Xapian::doccount matches_lower_bound;
150 decode_length(&p, p_end, matches_lower_bound);
151 Xapian::doccount matches_estimated;
152 decode_length(&p, p_end, matches_estimated);
153 Xapian::doccount matches_upper_bound;
154 decode_length(&p, p_end, matches_upper_bound);
155 Xapian::doccount uncollapsed_lower_bound;
156 decode_length(&p, p_end, uncollapsed_lower_bound);
157 Xapian::doccount uncollapsed_estimated;
158 decode_length(&p, p_end, uncollapsed_estimated);
159 Xapian::doccount uncollapsed_upper_bound;
160 decode_length(&p, p_end, uncollapsed_upper_bound);
161 double max_possible = unserialise_double(&p, p_end);
162 double max_attained = unserialise_double(&p, p_end);
164 double percent_factor = unserialise_double(&p, p_end);
166 vector<Xapian::Internal::MSetItem> items;
167 size_t msize;
168 decode_length(&p, p_end, msize);
169 while (msize-- > 0) {
170 double wt = unserialise_double(&p, p_end);
171 Xapian::docid did;
172 decode_length(&p, p_end, did);
173 size_t len;
174 decode_length_and_check(&p, p_end, len);
175 string sort_key(p, len);
176 p += len;
177 decode_length_and_check(&p, p_end, len);
178 string key(p, len);
179 p += len;
180 Xapian::doccount collapse_cnt;
181 decode_length(&p, p_end, collapse_cnt);
182 items.push_back(Xapian::Internal::MSetItem(wt, did, key, collapse_cnt));
183 swap(items.back().sort_key, sort_key);
186 AutoPtr<Xapian::Weight::Internal> stats;
187 if (p != p_end) {
188 stats.reset(new Xapian::Weight::Internal());
189 unserialise_stats(string(p, p_end - p), *(stats.get()));
192 Xapian::MSet mset;
193 mset.internal = new Xapian::MSet::Internal(
194 firstitem,
195 matches_upper_bound,
196 matches_lower_bound,
197 matches_estimated,
198 uncollapsed_upper_bound,
199 uncollapsed_lower_bound,
200 uncollapsed_estimated,
201 max_possible, max_attained,
202 items, percent_factor);
203 mset.internal->stats = stats.release();
204 return mset;
207 string
208 serialise_rset(const Xapian::RSet &rset)
210 string result;
211 const set<Xapian::docid> & items = rset.internal->get_items();
212 set<Xapian::docid>::const_iterator i;
213 Xapian::docid lastdid = 0;
214 for (i = items.begin(); i != items.end(); ++i) {
215 Xapian::docid did = *i;
216 result += encode_length(did - lastdid - 1);
217 lastdid = did;
219 return result;
222 Xapian::RSet
223 unserialise_rset(const string &s)
225 Xapian::RSet rset;
227 const char * p = s.data();
228 const char * p_end = p + s.size();
230 Xapian::docid did = 0;
231 while (p != p_end) {
232 Xapian::docid inc;
233 decode_length(&p, p_end, inc);
234 did += inc + 1;
235 rset.add_document(did);
238 return rset;
241 string
242 serialise_document(const Xapian::Document &doc)
244 string result;
246 size_t n = doc.values_count();
247 result += encode_length(n);
248 Xapian::ValueIterator value;
249 for (value = doc.values_begin(); value != doc.values_end(); ++value) {
250 result += encode_length(value.get_valueno());
251 result += encode_length((*value).size());
252 result += *value;
253 --n;
255 Assert(n == 0);
257 n = doc.termlist_count();
258 result += encode_length(n);
259 Xapian::TermIterator term;
260 for (term = doc.termlist_begin(); term != doc.termlist_end(); ++term) {
261 result += encode_length((*term).size());
262 result += *term;
263 result += encode_length(term.get_wdf());
265 size_t x = term.positionlist_count();
266 result += encode_length(x);
267 Xapian::PositionIterator pos;
268 Xapian::termpos oldpos = 0;
269 for (pos = term.positionlist_begin(); pos != term.positionlist_end(); ++pos) {
270 Xapian::termpos diff = *pos - oldpos;
271 string delta = encode_length(diff);
272 result += delta;
273 oldpos = *pos;
274 --x;
276 Assert(x == 0);
277 --n;
279 Assert(n == 0);
281 result += doc.get_data();
282 return result;
285 Xapian::Document
286 unserialise_document(const string &s)
288 Xapian::Document doc;
289 const char * p = s.data();
290 const char * p_end = p + s.size();
292 size_t n_values;
293 decode_length(&p, p_end, n_values);
294 while (n_values--) {
295 Xapian::valueno slot;
296 decode_length(&p, p_end, slot);
297 size_t len;
298 decode_length_and_check(&p, p_end, len);
299 doc.add_value(slot, string(p, len));
300 p += len;
303 size_t n_terms;
304 decode_length(&p, p_end, n_terms);
305 while (n_terms--) {
306 size_t len;
307 decode_length_and_check(&p, p_end, len);
308 string term(p, len);
309 p += len;
311 // Set all the wdf using add_term, then pass wdf_inc 0 to add_posting.
312 Xapian::termcount wdf;
313 decode_length(&p, p_end, wdf);
314 doc.add_term(term, wdf);
316 size_t n_pos;
317 decode_length(&p, p_end, n_pos);
318 Xapian::termpos pos = 0;
319 while (n_pos--) {
320 Xapian::termpos inc;
321 decode_length(&p, p_end, inc);
322 pos += inc;
323 doc.add_posting(term, pos, 0);
327 doc.set_data(string(p, p_end - p));
328 return doc;