[honey] Fix portability to systems without pread()
[xapian.git] / xapian-core / net / serialise.cc
blobd66fcdff77a36a37c3e60b7f0d8ba297ba5df258
1 /** @file serialise.cc
2 * @brief functions to convert Xapian objects to strings and back
3 */
4 /* Copyright (C) 2006,2007,2008,2009,2010,2011,2014,2015,2017 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #include <config.h>
23 #include <xapian/document.h>
24 #include <xapian/positioniterator.h>
25 #include <xapian/termiterator.h>
26 #include <xapian/valueiterator.h>
28 #include "omassert.h"
29 #include "api/rsetinternal.h"
30 #include "length.h"
31 #include "serialise.h"
32 #include "serialise-double.h"
33 #include "weight/weightinternal.h"
35 #include <memory>
36 #include <set>
37 #include <string>
39 using namespace std;
41 string
42 serialise_stats(const Xapian::Weight::Internal &stats)
44 string result;
46 result += encode_length(stats.total_length);
47 result += encode_length(stats.collection_size);
48 result += encode_length(stats.rset_size);
49 result += encode_length(stats.total_term_count);
50 result += static_cast<char>(stats.have_max_part);
52 result += encode_length(stats.termfreqs.size());
53 map<string, TermFreqs>::const_iterator i;
54 for (i = stats.termfreqs.begin(); i != stats.termfreqs.end(); ++i) {
55 result += encode_length(i->first.size());
56 result += i->first;
57 result += encode_length(i->second.termfreq);
58 if (stats.rset_size != 0)
59 result += encode_length(i->second.reltermfreq);
60 result += encode_length(i->second.collfreq);
61 if (stats.have_max_part)
62 result += serialise_double(i->second.max_part);
65 return result;
68 void
69 unserialise_stats(const string &s, Xapian::Weight::Internal & stat)
71 const char * p = s.data();
72 const char * p_end = p + s.size();
74 decode_length(&p, p_end, stat.total_length);
75 decode_length(&p, p_end, stat.collection_size);
76 decode_length(&p, p_end, stat.rset_size);
77 decode_length(&p, p_end, stat.total_term_count);
78 // If p == p_end, the next decode_length() will report it.
79 stat.have_max_part = (p != p_end && *p++);
81 size_t n;
82 decode_length(&p, p_end, n);
83 while (n--) {
84 size_t len;
85 decode_length_and_check(&p, p_end, len);
86 string term(p, len);
87 p += len;
88 Xapian::doccount termfreq;
89 decode_length(&p, p_end, termfreq);
90 Xapian::doccount reltermfreq;
91 if (stat.rset_size == 0) {
92 reltermfreq = 0;
93 } else {
94 decode_length(&p, p_end, reltermfreq);
96 Xapian::termcount collfreq;
97 decode_length(&p, p_end, collfreq);
98 double max_part = 0.0;
99 if (stat.have_max_part)
100 max_part = unserialise_double(&p, p_end);
101 stat.termfreqs.insert(make_pair(term,
102 TermFreqs(termfreq,
103 reltermfreq,
104 collfreq,
105 max_part)));
109 string
110 serialise_rset(const Xapian::RSet &rset)
112 string result;
113 if (rset.internal.get()) {
114 Xapian::docid lastdid = 0;
115 for (Xapian::docid did : rset.internal->docs) {
116 result += encode_length(did - lastdid - 1);
117 lastdid = did;
120 return result;
123 Xapian::RSet
124 unserialise_rset(const string &s)
126 Xapian::RSet rset;
128 const char * p = s.data();
129 const char * p_end = p + s.size();
131 Xapian::docid did = 0;
132 while (p != p_end) {
133 Xapian::docid inc;
134 decode_length(&p, p_end, inc);
135 did += inc + 1;
136 rset.add_document(did);
139 return rset;
142 string
143 serialise_document(const Xapian::Document &doc)
145 string result;
147 size_t n = doc.values_count();
148 result += encode_length(n);
149 Xapian::ValueIterator value;
150 for (value = doc.values_begin(); value != doc.values_end(); ++value) {
151 result += encode_length(value.get_valueno());
152 result += encode_length((*value).size());
153 result += *value;
154 --n;
156 Assert(n == 0);
158 n = doc.termlist_count();
159 result += encode_length(n);
160 Xapian::TermIterator term;
161 for (term = doc.termlist_begin(); term != doc.termlist_end(); ++term) {
162 result += encode_length((*term).size());
163 result += *term;
164 result += encode_length(term.get_wdf());
166 size_t x = term.positionlist_count();
167 result += encode_length(x);
168 Xapian::PositionIterator pos;
169 Xapian::termpos oldpos = 0;
170 for (pos = term.positionlist_begin(); pos != term.positionlist_end(); ++pos) {
171 Xapian::termpos diff = *pos - oldpos;
172 string delta = encode_length(diff);
173 result += delta;
174 oldpos = *pos;
175 --x;
177 Assert(x == 0);
178 --n;
180 AssertEq(n, 0);
182 result += doc.get_data();
183 return result;
186 Xapian::Document
187 unserialise_document(const string &s)
189 Xapian::Document doc;
190 const char * p = s.data();
191 const char * p_end = p + s.size();
193 size_t n_values;
194 decode_length(&p, p_end, n_values);
195 while (n_values--) {
196 Xapian::valueno slot;
197 decode_length(&p, p_end, slot);
198 size_t len;
199 decode_length_and_check(&p, p_end, len);
200 doc.add_value(slot, string(p, len));
201 p += len;
204 size_t n_terms;
205 decode_length(&p, p_end, n_terms);
206 while (n_terms--) {
207 size_t len;
208 decode_length_and_check(&p, p_end, len);
209 string term(p, len);
210 p += len;
212 // Set all the wdf using add_term, then pass wdf_inc 0 to add_posting.
213 Xapian::termcount wdf;
214 decode_length(&p, p_end, wdf);
215 doc.add_term(term, wdf);
217 size_t n_pos;
218 decode_length(&p, p_end, n_pos);
219 Xapian::termpos pos = 0;
220 while (n_pos--) {
221 Xapian::termpos inc;
222 decode_length(&p, p_end, inc);
223 pos += inc;
224 doc.add_posting(term, pos, 0);
228 doc.set_data(string(p, p_end - p));
229 return doc;