Website now in git not CVS
[xapian.git] / xapian-core / api / postingsource.cc
blobfde6c54a887863a2910426985fdbf455d19eda5f
1 /** @file postingsource.cc
2 * @brief External sources of posting information
3 */
4 /* Copyright (C) 2008,2009,2010,2011,2012,2015,2016 Olly Betts
5 * Copyright (C) 2008,2009 Lemur Consulting Ltd
6 * Copyright (C) 2010 Richard Boulton
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License, or (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 #include <config.h>
25 // We need to be able to set deprecated members of ValuePostingSource.
26 #define XAPIAN_DEPRECATED(X) X
27 #include "xapian/postingsource.h"
29 #include "autoptr.h"
31 #include "backends/database.h"
32 #include "backends/document.h"
33 #include "matcher/multimatch.h"
35 #include "xapian/document.h"
36 #include "xapian/error.h"
37 #include "xapian/queryparser.h" // For sortable_unserialise().
39 #include "omassert.h"
40 #include "net/length.h"
41 #include "serialise-double.h"
42 #include "str.h"
44 #include <cfloat>
46 using namespace std;
48 namespace Xapian {
50 PostingSource::~PostingSource() { }
52 void
53 PostingSource::set_maxweight(double max_weight)
55 if (usual(matcher_)) {
56 MultiMatch * multimatch = static_cast<MultiMatch*>(matcher_);
57 multimatch->recalc_maxweight();
59 max_weight_ = max_weight;
62 double
63 PostingSource::get_weight() const
65 return 0;
68 void
69 PostingSource::skip_to(Xapian::docid did, double min_wt)
71 while (!at_end() && get_docid() < did) {
72 next(min_wt);
76 bool
77 PostingSource::check(Xapian::docid did, double min_wt)
79 skip_to(did, min_wt);
80 return true;
83 PostingSource *
84 PostingSource::clone() const
86 return NULL;
89 string
90 PostingSource::name() const
92 return string();
95 string
96 PostingSource::serialise() const
98 throw Xapian::UnimplementedError("serialise() not supported for this PostingSource");
101 PostingSource *
102 PostingSource::unserialise(const string &) const
104 throw Xapian::UnimplementedError("unserialise() not supported for this PostingSource");
107 PostingSource *
108 PostingSource::unserialise_with_registry(const std::string &s,
109 const Registry &) const
111 return unserialise(s);
114 string
115 PostingSource::get_description() const
117 return "Xapian::PostingSource subclass";
121 ValuePostingSource::ValuePostingSource(Xapian::valueno slot_)
122 : real_slot(slot_),
123 db(real_db),
124 slot(real_slot),
125 value_it(real_value_it),
126 started(real_started),
127 termfreq_min(real_termfreq_min),
128 termfreq_est(real_termfreq_est),
129 termfreq_max(real_termfreq_max)
133 Xapian::doccount
134 ValuePostingSource::get_termfreq_min() const
136 return real_termfreq_min;
139 Xapian::doccount
140 ValuePostingSource::get_termfreq_est() const
142 return real_termfreq_est;
145 Xapian::doccount
146 ValuePostingSource::get_termfreq_max() const
148 return real_termfreq_max;
151 void
152 ValuePostingSource::next(double min_wt)
154 if (!real_started) {
155 real_started = true;
156 real_value_it = real_db.valuestream_begin(real_slot);
157 } else {
158 ++real_value_it;
161 if (real_value_it == real_db.valuestream_end(real_slot)) return;
163 if (min_wt > get_maxweight()) {
164 real_value_it = real_db.valuestream_end(real_slot);
165 return;
169 void
170 ValuePostingSource::skip_to(Xapian::docid min_docid, double min_wt)
172 if (!real_started) {
173 real_started = true;
174 real_value_it = real_db.valuestream_begin(real_slot);
176 if (real_value_it == real_db.valuestream_end(real_slot)) return;
179 if (min_wt > get_maxweight()) {
180 real_value_it = real_db.valuestream_end(real_slot);
181 return;
183 real_value_it.skip_to(min_docid);
186 bool
187 ValuePostingSource::check(Xapian::docid min_docid, double min_wt)
189 if (!real_started) {
190 real_started = true;
191 real_value_it = real_db.valuestream_begin(real_slot);
193 if (real_value_it == real_db.valuestream_end(real_slot)) return true;
196 if (min_wt > get_maxweight()) {
197 real_value_it = real_db.valuestream_end(real_slot);
198 return true;
200 return real_value_it.check(min_docid);
203 bool
204 ValuePostingSource::at_end() const
206 return real_started && real_value_it == real_db.valuestream_end(real_slot);
209 Xapian::docid
210 ValuePostingSource::get_docid() const
212 return real_value_it.get_docid();
215 void
216 ValuePostingSource::init(const Database & db_)
218 real_db = db_;
219 real_started = false;
220 set_maxweight(DBL_MAX);
221 try {
222 real_termfreq_max = real_db.get_value_freq(real_slot);
223 real_termfreq_est = real_termfreq_max;
224 real_termfreq_min = real_termfreq_max;
225 } catch (const Xapian::UnimplementedError &) {
226 real_termfreq_max = real_db.get_doccount();
227 real_termfreq_est = real_termfreq_max / 2;
228 real_termfreq_min = 0;
233 ValueWeightPostingSource::ValueWeightPostingSource(Xapian::valueno slot_)
234 : ValuePostingSource(slot_)
238 double
239 ValueWeightPostingSource::get_weight() const
241 Assert(!at_end());
242 Assert(get_started());
243 return sortable_unserialise(get_value());
246 ValueWeightPostingSource *
247 ValueWeightPostingSource::clone() const
249 return new ValueWeightPostingSource(get_slot());
252 string
253 ValueWeightPostingSource::name() const
255 return string("Xapian::ValueWeightPostingSource");
258 string
259 ValueWeightPostingSource::serialise() const
261 return encode_length(get_slot());
264 ValueWeightPostingSource *
265 ValueWeightPostingSource::unserialise(const string &s) const
267 const char * p = s.data();
268 const char * end = p + s.size();
270 Xapian::valueno new_slot;
271 decode_length(&p, end, new_slot);
272 if (p != end) {
273 throw Xapian::NetworkError("Bad serialised ValueWeightPostingSource - junk at end");
276 return new ValueWeightPostingSource(new_slot);
279 void
280 ValueWeightPostingSource::init(const Database & db_)
282 ValuePostingSource::init(db_);
284 string upper_bound;
285 try {
286 upper_bound = get_database().get_value_upper_bound(get_slot());
287 } catch (const Xapian::UnimplementedError &) {
288 // ValuePostingSource::init() set the maxweight to DBL_MAX.
289 return;
292 if (upper_bound.empty()) {
293 // This should only happen if there are no entries, in which case the
294 // maxweight is 0.
295 set_maxweight(0.0);
296 } else {
297 set_maxweight(sortable_unserialise(upper_bound));
301 string
302 ValueWeightPostingSource::get_description() const
304 string desc("Xapian::ValueWeightPostingSource(slot=");
305 desc += str(get_slot());
306 desc += ")";
307 return desc;
311 ValueMapPostingSource::ValueMapPostingSource(Xapian::valueno slot_)
312 : ValuePostingSource(slot_),
313 default_weight(0.0),
314 max_weight_in_map(0.0)
318 void
319 ValueMapPostingSource::add_mapping(const string & key, double wt)
321 weight_map[key] = wt;
322 max_weight_in_map = max(wt, max_weight_in_map);
325 void
326 ValueMapPostingSource::clear_mappings()
328 weight_map.clear();
329 max_weight_in_map = 0.0;
332 void
333 ValueMapPostingSource::set_default_weight(double wt)
335 default_weight = wt;
338 double
339 ValueMapPostingSource::get_weight() const
341 map<string, double>::const_iterator wit = weight_map.find(get_value());
342 if (wit == weight_map.end()) {
343 return default_weight;
345 return wit->second;
348 ValueMapPostingSource *
349 ValueMapPostingSource::clone() const
351 AutoPtr<ValueMapPostingSource> res(new ValueMapPostingSource(get_slot()));
352 map<string, double>::const_iterator i;
353 for (i = weight_map.begin(); i != weight_map.end(); ++i) {
354 res->add_mapping(i->first, i->second);
356 res->set_default_weight(default_weight);
357 return res.release();
360 string
361 ValueMapPostingSource::name() const
363 return string("Xapian::ValueMapPostingSource");
366 string
367 ValueMapPostingSource::serialise() const
369 string result = encode_length(get_slot());
370 result += serialise_double(default_weight);
372 map<string, double>::const_iterator i;
373 for (i = weight_map.begin(); i != weight_map.end(); ++i) {
374 result.append(encode_length(i->first.size()));
375 result.append(i->first);
376 result.append(serialise_double(i->second));
379 return result;
382 ValueMapPostingSource *
383 ValueMapPostingSource::unserialise(const string &s) const
385 const char * p = s.data();
386 const char * end = p + s.size();
388 Xapian::valueno new_slot;
389 decode_length(&p, end, new_slot);
390 AutoPtr<ValueMapPostingSource> res(new ValueMapPostingSource(new_slot));
391 res->set_default_weight(unserialise_double(&p, end));
392 while (p != end) {
393 size_t keylen;
394 decode_length_and_check(&p, end, keylen);
395 string key(p, keylen);
396 p += keylen;
397 res->add_mapping(key, unserialise_double(&p, end));
399 return res.release();
402 void
403 ValueMapPostingSource::init(const Database & db_)
405 ValuePostingSource::init(db_);
406 set_maxweight(max(max_weight_in_map, default_weight));
409 string
410 ValueMapPostingSource::get_description() const
412 string desc("Xapian::ValueMapPostingSource(slot=");
413 desc += str(get_slot());
414 desc += ")";
415 return desc;
418 FixedWeightPostingSource::FixedWeightPostingSource(double wt)
419 : started(false)
421 // The weight is fixed at wt, so that's the maxweight too. So just store wt
422 // as the maxweight and we can read it from there when we need it.
423 set_maxweight(wt);
426 Xapian::doccount
427 FixedWeightPostingSource::get_termfreq_min() const
429 return termfreq;
432 Xapian::doccount
433 FixedWeightPostingSource::get_termfreq_est() const
435 return termfreq;
438 Xapian::doccount
439 FixedWeightPostingSource::get_termfreq_max() const
441 return termfreq;
444 double
445 FixedWeightPostingSource::get_weight() const
447 return get_maxweight();
450 void
451 FixedWeightPostingSource::next(double min_wt)
453 if (!started) {
454 started = true;
455 it = db.postlist_begin(string());
456 } else {
457 ++it;
460 if (it == db.postlist_end(string())) return;
462 if (check_docid) {
463 it.skip_to(check_docid + 1);
464 check_docid = 0;
467 if (min_wt > get_maxweight()) {
468 it = db.postlist_end(string());
472 void
473 FixedWeightPostingSource::skip_to(Xapian::docid min_docid, double min_wt)
475 if (!started) {
476 started = true;
477 it = db.postlist_begin(string());
479 if (it == db.postlist_end(string())) return;
482 if (check_docid) {
483 if (min_docid < check_docid)
484 min_docid = check_docid + 1;
485 check_docid = 0;
488 if (min_wt > get_maxweight()) {
489 it = db.postlist_end(string());
490 return;
492 it.skip_to(min_docid);
495 bool
496 FixedWeightPostingSource::check(Xapian::docid min_docid, double)
498 // We're guaranteed not to be called if the document doesn't
499 // exist, so just remember the docid passed, and return true.
500 check_docid = min_docid;
501 return true;
504 bool
505 FixedWeightPostingSource::at_end() const
507 if (check_docid != 0) return false;
508 return started && it == db.postlist_end(string());
511 Xapian::docid
512 FixedWeightPostingSource::get_docid() const
514 if (check_docid != 0) return check_docid;
515 return *it;
518 FixedWeightPostingSource *
519 FixedWeightPostingSource::clone() const
521 return new FixedWeightPostingSource(get_maxweight());
524 string
525 FixedWeightPostingSource::name() const
527 return string("Xapian::FixedWeightPostingSource");
530 string
531 FixedWeightPostingSource::serialise() const
533 return serialise_double(get_maxweight());
536 FixedWeightPostingSource *
537 FixedWeightPostingSource::unserialise(const string &s) const
539 const char * p = s.data();
540 const char * s_end = p + s.size();
541 double new_wt = unserialise_double(&p, s_end);
542 if (p != s_end) {
543 throw Xapian::NetworkError("Bad serialised FixedWeightPostingSource - junk at end");
545 return new FixedWeightPostingSource(new_wt);
548 void
549 FixedWeightPostingSource::init(const Xapian::Database & db_)
551 db = db_;
552 termfreq = db_.get_doccount();
553 started = false;
554 check_docid = 0;
557 string
558 FixedWeightPostingSource::get_description() const
560 string desc("Xapian::FixedWeightPostingSource(wt=");
561 desc += str(get_maxweight());
562 desc += ")";
563 return desc;