1 /** @file postingsource.cc
2 * @brief External sources of posting information
4 /* Copyright (C) 2008,2009,2010,2011,2012,2015,2016 Olly Betts
5 * Copyright (C) 2008,2009 Lemur Consulting Ltd
6 * Copyright (C) 2010 Richard Boulton
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License, or (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 // We need to be able to set deprecated members of ValuePostingSource.
26 #define XAPIAN_DEPRECATED(X) X
27 #include "xapian/postingsource.h"
31 #include "backends/database.h"
32 #include "backends/document.h"
33 #include "matcher/multimatch.h"
35 #include "xapian/document.h"
36 #include "xapian/error.h"
37 #include "xapian/queryparser.h" // For sortable_unserialise().
40 #include "net/length.h"
41 #include "serialise-double.h"
50 PostingSource::~PostingSource() { }
53 PostingSource::set_maxweight(double max_weight
)
55 if (usual(matcher_
)) {
56 MultiMatch
* multimatch
= static_cast<MultiMatch
*>(matcher_
);
57 multimatch
->recalc_maxweight();
59 max_weight_
= max_weight
;
63 PostingSource::get_weight() const
// Default skip_to(): loops while the source is not at_end() and get_docid()
// is still below did.
// NOTE(review): the embedded numbering jumps from 71 to 77, so the loop body
// and the closing braces are not visible in this view.
69 PostingSource::skip_to(Xapian::docid did
, double min_wt
)
71 while (!at_end() && get_docid() < did
) {
77 PostingSource::check(Xapian::docid did
, double min_wt
)
84 PostingSource::clone() const
90 PostingSource::name() const
96 PostingSource::serialise() const
98 throw Xapian::UnimplementedError("serialise() not supported for this PostingSource");
102 PostingSource::unserialise(const string
&) const
104 throw Xapian::UnimplementedError("unserialise() not supported for this PostingSource");
108 PostingSource::unserialise_with_registry(const std::string
&s
,
109 const Registry
&) const
111 return unserialise(s
);
115 PostingSource::get_description() const
117 return "Xapian::PostingSource subclass";
// Constructor: the visible initialisers set value_it, started, termfreq_min,
// termfreq_est and termfreq_max from their real_* counterparts.
// NOTE(review): the first initialisers (embedded lines 122-124) and the
// constructor body are not visible in this view.  The non-real_ members are
// presumably the deprecated members referred to by the XAPIAN_DEPRECATED
// comment near the top of the file - confirm.
121 ValuePostingSource::ValuePostingSource(Xapian::valueno slot_
)
125 value_it(real_value_it
),
126 started(real_started
),
127 termfreq_min(real_termfreq_min
),
128 termfreq_est(real_termfreq_est
),
129 termfreq_max(real_termfreq_max
)
134 ValuePostingSource::get_termfreq_min() const
136 return real_termfreq_min
;
140 ValuePostingSource::get_termfreq_est() const
142 return real_termfreq_est
;
146 ValuePostingSource::get_termfreq_max() const
148 return real_termfreq_max
;
// Advance this source.  The visible fragments position real_value_it at
// valuestream_begin(real_slot), return early once it equals
// valuestream_end(real_slot), and move it to valuestream_end(real_slot) when
// min_wt exceeds get_maxweight() (presumably because no entry could then
// reach the requested weight).
// NOTE(review): embedded lines 154-155 and 157-160 are missing from this
// view, so the condition guarding the valuestream_begin() assignment and the
// step taken on later calls are not visible.
152 ValuePostingSource::next(double min_wt
)
156 real_value_it
= real_db
.valuestream_begin(real_slot
);
161 if (real_value_it
== real_db
.valuestream_end(real_slot
)) return;
163 if (min_wt
> get_maxweight()) {
164 real_value_it
= real_db
.valuestream_end(real_slot
);
// Skip forward to min_docid.  The visible fragments position real_value_it at
// valuestream_begin(real_slot), return early when it equals
// valuestream_end(real_slot), move it to the end when min_wt exceeds
// get_maxweight(), and otherwise call real_value_it.skip_to(min_docid).
// NOTE(review): embedded lines 172-173, 175, 177-178 and 181-182 are missing
// from this view, so the guard on the valuestream_begin() assignment and
// whatever follows the end assignment inside the min_wt branch are not
// visible.
170 ValuePostingSource::skip_to(Xapian::docid min_docid
, double min_wt
)
174 real_value_it
= real_db
.valuestream_begin(real_slot
);
176 if (real_value_it
== real_db
.valuestream_end(real_slot
)) return;
179 if (min_wt
> get_maxweight()) {
180 real_value_it
= real_db
.valuestream_end(real_slot
);
183 real_value_it
.skip_to(min_docid
);
// Check min_docid.  The visible fragments position real_value_it at
// valuestream_begin(real_slot), return true when it equals
// valuestream_end(real_slot), move it to the end when min_wt exceeds
// get_maxweight(), and otherwise return real_value_it.check(min_docid).
// NOTE(review): embedded lines 189-190, 192, 194-195 and 198-199 are missing
// from this view, so the guard on the valuestream_begin() assignment and the
// value returned from the min_wt branch are not visible.
187 ValuePostingSource::check(Xapian::docid min_docid
, double min_wt
)
191 real_value_it
= real_db
.valuestream_begin(real_slot
);
193 if (real_value_it
== real_db
.valuestream_end(real_slot
)) return true;
196 if (min_wt
> get_maxweight()) {
197 real_value_it
= real_db
.valuestream_end(real_slot
);
200 return real_value_it
.check(min_docid
);
204 ValuePostingSource::at_end() const
206 return real_started
&& real_value_it
== real_db
.valuestream_end(real_slot
);
210 ValuePostingSource::get_docid() const
212 return real_value_it
.get_docid();
// (Re)initialise for a database.  The visible fragments clear real_started,
// set the maximum weight to DBL_MAX, and take all three term frequency
// figures from real_db.get_value_freq(real_slot).  If that raises
// Xapian::UnimplementedError, the fallback is: max = get_doccount(),
// est = max / 2, min = 0.
// NOTE(review): embedded lines 218 and 221 are missing from this view; the
// `} catch` implies a `try {` at 221, and 218 is presumably where db_ is
// stored into real_db - confirm.
216 ValuePostingSource::init(const Database
& db_
)
219 real_started
= false;
220 set_maxweight(DBL_MAX
);
222 real_termfreq_max
= real_db
.get_value_freq(real_slot
);
223 real_termfreq_est
= real_termfreq_max
;
224 real_termfreq_min
= real_termfreq_max
;
225 } catch (const Xapian::UnimplementedError
&) {
226 real_termfreq_max
= real_db
.get_doccount();
227 real_termfreq_est
= real_termfreq_max
/ 2;
228 real_termfreq_min
= 0;
233 ValueWeightPostingSource::ValueWeightPostingSource(Xapian::valueno slot_
)
234 : ValuePostingSource(slot_
)
// Weight of the current entry: the current value (get_value()) decoded with
// sortable_unserialise().  Asserts that iteration has started.
// NOTE(review): embedded line 241 is missing from this view.
239 ValueWeightPostingSource::get_weight() const
242 Assert(get_started());
243 return sortable_unserialise(get_value());
246 ValueWeightPostingSource
*
247 ValueWeightPostingSource::clone() const
249 return new ValueWeightPostingSource(get_slot());
253 ValueWeightPostingSource::name() const
255 return string("Xapian::ValueWeightPostingSource");
259 ValueWeightPostingSource::serialise() const
261 return encode_length(get_slot());
// Rebuild a ValueWeightPostingSource from its serialised form: decode the
// slot number from s with decode_length() and return a new object for it.
// NOTE(review): embedded lines 272 and 274 are missing from this view, so the
// condition guarding the NetworkError is not visible; given the message it is
// presumably a check that p has reached end - confirm.
264 ValueWeightPostingSource
*
265 ValueWeightPostingSource::unserialise(const string
&s
) const
267 const char * p
= s
.data();
268 const char * end
= p
+ s
.size();
270 Xapian::valueno new_slot
;
271 decode_length(&p
, end
, new_slot
);
273 throw Xapian::NetworkError("Bad serialised ValueWeightPostingSource - junk at end");
276 return new ValueWeightPostingSource(new_slot
);
// Initialise for a database: run ValuePostingSource::init(), then fetch the
// slot's value upper bound and use its sortable_unserialise()d form as the
// maximum weight.  Xapian::UnimplementedError from get_value_upper_bound() is
// caught.
// NOTE(review): embedded lines 283-285, 289-291, 294-296 and 298-299 are
// missing from this view, so the declaration of upper_bound, the rest of the
// catch handler and the handling of an empty upper_bound are not visible.
280 ValueWeightPostingSource::init(const Database
& db_
)
282 ValuePostingSource::init(db_
);
286 upper_bound
= get_database().get_value_upper_bound(get_slot());
287 } catch (const Xapian::UnimplementedError
&) {
288 // ValuePostingSource::init() set the maxweight to DBL_MAX.
292 if (upper_bound
.empty()) {
293 // This should only happen if there are no entries, in which case the
297 set_maxweight(sortable_unserialise(upper_bound
));
// Build a description starting "Xapian::ValueWeightPostingSource(slot="
// followed by the slot number converted with str().
// NOTE(review): embedded lines 306-307 (the end of the string and the return)
// are missing from this view.
302 ValueWeightPostingSource::get_description() const
304 string
desc("Xapian::ValueWeightPostingSource(slot=");
305 desc
+= str(get_slot());
// Constructor: passes slot_ to ValuePostingSource and starts with
// max_weight_in_map at 0.0.
// NOTE(review): embedded line 313 (another initialiser) and the body are
// missing from this view.
311 ValueMapPostingSource::ValueMapPostingSource(Xapian::valueno slot_
)
312 : ValuePostingSource(slot_
),
314 max_weight_in_map(0.0)
319 ValueMapPostingSource::add_mapping(const string
& key
, double wt
)
321 weight_map
[key
] = wt
;
322 max_weight_in_map
= max(wt
, max_weight_in_map
);
// Reset the mappings; the visible fragment sets max_weight_in_map back to 0.0.
// NOTE(review): embedded line 328 is missing from this view; presumably it
// empties weight_map - confirm.
326 ValueMapPostingSource::clear_mappings()
329 max_weight_in_map
= 0.0;
333 ValueMapPostingSource::set_default_weight(double wt
)
// Weight of the current entry: look the current value (get_value()) up in
// weight_map and return default_weight when it has no mapping.
// NOTE(review): embedded line 345 (the return for a found mapping) is missing
// from this view.
339 ValueMapPostingSource::get_weight() const
341 map
<string
, double>::const_iterator wit
= weight_map
.find(get_value());
342 if (wit
== weight_map
.end()) {
343 return default_weight
;
348 ValueMapPostingSource
*
349 ValueMapPostingSource::clone() const
351 AutoPtr
<ValueMapPostingSource
> res(new ValueMapPostingSource(get_slot()));
352 map
<string
, double>::const_iterator i
;
353 for (i
= weight_map
.begin(); i
!= weight_map
.end(); ++i
) {
354 res
->add_mapping(i
->first
, i
->second
);
356 res
->set_default_weight(default_weight
);
357 return res
.release();
361 ValueMapPostingSource::name() const
363 return string("Xapian::ValueMapPostingSource");
// Serialise this source.  Layout built in result: the slot (encode_length),
// then default_weight (serialise_double), then for each weight_map entry the
// key length (encode_length), the key bytes, and the weight
// (serialise_double).
// NOTE(review): embedded lines 377-380 (loop close and the return) are
// missing from this view.
367 ValueMapPostingSource::serialise() const
369 string result
= encode_length(get_slot());
370 result
+= serialise_double(default_weight
);
372 map
<string
, double>::const_iterator i
;
373 for (i
= weight_map
.begin(); i
!= weight_map
.end(); ++i
) {
374 result
.append(encode_length(i
->first
.size()));
375 result
.append(i
->first
);
376 result
.append(serialise_double(i
->second
));
// Rebuild a ValueMapPostingSource from its serialised form: decode the slot,
// create the object (held in an AutoPtr while it is populated), read the
// default weight, then read key/weight pairs and add each with add_mapping().
// NOTE(review): embedded lines 392-393, 396 and 398 are missing from this
// view, so the loop header, the declaration of keylen and the step that
// advances p past the key bytes are not visible.
382 ValueMapPostingSource
*
383 ValueMapPostingSource::unserialise(const string
&s
) const
385 const char * p
= s
.data();
386 const char * end
= p
+ s
.size();
388 Xapian::valueno new_slot
;
389 decode_length(&p
, end
, new_slot
);
390 AutoPtr
<ValueMapPostingSource
> res(new ValueMapPostingSource(new_slot
));
391 res
->set_default_weight(unserialise_double(&p
, end
));
394 decode_length_and_check(&p
, end
, keylen
);
395 string
key(p
, keylen
);
397 res
->add_mapping(key
, unserialise_double(&p
, end
));
399 return res
.release();
403 ValueMapPostingSource::init(const Database
& db_
)
405 ValuePostingSource::init(db_
);
406 set_maxweight(max(max_weight_in_map
, default_weight
));
// Build a description starting "Xapian::ValueMapPostingSource(slot="
// followed by the slot number converted with str().
// NOTE(review): embedded lines 414-415 (the end of the string and the return)
// are missing from this view.
410 ValueMapPostingSource::get_description() const
412 string
desc("Xapian::ValueMapPostingSource(slot=");
413 desc
+= str(get_slot());
// Constructor taking the fixed weight wt.
// NOTE(review): the initialiser list (embedded line 419) and the statement
// the comment below describes (embedded line 423) are missing from this view.
418 FixedWeightPostingSource::FixedWeightPostingSource(double wt
)
421 // The weight is fixed at wt, so that's the maxweight too. So just store wt
422 // as the maxweight and we can read it from there when we need it.
427 FixedWeightPostingSource::get_termfreq_min() const
433 FixedWeightPostingSource::get_termfreq_est() const
439 FixedWeightPostingSource::get_termfreq_max() const
445 FixedWeightPostingSource::get_weight() const
447 return get_maxweight();
// Advance this source.  The visible fragments position `it` at
// db.postlist_begin(string()), return early when it equals
// db.postlist_end(string()), skip to check_docid + 1, and move `it` to the
// end when min_wt exceeds get_maxweight().
// NOTE(review): embedded lines 453-454, 456-459, 461-462 and 464-466 are
// missing from this view, so the conditions selecting between these steps
// (including when check_docid is consulted and reset) are not visible.
451 FixedWeightPostingSource::next(double min_wt
)
455 it
= db
.postlist_begin(string());
460 if (it
== db
.postlist_end(string())) return;
463 it
.skip_to(check_docid
+ 1);
467 if (min_wt
> get_maxweight()) {
468 it
= db
.postlist_end(string());
// Skip forward to min_docid.  The visible fragments position `it` at
// db.postlist_begin(string()), return early when it equals
// db.postlist_end(string()), raise min_docid to check_docid + 1 when it is
// below check_docid, move `it` to the end when min_wt exceeds
// get_maxweight(), and otherwise call it.skip_to(min_docid).
// NOTE(review): embedded lines 475-476, 478, 480-482, 485-487 and 490-491
// are missing from this view, so the guards around these steps are not
// visible.
473 FixedWeightPostingSource::skip_to(Xapian::docid min_docid
, double min_wt
)
477 it
= db
.postlist_begin(string());
479 if (it
== db
.postlist_end(string())) return;
483 if (min_docid
< check_docid
)
484 min_docid
= check_docid
+ 1;
488 if (min_wt
> get_maxweight()) {
489 it
= db
.postlist_end(string());
492 it
.skip_to(min_docid
);
// Check min_docid: records it in check_docid rather than moving the
// iterator.  The second (weight) argument is unnamed and unused.
// NOTE(review): embedded line 501 is missing from this view; per the comment
// below it is presumably `return true;` - confirm.
496 FixedWeightPostingSource::check(Xapian::docid min_docid
, double)
498 // We're guaranteed not to be called if the document doesn't
499 // exist, so just remember the docid passed, and return true.
500 check_docid
= min_docid
;
505 FixedWeightPostingSource::at_end() const
507 if (check_docid
!= 0) return false;
508 return started
&& it
== db
.postlist_end(string());
// Current document id: when check_docid is non-zero it is returned directly.
// NOTE(review): embedded line 515 (the return used otherwise) is missing from
// this view.
512 FixedWeightPostingSource::get_docid() const
514 if (check_docid
!= 0) return check_docid
;
518 FixedWeightPostingSource
*
519 FixedWeightPostingSource::clone() const
521 return new FixedWeightPostingSource(get_maxweight());
525 FixedWeightPostingSource::name() const
527 return string("Xapian::FixedWeightPostingSource");
531 FixedWeightPostingSource::serialise() const
533 return serialise_double(get_maxweight());
// Rebuild a FixedWeightPostingSource from its serialised form: decode one
// double from s with unserialise_double() and return a new object using it
// as the weight.
// NOTE(review): embedded lines 542 and 544 are missing from this view, so the
// condition guarding the NetworkError is not visible; given the message it is
// presumably a check that p has reached s_end - confirm.
536 FixedWeightPostingSource
*
537 FixedWeightPostingSource::unserialise(const string
&s
) const
539 const char * p
= s
.data();
540 const char * s_end
= p
+ s
.size();
541 double new_wt
= unserialise_double(&p
, s_end
);
543 throw Xapian::NetworkError("Bad serialised FixedWeightPostingSource - junk at end");
545 return new FixedWeightPostingSource(new_wt
);
// Initialise for a database; the visible fragment sets termfreq to
// db_.get_doccount().
// NOTE(review): embedded lines 550-551 and 553 onwards are missing from this
// view, so any other state reset here is not visible.
549 FixedWeightPostingSource::init(const Xapian::Database
& db_
)
552 termfreq
= db_
.get_doccount();
558 FixedWeightPostingSource::get_description() const
560 string
desc("Xapian::FixedWeightPostingSource(wt=");
561 desc
+= str(get_maxweight());