Add Weight::create() and Weight::create_from_parameters()
[xapian.git] / xapian-core / weight / ifb2weight.cc
blob1ffc0d3ae4eba4050cb1575c518393dee0664392
/** @file ifb2weight.cc
 * @brief Xapian::IfB2Weight class - the IfB2 weighting scheme of the DFR framework.
 */
/* Copyright (C) 2013,2014 Aarsh Shah
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
21 #include <config.h>
23 #include "xapian/weight.h"
24 #include "common/log2.h"
25 #include "weightinternal.h"
27 #include "serialise-double.h"
29 #include "xapian/error.h"
31 using namespace std;
33 namespace Xapian {
35 IfB2Weight::IfB2Weight(double c)
36 : param_c(c)
38 if (param_c <= 0)
39 throw Xapian::InvalidArgumentError("Parameter c is invalid.");
40 need_stat(AVERAGE_LENGTH);
41 need_stat(DOC_LENGTH);
42 need_stat(DOC_LENGTH_MIN);
43 need_stat(COLLECTION_SIZE);
44 need_stat(COLLECTION_FREQ);
45 need_stat(WDF);
46 need_stat(WDF_MAX);
47 need_stat(WQF);
48 need_stat(TERMFREQ);
51 IfB2Weight *
52 IfB2Weight::clone() const
54 return new IfB2Weight(param_c);
57 void
58 IfB2Weight::init(double factor)
60 double wdfn_upper = get_wdf_upper_bound();
61 if (wdfn_upper == 0) {
62 upper_bound = 0.0;
63 return;
66 double F = get_collection_freq();
67 double N = get_collection_size();
69 wdfn_upper *= log2(1 + (param_c * get_average_length()) /
70 get_doclength_lower_bound());
72 // This term is constant for all documents.
73 double idf_max = log2((N + 1.0) / (F + 0.5));
75 /* Calculate constant values to be used in get_sumpart(). */
76 wqf_product_idf = get_wqf() * idf_max * factor;
77 c_product_avlen = param_c * get_average_length();
78 B_constant = (F + 1.0) / get_termfreq();
80 // wdfn * B = wdfn * (F + 1.0) / (get_termfreq() * (wdfn + 1.0)).
81 // By cancelling out wdfn, we get (F + 1.0) / (get_termfreq() * (1.0 + 1.0 / wdfn)).
82 // In order to maximize the product, we need to minimize the denominator, and so we use wdfn_upper.
83 double max_wdfn_product_B = wdfn_upper * B_constant / (wdfn_upper + 1.0);
85 upper_bound = wqf_product_idf * max_wdfn_product_B * factor;
88 string
89 IfB2Weight::name() const
91 return "Xapian::IfB2Weight";
94 string
95 IfB2Weight::short_name() const
97 return "ifb2";
100 string
101 IfB2Weight::serialise() const
103 return serialise_double(param_c);
106 IfB2Weight *
107 IfB2Weight::unserialise(const string & s) const
109 const char *ptr = s.data();
110 const char *end = ptr + s.size();
111 double c = unserialise_double(&ptr, end);
112 if (rare(ptr != end))
113 throw Xapian::SerialisationError("Extra data in IfB2Weight::unserialise()");
114 return new IfB2Weight(c);
117 double
118 IfB2Weight::get_sumpart(Xapian::termcount wdf, Xapian::termcount len,
119 Xapian::termcount) const
121 if (wdf == 0) return 0.0;
122 double wdfn = wdf;
123 wdfn *= log2(1 + c_product_avlen / len);
125 double wdfn_product_B = wdfn * B_constant / (wdfn + 1.0);
127 return (wqf_product_idf * wdfn_product_B);
130 double
131 IfB2Weight::get_maxpart() const
133 return upper_bound;
136 double
137 IfB2Weight::get_sumextra(Xapian::termcount, Xapian::termcount) const
139 return 0;
142 double
143 IfB2Weight::get_maxextra() const
145 return 0;
148 IfB2Weight *
149 IfB2Weight::create_from_parameters(const char * p) const
151 if (*p == '\0')
152 return new Xapian::IfB2Weight();
153 double k = 1.0;
154 if (!Xapian::Weight::Internal::double_param(&p, &k))
155 Xapian::Weight::Internal::parameter_error("Parameter is invalid", "ifb2");
156 if (*p)
157 Xapian::Weight::Internal::parameter_error("Extra data after parameter", "ifb2");
158 return new Xapian::IfB2Weight(k);