Remove unused header include
[xapian.git] / xapian-core / weight / dlhweight.cc
blobffb27dd73a9156f4459dae025933ad168c3165e2
1 /** @file dlhweight.cc
2 * @brief Xapian::DLHWeight class - The DLH weighting scheme of the DFR framework.
3 */
4 /* Copyright (C) 2013, 2014 Aarsh Shah
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of the
9 * License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #include <config.h>
23 #include "xapian/weight.h"
24 #include "common/log2.h"
25 #include <algorithm>
27 using namespace std;
29 namespace Xapian {
31 DLHWeight *
32 DLHWeight::clone() const
34 return new DLHWeight();
37 void
38 DLHWeight::init(double factor)
40 double wdf_lower = 1.0;
41 double wdf_upper = get_wdf_upper_bound();
42 double len_upper = get_doclength_upper_bound();
44 double min_wdf_to_len = wdf_lower / len_upper;
46 double N = get_collection_size();
47 double F = get_collection_freq();
49 if (wdf_upper == 0) {
50 lower_bound = upper_bound = 0.0;
51 return;
54 // Calculate the lower bound.
55 double min_weight = (wdf_lower * log2((wdf_lower * get_average_length() /
56 len_upper) * (N / F)) -
57 (1.5 * log2(len_upper)) +
58 0.5 * log2(2.0 * M_PI * wdf_lower)) /
59 (wdf_upper + 0.5);
61 lower_bound = get_wqf() * min_weight * factor;
63 // Calculate constant values to be used in get_sumpart().
64 log_constant = get_average_length() * N / F;
65 wqf_product_factor = get_wqf() * factor;
67 // Calculate values for the upper bound.
68 /* An upper bound of the term used in the third log can be obtained by
69 plugging in the upper bound of the length and differentiating the term
70 w.r.t wdf which gives the value of wdf at which the function attains
71 maximum value. */
72 double wdf_var = min(wdf_upper, len_upper / 2.0);
73 double max_product_1 = wdf_var * (1.0 - wdf_var / len_upper);
74 /* An upper bound can also be obtained by taking the minimum and maximum
75 wdf value in the formula as shown. */
76 double max_product_2 = wdf_upper * (1.0 - min_wdf_to_len);
77 /* Take the minimum of the two upper bounds. */
78 double max_product = min(max_product_1, max_product_2);
80 double max_weight = factor *
81 ((wdf_upper * log2(log_constant)) / (wdf_upper + 0.5) +
82 (len_upper - wdf_lower) * log2(1.0 - min_wdf_to_len)
83 / (wdf_lower + 0.5) +
84 0.5 * log2(2.0 * M_PI * max_product) / (wdf_lower + 0.5));
86 upper_bound = ((get_wqf() * max_weight) - lower_bound);
89 string
90 DLHWeight::name() const
92 return "Xapian::DLHWeight";
95 string
96 DLHWeight::serialise() const
98 return string();
101 DLHWeight *
102 DLHWeight::unserialise(const string &) const
104 return new DLHWeight();
107 double
108 DLHWeight::get_sumpart(Xapian::termcount wdf, Xapian::termcount len,
109 Xapian::termcount) const
111 if (wdf == 0) return 0.0;
113 double wdf_to_len = double(wdf) / len;
115 double wt = (wdf * log2(wdf_to_len * log_constant) +
116 (len - wdf) * log2(1.0 - wdf_to_len) +
117 0.5 * log2(2.0 * M_PI * wdf * (1.0 - wdf_to_len))) /
118 (wdf + 0.5);
120 return ((wqf_product_factor * wt) - lower_bound);
123 double
124 DLHWeight::get_maxpart() const
126 return upper_bound;
129 double
130 DLHWeight::get_sumextra(Xapian::termcount, Xapian::termcount) const
132 return 0;
135 double
136 DLHWeight::get_maxextra() const
138 return 0;