/** @file
 * @brief Xapian::DLHWeight class - The DLH weighting scheme of the DFR framework.
 */
/* Copyright (C) 2013, 2014 Aarsh Shah
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
 */
23 #include "xapian/weight.h"
24 #include "common/log2.h"
32 DLHWeight::clone() const
34 return new DLHWeight();
38 DLHWeight::init(double factor
)
40 double wdf_lower
= 1.0;
41 double wdf_upper
= get_wdf_upper_bound();
42 double len_upper
= get_doclength_upper_bound();
44 double min_wdf_to_len
= wdf_lower
/ len_upper
;
46 double N
= get_collection_size();
47 double F
= get_collection_freq();
50 lower_bound
= upper_bound
= 0.0;
54 // Calculate the lower bound.
55 double min_weight
= (wdf_lower
* log2((wdf_lower
* get_average_length() /
56 len_upper
) * (N
/ F
)) -
57 (1.5 * log2(len_upper
)) +
58 0.5 * log2(2.0 * M_PI
* wdf_lower
)) /
61 lower_bound
= get_wqf() * min_weight
* factor
;
63 // Calculate constant values to be used in get_sumpart().
64 log_constant
= get_average_length() * N
/ F
;
65 wqf_product_factor
= get_wqf() * factor
;
67 // Calculate values for the upper bound.
68 /* An upper bound of the term used in the third log can be obtained by
69 plugging in the upper bound of the length and differentiating the term
70 w.r.t wdf which gives the value of wdf at which the function attains
72 double wdf_var
= min(wdf_upper
, len_upper
/ 2.0);
73 double max_product_1
= wdf_var
* (1.0 - wdf_var
/ len_upper
);
74 /* An upper bound can also be obtained by taking the minimum and maximum
75 wdf value in the formula as shown. */
76 double max_product_2
= wdf_upper
* (1.0 - min_wdf_to_len
);
77 /* Take the minimum of the two upper bounds. */
78 double max_product
= min(max_product_1
, max_product_2
);
80 double max_weight
= factor
*
81 ((wdf_upper
* log2(log_constant
)) / (wdf_upper
+ 0.5) +
82 (len_upper
- wdf_lower
) * log2(1.0 - min_wdf_to_len
)
84 0.5 * log2(2.0 * M_PI
* max_product
) / (wdf_lower
+ 0.5));
86 upper_bound
= ((get_wqf() * max_weight
) - lower_bound
);
90 DLHWeight::name() const
92 return "Xapian::DLHWeight";
96 DLHWeight::serialise() const
102 DLHWeight::unserialise(const string
&) const
104 return new DLHWeight();
108 DLHWeight::get_sumpart(Xapian::termcount wdf
, Xapian::termcount len
,
109 Xapian::termcount
) const
111 if (wdf
== 0) return 0.0;
113 double wdf_to_len
= double(wdf
) / len
;
115 double wt
= (wdf
* log2(wdf_to_len
* log_constant
) +
116 (len
- wdf
) * log2(1.0 - wdf_to_len
) +
117 0.5 * log2(2.0 * M_PI
* wdf
* (1.0 - wdf_to_len
))) /
120 return ((wqf_product_factor
* wt
) - lower_bound
);
124 DLHWeight::get_maxpart() const
130 DLHWeight::get_sumextra(Xapian::termcount
, Xapian::termcount
) const
136 DLHWeight::get_maxextra() const