2 * @brief Implementation of ERRScore
4 * ERR Score is adapted from the paper: http://olivier.chapelle.cc/pub/err.pdf
5 * Chapelle, Metzler, Zhang, Grinspan (2009)
6 * Expected Reciprocal Rank for Graded Relevance
8 /* Copyright (C) 2014 Hanxiao Sun
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License as
12 * published by the Free Software Foundation; either version 2 of the
13 * License, or (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
28 #include "xapian-letor/scorer.h"
36 using namespace Xapian
;
40 LOGCALL_CTOR(API
, "ERRScore", NO_ARGS
);
45 LOGCALL_DTOR(API
, "ERRScore");
49 ERRScore::score(const std::vector
<FeatureVector
> & fvv
) const
51 LOGCALL(API
, double, "ERRScore::score", fvv
);
55 size_t length
= fvv
.size();
56 FeatureVector max_label
= *max_element(fvv
.begin(), fvv
.end(),
59 return x
.get_label() <
62 double max_value
= exp2(max_label
.get_label());
64 // Accumulated probability, which is updated for each document.
67 for (size_t rank
= 1; rank
<= length
; ++rank
) {
68 /* Compute the probability of relevance for the document.
69 * Probability of relevance is calculated in accordance with the gain
70 * function for the Discounted Cumulative Gain in the paper:
71 * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.74.9057&rep=rep1&type=pdf
73 auto label
= fvv
[rank
- 1].get_label();
74 double relevance_probability
= (exp2(label
) - 1) / max_value
;
76 /* err_score = summation over all the documents
77 * ((satisfaction probability * p) / rank).
78 * Expected Reciprocal Rank(err_score) is calculated in accordance with
79 * algorithm 2 in the paper http://olivier.chapelle.cc/pub/err.pdf
80 * The paper assumes discrete relevances but continous relevances can
81 * be scored using this scorer.
83 err_score
= err_score
+ (p
* relevance_probability
/ rank
);
84 p
= p
* (1 - relevance_probability
);