Factor out directory separator knowledge
[xapian.git] / xapian-letor / scorer / err_score.cc
blob7e0173553addac77441b26f70aa3b9cfe35d89e7
1 /** @file err_score.cc
2 * @brief Implementation of ERRScore
4 * ERR Score is adapted from the paper: http://olivier.chapelle.cc/pub/err.pdf
5 * Chapelle, Metzler, Zhang, Grinspan (2009)
6 * Expected Reciprocal Rank for Graded Relevance
7 */
8 /* Copyright (C) 2014 Hanxiao Sun
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License as
12 * published by the Free Software Foundation; either version 2 of the
13 * License, or (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23 * USA
26 #include <config.h>
28 #include "xapian-letor/scorer.h"
30 #include "debuglog.h"
32 #include <algorithm>
34 using namespace std;
36 using namespace Xapian;
38 ERRScore::ERRScore()
40 LOGCALL_CTOR(API, "ERRScore", NO_ARGS);
43 ERRScore::~ERRScore()
45 LOGCALL_DTOR(API, "ERRScore");
48 double
49 ERRScore::score(const std::vector<FeatureVector> & fvv) const
51 LOGCALL(API, double, "ERRScore::score", fvv);
52 if (fvv.empty()) {
53 return 0;
55 size_t length = fvv.size();
56 FeatureVector max_label = *max_element(fvv.begin(), fvv.end(),
57 [](FeatureVector x,
58 FeatureVector y) {
59 return x.get_label() <
60 y.get_label();
61 });
62 double max_value = exp2(max_label.get_label());
64 // Accumulated probability, which is updated for each document.
65 double p = 1;
66 double err_score = 0;
67 for (size_t rank = 1; rank <= length; ++rank) {
68 /* Compute the probability of relevance for the document.
69 * Probability of relevance is calculated in accordance with the gain
70 * function for the Discounted Cumulative Gain in the paper:
71 * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.74.9057&rep=rep1&type=pdf
73 auto label = fvv[rank - 1].get_label();
74 double relevance_probability = (exp2(label) - 1) / max_value;
76 /* err_score = summation over all the documents
77 * ((satisfaction probability * p) / rank).
78 * Expected Reciprocal Rank(err_score) is calculated in accordance with
79 * algorithm 2 in the paper http://olivier.chapelle.cc/pub/err.pdf
80 * The paper assumes discrete relevances but continous relevances can
81 * be scored using this scorer.
83 err_score = err_score + (p * relevance_probability / rank);
84 p = p * (1 - relevance_probability);
87 return err_score;