xapian-letor/scorer/err_score.cc

   1 /** @file err_score.cc
   2  *  @brief Implementation of ERRScore
   3  *
   4  *  ERR Score is adapted from the paper: http://olivier.chapelle.cc/pub/err.pdf
   5  *  Chapelle, Metzler, Zhang, Grinspan (2009)
   6  *  Expected Reciprocal Rank for Graded Relevance
   7  */
   8 /* Copyright (C) 2014 Hanxiao Sun
   9  *
  10  * This program is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU General Public License as
  12  * published by the Free Software Foundation; either version 2 of the
  13  * License, or (at your option) any later version.
  14  *
  15  * This program is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18  * GNU General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU General Public License
  21  * along with this program; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
  23  * USA
  24  */
  25
  26 #include <config.h>
  27
  28 #include "xapian-letor/scorer.h"
  29
  30 #include "debuglog.h"
  31
  32 #include <algorithm>
  33
  34 using namespace std;
  35
  36 using namespace Xapian;
  37
  38 ERRScore::ERRScore()
  39 {
  40     LOGCALL_CTOR(API, "ERRScore", NO_ARGS);
  41 }
  42
  43 ERRScore::~ERRScore()
  44 {
  45     LOGCALL_DTOR(API, "ERRScore");
  46 }
  47
  48 double
  49 ERRScore::score(const std::vector<FeatureVector> & fvv) const
  50 {
  51     LOGCALL(API, double, "ERRScore::score", fvv);
  52     if (fvv.empty()) {
  53         return 0;
  54     }
  55     size_t length = fvv.size();
  56     FeatureVector max_label = *max_element(fvv.begin(), fvv.end(),
  57                                            [](FeatureVector x,
  58                                               FeatureVector y) {
  59                                                   return x.get_label() <
  60                                                          y.get_label();
  61                                               });
  62     double max_value = exp2(max_label.get_label());
  63
  64     // Accumulated probability, which is updated for each document.
  65     double p = 1;
  66     double err_score = 0;
  67     for (size_t rank = 1; rank <= length; ++rank) {
  68         /* Compute the probability of relevance for the document.
  69          * Probability of relevance is calculated in accordance with the gain
  70          * function for the Discounted Cumulative Gain in the paper:
  71          * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.74.9057&rep=rep1&type=pdf
  72          */
  73         auto label = fvv[rank - 1].get_label();
  74         double relevance_probability = (exp2(label) - 1) / max_value;
  75
  76         /* err_score = summation over all the documents
  77         * ((satisfaction probability * p) / rank).
  78         * Expected Reciprocal Rank(err_score) is calculated in accordance with
  79         * algorithm 2 in the paper http://olivier.chapelle.cc/pub/err.pdf
  80         * The paper assumes discrete relevances but continous relevances can
  81         * be scored using this scorer.
  82         */
  83         err_score = err_score + (p * relevance_probability / rank);
  84         p = p * (1 - relevance_probability);
  85     }
  86
  87     return err_score;
  88 }