[honey] Tweak assertion
[xapian.git] / xapian-letor / ranker / listnet_ranker.cc
blob480b039fc7452660bdd1ee6e3379a6fe85cc0040
1 /** @file listnet_ranker.cc
2 * @brief Implementation of ListNetRanker
4 * ListNET is adapted from the paper:
5 * Cao, Zhe, et al. "Learning to rank: from pairwise approach to listwise
6 * approach."
7 * Proceedings of the 24th international conference on Machine learning. ACM,
8 * 2007.
9 */
10 /* Copyright (C) 2014 Hanxiao Sun
11 * Copyright (C) 2016 Ayush Tomar
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License as
15 * published by the Free Software Foundation; either version 2 of the
16 * License, or (at your option) any later version.
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
 * USA
 */
29 #include <config.h>
31 #include "xapian-letor/ranker.h"
33 #include "debuglog.h"
34 #include "serialise-double.h"
36 #include <algorithm>
37 #include <cmath>
38 #include <cstdlib>
39 #include <fstream>
40 #include <iomanip>
41 #include <limits>
42 #include <sstream>
43 #include <vector>
45 using namespace std;
46 using namespace Xapian;
48 /// A vector holding ground truth and prediction score probability distribution vectors.
49 typedef vector <vector <double> > prob_distrib_vector;
51 ListNETRanker::~ListNETRanker() {
52 LOGCALL_DTOR(API, "ListNETRanker");
55 static double
56 calculate_inner_product(const vector<double> &parameters, const vector<double> &fvals) {
57 LOGCALL_STATIC_VOID(API, "calculate_inner_product", parameters | fvals);
58 double inner_product = 0.0;
59 for (size_t i = 0; i < fvals.size(); ++i)
60 inner_product += parameters[i] * fvals[i];
61 return inner_product;
64 // From Theorem (8) in Cao et al. "Learning to rank: from pairwise approach to listwise approach."
65 static prob_distrib_vector
66 init_probability(const vector<FeatureVector> &feature_vectors, const vector<double> &new_parameters) {
67 LOGCALL_STATIC_VOID(API, "init_probability", feature_vectors | new_parameters);
69 // probability distribution, y is ground truth, while z is predict score
70 vector<double> prob_y;
71 vector<double> prob_z;
72 double expsum_y = 0.0;
73 double expsum_z = 0.0;
75 for (auto&& v : feature_vectors) {
76 expsum_y += exp(v.get_label());
77 expsum_z += exp(calculate_inner_product(new_parameters, v.get_fvals()));
79 for (auto&& v : feature_vectors) {
80 prob_y.push_back(exp(v.get_label()) / expsum_y);
81 prob_z.push_back(exp(calculate_inner_product(new_parameters, v.get_fvals())) / expsum_z);
83 vector<vector<double>> prob;
84 prob.push_back(prob_y);
85 prob.push_back(prob_z);
86 return prob;
89 // Equation (6) in paper Cao et al. "Learning to rank: from pairwise approach to listwise approach."
90 static vector<double>
91 calculate_gradient(const vector<FeatureVector> &feature_vectors, const prob_distrib_vector &prob) {
92 LOGCALL_STATIC_VOID(API, "calculate_gradient", feature_vectors | prob);
94 vector<double> gradient(feature_vectors[0].get_fcount(), 0);
96 // Hold ground truth probability distribution
97 const vector<double> prob_y(prob[0]);
98 // Hold prediction score probability distribution
99 const vector<double> prob_z(prob[1]);
101 for (size_t i = 0; i < feature_vectors.size(); ++i) {
102 vector<double> fvals = feature_vectors[i].get_fvals();
103 for (size_t k = 0; k < fvals.size(); ++k) {
104 double first_term = - prob_y[i] * fvals[k];
105 gradient[k] += first_term;
107 double second_term = prob_z[i] * fvals[k];
108 gradient[k] += second_term;
111 return gradient;
114 static void
115 update_parameters(vector<double> &new_parameters, const vector<double> &gradient, double learning_rate) {
116 LOGCALL_STATIC_VOID(API, "update_parameters", new_parameters | gradient | learning_rate);
117 for (size_t i = 0; i < new_parameters.size(); ++i) {
118 new_parameters[i] -= gradient[i] * learning_rate;
122 void
123 ListNETRanker::train(const std::vector<Xapian::FeatureVector> & training_data) {
124 LOGCALL_VOID(API, "ListNETRanker::train", training_data);
125 size_t fvv_len = training_data.size();
126 int feature_cnt = -1;
127 if (fvv_len != 0) {
128 feature_cnt = training_data[0].get_fcount();
129 } else {
130 throw LetorInternalError("Training data is empty. Check training file.");
132 // initialize the parameters for neural network
133 vector<double> new_parameters(feature_cnt, 0.0);
135 // iterations
136 for (int iter_num = 1; iter_num < iterations; ++iter_num) {
137 // initialize Probability distributions of y and z
138 prob_distrib_vector prob = init_probability(training_data, new_parameters);
139 // compute gradient
140 vector<double> gradient = calculate_gradient(training_data, prob);
141 // update parameters: w = w - gradient * learningRate
142 update_parameters(new_parameters, gradient, learning_rate);
144 swap(parameters, new_parameters);
147 void
148 ListNETRanker::save_model_to_metadata(const string & model_key) {
149 LOGCALL_VOID(API, "ListNETRanker::save_model_to_metadata", model_key);
150 Xapian::WritableDatabase letor_db(get_database_path());
151 string key = model_key;
152 if (key.empty()) {
153 key = "ListNET.model.default";
155 string model_data;
156 for (size_t i = 0; i < parameters.size(); ++i)
157 model_data += serialise_double(parameters[i]);
158 letor_db.set_metadata(key, model_data);
161 void
162 ListNETRanker::load_model_from_metadata(const string & model_key) {
163 LOGCALL_VOID(API, "ListNETRanker::load_model_from_metadata", model_key);
164 Xapian::Database letor_db(get_database_path());
165 string key = model_key;
166 if (key.empty()) {
167 key = "ListNET.model.default";
169 string model_data = letor_db.get_metadata(key);
170 // Throw exception if no model data associated with key
171 if (model_data.empty()) {
172 throw Xapian::LetorInternalError("No model found. Check key.");
174 vector<double> loaded_parameters;
175 const char *ptr = model_data.data();
176 const char *end = ptr + model_data.size();
177 while (ptr != end) {
178 loaded_parameters.push_back(unserialise_double(&ptr, end));
180 swap(parameters, loaded_parameters);
183 std::vector<FeatureVector>
184 ListNETRanker::rank_fvv(const std::vector<FeatureVector> & fvv) const {
185 LOGCALL(API, std::vector<FeatureVector>, "ListNETRanker::rank_fvv", fvv);
186 std::vector<FeatureVector> testfvv = fvv;
187 for (size_t i = 0; i < testfvv.size(); ++i) {
188 double listnet_score = 0;
189 std::vector<double> fvals = testfvv[i].get_fvals();
190 if (fvals.size() != parameters.size())
191 throw LetorInternalError("Model incompatible. Make sure that you are using "
192 "the same set of Features using which the model was created.");
193 for (size_t j = 0; j < fvals.size(); ++j)
194 listnet_score += fvals[j] * parameters[j];
195 testfvv[i].set_score(listnet_score);
197 return testfvv;