Make sure EOF is defined
[xapian.git] / xapian-letor / api / featurelist.cc
blob53c1018ad4ecedf1cff5c0f853fcfeec10135646
1 /** @file featurelist.cc
2 * @brief Definition of FeatureList class
3 */
4 /* Copyright (C) 2016 Ayush Tomar
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of the
9 * License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
19 * USA
22 #include <config.h>
24 #include "xapian-letor/featurelist.h"
25 #include "xapian-letor/feature.h"
26 #include "xapian-letor/featurevector.h"
27 #include "featurelist_internal.h"
29 #include "debuglog.h"
31 using namespace std;
33 namespace Xapian {
35 FeatureList::FeatureList() : internal(new FeatureList::Internal())
37 LOGCALL_CTOR(API, "FeatureList", NO_ARGS);
38 internal->feature.push_back(new TfFeature());
39 internal->feature.push_back(new TfDoclenFeature());
40 internal->feature.push_back(new IdfFeature());
41 internal->feature.push_back(new CollTfCollLenFeature());
42 internal->feature.push_back(new TfIdfDoclenFeature());
43 internal->feature.push_back(new TfDoclenCollTfCollLenFeature());
46 FeatureList::FeatureList(const std::vector<Feature*> & f)
47 : internal(new FeatureList::Internal())
49 LOGCALL_CTOR(API, "FeatureList", f);
50 internal->feature = f;
53 FeatureList::~FeatureList()
55 LOGCALL_DTOR(API, "FeatureList");
56 for (Feature* it : internal->feature)
57 delete it;
58 internal->feature.clear();
61 void
62 FeatureList::normalise(std::vector<FeatureVector> & fvec) const
64 LOGCALL_VOID(API, "FeatureList::normalise", fvec);
65 // find the max value for each feature for all the FeatureVectors in the vector.
66 int num_features = fvec[0].get_fcount();
67 double temp = 0.0;
68 double max[num_features];
70 for (int i = 0; i < num_features; ++i)
71 max[i] = 0.0;
73 for (size_t i = 0; i < fvec.size(); ++i) {
74 for (int j = 0; j < num_features; ++j) {
75 double fval = fvec[i].get_fvals()[j];
76 if (max[j] < fval)
77 max[j] = fval;
80 /* We have the maximum value of each feature overall.
81 Now we need to normalize each feature value of a
82 FeatureVector by dividing it by the corresponding max of the feature value
84 for (size_t i = 0; i < fvec.size(); ++i) {
85 for (int j = 0; j < num_features; ++j) {
86 temp = fvec[i].get_feature_value(j);
87 temp /= max[j];
88 if (max[j] == 0) // Skip if dividing by zero
89 continue;
90 fvec[i].set_feature_value(j, temp);
95 std::vector<FeatureVector>
96 FeatureList::create_feature_vectors(const Xapian::MSet & mset,
97 const Xapian::Query & letor_query,
98 const Xapian::Database & letor_db) const
100 LOGCALL(API, std::vector<FeatureVector>, "FeatureList::create_feature_vectors", mset | letor_query | letor_db);
101 if (mset.empty())
102 return vector<FeatureVector>();
103 std::vector<FeatureVector> fvec;
105 for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); ++i) {
106 Xapian::Document doc = i.get_document();
107 std::vector<double> fvals;
108 internal->set_data(letor_query, letor_db, doc);
109 for (Feature* it : internal->feature) {
110 it->set_database(letor_db);
111 it->set_query(letor_query);
112 it->set_doc(doc);
113 // Computes and populates the Feature with required stats.
114 internal->populate_feature(it);
115 const vector<double>& values = it->get_values();
116 // Append feature values
117 fvals.insert(fvals.end(), values.begin(), values.end());
119 double wt = i.get_weight();
120 // Weight is added as a feature by default.
121 fvals.push_back(wt);
122 Xapian::docid did = doc.get_docid();
123 // construct a FeatureVector object using did and fvals.
124 Xapian::FeatureVector fv(did, fvals);
125 fvec.push_back(fv);
126 internal->clear_stats();
128 normalise(fvec);
129 return fvec;