Correct typo: .clangformat -> .clang-format
[xapian.git] / xapian-letor / feature / tfdoclencolltfcolllenfeature.cc
blobeeadfad414e1190fa0acbd5d17370197bc4ef42d
1 /** @file tfdoclencolltfcolllenfeature.cc
2 * @brief TfDoclenCollTfCollLenFeature class
3 */
4 /* Copyright (C) 2012 Parth Gupta
5 * Copyright (C) 2016 Ayush Tomar
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #include <config.h>
24 #include "xapian-letor/feature.h"
26 #include "debuglog.h"
27 #include "stringutils.h"
29 using namespace std;
31 namespace Xapian {
33 string
34 TfDoclenCollTfCollLenFeature::name() const
36 return "TfDoclenCollTfCollLenFeature";
39 /** A helper function for feature->get_value()
41 * Checks if the term belongs to the title or is stemmed from the title.
43 inline bool
44 is_title_term(const std::string& term)
46 return startswith(term, 'S') || startswith(term, "ZS");
49 vector<double>
50 TfDoclenCollTfCollLenFeature::get_values() const
52 LOGCALL(API, vector<double>, "TfDoclenCollTfCollLenFeature::get_values", NO_ARGS);
54 vector<double> values;
55 double value = 0;
56 double coll_len;
57 double doc_len;
58 auto coll_len_iterator = collection_length.find("title");
59 if (coll_len_iterator != collection_length.end())
60 coll_len = (double)coll_len_iterator->second;
61 else
62 coll_len = 0;
63 auto doc_len_iterator = doc_length.find("title");
64 if (doc_len_iterator != doc_length.end())
65 doc_len = (double)doc_len_iterator->second;
66 else
67 doc_len = 0;
69 for (TermIterator qt = feature_query.get_unique_terms_begin();
70 qt != feature_query.get_terms_end(); ++qt) {
71 if (is_title_term((*qt))) {
72 double tf;
73 double coll_tf;
74 auto tf_iterator = termfreq.find(*qt);
75 auto coll_tf_iterator = collection_termfreq.find(*qt);
76 if (tf_iterator != termfreq.end())
77 tf = (double)tf_iterator->second;
78 else
79 tf = 0;
80 if (coll_tf_iterator != collection_termfreq.end())
81 coll_tf = (double)coll_tf_iterator->second;
82 else
83 coll_tf = 0;
84 value += log10(1 + ((tf * coll_len) / (1 + (doc_len * coll_tf))));
87 values.push_back(value);
88 value = 0;
89 coll_len_iterator = collection_length.find("body");
90 if (coll_len_iterator != collection_length.end())
91 coll_len = (double)coll_len_iterator->second;
92 else
93 coll_len = 0;
94 doc_len_iterator = doc_length.find("body");
95 if (doc_len_iterator != doc_length.end())
96 doc_len = (double)doc_len_iterator->second;
97 else
98 doc_len = 0;
100 for (Xapian::TermIterator qt = feature_query.get_unique_terms_begin();
101 qt != feature_query.get_terms_end(); ++qt) {
102 if (!is_title_term((*qt))) {
103 double tf;
104 double coll_tf;
105 auto tf_iterator = termfreq.find(*qt);
106 auto coll_tf_iterator = collection_termfreq.find(*qt);
107 if (tf_iterator != termfreq.end())
108 tf = (double)tf_iterator->second;
109 else
110 tf = 0;
111 if (coll_tf_iterator != collection_termfreq.end())
112 coll_tf = (double)coll_tf_iterator->second;
113 else
114 coll_tf = 0;
115 value += log10(1 + ((tf * coll_len) / (1 + (doc_len * coll_tf))));
118 values.push_back(value);
119 value = 0;
120 coll_len_iterator = collection_length.find("whole");
121 if (coll_len_iterator != collection_length.end())
122 coll_len = (double)coll_len_iterator->second;
123 else
124 coll_len = 0;
125 doc_len_iterator = doc_length.find("whole");
126 if (doc_len_iterator != doc_length.end())
127 doc_len = (double)doc_len_iterator->second;
128 else
129 doc_len = 0;
131 for (Xapian::TermIterator qt = feature_query.get_unique_terms_begin();
132 qt != feature_query.get_terms_end(); ++qt) {
133 double tf;
134 double coll_tf;
135 auto tf_iterator = termfreq.find(*qt);
136 auto coll_tf_iterator = collection_termfreq.find(*qt);
137 if (tf_iterator != termfreq.end())
138 tf = (double)tf_iterator->second;
139 else
140 tf = 0;
141 if (coll_tf_iterator != collection_termfreq.end())
142 coll_tf = (double)coll_tf_iterator->second;
143 else
144 coll_tf = 0;
145 value += log10(1 + ((tf * coll_len) / (1 + (doc_len * coll_tf))));
147 values.push_back(value);
149 return values;