Fetch the doclength upper bound at most once
[xapian.git] / xapian-core / cluster / cosine_sim.cc
blobfb3f6a954cf19f1b13766131b57174cab36fe2dd
1 /** @file cosine_sim.cc
2 * @brief Cosine similarity calculation between documents
3 */
4 /* Copyright (C) 2016 Richhiey Thomas
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of the
9 * License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
19 * USA
22 #include <config.h>
24 #include "xapian/cluster.h"
26 #include "debuglog.h"
28 #include <cmath>
30 using namespace std;
31 using namespace Xapian;
33 string
34 CosineDistance::get_description() const
36 return "CosineDistance()";
39 double
40 CosineDistance::similarity(const PointType &a, const PointType &b) const
42 LOGCALL(API, double, "CosineDistance::similarity", a | b);
43 double denom_a = a.get_magnitude();
44 double denom_b = b.get_magnitude();
45 double inner_product = 0;
47 if (denom_a == 0 || denom_b == 0)
48 return 0.0;
50 for (TermIterator it = a.termlist_begin(); it != a.termlist_end(); ++it) {
51 const string &term = *it;
52 double a_weight = a.get_weight(term);
53 if (a_weight == 0)
54 continue;
55 double b_weight = b.get_weight(term);
56 if (b_weight == 0)
57 continue;
58 inner_product += a_weight * b_weight;
61 return 1 - (inner_product / (sqrt(denom_a * denom_b)));