/** @file cosine_sim.cc
 * @brief Cosine similarity calculation between documents
 */
/* Copyright (C) 2016 Richhiey Thomas
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
 */
24 #include "xapian/cluster.h"
31 using namespace Xapian
;
34 CosineDistance::get_description() const
36 return "CosineDistance()";
40 CosineDistance::similarity(const PointType
&a
, const PointType
&b
) const
42 LOGCALL(API
, double, "CosineDistance::similarity", a
| b
);
43 double denom_a
= a
.get_magnitude();
44 double denom_b
= b
.get_magnitude();
45 double inner_product
= 0;
47 if (denom_a
== 0 || denom_b
== 0)
50 for (TermIterator it
= a
.termlist_begin(); it
!= a
.termlist_end(); ++it
) {
51 const string
&term
= *it
;
52 double a_weight
= a
.get_weight(term
);
55 double b_weight
= b
.get_weight(term
);
58 inner_product
+= a_weight
* b_weight
;
61 return 1 - (inner_product
/ (sqrt(denom_a
* denom_b
)));