Add colon after prefix when term starts with a colon
[xapian.git] / xapian-core / tests / api_cluster.cc
blob7de6aaca9da66da0391245bf5d7e7a9ff435aff7
1 /** @file api_cluster.cc
2 * @brief Cluster API tests
3 */
4 /* Copyright (C) 2016 Richhiey Thomas
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of the
9 * License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
19 * USA
22 #include <config.h>
24 #include "api_cluster.h"
26 #include <xapian.h>
28 #include "apitest.h"
29 #include "testsuite.h"
30 #include "testutils.h"
32 static void
33 make_stemmed_cluster_db(Xapian::WritableDatabase &db, const std::string &)
35 static const char* const test_strings[] = {
36 "This line is about a cluster. Cluster is important and is everywhere",
37 "We need to search for special cluster. Cluster cluster cluster",
38 "Computer cluster is a special example of a cluster. Used to search fast",
39 "Another example of cluster is a star cluster. Star cluster has a lot of stars"
42 Xapian::TermGenerator indexer;
43 Xapian::Stem stemmer("english");
44 indexer.set_stemmer(stemmer);
45 for (const std::string& document_data : test_strings) {
46 Xapian::Document document;
47 document.set_data(document_data);
48 indexer.set_document(document);
49 indexer.index_text(document_data);
50 db.add_document(document);
54 /** Round Robin clusterer:
55 * This clusterer is a minimal clusterer which will cluster documents as -
56 * ith document goes to the (i % k)th cluster where k is the number of clusters and
57 * 0 <= i < N; where N is the number of documents
59 class RoundRobin : public Xapian::Clusterer {
60 /// Number of clusters to be formed by the clusterer
61 unsigned int num_of_clusters;
63 public:
64 /** Constructor
66 * @param num_of_clusters_ Number of required clusters
68 explicit RoundRobin(unsigned int num_of_clusters_) : num_of_clusters(num_of_clusters_) {}
70 /** Implements the RoundRobin clustering
72 * @param mset MSet object containing the documents that are to
73 * be clustered
75 Xapian::ClusterSet cluster(const Xapian::MSet &mset);
77 std::string get_description() const {
78 return "RoundRobin()";
82 Xapian::ClusterSet
83 RoundRobin::cluster(const Xapian::MSet &mset)
85 Xapian::TermListGroup tlg(mset);
86 Xapian::ClusterSet cset;
87 std::vector<Xapian::Point> points;
89 for (Xapian::MSetIterator it = mset.begin(); it != mset.end(); ++it)
90 points.push_back(Xapian::Point(tlg, it.get_document()));
92 unsigned int i = 0;
93 while (i < num_of_clusters) {
94 Xapian::Cluster cluster_rr;
95 cset.add_cluster(cluster_rr);
96 i++;
99 unsigned int size = points.size();
100 for (i = 0; i < size; ++i)
101 cset.add_to_cluster(points[i], i % num_of_clusters);
103 return cset;
106 /** Test for cosine distance
107 * Cosine distance = 1 - (cosine of the angle between two vectors).
108 * Thus, if two vectors are equal, the distance between them will be zero
109 * and if two vectors are unequal, the distance will be 1 >= dist >= 0.
111 DEFINE_TESTCASE(cosine_distance1, generated)
113 Xapian::Database db = get_database("stemmed_cluster", make_stemmed_cluster_db);
114 Xapian::Enquire enquire(db);
115 enquire.set_query(Xapian::Query("cluster"));
117 Xapian::MSet matches = enquire.get_mset(0, 4);
118 Xapian::TermListGroup tlg(matches);
119 Xapian::Document doc1 = matches[0].get_document();
120 Xapian::Document doc2 = matches[1].get_document();
121 Xapian::Point x1(tlg, doc1);
122 Xapian::Point x2(tlg, doc2);
124 // Check whether same vector gives zero distance
125 Xapian::CosineDistance d;
126 double distance = d.similarity(x1, x1);
127 TEST_EQUAL(distance, 0);
129 // Check whether two different vectors gives a distance such that
130 // 0 < distance <= 1
131 distance = d.similarity(x1, x2);
132 TEST_REL(distance, >, 0);
133 TEST_REL(distance, <=, 1);
135 return true;
138 /** Round Robin Test
139 * Test that none of the returned clusters are empty
141 DEFINE_TESTCASE(round_robin1, generated)
143 Xapian::Database db = get_database("stemmed_cluster", make_stemmed_cluster_db);
144 Xapian::Enquire enq(db);
145 enq.set_query(Xapian::Query("cluster"));
146 Xapian::MSet matches = enq.get_mset(0, 4);
148 int num_clusters = 3;
149 RoundRobin rr(num_clusters);
150 Xapian::ClusterSet cset = rr.cluster(matches);
151 int size = cset.size();
152 for (int i = 0; i < size; ++i) {
153 Xapian::DocumentSet d = cset[i].get_documents();
154 TEST(d.size() != 0);
156 return true;
159 DEFINE_TESTCASE(stem_stopper1, backend)
161 Xapian::Stem stemmer("english");
162 // By default, stemming strategy used is STEM_SOME
163 Xapian::StemStopper stopper(stemmer);
164 std::string term = "the";
165 stopper.add(term);
166 TEST(stopper(term));
167 TEST(stopper('Z' + stemmer(term)));
168 term = "cluster";
169 TEST(!stopper(term));
170 TEST(!stopper('Z' + stemmer(term)));
172 Xapian::StemStopper stopper_all_z(stemmer, Xapian::StemStopper::STEM_ALL_Z);
173 Xapian::StemStopper stopper_all(stemmer, Xapian::StemStopper::STEM_ALL);
174 term = "because";
175 stopper_all.add(term);
176 stopper_all_z.add(term);
177 TEST(!stopper_all_z(term));
178 TEST(!stopper_all_z(stemmer(term)));
179 TEST(stopper_all_z('Z' + stemmer(term)));
180 TEST(!stopper_all(term));
181 TEST(!stopper_all('Z' + stemmer(term)));
182 TEST(stopper_all(stemmer(term)));
184 Xapian::StemStopper stopper_none(stemmer, Xapian::StemStopper::STEM_NONE);
185 term = "and";
186 stopper_none.add(term);
187 TEST(stopper_none(term));
188 TEST(!stopper_none('Z' + stemmer(term)));
190 return true;