Fix compilation failure with GCC 4.8
[xapian.git] / xapian-letor / tests / api_letor.cc
blobcf913c5a11a9495c94e0630ddeffa14faa89fe46
1 /** @file api_letor.cc
2 * @brief test common features of API classes
3 */
4 /* Copyright (C) 2007,2009,2012,2014,2015,2016 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#include <config.h>

#include "api_letor.h"

#include <cmath>
#include <cstdlib>
#include <fstream>
#include <sstream>

#include <xapian.h>
#include <xapian-letor.h>

#include "apitest.h"
#include "testutils.h"
35 using namespace std;
37 // To check for one document edge
38 static void
39 db_index_one_document(Xapian::WritableDatabase& db, const string&)
41 Xapian::Document doc;
42 Xapian::TermGenerator termgenerator;
43 termgenerator.set_document(doc);
44 termgenerator.set_stemmer(Xapian::Stem("en"));
45 termgenerator.index_text("Tigers are solitary animals", 1, "S");
46 termgenerator.index_text("Might be that only one Tiger is good enough to "
47 "Take out a ranker, a Tiger is a good way to "
48 "check if a test is working or Tiger not. Tiger."
49 "What if the next line contains no Tigers? Would "
50 "it make a difference to your ranker ? Tigers "
51 "for the win.", 1, "XD");
52 termgenerator.index_text("The will.");
53 termgenerator.increase_termpos();
54 termgenerator.index_text("Tigers would not be caught if one calls out the "
55 "Tiger from the den. This document is to check if "
56 "in the massive dataset, you forget the sense of "
57 "something you would not like to stop.");
58 db.add_document(doc);
61 static void
62 db_index_two_documents(Xapian::WritableDatabase& db, const string&)
64 Xapian::Document doc;
65 Xapian::TermGenerator termgenerator;
66 termgenerator.set_document(doc);
67 termgenerator.set_stemmer(Xapian::Stem("en"));
68 termgenerator.index_text("Lions, Tigers, Bears and Giraffes", 1, "S");
69 termgenerator.index_text("This paragraph talks about lions and tigers and "
70 "bears (oh, my!). It mentions giraffes, "
71 "but that's not really very important. Lions "
72 "and tigers are big cats, so they must be really "
73 "cuddly. Bears are famous for being cuddly, at "
74 "least when they're teddy bears.", 1, "XD");
75 termgenerator.index_text("Lions, Tigers, Bears and Giraffes");
76 termgenerator.increase_termpos();
77 termgenerator.index_text("This paragraph talks about lions and tigers and "
78 "bears (oh, my!). It mentions giraffes, "
79 "but that's not really very important. Lions "
80 "and tigers are big cats, so they must be really "
81 "cuddly. Bears are famous for being cuddly, at "
82 "least when they're teddy bears.");
83 db.add_document(doc);
84 doc.clear_terms();
85 termgenerator.index_text("Lions, Tigers and Bears", 1, "S");
86 termgenerator.index_text("This is the paragraph of interest. Tigers are "
87 "massive beasts - I wouldn't want to meet a "
88 "hungry one anywhere. Lions are scary even when "
89 "lyin' down. Bears are scary even when bare. "
90 "Together I suspect they'd be less scary, as the "
91 "tigers, lions, and bears would all keep each "
92 "other busy. On the other hand, bears don't live "
93 "in the same continent as far as I know.", 1,
94 "XD");
95 termgenerator.index_text("Lions, Tigers and Bears");
96 termgenerator.increase_termpos();
97 termgenerator.index_text("This is the paragraph of interest. Tigers are "
98 "massive beasts - I wouldn't want to meet a "
99 "hungry one anywhere. Lions are scary even when "
100 "lyin' down. Bears are scary even when bare. "
101 "Together I suspect they'd be less scary, as the "
102 "tigers, lions, and bears would all keep each "
103 "other busy. On the other hand, bears don't live "
104 "in the same continent as far as I know.");
105 db.add_document(doc);
108 // To check for three documents. out of which one is irrelevant
109 static void
110 db_index_three_documents(Xapian::WritableDatabase& db, const string&)
112 Xapian::Document doc;
113 Xapian::TermGenerator termgenerator;
114 termgenerator.set_document(doc);
115 termgenerator.set_stemmer(Xapian::Stem("en"));
116 termgenerator.index_text("The will", 1, "S");
117 termgenerator.index_text("The will are considered stop words in xapian and "
118 "would be thrown off, so the query I want to say "
119 "is score, yes, score. The Score of a game is "
120 "the determining factor of a game, the score is "
121 "what matters at the end of the day. so my advise "
122 "to everyone is to Score it!.", 1, "XD");
123 termgenerator.index_text("Score might be something else too, but this para "
124 "refers to score only at an abstract. Scores are "
125 "in general scoring. Score it!");
126 termgenerator.increase_termpos();
127 termgenerator.index_text("Score score score is important.");
128 db.add_document(doc);
129 doc.clear_terms();
130 termgenerator.index_text("Score score score score score score", 1, "S");
131 termgenerator.index_text("it might have an absurdly high rank in the qrel "
132 "file or might have no rank at all in another. "
133 "Look out for this as a testcase, might be edgy "
134 "good luck and may this be with you.", 1, "XD");
135 termgenerator.index_text("Another irrelevant paragraph to make sure the tf "
136 "values are down, but this increases idf values "
137 "but let's see how this works out.");
138 termgenerator.increase_termpos();
139 termgenerator.index_text("Nothing to do with the query.");
140 db.add_document(doc);
141 doc.clear_terms();
142 termgenerator.index_text("Document has nothing to do with score", 1, "S");
143 termgenerator.index_text("This is just to check if score is given a higher "
144 "score if it is in the subject or not. Nothing "
145 "special, just juding scores by the look of it. "
146 "Some more scores but a bad qrel should be enough "
147 "to make sure it is ranked down.", 1, "XD");
148 termgenerator.index_text("Score might be something else too, but this para "
149 "refers to score only at an abstract. Scores are "
150 "in general scoring. Score it!");
151 termgenerator.increase_termpos();
152 termgenerator.index_text("Score score score is important.");
153 db.add_document(doc);
156 DEFINE_TESTCASE(createfeaturevector, generated)
158 Xapian::FeatureList fl;
159 Xapian::Database db = get_database("apitest_ranker1",
160 db_index_two_documents);
161 Xapian::Enquire enquire(db);
162 enquire.set_query(Xapian::Query("lions"));
163 Xapian::MSet mset;
164 auto fv = fl.create_feature_vectors(mset, Xapian::Query("lions"), db);
165 mset = enquire.get_mset(0, 10);
166 TEST(!mset.empty());
167 TEST_EQUAL(mset.size(), 2);
168 fv = fl.create_feature_vectors(mset, Xapian::Query("lions"), db);
169 TEST_EQUAL(fv.size(), 2);
170 TEST_EQUAL(fv[0].get_fcount(), 19);
171 TEST_EQUAL(fv[1].get_fcount(), 19);
172 return true;
175 DEFINE_TESTCASE(createfeaturevectoronevector, generated)
177 Xapian::FeatureList fl;
178 Xapian::Database db = get_database("apitest_ranker2",
179 db_index_one_document);
180 Xapian::Enquire enquire(db);
181 enquire.set_query(Xapian::Query("tigers"));
182 Xapian::MSet mset;
183 auto fv = fl.create_feature_vectors(mset, Xapian::Query("tigers"), db);
184 mset = enquire.get_mset(0, 10);
185 TEST(!mset.empty());
186 fv = fl.create_feature_vectors(mset, Xapian::Query("tigers"), db);
187 TEST_EQUAL(fv.size(), 1);
188 TEST_EQUAL(fv[0].get_fcount(), 19);
189 return true;
192 DEFINE_TESTCASE(createfeaturevectoronevector_wrongquery, generated)
194 Xapian::FeatureList fl;
195 Xapian::Database db = get_database("apitest_ranker3",
196 db_index_one_document);
197 Xapian::Enquire enquire(db);
198 enquire.set_query(Xapian::Query("llamas"));
199 Xapian::MSet mset;
200 auto fv = fl.create_feature_vectors(mset, Xapian::Query("llamas"), db);
201 mset = enquire.get_mset(0, 10);
202 TEST(mset.empty());
203 fv = fl.create_feature_vectors(mset, Xapian::Query("llamas"), db);
204 TEST_EQUAL(fv.size(), 0);
205 return true;
208 DEFINE_TESTCASE(createfeaturevectorthree, generated)
210 Xapian::FeatureList fl;
211 Xapian::Database db = get_database("apitest_ranker4",
212 db_index_three_documents);
213 Xapian::Enquire enquire(db);
214 enquire.set_query(Xapian::Query("score"));
215 Xapian::MSet mset;
216 auto fv = fl.create_feature_vectors(mset, Xapian::Query("score"), db);
217 mset = enquire.get_mset(0, 10);
218 TEST(!mset.empty());
219 fv = fl.create_feature_vectors(mset, Xapian::Query("score"), db);
220 TEST_EQUAL(fv.size(), 2);
221 TEST_EQUAL(fv[0].get_fcount(), 19);
222 TEST_EQUAL(fv[1].get_fcount(), 19);
223 return true;
226 DEFINE_TESTCASE(preparetrainingfileonedb, generated)
228 string db_path = get_database_path("apitest_listnet_ranker1",
229 db_index_one_document);
230 string data_directory = test_driver::get_srcdir() + "/testdata/";
231 string query = data_directory + "queryone.txt";
232 string qrel = data_directory + "qrelone.txt";
233 string training_data = data_directory + "training_data_one_document.txt";
234 Xapian::prepare_training_file(db_path, query, qrel, 10,
235 "training_output_data_one_doc.txt");
236 ifstream if1(training_data);
237 ifstream if2("training_output_data_one_doc.txt");
238 string line1;
239 string line2;
240 while (getline(if1, line1)) {
241 TEST(getline(if2, line2));
242 istringstream iss1(line1);
243 istringstream iss2(line2);
244 string temp1;
245 string temp2;
246 int i = 0;
247 while ((iss1 >> temp1) && (iss2 >> temp2)) {
248 // The 0th, 1st and 21st literals taken as input, are strings,
249 // and can be compared directly, They are: For example(test):
250 // ("1", "qid:20001" and "#docid=1") at 0th, 1st, and 21st pos
251 // respectively. Whereas the other values are doubles which
252 // would have to tested under TEST_DOUBLE() against precision.
253 if (i == 0 || i == 1 || i == 21) {
254 TEST_EQUAL(temp1, temp2);
255 } else {
256 size_t t1 = temp1.find_first_of(':');
257 size_t t2 = temp2.find_first_of(':');
258 TEST_EQUAL_DOUBLE(stod(temp1.substr(t1 + 1)),
259 stod(temp2.substr(t2 + 1)));
261 i++;
263 TEST_REL(i, ==, 22);
264 TEST(!(iss2 >> temp2));
266 TEST(!getline(if2, line2));
267 return true;
270 // Check stability for an empty qrel file
271 DEFINE_TESTCASE(preparetrainingfileonedb_empty_qrel, generated)
273 string db_path = get_database_path("ranker_empty",
274 db_index_one_document);
275 string data_directory = test_driver::get_srcdir() + "/testdata/";
276 string query = data_directory + "queryone.txt";
277 string qrel = data_directory + "empty_file.txt";
278 string training_data = data_directory + "empty_file.txt";
279 Xapian::prepare_training_file(db_path, query, qrel, 10,
280 "training_output_empty.txt");
281 ifstream if1(training_data);
282 ifstream if2("training_output_empty.txt");
283 string line1;
284 string line2;
285 while (getline(if1, line1)) {
286 TEST(getline(if2, line2));
287 istringstream iss1(line1);
288 istringstream iss2(line2);
289 string temp1;
290 string temp2;
291 int i = 0;
292 while ((iss1 >> temp1) && (iss2 >> temp2)) {
293 if (i == 0 || i == 1 || i == 21) {
294 TEST_EQUAL(temp1, temp2);
295 } else {
296 size_t t1 = temp1.find_first_of(':');
297 size_t t2 = temp2.find_first_of(':');
298 TEST_EQUAL_DOUBLE(stod(temp1.substr(t1 + 1)),
299 stod(temp2.substr(t2 + 1)));
301 i++;
303 TEST_REL(i, ==, 22);
304 TEST(!(iss2 >> temp2));
306 TEST(!getline(if2, line2));
307 return true;
310 DEFINE_TESTCASE(preparetrainingfile_two_docs, generated)
312 string db_path = get_database_path("apitest_listnet_ranker2",
313 db_index_two_documents);
314 string data_directory = test_driver::get_srcdir() + "/testdata/";
315 string query = data_directory + "query.txt";
316 string qrel = data_directory + "qrel.txt";
317 string training_data = data_directory + "training_data.txt";
318 Xapian::prepare_training_file(db_path, query, qrel, 10,
319 "training_output1.txt");
320 ifstream if1(training_data);
321 ifstream if2("training_output1.txt");
322 string line1;
323 string line2;
324 while (getline(if1, line1)) {
325 TEST(getline(if2, line2));
326 istringstream iss1(line1);
327 istringstream iss2(line2);
328 string temp1;
329 string temp2;
330 int i = 0;
331 while ((iss1 >> temp1) && (iss2 >> temp2)) {
332 if (i == 0 || i == 1 || i == 21) {
333 TEST_EQUAL(temp1, temp2);
334 } else {
335 size_t t1 = temp1.find_first_of(':');
336 size_t t2 = temp2.find_first_of(':');
337 TEST_EQUAL_DOUBLE(stod(temp1.substr(t1 + 1)),
338 stod(temp2.substr(t2 + 1)));
340 i++;
342 TEST_REL(i, ==, 22);
343 TEST(!(iss2 >> temp2));
345 TEST(!getline(if2, line2));
346 return true;
349 DEFINE_TESTCASE(preparetrainingfilethree, generated)
351 string db_path = get_database_path("apitest_listnet_ranker4",
352 db_index_three_documents);
353 string data_directory = test_driver::get_srcdir() + "/testdata/";
354 string query = data_directory + "querythree.txt";
355 string qrel = data_directory + "qrelthree_correct.txt";
356 string training_data = data_directory + "training_data_three_correct.txt";
357 Xapian::prepare_training_file(db_path, query, qrel, 10,
358 "training_output_three_correct.txt");
359 ifstream if1(training_data);
360 ifstream if2("training_output_three_correct.txt");
361 string line1;
362 string line2;
363 while (getline(if1, line1)) {
364 TEST(getline(if2, line2));
365 istringstream iss1(line1);
366 istringstream iss2(line2);
367 string temp1;
368 string temp2;
369 int i = 0;
370 while ((iss1 >> temp1) && (iss2 >> temp2)) {
371 if (i == 0 || i == 1 || i == 21) {
372 TEST_EQUAL(temp1, temp2);
373 } else {
374 size_t t1 = temp1.find_first_of(':');
375 size_t t2 = temp2.find_first_of(':');
376 TEST_EQUAL_DOUBLE(stod(temp1.substr(t1 + 1)),
377 stod(temp2.substr(t2 + 1)));
379 i++;
381 TEST_REL(i, ==, 22);
382 TEST(!(iss2 >> temp2));
384 TEST(!getline(if2, line2));
385 return true;
388 // ListNet_Ranker check
389 DEFINE_TESTCASE(listnet_ranker, generated)
391 Xapian::ListNETRanker ranker;
392 TEST_EXCEPTION(Xapian::FileNotFoundError, ranker.train_model(""));
393 string db_path = get_database_path("apitest_listnet_ranker",
394 db_index_two_documents);
395 Xapian::Enquire enquire((Xapian::Database(db_path)));
396 enquire.set_query(Xapian::Query("lions"));
397 Xapian::MSet mymset = enquire.get_mset(0, 10);
398 string data_directory = test_driver::get_srcdir() + "/testdata/";
399 string query = data_directory + "query.txt";
400 string qrel = data_directory + "qrel.txt";
401 string training_data = data_directory + "training_data.txt";
402 ranker.set_database_path(db_path);
403 TEST_EQUAL(ranker.get_database_path(), db_path);
404 ranker.set_query(Xapian::Query("lions"));
405 ranker.train_model(training_data);
406 Xapian::docid doc1 = *mymset[0];
407 Xapian::docid doc2 = *mymset[1];
408 ranker.rank(mymset);
409 TEST_EQUAL(doc1, *mymset[1]);
410 TEST_EQUAL(doc2, *mymset[0]);
411 mymset = enquire.get_mset(0, 10);
412 ranker.train_model(training_data, "ListNet_Ranker");
413 ranker.rank(mymset, "ListNet_Ranker");
414 TEST_EQUAL(doc1, *mymset[1]);
415 TEST_EQUAL(doc2, *mymset[0]);
416 TEST_EXCEPTION(Xapian::LetorInternalError,
417 ranker.score(query, qrel, "ListNet_Ranker",
418 "scorer_output.txt", 10, ""));
419 TEST_EXCEPTION(Xapian::FileNotFoundError,
420 ranker.score("", qrel, "ListNet_Ranker",
421 "scorer_output.txt", 10));
422 TEST_EXCEPTION(Xapian::FileNotFoundError,
423 ranker.score(qrel, "", "ListNet_Ranker",
424 "scorer_output.txt", 10));
425 ranker.score(query, qrel, "ListNet_Ranker", "ndcg_output_ListNet_2.txt",
426 10);
427 ranker.score(query, qrel, "ListNet_Ranker", "err_output_ListNet_2.txt",
428 10, "ERRScore");
429 return true;
432 DEFINE_TESTCASE(listnet_ranker_one_file, generated)
434 Xapian::ListNETRanker ranker;
435 TEST_EXCEPTION(Xapian::FileNotFoundError, ranker.train_model(""));
436 string db_path = get_database_path("apitest_listnet_ranker5",
437 db_index_one_document);
438 Xapian::Enquire enquire((Xapian::Database(db_path)));
439 enquire.set_query(Xapian::Query("tigers"));
440 Xapian::MSet mymset = enquire.get_mset(0, 10);
441 string data_directory = test_driver::get_srcdir() + "/testdata/";
442 string query = data_directory + "queryone.txt";
443 string qrel = data_directory + "qrelone.txt";
444 string training_data = data_directory + "training_data_one_document.txt";
445 ranker.set_database_path(db_path);
446 TEST_EQUAL(ranker.get_database_path(), db_path);
447 ranker.set_query(Xapian::Query("tigers"));
448 ranker.train_model(training_data);
449 Xapian::docid doc1 = *mymset[0];
450 ranker.rank(mymset);
451 TEST_EQUAL(doc1, *mymset[0]);
452 mymset = enquire.get_mset(0, 10);
453 ranker.train_model(training_data, "ListNet_Ranker");
454 ranker.rank(mymset, "ListNet_Ranker");
455 TEST_EQUAL(doc1, *mymset[0]);
456 TEST_EXCEPTION(Xapian::LetorInternalError,
457 ranker.score(query, qrel, "ListNet_Ranker",
458 "scorer_output.txt", 10, ""));
459 TEST_EXCEPTION(Xapian::FileNotFoundError,
460 ranker.score("", qrel, "ListNet_Ranker",
461 "scorer_output.txt", 10));
462 TEST_EXCEPTION(Xapian::FileNotFoundError,
463 ranker.score(qrel, "", "ListNet_Ranker",
464 "scorer_output.txt", 10));
465 ranker.score(query, qrel, "ListNet_Ranker", "ndcg_output_ListNet_1.txt",
466 10);
467 ranker.score(query, qrel, "ListNet_Ranker", "err_output_ListNet_1.txt", 10,
468 "ERRScore");
469 return true;
472 DEFINE_TESTCASE(listnet_ranker_three_correct, generated)
474 Xapian::ListNETRanker ranker;
475 TEST_EXCEPTION(Xapian::FileNotFoundError, ranker.train_model(""));
476 string db_path = get_database_path("apitest_listnet_ranker6",
477 db_index_three_documents);
478 Xapian::Enquire enquire((Xapian::Database(db_path)));
479 enquire.set_query(Xapian::Query("score"));
480 Xapian::MSet mymset = enquire.get_mset(0, 10);
481 string data_directory = test_driver::get_srcdir() + "/testdata/";
482 string query = data_directory + "querythree.txt";
483 string qrel = data_directory + "qrelthree_correct.txt";
484 string training_data = data_directory + "training_data_three_correct.txt";
485 ranker.set_database_path(db_path);
486 TEST_EQUAL(ranker.get_database_path(), db_path);
487 ranker.set_query(Xapian::Query("score"));
488 ranker.train_model(training_data);
489 Xapian::docid doc1 = *mymset[0];
490 Xapian::docid doc2 = *mymset[1];
491 ranker.rank(mymset);
492 TEST_EQUAL(doc1, *mymset[1]);
493 TEST_EQUAL(doc2, *mymset[0]);
494 mymset = enquire.get_mset(0, 10);
495 ranker.train_model(training_data, "ListNet_Ranker");
496 ranker.rank(mymset, "ListNet_Ranker");
497 TEST_EQUAL(doc1, *mymset[1]);
498 TEST_EQUAL(doc2, *mymset[0]);
499 TEST_EXCEPTION(Xapian::LetorInternalError,
500 ranker.score(query, qrel, "ListNet_Ranker",
501 "scorer_output.txt", 10, ""));
502 TEST_EXCEPTION(Xapian::FileNotFoundError,
503 ranker.score("", qrel, "ListNet_Ranker",
504 "scorer_output.txt", 10));
505 TEST_EXCEPTION(Xapian::FileNotFoundError,
506 ranker.score(qrel, "", "ListNet_Ranker",
507 "scorer_output.txt", 10));
508 ranker.score(query, qrel, "ListNet_Ranker", "ndcg_output_ListNet_3.txt=",
509 10);
510 ranker.score(query, qrel, "ListNet_Ranker", "ndcg_output_ListNet_3.txt", 10,
511 "ERRScore");
512 return true;
515 /// SVM_ranker check
516 DEFINE_TESTCASE(svm_ranker, generated)
518 Xapian::SVMRanker ranker;
519 TEST_EXCEPTION(Xapian::FileNotFoundError, ranker.train_model(""));
520 string db_path = get_database_path("apitest_svm_ranker",
521 db_index_two_documents);
522 Xapian::Enquire enquire((Xapian::Database(db_path)));
523 enquire.set_query(Xapian::Query("lions"));
524 Xapian::MSet mymset = enquire.get_mset(0, 10);
525 string data_directory = test_driver::get_srcdir() + "/testdata/";
526 string query = data_directory + "query.txt";
527 string qrel = data_directory + "qrel.txt";
528 string training_data = data_directory + "training_data.txt";
529 ranker.set_database_path(db_path);
530 TEST_EQUAL(ranker.get_database_path(), db_path);
531 ranker.set_query(Xapian::Query("lions"));
532 ranker.train_model(training_data);
533 Xapian::docid doc1 = *mymset[0];
534 Xapian::docid doc2 = *mymset[1];
535 ranker.rank(mymset);
536 TEST_EQUAL(doc1, *mymset[1]);
537 TEST_EQUAL(doc2, *mymset[0]);
538 mymset = enquire.get_mset(0, 10);
539 ranker.train_model(training_data, "SVM_Ranker");
540 ranker.rank(mymset, "SVM_Ranker");
541 TEST_EQUAL(doc1, *mymset[1]);
542 TEST_EQUAL(doc2, *mymset[0]);
543 TEST_EXCEPTION(Xapian::LetorInternalError,
544 ranker.score(query, qrel, "SVM_Ranker",
545 "scorer_output.txt", 10, ""));
546 TEST_EXCEPTION(Xapian::FileNotFoundError,
547 ranker.score("", qrel, "SVM_Ranker",
548 "scorer_output.txt", 10));
549 TEST_EXCEPTION(Xapian::FileNotFoundError,
550 ranker.score(qrel, "", "SVM_Ranker",
551 "scorer_output.txt", 10));
552 ranker.score(query, qrel, "SVM_Ranker", "ndcg_output_svm_2.txt", 10);
553 ranker.score(query, qrel, "SVM_Ranker", "err_output_svm_2.txt", 10,
554 "ERRScore");
555 return true;
558 DEFINE_TESTCASE(svm_ranker_one_file, generated)
560 Xapian::SVMRanker ranker;
561 TEST_EXCEPTION(Xapian::FileNotFoundError, ranker.train_model(""));
562 string db_path = get_database_path("apitest_svm_ranker1",
563 db_index_one_document);
564 Xapian::Enquire enquire((Xapian::Database(db_path)));
565 enquire.set_query(Xapian::Query("tigers"));
566 Xapian::MSet mymset = enquire.get_mset(0, 10);
567 string data_directory = test_driver::get_srcdir() + "/testdata/";
568 string query = data_directory + "queryone.txt";
569 string qrel = data_directory + "qrelone.txt";
570 string training_data = data_directory + "training_data_one_document.txt";
571 ranker.set_database_path(db_path);
572 TEST_EQUAL(ranker.get_database_path(), db_path);
573 ranker.set_query(Xapian::Query("tigers"));
574 ranker.train_model(training_data);
575 Xapian::docid doc1 = *mymset[0];
576 ranker.rank(mymset);
577 TEST_EQUAL(doc1, *mymset[0]);
578 mymset = enquire.get_mset(0, 10);
579 ranker.train_model(training_data, "SVM_Ranker");
580 ranker.rank(mymset, "SVM_Ranker");
581 TEST_EQUAL(doc1, *mymset[0]);
582 TEST_EXCEPTION(Xapian::LetorInternalError,
583 ranker.score(query, qrel, "SVM_Ranker",
584 "scorer_output.txt", 10, ""));
585 TEST_EXCEPTION(Xapian::FileNotFoundError,
586 ranker.score("", qrel, "SVM_Ranker",
587 "scorer_output.txt", 10));
588 TEST_EXCEPTION(Xapian::FileNotFoundError,
589 ranker.score(qrel, "", "SVM_Ranker",
590 "scorer_output.txt", 10));
591 ranker.score(query, qrel, "SVM_Ranker", "ndcg_output_svm_1.txt", 10);
592 ranker.score(query, qrel, "SVM_Ranker", "err_output_svm_1.txt", 10,
593 "ERRScore");
594 return true;
597 DEFINE_TESTCASE(svm_ranker_three_correct, generated)
599 Xapian::SVMRanker ranker;
600 TEST_EXCEPTION(Xapian::FileNotFoundError, ranker.train_model(""));
601 string db_path = get_database_path("apitest_svm_ranker2",
602 db_index_three_documents);
603 Xapian::Enquire enquire((Xapian::Database(db_path)));
604 enquire.set_query(Xapian::Query("score"));
605 Xapian::MSet mymset = enquire.get_mset(0, 10);
606 string data_directory = test_driver::get_srcdir() + "/testdata/";
607 string query = data_directory + "querythree.txt";
608 string qrel = data_directory + "qrelthree_correct.txt";
609 string training_data = data_directory + "training_data_three_correct.txt";
610 ranker.set_database_path(db_path);
611 TEST_EQUAL(ranker.get_database_path(), db_path);
612 ranker.set_query(Xapian::Query("score"));
613 ranker.train_model(training_data);
614 Xapian::docid doc1 = *mymset[0];
615 Xapian::docid doc2 = *mymset[1];
616 ranker.rank(mymset);
617 TEST_EQUAL(doc1, *mymset[1]);
618 TEST_EQUAL(doc2, *mymset[0]);
619 mymset = enquire.get_mset(0, 10);
620 ranker.train_model(training_data, "SVM_Ranker");
621 ranker.rank(mymset, "SVM_Ranker");
622 TEST_EQUAL(doc1, *mymset[1]);
623 TEST_EQUAL(doc2, *mymset[0]);
624 TEST_EXCEPTION(Xapian::LetorInternalError,
625 ranker.score(query, qrel, "SVM_Ranker",
626 "scorer_output.txt", 10, ""));
627 TEST_EXCEPTION(Xapian::FileNotFoundError,
628 ranker.score("", qrel, "SVM_Ranker",
629 "scorer_output.txt", 10));
630 TEST_EXCEPTION(Xapian::FileNotFoundError,
631 ranker.score(qrel, "", "SVM_Ranker",
632 "scorer_output.txt", 10));
633 ranker.score(query, qrel, "SVM_Ranker", "ndcg_output_svm_3.txt", 10);
634 ranker.score(query, qrel, "SVM_Ranker", "err_output_svm_3.txt", 10,
635 "ERRScore");
636 return true;
639 // ListMLE_Ranker check
640 DEFINE_TESTCASE(listmle_ranker, generated)
642 Xapian::ListMLERanker ranker;
643 TEST_EXCEPTION(Xapian::FileNotFoundError, ranker.train_model(""));
644 string db_path = get_database_path("apitest_listmle_ranker",
645 db_index_two_documents);
646 Xapian::Enquire enquire((Xapian::Database(db_path)));
647 enquire.set_query(Xapian::Query("lions"));
648 Xapian::MSet mymset = enquire.get_mset(0, 10);
649 string data_directory = test_driver::get_srcdir() + "/testdata/";
650 string query = data_directory + "query.txt";
651 string qrel = data_directory + "qrel.txt";
652 string training_data = data_directory + "training_data.txt";
653 ranker.set_database_path(db_path);
654 TEST_EQUAL(ranker.get_database_path(), db_path);
655 ranker.set_query(Xapian::Query("lions"));
656 ranker.train_model(training_data);
657 Xapian::docid doc1 = *mymset[0];
658 Xapian::docid doc2 = *mymset[1];
659 ranker.rank(mymset);
660 TEST_EQUAL(doc1, *mymset[1]);
661 TEST_EQUAL(doc2, *mymset[0]);
662 mymset = enquire.get_mset(0, 10);
663 ranker.train_model(training_data, "ListMLE_Ranker");
664 ranker.rank(mymset, "ListMLE_Ranker");
665 TEST_EQUAL(doc1, *mymset[1]);
666 TEST_EQUAL(doc2, *mymset[0]);
667 TEST_EXCEPTION(Xapian::LetorInternalError,
668 ranker.score(query, qrel, "ListMLE_Ranker",
669 "scorer_output.txt", 10, ""));
670 TEST_EXCEPTION(Xapian::FileNotFoundError,
671 ranker.score("", qrel, "ListMLE_Ranker",
672 "scorer_output.txt", 10));
673 TEST_EXCEPTION(Xapian::FileNotFoundError,
674 ranker.score(qrel, "", "ListMLE_Ranker",
675 "scorer_output.txt", 10));
676 ranker.score(query, qrel, "ListMLE_Ranker", "ndcg_output_listmle_2.txt",
677 10);
678 ranker.score(query, qrel, "ListMLE_Ranker", "err_output_listmle_2.txt", 10,
679 "ERRScore");
680 return true;
683 DEFINE_TESTCASE(listmle_ranker_one_file, generated)
685 Xapian::ListMLERanker ranker;
686 TEST_EXCEPTION(Xapian::FileNotFoundError, ranker.train_model(""));
687 string db_path = get_database_path("apitest_listmle_ranker1",
688 db_index_one_document);
689 Xapian::Enquire enquire((Xapian::Database(db_path)));
690 enquire.set_query(Xapian::Query("tigers"));
691 Xapian::MSet mymset = enquire.get_mset(0, 10);
692 string data_directory = test_driver::get_srcdir() + "/testdata/";
693 string query = data_directory + "queryone.txt";
694 string qrel = data_directory + "qrelone.txt";
695 string training_data = data_directory + "training_data_one_document.txt";
696 ranker.set_database_path(db_path);
697 TEST_EQUAL(ranker.get_database_path(), db_path);
698 ranker.set_query(Xapian::Query("tigers"));
699 ranker.train_model(training_data);
700 Xapian::docid doc1 = *mymset[0];
701 ranker.rank(mymset);
702 TEST_EQUAL(doc1, *mymset[0]);
703 mymset = enquire.get_mset(0, 10);
704 ranker.train_model(training_data, "ListMLE_Ranker");
705 ranker.rank(mymset, "ListMLE_Ranker");
706 TEST_EQUAL(doc1, *mymset[0]);
707 TEST_EXCEPTION(Xapian::LetorInternalError,
708 ranker.score(query, qrel, "ListMLE_Ranker",
709 "scorer_output.txt", 10, ""));
710 TEST_EXCEPTION(Xapian::FileNotFoundError,
711 ranker.score("", qrel, "ListMLE_Ranker",
712 "scorer_output.txt", 10));
713 TEST_EXCEPTION(Xapian::FileNotFoundError,
714 ranker.score(qrel, "", "ListMLE_Ranker",
715 "scorer_output.txt", 10));
716 ranker.score(query, qrel, "ListMLE_Ranker", "ndcg_output_listmle_1.txt",
717 10);
718 ranker.score(query, qrel, "ListMLE_Ranker", "err_output_listmle_1.txt", 10,
719 "ERRScore");
720 return true;
723 DEFINE_TESTCASE(listmle_ranker_three_correct, generated)
725 Xapian::ListMLERanker ranker;
726 string db_path = get_database_path("apitest_listmle_ranker2",
727 db_index_three_documents);
728 Xapian::Enquire enquire((Xapian::Database(db_path)));
729 enquire.set_query(Xapian::Query("score"));
730 Xapian::MSet mymset = enquire.get_mset(0, 10);
731 string data_directory = test_driver::get_srcdir() + "/testdata/";
732 string query = data_directory + "querythree.txt";
733 string qrel = data_directory + "qrelthree_correct.txt";
734 string training_data = data_directory + "training_data_three_correct.txt";
735 ranker.set_database_path(db_path);
736 TEST_EQUAL(ranker.get_database_path(), db_path);
737 ranker.set_query(Xapian::Query("score"));
738 ranker.train_model(training_data);
739 Xapian::docid doc1 = *mymset[0];
740 Xapian::docid doc2 = *mymset[1];
741 ranker.rank(mymset);
742 TEST_EQUAL(mymset.size(), 2);
743 TEST_EQUAL(doc1, *mymset[1]);
744 TEST_EQUAL(doc2, *mymset[0]);
745 mymset = enquire.get_mset(0, 10);
746 ranker.train_model(training_data, "ListMLE_Ranker");
747 ranker.rank(mymset, "ListMLE_Ranker");
748 TEST_EQUAL(doc1, *mymset[1]);
749 TEST_EQUAL(doc2, *mymset[0]);
750 TEST_EXCEPTION(Xapian::LetorInternalError,
751 ranker.score(query, qrel, "ListMLE_Ranker",
752 "scorer_output.txt", 10, ""));
753 TEST_EXCEPTION(Xapian::FileNotFoundError,
754 ranker.score("", qrel, "ListMLE_Ranker",
755 "scorer_output.txt", 10));
756 TEST_EXCEPTION(Xapian::FileNotFoundError,
757 ranker.score(qrel, "", "ListMLE_Ranker",
758 "scorer_output.txt", 10));
759 ranker.score(query, qrel, "ListMLE_Ranker", "ndcg_output_listmle_3.txt",
760 10);
761 ranker.score(query, qrel, "ListMLE_Ranker", "err_output_listmle_3.txt", 10,
762 "ERRScore");
763 return true;
766 // Featurename check
767 DEFINE_TESTCASE(featurename, !backend)
769 Xapian::TfDoclenCollTfCollLenFeature feature1;
770 Xapian::TfDoclenFeature feature2;
771 Xapian::IdfFeature feature3;
772 Xapian::TfFeature feature4;
773 Xapian::TfIdfDoclenFeature feature5;
774 Xapian::CollTfCollLenFeature feature6;
775 TEST_EQUAL(feature1.name(), "TfDoclenCollTfCollLenFeature");
776 TEST_EQUAL(feature2.name(), "TfDoclenFeature");
777 TEST_EQUAL(feature3.name(), "IdfFeature");
778 TEST_EQUAL(feature4.name(), "TfFeature");
779 TEST_EQUAL(feature5.name(), "TfIdfDoclenFeature");
780 TEST_EQUAL(feature6.name(), "CollTfCollLenFeature");
782 return true;
785 DEFINE_TESTCASE(err_scorer, !backend)
787 /* Derived from the example mentioned in the blogpost
788 * https://lingpipe-blog.com/2010/03/09/chapelle-metzler-zhang-grinspan-2009-expected-reciprocal-rank-for-graded-relevance/
790 vector<Xapian::FeatureVector> fvv;
791 Xapian::FeatureVector temp1;
792 Xapian::FeatureVector temp2;
793 Xapian::FeatureVector temp3;
794 temp1.set_label(3);
795 fvv.push_back(temp1);
796 temp2.set_label(2);
797 fvv.push_back(temp2);
798 temp3.set_label(4);
799 fvv.push_back(temp3);
800 Xapian::ERRScore err;
801 double err_score = err.score(fvv);
803 TEST(abs(err_score - 0.63) < 0.01);
805 return true;