2 * @brief test common features of API classes
4 /* Copyright (C) 2007,2009,2012,2014,2015,2016 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 #include "api_letor.h"
30 #include <xapian-letor.h>
33 #include "testutils.h"
37 // Index helper for the single-document edge case.
// Runs the text of one "tiger" document through a TermGenerator that is
// bound to `doc` and stems with the English stemmer.  The second (string)
// parameter is unnamed and therefore unused.
39 db_index_one_document(Xapian::WritableDatabase
& db
, const string
&)
42 Xapian::TermGenerator termgenerator
;
43 termgenerator
.set_document(doc
);
44 termgenerator
.set_stemmer(Xapian::Stem("en"));
// Short text indexed under the "S" prefix.
45 termgenerator
.index_text("Tigers are solitary animals", 1, "S");
// Longer text indexed under the "XD" prefix.
46 termgenerator
.index_text("Might be that only one Tiger is good enough to "
47 "Take out a ranker, a Tiger is a good way to "
48 "check if a test is working or Tiger not. Tiger."
49 "What if the next line contains no Tigers? Would "
50 "it make a difference to your ranker ? Tigers "
51 "for the win.", 1, "XD");
// Unprefixed text: "The will." first, then increase_termpos() bumps the
// term position before the final paragraph is indexed.
52 termgenerator
.index_text("The will.");
53 termgenerator
.increase_termpos();
54 termgenerator
.index_text("Tigers would not be caught if one calls out the "
55 "Tiger from the den. This document is to check if "
56 "in the massive dataset, you forget the sense of "
57 "something you would not like to stop.");
// Index helper for the two-document case.
// Runs the text of two documents about lions, tigers and bears through a
// TermGenerator bound to `doc` (English stemmer) and adds `doc` to `db`.
// The second (string) parameter is unnamed and therefore unused.
62 db_index_two_documents(Xapian::WritableDatabase
& db
, const string
&)
65 Xapian::TermGenerator termgenerator
;
66 termgenerator
.set_document(doc
);
67 termgenerator
.set_stemmer(Xapian::Stem("en"));
// First text: the same title and paragraph are indexed once with the
// "S"/"XD" prefixes and once unprefixed.
68 termgenerator
.index_text("Lions, Tigers, Bears and Giraffes", 1, "S");
69 termgenerator
.index_text("This paragraph talks about lions and tigers and "
70 "bears (oh, my!). It mentions giraffes, "
71 "but that's not really very important. Lions "
72 "and tigers are big cats, so they must be really "
73 "cuddly. Bears are famous for being cuddly, at "
74 "least when they're teddy bears.", 1, "XD");
75 termgenerator
.index_text("Lions, Tigers, Bears and Giraffes");
76 termgenerator
.increase_termpos();
77 termgenerator
.index_text("This paragraph talks about lions and tigers and "
78 "bears (oh, my!). It mentions giraffes, "
79 "but that's not really very important. Lions "
80 "and tigers are big cats, so they must be really "
81 "cuddly. Bears are famous for being cuddly, at "
82 "least when they're teddy bears.");
// Second text: again indexed both prefixed and unprefixed.
85 termgenerator
.index_text("Lions, Tigers and Bears", 1, "S");
86 termgenerator
.index_text("This is the paragraph of interest. Tigers are "
87 "massive beasts - I wouldn't want to meet a "
88 "hungry one anywhere. Lions are scary even when "
89 "lyin' down. Bears are scary even when bare. "
90 "Together I suspect they'd be less scary, as the "
91 "tigers, lions, and bears would all keep each "
92 "other busy. On the other hand, bears don't live "
93 "in the same continent as far as I know.", 1,
95 termgenerator
.index_text("Lions, Tigers and Bears");
96 termgenerator
.increase_termpos();
97 termgenerator
.index_text("This is the paragraph of interest. Tigers are "
98 "massive beasts - I wouldn't want to meet a "
99 "hungry one anywhere. Lions are scary even when "
100 "lyin' down. Bears are scary even when bare. "
101 "Together I suspect they'd be less scary, as the "
102 "tigers, lions, and bears would all keep each "
103 "other busy. On the other hand, bears don't live "
104 "in the same continent as far as I know.");
105 db
.add_document(doc
);
108 // To check for three documents, out of which one is irrelevant.
// Each text goes through one TermGenerator bound to `doc` (English
// stemmer), and `doc` is added to `db` after each of the three texts.
// The second (string) parameter is unnamed and therefore unused.
110 db_index_three_documents(Xapian::WritableDatabase
& db
, const string
&)
112 Xapian::Document doc
;
113 Xapian::TermGenerator termgenerator
;
114 termgenerator
.set_document(doc
);
115 termgenerator
.set_stemmer(Xapian::Stem("en"));
// First text: "score" occurs in the XD-prefixed and the unprefixed text.
116 termgenerator
.index_text("The will", 1, "S");
117 termgenerator
.index_text("The will are considered stop words in xapian and "
118 "would be thrown off, so the query I want to say "
119 "is score, yes, score. The Score of a game is "
120 "the determining factor of a game, the score is "
121 "what matters at the end of the day. so my advise "
122 "to everyone is to Score it!.", 1, "XD");
123 termgenerator
.index_text("Score might be something else too, but this para "
124 "refers to score only at an abstract. Scores are "
125 "in general scoring. Score it!");
126 termgenerator
.increase_termpos();
127 termgenerator
.index_text("Score score score is important.");
128 db
.add_document(doc
);
// Second text: "score" occurs only in the S-prefixed text; the XD-prefixed
// and unprefixed text never mention it.
130 termgenerator
.index_text("Score score score score score score", 1, "S");
131 termgenerator
.index_text("it might have an absurdly high rank in the qrel "
132 "file or might have no rank at all in another. "
133 "Look out for this as a testcase, might be edgy "
134 "good luck and may this be with you.", 1, "XD");
135 termgenerator
.index_text("Another irrelevant paragraph to make sure the tf "
136 "values are down, but this increases idf values "
137 "but let's see how this works out.");
138 termgenerator
.increase_termpos();
139 termgenerator
.index_text("Nothing to do with the query.");
140 db
.add_document(doc
);
// Third text: "score" occurs in the S-prefixed, XD-prefixed and unprefixed
// text.
142 termgenerator
.index_text("Document has nothing to do with score", 1, "S");
143 termgenerator
.index_text("This is just to check if score is given a higher "
144 "score if it is in the subject or not. Nothing "
145 "special, just juding scores by the look of it. "
146 "Some more scores but a bad qrel should be enough "
147 "to make sure it is ranked down.", 1, "XD");
148 termgenerator
.index_text("Score might be something else too, but this para "
149 "refers to score only at an abstract. Scores are "
150 "in general scoring. Score it!");
151 termgenerator
.increase_termpos();
152 termgenerator
.index_text("Score score score is important.");
153 db
.add_document(doc
);
// Checks FeatureList::create_feature_vectors() on the two-document database
// for the query "lions": one feature vector per match is expected (2), each
// holding 19 features.
156 DEFINE_TESTCASE(createfeaturevector
, generated
)
158 Xapian::FeatureList fl
;
159 Xapian::Database db
= get_database("apitest_ranker1",
160 db_index_two_documents
);
161 Xapian::Enquire
enquire(db
);
162 enquire
.set_query(Xapian::Query("lions"));
// First call: made on `mset` before it receives the search results below.
164 auto fv
= fl
.create_feature_vectors(mset
, Xapian::Query("lions"), db
);
165 mset
= enquire
.get_mset(0, 10);
// Both indexed documents are expected to match "lions".
167 TEST_EQUAL(mset
.size(), 2);
// Second call: made on the populated MSet.
168 fv
= fl
.create_feature_vectors(mset
, Xapian::Query("lions"), db
);
169 TEST_EQUAL(fv
.size(), 2);
170 TEST_EQUAL(fv
[0].get_fcount(), 19);
171 TEST_EQUAL(fv
[1].get_fcount(), 19);
// Checks FeatureList::create_feature_vectors() on the one-document database
// for the query "tigers": exactly one feature vector with 19 features is
// expected.
175 DEFINE_TESTCASE(createfeaturevectoronevector
, generated
)
177 Xapian::FeatureList fl
;
178 Xapian::Database db
= get_database("apitest_ranker2",
179 db_index_one_document
);
180 Xapian::Enquire
enquire(db
);
181 enquire
.set_query(Xapian::Query("tigers"));
// First call: made on `mset` before it receives the search results below.
183 auto fv
= fl
.create_feature_vectors(mset
, Xapian::Query("tigers"), db
);
184 mset
= enquire
.get_mset(0, 10);
// Second call: made on the populated MSet.
186 fv
= fl
.create_feature_vectors(mset
, Xapian::Query("tigers"), db
);
187 TEST_EQUAL(fv
.size(), 1);
188 TEST_EQUAL(fv
[0].get_fcount(), 19);
// Checks FeatureList::create_feature_vectors() for a query term ("llamas")
// that does not occur in the one-document database: no feature vectors are
// expected.
192 DEFINE_TESTCASE(createfeaturevectoronevector_wrongquery
, generated
)
194 Xapian::FeatureList fl
;
195 Xapian::Database db
= get_database("apitest_ranker3",
196 db_index_one_document
);
197 Xapian::Enquire
enquire(db
);
198 enquire
.set_query(Xapian::Query("llamas"));
// First call: made on `mset` before it receives the search results below.
200 auto fv
= fl
.create_feature_vectors(mset
, Xapian::Query("llamas"), db
);
201 mset
= enquire
.get_mset(0, 10);
// Second call: made on the MSet returned by the search.
203 fv
= fl
.create_feature_vectors(mset
, Xapian::Query("llamas"), db
);
204 TEST_EQUAL(fv
.size(), 0);
// Checks FeatureList::create_feature_vectors() on the three-document
// database for the query "score": two feature vectors are expected, each
// holding 19 features.
208 DEFINE_TESTCASE(createfeaturevectorthree
, generated
)
210 Xapian::FeatureList fl
;
211 Xapian::Database db
= get_database("apitest_ranker4",
212 db_index_three_documents
);
213 Xapian::Enquire
enquire(db
);
214 enquire
.set_query(Xapian::Query("score"));
// First call: made on `mset` before it receives the search results below.
216 auto fv
= fl
.create_feature_vectors(mset
, Xapian::Query("score"), db
);
217 mset
= enquire
.get_mset(0, 10);
// Second call: made on the populated MSet.
219 fv
= fl
.create_feature_vectors(mset
, Xapian::Query("score"), db
);
220 TEST_EQUAL(fv
.size(), 2);
221 TEST_EQUAL(fv
[0].get_fcount(), 19);
222 TEST_EQUAL(fv
[1].get_fcount(), 19);
// Generates a training file with Xapian::prepare_training_file() for the
// one-document database and compares it, token by token, with the expected
// file testdata/training_data_one_document.txt.
226 DEFINE_TESTCASE(preparetrainingfileonedb
, generated
)
228 string db_path
= get_database_path("apitest_listnet_ranker1",
229 db_index_one_document
);
230 string data_directory
= test_driver::get_srcdir() + "/testdata/";
231 string query
= data_directory
+ "queryone.txt";
232 string qrel
= data_directory
+ "qrelone.txt";
233 string training_data
= data_directory
+ "training_data_one_document.txt";
234 Xapian::prepare_training_file(db_path
, query
, qrel
, 10,
235 "training_output_data_one_doc.txt");
// if1 reads the expected file, if2 the file that was just generated.
236 ifstream
if1(training_data
);
237 ifstream
if2("training_output_data_one_doc.txt");
// Every expected line must have a counterpart in the generated file.
240 while (getline(if1
, line1
)) {
241 TEST(getline(if2
, line2
));
242 istringstream
iss1(line1
);
243 istringstream
iss2(line2
);
247 while ((iss1
>> temp1
) && (iss2
>> temp2
)) {
248 // Tokens 0, 1 and 21 of each line are plain strings and can be
249 // compared directly; for example "1", "qid:20001" and "#docid=1"
250 // at positions 0, 1 and 21 respectively.  Every other token
251 // carries a double after its first ':', so only that part is
252 // compared, using TEST_EQUAL_DOUBLE() to allow for rounding.
253 if (i
== 0 || i
== 1 || i
== 21) {
254 TEST_EQUAL(temp1
, temp2
);
256 size_t t1
= temp1
.find_first_of(':');
257 size_t t2
= temp2
.find_first_of(':');
258 TEST_EQUAL_DOUBLE(stod(temp1
.substr(t1
+ 1)),
259 stod(temp2
.substr(t2
+ 1)));
// The generated line must not contain tokens beyond the expected ones.
264 TEST(!(iss2
>> temp2
));
// The generated file must not contain lines beyond the expected ones.
266 TEST(!getline(if2
, line2
));
270 // Check stability for an empty qrel file.
// Both the qrel file and the expected output are testdata/empty_file.txt,
// and the generated file is compared with the expected one token by token.
271 DEFINE_TESTCASE(preparetrainingfileonedb_empty_qrel
, generated
)
273 string db_path
= get_database_path("ranker_empty",
274 db_index_one_document
);
275 string data_directory
= test_driver::get_srcdir() + "/testdata/";
276 string query
= data_directory
+ "queryone.txt";
277 string qrel
= data_directory
+ "empty_file.txt";
278 string training_data
= data_directory
+ "empty_file.txt";
279 Xapian::prepare_training_file(db_path
, query
, qrel
, 10,
280 "training_output_empty.txt");
// if1 reads the expected file, if2 the file that was just generated.
281 ifstream
if1(training_data
);
282 ifstream
if2("training_output_empty.txt");
285 while (getline(if1
, line1
)) {
286 TEST(getline(if2
, line2
));
287 istringstream
iss1(line1
);
288 istringstream
iss2(line2
);
292 while ((iss1
>> temp1
) && (iss2
>> temp2
)) {
// Tokens 0, 1 and 21 are compared as strings; for every other token the
// part after the first ':' is compared as a double.
293 if (i
== 0 || i
== 1 || i
== 21) {
294 TEST_EQUAL(temp1
, temp2
);
296 size_t t1
= temp1
.find_first_of(':');
297 size_t t2
= temp2
.find_first_of(':');
298 TEST_EQUAL_DOUBLE(stod(temp1
.substr(t1
+ 1)),
299 stod(temp2
.substr(t2
+ 1)));
// The generated line must not contain tokens beyond the expected ones.
304 TEST(!(iss2
>> temp2
));
// The generated file must not contain lines beyond the expected ones.
306 TEST(!getline(if2
, line2
));
// Generates a training file with Xapian::prepare_training_file() for the
// two-document database and compares it, token by token, with the expected
// file testdata/training_data.txt.
310 DEFINE_TESTCASE(preparetrainingfile_two_docs
, generated
)
312 string db_path
= get_database_path("apitest_listnet_ranker2",
313 db_index_two_documents
);
314 string data_directory
= test_driver::get_srcdir() + "/testdata/";
315 string query
= data_directory
+ "query.txt";
316 string qrel
= data_directory
+ "qrel.txt";
317 string training_data
= data_directory
+ "training_data.txt";
318 Xapian::prepare_training_file(db_path
, query
, qrel
, 10,
319 "training_output1.txt");
// if1 reads the expected file, if2 the file that was just generated.
320 ifstream
if1(training_data
);
321 ifstream
if2("training_output1.txt");
324 while (getline(if1
, line1
)) {
325 TEST(getline(if2
, line2
));
326 istringstream
iss1(line1
);
327 istringstream
iss2(line2
);
331 while ((iss1
>> temp1
) && (iss2
>> temp2
)) {
// Tokens 0, 1 and 21 are compared as strings; for every other token the
// part after the first ':' is compared as a double.
332 if (i
== 0 || i
== 1 || i
== 21) {
333 TEST_EQUAL(temp1
, temp2
);
335 size_t t1
= temp1
.find_first_of(':');
336 size_t t2
= temp2
.find_first_of(':');
337 TEST_EQUAL_DOUBLE(stod(temp1
.substr(t1
+ 1)),
338 stod(temp2
.substr(t2
+ 1)));
// The generated line must not contain tokens beyond the expected ones.
343 TEST(!(iss2
>> temp2
));
// The generated file must not contain lines beyond the expected ones.
345 TEST(!getline(if2
, line2
));
// Generates a training file with Xapian::prepare_training_file() for the
// three-document database and compares it, token by token, with the expected
// file testdata/training_data_three_correct.txt.
349 DEFINE_TESTCASE(preparetrainingfilethree
, generated
)
351 string db_path
= get_database_path("apitest_listnet_ranker4",
352 db_index_three_documents
);
353 string data_directory
= test_driver::get_srcdir() + "/testdata/";
354 string query
= data_directory
+ "querythree.txt";
355 string qrel
= data_directory
+ "qrelthree_correct.txt";
356 string training_data
= data_directory
+ "training_data_three_correct.txt";
357 Xapian::prepare_training_file(db_path
, query
, qrel
, 10,
358 "training_output_three_correct.txt");
// if1 reads the expected file, if2 the file that was just generated.
359 ifstream
if1(training_data
);
360 ifstream
if2("training_output_three_correct.txt");
363 while (getline(if1
, line1
)) {
364 TEST(getline(if2
, line2
));
365 istringstream
iss1(line1
);
366 istringstream
iss2(line2
);
370 while ((iss1
>> temp1
) && (iss2
>> temp2
)) {
// Tokens 0, 1 and 21 are compared as strings; for every other token the
// part after the first ':' is compared as a double.
371 if (i
== 0 || i
== 1 || i
== 21) {
372 TEST_EQUAL(temp1
, temp2
);
374 size_t t1
= temp1
.find_first_of(':');
375 size_t t2
= temp2
.find_first_of(':');
376 TEST_EQUAL_DOUBLE(stod(temp1
.substr(t1
+ 1)),
377 stod(temp2
.substr(t2
+ 1)));
// The generated line must not contain tokens beyond the expected ones.
382 TEST(!(iss2
>> temp2
));
// The generated file must not contain lines beyond the expected ones.
384 TEST(!getline(if2
, line2
));
388 // ListNet_Ranker check.
// Exercises Xapian::ListNETRanker on the two-document database with the
// query "lions": error handling of train_model() and score(), and the
// document order after ranking.
389 DEFINE_TESTCASE(listnet_ranker
, generated
)
391 Xapian::ListNETRanker ranker
;
// Training from an empty path must throw FileNotFoundError.
392 TEST_EXCEPTION(Xapian::FileNotFoundError
, ranker
.train_model(""));
393 string db_path
= get_database_path("apitest_listnet_ranker",
394 db_index_two_documents
);
395 Xapian::Enquire
enquire((Xapian::Database(db_path
)));
396 enquire
.set_query(Xapian::Query("lions"));
397 Xapian::MSet mymset
= enquire
.get_mset(0, 10);
398 string data_directory
= test_driver::get_srcdir() + "/testdata/";
399 string query
= data_directory
+ "query.txt";
400 string qrel
= data_directory
+ "qrel.txt";
401 string training_data
= data_directory
+ "training_data.txt";
402 ranker
.set_database_path(db_path
);
403 TEST_EQUAL(ranker
.get_database_path(), db_path
);
404 ranker
.set_query(Xapian::Query("lions"));
405 ranker
.train_model(training_data
);
// Remember which documents occupy the first two MSet positions.
406 Xapian::docid doc1
= *mymset
[0];
407 Xapian::docid doc2
= *mymset
[1];
// The two documents are expected to have traded places.
409 TEST_EQUAL(doc1
, *mymset
[1]);
410 TEST_EQUAL(doc2
, *mymset
[0]);
// Fetch a fresh MSet, then train and rank under the name "ListNet_Ranker";
// the same swapped order is expected.
411 mymset
= enquire
.get_mset(0, 10);
412 ranker
.train_model(training_data
, "ListNet_Ranker");
413 ranker
.rank(mymset
, "ListNet_Ranker");
414 TEST_EQUAL(doc1
, *mymset
[1]);
415 TEST_EQUAL(doc2
, *mymset
[0]);
// score() must reject an empty last argument (LetorInternalError) and an
// empty first or second path (FileNotFoundError).
416 TEST_EXCEPTION(Xapian::LetorInternalError
,
417 ranker
.score(query
, qrel
, "ListNet_Ranker",
418 "scorer_output.txt", 10, ""));
419 TEST_EXCEPTION(Xapian::FileNotFoundError
,
420 ranker
.score("", qrel
, "ListNet_Ranker",
421 "scorer_output.txt", 10));
422 TEST_EXCEPTION(Xapian::FileNotFoundError
,
423 ranker
.score(qrel
, "", "ListNet_Ranker",
424 "scorer_output.txt", 10));
// Scoring calls that are expected to succeed.
425 ranker
.score(query
, qrel
, "ListNet_Ranker", "ndcg_output_ListNet_2.txt",
427 ranker
.score(query
, qrel
, "ListNet_Ranker", "err_output_ListNet_2.txt",
// Exercises Xapian::ListNETRanker on the one-document database with the
// query "tigers": error handling of train_model() and score(), and the
// position of the single match after ranking.
432 DEFINE_TESTCASE(listnet_ranker_one_file
, generated
)
434 Xapian::ListNETRanker ranker
;
// Training from an empty path must throw FileNotFoundError.
435 TEST_EXCEPTION(Xapian::FileNotFoundError
, ranker
.train_model(""));
436 string db_path
= get_database_path("apitest_listnet_ranker5",
437 db_index_one_document
);
438 Xapian::Enquire
enquire((Xapian::Database(db_path
)));
439 enquire
.set_query(Xapian::Query("tigers"));
440 Xapian::MSet mymset
= enquire
.get_mset(0, 10);
441 string data_directory
= test_driver::get_srcdir() + "/testdata/";
442 string query
= data_directory
+ "queryone.txt";
443 string qrel
= data_directory
+ "qrelone.txt";
444 string training_data
= data_directory
+ "training_data_one_document.txt";
445 ranker
.set_database_path(db_path
);
446 TEST_EQUAL(ranker
.get_database_path(), db_path
);
447 ranker
.set_query(Xapian::Query("tigers"));
448 ranker
.train_model(training_data
);
// Remember which document occupies the first MSet position; it is expected
// to stay there.
449 Xapian::docid doc1
= *mymset
[0];
451 TEST_EQUAL(doc1
, *mymset
[0]);
// Fetch a fresh MSet, then train and rank under the name "ListNet_Ranker".
452 mymset
= enquire
.get_mset(0, 10);
453 ranker
.train_model(training_data
, "ListNet_Ranker");
454 ranker
.rank(mymset
, "ListNet_Ranker");
455 TEST_EQUAL(doc1
, *mymset
[0]);
// score() must reject an empty last argument (LetorInternalError) and an
// empty first or second path (FileNotFoundError).
456 TEST_EXCEPTION(Xapian::LetorInternalError
,
457 ranker
.score(query
, qrel
, "ListNet_Ranker",
458 "scorer_output.txt", 10, ""));
459 TEST_EXCEPTION(Xapian::FileNotFoundError
,
460 ranker
.score("", qrel
, "ListNet_Ranker",
461 "scorer_output.txt", 10));
462 TEST_EXCEPTION(Xapian::FileNotFoundError
,
463 ranker
.score(qrel
, "", "ListNet_Ranker",
464 "scorer_output.txt", 10));
// Scoring calls that are expected to succeed.
465 ranker
.score(query
, qrel
, "ListNet_Ranker", "ndcg_output_ListNet_1.txt",
467 ranker
.score(query
, qrel
, "ListNet_Ranker", "err_output_ListNet_1.txt", 10,
// Exercises Xapian::ListNETRanker on the three-document database with the
// query "score": error handling of train_model() and score(), and the
// order of the first two matches after ranking.
472 DEFINE_TESTCASE(listnet_ranker_three_correct
, generated
)
474 Xapian::ListNETRanker ranker
;
// Training from an empty path must throw FileNotFoundError.
475 TEST_EXCEPTION(Xapian::FileNotFoundError
, ranker
.train_model(""));
476 string db_path
= get_database_path("apitest_listnet_ranker6",
477 db_index_three_documents
);
478 Xapian::Enquire
enquire((Xapian::Database(db_path
)));
479 enquire
.set_query(Xapian::Query("score"));
480 Xapian::MSet mymset
= enquire
.get_mset(0, 10);
481 string data_directory
= test_driver::get_srcdir() + "/testdata/";
482 string query
= data_directory
+ "querythree.txt";
483 string qrel
= data_directory
+ "qrelthree_correct.txt";
484 string training_data
= data_directory
+ "training_data_three_correct.txt";
485 ranker
.set_database_path(db_path
);
486 TEST_EQUAL(ranker
.get_database_path(), db_path
);
487 ranker
.set_query(Xapian::Query("score"));
488 ranker
.train_model(training_data
);
// Remember which documents occupy the first two MSet positions.
489 Xapian::docid doc1
= *mymset
[0];
490 Xapian::docid doc2
= *mymset
[1];
// The two documents are expected to have traded places.
492 TEST_EQUAL(doc1
, *mymset
[1]);
493 TEST_EQUAL(doc2
, *mymset
[0]);
// Fetch a fresh MSet, then train and rank under the name "ListNet_Ranker";
// the same swapped order is expected.
494 mymset
= enquire
.get_mset(0, 10);
495 ranker
.train_model(training_data
, "ListNet_Ranker");
496 ranker
.rank(mymset
, "ListNet_Ranker");
497 TEST_EQUAL(doc1
, *mymset
[1]);
498 TEST_EQUAL(doc2
, *mymset
[0]);
// score() must reject an empty last argument (LetorInternalError) and an
// empty first or second path (FileNotFoundError).
499 TEST_EXCEPTION(Xapian::LetorInternalError
,
500 ranker
.score(query
, qrel
, "ListNet_Ranker",
501 "scorer_output.txt", 10, ""));
502 TEST_EXCEPTION(Xapian::FileNotFoundError
,
503 ranker
.score("", qrel
, "ListNet_Ranker",
504 "scorer_output.txt", 10));
505 TEST_EXCEPTION(Xapian::FileNotFoundError
,
506 ranker
.score(qrel
, "", "ListNet_Ranker",
507 "scorer_output.txt", 10));
// Scoring calls that are expected to succeed; each one writes to its own
// ndcg_/err_ output file.
508 ranker
.score(query
, qrel
, "ListNet_Ranker", "ndcg_output_ListNet_3.txt",
510 ranker
.score(query
, qrel
, "ListNet_Ranker", "err_output_ListNet_3.txt", 10,
// Exercises Xapian::SVMRanker on the two-document database with the query
// "lions": error handling of train_model() and score(), and the document
// order after ranking.
516 DEFINE_TESTCASE(svm_ranker
, generated
)
518 Xapian::SVMRanker ranker
;
// Training from an empty path must throw FileNotFoundError.
519 TEST_EXCEPTION(Xapian::FileNotFoundError
, ranker
.train_model(""));
520 string db_path
= get_database_path("apitest_svm_ranker",
521 db_index_two_documents
);
522 Xapian::Enquire
enquire((Xapian::Database(db_path
)));
523 enquire
.set_query(Xapian::Query("lions"));
524 Xapian::MSet mymset
= enquire
.get_mset(0, 10);
525 string data_directory
= test_driver::get_srcdir() + "/testdata/";
526 string query
= data_directory
+ "query.txt";
527 string qrel
= data_directory
+ "qrel.txt";
528 string training_data
= data_directory
+ "training_data.txt";
529 ranker
.set_database_path(db_path
);
530 TEST_EQUAL(ranker
.get_database_path(), db_path
);
531 ranker
.set_query(Xapian::Query("lions"));
532 ranker
.train_model(training_data
);
// Remember which documents occupy the first two MSet positions.
533 Xapian::docid doc1
= *mymset
[0];
534 Xapian::docid doc2
= *mymset
[1];
// The two documents are expected to have traded places.
536 TEST_EQUAL(doc1
, *mymset
[1]);
537 TEST_EQUAL(doc2
, *mymset
[0]);
// Fetch a fresh MSet, then train and rank under the name "SVM_Ranker";
// the same swapped order is expected.
538 mymset
= enquire
.get_mset(0, 10);
539 ranker
.train_model(training_data
, "SVM_Ranker");
540 ranker
.rank(mymset
, "SVM_Ranker");
541 TEST_EQUAL(doc1
, *mymset
[1]);
542 TEST_EQUAL(doc2
, *mymset
[0]);
// score() must reject an empty last argument (LetorInternalError) and an
// empty first or second path (FileNotFoundError).
543 TEST_EXCEPTION(Xapian::LetorInternalError
,
544 ranker
.score(query
, qrel
, "SVM_Ranker",
545 "scorer_output.txt", 10, ""));
546 TEST_EXCEPTION(Xapian::FileNotFoundError
,
547 ranker
.score("", qrel
, "SVM_Ranker",
548 "scorer_output.txt", 10));
549 TEST_EXCEPTION(Xapian::FileNotFoundError
,
550 ranker
.score(qrel
, "", "SVM_Ranker",
551 "scorer_output.txt", 10));
// Scoring calls that are expected to succeed.
552 ranker
.score(query
, qrel
, "SVM_Ranker", "ndcg_output_svm_2.txt", 10);
553 ranker
.score(query
, qrel
, "SVM_Ranker", "err_output_svm_2.txt", 10,
// Exercises Xapian::SVMRanker on the one-document database with the query
// "tigers": error handling of train_model() and score(), and the position
// of the single match after ranking.
558 DEFINE_TESTCASE(svm_ranker_one_file
, generated
)
560 Xapian::SVMRanker ranker
;
// Training from an empty path must throw FileNotFoundError.
561 TEST_EXCEPTION(Xapian::FileNotFoundError
, ranker
.train_model(""));
562 string db_path
= get_database_path("apitest_svm_ranker1",
563 db_index_one_document
);
564 Xapian::Enquire
enquire((Xapian::Database(db_path
)));
565 enquire
.set_query(Xapian::Query("tigers"));
566 Xapian::MSet mymset
= enquire
.get_mset(0, 10);
567 string data_directory
= test_driver::get_srcdir() + "/testdata/";
568 string query
= data_directory
+ "queryone.txt";
569 string qrel
= data_directory
+ "qrelone.txt";
570 string training_data
= data_directory
+ "training_data_one_document.txt";
571 ranker
.set_database_path(db_path
);
572 TEST_EQUAL(ranker
.get_database_path(), db_path
);
573 ranker
.set_query(Xapian::Query("tigers"));
574 ranker
.train_model(training_data
);
// Remember which document occupies the first MSet position; it is expected
// to stay there.
575 Xapian::docid doc1
= *mymset
[0];
577 TEST_EQUAL(doc1
, *mymset
[0]);
// Fetch a fresh MSet, then train and rank under the name "SVM_Ranker".
578 mymset
= enquire
.get_mset(0, 10);
579 ranker
.train_model(training_data
, "SVM_Ranker");
580 ranker
.rank(mymset
, "SVM_Ranker");
581 TEST_EQUAL(doc1
, *mymset
[0]);
// score() must reject an empty last argument (LetorInternalError) and an
// empty first or second path (FileNotFoundError).
582 TEST_EXCEPTION(Xapian::LetorInternalError
,
583 ranker
.score(query
, qrel
, "SVM_Ranker",
584 "scorer_output.txt", 10, ""));
585 TEST_EXCEPTION(Xapian::FileNotFoundError
,
586 ranker
.score("", qrel
, "SVM_Ranker",
587 "scorer_output.txt", 10));
588 TEST_EXCEPTION(Xapian::FileNotFoundError
,
589 ranker
.score(qrel
, "", "SVM_Ranker",
590 "scorer_output.txt", 10));
// Scoring calls that are expected to succeed.
591 ranker
.score(query
, qrel
, "SVM_Ranker", "ndcg_output_svm_1.txt", 10);
592 ranker
.score(query
, qrel
, "SVM_Ranker", "err_output_svm_1.txt", 10,
// Exercises Xapian::SVMRanker on the three-document database with the query
// "score": error handling of train_model() and score(), and the order of
// the first two matches after ranking.
597 DEFINE_TESTCASE(svm_ranker_three_correct
, generated
)
599 Xapian::SVMRanker ranker
;
// Training from an empty path must throw FileNotFoundError.
600 TEST_EXCEPTION(Xapian::FileNotFoundError
, ranker
.train_model(""));
601 string db_path
= get_database_path("apitest_svm_ranker2",
602 db_index_three_documents
);
603 Xapian::Enquire
enquire((Xapian::Database(db_path
)));
604 enquire
.set_query(Xapian::Query("score"));
605 Xapian::MSet mymset
= enquire
.get_mset(0, 10);
606 string data_directory
= test_driver::get_srcdir() + "/testdata/";
607 string query
= data_directory
+ "querythree.txt";
608 string qrel
= data_directory
+ "qrelthree_correct.txt";
609 string training_data
= data_directory
+ "training_data_three_correct.txt";
610 ranker
.set_database_path(db_path
);
611 TEST_EQUAL(ranker
.get_database_path(), db_path
);
612 ranker
.set_query(Xapian::Query("score"));
613 ranker
.train_model(training_data
);
// Remember which documents occupy the first two MSet positions.
614 Xapian::docid doc1
= *mymset
[0];
615 Xapian::docid doc2
= *mymset
[1];
// The two documents are expected to have traded places.
617 TEST_EQUAL(doc1
, *mymset
[1]);
618 TEST_EQUAL(doc2
, *mymset
[0]);
// Fetch a fresh MSet, then train and rank under the name "SVM_Ranker";
// the same swapped order is expected.
619 mymset
= enquire
.get_mset(0, 10);
620 ranker
.train_model(training_data
, "SVM_Ranker");
621 ranker
.rank(mymset
, "SVM_Ranker");
622 TEST_EQUAL(doc1
, *mymset
[1]);
623 TEST_EQUAL(doc2
, *mymset
[0]);
// score() must reject an empty last argument (LetorInternalError) and an
// empty first or second path (FileNotFoundError).
624 TEST_EXCEPTION(Xapian::LetorInternalError
,
625 ranker
.score(query
, qrel
, "SVM_Ranker",
626 "scorer_output.txt", 10, ""));
627 TEST_EXCEPTION(Xapian::FileNotFoundError
,
628 ranker
.score("", qrel
, "SVM_Ranker",
629 "scorer_output.txt", 10));
630 TEST_EXCEPTION(Xapian::FileNotFoundError
,
631 ranker
.score(qrel
, "", "SVM_Ranker",
632 "scorer_output.txt", 10));
// Scoring calls that are expected to succeed.
633 ranker
.score(query
, qrel
, "SVM_Ranker", "ndcg_output_svm_3.txt", 10);
634 ranker
.score(query
, qrel
, "SVM_Ranker", "err_output_svm_3.txt", 10,
639 // ListMLE_Ranker check.
// Exercises Xapian::ListMLERanker on the two-document database with the
// query "lions": error handling of train_model() and score(), and the
// document order after ranking.
640 DEFINE_TESTCASE(listmle_ranker
, generated
)
642 Xapian::ListMLERanker ranker
;
// Training from an empty path must throw FileNotFoundError.
643 TEST_EXCEPTION(Xapian::FileNotFoundError
, ranker
.train_model(""));
644 string db_path
= get_database_path("apitest_listmle_ranker",
645 db_index_two_documents
);
646 Xapian::Enquire
enquire((Xapian::Database(db_path
)));
647 enquire
.set_query(Xapian::Query("lions"));
648 Xapian::MSet mymset
= enquire
.get_mset(0, 10);
649 string data_directory
= test_driver::get_srcdir() + "/testdata/";
650 string query
= data_directory
+ "query.txt";
651 string qrel
= data_directory
+ "qrel.txt";
652 string training_data
= data_directory
+ "training_data.txt";
653 ranker
.set_database_path(db_path
);
654 TEST_EQUAL(ranker
.get_database_path(), db_path
);
655 ranker
.set_query(Xapian::Query("lions"));
656 ranker
.train_model(training_data
);
// Remember which documents occupy the first two MSet positions.
657 Xapian::docid doc1
= *mymset
[0];
658 Xapian::docid doc2
= *mymset
[1];
// The two documents are expected to have traded places.
660 TEST_EQUAL(doc1
, *mymset
[1]);
661 TEST_EQUAL(doc2
, *mymset
[0]);
// Fetch a fresh MSet, then train and rank under the name "ListMLE_Ranker";
// the same swapped order is expected.
662 mymset
= enquire
.get_mset(0, 10);
663 ranker
.train_model(training_data
, "ListMLE_Ranker");
664 ranker
.rank(mymset
, "ListMLE_Ranker");
665 TEST_EQUAL(doc1
, *mymset
[1]);
666 TEST_EQUAL(doc2
, *mymset
[0]);
// score() must reject an empty last argument (LetorInternalError) and an
// empty first or second path (FileNotFoundError).
667 TEST_EXCEPTION(Xapian::LetorInternalError
,
668 ranker
.score(query
, qrel
, "ListMLE_Ranker",
669 "scorer_output.txt", 10, ""));
670 TEST_EXCEPTION(Xapian::FileNotFoundError
,
671 ranker
.score("", qrel
, "ListMLE_Ranker",
672 "scorer_output.txt", 10));
673 TEST_EXCEPTION(Xapian::FileNotFoundError
,
674 ranker
.score(qrel
, "", "ListMLE_Ranker",
675 "scorer_output.txt", 10));
// Scoring calls that are expected to succeed.
676 ranker
.score(query
, qrel
, "ListMLE_Ranker", "ndcg_output_listmle_2.txt",
678 ranker
.score(query
, qrel
, "ListMLE_Ranker", "err_output_listmle_2.txt", 10,
// Exercises Xapian::ListMLERanker on the one-document database with the
// query "tigers": error handling of train_model() and score(), and the
// position of the single match after ranking.
683 DEFINE_TESTCASE(listmle_ranker_one_file
, generated
)
685 Xapian::ListMLERanker ranker
;
// Training from an empty path must throw FileNotFoundError.
686 TEST_EXCEPTION(Xapian::FileNotFoundError
, ranker
.train_model(""));
687 string db_path
= get_database_path("apitest_listmle_ranker1",
688 db_index_one_document
);
689 Xapian::Enquire
enquire((Xapian::Database(db_path
)));
690 enquire
.set_query(Xapian::Query("tigers"));
691 Xapian::MSet mymset
= enquire
.get_mset(0, 10);
692 string data_directory
= test_driver::get_srcdir() + "/testdata/";
693 string query
= data_directory
+ "queryone.txt";
694 string qrel
= data_directory
+ "qrelone.txt";
695 string training_data
= data_directory
+ "training_data_one_document.txt";
696 ranker
.set_database_path(db_path
);
697 TEST_EQUAL(ranker
.get_database_path(), db_path
);
698 ranker
.set_query(Xapian::Query("tigers"));
699 ranker
.train_model(training_data
);
// Remember which document occupies the first MSet position; it is expected
// to stay there.
700 Xapian::docid doc1
= *mymset
[0];
702 TEST_EQUAL(doc1
, *mymset
[0]);
// Fetch a fresh MSet, then train and rank under the name "ListMLE_Ranker".
703 mymset
= enquire
.get_mset(0, 10);
704 ranker
.train_model(training_data
, "ListMLE_Ranker");
705 ranker
.rank(mymset
, "ListMLE_Ranker");
706 TEST_EQUAL(doc1
, *mymset
[0]);
// score() must reject an empty last argument (LetorInternalError) and an
// empty first or second path (FileNotFoundError).
707 TEST_EXCEPTION(Xapian::LetorInternalError
,
708 ranker
.score(query
, qrel
, "ListMLE_Ranker",
709 "scorer_output.txt", 10, ""));
710 TEST_EXCEPTION(Xapian::FileNotFoundError
,
711 ranker
.score("", qrel
, "ListMLE_Ranker",
712 "scorer_output.txt", 10));
713 TEST_EXCEPTION(Xapian::FileNotFoundError
,
714 ranker
.score(qrel
, "", "ListMLE_Ranker",
715 "scorer_output.txt", 10));
// Scoring calls that are expected to succeed.
716 ranker
.score(query
, qrel
, "ListMLE_Ranker", "ndcg_output_listmle_1.txt",
718 ranker
.score(query
, qrel
, "ListMLE_Ranker", "err_output_listmle_1.txt", 10,
// Exercises Xapian::ListMLERanker on the three-document database with the
// query "score": error handling of score(), and the order of the first two
// matches after ranking.
// NOTE(review): this test does not check that train_model("") throws
// FileNotFoundError, although the other ranker tests in this file do --
// confirm whether the omission is intentional.
723 DEFINE_TESTCASE(listmle_ranker_three_correct
, generated
)
725 Xapian::ListMLERanker ranker
;
726 string db_path
= get_database_path("apitest_listmle_ranker2",
727 db_index_three_documents
);
728 Xapian::Enquire
enquire((Xapian::Database(db_path
)));
729 enquire
.set_query(Xapian::Query("score"));
730 Xapian::MSet mymset
= enquire
.get_mset(0, 10);
731 string data_directory
= test_driver::get_srcdir() + "/testdata/";
732 string query
= data_directory
+ "querythree.txt";
733 string qrel
= data_directory
+ "qrelthree_correct.txt";
734 string training_data
= data_directory
+ "training_data_three_correct.txt";
735 ranker
.set_database_path(db_path
);
736 TEST_EQUAL(ranker
.get_database_path(), db_path
);
737 ranker
.set_query(Xapian::Query("score"));
738 ranker
.train_model(training_data
);
// Remember which documents occupy the first two MSet positions.
739 Xapian::docid doc1
= *mymset
[0];
740 Xapian::docid doc2
= *mymset
[1];
// Two matches are expected, and the two documents are expected to have
// traded places.
742 TEST_EQUAL(mymset
.size(), 2);
743 TEST_EQUAL(doc1
, *mymset
[1]);
744 TEST_EQUAL(doc2
, *mymset
[0]);
// Fetch a fresh MSet, then train and rank under the name "ListMLE_Ranker";
// the same swapped order is expected.
745 mymset
= enquire
.get_mset(0, 10);
746 ranker
.train_model(training_data
, "ListMLE_Ranker");
747 ranker
.rank(mymset
, "ListMLE_Ranker");
748 TEST_EQUAL(doc1
, *mymset
[1]);
749 TEST_EQUAL(doc2
, *mymset
[0]);
// score() must reject an empty last argument (LetorInternalError) and an
// empty first or second path (FileNotFoundError).
750 TEST_EXCEPTION(Xapian::LetorInternalError
,
751 ranker
.score(query
, qrel
, "ListMLE_Ranker",
752 "scorer_output.txt", 10, ""));
753 TEST_EXCEPTION(Xapian::FileNotFoundError
,
754 ranker
.score("", qrel
, "ListMLE_Ranker",
755 "scorer_output.txt", 10));
756 TEST_EXCEPTION(Xapian::FileNotFoundError
,
757 ranker
.score(qrel
, "", "ListMLE_Ranker",
758 "scorer_output.txt", 10));
// Scoring calls that are expected to succeed.
759 ranker
.score(query
, qrel
, "ListMLE_Ranker", "ndcg_output_listmle_3.txt",
761 ranker
.score(query
, qrel
, "ListMLE_Ranker", "err_output_listmle_3.txt", 10,
// Checks that name() on a default-constructed instance of each of six
// Feature classes returns that class's own name as a string.
767 DEFINE_TESTCASE(featurename
, !backend
)
769 Xapian::TfDoclenCollTfCollLenFeature feature1
;
770 Xapian::TfDoclenFeature feature2
;
771 Xapian::IdfFeature feature3
;
772 Xapian::TfFeature feature4
;
773 Xapian::TfIdfDoclenFeature feature5
;
774 Xapian::CollTfCollLenFeature feature6
;
775 TEST_EQUAL(feature1
.name(), "TfDoclenCollTfCollLenFeature");
776 TEST_EQUAL(feature2
.name(), "TfDoclenFeature");
777 TEST_EQUAL(feature3
.name(), "IdfFeature");
778 TEST_EQUAL(feature4
.name(), "TfFeature");
779 TEST_EQUAL(feature5
.name(), "TfIdfDoclenFeature");
780 TEST_EQUAL(feature6
.name(), "CollTfCollLenFeature");
// Scores a vector of three FeatureVector objects with Xapian::ERRScore and
// expects the result to be within 0.01 of 0.63.
// NOTE(review): the final check calls unqualified abs() on a double --
// confirm that a floating-point overload (std::abs from <cmath>) is the one
// selected; if only the integer abs() is visible the difference would be
// truncated to 0 and the check could never fail.
785 DEFINE_TESTCASE(err_scorer
, !backend
)
787 /* Derived from the example mentioned in the blogpost
788 * https://lingpipe-blog.com/2010/03/09/chapelle-metzler-zhang-grinspan-2009-expected-reciprocal-rank-for-graded-relevance/
790 vector
<Xapian::FeatureVector
> fvv
;
791 Xapian::FeatureVector temp1
;
792 Xapian::FeatureVector temp2
;
793 Xapian::FeatureVector temp3
;
795 fvv
.push_back(temp1
);
797 fvv
.push_back(temp2
);
799 fvv
.push_back(temp3
);
800 Xapian::ERRScore err
;
801 double err_score
= err
.score(fvv
);
803 TEST(abs(err_score
- 0.63) < 0.01);