Use << instead of + with FAIL_TEST and SKIP_TEST
[xapian.git] / xapian-core / tests / api_snippets.cc
blob714deb8b1c55d5892288dc74f831917d53d5b32c
1 /* api_snippets.cc: tests snippets
3 * Copyright 2012 Mihai Bivol
4 * Copyright 2015,2016 Olly Betts
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of the
9 * License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
19 * USA
22 #include <config.h>
24 #include "api_snippets.h"
26 #include <fstream>
27 #include <string>
29 #include <xapian.h>
31 #include "apitest.h"
32 #include "backendmanager_local.h"
33 #include "testsuite.h"
34 #include "testutils.h"
36 #include <iostream>
38 using namespace std;
/// One row of a table-driven snippet test.
///
/// Kept as a plain aggregate so the tables can be brace-initialised.
struct snippet_testcase {
    /// Text handed to MSet::snippet() as the sample to summarise.
    const char* input;

    /// Length argument handed to MSet::snippet() alongside @a input.
    size_t len;

    /// Exact string the snippet() call is expected to return.
    const char* expect;
};
46 /// Test snippets without stemming.
47 DEFINE_TESTCASE(snippet1, backend) {
48 Xapian::Enquire enquire(get_database("apitest_simpledata"));
49 enquire.set_query(Xapian::Query(Xapian::Query::OP_OR,
50 Xapian::Query("rubbish"),
51 Xapian::Query("example")));
52 Xapian::MSet mset = enquire.get_mset(0, 0);
54 static const snippet_testcase testcases[] = {
55 // Test highlighting in full sample.
56 { "Rubbish and junk", 20, "<b>Rubbish</b> and junk" },
57 { "Project R.U.B.B.I.S.H. greenlit", 31, "Project <b>R.U.B.B.I.S.H.</b> greenlit" },
58 { "What a load of rubbish", 100, "What a load of <b>rubbish</b>" },
59 { "Example rubbish", 100, "<b>Example</b> <b>rubbish</b>" },
60 { "An example of rubbish", 100, "An <b>example</b> of <b>rubbish</b>" },
61 { "Rubbish example of rubbish", 100, "<b>Rubbish</b> <b>example</b> of <b>rubbish</b>" },
63 // Test selection of snippet.
64 { "Rubbish and junk", 12, "<b>Rubbish</b> and..." },
65 { "Project R.U.B.B.I.S.H. greenlit", 14, "...<b>R.U.B.B.I.S.H.</b>..." },
66 { "What a load of rubbish", 12, "...of <b>rubbish</b>" },
67 { "What a load of rubbish", 8, "...<b>rubbish</b>" },
68 { "Rubbish example where the start is better than the rubbish ending", 18, "<b>Rubbish</b> <b>example</b>..." },
70 // Should prefer "interesting" words for context.
71 { "And of the rubbish document to this", 18, "...<b>rubbish</b> document..." },
72 { "And if they document rubbish to be this", 18, "...document <b>rubbish</b>..." },
75 for (auto i : testcases) {
76 TEST_STRINGS_EQUAL(mset.snippet(i.input, i.len), i.expect);
79 return true;
82 /// Test snippets with stemming.
83 DEFINE_TESTCASE(snippetstem1, backend) {
84 Xapian::Enquire enquire(get_database("apitest_simpledata"));
85 enquire.set_query(Xapian::Query(Xapian::Query::OP_OR,
86 Xapian::Query("rubbish"),
87 Xapian::Query("Zexampl")));
88 Xapian::MSet mset = enquire.get_mset(0, 0);
90 static const snippet_testcase testcases[] = {
91 // "rubbish" isn't stemmed, example is.
92 { "You rubbished my ideas", 24, "You rubbished my ideas" },
93 { "Rubbished all my examples", 20, "...all my <b>examples</b>" },
94 { "Examples of text", 20, "<b>Examples</b> of text" },
97 Xapian::Stem stem("en");
98 for (auto i : testcases) {
99 TEST_STRINGS_EQUAL(mset.snippet(i.input, i.len, stem), i.expect);
102 return true;
105 /// Test snippets with phrases.
106 DEFINE_TESTCASE(snippetphrase1, backend) {
107 Xapian::Enquire enquire(get_database("apitest_simpledata"));
108 Xapian::Query q(Xapian::Query::OP_PHRASE,
109 Xapian::Query("rubbish"),
110 Xapian::Query("example"));
111 // Regression test - a phrase with a follow sibling query would crash in
112 // the highlighting code.
113 enquire.set_query(q &~ Xapian::Query("banana"));
114 Xapian::MSet mset = enquire.get_mset(0, 0);
116 static const snippet_testcase testcases[] = {
117 { "An example of rubbish", 18, "...example of rubbish" },
118 { "This is a rubbish example", 20, "...is a <b>rubbish example</b>" },
119 { "Example of a rubbish example of rubbish", 45, "Example of a <b>rubbish example</b> of rubbish" },
120 { "Example of a rubbish example of rubbish", 18, "...<b>rubbish example</b> of..." },
121 { "rubbish rubbish example example", 45, "rubbish <b>rubbish example</b> example" },
122 { "rubbish example rubbish example", 45, "<b>rubbish example</b> <b>rubbish example</b>" },
125 Xapian::Stem stem("en");
126 for (auto i : testcases) {
127 TEST_STRINGS_EQUAL(mset.snippet(i.input, i.len, stem), i.expect);
130 return true;
133 /// Index file to a DB with TermGenerator.
134 static void
135 make_tg_db(Xapian::WritableDatabase &db, const string & source)
137 string file = test_driver::get_srcdir();
138 file += "/testdata/";
139 file += source;
140 file += ".txt";
141 ifstream input;
142 input.open(file.c_str());
143 if (!input.is_open()) {
144 FAIL_TEST("Couldn't open input: " << file);
147 Xapian::TermGenerator tg;
148 tg.set_stemmer(Xapian::Stem("en"));
149 while (!input.eof()) {
150 Xapian::Document doc;
151 tg.set_document(doc);
152 string line, data;
153 while (true) {
154 getline(input, line);
155 if (find_if(line.begin(), line.end(), C_isnotspace) == line.end())
156 break;
157 tg.index_text(line);
158 if (!data.empty()) data += ' ';
159 data += line;
161 doc.set_data(data);
162 db.add_document(doc);
166 /// Test snippets in various ways.
167 DEFINE_TESTCASE(snippetmisc1, generated) {
168 Xapian::Database db = get_database("snippet", make_tg_db, "snippet");
169 Xapian::Enquire enquire(db);
170 enquire.set_weighting_scheme(Xapian::BoolWeight());
171 Xapian::Stem stem("en");
173 static const char * words[] = { "do", "we", "have" };
174 Xapian::Query q(Xapian::Query::OP_PHRASE, words, words + 3);
175 enquire.set_query(q);
176 Xapian::MSet mset = enquire.get_mset(0, 6);
177 TEST_EQUAL(mset.size(), 3);
178 TEST_STRINGS_EQUAL(mset.snippet(mset[0].get_document().get_data(), 40, stem),
179 "...much o'brien <b>do we have</b>? Miles O'Brien...");
180 TEST_STRINGS_EQUAL(mset.snippet(mset[1].get_document().get_data(), 40, stem),
181 "...Unicode: How much o’brien <b>do we have</b>?");
182 TEST_STRINGS_EQUAL(mset.snippet(mset[2].get_document().get_data(), 32, stem),
183 "We do have we <b>do we have</b> do we.");
185 enquire.set_query(Xapian::Query("Zwelcom") | Xapian::Query("Zmike"));
186 mset = enquire.get_mset(0, 6);
187 TEST_EQUAL(mset.size(), 3);
188 TEST_STRINGS_EQUAL(mset.snippet(mset[0].get_document().get_data(), 25, stem),
189 "<b>Welcome</b> to <b>Mike's</b>...");
190 TEST_STRINGS_EQUAL(mset.snippet(mset[1].get_document().get_data(), 5, stem),
191 "<b>Mike</b>...");
192 TEST_STRINGS_EQUAL(mset.snippet(mset[2].get_document().get_data(), 10, stem),
193 "...<b>Mike</b> can...");
195 enquire.set_query(Xapian::Query(q.OP_WILDCARD, "m"));
196 mset = enquire.get_mset(0, 6);
197 TEST_EQUAL(mset.size(), 5);
198 TEST_STRINGS_EQUAL(mset.snippet(mset[0].get_document().get_data(), 18, stem),
199 "...<b>Mike's</b> <b>Mechanical</b>...");
200 TEST_STRINGS_EQUAL(mset.snippet(mset[1].get_document().get_data(), 80, stem),
201 "<b>Mike</b> <b>McDonald</b> is a <b>mechanic</b> who enjoys repairing things of a <b>mechanical</b> sort.");
202 TEST_STRINGS_EQUAL(mset.snippet(mset[2].get_document().get_data(), 102, stem),
203 "From autos to zip-lines, from tea-lights to x-rays, from sea ships to u-boats - <b>Mike</b> can fix them all.");
204 TEST_STRINGS_EQUAL(mset.snippet(mset[3].get_document().get_data(), 64, stem),
205 "How <b>much</b> o'brien do we have? <b>Miles</b> O'Brien, that's how <b>much</b>.");
206 // The requested length is in bytes, so the "fancy" apostrophe results in
207 // fewer Unicode characters in this sample than the previous one.
208 TEST_STRINGS_EQUAL(mset.snippet(mset[4].get_document().get_data(), 64, stem),
209 "...<b>much</b> o’brien do we have? <b>Miles</b> O’Brien, that’s how <b>much</b>.");
211 return true;