Use << instead of + with FAIL_TEST and SKIP_TEST
[xapian.git] / xapian-core / tests / stemtest.cc
blob1a4c857a91e278cd16569792962dc7186201e3e4
1 /* stemtest.cc
3 * Copyright 1999,2000,2001 BrightStation PLC
4 * Copyright 2002 Ananova Ltd
5 * Copyright 2002,2003,2004,2007,2008,2009,2012,2015 Olly Betts
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20 * USA
23 #include <config.h>
25 #include <cstdlib>
27 #include <string>
28 #include <fstream>
29 #include <iostream>
31 #include <xapian.h>
32 #include "testsuite.h"
34 using namespace std;
36 static const int JUNKSIZE = 2 * 1048576;
38 static string language;
40 static Xapian::Stem stemmer;
42 static string srcdir;
44 static int seed;
46 // run stemmers on random text
47 static bool
48 test_stemrandom()
50 static const char wordchars[] =
51 "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz0123456789^\0";
53 tout << "Stemming random text... (seed " << seed << ")" << endl;
54 srand(seed);
56 string word;
57 int stemmed_size = 0;
58 for (int c = JUNKSIZE; c; --c) {
59 char ch = wordchars[(rand() >> 8) % sizeof wordchars];
60 if (ch) {
61 word += ch;
62 continue;
64 stemmed_size += stemmer(word).length();
65 word.resize(0);
67 stemmed_size += stemmer(word).length();
68 tout << "Input size " << JUNKSIZE << ", stemmed size " << stemmed_size
69 << endl;
71 if (stemmed_size > JUNKSIZE * 101 / 100) {
72 FAIL_TEST("Stemmed data is significantly bigger than input: "
73 << stemmed_size << " vs. " << JUNKSIZE);
75 if (stemmed_size < JUNKSIZE / 2) {
76 FAIL_TEST("Stemmed data is significantly smaller than input: "
77 << stemmed_size << " vs. " << JUNKSIZE);
79 return true;
82 // run stemmers on random junk
83 static bool
84 test_stemjunk()
86 tout << "Stemming random junk... (seed " << seed << ")" << endl;
87 srand(seed);
89 string word;
90 int stemmed_size = 0;
91 for (int c = JUNKSIZE; c; --c) {
92 char ch = rand() >> 8;
93 if (ch) {
94 word += ch;
95 continue;
97 stemmed_size += stemmer(word).length();
98 word.resize(0);
100 stemmed_size += stemmer(word).length();
101 tout << "Input size " << JUNKSIZE << ", stemmed size " << stemmed_size
102 << endl;
104 if (stemmed_size > JUNKSIZE * 101 / 100) {
105 FAIL_TEST("Stemmed data is significantly bigger than input ("
106 << stemmed_size << " vs. " << JUNKSIZE);
108 if (stemmed_size < JUNKSIZE / 2) {
109 FAIL_TEST("Stemmed data is significantly smaller than input ("
110 << stemmed_size << " vs. " << JUNKSIZE);
112 return true;
115 static bool
116 test_stemdict()
118 string dir = srcdir + "/../../xapian-data/stemming/";
120 ifstream voc((dir + language + "/voc.txt").c_str());
121 if (!voc.is_open()) {
122 SKIP_TEST(language << "/voc.txt not found");
125 ifstream st((dir + language + "/output.txt").c_str());
126 if (!st.is_open()) {
127 voc.close();
128 FAIL_TEST(language << "/output.txt not found");
131 tout << "Testing " << language << " with Snowball dictionary..." << endl;
133 int pass = 1;
134 while (true) {
135 string word, stem, expect;
136 while (!voc.eof() && !st.eof()) {
137 getline(voc, word);
138 getline(st, expect);
140 stem = stemmer(word);
142 TEST_EQUAL(stem, expect);
144 voc.close();
145 st.close();
147 if (pass == 2) break;
149 voc.open((dir + language + "/voc2.txt").c_str());
150 if (!voc.is_open()) break;
152 st.open((dir + language + "/output2.txt").c_str());
153 if (!st.is_open()) {
154 voc.close();
155 FAIL_TEST(language << "/output2.txt not found");
157 tout << "Testing " << language << " with supplemental dictionary..."
158 << endl;
159 ++pass;
162 return true;
165 // ##################################################################
166 // # End of actual tests #
167 // ##################################################################
169 /// The lists of tests to perform
170 static const test_desc tests[] = {
171 {"stemrandom", test_stemrandom},
172 {"stemjunk", test_stemjunk},
173 {"stemdict", test_stemdict},
174 {0, 0}
177 int main(int argc, char **argv)
178 try {
179 string langs = Xapian::Stem::get_available_languages();
180 test_driver::add_command_line_option("languages", 'l', &langs);
182 seed = 42;
183 string seed_str;
184 test_driver::add_command_line_option("seed", 's', &seed_str);
186 test_driver::parse_command_line(argc, argv);
187 srcdir = test_driver::get_srcdir();
188 int result = 0;
190 if (!seed_str.empty()) seed = atoi(seed_str.c_str());
191 cout << "The random seed is " << seed << endl;
192 cout << "Please report the seed when reporting a test failure." << endl;
194 string::size_type b = 0;
195 while (b != langs.size()) {
196 string::size_type a = b;
197 while (b < langs.size() && langs[b] != ' ') ++b;
198 language.assign(langs, a, b - a);
199 while (b < langs.size() && langs[b] == ' ') ++b;
200 cout << "Running tests with " << language << " stemmer..." << endl;
201 stemmer = Xapian::Stem(language);
202 result = max(result, test_driver::run(tests));
204 return result;
205 } catch (const char * e) {
206 cout << e << endl;
207 return 1;