3 * Copyright 1999,2000,2001 BrightStation PLC
4 * Copyright 2002 Ananova Ltd
5 * Copyright 2002,2003,2004,2007,2008,2009,2012,2015 Olly Betts
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
32 #include "testsuite.h"
// Amount of random input generated per random-input run: the generation
// loops count down from this value, it is reported as "Input size", and it
// is the baseline for the stemmed-size sanity checks (fail if the stemmed
// total exceeds JUNKSIZE * 101 / 100 or falls below JUNKSIZE / 2).
36 static const int JUNKSIZE
= 2 * 1048576;
// Name of the stemming language currently under test.  main() assigns it
// from each space-separated entry of the language list; it is also used to
// build the per-language data file paths and appears in progress messages.
38 static string language
;
// Stemmer used by the tests, invoked as stemmer(word).  main() replaces it
// with Xapian::Stem(language) before running the tests for each language.
40 static Xapian::Stem stemmer
;
46 // run stemmers on random text
50 static const char wordchars
[] =
51 "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz0123456789^\0";
53 tout
<< "Stemming random text... (seed " << seed
<< ")" << endl
;
58 for (int c
= JUNKSIZE
; c
; --c
) {
59 char ch
= wordchars
[(rand() >> 8) % sizeof wordchars
];
64 stemmed_size
+= stemmer(word
).length();
67 stemmed_size
+= stemmer(word
).length();
68 tout
<< "Input size " << JUNKSIZE
<< ", stemmed size " << stemmed_size
71 if (stemmed_size
> JUNKSIZE
* 101 / 100) {
72 FAIL_TEST("Stemmed data is significantly bigger than input: "
73 << stemmed_size
<< " vs. " << JUNKSIZE
);
75 if (stemmed_size
< JUNKSIZE
/ 2) {
76 FAIL_TEST("Stemmed data is significantly smaller than input: "
77 << stemmed_size
<< " vs. " << JUNKSIZE
);
82 // run stemmers on random junk
86 tout
<< "Stemming random junk... (seed " << seed
<< ")" << endl
;
91 for (int c
= JUNKSIZE
; c
; --c
) {
92 char ch
= rand() >> 8;
97 stemmed_size
+= stemmer(word
).length();
100 stemmed_size
+= stemmer(word
).length();
101 tout
<< "Input size " << JUNKSIZE
<< ", stemmed size " << stemmed_size
104 if (stemmed_size
> JUNKSIZE
* 101 / 100) {
105 FAIL_TEST("Stemmed data is significantly bigger than input ("
106 << stemmed_size
<< " vs. " << JUNKSIZE
);
108 if (stemmed_size
< JUNKSIZE
/ 2) {
109 FAIL_TEST("Stemmed data is significantly smaller than input ("
110 << stemmed_size
<< " vs. " << JUNKSIZE
);
118 string dir
= srcdir
+ "/../../xapian-data/stemming/";
120 ifstream
voc((dir
+ language
+ "/voc.txt").c_str());
121 if (!voc
.is_open()) {
122 SKIP_TEST(language
<< "/voc.txt not found");
125 ifstream
st((dir
+ language
+ "/output.txt").c_str());
128 FAIL_TEST(language
<< "/output.txt not found");
131 tout
<< "Testing " << language
<< " with Snowball dictionary..." << endl
;
135 string word
, stem
, expect
;
136 while (!voc
.eof() && !st
.eof()) {
140 stem
= stemmer(word
);
142 TEST_EQUAL(stem
, expect
);
147 if (pass
== 2) break;
149 voc
.open((dir
+ language
+ "/voc2.txt").c_str());
150 if (!voc
.is_open()) break;
152 st
.open((dir
+ language
+ "/output2.txt").c_str());
155 FAIL_TEST(language
<< "/output2.txt not found");
157 tout
<< "Testing " << language
<< " with supplemental dictionary..."
165 // ##################################################################
166 // # End of actual tests #
167 // ##################################################################
169 /// The lists of tests to perform
170 static const test_desc tests
[] = {
171 {"stemrandom", test_stemrandom
},
172 {"stemjunk", test_stemjunk
},
173 {"stemdict", test_stemdict
},
177 int main(int argc
, char **argv
)
179 string langs
= Xapian::Stem::get_available_languages();
180 test_driver::add_command_line_option("languages", 'l', &langs
);
184 test_driver::add_command_line_option("seed", 's', &seed_str
);
186 test_driver::parse_command_line(argc
, argv
);
187 srcdir
= test_driver::get_srcdir();
190 if (!seed_str
.empty()) seed
= atoi(seed_str
.c_str());
191 cout
<< "The random seed is " << seed
<< endl
;
192 cout
<< "Please report the seed when reporting a test failure." << endl
;
194 string::size_type b
= 0;
195 while (b
!= langs
.size()) {
196 string::size_type a
= b
;
197 while (b
< langs
.size() && langs
[b
] != ' ') ++b
;
198 language
.assign(langs
, a
, b
- a
);
199 while (b
< langs
.size() && langs
[b
] == ' ') ++b
;
200 cout
<< "Running tests with " << language
<< " stemmer..." << endl
;
201 stemmer
= Xapian::Stem(language
);
202 result
= max(result
, test_driver::run(tests
));
205 } catch (const char * e
) {