Bug 20244: Improve Elasticsearch ISBN indexing
[koha.git] / t / Koha / SearchEngine / Elasticsearch.t
blob933fdfb644d1c08a30557e9e8ab9e4b355828a30
1 #!/usr/bin/perl
3 # This file is part of Koha.
5 # Koha is free software; you can redistribute it and/or modify it
6 # under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 3 of the License, or
8 # (at your option) any later version.
10 # Koha is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with Koha; if not, see <http://www.gnu.org/licenses>.
18 use Modern::Perl;
20 use Test::More tests => 4;
21 use Test::Exception;
23 use t::lib::Mocks;
25 use Test::MockModule;
27 use MARC::Record;
28 use Try::Tiny;
30 use Koha::SearchEngine::Elasticsearch;
31 use Koha::SearchEngine::Elasticsearch::Search;
subtest '_read_configuration() tests' => sub {

    plan tests => 10;

    my $configuration;

    # Incomplete configurations, each paired with the message that the
    # Koha::Exceptions::Config::MissingEntry exception is expected to carry.
    my @broken_configs = (

        # 'elasticsearch' missing in configuration
        [ undef, "Missing 'elasticsearch' block in config file" ],

        # 'elasticsearch' present but no 'server' entry
        [ {}, "Missing 'server' entry in config file for elasticsearch" ],

        # 'elasticsearch' and 'server' entries present, but no 'index_name'
        [
            { server => 'a_server' },
            "Missing 'index_name' entry in config file for elasticsearch"
        ],
    );

    foreach my $case (@broken_configs) {
        my ( $es_block, $expected_message ) = @{$case};

        t::lib::Mocks::mock_config( 'elasticsearch', $es_block );
        throws_ok {
            $configuration = Koha::SearchEngine::Elasticsearch::_read_configuration;
        }
        'Koha::Exceptions::Config::MissingEntry',
          'Configuration problem, exception thrown';

        # throws_ok leaves the caught exception in $@ for inspection.
        is( $@->message, $expected_message, 'Exception message is correct' );
    }

    # Complete configurations: a single server given as a plain string, then
    # two servers given as an array reference. Each entry pairs the 'server'
    # value with the node list expected back.
    my @servers       = ( 'a_server', 'another_server' );
    my @valid_configs = (
        [ 'a_server', ['a_server'] ],
        [ \@servers,  \@servers ],
    );

    foreach my $case (@valid_configs) {
        my ( $server_entry, $expected_nodes ) = @{$case};

        t::lib::Mocks::mock_config( 'elasticsearch', { server => $server_entry, index_name => 'index' } );

        $configuration = Koha::SearchEngine::Elasticsearch::_read_configuration;
        is( $configuration->{index_name}, 'index', 'Index configuration parsed correctly' );
        is_deeply( $configuration->{nodes}, $expected_nodes, 'Server configuration parsed correctly' );
    }
};
subtest 'get_elasticsearch_settings() tests' => sub {

    plan tests => 1;

    # Read the index settings through an instance bound to the biblios index
    # and check one known value inside the analysis section.
    my $biblios_index = $Koha::SearchEngine::Elasticsearch::BIBLIOS_INDEX;
    my $indexer       = Koha::SearchEngine::Elasticsearch->new( { index => $biblios_index } );

    my $index_settings  = $indexer->get_elasticsearch_settings();
    my $phrase_analyzer = $index_settings->{index}{analysis}{analyzer}{analyser_phrase};

    is( $phrase_analyzer->{tokenizer}, 'keyword', 'Index settings parsed correctly' );
};
subtest 'get_elasticsearch_mappings() tests' => sub {

    plan tests => 1;

    # Read the field mappings through an instance bound to the biblios index
    # and check the type declared for the catch-all '_all' field.
    my $biblios_index = $Koha::SearchEngine::Elasticsearch::BIBLIOS_INDEX;
    my $indexer       = Koha::SearchEngine::Elasticsearch->new( { index => $biblios_index } );

    my $field_mappings = $indexer->get_elasticsearch_mappings();
    my $all_field      = $field_mappings->{data}{_all};

    is( $all_field->{type}, 'string', 'Field mappings parsed correctly' );
};
subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests' => sub {

    # Must equal the number of assertions executed below. Keep it in sync
    # whenever an assertion is added or removed, otherwise the subtest fails
    # with a "planned N but ran M" error even when every assertion passes.
    plan tests => 47;

    t::lib::Mocks::mock_preference('marcflavour', 'MARC21');

    # Mappings handed to the code under test through the mocked
    # _foreach_mapping() below. The 'unimarc' entry checks that mappings
    # for another MARC flavour are ignored.
    my @mappings = (
        {
            name => 'control_number',
            type => 'string',
            facet => 0,
            suggestible => 0,
            sort => undef,
            marc_type => 'marc21',
            marc_field => '001',
        },
        {
            name => 'isbn',
            type => 'isbn',
            facet => 0,
            suggestible => 0,
            sort => 0,
            marc_type => 'marc21',
            marc_field => '020a',
        },
        {
            name => 'author',
            type => 'string',
            facet => 1,
            suggestible => 1,
            sort => undef,
            marc_type => 'marc21',
            marc_field => '100a',
        },
        {
            name => 'author',
            type => 'string',
            facet => 1,
            suggestible => 1,
            sort => 1,
            marc_type => 'marc21',
            marc_field => '110a',
        },
        {
            name => 'title',
            type => 'string',
            facet => 0,
            suggestible => 1,
            sort => 1,
            marc_type => 'marc21',
            marc_field => '245(ab)ab',
        },
        {
            name => 'unimarc_title',
            type => 'string',
            facet => 0,
            suggestible => 1,
            sort => 1,
            marc_type => 'unimarc',
            marc_field => '245a',
        },
        {
            name => 'title',
            type => 'string',
            facet => 0,
            suggestible => undef,
            sort => 0,
            marc_type => 'marc21',
            marc_field => '220',
        },
        {
            name => 'title_wildcard',
            type => 'string',
            facet => 0,
            suggestible => 0,
            sort => undef,
            marc_type => 'marc21',
            marc_field => '245',
        },
        {
            name => 'sum_item_price',
            type => 'sum',
            facet => 0,
            suggestible => 0,
            sort => 0,
            marc_type => 'marc21',
            marc_field => '952g',
        },
        {
            name => 'items_withdrawn_status',
            type => 'boolean',
            facet => 0,
            suggestible => 0,
            sort => 0,
            marc_type => 'marc21',
            marc_field => '9520',
        },
        {
            name => 'type_of_record',
            type => 'string',
            facet => 0,
            suggestible => 0,
            sort => 0,
            marc_type => 'marc21',
            marc_field => 'leader_/6',
        },
        {
            name => 'type_of_record_and_bib_level',
            type => 'string',
            facet => 0,
            suggestible => 0,
            sort => 0,
            marc_type => 'marc21',
            marc_field => 'leader_/6-7',
        },
    );

    # Replace the mapping iterator so that the callback is invoked once per
    # entry of @mappings (looked up at call time, so later push/pop on
    # @mappings is picked up too).
    my $se = Test::MockModule->new('Koha::SearchEngine::Elasticsearch');
    $se->mock('_foreach_mapping', sub {
        my ($self, $sub) = @_;

        foreach my $map (@mappings) {
            $sub->(
                $map->{name},
                $map->{type},
                $map->{facet},
                $map->{suggestible},
                $map->{sort},
                $map->{marc_type},
                $map->{marc_field}
            );
        }
    });

    my $see = Koha::SearchEngine::Elasticsearch::Search->new({ index => $Koha::SearchEngine::Elasticsearch::BIBLIOS_INDEX });

    # Full 24-character MARC21 leader. The 'leader_/6' and 'leader_/6-7'
    # mappings are asserted below to yield 'a' and 'am', so offsets 5-7 must
    # be 'cam'; a whitespace-collapsed leader would put other characters there.
    my $leader = '     cam  22      a 4500';

    my $marc_record_1 = MARC::Record->new();
    $marc_record_1->leader($leader);
    $marc_record_1->append_fields(
        MARC::Field->new('001', '123'),
        MARC::Field->new('020', '', '', a => '1-56619-909-3'),
        MARC::Field->new('100', '', '', a => 'Author 1'),
        MARC::Field->new('110', '', '', a => 'Corp Author'),
        MARC::Field->new('210', '', '', a => 'Title 1'),
        MARC::Field->new('245', '', '', a => 'Title:', b => 'first record'),
        MARC::Field->new('999', '', '', c => '1234567'),
        # ' ' for testing trimming of white space in boolean value callback:
        MARC::Field->new('952', '', '', 0 => ' ', g => '123.30'),
        MARC::Field->new('952', '', '', 0 => 0, g => '127.20'),
    );
    my $marc_record_2 = MARC::Record->new();
    $marc_record_2->leader($leader);
    $marc_record_2->append_fields(
        MARC::Field->new('100', '', '', a => 'Author 2'),
        # MARC::Field->new('210', '', '', a => 'Title 2'),
        # MARC::Field->new('245', '', '', a => 'Title: second record'),
        MARC::Field->new('999', '', '', c => '1234568'),
        MARC::Field->new('952', '', '', 0 => 1, g => 'string where should be numeric'),
    );
    my $records = [$marc_record_1, $marc_record_2];

    $see->get_elasticsearch_mappings(); #sort_fields will call this and use the actual db values unless we call it first

    my $docs = $see->marc_records_to_documents($records);

    # First record:
    is(scalar @{$docs}, 2, 'Two records converted to documents');

    is($docs->[0][0], '1234567', 'First document biblionumber should be set as first element in document touple');

    is_deeply($docs->[0][1]->{control_number}, ['123'], 'First record control number should be set correctly');

    is(scalar @{$docs->[0][1]->{author}}, 2, 'First document author field should contain two values');
    is_deeply($docs->[0][1]->{author}, ['Author 1', 'Corp Author'], 'First document author field should be set correctly');

    is(scalar @{$docs->[0][1]->{author__sort}}, 2, 'First document author__sort field should have two values');
    is_deeply($docs->[0][1]->{author__sort}, ['Author 1', 'Corp Author'], 'First document author__sort field should be set correctly');

    is(scalar @{$docs->[0][1]->{title__sort}}, 3, 'First document title__sort field should have three values');
    is_deeply($docs->[0][1]->{title__sort}, ['Title:', 'first record', 'Title: first record'], 'First document title__sort field should be set correctly');

    is(scalar @{$docs->[0][1]->{title_wildcard}}, 2, 'First document title_wildcard field should have two values');
    is_deeply($docs->[0][1]->{title_wildcard}, ['Title:', 'first record'], 'First document title_wildcard field should be set correctly');

    is(scalar @{$docs->[0][1]->{author__suggestion}}, 2, 'First document author__suggestion field should contain two values');
    is_deeply(
        $docs->[0][1]->{author__suggestion},
        [
            {
                'input' => 'Author 1'
            },
            {
                'input' => 'Corp Author'
            }
        ],
        'First document author__suggestion field should be set correctly'
    );

    is(scalar @{$docs->[0][1]->{title__suggestion}}, 3, 'First document title__suggestion field should contain three values');
    is_deeply(
        $docs->[0][1]->{title__suggestion},
        [
            { 'input' => 'Title:' },
            { 'input' => 'first record' },
            { 'input' => 'Title: first record' }
        ],
        'First document title__suggestion field should be set correctly'
    );

    ok(!(defined $docs->[0][1]->{title__facet}), 'First document should have no title__facet field');

    is(scalar @{$docs->[0][1]->{author__facet}}, 2, 'First document author__facet field should have two values');
    is_deeply(
        $docs->[0][1]->{author__facet},
        ['Author 1', 'Corp Author'],
        'First document author__facet field should be set correctly'
    );

    is(scalar @{$docs->[0][1]->{items_withdrawn_status}}, 2, 'First document items_withdrawn_status field should have two values');
    is_deeply(
        $docs->[0][1]->{items_withdrawn_status},
        ['false', 'false'],
        'First document items_withdrawn_status field should be set correctly'
    );

    # 123.30 + 127.20 from the two 952$g subfields of the first record.
    is(
        $docs->[0][1]->{sum_item_price},
        '250.5',
        'First document sum_item_price field should be set correctly'
    );

    ok(defined $docs->[0][1]->{marc_data}, 'First document marc_data field should be set');
    ok(defined $docs->[0][1]->{marc_format}, 'First document marc_format field should be set');
    is($docs->[0][1]->{marc_format}, 'base64ISO2709', 'First document marc_format should be set correctly');

    my $decoded_marc_record = $see->decode_record_from_result($docs->[0][1]);

    ok($decoded_marc_record->isa('MARC::Record'), "base64ISO2709 record successfully decoded from result");
    is($decoded_marc_record->as_usmarc(), $marc_record_1->as_usmarc(), "Decoded base64ISO2709 record has same data as original record");

    is(scalar @{$docs->[0][1]->{type_of_record}}, 1, 'First document type_of_record field should have one value');
    is_deeply(
        $docs->[0][1]->{type_of_record},
        ['a'],
        'First document type_of_record field should be set correctly'
    );

    is(scalar @{$docs->[0][1]->{type_of_record_and_bib_level}}, 1, 'First document type_of_record_and_bib_level field should have one value');
    is_deeply(
        $docs->[0][1]->{type_of_record_and_bib_level},
        ['am'],
        'First document type_of_record_and_bib_level field should be set correctly'
    );

    is(scalar @{$docs->[0][1]->{isbn}}, 4, 'First document isbn field should contain four values');
    is_deeply($docs->[0][1]->{isbn}, ['978-1-56619-909-4', '9781566199094', '1-56619-909-3', '1566199093'], 'First document isbn field should be set correctly');

    # Second record:

    is(scalar @{$docs->[1][1]->{author}}, 1, 'Second document author field should contain one value');
    is_deeply($docs->[1][1]->{author}, ['Author 2'], 'Second document author field should be set correctly');

    is(scalar @{$docs->[1][1]->{items_withdrawn_status}}, 1, 'Second document items_withdrawn_status field should have one value');
    is_deeply(
        $docs->[1][1]->{items_withdrawn_status},
        ['true'],
        'Second document items_withdrawn_status field should be set correctly'
    );

    # NOTE(review): the expected-value line of this assertion was missing
    # from the extracted source. 0 is assumed, because the only 952$g value
    # of the second record is non-numeric - confirm against the repository.
    is(
        $docs->[1][1]->{sum_item_price},
        0,
        'Second document sum_item_price field should be set correctly'
    );

    # Mappings marc_type:

    ok(!(defined $docs->[0][1]->{unimarc_title}), "No mapping when marc_type doesn't match marc flavour");

    # Marc serialization format fallback for records exceeding ISO2709 max record size

    my $large_marc_record = MARC::Record->new();
    $large_marc_record->leader($leader);

    $large_marc_record->append_fields(
        MARC::Field->new('100', '', '', a => 'Author 1'),
        MARC::Field->new('110', '', '', a => 'Corp Author'),
        MARC::Field->new('210', '', '', a => 'Title 1'),
        MARC::Field->new('245', '', '', a => 'Title:', b => 'large record'),
        MARC::Field->new('999', '', '', c => '1234567'),
    );

    # Pre-decrement loop: the item field is appended 1637 times.
    my $item_field = MARC::Field->new('952', '', '', o => '123456789123456789123456789', p => '123456789', z => 'test');
    my $items_count = 1638;
    while(--$items_count) {
        $large_marc_record->append_fields($item_field);
    }

    $docs = $see->marc_records_to_documents([$large_marc_record]);

    is($docs->[0][1]->{marc_format}, 'MARCXML', 'For record exceeding max record size marc_format should be set correctly');

    $decoded_marc_record = $see->decode_record_from_result($docs->[0][1]);

    ok($decoded_marc_record->isa('MARC::Record'), "MARCXML record successfully decoded from result");
    is($decoded_marc_record->as_xml_record(), $large_marc_record->as_xml_record(), "Decoded MARCXML record has same data as original record");

    # Malformed field expression: unmatched opening parenthesis.
    push @mappings, {
        name => 'title',
        type => 'string',
        facet => 0,
        suggestible => 1,
        sort => 1,
        marc_type => 'marc21',
        marc_field => '245((ab)ab',
    };

    my $exception = try {
        $see->marc_records_to_documents($records);
        # Nothing was thrown: yield undef rather than the documents, so the
        # "defined" assertion below cannot pass by accident.
        return;
    }
    catch {
        return $_;
    };

    ok(defined $exception, "Exception has been thrown when processing mapping with unmatched opening parenthesis");
    ok($exception->isa("Koha::Exceptions::Elasticsearch::MARCFieldExprParseError"), "Exception is of correct class");
    like($exception->message, qr/Unmatched opening parenthesis/, "Exception has the correct message");

    # Swap the malformed mapping for one with an unmatched closing parenthesis.
    pop @mappings;
    push @mappings, {
        name => 'title',
        type => 'string',
        facet => 0,
        suggestible => 1,
        sort => 1,
        marc_type => 'marc21',
        marc_field => '245(ab))ab',
    };

    $exception = try {
        $see->marc_records_to_documents($records);
        # See above: undef signals that no exception was raised.
        return;
    }
    catch {
        return $_;
    };

    ok(defined $exception, "Exception has been thrown when processing mapping with unmatched closing parenthesis");
    ok($exception->isa("Koha::Exceptions::Elasticsearch::MARCFieldExprParseError"), "Exception is of correct class");
    like($exception->message, qr/Unmatched closing parenthesis/, "Exception has the correct message");
};