Bug 22592: Add index scan emulation to Elasticsearch
[koha.git] / t / db_dependent / Koha / SearchEngine / Elasticsearch / QueryBuilder.t
blob08e823578aa059712496c320d7ac54884d07b22e
1 #!/usr/bin/perl
3 # This file is part of Koha.
5 # Koha is free software; you can redistribute it and/or modify it
6 # under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 3 of the License, or
8 # (at your option) any later version.
10 # Koha is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with Koha; if not, see <http://www.gnu.org/licenses>.
18 use Modern::Perl;
20 use C4::Context;
21 use Test::Exception;
22 use t::lib::Mocks;
23 use t::lib::TestBuilder;
24 use Test::More tests => 6;
26 use List::Util qw( all );
28 use Koha::Database;
29 use Koha::SearchEngine::Elasticsearch::QueryBuilder;
# Run the whole test file inside a database transaction; it is rolled back
# by the txn_rollback call at the end of the file.
31 my $schema = Koha::Database->new->schema;
32 $schema->storage->txn_begin;
# Replace get_elasticsearch_mappings() in Koha::SearchEngine::Elasticsearch
# with a stub that returns a fixed set of field mappings, so the tests below
# work against known fields.
34 my $se = Test::MockModule->new( 'Koha::SearchEngine::Elasticsearch' );
35 $se->mock( 'get_elasticsearch_mappings', sub {
36 my ($self) = @_;
38 my %all_mappings;
# Field definitions handed back to the caller. Several fields come with a
# companion "<name>__sort" entry.
40 my $mappings = {
41 data => {
42 properties => {
43 title => {
44 type => 'text'
46 title__sort => {
47 type => 'text'
49 subject => {
50 type => 'text',
51 facet => 1
53 itemnumber => {
54 type => 'integer'
56 sortablenumber => {
57 type => 'integer'
59 sortablenumber__sort => {
60 type => 'integer'
62 Heading => {
63 type => 'text'
65 Heading__sort => {
66 type => 'text'
# The mappings are stored under the name of the index the object was built for.
71 $all_mappings{$self->index} = $mappings;
# Per-field flags registered on the object through sort_fields().
# NOTE(review): 1 presumably marks a field as having a dedicated sort field and
# 0 as not having one -- confirm against the QueryBuilder implementation.
73 my $sort_fields = {
74 $self->index => {
75 title => 1,
76 subject => 0,
77 itemnumber => 0,
78 sortablenumber => 1,
79 mainentry => 1
82 $self->sort_fields($sort_fields->{$self->index});
84 return $all_mappings{$self->index};
85 });
# Helper closure that removes the two cached search-field entries
# ('elasticsearch_search_fields_staff_client' and
# 'elasticsearch_search_fields_opac') from the Koha cache instance.
# It is invoked in the weighted-fields subtest after the weights are updated.
87 my $cache = Koha::Caches->get_instance();
88 my $clear_search_fields_cache = sub {
89 $cache->clear_from_cache('elasticsearch_search_fields_staff_client');
90 $cache->clear_from_cache('elasticsearch_search_fields_opac');
# Exercises build_authorities_query_compat() on an 'authorities' query builder.
# Each operator ('contains', 'is', 'start') is run once per key of
# $koha_to_index_name; the 'all' and 'any' keys are checked separately from the
# single-field keys. Sorting, the authority-type filter and the error raised
# for an unknown marclist value are checked afterwards.
93 subtest 'build_authorities_query_compat() tests' => sub {
94 plan tests => 55;
96 my $qb;
98 ok(
99 $qb = Koha::SearchEngine::Elasticsearch::QueryBuilder->new({ 'index' => 'authorities' }),
100 'Creating new query builder object for authorities'
# 'contains' with a one-letter term: the query string is expected to be the
# term followed by a wildcard, with analyze_wildcard switched on.
103 my $koha_to_index_name = $Koha::SearchEngine::Elasticsearch::QueryBuilder::koha_to_index_name;
104 my $search_term = 'a';
105 foreach my $koha_name ( keys %{ $koha_to_index_name } ) {
106 my $query = $qb->build_authorities_query_compat( [ $koha_name ], undef, undef, ['contains'], [$search_term], 'AUTH_TYPE', 'asc' );
# NOTE(review): both branches below make the same assertion, so the
# conditional currently has no effect.
107 if ( $koha_name eq 'all' || $koha_name eq 'any' ) {
108 is( $query->{query}->{bool}->{must}[0]->{query_string}->{query},
109 "a*");
110 } else {
111 is( $query->{query}->{bool}->{must}[0]->{query_string}->{query},
112 "a*");
114 is( $query->{query}->{bool}->{must}[0]->{query_string}->{analyze_wildcard}, JSON::true, 'Set analyze_wildcard true' );
# 'contains' with two words: each word is wildcarded and the words are joined
# with AND. For 'all'/'any' a list of fields is expected, otherwise a single
# default_field equal to the mapped index name.
117 $search_term = 'Donald Duck';
118 foreach my $koha_name ( keys %{ $koha_to_index_name } ) {
119 my $query = $qb->build_authorities_query_compat( [ $koha_name ], undef, undef, ['contains'], [$search_term], 'AUTH_TYPE', 'asc' );
120 is( $query->{query}->{bool}->{must}[0]->{query_string}->{query}, "(Donald*) AND (Duck*)" );
121 if ( $koha_name eq 'all' || $koha_name eq 'any' ) {
122 isa_ok( $query->{query}->{bool}->{must}[0]->{query_string}->{fields}, 'ARRAY')
123 } else {
124 is( $query->{query}->{bool}->{must}[0]->{query_string}->{default_field}, $koha_to_index_name->{$koha_name} );
# 'is' operator: 'all'/'any' are expected to produce a multi_match whose fields
# all end in ".ci_raw"; single-field searches a term clause on "<index>.ci_raw".
128 foreach my $koha_name ( keys %{ $koha_to_index_name } ) {
129 my $query = $qb->build_authorities_query_compat( [ $koha_name ], undef, undef, ['is'], [$search_term], 'AUTH_TYPE', 'asc' );
130 if ( $koha_name eq 'all' || $koha_name eq 'any' ) {
132 $query->{query}->{bool}->{must}[0]->{multi_match}->{query},
133 "Donald Duck"
135 my $all_matches = all { /\.ci_raw$/ }
136 @{$query->{query}->{bool}->{must}[0]->{multi_match}->{fields}};
137 ok( $all_matches, 'Correct fields parameter for "is" query in "any" or "all"' );
138 } else {
140 $query->{query}->{bool}->{must}[0]->{term}->{$koha_to_index_name->{$koha_name} . ".ci_raw"},
141 "Donald Duck"
# 'start' operator: 'all'/'any' are expected to produce a bool/should list of
# prefix clauses, each on a ".ci_raw" field with the unmodified search term;
# single-field searches a single prefix clause on "<index>.ci_raw".
146 foreach my $koha_name ( keys %{ $koha_to_index_name } ) {
147 my $query = $qb->build_authorities_query_compat( [ $koha_name ], undef, undef, ['start'], [$search_term], 'AUTH_TYPE', 'asc' );
148 if ( $koha_name eq 'all' || $koha_name eq 'any' ) {
# Each element of "should" is { prefix => { <field> => <value> } }; flattening
# the inner hash yields the field name at [0] and the value at [1].
149 my $all_matches = all { (%{$_->{prefix}})[0] =~ /\.ci_raw$/ && (%{$_->{prefix}})[1] eq "Donald Duck" }
150 @{$query->{query}->{bool}->{must}[0]->{bool}->{should}};
151 ok( $all_matches, "Correct multiple prefix query" );
152 } else {
153 is( $query->{query}->{bool}->{must}[0]->{prefix}->{$koha_to_index_name->{$koha_name} . ".ci_raw"}, "Donald Duck" );
157 # Sorting: 'HeadingAsc' / 'HeadingDsc' are expected to sort on heading__sort
158 my $query = $qb->build_authorities_query_compat( [ 'mainentry' ], undef, undef, ['start'], [$search_term], 'AUTH_TYPE', 'HeadingAsc' );
159 is_deeply(
160 $query->{sort},
163 'heading__sort' => 'asc'
166 "ascending sort parameter properly formed"
168 $query = $qb->build_authorities_query_compat( [ 'mainentry' ], undef, undef, ['start'], [$search_term], 'AUTH_TYPE', 'HeadingDsc' );
169 is_deeply(
170 $query->{sort},
173 'heading__sort' => 'desc'
176 "descending sort parameter properly formed"
179 # Authorities type: the type code passed in becomes a term filter on authtype.raw
180 $query = $qb->build_authorities_query_compat( [ 'mainentry' ], undef, undef, ['contains'], [$search_term], 'AUTH_TYPE', 'asc' );
181 is_deeply(
182 $query->{query}->{bool}->{filter},
183 { term => { 'authtype.raw' => 'AUTH_TYPE' } },
184 "authorities type code is used as filter"
187 # Failing case: an unknown marclist entry must raise Koha::Exceptions::WrongParameter
188 throws_ok {
189 $qb->build_authorities_query_compat( [ 'tomas' ], undef, undef, ['contains'], [$search_term], 'AUTH_TYPE', 'asc' );
191 'Koha::Exceptions::WrongParameter',
192 'Exception thrown on invalid value in the marclist param';
# Exercises build_query() and build_query_compat() on a 'biblios' query builder:
# sort parameters, facet aggregations driven by system preferences, query
# string construction with QueryAutoTruncate off and on, the suppress option,
# and scan queries.
195 subtest 'build_query tests' => sub {
196 plan tests => 48;
198 my $qb;
201 $qb = Koha::SearchEngine::Elasticsearch::QueryBuilder->new({ 'index' => 'biblios' }),
202 'Creating new query builder object for biblios'
# Sort: 'title_asc' is expected to end up as an ascending sort on title__sort.
205 my @sort_by = 'title_asc';
206 my @sort_params = $qb->_convert_sort_fields(@sort_by);
207 my %options;
208 $options{sort} = \@sort_params;
209 my $query = $qb->build_query('test', %options);
211 is_deeply(
212 $query->{sort},
215 'title__sort' => {
216 'order' => 'asc'
220 "sort parameter properly formed"
# Facets: the size of the ccode aggregation must follow the FacetMaxCount preference.
223 t::lib::Mocks::mock_preference('FacetMaxCount','37');
224 $query = $qb->build_query('test', %options);
225 ok( defined $query->{aggregations}{ccode}{terms}{size},'we need to ask for a size or we get only 5 facet' );
226 is( $query->{aggregations}{ccode}{terms}{size}, 37,'we ask for the size as defined by the syspref FacetMaxCount');
# DisplayLibraryFacets selects which of the homebranch / holdingbranch
# aggregations are requested ('both', 'holding' or 'home').
228 t::lib::Mocks::mock_preference('DisplayLibraryFacets','both');
229 $query = $qb->build_query();
230 ok( defined $query->{aggregations}{homebranch},
231 'homebranch added to facets if DisplayLibraryFacets=both' );
232 ok( defined $query->{aggregations}{holdingbranch},
233 'holdingbranch added to facets if DisplayLibraryFacets=both' );
234 t::lib::Mocks::mock_preference('DisplayLibraryFacets','holding');
235 $query = $qb->build_query();
236 ok( !defined $query->{aggregations}{homebranch},
237 'homebranch not added to facets if DisplayLibraryFacets=holding' );
238 ok( defined $query->{aggregations}{holdingbranch},
239 'holdingbranch added to facets if DisplayLibraryFacets=holding' );
240 t::lib::Mocks::mock_preference('DisplayLibraryFacets','home');
241 $query = $qb->build_query();
242 ok( defined $query->{aggregations}{homebranch},
243 'homebranch added to facets if DisplayLibraryFacets=home' );
244 ok( !defined $query->{aggregations}{holdingbranch},
245 'holdingbranch not added to facets if DisplayLibraryFacets=home' );
# From here on auto-truncation is disabled: search terms must reach the query
# string without added wildcards.
247 t::lib::Mocks::mock_preference( 'QueryAutoTruncate', '' );
249 ( undef, $query ) = $qb->build_query_compat( undef, ['donald duck'] );
251 $query->{query}{query_string}{query},
252 "(donald duck)",
253 "query not altered if QueryAutoTruncate disabled"
256 ( undef, $query ) = $qb->build_query_compat( undef, ['donald duck'], ['title'] );
258 $query->{query}{query_string}{query},
259 '(title:(donald duck))',
260 'multiple words in a query term are enclosed in parenthesis'
263 ( undef, $query ) = $qb->build_query_compat( ['AND'], ['donald duck', 'disney'], ['title', 'author'] );
265 $query->{query}{query_string}{query},
266 '(title:(donald duck)) AND (author:disney)',
267 'multiple query terms are enclosed in parenthesis while a single one is not'
# build_query_compat() returns a list; the 4th and 5th elements are the CGI
# query string and the human-readable query description checked below.
270 my ($simple_query, $query_cgi, $query_desc);
271 ( undef, $query, $simple_query, $query_cgi, $query_desc ) = $qb->build_query_compat( undef, ['"donald duck"', 'walt disney'], ['ti', 'au'] );
272 is($query_cgi, 'idx=ti&q=%22donald%20duck%22&idx=au&q=walt%20disney', 'query cgi ok for multiterm query');
273 is($query_desc, '(title:("donald duck")) (author:(walt disney))', 'query desc ok for multiterm query');
# 'yr,st-year' searches: a single year, a closed range and open-ended ranges
# are expected to map onto date-of-publication terms / ranges.
275 ( undef, $query ) = $qb->build_query_compat( undef, ['2019'], ['yr,st-year'] );
277 $query->{query}{query_string}{query},
278 '(date-of-publication:2019)',
279 'Year in an st-year search is handled properly'
282 ( undef, $query ) = $qb->build_query_compat( undef, ['2018-2019'], ['yr,st-year'] );
284 $query->{query}{query_string}{query},
285 '(date-of-publication:[2018 TO 2019])',
286 'Year range in an st-year search is handled properly'
289 ( undef, $query ) = $qb->build_query_compat( undef, ['-2019'], ['yr,st-year'] );
291 $query->{query}{query_string}{query},
292 '(date-of-publication:[* TO 2019])',
293 'Open start year in year range of an st-year search is handled properly'
296 ( undef, $query ) = $qb->build_query_compat( undef, ['2019-'], ['yr,st-year'] );
298 $query->{query}{query_string}{query},
299 '(date-of-publication:[2019 TO *])',
300 'Open end year in year range of an st-year search is handled properly'
# Same search as above with an additional 'yr,st-numeric=-2019' limit, which is
# expected to be appended as a copydate range.
# NOTE(review): the test description below is identical to the previous one
# although this case covers the limit; consider giving it its own description.
303 ( undef, $query ) = $qb->build_query_compat( undef, ['2019-'], ['yr,st-year'], ['yr,st-numeric=-2019'] );
305 $query->{query}{query_string}{query},
306 '(date-of-publication:[2019 TO *]) AND copydate:[* TO 2019]',
307 'Open end year in year range of an st-year search is handled properly'
310 # Enable auto-truncation
311 t::lib::Mocks::mock_preference( 'QueryAutoTruncate', '1' );
313 ( undef, $query ) = $qb->build_query_compat( undef, ['donald duck'] );
315 $query->{query}{query_string}{query},
316 "(donald* duck*)",
317 "simple query is auto truncated when QueryAutoTruncate enabled"
320 # Ensure reserved words are not truncated
321 ( undef, $query ) = $qb->build_query_compat( undef,
322 ['donald or duck and mickey not mouse'] );
324 $query->{query}{query_string}{query},
325 "(donald* or duck* and mickey* not mouse*)",
326 "reserved words are not affected by QueryAutoTruncate"
# Terms that already carry a wildcard must not get a second one.
329 ( undef, $query ) = $qb->build_query_compat( undef, ['donald* duck*'] );
331 $query->{query}{query_string}{query},
332 "(donald* duck*)",
333 "query with '*' is unaltered when QueryAutoTruncate is enabled"
336 ( undef, $query ) = $qb->build_query_compat( undef, ['donald duck and the mouse'] );
338 $query->{query}{query_string}{query},
339 "(donald* duck* and the* mouse*)",
340 "individual words are all truncated and stopwords ignored"
343 ( undef, $query ) = $qb->build_query_compat( undef, ['*'] );
345 $query->{query}{query_string}{query},
346 "(*)",
347 "query of just '*' is unaltered when QueryAutoTruncate is enabled"
# Quoted phrases are left as they are; the 'available' limit is expected to be
# appended as onloan:false.
350 ( undef, $query ) = $qb->build_query_compat( undef, ['"donald duck"'], undef, ['available'] );
352 $query->{query}{query_string}{query},
353 '("donald duck") AND onloan:false',
354 "query with quotes is unaltered when QueryAutoTruncate is enabled"
358 ( undef, $query ) = $qb->build_query_compat( undef, ['"donald duck" and "the mouse"'] );
360 $query->{query}{query_string}{query},
361 '("donald duck" and "the mouse")',
362 "all quoted strings are unaltered if more than one in query"
# Field-prefixed terms ("field:value"): the value is truncated unless quoted,
# and the field name is expected in lower case.
365 ( undef, $query ) = $qb->build_query_compat( undef, ['barcode:123456'] );
367 $query->{query}{query_string}{query},
368 '(barcode:123456*)',
369 "query of specific field is truncated"
372 ( undef, $query ) = $qb->build_query_compat( undef, ['Local-number:"123456"'] );
374 $query->{query}{query_string}{query},
375 '(local-number:"123456")',
376 "query of specific field including hyphen and quoted is not truncated, field name is converted to lower case"
379 ( undef, $query ) = $qb->build_query_compat( undef, ['Local-number:123456'] );
381 $query->{query}{query_string}{query},
382 '(local-number:123456*)',
383 "query of specific field including hyphen and not quoted is truncated, field name is converted to lower case"
386 ( undef, $query ) = $qb->build_query_compat( undef, ['Local-number.raw:123456'] );
388 $query->{query}{query_string}{query},
389 '(local-number.raw:123456*)',
390 "query of specific field including period and not quoted is truncated, field name is converted to lower case"
393 ( undef, $query ) = $qb->build_query_compat( undef, ['Local-number.raw:"123456"'] );
395 $query->{query}{query_string}{query},
396 '(local-number.raw:"123456")',
397 "query of specific field including period and quoted is not truncated, field name is converted to lower case"
400 ( undef, $query ) = $qb->build_query_compat( undef, ['J.R.R'] );
402 $query->{query}{query_string}{query},
403 '(J.R.R*)',
404 "query including period is truncated but not split at periods"
407 ( undef, $query ) = $qb->build_query_compat( undef, ['title:"donald duck"'] );
409 $query->{query}{query_string}{query},
410 '(title:"donald duck")',
411 "query of specific field is not truncated when surrounded by quotes"
414 ( undef, $query ) = $qb->build_query_compat( undef, ['donald duck'], ['title'] );
416 $query->{query}{query_string}{query},
417 '(title:(donald* duck*))',
418 'words of a multi-word term are properly truncated'
421 ( undef, $query ) = $qb->build_query_compat( ['AND'], ['donald duck', 'disney'], ['title', 'author'] );
423 $query->{query}{query_string}{query},
424 '(title:(donald* duck*)) AND (author:disney*)',
425 'words of a multi-word term and single-word term are properly truncated'
# The suppress option (last argument): a true value is expected to append
# "AND suppress:0", a false value must leave the query untouched.
428 ( undef, $query ) = $qb->build_query_compat( undef, ['title:"donald duck"'], undef, undef, undef, undef, undef, { suppress => 1 } );
430 $query->{query}{query_string}{query},
431 '(title:"donald duck") AND suppress:0',
432 "query of specific field is added AND suppress:0"
435 ( undef, $query, $simple_query, $query_cgi, $query_desc ) = $qb->build_query_compat( undef, ['title:"donald duck"'], undef, undef, undef, undef, undef, { suppress => 0 } );
437 $query->{query}{query_string}{query},
438 '(title:"donald duck")',
439 "query of specific field is not added AND suppress:0"
441 is($query_cgi, 'idx=&q=title%3A%22donald%20duck%22', 'query cgi');
442 is($query_desc, 'title:"donald duck"', 'query desc ok');
444 # Scan queries (sixth argument true): the query matches everything and the
# scan term is turned into a case-insensitive "include" pattern on a terms
# aggregation over the "<field>__facet" field.
445 ( undef, $query, $simple_query, $query_cgi, $query_desc ) = $qb->build_query_compat( undef, ['new'], ['au'], undef, undef, 1 );
447 $query->{query}{query_string}{query},
448 '*',
449 "scan query is properly formed"
451 is_deeply(
452 $query->{aggregations}{'author'}{'terms'},
454 field => 'author__facet',
455 order => { '_term' => 'asc' },
456 include => '[nN][eE][wW].*'
458 "scan aggregation request is properly formed"
460 is($query_cgi, 'idx=au&q=new&scan=1', 'query cgi');
461 is($query_desc, 'new', 'query desc ok');
# With an empty index list the scan is expected to aggregate on 'subject'.
463 ( undef, $query, $simple_query, $query_cgi, $query_desc ) = $qb->build_query_compat( undef, ['new'], [], undef, undef, 1 );
465 $query->{query}{query_string}{query},
466 '*',
467 "scan query is properly formed"
469 is_deeply(
470 $query->{aggregations}{'subject'}{'terms'},
472 field => 'subject__facet',
473 order => { '_term' => 'asc' },
474 include => '[nN][eE][wW].*'
476 "scan aggregation request is properly formed"
478 is($query_cgi, 'idx=&q=new&scan=1', 'query cgi');
479 is($query_desc, 'new', 'query desc ok');
# Checks that build_authorities_query_compat() only builds search clauses for
# the form fields that actually carry a value: the marclist always lists every
# field, while the values of fields left empty arrive as undef.
subtest 'build query from form subtests' => sub {
    plan tests => 5;

    # Terminated with a semicolon; a trailing comma would silently chain this
    # declaration to the next statement through the comma operator.
    my $qb = Koha::SearchEngine::Elasticsearch::QueryBuilder->new({ 'index' => 'authorities' });

    # When searching for authorities from a record the form returns marclist
    # with blanks for unentered terms
    my @marclist = ('mainmainentry','mainentry','match', 'all');
    my @values   = ( undef, 'Hamilton', undef, undef);
    my @operator = ( 'contains', 'contains', 'contains', 'contains');

    # Only the second field has a value, so exactly one clause is expected.
    my $query = $qb->build_authorities_query_compat( \@marclist, undef,
                    undef, \@operator , \@values, 'AUTH_TYPE', 'asc' );
    is($query->{query}->{bool}->{must}[0]->{query_string}->{query}, "Hamilton*","Expected search is populated");
    is( scalar @{ $query->{query}->{bool}->{must} }, 1,"Only defined search is populated");

    # Fill in a second field. A single element is assigned with the scalar
    # form $values[2]; the one-element slice @values[2] triggers a warning.
    $values[2] = 'Jefferson';
    $query = $qb->build_authorities_query_compat( \@marclist, undef,
                    undef, \@operator , \@values, 'AUTH_TYPE', 'asc' );
    is($query->{query}->{bool}->{must}[0]->{query_string}->{query}, "Hamilton*","First index searched as expected");
    is($query->{query}->{bool}->{must}[1]->{query_string}->{query}, "Jefferson*","Second index searched when populated");
    is( scalar @{ $query->{query}->{bool}->{must} }, 2,"Only defined searches are populated");
};
# Checks the "weighted_fields" option of build_query_compat(): the query's
# "fields" list must contain the search fields with their weight appended as
# "name^weight", restricted to staff-client or OPAC fields depending on the
# is_opac option.
507 subtest 'build_query with weighted fields tests' => sub {
508 plan tests => 4;
# Stub _load_elasticsearch_mappings() with four biblio search fields:
# abstract is the only OPAC field; acqdate, title and subject are staff-client
# fields, and every mapping of acqdate has search => 0.
510 $se->mock( '_load_elasticsearch_mappings', sub {
511 return {
512 biblios => {
513 abstract => {
514 label => 'abstract',
515 type => 'string',
516 opac => 1,
517 staff_client => 0,
518 mappings => [{
519 marc_field => '520',
520 marc_type => 'marc21',
523 acqdate => {
524 label => 'acqdate',
525 type => 'string',
526 opac => 0,
527 staff_client => 1,
528 mappings => [{
529 marc_field => '952d',
530 marc_type => 'marc21',
531 search => 0,
532 }, {
533 marc_field => '9955',
534 marc_type => 'marc21',
535 search => 0,
538 title => {
539 label => 'title',
540 type => 'string',
541 opac => 0,
542 staff_client => 1,
543 mappings => [{
544 marc_field => '130',
545 marc_type => 'marc21'
548 subject => {
549 label => 'subject',
550 type => 'string',
551 opac => 0,
552 staff_client => 1,
553 mappings => [{
554 marc_field => '600a',
555 marc_type => 'marc21'
# Remove all existing search fields and reset the mappings.
# NOTE(review): the reset presumably repopulates the search fields from the
# stubbed _load_elasticsearch_mappings() above -- confirm.
562 my $qb = Koha::SearchEngine::Elasticsearch::QueryBuilder->new( { index => 'biblios' } );
563 Koha::SearchFields->search({})->delete;
564 Koha::SearchEngine::Elasticsearch->reset_elasticsearch_mappings();
# Give title and subject explicit weights, then drop the cached field lists so
# the new weights are picked up.
566 my $search_field;
567 $search_field = Koha::SearchFields->find({ name => 'title' });
568 $search_field->update({ weight => 25.0 });
569 $search_field = Koha::SearchFields->find({ name => 'subject' });
570 $search_field->update({ weight => 15.5 });
571 $clear_search_fields_cache->();
# Staff-client search: two weighted fields are expected (title and subject).
573 my ( undef, $query ) = $qb->build_query_compat( undef, ['title:"donald duck"'], undef, undef,
574 undef, undef, undef, { weighted_fields => 1 });
576 my $fields = $query->{query}{query_string}{fields};
# @{$fields} is evaluated in scalar context here, i.e. the element count.
578 is(@{$fields}, 2, 'Search field with no searchable mappings has been excluded');
580 my @found = grep { $_ eq 'title^25.00' } @{$fields};
581 is(@found, 1, 'Search field title has correct weight');
583 @found = grep { $_ eq 'subject^15.50' } @{$fields};
584 is(@found, 1, 'Search field subject has correct weight');
# OPAC search: only the abstract field has opac => 1 in the stubbed mappings.
586 ( undef, $query ) = $qb->build_query_compat( undef, ['title:"donald duck"'], undef, undef,
587 undef, undef, undef, { weighted_fields => 1, is_opac => 1 });
589 $fields = $query->{query}{query_string}{fields};
591 is_deeply(
592 $fields,
593 ['abstract'],
594 'Only OPAC search fields are used when opac search is performed'
# Checks that _convert_sort_fields() turns "<name>_<direction>" strings into
# { field => ..., direction => ... } hashes: 'call_number' is expected to map
# to the 'local-classification' field, and both the 'dsc' and 'desc' suffixes
# are expected to give direction 'desc'.
598 subtest "_convert_sort_fields() tests" => sub {
599 plan tests => 3;
601 my $qb;
604 $qb = Koha::SearchEngine::Elasticsearch::QueryBuilder->new({ 'index' => 'biblios' }),
605 'Creating new query builder object for biblios'
608 my @sort_by = $qb->_convert_sort_fields(qw( call_number_asc author_dsc ));
609 is_deeply(
610 \@sort_by,
612 { field => 'local-classification', direction => 'asc' },
613 { field => 'author', direction => 'desc' }
615 'sort fields should have been split correctly'
618 # Same check using the 'desc' spelling of the suffix.
# NOTE(review): an earlier comment here stated that direction comes back as
# undef instead of 'desc' for this spelling, yet the assertion below expects
# 'desc' -- confirm which behaviour is intended.
619 @sort_by = $qb->_convert_sort_fields(qw( call_number_asc author_desc ));
620 is_deeply(
621 \@sort_by,
623 { field => 'local-classification', direction => 'asc' },
624 { field => 'author', direction => 'desc' }
626 'sort fields should have been split correctly'
# Checks the field name _sort_field() returns for each field of the mocked
# mappings: 'title' and 'sortablenumber' are expected to resolve to their
# "__sort" variants, 'subject' to 'subject.raw', and 'itemnumber' to the plain
# field name.
630 subtest "_sort_field() tests" => sub {
631 plan tests => 5;
633 my $qb;
636 $qb = Koha::SearchEngine::Elasticsearch::QueryBuilder->new({ 'index' => 'biblios' }),
637 'Creating new query builder object for biblios'
640 my $f = $qb->_sort_field('title');
643 'title__sort',
644 'title sort mapped correctly'
647 $f = $qb->_sort_field('subject');
650 'subject.raw',
651 'subject sort mapped correctly'
654 $f = $qb->_sort_field('itemnumber');
657 'itemnumber',
658 'itemnumber sort mapped correctly'
661 $f = $qb->_sort_field('sortablenumber');
664 'sortablenumber__sort',
665 'sortablenumber sort mapped correctly'
# Undo every database change made by the tests above (the transaction was
# opened with txn_begin at the top of the file).
669 $schema->storage->txn_rollback;