From 6426a82014aacd738dd94ca2ff1c9056af893d45 Mon Sep 17 00:00:00 2001 From: Julian Maurice Date: Fri, 13 Oct 2017 09:53:04 +0000 Subject: [PATCH] Bug 18374: (QA follow-up) Simplify _truncate_terms By using a different split regex, we can simplify a bit the process of appending '*' to every word of the query Signed-off-by: Julian Maurice Signed-off-by: Jonathan Druart --- Koha/SearchEngine/Elasticsearch/QueryBuilder.pm | 40 ++++++++++--------------- 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm index d7a83e272e..b8dc257278 100644 --- a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm +++ b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm @@ -794,30 +794,22 @@ operands and double quoted strings. sub _truncate_terms { my ( $self, $query ) = @_; - my @stops = qw/and or not/; - my @new_terms; - my @quote_split = split /(["])([^"]+)\1/, $query; - #Above splits the string based on matching pairs of double quotes - #In practice we get ('','"','donald duck',' ','"','the mouse',' and pete') - #given the string '"donald duck" "the mouse" and pete' - #so we ignore empties, quote the ones after a '"' and split the rest on spaces - for (my $i=0; $i < @quote_split; $i++ ) { - next if ( $quote_split[$i] eq '' || $quote_split[$i] eq ' ' ); - if ( $quote_split[$i] eq '"' ){ - $i++; - $quote_split[$i] = '"'.$quote_split[$i].'"'; - push @new_terms, $quote_split[$i] - } else { - my @space_split = split /[\(\s\)]/, $quote_split[$i]; - foreach my $term (@space_split) { - next if ( $term eq '' || $term eq ' ' ); - $term .= "*" unless ( ( grep { lc($term) =~ /^$_$/ } @stops ) || ( $term =~ /\*$/ ) ); - push @new_terms, $term; - } - } - } - $query = join ' ', @new_terms; - return $query; + + # '"donald duck" "the mouse" and pete' gets split into + # ['', '"donald duck"', '', ' ', '', '"the mouse"', '', ' ', 'and', ' ', 'pete'] + my @tokens = split /("[^"]+"|\s+)/, $query; + + # 
Filter out empty tokens + my @words = grep { $_ !~ /^\s*$/ } @tokens; + + # Append '*' to words if needed, ie. if it's not surrounded by quotes, not + # terminated by '*' and not a keyword + my @terms = map { + my $w = $_; + (/^"/ or /\*$/ or grep {lc($w) eq $_} qw/and or not/) ? $_ : "$_*"; + } @words; + + return join ' ', @terms; } 1; -- 2.11.4.GIT