Bug 18374: (QA follow-up) Simplify _truncate_terms
[koha.git] / Koha / SearchEngine / Elasticsearch / QueryBuilder.pm
blobb8dc2572788ae324af7ee4ca8dc7846c3bb85e44
1 package Koha::SearchEngine::Elasticsearch::QueryBuilder;
3 # This file is part of Koha.
5 # Copyright 2014 Catalyst IT Ltd.
7 # Koha is free software; you can redistribute it and/or modify it
8 # under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3 of the License, or
10 # (at your option) any later version.
12 # Koha is distributed in the hope that it will be useful, but
13 # WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Koha; if not, see <http://www.gnu.org/licenses>.
20 =head1 NAME
22 Koha::SearchEngine::Elasticsearch::QueryBuilder - constructs elasticsearch
23 query objects from user-supplied queries
25 =head1 DESCRIPTION
27 This provides the functions that take a user-supplied search query, and
28 provides something that can be given to elasticsearch to get answers.
30 =head1 SYNOPSIS
32 use Koha::SearchEngine::Elasticsearch::QueryBuilder;
33 $builder = Koha::SearchEngine::Elasticsearch::QueryBuilder->new({ index => $index });
34 my $simple_query = $builder->build_query("hello");
35 # This is currently undocumented because the original code is undocumented
36 my $adv_query = $builder->build_advanced_query($indexes, $operands, $operators);
38 =head1 METHODS
40 =cut
42 use base qw(Koha::SearchEngine::Elasticsearch);
43 use Carp;
44 use JSON;
45 use List::MoreUtils qw/ each_array /;
46 use Modern::Perl;
47 use URI::Escape;
49 use C4::Context;
50 use Koha::Exceptions;
52 =head2 build_query
54 my $simple_query = $builder->build_query("hello", %options)
56 This will build a query that can be issued to elasticsearch from the provided
57 string input. This expects a lucene style search form (see
58 L<http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-string-syntax>
59 for details.)
61 It'll make an attempt to respect the various query options.
63 Additional options can be provided with the C<%options> hash.
65 =over 4
67 =item sort
69 This should be an arrayref of hashrefs, each containing a C<field> and an
70 C<direction> (optional, defaults to C<asc>.) The results will be sorted
71 according to these values. Valid values for C<direction> are 'asc' and 'desc'.
73 =back
75 =cut
# Build an Elasticsearch request body from a lucene-style query string.
#
# Parameters:
#   $query   - the query string; undef is replaced with '*'.
#   %options - optional settings:
#       sort           => arrayref of { field => ..., direction => ... }
#                         hashrefs; direction is 'asc' (default) or 'desc'.
#       expanded_facet => name of an aggregation whose 'size' is set from
#                         the FacetMaxCount preference.
#
# Returns a hashref with 'query' and 'aggregations' keys, plus 'sort' when
# at least one sort entry was supplied.
# Dies if a sort direction other than 'asc' or 'desc' is given.
sub build_query {
    my ( $self, $query, %options ) = @_;

    my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;

    $query = '*' unless defined $query;

    my $res;
    $res->{query} = {
        query_string => {
            query            => $query,
            fuzziness        => $fuzzy_enabled ? 'auto' : '0',
            default_operator => 'AND',
            default_field    => '_all',
            lenient          => JSON::true,
        }
    };

    if ( $options{sort} ) {
        foreach my $sort ( @{ $options{sort} } ) {
            my ( $f, $d ) = @$sort{qw/ field direction /};
            die "Invalid sort direction, $d"
              if $d && ( $d ne 'asc' && $d ne 'desc' );
            $d = 'asc' unless $d;

            # TODO account for fields that don't have a 'phrase' type

            $f = $self->_sort_field($f);
            push @{ $res->{sort} }, { "$f.phrase" => { order => $d } };
        }
    }

    # See _convert_facets in Search.pm for how these get turned into
    # things that Koha can use.
    $res->{aggregations} = {
        author   => { terms => { field => "author__facet" } },
        subject  => { terms => { field => "subject__facet" } },
        itype    => { terms => { field => "itype__facet" } },
        location => { terms => { field => "location__facet" } },
        'su-geo' => { terms => { field => "su-geo__facet" } },
        se       => { terms => { field => "se__facet" } },
        ccode    => { terms => { field => "ccode__facet" } },
    };

    # An unset preference means neither library facet is added; defaulting
    # to '' keeps the string comparisons below free of "uninitialized"
    # warnings.
    my $display_library_facets =
      C4::Context->preference('DisplayLibraryFacets') // '';
    if (   $display_library_facets eq 'both'
        or $display_library_facets eq 'home' )
    {
        $res->{aggregations}{homebranch} =
          { terms => { field => "homebranch__facet" } };
    }
    if (   $display_library_facets eq 'both'
        or $display_library_facets eq 'holding' )
    {
        $res->{aggregations}{holdingbranch} =
          { terms => { field => "holdingbranch__facet" } };
    }
    if ( my $ef = $options{expanded_facet} ) {
        $res->{aggregations}{$ef}{terms}{size} =
          C4::Context->preference('FacetMaxCount');
    }
    return $res;
}
139 =head2 build_browse_query
141 my $browse_query = $builder->build_browse_query($field, $query);
143 This performs a "starts with" style query on a particular field. The field
144 to be searched must have been indexed with an appropriate mapping as a
145 "phrase" subfield, which pretty much everything has.
147 =cut
149 # XXX this isn't really a browse query like we want in the end
# Build a "starts with" style query against the ".phrase" subfield of a
# field.
#
# Parameters:
#   $field - field to browse; anything other than 'title' or 'author'
#            (including undef) falls back to 'title'.
#   $query - the prefix to look for.
#
# Returns { query => '*' } when $query is undef, otherwise a hashref with a
# match_phrase_prefix query and an ascending sort on the field's sort name.
sub build_browse_query {
    my ( $self, $field, $query ) = @_;

    my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;

    return { query => '*' } if !defined $query;

    # TODO this should come from Koha::SearchEngine::Elasticsearch
    my %field_whitelist = (
        title  => 1,
        author => 1,
    );
    $field = 'title'
      if !defined $field || !exists $field_whitelist{$field};
    my $sort = $self->_sort_field($field);
    my $res  = {
        query => {
            match_phrase_prefix => {
                "$field.phrase" => {
                    query     => $query,
                    operator  => 'or',
                    fuzziness => $fuzzy_enabled ? 'auto' : '0',
                }
            }
        },
        sort => [ { "$sort.phrase" => { order => "asc" } } ],
    };

    # Return explicitly rather than relying on the value of the assignment.
    return $res;
}
178 =head2 build_query_compat
180 my (
181 $error, $query, $simple_query, $query_cgi,
182 $query_desc, $limit, $limit_cgi, $limit_desc,
183 $stopwords_removed, $query_type
185 = $builder->build_query_compat( \@operators, \@operands, \@indexes,
186 \@limits, \@sort_by, $scan, $lang );
188 This handles a search using the same api as L<C4::Search::buildQuery> does.
190 A very simple query will go in with C<$operands> set to ['query'], and
191 C<$sort_by> set to ['pubdate_dsc']. This simple case will return with
192 C<$query> set to something that can perform the search, C<$simple_query>
193 set to just the search term, C<$query_cgi> set to something that can
194 reproduce this search, and C<$query_desc> set to something else.
196 =cut
# Build a search using the same argument layout as C4::Search::buildQuery.
#
# Parameters (all arrayrefs unless noted):
#   $operators   - boolean operators joining the operands; NOT modified.
#   $operands    - the search terms.
#   $indexes     - zebra-style index names, positionally matching $operands.
#   $orig_limits - zebra-style limit strings.
#   $sort_by     - zebra-style sort specifications.
#   $scan, $lang - accepted for API compatibility; unused here.
#   $params      - optional hashref; 'expanded_facet' is passed to
#                  build_query.
#
# Returns the ten-element list ( $error, $query, $simple_query, $query_cgi,
# $query_desc, $limit, $limit_cgi, $limit_desc, $stopwords_removed,
# $query_type ), where the first and the last two are always undef.
sub build_query_compat {
    my ( $self, $operators, $operands, $indexes, $orig_limits, $sort_by, $scan,
        $lang, $params )
      = @_;

    my @sort_params = $self->_convert_sort_fields(@$sort_by);
    my $limits      = $self->_fix_limit_special_cases($orig_limits);

    # Convert each index on its own so that an empty or unknown index
    # leaves an undef placeholder. Converting the whole list at once drops
    # such entries and shifts every later index onto the wrong operand.
    my @index_params = map {
        my ($converted) =
          ( defined $_ && $_ ne '' )
          ? $self->_convert_index_fields($_)
          : ();
        $converted;
    } @{ $indexes // [] };

    # Merge the indexes in with the search terms and the operands so that
    # each search thing is a handy unit. The first operand can't have an
    # operator, so pad the front of a *copy*; the caller's array is left
    # untouched.
    my @padded_operators = ( undef, @{ $operators // [] } );
    my @search_params;
    my $ea = each_array( @$operands, @padded_operators, @index_params );
    while ( my ( $oand, $otor, $index ) = $ea->() ) {
        next if ( !defined($oand) || $oand eq '' );
        push @search_params, {
            operand  => $self->_clean_search_term($oand),    # the search terms
            operator => defined($otor) ? uc $otor : undef,   # AND and so on
            $index ? %$index : (),
        };
    }

    # The limit string is needed both inside the query and on its own.
    my $limit =
      $self->_join_queries( $self->_convert_index_strings(@$limits) );

    # We build a string query from limits and the queries. An alternative
    # would be to pass them separately into build_query and let it build
    # them into a structured ES query itself. Maybe later, though that'd be
    # more robust.
    my $query_str = join( ' AND ',
        join( ' ', $self->_create_query_string(@search_params) ) || (),
        $limit || () );

    # If there's no query on the left, let's remove the junk left behind
    $query_str =~ s/^ AND //;
    my %options;
    $options{sort}           = \@sort_params;
    $options{expanded_facet} = $params->{expanded_facet};
    my $query = $self->build_query( $query_str, %options );

    # We roughly emulate the CGI parameters of the zebra query builder
    my $query_cgi;
    $query_cgi = 'idx=kw&q=' . uri_escape_utf8( $operands->[0] ) if @$operands;
    my $simple_query;
    $simple_query = $operands->[0] if @$operands == 1;
    my $query_desc = $simple_query;
    my $limit_cgi  = ( $orig_limits and @$orig_limits )
      ? '&limit=' . join( '&limit=', map { uri_escape_utf8($_) } @$orig_limits )
      : '';
    my $limit_desc;
    $limit_desc = "$limit" if $limit;
    return (
        undef,  $query,     $simple_query, $query_cgi, $query_desc,
        $limit, $limit_cgi, $limit_desc,   undef,      undef
    );
}
256 =head2 build_authorities_query
258 my $query = $builder->build_authorities_query(\%search);
260 This takes a nice description of an authority search and turns it into a black-box
261 query that can then be passed to the appropriate searcher.
263 The search description is a hashref that looks something like:
266 searches => [
268 where => 'Heading', # search the main entry
269 operator => 'exact', # require an exact match
270 value => 'frogs', # the search string
273 where => '', # search all entries
274 operator => '', # default keyword, right truncation
275 value => 'pond',
278 sort => {
279 Heading => 'desc', # field name => sort order ('asc' or 'desc')
282 authtypecode => 'TOPIC_TERM',
285 =cut
# Turn a description of an authority search into an Elasticsearch query.
#
# Parameters:
#   $search - hashref with:
#       searches => arrayref of { where, operator, value } hashrefs. An
#                   empty or missing 'where' searches '_all'; operator is
#                   'is' / '=' (exact term filter), 'exact' (phrase),
#                   'start' (prefix), anything else is a plain match.
#       sort     => optional hashref of field name => sort order.
#
# Returns a hashref with a 'query' key and, when sorting was requested, a
# 'sort' key. The $search hashref passed in is not modified.
sub build_authorities_query {
    my ( $self, $search ) = @_;

    # Start by making the query parts
    my @query_parts;
    my @filter_parts;
    foreach my $s ( @{ $search->{searches} } ) {
        my ( $wh, $op, $val ) = @{$s}{qw(where operator value)};
        $wh = '_all' if !defined $wh || $wh eq '';
        $op //= '';    # a missing operator means "regular wordlist"
        if ( $op eq 'is' || $op eq '=' ) {

            # look for something that matches completely
            # note, '=' is about numerical vals. May need special handling.
            # _allphrase is a special field that only groups the exact
            # matches. Also, we lowercase our search because the ES
            # index lowercases its values, and term searches don't get the
            # search analyzer applied to them.
            push @filter_parts, { term => { "$wh.phrase" => lc $val } };
        }
        elsif ( $op eq 'exact' ) {

            # left and right truncation, otherwise an exact phrase
            push @query_parts, { match_phrase => { $wh => $val } };
        }
        elsif ( $op eq 'start' ) {

            # startswith search
            push @query_parts, { wildcard => { "$wh.phrase" => lc "$val*" } };
        }
        else {
            # regular wordlist stuff
            push @query_parts, { match => { $wh => $val } };
        }
    }

    # Merge the query and filter parts appropriately
    # 'should' behaves like 'or', if we want 'and', use 'must'
    my $query_part  = { bool => { should => \@query_parts } };
    my $filter_part = { bool => { should => \@filter_parts } };

    # We need to add '.phrase' to all the sort headings otherwise it'll sort
    # based on the tokenised form. This is built in a fresh hash so the
    # caller's $search->{sort} is left alone; rewriting it in place would
    # append '.phrase' again if the same hashref were passed in twice.
    my %sort;
    if ( exists $search->{sort} ) {
        my %phrase_sort;
        foreach my $k ( keys %{ $search->{sort} } ) {
            my $f = $self->_sort_field($k);
            $phrase_sort{"$f.phrase"} = $search->{sort}{$k};
        }
        %sort = ( sort => [ \%phrase_sort ] );
    }

    my $query;
    if (@filter_parts) {
        $query =
          { query =>
              { filtered => { filter => $filter_part, query => $query_part } }
          };
    }
    else {
        $query = { query => $query_part };
    }
    $query = { %$query, %sort };
    return $query;
}
356 =head2 build_authorities_query_compat
358 my ($query) =
359 $builder->build_authorities_query_compat( \@marclist, \@and_or,
360 \@excluding, \@operator, \@value, $authtypecode, $orderby );
362 This builds a query for searching for authorities, in the style of
363 L<C4::AuthoritiesMarc::SearchAuthorities>.
365 Arguments:
367 =over 4
369 =item marclist
371 An arrayref containing where the particular term should be searched for.
372 Options are: mainmainentry, mainentry, match, match-heading, see-from, and
373 thesaurus. If left blank, any field is used.
375 =item and_or
377 Totally ignored. It is never used in L<C4::AuthoritiesMarc::SearchAuthorities>.
379 =item excluding
381 Also ignored.
383 =item operator
385 What form of search to do. Options are: is (phrase, no truncation, whole field
386 must match), = (number exact match), exact (phrase, but with left and right
387 truncation), start (starts with). If left blank, then word list, right truncated, anywhere is used.
389 =item value
391 The actual user-provided string value to search for.
393 =item authtypecode
395 The authority type code to search within. If blank, then all will be searched.
397 =item orderby
399 The order to sort the results by. Options are Relevance, HeadingAsc,
400 HeadingDsc, AuthidAsc, AuthidDsc.
402 =back
404 marclist, operator, and value must be the same length, and the values at
405 index /i/ all relate to each other.
407 This returns a query, which is a black box object that can be passed to the
408 appropriate search object.
410 =cut
# Maps the marclist names accepted by build_authorities_query_compat to the
# index names handed to build_authorities_query. 'all' maps to the empty
# string.
our $koha_to_index_name = {
    mainmainentry   => 'Heading-Main',
    mainentry       => 'Heading',
    match           => 'Match',
    'match-heading' => 'Match-heading',
    'see-from'      => 'Match-heading-see-from',
    thesaurus       => 'Subject-heading-thesaurus',
    all             => ''
};

# Build an authority query from C4::AuthoritiesMarc::SearchAuthorities
# style arguments.
#
# Parameters:
#   $marclist     - arrayref of field names (keys of $koha_to_index_name).
#   $and_or       - ignored.
#   $excluding    - ignored.
#   $operator     - arrayref of operators, parallel to $value.
#   $value        - arrayref of search strings.
#   $authtypecode - authority type code, passed through in the description.
#   $orderby      - 'HeadingAsc', 'HeadingDsc', 'AuthidAsc', 'AuthidDsc';
#                   anything else (including undef) means no explicit sort.
#
# Returns whatever build_authorities_query returns.
# Throws Koha::Exceptions::WrongParameter for an unknown marclist entry.
sub build_authorities_query_compat {
    my ( $self, $marclist, $and_or, $excluding, $operator, $value,
        $authtypecode, $orderby )
      = @_;

    # Make sure everything exists
    foreach my $m (@$marclist) {
        Koha::Exceptions::WrongParameter->throw("Invalid marclist field provided: $m")
          unless exists $koha_to_index_name->{$m};
    }

    # This turns the old-style many-options argument form into a more
    # extensible hash form that is understood by build_authorities_query.
    my @searches = map {
        +{
            where    => $koha_to_index_name->{ $marclist->[$_] },
            operator => $operator->[$_],
            value    => $value->[$_],
        }
    } 0 .. $#{$value};

    # No ordering requested is the same as relevance ordering; defaulting to
    # '' avoids "uninitialized" warnings in the matches below.
    $orderby //= '';
    my $sort_field =
        $orderby =~ /^Heading/ ? 'Heading'
      : $orderby =~ /^Auth/    ? 'Local-Number'
      :                          undef;

    my %search = (
        searches     => \@searches,
        authtypecode => $authtypecode,
    );
    if ($sort_field) {
        my $sort_order = ( $orderby =~ /Asc$/ ) ? 'asc' : 'desc';
        $search{sort} = { $sort_field => $sort_order };
    }

    my $query = $self->build_authorities_query( \%search );
    return $query;
}
463 =head2 _convert_sort_fields
465 my @sort_params = _convert_sort_fields(@sort_by)
467 Converts the zebra-style sort index information into elasticsearch-style.
469 C<@sort_by> is the same as presented to L<build_query_compat>, and it returns
470 something that can be sent to L<build_query>.
472 =cut
# Convert zebra-style sort specifications ("title_az", "call_number_asc",
# ...) into the { field => ..., direction => ... } hashrefs that build_query
# accepts.
#
# Parameters:
#   @sort_by - sort specifications of the form "<field>_<order>".
#
# Returns a list of hashrefs. Specifications whose field is unknown, or maps
# to nothing (relevance), are dropped. An unknown or missing order gives an
# undef direction.
sub _convert_sort_fields {
    my ( $self, @sort_by ) = @_;

    # Turn the sorting into something we care about.
    my %sort_field_convert = (
        acqdate     => 'acqdate',
        author      => 'author',
        call_number => 'callnum',
        popularity  => 'issues',
        relevance   => undef,       # default
        title       => 'title',
        pubdate     => 'pubdate',
    );
    my %sort_order_convert =
      ( dsc => 'desc', asc => 'asc', az => 'asc', za => 'desc' );

    # Convert the fields and orders, drop anything we don't know about.
    my @converted;
    foreach my $spec (@sort_by) {
        next unless defined $spec;

        # Split on the LAST underscore only: field names may contain
        # underscores themselves (call_number), and a plain split on '_'
        # would cut such a name in half and lose the sort.
        my ( $f, $d ) =
          $spec =~ /\A(.+)_([^_]*)\z/ ? ( $1, $2 ) : ( $spec, undef );

        my $field = $sort_field_convert{$f};
        next unless $field;
        push @converted,
          {
            field     => $field,
            direction => defined $d ? $sort_order_convert{$d} : undef,
          };
    }
    return @converted;
}
500 =head2 _convert_index_fields
502 my @index_params = $self->_convert_index_fields(@indexes);
504 Converts zebra-style search index notation into elasticsearch-style.
506 C<@indexes> is an array of index names, as presented to L<build_query_compat>,
507 and it returns something that can be sent to L<build_query>.
509 B<TODO>: this will pull from the elasticsearch mappings table to figure out
510 types.
512 =cut
# Zebra-style index name => Elasticsearch field name.
our %index_field_convert = (
    'kw'      => '_all',
    'ti'      => 'title',
    'au'      => 'author',
    'su'      => 'subject',
    'nb'      => 'isbn',
    'se'      => 'title-series',
    'callnum' => 'callnum',
    'itype'   => 'itype',
    'ln'      => 'ln',
    'branch'  => 'homebranch',
    'fic'     => 'lf',
    'mus'     => 'rtype',
    'aud'     => 'ta',
    'hi'      => 'Host-Item-Number',
);

# Convert zebra-style index names ("ti", "au,phr", "mc-itype", ...) into
# { field => ..., type => ... } hashrefs.
#
# Parameters:
#   @indexes - index names, optionally followed by ",<type>" where type is
#              'phr' (phrase) or 'rtrn' (right-truncate).
#
# Returns a list of hashrefs. Entries that are undef, empty, or whose index
# name is not in %index_field_convert are dropped, so the result may be
# shorter than the input.
sub _convert_index_fields {
    my ( $self, @indexes ) = @_;

    my %index_type_convert =
      ( __default => undef, phr => 'phrase', rtrn => 'right-truncate' );

    # Convert according to our table, drop anything that doesn't convert.
    # If a field starts with mc- we save it as it's used (and removed) later
    # when joining things, to indicate we make it an 'OR' join.
    my @converted;
    foreach my $index (@indexes) {

        # Undefined and empty entries can never convert; skipping them up
        # front avoids "uninitialized" warnings further down.
        next unless defined $index && $index ne '';

        my ( $f, $t ) = split /,/, $index;
        next unless defined $f;    # e.g. a lone ","

        my $mc = ( $f =~ s/^mc-// ) ? 'mc-' : '';

        my $field = $index_field_convert{$f};
        next unless $field;

        push @converted,
          {
            field => $mc . $field,
            type  => $index_type_convert{ $t // '__default' },
          };
    }
    return @converted;
}
557 =head2 _convert_index_strings
559 my @searches = $self->_convert_index_strings(@searches);
561 Similar to L<_convert_index_fields>, this takes strings of the form
562 B<field:search term> and rewrites the field from zebra-style to
563 elasticsearch-style. Anything it doesn't understand is returned verbatim.
565 =cut
# Rewrite "index:term" strings from zebra-style index names to the
# Elasticsearch ones.
#
# Parameters:
#   @searches - strings, typically of the form "field:search term".
#
# Returns the converted strings. Empty strings are dropped; strings without
# a "field:" prefix, or whose field does not convert, are passed through
# unchanged.
sub _convert_index_strings {
    my ( $self, @searches ) = @_;

    my @converted;
  SEARCH:
    for my $search (@searches) {
        next SEARCH if $search eq '';

        if ( $search =~ /^\s*([\w,-]*?):(.*)/ ) {
            my ( $zebra_index, $term ) = ( $1, $2 );

            # Only the first conversion result is of interest here.
            my ($es_index) = $self->_convert_index_fields($zebra_index);
            if ( defined $es_index ) {
                push @converted,
                  $es_index->{field} . ":"
                  . $self->_modify_string_by_type( %$es_index,
                    operand => $term );
                next SEARCH;
            }
        }

        # Not something we understand, keep it verbatim.
        push @converted, $search;
    }
    return @converted;
}
588 =head2 _convert_index_strings_freeform
590 my $search = $self->_convert_index_strings_freeform($search);
592 This is similar to L<_convert_index_strings>, however it'll search out the
593 things to change within the string. So it can handle strings such as
594 C<(su:foo) AND (su:bar)>, converting the C<su> appropriately.
596 If there is something of the form "su,complete-subfield" or something, the
597 second part is stripped off as we can't yet handle that. Making it work
598 will have to wait for a real query parser.
600 =cut
# Replace every zebra-style "index:" (or "index,qualifier:") prefix found
# anywhere inside $search with the matching Elasticsearch field name from
# %index_field_convert. Any ",qualifier" part is discarded.
#
# Parameters:
#   $search - free-form query string, e.g. "(su:foo) AND (su:bar)".
#
# Returns the rewritten string.
sub _convert_index_strings_freeform {
    my ( $self, $search ) = @_;

    for my $zebra_index ( keys %index_field_convert ) {
        my $es_field = $index_field_convert{$zebra_index};
        $search =~ s/\b$zebra_index(?:,[\w-]*)?:/$es_field:/g;
    }
    return $search;
}
610 =head2 _modify_string_by_type
612 my $str = $self->_modify_string_by_type(%index_field);
614 If you have a search term (operand) and a type (phrase, right-truncate), this
615 will rewrite the term so that it has that behaviour in lucene query syntax, e.g.
616 by wrapping quotes around it (phrase) or appending '*' (right-truncate).
618 =cut
# Decorate a search term according to its index type.
#
# Parameters (as a hash):
#   operand - the search term.
#   type    - 'phrase' wraps the term in double quotes, 'right-truncate'
#             appends '*'; anything else leaves the term alone.
#
# Returns the decorated term. An undef or empty operand is returned as is.
sub _modify_string_by_type {
    my ( $self, %idx ) = @_;

    my $type = $idx{type} || '';
    my $str  = $idx{operand};

    # Empty or undef, we can't use it. Test for definedness and length
    # rather than truth so that a legitimate term of "0" is still decorated.
    return $str unless defined $str && length $str;

    return $str . '*' if $type eq 'right-truncate';
    return '"' . $str . '"' if $type eq 'phrase';
    return $str;
}
632 =head2 _join_queries
634 my $query_str = $self->_join_queries(@query_parts);
636 This takes a list of query parts, that might be search terms on their own, or
637 booleaned together, or specifying fields, or whatever, wraps them in
638 parentheses, and ANDs them all together. Suitable for feeding to the ES
639 query string query.
641 Note: doesn't AND them together if they specify an index that starts with "mc"
642 as that was a special case in the original code for dealing with multiple
643 choice options (you can't search for something that has an itype of A and
644 an itype of B otherwise.)
646 =cut
# Combine query fragments into one query string.
#
# Parameters:
#   @parts - query fragments; undef and empty entries are ignored.
#            Fragments starting with "mc-" are "multiple choice" limits:
#            the prefix is removed and they are ORed with each other.
#
# Returns nothing when there are no usable fragments, the single fragment
# untouched when there is exactly one, and otherwise every fragment wrapped
# in parentheses and joined with AND (the multiple choice ones forming a
# single ORed group).
sub _join_queries {
    my ( $self, @parts ) = @_;

    # Sort the usable fragments into plain ones and multiple choice ones.
    my ( @plain, @multi_choice );
    foreach my $part (@parts) {
        next unless defined $part && $part ne '';
        if ( $part =~ /^mc-/ ) {
            push @multi_choice, ( $part =~ s/^mc-//r );
        }
        else {
            push @plain, $part;
        }
    }

    my $usable = @plain + @multi_choice;
    return if $usable == 0;
    return ( @plain, @multi_choice )[0] if $usable == 1;

    # Only groups that actually have members take part in the final AND.
    my @clauses;
    push @clauses, join( ' AND ', map { "($_)" } @plain ) if @plain;
    push @clauses, '(' . join( ' OR ', map { "($_)" } @multi_choice ) . ')'
      if @multi_choice;

    return join( ' AND ', @clauses );
}
668 =head2 _make_phrases
670 my @phrased_queries = $self->_make_phrases(@query_parts);
672 This takes the supplied queries and forces them to be phrases by wrapping
673 quotes around them. It understands field prefixes, e.g. 'subject:' and puts
674 the quotes outside of them if they're there.
676 =cut
# Wrap the term part of "field:term" query fragments in double quotes.
#
# Parameters:
#   @parts - query fragments.
#
# Returns the rewritten fragments. Leading whitespace before the field
# prefix is removed. Fragments that do not match the "field:" pattern are
# returned unchanged (and therefore unquoted).
sub _make_phrases {
    my ( $self, @parts ) = @_;

    my @phrased;
    foreach my $part (@parts) {
        ( my $quoted = $part ) =~ s/^\s*(\w*?:)(.*)$/$1"$2"/;
        push @phrased, $quoted;
    }
    return @phrased;
}
683 =head2 _create_query_string
685 my @query_strings = $self->_create_query_string(@queries);
687 Given a list of hashrefs, it will turn them into a lucene-style query string.
688 The hash should contain field, type (both for the indexes), operator, and
689 operand.
691 =cut
# Turn search descriptions into lucene-style query fragments.
#
# Parameters:
#   @queries - hashrefs with 'operand' and optionally 'operator', 'field'
#              and 'type'.
#
# Returns one string per hashref, of the form "OPERATOR (field:term)", with
# the operator and field parts left out when they are not set.
sub _create_query_string {
    my ( $self, @queries ) = @_;

    my @fragments;
    foreach my $q (@queries) {
        my $operator_prefix = '';
        $operator_prefix = $q->{operator} . ' ' if $q->{operator};

        my $field_prefix = '';
        $field_prefix = $q->{field} . ':' if $q->{field};

        # The term gets quoted or truncated according to the index type.
        my $term = $self->_modify_string_by_type(%$q);

        push @fragments, "$operator_prefix($field_prefix$term)";
    }
    return @fragments;
}
705 =head2 _clean_search_term
707 my $term = $self->_clean_search_term($term);
709 This cleans a search term by removing any funny characters that may upset
710 ES and give us an error. It also calls L<_convert_index_strings_freeform>
711 to ensure those parts are correct.
713 =cut
# Normalise a user-supplied search term before it goes into a query string.
#
# Parameters:
#   $term - the raw search term.
#
# Returns the term with '=' turned into ':', zebra index prefixes converted,
# curly braces turned into double quotes and, when the QueryAutoTruncate
# preference is on, wildcards appended by _truncate_terms.
sub _clean_search_term {
    my ( $self, $term ) = @_;

    my $truncate = C4::Context->preference("QueryAutoTruncate") || 0;

    # Some hardcoded searches (like with authorities) produce things like
    # 'an=123', when it ought to be 'an:123' for our purposes.
    ( my $cleaned = $term ) =~ s/=/:/g;

    $cleaned = $self->_convert_index_strings_freeform($cleaned);
    $cleaned =~ s/[{}]/"/g;

    if ($truncate) {
        $cleaned = $self->_truncate_terms($cleaned);
    }
    return $cleaned;
}
729 =head2 _fix_limit_special_cases
731 my $limits = $self->_fix_limit_special_cases($limits);
733 This converts any special cases that the limit specifications have into things
734 that are more readily processable by the rest of the code.
736 The argument should be an arrayref, and it'll return an arrayref.
738 =cut
# Rewrite the special-case limit strings into plain "field:value" limits.
#
# Parameters:
#   $limits - arrayref of limit strings.
#
# Returns a new arrayref in which
#   "yr,st-numeric,ge=A and yr,st-numeric,le=B" becomes "copydate:[A TO B]",
#   "yr,st-numeric=D"                           becomes "copydate:D",
#   "available"                                 becomes "onloan:0",
# and everything else is copied over unchanged. A year limit that starts
# like one of the patterns but does not fully match is dropped.
sub _fix_limit_special_cases {
    my ( $self, $limits ) = @_;

    my @fixed;
    foreach my $limit (@$limits) {

        # This is set up by opac-search.pl
        if ( $limit =~ /^yr,st-numeric,ge=/ ) {
            if ( $limit =~ /^yr,st-numeric,ge=(.*) and yr,st-numeric,le=(.*)$/ ) {
                push @fixed, "copydate:[$1 TO $2]";
            }
        }
        elsif ( $limit =~ /^yr,st-numeric=/ ) {
            if ( $limit =~ /^yr,st-numeric=(.*)$/ ) {
                push @fixed, "copydate:$1";
            }
        }
        elsif ( $limit =~ /^available$/ ) {
            push @fixed, 'onloan:0';
        }
        else {
            push @fixed, $limit;
        }
    }
    return \@fixed;
}
768 =head2 _sort_field
770 my $field = $self->_sort_field($field);
772 Given a field name, this works out what the actual name of the version to sort
773 on should be. Often it's the same, sometimes it involves sticking "__sort" on
774 the end. Maybe it'll be something else in the future, who knows?
776 =cut
# Work out the name of the field to sort on.
#
# Parameters:
#   $f - the field name.
#
# Returns "<field>__sort" when $self->sort_fields() has a true entry for the
# field, and the field name unchanged otherwise.
sub _sort_field {
    my ( $self, $f ) = @_;

    return $self->sort_fields()->{$f} ? $f . '__sort' : $f;
}
786 =head2 _truncate_terms
788 my $query = $self->_truncate_terms($query);
790 Given a string query this function appends the '*' wildcard to all terms except
791 boolean operators (and, or, not), terms already ending in '*', and double quoted strings.
793 =cut
# Append the '*' wildcard to the words of a query string.
#
# Parameters:
#   $query - the query string.
#
# Returns the words joined by single spaces. Each word has '*' appended
# unless it starts with a double quote, already ends in '*', or is one of
# the operators and/or/not (in any letter case).
sub _truncate_terms {
    my ( $self, $query ) = @_;

    my %is_operator = map { $_ => 1 } qw/and or not/;

    # Splitting with a capturing group keeps the double quoted phrases
    # together as single tokens; the whitespace separators and the empty
    # strings between adjacent matches come back as tokens too and are
    # skipped below.
    my @terms;
    foreach my $token ( split /("[^"]+"|\s+)/, $query ) {
        next if $token =~ /^\s*$/;

        my $leave_alone =
             $token =~ /^"/
          || $token =~ /\*$/
          || $is_operator{ lc $token };

        push @terms, $leave_alone ? $token : "$token*";
    }

    return join ' ', @terms;
}