Bug 19559: Add '-' to list of characters we don't split search terms on
[koha.git] / Koha / SearchEngine / Elasticsearch / QueryBuilder.pm
bloba021a0cb5298adf5ccd0382de1032ec3e6d81dee
1 package Koha::SearchEngine::Elasticsearch::QueryBuilder;
3 # This file is part of Koha.
5 # Copyright 2014 Catalyst IT Ltd.
7 # Koha is free software; you can redistribute it and/or modify it
8 # under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3 of the License, or
10 # (at your option) any later version.
12 # Koha is distributed in the hope that it will be useful, but
13 # WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Koha; if not, see <http://www.gnu.org/licenses>.
20 =head1 NAME
22 Koha::SearchEngine::Elasticsearch::QueryBuilder - constructs elasticsearch
23 query objects from user-supplied queries
25 =head1 DESCRIPTION
27 This provides the functions that take a user-supplied search query, and
28 provides something that can be given to elasticsearch to get answers.
30 =head1 SYNOPSIS
32 use Koha::SearchEngine::Elasticsearch::QueryBuilder;
33 $builder = Koha::SearchEngine::Elasticsearch::QueryBuilder->new({ index => $index });
34 my $simple_query = $builder->build_query("hello");
35 # This is currently undocumented because the original code is undocumented
36 my $adv_query = $builder->build_advanced_query($indexes, $operands, $operators);
38 =head1 METHODS
40 =cut
42 use base qw(Koha::SearchEngine::Elasticsearch);
43 use Carp;
44 use JSON;
45 use List::MoreUtils qw/ each_array /;
46 use Modern::Perl;
47 use URI::Escape;
49 use C4::Context;
50 use Koha::Exceptions;
52 =head2 build_query
54 my $simple_query = $builder->build_query("hello", %options)
56 This will build a query that can be issued to elasticsearch from the provided
57 string input. This expects a lucene style search form (see
58 L<http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-string-syntax>
59 for details.)
61 It'll make an attempt to respect the various query options.
63 Additional options can be provided with the C<%options> hash.
65 =over 4
67 =item sort
69 This should be an arrayref of hashrefs, each containing a C<field> and an
70 C<direction> (optional, defaults to C<asc>.) The results will be sorted
71 according to these values. Valid values for C<direction> are 'asc' and 'desc'.
73 =back
75 =cut
sub build_query {
    my ( $self, $query, %options ) = @_;

    # Builds the Elasticsearch request body (query, optional sort, facet
    # aggregations) for a lucene-style query string.
    #
    # $query   - query string; undef means "match everything" ('*').
    # %options - sort: arrayref of { field, direction } hashrefs;
    #            expanded_facet: name of a facet whose bucket count should
    #            be raised to the FacetMaxCount preference.
    # Returns a hashref. Dies on a sort direction other than asc/desc.

    # Only the fuzzy preference influences the generated query.
    my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;

    $query = '*' unless defined $query;

    my $res;
    $res->{query} = {
        query_string => {
            query            => $query,
            fuzziness        => $fuzzy_enabled ? 'auto' : '0',
            default_operator => 'AND',
            default_field    => '_all',
            lenient          => JSON::true,
        }
    };

    if ( $options{sort} ) {
        foreach my $sort ( @{ $options{sort} } ) {
            my ( $f, $d ) = @$sort{qw/ field direction /};
            die "Invalid sort direction, $d"
              if $d && ( $d ne 'asc' && $d ne 'desc' );
            $d = 'asc' unless $d;

            # TODO account for fields that don't have a 'phrase' type

            $f = $self->_sort_field($f);
            push @{ $res->{sort} }, { "$f.phrase" => { order => $d } };
        }
    }

    # See _convert_facets in Search.pm for how these get turned into
    # things that Koha can use.
    $res->{aggregations} = {
        author   => { terms => { field => "author__facet" } },
        subject  => { terms => { field => "subject__facet" } },
        itype    => { terms => { field => "itype__facet" } },
        location => { terms => { field => "location__facet" } },
        'su-geo' => { terms => { field => "su-geo__facet" } },
        se       => { terms => { field => "se__facet" } },
        ccode    => { terms => { field => "ccode__facet" } },
    };

    # An unset preference is treated as "no library facets" instead of
    # producing uninitialized-value warnings in the comparisons below.
    my $display_library_facets =
      C4::Context->preference('DisplayLibraryFacets') // '';
    if (   $display_library_facets eq 'both'
        or $display_library_facets eq 'home' )
    {
        $res->{aggregations}{homebranch} =
          { terms => { field => "homebranch__facet" } };
    }
    if (   $display_library_facets eq 'both'
        or $display_library_facets eq 'holding' )
    {
        $res->{aggregations}{holdingbranch} =
          { terms => { field => "holdingbranch__facet" } };
    }
    if ( my $ef = $options{expanded_facet} ) {
        $res->{aggregations}{$ef}{terms}{size} =
          C4::Context->preference('FacetMaxCount');
    }
    return $res;
}
139 =head2 build_browse_query
141 my $browse_query = $builder->build_browse_query($field, $query);
143 This performs a "starts with" style query on a particular field. The field
144 to be searched must have been indexed with an appropriate mapping as a
145 "phrase" subfield, which pretty much everything has.
147 =cut
149 # XXX this isn't really a browse query like we want in the end
sub build_browse_query {
    my ( $self, $field, $query ) = @_;

    # Builds a "starts with" query against the '.phrase' subfield of $field.
    #
    # $field - index to browse; anything not in the whitelist (including
    #          undef) falls back to 'title'.
    # $query - prefix to look for; undef yields a match-all query.
    # Returns a hashref with 'query' and 'sort' entries.

    return { query => '*' } if !defined $query;

    my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;

    # TODO this should come from Koha::SearchEngine::Elasticsearch
    my %field_whitelist = (
        title  => 1,
        author => 1,
    );
    $field = 'title'
      if !defined $field || !exists $field_whitelist{$field};
    my $sort = $self->_sort_field($field);
    my $res  = {
        query => {
            match_phrase_prefix => {
                "$field.phrase" => {
                    query     => $query,
                    operator  => 'or',
                    fuzziness => $fuzzy_enabled ? 'auto' : '0',
                }
            }
        },
        sort => [ { "$sort.phrase" => { order => "asc" } } ],
    };

    # Return explicitly rather than relying on the value of the assignment.
    return $res;
}
178 =head2 build_query_compat
180 my (
181 $error, $query, $simple_query, $query_cgi,
182 $query_desc, $limit, $limit_cgi, $limit_desc,
183 $stopwords_removed, $query_type
185 = $builder->build_query_compat( \@operators, \@operands, \@indexes,
186 \@limits, \@sort_by, $scan, $lang );
188 This handles a search using the same api as L<C4::Search::buildQuery> does.
190 A very simple query will go in with C<$operands> set to ['query'], and
191 C<$sort_by> set to ['pubdate_dsc']. This simple case will return with
192 C<$query> set to something that can perform the search, C<$simple_query>
193 set to just the search term, C<$query_cgi> set to something that can
194 reproduce this search, and C<$query_desc> set to something else.
196 =cut
sub build_query_compat {
    my ( $self, $operators, $operands, $indexes, $orig_limits, $sort_by, $scan,
        $lang, $params )
      = @_;

    # Emulates the C4::Search::buildQuery interface on top of build_query.
    # All arrayref arguments may be undef. $scan and $lang are accepted for
    # interface compatibility but are not used.
    # Returns the same 10-element list as documented in the POD above.

    $params //= {};

    # Work on copies: the caller's arrays must not be modified (the first
    # operand can't have an operator, hence the leading undef).
    my @operand_list  = @{ $operands  // [] };
    my @operator_list = ( undef, @{ $operators // [] } );

    my @sort_params = $self->_convert_sort_fields( @{ $sort_by // [] } );

    # Convert the indexes one by one so that an index that doesn't convert
    # leaves an undef placeholder. Converting the whole list at once drops
    # such entries, which shifts later indexes onto the wrong operands.
    my @index_params = map {
        my ($converted) = $self->_convert_index_fields($_);
        $converted;
    } @{ $indexes // [] };

    my $limits = $self->_fix_limit_special_cases($orig_limits);
    if ( $params->{suppress} ) { push @$limits, "suppress:0"; }

    # Merge the indexes in with the search terms and the operands so that
    # each search thing is a handy unit.
    my @search_params;
    my $ea = each_array( @operand_list, @operator_list, @index_params );
    while ( my ( $oand, $otor, $index ) = $ea->() ) {
        next if ( !defined($oand) || $oand eq '' );
        push @search_params, {
            operand  => $self->_clean_search_term($oand),    # the search terms
            operator => defined($otor) ? uc($otor) : undef,  # AND and so on
            $index ? %$index : (),
        };
    }

    # The limit string is needed both for the query and for the return value.
    my $limit = $self->_join_queries( $self->_convert_index_strings(@$limits) );

    # We build a string query from limits and the queries. An alternative
    # would be to pass them separately into build_query and let it build
    # them into a structured ES query itself. Maybe later, though that'd be
    # more robust.
    my $query_str = join( ' AND ',
        join( ' ', $self->_create_query_string(@search_params) ) || (),
        $limit || () );

    # If there's no query on the left, let's remove the junk left behind
    $query_str =~ s/^ AND //;
    my %options;
    $options{sort}           = \@sort_params;
    $options{expanded_facet} = $params->{expanded_facet};
    my $query = $self->build_query( $query_str, %options );

    # We roughly emulate the CGI parameters of the zebra query builder
    my $query_cgi;
    $query_cgi = 'idx=kw&q=' . uri_escape_utf8( $operand_list[0] )
      if @operand_list;
    my $simple_query;
    $simple_query = $operand_list[0] if @operand_list == 1;
    my $query_desc = $simple_query;
    my $limit_cgi  = ( $orig_limits and @$orig_limits )
      ? '&limit=' . join( '&limit=', map { uri_escape_utf8($_) } @$orig_limits )
      : '';
    my $limit_desc;
    $limit_desc = "$limit" if $limit;
    return (
        undef,  $query,     $simple_query, $query_cgi, $query_desc,
        $limit, $limit_cgi, $limit_desc,   undef,      undef
    );
}
257 =head2 build_authorities_query
259 my $query = $builder->build_authorities_query(\%search);
261 This takes a nice description of an authority search and turns it into a black-box
262 query that can then be passed to the appropriate searcher.
264 The search description is a hashref that looks something like:
267 searches => [
269 where => 'Heading', # search the main entry
270 operator => 'exact', # require an exact match
271 value => 'frogs', # the search string
274 where => '', # search all entries
275 operator => '', # default keyword, right truncation
276 value => 'pond',
279 sort => {
280 field => 'Heading',
281 order => 'desc',
283 authtypecode => 'TOPIC_TERM',
286 =cut
sub build_authorities_query {
    my ( $self, $search ) = @_;

    # Turns the search description documented in the POD above into an
    # Elasticsearch query hashref. $search is read only; it is never
    # modified, so the same description can be passed in repeatedly.

    # Start by making the query parts
    my @query_parts;
    my @filter_parts;
    foreach my $s ( @{ $search->{searches} } ) {
        my ( $wh, $op, $val ) = @{$s}{qw(where operator value)};

        # A missing 'where' or 'operator' behaves like an empty one.
        $wh = '_all' if !defined $wh || $wh eq '';
        $op //= '';
        if ( $op eq 'is' || $op eq '=' ) {

            # look for something that matches completely
            # note, '=' is about numerical vals. May need special handling.
            # _allphrase is a special field that only groups the exact
            # matches. Also, we lowercase our search because the ES
            # index lowercases its values, and term searches don't get the
            # search analyzer applied to them.
            push @filter_parts, { term => { "$wh.phrase" => lc $val } };
        }
        elsif ( $op eq 'exact' ) {

            # left and right truncation, otherwise an exact phrase
            push @query_parts, { match_phrase => { $wh => $val } };
        }
        elsif ( $op eq 'start' ) {

            # startswith search
            push @query_parts, { wildcard => { "$wh.phrase" => lc "$val*" } };
        }
        else {
            # regular wordlist stuff
            push @query_parts, { match => { $wh => $val } };
        }
    }

    # Merge the query and filter parts appropriately
    # 'should' behaves like 'or', if we want 'and', use 'must'
    my $query_part  = { bool => { should => \@query_parts } };
    my $filter_part = { bool => { should => \@filter_parts } };

    # We need to add '.phrase' to all the sort headings otherwise it'll sort
    # based on the tokenised form. The converted sort spec is built in a
    # local hash so the caller's $search->{sort} stays untouched.
    my %sort;
    if ( exists $search->{sort} ) {
        my %phrase_sort;
        foreach my $k ( keys %{ $search->{sort} } ) {
            my $f = $self->_sort_field($k);
            $phrase_sort{"$f.phrase"} = $search->{sort}{$k};
        }
        %sort = ( sort => [ \%phrase_sort ] );
    }

    my $query;
    if (@filter_parts) {
        $query =
          { query =>
              { filtered => { filter => $filter_part, query => $query_part } }
          };
    }
    else {
        $query = { query => $query_part };
    }
    $query = { %$query, %sort };
    return $query;
}
357 =head2 build_authorities_query_compat
359 my ($query) =
360 $builder->build_authorities_query_compat( \@marclist, \@and_or,
361 \@excluding, \@operator, \@value, $authtypecode, $orderby );
363 This builds a query for searching for authorities, in the style of
364 L<C4::AuthoritiesMarc::SearchAuthorities>.
366 Arguments:
368 =over 4
370 =item marclist
372 An arrayref containing where the particular term should be searched for.
373 Options are: mainmainentry, mainentry, match, match-heading, see-from, and
374 thesaurus. If left blank, any field is used.
376 =item and_or
378 Totally ignored. It is never used in L<C4::AuthoritiesMarc::SearchAuthorities>.
380 =item excluding
382 Also ignored.
384 =item operator
386 What form of search to do. Options are: is (phrase, no truncation, whole field
387 must match), = (number exact match), exact (phrase, but with left and right
388 truncation). If left blank, then word list, right truncated, anywhere is used.
390 =item value
392 The actual user-provided string value to search for.
394 =item authtypecode
396 The authority type code to search within. If blank, then all will be searched.
398 =item orderby
400 The order to sort the results by. Options are Relevance, HeadingAsc,
401 HeadingDsc, AuthidAsc, AuthidDsc.
403 =back
405 marclist, operator, and value must be the same length, and the values at
406 index /i/ all relate to each other.
408 This returns a query, which is a black box object that can be passed to the
409 appropriate search object.
411 =cut
# Maps the marclist names used by the old authority search API to the
# Elasticsearch index names; 'all' maps to the empty string, which
# build_authorities_query treats as "search every field".
our $koha_to_index_name = {
    mainmainentry   => 'Heading-Main',
    mainentry       => 'Heading',
    match           => 'Match',
    'match-heading' => 'Match-heading',
    'see-from'      => 'Match-heading-see-from',
    thesaurus       => 'Subject-heading-thesaurus',
    all             => ''
};

sub build_authorities_query_compat {
    my ( $self, $marclist, $and_or, $excluding, $operator, $value,
        $authtypecode, $orderby )
      = @_;

    # This turns the old-style many-options argument form into a more
    # extensible hash form that is understood by L<build_authorities_query>.
    # $and_or and $excluding are accepted but ignored. Throws
    # Koha::Exceptions::WrongParameter for an unknown marclist entry.
    my @searches;

    # Make sure everything exists
    foreach my $m (@$marclist) {
        Koha::Exceptions::WrongParameter->throw("Invalid marclist field provided: $m")
            unless exists $koha_to_index_name->{$m};
    }
    for my $i ( 0 .. $#{$value} ) {
        push @searches,
          {
            where    => $koha_to_index_name->{ $marclist->[$i] },
            operator => $operator->[$i],
            value    => $value->[$i],
          };
    }

    # No $orderby means no explicit sort; avoid matching against undef.
    $orderby //= '';
    my %sort;
    my $sort_field =
        ( $orderby =~ /^Heading/ ) ? 'Heading'
      : ( $orderby =~ /^Auth/ )    ? 'Local-Number'
      :                              undef;
    if ($sort_field) {
        my $sort_order = ( $orderby =~ /Asc$/ ) ? 'asc' : 'desc';
        %sort = ( $sort_field => $sort_order, );
    }
    my %search = (
        searches     => \@searches,
        authtypecode => $authtypecode,
    );
    $search{sort} = \%sort if %sort;
    my $query = $self->build_authorities_query( \%search );
    return $query;
}
464 =head2 _convert_sort_fields
466 my @sort_params = _convert_sort_fields(@sort_by)
468 Converts the zebra-style sort index information into elasticsearch-style.
470 C<@sort_by> is the same as presented to L<build_query_compat>, and it returns
471 something that can be sent to L<build_query>.
473 =cut
sub _convert_sort_fields {
    my ( $self, @sort_by ) = @_;

    # Converts zebra-style "<field>_<order>" sort specs into a list of
    # { field, direction } hashrefs. Specs whose field is unknown (or maps
    # to nothing, like 'relevance') are dropped. A spec without an order
    # yields an undefined direction.

    # Turn the sorting into something we care about.
    my %sort_field_convert = (
        acqdate     => 'acqdate',
        author      => 'author',
        call_number => 'callnum',
        popularity  => 'issues',
        relevance   => undef,       # default
        title       => 'title',
        pubdate     => 'pubdate',
    );
    my %sort_order_convert =
      ( qw( dsc desc ), qw( asc asc ), qw( az asc ), qw( za desc ) );

    my @converted;
    for my $spec (@sort_by) {
        next if !defined $spec;

        # The order is whatever follows the LAST underscore: field names
        # such as 'call_number' contain underscores themselves, so a plain
        # split on '_' would cut them in half. A spec that is itself a
        # known field name carries no order at all.
        my ( $f, $d ) = ( $spec, undef );
        if ( !exists $sort_field_convert{$spec}
            && $spec =~ /\A(.+)_([^_]*)\z/ )
        {
            ( $f, $d ) = ( $1, $2 );
        }

        my $field = $sort_field_convert{$f};
        next if !$field;
        push @converted,
          {
            field     => $field,
            direction => defined $d ? $sort_order_convert{$d} : undef,
          };
    }
    return @converted;
}
501 =head2 _convert_index_fields
503 my @index_params = $self->_convert_index_fields(@indexes);
505 Converts zebra-style search index notation into elasticsearch-style.
507 C<@indexes> is an array of index names, as presented to L<build_query_compat>,
508 and it returns something that can be sent to L<build_query>.
510 B<TODO>: this will pull from the elasticsearch mappings table to figure out
511 types.
513 =cut
# Zebra index abbreviations and the Elasticsearch field each one maps to.
our %index_field_convert = (
    'kw'      => '_all',
    'ti'      => 'title',
    'au'      => 'author',
    'su'      => 'subject',
    'nb'      => 'isbn',
    'se'      => 'title-series',
    'callnum' => 'callnum',
    'itype'   => 'itype',
    'ln'      => 'ln',
    'branch'  => 'homebranch',
    'fic'     => 'lf',
    'mus'     => 'rtype',
    'aud'     => 'ta',
    'hi'      => 'Host-Item-Number',
);

sub _convert_index_fields {
    my ( $self, @indexes ) = @_;

    # Converts "<index>[,<type>]" strings into { field, type } hashrefs.
    # Returns only the entries whose index name is known; undefined or
    # empty index names are skipped quietly.

    my %index_type_convert =
      ( __default => undef, phr => 'phrase', rtrn => 'right-truncate' );

    # Convert according to our table, drop anything that doesn't convert.
    # If a field starts with mc- we save it as it's used (and removed) later
    # when joining things, to indicate we make it an 'OR' join.
    my @converted;
    for my $index (@indexes) {
        next if !defined $index;
        my ( $f, $t ) = split /,/, $index;
        next if !defined $f;    # empty index string

        my $mc    = ( $f =~ s/^mc-// ) ? 'mc-' : '';
        my $field = $index_field_convert{$f};
        next if !$field;

        push @converted,
          {
            field => $mc . $field,
            type  => $index_type_convert{ $t // '__default' },
          };
    }
    return @converted;
}
558 =head2 _convert_index_strings
560 my @searches = $self->_convert_index_strings(@searches);
562 Similar to L<_convert_index_fields>, this takes strings of the form
563 B<field:search term> and rewrites the field from zebra-style to
564 elasticsearch-style. Anything it doesn't understand is returned verbatim.
566 =cut
sub _convert_index_strings {
    my ( $self, @searches ) = @_;

    # Rewrites "field:term" strings from zebra-style to elasticsearch-style
    # field names. Empty strings are skipped; strings that have no field
    # prefix, or whose field doesn't convert, are passed through verbatim.
    my @converted;
  SEARCH:
    foreach my $search (@searches) {
        next SEARCH if $search eq '';

        if ( $search =~ /^\s*([\w,-]*?):(.*)/ ) {
            my ( $field, $term ) = ( $1, $2 );
            my ($conv) = $self->_convert_index_fields($field);
            if ( defined $conv ) {
                push @converted,
                    $conv->{field} . ":"
                  . $self->_modify_string_by_type( %$conv, operand => $term );
                next SEARCH;
            }
        }

        # Nothing we understand: keep the original text.
        push @converted, $search;
    }
    return @converted;
}
589 =head2 _convert_index_strings_freeform
591 my $search = $self->_convert_index_strings_freeform($search);
593 This is similar to L<_convert_index_strings>, however it'll search out the
594 things to change within the string. So it can handle strings such as
595 C<(su:foo) AND (su:bar)>, converting the C<su> appropriately.
597 If there is something of the form "su,complete-subfield" or something, the
598 second part is stripped off as we can't yet handle that. Making it work
599 will have to wait for a real query parser.
601 =cut
sub _convert_index_strings_freeform {
    my ( $self, $search ) = @_;

    # Replaces every zebra index prefix found in $search (optionally
    # followed by ",qualifier", which is stripped) with the matching
    # elasticsearch field name, and returns the rewritten string.

    my @zebra_names = keys %index_field_convert;
    return $search if !@zebra_names;

    # One alternation, applied in a single pass: the result no longer
    # depends on hash ordering or on the state of the hash's shared 'each'
    # iterator, and the keys are quoted before being used as a pattern.
    my $zebra_re = join '|', map { quotemeta }
      sort { length($b) <=> length($a) || $a cmp $b } @zebra_names;

    $search =~ s/\b($zebra_re)(?:,[\w-]*)?:/$index_field_convert{$1}:/g;
    return $search;
}
611 =head2 _modify_string_by_type
613 my $str = $self->_modify_string_by_type(%index_field);
615 If you have a search term (operand) and a type (phrase, right-truncated), this
616 will convert the string to have the function in lucene search terms, e.g.
617 wrapping quotes around it.
619 =cut
sub _modify_string_by_type {
    my ( $self, %idx ) = @_;

    # Applies the index type to the search term:
    #   type 'phrase'         -> term wrapped in double quotes
    #   type 'right-truncate' -> term with '*' appended
    #   anything else         -> term unchanged
    # %idx carries 'type' and 'operand'. An undefined or empty operand is
    # returned as is.

    my $type = $idx{type} || '';
    my $str  = $idx{operand};

    # Empty or undef, we can't use it. Test explicitly so that the valid
    # search term '0' is still processed.
    return $str if !defined $str || $str eq '';

    return qq{"$str"} if $type eq 'phrase';
    return "$str*"    if $type eq 'right-truncate';
    return $str;
}
633 =head2 _join_queries
635 my $query_str = $self->_join_queries(@query_parts);
637 This takes a list of query parts, that might be search terms on their own, or
638 booleaned together, or specifying fields, or whatever, wraps them in
639 parentheses, and ANDs them all together. Suitable for feeding to the ES
640 query string query.
642 Note: doesn't AND them together if they specify an index that starts with "mc"
643 as that was a special case in the original code for dealing with multiple
644 choice options (you can't search for something that has an itype of A and
645 an itype of B otherwise.)
647 =cut
sub _join_queries {
    my ( $self, @parts ) = @_;

    # Combines query parts into one query string. Ordinary parts are
    # parenthesised and ANDed; parts prefixed with "mc-" (multiple choice)
    # lose the prefix and are ORed together as one group. Undefined and
    # empty parts are ignored. A single remaining part is returned bare;
    # no parts at all gives an empty list.
    my ( @plain, @multi );
    for my $part (@parts) {
        next if !defined $part || $part eq '';
        if ( $part =~ /^mc-/ ) {
            push @multi, $part =~ s/^mc-//r;
        }
        else {
            push @plain, $part;
        }
    }

    my @all = ( @plain, @multi );
    return () if !@all;
    return $all[0] if @all == 1;

    my @clauses;
    push @clauses, join( ' AND ', map { "($_)" } @plain ) if @plain;
    push @clauses, '(' . join( ' OR ', map { "($_)" } @multi ) . ')'
      if @multi;
    return join ' AND ', @clauses;
}
669 =head2 _make_phrases
671 my @phrased_queries = $self->_make_phrases(@query_parts);
673 This takes the supplied queries and forces them to be phrases by wrapping
674 quotes around them. It understands field prefixes, e.g. 'subject:' and puts
675 the quotes outside of them if they're there.
677 =cut
sub _make_phrases {
    my ( $self, @parts ) = @_;

    # For every part that starts with a "field:" prefix, wraps what follows
    # the prefix in double quotes. Parts without such a prefix come back
    # unchanged. The input list itself is not modified.
    my $prefixed = qr/^\s*(\w*?:)(.*)$/;
    return map { s/$prefixed/$1"$2"/r } @parts;
}
684 =head2 _create_query_string
686 my @query_strings = $self->_create_query_string(@queries);
688 Given a list of hashrefs, it will turn them into a lucene-style query string.
689 The hash should contain field, type (both for the indexes), operator, and
690 operand.
692 =cut
sub _create_query_string {
    my ( $self, @queries ) = @_;

    # Renders each query hashref (field, type, operator, operand) as
    # "<OPERATOR> (<field>:<operand>)"; operator and field are left out
    # when not set. The operand is first adjusted for its index type.
    return map {
        my $q        = $_;
        my $operator = $q->{operator} ? "$q->{operator} " : '';
        my $field    = $q->{field}    ? "$q->{field}:"    : '';
        my $operand  = $self->_modify_string_by_type(%$q);
        $operator . "($field$operand)";
    } @queries;
}
706 =head2 _clean_search_term
708 my $term = $self->_clean_search_term($term);
710 This cleans a search term by removing any funny characters that may upset
711 ES and give us an error. It also calls L<_convert_index_strings_freeform>
712 to ensure those parts are correct.
714 =cut
sub _clean_search_term {
    my ( $self, $term ) = @_;

    # Normalises a user-supplied search term before it is put into a
    # query string; returns the cleaned term.

    # Some hardcoded searches (like with authorities) produce things like
    # 'an=123', when it ought to be 'an:123' for our purposes.
    $term =~ s/=/:/g;

    # Zebra index prefixes become elasticsearch field names.
    $term = $self->_convert_index_strings_freeform($term);

    # Curly braces are turned into double quotes.
    $term =~ s/[{}]/"/g;

    # Add trailing wildcards only when the preference asks for it.
    if ( C4::Context->preference("QueryAutoTruncate") || 0 ) {
        $term = $self->_truncate_terms($term);
    }
    return $term;
}
730 =head2 _fix_limit_special_cases
732 my $limits = $self->_fix_limit_special_cases($limits);
734 This converts any special cases that the limit specifications have into things
735 that are more readily processable by the rest of the code.
737 The argument should be an arrayref, and it'll return an arrayref.
739 =cut
sub _fix_limit_special_cases {
    my ( $self, $limits ) = @_;

    # Rewrites the special-case limit strings into plain "field:value"
    # limits and returns a new arrayref; all other limits are copied over
    # unchanged.
    my @fixed;
    for my $limit (@$limits) {

        # Year range, as set up by opac-search.pl. A range without both
        # bounds is dropped.
        if ( $limit =~ /^yr,st-numeric,ge=/ ) {
            push @fixed, "copydate:[$1 TO $2]"
              if $limit =~ /^yr,st-numeric,ge=(.*) and yr,st-numeric,le=(.*)$/;
        }

        # Single year
        elsif ( $limit =~ /^yr,st-numeric=/ ) {
            push @fixed, "copydate:$1"
              if $limit =~ /^yr,st-numeric=(.*)$/;
        }

        # "available" is expressed as "not on loan"
        elsif ( $limit =~ /^available$/ ) {
            push @fixed, 'onloan:0';
        }
        else {
            push @fixed, $limit;
        }
    }
    return \@fixed;
}
769 =head2 _sort_field
771 my $field = $self->_sort_field($field);
773 Given a field name, this works out what the actual name of the version to sort
774 on should be. Often it's the same, sometimes it involves sticking "__sort" on
775 the end. Maybe it'll be something else in the future, who knows?
777 =cut
sub _sort_field {
    my ( $self, $f ) = @_;

    # Fields flagged in $self->sort_fields() are sorted on their "__sort"
    # variant; every other field name is returned unchanged.
    return $self->sort_fields()->{$f} ? $f . '__sort' : $f;
}
787 =head2 _truncate_terms
789 my $query = $self->_truncate_terms($query);
791 Given a string query this function appends '*' wildcard to all terms except
792 operands and double quoted strings.
794 =cut
sub _truncate_terms {
    my ( $self, $query ) = @_;

    # Appends '*' to every term of $query except quoted phrases (with or
    # without a "field:" prefix), terms already ending in '*', and the
    # boolean keywords and/or/not. Terms are re-joined with single spaces.
    my %is_keyword = map { $_ => 1 } qw/and or not/;

    # '"donald duck" title:"the mouse" and peter' gets split into
    # ['', '"donald duck"', '', ' ', '', 'title:"the mouse"', '', ' ', 'and', ' ', 'peter']
    my @terms;
    for my $token ( split /((?:[\w-]+:)?"[^"]+"|\s+)/, $query ) {

        # Skip the empty and whitespace-only pieces produced by the split
        next if $token =~ /^\s*$/;

        my $leave_alone =
             $token =~ /"$/
          || $token =~ /\*$/
          || $is_keyword{ lc $token };
        push @terms, $leave_alone ? $token : "$token*";
    }

    return join ' ', @terms;
}