C4/Search.pm

   1 package C4::Search;
   2
   3 # This file is part of Koha.
   4 #
   5 # Koha is free software; you can redistribute it and/or modify it
   6 # under the terms of the GNU General Public License as published by
   7 # the Free Software Foundation; either version 3 of the License, or
   8 # (at your option) any later version.
   9 #
  10 # Koha is distributed in the hope that it will be useful, but
  11 # WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with Koha; if not, see <http://www.gnu.org/licenses>.
  17
  18 use Modern::Perl;
  19 require Exporter;
  20 use C4::Context;
  21 use C4::Biblio;    # GetMarcFromKohaField, GetBiblioData
  22 use C4::Koha;      # getFacets
  23 use Koha::DateUtils;
  24 use Koha::Libraries;
  25 use Lingua::Stem;
  26 use C4::Search::PazPar2;
  27 use XML::Simple;
  28 use C4::XSLT;
  29 use C4::Reserves;    # GetReserveStatus
  30 use C4::Debug;
  31 use C4::Charset;
  32 use Koha::AuthorisedValues;
  33 use Koha::ItemTypes;
  34 use Koha::Libraries;
  35 use Koha::Patrons;
  36 use Koha::RecordProcessor;
  37 use YAML;
  38 use URI::Escape;
  39 use Business::ISBN;
  40 use MARC::Record;
  41 use MARC::Field;
  42 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
  43
  44 BEGIN {
  45     $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
  46 }
  47
  48 =head1 NAME
  49
  50 C4::Search - Functions for searching the Koha catalog.
  51
  52 =head1 SYNOPSIS
  53
  54 See opac/opac-search.pl or catalogue/search.pl for example of usage
  55
  56 =head1 DESCRIPTION
  57
  58 This module provides searching functions for Koha's bibliographic databases
  59
  60 =head1 FUNCTIONS
  61
  62 =cut
  63
  64 @ISA    = qw(Exporter);
  65 @EXPORT = qw(
  66   &FindDuplicate
  67   &SimpleSearch
  68   &searchResults
  69   &getRecords
  70   &buildQuery
  71   &GetDistinctValues
  72   &enabled_staff_search_views
  73 );
  74
  75 # make all your functions, whether exported or not;
  76
  77 =head2 FindDuplicate
  78
  79 ($biblionumber,$biblionumber,$title) = FindDuplicate($record);
  80
  81 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
  82
  83 =cut
  84
  85 sub FindDuplicate {
  86     my ($record) = @_;
  87     my $dbh = C4::Context->dbh;
  88     my $result = TransformMarcToKoha( $record, '' );
  89     my $sth;
  90     my $query;
  91     my $search;
  92     my $type;
  93     my ( $biblionumber, $title );
  94
  95     # search duplicate on ISBN, easy and fast..
  96     # ... normalize first
  97     if ( $result->{isbn} ) {
  98         $result->{isbn} =~ s/\(.*$//;
  99         $result->{isbn} =~ s/\s+$//;
 100         $query = "isbn:$result->{isbn}";
 101     }
 102     else {
 103
 104         my $titleindex = 'ti,ext';
 105         my $authorindex = 'au,ext';
 106         my $op = 'and';
 107
 108         $result->{title} =~ s /\\//g;
 109         $result->{title} =~ s /\"//g;
 110         $result->{title} =~ s /\(//g;
 111         $result->{title} =~ s /\)//g;
 112
 113         # FIXME: instead of removing operators, could just do
 114         # quotes around the value
 115         $result->{title} =~ s/(and|or|not)//g;
 116         $query = "$titleindex:\"$result->{title}\"";
 117         if   ( $result->{author} ) {
 118             $result->{author} =~ s /\\//g;
 119             $result->{author} =~ s /\"//g;
 120             $result->{author} =~ s /\(//g;
 121             $result->{author} =~ s /\)//g;
 122
 123             # remove valid operators
 124             $result->{author} =~ s/(and|or|not)//g;
 125             $query .= " $op $authorindex:\"$result->{author}\"";
 126         }
 127     }
 128
 129     my ( $error, $searchresults, undef ) = SimpleSearch($query); # FIXME :: hardcoded !
 130     my @results;
 131     if (!defined $error) {
 132         foreach my $possible_duplicate_record (@{$searchresults}) {
 133             my $marcrecord = new_record_from_zebra(
 134                 'biblioserver',
 135                 $possible_duplicate_record
 136             );
 137
 138             my $result = TransformMarcToKoha( $marcrecord, '' );
 139
 140             # FIXME :: why 2 $biblionumber ?
 141             if ($result) {
 142                 push @results, $result->{'biblionumber'};
 143                 push @results, $result->{'title'};
 144             }
 145         }
 146     }
 147     return @results;
 148 }
 149
 150 =head2 SimpleSearch
 151
 152 ( $error, $results, $total_hits ) = SimpleSearch( $query, $offset, $max_results, [@servers], [%options] );
 153
 154 This function provides a simple search API on the bibliographic catalog
 155
 156 =over 2
 157
 158 =item C<input arg:>
 159
 160     * $query can be a simple keyword or a complete CCL query
 161     * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
 162     * $offset - If present, represents the number of records at the beginning to omit. Defaults to 0
 163     * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef.
 164     * %options is optional. (e.g. "skip_normalize" allows you to skip changing : to = )
 165
 166
 167 =item C<Return:>
 168
 169     Returns an array consisting of three elements
 170     * $error is undefined unless an error is detected
 171     * $results is a reference to an array of records.
 172     * $total_hits is the number of hits that would have been returned with no limit
 173
 174     If an error is returned the two other return elements are undefined. If error itself is undefined
 175     the other two elements are always defined
 176
 177 =item C<usage in the script:>
 178
 179 =back
 180
 181 my ( $error, $marcresults, $total_hits ) = SimpleSearch($query);
 182
 183 if (defined $error) {
 184     $template->param(query_error => $error);
 185     warn "error: ".$error;
 186     output_html_with_http_headers $input, $cookie, $template->output;
 187     exit;
 188 }
 189
 190 my $hits = @{$marcresults};
 191 my @results;
 192
 193 for my $r ( @{$marcresults} ) {
 194     my $marcrecord = MARC::File::USMARC::decode($r);
 195     my $biblio = TransformMarcToKoha($marcrecord,q{});
 196
 197     #build the iarray of hashs for the template.
 198     push @results, {
 199         title           => $biblio->{'title'},
 200         subtitle        => $biblio->{'subtitle'},
 201         biblionumber    => $biblio->{'biblionumber'},
 202         author          => $biblio->{'author'},
 203         publishercode   => $biblio->{'publishercode'},
 204         publicationyear => $biblio->{'publicationyear'},
 205         };
 206
 207 }
 208
 209 $template->param(result=>\@results);
 210
 211 =cut
 212
 213 sub SimpleSearch {
 214     my ( $query, $offset, $max_results, $servers, %options )  = @_;
 215
 216     return ( 'No query entered', undef, undef ) unless $query;
 217     # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
 218     my @servers = defined ( $servers ) ? @$servers : ( 'biblioserver' );
 219     my @zoom_queries;
 220     my @tmpresults;
 221     my @zconns;
 222     my $results = [];
 223     my $total_hits = 0;
 224
 225     # Initialize & Search Zebra
 226     for ( my $i = 0 ; $i < @servers ; $i++ ) {
 227         eval {
 228             $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 229             $query =~ s/:/=/g unless $options{skip_normalize};
 230             $zoom_queries[$i] = new ZOOM::Query::CCL2RPN( $query, $zconns[$i]);
 231             $tmpresults[$i] = $zconns[$i]->search( $zoom_queries[$i] );
 232
 233             # error handling
 234             my $error =
 235                 $zconns[$i]->errmsg() . " ("
 236               . $zconns[$i]->errcode() . ") "
 237               . $zconns[$i]->addinfo() . " "
 238               . $zconns[$i]->diagset();
 239
 240             return ( $error, undef, undef ) if $zconns[$i]->errcode();
 241         };
 242         if ($@) {
 243
 244             # caught a ZOOM::Exception
 245             my $error =
 246                 $@->message() . " ("
 247               . $@->code() . ") "
 248               . $@->addinfo() . " "
 249               . $@->diagset();
 250             warn $error." for query: $query";
 251             return ( $error, undef, undef );
 252         }
 253     }
 254
 255     _ZOOM_event_loop(
 256         \@zconns,
 257         \@tmpresults,
 258         sub {
 259             my ($i, $size) = @_;
 260             my $first_record = defined($offset) ? $offset + 1 : 1;
 261             my $hits = $tmpresults[ $i - 1 ]->size();
 262             $total_hits += $hits;
 263             my $last_record = $hits;
 264             if ( defined $max_results && $offset + $max_results < $hits ) {
 265                 $last_record = $offset + $max_results;
 266             }
 267
 268             for my $j ( $first_record .. $last_record ) {
 269                 my $record = eval {
 270                   $tmpresults[ $i - 1 ]->record( $j - 1 )->raw()
 271                   ;    # 0 indexed
 272                 };
 273                 push @{$results}, $record if defined $record;
 274             }
 275         }
 276     );
 277
 278     foreach my $zoom_query (@zoom_queries) {
 279         $zoom_query->destroy();
 280     }
 281
 282     return ( undef, $results, $total_hits );
 283 }
 284
 285 =head2 getRecords
 286
 287 ( undef, $results_hashref, \@facets_loop ) = getRecords (
 288
 289         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 290         $results_per_page, $offset,       $branches,       $itemtypes,
 291         $query_type,       $scan,         $opac
 292     );
 293
 294 The all singing, all dancing, multi-server, asynchronous, scanning,
 295 searching, record nabbing, facet-building
 296
 297 See verbose embedded documentation.
 298
 299 =cut
 300
 301 sub getRecords {
 302     my (
 303         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 304         $results_per_page, $offset,       $branches,         $itemtypes,
 305         $query_type,       $scan,         $opac
 306     ) = @_;
 307
 308     my @servers = @$servers_ref;
 309     my @sort_by = @$sort_by_ref;
 310     $offset = 0 if $offset < 0;
 311
 312     # Initialize variables for the ZOOM connection and results object
 313     my $zconn;
 314     my @zconns;
 315     my @results;
 316     my $results_hashref = ();
 317
 318     # TODO simplify this structure ( { branchcode => $branchname } is enought) and remove this parameter
 319     $branches ||= { map { $_->branchcode => { branchname => $_->branchname } } Koha::Libraries->search };
 320
 321     # Initialize variables for the faceted results objects
 322     my $facets_counter = {};
 323     my $facets_info    = {};
 324     my $facets         = getFacets();
 325
 326     my @facets_loop;    # stores the ref to array of hashes for template facets loop
 327
 328     ### LOOP THROUGH THE SERVERS
 329     for ( my $i = 0 ; $i < @servers ; $i++ ) {
 330         $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 331
 332 # perform the search, create the results objects
 333 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
 334         my $query_to_use = ($servers[$i] =~ /biblioserver/) ? $koha_query : $simple_query;
 335
 336         #$query_to_use = $simple_query if $scan;
 337         warn $simple_query if ( $scan and $DEBUG );
 338
 339         # Check if we've got a query_type defined, if so, use it
 340         eval {
 341             if ($query_type) {
 342                 if ($query_type =~ /^ccl/) {
 343                     $query_to_use =~ s/\:/\=/g;    # change : to = last minute (FIXME)
 344                     $results[$i] = $zconns[$i]->search(new ZOOM::Query::CCL2RPN($query_to_use, $zconns[$i]));
 345                 } elsif ($query_type =~ /^cql/) {
 346                     $results[$i] = $zconns[$i]->search(new ZOOM::Query::CQL($query_to_use, $zconns[$i]));
 347                 } elsif ($query_type =~ /^pqf/) {
 348                     $results[$i] = $zconns[$i]->search(new ZOOM::Query::PQF($query_to_use, $zconns[$i]));
 349                 } else {
 350                     warn "Unknown query_type '$query_type'.  Results undetermined.";
 351                 }
 352             } elsif ($scan) {
 353                     $results[$i] = $zconns[$i]->scan(  new ZOOM::Query::CCL2RPN($query_to_use, $zconns[$i]));
 354             } else {
 355                     $results[$i] = $zconns[$i]->search(new ZOOM::Query::CCL2RPN($query_to_use, $zconns[$i]));
 356             }
 357         };
 358         if ($@) {
 359             warn "WARNING: query problem with $query_to_use " . $@;
 360         }
 361
 362         # Concatenate the sort_by limits and pass them to the results object
 363         # Note: sort will override rank
 364         my $sort_by;
 365         foreach my $sort (@sort_by) {
 366             if ( $sort eq "author_az" || $sort eq "author_asc" ) {
 367                 $sort_by .= "1=1003 <i ";
 368             }
 369             elsif ( $sort eq "author_za" || $sort eq "author_dsc" ) {
 370                 $sort_by .= "1=1003 >i ";
 371             }
 372             elsif ( $sort eq "popularity_asc" ) {
 373                 $sort_by .= "1=9003 <i ";
 374             }
 375             elsif ( $sort eq "popularity_dsc" ) {
 376                 $sort_by .= "1=9003 >i ";
 377             }
 378             elsif ( $sort eq "call_number_asc" ) {
 379                 $sort_by .= "1=8007  <i ";
 380             }
 381             elsif ( $sort eq "call_number_dsc" ) {
 382                 $sort_by .= "1=8007 >i ";
 383             }
 384             elsif ( $sort eq "pubdate_asc" ) {
 385                 $sort_by .= "1=31 <i ";
 386             }
 387             elsif ( $sort eq "pubdate_dsc" ) {
 388                 $sort_by .= "1=31 >i ";
 389             }
 390             elsif ( $sort eq "acqdate_asc" ) {
 391                 $sort_by .= "1=32 <i ";
 392             }
 393             elsif ( $sort eq "acqdate_dsc" ) {
 394                 $sort_by .= "1=32 >i ";
 395             }
 396             elsif ( $sort eq "title_az" || $sort eq "title_asc" ) {
 397                 $sort_by .= "1=4 <i ";
 398             }
 399             elsif ( $sort eq "title_za" || $sort eq "title_dsc" ) {
 400                 $sort_by .= "1=4 >i ";
 401             }
 402             else {
 403                 warn "Ignoring unrecognized sort '$sort' requested" if $sort_by;
 404             }
 405         }
 406         if ( $sort_by && !$scan && $results[$i] ) {
 407             if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
 408                 warn "WARNING sort $sort_by failed";
 409             }
 410         }
 411     }    # finished looping through servers
 412
 413     # The big moment: asynchronously retrieve results from all servers
 414         _ZOOM_event_loop(
 415             \@zconns,
 416             \@results,
 417             sub {
 418                 my ( $i, $size ) = @_;
 419                 my $results_hash;
 420
 421                 # loop through the results
 422                 $results_hash->{'hits'} = $size;
 423                 my $times;
 424                 if ( $offset + $results_per_page <= $size ) {
 425                     $times = $offset + $results_per_page;
 426                 }
 427                 else {
 428                     $times = $size;
 429                 }
 430
 431                 for ( my $j = $offset ; $j < $times ; $j++ ) {
 432                     my $records_hash;
 433                     my $record;
 434
 435                     ## Check if it's an index scan
 436                     if ($scan) {
 437                         my ( $term, $occ ) = $results[ $i - 1 ]->display_term($j);
 438
 439                  # here we create a minimal MARC record and hand it off to the
 440                  # template just like a normal result ... perhaps not ideal, but
 441                  # it works for now
 442                         my $tmprecord = MARC::Record->new();
 443                         $tmprecord->encoding('UTF-8');
 444                         my $tmptitle;
 445                         my $tmpauthor;
 446
 447                 # the minimal record in author/title (depending on MARC flavour)
 448                         if ( C4::Context->preference("marcflavour") eq
 449                             "UNIMARC" )
 450                         {
 451                             $tmptitle = MARC::Field->new(
 452                                 '200', ' ', ' ',
 453                                 a => $term,
 454                                 f => $occ
 455                             );
 456                             $tmprecord->append_fields($tmptitle);
 457                         }
 458                         else {
 459                             $tmptitle =
 460                               MARC::Field->new( '245', ' ', ' ', a => $term, );
 461                             $tmpauthor =
 462                               MARC::Field->new( '100', ' ', ' ', a => $occ, );
 463                             $tmprecord->append_fields($tmptitle);
 464                             $tmprecord->append_fields($tmpauthor);
 465                         }
 466                         $results_hash->{'RECORDS'}[$j] =
 467                           $tmprecord->as_usmarc();
 468                     }
 469
 470                     # not an index scan
 471                     else {
 472                         $record = $results[ $i - 1 ]->record($j)->raw();
 473                         # warn "RECORD $j:".$record;
 474                         $results_hash->{'RECORDS'}[$j] = $record;
 475                     }
 476
 477                 }
 478                 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
 479
 480                 # Fill the facets while we're looping, but only for the
 481                 # biblioserver and not for a scan
 482                 if ( !$scan && $servers[ $i - 1 ] =~ /biblioserver/ ) {
 483                     $facets_counter = GetFacets( $results[ $i - 1 ] );
 484                     $facets_info    = _get_facets_info( $facets );
 485                 }
 486
 487                 # BUILD FACETS
 488                 if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
 489                     for my $link_value (
 490                         sort { $a cmp $b } keys %$facets_counter
 491                       )
 492                     {
 493                         my @this_facets_array;
 494                         for my $one_facet (
 495                             sort {
 496                                 $facets_counter->{$link_value}
 497                                   ->{$b} <=> $facets_counter->{$link_value}
 498                                   ->{$a}
 499                             } keys %{ $facets_counter->{$link_value} }
 500                           )
 501                         {
 502 # Sanitize the link value : parenthesis, question and exclamation mark will cause errors with CCL
 503                             my $facet_link_value = $one_facet;
 504                             $facet_link_value =~ s/[()!?¡¿؟]/ /g;
 505
 506                             # fix the length that will display in the label,
 507                             my $facet_label_value = $one_facet;
 508                             my $facet_max_length  = C4::Context->preference(
 509                                 'FacetLabelTruncationLength')
 510                               || 20;
 511                             $facet_label_value =
 512                               substr( $one_facet, 0, $facet_max_length )
 513                               . "..."
 514                               if length($facet_label_value) >
 515                                   $facet_max_length;
 516
 517                         # if it's a branch, label by the name, not the code,
 518                             if ( $link_value =~ /branch/ ) {
 519                                 if (   defined $branches
 520                                     && ref($branches) eq "HASH"
 521                                     && defined $branches->{$one_facet}
 522                                     && ref( $branches->{$one_facet} ) eq
 523                                     "HASH" )
 524                                 {
 525                                     $facet_label_value =
 526                                       $branches->{$one_facet}
 527                                       ->{'branchname'};
 528                                 }
 529                                 else {
 530                                     $facet_label_value = "*";
 531                                 }
 532                             }
 533
 534                       # if it's a itemtype, label by the name, not the code,
 535                             if ( $link_value =~ /itype/ ) {
 536                                 if (   defined $itemtypes
 537                                     && ref($itemtypes) eq "HASH"
 538                                     && defined $itemtypes->{$one_facet}
 539                                     && ref( $itemtypes->{$one_facet} ) eq
 540                                     "HASH" )
 541                                 {
 542                                     $facet_label_value =
 543                                       $itemtypes->{$one_facet}
 544                                       ->{translated_description};
 545                                 }
 546                             }
 547
 548            # also, if it's a location code, use the name instead of the code
 549                             if ( $link_value =~ /location/ ) {
 550                                 # TODO Retrieve all authorised values at once, instead of 1 query per entry
 551                                 my $av = Koha::AuthorisedValues->search({ category => 'LOC', authorised_value => $one_facet });
 552                                 $facet_label_value = $av->count ? $av->next->opac_description : '';
 553                             }
 554
 555                             # also, if it's a collection code, use the name instead of the code
 556                             if ( $link_value =~ /ccode/ ) {
 557                                 # TODO Retrieve all authorised values at once, instead of 1 query per entry
 558                                 my $av = Koha::AuthorisedValues->search({ category => 'CCODE', authorised_value => $one_facet });
 559                                 $facet_label_value = $av->count ? $av->next->opac_description : '';
 560                             }
 561
 562             # but we're down with the whole label being in the link's title.
 563                             push @this_facets_array,
 564                               {
 565                                 facet_count =>
 566                                   $facets_counter->{$link_value}
 567                                   ->{$one_facet},
 568                                 facet_label_value => $facet_label_value,
 569                                 facet_title_value => $one_facet,
 570                                 facet_link_value  => $facet_link_value,
 571                                 type_link_value   => $link_value,
 572                               }
 573                               if ($facet_label_value);
 574                         }
 575
 576                         push @facets_loop,
 577                           {
 578                             type_link_value => $link_value,
 579                             type_id         => $link_value . "_id",
 580                             "type_label_"
 581                               . $facets_info->{$link_value}->{'label_value'} =>
 582                               1,
 583                             facets     => \@this_facets_array,
 584                           }
 585                           unless (
 586                             (
 587                                 $facets_info->{$link_value}->{'label_value'} =~
 588                                 /Libraries/
 589                             )
 590                             and ( Koha::Libraries->search->count == 1 )
 591                           );
 592                     }
 593                 }
 594             }
 595         );
 596
 597     # This sorts the facets into alphabetical order
 598     if (@facets_loop) {
 599         foreach my $f (@facets_loop) {
 600             $f->{facets} = [ sort { uc($a->{facet_label_value}) cmp uc($b->{facet_label_value}) } @{ $f->{facets} } ];
 601         }
 602     }
 603
 604     return ( undef, $results_hashref, \@facets_loop );
 605 }
 606
 607 sub GetFacets {
 608
 609     my $rs = shift;
 610     my $facets;
 611
 612     my $use_zebra_facets = C4::Context->config('use_zebra_facets') // 0;
 613
 614     if ( $use_zebra_facets ) {
 615         $facets = _get_facets_from_zebra( $rs );
 616     } else {
 617         $facets = _get_facets_from_records( $rs );
 618     }
 619
 620     return $facets;
 621 }
 622
 623 sub _get_facets_from_records {
 624
 625     my $rs = shift;
 626
 627     my $facets_maxrecs = C4::Context->preference('maxRecordsForFacets') // 20;
 628     my $facets_config  = getFacets();
 629     my $facets         = {};
 630     my $size           = $rs->size();
 631     my $jmax           = $size > $facets_maxrecs
 632                             ? $facets_maxrecs
 633                             : $size;
 634
 635     for ( my $j = 0 ; $j < $jmax ; $j++ ) {
 636
 637         my $marc_record = new_record_from_zebra (
 638                 'biblioserver',
 639                 $rs->record( $j )->raw()
 640         );
 641
 642         if ( ! defined $marc_record ) {
 643             warn "ERROR DECODING RECORD - $@: " .
 644                 $rs->record( $j )->raw();
 645             next;
 646         }
 647
 648         _get_facets_data_from_record( $marc_record, $facets_config, $facets );
 649     }
 650
 651     return $facets;
 652 }
 653
 654 =head2 _get_facets_data_from_record
 655
 656     C4::Search::_get_facets_data_from_record( $marc_record, $facets, $facets_counter );
 657
 658 Internal function that extracts facets information from a MARC::Record object
 659 and populates $facets_counter for using in getRecords.
 660
 661 $facets is expected to be filled with C4::Koha::getFacets output (i.e. the configured
 662 facets for Zebra).
 663
 664 =cut
 665
 666 sub _get_facets_data_from_record {
 667
 668     my ( $marc_record, $facets, $facets_counter ) = @_;
 669
 670     for my $facet (@$facets) {
 671
 672         my @used_datas = ();
 673
 674         foreach my $tag ( @{ $facet->{ tags } } ) {
 675
 676             # tag number is the first three digits
 677             my $tag_num          = substr( $tag, 0, 3 );
 678             # subfields are the remainder
 679             my $subfield_letters = substr( $tag, 3 );
 680
 681             my @fields = $marc_record->field( $tag_num );
 682             foreach my $field (@fields) {
 683                 # If $field->indicator(1) eq 'z', it means it is a 'see from'
 684                 # field introduced because of IncludeSeeFromInSearches, so skip it
 685                 next if $field->indicator(1) eq 'z';
 686
 687                 my $data = $field->as_string( $subfield_letters, $facet->{ sep } );
 688
 689                 unless ( grep { $_ eq $data } @used_datas ) {
 690                     push @used_datas, $data;
 691                     $facets_counter->{ $facet->{ idx } }->{ $data }++;
 692                 }
 693             }
 694         }
 695     }
 696 }
 697
 698 =head2 _get_facets_from_zebra
 699
 700     my $facets = _get_facets_from_zebra( $result_set )
 701
 702 Retrieves facets for a specified result set. It loops through the facets defined
 703 in C4::Koha::getFacets and returns a hash with the following structure:
 704
 705    {  facet_idx => {
 706             facet_value => count
 707       },
 708       ...
 709    }
 710
 711 =cut
 712
 713 sub _get_facets_from_zebra {
 714
 715     my $rs = shift;
 716
 717     # save current elementSetName
 718     my $elementSetName = $rs->option( 'elementSetName' );
 719
 720     my $facets_loop = getFacets();
 721     my $facets_data  = {};
 722     # loop through defined facets and fill the facets hashref
 723     foreach my $facet ( @$facets_loop ) {
 724
 725         my $idx = $facet->{ idx };
 726         my $sep = $facet->{ sep };
 727         my $facet_values = _get_facet_from_result_set( $idx, $rs, $sep );
 728         if ( $facet_values ) {
 729             # we've actually got a result
 730             $facets_data->{ $idx } = $facet_values;
 731         }
 732     }
 733     # set elementSetName to its previous value to avoid side effects
 734     $rs->option( elementSetName => $elementSetName );
 735
 736     return $facets_data;
 737 }
 738
 739 =head2 _get_facet_from_result_set
 740
 741     my $facet_values =
 742         C4::Search::_get_facet_from_result_set( $facet_idx, $result_set, $sep )
 743
 744 Internal function that extracts facet information for a specific index ($facet_idx) and
 745 returns a hash containing facet values and count:
 746
 747     {
 748         $facet_value => $count ,
 749         ...
 750     }
 751
 752 Warning: this function has the side effect of changing the elementSetName for the result
 753 set. It is a helper function for the main loop, which takes care of backing it up for
 754 restoring.
 755
 756 =cut
 757
 758 sub _get_facet_from_result_set {
 759
 760     my $facet_idx = shift;
 761     my $rs        = shift;
 762     my $sep       = shift;
 763
 764     my $internal_sep  = '<*>';
 765     my $facetMaxCount = C4::Context->preference('FacetMaxCount') // 20;
 766
 767     return if ( ! defined $facet_idx || ! defined $rs );
 768     # zebra's facet element, untokenized index
 769     my $facet_element = 'zebra::facet::' . $facet_idx . ':0:' . $facetMaxCount;
 770     # configure zebra results for retrieving the desired facet
 771     $rs->option( elementSetName => $facet_element );
 772     # get the facet record from result set
 773     my $facet = $rs->record( 0 )->raw;
 774     # if the facet has no restuls...
 775     return if !defined $facet;
 776     # TODO: benchmark DOM vs. SAX performance
 777     my $facet_dom = XML::LibXML->load_xml(
 778       string => ($facet)
 779     );
 780     my @terms = $facet_dom->getElementsByTagName('term');
 781     return if ! @terms;
 782
 783     my $facets = {};
 784     foreach my $term ( @terms ) {
 785         my $facet_value = $term->textContent;
 786         $facet_value =~ s/\Q$internal_sep\E/$sep/ if defined $sep;
 787         $facets->{ $facet_value } = $term->getAttribute( 'occur' );
 788     }
 789
 790     return $facets;
 791 }
 792
 793 =head2 _get_facets_info
 794
 795     my $facets_info = C4::Search::_get_facets_info( $facets )
 796
 797 Internal function that extracts facets information and properly builds
 798 the data structure needed to render facet labels.
 799
 800 =cut
 801
 802 sub _get_facets_info {
 803
 804     my $facets = shift;
 805
 806     my $facets_info = {};
 807
 808     for my $facet ( @$facets ) {
 809         $facets_info->{ $facet->{ idx } }->{ label_value } = $facet->{ label };
 810     }
 811
 812     return $facets_info;
 813 }
 814
 815 sub pazGetRecords {
 816     my (
 817         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 818         $results_per_page, $offset,       $branches,       $query_type,
 819         $scan
 820     ) = @_;
 821     #NOTE: Parameter $branches is not used here !
 822
 823     my $paz = C4::Search::PazPar2->new(C4::Context->config('pazpar2url'));
 824     $paz->init();
 825     $paz->search($simple_query);
 826     sleep 1;   # FIXME: WHY?
 827
 828     # do results
 829     my $results_hashref = {};
 830     my $stats = XMLin($paz->stat);
 831     my $results = XMLin($paz->show($offset, $results_per_page, 'work-title:1'), forcearray => 1);
 832
 833     # for a grouped search result, the number of hits
 834     # is the number of groups returned; 'bib_hits' will have
 835     # the total number of bibs.
 836     $results_hashref->{'biblioserver'}->{'hits'} = $results->{'merged'}->[0];
 837     $results_hashref->{'biblioserver'}->{'bib_hits'} = $stats->{'hits'};
 838
 839     HIT: foreach my $hit (@{ $results->{'hit'} }) {
 840         my $recid = $hit->{recid}->[0];
 841
 842         my $work_title = $hit->{'md-work-title'}->[0];
 843         my $work_author;
 844         if (exists $hit->{'md-work-author'}) {
 845             $work_author = $hit->{'md-work-author'}->[0];
 846         }
 847         my $group_label = (defined $work_author) ? "$work_title / $work_author" : $work_title;
 848
 849         my $result_group = {};
 850         $result_group->{'group_label'} = $group_label;
 851         $result_group->{'group_merge_key'} = $recid;
 852
 853         my $count = 1;
 854         if (exists $hit->{count}) {
 855             $count = $hit->{count}->[0];
 856         }
 857         $result_group->{'group_count'} = $count;
 858
 859         for (my $i = 0; $i < $count; $i++) {
 860             # FIXME -- may need to worry about diacritics here
 861             my $rec = $paz->record($recid, $i);
 862             push @{ $result_group->{'RECORDS'} }, $rec;
 863         }
 864
 865         push @{ $results_hashref->{'biblioserver'}->{'GROUPS'} }, $result_group;
 866     }
 867
 868     # pass through facets
 869     my $termlist_xml = $paz->termlist('author,subject');
 870     my $terms = XMLin($termlist_xml, forcearray => 1);
 871     my @facets_loop = ();
 872     #die Dumper($results);
 873 #    foreach my $list (sort keys %{ $terms->{'list'} }) {
 874 #        my @facets = ();
 875 #        foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) {
 876 #            push @facets, {
 877 #                facet_label_value => $facet->{'name'}->[0],
 878 #            };
 879 #        }
 880 #        push @facets_loop, ( {
 881 #            type_label => $list,
 882 #            facets => \@facets,
 883 #        } );
 884 #    }
 885
 886     return ( undef, $results_hashref, \@facets_loop );
 887 }
 888
 889 # TRUNCATION
 890 sub _detect_truncation {
 891     my ( $operand, $index ) = @_;
 892     my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
 893         @regexpr );
 894     $operand =~ s/^ //g;
 895     my @wordlist = split( /\s/, $operand );
 896     foreach my $word (@wordlist) {
 897         if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
 898             push @rightlefttruncated, $word;
 899         }
 900         elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
 901             push @lefttruncated, $word;
 902         }
 903         elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
 904             push @righttruncated, $word;
 905         }
 906         elsif ( index( $word, "*" ) < 0 ) {
 907             push @nontruncated, $word;
 908         }
 909         else {
 910             push @regexpr, $word;
 911         }
 912     }
 913     return (
 914         \@nontruncated,       \@righttruncated, \@lefttruncated,
 915         \@rightlefttruncated, \@regexpr
 916     );
 917 }
 918
 919 # STEMMING
 920 sub _build_stemmed_operand {
 921     my ($operand,$lang) = @_;
 922     require Lingua::Stem::Snowball ;
 923     my $stemmed_operand=q{};
 924
 925     # Stemmer needs language
 926     return $operand unless $lang;
 927
 928     # If operand contains a digit, it is almost certainly an identifier, and should
 929     # not be stemmed.  This is particularly relevant for ISBNs and ISSNs, which
 930     # can contain the letter "X" - for example, _build_stemmend_operand would reduce
 931     # "014100018X" to "x ", which for a MARC21 database would bring up irrelevant
 932     # results (e.g., "23 x 29 cm." from the 300$c).  Bug 2098.
 933     return $operand if $operand =~ /\d/;
 934
 935 # FIXME: the locale should be set based on the user's language and/or search choice
 936     #warn "$lang";
 937     # Make sure we only use the first two letters from the language code
 938     $lang = lc(substr($lang, 0, 2));
 939     # The language codes for the two variants of Norwegian will now be "nb" and "nn",
 940     # none of which Lingua::Stem::Snowball can use, so we need to "translate" them
 941     if ($lang eq 'nb' || $lang eq 'nn') {
 942       $lang = 'no';
 943     }
 944     my $stemmer = Lingua::Stem::Snowball->new( lang => $lang,
 945                                                encoding => "UTF-8" );
 946
 947     my @words = split( / /, $operand );
 948     my @stems = $stemmer->stem(\@words);
 949     for my $stem (@stems) {
 950         $stemmed_operand .= "$stem";
 951         $stemmed_operand .= "?"
 952           unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
 953         $stemmed_operand .= " ";
 954     }
 955     warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
 956     return $stemmed_operand;
 957 }
 958
 959 # FIELD WEIGHTING
 960 sub _build_weighted_query {
 961
 962 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
 963 # pretty well but could work much better if we had a smarter query parser
 964     my ( $operand, $stemmed_operand, $index ) = @_;
 965     my $stemming      = C4::Context->preference("QueryStemming")     || 0;
 966     my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
 967     my $fuzzy_enabled = C4::Context->preference("QueryFuzzy")        || 0;
 968     $operand =~ s/"/ /g;    # Bug 7518: searches with quotation marks don't work
 969
 970     my $weighted_query .= "(rk=(";    # Specifies that we're applying rank
 971
 972     # Keyword, or, no index specified
 973     if ( ( $index eq 'kw' ) || ( !$index ) ) {
 974         $weighted_query .=
 975           "Title-cover,ext,r1=\"$operand\"";    # exact title-cover
 976         $weighted_query .= " or ti,ext,r2=\"$operand\"";    # exact title
 977         $weighted_query .= " or Title-cover,phr,r3=\"$operand\"";    # phrase title
 978         $weighted_query .= " or ti,wrdl,r4=\"$operand\"";    # words in title
 979           #$weighted_query .= " or any,ext,r4=$operand";               # exact any
 980           #$weighted_query .=" or kw,wrdl,r5=\"$operand\"";            # word list any
 981         $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
 982           if $fuzzy_enabled;    # add fuzzy, word list
 983         $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
 984           if ( $stemming and $stemmed_operand )
 985           ;                     # add stemming, right truncation
 986         $weighted_query .= " or wrdl,r9=\"$operand\"";
 987
 988         # embedded sorting: 0 a-z; 1 z-a
 989         # $weighted_query .= ") or (sort1,aut=1";
 990     }
 991
 992     # Barcode searches should skip this process
 993     elsif ( $index eq 'bc' ) {
 994         $weighted_query .= "bc=\"$operand\"";
 995     }
 996
 997     # Authority-number searches should skip this process
 998     elsif ( $index eq 'an' ) {
 999         $weighted_query .= "an=\"$operand\"";
1000     }
1001
1002     # If the index is numeric, don't autoquote it.
1003     elsif ( $index =~ /,st-numeric$/ ) {
1004         $weighted_query .= " $index=$operand";
1005     }
1006
1007     # If the index already has more than one qualifier, wrap the operand
1008     # in quotes and pass it back (assumption is that the user knows what they
1009     # are doing and won't appreciate us mucking up their query
1010     elsif ( $index =~ ',' ) {
1011         $weighted_query .= " $index=\"$operand\"";
1012     }
1013
1014     #TODO: build better cases based on specific search indexes
1015     else {
1016         $weighted_query .= " $index,ext,r1=\"$operand\"";    # exact index
1017           #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
1018         $weighted_query .= " or $index,phr,r3=\"$operand\"";    # phrase index
1019         $weighted_query .= " or $index,wrdl,r6=\"$operand\"";    # word list index
1020         $weighted_query .= " or $index,wrdl,fuzzy,r8=\"$operand\""
1021           if $fuzzy_enabled;    # add fuzzy, word list
1022         $weighted_query .= " or $index,wrdl,rt,r9=\"$stemmed_operand\""
1023           if ( $stemming and $stemmed_operand );    # add stemming, right truncation
1024     }
1025
1026     $weighted_query .= "))";                       # close rank specification
1027     return $weighted_query;
1028 }
1029
=head2 getIndexes

    my $indexes = getIndexes();

Returns a reference to an array holding the names of the available
indexes: the bibliographic indexes first, followed by the item indexes.
A fresh array is built on every call.

C<bc> and C<Local-classification> are listed in both groups and therefore
appear twice in the result.

=cut

sub getIndexes {

    # Names are whitespace-separated, so no entry can carry stray leading
    # or trailing blanks (which would stop it from ever matching an index
    # name taken from a query).
    my @biblio_indexes = qw(
        ab Abstract acqdate allrecords an Any at arl arp
        au aub aud audience auo aut
        Author Author-in-order Author-personal-bibliography Authority-Number authtype
        bc Bib-level biblionumber bio biography
        callnum cfn Chronological-subdivision
        cn-bib-source cn-bib-sort cn-class cn-item cn-prefix cn-suffix
        cpn Code-institution
        Conference-name Conference-name-heading Conference-name-see Conference-name-seealso
        Content-type Control-number copydate
        Corporate-name Corporate-name-heading Corporate-name-see Corporate-name-seealso
        Country-publication ctype curriculum
        date-entered-on-file Date-of-acquisition Date-of-publication Date-time-last-modified
        Dewey-classification Dissertation-information diss dtlm
        EAN extent fic fiction Form-subdivision format Geographic-subdivision
        he Heading Heading-use-main-or-added-entry Heading-use-series-added-entry
        Heading-use-subject-added-entry Host-item
        id-other ident Identifier-standard Illustration-code
        Index-term-genre Index-term-uncontrolled Interest-age-level Interest-grade-level
        ISBN isbn ISSN issn itemtype
        kw Koha-Auth-Number
        l-format language language-original lc-card LC-card-number lcn
        lex lexile-number llength ln ln-audio ln-subtitle
        Local-classification Local-number
        Match-heading Match-heading-see-from Material-type mc-itemtype mc-rtype
        mus name Music-number
        Name-geographic Name-geographic-heading Name-geographic-see Name-geographic-seealso
        nb Note notes ns nt Other-control-number
        pb Personal-name Personal-name-heading Personal-name-see Personal-name-seealso
        pl Place-publication pn popularity pubdate Publisher Provider pv
        Reading-grade-level Record-control-number rcn Record-type rtype
        se See See-also sn Stock-number
        su Subject Subject-heading-thesaurus Subject-name-personal Subject-subdivision
        Summary Suppress su-geo su-na su-to su-ut ut
        Term-genre-form Term-genre-form-heading Term-genre-form-see Term-genre-form-seealso
        ti Title Title-cover Title-series
        Title-uniform Title-uniform-heading Title-uniform-see Title-uniform-seealso
        totalissues yr
    );

    my @item_indexes = qw(
        acqsource barcode bc branch ccode classification-source cn-sort
        coded-location-qualifier copynumber damaged datelastborrowed datelastseen
        holdingbranch homebranch issues item itemnumber itype
        Local-classification location lost materials-specified
        mc-ccode mc-itype mc-loc notforloan Number-local-acquisition onloan
        price renewals replacementprice replacementpricedate reserves restricted
        stack stocknumber inv uri withdrawn
    );

    return [ @biblio_indexes, @item_indexes ];
}
1253
1254 =head2 buildQuery
1255
1256 ( $error, $query,
1257 $simple_query, $query_cgi,
1258 $query_desc, $limit,
1259 $limit_cgi, $limit_desc,
1260 $query_type ) = buildQuery ( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang);
1261
1262 Build queries and limits in CCL, CGI, Human,
1263 handle truncation, stemming, field weighting, fuzziness, etc.
1264
1265 See verbose embedded documentation.
1266
1267
1268 =cut
1269
1270 sub buildQuery {
1271     my ( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang) = @_;
1272
1273     warn "---------\nEnter buildQuery\n---------" if $DEBUG;
1274
1275     my $query_desc;
1276
1277     # dereference
1278     my @operators = $operators ? @$operators : ();
1279     my @indexes   = $indexes   ? @$indexes   : ();
1280     my @operands  = $operands  ? @$operands  : ();
1281     my @limits    = $limits    ? @$limits    : ();
1282     my @sort_by   = $sort_by   ? @$sort_by   : ();
1283
1284     my $stemming         = C4::Context->preference("QueryStemming")        || 0;
1285     my $auto_truncation  = C4::Context->preference("QueryAutoTruncate")    || 0;
1286     my $weight_fields    = C4::Context->preference("QueryWeightFields")    || 0;
1287     my $fuzzy_enabled    = C4::Context->preference("QueryFuzzy")           || 0;
1288
1289     my $query        = $operands[0];
1290     my $simple_query = $operands[0];
1291
1292     # initialize the variables we're passing back
1293     my $query_cgi;
1294     my $query_type;
1295
1296     my $limit;
1297     my $limit_cgi;
1298     my $limit_desc;
1299
1300     my $cclq       = 0;
1301     my $cclindexes = getIndexes();
1302     if ( $query !~ /\s*(ccl=|pqf=|cql=)/ ) {
1303         while ( !$cclq && $query =~ /(?:^|\W)([\w-]+)(,[\w-]+)*[:=]/g ) {
1304             my $dx = lc($1);
1305             $cclq = grep { lc($_) eq $dx } @$cclindexes;
1306         }
1307         $query = "ccl=$query" if $cclq;
1308     }
1309
1310 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
1311 # DIAGNOSTIC ONLY!!
1312     if ( $query =~ /^ccl=/ ) {
1313         my $q=$';
1314         # This is needed otherwise ccl= and &limit won't work together, and
1315         # this happens when selecting a subject on the opac-detail page
1316         @limits = grep {!/^$/} @limits;
1317         my $original_q = $q; # without available part
1318         unless ( grep { $_ eq 'available' } @limits ) {
1319             $q =~ s| and \( \(allrecords,AlwaysMatches=''\) and \(not-onloan-count,st-numeric >= 1\) and \(lost,st-numeric=0\) \)||;
1320             $original_q = $q;
1321         }
1322         if ( @limits ) {
1323             if ( grep { $_ eq 'available' } @limits ) {
1324                 $q .= q| and ( (allrecords,AlwaysMatches='') and (not-onloan-count,st-numeric >= 1) and (lost,st-numeric=0) )|;
1325                 @limits = grep {!/^available$/} @limits;
1326             }
1327             $q .= ' and '.join(' and ', @limits) if @limits;
1328         }
1329         return ( undef, $q, $q, "q=ccl=".uri_escape_utf8($q), $original_q, '', '', '', 'ccl' );
1330     }
1331     if ( $query =~ /^cql=/ ) {
1332         return ( undef, $', $', "q=cql=".uri_escape_utf8($'), $', '', '', '', 'cql' );
1333     }
1334     if ( $query =~ /^pqf=/ ) {
1335         $query_desc = $';
1336         $query_cgi = "q=pqf=".uri_escape_utf8($');
1337         return ( undef, $', $', $query_cgi, $query_desc, '', '', '', 'pqf' );
1338     }
1339
1340     # pass nested queries directly
1341     # FIXME: need better handling of some of these variables in this case
1342     # Nested queries aren't handled well and this implementation is flawed and causes users to be
1343     # unable to search for anything containing () commenting out, will be rewritten for 3.4.0
1344 #    if ( $query =~ /(\(|\))/ ) {
1345 #        return (
1346 #            undef,              $query, $simple_query, $query_cgi,
1347 #            $query,             $limit, $limit_cgi,    $limit_desc,
1348 #            'ccl'
1349 #        );
1350 #    }
1351
1352 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
1353 # query operands and indexes and add stemming, truncation, field weighting, etc.
1354 # Once we do so, we'll end up with a value in $query, just like if we had an
1355 # incoming $query from the user
1356     else {
1357         $query = ""
1358           ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
1359         my $previous_operand
1360           ;    # a flag used to keep track if there was a previous query
1361                # if there was, we can apply the current operator
1362                # for every operand
1363         for ( my $i = 0 ; $i <= @operands ; $i++ ) {
1364
1365             # COMBINE OPERANDS, INDEXES AND OPERATORS
1366             if ( ($operands[$i] // '') ne '' ) {
1367                 $operands[$i]=~s/^\s+//;
1368
1369               # A flag to determine whether or not to add the index to the query
1370                 my $indexes_set;
1371
1372 # If the user is sophisticated enough to specify an index, turn off field weighting, and stemming handling
1373                 if ( $operands[$i] =~ /\w(:|=)/ || $scan ) {
1374                     $weight_fields    = 0;
1375                     $stemming         = 0;
1376                 } else {
1377                     $operands[$i] =~ s/\?/{?}/g; # need to escape question marks
1378                 }
1379                 my $operand = $operands[$i];
1380                 my $index   = $indexes[$i] || 'kw';
1381
1382                 # Add index-specific attributes
1383
1384                 #Afaik, this 'yr' condition will only ever be met in the staff client advanced search
1385                 #for "Publication date", since typing 'yr:YYYY' into the search box produces a CCL query,
1386                 #which is processed higher up in this sub. Other than that, year searches are typically
1387                 #handled as limits which are not processed here either.
1388
1389                 # Search ranges: Date of Publication, st-numeric
1390                 if ( $index =~ /(yr|st-numeric)/ ) {
1391                     #weight_fields/relevance search causes errors with date ranges
1392                     #In the case of YYYY-, it will only return records with a 'yr' of YYYY (not the range)
1393                     #In the case of YYYY-YYYY, it will return no results
1394                     $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = 0;
1395                 }
1396
1397                 # Date of Acquisition
1398                 elsif ( $index =~ /acqdate/ ) {
1399                     #stemming and auto_truncation would have zero impact since it already is YYYY-MM-DD format
1400                     #Weight_fields probably SHOULD be turned OFF, otherwise you'll get records floating to the
1401                       #top of the results just because they have lots of item records matching that date.
1402                     #Fuzzy actually only applies during _build_weighted_query, and is reset there anyway, so
1403                       #irrelevant here
1404                     $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = 0;
1405                 }
1406                 # ISBN,ISSN,Standard Number, don't need special treatment
1407                 elsif ( $index eq 'nb' || $index eq 'ns' || $index eq 'hi' ) {
1408                     (
1409                         $stemming,      $auto_truncation,
1410                         $weight_fields, $fuzzy_enabled
1411                     ) = ( 0, 0, 0, 0 );
1412
1413                     if ( $index eq 'nb' ) {
1414                         if ( C4::Context->preference("SearchWithISBNVariations") ) {
1415                             my @isbns = C4::Koha::GetVariationsOfISBN( $operand );
1416                             $operands[$i] = $operand =  '(nb=' . join(' OR nb=', @isbns) . ')';
1417                             $indexes[$i] = $index = 'kw';
1418                         }
1419                     }
1420                 }
1421
1422                 # Set default structure attribute (word list)
1423                 my $struct_attr = q{};
1424                 unless ( $indexes_set || $index =~ /,(st-|phr|ext|wrdl)/ || $index =~ /^(nb|ns)$/ ) {
1425                     $struct_attr = ",wrdl";
1426                 }
1427
1428                 # Some helpful index variants
1429                 my $index_plus       = $index . $struct_attr . ':';
1430                 my $index_plus_comma = $index . $struct_attr . ',';
1431
1432                 if ($auto_truncation){
1433                         unless ( $index =~ /,(st-|phr|ext)/ ) {
1434                                                 #FIXME only valid with LTR scripts
1435                                                 $operand=join(" ",map{
1436                                                                                         (index($_,"*")>0?"$_":"$_*")
1437                                                                                          }split (/\s+/,$operand));
1438                                                 warn $operand if $DEBUG;
1439                                         }
1440                                 }
1441
1442                 # Detect Truncation
1443                 my $truncated_operand;
1444                 my( $nontruncated, $righttruncated, $lefttruncated,
1445                     $rightlefttruncated, $regexpr
1446                 ) = _detect_truncation( $operand, $index );
1447                 warn
1448 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
1449                   if $DEBUG;
1450
1451                 # Apply Truncation
1452                 if (
1453                     scalar(@$righttruncated) + scalar(@$lefttruncated) +
1454                     scalar(@$rightlefttruncated) > 0 )
1455                 {
1456
1457                # Don't field weight or add the index to the query, we do it here
1458                     $indexes_set = 1;
1459                     undef $weight_fields;
1460                     my $previous_truncation_operand;
1461                     if (scalar @$nontruncated) {
1462                         $truncated_operand .= "$index_plus @$nontruncated ";
1463                         $previous_truncation_operand = 1;
1464                     }
1465                     if (scalar @$righttruncated) {
1466                         $truncated_operand .= "and " if $previous_truncation_operand;
1467                         $truncated_operand .= $index_plus_comma . "rtrn:@$righttruncated ";
1468                         $previous_truncation_operand = 1;
1469                     }
1470                     if (scalar @$lefttruncated) {
1471                         $truncated_operand .= "and " if $previous_truncation_operand;
1472                         $truncated_operand .= $index_plus_comma . "ltrn:@$lefttruncated ";
1473                         $previous_truncation_operand = 1;
1474                     }
1475                     if (scalar @$rightlefttruncated) {
1476                         $truncated_operand .= "and " if $previous_truncation_operand;
1477                         $truncated_operand .= $index_plus_comma . "rltrn:@$rightlefttruncated ";
1478                         $previous_truncation_operand = 1;
1479                     }
1480                 }
1481                 $operand = $truncated_operand if $truncated_operand;
1482                 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
1483
1484                 # Handle Stemming
1485                 my $stemmed_operand;
1486                 $stemmed_operand = _build_stemmed_operand($operand, $lang)
1487                                                                                 if $stemming;
1488
1489                 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
1490
1491                 # Handle Field Weighting
1492                 my $weighted_operand;
1493                 if ($weight_fields) {
1494                     $weighted_operand = _build_weighted_query( $operand, $stemmed_operand, $index );
1495                     $operand = $weighted_operand;
1496                     $indexes_set = 1;
1497                 }
1498
1499                 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
1500
1501                 ($query,$query_cgi,$query_desc,$previous_operand) = _build_initial_query({
1502                     query => $query,
1503                     query_cgi => $query_cgi,
1504                     query_desc => $query_desc,
1505                     operator => ($operators[ $i - 1 ]) ? $operators[ $i - 1 ] : '',
1506                     parsed_operand => $operand,
1507                     original_operand => $operands[$i] // '',
1508                     index => $index,
1509                     index_plus => $index_plus,
1510                     indexes_set => $indexes_set,
1511                     previous_operand => $previous_operand,
1512                 });
1513
1514             }    #/if $operands
1515         }    # /for
1516     }
1517     warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1518
1519     # add limits
1520     my %group_OR_limits;
1521     my $availability_limit;
1522     foreach my $this_limit (@limits) {
1523         next unless $this_limit;
1524         if ( $this_limit =~ /available/ ) {
1525 #
1526 ## 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1527 ## In English:
1528 ## all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1529             $availability_limit .=
1530 "( (allrecords,AlwaysMatches='') and (not-onloan-count,st-numeric >= 1) and (lost,st-numeric=0) )";
1531             $limit_cgi  .= "&limit=available";
1532             $limit_desc .= "";
1533         }
1534
1535         # group_OR_limits, prefixed by mc-
1536         # OR every member of the group
1537         elsif ( $this_limit =~ /mc/ ) {
1538             my ($k,$v) = split(/:/, $this_limit,2);
1539             if ( $k !~ /mc-i(tem)?type/ ) {
1540                 # in case the mc-ccode value has complicating chars like ()'s inside it we wrap in quotes
1541                 $this_limit =~ tr/"//d;
1542                 $this_limit = $k.":'".$v."'";
1543             }
1544
1545             $group_OR_limits{$k} .= " or " if $group_OR_limits{$k};
1546             $limit_desc      .= " or " if $group_OR_limits{$k};
1547             $group_OR_limits{$k} .= "$this_limit";
1548             $limit_cgi       .= "&limit=" . uri_escape_utf8($this_limit);
1549             $limit_desc      .= " $this_limit";
1550         }
1551
1552         # Regular old limits
1553         else {
1554             $limit .= " and " if $limit || $query;
1555             $limit      .= "$this_limit";
1556             $limit_cgi  .= "&limit=" . uri_escape_utf8($this_limit);
1557             if ($this_limit =~ /^branch:(.+)/) {
1558                 my $branchcode = $1;
1559                 my $library = Koha::Libraries->find( $branchcode );
1560                 if (defined $library) {
1561                     $limit_desc .= " branch:" . $library->branchname;
1562                 } else {
1563                     $limit_desc .= " $this_limit";
1564                 }
1565             } else {
1566                 $limit_desc .= " $this_limit";
1567             }
1568         }
1569     }
1570     foreach my $k (keys (%group_OR_limits)) {
1571         $limit .= " and " if ( $query || $limit );
1572         $limit .= "($group_OR_limits{$k})";
1573     }
1574     if ($availability_limit) {
1575         $limit .= " and " if ( $query || $limit );
1576         $limit .= "($availability_limit)";
1577     }
1578
1579     # Normalize the query and limit strings
1580     # This is flawed , means we can't search anything with : in it
1581     # if user wants to do ccl or cql, start the query with that
1582 #    $query =~ s/:/=/g;
1583     #NOTE: We use several different regexps here as you can't have variable-length lookbehind assertions
1584     $query =~ s/(?<=(ti|au|pb|su|an|kw|mc|nb|ns)):/=/g;
1585     $query =~ s/(?<=(wrdl)):/=/g;
1586     $query =~ s/(?<=(trn|phr)):/=/g;
1587     $query =~ s/(?<=(st-numeric)):/=/g;
1588     $query =~ s/(?<=(st-year)):/=/g;
1589     $query =~ s/(?<=(st-date-normalized)):/=/g;
1590
1591     # Removing warnings for later substitutions
1592     $query      //= q{};
1593     $query_desc //= q{};
1594     $query_cgi  //= q{};
1595     $limit      //= q{};
1596     $limit_desc //= q{};
1597     $limit =~ s/:/=/g;
1598     for ( $query, $query_desc, $limit, $limit_desc ) {
1599         s/  +/ /g;    # remove extra spaces
1600         s/^ //g;     # remove any beginning spaces
1601         s/ $//g;     # remove any ending spaces
1602         s/==/=/g;    # remove double == from query
1603     }
1604     $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1605
1606     for ($query_cgi,$simple_query) {
1607         s/"//g;
1608     }
1609     # append the limit to the query
1610     $query .= " " . $limit;
1611
1612     # Warnings if DEBUG
1613     if ($DEBUG) {
1614         warn "QUERY:" . $query;
1615         warn "QUERY CGI:" . $query_cgi;
1616         warn "QUERY DESC:" . $query_desc;
1617         warn "LIMIT:" . $limit;
1618         warn "LIMIT CGI:" . $limit_cgi;
1619         warn "LIMIT DESC:" . $limit_desc;
1620         warn "---------\nLeave buildQuery\n---------";
1621     }
1622
1623     return (
1624         undef,              $query, $simple_query, $query_cgi,
1625         $query_desc,        $limit, $limit_cgi,    $limit_desc,
1626         $query_type
1627     );
1628 }
1629
=head2 _build_initial_query

  ($query, $query_cgi, $query_desc, $previous_operand) = _build_initial_query($initial_query_params);

  Build a section of the initial query containing indexes, operators, and operands.

  $initial_query_params is a hashref with the following keys:

    query, query_cgi, query_desc - the strings built so far; the new section is appended to them
    operator         - operator joining this section to the previous one (' and ' is used when empty)
    parsed_operand   - the operand as it is added to the query
    original_operand - the operand used for the CGI string and the description
    index            - the index name added to the CGI string
    index_plus       - the index prefix put in front of the operand unless indexes_set is true
    indexes_set      - true when parsed_operand must be used as is
    previous_operand - true when an operand has already been added to the query

  The hashref itself is left untouched; the updated strings are only returned.
  The returned $previous_operand is always true.

=cut

sub _build_initial_query {
    my ($params) = @_;

    # Work on copies so the caller's hash is never modified behind its back
    my $query      = $params->{query};
    my $query_cgi  = $params->{query_cgi};
    my $query_desc = $params->{query_desc};

    # Missing pieces are treated as empty strings to avoid "uninitialized" warnings
    my $index_plus       = $params->{index_plus}       // q{};
    my $parsed_operand   = $params->{parsed_operand}   // q{};
    my $original_operand = $params->{original_operand} // q{};

    #If there is a previous operand, add a supplied operator or the default 'and'
    my $operator = q{};
    if ( $params->{previous_operand} ) {
        $operator = $params->{operator} ? " " . $params->{operator} . " " : ' and ';
    }

    #NOTE: indexes_set is typically set when doing truncation or field weighting
    my $operand = $params->{indexes_set} ? $parsed_operand : $index_plus . $parsed_operand;

    #e.g. "kw,wrdl:test"
    #e.g. " and kw,wrdl:test"
    $query .= $operator . $operand;

    $query_cgi .= "&op=" . uri_escape_utf8($operator) if $operator;
    $query_cgi .= "&idx=" . uri_escape_utf8( $params->{index} ) if $params->{index};
    $query_cgi .= "&q=" . uri_escape_utf8($original_operand) if $original_operand;

    #e.g. " and kw,wrdl: test"
    $query_desc .= $operator . $index_plus . " " . $original_operand;

    #If there was no previous operand, this one now counts as one
    my $previous_operand = $params->{previous_operand} || 1;

    return ( $query, $query_cgi, $query_desc, $previous_operand );
}
1665
=head2 searchResults

  my @search_results = searchResults($search_context, $searchdesc, $hits,
                                     $results_per_page, $offset, $scan,
                                     $marcresults, $xslt_variables);

Format results in a form suitable for passing to the template

=over 2

=item C<$search_context>

Hashref. Its C<interface> key is set to 'opac' unless it already is 'intranet'
(the caller's hash is updated). Its C<category> key is handed to
C4::Items::GetHiddenItemnumbers as C<borcat> when running for the OPAC.

=item C<$searchdesc>

Not used by this function.

=item C<$hits>, C<$results_per_page>, C<$offset>

Records C<$offset> up to C<$offset + $results_per_page - 1> are processed,
capped at C<$hits>.

=item C<$scan>

When true the records are parsed as USMARC and neither the framework lookup
nor the XSLT processing is done.

=item C<$marcresults>

Arrayref of raw records, indexed by their position in the whole result set.

=item C<$xslt_variables>

Passed through to XSLTParse4Display.

=back

Returns a list of hashrefs, one per record. Records that cannot be decoded and,
records whose items are all hidden are left out.

=cut

# IMO this subroutine is pretty messy still -- it's responsible for
# building the HTML output for the template
sub searchResults {
    my ( $search_context, $searchdesc, $hits, $results_per_page, $offset, $scan, $marcresults, $xslt_variables ) = @_;
    my @newresults;

    require C4::Items;

    # Anything that is not explicitly the staff interface is handled as OPAC
    $search_context->{'interface'} = 'opac' if !$search_context->{'interface'} || $search_context->{'interface'} ne 'intranet';
    my ($is_opac, $hidelostitems);
    if ($search_context->{'interface'} eq 'opac') {
        $hidelostitems = C4::Context->preference('hidelostitems');
        $is_opac       = 1;
    }

    my $record_processor = Koha::RecordProcessor->new({
        filters => 'ViewPolicy'
    });

    #Build branchnames hash
    my %branches = map { $_->branchcode => $_->branchname } Koha::Libraries->search({}, { order_by => 'branchname' });

# FIXME - We build an authorised values hash here, using the default framework
# though it is possible to have different authvals for different fws.

    my $shelflocations =
      { map { $_->{authorised_value} => $_->{lib} } Koha::AuthorisedValues->get_descriptions_by_koha_field( { frameworkcode => '', kohafield => 'items.location' } ) };

    # get notforloan authorised value list (see $shelflocations  FIXME)
    my $av = Koha::MarcSubfieldStructures->search({ frameworkcode => '', kohafield => 'items.notforloan', authorised_value => [ -and => {'!=' => undef }, {'!=' => ''}] });
    my $notforloan_authorised_value = $av->count ? $av->next->authorised_value : undef;

    #Get itemtype hash
    my $itemtypes = Koha::ItemTypes->search_with_localization;
    my %itemtypes = map { $_->{itemtype} => $_ } @{ $itemtypes->unblessed };

    #search item field code
    my ($itemtag, undef) = GetMarcFromKohaField( "items.itemnumber" );

    ## find column names of items related to MARC
    my %subfieldstosearch;
    my @columns = Koha::Database->new()->schema()->resultset('Item')->result_source->columns;
    for my $column ( @columns ) {
        my ( undef, $tagsubfield ) = GetMarcFromKohaField( "items." . $column );
        if ( defined $tagsubfield ) {
            $subfieldstosearch{$column} = $tagsubfield;
        }
    }

    # handle which records to actually retrieve
    my $times;
    if ( $hits && $offset + $results_per_page <= $hits ) {
        $times = $offset + $results_per_page;
    }
    else {
        # Without hits there is nothing to loop over
        $times = $hits // 0;
    }

    my $marcflavour = C4::Context->preference("marcflavour");
    # We get the biblionumber position in MARC
    my ($bibliotag,$bibliosubf)=GetMarcFromKohaField( 'biblio.biblionumber' );

    # set stuff for XSLT processing here once, not later again for every record we retrieved
    my $xslfile;
    my $xslsyspref;
    if( $is_opac ){
        $xslsyspref = "OPACXSLTResultsDisplay";
        $xslfile = C4::Context->preference( $xslsyspref );
    } else {
        $xslsyspref = "XSLTResultsDisplay";
        $xslfile = C4::Context->preference( $xslsyspref ) || "default";
    }
    my $lang   = $xslfile ? C4::Languages::getlanguage()  : undef;
    my $sysxml = $xslfile ? C4::XSLT::get_xslt_sysprefs() : undef;

    my $userenv = C4::Context->userenv;
    my $logged_in_user
        = ( defined $userenv and $userenv->{number} )
        ? Koha::Patrons->find( $userenv->{number} )
        : undef;
    my $patron_category_hide_lost_items = ($logged_in_user) ? $logged_in_user->category->hidelostitems : 0;

    # Values that are the same for every record and item are read once here
    my $hbranch     = C4::Context->preference('StaffSearchResultsDisplayBranch') // q{};
    my $otherbranch = $hbranch eq 'homebranch' ? 'holdingbranch' : 'homebranch';

    # Field holding the host record link: 461 for UNIMARC, 773 otherwise
    my $analyticsfield = ( $marcflavour // q{} ) eq 'UNIMARC' ? '461' : '773';

    my $maxitems_pref = C4::Context->preference('maxItemsinSearchResults');
    my $maxitems = $maxitems_pref ? $maxitems_pref - 1 : 1;

    my $status_check_limit = C4::Context->preference('MaxSearchResultsItemsPerRecordStatusCheck');

    # loop through all of the records we've retrieved
    for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {

        my $marcrecord;
        if ($scan) {
            # For Scan searches we built USMARC data
            $marcrecord = MARC::Record->new_from_usmarc( $marcresults->[$i]);
        } else {
            # Normal search, render from Zebra's output
            $marcrecord = new_record_from_zebra(
                'biblioserver',
                $marcresults->[$i]
            );

            if ( ! defined $marcrecord ) {
                warn "ERROR DECODING RECORD - $@: " . $marcresults->[$i];
                next;
            }
        }

        my $fw = $scan
             ? undef
             : $bibliotag < 10
               ? GetFrameworkCode($marcrecord->field($bibliotag)->data)
               : GetFrameworkCode($marcrecord->subfield($bibliotag,$bibliosubf));

        SetUTF8Flag($marcrecord);
        my $oldbiblio = TransformMarcToKoha( $marcrecord, $fw );
        $oldbiblio->{result_number} = $i + 1;

        $oldbiblio->{normalized_upc}  = GetNormalizedUPC(       $marcrecord,$marcflavour);
        $oldbiblio->{normalized_ean}  = GetNormalizedEAN(       $marcrecord,$marcflavour);
        $oldbiblio->{normalized_oclc} = GetNormalizedOCLCNumber($marcrecord,$marcflavour);
        $oldbiblio->{normalized_isbn} = GetNormalizedISBN(undef,$marcrecord,$marcflavour);
        $oldbiblio->{content_identifier_exists} = 1 if ($oldbiblio->{normalized_isbn} or $oldbiblio->{normalized_oclc} or $oldbiblio->{normalized_ean} or $oldbiblio->{normalized_upc});

        # edition information, if any
        $oldbiblio->{edition} = $oldbiblio->{editionstatement};

        my $itemtype = $oldbiblio->{itemtype} ? $itemtypes{$oldbiblio->{itemtype}} : undef;
        # add imageurl to itemtype if there is one
        $oldbiblio->{imageurl} = $itemtype ? getitemtypeimagelocation( $search_context->{'interface'}, $itemtype->{imageurl} ) : q{};
        # Build summary if there is one (the summary is defined in the itemtypes table)
        $oldbiblio->{description} = $itemtype ? $itemtype->{translated_description} : q{};

        # FIXME: this is only used in the deprecated non-XLST opac results
        if ( !$xslfile && $is_opac && $itemtype && $itemtype->{summary} ) {
            my $summary = $itemtype->{summary};

            my $newsummary = q{};
            foreach my $line ( "$summary\n" =~ /(.*)\n/g ){

                # For every [tagsubfield] placeholder of the line, count the
                # values found in the first occurrence of that field
                my %values_count_for;
                foreach my $tag ( $line =~ /\[(\d{3}[\w|\d])\]/g ) {
                    my ( $tagnumber, $subfieldcode ) = $tag =~ /(.{3})(.)/;
                    my $tagfield = $marcrecord->field($tagnumber) or next;
                    my @values = $tagfield->subfield($subfieldcode);
                    $values_count_for{$tag} = scalar @values;
                }

                # We catch how many times to repeat this line
                my $max = 0;
                foreach my $count ( values %values_count_for ) {
                    $max = $count if $count > $max;
                }

                # we replace, and repeat each line
                for my $repetition ( 0 .. $max - 1 ) {
                    my $newline = $line;

                    foreach my $tag ( $newline =~ /\[(\d{3}[\w|\d])\]/g ) {
                        my ( $tagnumber, $subfieldcode ) = $tag =~ /(.{3})(.)/;
                        my $tagfield = $marcrecord->field($tagnumber) or next;
                        my @values = $tagfield->subfield($subfieldcode);

                        # A placeholder with fewer values than $max is blanked
                        my $subfieldvalue = $values[$repetition] // q{};
                        $newline =~ s/\[\Q$tag\E\]/$subfieldvalue/g;
                    }
                    $newsummary .= "$newline\n";
                }
            }

            # Drop the placeholders that could not be replaced
            $newsummary =~ s/\[(.*?)]//g;
            $newsummary =~ s/\n/<br\/>/g;
            $oldbiblio->{summary} = $newsummary;
        }

        # Pull out the items fields
        my @fields = $marcrecord->field($itemtag);

        # adding linked items that belong to host records
        if ( C4::Context->preference('EasyAnalyticalRecords') ) {
            foreach my $hostfield ( $marcrecord->field($analyticsfield)) {
                my $hostbiblionumber = $hostfield->subfield("0");
                my $linkeditemnumber = $hostfield->subfield("9");
                if( $hostbiblionumber ) {
                    my $linkeditemmarc = C4::Items::GetMarcItem( $hostbiblionumber, $linkeditemnumber );
                    if ($linkeditemmarc) {
                        my $linkeditemfield = $linkeditemmarc->field($itemtag);
                        if ($linkeditemfield) {
                            push( @fields, $linkeditemfield );
                        }
                    }
                }
            }
        }

        # Setting item statuses for display
        my @available_items_loop;
        my @onloan_items_loop;
        my @other_items_loop;

        my $available_items;
        my $onloan_items;
        my $other_items;

        my $ordered_count         = 0;
        my $available_count       = 0;
        my $onloan_count          = 0;
        my $longoverdue_count     = 0;
        my $other_count           = 0;
        my $withdrawn_count       = 0;
        my $itemlost_count        = 0;
        my $hideatopac_count      = 0;
        my $itembinding_count     = 0;
        my $itemdamaged_count     = 0;
        my $item_in_transit_count = 0;
        my $can_place_holds       = 0;
        my $item_onhold_count     = 0;
        my $notforloan_count      = 0;
        my $items_count           = scalar(@fields);
        my @hiddenitems; # hidden itemnumbers based on OpacHiddenItems syspref

        # loop through every item
        foreach my $field (@fields) {
            my $item;

            # populate the items hash
            foreach my $code ( keys %subfieldstosearch ) {
                $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
            }
            $item->{description} = $itemtypes{ $item->{itype} }{translated_description} if $item->{itype};

            # OPAC hidden items
            if ($is_opac) {
                # hidden because lost
                if ($hidelostitems && $item->{itemlost}) {
                    $hideatopac_count++;
                    next;
                }
                # hidden based on OpacHiddenItems syspref
                my @hi = C4::Items::GetHiddenItemnumbers({ items=> [ $item ], borcat => $search_context->{category} });
                if (scalar @hi) {
                    push @hiddenitems, @hi;
                    $hideatopac_count++;
                    next;
                }
            }

            # set item's branch name, use the StaffSearchResultsDisplayBranch syspref first, fall back to the other one
            if ($item->{$hbranch}) {
                $item->{'branchname'} = $branches{$item->{$hbranch}};
            }
            elsif ($item->{$otherbranch}) {     # Last resort
                $item->{'branchname'} = $branches{$item->{$otherbranch}};
            }

            my $prefix =
                ( $item->{$hbranch} ? $item->{$hbranch} . '--' : q{} )
              . ( $item->{location} ? $item->{location} : q{} )
              . ( $item->{itype}    ? $item->{itype}    : q{} )
              . ( $item->{itemcallnumber} ? $item->{itemcallnumber} : q{} );

            # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
            # NOTE(review): an item on loan only lands in this group when a
            # patron is logged in; otherwise it is handled by the branch
            # below -- confirm this is intended
            if ( $item->{onloan}
                and $logged_in_user
                and !( $patron_category_hide_lost_items and $item->{itemlost} ) )
            {
                $onloan_count++;
                my $key = $prefix . $item->{onloan} . ( $item->{barcode} // q{} );
                $onloan_items->{$key}->{due_date} = $item->{onloan};
                $onloan_items->{$key}->{count}++ if $item->{$hbranch};
                $onloan_items->{$key}->{branchname}     = $item->{branchname};
                $onloan_items->{$key}->{location}       = $shelflocations->{ $item->{location} } if $item->{location};
                $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber};
                $onloan_items->{$key}->{description}    = $item->{description};
                $onloan_items->{$key}->{imageurl} =
                  getitemtypeimagelocation( $search_context->{'interface'}, $itemtypes{ $item->{itype}//q{} }->{imageurl} );

                # if something's checked out and lost, mark it as 'long overdue'
                if ( $item->{itemlost} ) {
                    $onloan_items->{$key}->{longoverdue}++;
                    $longoverdue_count++;
                }
                else {    # can place holds as long as item isn't lost
                    $can_place_holds = 1;
                }
            }

            # items not on loan, but still unavailable ( lost, withdrawn, damaged )
            else {

                # The item inherits the not for loan flag of its item type
                my $effective_itype = C4::Context->preference("item-level_itypes") ? $item->{itype} : $oldbiblio->{itemtype};
                $item->{notforloan} = 1 if !$item->{notforloan} &&
                    $effective_itype && $itemtypes{ $effective_itype }->{notforloan};

                # Numeric view of the flag, safe to compare when the subfield is missing
                my $notforloan = $item->{notforloan} || 0;

                # item is on order
                if ( $notforloan < 0 ) {
                    $ordered_count++;
                } elsif ( $notforloan > 0 ) {
                    $notforloan_count++;
                }

                # is item in transit?
                my ($transfertwhen, $transfertfrom, $transfertto);

                # is item on the reserve shelf?
                my $reservestatus = '';

                unless ($item->{withdrawn}
                        || $item->{itemlost}
                        || $item->{damaged}
                        || $item->{notforloan}
                        || ( $status_check_limit && $items_count > $status_check_limit ) ) {

                    # A couple heuristics to limit how many times
                    # we query the database for item transfer information, sacrificing
                    # accuracy in some cases for speed;
                    #
                    # 1. don't query if item has one of the other statuses
                    # 2. don't check transit status if the bib has more items
                    #    than MaxSearchResultsItemsPerRecordStatusCheck
                    #
                    # FIXME: to avoid having the query the database like this, and to make
                    #        the in transit status count as unavailable for search limiting,
                    #        should map transit status to record indexed in Zebra.
                    #
                    ($transfertwhen, $transfertfrom, $transfertto) = C4::Circulation::GetTransfers($item->{itemnumber});
                    $reservestatus = C4::Reserves::GetReserveStatus( $item->{itemnumber} ) // q{};
                }

                # $transfertwhen stays undefined when no transfer was looked up or found
                my $in_transit      = ( defined $transfertwhen && $transfertwhen ne '' ) ? 1 : 0;
                my $waiting_on_hold = $reservestatus eq 'Waiting';

                # item is withdrawn, lost, damaged, not for loan, reserved or in transit
                if (   $item->{withdrawn}
                    || $item->{itemlost}
                    || $item->{damaged}
                    || $item->{notforloan}
                    || $waiting_on_hold
                    || $in_transit )
                {
                    $withdrawn_count++       if $item->{withdrawn};
                    $itemlost_count++        if $item->{itemlost};
                    $itemdamaged_count++     if $item->{damaged};
                    $item_in_transit_count++ if $in_transit;
                    $item_onhold_count++     if $waiting_on_hold;
                    $item->{status} = ($item->{withdrawn}//q{}) . "-" . ($item->{itemlost}//q{}) . "-" . ($item->{damaged}//q{}) . "-" . ($item->{notforloan}//q{});

                    # can place a hold on a item if
                    # not lost nor withdrawn
                    # not damaged unless AllowHoldsOnDamagedItems is true
                    # item is either for loan or on order (notforloan < 0)
                    $can_place_holds = 1
                      if (
                           !$item->{itemlost}
                        && !$item->{withdrawn}
                        && ( !$item->{damaged} || C4::Context->preference('AllowHoldsOnDamagedItems') )
                        && ( !$item->{notforloan} || $notforloan < 0 )
                      );

                    $other_count++;

                    my $key = $prefix . $item->{status};
                    foreach (qw(withdrawn itemlost damaged branchname itemcallnumber)) {
                        $other_items->{$key}->{$_} = $item->{$_};
                    }
                    $other_items->{$key}->{intransit} = $in_transit;
                    $other_items->{$key}->{onhold} = ($reservestatus) ? 1 : 0;
                    $other_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value and $item->{notforloan};
                    $other_items->{$key}->{count}++ if $item->{$hbranch};
                    $other_items->{$key}->{location} = $shelflocations->{ $item->{location} } if $item->{location};
                    $other_items->{$key}->{description} = $item->{description};
                    $other_items->{$key}->{imageurl} = getitemtypeimagelocation( $search_context->{'interface'}, $itemtypes{ $item->{itype}//q{} }->{imageurl} );
                }
                # item is available
                else {
                    $can_place_holds = 1;
                    $available_count++;
                    $available_items->{$prefix}->{count}++ if $item->{$hbranch};
                    foreach (qw(branchname itemcallnumber description)) {
                        $available_items->{$prefix}->{$_} = $item->{$_};
                    }
                    $available_items->{$prefix}->{location} = $shelflocations->{ $item->{location} } if $item->{location};
                    $available_items->{$prefix}->{imageurl} = getitemtypeimagelocation( $search_context->{'interface'}, $itemtypes{ $item->{itype}//q{} }->{imageurl} );
                }
            }
        }    # notforloan, item level and biblioitem level

        # if all items are hidden, do not show the record
        if ($items_count > 0 && $hideatopac_count == $items_count) {
            next;
        }

        # Only the first groups of each kind (sorted by key) are sent to the template
        my ( $availableitemscount, $onloanitemscount, $otheritemscount );
        for my $key ( sort keys %$onloan_items ) {
            (++$onloanitemscount > $maxitems) and last;
            push @onloan_items_loop, $onloan_items->{$key};
        }
        for my $key ( sort keys %$other_items ) {
            (++$otheritemscount > $maxitems) and last;
            push @other_items_loop, $other_items->{$key};
        }
        for my $key ( sort keys %$available_items ) {
            (++$availableitemscount > $maxitems) and last;
            push @available_items_loop, $available_items->{$key}
        }

        # XSLT processing of some stuff
        # we fetched the sysprefs already before the loop through all retrieved record!
        if (!$scan && $xslfile) {
            $record_processor->options({
                frameworkcode => $fw,
                interface     => $search_context->{'interface'}
            });

            $record_processor->process($marcrecord);
            $oldbiblio->{XSLTResultsRecord} = XSLTParse4Display($oldbiblio->{biblionumber}, $marcrecord, $xslsyspref, 1, \@hiddenitems, $sysxml, $xslfile, $lang, $xslt_variables);
        }

        # if biblio level itypes are used and itemtype is notforloan, it can't be reserved either
        if (!C4::Context->preference("item-level_itypes")) {
            if ($itemtype && $itemtype->{notforloan}) {
                $can_place_holds = 0;
            }
        }
        $oldbiblio->{norequests} = 1 unless $can_place_holds;
        $oldbiblio->{items_count}          = $items_count;
        $oldbiblio->{available_items_loop} = \@available_items_loop;
        $oldbiblio->{onloan_items_loop}    = \@onloan_items_loop;
        $oldbiblio->{other_items_loop}     = \@other_items_loop;
        $oldbiblio->{availablecount}       = $available_count;
        $oldbiblio->{availableplural}      = 1 if $available_count > 1;
        $oldbiblio->{onloancount}          = $onloan_count;
        $oldbiblio->{onloanplural}         = 1 if $onloan_count > 1;
        $oldbiblio->{othercount}           = $other_count;
        $oldbiblio->{otherplural}          = 1 if $other_count > 1;
        $oldbiblio->{withdrawncount}       = $withdrawn_count;
        $oldbiblio->{itemlostcount}        = $itemlost_count;
        $oldbiblio->{damagedcount}         = $itemdamaged_count;
        $oldbiblio->{intransitcount}       = $item_in_transit_count;
        $oldbiblio->{onholdcount}          = $item_onhold_count;
        $oldbiblio->{orderedcount}         = $ordered_count;
        $oldbiblio->{notforloancount}      = $notforloan_count;

        # Records without items may describe their holdings in another field
        if (C4::Context->preference("AlternateHoldingsField") && $items_count == 0) {
            # The preference holds a three character tag followed by the subfield codes
            my $fieldspec = C4::Context->preference("AlternateHoldingsField");
            my $subfields = substr $fieldspec, 3;
            my $holdingsep = C4::Context->preference("AlternateHoldingsSeparator") || ' ';
            my @alternateholdingsinfo = ();
            my @holdingsfields = $marcrecord->field(substr $fieldspec, 0, 3);
            my $alternateholdingscount = 0;

            for my $field (@holdingsfields) {
                my %holding = ( holding => '' );
                my $havesubfield = 0;
                for my $subfield ($field->subfields()) {
                    if ((index $subfields, $subfield->[0]) >= 0) {
                        $holding{'holding'} .= $holdingsep if (length $holding{'holding'} > 0);
                        $holding{'holding'} .= $subfield->[1];
                        $havesubfield++;
                    }
                }
                if ($havesubfield) {
                    push(@alternateholdingsinfo, \%holding);
                    $alternateholdingscount++;
                }
            }

            $oldbiblio->{'ALTERNATEHOLDINGS'} = \@alternateholdingsinfo;
            $oldbiblio->{'alternateholdings_count'} = $alternateholdingscount;
        }

        $oldbiblio->{biblio_object} = Koha::Biblios->find( $oldbiblio->{biblionumber} );

        push( @newresults, $oldbiblio );
    }

    return @newresults;
}
2162
=head2 enabled_staff_search_views

%hash = enabled_staff_search_views()

This function returns a hash that contains three flags obtained from the system
preferences, used to determine whether a particular staff search results view
is enabled.

=over 2

=item C<Output arg:>

    * $hash{can_view_MARC} is the value of the viewMARC preference
    * $hash{can_view_ISBD} is the value of the viewISBD preference
    * $hash{can_view_labeledMARC} is the value of the viewLabeledMARC preference

=item C<usage in the script:>

    $template->param ( C4::Search::enabled_staff_search_views );

=back

=cut

sub enabled_staff_search_views {

    # Each lookup is forced into scalar context so that it contributes exactly
    # one value: a lookup returning an empty list would otherwise shift the
    # following keys and values out of step.
    return (
        can_view_MARC        => scalar( C4::Context->preference('viewMARC') ),           # staff search MARC view
        can_view_ISBD        => scalar( C4::Context->preference('viewISBD') ),           # staff search ISBD view
        can_view_labeledMARC => scalar( C4::Context->preference('viewLabeledMARC') ),    # staff search Labeled MARC view
    );
}
2195
2196 =head2 z3950_search_args
2197
2198 $arrayref = z3950_search_args($matchpoints)
2199
This function returns an array reference that contains the search parameters to be
passed to the Z39.50 search script (z3950_search.pl). The array elements
are hash refs with two keys: C<name> holds the name of a search parameter and
C<value> holds the value of that search parameter.
2205
2206 The search parameter names are lccn, isbn, issn, title, author, dewey and subject.
2207
2208 The search parameter values are obtained from the bibliographic record whose
2209 data is in a hash reference in $matchpoints, as returned by Biblio::GetBiblioData().
2210
If $matchpoints is a scalar, it is assumed to be an unnamed query descriptor, e.g.
a general purpose search argument. In this case, the returned array contains at most
one entry: its name is 'isbn' if $matchpoints is a valid ISBN and 'title' otherwise,
and its value is $matchpoints.
2214
2215 If a search parameter value is undefined or empty, it is not included in the returned
2216 array.
2217
2218 The returned array reference may be passed directly to the template parameters.
2219
2220 =over 2
2221
2222 =item C<Output arg:>
2223
2224     * $array containing hash refs as described above
2225
2226 =item C<usage in the script:>
2227
2228 =back
2229
2230 $data = Biblio::GetBiblioData($bibno);
2231 $template->param ( MYLOOP => C4::Search::z3950_search_args($data) )
2232
2233 *OR*
2234
2235 $template->param ( MYLOOP => C4::Search::z3950_search_args($searchscalar) )
2236
2237 =cut
2238
# Build the list of search parameters handed to the Z39.50 search script.
#
# Argument:
#   $bibrec - either a hash reference holding bibliographic data (the keys
#             lccn, isbn, issn, title, author, dewey and subject are read),
#             or a plain scalar holding a general purpose search term.
#
# Returns a reference to an array of { name => ..., value => ... } hash refs,
# in the fixed order lccn, isbn, issn, title, author, dewey, subject.
# Parameters whose value is undefined or empty are left out.
#
# When the title (or the scalar search term) is a valid ISBN, it is offered as
# the 'isbn' parameter instead of 'title'.
sub z3950_search_args {
    my $bibrec = shift;

    # Work on a private copy: the caller's record must not have its isbn
    # overwritten or its title blanked as a side effect of building the list.
    my %param_for = ref $bibrec ? %{$bibrec} : ( title => $bibrec );

    # Only ask Business::ISBN about a real string; an undefined or empty
    # title can never be an ISBN.
    my $candidate = $param_for{title};
    if ( defined $candidate && $candidate ne q{} ) {
        my $isbn = Business::ISBN->new($candidate);
        if ( defined $isbn && $isbn->is_valid ) {
            $param_for{isbn} = $candidate;
            delete $param_for{title};
        }
    }

    my @search_args;
    for my $field (qw( lccn isbn issn title author dewey subject )) {
        my $value = $param_for{$field};

        # Skip undefined and empty values, as the documentation promises.
        next if !defined $value || $value eq q{};
        push @search_args, { name => $field, value => $value };
    }
    return \@search_args;
}
2265
2266 =head2 GetDistinctValues($field);
2267
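Called as C<GetDistinctValues($fieldname, $string)>.

If C<$fieldname> contains a dot it is treated as C<table.column> and the distinct
values of that column are counted in the database; the optional C<$string> then
restricts the values to those beginning with it. Otherwise C<$fieldname> is used as
an index name that is scanned on the biblio and authority servers, with C<$string>
as the scan term.

Returns a reference to an array of hashes with the keys C<value> and C<cnt>.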
2269
2270 =cut
2271
# Return the distinct values of a field together with their number of uses.
#
# Arguments:
#   $fieldname - "table.column" to query the database, or an index name to
#                scan on the biblioserver and authorityserver connections.
#   $string    - optional; for a database field only values beginning with
#                this string are returned, for a scan it is the scan term.
#
# Returns a reference to an array of hash refs with the keys 'value' and 'cnt'.
sub GetDistinctValues {
    my ( $fieldname, $string ) = @_;

    if ( $fieldname =~ /\./ ) {
        my ( $table, $column ) = split /\./, $fieldname;
        my $dbh = C4::Context->dbh;

        # Identifiers cannot be bound as placeholders, so they are quoted
        # rather than interpolated raw into the statement.
        my $quoted_table  = $dbh->quote_identifier($table);
        my $quoted_column = $dbh->quote_identifier($column);

        my $query =
          "SELECT DISTINCT($quoted_column) AS value, COUNT(*) AS cnt FROM $quoted_table";

        # The prefix is bound as a parameter: interpolating it broke the
        # statement as soon as it contained a double quote and allowed SQL
        # to be injected through it.
        my @bind_values;
        if ($string) {
            $query .= " WHERE $quoted_column LIKE ?";
            push @bind_values, "$string%";
        }
        $query .= " GROUP BY value ORDER BY $quoted_column";

        # Log the statement that is really executed.
        warn $query if $DEBUG;

        my $sth = $dbh->prepare($query);
        $sth->execute(@bind_values);
        return $sth->fetchall_arrayref( {} );
    }

    # No table given: scan the index on both servers.
    $string ||= qq("");
    my @servers = qw<biblioserver authorityserver>;
    my ( @zconns, @results );
    for my $i ( 0 .. $#servers ) {
        $zconns[$i]  = C4::Context->Zconn( $servers[$i], 1 );
        $results[$i] = $zconns[$i]->scan(
            ZOOM::Query::CCL2RPN->new( qq"$fieldname $string", $zconns[$i] ) );
    }

    # The big moment: asynchronously retrieve results from all servers
    my @elements;
    _ZOOM_event_loop(
        \@zconns,
        \@results,
        sub {
            # $i is the 1-based number of the connection that has finished
            my ( $i, $size ) = @_;
            for my $j ( 0 .. $size - 1 ) {
                my %hashscan;
                @hashscan{qw(value cnt)} = $results[ $i - 1 ]->display_term($j);
                push @elements, \%hashscan;
            }
        }
    );
    return \@elements;
}
2313
2314 =head2 _ZOOM_event_loop
2315
2316     _ZOOM_event_loop(\@zconns, \@results, sub {
2317         my ( $i, $size ) = @_;
2318         ....
2319     } );
2320
Processes a ZOOM event loop, passing control to a closure to process the results
of each connection that has finished with a non-empty result set, and destroys
the result sets afterwards.
2323
2324 =cut
2325
# Drive the ZOOM event loop for a set of connections and clean up afterwards.
#
# Arguments:
#   $zconns   - array ref of ZOOM connections
#   $results  - array ref of the result sets, in the same order as $zconns;
#               a slot may be undefined
#   $callback - code ref called as $callback->($i, $size) when connection
#               number $i (1-based) has finished and its result set holds
#               $size > 0 entries
#
# Every defined result set is destroyed before returning.
sub _ZOOM_event_loop {
    my ( $zconns, $results, $callback ) = @_;

    # ZOOM::event returns the 1-based number of the connection on which an
    # event occurred, and 0 once there is nothing left to wait for.
    while ( ( my $i = ZOOM::event($zconns) ) != 0 ) {
        my $event = $zconns->[ $i - 1 ]->last_event();
        next unless $event == ZOOM::Event::ZEND();

        my $result_set = $results->[ $i - 1 ];
        next unless $result_set;

        my $size = $result_set->size();
        $callback->( $i, $size ) if $size > 0;
    }

    # The loop above tolerates missing result sets, so the clean-up has to as
    # well: calling destroy on an undefined slot would be fatal.
    foreach my $result_set ( @{$results} ) {
        next unless $result_set;
        $result_set->destroy();
    }
}
2343
2344 =head2 new_record_from_zebra
2345
2346 Given raw data from a searchengine result set, return a MARC::Record object
2347
2348 This helper function is needed to take into account all the involved
2349 system preferences and configuration variables to properly create the
2350 MARC::Record object.
2351
2352 If we are using GRS-1, then the raw data we get from Zebra should be USMARC
2353 data. If we are using DOM, then it has to be MARCXML.
2354
If we are using Elasticsearch, the raw data is either already a MARC::Record,
which is returned as is, or MARCXML that still has to be parsed (so this
function really needs a new name).
2357
2358 =cut
2359
# Turn the raw data of a search engine result into a MARC::Record.
#
# Arguments:
#   $server   - 'biblioserver' selects the bibliographic index mode; any
#               other value selects the authority index mode
#   $raw_data - the record as delivered by the search engine
#
# Returns the MARC::Record. With Zebra, nothing is returned when the raw data
# cannot be parsed.
sub new_record_from_zebra {
    my ( $server, $raw_data ) = @_;

    # Elasticsearch may hand over a ready-made record; anything else is
    # parsed as MARCXML.
    if ( C4::Context->preference("SearchEngine") eq 'Elasticsearch' ) {
        return $raw_data if ref($raw_data) eq 'MARC::Record';
        return MARC::Record->new_from_xml( $raw_data, 'UTF-8' );
    }

    # Zebra: the configured index mode decides the serialisation, 'dom'
    # being the default.
    my $mode_setting =
      $server eq 'biblioserver'
      ? 'zebra_bib_index_mode'
      : 'zebra_auth_index_mode';
    my $index_mode = C4::Context->config($mode_setting) // 'dom';

    my $marc_record = eval {
        $index_mode eq 'dom'
          ? MARC::Record->new_from_xml( $raw_data, 'UTF-8' )
          : MARC::Record->new_from_usmarc($raw_data);
    };

    # A parsing failure is reported by returning nothing.
    return if $@;
    return $marc_record;
}
2388
2389 END { }    # module clean-up code here (global destructor)
2390
2391 1;
2392 __END__
2393
2394 =head1 AUTHOR
2395
2396 Koha Development Team <http://koha-community.org/>
2397
2398 =cut