C4/Search.pm

   1 package C4::Search;
   2
   3 # This file is part of Koha.
   4 #
   5 # Koha is free software; you can redistribute it and/or modify it under the
   6 # terms of the GNU General Public License as published by the Free Software
   7 # Foundation; either version 2 of the License, or (at your option) any later
   8 # version.
   9 #
  10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
  11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12 # A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU General Public License along with
  15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  16 # Suite 330, Boston, MA  02111-1307 USA
  17
  18 use strict;
  19 require Exporter;
  20 use C4::Context;
  21 use C4::Biblio;    # GetMarcFromKohaField
  22 use C4::Koha;      # getFacets
  23 use Lingua::Stem;
  24 use C4::Search::PazPar2;
  25 use XML::Simple;
  26 use C4::Dates qw(format_date);
  27 use C4::XSLT;
  28
  # Package globals: the module version, the Exporter configuration
  # variables and a debug switch.
  use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);

  # Both values are assigned at compile time, so they are already set while
  # the rest of the module is still being compiled.
  BEGIN {

      # DEBUG is 1 when the DEBUG environment variable is true, 0 otherwise
      if ( $ENV{DEBUG} ) {
          $DEBUG = 1;
      }
      else {
          $DEBUG = 0;
      }

      # set the version for version checking
      $VERSION = 3.01;
  }
  36
  37 =head1 NAME
  38
  39 C4::Search - Functions for searching the Koha catalog.
  40
  41 =head1 SYNOPSIS
  42
  43 See opac/opac-search.pl or catalogue/search.pl for example of usage
  44
  45 =head1 DESCRIPTION
  46
  47 This module provides searching functions for Koha's bibliographic databases
  48
  49 =head1 FUNCTIONS
  50
  51 =cut
  52
  # Exporter configuration: every function named in @EXPORT is exported to
  # the caller by default.
  @ISA    = ('Exporter');
  @EXPORT = (
      '&findseealso', '&FindDuplicate', '&SimpleSearch', '&searchResults',
      '&getRecords',  '&buildQuery',    '&NZgetRecords', '&ModBiblios',
  );
  64
  65 # make all your functions, whether exported or not;
  66
  67 =head2 findseealso($dbh,$fields);
  68
  69 C<$dbh> is a link to the DB handler.
  70
  71 use C4::Context;
  72 my $dbh =C4::Context->dbh;
  73
  74 C<$fields> is a reference to the fields array
  75
  76 This function modifies the @$fields array and adds related fields to search on.
  77
  78 FIXME: this function is probably deprecated in Koha 3
  79
  80 =cut
  81
  # Appends, in place, the "seealso" value found in the MARC structure to
  # every entry of @$fields that has one.
  #
  #   $dbh    - database handle; accepted but not used by this function
  #   $fields - reference to an array of field specifiers. The tag is read
  #             from characters 1-3 and the subfield code from character 4
  #             (0-based offsets) of each entry.
  sub findseealso {
      my ( $dbh, $fields ) = @_;
      my $tagslib = GetMarcStructure(1);

      # $field aliases the array element, so the append below changes the
      # caller's array directly
      foreach my $field ( @{$fields} ) {
          my $tag      = substr( $field, 1, 3 );
          my $subfield = substr( $field, 4, 1 );
          my $seealso  = $tagslib->{$tag}->{$subfield}->{seealso};
          next unless $seealso;
          $field .= ',' . $seealso;
      }
  }
  92
  93 =head2 FindDuplicate
  94
  95 ($biblionumber1, $title1, $biblionumber2, $title2, ...) = FindDuplicate($record);
  96
  97 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
  98
  99 =cut
 100
 # Searches the catalogue for records that look like duplicates of $record.
 #
 # When the record has an ISBN the search is made on it alone (everything
 # from the first "(" onwards and trailing whitespace are dropped first).
 # Otherwise the search is made on the exact title, plus the item type and
 # the exact author when the record has them.
 #
 #   $record - the MARC record to check, as accepted by TransformMarcToKoha
 #
 # Returns a flat list of (biblionumber, title) pairs, one pair per candidate
 # found. The list is empty when nothing matches or when the search fails (a
 # warning is emitted in the latter case).
 sub FindDuplicate {
     my ($record) = @_;
     my $dbh      = C4::Context->dbh;
     my $result   = TransformMarcToKoha( $dbh, $record, '' );
     my $query;

     # search duplicate on ISBN, easy and fast..
     # ... normalize first
     if ( $result->{isbn} ) {
         ( my $isbn = $result->{isbn} ) =~ s/\(.*$//;
         $isbn =~ s/\s+$//;
         $query = "isbn=$isbn";
     }
     else {
         $query = 'ti,ext=' . _find_duplicate_ccl_term( $result->{title} );
         $query .= " and itemtype=$result->{itemtype}"
           if $result->{itemtype};
         $query .= ' and au,ext=' . _find_duplicate_ccl_term( $result->{author} )
           if $result->{author};
     }

     my @results;
     my ( $error, $searchresults ) = SimpleSearch($query);
     if ( defined $error ) {
         warn "FindDuplicate: search for '$query' failed: $error";
         return @results;
     }

     foreach my $possible_duplicate_record ( @{ $searchresults || [] } ) {
         my $marcrecord =
           MARC::Record->new_from_usmarc($possible_duplicate_record);
         my $candidate = TransformMarcToKoha( $dbh, $marcrecord, '' );
         next unless $candidate;
         push @results, $candidate->{'biblionumber'}, $candidate->{'title'};
     }
     return @results;
 }

 # Makes a title or author usable as a CCL search term: removes backslashes,
 # double quotes and parentheses, then the operator words "and", "or" and
 # "not". Only whole words are removed, so "History" or "Sandor" are left
 # intact. An undefined value yields the empty string.
 sub _find_duplicate_ccl_term {
     my ($term) = @_;
     return '' unless defined $term;
     $term =~ tr/\\"()//d;

     # FIXME: instead of removing operators, could just do
     # quotes around the value
     $term =~ s/\b(?:and|or|not)\b//g;
     return $term;
 }
 158
 159 =head2 SimpleSearch
 160
 161 ( $error, $results, $total_hits ) = SimpleSearch( $query, $offset, $max_results, [@servers] );
 162
 163 This function provides a simple search API on the bibliographic catalog
 164
 165 =over 2
 166
 167 =item C<input arg:>
 168
 169     * $query can be a simple keyword or a complete CCL query
 170     * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
 171     * $offset - If present, represents the number of records at the beginning to omit. Defaults to 0
 172     * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef.
 173
 174
 175 =item C<Output:>
 176
 177     * $error is undefined unless an error is detected
 178     * \@results is an array of records.
 179     * $total_hits is the number of hits that would have been returned with no limit
 180
 181 =item C<usage in the script:>
 182
 183 =back
 184
 185 my ( $error, $marcresults, $total_hits ) = SimpleSearch($query);
 186
 187 if (defined $error) {
 188     $template->param(query_error => $error);
 189     warn "error: ".$error;
 190     output_html_with_http_headers $input, $cookie, $template->output;
 191     exit;
 192 }
 193
 194 my $hits = scalar @$marcresults;
 195 my @results;
 196
 197 for my $i (0..$hits-1) {
 198     my %resultsloop;
 199     my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
 200     my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
 201
 202     #build the hash for the template.
 203     $resultsloop{highlight}       = ($i % 2)?(1):(0);
 204     $resultsloop{title}           = $biblio->{'title'};
 205     $resultsloop{subtitle}        = $biblio->{'subtitle'};
 206     $resultsloop{biblionumber}    = $biblio->{'biblionumber'};
 207     $resultsloop{author}          = $biblio->{'author'};
 208     $resultsloop{publishercode}   = $biblio->{'publishercode'};
 209     $resultsloop{publicationyear} = $biblio->{'publicationyear'};
 210
 211     push @results, \%resultsloop;
 212 }
 213
 214 $template->param(result=>\@results);
 215
 216 =cut
 217
 # Runs $query against the catalogue and returns the raw records found.
 #
 #   $query       - a keyword or a complete CCL query
 #   $offset      - number of leading records to skip (defaults to 0)
 #   $max_results - maximum number of records to fetch per server; undef
 #                  fetches everything after $offset
 #   $servers     - optional array ref of server names; defaults to
 #                  "biblioserver"
 #
 # Returns ( $error, \@records, $total_hits ). $error is undef on success;
 # on failure it holds a message and the other two values are undef.
 # $offset, $max_results and $servers are ignored when the NoZebra
 # preference is set.
 sub SimpleSearch {
     my ( $query, $offset, $max_results, $servers ) = @_;

     if ( C4::Context->preference('NoZebra') ) {
         my $result = NZorder( NZanalyse($query) )->{'biblioserver'};
         my $search_result =
           ( $result->{hits} && $result->{hits} > 0 )
           ? ( $result->{'RECORDS'} || [] )
           : [];

         # the third value is the number of records, not the array reference
         return ( undef, $search_result, scalar @{$search_result} );
     }

     return ( "No query entered", undef, undef ) unless $query;

     # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
     my @servers = defined($servers) ? @{$servers} : ("biblioserver");
     my $skip    = defined($offset)  ? $offset     : 0;
     my ( @results, @zoom_queries, @tmpresults, @zconns );
     my $total_hits = 0;

     # Frees every result set and query created so far; used on the error
     # path as well as on success.
     my $release = sub {
         $_->destroy() for grep { defined } @tmpresults;
         $_->destroy() for grep { defined } @zoom_queries;
     };

     # Initialize & Search Zebra
     for my $i ( 0 .. $#servers ) {
         my $error;
         eval {
             $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
             $zoom_queries[$i] =
               ZOOM::Query::CCL2RPN->new( $query, $zconns[$i] );
             $tmpresults[$i] = $zconns[$i]->search( $zoom_queries[$i] );

             # A "return" here would only leave the eval block, so the
             # message is kept and acted upon below.
             if ( $zconns[$i]->errcode() ) {
                 $error =
                     $zconns[$i]->errmsg() . " ("
                   . $zconns[$i]->errcode() . ") "
                   . $zconns[$i]->addinfo() . " "
                   . $zconns[$i]->diagset();
             }
         };
         if ( my $exception = $@ ) {
             require Scalar::Util;
             if (   Scalar::Util::blessed($exception)
                 && $exception->isa('ZOOM::Exception') )
             {
                 $error =
                     $exception->message() . " ("
                   . $exception->code() . ") "
                   . $exception->addinfo() . " "
                   . $exception->diagset();
             }
             else {

                 # not a ZOOM::Exception: report it as plain text
                 $error = "$exception";
             }
             warn $error;
         }
         if ( defined $error ) {
             $release->();
             return ( $error, undef, undef );
         }
     }

     while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
         my $event = $zconns[ $i - 1 ]->last_event();
         next unless $event == ZOOM::Event::ZEND();

         my $resultset = $tmpresults[ $i - 1 ];
         my $hits      = $resultset->size();
         $total_hits += $hits;

         # positions are 1-based here; the result set itself is 0-indexed
         my $first_record = $skip + 1;
         my $last_record  = $hits;
         if ( defined $max_results && $skip + $max_results < $hits ) {
             $last_record = $skip + $max_results;
         }

         for my $position ( $first_record .. $last_record ) {
             push @results, $resultset->record( $position - 1 )->raw();
         }
     }

     $release->();

     return ( undef, \@results, $total_hits );
 }
 295
 296 =head2 getRecords
 297
 298 ( undef, $results_hashref, \@facets_loop ) = getRecords (
 299
 300         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 301         $results_per_page, $offset,       $expanded_facet, $branches,
 302         $query_type,       $scan
 303     );
 304
 305 The all singing, all dancing, multi-server, asynchronous, scanning,
 306 searching, record nabbing, facet-building
 307
 308 See verbose embedded documentation.
 309
 310 =cut
 311
 # Runs a query against every server in @$servers_ref, collects the requested
 # page of results from each and builds the facets for the local catalogue.
 #
 #   $koha_query       - query sent to servers whose name matches /biblioserver/
 #   $simple_query     - query sent to every other server
 #   $sort_by_ref      - array ref of sort keys (e.g. "title_az"); keys that
 #                       are not recognised are ignored
 #   $servers_ref      - array ref of server names given to C4::Context->Zconn
 #   $results_per_page - number of records to fetch from each server
 #   $offset           - zero-based position of the first record to fetch
 #   $expanded_facet   - link value of the facet whose values are all listed
 #   $branches         - hash ref keyed by branch code; the 'branchname' entry
 #                       labels branch facets
 #   $query_type       - optional; a value starting with "ccl", "cql" or "pqf"
 #                       selects the query parser. Anything else that is true
 #                       performs no search at all.
 #   $scan             - true to scan the index instead of searching; only
 #                       honoured when $query_type is not set
 #
 # Returns ( undef, $results_hashref, \@facets_loop ). $results_hashref is
 # keyed by server name; each entry holds 'hits' and 'RECORDS', the latter
 # indexed by absolute result position. It stays undef when no server
 # returned hits.
 sub getRecords {
     my (
         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
         $results_per_page, $offset,       $expanded_facet, $branches,
         $query_type,       $scan
     ) = @_;

     my @servers = @$servers_ref;
     my @sort_by = @$sort_by_ref;

     # Sort specification (passed to the result set's "yaz" sort) for each
     # supported sort key
     my %sort_spec_for = (
         author_az       => '1=1003 <i',
         author_za       => '1=1003 >i',
         popularity_asc  => '1=9003 <i',
         popularity_dsc  => '1=9003 >i',
         call_number_asc => '1=20  <i',
         call_number_dsc => '1=20 >i',
         pubdate_asc     => '1=31 <i',
         pubdate_dsc     => '1=31 >i',
         acqdate_asc     => '1=32 <i',
         acqdate_dsc     => '1=32 >i',
         title_az        => '1=4 <i',
         title_za        => '1=4 >i',
     );

     # Number of values listed for a facet that is not expanded
     my $collapsed_facet_limit = 5;

     # Initialize variables for the ZOOM connection and results object
     my @zconns;
     my @results;
     my $results_hashref;

     # Initialize variables for the faceted results objects
     my $facets_counter = {};
     my $facets_info    = {};
     my $facets         = getFacets();

     my @facets_loop;   # array of hashes for the template facets loop

     ### LOOP THROUGH THE SERVERS
     for my $i ( 0 .. $#servers ) {
         $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );

         # perform the search, create the results objects
         # if this is a local search, use the $koha-query, if it's a
         # federated one, use the federated-query
         my $query_to_use =
           ( $servers[$i] =~ /biblioserver/ ) ? $koha_query : $simple_query;

         warn $simple_query if ( $scan and $DEBUG );

         # Check if we've got a query_type defined, if so, use it
         eval {
             if ($query_type) {
                 if ( $query_type =~ /^ccl/ ) {
                     $query_to_use =~
                       s/\:/\=/g;    # change : to = last minute (FIXME)
                     $results[$i] = $zconns[$i]->search(
                         ZOOM::Query::CCL2RPN->new( $query_to_use, $zconns[$i] )
                     );
                 }
                 elsif ( $query_type =~ /^cql/ ) {
                     $results[$i] = $zconns[$i]->search(
                         ZOOM::Query::CQL->new( $query_to_use, $zconns[$i] ) );
                 }
                 elsif ( $query_type =~ /^pqf/ ) {
                     $results[$i] = $zconns[$i]->search(
                         ZOOM::Query::PQF->new( $query_to_use, $zconns[$i] ) );
                 }
             }
             elsif ($scan) {
                 $results[$i] = $zconns[$i]->scan(
                     ZOOM::Query::CCL2RPN->new( $query_to_use, $zconns[$i] ) );
             }
             else {
                 $results[$i] = $zconns[$i]->search(
                     ZOOM::Query::CCL2RPN->new( $query_to_use, $zconns[$i] ) );
             }
         };
         if ($@) {
             warn "WARNING: query problem with $query_to_use " . $@;
         }

         # Concatenate the sort_by limits and pass them to the results object
         # Note: sort will override rank
         my $sort_by = join '',
           map { "$sort_spec_for{$_} " }
           grep { exists $sort_spec_for{$_} } @sort_by;

         # A failed search leaves no result object, and not every result
         # object can be sorted, so check before calling sort.
         if ( $sort_by && $results[$i] && $results[$i]->can('sort') ) {
             if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
                 warn "WARNING sort $sort_by failed";
             }
         }
     }    # finished looping through servers

     # The big moment: asynchronously retrieve results from all servers
     while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
         my $server_index = $i - 1;
         my $ev           = $zconns[$server_index]->last_event();
         next unless $ev == ZOOM::Event::ZEND();
         next unless $results[$server_index];

         my $is_biblioserver = ( $servers[$server_index] =~ /biblioserver/ );
         my $size            = $results[$server_index]->size();

         if ( $size > 0 ) {
             my $results_hash = { 'hits' => $size };

             # the page ends at $offset + $results_per_page or at the last hit
             my $times =
               ( $offset + $results_per_page <= $size )
               ? $offset + $results_per_page
               : $size;

             for ( my $j = $offset ; $j < $times ; $j++ ) {

                 ## Check if it's an index scan
                 if ($scan) {
                     my ( $term, $occ ) = $results[$server_index]->term($j);

                     # here we create a minimal MARC record and hand it off
                     # to the template just like a normal result ... perhaps
                     # not ideal, but it works for now
                     my $tmprecord = MARC::Record->new();
                     $tmprecord->encoding('UTF-8');

                     # the minimal record in author/title (depending on MARC
                     # flavour)
                     if ( C4::Context->preference("marcflavour") eq "UNIMARC" )
                     {
                         $tmprecord->append_fields(
                             MARC::Field->new(
                                 '200', ' ', ' ',
                                 a => $term,
                                 f => $occ
                             )
                         );
                     }
                     else {
                         $tmprecord->append_fields(
                             MARC::Field->new( '245', ' ', ' ', a => $term, ) );
                         $tmprecord->append_fields(
                             MARC::Field->new( '100', ' ', ' ', a => $occ, ) );
                     }
                     $results_hash->{'RECORDS'}[$j] = $tmprecord->as_usmarc();
                     next;
                 }

                 # not an index scan
                 my $record = $results[$server_index]->record($j)->raw();
                 $results_hash->{'RECORDS'}[$j] = $record;

                 # Fill the facets while we're looping, but only for the
                 # biblioserver
                 next unless $is_biblioserver;
                 my $facet_record = MARC::Record->new_from_usmarc($record);
                 next unless $facet_record;

                 for my $facet ( @{$facets} ) {
                     next unless $facet;
                     my $link_value = $facet->{'link_value'};

                     my @fields =
                       map { $facet_record->field($_) } @{ $facet->{'tags'} };
                     for my $field (@fields) {
                         for my $subfield ( $field->subfields() ) {
                             my ( $code, $data ) = @$subfield;
                             $facets_counter->{$link_value}->{$data}++
                               if $code eq $facet->{'subfield'};
                         }
                     }
                     $facets_info->{$link_value}->{'label_value'} =
                       $facet->{'label_value'};
                     $facets_info->{$link_value}->{'expanded'} =
                       $facet->{'expanded'};
                 }
             }
             $results_hashref->{ $servers[$server_index] } = $results_hash;
         }

         # BUILD FACETS
         next unless $is_biblioserver;

         # NOTE(review): this comparator compares hash references
         # numerically, so the order of the facet groups is arbitrary. Left
         # as is because changing it would reorder the facets on screen.
         for my $link_value (
             sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
             keys %{$facets_counter}
           )
         {
             my $always_expanded = $facets_info->{$link_value}->{'expanded'};
             my $label_value     = $facets_info->{$link_value}->{'label_value'};
             my $number_of_facets;
             my @this_facets_array;

             # most frequent values first
             for my $one_facet (
                 sort {
                     $facets_counter->{$link_value}->{$b}
                       <=> $facets_counter->{$link_value}->{$a}
                 } keys %{ $facets_counter->{$link_value} }
               )
             {
                 $number_of_facets++;
                 next
                   unless ( $number_of_facets <= $collapsed_facet_limit )
                   || ( $expanded_facet eq $link_value )
                   || $always_expanded;

                 # Sanitize the link value ), ( will cause errors with CCL,
                 my $facet_link_value = $one_facet;
                 $facet_link_value =~ s/(\(|\))/ /g;

                 # fix the length that will display in the label,
                 my $facet_label_value =
                   ( length($one_facet) <= 20 )
                   ? $one_facet
                   : substr( $one_facet, 0, 20 ) . "...";

                 # if it's a branch, label by the name, not the code,
                 if ( $link_value =~ /branch/ ) {
                     $facet_label_value =
                       $branches->{$one_facet}->{'branchname'};
                 }

                 push @this_facets_array,
                   {
                     facet_count =>
                       $facets_counter->{$link_value}->{$one_facet},
                     facet_label_value => $facet_label_value,

                     # the whole, untruncated value goes in the link's title
                     facet_title_value => $one_facet,
                     facet_link_value  => $facet_link_value,
                     type_link_value   => $link_value,
                   };
             }

             # handle expanded option: offer to expand as soon as at least
             # one value has been left out of the list
             my $expandable;
             if (   !$always_expanded
                 && ( $number_of_facets > $collapsed_facet_limit )
                 && ( $expanded_facet ne $link_value ) )
             {
                 $expandable = 1;
             }

             # the libraries facet is skipped in single-branch mode
             next
               if $label_value =~ /Libraries/
               && C4::Context->preference('singleBranchMode');

             push @facets_loop,
               {
                 type_link_value               => $link_value,
                 type_id                       => $link_value . "_id",
                 "type_label_" . $label_value  => 1,
                 facets                        => \@this_facets_array,
                 expandable                    => $expandable,
                 expand                        => $link_value,
               };
         }
     }
     return ( undef, $results_hashref, \@facets_loop );
 }
 616
 # Runs $simple_query through PazPar2 and returns the grouped results.
 #
 # Takes the same argument list as getRecords, but only $simple_query,
 # $results_per_page and $offset are used here.
 #
 # Returns ( undef, $results_hashref, \@facets_loop ). The 'biblioserver'
 # entry of $results_hashref holds 'hits' (number of groups), 'bib_hits'
 # (total number of bibs) and, when there are hits, 'GROUPS'. @facets_loop is
 # always empty: the code that filled it is commented out.
 sub pazGetRecords {
     my (
         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
         $results_per_page, $offset,       $expanded_facet, $branches,
         $query_type,       $scan
     ) = @_;

     my $paz = C4::Search::PazPar2->new( C4::Context->config('pazpar2url') );
     $paz->init();
     $paz->search($simple_query);
     sleep 1;

     # do results
     my $stats   = XMLin( $paz->stat );
     my $results = XMLin(
         $paz->show( $offset, $results_per_page, 'work-title:1' ),
         forcearray => 1
     );

     # for a grouped search result, the number of hits
     # is the number of groups returned; 'bib_hits' will have
     # the total number of bibs.
     my $biblioserver = {
         'hits'     => $results->{'merged'}->[0],
         'bib_hits' => $stats->{'hits'},
     };
     my $results_hashref = { 'biblioserver' => $biblioserver };

     foreach my $hit ( @{ $results->{'hit'} } ) {
         my $recid      = $hit->{recid}->[0];
         my $work_title = $hit->{'md-work-title'}->[0];
         my $work_author =
           exists $hit->{'md-work-author'}
           ? $hit->{'md-work-author'}->[0]
           : undef;
         my $count = exists $hit->{count} ? $hit->{count}->[0] : 1;

         my $result_group = {
             'group_label' => defined($work_author)
             ? "$work_title / $work_author"
             : $work_title,
             'group_merge_key' => $recid,
             'group_count'     => $count,
         };

         for ( my $position = 0 ; $position < $count ; $position++ ) {

             # FIXME -- may need to worry about diacritics here
             my $rec = $paz->record( $recid, $position );
             push @{ $result_group->{'RECORDS'} }, $rec;
         }

         push @{ $biblioserver->{'GROUPS'} }, $result_group;
     }

     # pass through facets
     my $termlist_xml = $paz->termlist('author,subject');
     my $terms        = XMLin( $termlist_xml, forcearray => 1 );
     my @facets_loop  = ();

     #die Dumper($results);
 #    foreach my $list (sort keys %{ $terms->{'list'} }) {
 #        my @facets = ();
 #        foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) {
 #            push @facets, {
 #                facet_label_value => $facet->{'name'}->[0],
 #            };
 #        }
 #        push @facets_loop, ( {
 #            type_label => $list,
 #            facets => \@facets,
 #        } );
 #    }

     return ( undef, $results_hashref, \@facets_loop );
 }
 689
 # STOPWORDS
 #
 # Removes the stopwords returned by C4::Context->stopwords from $operand,
 # matching them case-insensitively and only as whole words.
 #
 #   $operand - the search terms
 #   $index   - the index being searched; nothing is removed when it contains
 #              "phr" or "ext" (phrase and exact-qualified indexes)
 #
 # Returns ( $operand, \@stopwords_removed ). Every stopword that was found
 # is listed once, spelled as in the stopword list. A stopword and the
 # separators around it are replaced by a single space; an operand that
 # consists of a stopword only becomes the empty string.
 sub _remove_stopwords {
     my ( $operand, $index ) = @_;
     my @stopwords_removed;

     # phrase and exact-qualified indexes shouldn't have stopwords removed
     return ( $operand, \@stopwords_removed ) if $index =~ m/phr|ext/;

     # we use IsAlpha unicode definition, to deal correctly with diacritics.
     # otherwise, a French word like "leçon" would be split into "le" "çon",
     # "le" is a stopword, we'd get "çon" and wouldn't find anything...
     my $boundary = qr/\P{IsAlpha}/;

     foreach my $stopword ( keys %{ C4::Context->stopwords } ) {
         next if $stopword eq '';

         # don't remove operators -- but only the operators themselves, not
         # every stopword that merely contains one (such as "for")
         next if $stopword =~ /^(?:and|or|not)$/i;

         # the stopword is data, not a pattern
         my $word = quotemeta $stopword;
         next unless $operand =~ /(?:^|$boundary)$word(?:$boundary|$)/i;

         # A match consumes the separator that follows it, so a stopword
         # repeated twice in a row needs another pass.
         1 while $operand =~ s/$boundary$word$boundary/ /gi;
         $operand =~ s/^$word$boundary/ /i;
         $operand =~ s/$boundary$word$/ /i;
         $operand =~ s/^$word$//i;
         push @stopwords_removed, $stopword;
     }
     return ( $operand, \@stopwords_removed );
 }
 717
 # TRUNCATION
 #
 # Splits $operand on whitespace and sorts the words by the position of
 # their "*" truncation marks. The marks are stripped from the words that
 # are truncated on the left, on the right or on both sides; a word holding
 # "*" anywhere else is kept untouched as a regular expression.
 #
 #   $operand - the search terms
 #   $index   - accepted but not used
 #
 # Returns five array references, in this order: non-truncated,
 # right-truncated, left-truncated, right-and-left-truncated and
 # regular-expression words.
 sub _detect_truncation {
     my ( $operand, $index ) = @_;
     my ( @plain, @right, @left, @both, @pattern );

     $operand =~ s/^ //g;
     my @words = split( /\s/, $operand );

     # the first test that succeeds decides the category; the substitutions
     # strip the truncation marks as a side effect
     for my $word (@words) {
         my $category =
             $word =~ s/^\*([^\*]+)\*$/$1/ ? \@both
           : $word =~ s/^\*([^\*]+)$/$1/   ? \@left
           : $word =~ s/^([^\*]+)\*$/$1/   ? \@right
           : index( $word, "*" ) < 0       ? \@plain
           :                                 \@pattern;
         push @{$category}, $word;
     }

     return ( \@plain, \@right, \@left, \@both, \@pattern );
 }
 747
 # STEMMING
 #
 # Builds the stemmed form of $operand: every word is replaced by its stem,
 # and a "?" truncation mark is added to each stem of three characters or
 # more, except for the operators "and", "or" and "not".
 #
 #   $operand - the search terms, separated by single spaces
 #
 # Returns the stems, each followed by a space. An operand that contains a
 # digit is returned unchanged; an operand without any word yields undef.
 sub _build_stemmed_operand {
     my ($operand) = @_;
     my $stemmed_operand;

     # If operand contains a digit, it is almost certainly an identifier, and
     # should not be stemmed.  This is particularly relevant for ISBNs and
     # ISSNs, which can contain the letter "X" - for example,
     # _build_stemmed_operand would reduce "014100018X" to "x ", which for a
     # MARC21 database would bring up irrelevant results (e.g., "23 x 29 cm."
     # from the 300$c).  Bug 2098.
     return $operand if $operand =~ /\d/;

     # FIXME: the locale should be set based on the user's language and/or
     # search choice
     my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );

     # FIXME: these should be stored in the db so the librarian can modify
     # the behavior
     $stemmer->add_exceptions(
         {
             'and' => 'and',
             'or'  => 'or',
             'not' => 'not',
         }
     );

     my @words = split( / /, $operand );
     my $stems = $stemmer->stem(@words);
     for my $stem ( @{$stems} ) {

         # Only the operators themselves are exempt: "color" or "hand" merely
         # end like one and still need the truncation mark.
         my $is_operator = ( $stem =~ /^(?:and|or|not)$/ );

         $stemmed_operand .= "$stem";
         $stemmed_operand .= "?"
           unless $is_operator || ( length($stem) < 3 );
         $stemmed_operand .= " ";
     }
     warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
     return $stemmed_operand;
 }
 782
 783 # FIELD WEIGHTING
 784 sub _build_weighted_query {
 785
 786 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
 787 # pretty well but could work much better if we had a smarter query parser
 788     my ( $operand, $stemmed_operand, $index ) = @_;
 789     my $stemming      = C4::Context->preference("QueryStemming")     || 0;
 790     my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
 791     my $fuzzy_enabled = C4::Context->preference("QueryFuzzy")        || 0;
 792
 793     my $weighted_query .= "(rk=(";    # Specifies that we're applying rank
 794
 795     # Keyword, or, no index specified
 796     if ( ( $index eq 'kw' ) || ( !$index ) ) {
 797         $weighted_query .=
 798           "Title-cover,ext,r1=\"$operand\"";    # exact title-cover
 799         $weighted_query .= " or ti,ext,r2=\"$operand\"";    # exact title
 800         $weighted_query .= " or ti,phr,r3=\"$operand\"";    # phrase title
 801           #$weighted_query .= " or any,ext,r4=$operand";               # exact any
 802           #$weighted_query .=" or kw,wrdl,r5=\"$operand\"";            # word list any
 803         $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
 804           if $fuzzy_enabled;    # add fuzzy, word list
 805         $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
 806           if ( $stemming and $stemmed_operand )
 807           ;                     # add stemming, right truncation
 808         $weighted_query .= " or wrdl,r9=\"$operand\"";
 809
 810         # embedded sorting: 0 a-z; 1 z-a
 811         # $weighted_query .= ") or (sort1,aut=1";
 812     }
 813
 814     # Barcode searches should skip this process
 815     elsif ( $index eq 'bc' ) {
 816         $weighted_query .= "bc=\"$operand\"";
 817     }
 818
 819     # Authority-number searches should skip this process
 820     elsif ( $index eq 'an' ) {
 821         $weighted_query .= "an=\"$operand\"";
 822     }
 823
 824     # If the index already has more than one qualifier, wrap the operand
 825     # in quotes and pass it back (assumption is that the user knows what they
 826     # are doing and won't appreciate us mucking up their query
 827     elsif ( $index =~ ',' ) {
 828         $weighted_query .= " $index=\"$operand\"";
 829     }
 830
 831     #TODO: build better cases based on specific search indexes
 832     else {
 833         $weighted_query .= " $index,ext,r1=\"$operand\"";    # exact index
 834           #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
 835         $weighted_query .= " or $index,phr,r3=\"$operand\"";    # phrase index
 836         $weighted_query .=
 837           " or $index,rt,wrdl,r3=\"$operand\"";    # word list index
 838     }
 839
 840     $weighted_query .= "))";                       # close rank specification
 841     return $weighted_query;
 842 }
 843
 844 =head2 buildQuery
 845
 846 ( $error, $query,
 847 $simple_query, $query_cgi,
 848 $query_desc, $limit,
 849 $limit_cgi, $limit_desc,
$stopwords_removed, $query_type ) = buildQuery ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
 851
 852 Build queries and limits in CCL, CGI, Human,
 853 handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
 854
 855 See verbose embedded documentation.
 856
 857
 858 =cut
 859
 860 sub buildQuery {
 861     my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_;
 862
 863     warn "---------\nEnter buildQuery\n---------" if $DEBUG;
 864
 865     # dereference
 866     my @operators = @$operators if $operators;
 867     my @indexes   = @$indexes   if $indexes;
 868     my @operands  = @$operands  if $operands;
 869     my @limits    = @$limits    if $limits;
 870     my @sort_by   = @$sort_by   if $sort_by;
 871
 872     my $stemming         = C4::Context->preference("QueryStemming")        || 0;
 873     my $auto_truncation  = C4::Context->preference("QueryAutoTruncate")    || 0;
 874     my $weight_fields    = C4::Context->preference("QueryWeightFields")    || 0;
 875     my $fuzzy_enabled    = C4::Context->preference("QueryFuzzy")           || 0;
 876     my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
 877
 878     # no stemming/weight/fuzzy in NoZebra
 879     if ( C4::Context->preference("NoZebra") ) {
 880         $stemming      = 0;
 881         $weight_fields = 0;
 882         $fuzzy_enabled = 0;
 883     }
 884
 885     my $query        = $operands[0];
 886     my $simple_query = $operands[0];
 887
 888     # initialize the variables we're passing back
 889     my $query_cgi;
 890     my $query_desc;
 891     my $query_type;
 892
 893     my $limit;
 894     my $limit_cgi;
 895     my $limit_desc;
 896
 897     my $stopwords_removed;    # flag to determine if stopwords have been removed
 898
 899 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
 900 # DIAGNOSTIC ONLY!!
 901     if ( $query =~ /^ccl=/ ) {
 902         return ( undef, $', $', $', $', '', '', '', '', 'ccl' );
 903     }
 904     if ( $query =~ /^cql=/ ) {
 905         return ( undef, $', $', $', $', '', '', '', '', 'cql' );
 906     }
 907     if ( $query =~ /^pqf=/ ) {
 908         return ( undef, $', $', $', $', '', '', '', '', 'pqf' );
 909     }
 910
 911     # pass nested queries directly
 912     # FIXME: need better handling of some of these variables in this case
 913     if ( $query =~ /(\(|\))/ ) {
 914         return (
 915             undef,              $query, $simple_query, $query_cgi,
 916             $query,             $limit, $limit_cgi,    $limit_desc,
 917             $stopwords_removed, 'ccl'
 918         );
 919     }
 920
 921 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
 922 # query operands and indexes and add stemming, truncation, field weighting, etc.
 923 # Once we do so, we'll end up with a value in $query, just like if we had an
 924 # incoming $query from the user
 925     else {
 926         $query = ""
 927           ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
 928         my $previous_operand
 929           ;    # a flag used to keep track if there was a previous query
 930                # if there was, we can apply the current operator
 931                # for every operand
 932         for ( my $i = 0 ; $i <= @operands ; $i++ ) {
 933
 934             # COMBINE OPERANDS, INDEXES AND OPERATORS
 935             if ( $operands[$i] ) {
 936
 937               # A flag to determine whether or not to add the index to the query
 938                 my $indexes_set;
 939
 940 # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
 941                 if ( $operands[$i] =~ /(:|=)/ || $scan ) {
 942                     $weight_fields    = 0;
 943                     $stemming         = 0;
 944                     $remove_stopwords = 0;
 945                 }
 946                 my $operand = $operands[$i];
 947                 my $index   = $indexes[$i];
 948
 949                 # Add index-specific attributes
 950                 # Date of Publication
 951                 if ( $index eq 'yr' ) {
 952                     $index .= ",st-numeric";
 953                     $indexes_set++;
 954                                         $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
 955                 }
 956
 957                 # Date of Acquisition
 958                 elsif ( $index eq 'acqdate' ) {
 959                     $index .= ",st-date-normalized";
 960                     $indexes_set++;
 961                                         $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
 962                 }
 963                 # ISBN,ISSN,Standard Number, don't need special treatment
 964                 elsif ( $index eq 'nb' || $index eq 'ns' ) {
 965                     $indexes_set++;
 966                     (
 967                         $stemming,      $auto_truncation,
 968                         $weight_fields, $fuzzy_enabled,
 969                         $remove_stopwords
 970                     ) = ( 0, 0, 0, 0, 0 );
 971
 972                 }
 973                 # Set default structure attribute (word list)
 974                 my $struct_attr;
 975                 unless ( $indexes_set || !$index || $index =~ /(st-|phr|ext|wrdl)/ ) {
 976                     $struct_attr = ",wrdl";
 977                 }
 978
 979                 # Some helpful index variants
 980                 my $index_plus       = $index . $struct_attr . ":" if $index;
 981                 my $index_plus_comma = $index . $struct_attr . "," if $index;
 982
 983                 # Remove Stopwords
 984                 if ($remove_stopwords) {
 985                     ( $operand, $stopwords_removed ) =
 986                       _remove_stopwords( $operand, $index );
 987                     warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
 988                     warn "REMOVED STOPWORDS: @$stopwords_removed"
 989                       if ( $stopwords_removed && $DEBUG );
 990                 }
 991
 992                 # Detect Truncation
 993                 my ( $nontruncated, $righttruncated, $lefttruncated,
 994                     $rightlefttruncated, $regexpr );
 995                 my $truncated_operand;
 996                 (
 997                     $nontruncated, $righttruncated, $lefttruncated,
 998                     $rightlefttruncated, $regexpr
 999                 ) = _detect_truncation( $operand, $index );
1000                 warn
1001 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
1002                   if $DEBUG;
1003
1004                 # Apply Truncation
1005                 if (
1006                     scalar(@$righttruncated) + scalar(@$lefttruncated) +
1007                     scalar(@$rightlefttruncated) > 0 )
1008                 {
1009
1010                # Don't field weight or add the index to the query, we do it here
1011                     $indexes_set = 1;
1012                     undef $weight_fields;
1013                     my $previous_truncation_operand;
1014                     if ( scalar(@$nontruncated) > 0 ) {
1015                         $truncated_operand .= "$index_plus @$nontruncated ";
1016                         $previous_truncation_operand = 1;
1017                     }
1018                     if ( scalar(@$righttruncated) > 0 ) {
1019                         $truncated_operand .= "and "
1020                           if $previous_truncation_operand;
1021                         $truncated_operand .=
1022                           "$index_plus_comma" . "rtrn:@$righttruncated ";
1023                         $previous_truncation_operand = 1;
1024                     }
1025                     if ( scalar(@$lefttruncated) > 0 ) {
1026                         $truncated_operand .= "and "
1027                           if $previous_truncation_operand;
1028                         $truncated_operand .=
1029                           "$index_plus_comma" . "ltrn:@$lefttruncated ";
1030                         $previous_truncation_operand = 1;
1031                     }
1032                     if ( scalar(@$rightlefttruncated) > 0 ) {
1033                         $truncated_operand .= "and "
1034                           if $previous_truncation_operand;
1035                         $truncated_operand .=
1036                           "$index_plus_comma" . "rltrn:@$rightlefttruncated ";
1037                         $previous_truncation_operand = 1;
1038                     }
1039                 }
1040                 $operand = $truncated_operand if $truncated_operand;
1041                 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
1042
1043                 # Handle Stemming
1044                 my $stemmed_operand;
1045                 $stemmed_operand = _build_stemmed_operand($operand)
1046                   if $stemming;
1047                 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
1048
1049                 # Handle Field Weighting
1050                 my $weighted_operand;
1051                 $weighted_operand =
1052                   _build_weighted_query( $operand, $stemmed_operand, $index )
1053                   if $weight_fields;
1054                 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
1055                 $operand = $weighted_operand if $weight_fields;
1056                 $indexes_set = 1 if $weight_fields;
1057
1058                 # If there's a previous operand, we need to add an operator
1059                 if ($previous_operand) {
1060
1061                     # User-specified operator
1062                     if ( $operators[ $i - 1 ] ) {
1063                         $query     .= " $operators[$i-1] ";
1064                         $query     .= " $index_plus " unless $indexes_set;
1065                         $query     .= " $operand";
1066                         $query_cgi .= "&op=$operators[$i-1]";
1067                         $query_cgi .= "&idx=$index" if $index;
1068                         $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1069                         $query_desc .=
1070                           " $operators[$i-1] $index_plus $operands[$i]";
1071                     }
1072
1073                     # Default operator is and
1074                     else {
1075                         $query      .= " and ";
1076                         $query      .= "$index_plus " unless $indexes_set;
1077                         $query      .= "$operand";
1078                         $query_cgi  .= "&op=and&idx=$index" if $index;
1079                         $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1080                         $query_desc .= " and $index_plus $operands[$i]";
1081                     }
1082                 }
1083
1084                 # There isn't a pervious operand, don't need an operator
1085                 else {
1086
1087                     # Field-weighted queries already have indexes set
1088                     $query .= " $index_plus " unless $indexes_set;
1089                     $query .= $operand;
1090                     $query_desc .= " $index_plus $operands[$i]";
1091                     $query_cgi  .= "&idx=$index" if $index;
1092                     $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1093                     $previous_operand = 1;
1094                 }
1095             }    #/if $operands
1096         }    # /for
1097     }
1098     warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1099
1100     # add limits
1101     my $group_OR_limits;
1102     my $availability_limit;
1103     foreach my $this_limit (@limits) {
1104         if ( $this_limit =~ /available/ ) {
1105
1106 # 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1107 # In English:
1108 # all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1109             $availability_limit .=
1110 "( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and (lost,st-numeric=0) )"; #or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
1111             $limit_cgi  .= "&limit=available";
1112             $limit_desc .= "";
1113         }
1114
1115         # group_OR_limits, prefixed by mc-
1116         # OR every member of the group
1117         elsif ( $this_limit =~ /mc/ ) {
1118             $group_OR_limits .= " or " if $group_OR_limits;
1119             $limit_desc      .= " or " if $group_OR_limits;
1120             $group_OR_limits .= "$this_limit";
1121             $limit_cgi       .= "&limit=$this_limit";
1122             $limit_desc      .= " $this_limit";
1123         }
1124
1125         # Regular old limits
1126         else {
1127             $limit .= " and " if $limit || $query;
1128             $limit      .= "$this_limit";
1129             $limit_cgi  .= "&limit=$this_limit";
1130             $limit_desc .= " $this_limit";
1131         }
1132     }
1133     if ($group_OR_limits) {
1134         $limit .= " and " if ( $query || $limit );
1135         $limit .= "($group_OR_limits)";
1136     }
1137     if ($availability_limit) {
1138         $limit .= " and " if ( $query || $limit );
1139         $limit .= "($availability_limit)";
1140     }
1141
1142     # Normalize the query and limit strings
1143     $query =~ s/:/=/g;
1144     $limit =~ s/:/=/g;
1145     for ( $query, $query_desc, $limit, $limit_desc ) {
1146         $_ =~ s/  / /g;    # remove extra spaces
1147         $_ =~ s/^ //g;     # remove any beginning spaces
1148         $_ =~ s/ $//g;     # remove any ending spaces
1149         $_ =~ s/==/=/g;    # remove double == from query
1150     }
1151     $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1152
1153     for ($query_cgi,$simple_query) {
1154         $_ =~ s/"//g;
1155     }
1156     # append the limit to the query
1157     $query .= " " . $limit;
1158
1159     # Warnings if DEBUG
1160     if ($DEBUG) {
1161         warn "QUERY:" . $query;
1162         warn "QUERY CGI:" . $query_cgi;
1163         warn "QUERY DESC:" . $query_desc;
1164         warn "LIMIT:" . $limit;
1165         warn "LIMIT CGI:" . $limit_cgi;
1166         warn "LIMIT DESC:" . $limit_desc;
1167         warn "---------\nLeave buildQuery\n---------";
1168     }
1169     return (
1170         undef,              $query, $simple_query, $query_cgi,
1171         $query_desc,        $limit, $limit_cgi,    $limit_desc,
1172         $stopwords_removed, $query_type
1173     );
1174 }
1175
1176 =head2 searchResults
1177
1178 Format results in a form suitable for passing to the template
1179
1180 =cut
1181
1182 # IMO this subroutine is pretty messy still -- it's responsible for
1183 # building the HTML output for the template
1184 sub searchResults {
1185     my ( $searchdesc, $hits, $results_per_page, $offset, $scan, @marcresults ) = @_;
1186     my $dbh = C4::Context->dbh;
1187     my $even = 1;
1188     my @newresults;
1189
1190     # add search-term highlighting via <span>s on the search terms
1191     my $span_terms_hashref;
1192     for my $span_term ( split( / /, $searchdesc ) ) {
1193         $span_term =~ s/(.*=|\)|\(|\+|\.|\*)//g;
1194         $span_terms_hashref->{$span_term}++;
1195     }
1196
1197     #Build branchnames hash
1198     #find branchname
1199     #get branch information.....
1200     my %branches;
1201     my $bsth =
1202       $dbh->prepare("SELECT branchcode,branchname FROM branches")
1203       ;    # FIXME : use C4::Koha::GetBranches
1204     $bsth->execute();
1205     while ( my $bdata = $bsth->fetchrow_hashref ) {
1206         $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
1207     }
1208 # FIXME - We build an authorised values hash here, using the default framework
1209 # though it is possible to have different authvals for different fws.
1210
1211     my $shelflocations =GetKohaAuthorisedValues('items.location','');
1212
1213     # get notforloan authorised value list (see $shelflocations  FIXME)
1214     my $notforloan_authorised_value = GetAuthValCode('items.notforloan','');
1215
1216     #Build itemtype hash
1217     #find itemtype & itemtype image
1218     my %itemtypes;
1219     $bsth =
1220       $dbh->prepare(
1221         "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes"
1222       );
1223     $bsth->execute();
1224     while ( my $bdata = $bsth->fetchrow_hashref ) {
1225                 foreach (qw(description imageurl summary notforloan)) {
1226                 $itemtypes{ $bdata->{'itemtype'} }->{$_} = $bdata->{$_};
1227                 }
1228     }
1229
1230     #search item field code
1231     my $sth =
1232       $dbh->prepare(
1233 "SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'"
1234       );
1235     $sth->execute;
1236     my ($itemtag) = $sth->fetchrow;
1237
1238     ## find column names of items related to MARC
1239     my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items");
1240     $sth2->execute;
1241     my %subfieldstosearch;
1242     while ( ( my $column ) = $sth2->fetchrow ) {
1243         my ( $tagfield, $tagsubfield ) =
1244           &GetMarcFromKohaField( "items." . $column, "" );
1245         $subfieldstosearch{$column} = $tagsubfield;
1246     }
1247
1248     # handle which records to actually retrieve
1249     my $times;
1250     if ( $hits && $offset + $results_per_page <= $hits ) {
1251         $times = $offset + $results_per_page;
1252     }
1253     else {
1254         $times = $hits;  # FIXME: if $hits is undefined, why do we want to equal it?
1255     }
1256
1257     # loop through all of the records we've retrieved
1258     for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1259         my $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
1260         my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
1261         $oldbiblio->{subtitle} = C4::Biblio::get_koha_field_from_marc('bibliosubtitle', 'subtitle', $marcrecord, '');
1262         $oldbiblio->{result_number} = $i + 1;
1263
1264         # add imageurl to itemtype if there is one
1265         $oldbiblio->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1266
1267                 my $biblio_authorised_value_images = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{biblionumber} ) );
1268                 $oldbiblio->{authorised_value_images} = $biblio_authorised_value_images;
1269         my $aisbn = $oldbiblio->{'isbn'};
1270         $aisbn =~ /(\d*[X]*)/;
1271         $oldbiblio->{amazonisbn} = $1;
1272                 $oldbiblio->{description} = $itemtypes{ $oldbiblio->{itemtype} }->{description};
1273  # Build summary if there is one (the summary is defined in the itemtypes table)
1274  # FIXME: is this used anywhere, I think it can be commented out? -- JF
1275         if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
1276             my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1277             my @fields  = $marcrecord->fields();
1278             foreach my $field (@fields) {
1279                 my $tag      = $field->tag();
1280                 my $tagvalue = $field->as_string();
1281                 $summary =~
1282                   s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
1283                 unless ( $tag < 10 ) {
1284                     my @subf = $field->subfields;
1285                     for my $i ( 0 .. $#subf ) {
1286                         my $subfieldcode  = $subf[$i][0];
1287                         my $subfieldvalue = $subf[$i][1];
1288                         my $tagsubf       = $tag . $subfieldcode;
1289                         $summary =~
1290 s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
1291                     }
1292                 }
1293             }
1294             # FIXME: yuk
1295             $summary =~ s/\[(.*?)]//g;
1296             $summary =~ s/\n/<br\/>/g;
1297             $oldbiblio->{summary} = $summary;
1298         }
1299
1300         # save an author with no <span> tag, for the <a href=search.pl?q=<!--tmpl_var name="author"-->> link
1301         $oldbiblio->{'author_nospan'} = $oldbiblio->{'author'};
1302         $oldbiblio->{'title_nospan'} = $oldbiblio->{'title'};
1303         # Add search-term highlighting to the whole record where they match using <span>s
1304         if (C4::Context->preference("OpacHighlightedWords")){
1305             my $searchhighlightblob;
1306             for my $highlight_field ( $marcrecord->fields ) {
1307
1308     # FIXME: need to skip title, subtitle, author, etc., as they are handled below
1309                 next if $highlight_field->tag() =~ /(^00)/;    # skip fixed fields
1310                 for my $subfield ($highlight_field->subfields()) {
1311                     my $match;
1312                     next if $subfield->[0] eq '9';
1313                     my $field = $subfield->[1];
1314                     for my $term ( keys %$span_terms_hashref ) {
1315                         if ( ( $field =~ /$term/i ) && (( length($term) > 3 ) || ($field =~ / $term /i)) ) {
1316                             $field =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1317                         $match++;
1318                         }
1319                     }
1320                     $searchhighlightblob .= $field . " ... " if $match;
1321                 }
1322
1323             }
1324             $searchhighlightblob = ' ... '.$searchhighlightblob if $searchhighlightblob;
1325             $oldbiblio->{'searchhighlightblob'} = $searchhighlightblob;
1326         }
1327
1328         # Add search-term highlighting to the title, subtitle, etc. fields
1329         for my $term ( keys %$span_terms_hashref ) {
1330             my $old_term = $term;
1331             if ( length($term) > 3 ) {
1332                 $term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\]|\\|\*)//g;
1333                                 foreach(qw(title subtitle author publishercode place pages notes size)) {
1334                         $oldbiblio->{$_} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1335                                 }
1336             }
1337         }
1338
1339         ($i % 2) and $oldbiblio->{'toggle'} = 1;
1340
1341         # Pull out the items fields
1342         my @fields = $marcrecord->field($itemtag);
1343
1344         # Setting item statuses for display
1345         my @available_items_loop;
1346         my @onloan_items_loop;
1347         my @other_items_loop;
1348
1349         my $available_items;
1350         my $onloan_items;
1351         my $other_items;
1352
1353         my $ordered_count         = 0;
1354         my $available_count       = 0;
1355         my $onloan_count          = 0;
1356         my $longoverdue_count     = 0;
1357         my $other_count           = 0;
1358         my $wthdrawn_count        = 0;
1359         my $itemlost_count        = 0;
1360         my $itembinding_count     = 0;
1361         my $itemdamaged_count     = 0;
1362         my $item_in_transit_count = 0;
1363         my $can_place_holds       = 0;
1364         my $items_count           = scalar(@fields);
1365         my $items_counter;
1366         my $maxitems =
1367           ( C4::Context->preference('maxItemsinSearchResults') )
1368           ? C4::Context->preference('maxItemsinSearchResults') - 1
1369           : 1;
1370
1371         # loop through every item
1372         foreach my $field (@fields) {
1373             my $item;
1374             $items_counter++;
1375
1376             # populate the items hash
1377             foreach my $code ( keys %subfieldstosearch ) {
1378                 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1379             }
1380                         my $hbranch     = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'homebranch'    : 'holdingbranch';
1381                         my $otherbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'holdingbranch' : 'homebranch';
1382             # set item's branch name, use HomeOrHoldingBranch syspref first, fall back to the other one
1383             if ($item->{$hbranch}) {
1384                 $item->{'branchname'} = $branches{$item->{$hbranch}};
1385             }
1386             elsif ($item->{$otherbranch}) {     # Last resort
1387                 $item->{'branchname'} = $branches{$item->{$otherbranch}};
1388             }
1389
1390                         my $prefix = $item->{$hbranch} . '--' . $item->{location} . $item->{itype} . $item->{itemcallnumber};
1391 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1392             if ( $item->{onloan} ) {
1393                 $onloan_count++;
1394                                 my $key = $prefix . $item->{due_date};
1395                                 $onloan_items->{$key}->{due_date} = format_date($item->{onloan});
1396                                 $onloan_items->{$key}->{count}++ if $item->{homebranch};
1397                                 $onloan_items->{$key}->{branchname} = $item->{branchname};
1398                                 $onloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1399                                 $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber};
1400                                 $onloan_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1401                 # if something's checked out and lost, mark it as 'long overdue'
1402                 if ( $item->{itemlost} ) {
1403                     $onloan_items->{$prefix}->{longoverdue}++;
1404                     $longoverdue_count++;
1405                 } else {        # can place holds as long as item isn't lost
1406                     $can_place_holds = 1;
1407                 }
1408             }
1409
1410          # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1411             else {
1412
1413                 # item is on order
1414                 if ( $item->{notforloan} == -1 ) {
1415                     $ordered_count++;
1416                 }
1417
1418                 # is item in transit?
1419                 my $transfertwhen = '';
1420                 my ($transfertfrom, $transfertto);
1421
1422                 unless ($item->{wthdrawn}
1423                         || $item->{itemlost}
1424                         || $item->{damaged}
1425                         || $item->{notforloan}
1426                         || $items_count > 20) {
1427
1428                     # A couple heuristics to limit how many times
1429                     # we query the database for item transfer information, sacrificing
1430                     # accuracy in some cases for speed;
1431                     #
1432                     # 1. don't query if item has one of the other statuses
1433                     # 2. don't check transit status if the bib has
1434                     #    more than 20 items
1435                     #
1436                     # FIXME: to avoid having the query the database like this, and to make
1437                     #        the in transit status count as unavailable for search limiting,
1438                     #        should map transit status to record indexed in Zebra.
1439                     #
1440                     ($transfertwhen, $transfertfrom, $transfertto) = C4::Circulation::GetTransfers($item->{itemnumber});
1441                 }
1442
1443                 # item is withdrawn, lost or damaged
1444                 if (   $item->{wthdrawn}
1445                     || $item->{itemlost}
1446                     || $item->{damaged}
1447                     || $item->{notforloan}
1448                     || ($transfertwhen ne ''))
1449                 {
1450                     $wthdrawn_count++        if $item->{wthdrawn};
1451                     $itemlost_count++        if $item->{itemlost};
1452                     $itemdamaged_count++     if $item->{damaged};
1453                     $item_in_transit_count++ if $transfertwhen ne '';
1454                     $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan};
1455                     $other_count++;
1456
1457                                         my $key = $prefix . $item->{status};
1458                                         foreach (qw(wthdrawn itemlost damaged branchname itemcallnumber)) {
1459                         $other_items->{$key}->{$_} = $item->{$_};
1460                                         }
1461                     $other_items->{$key}->{intransit} = ($transfertwhen ne '') ? 1 : 0;
1462                                         $other_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1463                                         $other_items->{$key}->{count}++ if $item->{homebranch};
1464                                         $other_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1465                                         $other_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1466                 }
1467                 # item is available
1468                 else {
1469                     $can_place_holds = 1;
1470                     $available_count++;
1471                                         $available_items->{$prefix}->{count}++ if $item->{homebranch};
1472                                         foreach (qw(branchname itemcallnumber)) {
1473                         $available_items->{$prefix}->{$_} = $item->{$_};
1474                                         }
1475                                         $available_items->{$prefix}->{location} = $shelflocations->{ $item->{location} };
1476                                         $available_items->{$prefix}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1477                 }
1478             }
1479         }    # notforloan, item level and biblioitem level
1480         my ( $availableitemscount, $onloanitemscount, $otheritemscount );
1481         $maxitems =
1482           ( C4::Context->preference('maxItemsinSearchResults') )
1483           ? C4::Context->preference('maxItemsinSearchResults') - 1
1484           : 1;
1485         for my $key ( sort keys %$onloan_items ) {
1486             (++$onloanitemscount > $maxitems) and last;
1487             push @onloan_items_loop, $onloan_items->{$key};
1488         }
1489         for my $key ( sort keys %$other_items ) {
1490             (++$otheritemscount > $maxitems) and last;
1491             push @other_items_loop, $other_items->{$key};
1492         }
1493         for my $key ( sort keys %$available_items ) {
1494             (++$availableitemscount > $maxitems) and last;
1495             push @available_items_loop, $available_items->{$key}
1496         }
1497
1498         # XSLT processing of some stuff
1499         if (C4::Context->preference("XSLTResultsDisplay") && !$scan) {
1500             my $newxmlrecord = XSLTParse4Display($oldbiblio->{biblionumber},C4::Context->config('opachtdocs')."/prog/en/xslt/MARC21slim2OPACResults.xsl");
1501             $oldbiblio->{XSLTResultsRecord} = $newxmlrecord;
1502         }
1503
1504         # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1505         $can_place_holds = 0
1506           if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan};
1507         $oldbiblio->{norequests} = 1 unless $can_place_holds;
1508         $oldbiblio->{itemsplural}          = 1 if $items_count > 1;
1509         $oldbiblio->{items_count}          = $items_count;
1510         $oldbiblio->{available_items_loop} = \@available_items_loop;
1511         $oldbiblio->{onloan_items_loop}    = \@onloan_items_loop;
1512         $oldbiblio->{other_items_loop}     = \@other_items_loop;
1513         $oldbiblio->{availablecount}       = $available_count;
1514         $oldbiblio->{availableplural}      = 1 if $available_count > 1;
1515         $oldbiblio->{onloancount}          = $onloan_count;
1516         $oldbiblio->{onloanplural}         = 1 if $onloan_count > 1;
1517         $oldbiblio->{othercount}           = $other_count;
1518         $oldbiblio->{otherplural}          = 1 if $other_count > 1;
1519         $oldbiblio->{wthdrawncount}        = $wthdrawn_count;
1520         $oldbiblio->{itemlostcount}        = $itemlost_count;
1521         $oldbiblio->{damagedcount}         = $itemdamaged_count;
1522         $oldbiblio->{intransitcount}       = $item_in_transit_count;
1523         $oldbiblio->{orderedcount}         = $ordered_count;
1524         $oldbiblio->{isbn} =~
1525           s/-//g;    # deleting - in isbn to enable amazon content
1526         $oldbiblio->{'authorised_value_images'}  = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{'biblionumber'} ) );
1527         push( @newresults, $oldbiblio );
1528     }
1529     return @newresults;
1530 }
1531
1532 #----------------------------------------------------------------------
1533 #
1534 # Non-Zebra GetRecords#
1535 #----------------------------------------------------------------------
1536
=head2 NZgetRecords

  ( undef, $results_hashref, undef ) = NZgetRecords(
      $query,            $simple_query, $sort_by_ref,    $servers_ref,
      $results_per_page, $offset,       $expanded_facet, $branches,
      $query_type,       $scan
  );

NZgetRecords has the same API as the Zebra getRecords, even if some parameters
are not managed: only C<$query>, the first entry of C<$sort_by_ref>,
C<$results_per_page> and C<$offset> are used.

The query is resolved by NZanalyse and the resulting list is sorted by NZorder.
The first and third returned values are always undef; the second one is the
hashref built by NZorder.

=cut

sub NZgetRecords {
    my (
        $query,            $simple_query, $sort_by_ref,    $servers_ref,
        $results_per_page, $offset,       $expanded_facet, $branches,
        $query_type,       $scan
    ) = @_;
    warn "query =$query" if $DEBUG;
    my $result = NZanalyse($query);
    warn "results =$result" if $DEBUG;

    # Only the first requested sort key is honoured. A missing sort list is
    # passed on as "no ordering" instead of dying on the dereference.
    my $ordering =
      ( ref($sort_by_ref) eq 'ARRAY' ) ? $sort_by_ref->[0] : undef;

    return ( undef,
        NZorder( $result, $ordering, $results_per_page, $offset ),
        undef );
}
1556
=head2 NZanalyse

  $results = NZanalyse( $string, $server );

NZanalyse gets a CQL string as parameter, and returns a string made of
C<biblionumber,title-weight;> entries.
The list is built from an inverted index in the nozebra SQL table.
Note that title is here only for convenience : the sorting will be very fast when requested on title.
If the sorting is requested on something else, we will have to reread all results, and that may be longer.

C<$server> is the value of the nozebra.server column to search on; it defaults
to 'biblioserver'.

The query is split recursively on and/or/not until a leaf (a query without
operator) is found; a leaf is resolved with SQL queries, one per word, and the
per-word lists are ANDed together. Only the first double-quoted phrase of the
string is protected against operator splitting.

=cut

sub NZanalyse {
    my ( $string, $server ) = @_;
    $string = '' unless defined $string;
    warn " NZanalyse" if $DEBUG;

    # $server contains biblioserver or authorities, depending on what we search on.
    $server = 'biblioserver' unless $server;

    # if we have a ", replace the content to discard temporarily any and/or/not inside
    my $commacontent;
    if ( $string =~ s/"(.*?)"/__X__/ ) {
        $commacontent = $1;
        warn "commacontent : $commacontent" if $DEBUG;
    }

    # Put the quoted phrase back in place of its placeholder.
    # "defined" (not truth) so that a phrase such as "0" is restored too.
    my $restore_phrase = sub {
        my ($text) = @_;
        $text =~ s/__X__/"$commacontent"/
          if defined $text && defined $commacontent;
        return $text;
    };

    # Combine two result lists according to the (lower-cased) operator.
    my $combine = sub {
        my ( $op, $leftresult, $rightresult ) = @_;
        if ( $op eq ' and ' ) {
            warn "NZAND" if $DEBUG;
            return NZoperatorAND( $leftresult, $rightresult );
        }
        elsif ( $op eq ' or ' ) {

            # just merge the 2 strings
            return $leftresult . $rightresult;
        }
        elsif ( $op eq ' not ' ) {
            return NZoperatorNOT( $leftresult, $rightresult );
        }

        # this error is impossible, because of the regexp that isolate the operator, but just in case...
        die "error : operand unknown : $op for $string";
    };

# split the query string in 3 parts : X AND Y means : $left="X", $operator="AND" and $right="Y"
# then, call again NZanalyse with $left and $right
# (recursive until we find a leaf (=> something without and/or/not)
# delete repeated operator... Would then go in infinite loop
    while ( $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g ) {
    }

    #process parenthesis before.
    if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) {
        my ( $left, $operator, $right ) = ( $1, $3, $4 );
        $operator = defined $operator ? lc($operator) : '';

        # the recursive calls start from scratch, so the quoted phrase has
        # to be back in the sub-queries before they are analysed
        $left  = $restore_phrase->($left);
        $right = $restore_phrase->($right);
        warn
"dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right"
          if $DEBUG;
        my $leftresult = NZanalyse( $left, $server );

        # nothing after the closing parenthesis : the left part is the answer
        return $leftresult unless $operator;

        # OK, we have the results for right and left part of the query
        # depending of operator, intersect, union or exclude both lists
        # to get a result list
        my $rightresult = NZanalyse( $right, $server );
        return $combine->( $operator, $leftresult, $rightresult );
    }
    warn "string :" . $string if $DEBUG;
    my $left     = "";
    my $right    = "";
    my $operator = "";
    if ( $string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/ ) {
        ( $left, $operator, $right ) = ( $1, lc($2), $3 );
    }
    warn "no parenthesis. left : $left operator: $operator right: $right"
      if $DEBUG;

    # it's not a leaf, we have a and/or/not
    if ($operator) {

        # reintroduce comma content if needed
        $right = $restore_phrase->($right);
        $left  = $restore_phrase->($left);
        warn "node : $left / $operator / $right\n" if $DEBUG;
        my $leftresult  = NZanalyse( $left,  $server );
        my $rightresult = NZanalyse( $right, $server );
        warn " leftresult : $leftresult"   if $DEBUG;
        warn " rightresult : $rightresult" if $DEBUG;

        # OK, we have the results for right and left part of the query
        # depending of operator, intersect, union or exclude both lists
        # to get a result list
        return $combine->( $operator, $leftresult, $rightresult );
    }

    # it's a leaf, do the real SQL query and return the result
    $string = $restore_phrase->($string);
    $string =~ s/[-.?,;!'()\[\]{}"&+*\/]/ /g;

    #remove the blank at the beginning
    $string =~ s/^ //g;
    warn "leaf:$string" if $DEBUG;

    # parse the string in index/relation/value again; the two-character
    # relations are tried first so that ">=" is not read as "=".
    my ( $index, $relation, $value ) = ( $string, "", "" );
    if ( $string =~ /(.*)(>=|<=)(.*)/ ) {
        ( $index, $relation, $value ) = ( $1, $2, $3 );
    }
    elsif ( $string =~ /(.*)(>|<|=)(.*)/ ) {
        ( $index, $relation, $value ) = ( $1, $2, $3 );
    }

# strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr...
    $index =~ s/ .*$//;

    # automatic replace for short index names
    my %long_name_for = (
        ti => 'title',
        au => 'author',
        pb => 'publisher',
        su => 'subject',
        an => 'koha-Auth-Number',
        kw => 'keyword',
    );
    $index = $long_name_for{$index} if exists $long_name_for{$index};
    warn "handling leaf... left:$index operator:$relation right:$value"
      if $DEBUG;

    my $dbh     = C4::Context->dbh;
    my $results = '';

    # Tells "no word handled yet" apart from "the words handled so far have
    # nothing in common": in the latter case the result must stay empty
    # instead of being replaced by the hits of the next word.
    my $seen_term = 0;

    if ( $relation && $index ne 'keyword' ) {

        #do a specific search
        # $relation can only be one of >=, <=, >, <, = (from the regexps
        # above) or LIKE, so it is safe to put it in the SQL text.
        $relation = 'LIKE' if $relation eq '=' and $value =~ /%/;
        my $sth =
          $dbh->prepare(
"SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $relation ?"
          );
        warn "$index / $relation / $value\n" if $DEBUG;

        # split each word, query the DB and build the biblionumbers result
        #sanitizing leftpart
        $index =~ s/^\s+|\s+$//g;
        foreach my $term ( split / /, $value ) {
            $term =~ s/^\s+|\s+$//g;
            next if $term eq '';
            my $biblionumbers = '';
            warn "EXECUTE : $server, $index, $term" if $DEBUG;
            $sth->execute( $server, $index, $term )
              or warn "execute failed: " . $sth->errstr;
            while ( my ( $line, $indexed ) = $sth->fetchrow ) {

# if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
# otherwise, fill the result
                $biblionumbers .= $line
                  unless ( $value =~ /^\d+$/ && $indexed =~ /\D/ );
                warn "result : $indexed "
                  . ( $value =~ /\d/ ) . "=="
                  . ( $indexed =~ /\D/ ? $line : "" )
                  if $DEBUG;
            }

# do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
            if ($seen_term) {
                warn "NZAND" if $DEBUG;
                $results = NZoperatorAND( $biblionumbers, $results );
            }
            else {
                $results   = $biblionumbers;
                $seen_term = 1;
            }
        }
    }
    else {

      #do a complete search (all indexes), if index='kw' do complete search too.
        my $sth =
          $dbh->prepare(
            "SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"
          );

        # With an explicit keyword index ("kw=..."), the words to look for
        # are on the right of the relation; otherwise the whole leaf is words.
        my $words = $relation ? $value : $string;

        # split each word, query the DB and build the biblionumbers result
        foreach my $term ( split / /, $words ) {
            next if $term eq '';
            next if C4::Context->stopwords->{ uc($term) };   # skip if stopword
            warn "search on all indexes on $term" if $DEBUG;
            my $biblionumbers = '';
            $sth->execute( $server, $term )
              or warn "execute failed: " . $sth->errstr;
            while ( my ($line) = $sth->fetchrow ) {
                $biblionumbers .= $line;
            }

# do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
            if ($seen_term) {
                $results = NZoperatorAND( $biblionumbers, $results );
            }
            else {
                warn "NEW RES for $term = $biblionumbers" if $DEBUG;
                $results   = $biblionumbers;
                $seen_term = 1;
            }
        }
    }
    warn "return : $results for LEAF : $string" if $DEBUG;
    return $results;
}
1791
# NZoperatorAND( $rightresult, $leftresult )
#
# Intersects two NoZebra result lists. Both are strings made of
# "value-weight;" entries (value being "biblionumber,title").
# Returns the entries of $leftresult (the SECOND argument), in their original
# order, whose value is also present in $rightresult. Each kept entry gets
# the smaller of the two weights and is written twice.
# Returns an empty string when the lists have nothing in common.
sub NZoperatorAND{
    my ($rightresult, $leftresult)=@_;
    $rightresult = '' unless defined $rightresult;
    $leftresult  = '' unless defined $leftresult;

    my @leftresult = split /;/, $leftresult;
    warn " @leftresult / $rightresult \n" if $DEBUG;

    # Index the right list once (value => weight of its first occurrence).
    # Comparing whole values avoids "61,x" being found inside "161,x-2;"
    # and saves one scan of the right list per left entry.
    my %right_weight;
    foreach my $entry ( split /;/, $rightresult ) {
        next unless $entry =~ /^(.*)-(\d+)$/;
        $right_weight{$1} = $2 unless exists $right_weight{$1};
    }

    my $finalresult = '';

# parse the left results, and if the biblionumber exist in the right result, save it in finalresult
# the result is stored twice, to have the same weight for AND than OR.
# example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
# result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
    foreach my $entry (@leftresult) {
        my $value = $entry;
        my $countvalue;
        ( $value, $countvalue ) = ( $1, $2 ) if $entry =~ /^(.*)-(\d+)$/;
        next unless exists $right_weight{$value};

        # keep the lowest weight; an entry without weight takes the other one
        my $other_weight = $right_weight{$value};
        $countvalue = $other_weight
          if !defined $countvalue || $other_weight < $countvalue;
        $finalresult .= "$value-$countvalue;$value-$countvalue;";
    }
    warn "NZAND DONE : $finalresult \n" if $DEBUG;
    return $finalresult;
}
1818
# NZoperatorOR( $rightresult, $leftresult )
#
# Union of two NoZebra result lists: the second list is simply appended to
# the first one, duplicates are kept.
sub NZoperatorOR {
    my ( $rightresult, $leftresult ) = @_;
    my $merged = join '', $rightresult, $leftresult;
    return $merged;
}
1823
# NZoperatorNOT( $leftresult, $rightresult )
#
# Removes from $leftresult every entry whose value is present in
# $rightresult. Both lists are strings made of "value-weight;" entries
# (value being "biblionumber,title"); weights are ignored for the comparison.
# The kept entries are returned unchanged, in their original order; an empty
# string is returned when nothing is left.
sub NZoperatorNOT{
    my ($leftresult, $rightresult)=@_;
    $leftresult  = '' unless defined $leftresult;
    $rightresult = '' unless defined $rightresult;

    # Values to exclude, compared as plain strings: the value holds a title,
    # which must neither be read as a regular expression nor be matched in
    # the middle of another entry ("1,x" inside "21,x-1;").
    my %excluded;
    foreach my $entry ( split /;/, $rightresult ) {
        my $value = $entry;
        $value = $1 if $entry =~ m/^(.*)-\d+$/;
        $excluded{$value} = 1;
    }

    my $finalresult = '';
    foreach my $entry ( split /;/, $leftresult ) {
        next if $entry eq '';
        my $value = $entry;
        $value = $1 if $entry =~ m/^(.*)-\d+$/;
        $finalresult .= "$entry;" unless $excluded{$value};
    }
    return $finalresult;
}
1840
=head2 NZorder

  $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);

Sorts the result string built by NZanalyse and returns it in the same
structure as getRecords (Zebra querying):

  { biblioserver => { hits => $number_of_results, RECORDS => [ ... ] } }

RECORDS is not set when there is no result.

C<$biblionumbers> is a string of C<biblionumber,title-weight;> entries.
C<$results_per_page> defaults to 20 and C<$offset> to 0.

C<$ordering> selects the sort, depending on what it contains:

=over 2

=item popularity : sum of items.issues (descending for 'popularity_dsc')

=item author : 200$f or 700$a in UNIMARC, 100$a otherwise (descending for 'author_za')

=item callnumber or call_number : subfield mapped to items.itemcallnumber, or
to biblioitems.callnumber if there is none (descending for 'call_number_dsc')

=item pubdate : subfield mapped to biblioitems.publicationyear (descending for 'pubdate_dsc')

=item title : title found in C<$biblionumbers> (ascending for 'title_az', descending otherwise)

=item anything else : ranking, i.e. the sum of the weights of each biblio, highest first

=back

For the title and ranking orderings, only the entries of the requested page
are replaced by the USMARC record; the other entries of RECORDS stay plain
biblionumbers. For the other orderings every record is read and RECORDS only
contains USMARC records.

=cut

sub NZorder {
    my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_;
    $biblionumbers = '' unless defined $biblionumbers;
    $ordering      = '' unless defined $ordering;
    warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG;

    # order title asc by default
    #     $ordering = '1=36 <i' unless $ordering;
    $results_per_page = 20 unless $results_per_page;
    $offset           = 0  unless $offset;
    my $dbh = C4::Context->dbh;

    my @entries = split /;/, $biblionumbers;

    # Keys of a hash in string order, ascending or descending.
    my $sorted_keys = sub {
        my ( $hashref, $descending ) = @_;
        my @keys = sort keys %$hashref;
        @keys = reverse @keys if $descending;
        return @keys;
    };

    # for the requested page, replace biblionumber by the complete record
    # speed improvement : avoid reading too much things
    # The upper bound is inclusive (one entry more than the page size is
    # expanded) but never goes past the end of the list.
    my $expand_page = sub {
        my ($records) = @_;
        my $last = $offset + $results_per_page;
        $last = $#{$records} if $last > $#{$records};
        foreach my $counter ( $offset .. $last ) {
            next unless $records->[$counter];
            $records->[$counter] =
              GetMarcBiblio( $records->[$counter] )->as_usmarc;
        }
    };

    # return the same structure as GetRecords (Zebra querying)
    my $as_result = sub {
        my ($records) = @_;
        my $result_hash = { 'hits' => scalar @$records };
        $result_hash->{'RECORDS'} = $records if @$records;
        return { 'biblioserver' => $result_hash };
    };

    #
    # order by POPULARITY
    #
    if ( $ordering =~ /popularity/ ) {
        my %result;
        my %popularity;

        # popularity is not in MARC record, it's builded from a specific query
        my $sth =
          $dbh->prepare("select sum(issues) from items where biblionumber=?");
        foreach my $entry (@entries) {
            my ( $biblionumber, $title ) = split /,/, $entry;
            $result{$biblionumber} = GetMarcBiblio($biblionumber);
            $sth->execute($biblionumber);
            my $popularity = $sth->fetchrow || 0;

# hint : the key is popularity.title because we can have
# many results with the same popularity. In this cas, sub-ordering is done by title
# we also have biblionumber to avoid bug for 2 biblios with the same title & popularity
# (un-frequent, I agree, but we won't forget anything that way ;-)
            $popularity{ sprintf( "%10d", $popularity ) . $title
                  . $biblionumber } = $biblionumber;
        }

        my @records;
        foreach
          my $key ( $sorted_keys->( \%popularity, $ordering eq 'popularity_dsc' ) )
        {
            push @records, $result{ $popularity{$key} }->as_usmarc();
        }
        return $as_result->( \@records );
    }

    #
    # ORDER BY author
    #
    elsif ( $ordering =~ /author/ ) {
        my %result;
        my $is_unimarc = C4::Context->preference('marcflavour') eq 'UNIMARC';
        foreach my $entry (@entries) {
            my ( $biblionumber, $title ) = split /,/, $entry;
            my $record = GetMarcBiblio($biblionumber);
            my $author;
            if ($is_unimarc) {
                $author = $record->subfield( '200', 'f' );
                $author = $record->subfield( '700', 'a' ) unless $author;
            }
            else {
                $author = $record->subfield( '100', 'a' );
            }

# hint : the result is sorted by author.biblionumber because we can have X biblios with the same author
# and we don't want to get only 1 result for each of them !!!
            $result{ $author . $biblionumber } = $record;
        }

        my @records;
        foreach my $key ( $sorted_keys->( \%result, $ordering eq 'author_za' ) ) {
            push @records, $result{$key}->as_usmarc();
        }
        return $as_result->( \@records );
    }

    #
    # ORDER BY callnumber
    #
    # the sort values are written call_number_*, so both spellings are accepted
    elsif ( $ordering =~ /call_?number/ ) {

        # the MARC mapping is the same for every record : read it once
        my ( $callnumber_tag, $callnumber_subfield ) =
          GetMarcFromKohaField( 'items.itemcallnumber', '' );
        ( $callnumber_tag, $callnumber_subfield ) =
          GetMarcFromKohaField( 'biblioitems.callnumber', '' )
          unless $callnumber_tag;

        my %result;
        foreach my $entry (@entries) {
            my ( $biblionumber, $title ) = split /,/, $entry;
            my $record = GetMarcBiblio($biblionumber);
            my $callnumber =
              $callnumber_tag
              ? $record->subfield( $callnumber_tag, $callnumber_subfield )
              : undef;
            $callnumber = '' unless defined $callnumber;

# hint : the result is sorted by callnumber.biblionumber because we can have X biblios with the same callnumber
# and we don't want to get only 1 result for each of them !!!
            $result{ $callnumber . $biblionumber } = $record;
        }

        my @records;
        foreach
          my $key ( $sorted_keys->( \%result, $ordering eq 'call_number_dsc' ) )
        {
            push @records, $result{$key}->as_usmarc();
        }
        return $as_result->( \@records );
    }

    #
    # ORDER BY publication year
    #
    elsif ( $ordering =~ /pubdate/ ) {

        # the MARC mapping is the same for every record : read it once
        my ( $publicationyear_tag, $publicationyear_subfield ) =
          GetMarcFromKohaField( 'biblioitems.publicationyear', '' );

        my %result;
        foreach my $entry (@entries) {
            my ( $biblionumber, $title ) = split /,/, $entry;
            my $record = GetMarcBiblio($biblionumber);
            my $publicationyear =
              $record->subfield( $publicationyear_tag,
                $publicationyear_subfield );

# hint : the result is sorted by year.biblionumber because we can have X biblios with the same year
# and we don't want to get only 1 result for each of them !!!
            $result{ $publicationyear . $biblionumber } = $record;
        }

        my @records;
        foreach my $key ( $sorted_keys->( \%result, $ordering eq 'pubdate_dsc' ) )
        {
            push @records, $result{$key}->as_usmarc();
        }
        return $as_result->( \@records );
    }

    #
    # ORDER BY title
    #
    elsif ( $ordering =~ /title/ ) {

# the title is in the biblionumbers string, so we just need to build a hash, sort it and return
        my %result;
        foreach my $entry (@entries) {
            my ( $biblionumber, $title ) = split /,/, $entry;

# hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
# and we don't want to get only 1 result for each of them !!!
# hint & speed improvement : we can order without reading the record
# so order, and read records only for the requested page !
            $result{ $title . $biblionumber } = $biblionumber;
        }

        # 'title_az' sorts ascending, any other title ordering descending
        my @records;
        foreach my $key ( $sorted_keys->( \%result, $ordering ne 'title_az' ) ) {
            push @records, $result{$key};
        }
        $expand_page->( \@records );
        return $as_result->( \@records );
    }

    #
    # order by ranking
    #
    else {

# we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking
        my %result;
        my %count_ranking;
        foreach my $entry (@entries) {
            my ($biblionumber) = split /,/, $entry;

            # get weight : the whole number ending the entry (0 if missing)
            my $ranking = ( $entry =~ /-(\d+)$/ ) ? $1 : 0;

# note that we + the ranking because ranking is calculated on weight of EACH term requested.
# if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N
# biblio N has ranking = 6
            $count_ranking{$biblionumber} += $ranking;
        }

# build the result by "inverting" the count_ranking hash
# hint : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead
        foreach my $biblionumber ( keys %count_ranking ) {
            $result{ sprintf( "%10d", $count_ranking{$biblionumber} ) . '-'
                  . $biblionumber } = $biblionumber;
        }

        # highest ranking first
        my @records;
        foreach my $key ( $sorted_keys->( \%result, 1 ) ) {
            push @records, $result{$key};
        }
        $expand_page->( \@records );
        return $as_result->( \@records );
    }
}
2136
2137 =head2 ModBiblios
2138
2139 ($countchanged,$listunchanged) = ModBiblios($listbiblios, $tagsubfield,$initvalue,$targetvalue,$test);
2140
2141 This function changes every occurrence of the value $initvalue in subfield $tag$subfield of each record in $listbiblios to $targetvalue.
2142 If the $test parameter is set, the changes are not written to the database.
2143
2144 =over 2
2145
2146 =item C<input arg:>
2147
2148     * $listbiblios is an array ref to marcrecords to be changed
2149     * $tagsubfield is the reference of the subfield to change.
2150     * $initvalue is the value to search the record for
2151     * $targetvalue is the value to set the subfield to
2152     * $test, when set, prevents the changes from being written to the database.
2153
2154 =item C<Output arg:>
2155     * $countchanged counts all the changes performed.
2156     * $listunchanged contains the list of all the biblionumbers of records unchanged.
2157
2158 =item C<usage in the script:>
2159
2160 =back
2161
2162 my ($countchanged, $listunchanged) = ModBiblios($results->{RECORD}, $tagsubfield,$initvalue,$targetvalue);
2163 #If one wants to display unchanged records, you should get biblios foreach @$listunchanged
2164 $template->param(countchanged => $countchanged, loopunchanged=>$listunchanged);
2165
2166 =cut
2167
2168 sub ModBiblios {
2169     my ( $listbiblios, $tagsubfield, $initvalue, $targetvalue, $test ) = @_;
2170     my $countmatched;
2171     my @unmatched;
2172     my ( $tag, $subfield ) = ( $1, $2 )
2173       if ( $tagsubfield =~ /^(\d{1,3})([a-z0-9A-Z@])?$/ );
2174     if ( ( length($tag) < 3 ) && $subfield =~ /0-9/ ) {
2175         $tag = $tag . $subfield;
2176         undef $subfield;
2177     }
2178     my ( $bntag,   $bnsubf )   = GetMarcFromKohaField('biblio.biblionumber');
2179     my ( $itemtag, $itemsubf ) = GetMarcFromKohaField('items.itemnumber');
2180     if ($tag eq $itemtag) {
2181         # do not allow the embedded item tag to be
2182         # edited from here
2183         warn "Attempting to edit item tag via C4::Search::ModBiblios -- not allowed";
2184         return (0, []);
2185     }
2186     foreach my $usmarc (@$listbiblios) {
2187         my $record;
2188         $record = eval { MARC::Record->new_from_usmarc($usmarc) };
2189         my $biblionumber;
2190         if ($@) {
2191
2192             # usmarc is not a valid usmarc May be a biblionumber
2193             # FIXME - sorry, please let's figure out whether
2194             #         this function is to be passed a list of
2195             #         record numbers or a list of MARC::Record
2196             #         objects.  The former is probably better
2197             #         because the MARC records supplied by Zebra
2198             #         may be not current.
2199             $record       = GetMarcBiblio($usmarc);
2200             $biblionumber = $usmarc;
2201         }
2202         else {
2203             if ( $bntag >= 010 ) {
2204                 $biblionumber = $record->subfield( $bntag, $bnsubf );
2205             }
2206             else {
2207                 $biblionumber = $record->field($bntag)->data;
2208             }
2209         }
2210
2211         #GetBiblionumber is to be written.
2212         #Could be replaced by TransformMarcToKoha (But Would be longer)
2213         if ( $record->field($tag) ) {
2214             my $modify = 0;
2215             foreach my $field ( $record->field($tag) ) {
2216                 if ($subfield) {
2217                     if (
2218                         $field->delete_subfield(
2219                             'code'  => $subfield,
2220                             'match' => qr($initvalue)
2221                         )
2222                       )
2223                     {
2224                         $countmatched++;
2225                         $modify = 1;
2226                         $field->update( $subfield, $targetvalue )
2227                           if ($targetvalue);
2228                     }
2229                 }
2230                 else {
2231                     if ( $tag >= 010 ) {
2232                         if ( $field->delete_field($field) ) {
2233                             $countmatched++;
2234                             $modify = 1;
2235                         }
2236                     }
2237                     else {
2238                         $field->data = $targetvalue
2239                           if ( $field->data =~ qr($initvalue) );
2240                     }
2241                 }
2242             }
2243
2244             #       warn $record->as_formatted;
2245             if ($modify) {
2246                 ModBiblio( $record, $biblionumber,
2247                     GetFrameworkCode($biblionumber) )
2248                   unless ($test);
2249             }
2250             else {
2251                 push @unmatched, $biblionumber;
2252             }
2253         }
2254         else {
2255             push @unmatched, $biblionumber;
2256         }
2257     }
2258     return ( $countmatched, \@unmatched );
2259 }
2260
2261 END { }    # empty END block: placeholder for module clean-up code (global destructor); it currently does nothing
2262
2263 1;
2264 __END__
2265
2266 =head1 AUTHOR
2267
2268 Koha Development team <info@koha.org>
2269
2270 =cut