Bug 18811: DBRev 17.05.00.006
[koha.git] / Koha / MetaSearcher.pm
blob65f72398a7b1b68a1409ef75a8d28676a7e08c45
1 package Koha::MetaSearcher;
3 # Copyright 2014 ByWater Solutions
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it
8 # under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3 of the License, or
10 # (at your option) any later version.
12 # Koha is distributed in the hope that it will be useful, but
13 # WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Koha; if not, see <http://www.gnu.org/licenses>.
20 use Modern::Perl;
22 use base 'Class::Accessor';
24 use C4::Charset qw( MarcToUTF8Record );
25 use C4::Search qw(); # Purely for new_record_from_zebra
26 use DBIx::Class::ResultClass::HashRefInflator;
27 use IO::Select;
28 use Koha::Caches;
29 use Koha::Database;
30 use Koha::MetadataRecord;
31 use MARC::File::XML;
32 use Storable qw( store_fd fd_retrieve );
33 use Time::HiRes qw( clock_gettime CLOCK_MONOTONIC );
34 use UUID;
35 use ZOOM;
37 use sort 'stable';
39 __PACKAGE__->mk_accessors( qw( fetch offset on_error resultset ) );
41 sub new {
42 my ( $class, $options ) = @_;
44 my ( $uuid, $uuidstring );
45 UUID::generate($uuid);
46 UUID::unparse( $uuid, $uuidstring );
48 return bless {
49 offset => 0,
50 fetch => 100,
51 on_error => sub {},
52 results => [],
53 resultset => $uuidstring,
54 %{ $options || {} }
55 }, $class;
58 sub handle_hit {
59 my ( $self, $index, $server, $marcrecord ) = @_;
61 my $record = Koha::MetadataRecord->new( { schema => 'marc', record => $marcrecord } );
63 my %fetch = (
64 title => 'biblio.title',
65 seriestitle => 'biblio.seriestitle',
66 author => 'biblio.author',
67 isbn =>'biblioitems.isbn',
68 issn =>'biblioitems.issn',
69 lccn =>'biblioitems.lccn', #LC control number (not call number)
70 edition =>'biblioitems.editionstatement',
71 date => 'biblio.copyrightdate', #MARC21
72 date2 => 'biblioitems.publicationyear', #UNIMARC
75 my $metadata = {};
76 while ( my ( $key, $kohafield ) = each %fetch ) {
77 $metadata->{$key} = $record->getKohaField($kohafield);
79 $metadata->{date} //= $metadata->{date2};
81 push @{ $self->{results} }, {
82 server => $server,
83 index => $index,
84 record => $marcrecord,
85 metadata => $metadata,
89 sub search {
90 my ( $self, $server_ids, $query ) = @_;
92 my $resultset_expiry = 300;
94 my $cache = Koha::Caches->get_instance();
95 my $schema = Koha::Database->new->schema;
96 my $stats = {
97 num_fetched => {
98 map { $_ => 0 } @$server_ids
100 num_hits => {
101 map { $_ => 0 } @$server_ids
103 total_fetched => 0,
104 total_hits => 0,
106 my $start = clock_gettime( CLOCK_MONOTONIC );
107 my $select = IO::Select->new;
109 my @cached_sets;
110 my @servers;
112 foreach my $server_id ( @$server_ids ) {
113 if ( $server_id =~ /^\d+$/ ) {
114 # Z39.50 server
115 my $server = $schema->resultset('Z3950server')->find(
116 { id => $server_id },
117 { result_class => 'DBIx::Class::ResultClass::HashRefInflator' },
119 $server->{type} = 'z3950';
121 push @servers, $server;
122 } elsif ( $server_id =~ /(\w+)(?::(\w+))?/ ) {
123 # Special server
124 push @servers, {
125 type => $1,
126 extra => $2,
127 id => $server_id,
128 host => $server_id,
129 name => $server_id,
134 # HashRefInflator is used so that the information will survive into the fork
135 foreach my $server ( @servers ) {
136 if ( $cache ) {
137 my $set = $cache->get_from_cache( 'z3950-resultset-' . $self->resultset . '-' . $server->{id} );
138 if ( ref($set) eq 'HASH' ) {
139 $set->{server} = $server;
140 push @cached_sets, $set;
141 next;
145 $select->add( $self->_start_worker( $server, $query ) );
148 # Handle these while the servers are searching
149 foreach my $set ( @cached_sets ) {
150 $self->_handle_hits( $stats, $set );
153 while ( $select->count ) {
154 foreach my $readfh ( $select->can_read() ) {
155 my $result = fd_retrieve( $readfh );
157 $select->remove( $readfh );
158 close $readfh;
159 wait;
161 next if ( ref $result ne 'HASH' );
163 if ( $result->{error} ) {
164 $self->{on_error}->( $result->{server}, $result->{error} );
165 next;
168 $self->_handle_hits( $stats, $result );
170 if ( $cache ) {
171 $cache->set_in_cache( 'z3950-resultset-' . $self->resultset . '-' . $result->{server}->{id}, {
172 hits => $result->{hits},
173 num_fetched => $result->{num_fetched},
174 num_hits => $result->{num_hits},
175 }, { expiry => $resultset_expiry } );
180 $stats->{time} = clock_gettime( CLOCK_MONOTONIC ) - $start;
182 return $stats;
185 sub _start_worker {
186 my ( $self, $server, $query ) = @_;
187 pipe my $readfh, my $writefh;
189 # Accessing the cache or Koha database after the fork is risky, so get any resources we need
190 # here.
191 my $pid;
192 my $marcflavour = C4::Context->preference('marcflavour');
194 if ( ( $pid = fork ) ) {
195 # Parent process
196 close $writefh;
198 return $readfh;
199 } elsif ( !defined $pid ) {
200 # Error
202 $self->{on_error}->( $server, 'Failed to fork' );
203 return;
206 close $readfh;
207 my $connection;
208 my ( $num_hits, $num_fetched, $hits, $results );
210 eval {
211 if ( $server->{type} eq 'z3950' ) {
212 my $zoptions = ZOOM::Options->new();
213 $zoptions->option( 'elementSetName', 'F' );
214 $zoptions->option( 'databaseName', $server->{db} );
215 $zoptions->option( 'user', $server->{userid} ) if $server->{userid};
216 $zoptions->option( 'password', $server->{password} ) if $server->{password};
217 $zoptions->option( 'preferredRecordSyntax', $server->{syntax} );
218 $zoptions->option( 'timeout', $server->{timeout} ) if $server->{timeout};
220 $connection = ZOOM::Connection->create($zoptions);
222 $connection->connect( $server->{host}, $server->{port} );
223 $results = $connection->search_pqf( $query ); # Starts the search
224 } elsif ( $server->{type} eq 'koha' ) {
225 $connection = C4::Context->Zconn( $server->{extra} );
226 $results = $connection->search_pqf( $query ); # Starts the search
227 } elsif ( $server->{type} eq 'batch' ) {
228 $server->{encoding} = 'utf-8';
231 if ($@) {
232 store_fd {
233 error => $connection ? $connection->exception() : $@,
234 server => $server,
235 }, $writefh;
236 exit;
239 if ( $server->{type} eq 'batch' ) {
240 # TODO: actually handle PQF
241 $query =~ s/@\w+ (?:\d+=\d+ )?//g;
242 $query =~ s/"//g;
244 my $schema = Koha::Database->new->schema;
245 $schema->storage->debug(1);
246 my $match_condition = [ map +{ -like => '%' . $_ . '%' }, split( /\s+/, $query ) ];
247 $hits = [ $schema->resultset('ImportRecord')->search(
249 import_batch_id => $server->{extra},
250 -or => [
251 { 'import_biblios.title' => $match_condition },
252 { 'import_biblios.author' => $match_condition },
253 { 'import_biblios.isbn' => $match_condition },
254 { 'import_biblios.issn' => $match_condition },
258 join => [ qw( import_biblios ) ],
259 rows => $self->{fetch},
261 )->get_column( 'marc' )->all ];
263 $num_hits = $num_fetched = scalar @$hits;
264 } else {
265 $num_hits = $results->size;
266 $num_fetched = ( $self->{offset} + $self->{fetch} ) < $num_hits ? $self->{fetch} : $num_hits;
268 $hits = [ map { $_->raw() } @{ $results->records( $self->{offset}, $num_fetched, 1 ) } ];
271 if ( !@$hits && $connection && $connection->exception() ) {
272 store_fd {
273 error => $connection->exception(),
274 server => $server,
275 }, $writefh;
276 exit;
279 if ( $server->{type} eq 'koha' ) {
280 $hits = [ map { C4::Search::new_record_from_zebra( $server->{extra}, $_ ) } @$hits ];
281 } else {
282 $hits = [ map { $self->_import_record( $_, $marcflavour, $server->{encoding} ? $server->{encoding} : "iso-5426" ) } @$hits ];
285 store_fd {
286 hits => $hits,
287 num_fetched => $num_fetched,
288 num_hits => $num_hits,
289 server => $server,
290 }, $writefh;
292 exit;
295 sub _import_record {
296 my ( $self, $raw, $marcflavour, $encoding ) = @_;
298 my ( $marcrecord ) = MarcToUTF8Record( $raw, $marcflavour, $encoding ); #ignores charset return values
300 return $marcrecord;
303 sub _handle_hits {
304 my ( $self, $stats, $set ) = @_;
306 my $server = $set->{server};
308 my $num_hits = $stats->{num_hits}->{ $server->{id} } = $set->{num_hits};
309 my $num_fetched = $stats->{num_fetched}->{ $server->{id} } = $set->{num_fetched};
311 $stats->{total_hits} += $num_hits;
312 $stats->{total_fetched} += $num_fetched;
314 foreach my $j ( 0..$#{ $set->{hits} } ) {
315 $self->handle_hit( $self->{offset} + $j, $server, $set->{hits}->[$j] );
319 sub sort {
320 my ( $self, $key, $direction ) = @_;
322 my $empty_flip = -1; # Determines the flip of ordering for records with empty sort keys.
324 foreach my $hit ( @{ $self->{results} } ) {
325 ( $hit->{sort_key} = $hit->{metadata}->{$key} || '' ) =~ s/\W//g;
328 $self->{results} = [ sort {
329 # Sort empty records at the end
330 return -$empty_flip unless $a->{sort_key};
331 return $empty_flip unless $b->{sort_key};
333 $direction * ( $a->{sort_key} cmp $b->{sort_key} );
334 } @{ $self->{results} } ];
337 sub results {
338 my ( $self, $offset, $length ) = @_;
340 my @subset;
342 foreach my $i ( $offset..( $offset + $length - 1 ) ) {
343 push @subset, $self->{results}->[$i] if $self->{results}->[$i];
346 return @subset;