Bug 16699: Remove requirement from borrowernumberQueryParam
[koha.git] / Koha / ElasticSearch / Indexer.pm
blob258c6c50f4095ddd90ef1def41b7b8e5c5d68ee5
1 package Koha::ElasticSearch::Indexer;
3 # Copyright 2013 Catalyst IT
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it under the
8 # terms of the GNU General Public License as published by the Free Software
9 # Foundation; either version 3 of the License, or (at your option) any later
10 # version.
12 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
13 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
14 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License along
17 # with Koha; if not, write to the Free Software Foundation, Inc.,
18 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 use Carp;
21 use Modern::Perl;
22 use base qw(Koha::ElasticSearch);
23 use Data::Dumper;
25 # For now just marc, but we can do anything here really
26 use Catmandu::Importer::MARC;
27 use Catmandu::Store::ElasticSearch;
29 Koha::ElasticSearch::Indexer->mk_accessors(qw( store ));
31 =head1 NAME
33 Koha::ElasticSearch::Indexer - handles adding new records to the index
35 =head1 SYNOPSIS
37 my $indexer = Koha::ElasticSearch::Indexer->new(
38 { index => Koha::SearchEngine::BIBLIOS_INDEX } );
39 $indexer->drop_index();
40 $indexer->update_index(\@biblionumbers, \@records);
42 =head1 FUNCTIONS
44 =head2 $indexer->update_index($biblionums, $records);
46 C<$biblionums> is an arrayref containing the biblionumbers for the records.
48 C<$records> is an arrayref containing the L<MARC::Record>s themselves.
50 The values in the arrays must match up, and the 999$c value in the MARC record
51 will be rewritten using the values in C<$biblionums> to ensure they are correct.
52 If C<$biblionums> is C<undef>, this won't happen, but you should be sure that
53 999$c is correct on your own then.
55 Note that this will modify the original record if C<$biblionums> is supplied.
56 If that's a problem, clone them first.
58 =cut
# Add (or re-add) the given records to the Elasticsearch index.
#
#   $biblionums - arrayref of biblionumbers matching $records one-to-one,
#                 or a false value to leave the records' 999 fields alone.
#   $records    - arrayref of the records to index.
#
# Returns 1 once the records have been added and the bag committed.
sub update_index {
    my ( $self, $biblionums, $records ) = @_;

    # TODO should have a separate path for dealing with a large number
    # of records at once where we use the bulk update functions in ES.

    # The 999 fields are only rewritten when biblionumbers were supplied.
    $self->_sanitise_records( $biblionums, $records ) if $biblionums;

    my $documents = $self->_convert_marc_to_json($records);

    # The store is created on first use and then kept on the object.
    unless ( $self->store ) {
        my $es_params = $self->get_elasticsearch_params();
        my $new_store = Catmandu::Store::ElasticSearch->new(
            %$es_params,
            index_settings => $self->get_elasticsearch_settings(),
            index_mappings => $self->get_elasticsearch_mappings(),
        );
        $self->store($new_store);
    }

    my $bag = $self->store->bag;
    $bag->add_many($documents);
    $bag->commit;

    return 1;
}
85 =head2 $indexer->update_index_background($biblionums, $records)
87 This has exactly the same API as C<update_index> however it'll
88 return immediately. It'll start a background process that does the adding.
90 If it fails to add to Elasticsearch then it'll add to a queue that will cause
91 it to be updated by a regular index cron job in the future.
93 # TODO implement in the future - I don't know the best way of doing this yet.
94 # If fork: make sure process group is changed so apache doesn't wait for us.
96 =cut
# Placeholder for a future asynchronous implementation: for now this simply
# forwards its arguments to update_index and hands back that call's result.
sub update_index_background {
    my ( $self, @args ) = @_;
    return $self->update_index(@args);
}
103 =head2 $indexer->delete_index($biblionums)
105 C<$biblionums> is an arrayref of biblionumbers to delete from the index.
107 =cut
# Delete the given biblionumbers from the index.
#
#   $biblionums - arrayref of biblionumbers to remove.
#
# Returns the result of committing the bag.
sub delete_index {
    my ( $self, $biblionums ) = @_;

    # The store is created on first use and then kept on the object.
    unless ( $self->store ) {
        my $es_params = $self->get_elasticsearch_params();
        my $new_store = Catmandu::Store::ElasticSearch->new(
            %$es_params,
            index_settings => $self->get_elasticsearch_settings(),
            index_mappings => $self->get_elasticsearch_mappings(),
        );
        $self->store($new_store);
    }

    my $bag = $self->store->bag;
    for my $biblionumber (@$biblionums) {
        $bag->delete($biblionumber);
    }

    return $bag->commit;
}
126 =head2 $indexer->delete_index_background($biblionums)
128 Identical to C<delete_index>, this will return immediately and start a
129 background process to do the actual deleting.
131 =cut
133 # TODO implement in the future
# Placeholder for a future asynchronous implementation: for now this simply
# forwards its arguments to delete_index and hands back that call's result.
sub delete_index_background {
    my ( $self, @args ) = @_;
    return $self->delete_index(@args);
}
140 =head2 $indexer->drop_index();
142 Drops the index from the elasticsearch server. Calling C<update_index>
143 after this will recreate it again.
145 =cut
# Drop the index from the server and forget the cached store, so that the
# next call needing a store builds a fresh one.
sub drop_index {
    my $self = shift;

    unless ( $self->store ) {

        # If this index doesn't exist, this will create it. Then it'll be
        # deleted. That's not the end of the world however.
        my $es_params = $self->get_elasticsearch_params();
        my $new_store = Catmandu::Store::ElasticSearch->new(
            %$es_params,
            index_settings => $self->get_elasticsearch_settings(),
            index_mappings => $self->get_elasticsearch_mappings(),
        );
        $self->store($new_store);
    }

    $self->store->drop();

    # Clear the accessor; its return value is passed through to the caller.
    return $self->store(undef);
}
# Rewrite the 999 field of every record so that its $c and $d subfields hold
# the biblionumber found at the same position in $biblionums.
#
#   $biblionums - arrayref of biblionumbers.
#   $records    - arrayref of records, same length and order as $biblionums.
#                 The records are modified in place.
#
# Dies (with a stack trace) if the two arrays differ in length. An undefined
# entry in $records is reported with a warning and skipped, instead of
# aborting the whole batch on a method call against undef.
sub _sanitise_records {
    my ( $self, $biblionums, $records ) = @_;

    confess "Unequal number of values in \$biblionums and \$records."
      if @$biblionums != @$records;

    for my $i ( 0 .. $#{$biblionums} ) {
        my $bibnum = $biblionums->[$i];
        my $rec    = $records->[$i];

        if ( !defined $rec ) {
            carp 'No record supplied for biblionumber '
              . ( defined $bibnum ? $bibnum : '(undef)' )
              . ', skipping';
            next;
        }

        # I've seen things you people wouldn't believe. Attack ships on fire
        # off the shoulder of Orion. I watched C-beams glitter in the dark near
        # the Tannhauser gate. MARC records where 999$c doesn't match the
        # biblionumber column. All those moments will be lost in time... like
        # tears in rain...
        $rec->delete_fields( $rec->field('999') );
        $rec->append_fields(
            MARC::Field->new( '999', '', '', 'c' => $bibnum, 'd' => $bibnum ) );
    }

    return;
}
# Wrap the records in a MARC importer keyed on 999$c and run this object's
# fixer rules over it. Returns whatever the fixer produces for the importer.
sub _convert_marc_to_json {
    my ( $self, $records ) = @_;

    my $marc_importer = Catmandu::Importer::MARC->new(
        records => $records,
        id      => '999c',
    );
    my $fixer = Catmandu::Fix->new( fixes => $self->get_fixer_rules() );

    # Assigned to a scalar first so the fixer is always called in scalar
    # context, regardless of how this sub itself is called.
    my $fixed = $fixer->fix($marc_importer);
    return $fixed;
}
197 __END__
199 =head1 AUTHOR
201 =over 4
203 =item Chris Cormack C<< <chrisc@catalyst.net.nz> >>
205 =item Robin Sheat C<< <robin@catalyst.net.nz> >>
207 =back