Bug 17196: Fix build_oai_sets.pl
[koha.git] / misc / search_tools / rebuild_elastic_search.pl
blob6faa32c63e70a471063b8e707a56f8c00b6ffa8d
1 #!/usr/bin/perl
3 # This inserts records from a Koha database into elastic search
5 # Copyright 2014 Catalyst IT
7 # This file is part of Koha.
9 # Koha is free software; you can redistribute it and/or modify it under the
10 # terms of the GNU General Public License as published by the Free Software
11 # Foundation; either version 3 of the License, or (at your option) any later
12 # version.
14 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
15 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
16 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License along
19 # with Koha; if not, write to the Free Software Foundation, Inc.,
20 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22 =head1 NAME
24 rebuild_elastic_search.pl - inserts records from a Koha database into Elasticsearch
26 =head1 SYNOPSIS
B<rebuild_elastic_search.pl>
[B<-c|--commit>=C<count>]
[B<-d|--delete>]
[B<-a|--authorities>]
[B<-b|--biblios>]
[B<-bn|--bnumber>=C<number>]
[B<-v|--verbose>]
[B<-h|--help>]
[B<--man>]
34 =head1 DESCRIPTION
36 Inserts records from a Koha database into Elasticsearch.
38 =head1 OPTIONS
40 =over
42 =item B<-c|--commit>=C<count>
44 Specify how many records will be batched up before they're added to Elasticsearch.
45 Higher should be faster, but will cause more RAM usage. Default is 5000.
47 =item B<-d|--delete>
49 Delete the index and recreate it before indexing.
51 =item B<-a|--authorities>
53 Index the authorities only. Combining this with B<-b> is the same as
54 specifying neither and so both get indexed.
56 =item B<-b|--biblios>
58 Index the biblios only. Combining this with B<-a> is the same as
59 specifying neither and so both get indexed.
61 =item B<-bn|--bnumber>
63 Only index the supplied biblionumber, mostly for testing purposes. May be
64 repeated. This also applies to authorities via authid, so if you're using it,
65 you probably only want to do one or the other at a time.
67 =item B<-v|--verbose>
69 By default, this program only emits warnings and errors. This makes it talk
70 more. Add more to make it even more wordy, in particular when debugging.
72 =item B<-h|--help>
74 Help!
76 =item B<--man>
78 Full documentation.
80 =back
82 =cut
84 use autodie;
85 use Getopt::Long;
86 use C4::Context;
87 use Koha::MetadataRecord::Authority;
88 use Koha::BiblioUtils;
89 use Koha::SearchEngine::Elasticsearch::Indexer;
90 use MARC::Field;
91 use MARC::Record;
92 use Modern::Perl;
93 use Pod::Usage;
95 use Data::Dumper; # TODO remove
# Command-line state. These file-scoped lexicals are read by the subs below
# ($verbose by _log, $commit and $delete by do_reindex).
my $verbose = 0;
my $commit = 5000;
my ($delete, $help, $man);
my ($index_biblios, $index_authorities);
my (@biblionumbers);

# GetOptions returns false when it meets an unknown or malformed option.
# Stop with the usage text in that case instead of carrying on: a mistyped
# switch must not lead to an unintended (possibly destructive) reindex.
GetOptions(
    'c|commit=i'    => \$commit,
    'd|delete'      => \$delete,
    'a|authorities' => \$index_authorities,
    'b|biblios'     => \$index_biblios,
    'bn|bnumber=i'  => \@biblionumbers,
    'v|verbose+'    => \$verbose,
    'h|help'        => \$help,
    'man'           => \$man,
) or pod2usage(2);

# Default is to do both
unless ($index_authorities || $index_biblios) {
    $index_authorities = $index_biblios = 1;
}

pod2usage(1) if $help;
pod2usage( -exitstatus => 0, -verbose => 2 ) if $man;

# Abort early if the configuration cannot support indexing at all.
sanity_check();
# Index the requested record types. Each pass hands do_reindex() a closure
# that returns the next object to index, or a false value when nothing is
# left. do_reindex() calls ->id and ->record on every object it receives, so
# the closures must return the wrapper object, not the record inside it.
my $next;
if ($index_biblios) {
    _log(1, "Indexing biblios\n");
    if (@biblionumbers) {
        # Work on a copy: the list given with --bnumber must still be intact
        # for the authorities pass below.
        my @pending = @biblionumbers;
        $next = sub {
            while ( defined( my $biblionumber = shift @pending ) ) {
                my $biblio = Koha::BiblioUtils->get_from_biblionumber( $biblionumber, item_data => 1 );
                return $biblio if $biblio;

                # NOTE(review): assumes a false result means "no such biblio";
                # skip it rather than ending the whole run early - confirm.
                warn "Biblio $biblionumber could not be loaded, skipping.\n";
            }
            return;
        };
    } else {
        my $records = Koha::BiblioUtils->get_all_biblios_iterator();
        $next = sub {
            $records->next();
        };
    }
    do_reindex($next, $Koha::SearchEngine::Elasticsearch::BIBLIOS_INDEX);
}
if ($index_authorities) {
    _log(1, "Indexing authorities\n");
    if (@biblionumbers) {
        # Independent copy, so this pass sees every requested id even when
        # the biblio pass has already run.
        my @pending = @biblionumbers;
        $next = sub {
            while ( defined( my $authid = shift @pending ) ) {
                my $authority = Koha::MetadataRecord::Authority->get_from_authid($authid);

                # Return the authority object itself; do_reindex() extracts
                # the id and the record from it.
                # NOTE(review): assumes it answers ->id like the objects
                # produced by get_all_authorities_iterator - confirm.
                return $authority if $authority;
                warn "Authority $authid could not be loaded, skipping.\n";
            }
            return;
        };
    } else {
        my $records = Koha::MetadataRecord::Authority->get_all_authorities_iterator();
        $next = sub {
            $records->next();
        };
    }
    do_reindex($next, $Koha::SearchEngine::Elasticsearch::AUTHORITIES_INDEX);
}
# Feeds every object produced by a source into one index.
#
#   do_reindex($next, $index_name);
#
# $next is a code reference that is called repeatedly; every true value it
# returns must answer ->id and ->record, and a false value ends the run.
# $index_name is passed to the indexer constructor. Records are handed to the
# indexer in batches of $commit; when $delete is set the index is dropped
# before anything is sent.
sub do_reindex {
    my ( $next, $index_name ) = @_;

    my $indexer = Koha::SearchEngine::Elasticsearch::Indexer->new( { index => $index_name } );

    # update_index has not been called on this indexer yet, so it is safe to
    # drop the index without creating a fresh indexer object afterwards.
    $indexer->drop_index() if $delete;

    my $total = 0;
    my ( @ids, @payloads );
    while ( my $item = $next->() ) {
        my $id      = $item->id;
        my $payload = $item->record;
        _log( 1, "$id\n" );
        $total++;

        push @ids,      $id;
        push @payloads, $payload;

        # A full batch is exactly $commit entries; a non-positive $commit
        # never fills one, so everything waits for the final flush.
        next unless $commit > 0 && @ids == $commit;

        _log( 2, "Committing...\n" );
        $indexer->update_index( \@ids, \@payloads );
        @ids      = ();
        @payloads = ();
    }

    # Flush whatever is left over. This call is made even when the buffers
    # are empty.
    $indexer->update_index( \@ids, \@payloads );
    _log( 1, "$total records indexed.\n" );
}
# Pre-flight check run before any indexing starts.
#
# Dies with an explanatory message when C4::Context has no 'elasticsearch'
# configuration entry (koha-conf.xml).
sub sanity_check {
    die "No 'elasticsearch' block is defined in koha-conf.xml.\n"
        if not C4::Context->config('elasticsearch');
}
# Progress reporting.
#
#   _log($level, $msg);
#
# Prints $msg when the current verbosity ($verbose) is at least $level.
# Nothing is appended to $msg, so callers supply their own trailing newline.
sub _log {
    my ($level, $msg) = @_;

    if ( $level <= $verbose ) {
        print $msg;
    }
}