3 # This compares record counts from a Koha database to Elasticsearch
5 # Copyright 2019 ByWater Solutions
7 # This file is part of Koha.
9 # Koha is free software; you can redistribute it and/or modify it
10 # under the terms of the GNU General Public License as published by
11 # the Free Software Foundation; either version 3 of the License, or
12 # (at your option) any later version.
14 # Koha is distributed in the hope that it will be useful, but
15 # WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
19 # You should have received a copy of the GNU General Public License
20 # along with Koha; if not, see <http://www.gnu.org/licenses>.
24 compare_es_to_db.pl - compares record counts from a Koha database to Elasticsearch
28 B<compare_es_to_db.pl>
33 use Array
::Utils
qw( array_minus );
37 use Koha
::Authorities
;
40 use Koha
::SearchEngine
::Elasticsearch
;
42 foreach my $index ( ('biblios','authorities') ){
43 print "=================\n";
44 print "Checking $index\n";
45 my @db_records = $index eq 'biblios' ? Koha
::Biblios
->search()->get_column('biblionumber') : Koha
::Authorities
->search()->get_column('authid');
47 my $searcher = Koha
::SearchEngine
::Elasticsearch
->new({ index => $index });
48 my $es = $searcher->get_elasticsearch();
49 my $count = $es->indices->stats( index => $searcher->index_name )
50 ->{_all
}{primaries
}{docs
}{count
};
51 print "Count in db for $index is " . scalar @db_records . ", count in Elasticsearch is $count\n";
53 # Now fetch all the record ids from Elasticsearch.
54 # The scroll helper lets us iterate through the index; it fetches chunks of 'size' as we move through.
55 my $scroll = $es->scroll_helper(
56 index => $searcher->index_name,
69 # Here is where we actually iterate through the scroll,
70 # fetching each record and pushing its id into the array.
72 print "Fetching Elasticsearch records ids";
73 while (my $doc = $scroll->next ){
74 print "." if !($i % 500);
75 print "\n$i records retrieved" if !($i % 5000);
76 push @es_ids, $doc->{_id
};
80 # Fetch values for providing record links
81 my $es_params = $searcher->get_elasticsearch_params;
82 my $es_base = "$es_params->{nodes}[0]/".$searcher->index_name;
83 my $opac_base = C4
::Context
->preference('OPACBaseURL');
85 print "\nComparing arrays, this may take a while\n";
87 my @koha_problems = sort { $a <=> $b } array_minus
(@db_records, @es_ids);
88 my @es_problems = sort { $a <=> $b } array_minus
(@es_ids, @db_records);
90 print "All records match\n" unless ( @koha_problems || @es_problems );
92 if ( @koha_problems ){
93 print "=================\n";
94 print "Records that exist in Koha but not in ES\n";
95 for my $problem ( @koha_problems ){
96 if ( $index eq 'biblios' ) {
98 print " Visit here to see record: $opac_base/cgi-bin/koha/opac-detail.pl?biblionumber=$problem\n";
99 } elsif ( $index eq 'authorities' ) {
101 print " Visit here to see record: $opac_base/cgi-bin/koha/opac-authoritiesdetail.pl?authid=$problem\n";
107 print "=================\n";
108 print "Records that exist in ES but not in Koha\n";
109 for my $problem ( @es_problems ){
111 print " Enter this command to view record: curl $es_base/data/$problem?pretty=true\n";