4 local scriptname
=$
(basename $0)
8 Index Koha records by chunks. It is useful when a record causes errors and
9 stops the indexing process. With this script, if indexing of one chunk fails,
10 that chunk is split into two or more chunks, and indexing continues on these chunks.
11 rebuild_zebra.pl is called only once to export records. Splitting and indexing
12 is handled by this script (using zebraidx for indexing).
15 $scriptname [-t type] [-l X] [-o X] [-s X] [-d /export/dir] [-L /log/dir] [-r] [-f] [--reset-index]
18 -o | --offset Offset parameter of rebuild_zebra.pl.
20 -l | --length Length parameter of rebuild_zebra.pl. If omitted, the
21 length is automatically calculated to index all
23 -s | --chunks-size Initial chunk size (number of records indexed at once)
25 -d | --export-dir Where rebuild_zebra.pl will export data
27 -L | --log-dir Log directory
29 -r | --remove-logs Clean log directory before start
31 -t | --type Record type ('biblios' or 'authorities')
33 -f | --force Don't ask for confirmation before start
34 -h | --help Display this help message
35 --reset-index Reset Zebra index for 'type'
44 my $indexmode = '"$INDEXMODE"';
45 my $prefix = '"\"$prefix\""';
47 my ($i,$count) = (0,0);
48 open(my $fh, "<", '"\"$file\""');
49 open(my $out, ">", sprintf("$prefix%02d", $i));
54 open($out, ">", sprintf("$prefix%02d", $i));
56 if ($indexmode eq "dom" && $line !~ /<collection>/) {
57 print $out "<collection>";
61 $count++ if ($line =~ m|^</record>|);
62 if ($count == $size) {
63 if ($indexmode eq "dom" && $line !~ m|</collection>|) {
64 print $out "</collection>";
80 if [ $chunkssize -lt 1 ]; then
81 echo "Fail on file $file"
84 local prefix
="${file}_${chunkssize}_"
85 echo "Splitting file in chunks of $chunkssize records"
86 splitfile
$file $prefix $chunkssize
88 dir
=$
(dirname $prefix)
89 local files
="$(find $dir -regex $prefix[0-9]+ | sort | tr '\n' ' ')"
90 for chunkfile
in $files; do
91 echo "Indexing $chunkfile"
92 size
=$
(grep '^</record>' $chunkfile |
wc -l)
93 logfile
="$LOGDIR/zebraidx.$(basename $chunkfile).log"
94 ZEBRAIDX_CMD
="$ZEBRAIDX -c $CONFIGFILE -d $TYPE -g marcxml update $chunkfile"
95 $ZEBRAIDX_CMD >$logfile 2>&1
96 grep "Records: $size" $logfile >/dev
/null
2>&1
98 echo "Indexing failed. See log file $logfile"
99 echo "Split file and continue..."
100 indexfile
$chunkfile $
(($chunkssize/2))
102 ZEBRAIDX_CMD
="$ZEBRAIDX -c $CONFIGFILE -d $TYPE -g marcxml commit"
103 $ZEBRAIDX_CMD >> $logfile 2>&1
112 EXPORTDIR
=/tmp
/rebuild
/export
113 LOGDIR
=/tmp
/rebuild
/logs
166 if [ $HELP = "yes" ]; then
171 if [ -z $KOHA_CONF ]; then
172 echo "KOHA_CONF is not set"
176 if [ -z $PERL5LIB ]; then
177 echo "PERL5LIB is not set"
191 SQLTABLE
="auth_header"
194 echo "'$TYPE' is an unknown type. Defaulting to 'biblios'"
201 if [ -z $PERL ]; then
202 echo "perl not found"
206 if [ -z $LENGTH ]; then
209 my ($count) = C4::Context->dbh->selectrow_array(qq{
210 SELECT COUNT(*) FROM '"$SQLTABLE"'
216 ZEBRAIDX
=`which zebraidx`
217 if [ -z $ZEBRAIDX ]; then
218 echo "zebraidx not found"
222 REBUILDZEBRA
="`dirname $0`/rebuild_zebra.pl"
223 if [ ! -f $REBUILDZEBRA ]; then
224 echo "$REBUILDZEBRA: file not found"
230 echo "========================================================================="
231 echo "KOHA_CONF: $KOHA_CONF"
232 echo "PERL5LIB: $PERL5LIB"
233 echo "-------------------------------------------------------------------------"
234 echo "Start at offset: $OFFSET"
235 echo "Total number of records to index: $LENGTH"
236 echo "Initial chunk size: $CHUNKSSIZE"
237 echo "Export directory: $EXPORTDIR"
238 echo "Log directory: $LOGDIR"
239 echo "Remove logs before start? $RMLOGS"
240 echo "Type of record: $TYPE"
241 echo "Reset index before start? $RESETINDEX"
242 echo "-------------------------------------------------------------------------"
243 echo "zebraidx path: $ZEBRAIDX"
244 echo "rebuild_zebra path: $REBUILDZEBRA"
245 echo "perl path: $PERL"
246 echo "========================================================================="
248 if [ $NOCONFIRM != "yes" ]; then
250 echo -n "Confirm ? [Y/n] "
252 if [ $response ] && [ $response != "yes" ] && [ $response != "y" ]; then
256 if [ $confirm = "n" ]; then
262 if [ $?
-ne 0 ]; then
263 echo "Failed to create directory $EXPORTDIR. Aborting."
268 if [ $?
-ne 0 ]; then
269 echo "Failed to create directory $LOGDIR. Aborting."
273 if [ $RMLOGS = "yes" ]; then
277 REBUILDZEBRA_CMD
="$REBUILDZEBRA $TYPESWITCH -v -x -k -d $EXPORTDIR --offset $OFFSET --length $LENGTH --skip-index"
278 echo "\n$REBUILDZEBRA_CMD"
284 EXPORTFILE
="$EXPORTDIR/biblio/exported_records"
285 indexmode_config_name
="zebra_bib_index_mode"
288 EXPORTFILE
="$EXPORTDIR/authority/exported_records"
289 indexmode_config_name
="zebra_auth_index_mode"
292 echo "Error: TYPE '$TYPE' is not supported"
296 INDEXMODE
=$
(perl
-e '
298 print C4::Context->config('"$indexmode_config_name"');
301 CONFIGFILE
=$
(perl
-e '
303 my $zebra_server = ('"$TYPE"' eq "biblios") ? "biblioserver" : "authorityserver";
304 print C4::Context->zebraconfig($zebra_server)->{config};
307 if [ $RESETINDEX = "yes" ]; then
308 RESETINDEX_CMD
="$ZEBRAIDX -c $CONFIGFILE init"
309 echo "\n$RESETINDEX_CMD"
314 indexfile
$EXPORTFILE $CHUNKSSIZE