4 local scriptname
=$
(basename $0)
8 Index Koha records by chunks. It is useful when a record causes errors and
9 stops the indexing process. With this script, if indexing of one chunk fails,
10 that chunk is split into two or more chunks, and indexing continues on these chunks.
11 rebuild_zebra.pl is called only once to export records. Splitting and indexing
12 is handled by this script (using zebraidx for indexing).
15 $scriptname [-t type] [-l X] [-o X] [-s X] [-d /export/dir] [-L /log/dir] [-r] [-f] [--reset-index]
18 -o | --offset Offset parameter of rebuild_zebra.pl.
20 -l | --length Length parameter of rebuild_zebra.pl. If omitted, the
21 length is automatically calculated to index all
23 -s | --chunks-size Initial chunk size (number of records indexed at once)
25 -d | --export-dir Where rebuild_zebra.pl will export data
27 -x | --exclude-export Do not export Biblios from Koha, but use the existing
29 -L | --log-dir Log directory
31 -r | --remove-logs Clean log directory before start
33 -t | --type Record type ('biblios' or 'authorities')
35 -f | --force Don't ask for confirmation before start
36 -h | --help Display this help message
37 --reset-index Reset Zebra index for 'type'
46 my $indexmode = '"$INDEXMODE"';
47 my $prefix = '"\"$prefix\""';
49 my ($i,$count) = (0,0);
50 open(my $fh, "<", '"\"$file\""');
51 open(my $out, ">", sprintf("$prefix%02d", $i));
56 open($out, ">", sprintf("$prefix%02d", $i));
58 if ($indexmode eq "dom" && $line !~ /<collection>/) {
59 print $out "<collection>";
63 $count++ if ($line =~ m|^</record>|);
64 if ($count == $size) {
65 if ($indexmode eq "dom" && $line !~ m|</collection>|) {
66 print $out "</collection>";
82 if [ $chunkssize -lt 1 ]; then
83 echo "Fail on file $file"
86 local prefix
="${file}_${chunkssize}_"
87 echo "Splitting file in chunks of $chunkssize records"
88 splitfile
$file $prefix $chunkssize
90 dir
=$
(dirname $prefix)
91 local files
="$(find $dir -regex $prefix[0-9]+ | sort | tr '\n' ' ')"
92 for chunkfile
in $files; do
93 echo "Indexing $chunkfile"
94 size
=$
(grep '^</record>' $chunkfile |
wc -l)
95 logfile
="$LOGDIR/zebraidx.$(basename $chunkfile).log"
96 ZEBRAIDX_CMD
="$ZEBRAIDX -c $CONFIGFILE -d $TYPE -g marcxml update $chunkfile"
97 $ZEBRAIDX_CMD >$logfile 2>&1
98 grep "Records: $size" $logfile >/dev
/null
2>&1
100 echo "Indexing failed. See log file $logfile"
101 echo "Split file and continue..."
102 indexfile
$chunkfile $
(($chunkssize/2))
104 ZEBRAIDX_CMD
="$ZEBRAIDX -c $CONFIGFILE -d $TYPE -g marcxml commit"
105 $ZEBRAIDX_CMD >> $logfile 2>&1
114 EXPORTDIR
=/tmp
/rebuild
/export
116 LOGDIR
=/tmp
/rebuild
/logs
146 -x |
--exclude-export )
172 if [ $HELP = "yes" ]; then
177 if [ -z $KOHA_CONF ]; then
178 echo "KOHA_CONF is not set"
182 if [ -z $PERL5LIB ]; then
183 echo "PERL5LIB is not set"
197 SQLTABLE
="auth_header"
200 echo "'$TYPE' is an unknown type. Defaulting to 'biblios'"
207 if [ -z $PERL ]; then
208 echo "perl not found"
212 if [ -z $LENGTH ]; then
215 my ($count) = C4::Context->dbh->selectrow_array(qq{
216 SELECT COUNT(*) FROM '"$SQLTABLE"'
222 ZEBRAIDX
=`which zebraidx`
223 if [ -z $ZEBRAIDX ]; then
224 echo "zebraidx not found"
228 REBUILDZEBRA
="`dirname $0`/rebuild_zebra.pl"
229 if [ ! -f $REBUILDZEBRA ]; then
230 echo "$REBUILDZEBRA: file not found"
236 echo "========================================================================="
237 echo "KOHA_CONF: $KOHA_CONF"
238 echo "PERL5LIB: $PERL5LIB"
239 echo "-------------------------------------------------------------------------"
240 echo "Start at offset: $OFFSET"
241 echo "Total number of records to index: $LENGTH"
242 echo "Initial chunk size: $CHUNKSSIZE"
243 echo "Export directory: $EXPORTDIR"
244 echo "Exclude re-exporting: $EXCLUDEEXPORT"
245 echo "Log directory: $LOGDIR"
246 echo "Remove logs before start? $RMLOGS"
247 echo "Type of record: $TYPE"
248 echo "Reset index before start? $RESETINDEX"
249 echo "-------------------------------------------------------------------------"
250 echo "zebraidx path: $ZEBRAIDX"
251 echo "rebuild_zebra path: $REBUILDZEBRA"
252 echo "perl path: $PERL"
253 echo "========================================================================="
255 if [ $NOCONFIRM != "yes" ]; then
257 echo -n "Confirm ? [Y/n] "
259 if [ $response ] && [ $response != "yes" ] && [ $response != "y" ]; then
263 if [ $confirm = "n" ]; then
269 if [ $?
-ne 0 ]; then
270 echo "Failed to create directory $EXPORTDIR. Aborting."
275 if [ $?
-ne 0 ]; then
276 echo "Failed to create directory $LOGDIR. Aborting."
280 if [ $RMLOGS = "yes" ]; then
284 if [ $EXCLUDEEXPORT = "no" ]; then
285 REBUILDZEBRA_CMD
="$REBUILDZEBRA $TYPESWITCH -v -x -k -d $EXPORTDIR --offset $OFFSET --length $LENGTH --skip-index"
286 echo "\n$REBUILDZEBRA_CMD"
293 EXPORTFILE
="$EXPORTDIR/biblio/exported_records"
294 indexmode_config_name
="zebra_bib_index_mode"
297 EXPORTFILE
="$EXPORTDIR/authority/exported_records"
298 indexmode_config_name
="zebra_auth_index_mode"
301 echo "Error: TYPE '$TYPE' is not supported"
305 INDEXMODE
=$
(perl
-e '
307 print C4::Context->config('"$indexmode_config_name"');
310 CONFIGFILE
=$
(perl
-e '
312 my $zebra_server = ('"$TYPE"' eq "biblios") ? "biblioserver" : "authorityserver";
313 print C4::Context->zebraconfig($zebra_server)->{config};
316 if [ $RESETINDEX = "yes" ]; then
317 RESETINDEX_CMD
="$ZEBRAIDX -c $CONFIGFILE init"
318 echo "\n$RESETINDEX_CMD"
323 indexfile
$EXPORTFILE $CHUNKSSIZE