Increment version for 3.22.18
[koha.git] / misc / migration_tools / rebuild_zebra_sliced.sh
blob7ddf32e9c5f801b70fb78a293389f372971a58d6
1 #!/bin/sh
# Print the help text on standard output.
# The "Default:" lines expand the option globals (OFFSET, CHUNKSSIZE,
# EXPORTDIR, LOGDIR, RMLOGS, TYPE) at the moment the function is called.
usage() {
    local scriptname
    # "$0" is quoted so a script path containing whitespace still works.
    scriptname=$(basename "$0")
    cat <<EOF
$scriptname

Index Koha records by chunks. It is useful when a record causes errors and
stops the indexing process. With this script, if indexing of one chunk fails,
that chunk is split into two or more chunks, and indexing continues on these chunks.
rebuild_zebra.pl is called only once to export records. Splitting and indexing
is handled by this script (using zebraidx for indexing).

Usage:
$scriptname [-t type] [-l X] [-o X] [-s X] [-d /export/dir] [-L /log/dir] [-r] [-f] [--reset-index]
$scriptname -h

    -o | --offset         Offset parameter of rebuild_zebra.pl.
                          Default: $OFFSET
    -l | --length         Length parameter of rebuild_zebra.pl. If omitted, the
                          length is automatically calculated to index all
                          records
    -s | --chunks-size    Initial chunk size (number of records indexed at once)
                          Default: $CHUNKSSIZE
    -d | --export-dir     Where rebuild_zebra.pl will export data
                          Default: $EXPORTDIR
    -L | --log-dir        Log directory
                          Default: $LOGDIR
    -r | --remove-logs    Clean log directory before start
                          Default: $RMLOGS
    -t | --type           Record type ('biblios' or 'authorities')
                          Default: $TYPE
    -f | --force          Don't ask for confirmation before start
    -h | --help           Display this help message
    --reset-index         Reset Zebra index for 'type'
EOF
}
# Split an exported records file into numbered chunk files.
#
#   $1  file    - path of the file to split
#   $2  prefix  - output path prefix; chunk N is written to "<prefix>NN"
#                 (zero-padded to at least two digits)
#   $3  size    - number of records per chunk
#
# Reads the globals INDEXMODE and PERL. A record is counted each time a line
# starts with "</record>". When INDEXMODE is "dom", every chunk is wrapped in
# <collection>...</collection> unless the boundary line already carries the
# tag.
#
# The values are handed to Perl through @ARGV instead of being pasted into the
# program text, so quotes, "$", "@" or "%" in a path (or an empty INDEXMODE)
# can no longer corrupt the Perl source or the sprintf format.
splitfile() {
    local file="$1"
    local prefix="$2"
    local size="$3"
    local script='
        use strict;
        use warnings;

        my ($indexmode, $file, $prefix, $size) = @ARGV;
        my ($i, $count) = (0, 0);

        open(my $fh, "<", $file)
            or die "Cannot read $file: $!\n";
        open(my $out, ">", sprintf("%s%02d", $prefix, $i))
            or die "Cannot write chunk $i of $file: $!\n";
        my $closed = 0;

        while (my $line = <$fh>) {
            if ($closed) {
                # Previous chunk is full: start the next one lazily, only
                # when there is actually another line to write.
                open($out, ">", sprintf("%s%02d", $prefix, $i))
                    or die "Cannot write chunk $i of $file: $!\n";
                $closed = 0;
                if ($indexmode eq "dom" && $line !~ /<collection>/) {
                    print $out "<collection>";
                }
            }
            print $out $line;
            $count++ if ($line =~ m|^</record>|);
            if ($count == $size) {
                if ($indexmode eq "dom" && $line !~ m|</collection>|) {
                    print $out "</collection>";
                }
                $count = 0;
                $i++;
                close($out);
                $closed = 1;
            }
        }

        close($out) unless $closed;
        close($fh);
    '
    # "--" ends perl switch parsing so the arguments reach @ARGV untouched.
    "${PERL:-perl}" -e "$script" -- "$INDEXMODE" "$file" "$prefix" "$size"
}
# Index a records file chunk by chunk, re-splitting any chunk that fails.
#
#   $1  file        - path of the records file to index
#   $2  chunkssize  - number of records per chunk
#
# The file is split with splitfile, then each chunk is sent to zebraidx
# ("update"). A chunk counts as indexed when its log contains
# "Records: <number of records in the chunk>"; it is then committed.
# Otherwise the chunk itself is indexed again with half the chunk size.
# A chunk size below 1 means the file cannot be split further and the
# failure is only reported.
#
# Reads the globals ZEBRAIDX, CONFIGFILE, TYPE and LOGDIR.
indexfile() {
    local file="$1"
    local chunkssize="$2"
    # The function recurses, so every working variable must be local.
    local prefix chunkfile size logfile

    if [ "$chunkssize" -lt 1 ]; then
        echo "Fail on file $file"
    else
        prefix="${file}_${chunkssize}_"
        echo "Splitting file in chunks of $chunkssize records"
        splitfile "$file" "$prefix" "$chunkssize"

        # A glob plus a digits-only filter replaces "find -regex": it works
        # with relative paths, does not treat the prefix as a regular
        # expression, and copes with whitespace in file names. The list is
        # expanded once, before any recursive call creates sub-chunks.
        for chunkfile in "$prefix"[0-9]*; do
            case "${chunkfile#"$prefix"}" in
                "" | *[!0-9]* ) continue ;;
            esac

            echo "Indexing $chunkfile"
            # grep -c prints a bare number; "wc -l" pads it on some systems,
            # which would break the "Records: N" match below.
            size=$(grep -c '^</record>' "$chunkfile")
            logfile="$LOGDIR/zebraidx.$(basename "$chunkfile").log"
            "$ZEBRAIDX" -c "$CONFIGFILE" -d "$TYPE" -g marcxml update "$chunkfile" >"$logfile" 2>&1
            if grep "Records: $size" "$logfile" >/dev/null 2>&1; then
                "$ZEBRAIDX" -c "$CONFIGFILE" -d "$TYPE" -g marcxml commit >>"$logfile" 2>&1
            else
                echo "Indexing failed. See log file $logfile"
                echo "Split file and continue..."
                indexfile "$chunkfile" $((chunkssize / 2))
            fi
        done
    fi
}
# Default settings; each one can be overridden from the command line.
OFFSET=0
LENGTH=
CHUNKSSIZE=10000
EXPORTDIR=/tmp/rebuild/export
LOGDIR=/tmp/rebuild/logs
RMLOGS=no
NOCONFIRM=no
TYPE=biblios
HELP=no
RESETINDEX=no

# Get parameters
# Loop on the argument count rather than on the unquoted value of $1, so
# arguments containing whitespace do not break the test.
while [ $# -gt 0 ]; do
    # Options that take a value must be followed by one; otherwise the
    # final "shift" below would run past the end of the argument list.
    case "$1" in
        -o | --offset | -l | --length | -s | --chunks-size | \
        -d | --export-dir | -L | --log-dir | -t | --type )
            if [ $# -lt 2 ]; then
                echo "Option '$1' requires a value"
                usage
                exit 1
            fi
            ;;
    esac

    case "$1" in
        -o | --offset )
            shift
            OFFSET=$1
            ;;
        -l | --length )
            shift
            LENGTH=$1
            ;;
        -s | --chunks-size )
            shift
            CHUNKSSIZE=$1
            ;;
        -d | --export-dir )
            shift
            EXPORTDIR=$1
            ;;
        -L | --log-dir )
            shift
            LOGDIR=$1
            ;;
        -r | --remove-logs )
            RMLOGS=yes
            ;;
        -t | --type )
            shift
            TYPE=$1
            ;;
        -f | --force )
            NOCONFIRM=yes
            ;;
        -h | --help )
            HELP=yes
            ;;
        --reset-index )
            RESETINDEX=yes
            ;;
        * )
            # Unknown option: show the help text and stop.
            usage
            exit 1
            ;;
    esac
    shift
done
# Help is shown after option parsing so the "Default:" lines reflect any
# values given on the command line.
if [ "$HELP" = "yes" ]; then
    usage
    exit 0
fi

# The Perl snippets below load C4::Context, which needs both variables.
if [ -z "$KOHA_CONF" ]; then
    echo "KOHA_CONF is not set"
    exit 1
fi

if [ -z "$PERL5LIB" ]; then
    echo "PERL5LIB is not set"
    exit 1
fi

# Map the record type to the rebuild_zebra.pl switch and to the SQL table
# used for counting records.
TYPESWITCH=
SQLTABLE=
case "$TYPE" in
    biblios )
        TYPESWITCH=-b
        SQLTABLE="biblio"
        ;;
    authorities )
        TYPESWITCH=-a
        SQLTABLE="auth_header"
        ;;
    * )
        echo "'$TYPE' is an unknown type. Defaulting to 'biblios'"
        TYPESWITCH=-b
        TYPE=biblios
        SQLTABLE="biblio"
        ;;
esac

# "command -v" is the POSIX way to locate a program; "which" is not
# guaranteed to exist.
PERL=$(command -v perl)
if [ -z "$PERL" ]; then
    echo "perl not found"
    exit 1
fi

# Without --length, index every record of the selected table.
if [ -z "$LENGTH" ]; then
    # The table name is passed through @ARGV rather than pasted into the
    # Perl source.
    LENGTH=$("$PERL" -e '
        use C4::Context;
        my ($table) = @ARGV;
        my ($count) = C4::Context->dbh->selectrow_array(qq{
            SELECT COUNT(*) FROM $table
        });
        print $count;
    ' -- "$SQLTABLE")
    if [ -z "$LENGTH" ]; then
        echo "Could not determine the number of records in '$SQLTABLE'"
        exit 1
    fi
fi

ZEBRAIDX=$(command -v zebraidx)
if [ -z "$ZEBRAIDX" ]; then
    echo "zebraidx not found"
    exit 1
fi

# rebuild_zebra.pl is expected next to this script.
REBUILDZEBRA="$(dirname "$0")/rebuild_zebra.pl"
if [ ! -f "$REBUILDZEBRA" ]; then
    echo "$REBUILDZEBRA: file not found"
    exit 1
fi
# Show the effective configuration before doing anything.
echo ""
echo "Configuration"
echo "========================================================================="
echo "KOHA_CONF: $KOHA_CONF"
echo "PERL5LIB: $PERL5LIB"
echo "-------------------------------------------------------------------------"
echo "Start at offset: $OFFSET"
echo "Total number of records to index: $LENGTH"
echo "Initial chunk size: $CHUNKSSIZE"
echo "Export directory: $EXPORTDIR"
echo "Log directory: $LOGDIR"
echo "Remove logs before start? $RMLOGS"
echo "Type of record: $TYPE"
echo "Reset index before start? $RESETINDEX"
echo "-------------------------------------------------------------------------"
echo "zebraidx path: $ZEBRAIDX"
echo "rebuild_zebra path: $REBUILDZEBRA"
echo "perl path: $PERL"
echo "========================================================================="

# Ask for confirmation unless -f/--force was given. An empty answer means
# yes, as the "[Y/n]" prompt advertises.
if [ "$NOCONFIRM" != "yes" ]; then
    confirm=y
    # printf instead of "echo -n", whose behaviour is not portable in sh.
    printf '%s' "Confirm ? [Y/n] "
    read -r response
    # Accept the answer in any letter case: the prompt shows an uppercase
    # "Y", so typing it must not abort the run.
    case "$response" in
        "" | [Yy] | [Yy][Ee][Ss] )
            ;;
        * )
            confirm=n
            ;;
    esac

    if [ "$confirm" = "n" ]; then
        exit 0
    fi
fi
# Prepare the working directories.
if ! mkdir -p "$EXPORTDIR"; then
    echo "Failed to create directory $EXPORTDIR. Aborting."
    exit 1
fi

if ! mkdir -p "$LOGDIR"; then
    echo "Failed to create directory $LOGDIR. Aborting."
    exit 1
fi

if [ "$RMLOGS" = "yes" ]; then
    rm -f "$LOGDIR"/*.log
fi

# Export the records once; indexing is done afterwards by indexfile.
# REBUILDZEBRA_CMD is kept for display only. The command is run with
# individually quoted arguments so paths containing whitespace survive.
REBUILDZEBRA_CMD="$REBUILDZEBRA $TYPESWITCH -v -x -k -d $EXPORTDIR --offset $OFFSET --length $LENGTH --skip-index"
# printf instead of echo "\n...": whether echo expands "\n" depends on the
# shell that provides /bin/sh.
printf '\n%s\n' "$REBUILDZEBRA_CMD"
"$REBUILDZEBRA" "$TYPESWITCH" -v -x -k -d "$EXPORTDIR" --offset "$OFFSET" --length "$LENGTH" --skip-index

# Locate the export file and the koha-conf setting that holds the index mode.
EXPORTFILE=
case "$TYPE" in
    biblios )
        EXPORTFILE="$EXPORTDIR/biblio/exported_records"
        indexmode_config_name="zebra_bib_index_mode"
        ;;
    authorities )
        EXPORTFILE="$EXPORTDIR/authority/exported_records"
        indexmode_config_name="zebra_auth_index_mode"
        ;;
    * )
        echo "Error: TYPE '$TYPE' is not supported"
        exit 1
        ;;
esac

# Stop here when the export produced nothing to index.
if [ ! -f "$EXPORTFILE" ]; then
    echo "Export file $EXPORTFILE not found. Aborting."
    exit 1
fi

# Values are passed to Perl through @ARGV instead of being pasted into the
# program text, where they were parsed as barewords.
INDEXMODE=$("$PERL" -e '
    use C4::Context;
    print C4::Context->config($ARGV[0]);
' -- "$indexmode_config_name")

CONFIGFILE=$("$PERL" -e '
    use C4::Context;
    my $zebra_server = ($ARGV[0] eq "biblios") ? "biblioserver" : "authorityserver";
    print C4::Context->zebraconfig($zebra_server)->{config};
' -- "$TYPE")

if [ "$RESETINDEX" = "yes" ]; then
    RESETINDEX_CMD="$ZEBRAIDX -c $CONFIGFILE init"
    printf '\n%s\n' "$RESETINDEX_CMD"
    "$ZEBRAIDX" -c "$CONFIGFILE" init
    echo ""
fi

indexfile "$EXPORTFILE" "$CHUNKSSIZE"