Bug 26285: Follow E.164 pattern for validating SMS numbers
[koha.git] / misc / migration_tools / rebuild_zebra_sliced.sh
blob1eeb551b31a5c056b8c9f88ef3d8cf90dbd4cb7d
1 #!/bin/sh
# Print the help text on standard output.
#
# The "Default:" lines show the current values of the corresponding shell
# variables (OFFSET, CHUNKSSIZE, ...) at the time of the call, because the
# here-document is expanded when the function runs.
usage() {
    # Quote "$0" so that a script path containing whitespace is handled.
    local scriptname
    scriptname=$(basename "$0")
    cat <<EOF
$scriptname

Index Koha records by chunks. It is useful when a record causes errors and
stops the indexing process. With this script, if indexing of one chunk fails,
that chunk is split into two or more chunks, and indexing continues on these chunks.
rebuild_zebra.pl is called only once to export records. Splitting and indexing
is handled by this script (using zebraidx for indexing).

Usage:
$scriptname [-t type] [-l X] [-o X] [-s X] [-d /export/dir] [-x] [-L /log/dir] [-r] [-f] [--reset-index]
$scriptname -h

    -o | --offset           Offset parameter of rebuild_zebra.pl.
                            Default: $OFFSET
    -l | --length           Length parameter of rebuild_zebra.pl. If omitted, the
                            length is automatically calculated to index all
                            records
    -s | --chunks-size      Initial chunk size (number of records indexed at once)
                            Default: $CHUNKSSIZE
    -d | --export-dir       Where rebuild_zebra.pl will export data
                            Default: $EXPORTDIR
    -x | --exclude-export   Do not export Biblios from Koha, but use the existing
                            export-dir
    -L | --log-dir          Log directory
                            Default: $LOGDIR
    -r | --remove-logs      Clean log directory before start
                            Default: $RMLOGS
    -t | --type             Record type ('biblios' or 'authorities')
                            Default: $TYPE
    -f | --force            Don't ask for confirmation before start
    -h | --help             Display this help message
    --reset-index           Reset Zebra index for 'type'
EOF
}
41 splitfile() {
42 local file=$1
43 local prefix=$2
44 local size=$3
45 local script='
46 my $indexmode = '"$INDEXMODE"';
47 my $prefix = '"\"$prefix\""';
48 my $size = '"$size"';
49 my ($i,$count) = (0,0);
50 open(my $fh, "<", '"\"$file\""');
51 open(my $out, ">", sprintf("$prefix%02d", $i));
52 my $closed = 0;
53 while (<$fh>) {
54 my $line = $_;
55 if ($closed) {
56 open($out, ">", sprintf("$prefix%02d", $i));
57 $closed = 0;
58 if ($indexmode eq "dom" && $line !~ /<collection>/) {
59 print $out "<collection>";
62 print $out $line;
63 $count++ if ($line =~ m|^</record>|);
64 if ($count == $size) {
65 if ($indexmode eq "dom" && $line !~ m|</collection>|) {
66 print $out "</collection>";
68 $count = 0;
69 $i++;
70 close($out);
71 $closed = 1;
75 $PERL -e "$script"
# Index a record file chunk by chunk, halving the chunk size on failure.
#
# Arguments:
#   $1  path of the record file to index
#   $2  number of records per chunk
#
# The file is split with splitfile, then each chunk is passed to
# "zebraidx update". A chunk counts as indexed when the zebraidx log contains
# "Records: <n>", n being the number of "</record>" lines in the chunk; it is
# then committed. Otherwise the chunk is re-processed recursively with half
# the chunk size. When the chunk size drops below 1 the file is reported as
# failed and skipped.
#
# Reads the globals LOGDIR, ZEBRAIDX, CONFIGFILE and TYPE.
indexfile() {
    local file=$1
    local chunkssize=$2
    # Working variables are local because the function calls itself from
    # inside the loop below; a nested call must not overwrite them.
    local prefix dir files chunkfile size logfile

    if [ "$chunkssize" -lt 1 ]; then
        echo "Fail on file $file"
    else
        prefix="${file}_${chunkssize}_"
        echo "Splitting file in chunks of $chunkssize records"
        splitfile "$file" "$prefix" "$chunkssize"

        dir=$(dirname "$prefix")
        files="$(find "$dir" -regex "${prefix}[0-9]+" | sort | tr '\n' ' ')"
        # NOTE: the list is split on whitespace here, so chunk paths
        # containing blanks are still not supported by this loop.
        for chunkfile in $files; do
            echo "Indexing $chunkfile"
            # grep -c prints a bare number; "wc -l" may pad its output with
            # blanks on some platforms, which would break the match below.
            size=$(grep -c '^</record>' "$chunkfile")
            logfile="$LOGDIR/zebraidx.$(basename "$chunkfile").log"
            "$ZEBRAIDX" -c "$CONFIGFILE" -d "$TYPE" -g marcxml update "$chunkfile" >"$logfile" 2>&1
            if ! grep "Records: $size" "$logfile" >/dev/null 2>&1; then
                echo "Indexing failed. See log file $logfile"
                echo "Split file and continue..."
                indexfile "$chunkfile" $((chunkssize / 2))
            else
                "$ZEBRAIDX" -c "$CONFIGFILE" -d "$TYPE" -g marcxml commit >>"$logfile" 2>&1
            fi
        done
    fi
}
# Default settings; each one can be overridden by a command-line option.
OFFSET=0
LENGTH=
CHUNKSSIZE=10000
EXPORTDIR=/tmp/rebuild/export
EXCLUDEEXPORT=no
LOGDIR=/tmp/rebuild/logs
RMLOGS=no
NOCONFIRM=no
TYPE=biblios
HELP=no
RESETINDEX=no

# Get parameters
# The loop tests the argument count instead of "$1": an empty argument no
# longer ends parsing early and an argument containing blanks no longer
# breaks the test. Options that take a value check that one is present, so a
# trailing "-o" prints the usage instead of failing in "shift".
while [ $# -gt 0 ]; do
    case $1 in
        -o | --offset )
            [ $# -ge 2 ] || { usage; exit 1; }
            shift
            OFFSET=$1
            ;;
        -l | --length )
            [ $# -ge 2 ] || { usage; exit 1; }
            shift
            LENGTH=$1
            ;;
        -s | --chunks-size )
            [ $# -ge 2 ] || { usage; exit 1; }
            shift
            CHUNKSSIZE=$1
            ;;
        -d | --export-dir )
            [ $# -ge 2 ] || { usage; exit 1; }
            shift
            EXPORTDIR=$1
            ;;
        -L | --log-dir )
            [ $# -ge 2 ] || { usage; exit 1; }
            shift
            LOGDIR=$1
            ;;
        -x | --exclude-export )
            EXCLUDEEXPORT=yes
            ;;
        -r | --remove-logs )
            RMLOGS=yes
            ;;
        -t | --type )
            [ $# -ge 2 ] || { usage; exit 1; }
            shift
            TYPE=$1
            ;;
        -f | --force )
            NOCONFIRM=yes
            ;;
        -h | --help )
            HELP=yes
            ;;
        --reset-index )
            RESETINDEX=yes
            ;;
        * )
            # Unknown option: show the help text and stop.
            usage
            exit 1
    esac
    shift
done
# Help is shown only after all options were parsed, so the defaults printed
# by usage reflect any option given on the command line.
if [ "$HELP" = "yes" ]; then
    usage
    exit 0
fi

# The Perl snippets run later load C4::Context, so both variables must be
# set. The expansions are quoted so that values containing blanks do not
# break the test.
if [ -z "$KOHA_CONF" ]; then
    echo "KOHA_CONF is not set"
    exit 1
fi

if [ -z "$PERL5LIB" ]; then
    echo "PERL5LIB is not set"
    exit 1
fi
# Derive the rebuild_zebra.pl type switch and the SQL table holding the
# records from TYPE. Anything other than 'biblios' or 'authorities' is
# reported and replaced by 'biblios'.
if [ "$TYPE" = "authorities" ]; then
    TYPESWITCH=-a
    SQLTABLE="auth_header"
else
    if [ "$TYPE" != "biblios" ]; then
        echo "'$TYPE' is an unknown type. Defaulting to 'biblios'"
        TYPE=biblios
    fi
    TYPESWITCH=-b
    SQLTABLE="biblio"
fi
# Locate the perl interpreter. "command -v" is the POSIX way to look up a
# command and, unlike "which", does not depend on an external program.
PERL=$(command -v perl)
if [ -z "$PERL" ]; then
    echo "perl not found"
    exit 1
fi

# When no length was given, index every record of the selected table.
# SQLTABLE is one of the two fixed table names chosen from TYPE, so splicing
# it into the query text is safe.
if [ -z "$LENGTH" ]; then
    LENGTH=$("$PERL" -e '
        use C4::Context;
        my ($count) = C4::Context->dbh->selectrow_array(qq{
            SELECT COUNT(*) FROM '"$SQLTABLE"'
        });
        print $count;
    ')
fi

# Locate zebraidx, which performs the actual indexing.
ZEBRAIDX=$(command -v zebraidx)
if [ -z "$ZEBRAIDX" ]; then
    echo "zebraidx not found"
    exit 1
fi

# rebuild_zebra.pl is expected next to this script.
REBUILDZEBRA="$(dirname "$0")/rebuild_zebra.pl"
if [ ! -f "$REBUILDZEBRA" ]; then
    echo "$REBUILDZEBRA: file not found"
    exit 1
fi
# Show the effective configuration so that it can be reviewed before the
# run starts. The two separator lines are kept in variables to avoid
# repeating the long literals.
rule_heavy="========================================================================="
rule_light="-------------------------------------------------------------------------"

echo ""
echo "Configuration"
echo "$rule_heavy"

# Environment
echo "KOHA_CONF: $KOHA_CONF"
echo "PERL5LIB: $PERL5LIB"
echo "$rule_light"

# Run parameters
echo "Start at offset: $OFFSET"
echo "Total number of records to index: $LENGTH"
echo "Initial chunk size: $CHUNKSSIZE"
echo "Export directory: $EXPORTDIR"
echo "Exclude re-exporting: $EXCLUDEEXPORT"
echo "Log directory: $LOGDIR"
echo "Remove logs before start? $RMLOGS"
echo "Type of record: $TYPE"
echo "Reset index before start? $RESETINDEX"
echo "$rule_light"

# Resolved tool paths
echo "zebraidx path: $ZEBRAIDX"
echo "rebuild_zebra path: $REBUILDZEBRA"
echo "perl path: $PERL"
echo "$rule_heavy"
# Ask for confirmation unless --force was given. An empty answer means yes.
if [ "$NOCONFIRM" != "yes" ]; then
    confirm=y
    # printf is used because "echo -n" is not portable across /bin/sh
    # implementations.
    printf '%s' "Confirm ? [Y/n] "
    read response
    # The prompt advertises "Y", so upper-case answers are accepted too.
    # Matching with "case" also copes with answers containing blanks, which
    # an unquoted "[ $response ]" test cannot handle.
    case $response in
        "" | y | Y | yes | Yes | YES )
            ;;
        * )
            confirm=n
            ;;
    esac

    if [ "$confirm" = "n" ]; then
        exit 0
    fi
fi
# Create the export and log directories. The paths are quoted so that
# directories containing blanks work.
if ! mkdir -p "$EXPORTDIR"; then
    echo "Failed to create directory $EXPORTDIR. Aborting."
    exit 1
fi

if ! mkdir -p "$LOGDIR"; then
    echo "Failed to create directory $LOGDIR. Aborting."
    exit 1
fi

# Remove the logs of previous runs when requested. Only the directory part
# is quoted so that the *.log glob is still expanded.
if [ "$RMLOGS" = "yes" ]; then
    rm -f "$LOGDIR"/*.log
fi
# Export the records with rebuild_zebra.pl unless the existing export
# directory is to be reused (-x). Indexing is skipped here; this script
# handles it chunk by chunk.
if [ "$EXCLUDEEXPORT" = "no" ]; then
    # REBUILDZEBRA_CMD is kept for display only. The command itself is run
    # with individually quoted arguments so that paths containing blanks are
    # passed intact.
    REBUILDZEBRA_CMD="$REBUILDZEBRA $TYPESWITCH -v -x -k -d $EXPORTDIR --offset $OFFSET --length $LENGTH --skip-index"
    # printf gives the leading blank line with every /bin/sh; echo "\n..."
    # prints a literal backslash-n with shells whose echo ignores escapes.
    printf '\n%s\n' "$REBUILDZEBRA_CMD"
    "$REBUILDZEBRA" "$TYPESWITCH" -v -x -k -d "$EXPORTDIR" --offset "$OFFSET" --length "$LENGTH" --skip-index
fi
# Choose the exported record file and the name of the Koha configuration
# entry that holds the index mode for the selected record type.
if [ "$TYPE" = "biblios" ]; then
    EXPORTFILE="$EXPORTDIR/biblio/exported_records"
    indexmode_config_name="zebra_bib_index_mode"
elif [ "$TYPE" = "authorities" ]; then
    EXPORTFILE="$EXPORTDIR/authority/exported_records"
    indexmode_config_name="zebra_auth_index_mode"
else
    echo "Error: TYPE '$TYPE' is not supported"
    exit 1
fi
# Read the index mode and the Zebra configuration file path from the Koha
# configuration. The interpreter resolved earlier ($PERL) is used for
# consistency with the rest of the script, and the values are passed through
# @ARGV instead of being pasted into the Perl source as barewords.
INDEXMODE=$("$PERL" -e '
    use C4::Context;
    print C4::Context->config($ARGV[0]);
' "$indexmode_config_name")

CONFIGFILE=$("$PERL" -e '
    use C4::Context;
    my $zebra_server = ($ARGV[0] eq "biblios") ? "biblioserver" : "authorityserver";
    print C4::Context->zebraconfig($zebra_server)->{config};
' "$TYPE")
# Optionally re-initialise the Zebra index before indexing.
if [ "$RESETINDEX" = "yes" ]; then
    # RESETINDEX_CMD is kept for display only; the command is run with
    # quoted arguments so that a configuration path containing blanks works.
    RESETINDEX_CMD="$ZEBRAIDX -c $CONFIGFILE init"
    # printf gives the leading blank line with every /bin/sh; echo "\n..."
    # prints a literal backslash-n with shells whose echo ignores escapes.
    printf '\n%s\n' "$RESETINDEX_CMD"
    "$ZEBRAIDX" -c "$CONFIGFILE" init
    echo ""
fi

# Index the exported file, starting with the configured chunk size.
indexfile "$EXPORTFILE" "$CHUNKSSIZE"