3 # This file is part of Koha.
5 # Koha is free software; you can redistribute it and/or modify it
6 # under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 3 of the License, or
8 # (at your option) any later version.
10 # Koha is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with Koha; if not, see <http://www.gnu.org/licenses>.
23 use File
::Temp qw
/ tempdir /;
26 use C4
::AuthoritiesMarc
;
28 use Koha
::RecordProcessor
;
31 use constant LOCK_FILENAME
=> 'rebuild..LCK';
33 # script that checks zebradir structure & create directories & mandatory files if needed
37 $|=1; # flushes output
38 # If the cron job starts us in an unreadable dir, we will break without
40 chdir $ENV{HOME
} if (!(-r
'.'));
55 my $process_zebraqueue;
56 my $process_zebraqueue_skip_deletes;
57 my $do_not_clear_zebraqueue;
62 my $run_user = (getpwuid($<))[0];
63 my $wait_for_lock = 0;
65 my $table = 'biblioitems';
67 my $verbose_logging = 0;
68 my $zebraidx_log_opt = " -v none,fatal,warn ";
69 my $result = GetOptions
(
70 'daemon' => \
$daemon_mode,
71 'sleep:i' => \
$daemon_sleep,
76 'I|skip-index' => \
$skip_index,
77 'nosanitize' => \
$nosanitize,
79 'noxml' => \
$as_usmarc,
82 'h|help' => \
$want_help,
84 'y' => \
$do_not_clear_zebraqueue,
85 'z' => \
$process_zebraqueue,
86 'skip-deletes' => \
$process_zebraqueue_skip_deletes,
88 'length:i' => \
$length,
89 'offset:i' => \
$offset,
90 'v+' => \
$verbose_logging,
91 'run-as-root' => \
$run_as_root,
92 'wait-for-lock' => \
$wait_for_lock,
93 't|table:s' => \
$table,
96 if (not $result or $want_help) {
102 warn "Warning: You passed -x which is already the default and is now deprecated\n";
103 undef $as_xml; # Should not be used later
106 if( not defined $run_as_root and $run_user eq 'root') {
107 my $msg = "Warning: You are running this script as the user 'root'.\n";
108 $msg .= "If this is intentional you must explicitly specify this using the -run-as-root switch\n";
109 $msg .= "Please do '$0 --help' to see usage.\n";
113 if ( $as_usmarc and $nosanitize ) {
114 my $msg = "Cannot specify both -noxml and -nosanitize\n";
115 $msg .= "Please do '$0 --help' to see usage.\n";
119 if ($process_zebraqueue and ($skip_export or $reset)) {
120 my $msg = "Cannot specify -r or -s if -z is specified\n";
121 $msg .= "Please do '$0 --help' to see usage.\n";
125 if ($process_zebraqueue and $do_not_clear_zebraqueue) {
126 my $msg = "Cannot specify both -y and -z\n";
127 $msg .= "Please do '$0 --help' to see usage.\n";
132 # incompatible flags handled above: help, reset, and do_not_clear_zebraqueue
133 if ($skip_export or $keep_export or $skip_index or
134 $where or $length or $offset) {
135 my $msg = "Cannot specify -s, -k, -I, -where, -length, or -offset with -daemon.\n";
136 $msg .= "Please do '$0 --help' to see usage.\n";
141 $process_zebraqueue = 1;
144 if (not $biblios and not $authorities) {
145 my $msg = "Must specify -b or -a to reindex bibs or authorities\n";
146 $msg .= "Please do '$0 --help' to see usage.\n";
150 our @tables_allowed_for_select = ( 'biblioitems', 'items', 'biblio' );
151 unless ( grep { /^$table$/ } @tables_allowed_for_select ) {
152 die "Cannot specify -t|--table with value '$table'. Only "
153 . ( join ', ', @tables_allowed_for_select )
158 # -v is for verbose, which seems backwards here because of how logging is set
159 # on the CLI of zebraidx. It works this way. The default is to not log much
160 if ($verbose_logging >= 2) {
161 $zebraidx_log_opt = '-v none,fatal,warn,all';
165 unless ($directory) {
167 $directory = tempdir
(CLEANUP
=> ($keep_export ?
0 : 1));
171 my $biblioserverdir = C4
::Context
->zebraconfig('biblioserver')->{directory
};
172 my $authorityserverdir = C4
::Context
->zebraconfig('authorityserver')->{directory
};
174 my $kohadir = C4
::Context
->config('intranetdir');
175 my $bib_index_mode = C4
::Context
->config('zebra_bib_index_mode') // 'dom';
176 my $auth_index_mode = C4
::Context
->config('zebra_auth_index_mode') // 'dom';
178 my ($biblionumbertagfield,$biblionumbertagsubfield) = &GetMarcFromKohaField
("biblio.biblionumber","");
179 my ($biblioitemnumbertagfield,$biblioitemnumbertagsubfield) = &GetMarcFromKohaField
("biblioitems.biblioitemnumber","");
181 my $marcxml_open = q{<?xml version="1.0" encoding="UTF-8"?>
182 <collection xmlns="http://www.loc.gov/MARC21/slim">
185 my $marcxml_close = q{
# Protect against simultaneous update of the zebra index by using a lock file.
# Create our own lock directory if it's missing. This should be created
191 # by koha-zebra-ctl.sh or at system installation. If the desired directory
192 # does not exist and cannot be created, we fall back on /tmp - which will
195 my ($lockfile, $LockFH);
197 C4
::Context
->config("zebra_lockdir"),
198 '/var/lock/zebra_' . C4
::Context
->config('database'),
199 '/tmp/zebra_' . C4
::Context
->config('database')
201 #we try three possibilities (we really want to lock :)
203 ($LockFH, $lockfile) = _create_lockfile
($_.'/rebuild');
204 last if defined $LockFH;
206 if( !defined $LockFH ) {
207 print "WARNING: Could not create lock file $lockfile: $!\n";
208 print "Please check your koha-conf.xml for ZEBRA_LOCKDIR.\n";
209 print "Verify file permissions for it too.\n";
210 $use_flock = 0; # we disable file locking now and will continue
212 # note that this mimics old behavior (before we used
216 if ( $verbose_logging ) {
217 print "Zebra configuration information\n";
218 print "================================\n";
219 print "Zebra biblio directory = $biblioserverdir\n";
220 print "Zebra authorities directory = $authorityserverdir\n";
221 print "Koha directory = $kohadir\n";
222 print "Lockfile = $lockfile\n" if $lockfile;
223 print "BIBLIONUMBER in : $biblionumbertagfield\$$biblionumbertagsubfield\n";
224 print "BIBLIOITEMNUMBER in : $biblioitemnumbertagfield\$$biblioitemnumbertagsubfield\n";
225 print "================================\n";
228 my $tester = XML
::LibXML
->new();
# The main work is done here by calling do_one_pass().  We have added locking to
# avoid race conditions between full rebuilds and incremental updates either from
233 # daemon mode or periodic invocation from cron. The race can lead to an updated
234 # record being overwritten by a rebuild if the update is applied after the export
235 # by the rebuild and before the rebuild finishes (more likely to affect large
238 # We have chosen to exit immediately by default if we cannot obtain the lock
# to prevent the potential for an infinite backlog from cron invocations, but an
240 # option (wait-for-lock) is provided to let the program wait for the lock.
241 # See http://bugs.koha-community.org/bugzilla3/show_bug.cgi?id=11078 for details.
244 # For incremental updates, skip the update if the updates are locked
245 if (_flock
($LockFH, LOCK_EX
|LOCK_NB
)) {
247 $dbh = C4
::Context
->dbh;
248 do_one_pass
() if ( zebraqueue_not_empty
() );
250 if ($@
&& $verbose_logging) {
251 warn "Warning : $@\n";
253 _flock
($LockFH, LOCK_UN
);
258 # all one-off invocations
259 my $lock_mode = ($wait_for_lock) ? LOCK_EX
: LOCK_EX
|LOCK_NB
;
260 if (_flock
($LockFH, $lock_mode)) {
261 $dbh = C4
::Context
->dbh;
263 _flock
($LockFH, LOCK_UN
);
265 print "Skipping rebuild/update because flock failed on $lockfile: $!\n";
270 if ( $verbose_logging ) {
271 print "====================\n";
273 print "====================\n";
276 print "NOTHING cleaned : the export $directory has been kept.\n";
277 print "You can re-run this script with the -s ";
279 print " and -d $directory parameters";
284 print "if you just want to rebuild zebra after changing the record.abs\n";
285 print "or another zebra config file\n";
287 unless ($use_tempdir) {
288 # if we're using a temporary directory
289 # created by File::Temp, it will be removed
291 rmtree
($directory, 0, 1);
292 print "directory $directory deleted\n";
298 index_records
('authority', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_usmarc, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $authorityserverdir);
300 print "skipping authorities\n" if ( $verbose_logging );
304 index_records
('biblio', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_usmarc, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $biblioserverdir);
306 print "skipping biblios\n" if ( $verbose_logging );
310 # Check the zebra update queue and return true if there are records to process
311 # This routine will handle each of -ab, -a, or -b, but in practice we force
312 # -ab when in daemon mode.
313 sub zebraqueue_not_empty
{
316 if ($authorities && $biblios) {
317 $where_str = 'done = 0;';
319 $where_str = 'server = "biblioserver" AND done = 0;';
321 $where_str = 'server = "authorityserver" AND done = 0;';
324 $dbh->prepare('SELECT COUNT(*) FROM zebraqueue WHERE ' . $where_str );
327 my $count = $query->fetchrow_arrayref->[0];
328 print "queued records: $count\n" if $verbose_logging > 0;
# Make sure the Zebra server directory layout exists under the given path,
# creating whatever is missing (the base directory itself plus its 'key',
# 'register', 'shadow' and 'tmp' subdirectories).
#
# Parameters:
#   $base - path of the Zebra server directory; a '/' is appended to it.
# Returns:
#   1 if at least one directory had to be created, 0 otherwise.
# Dies:
#   with "Unable to create '<dir>': <errno text>" when a missing directory
#   cannot be created.
sub check_zebra_dirs {
    my ($base) = shift() . '/';
    my $needed_repairing = 0;
    my @dirs = ( '', 'key', 'register', 'shadow', 'tmp' );
    foreach my $dir (@dirs) {
        my $bdir = $base . $dir;
        if ( !-d $bdir ) {
            $needed_repairing = 1;

            # Low-precedence 'or' ties the die to mkdir's result. The former
            # 'mkdir $bdir || die ...' parsed as 'mkdir($bdir || die ...)',
            # so a failed mkdir was silently ignored.
            mkdir $bdir or die "Unable to create '$bdir': $!\n";
            print "$0: needed to create '$bdir'\n";
        }
    }
    return $needed_repairing;
}   # ----------  end of subroutine check_zebra_dirs  ----------
351 my ($record_type, $directory, $skip_export, $skip_index, $process_zebraqueue, $as_usmarc, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $server_dir) = @_;
353 my $num_records_exported = 0;
354 my $records_deleted = {};
355 my $need_reset = check_zebra_dirs
($server_dir);
357 print "$0: found broken zebra server directories: forcing a rebuild\n";
360 if ($skip_export && $verbose_logging) {
361 print "====================\n";
362 print "SKIPPING $record_type export\n";
363 print "====================\n";
365 if ( $verbose_logging ) {
366 print "====================\n";
367 print "exporting $record_type\n";
368 print "====================\n";
370 mkdir "$directory" unless (-d
$directory);
371 mkdir "$directory/$record_type" unless (-d
"$directory/$record_type");
372 if ($process_zebraqueue) {
375 unless ( $process_zebraqueue_skip_deletes ) {
376 $entries = select_zebraqueue_records
($record_type, 'deleted');
377 mkdir "$directory/del_$record_type" unless (-d
"$directory/del_$record_type");
378 $records_deleted = generate_deleted_marc_records
($record_type, $entries, "$directory/del_$record_type", $as_usmarc);
379 mark_zebraqueue_batch_done
($entries);
382 $entries = select_zebraqueue_records
($record_type, 'updated');
383 mkdir "$directory/upd_$record_type" unless (-d
"$directory/upd_$record_type");
384 $num_records_exported = export_marc_records_from_list
($record_type,$entries, "$directory/upd_$record_type", $as_usmarc, $records_deleted);
385 mark_zebraqueue_batch_done
($entries);
388 my $sth = select_all_records
($record_type);
389 $num_records_exported = export_marc_records_from_sth
($record_type, $sth, "$directory/$record_type", $as_usmarc, $nosanitize);
390 unless ($do_not_clear_zebraqueue) {
391 mark_all_zebraqueue_done
($record_type);
397 # and reindexing everything
400 if ($verbose_logging) {
401 print "====================\n";
402 print "SKIPPING $record_type indexing\n";
403 print "====================\n";
406 if ( $verbose_logging ) {
407 print "====================\n";
408 print "REINDEXING zebra\n";
409 print "====================\n";
411 my $record_fmt = ($as_usmarc) ?
'iso2709' : 'marcxml' ;
412 if ($process_zebraqueue) {
413 do_indexing
($record_type, 'adelete', "$directory/del_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
414 if %$records_deleted;
415 do_indexing
($record_type, 'update', "$directory/upd_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
416 if $num_records_exported;
418 do_indexing
($record_type, 'update', "$directory/$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
419 if ($num_records_exported or $skip_export);
425 sub select_zebraqueue_records
{
426 my ($record_type, $update_type) = @_;
428 my $server = ($record_type eq 'biblio') ?
'biblioserver' : 'authorityserver';
429 my $op = ($update_type eq 'deleted') ?
'recordDelete' : 'specialUpdate';
431 my $sth = $dbh->prepare("SELECT id, biblio_auth_number
437 $sth->execute($server, $op);
438 my $entries = $sth->fetchall_arrayref({});
441 sub mark_all_zebraqueue_done
{
442 my ($record_type) = @_;
444 my $server = ($record_type eq 'biblio') ?
'biblioserver' : 'authorityserver';
446 my $sth = $dbh->prepare("UPDATE zebraqueue SET done = 1
449 $sth->execute($server);
452 sub mark_zebraqueue_batch_done
{
455 $dbh->{AutoCommit
} = 0;
456 my $sth = $dbh->prepare("UPDATE zebraqueue SET done = 1 WHERE id = ?");
458 foreach my $id (map { $_->{id
} } @
$entries) {
461 $dbh->{AutoCommit
} = 1;
# Pick the selector that matches the record type: 'biblio' goes to
# select_all_biblios(), anything else to select_all_authorities().
# The chosen selector's return value is passed straight back to the caller.
sub select_all_records {
    my ($record_type) = @_;
    if ( $record_type eq 'biblio' ) {
        return select_all_biblios();
    }
    return select_all_authorities();
}
# Build and run the query listing the authid of every authority record to
# export. The file-level $where, $length and $offset options narrow the
# result: $where is appended verbatim as the WHERE clause, and a LIMIT is
# added only when $length is set (with $offset as the starting row if given).
# Returns the executed statement handle.
sub select_all_authorities {
    my $sql = qq{SELECT authid FROM auth_header};
    if ($where) {
        $sql .= qq{ WHERE $where };
    }
    if ($length) {
        $sql .= $offset
            ? qq{ LIMIT $offset,$length }
            : qq{ LIMIT $length };
    }
    my $sth = $dbh->prepare($sql);
    $sth->execute();
    return $sth;
}
# Build and run the query listing the biblionumber of every bibliographic
# record to export, reading them from the file-level $table.
#
# $table falls back to 'biblioitems' unless it is exactly one of the names in
# @tables_allowed_for_select. The file-level $where, $length and $offset
# options narrow the result: $where is appended verbatim as the WHERE clause,
# and a LIMIT is added only when $length is set (with $offset as the starting
# row if given).
# Returns the executed statement handle.
sub select_all_biblios {

    # Compare with 'eq', not a regex built from $table: as a pattern, a value
    # such as 'biblio|x' matched an allowed name and slipped into the SQL.
    $table = 'biblioitems'
        unless grep { $_ eq $table } @tables_allowed_for_select;
    my $strsth = qq{ SELECT biblionumber FROM $table };
    $strsth .= qq{ WHERE $where } if ($where);
    $strsth .= qq{ LIMIT $length } if ( $length && !$offset );

    # Require $length as well, as select_all_authorities does; an offset on
    # its own used to produce "LIMIT <offset>," with no row count.
    $strsth .= qq{ LIMIT $offset,$length } if ( $length && $offset );
    my $sth = $dbh->prepare($strsth);
    $sth->execute();
    return $sth;
}
491 sub export_marc_records_from_sth
{
492 my ($record_type, $sth, $directory, $as_usmarc, $nosanitize) = @_;
494 my $num_exported = 0;
495 open my $fh, '>:encoding(UTF-8) ', "$directory/exported_records" or die $!;
497 print {$fh} $marcxml_open
501 my ( $itemtag, $itemsubfield ) = GetMarcFromKohaField
("items.itemnumber",'');
502 while (my ($record_number) = $sth->fetchrow_array) {
503 print "." if ( $verbose_logging );
504 print "\r$i" unless ($i++ %100 or !$verbose_logging);
506 my $marcxml = $record_type eq 'biblio'
507 ? GetXmlBiblio
( $record_number )
508 : GetAuthorityXML
( $record_number );
509 if ($record_type eq 'biblio'){
510 my @items = GetItemsInfo
($record_number);
512 my $record = MARC
::Record
->new;
513 $record->encoding('UTF-8');
515 foreach my $item (@items){
516 my $record = Item2Marc
($item, $record_number);
517 push @itemsrecord, $record->field($itemtag);
519 $record->insert_fields_ordered(@itemsrecord);
520 my $itemsxml = $record->as_xml_record();
522 substr($marcxml, 0, length($marcxml)-10) .
523 substr($itemsxml, index($itemsxml, "</leader>\n", 0) + 10);
526 # extra test to ensure that result is valid XML; otherwise
527 # Zebra won't parse it in DOM mode
529 my $doc = $tester->parse_string($marcxml);
532 warn "Error exporting record $record_number ($record_type): $@\n";
536 $marcxml =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
537 print {$fh} $marcxml;
542 my ($marc) = get_corrected_marc_record
($record_type, $record_number, $as_usmarc);
547 $rec = $marc->as_usmarc();
549 $rec = $marc->as_xml_record(C4
::Context
->preference('marcflavour'));
551 my $doc = $tester->parse_string($rec);
554 die "invalid XML: $@";
556 $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
562 warn "Error exporting record $record_number ($record_type) ".($as_usmarc ?
"not XML" : "XML");
563 warn "... specific error is $@" if $verbose_logging;
567 print "\nRecords exported: $num_exported\n" if ( $verbose_logging );
568 print {$fh} $marcxml_close
572 return $num_exported;
575 sub export_marc_records_from_list
{
576 my ($record_type, $entries, $directory, $as_usmarc, $records_deleted) = @_;
578 my $num_exported = 0;
579 open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!;
581 print {$fh} $marcxml_open
586 # Skip any deleted records. We check for this anyway, but this reduces error spam
587 my %found = %$records_deleted;
588 foreach my $record_number ( map { $_->{biblio_auth_number
} }
589 grep { !$found{ $_->{biblio_auth_number
} }++ }
591 print "." if ( $verbose_logging );
592 print "\r$i" unless ($i++ %100 or !$verbose_logging);
593 my ($marc) = get_corrected_marc_record
($record_type, $record_number, $as_usmarc);
598 $rec = $marc->as_usmarc();
600 $rec = $marc->as_xml_record(C4
::Context
->preference('marcflavour'));
601 $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
607 warn "Error exporting record $record_number ($record_type) ".($as_usmarc ?
"not XML" : "XML");
611 print "\nRecords exported: $num_exported\n" if ( $verbose_logging );
613 print {$fh} $marcxml_close
617 return $num_exported;
620 sub generate_deleted_marc_records
{
622 my ($record_type, $entries, $directory, $as_usmarc) = @_;
624 my $records_deleted = {};
625 open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!;
627 print {$fh} $marcxml_open
631 foreach my $record_number (map { $_->{biblio_auth_number
} } @
$entries ) {
632 print "\r$i" unless ($i++ %100 or !$verbose_logging);
633 print "." if ( $verbose_logging );
635 my $marc = MARC
::Record
->new();
636 if ($record_type eq 'biblio') {
637 fix_biblio_ids
($marc, $record_number, $record_number);
639 fix_authority_id
($marc, $record_number);
641 if (C4
::Context
->preference("marcflavour") eq "UNIMARC") {
642 fix_unimarc_100
($marc);
647 $rec = $marc->as_usmarc();
649 $rec = $marc->as_xml_record(C4
::Context
->preference('marcflavour'));
650 # Remove the record's XML header
651 $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
655 $records_deleted->{$record_number} = 1;
657 print "\nRecords exported: $i\n" if ( $verbose_logging );
659 print {$fh} $marcxml_close
663 return $records_deleted;
666 sub get_corrected_marc_record
{
667 my ($record_type, $record_number, $as_usmarc) = @_;
669 my $marc = get_raw_marc_record
($record_type, $record_number, $as_usmarc);
673 if ($record_type eq 'authority') {
674 fix_authority_id
($marc, $record_number);
675 } elsif ($record_type eq 'biblio' && C4
::Context
->preference('IncludeSeeFromInSearches')) {
676 my $normalizer = Koha
::RecordProcessor
->new( { filters
=> 'EmbedSeeFromHeadings' } );
677 $marc = $normalizer->process($marc);
679 if (C4
::Context
->preference("marcflavour") eq "UNIMARC") {
680 fix_unimarc_100
($marc);
687 sub get_raw_marc_record
{
688 my ($record_type, $record_number, $as_usmarc) = @_;
691 if ($record_type eq 'biblio') {
693 my $fetch_sth = $dbh->prepare_cached("SELECT marc FROM biblioitems WHERE biblionumber = ?");
694 $fetch_sth->execute($record_number);
695 if (my ($blob) = $fetch_sth->fetchrow_array) {
696 $marc = MARC
::Record
->new_from_usmarc($blob);
698 warn "error creating MARC::Record from $blob";
701 # failure to find a bib is not a problem -
702 # a delete could have been done before
703 # trying to process a record update
705 $fetch_sth->finish();
708 eval { $marc = GetMarcBiblio
($record_number, 1); };
710 # here we do warn since catching an exception
711 # means that the bib was found but failed
713 warn "error retrieving biblio $record_number";
718 eval { $marc = GetAuthority
($record_number); };
720 warn "error retrieving authority $record_number";
728 # FIXME - this routine is suspect
729 # It blanks the Leader/00-05 and Leader/12-16 to
730 # force them to be recalculated correct when
731 # the $marc->as_usmarc() or $marc->as_xml() is called.
732 # But why is this necessary? It would be a serious bug
733 # in MARC::Record (definitely) and MARC::File::XML (arguably)
734 # if they are emitting incorrect leader values.
737 my $leader = $marc->leader;
738 substr($leader, 0, 5) = ' ';
739 substr($leader, 10, 7) = '22 ';
740 $marc->leader(substr($leader, 0, 24));
744 # FIXME - it is essential to ensure that the biblionumber is present,
745 # otherwise, Zebra will choke on the record. However, this
746 # logic belongs in the relevant C4::Biblio APIs.
748 my $biblionumber = shift;
749 my $biblioitemnumber;
751 $biblioitemnumber = shift;
753 my $sth = $dbh->prepare(
754 "SELECT biblioitemnumber FROM biblioitems WHERE biblionumber=?");
755 $sth->execute($biblionumber);
756 ($biblioitemnumber) = $sth->fetchrow_array;
758 unless ($biblioitemnumber) {
759 warn "failed to get biblioitemnumber for biblio $biblionumber";
764 # FIXME - this is cheating on two levels
765 # 1. C4::Biblio::_koha_marc_update_bib_ids is meant to be an internal function
766 # 2. Making sure that the biblionumber and biblioitemnumber are correct and
767 # present in the MARC::Record object ought to be part of GetMarcBiblio.
769 # On the other hand, this better for now than what rebuild_zebra.pl used to
770 # do, which was duplicate the code for inserting the biblionumber
771 # and biblioitemnumber
772 C4
::Biblio
::_koha_marc_update_bib_ids
($marc, '', $biblionumber, $biblioitemnumber);
# Make sure field 001 of the given MARC record holds the authority id.
# When the first 001 field is missing or carries a different value, all 001
# fields are removed and a fresh one containing $authid is inserted.
#
# Parameters:
#   $marc   - record object to update in place
#   $authid - authority id expected in field 001
sub fix_authority_id {
    # FIXME - as with fix_biblio_ids, the authid must be present
    #         for Zebra's sake.  However, this really belongs
    #         in C4::AuthoritiesMarc.
    my ($marc, $authid) = @_;
    my $current_001 = $marc->field('001');
    if ( !$current_001 or $current_001->data() ne $authid ) {
        $marc->delete_field( $marc->field('001') );
        $marc->insert_fields_ordered( MARC::Field->new( '001', $authid ) );
    }
}
788 sub fix_unimarc_100
{
789 # FIXME - again, if this is necessary, it belongs in C4::AuthoritiesMarc.
793 my $length_100a = length($marc->subfield( 100, "a" ));
794 if ( $length_100a and $length_100a == 36 ) {
795 $string = $marc->subfield( 100, "a" );
796 my $f100 = $marc->field(100);
797 $marc->delete_field($f100);
800 $string = POSIX
::strftime
( "%Y%m%d", localtime );
802 $string = sprintf( "%-*s", 35, $string );
804 substr( $string, 22, 6, "frey50" );
805 $length_100a = length($marc->subfield( 100, "a" ));
806 unless ( $length_100a and $length_100a == 36 ) {
807 $marc->delete_field($marc->field(100));
808 $marc->insert_grouped_field(MARC
::Field
->new( 100, "", "", "a" => $string ));
813 my ($record_type, $op, $record_dir, $reset_index, $noshadow, $record_format, $zebraidx_log_opt) = @_;
815 my $zebra_server = ($record_type eq 'biblio') ?
'biblioserver' : 'authorityserver';
816 my $zebra_db_name = ($record_type eq 'biblio') ?
'biblios' : 'authorities';
817 my $zebra_config = C4
::Context
->zebraconfig($zebra_server)->{'config'};
818 my $zebra_db_dir = C4
::Context
->zebraconfig($zebra_server)->{'directory'};
822 if ($noshadow or $reset_index) {
826 system("zebraidx -c $zebra_config $zebraidx_log_opt -g $record_format -d $zebra_db_name init") if $reset_index;
827 system("zebraidx -c $zebra_config $zebraidx_log_opt $noshadow -g $record_format -d $zebra_db_name $op $record_dir");
828 system("zebraidx -c $zebra_config $zebraidx_log_opt -g $record_format -d $zebra_db_name commit") unless $noshadow;
832 # test if flock is present; if so, use it; if not, return true
833 # op refers to the official flock operations including LOCK_EX,
835 # combining LOCK_EX with LOCK_NB returns immediately
837 if( !defined($use_flock) ) {
838 #check if flock is present; if not, you will have a fatal error
839 my $lock_acquired = eval { flock($fh, $op) };
840 # assuming that $fh and $op are fine(..), an undef $lock_acquired
842 $use_flock = defined($lock_acquired) ?
1 : 0;
843 print "Warning: flock could not be used!\n" if $verbose_logging && !$use_flock;
844 return 1 if !$use_flock;
845 return $lock_acquired;
847 return 1 if !$use_flock;
848 return flock($fh, $op);
852 sub _create_lockfile
{ #returns undef on failure
855 eval { mkpath
($dir, 0, oct(755)) };
858 return if !open my $fh, q{>}, $dir.'/'.LOCK_FILENAME
;
859 return ( $fh, $dir.'/'.LOCK_FILENAME
);
864 $0: reindex MARC bibs
and/or authorities
in Zebra
.
866 Use this batch job to reindex all biblio
or authority
867 records
in your Koha database
.
871 -b
index bibliographic records
873 -a
index authority records
875 -daemon Run
in daemon mode
. The program will
loop checking
876 for entries on the zebraqueue table
, processing
877 them incrementally
if present
, and then
sleep
878 for a few seconds before repeating the process
879 Checking the zebraqueue table is done with a cheap
880 SQL query
. This allows
for near realtime update of
881 the zebra search
index with low
system overhead
.
882 Use
-sleep to control the checking interval
.
884 Daemon mode implies
-z
, -a
, -b
. The program will
885 refuse to start
if options are present that
do not
886 make sense
while running as an incremental update
887 daemon
(e
.g
. -r
or -offset
).
889 -sleep 10 Seconds to
sleep between checks of the zebraqueue
890 table
in daemon mode
. The
default is
5 seconds
.
892 -z
select only updated
and deleted
893 records marked
in the zebraqueue
894 table
. Cannot be used with
-r
897 --skip
-deletes only
select record updates
, not record
898 deletions
, to avoid potential excessive
899 I
/O
when zebraidx processes deletions
.
900 If this option is used
for normal indexing
,
901 a cronjob should be set up to run
902 rebuild_zebra
.pl
-z without
--skip
-deletes
904 Only effective with
-z
.
906 -r clear Zebra
index before
907 adding records to
index. Implies
-w
.
909 -d Temporary directory
for indexing
.
910 If
not specified
, one is automatically
911 created
. The export directory
912 is automatically deleted
unless
913 you supply the
-k switch
.
915 -k Do
not delete export directory
.
917 -s Skip export
. Used
if you have
918 already exported the records
921 -noxml
index from ISO MARC blob
922 instead of MARC XML
. This
923 option is recommended only
926 -nosanitize export biblio
/authority records directly from DB marcxml
927 field without sanitizing records
. It speed up
928 dump process but could fail
if DB contains badly
929 encoded records
. Works only with
-x
,
931 -w skip shadow indexing
for this batch
933 -y
do NOT clear zebraqueue after indexing
; normally
,
934 after doing batch indexing
, zebraqueue should be
935 marked done
for the affected record type
(s
) so that
936 a running zebraqueue_daemon doesn
't try to reindex
937 the same records - specify -y to override this.
938 Cannot be used with -z.
940 -v increase the amount of logging. Normally only
941 warnings and errors from the indexing are shown.
942 Use log level 2 (-v -v) to include all Zebra logs.
944 --length 1234 how many biblio you want to export
945 --offset 1243 offset you want to start to
946 example: --offset 500 --length=500 will result in a LIMIT 500,1000 (exporting 1000 records, starting by the 500th one)
947 note that the numbers are NOT related to biblionumber, that's the intended behaviour
.
948 --where let you specify a WHERE query
, like itemtype
='BOOK'
949 or something like that
951 --run
-as
-root explicitily allow script to run as
'root' user
953 --wait-for-lock when not running
in daemon mode
, the
default
954 behavior is to abort a rebuild
if the rebuild
955 lock is busy
. This option will cause the program
956 to
wait for the
lock to free
and then
continue
957 processing the rebuild request
,
959 --table specify a table
(can be items
, biblioitems
or biblio
) to retrieve biblionumber to
index.
960 biblioitems is the
default value
.
962 --help
or -h show this message
.