Bug 7286: rebuild_zebra_sliced for biblios and authorities
[koha.git] / misc / migration_tools / rebuild_zebra.pl
blob1621e84b9bd7d4adf8236a8a16f2b589fcfa4e99
1 #!/usr/bin/perl
3 use strict;
4 #use warnings; FIXME - Bug 2505
6 use C4::Context;
7 use Getopt::Long;
8 use File::Temp qw/ tempdir /;
9 use File::Path;
10 use C4::Biblio;
11 use C4::AuthoritiesMarc;
12 use C4::Items;
15 # script that checks zebradir structure & create directories & mandatory files if needed
# Unbuffer STDOUT so progress output shows up immediately.
$| = 1;

# If the cron job starts us in an unreadable dir, we will break without
# this.
chdir $ENV{HOME} if (!(-r '.'));

# Command-line state.  These are file-level lexicals: several of them
# ($reset, $noshadow, $where, $length, $offset, $verbose_logging, ...) are
# read directly by the subroutines further down, so the names must not change.
my $directory;
my $nosanitize;
my $skip_export;
my $keep_export;
my $skip_index;
my $reset;
my $biblios;
my $authorities;
my $noxml;
my $noshadow;
my $do_munge;
my $want_help;
my $as_xml;
my $process_zebraqueue;
my $do_not_clear_zebraqueue;
my $length;
my $where;
my $offset;
my $verbose_logging = 0;
my $zebraidx_log_opt = " -v none,fatal,warn ";
my $result = GetOptions(
    'd:s'          => \$directory,
    'r|reset'      => \$reset,
    's'            => \$skip_export,
    'k'            => \$keep_export,
    'I|skip-index' => \$skip_index,
    'nosanitize'   => \$nosanitize,
    'b'            => \$biblios,
    'noxml'        => \$noxml,
    'w'            => \$noshadow,
    'munge-config' => \$do_munge,
    'a'            => \$authorities,
    'h|help'       => \$want_help,
    'x'            => \$as_xml,
    'y'            => \$do_not_clear_zebraqueue,
    'z'            => \$process_zebraqueue,
    'where:s'      => \$where,
    'length:i'     => \$length,
    'offset:i'     => \$offset,
    'v+'           => \$verbose_logging,
);

if (not $result or $want_help) {
    print_usage();
    exit 0;
}

if (not $biblios and not $authorities) {
    my $msg = "Must specify -b or -a to reindex bibs or authorities\n";
    $msg   .= "Please do '$0 --help' to see usage.\n";
    die $msg;
}

# -nosanitize dumps the stored MARCXML verbatim, so it only makes sense
# together with -x.  (The message used to name -no_xml, which is not what
# this condition tests.)
if ( !$as_xml and $nosanitize ) {
    my $msg = "Cannot specify -nosanitize without -x\n";
    $msg   .= "Please do '$0 --help' to see usage.\n";
    die $msg;
}

if ($process_zebraqueue and ($skip_export or $reset)) {
    my $msg = "Cannot specify -r or -s if -z is specified\n";
    $msg   .= "Please do '$0 --help' to see usage.\n";
    die $msg;
}

if ($process_zebraqueue and $do_not_clear_zebraqueue) {
    my $msg = "Cannot specify both -y and -z\n";
    $msg   .= "Please do '$0 --help' to see usage.\n";
    die $msg;
}

# From here on $noshadow doubles as the zebraidx command-line fragment.
if ($noshadow) {
    $noshadow = ' -n ';
}

# -v is for verbose, which seems backwards here because of how logging is set
# on the CLI of zebraidx. It works this way. The default is to not log much
if ($verbose_logging >= 2) {
    $zebraidx_log_opt = '-v none,fatal,warn,all';
}
# Export into a File::Temp directory unless one was supplied with -d.
# The temporary directory is only auto-removed when -k was not given.
my $use_tempdir = 0;
if (!$directory) {
    $use_tempdir = 1;
    $directory   = tempdir(CLEANUP => ($keep_export ? 0 : 1));
}

# Zebra and Koha configuration; these lexicals are read by the subs below.
my $biblioserverdir    = C4::Context->zebraconfig('biblioserver')->{directory};
my $authorityserverdir = C4::Context->zebraconfig('authorityserver')->{directory};

my $kohadir         = C4::Context->config('intranetdir');
my $bib_index_mode  = C4::Context->config('zebra_bib_index_mode')  || 'grs1';
my $auth_index_mode = C4::Context->config('zebra_auth_index_mode') || 'dom';

my $dbh = C4::Context->dbh;
my ($biblionumbertagfield, $biblionumbertagsubfield)
    = GetMarcFromKohaField("biblio.biblionumber", "");
my ($biblioitemnumbertagfield, $biblioitemnumbertagsubfield)
    = GetMarcFromKohaField("biblioitems.biblioitemnumber", "");

if ($verbose_logging) {
    print "Zebra configuration information\n",
          "================================\n",
          "Zebra biblio directory = $biblioserverdir\n",
          "Zebra authorities directory = $authorityserverdir\n",
          "Koha directory = $kohadir\n",
          "BIBLIONUMBER in : $biblionumbertagfield\$$biblionumbertagsubfield\n",
          "BIBLIOITEMNUMBER in : $biblioitemnumbertagfield\$$biblioitemnumbertagsubfield\n",
          "================================\n";
}

munge_config() if $do_munge;

# Authorities first, then biblios; each pass is independent.
if ($authorities) {
    index_records(
        'authority', $directory, $skip_export, $skip_index,
        $process_zebraqueue, $as_xml, $noxml, $nosanitize,
        $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt,
        $authorityserverdir
    );
}
elsif ($verbose_logging) {
    print "skipping authorities\n";
}

if ($biblios) {
    index_records(
        'biblio', $directory, $skip_export, $skip_index,
        $process_zebraqueue, $as_xml, $noxml, $nosanitize,
        $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt,
        $biblioserverdir
    );
}
elsif ($verbose_logging) {
    print "skipping biblios\n";
}

if ($verbose_logging) {
    print "====================\n",
          "CLEANING\n",
          "====================\n";
}

if ($keep_export) {
    print "NOTHING cleaned : the export $directory has been kept.\n";
    print "You can re-run this script with the -s ";
    print $use_tempdir ? " and -d $directory parameters" : "parameter";
    print "\n";
    print "if you just want to rebuild zebra after changing the record.abs\n";
    print "or another zebra config file\n";
}
elsif (!$use_tempdir) {
    # A directory created by File::Temp is removed automatically;
    # only a caller-supplied directory has to be deleted by hand.
    rmtree($directory, 0, 1);
    print "directory $directory deleted\n";
}
# Check that the Zebra working directories exist under the given server path
# and create any that are missing.
#
#   $base - Zebra server directory (a trailing '/' is appended here)
#
# Returns 1 if at least one directory had to be created, 0 otherwise.
# Dies if a missing directory cannot be created.
sub check_zebra_dirs {
    my ($base) = shift() . '/';
    my $needed_repairing = 0;

    # '' stands for the base directory itself.
    foreach my $dir ( '', 'key', 'register', 'shadow', 'tmp' ) {
        my $bdir = $base . $dir;
        next if -d $bdir;

        $needed_repairing = 1;
        # Low-precedence 'or': with '||' the die attached to $bdir (always
        # true) and a failed mkdir went unnoticed.
        mkdir $bdir or die "Unable to create '$bdir': $!\n";
        print "$0: needed to create '$bdir'\n";
    }
    return $needed_repairing;
} # ---------- end of subroutine check_zebra_dirs ----------
# Export one record type ('biblio' or 'authority') to disk and feed the
# result to zebraidx.
#
# Parameters mirror the command-line switches; $server_dir is the Zebra
# server directory checked by check_zebra_dirs().  The sub also reads and
# writes the file-level $reset (forced on when the server directories had to
# be recreated) and reads the file-level $noshadow.
sub index_records {
    my ($record_type, $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $server_dir) = @_;

    my $num_records_exported = 0;
    my $records_deleted;
    my $need_reset = check_zebra_dirs($server_dir);
    if ($need_reset) {
        print "$0: found broken zebra server directories: forcing a rebuild\n";
        $reset = 1;
    }

    # Skipping the export must not depend on the verbosity level: the old
    # test "$skip_export && $verbose_logging" re-exported everything when
    # -s was given without -v.
    if ($skip_export) {
        if ($verbose_logging) {
            print "====================\n";
            print "SKIPPING $record_type export\n";
            print "====================\n";
        }
    } else {
        if ( $verbose_logging ) {
            print "====================\n";
            print "exporting $record_type\n";
            print "====================\n";
        }
        mkdir "$directory" unless (-d $directory);
        mkdir "$directory/$record_type" unless (-d "$directory/$record_type");
        if ($process_zebraqueue) {
            # Deletions are exported first so that the update pass can skip
            # records that are already gone.
            my $entries = select_zebraqueue_records($record_type, 'deleted');
            mkdir "$directory/del_$record_type" unless (-d "$directory/del_$record_type");
            $records_deleted = generate_deleted_marc_records($record_type, $entries, "$directory/del_$record_type", $as_xml);
            mark_zebraqueue_batch_done($entries);
            $entries = select_zebraqueue_records($record_type, 'updated');
            mkdir "$directory/upd_$record_type" unless (-d "$directory/upd_$record_type");
            $num_records_exported = export_marc_records_from_list($record_type,
                $entries, "$directory/upd_$record_type", $as_xml, $noxml, $records_deleted);
            mark_zebraqueue_batch_done($entries);
        } else {
            my $sth = select_all_records($record_type);
            $num_records_exported = export_marc_records_from_sth($record_type, $sth, "$directory/$record_type", $as_xml, $noxml, $nosanitize);
            unless ($do_not_clear_zebraqueue) {
                mark_all_zebraqueue_done($record_type);
            }
        }
    }

    #
    # and reindexing everything
    #
    if ($skip_index) {
        if ($verbose_logging) {
            print "====================\n";
            print "SKIPPING $record_type indexing\n";
            print "====================\n";
        }
    } else {
        if ( $verbose_logging ) {
            print "====================\n";
            print "REINDEXING zebra\n";
            print "====================\n";
        }
        my $record_fmt = ($as_xml) ? 'marcxml' : 'iso2709' ;
        if ($process_zebraqueue) {
            # $records_deleted stays undef when the export was skipped.
            do_indexing($record_type, 'delete', "$directory/del_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
                if $records_deleted && %$records_deleted;
            do_indexing($record_type, 'update', "$directory/upd_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
                if $num_records_exported;
        } else {
            do_indexing($record_type, 'update', "$directory/$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
                if ($num_records_exported or $skip_export);
        }
    }
}
# Fetch the pending zebraqueue rows for one record type.
#
#   $record_type - 'biblio' selects the biblioserver queue, anything else
#                  the authorityserver queue
#   $update_type - 'deleted' selects recordDelete rows, anything else
#                  specialUpdate rows
#
# Returns an array ref of hash refs (keys: id, biblio_auth_number), newest
# queue id first.
sub select_zebraqueue_records {
    my ($record_type, $update_type) = @_;

    my $server = 'authorityserver';
    $server = 'biblioserver' if $record_type eq 'biblio';

    my $op = 'specialUpdate';
    $op = 'recordDelete' if $update_type eq 'deleted';

    my $queue_sth = $dbh->prepare("SELECT id, biblio_auth_number
                                 FROM zebraqueue
                                 WHERE server = ?
                                 AND operation = ?
                                 AND done = 0
                                 ORDER BY id DESC");
    $queue_sth->execute($server, $op);
    my $entries = $queue_sth->fetchall_arrayref({});
    return $entries;
}
# Flag every pending zebraqueue row of the given record type as done.
# 'biblio' maps to the biblioserver queue, anything else to authorityserver.
sub mark_all_zebraqueue_done {
    my ($record_type) = @_;

    my $server = 'authorityserver';
    if ($record_type eq 'biblio') {
        $server = 'biblioserver';
    }

    my $update_sth = $dbh->prepare("UPDATE zebraqueue SET done = 1
                                  WHERE server = ?
                                  AND done = 0");
    $update_sth->execute($server);
}
# Flag a batch of zebraqueue rows as done inside a single transaction.
#
#   $entries - array ref of hash refs, each carrying the queue row 'id'
#
# AutoCommit is switched off for the duration of the batch and restored
# afterwards.
sub mark_zebraqueue_batch_done {
    my ($entries) = @_;

    $dbh->{AutoCommit} = 0;
    my $sth = $dbh->prepare("UPDATE zebraqueue SET done = 1 WHERE id = ?");
    foreach my $entry (@$entries) {
        $sth->execute($entry->{id});
    }
    # Commit after the updates.  The old code committed before the loop and
    # relied on re-enabling AutoCommit to flush the batch implicitly.
    $dbh->commit();
    $dbh->{AutoCommit} = 1;
}
# Return an executed statement handle over every record id of the given
# type: biblios for 'biblio', authorities for anything else.
sub select_all_records {
    my ($record_type) = @_;

    if ($record_type eq 'biblio') {
        return select_all_biblios();
    }
    return select_all_authorities();
}
# Build and run the query listing authority ids.  Honours the file-level
# $where, $length and $offset options; $offset is ignored unless $length is
# also set.  Returns the executed statement handle.
sub select_all_authorities {
    my $query = qq{SELECT authid FROM auth_header};
    if ($where) {
        $query .= qq{ WHERE $where };
    }
    if ($length) {
        $query .= $offset ? qq{ LIMIT $offset,$length } : qq{ LIMIT $length };
    }
    my $authid_sth = $dbh->prepare($query);
    $authid_sth->execute();
    return $authid_sth;
}
# Build and run the query listing biblionumbers.  Honours the file-level
# $where, $length and $offset options.  Returns the executed statement handle.
sub select_all_biblios {
    my $strsth = qq{ SELECT biblionumber FROM biblioitems };
    $strsth.=qq{ WHERE $where } if ($where);
    $strsth.=qq{ LIMIT $length } if ($length && !$offset);
    # Require $length as select_all_authorities does: an offset on its own
    # used to produce "LIMIT <offset>," which is not valid SQL.
    $strsth.=qq{ LIMIT $offset,$length } if ($length && $offset);
    my $sth = $dbh->prepare($strsth);
    $sth->execute();
    return $sth;
}
# Decide whether an export file needs the XML declaration and <collection>
# root element: only for XML exports of a record type whose configured index
# mode is 'dom'.  Returns 1 or 0.
sub include_xml_wrapper {
    my ($as_xml, $record_type) = @_;

    my %index_mode_for = (
        biblio    => $bib_index_mode,
        authority => $auth_index_mode,
    );

    return 0 if !$as_xml;
    return 0 if !exists $index_mode_for{$record_type};
    return $index_mode_for{$record_type} eq 'dom' ? 1 : 0;
}
# Export every record whose id is returned by $sth into
# "$directory/exported_records".
#
#   $record_type - 'biblio' or 'authority'
#   $sth         - executed statement handle yielding one record id per row
#   $directory   - existing directory that receives the export file
#   $as_xml      - write MARCXML instead of ISO 2709
#   $noxml       - passed through to get_corrected_marc_record()
#   $nosanitize  - write the stored MARCXML as-is (biblio item fields are
#                  spliced in) instead of going through MARC::Record
#
# Returns the number of records written.  Dies if the export file cannot be
# opened or closed.
sub export_marc_records_from_sth {
    my ($record_type, $sth, $directory, $as_xml, $noxml, $nosanitize) = @_;

    my $num_exported = 0;
    my $export_file  = "$directory/exported_records";
    open my $fh, '>:encoding(UTF-8)', $export_file
        or die "Unable to open '$export_file' for writing: $!\n";

    my $wrap_in_collection = include_xml_wrapper($as_xml, $record_type);
    if ($wrap_in_collection) {
        # include XML declaration and root element
        print {$fh} '<?xml version="1.0" encoding="UTF-8"?><collection>';
    }
    my $i = 0;
    my ( $itemtag, $itemsubfield ) = GetMarcFromKohaField("items.itemnumber",'');
    while (my ($record_number) = $sth->fetchrow_array) {
        print "." if ( $verbose_logging );
        print "\r$i" unless ($i++ %100 or !$verbose_logging);
        if ( $nosanitize ) {
            my $marcxml = $record_type eq 'biblio'
                          ? GetXmlBiblio( $record_number )
                          : GetAuthorityXML( $record_number );
            if ($record_type eq 'biblio'){
                my @items = GetItemsInfo($record_number);
                if (@items){
                    # Collect the item fields in a scratch record so they
                    # can be serialised in one go.
                    my $items_record = MARC::Record->new;
                    $items_record->encoding('UTF-8');
                    my @item_fields;
                    foreach my $item (@items){
                        my $item_marc = Item2Marc($item, $record_number);
                        push @item_fields, $item_marc->field($itemtag);
                    }
                    $items_record->insert_fields_ordered(@item_fields);
                    my $itemsxml = $items_record->as_xml_record();
                    # Drop the last 10 characters of the biblio XML
                    # (presumably the closing record tag plus newline) and
                    # append everything after the scratch record's leader.
                    $marcxml =
                        substr($marcxml, 0, length($marcxml)-10) .
                        substr($itemsxml, index($itemsxml, "</leader>\n", 0) + 10);
                }
            }
            if ( $marcxml ) {
                print {$fh} $marcxml;
                $num_exported++;
            }
            next;
        }
        my ($marc) = get_corrected_marc_record($record_type, $record_number, $noxml);
        if (defined $marc) {
            eval {
                my $rec;
                if ($as_xml) {
                    $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
                    $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
                } else {
                    $rec = $marc->as_usmarc();
                }
                print {$fh} $rec;
                $num_exported++;
            };
            if ($@) {
                warn "Error exporting record $record_number ($record_type) ".($noxml ? "not XML" : "XML");
            }
        }
    }
    print "\nRecords exported: $num_exported\n" if ( $verbose_logging );
    print {$fh} '</collection>' if $wrap_in_collection;
    # Buffered write errors only surface here; a truncated export must not
    # be handed to zebraidx silently.
    close $fh or die "Unable to close '$export_file': $!\n";
    return $num_exported;
}
# Export the records named by a list of zebraqueue entries into
# "$directory/exported_records".
#
#   $record_type     - 'biblio' or 'authority'
#   $entries         - array ref of hash refs carrying 'biblio_auth_number'
#   $directory       - existing directory that receives the export file
#   $as_xml          - write MARCXML instead of ISO 2709
#   $noxml           - passed through to get_corrected_marc_record()
#   $records_deleted - hash ref keyed by record numbers that were deleted
#                      and must be skipped
#
# Returns the number of records actually written.  Dies if the export file
# cannot be opened or closed.
sub export_marc_records_from_list {
    my ($record_type, $entries, $directory, $as_xml, $noxml, $records_deleted) = @_;

    my $num_exported = 0;
    my $export_file  = "$directory/exported_records";
    open my $fh, '>:encoding(UTF-8)', $export_file
        or die "Unable to open '$export_file' for writing: $!\n";

    my $wrap_in_collection = include_xml_wrapper($as_xml, $record_type);
    if ($wrap_in_collection) {
        # include XML declaration and root element
        print {$fh} '<?xml version="1.0" encoding="UTF-8"?><collection>';
    }
    my $i = 0;

    # Skip any deleted records.  We check for this anyway, but this reduces error spam.
    # The same hash also drops duplicate queue entries for one record.
    my %found = %{ $records_deleted || {} };
    my @record_numbers = grep { !$found{$_}++ }
                         map  { $_->{biblio_auth_number} }
                         @$entries;

    foreach my $record_number (@record_numbers) {
        print "." if ( $verbose_logging );
        print "\r$i" unless ($i++ %100 or !$verbose_logging);
        my ($marc) = get_corrected_marc_record($record_type, $record_number, $noxml);
        if (defined $marc) {
            eval {
                my $rec;
                if ($as_xml) {
                    $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
                    $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
                } else {
                    $rec = $marc->as_usmarc();
                }
                print {$fh} $rec;
                # Count only here.  A second increment after the eval used
                # to double-count successes and also count failures.
                $num_exported++;
            };
            if ($@) {
                warn "Error exporting record $record_number ($record_type) ".($noxml ? "not XML" : "XML");
            }
        }
    }
    print "\nRecords exported: $num_exported\n" if ( $verbose_logging );
    print {$fh} '</collection>' if $wrap_in_collection;
    close $fh or die "Unable to close '$export_file': $!\n";
    return $num_exported;
}
# Write one stub MARC record per deleted zebraqueue entry into
# "$directory/exported_records".  Each stub is an empty record that has only
# been given its identifier (and, for UNIMARC, a 100 field).
#
#   $record_type - 'biblio' or 'authority'
#   $entries     - array ref of hash refs carrying 'biblio_auth_number'
#   $directory   - existing directory that receives the export file
#   $as_xml      - write MARCXML instead of ISO 2709
#
# Returns a hash ref keyed by the record numbers that were written.  Dies if
# the export file cannot be opened or closed.
sub generate_deleted_marc_records {
    my ($record_type, $entries, $directory, $as_xml) = @_;

    my $records_deleted = {};
    my $export_file     = "$directory/exported_records";
    open my $fh, '>:encoding(UTF-8)', $export_file
        or die "Unable to open '$export_file' for writing: $!\n";

    my $wrap_in_collection = include_xml_wrapper($as_xml, $record_type);
    if ($wrap_in_collection) {
        # include XML declaration and root element
        print {$fh} '<?xml version="1.0" encoding="UTF-8"?><collection>';
    }
    my $i = 0;
    foreach my $record_number (map { $_->{biblio_auth_number} } @$entries ) {
        print "\r$i" unless ($i++ %100 or !$verbose_logging);
        print "." if ( $verbose_logging );

        my $marc = MARC::Record->new();
        if ($record_type eq 'biblio') {
            # The record number is passed for both the biblionumber and the
            # biblioitemnumber, which avoids a lookup for a deleted row.
            fix_biblio_ids($marc, $record_number, $record_number);
        } else {
            fix_authority_id($marc, $record_number);
        }
        if (C4::Context->preference("marcflavour") eq "UNIMARC") {
            fix_unimarc_100($marc);
        }

        my $rec;
        if ($as_xml) {
            $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
            $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
        } else {
            $rec = $marc->as_usmarc();
        }
        print {$fh} $rec;

        $records_deleted->{$record_number} = 1;
    }
    print "\nRecords exported: $i\n" if ( $verbose_logging );
    print {$fh} '</collection>' if $wrap_in_collection;
    # Buffered write errors only surface at close time.
    close $fh or die "Unable to close '$export_file': $!\n";
    return $records_deleted;
}
# Load a record via get_raw_marc_record() and apply the fix-ups needed
# before export: the leader is always reset, authorities get their 001
# checked, and UNIMARC records get their 100 field normalised.
# Returns the record, or undef when it could not be loaded.
sub get_corrected_marc_record {
    my ($record_type, $record_number, $noxml) = @_;

    my $marc = get_raw_marc_record($record_type, $record_number, $noxml);
    return $marc unless defined $marc;

    fix_leader($marc);
    fix_authority_id($marc, $record_number) if $record_type eq 'authority';
    fix_unimarc_100($marc) if C4::Context->preference("marcflavour") eq "UNIMARC";

    return $marc;
}
# Fetch one record as a MARC::Record object.
#
#   $record_type   - 'biblio', or anything else for an authority
#   $record_number - biblionumber or authid
#   $noxml         - for biblios, read the ISO 2709 blob from
#                    biblioitems.marc instead of calling GetMarcBiblio()
#
# Returns the record, or nothing when it cannot be retrieved.
sub get_raw_marc_record {
    my ($record_type, $record_number, $noxml) = @_;

    if ($record_type ne 'biblio') {
        my $authority;
        eval { $authority = GetAuthority($record_number); };
        if ($@) {
            warn "error retrieving authority $record_number";
            return;
        }
        return $authority;
    }

    if (!$noxml) {
        my $biblio;
        eval { $biblio = GetMarcBiblio($record_number, 1); };
        if ($@ || !$biblio) {
            # here we do warn since catching an exception
            # means that the bib was found but failed
            # to be parsed
            warn "error retrieving biblio $record_number";
            return;
        }
        return $biblio;
    }

    my $blob_sth = $dbh->prepare_cached("SELECT marc FROM biblioitems WHERE biblionumber = ?");
    $blob_sth->execute($record_number);
    my $from_blob;
    if (my ($blob) = $blob_sth->fetchrow_array) {
        $from_blob = MARC::Record->new_from_usmarc($blob);
        warn "error creating MARC::Record from $blob" unless $from_blob;
    }
    # failure to find a bib is not a problem -
    # a delete could have been done before
    # trying to process a record update
    $blob_sth->finish();

    return unless $from_blob;
    return $from_blob;
}
# Blank the computed parts of a record's leader so they are recalculated
# when the record is serialised.
#
#   $marc - object with a leader() getter/setter (a MARC::Record)
#
# Leader/00-04 is replaced by five spaces and Leader/10-16 by '22' followed
# by five spaces; the result is stored back truncated to 24 characters.
sub fix_leader {
    # FIXME - this routine is suspect
    # It blanks the Leader/00-05 and Leader/12-16 to
    # force them to be recalculated correct when
    # the $marc->as_usmarc() or $marc->as_xml() is called.
    # But why is this necessary?  It would be a serious bug
    # in MARC::Record (definitely) and MARC::File::XML (arguably)
    # if they are emitting incorrect leader values.
    my $marc = shift;

    my $leader = $marc->leader;
    # The replacements must be exactly as wide as the spans they overwrite,
    # otherwise every later leader position shifts.
    substr($leader,  0, 5, ' ' x 5);
    substr($leader, 10, 7, '22' . ' ' x 5);
    $marc->leader(substr($leader, 0, 24));
}
# Stamp the biblionumber and biblioitemnumber into a MARC record.
#
#   $marc             - record to update
#   $biblionumber     - biblionumber to store
#   $biblioitemnumber - optional; looked up in biblioitems when omitted
#
# Returns 1 on success, 0 (after a warning) when the biblioitemnumber had to
# be looked up and could not be found.
sub fix_biblio_ids {
    # FIXME - it is essential to ensure that the biblionumber is present,
    #         otherwise, Zebra will choke on the record.  However, this
    #         logic belongs in the relevant C4::Biblio APIs.
    my ($marc, $biblionumber, @rest) = @_;

    my $biblioitemnumber;
    if (@rest) {
        $biblioitemnumber = shift @rest;
    }
    else {
        my $lookup_sth = $dbh->prepare(
            "SELECT biblioitemnumber FROM biblioitems WHERE biblionumber=?");
        $lookup_sth->execute($biblionumber);
        ($biblioitemnumber) = $lookup_sth->fetchrow_array;
        $lookup_sth->finish;
        if (!$biblioitemnumber) {
            warn "failed to get biblioitemnumber for biblio $biblionumber";
            return 0;
        }
    }

    # FIXME - this is cheating on two levels
    # 1. C4::Biblio::_koha_marc_update_bib_ids is meant to be an internal function
    # 2. Making sure that the biblionumber and biblioitemnumber are correct and
    #    present in the MARC::Record object ought to be part of GetMarcBiblio.
    #
    # On the other hand, this better for now than what rebuild_zebra.pl used to
    # do, which was duplicate the code for inserting the biblionumber
    # and biblioitemnumber
    C4::Biblio::_koha_marc_update_bib_ids($marc, '', $biblionumber, $biblioitemnumber);

    return 1;
}
# Make sure field 001 of an authority record holds $authid.  A record whose
# first 001 already matches is left untouched; otherwise all 001 fields are
# removed and a fresh one is inserted.
sub fix_authority_id {
    # FIXME - as with fix_biblio_ids, the authid must be present
    #         for Zebra's sake.  However, this really belongs
    #         in C4::AuthoritiesMarc.
    my ($marc, $authid) = @_;

    my $current_001 = $marc->field('001');
    if (!$current_001 || $current_001->data() ne $authid) {
        $marc->delete_field($marc->field('001'));
        $marc->insert_fields_ordered(MARC::Field->new('001', $authid));
    }
}
# Rebuild subfield 100$a of a UNIMARC record so that positions 22-27 hold
# "frey50".
#
# If 100$a is already 36 characters long it is used as the starting string;
# otherwise the string starts as today's date (YYYYMMDD) padded to 35
# characters.  The 100 field is then replaced by one containing only the
# resulting $a, unless a 36-character 100$a is still present at that point.
sub fix_unimarc_100 {
    # FIXME - again, if this is necessary, it belongs in C4::AuthoritiesMarc.
    my $marc = shift;

    # This file never loads POSIX itself; do not rely on another module
    # having done so before POSIX::strftime is called.
    require POSIX;

    my $string;
    if ( length($marc->subfield( 100, "a" )) == 36 ) {
        $string = $marc->subfield( 100, "a" );
        my $f100 = $marc->field(100);
        $marc->delete_field($f100);
    }
    else {
        # "%Y%m%d" yields digits only, so no dash stripping is needed.
        $string = POSIX::strftime( "%Y%m%d", localtime );
        $string = sprintf( "%-*s", 35, $string );
    }
    substr( $string, 22, 6, "frey50" );

    # Re-checked on purpose: the first 100 field may have just been removed.
    unless ( length($marc->subfield( 100, "a" )) == 36 ) {
        $marc->delete_field($marc->field(100));
        $marc->insert_grouped_field(MARC::Field->new( 100, "", "", "a" => $string ));
    }
}
# Run zebraidx over an export directory.
#
#   $record_type      - 'biblio' or 'authority'; selects the Zebra server
#                       configuration and database name
#   $op               - zebraidx operation, 'update' or 'delete'
#   $record_dir       - directory holding the exported records
#   $reset_index      - run "zebraidx init" first when true
#   $noshadow         - extra zebraidx switch text; when true the final
#                       "commit" step is skipped
#   $record_format    - value for zebraidx -g
#   $zebraidx_log_opt - logging switches for zebraidx
#
# A zebraidx run that fails is reported with a warning; the remaining steps
# are still attempted, as before.
sub do_indexing {
    my ($record_type, $op, $record_dir, $reset_index, $noshadow, $record_format, $zebraidx_log_opt) = @_;

    my $zebra_server  = ($record_type eq 'biblio') ? 'biblioserver' : 'authorityserver';
    my $zebra_db_name = ($record_type eq 'biblio') ? 'biblios' : 'authorities';
    my $zebra_config  = C4::Context->zebraconfig($zebra_server)->{'config'};

    my $shadow_opt = defined $noshadow ? $noshadow : '';
    my $zebraidx   = "zebraidx -c $zebra_config $zebraidx_log_opt";
    my $target     = "-g $record_format -d $zebra_db_name";

    my @commands;
    push @commands, "$zebraidx $target init" if $reset_index;
    push @commands, "$zebraidx $shadow_opt $target $op $record_dir";
    push @commands, "$zebraidx $target commit" unless $noshadow;

    foreach my $command (@commands) {
        my $status = system($command);
        next if $status == 0;
        if ($status == -1) {
            warn "could not run '$command': $!\n";
        } else {
            warn "'$command' failed (wait status $status)\n";
        }
    }
}
# Print the command-line help text to the currently selected output handle.
# The text covers every switch accepted by GetOptions(), including
# -I/--skip-index, and its LIMIT example matches the "LIMIT offset,length"
# clause the select_all_* routines build.
sub print_usage {
    print <<_USAGE_;
$0: reindex MARC bibs and/or authorities in Zebra.

Use this batch job to reindex all biblio or authority
records in your Koha database.  This job is useful
only if you are using Zebra; if you are using the 'NoZebra'
mode, this job should not be used.

Parameters:
    -b                      index bibliographic records

    -a                      index authority records

    -z                      select only updated and deleted
                            records marked in the zebraqueue
                            table.  Cannot be used with -r
                            or -s.

    -r                      clear Zebra index before
                            adding records to index

    -d                      Temporary directory for indexing.
                            If not specified, one is automatically
                            created.  The export directory
                            is automatically deleted unless
                            you supply the -k switch.

    -k                      Do not delete export directory.

    -s                      Skip export.  Used if you have
                            already exported the records
                            in a previous run.

    -I, --skip-index        Skip indexing.  Only the export
                            step is performed.

    -noxml                  index from ISO MARC blob
                            instead of MARC XML.  This
                            option is recommended only
                            for advanced users.

    -x                      export and index as xml instead of iso2709 (biblios only).
                            Use this if you might have records > 99,999 chars.

    -nosanitize             export biblio/authority records directly from DB marcxml
                            field without sanitizing records.  It speeds up the
                            dump process but could fail if the DB contains badly
                            encoded records.  Works only with -x.

    -w                      skip shadow indexing for this batch

    -y                      do NOT clear zebraqueue after indexing; normally,
                            after doing batch indexing, zebraqueue should be
                            marked done for the affected record type(s) so that
                            a running zebraqueue_daemon doesn't try to reindex
                            the same records - specify -y to override this.
                            Cannot be used with -z.

    -v                      increase the amount of logging.  Normally only
                            warnings and errors from the indexing are shown.
                            Use log level 2 (-v -v) to include all Zebra logs.

    --length   1234         how many records you want to export
    --offset   1243         offset you want to start from
                            example: --offset 500 --length=500 will result in a LIMIT 500,500
                            (exporting 500 records, starting after the 500th one)
                            note that the numbers are NOT related to biblionumber,
                            that's the intended behaviour.
    --where                 lets you specify a WHERE query, like itemtype='BOOK'
                            or something like that

    --munge-config          Deprecated option to try
                            to fix Zebra config files.
    --help or -h            show this message.
_USAGE_
}
719 # FIXME: the following routines are deprecated and
720 # will be removed once it is determined whether
721 # a script to fix Zebra configuration files is
722 # actually needed.
723 sub munge_config {
725 # creating zebra-biblios.cfg depending on system
728 # getting zebraidx directory
729 my $zebraidxdir;
730 foreach (qw(/usr/local/bin/zebraidx
731 /opt/bin/zebraidx
732 /usr/bin/zebraidx
733 )) {
734 if ( -f $_ ) {
735 $zebraidxdir=$_;
739 unless ($zebraidxdir) {
740 print qq|
741 ERROR: could not find zebraidx directory
742 ERROR: Either zebra is not installed,
743 ERROR: or it's in a directory I don't checked.
744 ERROR: do a which zebraidx and edit this file to add the result you get
746 exit;
748 $zebraidxdir =~ s/\/bin\/.*//;
749 print "Info : zebra is in $zebraidxdir \n";
751 # getting modules directory
752 my $modulesdir;
753 foreach (qw(/usr/local/lib/idzebra-2.0/modules/mod-grs-xml.so
754 /usr/local/lib/idzebra/modules/mod-grs-xml.so
755 /usr/lib/idzebra/modules/mod-grs-xml.so
756 /usr/lib/idzebra-2.0/modules/mod-grs-xml.so
757 )) {
758 if ( -f $_ ) {
759 $modulesdir=$_;
763 unless ($modulesdir) {
764 print qq|
765 ERROR: could not find mod-grs-xml.so directory
766 ERROR: Either zebra is not properly compiled (libxml2 is not setup and you don t have mod-grs-xml.so,
767 ERROR: or it's in a directory I don't checked.
768 ERROR: find where mod-grs-xml.so is and edit this file to add the result you get
770 exit;
772 $modulesdir =~ s/\/modules\/.*//;
773 print "Info: zebra modules dir : $modulesdir\n";
775 # getting tab directory
776 my $tabdir;
777 foreach (qw(/usr/local/share/idzebra/tab/explain.att
778 /usr/local/share/idzebra-2.0/tab/explain.att
779 /usr/share/idzebra/tab/explain.att
780 /usr/share/idzebra-2.0/tab/explain.att
781 )) {
782 if ( -f $_ ) {
783 $tabdir=$_;
787 unless ($tabdir) {
788 print qq|
789 ERROR: could not find explain.att directory
790 ERROR: Either zebra is not properly compiled,
791 ERROR: or it's in a directory I don't checked.
792 ERROR: find where explain.att is and edit this file to add the result you get
794 exit;
796 $tabdir =~ s/\/tab\/.*//;
797 print "Info: tab dir : $tabdir\n";
800 # AUTHORITIES creating directory structure
802 my $created_dir_or_file = 0;
803 if ($authorities) {
804 if ( $verbose_logging ) {
805 print "====================\n";
806 print "checking directories & files for authorities\n";
807 print "====================\n";
809 unless (-d "$authorityserverdir") {
810 system("mkdir -p $authorityserverdir");
811 print "Info: created $authorityserverdir\n";
812 $created_dir_or_file++;
814 unless (-d "$authorityserverdir/lock") {
815 mkdir "$authorityserverdir/lock";
816 print "Info: created $authorityserverdir/lock\n";
817 $created_dir_or_file++;
819 unless (-d "$authorityserverdir/register") {
820 mkdir "$authorityserverdir/register";
821 print "Info: created $authorityserverdir/register\n";
822 $created_dir_or_file++;
824 unless (-d "$authorityserverdir/shadow") {
825 mkdir "$authorityserverdir/shadow";
826 print "Info: created $authorityserverdir/shadow\n";
827 $created_dir_or_file++;
829 unless (-d "$authorityserverdir/tab") {
830 mkdir "$authorityserverdir/tab";
831 print "Info: created $authorityserverdir/tab\n";
832 $created_dir_or_file++;
834 unless (-d "$authorityserverdir/key") {
835 mkdir "$authorityserverdir/key";
836 print "Info: created $authorityserverdir/key\n";
837 $created_dir_or_file++;
840 unless (-d "$authorityserverdir/etc") {
841 mkdir "$authorityserverdir/etc";
842 print "Info: created $authorityserverdir/etc\n";
843 $created_dir_or_file++;
847 # AUTHORITIES : copying mandatory files
849 # the record model, depending on marc flavour
850 unless (-f "$authorityserverdir/tab/record.abs") {
851 if (C4::Context->preference("marcflavour") eq "UNIMARC") {
852 system("cp -f $kohadir/etc/zebradb/marc_defs/unimarc/authorities/record.abs $authorityserverdir/tab/record.abs");
853 print "Info: copied record.abs for UNIMARC\n";
854 } else {
855 system("cp -f $kohadir/etc/zebradb/marc_defs/marc21/authorities/record.abs $authorityserverdir/tab/record.abs");
856 print "Info: copied record.abs for USMARC\n";
858 $created_dir_or_file++;
860 unless (-f "$authorityserverdir/tab/sort-string-utf.chr") {
861 system("cp -f $kohadir/etc/zebradb/lang_defs/fr/sort-string-utf.chr $authorityserverdir/tab/sort-string-utf.chr");
862 print "Info: copied sort-string-utf.chr\n";
863 $created_dir_or_file++;
865 unless (-f "$authorityserverdir/tab/word-phrase-utf.chr") {
866 system("cp -f $kohadir/etc/zebradb/lang_defs/fr/sort-string-utf.chr $authorityserverdir/tab/word-phrase-utf.chr");
867 print "Info: copied word-phase-utf.chr\n";
868 $created_dir_or_file++;
870 unless (-f "$authorityserverdir/tab/auth1.att") {
871 system("cp -f $kohadir/etc/zebradb/authorities/etc/bib1.att $authorityserverdir/tab/auth1.att");
872 print "Info: copied auth1.att\n";
873 $created_dir_or_file++;
875 unless (-f "$authorityserverdir/tab/default.idx") {
876 system("cp -f $kohadir/etc/zebradb/etc/default.idx $authorityserverdir/tab/default.idx");
877 print "Info: copied default.idx\n";
878 $created_dir_or_file++;
881 unless (-f "$authorityserverdir/etc/ccl.properties") {
882 # system("cp -f $kohadir/etc/zebradb/ccl.properties ".C4::Context->zebraconfig('authorityserver')->{ccl2rpn});
883 system("cp -f $kohadir/etc/zebradb/ccl.properties $authorityserverdir/etc/ccl.properties");
884 print "Info: copied ccl.properties\n";
885 $created_dir_or_file++;
887 unless (-f "$authorityserverdir/etc/pqf.properties") {
888 # system("cp -f $kohadir/etc/zebradb/pqf.properties ".C4::Context->zebraconfig('authorityserver')->{ccl2rpn});
889 system("cp -f $kohadir/etc/zebradb/pqf.properties $authorityserverdir/etc/pqf.properties");
890 print "Info: copied pqf.properties\n";
891 $created_dir_or_file++;
895 # AUTHORITIES : copying mandatory files
897 unless (-f C4::Context->zebraconfig('authorityserver')->{config}) {
898 open my $zd, '>:encoding(UTF-8)' ,C4::Context->zebraconfig('authorityserver')->{config};
899 print {$zd} "
900 # generated by KOHA/misc/migration_tools/rebuild_zebra.pl
901 profilePath:\${srcdir:-.}:$authorityserverdir/tab/:$tabdir/tab/:\${srcdir:-.}/tab/
903 encoding: UTF-8
904 # Files that describe the attribute sets supported.
905 attset: auth1.att
906 attset: explain.att
907 attset: gils.att
909 modulePath:$modulesdir/modules/
910 # Specify record type
911 iso2709.recordType:grs.marcxml.record
912 recordType:grs.xml
913 recordId: (auth1,Local-Number)
914 storeKeys:1
915 storeData:1
918 # Lock File Area
919 lockDir: $authorityserverdir/lock
920 perm.anonymous:r
921 perm.kohaadmin:rw
922 register: $authorityserverdir/register:4G
923 shadow: $authorityserverdir/shadow:4G
925 # Temp File area for result sets
926 setTmpDir: $authorityserverdir/tmp
928 # Temp File area for index program
929 keyTmpDir: $authorityserverdir/key
931 # Approx. Memory usage during indexing
932 memMax: 40M
933 rank:rank-1
935 print "Info: creating zebra-authorities.cfg\n";
936 $created_dir_or_file++;
939 if ($created_dir_or_file) {
940 print "Info: created : $created_dir_or_file directories & files\n";
941 } else {
942 print "Info: file & directories OK\n";
946 if ($biblios) {
947 if ( $verbose_logging ) {
948 print "====================\n";
949 print "checking directories & files for biblios\n";
950 print "====================\n";
954 # BIBLIOS : creating directory structure
956 unless (-d "$biblioserverdir") {
957 system("mkdir -p $biblioserverdir");
958 print "Info: created $biblioserverdir\n";
959 $created_dir_or_file++;
961 unless (-d "$biblioserverdir/lock") {
962 mkdir "$biblioserverdir/lock";
963 print "Info: created $biblioserverdir/lock\n";
964 $created_dir_or_file++;
966 unless (-d "$biblioserverdir/register") {
967 mkdir "$biblioserverdir/register";
968 print "Info: created $biblioserverdir/register\n";
969 $created_dir_or_file++;
971 unless (-d "$biblioserverdir/shadow") {
972 mkdir "$biblioserverdir/shadow";
973 print "Info: created $biblioserverdir/shadow\n";
974 $created_dir_or_file++;
976 unless (-d "$biblioserverdir/tab") {
977 mkdir "$biblioserverdir/tab";
978 print "Info: created $biblioserverdir/tab\n";
979 $created_dir_or_file++;
981 unless (-d "$biblioserverdir/key") {
982 mkdir "$biblioserverdir/key";
983 print "Info: created $biblioserverdir/key\n";
984 $created_dir_or_file++;
986 unless (-d "$biblioserverdir/etc") {
987 mkdir "$biblioserverdir/etc";
988 print "Info: created $biblioserverdir/etc\n";
989 $created_dir_or_file++;
993 # BIBLIOS : copying mandatory files
995 # the record model, depending on marc flavour
996 unless (-f "$biblioserverdir/tab/record.abs") {
997 if (C4::Context->preference("marcflavour") eq "UNIMARC") {
998 system("cp -f $kohadir/etc/zebradb/marc_defs/unimarc/biblios/record.abs $biblioserverdir/tab/record.abs");
999 print "Info: copied record.abs for UNIMARC\n";
1000 } else {
1001 system("cp -f $kohadir/etc/zebradb/marc_defs/marc21/biblios/record.abs $biblioserverdir/tab/record.abs");
1002 print "Info: copied record.abs for USMARC\n";
1004 $created_dir_or_file++;
1006 unless (-f "$biblioserverdir/tab/sort-string-utf.chr") {
1007 system("cp -f $kohadir/etc/zebradb/lang_defs/fr/sort-string-utf.chr $biblioserverdir/tab/sort-string-utf.chr");
1008 print "Info: copied sort-string-utf.chr\n";
1009 $created_dir_or_file++;
1011 unless (-f "$biblioserverdir/tab/word-phrase-utf.chr") {
1012 system("cp -f $kohadir/etc/zebradb/lang_defs/fr/sort-string-utf.chr $biblioserverdir/tab/word-phrase-utf.chr");
1013 print "Info: copied word-phase-utf.chr\n";
1014 $created_dir_or_file++;
1016 unless (-f "$biblioserverdir/tab/bib1.att") {
1017 system("cp -f $kohadir/etc/zebradb/biblios/etc/bib1.att $biblioserverdir/tab/bib1.att");
1018 print "Info: copied bib1.att\n";
1019 $created_dir_or_file++;
1021 unless (-f "$biblioserverdir/tab/default.idx") {
1022 system("cp -f $kohadir/etc/zebradb/etc/default.idx $biblioserverdir/tab/default.idx");
1023 print "Info: copied default.idx\n";
1024 $created_dir_or_file++;
1026 unless (-f "$biblioserverdir/etc/ccl.properties") {
1027 # system("cp -f $kohadir/etc/zebradb/ccl.properties ".C4::Context->zebraconfig('biblioserver')->{ccl2rpn});
1028 system("cp -f $kohadir/etc/zebradb/ccl.properties $biblioserverdir/etc/ccl.properties");
1029 print "Info: copied ccl.properties\n";
1030 $created_dir_or_file++;
1032 unless (-f "$biblioserverdir/etc/pqf.properties") {
1033 # system("cp -f $kohadir/etc/zebradb/pqf.properties ".C4::Context->zebraconfig('biblioserver')->{ccl2rpn});
1034 system("cp -f $kohadir/etc/zebradb/pqf.properties $biblioserverdir/etc/pqf.properties");
1035 print "Info: copied pqf.properties\n";
1036 $created_dir_or_file++;
1040 # BIBLIOS : copying mandatory files
1042 unless (-f C4::Context->zebraconfig('biblioserver')->{config}) {
1043 open my $zd, '>:encoding(UTF-8)', C4::Context->zebraconfig('biblioserver')->{config};
1044 print {$zd} "
1045 # generated by KOHA/misc/migrtion_tools/rebuild_zebra.pl
1046 profilePath:\${srcdir:-.}:$biblioserverdir/tab/:$tabdir/tab/:\${srcdir:-.}/tab/
1048 encoding: UTF-8
1049 # Files that describe the attribute sets supported.
1050 attset:bib1.att
1051 attset:explain.att
1052 attset:gils.att
1054 modulePath:$modulesdir/modules/
1055 # Specify record type
1056 iso2709.recordType:grs.marcxml.record
1057 recordType:grs.xml
1058 recordId: (bib1,Local-Number)
1059 storeKeys:1
1060 storeData:1
1063 # Lock File Area
1064 lockDir: $biblioserverdir/lock
1065 perm.anonymous:r
1066 perm.kohaadmin:rw
1067 register: $biblioserverdir/register:4G
1068 shadow: $biblioserverdir/shadow:4G
1070 # Temp File area for result sets
1071 setTmpDir: $biblioserverdir/tmp
1073 # Temp File area for index program
1074 keyTmpDir: $biblioserverdir/key
1076 # Approx. Memory usage during indexing
1077 memMax: 40M
1078 rank:rank-1
1080 print "Info: creating zebra-biblios.cfg\n";
1081 $created_dir_or_file++;
1084 if ($created_dir_or_file) {
1085 print "Info: created : $created_dir_or_file directories & files\n";
1086 } else {
1087 print "Info: file & directories OK\n";