Bug 15395: Allow correct handling of plural translation
[koha.git] / misc / migration_tools / bulkmarcimport.pl
blobd820d3e9912282344e0749935412baa938daf541
1 #!/usr/bin/perl
2 # Import an iso2709 file into Koha 3
4 use Modern::Perl;
5 #use diagnostics;
6 BEGIN {
7 # find Koha's Perl modules
8 # test carefully before changing this
9 use FindBin;
10 eval { require "$FindBin::Bin/../kohalib.pl" };
13 # Koha modules used
14 use MARC::File::USMARC;
15 use MARC::File::XML;
16 use MARC::Record;
17 use MARC::Batch;
18 use MARC::Charset;
20 use C4::Context;
21 use C4::Biblio;
22 use C4::Koha;
23 use C4::Debug;
24 use C4::Charset;
25 use C4::Items;
26 use C4::MarcModificationTemplates;
28 use YAML;
29 use Unicode::Normalize;
30 use Time::HiRes qw(gettimeofday);
31 use Getopt::Long;
32 use IO::File;
33 use Pod::Usage;
35 use Koha::Biblios;
36 use Koha::SearchEngine;
37 use Koha::SearchEngine::Search;
39 use open qw( :std :encoding(UTF-8) );
# Command-line state and option parsing for the import run.
40 binmode( STDOUT, ":encoding(UTF-8)" );
# Input file name, maximum number of records to handle (0 never matches the
# running counter, so everything is read) and number of records to skip first.
41 my ( $input_marc_file, $number, $offset) = ('',0,0);
42 my ($version, $delete, $test_parameter, $skip_marc8_conversion, $char_encoding, $verbose, $commit, $fk_off,$format,$biblios,$authorities,$keepids,$match, $isbn_check, $logfile);
43 my ( $insert, $filters, $update, $all, $yamlfile, $authtypes, $append );
# ISBN hyphen stripping is on by default; the negatable 'cleanisbn!' option
# below lets --nocleanisbn turn it off.
44 my $cleanisbn = 1;
45 my ($sourcetag,$sourcesubfield,$idmapfl, $dedup_barcode);
46 my $framework = '';
47 my $localcust;
48 my $marc_mod_template = '';
# Stays negative until a MARC modification template name has been resolved
# to a template id.
49 my $marc_mod_template_id = -1;
# Turn on autoflush for the currently selected output handle.
51 $|=1;
# Map every command-line switch onto the variables declared above.
# 'filter' and 'match' may be repeated and are collected into array references.
53 GetOptions(
54 'commit:f' => \$commit,
55 'file:s' => \$input_marc_file,
56 'n:f' => \$number,
57 'o|offset:f' => \$offset,
58 'h' => \$version,
59 'd' => \$delete,
60 't|test' => \$test_parameter,
61 's' => \$skip_marc8_conversion,
# NOTE(review): $char_encoding is filled in here but is not read anywhere else
# in the visible source -- confirm whether -c is still meant to do anything.
62 'c:s' => \$char_encoding,
63 'v:+' => \$verbose,
64 'fk' => \$fk_off,
65 'm:s' => \$format,
66 'l:s' => \$logfile,
67 'append' => \$append,
68 'k|keepids:s' => \$keepids,
69 'b|biblios' => \$biblios,
70 'a|authorities' => \$authorities,
71 'authtypes:s' => \$authtypes,
72 'filter=s@' => \$filters,
73 'insert' => \$insert,
74 'update' => \$update,
75 'all' => \$all,
76 'match=s@' => \$match,
77 'i|isbn' => \$isbn_check,
78 'x:s' => \$sourcetag,
79 'y:s' => \$sourcesubfield,
80 'idmap:s' => \$idmapfl,
81 'cleanisbn!' => \$cleanisbn,
82 'yaml:s' => \$yamlfile,
83 'dedupbarcode' => \$dedup_barcode,
84 'framework=s' => \$framework,
85 'custom:s' => \$localcust,
86 'marcmodtemplate:s' => \$marc_mod_template,
# Derive the defaults that depend on other options: import bibliographic
# records unless --authorities was given, and insert unless --update was given.
$biblios ||= !$authorities;
$insert  ||= !$update;
# Mode handed to IO::File->new for the log file: append or truncate.
my $writemode = ($append) ? "a" : "w";

# -exitval must be given a value: a dangling key makes the option list
# odd-sized, which triggers an "Odd number of elements" warning inside
# pod2usage. 2 is the exit status that resulted from the dangling key anyway.
pod2usage( -msg => "\nYou must specify either --biblios or --authorities, not both.\n", -exitval => 2 ) if $biblios && $authorities;

# --all means both inserting new records and updating matched ones.
if ($all) {
    $insert = 1;
    $update = 1;
}

# Show the full manual when help is requested or no input file was named.
if ($version || ($input_marc_file eq '')) {
    pod2usage( -verbose => 2 );
    exit;
}

# Updating needs some way of finding the existing record.
if( $update && !( $match || $isbn_check ) ) {
    warn "Using -update without -match or -isbn seems to be useless.\n";
}
# Optional per-record customization hook (--custom). When the option was
# given, locate the module file, load it, and replace $localcust with a
# reference to the customize subroutine, which the import loop then calls
# with each record.
107 if(defined $localcust) { #local customize module
# The value is not an existing path (this includes an empty value): reduce it
# to a bare module name and look for "<name>.pm" next to this script.
108 if(!-e $localcust) {
109 $localcust= $localcust||'LocalChanges'; #default name
110 $localcust=~ s/^.*\/([^\/]+)$/$1/; #extract file name only
111 $localcust=~ s/\.pm$//; #remove extension
112 my $fqcust= $FindBin::Bin."/$localcust.pm"; #try migration_tools dir
113 if(-e $fqcust) {
114 $localcust= $fqcust;
116 else {
117 print "WARNING: customize module $localcust.pm not found!\n";
118 exit 1;
# NOTE(review): a path that exists but is relative is handed to require
# unchanged -- confirm that require resolves it as intended.
121 require $localcust if $localcust;
# From here on $localcust holds a code reference rather than a file name.
122 $localcust=\&customize if $localcust;
# Resolve --marcmodtemplate (a template name) to a template id. The name must
# match exactly one existing template: several matches abort with a warning,
# no match is fatal.
if($marc_mod_template ne '') {
    my @templates = GetModificationTemplates();
    foreach my $this_template (@templates) {
        if($this_template->{'name'} eq $marc_mod_template) {
            if($marc_mod_template_id < 0) {
                $marc_mod_template_id = $this_template->{'template_id'};
            } else {
                # A second template carries the same name: refuse to guess.
                print "WARNING: MARC modification template name " .
                      "'$marc_mod_template' matches multiple templates. " .
                      "Please rename these templates\n";
                exit 1;
            }
        }
    }
    if($marc_mod_template_id < 0) {
        # Message wording fixed (was "Can't located").
        die "Can't locate MARC modification template '$marc_mod_template'\n";
    } else {
        # Message wording fixed (was "modofication").
        print "Records will be modified using MARC modification template: $marc_mod_template\n" if $verbose;
    }
}
my $dbh = C4::Context->dbh;
my $heading_fields=get_heading_fields();

# Open the id map file for writing. The three-argument form keeps the file
# name out of the mode string, so a name starting with '>' , '|' or spaces is
# treated as a plain path. The IDMAP bareword handle is kept because the
# import loop prints to it by that name.
if (defined $idmapfl) {
    open(IDMAP, '>', $idmapfl) or die "cannot open $idmapfl: $!\n";
}

# Default source of the original record number when neither -x nor -y is given.
if ((not defined $sourcesubfield) && (not defined $sourcetag)){
    $sourcetag="910";
    $sourcesubfield="a";
}
# Database preparation before the import: switch off action logging,
# optionally switch off foreign key checks (-fk) and optionally empty the
# target tables (-d).
159 # Disable logging for the biblios and authorities import operation. It would unnecessarily
160 # slow the import
162 # Disable the syspref cache so we can change logging settings
163 C4::Context->disable_syspref_cache();
164 # Save current CataloguingLog and AuthoritiesLog sysprefs values
# (they are written back with set_preference once the import loop is done)
165 my $CataloguingLog = C4::Context->preference( 'CataloguingLog' );
166 my $AuthoritiesLog = C4::Context->preference( 'AuthoritiesLog' );
167 # Disable logging for both
168 C4::Context->set_preference( 'CataloguingLog', 0 );
169 C4::Context->set_preference( 'AuthoritiesLog', 0 );
171 if ($fk_off) {
172 $dbh->do("SET FOREIGN_KEY_CHECKS = 0");
# NOTE(review): nothing here looks at $test_parameter, so the tables are
# truncated even when --test was given -- confirm this is intended.
176 if ($delete) {
177 if ($biblios){
178 print "deleting biblios\n";
179 $dbh->do("truncate biblio");
180 $dbh->do("truncate biblioitems");
181 $dbh->do("truncate items");
183 else {
184 print "deleting authorities\n";
185 $dbh->do("truncate auth_header");
# The pending indexing queue is emptied as well.
187 $dbh->do("truncate zebraqueue");
if ($test_parameter) {
    print "TESTING MODE ONLY\n DOING NOTHING\n===============\n";
}

my $marcFlavour = C4::Context->preference('marcflavour') || 'MARC21';

print "Characteristic MARC flavour: $marcFlavour\n" if $verbose;
my $starttime = gettimeofday;
my $batch;
# don't let MARC::Batch open the file, as it applies the ':utf8' IO layer
# IO::File->new returns undef on failure; stop here with the real reason
# instead of handing an undefined handle to MARC::Batch.
my $fh = IO::File->new($input_marc_file)
    or die "cannot open $input_marc_file: $!\n";
if (defined $format && $format =~ /XML/i) {
    # ugly hack follows -- MARC::File::XML, when used by MARC::Batch,
    # appears to try to convert incoming XML records from MARC-8
    # to UTF-8.  Setting the BinaryEncoding key turns that off
    # TODO: see what happens to ISO-8859-1 XML files.
    # TODO: determine if MARC::Batch can be fixed to handle
    #       XML records properly -- it probably should be
    #       be using a proper push or pull XML parser to
    #       extract the records, not using regexes to look
    #       for <record>.*</record>.
    $MARC::File::XML::_load_args{BinaryEncoding} = 'utf-8';
    my $recordformat= ($marcFlavour eq "MARC21"?"USMARC":uc($marcFlavour));
    #UNIMARC Authorities have a different way to manage encoding than UNIMARC biblios.
    $recordformat=$recordformat."AUTH" if ($authorities and $marcFlavour ne "MARC21");
    $MARC::File::XML::_load_args{RecordFormat} = $recordformat;
    $batch = MARC::Batch->new( 'XML', $fh );
} else {
    $batch = MARC::Batch->new( 'USMARC', $fh );
}
$batch->warnings_off();
$batch->strict_off();
# $i counts the records read; a commit is issued every $commitnum records.
my $i=0;
my $commitnum = $commit ? $commit : 50;
my $yamlhash;

# Skip file offset
if ( $offset ) {
    print "Skipping file offset: $offset records\n";
    $batch->next() while ($offset--);
}

# Tag (and subfield) that holds the record id: 001 for authorities, the field
# mapped to biblio.biblionumber for biblios, falling back to 001.
my ($tagid,$subfieldid);
if ($authorities){
    $tagid='001';
}
else {
    ( $tagid, $subfieldid ) =
        GetMarcFromKohaField( "biblio.biblionumber", $framework );
    $tagid||="001";
}

# the SQL query to search on isbn
my $sth_isbn = $dbh->prepare("SELECT biblionumber,biblioitemnumber FROM biblioitems WHERE isbn=?");

# Commits are issued explicitly from here on.
$dbh->{AutoCommit} = 0;
my $loghandle;
if ($logfile){
    # Fail with the real reason when the log file cannot be opened, rather
    # than on the first print to an undefined handle.
    $loghandle= IO::File->new($logfile, $writemode)
        or die "cannot open $logfile: $!\n";
    print $loghandle "id;operation;status\n";
}

# Searcher used for --match lookups, bound to the index of the record type
# being imported.
my $searcher = Koha::SearchEngine::Search->new(
    {
        index => (
              $authorities
            ? $Koha::SearchEngine::AUTHORITIES_INDEX
            : $Koha::SearchEngine::BIBLIOS_INDEX
        )
    }
);
# Main import loop. Each iteration reads one record from $batch, normalizes
# it, optionally looks for an existing record (--match / --isbn), then adds or
# updates an authority or a biblio with its items. The loop ends when the
# batch returns no record or when $i reaches the -n limit.
263 RECORD: while ( ) {
264 my $record;
265 # get records
266 eval { $record = $batch->next() };
267 if ( $@ ) {
268 print "Bad MARC record $i: $@ skipped\n";
269 # FIXME - because MARC::Batch->next() combines grabbing the next
270 # blob and parsing it into one operation, a correctable condition
271 # such as a MARC-8 record claiming that it's UTF-8 can't be recovered
272 # from because we don't have access to the original blob. Note
273 # that the staging import can deal with this condition (via
274 # C4::Charset::MarcToUTF8Record) because it doesn't use MARC::Batch.
275 next;
277 # skip if we get an empty record (that is MARC valid, but will result in AddBiblio failure
# Note that this leaves the loop altogether (last); a false value from
# next() is also what ends the import.
278 last unless ( $record );
279 $i++;
# Progress dots: printed when -v was not given at all or is exactly 1.
280 if( ($verbose//1)==1 ) { #no dot for verbose==2
281 print "." . ( $i % 100==0 ? "\n$i" : '' );
284 # transcode the record to UTF8 if needed & applicable.
285 if ($record->encoding() eq 'MARC-8' and not $skip_marc8_conversion) {
286 # FIXME update condition
287 my ($guessed_charset, $charset_errors);
288 ($record, $guessed_charset, $charset_errors) = MarcToUTF8Record($record, $marcFlavour.(($authorities and $marcFlavour ne "MARC21")?'AUTH':''));
289 if ($guessed_charset eq 'failed') {
290 warn "ERROR: failed to perform character conversion for record $i\n";
291 next RECORD;
294 SetUTF8Flag($record);
# Apply the MARC modification template resolved from --marcmodtemplate, then
# the --custom hook, before anything is read from the record.
295 if($marc_mod_template_id > 0) {
296 print "Modifying MARC\n" if $verbose;
297 ModifyRecordWithTemplate( $marc_mod_template_id, $record );
299 &$localcust($record) if $localcust;
300 my $isbn;
301 # remove trailing - in isbn (only for biblios, of course)
# All hyphens are removed (s/-//g), and only from subfield a of the first
# ISBN field.
302 if( $biblios ) {
303 my $tag = $marcFlavour eq 'UNIMARC' ? '010' : '020';
304 my $field = $record->field($tag);
305 $isbn = $field && $field->subfield('a');
306 if ( $isbn && $cleanisbn ) {
307 $isbn =~ s/-//g;
308 $field->update('a' => $isbn);
# $id: id of a matching record already in the database, when one is found.
311 my $id;
312 # search for duplicates (based on Local-number)
# $originalid: id carried by the incoming record itself.
313 my $originalid;
314 $originalid = GetRecordId( $record, $tagid, $subfieldid );
# --match: query the search engine with the configured matchpoints. Only a
# single hit is treated as a match and sets $id.
315 if ($match) {
316 require C4::Search;
317 my $query = build_query( $match, $record );
318 my $server = ( $authorities ? 'authorityserver' : 'biblioserver' );
319 $debug && warn $query;
320 my ( $error, $results, $totalhits ) = $searcher->simple_search_compat( $query, 0, 3, [$server] );
321 # changed to warn so able to continue with one broken record
322 if ( defined $error ) {
323 warn "unable to search the database for duplicates : $error";
324 printlog( { id => $id || $originalid || $match, op => "match", status => "ERROR" } ) if ($logfile);
325 next RECORD;
327 $debug && warn "$query $server : $totalhits";
328 if ( $results && scalar(@$results) == 1 ) {
329 my $marcrecord = C4::Search::new_record_from_zebra( $server, $results->[0] );
330 SetUTF8Flag($marcrecord);
331 $id = GetRecordId( $marcrecord, $tagid, $subfieldid );
# For authorities: when both records carry an 005 and the stored one is not
# older than the incoming one, the incoming record is skipped (after the YAML
# entry has been filled from the stored record, when --yaml is in use).
332 if ( $authorities && $marcFlavour ) {
333 #Skip if authority in database is the same as the on in database
334 if ( $marcrecord->field('005') && $record->field('005') &&
335 $marcrecord->field('005')->data && $record->field('005')->data &&
336 $marcrecord->field('005')->data >= $record->field('005')->data ) {
337 if ($yamlfile) {
338 $yamlhash->{$originalid}->{'authid'} = $id;
340 # we recover all subfields of the heading authorities
341 my @subfields;
342 foreach my $field ( $marcrecord->field("2..") ) {
343 push @subfields, map { ( $_->[0] =~ /[a-z]/ ? $_->[1] : () ) } $field->subfields();
345 $yamlhash->{$originalid}->{'subfields'} = \@subfields;
347 next;
350 } elsif ( $results && scalar(@$results) > 1 ) {
351 $debug && warn "more than one match for $query";
352 } else {
353 $debug && warn "nomatch for $query";
# --keepids: copy the original id into the requested field (a 3-character
# value is used as a control field tag, otherwise tag plus subfield code) and
# drop the field that carried it.
356 if ($keepids && $originalid) {
357 my $storeidfield;
358 if ( length($keepids) == 3 ) {
359 $storeidfield = MARC::Field->new( $keepids, $originalid );
360 } else {
361 $storeidfield = MARC::Field->new( substr( $keepids, 0, 3 ), "", "", substr( $keepids, 3, 1 ), $originalid );
363 $record->insert_fields_ordered($storeidfield);
364 $record->delete_field( $record->field($tagid) );
# --filter: a 3-character filter removes every field with that tag; a longer
# one (tag, subfield code, optional value pattern) removes matching subfields.
366 foreach my $stringfilter (@$filters) {
367 if ( length($stringfilter) == 3 ) {
368 foreach my $field ( $record->field($stringfilter) ) {
369 $record->delete_field($field);
370 $debug && warn "removed : ", $field->as_string;
372 } elsif ($stringfilter =~ /([0-9]{3})([a-z0-9])(.*)/) {
373 my $removetag = $1;
374 my $removesubfield = $2;
375 my $removematch = $3;
376 if ( ( $removetag > "010" ) && $removesubfield ) {
377 foreach my $field ( $record->field($removetag) ) {
378 $field->delete_subfield( code => "$removesubfield", match => $removematch );
379 $debug && warn "Potentially removed : ", $field->subfield($removesubfield);
# Nothing below is executed in test mode.
384 unless ($test_parameter) {
385 if ($authorities){
386 use C4::AuthoritiesMarc;
387 my $authtypecode=GuessAuthTypeCode($record, $heading_fields);
388 my $authid= ($id?$id:GuessAuthId($record));
389 if ($authid && GetAuthority($authid) && $update ){
390 ## Authority has an id and is in database : Replace
391 eval { ( $authid ) = ModAuthority($authid,$record, $authtypecode) };
392 if ($@){
393 warn "Problem with authority $authid Cannot Modify";
394 printlog({id=>$originalid||$id||$authid, op=>"edit",status=>"ERROR"}) if ($logfile);
396 else{
397 printlog({id=>$originalid||$id||$authid, op=>"edit",status=>"ok"}) if ($logfile);
400 elsif (defined $authid) {
401 ## An authid is defined but no authority in database : add
402 eval { ( $authid ) = AddAuthority($record,$authid, $authtypecode) };
403 if ($@){
404 warn "Problem with authority $authid Cannot Add ".$@;
405 printlog({id=>$originalid||$id||$authid, op=>"insert",status=>"ERROR"}) if ($logfile);
407 else{
408 printlog({id=>$originalid||$id||$authid, op=>"insert",status=>"ok"}) if ($logfile);
411 else {
412 ## True insert in database
413 eval { ( $authid ) = AddAuthority($record,"", $authtypecode) };
414 if ($@){
415 warn "Problem with authority $authid Cannot Add".$@;
416 printlog({id=>$originalid||$id||$authid, op=>"insert",status=>"ERROR"}) if ($logfile);
418 else{
419 printlog({id=>$originalid||$id||$authid, op=>"insert",status=>"ok"}) if ($logfile);
# --yaml: remember the resulting authid and the lowercase-coded subfield
# values of the 2xx fields under the original id.
422 if ($yamlfile) {
423 $yamlhash->{$originalid}->{'authid'} = $authid;
424 my @subfields;
425 foreach my $field ( $record->field("2..") ) {
426 push @subfields, map { ( $_->[0] =~ /[a-z]/ ? $_->[1] : () ) } $field->subfields();
428 $yamlhash->{$originalid}->{'subfields'} = \@subfields;
431 else {
432 my ( $biblionumber, $biblioitemnumber, $itemnumbers_ref, $errors_ref );
433 $biblionumber = $id;
434 # check for duplicate, based on ISBN (skip it if we already have found a duplicate with match parameter
435 if (!$biblionumber && $isbn_check && $isbn) {
436 # warn "search ISBN : $isbn";
437 $sth_isbn->execute($isbn);
438 ($biblionumber,$biblioitemnumber) = $sth_isbn->fetchrow;
# NOTE(review): the id map line is written here, before AddBiblio runs below,
# so for a record that is about to be inserted $biblionumber is still
# undefined at this point -- confirm whether the map is meant to cover new
# records.
440 if (defined $idmapfl) {
441 if ($sourcetag < "010"){
442 if ($record->field($sourcetag)){
443 my $source = $record->field($sourcetag)->data();
444 printf(IDMAP "%s|%s\n",$source,$biblionumber);
446 } else {
447 my $source=$record->subfield($sourcetag,$sourcesubfield);
448 printf(IDMAP "%s|%s\n",$source,$biblionumber);
451 # create biblio, unless we already have it ( either match or isbn )
452 if ($biblionumber) {
# Look up the biblioitemnumber of the existing biblio; $@ is not checked
# after this eval, so a failed lookup is silently ignored.
453 eval{
454 $biblioitemnumber = Koha::Biblios->find( $biblionumber )->biblioitem->biblioitemnumber;
456 if ($update) {
457 eval { ModBiblio( $record, $biblionumber, GetFrameworkCode($biblionumber) ) };
458 if ($@) {
459 warn "ERROR: Edit biblio $biblionumber failed: $@\n";
460 printlog( { id => $id || $originalid || $biblionumber, op => "update", status => "ERROR" } ) if ($logfile);
461 next RECORD;
462 } else {
463 printlog( { id => $id || $originalid || $biblionumber, op => "update", status => "ok" } ) if ($logfile);
465 } else {
466 printlog( { id => $id || $originalid || $biblionumber, op => "insert", status => "warning : already in database" } ) if ($logfile);
468 } else {
469 if ($insert) {
470 eval { ( $biblionumber, $biblioitemnumber ) = AddBiblio( $record, '', { defer_marc_save => 1 } ) };
471 if ($@) {
472 warn "ERROR: Adding biblio $biblionumber failed: $@\n";
473 printlog( { id => $id || $originalid || $biblionumber, op => "insert", status => "ERROR" } ) if ($logfile);
474 next RECORD;
475 } else {
476 printlog( { id => $id || $originalid || $biblionumber, op => "insert", status => "ok" } ) if ($logfile);
478 } else {
479 warn "WARNING: Updating record ".($id||$originalid)." failed";
480 printlog( { id => $id || $originalid || $biblionumber, op => "update", status => "warning : not in database" } ) if ($logfile);
481 next RECORD;
# Add the items embedded in the record; the error is kept aside so that the
# MARC data can still be saved before it is reported.
484 eval { ( $itemnumbers_ref, $errors_ref ) = AddItemBatchFromMarc( $record, $biblionumber, $biblioitemnumber, '' ); };
485 my $error_adding = $@;
486 # Work on a clone so that if there are real errors, we can maybe
487 # fix them up later.
488 my $clone_record = $record->clone();
489 C4::Biblio::_strip_item_fields($clone_record, '');
490 # This sets the marc fields if there was an error, and also calls
491 # defer_marc_save.
492 ModBiblioMarc( $clone_record, $biblionumber, $framework );
493 if ( $error_adding ) {
494 warn "ERROR: Adding items to bib $biblionumber failed: $error_adding";
495 printlog({id=>$id||$originalid||$biblionumber, op=>"insertitem",status=>"ERROR"}) if ($logfile);
496 # if we failed because of an exception, assume that
497 # the MARC columns in biblioitems were not set.
498 next RECORD;
500 else{
501 printlog({id=>$id||$originalid||$biblionumber, op=>"insertitem",status=>"ok"}) if ($logfile);
# --dedupbarcode: retry the items rejected for a duplicate barcode, this time
# without their barcode.
503 if ($dedup_barcode && grep { exists $_->{error_code} && $_->{error_code} eq 'duplicate_barcode' } @$errors_ref) {
504 # Find the record called 'barcode'
505 my ($tag, $sub) = C4::Biblio::GetMarcFromKohaField('items.barcode', $framework);
506 # Now remove any items that didn't have a duplicate_barcode error,
507 # erase the barcodes on items that did, and re-add those items.
508 my %dupes;
509 foreach my $i (0 .. $#{$errors_ref}) {
510 my $ref = $errors_ref->[$i];
511 if ($ref && ($ref->{error_code} eq 'duplicate_barcode')) {
512 $dupes{$ref->{item_sequence}} = 1;
513 # Delete the error message because we're going to
514 # retry this one.
515 delete $errors_ref->[$i];
# Item fields are numbered from 1 in record order and compared with the
# item_sequence values collected above.
518 my $seq = 0;
519 foreach my $field ($record->field($tag)) {
520 $seq++;
521 if ($dupes{$seq}) {
522 # Here we remove the barcode
523 $field->delete_subfield(code => $sub);
524 } else {
525 # otherwise we delete the field because we don't want
526 # two of them
527 $record->delete_fields($field);
530 # Now re-add the record as before, adding errors to the prev list
531 my $more_errors;
532 eval { ( $itemnumbers_ref, $more_errors ) = AddItemBatchFromMarc( $record, $biblionumber, $biblioitemnumber, '' ); };
533 if ( $@ ) {
534 warn "ERROR: Adding items to bib $biblionumber failed: $@\n";
535 printlog({id=>$id||$originalid||$biblionumber, op=>"insertitem",status=>"ERROR"}) if ($logfile);
536 # if we failed because of an exception, assume that
537 # the MARC columns in biblioitems were not set.
538 ModBiblioMarc( $record, $biblionumber, $framework );
539 next RECORD;
540 } else {
541 printlog({id=>$id||$originalid||$biblionumber, op=>"insertitem",status=>"ok"}) if ($logfile);
543 push @$errors_ref, @{ $more_errors };
545 if ($#{ $errors_ref } > -1) {
546 report_item_errors($biblionumber, $errors_ref);
548 $yamlhash->{$originalid} = $biblionumber if ($yamlfile);
# Commit once every $commitnum records.
550 $dbh->commit() if (0 == $i % $commitnum);
552 print $record->as_formatted()."\n" if ($verbose//0)==2;
# Stop once the -n limit has been reached.
553 last if $i == $number;
# Commit whatever is left from the last, partially filled batch and switch
# back to autocommit for the remaining housekeeping statements.
$dbh->commit();
$dbh->{AutoCommit} = 1;

if ($fk_off) {
    $dbh->do("SET FOREIGN_KEY_CHECKS = 1");
}

# Restore CataloguingLog
C4::Context->set_preference( 'CataloguingLog', $CataloguingLog );
# Restore AuthoritiesLog
C4::Context->set_preference( 'AuthoritiesLog', $AuthoritiesLog );

my $timeneeded = gettimeofday - $starttime;
print "\n$i MARC records done in $timeneeded seconds\n";
if ($logfile){
    print $loghandle "file : $input_marc_file\n";
    print $loghandle "$i MARC records done in $timeneeded seconds\n";
    $loghandle->close;
}
# Close the id map explicitly: buffered write errors only surface at close.
if (defined $idmapfl) {
    close(IDMAP) or warn "cannot close $idmapfl: $!\n";
}
if ($yamlfile) {
    open my $yamlfileout, q{>}, "$yamlfile" or die "cannot open $yamlfile \n";
    print $yamlfileout Dump($yamlhash);
    # Without a checked close a full disk would go unnoticed and the script
    # would still exit with status 0.
    close $yamlfileout or die "cannot write $yamlfile: $!\n";
}
exit 0;
# Extract the record identifier from a MARC record.
#
#   $marcrecord - object providing field($tag) and subfield($tag, $code)
#   $tag        - tag holding the identifier
#   $subfield   - subfield code; only looked at for tags that do not sort
#                 before "010"
#
# Tags that sort before "010" are read through the field's data() method,
# any other tag through the given subfield. Returns undef when the field is
# absent, or when a subfield code would be needed but none was given.
sub GetRecordId {
    my ( $marcrecord, $tag, $subfield ) = @_;

    if ( $tag lt "010" ) {
        my $control_field = $marcrecord->field($tag);
        return $control_field->data() if $control_field;
    }
    elsif ( $subfield && $marcrecord->field($tag) ) {
        return $marcrecord->subfield( $tag, $subfield );
    }

    # Callers assign the result to a scalar, so an explicit undef is returned.
    return undef;
}
# Return the boolean AND operator for the active query syntax: '&&' when the
# UseQueryParser preference is set and a query parser object is available,
# 'and' otherwise.
sub _query_and_operator {
    my $use_qparser = C4::Context->preference('UseQueryParser')
      && C4::Context->queryparser;
    return $use_qparser ? '&&' : 'and';
}

# Build the duplicate-detection query for one record.
#
#   $match  - array reference of matchpoints, each "index,tag[subfields]"
#   $record - record providing field($tag)
#
# Every matchpoint is turned into a sub-query by build_simplequery; the
# non-empty ones are ANDed together. Returns an empty string when no
# matchpoint produced anything.
sub build_query {
    my ( $match, $record ) = @_;

    my @searchstrings;
    foreach my $matchingpoint ( @{$match} ) {
        my $string = build_simplequery( $matchingpoint, $record );
        push @searchstrings, $string if length($string) > 0;
    }
    return join( ' ' . _query_and_operator() . ' ', @searchstrings );
}

# Build the query fragment for a single matchpoint.
#
#   $element - "index,tag[subfields]", e.g. "isbn,020a"
#   $record  - record providing field($tag)
#
# Each occurrence of the tag contributes index:"value", where value comes
# from the field's as_string() limited to the listed subfields; occurrences
# with an empty value are left out. Returns an empty string when the
# matchpoint has no ",tag" part or nothing matched.
sub build_simplequery {
    my ( $element, $record ) = @_;

    my @searchstrings;
    my ( $index, $recorddata ) = split /,/, $element;

    # A matchpoint without a comma leaves $recorddata undefined; treat it as
    # "nothing to match" instead of matching against undef.
    if ( defined $recorddata && $recorddata =~ /(\d{3})(.*)/ ) {
        my ( $tag, $subfields ) = ( $1, $2 );
        foreach my $field ( $record->field($tag) ) {
            # Evaluate the field once and reuse the value.
            my $value = $field->as_string("$subfields");
            push @searchstrings, "$index:\"" . $value . "\"" if length($value) > 0;
        }
    }
    return join( ' ' . _query_and_operator() . ' ', @searchstrings );
}
# Print one line per item that could not be added to a biblio.
#
#   $biblionumber - number of the biblio the items belong to
#   $errors_ref   - array reference of error hashes with the keys
#                   item_sequence, item_barcode, error_code and
#                   error_information; false entries are skipped
#
# Underscores in the error code are shown as spaces.
sub report_item_errors {
    my ( $biblionumber, $errors_ref ) = @_;

    for my $failure ( @{$errors_ref} ) {
        next unless $failure;

        ( my $reason = $failure->{'error_code'} ) =~ s/_/ /g;
        my $line =
            "Item not added (bib $biblionumber, item tag #$failure->{'item_sequence'}, barcode $failure->{'item_barcode'}): "
          . "$reason $failure->{'error_information'}";
        print $line, "\n";
    }
    return;
}
# Write one "id;operation;status" line to the log handle.
#
#   $logelements - hash reference with the keys id, op and status; an
#                  undefined value is written as an empty column
sub printlog {
    my ($logelements) = @_;

    my @columns = map { $_ // "" } @{$logelements}{qw(id op status)};
    return print {$loghandle} join( ";", @columns ), "\n";
}
# Build the heading-field table handed to GuessAuthTypeCode.
#
# The table is read from the YAML file named by --authtypes when that option
# was given, otherwise from the auth_types table (keyed by
# auth_tag_to_report). Either way it is returned wrapped in a hash reference
# whose single key is the value of the marcflavour system preference.
sub get_heading_fields {
    my $tag_map;
    if ($authtypes) {
        $tag_map = YAML::LoadFile($authtypes);
    }
    else {
        $tag_map = $dbh->selectall_hashref(
            "SELECT auth_tag_to_report, authtypecode from auth_types",
            'auth_tag_to_report',
            { Slice => {} }
        );
    }

    my $by_flavour = { C4::Context->preference('marcflavour') => $tag_map };

    # Only the table loaded from YAML is dumped in debug mode.
    warn YAML::Dump($by_flavour) if $authtypes && $debug;
    return $by_flavour;
}
668 =head1 NAME
670 bulkmarcimport.pl - Import bibliographic/authority records into Koha
672 =head1 USAGE
674 $ export KOHA_CONF=/etc/koha.conf
675 $ perl misc/migration_tools/bulkmarcimport.pl -d -commit 1000 \
676 -file /home/jmf/koha.mrc -n 3000
678 =head1 WARNING
680 Don't use this script before you've entered and checked your MARC parameters
681 tables twice (or more!). Otherwise, the import won't work correctly and you
682 will get invalid data.
684 =head1 DESCRIPTION
686 =over
688 =item B<-h>
690 This version/help screen
692 =item B<-b, -biblios>
694 Type of import: bibliographic records
696 =item B<-a, -authorities>
698 Type of import: authority records
700 =item B<-file>=I<FILE>
702 The I<FILE> to import
704 =item B<-v>
706 Verbose mode. 1 means "some infos", 2 means "MARC dumping"
708 =item B<-fk>
710 Turn off foreign key checks during import.
712 =item B<-n>=I<NUMBER>
714 The I<NUMBER> of records to import. If missing, all the file is imported
716 =item B<-o, -offset>=I<NUMBER>
718 File offset before importing, ie I<NUMBER> of records to skip.
720 =item B<-commit>=I<NUMBER>
722 The I<NUMBER> of records to wait before performing a 'commit' operation
724 =item B<-l>
726 Log the action performed on each record, and its status, to the given file
728 =item B<-append>
730 If specified, data will be appended to the logfile. If not, the logfile will be erased for each execution.
732 =item B<-t, -test>
734 Test mode: parses the file, saying what it would do, but doing nothing.
736 =item B<-s>
738 Skip automatic conversion of MARC-8 to UTF-8. This option is provided for
739 debugging.
741 =item B<-c>=I<CHARACTERISTIC>
743 The I<CHARACTERISTIC> MARC flavour. At the moment, only I<MARC21> and
744 I<UNIMARC> are supported. MARC21 by default.
746 =item B<-d>
748 Delete EVERYTHING related to biblio in koha-DB before import. Tables: biblio,
749 biblioitems, items
751 =item B<-m>=I<FORMAT>
753 Input file I<FORMAT>: I<MARCXML> or I<ISO2709> (defaults to ISO2709)
755 =item B<-authtypes>
757 YAML file describing the authority types and the record field that distinguishes
758 each of them, used to store imported authorities with the correct authtype
760 =item B<-yaml>
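762 Name of a YAML file to write, mapping each original record id to the id it received in Koha (for authorities, together with the heading subfields)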
764 =item B<-filter>
766 list of fields that will not be imported. Can be any from 000 to 999 or field,
767 subfield and subfield's matching value such as 200avalue
769 =item B<-insert>
771 if set, only insert when possible
773 =item B<-update>
775 if set, only updates (any biblio should have a matching record)
777 =item B<-all>
779 if set, do whatever is required
781 =item B<-k, -keepids>=<FIELD>
783 Store the original record id in I<FIELD> (useful for authorities, where 001 contains the
784 authid for Koha, that can contain a very valuable info for authorities coming
785 from LOC or BNF. useless for biblios probably)
787 =item B<-match>=<FIELD>
789 I<FIELD> is a matchpoint of the form matchindex,fieldtomatch used to deduplicate; fieldtomatch
790 can be either a tag from 001 to 999 or a tag followed by a list of subfields, such as 100abcde
792 =item B<-i,-isbn>
794 If set, a search will be done on isbn, and, if the same isbn is found, the
795 biblio is not added. It's another method to deduplicate. B<-match> & B<-isbn>
796 can be both set.
798 =item B<-cleanisbn>
800 Clean ISBN fields from entering biblio records, ie removes hyphens. By default,
801 ISBN are cleaned. --nocleanisbn will keep ISBN unchanged.
803 =item B<-x>=I<TAG>
805 Source bib I<TAG> for reporting the source bib number
807 =item B<-y>=I<SUBFIELD>
809 Source I<SUBFIELD> for reporting the source bib number
811 =item B<-idmap>=I<FILE>
813 I<FILE> for the koha bib and source id
815 =item B<-keepids>
817 Store ids in 009 (useful for authorities, where 001 contains the authid for
818 Koha, that can contain a very valuable info for authorities coming from LOC or
819 BNF. useless for biblios probably)
821 =item B<-dedupbarcode>
823 If set, whenever a duplicate barcode is detected, it is removed and the attempt
824 to add the record is retried, thereby giving the record a blank barcode. This
825 is useful when something has set barcodes to be a biblio ID, or similar
826 (usually other software.)
828 =item B<-framework>
830 This is the code for the framework that the requested records will have attached
831 to them when they are created. If not specified, then the default framework
832 will be used.
834 =item B<-custom>=I<MODULE>
836 This parameter allows you to use a local module with a customize subroutine
837 that is called for each MARC record.
838 If no filename is passed, LocalChanges.pm is assumed to be in the
839 migration_tools subdirectory. You may pass an absolute file name or a file name
840 from the migration_tools directory.
842 =item B<-marcmodtemplate>=I<TEMPLATE>
844 This parameter allows you to specify the name of an existing MARC
845 modification template to apply as the MARC records are imported (these
846 templates are created in the "MARC modification templates" tool in Koha).
847 If not specified, no MARC modification templates are used (default).
849 =back
851 =cut