Bug 11936: (QA follow-up) Consistent log message for item insert
[koha.git] / misc / migration_tools / bulkmarcimport.pl
blob7a5a581a5ecdea96d77add0c84f544076a5c9048
1 #!/usr/bin/perl
2 # Import an iso2709 file into Koha 3
4 use Modern::Perl;
5 #use diagnostics;
6 BEGIN {
7 # find Koha's Perl modules
8 # test carefully before changing this
9 use FindBin;
10 eval { require "$FindBin::Bin/../kohalib.pl" };
13 # Koha modules used
14 use MARC::File::USMARC;
15 use MARC::File::XML;
16 use MARC::Record;
17 use MARC::Batch;
18 use MARC::Charset;
20 use C4::Context;
21 use C4::Biblio;
22 use C4::Koha;
23 use C4::Debug;
24 use C4::Charset;
25 use C4::Items;
26 use C4::MarcModificationTemplates;
28 use YAML;
29 use Unicode::Normalize;
30 use Time::HiRes qw(gettimeofday);
31 use Getopt::Long;
32 use IO::File;
33 use Pod::Usage;
35 use Koha::Biblios;
36 use Koha::SearchEngine;
37 use Koha::SearchEngine::Search;
39 use open qw( :std :encoding(UTF-8) );
40 binmode( STDOUT, ":encoding(UTF-8)" );
# ---------------------------------------------------------------------------
# Command-line state. Every variable below is filled in by GetOptions() and
# read by the rest of the script, so the names must not change.
# ---------------------------------------------------------------------------

# Input selection: file to read, maximum number of records, records to skip.
my $input_marc_file = '';
my $number          = 0;
my $offset          = 0;

# General switches and import behaviour.
my (
    $version,    $delete,      $test_parameter, $skip_marc8_conversion,
    $char_encoding, $verbose,  $commit,         $fk_off,
    $format,     $biblios,     $authorities,    $keepids,
    $match,      $isbn_check,  $logfile,
);
my ( $insert, $filters, $update, $all, $yamlfile, $authtypes, $append );

# ISBN cleaning is on unless --nocleanisbn is given.
my $cleanisbn = 1;

# Source-id reporting (-x / -y / -idmap) and barcode de-duplication.
my ( $sourcetag, $sourcesubfield, $idmapfl, $dedup_barcode );

# Framework code used when writing records; '' selects the default framework.
my $framework = '';

# Optional local customize module (-custom).
my $localcust;

# MARC modification template name and its resolved id (-1 = none resolved).
my $marc_mod_template    = '';
my $marc_mod_template_id = -1;

# Unbuffered STDOUT so progress dots appear immediately.
$| = 1;
# Parse the command line. Option specs are unchanged; see the POD at the end
# of the file for the meaning of each switch.
GetOptions(
    'commit:f'          => \$commit,
    'file:s'            => \$input_marc_file,
    'n:f'               => \$number,
    'o|offset:f'        => \$offset,
    'h'                 => \$version,
    'd'                 => \$delete,
    't|test'            => \$test_parameter,
    's'                 => \$skip_marc8_conversion,
    'c:s'               => \$char_encoding,
    'v:+'               => \$verbose,
    'fk'                => \$fk_off,
    'm:s'               => \$format,
    'l:s'               => \$logfile,
    'append'            => \$append,
    'k|keepids:s'       => \$keepids,
    'b|biblios'         => \$biblios,
    'a|authorities'     => \$authorities,
    'authtypes:s'       => \$authtypes,
    'filter=s@'         => \$filters,
    'insert'            => \$insert,
    'update'            => \$update,
    'all'               => \$all,
    'match=s@'          => \$match,
    'i|isbn'            => \$isbn_check,
    'x:s'               => \$sourcetag,
    'y:s'               => \$sourcesubfield,
    'idmap:s'           => \$idmapfl,
    'cleanisbn!'        => \$cleanisbn,
    'yaml:s'            => \$yamlfile,
    'dedupbarcode'      => \$dedup_barcode,
    'framework=s'       => \$framework,
    'custom:s'          => \$localcust,
    'marcmodtemplate:s' => \$marc_mod_template,
);

# Biblios are the default record type, inserting is the default action.
$biblios ||= !$authorities;
$insert  ||= !$update;

# IO::File open mode for the log file: append or truncate.
my $writemode = ($append) ? "a" : "w";

# -exitval needs an explicit value: a bare "-exitval" produced an odd-length
# option list. 2 is the value Pod::Usage used when it was left undefined.
pod2usage(
    -msg     => "\nYou must specify either --biblios or --authorities, not both.\n",
    -exitval => 2,
) if $biblios && $authorities;

# --all means "insert new records and update existing ones".
if ($all) {
    $insert = 1;
    $update = 1;
}

# Show the full manual when help is requested or no input file was given.
if ( $version || ( $input_marc_file eq '' ) ) {
    pod2usage( -verbose => 2 );
    exit;
}
# Optional local customize module (-custom). After this block $localcust is
# either undefined (option not used) or a reference to the module's
# customize() subroutine.
if ( defined $localcust ) {
    unless ( -e $localcust ) {

        # Not an existing path: reduce it to a bare module name (default
        # 'LocalChanges') and look for it next to this script.
        $localcust ||= 'LocalChanges';
        $localcust =~ s/^.*\/([^\/]+)$/$1/;    # keep the file name only
        $localcust =~ s/\.pm$//;               # drop the extension

        my $fqcust = $FindBin::Bin . "/$localcust.pm";
        if ( !-e $fqcust ) {
            print "WARNING: customize module $localcust.pm not found!\n";
            exit 1;
        }
        $localcust = $fqcust;
    }

    if ($localcust) {
        require $localcust;
        $localcust = \&customize;
    }
}
# Resolve the MARC modification template name given with -marcmodtemplate to
# its template_id. The name must match exactly one template.
if ( $marc_mod_template ne '' ) {
    my @matching_templates =
      grep { $_->{'name'} eq $marc_mod_template } GetModificationTemplates();

    if ( @matching_templates > 1 ) {
        print "WARNING: MARC modification template name "
          . "'$marc_mod_template' matches multiple templates. "
          . "Please rename these templates\n";
        exit 1;
    }

    $marc_mod_template_id = $matching_templates[0]->{'template_id'}
      if @matching_templates;

    if ( $marc_mod_template_id < 0 ) {
        die "Can't locate MARC modification template '$marc_mod_template'\n";
    }

    print "Records will be modified using MARC modification template: $marc_mod_template\n"
      if $verbose;
}
# Database handle used for the whole run; get_heading_fields() reads it.
my $dbh            = C4::Context->dbh;
my $heading_fields = get_heading_fields();

# Source-id / biblionumber map file (-idmap). The bareword handle IDMAP is
# kept because the import loop prints to it. Three-argument open keeps the
# user-supplied file name out of the mode string.
if ( defined $idmapfl ) {
    open( IDMAP, '>', $idmapfl ) or die "cannot open $idmapfl: $!\n";
}

# Default location of the source record id when neither -x nor -y is given.
if ( ( not defined $sourcesubfield ) && ( not defined $sourcetag ) ) {
    $sourcetag      = "910";
    $sourcesubfield = "a";
}
# Action logging for biblios and authorities is switched off for the import,
# since it would only slow it down. The previous values are remembered in
# $CataloguingLog / $AuthoritiesLog and written back at the end of the script.

# The syspref cache must be off for the changed values to take effect.
C4::Context->disable_syspref_cache();

# CataloguingLog: remember, then disable.
my $CataloguingLog = C4::Context->preference('CataloguingLog');
C4::Context->set_preference( 'CataloguingLog', 0 );

# AuthoritiesLog: remember, then disable.
my $AuthoritiesLog = C4::Context->preference('AuthoritiesLog');
C4::Context->set_preference( 'AuthoritiesLog', 0 );
# Optionally relax foreign key checks for the duration of the import; they
# are switched back on at the end of the script.
if ($fk_off) {
    $dbh->do("SET FOREIGN_KEY_CHECKS = 0");
}

# -d empties the target tables before importing. Test mode is documented as
# "doing nothing", so the destructive truncation is skipped under -t.
if ( $delete && !$test_parameter ) {
    if ($biblios) {
        print "deleting biblios\n";
        $dbh->do("truncate biblio");
        $dbh->do("truncate biblioitems");
        $dbh->do("truncate items");
    }
    else {
        print "deleting authorities\n";
        $dbh->do("truncate auth_header");
    }
    $dbh->do("truncate zebraqueue");
}

if ($test_parameter) {
    print "TESTING MODE ONLY\n DOING NOTHING\n===============\n";
}
my $marcFlavour = C4::Context->preference('marcflavour') || 'MARC21';

print "Characteristic MARC flavour: $marcFlavour\n" if $verbose;
my $starttime = gettimeofday;
my $batch;

# Open the input ourselves: MARC::Batch would apply the ':utf8' IO layer.
# Fail early with the OS error instead of handing an undefined handle on.
my $fh = IO::File->new($input_marc_file)
  or die "Could not open $input_marc_file: $!\n";

if ( defined $format && $format =~ /XML/i ) {

    # ugly hack follows -- MARC::File::XML, when used by MARC::Batch,
    # appears to try to convert incoming XML records from MARC-8
    # to UTF-8. Setting the BinaryEncoding key turns that off
    # TODO: see what happens to ISO-8859-1 XML files.
    # TODO: determine if MARC::Batch can be fixed to handle
    #       XML records properly -- it probably should
    #       be using a proper push or pull XML parser to
    #       extract the records, not using regexes to look
    #       for <record>.*</record>.
    $MARC::File::XML::_load_args{BinaryEncoding} = 'utf-8';
    my $recordformat = ( $marcFlavour eq "MARC21" ? "USMARC" : uc($marcFlavour) );

    # UNIMARC Authorities have a different way to manage encoding than UNIMARC biblios.
    $recordformat = $recordformat . "AUTH" if ( $authorities and $marcFlavour ne "MARC21" );
    $MARC::File::XML::_load_args{RecordFormat} = $recordformat;
    $batch = MARC::Batch->new( 'XML', $fh );
}
else {
    $batch = MARC::Batch->new( 'USMARC', $fh );
}

# Be lenient: malformed records are reported by the import loop instead.
$batch->warnings_off();
$batch->strict_off();
# Number of records read so far.
my $i = 0;

# Commit every $commitnum records. The modulus used in the loop works on
# integers, so a missing, zero or below-one value falls back to 50 rather
# than dying with "Illegal modulus zero".
my $commitnum = ( $commit && int($commit) > 0 ) ? int($commit) : 50;
my $yamlhash;

# Skip file offset. The count is truncated to a whole number and must be
# positive; a negative or fractional value made the old countdown endless.
my $records_to_skip = int( $offset || 0 );
if ( $records_to_skip > 0 ) {
    print "Skipping file offset: $records_to_skip records\n";
    for ( 1 .. $records_to_skip ) {
        my $skipped = eval { $batch->next() };
        last unless $@ || $skipped;    # end of input reached
    }
}

# Field (and subfield) holding the record id: 001 for authorities, the
# mapping of biblio.biblionumber for biblios.
my ( $tagid, $subfieldid );
if ($authorities) {
    $tagid = '001';
}
else {
    ( $tagid, $subfieldid ) =
      GetMarcFromKohaField( "biblio.biblionumber", $framework );
    $tagid ||= "001";
}

# the SQL query to search on isbn
my $sth_isbn = $dbh->prepare("SELECT biblionumber,biblioitemnumber FROM biblioitems WHERE isbn=?");

# Commits are issued explicitly by the import loop.
$dbh->{AutoCommit} = 0;

# Per-record log (-l). printlog() writes to this handle.
my $loghandle;
if ($logfile) {
    $loghandle = IO::File->new( $logfile, $writemode )
      or die "cannot open log file $logfile: $!\n";
    print $loghandle "id;operation;status\n";
}

# Search engine front end used by -match to look for existing records.
my $searcher = Koha::SearchEngine::Search->new(
    {
        index => (
              $authorities
            ? $Koha::SearchEngine::AUTHORITIES_INDEX
            : $Koha::SearchEngine::BIBLIOS_INDEX
        )
    }
);
# Main import loop: read one record at a time from $batch, normalise it,
# look for an existing match, then insert or update it (unless in test mode).
RECORD: while (1) {

    # Honour -n here rather than at the bottom of the loop, so that a record
    # abandoned with "next RECORD" cannot carry the import past the limit.
    last RECORD if $number && $i >= $number;

    my $record;
    # get records
    eval { $record = $batch->next() };
    if ( $@ ) {
        print "Bad MARC record $i: $@ skipped\n";
        # FIXME - because MARC::Batch->next() combines grabbing the next
        # blob and parsing it into one operation, a correctable condition
        # such as a MARC-8 record claiming that it's UTF-8 can't be recovered
        # from because we don't have access to the original blob.  Note
        # that the staging import can deal with this condition (via
        # C4::Charset::MarcToUTF8Record) because it doesn't use MARC::Batch.
        next;
    }
    # stop on an empty record (that is MARC valid, but would result in AddBiblio failure)
    last unless ( $record );
    $i++;
    if ( ( $verbose // 1 ) == 1 ) {    # no dot for verbose==2
        print "." . ( $i % 100 == 0 ? "\n$i" : '' );
    }

    # transcode the record to UTF8 if needed & applicable.
    if ( $record->encoding() eq 'MARC-8' and not $skip_marc8_conversion ) {
        # FIXME update condition
        my ( $guessed_charset, $charset_errors );
        ( $record, $guessed_charset, $charset_errors ) = MarcToUTF8Record( $record, $marcFlavour . ( ( $authorities and $marcFlavour ne "MARC21" ) ? 'AUTH' : '' ) );
        if ( $guessed_charset eq 'failed' ) {
            warn "ERROR: failed to perform character conversion for record $i\n";
            next RECORD;
        }
    }
    SetUTF8Flag($record);
    if ( $marc_mod_template_id > 0 ) {
        print "Modifying MARC\n" if $verbose;
        ModifyRecordWithTemplate( $marc_mod_template_id, $record );
    }
    $localcust->($record) if $localcust;

    # ISBN of the incoming biblio, used by the -isbn duplicate check below.
    # It is assigned to the outer variable: a second "my" here used to shadow
    # it, leaving $isbn undefined and the duplicate check dead.
    my $isbn;
    if ($biblios) {
        my $tag   = $marcFlavour eq 'UNIMARC' ? '010' : '020';
        my $field = $record->field($tag);
        $isbn = $field && $field->subfield('a');
        # remove hyphens in isbn unless --nocleanisbn was given
        if ( $isbn && $cleanisbn ) {
            $isbn =~ s/-//g;
            $field->update( 'a' => $isbn );
        }
    }
    my $id;
    # search for duplicates (based on Local-number)
    my $originalid;
    $originalid = GetRecordId( $record, $tagid, $subfieldid );
    if ($match) {
        require C4::Search;
        my $query  = build_query( $match, $record );
        my $server = ( $authorities ? 'authorityserver' : 'biblioserver' );
        $debug && warn $query;
        my ( $error, $results, $totalhits ) = $searcher->simple_search_compat( $query, 0, 3, [$server] );
        # changed to warn so able to continue with one broken record
        if ( defined $error ) {
            warn "unable to search the database for duplicates : $error";
            printlog( { id => $id || $originalid || $match, op => "match", status => "ERROR" } ) if ($logfile);
            next RECORD;
        }
        $debug && warn "$query $server : $totalhits";
        if ( $results && scalar(@$results) == 1 ) {
            my $marcrecord = C4::Search::new_record_from_zebra( $server, $results->[0] );
            SetUTF8Flag($marcrecord);
            $id = GetRecordId( $marcrecord, $tagid, $subfieldid );
            if ( $authorities && $marcFlavour ) {
                # Skip if the authority in the database is at least as recent
                # (field 005) as the one being imported
                if (   $marcrecord->field('005') && $record->field('005')
                    && $marcrecord->field('005')->data && $record->field('005')->data
                    && $marcrecord->field('005')->data >= $record->field('005')->data )
                {
                    if ($yamlfile) {
                        $yamlhash->{$originalid}->{'authid'} = $id;

                        # we recover all subfields of the heading authorities
                        my @subfields;
                        foreach my $field ( $marcrecord->field("2..") ) {
                            push @subfields, map { ( $_->[0] =~ /[a-z]/ ? $_->[1] : () ) } $field->subfields();
                        }
                        $yamlhash->{$originalid}->{'subfields'} = \@subfields;
                    }
                    next;
                }
            }
        } elsif ( $results && scalar(@$results) > 1 ) {
            $debug && warn "more than one match for $query";
        } else {
            $debug && warn "nomatch for $query";
        }
    }
    # -keepids: move the original id into the requested field/subfield.
    if ( $keepids && $originalid ) {
        my $storeidfield;
        if ( length($keepids) == 3 ) {
            $storeidfield = MARC::Field->new( $keepids, $originalid );
        } else {
            $storeidfield = MARC::Field->new( substr( $keepids, 0, 3 ), "", "", substr( $keepids, 3, 1 ), $originalid );
        }
        $record->insert_fields_ordered($storeidfield);
        $record->delete_field( $record->field($tagid) );
    }
    # -filter: drop whole fields (3-character spec) or matching subfields.
    foreach my $stringfilter (@$filters) {
        if ( length($stringfilter) == 3 ) {
            foreach my $field ( $record->field($stringfilter) ) {
                $record->delete_field($field);
                $debug && warn "removed : ", $field->as_string;
            }
        } elsif ( $stringfilter =~ /([0-9]{3})([a-z0-9])(.*)/ ) {
            my $removetag      = $1;
            my $removesubfield = $2;
            my $removematch    = $3;
            if ( ( $removetag > "010" ) && $removesubfield ) {
                foreach my $field ( $record->field($removetag) ) {
                    $field->delete_subfield( code => "$removesubfield", match => $removematch );
                    $debug && warn "Potentially removed : ", $field->subfield($removesubfield);
                }
            }
        }
    }
    unless ($test_parameter) {
        if ($authorities) {
            use C4::AuthoritiesMarc;
            my $authtypecode = GuessAuthTypeCode( $record, $heading_fields );
            my $authid       = ( $id ? $id : GuessAuthId($record) );
            if ( $authid && GetAuthority($authid) && $update ) {
                ## Authority has an id and is in database : Replace
                eval { ($authid) = ModAuthority( $authid, $record, $authtypecode ) };
                if ($@) {
                    warn "Problem with authority $authid Cannot Modify";
                    printlog( { id => $originalid || $id || $authid, op => "edit", status => "ERROR" } ) if ($logfile);
                }
                else {
                    printlog( { id => $originalid || $id || $authid, op => "edit", status => "ok" } ) if ($logfile);
                }
            }
            elsif ( defined $authid ) {
                ## An authid is defined but no authority in database : add
                eval { ($authid) = AddAuthority( $record, $authid, $authtypecode ) };
                if ($@) {
                    warn "Problem with authority $authid Cannot Add " . $@;
                    printlog( { id => $originalid || $id || $authid, op => "insert", status => "ERROR" } ) if ($logfile);
                }
                else {
                    printlog( { id => $originalid || $id || $authid, op => "insert", status => "ok" } ) if ($logfile);
                }
            }
            else {
                ## True insert in database
                eval { ($authid) = AddAuthority( $record, "", $authtypecode ) };
                if ($@) {
                    warn "Problem with authority $authid Cannot Add" . $@;
                    printlog( { id => $originalid || $id || $authid, op => "insert", status => "ERROR" } ) if ($logfile);
                }
                else {
                    printlog( { id => $originalid || $id || $authid, op => "insert", status => "ok" } ) if ($logfile);
                }
            }
            if ($yamlfile) {
                $yamlhash->{$originalid}->{'authid'} = $authid;
                my @subfields;
                foreach my $field ( $record->field("2..") ) {
                    push @subfields, map { ( $_->[0] =~ /[a-z]/ ? $_->[1] : () ) } $field->subfields();
                }
                $yamlhash->{$originalid}->{'subfields'} = \@subfields;
            }
        }
        else {
            my ( $biblionumber, $biblioitemnumber, $itemnumbers_ref, $errors_ref );
            $biblionumber = $id;
            # check for duplicate, based on ISBN (skipped if -match already found a duplicate)
            if ( !$biblionumber && $isbn_check && $isbn ) {
                $sth_isbn->execute($isbn);
                ( $biblionumber, $biblioitemnumber ) = $sth_isbn->fetchrow;
            }
            # NOTE(review): the id map line is written before a new biblio is
            # created, so records that get inserted below are logged with an
            # empty biblionumber -- confirm whether that is intended.
            if ( defined $idmapfl ) {
                if ( $sourcetag < "010" ) {
                    if ( $record->field($sourcetag) ) {
                        my $source = $record->field($sourcetag)->data();
                        printf( IDMAP "%s|%s\n", $source, $biblionumber );
                    }
                } else {
                    my $source = $record->subfield( $sourcetag, $sourcesubfield );
                    printf( IDMAP "%s|%s\n", $source, $biblionumber );
                }
            }
            # create biblio, unless we already have it ( either match or isbn )
            if ($biblionumber) {
                eval {
                    $biblioitemnumber = Koha::Biblios->find($biblionumber)->biblioitem->biblioitemnumber;
                };
                if ($update) {
                    eval { ( $biblionumber, $biblioitemnumber ) = ModBiblio( $record, $biblionumber, GetFrameworkCode($biblionumber) ) };
                    if ($@) {
                        warn "ERROR: Edit biblio $biblionumber failed: $@\n";
                        printlog( { id => $id || $originalid || $biblionumber, op => "update", status => "ERROR" } ) if ($logfile);
                        next RECORD;
                    } else {
                        printlog( { id => $id || $originalid || $biblionumber, op => "update", status => "ok" } ) if ($logfile);
                    }
                } else {
                    printlog( { id => $id || $originalid || $biblionumber, op => "insert", status => "warning : already in database" } ) if ($logfile);
                }
            } else {
                if ($insert) {
                    eval { ( $biblionumber, $biblioitemnumber ) = AddBiblio( $record, '', { defer_marc_save => 1 } ) };
                    if ($@) {
                        warn "ERROR: Adding biblio $biblionumber failed: $@\n";
                        printlog( { id => $id || $originalid || $biblionumber, op => "insert", status => "ERROR" } ) if ($logfile);
                        next RECORD;
                    } else {
                        printlog( { id => $id || $originalid || $biblionumber, op => "insert", status => "ok" } ) if ($logfile);
                    }
                } else {
                    printlog( { id => $id || $originalid || $biblionumber, op => "update", status => "warning : not in database" } ) if ($logfile);
                    # There is no biblio to attach items or MARC data to.
                    next RECORD;
                }
            }
            eval { ( $itemnumbers_ref, $errors_ref ) = AddItemBatchFromMarc( $record, $biblionumber, $biblioitemnumber, '' ); };
            my $error_adding = $@;
            # Work on a clone so that if there are real errors, we can maybe
            # fix them up later.
            my $clone_record = $record->clone();
            C4::Biblio::_strip_item_fields( $clone_record, '' );
            # This sets the marc fields if there was an error, and also calls
            # defer_marc_save.
            ModBiblioMarc( $clone_record, $biblionumber, $framework );
            if ($error_adding) {
                warn "ERROR: Adding items to bib $biblionumber failed: $error_adding";
                printlog( { id => $id || $originalid || $biblionumber, op => "insertitem", status => "ERROR" } ) if ($logfile);
                # if we failed because of an exception, assume that
                # the MARC columns in biblioitems were not set.
                next RECORD;
            }
            else {
                printlog( { id => $id || $originalid || $biblionumber, op => "insertitem", status => "ok" } ) if ($logfile);
            }
            if ( $dedup_barcode && grep { exists $_->{error_code} && $_->{error_code} eq 'duplicate_barcode' } @$errors_ref ) {
                # Find the record called 'barcode'
                my ( $tag, $sub ) = C4::Biblio::GetMarcFromKohaField( 'items.barcode', $framework );
                # Now remove any items that didn't have a duplicate_barcode error,
                # erase the barcodes on items that did, and re-add those items.
                my %dupes;
                # $error_idx, not $i: the record counter must not be shadowed.
                foreach my $error_idx ( 0 .. $#{$errors_ref} ) {
                    my $ref = $errors_ref->[$error_idx];
                    if ( $ref && ( $ref->{error_code} eq 'duplicate_barcode' ) ) {
                        $dupes{ $ref->{item_sequence} } = 1;
                        # Blank the error message because we're going to
                        # retry this one; report_item_errors skips empty slots.
                        $errors_ref->[$error_idx] = undef;
                    }
                }
                my $seq = 0;
                foreach my $field ( $record->field($tag) ) {
                    $seq++;
                    if ( $dupes{$seq} ) {
                        # Here we remove the barcode
                        $field->delete_subfield( code => $sub );
                    } else {
                        # otherwise we delete the field because we don't want
                        # two of them
                        $record->delete_fields($field);
                    }
                }
                # Now re-add the record as before, adding errors to the prev list
                my $more_errors;
                eval { ( $itemnumbers_ref, $more_errors ) = AddItemBatchFromMarc( $record, $biblionumber, $biblioitemnumber, '' ); };
                if ($@) {
                    warn "ERROR: Adding items to bib $biblionumber failed: $@\n";
                    printlog( { id => $id || $originalid || $biblionumber, op => "insertitem", status => "ERROR" } ) if ($logfile);
                    # if we failed because of an exception, assume that
                    # the MARC columns in biblioitems were not set.
                    ModBiblioMarc( $record, $biblionumber, $framework );
                    next RECORD;
                } else {
                    printlog( { id => $id || $originalid || $biblionumber, op => "insertitem", status => "ok" } ) if ($logfile);
                }
                push @$errors_ref, @{ $more_errors || [] };
            }
            if ( $#{$errors_ref} > -1 ) {
                report_item_errors( $biblionumber, $errors_ref );
            }
            $yamlhash->{$originalid} = $biblionumber if ($yamlfile);
        }
        $dbh->commit() if ( 0 == $i % $commitnum );
    }
    print $record->as_formatted() . "\n" if ( $verbose // 0 ) == 2;
}
# Flush whatever is left of the last batch and return to autocommit.
$dbh->commit();
$dbh->{AutoCommit} = 1;

if ($fk_off) {
    $dbh->do("SET FOREIGN_KEY_CHECKS = 1");
}

# Restore CataloguingLog
C4::Context->set_preference( 'CataloguingLog', $CataloguingLog );
# Restore AuthoritiesLog
C4::Context->set_preference( 'AuthoritiesLog', $AuthoritiesLog );

my $timeneeded = gettimeofday - $starttime;
print "\n$i MARC records done in $timeneeded seconds\n";
if ($logfile) {
    print $loghandle "file : $input_marc_file\n";
    print $loghandle "$i MARC records done in $timeneeded seconds\n";
    $loghandle->close;
}

# The id map is opened near the top of the script when -idmap is given;
# close it explicitly so buffered lines are flushed before exit.
if ( defined $idmapfl ) {
    close(IDMAP) or warn "cannot close $idmapfl: $!\n";
}

# Dump the original-id => Koha-id map collected during the import.
if ($yamlfile) {
    open my $yamlfileout, q{>}, "$yamlfile" or die "cannot open $yamlfile \n";
    print $yamlfileout Dump($yamlhash);
    # Buffered write errors only surface at close time.
    close $yamlfileout or die "cannot write $yamlfile: $!\n";
}
exit 0;
# GetRecordId( $marcrecord, $tag, $subfield )
#
# Return the identifier stored in a MARC record.
#   $marcrecord - object providing field($tag) and subfield($tag, $code)
#   $tag        - MARC tag holding the id
#   $subfield   - subfield code; ignored for control fields (tag below 010)
#
# Returns the control field data or the subfield value, or undef when the
# field is missing or a data field is requested without a subfield code.
sub GetRecordId {
    my ( $marcrecord, $tag, $subfield ) = @_;

    my $field = $marcrecord->field($tag);
    return unless $field;

    # Control fields carry their value directly.
    return $field->data() if $tag lt "010";

    # "0" is a valid subfield code, so test for presence, not truthiness.
    return unless defined $subfield && length $subfield;

    return $marcrecord->subfield( $tag, $subfield );
}
# build_query( $match, $record )
#
# Turn the list of matchpoints in $match (array ref of "index,fieldspec"
# strings) into one search query for $record. Each matchpoint is expanded by
# build_simplequery(); empty expansions are dropped and the remainder is
# joined with the boolean AND of the active query syntax.
sub build_query {
    my ( $match, $record ) = @_;

    my @clauses =
      grep { length($_) > 0 }
      map  { scalar build_simplequery( $_, $record ) } @{$match};

    # QueryParser syntax uses '&&'; the classic syntax uses 'and'.
    my $query_parser =
      C4::Context->preference('UseQueryParser')
      ? C4::Context->queryparser
      : undef;
    my $conjunction = $query_parser ? '&&' : 'and';

    return join( " $conjunction ", @clauses );
}
# build_simplequery( $element, $record )
#
# Expand one matchpoint into a query fragment.
#   $element - "index,fieldspec", where fieldspec is a 3-digit tag optionally
#              followed by subfield codes (e.g. "isbn,020a")
#   $record  - object whose field($tag) returns the fields for that tag
#
# Every occurrence of the tag with a non-empty value yields index:"value";
# the fragments are joined with the boolean AND of the active query syntax.
# Returns '' when nothing matches or the matchpoint has no field spec.
sub build_simplequery {
    my ( $element, $record ) = @_;

    my ( $index, $recorddata ) = split /,/, $element;

    my @searchstrings;

    # A matchpoint without a comma has no field spec: nothing to search for.
    if ( defined $recorddata && $recorddata =~ /(\d{3})(.*)/ ) {
        my ( $tag, $subfields ) = ( $1, $2 );
        foreach my $field ( $record->field($tag) ) {
            my $value = $field->as_string("$subfields");
            next unless defined $value && length $value;
            push @searchstrings, "$index:\"" . $value . "\"";
        }
    }

    # QueryParser syntax uses '&&'; the classic syntax uses 'and'.
    my $QParser;
    $QParser = C4::Context->queryparser if ( C4::Context->preference('UseQueryParser') );
    my $op = $QParser ? '&&' : 'and';

    return join( " $op ", @searchstrings );
}
# report_item_errors( $biblionumber, $errors_ref )
#
# Print one line on the currently selected output handle for every item that
# could not be added to biblio $biblionumber. $errors_ref is an array ref of
# hash refs with the keys item_sequence, item_barcode, error_code and
# error_information; empty slots in the array are skipped.
sub report_item_errors {
    my ( $biblionumber, $errors_ref ) = @_;

    foreach my $failure ( @{$errors_ref} ) {
        $failure or next;

        # Error codes are snake_case identifiers; show them as plain words.
        ( my $reason = $failure->{'error_code'} ) =~ tr/_/ /;

        my $line = "Item not added (bib $biblionumber, item tag #$failure->{'item_sequence'}, barcode $failure->{'item_barcode'}): "
          . "$reason $failure->{'error_information'}";
        print $line, "\n";
    }
}
# printlog( $logelements )
#
# Append one "id;op;status" line to the log handle opened by the main script.
# $logelements is a hash ref with the keys id, op and status; undefined
# values are written as empty columns.
sub printlog {
    my ($logelements) = @_;

    my @columns = map { $_ // "" } @{$logelements}{qw<id op status>};
    print {$loghandle} join( ";", @columns ), "\n";
}
# get_heading_fields()
#
# Build the lookup handed to GuessAuthTypeCode(): a hash ref keyed by the
# 'marcflavour' preference whose single value is the heading definition.
# The definition is read from the YAML file given with -authtypes when that
# option is set; otherwise it is selected from the auth_types table, keyed
# by auth_tag_to_report.
sub get_heading_fields {
    my $definition;
    my $by_flavour;

    if ($authtypes) {
        $definition = YAML::LoadFile($authtypes);
        $by_flavour = { C4::Context->preference('marcflavour') => $definition };
        $debug && warn YAML::Dump($by_flavour);
    }
    else {
        $definition = $dbh->selectall_hashref( "SELECT auth_tag_to_report, authtypecode from auth_types", 'auth_tag_to_report', { Slice => {} } );
        $by_flavour = { C4::Context->preference('marcflavour') => $definition };
    }

    return $by_flavour;
}
663 =head1 NAME
665 bulkmarcimport.pl - Import bibliographic/authority records into Koha
667 =head1 USAGE
669 $ export KOHA_CONF=/etc/koha.conf
670 $ perl misc/migration_tools/bulkmarcimport.pl -d -commit 1000 \
671 -file /home/jmf/koha.mrc -n 3000
673 =head1 WARNING
675 Don't use this script before you've entered and checked your MARC parameters
676 tables twice (or more!). Otherwise, the import won't work correctly and you
677 will get invalid data.
679 =head1 DESCRIPTION
681 =over
683 =item B<-h>
685 This version/help screen
687 =item B<-b, -biblios>
689 Type of import: bibliographic records
691 =item B<-a, -authorities>
693 Type of import: authority records
695 =item B<-file>=I<FILE>
697 The I<FILE> to import
699 =item B<-v>
701 Verbose mode. 1 means "some infos", 2 means "MARC dumping"
703 =item B<-fk>
705 Turn off foreign key checks during import.
707 =item B<-n>=I<NUMBER>
709 The I<NUMBER> of records to import. If missing, all the file is imported
711 =item B<-o, -offset>=I<NUMBER>
713 File offset before importing, ie I<NUMBER> of records to skip.
715 =item B<-commit>=I<NUMBER>
717 The I<NUMBER> of records to wait before performing a 'commit' operation
719 =item B<-l>
721 Log the action performed on each record, together with its status, to the given file
723 =item B<-append>
725 If specified, data will be appended to the logfile. If not, the logfile will be erased for each execution.
727 =item B<-t, -test>
729 Test mode: parses the file, saying what it would do, but doing nothing.
731 =item B<-s>
733 Skip automatic conversion of MARC-8 to UTF-8. This option is provided for
734 debugging.
736 =item B<-c>=I<CHARACTERISTIC>
738 The I<CHARACTERISTIC> MARC flavour. At the moment, only I<MARC21> and
739 I<UNIMARC> are supported. MARC21 by default.
741 =item B<-d>
743 Delete EVERYTHING related to biblio in koha-DB before import. Tables: biblio,
744 biblioitems, items
746 =item B<-m>=I<FORMAT>
748 Input file I<FORMAT>: I<MARCXML> or I<ISO2709> (defaults to ISO2709)
750 =item B<-authtypes>
752 YAML file describing the authority types and the record field that
753 distinguishes each of them, so that records are stored with the correct authtype
755 =item B<-yaml>
757 Name of a YAML file to write, mapping each record's original id to the id it was given in Koha
759 =item B<-filter>
761 list of fields that will not be imported. Can be any from 000 to 999 or field,
762 subfield and subfield's matching value such as 200avalue
764 =item B<-insert>
766 if set, only insert when possible
768 =item B<-update>
770 if set, only updates (any biblio should have a matching record)
772 =item B<-all>
774 if set, do whatever is required
776 =item B<-k, -keepids>=<FIELD>
778 Store the original record ids in I<FIELD> (useful for authorities, where 001 contains the
779 authid for Koha, that can contain a very valuable info for authorities coming
780 from LOC or BNF. useless for biblios probably)
782 =item B<-match>=<FIELD>
784 I<FIELD> is a matchpoint of the form matchindex,fieldtomatch used to deduplicate. fieldtomatch
785 can be either a tag from 001 to 999 or a tag followed by a list of subfields, such as 100abcde
787 =item B<-i,-isbn>
789 If set, a search will be done on isbn, and, if the same isbn is found, the
790 biblio is not added. It's another method to deduplicate. B<-match> & B<-isbn>
791 can be both set.
793 =item B<-cleanisbn>
795 Clean ISBN fields from entering biblio records, ie removes hyphens. By default,
796 ISBN are cleaned. --nocleanisbn will keep ISBN unchanged.
798 =item B<-x>=I<TAG>
800 Source bib I<TAG> for reporting the source bib number
802 =item B<-y>=I<SUBFIELD>
804 Source I<SUBFIELD> for reporting the source bib number
806 =item B<-idmap>=I<FILE>
808 I<FILE> for the koha bib and source id
810 =item B<-keepids>
812 Store ids in 009 (useful for authorities, where 001 contains the authid for
813 Koha, that can contain a very valuable info for authorities coming from LOC or
814 BNF. useless for biblios probably)
816 =item B<-dedupbarcode>
818 If set, whenever a duplicate barcode is detected, it is removed and the attempt
819 to add the record is retried, thereby giving the record a blank barcode. This
820 is useful when something has set barcodes to be a biblio ID, or similar
821 (usually other software.)
823 =item B<-framework>
825 This is the code for the framework that the requested records will have attached
826 to them when they are created. If not specified, then the default framework
827 will be used.
829 =item B<-custom>=I<MODULE>
831 This parameter allows you to use a local module with a customize subroutine
832 that is called for each MARC record.
833 If no filename is passed, LocalChanges.pm is assumed to be in the
834 migration_tools subdirectory. You may pass an absolute file name or a file name
835 from the migration_tools directory.
837 =item B<-marcmodtemplate>=I<TEMPLATE>
839 This parameter allows you to specify the name of an existing MARC
840 modification template to apply as the MARC records are imported (these
841 templates are created in the "MARC modification templates" tool in Koha).
842 If not specified, no MARC modification templates are used (default).
844 =back
846 =cut