Bug 20434: Update UNIMARC framework - auth (GENRE/FORM)
[koha.git] / misc / migration_tools / bulkmarcimport.pl
blobbc438aac38eab5860ed0e3aeaef0592e6df23039
1 #!/usr/bin/perl
2 # Import an iso2709 file into Koha 3
4 use Modern::Perl;
5 #use diagnostics;
6 BEGIN {
7 # find Koha's Perl modules
8 # test carefully before changing this
9 use FindBin;
10 eval { require "$FindBin::Bin/../kohalib.pl" };
13 # Koha modules used
14 use MARC::File::USMARC;
15 use MARC::File::XML;
16 use MARC::Record;
17 use MARC::Batch;
18 use MARC::Charset;
20 use Koha::Script;
21 use C4::Context;
22 use C4::Biblio;
23 use C4::Koha;
24 use C4::Debug;
25 use C4::Charset;
26 use C4::Items;
27 use C4::MarcModificationTemplates;
29 use YAML;
30 use Unicode::Normalize;
31 use Time::HiRes qw(gettimeofday);
32 use Getopt::Long;
33 use IO::File;
34 use Pod::Usage;
36 use Koha::Biblios;
37 use Koha::SearchEngine;
38 use Koha::SearchEngine::Search;
40 use open qw( :std :encoding(UTF-8) );
41 binmode( STDOUT, ":encoding(UTF-8)" );
# ---------------------------------------------------------------------------
# Command-line state. Every variable below is filled in by GetOptions and is
# read by the rest of the script, so the names must stay as they are.
# ---------------------------------------------------------------------------

# Input file, number of records to import (0 means no limit) and number of
# leading records to skip.
my $input_marc_file = '';
my $number          = 0;
my $offset          = 0;

# Plain switches and values; all of them start out undefined.
# NOTE(review): $char_encoding is set by -c but is not read anywhere else in
# the visible script.
my (
    $version,    $delete,  $test_parameter, $skip_marc8_conversion,
    $char_encoding, $verbose, $commit,      $fk_off,
    $format,     $biblios, $authorities,    $keepids,
    $match,      $isbn_check, $logfile,
);
my ( $insert, $filters, $update, $all, $yamlfile, $authtypes, $append );

# ISBN cleaning is on by default; --nocleanisbn switches it off.
my $cleanisbn = 1;

my ( $sourcetag, $sourcesubfield, $idmapfl, $dedup_barcode );
my $framework = '';
my $localcust;

# Name given with --marcmodtemplate and the id it resolves to (-1 = none).
my $marc_mod_template    = '';
my $marc_mod_template_id = -1;

# Unbuffer the currently selected output handle so progress dots show up
# immediately.
$| = 1;

GetOptions(
    'commit:f'          => \$commit,
    'file:s'            => \$input_marc_file,
    'n:f'               => \$number,
    'o|offset:f'        => \$offset,
    'h'                 => \$version,
    'd'                 => \$delete,
    't|test'            => \$test_parameter,
    's'                 => \$skip_marc8_conversion,
    'c:s'               => \$char_encoding,
    'v:+'               => \$verbose,
    'fk'                => \$fk_off,
    'm:s'               => \$format,
    'l:s'               => \$logfile,
    'append'            => \$append,
    'k|keepids:s'       => \$keepids,
    'b|biblios'         => \$biblios,
    'a|authorities'     => \$authorities,
    'authtypes:s'       => \$authtypes,
    'filter=s@'         => \$filters,
    'insert'            => \$insert,
    'update'            => \$update,
    'all'               => \$all,
    'match=s@'          => \$match,
    'i|isbn'            => \$isbn_check,
    'x:s'               => \$sourcetag,
    'y:s'               => \$sourcesubfield,
    'idmap:s'           => \$idmapfl,
    'cleanisbn!'        => \$cleanisbn,
    'yaml:s'            => \$yamlfile,
    'dedupbarcode'      => \$dedup_barcode,
    'framework=s'       => \$framework,
    'custom:s'          => \$localcust,
    'marcmodtemplate:s' => \$marc_mod_template,
);
# Derive defaults from the parsed options: import biblios unless authorities
# were requested, and insert unless --update was requested.
$biblios ||= !$authorities;
$insert  ||= !$update;

# Mode handed to IO::File->new for the log file: append with --append,
# otherwise overwrite.
my $writemode = ($append) ? "a" : "w";

# -exitval needs an explicit value; a bare "-exitval" leaves pod2usage with
# an odd-length option list. 2 is the status Pod::Usage uses by default for a
# message-only usage error.
pod2usage(
    -msg     => "\nYou must specify either --biblios or --authorities, not both.\n",
    -exitval => 2,
) if $biblios && $authorities;

# --all means "insert and update".
if ($all) {
    $insert = 1;
    $update = 1;
}

# -h, or no input file at all: show the full manual and stop.
if ( $version || ( $input_marc_file eq '' ) ) {
    pod2usage( -verbose => 2 );
    exit;
}

# Updating needs some way of finding the existing record.
if ( $update && !( $match || $isbn_check ) ) {
    warn "Using -update without -match or -isbn seems to be useless.\n";
}
# --custom: load a local module providing a customize() subroutine and turn
# $localcust into a reference to that subroutine.
if ( defined $localcust ) {
    unless ( -e $localcust ) {
        # Not an existing path: reduce it to a bare module name (default
        # "LocalChanges") and look for it next to this script.
        $localcust ||= 'LocalChanges';
        $localcust =~ s{^.*/([^/]+)$}{$1};    # keep the file name only
        $localcust =~ s{\.pm$}{};             # drop the extension

        my $candidate = $FindBin::Bin . "/$localcust.pm";
        if ( !-e $candidate ) {
            print "WARNING: customize module $localcust.pm not found!\n";
            exit 1;
        }
        $localcust = $candidate;
    }

    if ($localcust) {
        require $localcust;
        $localcust = \&customize;
    }
}
# --marcmodtemplate: translate the template name into its template_id.
# Exits with status 1 when the name is ambiguous and dies when no template
# has that name; otherwise $marc_mod_template_id holds the matching id.
if ( $marc_mod_template ne '' ) {
    my @templates = GetModificationTemplates();
    foreach my $this_template (@templates) {
        if ( $this_template->{'name'} eq $marc_mod_template ) {
            if ( $marc_mod_template_id < 0 ) {
                $marc_mod_template_id = $this_template->{'template_id'};
            } else {
                # A second template with the same name: refuse to guess.
                print "WARNING: MARC modification template name " .
                      "'$marc_mod_template' matches multiple templates. " .
                      "Please rename these templates\n";
                exit 1;
            }
        }
    }
    if ( $marc_mod_template_id < 0 ) {
        die "Can't locate MARC modification template '$marc_mod_template'\n";
    } else {
        print "Records will be modified using MARC modification template: $marc_mod_template\n" if $verbose;
    }
}
my $dbh = C4::Context->dbh;
my $heading_fields=get_heading_fields();

# --idmap: open the mapping file for writing. The bareword handle IDMAP is
# kept because the import loop prints to it by that name. Three-argument
# open keeps characters in the file name from being read as an open mode,
# and $! reports why the open failed.
if (defined $idmapfl) {
    open( IDMAP, '>', $idmapfl ) or die "cannot open $idmapfl: $!\n";
}

# Neither -x nor -y given: fall back to 910$a as the source of the original
# record number.
if ((not defined $sourcesubfield) && (not defined $sourcetag)){
    $sourcetag="910";
    $sourcesubfield="a";
}
# Logging every imported biblio or authority would slow the import down for
# no benefit, so both action logs are switched off for the duration of the
# run. The previous values are kept in $CataloguingLog and $AuthoritiesLog so
# they can be put back once the import is finished.

# The syspref cache has to be off for the changed settings to be seen.
C4::Context->disable_syspref_cache();

# Remember the current values, then switch both logs off.
my ( $CataloguingLog, $AuthoritiesLog ) =
    map { scalar C4::Context->preference($_) } qw( CataloguingLog AuthoritiesLog );
C4::Context->set_preference( $_, 0 ) for qw( CataloguingLog AuthoritiesLog );
# -fk: switch foreign key checks off for the import.
$dbh->do("SET FOREIGN_KEY_CHECKS = 0") if $fk_off;

# -d: empty the target tables before importing.
# NOTE(review): this runs even when -t (test mode) is given -- confirm that
# is intended.
if ($delete) {
    if ($biblios) {
        print "deleting biblios\n";
        # Empty each table and reset its auto-increment counter.
        for my $table (qw( biblio biblioitems items )) {
            $dbh->do("DELETE FROM $table");
            $dbh->do("ALTER TABLE $table AUTO_INCREMENT = 1");
        }
    }
    else {
        print "deleting authorities\n";
        $dbh->do("truncate auth_header");
    }
    # The indexing queue is emptied in both cases.
    $dbh->do("truncate zebraqueue");
}

print "TESTING MODE ONLY\n DOING NOTHING\n===============\n" if $test_parameter;
my $marcFlavour = C4::Context->preference('marcflavour') || 'MARC21';

print "Characteristic MARC flavour: $marcFlavour\n" if $verbose;
my $starttime = gettimeofday;
my $batch;

# Open the input ourselves: don't let MARC::Batch open the file, as it
# applies the ':utf8' IO layer. Fail here with a clear message rather than
# handing an undefined handle to MARC::Batch.
my $fh = IO::File->new($input_marc_file)
    or die "Could not open $input_marc_file: $!\n";

if (defined $format && $format =~ /XML/i) {
    # ugly hack follows -- MARC::File::XML, when used by MARC::Batch,
    # appears to try to convert incoming XML records from MARC-8
    # to UTF-8.  Setting the BinaryEncoding key turns that off
    # TODO: see what happens to ISO-8859-1 XML files.
    # TODO: determine if MARC::Batch can be fixed to handle
    #       XML records properly -- it probably should be
    #       using a proper push or pull XML parser to
    #       extract the records, not using regexes to look
    #       for <record>.*</record>.
    $MARC::File::XML::_load_args{BinaryEncoding} = 'utf-8';
    my $recordformat= ($marcFlavour eq "MARC21"?"USMARC":uc($marcFlavour));
    #UNIMARC Authorities have a different way to manage encoding than UNIMARC biblios.
    $recordformat=$recordformat."AUTH" if ($authorities and $marcFlavour ne "MARC21");
    $MARC::File::XML::_load_args{RecordFormat} = $recordformat;
    $batch = MARC::Batch->new( 'XML', $fh );
} else {
    # Anything that is not XML is read as ISO2709.
    $batch = MARC::Batch->new( 'USMARC', $fh );
}
$batch->warnings_off();
$batch->strict_off();
# $i counts the records read; a commit is issued every $commitnum records
# (50 unless -commit says otherwise).
my $i=0;
my $commitnum = $commit ? $commit : 50;
my $yamlhash;

# Skip file offset.
# -o accepts a float, so count whole records with a bounded loop: counting
# $offset down to zero never ends for a negative or fractional value.
if ( $offset > 0 ) {
    print "Skipping file offset: $offset records\n";
    $batch->next() for 1 .. int($offset);
}

# Where the record id lives: 001 for authorities; for biblios whatever
# GetMarcFromKohaField returns for biblio.biblionumber, falling back to 001.
my ($tagid,$subfieldid);
if ($authorities){
    $tagid='001';
}
else {
    ( $tagid, $subfieldid ) =
        GetMarcFromKohaField( "biblio.biblionumber" );
    $tagid||="001";
}

# the SQL query to search on isbn
my $sth_isbn = $dbh->prepare("SELECT biblionumber,biblioitemnumber FROM biblioitems WHERE isbn=?");

# Commits are issued explicitly from here on.
$dbh->{AutoCommit} = 0;

# -l: open the log file and write its header line. Stop with a clear message
# if it cannot be opened instead of failing on the first print.
my $loghandle;
if ($logfile){
    $loghandle= IO::File->new($logfile, $writemode)
        or die "Could not open log file $logfile: $!\n";
    print $loghandle "id;operation;status\n";
}

# Search object used for -match lookups, on the index that corresponds to
# the kind of record being imported.
my $searcher = Koha::SearchEngine::Search->new(
    {
        index => (
              $authorities
            ? $Koha::SearchEngine::AUTHORITIES_INDEX
            : $Koha::SearchEngine::BIBLIOS_INDEX
        )
    }
);
# Main import loop. Reads one record at a time from $batch, normalises it
# (charset, optional MARC modification template, optional custom hook, ISBN
# cleaning, -filter), looks for an existing record (-match / -isbn) and then
# adds or updates the authority or the biblio with its items.
RECORD: while ( ) {
    # Enforce -n before fetching another record. The check sits at the top
    # of the loop so that records abandoned with "next" cannot step over it
    # (a test at the bottom of the body is skipped by every "next").
    last RECORD if $number > 0 && $i >= $number;

    my $record;
    # get records
    eval { $record = $batch->next() };
    if ( $@ ) {
        print "Bad MARC record $i: $@ skipped\n";
        # FIXME - because MARC::Batch->next() combines grabbing the next
        # blob and parsing it into one operation, a correctable condition
        # such as a MARC-8 record claiming that it's UTF-8 can't be recovered
        # from because we don't have access to the original blob.  Note
        # that the staging import can deal with this condition (via
        # C4::Charset::MarcToUTF8Record) because it doesn't use MARC::Batch.
        next RECORD;
    }
    # No record returned: stop importing (presumably the end of the input).
    last RECORD unless ( $record );
    $i++;
    if( ($verbose//1)==1 ) { #no dot for verbose==2
        # One dot per record, and the running count every 100 records.
        print "." . ( $i % 100==0 ? "\n$i" : '' );
    }

    # transcode the record to UTF8 if needed & applicable.
    if ($record->encoding() eq 'MARC-8' and not $skip_marc8_conversion) {
        # FIXME update condition
        my ($guessed_charset, $charset_errors);
        ($record, $guessed_charset, $charset_errors) = MarcToUTF8Record($record, $marcFlavour.(($authorities and $marcFlavour ne "MARC21")?'AUTH':''));
        if ($guessed_charset eq 'failed') {
            warn "ERROR: failed to perform character conversion for record $i\n";
            next RECORD;
        }
    }
    SetUTF8Flag($record);
    if($marc_mod_template_id > 0) {
        print "Modifying MARC\n" if $verbose;
        ModifyRecordWithTemplate( $marc_mod_template_id, $record );
    }
    # Site-specific customize() hook loaded with --custom.
    $localcust->($record) if $localcust;

    my $isbn;
    # remove hyphens in isbn (only for biblios, of course)
    if( $biblios ) {
        my $tag = $marcFlavour eq 'UNIMARC' ? '010' : '020';
        my $field = $record->field($tag);
        $isbn = $field && $field->subfield('a');
        if ( $isbn && $cleanisbn ) {
            $isbn =~ s/-//g;
            $field->update('a' => $isbn);
        }
    }

    my $id;
    # search for duplicates (based on Local-number)
    my $originalid;
    $originalid = GetRecordId( $record, $tagid, $subfieldid );
    if ($match) {
        require C4::Search;
        my $query = build_query( $match, $record );
        my $server = ( $authorities ? 'authorityserver' : 'biblioserver' );
        $debug && warn $query;
        my ( $error, $results, $totalhits ) = $searcher->simple_search_compat( $query, 0, 3, [$server] );
        # changed to warn so able to continue with one broken record
        if ( defined $error ) {
            warn "unable to search the database for duplicates : $error";
            printlog( { id => $id || $originalid || $match, op => "match", status => "ERROR" } ) if ($logfile);
            next RECORD;
        }
        $debug && warn "$query $server : $totalhits";
        if ( $results && scalar(@$results) == 1 ) {
            # Exactly one hit: treat it as the existing copy of this record.
            my $marcrecord = C4::Search::new_record_from_zebra( $server, $results->[0] );
            SetUTF8Flag($marcrecord);
            $id = GetRecordId( $marcrecord, $tagid, $subfieldid );
            if ( $authorities && $marcFlavour ) {
                # Skip the incoming authority when the one already in the
                # database is at least as recent, judging by field 005.
                if ( $marcrecord->field('005') && $record->field('005') &&
                     $marcrecord->field('005')->data && $record->field('005')->data &&
                     $marcrecord->field('005')->data >= $record->field('005')->data ) {
                    if ($yamlfile) {
                        $yamlhash->{$originalid}->{'authid'} = $id;

                        # we recover all subfields of the heading authorities
                        my @subfields;
                        foreach my $field ( $marcrecord->field("2..") ) {
                            push @subfields, map { ( $_->[0] =~ /[a-z]/ ? $_->[1] : () ) } $field->subfields();
                        }
                        $yamlhash->{$originalid}->{'subfields'} = \@subfields;
                    }
                    next RECORD;
                }
            }
        } elsif ( $results && scalar(@$results) > 1 ) {
            $debug && warn "more than one match for $query";
        } else {
            $debug && warn "nomatch for $query";
        }
    }

    # -keepids: copy the original id into the requested field (a 3-character
    # control tag, or tag + subfield code) and drop the original id field.
    if ($keepids && $originalid) {
        my $storeidfield;
        if ( length($keepids) == 3 ) {
            $storeidfield = MARC::Field->new( $keepids, $originalid );
        } else {
            $storeidfield = MARC::Field->new( substr( $keepids, 0, 3 ), "", "", substr( $keepids, 3, 1 ), $originalid );
        }
        $record->insert_fields_ordered($storeidfield);
        $record->delete_field( $record->field($tagid) );
    }

    # -filter: remove whole fields (3-character filter) or subfields whose
    # value matches (tag + subfield code + pattern).
    foreach my $stringfilter (@$filters) {
        if ( length($stringfilter) == 3 ) {
            foreach my $field ( $record->field($stringfilter) ) {
                $record->delete_field($field);
                $debug && warn "removed : ", $field->as_string;
            }
        } elsif ($stringfilter =~ /([0-9]{3})([a-z0-9])(.*)/) {
            my $removetag = $1;
            my $removesubfield = $2;
            my $removematch = $3;
            if ( ( $removetag > "010" ) && $removesubfield ) {
                foreach my $field ( $record->field($removetag) ) {
                    $field->delete_subfield( code => "$removesubfield", match => $removematch );
                    $debug && warn "Potentially removed : ", $field->subfield($removesubfield);
                }
            }
        }
    }

    unless ($test_parameter) {
        if ($authorities){
            use C4::AuthoritiesMarc;
            my $authtypecode=GuessAuthTypeCode($record, $heading_fields);
            my $authid= ($id?$id:GuessAuthId($record));
            if ($authid && GetAuthority($authid) && $update ){
                ## Authority has an id and is in database : Replace
                eval { ( $authid ) = ModAuthority($authid,$record, $authtypecode) };
                if ($@){
                    warn "Problem with authority $authid Cannot Modify";
                    printlog({id=>$originalid||$id||$authid, op=>"edit",status=>"ERROR"}) if ($logfile);
                }
                else{
                    printlog({id=>$originalid||$id||$authid, op=>"edit",status=>"ok"}) if ($logfile);
                }
            }
            elsif (defined $authid) {
                ## An authid is defined but no authority in database : add
                eval { ( $authid ) = AddAuthority($record,$authid, $authtypecode) };
                if ($@){
                    warn "Problem with authority $authid Cannot Add ".$@;
                    printlog({id=>$originalid||$id||$authid, op=>"insert",status=>"ERROR"}) if ($logfile);
                }
                else{
                    printlog({id=>$originalid||$id||$authid, op=>"insert",status=>"ok"}) if ($logfile);
                }
            }
            else {
                ## True insert in database
                eval { ( $authid ) = AddAuthority($record,"", $authtypecode) };
                if ($@){
                    warn "Problem with authority $authid Cannot Add".$@;
                    printlog({id=>$originalid||$id||$authid, op=>"insert",status=>"ERROR"}) if ($logfile);
                }
                else{
                    printlog({id=>$originalid||$id||$authid, op=>"insert",status=>"ok"}) if ($logfile);
                }
            }
            if ($yamlfile) {
                # Remember the new authid and the lettered subfields of the
                # 2xx heading fields under the original id.
                $yamlhash->{$originalid}->{'authid'} = $authid;
                my @subfields;
                foreach my $field ( $record->field("2..") ) {
                    push @subfields, map { ( $_->[0] =~ /[a-z]/ ? $_->[1] : () ) } $field->subfields();
                }
                $yamlhash->{$originalid}->{'subfields'} = \@subfields;
            }
        }
        else {
            my ( $biblionumber, $biblioitemnumber, $itemnumbers_ref, $errors_ref );
            $biblionumber = $id;
            # check for duplicate, based on ISBN (skip it if we already have found a duplicate with match parameter
            if (!$biblionumber && $isbn_check && $isbn) {
                # warn "search ISBN : $isbn";
                $sth_isbn->execute($isbn);
                ($biblionumber,$biblioitemnumber) = $sth_isbn->fetchrow;
            }

            # --idmap: read the source id now, before the record is handed to
            # the Koha add/modify calls, but write the mapping line only once
            # the biblionumber is known (see below). Writing it here would
            # give new records an empty biblionumber.
            my ( $idmap_source, $idmap_wanted );
            if (defined $idmapfl) {
                if ($sourcetag < "010"){
                    if ($record->field($sourcetag)){
                        $idmap_source = $record->field($sourcetag)->data();
                        $idmap_wanted = 1;
                    }
                } else {
                    $idmap_source = $record->subfield($sourcetag,$sourcesubfield);
                    $idmap_wanted = 1;
                }
            }

            # create biblio, unless we already have it ( either match or isbn )
            if ($biblionumber) {
                eval{
                    $biblioitemnumber = Koha::Biblios->find( $biblionumber )->biblioitem->biblioitemnumber;
                };
                if ($update) {
                    eval { ModBiblio( $record, $biblionumber, GetFrameworkCode($biblionumber) ) };
                    if ($@) {
                        warn "ERROR: Edit biblio $biblionumber failed: $@\n";
                        printlog( { id => $id || $originalid || $biblionumber, op => "update", status => "ERROR" } ) if ($logfile);
                        next RECORD;
                    } else {
                        printlog( { id => $id || $originalid || $biblionumber, op => "update", status => "ok" } ) if ($logfile);
                    }
                } else {
                    printlog( { id => $id || $originalid || $biblionumber, op => "insert", status => "warning : already in database" } ) if ($logfile);
                }
            } else {
                if ($insert) {
                    eval { ( $biblionumber, $biblioitemnumber ) = AddBiblio( $record, '', { defer_marc_save => 1 } ) };
                    if ($@) {
                        warn "ERROR: Adding biblio $biblionumber failed: $@\n";
                        printlog( { id => $id || $originalid || $biblionumber, op => "insert", status => "ERROR" } ) if ($logfile);
                        next RECORD;
                    } else {
                        printlog( { id => $id || $originalid || $biblionumber, op => "insert", status => "ok" } ) if ($logfile);
                    }
                } else {
                    warn "WARNING: Updating record ".($id||$originalid)." failed";
                    printlog( { id => $id || $originalid || $biblionumber, op => "update", status => "warning : not in database" } ) if ($logfile);
                    next RECORD;
                }
            }

            # The biblionumber is final at this point: write the idmap line.
            printf(IDMAP "%s|%s\n",$idmap_source,$biblionumber) if $idmap_wanted;

            eval { ( $itemnumbers_ref, $errors_ref ) = AddItemBatchFromMarc( $record, $biblionumber, $biblioitemnumber, '' ); };
            my $error_adding = $@;
            # Work on a clone so that if there are real errors, we can maybe
            # fix them up later.
            my $clone_record = $record->clone();
            C4::Biblio::_strip_item_fields($clone_record, '');
            # This sets the marc fields if there was an error, and also calls
            # defer_marc_save.
            ModBiblioMarc( $clone_record, $biblionumber, $framework );
            if ( $error_adding ) {
                warn "ERROR: Adding items to bib $biblionumber failed: $error_adding";
                printlog({id=>$id||$originalid||$biblionumber, op=>"insertitem",status=>"ERROR"}) if ($logfile);
                # if we failed because of an exception, assume that
                # the MARC columns in biblioitems were not set.
                next RECORD;
            }
            else{
                printlog({id=>$id||$originalid||$biblionumber, op=>"insertitem",status=>"ok"}) if ($logfile);
            }
            if ($dedup_barcode && grep { exists $_->{error_code} && $_->{error_code} eq 'duplicate_barcode' } @$errors_ref) {
                # Find the record called 'barcode'
                my ($tag, $sub) = C4::Biblio::GetMarcFromKohaField( 'items.barcode' );
                # Now remove any items that didn't have a duplicate_barcode error,
                # erase the barcodes on items that did, and re-add those items.
                my %dupes;
                foreach my $i (0 .. $#{$errors_ref}) {
                    my $ref = $errors_ref->[$i];
                    if ($ref && ($ref->{error_code} eq 'duplicate_barcode')) {
                        $dupes{$ref->{item_sequence}} = 1;
                        # Delete the error message because we're going to
                        # retry this one.
                        delete $errors_ref->[$i];
                    }
                }
                my $seq = 0;
                foreach my $field ($record->field($tag)) {
                    $seq++;
                    if ($dupes{$seq}) {
                        # Here we remove the barcode
                        $field->delete_subfield(code => $sub);
                    } else {
                        # otherwise we delete the field because we don't want
                        # two of them
                        $record->delete_fields($field);
                    }
                }
                # Now re-add the record as before, adding errors to the prev list
                my $more_errors;
                eval { ( $itemnumbers_ref, $more_errors ) = AddItemBatchFromMarc( $record, $biblionumber, $biblioitemnumber, '' ); };
                if ( $@ ) {
                    warn "ERROR: Adding items to bib $biblionumber failed: $@\n";
                    printlog({id=>$id||$originalid||$biblionumber, op=>"insertitem",status=>"ERROR"}) if ($logfile);
                    # if we failed because of an exception, assume that
                    # the MARC columns in biblioitems were not set.
                    ModBiblioMarc( $record, $biblionumber, $framework );
                    next RECORD;
                } else {
                    printlog({id=>$id||$originalid||$biblionumber, op=>"insertitem",status=>"ok"}) if ($logfile);
                }
                push @$errors_ref, @{ $more_errors };
            }
            if ($#{ $errors_ref } > -1) {
                report_item_errors($biblionumber, $errors_ref);
            }
            $yamlhash->{$originalid} = $biblionumber if ($yamlfile);
        }
        # Commit every $commitnum records.
        $dbh->commit() if (0 == $i % $commitnum);
    }
    print $record->as_formatted()."\n" if ($verbose//0)==2;
}
# Commit whatever is still pending and hand commit control back to DBI.
$dbh->commit();
$dbh->{AutoCommit} = 1;

if ($fk_off) {
    $dbh->do("SET FOREIGN_KEY_CHECKS = 1");
}

# Restore CataloguingLog
C4::Context->set_preference( 'CataloguingLog', $CataloguingLog );
# Restore AuthoritiesLog
C4::Context->set_preference( 'AuthoritiesLog', $AuthoritiesLog );

my $timeneeded = gettimeofday - $starttime;
print "\n$i MARC records done in $timeneeded seconds\n";
if ($logfile){
    print $loghandle "file : $input_marc_file\n";
    print $loghandle "$i MARC records done in $timeneeded seconds\n";
    $loghandle->close;
}

# Close the id map explicitly so that a buffered write error is reported
# instead of being lost at exit. IDMAP is open whenever --idmap was given.
if ( defined $idmapfl ) {
    close(IDMAP) or warn "WARNING: could not finish writing $idmapfl: $!\n";
}

# --yaml: dump the id mapping collected during the import. The close is
# checked because write errors on a buffered handle only show up there.
if ($yamlfile) {
    open my $yamlfileout, q{>}, "$yamlfile" or die "cannot open $yamlfile: $!\n";
    print $yamlfileout Dump($yamlhash);
    close $yamlfileout or die "cannot write $yamlfile: $!\n";
}
exit 0;
# GetRecordId( $marcrecord, $tag, $subfield )
#
# Returns the identifier stored in $marcrecord:
#   - for a tag that sorts before "010", the data of the first such field;
#   - otherwise, when $subfield is given, the value of $tag$subfield.
# Returns undef when the field is absent or when a tag of "010" or above
# comes without a subfield code.
sub GetRecordId {
    my ( $marcrecord, $tag, $subfield ) = @_;

    if ( $tag lt "010" ) {
        # Control field: no subfields, the id is the field data itself.
        my $control_field = $marcrecord->field($tag);
        return $control_field->data() if $control_field;
    }
    elsif ( $subfield && $marcrecord->field($tag) ) {
        return $marcrecord->subfield( $tag, $subfield );
    }

    # Nothing found: a single undef, as callers assign the result to a scalar.
    return undef;
}
# build_query( $match, $record )
#
# $match is a reference to a list of matchpoints ("index,fieldspec").
# Each matchpoint is turned into a search string by build_simplequery; the
# non-empty ones are joined with "&&" when the UseQueryParser preference is
# on and a query parser is available, and with "and" otherwise.
# Returns the combined query string (empty when nothing matched).
sub build_query {
    my ( $match, $record ) = @_;

    my @clauses;
    for my $matchingpoint ( @{$match} ) {
        my $clause = build_simplequery( $matchingpoint, $record );
        next unless length($clause) > 0;
        push @clauses, $clause;
    }

    my $parser =
        C4::Context->preference('UseQueryParser')
      ? C4::Context->queryparser
      : undef;
    my $glue = $parser ? '&&' : 'and';

    return join( " $glue ", @clauses );
}
# build_simplequery( $element, $record )
#
# $element is one matchpoint of the form "index,fieldspec", where fieldspec
# is a 3-digit tag optionally followed by subfield codes (for example
# "isbn,020a"). For every occurrence of that tag in $record whose selected
# subfields give a non-empty string, a clause  index:"value"  is produced.
# The clauses are joined with "&&" when the UseQueryParser preference is on
# and a query parser is available, and with "and" otherwise.
#
# Returns the resulting string; it is empty when the tag is absent from the
# record or when $element has no ",fieldspec" part.
sub build_simplequery {
    my ( $element, $record ) = @_;
    my @searchstrings;
    my ( $index, $recorddata ) = split /,/, $element;

    # A matchpoint without a comma leaves $recorddata undefined; treat it as
    # "nothing to match on" rather than matching against undef.
    if ( defined $recorddata && $recorddata =~ /(\d{3})(.*)/ ) {
        my ( $tag, $subfields ) = ( $1, $2 );
        foreach my $field ( $record->field($tag) ) {
            # Build the field string once and reuse it.
            my $value = $field->as_string("$subfields");
            if ( length($value) > 0 ) {
                push @searchstrings, "$index:\"" . $value . "\"";
            }
        }
    }

    my $QParser;
    $QParser = C4::Context->queryparser if ( C4::Context->preference('UseQueryParser') );
    my $op = $QParser ? '&&' : 'and';

    return join( " $op ", @searchstrings );
}
# report_item_errors( $biblionumber, $errors_ref )
#
# Prints one line on the selected output handle for every entry of
# @$errors_ref that is true. Each entry is a hash reference with the keys
# item_sequence, item_barcode, error_code and error_information; underscores
# in the error code are shown as spaces.
sub report_item_errors {
    my ( $biblionumber, $errors_ref ) = @_;

    for my $problem ( @{$errors_ref} ) {
        # Entries removed by the barcode de-duplication are left empty.
        next unless $problem;

        ( my $readable_code = $problem->{'error_code'} ) =~ s/_/ /g;

        my $where = "Item not added (bib $biblionumber, item tag #$problem->{'item_sequence'}, barcode $problem->{'item_barcode'}): ";
        my $what  = "$readable_code $problem->{'error_information'}";
        print $where . $what, "\n";
    }
}
# printlog( { id => ..., op => ..., status => ... } )
#
# Writes one "id;op;status" line to the log file handle $loghandle.
# Undefined values are written as empty columns.
sub printlog {
    my ($logelements) = @_;
    my @columns = map { $_ // "" } @{$logelements}{qw<id op status>};
    print {$loghandle} join( ";", @columns ), "\n";
}
# get_heading_fields()
#
# Builds the structure handed to GuessAuthTypeCode: a hash reference with a
# single entry, keyed by the marcflavour preference. Its value is either the
# content of the YAML file given with -authtypes or, without that option,
# the rows of auth_types (auth_tag_to_report, authtypecode) keyed by
# auth_tag_to_report.
sub get_heading_fields {
    if ($authtypes) {
        # NOTE(review): YAML::LoadFile reads a user-supplied file; make sure
        # it comes from a trusted source.
        my $from_file = YAML::LoadFile($authtypes);
        my $by_flavour = { C4::Context->preference('marcflavour') => $from_file };
        warn YAML::Dump($by_flavour) if $debug;
        return $by_flavour;
    }

    my $from_db = $dbh->selectall_hashref(
        "SELECT auth_tag_to_report, authtypecode from auth_types",
        'auth_tag_to_report',
        { Slice => {} }
    );
    my $by_flavour = { C4::Context->preference('marcflavour') => $from_db };
    return $by_flavour;
}
672 =head1 NAME
674 bulkmarcimport.pl - Import bibliographic/authority records into Koha
676 =head1 USAGE
678 $ export KOHA_CONF=/etc/koha.conf
679 $ perl misc/migration_tools/bulkmarcimport.pl -d -commit 1000 \\
680 -file /home/jmf/koha.mrc -n 3000
682 =head1 WARNING
684 Don't use this script before you've entered and checked your MARC parameters
685 tables twice (or more!). Otherwise, the import won't work correctly and you
686 will get invalid data.
688 =head1 DESCRIPTION
690 =over
692 =item B<-h>
694 This version/help screen
696 =item B<-b, -biblios>
698 Type of import: bibliographic records
700 =item B<-a, -authorities>
702 Type of import: authority records
704 =item B<-file>=I<FILE>
706 The I<FILE> to import
708 =item B<-v>
710 Verbose mode. 1 means "some infos", 2 means "MARC dumping"
712 =item B<-fk>
714 Turn off foreign key checks during import.
716 =item B<-n>=I<NUMBER>
718 The I<NUMBER> of records to import. If missing, all the file is imported
720 =item B<-o, -offset>=I<NUMBER>
722 File offset before importing, ie I<NUMBER> of records to skip.
724 =item B<-commit>=I<NUMBER>
726 The I<NUMBER> of records to wait before performing a 'commit' operation
728 =item B<-l>
730 File logs actions done for each record and their status into file
732 =item B<-append>
734 If specified, data will be appended to the logfile. If not, the logfile will be erased for each execution.
736 =item B<-t, -test>
738 Test mode: parses the file, saying what it would do, but doing nothing.
740 =item B<-s>
742 Skip automatic conversion of MARC-8 to UTF-8. This option is provided for
743 debugging.
745 =item B<-c>=I<CHARACTERISTIC>
747 The I<CHARACTERISTIC> MARC flavour. At the moment, only I<MARC21> and
748 I<UNIMARC> are supported. MARC21 by default.
750 =item B<-d>
752 Delete EVERYTHING related to biblio in koha-DB before import. Tables: biblio,
753 biblioitems, items
755 =item B<-m>=I<FORMAT>
757 Input file I<FORMAT>: I<MARCXML> or I<ISO2709> (defaults to ISO2709)
759 =item B<-authtypes>
761 YAML file listing the authority types and the record field that distinguishes
762 each of them, so that every record is stored with the correct authtype
764 =item B<-yaml>
766 Write a YAML file that maps the ids found in the imported records to the ids assigned by Koha
768 =item B<-filter>
770 list of fields that will not be imported. Can be any from 000 to 999 or field,
771 subfield and subfield's matching value such as 200avalue
773 =item B<-insert>
775 if set, only insert when possible
777 =item B<-update>
779 if set, only updates (any biblio should have a matching record)
781 =item B<-all>
783 if set, do whatever is required
785 =item B<-k, -keepids>=I<FIELD>
787 Field store ids in I<FIELD> (useful for authorities, where 001 contains the
788 authid for Koha, that can contain a very valuable info for authorities coming
789 from LOC or BNF. useless for biblios probably)
791 =item B<-match>=I<FIELD>
793 Matchpoint used to deduplicate, given as I<FIELD> = matchindex,fieldtomatch.
794 fieldtomatch can be either a tag from 001 to 999, or a tag followed by a list of subfields, such as 100abcde
796 =item B<-i,-isbn>
798 If set, a search will be done on isbn, and, if the same isbn is found, the
799 biblio is not added. It's another method to deduplicate. B<-match> & B<-isbn>
800 can be both set.
802 =item B<-cleanisbn>
804 Clean ISBN fields from entering biblio records, ie removes hyphens. By default,
805 ISBN are cleaned. --nocleanisbn will keep ISBN unchanged.
807 =item B<-x>=I<TAG>
809 Source bib I<TAG> for reporting the source bib number
811 =item B<-y>=I<SUBFIELD>
813 Source I<SUBFIELD> for reporting the source bib number
815 =item B<-idmap>=I<FILE>
817 I<FILE> for the koha bib and source id
819 =item B<-keepids>
821 Store ids in 009 (useful for authorities, where 001 contains the authid for
822 Koha, that can contain a very valuable info for authorities coming from LOC or
823 BNF. useless for biblios probably)
825 =item B<-dedupbarcode>
827 If set, whenever a duplicate barcode is detected, it is removed and the attempt
828 to add the record is retried, thereby giving the record a blank barcode. This
829 is useful when something has set barcodes to be a biblio ID, or similar
830 (usually other software.)
832 =item B<-framework>
834 This is the code for the framework that the requested records will have attached
835 to them when they are created. If not specified, then the default framework
836 will be used.
838 =item B<-custom>=I<MODULE>
840 This parameter allows you to use a local module with a customize subroutine
841 that is called for each MARC record.
842 If no filename is passed, LocalChanges.pm is assumed to be in the
843 migration_tools subdirectory. You may pass an absolute file name or a file name
844 from the migration_tools directory.
846 =item B<-marcmodtemplate>=I<TEMPLATE>
848 This parameter allows you to specify the name of an existing MARC
849 modification template to apply as the MARC records are imported (these
850 templates are created in the "MARC modification templates" tool in Koha).
851 If not specified, no MARC modification templates are used (default).
853 =back
855 =cut