Bug 13757: (QA followup) Make opac-memberentry.pl handle attrs deletion
[koha.git] / C4 / Ris.pm
blob5bf8ecc98df84df46d9d5573f49d3072e7efef6e
1 package C4::Ris;
3 # Original script :
4 ## marc2ris: converts MARC21 and UNIMARC datasets to RIS format
5 ## See comments below for compliance with other MARC dialects
6 ##
7 ## usage: perl marc2ris < infile.marc > outfile.ris
8 ##
9 ## Dependencies: perl 5.6.0 or later
10 ## MARC::Record
11 ## MARC::Charset
13 ## markus@mhoenicka.de 2002-11-16
15 ## This program is free software; you can redistribute it and/or modify
16 ## it under the terms of the GNU General Public License as published by
17 ## the Free Software Foundation; either version 2 of the License, or
18 ## (at your option) any later version.
19 ##
20 ## This program is distributed in the hope that it will be useful,
21 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
22 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 ## GNU General Public License for more details.
25 ## You should have received a copy of the GNU General Public License
26 ## along with this program; if not, write to the Free Software
27 ## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 ## Some background about MARC as understood by this script
30 ## The default input format used in this script is MARC21, which
31 ## superseded USMARC and CANMARC. The specification can be found at:
32 ## http://lcweb.loc.gov/marc/
33 ## UNIMARC follows the specification at:
34 ## http://www.ifla.org/VI/3/p1996-1/sec-uni.htm
35 ## UKMARC support is a bit shaky because there is no specification available
36 ## for free. The wisdom used in this script was taken from a PDF document
37 ## comparing UKMARC to MARC21 found at:
38 ## www.bl.uk/services/bibliographic/marcchange.pdf
41 # Modified 2008 by BibLibre for Koha
42 # Modified 2011 by Catalyst
43 # Modified 2011 by Equinox Software, Inc.
44 # Modified 2016 by Universidad de El Salvador
46 # This file is part of Koha.
48 # Koha is free software; you can redistribute it and/or modify it
49 # under the terms of the GNU General Public License as published by
50 # the Free Software Foundation; either version 3 of the License, or
51 # (at your option) any later version.
53 # Koha is distributed in the hope that it will be useful, but
54 # WITHOUT ANY WARRANTY; without even the implied warranty of
55 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
56 # GNU General Public License for more details.
58 # You should have received a copy of the GNU General Public License
59 # along with Koha; if not, see <http://www.gnu.org/licenses>.
63 use Modern::Perl;
65 use List::MoreUtils qw/uniq/;
66 use vars qw(@ISA @EXPORT);
68 use C4::Biblio qw(GetMarcSubfieldStructureFromKohaField);
69 use Koha::SimpleMARC qw(read_field);
72 @ISA = qw(Exporter);
74 # only export API methods
76 @EXPORT = qw(
77 &marc2ris
80 our $marcprint = 0; # Debug flag;
82 =head1 marc2bibtex - Convert from UNIMARC to RIS
84 my ($ris) = marc2ris($record);
86 Returns a RIS scalar
88 C<$record> - a MARC::Record object
90 =cut
92 sub marc2ris {
93 my ($record) = @_;
94 my $output;
96 my $marcflavour = C4::Context->preference("marcflavour");
97 my $intype = lc($marcflavour);
99 # Let's redirect stdout
100 open my $oldout, ">&STDOUT";
101 my $outvar;
102 close STDOUT;
103 open STDOUT,'>:encoding(utf8)', \$outvar;
105 ## First we should check the character encoding. This may be
106 ## MARC-8 or UTF-8. The former is indicated by a blank, the latter
107 ## by 'a' at position 09 (zero-based) of the leader
108 my $leader = $record->leader();
109 if ( $intype eq "marc21" ) {
110 if ( $leader =~ /^.{9}a/ ) {
111 print "<marc>---\r\n<marc>UTF-8 data\r\n" if $marcprint;
113 else {
114 print "<marc>---\r\n<marc>MARC-8 data\r\n" if $marcprint;
117 ## else: other MARC formats do not specify the character encoding
118 ## we assume it's *not* UTF-8
120 my $RisExportAdditionalFields = C4::Context->preference('RisExportAdditionalFields');
121 my $ris_additional_fields;
122 if ($RisExportAdditionalFields) {
123 $RisExportAdditionalFields = "$RisExportAdditionalFields\n\n";
124 $ris_additional_fields = eval { YAML::Load($RisExportAdditionalFields); };
125 if ($@) {
126 warn "Unable to parse RisExportAdditionalFields : $@";
127 $ris_additional_fields = undef;
131 ## start RIS dataset
132 if ( $ris_additional_fields && $ris_additional_fields->{TY} ) {
133 my ( $f, $sf ) = split( /\$/, $ris_additional_fields->{TY} );
134 my ( $type ) = read_field( { record => $record, field => $f, subfield => $sf, field_numbers => [1] } );
135 if ($type) {
136 print "TY - $type\r\n";
138 else {
139 &print_typetag($leader);
142 else {
143 &print_typetag($leader);
146 ## retrieve all author fields and collect them in a list
147 my @author_fields;
149 if ($intype eq "unimarc") {
150 ## Fields 700, 701, and 702 can contain author names
151 @author_fields = ($record->field('700'), $record->field('701'), $record->field('702'));
153 else { ## marc21, ukmarc
154 ## Field 100 sometimes carries main author
155 ## Field(s) 700 carry added entries - personal names
156 @author_fields = ($record->field('100'), $record->field('700'));
159 ## loop over all author fields
160 foreach my $field (@author_fields) {
161 if (length($field)) {
162 my $author = &get_author($field);
163 print "AU - ",$author,"\r\n";
167 # ToDo: should we specify anonymous as author if we didn't find
168 # one? or use one of the corporate/meeting names below?
170 ## add corporate names or meeting names as editors ??
171 my @editor_fields;
173 if ($intype eq "unimarc") {
174 ## Fields 710, 711, and 712 can carry corporate names
175 ## Field(s) 720, 721, 722, 730 have additional candidates
176 @editor_fields = ($record->field('710'), $record->field('711'), $record->field('712'), $record->field('720'), $record->field('721'), $record->field('722'), $record->field('730'));
178 else { ## marc21, ukmarc
179 ## Fields 110 and 111 carry the main entries - corporate name and
180 ## meeting name, respectively
181 ## Field(s) 710, 711 carry added entries - personal names
182 @editor_fields = ($record->field('110'), $record->field('111'), $record->field('710'), $record->field('711'));
185 ## loop over all editor fields
186 foreach my $field (@editor_fields) {
187 if (length($field)) {
188 my $editor = &get_editor($field);
189 print "ED - ",$editor,"\r\n";
193 ## get info from the title field
194 if ($intype eq "unimarc") {
195 &print_title($record->field('200'));
197 else { ## marc21, ukmarc
198 &print_title($record->field('245'));
201 ## series title
202 if ($intype eq "unimarc") {
203 &print_stitle($record->field('225'));
205 else { ## marc21, ukmarc
206 &print_stitle($record->field('490'));
209 ## ISBN/ISSN
210 if ($intype eq "unimarc") {
211 &print_isbn($record->field('010'));
212 &print_issn($record->field('011'));
214 elsif ($intype eq "ukmarc") {
215 &print_isbn($record->field('021'));
216 ## this is just an assumption
217 &print_issn($record->field('022'));
219 else { ## assume marc21
220 &print_isbn($record->field('020'));
221 &print_issn($record->field('022'));
224 if ($intype eq "marc21") {
225 &print_loc_callno($record->field('050'));
226 &print_dewey($record->field('082'));
228 ## else: unimarc, ukmarc do not seem to store call numbers?
230 ## publication info
231 if ($intype eq "unimarc") {
232 &print_pubinfo($record->field('210'));
234 else { ## marc21, ukmarc
235 if ($record->field('264')) {
236 &print_pubinfo($record->field('264'));
238 else {
239 &print_pubinfo($record->field('260'));
243 ## 6XX fields contain KW candidates. We add all of them to a
245 my @field_list;
246 if ($intype eq "unimarc") {
247 @field_list = ('600', '601', '602', '604', '605', '606','607', '608', '610', '615', '620', '660', '661', '670', '675', '676', '680', '686');
248 } elsif ($intype eq "ukmarc") {
249 @field_list = ('600', '610', '611', '630', '650', '651','653', '655', '660', '661', '668', '690', '691', '692', '695');
250 } else { ## assume marc21
251 @field_list = ('600', '610', '611', '630', '650', '651','653', '654', '655', '656', '657', '658');
254 my @kwpool;
255 for my $f ( @field_list ) {
256 my @fields = $record->field($f);
257 push @kwpool, ( get_keywords("$f",$record->field($f)) );
260 # Remove duplicate
261 @kwpool = uniq @kwpool;
263 for my $kw ( @kwpool ) {
264 print "KW - ", $kw, "\r\n";
267 ## 5XX have various candidates for notes and abstracts. We pool
268 ## all notes-like stuff in one list.
269 my @notepool;
271 ## these fields have notes candidates
272 if ($intype eq "unimarc") {
273 foreach ('300', '301', '302', '303', '304', '305', '306', '307', '308', '310', '311', '312', '313', '314', '315', '316', '317', '318', '320', '321', '322', '323', '324', '325', '326', '327', '328', '332', '333', '336', '337', '345') {
274 &pool_subx(\@notepool, $_, $record->field($_));
277 elsif ($intype eq "ukmarc") {
278 foreach ('500', '501', '502', '503', '504', '505', '506', '508', '514', '515', '516', '521', '524', '525', '528', '530', '531', '532', '533', '534', '535', '537', '538', '540', '541', '542', '544', '554', '555', '556', '557', '561', '563', '580', '583', '584', '586') {
279 &pool_subx(\@notepool, $_, $record->field($_));
282 else { ## assume marc21
283 foreach ('500', '501', '502', '504', '505', '506', '507', '508', '510', '511', '513', '514', '515', '516', '518', '521', '522', '524', '525', '526', '530', '533', '534', '535') {
284 &pool_subx(\@notepool, $_, $record->field($_));
288 my $allnotes = join "; ", @notepool;
290 if (length($allnotes) > 0) {
291 print "N1 - ", $allnotes, "\r\n";
294 ## 320/520 have the abstract
295 if ($intype eq "unimarc") {
296 &print_abstract($record->field('320'));
298 elsif ($intype eq "ukmarc") {
299 &print_abstract($record->field('512'), $record->field('513'));
301 else { ## assume marc21
302 &print_abstract($record->field('520'));
305 # 856u has the URI
306 if ($record->field('856')) {
307 print_uri($record->field('856'));
310 if ($ris_additional_fields) {
311 foreach my $ris_tag ( keys %$ris_additional_fields ) {
312 next if $ris_tag eq 'TY';
314 my @fields =
315 ref( $ris_additional_fields->{$ris_tag} ) eq 'ARRAY'
316 ? @{ $ris_additional_fields->{$ris_tag} }
317 : $ris_additional_fields->{$ris_tag};
319 for my $tag (@fields) {
320 my ( $f, $sf ) = split( /\$/, $tag );
321 my @values = read_field( { record => $record, field => $f, subfield => $sf } );
322 foreach my $v (@values) {
323 print "$ris_tag - $v\r\n";
329 ## end RIS dataset
330 print "ER - \r\n";
332 # Let's re-redirect stdout
333 close STDOUT;
334 open STDOUT, ">&", $oldout;
336 return $outvar;
341 ##********************************************************************
342 ## print_typetag(): prints the first line of a RIS dataset including
343 ## the preceding newline
344 ## Argument: the leader of a MARC dataset
345 ## Returns: the value at leader position 06
346 ##********************************************************************
347 sub print_typetag {
348 my ($leader)= @_;
349 ## the keys of typehash are the allowed values at position 06
350 ## of the leader of a MARC record, the values are the RIS types
351 ## that might appropriately represent these types.
352 my %ustypehash = (
353 "a" => "BOOK",
354 "c" => "MUSIC",
355 "d" => "MUSIC",
356 "e" => "MAP",
357 "f" => "MAP",
358 "g" => "ADVS",
359 "i" => "SOUND",
360 "j" => "SOUND",
361 "k" => "ART",
362 "m" => "DATA",
363 "o" => "GEN",
364 "p" => "GEN",
365 "r" => "ART",
366 "t" => "MANSCPT",
369 my %unitypehash = (
370 "a" => "BOOK",
371 "b" => "MANSCPT",
372 "c" => "MUSIC",
373 "d" => "MUSIC",
374 "e" => "MAP",
375 "f" => "MAP",
376 "g" => "ADVS",
377 "i" => "SOUND",
378 "j" => "SOUND",
379 "k" => "ART",
380 "l" => "ELEC",
381 "m" => "GEN",
382 "r" => "ART",
385 ## The type of a MARC record is found at position 06 of the leader
386 my $typeofrecord = defined($leader) && length $leader >=6 ?
387 substr($leader, 6, 1): undef;
388 ## Pos 07 == Bibliographic level
389 my $biblevel = defined($leader) && length $leader >=7 ?
390 substr($leader, 7, 1): '';
392 ## TODO: for books, field 008 positions 24-27 might have a few more
393 ## hints
395 my %typehash;
396 my $marcflavour = C4::Context->preference("marcflavour");
397 my $intype = lc($marcflavour);
398 if ($intype eq "unimarc") {
399 %typehash = %unitypehash;
401 else {
402 %typehash = %ustypehash;
405 if (!defined $typeofrecord || !exists $typehash{$typeofrecord}) {
406 print "TY - GEN\r\n"; ## most reasonable default
407 warn ("no type found - assume GEN") if $marcprint;
408 } elsif ( $typeofrecord =~ "a" ) {
409 if ( $biblevel eq 'a' ) {
410 print "TY - GEN\r\n"; ## monographic component part
411 } elsif ( $biblevel eq 'b' || $biblevel eq 's' ) {
412 print "TY - SER\r\n"; ## serial or serial component part
413 } elsif ( $biblevel eq 'm' ) {
414 print "TY - $typehash{$typeofrecord}\r\n"; ## book
415 } elsif ( $biblevel eq 'c' || $biblevel eq 'd' ) {
416 print "TY - GEN\r\n"; ## collections, part of collections or made-up collections
417 } elsif ( $biblevel eq 'i' ) {
418 print "TY - DATA\r\n"; ## updating loose-leafe as Dataset
420 } else {
421 print "TY - $typehash{$typeofrecord}\r\n";
424 ## use $typeofrecord as the return value, just in case
425 $typeofrecord;
428 ##********************************************************************
429 ## normalize_author(): normalizes an authorname
430 ## Arguments: authorname subfield a
431 ## authorname subfield b
432 ## authorname subfield c
433 ## name type if known: 0=direct order
434 ## 1=only surname or full name in
435 ## inverted order
436 ## 3=family, clan, dynasty name
437 ## Returns: the normalized authorname
438 ##********************************************************************
439 sub normalize_author {
440 my($rawauthora, $rawauthorb, $rawauthorc, $nametype) = @_;
442 if ($nametype == 0) {
443 # ToDo: convert every input to Last[,(F.|First)[ (M.|Middle)[,Suffix]]]
444 warn("name >>$rawauthora<< in direct order - leave as is") if $marcprint;
445 return $rawauthora;
447 elsif ($nametype == 1) {
448 ## start munging subfield a (the real name part)
449 ## remove spaces after separators
450 $rawauthora =~ s%([,.]+) *%$1%g;
452 ## remove trailing separators after spaces
453 $rawauthora =~ s% *[,;:/]*$%%;
455 ## remove periods after a non-abbreviated name
456 $rawauthora =~ s%(\w{2,})\.%$1%g;
458 ## start munging subfield b (something like the suffix)
459 ## remove trailing separators after spaces
460 $rawauthorb =~ s% *[,;:/]*$%%;
462 ## we currently ignore subfield c until someone complains
463 if (length($rawauthorb) > 0) {
464 return join ", ", ($rawauthora, $rawauthorb);
466 else {
467 return $rawauthora;
470 elsif ($nametype == 3) {
471 return $rawauthora;
475 ##********************************************************************
476 ## get_author(): gets authorname info from MARC fields 100, 700
477 ## Argument: field (100 or 700)
478 ## Returns: an author string in the format found in the record
479 ##********************************************************************
480 sub get_author {
481 my ($authorfield) = @_;
482 my ($indicator);
484 ## the sequence of the name parts is encoded either in indicator
485 ## 1 (marc21) or 2 (unimarc)
486 my $marcflavour = C4::Context->preference("marcflavour");
487 my $intype = lc($marcflavour);
488 if ($intype eq "unimarc") {
489 $indicator = 2;
491 else { ## assume marc21
492 $indicator = 1;
495 print "<marc>:Author(Ind$indicator): ", $authorfield->indicator("$indicator"),"\r\n" if $marcprint;
496 print "<marc>:Author(\$a): ", $authorfield->subfield('a'),"\r\n" if $marcprint;
497 print "<marc>:Author(\$b): ", $authorfield->subfield('b'),"\r\n" if $marcprint;
498 print "<marc>:Author(\$c): ", $authorfield->subfield('c'),"\r\n" if $marcprint;
499 print "<marc>:Author(\$h): ", $authorfield->subfield('h'),"\r\n" if $marcprint;
500 if ($intype eq "ukmarc") {
501 my $authorname = $authorfield->subfield('a') . "," . $authorfield->subfield('h');
502 normalize_author($authorname, $authorfield->subfield('b'), $authorfield->subfield('c'), $authorfield->indicator("$indicator"));
504 else {
505 normalize_author($authorfield->subfield('a') // '', $authorfield->subfield('b') // '', $authorfield->subfield('c') // '', $authorfield->indicator("$indicator"));
509 ##********************************************************************
510 ## get_editor(): gets editor info from MARC fields 110, 111, 710, 711
511 ## Argument: field (110, 111, 710, or 711)
512 ## Returns: an author string in the format found in the record
513 ##********************************************************************
514 sub get_editor {
515 my ($editorfield) = @_;
517 if (!$editorfield) {
518 return;
520 else {
521 print "<marc>Editor(\$a): ", $editorfield->subfield('a'),"\r\n" if $marcprint;
522 print "<marc>Editor(\$b): ", $editorfield->subfield('b'),"\r\n" if $marcprint;
523 print "<marc>editor(\$c): ", $editorfield->subfield('c'),"\r\n" if $marcprint;
524 return $editorfield->subfield('a');
528 ##********************************************************************
529 ## print_title(): gets info from MARC field 245
530 ## Arguments: field (245)
531 ## Returns:
532 ##********************************************************************
533 sub print_title {
534 my ($titlefield) = @_;
535 if (!$titlefield) {
536 print "<marc>empty title field (245)\r\n" if $marcprint;
537 warn("empty title field (245)") if $marcprint;
539 else {
540 print "<marc>Title(\$a): ",$titlefield->subfield('a'),"\r\n" if $marcprint;
541 print "<marc>Title(\$b): ",$titlefield->subfield('b'),"\r\n" if $marcprint;
542 print "<marc>Title(\$c): ",$titlefield->subfield('c'),"\r\n" if $marcprint;
544 ## The title is usually written in a very odd notation. The title
545 ## proper ($a) often ends with a space followed by a separator like
546 ## a slash or a colon. The subtitle ($b) doesn't start with a space
547 ## so simple concatenation looks odd. We have to conditionally remove
548 ## the separator and make sure there's a space between title and
549 ## subtitle
551 my $clean_title = $titlefield->subfield('a');
553 my $clean_subtitle = $titlefield->subfield('b');
554 $clean_subtitle ||= q{};
555 $clean_title =~ s% *[/:;.]$%%;
556 $clean_subtitle =~ s%^ *(.*) *[/:;.]$%$1%;
558 my $marcflavour = C4::Context->preference("marcflavour");
559 my $intype = lc($marcflavour);
560 if (length($clean_title) > 0
561 || (length($clean_subtitle) > 0 && $intype ne "unimarc")) {
562 print "TI - ", $clean_title;
564 ## subfield $b is relevant only for marc21/ukmarc
565 if (length($clean_subtitle) > 0 && $intype ne "unimarc") {
566 print ": ",$clean_subtitle;
568 print "\r\n";
571 ## The statement of responsibility is just this: horrors. There is
572 ## no formal definition how authors, editors and the like should
573 ## be written and designated. The field is free-form and resistant
574 ## to all parsing efforts, so this information is lost on me
576 return;
579 ##********************************************************************
580 ## print_stitle(): prints info from series title field
581 ## Arguments: field
582 ## Returns:
583 ##********************************************************************
584 sub print_stitle {
585 my ($titlefield) = @_;
587 if (!$titlefield) {
588 print "<marc>empty series title field\r\n" if $marcprint;
590 else {
591 print "<marc>Series title(\$a): ",$titlefield->subfield('a'),"\r\n" if $marcprint;
592 my $clean_title = $titlefield->subfield('a');
594 $clean_title =~ s% *[/:;.]$%%;
596 if (length($clean_title) > 0) {
597 print "T2 - ", $clean_title,"\r\n";
600 my $marcflavour = C4::Context->preference("marcflavour");
601 my $intype = lc($marcflavour);
602 if ($intype eq "unimarc") {
603 print "<marc>Series vol(\$v): ",$titlefield->subfield('v'),"\r\n" if $marcprint;
604 if (length($titlefield->subfield('v')) > 0) {
605 print "VL - ", $titlefield->subfield('v'),"\r\n";
609 return;
612 ##********************************************************************
613 ## print_isbn(): gets info from MARC field 020
614 ## Arguments: field (020)
615 ##********************************************************************
616 sub print_isbn {
617 my($isbnfield) = @_;
619 if (!$isbnfield || length ($isbnfield->subfield('a')) == 0) {
620 print "<marc>no isbn found (020\$a)\r\n" if $marcprint;
621 warn("no isbn found") if $marcprint;
623 else {
624 if (length ($isbnfield->subfield('a')) < 10) {
625 print "<marc>truncated isbn (020\$a)\r\n" if $marcprint;
626 warn("truncated isbn") if $marcprint;
629 my $isbn = $isbnfield->subfield('a');
630 print "SN - ", $isbn, "\r\n";
634 ##********************************************************************
635 ## print_issn(): gets info from MARC field 022
636 ## Arguments: field (022)
637 ##********************************************************************
638 sub print_issn {
639 my($issnfield) = @_;
641 if (!$issnfield || length ($issnfield->subfield('a')) == 0) {
642 print "<marc>no issn found (022\$a)\r\n" if $marcprint;
643 warn("no issn found") if $marcprint;
645 else {
646 if (length ($issnfield->subfield('a')) < 9) {
647 print "<marc>truncated issn (022\$a)\r\n" if $marcprint;
648 warn("truncated issn") if $marcprint;
651 my $issn = substr($issnfield->subfield('a'), 0, 9);
652 print "SN - ", $issn, "\r\n";
657 # print_uri() prints info from 856 u
659 sub print_uri {
660 my @f856s = @_;
662 foreach my $f856 (@f856s) {
663 if (my $uri = $f856->subfield('u')) {
664 print "UR - ", $uri, "\r\n";
669 ##********************************************************************
670 ## print_loc_callno(): gets info from MARC field 050
671 ## Arguments: field (050)
672 ##********************************************************************
673 sub print_loc_callno {
674 my($callnofield) = @_;
676 if (!$callnofield || length ($callnofield->subfield('a')) == 0) {
677 print "<marc>no LOC call number found (050\$a)\r\n" if $marcprint;
678 warn("no LOC call number found") if $marcprint;
680 else {
681 print "AV - ", $callnofield->subfield('a'), " ", $callnofield->subfield('b'), "\r\n";
685 ##********************************************************************
686 ## print_dewey(): gets info from MARC field 082
687 ## Arguments: field (082)
688 ##********************************************************************
689 sub print_dewey {
690 my($deweyfield) = @_;
692 if (!$deweyfield || length ($deweyfield->subfield('a')) == 0) {
693 print "<marc>no Dewey number found (082\$a)\r\n" if $marcprint;
694 warn("no Dewey number found") if $marcprint;
696 else {
697 print "U1 - ", $deweyfield->subfield('a'), " ", $deweyfield->subfield('2'), "\r\n";
701 ##********************************************************************
702 ## print_pubinfo(): gets info from MARC field 260
703 ## Arguments: field (260)
704 ##********************************************************************
705 sub print_pubinfo {
706 my($pubinfofield) = @_;
708 if (!$pubinfofield) {
709 print "<marc>no publication information found (260/264)\r\n" if $marcprint;
710 warn("no publication information found") if $marcprint;
712 else {
713 ## the following information is available in MARC21:
714 ## $a place -> CY
715 ## $b publisher -> PB
716 ## $c date -> PY
717 ## the corresponding subfields for UNIMARC:
718 ## $a place -> CY
719 ## $c publisher -> PB
720 ## $d date -> PY
722 ## all of them are repeatable. We pool all places into a
723 ## comma-separated list in CY. We also pool all publishers
724 ## into a comma-separated list in PB. We break the rule with
725 ## the date field because this wouldn't make much sense. In
726 ## this case, we use the first occurrence for PY, the second
727 ## for Y2, and ignore the rest
729 my @pubsubfields = $pubinfofield->subfields();
730 my @cities;
731 my @publishers;
732 my $pycounter = 0;
734 my $pubsub_place;
735 my $pubsub_publisher;
736 my $pubsub_date;
738 my $marcflavour = C4::Context->preference("marcflavour");
739 my $intype = lc($marcflavour);
740 if ($intype eq "unimarc") {
741 $pubsub_place = "a";
742 $pubsub_publisher = "c";
743 $pubsub_date = "d";
745 else { ## assume marc21
746 $pubsub_place = "a";
747 $pubsub_publisher = "b";
748 $pubsub_date = "c";
751 ## loop over all subfield list entries
752 for my $tuple (@pubsubfields) {
753 ## each tuple consists of the subfield code and the value
754 if (@$tuple[0] eq $pubsub_place) {
755 ## strip any trailing crap
756 $_ = @$tuple[1];
757 s% *[,;:/]$%%;
758 ## pool all occurrences in a list
759 push (@cities, $_);
761 elsif (@$tuple[0] eq $pubsub_publisher) {
762 ## strip any trailing crap
763 $_ = @$tuple[1];
764 s% *[,;:/]$%%;
765 ## pool all occurrences in a list
766 push (@publishers, $_);
768 elsif (@$tuple[0] eq $pubsub_date) {
769 ## the dates are free-form, so we want to extract
770 ## a four-digit year and leave the rest as
771 ## "other info"
772 my $protoyear = @$tuple[1];
773 print "<marc>Year (260\$c): $protoyear\r\n" if $marcprint;
775 ## strip any separator chars at the end
776 $protoyear =~ s% *[\.;:/]*$%%;
778 ## isolate a four-digit year. We discard anything
779 ## preceding the year, but keep everything after
780 ## the year as other info.
781 $protoyear =~ s%\D*([0-9\-]{4})(.*)%$1///$2%;
783 ## check what we've got. If there is no four-digit
784 ## year, make it up. If digits are replaced by '-',
785 ## replace those with 0s
787 if (index($protoyear, "/") == 4) {
788 ## have year info
789 ## replace all '-' in the four-digit year
790 ## by '0'
791 substr($protoyear,0,4) =~ s!-!0!g;
793 else {
794 ## have no year info
795 print "<marc>no four-digit year found, use 0000\r\n" if $marcprint;
796 $protoyear = "0000///$protoyear";
797 warn("no four-digit year found, use 0000") if $marcprint;
800 if ($pycounter == 0 && length($protoyear)) {
801 print "PY - $protoyear\r\n";
803 elsif ($pycounter == 1 && length($_)) {
804 print "Y2 - $protoyear\r\n";
806 ## else: discard
808 ## else: discard
811 ## now dump the collected CY and PB lists
812 if (@cities > 0) {
813 print "CY - ", join(", ", @cities), "\r\n";
815 if (@publishers > 0) {
816 print "PB - ", join(", ", @publishers), "\r\n";
821 ##********************************************************************
822 ## get_keywords(): prints info from MARC fields 6XX
823 ## Arguments: list of fields (6XX)
824 ##********************************************************************
825 sub get_keywords {
826 my($fieldname, @keywords) = @_;
828 my @kw;
829 ## a list of all possible subfields
830 my @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'x', 'y', 'z', '2', '3', '4');
832 ## loop over all 6XX fields
833 foreach my $kwfield (@keywords) {
834 if ($kwfield != undef) {
835 ## authornames get special treatment
836 if ($fieldname eq "600") {
837 my $val = normalize_author($kwfield->subfield('a'), $kwfield->subfield('b'), $kwfield->subfield('c'), $kwfield->indicator('1'));
838 push @kw, $val;
839 print "<marc>Field $kwfield subfield a:", $kwfield->subfield('a'), "\r\n<marc>Field $kwfield subfield b:", $kwfield->subfield('b'), "\r\n<marc>Field $kwfield subfield c:", $kwfield->subfield('c'), "\r\n" if $marcprint;
841 else {
842 ## retrieve all available subfields
843 my @kwsubfields = $kwfield->subfields();
845 ## loop over all available subfield tuples
846 foreach my $kwtuple (@kwsubfields) {
847 ## loop over all subfields to check
848 foreach my $subfield (@subfields) {
849 ## [0] contains subfield code
850 if (@$kwtuple[0] eq $subfield) {
851 ## [1] contains value, remove trailing separators
852 @$kwtuple[1] =~ s% *[,;.:/]*$%%;
853 if (length(@$kwtuple[1]) > 0) {
854 push @kw, @$kwtuple[1];
855 print "<marc>Field $fieldname subfield $subfield:", @$kwtuple[1], "\r\n" if $marcprint;
857 ## we can leave the subfields loop here
858 last;
865 return @kw;
868 ##********************************************************************
869 ## pool_subx(): adds contents of several subfields to a list
870 ## Arguments: reference to a list
871 ## field name
872 ## list of fields (5XX)
873 ##********************************************************************
874 sub pool_subx {
875 my($aref, $fieldname, @notefields) = @_;
877 ## we use a list that contains the interesting subfields
878 ## for each field
879 # ToDo: this is apparently correct only for marc21
880 my @subfields;
882 if ($fieldname eq "500") {
883 @subfields = ('a');
885 elsif ($fieldname eq "501") {
886 @subfields = ('a');
888 elsif ($fieldname eq "502") {
889 @subfields = ('a');
891 elsif ($fieldname eq "504") {
892 @subfields = ('a', 'b');
894 elsif ($fieldname eq "505") {
895 @subfields = ('a', 'g', 'r', 't', 'u');
897 elsif ($fieldname eq "506") {
898 @subfields = ('a', 'b', 'c', 'd', 'e');
900 elsif ($fieldname eq "507") {
901 @subfields = ('a', 'b');
903 elsif ($fieldname eq "508") {
904 @subfields = ('a');
906 elsif ($fieldname eq "510") {
907 @subfields = ('a', 'b', 'c', 'x', '3');
909 elsif ($fieldname eq "511") {
910 @subfields = ('a');
912 elsif ($fieldname eq "513") {
913 @subfields = ('a', 'b');
915 elsif ($fieldname eq "514") {
916 @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'm', 'u', 'z');
918 elsif ($fieldname eq "515") {
919 @subfields = ('a');
921 elsif ($fieldname eq "516") {
922 @subfields = ('a');
924 elsif ($fieldname eq "518") {
925 @subfields = ('a', '3');
927 elsif ($fieldname eq "521") {
928 @subfields = ('a', 'b', '3');
930 elsif ($fieldname eq "522") {
931 @subfields = ('a');
933 elsif ($fieldname eq "524") {
934 @subfields = ('a', '2', '3');
936 elsif ($fieldname eq "525") {
937 @subfields = ('a');
939 elsif ($fieldname eq "526") {
940 @subfields = ('a', 'b', 'c', 'd', 'i', 'x', 'z', '5');
942 elsif ($fieldname eq "530") {
943 @subfields = ('a', 'b', 'c', 'd', 'u', '3');
945 elsif ($fieldname eq "533") {
946 @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'm', 'n', '3');
948 elsif ($fieldname eq "534") {
949 @subfields = ('a', 'b', 'c', 'e', 'f', 'k', 'l', 'm', 'n', 'p', 't', 'x', 'z');
951 elsif ($fieldname eq "535") {
952 @subfields = ('a', 'b', 'c', 'd', 'g', '3');
955 ## loop over all notefields
956 foreach my $notefield (@notefields) {
957 if (defined $notefield) {
958 ## retrieve all available subfield tuples
959 my @notesubfields = $notefield->subfields();
961 ## loop over all subfield tuples
962 foreach my $notetuple (@notesubfields) {
963 ## loop over all subfields to check
964 foreach my $subfield (@subfields) {
965 ## [0] contains subfield code
966 if (@$notetuple[0] eq $subfield) {
967 ## [1] contains value, remove trailing separators
968 print "<marc>field $fieldname subfield $subfield: ", @$notetuple[1], "\r\n" if $marcprint;
969 @$notetuple[1] =~ s% *[,;.:/]*$%%;
970 if (length(@$notetuple[1]) > 0) {
971 ## add to list
972 push @{$aref}, @$notetuple[1];
974 last;
982 ##********************************************************************
983 ## print_abstract(): prints abstract fields
984 ## Arguments: list of fields (520)
985 ##********************************************************************
986 sub print_abstract {
987 # ToDo: take care of repeatable subfields
988 my(@abfields) = @_;
990 ## we check the following subfields
991 my @subfields = ('a', 'b');
993 ## we generate a list for all useful strings
994 my @abstrings;
996 ## loop over all abfields
997 foreach my $abfield (@abfields) {
998 foreach my $field (@subfields) {
999 if ( length( $abfield->subfield($field) ) > 0 ) {
1000 my $ab = $abfield->subfield($field);
1002 print "<marc>field 520 subfield $field: $ab\r\n" if $marcprint;
1004 ## strip trailing separators
1005 $ab =~ s% *[;,:./]*$%%;
1007 ## add string to the list
1008 push( @abstrings, $ab );
1013 my $allabs = join "; ", @abstrings;
1015 if (length($allabs) > 0) {
1016 print "N2 - ", $allabs, "\r\n";