Bug 25261: (QA follow-up) Add unit tests
[koha.git] / C4 / Ris.pm
blobde8dccc43c6906a0201091fd1cc6a847a39f3fb6
1 package C4::Ris;
3 # Original script :
4 ## marc2ris: converts MARC21 and UNIMARC datasets to RIS format
5 ## See comments below for compliance with other MARC dialects
6 ##
7 ## usage: perl marc2ris < infile.marc > outfile.ris
8 ##
9 ## Dependencies: perl 5.6.0 or later
10 ## MARC::Record
11 ## MARC::Charset
13 ## markus@mhoenicka.de 2002-11-16
15 ## This program is free software; you can redistribute it and/or modify
16 ## it under the terms of the GNU General Public License as published by
17 ## the Free Software Foundation; either version 2 of the License, or
18 ## (at your option) any later version.
19 ##
20 ## This program is distributed in the hope that it will be useful,
21 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
22 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 ## GNU General Public License for more details.
25 ## You should have received a copy of the GNU General Public License
26 ## along with this program; if not, write to the Free Software
27 ## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 ## Some background about MARC as understood by this script
30 ## The default input format used in this script is MARC21, which
31 ## superseded USMARC and CANMARC. The specification can be found at:
32 ## http://lcweb.loc.gov/marc/
33 ## UNIMARC follows the specification at:
34 ## http://www.ifla.org/VI/3/p1996-1/sec-uni.htm
35 ## UKMARC support is a bit shaky because there is no specification available
36 ## for free. The wisdom used in this script was taken from a PDF document
37 ## comparing UKMARC to MARC21 found at:
38 ## www.bl.uk/services/bibliographic/marcchange.pdf
41 # Modified 2008 by BibLibre for Koha
42 # Modified 2011 by Catalyst
43 # Modified 2011 by Equinox Software, Inc.
44 # Modified 2016 by Universidad de El Salvador
46 # This file is part of Koha.
48 # Koha is free software; you can redistribute it and/or modify it
49 # under the terms of the GNU General Public License as published by
50 # the Free Software Foundation; either version 3 of the License, or
51 # (at your option) any later version.
53 # Koha is distributed in the hope that it will be useful, but
54 # WITHOUT ANY WARRANTY; without even the implied warranty of
55 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
56 # GNU General Public License for more details.
58 # You should have received a copy of the GNU General Public License
59 # along with Koha; if not, see <http://www.gnu.org/licenses>.
63 use Modern::Perl;
65 use List::MoreUtils qw/uniq/;
66 use vars qw(@ISA @EXPORT);
68 use Koha::SimpleMARC qw(read_field);
71 @ISA = qw(Exporter);
73 # only export API methods
75 @EXPORT = qw(
76 &marc2ris
79 our $marcprint = 0; # Debug flag;
81 =head1 marc2bibtex - Convert from UNIMARC to RIS
83 my ($ris) = marc2ris($record);
85 Returns a RIS scalar
87 C<$record> - a MARC::Record object
89 =cut
91 sub marc2ris {
92 my ($record) = @_;
94 my $marcflavour = C4::Context->preference("marcflavour");
95 my $intype = lc($marcflavour);
97 # Let's redirect stdout
98 open my $oldout, ">&STDOUT";
99 my $outvar;
100 close STDOUT;
101 open STDOUT,'>:encoding(utf8)', \$outvar;
103 ## First we should check the character encoding. This may be
104 ## MARC-8 or UTF-8. The former is indicated by a blank, the latter
105 ## by 'a' at position 09 (zero-based) of the leader
106 my $leader = $record->leader();
107 if ( $intype eq "marc21" ) {
108 if ( $leader =~ /^.{9}a/ ) {
109 print "<marc>---\r\n<marc>UTF-8 data\r\n" if $marcprint;
111 else {
112 print "<marc>---\r\n<marc>MARC-8 data\r\n" if $marcprint;
115 ## else: other MARC formats do not specify the character encoding
116 ## we assume it's *not* UTF-8
118 my $RisExportAdditionalFields = C4::Context->preference('RisExportAdditionalFields');
119 my $ris_additional_fields;
120 if ($RisExportAdditionalFields) {
121 $RisExportAdditionalFields = "$RisExportAdditionalFields\n\n";
122 $ris_additional_fields = eval { YAML::Load($RisExportAdditionalFields); };
123 if ($@) {
124 warn "Unable to parse RisExportAdditionalFields : $@";
125 $ris_additional_fields = undef;
129 ## start RIS dataset
130 if ( $ris_additional_fields && $ris_additional_fields->{TY} ) {
131 my ( $f, $sf ) = split( /\$/, $ris_additional_fields->{TY} );
132 my ( $type ) = read_field( { record => $record, field => $f, subfield => $sf, field_numbers => [1] } );
133 if ($type) {
134 print "TY - $type\r\n";
136 else {
137 &print_typetag($leader);
140 else {
141 &print_typetag($leader);
144 ## retrieve all author fields and collect them in a list
145 my @author_fields;
147 if ($intype eq "unimarc") {
148 ## Fields 700, 701, and 702 can contain author names
149 @author_fields = ($record->field('700'), $record->field('701'), $record->field('702'));
151 else { ## marc21, ukmarc
152 ## Field 100 sometimes carries main author
153 ## Field(s) 700 carry added entries - personal names
154 @author_fields = ($record->field('100'), $record->field('700'));
157 ## loop over all author fields
158 foreach my $field (@author_fields) {
159 if (length($field)) {
160 my $author = &get_author($field);
161 print "AU - ",$author,"\r\n";
165 # ToDo: should we specify anonymous as author if we didn't find
166 # one? or use one of the corporate/meeting names below?
168 ## add corporate names or meeting names as editors ??
169 my @editor_fields;
171 if ($intype eq "unimarc") {
172 ## Fields 710, 711, and 712 can carry corporate names
173 ## Field(s) 720, 721, 722, 730 have additional candidates
174 @editor_fields = ($record->field('710'), $record->field('711'), $record->field('712'), $record->field('720'), $record->field('721'), $record->field('722'), $record->field('730'));
176 else { ## marc21, ukmarc
177 ## Fields 110 and 111 carry the main entries - corporate name and
178 ## meeting name, respectively
179 ## Field(s) 710, 711 carry added entries - personal names
180 @editor_fields = ($record->field('110'), $record->field('111'), $record->field('710'), $record->field('711'));
183 ## loop over all editor fields
184 foreach my $field (@editor_fields) {
185 if (length($field)) {
186 my $editor = &get_editor($field);
187 print "ED - ",$editor,"\r\n";
191 ## get info from the title field
192 if ($intype eq "unimarc") {
193 &print_title($record->field('200'));
195 else { ## marc21, ukmarc
196 &print_title($record->field('245'));
199 ## series title
200 if ($intype eq "unimarc") {
201 &print_stitle($record->field('225'));
203 else { ## marc21, ukmarc
204 &print_stitle($record->field('490'));
207 ## ISBN/ISSN
208 if ($intype eq "unimarc") {
209 &print_isbn($record->field('010'));
210 &print_issn($record->field('011'));
212 elsif ($intype eq "ukmarc") {
213 &print_isbn($record->field('021'));
214 ## this is just an assumption
215 &print_issn($record->field('022'));
217 else { ## assume marc21
218 &print_isbn($record->field('020'));
219 &print_issn($record->field('022'));
222 if ($intype eq "marc21") {
223 &print_loc_callno($record->field('050'));
224 &print_dewey($record->field('082'));
226 ## else: unimarc, ukmarc do not seem to store call numbers?
228 ## publication info
229 if ($intype eq "unimarc") {
230 &print_pubinfo($record->field('210'));
232 else { ## marc21, ukmarc
233 if ($record->field('264')) {
234 &print_pubinfo($record->field('264'));
236 else {
237 &print_pubinfo($record->field('260'));
241 ## 6XX fields contain KW candidates. We add all of them to a
243 my @field_list;
244 if ($intype eq "unimarc") {
245 @field_list = ('600', '601', '602', '604', '605', '606','607', '608', '610', '615', '620', '660', '661', '670', '675', '676', '680', '686');
246 } elsif ($intype eq "ukmarc") {
247 @field_list = ('600', '610', '611', '630', '650', '651','653', '655', '660', '661', '668', '690', '691', '692', '695');
248 } else { ## assume marc21
249 @field_list = ('600', '610', '611', '630', '650', '651','653', '654', '655', '656', '657', '658');
252 my @kwpool;
253 for my $f ( @field_list ) {
254 my @fields = $record->field($f);
255 push @kwpool, ( get_keywords("$f",$record->field($f)) );
258 # Remove duplicate
259 @kwpool = uniq @kwpool;
261 for my $kw ( @kwpool ) {
262 print "KW - ", $kw, "\r\n";
265 ## 5XX have various candidates for notes and abstracts. We pool
266 ## all notes-like stuff in one list.
267 my @notepool;
269 ## these fields have notes candidates
270 if ($intype eq "unimarc") {
271 foreach ('300', '301', '302', '303', '304', '305', '306', '307', '308', '310', '311', '312', '313', '314', '315', '316', '317', '318', '320', '321', '322', '323', '324', '325', '326', '327', '328', '332', '333', '336', '337', '345') {
272 &pool_subx(\@notepool, $_, $record->field($_));
275 elsif ($intype eq "ukmarc") {
276 foreach ('500', '501', '502', '503', '504', '505', '506', '508', '514', '515', '516', '521', '524', '525', '528', '530', '531', '532', '533', '534', '535', '537', '538', '540', '541', '542', '544', '554', '555', '556', '557', '561', '563', '580', '583', '584', '586') {
277 &pool_subx(\@notepool, $_, $record->field($_));
280 else { ## assume marc21
281 foreach ('500', '501', '502', '504', '505', '506', '507', '508', '510', '511', '513', '514', '515', '516', '518', '521', '522', '524', '525', '526', '530', '533', '534', '535') {
282 &pool_subx(\@notepool, $_, $record->field($_));
286 my $allnotes = join "; ", @notepool;
288 if (length($allnotes) > 0) {
289 print "N1 - ", $allnotes, "\r\n";
292 ## 320/520 have the abstract
293 if ($intype eq "unimarc") {
294 &print_abstract($record->field('320'));
296 elsif ($intype eq "ukmarc") {
297 &print_abstract($record->field('512'), $record->field('513'));
299 else { ## assume marc21
300 &print_abstract($record->field('520'));
303 # 856u has the URI
304 if ($record->field('856')) {
305 print_uri($record->field('856'));
308 if ($ris_additional_fields) {
309 foreach my $ris_tag ( keys %$ris_additional_fields ) {
310 next if $ris_tag eq 'TY';
312 my @fields =
313 ref( $ris_additional_fields->{$ris_tag} ) eq 'ARRAY'
314 ? @{ $ris_additional_fields->{$ris_tag} }
315 : $ris_additional_fields->{$ris_tag};
317 for my $tag (@fields) {
318 my ( $f, $sf ) = split( /\$/, $tag );
319 my @values = read_field( { record => $record, field => $f, subfield => $sf } );
320 foreach my $v (@values) {
321 print "$ris_tag - $v\r\n";
327 ## end RIS dataset
328 print "ER - \r\n";
330 # Let's re-redirect stdout
331 close STDOUT;
332 open STDOUT, ">&", $oldout;
334 return $outvar;
339 ##********************************************************************
340 ## print_typetag(): prints the first line of a RIS dataset including
341 ## the preceding newline
342 ## Argument: the leader of a MARC dataset
343 ## Returns: the value at leader position 06
344 ##********************************************************************
345 sub print_typetag {
346 my ($leader)= @_;
347 ## the keys of typehash are the allowed values at position 06
348 ## of the leader of a MARC record, the values are the RIS types
349 ## that might appropriately represent these types.
350 my %ustypehash = (
351 "a" => "BOOK",
352 "c" => "MUSIC",
353 "d" => "MUSIC",
354 "e" => "MAP",
355 "f" => "MAP",
356 "g" => "ADVS",
357 "i" => "SOUND",
358 "j" => "SOUND",
359 "k" => "ART",
360 "m" => "DATA",
361 "o" => "GEN",
362 "p" => "GEN",
363 "r" => "ART",
364 "t" => "MANSCPT",
367 my %unitypehash = (
368 "a" => "BOOK",
369 "b" => "MANSCPT",
370 "c" => "MUSIC",
371 "d" => "MUSIC",
372 "e" => "MAP",
373 "f" => "MAP",
374 "g" => "ADVS",
375 "i" => "SOUND",
376 "j" => "SOUND",
377 "k" => "ART",
378 "l" => "ELEC",
379 "m" => "GEN",
380 "r" => "ART",
383 ## The type of a MARC record is found at position 06 of the leader
384 my $typeofrecord = defined($leader) && length $leader >=6 ?
385 substr($leader, 6, 1): undef;
386 ## Pos 07 == Bibliographic level
387 my $biblevel = defined($leader) && length $leader >=7 ?
388 substr($leader, 7, 1): '';
390 ## TODO: for books, field 008 positions 24-27 might have a few more
391 ## hints
393 my %typehash;
394 my $marcflavour = C4::Context->preference("marcflavour");
395 my $intype = lc($marcflavour);
396 if ($intype eq "unimarc") {
397 %typehash = %unitypehash;
399 else {
400 %typehash = %ustypehash;
403 if (!defined $typeofrecord || !exists $typehash{$typeofrecord}) {
404 print "TY - GEN\r\n"; ## most reasonable default
405 warn ("no type found - assume GEN") if $marcprint;
406 } elsif ( $typeofrecord =~ "a" ) {
407 if ( $biblevel eq 'a' ) {
408 print "TY - GEN\r\n"; ## monographic component part
409 } elsif ( $biblevel eq 'b' || $biblevel eq 's' ) {
410 print "TY - SER\r\n"; ## serial or serial component part
411 } elsif ( $biblevel eq 'm' ) {
412 print "TY - $typehash{$typeofrecord}\r\n"; ## book
413 } elsif ( $biblevel eq 'c' || $biblevel eq 'd' ) {
414 print "TY - GEN\r\n"; ## collections, part of collections or made-up collections
415 } elsif ( $biblevel eq 'i' ) {
416 print "TY - DATA\r\n"; ## updating loose-leafe as Dataset
418 } else {
419 print "TY - $typehash{$typeofrecord}\r\n";
422 ## use $typeofrecord as the return value, just in case
423 $typeofrecord;
426 ##********************************************************************
427 ## normalize_author(): normalizes an authorname
428 ## Arguments: authorname subfield a
429 ## authorname subfield b
430 ## authorname subfield c
431 ## name type if known: 0=direct order
432 ## 1=only surname or full name in
433 ## inverted order
434 ## 3=family, clan, dynasty name
435 ## Returns: the normalized authorname
436 ##********************************************************************
437 sub normalize_author {
438 my($rawauthora, $rawauthorb, $rawauthorc, $nametype) = @_;
440 if ($nametype == 0) {
441 # ToDo: convert every input to Last[,(F.|First)[ (M.|Middle)[,Suffix]]]
442 warn("name >>$rawauthora<< in direct order - leave as is") if $marcprint;
443 return $rawauthora;
445 elsif ($nametype == 1) {
446 ## start munging subfield a (the real name part)
447 ## remove spaces after separators
448 $rawauthora =~ s%([,.]+) *%$1%g;
450 ## remove trailing separators after spaces
451 $rawauthora =~ s% *[,;:/]*$%%;
453 ## remove periods after a non-abbreviated name
454 $rawauthora =~ s%(\w{2,})\.%$1%g;
456 ## start munging subfield b (something like the suffix)
457 ## remove trailing separators after spaces
458 $rawauthorb =~ s% *[,;:/]*$%%;
460 ## we currently ignore subfield c until someone complains
461 if (length($rawauthorb) > 0) {
462 return join ", ", ($rawauthora, $rawauthorb);
464 else {
465 return $rawauthora;
468 elsif ($nametype == 3) {
469 return $rawauthora;
473 ##********************************************************************
474 ## get_author(): gets authorname info from MARC fields 100, 700
475 ## Argument: field (100 or 700)
476 ## Returns: an author string in the format found in the record
477 ##********************************************************************
478 sub get_author {
479 my ($authorfield) = @_;
480 my ($indicator);
482 ## the sequence of the name parts is encoded either in indicator
483 ## 1 (marc21) or 2 (unimarc)
484 my $marcflavour = C4::Context->preference("marcflavour");
485 my $intype = lc($marcflavour);
486 if ($intype eq "unimarc") {
487 $indicator = 2;
489 else { ## assume marc21
490 $indicator = 1;
493 print "<marc>:Author(Ind$indicator): ", $authorfield->indicator("$indicator"),"\r\n" if $marcprint;
494 print "<marc>:Author(\$a): ", $authorfield->subfield('a'),"\r\n" if $marcprint;
495 print "<marc>:Author(\$b): ", $authorfield->subfield('b'),"\r\n" if $marcprint;
496 print "<marc>:Author(\$c): ", $authorfield->subfield('c'),"\r\n" if $marcprint;
497 print "<marc>:Author(\$h): ", $authorfield->subfield('h'),"\r\n" if $marcprint;
498 if ($intype eq "ukmarc") {
499 my $authorname = $authorfield->subfield('a') . "," . $authorfield->subfield('h');
500 normalize_author($authorname, $authorfield->subfield('b'), $authorfield->subfield('c'), $authorfield->indicator("$indicator"));
502 else {
503 normalize_author($authorfield->subfield('a') // '', $authorfield->subfield('b') // '', $authorfield->subfield('c') // '', $authorfield->indicator("$indicator"));
507 ##********************************************************************
508 ## get_editor(): gets editor info from MARC fields 110, 111, 710, 711
509 ## Argument: field (110, 111, 710, or 711)
510 ## Returns: an author string in the format found in the record
511 ##********************************************************************
512 sub get_editor {
513 my ($editorfield) = @_;
515 if (!$editorfield) {
516 return;
518 else {
519 print "<marc>Editor(\$a): ", $editorfield->subfield('a'),"\r\n" if $marcprint;
520 print "<marc>Editor(\$b): ", $editorfield->subfield('b'),"\r\n" if $marcprint;
521 print "<marc>editor(\$c): ", $editorfield->subfield('c'),"\r\n" if $marcprint;
522 return $editorfield->subfield('a');
526 ##********************************************************************
527 ## print_title(): gets info from MARC field 245
528 ## Arguments: field (245)
529 ## Returns:
530 ##********************************************************************
531 sub print_title {
532 my ($titlefield) = @_;
533 if (!$titlefield) {
534 print "<marc>empty title field (245)\r\n" if $marcprint;
535 warn("empty title field (245)") if $marcprint;
537 else {
538 print "<marc>Title(\$a): ",$titlefield->subfield('a'),"\r\n" if $marcprint;
539 print "<marc>Title(\$b): ",$titlefield->subfield('b'),"\r\n" if $marcprint;
540 print "<marc>Title(\$c): ",$titlefield->subfield('c'),"\r\n" if $marcprint;
542 ## The title is usually written in a very odd notation. The title
543 ## proper ($a) often ends with a space followed by a separator like
544 ## a slash or a colon. The subtitle ($b) doesn't start with a space
545 ## so simple concatenation looks odd. We have to conditionally remove
546 ## the separator and make sure there's a space between title and
547 ## subtitle
549 my $clean_title = $titlefield->subfield('a');
551 my $clean_subtitle = $titlefield->subfield('b');
552 $clean_subtitle ||= q{};
553 $clean_title =~ s% *[/:;.]$%%;
554 $clean_subtitle =~ s%^ *(.*) *[/:;.]$%$1%;
556 my $marcflavour = C4::Context->preference("marcflavour");
557 my $intype = lc($marcflavour);
558 if (length($clean_title) > 0
559 || (length($clean_subtitle) > 0 && $intype ne "unimarc")) {
560 print "TI - ", $clean_title;
562 ## subfield $b is relevant only for marc21/ukmarc
563 if (length($clean_subtitle) > 0 && $intype ne "unimarc") {
564 print ": ",$clean_subtitle;
566 print "\r\n";
569 ## The statement of responsibility is just this: horrors. There is
570 ## no formal definition how authors, editors and the like should
571 ## be written and designated. The field is free-form and resistant
572 ## to all parsing efforts, so this information is lost on me
574 return;
577 ##********************************************************************
578 ## print_stitle(): prints info from series title field
579 ## Arguments: field
580 ## Returns:
581 ##********************************************************************
582 sub print_stitle {
583 my ($titlefield) = @_;
585 if (!$titlefield) {
586 print "<marc>empty series title field\r\n" if $marcprint;
588 else {
589 print "<marc>Series title(\$a): ",$titlefield->subfield('a'),"\r\n" if $marcprint;
590 my $clean_title = $titlefield->subfield('a');
592 $clean_title =~ s% *[/:;.]$%%;
594 if (length($clean_title) > 0) {
595 print "T2 - ", $clean_title,"\r\n";
598 my $marcflavour = C4::Context->preference("marcflavour");
599 my $intype = lc($marcflavour);
600 if ($intype eq "unimarc") {
601 print "<marc>Series vol(\$v): ",$titlefield->subfield('v'),"\r\n" if $marcprint;
602 if (length($titlefield->subfield('v')) > 0) {
603 print "VL - ", $titlefield->subfield('v'),"\r\n";
607 return;
610 ##********************************************************************
611 ## print_isbn(): gets info from MARC field 020
612 ## Arguments: field (020)
613 ##********************************************************************
614 sub print_isbn {
615 my($isbnfield) = @_;
617 if (!$isbnfield || length ($isbnfield->subfield('a')) == 0) {
618 print "<marc>no isbn found (020\$a)\r\n" if $marcprint;
619 warn("no isbn found") if $marcprint;
621 else {
622 if (length ($isbnfield->subfield('a')) < 10) {
623 print "<marc>truncated isbn (020\$a)\r\n" if $marcprint;
624 warn("truncated isbn") if $marcprint;
627 my $isbn = $isbnfield->subfield('a');
628 print "SN - ", $isbn, "\r\n";
632 ##********************************************************************
633 ## print_issn(): gets info from MARC field 022
634 ## Arguments: field (022)
635 ##********************************************************************
636 sub print_issn {
637 my($issnfield) = @_;
639 if (!$issnfield || length ($issnfield->subfield('a')) == 0) {
640 print "<marc>no issn found (022\$a)\r\n" if $marcprint;
641 warn("no issn found") if $marcprint;
643 else {
644 if (length ($issnfield->subfield('a')) < 9) {
645 print "<marc>truncated issn (022\$a)\r\n" if $marcprint;
646 warn("truncated issn") if $marcprint;
649 my $issn = substr($issnfield->subfield('a'), 0, 9);
650 print "SN - ", $issn, "\r\n";
655 # print_uri() prints info from 856 u
657 sub print_uri {
658 my @f856s = @_;
660 foreach my $f856 (@f856s) {
661 if (my $uri = $f856->subfield('u')) {
662 print "UR - ", $uri, "\r\n";
667 ##********************************************************************
668 ## print_loc_callno(): gets info from MARC field 050
669 ## Arguments: field (050)
670 ##********************************************************************
671 sub print_loc_callno {
672 my($callnofield) = @_;
674 if (!$callnofield || length ($callnofield->subfield('a')) == 0) {
675 print "<marc>no LOC call number found (050\$a)\r\n" if $marcprint;
676 warn("no LOC call number found") if $marcprint;
678 else {
679 print "AV - ", $callnofield->subfield('a'), " ", $callnofield->subfield('b'), "\r\n";
683 ##********************************************************************
684 ## print_dewey(): gets info from MARC field 082
685 ## Arguments: field (082)
686 ##********************************************************************
687 sub print_dewey {
688 my($deweyfield) = @_;
690 if (!$deweyfield || length ($deweyfield->subfield('a')) == 0) {
691 print "<marc>no Dewey number found (082\$a)\r\n" if $marcprint;
692 warn("no Dewey number found") if $marcprint;
694 else {
695 print "U1 - ", $deweyfield->subfield('a'), " ", $deweyfield->subfield('2'), "\r\n";
699 ##********************************************************************
700 ## print_pubinfo(): gets info from MARC field 260
701 ## Arguments: field (260)
702 ##********************************************************************
703 sub print_pubinfo {
704 my($pubinfofield) = @_;
706 if (!$pubinfofield) {
707 print "<marc>no publication information found (260/264)\r\n" if $marcprint;
708 warn("no publication information found") if $marcprint;
710 else {
711 ## the following information is available in MARC21:
712 ## $a place -> CY
713 ## $b publisher -> PB
714 ## $c date -> PY
715 ## the corresponding subfields for UNIMARC:
716 ## $a place -> CY
717 ## $c publisher -> PB
718 ## $d date -> PY
720 ## all of them are repeatable. We pool all places into a
721 ## comma-separated list in CY. We also pool all publishers
722 ## into a comma-separated list in PB. We break the rule with
723 ## the date field because this wouldn't make much sense. In
724 ## this case, we use the first occurrence for PY, the second
725 ## for Y2, and ignore the rest
727 my @pubsubfields = $pubinfofield->subfields();
728 my @cities;
729 my @publishers;
730 my $pycounter = 0;
732 my $pubsub_place;
733 my $pubsub_publisher;
734 my $pubsub_date;
736 my $marcflavour = C4::Context->preference("marcflavour");
737 my $intype = lc($marcflavour);
738 if ($intype eq "unimarc") {
739 $pubsub_place = "a";
740 $pubsub_publisher = "c";
741 $pubsub_date = "d";
743 else { ## assume marc21
744 $pubsub_place = "a";
745 $pubsub_publisher = "b";
746 $pubsub_date = "c";
749 ## loop over all subfield list entries
750 for my $tuple (@pubsubfields) {
751 ## each tuple consists of the subfield code and the value
752 if (@$tuple[0] eq $pubsub_place) {
753 ## strip any trailing crap
754 $_ = @$tuple[1];
755 s% *[,;:/]$%%;
756 ## pool all occurrences in a list
757 push (@cities, $_);
759 elsif (@$tuple[0] eq $pubsub_publisher) {
760 ## strip any trailing crap
761 $_ = @$tuple[1];
762 s% *[,;:/]$%%;
763 ## pool all occurrences in a list
764 push (@publishers, $_);
766 elsif (@$tuple[0] eq $pubsub_date) {
767 ## the dates are free-form, so we want to extract
768 ## a four-digit year and leave the rest as
769 ## "other info"
770 my $protoyear = @$tuple[1];
771 print "<marc>Year (260\$c): $protoyear\r\n" if $marcprint;
773 ## strip any separator chars at the end
774 $protoyear =~ s% *[\.;:/]*$%%;
776 ## isolate a four-digit year. We discard anything
777 ## preceding the year, but keep everything after
778 ## the year as other info.
779 $protoyear =~ s%\D*([0-9\-]{4})(.*)%$1///$2%;
781 ## check what we've got. If there is no four-digit
782 ## year, make it up. If digits are replaced by '-',
783 ## replace those with 0s
785 if (index($protoyear, "/") == 4) {
786 ## have year info
787 ## replace all '-' in the four-digit year
788 ## by '0'
789 substr($protoyear,0,4) =~ s!-!0!g;
791 else {
792 ## have no year info
793 print "<marc>no four-digit year found, use 0000\r\n" if $marcprint;
794 $protoyear = "0000///$protoyear";
795 warn("no four-digit year found, use 0000") if $marcprint;
798 if ($pycounter == 0 && length($protoyear)) {
799 print "PY - $protoyear\r\n";
801 elsif ($pycounter == 1 && length($_)) {
802 print "Y2 - $protoyear\r\n";
804 ## else: discard
806 ## else: discard
809 ## now dump the collected CY and PB lists
810 if (@cities > 0) {
811 print "CY - ", join(", ", @cities), "\r\n";
813 if (@publishers > 0) {
814 print "PB - ", join(", ", @publishers), "\r\n";
819 ##********************************************************************
820 ## get_keywords(): prints info from MARC fields 6XX
821 ## Arguments: list of fields (6XX)
822 ##********************************************************************
823 sub get_keywords {
824 my($fieldname, @keywords) = @_;
826 my @kw;
827 ## a list of all possible subfields
828 my @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'x', 'y', 'z', '2', '3', '4');
830 ## loop over all 6XX fields
831 foreach my $kwfield (@keywords) {
832 if ($kwfield != undef) {
833 ## authornames get special treatment
834 if ($fieldname eq "600") {
835 my $val = normalize_author($kwfield->subfield('a'), $kwfield->subfield('b'), $kwfield->subfield('c'), $kwfield->indicator('1'));
836 push @kw, $val;
837 print "<marc>Field $kwfield subfield a:", $kwfield->subfield('a'), "\r\n<marc>Field $kwfield subfield b:", $kwfield->subfield('b'), "\r\n<marc>Field $kwfield subfield c:", $kwfield->subfield('c'), "\r\n" if $marcprint;
839 else {
840 ## retrieve all available subfields
841 my @kwsubfields = $kwfield->subfields();
843 ## loop over all available subfield tuples
844 foreach my $kwtuple (@kwsubfields) {
845 ## loop over all subfields to check
846 foreach my $subfield (@subfields) {
847 ## [0] contains subfield code
848 if (@$kwtuple[0] eq $subfield) {
849 ## [1] contains value, remove trailing separators
850 @$kwtuple[1] =~ s% *[,;.:/]*$%%;
851 if (length(@$kwtuple[1]) > 0) {
852 push @kw, @$kwtuple[1];
853 print "<marc>Field $fieldname subfield $subfield:", @$kwtuple[1], "\r\n" if $marcprint;
855 ## we can leave the subfields loop here
856 last;
863 return @kw;
866 ##********************************************************************
867 ## pool_subx(): adds contents of several subfields to a list
868 ## Arguments: reference to a list
869 ## field name
870 ## list of fields (5XX)
871 ##********************************************************************
872 sub pool_subx {
873 my($aref, $fieldname, @notefields) = @_;
875 ## we use a list that contains the interesting subfields
876 ## for each field
877 # ToDo: this is apparently correct only for marc21
878 my @subfields;
880 if ($fieldname eq "500") {
881 @subfields = ('a');
883 elsif ($fieldname eq "501") {
884 @subfields = ('a');
886 elsif ($fieldname eq "502") {
887 @subfields = ('a');
889 elsif ($fieldname eq "504") {
890 @subfields = ('a', 'b');
892 elsif ($fieldname eq "505") {
893 @subfields = ('a', 'g', 'r', 't', 'u');
895 elsif ($fieldname eq "506") {
896 @subfields = ('a', 'b', 'c', 'd', 'e');
898 elsif ($fieldname eq "507") {
899 @subfields = ('a', 'b');
901 elsif ($fieldname eq "508") {
902 @subfields = ('a');
904 elsif ($fieldname eq "510") {
905 @subfields = ('a', 'b', 'c', 'x', '3');
907 elsif ($fieldname eq "511") {
908 @subfields = ('a');
910 elsif ($fieldname eq "513") {
911 @subfields = ('a', 'b');
913 elsif ($fieldname eq "514") {
914 @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'm', 'u', 'z');
916 elsif ($fieldname eq "515") {
917 @subfields = ('a');
919 elsif ($fieldname eq "516") {
920 @subfields = ('a');
922 elsif ($fieldname eq "518") {
923 @subfields = ('a', '3');
925 elsif ($fieldname eq "521") {
926 @subfields = ('a', 'b', '3');
928 elsif ($fieldname eq "522") {
929 @subfields = ('a');
931 elsif ($fieldname eq "524") {
932 @subfields = ('a', '2', '3');
934 elsif ($fieldname eq "525") {
935 @subfields = ('a');
937 elsif ($fieldname eq "526") {
938 @subfields = ('a', 'b', 'c', 'd', 'i', 'x', 'z', '5');
940 elsif ($fieldname eq "530") {
941 @subfields = ('a', 'b', 'c', 'd', 'u', '3');
943 elsif ($fieldname eq "533") {
944 @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'm', 'n', '3');
946 elsif ($fieldname eq "534") {
947 @subfields = ('a', 'b', 'c', 'e', 'f', 'k', 'l', 'm', 'n', 'p', 't', 'x', 'z');
949 elsif ($fieldname eq "535") {
950 @subfields = ('a', 'b', 'c', 'd', 'g', '3');
953 ## loop over all notefields
954 foreach my $notefield (@notefields) {
955 if (defined $notefield) {
956 ## retrieve all available subfield tuples
957 my @notesubfields = $notefield->subfields();
959 ## loop over all subfield tuples
960 foreach my $notetuple (@notesubfields) {
961 ## loop over all subfields to check
962 foreach my $subfield (@subfields) {
963 ## [0] contains subfield code
964 if (@$notetuple[0] eq $subfield) {
965 ## [1] contains value, remove trailing separators
966 print "<marc>field $fieldname subfield $subfield: ", @$notetuple[1], "\r\n" if $marcprint;
967 @$notetuple[1] =~ s% *[,;.:/]*$%%;
968 if (length(@$notetuple[1]) > 0) {
969 ## add to list
970 push @{$aref}, @$notetuple[1];
972 last;
980 ##********************************************************************
981 ## print_abstract(): prints abstract fields
982 ## Arguments: list of fields (520)
983 ##********************************************************************
984 sub print_abstract {
985 # ToDo: take care of repeatable subfields
986 my(@abfields) = @_;
988 ## we check the following subfields
989 my @subfields = ('a', 'b');
991 ## we generate a list for all useful strings
992 my @abstrings;
994 ## loop over all abfields
995 foreach my $abfield (@abfields) {
996 foreach my $field (@subfields) {
997 if ( length( $abfield->subfield($field) ) > 0 ) {
998 my $ab = $abfield->subfield($field);
1000 print "<marc>field 520 subfield $field: $ab\r\n" if $marcprint;
1002 ## strip trailing separators
1003 $ab =~ s% *[;,:./]*$%%;
1005 ## add string to the list
1006 push( @abstrings, $ab );
1011 my $allabs = join "; ", @abstrings;
1013 if (length($allabs) > 0) {
1014 print "N2 - ", $allabs, "\r\n";