4 ## marc2ris: converts MARC21 and UNIMARC datasets to RIS format
5 ## See comments below for compliance with other MARC dialects
7 ## usage: perl marc2ris < infile.marc > outfile.ris
9 ## Dependencies: perl 5.6.0 or later
13 ## markus@mhoenicka.de 2002-11-16
15 ## This program is free software; you can redistribute it and/or modify
16 ## it under the terms of the GNU General Public License as published by
17 ## the Free Software Foundation; either version 2 of the License, or
18 ## (at your option) any later version.
20 ## This program is distributed in the hope that it will be useful,
21 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
22 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 ## GNU General Public License for more details.
25 ## You should have received a copy of the GNU General Public License
26 ## along with this program; if not, write to the Free Software
27 ## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 ## Some background about MARC as understood by this script
30 ## The default input format used in this script is MARC21, which
31 ## superseded USMARC and CANMARC. The specification can be found at:
32 ## http://lcweb.loc.gov/marc/
33 ## UNIMARC follows the specification at:
34 ## http://www.ifla.org/VI/3/p1996-1/sec-uni.htm
35 ## UKMARC support is a bit shaky because there is no specification available
36 ## for free. The wisdom used in this script was taken from a PDF document
37 ## comparing UKMARC to MARC21 found at:
38 ## www.bl.uk/services/bibliographic/marcchange.pdf
41 # Modified 2008 by BibLibre for Koha
42 # Modified 2011 by Catalyst
43 # Modified 2011 by Equinox Software, Inc.
44 # Modified 2016 by Universidad de El Salvador
46 # This file is part of Koha.
48 # Koha is free software; you can redistribute it and/or modify it
49 # under the terms of the GNU General Public License as published by
50 # the Free Software Foundation; either version 3 of the License, or
51 # (at your option) any later version.
53 # Koha is distributed in the hope that it will be useful, but
54 # WITHOUT ANY WARRANTY; without even the implied warranty of
55 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
56 # GNU General Public License for more details.
58 # You should have received a copy of the GNU General Public License
59 # along with Koha; if not, see <http://www.gnu.org/licenses>.
65 use List
::MoreUtils qw
/uniq/;
66 use vars
qw(@ISA @EXPORT);
68 use Koha::SimpleMARC qw(read_field);
73 # only export API methods
79 our $marcprint = 0; # Debug flag;
81 =head1 marc2bibtex - Convert from UNIMARC to RIS
83 my ($ris) = marc2ris($record);
87 C<$record> - a MARC::Record object
94 my $marcflavour = C4
::Context
->preference("marcflavour");
95 my $intype = lc($marcflavour);
97 # Let's redirect stdout
98 open my $oldout, ">&STDOUT";
101 open STDOUT
,'>:encoding(utf8)', \
$outvar;
103 ## First we should check the character encoding. This may be
104 ## MARC-8 or UTF-8. The former is indicated by a blank, the latter
105 ## by 'a' at position 09 (zero-based) of the leader
106 my $leader = $record->leader();
107 if ( $intype eq "marc21" ) {
108 if ( $leader =~ /^.{9}a/ ) {
109 print "<marc>---\r\n<marc>UTF-8 data\r\n" if $marcprint;
112 print "<marc>---\r\n<marc>MARC-8 data\r\n" if $marcprint;
115 ## else: other MARC formats do not specify the character encoding
116 ## we assume it's *not* UTF-8
118 my $RisExportAdditionalFields = C4
::Context
->preference('RisExportAdditionalFields');
119 my $ris_additional_fields;
120 if ($RisExportAdditionalFields) {
121 $RisExportAdditionalFields = "$RisExportAdditionalFields\n\n";
122 $ris_additional_fields = eval { YAML
::Load
($RisExportAdditionalFields); };
124 warn "Unable to parse RisExportAdditionalFields : $@";
125 $ris_additional_fields = undef;
130 if ( $ris_additional_fields && $ris_additional_fields->{TY
} ) {
131 my ( $f, $sf ) = split( /\$/, $ris_additional_fields->{TY
} );
132 my ( $type ) = read_field
( { record
=> $record, field
=> $f, subfield
=> $sf, field_numbers
=> [1] } );
134 print "TY - $type\r\n";
137 &print_typetag
($leader);
141 &print_typetag
($leader);
144 ## retrieve all author fields and collect them in a list
147 if ($intype eq "unimarc") {
148 ## Fields 700, 701, and 702 can contain author names
149 @author_fields = ($record->field('700'), $record->field('701'), $record->field('702'));
151 else { ## marc21, ukmarc
152 ## Field 100 sometimes carries main author
153 ## Field(s) 700 carry added entries - personal names
154 @author_fields = ($record->field('100'), $record->field('700'));
157 ## loop over all author fields
158 foreach my $field (@author_fields) {
159 if (length($field)) {
160 my $author = &get_author
($field);
161 print "AU - ",$author,"\r\n";
165 # ToDo: should we specify anonymous as author if we didn't find
166 # one? or use one of the corporate/meeting names below?
168 ## add corporate names or meeting names as editors ??
171 if ($intype eq "unimarc") {
172 ## Fields 710, 711, and 712 can carry corporate names
173 ## Field(s) 720, 721, 722, 730 have additional candidates
174 @editor_fields = ($record->field('710'), $record->field('711'), $record->field('712'), $record->field('720'), $record->field('721'), $record->field('722'), $record->field('730'));
176 else { ## marc21, ukmarc
177 ## Fields 110 and 111 carry the main entries - corporate name and
178 ## meeting name, respectively
179 ## Field(s) 710, 711 carry added entries - personal names
180 @editor_fields = ($record->field('110'), $record->field('111'), $record->field('710'), $record->field('711'));
183 ## loop over all editor fields
184 foreach my $field (@editor_fields) {
185 if (length($field)) {
186 my $editor = &get_editor
($field);
187 print "ED - ",$editor,"\r\n";
191 ## get info from the title field
192 if ($intype eq "unimarc") {
193 &print_title
($record->field('200'));
195 else { ## marc21, ukmarc
196 &print_title
($record->field('245'));
200 if ($intype eq "unimarc") {
201 &print_stitle
($record->field('225'));
203 else { ## marc21, ukmarc
204 &print_stitle
($record->field('490'));
208 if ($intype eq "unimarc") {
209 &print_isbn
($record->field('010'));
210 &print_issn
($record->field('011'));
212 elsif ($intype eq "ukmarc") {
213 &print_isbn
($record->field('021'));
214 ## this is just an assumption
215 &print_issn
($record->field('022'));
217 else { ## assume marc21
218 &print_isbn
($record->field('020'));
219 &print_issn
($record->field('022'));
222 if ($intype eq "marc21") {
223 &print_loc_callno
($record->field('050'));
224 &print_dewey
($record->field('082'));
226 ## else: unimarc, ukmarc do not seem to store call numbers?
229 if ($intype eq "unimarc") {
230 &print_pubinfo
($record->field('210'));
232 else { ## marc21, ukmarc
233 if ($record->field('264')) {
234 &print_pubinfo
($record->field('264'));
237 &print_pubinfo
($record->field('260'));
241 ## 6XX fields contain KW candidates. We add all of them to a
244 if ($intype eq "unimarc") {
245 @field_list = ('600', '601', '602', '604', '605', '606','607', '608', '610', '615', '620', '660', '661', '670', '675', '676', '680', '686');
246 } elsif ($intype eq "ukmarc") {
247 @field_list = ('600', '610', '611', '630', '650', '651','653', '655', '660', '661', '668', '690', '691', '692', '695');
248 } else { ## assume marc21
249 @field_list = ('600', '610', '611', '630', '650', '651','653', '654', '655', '656', '657', '658');
253 for my $f ( @field_list ) {
254 my @fields = $record->field($f);
255 push @kwpool, ( get_keywords
("$f",$record->field($f)) );
259 @kwpool = uniq
@kwpool;
261 for my $kw ( @kwpool ) {
262 print "KW - ", $kw, "\r\n";
265 ## 5XX have various candidates for notes and abstracts. We pool
266 ## all notes-like stuff in one list.
269 ## these fields have notes candidates
270 if ($intype eq "unimarc") {
271 foreach ('300', '301', '302', '303', '304', '305', '306', '307', '308', '310', '311', '312', '313', '314', '315', '316', '317', '318', '320', '321', '322', '323', '324', '325', '326', '327', '328', '332', '333', '336', '337', '345') {
272 &pool_subx
(\
@notepool, $_, $record->field($_));
275 elsif ($intype eq "ukmarc") {
276 foreach ('500', '501', '502', '503', '504', '505', '506', '508', '514', '515', '516', '521', '524', '525', '528', '530', '531', '532', '533', '534', '535', '537', '538', '540', '541', '542', '544', '554', '555', '556', '557', '561', '563', '580', '583', '584', '586') {
277 &pool_subx
(\
@notepool, $_, $record->field($_));
280 else { ## assume marc21
281 foreach ('500', '501', '502', '504', '505', '506', '507', '508', '510', '511', '513', '514', '515', '516', '518', '521', '522', '524', '525', '526', '530', '533', '534', '535') {
282 &pool_subx
(\
@notepool, $_, $record->field($_));
286 my $allnotes = join "; ", @notepool;
288 if (length($allnotes) > 0) {
289 print "N1 - ", $allnotes, "\r\n";
292 ## 320/520 have the abstract
293 if ($intype eq "unimarc") {
294 &print_abstract
($record->field('320'));
296 elsif ($intype eq "ukmarc") {
297 &print_abstract
($record->field('512'), $record->field('513'));
299 else { ## assume marc21
300 &print_abstract
($record->field('520'));
304 if ($record->field('856')) {
305 print_uri
($record->field('856'));
308 if ($ris_additional_fields) {
309 foreach my $ris_tag ( keys %$ris_additional_fields ) {
310 next if $ris_tag eq 'TY';
313 ref( $ris_additional_fields->{$ris_tag} ) eq 'ARRAY'
314 ? @
{ $ris_additional_fields->{$ris_tag} }
315 : $ris_additional_fields->{$ris_tag};
317 for my $tag (@fields) {
318 my ( $f, $sf ) = split( /\$/, $tag );
319 my @values = read_field
( { record
=> $record, field
=> $f, subfield
=> $sf } );
320 foreach my $v (@values) {
321 print "$ris_tag - $v\r\n";
330 # Let's re-redirect stdout
332 open STDOUT
, ">&", $oldout;
339 ##********************************************************************
340 ## print_typetag(): prints the first line of a RIS dataset including
341 ## the preceding newline
342 ## Argument: the leader of a MARC dataset
343 ## Returns: the value at leader position 06
344 ##********************************************************************
347 ## the keys of typehash are the allowed values at position 06
348 ## of the leader of a MARC record, the values are the RIS types
349 ## that might appropriately represent these types.
383 ## The type of a MARC record is found at position 06 of the leader
384 my $typeofrecord = defined($leader) && length $leader >=6 ?
385 substr($leader, 6, 1): undef;
386 ## Pos 07 == Bibliographic level
387 my $biblevel = defined($leader) && length $leader >=7 ?
388 substr($leader, 7, 1): '';
390 ## TODO: for books, field 008 positions 24-27 might have a few more
394 my $marcflavour = C4
::Context
->preference("marcflavour");
395 my $intype = lc($marcflavour);
396 if ($intype eq "unimarc") {
397 %typehash = %unitypehash;
400 %typehash = %ustypehash;
403 if (!defined $typeofrecord || !exists $typehash{$typeofrecord}) {
404 print "TY - GEN\r\n"; ## most reasonable default
405 warn ("no type found - assume GEN") if $marcprint;
406 } elsif ( $typeofrecord =~ "a" ) {
407 if ( $biblevel eq 'a' ) {
408 print "TY - GEN\r\n"; ## monographic component part
409 } elsif ( $biblevel eq 'b' || $biblevel eq 's' ) {
410 print "TY - SER\r\n"; ## serial or serial component part
411 } elsif ( $biblevel eq 'm' ) {
412 print "TY - $typehash{$typeofrecord}\r\n"; ## book
413 } elsif ( $biblevel eq 'c' || $biblevel eq 'd' ) {
414 print "TY - GEN\r\n"; ## collections, part of collections or made-up collections
415 } elsif ( $biblevel eq 'i' ) {
416 print "TY - DATA\r\n"; ## updating loose-leafe as Dataset
419 print "TY - $typehash{$typeofrecord}\r\n";
422 ## use $typeofrecord as the return value, just in case
426 ##********************************************************************
427 ## normalize_author(): normalizes an authorname
428 ## Arguments: authorname subfield a
429 ## authorname subfield b
430 ## authorname subfield c
431 ## name type if known: 0=direct order
432 ## 1=only surname or full name in
434 ## 3=family, clan, dynasty name
435 ## Returns: the normalized authorname
436 ##********************************************************************
437 sub normalize_author
{
438 my($rawauthora, $rawauthorb, $rawauthorc, $nametype) = @_;
440 if ($nametype == 0) {
441 # ToDo: convert every input to Last[,(F.|First)[ (M.|Middle)[,Suffix]]]
442 warn("name >>$rawauthora<< in direct order - leave as is") if $marcprint;
445 elsif ($nametype == 1) {
446 ## start munging subfield a (the real name part)
447 ## remove spaces after separators
448 $rawauthora =~ s
%([,.]+) *%$1%g;
450 ## remove trailing separators after spaces
451 $rawauthora =~ s
% *[,;:/]*$%%;
453 ## remove periods after a non-abbreviated name
454 $rawauthora =~ s
%(\w
{2,})\
.%$1%g;
456 ## start munging subfield b (something like the suffix)
457 ## remove trailing separators after spaces
458 $rawauthorb =~ s
% *[,;:/]*$%%;
460 ## we currently ignore subfield c until someone complains
461 if (length($rawauthorb) > 0) {
462 return join ", ", ($rawauthora, $rawauthorb);
468 elsif ($nametype == 3) {
473 ##********************************************************************
474 ## get_author(): gets authorname info from MARC fields 100, 700
475 ## Argument: field (100 or 700)
476 ## Returns: an author string in the format found in the record
477 ##********************************************************************
479 my ($authorfield) = @_;
482 ## the sequence of the name parts is encoded either in indicator
483 ## 1 (marc21) or 2 (unimarc)
484 my $marcflavour = C4
::Context
->preference("marcflavour");
485 my $intype = lc($marcflavour);
486 if ($intype eq "unimarc") {
489 else { ## assume marc21
493 print "<marc>:Author(Ind$indicator): ", $authorfield->indicator("$indicator"),"\r\n" if $marcprint;
494 print "<marc>:Author(\$a): ", $authorfield->subfield('a'),"\r\n" if $marcprint;
495 print "<marc>:Author(\$b): ", $authorfield->subfield('b'),"\r\n" if $marcprint;
496 print "<marc>:Author(\$c): ", $authorfield->subfield('c'),"\r\n" if $marcprint;
497 print "<marc>:Author(\$h): ", $authorfield->subfield('h'),"\r\n" if $marcprint;
498 if ($intype eq "ukmarc") {
499 my $authorname = $authorfield->subfield('a') . "," . $authorfield->subfield('h');
500 normalize_author
($authorname, $authorfield->subfield('b'), $authorfield->subfield('c'), $authorfield->indicator("$indicator"));
503 normalize_author
($authorfield->subfield('a') // '', $authorfield->subfield('b') // '', $authorfield->subfield('c') // '', $authorfield->indicator("$indicator"));
507 ##********************************************************************
508 ## get_editor(): gets editor info from MARC fields 110, 111, 710, 711
509 ## Argument: field (110, 111, 710, or 711)
510 ## Returns: an author string in the format found in the record
511 ##********************************************************************
513 my ($editorfield) = @_;
519 print "<marc>Editor(\$a): ", $editorfield->subfield('a'),"\r\n" if $marcprint;
520 print "<marc>Editor(\$b): ", $editorfield->subfield('b'),"\r\n" if $marcprint;
521 print "<marc>editor(\$c): ", $editorfield->subfield('c'),"\r\n" if $marcprint;
522 return $editorfield->subfield('a');
526 ##********************************************************************
527 ## print_title(): gets info from MARC field 245
528 ## Arguments: field (245)
530 ##********************************************************************
532 my ($titlefield) = @_;
534 print "<marc>empty title field (245)\r\n" if $marcprint;
535 warn("empty title field (245)") if $marcprint;
538 print "<marc>Title(\$a): ",$titlefield->subfield('a'),"\r\n" if $marcprint;
539 print "<marc>Title(\$b): ",$titlefield->subfield('b'),"\r\n" if $marcprint;
540 print "<marc>Title(\$c): ",$titlefield->subfield('c'),"\r\n" if $marcprint;
542 ## The title is usually written in a very odd notation. The title
543 ## proper ($a) often ends with a space followed by a separator like
544 ## a slash or a colon. The subtitle ($b) doesn't start with a space
545 ## so simple concatenation looks odd. We have to conditionally remove
546 ## the separator and make sure there's a space between title and
549 my $clean_title = $titlefield->subfield('a');
551 my $clean_subtitle = $titlefield->subfield('b');
552 $clean_subtitle ||= q{};
553 $clean_title =~ s
% *[/:;.]$%%;
554 $clean_subtitle =~ s
%^ *(.*) *[/:;.]$%$1%;
556 my $marcflavour = C4
::Context
->preference("marcflavour");
557 my $intype = lc($marcflavour);
558 if (length($clean_title) > 0
559 || (length($clean_subtitle) > 0 && $intype ne "unimarc")) {
560 print "TI - ", $clean_title;
562 ## subfield $b is relevant only for marc21/ukmarc
563 if (length($clean_subtitle) > 0 && $intype ne "unimarc") {
564 print ": ",$clean_subtitle;
569 ## The statement of responsibility is just this: horrors. There is
570 ## no formal definition how authors, editors and the like should
571 ## be written and designated. The field is free-form and resistant
572 ## to all parsing efforts, so this information is lost on me
577 ##********************************************************************
578 ## print_stitle(): prints info from series title field
581 ##********************************************************************
583 my ($titlefield) = @_;
586 print "<marc>empty series title field\r\n" if $marcprint;
589 print "<marc>Series title(\$a): ",$titlefield->subfield('a'),"\r\n" if $marcprint;
590 my $clean_title = $titlefield->subfield('a');
592 $clean_title =~ s
% *[/:;.]$%%;
594 if (length($clean_title) > 0) {
595 print "T2 - ", $clean_title,"\r\n";
598 my $marcflavour = C4
::Context
->preference("marcflavour");
599 my $intype = lc($marcflavour);
600 if ($intype eq "unimarc") {
601 print "<marc>Series vol(\$v): ",$titlefield->subfield('v'),"\r\n" if $marcprint;
602 if (length($titlefield->subfield('v')) > 0) {
603 print "VL - ", $titlefield->subfield('v'),"\r\n";
610 ##********************************************************************
611 ## print_isbn(): gets info from MARC field 020
612 ## Arguments: field (020)
613 ##********************************************************************
617 if (!$isbnfield || length ($isbnfield->subfield('a')) == 0) {
618 print "<marc>no isbn found (020\$a)\r\n" if $marcprint;
619 warn("no isbn found") if $marcprint;
622 if (length ($isbnfield->subfield('a')) < 10) {
623 print "<marc>truncated isbn (020\$a)\r\n" if $marcprint;
624 warn("truncated isbn") if $marcprint;
627 my $isbn = $isbnfield->subfield('a');
628 print "SN - ", $isbn, "\r\n";
632 ##********************************************************************
633 ## print_issn(): gets info from MARC field 022
634 ## Arguments: field (022)
635 ##********************************************************************
639 if (!$issnfield || length ($issnfield->subfield('a')) == 0) {
640 print "<marc>no issn found (022\$a)\r\n" if $marcprint;
641 warn("no issn found") if $marcprint;
644 if (length ($issnfield->subfield('a')) < 9) {
645 print "<marc>truncated issn (022\$a)\r\n" if $marcprint;
646 warn("truncated issn") if $marcprint;
649 my $issn = substr($issnfield->subfield('a'), 0, 9);
650 print "SN - ", $issn, "\r\n";
655 # print_uri() prints info from 856 u
660 foreach my $f856 (@f856s) {
661 if (my $uri = $f856->subfield('u')) {
662 print "UR - ", $uri, "\r\n";
667 ##********************************************************************
668 ## print_loc_callno(): gets info from MARC field 050
669 ## Arguments: field (050)
670 ##********************************************************************
671 sub print_loc_callno
{
672 my($callnofield) = @_;
674 if (!$callnofield || length ($callnofield->subfield('a')) == 0) {
675 print "<marc>no LOC call number found (050\$a)\r\n" if $marcprint;
676 warn("no LOC call number found") if $marcprint;
679 print "AV - ", $callnofield->subfield('a'), " ", $callnofield->subfield('b'), "\r\n";
683 ##********************************************************************
684 ## print_dewey(): gets info from MARC field 082
685 ## Arguments: field (082)
686 ##********************************************************************
688 my($deweyfield) = @_;
690 if (!$deweyfield || length ($deweyfield->subfield('a')) == 0) {
691 print "<marc>no Dewey number found (082\$a)\r\n" if $marcprint;
692 warn("no Dewey number found") if $marcprint;
695 print "U1 - ", $deweyfield->subfield('a'), " ", $deweyfield->subfield('2'), "\r\n";
699 ##********************************************************************
700 ## print_pubinfo(): gets info from MARC field 260
701 ## Arguments: field (260)
702 ##********************************************************************
704 my($pubinfofield) = @_;
706 if (!$pubinfofield) {
707 print "<marc>no publication information found (260/264)\r\n" if $marcprint;
708 warn("no publication information found") if $marcprint;
711 ## the following information is available in MARC21:
713 ## $b publisher -> PB
715 ## the corresponding subfields for UNIMARC:
717 ## $c publisher -> PB
720 ## all of them are repeatable. We pool all places into a
721 ## comma-separated list in CY. We also pool all publishers
722 ## into a comma-separated list in PB. We break the rule with
723 ## the date field because this wouldn't make much sense. In
724 ## this case, we use the first occurrence for PY, the second
725 ## for Y2, and ignore the rest
727 my @pubsubfields = $pubinfofield->subfields();
733 my $pubsub_publisher;
736 my $marcflavour = C4
::Context
->preference("marcflavour");
737 my $intype = lc($marcflavour);
738 if ($intype eq "unimarc") {
740 $pubsub_publisher = "c";
743 else { ## assume marc21
745 $pubsub_publisher = "b";
749 ## loop over all subfield list entries
750 for my $tuple (@pubsubfields) {
751 ## each tuple consists of the subfield code and the value
752 if (@
$tuple[0] eq $pubsub_place) {
753 ## strip any trailing crap
756 ## pool all occurrences in a list
759 elsif (@
$tuple[0] eq $pubsub_publisher) {
760 ## strip any trailing crap
763 ## pool all occurrences in a list
764 push (@publishers, $_);
766 elsif (@
$tuple[0] eq $pubsub_date) {
767 ## the dates are free-form, so we want to extract
768 ## a four-digit year and leave the rest as
770 my $protoyear = @
$tuple[1];
771 print "<marc>Year (260\$c): $protoyear\r\n" if $marcprint;
773 ## strip any separator chars at the end
774 $protoyear =~ s
% *[\
.;:/]*$%%;
776 ## isolate a four-digit year. We discard anything
777 ## preceding the year, but keep everything after
778 ## the year as other info.
779 $protoyear =~ s
%\D
*([0-9\
-]{4})(.*)%$1///$2%;
781 ## check what we've got. If there is no four-digit
782 ## year, make it up. If digits are replaced by '-',
783 ## replace those with 0s
785 if (index($protoyear, "/") == 4) {
787 ## replace all '-' in the four-digit year
789 substr($protoyear,0,4) =~ s!-!0!g;
793 print "<marc>no four-digit year found, use 0000\r\n" if $marcprint;
794 $protoyear = "0000///$protoyear";
795 warn("no four-digit year found, use 0000") if $marcprint;
798 if ($pycounter == 0 && length($protoyear)) {
799 print "PY - $protoyear\r\n";
801 elsif ($pycounter == 1 && length($_)) {
802 print "Y2 - $protoyear\r\n";
809 ## now dump the collected CY and PB lists
811 print "CY - ", join(", ", @cities), "\r\n";
813 if (@publishers > 0) {
814 print "PB - ", join(", ", @publishers), "\r\n";
819 ##********************************************************************
820 ## get_keywords(): prints info from MARC fields 6XX
821 ## Arguments: list of fields (6XX)
822 ##********************************************************************
824 my($fieldname, @keywords) = @_;
827 ## a list of all possible subfields
828 my @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'x', 'y', 'z', '2', '3', '4');
830 ## loop over all 6XX fields
831 foreach my $kwfield (@keywords) {
832 if ($kwfield != undef) {
833 ## authornames get special treatment
834 if ($fieldname eq "600") {
835 my $val = normalize_author
($kwfield->subfield('a'), $kwfield->subfield('b'), $kwfield->subfield('c'), $kwfield->indicator('1'));
837 print "<marc>Field $kwfield subfield a:", $kwfield->subfield('a'), "\r\n<marc>Field $kwfield subfield b:", $kwfield->subfield('b'), "\r\n<marc>Field $kwfield subfield c:", $kwfield->subfield('c'), "\r\n" if $marcprint;
840 ## retrieve all available subfields
841 my @kwsubfields = $kwfield->subfields();
843 ## loop over all available subfield tuples
844 foreach my $kwtuple (@kwsubfields) {
845 ## loop over all subfields to check
846 foreach my $subfield (@subfields) {
847 ## [0] contains subfield code
848 if (@
$kwtuple[0] eq $subfield) {
849 ## [1] contains value, remove trailing separators
850 @
$kwtuple[1] =~ s
% *[,;.:/]*$%%;
851 if (length(@
$kwtuple[1]) > 0) {
852 push @kw, @
$kwtuple[1];
853 print "<marc>Field $fieldname subfield $subfield:", @
$kwtuple[1], "\r\n" if $marcprint;
855 ## we can leave the subfields loop here
866 ##********************************************************************
867 ## pool_subx(): adds contents of several subfields to a list
868 ## Arguments: reference to a list
870 ## list of fields (5XX)
871 ##********************************************************************
873 my($aref, $fieldname, @notefields) = @_;
875 ## we use a list that contains the interesting subfields
877 # ToDo: this is apparently correct only for marc21
880 if ($fieldname eq "500") {
883 elsif ($fieldname eq "501") {
886 elsif ($fieldname eq "502") {
889 elsif ($fieldname eq "504") {
890 @subfields = ('a', 'b');
892 elsif ($fieldname eq "505") {
893 @subfields = ('a', 'g', 'r', 't', 'u');
895 elsif ($fieldname eq "506") {
896 @subfields = ('a', 'b', 'c', 'd', 'e');
898 elsif ($fieldname eq "507") {
899 @subfields = ('a', 'b');
901 elsif ($fieldname eq "508") {
904 elsif ($fieldname eq "510") {
905 @subfields = ('a', 'b', 'c', 'x', '3');
907 elsif ($fieldname eq "511") {
910 elsif ($fieldname eq "513") {
911 @subfields = ('a', 'b');
913 elsif ($fieldname eq "514") {
914 @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'm', 'u', 'z');
916 elsif ($fieldname eq "515") {
919 elsif ($fieldname eq "516") {
922 elsif ($fieldname eq "518") {
923 @subfields = ('a', '3');
925 elsif ($fieldname eq "521") {
926 @subfields = ('a', 'b', '3');
928 elsif ($fieldname eq "522") {
931 elsif ($fieldname eq "524") {
932 @subfields = ('a', '2', '3');
934 elsif ($fieldname eq "525") {
937 elsif ($fieldname eq "526") {
938 @subfields = ('a', 'b', 'c', 'd', 'i', 'x', 'z', '5');
940 elsif ($fieldname eq "530") {
941 @subfields = ('a', 'b', 'c', 'd', 'u', '3');
943 elsif ($fieldname eq "533") {
944 @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'm', 'n', '3');
946 elsif ($fieldname eq "534") {
947 @subfields = ('a', 'b', 'c', 'e', 'f', 'k', 'l', 'm', 'n', 'p', 't', 'x', 'z');
949 elsif ($fieldname eq "535") {
950 @subfields = ('a', 'b', 'c', 'd', 'g', '3');
953 ## loop over all notefields
954 foreach my $notefield (@notefields) {
955 if (defined $notefield) {
956 ## retrieve all available subfield tuples
957 my @notesubfields = $notefield->subfields();
959 ## loop over all subfield tuples
960 foreach my $notetuple (@notesubfields) {
961 ## loop over all subfields to check
962 foreach my $subfield (@subfields) {
963 ## [0] contains subfield code
964 if (@
$notetuple[0] eq $subfield) {
965 ## [1] contains value, remove trailing separators
966 print "<marc>field $fieldname subfield $subfield: ", @
$notetuple[1], "\r\n" if $marcprint;
967 @
$notetuple[1] =~ s
% *[,;.:/]*$%%;
968 if (length(@
$notetuple[1]) > 0) {
970 push @
{$aref}, @
$notetuple[1];
980 ##********************************************************************
981 ## print_abstract(): prints abstract fields
982 ## Arguments: list of fields (520)
983 ##********************************************************************
985 # ToDo: take care of repeatable subfields
988 ## we check the following subfields
989 my @subfields = ('a', 'b');
991 ## we generate a list for all useful strings
994 ## loop over all abfields
995 foreach my $abfield (@abfields) {
996 foreach my $field (@subfields) {
997 if ( length( $abfield->subfield($field) ) > 0 ) {
998 my $ab = $abfield->subfield($field);
1000 print "<marc>field 520 subfield $field: $ab\r\n" if $marcprint;
1002 ## strip trailing separators
1003 $ab =~ s
% *[;,:./]*$%%;
1005 ## add string to the list
1006 push( @abstrings, $ab );
1011 my $allabs = join "; ", @abstrings;
1013 if (length($allabs) > 0) {
1014 print "N2 - ", $allabs, "\r\n";