4 ## marc2ris: converts MARC21 and UNIMARC datasets to RIS format
5 ## See comments below for compliance with other MARC dialects
7 ## usage: perl marc2ris < infile.marc > outfile.ris
9 ## Dependencies: perl 5.6.0 or later
13 ## markus@mhoenicka.de 2002-11-16
15 ## This program is free software; you can redistribute it and/or modify
16 ## it under the terms of the GNU General Public License as published by
17 ## the Free Software Foundation; either version 2 of the License, or
18 ## (at your option) any later version.
20 ## This program is distributed in the hope that it will be useful,
21 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
22 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 ## GNU General Public License for more details.
25 ## You should have received a copy of the GNU General Public License
26 ## along with this program; if not, write to the Free Software
27 ## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 ## Some background about MARC as understood by this script
30 ## The default input format used in this script is MARC21, which
31 ## superseded USMARC and CANMARC. The specification can be found at:
32 ## http://lcweb.loc.gov/marc/
33 ## UNIMARC follows the specification at:
34 ## http://www.ifla.org/VI/3/p1996-1/sec-uni.htm
35 ## UKMARC support is a bit shaky because there is no specification available
36 ## for free. The wisdom used in this script was taken from a PDF document
37 ## comparing UKMARC to MARC21 found at:
38 ## www.bl.uk/services/bibliographic/marcchange.pdf
41 # Modified 2008 by BibLibre for Koha
42 # Modified 2011 by Catalyst
43 # Modified 2011 by Equinox Software, Inc.
44 # Modified 2016 by Universidad de El Salvador
46 # This file is part of Koha.
48 # Koha is free software; you can redistribute it and/or modify it
49 # under the terms of the GNU General Public License as published by
50 # the Free Software Foundation; either version 3 of the License, or
51 # (at your option) any later version.
53 # Koha is distributed in the hope that it will be useful, but
54 # WITHOUT ANY WARRANTY; without even the implied warranty of
55 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
56 # GNU General Public License for more details.
58 # You should have received a copy of the GNU General Public License
59 # along with Koha; if not, see <http://www.gnu.org/licenses>.
65 use List
::MoreUtils qw
/uniq/;
66 use vars
qw(@ISA @EXPORT);
68 use C4::Biblio qw(GetMarcSubfieldStructureFromKohaField);
69 use Koha
::SimpleMARC
qw(read_field);
74 # only export API methods
80 our $marcprint = 0; # Debug flag;
82 =head1 marc2bibtex - Convert from UNIMARC to RIS
84 my ($ris) = marc2ris($record);
88 C<$record> - a MARC::Record object
96 my $marcflavour = C4
::Context
->preference("marcflavour");
97 my $intype = lc($marcflavour);
99 # Let's redirect stdout
100 open my $oldout, ">&STDOUT";
103 open STDOUT
,'>:encoding(utf8)', \
$outvar;
105 ## First we should check the character encoding. This may be
106 ## MARC-8 or UTF-8. The former is indicated by a blank, the latter
107 ## by 'a' at position 09 (zero-based) of the leader
108 my $leader = $record->leader();
109 if ( $intype eq "marc21" ) {
110 if ( $leader =~ /^.{9}a/ ) {
111 print "<marc>---\r\n<marc>UTF-8 data\r\n" if $marcprint;
114 print "<marc>---\r\n<marc>MARC-8 data\r\n" if $marcprint;
117 ## else: other MARC formats do not specify the character encoding
118 ## we assume it's *not* UTF-8
120 my $RisExportAdditionalFields = C4
::Context
->preference('RisExportAdditionalFields');
121 my $ris_additional_fields;
122 if ($RisExportAdditionalFields) {
123 $RisExportAdditionalFields = "$RisExportAdditionalFields\n\n";
124 $ris_additional_fields = eval { YAML
::Load
($RisExportAdditionalFields); };
126 warn "Unable to parse RisExportAdditionalFields : $@";
127 $ris_additional_fields = undef;
132 if ( $ris_additional_fields && $ris_additional_fields->{TY
} ) {
133 my ( $f, $sf ) = split( /\$/, $ris_additional_fields->{TY
} );
134 my ( $type ) = read_field
( { record
=> $record, field
=> $f, subfield
=> $sf, field_numbers
=> [1] } );
136 print "TY - $type\r\n";
139 &print_typetag
($leader);
143 &print_typetag
($leader);
146 ## retrieve all author fields and collect them in a list
149 if ($intype eq "unimarc") {
150 ## Fields 700, 701, and 702 can contain author names
151 @author_fields = ($record->field('700'), $record->field('701'), $record->field('702'));
153 else { ## marc21, ukmarc
154 ## Field 100 sometimes carries main author
155 ## Field(s) 700 carry added entries - personal names
156 @author_fields = ($record->field('100'), $record->field('700'));
159 ## loop over all author fields
160 foreach my $field (@author_fields) {
161 if (length($field)) {
162 my $author = &get_author
($field);
163 print "AU - ",$author,"\r\n";
167 # ToDo: should we specify anonymous as author if we didn't find
168 # one? or use one of the corporate/meeting names below?
170 ## add corporate names or meeting names as editors ??
173 if ($intype eq "unimarc") {
174 ## Fields 710, 711, and 712 can carry corporate names
175 ## Field(s) 720, 721, 722, 730 have additional candidates
176 @editor_fields = ($record->field('710'), $record->field('711'), $record->field('712'), $record->field('720'), $record->field('721'), $record->field('722'), $record->field('730'));
178 else { ## marc21, ukmarc
179 ## Fields 110 and 111 carry the main entries - corporate name and
180 ## meeting name, respectively
181 ## Field(s) 710, 711 carry added entries - personal names
182 @editor_fields = ($record->field('110'), $record->field('111'), $record->field('710'), $record->field('711'));
185 ## loop over all editor fields
186 foreach my $field (@editor_fields) {
187 if (length($field)) {
188 my $editor = &get_editor
($field);
189 print "ED - ",$editor,"\r\n";
193 ## get info from the title field
194 if ($intype eq "unimarc") {
195 &print_title
($record->field('200'));
197 else { ## marc21, ukmarc
198 &print_title
($record->field('245'));
202 if ($intype eq "unimarc") {
203 &print_stitle
($record->field('225'));
205 else { ## marc21, ukmarc
206 &print_stitle
($record->field('490'));
210 if ($intype eq "unimarc") {
211 &print_isbn
($record->field('010'));
212 &print_issn
($record->field('011'));
214 elsif ($intype eq "ukmarc") {
215 &print_isbn
($record->field('021'));
216 ## this is just an assumption
217 &print_issn
($record->field('022'));
219 else { ## assume marc21
220 &print_isbn
($record->field('020'));
221 &print_issn
($record->field('022'));
224 if ($intype eq "marc21") {
225 &print_loc_callno
($record->field('050'));
226 &print_dewey
($record->field('082'));
228 ## else: unimarc, ukmarc do not seem to store call numbers?
231 if ($intype eq "unimarc") {
232 &print_pubinfo
($record->field('210'));
234 else { ## marc21, ukmarc
235 if ($record->field('264')) {
236 &print_pubinfo
($record->field('264'));
239 &print_pubinfo
($record->field('260'));
243 ## 6XX fields contain KW candidates. We add all of them to a
246 if ($intype eq "unimarc") {
247 @field_list = ('600', '601', '602', '604', '605', '606','607', '608', '610', '615', '620', '660', '661', '670', '675', '676', '680', '686');
248 } elsif ($intype eq "ukmarc") {
249 @field_list = ('600', '610', '611', '630', '650', '651','653', '655', '660', '661', '668', '690', '691', '692', '695');
250 } else { ## assume marc21
251 @field_list = ('600', '610', '611', '630', '650', '651','653', '654', '655', '656', '657', '658');
255 for my $f ( @field_list ) {
256 my @fields = $record->field($f);
257 push @kwpool, ( get_keywords
("$f",$record->field($f)) );
261 @kwpool = uniq
@kwpool;
263 for my $kw ( @kwpool ) {
264 print "KW - ", $kw, "\r\n";
267 ## 5XX have various candidates for notes and abstracts. We pool
268 ## all notes-like stuff in one list.
271 ## these fields have notes candidates
272 if ($intype eq "unimarc") {
273 foreach ('300', '301', '302', '303', '304', '305', '306', '307', '308', '310', '311', '312', '313', '314', '315', '316', '317', '318', '320', '321', '322', '323', '324', '325', '326', '327', '328', '332', '333', '336', '337', '345') {
274 &pool_subx
(\
@notepool, $_, $record->field($_));
277 elsif ($intype eq "ukmarc") {
278 foreach ('500', '501', '502', '503', '504', '505', '506', '508', '514', '515', '516', '521', '524', '525', '528', '530', '531', '532', '533', '534', '535', '537', '538', '540', '541', '542', '544', '554', '555', '556', '557', '561', '563', '580', '583', '584', '586') {
279 &pool_subx
(\
@notepool, $_, $record->field($_));
282 else { ## assume marc21
283 foreach ('500', '501', '502', '504', '505', '506', '507', '508', '510', '511', '513', '514', '515', '516', '518', '521', '522', '524', '525', '526', '530', '533', '534', '535') {
284 &pool_subx
(\
@notepool, $_, $record->field($_));
288 my $allnotes = join "; ", @notepool;
290 if (length($allnotes) > 0) {
291 print "N1 - ", $allnotes, "\r\n";
294 ## 320/520 have the abstract
295 if ($intype eq "unimarc") {
296 &print_abstract
($record->field('320'));
298 elsif ($intype eq "ukmarc") {
299 &print_abstract
($record->field('512'), $record->field('513'));
301 else { ## assume marc21
302 &print_abstract
($record->field('520'));
306 if ($record->field('856')) {
307 print_uri
($record->field('856'));
310 if ($ris_additional_fields) {
311 foreach my $ris_tag ( keys %$ris_additional_fields ) {
312 next if $ris_tag eq 'TY';
315 ref( $ris_additional_fields->{$ris_tag} ) eq 'ARRAY'
316 ? @
{ $ris_additional_fields->{$ris_tag} }
317 : $ris_additional_fields->{$ris_tag};
319 for my $tag (@fields) {
320 my ( $f, $sf ) = split( /\$/, $tag );
321 my @values = read_field
( { record
=> $record, field
=> $f, subfield
=> $sf } );
322 foreach my $v (@values) {
323 print "$ris_tag - $v\r\n";
332 # Let's re-redirect stdout
334 open STDOUT
, ">&", $oldout;
341 ##********************************************************************
342 ## print_typetag(): prints the first line of a RIS dataset including
343 ## the preceding newline
344 ## Argument: the leader of a MARC dataset
345 ## Returns: the value at leader position 06
346 ##********************************************************************
349 ## the keys of typehash are the allowed values at position 06
350 ## of the leader of a MARC record, the values are the RIS types
351 ## that might appropriately represent these types.
385 ## The type of a MARC record is found at position 06 of the leader
386 my $typeofrecord = defined($leader) && length $leader >=6 ?
387 substr($leader, 6, 1): undef;
388 ## Pos 07 == Bibliographic level
389 my $biblevel = defined($leader) && length $leader >=7 ?
390 substr($leader, 7, 1): '';
392 ## TODO: for books, field 008 positions 24-27 might have a few more
396 my $marcflavour = C4
::Context
->preference("marcflavour");
397 my $intype = lc($marcflavour);
398 if ($intype eq "unimarc") {
399 %typehash = %unitypehash;
402 %typehash = %ustypehash;
405 if (!defined $typeofrecord || !exists $typehash{$typeofrecord}) {
406 print "TY - GEN\r\n"; ## most reasonable default
407 warn ("no type found - assume GEN") if $marcprint;
408 } elsif ( $typeofrecord =~ "a" ) {
409 if ( $biblevel eq 'a' ) {
410 print "TY - GEN\r\n"; ## monographic component part
411 } elsif ( $biblevel eq 'b' || $biblevel eq 's' ) {
412 print "TY - SER\r\n"; ## serial or serial component part
413 } elsif ( $biblevel eq 'm' ) {
414 print "TY - $typehash{$typeofrecord}\r\n"; ## book
415 } elsif ( $biblevel eq 'c' || $biblevel eq 'd' ) {
416 print "TY - GEN\r\n"; ## collections, part of collections or made-up collections
417 } elsif ( $biblevel eq 'i' ) {
418 print "TY - DATA\r\n"; ## updating loose-leafe as Dataset
421 print "TY - $typehash{$typeofrecord}\r\n";
424 ## use $typeofrecord as the return value, just in case
428 ##********************************************************************
429 ## normalize_author(): normalizes an authorname
430 ## Arguments: authorname subfield a
431 ## authorname subfield b
432 ## authorname subfield c
433 ## name type if known: 0=direct order
434 ## 1=only surname or full name in
436 ## 3=family, clan, dynasty name
437 ## Returns: the normalized authorname
438 ##********************************************************************
439 sub normalize_author
{
440 my($rawauthora, $rawauthorb, $rawauthorc, $nametype) = @_;
442 if ($nametype == 0) {
443 # ToDo: convert every input to Last[,(F.|First)[ (M.|Middle)[,Suffix]]]
444 warn("name >>$rawauthora<< in direct order - leave as is") if $marcprint;
447 elsif ($nametype == 1) {
448 ## start munging subfield a (the real name part)
449 ## remove spaces after separators
450 $rawauthora =~ s
%([,.]+) *%$1%g;
452 ## remove trailing separators after spaces
453 $rawauthora =~ s
% *[,;:/]*$%%;
455 ## remove periods after a non-abbreviated name
456 $rawauthora =~ s
%(\w
{2,})\
.%$1%g;
458 ## start munging subfield b (something like the suffix)
459 ## remove trailing separators after spaces
460 $rawauthorb =~ s
% *[,;:/]*$%%;
462 ## we currently ignore subfield c until someone complains
463 if (length($rawauthorb) > 0) {
464 return join ", ", ($rawauthora, $rawauthorb);
470 elsif ($nametype == 3) {
475 ##********************************************************************
476 ## get_author(): gets authorname info from MARC fields 100, 700
477 ## Argument: field (100 or 700)
478 ## Returns: an author string in the format found in the record
479 ##********************************************************************
481 my ($authorfield) = @_;
484 ## the sequence of the name parts is encoded either in indicator
485 ## 1 (marc21) or 2 (unimarc)
486 my $marcflavour = C4
::Context
->preference("marcflavour");
487 my $intype = lc($marcflavour);
488 if ($intype eq "unimarc") {
491 else { ## assume marc21
495 print "<marc>:Author(Ind$indicator): ", $authorfield->indicator("$indicator"),"\r\n" if $marcprint;
496 print "<marc>:Author(\$a): ", $authorfield->subfield('a'),"\r\n" if $marcprint;
497 print "<marc>:Author(\$b): ", $authorfield->subfield('b'),"\r\n" if $marcprint;
498 print "<marc>:Author(\$c): ", $authorfield->subfield('c'),"\r\n" if $marcprint;
499 print "<marc>:Author(\$h): ", $authorfield->subfield('h'),"\r\n" if $marcprint;
500 if ($intype eq "ukmarc") {
501 my $authorname = $authorfield->subfield('a') . "," . $authorfield->subfield('h');
502 normalize_author
($authorname, $authorfield->subfield('b'), $authorfield->subfield('c'), $authorfield->indicator("$indicator"));
505 normalize_author
($authorfield->subfield('a') // '', $authorfield->subfield('b') // '', $authorfield->subfield('c') // '', $authorfield->indicator("$indicator"));
509 ##********************************************************************
510 ## get_editor(): gets editor info from MARC fields 110, 111, 710, 711
511 ## Argument: field (110, 111, 710, or 711)
512 ## Returns: an author string in the format found in the record
513 ##********************************************************************
515 my ($editorfield) = @_;
521 print "<marc>Editor(\$a): ", $editorfield->subfield('a'),"\r\n" if $marcprint;
522 print "<marc>Editor(\$b): ", $editorfield->subfield('b'),"\r\n" if $marcprint;
523 print "<marc>editor(\$c): ", $editorfield->subfield('c'),"\r\n" if $marcprint;
524 return $editorfield->subfield('a');
528 ##********************************************************************
529 ## print_title(): gets info from MARC field 245
530 ## Arguments: field (245)
532 ##********************************************************************
534 my ($titlefield) = @_;
536 print "<marc>empty title field (245)\r\n" if $marcprint;
537 warn("empty title field (245)") if $marcprint;
540 print "<marc>Title(\$a): ",$titlefield->subfield('a'),"\r\n" if $marcprint;
541 print "<marc>Title(\$b): ",$titlefield->subfield('b'),"\r\n" if $marcprint;
542 print "<marc>Title(\$c): ",$titlefield->subfield('c'),"\r\n" if $marcprint;
544 ## The title is usually written in a very odd notation. The title
545 ## proper ($a) often ends with a space followed by a separator like
546 ## a slash or a colon. The subtitle ($b) doesn't start with a space
547 ## so simple concatenation looks odd. We have to conditionally remove
548 ## the separator and make sure there's a space between title and
551 my $clean_title = $titlefield->subfield('a');
553 my $clean_subtitle = $titlefield->subfield('b');
554 $clean_subtitle ||= q{};
555 $clean_title =~ s
% *[/:;.]$%%;
556 $clean_subtitle =~ s
%^ *(.*) *[/:;.]$%$1%;
558 my $marcflavour = C4
::Context
->preference("marcflavour");
559 my $intype = lc($marcflavour);
560 if (length($clean_title) > 0
561 || (length($clean_subtitle) > 0 && $intype ne "unimarc")) {
562 print "TI - ", $clean_title;
564 ## subfield $b is relevant only for marc21/ukmarc
565 if (length($clean_subtitle) > 0 && $intype ne "unimarc") {
566 print ": ",$clean_subtitle;
571 ## The statement of responsibility is just this: horrors. There is
572 ## no formal definition how authors, editors and the like should
573 ## be written and designated. The field is free-form and resistant
574 ## to all parsing efforts, so this information is lost on me
579 ##********************************************************************
580 ## print_stitle(): prints info from series title field
583 ##********************************************************************
585 my ($titlefield) = @_;
588 print "<marc>empty series title field\r\n" if $marcprint;
591 print "<marc>Series title(\$a): ",$titlefield->subfield('a'),"\r\n" if $marcprint;
592 my $clean_title = $titlefield->subfield('a');
594 $clean_title =~ s
% *[/:;.]$%%;
596 if (length($clean_title) > 0) {
597 print "T2 - ", $clean_title,"\r\n";
600 my $marcflavour = C4
::Context
->preference("marcflavour");
601 my $intype = lc($marcflavour);
602 if ($intype eq "unimarc") {
603 print "<marc>Series vol(\$v): ",$titlefield->subfield('v'),"\r\n" if $marcprint;
604 if (length($titlefield->subfield('v')) > 0) {
605 print "VL - ", $titlefield->subfield('v'),"\r\n";
612 ##********************************************************************
613 ## print_isbn(): gets info from MARC field 020
614 ## Arguments: field (020)
615 ##********************************************************************
619 if (!$isbnfield || length ($isbnfield->subfield('a')) == 0) {
620 print "<marc>no isbn found (020\$a)\r\n" if $marcprint;
621 warn("no isbn found") if $marcprint;
624 if (length ($isbnfield->subfield('a')) < 10) {
625 print "<marc>truncated isbn (020\$a)\r\n" if $marcprint;
626 warn("truncated isbn") if $marcprint;
629 my $isbn = $isbnfield->subfield('a');
630 print "SN - ", $isbn, "\r\n";
634 ##********************************************************************
635 ## print_issn(): gets info from MARC field 022
636 ## Arguments: field (022)
637 ##********************************************************************
641 if (!$issnfield || length ($issnfield->subfield('a')) == 0) {
642 print "<marc>no issn found (022\$a)\r\n" if $marcprint;
643 warn("no issn found") if $marcprint;
646 if (length ($issnfield->subfield('a')) < 9) {
647 print "<marc>truncated issn (022\$a)\r\n" if $marcprint;
648 warn("truncated issn") if $marcprint;
651 my $issn = substr($issnfield->subfield('a'), 0, 9);
652 print "SN - ", $issn, "\r\n";
657 # print_uri() prints info from 856 u
662 foreach my $f856 (@f856s) {
663 if (my $uri = $f856->subfield('u')) {
664 print "UR - ", $uri, "\r\n";
669 ##********************************************************************
670 ## print_loc_callno(): gets info from MARC field 050
671 ## Arguments: field (050)
672 ##********************************************************************
673 sub print_loc_callno
{
674 my($callnofield) = @_;
676 if (!$callnofield || length ($callnofield->subfield('a')) == 0) {
677 print "<marc>no LOC call number found (050\$a)\r\n" if $marcprint;
678 warn("no LOC call number found") if $marcprint;
681 print "AV - ", $callnofield->subfield('a'), " ", $callnofield->subfield('b'), "\r\n";
685 ##********************************************************************
686 ## print_dewey(): gets info from MARC field 082
687 ## Arguments: field (082)
688 ##********************************************************************
690 my($deweyfield) = @_;
692 if (!$deweyfield || length ($deweyfield->subfield('a')) == 0) {
693 print "<marc>no Dewey number found (082\$a)\r\n" if $marcprint;
694 warn("no Dewey number found") if $marcprint;
697 print "U1 - ", $deweyfield->subfield('a'), " ", $deweyfield->subfield('2'), "\r\n";
701 ##********************************************************************
702 ## print_pubinfo(): gets info from MARC field 260
703 ## Arguments: field (260)
704 ##********************************************************************
706 my($pubinfofield) = @_;
708 if (!$pubinfofield) {
709 print "<marc>no publication information found (260/264)\r\n" if $marcprint;
710 warn("no publication information found") if $marcprint;
713 ## the following information is available in MARC21:
715 ## $b publisher -> PB
717 ## the corresponding subfields for UNIMARC:
719 ## $c publisher -> PB
722 ## all of them are repeatable. We pool all places into a
723 ## comma-separated list in CY. We also pool all publishers
724 ## into a comma-separated list in PB. We break the rule with
725 ## the date field because this wouldn't make much sense. In
726 ## this case, we use the first occurrence for PY, the second
727 ## for Y2, and ignore the rest
729 my @pubsubfields = $pubinfofield->subfields();
735 my $pubsub_publisher;
738 my $marcflavour = C4
::Context
->preference("marcflavour");
739 my $intype = lc($marcflavour);
740 if ($intype eq "unimarc") {
742 $pubsub_publisher = "c";
745 else { ## assume marc21
747 $pubsub_publisher = "b";
751 ## loop over all subfield list entries
752 for my $tuple (@pubsubfields) {
753 ## each tuple consists of the subfield code and the value
754 if (@
$tuple[0] eq $pubsub_place) {
755 ## strip any trailing crap
758 ## pool all occurrences in a list
761 elsif (@
$tuple[0] eq $pubsub_publisher) {
762 ## strip any trailing crap
765 ## pool all occurrences in a list
766 push (@publishers, $_);
768 elsif (@
$tuple[0] eq $pubsub_date) {
769 ## the dates are free-form, so we want to extract
770 ## a four-digit year and leave the rest as
772 my $protoyear = @
$tuple[1];
773 print "<marc>Year (260\$c): $protoyear\r\n" if $marcprint;
775 ## strip any separator chars at the end
776 $protoyear =~ s
% *[\
.;:/]*$%%;
778 ## isolate a four-digit year. We discard anything
779 ## preceding the year, but keep everything after
780 ## the year as other info.
781 $protoyear =~ s
%\D
*([0-9\
-]{4})(.*)%$1///$2%;
783 ## check what we've got. If there is no four-digit
784 ## year, make it up. If digits are replaced by '-',
785 ## replace those with 0s
787 if (index($protoyear, "/") == 4) {
789 ## replace all '-' in the four-digit year
791 substr($protoyear,0,4) =~ s!-!0!g;
795 print "<marc>no four-digit year found, use 0000\r\n" if $marcprint;
796 $protoyear = "0000///$protoyear";
797 warn("no four-digit year found, use 0000") if $marcprint;
800 if ($pycounter == 0 && length($protoyear)) {
801 print "PY - $protoyear\r\n";
803 elsif ($pycounter == 1 && length($_)) {
804 print "Y2 - $protoyear\r\n";
811 ## now dump the collected CY and PB lists
813 print "CY - ", join(", ", @cities), "\r\n";
815 if (@publishers > 0) {
816 print "PB - ", join(", ", @publishers), "\r\n";
821 ##********************************************************************
822 ## get_keywords(): prints info from MARC fields 6XX
823 ## Arguments: list of fields (6XX)
824 ##********************************************************************
826 my($fieldname, @keywords) = @_;
829 ## a list of all possible subfields
830 my @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'x', 'y', 'z', '2', '3', '4');
832 ## loop over all 6XX fields
833 foreach my $kwfield (@keywords) {
834 if ($kwfield != undef) {
835 ## authornames get special treatment
836 if ($fieldname eq "600") {
837 my $val = normalize_author
($kwfield->subfield('a'), $kwfield->subfield('b'), $kwfield->subfield('c'), $kwfield->indicator('1'));
839 print "<marc>Field $kwfield subfield a:", $kwfield->subfield('a'), "\r\n<marc>Field $kwfield subfield b:", $kwfield->subfield('b'), "\r\n<marc>Field $kwfield subfield c:", $kwfield->subfield('c'), "\r\n" if $marcprint;
842 ## retrieve all available subfields
843 my @kwsubfields = $kwfield->subfields();
845 ## loop over all available subfield tuples
846 foreach my $kwtuple (@kwsubfields) {
847 ## loop over all subfields to check
848 foreach my $subfield (@subfields) {
849 ## [0] contains subfield code
850 if (@
$kwtuple[0] eq $subfield) {
851 ## [1] contains value, remove trailing separators
852 @
$kwtuple[1] =~ s
% *[,;.:/]*$%%;
853 if (length(@
$kwtuple[1]) > 0) {
854 push @kw, @
$kwtuple[1];
855 print "<marc>Field $fieldname subfield $subfield:", @
$kwtuple[1], "\r\n" if $marcprint;
857 ## we can leave the subfields loop here
868 ##********************************************************************
869 ## pool_subx(): adds contents of several subfields to a list
870 ## Arguments: reference to a list
872 ## list of fields (5XX)
873 ##********************************************************************
875 my($aref, $fieldname, @notefields) = @_;
877 ## we use a list that contains the interesting subfields
879 # ToDo: this is apparently correct only for marc21
882 if ($fieldname eq "500") {
885 elsif ($fieldname eq "501") {
888 elsif ($fieldname eq "502") {
891 elsif ($fieldname eq "504") {
892 @subfields = ('a', 'b');
894 elsif ($fieldname eq "505") {
895 @subfields = ('a', 'g', 'r', 't', 'u');
897 elsif ($fieldname eq "506") {
898 @subfields = ('a', 'b', 'c', 'd', 'e');
900 elsif ($fieldname eq "507") {
901 @subfields = ('a', 'b');
903 elsif ($fieldname eq "508") {
906 elsif ($fieldname eq "510") {
907 @subfields = ('a', 'b', 'c', 'x', '3');
909 elsif ($fieldname eq "511") {
912 elsif ($fieldname eq "513") {
913 @subfields = ('a', 'b');
915 elsif ($fieldname eq "514") {
916 @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'm', 'u', 'z');
918 elsif ($fieldname eq "515") {
921 elsif ($fieldname eq "516") {
924 elsif ($fieldname eq "518") {
925 @subfields = ('a', '3');
927 elsif ($fieldname eq "521") {
928 @subfields = ('a', 'b', '3');
930 elsif ($fieldname eq "522") {
933 elsif ($fieldname eq "524") {
934 @subfields = ('a', '2', '3');
936 elsif ($fieldname eq "525") {
939 elsif ($fieldname eq "526") {
940 @subfields = ('a', 'b', 'c', 'd', 'i', 'x', 'z', '5');
942 elsif ($fieldname eq "530") {
943 @subfields = ('a', 'b', 'c', 'd', 'u', '3');
945 elsif ($fieldname eq "533") {
946 @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'm', 'n', '3');
948 elsif ($fieldname eq "534") {
949 @subfields = ('a', 'b', 'c', 'e', 'f', 'k', 'l', 'm', 'n', 'p', 't', 'x', 'z');
951 elsif ($fieldname eq "535") {
952 @subfields = ('a', 'b', 'c', 'd', 'g', '3');
955 ## loop over all notefields
956 foreach my $notefield (@notefields) {
957 if (defined $notefield) {
958 ## retrieve all available subfield tuples
959 my @notesubfields = $notefield->subfields();
961 ## loop over all subfield tuples
962 foreach my $notetuple (@notesubfields) {
963 ## loop over all subfields to check
964 foreach my $subfield (@subfields) {
965 ## [0] contains subfield code
966 if (@
$notetuple[0] eq $subfield) {
967 ## [1] contains value, remove trailing separators
968 print "<marc>field $fieldname subfield $subfield: ", @
$notetuple[1], "\r\n" if $marcprint;
969 @
$notetuple[1] =~ s
% *[,;.:/]*$%%;
970 if (length(@
$notetuple[1]) > 0) {
972 push @
{$aref}, @
$notetuple[1];
982 ##********************************************************************
983 ## print_abstract(): prints abstract fields
984 ## Arguments: list of fields (520)
985 ##********************************************************************
987 # ToDo: take care of repeatable subfields
990 ## we check the following subfields
991 my @subfields = ('a', 'b');
993 ## we generate a list for all useful strings
996 ## loop over all abfields
997 foreach my $abfield (@abfields) {
998 foreach my $field (@subfields) {
999 if ( length( $abfield->subfield($field) ) > 0 ) {
1000 my $ab = $abfield->subfield($field);
1002 print "<marc>field 520 subfield $field: $ab\r\n" if $marcprint;
1004 ## strip trailing separators
1005 $ab =~ s
% *[;,:./]*$%%;
1007 ## add string to the list
1008 push( @abstrings, $ab );
1013 my $allabs = join "; ", @abstrings;
1015 if (length($allabs) > 0) {
1016 print "N2 - ", $allabs, "\r\n";