MT 2037 : Update ISBD
[koha.git] / C4 / Ris.pm
blobd3026c0c08afc80084ab45089f20dc37433d9ee8
1 package C4::Ris;
3 # Original script :
4 ## marc2ris: converts MARC21 and UNIMARC datasets to RIS format
5 ## See comments below for compliance with other MARC dialects
6 ##
7 ## usage: perl marc2ris < infile.marc > outfile.ris
8 ##
9 ## Dependencies: perl 5.6.0 or later
10 ## MARC::Record
11 ## MARC::Charset
13 ## markus@mhoenicka.de 2002-11-16
15 ## This program is free software; you can redistribute it and/or modify
16 ## it under the terms of the GNU General Public License as published by
17 ## the Free Software Foundation; either version 2 of the License, or
18 ## (at your option) any later version.
19 ##
20 ## This program is distributed in the hope that it will be useful,
21 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
22 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 ## GNU General Public License for more details.
25 ## You should have received a copy of the GNU General Public License
26 ## along with this program; if not, write to the Free Software
27 ## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 ## Some background about MARC as understood by this script
30 ## The default input format used in this script is MARC21, which
31 ## superseded USMARC and CANMARC. The specification can be found at:
32 ## http://lcweb.loc.gov/marc/
33 ## UNIMARC follows the specification at:
34 ## http://www.ifla.org/VI/3/p1996-1/sec-uni.htm
35 ## UKMARC support is a bit shaky because there is no specification available
36 ## for free. The wisdom used in this script was taken from a PDF document
37 ## comparing UKMARC to MARC21 found at:
38 ## www.bl.uk/services/bibliographic/marcchange.pdf
41 # Modified 2008 by BibLibre for Koha
43 # This file is part of Koha.
45 # Koha is free software; you can redistribute it and/or modify it under the
46 # terms of the GNU General Public License as published by the Free Software
47 # Foundation; either version 2 of the License, or (at your option) any later
48 # version.
50 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
51 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
52 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
54 # You should have received a copy of the GNU General Public License along with
55 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
56 # Suite 330, Boston, MA 02111-1307 USA
61 use vars qw($VERSION @ISA @EXPORT);
63 # set the version for version checking
64 $VERSION = 3.00;
66 @ISA = qw(Exporter);
68 # only export API methods
70 @EXPORT = qw(
71 &marc2ris
75 =head2 marc2bibtex - Convert from UNIMARC to RIS
77 =over 4
79 my ($ris) = marc2ris($record);
81 Returns a RIS scalar
83 =over 2
85 C<$record> - a MARC::Record object
87 =back
89 =back
91 =cut
93 sub marc2ris {
94 my ($record) = @_;
95 my $output;
97 my $marcflavour = C4::Context->preference("marcflavour");
98 my $intype = lc($marcflavour);
99 my $marcprint = 1; # Debug
101 # Let's redirect stdout
102 open my $oldout, ">&STDOUT";
103 my $outvar;
104 close STDOUT;
105 open STDOUT,'>', \$outvar;
108 ## First we should check the character encoding. This may be
109 ## MARC-8 or UTF-8. The former is indicated by a blank, the latter
110 ## by 'a' at position 09 (zero-based) of the leader
111 my $leader = $record->leader();
112 if ($intype eq "marc21") {
113 if ($leader =~ /^.{9}a/) {
114 print "<marc>---\n<marc>UTF-8 data\n" if $marcprint;
115 $utf = 1;
117 else {
118 print "<marc>---\n<marc>MARC-8 data\n" if $marcprint;
121 ## else: other MARC formats do not specify the character encoding
122 ## we assume it's *not* UTF-8
124 ## start RIS dataset
125 &print_typetag($leader);
127 ## retrieve all author fields and collect them in a list
128 my @author_fields;
130 if ($intype eq "unimarc") {
131 ## Fields 700, 701, and 702 can contain author names
132 @author_fields = ($record->field('700'), $record->field('701'), $record->field('702'));
134 else { ## marc21, ukmarc
135 ## Field 100 sometimes carries main author
136 ## Field(s) 700 carry added entries - personal names
137 @author_fields = ($record->field('100'), $record->field('700'));
140 ## loop over all author fields
141 foreach my $field (@author_fields) {
142 if (length($field)) {
143 my $author = &get_author($field);
144 print "AU - ",&charconv($author),"\n";
148 # ToDo: should we specify anonymous as author if we didn't find
149 # one? or use one of the corporate/meeting names below?
151 ## add corporate names or meeting names as editors ??
152 my @editor_fields;
154 if ($intype eq "unimarc") {
155 ## Fields 710, 711, and 712 can carry corporate names
156 ## Field(s) 720, 721, 722, 730 have additional candidates
157 @editor_fields = ($record->field('710'), $record->field('711'), $record->field('712'), $record->field('720'), $record->field('721'), $record->field('722'), $record->field('730'));
159 else { ## marc21, ukmarc
160 ## Fields 110 and 111 carry the main entries - corporate name and
161 ## meeting name, respectively
162 ## Field(s) 710, 711 carry added entries - personal names
163 @editor_fields = ($record->field('110'), $record->field('111'), $record->field('710'), $record->field('711'));
166 ## loop over all editor fields
167 foreach my $field (@editor_fields) {
168 if (length($field)) {
169 my $editor = &get_editor($field);
170 print "ED - ",&charconv($editor),"\n";
174 ## get info from the title field
175 if ($intype eq "unimarc") {
176 &print_title($record->field('200'));
178 else { ## marc21, ukmarc
179 &print_title($record->field('245'));
182 ## series title
183 if ($intype eq "unimarc") {
184 &print_stitle($record->field('225'));
186 else { ## marc21, ukmarc
187 &print_stitle($record->field('210'));
190 ## ISBN/ISSN
191 if ($intype eq "unimarc") {
192 &print_isbn($record->field('010'));
193 &print_issn($record->field('011'));
195 elsif ($intype eq "ukmarc") {
196 &print_isbn($record->field('021'));
197 ## this is just an assumption
198 &print_issn($record->field('022'));
200 else { ## assume marc21
201 &print_isbn($record->field('020'));
202 &print_issn($record->field('022'));
205 if ($intype eq "marc21") {
206 &print_loc_callno($record->field('050'));
207 &print_dewey($record->field('082'));
209 ## else: unimarc, ukmarc do not seem to store call numbers?
211 ## publication info
212 if ($intype eq "unimarc") {
213 &print_pubinfo($record->field('210'));
215 else { ## marc21, ukmarc
216 &print_pubinfo($record->field('260'));
219 ## 6XX fields contain KW candidates. We add all of them to a
220 ## hash to eliminate duplicates
221 my %kwpool;
223 if ($intype eq "unimarc") {
224 foreach ('600', '601', '602', '604', '605', '606','607', '608', '610', '615', '620', '660'. '661', '670', '675', '676', '680', '686') {
225 &get_keywords(\%kwpool, "$_",$record->field($_));
228 elsif ($intype eq "ukmarc") {
229 foreach ('600', '610', '611', '630', '650', '651','653', '655', '660', '661', '668', '690', '691', '692', '695') {
230 &get_keywords(\%kwpool, "$_",$record->field($_));
233 else { ## assume marc21
234 foreach ('600', '610', '611', '630', '650', '651','653', '654', '655', '656', '657', '658') {
235 &get_keywords(\%kwpool, "$_",$record->field($_));
239 ## print all keywords found in the hash. The value of each hash
240 ## entry is the number of occurrences, but we're not really interested
241 ## in that and rather print the key
242 while (my ($key, $value) = each %kwpool) {
243 print "KW - ", &charconv($key), "\n";
246 ## 5XX have various candidates for notes and abstracts. We pool
247 ## all notes-like stuff in one list.
248 my @notepool;
250 ## these fields have notes candidates
251 if ($intype eq "unimarc") {
252 foreach ('300', '301', '302', '303', '304', '305', '306', '307', '308', '310', '311', '312', '313', '314', '315', '316', '317', '318', '320', '321', '322', '323', '324', '325', '326', '327', '328', '332', '333', '336', '337', '345') {
253 &pool_subx(\@notepool, $_, $record->field($_));
256 elsif ($intype eq "ukmarc") {
257 foreach ('500', '501', '502', '503', '504', '505', '506', '508', '514', '515', '516', '521', '524', '525', '528', '530', '531', '532', '533', '534', '535', '537', '538', '540', '541', '542', '544', '554', '555', '556', '557', '561', '563', '580', '583', '584', '586') {
258 &pool_subx(\@notepool, $_, $record->field($_));
261 else { ## assume marc21
262 foreach ('500', '501', '502', '504', '505', '506', '507', '508', '510', '511', '513', '514', '515', '516', '518', '521', '522', '524', '525', '526', '530', '533', '534', '535') {
263 &pool_subx(\@notepool, $_, $record->field($_));
267 my $allnotes = join "; ", @notepool;
269 if (length($allnotes) > 0) {
270 print "N1 - ", &charconv($allnotes), "\n";
273 ## 320/520 have the abstract
274 if ($intype eq "unimarc") {
275 &print_abstract($record->field('320'));
277 elsif ($intype eq "ukmarc") {
278 &print_abstract($record->field('512'), $record->field('513'));
280 else { ## assume marc21
281 &print_abstract($record->field('520'));
284 ## end RIS dataset
285 print "ER - \n";
287 warn $outvar;
289 # Let's re-redirect stdout
290 close STDOUT;
291 open STDOUT, ">&", $oldout;
293 return $outvar;
298 ##********************************************************************
299 ## print_typetag(): prints the first line of a RIS dataset including
300 ## the preceeding newline
301 ## Argument: the leader of a MARC dataset
302 ## Returns: the value at leader position 06
303 ##********************************************************************
304 sub print_typetag {
305 ## the keys of typehash are the allowed values at position 06
306 ## of the leader of a MARC record, the values are the RIS types
307 ## that might appropriately represent these types.
308 my %ustypehash = (
309 "a" => "BOOK",
310 "c" => "MUSIC",
311 "d" => "MUSIC",
312 "e" => "MAP",
313 "f" => "MAP",
314 "g" => "ADVS",
315 "i" => "SOUND",
316 "j" => "SOUND",
317 "k" => "ART",
318 "m" => "DATA",
319 "o" => "GEN",
320 "p" => "GEN",
321 "r" => "ART",
322 "t" => "GEN",
325 my %unitypehash = (
326 "a" => "BOOK",
327 "b" => "BOOK",
328 "c" => "MUSIC",
329 "d" => "MUSIC",
330 "e" => "MAP",
331 "f" => "MAP",
332 "g" => "ADVS",
333 "i" => "SOUND",
334 "j" => "SOUND",
335 "k" => "ART",
336 "l" => "ELEC",
337 "m" => "ADVS",
338 "r" => "ART",
341 ## The type of a MARC record is found at position 06 of the leader
342 my $typeofrecord = substr("@_", 6, 1);
344 ## ToDo: for books, field 008 positions 24-27 might have a few more
345 ## hints
347 my $typehash;
349 ## the ukmarc here is just a guess
350 if ($intype eq "marc21" || $intype eq "ukmarc") {
351 $typehash = $ustypehash;
353 elsif ($intype eq "unimarc") {
354 $typehash = $unitypehash;
356 else {
357 ## assume MARC21 as default
358 $typehash = $ustypehash;
361 if (!exists $typehash{$typeofrecord}) {
362 print "\nTY - BOOK\n"; ## most reasonable default
363 warn ("no type found - assume BOOK");
365 else {
366 print "\nTY - $typehash{$typeofrecord}\n";
369 ## use $typeofrecord as the return value, just in case
370 $typeofrecord;
373 ##********************************************************************
374 ## normalize_author(): normalizes an authorname
375 ## Arguments: authorname subfield a
376 ## authorname subfield b
377 ## authorname subfield c
378 ## name type if known: 0=direct order
379 ## 1=only surname or full name in
380 ## inverted order
381 ## 3=family, clan, dynasty name
382 ## Returns: the normalized authorname
383 ##********************************************************************
384 sub normalize_author {
385 my($rawauthora, $rawauthorb, $rawauthorc, $nametype) = @_;
387 if ($nametype == 0) {
388 # ToDo: convert every input to Last[,(F.|First)[ (M.|Middle)[,Suffix]]]
389 warn("name >>$rawauthora<< in direct order - leave as is");
390 return $rawauthora;
392 elsif ($nametype == 1) {
393 ## start munging subfield a (the real name part)
394 ## remove spaces after separators
395 $rawauthora =~ s%([,.]+) *%$1%g;
397 ## remove trailing separators after spaces
398 $rawauthora =~ s% *[,;:/]*$%%;
400 ## remove periods after a non-abbreviated name
401 $rawauthora =~ s%(\w{2,})\.%$1%g;
403 ## start munging subfield b (something like the suffix)
404 ## remove trailing separators after spaces
405 $rawauthorb =~ s% *[,;:/]*$%%;
407 ## we currently ignore subfield c until someone complains
408 if (length($rawauthorb) > 0) {
409 return join ",", ($rawauthora, $rawauthorb);
411 else {
412 return $rawauthora;
415 elsif ($nametype == 3) {
416 return $rawauthora;
420 ##********************************************************************
421 ## get_author(): gets authorname info from MARC fields 100, 700
422 ## Argument: field (100 or 700)
423 ## Returns: an author string in the format found in the record
424 ##********************************************************************
425 sub get_author {
426 my ($authorfield) = @_;
427 my ($indicator);
429 ## the sequence of the name parts is encoded either in indicator
430 ## 1 (marc21) or 2 (unimarc)
431 if ($intype eq "unimarc") {
432 $indicator = 2;
434 else { ## assume marc21
435 $indicator = 1;
438 print "<marc>:Author(Ind$indicator): ", $authorfield->indicator("$indicator"),"\n" if $marcprint;
439 print "<marc>:Author(\$a): ", $authorfield->subfield('a'),"\n" if $marcprint;
440 print "<marc>:Author(\$b): ", $authorfield->subfield('b'),"\n" if $marcprint;
441 print "<marc>:Author(\$c): ", $authorfield->subfield('c'),"\n" if $marcprint;
442 print "<marc>:Author(\$h): ", $authorfield->subfield('h'),"\n" if $marcprint;
443 if ($intype eq "ukmarc") {
444 my $authorname = $authorfield->subfield('a') . "," . $authorfield->subfield('h');
445 normalize_author($authorname, $authorfield->subfield('b'), $authorfield->subfield('c'), $authorfield->indicator("$indicator"));
447 else {
448 normalize_author($authorfield->subfield('a'), $authorfield->subfield('b'), $authorfield->subfield('c'), $authorfield->indicator("$indicator"));
452 ##********************************************************************
453 ## get_editor(): gets editor info from MARC fields 110, 111, 710, 711
454 ## Argument: field (110, 111, 710, or 711)
455 ## Returns: an author string in the format found in the record
456 ##********************************************************************
457 sub get_editor {
458 my ($editorfield) = @_;
460 if ($editorfield == undef) {
461 return undef;
463 else {
464 print "<marc>Editor(\$a): ", $editorfield->subfield('a'),"\n" if $marcprint;
465 print "<marc>Editor(\$b): ", $editorfield->subfield('b'),"\n" if $marcprint;
466 print "<marc>editor(\$c): ", $editorfield->subfield('c'),"\n" if $marcprint;
467 return $editorfield->subfield('a');
471 ##********************************************************************
472 ## print_title(): gets info from MARC field 245
473 ## Arguments: field (245)
474 ## Returns:
475 ##********************************************************************
476 sub print_title {
477 my ($titlefield) = @_;
478 if ($titlefield == undef) {
479 print "<marc>empty title field (245)\n" if $marcprint;
480 warn("empty title field (245)");
483 else {
484 print "<marc>Title(\$a): ",$titlefield->subfield('a'),"\n" if $marcprint;
485 print "<marc>Title(\$b): ",$titlefield->subfield('b'),"\n" if $marcprint;
486 print "<marc>Title(\$c): ",$titlefield->subfield('c'),"\n" if $marcprint;
488 ## The title is usually written in a very odd notation. The title
489 ## proper ($a) often ends with a space followed by a separator like
490 ## a slash or a colon. The subtitle ($b) doesn't start with a space
491 ## so simple concatenation looks odd. We have to conditionally remove
492 ## the separator and make sure there's a space between title and
493 ## subtitle
495 my $clean_title = $titlefield->subfield('a');
497 my $clean_subtitle = $titlefield->subfield('b');
498 $clean_title =~ s% *[/:;.]$%%;
499 $clean_subtitle =~ s%^ *(.*) *[/:;.]$%$1%;
501 if (length($clean_title) > 0
502 || (length($clean_subtitle) > 0 && $intype ne "unimarc")) {
503 print "TI - ", &charconv($clean_title);
505 ## subfield $b is relevant only for marc21/ukmarc
506 if (length($clean_subtitle) > 0 && $intype ne "unimarc") {
507 print ": ",&charconv($clean_subtitle);
509 print "\n";
512 ## The statement of responsibility is just this: horrors. There is
513 ## no formal definition how authors, editors and the like should
514 ## be written and designated. The field is free-form and resistant
515 ## to all parsing efforts, so this information is lost on me
519 ##********************************************************************
520 ## print_stitle(): prints info from series title field
521 ## Arguments: field
522 ## Returns:
523 ##********************************************************************
524 sub print_stitle {
525 my ($titlefield) = @_;
527 if ($titlefield == undef) {
528 print "<marc>empty series title field\n" if $marcprint;
529 warn("empty series title field");
532 else {
533 print "<marc>Series title(\$a): ",$titlefield->subfield('a'),"\n" if $marcprint;
534 my $clean_title = $titlefield->subfield('a');
536 $clean_title =~ s% *[/:;.]$%%;
538 if (length($clean_title) > 0) {
539 print "T2 - ", &charconv($clean_title);
542 if ($intype eq "unimarc") {
543 print "<marc>Series vol(\$v): ",$titlefield->subfield('v'),"\n" if $marcprint;
544 if (length($titlefield->subfield('v')) > 0) {
545 print "VL - ", &charconv($titlefield->subfield('v'));
551 ##********************************************************************
552 ## print_isbn(): gets info from MARC field 020
553 ## Arguments: field (020)
554 ##********************************************************************
555 sub print_isbn {
556 my($isbnfield) = @_;
558 if ($isbnfield == undef ||length ($isbnfield->subfield('a')) == 0) {
559 print "<marc>no isbn found (020\$a)\n" if $marcprint;
560 warn("no isbn found");
562 else {
563 if (length ($isbnfield->subfield('a')) < 10) {
564 print "<marc>truncated isbn (020\$a)\n" if $marcprint;
565 warn("truncated isbn");
568 my $isbn = substr($isbnfield->subfield('a'), 0, 10);
569 print "SN - ", &charconv($isbn), "\n";
573 ##********************************************************************
574 ## print_issn(): gets info from MARC field 022
575 ## Arguments: field (022)
576 ##********************************************************************
577 sub print_issn {
578 my($issnfield) = @_;
580 if ($issnfield == undef ||length ($issnfield->subfield('a')) == 0) {
581 print "<marc>no issn found (022\$a)\n" if $marcprint;
582 warn("no issn found");
584 else {
585 if (length ($issnfield->subfield('a')) < 9) {
586 print "<marc>truncated issn (022\$a)\n" if $marcprint;
587 warn("truncated issn");
590 my $issn = substr($issnfield->subfield('a'), 0, 9);
591 print "SN - ", &charconv($issn), "\n";
595 ##********************************************************************
596 ## print_loc_callno(): gets info from MARC field 050
597 ## Arguments: field (050)
598 ##********************************************************************
599 sub print_loc_callno {
600 my($callnofield) = @_;
602 if ($callnofield == undef || length ($callnofield->subfield('a')) == 0) {
603 print "<marc>no LOC call number found (050\$a)\n" if $marcprint;
604 warn("no LOC call number found");
606 else {
607 print "AV - ", &charconv($callnofield->subfield('a')), " ", &charconv($callnofield->subfield('b')), "\n";
611 ##********************************************************************
612 ## print_dewey(): gets info from MARC field 082
613 ## Arguments: field (082)
614 ##********************************************************************
615 sub print_dewey {
616 my($deweyfield) = @_;
618 if ($deweyfield == undef || length ($deweyfield->subfield('a')) == 0) {
619 print "<marc>no Dewey number found (082\$a)\n" if $marcprint;
620 warn("no Dewey number found");
622 else {
623 print "U1 - ", &charconv($deweyfield->subfield('a')), " ", &charconv($deweyfield->subfield('2')), "\n";
627 ##********************************************************************
628 ## print_pubinfo(): gets info from MARC field 260
629 ## Arguments: field (260)
630 ##********************************************************************
631 sub print_pubinfo {
632 my($pubinfofield) = @_;
634 if ($pubinfofield == undef) {
635 print "<marc>no publication information found (260)\n" if $marcprint;
636 warn("no publication information found");
638 else {
639 ## the following information is available in MARC21:
640 ## $a place -> CY
641 ## $b publisher -> PB
642 ## $c date -> PY
643 ## the corresponding subfields for UNIMARC:
644 ## $a place -> CY
645 ## $c publisher -> PB
646 ## $d date -> PY
648 ## all of them are repeatable. We pool all places into a
649 ## comma-separated list in CY. We also pool all publishers
650 ## into a comma-separated list in PB. We break the rule with
651 ## the date field because this wouldn't make much sense. In
652 ## this case, we use the first occurrence for PY, the second
653 ## for Y2, and ignore the rest
655 my @pubsubfields = $pubinfofield->subfields();
656 my @cities;
657 my @publishers;
658 my $pycounter = 0;
660 my $pubsub_place;
661 my $pubsub_publisher;
662 my $pubsub_date;
664 if ($intype eq "unimarc") {
665 $pubsub_place = "a";
666 $pubsub_publisher = "c";
667 $pubsub_date = "d";
669 else { ## assume marc21
670 $pubsub_place = "a";
671 $pubsub_publisher = "b";
672 $pubsub_date = "c";
675 ## loop over all subfield list entries
676 for my $tuple (@pubsubfields) {
677 ## each tuple consists of the subfield code and the value
678 if (@$tuple[0] eq $pubsub_place) {
679 ## strip any trailing crap
680 $_ = @$tuple[1];
681 s% *[,;:/]$%%;
682 ## pool all occurrences in a list
683 push (@cities, $_);
685 elsif (@$tuple[0] eq $pubsub_publisher) {
686 ## strip any trailing crap
687 $_ = @$tuple[1];
688 s% *[,;:/]$%%;
689 ## pool all occurrences in a list
690 push (@publishers, $_);
692 elsif (@$tuple[0] eq $pubsub_date) {
693 ## the dates are free-form, so we want to extract
694 ## a four-digit year and leave the rest as
695 ## "other info"
696 $protoyear = @$tuple[1];
697 print "<marc>Year (260\$c): $protoyear\n" if $marcprint;
699 ## strip any separator chars at the end
700 $protoyear =~ s% *[\.;:/]*$%%;
702 ## isolate a four-digit year. We discard anything
703 ## preceeding the year, but keep everything after
704 ## the year as other info.
705 $protoyear =~ s%\D*([0-9\-]{4})(.*)%$1///$2%;
707 ## check what we've got. If there is no four-digit
708 ## year, make it up. If digits are replaced by '-',
709 ## replace those with 0s
711 if (index($protoyear, "/") == 4) {
712 ## have year info
713 ## replace all '-' in the four-digit year
714 ## by '0'
715 substr($protoyear,0,4) =~ s!-!0!g;
717 else {
718 ## have no year info
719 print "<marc>no four-digit year found, use 0000\n" if $marcprint;
720 $protoyear = "0000///$protoyear";
721 warn("no four-digit year found, use 0000");
724 if ($pycounter == 0 && length($protoyear)) {
725 print "PY - $protoyear\n";
727 elsif ($pycounter == 1 && length($_)) {
728 print "Y2 - $protoyear\n";
730 ## else: discard
732 ## else: discard
735 ## now dump the collected CY and PB lists
736 if (@cities > 0) {
737 print "CY - ", &charconv(join(", ", @cities)), "\n";
739 if (@publishers > 0) {
740 print "PB - ", &charconv(join(", ", @publishers)), "\n";
745 ##********************************************************************
746 ## get_keywords(): prints info from MARC fields 6XX
747 ## Arguments: list of fields (6XX)
748 ##********************************************************************
749 sub get_keywords {
750 my($href, $fieldname, @keywords) = @_;
752 ## a list of all possible subfields
753 my @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'x', 'y', 'z', '2', '3', '4');
755 ## loop over all 6XX fields
756 foreach $kwfield (@keywords) {
757 if ($kwfield != undef) {
758 ## authornames get special treatment
759 if ($fieldname eq "600") {
760 my $val = normalize_author($kwfield->subfield('a'), $kwfield->subfield('b'), $kwfield->subfield('c'), $kwfield->indicator('1'));
761 ${$href}{$val} += 1;
762 print "<marc>Field $kwfield subfield a:", $kwfield->subfield('a'), "\n<marc>Field $kwfield subfield b:", $kwfield->subfield('b'), "\n<marc>Field $kwfield subfield c:", $kwfield->subfield('c'), "\n" if $marcprint;
764 else {
765 ## retrieve all available subfields
766 @kwsubfields = $kwfield->subfields();
768 ## loop over all available subfield tuples
769 foreach $kwtuple (@kwsubfields) {
770 ## loop over all subfields to check
771 foreach $subfield (@subfields) {
772 ## [0] contains subfield code
773 if (@$kwtuple[0] eq $subfield) {
774 ## [1] contains value, remove trailing separators
775 @$kwtuple[1] =~ s% *[,;.:/]*$%%;
776 if (length(@$kwtuple[1]) > 0) {
777 ## add to hash
778 ${$href}{@$kwtuple[1]} += 1;
779 print "<marc>Field $fieldname subfield $subfield:", @$kwtuple[1], "\n" if $marcprint;
781 ## we can leave the subfields loop here
782 last;
791 ##********************************************************************
792 ## pool_subx(): adds contents of several subfields to a list
793 ## Arguments: reference to a list
794 ## field name
795 ## list of fields (5XX)
796 ##********************************************************************
797 sub pool_subx {
798 my($aref, $fieldname, @notefields) = @_;
800 ## we use a list that contains the interesting subfields
801 ## for each field
802 # ToDo: this is apparently correct only for marc21
803 my @subfields;
805 if ($fieldname eq "500") {
806 @subfields = ('a');
808 elsif ($fieldname eq "501") {
809 @subfields = ('a');
811 elsif ($fieldname eq "502") {
812 @subfields = ('a');
814 elsif ($fieldname eq "504") {
815 @subfields = ('a', 'b');
817 elsif ($fieldname eq "505") {
818 @subfields = ('a', 'g', 'r', 't', 'u');
820 elsif ($fieldname eq "506") {
821 @subfields = ('a', 'b', 'c', 'd', 'e');
823 elsif ($fieldname eq "507") {
824 @subfields = ('a', 'b');
826 elsif ($fieldname eq "508") {
827 @subfields = ('a');
829 elsif ($fieldname eq "510") {
830 @subfields = ('a', 'b', 'c', 'x', '3');
832 elsif ($fieldname eq "511") {
833 @subfields = ('a');
835 elsif ($fieldname eq "513") {
836 @subfields = ('a', 'b');
838 elsif ($fieldname eq "514") {
839 @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'm', 'u', 'z');
841 elsif ($fieldname eq "515") {
842 @subfields = ('a');
844 elsif ($fieldname eq "516") {
845 @subfields = ('a');
847 elsif ($fieldname eq "518") {
848 @subfields = ('a', '3');
850 elsif ($fieldname eq "521") {
851 @subfields = ('a', 'b', '3');
853 elsif ($fieldname eq "522") {
854 @subfields = ('a');
856 elsif ($fieldname eq "524") {
857 @subfields = ('a', '2', '3');
859 elsif ($fieldname eq "525") {
860 @subfields = ('a');
862 elsif ($fieldname eq "526") {
863 @subfields = ('a', 'b', 'c', 'd', 'i', 'x', 'z', '5');
865 elsif ($fieldname eq "530") {
866 @subfields = ('a', 'b', 'c', 'd', 'u', '3');
868 elsif ($fieldname eq "533") {
869 @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'm', 'n', '3');
871 elsif ($fieldname eq "534") {
872 @subfields = ('a', 'b', 'c', 'e', 'f', 'k', 'l', 'm', 'n', 'p', 't', 'x', 'z');
874 elsif ($fieldname eq "535") {
875 @subfields = ('a', 'b', 'c', 'd', 'g', '3');
878 ## loop over all notefields
879 foreach $notefield (@notefields) {
880 if ($notefield != undef) {
881 ## retrieve all available subfield tuples
882 @notesubfields = $notefield->subfields();
884 ## loop over all subfield tuples
885 foreach $notetuple (@notesubfields) {
886 ## loop over all subfields to check
887 foreach $subfield (@subfields) {
888 ## [0] contains subfield code
889 if (@$notetuple[0] eq $subfield) {
890 ## [1] contains value, remove trailing separators
891 print "<marc>field $fieldname subfield $subfield: ", @$notetuple[1], "\n" if $marcprint;
892 @$notetuple[1] =~ s% *[,;.:/]*$%%;
893 if (length(@$notetuple[1]) > 0) {
894 ## add to list
895 push @{$aref}, @$notetuple[1];
897 last;
905 ##********************************************************************
906 ## print_abstract(): prints abstract fields
907 ## Arguments: list of fields (520)
908 ##********************************************************************
909 sub print_abstract {
910 # ToDo: take care of repeatable subfields
911 my(@abfields) = @_;
913 ## we check the following subfields
914 my @subfields = ('a', 'b');
916 ## we generate a list for all useful strings
917 my @abstrings;
919 ## loop over all abfields
920 foreach $abfield (@abfields) {
921 foreach $field (@subfields) {
922 if (length ($abfield->subfield($field)) > 0) {
923 my $ab = $abfield->subfield($field);
925 print "<marc>field 520 subfield $field: $ab\n" if $marcprint;
927 ## strip trailing separators
928 $ab =~ s% *[;,:./]*$%%;
930 ## add string to the list
931 push (@abstrings, $ab);
936 my $allabs = join "; ", @abstrings;
938 if (length($allabs) > 0) {
939 print "N2 - ", &charconv($allabs), "\n";
944 ##********************************************************************
945 ## charconv(): converts to a different charset based on a global var
946 ## Arguments: string
947 ## Returns: string
948 ##********************************************************************
949 sub charconv {
950 if ($utf) {
951 ## return unaltered if already utf-8
952 return @_;
954 elsif ($uniout eq "t") {
955 ## convert to utf-8
956 warn "marc8_to_utf8";
957 return marc8_to_utf8("@_");
959 else {
960 ## return unaltered if no utf-8 requested
961 return @_;