Bug 6875 de-nesting C4::Items
[koha.git] / C4 / Ris.pm
blob085b09cf51eb861fd0ca5b038eef862a577c986b
1 package C4::Ris;
3 # Original script :
4 ## marc2ris: converts MARC21 and UNIMARC datasets to RIS format
5 ## See comments below for compliance with other MARC dialects
6 ##
7 ## usage: perl marc2ris < infile.marc > outfile.ris
8 ##
9 ## Dependencies: perl 5.6.0 or later
10 ## MARC::Record
11 ## MARC::Charset
13 ## markus@mhoenicka.de 2002-11-16
15 ## This program is free software; you can redistribute it and/or modify
16 ## it under the terms of the GNU General Public License as published by
17 ## the Free Software Foundation; either version 2 of the License, or
18 ## (at your option) any later version.
19 ##
20 ## This program is distributed in the hope that it will be useful,
21 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
22 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 ## GNU General Public License for more details.
25 ## You should have received a copy of the GNU General Public License
26 ## along with this program; if not, write to the Free Software
27 ## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 ## Some background about MARC as understood by this script
30 ## The default input format used in this script is MARC21, which
31 ## superseded USMARC and CANMARC. The specification can be found at:
32 ## http://lcweb.loc.gov/marc/
33 ## UNIMARC follows the specification at:
34 ## http://www.ifla.org/VI/3/p1996-1/sec-uni.htm
35 ## UKMARC support is a bit shaky because there is no specification available
36 ## for free. The wisdom used in this script was taken from a PDF document
37 ## comparing UKMARC to MARC21 found at:
38 ## www.bl.uk/services/bibliographic/marcchange.pdf
41 # Modified 2008 by BibLibre for Koha
42 # Modified 2011 by Catalyst
43 # Modified 2011 by Equinox Software, Inc.
45 # This file is part of Koha.
47 # Koha is free software; you can redistribute it and/or modify it under the
48 # terms of the GNU General Public License as published by the Free Software
49 # Foundation; either version 2 of the License, or (at your option) any later
50 # version.
52 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
53 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
54 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
56 # You should have received a copy of the GNU General Public License along with
57 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
58 # Suite 330, Boston, MA 02111-1307 USA
62 #use strict;
63 #use warnings; FIXME - Bug 2505
65 use vars qw($VERSION @ISA @EXPORT);
67 # set the version for version checking
68 $VERSION = 3.00;
70 @ISA = qw(Exporter);
72 # only export API methods
74 @EXPORT = qw(
75 &marc2ris
79 =head1 marc2bibtex - Convert from UNIMARC to RIS
81 my ($ris) = marc2ris($record);
83 Returns a RIS scalar
85 C<$record> - a MARC::Record object
87 =cut
89 sub marc2ris {
90 my ($record) = @_;
91 my $output;
93 my $marcflavour = C4::Context->preference("marcflavour");
94 my $intype = lc($marcflavour);
95 my $marcprint = 0; # Debug flag;
97 # Let's redirect stdout
98 open my $oldout, ">&STDOUT";
99 my $outvar;
100 close STDOUT;
101 open STDOUT,'>', \$outvar;
104 ## First we should check the character encoding. This may be
105 ## MARC-8 or UTF-8. The former is indicated by a blank, the latter
106 ## by 'a' at position 09 (zero-based) of the leader
107 my $leader = $record->leader();
108 if ($intype eq "marc21") {
109 if ($leader =~ /^.{9}a/) {
110 print "<marc>---\r\n<marc>UTF-8 data\r\n" if $marcprint;
111 $utf = 1;
113 else {
114 print "<marc>---\r\n<marc>MARC-8 data\r\n" if $marcprint;
117 ## else: other MARC formats do not specify the character encoding
118 ## we assume it's *not* UTF-8
120 ## start RIS dataset
121 &print_typetag($leader);
123 ## retrieve all author fields and collect them in a list
124 my @author_fields;
126 if ($intype eq "unimarc") {
127 ## Fields 700, 701, and 702 can contain author names
128 @author_fields = ($record->field('700'), $record->field('701'), $record->field('702'));
130 else { ## marc21, ukmarc
131 ## Field 100 sometimes carries main author
132 ## Field(s) 700 carry added entries - personal names
133 @author_fields = ($record->field('100'), $record->field('700'));
136 ## loop over all author fields
137 foreach my $field (@author_fields) {
138 if (length($field)) {
139 my $author = &get_author($field);
140 print "AU - ",&charconv($author),"\r\n";
144 # ToDo: should we specify anonymous as author if we didn't find
145 # one? or use one of the corporate/meeting names below?
147 ## add corporate names or meeting names as editors ??
148 my @editor_fields;
150 if ($intype eq "unimarc") {
151 ## Fields 710, 711, and 712 can carry corporate names
152 ## Field(s) 720, 721, 722, 730 have additional candidates
153 @editor_fields = ($record->field('710'), $record->field('711'), $record->field('712'), $record->field('720'), $record->field('721'), $record->field('722'), $record->field('730'));
155 else { ## marc21, ukmarc
156 ## Fields 110 and 111 carry the main entries - corporate name and
157 ## meeting name, respectively
158 ## Field(s) 710, 711 carry added entries - personal names
159 @editor_fields = ($record->field('110'), $record->field('111'), $record->field('710'), $record->field('711'));
162 ## loop over all editor fields
163 foreach my $field (@editor_fields) {
164 if (length($field)) {
165 my $editor = &get_editor($field);
166 print "ED - ",&charconv($editor),"\r\n";
170 ## get info from the title field
171 if ($intype eq "unimarc") {
172 &print_title($record->field('200'));
174 else { ## marc21, ukmarc
175 &print_title($record->field('245'));
178 ## series title
179 if ($intype eq "unimarc") {
180 &print_stitle($record->field('225'));
182 else { ## marc21, ukmarc
183 &print_stitle($record->field('490'));
186 ## ISBN/ISSN
187 if ($intype eq "unimarc") {
188 &print_isbn($record->field('010'));
189 &print_issn($record->field('011'));
191 elsif ($intype eq "ukmarc") {
192 &print_isbn($record->field('021'));
193 ## this is just an assumption
194 &print_issn($record->field('022'));
196 else { ## assume marc21
197 &print_isbn($record->field('020'));
198 &print_issn($record->field('022'));
201 if ($intype eq "marc21") {
202 &print_loc_callno($record->field('050'));
203 &print_dewey($record->field('082'));
205 ## else: unimarc, ukmarc do not seem to store call numbers?
207 ## publication info
208 if ($intype eq "unimarc") {
209 &print_pubinfo($record->field('210'));
211 else { ## marc21, ukmarc
212 &print_pubinfo($record->field('260'));
215 ## 6XX fields contain KW candidates. We add all of them to a
216 ## hash to eliminate duplicates
217 my %kwpool;
219 if ($intype eq "unimarc") {
220 foreach ('600', '601', '602', '604', '605', '606','607', '608', '610', '615', '620', '660'. '661', '670', '675', '676', '680', '686') {
221 &get_keywords(\%kwpool, "$_",$record->field($_));
224 elsif ($intype eq "ukmarc") {
225 foreach ('600', '610', '611', '630', '650', '651','653', '655', '660', '661', '668', '690', '691', '692', '695') {
226 &get_keywords(\%kwpool, "$_",$record->field($_));
229 else { ## assume marc21
230 foreach ('600', '610', '611', '630', '650', '651','653', '654', '655', '656', '657', '658') {
231 &get_keywords(\%kwpool, "$_",$record->field($_));
235 ## print all keywords found in the hash. The value of each hash
236 ## entry is the number of occurrences, but we're not really interested
237 ## in that and rather print the key
238 while (my ($key, $value) = each %kwpool) {
239 print "KW - ", &charconv($key), "\r\n";
242 ## 5XX have various candidates for notes and abstracts. We pool
243 ## all notes-like stuff in one list.
244 my @notepool;
246 ## these fields have notes candidates
247 if ($intype eq "unimarc") {
248 foreach ('300', '301', '302', '303', '304', '305', '306', '307', '308', '310', '311', '312', '313', '314', '315', '316', '317', '318', '320', '321', '322', '323', '324', '325', '326', '327', '328', '332', '333', '336', '337', '345') {
249 &pool_subx(\@notepool, $_, $record->field($_));
252 elsif ($intype eq "ukmarc") {
253 foreach ('500', '501', '502', '503', '504', '505', '506', '508', '514', '515', '516', '521', '524', '525', '528', '530', '531', '532', '533', '534', '535', '537', '538', '540', '541', '542', '544', '554', '555', '556', '557', '561', '563', '580', '583', '584', '586') {
254 &pool_subx(\@notepool, $_, $record->field($_));
257 else { ## assume marc21
258 foreach ('500', '501', '502', '504', '505', '506', '507', '508', '510', '511', '513', '514', '515', '516', '518', '521', '522', '524', '525', '526', '530', '533', '534', '535') {
259 &pool_subx(\@notepool, $_, $record->field($_));
263 my $allnotes = join "; ", @notepool;
265 if (length($allnotes) > 0) {
266 print "N1 - ", &charconv($allnotes), "\r\n";
269 ## 320/520 have the abstract
270 if ($intype eq "unimarc") {
271 &print_abstract($record->field('320'));
273 elsif ($intype eq "ukmarc") {
274 &print_abstract($record->field('512'), $record->field('513'));
276 else { ## assume marc21
277 &print_abstract($record->field('520'));
280 # 856u has the URI
281 if ($record->field('856')) {
282 print_uri($record->field('856'));
285 ## end RIS dataset
286 print "ER - \r\n";
288 # Let's re-redirect stdout
289 close STDOUT;
290 open STDOUT, ">&", $oldout;
292 return $outvar;
297 ##********************************************************************
298 ## print_typetag(): prints the first line of a RIS dataset including
299 ## the preceeding newline
300 ## Argument: the leader of a MARC dataset
301 ## Returns: the value at leader position 06
302 ##********************************************************************
303 sub print_typetag {
304 my ($leader)= @_;
305 ## the keys of typehash are the allowed values at position 06
306 ## of the leader of a MARC record, the values are the RIS types
307 ## that might appropriately represent these types.
308 my %ustypehash = (
309 "a" => "BOOK",
310 "c" => "MUSIC",
311 "d" => "MUSIC",
312 "e" => "MAP",
313 "f" => "MAP",
314 "g" => "ADVS",
315 "i" => "SOUND",
316 "j" => "SOUND",
317 "k" => "ART",
318 "m" => "DATA",
319 "o" => "GEN",
320 "p" => "GEN",
321 "r" => "ART",
322 "t" => "GEN",
325 my %unitypehash = (
326 "a" => "BOOK",
327 "b" => "BOOK",
328 "c" => "MUSIC",
329 "d" => "MUSIC",
330 "e" => "MAP",
331 "f" => "MAP",
332 "g" => "ADVS",
333 "i" => "SOUND",
334 "j" => "SOUND",
335 "k" => "ART",
336 "l" => "ELEC",
337 "m" => "ADVS",
338 "r" => "ART",
341 ## The type of a MARC record is found at position 06 of the leader
342 my $typeofrecord = substr($leader, 6, 1);
344 ## ToDo: for books, field 008 positions 24-27 might have a few more
345 ## hints
347 my %typehash;
349 ## the ukmarc here is just a guess
350 if ($intype eq "marc21" || $intype eq "ukmarc") {
351 %typehash = %ustypehash;
353 elsif ($intype eq "unimarc") {
354 %typehash = %unitypehash;
356 else {
357 ## assume MARC21 as default
358 %typehash = %ustypehash;
361 if (!exists $typehash{$typeofrecord}) {
362 print "TY - BOOK\r\n"; ## most reasonable default
363 warn ("no type found - assume BOOK") if $marcprint;
365 else {
366 print "TY - $typehash{$typeofrecord}\r\n";
369 ## use $typeofrecord as the return value, just in case
370 $typeofrecord;
373 ##********************************************************************
374 ## normalize_author(): normalizes an authorname
375 ## Arguments: authorname subfield a
376 ## authorname subfield b
377 ## authorname subfield c
378 ## name type if known: 0=direct order
379 ## 1=only surname or full name in
380 ## inverted order
381 ## 3=family, clan, dynasty name
382 ## Returns: the normalized authorname
383 ##********************************************************************
384 sub normalize_author {
385 my($rawauthora, $rawauthorb, $rawauthorc, $nametype) = @_;
387 if ($nametype == 0) {
388 # ToDo: convert every input to Last[,(F.|First)[ (M.|Middle)[,Suffix]]]
389 warn("name >>$rawauthora<< in direct order - leave as is") if $marcprint;
390 return $rawauthora;
392 elsif ($nametype == 1) {
393 ## start munging subfield a (the real name part)
394 ## remove spaces after separators
395 $rawauthora =~ s%([,.]+) *%$1%g;
397 ## remove trailing separators after spaces
398 $rawauthora =~ s% *[,;:/]*$%%;
400 ## remove periods after a non-abbreviated name
401 $rawauthora =~ s%(\w{2,})\.%$1%g;
403 ## start munging subfield b (something like the suffix)
404 ## remove trailing separators after spaces
405 $rawauthorb =~ s% *[,;:/]*$%%;
407 ## we currently ignore subfield c until someone complains
408 if (length($rawauthorb) > 0) {
409 return join ",", ($rawauthora, $rawauthorb);
411 else {
412 return $rawauthora;
415 elsif ($nametype == 3) {
416 return $rawauthora;
420 ##********************************************************************
421 ## get_author(): gets authorname info from MARC fields 100, 700
422 ## Argument: field (100 or 700)
423 ## Returns: an author string in the format found in the record
424 ##********************************************************************
425 sub get_author {
426 my ($authorfield) = @_;
427 my ($indicator);
429 ## the sequence of the name parts is encoded either in indicator
430 ## 1 (marc21) or 2 (unimarc)
431 if ($intype eq "unimarc") {
432 $indicator = 2;
434 else { ## assume marc21
435 $indicator = 1;
438 print "<marc>:Author(Ind$indicator): ", $authorfield->indicator("$indicator"),"\r\n" if $marcprint;
439 print "<marc>:Author(\$a): ", $authorfield->subfield('a'),"\r\n" if $marcprint;
440 print "<marc>:Author(\$b): ", $authorfield->subfield('b'),"\r\n" if $marcprint;
441 print "<marc>:Author(\$c): ", $authorfield->subfield('c'),"\r\n" if $marcprint;
442 print "<marc>:Author(\$h): ", $authorfield->subfield('h'),"\r\n" if $marcprint;
443 if ($intype eq "ukmarc") {
444 my $authorname = $authorfield->subfield('a') . "," . $authorfield->subfield('h');
445 normalize_author($authorname, $authorfield->subfield('b'), $authorfield->subfield('c'), $authorfield->indicator("$indicator"));
447 else {
448 normalize_author($authorfield->subfield('a'), $authorfield->subfield('b'), $authorfield->subfield('c'), $authorfield->indicator("$indicator"));
452 ##********************************************************************
453 ## get_editor(): gets editor info from MARC fields 110, 111, 710, 711
454 ## Argument: field (110, 111, 710, or 711)
455 ## Returns: an author string in the format found in the record
456 ##********************************************************************
457 sub get_editor {
458 my ($editorfield) = @_;
460 if (!$editorfield) {
461 return;
463 else {
464 print "<marc>Editor(\$a): ", $editorfield->subfield('a'),"\r\n" if $marcprint;
465 print "<marc>Editor(\$b): ", $editorfield->subfield('b'),"\r\n" if $marcprint;
466 print "<marc>editor(\$c): ", $editorfield->subfield('c'),"\r\n" if $marcprint;
467 return $editorfield->subfield('a');
471 ##********************************************************************
472 ## print_title(): gets info from MARC field 245
473 ## Arguments: field (245)
474 ## Returns:
475 ##********************************************************************
476 sub print_title {
477 my ($titlefield) = @_;
478 if (!$titlefield) {
479 print "<marc>empty title field (245)\r\n" if $marcprint;
480 warn("empty title field (245)") if $marcprint;
482 else {
483 print "<marc>Title(\$a): ",$titlefield->subfield('a'),"\r\n" if $marcprint;
484 print "<marc>Title(\$b): ",$titlefield->subfield('b'),"\r\n" if $marcprint;
485 print "<marc>Title(\$c): ",$titlefield->subfield('c'),"\r\n" if $marcprint;
487 ## The title is usually written in a very odd notation. The title
488 ## proper ($a) often ends with a space followed by a separator like
489 ## a slash or a colon. The subtitle ($b) doesn't start with a space
490 ## so simple concatenation looks odd. We have to conditionally remove
491 ## the separator and make sure there's a space between title and
492 ## subtitle
494 my $clean_title = $titlefield->subfield('a');
496 my $clean_subtitle = $titlefield->subfield('b');
497 $clean_title =~ s% *[/:;.]$%%;
498 $clean_subtitle =~ s%^ *(.*) *[/:;.]$%$1%;
500 if (length($clean_title) > 0
501 || (length($clean_subtitle) > 0 && $intype ne "unimarc")) {
502 print "TI - ", &charconv($clean_title);
504 ## subfield $b is relevant only for marc21/ukmarc
505 if (length($clean_subtitle) > 0 && $intype ne "unimarc") {
506 print ": ",&charconv($clean_subtitle);
508 print "\r\n";
511 ## The statement of responsibility is just this: horrors. There is
512 ## no formal definition how authors, editors and the like should
513 ## be written and designated. The field is free-form and resistant
514 ## to all parsing efforts, so this information is lost on me
518 ##********************************************************************
519 ## print_stitle(): prints info from series title field
520 ## Arguments: field
521 ## Returns:
522 ##********************************************************************
523 sub print_stitle {
524 my ($titlefield) = @_;
526 if (!$titlefield) {
527 print "<marc>empty series title field\r\n" if $marcprint;
529 else {
530 print "<marc>Series title(\$a): ",$titlefield->subfield('a'),"\r\n" if $marcprint;
531 my $clean_title = $titlefield->subfield('a');
533 $clean_title =~ s% *[/:;.]$%%;
535 if (length($clean_title) > 0) {
536 print "T2 - ", &charconv($clean_title),"\r\n";
539 if ($intype eq "unimarc") {
540 print "<marc>Series vol(\$v): ",$titlefield->subfield('v'),"\r\n" if $marcprint;
541 if (length($titlefield->subfield('v')) > 0) {
542 print "VL - ", &charconv($titlefield->subfield('v')),"\r\n";
548 ##********************************************************************
549 ## print_isbn(): gets info from MARC field 020
550 ## Arguments: field (020)
551 ##********************************************************************
552 sub print_isbn {
553 my($isbnfield) = @_;
555 if (!$isbnfield || length ($isbnfield->subfield('a')) == 0) {
556 print "<marc>no isbn found (020\$a)\r\n" if $marcprint;
557 warn("no isbn found") if $marcprint;
559 else {
560 if (length ($isbnfield->subfield('a')) < 10) {
561 print "<marc>truncated isbn (020\$a)\r\n" if $marcprint;
562 warn("truncated isbn") if $marcprint;
565 my $isbn = substr($isbnfield->subfield('a'), 0, 10);
566 print "SN - ", &charconv($isbn), "\r\n";
570 ##********************************************************************
571 ## print_issn(): gets info from MARC field 022
572 ## Arguments: field (022)
573 ##********************************************************************
574 sub print_issn {
575 my($issnfield) = @_;
577 if (!$issnfield || length ($issnfield->subfield('a')) == 0) {
578 print "<marc>no issn found (022\$a)\r\n" if $marcprint;
579 warn("no issn found") if $marcprint;
581 else {
582 if (length ($issnfield->subfield('a')) < 9) {
583 print "<marc>truncated issn (022\$a)\r\n" if $marcprint;
584 warn("truncated issn") if $marcprint;
587 my $issn = substr($issnfield->subfield('a'), 0, 9);
588 print "SN - ", &charconv($issn), "\r\n";
593 # print_uri() prints info from 856 u
595 sub print_uri {
596 my @f856s = @_;
598 foreach my $f856 (@f856s) {
599 if (my $uri = $f856->subfield('u')) {
600 print "UR - ", charconv($uri), "\r\n";
605 ##********************************************************************
606 ## print_loc_callno(): gets info from MARC field 050
607 ## Arguments: field (050)
608 ##********************************************************************
609 sub print_loc_callno {
610 my($callnofield) = @_;
612 if (!$callnofield || length ($callnofield->subfield('a')) == 0) {
613 print "<marc>no LOC call number found (050\$a)\r\n" if $marcprint;
614 warn("no LOC call number found") if $marcprint;
616 else {
617 print "AV - ", &charconv($callnofield->subfield('a')), " ", &charconv($callnofield->subfield('b')), "\r\n";
621 ##********************************************************************
622 ## print_dewey(): gets info from MARC field 082
623 ## Arguments: field (082)
624 ##********************************************************************
625 sub print_dewey {
626 my($deweyfield) = @_;
628 if (!$deweyfield || length ($deweyfield->subfield('a')) == 0) {
629 print "<marc>no Dewey number found (082\$a)\r\n" if $marcprint;
630 warn("no Dewey number found") if $marcprint;
632 else {
633 print "U1 - ", &charconv($deweyfield->subfield('a')), " ", &charconv($deweyfield->subfield('2')), "\r\n";
637 ##********************************************************************
638 ## print_pubinfo(): gets info from MARC field 260
639 ## Arguments: field (260)
640 ##********************************************************************
641 sub print_pubinfo {
642 my($pubinfofield) = @_;
644 if (!$pubinfofield) {
645 print "<marc>no publication information found (260)\r\n" if $marcprint;
646 warn("no publication information found") if $marcprint;
648 else {
649 ## the following information is available in MARC21:
650 ## $a place -> CY
651 ## $b publisher -> PB
652 ## $c date -> PY
653 ## the corresponding subfields for UNIMARC:
654 ## $a place -> CY
655 ## $c publisher -> PB
656 ## $d date -> PY
658 ## all of them are repeatable. We pool all places into a
659 ## comma-separated list in CY. We also pool all publishers
660 ## into a comma-separated list in PB. We break the rule with
661 ## the date field because this wouldn't make much sense. In
662 ## this case, we use the first occurrence for PY, the second
663 ## for Y2, and ignore the rest
665 my @pubsubfields = $pubinfofield->subfields();
666 my @cities;
667 my @publishers;
668 my $pycounter = 0;
670 my $pubsub_place;
671 my $pubsub_publisher;
672 my $pubsub_date;
674 if ($intype eq "unimarc") {
675 $pubsub_place = "a";
676 $pubsub_publisher = "c";
677 $pubsub_date = "d";
679 else { ## assume marc21
680 $pubsub_place = "a";
681 $pubsub_publisher = "b";
682 $pubsub_date = "c";
685 ## loop over all subfield list entries
686 for my $tuple (@pubsubfields) {
687 ## each tuple consists of the subfield code and the value
688 if (@$tuple[0] eq $pubsub_place) {
689 ## strip any trailing crap
690 $_ = @$tuple[1];
691 s% *[,;:/]$%%;
692 ## pool all occurrences in a list
693 push (@cities, $_);
695 elsif (@$tuple[0] eq $pubsub_publisher) {
696 ## strip any trailing crap
697 $_ = @$tuple[1];
698 s% *[,;:/]$%%;
699 ## pool all occurrences in a list
700 push (@publishers, $_);
702 elsif (@$tuple[0] eq $pubsub_date) {
703 ## the dates are free-form, so we want to extract
704 ## a four-digit year and leave the rest as
705 ## "other info"
706 $protoyear = @$tuple[1];
707 print "<marc>Year (260\$c): $protoyear\r\n" if $marcprint;
709 ## strip any separator chars at the end
710 $protoyear =~ s% *[\.;:/]*$%%;
712 ## isolate a four-digit year. We discard anything
713 ## preceeding the year, but keep everything after
714 ## the year as other info.
715 $protoyear =~ s%\D*([0-9\-]{4})(.*)%$1///$2%;
717 ## check what we've got. If there is no four-digit
718 ## year, make it up. If digits are replaced by '-',
719 ## replace those with 0s
721 if (index($protoyear, "/") == 4) {
722 ## have year info
723 ## replace all '-' in the four-digit year
724 ## by '0'
725 substr($protoyear,0,4) =~ s!-!0!g;
727 else {
728 ## have no year info
729 print "<marc>no four-digit year found, use 0000\r\n" if $marcprint;
730 $protoyear = "0000///$protoyear";
731 warn("no four-digit year found, use 0000") if $marcprint;
734 if ($pycounter == 0 && length($protoyear)) {
735 print "PY - $protoyear\r\n";
737 elsif ($pycounter == 1 && length($_)) {
738 print "Y2 - $protoyear\r\n";
740 ## else: discard
742 ## else: discard
745 ## now dump the collected CY and PB lists
746 if (@cities > 0) {
747 print "CY - ", &charconv(join(", ", @cities)), "\r\n";
749 if (@publishers > 0) {
750 print "PB - ", &charconv(join(", ", @publishers)), "\r\n";
755 ##********************************************************************
756 ## get_keywords(): prints info from MARC fields 6XX
757 ## Arguments: list of fields (6XX)
758 ##********************************************************************
759 sub get_keywords {
760 my($href, $fieldname, @keywords) = @_;
762 ## a list of all possible subfields
763 my @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'x', 'y', 'z', '2', '3', '4');
765 ## loop over all 6XX fields
766 foreach $kwfield (@keywords) {
767 if ($kwfield != undef) {
768 ## authornames get special treatment
769 if ($fieldname eq "600") {
770 my $val = normalize_author($kwfield->subfield('a'), $kwfield->subfield('b'), $kwfield->subfield('c'), $kwfield->indicator('1'));
771 ${$href}{$val} += 1;
772 print "<marc>Field $kwfield subfield a:", $kwfield->subfield('a'), "\r\n<marc>Field $kwfield subfield b:", $kwfield->subfield('b'), "\r\n<marc>Field $kwfield subfield c:", $kwfield->subfield('c'), "\r\n" if $marcprint;
774 else {
775 ## retrieve all available subfields
776 @kwsubfields = $kwfield->subfields();
778 ## loop over all available subfield tuples
779 foreach $kwtuple (@kwsubfields) {
780 ## loop over all subfields to check
781 foreach $subfield (@subfields) {
782 ## [0] contains subfield code
783 if (@$kwtuple[0] eq $subfield) {
784 ## [1] contains value, remove trailing separators
785 @$kwtuple[1] =~ s% *[,;.:/]*$%%;
786 if (length(@$kwtuple[1]) > 0) {
787 ## add to hash
788 ${$href}{@$kwtuple[1]} += 1;
789 print "<marc>Field $fieldname subfield $subfield:", @$kwtuple[1], "\r\n" if $marcprint;
791 ## we can leave the subfields loop here
792 last;
801 ##********************************************************************
802 ## pool_subx(): adds contents of several subfields to a list
803 ## Arguments: reference to a list
804 ## field name
805 ## list of fields (5XX)
806 ##********************************************************************
807 sub pool_subx {
808 my($aref, $fieldname, @notefields) = @_;
810 ## we use a list that contains the interesting subfields
811 ## for each field
812 # ToDo: this is apparently correct only for marc21
813 my @subfields;
815 if ($fieldname eq "500") {
816 @subfields = ('a');
818 elsif ($fieldname eq "501") {
819 @subfields = ('a');
821 elsif ($fieldname eq "502") {
822 @subfields = ('a');
824 elsif ($fieldname eq "504") {
825 @subfields = ('a', 'b');
827 elsif ($fieldname eq "505") {
828 @subfields = ('a', 'g', 'r', 't', 'u');
830 elsif ($fieldname eq "506") {
831 @subfields = ('a', 'b', 'c', 'd', 'e');
833 elsif ($fieldname eq "507") {
834 @subfields = ('a', 'b');
836 elsif ($fieldname eq "508") {
837 @subfields = ('a');
839 elsif ($fieldname eq "510") {
840 @subfields = ('a', 'b', 'c', 'x', '3');
842 elsif ($fieldname eq "511") {
843 @subfields = ('a');
845 elsif ($fieldname eq "513") {
846 @subfields = ('a', 'b');
848 elsif ($fieldname eq "514") {
849 @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'm', 'u', 'z');
851 elsif ($fieldname eq "515") {
852 @subfields = ('a');
854 elsif ($fieldname eq "516") {
855 @subfields = ('a');
857 elsif ($fieldname eq "518") {
858 @subfields = ('a', '3');
860 elsif ($fieldname eq "521") {
861 @subfields = ('a', 'b', '3');
863 elsif ($fieldname eq "522") {
864 @subfields = ('a');
866 elsif ($fieldname eq "524") {
867 @subfields = ('a', '2', '3');
869 elsif ($fieldname eq "525") {
870 @subfields = ('a');
872 elsif ($fieldname eq "526") {
873 @subfields = ('a', 'b', 'c', 'd', 'i', 'x', 'z', '5');
875 elsif ($fieldname eq "530") {
876 @subfields = ('a', 'b', 'c', 'd', 'u', '3');
878 elsif ($fieldname eq "533") {
879 @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'm', 'n', '3');
881 elsif ($fieldname eq "534") {
882 @subfields = ('a', 'b', 'c', 'e', 'f', 'k', 'l', 'm', 'n', 'p', 't', 'x', 'z');
884 elsif ($fieldname eq "535") {
885 @subfields = ('a', 'b', 'c', 'd', 'g', '3');
888 ## loop over all notefields
889 foreach $notefield (@notefields) {
890 if ($notefield != undef) {
891 ## retrieve all available subfield tuples
892 @notesubfields = $notefield->subfields();
894 ## loop over all subfield tuples
895 foreach $notetuple (@notesubfields) {
896 ## loop over all subfields to check
897 foreach $subfield (@subfields) {
898 ## [0] contains subfield code
899 if (@$notetuple[0] eq $subfield) {
900 ## [1] contains value, remove trailing separators
901 print "<marc>field $fieldname subfield $subfield: ", @$notetuple[1], "\r\n" if $marcprint;
902 @$notetuple[1] =~ s% *[,;.:/]*$%%;
903 if (length(@$notetuple[1]) > 0) {
904 ## add to list
905 push @{$aref}, @$notetuple[1];
907 last;
915 ##********************************************************************
916 ## print_abstract(): prints abstract fields
917 ## Arguments: list of fields (520)
918 ##********************************************************************
919 sub print_abstract {
920 # ToDo: take care of repeatable subfields
921 my(@abfields) = @_;
923 ## we check the following subfields
924 my @subfields = ('a', 'b');
926 ## we generate a list for all useful strings
927 my @abstrings;
929 ## loop over all abfields
930 foreach $abfield (@abfields) {
931 foreach $field (@subfields) {
932 if (length ($abfield->subfield($field)) > 0) {
933 my $ab = $abfield->subfield($field);
935 print "<marc>field 520 subfield $field: $ab\r\n" if $marcprint;
937 ## strip trailing separators
938 $ab =~ s% *[;,:./]*$%%;
940 ## add string to the list
941 push (@abstrings, $ab);
946 my $allabs = join "; ", @abstrings;
948 if (length($allabs) > 0) {
949 print "N2 - ", &charconv($allabs), "\r\n";
956 ##********************************************************************
957 ## charconv(): converts to a different charset based on a global var
958 ## Arguments: string
959 ## Returns: string
960 ##********************************************************************
961 sub charconv {
962 if ($utf) {
963 ## return unaltered if already utf-8
964 return @_;
966 elsif ($uniout eq "t") {
967 ## convert to utf-8
968 return marc8_to_utf8("@_");
970 else {
971 ## return unaltered if no utf-8 requested
972 return @_;