Bug 16011: $VERSION - Remove the $VERSION init
[koha.git] / C4 / Ris.pm
blobe158cabcf8114cddd3575db4621190060c51031b
1 package C4::Ris;
3 # Original script :
4 ## marc2ris: converts MARC21 and UNIMARC datasets to RIS format
5 ## See comments below for compliance with other MARC dialects
6 ##
7 ## usage: perl marc2ris < infile.marc > outfile.ris
8 ##
9 ## Dependencies: perl 5.6.0 or later
10 ## MARC::Record
11 ## MARC::Charset
13 ## markus@mhoenicka.de 2002-11-16
15 ## This program is free software; you can redistribute it and/or modify
16 ## it under the terms of the GNU General Public License as published by
17 ## the Free Software Foundation; either version 2 of the License, or
18 ## (at your option) any later version.
19 ##
20 ## This program is distributed in the hope that it will be useful,
21 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
22 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 ## GNU General Public License for more details.
25 ## You should have received a copy of the GNU General Public License
26 ## along with this program; if not, write to the Free Software
27 ## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 ## Some background about MARC as understood by this script
30 ## The default input format used in this script is MARC21, which
31 ## superseded USMARC and CANMARC. The specification can be found at:
32 ## http://lcweb.loc.gov/marc/
33 ## UNIMARC follows the specification at:
34 ## http://www.ifla.org/VI/3/p1996-1/sec-uni.htm
35 ## UKMARC support is a bit shaky because there is no specification available
36 ## for free. The wisdom used in this script was taken from a PDF document
37 ## comparing UKMARC to MARC21 found at:
38 ## www.bl.uk/services/bibliographic/marcchange.pdf
41 # Modified 2008 by BibLibre for Koha
42 # Modified 2011 by Catalyst
43 # Modified 2011 by Equinox Software, Inc.
45 # This file is part of Koha.
47 # Koha is free software; you can redistribute it and/or modify it
48 # under the terms of the GNU General Public License as published by
49 # the Free Software Foundation; either version 3 of the License, or
50 # (at your option) any later version.
52 # Koha is distributed in the hope that it will be useful, but
53 # WITHOUT ANY WARRANTY; without even the implied warranty of
54 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
55 # GNU General Public License for more details.
57 # You should have received a copy of the GNU General Public License
58 # along with Koha; if not, see <http://www.gnu.org/licenses>.
62 use Modern::Perl;
64 use List::MoreUtils qw/uniq/;
65 use vars qw(@ISA @EXPORT);
67 use C4::Biblio qw(GetMarcSubfieldStructureFromKohaField);
68 use Koha::SimpleMARC qw(read_field);
70 # set the version for version checking
72 @ISA = qw(Exporter);
74 # only export API methods
76 @EXPORT = qw(
77 &marc2ris
80 our $utf;
81 our $intype;
82 our $marcprint;
83 our $protoyear;
86 =head1 marc2bibtex - Convert from UNIMARC to RIS
88 my ($ris) = marc2ris($record);
90 Returns a RIS scalar
92 C<$record> - a MARC::Record object
94 =cut
96 sub marc2ris {
97 my ($record) = @_;
98 my $output;
100 my $marcflavour = C4::Context->preference("marcflavour");
101 $intype = lc($marcflavour);
102 my $marcprint = 0; # Debug flag;
104 # Let's redirect stdout
105 open my $oldout, ">&STDOUT";
106 my $outvar;
107 close STDOUT;
108 open STDOUT,'>:encoding(utf8)', \$outvar;
110 ## First we should check the character encoding. This may be
111 ## MARC-8 or UTF-8. The former is indicated by a blank, the latter
112 ## by 'a' at position 09 (zero-based) of the leader
113 my $leader = $record->leader();
114 if ( $intype eq "marc21" ) {
115 if ( $leader =~ /^.{9}a/ ) {
116 print "<marc>---\r\n<marc>UTF-8 data\r\n" if $marcprint;
117 $utf = 1;
119 else {
120 print "<marc>---\r\n<marc>MARC-8 data\r\n" if $marcprint;
123 ## else: other MARC formats do not specify the character encoding
124 ## we assume it's *not* UTF-8
126 my $RisExportAdditionalFields = C4::Context->preference('RisExportAdditionalFields');
127 my $ris_additional_fields;
128 if ($RisExportAdditionalFields) {
129 $RisExportAdditionalFields = "$RisExportAdditionalFields\n\n";
130 $ris_additional_fields = eval { YAML::Load($RisExportAdditionalFields); };
131 if ($@) {
132 warn "Unable to parse RisExportAdditionalFields : $@";
133 $ris_additional_fields = undef;
137 ## start RIS dataset
138 if ( $ris_additional_fields && $ris_additional_fields->{TY} ) {
139 my ( $f, $sf ) = split( /\$/, $ris_additional_fields->{TY} );
140 my ( $type ) = read_field( { record => $record, field => $f, subfield => $sf, field_numbers => [1] } );
141 if ($type) {
142 print "TY - $type\r\n";
144 else {
145 &print_typetag($leader);
148 else {
149 &print_typetag($leader);
152 ## retrieve all author fields and collect them in a list
153 my @author_fields;
155 if ($intype eq "unimarc") {
156 ## Fields 700, 701, and 702 can contain author names
157 @author_fields = ($record->field('700'), $record->field('701'), $record->field('702'));
159 else { ## marc21, ukmarc
160 ## Field 100 sometimes carries main author
161 ## Field(s) 700 carry added entries - personal names
162 @author_fields = ($record->field('100'), $record->field('700'));
165 ## loop over all author fields
166 foreach my $field (@author_fields) {
167 if (length($field)) {
168 my $author = &get_author($field);
169 print "AU - ",&charconv($author),"\r\n";
173 # ToDo: should we specify anonymous as author if we didn't find
174 # one? or use one of the corporate/meeting names below?
176 ## add corporate names or meeting names as editors ??
177 my @editor_fields;
179 if ($intype eq "unimarc") {
180 ## Fields 710, 711, and 712 can carry corporate names
181 ## Field(s) 720, 721, 722, 730 have additional candidates
182 @editor_fields = ($record->field('710'), $record->field('711'), $record->field('712'), $record->field('720'), $record->field('721'), $record->field('722'), $record->field('730'));
184 else { ## marc21, ukmarc
185 ## Fields 110 and 111 carry the main entries - corporate name and
186 ## meeting name, respectively
187 ## Field(s) 710, 711 carry added entries - personal names
188 @editor_fields = ($record->field('110'), $record->field('111'), $record->field('710'), $record->field('711'));
191 ## loop over all editor fields
192 foreach my $field (@editor_fields) {
193 if (length($field)) {
194 my $editor = &get_editor($field);
195 print "ED - ",&charconv($editor),"\r\n";
199 ## get info from the title field
200 if ($intype eq "unimarc") {
201 &print_title($record->field('200'));
203 else { ## marc21, ukmarc
204 &print_title($record->field('245'));
207 ## series title
208 if ($intype eq "unimarc") {
209 &print_stitle($record->field('225'));
211 else { ## marc21, ukmarc
212 &print_stitle($record->field('490'));
215 ## ISBN/ISSN
216 if ($intype eq "unimarc") {
217 &print_isbn($record->field('010'));
218 &print_issn($record->field('011'));
220 elsif ($intype eq "ukmarc") {
221 &print_isbn($record->field('021'));
222 ## this is just an assumption
223 &print_issn($record->field('022'));
225 else { ## assume marc21
226 &print_isbn($record->field('020'));
227 &print_issn($record->field('022'));
230 if ($intype eq "marc21") {
231 &print_loc_callno($record->field('050'));
232 &print_dewey($record->field('082'));
234 ## else: unimarc, ukmarc do not seem to store call numbers?
236 ## publication info
237 if ($intype eq "unimarc") {
238 &print_pubinfo($record->field('210'));
240 else { ## marc21, ukmarc
241 if ($record->field('264')) {
242 &print_pubinfo($record->field('264'));
244 else {
245 &print_pubinfo($record->field('260'));
249 ## 6XX fields contain KW candidates. We add all of them to a
251 my @field_list;
252 if ($intype eq "unimarc") {
253 @field_list = ('600', '601', '602', '604', '605', '606','607', '608', '610', '615', '620', '660', '661', '670', '675', '676', '680', '686');
254 } elsif ($intype eq "ukmarc") {
255 @field_list = ('600', '610', '611', '630', '650', '651','653', '655', '660', '661', '668', '690', '691', '692', '695');
256 } else { ## assume marc21
257 @field_list = ('600', '610', '611', '630', '650', '651','653', '654', '655', '656', '657', '658');
260 my @kwpool;
261 for my $f ( @field_list ) {
262 my @fields = $record->field($f);
263 push @kwpool, ( get_keywords("$f",$record->field($f)) );
266 # Remove duplicate
267 @kwpool = uniq @kwpool;
269 for my $kw ( @kwpool ) {
270 print "KW - ", &charconv($kw), "\r\n";
273 ## 5XX have various candidates for notes and abstracts. We pool
274 ## all notes-like stuff in one list.
275 my @notepool;
277 ## these fields have notes candidates
278 if ($intype eq "unimarc") {
279 foreach ('300', '301', '302', '303', '304', '305', '306', '307', '308', '310', '311', '312', '313', '314', '315', '316', '317', '318', '320', '321', '322', '323', '324', '325', '326', '327', '328', '332', '333', '336', '337', '345') {
280 &pool_subx(\@notepool, $_, $record->field($_));
283 elsif ($intype eq "ukmarc") {
284 foreach ('500', '501', '502', '503', '504', '505', '506', '508', '514', '515', '516', '521', '524', '525', '528', '530', '531', '532', '533', '534', '535', '537', '538', '540', '541', '542', '544', '554', '555', '556', '557', '561', '563', '580', '583', '584', '586') {
285 &pool_subx(\@notepool, $_, $record->field($_));
288 else { ## assume marc21
289 foreach ('500', '501', '502', '504', '505', '506', '507', '508', '510', '511', '513', '514', '515', '516', '518', '521', '522', '524', '525', '526', '530', '533', '534', '535') {
290 &pool_subx(\@notepool, $_, $record->field($_));
294 my $allnotes = join "; ", @notepool;
296 if (length($allnotes) > 0) {
297 print "N1 - ", &charconv($allnotes), "\r\n";
300 ## 320/520 have the abstract
301 if ($intype eq "unimarc") {
302 &print_abstract($record->field('320'));
304 elsif ($intype eq "ukmarc") {
305 &print_abstract($record->field('512'), $record->field('513'));
307 else { ## assume marc21
308 &print_abstract($record->field('520'));
311 # 856u has the URI
312 if ($record->field('856')) {
313 print_uri($record->field('856'));
316 if ($ris_additional_fields) {
317 foreach my $ris_tag ( keys %$ris_additional_fields ) {
318 next if $ris_tag eq 'TY';
320 my @fields =
321 ref( $ris_additional_fields->{$ris_tag} ) eq 'ARRAY'
322 ? @{ $ris_additional_fields->{$ris_tag} }
323 : $ris_additional_fields->{$ris_tag};
325 for my $tag (@fields) {
326 my ( $f, $sf ) = split( /\$/, $tag );
327 my @values = read_field( { record => $record, field => $f, subfield => $sf } );
328 foreach my $v (@values) {
329 print "$ris_tag - $v\r\n";
335 ## end RIS dataset
336 print "ER - \r\n";
338 # Let's re-redirect stdout
339 close STDOUT;
340 open STDOUT, ">&", $oldout;
342 return $outvar;
347 ##********************************************************************
348 ## print_typetag(): prints the first line of a RIS dataset including
349 ## the preceding newline
350 ## Argument: the leader of a MARC dataset
351 ## Returns: the value at leader position 06
352 ##********************************************************************
353 sub print_typetag {
354 my ($leader)= @_;
355 ## the keys of typehash are the allowed values at position 06
356 ## of the leader of a MARC record, the values are the RIS types
357 ## that might appropriately represent these types.
358 my %ustypehash = (
359 "a" => "BOOK",
360 "c" => "MUSIC",
361 "d" => "MUSIC",
362 "e" => "MAP",
363 "f" => "MAP",
364 "g" => "ADVS",
365 "i" => "SOUND",
366 "j" => "SOUND",
367 "k" => "ART",
368 "m" => "DATA",
369 "o" => "GEN",
370 "p" => "GEN",
371 "r" => "ART",
372 "t" => "GEN",
375 my %unitypehash = (
376 "a" => "BOOK",
377 "b" => "BOOK",
378 "c" => "MUSIC",
379 "d" => "MUSIC",
380 "e" => "MAP",
381 "f" => "MAP",
382 "g" => "ADVS",
383 "i" => "SOUND",
384 "j" => "SOUND",
385 "k" => "ART",
386 "l" => "ELEC",
387 "m" => "ADVS",
388 "r" => "ART",
391 ## The type of a MARC record is found at position 06 of the leader
392 my $typeofrecord = substr($leader, 6, 1);
394 ## ToDo: for books, field 008 positions 24-27 might have a few more
395 ## hints
397 my %typehash;
399 ## the ukmarc here is just a guess
400 if ($intype eq "marc21" || $intype eq "ukmarc") {
401 %typehash = %ustypehash;
403 elsif ($intype eq "unimarc") {
404 %typehash = %unitypehash;
406 else {
407 ## assume MARC21 as default
408 %typehash = %ustypehash;
411 if (!exists $typehash{$typeofrecord}) {
412 print "TY - BOOK\r\n"; ## most reasonable default
413 warn ("no type found - assume BOOK") if $marcprint;
415 else {
416 print "TY - $typehash{$typeofrecord}\r\n";
419 ## use $typeofrecord as the return value, just in case
420 $typeofrecord;
423 ##********************************************************************
424 ## normalize_author(): normalizes an authorname
425 ## Arguments: authorname subfield a
426 ## authorname subfield b
427 ## authorname subfield c
428 ## name type if known: 0=direct order
429 ## 1=only surname or full name in
430 ## inverted order
431 ## 3=family, clan, dynasty name
432 ## Returns: the normalized authorname
433 ##********************************************************************
434 sub normalize_author {
435 my($rawauthora, $rawauthorb, $rawauthorc, $nametype) = @_;
437 if ($nametype == 0) {
438 # ToDo: convert every input to Last[,(F.|First)[ (M.|Middle)[,Suffix]]]
439 warn("name >>$rawauthora<< in direct order - leave as is") if $marcprint;
440 return $rawauthora;
442 elsif ($nametype == 1) {
443 ## start munging subfield a (the real name part)
444 ## remove spaces after separators
445 $rawauthora =~ s%([,.]+) *%$1%g;
447 ## remove trailing separators after spaces
448 $rawauthora =~ s% *[,;:/]*$%%;
450 ## remove periods after a non-abbreviated name
451 $rawauthora =~ s%(\w{2,})\.%$1%g;
453 ## start munging subfield b (something like the suffix)
454 ## remove trailing separators after spaces
455 $rawauthorb =~ s% *[,;:/]*$%%;
457 ## we currently ignore subfield c until someone complains
458 if (length($rawauthorb) > 0) {
459 return join ", ", ($rawauthora, $rawauthorb);
461 else {
462 return $rawauthora;
465 elsif ($nametype == 3) {
466 return $rawauthora;
470 ##********************************************************************
471 ## get_author(): gets authorname info from MARC fields 100, 700
472 ## Argument: field (100 or 700)
473 ## Returns: an author string in the format found in the record
474 ##********************************************************************
475 sub get_author {
476 my ($authorfield) = @_;
477 my ($indicator);
479 ## the sequence of the name parts is encoded either in indicator
480 ## 1 (marc21) or 2 (unimarc)
481 if ($intype eq "unimarc") {
482 $indicator = 2;
484 else { ## assume marc21
485 $indicator = 1;
488 print "<marc>:Author(Ind$indicator): ", $authorfield->indicator("$indicator"),"\r\n" if $marcprint;
489 print "<marc>:Author(\$a): ", $authorfield->subfield('a'),"\r\n" if $marcprint;
490 print "<marc>:Author(\$b): ", $authorfield->subfield('b'),"\r\n" if $marcprint;
491 print "<marc>:Author(\$c): ", $authorfield->subfield('c'),"\r\n" if $marcprint;
492 print "<marc>:Author(\$h): ", $authorfield->subfield('h'),"\r\n" if $marcprint;
493 if ($intype eq "ukmarc") {
494 my $authorname = $authorfield->subfield('a') . "," . $authorfield->subfield('h');
495 normalize_author($authorname, $authorfield->subfield('b'), $authorfield->subfield('c'), $authorfield->indicator("$indicator"));
497 else {
498 normalize_author($authorfield->subfield('a') // '', $authorfield->subfield('b') // '', $authorfield->subfield('c') // '', $authorfield->indicator("$indicator"));
502 ##********************************************************************
503 ## get_editor(): gets editor info from MARC fields 110, 111, 710, 711
504 ## Argument: field (110, 111, 710, or 711)
505 ## Returns: an author string in the format found in the record
506 ##********************************************************************
507 sub get_editor {
508 my ($editorfield) = @_;
510 if (!$editorfield) {
511 return;
513 else {
514 print "<marc>Editor(\$a): ", $editorfield->subfield('a'),"\r\n" if $marcprint;
515 print "<marc>Editor(\$b): ", $editorfield->subfield('b'),"\r\n" if $marcprint;
516 print "<marc>editor(\$c): ", $editorfield->subfield('c'),"\r\n" if $marcprint;
517 return $editorfield->subfield('a');
521 ##********************************************************************
522 ## print_title(): gets info from MARC field 245
523 ## Arguments: field (245)
524 ## Returns:
525 ##********************************************************************
526 sub print_title {
527 my ($titlefield) = @_;
528 if (!$titlefield) {
529 print "<marc>empty title field (245)\r\n" if $marcprint;
530 warn("empty title field (245)") if $marcprint;
532 else {
533 print "<marc>Title(\$a): ",$titlefield->subfield('a'),"\r\n" if $marcprint;
534 print "<marc>Title(\$b): ",$titlefield->subfield('b'),"\r\n" if $marcprint;
535 print "<marc>Title(\$c): ",$titlefield->subfield('c'),"\r\n" if $marcprint;
537 ## The title is usually written in a very odd notation. The title
538 ## proper ($a) often ends with a space followed by a separator like
539 ## a slash or a colon. The subtitle ($b) doesn't start with a space
540 ## so simple concatenation looks odd. We have to conditionally remove
541 ## the separator and make sure there's a space between title and
542 ## subtitle
544 my $clean_title = $titlefield->subfield('a');
546 my $clean_subtitle = $titlefield->subfield('b');
547 $clean_subtitle ||= q{};
548 $clean_title =~ s% *[/:;.]$%%;
549 $clean_subtitle =~ s%^ *(.*) *[/:;.]$%$1%;
551 if (length($clean_title) > 0
552 || (length($clean_subtitle) > 0 && $intype ne "unimarc")) {
553 print "TI - ", &charconv($clean_title);
555 ## subfield $b is relevant only for marc21/ukmarc
556 if (length($clean_subtitle) > 0 && $intype ne "unimarc") {
557 print ": ",&charconv($clean_subtitle);
559 print "\r\n";
562 ## The statement of responsibility is just this: horrors. There is
563 ## no formal definition how authors, editors and the like should
564 ## be written and designated. The field is free-form and resistant
565 ## to all parsing efforts, so this information is lost on me
569 ##********************************************************************
570 ## print_stitle(): prints info from series title field
571 ## Arguments: field
572 ## Returns:
573 ##********************************************************************
574 sub print_stitle {
575 my ($titlefield) = @_;
577 if (!$titlefield) {
578 print "<marc>empty series title field\r\n" if $marcprint;
580 else {
581 print "<marc>Series title(\$a): ",$titlefield->subfield('a'),"\r\n" if $marcprint;
582 my $clean_title = $titlefield->subfield('a');
584 $clean_title =~ s% *[/:;.]$%%;
586 if (length($clean_title) > 0) {
587 print "T2 - ", &charconv($clean_title),"\r\n";
590 if ($intype eq "unimarc") {
591 print "<marc>Series vol(\$v): ",$titlefield->subfield('v'),"\r\n" if $marcprint;
592 if (length($titlefield->subfield('v')) > 0) {
593 print "VL - ", &charconv($titlefield->subfield('v')),"\r\n";
599 ##********************************************************************
600 ## print_isbn(): gets info from MARC field 020
601 ## Arguments: field (020)
602 ##********************************************************************
603 sub print_isbn {
604 my($isbnfield) = @_;
606 if (!$isbnfield || length ($isbnfield->subfield('a')) == 0) {
607 print "<marc>no isbn found (020\$a)\r\n" if $marcprint;
608 warn("no isbn found") if $marcprint;
610 else {
611 if (length ($isbnfield->subfield('a')) < 10) {
612 print "<marc>truncated isbn (020\$a)\r\n" if $marcprint;
613 warn("truncated isbn") if $marcprint;
616 my $isbn = $isbnfield->subfield('a');
617 print "SN - ", &charconv($isbn), "\r\n";
621 ##********************************************************************
622 ## print_issn(): gets info from MARC field 022
623 ## Arguments: field (022)
624 ##********************************************************************
625 sub print_issn {
626 my($issnfield) = @_;
628 if (!$issnfield || length ($issnfield->subfield('a')) == 0) {
629 print "<marc>no issn found (022\$a)\r\n" if $marcprint;
630 warn("no issn found") if $marcprint;
632 else {
633 if (length ($issnfield->subfield('a')) < 9) {
634 print "<marc>truncated issn (022\$a)\r\n" if $marcprint;
635 warn("truncated issn") if $marcprint;
638 my $issn = substr($issnfield->subfield('a'), 0, 9);
639 print "SN - ", &charconv($issn), "\r\n";
644 # print_uri() prints info from 856 u
646 sub print_uri {
647 my @f856s = @_;
649 foreach my $f856 (@f856s) {
650 if (my $uri = $f856->subfield('u')) {
651 print "UR - ", charconv($uri), "\r\n";
656 ##********************************************************************
657 ## print_loc_callno(): gets info from MARC field 050
658 ## Arguments: field (050)
659 ##********************************************************************
660 sub print_loc_callno {
661 my($callnofield) = @_;
663 if (!$callnofield || length ($callnofield->subfield('a')) == 0) {
664 print "<marc>no LOC call number found (050\$a)\r\n" if $marcprint;
665 warn("no LOC call number found") if $marcprint;
667 else {
668 print "AV - ", &charconv($callnofield->subfield('a')), " ", &charconv($callnofield->subfield('b')), "\r\n";
672 ##********************************************************************
673 ## print_dewey(): gets info from MARC field 082
674 ## Arguments: field (082)
675 ##********************************************************************
676 sub print_dewey {
677 my($deweyfield) = @_;
679 if (!$deweyfield || length ($deweyfield->subfield('a')) == 0) {
680 print "<marc>no Dewey number found (082\$a)\r\n" if $marcprint;
681 warn("no Dewey number found") if $marcprint;
683 else {
684 print "U1 - ", &charconv($deweyfield->subfield('a')), " ", &charconv($deweyfield->subfield('2')), "\r\n";
688 ##********************************************************************
689 ## print_pubinfo(): gets info from MARC field 260
690 ## Arguments: field (260)
691 ##********************************************************************
692 sub print_pubinfo {
693 my($pubinfofield) = @_;
695 if (!$pubinfofield) {
696 print "<marc>no publication information found (260/264)\r\n" if $marcprint;
697 warn("no publication information found") if $marcprint;
699 else {
700 ## the following information is available in MARC21:
701 ## $a place -> CY
702 ## $b publisher -> PB
703 ## $c date -> PY
704 ## the corresponding subfields for UNIMARC:
705 ## $a place -> CY
706 ## $c publisher -> PB
707 ## $d date -> PY
709 ## all of them are repeatable. We pool all places into a
710 ## comma-separated list in CY. We also pool all publishers
711 ## into a comma-separated list in PB. We break the rule with
712 ## the date field because this wouldn't make much sense. In
713 ## this case, we use the first occurrence for PY, the second
714 ## for Y2, and ignore the rest
716 my @pubsubfields = $pubinfofield->subfields();
717 my @cities;
718 my @publishers;
719 my $pycounter = 0;
721 my $pubsub_place;
722 my $pubsub_publisher;
723 my $pubsub_date;
725 if ($intype eq "unimarc") {
726 $pubsub_place = "a";
727 $pubsub_publisher = "c";
728 $pubsub_date = "d";
730 else { ## assume marc21
731 $pubsub_place = "a";
732 $pubsub_publisher = "b";
733 $pubsub_date = "c";
736 ## loop over all subfield list entries
737 for my $tuple (@pubsubfields) {
738 ## each tuple consists of the subfield code and the value
739 if (@$tuple[0] eq $pubsub_place) {
740 ## strip any trailing crap
741 $_ = @$tuple[1];
742 s% *[,;:/]$%%;
743 ## pool all occurrences in a list
744 push (@cities, $_);
746 elsif (@$tuple[0] eq $pubsub_publisher) {
747 ## strip any trailing crap
748 $_ = @$tuple[1];
749 s% *[,;:/]$%%;
750 ## pool all occurrences in a list
751 push (@publishers, $_);
753 elsif (@$tuple[0] eq $pubsub_date) {
754 ## the dates are free-form, so we want to extract
755 ## a four-digit year and leave the rest as
756 ## "other info"
757 $protoyear = @$tuple[1];
758 print "<marc>Year (260\$c): $protoyear\r\n" if $marcprint;
760 ## strip any separator chars at the end
761 $protoyear =~ s% *[\.;:/]*$%%;
763 ## isolate a four-digit year. We discard anything
764 ## preceding the year, but keep everything after
765 ## the year as other info.
766 $protoyear =~ s%\D*([0-9\-]{4})(.*)%$1///$2%;
768 ## check what we've got. If there is no four-digit
769 ## year, make it up. If digits are replaced by '-',
770 ## replace those with 0s
772 if (index($protoyear, "/") == 4) {
773 ## have year info
774 ## replace all '-' in the four-digit year
775 ## by '0'
776 substr($protoyear,0,4) =~ s!-!0!g;
778 else {
779 ## have no year info
780 print "<marc>no four-digit year found, use 0000\r\n" if $marcprint;
781 $protoyear = "0000///$protoyear";
782 warn("no four-digit year found, use 0000") if $marcprint;
785 if ($pycounter == 0 && length($protoyear)) {
786 print "PY - $protoyear\r\n";
788 elsif ($pycounter == 1 && length($_)) {
789 print "Y2 - $protoyear\r\n";
791 ## else: discard
793 ## else: discard
796 ## now dump the collected CY and PB lists
797 if (@cities > 0) {
798 print "CY - ", &charconv(join(", ", @cities)), "\r\n";
800 if (@publishers > 0) {
801 print "PB - ", &charconv(join(", ", @publishers)), "\r\n";
806 ##********************************************************************
807 ## get_keywords(): prints info from MARC fields 6XX
808 ## Arguments: list of fields (6XX)
809 ##********************************************************************
810 sub get_keywords {
811 my($fieldname, @keywords) = @_;
813 my @kw;
814 ## a list of all possible subfields
815 my @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'x', 'y', 'z', '2', '3', '4');
817 ## loop over all 6XX fields
818 foreach my $kwfield (@keywords) {
819 if ($kwfield != undef) {
820 ## authornames get special treatment
821 if ($fieldname eq "600") {
822 my $val = normalize_author($kwfield->subfield('a'), $kwfield->subfield('b'), $kwfield->subfield('c'), $kwfield->indicator('1'));
823 push @kw, $val;
824 print "<marc>Field $kwfield subfield a:", $kwfield->subfield('a'), "\r\n<marc>Field $kwfield subfield b:", $kwfield->subfield('b'), "\r\n<marc>Field $kwfield subfield c:", $kwfield->subfield('c'), "\r\n" if $marcprint;
826 else {
827 ## retrieve all available subfields
828 my @kwsubfields = $kwfield->subfields();
830 ## loop over all available subfield tuples
831 foreach my $kwtuple (@kwsubfields) {
832 ## loop over all subfields to check
833 foreach my $subfield (@subfields) {
834 ## [0] contains subfield code
835 if (@$kwtuple[0] eq $subfield) {
836 ## [1] contains value, remove trailing separators
837 @$kwtuple[1] =~ s% *[,;.:/]*$%%;
838 if (length(@$kwtuple[1]) > 0) {
839 push @kw, @$kwtuple[1];
840 print "<marc>Field $fieldname subfield $subfield:", @$kwtuple[1], "\r\n" if $marcprint;
842 ## we can leave the subfields loop here
843 last;
850 return @kw;
853 ##********************************************************************
854 ## pool_subx(): adds contents of several subfields to a list
855 ## Arguments: reference to a list
856 ## field name
857 ## list of fields (5XX)
858 ##********************************************************************
859 sub pool_subx {
860 my($aref, $fieldname, @notefields) = @_;
862 ## we use a list that contains the interesting subfields
863 ## for each field
864 # ToDo: this is apparently correct only for marc21
865 my @subfields;
867 if ($fieldname eq "500") {
868 @subfields = ('a');
870 elsif ($fieldname eq "501") {
871 @subfields = ('a');
873 elsif ($fieldname eq "502") {
874 @subfields = ('a');
876 elsif ($fieldname eq "504") {
877 @subfields = ('a', 'b');
879 elsif ($fieldname eq "505") {
880 @subfields = ('a', 'g', 'r', 't', 'u');
882 elsif ($fieldname eq "506") {
883 @subfields = ('a', 'b', 'c', 'd', 'e');
885 elsif ($fieldname eq "507") {
886 @subfields = ('a', 'b');
888 elsif ($fieldname eq "508") {
889 @subfields = ('a');
891 elsif ($fieldname eq "510") {
892 @subfields = ('a', 'b', 'c', 'x', '3');
894 elsif ($fieldname eq "511") {
895 @subfields = ('a');
897 elsif ($fieldname eq "513") {
898 @subfields = ('a', 'b');
900 elsif ($fieldname eq "514") {
901 @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'm', 'u', 'z');
903 elsif ($fieldname eq "515") {
904 @subfields = ('a');
906 elsif ($fieldname eq "516") {
907 @subfields = ('a');
909 elsif ($fieldname eq "518") {
910 @subfields = ('a', '3');
912 elsif ($fieldname eq "521") {
913 @subfields = ('a', 'b', '3');
915 elsif ($fieldname eq "522") {
916 @subfields = ('a');
918 elsif ($fieldname eq "524") {
919 @subfields = ('a', '2', '3');
921 elsif ($fieldname eq "525") {
922 @subfields = ('a');
924 elsif ($fieldname eq "526") {
925 @subfields = ('a', 'b', 'c', 'd', 'i', 'x', 'z', '5');
927 elsif ($fieldname eq "530") {
928 @subfields = ('a', 'b', 'c', 'd', 'u', '3');
930 elsif ($fieldname eq "533") {
931 @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'm', 'n', '3');
933 elsif ($fieldname eq "534") {
934 @subfields = ('a', 'b', 'c', 'e', 'f', 'k', 'l', 'm', 'n', 'p', 't', 'x', 'z');
936 elsif ($fieldname eq "535") {
937 @subfields = ('a', 'b', 'c', 'd', 'g', '3');
940 ## loop over all notefields
941 foreach my $notefield (@notefields) {
942 if (defined $notefield) {
943 ## retrieve all available subfield tuples
944 my @notesubfields = $notefield->subfields();
946 ## loop over all subfield tuples
947 foreach my $notetuple (@notesubfields) {
948 ## loop over all subfields to check
949 foreach my $subfield (@subfields) {
950 ## [0] contains subfield code
951 if (@$notetuple[0] eq $subfield) {
952 ## [1] contains value, remove trailing separators
953 print "<marc>field $fieldname subfield $subfield: ", @$notetuple[1], "\r\n" if $marcprint;
954 @$notetuple[1] =~ s% *[,;.:/]*$%%;
955 if (length(@$notetuple[1]) > 0) {
956 ## add to list
957 push @{$aref}, @$notetuple[1];
959 last;
967 ##********************************************************************
968 ## print_abstract(): prints abstract fields
969 ## Arguments: list of fields (520)
970 ##********************************************************************
971 sub print_abstract {
972 # ToDo: take care of repeatable subfields
973 my(@abfields) = @_;
975 ## we check the following subfields
976 my @subfields = ('a', 'b');
978 ## we generate a list for all useful strings
979 my @abstrings;
981 ## loop over all abfields
982 foreach my $abfield (@abfields) {
983 foreach my $field (@subfields) {
984 if ( length( $abfield->subfield($field) ) > 0 ) {
985 my $ab = $abfield->subfield($field);
987 print "<marc>field 520 subfield $field: $ab\r\n" if $marcprint;
989 ## strip trailing separators
990 $ab =~ s% *[;,:./]*$%%;
992 ## add string to the list
993 push( @abstrings, $ab );
998 my $allabs = join "; ", @abstrings;
1000 if (length($allabs) > 0) {
1001 print "N2 - ", &charconv($allabs), "\r\n";
1008 ##********************************************************************
1009 ## charconv(): converts to a different charset based on a global var
1010 ## Arguments: string
1011 ## Returns: string
1012 ##********************************************************************
1013 sub charconv {
1014 if ($utf) {
1015 ## return unaltered if already utf-8
1016 return @_;
1018 elsif (my $uniout eq "t") {
1019 ## convert to utf-8
1020 return marc8_to_utf8("@_");
1022 else {
1023 ## return unaltered if no utf-8 requested
1024 return @_;