C4/Ris.pm

   1 package C4::Ris;
   2
   3 # Original script :
   4 ## marc2ris: converts MARC21 and UNIMARC datasets to RIS format
   5 ##           See comments below for compliance with other MARC dialects
   6 ##
   7 ## usage: perl marc2ris < infile.marc > outfile.ris
   8 ##
   9 ## Dependencies: perl 5.6.0 or later
  10 ##               MARC::Record
  11 ##               MARC::Charset
  12 ##
  13 ## markus@mhoenicka.de 2002-11-16
  14
  15 ##   This program is free software; you can redistribute it and/or modify
  16 ##   it under the terms of the GNU General Public License as published by
  17 ##   the Free Software Foundation; either version 2 of the License, or
  18 ##   (at your option) any later version.
  19 ##
  20 ##   This program is distributed in the hope that it will be useful,
  21 ##   but WITHOUT ANY WARRANTY; without even the implied warranty of
  22 ##   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  23 ##   GNU General Public License for more details.
  24
  25 ##   You should have received a copy of the GNU General Public License
  26 ##   along with this program; if not, write to the Free Software
  27 ##   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  28
  29 ## Some background about MARC as understood by this script
  30 ## The default input format used in this script is MARC21, which
  31 ## superseded USMARC and CANMARC. The specification can be found at:
  32 ## http://lcweb.loc.gov/marc/
  33 ## UNIMARC follows the specification at:
  34 ## http://www.ifla.org/VI/3/p1996-1/sec-uni.htm
  35 ## UKMARC support is a bit shaky because there is no specification available
  36 ## for free. The wisdom used in this script was taken from a PDF document
  37 ## comparing UKMARC to MARC21 found at:
  38 ## www.bl.uk/services/bibliographic/marcchange.pdf
  39
  40
  41 # Modified 2008 by BibLibre for Koha
  42 # Modified 2011 by Catalyst
  43 # Modified 2011 by Equinox Software, Inc.
  44 #
  45 # This file is part of Koha.
  46 #
  47 # Koha is free software; you can redistribute it and/or modify it
  48 # under the terms of the GNU General Public License as published by
  49 # the Free Software Foundation; either version 3 of the License, or
  50 # (at your option) any later version.
  51 #
  52 # Koha is distributed in the hope that it will be useful, but
  53 # WITHOUT ANY WARRANTY; without even the implied warranty of
  54 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  55 # GNU General Public License for more details.
  56 #
  57 # You should have received a copy of the GNU General Public License
  58 # along with Koha; if not, see <http://www.gnu.org/licenses>.
  59 #
  60 #
  61
  62 use Modern::Perl;
  63
  64 use List::MoreUtils qw/uniq/;
  65 use vars qw(@ISA @EXPORT);
  66
  67 use C4::Biblio qw(GetMarcSubfieldStructureFromKohaField);
  68 use Koha::SimpleMARC qw(read_field);
  69
  70 # set the version for version checking
  71
  72 @ISA = qw(Exporter);
  73
  74 # only export API methods
  75
  76 @EXPORT = qw(
  77   &marc2ris
  78 );
  79
  80 our $utf;
  81 our $intype;
  82 our $marcprint;
  83 our $protoyear;
  84
  85
  86 =head1 marc2bibtex - Convert from UNIMARC to RIS
  87
  88   my ($ris) = marc2ris($record);
  89
  90 Returns a RIS scalar
  91
  92 C<$record> - a MARC::Record object
  93
  94 =cut
  95
  96 sub marc2ris {
  97     my ($record) = @_;
  98     my $output;
  99
 100     my $marcflavour = C4::Context->preference("marcflavour");
 101     $intype = lc($marcflavour);
 102     my $marcprint = 0; # Debug flag;
 103
 104     # Let's redirect stdout
 105     open my $oldout, ">&STDOUT";
 106     my $outvar;
 107     close STDOUT;
 108     open STDOUT,'>:encoding(utf8)', \$outvar;
 109
 110     ## First we should check the character encoding. This may be
 111     ## MARC-8 or UTF-8. The former is indicated by a blank, the latter
 112     ## by 'a' at position 09 (zero-based) of the leader
 113     my $leader = $record->leader();
 114     if ( $intype eq "marc21" ) {
 115         if ( $leader =~ /^.{9}a/ ) {
 116             print "<marc>---\r\n<marc>UTF-8 data\r\n" if $marcprint;
 117             $utf = 1;
 118         }
 119         else {
 120             print "<marc>---\r\n<marc>MARC-8 data\r\n" if $marcprint;
 121         }
 122     }
 123     ## else: other MARC formats do not specify the character encoding
 124     ## we assume it's *not* UTF-8
 125
 126     my $RisExportAdditionalFields = C4::Context->preference('RisExportAdditionalFields');
 127     my $ris_additional_fields;
 128     if ($RisExportAdditionalFields) {
 129         $RisExportAdditionalFields = "$RisExportAdditionalFields\n\n";
 130         $ris_additional_fields = eval { YAML::Load($RisExportAdditionalFields); };
 131         if ($@) {
 132             warn "Unable to parse RisExportAdditionalFields : $@";
 133             $ris_additional_fields = undef;
 134         }
 135     }
 136
 137     ## start RIS dataset
 138     if ( $ris_additional_fields && $ris_additional_fields->{TY} ) {
 139         my ( $f, $sf ) = split( /\$/, $ris_additional_fields->{TY} );
 140         my ( $type ) = read_field( { record => $record, field => $f, subfield => $sf, field_numbers => [1] } );
 141         if ($type) {
 142             print "TY  - $type\r\n";
 143         }
 144         else {
 145             &print_typetag($leader);
 146         }
 147     }
 148     else {
 149         &print_typetag($leader);
 150     }
 151
 152         ## retrieve all author fields and collect them in a list
 153         my @author_fields;
 154
 155         if ($intype eq "unimarc") {
 156             ## Fields 700, 701, and 702 can contain author names
 157             @author_fields = ($record->field('700'), $record->field('701'), $record->field('702'));
 158         }
 159         else {  ## marc21, ukmarc
 160             ## Field 100 sometimes carries main author
 161             ## Field(s) 700 carry added entries - personal names
 162             @author_fields = ($record->field('100'), $record->field('700'));
 163         }
 164
 165         ## loop over all author fields
 166         foreach my $field (@author_fields) {
 167             if (length($field)) {
 168                 my $author = &get_author($field);
 169                 print "AU  - ",&charconv($author),"\r\n";
 170             }
 171         }
 172
 173         # ToDo: should we specify anonymous as author if we didn't find
 174         # one? or use one of the corporate/meeting names below?
 175
 176         ## add corporate names or meeting names as editors ??
 177         my @editor_fields;
 178
 179         if ($intype eq "unimarc") {
 180             ## Fields 710, 711, and 712 can carry corporate names
 181             ## Field(s) 720, 721, 722, 730 have additional candidates
 182             @editor_fields = ($record->field('710'), $record->field('711'), $record->field('712'), $record->field('720'), $record->field('721'), $record->field('722'), $record->field('730'));
 183         }
 184         else { ## marc21, ukmarc
 185             ## Fields 110 and 111 carry the main entries - corporate name and
 186             ## meeting name, respectively
 187             ## Field(s) 710, 711 carry added entries - personal names
 188             @editor_fields = ($record->field('110'), $record->field('111'), $record->field('710'), $record->field('711'));
 189         }
 190
 191         ## loop over all editor fields
 192         foreach my $field (@editor_fields) {
 193             if (length($field)) {
 194                 my $editor = &get_editor($field);
 195                 print "ED  - ",&charconv($editor),"\r\n";
 196             }
 197         }
 198
 199         ## get info from the title field
 200         if ($intype eq "unimarc") {
 201             &print_title($record->field('200'));
 202         }
 203         else { ## marc21, ukmarc
 204             &print_title($record->field('245'));
 205         }
 206
 207         ## series title
 208         if ($intype eq "unimarc") {
 209             &print_stitle($record->field('225'));
 210         }
 211         else { ## marc21, ukmarc
 212             &print_stitle($record->field('490'));
 213         }
 214
 215         ## ISBN/ISSN
 216         if ($intype eq "unimarc") {
 217             &print_isbn($record->field('010'));
 218             &print_issn($record->field('011'));
 219         }
 220         elsif ($intype eq "ukmarc") {
 221             &print_isbn($record->field('021'));
 222             ## this is just an assumption
 223             &print_issn($record->field('022'));
 224         }
 225         else { ## assume marc21
 226             &print_isbn($record->field('020'));
 227             &print_issn($record->field('022'));
 228         }
 229
 230         if ($intype eq "marc21") {
 231             &print_loc_callno($record->field('050'));
 232             &print_dewey($record->field('082'));
 233         }
 234         ## else: unimarc, ukmarc do not seem to store call numbers?
 235
 236         ## publication info
 237         if ($intype eq "unimarc") {
 238             &print_pubinfo($record->field('210'));
 239         }
 240         else { ## marc21, ukmarc
 241             if ($record->field('264')) {
 242                  &print_pubinfo($record->field('264'));
 243             }
 244             else {
 245             &print_pubinfo($record->field('260'));
 246             }
 247         }
 248
 249         ## 6XX fields contain KW candidates. We add all of them to a
 250
 251     my @field_list;
 252     if ($intype eq "unimarc") {
 253         @field_list = ('600', '601', '602', '604', '605', '606','607', '608', '610', '615', '620', '660', '661', '670', '675', '676', '680', '686');
 254     } elsif ($intype eq "ukmarc") {
 255         @field_list = ('600', '610', '611', '630', '650', '651','653', '655', '660', '661', '668', '690', '691', '692', '695');
 256     } else { ## assume marc21
 257         @field_list = ('600', '610', '611', '630', '650', '651','653', '654', '655', '656', '657', '658');
 258     }
 259
 260     my @kwpool;
 261     for my $f ( @field_list ) {
 262         my @fields = $record->field($f);
 263         push @kwpool, ( get_keywords("$f",$record->field($f)) );
 264     }
 265
 266     # Remove duplicate
 267     @kwpool = uniq @kwpool;
 268
 269     for my $kw ( @kwpool ) {
 270         print "KW  - ", &charconv($kw), "\r\n";
 271     }
 272
 273         ## 5XX have various candidates for notes and abstracts. We pool
 274         ## all notes-like stuff in one list.
 275         my @notepool;
 276
 277         ## these fields have notes candidates
 278         if ($intype eq "unimarc") {
 279             foreach ('300', '301', '302', '303', '304', '305', '306', '307', '308', '310', '311', '312', '313', '314', '315', '316', '317', '318', '320', '321', '322', '323', '324', '325', '326', '327', '328', '332', '333', '336', '337', '345') {
 280                 &pool_subx(\@notepool, $_, $record->field($_));
 281             }
 282         }
 283         elsif ($intype eq "ukmarc") {
 284             foreach ('500', '501', '502', '503', '504', '505', '506', '508', '514', '515', '516', '521', '524', '525', '528', '530', '531', '532', '533', '534', '535', '537', '538', '540', '541', '542', '544', '554', '555', '556', '557', '561', '563', '580', '583', '584', '586') {
 285                 &pool_subx(\@notepool, $_, $record->field($_));
 286         }
 287         }
 288         else { ## assume marc21
 289             foreach ('500', '501', '502', '504', '505', '506', '507', '508', '510', '511', '513', '514', '515', '516', '518', '521', '522', '524', '525', '526', '530', '533', '534', '535') {
 290                 &pool_subx(\@notepool, $_, $record->field($_));
 291             }
 292         }
 293
 294         my $allnotes = join "; ", @notepool;
 295
 296         if (length($allnotes) > 0) {
 297             print "N1  - ", &charconv($allnotes), "\r\n";
 298         }
 299
 300         ## 320/520 have the abstract
 301         if ($intype eq "unimarc") {
 302             &print_abstract($record->field('320'));
 303         }
 304         elsif ($intype eq "ukmarc") {
 305             &print_abstract($record->field('512'), $record->field('513'));
 306         }
 307         else { ## assume marc21
 308             &print_abstract($record->field('520'));
 309         }
 310
 311     # 856u has the URI
 312     if ($record->field('856')) {
 313         print_uri($record->field('856'));
 314     }
 315
 316     if ($ris_additional_fields) {
 317         foreach my $ris_tag ( keys %$ris_additional_fields ) {
 318             next if $ris_tag eq 'TY';
 319
 320             my @fields =
 321               ref( $ris_additional_fields->{$ris_tag} ) eq 'ARRAY'
 322               ? @{ $ris_additional_fields->{$ris_tag} }
 323               : $ris_additional_fields->{$ris_tag};
 324
 325             for my $tag (@fields) {
 326                 my ( $f, $sf ) = split( /\$/, $tag );
 327                 my @values = read_field( { record => $record, field => $f, subfield => $sf } );
 328                 foreach my $v (@values) {
 329                     print "$ris_tag  - $v\r\n";
 330                 }
 331             }
 332         }
 333     }
 334
 335         ## end RIS dataset
 336         print "ER  - \r\n";
 337
 338     # Let's re-redirect stdout
 339     close STDOUT;
 340     open STDOUT, ">&", $oldout;
 341
 342     return $outvar;
 343
 344 }
 345
 346
 347 ##********************************************************************
 348 ## print_typetag(): prints the first line of a RIS dataset including
 349 ## the preceding newline
 350 ## Argument: the leader of a MARC dataset
 351 ## Returns: the value at leader position 06
 352 ##********************************************************************
 353 sub print_typetag {
 354   my ($leader)= @_;
 355     ## the keys of typehash are the allowed values at position 06
 356     ## of the leader of a MARC record, the values are the RIS types
 357     ## that might appropriately represent these types.
 358     my %ustypehash = (
 359                     "a" => "BOOK",
 360                     "c" => "MUSIC",
 361                     "d" => "MUSIC",
 362                     "e" => "MAP",
 363                     "f" => "MAP",
 364                     "g" => "ADVS",
 365                     "i" => "SOUND",
 366                     "j" => "SOUND",
 367                     "k" => "ART",
 368                     "m" => "DATA",
 369                     "o" => "GEN",
 370                     "p" => "GEN",
 371                     "r" => "ART",
 372                     "t" => "GEN",
 373                 );
 374
 375     my %unitypehash = (
 376                     "a" => "BOOK",
 377                     "b" => "BOOK",
 378                     "c" => "MUSIC",
 379                     "d" => "MUSIC",
 380                     "e" => "MAP",
 381                     "f" => "MAP",
 382                     "g" => "ADVS",
 383                     "i" => "SOUND",
 384                     "j" => "SOUND",
 385                     "k" => "ART",
 386                     "l" => "ELEC",
 387                     "m" => "ADVS",
 388                     "r" => "ART",
 389                 );
 390
 391     ## The type of a MARC record is found at position 06 of the leader
 392     my $typeofrecord = substr($leader, 6, 1);
 393
 394     ## ToDo: for books, field 008 positions 24-27 might have a few more
 395     ## hints
 396
 397     my %typehash;
 398
 399     ## the ukmarc here is just a guess
 400     if ($intype eq "marc21" || $intype eq "ukmarc") {
 401         %typehash = %ustypehash;
 402     }
 403     elsif ($intype eq "unimarc") {
 404         %typehash = %unitypehash;
 405     }
 406     else {
 407         ## assume MARC21 as default
 408         %typehash = %ustypehash;
 409     }
 410
 411     if (!exists $typehash{$typeofrecord}) {
 412         print "TY  - BOOK\r\n"; ## most reasonable default
 413         warn ("no type found - assume BOOK") if $marcprint;
 414     }
 415     else {
 416         print "TY  - $typehash{$typeofrecord}\r\n";
 417     }
 418
 419     ## use $typeofrecord as the return value, just in case
 420     $typeofrecord;
 421 }
 422
 423 ##********************************************************************
 424 ## normalize_author(): normalizes an authorname
 425 ## Arguments: authorname subfield a
 426 ##            authorname subfield b
 427 ##            authorname subfield c
 428 ##            name type if known: 0=direct order
 429 ##                               1=only surname or full name in
 430 ##                                 inverted order
 431 ##                               3=family, clan, dynasty name
 432 ## Returns: the normalized authorname
 433 ##********************************************************************
 434 sub normalize_author {
 435     my($rawauthora, $rawauthorb, $rawauthorc, $nametype) = @_;
 436
 437     if ($nametype == 0) {
 438         # ToDo: convert every input to Last[,(F.|First)[ (M.|Middle)[,Suffix]]]
 439         warn("name >>$rawauthora<< in direct order - leave as is") if $marcprint;
 440         return $rawauthora;
 441     }
 442     elsif ($nametype == 1) {
 443         ## start munging subfield a (the real name part)
 444         ## remove spaces after separators
 445         $rawauthora =~ s%([,.]+) *%$1%g;
 446
 447         ## remove trailing separators after spaces
 448         $rawauthora =~ s% *[,;:/]*$%%;
 449
 450         ## remove periods after a non-abbreviated name
 451         $rawauthora =~ s%(\w{2,})\.%$1%g;
 452
 453         ## start munging subfield b (something like the suffix)
 454         ## remove trailing separators after spaces
 455         $rawauthorb =~ s% *[,;:/]*$%%;
 456
 457         ## we currently ignore subfield c until someone complains
 458         if (length($rawauthorb) > 0) {
 459         return join ", ", ($rawauthora, $rawauthorb);
 460         }
 461         else {
 462             return $rawauthora;
 463         }
 464     }
 465     elsif ($nametype == 3) {
 466         return $rawauthora;
 467     }
 468 }
 469
 470 ##********************************************************************
 471 ## get_author(): gets authorname info from MARC fields 100, 700
 472 ## Argument: field (100 or 700)
 473 ## Returns: an author string in the format found in the record
 474 ##********************************************************************
 475 sub get_author {
 476     my ($authorfield) = @_;
 477     my ($indicator);
 478
 479     ## the sequence of the name parts is encoded either in indicator
 480     ## 1 (marc21) or 2 (unimarc)
 481     if ($intype eq "unimarc") {
 482         $indicator = 2;
 483     }
 484     else { ## assume marc21
 485         $indicator = 1;
 486     }
 487
 488     print "<marc>:Author(Ind$indicator): ", $authorfield->indicator("$indicator"),"\r\n" if $marcprint;
 489     print "<marc>:Author(\$a): ", $authorfield->subfield('a'),"\r\n" if $marcprint;
 490     print "<marc>:Author(\$b): ", $authorfield->subfield('b'),"\r\n" if $marcprint;
 491     print "<marc>:Author(\$c): ", $authorfield->subfield('c'),"\r\n" if $marcprint;
 492     print "<marc>:Author(\$h): ", $authorfield->subfield('h'),"\r\n" if $marcprint;
 493     if ($intype eq "ukmarc") {
 494         my $authorname = $authorfield->subfield('a') . "," . $authorfield->subfield('h');
 495         normalize_author($authorname, $authorfield->subfield('b'), $authorfield->subfield('c'), $authorfield->indicator("$indicator"));
 496     }
 497     else {
 498         normalize_author($authorfield->subfield('a') // '', $authorfield->subfield('b') // '', $authorfield->subfield('c') // '', $authorfield->indicator("$indicator"));
 499     }
 500 }
 501
 502 ##********************************************************************
 503 ## get_editor(): gets editor info from MARC fields 110, 111, 710, 711
 504 ## Argument: field (110, 111, 710, or 711)
 505 ## Returns: an author string in the format found in the record
 506 ##********************************************************************
 507 sub get_editor {
 508     my ($editorfield) = @_;
 509
 510     if (!$editorfield) {
 511         return;
 512     }
 513     else {
 514         print "<marc>Editor(\$a): ", $editorfield->subfield('a'),"\r\n" if $marcprint;
 515         print "<marc>Editor(\$b): ", $editorfield->subfield('b'),"\r\n" if $marcprint;
 516         print "<marc>editor(\$c): ", $editorfield->subfield('c'),"\r\n" if $marcprint;
 517         return $editorfield->subfield('a');
 518     }
 519 }
 520
 521 ##********************************************************************
 522 ## print_title(): gets info from MARC field 245
 523 ## Arguments: field (245)
 524 ## Returns:
 525 ##********************************************************************
 526 sub print_title {
 527     my ($titlefield) = @_;
 528     if (!$titlefield) {
 529         print "<marc>empty title field (245)\r\n" if $marcprint;
 530         warn("empty title field (245)") if $marcprint;
 531     }
 532     else {
 533         print "<marc>Title(\$a): ",$titlefield->subfield('a'),"\r\n" if $marcprint;
 534         print "<marc>Title(\$b): ",$titlefield->subfield('b'),"\r\n" if $marcprint;
 535         print "<marc>Title(\$c): ",$titlefield->subfield('c'),"\r\n" if $marcprint;
 536
 537         ## The title is usually written in a very odd notation. The title
 538         ## proper ($a) often ends with a space followed by a separator like
 539         ## a slash or a colon. The subtitle ($b) doesn't start with a space
 540         ## so simple concatenation looks odd. We have to conditionally remove
 541         ## the separator and make sure there's a space between title and
 542         ## subtitle
 543
 544         my $clean_title = $titlefield->subfield('a');
 545
 546         my $clean_subtitle = $titlefield->subfield('b');
 547 $clean_subtitle ||= q{};
 548         $clean_title =~ s% *[/:;.]$%%;
 549         $clean_subtitle =~ s%^ *(.*) *[/:;.]$%$1%;
 550
 551         if (length($clean_title) > 0
 552             || (length($clean_subtitle) > 0 && $intype ne "unimarc")) {
 553             print "TI  - ", &charconv($clean_title);
 554
 555             ## subfield $b is relevant only for marc21/ukmarc
 556             if (length($clean_subtitle) > 0 && $intype ne "unimarc") {
 557                 print ": ",&charconv($clean_subtitle);
 558             }
 559             print "\r\n";
 560         }
 561
 562         ## The statement of responsibility is just this: horrors. There is
 563         ## no formal definition how authors, editors and the like should
 564         ## be written and designated. The field is free-form and resistant
 565         ## to all parsing efforts, so this information is lost on me
 566     }
 567 }
 568
 569 ##********************************************************************
 570 ## print_stitle(): prints info from series title field
 571 ## Arguments: field
 572 ## Returns:
 573 ##********************************************************************
 574 sub print_stitle {
 575     my ($titlefield) = @_;
 576
 577     if (!$titlefield) {
 578         print "<marc>empty series title field\r\n" if $marcprint;
 579     }
 580     else {
 581         print "<marc>Series title(\$a): ",$titlefield->subfield('a'),"\r\n" if $marcprint;
 582         my $clean_title = $titlefield->subfield('a');
 583
 584         $clean_title =~ s% *[/:;.]$%%;
 585
 586         if (length($clean_title) > 0) {
 587             print "T2  - ", &charconv($clean_title),"\r\n";
 588         }
 589
 590         if ($intype eq "unimarc") {
 591             print "<marc>Series vol(\$v): ",$titlefield->subfield('v'),"\r\n" if $marcprint;
 592             if (length($titlefield->subfield('v')) > 0) {
 593                 print "VL  - ", &charconv($titlefield->subfield('v')),"\r\n";
 594             }
 595         }
 596     }
 597 }
 598
 599 ##********************************************************************
 600 ## print_isbn(): gets info from MARC field 020
 601 ## Arguments: field (020)
 602 ##********************************************************************
 603 sub print_isbn {
 604     my($isbnfield) = @_;
 605
 606     if (!$isbnfield || length ($isbnfield->subfield('a')) == 0) {
 607         print "<marc>no isbn found (020\$a)\r\n" if $marcprint;
 608         warn("no isbn found") if $marcprint;
 609     }
 610     else {
 611         if (length ($isbnfield->subfield('a')) < 10) {
 612             print "<marc>truncated isbn (020\$a)\r\n" if $marcprint;
 613             warn("truncated isbn") if $marcprint;
 614         }
 615
 616     my $isbn = $isbnfield->subfield('a');
 617         print "SN  - ", &charconv($isbn), "\r\n";
 618     }
 619 }
 620
 621 ##********************************************************************
 622 ## print_issn(): gets info from MARC field 022
 623 ## Arguments: field (022)
 624 ##********************************************************************
 625 sub print_issn {
 626     my($issnfield) = @_;
 627
 628     if (!$issnfield || length ($issnfield->subfield('a')) == 0) {
 629         print "<marc>no issn found (022\$a)\r\n" if $marcprint;
 630         warn("no issn found") if $marcprint;
 631     }
 632     else {
 633         if (length ($issnfield->subfield('a')) < 9) {
 634             print "<marc>truncated issn (022\$a)\r\n" if $marcprint;
 635             warn("truncated issn") if $marcprint;
 636         }
 637
 638         my $issn = substr($issnfield->subfield('a'), 0, 9);
 639         print "SN  - ", &charconv($issn), "\r\n";
 640     }
 641 }
 642
 643 ###
 644 # print_uri() prints info from 856 u
 645 ###
 646 sub print_uri {
 647     my @f856s = @_;
 648
 649     foreach my $f856 (@f856s) {
 650         if (my $uri = $f856->subfield('u')) {
 651                 print "UR  - ", charconv($uri), "\r\n";
 652         }
 653     }
 654 }
 655
 656 ##********************************************************************
 657 ## print_loc_callno(): gets info from MARC field 050
 658 ## Arguments: field (050)
 659 ##********************************************************************
 660 sub print_loc_callno {
 661     my($callnofield) = @_;
 662
 663     if (!$callnofield || length ($callnofield->subfield('a')) == 0) {
 664         print "<marc>no LOC call number found (050\$a)\r\n" if $marcprint;
 665         warn("no LOC call number found") if $marcprint;
 666     }
 667     else {
 668         print "AV  - ", &charconv($callnofield->subfield('a')), " ", &charconv($callnofield->subfield('b')), "\r\n";
 669     }
 670 }
 671
 672 ##********************************************************************
 673 ## print_dewey(): gets info from MARC field 082
 674 ## Arguments: field (082)
 675 ##********************************************************************
 676 sub print_dewey {
 677     my($deweyfield) = @_;
 678
 679     if (!$deweyfield || length ($deweyfield->subfield('a')) == 0) {
 680         print "<marc>no Dewey number found (082\$a)\r\n" if $marcprint;
 681         warn("no Dewey number found") if $marcprint;
 682     }
 683     else {
 684         print "U1  - ", &charconv($deweyfield->subfield('a')), " ", &charconv($deweyfield->subfield('2')), "\r\n";
 685     }
 686 }
 687
 688 ##********************************************************************
 689 ## print_pubinfo(): gets info from MARC field 260
 690 ## Arguments: field (260)
 691 ##********************************************************************
 692 sub print_pubinfo {
 693     my($pubinfofield) = @_;
 694
 695     if (!$pubinfofield) {
 696     print "<marc>no publication information found (260/264)\r\n" if $marcprint;
 697         warn("no publication information found") if $marcprint;
 698     }
 699     else {
 700         ## the following information is available in MARC21:
 701         ## $a place -> CY
 702         ## $b publisher -> PB
 703         ## $c date -> PY
 704         ## the corresponding subfields for UNIMARC:
 705         ## $a place -> CY
 706         ## $c publisher -> PB
 707         ## $d date -> PY
 708
 709         ## all of them are repeatable. We pool all places into a
 710         ## comma-separated list in CY. We also pool all publishers
 711         ## into a comma-separated list in PB.  We break the rule with
 712         ## the date field because this wouldn't make much sense. In
 713         ## this case, we use the first occurrence for PY, the second
 714         ## for Y2, and ignore the rest
 715
 716         my @pubsubfields = $pubinfofield->subfields();
 717         my @cities;
 718         my @publishers;
 719         my $pycounter = 0;
 720
 721         my $pubsub_place;
 722         my $pubsub_publisher;
 723         my $pubsub_date;
 724
 725         if ($intype eq "unimarc") {
 726             $pubsub_place = "a";
 727             $pubsub_publisher = "c";
 728             $pubsub_date = "d";
 729         }
 730         else { ## assume marc21
 731             $pubsub_place = "a";
 732             $pubsub_publisher = "b";
 733             $pubsub_date = "c";
 734         }
 735
 736         ## loop over all subfield list entries
 737         for my $tuple (@pubsubfields) {
 738             ## each tuple consists of the subfield code and the value
 739             if (@$tuple[0] eq $pubsub_place) {
 740                 ## strip any trailing crap
 741                 $_ = @$tuple[1];
 742                 s% *[,;:/]$%%;
 743                 ## pool all occurrences in a list
 744                 push (@cities, $_);
 745             }
 746             elsif (@$tuple[0] eq $pubsub_publisher) {
 747                 ## strip any trailing crap
 748                 $_ = @$tuple[1];
 749                 s% *[,;:/]$%%;
 750                 ## pool all occurrences in a list
 751                 push (@publishers, $_);
 752             }
 753             elsif (@$tuple[0] eq $pubsub_date) {
 754                 ## the dates are free-form, so we want to extract
 755                 ## a four-digit year and leave the rest as
 756                 ## "other info"
 757                 $protoyear = @$tuple[1];
 758                 print "<marc>Year (260\$c): $protoyear\r\n" if $marcprint;
 759
 760                 ## strip any separator chars at the end
 761                 $protoyear =~ s% *[\.;:/]*$%%;
 762
 763                 ## isolate a four-digit year. We discard anything
 764         ## preceding the year, but keep everything after
 765                 ## the year as other info.
 766                 $protoyear =~ s%\D*([0-9\-]{4})(.*)%$1///$2%;
 767
 768                 ## check what we've got. If there is no four-digit
 769                 ## year, make it up. If digits are replaced by '-',
 770                 ## replace those with 0s
 771
 772                 if (index($protoyear, "/") == 4) {
 773                     ## have year info
 774                     ## replace all '-' in the four-digit year
 775                     ## by '0'
 776                     substr($protoyear,0,4) =~ s!-!0!g;
 777                 }
 778                 else {
 779                     ## have no year info
 780                     print "<marc>no four-digit year found, use 0000\r\n" if $marcprint;
 781                     $protoyear = "0000///$protoyear";
 782                     warn("no four-digit year found, use 0000") if $marcprint;
 783                 }
 784
 785                 if ($pycounter == 0 && length($protoyear)) {
 786                     print "PY  - $protoyear\r\n";
 787                 }
 788                 elsif ($pycounter == 1 && length($_)) {
 789                     print "Y2  - $protoyear\r\n";
 790                 }
 791                 ## else: discard
 792             }
 793             ## else: discard
 794         }
 795
 796         ## now dump the collected CY and PB lists
 797         if (@cities > 0) {
 798             print "CY  - ", &charconv(join(", ", @cities)), "\r\n";
 799         }
 800         if (@publishers > 0) {
 801             print "PB  - ", &charconv(join(", ", @publishers)), "\r\n";
 802         }
 803     }
 804 }
 805
 806 ##********************************************************************
 807 ## get_keywords(): prints info from MARC fields 6XX
 808 ## Arguments: list of fields (6XX)
 809 ##********************************************************************
 810 sub get_keywords {
 811     my($fieldname, @keywords) = @_;
 812
 813     my @kw;
 814     ## a list of all possible subfields
 815     my @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'x', 'y', 'z', '2', '3', '4');
 816
 817     ## loop over all 6XX fields
 818     foreach my $kwfield (@keywords) {
 819         if ($kwfield != undef) {
 820             ## authornames get special treatment
 821             if ($fieldname eq "600") {
 822                 my $val = normalize_author($kwfield->subfield('a'), $kwfield->subfield('b'), $kwfield->subfield('c'), $kwfield->indicator('1'));
 823                 push @kw, $val;
 824                 print "<marc>Field $kwfield subfield a:", $kwfield->subfield('a'), "\r\n<marc>Field $kwfield subfield b:", $kwfield->subfield('b'), "\r\n<marc>Field $kwfield subfield c:", $kwfield->subfield('c'), "\r\n" if $marcprint;
 825             }
 826             else {
 827                 ## retrieve all available subfields
 828                 my @kwsubfields = $kwfield->subfields();
 829
 830                 ## loop over all available subfield tuples
 831                 foreach my $kwtuple (@kwsubfields) {
 832                     ## loop over all subfields to check
 833                     foreach my $subfield (@subfields) {
 834                         ## [0] contains subfield code
 835                         if (@$kwtuple[0] eq $subfield) {
 836                             ## [1] contains value, remove trailing separators
 837                             @$kwtuple[1] =~ s% *[,;.:/]*$%%;
 838                             if (length(@$kwtuple[1]) > 0) {
 839                                 push @kw, @$kwtuple[1];
 840                                 print "<marc>Field $fieldname subfield $subfield:", @$kwtuple[1], "\r\n" if $marcprint;
 841                             }
 842                             ## we can leave the subfields loop here
 843                             last;
 844                         }
 845                     }
 846                 }
 847             }
 848         }
 849     }
 850     return @kw;
 851 }
 852
 853 ##********************************************************************
 854 ## pool_subx(): adds contents of several subfields to a list
 855 ## Arguments: reference to a list
 856 ##            field name
 857 ##            list of fields (5XX)
 858 ##********************************************************************
 859 sub pool_subx {
 860     my($aref, $fieldname, @notefields) = @_;
 861
 862     ## we use a list that contains the interesting subfields
 863     ## for each field
 864     # ToDo: this is apparently correct only for marc21
 865     my @subfields;
 866
 867     if ($fieldname eq "500") {
 868         @subfields = ('a');
 869     }
 870     elsif ($fieldname eq "501") {
 871         @subfields = ('a');
 872     }
 873     elsif ($fieldname eq "502") {
 874         @subfields = ('a');
 875             }
 876     elsif ($fieldname eq "504") {
 877         @subfields = ('a', 'b');
 878     }
 879     elsif ($fieldname eq "505") {
 880         @subfields = ('a', 'g', 'r', 't', 'u');
 881     }
 882     elsif ($fieldname eq "506") {
 883         @subfields = ('a', 'b', 'c', 'd', 'e');
 884     }
 885     elsif ($fieldname eq "507") {
 886         @subfields = ('a', 'b');
 887     }
 888     elsif ($fieldname eq "508") {
 889         @subfields = ('a');
 890     }
 891     elsif ($fieldname eq "510") {
 892         @subfields = ('a', 'b', 'c', 'x', '3');
 893     }
 894     elsif ($fieldname eq "511") {
 895         @subfields = ('a');
 896     }
 897     elsif ($fieldname eq "513") {
 898         @subfields = ('a', 'b');
 899     }
 900     elsif ($fieldname eq "514") {
 901         @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'm', 'u', 'z');
 902     }
 903     elsif ($fieldname eq "515") {
 904         @subfields = ('a');
 905     }
 906     elsif ($fieldname eq "516") {
 907         @subfields = ('a');
 908     }
 909     elsif ($fieldname eq "518") {
 910         @subfields = ('a', '3');
 911     }
 912     elsif ($fieldname eq "521") {
 913         @subfields = ('a', 'b', '3');
 914     }
 915     elsif ($fieldname eq "522") {
 916         @subfields = ('a');
 917     }
 918     elsif ($fieldname eq "524") {
 919         @subfields = ('a', '2', '3');
 920     }
 921     elsif ($fieldname eq "525") {
 922         @subfields = ('a');
 923     }
 924     elsif ($fieldname eq "526") {
 925         @subfields = ('a', 'b', 'c', 'd', 'i', 'x', 'z', '5');
 926     }
 927     elsif ($fieldname eq "530") {
 928         @subfields = ('a', 'b', 'c', 'd', 'u', '3');
 929     }
 930     elsif ($fieldname eq "533") {
 931         @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'm', 'n', '3');
 932     }
 933     elsif ($fieldname eq "534") {
 934         @subfields = ('a', 'b', 'c', 'e', 'f', 'k', 'l', 'm', 'n', 'p', 't', 'x', 'z');
 935     }
 936     elsif ($fieldname eq "535") {
 937         @subfields = ('a', 'b', 'c', 'd', 'g', '3');
 938     }
 939
 940     ## loop over all notefields
 941     foreach my $notefield (@notefields) {
 942         if (defined $notefield) {
 943             ## retrieve all available subfield tuples
 944             my @notesubfields = $notefield->subfields();
 945
 946             ## loop over all subfield tuples
 947             foreach my $notetuple (@notesubfields) {
 948                 ## loop over all subfields to check
 949                 foreach my $subfield (@subfields) {
 950                     ## [0] contains subfield code
 951                     if (@$notetuple[0] eq $subfield) {
 952                         ## [1] contains value, remove trailing separators
 953                         print "<marc>field $fieldname subfield $subfield: ", @$notetuple[1], "\r\n" if $marcprint;
 954                         @$notetuple[1] =~ s% *[,;.:/]*$%%;
 955                         if (length(@$notetuple[1]) > 0) {
 956                             ## add to list
 957                             push @{$aref}, @$notetuple[1];
 958                         }
 959                         last;
 960                     }
 961                 }
 962             }
 963         }
 964     }
 965 }
 966
 967 ##********************************************************************
 968 ## print_abstract(): prints abstract fields
 969 ## Arguments: list of fields (520)
 970 ##********************************************************************
 971 sub print_abstract {
 972     # ToDo: take care of repeatable subfields
 973     my(@abfields) = @_;
 974
 975     ## we check the following subfields
 976     my @subfields = ('a', 'b');
 977
 978     ## we generate a list for all useful strings
 979     my @abstrings;
 980
 981     ## loop over all abfields
 982     foreach my $abfield (@abfields) {
 983         foreach my $field (@subfields) {
 984             if ( length( $abfield->subfield($field) ) > 0 ) {
 985                 my $ab = $abfield->subfield($field);
 986
 987                 print "<marc>field 520 subfield $field: $ab\r\n" if $marcprint;
 988
 989                 ## strip trailing separators
 990                 $ab =~ s% *[;,:./]*$%%;
 991
 992                 ## add string to the list
 993                 push( @abstrings, $ab );
 994             }
 995         }
 996     }
 997
 998     my $allabs = join "; ", @abstrings;
 999
1000     if (length($allabs) > 0) {
1001         print "N2  - ", &charconv($allabs), "\r\n";
1002     }
1003
1004 }
1005
1006
1007
1008 ##********************************************************************
1009 ## charconv(): converts to a different charset based on a global var
1010 ## Arguments: string
1011 ## Returns: string
1012 ##********************************************************************
1013 sub charconv {
1014     if ($utf) {
1015         ## return unaltered if already utf-8
1016         return @_;
1017     }
1018     elsif (my $uniout eq "t") {
1019         ## convert to utf-8
1020         return marc8_to_utf8("@_");
1021     }
1022     else {
1023         ## return unaltered if no utf-8 requested
1024         return @_;
1025     }
1026 }
1027 1;