C4/Ris.pm

   1 package C4::Ris;
   2
   3 # Original script :
   4 ## marc2ris: converts MARC21 and UNIMARC datasets to RIS format
   5 ##           See comments below for compliance with other MARC dialects
   6 ##
   7 ## usage: perl marc2ris < infile.marc > outfile.ris
   8 ##
   9 ## Dependencies: perl 5.6.0 or later
  10 ##               MARC::Record
  11 ##               MARC::Charset
  12 ##
  13 ## markus@mhoenicka.de 2002-11-16
  14
  15 ##   This program is free software; you can redistribute it and/or modify
  16 ##   it under the terms of the GNU General Public License as published by
  17 ##   the Free Software Foundation; either version 2 of the License, or
  18 ##   (at your option) any later version.
  19 ##
  20 ##   This program is distributed in the hope that it will be useful,
  21 ##   but WITHOUT ANY WARRANTY; without even the implied warranty of
  22 ##   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  23 ##   GNU General Public License for more details.
  24
  25 ##   You should have received a copy of the GNU General Public License
  26 ##   along with this program; if not, write to the Free Software
  27 ##   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  28
  29 ## Some background about MARC as understood by this script
  30 ## The default input format used in this script is MARC21, which
  31 ## superseded USMARC and CANMARC. The specification can be found at:
  32 ## http://lcweb.loc.gov/marc/
  33 ## UNIMARC follows the specification at:
  34 ## http://www.ifla.org/VI/3/p1996-1/sec-uni.htm
  35 ## UKMARC support is a bit shaky because there is no specification available
  36 ## for free. The wisdom used in this script was taken from a PDF document
  37 ## comparing UKMARC to MARC21 found at:
  38 ## www.bl.uk/services/bibliographic/marcchange.pdf
  39
  40
  41 # Modified 2008 by BibLibre for Koha
  42 # Modified 2011 by Catalyst
  43 # Modified 2011 by Equinox Software, Inc.
  44 #
  45 # This file is part of Koha.
  46 #
  47 # Koha is free software; you can redistribute it and/or modify it
  48 # under the terms of the GNU General Public License as published by
  49 # the Free Software Foundation; either version 3 of the License, or
  50 # (at your option) any later version.
  51 #
  52 # Koha is distributed in the hope that it will be useful, but
  53 # WITHOUT ANY WARRANTY; without even the implied warranty of
  54 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  55 # GNU General Public License for more details.
  56 #
  57 # You should have received a copy of the GNU General Public License
  58 # along with Koha; if not, see <http://www.gnu.org/licenses>.
  59 #
  60 #
  61
  62 use Modern::Perl;
  63
  64 use List::MoreUtils qw/uniq/;
  65 use vars qw($VERSION @ISA @EXPORT);
  66
  67 use C4::Biblio qw(GetMarcSubfieldStructureFromKohaField);
  68 use Koha::SimpleMARC qw(read_field);
  69
  70 # set the version for version checking
  71 $VERSION = 3.07.00.049;
  72
  73 @ISA = qw(Exporter);
  74
  75 # only export API methods
  76
  77 @EXPORT = qw(
  78   &marc2ris
  79 );
  80
  81 our $utf;
  82 our $intype;
  83 our $marcprint;
  84 our $protoyear;
  85
  86
=head1 marc2ris - Convert a MARC record (MARC21, UNIMARC or UKMARC) to RIS

  my ($ris) = marc2ris($record);

Returns the RIS dataset as a scalar.

C<$record> - a MARC::Record object

=cut
  96
  97 sub marc2ris {
  98     my ($record) = @_;
  99     my $output;
 100
 101     my $marcflavour = C4::Context->preference("marcflavour");
 102     $intype = lc($marcflavour);
 103     my $marcprint = 0; # Debug flag;
 104
 105     # Let's redirect stdout
 106     open my $oldout, ">&STDOUT";
 107     my $outvar;
 108     close STDOUT;
 109     open STDOUT,'>:encoding(utf8)', \$outvar;
 110
 111     ## First we should check the character encoding. This may be
 112     ## MARC-8 or UTF-8. The former is indicated by a blank, the latter
 113     ## by 'a' at position 09 (zero-based) of the leader
 114     my $leader = $record->leader();
 115     if ( $intype eq "marc21" ) {
 116         if ( $leader =~ /^.{9}a/ ) {
 117             print "<marc>---\r\n<marc>UTF-8 data\r\n" if $marcprint;
 118             $utf = 1;
 119         }
 120         else {
 121             print "<marc>---\r\n<marc>MARC-8 data\r\n" if $marcprint;
 122         }
 123     }
 124     ## else: other MARC formats do not specify the character encoding
 125     ## we assume it's *not* UTF-8
 126
 127     my $RisExportAdditionalFields = C4::Context->preference('RisExportAdditionalFields');
 128     my $ris_additional_fields;
 129     if ($RisExportAdditionalFields) {
 130         $RisExportAdditionalFields = "$RisExportAdditionalFields\n\n";
 131         $ris_additional_fields = eval { YAML::Load($RisExportAdditionalFields); };
 132         if ($@) {
 133             warn "Unable to parse RisExportAdditionalFields : $@";
 134             $ris_additional_fields = undef;
 135         }
 136     }
 137
 138     ## start RIS dataset
 139     if ( $ris_additional_fields && $ris_additional_fields->{TY} ) {
 140         my ( $f, $sf ) = split( /\$/, $ris_additional_fields->{TY} );
 141         my ( $type ) = read_field( { record => $record, field => $f, subfield => $sf, field_numbers => [1] } );
 142         if ($type) {
 143             print "TY  - $type\r\n";
 144         }
 145         else {
 146             &print_typetag($leader);
 147         }
 148     }
 149     else {
 150         &print_typetag($leader);
 151     }
 152
 153         ## retrieve all author fields and collect them in a list
 154         my @author_fields;
 155
 156         if ($intype eq "unimarc") {
 157             ## Fields 700, 701, and 702 can contain author names
 158             @author_fields = ($record->field('700'), $record->field('701'), $record->field('702'));
 159         }
 160         else {  ## marc21, ukmarc
 161             ## Field 100 sometimes carries main author
 162             ## Field(s) 700 carry added entries - personal names
 163             @author_fields = ($record->field('100'), $record->field('700'));
 164         }
 165
 166         ## loop over all author fields
 167         foreach my $field (@author_fields) {
 168             if (length($field)) {
 169                 my $author = &get_author($field);
 170                 print "AU  - ",&charconv($author),"\r\n";
 171             }
 172         }
 173
 174         # ToDo: should we specify anonymous as author if we didn't find
 175         # one? or use one of the corporate/meeting names below?
 176
 177         ## add corporate names or meeting names as editors ??
 178         my @editor_fields;
 179
 180         if ($intype eq "unimarc") {
 181             ## Fields 710, 711, and 712 can carry corporate names
 182             ## Field(s) 720, 721, 722, 730 have additional candidates
 183             @editor_fields = ($record->field('710'), $record->field('711'), $record->field('712'), $record->field('720'), $record->field('721'), $record->field('722'), $record->field('730'));
 184         }
 185         else { ## marc21, ukmarc
 186             ## Fields 110 and 111 carry the main entries - corporate name and
 187             ## meeting name, respectively
 188             ## Field(s) 710, 711 carry added entries - personal names
 189             @editor_fields = ($record->field('110'), $record->field('111'), $record->field('710'), $record->field('711'));
 190         }
 191
 192         ## loop over all editor fields
 193         foreach my $field (@editor_fields) {
 194             if (length($field)) {
 195                 my $editor = &get_editor($field);
 196                 print "ED  - ",&charconv($editor),"\r\n";
 197             }
 198         }
 199
 200         ## get info from the title field
 201         if ($intype eq "unimarc") {
 202             &print_title($record->field('200'));
 203         }
 204         else { ## marc21, ukmarc
 205             &print_title($record->field('245'));
 206         }
 207
 208         ## series title
 209         if ($intype eq "unimarc") {
 210             &print_stitle($record->field('225'));
 211         }
 212         else { ## marc21, ukmarc
 213             &print_stitle($record->field('490'));
 214         }
 215
 216         ## ISBN/ISSN
 217         if ($intype eq "unimarc") {
 218             &print_isbn($record->field('010'));
 219             &print_issn($record->field('011'));
 220         }
 221         elsif ($intype eq "ukmarc") {
 222             &print_isbn($record->field('021'));
 223             ## this is just an assumption
 224             &print_issn($record->field('022'));
 225         }
 226         else { ## assume marc21
 227             &print_isbn($record->field('020'));
 228             &print_issn($record->field('022'));
 229         }
 230
 231         if ($intype eq "marc21") {
 232             &print_loc_callno($record->field('050'));
 233             &print_dewey($record->field('082'));
 234         }
 235         ## else: unimarc, ukmarc do not seem to store call numbers?
 236
 237         ## publication info
 238         if ($intype eq "unimarc") {
 239             &print_pubinfo($record->field('210'));
 240         }
 241         else { ## marc21, ukmarc
 242             if ($record->field('264')) {
 243                  &print_pubinfo($record->field('264'));
 244             }
 245             else {
 246             &print_pubinfo($record->field('260'));
 247             }
 248         }
 249
 250         ## 6XX fields contain KW candidates. We add all of them to a
 251
 252     my @field_list;
 253     if ($intype eq "unimarc") {
 254         @field_list = ('600', '601', '602', '604', '605', '606','607', '608', '610', '615', '620', '660', '661', '670', '675', '676', '680', '686');
 255     } elsif ($intype eq "ukmarc") {
 256         @field_list = ('600', '610', '611', '630', '650', '651','653', '655', '660', '661', '668', '690', '691', '692', '695');
 257     } else { ## assume marc21
 258         @field_list = ('600', '610', '611', '630', '650', '651','653', '654', '655', '656', '657', '658');
 259     }
 260
 261     my @kwpool;
 262     for my $f ( @field_list ) {
 263         my @fields = $record->field($f);
 264         push @kwpool, ( get_keywords("$f",$record->field($f)) );
 265     }
 266
 267     # Remove duplicate
 268     @kwpool = uniq @kwpool;
 269
 270     for my $kw ( @kwpool ) {
 271         print "KW  - ", &charconv($kw), "\r\n";
 272     }
 273
 274         ## 5XX have various candidates for notes and abstracts. We pool
 275         ## all notes-like stuff in one list.
 276         my @notepool;
 277
 278         ## these fields have notes candidates
 279         if ($intype eq "unimarc") {
 280             foreach ('300', '301', '302', '303', '304', '305', '306', '307', '308', '310', '311', '312', '313', '314', '315', '316', '317', '318', '320', '321', '322', '323', '324', '325', '326', '327', '328', '332', '333', '336', '337', '345') {
 281                 &pool_subx(\@notepool, $_, $record->field($_));
 282             }
 283         }
 284         elsif ($intype eq "ukmarc") {
 285             foreach ('500', '501', '502', '503', '504', '505', '506', '508', '514', '515', '516', '521', '524', '525', '528', '530', '531', '532', '533', '534', '535', '537', '538', '540', '541', '542', '544', '554', '555', '556', '557', '561', '563', '580', '583', '584', '586') {
 286                 &pool_subx(\@notepool, $_, $record->field($_));
 287         }
 288         }
 289         else { ## assume marc21
 290             foreach ('500', '501', '502', '504', '505', '506', '507', '508', '510', '511', '513', '514', '515', '516', '518', '521', '522', '524', '525', '526', '530', '533', '534', '535') {
 291                 &pool_subx(\@notepool, $_, $record->field($_));
 292             }
 293         }
 294
 295         my $allnotes = join "; ", @notepool;
 296
 297         if (length($allnotes) > 0) {
 298             print "N1  - ", &charconv($allnotes), "\r\n";
 299         }
 300
 301         ## 320/520 have the abstract
 302         if ($intype eq "unimarc") {
 303             &print_abstract($record->field('320'));
 304         }
 305         elsif ($intype eq "ukmarc") {
 306             &print_abstract($record->field('512'), $record->field('513'));
 307         }
 308         else { ## assume marc21
 309             &print_abstract($record->field('520'));
 310         }
 311
 312     # 856u has the URI
 313     if ($record->field('856')) {
 314         print_uri($record->field('856'));
 315     }
 316
 317     if ($ris_additional_fields) {
 318         foreach my $ris_tag ( keys %$ris_additional_fields ) {
 319             next if $ris_tag eq 'TY';
 320
 321             my @fields =
 322               ref( $ris_additional_fields->{$ris_tag} ) eq 'ARRAY'
 323               ? @{ $ris_additional_fields->{$ris_tag} }
 324               : $ris_additional_fields->{$ris_tag};
 325
 326             for my $tag (@fields) {
 327                 my ( $f, $sf ) = split( /\$/, $tag );
 328                 my @values = read_field( { record => $record, field => $f, subfield => $sf } );
 329                 foreach my $v (@values) {
 330                     print "$ris_tag  - $v\r\n";
 331                 }
 332             }
 333         }
 334     }
 335
 336         ## end RIS dataset
 337         print "ER  - \r\n";
 338
 339     # Let's re-redirect stdout
 340     close STDOUT;
 341     open STDOUT, ">&", $oldout;
 342
 343     return $outvar;
 344
 345 }
 346
 347
 348 ##********************************************************************
 349 ## print_typetag(): prints the first line of a RIS dataset including
 350 ## the preceding newline
 351 ## Argument: the leader of a MARC dataset
 352 ## Returns: the value at leader position 06
 353 ##********************************************************************
 354 sub print_typetag {
 355   my ($leader)= @_;
 356     ## the keys of typehash are the allowed values at position 06
 357     ## of the leader of a MARC record, the values are the RIS types
 358     ## that might appropriately represent these types.
 359     my %ustypehash = (
 360                     "a" => "BOOK",
 361                     "c" => "MUSIC",
 362                     "d" => "MUSIC",
 363                     "e" => "MAP",
 364                     "f" => "MAP",
 365                     "g" => "ADVS",
 366                     "i" => "SOUND",
 367                     "j" => "SOUND",
 368                     "k" => "ART",
 369                     "m" => "DATA",
 370                     "o" => "GEN",
 371                     "p" => "GEN",
 372                     "r" => "ART",
 373                     "t" => "GEN",
 374                 );
 375
 376     my %unitypehash = (
 377                     "a" => "BOOK",
 378                     "b" => "BOOK",
 379                     "c" => "MUSIC",
 380                     "d" => "MUSIC",
 381                     "e" => "MAP",
 382                     "f" => "MAP",
 383                     "g" => "ADVS",
 384                     "i" => "SOUND",
 385                     "j" => "SOUND",
 386                     "k" => "ART",
 387                     "l" => "ELEC",
 388                     "m" => "ADVS",
 389                     "r" => "ART",
 390                 );
 391
 392     ## The type of a MARC record is found at position 06 of the leader
 393     my $typeofrecord = defined($leader) && length $leader >=6 ?
 394                        substr($leader, 6, 1): undef;
 395
 396     ## ToDo: for books, field 008 positions 24-27 might have a few more
 397     ## hints
 398
 399     my %typehash;
 400
 401     ## the ukmarc here is just a guess
 402     if (! defined $intype) {
 403         ## assume MARC21 as default
 404         %typehash = %ustypehash;
 405     }
 406     elsif ($intype eq "marc21" || $intype eq "ukmarc") {
 407         %typehash = %ustypehash;
 408     }
 409     elsif ($intype eq "unimarc") {
 410         %typehash = %unitypehash;
 411     }
 412     else {
 413         ## assume MARC21 as default
 414         %typehash = %ustypehash;
 415     }
 416
 417     if (!defined $typeofrecord || !exists $typehash{$typeofrecord}) {
 418         print "TY  - BOOK\r\n"; ## most reasonable default
 419         warn ("no type found - assume BOOK") if $marcprint;
 420     }
 421     else {
 422         print "TY  - $typehash{$typeofrecord}\r\n";
 423     }
 424
 425     ## use $typeofrecord as the return value, just in case
 426     $typeofrecord;
 427 }
 428
 429 ##********************************************************************
 430 ## normalize_author(): normalizes an authorname
 431 ## Arguments: authorname subfield a
 432 ##            authorname subfield b
 433 ##            authorname subfield c
 434 ##            name type if known: 0=direct order
 435 ##                               1=only surname or full name in
 436 ##                                 inverted order
 437 ##                               3=family, clan, dynasty name
 438 ## Returns: the normalized authorname
 439 ##********************************************************************
 440 sub normalize_author {
 441     my($rawauthora, $rawauthorb, $rawauthorc, $nametype) = @_;
 442
 443     if ($nametype == 0) {
 444         # ToDo: convert every input to Last[,(F.|First)[ (M.|Middle)[,Suffix]]]
 445         warn("name >>$rawauthora<< in direct order - leave as is") if $marcprint;
 446         return $rawauthora;
 447     }
 448     elsif ($nametype == 1) {
 449         ## start munging subfield a (the real name part)
 450         ## remove spaces after separators
 451         $rawauthora =~ s%([,.]+) *%$1%g;
 452
 453         ## remove trailing separators after spaces
 454         $rawauthora =~ s% *[,;:/]*$%%;
 455
 456         ## remove periods after a non-abbreviated name
 457         $rawauthora =~ s%(\w{2,})\.%$1%g;
 458
 459         ## start munging subfield b (something like the suffix)
 460         ## remove trailing separators after spaces
 461         $rawauthorb =~ s% *[,;:/]*$%%;
 462
 463         ## we currently ignore subfield c until someone complains
 464         if (length($rawauthorb) > 0) {
 465         return join ", ", ($rawauthora, $rawauthorb);
 466         }
 467         else {
 468             return $rawauthora;
 469         }
 470     }
 471     elsif ($nametype == 3) {
 472         return $rawauthora;
 473     }
 474 }
 475
 476 ##********************************************************************
 477 ## get_author(): gets authorname info from MARC fields 100, 700
 478 ## Argument: field (100 or 700)
 479 ## Returns: an author string in the format found in the record
 480 ##********************************************************************
 481 sub get_author {
 482     my ($authorfield) = @_;
 483     my ($indicator);
 484
 485     ## the sequence of the name parts is encoded either in indicator
 486     ## 1 (marc21) or 2 (unimarc)
 487     if ($intype eq "unimarc") {
 488         $indicator = 2;
 489     }
 490     else { ## assume marc21
 491         $indicator = 1;
 492     }
 493
 494     print "<marc>:Author(Ind$indicator): ", $authorfield->indicator("$indicator"),"\r\n" if $marcprint;
 495     print "<marc>:Author(\$a): ", $authorfield->subfield('a'),"\r\n" if $marcprint;
 496     print "<marc>:Author(\$b): ", $authorfield->subfield('b'),"\r\n" if $marcprint;
 497     print "<marc>:Author(\$c): ", $authorfield->subfield('c'),"\r\n" if $marcprint;
 498     print "<marc>:Author(\$h): ", $authorfield->subfield('h'),"\r\n" if $marcprint;
 499     if ($intype eq "ukmarc") {
 500         my $authorname = $authorfield->subfield('a') . "," . $authorfield->subfield('h');
 501         normalize_author($authorname, $authorfield->subfield('b'), $authorfield->subfield('c'), $authorfield->indicator("$indicator"));
 502     }
 503     else {
 504         normalize_author($authorfield->subfield('a') // '', $authorfield->subfield('b') // '', $authorfield->subfield('c') // '', $authorfield->indicator("$indicator"));
 505     }
 506 }
 507
 508 ##********************************************************************
 509 ## get_editor(): gets editor info from MARC fields 110, 111, 710, 711
 510 ## Argument: field (110, 111, 710, or 711)
 511 ## Returns: an author string in the format found in the record
 512 ##********************************************************************
 513 sub get_editor {
 514     my ($editorfield) = @_;
 515
 516     if (!$editorfield) {
 517         return;
 518     }
 519     else {
 520         print "<marc>Editor(\$a): ", $editorfield->subfield('a'),"\r\n" if $marcprint;
 521         print "<marc>Editor(\$b): ", $editorfield->subfield('b'),"\r\n" if $marcprint;
 522         print "<marc>editor(\$c): ", $editorfield->subfield('c'),"\r\n" if $marcprint;
 523         return $editorfield->subfield('a');
 524     }
 525 }
 526
 527 ##********************************************************************
 528 ## print_title(): gets info from MARC field 245
 529 ## Arguments: field (245)
 530 ## Returns:
 531 ##********************************************************************
 532 sub print_title {
 533     my ($titlefield) = @_;
 534     if (!$titlefield) {
 535         print "<marc>empty title field (245)\r\n" if $marcprint;
 536         warn("empty title field (245)") if $marcprint;
 537     }
 538     else {
 539         print "<marc>Title(\$a): ",$titlefield->subfield('a'),"\r\n" if $marcprint;
 540         print "<marc>Title(\$b): ",$titlefield->subfield('b'),"\r\n" if $marcprint;
 541         print "<marc>Title(\$c): ",$titlefield->subfield('c'),"\r\n" if $marcprint;
 542
 543         ## The title is usually written in a very odd notation. The title
 544         ## proper ($a) often ends with a space followed by a separator like
 545         ## a slash or a colon. The subtitle ($b) doesn't start with a space
 546         ## so simple concatenation looks odd. We have to conditionally remove
 547         ## the separator and make sure there's a space between title and
 548         ## subtitle
 549
 550         my $clean_title = $titlefield->subfield('a');
 551
 552         my $clean_subtitle = $titlefield->subfield('b');
 553 $clean_subtitle ||= q{};
 554         $clean_title =~ s% *[/:;.]$%%;
 555         $clean_subtitle =~ s%^ *(.*) *[/:;.]$%$1%;
 556
 557         if (length($clean_title) > 0
 558             || (length($clean_subtitle) > 0 && $intype ne "unimarc")) {
 559             print "TI  - ", &charconv($clean_title);
 560
 561             ## subfield $b is relevant only for marc21/ukmarc
 562             if (length($clean_subtitle) > 0 && $intype ne "unimarc") {
 563                 print ": ",&charconv($clean_subtitle);
 564             }
 565             print "\r\n";
 566         }
 567
 568         ## The statement of responsibility is just this: horrors. There is
 569         ## no formal definition how authors, editors and the like should
 570         ## be written and designated. The field is free-form and resistant
 571         ## to all parsing efforts, so this information is lost on me
 572     }
 573 }
 574
 575 ##********************************************************************
 576 ## print_stitle(): prints info from series title field
 577 ## Arguments: field
 578 ## Returns:
 579 ##********************************************************************
 580 sub print_stitle {
 581     my ($titlefield) = @_;
 582
 583     if (!$titlefield) {
 584         print "<marc>empty series title field\r\n" if $marcprint;
 585     }
 586     else {
 587         print "<marc>Series title(\$a): ",$titlefield->subfield('a'),"\r\n" if $marcprint;
 588         my $clean_title = $titlefield->subfield('a');
 589
 590         $clean_title =~ s% *[/:;.]$%%;
 591
 592         if (length($clean_title) > 0) {
 593             print "T2  - ", &charconv($clean_title),"\r\n";
 594         }
 595
 596         if ($intype eq "unimarc") {
 597             print "<marc>Series vol(\$v): ",$titlefield->subfield('v'),"\r\n" if $marcprint;
 598             if (length($titlefield->subfield('v')) > 0) {
 599                 print "VL  - ", &charconv($titlefield->subfield('v')),"\r\n";
 600             }
 601         }
 602     }
 603 }
 604
 605 ##********************************************************************
 606 ## print_isbn(): gets info from MARC field 020
 607 ## Arguments: field (020)
 608 ##********************************************************************
 609 sub print_isbn {
 610     my($isbnfield) = @_;
 611
 612     if (!$isbnfield || length ($isbnfield->subfield('a')) == 0) {
 613         print "<marc>no isbn found (020\$a)\r\n" if $marcprint;
 614         warn("no isbn found") if $marcprint;
 615     }
 616     else {
 617         if (length ($isbnfield->subfield('a')) < 10) {
 618             print "<marc>truncated isbn (020\$a)\r\n" if $marcprint;
 619             warn("truncated isbn") if $marcprint;
 620         }
 621
 622     my $isbn = $isbnfield->subfield('a');
 623         print "SN  - ", &charconv($isbn), "\r\n";
 624     }
 625 }
 626
 627 ##********************************************************************
 628 ## print_issn(): gets info from MARC field 022
 629 ## Arguments: field (022)
 630 ##********************************************************************
 631 sub print_issn {
 632     my($issnfield) = @_;
 633
 634     if (!$issnfield || length ($issnfield->subfield('a')) == 0) {
 635         print "<marc>no issn found (022\$a)\r\n" if $marcprint;
 636         warn("no issn found") if $marcprint;
 637     }
 638     else {
 639         if (length ($issnfield->subfield('a')) < 9) {
 640             print "<marc>truncated issn (022\$a)\r\n" if $marcprint;
 641             warn("truncated issn") if $marcprint;
 642         }
 643
 644         my $issn = substr($issnfield->subfield('a'), 0, 9);
 645         print "SN  - ", &charconv($issn), "\r\n";
 646     }
 647 }
 648
 649 ###
 650 # print_uri() prints info from 856 u
 651 ###
 652 sub print_uri {
 653     my @f856s = @_;
 654
 655     foreach my $f856 (@f856s) {
 656         if (my $uri = $f856->subfield('u')) {
 657                 print "UR  - ", charconv($uri), "\r\n";
 658         }
 659     }
 660 }
 661
 662 ##********************************************************************
 663 ## print_loc_callno(): gets info from MARC field 050
 664 ## Arguments: field (050)
 665 ##********************************************************************
 666 sub print_loc_callno {
 667     my($callnofield) = @_;
 668
 669     if (!$callnofield || length ($callnofield->subfield('a')) == 0) {
 670         print "<marc>no LOC call number found (050\$a)\r\n" if $marcprint;
 671         warn("no LOC call number found") if $marcprint;
 672     }
 673     else {
 674         print "AV  - ", &charconv($callnofield->subfield('a')), " ", &charconv($callnofield->subfield('b')), "\r\n";
 675     }
 676 }
 677
 678 ##********************************************************************
 679 ## print_dewey(): gets info from MARC field 082
 680 ## Arguments: field (082)
 681 ##********************************************************************
 682 sub print_dewey {
 683     my($deweyfield) = @_;
 684
 685     if (!$deweyfield || length ($deweyfield->subfield('a')) == 0) {
 686         print "<marc>no Dewey number found (082\$a)\r\n" if $marcprint;
 687         warn("no Dewey number found") if $marcprint;
 688     }
 689     else {
 690         print "U1  - ", &charconv($deweyfield->subfield('a')), " ", &charconv($deweyfield->subfield('2')), "\r\n";
 691     }
 692 }
 693
 694 ##********************************************************************
 695 ## print_pubinfo(): gets info from MARC field 260
 696 ## Arguments: field (260)
 697 ##********************************************************************
 698 sub print_pubinfo {
 699     my($pubinfofield) = @_;
 700
 701     if (!$pubinfofield) {
 702     print "<marc>no publication information found (260/264)\r\n" if $marcprint;
 703         warn("no publication information found") if $marcprint;
 704     }
 705     else {
 706         ## the following information is available in MARC21:
 707         ## $a place -> CY
 708         ## $b publisher -> PB
 709         ## $c date -> PY
 710         ## the corresponding subfields for UNIMARC:
 711         ## $a place -> CY
 712         ## $c publisher -> PB
 713         ## $d date -> PY
 714
 715         ## all of them are repeatable. We pool all places into a
 716         ## comma-separated list in CY. We also pool all publishers
 717         ## into a comma-separated list in PB.  We break the rule with
 718         ## the date field because this wouldn't make much sense. In
 719         ## this case, we use the first occurrence for PY, the second
 720         ## for Y2, and ignore the rest
 721
 722         my @pubsubfields = $pubinfofield->subfields();
 723         my @cities;
 724         my @publishers;
 725         my $pycounter = 0;
 726
 727         my $pubsub_place;
 728         my $pubsub_publisher;
 729         my $pubsub_date;
 730
 731         if ($intype eq "unimarc") {
 732             $pubsub_place = "a";
 733             $pubsub_publisher = "c";
 734             $pubsub_date = "d";
 735         }
 736         else { ## assume marc21
 737             $pubsub_place = "a";
 738             $pubsub_publisher = "b";
 739             $pubsub_date = "c";
 740         }
 741
 742         ## loop over all subfield list entries
 743         for my $tuple (@pubsubfields) {
 744             ## each tuple consists of the subfield code and the value
 745             if (@$tuple[0] eq $pubsub_place) {
 746                 ## strip any trailing crap
 747                 $_ = @$tuple[1];
 748                 s% *[,;:/]$%%;
 749                 ## pool all occurrences in a list
 750                 push (@cities, $_);
 751             }
 752             elsif (@$tuple[0] eq $pubsub_publisher) {
 753                 ## strip any trailing crap
 754                 $_ = @$tuple[1];
 755                 s% *[,;:/]$%%;
 756                 ## pool all occurrences in a list
 757                 push (@publishers, $_);
 758             }
 759             elsif (@$tuple[0] eq $pubsub_date) {
 760                 ## the dates are free-form, so we want to extract
 761                 ## a four-digit year and leave the rest as
 762                 ## "other info"
 763                 $protoyear = @$tuple[1];
 764                 print "<marc>Year (260\$c): $protoyear\r\n" if $marcprint;
 765
 766                 ## strip any separator chars at the end
 767                 $protoyear =~ s% *[\.;:/]*$%%;
 768
 769                 ## isolate a four-digit year. We discard anything
 770         ## preceding the year, but keep everything after
 771                 ## the year as other info.
 772                 $protoyear =~ s%\D*([0-9\-]{4})(.*)%$1///$2%;
 773
 774                 ## check what we've got. If there is no four-digit
 775                 ## year, make it up. If digits are replaced by '-',
 776                 ## replace those with 0s
 777
 778                 if (index($protoyear, "/") == 4) {
 779                     ## have year info
 780                     ## replace all '-' in the four-digit year
 781                     ## by '0'
 782                     substr($protoyear,0,4) =~ s!-!0!g;
 783                 }
 784                 else {
 785                     ## have no year info
 786                     print "<marc>no four-digit year found, use 0000\r\n" if $marcprint;
 787                     $protoyear = "0000///$protoyear";
 788                     warn("no four-digit year found, use 0000") if $marcprint;
 789                 }
 790
 791                 if ($pycounter == 0 && length($protoyear)) {
 792                     print "PY  - $protoyear\r\n";
 793                 }
 794                 elsif ($pycounter == 1 && length($_)) {
 795                     print "Y2  - $protoyear\r\n";
 796                 }
 797                 ## else: discard
 798             }
 799             ## else: discard
 800         }
 801
 802         ## now dump the collected CY and PB lists
 803         if (@cities > 0) {
 804             print "CY  - ", &charconv(join(", ", @cities)), "\r\n";
 805         }
 806         if (@publishers > 0) {
 807             print "PB  - ", &charconv(join(", ", @publishers)), "\r\n";
 808         }
 809     }
 810 }
 811
 812 ##********************************************************************
 813 ## get_keywords(): prints info from MARC fields 6XX
 814 ## Arguments: list of fields (6XX)
 815 ##********************************************************************
 816 sub get_keywords {
 817     my($fieldname, @keywords) = @_;
 818
 819     my @kw;
 820     ## a list of all possible subfields
 821     my @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'x', 'y', 'z', '2', '3', '4');
 822
 823     ## loop over all 6XX fields
 824     foreach my $kwfield (@keywords) {
 825         if ($kwfield != undef) {
 826             ## authornames get special treatment
 827             if ($fieldname eq "600") {
 828                 my $val = normalize_author($kwfield->subfield('a'), $kwfield->subfield('b'), $kwfield->subfield('c'), $kwfield->indicator('1'));
 829                 push @kw, $val;
 830                 print "<marc>Field $kwfield subfield a:", $kwfield->subfield('a'), "\r\n<marc>Field $kwfield subfield b:", $kwfield->subfield('b'), "\r\n<marc>Field $kwfield subfield c:", $kwfield->subfield('c'), "\r\n" if $marcprint;
 831             }
 832             else {
 833                 ## retrieve all available subfields
 834                 my @kwsubfields = $kwfield->subfields();
 835
 836                 ## loop over all available subfield tuples
 837                 foreach my $kwtuple (@kwsubfields) {
 838                     ## loop over all subfields to check
 839                     foreach my $subfield (@subfields) {
 840                         ## [0] contains subfield code
 841                         if (@$kwtuple[0] eq $subfield) {
 842                             ## [1] contains value, remove trailing separators
 843                             @$kwtuple[1] =~ s% *[,;.:/]*$%%;
 844                             if (length(@$kwtuple[1]) > 0) {
 845                                 push @kw, @$kwtuple[1];
 846                                 print "<marc>Field $fieldname subfield $subfield:", @$kwtuple[1], "\r\n" if $marcprint;
 847                             }
 848                             ## we can leave the subfields loop here
 849                             last;
 850                         }
 851                     }
 852                 }
 853             }
 854         }
 855     }
 856     return @kw;
 857 }
 858
 859 ##********************************************************************
 860 ## pool_subx(): adds contents of several subfields to a list
 861 ## Arguments: reference to a list
 862 ##            field name
 863 ##            list of fields (5XX)
 864 ##********************************************************************
 865 sub pool_subx {
 866     my($aref, $fieldname, @notefields) = @_;
 867
 868     ## we use a list that contains the interesting subfields
 869     ## for each field
 870     # ToDo: this is apparently correct only for marc21
 871     my @subfields;
 872
 873     if ($fieldname eq "500") {
 874         @subfields = ('a');
 875     }
 876     elsif ($fieldname eq "501") {
 877         @subfields = ('a');
 878     }
 879     elsif ($fieldname eq "502") {
 880         @subfields = ('a');
 881             }
 882     elsif ($fieldname eq "504") {
 883         @subfields = ('a', 'b');
 884     }
 885     elsif ($fieldname eq "505") {
 886         @subfields = ('a', 'g', 'r', 't', 'u');
 887     }
 888     elsif ($fieldname eq "506") {
 889         @subfields = ('a', 'b', 'c', 'd', 'e');
 890     }
 891     elsif ($fieldname eq "507") {
 892         @subfields = ('a', 'b');
 893     }
 894     elsif ($fieldname eq "508") {
 895         @subfields = ('a');
 896     }
 897     elsif ($fieldname eq "510") {
 898         @subfields = ('a', 'b', 'c', 'x', '3');
 899     }
 900     elsif ($fieldname eq "511") {
 901         @subfields = ('a');
 902     }
 903     elsif ($fieldname eq "513") {
 904         @subfields = ('a', 'b');
 905     }
 906     elsif ($fieldname eq "514") {
 907         @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'm', 'u', 'z');
 908     }
 909     elsif ($fieldname eq "515") {
 910         @subfields = ('a');
 911     }
 912     elsif ($fieldname eq "516") {
 913         @subfields = ('a');
 914     }
 915     elsif ($fieldname eq "518") {
 916         @subfields = ('a', '3');
 917     }
 918     elsif ($fieldname eq "521") {
 919         @subfields = ('a', 'b', '3');
 920     }
 921     elsif ($fieldname eq "522") {
 922         @subfields = ('a');
 923     }
 924     elsif ($fieldname eq "524") {
 925         @subfields = ('a', '2', '3');
 926     }
 927     elsif ($fieldname eq "525") {
 928         @subfields = ('a');
 929     }
 930     elsif ($fieldname eq "526") {
 931         @subfields = ('a', 'b', 'c', 'd', 'i', 'x', 'z', '5');
 932     }
 933     elsif ($fieldname eq "530") {
 934         @subfields = ('a', 'b', 'c', 'd', 'u', '3');
 935     }
 936     elsif ($fieldname eq "533") {
 937         @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'm', 'n', '3');
 938     }
 939     elsif ($fieldname eq "534") {
 940         @subfields = ('a', 'b', 'c', 'e', 'f', 'k', 'l', 'm', 'n', 'p', 't', 'x', 'z');
 941     }
 942     elsif ($fieldname eq "535") {
 943         @subfields = ('a', 'b', 'c', 'd', 'g', '3');
 944     }
 945
 946     ## loop over all notefields
 947     foreach my $notefield (@notefields) {
 948         if (defined $notefield) {
 949             ## retrieve all available subfield tuples
 950             my @notesubfields = $notefield->subfields();
 951
 952             ## loop over all subfield tuples
 953             foreach my $notetuple (@notesubfields) {
 954                 ## loop over all subfields to check
 955                 foreach my $subfield (@subfields) {
 956                     ## [0] contains subfield code
 957                     if (@$notetuple[0] eq $subfield) {
 958                         ## [1] contains value, remove trailing separators
 959                         print "<marc>field $fieldname subfield $subfield: ", @$notetuple[1], "\r\n" if $marcprint;
 960                         @$notetuple[1] =~ s% *[,;.:/]*$%%;
 961                         if (length(@$notetuple[1]) > 0) {
 962                             ## add to list
 963                             push @{$aref}, @$notetuple[1];
 964                         }
 965                         last;
 966                     }
 967                 }
 968             }
 969         }
 970     }
 971 }
 972
 973 ##********************************************************************
 974 ## print_abstract(): prints abstract fields
 975 ## Arguments: list of fields (520)
 976 ##********************************************************************
 977 sub print_abstract {
 978     # ToDo: take care of repeatable subfields
 979     my(@abfields) = @_;
 980
 981     ## we check the following subfields
 982     my @subfields = ('a', 'b');
 983
 984     ## we generate a list for all useful strings
 985     my @abstrings;
 986
 987     ## loop over all abfields
 988     foreach my $abfield (@abfields) {
 989         foreach my $field (@subfields) {
 990             if ( length( $abfield->subfield($field) ) > 0 ) {
 991                 my $ab = $abfield->subfield($field);
 992
 993                 print "<marc>field 520 subfield $field: $ab\r\n" if $marcprint;
 994
 995                 ## strip trailing separators
 996                 $ab =~ s% *[;,:./]*$%%;
 997
 998                 ## add string to the list
 999                 push( @abstrings, $ab );
1000             }
1001         }
1002     }
1003
1004     my $allabs = join "; ", @abstrings;
1005
1006     if (length($allabs) > 0) {
1007         print "N2  - ", &charconv($allabs), "\r\n";
1008     }
1009
1010 }
1011
1012
1013
##********************************************************************
## charconv(): converts to a different charset based on global vars
## Arguments: string
## Returns: string. In list context the arguments are handed back as
##          a list when no conversion takes place; in scalar context
##          they are returned as one string instead of a count.
##********************************************************************
sub charconv {
    ## The condition used to read 'my $uniout eq "t"', which compared a
    ## brand-new undefined lexical and therefore never converted
    ## anything. Use the package variable instead.
    ## NOTE(review): no file-level declaration of $uniout is visible
    ## from here - confirm where it is supposed to be set.
    our $uniout;

    if ($utf) {
        ## return unaltered if already utf-8
        return wantarray ? @_ : "@_";
    }
    elsif (defined $uniout && $uniout eq "t") {
        ## convert to utf-8
        return marc8_to_utf8("@_");
    }
    else {
        ## return unaltered if no utf-8 requested
        return wantarray ? @_ : "@_";
    }
}
1033 1;