C4/Ris.pm

   1 package C4::Ris;
   2
   3 # Original script :
   4 ## marc2ris: converts MARC21 and UNIMARC datasets to RIS format
   5 ##           See comments below for compliance with other MARC dialects
   6 ##
   7 ## usage: perl marc2ris < infile.marc > outfile.ris
   8 ##
   9 ## Dependencies: perl 5.6.0 or later
  10 ##               MARC::Record
  11 ##               MARC::Charset
  12 ##
  13 ## markus@mhoenicka.de 2002-11-16
  14
  15 ##   This program is free software; you can redistribute it and/or modify
  16 ##   it under the terms of the GNU General Public License as published by
  17 ##   the Free Software Foundation; either version 2 of the License, or
  18 ##   (at your option) any later version.
  19 ##
  20 ##   This program is distributed in the hope that it will be useful,
  21 ##   but WITHOUT ANY WARRANTY; without even the implied warranty of
  22 ##   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  23 ##   GNU General Public License for more details.
  24
  25 ##   You should have received a copy of the GNU General Public License
  26 ##   along with this program; if not, write to the Free Software
  27 ##   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  28
  29 ## Some background about MARC as understood by this script
  30 ## The default input format used in this script is MARC21, which
  31 ## superseded USMARC and CANMARC. The specification can be found at:
  32 ## http://lcweb.loc.gov/marc/
  33 ## UNIMARC follows the specification at:
  34 ## http://www.ifla.org/VI/3/p1996-1/sec-uni.htm
  35 ## UKMARC support is a bit shaky because there is no specification available
  36 ## for free. The wisdom used in this script was taken from a PDF document
  37 ## comparing UKMARC to MARC21 found at:
  38 ## www.bl.uk/services/bibliographic/marcchange.pdf
  39
  40
  41 # Modified 2008 by BibLibre for Koha
  42 # Modified 2011 by Catalyst
  43 # Modified 2011 by Equinox Software, Inc.
  44 # Modified 2016 by Universidad de El Salvador
  45 #
  46 # This file is part of Koha.
  47 #
  48 # Koha is free software; you can redistribute it and/or modify it
  49 # under the terms of the GNU General Public License as published by
  50 # the Free Software Foundation; either version 3 of the License, or
  51 # (at your option) any later version.
  52 #
  53 # Koha is distributed in the hope that it will be useful, but
  54 # WITHOUT ANY WARRANTY; without even the implied warranty of
  55 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  56 # GNU General Public License for more details.
  57 #
  58 # You should have received a copy of the GNU General Public License
  59 # along with Koha; if not, see <http://www.gnu.org/licenses>.
  60 #
  61 #
  62
  63 use Modern::Perl;
  64
  65 use List::MoreUtils qw/uniq/;
  66 use vars qw(@ISA @EXPORT);
  67
  68 use Koha::SimpleMARC qw(read_field);
  69
  70
  71 @ISA = qw(Exporter);
  72
  73 # only export API methods
  74
  75 @EXPORT = qw(
  76   &marc2ris
  77 );
  78
  79 our $marcprint = 0; # Debug flag;
  80
  81 =head1 marc2bibtex - Convert from UNIMARC to RIS
  82
  83   my ($ris) = marc2ris($record);
  84
  85 Returns a RIS scalar
  86
  87 C<$record> - a MARC::Record object
  88
  89 =cut
  90
  91 sub marc2ris {
  92     my ($record) = @_;
  93
  94     my $marcflavour = C4::Context->preference("marcflavour");
  95     my $intype = lc($marcflavour);
  96
  97     # Let's redirect stdout
  98     open my $oldout, ">&STDOUT";
  99     my $outvar;
 100     close STDOUT;
 101     open STDOUT,'>:encoding(utf8)', \$outvar;
 102
 103     ## First we should check the character encoding. This may be
 104     ## MARC-8 or UTF-8. The former is indicated by a blank, the latter
 105     ## by 'a' at position 09 (zero-based) of the leader
 106     my $leader = $record->leader();
 107     if ( $intype eq "marc21" ) {
 108         if ( $leader =~ /^.{9}a/ ) {
 109             print "<marc>---\r\n<marc>UTF-8 data\r\n" if $marcprint;
 110         }
 111         else {
 112             print "<marc>---\r\n<marc>MARC-8 data\r\n" if $marcprint;
 113         }
 114     }
 115     ## else: other MARC formats do not specify the character encoding
 116     ## we assume it's *not* UTF-8
 117
 118     my $RisExportAdditionalFields = C4::Context->preference('RisExportAdditionalFields');
 119     my $ris_additional_fields;
 120     if ($RisExportAdditionalFields) {
 121         $RisExportAdditionalFields = "$RisExportAdditionalFields\n\n";
 122         $ris_additional_fields = eval { YAML::Load($RisExportAdditionalFields); };
 123         if ($@) {
 124             warn "Unable to parse RisExportAdditionalFields : $@";
 125             $ris_additional_fields = undef;
 126         }
 127     }
 128
 129     ## start RIS dataset
 130     if ( $ris_additional_fields && $ris_additional_fields->{TY} ) {
 131         my ( $f, $sf ) = split( /\$/, $ris_additional_fields->{TY} );
 132         my ( $type ) = read_field( { record => $record, field => $f, subfield => $sf, field_numbers => [1] } );
 133         if ($type) {
 134             print "TY  - $type\r\n";
 135         }
 136         else {
 137             &print_typetag($leader);
 138         }
 139     }
 140     else {
 141         &print_typetag($leader);
 142     }
 143
 144         ## retrieve all author fields and collect them in a list
 145         my @author_fields;
 146
 147         if ($intype eq "unimarc") {
 148             ## Fields 700, 701, and 702 can contain author names
 149             @author_fields = ($record->field('700'), $record->field('701'), $record->field('702'));
 150         }
 151         else {  ## marc21, ukmarc
 152             ## Field 100 sometimes carries main author
 153             ## Field(s) 700 carry added entries - personal names
 154             @author_fields = ($record->field('100'), $record->field('700'));
 155         }
 156
 157         ## loop over all author fields
 158         foreach my $field (@author_fields) {
 159             if (length($field)) {
 160                 my $author = &get_author($field);
 161                 print "AU  - ",$author,"\r\n";
 162             }
 163         }
 164
 165         # ToDo: should we specify anonymous as author if we didn't find
 166         # one? or use one of the corporate/meeting names below?
 167
 168         ## add corporate names or meeting names as editors ??
 169         my @editor_fields;
 170
 171         if ($intype eq "unimarc") {
 172             ## Fields 710, 711, and 712 can carry corporate names
 173             ## Field(s) 720, 721, 722, 730 have additional candidates
 174             @editor_fields = ($record->field('710'), $record->field('711'), $record->field('712'), $record->field('720'), $record->field('721'), $record->field('722'), $record->field('730'));
 175         }
 176         else { ## marc21, ukmarc
 177             ## Fields 110 and 111 carry the main entries - corporate name and
 178             ## meeting name, respectively
 179             ## Field(s) 710, 711 carry added entries - personal names
 180             @editor_fields = ($record->field('110'), $record->field('111'), $record->field('710'), $record->field('711'));
 181         }
 182
 183         ## loop over all editor fields
 184         foreach my $field (@editor_fields) {
 185             if (length($field)) {
 186                 my $editor = &get_editor($field);
 187                 print "ED  - ",$editor,"\r\n";
 188             }
 189         }
 190
 191         ## get info from the title field
 192         if ($intype eq "unimarc") {
 193             &print_title($record->field('200'));
 194         }
 195         else { ## marc21, ukmarc
 196             &print_title($record->field('245'));
 197         }
 198
 199         ## series title
 200         if ($intype eq "unimarc") {
 201             &print_stitle($record->field('225'));
 202         }
 203         else { ## marc21, ukmarc
 204             &print_stitle($record->field('490'));
 205         }
 206
 207         ## ISBN/ISSN
 208         if ($intype eq "unimarc") {
 209             &print_isbn($record->field('010'));
 210             &print_issn($record->field('011'));
 211         }
 212         elsif ($intype eq "ukmarc") {
 213             &print_isbn($record->field('021'));
 214             ## this is just an assumption
 215             &print_issn($record->field('022'));
 216         }
 217         else { ## assume marc21
 218             &print_isbn($record->field('020'));
 219             &print_issn($record->field('022'));
 220         }
 221
 222         if ($intype eq "marc21") {
 223             &print_loc_callno($record->field('050'));
 224             &print_dewey($record->field('082'));
 225         }
 226         ## else: unimarc, ukmarc do not seem to store call numbers?
 227
 228         ## publication info
 229         if ($intype eq "unimarc") {
 230             &print_pubinfo($record->field('210'));
 231         }
 232         else { ## marc21, ukmarc
 233             if ($record->field('264')) {
 234                  &print_pubinfo($record->field('264'));
 235             }
 236             else {
 237             &print_pubinfo($record->field('260'));
 238             }
 239         }
 240
 241         ## 6XX fields contain KW candidates. We add all of them to a
 242
 243     my @field_list;
 244     if ($intype eq "unimarc") {
 245         @field_list = ('600', '601', '602', '604', '605', '606','607', '608', '610', '615', '620', '660', '661', '670', '675', '676', '680', '686');
 246     } elsif ($intype eq "ukmarc") {
 247         @field_list = ('600', '610', '611', '630', '650', '651','653', '655', '660', '661', '668', '690', '691', '692', '695');
 248     } else { ## assume marc21
 249         @field_list = ('600', '610', '611', '630', '650', '651','653', '654', '655', '656', '657', '658');
 250     }
 251
 252     my @kwpool;
 253     for my $f ( @field_list ) {
 254         my @fields = $record->field($f);
 255         push @kwpool, ( get_keywords("$f",$record->field($f)) );
 256     }
 257
 258     # Remove duplicate
 259     @kwpool = uniq @kwpool;
 260
 261     for my $kw ( @kwpool ) {
 262         print "KW  - ", $kw, "\r\n";
 263     }
 264
 265         ## 5XX have various candidates for notes and abstracts. We pool
 266         ## all notes-like stuff in one list.
 267         my @notepool;
 268
 269         ## these fields have notes candidates
 270         if ($intype eq "unimarc") {
 271             foreach ('300', '301', '302', '303', '304', '305', '306', '307', '308', '310', '311', '312', '313', '314', '315', '316', '317', '318', '320', '321', '322', '323', '324', '325', '326', '327', '328', '332', '333', '336', '337', '345') {
 272                 &pool_subx(\@notepool, $_, $record->field($_));
 273             }
 274         }
 275         elsif ($intype eq "ukmarc") {
 276             foreach ('500', '501', '502', '503', '504', '505', '506', '508', '514', '515', '516', '521', '524', '525', '528', '530', '531', '532', '533', '534', '535', '537', '538', '540', '541', '542', '544', '554', '555', '556', '557', '561', '563', '580', '583', '584', '586') {
 277                 &pool_subx(\@notepool, $_, $record->field($_));
 278         }
 279         }
 280         else { ## assume marc21
 281             foreach ('500', '501', '502', '504', '505', '506', '507', '508', '510', '511', '513', '514', '515', '516', '518', '521', '522', '524', '525', '526', '530', '533', '534', '535') {
 282                 &pool_subx(\@notepool, $_, $record->field($_));
 283             }
 284         }
 285
 286         my $allnotes = join "; ", @notepool;
 287
 288         if (length($allnotes) > 0) {
 289             print "N1  - ", $allnotes, "\r\n";
 290         }
 291
 292         ## 320/520 have the abstract
 293         if ($intype eq "unimarc") {
 294             &print_abstract($record->field('320'));
 295         }
 296         elsif ($intype eq "ukmarc") {
 297             &print_abstract($record->field('512'), $record->field('513'));
 298         }
 299         else { ## assume marc21
 300             &print_abstract($record->field('520'));
 301         }
 302
 303     # 856u has the URI
 304     if ($record->field('856')) {
 305         print_uri($record->field('856'));
 306     }
 307
 308     if ($ris_additional_fields) {
 309         foreach my $ris_tag ( keys %$ris_additional_fields ) {
 310             next if $ris_tag eq 'TY';
 311
 312             my @fields =
 313               ref( $ris_additional_fields->{$ris_tag} ) eq 'ARRAY'
 314               ? @{ $ris_additional_fields->{$ris_tag} }
 315               : $ris_additional_fields->{$ris_tag};
 316
 317             for my $tag (@fields) {
 318                 my ( $f, $sf ) = split( /\$/, $tag );
 319                 my @values = read_field( { record => $record, field => $f, subfield => $sf } );
 320                 foreach my $v (@values) {
 321                     print "$ris_tag  - $v\r\n";
 322                 }
 323             }
 324         }
 325     }
 326
 327         ## end RIS dataset
 328         print "ER  - \r\n";
 329
 330     # Let's re-redirect stdout
 331     close STDOUT;
 332     open STDOUT, ">&", $oldout;
 333
 334     return $outvar;
 335
 336 }
 337
 338
 339 ##********************************************************************
 340 ## print_typetag(): prints the first line of a RIS dataset including
 341 ## the preceding newline
 342 ## Argument: the leader of a MARC dataset
 343 ## Returns: the value at leader position 06
 344 ##********************************************************************
 345 sub print_typetag {
 346   my ($leader)= @_;
 347     ## the keys of typehash are the allowed values at position 06
 348     ## of the leader of a MARC record, the values are the RIS types
 349     ## that might appropriately represent these types.
 350     my %ustypehash = (
 351             "a" => "BOOK",
 352             "c" => "MUSIC",
 353             "d" => "MUSIC",
 354             "e" => "MAP",
 355             "f" => "MAP",
 356             "g" => "ADVS",
 357             "i" => "SOUND",
 358             "j" => "SOUND",
 359             "k" => "ART",
 360             "m" => "DATA",
 361             "o" => "GEN",
 362             "p" => "GEN",
 363             "r" => "ART",
 364             "t" => "MANSCPT",
 365             );
 366
 367     my %unitypehash = (
 368             "a" => "BOOK",
 369             "b" => "MANSCPT",
 370             "c" => "MUSIC",
 371             "d" => "MUSIC",
 372             "e" => "MAP",
 373             "f" => "MAP",
 374             "g" => "ADVS",
 375             "i" => "SOUND",
 376             "j" => "SOUND",
 377             "k" => "ART",
 378             "l" => "ELEC",
 379             "m" => "GEN",
 380             "r" => "ART",
 381             );
 382
 383     ## The type of a MARC record is found at position 06 of the leader
 384     my $typeofrecord = defined($leader) && length $leader >=6 ?
 385                        substr($leader, 6, 1): undef;
 386     ## Pos 07 == Bibliographic level
 387     my $biblevel = defined($leader) && length $leader >=7 ?
 388                        substr($leader, 7, 1): '';
 389
 390     ## TODO: for books, field 008 positions 24-27 might have a few more
 391     ## hints
 392
 393     my %typehash;
 394     my $marcflavour = C4::Context->preference("marcflavour");
 395     my $intype = lc($marcflavour);
 396     if ($intype eq "unimarc") {
 397         %typehash = %unitypehash;
 398     }
 399     else {
 400         %typehash = %ustypehash;
 401     }
 402
 403     if (!defined $typeofrecord || !exists $typehash{$typeofrecord}) {
 404         print "TY  - GEN\r\n"; ## most reasonable default
 405         warn ("no type found - assume GEN") if $marcprint;
 406     } elsif ( $typeofrecord =~ "a" ) {
 407         if ( $biblevel eq 'a' ) {
 408             print "TY  - GEN\r\n"; ## monographic component part
 409         } elsif ( $biblevel eq 'b' || $biblevel eq 's' ) {
 410             print "TY  - SER\r\n"; ## serial or serial component part
 411         } elsif ( $biblevel eq 'm' ) {
 412             print "TY  - $typehash{$typeofrecord}\r\n"; ## book
 413         } elsif ( $biblevel eq 'c' || $biblevel eq 'd' ) {
 414             print "TY  - GEN\r\n"; ## collections, part of collections or made-up collections
 415         } elsif ( $biblevel eq 'i' ) {
 416             print "TY  - DATA\r\n"; ## updating loose-leafe as Dataset
 417         }
 418     } else {
 419         print "TY  - $typehash{$typeofrecord}\r\n";
 420     }
 421
 422     ## use $typeofrecord as the return value, just in case
 423     $typeofrecord;
 424 }
 425
 426 ##********************************************************************
 427 ## normalize_author(): normalizes an authorname
 428 ## Arguments: authorname subfield a
 429 ##            authorname subfield b
 430 ##            authorname subfield c
 431 ##            name type if known: 0=direct order
 432 ##                               1=only surname or full name in
 433 ##                                 inverted order
 434 ##                               3=family, clan, dynasty name
 435 ## Returns: the normalized authorname
 436 ##********************************************************************
 437 sub normalize_author {
 438     my($rawauthora, $rawauthorb, $rawauthorc, $nametype) = @_;
 439
 440     if ($nametype == 0) {
 441         # ToDo: convert every input to Last[,(F.|First)[ (M.|Middle)[,Suffix]]]
 442         warn("name >>$rawauthora<< in direct order - leave as is") if $marcprint;
 443         return $rawauthora;
 444     }
 445     elsif ($nametype == 1) {
 446         ## start munging subfield a (the real name part)
 447         ## remove spaces after separators
 448         $rawauthora =~ s%([,.]+) *%$1%g;
 449
 450         ## remove trailing separators after spaces
 451         $rawauthora =~ s% *[,;:/]*$%%;
 452
 453         ## remove periods after a non-abbreviated name
 454         $rawauthora =~ s%(\w{2,})\.%$1%g;
 455
 456         ## start munging subfield b (something like the suffix)
 457         ## remove trailing separators after spaces
 458         $rawauthorb =~ s% *[,;:/]*$%%;
 459
 460         ## we currently ignore subfield c until someone complains
 461         if (length($rawauthorb) > 0) {
 462         return join ", ", ($rawauthora, $rawauthorb);
 463         }
 464         else {
 465             return $rawauthora;
 466         }
 467     }
 468     elsif ($nametype == 3) {
 469         return $rawauthora;
 470     }
 471 }
 472
 473 ##********************************************************************
 474 ## get_author(): gets authorname info from MARC fields 100, 700
 475 ## Argument: field (100 or 700)
 476 ## Returns: an author string in the format found in the record
 477 ##********************************************************************
 478 sub get_author {
 479     my ($authorfield) = @_;
 480     my ($indicator);
 481
 482     ## the sequence of the name parts is encoded either in indicator
 483     ## 1 (marc21) or 2 (unimarc)
 484     my $marcflavour = C4::Context->preference("marcflavour");
 485     my $intype = lc($marcflavour);
 486     if ($intype eq "unimarc") {
 487         $indicator = 2;
 488     }
 489     else { ## assume marc21
 490         $indicator = 1;
 491     }
 492
 493     print "<marc>:Author(Ind$indicator): ", $authorfield->indicator("$indicator"),"\r\n" if $marcprint;
 494     print "<marc>:Author(\$a): ", $authorfield->subfield('a'),"\r\n" if $marcprint;
 495     print "<marc>:Author(\$b): ", $authorfield->subfield('b'),"\r\n" if $marcprint;
 496     print "<marc>:Author(\$c): ", $authorfield->subfield('c'),"\r\n" if $marcprint;
 497     print "<marc>:Author(\$h): ", $authorfield->subfield('h'),"\r\n" if $marcprint;
 498     if ($intype eq "ukmarc") {
 499         my $authorname = $authorfield->subfield('a') . "," . $authorfield->subfield('h');
 500         normalize_author($authorname, $authorfield->subfield('b'), $authorfield->subfield('c'), $authorfield->indicator("$indicator"));
 501     }
 502     else {
 503         normalize_author($authorfield->subfield('a') // '', $authorfield->subfield('b') // '', $authorfield->subfield('c') // '', $authorfield->indicator("$indicator"));
 504     }
 505 }
 506
 507 ##********************************************************************
 508 ## get_editor(): gets editor info from MARC fields 110, 111, 710, 711
 509 ## Argument: field (110, 111, 710, or 711)
 510 ## Returns: an author string in the format found in the record
 511 ##********************************************************************
 512 sub get_editor {
 513     my ($editorfield) = @_;
 514
 515     if (!$editorfield) {
 516         return;
 517     }
 518     else {
 519         print "<marc>Editor(\$a): ", $editorfield->subfield('a'),"\r\n" if $marcprint;
 520         print "<marc>Editor(\$b): ", $editorfield->subfield('b'),"\r\n" if $marcprint;
 521         print "<marc>editor(\$c): ", $editorfield->subfield('c'),"\r\n" if $marcprint;
 522         return $editorfield->subfield('a');
 523     }
 524 }
 525
 526 ##********************************************************************
 527 ## print_title(): gets info from MARC field 245
 528 ## Arguments: field (245)
 529 ## Returns:
 530 ##********************************************************************
 531 sub print_title {
 532     my ($titlefield) = @_;
 533     if (!$titlefield) {
 534         print "<marc>empty title field (245)\r\n" if $marcprint;
 535         warn("empty title field (245)") if $marcprint;
 536     }
 537     else {
 538         print "<marc>Title(\$a): ",$titlefield->subfield('a'),"\r\n" if $marcprint;
 539         print "<marc>Title(\$b): ",$titlefield->subfield('b'),"\r\n" if $marcprint;
 540         print "<marc>Title(\$c): ",$titlefield->subfield('c'),"\r\n" if $marcprint;
 541
 542         ## The title is usually written in a very odd notation. The title
 543         ## proper ($a) often ends with a space followed by a separator like
 544         ## a slash or a colon. The subtitle ($b) doesn't start with a space
 545         ## so simple concatenation looks odd. We have to conditionally remove
 546         ## the separator and make sure there's a space between title and
 547         ## subtitle
 548
 549         my $clean_title = $titlefield->subfield('a');
 550
 551         my $clean_subtitle = $titlefield->subfield('b');
 552 $clean_subtitle ||= q{};
 553         $clean_title =~ s% *[/:;.]$%%;
 554         $clean_subtitle =~ s%^ *(.*) *[/:;.]$%$1%;
 555
 556     my $marcflavour = C4::Context->preference("marcflavour");
 557     my $intype = lc($marcflavour);
 558         if (length($clean_title) > 0
 559             || (length($clean_subtitle) > 0 && $intype ne "unimarc")) {
 560             print "TI  - ", $clean_title;
 561
 562             ## subfield $b is relevant only for marc21/ukmarc
 563             if (length($clean_subtitle) > 0 && $intype ne "unimarc") {
 564                 print ": ",$clean_subtitle;
 565             }
 566             print "\r\n";
 567         }
 568
 569         ## The statement of responsibility is just this: horrors. There is
 570         ## no formal definition how authors, editors and the like should
 571         ## be written and designated. The field is free-form and resistant
 572         ## to all parsing efforts, so this information is lost on me
 573     }
 574     return;
 575 }
 576
 577 ##********************************************************************
 578 ## print_stitle(): prints info from series title field
 579 ## Arguments: field
 580 ## Returns:
 581 ##********************************************************************
 582 sub print_stitle {
 583     my ($titlefield) = @_;
 584
 585     if (!$titlefield) {
 586         print "<marc>empty series title field\r\n" if $marcprint;
 587     }
 588     else {
 589         print "<marc>Series title(\$a): ",$titlefield->subfield('a'),"\r\n" if $marcprint;
 590         my $clean_title = $titlefield->subfield('a');
 591
 592         $clean_title =~ s% *[/:;.]$%%;
 593
 594         if (length($clean_title) > 0) {
 595             print "T2  - ", $clean_title,"\r\n";
 596         }
 597
 598     my $marcflavour = C4::Context->preference("marcflavour");
 599     my $intype = lc($marcflavour);
 600         if ($intype eq "unimarc") {
 601             print "<marc>Series vol(\$v): ",$titlefield->subfield('v'),"\r\n" if $marcprint;
 602             if (length($titlefield->subfield('v')) > 0) {
 603                 print "VL  - ", $titlefield->subfield('v'),"\r\n";
 604             }
 605         }
 606     }
 607     return;
 608 }
 609
 610 ##********************************************************************
 611 ## print_isbn(): gets info from MARC field 020
 612 ## Arguments: field (020)
 613 ##********************************************************************
 614 sub print_isbn {
 615     my($isbnfield) = @_;
 616
 617     if (!$isbnfield || length ($isbnfield->subfield('a')) == 0) {
 618         print "<marc>no isbn found (020\$a)\r\n" if $marcprint;
 619         warn("no isbn found") if $marcprint;
 620     }
 621     else {
 622         if (length ($isbnfield->subfield('a')) < 10) {
 623             print "<marc>truncated isbn (020\$a)\r\n" if $marcprint;
 624             warn("truncated isbn") if $marcprint;
 625         }
 626
 627     my $isbn = $isbnfield->subfield('a');
 628         print "SN  - ", $isbn, "\r\n";
 629     }
 630 }
 631
 632 ##********************************************************************
 633 ## print_issn(): gets info from MARC field 022
 634 ## Arguments: field (022)
 635 ##********************************************************************
 636 sub print_issn {
 637     my($issnfield) = @_;
 638
 639     if (!$issnfield || length ($issnfield->subfield('a')) == 0) {
 640         print "<marc>no issn found (022\$a)\r\n" if $marcprint;
 641         warn("no issn found") if $marcprint;
 642     }
 643     else {
 644         if (length ($issnfield->subfield('a')) < 9) {
 645             print "<marc>truncated issn (022\$a)\r\n" if $marcprint;
 646             warn("truncated issn") if $marcprint;
 647         }
 648
 649         my $issn = substr($issnfield->subfield('a'), 0, 9);
 650         print "SN  - ", $issn, "\r\n";
 651     }
 652 }
 653
 654 ###
 655 # print_uri() prints info from 856 u
 656 ###
 657 sub print_uri {
 658     my @f856s = @_;
 659
 660     foreach my $f856 (@f856s) {
 661         if (my $uri = $f856->subfield('u')) {
 662                 print "UR  - ", $uri, "\r\n";
 663         }
 664     }
 665 }
 666
 667 ##********************************************************************
 668 ## print_loc_callno(): gets info from MARC field 050
 669 ## Arguments: field (050)
 670 ##********************************************************************
 671 sub print_loc_callno {
 672     my($callnofield) = @_;
 673
 674     if (!$callnofield || length ($callnofield->subfield('a')) == 0) {
 675         print "<marc>no LOC call number found (050\$a)\r\n" if $marcprint;
 676         warn("no LOC call number found") if $marcprint;
 677     }
 678     else {
 679         print "AV  - ", $callnofield->subfield('a'), " ", $callnofield->subfield('b'), "\r\n";
 680     }
 681 }
 682
 683 ##********************************************************************
 684 ## print_dewey(): gets info from MARC field 082
 685 ## Arguments: field (082)
 686 ##********************************************************************
 687 sub print_dewey {
 688     my($deweyfield) = @_;
 689
 690     if (!$deweyfield || length ($deweyfield->subfield('a')) == 0) {
 691         print "<marc>no Dewey number found (082\$a)\r\n" if $marcprint;
 692         warn("no Dewey number found") if $marcprint;
 693     }
 694     else {
 695         print "U1  - ", $deweyfield->subfield('a'), " ", $deweyfield->subfield('2'), "\r\n";
 696     }
 697 }
 698
 699 ##********************************************************************
 700 ## print_pubinfo(): gets info from MARC field 260
 701 ## Arguments: field (260)
 702 ##********************************************************************
 703 sub print_pubinfo {
 704     my($pubinfofield) = @_;
 705
 706     if (!$pubinfofield) {
 707     print "<marc>no publication information found (260/264)\r\n" if $marcprint;
 708         warn("no publication information found") if $marcprint;
 709     }
 710     else {
 711         ## the following information is available in MARC21:
 712         ## $a place -> CY
 713         ## $b publisher -> PB
 714         ## $c date -> PY
 715         ## the corresponding subfields for UNIMARC:
 716         ## $a place -> CY
 717         ## $c publisher -> PB
 718         ## $d date -> PY
 719
 720         ## all of them are repeatable. We pool all places into a
 721         ## comma-separated list in CY. We also pool all publishers
 722         ## into a comma-separated list in PB.  We break the rule with
 723         ## the date field because this wouldn't make much sense. In
 724         ## this case, we use the first occurrence for PY, the second
 725         ## for Y2, and ignore the rest
 726
 727         my @pubsubfields = $pubinfofield->subfields();
 728         my @cities;
 729         my @publishers;
 730         my $pycounter = 0;
 731
 732         my $pubsub_place;
 733         my $pubsub_publisher;
 734         my $pubsub_date;
 735
 736     my $marcflavour = C4::Context->preference("marcflavour");
 737     my $intype = lc($marcflavour);
 738         if ($intype eq "unimarc") {
 739             $pubsub_place = "a";
 740             $pubsub_publisher = "c";
 741             $pubsub_date = "d";
 742         }
 743         else { ## assume marc21
 744             $pubsub_place = "a";
 745             $pubsub_publisher = "b";
 746             $pubsub_date = "c";
 747         }
 748
 749         ## loop over all subfield list entries
 750         for my $tuple (@pubsubfields) {
 751             ## each tuple consists of the subfield code and the value
 752             if (@$tuple[0] eq $pubsub_place) {
 753                 ## strip any trailing crap
 754                 $_ = @$tuple[1];
 755                 s% *[,;:/]$%%;
 756                 ## pool all occurrences in a list
 757                 push (@cities, $_);
 758             }
 759             elsif (@$tuple[0] eq $pubsub_publisher) {
 760                 ## strip any trailing crap
 761                 $_ = @$tuple[1];
 762                 s% *[,;:/]$%%;
 763                 ## pool all occurrences in a list
 764                 push (@publishers, $_);
 765             }
 766             elsif (@$tuple[0] eq $pubsub_date) {
 767                 ## the dates are free-form, so we want to extract
 768                 ## a four-digit year and leave the rest as
 769                 ## "other info"
 770         my $protoyear = @$tuple[1];
 771                 print "<marc>Year (260\$c): $protoyear\r\n" if $marcprint;
 772
 773                 ## strip any separator chars at the end
 774                 $protoyear =~ s% *[\.;:/]*$%%;
 775
 776                 ## isolate a four-digit year. We discard anything
 777         ## preceding the year, but keep everything after
 778                 ## the year as other info.
 779                 $protoyear =~ s%\D*([0-9\-]{4})(.*)%$1///$2%;
 780
 781                 ## check what we've got. If there is no four-digit
 782                 ## year, make it up. If digits are replaced by '-',
 783                 ## replace those with 0s
 784
 785                 if (index($protoyear, "/") == 4) {
 786                     ## have year info
 787                     ## replace all '-' in the four-digit year
 788                     ## by '0'
 789                     substr($protoyear,0,4) =~ s!-!0!g;
 790                 }
 791                 else {
 792                     ## have no year info
 793                     print "<marc>no four-digit year found, use 0000\r\n" if $marcprint;
 794                     $protoyear = "0000///$protoyear";
 795                     warn("no four-digit year found, use 0000") if $marcprint;
 796                 }
 797
 798                 if ($pycounter == 0 && length($protoyear)) {
 799                     print "PY  - $protoyear\r\n";
 800                 }
 801                 elsif ($pycounter == 1 && length($_)) {
 802                     print "Y2  - $protoyear\r\n";
 803                 }
 804                 ## else: discard
 805             }
 806             ## else: discard
 807         }
 808
 809         ## now dump the collected CY and PB lists
 810         if (@cities > 0) {
 811             print "CY  - ", join(", ", @cities), "\r\n";
 812         }
 813         if (@publishers > 0) {
 814             print "PB  - ", join(", ", @publishers), "\r\n";
 815         }
 816     }
 817 }
 818
 819 ##********************************************************************
 820 ## get_keywords(): prints info from MARC fields 6XX
 821 ## Arguments: list of fields (6XX)
 822 ##********************************************************************
 823 sub get_keywords {
 824     my($fieldname, @keywords) = @_;
 825
 826     my @kw;
 827     ## a list of all possible subfields
 828     my @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'x', 'y', 'z', '2', '3', '4');
 829
 830     ## loop over all 6XX fields
 831     foreach my $kwfield (@keywords) {
 832         if ($kwfield != undef) {
 833             ## authornames get special treatment
 834             if ($fieldname eq "600") {
 835                 my $val = normalize_author($kwfield->subfield('a'), $kwfield->subfield('b'), $kwfield->subfield('c'), $kwfield->indicator('1'));
 836                 push @kw, $val;
 837                 print "<marc>Field $kwfield subfield a:", $kwfield->subfield('a'), "\r\n<marc>Field $kwfield subfield b:", $kwfield->subfield('b'), "\r\n<marc>Field $kwfield subfield c:", $kwfield->subfield('c'), "\r\n" if $marcprint;
 838             }
 839             else {
 840                 ## retrieve all available subfields
 841                 my @kwsubfields = $kwfield->subfields();
 842
 843                 ## loop over all available subfield tuples
 844                 foreach my $kwtuple (@kwsubfields) {
 845                     ## loop over all subfields to check
 846                     foreach my $subfield (@subfields) {
 847                         ## [0] contains subfield code
 848                         if (@$kwtuple[0] eq $subfield) {
 849                             ## [1] contains value, remove trailing separators
 850                             @$kwtuple[1] =~ s% *[,;.:/]*$%%;
 851                             if (length(@$kwtuple[1]) > 0) {
 852                                 push @kw, @$kwtuple[1];
 853                                 print "<marc>Field $fieldname subfield $subfield:", @$kwtuple[1], "\r\n" if $marcprint;
 854                             }
 855                             ## we can leave the subfields loop here
 856                             last;
 857                         }
 858                     }
 859                 }
 860             }
 861         }
 862     }
 863     return @kw;
 864 }
 865
 866 ##********************************************************************
 867 ## pool_subx(): adds contents of several subfields to a list
 868 ## Arguments: reference to a list
 869 ##            field name
 870 ##            list of fields (5XX)
 871 ##********************************************************************
 872 sub pool_subx {
 873     my($aref, $fieldname, @notefields) = @_;
 874
 875     ## we use a list that contains the interesting subfields
 876     ## for each field
 877     # ToDo: this is apparently correct only for marc21
 878     my @subfields;
 879
 880     if ($fieldname eq "500") {
 881         @subfields = ('a');
 882     }
 883     elsif ($fieldname eq "501") {
 884         @subfields = ('a');
 885     }
 886     elsif ($fieldname eq "502") {
 887         @subfields = ('a');
 888             }
 889     elsif ($fieldname eq "504") {
 890         @subfields = ('a', 'b');
 891     }
 892     elsif ($fieldname eq "505") {
 893         @subfields = ('a', 'g', 'r', 't', 'u');
 894     }
 895     elsif ($fieldname eq "506") {
 896         @subfields = ('a', 'b', 'c', 'd', 'e');
 897     }
 898     elsif ($fieldname eq "507") {
 899         @subfields = ('a', 'b');
 900     }
 901     elsif ($fieldname eq "508") {
 902         @subfields = ('a');
 903     }
 904     elsif ($fieldname eq "510") {
 905         @subfields = ('a', 'b', 'c', 'x', '3');
 906     }
 907     elsif ($fieldname eq "511") {
 908         @subfields = ('a');
 909     }
 910     elsif ($fieldname eq "513") {
 911         @subfields = ('a', 'b');
 912     }
 913     elsif ($fieldname eq "514") {
 914         @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'm', 'u', 'z');
 915     }
 916     elsif ($fieldname eq "515") {
 917         @subfields = ('a');
 918     }
 919     elsif ($fieldname eq "516") {
 920         @subfields = ('a');
 921     }
 922     elsif ($fieldname eq "518") {
 923         @subfields = ('a', '3');
 924     }
 925     elsif ($fieldname eq "521") {
 926         @subfields = ('a', 'b', '3');
 927     }
 928     elsif ($fieldname eq "522") {
 929         @subfields = ('a');
 930     }
 931     elsif ($fieldname eq "524") {
 932         @subfields = ('a', '2', '3');
 933     }
 934     elsif ($fieldname eq "525") {
 935         @subfields = ('a');
 936     }
 937     elsif ($fieldname eq "526") {
 938         @subfields = ('a', 'b', 'c', 'd', 'i', 'x', 'z', '5');
 939     }
 940     elsif ($fieldname eq "530") {
 941         @subfields = ('a', 'b', 'c', 'd', 'u', '3');
 942     }
 943     elsif ($fieldname eq "533") {
 944         @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'm', 'n', '3');
 945     }
 946     elsif ($fieldname eq "534") {
 947         @subfields = ('a', 'b', 'c', 'e', 'f', 'k', 'l', 'm', 'n', 'p', 't', 'x', 'z');
 948     }
 949     elsif ($fieldname eq "535") {
 950         @subfields = ('a', 'b', 'c', 'd', 'g', '3');
 951     }
 952
 953     ## loop over all notefields
 954     foreach my $notefield (@notefields) {
 955         if (defined $notefield) {
 956             ## retrieve all available subfield tuples
 957             my @notesubfields = $notefield->subfields();
 958
 959             ## loop over all subfield tuples
 960             foreach my $notetuple (@notesubfields) {
 961                 ## loop over all subfields to check
 962                 foreach my $subfield (@subfields) {
 963                     ## [0] contains subfield code
 964                     if (@$notetuple[0] eq $subfield) {
 965                         ## [1] contains value, remove trailing separators
 966                         print "<marc>field $fieldname subfield $subfield: ", @$notetuple[1], "\r\n" if $marcprint;
 967                         @$notetuple[1] =~ s% *[,;.:/]*$%%;
 968                         if (length(@$notetuple[1]) > 0) {
 969                             ## add to list
 970                             push @{$aref}, @$notetuple[1];
 971                         }
 972                         last;
 973                     }
 974                 }
 975             }
 976         }
 977     }
 978 }
 979
 980 ##********************************************************************
 981 ## print_abstract(): prints abstract fields
 982 ## Arguments: list of fields (520)
 983 ##********************************************************************
 984 sub print_abstract {
 985     # ToDo: take care of repeatable subfields
 986     my(@abfields) = @_;
 987
 988     ## we check the following subfields
 989     my @subfields = ('a', 'b');
 990
 991     ## we generate a list for all useful strings
 992     my @abstrings;
 993
 994     ## loop over all abfields
 995     foreach my $abfield (@abfields) {
 996         foreach my $field (@subfields) {
 997             if ( length( $abfield->subfield($field) ) > 0 ) {
 998                 my $ab = $abfield->subfield($field);
 999
1000                 print "<marc>field 520 subfield $field: $ab\r\n" if $marcprint;
1001
1002                 ## strip trailing separators
1003                 $ab =~ s% *[;,:./]*$%%;
1004
1005                 ## add string to the list
1006                 push( @abstrings, $ab );
1007             }
1008         }
1009     }
1010
1011     my $allabs = join "; ", @abstrings;
1012
1013     if (length($allabs) > 0) {
1014         print "N2  - ", $allabs, "\r\n";
1015     }
1016
1017 }
1018
1019 1;