Bug 19365: Fix search for duplicate authorities with Elasticsearch
[koha.git] / misc / batchCompareMARCvsFrameworks.pl
blob9bbcde46df2982a428c281d5e2bdee71b4e1cbb2
#!/usr/bin/perl
# Compare the tag/subfield pairs found in an iso2709 (MARC) file with the
# ones declared in a Koha MARC framework (table marc_subfield_structure).
#
# Two reports are printed on STDOUT:
#   1. pairs that the framework declares with tab = -1 but that occur in
#      the file, together with their number of occurrences;
#   2. pairs that the framework declares with any other tab value and that
#      never occur in the file.
#
# NOTE: pairs that occur in the file but have no row at all in the chosen
# framework are not counted in either report.

use strict;
use warnings;

BEGIN {
    # find Koha's Perl modules
    # test carefully before changing this
    use FindBin;
    eval { require "$FindBin::Bin/kohalib.pl" };
}

# Koha modules used
use C4::Context;
use MARC::File::USMARC;
use MARC::Record;
use MARC::Batch;

use Getopt::Long;
use IO::File;

# ---------------------------------------------------------------------------
# Command line
# ---------------------------------------------------------------------------
my $input_marc_file = '';
my $number          = 0;    # -n is still accepted for compatibility, but its value is never read
my $nowarning;              # set by -w; see how it is applied to $batch below
my $frameworkcode;
my $version;

GetOptions(
    'file:s' => \$input_marc_file,
    'n:s'    => \$number,
    'v'      => \$version,
    'w'      => \$nowarning,
    'c:s'    => \$frameworkcode,    # takes a value: the framework code to compare against
);

# No -c (or -c without a value) means the default framework, whose code is "".
$frameworkcode = '' unless defined $frameworkcode;

if ( $version || $input_marc_file eq '' ) {
    print <<EOF;
This script compares an iso2709 file and Koha's MARC frameworks
It will show the marc fields/subfields used in Koha, and that
are not in the iso2709 file and which fields/subfields that are
used in the iso2709 file and not in Koha.

parameters :
\tv : this version/help screen
\tfile /path/to/file/to/dump : the file to dump
\tw : turn MARC::Batch warnings and strict mode on. Both are off by default, so that a buggy iso2709 file can still be read
\tc frameworkcode : the frameworkcode. If omitted, set to ""

SAMPLE : ./batchCompareMARCvsFrameworks.pl -file /home/paul/koha.dev/local/npl

EOF

    # Asking for help is a success; a missing -file argument is a usage error.
    exit( $version ? 0 : 1 );
}

# ---------------------------------------------------------------------------
# Input file and framework definition
# ---------------------------------------------------------------------------

# don't let MARC::Batch open the file, as it applies the ':utf8' IO layer.
# The explicit 'r' mode makes IO::File treat the name purely as a file name.
my $fh = IO::File->new( $input_marc_file, 'r' )
    or die "Cannot open $input_marc_file: $!\n";

my $batch = MARC::Batch->new( 'USMARC', $fh );
unless ($nowarning) {
    $batch->warnings_off();
    $batch->strict_off();
}

my $dbh = C4::Context->dbh;
my $sth = $dbh->prepare("select tagfield,tagsubfield,tab from marc_subfield_structure where frameworkcode=?");
$sth->execute($frameworkcode);

# Both hashes are keyed by tag followed by subfield code and hold the number
# of times that pair was met in the file.
my %seen_ignored;     # framework rows with tab == -1
my %seen_declared;    # framework rows with any other tab value
while ( my ( $tagfield, $tagsubfield, $tab ) = $sth->fetchrow_array ) {
    my $key = "$tagfield$tagsubfield";
    if ( defined $tab && $tab == -1 ) {
        $seen_ignored{$key} = 0;
    }
    else {
        $seen_declared{$key} = 0;
    }
}

# ---------------------------------------------------------------------------
# Scan the records
# ---------------------------------------------------------------------------
my $i = 0;
while ( my $record = $batch->next() ) {
    $i++;
    foreach my $field ( $record->fields() ) {
        next if $field->is_control_field();    # tag num < 10

        my $tag = $field->tag();
        foreach my $subfield ( $field->subfields() ) {

            # element 0 of each subfield is its code
            my $key = $tag . $subfield->[0];

            # Only pairs known to the framework are counted.
            $seen_ignored{$key}++  if exists $seen_ignored{$key};
            $seen_declared{$key}++ if exists $seen_declared{$key};
        }
    }
}

# ---------------------------------------------------------------------------
# Reports
# ---------------------------------------------------------------------------
print "Undeclared tag/subfields that exists in the file\n";
print "================================================\n";
foreach my $key ( sort keys %seen_ignored ) {
    print "$key => $seen_ignored{$key}\n" if $seen_ignored{$key} > 0;
}

print "Declared tag/subfields unused in the iso2709 file\n";
print "=================================================\n";
foreach my $key ( sort keys %seen_declared ) {
    print "$key => $seen_declared{$key}\n" if $seen_declared{$key} == 0;
}

print "\n==================\n$i record parsed\n";