8 # find Koha's Perl modules
9 # test carefully before changing this
11 eval { require "$FindBin::Bin/kohalib.pl" };
20 use Time
::HiRes qw
/time/;
21 use POSIX qw
/strftime ceil/;
22 use Module
::Load
::Conditional
qw(can_load);
25 pod2usage
( -verbose
=> 2 );
31 # command-line parameters
40 my $result = GetOptions
(
41 'v|verbose' => \
$verbose,
42 't|test' => \
$test_only,
43 'l|link-report' => \
$link_report,
44 'a|auth-limit=s' => \
$auth_limit,
45 'b|bib-limit=s' => \
$bib_limit,
46 'c|commit=i' => \
$commit,
47 'h|help' => \
$want_help
50 binmode( STDOUT
, ":encoding(UTF-8)" );
52 if ( not $result or $want_help ) {
57 "C4::Linker::" . ( C4
::Context
->preference("LinkerModule") || 'Default' );
58 unless ( can_load
( modules
=> { $linker_module => undef } ) ) {
59 $linker_module = 'C4::Linker::Default';
60 unless ( can_load
( modules
=> { $linker_module => undef } ) ) {
61 die "Unable to load linker module. Aborting.";
65 my $linker = $linker_module->new(
67 'auth_limit' => $auth_limit,
68 'options' => C4
::Context
->preference("LinkerOptions")
72 my $num_bibs_processed = 0;
73 my $num_bibs_modified = 0;
75 my %unlinked_headings;
78 my $dbh = C4
::Context
->dbh;
79 $dbh->{AutoCommit
} = 0;
80 process_bibs
( $linker, $bib_limit, $auth_limit, $commit );
86 my ( $linker, $bib_limit, $auth_limit, $commit ) = @_;
88 my $starttime = time();
90 $bib_where = "WHERE $bib_limit";
93 "SELECT biblionumber FROM biblio $bib_where ORDER BY biblionumber ASC";
94 my $sth = $dbh->prepare($sql);
96 while ( my ($biblionumber) = $sth->fetchrow_array() ) {
97 $num_bibs_processed++;
98 process_bib
( $linker, $biblionumber );
100 if ( not $test_only and ( $num_bibs_processed % $commit ) == 0 ) {
101 print_progress_and_commit
($num_bibs_processed);
105 if ( not $test_only ) {
109 my $headings_linked = 0;
110 my $headings_unlinked = 0;
111 my $headings_fuzzy = 0;
112 for ( values %linked_headings ) { $headings_linked += $_; }
113 for ( values %unlinked_headings ) { $headings_unlinked += $_; }
114 for ( values %fuzzy_headings ) { $headings_fuzzy += $_; }
116 my $endtime = time();
117 my $totaltime = ceil
(($endtime - $starttime) * 1000);
118 $starttime = strftime
('%D %T', localtime($starttime));
119 $endtime = strftime
('%D %T', localtime($endtime));
121 my $summary = <<_SUMMARY_
;
123 Bib authority heading linking report
124 =======================================================
125 Linker module
: $linker_module
126 Run started at
: $starttime
127 Run ended at
: $endtime
128 Total run
time: $totaltime ms
129 Number of bibs checked
: $num_bibs_processed
130 Number of bibs modified
: $num_bibs_modified
131 Number of bibs with errors
: $num_bad_bibs
132 Number of headings linked
: $headings_linked
133 Number of headings unlinked
: $headings_unlinked
134 Number of headings fuzzily linked
: $headings_fuzzy
136 $summary .= "\n**** Ran in test mode only ****\n" if $test_only;
141 print <<_LINKED_HEADER_
;
143 Linked headings
(from most frequent to least
):
144 -------------------------------------------------------
149 $linked_headings{$b} <=> $linked_headings{$a} or "\L$a" cmp "\L$b"
150 } keys %linked_headings;
151 foreach my $key (@keys) {
152 print "$key:\t" . $linked_headings{$key} . " occurrences\n";
155 print <<_UNLINKED_HEADER_
;
157 Unlinked headings
(from most frequent to least
):
158 -------------------------------------------------------
163 $unlinked_headings{$b} <=> $unlinked_headings{$a}
165 } keys %unlinked_headings;
166 foreach my $key (@keys) {
167 print "$key:\t" . $unlinked_headings{$key} . " occurrences\n";
170 print <<_FUZZY_HEADER_
;
172 Fuzzily
-matched headings
(from most frequent to least
):
173 -------------------------------------------------------
178 $fuzzy_headings{$b} <=> $fuzzy_headings{$a} or "\L$a" cmp "\L$b"
179 } keys %fuzzy_headings;
180 foreach my $key (@keys) {
181 print "$key:\t" . $fuzzy_headings{$key} . " occurrences\n";
189 my $biblionumber = shift;
191 my $bib = GetMarcBiblio
({ biblionumber
=> $biblionumber });
192 unless ( defined $bib ) {
194 "\nCould not retrieve bib $biblionumber from the database - record is corrupt.\n";
199 my $frameworkcode = GetFrameworkCode
($biblionumber);
201 my ( $headings_changed, $results ) =
202 LinkBibHeadingsToAuthorities
( $linker, $bib, $frameworkcode );
203 foreach my $key ( keys %{ $results->{'unlinked'} } ) {
204 $unlinked_headings{$key} += $results->{'unlinked'}->{$key};
206 foreach my $key ( keys %{ $results->{'linked'} } ) {
207 $linked_headings{$key} += $results->{'linked'}->{$key};
209 foreach my $key ( keys %{ $results->{'fuzzy'} } ) {
210 $fuzzy_headings{$key} += $results->{'fuzzy'}->{$key};
213 if ($headings_changed) {
215 my $title = substr( $bib->title, 0, 20 );
217 "Bib %12d (%-20s): %3d headings changed\n",
223 if ( not $test_only ) {
224 ModBiblio
( $bib, $biblionumber, $frameworkcode, 1 );
225 #Last param is to note ModBiblio was called from linking script and bib should not be linked again
226 $num_bibs_modified++;
231 sub print_progress_and_commit
{
234 print "... processed $recs records\n";
239 link_bibs_to_authorities.pl
243 link_bibs_to_authorities.pl
244 link_bibs_to_authorities.pl -v
245 link_bibs_to_authorities.pl -l
246 link_bibs_to_authorities.pl --commit=1000
247 link_bibs_to_authorities.pl --auth-limit=STRING
248 link_bibs_to_authorities.pl --bib-limit=STRING
252 This batch job checks each bib record in the Koha database and attempts to link
253 each of its headings to the matching authority record.
261 =item B<-v|--verbose>
263 Provide verbose log information (print the number of headings changed for each
266 =item B<-l|--link-report>
268 Provide a report of all the headings that were processed: which were matched,
271 =item B<--auth-limit=S>
273 Only process those headings which match an authority record that matches the
274 user-specified WHERE clause.
276 =item B<--bib-limit=S>
278 Only process those bib records that match the user-specified WHERE clause.
282 Commit the results to the database after every N records are processed.
286 Only test the authority linking and report the results; do not change the bib