Bug 6557: Record bib popularity in totalissues
[koha.git] / misc / cronjobs / update_totalissues.pl
blob71da3aa8693567425ddbf5940d409975ef694db5
1 #!/usr/bin/perl
3 # Copyright 2012 C & P Bibliography Services
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it under the
8 # terms of the GNU General Public License as published by the Free Software
9 # Foundation; either version 2 of the License, or (at your option) any later
10 # version.
12 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
13 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
14 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License along
17 # with Koha; if not, write to the Free Software Foundation, Inc.,
18 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 use strict;
21 use warnings;
23 BEGIN {
25 # find Koha's Perl modules
26 # test carefully before changing this
27 use FindBin;
28 eval { require "$FindBin::Bin/../kohalib.pl" };
31 use Getopt::Long;
32 use Pod::Usage;
33 use C4::Context;
34 use C4::Biblio;
35 use DateTime;
36 use DateTime::Format::MySQL;
37 use Time::HiRes qw/time/;
38 use POSIX qw/strftime ceil/;
40 sub usage {
41 pod2usage( -verbose => 2 );
42 exit;
45 $| = 1;
47 # command-line parameters
48 my $verbose = 0;
49 my $test_only = 0;
50 my $want_help = 0;
51 my $since;
52 my $interval;
53 my $usestats = 0;
54 my $useitems = 0;
55 my $incremental = 0;
56 my $commit = 100;
57 my $unit;
59 my $result = GetOptions(
60 'v|verbose' => \$verbose,
61 't|test' => \$test_only,
62 's|since=s' => \$since,
63 'i|interval=s' => \$interval,
64 'use-stats' => \$usestats,
65 'use-items' => \$useitems,
66 'incremental' => \$incremental,
67 'c|commit=i' => \$commit,
68 'h|help' => \$want_help
71 binmode( STDOUT, ":utf8" );
73 if ( defined $since && defined $interval ) {
74 print "The --since and --interval options are mutually exclusive.\n\n";
75 $want_help = 1;
78 if ( $useitems && $incremental ) {
79 print
80 "The --use-items and --incremental options are mutually exclusive.\n\n";
81 $want_help = 1;
84 if ( $incremental && !( defined $since || defined $interval ) ) {
85 $interval = '24h';
88 unless ( $usestats || $useitems ) {
89 print "You must specify either --use-stats and/or --use-items.\n\n";
90 $want_help = 1;
93 if ( not $result or $want_help ) {
94 usage();
97 my $dbh = C4::Context->dbh;
98 $dbh->{AutoCommit} = 0;
100 my $num_bibs_processed = 0;
102 my $starttime = time();
104 process_items() if $useitems;
105 process_stats() if $usestats;
107 report();
109 exit 0;
111 sub process_items {
112 my $query =
113 "SELECT items.biblionumber, SUM(items.issues) FROM items GROUP BY items.biblionumber;";
114 process_query($query);
117 sub process_stats {
118 if ($interval) {
119 my $dt = DateTime->now;
121 my %units = (
122 h => 'hours',
123 d => 'days',
124 w => 'weeks',
125 m => 'months',
126 y => 'years'
129 $interval =~ m/([0-9]*)([hdwmy]?)$/;
130 $unit = $2 || 'd';
131 $since = DateTime::Format::MySQL->format_datetime(
132 $dt->subtract( $units{$unit} => $1 ) );
134 my $limit = '';
135 $limit = " AND statistics.datetime >= ?" if ( $interval || $since );
137 my $query =
138 "SELECT biblio.biblionumber, COUNT(statistics.itemnumber) FROM biblio LEFT JOIN items ON (biblio.biblionumber=items.biblionumber) LEFT JOIN statistics ON (items.itemnumber=statistics.itemnumber) WHERE statistics.type = 'issue' $limit GROUP BY biblio.biblionumber;";
139 process_query( $query, $limit );
141 unless ($incremental) {
142 $query =
143 "SELECT biblio.biblionumber, 0 FROM biblio LEFT JOIN items ON (biblio.biblionumber=items.biblionumber) LEFT JOIN statistics ON (items.itemnumber=statistics.itemnumber) WHERE statistics.itemnumber IS NULL GROUP BY biblio.biblionumber;";
144 process_query( $query, '' );
146 $query =
147 "SELECT biblio.biblionumber, 0 FROM biblio LEFT JOIN items ON (biblio.biblionumber=items.biblionumber) WHERE items.itemnumber IS NULL GROUP BY biblio.biblionumber;";
148 process_query( $query, '' );
151 $dbh->commit();
154 sub process_query {
155 my $query = shift;
156 my $uselimit = shift;
157 my $sth = $dbh->prepare($query);
159 if ( $since && $uselimit ) {
160 $sth->execute($since);
162 else {
163 $sth->execute();
166 while ( my ( $biblionumber, $totalissues ) = $sth->fetchrow_array() ) {
167 $num_bibs_processed++;
168 $totalissues = 0 unless $totalissues;
169 print "Processing bib $biblionumber ($totalissues issues)\n"
170 if $verbose;
171 if ( not $test_only ) {
172 if ( $incremental && $totalissues > 0 ) {
173 UpdateTotalIssues( $biblionumber, $totalissues );
175 else {
176 UpdateTotalIssues( $biblionumber, 0, $totalissues );
179 if ( not $test_only and ( $num_bibs_processed % $commit ) == 0 ) {
180 print_progress_and_commit($num_bibs_processed);
184 $dbh->commit();
187 sub report {
188 my $endtime = time();
189 my $totaltime = ceil( ( $endtime - $starttime ) * 1000 );
190 $starttime = strftime( '%D %T', localtime($starttime) );
191 $endtime = strftime( '%D %T', localtime($endtime) );
193 my $summary = <<_SUMMARY_;
195 Update total issues count script report
196 =======================================================
197 Run started at: $starttime
198 Run ended at: $endtime
199 Total run time: $totaltime ms
200 Number of bibs modified: $num_bibs_processed
201 _SUMMARY_
202 $summary .= "\n**** Ran in test mode only ****\n" if $test_only;
203 print $summary;
206 sub print_progress_and_commit {
207 my $recs = shift;
208 $dbh->commit();
209 print "... processed $recs records\n";
212 =head1 NAME
214 update_totalissues.pl
216 =head1 SYNOPSIS
218 update_totalissues.pl --use-stats
219 update_totalissues.pl --use-items
220 update_totalissues.pl --commit=1000
221 update_totalissues.pl --since='2012-01-01'
222 update_totalissues.pl --interval=30d
224 =head1 DESCRIPTION
226 This batch job populates bibliographic records' total issues count based
227 on historical issue statistics.
229 =over 8
231 =item B<--help>
233 Prints this help
235 =item B<-v|--verbose>
237 Provide verbose log information (list every bib modified).
239 =item B<--use-stats>
241 Use the data in the statistics table for populating total issues.
243 =item B<--use-items>
245 Use items.issues data for populating total issues. Note that issues
246 data from the items table does not respect the --since or --interval
247 options, by definition. Also note that if both --use-stats and
248 --use-items are specified, the count of biblios processed will be
249 misleading.
251 =item B<-s|--since=DATE>
253 Only process issues recorded in the statistics table since DATE.
255 =item B<-i|--interval=S>
257 Only process issues recorded in the statistics table in the last N
258 units of time. The interval should consist of a number with a one-letter
259 unit suffix. The valid suffixes are h (hours), d (days), w (weeks),
260 m (months), and y (years). The default unit is days.
262 =item B<--incremental>
264 Add the number of issues found in the statistics table to the existing
265 total issues count. Intended so that this script can be used as a cron
266 job to update popularity information during low-usage periods. If neither
267 --since or --interval are specified, incremental mode will default to
268 processing the last twenty-four hours.
270 =item B<--commit=N>
272 Commit the results to the database after every N records are processed.
274 =item B<--test>
276 Only test the popularity population script.
278 =back
280 =head1 WARNING
282 If the time on your database server does not match the time on your Koha
283 server you will need to take that into account, and probably use the
284 --since argument instead of the --interval argument for incremental
285 updating.
287 =head1 CREDITS
289 This patch to Koha was sponsored by the Arcadia Public Library and the
290 Arcadia Public Library Foundation in honor of Jackie Faust-Moreno, late
291 director of the Arcadia Public Library.
293 =head1 AUTHOR
295 Jared Camins-Esakov <jcamins AT cpbibliography DOT com>
297 =cut