Bug 22954: Minor markup error in OPAC messaging template
[koha.git] / misc / cronjobs / check-url-quick.pl
blob22d2dfe461ba9b6a89dc4371d0a243ab68766c25
1 #!/usr/bin/perl
3 # Copyright 2012 Tamil s.a.r.l.
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it
8 # under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3 of the License, or
10 # (at your option) any later version.
12 # Koha is distributed in the hope that it will be useful, but
13 # WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Koha; if not, see <http://www.gnu.org/licenses>.
20 use Modern::Perl;
21 use Pod::Usage;
22 use Getopt::Long;
23 use C4::Context;
24 use C4::Biblio;
25 use AnyEvent;
26 use AnyEvent::HTTP;
27 use Encode;
29 my ( $verbose, $help, $html ) = ( 0, 0, 0 );
30 my ( $host, $host_intranet ) = ( '', '' );
31 my ( $timeout, $maxconn ) = ( 10, 200 );
32 my @tags;
33 my $uriedit = "/cgi-bin/koha/cataloguing/addbiblio.pl?biblionumber=";
34 my $user_agent = 'Mozilla/5.0 (compatible; U; Koha checkurl)';
35 GetOptions(
36 'verbose' => \$verbose,
37 'html' => \$html,
38 'h|help' => \$help,
39 'host=s' => \$host,
40 'host-intranet=s' => \$host_intranet,
41 'timeout=i' => \$timeout,
42 'maxconn=i' => \$maxconn,
43 'tags=s{,}' => \@tags,
46 # Validate tags to check
48 my %h = map { $_ => undef } @tags;
49 @tags = sort keys %h;
50 my @invalids;
51 for (@tags) {
52 push @invalids, $_ unless /^\d{3}$/;
54 if (@invalids) {
55 say "Invalid tag(s): ", join( ' ', @invalids );
56 exit;
58 push @tags, '856' unless @tags;
61 sub usage {
62 pod2usage( -verbose => 2 );
63 exit;
66 sub report {
67 my ( $hdr, $biblionumber, $url ) = @_;
68 print $html
69 ? "<tr>\n <td><a href=\""
70 . $host_intranet
71 . $uriedit
72 . $biblionumber
73 . "\">$biblionumber</a>"
74 . "</td>\n <td>$url</td>\n <td>"
75 . "$hdr->{Status} $hdr->{Reason}</td>\n</tr>\n"
76 : "$biblionumber\t$url\t" . "$hdr->{Status} $hdr->{Reason}\n";
79 # Check all URLs from all current Koha biblio records
81 sub check_all_url {
82 my $sth = C4::Context->dbh->prepare(
83 "SELECT biblionumber FROM biblioitems ORDER BY biblionumber");
84 $sth->execute;
86 my $count = 0; # Number of requested URL
87 my $cv = AnyEvent->condvar;
88 say "<html>\n<body>\n<div id=\"checkurl\">\n<table>" if $html;
89 my $idle = AnyEvent->timer(
90 interval => .3,
91 cb => sub {
92 return if $count > $maxconn;
93 while ( my ($biblionumber) = $sth->fetchrow ) {
94 my $record = GetMarcBiblio({ biblionumber => $biblionumber });
95 for my $tag (@tags) {
96 foreach my $field ( $record->field($tag) ) {
97 my $url = $field->subfield('u');
98 next unless $url;
99 $url = "$host/$url" unless $url =~ /^http/i;
100 $url = encode_utf8($url);
101 $count++;
102 http_request(
103 HEAD => $url,
104 headers => { 'user-agent' => $user_agent },
105 timeout => $timeout,
106 sub {
107 my ( undef, $hdr ) = @_;
108 $count--;
109 report( $hdr, $biblionumber, $url )
110 if $hdr->{Status} !~ /^2/ || $verbose;
115 return if $count > $maxconn;
117 $cv->send;
120 $cv->recv;
121 $idle = undef;
123 # Few more time for pending requests
124 $cv = AnyEvent->condvar;
125 my $timer = AnyEvent->timer(
126 after => $timeout,
127 interval => $timeout,
128 cb => sub { $cv->send if $count == 0; }
130 $cv->recv;
131 say "</table>\n</div>\n</body>\n</html>" if $html;
134 usage() if $help;
136 if ( $html && !$host_intranet ) {
137 if ($host) {
138 $host_intranet = $host;
140 else {
142 "Error: host-intranet parameter or host must be provided in html mode";
143 exit;
147 check_all_url();
149 =head1 NAME
151 check-url-quick.pl - Check URLs from biblio records
153 =head1 USAGE
155 =over
157 =item check-url-quick [--verbose|--help|--html] [--tags 310 856] [--host=http://default.tld]
158 [--host-intranet]
160 Scan all URLs found by default in 856$u of bib records and display if resources
161 are available or not. HTTP requests are sent in parallel for efficiency, and
162 speed. This script replaces check-url.pl script.
164 =back
166 =head1 PARAMETERS
168 =over
170 =item B<--host=http://default.tld>
172 Server host used when URL doesn't have one, ie doesn't begin with 'http:'.
173 For example, if --host=http://www.mylib.com, then when 856$u contains
174 'img/image.jpg', the url checked is: http://www.mylib.com/image.jpg'.
176 =item B<--tags>
178 Tags containing URLs in $u subfields. If not provided, 856 tag is checked. Multiple tags can be specified, for example:
180 check-url-quick.pl --tags 310 410 856
182 =item B<--verbose|-v>
184 Outputs both successful and failed URLs.
186 =item B<--html>
188 Formats output in HTML. The result can be redirected to a file
189 accessible by http. This way, it's possible to link directly to biblio
190 record in edit mode. With this parameter B<--host-intranet> is required.
192 =item B<--host-intranet=http://koha-pro.tld>
194 Server host used to link to biblio record editing page in Koha intranet
195 interface.
197 =item B<--timeout=10>
199 Timeout for fetching URLs. By default 10 seconds.
201 =item B<--maxconn=1000>
203 Number of simulaneous HTTP requests. By default 200 connexions.
205 =item B<--help|-h>
207 Print this help page.
209 =back
211 =cut