4 # Copyright 2009 Tamil s.a.r.l.
6 # This file is part of Koha.
8 # Koha is free software; you can redistribute it and/or modify it
9 # under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 3 of the License, or
11 # (at your option) any later version.
13 # Koha is distributed in the hope that it will be useful, but
14 # WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with Koha; if not, see <http://www.gnu.org/licenses>.
23 package C4
::URL
::Checker
;
27 C4::URL::Checker - base object for checking URLs stored in the Koha DB
33 my $checker = C4::URL::Checker->new( );
34 $checker->{ host_default } = 'http://mylib.kohalibrary.com';
35 my $checked_urls = $checker->check_biblio( 123 );
36 foreach my $url ( @$checked_urls ) {
37 print "url: ", $url->{ url }, "\n",
38 "is_success: ", $url->{ is_success }, "\n",
39 "status: ", $url->{ status }, "\n";
46 Create a URL Checker. An optional timeout and an optional user-agent string
47 may be passed, in that order. The returned object can be used to set the default host variable:
49 my $checker = C4::URL::Checker->new( );
50 $checker->{ host_default } = 'http://mylib.kohalibrary.com';
54 Check all URLs from a biblio record. Returns a reference to an array
55 containing every URL found, together with the result of its check. Returns nothing if the record has no 856 field.
57 my $checked_urls = $checker->check_biblio( 123 );
59 With 2 URLs, the returned array will look like this:
63 'url' => 'http://mylib.tamil.fr/img/62265_0055B.JPG',
68 'url' => 'http://mylib.tamil.fr//img/62265_0055C.JPG',
70 'status' => '404 - Page not found'
88 my ($class, $timeout, $agent) = @_;
90 my $uagent = new LWP
::UserAgent
;
91 $uagent->agent( $agent ) if $agent;
92 $uagent->timeout( $timeout) if $timeout;
93 $self->{ user_agent
} = $uagent;
94 $self->{ bad_url
} = { };
103 my $biblionumber = shift;
104 my $uagent = $self->{ user_agent
};
105 my $host = $self->{ host_default
};
106 my $bad_url = $self->{ bad_url
};
108 my $record = GetMarcBiblio
({ biblionumber
=> $biblionumber });
109 return unless $record->field('856');
112 foreach my $field ( $record->field('856') ) {
113 my $url = $field->subfield('u');
115 $url = "$host/$url" unless $url =~ /^http
/;
116 my $check = { url
=> $url };
117 if ( $bad_url->{ $url } ) {
118 $check->{ is_success
} = 1;
119 $check->{ status
} = '500 Site already checked';
122 my $req = HTTP
::Request
->new( GET
=> $url );
123 my $res = $uagent->request( $req, sub { die }, 1 );
124 if ( $res->is_success ) {
125 $check->{ is_success
} = 1;
126 $check->{ status
} = 'ok';
129 $check->{ is_success
} = 0;
130 $check->{ status
} = $res->status_line;
131 $bad_url->{ $url } = 1;
159 my $uriedit = "/cgi-bin/koha/cataloguing/addbiblio.pl?biblionumber=";
163 'verbose' => \
$verbose,
167 'host-pro=s' => \
$host_pro,
168 'agent=s' => \
$agent,
169 'timeout=i', => \
$timeout,
174 pod2usage
( -verbose
=> 2 );
180 my $biblionumber = shift;
181 my $html = "<a href=\"$host_pro$uriedit$biblionumber\">$biblionumber</a>";
187 # Check all URLs from all current Koha biblio records
190 my $checker = C4
::URL
::Checker
->new($timeout,$agent);
191 $checker->{ host_default
} = $host;
193 my $context = new C4
::Context
( );
194 my $dbh = $context->dbh;
195 my $sth = $dbh->prepare(
196 "SELECT biblionumber FROM biblioitems WHERE url <> ''" );
205 while ( my ($biblionumber) = $sth->fetchrow ) {
206 my $result = $checker->check_biblio( $biblionumber );
207 next unless $result; # No URL
208 foreach my $url ( @
$result ) {
209 if ( ! $url->{ is_success
} || $verbose ) {
211 ?
"<tr>\n<td>" . bibediturl
( $biblionumber ) .
212 "</td>\n<td>" . $url->{url
} . "</td>\n<td>" .
213 $url->{status
} . "</td>\n</tr>\n\n"
214 : "$biblionumber\t" . $url->{ url
} . "\t" .
215 $url->{ status
} . "\n";
219 print "</table>\n</body>\n</html>\n" if $html;
227 if ( $html && !$host_pro ) {
232 print "Error: host-pro parameter or host must be provided in html mode\n";
243 check-url.pl - Check URLs from 856$u field.
249 =item check-url.pl [--verbose|--help] [--html] [--host-pro=http://koha-pro.tld] [--agent=agent-string] [--timeout=15] [--host=http://default.tld]
251 Scan all URLs found in 856$u of bib records
252 and display if resources are available or not.
253 This script is deprecated. You should use check-url-quick.pl instead.
261 =item B<--host=http://default.tld>
263 Server host used when a URL doesn't have one, i.e. doesn't begin with 'http'.
264 For example, if --host=http://www.mylib.com, then when 856$u contains
265 'img/image.jpg', the URL checked is 'http://www.mylib.com/img/image.jpg'.
267 =item B<--verbose|-v>
269 Outputs both successful and failed URLs.
273 Formats output in HTML. The result can be redirected to a file
274 accessible by HTTP. This way, it's possible to link directly to the biblio
275 record in edit mode. With this parameter, B<--host-pro> (or B<--host>) is required.
277 =item B<--host-pro=http://koha-pro.tld>
279 Server host used to link to biblio record editing page.
281 =item B<--agent=agent-string>
283 Change the default libwww user-agent string to a custom one. Some sites do
284 not like the libwww user-agent and return false 40x failure codes,
285 so this allows Koha to report itself as Koha, or as a browser.
287 =item B<--timeout=15>
289 Timeout for fetching URLs. By default 15 seconds.
293 Print this help page.