5 check_URLs.pl - validate URLs located in module code and POD
9 B<check_URLs.pl> [B<-d|--dir> path] [B<-v|--verbose>] [B<-?|-h|--help>]
10 [B<-o|--outfile> filename]
14 Checks code and POD of all bioperl-live modules for URLs, and validates them.
16 Output is a series of lines containing two fields, tab separated.
17 The first field is the file with the bad URL, the second is the URL itself.
19 The whole URL is not retrieved, only a HTTP "HEAD" request is done
20 to see if the URL exists on the server. The request is done using
21 B<LWP::Simple> so the B<http_proxy> environmental variable will be
24 The URL parsing may not be perfect - although I use the B<Regexp::Common::URI>
25 module, I have to manually clean up some URLs which are embedded in Perl
26 strings to convert the matched URL to a more probable real world URL,
27 e.g. most URLs don\'t end in "'," or ")" :-)
35 use Regexp
::Common
qw(URI);
36 use LWP
::Simple
qw($ua head);
41 # command line options
44 my ($verbose, $dir, $help) = (0, '../Bio/', undef);
47 'v|verbose' => \$verbose,
49 'o|outfile:s' => \$file,
50 'h|help|?' => sub{ exec('perldoc',$0); exit(0) }
56 open($fh, '>', $file) || die "Can't open file : $!";
65 find( \&find_modules, $dir );
68 # validate unique URLs and print fail cases to stdout
74 my ($url, $file) = @_;
75 if (exists $cached_urls{$url}) {
76 print STDERR "$url checked in ".$cached_urls{$url}[0].":".$cached_urls{$url}[1]."\n" if $verbose;
77 print $fh "$file\t$url\n" if $cached_urls{$url}[1] ne 'ok';
80 print STDERR "Checking $url in $file... " if $verbose;
82 my $status = $ok ? 'ok' : 'FAIL!';
83 print STDERR "$status!\n" if $verbose;
84 print $fh "$file\t$url\n" if !$ok;
85 $cached_urls{$url} = [$file, $status];
88 close $fh if $file; # don't close STDOUT
91 # this is where the action is
95 # only want files with .pm
96 return unless m/\.pm$/;
102 my $text = do { local( @ARGV, $/ ) = $fname ; <> } ;
105 while ($text =~ m/$RE{URI}{HTTP}{-keep}/g) {
106 my $url = $1 or next;
107 # remove Perl code if URL was embedded in string and other stuff
108 $url =~ s/\s*[.,;'")]*\s*$//;
109 check_url($url, $File::Find::name);
118 =item B<-d | --dir> path
120 Overides the default directory to recursively look for .pm file
121 (Default is '../Bio')
123 =item B<-v | --verbose>
125 Show the progress through files during the POD checking.
127 =item B<-? | -h | --help>
137 User feedback is an integral part of the evolution of this and other
138 Bioperl modules. Send your comments and suggestions preferably to
139 the Bioperl mailing list. Your participation is much appreciated.
141 bioperl-l@bioperl.org - General discussion
142 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
144 =head2 Reporting Bugs
146 Report bugs to the Bioperl bug tracking system to help us keep track
147 of the bugs and their resolution. Bug reports can be submitted via the
150 https://redmine.open-bio.org/projects/bioperl/
152 =head1 AUTHOR - Torsten Seemann
154 Email: torsten-dot-seemann-at-infotech-dot-monash-dot-edu-dot-au