reimplement various methods in terms of get_dbxrefs, for consistency
[bioperl-live.git] / maintenance / modules.pl
blobaaee1e86f5354f8de4f876f249ba2953d6d89577
1 #!/usr/bin/perl -w
2 # $Id$
4 =head1 NAME
6 modules.pl - information about modules in BioPerl core
8 =head1 SYNOPSIS
10 B<modules.pl> [B<-V|--verbose>] [B<-c|--count>] | [B<-l|--list>] |
11 [B<-u|--untested>] | [B<-i|--info> class] | [B<-i|--inherit> |
12 [B<-d|--dir> path ] | [B<-v|--version> | [B<-?|-h|--help>]
14 =head1 DESCRIPTION
16 This script counts, lists and provides other information about bioperl
17 modules. It is mainly meant to be run by bioperl maintainers.
19 The default action is to count modules in the bioperl core
20 distribution. Based on the class name it tries to classify them into
21 categories. The following is a tentative glossary of terms used.
24 =over 4
26 =item Base
28 Synonyms: Generic class, parameterized class, generic module.
30 A class that you don't instantiate in your scripts, but that it's a
31 template for other classes.
33 Examples: Bio::Tools::Run::WrapperBase - a base object for wrappers
34 around executables. Bio::Tools::Analysis::SimpleAnalysisBase - an
35 abstract superclass for SimpleAnalysis implementations
37 This are counted with C</:Base/ | /Base$/>; They have "Base" in the
38 beginning or end of the name.
40 =item Interface
42 Synonyms: protocol, feature set.
44 Class that defines a set of features that are common to a group of
45 classes.
47 Example: Bio::Tree::NodeI - interface describing a Tree Node.
49 This are counted with C</[^A-Z]I$/>; They have "I" at the end of the
50 name.
52 =item Component
54 A class that implements a small subset of their superclass. They are in
55 a directory with an identical name of the superclass. There are plenty
56 of them. You need only a small number of methods to be overridden.
58 Example: Bio::SeqIO::fasta.
60 This is counted with C</.*:[a-z]/>; Classes are inside their base directory
61 and all in lowercase.
63 =item Instance
65 The rest of them. It is sometimes helpful to divide them into two
66 types:
68 =over 2
70 =item Algorithmic classes
72 Example: Bio::AlignIO - Handler for AlignIO formats
74 =item Storage classes
76 Example: Bio::SimpleAlign - Multiple alignments held as a set of
77 sequences
79 =back
81 =back
83 =cut
86 # The helper class to store class status;
88 package BioClass;
90 sub new {
91 my $class = shift;
92 my $name = shift;
93 die unless $name;
95 my $self = {};
96 $self->{'name'} = $name;
97 $self->{'tested'} = 0;
98 $self->{'type'} = '';
99 $self->{'path'} = '';
101 bless $self, $class;
105 sub name {
106 my $self = shift;
107 return $self->{'name'};
110 sub tested {
111 my $self = shift;
112 my $value = shift;
113 $self->{'tested'} = 1 if defined $value && $value;
114 return $self->{'tested'} || 0;
117 sub type {
118 my $self = shift;
119 my $value = shift;
120 $self->{'type'} = $value if defined $value;
121 return $self->{'type'};
124 sub path {
125 my $self = shift;
126 my $value = shift;
127 $self->{'path'} = $value if defined $value;
128 return $self->{'path'};
131 sub add_superclass {
132 my $self = shift;
133 my $superclass = shift;
134 return unless $superclass;
135 $self->{'superclasses'}->{$superclass} = 1 ;
138 sub each_superclass {
139 my $self = shift;
140 return keys %{$self->{'superclasses'}};
143 sub add_used_class {
144 my $self = shift;
145 my $used_class = shift;
146 return unless $used_class;
147 $self->{'used_classes'}->{$used_class} = 1 ;
150 sub each_used_class {
151 my $self = shift;
152 return keys %{$self->{'used_classes'}};
155 package main;
157 use File::Find;
158 use Getopt::Long;
159 use Data::Dumper;
160 use strict;
163 # declare subroutines
164 sub dir;
165 sub modules;
166 sub count;
167 sub list_all;
168 sub untested;
169 sub info;
170 sub inherit;
171 sub synopsis;
172 sub version;
174 # command line options
175 my ($dir, $count,$list, $verbose,$info,$untested, $inherit, $synopsis,
176 $version);
177 GetOptions(
178 'dir:s' => \$dir,
179 'count' => \$count,
180 'list' => \$list,
181 'test_BioClass' => \&_test_BioClass,
182 'V|verbose' => \$verbose,
183 'untested' => \$untested,
184 'info:s' => \$info,
185 'inherit' => \$inherit,
186 'synopsis' => \$synopsis,
187 'version' => \$version,
188 'h|help|?' => sub{ exec('perldoc',$0); exit(0) }
192 our %MODULES; # storage structure
194 # find modules
195 my $pwd = $ENV{PWD};
196 my $seachdir = "$pwd/../Bio"; #default
197 my %FIND_OPTIONS = ( wanted => \&modules );
199 $seachdir = "$pwd/$dir" if $dir;
200 find \%FIND_OPTIONS, $seachdir;
203 # call subroutines
204 if ($list) { list_all }
205 elsif ($untested) { untested }
206 elsif ($info) { info($info) }
207 elsif ($inherit) { inherit }
208 elsif ($synopsis) { synopsis }
209 elsif ($version) { version }
210 else { count }
213 ################# end main ####################
217 # subroutines;
220 sub _test_BioClass {
221 $a = new BioClass('Bio::Test');
222 print "Class name: ", $a->name(), "\n";
223 $a->add_superclass('Bio::Super');
224 $a->add_superclass('Bio::Super2');
225 $a->tested(1);
226 $a->type('instance');
227 print Dumper [$a->each_superclass] if $a->tested;
228 print Dumper $a;
229 exit;
232 sub modules {
233 return unless /\.pm$/ ;
234 #return unless -e $_;
235 #print "file: $_\n" if $verbose;
236 open (F, $_) or warn "can't open file $_: $!" && return;
237 my $class;
238 while (<F>) {
239 if (/^package\s+([\w:]+)\s*;/) {
240 #print $1, "\n" if $verbose;
241 $_ = $1;
242 $class = new BioClass($_);
243 $MODULES{$_} = $class;
244 if (/.*:[a-z]/) {
245 $class->type('component');
246 } elsif (/:Base/ | /Base$/) {
247 $class->type('base');
248 } elsif (/[^A-Z]I$/) {
249 $class->type('interface');
250 } else {
251 $class->type('instance');
253 $class->path($File::Find::name);
255 if (/^\w*use/ && /(Bio[\w:]+)\W*;/ && not /base/) {
256 next unless $class;
257 #print "\t$1\n" if $verbose;
258 $class->add_used_class($1);
260 if ((/\@ISA/ || /use base/) && /Bio/) {
261 next unless $class;
262 my $line = $_;
263 while ( $line =~ /(Bio[\w:]+)/g) {
264 #print "\t$1\n" if $verbose;
265 $class->add_superclass($1);
268 if (/\@ISA/ && /Bio/) {
269 next unless $class;
270 my $line = $_;
271 while ( $line =~ /(Bio[\w:]+)/g) {
272 #print "\t$1\n" if $verbose;
273 $class->add_superclass($1);
277 close F;
280 =head1 OPTIONS
282 Only one option is processed on each run of the script. The --verbose
283 is an exception, it modifies the amount of output.
285 =over 4
287 =item B<-V | --verbose>
289 B<INACTIVE>
291 Set this option if you want to see more verbose output. Often that
292 will mean seeing warnings normally going into STDERR.
294 =cut
296 =item B<-d | --dir> path
298 Overides the default directories to check by one directory 'path' and
299 all its subdirectories.
301 =item B<-c | --count>
303 The default action if no other option is given. Gives the count of
304 modules broken to B<instance> ("usable"), B<base> ( (abstract)?
305 superclass) , B<interface> (the "I" files) and B<component> (used from
306 instantiable parent) modules, in addition to total number of modules.
308 Note that abstract superclass in bioperl is not an enforced concept and
309 they are not clearly indicateded in the class name.
311 =cut
313 sub count {
314 printf "Instance : %3d\n",
315 scalar (grep $MODULES{$_}->type =~ /instance/ , keys %MODULES);
316 printf "Base : %3d\n",
317 scalar (grep $MODULES{$_}->type =~ /base/ , keys %MODULES);
318 printf "Interface: %3d\n",
319 scalar (grep $MODULES{$_}->type =~ /interface/ , keys %MODULES);
320 printf "Component: %3d\n",
321 scalar (grep $MODULES{$_}->type =~ /component/ , keys %MODULES);
322 print "--------------\n";
323 printf "Total : %3d\n", scalar (keys %MODULES);
327 =item B<-l | --list>
329 Prints all the module names in alphabetical order. The output is a tab
330 separated list of category (see above) and module name per line. The
331 output can be processed with standard UNIX command line tools.
333 =cut
335 sub list_all {
336 foreach ( sort keys %MODULES) {
337 print $MODULES{$_}->type. "\t$_\n";
341 =item B<-u | --untested>
343 Prints a list of instance modules which are I<not> explicitly used by
344 test files in the directory. Superclasess or any classes used by others
345 are not reported, either, since their methods are assumed to be tested
346 by subclass tests.
349 =cut
351 sub _used_and_super {
352 my $name = shift;
353 # print "-:$name\n" if /Locati/;
354 foreach ($MODULES{$name}->each_superclass) {
355 next unless defined $MODULES{$_};
356 # print "-^$_\n" if /Locati/;
357 # unless (defined $MODULES{$_} or $MODULES{$_}->tested) {
358 if (not $MODULES{$_}->tested) {
359 $MODULES{$_}->tested(1);
360 _used_and_super($_);
363 foreach ($MODULES{$name}->each_used_class) {
364 next unless defined $MODULES{$_};
365 # print "--$_\n" if /Locati/;
366 # unless (defined $MODULES{$_} or $MODULES{$_}->tested) {
367 if (not $MODULES{$_}->tested) {
368 $MODULES{$_}->tested(1);
369 _used_and_super($_);
371 # $MODULES{$_}->tested(1) && _used_and_super($_)
372 # unless defined $MODULES{$_} or $MODULES{$_}->tested;
374 return 1;
377 sub untested {
378 foreach (`find ../t -name "*.t" -print | xargs grep -hs "[ur][se][eq]"`) {
379 s/.*use +//;
380 s/.*require +//;
381 next unless /^Bio/;
383 s/[\W;]+$//;
384 s/([\w:]+).*/$1/;
385 my $name = $_;
387 next unless $MODULES{$_};
388 $MODULES{$_}->tested(1)
389 unless defined $MODULES{$_} and $MODULES{$_}->tested;
391 next if $MODULES{$name}->name eq "Bio::SeqIO::abi"; # exception: requires bioperl ext package
392 next if $MODULES{$name}->name eq "Bio::SeqIO::ctf"; # exception: requires bioperl ext package
393 next if $MODULES{$name}->name eq "Bio::SeqIO::exp"; # exception: requires bioperl ext package
394 next if $MODULES{$name}->name eq "Bio::SeqIO::pln"; # exception: requires bioperl ext package
395 next if $MODULES{$name}->name eq "Bio::SeqIO::ztr"; # exception: requires bioperl ext package
396 # print $MODULES{$name}->name, "\n";
397 # print Dumper $MODULES{$name};
399 _used_and_super($name);
403 foreach ( sort keys %MODULES) {
405 # skip some name spaces
406 next if /^Bio::Search/; # Bio::Search and Bio::SearchIO are extensively tested
407 # but classes are used by attribute naming
409 print "$_\n" if
410 $MODULES{$_}->type eq 'instance' and ($MODULES{$_}->tested == 0) ;
415 =item B<-i | --info> class
417 Dumps information about a class given as an argument.
419 =cut
421 sub info {
422 my $class = shift;
423 die "" unless $class;
424 #print Dumper $MODULES{$class};
425 my $c = $MODULES{$class};
426 print $c->name, "\n";
427 printf " Type:\n\t%s\n", $c->type;
428 print " Superclasses:\n";
429 foreach (sort $c->each_superclass) {
430 print "\t$_\n";
432 print " Used classes:\n";
433 foreach (sort $c->each_used_class) {
434 print "\t$_\n";
439 =item B<-i | --inherit>
441 Finds interface modules which inherit from an instantiable class.
443 Could be extended to check other bad inheritance patterns.
445 =cut
447 sub inherit {
448 foreach ( sort keys %MODULES) {
449 my $c=$MODULES{$_};
450 next unless $c->type =~ /interface/;
451 foreach my $super ($c->each_superclass) {
452 next if $super =~ /I$/;
453 print "Check this inheritance: ", $c->name, " <-- $super\n";
458 =item B<-s | --synopsis>
460 Test SYNOPSIS section of bioperl modules for runnability
462 =cut
464 sub synopsis {
465 foreach ( sort keys %MODULES) {
466 my $c=$MODULES{$_};
468 next unless $c->type eq "instance";
469 next if $c->name eq 'Bio::Root::Version';
470 next if $c->name eq 'Bio::Tools::HMM';
472 my $synopsis = '';
473 open (F, $c->path) or warn "can't open file ".$c->name.": $!" && return;
475 my $flag = 0;
476 while (<F>) {
477 last if $flag && /^=/;
478 $synopsis .= $_ if $flag;
479 $flag = 1 if /^=head1 +SYNOPSIS/;
482 # remove comments
483 $synopsis =~ s/[^\$]#[^\n]*//g;
484 # allow linking to an other Bio module, e.g.: See L<Bio::DB::GFF>.
485 $synopsis =~ s/[^\n]*L<Bio[^\n]*//g;
486 # protect single quotes
487 $synopsis =~ s/'/"/g;
489 my $res = `perl -ce '$synopsis' 2>&1 `;
490 next if $res =~ /syntax OK/;
491 print $c->path, "\n";
492 print $synopsis;
493 print $res;
494 print "-" x 70, "\n";
495 # print "SYNOPSIS not runnable\n";
496 close F;
500 =item B<-v | --version>
502 Test the VERSION of the module against the global one set in
503 Bio::Root::Variation. Print out the different ones.
505 =cut
507 sub version {
509 use Bio::Root::Version;
510 my $version = $Bio::Root::Version::VERSION;
512 my %skip = ( # these are defined together with an other module
513 # and can not be use independently
514 'Bio::AnalysisI::JobI' => 1,
515 'Bio::PrimarySeq::Fasta' => 1,
516 'Bio::DB::Fasta::Stream' => 1,
517 'Bio::DB::GFF::ID_Iterator' => 1,
518 'Bio::DB::GFF::Adaptor::dbi::faux_dbh' =>1,
519 'Bio::LiveSeq::IO::SRS' =>1 # tries to call an external module
522 foreach ( sort keys %MODULES) {
523 my $n=$MODULES{$_}->name;
524 next if $skip{$n};
525 my $vv= "\$${n}::VERSION";
526 my $v = `perl -we 'use $n; print $vv;'`;
527 printf "%50s %-3s\n", $n, $v unless $version eq $v;
531 __END__
533 =item B<-? | -h | --help>
535 This help text.
537 =back
539 =head1 FEEDBACK
541 =head2 Mailing Lists
543 User feedback is an integral part of the evolution of this and other
544 Bioperl modules. Send your comments and suggestions preferably to
545 the Bioperl mailing list. Your participation is much appreciated.
547 bioperl-l@bioperl.org - General discussion
548 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
550 =head2 Reporting Bugs
552 Report bugs to the Bioperl bug tracking system to help us keep track
553 of the bugs and their resolution. Bug reports can be submitted via the
554 web:
556 http://bugzilla.open-bio.org/
558 =head1 AUTHOR
560 Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
562 =head1 Contributors
564 Albert Vilella, avilella-AT-gmail-DOT-com
566 =cut