trust your DB implementation, particularly if ancestor data are already available
[bioperl-live.git] / maintenance / module_usage.pl
blobe1d0925bf62c8712f5b47b52ef94586518530134
1 #!/usr/bin/perl
2 #
3 # Counts up all the used and inherited modules in a directory of modules to
4 # help indicate which the most important modules are, graphs it also
6 # Written by Sendu Bala, using much code directly from
7 # http://www.perlmonks.org/?displaytype=displaycode;node_id=87329
8 # and also
9 # http://search.cpan.org/src/NEILB/pmusage-1.2/pmusage
11 use strict;
12 use warnings;
14 use IO::File;
15 use File::Find;
16 use Getopt::Std;
17 use GraphViz;
19 sub usage
21 print <<EOF;
22 $0 -- Shows which modules are used the most in a directory of modules
24 usage: $0 [-f outfile] [-l listfile] [-h] [-v] [dir]
25 -f outfile specify output file (default=module_usage)
26 -h get this help message
27 -i fmt set image format to fmt (default=jpeg)
28 also available: canon,text,ps,hpgl,pcl,mif,pic,gd,gd2,gif,jpeg,
29 png,wbmp,vrml,vtx,mp,fig,svg,plain *nb* only jpeg is tested as
30 working correctly
31 -l listfile get filenames/options from listfile
32 -v list filenames to STDERR
34 If directory names are given, all the *.p[lm] files in the directory will
35 be processed. The default is to do all the Perl files in the current directory.
38 The graphical output is a *simplification with information loss*, since BioPerl
39 is too complex to graph 'raw'. The simplification proceeds as follows:
40 # It is determined which modules each BioPerl package (aka class, module)
41 'uses' (what modules does it load via 'use', 'require' or inherit from
42 via 'use base', excluding external (non-BioPerl) modules).
43 # Packages with identical usage (ignoring the type of usage) are grouped
44 together.
45 # The graph shows all the groups with more than one member as nodes, with edges
46 from them pointing to the individual packages that they use.
47 # The set of those individual packages pointed to by groups also have edges
48 showing their use-relationship to other members of the set (only).
49 # Members of that set are also shaded in red. The saturation of the shade
50 indicates how many packages use that package (so dark red packages are used a
51 lot).
52 # Edges are coloured green to show inheritance-type usage, or blue to show
53 'use'/'require'-type usage. It is possible that some members of a group
54 inherit from a particular package, whilst other members only 'use' it. In that
55 case, the colour is based on which is most common for the group.
56 # Groups with members all from the same subdirectory, and individual packages
57 pointed to by groups, are 'clustered' if they are from the same subdirectory
58 EOF
59 exit shift;
62 # process cmdline options
63 my $opts = 'f:l:hvi:';
64 my %opts;
65 getopts($opts, \%opts) || usage(1);
66 usage(0) if defined($opts{h});
67 while (defined($opts{l})) {
68 my $lFile = IO::File->new($opts{l}) or die "can't open -l file $opts{l} : $!\n";
69 my @largs = <$lFile>;
70 chomp(@largs);
71 splice(@ARGV, 0, 0, @largs);
72 delete($opts{l});
73 getopts($opts, \%opts) || usage(1);
74 $lFile->close();
77 my $outfile = defined($opts{f}) ? $opts{f} : "module_usage";
78 my $format = defined($opts{i}) ? $opts{i} : 'jpeg';
80 # now filenames are in @ARGV
81 push(@ARGV, '.') if !@ARGV;
83 my @files;
84 my %sections;
86 sub findPerlFiles {
87 -f $_ && /^.*\.p[ml]\z/si && push(@files, $File::Find::name);
90 # process directories
91 foreach my $top (@ARGV) {
92 File::Find::find({wanted => \&findPerlFiles}, $top);
95 my %usage;
96 my %users;
97 my %inheritance;
98 my %packages;
100 sub store_package_usage {
101 my ($package, $used) = @_;
102 my %used = %{$used};
104 STDERR->print("package $package used (".join(' ', keys %used).")\n") if $opts{v};
106 $packages{$package} = \%used;
108 foreach my $module (keys %used) {
109 $usage{$module}++;
110 push (@{$users{$module}}, $package);
114 foreach my $file (@files) {
115 $file =~ s#^./##;
116 STDERR->print("processing $file\n") if $opts{v};
117 my $f = IO::File->new($file) or warn "can't open $file: $!\n", next;
119 my ($package, %used);
121 my $pod = 0;
122 while (<$f>) {
123 if (/^=cut/) {
124 $pod=0;
125 next;
127 if (/^=[a-zA-Z]+/) {
128 $pod=1;
129 next;
131 next if $pod;
133 if (/^\s*package\s+([[:word:]:]+)\s*;/) {
134 if ($package) {
135 store_package_usage($package, \%used);
136 %used = ();
139 $package = $1;
140 next;
142 if (/use base\s*(.*)/) {
143 my $tmp = $1;
144 while (!/;/) # accumulate ISA value for multiple lines
146 $_ = <$f>;
147 $tmp .= $_;
149 my @use_base = eval $tmp;
150 if ($@) { warn "Unparseable 'use base' line for $package: $tmp"; next }
152 foreach my $module (@use_base) {
153 $used{$module} = 1;
154 $inheritance{$package}->{$module} = 1;
157 elsif (/^\s*use\s+([^\s;()]+)/ || /^\s*require\s+([^\s;()'"]+)/) {
158 $used{$1} = 1;
161 $f->close();
163 if ($package) {
164 store_package_usage($package, \%used);
168 # simplify so we can view a graph of usage: we group all packages that have
169 # identical usage. NB: this doesn't look at external modules at all
170 my %groups;
171 while (my ($package, $used_hash) = each %packages) {
172 my @used_packages;
173 foreach my $used_module (sort keys %{$used_hash}) {
174 next unless defined $packages{$used_module};
175 push(@used_packages, $used_module);
177 @used_packages || next;
179 push(@{$groups{join('|', @used_packages)}}, $package);
182 # we're going to shade boxes based on usage later, figure out an appropriate
183 # shade range by ranking
184 my %counts;
185 while (my ($group, $pack_list) = each %groups) {
186 my @children = @{$pack_list};
188 @children > 1 || next;
190 my @parents = split(/\|/, $group);
191 foreach my $parent (@parents) {
192 my $count = $usage{$parent};
193 $counts{$parent} = $count;
196 my %ranks;
197 my $rank = 0;
198 my $prev_count;
199 foreach my $parent (sort { $counts{$a} <=> $counts{$b} } keys %counts) {
200 my $this_count = $counts{$parent};
201 $ranks{$parent} = $prev_count && $prev_count != $this_count ? ++$rank : $rank;
202 $prev_count = $this_count;
205 sub class_to_subdir {
206 my $class = shift;
207 $class =~ s/::[^:]+$//;
208 return $class;
211 my $g = GraphViz->new(concentrate => 1,
212 node => {shape => 'box'},
213 $format eq 'ps' ? (pagewidth => 46.81, pageheight => 33.11) : ()); # A0 for ps output
214 my $inherited_edge_colour = 'green';
215 my $used_edge_colour = 'blue';
216 my $cluster_colour = 'black'; #*** darkgray, 0,0,0.31 don't work, why?!
217 my $child_id = 0;
218 my $group_definitions = '';
219 my %parents;
220 while (my ($group, $pack_list) = each %groups) {
221 my @children = @{$pack_list};
223 # ignore single child groups (required or graph gets too wide to jpeg)
224 @children > 1 || next;
226 # we'll cluster if all children belong to the same subdirectory
227 my %subdirs;
228 foreach my $child (@children) {
229 $subdirs{class_to_subdir($child)} = 1;
231 my $subdir;
232 if (keys %subdirs == 1) {
233 ($subdir) = keys %subdirs;
234 undef $subdir if $subdir eq 'Bio';
237 my $this_child = 'group'.++$child_id;
238 $g->add_node($this_child,
239 style => 'dashed',
240 label => "$this_child:\n".join("\n", @children),
241 $subdir ? (cluster => {name => $subdir, style => 'dotted', color => $cluster_colour}) : ());
243 my @parents = split(/\|/, $group);
245 $group_definitions .= " $this_child consists of ".scalar(@children)." packages: ".join(', ', @children)."\n $this_child members use ".scalar(@parents)." other packages: ".join(', ', @parents)."\n\n";
247 foreach my $parent (@parents) {
248 # we'll shade the parent box based on how many packages use it
249 my $this_rank = $ranks{$parent};
250 my $shade = (1 / $rank) * $this_rank;
252 # we'll colour the edge based on if we inherited this parent or just
253 # used it, going by the most common for the group
254 my ($inherited, $used) = (0, 0);
255 foreach my $child (@children) {
256 if (defined $inheritance{$child}->{$parent}) {
257 $inherited++;
259 else {
260 $used++;
263 my $edge_colour = $inherited > $used ? $inherited_edge_colour : $used_edge_colour;
265 # we'll cluster if this isn't a base Bio::x class
266 my $subdir = class_to_subdir($parent);
267 undef $subdir if $subdir eq 'Bio';
269 $g->add_node($parent,
270 style => 'filled',
271 fillcolor => "0,$shade,1",
272 $subdir ? (cluster => {name => $subdir, style => 'dotted', color => $cluster_colour}) : ());
273 $parents{$parent} = 1;
274 $g->add_edge($this_child => $parent, color => $edge_colour);
278 # show links between parents
279 foreach my $parent (keys %parents) {
280 my %used = %{$packages{$parent}};
282 foreach my $used (keys %used) {
283 next unless defined $parents{$used};
284 $g->add_edge($parent => $used, color => defined $inheritance{$parent}->{$used} ? $inherited_edge_colour : $used_edge_colour);
288 # write out graph
289 my $output = IO::File->new($outfile.".$format", 'w') or die "can't open $outfile.$format: $!\n";
290 $output->print(eval "\$g->as_$format()");
291 $output->close();
294 my $package_count = keys %packages;
295 my $total_used = keys %usage;
297 my $results_str = "Packages investigated: $package_count\nTotal modules used: $total_used\n\n";
300 # descriptive text output
301 # list by popularity
302 my @internal;
303 my @external;
304 foreach my $module (sort { $usage{$b} <=> $usage{$a} || $a cmp $b } keys %usage) {
305 my $count = $usage{$module};
307 if (defined $packages{$module}) {
308 push(@internal, " $module => used $count times");
310 else {
311 my $by = '';
312 if ($count <= 5) {
313 $by = " by ".join(", ", @{$users{$module}});
315 push(@external, " $module => used $count times$by");
319 $results_str .= "External module usage:\n".join("\n", @external);
320 $results_str .= "\n\nPackage usage:\n".join("\n", @internal);
322 # list the packages that aren't used by any other package
323 $results_str .= "\n\nPackages not used by any other:\n";
324 foreach my $package (sort keys %packages) {
325 next if $usage{$package};
326 $results_str .= " $package\n";
329 # define the groups referenced in the graph
330 $results_str .= "\nGroup definitions:\n$group_definitions";
332 # write out descriptive text file
333 $output = IO::File->new($outfile.'.txt', 'w') or die "can't open $outfile.txt: $!\n";
334 $output->print($results_str);
335 $output->close();
337 exit;