new issue
[andk-cpan-tools.git] / bin / pick-regression.pl
blobcc0ead876f1ad4d6534a2f88644b59fd3397b936
1 #!/usr/bin/perl
3 # use 5.010;
4 use strict;
5 use warnings;
7 =head1 NAME
11 =head1 SYNOPSIS
15 =head1 OPTIONS
17 =over 8
19 =cut
21 my @opt = <<'=back' =~ /B<--(\S+)>/g;
23 =item B<--dir=s>
25 Path to the directory containing all .slv files. Originally defaulted
26 to C</home/ftp/cnntp-solver-2009/workdir/solved>. For ds8143 changed
27 to C</home/andreas/data/cnntp-solver-2009/workdir/solved>
29 =item B<--help|h!>
31 This help
33 =item B<--name=s>
35 Name of a regression variable. Originally written for 1.005000_002.
36 Later defaulted to C<eq_1.005000_004>. Now changed to eq_0.99.
38 =item B<--pick=s@>
40 Instead of a readdir in the directory given in --dir, just pick the
41 names in this option. Useful for debugging.
43 =item B<--title=s>
45 Defaults to C<mod:Test::More>. Title of the regression.
47 =item B<--verbose!>
49 Reports which distro it is working on
51 =back
53 =head1 DESCRIPTION
55 Developed for the Test-Simple-1.005000_002 debakel in 2011. Parses all
56 .slv files we have for regressions matching title and variable and
57 shows R^2, Theta, and age of the slv file.
59 =head1 HISTORY
61 =head2 First incantation for 1.005000_002:
63 /usr/local/perl-5.12.3/bin/perl -I CPAN-Blame/lib /tmp/pick-regression.pl | awk '$3<-0.75' | sort -nr -k 2 > pick-regression.out
65 Many hits with very low R^2 included. Slow. Started at 2011-12-29
66 11:37, finished at 11:51. Very stupid idea to filter in a pipe because
67 afterwards we want to see how many hits we have filtered out.
69 =head2 Second try (slightly manipulated):
71 time /usr/local/perl-5.12.3/bin/perl -I CPAN-Blame/lib bin/pick-regression.pl | sort -nr --key=2,3 > pick-regression.out-20120101 &|
73 This gives us 840 lines among which we find plenty of tests working
74 well. Where do we want to cut?
76 awk '$3<-0.75 && $2>0.35' pick-regression.out2 | wc
77 103 309 6901
79 103 hits seems like a useful sample.
81 Issue posted: https://github.com/schwern/test-more/issues/249 with the
82 title C<100 candidates having bad experiences with 1.005000_002>
84 =head2 Third try 2013-10 when Test-Simple-0.99 was buggy:
86 I had no luck finding the interesting bits. I ran
88 % awk '$3<=-0.75' pick-regression.out-Test-Simple-0.99-20131026-03 | sort -n -k 2
90 But for nearly all highscores it turned out that it was the co-variant
91 effect of having 0.99 in the newest results. Most of them were broken
92 for different reasons and since the newest tests were run with 0.99
93 they came out with high values. No idea what we can do about it short
94 of fixing *all* bugs. Or filter by gut feeling. A good thing is to
95 install previous versions again and again: this makes the older
96 version also appear among the candidates, not only the one we are
97 chasing. But it is not good enough. Open question.
99 =head2 Fourth try. Changing default for $Opt{name} to C< eq_1.302013_014 >.
101 2016-04-30: recommended: call me with C<< |& tee bin/pick-regression.out-`date +%F` >>
102 Now going to call with arguments: --name=eq_1.302014_009 and --title=mod:Test::More
104 =head2 Different name and title
106 --title="qr:Unescaped left brace in regex is illegal"
107 --name=eq_1
109 % time ~/src/installed-perls/v5.16.0/4e6d/bin/perl -I CPAN-Blame/lib bin/pick-regression.pl \
110 --name=eq_1 --title="qr:Unescaped left brace in regex is illegal" \
111 |& tee bin/pick-regression.out-`date +%FT%T`
113 This run took 3 hours.
115 =cut
118 use FindBin;
119 use lib "$FindBin::Bin/../lib";
120 BEGIN {
121 push @INC, qw( );
124 use Dumpvalue;
125 use File::Basename qw(dirname);
126 use File::Path qw(mkpath);
127 use File::Spec;
128 use File::Temp;
129 use Getopt::Long;
130 use Hash::Util qw(lock_keys);
131 use YAML::Syck;
133 use FindBin ();
134 use lib "$FindBin::Bin/../CPAN-Blame/lib";
135 use CPAN::Blame::Model::Solved;
137 our %Opt;
138 lock_keys %Opt, map { /([^=!]+)/ } @opt;
139 GetOptions(\%Opt,
140 @opt,
141 ) or pod2usage(1);
143 $Opt{dir} ||= "/home/andreas/data/cnntp-solver-2009/workdir/solved";
144 $Opt{name} ||= "eq_1.302013_014";
145 $Opt{title} ||= "mod:Test::More";
147 warn "title: $Opt{title}
148 name: $Opt{name}
151 opendir my $dh, $Opt{dir} or die "Could not opendir '$Opt{dir}': $!";
152 $|=1;
153 my @dirent;
154 if ($Opt{pick}) {
155 @dirent = @{$Opt{pick}};
156 } else {
157 @dirent = readdir $dh;
159 for my $dirent (@dirent) {
160 next unless $dirent =~ /^(.+)\.slv$/;
161 my $vdistro = $1;
162 my $abs = "$Opt{dir}/$dirent";
163 my $content = do {
164 open my $fh, "<", $abs or die "Could not open: $!";
165 local $/;
166 <$fh>
168 my $age = -M $abs;
169 my $yml_file = "$Opt{dir}/$vdistro.yml";
170 my $author = "???";
171 if (-e $yml_file) {
172 my $y = YAML::Syck::LoadFile($yml_file);
173 $author = $y->{author} if $y->{author};
175 if ($Opt{verbose}) {
176 warn "Processing vdistro '$vdistro'\n";
178 my $regs = CPAN::Blame::Model::Solved->regression_tables($content);
179 my($reg) = grep { $_->{name} eq $Opt{title} } @$regs or next;
180 my($line) = grep { @$_ and $_->[0] =~ /='\Q$Opt{name}\E'/ } @{$reg->{regression}} or next;
181 my $author_vdistro = sprintf "%s/%s", $author, $vdistro;
182 printf "%-48s %-19s %6.3f %5.2f\n", $author_vdistro, $reg->{rsq}, $line->[1], $age;
185 # Local Variables:
186 # mode: cperl
187 # cperl-indent-level: 4
188 # End: