bin/pick-regression.pl

   1 #!/usr/bin/perl
   2
   3 # use 5.010;
   4 use strict;
   5 use warnings;
   6
   7 =head1 NAME
   8
   9
  10
  11 =head1 SYNOPSIS
  12
  13
  14
  15 =head1 OPTIONS
  16
  17 =over 8
  18
  19 =cut
  20
  21 my @opt = <<'=back' =~ /B<--(\S+)>/g;
  22
  23 =item B<--dir=s>
  24
  25 Path to the directory containing all .slv files. Originally defaulted
  26 to C</home/ftp/cnntp-solver-2009/workdir/solved>. For ds8143 changed
  27 to C</home/andreas/data/cnntp-solver-2009/workdir/solved>
  28
  29 =item B<--help|h!>
  30
  31 This help
  32
  33 =item B<--name=s>
  34
  35 Name of a regression variable. Originally written for 1.005000_002.
  36 Later defaulted to C<eq_1.005000_004>. Now changed to eq_0.99.
  37
  38 =item B<--pick=s@>
  39
  40 Instead of a readdir in the directory given in --dir, just pick the
  41 names in this option. Useful for debugging.
  42
  43 =item B<--title=s>
  44
  45 Defaults to C<mod:Test::More>. Title of the regression.
  46
  47 =item B<--verbose!>
  48
  49 Reports which distro it is working on
  50
  51 =back
  52
  53 =head1 DESCRIPTION
  54
  55 Developed for the Test-Simple-1.005000_002 debakel in 2011. Parses all
  56 .slv files we have for regressions matching title and variable and
  57 shows R^2, Theta, and age of the slv file.
  58
  59 =head1 HISTORY
  60
  61 =head2 First incantation for 1.005000_002:
  62
  63   /usr/local/perl-5.12.3/bin/perl -I CPAN-Blame/lib /tmp/pick-regression.pl | awk '$3<-0.75' | sort -nr -k 2 > pick-regression.out
  64
  65 Many hits with very low R^2 included. Slow. Started at 2011-12-29
  66 11:37, finished at 11:51. Very stupid idea to filter in a pipe because
  67 afterwards we want to see how many hits we have filtered out.
  68
  69 =head2 Second try (slightly manipulated):
  70
  71   time /usr/local/perl-5.12.3/bin/perl -I CPAN-Blame/lib bin/pick-regression.pl | sort -nr --key=2,3 > pick-regression.out-20120101 &|
  72
  73 This gives us 840 lines among which we find plenty of tests working
  74 well. Where do we want to cut?
  75
  76   awk '$3<-0.75 && $2>0.35' pick-regression.out2 | wc
  77     103     309    6901
  78
  79 103 hits seems like a useful sample.
  80
  81 Issue posted: https://github.com/schwern/test-more/issues/249 with the
  82 title C<100 candidates having bad experiences with 1.005000_002>
  83
  84 =head2 Third try 2013-10 when Test-Simple-0.99 was buggy:
  85
  86 I had no luck finding the interesting bits. I ran
  87
  88   % awk '$3<=-0.75' pick-regression.out-Test-Simple-0.99-20131026-03 | sort -n -k 2
  89
  90 But for nearly all highscores it turned out that it was the co-variant
  91 effect of having 0.99 in the newest results. Most of them were broken
  92 for different reasons and since the newest tests were run with 0.99
  93 they came out with high values. No idea what we can do about it short
  94 of fixing *all* bugs. Or filter by gut feeling. A good thing is to
  95 install previous versions again and again: this makes the older
  96 version also appear among the candidates, not only the one we are
  97 chasing. But it is not good enough. Open question.
  98
  99 =head2 Fourth try. Changing default for $Opt{name} to C< eq_1.302013_014 >.
 100
 101 2016-04-30: recommended: call me with C<< |& tee bin/pick-regression.out-`date +%F` >>
 102 Now going to call with arguments: --name=eq_1.302014_009 and --title=mod:Test::More
 103
 104 =head2 Different name and title
 105
 106   --title="qr:Unescaped left brace in regex is illegal"
 107   --name=eq_1
 108
 109   % time ~/src/installed-perls/v5.16.0/4e6d/bin/perl -I CPAN-Blame/lib  bin/pick-regression.pl \
 110     --name=eq_1 --title="qr:Unescaped left brace in regex is illegal" \
 111     |& tee bin/pick-regression.out-`date +%FT%T`
 112
 113 This run took 3 hours.
 114
 115 =cut
 116
 117
 118 use FindBin;
 119 use lib "$FindBin::Bin/../lib";
 120 BEGIN {
 121     push @INC, qw(       );
 122 }
 123
 124 use Dumpvalue;
 125 use File::Basename qw(dirname);
 126 use File::Path qw(mkpath);
 127 use File::Spec;
 128 use File::Temp;
 129 use Getopt::Long;
 130 use Hash::Util qw(lock_keys);
 131 use YAML::Syck;
 132
 133 use FindBin ();
 134 use lib "$FindBin::Bin/../CPAN-Blame/lib";
 135 use CPAN::Blame::Model::Solved;
 136
 137 our %Opt;
 138 lock_keys %Opt, map { /([^=!]+)/ } @opt;
 139 GetOptions(\%Opt,
 140            @opt,
 141           ) or pod2usage(1);
 142
 143 $Opt{dir} ||= "/home/andreas/data/cnntp-solver-2009/workdir/solved";
 144 $Opt{name} ||= "eq_1.302013_014";
 145 $Opt{title} ||= "mod:Test::More";
 146
 147 warn "title: $Opt{title}
 148 name: $Opt{name}
 149 ";
 150
 151 opendir my $dh, $Opt{dir} or die "Could not opendir '$Opt{dir}': $!";
 152 $|=1;
 153 my @dirent;
 154 if ($Opt{pick}) {
 155     @dirent = @{$Opt{pick}};
 156 } else {
 157     @dirent = readdir $dh;
 158 }
 159 for my $dirent (@dirent) {
 160     next unless $dirent =~ /^(.+)\.slv$/;
 161     my $vdistro = $1;
 162     my $abs = "$Opt{dir}/$dirent";
 163     my $content = do {
 164         open my $fh, "<", $abs or die "Could not open: $!";
 165         local $/;
 166         <$fh>
 167     };
 168     my $age = -M $abs;
 169     my $yml_file = "$Opt{dir}/$vdistro.yml";
 170     my $author = "???";
 171     if (-e $yml_file) {
 172         my $y = YAML::Syck::LoadFile($yml_file);
 173         $author = $y->{author} if $y->{author};
 174     }
 175     if ($Opt{verbose}) {
 176         warn "Processing vdistro '$vdistro'\n";
 177     }
 178     my $regs = CPAN::Blame::Model::Solved->regression_tables($content);
 179     my($reg) = grep { $_->{name} eq $Opt{title} } @$regs or next;
 180     my($line) = grep { @$_ and $_->[0] =~ /='\Q$Opt{name}\E'/ } @{$reg->{regression}} or next;
 181     my $author_vdistro = sprintf "%s/%s", $author, $vdistro;
 182     printf "%-48s %-19s %6.3f %5.2f\n", $author_vdistro, $reg->{rsq}, $line->[1], $age;
 183 }
 184
 185 # Local Variables:
 186 # mode: cperl
 187 # cperl-indent-level: 4
 188 # End: