Medium sized Internalization made by flattener against megalog-2018-06-29
[andk-cpan-tools.git] / bin / verify-distlookup-lcpassfail.pl
blob891a043c65177485b3f62d9d20d07143b30e9c14
1 #!/usr/bin/perl
3 # use 5.010;
4 use strict;
5 use warnings;
7 =head1 NAME
11 =head1 SYNOPSIS
13 verify-distlookup-cutipassfail.pl distv ...
15 =head1 OPTIONS
17 =over 8
19 =cut
21 my @opt = <<'=back' =~ /B<--(\S+)>/g;
23 =item B<--help|h!>
25 This help
27 =item B<--repairnth=i>
29 Defaults to 18, meaning every 18th record is subject to a zero run,
30 i.e. a run of
32 cnntp-solver.pl --cpanstats_distrofacts_zero=...
34 Setting this to 0 disables automatic repair.
36 =item B<--sleep=i>
38 Defaults to 2, meaning after every repair we sleep for 2 seconds.
40 =back
42 =head1 DESCRIPTION
44 distv arguments are optional. without such you get all
45 pass/pass/fail/fail data, that have deviating numbers for lastcalc
46 according to slvdv vs DB.
48 B<Probably a script of small utility>, but we keep it because it
49 helped us to find a missing record in the database.
51 Sind aber distv argumente gegeben, dann werden die Vergleiche fuer
52 diese angezeigt.
54 Die wahre Zahl der PASS und FAIL der letzten Calc findet man in der
55 .slvdv.gz Datei unter meta:ok in der verwegenen Struktur
57 "meta:ok" : {
58 "PASS" : {
59 "PASS" : 1411
61 "FAIL" : {
62 "FAIL" : 37
64 "NA" : {
65 "NA" : 1
69 Die Datei ist in json. Prove:
71 % zcat /home/andreas/data/cnntp-solver-2009/workdir/solved/MIME-tools-5.505.slvdv.gz | perl -le '
72 my $j = do {local $/; <STDIN>};
73 use JSON::XS;
74 my $h = JSON::XS->new->decode($j);
75 warn join ",", map { $_ => $h->{"meta:ok"}{$_}{$_} } qw(PASS FAIL NA)'
76 PASS,1411,FAIL,37,NA,1 at -e line 5, <STDIN> line 1.
78 lc_pass und lc_fail wurde lange Zeit und wird noch immer falsch
79 geschrieben. Sobald das in cnntp-solver.pl gefixt ist, sollte diese
80 Liste automatisch kuerzer werden, aber wer weiss. Die confusion about
81 lastcalc, lc_*, passfail_overview, timestamp bug, etc. ist enorm, da
82 muss etwas reduziert werden: canonische verbindliche Adressen fuer
83 primaerdaten und fuer gecachetes Material.
85 =cut
88 use FindBin;
89 use lib "$FindBin::Bin/../lib";
90 BEGIN {
91 push @INC, qw( );
94 use Dumpvalue;
95 use File::Basename qw(dirname);
96 use File::Path qw(mkpath);
97 use File::Spec;
98 use File::Temp;
99 use Getopt::Long;
100 use Pod::Usage;
101 use Hash::Util qw(lock_keys);
103 our %Opt;
104 lock_keys %Opt, map { /([^=|!]+)/ } @opt;
105 GetOptions(\%Opt,
106 @opt,
107 ) or pod2usage(1);
108 if ($Opt{help}) {
109 pod2usage(0);
111 $Opt{repairnth} //= 18;
112 $Opt{sleep} //= 2;
113 use CPAN::Blame::Config::Cnntp;
114 use Time::Moment;
115 use Algorithm::Numerical::Shuffle qw /shuffle/;
116 require DBI;
117 use JSON::XS;
119 my($workdir,$cpan_home,$ext_src);
120 BEGIN {
121 $workdir = File::Spec->catdir
122 ($CPAN::Blame::Config::Cnntp::Config->{solver_vardir},
123 "workdir");
124 $cpan_home = $CPAN::Blame::Config::Cnntp::Config->{cpan_home};
125 $ext_src = $CPAN::Blame::Config::Cnntp::Config->{ext_src};
128 my $dbi = DBI->connect ("dbi:Pg:dbname=analysis");
129 my $sql = "SELECT lc_pass, lc_fail FROM distlookup WHERE distv=?";
130 my $sth = $dbi->prepare($sql);
131 my $slvdir = "$workdir/solved";
132 opendir my $dh, $slvdir or die;
133 my %argv = map {($_ => 1)} @ARGV;
134 my $headerwritten = 0;
135 my $line = 0;
136 my @dirent = shuffle grep {/slvdv\.gz$/} readdir $dh;
137 DIRENT: for my $dirent (@dirent) {
138 my($distv) = $dirent =~ /(.+)\.slvdv\.gz$/ or next;
139 my $wantsee;
140 if (keys %argv) {
141 if ($argv{$distv}) {
142 $wantsee = "u"; # unconditionally
143 } else {
144 next DIRENT;
146 } else {
147 $wantsee = "g"; # slv is greater
148 # $wantsee = "d"; # any diff
150 my $abs = "$slvdir/$dirent";
151 my $j = do {local $/; open my $fh, "-|", zcat => $abs; <$fh> };
152 my $h = eval { JSON::XS->new->decode($j); };
153 if ($@ or !defined $h) {
154 warn "error while decoding $abs: $@";
155 next;
157 my %slv = map { $_ => $h->{"meta:ok"}{$_}{$_} } qw(PASS FAIL NA);
158 $sth->execute($distv);
159 next unless $sth->rows >= 1;
160 my(@row) = $sth->fetchrow_array();
161 next unless defined $row[0] or defined $row[1];
162 $row[0] ||= 0;
163 $row[1] ||= 0;
164 my @stat = stat $abs;
165 if ($wantsee eq "d") {
166 no warnings 'uninitialized';
167 my $iseq = $slv{PASS}==$row[0] && $slv{FAIL}==$row[1];
168 next if $iseq;
169 } elsif ($wantsee eq "g") {
170 no warnings 'uninitialized';
171 my $isslvgt = $slv{PASS}>$row[0] || $slv{FAIL}>$row[1];
172 next unless $isslvgt;
173 } elsif ($wantsee eq "u") {
174 # no next, we want to see everything
175 } else {
176 die "illegal value for wantsee: $wantsee";
178 #my $smalldiff = abs($slv{PASS}-$row[0]) + abs($slv{FAIL}-$row[1]) <= 4;
179 #next if $smalldiff;
180 unless ($headerwritten++) {
181 print "|distv========|=pass-per-slv |=pass-per-db |=fail-per-slv |=fail-per-db\n";
183 ++$line;
184 my $time = Time::Moment->from_epoch($stat[9])->at_utc->strftime("%FT%T%Z");
185 $time =~ s/:[0-9][0-9]Z/z/;
187 no warnings 'uninitialized';
188 warn sprintf "%d/%d %-31s %4d %4d %4d %4d %s\n", $line, scalar @dirent, $distv, $slv{PASS}, $row[0], $slv{FAIL}, $row[1], $time;
190 if ($Opt{repairnth}) {
191 unless ($line % $Opt{repairnth}) {
192 system "$^X bin/cnntp-solver.pl --cpanstats_distrofacts_zero=$distv";
193 sleep $Opt{sleep} if $Opt{sleep};
198 # Local Variables:
199 # mode: cperl
200 # cperl-indent-level: 4
201 # End: