1 #!/home/src/perl/repoperls/installed-perls/perl/pVNtS9N/perl-5.8.0@32642/bin/perl
9 cnntp-solver - run cpan-testers-parsereport -solve over a bunch from the cpantesters DB
21 my @opt = <<'=back' =~ /B<--(\S+)>/g;
23 =item B<--addctgargs=s>
25 Additional string to inject into the options that we pass to
26 ctgetreports. Typically used together with --pick when there is
27 something to grep for in the reports. E.g.
29 --pick RDF-NS-20130402 --addctgargs " --q qr:'(Bad blank node identifier.*)'"
31 =item B<--calcsample=i>
33 Defaults to arbitrary 500 on normal runs and to 2500 if pick is set. 0
34 is treated as unlimited. If the number of passes and fails is so low
35 that the expected numbers in the sample would be too low, then the
36 sample is set higher, no matter what this parameter specifies.
38 =item B<--cpanstats_distrofacts=s>
40 For debugging only: string must consist of whitespace separated
41 arguments $dist and $version (App-MusicExpo 1.002) or the more common
42 $dist-$version (App-MusicExpo-1.002). Output is a JSON representation
43 of what the function cpanstats_distrofacts(undef,$dist,$version)
46 =item B<--cpanstats_distrofacts_zero=s>
48 Like above but with resetting to zero beforehand.
50 =item B<--db_fixup_release_date=s>
52 For bug fixing only: string must consist of whitespace separated
53 arguments $dist and $version. The function db_fixup_release_date will
54 be called with these arguments and has the potential to fix an
55 I<allversions> bug. Output is a bit unstructured.
57 =item B<--leavewhennojobsleft!>
59 If this option is true the program finishes when it (1) is only
60 waiting for mincalctime and (2) the number of jobs in the jobsqueue
61 drops to zero. It is not used to make the program wait for a longer
62 time than mincalctime, the purpose is only to leave earlier when no
63 reason to wait exists anymore. The purpose of mincalctime is to keep
64 the shell loop around us trivial and the purpose of
65 leavewhennojobsleft is to get back to work asap when the jobqueue is
68 =item B<--maxfromannotate=i>
70 Per default we read all annotations from the file annotations.txt. But
71 the maximum number we want to read can be set lower here. Useful when
72 debugging the enqueueing process of a certain package. Put this package
73 into the first line and run
75 .../perl -d bin/cnntp-solver.pl --maxfromannotate=1 --maxfromsolved=0 --maxfromdb=0 --urgent
77 =item B<--maxfromdb=i>
79 Defaults to 666. Per default we read through the huge SQL query. We
80 switch gears in the middle from thorough reading every row to skipping
81 huge amounts of records randomly. But the maximum number we want to
82 read at all can be set lower here.
84 =item B<--maxfromsolved=i>
86 Per default we read through all already previously solved
87 calculations. But the maximum number we want to read can be set lower
90 =item B<--mincalctime=i>
92 When nothing left to do, let the script sleep until this amount of
93 time has passed. Not interesting anymore. Redis queue takes over.
97 Like a ctgetreports for this single item. We fake that it was found in
98 annotations and skip all other steps. Pick per default implies
99 sample=2500 but this can be overridden. Pick implies some kind of
100 force in that we do not calculate whether a recalc is necessary
101 depending on the age of a previous report. We simply do it, regardless.
103 Maybe it's too much in the single parameter and we should separate some
106 =item B<--retrieve_distrofacts=s>
108 For debugging only: string must consist of whitespace separated
109 arguments $dist and $version. Output is a JSON representation of what
110 the function retrieve_distrofacts(undef,$dist,$version) returns.
112 =item B<--onlystartwhennojobs!>
114 If this option is true the program does not start as long as there are
117 =item B<--this_urgent=s>
119 For debugging only: string must consist of whitespace separated
120 arguments $dist and $version. Output is a JSON representation of what
121 the function this_urgent(undef,$dist,$version) returns.
125 Dangerous, use with care. Sets urgent globally for the whole run.
126 Useful during debugging to avoid having to play dirty tricks with
133 Fulfills two roles on analysis: (1) enqueueing to redis is done by
134 running this script from crontabs with different options; (2)
135 dequeueing from redis is done by parallelized runs of this script with
136 the C<--pick> option.
144 #use lib qw( /usr/local/perl-5.10.1-uld/lib/5.10.1/x86_64-linux-ld
145 # /usr/local/perl-5.10.1-uld/lib/5.10.1
146 # /usr/local/perl-5.10.1-uld/lib/site_perl/5.10.1/x86_64-linux-ld
147 # /usr/local/perl-5.10.1-uld/lib/site_perl/5.10.1
149 #use lib "/home/src/perl/CPAN-Testers-Common-Utils-0.002-82Cocq/blib/lib";
150 #use lib "/home/src/perl/CPAN-Testers-Common-DBUtils-0.05-zqYV7K/blib/lib";
151 #use lib "/home/src/perl/CPAN-Testers-Common-Article-0.42-4OIZRu/blib/lib";
152 #use lib "/home/src/perl/CPAN-Testers-Data-Generator-0.41-bzdKLH/blib/lib";
153 #use CPAN::Testers::Data::Generator;
154 # methods discussed in CPAN::Testers::Common::DBUtils
155 # schema in CPAN::WWW::Testers::Generator
158 use File
::Basename
qw(dirname);
159 use File
::Path
qw(mkpath);
163 use Hash
::Util
qw(lock_keys);
164 use Pod
::Usage
qw(pod2usage);
166 lock_keys
%Opt, map { /([^=!]+)/ } @opt;
170 unless (defined $Opt{calcsample
}) {
172 $Opt{calcsample
} = 2500; # arbitrary
174 $Opt{calcsample
} = 500; # arbitrary
177 $Opt{maxfromdb
} = 666 unless defined $Opt{maxfromdb
};
179 sub cpanstats_distrofacts
;
180 sub retrieve_distrofacts
;
184 use lib
"$FindBin::Bin/../CPAN-Blame/lib";
185 use CPAN
::Blame
::Config
::Cnntp
;
186 my($workdir,$cpan_home,$ext_src);
188 $workdir = File
::Spec
->catdir
189 ($CPAN::Blame
::Config
::Cnntp
::Config
->{solver_vardir
},
191 $cpan_home = $CPAN::Blame
::Config
::Cnntp
::Config
->{cpan_home
};
192 $ext_src = $CPAN::Blame
::Config
::Cnntp
::Config
->{ext_src
};
195 use lib
"$ext_src/cpan-testers-parsereport/lib"
196 #, "/home/src/perl/Config-Perl-V-0.10-ymOAl_/blib/lib"
197 #, "/home/src/perl/DateTime-Format-DateParse-0.04-eYwnxv/blib/lib"
199 use CPAN
::Testers
::ParseReport
0.002000; # we use it only indirectly but want
200 # to fail early if it is missing
201 use lib
"$FindBin::Bin/../CPAN-Blame/lib";
203 use CPAN
::Blame
::Model
::Solved
;
205 use lib
"$ext_src/rersyncrecent/lib";
206 use File
::Rsync
::Mirror
::Recent
;
208 use CPAN
::DistnameInfo
;
211 use lib
"$ext_src/rersyncrecent/lib";
213 use List
::Util
qw(max min minstr reduce sum);
214 use List
::MoreUtils
qw(uniq);
215 use Storable
qw(dclone);
217 use Time
::Duration
qw(duration);
218 use Time
::HiRes
qw(sleep);
219 use Time
::Local
qw(timegm);
225 use version
; our $VERSION = qv
('1.1.1');
227 ###### constants (mostly arbitrary) ######
228 use constant MUST_CONSIDER
=> 1200; # 2015-06-03: current pause makes nearly 1500 per day
229 # 2016-03-28: s/1400/1000/: try out whether it feels faster
230 use constant MIN_CALCTIME
=> 20; # not interesting anymore, just a hot-loop-cooler
231 use constant MAX_CALCTIME
=> 60*3600; # nonsense since redis we are done in no time
233 ###### debugging ######
235 if ($Opt{cpanstats_distrofacts_zero
}) {
236 $Opt{cpanstats_distrofacts
} = $Opt{cpanstats_distrofacts_zero
};
238 for my $func_name (qw(
239 cpanstats_distrofacts
241 db_fixup_release_date
244 if (my $opt = $Opt{$func_name}) {
245 my($dist,$version) = split " ", $opt;
246 unless (defined $version && length $version){
247 my $d = CPAN
::DistnameInfo
->new("FOO/$opt.tgz");
249 $version = $d->version;
250 unless (defined $version && length $version){
251 die "argument '$opt' could not be split";
255 my $func = \
&{$func_name};
256 my $href = $func->(undef,$dist,$version);
257 my $jsonxs = JSON
::XS
->new->pretty(1)->canonical(1);
258 print $jsonxs->encode($href);
263 ###### subroutines ######
265 =item ok_value_and_distro
270 sub ok_value_and_distro
{
272 $article->{subject
} =~ /(\S+)\s+(\S+)/;
283 my $dbi = DBI
->connect ("dbi:SQLite:dbname=$file");
293 my $dbi = DBI
->connect ("dbi:Pg:dbname=analysis");
302 my $redis = Redis
->new(reconnect
=> 120, every
=> 1000);
311 my($dbi,$sql,@args) = @_;
313 my $sth = $dbi->prepare($sql);
314 my $rv = eval { $sth->execute(@args); };
316 my $err = $sth->errstr;
317 warn "Warning: error occurred while executing '$sql': $err";
320 while (my(@trow) = $sth->fetchrow_array()) {
332 my($dbi,$sql,@args) = @_;
333 local($dbi->{PrintError
}) = 0;
334 my $success = $dbi->do($sql,undef,@args);
336 my $err = $dbi->errstr;
337 unless ($err =~ /duplicate key/) {
341 "Warning: error occurred while executing sql[%s]with args[%s]: %s",
343 join(":",map { defined $_ ?
"'$_'" : "<undef>"} @args),
351 =item cpan_lookup_dist
357 package CPAN
::Shell
::devnull
;
358 sub myprint
{ return; }
359 sub mywarn
{ return; }
360 sub mydie
{ my($self,$why) = @_; warn "Caught error[$why]; continuing"; return; }
361 sub mysleep
{ return; }
363 sub cpan_lookup_dist
{
364 my($mdbi,$dist,$version) = @_;
365 return unless defined $dist && defined $version;
366 my $distv = "$dist-$version";
368 my($author,$upload_date,$distroid) = db_memoized
($mdbi,$distv);
369 return ($author,$upload_date,$distroid)
370 if $author && $upload_date && $distroid and $upload_date =~ /^[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] /;
372 # $DB::single = $dist eq "Net-Twitter" && $version eq "3.07002";
373 # $DB::single = $dist eq "App-CLI-Plugin-Log-Message";
375 use lib
"$ext_src/cpanpm/lib";
376 our $CPAN_SETUP_DONE;
377 unless ($CPAN_SETUP_DONE++) {
379 CPAN
::HandleConfig
->load;
380 CPAN
::Shell
::setup_output
();
383 $CPAN::Frontend
= "CPAN::Shell"; # alt: devnull
384 my @ret = CPAN
::Shell
->expand("Distribution", "/\\/$distv/");
386 my $la = length(mybasename
($a->{ID
}));
387 my $lb = length(mybasename
($b->{ID
}));
391 my($upload_date_authoritative) = 0;
394 my $oldyaml = read_ctx
($distv);
395 if ($oldyaml->{upload_date
}
396 and $oldyaml->{upload_date
} =~ /^[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] /
397 and $oldyaml->{upload_date
} !~ /\?/) {
398 $upload_date = $oldyaml->{upload_date
};
399 } elsif ($best->{UPLOAD_DATE
}) {
400 # not expected to happen
401 $upload_date = $best->{UPLOAD_DATE
}
402 } elsif (my @stat = stat "$cpan_home/authors/id/$best->{ID}") {
404 my(@mtime) = gmtime $stat[9];
407 $upload_date = sprintf "%04d-%02d-%02dT%02d:%02d:%02dz", @mtime[reverse 0..5];
408 $upload_date_authoritative = 1;
411 my($id) = $best->{ID
};
413 local($CPAN::Frontend
) = "CPAN::Shell::devnull";
414 CPAN
::Shell
->ls($id); # find out the upload date
415 $upload_date = $best->upload_date || "???";
417 $distroid = $author = substr($best->{ID
},5);
419 } elsif (my $x = find_on_cpan
($dist,$version)) {
420 my $absdistro = "$cpan_home/authors/$x";
421 ($author) = $absdistro =~ m{.././../(.+?)/};
422 ($distroid) = $absdistro =~ m{.././../(.+)$};
423 if (my(@stat) = stat $absdistro) {
424 my(@mtime) = gmtime $stat[9];
427 $upload_date = sprintf "%04d-%02d-%02d", @mtime[5,4,3];
429 # warn "Alert: Could not stat '$absdistro', will try to continue: $!";
430 $upload_date = "....-..-..";
438 $upload_date = sprintf "%04d-%02d-%02d(?)", @now[5,4,3];
439 # leaving $distroid = undef;
441 db_memoize
($mdbi,$distv,$author,$upload_date,$distroid,$upload_date_authoritative);
442 return ($author,$upload_date,$distroid);
451 my($dbi,$distv) = @_;
452 my $sql = "SELECT author,upload_date,distroid FROM distlookup WHERE distv=?";
453 my $rows = my_get_query
459 return unless @
$rows;
461 return @
{$rows->[0]};
464 die "Panic: cannot happen: '$n' rows when querying primary key '$distv'";
468 =item db_memoize($dbi,$distv)
470 =item db_memoize($dbi,$distv,$author,$upload_date,$distroid,$upload_date_authoritative)
472 First form reserves a record, second form fills it a bit. Undefined
473 values are ignored (*not* written as NULL).
477 my($dbi,$distv,$author,$upload_date,$distroid,$upload_date_authoritative) = @_;
478 my $sql = "INSERT INTO distlookup (distv) VALUES (?)";
479 my $success = eval { my_do_query
488 upload_date
=> $upload_date,
489 distroid
=> $distroid,
490 $upload_date_authoritative ?
(release_date_metacpan
=> $upload_date) : (),
493 while (my($k,$v) = each %othersets) {
499 return unless @bind; # they had no data
502 sprintf "UPDATE distlookup SET %s WHERE distv=?",
511 warn "Error while processing '$distv' with sql[$sql]: $@";
528 sub read_annotations
{
530 local($CWD) = "$FindBin::Bin/..";
531 unless ($Opt{pick
}) {
533 last if 0 == system git
=> "pull"; # may fail
537 my $annofile = "$FindBin::Bin/../annotate.txt";
539 unless (open $fh, $annofile) {
547 ANNOLINE
: while (<$fh>) {
549 next ANNOLINE
if /^\s*$/;
551 if (defined $Opt{maxfromannotate
}) {
552 last ANNOLINE
if $i > $Opt{maxfromannotate
};
554 my($dist,$splain) = split " ", $_, 2;
555 $anno->{$dist} = $splain;
556 $anno_line{$dist} = $i;
569 my($dist,$version) = @_;
570 return unless defined $dist;
571 my $rf = File
::Rsync
::Mirror
::Recent
->new(local => "$cpan_home/authors/RECENT.recent");
572 my $recent = $rf->news();
578 $recent->[$_]{path
} =~ m{/\Q$dist\E-\Q$version\E}
579 && $recent->[$_]{path
} !~ m{(meta|readme)$}
580 # it doesn't matter if it is deleted, we found it!
581 # && $recent->[$_]{type} eq "new"
583 my @path = uniq
map { $_->{path
} } @cand;
586 } elsif (@path != 1){
588 # 0 'id/D/DR/DROLSKY/Moose-0.89_02.tar.gz'
589 # 1 'id/D/DR/DROLSKY/Moose-0.89_01.tar.gz'
590 # 2 'id/D/DR/DROLSKY/Moose-0.89.tar.gz'
603 =item $href = cpanstats_distrofacts($dbi, $dist, $version, $distv, $lfh)
605 The original design of this function was and still is to query the
606 C<cpanstats> table for all reports on B<dist> and not B<distv>. It
607 then constructs the order of versions how they entered the database in
608 C<allversions> and counts pass and fail and other for this version and
609 determines the third fail day.
611 A sample $href returned (slightly faked):
616 "date" : "201302071447",
620 "date" : "201302111128",
624 "date" : "201303270850",
628 "date" : "201401241127",
635 "thirdfail" : "2013-12-19 14:54",
636 "thirdfailid" : 37664541,
637 "thirdpass" : "2011-04-17 14:04",
638 "thirdpassid" : 11739866
641 Since this function can also be called from the option
642 --cpanstats_distrofacts, the question is how well it works in
643 concurrency. I *think* the algorithm is OK but I did not try to test
644 for border cases. To avoid corruption we introduced the $lfh
647 If the $lfh is false, we try to get a lockfilehandle and hold it to
648 the end. If it is a valid lockfilehandle, all the better.
651 sub cpanstats_distrofacts
{
652 my($dbi,$dist,$version,$distv,$lfh) = @_;
653 die "ALERT: illegal distname [$dist]" if $dist =~ /'/;
654 $distv ||= "$dist-$version";
656 my $slv_file = slv_file
($distv);
657 $lfh = lockfilehandle
("$slv_file.LCK") or return;
659 $dbi ||= db_quickaccess_handle
();
660 my($oldfacts) = retrieve_distrofacts
($dbi,$dist,$version,$distv);
661 # CUT: we do not yet use the oldfacts
662 my($fails, $passes, $others,
663 $thirdfailid, $thirdpassid, $alreadycounted,
664 $thirdfail, $thirdpass, $cuti_reset)
665 = @
{$oldfacts}{qw(fails passes others
666 thirdfailid thirdpassid alreadycounted
667 thirdfail thirdpass cuti_reset
669 my $allversions = dclone
$oldfacts->{allversions
};
671 my $sql0 = "SELECT MAX(id) FROM cpanstats";
672 my $rows0 = my_get_query
($dbi, $sql0);
675 my $maxid_ts = Time
::Moment
->now_utc->to_string; # 2014-02-09T09:59:11.664143Z
676 $alreadycounted ||= 0;
677 my $sql = "SELECT id,version,fulldate,state
682 AND type=2 ORDER BY id";
683 my $rows = my_get_query
($dbi, $sql, $dist, $alreadycounted, $maxid);
684 my(%seen,%xseen,%is_authoritative);
685 for my $av (@
$allversions) {
686 $seen{$av->{version
}}++; # prevent duplicates
687 $xseen{$av->{version
}}{$av->{date
}}++; # preserve earliest date from previous runs
688 if ($av->{authoritative
}) {
689 $is_authoritative{$av->{version
}} = 1;
695 for my $row (@
$rows) {
696 my($id,$v,$date,$state) = @
$row; # id,version,fulldate,state
698 unless ($seen{$v}++) {
699 push @
$allversions, { version
=> $v };
701 if (defined $version and $v eq $version) {
702 if ($state eq "fail") {
704 if (!$thirdfailid && $fails == 3) {
705 $thirdfail = $date; # format 200909190440
708 } elsif ($state eq "pass") {
710 if (!$thirdpassid && $passes == 3) {
711 $thirdpass = $date; # format 200909190440
718 $date = "" unless defined $date;
719 $is_authoritative{$v} || $xseen{$v}{$date}++; # format 200909190440
723 $thirdfail =~ s/^(\d\d\d\d)(\d\d)(\d\d)(\d\d)(\d\d)/$1-$2-$3 $4:$5z/;
726 $thirdpass =~ s/^(\d\d\d\d)(\d\d)(\d\d)(\d\d)(\d\d)/$1-$2-$3 $4:$5z/;
728 for my $v (@
$allversions) {
729 $v->{date
} = minstr
grep { length $_ } keys %{$xseen{$v->{version
}}};
731 # sleep 0.25; # throttle
734 allversions
=> $allversions,
735 thirdfail
=> $thirdfail,
736 thirdpass
=> $thirdpass,
737 thirdfailid
=> $thirdfailid,
738 thirdpassid
=> $thirdpassid,
740 db_passes
=> $passes,
741 db_others
=> $others,
743 db_maxid_ts
=> $maxid_ts,
744 cuti_reset
=> $cuti_reset,
746 store_distrofacts
($dbi,$dist,$distv,$oldfacts,$facts);
750 =item store_distrofacts
752 Cache the pass/fail/other counts, thirdfailid and thirdpassid, and the
753 allversions structure. Remember the highest id up to which we have
754 counted/observed so far, so we can start counting there next time.
757 sub store_distrofacts
{
758 my($dbi,$dist,$distv,$oldfacts,$facts) = @_;
759 no warnings
'uninitialized';
761 # do we need an insert first? See "insert missing" below
762 db_memoize
($dbi,$distv);
765 my $sql = "UPDATE distlookup
766 SET cuti_fail=?, cuti_pass=?, cuti_others=?,
767 thirdfailid=?, thirdpassid=?, counted_up_to_id=?,
768 cuti_ts=?, cuti_reset=?
773 @
{$facts}{qw(db_fails db_passes db_others
774 thirdfailid thirdpassid db_maxid
775 db_maxid_ts cuti_reset)},
778 @otherwhere{qw(fails passes others
779 thirdfailid thirdpassid alreadycounted
781 = qw(cuti_fail cuti_pass cuti_others
782 thirdfailid thirdpassid counted_up_to_id
784 while (my($k,$v) = each %otherwhere) {
785 if (defined $oldfacts->{$k}) {
787 push @bind, $oldfacts->{$k};
789 $sql .= " AND $v IS NULL";
792 my $sth = $dbi->prepare($sql);
793 my $rv = $sth->execute(@bind);
795 my $bind = join ",", map { defined $_ ?
"'$_'" : "<undef>" } @bind;
796 warn "WARNING: $rv records updated with sql[$sql]bind[$bind]".
797 " => insert missing!";
800 my $oldav = { map {("$dist-$_->{version}" => $_->{date
})}
801 @
{$oldfacts->{allversions
}||[]}
803 my $av = $facts->{allversions
};
804 DREF
: for my $i (0..$#$av) {
805 my $dref = $av->[$i] or next; # "distro-reference" { version=>"0.12", date=>"20070408" }
806 my $tdistv = "$dist-$dref->{version}";
807 my $tdate = $dref->{date
};
808 if ($i == $#$av and $tdate =~ /[0-9]{8}/) {
809 eval { db_fixup_release_date
($dbi,$dist,$dref->{version
},$tdistv); };
811 warn "Warning: fixup complained [$@]";
814 next DREF
if $tdate && $oldav->{$tdistv} && $tdate eq $oldav->{$tdistv};
815 db_quickaccess_insert_or_update
($dbi,$tdistv,undef,undef,$dist);
816 db_memoize
($dbi,$tdistv,undef,$tdate,undef,0);
819 my($fail,$pass) = @
{$facts}{qw(db_fails db_passes)};
820 #### hset missing in our ubuntu redis, using zscore as a workaround
821 $redis->zadd("analysis:distv:pass",$pass,$distv) if defined $pass;
822 $redis->zadd("analysis:distv:fail",$fail,$distv) if defined $fail;
827 =item retrieve_distrofacts
829 The distlookup and distcontext tables hold aggregate data that are
830 generated from cpanstats (plus some). We update them on every run in
831 store_distrofacts and reuse them next time we come by so that we do
832 not have to read so much from cpanstats.
834 When a record has a zero for counted_up_to_id and one of the
835 pass/fail/etc values are set, the whole record is reset. This means, a
836 convenient way to restart counting is by resetting counted_up_to_id.
837 For convenience, this reset can be triggered with the
838 --cpanstats_distrofacts_zero option.
841 sub retrieve_distrofacts
{
842 my($dbi,$dist,$version,$distv) = @_;
843 $dbi ||= db_quickaccess_handle
();
844 $distv ||= "$dist-$version";
846 my $sth = $dbi->prepare("SELECT cuti_fail, cuti_pass, cuti_others,
847 thirdfailid, thirdpassid, counted_up_to_id,
849 FROM distlookup WHERE distv=?");
850 $sth->execute($distv);
851 my(@row) = $sth->fetchrow_array;
852 @
{$facts}{qw(fails passes others thirdfailid thirdpassid alreadycounted cuti_reset)} = @row;
854 my $reset = $Opt{cpanstats_distrofacts_zero
} || !$facts->{alreadycounted
};
856 my $cuti_reset ||= 0;
857 if (time - $cuti_reset > 86400*90) {
858 if (rand(1) < 0.001) {
865 @
{$nil}{qw(db_fails db_passes db_others thirdfailid thirdpassid alreadycounted)} = ();
866 $nil->{cuti_reset
} = time;
867 store_distrofacts
($dbi,$dist,$distv,$facts,$nil);
870 $sth = $dbi->prepare("SELECT fulldate FROM cpanstats WHERE id=?");
871 $sth->execute($facts->{thirdfailid
});
872 (@row) = $sth->fetchrow_array;
873 $facts->{thirdfail
} = $row[0];
874 $sth->execute($facts->{thirdpassid
});
875 (@row) = $sth->fetchrow_array;
876 $facts->{thirdpass
} = $row[0];
877 $sth = $dbi->prepare("SELECT distv FROM distcontext WHERE dist=?");
878 my($sth2) = $dbi->prepare("SELECT upload_date, release_date_metacpan
881 $sth->execute($dist);
882 while (@row = $sth->fetchrow_array) {
884 my($v) = substr($tdistv,length($dist)+1);
886 $sth2->execute($tdistv);
887 my($tud,$trdmc) = $sth2->fetchrow_array;
888 my($tdate) = $trdmc || $tud;
890 $tdate =~ s/[^0-9]//g;
892 substr($tdate,12)="" if length($tdate)>12;
893 my $av = { version
=> $v, date
=> $tdate };
894 $av->{authoritative
} = 1 if $trdmc;
897 @av = sort { $a->{date
} cmp $b->{date
}
898 || $a->{version
} <=> $b->{version
}
899 || $a->{version
} cmp $b->{version
}
902 $facts->{allversions
} = \
@av;
906 =item $href = distroquestion($dbi,$i,$distroquestlabel,$dist,$version,$lfh)
908 (Called from three different places, and reacting to their context)
910 If the $lfh is false, we try to get a lockfilehandle and hold it to
911 the end. If it is a valid lockfilehandle, all the better.
915 my($dbi,$i,$distroquestlabel,$dist,$version,$lfh) = @_;
916 my $distv = "$dist-$version";
918 my $slv_file = slv_file
($distv);
919 $lfh = lockfilehandle
("$slv_file.LCK") or return;
923 my($author,$upload_date,$distroid) = cpan_lookup_dist
($dbi,$dist,$version);
924 my($tell_upload_date);
925 my($csdfacts) = cpanstats_distrofacts
($dbi,$dist,$version,$distv,$lfh);
926 my($allversions) = $csdfacts->{allversions
};
927 my($thirdfail) = $csdfacts->{thirdfail
};
928 my $passfail_overview =
930 pass
=> $csdfacts->{db_passes
},
931 fail
=> $csdfacts->{db_fails
},
932 other
=> $csdfacts->{db_others
},
934 my $tell_passfail_overview;
937 allversions
=> $allversions,
940 distroid
=> $distroid,
942 passfail_overview
=> $passfail_overview,
943 thirdfail
=> $thirdfail,
944 upload_date
=> $upload_date,
950 if (0 == $passfail_overview->{pass
}) {
951 $color_on = Term
::ANSIColor
::color
("red");
952 $color_off = Term
::ANSIColor
::color
("reset");
953 } elsif ($passfail_overview->{pass
} >= 3 && $passfail_overview->{fail
}>=3) {
954 $color_on = Term
::ANSIColor
::color
("green");
955 $color_off = Term
::ANSIColor
::color
("reset");
958 $tell_passfail_overview = sprintf
961 $passfail_overview->{pass
},
962 $passfail_overview->{fail
},
963 $passfail_overview->{other
},
968 my($y,$m,$d,$hour,$minute,$second) = $upload_date =~ /(\d+)-(\d+)-(\d+)(?:T(\d+):(\d+):(\d+))?/;
969 if (not defined $y) {
971 printf "Warning: no upload date for %s\n", $ret->{distv
};
974 my $dtu = DateTime
->new
979 minute
=> $minute||0,
980 second
=> $second||0,
981 time_zone
=> "floating", ### should change to utc, but may soon be unreachable
983 my $dtn = DateTime
->now;
984 my $dur = $dtn->subtract_datetime_absolute($dtu);
985 $distro_age = $dur->delta_seconds;
987 my $maxyears = $CPAN::Blame
::Config
::Cnntp
::Config
->{maxyears
} || 8;
988 if ($distro_age >= $maxyears*365.25*86400) {
989 $color_on = Term
::ANSIColor
::color
("yellow");
990 } elsif ( $upload_date =~ /\?/) {
991 $color_on = Term
::ANSIColor
::color
("yellow");
992 } elsif ($distro_age > 0) {
993 $color_on = Term
::ANSIColor
::color
("green");
996 $color_on = Term
::ANSIColor
::color
("red");
998 my $color_off = Term
::ANSIColor
::color
("reset");
999 $tell_upload_date = $color_on . substr($upload_date,2,8) . $color_off;
1001 my $tell_current_version;
1003 my $current_version = @
$allversions ?
$allversions->[-1]{version
} : 0;
1005 if ($current_version eq $version) {
1006 $tell_current_version = "current";
1007 $color_on = Term
::ANSIColor
::color
("green");
1009 } elsif ($Opt{pick
}) {
1010 $tell_current_version = "<?>";
1011 $color_on = Term
::ANSIColor
::color
("green");
1013 $tell_current_version = "<$current_version";
1014 $color_on = Term
::ANSIColor
::color
("red");
1016 my $color_off = Term
::ANSIColor
::color
("reset");
1017 $tell_current_version = sprintf "%s%s%s", $color_on,$tell_current_version,$color_off;
1019 my $ddist = length($distv)<=52 ?
$distv : (substr($distv,0,49)."...");
1021 print "gmtime now: ".gmtime()."\n";
1024 "%3d %-8s %s %s %s/%s (%s)\n",
1025 $i, $distroquestlabel,
1027 $tell_passfail_overview,
1030 $tell_current_version,
1032 my $slv_content = slv_content
($distv);
1033 my $rtables = CPAN
::Blame
::Model
::Solved
->regression_tables($slv_content);
1034 my $rsq1_regressions = join ", ", map { $_->{name
} } grep { $_->{rsq
}==1 } @
$rtables;
1035 $ret->{greenish
} = $greenish;
1036 $ret->{distro_age
} = $distro_age;
1037 $ret->{rsq1_regressions
} = $rsq1_regressions;
1038 # sleep 0.5; # throttle
1049 sprintf "%s/solved/%s.slv", $workdir, $distv;
1059 my $slv_file = slv_file
($distv);
1060 return unless -e
$slv_file;
1061 my $content = do { open my $fh, $slv_file or die "Could not open '$slv_file': $!";
1065 =item lockfilehandle
1068 sub lockfilehandle
{
1070 use Fcntl
qw( :flock );
1071 mkpath dirname
$lockfile;
1073 unless (open $lfh, "+<", $lockfile) {
1074 unless ( open $lfh, ">>", $lockfile ) {
1075 die "ALERT: Could not open >> '$lockfile': $!"; # XXX should not die
1077 unless ( open $lfh, "+<", $lockfile ) {
1078 die "ALERT: Could not open +< '$lockfile': $!"; # XXX should not die
1081 if (flock $lfh, LOCK_EX
|LOCK_NB
) {
1082 print "Info[$$]: Got the lock, continuing";
1084 print "FATAL[$$]: lockfile '$lockfile' locked by a different process";
1091 =item call_ctgetreports
1096 sub call_ctgetreports
{
1097 my($ctx,$ctxi,$ctxtotal) = @_;
1098 my($distv,$distro_age) = ($ctx->{distv
},$ctx->{distro_age
});
1099 my $slv_file = slv_file
($distv);
1100 my @stat = stat ($slv_file . "dv.gz");
1102 @stat = stat ($slv_file . "dv");
1104 my $slv_file_age = @stat ?
time-$stat[9] : 0;
1105 # tuning this knob is always difficult for me. We are saying here
1106 # that we compare the age of the distro and the age of the
1107 # regression calculation we have around. Iow, older distros need
1108 # to be recalculated less often. If we increase this parameter
1109 # towards 1, we never calculate again because any result will be
1110 # younger than the distro. And if we go toward 0 we calculate
1111 # every time we come around. So if at 0.1 and with a very slow
1112 # pause2 we feel that 0.1 calculates too often, then we inc to
1113 # 0.25 to reduce the number of recalcs.
1114 my $must_run_ctgetreports;
1116 $must_run_ctgetreports = 1;
1118 # tentative name for this variable? it is a relative age, a
1119 # threshold, and it guards recalculations:
1120 # RECALC_AGE_THRESHOLD? Sanity next time we come around
1121 $must_run_ctgetreports = !$slv_file_age || $slv_file_age > $distro_age*0.40; # arbitrary
1123 # not allowed to go beyond this point without having a lock
1124 my $lfh = lockfilehandle
("$slv_file.LCK") or return;
1125 if (!$must_run_ctgetreports && -e
$slv_file && $slv_file_age > 86400) {
1126 # we want to overrule the decision for dists that have changed
1127 # much since last computation
1128 my $slv_content = slv_content
($distv);
1129 my($calctimepass,$calctimefail) = (0,0);
1130 my @lines = split /\n/, $slv_content;
1132 next unless /^ ([A-Z]+)/;
1135 } elsif ($1 eq "FAIL") {
1139 if ($ctx->{passfail_overview
}) {
1140 if ( $calctimepass && $ctx->{passfail_overview
}{pass
}/$calctimepass > 1.5 # arbitrary
1141 || $calctimefail && $ctx->{passfail_overview
}{fail
}/$calctimefail > 1.5 # arbitrary
1143 $must_run_ctgetreports=1;
1147 # it's ugly to log within a subroutine and to log things that
1148 # belong to the outer scope and to pass parameters in we only need
1149 # for logging's sake:(
1151 ("DEBUG: at [%s UTC] for distv[%s] slv_file_age[%s] upload_date[%s] distro_age[%s] must_run_ctg[%s] progress[%d/%d]\n",
1155 $ctx->{upload_date
},
1157 $must_run_ctgetreports,
1161 if ($must_run_ctgetreports) {
1162 mkpath dirname
$slv_file;
1163 my $dumpfile = $slv_file;
1164 $dumpfile =~ s/\.slv$/.slvdv/;
1165 if (-e
"$dumpfile.gz") {
1166 0 == system "gunzip -f $dumpfile.gz" or die;
1168 my $ctg = sprintf "%s/ctgetreports", dirname
($^X
);
1169 $ctg = "$ext_src/cpan-testers-parsereport/bin/ctgetreports";
1170 die "ctgetreports not installed" unless -e
$ctg;
1171 my $ctg_cachedir = $CPAN::Blame
::Config
::Cnntp
::Config
->{ctgetreports_dir
};
1172 my $hashed_ctg_parent;
1173 if (-d
$ctg_cachedir) {
1174 my $letter = substr($distv,0,1);
1175 $hashed_ctg_parent = "$ctg_cachedir/$letter";
1176 if (-f
"$hashed_ctg_parent/$distv.tar") {
1177 0 == system qq{tar
-C
"$hashed_ctg_parent" -xf
"$hashed_ctg_parent/$distv.tar"}
1178 or die "Problem while untarring $distv.tar";
1181 die "directory '$ctg_cachedir' doesn't exist. Care to create it?";
1183 my $sample = $Opt{calcsample
};
1184 if ($sample && $ctx->{passfail_overview
}{fail
} && $ctx->{passfail_overview
}{pass
}) {
1185 # correct the sample size if there is a small amount of either fails or passes
1186 my $NF = $ctx->{passfail_overview
}{fail
};
1187 my $NP = $ctx->{passfail_overview
}{pass
};
1188 my $CRIT = min
($NP,$NF);
1190 if ($CRIT * $sample / $N < 3) {
1191 $sample = 1 + int(3.15 * $N / $CRIT); # 3.15 security margin over 3
1194 # emergency break 2014-12-24:
1195 # Class-C3-Componentised-1.001000 takes longer than 48
1196 # hours, we must avert those; if the result is really
1197 # interesting, we can expect that we'll get more fails
1198 # in the following days
1199 if ($sample > 3200) { # arbitrary: CGI-Struct with 3300 took 53000 seconds
1200 warn "Alert: Late refusing of ctgetreports for $distv; sample too big: $sample";
1207 my $sample_argument = $sample ?
" --sample=$sample" : "";
1208 my $addctgargs = $Opt{addctgargs
} || "";
1211 qq{"$^X" -I
"$ext_src/cpan-testers-parsereport/lib" "$ctg"}
1212 .qq{ --pce
--cachedir
="$hashed_ctg_parent/$distv"}
1214 .qq{ --q qr
:'(.*version .+? required--this is only.*)'}
1215 .qq{ --q qr
:'("[A-Za-z0-9_]+" is not exported by the [A-Za-z0-9_:]+ module.*)'}
1216 .qq{ --q qr
:'(.*instance running.*)'}
1217 # cf Xref: k85.linux.bogus sent:7814
1218 #.qq{ --q qr:'(.*could.?n.t (?:open|connect).*)'}
1219 .qq{ --q qr
:'(.*(?i:could.?n.t (?:open|connect|find)).*)'}
1220 .qq{ --q qr
:'(.*Base class package.*)'}
1221 .qq{ --q qr
:'(Can.t call method .*)'}
1222 .qq{ --q qr
:'(Can.t use an undefined value.*)'}
1223 .qq{ --q qr
:'(Can.t use string .*)'}
1224 .qq{ --q qr
:'(Can.t modify division)'}
1225 .qq{ --q qr
:'(Can\\047t locate loadable object)'}
1226 .qq{ --q qr
:'(Connection (?:refused|reset by peer))'}
1227 .qq{ --q qr
:'Constants from lexical variables potentially modified elsewhere are deprecated'}
1228 .qq{ --q qr
:'(Can.t use keyword .*? as a label)'}
1229 .qq{ --q qr
:'(Could not open .*)'}
1230 .qq{ --q qr
:'(Address already in use)'}
1231 .qq{ --q qr
:'(.*is deprecated (?:and will be removed)?)'}
1232 .qq{ --q qr
:'(skip(?:ping |ped: | test).*)'}
1233 .qq{ --q qr
:'(Please rerun the make command)'}
1234 .qq{ --q qr
:'(v-string .* non-portable at \\S+)'}
1235 .qq{ --q qr
:'(The module .* isn.t available on CPAN)'}
1236 .qq{ --q qr
:'(Invalid version format)'}
1237 .qq{ --q qr
:'did you mean \\\$\\] \\?'}
1238 .qq{ --q qr
:'Unescaped left brace in regex is deprecated here, passed through'}
1239 .qq{ --q qr
:'Unescaped left brace in regex is deprecated'}
1240 .qq{ --q qr
:'Unescaped left brace in regex is illegal'}
1241 .qq{ --q qr
:'Unknown warnings category'}
1242 .qq{ --q qr
:'Non-ASCII character seen before =encoding'}
1243 .qq{ --q qr
:'((Smartmatch|given|when) is experimental)'}
1244 .qq{ --q qr
:'# (.+ alternative text .+ contains non-escaped .+)'}
1245 .qq{ --q qr
:'=over without closing =back'}
1246 .qq{ --q qr
:'starts or ends with whitespace'}
1247 .qq{ --q qr
:'Acme::Override::INET replaced IO::Socket::INET'}
1248 .qq{ --q qr
:'Invalid byte sequence'}
1249 .qq{ --q qr
:'Insecure dependency in (require|eval)'}
1250 .qq{ --q qr
:'Passing a list of values to enum'}
1251 .qq{ --q qr
:'(undefined symbol: [A-Za-z0-9_]+ )'}
1252 .qq{ --q qr
:'Bailout called\\.'}
1253 .qq{ --q qr
:'Out of memory\\!'}
1254 .qq{ --q qr
:'Prototype mismatch:'}
1255 .qq{ --q qr
:'Connection reset by peer'}
1256 .qq{ --q qr
:'depends on targets in skipped section'}
1257 .qq{ --q qr
:'Network error'}
1258 .qq{ --q qr
:'Insecure .+ while running with'}
1259 .qq{ --q qr
:'must be adjacent in regex'}
1260 .qq{ --q qr
:'locate object method .new. via package .Test2::Context.'}
1261 .qq{ --q qr
:'Use of code point 0x[[:xdigit:]]+ is deprecated; the permissible max is 0x7FFFFFFF'}
1262 .qq{ --q qr
:'# (.+ but done_testing.+ expects .+)'}
1263 .qq{ --q qr
:'Non-zero wait status: ([0-9]+)'}
1265 .qq{ --solve
--solvetop
=123$sample_argument}
1266 .qq{ --dumpfile
=$dumpfile}
1268 .qq{ --transport
=http_cpantesters_gzip
}
1269 .qq{ --prefer
-local-reports
}
1270 .qq{ --vdistro
=$distv $ctx->{dist
} > $slv_file.new
2>&1}
1271 .q{ --filtercb='my $r = shift; $r->{"meta:ok"} = "FILTERED" if $r->{"qr:locate object method .new. via package .Test2::Context."}'}
1272 .q{ --filtercb='my $r = shift; $r->{"meta:ok"} = "FILTERED" if $r->{"qr:Use of code point 0x[[:xdigit:]]+ is deprecated; the permissible max is 0x7FFFFFFF"}'}
1273 .q{ --filtercb='my $r = shift; for (split " ", $r->{"conf:ccflags"}){$r->{"conf:ccflags~$_"} = 1}'}
1275 warn "Info: running system[$system]";
1276 my $ret = process_ctg($system, $distv);
1277 # gzip: /home/ftp/cnntp-solver-2009/workdir/solved/Dist-Zilla-Plugin-Twitter-0.015-TRIAL.slvdv: No such file or directory
1278 my $redis = myredis;
1279 $redis->zrem("analysis:distv:eta",$distv);
1281 unless (0 == system "gzip -9f $dumpfile") {
1282 warn "Warning: error while running gzip on '$dumpfile', continuing...";
1286 qq{tar -C "$hashed_ctg_parent" --remo -cf "$hashed_ctg_parent/$distv.tar" "$distv"}) {
1287 warn "Warning: error while running tar cf on '$distv', continuing...";
1290 my @stat = stat "$dumpfile.gz";
1291 $redis->zadd("analysis:distv:calctimestamp",$stat[9],$distv) if @stat;
1292 my $lc_ts = Time::Moment->from_epoch($stat[9])->to_string; # lc=last calc
1293 open my $zfh, "-|", "zcat", "$dumpfile.gz" or die "could not fork: $!";
1294 my $j = do {local $/; <$zfh>};
1296 my $h = JSON::XS->new->decode($j);
1297 my %passfail = map { $_ => $h->{"meta:ok"}{$_}{$_} } qw(PASS FAIL);
1298 for my $pf (qw(pass fail)) {
1299 my $redis_key = "analysis:distv:lc$pf"; # analysis:distv:lcpass
1300 my $pf_value = $passfail{uc $pf} // 0; # $passfail{PASS}
1301 $redis->zadd($redis_key, $pf_value, $distv);
1303 rename "$slv_file.new", $slv_file or die "Could not rename: $!";
1304 mytouch
("slv",{name
=> $distv});
1308 lc_pass
=> $passfail{PASS
}//0,
1309 lc_fail
=> $passfail{FAIL
}//0,
1312 warn "Alert: Error running ctgetreports; command was [$system]";
1323 my($distv,$want_ctgetreport,$ctxi) = @_;
1324 if ($distv eq "libxml-enno-1.02") {
1326 Carp
::cluck
("$distv is generally unwanted, go away.");
1330 my $redis = myredis
;
1331 unless (defined $old_highscore) {
1332 my($topdistv,$highscore) = $redis->zrevrange("analysis:jobqueue:q",0,0,"withscores");
1333 $old_highscore = $highscore;
1334 $old_highscore //= 0;
1336 my($thisscore) = $redis->zscore("analysis:jobqueue:q",$distv);
1338 my $score = $want_ctgetreport - $ctxi; # linear decreasing
1339 if ($ctxi < 40 && $old_highscore > $score) {
1340 # the first 40 deserve a better score than the old ones
1341 $score = $old_highscore + 40 - $ctxi;
1346 if ($thisscore >= $score) {
1352 my($zcard) = $redis->zcard("analysis:jobqueue:q");
1353 warn "DEBUG: enqueueing ctxi[$ctxi] old_highscore[$old_highscore] score[$score] distv[$distv] zcard[$zcard]\n";
1355 $redis->zadd("analysis:jobqueue:q", $score, $distv);
1361 Runs a ctgetreports job with all of our redis interaction
1363 Revisited 2015-04-06: looks wrong to me. But remember, we have redis
1364 1.2 on our ds8143 and that did not even have a hash. That is, for
1365 example, why we used JSON::XS here; we could have used a hash instead.
1366 But I'm sure that the "pattern" that I was reading in the Redis
1367 documentation at that time has changed significantly since then. Maybe
1368 we should rewrite the code according to the current description.
1372 my($system, $distv) = @_;
1374 my $redis = myredis
;
1375 my $jsonxs = JSON
::XS
->new->indent(0);
1376 my $job_object = {distv
=>$distv,start
=>$start};
1377 my $json = $jsonxs->encode($job_object);
1378 my $sha1 = Digest
::SHA
->new(1);
1380 my $hex = $sha1->hexdigest;
1381 my $id = $redis->get("analysis:jobqueue:jobs:$hex\:id");
1383 my($nid) = $redis->incr("analysis:jobqueue:next.job.id");
1384 if (my($setnx) = $redis->setnx("analysis:jobqueue:jobs:$hex\:id" => $nid)) {
1387 warn "WARNING: Should never happen, it would mean that two jobs calc the same thing at the same time. json[$json]hex[$hex]";
1388 $id = $redis->get("analysis:jobqueue:jobs:$hex\:id");
1391 $redis->set("analysis:jobqueue:jobs:$id\:descr" => $json); # will set again afterwards
1392 $redis->set("analysis:jobqueue:jobs:$id\:hex" => $hex);
1393 $redis->sadd("analysis:jobqueue:runningjobs" => $id); # must srem
1394 my $ret = system $system;
1395 my $finished = time;
1396 my $took = $finished - $start;
1397 $job_object->{finished
} = $finished;
1398 $job_object->{took
} = $took;
1399 $job_object->{ret
} = $ret;
1400 $json = $jsonxs->encode($job_object);
1401 $redis->set("analysis:jobqueue:jobs:$id\:descr" => $json); # setting again
1402 $redis->srem("analysis:jobqueue:runningjobs" => $id); # as promised
1403 $redis->rpush("analysis:jobqueue:recentjobs" => $id);
1404 while ($redis->llen("analysis:jobqueue:recentjobs") > 10000) {
1405 my($del_id) = $redis->lpop("analysis:jobqueue:recentjobs");
1406 my($del_hex) = $redis->get("analysis:jobqueue:jobs:$del_id\:hex");
1407 $redis->del("analysis:jobqueue:jobs:$del_hex\:id");
1408 $redis->del("analysis:jobqueue:jobs:$del_id\:descr");
1409 $redis->del("analysis:jobqueue:jobs:$del_id\:hex");
1412 warn "Warning: slow calc on '$distv'; took[$took]";
1419 Reads the context of this distv from the yaml file
1424 my $outfile = sprintf "%s/solved/%s.yml", $workdir, $distv;
1425 my @stat = stat $outfile or return;
1426 YAML
::Syck
::LoadFile
($outfile);
1429 =item store_ctx ($dbi,$ctx)
1431 $dbi may be undef. (For the record, we often have no dbi handle
1432 around, but if we have one, we want to pass it around. This has a
1433 historical reason: sqlite misbehaved when faced with concurrent
1434 accesses. So we tried to keep the lifespan of dbi handles as short as
1437 Checks the hashref $ctx against the yaml file on disk, writes it if
1438 something has changed. Then writes it also to the database. If this is
1439 a first-time encounter and greenish is >= 3, it is also enqueued for a
1445 my $outfile = sprintf "%s/solved/%s.yml", $workdir, $ctx->{distv
};
1446 my @stat = stat $outfile;
1448 my $want_enqueue = 0;
1450 my $reduced_ctx = dclone
$ctx;
1451 delete $reduced_ctx->{distro_age
};
1452 my $curr_dump = YAML
::Syck
::Dump
($reduced_ctx);
1455 { open my $fh, $outfile or die "Could not open '$outfile': $!";
1459 $want_write = $curr_dump ne $last_dump;
1460 no warnings
'uninitialized';
1461 if ($want_write and $ctx->{lastcalc
} and $ctx->{passfail_overview
}) {
1462 my $old_ctx = YAML
::Syck
::Load
($last_dump);
1463 if ($ctx->{lastcalc
} ne $old_ctx->{lastcalc
}) {
1464 # distlookup: last_calc_ts, lc_pass, lc_fail
1476 if ($ctx->{greenish
} && $ctx->{greenish
} >= 3) {
1481 mkpath dirname
$outfile;
1482 open my $fh, ">", "$outfile.new" or die "Could not open > '$outfile.new': $!";
1483 print $fh $curr_dump;
1484 close $fh or die "Could not close: $!";
1485 rename "$outfile.new", $outfile or die "Could not rename: $!";
1487 db_quickaccess_insert_or_update
1496 if ($want_enqueue) {
1497 enqueue
($ctx->{distv
},0,1);
1499 mytouch
("yml",{name
=> $ctx->{distv
}});
1502 =item db_quickaccess_handle
1507 sub db_quickaccess_handle
{
1508 return CPAN
::Blame
::Config
::Cnntp
::common_quickaccess_handle
();
1511 =item db_quickaccess_insert_or_update
1513 Convenience function for getting stuff into the distcontext table.
1514 You cannot set anything to null here, only to some value. Any value
1515 given as undef is skipped in the generated update statement. Against
1516 common best practice, this code first checks whether a record exists
1517 and only does an insert if it does not exist. We believe this is OK
1518 since we never want to delete records here.
1520 Besides that, this function does nothing when the caller would not
1524 sub db_quickaccess_insert_or_update
{
1525 my($dbi,$distv,$yaml,$greenish,$dist) = @_;
1526 $dbi ||= db_quickaccess_handle
();
1527 my $sql = "SELECT yaml,greenish,dist FROM distcontext WHERE distv=?";
1528 my $rows = my_get_query
1535 $sql = "INSERT INTO distcontext (distv) VALUES (?)";
1543 no warnings
'uninitialized';
1544 my(%othersets) = (yaml
=> $yaml, greenish
=> $greenish, dist
=> $dist);
1547 yaml
=> $rows->[0][0],
1548 greenish
=> $rows->[0][1],
1549 dist
=> $rows->[0][2],
1552 while (my($k,$v) = each %othersets) {
1553 if (defined $v && $v ne $oldvalues{$k}) {
1558 return unless @bind;
1560 # $sql = "UPDATE distcontext SET yaml=?, greenish=?, dist=? WHERE distv=?";
1561 $sql = sprintf "UPDATE distcontext SET %s WHERE distv=?", join(", ", @set);
1562 warn "DEBUG: sql[$sql] bind[@bind]";
1563 my_do_query
($dbi, $sql, @bind);
1566 =item db_quickaccess_delete
1568 forbidden call, because we believe that we want to keep the
1569 distv=>dist relation here forever and in any case. We use that relation in
1573 sub db_quickaccess_delete
{
1576 Carp
::confess
("somebody called db_quickaccess_delete");
1577 my $dbi = db_quickaccess_handle
();
1578 my $sql = "DELETE FROM distcontext WHERE distv=?";
1592 my($type,$message) = @_;
1593 my $file = sprintf "%s/solved/lastchange_%s.ts", $workdir, $type;
1594 open my $fh, ">", $file or die "Could not open >'$file': $!";
1595 print $fh YAML
::Syck
::Dump
($message);
1596 close $fh or die "Could not close '$file': $!";
1606 my @date = unpack "a4 a1 a2 a1 a2", $iso; # 2006-07-04
1607 $date[2]--; # make month zero-based
1608 my $epoch = eval { timegm
(0,0,0,@date[4,2,0]); };
1609 if ($@
|| !$epoch) {
1611 Carp
::confess
("ALERT: date[@date] iso[$iso]");
1616 =item db_quickaccess_select_dist
1621 sub db_quickaccess_select_dist
{
1622 my($dbi,$distv) = @_;
1623 my $sql = "SELECT dist FROM distcontext WHERE distv=?";
1624 my $rows = my_get_query
1630 return unless @
$rows>0;
1631 return $rows->[0][0];
1634 =item db_fixup_release_date
1636 Fetch a release date from metacpan and write it into
1637 release_date_metacpan in distlookup. Dies if the field is not empty.
1640 sub db_fixup_release_date
{
1641 my($dbi,$dist,$version,$distv) = @_;
1642 $dbi ||= db_quickaccess_handle
();
1643 $distv ||= "$dist-$version";
1644 my $sql = "SELECT release_date_metacpan FROM distlookup WHERE distv=?";
1645 my $rows = my_get_query
1651 if (defined $rows->[0][0] and length $rows->[0][0]) {
1652 die "Alert: found record for distv '$distv' where release_date_metacpan is '$rows->[0][0]'";
1654 my $ua = LWP
::UserAgent
->new(agent
=> "analysis.cpantesters.org/$VERSION");
1655 $ua->default_header("Accept-Encoding", "gzip");
1656 my $jsonxs = JSON
::XS
->new->indent(0);
1657 # my $query = "http://api.metacpan.org/v0/release/_search?q=release.name:$distv&fields=name,date,status";
1658 my $query = "http://fastapi.metacpan.org/v1/release/_search?q=name:$distv&fields=name,date,status";
1659 my $resp = $ua->get($query);
1661 if ($resp->is_success) {
1662 my $content = $resp->decoded_content;
1663 my $mcpananswer = $jsonxs->decode($content);
1664 if (my $h = $mcpananswer->{hits
}{hits
}) {
1665 $date = $h->[0]{fields
}{date
};
1667 warn sprintf "Warning: result from metacpan api has no hits: %s\n", $content;
1670 die sprintf "No success getting from metacpan: query '%s' response '%s'", $query, $resp->as_string;
1672 warn sprintf "DEBUG: rows[%d] distv[%s] dist[%s] date[%s]\n", scalar @
$rows, $distv, $dist, $date;
1673 $sql = "UPDATE distlookup set release_date_metacpan=? WHERE distv=?";
1674 my $success = my_do_query
($dbi,$sql,$date,$distv);
1677 success
=> $success,
1681 =item (void) db_store_lastcalc ($dbi, $distv, $last_calc_ts, $lc_pass, $lc_fail)
1683 Stores the last calc's coordinates. Short-circuits if they are already there.
1686 sub db_store_lastcalc
{
1687 my($dbi, $distv, $last_calc_ts, $lc_pass, $lc_fail) = @_;
1688 $dbi ||= db_quickaccess_handle
();
1689 my $sql = "SELECT last_calc_ts, lc_pass, lc_fail FROM distlookup WHERE distv=?";
1690 my $rows = my_get_query
1696 if ( defined $rows->[0][0]
1697 and length $rows->[0][0]
1699 if ( $last_calc_ts eq $rows->[0][0]
1700 and $lc_pass == $rows->[0][1]
1701 and $lc_fail == $rows->[0][2]
1703 return; # short circuit
1705 $sql = "UPDATE distlookup SET last_calc_ts=?, lc_pass=?, lc_fail=? WHERE distv=?";
1706 my $success = my_do_query
($dbi, $sql, $last_calc_ts, $lc_pass, $lc_fail, $distv);
1708 warn "Warning: something went wrong storing lastcalc stuff";
1713 warn "Warning: something wrong with this record, rows->[0][0] empty";
1718 =item db_quickaccess_fixup
1720 as of 201401 never reached. Deprecated 201708.
1723 sub db_quickaccess_fixup
{
1724 my($dbi,$distv,$dist) = @_;
1725 die "db_quickaccess_fixup is deprecated";
1730 as of 201401 never reached
1734 my($touched_distv) = @_;
1735 my $slv_dir = sprintf("%s/solved", $workdir);
1736 opendir my $dh, $slv_dir or die "Could not opendir '$slv_dir': $!";
1737 my(%deleted_distv, %fixed_dist);
1739 my @readdir = readdir $dh;
1740 warn sprintf "Info: readdir found %d entries in solv_dir[%s]\n", scalar @readdir, $slv_dir;
1741 my $skipped_cleanup_because_touched = 0;
1742 DIRENT
: for my $dirent (@readdir) {
1743 next DIRENT
if $dirent =~ /\~$/;
1745 my($basename,$exte) = $dirent =~ /(.+)\.(yml|slv|slvdv|slvdv\.gz|ts)$/;
1747 ("DEBUG: %s UTC: checking dirent %d: %s; total deleted: %d",
1751 scalar keys %deleted_distv,
1752 ) unless $i % 10000;
1753 next DIRENT
unless defined $exte;
1754 my $dbi = db_quickaccess_handle
();
1755 my $dist = db_quickaccess_select_dist
($dbi,$basename);
1756 if ($dist and !$fixed_dist{$dist}++) {
1757 db_quickaccess_fixup
($dbi,$basename,$dist);
1759 next DIRENT
if $exte =~ /^(yml|ts)$/;
1760 if ( $touched_distv->{$basename} ){
1761 $skipped_cleanup_because_touched++;
1762 # we may have to get a fresh context so we can delete
1763 # outdated modules now?
1766 next DIRENT
if $deleted_distv{$basename};
1767 # boring, boring: since we do not delete old stuff in the
1768 # directory, this pretends to delete in quickaccess all
1769 # things already deleted, 15000 distros and growing
1770 #### print "DEBUG: deleting from quickaccess $basename\n";
1771 db_quickaccess_delete
($basename);
1772 $deleted_distv{$basename}++;
1774 warn "DEBUG: skipped cleanup because touched[$skipped_cleanup_because_touched]";
1783 use IPC
::ConcurrencyLimit
;
1784 my($basename) = File
::Basename
::basename
(__FILE__
);
1785 my $limit = IPC
::ConcurrencyLimit
->new
1788 path
=> "$workdir/IPC-ConcurrencyLimit-$basename-megaquery",
1790 my $limitid = $limit->get_lock;
1791 while (not $limitid) {
1792 warn "Another process appears to be running a megaquery, sleeping";
1794 $limitid = $limit->get_lock;
1796 my $dbi = mypgdbi
();
1797 my $sqlimit = 30_000_000
;
1798 my $elasticity = 2000;
1799 if ($Opt{maxfromdb
} && $Opt{maxfromdb
} < $sqlimit/$elasticity) {
1800 # the $elasticity here is a pure guess that says if we want
1801 # 200 hits, we should read 200*$elasticity records; indeed we
1802 # got 119 record from reading 200000
1803 $sqlimit = $elasticity*$Opt{maxfromdb
};
1806 my $sql = "SELECT id,state,dist,version,fulldate
1808 WHERE type=2 and state='fail'
1809 ORDER BY id DESC LIMIT $sqlimit";
1810 print "Info: about to megaquery ".gmtime()." UTC\n";
1811 my $rows = my_get_query
($dbi, $sql);
1812 print "Info: megaquery done ".gmtime()." UTC\n";
1822 my($want_ctgetreport, $touched_distv, $distroquestlabel, $anno, $timetogo,$urgenttimetogo) = @_;
1823 CTG
: for my $ctxi (0..$#$want_ctgetreport) {
1824 my $ctx = $want_ctgetreport->[$ctxi];
1825 my($dist,$distv,$version) = @
{$ctx}{"dist","distv","version"};
1826 $touched_distv->{$distv}=1;
1828 my $didit = call_ctgetreports
($ctx,1+$ctxi,1+$#$want_ctgetreport);
1830 my $lfh = $didit->{lfh
};
1831 # update the yml, we might have to parse something in the new reports
1832 my $dbi = mypgdbi
();
1833 $ctx = distroquestion
($dbi,1+$ctxi,$distroquestlabel,$dist,$version,$lfh);
1834 $ctx->{annotation
} = $anno->{$distv};
1835 $ctx->{lastcalc
} = $didit->{lc_ts
};
1836 $ctx->{lc_pass
} = $didit->{lc_pass
};
1837 $ctx->{lc_fail
} = $didit->{lc_fail
};
1838 store_ctx
($dbi,$ctx);
1841 enqueue
($distv,$#$want_ctgetreport,$ctxi);
1845 if ($time > $urgenttimetogo) {
1847 } elsif ($ctxi == $#$want_ctgetreport){
1851 && $time >= $timetogo
1852 && $ctxi >= MUST_CONSIDER
1857 my $utc = gmtime($timetogo);
1858 warn "DEBUG: proc[$$]ctxi[$ctxi]timetogo[$timetogo]that is[$utc UTC]";
1866 Originally the decision whether a new calc was due was made by a
1867 random function. We looked at the timestamp of the yaml file and of
1868 the slvfile. Then we read the yaml file and from it we inferred the
1869 upload date. Then we decided how often we wanted to calc a distro
1870 depending on its age. We mixed some randomness in and said this is
1871 urgent or not. Mostly crap.
1873 Mixing in cpanstats_distrofacts and analysis:distv:calctimestamp?
1874 Especially interested in pass/fail on last calc and pass/fail now.
1876 In cpanstats_distrofacts I see only (for Crypt-Password-0.28)
1879 "db_maxid" : 41454838,
1880 "db_maxid_ts" : "2014-04-18T09:29:04.016700Z",
1883 "thirdfail" : "2012-02-19 02:40z",
1884 "thirdfailid" : "20108048",
1885 "thirdpass" : "2012-02-18 03:29z",
1886 "thirdpassid" : "20078591"
1888 analysis:distv:calctimestamp has only 2500 entries at the moment, so
1889 it is not too interesting yet.
1891 Nein, distlookup ist unser Kandidat. Diese pg Tabelle hat
1893 cuti_ts | timestamp with time zone |
1894 cuti_pass | integer |
1895 cuti_fail | integer |
1896 last_calc_ts | timestamp with time zone |
1900 wobei cuti fuer counted_up_to_id steht. Und wobei last_calc_ts
1901 anscheinend redundant auf analysis:distv:calctimestamp ist.
1905 my($dbi,$dist,$version,$distv,$anno) = @_;
1906 $distv ||= "$dist-$version";
1907 $anno ||= read_annotations
();
1909 $dbi ||= db_quickaccess_handle
();
1910 my $sql = "SELECT last_calc_ts, lc_pass, lc_fail, cuti_ts, cuti_pass, cuti_fail
1911 FROM distlookup WHERE distv=?";
1912 my $rows = my_get_query
1918 my $expensive_type = 0; # 9 huge, 0 not
1919 my $lowactivity = "";
1920 my $maybeexpensive = "";
1921 if (!$rows->[0][4] || !$rows->[0][5]) {
1922 return +{ this_urgent
=> 0 };
1924 unless (grep { ($rows->[0][$_]||0)==0 } 1,2,4,5) {
1925 my($lc_ts,$lc_p,$lc_f,$cnt_ts,$cnt_p,$cnt_f) = @
{$rows->[0]};
1926 if ($cnt_p>1.025*$lc_p || $cnt_f>1.025*$lc_f) { # arbitrary
1927 my $lc_total = $lc_p + $lc_f;
1928 if ( $lc_total < 2400 ){
1929 return +{ this_urgent
=> 1 }; # activity
1931 my($limiting) = min
($cnt_p,$cnt_f);
1932 $maybeexpensive = "over 2400: $lc_p=>$cnt_p, $lc_f=>$cnt_f";
1933 if ( $lc_total/$limiting > 250 ) { # arbitrary
1934 $expensive_type = 9;
1936 $expensive_type = 5;
1939 } elsif ($cnt_p < $lc_p || $cnt_f < $lc_f) { # arbitrary
1940 # we would like to get rid of such brethren; maybe run verify-distlookup-lcpassfail.pl
1941 warn "Sigh, $distv has cnt_p < lc_p || cnt_f < lc_f ($cnt_p < $lc_p || $cnt_f < $lc_f)";
1942 return +{ this_urgent
=> 0 }; # fix up bad numbers
1943 } elsif ($cnt_p == $lc_p && $cnt_f == $lc_f) {
1944 $lowactivity = "NO growth: $lc_p=>$cnt_p, $lc_f=>$cnt_f";
1945 } elsif ($cnt_p < 1.002*$lc_p && $cnt_f < 1.002*$lc_f) { # arbitrary
1946 $lowactivity = "small growth: $lc_p=>$cnt_p, $lc_f=>$cnt_f";
1950 my($csdfacts) = cpanstats_distrofacts
($dbi,$dist,$version,$distv,undef);
1953 && ! $expensive_type
1954 && $csdfacts->{allversions
}
1955 && "ARRAY" eq ref $csdfacts->{allversions
}
1956 && scalar @
{$csdfacts->{allversions
}}
1957 && exists $csdfacts->{allversions
}[-1]{version
}
1958 && $csdfacts->{allversions
}[-1]{version
} ne $version
1960 warn "DEBUG: this is urgent: $distv $version ne $csdfacts->{allversions}[-1]{version}";
1961 return +{ this_urgent
=> 1 };
1964 return +{ this_urgent
=> 0, why_not_urgent
=> $lowactivity };
1967 my $averagewait = 86400; # XXX arbitrary: the smaller the more
1968 # calc; Actually: we use rand to avoid
1969 # spikes; zero waits and a double of this
1970 # wait are equally likely
1972 my $minimumwait = 1.9; # XXX arbitrary: the smaller the more calc;
1973 # Actually: for every year of age of the
1974 # distro we add this times $averagewait
1975 # waiting time. Since no age below 1 is
1976 # possible, this is the minimum wait;
1977 # setting from 1.7.to 1.8 when the usually
1978 # set task was 3500 but we only calculated
1979 # 1000; either we increase the mincalc or
1980 # reduce the pensum, I would guess;
1981 # 20160527: taking back to (1.7, 2500),
1982 # disk gets too full too quickly
1984 if ($maybeexpensive) {
1985 # XXX arbitrary: if we have indications that the calc takes a
1986 # longer time, push back
1987 $minimumwait = $expensive_type >= 6 ?
42 : 15;
1989 if ($anno->{$distv} && $minimumwait < 5) {
1990 $minimumwait = 5; # e.g. GFUJI/MouseX-Getopt-0.34.tar.gz
1993 my $slv_file = slv_file
($distv);
1994 my @slvstat = stat ($slv_file . "dv.gz");
1995 @slvstat = stat $slv_file unless @slvstat;
1996 my $age_of_calc = @slvstat ?
time-$slvstat[9] : 999999;
1998 my $yaml = read_ctx
($distv);
2000 and (!$yaml->{annotation
} || $yaml->{annotation
} ne $anno->{$distv})) {
2001 # looks like this annotation has not been stored yet
2002 return +{ this_urgent
=> 1 };
2004 my ($this_urgent, $why_not_urgent) = (0,"");
2005 my $age_in_secs = eval { age_in_secs
($yaml->{upload_date
}) }; # distro age in secs
2006 if ($@
|| !$age_in_secs) {
2009 Carp
::cluck
("WARNING: skipping '$yaml->{distv}' error[$@] upload_date[$yaml->{upload_date}] distv[$distv]");
2012 $why_not_urgent = $err;
2014 $why_not_urgent = "no upload date";
2016 return +{ this_urgent
=> $this_urgent, why_not_urgent
=> $why_not_urgent};
2018 my $seconds_per_year = 86400*365.25;
2019 my $age_points = $age_in_secs < $seconds_per_year ?
2020 1 : $age_in_secs/$seconds_per_year; # between 1 and 8
2021 my $rand = int(rand(2*$averagewait));
2022 my $rand2 = int($age_points*($rand+$averagewait*$minimumwait));
2024 $this_urgent = $age_of_calc > $rand2; # XXX arbitrary
2025 warn sprintf "DEBUGurg: age_of_calc[%s] age_points[%.3f] rand[%s] rand2[%s] urg[%d]\n",
2026 $age_of_calc, $age_points, $rand, $rand2, $this_urgent;
2027 if (!$this_urgent) {
2030 warn "DEBUG urg2: urgent because no slvstat"; # never happens
2032 $why_not_urgent = "age/point calculations";
2035 return +{ this_urgent
=> $this_urgent, why_not_urgent
=> $why_not_urgent};
2038 my($timetogo,$urgenttimetogo);
2040 $timetogo = $^T
+ ($Opt{mincalctime
} || MIN_CALCTIME
);
2041 $urgenttimetogo = MAX_CALCTIME
+ $^T
;
2044 my $mainentrytime = time;
2047 if ($Opt{onlystartwhennojobs
}) {
2048 my $redis = myredis
;
2050 my($zcard) = $redis->zcard("analysis:jobqueue:q");
2051 last WAITNOQ
if $zcard == 0;
2056 warn "saw --pick=$Opt{pick}, skipping readdir on solved/ directory";
2057 my $shadow_anno = read_annotations
();
2058 if (exists $shadow_anno->{$Opt{pick
}}) {
2059 $anno->{$Opt{pick
}} = $shadow_anno->{$Opt{pick
}};
2062 $anno = read_annotations
();
2063 unless (defined $Opt{maxfromsolved
} && 0 == $Opt{maxfromsolved
}) {
2064 # looks in workdir/solved/ where we find eg. a2pdf-1.13.{slv,slvdv,yml}
2065 $solved = CPAN
::Blame
::Model
::Solved
->all("readdir");
2068 my(%seen,@want_ctgetreport);
2069 # revisit all our friends first
2070 my $touched_distv = {};
2071 ANNO_SOLVED
: for my $set
2073 {name
=> "anno", data
=> $anno}, # anno is a hashref
2074 {name
=> "solved", data
=> $solved}, # solved is an arrayref
2076 my @k; # names like "a2pdf-1.13"
2077 my $urgent = $Opt{urgent
} || 0;
2079 } elsif ($Opt{pick
}) {
2080 # pick trumps, the others don't play when we are a worker
2082 } elsif ($set->{name
} eq "anno") {
2083 @k = keys %{$set->{data
}};
2084 if (defined $Opt{maxfromannotate
}) {
2085 pop @k while @k > $Opt{maxfromannotate
};
2087 } elsif ($set->{name
} eq "solved") {
2088 @k = map { $_->{distv
} } @
{$set->{data
}};
2089 if (defined $Opt{maxfromsolved
}) {
2090 pop @k while @k > $Opt{maxfromsolved
};
2095 my $distroquestlabel = substr($set->{name
},0,4) . "-" . scalar @k; # gives a bit of orientation in the runlog
2097 DISTV
: for my $k (@k) {
2098 $touched_distv->{$k}=1;
2099 my $d = CPAN
::DistnameInfo
->new("FOO/$k.tgz");
2100 my $dist = $d->dist;
2101 my $version = $d->version;
2103 next DISTV
if $seen{$dist,$version}++;
2104 my($this_urgent, $why_not_urgent);
2105 my $slv_file = slv_file
($k);
2108 } elsif (! -e
($slv_file."dv.gz") && ! -e
($slv_file."dv")) {
2110 } elsif ($Opt{pick
}) {
2113 my $href = this_urgent
(undef,$dist,$version,$k,$anno);
2114 ($this_urgent, $why_not_urgent) = @
{$href}{qw(this_urgent why_not_urgent)};
2116 unless ($this_urgent){
2117 printf "Warning: skipping %s (%s)\n", $k, $why_not_urgent;
2120 my $pgdbi = mypgdbi
();
2121 my $ctx = distroquestion
($pgdbi,$i,$distroquestlabel,$dist,$version,undef) or next;
2122 $ctx->{annotation
} = $anno->{$k};
2123 store_ctx
($pgdbi,$ctx);
2124 push @want_ctgetreport, $ctx;
2130 unless (defined $Opt{maxfromdb
} && 0==$Opt{maxfromdb
}) {
2131 $rows = megaquery
();
2134 my $skip_random = 0;
2141 $mindate = sprintf "%04d%02d%02d%02d%02d", $now[5]-$CPAN::Blame
::Config
::Cnntp
::Config
->{maxyears
}, @now[reverse 1..4];
2145 "DEBUG: before going through megaquery result: array want_ctgetreport[%d] rows from megaquery[%d]\n",
2146 scalar @want_ctgetreport,
2149 my $distroquestlabel = "B" . scalar @
$rows;
2150 my $why_last_article = "";
2151 ARTICLE
: while (my $row = shift @
$rows) {
2153 my($id,$state,$dist,$version,$date) = @
$row;
2155 $why_last_article = "no id in row[@$row]";
2158 if ($date lt $mindate){
2159 $why_last_article = "date[$date] lt mindate[$mindate] i[$i]";
2162 next ARTICLE
unless defined $dist and defined $version;
2163 if ($seen{$dist,$version}++){
2164 $why_last_article = "last action was skipping $dist,$version (reaching last ARTICLE via next)";
2167 # note: when we arrive here. We will display current $i in the leftmost column as in
2168 # 170 B2801844 13-10-29 147: 21: 0 KSTAR/Dancer-Plugin-DynamicConfig-0.04 (<0.07)
2170 if ($Opt{maxfromdb
} && $i > $Opt{maxfromdb
}) {
2173 my $pgdbi = mypgdbi
();
2174 my $ctx = distroquestion
($pgdbi,$i,$distroquestlabel,$dist,$version,undef) or next;
2175 $ctx->{annotation
} = $anno->{$ctx->{distv
}};
2176 store_ctx
($pgdbi,$ctx);
2177 if ($ctx->{greenish
} && $ctx->{greenish
} >= 3) {
2178 push @want_ctgetreport, $ctx;
2180 if ($i >= 487) { # XXX arbitrary
2182 # note: in 2013 we never reached 800, we reached the 8
2183 # year old distros before that, yesterday at $i==666,
2184 # and want_ctgetreport had 3823 entries
2185 if ($i >= 1200 && @want_ctgetreport >= 200) { # XXX arbitrary
2186 $why_last_article = "i[$i] > xxx and want_ctgetreport > 150";
2188 } elsif ($i >= 800 && @want_ctgetreport >= 300) { # XXX arbitrary
2189 $why_last_article = "i[$i] > xxx and want_ctgetreport > 200";
2193 printf "Debug: switching to skip_random mode\n";
2198 # compensate for the fact that we do not have and do not
2199 # even want to provide the computing power to calculate
2200 # all possible regressions simultaneously; yes, we are a
2201 # bit sloppy but do not believe anybody will notice; some
2202 # day we may want to stop this program to go into the
2203 # oldest data in the database because it will become
2204 # worthless; then we probably want to make sure all data
2205 # in a certain range get processed exactly once.
2206 my $skip = int rand 40000; # XXX arbitrary
2207 splice @
$rows, 0, $skip;
2210 $why_last_article = "simply reached end of loop";
2212 warn sprintf "DEBUG: Reached last ARTICLE why?[%s] array want_ctgetreport[%d]", $why_last_article, scalar @want_ctgetreport;
2213 @want_ctgetreport = sort
2215 # Wishlist: first sort criterium should be in favor of
2216 # those who have no calc yet at all. But then we might
2217 # schedule them too early because cpantesters are behind
2219 ($b->{upload_date
}||"") cmp ($a->{upload_date
}||"")
2221 ($a->{distro_age
}||0) <=> ($b->{distro_age
}||0)
2223 $a <=> $b # crap, just the address because there is nothing better in sight
2224 } @want_ctgetreport;
2225 $distroquestlabel = "ctg-" . scalar @want_ctgetreport;
2226 main_enqueuer
(\
@want_ctgetreport, $touched_distv, $distroquestlabel, $anno, $timetogo,$urgenttimetogo);
2228 warn "nothing will be deleted, we're on a picking trip";
2230 # XXX temporarily disabling cleanup_quick on 2014-01-01;
2231 # because we do not understand the exact implications of the
2232 # queuing system; but this for sure must be re-examined.
2233 #### cleanup_quick($touched_distv);
2234 WAITTOGO
: while (time < $timetogo) {
2235 if ($Opt{leavewhennojobsleft
}) {
2236 my $redis = myredis
;
2237 my($zcard) = $redis->zcard("analysis:jobqueue:q");
2238 last WAITTOGO
if $zcard == 0;
2256 Too slow update cycle
2258 Often does not recognize when a newer version is available, maybe only
2259 when the newer version is all-green or so?
2263 =head1 HISTORICAL NOTES
2265 show the most recent FAILs on cpantesters. Compare to annotations.txt,
2266 which I maintain manually. Roughly the job is
2267 organized as (1) update the database syncing with cpantesters, (2)
2268 process (2.1) annotations, (2.2) previously solved regressions, (2.3)
2269 database to schedule some 1000 batchjob-candidates, with (2.3.1) being the
2270 megaquery and (2.3.2) stepping through it. (3) Run these batchjobs, many
2271 of which are being skipped. (4) delete several thousand outdated files
2272 and/or directories. Timings on 2013-10-29 and -30
2274 | | 20131029 | 20131127 |
2275 |---------+----------+----------|
2276 | (1) | 06:30:09 | |
2277 | (2.1) | 10:54:22 | 10:28:49 |
2278 | (2.2) | 15:15:43 | 11:09:37 |
2279 | (2.3.1) | 20:41:42 | 13:26:49 |
2280 | (2.3.2) | 21:08:51 | 13:29:02 |
2281 | (3) | 22:53:15 | 14:29:28 |
2282 | (4) | 16:44:07 | |
2283 | (T) | 16:45:00 | |
2285 The schema of cpanstats.db once was considered to be
2287 0 | id | 5379605 |5430514
2288 1 | state | pass |fail
2289 2 | postdate | 200909 |
2290 3 | tester | bingos@cpan.org |
2291 4 | dist | Yahoo-Photos |Apache-Admin-Config
2292 5 | version | 0.0.2 |0.94
2293 6 | platform | i386-freebsd-thread-multi-64int |i386-freebsd
2295 8 | osname | freebsd |
2296 9 | osvers | 7.2-release |7.2-release
2297 10 | date | 200909190440 |200909190440
2301 id INTEGER PRIMARY KEY, -- 1
2312 guid char(36) DEFAULT '', -- 12
2313 type int(2) default 0) -- 13
2315 guid and type are new. guid replaces id but is in a different format. schema is generated by
2317 Revision history for Perl module CPAN::Testers::Data::Generator.
2320 - fixes to change the 'id' (was NNTP ID) to an auto incremental field.
2321 - reworked logic to better fit latest changes.
2322 - added repository to META.yml.
2323 - documentation updates.
2326 - fixes to accommodate GUID changes.
2327 - added support for 'type' field.
2329 From reading the source, type seems to be 0 or 2 for valid records. 1
2330 is for pause announcements and 3 is for disabled reports. IIUC 0 is
2331 for old records and 2 for new ones. That would mean that there is a
2332 constant number of records with type==0?
2334 A few months ago we counted:
2336 sqlite> select type,count(*) from cpanstats group by type;
2344 sqlite> select type,count(*) from cpanstats group by type;
2350 So we can rely on having to read only type==2 from now on.
2352 But before we can do anything we must
2353 s/CPAN::WWW::Testers::Generator::Database/CPAN::Testers::Data::Generator/g; or
2354 something equivalent.
2356 I see no reason why I should make use of the guid except maybe for
2357 links to cpantesters. Internal stuff should work with id just like it
2360 Update 2011-10-12 andk: schema change again, new schema is:
2362 id INTEGER PRIMARY KEY, -- 1
2373 fulldate TEXT, -- is new, probably was date (11)