new perls v5.39.10
[andk-cpan-tools.git] / bin / analysis-oldreports-extinction-program.pl
blob8d3cde3cee4d94cba796dda933979906da5125f9
1 #!/home/andreas/src/installed-perls/v5.16.0/4e6d/bin/perl
3 # use 5.010;
4 use strict;
5 use warnings;
7 =head1 NAME
11 =head1 SYNOPSIS
15 =head1 OPTIONS
17 =over 8
19 =cut
21 my $optpod = <<'=back';
23 =item B<--focus=s> Default: tar
25 Valid values: C<tar> or C<dir>. RTFS.
27 =item B<--help|h!>
29 This help
31 =item B<--keepresults=i> Default: 4
33 Number of result-directories or result-tarballs to never delete.
35 =item B<--minage=f> Default: 690
37 Minimum age in days of the target directory
38 (measured with C<-M>).
40 =item B<--noninteractive|noni>
42 Don't ask, just apply the default answer.
44 =item B<--untildf=i> Default: 48000000
46 Run extinction until available blocks reported
47 by df is above this value.
49 =item B<--yes|y!>
51 The interactive question is normally answered with no as the default.
52 With --yes the default becomes yes.
54 =back
56 =head1 DESCRIPTION
58 What we look for is an old and big and outdated directory. For the
59 arbitrary current thresholds RTFS. When we have listed one, we exit.
60 We show it with age, size and neighbors of the same Distname.
62 =head1 BUGS
64 The files in C<workdir/solved/> are removed too (which is good) but
65 they were not removed before 2016-03-31, so we should bulk remove the
66 left overs separately some day; this affects files like the output of:
68 ls -lt /home/andreas/data/cnntp-solver-2009/workdir/solved/Any-Moose-0.*
69 ls -lt /home/andreas/data/cnntp-solver-2009/workdir/solved/package-watchdog-*
71 Note that package-watchdog-0.07.slv is the oldest(2009) and
72 package-watchdog-0.04.yml is the newest (2013). Why? Because the
73 package has ceased to exist. Update 2017-12-31: we have now
74 analysis-oldsolutions-extinction-program.pl for that.
76 What is the thing with suggest_alternative()? It currently says only
77 FIXME and dumps stuff. The idea was that when the lowest version
78 number is not old enough, then we could try whether the second-lowest
79 version number is probably old enough and then we could remove that
80 one instead. suggest_alternative shows what we would get as the
81 outcome and when I saw this, I was not convinced this to be a good
82 idea. Probably counterproductive. Watch out for cases where this
83 algorithm would remove the newest version and keep the older ones. Not
84 trustworthy!
86 =cut
89 use FindBin;
90 use lib "$FindBin::Bin/../lib";
91 BEGIN {
92 push @INC, qw( );
95 use Dumpvalue;
96 use File::Basename qw(dirname);
97 use File::Path qw(mkpath rmtree);
98 use File::Spec;
99 use File::Temp;
100 use Getopt::Long;
101 use Pod::Usage;
102 use Hash::Util qw(lock_keys);
103 use Redis 1.967;
104 sub myredis () {
105 my $redis = Redis->new(reconnect => 120, every => 1000);
108 use LWP::UserAgent;
109 use JSON::XS;
110 use CPAN::DistnameInfo;
111 use List::AllUtils qw(reduce);
112 use CPAN::Version;
113 use FindBin;
114 use lib "$FindBin::Bin/../CPAN-Blame/lib";
115 use CPAN::Blame::Config::Cnntp;
116 use CPAN::DistnameInfo;
117 use Term::Prompt qw(prompt);
118 use YAML::XS;
119 use Filesys::Df qw(df);
120 use Time::HiRes qw(sleep);
122 my $ua = LWP::UserAgent->new();
123 $ua->default_header("Accept-Encoding", "gzip");
124 my $jsonxs = JSON::XS->new->indent(0);
126 my @opt = $optpod =~ /B<--(\S+)>/g;
127 our %Opt;
128 lock_keys %Opt, map { /([^=|!]+)/ } @opt;
129 GetOptions(\%Opt,
130 @opt,
131 ) or pod2usage(1);
132 if ($Opt{help}) {
133 pod2usage(0);
135 for my $opt (qw(focus untildf minage keepresults)) {
136 next if defined $Opt{$opt};
137 LINE: for my $line (split /\n/, $optpod) {
138 if ($line =~ /--\Q$opt\E=.+?Default: (.+)/) {
139 $Opt{$opt} = $1;
140 last LINE;
145 sub show_df (){
146 my $df = df '/';
147 warn "df bavail: $df->{bavail}\n";
148 $df->{bavail};
151 sub rm_other ($) {
152 my($distv) = @_;
153 for my $ext (qw(slv.LCK yml slvdv.gz slv slvdv)) {
154 my $workdirfile = "/home/andreas/data/cnntp-solver-2009/workdir/solved/$distv.$ext";
155 if (-e $workdirfile) {
156 warn "Deleting $workdirfile\n";
157 unlink $workdirfile or die "Could not unlink '$workdirfile': $!";
160 for my $zs (qw(analysis:distv:calctimestamp)) {
161 warn "zrem-redis: redis-cli zrem $zs $distv\n";
162 my $redis = myredis;
163 $redis->zrem($zs, $distv);
165 warn "Done.\n";
167 sub treat_dir ($$$$) {
168 my($parent, $distv, $dist, $v) = @_;
169 my $query = sprintf "http://fastapi.metacpan.org/v1/release/_search?q=distribution:%s&fields=name,date,status,version,author&size=400", $dist;
170 warn "$query\n";
171 sleep 0.3;
172 my $resp = $ua->get($query);
173 # print $query;
174 my $jsontxt = $resp->decoded_content;
175 my $j = eval { $jsonxs->decode($jsontxt); };
176 if (! $j or $@){
177 warn "ERROR: decode failed on query '$query'";
178 return;
180 my $hits = $j->{hits}{hits};
181 my($releasedate) = map { $_->{fields}{date} } grep { $_->{fields}{name} eq $distv } @$hits;
182 unless ($releasedate) {
183 warn "WARNING: Did not find own releasedate for $distv";
184 return;
186 my $cpanversion = reduce {
187 CPAN::Version->vgt($a,$b) ? $a : $b
188 } map { $_->{fields}{version} } grep { $_->{fields}{date} ge $releasedate } @$hits;
189 if (CPAN::Version->vgt($cpanversion, $v)) {
190 my $default_answer = $Opt{yes} ? "y" : "n";
191 my $answer;
192 if ($Opt{noninteractive}) {
193 $answer = $default_answer;
194 } else {
195 $answer = lc prompt "x",
196 "Shall I delete $distv (cpanversion=$cpanversion) and related workdir files? (y/n)", "", 'y';
198 if ($answer eq "y") {
199 rmtree "$parent/$distv";
200 rm_other $distv;
201 } else {
202 warn "INFO: Skipping $distv";
206 sub suggest_alternative ($$$) {
207 my($timeline, $l, $minage) = @_;
208 my @timeline = sort { CPAN::Version->vcmp($a->{v}, $b->{v}) } @$timeline;
209 warn sprintf "FIXME: investigate an alternative for %s\n", YAML::XS::Dump(\@timeline);
210 for ( my $i=0; $i<=$#$timeline; $i++ ) {
211 if ($timeline[$i]{M} >= $minage) {
212 my($who) = map { $_->[0] } grep { $timeline[$i]{v} eq $_->[0]{v} } values %$l;
213 return { i => $i, object => $timeline[$i], who => $who };
216 return;
219 my $root = $CPAN::Blame::Config::Cnntp::Config->{ctgetreports_dir};
220 my $bavail = show_df;
221 my $outer_purged_something = 0;
222 LOOP: while ($bavail < $Opt{untildf}) {
223 my $purged_something = 0;
224 opendir my $dh, $root or die "Could not open '$root': $!";
225 OUTER: for my $letter (readdir $dh) {
226 next unless $letter =~ /^[A-Za-z]$/;
227 opendir my $dh2, "$root/$letter" or die "Could not open '$root/$letter': $!";
228 my %odist;
229 INNER: for my $distdir (readdir $dh2) {
230 next if $distdir eq "." or $distdir eq "..";
231 my $tdir_or_tar = "$root/$letter/$distdir";
232 my($distv, $isdir, $want_treat_dir);
233 if (0) {
234 } elsif (-d $tdir_or_tar) {
235 # warn "looking at directory $tdir_or_tar\n";
236 if ($Opt{focus} eq "tar") {
237 next INNER;
238 } elsif ($Opt{focus} eq "dir") {
239 if (-f "$tdir_or_tar.tar") {
240 warn "skipping $tdir_or_tar because tarball also found";
241 my($most_recent_file) = qx"find $tdir_or_tar -type f | xargs ls -t";
242 chomp $most_recent_file;
243 my @triple = ($tdir_or_tar, "$tdir_or_tar.tar", $most_recent_file);
244 system "ls -ld @triple";
245 my @triple_age = sort { $a <=> $b } map { -M $_ } @triple;
246 warn "triple_age [@triple_age]";
247 # There must be a rule to say we remove the
248 # dir or not (because somebody works with it).
249 # We know that the directory timestamp can be
250 # very old because nobody updates it. So
251 # usually the youngest timestamp is from a
252 # downloaded file or the tarball. But removing
253 # the directory is dangerous, no matter how
254 # old it is
255 # Shall I delete /home/andreas/data/cpantesters/reports/L/Log-Agent-1.000.tar
256 # (Mtime=718.514236111111111) and related workdir files? (y/n/q)
257 # [default y]
258 # Deleting /home/andreas/data/cpantesters/reports/L/Log-Agent-1.000.tar
259 # Deleting /home/andreas/data/cnntp-solver-2009/workdir/solved/Log-Agent-1.000.slv.LCK
260 # Deleting /home/andreas/data/cnntp-solver-2009/workdir/solved/Log-Agent-1.000.yml
261 # Deleting /home/andreas/data/cnntp-solver-2009/workdir/solved/Log-Agent-1.000.slvdv.gz
262 # Deleting /home/andreas/data/cnntp-solver-2009/workdir/solved/Log-Agent-1.000.slv
263 # zrem-redis: redis-cli zrem analysis:distv:calctimestamp Log-Agent-1.000
264 # Done.
265 # Done.
266 # df bavail: 26508336
268 # the solution is probably in the LCK file.
271 # but when in dearest need, we can say we
272 # remove the directory when the youngest is
273 # older than 5 days
274 if ($triple_age[0] > 5) {
275 warn "Ruthless deletion only directory: $tdir_or_tar\n";
276 rmtree $tdir_or_tar;
278 next INNER;
279 } else {
280 $want_treat_dir = 1;
282 } else {
283 die "Illegal focus '$Opt{focus}'";
285 $distv = $distdir;
286 $isdir = 1;
287 } elsif ($distdir =~ /(.+)\.tar$/) {
288 $distv = $1;
289 $isdir = 0;
290 } else {
291 # ignore me for now
292 next INNER;
294 my $d = CPAN::DistnameInfo->new("FOO/$distv.tgz");
295 my $dist = $d->dist;
296 my $v = $d->version;
297 if ($want_treat_dir) {
298 treat_dir("$root/$letter", $distdir, $dist, $v);
299 $bavail = show_df;
300 sleep 0.1;
301 next INNER;
303 my $a = $odist{$dist}{$distv} ||= [];
304 my $i = @$a;
305 push @$a,
307 abs => $tdir_or_tar,
308 isdir => $isdir,
309 M => -M $tdir_or_tar,
310 dist => $dist,
311 distv => $distv,
312 v => $v,
313 i => $i,
316 ODIST: for my $d (sort {scalar keys %{$odist{$b}} <=> scalar keys %{$odist{$a}}} keys %odist) {
317 my $l = $odist{$d};
318 my($howold_by_M, $who_finally_deletable);
320 next ODIST unless scalar keys %$l >= $Opt{keepresults};
322 # m_oldest ==> oldest by tarball mtime
323 my $m_oldest = reduce { $a > $b ? $a : $b } map { $_->{M} } map { @{$l->{$_}} } keys %$l;
324 next ODIST unless $m_oldest >= $Opt{minage};
326 # v_oldest ==> oldest by version number
327 my $v_oldest = reduce { CPAN::Version->vlt($a,$b) ? $a : $b } map { $_->{v} } map { @{$l->{$_}} } keys %$l;
328 my($who_v_oldest) = map { $_->[0] } grep { $v_oldest eq $_->[0]{v} } values %$l;
329 $howold_by_M = $who_v_oldest->{M};
330 if ($howold_by_M < $Opt{minage}) {
331 warn "Oldest by version on $who_v_oldest->{dist} would be $v_oldest, but it is younger than $Opt{minage}, namely $howold_by_M, investigating $who_v_oldest->{abs}\n";
332 my @timeline =
333 map { +{ M => $_->{M}, v => $_->{v} } } map { @{$l->{$_}} } keys %$l;
334 my $ctx = suggest_alternative(\@timeline, $l, $Opt{minage});
335 if ($ctx) {
336 warn sprintf "FIXME 2: would suggest %s\n", YAML::XS::Dump($ctx);
338 next ODIST;
339 } else {
340 $who_finally_deletable = $who_v_oldest;
344 my($abs_of_oldest);
345 DISTV: for my $distv (keys %$l) {
346 for my $i (0..$#{$l->{$distv}}) {
347 if ($l->{$distv}[$i]{M} == $howold_by_M) {
348 $abs_of_oldest = $l->{$distv}[$i]{abs};
349 last DISTV;
353 open my $duh, "-|", du => "-s", $abs_of_oldest or die "Could not fork for 'du -s $abs_of_oldest': $!";
354 my($duline) = <$duh>;
355 my($du_of_oldest) = split " ", $duline;
356 $who_finally_deletable->{du} = $du_of_oldest;
358 # next unless $du_of_oldest > 7000; # arbitrary
359 my $ll = []; # = [ map { @{$l->{$_}} } sort { $l->{$b}->{M} <=> $l->{$a}->{M} } keys %$l ];
360 for my $distv (keys %$l) {
361 for my $i (0..$#{$l->{$distv}}) {
362 push @$ll, $l->{$distv}[$i];
366 my $i = 0;
367 print YAML::XS::Dump [map {$_->{'_'}=++$i;$_} sort { CPAN::Version->vcmp($b->{v},$a->{v}) } @$ll];
369 my $default_answer = $Opt{yes} ? "y" : "n";
370 my $answer;
371 if ($Opt{noninteractive}) {
372 $answer = $default_answer;
373 } else {
374 $answer = lc prompt "x",
375 "Shall I delete $abs_of_oldest (Mtime=$howold_by_M) and related workdir files? (y/n/q)", "", $default_answer;
377 if ($answer eq "q") {
378 warn "Leaving LOOP\n";
379 last LOOP;
380 } elsif ($answer eq "y") {
381 warn "Deleting $abs_of_oldest\n";
382 rmtree $abs_of_oldest;
383 my $distv = $who_finally_deletable->{distv};
384 rm_other($distv);
385 warn "Done.\n";
386 $purged_something = 1;
387 $outer_purged_something = 1;
388 } else {
389 warn "Nothing deleted\n";
391 last OUTER;
394 $bavail = show_df;
395 unless ($purged_something) {
396 my $else = $outer_purged_something ? " else" : "";
397 warn "Info: Nothing$else found to delete, giving up";
398 last LOOP;
400 sleep 0.2;
403 # Local Variables:
404 # mode: cperl
405 # cperl-indent-level: 4
406 # End: