Sort the yaml dump by v desc
[andk-cpan-tools.git] / bin / analysis-oldreports-extinction-program.pl
blobcf268f7217dad2136b8afb3e4bafc84805cb0d4d
1 #!/home/andreas/src/installed-perls/v5.16.0/4e6d/bin/perl
3 # use 5.010;
4 use strict;
5 use warnings;
7 =head1 NAME
11 =head1 SYNOPSIS
15 =head1 OPTIONS
17 =over 8
19 =cut
21 my $optpod = <<'=back';
23 =item B<--focus=s> Default: tar
25 Valid values: C<tar> or C<dir>. RTFS.
27 =item B<--help|h!>
29 This help
31 =item B<--keepresults=i> Default: 4
33 Number of result-directories or result-tarballs to never delete.
35 =item B<--minage=f> Default: 690
37 Minimum age in days of the target directory
38 (measured with C<-M>).
40 =item B<--noninteractive|noni>
42 Don't ask, just apply the default answer.
44 =item B<--untildf=i> Default: 48000000
46 Run extinction until available blocks reported
47 by df is above this value.
49 =item B<--yes|y!>
51 The interactive question is normally answered with no as the default.
52 With --yes the default becomes yes.
54 =back
56 =head1 DESCRIPTION
58 What we look for is an old and big and outdated directory. For the
59 arbitrary current thresholds RTFS. When we have listed one, we exit.
60 We show it with age, size and neighbors of the same Distname.
62 =head1 BUGS
64 The files in C<workdir/solved/> are removed too (which is good) but
65 they were not removed before 2016-03-31, so we should bulk remove the
66 left overs separately some day; this affects files like the output of:
68 ls -lt /home/andreas/data/cnntp-solver-2009/workdir/solved/Any-Moose-0.*
69 ls -lt /home/andreas/data/cnntp-solver-2009/workdir/solved/package-watchdog-*
71 Note that package-watchdog-0.07.slv is the oldest(2009) and
72 package-watchdog-0.04.yml is the newest (2013). Why? Because the
73 package has ceased to exist.
75 =cut
78 use FindBin;
79 use lib "$FindBin::Bin/../lib";
80 BEGIN {
81 push @INC, qw( );
84 use Dumpvalue;
85 use File::Basename qw(dirname);
86 use File::Path qw(mkpath rmtree);
87 use File::Spec;
88 use File::Temp;
89 use Getopt::Long;
90 use Pod::Usage;
91 use Hash::Util qw(lock_keys);
92 use Redis 1.967;
93 sub myredis () {
94 my $redis = Redis->new(reconnect => 120, every => 1000);
97 use LWP::UserAgent;
98 use JSON::XS;
99 use CPAN::DistnameInfo;
100 use List::AllUtils qw(reduce);
101 use CPAN::Version;
102 use FindBin;
103 use lib "$FindBin::Bin/../CPAN-Blame/lib";
104 use CPAN::Blame::Config::Cnntp;
105 use CPAN::DistnameInfo;
106 use Term::Prompt qw(prompt);
107 use YAML::XS;
108 use Filesys::Df qw(df);
109 use Time::HiRes qw(sleep);
111 my $ua = LWP::UserAgent->new();
112 $ua->default_header("Accept-Encoding", "gzip");
113 my $jsonxs = JSON::XS->new->indent(0);
115 my @opt = $optpod =~ /B<--(\S+)>/g;
116 our %Opt;
117 lock_keys %Opt, map { /([^=|!]+)/ } @opt;
118 GetOptions(\%Opt,
119 @opt,
120 ) or pod2usage(1);
121 if ($Opt{help}) {
122 pod2usage(0);
124 for my $opt (qw(focus untildf minage keepresults)) {
125 next if defined $Opt{$opt};
126 LINE: for my $line (split /\n/, $optpod) {
127 if ($line =~ /--\Q$opt\E=.+?Default: (.+)/) {
128 $Opt{$opt} = $1;
129 last LINE;
134 sub show_df (){
135 my $df = df '/';
136 warn "df bavail: $df->{bavail}\n";
137 $df->{bavail};
140 sub rm_other ($) {
141 my($distv) = @_;
142 for my $ext (qw(slv.LCK yml slvdv.gz slv slvdv)) {
143 my $workdirfile = "/home/andreas/data/cnntp-solver-2009/workdir/solved/$distv.$ext";
144 if (-e $workdirfile) {
145 warn "Deleting $workdirfile\n";
146 unlink $workdirfile or die "Could not unlink '$workdirfile': $!";
149 for my $zs (qw(analysis:distv:calctimestamp)) {
150 warn "zrem-redis: redis-cli zrem $zs $distv\n";
151 my $redis = myredis;
152 $redis->zrem($zs, $distv);
154 warn "Done.\n";
156 sub treat_dir ($$$$) {
157 my($parent, $distv, $dist, $v) = @_;
158 my $query = sprintf "http://fastapi.metacpan.org/v1/release/_search?q=distribution:%s&fields=name,date,status,version,author&size=400", $dist;
159 sleep 0.2;
160 my $resp = $ua->get($query);
161 # print $query;
162 my $jsontxt = $resp->decoded_content;
163 my $j = eval { $jsonxs->decode($jsontxt); };
164 if (! $j or $@){
165 warn "ERROR: decode failed on query '$query'";
166 return;
168 my $hits = $j->{hits}{hits};
169 my($releasedate) = map { $_->{fields}{date} } grep { $_->{fields}{name} eq $distv } @$hits;
170 unless ($releasedate) {
171 warn "WARNING: Did not find own releasedate for $distv";
172 return;
174 my $cpanversion = reduce {
175 CPAN::Version->vgt($a,$b) ? $a : $b
176 } map { $_->{fields}{version} } grep { $_->{fields}{date} ge $releasedate } @$hits;
177 if (CPAN::Version->vgt($cpanversion, $v)) {
178 my $default_answer = $Opt{yes} ? "y" : "n";
179 my $answer;
180 if ($Opt{noninteractive}) {
181 $answer = $default_answer;
182 } else {
183 $answer = lc prompt "x",
184 "Shall I delete $distv (cpanversion=$cpanversion) and related workdir files? (y/n)", "", 'y';
186 if ($answer eq "y") {
187 rmtree "$parent/$distv";
188 rm_other $distv;
189 } else {
190 warn "INFO: Skipping $distv";
195 my $root = $CPAN::Blame::Config::Cnntp::Config->{ctgetreports_dir};
196 my $bavail = show_df;
197 my $outer_purged_something = 0;
198 LOOP: while ($bavail < $Opt{untildf}) {
199 my $purged_something = 0;
200 opendir my $dh, $root or die "Could not open '$root': $!";
201 OUTER: for my $letter (readdir $dh) {
202 next unless $letter =~ /^[A-Za-z]$/;
203 opendir my $dh2, "$root/$letter" or die "Could not open '$root/$letter': $!";
204 my %odist;
205 INNER: for my $distdir (readdir $dh2) {
206 next if $distdir eq "." or $distdir eq "..";
207 my $tdir_or_tar = "$root/$letter/$distdir";
208 my($distv, $isdir, $want_treat_dir);
209 if (0) {
210 } elsif (-d $tdir_or_tar) {
211 warn "looking at directory $tdir_or_tar\n";
212 if ($Opt{focus} eq "tar") {
213 next INNER;
214 } elsif ($Opt{focus} eq "dir") {
215 if (-f "$tdir_or_tar.tar") {
216 warn "skipping $tdir_or_tar because tarball also found";
217 my($most_recent_file) = qx"find $tdir_or_tar -type f | xargs ls -t";
218 chomp $most_recent_file;
219 my @triple = ($tdir_or_tar, "$tdir_or_tar.tar", $most_recent_file);
220 system "ls -ld @triple";
221 my @triple_age = sort { $a <=> $b } map { -M $_ } @triple;
222 warn "triple_age [@triple_age]";
223 # There must be a rule to say we remove the
224 # dir or not (because somebody works with it)
225 next INNER;
226 } else {
227 $want_treat_dir = 1;
229 } else {
230 die "Illegal focus '$Opt{focus}'";
232 $distv = $distdir;
233 $isdir = 1;
234 } elsif ($distdir =~ /(.+)\.tar$/) {
235 $distv = $1;
236 $isdir = 0;
237 } else {
238 # ignore me for now
239 next INNER;
241 my $d = CPAN::DistnameInfo->new("FOO/$distv.tgz");
242 my $dist = $d->dist;
243 my $v = $d->version;
244 if ($want_treat_dir) {
245 treat_dir("$root/$letter", $distdir, $dist, $v);
246 $bavail = show_df;
247 sleep 1;
248 next INNER;
250 my $a = $odist{$dist}{$distv} ||= [];
251 my $i = @$a;
252 push @$a,
254 abs => $tdir_or_tar,
255 isdir => $isdir,
256 M => -M $tdir_or_tar,
257 dist => $dist,
258 distv => $distv,
259 v => $v,
260 i => $i,
263 for my $d (sort {scalar keys %{$odist{$b}} <=> scalar keys %{$odist{$a}}} keys %odist) {
264 my $l = $odist{$d};
265 next unless scalar keys %$l > $Opt{keepresults};
267 # m_oldest ==> oldest by tarball mtime
268 my $m_oldest = reduce { $a > $b ? $a : $b } map { $_->{M} } map { @{$l->{$_}} } keys %$l;
269 next unless $m_oldest >= $Opt{minage};
271 # v_oldest ==> oldest by version number
272 my $v_oldest = reduce { $a < $b ? $a : $b } map { $_->{v} } map { @{$l->{$_}} } keys %$l;
273 my($who_v_oldest) = map { $_->[0] } grep { $v_oldest == $_->[0]{v} } values %$l;
274 my $howold_by_M = $who_v_oldest->{M};
275 if ($howold_by_M < $Opt{minage}) {
276 warn "Oldest_by_version would be $v_oldest, but it is younger than $Opt{minage}, namely $howold_by_M, skipping";
277 next;
280 my($abs_of_oldest);
281 DISTV: for my $distv (keys %$l) {
282 for my $i (0..$#{$l->{$distv}}) {
283 if ($l->{$distv}[$i]{M} == $howold_by_M) {
284 $abs_of_oldest = $l->{$distv}[$i]{abs};
285 last DISTV;
289 open my $duh, "-|", du => "-s", $abs_of_oldest or die "Could not fork for 'du -s $abs_of_oldest': $!";
290 my($duline) = <$duh>;
291 my($du_of_oldest) = split " ", $duline;
292 $who_v_oldest->{du} = $du_of_oldest;
294 # next unless $du_of_oldest > 7000; # arbitrary
295 my $ll = []; # = [ map { @{$l->{$_}} } sort { $l->{$b}->{M} <=> $l->{$a}->{M} } keys %$l ];
296 for my $distv (keys %$l) {
297 for my $i (0..$#{$l->{$distv}}) {
298 push @$ll, $l->{$distv}[$i];
301 print YAML::XS::Dump [sort { $b->{v} <=> $a->{v} } @$ll];
302 my $default_answer = $Opt{yes} ? "y" : "n";
303 my $answer;
304 if ($Opt{noninteractive}) {
305 $answer = $default_answer;
306 } else {
307 $answer = lc prompt "x",
308 "Shall I delete $abs_of_oldest (Mtime=$howold_by_M) and related workdir files? (y/n)", "", $default_answer;
310 if ($answer eq "y") {
311 warn "Deleting $abs_of_oldest\n";
312 rmtree $abs_of_oldest;
313 my $distv = $who_v_oldest->{distv};
314 rm_other($distv);
315 warn "Done.\n";
316 $purged_something = 1;
317 $outer_purged_something = 1;
318 } else {
319 warn "Nothing deleted\n";
321 last OUTER;
324 $bavail = show_df;
325 unless ($purged_something) {
326 my $else = $outer_purged_something ? " else" : "";
327 die "Nothing$else found to delete, giving up";
329 sleep 1;
332 # Local Variables:
333 # mode: cperl
334 # cperl-indent-level: 4
335 # End: