remove dupes
[andk-cpan-tools.git] / bin / annotate-reductionist.pl
blobbf5241a06cbe6fbc87776da25d4918abf5241510
1 #!/usr/bin/perl
3 # die " Deadly, don't use yet. We must implement date verification as well, at least ";
5 # Have eliminated 10 lines: Dancer2-Logger-Radis-0.001(=>0.002) Yote-0.1022(=>2.02) DBD-SQLAnywhere-2.08(=>2.13) Template-Mustache-1.0.0_0(=>1.1.0) Data-Rmap-0.64(=>0.65) Pcore-PDF-v0.1.0(=>v0.4.4) IO-Socket-Socks-0.71(=>0.74) Data-Sah-Resolve-0.003(=>0.007) Git-PurePerl-0.51(=>0.53) Archive-Zip-1.57(=>1_11)
8 # use 5.010;
9 use strict;
10 use warnings;
12 =head1 NAME
16 =head1 SYNOPSIS
20 =head1 OPTIONS
22 =over 8
24 =cut
26 my @opt = <<'=back' =~ /B<--(\S+)>/g;
28 =item B<--annotatefile=s>
30 Defaults to C<annotate.txt>. Filename to work on.
32 =item B<--debug|d!>
34 More output
36 =item B<--dry-run|n!>
38 Only the diagnostics, no overwrite
40 =item B<--help|h!>
42 This help
44 =item B<--max=i>
46 Stop after how many
48 =item B<--rand!>
50 If true, pick your sample randomly from all lines
52 =back
54 =head1 DESCRIPTION
56 Visits fastapi for each line of the annotate.txt file and deletes
57 lines that have a newer counterpart.
59 =cut
62 use FindBin;
63 use lib "$FindBin::Bin/../lib";
64 BEGIN {
65 push @INC, qw( );
68 use Time::Piece;
69 use Dumpvalue;
70 use File::Basename qw(dirname);
71 use File::Path qw(mkpath);
72 use File::Spec;
73 use File::Temp;
74 use Getopt::Long;
75 use Pod::Usage;
76 use Hash::Util qw(lock_keys);
78 our %Opt;
79 lock_keys %Opt, map { /([^=|!]+)/ } @opt;
80 GetOptions(\%Opt,
81 @opt,
82 ) or pod2usage(1);
83 if ($Opt{help}) {
84 pod2usage(0);
86 $Opt{annotatefile} //= "annotate.txt";
88 use LWP::UserAgent;
89 use JSON::XS;
90 use CPAN::DistnameInfo;
91 use List::AllUtils qw(reduce);
92 use CPAN::Version;
94 my $ua = LWP::UserAgent->new();
95 $ua->default_header("Accept-Encoding", "gzip");
96 my $jsonxs = JSON::XS->new->indent(0);
98 my @Sredu = my @Stmp = do { open my $fh, $Opt{annotatefile} or die; local $/="\n"; map {chop;$_} <$fh> };
100 my $max = defined $Opt{max} ? $Opt{max} : scalar @Stmp;
101 my $maxi = $max - 1;
103 my @DONE;
105 for my $i (0..$maxi) {
106 my $si = $Opt{rand} ? int rand @Stmp : $i;
107 my($distv) = $Stmp[$si] =~ /(\S+)/;
108 my %w = ( distv => $distv ); # work
109 warn "\ndistv => $distv\n" if $Opt{debug};
110 my $d = CPAN::DistnameInfo->new("A/AA/AAA/$distv.tar.gz");
111 $w{version} = $d->version;
112 warn "version => $w{version}\n" if $Opt{debug};
113 my $query = sprintf "http://fastapi.metacpan.org/v1/release/_search?q=distribution:%s&fields=name,date,status,version,author&size=400&_source=tests", $d->dist;
114 my $resp = $ua->get($query);
115 # print $query;
116 my $jsontxt = $resp->decoded_content;
117 my $j = $jsonxs->decode($jsontxt);
118 my $hits = $j->{hits}{hits};
119 my($releasedate) = map { $_->{fields}{date} } grep { $_->{fields}{name} eq $distv } @$hits;
120 unless ($releasedate) {
121 die "Did not find own releasedate for $distv";
123 $w{cpanversion} = reduce {
124 CPAN::Version->vgt($a,$b) ? $a : $b
125 } map { $_->{fields}{version} } grep { $_->{fields}{date} ge $releasedate } @$hits;
126 warn "cpanversion => $w{cpanversion}\n" if $Opt{debug};
127 my $line = splice @Stmp, $si, 1;
128 $w{eliminated} = 0;
129 if (CPAN::Version->vgt($w{cpanversion},$w{version})) {
130 my($highest_distro) = grep { $_->{fields}{version} eq $w{cpanversion} } @$hits;
131 if (($highest_distro->{_source}{tests}{fail}||0) > 0) {
132 $w{whynot} = "found fails";
133 } else {
134 my $tepoch = Time::Piece->strptime($highest_distro->{fields}{date}, "%Y-%m-%dT%T")->epoch;
135 if (time - $tepoch < 14*86400) {
136 $w{whynot} = "too fresh";
137 } else {
138 my($redui) = grep { $Sredu[$_] eq $line } 0..$#Sredu;
139 splice @Sredu, $redui, 1;
140 $w{eliminated} = 1;
144 warn "eliminated => $w{eliminated}\n" if $Opt{debug};
145 if ($w{whynot}) {
146 warn "whynot => $w{whynot}\n" if $Opt{debug};
148 push @DONE, \%w;
150 my @elim = map {
151 sprintf "%s(=>%s)", $_->{distv}, $_->{cpanversion};
152 } grep { $_->{eliminated} } @DONE;
153 if ($Opt{"dry-run"}) {
154 my $lines = scalar @elim == 1 ? "line" : "lines";
155 warn sprintf "Would have eliminated %d $lines: %s\n", scalar @elim, join(" ", @elim);
156 } else {
157 warn sprintf "Have eliminated %d lines: %s\n", scalar @elim, join(" ", @elim);
158 open my $fh, ">", $Opt{annotatefile};
159 print $fh "$_\n" for @Sredu;
160 close $fh or die "Could not write $Opt{annotatefile}: $!";
163 # Local Variables:
164 # mode: cperl
165 # cperl-indent-level: 4
166 # End: