flattening bundle
[andk-cpan-tools.git] / bin / snapshotflattener.pl
blobc7c1dacf330f1c7969df437768e64f51cf20e39a
1 #!/usr/bin/perl
3 # use 5.010;
4 use strict;
5 use warnings;
7 =head1 NAME
11 =head1 SYNOPSIS
13 bin/snapshotflattener.pl ~sand/megalog/megalog-9999-99-99T99:99:99.log
15 bin/snapshotflattener.pl ~sand/megalog/megalog-9999-99-99T99:99:99.
17 =head1 OPTIONS
19 =over 8
21 =cut
23 my @opt = <<'=back' =~ /B<--(\S+)>/g;
25 =item B<--dry-run|n!>
27 No rewrite please.
29 =item B<--showfirstrequirer|sfr!>
31 Spits out a YAML structure showing in which file we have a requirer,
32 the name of that one, and the names of the dependencies that are not
33 already listed in from of him. Implies --dry-run and an unlimited
34 --top.
36 =item B<--help|h!>
38 This help
40 =item B<--maxcand=i>
42 Defaults to 0. If given, we limit the number of candidates to process by
43 randomly kicking out candidates.
45 =item B<--top=i>
47 Defaults to 12, so we get the top 12 reported. If the value is 0, we
48 just get the old output which showed us whenever we had a new
49 highscore (something quite useless which we considered good enough for
50 too long).
52 =back
54 =head1 DESCRIPTION
56 We rank all modules that trigger a "requires" line, that is all
57 modules that are not yet installed when they are suddenly needed as a
58 dependency. This ranking is called RANKIN. We rank these modules again
59 according to the relative lines they occupy in the Bundle/Snapshot
60 files. We call this ranking RANKOUT. For the record: we rank them by
61 module name, not by line number. (???)
63 Finally we calculate the maximum diff between RANKIN and RANKOUT. If
64 the diff is e.g. 700, we can say: we should move this module past 700
65 *ranked* modules. In reality this may mean a movement of thousands of
66 lines up.
68 =head1 HISTORY
70 Iff all modules were in perfect linear order to satisfy all
71 dependencies, we could easily read the time it takes to build one
72 module. *And* we could easily rebalance the 5 snapshotfiles when one
73 of them goes over the well established max of 10 hours.
75 I came to this when trying to run C<megalog-overview.pl --debug=reps>
76 which turned out to be buggy.
78 =cut
81 use FindBin;
82 use lib "$FindBin::Bin/../lib";
83 BEGIN {
84 push @INC, qw( );
87 use Dumpvalue;
88 use File::Basename qw(dirname);
89 use File::Path qw(mkpath);
90 use File::Spec;
91 use File::Temp;
92 use Getopt::Long;
93 use Pod::Usage;
94 use Hash::Util qw(lock_keys);
96 our %Opt;
97 lock_keys %Opt, map { /([^=|!]+)/ } @opt;
98 GetOptions(\%Opt,
99 @opt,
100 ) or pod2usage(1);
101 if ($Opt{help}) {
102 pod2usage(0);
104 $Opt{maxcand} //= 0;
105 $Opt{top} //= 12;
106 if ($Opt{showfirstrequirer}) {
107 $Opt{top} = 999999999;
108 $Opt{"dry-run"} = 1;
109 require YAML;
112 my $megalog = shift or pod2usage(1);
113 $megalog =~ s/\.$/.log/;
114 my @snaps = glob("lib/Bundle/Snapshot_2011_10_21_0?.pm");
116 our(%INSTATE,
117 %CANDIDATES,
118 @SHIFTLINES,
119 %RANKIN,
120 %RANKOUT,
121 %FIRSTREQUIRER,
122 $REQUIRER,
123 %IN_FILE,
124 %OUT_FILE,
125 $FILE,
126 %REQTYPE,
127 %BUNDLELINE,
129 $INSTATE{contents}=0;
130 open my $fh, $megalog or die;
131 while (<$fh>){
132 if (/\s(\S+)\s+\[(\S*requires)\]/) {
133 $CANDIDATES{$1}++;
134 unless (exists $RANKIN{$1}) {
135 $RANKIN{$1} = 1 + scalar keys %RANKIN;
137 $FIRSTREQUIRER{$1} ||= $REQUIRER;
138 $IN_FILE{$1} ||= $FILE;
139 $REQTYPE{$1} ||= $2;
140 } elsif (/Running install for module\s+'([A-Za-z0-9\:]+)'/) {
141 $REQUIRER = $1;
142 } elsif (/^path\|-> Bundle::\S+([0-9][0-9])\s*$/) {
143 $FILE = $1
146 if ($Opt{maxcand}) {
147 while (keys %CANDIDATES > $Opt{maxcand}){
148 my @c = keys %CANDIDATES;
149 my $r = $c[rand $#c];
150 delete $CANDIDATES{$r};
154 my $bundleline = 0;
155 for my $snap (@snaps) {
156 my($fh, $fh2);
157 my $out_file = substr($snap,-5,2);
158 if ($Opt{"dry-run"}) {
159 open $fh, $snap or die "Could not open $snap: $!";
160 } else {
161 rename $snap, "$snap~" or die "Could not rename $snap: $!";
162 open $fh, "$snap~" or die "Could not open $snap~: $!";
163 open $fh2, ">", $snap or die "Could not open >$snap: $!";
164 select $fh2;
166 $\ = $/ = "\n";
167 while (<$fh>) {
168 chop;
169 if (/^=/){
170 if (/^=head1 CONTENTS/){
171 $INSTATE{contents}=1;
172 unless ($Opt{"dry-run"}) {
173 print;
175 next;
176 } else {
177 $INSTATE{contents}=0;
178 if (/^=head1 CONFIGU/){
179 unless ($Opt{"dry-run"}) {
180 print ((shift @SHIFTLINES)->[0]) while @SHIFTLINES;
185 if ($INSTATE{contents}){
186 if (/^([A-Za-z]\S*)/){
187 $bundleline++;
188 $BUNDLELINE{$1} ||= $bundleline;
189 if ($CANDIDATES{$1}){
190 $RANKOUT{$1} = 1 + scalar keys %RANKOUT;
191 $OUT_FILE{$1} = $out_file;
192 unshift @SHIFTLINES, [$_, $1];
193 } else {
194 push @SHIFTLINES, [$_, $1];
196 if (@SHIFTLINES > 1){
197 my $line = (shift @SHIFTLINES)->[0];
198 $line =~ s/\n//g;
199 unless ($Opt{"dry-run"}) {
200 print $line;
203 } else {
204 if (@SHIFTLINES){
205 $SHIFTLINES[-1][0] .= "\n" unless $SHIFTLINES[-1][0] =~ /\n/;
206 } else {
207 push @SHIFTLINES, [""];
210 } else {
211 unless ($Opt{"dry-run"}) {
212 print;
217 my $max = { diff => -99999 };
218 my @reported;
219 my @all;
220 for my $k (sort keys %CANDIDATES) {
221 unless ($RANKOUT{$k}){
222 $RANKOUT{$k} = 1 + scalar keys %RANKOUT;
224 my $diff = $RANKOUT{$k} - $RANKIN{$k};
225 if ($Opt{top}) {
226 push @all, { diff=>$RANKOUT{$k}-$RANKIN{$k}, in=>$RANKIN{$k}, out=>$RANKOUT{$k}, name=>$k, reqdby=>$FIRSTREQUIRER{$k}, infile=>$IN_FILE{$k}, outfile=>$OUT_FILE{$k}||"--", reqtype=>$REQTYPE{$k}, bundleline=>$BUNDLELINE{$k}||=0 };
227 } else {
228 if ($diff > $max->{diff}) {
229 $max->{diff} = $diff;
230 $max->{in} = $RANKIN{$k};
231 $max->{out} = $RANKOUT{$k};
232 $max->{k} = $k;
233 next if $max->{diff} < 0;
234 warn "Note: so far largest distance with $max->{k}: $max->{out} - $max->{in} = $max->{diff}\n";
235 push @reported, $k;
239 if ($Opt{top}) {
240 for my $rec (@all) {
241 $rec->{bundleline_reqdby} = $BUNDLELINE{$rec->{reqdby}};
242 $rec->{reqtype_short} = substr($rec->{reqtype},0,1);
244 my $showfirstrequirer = {};
245 for my $rec (sort { $a->{bundleline_reqdby} <=> $b->{bundleline_reqdby}
246 || $a->{bundleline} <=> $b->{bundleline}
247 } @all) {
248 next if $BUNDLELINE{$rec->{name}} < $BUNDLELINE{$rec->{reqdby}};
249 if ($Opt{showfirstrequirer}) {
250 $showfirstrequirer->{bfile} ||= $rec->{infile};
251 $showfirstrequirer->{name} ||= $rec->{reqdby};
252 if ($rec->{reqdby} ne $showfirstrequirer->{name}) {
253 print YAML::Dump($showfirstrequirer);
254 last;
256 $showfirstrequirer->{line} = $rec->{bundleline_reqdby};
257 my $a = $showfirstrequirer->{deps} ||= [];
258 push @$a, $rec->{name};
259 next;
261 printf "%5d %-58s %s %5d %5d %s %s %s\n", @{$rec}{qw/diff name reqtype_short bundleline bundleline_reqdby outfile infile reqdby/};
262 push @reported, $rec->{name};
263 last if @reported >= $Opt{top};
266 my $show_maybe_try = 0;
267 if ($show_maybe_try) {
268 warn sprintf "Maybe try:
269 egrep -n '^(%s)(\$| )' lib/Bundle/Snapshot_2011_10_21_0*.pm
270 ", join("|", @reported);
271 warn sprintf "And/Or:
272 egrep -an ' (%s) *\\[.*requi|^perl\\|' %s
273 ", join("|", @reported), $megalog;
274 warn sprintf "And/Or:
275 egrep -an ' \\S+ *\\[.*requi|^perl\\||Running install for module' %s|egrep -B2 ' (%s) '
276 ", $megalog, join("|", @reported), ;
279 # Local Variables:
280 # mode: cperl
281 # cperl-indent-level: 4
282 # End: