new issue
[andk-cpan-tools.git] / bin / snapshotflattener.pl
blob9ac436425d9432e14daa40e41763d3d1a6ede8d9
1 #!/usr/bin/perl
3 # use 5.010;
4 use strict;
5 use warnings;
7 =head1 NAME
11 =head1 SYNOPSIS
13 bin/snapshotflattener.pl ~sand/megalog/megalog-9999-99-99T99:99:99.log
15 bin/snapshotflattener.pl ~sand/megalog/megalog-9999-99-99T99:99:99.
17 =head1 OPTIONS
19 =over 8
21 =cut
23 my @opt = <<'=back' =~ /B<--(\S+)>/g;
25 =item B<--help|h!>
27 This help
29 =item B<--max=i>
31 No default. If given, we reduce the number of changes to this value by
32 deleting random candidates.
34 =back
36 =head1 DESCRIPTION
38 We rank all modules that trigger a "required" line, that is all
39 modules that are not yet installed when they are suddenly needed as a
40 dependency. This ranking is called RANKIN. We rank these modules again
41 according to the relative lines they occupy in the Bundle/Snapshot
42 files. We call this ranking RANKOUT. For the record: we rank them by
43 module name, not by line number.
45 Finally we calculate the maximum diff between RANKIN and RANKOUT. If
46 the diff is e.g. 700, we can say: we should move this module past 700
47 *ranked* modules. In reality this may mean a movement of thousands of
48 lines up.
50 =head1 HISTORY
52 Iff all modules were in perfect linear order to satisfy all
53 dependencies, we could easily read the time it takes to build one
54 module. *And* we could easily rebalance the 5 snapshotfiles when one
55 of them goes over the well established max of 10 hours.
57 I came to this when trying to run C<megalog-overview.pl --debug=reps>
58 which tunred out to be buggy.
60 =cut
63 use FindBin;
64 use lib "$FindBin::Bin/../lib";
65 BEGIN {
66 push @INC, qw( );
69 use Dumpvalue;
70 use File::Basename qw(dirname);
71 use File::Path qw(mkpath);
72 use File::Spec;
73 use File::Temp;
74 use Getopt::Long;
75 use Pod::Usage;
76 use Hash::Util qw(lock_keys);
78 our %Opt;
79 lock_keys %Opt, map { /([^=|!]+)/ } @opt;
80 GetOptions(\%Opt,
81 @opt,
82 ) or pod2usage(1);
83 if ($Opt{help}) {
84 pod2usage(0);
86 $Opt{max} //= 0;
88 my $megalog = shift or pod2usage(1);
89 $megalog =~ s/\.$/.log/;
90 my @snaps = glob("lib/Bundle/Snapshot_2011_10_21_0?.pm");
92 our(%INSTATE,%CANDIDATES,@SHIFTLINES,%RANKIN,%RANKOUT);
93 $INSTATE{contents}=0;
94 open my $fh, $megalog or die;
95 while (<$fh>){
96 next unless /\s(\S+)\s+\[\S*requires\]/;
97 $CANDIDATES{$1}++;
98 $RANKIN{$1} = undef;
99 $RANKIN{$1} = scalar keys %RANKIN;
101 if ($Opt{max}) {
102 while (keys %CANDIDATES > $Opt{max}){
103 my @c = keys %CANDIDATES;
104 my $r = $c[rand $#c];
105 delete $CANDIDATES{$r};
109 for my $snap (@snaps) {
110 rename $snap, "$snap~" or die "Could not rename $snap: $!";
111 open my $fh, "$snap~" or die "Could not open $snap: $!";
112 open my $fh2, ">", $snap or die "Could not open $snap: $!";
113 select $fh2;
114 $\ = $/ = "\n";
115 while (<$fh>) {
116 chop;
117 if (/^=/){
118 if (/^=head1 CONTENTS/){
119 $INSTATE{contents}=1;
120 print;
121 next;
122 } else {
123 $INSTATE{contents}=0;
124 if (/^=head1 CONFIGU/){
125 print ((shift @SHIFTLINES)->[0]) while @SHIFTLINES;
129 if ($INSTATE{contents}){
130 if (/^([A-Za-z]\S*)/){
131 if ($CANDIDATES{$1}){
132 $RANKOUT{$1} = undef;
133 $RANKOUT{$1} = scalar keys %RANKOUT;
134 unshift @SHIFTLINES, [$_, $1];
135 } else {
136 push @SHIFTLINES, [$_, $1];
138 if (@SHIFTLINES > 1){
139 my $line = (shift @SHIFTLINES)->[0];
140 $line =~ s/\n//g;
141 print $line;
143 } else {
144 if (@SHIFTLINES){
145 $SHIFTLINES[-1][0] .= "\n" unless $SHIFTLINES[-1][0] =~ /\n/;
146 } else {
147 push @SHIFTLINES, [""];
150 } else {
151 print;
155 my $max = { diff => -99999 };
156 my @reported;
157 for my $k (keys %CANDIDATES) {
158 next unless $RANKOUT{$k} && $RANKIN{$k};
159 my $diff = $RANKOUT{$k} - $RANKIN{$k};
160 if ($diff > $max->{diff}) {
161 $max->{diff} = $diff;
162 $max->{in} = $RANKIN{$k};
163 $max->{out} = $RANKOUT{$k};
164 $max->{k} = $k;
165 next if $max->{diff} < 0;
166 warn "Note: so far largest distance with $max->{k}: $max->{out} - $max->{in} = $max->{diff}\n";
167 push @reported, $k;
170 warn sprintf "Maybe try:
171 egrep -n '^(%s)(\$| )' lib/Bundle/Snapshot_2011_10_21_0*.pm
172 ", join("|", @reported);
173 warn sprintf "And/Or:
174 egrep -an ' (%s) *\\[.*requi|^perl\\|' %s
175 ", join("|", @reported), $megalog;
177 # Local Variables:
178 # mode: cperl
179 # cperl-indent-level: 4
180 # End: