Medium sized Internalization made by flattener against megalog-2017-10-28
[andk-cpan-tools.git] / bin / colorout-to-dir.pl
blob1ba9c173dcbdfc91e97dd32c496a4b95758c433a
1 #!/usr/bin/perl -0777 -nl
3 use strict;
4 use Dumpvalue;
5 use Encode qw(decode);
6 use Encode::Detect ();
7 use File::Path qw(mkpath);
8 use List::MoreUtils qw(uniq);
9 use Time::HiRes qw(sleep);
10 use YAML::Syck;
12 our($perl_path) = m|(/home\S+/installed-perls/(?:.*?)/p.*?/perl-5.*?@(?:\d+))|;
13 our $outdir = $ARGV;
14 $outdir =~ s/.out$/.d/ or die;
15 mkpath $outdir;
16 my $perl = "$perl_path/bin/perl";
18 sub mystore ($$$$){
19 my($shortdistro,$log,$ok,$seq) = @_;
20 my $outfile = $shortdistro;
21 $outfile =~ s!\.(tar.gz|tgz|tar.bz2|tbz|zip)?$!.xml!;
22 $outfile =~ s|$|.xml| unless $outfile =~ /\.xml$/;
23 $outfile =~ s|/|!|g;
24 $outfile =~ s|^|$outdir/|;
25 my($time) = $outdir =~ /(\d{8}T\d{4})/;
26 open my $fh, ">:utf8", $outfile or die;
27 for ($time,$perl_path,$shortdistro,$ok) {
28 s!\&!\&!g;
29 s!"!"!g;
30 s!<!&lt;!g;
31 s!>!&gt;!g;
33 my $ulog = decode("Detect",$log);
34 my $dumper = Dumpvalue->new(unctrl => "unctrl");
35 $ulog =~ s/([\x00-\x09\x0b\x0c\x0e-\x1f])/ $dumper->stringify($1,1) /ge;
36 print $fh qq{<distro time="$time" perl="$perl_path" distro="$shortdistro" ok="$ok" seq="$seq">};
37 print $fh $ulog;
38 print $fh "</distro>\n";
39 close $fh or die;
40 sleep 1/16;
43 # the first part is a duplication of colorterm-to-html.pl which I
44 # wrote for my Munich talk:
45 s!\&!\&amp;!g;
46 sleep 1;
47 s!"!&quot;!g;
48 sleep 1;
49 s!<!&lt;!g;
50 sleep 1;
51 s!>!&gt;!g;
52 sleep 1;
53 s!\e\[1;3[45](?:;\d+)?m(.*?)\e\[0m!<span style="color: blue">$1</span>!sg;
54 sleep 1;
55 s!\e\[1;31(?:;\d+)?m(.*?)\e\[0m!<span style="color: red">$1</span>!sg;
56 sleep 1;
57 #s!\n!<br/>\n!g;
58 s!\r\n!\n!g;
59 sleep 1;
60 s!.+\r!!g;
61 sleep 1;
63 =pod
65 lines like
67 CPAN.pm: Going to build (A/AB/ABH/XML-RSS-1.22.tar.gz)
69 can occur once or twice. The latter means dependencies get in the way
70 and between the first and second occurrence there are the dependencies.
72 $1 is the distro.
74 From the second occurrence (or if there is only one, from the first)
75 until the consecutive two lines
77 /^$HTMLSPANSTUFF {2}(.+)\n$HTMLSPANSTUFF {2}.+install.+\s+--\s(NOT )?OK$/
79 we expect the data for exactly this distro. $1 is again the distro.
81 =cut
83 our $HTMLSPANSTUFF = qr/(?:<[^<>]+>)*/;
85 my @logs = ($_);
86 my @residua;
87 my %seq;
88 my $found = 0;
89 while ($_ = shift @logs) {
90 my @distros = uniq /^ CPAN\.pm: Going to build (.*)/mg;
91 unless (keys %seq) {
92 # on the first run we can determine the absolute position within
93 # the file for all distros of that session
94 %seq = map { $distros[$_] => $_+1 } 0..$#distros;
96 warn sprintf(
97 "NEW LOG length %d, unprocessed logs ATM: %d, expected distros here: %d",
98 length($_),
99 scalar(@logs),
100 scalar(@distros),
102 sleep 1;
103 my $cnt = 0;
104 while (my $d = pop @distros) {
105 # my $d = splice @distros, int(scalar(@distros)/2), 1;
106 my $shortdistro = $d;
107 $shortdistro =~ s!^[A-Z]/[A-Z][A-Z]/!!;
108 if (
111 <span[^<>]+>
112 Running[ ]make[ ]for[ ]\Q$d\E\n
113 [\s\S]+\n
114 ^[ ][ ]CPAN\.pm:[ ]Going[ ]to[ ]build[ ]\Q$d\E\n
115 [\s\S]+\n
116 ^$HTMLSPANSTUFF[ ]{2}(?:\Q$shortdistro\E)\n
117 $HTMLSPANSTUFF[ ]{2}.+\s+--\s+((?:NOT\s)?OK|NA)\n
118 <\/span>
119 )//mx
121 my $log = $1;
122 my $ok = $2;
123 my @distros_under = uniq $log =~ /^ CPAN\.pm: Going to build (.*)/mg;
124 if (@distros_under == 1) {
125 warn sprintf "FOUND %d: %s (%d)\n", ++$found, $d, length($log);
126 mystore($shortdistro,$log,$ok,$seq{$d});
127 } elsif (length $_ == 0) { # exhausted
128 if (++$cnt >= 100) { # endless loop detector
129 warn "endless loop?";
130 push @residua, $log;
131 } else {
132 unshift @distros, $d;
133 $_ = $log;
134 warn sprintf("RESHUFFLE. Delaying[%s]uniq distros[%d]",
136 scalar @distros_under,
138 if (2 == @distros_under) {
139 require Data::Dumper; print STDERR "Line " . __LINE__ . ", File: " . __FILE__ . "\n" . Data::Dumper->new(\@distros_under,[qw()])->Indent(1)->Useqq(1)->Dump; # XXX
142 sleep 1;
144 } else {
145 push @logs, $log;
148 } # while @distros
149 push @residua, $_ if length $_;
150 open my $rfh, ">", "$outdir/residuum.yml" or die;
151 print $rfh YAML::Syck::Dump(\@residua);
152 close $rfh or die;
153 } # while @logs
156 if (-e $perl) {
157 open my $fh, ">", "$outdir/perl-V.txt" or die "Could not open >$outdir/perl-V.txt: $!";
158 open my $pfh, "-|", $perl, "-V" or die "cannot fork: $!";
159 while (<$pfh>) {
160 print $fh $_;
162 close $pfh or die "perl died during -V";
163 close $fh or die "could not write '$outdir/perl-V.txt': $!";
166 __END__
168 =pod
170 This is the data we want to gather:
172 distribution MIYAGAWA/XML-Atom-1.2.3.tar.gz
173 perl /home/src/perl/..../perl !reveals maint vs perl
174 logfile (=date) megainstall.20070422T1717.out
175 ok OK or "make_test NO" or something
176 log_as_xml
178 So if we take the input filename, s/.out/.d/ on it and make that a
179 directory, we have the storage area and the first metadata. If we then
180 write a file "perl" with the path to perl, we have the second metadata
181 thing. We should really store the output of '$perl -V' there, just in
182 case.
184 If we then use the distroname and replace slashes with bangs, we have
185 a good flat filename. We could then even s|!.+!|!| for the filename if
186 we keep the original distroname for inside. We could write
188 <distro time="$time" perl="$perl_path" distro="$distro_orig">
189 $report
190 </distro>
192 and of course, we must escape properly.
194 BUGS:
196 BUG1
198 we should recognize when a distro reaches "delayed until after
199 prerequisites", write this first piece into the splitted logfile and
200 append the other part.
203 </span><span style="color: blue">Running install for module 'Archive::Zip'
204 </span><span style="color: blue">Running make for A/AD/ADAMK/Archive-Zip-1.18.tar.gz
205 </span><span style="color: blue">Checksum for /home/k/.cpan/sources/authors/id/A/AD/ADAMK/Archive-Zip-1.18.tar.gz ok
206 </span>Archive-Zip-1.18/
207 Archive-Zip-1.18/t/
208 [...]
209 <span style="color: blue">
210 CPAN.pm: Going to build A/AD/ADAMK/Archive-Zip-1.18.tar.gz
212 </span>Warning: prerequisite File::Which 0.05 not found.
213 Checking if your kit is complete...
214 Looks good
215 Writing Makefile for Archive::Zip
216 <span style="color: blue">---- Unsatisfied dependencies detected during ----
217 ---- ADAMK/Archive-Zip-1.18.tar.gz ----
218 File::Which [requires]
219 </span><span style="color: blue">Running make test
220 </span><span style="color: blue"> Delayed until after prerequisites
221 </span><span style="color: blue">Running make install
222 </span><span style="color: blue"> Delayed until after prerequisites
225 BUG2
227 When we reached megainstall.20070406T1526.out this program started to
228 become extremely slow. 11 hourse between the two timestamps:
230 -rw-rw-r-- 1 sand sand 5060 Apr 28 16:54 DMAKI!DateTime-Util-Calc-0.13
231 -rw-rw-r-- 1 sand sand 3566 Apr 29 04:05 DMAKI!DateTime-Util-Astro-0.08
233 Ah, this was an endless loop in CPAN.pm and DateTime-Util-Astro was
234 built again and again.
237 THE NEW ALGORITHM:
239 Start a new array @logs which starts out as ($_). Cut a
240 single-distro-log out (Matrushka2). If it does not contain further
241 logs, write it to disk, otherwise push it onto @logs. Continue until
242 you have tried all candidates.
244 The game ends when we reach the end of @logs. Then @logs will be an
245 array of residua which we shall dump for further considerations.
247 BUG3
249 encoding not clear on AWRIGLEY/HTML-Summary-0.017 and illegal
250 codepoint 27
252 This test is obviously writing test output in multiple encodings. It's
253 certainly not our goal to detect each test's encoding. But we need
254 legal output.
256 So I must do something to unctrl the control characters and to detect
257 the encoding when we have high bits set. And if detect does not
258 succeed, I think I can without further considerations pretend latin-1.
260 Before I do that I need a list of broken XML files.
262 find logs -name "*.xml" -exec make mega-validate MEGA_XML={} \; >& mega-validate.out
264 BUG4
266 residuum still reveals bad "return"s in CPAN.pm that leave us no clue
267 where to cut the log.
270 =cut