CPANPLUS beginning with 0.89_06(?) has a toolchain stanza
[cpan-testers-parsereport.git] / lib / CPAN / Testers / ParseReport.pm
blobf76ea81255ee65fd157430bffd7395c59ffbc7a9
1 package CPAN::Testers::ParseReport;
3 use warnings;
4 use strict;
6 use Config::Perl::V ();
7 use DateTime::Format::Strptime;
8 use DateTime::Format::DateParse;
9 use File::Basename qw(basename);
10 use File::Path qw(mkpath);
11 use HTML::Entities qw(decode_entities);
12 use LWP::UserAgent;
13 use List::Util qw(max min sum);
14 use MIME::QuotedPrint ();
15 use Net::NNTP ();
16 use Time::Local ();
17 use XML::LibXML;
18 use XML::LibXML::XPathContext;
20 our $default_ctformat = "yaml";
21 our $default_transport = "nntp";
22 our $default_cturl = "http://www.cpantesters.org/show";
23 our $Signal = 0;
25 =encoding utf-8
27 =head1 NAME
29 CPAN::Testers::ParseReport - parse reports to www.cpantesters.org from various sources
31 =cut
33 use version; our $VERSION = qv('0.1.7');
35 =head1 SYNOPSIS
37 The documentation in here is normally not needed because the code is
38 meant to be run from a standalone program, L<ctgetreports>.
40 ctgetreports --q mod:Moose Devel-Events
42 =head1 DESCRIPTION
44 This is the core module for CPAN::Testers::ParseReport. If you're not
45 looking to extend or alter the behaviour of this module, you probably
46 want to look at L<ctgetreports> instead.
48 =head1 OPTIONS
50 Are described in the L<ctgetreports> manpage and are passed through to
51 the functions unaltered.
53 =head1 FUNCTIONS
55 =head2 parse_distro($distro,%options)
57 reads the cpantesters HTML page or the YAML file or the local database
58 for the distro and loops through the reports for the specified or most
59 recent version of that distro found in these data.
61 parse_distro() intentionally has no meaningful return value, different
62 options would require different ones.
64 =head2 $extract = parse_single_report($report,$dumpvars,%options)
66 mirrors and reads this report. $report is of the form
68 { id => number }
70 $dumpvar is a hashreference that gets filled with data.
72 $extract is the result of parse_report() described below.
74 =cut
77 my $ua;
78 sub _ua {
79 return $ua if $ua;
80 $ua = LWP::UserAgent->new
82 keep_alive => 1,
83 env_proxy => 1,
85 $ua->parse_head(0);
86 $ua;
91 my $nntp;
92 sub _nntp {
93 return $nntp if $nntp;
94 $nntp = Net::NNTP->new("nntp.perl.org");
95 $nntp->group("perl.cpan.testers");
96 return $nntp;
101 my $xp;
102 sub _xp {
103 return $xp if $xp;
104 $xp = XML::LibXML->new;
105 $xp->keep_blanks(0);
106 $xp->clean_namespaces(1);
107 my $catalog = __FILE__;
108 $catalog =~ s|ParseReport.pm$|ParseReport/catalog|;
109 $xp->load_catalog($catalog);
110 return $xp;
114 sub _download_overview {
115 my($cts_dir, $distro, %Opt) = @_;
116 my $format = $Opt{ctformat} ||= $default_ctformat;
117 my $cturl = $Opt{cturl} ||= $default_cturl;
118 my $ctarget = "$cts_dir/$distro.$format";
119 my $cheaders = "$cts_dir/$distro.headers";
120 if ($Opt{local}) {
121 unless (-e $ctarget) {
122 die "Alert: No local file '$ctarget' found, cannot continue\n";
124 } else {
125 if (! -e $ctarget or -M $ctarget > .25) {
126 if (-e $ctarget && $Opt{verbose}) {
127 my(@stat) = stat _;
128 my $timestamp = gmtime $stat[9];
129 print STDERR "(timestamp $timestamp GMT)\n" unless $Opt{quiet};
131 print STDERR "Fetching $ctarget..." if $Opt{verbose} && !$Opt{quiet};
132 my $uri = "$cturl/$distro.$format";
133 my $resp = _ua->mirror($uri,$ctarget);
134 if ($resp->is_success) {
135 print STDERR "DONE\n" if $Opt{verbose} && !$Opt{quiet};
136 open my $fh, ">", $cheaders or die;
137 for ($resp->headers->as_string) {
138 print $fh $_;
139 if ($Opt{verbose} && $Opt{verbose}>1) {
140 print STDERR $_ unless $Opt{quiet};
143 } elsif (304 == $resp->code) {
144 print STDERR "DONE (not modified)\n" if $Opt{verbose} && !$Opt{quiet};
145 my $atime = my $mtime = time;
146 utime $atime, $mtime, $cheaders;
147 } else {
148 die sprintf
150 "No success downloading %s: %s",
151 $uri,
152 $resp->status_line,
157 return $ctarget;
160 sub _parse_html {
161 my($ctarget, %Opt) = @_;
162 my $content = do { open my $fh, $ctarget or die; local $/; <$fh> };
163 my $preprocesswithtreebuilder = 0; # not needed since barbie switched to XHTML
164 if ($preprocesswithtreebuilder) {
165 require HTML::TreeBuilder;
166 my $tree = HTML::TreeBuilder->new;
167 $tree->implicit_tags(1);
168 $tree->p_strict(1);
169 $tree->ignore_ignorable_whitespace(0);
170 $tree->parse_content($content);
171 $tree->eof;
172 $content = $tree->as_XML;
174 my $parser = _xp();
175 my $doc = eval { $parser->parse_string($content) };
176 my $err = $@;
177 unless ($doc) {
178 my $distro = basename $ctarget;
179 die sprintf "Error while parsing %s\: %s", $distro, $err;
181 my $xc = XML::LibXML::XPathContext->new($doc);
182 my $nsu = $doc->documentElement->namespaceURI;
183 $xc->registerNs('x', $nsu) if $nsu;
184 my($selected_release_ul,$selected_release_distrov,$excuse_string);
185 my($cparentdiv)
186 = $nsu ?
187 $xc->findnodes("/x:html/x:body/x:div[\@id = 'doc']") :
188 $doc->findnodes("/html/body/div[\@id = 'doc']");
189 my(@releasedivs) = $nsu ?
190 $xc->findnodes("//x:div[x:h2 and x:ul]",$cparentdiv) :
191 $cparentdiv->findnodes("//div[h2 and ul]");
192 my $releasediv;
193 if ($Opt{vdistro}) {
194 $excuse_string = "selected distro '$Opt{vdistro}'";
195 my($fallbacktoversion) = $Opt{vdistro} =~ /(\d+\..*)/;
196 $fallbacktoversion = 0 unless defined $fallbacktoversion;
197 RELEASE: for my $i (0..$#releasedivs) {
198 my $picked = "";
199 my($x) = $nsu ?
200 $xc->findvalue("x:h2/x:a[2]/\@name",$releasedivs[$i]) :
201 $releasedivs[$i]->findvalue("h2/a[2]/\@name");
202 if ($x) {
203 if ($x eq $Opt{vdistro}) {
204 $releasediv = $i;
205 $picked = " (picked)";
207 print STDERR "FOUND DISTRO: $x$picked\n" unless $Opt{quiet};
208 } else {
209 ($x) = $nsu ?
210 $xc->findvalue("x:h2/x:a[1]/\@name",$releasedivs[$i]) :
211 $releasedivs[$i]->findvalue("h2/a[1]/\@name");
212 if ($x eq $fallbacktoversion) {
213 $releasediv = $i;
214 $picked = " (picked)";
216 print STDERR "FOUND VERSION: $x$picked\n" unless $Opt{quiet};
219 } else {
220 $excuse_string = "any distro";
222 unless (defined $releasediv) {
223 $releasediv = 0;
225 # using a[1] because a[2] is missing on the first entry
226 ($selected_release_distrov) = $nsu ?
227 $xc->findvalue("x:h2/x:a[1]/\@name",$releasedivs[$releasediv]) :
228 $releasedivs[$releasediv]->findvalue("h2/a[1]/\@name");
229 ($selected_release_ul) = $nsu ?
230 $xc->findnodes("x:ul",$releasedivs[$releasediv]) :
231 $releasedivs[$releasediv]->findnodes("ul");
232 unless (defined $selected_release_distrov) {
233 warn "Warning: could not find $excuse_string in '$ctarget'";
234 return;
236 print STDERR "SELECTED: $selected_release_distrov\n" unless $Opt{quiet};
237 my($id);
238 my @all;
239 for my $test ($nsu ?
240 $xc->findnodes("x:li",$selected_release_ul) :
241 $selected_release_ul->findnodes("li")) {
242 $id = $nsu ?
243 $xc->findvalue("x:a[1]/text()",$test) :
244 $test->findvalue("a[1]/text()");
245 push @all, {id=>$id};
246 return if $Signal;
248 return \@all;
251 sub _parse_yaml {
252 my($ctarget, %Opt) = @_;
253 require YAML::Syck;
254 my $arr = YAML::Syck::LoadFile($ctarget);
255 my($selected_release_ul,$selected_release_distrov,$excuse_string);
256 if ($Opt{vdistro}) {
257 $excuse_string = "selected distro '$Opt{vdistro}'";
258 $arr = [grep {$_->{distversion} eq $Opt{vdistro}} @$arr];
259 ($selected_release_distrov) = $arr->[0]{distversion};
260 } else {
261 $excuse_string = "any distro";
262 my $last_addition;
263 my %seen;
264 for my $report (sort { $a->{id} <=> $b->{id} } @$arr) {
265 unless ($seen{$report->{distversion}}++) {
266 $last_addition = $report->{distversion};
269 $arr = [grep {$_->{distversion} eq $last_addition} @$arr];
270 ($selected_release_distrov) = $last_addition;
272 unless ($selected_release_distrov) {
273 warn "Warning: could not find $excuse_string in '$ctarget'";
274 return;
276 print STDERR "SELECTED: $selected_release_distrov\n" unless $Opt{quiet};
277 my @all;
278 for my $test (@$arr) {
279 my $id = $test->{id};
280 push @all, {id=>$id};
281 return if $Signal;
283 @all = sort { $b->{id} <=> $a->{id} } @all;
284 return \@all;
287 sub parse_single_report {
288 my($report, $dumpvars, %Opt) = @_;
289 my($id) = $report->{id};
290 $Opt{cachedir} ||= "$ENV{HOME}/var/cpantesters";
291 my $nnt_dir = "$Opt{cachedir}/nntp-testers";
292 mkpath $nnt_dir;
293 my $target = "$nnt_dir/$id";
294 if ($Opt{local}) {
295 unless (-e $target) {
296 die {severity=>0,text=>"Warning: No local file '$target' found, skipping\n"};
298 } else {
299 if (! -e $target) {
300 print STDERR "Fetching $target..." if $Opt{verbose} && !$Opt{quiet};
301 $Opt{transport} ||= $default_transport;
302 if ($Opt{transport} eq "nntp") {
303 my $article = _nntp->article($id);
304 unless ($article) {
305 die {severity=>0,text=>"NNTP-Server did not return an article for id[$id]"};
307 open my $fh, ">", $target or die {severity=>1,text=>"Could not open >$target: $!"};
308 print $fh @$article;
309 } elsif ($Opt{transport} eq "http") {
310 my $resp = _ua->mirror("http://www.nntp.perl.org/group/perl.cpan.testers/$id",$target);
311 if ($resp->is_success) {
312 if ($Opt{verbose}) {
313 my(@stat) = stat $target;
314 my $timestamp = gmtime $stat[9];
315 print STDERR "(timestamp $timestamp GMT)\n" unless $Opt{quiet};
316 if ($Opt{verbose} > 1) {
317 print STDERR $resp->headers->as_string unless $Opt{quiet};
320 my $headers = "$target.headers";
321 open my $fh, ">", $headers or die {severity=>1,text=>"Could not open >$headers: $!"};
322 print $fh $resp->headers->as_string;
323 } else {
324 die {severity=>0,
325 text=>sprintf "HTTP Server Error[%s] for id[%s]", $resp->status_line, $id};
327 } else {
328 die {severity=>1,text=>"Illegal value for --transport: '$Opt{transport}'"};
332 parse_report($target, $dumpvars, %Opt);
335 sub parse_distro {
336 my($distro,%Opt) = @_;
337 my %dumpvars;
338 $Opt{cachedir} ||= "$ENV{HOME}/var/cpantesters";
339 my $cts_dir = "$Opt{cachedir}/cpantesters-show";
340 mkpath $cts_dir;
341 if ($Opt{solve}) {
342 require Statistics::Regression;
343 $Opt{dumpvars} = "." unless defined $Opt{dumpvars};
345 if (!$Opt{vdistro} && $distro =~ /^(.+)-(?i:v?\d+)(?:\.\d+)*\w*$/) {
346 $Opt{vdistro} = $distro;
347 $distro = $1;
349 my $reports;
350 if (my $ctdb = $Opt{ctdb}) {
351 require CPAN::WWW::Testers::Generator::Database;
352 require CPAN::DistnameInfo;
353 my $dbi = CPAN::WWW::Testers::Generator::Database->new(database=>$ctdb) or die "Alert: unknown error while opening database '$ctdb'";
354 unless ($Opt{vdistro}) {
355 my $sql = "select version from cpanstats where dist=? order by id";
356 my @rows = $dbi->get_query($sql,$distro);
357 my($newest,%seen);
358 for my $row (@rows) {
359 $newest = $row->[0] unless $seen{$row->[0]}++;
361 $Opt{vdistro} = "$distro-$newest";
363 my $d = CPAN::DistnameInfo->new("FOO/$Opt{vdistro}.tgz");
364 my $dist = $d->dist;
365 my $version = $d->version;
366 my $sql = "select id from cpanstats where dist=? and version=? order by id desc";
367 my @rows = $dbi->get_query($sql,$dist,$version);
368 my @all;
369 for my $row (@rows) {
370 my $id = $row->[0];
371 push @all, {id=>$id};
373 $reports = \@all;
374 } else {
375 my $ctarget = _download_overview($cts_dir, $distro, %Opt);
376 $Opt{ctformat} ||= $default_ctformat;
377 if ($Opt{ctformat} eq "html") {
378 $reports = _parse_html($ctarget,%Opt);
379 } else {
380 $reports = _parse_yaml($ctarget,%Opt);
383 return unless $reports;
384 for my $report (@$reports) {
385 eval {parse_single_report($report, \%dumpvars, %Opt)};
386 if ($@) {
387 if (ref $@) {
388 if ($@->{severity}) {
389 die $@->{text};
390 } else {
391 warn $@->{text};
393 } else {
394 die $@;
397 last if $Signal;
399 if ($Opt{dumpvars}) {
400 require YAML::Syck;
401 my $dumpfile = $Opt{dumpfile} || "ctgetreports.out";
402 open my $fh, ">", $dumpfile or die "Could not open '$dumpfile' for writing: $!";
403 print $fh YAML::Syck::Dump(\%dumpvars);
404 close $fh or die "Could not close '$dumpfile': $!"
406 if ($Opt{solve}) {
407 solve(\%dumpvars,%Opt);
411 =head2 $bool = _looks_like_qp($raw_report)
413 We had to acknowledge the fact that some MTAs swallow the MIME-Version
414 header while passing MIME through. So we introduce fallback heuristics
415 that try to determine if a report is written in quoted printable.
417 Note that this subroutine is internal, just documented to have the
418 internals documented.
420 The current implementation counts the number of QP escaped spaces and
421 equal signs.
423 =cut
425 sub _looks_like_qp {
426 my($report) = @_;
427 my $count_space = () = $report =~ /=20/g;
428 return 1 if $count_space > 12;
429 my $count_equal = () = $report =~ /=3D/g;
430 return 1 if $count_equal > 12;
431 return 1 if $count_space+$count_equal > 24;
432 return 0; # waiting for a counter example
435 =head2 $extract = parse_report($target,$dumpvars,%Opt)
437 Reads one report. $target is the local filename to read. $dumpvars is
438 a hashref which gets filled with descriptive stats about
439 PASS/FAIL/etc. %Opt are the options as described in the
440 C<ctgetreports> manpage. $extract is a hashref containing the found
441 variables.
443 Note: this parsing is a bit dirty but as it seems good enough I'm not
444 inclined to change it. We parse HTML with regexps only, not an HTML
445 parser. Only the entities are decoded.
447 Update around version 0.0.17: switching to nntp now but keeping the
448 parser for HTML around to read old local copies.
450 Update around 0.0.18: In %Options you can use
452 article => $some_full_article_as_scalar
454 to use this function to parse one full article as text. When this is
455 given, the argument $target is not read, but its basename is taken to
456 be the id of the article. (OMG, hackers!)
458 =cut
459 sub parse_report {
460 my($target,$dumpvars,%Opt) = @_;
461 our @q;
462 my $id = basename($target);
463 # warn "DEBUG: id[$id]";
464 my($ok,$about);
466 my(%extract);
468 my($report,$isHTML);
469 if ($report = $Opt{article}) {
470 $isHTML = $report =~ /^</;
471 undef $target;
473 if ($target) {
474 open my $fh, $target or die "Could not open '$target': $!";
475 local $/;
476 my $raw_report = <$fh>;
477 $isHTML = $raw_report =~ /^</;
478 if ($isHTML) {
479 $report = decode_entities($raw_report);
480 } elsif ($raw_report =~ /^MIME-Version: 1.0$/m
482 _looks_like_qp($raw_report)
484 # minimizing MIME effort; don't know about reports in other formats
485 $report = MIME::QuotedPrint::decode_qp($raw_report);
486 } else {
487 $report = $raw_report;
489 close $fh;
491 my @qr = map /^qr:(.+)/, @{$Opt{q}};
492 if ($Opt{raw} || @qr) {
493 for my $qr (@qr) {
494 my $cqr = eval "qr{$qr}";
495 die "Could not compile regular expression '$qr': $@" if $@;
496 my(@matches) = $report =~ $cqr;
497 my $v;
498 if (@matches) {
499 if (@matches==1) {
500 $v = $matches[0];
501 } else {
502 $v = join "", map {"($_)"} @matches;
504 } else {
505 $v = "";
507 $extract{"qr:$qr"} = $v;
511 my $report_writer;
512 my $moduleunpack = {};
513 my $expect_prereq = 0;
514 my $expect_toolchain = 0;
515 my $expecting_toolchain_soon = 0;
517 my $in_summary = 0;
518 my $in_summary_seen_platform = 0;
519 my $in_prg_output = 0;
520 my $in_env_context = 0;
522 my $current_headline;
523 my @previous_line = ""; # so we can neutralize line breaks
524 my @rlines = split /\r?\n/, $report;
525 LINE: for (@rlines) {
526 next LINE unless ($isHTML ? m/<title>(\S+)\s+(\S+)/ : m/^Subject:\s*(\S+)\s+(\S+)/);
527 $ok = $1;
528 $about = $2;
529 $extract{"meta:ok"} = $ok;
530 $extract{"meta:about"} = $about;
531 last;
533 LINE: while (@rlines) {
534 $_ = shift @rlines;
535 while (/!$/ and @rlines) {
536 my $followupline = shift @rlines;
537 $followupline =~ s/^\s+//; # remo leading space
538 $_ .= $followupline;
540 if (/^--------/ && $previous_line[-2] && $previous_line[-2] =~ /^--------/) {
541 $current_headline = $previous_line[-1];
542 if ($current_headline =~ /PROGRAM OUTPUT/) {
543 $in_prg_output = 1;
544 } else {
545 $in_prg_output = 0;
547 if ($current_headline =~ /ENVIRONMENT AND OTHER CONTEXT/) {
548 $in_env_context = 1;
549 } else {
550 $in_env_context = 0;
553 if ($extract{"meta:perl"}) {
554 if ( $in_summary
555 and !$extract{"conf:git_commit_id"}
556 and /Commit id:\s*([[:xdigit:]]+)/) {
557 $extract{"conf:git_commit_id"} = $1;
559 } else {
560 my $p5;
561 if (0) {
562 } elsif (/Summary of my perl5 \((.+)\) configuration:/) {
563 $p5 = $1;
564 $in_summary = 1;
565 $in_env_context = 0;
567 if ($p5) {
568 my($r,$v,$s,$p);
569 if (($r,$v,$s,$p) = $p5 =~ /revision (\S+) version (\S+) subversion (\S+) patch (\S+)/) {
570 $r =~ s/\.0//; # 5.0 6 2!
571 $extract{"meta:perl"} = "$r.$v.$s\@$p";
572 } elsif (($r,$v,$s) = $p5 =~ /revision (\S+) version (\S+) subversion (\S+)/) {
573 $r =~ s/\.0//;
574 $extract{"meta:perl"} = "$r.$v.$s";
575 } elsif (($r,$v,$s) = $p5 =~ /(\d+\S*) patchlevel (\S+) subversion (\S+)/) {
576 $r =~ s/\.0//;
577 $extract{"meta:perl"} = "$r.$v.$s";
578 } else {
579 $extract{"meta:perl"} = $p5;
583 unless ($extract{"meta:from"}) {
584 if (0) {
585 } elsif ($isHTML ?
586 m|<div class="h_name">From:</div> <b>(.+?)</b><br/>| :
587 m|^From:\s*(.+)|
589 $extract{"meta:from"} = $1;
591 $extract{"meta:from"} =~ s/\.$// if $extract{"meta:from"};
593 unless ($extract{"meta:date"}) {
594 if (0) {
595 } elsif ($isHTML ?
596 m|<div class="h_name">Date:</div> (.+?)<br/>| :
597 m|^Date:\s*(.+)|
599 my $date = $1;
600 my($dt);
601 if ($isHTML) {
602 my $p;
603 $p = DateTime::Format::Strptime->new(
604 locale => "en",
605 time_zone => "UTC",
606 # April 13, 2005 23:50
607 pattern => "%b %d, %Y %R",
609 $dt = $p->parse_datetime($date);
610 } else {
611 # Sun, 28 Sep 2008 12:23:12 +0100 # but was not consistent
612 # pattern => "%a, %d %b %Y %T %z",
613 $dt = eval { DateTime::Format::DateParse->parse_datetime($date) };
615 unless ($dt) {
616 warn "Could not parse date[$date], setting to epoch 0";
617 $dt = DateTime->from_epoch( epoch => 0 );
619 $extract{"meta:date"} = $dt->datetime;
621 $extract{"meta:date"} =~ s/\.$// if $extract{"meta:date"};
623 unless ($extract{"meta:writer"}) {
624 for ("$previous_line[-1] $_") {
625 if (0) {
626 } elsif (/CPANPLUS, version (\S+)/) {
627 $extract{"meta:writer"} = "CPANPLUS $1";
628 } elsif (/created (?:automatically )?by (\S+)/) {
629 $extract{"meta:writer"} = $1;
630 } elsif (/This report was machine-generated by (\S+) (\S+)/) {
631 $extract{"meta:writer"} = "$1 $2";
633 $extract{"meta:writer"} =~ s/[\.,]$// if $extract{"meta:writer"};
636 if ($in_summary) {
637 # we do that first three lines a bit too often
638 my $qr = $Opt{dumpvars} || "";
639 $qr = qr/$qr/ if $qr;
640 unless (@q) {
641 @q = @{$Opt{q}||[]};
642 @q = qw(meta:perl conf:archname conf:usethreads conf:optimize meta:writer meta:from) unless @q;
645 my %conf_vars = map {($_ => 1)} grep { /^conf:/ } @q;
647 if (/^\s+Platform:$/) {
648 $in_summary_seen_platform=1;
649 } elsif (/^\s*$/ || m|</pre>|) {
650 # if not html, we have reached the end now
651 if ($in_summary_seen_platform) {
652 # some perls have an empty line after the summary line
653 $in_summary = 0;
655 } else {
656 my(%kv) = /\G,?\s*([^=]+)=('[^']+?'|\S+)/gc;
657 while (my($k,$v) = each %kv) {
658 my $ck = "conf:$k";
659 $ck =~ s/\s+$//;
660 $v =~ s/,$//;
661 if ($v =~ /^'(.*)'$/) {
662 $v = $1;
664 $v =~ s/^\s+//;
665 $v =~ s/\s+$//;
666 if ($qr && $ck =~ $qr) {
667 $extract{$ck} = $v;
668 } elsif ($conf_vars{$ck}) {
669 $extract{$ck} = $v;
674 if ($in_prg_output) {
675 unless ($extract{"meta:output_from"}) {
676 if (/Output from (.+):$/) {
677 $extract{"meta:output_from"} = $1
681 if ($in_env_context) {
682 if (/^\s{4}(\S+)\s*=\s*(.*)$/) {
683 $extract{"env:$1"} = $2;
686 push @previous_line, $_;
687 if ($expect_prereq || $expect_toolchain) {
688 if (/Perl module toolchain versions installed/) {
689 # first time discovered in CPANPLUS 0.89_06
690 $expecting_toolchain_soon = 1;
691 $expect_prereq=0;
692 next LINE;
694 if (exists $moduleunpack->{type}) {
695 my($module,$v,$needwant);
696 # type 1 and 2 are about prereqs, type three about toolchain
697 if ($moduleunpack->{type} == 1) {
698 (my $leader,$module,$needwant,$v) = eval { unpack $moduleunpack->{tpl}, $_; };
699 next LINE if $@;
700 if ($leader =~ /^-/) {
701 $moduleunpack = {};
702 $expect_prereq = 0;
703 next LINE;
704 } elsif ($leader =~ /^(
705 buil # build_requires:
706 )/x) {
707 next LINE;
708 } elsif ($module =~ /^(
709 - # line drawing
710 )/x) {
711 next LINE;
713 } elsif ($moduleunpack->{type} == 2) {
714 (my $leader,$module,$v,$needwant) = eval { unpack $moduleunpack->{tpl}, $_; };
715 next LINE if $@;
716 if ($leader =~ /^\*/) {
717 $moduleunpack = {};
718 $expect_prereq = 0;
719 next LINE;
720 } elsif ($v =~ /\s/) {
721 ($module,$v) = split " ", $_;
723 } elsif ($moduleunpack->{type} == 3) {
724 (my $leader,$module,$v) = eval { unpack $moduleunpack->{tpl}, $_; };
725 next LINE if $@;
726 if (!$module) {
727 $moduleunpack = {};
728 $expect_toolchain = 0;
729 next LINE;
730 } elsif ($module =~ /^-/) {
731 next LINE;
734 $module =~ s/\s+$//;
735 if ($module) {
736 $v =~ s/^\s+//;
737 $v =~ s/\s+$//;
738 my($modulename,$versionlead) = split " ", $module;
739 if (defined $modulename and defined $versionlead) {
740 $module = $modulename;
741 $v = "$versionlead$v";
743 if ($v eq "Have") {
744 next LINE;
746 $extract{"mod:$module"} = $v;
747 if ($needwant) {
748 $needwant =~ s/^\s+//;
749 $needwant =~ s/\s+$//;
750 $extract{"prereq:$module"} = $needwant;
754 if (/(\s+)(Module\s+)(Need\s+)Have/) {
755 $in_env_context = 0;
756 $moduleunpack = {
757 tpl => 'a'.length($1).'a'.length($2).'a'.length($3).'a*',
758 type => 1,
760 } elsif (/(\s+)(Module Name\s+)(Have)(\s+)Want/) {
761 $in_env_context = 0;
762 my $adjust_1 = 0;
763 my $adjust_2 = -length($4);
764 my $adjust_3 = length($4);
765 # two pass would be required to see where the
766 # columns really are. Or could we get away with split?
767 $moduleunpack = {
768 tpl => 'a'.length($1).'a'.(length($2)+$adjust_2).'a'.(length($3)+$adjust_3).'a*',
769 type => 2,
773 if (/PREREQUISITES|Prerequisite modules loaded/) {
774 $in_env_context = 0;
775 $expect_prereq=1;
777 if ($expecting_toolchain_soon) {
778 if (/(\s+)(Module(?:\sName)?\s+) Have/) {
779 $in_env_context = 0;
780 $expect_toolchain=1;
781 $expecting_toolchain_soon=0;
782 $moduleunpack = {
783 tpl => 'a'.length($1).'a'.length($2).'a*',
784 type => 3,
788 if (/toolchain versions installed/) {
789 $in_env_context = 0;
790 $expecting_toolchain_soon=1;
792 } # LINE
793 if ($Opt{solve}) {
794 $extract{id} = $id;
795 if ($extract{"conf:osvers"} && $extract{"conf:archname"}) {
796 $extract{"conf:archame+osvers"} = join " ", @extract{"conf:archname","conf:osvers"};
798 my $data = $dumpvars->{"==DATA=="} ||= [];
799 push @$data, \%extract;
801 # ---- %extract finished ----
802 my $diag = "";
803 if (my $qr = $Opt{dumpvars}) {
804 $qr = qr/$qr/;
805 while (my($k,$v) = each %extract) {
806 if ($k =~ $qr) {
807 $dumpvars->{$k}{$v}{$ok}++;
811 for my $want (@q) {
812 my $have = $extract{$want} || "";
813 $diag .= " $want\[$have]";
815 printf STDERR " %-4s %8d%s\n", $ok, $id, $diag unless $Opt{quiet};
816 if ($Opt{raw}) {
817 $report =~ s/\s+\z//;
818 print STDERR $report, "\n================\n" unless $Opt{quiet};
820 if ($Opt{interactive}) {
821 require IO::Prompt;
822 local @ARGV;
823 local $ARGV;
824 my $ans = IO::Prompt::prompt
826 -p => "View $id? [onechar: ynq] ",
827 -d => "y",
828 -u => qr/[ynq]/,
829 -onechar,
831 print STDERR "\n" unless $Opt{quiet};
832 if ($ans eq "y") {
833 open my $ifh, "<", $target or die "Could not open $target: $!";
834 $Opt{pager} ||= "less";
835 open my $lfh, "|-", $Opt{pager} or die "Could not fork '$Opt{pager}': $!";
836 local $/;
837 print {$lfh} <$ifh>;
838 close $ifh or die "Could not close $target: $!";
839 close $lfh or die "Could not close pager: $!"
840 } elsif ($ans eq "q") {
841 $Signal++;
842 return;
845 return \%extract;
848 =head2 solve
850 Feeds a couple of potentially interesting data to
851 Statistics::Regression and sorts the result by R^2 descending. Do not
852 confuse this with a prove, rather take it as a useful hint. It can
853 save you minutes of staring at data and provide a quick overview where
854 one should look closer. Displays the N top candidates, where N
855 defaults to 3 and can be set with the C<$Opt{solvetop}> variable.
856 Regressions results with an R^2 of 1.00 are displayed in any case.
858 The function is called when the option C<-solve> is give on the
859 commandline. Several extra config variables are calculated, see source
860 code for details.
862 =cut
864 my %never_solve_on = map {($_ => 1)}
866 "conf:ccflags",
867 "conf:config_args",
868 "conf:cppflags",
869 "conf:lddlflags",
870 "conf:uname",
871 "env:PATH",
872 "env:PERL5LIB",
873 "env:PERL5OPT",
874 'env:$^X',
875 'env:$EGID',
876 'env:$GID',
877 'env:$UID/$EUID',
878 'env:PERL5_CPANPLUS_IS_RUNNING',
879 'env:PERL5_CPAN_IS_RUNNING',
880 'env:PERL5_CPAN_IS_RUNNING_IN_RECURSION',
881 'meta:ok',
883 my %normalize_numeric =
885 id => sub { return shift },
886 'meta:date' => sub {
887 my $v = shift;
888 my($Y,$M,$D,$h,$m,$s) = $v =~ /(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)/;
889 unless (defined $M) {
890 die "illegal value[$v] for a date";
892 Time::Local::timegm($s,$m,$h,$D,$M-1,$Y);
895 my %normalize_value =
897 'meta:perl' => sub {
898 my($perlatpatchlevel) = shift;
899 my $perl = $perlatpatchlevel;
900 $perl =~ s/\@.*//;
901 $perl;
904 sub solve {
905 my($V,%Opt) = @_;
906 require Statistics::Regression;
907 my @regression;
908 my $ycb;
909 if (my $ycbbody = $Opt{ycb}) {
910 $ycb = eval('sub {'.$ycbbody.'}');
911 die if $@;
912 } else {
913 $ycb = sub {
914 my $rec = shift;
915 my $y;
916 if ($rec->{"meta:ok"} eq "PASS") {
917 $y = 1;
918 } elsif ($rec->{"meta:ok"} eq "FAIL") {
919 $y = 0;
921 return $y
924 VAR: for my $variable (sort keys %$V) {
925 next if $variable eq "==DATA==";
926 if ($never_solve_on{$variable}){
927 warn "Skipping '$variable'\n" unless $Opt{quiet};
928 next VAR;
930 my $value_distribution = $V->{$variable};
931 my $keys = keys %$value_distribution;
932 my @X = qw(const);
933 if ($normalize_numeric{$variable}) {
934 push @X, "n_$variable";
935 } else {
936 my %seen = ();
937 for my $value (sort keys %$value_distribution) {
938 my $pf = $value_distribution->{$value};
939 $pf->{PASS} ||= 0;
940 $pf->{FAIL} ||= 0;
941 if ($pf->{PASS} || $pf->{FAIL}) {
942 my $Xele = sprintf "eq_%s",
944 $normalize_value{$variable} ?
945 $normalize_value{$variable}->($value) :
946 $value
948 push @X, $Xele unless $seen{$Xele}++;
951 if (
952 $pf->{PASS} xor $pf->{FAIL}
954 my $vl = 40;
955 substr($value,$vl) = "..." if length $value > 3+$vl;
956 my $poor_mans_freehand_estimation = 0;
957 if ($poor_mans_freehand_estimation) {
958 warn sprintf
960 "%4d %4d %-23s | %s\n",
961 $pf->{PASS},
962 $pf->{FAIL},
963 $variable,
964 $value,
970 warn "variable[$variable]keys[$keys]X[@X]\n" unless $Opt{quiet};
971 next VAR unless @X > 1;
972 my %regdata =
974 X => \@X,
975 data => [],
977 RECORD: for my $rec (@{$V->{"==DATA=="}}) {
978 my $y = $ycb->($rec);
979 next RECORD unless defined $y;
980 my %obs;
981 $obs{Y} = $y;
982 @obs{@X} = (0) x @X;
983 $obs{const} = 1;
984 for my $x (@X) {
985 if ($x =~ /^eq_(.+)/) {
986 my $read_v = $1;
987 if (exists $rec->{$variable}
988 && defined $rec->{$variable}
990 my $use_v = (
991 $normalize_value{$variable} ?
992 $normalize_value{$variable}->($rec->{$variable}) :
993 $rec->{$variable}
995 if ($use_v eq $read_v) {
996 $obs{$x} = 1;
999 # warn "DEBUG: y[$y]x[$x]obs[$obs{$x}]\n";
1000 } elsif ($x =~ /^n_(.+)/) {
1001 my $v = $1;
1002 $obs{$x} = eval { $normalize_numeric{$v}->($rec->{$v}); };
1003 if ($@) {
1004 warn "Warning: error during parsing v[$v] in record[$rec->{id}]: $@; continuing with undef value";
1008 push @{$regdata{data}}, \%obs;
1010 _run_regression ($variable, \%regdata, \@regression, \%Opt);
1012 my $top = min ($Opt{solvetop} || 3, scalar @regression);
1013 my $max_rsq = sum map {1==$_->rsq ? 1 : 0} @regression;
1014 $top = $max_rsq if $max_rsq && $max_rsq > $top;
1015 my $score = 0;
1016 printf
1018 "State after regression testing: %d results, showing top %d\n\n",
1019 scalar @regression,
1020 $top,
1022 for my $reg (sort {
1023 $b->rsq <=> $a->rsq
1025 $a->k <=> $b->k
1026 } @regression) {
1027 printf "(%d)\n", ++$score;
1028 eval { $reg->print; };
1029 if ($@) {
1030 printf "\n\nOops, Statistics::Regression died during ->print() with error message[$@]\n\n";
1032 last if --$top <= 0;
1037 # $variable is the name we pass through to S:R constructor
1038 # $regdata is hash and has the arrays "X" and "data" (observations)
1039 # X goes to S:R constructor
1040 # each observation has a Y which we pass to S:R in an include() call
1041 # $regression is the collector array of results
1042 # $opt are the options from outside, used to see if we are "verbose"
1043 sub _run_regression {
1044 my($variable,$regdata,$regression,$opt) = @_;
1045 my @X = @{$regdata->{X}};
1046 # my $splo = $regdata->{"spliced-out"} = []; # maybe can be used to
1047 # hold the reference
1048 # group
1049 while (@X > 1) {
1050 my $reg = Statistics::Regression->new($variable,\@X);
1051 for my $obs (@{$regdata->{data}}) {
1052 my $y = delete $obs->{Y};
1053 $reg->include($y, $obs);
1054 $obs->{Y} = $y;
1056 eval {$reg->theta;
1057 my @e = $reg->standarderrors;
1058 die "found standarderrors == 0" if grep { 0 == $_ } @e;
1059 $reg->rsq;};
1060 if ($@) {
1061 if ($opt->{verbose} && $opt->{verbose}>=2) {
1062 require YAML::Syck;
1063 warn YAML::Syck::Dump
1064 ({error=>"could not determine some regression parameters",
1065 variable=>$variable,
1066 k=>$reg->k,
1067 n=>$reg->n,
1068 X=>$regdata->{"X"},
1069 errorstr => $@,
1072 # reduce k in case that linear dependencies disturbed us;
1073 # often called reference group; I'm tempted to collect and
1074 # make visible
1075 splice @X, 1, 1;
1076 } else {
1077 # $reg->print;
1078 push @$regression, $reg;
1079 return;
1084 =head1 AUTHOR
1086 Andreas König
1088 =head1 BUGS
1090 Please report any bugs or feature requests through the web
1091 interface at
1092 L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=CPAN-Testers-ParseReport>.
1093 I will be notified, and then you'll automatically be notified of
1094 progress on your bug as I make changes.
1096 =head1 SUPPORT
1098 You can find documentation for this module with the perldoc command.
1100 perldoc CPAN::Testers::ParseReport
1103 You can also look for information at:
1105 =over 4
1107 =item * RT: CPAN's request tracker
1109 L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=CPAN-Testers-ParseReport>
1111 =item * AnnoCPAN: Annotated CPAN documentation
1113 L<http://annocpan.org/dist/CPAN-Testers-ParseReport>
1115 =item * CPAN Ratings
1117 L<http://cpanratings.perl.org/d/CPAN-Testers-ParseReport>
1119 =item * Search CPAN
1121 L<http://search.cpan.org/dist/CPAN-Testers-ParseReport>
1123 =back
1126 =head1 ACKNOWLEDGEMENTS
1128 Thanks to RJBS for module-starter.
1130 =head1 COPYRIGHT & LICENSE
1132 Copyright 2008 Andreas König.
1134 This program is free software; you can redistribute it and/or modify it
1135 under the same terms as Perl itself.
1138 =cut
1140 1; # End of CPAN::Testers::ParseReport