prepare a hackathon release
[cpan-testers-parsereport.git] / lib / CPAN / Testers / ParseReport.pm
blobd82c7c853a43a6b841d234df646a8454a3c9fe43
1 package CPAN::Testers::ParseReport;
3 use warnings;
4 use strict;
6 use DateTime::Format::Strptime;
7 use DateTime::Format::DateParse;
8 use File::Basename qw(basename);
9 use File::Path qw(mkpath);
10 use HTML::Entities qw(decode_entities);
11 use LWP::UserAgent;
12 use List::Util qw(max min sum);
13 use Net::NNTP ();
14 use Time::Local ();
15 use XML::LibXML;
16 use XML::LibXML::XPathContext;
18 our $default_ctformat = "yaml";
19 our $default_transport = "nntp";
20 our $default_cturl = "http://www.cpantesters.org/show";
21 our $Signal = 0;
23 =encoding utf-8
25 =head1 NAME
27 CPAN::Testers::ParseReport - parse reports to www.cpantesters.org from various sources
29 =cut
31 use version; our $VERSION = qv('0.1.0');
33 =head1 SYNOPSIS
35 The documentation in here is normally not needed because the code is
36 meant to be run from a standalone program, L<ctgetreports>.
38 ctgetreports --q mod:Moose Devel-Events
40 =head1 DESCRIPTION
42 This is the core module for CPAN::Testers::ParseReport. If you're not
43 looking to extend or alter the behaviour of this module, you probably
44 want to look at L<ctgetreports> instead.
46 =head1 OPTIONS
48 Are described in the L<ctgetreports> manpage and are passed through to
49 the functions unaltered.
51 =head1 FUNCTIONS
53 =head2 parse_distro($distro,%options)
55 reads the cpantesters HTML page or the YAML file for the distro and
56 loops through the reports for the specified or most recent version of
57 that distro found in these data.
59 =head2 parse_single_report($report,$dumpvars,%options)
61 mirrors and reads this report. $report is of the form
63 { id => number }
65 $dumpvar is a hashreference that gets filled with data.
67 =cut
70 my $ua;
71 sub _ua {
72 return $ua if $ua;
73 $ua = LWP::UserAgent->new
75 keep_alive => 1,
76 env_proxy => 1,
78 $ua->parse_head(0);
79 $ua;
84 my $nntp;
85 sub _nntp {
86 return $nntp if $nntp;
87 $nntp = Net::NNTP->new("nntp.perl.org");
88 $nntp->group("perl.cpan.testers");
89 return $nntp;
94 my $xp;
95 sub _xp {
96 return $xp if $xp;
97 $xp = XML::LibXML->new;
98 $xp->keep_blanks(0);
99 $xp->clean_namespaces(1);
100 my $catalog = __FILE__;
101 $catalog =~ s|ParseReport.pm$|ParseReport/catalog|;
102 $xp->load_catalog($catalog);
103 return $xp;
107 sub _download_overview {
108 my($cts_dir, $distro, %Opt) = @_;
109 my $format = $Opt{ctformat} ||= $default_ctformat;
110 my $cturl = $Opt{cturl} ||= $default_cturl;
111 my $ctarget = "$cts_dir/$distro.$format";
112 my $cheaders = "$cts_dir/$distro.headers";
113 if ($Opt{local}) {
114 unless (-e $ctarget) {
115 die "Alert: No local file '$ctarget' found, cannot continue\n";
117 } else {
118 if (! -e $ctarget or -M $ctarget > .25) {
119 if (-e $ctarget && $Opt{verbose}) {
120 my(@stat) = stat _;
121 my $timestamp = gmtime $stat[9];
122 print STDERR "(timestamp $timestamp GMT)\n" unless $Opt{quiet};
124 print STDERR "Fetching $ctarget..." if $Opt{verbose} && !$Opt{quiet};
125 my $uri = "$cturl/$distro.$format";
126 my $resp = _ua->mirror($uri,$ctarget);
127 if ($resp->is_success) {
128 print STDERR "DONE\n" if $Opt{verbose} && !$Opt{quiet};
129 open my $fh, ">", $cheaders or die;
130 for ($resp->headers->as_string) {
131 print $fh $_;
132 if ($Opt{verbose} && $Opt{verbose}>1) {
133 print STDERR $_ unless $Opt{quiet};
136 } elsif (304 == $resp->code) {
137 print STDERR "DONE (not modified)\n" if $Opt{verbose} && !$Opt{quiet};
138 my $atime = my $mtime = time;
139 utime $atime, $mtime, $cheaders;
140 } else {
141 die sprintf
143 "No success downloading %s: %s",
144 $uri,
145 $resp->status_line,
150 return $ctarget;
153 sub _parse_html {
154 my($ctarget, %Opt) = @_;
155 my $content = do { open my $fh, $ctarget or die; local $/; <$fh> };
156 my $preprocesswithtreebuilder = 0; # not needed since barbie switched to XHTML
157 if ($preprocesswithtreebuilder) {
158 require HTML::TreeBuilder;
159 my $tree = HTML::TreeBuilder->new;
160 $tree->implicit_tags(1);
161 $tree->p_strict(1);
162 $tree->ignore_ignorable_whitespace(0);
163 $tree->parse_content($content);
164 $tree->eof;
165 $content = $tree->as_XML;
167 my $parser = _xp();
168 my $doc = eval { $parser->parse_string($content) };
169 my $err = $@;
170 unless ($doc) {
171 my $distro = basename $ctarget;
172 die sprintf "Error while parsing %s\: %s", $distro, $err;
174 my $xc = XML::LibXML::XPathContext->new($doc);
175 my $nsu = $doc->documentElement->namespaceURI;
176 $xc->registerNs('x', $nsu) if $nsu;
177 my($selected_release_ul,$selected_release_distrov,$excuse_string);
178 my($cparentdiv)
179 = $nsu ?
180 $xc->findnodes("/x:html/x:body/x:div[\@id = 'doc']") :
181 $doc->findnodes("/html/body/div[\@id = 'doc']");
182 my(@releasedivs) = $nsu ?
183 $xc->findnodes("//x:div[x:h2 and x:ul]",$cparentdiv) :
184 $cparentdiv->findnodes("//div[h2 and ul]");
185 my $releasediv;
186 if ($Opt{vdistro}) {
187 $excuse_string = "selected distro '$Opt{vdistro}'";
188 my($fallbacktoversion) = $Opt{vdistro} =~ /(\d+\..*)/;
189 $fallbacktoversion = 0 unless defined $fallbacktoversion;
190 RELEASE: for my $i (0..$#releasedivs) {
191 my $picked = "";
192 my($x) = $nsu ?
193 $xc->findvalue("x:h2/x:a[2]/\@name",$releasedivs[$i]) :
194 $releasedivs[$i]->findvalue("h2/a[2]/\@name");
195 if ($x) {
196 if ($x eq $Opt{vdistro}) {
197 $releasediv = $i;
198 $picked = " (picked)";
200 print STDERR "FOUND DISTRO: $x$picked\n" unless $Opt{quiet};
201 } else {
202 ($x) = $nsu ?
203 $xc->findvalue("x:h2/x:a[1]/\@name",$releasedivs[$i]) :
204 $releasedivs[$i]->findvalue("h2/a[1]/\@name");
205 if ($x eq $fallbacktoversion) {
206 $releasediv = $i;
207 $picked = " (picked)";
209 print STDERR "FOUND VERSION: $x$picked\n" unless $Opt{quiet};
212 } else {
213 $excuse_string = "any distro";
215 unless (defined $releasediv) {
216 $releasediv = 0;
218 # using a[1] because a[2] is missing on the first entry
219 ($selected_release_distrov) = $nsu ?
220 $xc->findvalue("x:h2/x:a[1]/\@name",$releasedivs[$releasediv]) :
221 $releasedivs[$releasediv]->findvalue("h2/a[1]/\@name");
222 ($selected_release_ul) = $nsu ?
223 $xc->findnodes("x:ul",$releasedivs[$releasediv]) :
224 $releasedivs[$releasediv]->findnodes("ul");
225 unless (defined $selected_release_distrov) {
226 warn "Warning: could not find $excuse_string in '$ctarget'";
227 return;
229 print STDERR "SELECTED: $selected_release_distrov\n" unless $Opt{quiet};
230 my($id);
231 my @all;
232 for my $test ($nsu ?
233 $xc->findnodes("x:li",$selected_release_ul) :
234 $selected_release_ul->findnodes("li")) {
235 $id = $nsu ?
236 $xc->findvalue("x:a[1]/text()",$test) :
237 $test->findvalue("a[1]/text()");
238 push @all, {id=>$id};
239 return if $Signal;
241 return \@all;
244 sub _parse_yaml {
245 my($ctarget, %Opt) = @_;
246 require YAML::Syck;
247 my $arr = YAML::Syck::LoadFile($ctarget);
248 my($selected_release_ul,$selected_release_distrov,$excuse_string);
249 if ($Opt{vdistro}) {
250 $excuse_string = "selected distro '$Opt{vdistro}'";
251 $arr = [grep {$_->{distversion} eq $Opt{vdistro}} @$arr];
252 ($selected_release_distrov) = $arr->[0]{distversion};
253 } else {
254 $excuse_string = "any distro";
255 my $last_addition;
256 my %seen;
257 for my $report (@$arr) {
258 unless ($seen{$report->{distversion}}++) {
259 $last_addition = $report->{distversion};
262 $arr = [grep {$_->{distversion} eq $last_addition} @$arr];
263 ($selected_release_distrov) = $last_addition;
265 unless ($selected_release_distrov) {
266 warn "Warning: could not find $excuse_string in '$ctarget'";
267 return;
269 print STDERR "SELECTED: $selected_release_distrov\n" unless $Opt{quiet};
270 my @all;
271 for my $test (@$arr) {
272 my $id = $test->{id};
273 push @all, {id=>$id};
274 return if $Signal;
276 @all = sort { $b->{id} <=> $a->{id} } @all;
277 return \@all;
280 sub parse_single_report {
281 my($report, $dumpvars, %Opt) = @_;
282 my($id) = $report->{id};
283 $Opt{cachedir} ||= "$ENV{HOME}/var/cpantesters";
284 my $nnt_dir = "$Opt{cachedir}/nntp-testers";
285 mkpath $nnt_dir;
286 my $target = "$nnt_dir/$id";
287 if ($Opt{local}) {
288 unless (-e $target) {
289 die {severity=>0,text=>"Warning: No local file '$target' found, skipping\n"};
291 } else {
292 if (! -e $target) {
293 print STDERR "Fetching $target..." if $Opt{verbose} && !$Opt{quiet};
294 $Opt{transport} ||= $default_transport;
295 if ($Opt{transport} eq "nntp") {
296 my $article = _nntp->article($id);
297 unless ($article) {
298 die {severity=>0,text=>"NNTP-Server did not return an article for id[$id]"};
300 open my $fh, ">", $target or die {severity=>1,text=>"Could not open >$target: $!"};
301 print $fh @$article;
302 } elsif ($Opt{transport} eq "http") {
303 my $resp = _ua->mirror("http://www.nntp.perl.org/group/perl.cpan.testers/$id",$target);
304 if ($resp->is_success) {
305 if ($Opt{verbose}) {
306 my(@stat) = stat $target;
307 my $timestamp = gmtime $stat[9];
308 print STDERR "(timestamp $timestamp GMT)\n" unless $Opt{quiet};
309 if ($Opt{verbose} > 1) {
310 print STDERR $resp->headers->as_string unless $Opt{quiet};
313 my $headers = "$target.headers";
314 open my $fh, ">", $headers or die {severity=>1,text=>"Could not open >$headers: $!"};
315 print $fh $resp->headers->as_string;
316 } else {
317 die {severity=>0,
318 text=>sprintf "HTTP Server Error[%s] for id[%s]", $resp->status_line, $id};
320 } else {
321 die {severity=>1,text=>"Illegal value for --transport: '$Opt{transport}'"};
325 parse_report($target, $dumpvars, %Opt);
328 sub parse_distro {
329 my($distro,%Opt) = @_;
330 my %dumpvars;
331 $Opt{cachedir} ||= "$ENV{HOME}/var/cpantesters";
332 my $cts_dir = "$Opt{cachedir}/cpantesters-show";
333 mkpath $cts_dir;
334 if ($Opt{solve}) {
335 require Statistics::Regression;
336 $Opt{dumpvars} = "." unless defined $Opt{dumpvars};
338 if (!$Opt{vdistro} && $distro =~ /^(.+)-(?i:v?\d+)(?:\.\d+)*\w*$/) {
339 $Opt{vdistro} = $distro;
340 $distro = $1;
342 my $ctarget = _download_overview($cts_dir, $distro, %Opt);
343 my $reports;
344 $Opt{ctformat} ||= $default_ctformat;
345 if ($Opt{ctformat} eq "html") {
346 $reports = _parse_html($ctarget,%Opt);
347 } else {
348 $reports = _parse_yaml($ctarget,%Opt);
350 return unless $reports;
351 for my $report (@$reports) {
352 eval {parse_single_report($report, \%dumpvars, %Opt)};
353 if ($@) {
354 if (ref $@) {
355 if ($@->{severity}) {
356 die $@->{text};
357 } else {
358 warn $@->{text};
360 } else {
361 die $@;
364 last if $Signal;
366 if ($Opt{dumpvars}) {
367 require YAML::Syck;
368 my $dumpfile = $Opt{dumpfile} || "ctgetreports.out";
369 open my $fh, ">", $dumpfile or die "Could not open '$dumpfile' for writing: $!";
370 print $fh YAML::Syck::Dump(\%dumpvars);
371 close $fh or die "Could not close '$dumpfile': $!"
373 if ($Opt{solve}) {
374 solve(\%dumpvars,%Opt);
378 =head2 $extract = parse_report($target,$dumpvars,%Opt)
380 Reads one report. $target is the local filename to read. $dumpvars is
381 a hashref which gets filled with descriptive stats about
382 PASS/FAIL/etc. %Opt are the options as described in the
383 C<ctgetreports> manpage. $extract is a hashref containing the found
384 variables.
386 Note: this parsing is a bit dirty but as it seems good enough I'm not
387 inclined to change it. We parse HTML with regexps only, not an HTML
388 parser. Only the entities are decoded.
390 Update around version 0.0.17: switching to nntp now but keeping the
391 parser for HTML around to read old local copies.
393 Update around 0.0.18: In %Options you can use
395 article => $some_full_article_as_scalar
397 to use this function to parse one full article as text. When this is
398 given, the argument $target is not read, but its basename is taken to
399 be the id of the article. (OMG, hackers!)
401 =cut
402 sub parse_report {
403 my($target,$dumpvars,%Opt) = @_;
404 our @q;
405 my $id = basename($target);
406 my($ok,$about);
408 my(%extract);
410 my($report,$isHTML);
411 if ($report = $Opt{article}) {
412 $isHTML = $report =~ /^</;
413 undef $target;
415 if ($target) {
416 open my $fh, $target or die "Could not open '$target': $!";
417 local $/;
418 my $raw_report = <$fh>;
419 $isHTML = $raw_report =~ /^</;
420 $report = $isHTML ? decode_entities($raw_report) : $raw_report;
421 close $fh;
423 my @qr = map /^qr:(.+)/, @{$Opt{q}};
424 if ($Opt{raw} || @qr) {
425 for my $qr (@qr) {
426 my $cqr = eval "qr{$qr}";
427 die "Could not compile regular expression '$qr': $@" if $@;
428 my(@matches) = $report =~ $cqr;
429 my $v;
430 if (@matches) {
431 if (@matches==1) {
432 $v = $matches[0];
433 } else {
434 $v = join "", map {"($_)"} @matches;
436 } else {
437 $v = "";
439 $extract{"qr:$qr"} = $v;
443 my $report_writer;
444 my $moduleunpack = {};
445 my $expect_prereq = 0;
446 my $expect_toolchain = 0;
447 my $expecting_toolchain_soon = 0;
449 my $in_summary = 0;
450 my $in_prg_output = 0;
451 my $in_env_context = 0;
453 my $current_headline;
454 my @previous_line = ""; # so we can neutralize line breaks
455 my @rlines = split /\r?\n/, $report;
456 LINE: for (@rlines) {
457 next LINE unless ($isHTML ? m/<title>(\S+)\s+(\S+)/ : m/^Subject: (\S+)\s+(\S+)/);
458 $ok = $1;
459 $about = $2;
460 $extract{"meta:ok"} = $ok;
461 $extract{"meta:about"} = $about;
462 last;
464 LINE: while (@rlines) {
465 $_ = shift @rlines;
466 while (/!$/) {
467 my $followupline = shift @rlines;
468 $followupline =~ s/^\s+//; # remo leading space
469 $_ .= $followupline;
471 if (/^--------/ && $previous_line[-2] && $previous_line[-2] =~ /^--------/) {
472 $current_headline = $previous_line[-1];
473 if ($current_headline =~ /PROGRAM OUTPUT/) {
474 $in_prg_output = 1;
475 } else {
476 $in_prg_output = 0;
478 if ($current_headline =~ /ENVIRONMENT AND OTHER CONTEXT/) {
479 $in_env_context = 1;
480 } else {
481 $in_env_context = 0;
484 if ($extract{"meta:perl"}) {
485 if ( $in_summary
486 and !$extract{"conf:git_commit_id"}
487 and /Commit id: ([[:xdigit:]]+)/) {
488 $extract{"conf:git_commit_id"} = $1;
490 } else {
491 my $p5;
492 if (0) {
493 } elsif (/Summary of my perl5 \((.+)\) configuration:/) {
494 $p5 = $1;
495 $in_summary = 1;
496 $in_env_context = 0;
498 if ($p5) {
499 my($r,$v,$s,$p);
500 if (($r,$v,$s,$p) = $p5 =~ /revision (\S+) version (\S+) subversion (\S+) patch (\S+)/) {
501 $r =~ s/\.0//; # 5.0 6 2!
502 $extract{"meta:perl"} = "$r.$v.$s\@$p";
503 } elsif (($r,$v,$s) = $p5 =~ /revision (\S+) version (\S+) subversion (\S+)/) {
504 $r =~ s/\.0//;
505 $extract{"meta:perl"} = "$r.$v.$s";
506 } elsif (($r,$v,$s) = $p5 =~ /(\d+\S*) patchlevel (\S+) subversion (\S+)/) {
507 $r =~ s/\.0//;
508 $extract{"meta:perl"} = "$r.$v.$s";
509 } else {
510 $extract{"meta:perl"} = $p5;
514 unless ($extract{"meta:from"}) {
515 if (0) {
516 } elsif ($isHTML ?
517 m|<div class="h_name">From:</div> <b>(.+?)</b><br/>| :
518 m|^From: (.+)|
520 $extract{"meta:from"} = $1;
522 $extract{"meta:from"} =~ s/\.$// if $extract{"meta:from"};
524 unless ($extract{"meta:date"}) {
525 if (0) {
526 } elsif ($isHTML ?
527 m|<div class="h_name">Date:</div> (.+?)<br/>| :
528 m|^Date: (.+)|
530 my $date = $1;
531 my($dt);
532 if ($isHTML) {
533 my $p;
534 $p = DateTime::Format::Strptime->new(
535 locale => "en",
536 time_zone => "UTC",
537 # April 13, 2005 23:50
538 pattern => "%b %d, %Y %R",
540 $dt = $p->parse_datetime($date);
541 } else {
542 # Sun, 28 Sep 2008 12:23:12 +0100 # but was not consistent
543 # pattern => "%a, %d %b %Y %T %z",
544 $dt = DateTime::Format::DateParse->parse_datetime($date);
546 unless ($dt) {
547 warn "Could not parse date[$date], setting to epoch 0";
548 $dt = DateTime->from_epoch( epoch => 0 );
550 $extract{"meta:date"} = $dt->datetime;
552 $extract{"meta:date"} =~ s/\.$// if $extract{"meta:date"};
554 unless ($extract{"meta:writer"}) {
555 for ("$previous_line[-1] $_") {
556 if (0) {
557 } elsif (/CPANPLUS, version (\S+)/) {
558 $extract{"meta:writer"} = "CPANPLUS $1";
559 } elsif (/created (?:automatically )?by (\S+)/) {
560 $extract{"meta:writer"} = $1;
561 } elsif (/This report was machine-generated by (\S+) (\S+)/) {
562 $extract{"meta:writer"} = "$1 $2";
564 $extract{"meta:writer"} =~ s/[\.,]$// if $extract{"meta:writer"};
567 if ($in_summary) {
568 # we do that first three lines a bit too often
569 my $qr = $Opt{dumpvars} || "";
570 $qr = qr/$qr/ if $qr;
571 unless (@q) {
572 @q = @{$Opt{q}||[]};
573 @q = qw(meta:perl conf:archname conf:usethreads conf:optimize meta:writer meta:from) unless @q;
576 my %conf_vars = map {($_ => 1)} grep { /^conf:/ } @q;
578 if (/^\s*$/ || m|</pre>|) {
579 # if not html, we have reached the end now
580 $in_summary = 0;
581 } else {
582 my(%kv) = /\G,?\s*([^=]+)=('[^']+?'|\S+)/gc;
583 while (my($k,$v) = each %kv) {
584 my $ck = "conf:$k";
585 $ck =~ s/\s+$//;
586 $v =~ s/,$//;
587 if ($v =~ /^'(.*)'$/) {
588 $v = $1;
590 $v =~ s/^\s+//;
591 $v =~ s/\s+$//;
592 if ($qr && $ck =~ $qr) {
593 $extract{$ck} = $v;
594 } elsif ($conf_vars{$ck}) {
595 $extract{$ck} = $v;
600 if ($in_prg_output) {
601 unless ($extract{"meta:output_from"}) {
602 if (/Output from (.+):$/) {
603 $extract{"meta:output_from"} = $1
607 if ($in_env_context) {
608 if (/^\s{4}(\S+)\s*=\s*(.*)$/) {
609 $extract{"env:$1"} = $2;
612 push @previous_line, $_;
613 if ($expect_prereq || $expect_toolchain) {
614 if (exists $moduleunpack->{type}) {
615 my($module,$v,$needwant);
616 # type 1 and 2 are about prereqs, type three about toolchain
617 if ($moduleunpack->{type} == 1) {
618 (my $leader,$module,$needwant,$v) = eval { unpack $moduleunpack->{tpl}, $_; };
619 next LINE if $@;
620 if ($leader =~ /^-/) {
621 $moduleunpack = {};
622 $expect_prereq = 0;
623 next LINE;
624 } elsif ($leader =~ /^(
625 buil # build_requires:
626 )/x) {
627 next LINE;
628 } elsif ($module =~ /^(
629 - # line drawing
630 )/x) {
631 next LINE;
633 } elsif ($moduleunpack->{type} == 2) {
634 (my $leader,$module,$v,$needwant) = eval { unpack $moduleunpack->{tpl}, $_; };
635 next LINE if $@;
636 if ($leader =~ /^\*/) {
637 $moduleunpack = {};
638 $expect_prereq = 0;
639 next LINE;
640 } elsif ($v =~ /\s/) {
641 ($module,$v) = split " ", $_;
643 } elsif ($moduleunpack->{type} == 3) {
644 (my $leader,$module,$v) = eval { unpack $moduleunpack->{tpl}, $_; };
645 next LINE if $@;
646 if (!$module) {
647 $moduleunpack = {};
648 $expect_toolchain = 0;
649 next LINE;
650 } elsif ($module =~ /^-/) {
651 next LINE;
654 $module =~ s/\s+$//;
655 if ($module) {
656 $v =~ s/^\s+//;
657 $v =~ s/\s+$//;
658 if ($v eq "Have") {
659 next LINE;
661 $extract{"mod:$module"} = $v;
662 if ($needwant) {
663 $needwant =~ s/^\s+//;
664 $needwant =~ s/\s+$//;
665 $extract{"prereq:$module"} = $needwant;
669 if (/(\s+)(Module\s+)(Need\s+)Have/) {
670 $in_env_context = 0;
671 $moduleunpack = {
672 tpl => 'a'.length($1).'a'.length($2).'a'.length($3).'a*',
673 type => 1,
675 } elsif (/(\s+)(Module Name\s+)(Have)(\s+)Want/) {
676 $in_env_context = 0;
677 my $adjust_1 = 0;
678 my $adjust_2 = -length($4);
679 my $adjust_3 = length($4);
680 # two pass would be required to see where the
681 # columns really are. Or could we get away with split?
682 $moduleunpack = {
683 tpl => 'a'.length($1).'a'.(length($2)+$adjust_2).'a'.(length($3)+$adjust_3).'a*',
684 type => 2,
688 if (/PREREQUISITES|Prerequisite modules loaded/) {
689 $in_env_context = 0;
690 $expect_prereq=1;
692 if ($expecting_toolchain_soon) {
693 if (/(\s+)(Module\s+) Have/) {
694 $in_env_context = 0;
695 $expect_toolchain=1;
696 $expecting_toolchain_soon=0;
697 $moduleunpack = {
698 tpl => 'a'.length($1).'a'.length($2).'a*',
699 type => 3,
703 if (/toolchain versions installed/) {
704 $in_env_context = 0;
705 $expecting_toolchain_soon=1;
707 } # LINE
708 if ($Opt{solve}) {
709 $extract{id} = $id;
710 if ($extract{"conf:osvers"} && $extract{"conf:archname"}) {
711 $extract{"conf:archame+osvers"} = join " ", @extract{"conf:archname","conf:osvers"};
713 my $data = $dumpvars->{"==DATA=="} ||= [];
714 push @$data, \%extract;
716 # ---- %extract finished ----
717 my $diag = "";
718 if (my $qr = $Opt{dumpvars}) {
719 $qr = qr/$qr/;
720 while (my($k,$v) = each %extract) {
721 if ($k =~ $qr) {
722 $dumpvars->{$k}{$v}{$ok}++;
726 for my $want (@q) {
727 my $have = $extract{$want} || "";
728 $diag .= " $want\[$have]";
730 printf STDERR " %-4s %8d%s\n", $ok, $id, $diag unless $Opt{quiet};
731 if ($Opt{raw}) {
732 $report =~ s/\s+\z//;
733 print STDERR $report, "\n================\n" unless $Opt{quiet};
735 if ($Opt{interactive}) {
736 require IO::Prompt;
737 local @ARGV;
738 local $ARGV;
739 my $ans = IO::Prompt::prompt
741 -p => "View $id? [onechar: ynq] ",
742 -d => "y",
743 -u => qr/[ynq]/,
744 -onechar,
746 print STDERR "\n" unless $Opt{quiet};
747 if ($ans eq "y") {
748 open my $ifh, "<", $target or die "Could not open $target: $!";
749 $Opt{pager} ||= "less";
750 open my $lfh, "|-", $Opt{pager} or die "Could not fork '$Opt{pager}': $!";
751 local $/;
752 print {$lfh} <$ifh>;
753 close $ifh or die "Could not close $target: $!";
754 close $lfh or die "Could not close pager: $!"
755 } elsif ($ans eq "q") {
756 $Signal++;
757 return;
760 return \%extract;
763 =head2 solve
765 Feeds a couple of potentially interesting data to
766 Statistics::Regression and sorts the result by R^2 descending. Do not
767 confuse this with a prove, rather take it as a useful hint. It can
768 save you minutes of staring at data and provide a quick overview where
769 one should look closer. Displays the N top candidates, where N
770 defaults to 3 and can be set with the C<$Opt{solvetop}> variable.
771 Regressions results with an R^2 of 1.00 are displayed in any case.
773 The function is called when the option C<-solve> is give on the
774 commandline. Several extra config variables are calculated, see source
775 code for details.
777 =cut
779 my %never_solve_on = map {($_ => 1)}
781 "conf:ccflags",
782 "conf:config_args",
783 "conf:cppflags",
784 "conf:lddlflags",
785 "conf:uname",
786 "env:PATH",
787 "env:PERL5LIB",
788 "env:PERL5OPT",
789 'env:$^X',
790 'env:$EGID',
791 'env:$GID',
792 'env:$UID/$EUID',
793 'env:PERL5_CPANPLUS_IS_RUNNING',
794 'env:PERL5_CPAN_IS_RUNNING',
795 'env:PERL5_CPAN_IS_RUNNING_IN_RECURSION',
796 'meta:ok',
798 my %normalize_numeric =
800 id => sub { return shift },
801 'meta:date' => sub {
802 my($Y,$M,$D,$h,$m,$s) = shift =~ /(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)/;
803 Time::Local::timegm($s,$m,$h,$D,$M-1,$Y);
806 my %normalize_value =
808 'meta:perl' => sub {
809 my($perlatpatchlevel) = shift;
810 my $perl = $perlatpatchlevel;
811 $perl =~ s/\@.*//;
812 $perl;
815 sub solve {
816 my($V,%Opt) = @_;
817 require Statistics::Regression;
818 my @regression;
819 my $ycb;
820 if (my $ycbbody = $Opt{ycb}) {
821 $ycb = eval('sub {'.$ycbbody.'}');
822 die if $@;
823 } else {
824 $ycb = sub {
825 my $rec = shift;
826 my $y;
827 if ($rec->{"meta:ok"} eq "PASS") {
828 $y = 1;
829 } elsif ($rec->{"meta:ok"} eq "FAIL") {
830 $y = 0;
832 return $y
835 VAR: for my $variable (sort keys %$V) {
836 next if $variable eq "==DATA==";
837 if ($never_solve_on{$variable}){
838 warn "Skipping '$variable'\n" unless $Opt{quiet};
839 next VAR;
841 my $value_distribution = $V->{$variable};
842 my $keys = keys %$value_distribution;
843 my @X = qw(const);
844 if ($normalize_numeric{$variable}) {
845 push @X, "n_$variable";
846 } else {
847 my %seen = ();
848 for my $value (sort keys %$value_distribution) {
849 my $pf = $value_distribution->{$value};
850 $pf->{PASS} ||= 0;
851 $pf->{FAIL} ||= 0;
852 if ($pf->{PASS} || $pf->{FAIL}) {
853 my $Xele = sprintf "eq_%s",
855 $normalize_value{$variable} ?
856 $normalize_value{$variable}->($value) :
857 $value
859 push @X, $Xele unless $seen{$Xele}++;
862 if (
863 $pf->{PASS} xor $pf->{FAIL}
865 my $vl = 40;
866 substr($value,$vl) = "..." if length $value > 3+$vl;
867 my $poor_mans_freehand_estimation = 0;
868 if ($poor_mans_freehand_estimation) {
869 warn sprintf
871 "%4d %4d %-23s | %s\n",
872 $pf->{PASS},
873 $pf->{FAIL},
874 $variable,
875 $value,
881 warn "variable[$variable]keys[$keys]X[@X]\n" unless $Opt{quiet};
882 next VAR unless @X > 1;
883 my %regdata =
885 X => \@X,
886 data => [],
888 RECORD: for my $rec (@{$V->{"==DATA=="}}) {
889 my $y = $ycb->($rec);
890 next RECORD unless defined $y;
891 my %obs;
892 $obs{Y} = $y;
893 @obs{@X} = (0) x @X;
894 $obs{const} = 1;
895 for my $x (@X) {
896 if ($x =~ /^eq_(.+)/) {
897 my $read_v = $1;
898 if (exists $rec->{$variable}
899 && defined $rec->{$variable}
901 my $use_v = (
902 $normalize_value{$variable} ?
903 $normalize_value{$variable}->($rec->{$variable}) :
904 $rec->{$variable}
906 if ($use_v eq $read_v) {
907 $obs{$x} = 1;
910 # warn "DEBUG: y[$y]x[$x]obs[$obs{$x}]\n";
911 } elsif ($x =~ /^n_(.+)/) {
912 my $v = $1;
913 $obs{$x} = $normalize_numeric{$v}->($rec->{$v});
916 push @{$regdata{data}}, \%obs;
918 _run_regression ($variable, \%regdata, \@regression, \%Opt);
920 my $top = min ($Opt{solvetop} || 3, scalar @regression);
921 my $max_rsq = sum map {1==$_->rsq ? 1 : 0} @regression;
922 $top = $max_rsq if $max_rsq && $max_rsq > $top;
923 my $score = 0;
924 printf
926 "State after regression testing: %d results, showing top %d\n\n",
927 scalar @regression,
928 $top,
930 for my $reg (sort {
931 $b->rsq <=> $a->rsq
933 $a->k <=> $b->k
934 } @regression) {
935 printf "(%d)\n", ++$score;
936 eval { $reg->print; };
937 if ($@) {
938 printf "\n\nOops, Statistics::Regression died during ->print() with error message[$@]\n\n";
940 last if --$top <= 0;
945 sub _run_regression {
946 my($variable,$regdata,$regression,$opt) = @_;
947 my @X = @{$regdata->{X}};
948 while (@X > 1) {
949 my $reg = Statistics::Regression->new($variable,\@X);
950 for my $obs (@{$regdata->{data}}) {
951 my $y = delete $obs->{Y};
952 $reg->include($y, $obs);
953 $obs->{Y} = $y;
955 eval {$reg->theta;
956 my @e = $reg->standarderrors;
957 die "found standarderrors == 0" if grep { 0 == $_ } @e;
958 $reg->rsq;};
959 if ($@) {
960 if ($opt->{verbose} && $opt->{verbose}>=2) {
961 require YAML::Syck;
962 warn YAML::Syck::Dump
963 ({error=>"could not determine some regression parameters",
964 variable=>$variable,
965 k=>$reg->k,
966 n=>$reg->n,
967 X=>$regdata->{"X"},
968 errorstr => $@,
971 # reduce k in case that linear dependencies disturbed us
972 splice @X, 1, 1;
973 } else {
974 # $reg->print;
975 push @$regression, $reg;
976 return;
981 =head1 AUTHOR
983 Andreas König
985 =head1 BUGS
987 Please report any bugs or feature requests through the web
988 interface at
989 L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=CPAN-Testers-ParseReport>.
990 I will be notified, and then you'll automatically be notified of
991 progress on your bug as I make changes.
993 =head1 SUPPORT
995 You can find documentation for this module with the perldoc command.
997 perldoc CPAN::Testers::ParseReport
1000 You can also look for information at:
1002 =over 4
1004 =item * RT: CPAN's request tracker
1006 L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=CPAN-Testers-ParseReport>
1008 =item * AnnoCPAN: Annotated CPAN documentation
1010 L<http://annocpan.org/dist/CPAN-Testers-ParseReport>
1012 =item * CPAN Ratings
1014 L<http://cpanratings.perl.org/d/CPAN-Testers-ParseReport>
1016 =item * Search CPAN
1018 L<http://search.cpan.org/dist/CPAN-Testers-ParseReport>
1020 =back
1023 =head1 ACKNOWLEDGEMENTS
1025 Thanks to RJBS for module-starter.
1027 =head1 COPYRIGHT & LICENSE
1029 Copyright 2008 Andreas König.
1031 This program is free software; you can redistribute it and/or modify it
1032 under the same terms as Perl itself.
1035 =cut
1037 1; # End of CPAN::Testers::ParseReport