generate a conf:git_commit_id if possible
[cpan-testers-parsereport.git] / lib / CPAN / Testers / ParseReport.pm
blobb997361841b9b83b2af125c399ca7e105c4a2683
1 package CPAN::Testers::ParseReport;
3 use warnings;
4 use strict;
6 use DateTime::Format::Strptime;
7 use DateTime::Format::DateParse;
8 use File::Basename qw(basename);
9 use File::Path qw(mkpath);
10 use HTML::Entities qw(decode_entities);
11 use LWP::UserAgent;
12 use List::Util qw(max min sum);
13 use Net::NNTP ();
14 use Time::Local ();
15 use XML::LibXML;
16 use XML::LibXML::XPathContext;
18 our $default_ctformat = "yaml";
19 our $default_transport = "nntp";
20 our $default_cturl = "http://www.cpantesters.org/show";
21 our $Signal = 0;
23 =encoding utf-8
25 =head1 NAME
27 CPAN::Testers::ParseReport - parse reports to www.cpantesters.org from various sources
29 =cut
31 use version; our $VERSION = qv('0.1.0');
33 =head1 SYNOPSIS
35 The documentation in here is normally not needed because the code is
36 meant to be run from a standalone program, L<ctgetreports>.
38 ctgetreports --q mod:Moose Devel-Events
40 =head1 DESCRIPTION
42 This is the core module for CPAN::Testers::ParseReport. If you're not
43 looking to extend or alter the behaviour of this module, you probably
44 want to look at L<ctgetreports> instead.
46 =head1 OPTIONS
48 Are described in the L<ctgetreports> manpage and are passed through to
49 the functions unaltered.
51 =head1 FUNCTIONS
53 =head2 parse_distro($distro,%options)
55 reads the cpantesters HTML page or the YAML file for the distro and
56 loops through the reports for the specified or most recent version of
57 that distro found in these data.
59 =head2 parse_single_report($report,$dumpvars,%options)
61 mirrors and reads this report. $report is of the form
63 { id => number }
65 $dumpvar is a hashreference that gets filled with data.
67 =cut
70 my $ua;
71 sub _ua {
72 return $ua if $ua;
73 $ua = LWP::UserAgent->new
75 keep_alive => 1,
76 env_proxy => 1,
78 $ua->parse_head(0);
79 $ua;
84 my $nntp;
85 sub _nntp {
86 return $nntp if $nntp;
87 $nntp = Net::NNTP->new("nntp.perl.org");
88 $nntp->group("perl.cpan.testers");
89 return $nntp;
94 my $xp;
95 sub _xp {
96 return $xp if $xp;
97 $xp = XML::LibXML->new;
98 $xp->keep_blanks(0);
99 $xp->clean_namespaces(1);
100 my $catalog = __FILE__;
101 $catalog =~ s|ParseReport.pm$|ParseReport/catalog|;
102 $xp->load_catalog($catalog);
103 return $xp;
107 sub _download_overview {
108 my($cts_dir, $distro, %Opt) = @_;
109 my $format = $Opt{ctformat} ||= $default_ctformat;
110 my $cturl = $Opt{cturl} ||= $default_cturl;
111 my $ctarget = "$cts_dir/$distro.$format";
112 my $cheaders = "$cts_dir/$distro.headers";
113 if ($Opt{local}) {
114 unless (-e $ctarget) {
115 die "Alert: No local file '$ctarget' found, cannot continue\n";
117 } else {
118 if (! -e $ctarget or -M $ctarget > .25) {
119 if (-e $ctarget && $Opt{verbose}) {
120 my(@stat) = stat _;
121 my $timestamp = gmtime $stat[9];
122 print STDERR "(timestamp $timestamp GMT)\n" unless $Opt{quiet};
124 print STDERR "Fetching $ctarget..." if $Opt{verbose} && !$Opt{quiet};
125 my $uri = "$cturl/$distro.$format";
126 my $resp = _ua->mirror($uri,$ctarget);
127 if ($resp->is_success) {
128 print STDERR "DONE\n" if $Opt{verbose} && !$Opt{quiet};
129 open my $fh, ">", $cheaders or die;
130 for ($resp->headers->as_string) {
131 print $fh $_;
132 if ($Opt{verbose} && $Opt{verbose}>1) {
133 print STDERR $_ unless $Opt{quiet};
136 } elsif (304 == $resp->code) {
137 print STDERR "DONE (not modified)\n" if $Opt{verbose} && !$Opt{quiet};
138 my $atime = my $mtime = time;
139 utime $atime, $mtime, $cheaders;
140 } else {
141 die sprintf
143 "No success downloading %s: %s",
144 $uri,
145 $resp->status_line,
150 return $ctarget;
153 sub _parse_html {
154 my($ctarget, %Opt) = @_;
155 my $content = do { open my $fh, $ctarget or die; local $/; <$fh> };
156 my $preprocesswithtreebuilder = 0; # not needed since barbie switched to XHTML
157 if ($preprocesswithtreebuilder) {
158 require HTML::TreeBuilder;
159 my $tree = HTML::TreeBuilder->new;
160 $tree->implicit_tags(1);
161 $tree->p_strict(1);
162 $tree->ignore_ignorable_whitespace(0);
163 $tree->parse_content($content);
164 $tree->eof;
165 $content = $tree->as_XML;
167 my $parser = _xp();
168 my $doc = eval { $parser->parse_string($content) };
169 my $err = $@;
170 unless ($doc) {
171 my $distro = basename $ctarget;
172 die sprintf "Error while parsing %s\: %s", $distro, $err;
174 my $xc = XML::LibXML::XPathContext->new($doc);
175 my $nsu = $doc->documentElement->namespaceURI;
176 $xc->registerNs('x', $nsu) if $nsu;
177 my($selected_release_ul,$selected_release_distrov,$excuse_string);
178 my($cparentdiv)
179 = $nsu ?
180 $xc->findnodes("/x:html/x:body/x:div[\@id = 'doc']") :
181 $doc->findnodes("/html/body/div[\@id = 'doc']");
182 my(@releasedivs) = $nsu ?
183 $xc->findnodes("//x:div[x:h2 and x:ul]",$cparentdiv) :
184 $cparentdiv->findnodes("//div[h2 and ul]");
185 my $releasediv;
186 if ($Opt{vdistro}) {
187 $excuse_string = "selected distro '$Opt{vdistro}'";
188 my($fallbacktoversion) = $Opt{vdistro} =~ /(\d+\..*)/;
189 $fallbacktoversion = 0 unless defined $fallbacktoversion;
190 RELEASE: for my $i (0..$#releasedivs) {
191 my $picked = "";
192 my($x) = $nsu ?
193 $xc->findvalue("x:h2/x:a[2]/\@name",$releasedivs[$i]) :
194 $releasedivs[$i]->findvalue("h2/a[2]/\@name");
195 if ($x) {
196 if ($x eq $Opt{vdistro}) {
197 $releasediv = $i;
198 $picked = " (picked)";
200 print STDERR "FOUND DISTRO: $x$picked\n" unless $Opt{quiet};
201 } else {
202 ($x) = $nsu ?
203 $xc->findvalue("x:h2/x:a[1]/\@name",$releasedivs[$i]) :
204 $releasedivs[$i]->findvalue("h2/a[1]/\@name");
205 if ($x eq $fallbacktoversion) {
206 $releasediv = $i;
207 $picked = " (picked)";
209 print STDERR "FOUND VERSION: $x$picked\n" unless $Opt{quiet};
212 } else {
213 $excuse_string = "any distro";
215 unless (defined $releasediv) {
216 $releasediv = 0;
218 # using a[1] because a[2] is missing on the first entry
219 ($selected_release_distrov) = $nsu ?
220 $xc->findvalue("x:h2/x:a[1]/\@name",$releasedivs[$releasediv]) :
221 $releasedivs[$releasediv]->findvalue("h2/a[1]/\@name");
222 ($selected_release_ul) = $nsu ?
223 $xc->findnodes("x:ul",$releasedivs[$releasediv]) :
224 $releasedivs[$releasediv]->findnodes("ul");
225 unless (defined $selected_release_distrov) {
226 warn "Warning: could not find $excuse_string in '$ctarget'";
227 return;
229 print STDERR "SELECTED: $selected_release_distrov\n" unless $Opt{quiet};
230 my($id);
231 my @all;
232 for my $test ($nsu ?
233 $xc->findnodes("x:li",$selected_release_ul) :
234 $selected_release_ul->findnodes("li")) {
235 $id = $nsu ?
236 $xc->findvalue("x:a[1]/text()",$test) :
237 $test->findvalue("a[1]/text()");
238 push @all, {id=>$id};
239 return if $Signal;
241 return \@all;
244 sub _parse_yaml {
245 my($ctarget, %Opt) = @_;
246 require YAML::Syck;
247 my $arr = YAML::Syck::LoadFile($ctarget);
248 my($selected_release_ul,$selected_release_distrov,$excuse_string);
249 if ($Opt{vdistro}) {
250 $excuse_string = "selected distro '$Opt{vdistro}'";
251 $arr = [grep {$_->{distversion} eq $Opt{vdistro}} @$arr];
252 ($selected_release_distrov) = $arr->[0]{distversion};
253 } else {
254 $excuse_string = "any distro";
255 my $last_addition;
256 my %seen;
257 for my $report (@$arr) {
258 unless ($seen{$report->{distversion}}++) {
259 $last_addition = $report->{distversion};
262 $arr = [grep {$_->{distversion} eq $last_addition} @$arr];
263 ($selected_release_distrov) = $last_addition;
265 unless ($selected_release_distrov) {
266 warn "Warning: could not find $excuse_string in '$ctarget'";
267 return;
269 print STDERR "SELECTED: $selected_release_distrov\n" unless $Opt{quiet};
270 my @all;
271 for my $test (@$arr) {
272 my $id = $test->{id};
273 push @all, {id=>$id};
274 return if $Signal;
276 @all = sort { $b->{id} <=> $a->{id} } @all;
277 return \@all;
280 sub parse_single_report {
281 my($report, $dumpvars, %Opt) = @_;
282 my($id) = $report->{id};
283 $Opt{cachedir} ||= "$ENV{HOME}/var/cpantesters";
284 my $nnt_dir = "$Opt{cachedir}/nntp-testers";
285 mkpath $nnt_dir;
286 my $target = "$nnt_dir/$id";
287 if ($Opt{local}) {
288 unless (-e $target) {
289 die {severity=>0,text=>"Warning: No local file '$target' found, skipping\n"};
291 } else {
292 if (! -e $target) {
293 print STDERR "Fetching $target..." if $Opt{verbose} && !$Opt{quiet};
294 $Opt{transport} ||= $default_transport;
295 if ($Opt{transport} eq "nntp") {
296 my $article = _nntp->article($id);
297 unless ($article) {
298 die {severity=>0,text=>"NNTP-Server did not return an article for id[$id]"};
300 open my $fh, ">", $target or die {severity=>1,text=>"Could not open >$target: $!"};
301 print $fh @$article;
302 } elsif ($Opt{transport} eq "http") {
303 my $resp = _ua->mirror("http://www.nntp.perl.org/group/perl.cpan.testers/$id",$target);
304 if ($resp->is_success) {
305 if ($Opt{verbose}) {
306 my(@stat) = stat $target;
307 my $timestamp = gmtime $stat[9];
308 print STDERR "(timestamp $timestamp GMT)\n" unless $Opt{quiet};
309 if ($Opt{verbose} > 1) {
310 print STDERR $resp->headers->as_string unless $Opt{quiet};
313 my $headers = "$target.headers";
314 open my $fh, ">", $headers or die {severity=>1,text=>"Could not open >$headers: $!"};
315 print $fh $resp->headers->as_string;
316 } else {
317 die {severity=>0,
318 text=>sprintf "HTTP Server Error[%s] for id[%s]", $resp->status_line, $id};
320 } else {
321 die {severity=>1,text=>"Illegal value for --transport: '$Opt{transport}'"};
325 parse_report($target, $dumpvars, %Opt);
328 sub parse_distro {
329 my($distro,%Opt) = @_;
330 my %dumpvars;
331 $Opt{cachedir} ||= "$ENV{HOME}/var/cpantesters";
332 my $cts_dir = "$Opt{cachedir}/cpantesters-show";
333 mkpath $cts_dir;
334 if ($Opt{solve}) {
335 require Statistics::Regression;
336 $Opt{dumpvars} = "." unless defined $Opt{dumpvars};
338 if (!$Opt{vdistro} && $distro =~ /^(.+)-(?i:v?\d+)(?:\.\d+)*\w*$/) {
339 $Opt{vdistro} = $distro;
340 $distro = $1;
342 my $ctarget = _download_overview($cts_dir, $distro, %Opt);
343 my $reports;
344 $Opt{ctformat} ||= $default_ctformat;
345 if ($Opt{ctformat} eq "html") {
346 $reports = _parse_html($ctarget,%Opt);
347 } else {
348 $reports = _parse_yaml($ctarget,%Opt);
350 return unless $reports;
351 for my $report (@$reports) {
352 eval {parse_single_report($report, \%dumpvars, %Opt)};
353 if ($@) {
354 if (ref $@) {
355 if ($@->{severity}) {
356 die $@->{text};
357 } else {
358 warn $@->{text};
360 } else {
361 die $@;
364 last if $Signal;
366 if ($Opt{dumpvars}) {
367 require YAML::Syck;
368 my $dumpfile = $Opt{dumpfile} || "ctgetreports.out";
369 open my $fh, ">", $dumpfile or die "Could not open '$dumpfile' for writing: $!";
370 print $fh YAML::Syck::Dump(\%dumpvars);
371 close $fh or die "Could not close '$dumpfile': $!"
373 if ($Opt{solve}) {
374 solve(\%dumpvars,%Opt);
378 =head2 $extract = parse_report($target,$dumpvars,%Opt)
380 Reads one report. $target is the local filename to read. $dumpvars is
381 a hashref which gets filled with descriptive stats about
382 PASS/FAIL/etc. %Opt are the options as described in the
383 C<ctgetreports> manpage. $extract is a hashref containing the found
384 variables.
386 Note: this parsing is a bit dirty but as it seems good enough I'm not
387 inclined to change it. We parse HTML with regexps only, not an HTML
388 parser. Only the entities are decoded.
390 Update around version 0.0.17: switching to nntp now but keeping the
391 parser for HTML around to read old local copies.
393 Update around 0.0.18: In %Options you can use
395 article => $some_full_article_as_scalar
397 to use this function to parse one full article as text. When this is
398 given, the argument $target is not read, but its basename is taken to
399 be the id of the article. (OMG, hackers!)
401 =cut
402 sub parse_report {
403 my($target,$dumpvars,%Opt) = @_;
404 our @q;
405 my $id = basename($target);
406 my($ok,$about);
408 my(%extract);
410 my($report,$isHTML);
411 if ($report = $Opt{article}) {
412 $isHTML = $report =~ /^</;
413 undef $target;
415 if ($target) {
416 open my $fh, $target or die "Could not open '$target': $!";
417 local $/;
418 my $raw_report = <$fh>;
419 $isHTML = $raw_report =~ /^</;
420 $report = $isHTML ? decode_entities($raw_report) : $raw_report;
421 close $fh;
423 my @qr = map /^qr:(.+)/, @{$Opt{q}};
424 if ($Opt{raw} || @qr) {
425 for my $qr (@qr) {
426 my $cqr = eval "qr{$qr}";
427 die "Could not compile regular expression '$qr': $@" if $@;
428 my(@matches) = $report =~ $cqr;
429 my $v;
430 if (@matches) {
431 if (@matches==1) {
432 $v = $matches[0];
433 } else {
434 $v = join "", map {"($_)"} @matches;
436 } else {
437 $v = "";
439 $extract{"qr:$qr"} = $v;
443 my $report_writer;
444 my $moduleunpack = {};
445 my $expect_prereq = 0;
446 my $expect_toolchain = 0;
447 my $expecting_toolchain_soon = 0;
449 my $in_summary = 0;
450 my $in_prg_output = 0;
451 my $in_env_context = 0;
453 my $current_headline;
454 my @previous_line = ""; # so we can neutralize line breaks
455 my @rlines = split /\r?\n/, $report;
456 LINE: for (@rlines) {
457 next LINE unless ($isHTML ? m/<title>(\S+)\s+(\S+)/ : m/^Subject: (\S+)\s+(\S+)/);
458 $ok = $1;
459 $about = $2;
460 $extract{"meta:ok"} = $ok;
461 $extract{"meta:about"} = $about;
462 last;
464 LINE: while (@rlines) {
465 $_ = shift @rlines;
466 while (/!$/) {
467 my $followupline = shift @rlines;
468 $followupline =~ s/^\s+//; # remo leading space
469 $_ .= $followupline;
471 if (/^--------/ && $previous_line[-2] && $previous_line[-2] =~ /^--------/) {
472 $current_headline = $previous_line[-1];
473 if ($current_headline =~ /PROGRAM OUTPUT/) {
474 $in_prg_output = 1;
475 } else {
476 $in_prg_output = 0;
478 if ($current_headline =~ /ENVIRONMENT AND OTHER CONTEXT/) {
479 $in_env_context = 1;
480 } else {
481 $in_env_context = 0;
484 if ($extract{"meta:perl"}) {
485 if ( $in_summary
486 and !$extract{"conf:git_commit_id"}
487 and /Commit id: ([[:xdigit:]]+)/) {
488 $extract{"conf:git_commit_id"} = $1;
490 } else {
491 my $p5;
492 if (0) {
493 } elsif (/Summary of my perl5 \((.+)\) configuration:/) {
494 $p5 = $1;
495 $in_summary = 1;
496 $in_env_context = 0;
498 if ($p5) {
499 my($r,$v,$s,$p);
500 if (($r,$v,$s,$p) = $p5 =~ /revision (\S+) version (\S+) subversion (\S+) patch (\S+)/) {
501 $r =~ s/\.0//; # 5.0 6 2!
502 $extract{"meta:perl"} = "$r.$v.$s\@$p";
503 } elsif (($r,$v,$s) = $p5 =~ /revision (\S+) version (\S+) subversion (\S+)/) {
504 $r =~ s/\.0//;
505 $extract{"meta:perl"} = "$r.$v.$s";
506 } elsif (($r,$v,$s) = $p5 =~ /(\d+\S*) patchlevel (\S+) subversion (\S+)/) {
507 $r =~ s/\.0//;
508 $extract{"meta:perl"} = "$r.$v.$s";
509 } else {
510 $extract{"meta:perl"} = $p5;
514 unless ($extract{"meta:from"}) {
515 if (0) {
516 } elsif ($isHTML ?
517 m|<div class="h_name">From:</div> <b>(.+?)</b><br/>| :
518 m|^From: (.+)|
520 $extract{"meta:from"} = $1;
522 $extract{"meta:from"} =~ s/\.$// if $extract{"meta:from"};
524 unless ($extract{"meta:date"}) {
525 if (0) {
526 } elsif ($isHTML ?
527 m|<div class="h_name">Date:</div> (.+?)<br/>| :
528 m|^Date: (.+)|
530 my $date = $1;
531 my($dt);
532 if ($isHTML) {
533 my $p;
534 $p = DateTime::Format::Strptime->new(
535 locale => "en",
536 time_zone => "UTC",
537 # April 13, 2005 23:50
538 pattern => "%b %d, %Y %R",
540 $dt = $p->parse_datetime($date);
541 } else {
542 # Sun, 28 Sep 2008 12:23:12 +0100 # but was not consistent
543 # pattern => "%a, %d %b %Y %T %z",
544 $dt = DateTime::Format::DateParse->parse_datetime($date);
546 unless ($dt) {
547 warn "Could not parse date[$date], setting to epoch 0";
548 $dt = DateTime->from_epoch( epoch => 0 );
550 $extract{"meta:date"} = $dt->datetime;
552 $extract{"meta:date"} =~ s/\.$// if $extract{"meta:date"};
554 unless ($extract{"meta:writer"}) {
555 for ("$previous_line[-1] $_") {
556 if (0) {
557 } elsif (/CPANPLUS, version (\S+)/) {
558 $extract{"meta:writer"} = "CPANPLUS $1";
559 } elsif (/created (?:automatically )?by (\S+)/) {
560 $extract{"meta:writer"} = $1;
561 } elsif (/This report was machine-generated by (\S+) (\S+)/) {
562 $extract{"meta:writer"} = "$1 $2";
564 $extract{"meta:writer"} =~ s/[\.,]$// if $extract{"meta:writer"};
567 if ($in_summary) {
568 # we do that first three lines a bit too often
569 my $qr = $Opt{dumpvars} || "";
570 $qr = qr/$qr/ if $qr;
571 unless (@q) {
572 @q = @{$Opt{q}||[]};
573 @q = qw(meta:perl conf:archname conf:usethreads conf:optimize meta:writer meta:from) unless @q;
576 my %conf_vars = map {($_ => 1)} grep { /^conf:/ } @q;
578 if (/^\s*$/ || m|</pre>|) {
579 # if not html, we have reached the end now
580 $in_summary = 0;
581 } else {
582 my(%kv) = /\G,?\s*([^=]+)=('[^']+?'|\S+)/gc;
583 while (my($k,$v) = each %kv) {
584 my $ck = "conf:$k";
585 $ck =~ s/\s+$//;
586 $v =~ s/,$//;
587 if ($v =~ /^'(.*)'$/) {
588 $v = $1;
590 $v =~ s/^\s+//;
591 $v =~ s/\s+$//;
592 if ($qr && $ck =~ $qr) {
593 $extract{$ck} = $v;
594 } elsif ($conf_vars{$ck}) {
595 $extract{$ck} = $v;
600 if ($in_prg_output) {
601 unless ($extract{"meta:output_from"}) {
602 if (/Output from (.+):$/) {
603 $extract{"meta:output_from"} = $1
607 if ($in_env_context) {
608 if (/^\s{4}(\S+)\s*=\s*(.*)$/) {
609 $extract{"env:$1"} = $2;
612 push @previous_line, $_;
613 if ($expect_prereq || $expect_toolchain) {
614 if (exists $moduleunpack->{type}) {
615 my($module,$v);
616 if ($moduleunpack->{type} == 1) {
617 (my $leader,$module,undef,$v) = eval { unpack $moduleunpack->{tpl}, $_; };
618 next LINE if $@;
619 if ($leader =~ /^-/) {
620 $moduleunpack = {};
621 $expect_prereq = 0;
622 next LINE;
623 } elsif ($leader =~ /^(
624 buil # build_requires:
625 )/x) {
626 next LINE;
627 } elsif ($module =~ /^(
628 - # line drawing
629 )/x) {
630 next LINE;
632 } elsif ($moduleunpack->{type} == 2) {
633 (my $leader,$module,$v) = eval { unpack $moduleunpack->{tpl}, $_; };
634 next LINE if $@;
635 if ($leader =~ /^\*/) {
636 $moduleunpack = {};
637 $expect_prereq = 0;
638 next LINE;
639 } elsif ($v =~ /\s/) {
640 ($module,$v) = split " ", $_;
642 } elsif ($moduleunpack->{type} == 3) {
643 (my $leader,$module,$v) = eval { unpack $moduleunpack->{tpl}, $_; };
644 next LINE if $@;
645 if (!$module) {
646 $moduleunpack = {};
647 $expect_toolchain = 0;
648 next LINE;
649 } elsif ($module =~ /^-/) {
650 next LINE;
653 $module =~ s/\s+$//;
654 if ($module) {
655 $v =~ s/^\s+//;
656 $v =~ s/\s+$//;
657 if ($v eq "Have") {
658 next LINE;
660 $extract{"mod:$module"} = $v;
663 if (/(\s+)(Module\s+)(Need\s+)Have/) {
664 $in_env_context = 0;
665 $moduleunpack = {
666 tpl => 'a'.length($1).'a'.length($2).'a'.length($3).'a*',
667 type => 1,
669 } elsif (/(\s+)(Module Name\s+)(Have)(\s+)Want/) {
670 $in_env_context = 0;
671 my $adjust_1 = 0;
672 my $adjust_2 = -length($4);
673 my $adjust_3 = length($4);
674 # two pass would be required to see where the
675 # columns really are. Or could we get away with split?
676 $moduleunpack = {
677 tpl => 'a'.length($1).'a'.(length($2)+$adjust_2).'a'.(length($3)+$adjust_3),
678 type => 2,
682 if (/PREREQUISITES|Prerequisite modules loaded/) {
683 $in_env_context = 0;
684 $expect_prereq=1;
686 if ($expecting_toolchain_soon) {
687 if (/(\s+)(Module\s+) Have/) {
688 $in_env_context = 0;
689 $expect_toolchain=1;
690 $expecting_toolchain_soon=0;
691 $moduleunpack = {
692 tpl => 'a'.length($1).'a'.length($2).'a*',
693 type => 3,
697 if (/toolchain versions installed/) {
698 $in_env_context = 0;
699 $expecting_toolchain_soon=1;
701 } # LINE
702 if ($Opt{solve}) {
703 $extract{id} = $id;
704 if ($extract{"conf:osvers"} && $extract{"conf:archname"}) {
705 $extract{"conf:archame+osvers"} = join " ", @extract{"conf:archname","conf:osvers"};
707 my $data = $dumpvars->{"==DATA=="} ||= [];
708 push @$data, \%extract;
710 # ---- %extract finished ----
711 my $diag = "";
712 if (my $qr = $Opt{dumpvars}) {
713 $qr = qr/$qr/;
714 while (my($k,$v) = each %extract) {
715 if ($k =~ $qr) {
716 $dumpvars->{$k}{$v}{$ok}++;
720 for my $want (@q) {
721 my $have = $extract{$want} || "";
722 $diag .= " $want\[$have]";
724 printf STDERR " %-4s %8d%s\n", $ok, $id, $diag unless $Opt{quiet};
725 if ($Opt{raw}) {
726 $report =~ s/\s+\z//;
727 print STDERR $report, "\n================\n" unless $Opt{quiet};
729 if ($Opt{interactive}) {
730 require IO::Prompt;
731 local @ARGV;
732 local $ARGV;
733 my $ans = IO::Prompt::prompt
735 -p => "View $id? [onechar: ynq] ",
736 -d => "y",
737 -u => qr/[ynq]/,
738 -onechar,
740 print STDERR "\n" unless $Opt{quiet};
741 if ($ans eq "y") {
742 open my $ifh, "<", $target or die "Could not open $target: $!";
743 $Opt{pager} ||= "less";
744 open my $lfh, "|-", $Opt{pager} or die "Could not fork '$Opt{pager}': $!";
745 local $/;
746 print {$lfh} <$ifh>;
747 close $ifh or die "Could not close $target: $!";
748 close $lfh or die "Could not close pager: $!"
749 } elsif ($ans eq "q") {
750 $Signal++;
751 return;
754 return \%extract;
757 =head2 solve
759 Feeds a couple of potentially interesting data to
760 Statistics::Regression and sorts the result by R^2 descending. Do not
761 confuse this with a prove, rather take it as a useful hint. It can
762 save you minutes of staring at data and provide a quick overview where
763 one should look closer. Displays the N top candidates, where N
764 defaults to 3 and can be set with the C<$Opt{solvetop}> variable.
765 Regressions results with an R^2 of 1.00 are displayed in any case.
767 The function is called when the option C<-solve> is give on the
768 commandline. Several extra config variables are calculated, see source
769 code for details.
771 =cut
773 my %never_solve_on = map {($_ => 1)}
775 "conf:ccflags",
776 "conf:config_args",
777 "conf:cppflags",
778 "conf:lddlflags",
779 "conf:uname",
780 "env:PATH",
781 "env:PERL5LIB",
782 "env:PERL5OPT",
783 'env:$^X',
784 'env:$EGID',
785 'env:$GID',
786 'env:$UID/$EUID',
787 'env:PERL5_CPANPLUS_IS_RUNNING',
788 'env:PERL5_CPAN_IS_RUNNING',
789 'env:PERL5_CPAN_IS_RUNNING_IN_RECURSION',
790 'meta:ok',
792 my %normalize_numeric =
794 id => sub { return shift },
795 'meta:date' => sub {
796 my($Y,$M,$D,$h,$m,$s) = shift =~ /(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)/;
797 Time::Local::timegm($s,$m,$h,$D,$M-1,$Y);
800 my %normalize_value =
802 'meta:perl' => sub {
803 my($perlatpatchlevel) = shift;
804 my $perl = $perlatpatchlevel;
805 $perl =~ s/\@.*//;
806 $perl;
809 sub solve {
810 my($V,%Opt) = @_;
811 require Statistics::Regression;
812 my @regression;
813 my $ycb;
814 if (my $ycbbody = $Opt{ycb}) {
815 $ycb = eval('sub {'.$ycbbody.'}');
816 die if $@;
817 } else {
818 $ycb = sub {
819 my $rec = shift;
820 my $y;
821 if ($rec->{"meta:ok"} eq "PASS") {
822 $y = 1;
823 } elsif ($rec->{"meta:ok"} eq "FAIL") {
824 $y = 0;
826 return $y
829 VAR: for my $variable (sort keys %$V) {
830 next if $variable eq "==DATA==";
831 if ($never_solve_on{$variable}){
832 warn "Skipping '$variable'\n" unless $Opt{quiet};
833 next VAR;
835 my $value_distribution = $V->{$variable};
836 my $keys = keys %$value_distribution;
837 my @X = qw(const);
838 if ($normalize_numeric{$variable}) {
839 push @X, "n_$variable";
840 } else {
841 my %seen = ();
842 for my $value (sort keys %$value_distribution) {
843 my $pf = $value_distribution->{$value};
844 $pf->{PASS} ||= 0;
845 $pf->{FAIL} ||= 0;
846 if ($pf->{PASS} || $pf->{FAIL}) {
847 my $Xele = sprintf "eq_%s",
849 $normalize_value{$variable} ?
850 $normalize_value{$variable}->($value) :
851 $value
853 push @X, $Xele unless $seen{$Xele}++;
856 if (
857 $pf->{PASS} xor $pf->{FAIL}
859 my $vl = 40;
860 substr($value,$vl) = "..." if length $value > 3+$vl;
861 my $poor_mans_freehand_estimation = 0;
862 if ($poor_mans_freehand_estimation) {
863 warn sprintf
865 "%4d %4d %-23s | %s\n",
866 $pf->{PASS},
867 $pf->{FAIL},
868 $variable,
869 $value,
875 warn "variable[$variable]keys[$keys]X[@X]\n" unless $Opt{quiet};
876 next VAR unless @X > 1;
877 my %regdata =
879 X => \@X,
880 data => [],
882 RECORD: for my $rec (@{$V->{"==DATA=="}}) {
883 my $y = $ycb->($rec);
884 next RECORD unless defined $y;
885 my %obs;
886 $obs{Y} = $y;
887 @obs{@X} = (0) x @X;
888 $obs{const} = 1;
889 for my $x (@X) {
890 if ($x =~ /^eq_(.+)/) {
891 my $read_v = $1;
892 if (exists $rec->{$variable}
893 && defined $rec->{$variable}
895 my $use_v = (
896 $normalize_value{$variable} ?
897 $normalize_value{$variable}->($rec->{$variable}) :
898 $rec->{$variable}
900 if ($use_v eq $read_v) {
901 $obs{$x} = 1;
904 # warn "DEBUG: y[$y]x[$x]obs[$obs{$x}]\n";
905 } elsif ($x =~ /^n_(.+)/) {
906 my $v = $1;
907 $obs{$x} = $normalize_numeric{$v}->($rec->{$v});
910 push @{$regdata{data}}, \%obs;
912 _run_regression ($variable, \%regdata, \@regression, \%Opt);
914 my $top = min ($Opt{solvetop} || 3, scalar @regression);
915 my $max_rsq = sum map {1==$_->rsq ? 1 : 0} @regression;
916 $top = $max_rsq if $max_rsq && $max_rsq > $top;
917 my $score = 0;
918 printf
920 "State after regression testing: %d results, showing top %d\n\n",
921 scalar @regression,
922 $top,
924 for my $reg (sort {
925 $b->rsq <=> $a->rsq
927 $a->k <=> $b->k
928 } @regression) {
929 printf "(%d)\n", ++$score;
930 eval { $reg->print; };
931 if ($@) {
932 printf "\n\nOops, Statistics::Regression died during ->print() with error message[$@]\n\n";
934 last if --$top <= 0;
939 sub _run_regression {
940 my($variable,$regdata,$regression,$opt) = @_;
941 my @X = @{$regdata->{X}};
942 while (@X > 1) {
943 my $reg = Statistics::Regression->new($variable,\@X);
944 for my $obs (@{$regdata->{data}}) {
945 my $y = delete $obs->{Y};
946 $reg->include($y, $obs);
947 $obs->{Y} = $y;
949 eval {$reg->theta;
950 my @e = $reg->standarderrors;
951 die "found standarderrors == 0" if grep { 0 == $_ } @e;
952 $reg->rsq;};
953 if ($@) {
954 if ($opt->{verbose} && $opt->{verbose}>=2) {
955 require YAML::Syck;
956 warn YAML::Syck::Dump
957 ({error=>"could not determine some regression parameters",
958 variable=>$variable,
959 k=>$reg->k,
960 n=>$reg->n,
961 X=>$regdata->{"X"},
962 errorstr => $@,
965 # reduce k in case that linear dependencies disturbed us
966 splice @X, 1, 1;
967 } else {
968 # $reg->print;
969 push @$regression, $reg;
970 return;
975 =head1 AUTHOR
977 Andreas König
979 =head1 BUGS
981 Please report any bugs or feature requests through the web
982 interface at
983 L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=CPAN-Testers-ParseReport>.
984 I will be notified, and then you'll automatically be notified of
985 progress on your bug as I make changes.
987 =head1 SUPPORT
989 You can find documentation for this module with the perldoc command.
991 perldoc CPAN::Testers::ParseReport
994 You can also look for information at:
996 =over 4
998 =item * RT: CPAN's request tracker
1000 L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=CPAN-Testers-ParseReport>
1002 =item * AnnoCPAN: Annotated CPAN documentation
1004 L<http://annocpan.org/dist/CPAN-Testers-ParseReport>
1006 =item * CPAN Ratings
1008 L<http://cpanratings.perl.org/d/CPAN-Testers-ParseReport>
1010 =item * Search CPAN
1012 L<http://search.cpan.org/dist/CPAN-Testers-ParseReport>
1014 =back
1017 =head1 ACKNOWLEDGEMENTS
1019 Thanks to RJBS for module-starter.
1021 =head1 COPYRIGHT & LICENSE
1023 Copyright 2008 Andreas König.
1025 This program is free software; you can redistribute it and/or modify it
1026 under the same terms as Perl itself.
1029 =cut
1031 1; # End of CPAN::Testers::ParseReport