1 package CPAN
::Testers
::ParseReport
;
6 use Config
::Perl
::V
();
7 use DateTime
::Format
::Strptime
;
8 use File
::Basename
qw(basename);
9 use File
::Path
qw(mkpath);
10 use HTML
::Entities
qw(decode_entities);
12 use List
::Util
qw(max min sum);
13 use MIME
::QuotedPrint
();
17 use XML
::LibXML
::XPathContext
;
19 our $default_ctformat = "yaml";
20 our $default_transport = "nntp";
21 our $default_cturl = "http://www.cpantesters.org/show";
28 CPAN::Testers::ParseReport - parse reports to www.cpantesters.org from various sources
32 use version
; our $VERSION = qv
('0.1.10');
36 The documentation in here is normally not needed because the code is
37 meant to be run from a standalone program, L<ctgetreports>.
39 ctgetreports --q mod:Moose Devel-Events
43 This is the core module for CPAN::Testers::ParseReport. If you're not
44 looking to extend or alter the behaviour of this module, you probably
45 want to look at L<ctgetreports> instead.
49 Are described in the L<ctgetreports> manpage and are passed through to
50 the functions unaltered.
54 =head2 parse_distro($distro,%options)
56 reads the cpantesters HTML page or the YAML file or the local database
57 for the distro and loops through the reports for the specified or most
58 recent version of that distro found in these data.
60 parse_distro() intentionally has no meaningful return value, different
61 options would require different ones.
63 =head2 $extract = parse_single_report($report,$dumpvars,%options)
65 mirrors and reads this report. $report is of the form
69 $dumpvar is a hashreference that gets filled with data.
71 $extract is the result of parse_report() described below.
79 $ua = LWP
::UserAgent
->new
92 return $nntp if $nntp;
93 $nntp = Net
::NNTP
->new("nntp.perl.org");
94 $nntp->group("perl.cpan.testers");
103 $xp = XML
::LibXML
->new;
105 $xp->clean_namespaces(1);
106 my $catalog = __FILE__
;
107 $catalog =~ s
|ParseReport
.pm
$|ParseReport
/catalog
|;
108 $xp->load_catalog($catalog);
113 sub _download_overview
{
114 my($cts_dir, $distro, %Opt) = @_;
115 my $format = $Opt{ctformat
} ||= $default_ctformat;
116 my $cturl = $Opt{cturl
} ||= $default_cturl;
117 my $ctarget = "$cts_dir/$distro.$format";
118 my $cheaders = "$cts_dir/$distro.headers";
120 unless (-e
$ctarget) {
121 die "Alert: No local file '$ctarget' found, cannot continue\n";
124 if (! -e
$ctarget or -M
$ctarget > .25) {
125 if (-e
$ctarget && $Opt{verbose
}) {
127 my $timestamp = gmtime $stat[9];
128 print STDERR
"(timestamp $timestamp GMT)\n" unless $Opt{quiet
};
130 print STDERR
"Fetching $ctarget..." if $Opt{verbose
} && !$Opt{quiet
};
131 my $uri = "$cturl/$distro.$format";
132 my $resp = _ua
->mirror($uri,$ctarget);
133 if ($resp->is_success) {
134 print STDERR
"DONE\n" if $Opt{verbose
} && !$Opt{quiet
};
135 open my $fh, ">", $cheaders or die;
136 for ($resp->headers->as_string) {
138 if ($Opt{verbose
} && $Opt{verbose
}>1) {
139 print STDERR
$_ unless $Opt{quiet
};
142 } elsif (304 == $resp->code) {
143 print STDERR
"DONE (not modified)\n" if $Opt{verbose
} && !$Opt{quiet
};
144 my $atime = my $mtime = time;
145 utime $atime, $mtime, $cheaders;
149 "No success downloading %s: %s",
160 my($ctarget, %Opt) = @_;
161 my $content = do { open my $fh, $ctarget or die; local $/; <$fh> };
162 my $preprocesswithtreebuilder = 0; # not needed since barbie switched to XHTML
163 if ($preprocesswithtreebuilder) {
164 require HTML
::TreeBuilder
;
165 my $tree = HTML
::TreeBuilder
->new;
166 $tree->implicit_tags(1);
168 $tree->ignore_ignorable_whitespace(0);
169 $tree->parse_content($content);
171 $content = $tree->as_XML;
174 my $doc = eval { $parser->parse_string($content) };
177 my $distro = basename
$ctarget;
178 die sprintf "Error while parsing %s\: %s", $distro, $err;
180 my $xc = XML
::LibXML
::XPathContext
->new($doc);
181 my $nsu = $doc->documentElement->namespaceURI;
182 $xc->registerNs('x', $nsu) if $nsu;
183 my($selected_release_ul,$selected_release_distrov,$excuse_string);
186 $xc->findnodes("/x:html/x:body/x:div[\@id = 'doc']") :
187 $doc->findnodes("/html/body/div[\@id = 'doc']");
188 my(@releasedivs) = $nsu ?
189 $xc->findnodes("//x:div[x:h2 and x:ul]",$cparentdiv) :
190 $cparentdiv->findnodes("//div[h2 and ul]");
193 $excuse_string = "selected distro '$Opt{vdistro}'";
194 my($fallbacktoversion) = $Opt{vdistro
} =~ /(\d+\..*)/;
195 $fallbacktoversion = 0 unless defined $fallbacktoversion;
196 RELEASE
: for my $i (0..$#releasedivs) {
199 $xc->findvalue("x:h2/x:a[2]/\@name",$releasedivs[$i]) :
200 $releasedivs[$i]->findvalue("h2/a[2]/\@name");
202 if ($x eq $Opt{vdistro
}) {
204 $picked = " (picked)";
206 print STDERR
"FOUND DISTRO: $x$picked\n" unless $Opt{quiet
};
209 $xc->findvalue("x:h2/x:a[1]/\@name",$releasedivs[$i]) :
210 $releasedivs[$i]->findvalue("h2/a[1]/\@name");
211 if ($x eq $fallbacktoversion) {
213 $picked = " (picked)";
215 print STDERR
"FOUND VERSION: $x$picked\n" unless $Opt{quiet
};
219 $excuse_string = "any distro";
221 unless (defined $releasediv) {
224 # using a[1] because a[2] is missing on the first entry
225 ($selected_release_distrov) = $nsu ?
226 $xc->findvalue("x:h2/x:a[1]/\@name",$releasedivs[$releasediv]) :
227 $releasedivs[$releasediv]->findvalue("h2/a[1]/\@name");
228 ($selected_release_ul) = $nsu ?
229 $xc->findnodes("x:ul",$releasedivs[$releasediv]) :
230 $releasedivs[$releasediv]->findnodes("ul");
231 unless (defined $selected_release_distrov) {
232 warn "Warning: could not find $excuse_string in '$ctarget'";
235 print STDERR
"SELECTED: $selected_release_distrov\n" unless $Opt{quiet
};
239 $xc->findnodes("x:li",$selected_release_ul) :
240 $selected_release_ul->findnodes("li")) {
242 $xc->findvalue("x:a[1]/text()",$test) :
243 $test->findvalue("a[1]/text()");
244 push @all, {id
=>$id};
251 my($ctarget, %Opt) = @_;
253 my $arr = YAML
::Syck
::LoadFile
($ctarget);
254 my($selected_release_ul,$selected_release_distrov,$excuse_string);
256 $excuse_string = "selected distro '$Opt{vdistro}'";
257 $arr = [grep {$_->{distversion
} eq $Opt{vdistro
}} @
$arr];
258 ($selected_release_distrov) = $arr->[0]{distversion
};
260 $excuse_string = "any distro";
263 for my $report (sort { $a->{id
} <=> $b->{id
} } @
$arr) {
264 unless ($seen{$report->{distversion
}}++) {
265 $last_addition = $report->{distversion
};
268 $arr = [grep {$_->{distversion
} eq $last_addition} @
$arr];
269 ($selected_release_distrov) = $last_addition;
271 unless ($selected_release_distrov) {
272 warn "Warning: could not find $excuse_string in '$ctarget'";
275 print STDERR
"SELECTED: $selected_release_distrov\n" unless $Opt{quiet
};
277 for my $test (@
$arr) {
278 my $id = $test->{id
};
279 push @all, {id
=>$id};
282 @all = sort { $b->{id
} <=> $a->{id
} } @all;
286 sub parse_single_report
{
287 my($report, $dumpvars, %Opt) = @_;
288 my($id) = $report->{id
};
289 $Opt{cachedir
} ||= "$ENV{HOME}/var/cpantesters";
290 my $nnt_dir = "$Opt{cachedir}/nntp-testers";
292 my $target = "$nnt_dir/$id";
294 unless (-e
$target) {
295 die {severity
=>0,text
=>"Warning: No local file '$target' found, skipping\n"};
299 print STDERR
"Fetching $target..." if $Opt{verbose
} && !$Opt{quiet
};
300 $Opt{transport
} ||= $default_transport;
301 if ($Opt{transport
} eq "nntp") {
302 my $article = _nntp
->article($id);
304 die {severity
=>0,text
=>"NNTP-Server did not return an article for id[$id]"};
306 open my $fh, ">", $target or die {severity
=>1,text
=>"Could not open >$target: $!"};
308 } elsif ($Opt{transport
} eq "http") {
309 my $resp = _ua
->mirror("http://www.nntp.perl.org/group/perl.cpan.testers/$id",$target);
310 if ($resp->is_success) {
312 my(@stat) = stat $target;
313 my $timestamp = gmtime $stat[9];
314 print STDERR
"(timestamp $timestamp GMT)\n" unless $Opt{quiet
};
315 if ($Opt{verbose
} > 1) {
316 print STDERR
$resp->headers->as_string unless $Opt{quiet
};
319 my $headers = "$target.headers";
320 open my $fh, ">", $headers or die {severity
=>1,text
=>"Could not open >$headers: $!"};
321 print $fh $resp->headers->as_string;
324 text
=>sprintf "HTTP Server Error[%s] for id[%s]", $resp->status_line, $id};
327 die {severity
=>1,text
=>"Illegal value for --transport: '$Opt{transport}'"};
331 parse_report
($target, $dumpvars, %Opt);
335 my($distro,%Opt) = @_;
337 $Opt{cachedir
} ||= "$ENV{HOME}/var/cpantesters";
338 my $cts_dir = "$Opt{cachedir}/cpantesters-show";
341 require Statistics
::Regression
;
342 $Opt{dumpvars
} = "." unless defined $Opt{dumpvars
};
344 if (!$Opt{vdistro
} && $distro =~ /^(.+)-(?i:v?\d+)(?:\.\d+)*\w*$/) {
345 $Opt{vdistro
} = $distro;
349 if (my $ctdb = $Opt{ctdb
}) {
350 require CPAN
::WWW
::Testers
::Generator
::Database
;
351 require CPAN
::DistnameInfo
;
352 my $dbi = CPAN
::WWW
::Testers
::Generator
::Database
->new(database
=>$ctdb) or die "Alert: unknown error while opening database '$ctdb'";
353 unless ($Opt{vdistro
}) {
354 my $sql = "select version from cpanstats where dist=? order by id";
355 my @rows = $dbi->get_query($sql,$distro);
357 for my $row (@rows) {
358 $newest = $row->[0] unless $seen{$row->[0]}++;
360 $Opt{vdistro
} = "$distro-$newest";
362 my $d = CPAN
::DistnameInfo
->new("FOO/$Opt{vdistro}.tgz");
364 my $version = $d->version;
365 my $sql = "select id from cpanstats where dist=? and version=? order by id desc";
366 my @rows = $dbi->get_query($sql,$dist,$version);
368 for my $row (@rows) {
370 push @all, {id
=>$id};
374 my $ctarget = _download_overview
($cts_dir, $distro, %Opt);
375 $Opt{ctformat
} ||= $default_ctformat;
376 if ($Opt{ctformat
} eq "html") {
377 $reports = _parse_html
($ctarget,%Opt);
379 $reports = _parse_yaml
($ctarget,%Opt);
382 return unless $reports;
383 for my $report (@
$reports) {
384 eval {parse_single_report
($report, \
%dumpvars, %Opt)};
387 if ($@
->{severity
}) {
398 if ($Opt{dumpvars
}) {
400 my $dumpfile = $Opt{dumpfile
} || "ctgetreports.out";
401 open my $fh, ">", $dumpfile or die "Could not open '$dumpfile' for writing: $!";
402 print $fh YAML
::Syck
::Dump
(\
%dumpvars);
403 close $fh or die "Could not close '$dumpfile': $!"
406 solve
(\
%dumpvars,%Opt);
410 =head2 $bool = _looks_like_qp($raw_report)
412 We had to acknowledge the fact that some MTAs swallow the MIME-Version
413 header while passing MIME through. So we introduce fallback heuristics
414 that try to determine if a report is written in quoted printable.
416 Note that this subroutine is internal, just documented to have the
417 internals documented.
419 The current implementation counts the number of QP escaped spaces and
426 my $count_space = () = $report =~ /=20/g;
427 return 1 if $count_space > 12;
428 my $count_equal = () = $report =~ /=3D/g;
429 return 1 if $count_equal > 12;
430 return 1 if $count_space+$count_equal > 24;
431 return 0; # waiting for a counter example
434 =head2 $extract = parse_report($target,$dumpvars,%Opt)
436 Reads one report. $target is the local filename to read. $dumpvars is
437 a hashref which gets filled with descriptive stats about
438 PASS/FAIL/etc. %Opt are the options as described in the
439 C<ctgetreports> manpage. $extract is a hashref containing the found
442 Note: this parsing is a bit dirty but as it seems good enough I'm not
443 inclined to change it. We parse HTML with regexps only, not an HTML
444 parser. Only the entities are decoded.
446 Update around version 0.0.17: switching to nntp now but keeping the
447 parser for HTML around to read old local copies.
449 Update around 0.0.18: In %Options you can use
451 article => $some_full_article_as_scalar
453 to use this function to parse one full article as text. When this is
454 given, the argument $target is not read, but its basename is taken to
455 be the id of the article. (OMG, hackers!)
459 my($target,$dumpvars,%Opt) = @_;
461 my $id = basename
($target);
462 # warn "DEBUG: id[$id]";
467 my($report,$isHTML) = _get_cooked_report
($target, \
%Opt);
468 my @qr = map /^qr:(.+)/, @
{$Opt{q
}};
469 if ($Opt{raw
} || @qr) {
471 my $cqr = eval "qr{$qr}";
472 die "Could not compile regular expression '$qr': $@" if $@
;
473 my(@matches) = $report =~ $cqr;
479 $v = join "", map {"($_)"} @matches;
484 $extract{"qr:$qr"} = $v;
489 my $moduleunpack = {};
490 my $expect_prereq = 0;
491 my $expect_toolchain = 0;
492 my $expecting_toolchain_soon = 0;
493 my $fallback_p5 = "";
496 my $in_summary_seen_platform = 0;
497 my $in_prg_output = 0;
498 my $in_env_context = 0;
500 my $current_headline;
501 my @previous_line = ""; # so we can neutralize line breaks
502 my @rlines = split /\r?\n/, $report;
503 LINE
: for (@rlines) {
504 next LINE
unless ($isHTML ?
m/<title>(\S+)\s+(\S+)/ : m/^Subject:\s*(\S+)\s+(\S+)/);
507 $extract{"meta:ok"} = $ok;
508 $extract{"meta:about"} = $about;
511 LINE
: while (@rlines) {
513 while (/!$/ and @rlines) {
514 my $followupline = shift @rlines;
515 $followupline =~ s/^\s+//; # remo leading space
518 if (/^--------/ && $previous_line[-2] && $previous_line[-2] =~ /^--------/) {
519 $current_headline = $previous_line[-1];
520 if ($current_headline =~ /PROGRAM OUTPUT/) {
525 if ($current_headline =~ /ENVIRONMENT AND OTHER CONTEXT/) {
531 if ($extract{"meta:perl"}) {
533 and !$extract{"conf:git_commit_id"}
534 and /Commit id:\s*([[:xdigit:]]+)/) {
535 $extract{"conf:git_commit_id"} = $1;
540 } elsif (/Summary of my perl5 \((.+)\) configuration:/) {
547 if (($r,$v,$s,$p) = $p5 =~ /revision (\S+) version (\S+) subversion (\S+) patch (\S+)/) {
548 $r =~ s/\.0//; # 5.0 6 2!
549 $extract{"meta:perl"} = "$r.$v.$s\@$p";
550 } elsif (($r,$v,$s) = $p5 =~ /revision (\S+) version (\S+) subversion (\S+)/) {
552 $extract{"meta:perl"} = "$r.$v.$s";
553 } elsif (($r,$v,$s) = $p5 =~ /(\d+\S*) patchlevel (\S+) subversion (\S+)/) {
555 $extract{"meta:perl"} = "$r.$v.$s";
557 $extract{"meta:perl"} = $p5;
561 unless ($extract{"meta:from"}) {
564 m
|<div
class="h_name">From
:</div> <b>(.+?)</b
><br
/>| :
567 $extract{"meta:from"} = $1;
569 $extract{"meta:from"} =~ s/\.$// if $extract{"meta:from"};
571 unless ($extract{"meta:date"}) {
574 m
|<div
class="h_name">Date
:</div> (.+?)<br/>| :
579 DATEFMT
: for my $pat ("%a, %d %b %Y %T %z", # Sun, 28 Sep 2008 12:23:12 +0100
580 "%b %d, %Y %R", # July 10,...
581 "%b %d, %Y %R", # July 4,...
584 my $p = DateTime
::Format
::Strptime
->new
590 $p->parse_datetime($date)
595 warn "Could not parse date[$date], setting to epoch 0";
596 $dt = DateTime
->from_epoch( epoch
=> 0 );
598 $extract{"meta:date"} = $dt->datetime;
600 $extract{"meta:date"} =~ s/\.$// if $extract{"meta:date"};
602 unless ($extract{"meta:writer"}) {
603 for ("$previous_line[-1] $_") {
605 } elsif (/CPANPLUS, version (\S+)/) {
606 $extract{"meta:writer"} = "CPANPLUS $1";
607 } elsif (/created (?:automatically )?by (\S+)/) {
608 $extract{"meta:writer"} = $1;
609 if (/\s+on\s+perl\s+([^,]+),/) {
612 } elsif (/This report was machine-generated by (\S+) (\S+)/) {
613 $extract{"meta:writer"} = "$1 $2";
615 $extract{"meta:writer"} =~ s/[\.,]$// if $extract{"meta:writer"};
619 # we do that first three lines a bit too often
620 my $qr = $Opt{dumpvars
} || "";
621 $qr = qr/$qr/ if $qr;
624 @q = qw(meta:perl conf:archname conf:usethreads conf:optimize meta:writer meta:from) unless @q;
627 my %conf_vars = map {($_ => 1)} grep { /^conf:/ } @q;
629 if (/^\s+Platform:$/) {
630 $in_summary_seen_platform=1;
631 } elsif (/^\s*$/ || m
|</pre
>|) {
632 # if not html, we have reached the end now
633 if ($in_summary_seen_platform) {
634 # some perls have an empty line after the summary line
638 my(%kv) = /\G,?\s*([^=]+)=('[^']+?'|\S+)/gc;
639 while (my($k,$v) = each %kv) {
643 if ($v =~ /^'(.*)'$/) {
648 if ($qr && $ck =~ $qr) {
650 } elsif ($conf_vars{$ck}) {
656 if ($in_prg_output) {
657 unless ($extract{"meta:output_from"}) {
658 if (/Output from (.+):$/) {
659 $extract{"meta:output_from"} = $1
663 if ($in_env_context) {
664 if (/^\s{4}(\S+)\s*=\s*(.*)$/) {
665 $extract{"env:$1"} = $2;
668 push @previous_line, $_;
669 if ($expect_prereq || $expect_toolchain) {
670 if (/Perl module toolchain versions installed/) {
671 # first time discovered in CPANPLUS 0.89_06
672 $expecting_toolchain_soon = 1;
676 if (exists $moduleunpack->{type
}) {
677 my($module,$v,$needwant);
678 # type 1 and 2 are about prereqs, type three about toolchain
679 if ($moduleunpack->{type
} == 1) {
680 (my $leader,$module,$needwant,$v) = eval { unpack $moduleunpack->{tpl
}, $_; };
682 if ($leader =~ /^-/) {
686 } elsif ($leader =~ /^(
687 buil
# build_requires:
690 } elsif ($module =~ /^(
695 } elsif ($moduleunpack->{type
} == 2) {
696 (my $leader,$module,$v,$needwant) = eval { unpack $moduleunpack->{tpl
}, $_; };
698 if ($leader =~ /^\*/) {
703 or !defined $needwant
707 ($module,$v,$needwant) = split " ", $_;
709 } elsif ($moduleunpack->{type
} == 3) {
710 (my $leader,$module,$v) = eval { unpack $moduleunpack->{tpl
}, $_; };
714 $expect_toolchain = 0;
716 } elsif ($module =~ /^-/) {
724 my($modulename,$versionlead) = split " ", $module;
725 if (defined $modulename and defined $versionlead) {
726 $module = $modulename;
727 $v = "$versionlead$v";
732 $extract{"mod:$module"} = $v;
734 $needwant =~ s/^\s+//;
735 $needwant =~ s/\s+$//;
736 $extract{"prereq:$module"} = $needwant;
740 if (/(\s+)(Module\s+)(Need\s+)Have/) {
743 tpl
=> 'a'.length($1).'a'.length($2).'a'.length($3).'a*',
746 } elsif (/(\s+)(Module Name\s+)(Have)(\s+)Want/) {
749 my $adjust_2 = -length($4);
750 my $adjust_3 = length($4);
751 # I think they do not really try to align, usually we
752 # get away with split
754 tpl
=> 'a'.length($1).'a'.(length($2)+$adjust_2).'a'.(length($3)+$adjust_3).'a*',
759 if (/PREREQUISITES|Prerequisite modules loaded/) {
763 if ($expecting_toolchain_soon) {
764 if (/(\s+)(Module(?:\sName)?\s+) Have/) {
767 $expecting_toolchain_soon=0;
769 tpl
=> 'a'.length($1).'a'.length($2).'a*',
774 if (/toolchain versions installed/) {
776 $expecting_toolchain_soon=1;
779 if (! $extract{"meta:perl"} && $fallback_p5) {
780 my($p5,$patch) = split /\s+patch\s+/, $fallback_p5;
781 $extract{"meta:perl"} = $p5;
782 $extract{"conf:git_describe"} = $patch;
786 if ($extract{"conf:osvers"} && $extract{"conf:archname"}) {
787 $extract{"conf:archname+osvers"} = join " ", @extract{"conf:archname","conf:osvers"};
789 my $data = $dumpvars->{"==DATA=="} ||= [];
790 push @
$data, \
%extract;
792 # ---- %extract finished ----
794 if (my $qr = $Opt{dumpvars
}) {
796 while (my($k,$v) = each %extract) {
798 $dumpvars->{$k}{$v}{$ok}++;
803 my $have = $extract{$want} || "";
804 $diag .= " $want\[$have]";
806 printf STDERR
" %-4s %8d%s\n", $ok, $id, $diag unless $Opt{quiet
};
808 $report =~ s/\s+\z//;
809 print STDERR
$report, "\n================\n" unless $Opt{quiet
};
811 if ($Opt{interactive
}) {
815 my $ans = IO
::Prompt
::prompt
817 -p
=> "View $id? [onechar: ynq] ",
822 print STDERR
"\n" unless $Opt{quiet
};
824 my($report) = _get_cooked_report
($target, \
%Opt);
825 $Opt{pager
} ||= "less";
826 open my $lfh, "|-", $Opt{pager
} or die "Could not fork '$Opt{pager}': $!";
828 print {$lfh} $report;
829 close $lfh or die "Could not close pager: $!"
830 } elsif ($ans eq "q") {
838 sub _get_cooked_report
{
839 my($target, $Opt_ref) = @_;
840 my($report, $isHTML);
841 if ($report = $Opt_ref->{article
}) {
842 $isHTML = $report =~ /^</;
846 open my $fh, $target or die "Could not open '$target': $!";
848 my $raw_report = <$fh>;
849 $isHTML = $raw_report =~ /^</;
851 $report = decode_entities
($raw_report);
852 } elsif ($raw_report =~ /^MIME-Version: 1.0$/m
854 _looks_like_qp
($raw_report)
856 # minimizing MIME effort; don't know about reports in other formats
857 $report = MIME
::QuotedPrint
::decode_qp
($raw_report);
859 $report = $raw_report;
863 if ($report =~ /\r\n/) {
864 my @rlines = split /\r?\n/, $report;
865 $report = join "\n", @rlines;
872 Feeds a couple of potentially interesting data to
873 Statistics::Regression and sorts the result by R^2 descending. Do not
874 confuse this with a prove, rather take it as a useful hint. It can
875 save you minutes of staring at data and provide a quick overview where
876 one should look closer. Displays the N top candidates, where N
877 defaults to 3 and can be set with the C<$Opt{solvetop}> variable.
878 Regressions results with an R^2 of 1.00 are displayed in any case.
880 The function is called when the option C<-solve> is give on the
881 commandline. Several extra config variables are calculated, see source
886 my %never_solve_on = map {($_ => 1)}
900 'env:PERL5_CPANPLUS_IS_RUNNING',
901 'env:PERL5_CPAN_IS_RUNNING',
902 'env:PERL5_CPAN_IS_RUNNING_IN_RECURSION',
905 my %normalize_numeric =
907 id
=> sub { return shift },
910 my($Y,$M,$D,$h,$m,$s) = $v =~ /(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)/;
911 unless (defined $M) {
912 die "illegal value[$v] for a date";
914 Time
::Local
::timegm
($s,$m,$h,$D,$M-1,$Y);
917 my %normalize_value =
920 my($perlatpatchlevel) = shift;
921 my $perl = $perlatpatchlevel;
928 require Statistics
::Regression
;
931 if (my $ycbbody = $Opt{ycb
}) {
932 $ycb = eval('sub {'.$ycbbody.'}');
938 if ($rec->{"meta:ok"} eq "PASS") {
940 } elsif ($rec->{"meta:ok"} eq "FAIL") {
946 VAR
: for my $variable (sort keys %$V) {
947 next if $variable eq "==DATA==";
948 if ($never_solve_on{$variable}){
949 warn "Skipping '$variable'\n" unless $Opt{quiet
};
952 my $value_distribution = $V->{$variable};
953 my $keys = keys %$value_distribution;
955 if ($normalize_numeric{$variable}) {
956 push @X, "n_$variable";
959 for my $value (sort keys %$value_distribution) {
960 my $pf = $value_distribution->{$value};
963 if ($pf->{PASS
} || $pf->{FAIL
}) {
964 my $Xele = sprintf "eq_%s",
966 $normalize_value{$variable} ?
967 $normalize_value{$variable}->($value) :
970 push @X, $Xele unless $seen{$Xele}++;
974 $pf->{PASS
} xor $pf->{FAIL
}
977 substr($value,$vl) = "..." if length $value > 3+$vl;
978 my $poor_mans_freehand_estimation = 0;
979 if ($poor_mans_freehand_estimation) {
982 "%4d %4d %-23s | %s\n",
992 warn "variable[$variable]keys[$keys]X[@X]\n" unless $Opt{quiet
};
993 next VAR
unless @X > 1;
999 RECORD
: for my $rec (@
{$V->{"==DATA=="}}) {
1000 my $y = $ycb->($rec);
1001 next RECORD
unless defined $y;
1004 @obs{@X} = (0) x
@X;
1007 if ($x =~ /^eq_(.+)/) {
1009 if (exists $rec->{$variable}
1010 && defined $rec->{$variable}
1013 $normalize_value{$variable} ?
1014 $normalize_value{$variable}->($rec->{$variable}) :
1017 if ($use_v eq $read_v) {
1021 # warn "DEBUG: y[$y]x[$x]obs[$obs{$x}]\n";
1022 } elsif ($x =~ /^n_(.+)/) {
1024 $obs{$x} = eval { $normalize_numeric{$v}->($rec->{$v}); };
1026 warn "Warning: error during parsing v[$v] in record[$rec->{id}]: $@; continuing with undef value";
1030 push @
{$regdata{data
}}, \
%obs;
1032 _run_regression
($variable, \
%regdata, \
@regression, \
%Opt);
1034 my $top = min
($Opt{solvetop
} || 3, scalar @regression);
1035 my $max_rsq = sum
map {1==$_->rsq ?
1 : 0} @regression;
1036 $top = $max_rsq if $max_rsq && $max_rsq > $top;
1040 "State after regression testing: %d results, showing top %d\n\n",
1049 printf "(%d)\n", ++$score;
1050 eval { $reg->print; };
1052 printf "\n\nOops, Statistics::Regression died during ->print() with error message[$@]\n\n";
1054 last if --$top <= 0;
1059 # $variable is the name we pass through to S:R constructor
1060 # $regdata is hash and has the arrays "X" and "data" (observations)
1061 # X goes to S:R constructor
1062 # each observation has a Y which we pass to S:R in an include() call
1063 # $regression is the collector array of results
1064 # $opt are the options from outside, used to see if we are "verbose"
1065 sub _run_regression
{
1066 my($variable,$regdata,$regression,$opt) = @_;
1067 my @X = @
{$regdata->{X
}};
1068 # my $splo = $regdata->{"spliced-out"} = []; # maybe can be used to
1069 # hold the reference
1072 my $reg = Statistics
::Regression
->new($variable,\
@X);
1073 for my $obs (@
{$regdata->{data
}}) {
1074 my $y = delete $obs->{Y
};
1075 $reg->include($y, $obs);
1079 my @e = $reg->standarderrors;
1080 die "found standarderrors == 0" if grep { 0 == $_ } @e;
1083 if ($opt->{verbose
} && $opt->{verbose
}>=2) {
1085 warn YAML
::Syck
::Dump
1086 ({error
=>"could not determine some regression parameters",
1087 variable
=>$variable,
1094 # reduce k in case that linear dependencies disturbed us;
1095 # often called reference group; I'm tempted to collect and
1100 push @
$regression, $reg;
1112 Please report any bugs or feature requests through the web
1114 L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=CPAN-Testers-ParseReport>.
1115 I will be notified, and then you'll automatically be notified of
1116 progress on your bug as I make changes.
1120 You can find documentation for this module with the perldoc command.
1122 perldoc CPAN::Testers::ParseReport
1125 You can also look for information at:
1129 =item * RT: CPAN's request tracker
1131 L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=CPAN-Testers-ParseReport>
1133 =item * AnnoCPAN: Annotated CPAN documentation
1135 L<http://annocpan.org/dist/CPAN-Testers-ParseReport>
1137 =item * CPAN Ratings
1139 L<http://cpanratings.perl.org/d/CPAN-Testers-ParseReport>
1143 L<http://search.cpan.org/dist/CPAN-Testers-ParseReport>
1148 =head1 ACKNOWLEDGEMENTS
1150 Thanks to RJBS for module-starter.
1152 =head1 COPYRIGHT & LICENSE
1154 Copyright 2008 Andreas König.
1156 This program is free software; you can redistribute it and/or modify it
1157 under the same terms as Perl itself.
1162 1; # End of CPAN::Testers::ParseReport