1 package CPAN
::Testers
::ParseReport
;
6 use Config
::Perl
::V
();
7 use DateTime
::Format
::Strptime
;
8 use DateTime
::Format
::DateParse
;
9 use File
::Basename
qw(basename);
10 use File
::Path
qw(mkpath);
11 use HTML
::Entities
qw(decode_entities);
13 use List
::Util
qw(max min sum);
14 use MIME
::QuotedPrint
();
18 use XML
::LibXML
::XPathContext
;
20 our $default_ctformat = "yaml";
21 our $default_transport = "nntp";
22 our $default_cturl = "http://www.cpantesters.org/show";
29 CPAN::Testers::ParseReport - parse reports to www.cpantesters.org from various sources
33 use version
; our $VERSION = qv
('0.1.7');
37 The documentation in here is normally not needed because the code is
38 meant to be run from a standalone program, L<ctgetreports>.
40 ctgetreports --q mod:Moose Devel-Events
44 This is the core module for CPAN::Testers::ParseReport. If you're not
45 looking to extend or alter the behaviour of this module, you probably
46 want to look at L<ctgetreports> instead.
50 Are described in the L<ctgetreports> manpage and are passed through to
51 the functions unaltered.
55 =head2 parse_distro($distro,%options)
57 reads the cpantesters HTML page or the YAML file or the local database
58 for the distro and loops through the reports for the specified or most
59 recent version of that distro found in these data.
61 parse_distro() intentionally has no meaningful return value, different
62 options would require different ones.
64 =head2 $extract = parse_single_report($report,$dumpvars,%options)
66 mirrors and reads this report. $report is of the form
70 $dumpvar is a hashreference that gets filled with data.
72 $extract is the result of parse_report() described below.
80 $ua = LWP
::UserAgent
->new
93 return $nntp if $nntp;
94 $nntp = Net
::NNTP
->new("nntp.perl.org");
95 $nntp->group("perl.cpan.testers");
104 $xp = XML
::LibXML
->new;
106 $xp->clean_namespaces(1);
107 my $catalog = __FILE__
;
108 $catalog =~ s
|ParseReport
.pm
$|ParseReport
/catalog
|;
109 $xp->load_catalog($catalog);
114 sub _download_overview
{
115 my($cts_dir, $distro, %Opt) = @_;
116 my $format = $Opt{ctformat
} ||= $default_ctformat;
117 my $cturl = $Opt{cturl
} ||= $default_cturl;
118 my $ctarget = "$cts_dir/$distro.$format";
119 my $cheaders = "$cts_dir/$distro.headers";
121 unless (-e
$ctarget) {
122 die "Alert: No local file '$ctarget' found, cannot continue\n";
125 if (! -e
$ctarget or -M
$ctarget > .25) {
126 if (-e
$ctarget && $Opt{verbose
}) {
128 my $timestamp = gmtime $stat[9];
129 print STDERR
"(timestamp $timestamp GMT)\n" unless $Opt{quiet
};
131 print STDERR
"Fetching $ctarget..." if $Opt{verbose
} && !$Opt{quiet
};
132 my $uri = "$cturl/$distro.$format";
133 my $resp = _ua
->mirror($uri,$ctarget);
134 if ($resp->is_success) {
135 print STDERR
"DONE\n" if $Opt{verbose
} && !$Opt{quiet
};
136 open my $fh, ">", $cheaders or die;
137 for ($resp->headers->as_string) {
139 if ($Opt{verbose
} && $Opt{verbose
}>1) {
140 print STDERR
$_ unless $Opt{quiet
};
143 } elsif (304 == $resp->code) {
144 print STDERR
"DONE (not modified)\n" if $Opt{verbose
} && !$Opt{quiet
};
145 my $atime = my $mtime = time;
146 utime $atime, $mtime, $cheaders;
150 "No success downloading %s: %s",
161 my($ctarget, %Opt) = @_;
162 my $content = do { open my $fh, $ctarget or die; local $/; <$fh> };
163 my $preprocesswithtreebuilder = 0; # not needed since barbie switched to XHTML
164 if ($preprocesswithtreebuilder) {
165 require HTML
::TreeBuilder
;
166 my $tree = HTML
::TreeBuilder
->new;
167 $tree->implicit_tags(1);
169 $tree->ignore_ignorable_whitespace(0);
170 $tree->parse_content($content);
172 $content = $tree->as_XML;
175 my $doc = eval { $parser->parse_string($content) };
178 my $distro = basename
$ctarget;
179 die sprintf "Error while parsing %s\: %s", $distro, $err;
181 my $xc = XML
::LibXML
::XPathContext
->new($doc);
182 my $nsu = $doc->documentElement->namespaceURI;
183 $xc->registerNs('x', $nsu) if $nsu;
184 my($selected_release_ul,$selected_release_distrov,$excuse_string);
187 $xc->findnodes("/x:html/x:body/x:div[\@id = 'doc']") :
188 $doc->findnodes("/html/body/div[\@id = 'doc']");
189 my(@releasedivs) = $nsu ?
190 $xc->findnodes("//x:div[x:h2 and x:ul]",$cparentdiv) :
191 $cparentdiv->findnodes("//div[h2 and ul]");
194 $excuse_string = "selected distro '$Opt{vdistro}'";
195 my($fallbacktoversion) = $Opt{vdistro
} =~ /(\d+\..*)/;
196 $fallbacktoversion = 0 unless defined $fallbacktoversion;
197 RELEASE
: for my $i (0..$#releasedivs) {
200 $xc->findvalue("x:h2/x:a[2]/\@name",$releasedivs[$i]) :
201 $releasedivs[$i]->findvalue("h2/a[2]/\@name");
203 if ($x eq $Opt{vdistro
}) {
205 $picked = " (picked)";
207 print STDERR
"FOUND DISTRO: $x$picked\n" unless $Opt{quiet
};
210 $xc->findvalue("x:h2/x:a[1]/\@name",$releasedivs[$i]) :
211 $releasedivs[$i]->findvalue("h2/a[1]/\@name");
212 if ($x eq $fallbacktoversion) {
214 $picked = " (picked)";
216 print STDERR
"FOUND VERSION: $x$picked\n" unless $Opt{quiet
};
220 $excuse_string = "any distro";
222 unless (defined $releasediv) {
225 # using a[1] because a[2] is missing on the first entry
226 ($selected_release_distrov) = $nsu ?
227 $xc->findvalue("x:h2/x:a[1]/\@name",$releasedivs[$releasediv]) :
228 $releasedivs[$releasediv]->findvalue("h2/a[1]/\@name");
229 ($selected_release_ul) = $nsu ?
230 $xc->findnodes("x:ul",$releasedivs[$releasediv]) :
231 $releasedivs[$releasediv]->findnodes("ul");
232 unless (defined $selected_release_distrov) {
233 warn "Warning: could not find $excuse_string in '$ctarget'";
236 print STDERR
"SELECTED: $selected_release_distrov\n" unless $Opt{quiet
};
240 $xc->findnodes("x:li",$selected_release_ul) :
241 $selected_release_ul->findnodes("li")) {
243 $xc->findvalue("x:a[1]/text()",$test) :
244 $test->findvalue("a[1]/text()");
245 push @all, {id
=>$id};
252 my($ctarget, %Opt) = @_;
254 my $arr = YAML
::Syck
::LoadFile
($ctarget);
255 my($selected_release_ul,$selected_release_distrov,$excuse_string);
257 $excuse_string = "selected distro '$Opt{vdistro}'";
258 $arr = [grep {$_->{distversion
} eq $Opt{vdistro
}} @
$arr];
259 ($selected_release_distrov) = $arr->[0]{distversion
};
261 $excuse_string = "any distro";
264 for my $report (sort { $a->{id
} <=> $b->{id
} } @
$arr) {
265 unless ($seen{$report->{distversion
}}++) {
266 $last_addition = $report->{distversion
};
269 $arr = [grep {$_->{distversion
} eq $last_addition} @
$arr];
270 ($selected_release_distrov) = $last_addition;
272 unless ($selected_release_distrov) {
273 warn "Warning: could not find $excuse_string in '$ctarget'";
276 print STDERR
"SELECTED: $selected_release_distrov\n" unless $Opt{quiet
};
278 for my $test (@
$arr) {
279 my $id = $test->{id
};
280 push @all, {id
=>$id};
283 @all = sort { $b->{id
} <=> $a->{id
} } @all;
287 sub parse_single_report
{
288 my($report, $dumpvars, %Opt) = @_;
289 my($id) = $report->{id
};
290 $Opt{cachedir
} ||= "$ENV{HOME}/var/cpantesters";
291 my $nnt_dir = "$Opt{cachedir}/nntp-testers";
293 my $target = "$nnt_dir/$id";
295 unless (-e
$target) {
296 die {severity
=>0,text
=>"Warning: No local file '$target' found, skipping\n"};
300 print STDERR
"Fetching $target..." if $Opt{verbose
} && !$Opt{quiet
};
301 $Opt{transport
} ||= $default_transport;
302 if ($Opt{transport
} eq "nntp") {
303 my $article = _nntp
->article($id);
305 die {severity
=>0,text
=>"NNTP-Server did not return an article for id[$id]"};
307 open my $fh, ">", $target or die {severity
=>1,text
=>"Could not open >$target: $!"};
309 } elsif ($Opt{transport
} eq "http") {
310 my $resp = _ua
->mirror("http://www.nntp.perl.org/group/perl.cpan.testers/$id",$target);
311 if ($resp->is_success) {
313 my(@stat) = stat $target;
314 my $timestamp = gmtime $stat[9];
315 print STDERR
"(timestamp $timestamp GMT)\n" unless $Opt{quiet
};
316 if ($Opt{verbose
} > 1) {
317 print STDERR
$resp->headers->as_string unless $Opt{quiet
};
320 my $headers = "$target.headers";
321 open my $fh, ">", $headers or die {severity
=>1,text
=>"Could not open >$headers: $!"};
322 print $fh $resp->headers->as_string;
325 text
=>sprintf "HTTP Server Error[%s] for id[%s]", $resp->status_line, $id};
328 die {severity
=>1,text
=>"Illegal value for --transport: '$Opt{transport}'"};
332 parse_report
($target, $dumpvars, %Opt);
336 my($distro,%Opt) = @_;
338 $Opt{cachedir
} ||= "$ENV{HOME}/var/cpantesters";
339 my $cts_dir = "$Opt{cachedir}/cpantesters-show";
342 require Statistics
::Regression
;
343 $Opt{dumpvars
} = "." unless defined $Opt{dumpvars
};
345 if (!$Opt{vdistro
} && $distro =~ /^(.+)-(?i:v?\d+)(?:\.\d+)*\w*$/) {
346 $Opt{vdistro
} = $distro;
350 if (my $ctdb = $Opt{ctdb
}) {
351 require CPAN
::WWW
::Testers
::Generator
::Database
;
352 require CPAN
::DistnameInfo
;
353 my $dbi = CPAN
::WWW
::Testers
::Generator
::Database
->new(database
=>$ctdb) or die "Alert: unknown error while opening database '$ctdb'";
354 unless ($Opt{vdistro
}) {
355 my $sql = "select version from cpanstats where dist=? order by id";
356 my @rows = $dbi->get_query($sql,$distro);
358 for my $row (@rows) {
359 $newest = $row->[0] unless $seen{$row->[0]}++;
361 $Opt{vdistro
} = "$distro-$newest";
363 my $d = CPAN
::DistnameInfo
->new("FOO/$Opt{vdistro}.tgz");
365 my $version = $d->version;
366 my $sql = "select id from cpanstats where dist=? and version=? order by id desc";
367 my @rows = $dbi->get_query($sql,$dist,$version);
369 for my $row (@rows) {
371 push @all, {id
=>$id};
375 my $ctarget = _download_overview
($cts_dir, $distro, %Opt);
376 $Opt{ctformat
} ||= $default_ctformat;
377 if ($Opt{ctformat
} eq "html") {
378 $reports = _parse_html
($ctarget,%Opt);
380 $reports = _parse_yaml
($ctarget,%Opt);
383 return unless $reports;
384 for my $report (@
$reports) {
385 eval {parse_single_report
($report, \
%dumpvars, %Opt)};
388 if ($@
->{severity
}) {
399 if ($Opt{dumpvars
}) {
401 my $dumpfile = $Opt{dumpfile
} || "ctgetreports.out";
402 open my $fh, ">", $dumpfile or die "Could not open '$dumpfile' for writing: $!";
403 print $fh YAML
::Syck
::Dump
(\
%dumpvars);
404 close $fh or die "Could not close '$dumpfile': $!"
407 solve
(\
%dumpvars,%Opt);
411 =head2 $bool = _looks_like_qp($raw_report)
413 We had to acknowledge the fact that some MTAs swallow the MIME-Version
414 header while passing MIME through. So we introduce fallback heuristics
415 that try to determine if a report is written in quoted printable.
417 Note that this subroutine is internal, just documented to have the
418 internals documented.
420 The current implementation counts the number of QP escaped spaces and
427 my $count_space = () = $report =~ /=20/g;
428 return 1 if $count_space > 12;
429 my $count_equal = () = $report =~ /=3D/g;
430 return 1 if $count_equal > 12;
431 return 1 if $count_space+$count_equal > 24;
432 return 0; # waiting for a counter example
435 =head2 $extract = parse_report($target,$dumpvars,%Opt)
437 Reads one report. $target is the local filename to read. $dumpvars is
438 a hashref which gets filled with descriptive stats about
439 PASS/FAIL/etc. %Opt are the options as described in the
440 C<ctgetreports> manpage. $extract is a hashref containing the found
443 Note: this parsing is a bit dirty but as it seems good enough I'm not
444 inclined to change it. We parse HTML with regexps only, not an HTML
445 parser. Only the entities are decoded.
447 Update around version 0.0.17: switching to nntp now but keeping the
448 parser for HTML around to read old local copies.
450 Update around 0.0.18: In %Options you can use
452 article => $some_full_article_as_scalar
454 to use this function to parse one full article as text. When this is
455 given, the argument $target is not read, but its basename is taken to
456 be the id of the article. (OMG, hackers!)
460 my($target,$dumpvars,%Opt) = @_;
462 my $id = basename
($target);
463 # warn "DEBUG: id[$id]";
469 if ($report = $Opt{article
}) {
470 $isHTML = $report =~ /^</;
474 open my $fh, $target or die "Could not open '$target': $!";
476 my $raw_report = <$fh>;
477 $isHTML = $raw_report =~ /^</;
479 $report = decode_entities
($raw_report);
480 } elsif ($raw_report =~ /^MIME-Version: 1.0$/m
482 _looks_like_qp
($raw_report)
484 # minimizing MIME effort; don't know about reports in other formats
485 $report = MIME
::QuotedPrint
::decode_qp
($raw_report);
487 $report = $raw_report;
491 my @qr = map /^qr:(.+)/, @
{$Opt{q
}};
492 if ($Opt{raw
} || @qr) {
494 my $cqr = eval "qr{$qr}";
495 die "Could not compile regular expression '$qr': $@" if $@
;
496 my(@matches) = $report =~ $cqr;
502 $v = join "", map {"($_)"} @matches;
507 $extract{"qr:$qr"} = $v;
512 my $moduleunpack = {};
513 my $expect_prereq = 0;
514 my $expect_toolchain = 0;
515 my $expecting_toolchain_soon = 0;
518 my $in_summary_seen_platform = 0;
519 my $in_prg_output = 0;
520 my $in_env_context = 0;
522 my $current_headline;
523 my @previous_line = ""; # so we can neutralize line breaks
524 my @rlines = split /\r?\n/, $report;
525 LINE
: for (@rlines) {
526 next LINE
unless ($isHTML ?
m/<title>(\S+)\s+(\S+)/ : m/^Subject:\s*(\S+)\s+(\S+)/);
529 $extract{"meta:ok"} = $ok;
530 $extract{"meta:about"} = $about;
533 LINE
: while (@rlines) {
535 while (/!$/ and @rlines) {
536 my $followupline = shift @rlines;
537 $followupline =~ s/^\s+//; # remo leading space
540 if (/^--------/ && $previous_line[-2] && $previous_line[-2] =~ /^--------/) {
541 $current_headline = $previous_line[-1];
542 if ($current_headline =~ /PROGRAM OUTPUT/) {
547 if ($current_headline =~ /ENVIRONMENT AND OTHER CONTEXT/) {
553 if ($extract{"meta:perl"}) {
555 and !$extract{"conf:git_commit_id"}
556 and /Commit id:\s*([[:xdigit:]]+)/) {
557 $extract{"conf:git_commit_id"} = $1;
562 } elsif (/Summary of my perl5 \((.+)\) configuration:/) {
569 if (($r,$v,$s,$p) = $p5 =~ /revision (\S+) version (\S+) subversion (\S+) patch (\S+)/) {
570 $r =~ s/\.0//; # 5.0 6 2!
571 $extract{"meta:perl"} = "$r.$v.$s\@$p";
572 } elsif (($r,$v,$s) = $p5 =~ /revision (\S+) version (\S+) subversion (\S+)/) {
574 $extract{"meta:perl"} = "$r.$v.$s";
575 } elsif (($r,$v,$s) = $p5 =~ /(\d+\S*) patchlevel (\S+) subversion (\S+)/) {
577 $extract{"meta:perl"} = "$r.$v.$s";
579 $extract{"meta:perl"} = $p5;
583 unless ($extract{"meta:from"}) {
586 m
|<div
class="h_name">From
:</div> <b>(.+?)</b
><br
/>| :
589 $extract{"meta:from"} = $1;
591 $extract{"meta:from"} =~ s/\.$// if $extract{"meta:from"};
593 unless ($extract{"meta:date"}) {
596 m
|<div
class="h_name">Date
:</div> (.+?)<br/>| :
603 $p = DateTime
::Format
::Strptime
->new(
606 # April 13, 2005 23:50
607 pattern
=> "%b %d, %Y %R",
609 $dt = $p->parse_datetime($date);
611 # Sun, 28 Sep 2008 12:23:12 +0100 # but was not consistent
612 # pattern => "%a, %d %b %Y %T %z",
613 $dt = eval { DateTime
::Format
::DateParse
->parse_datetime($date) };
616 warn "Could not parse date[$date], setting to epoch 0";
617 $dt = DateTime
->from_epoch( epoch
=> 0 );
619 $extract{"meta:date"} = $dt->datetime;
621 $extract{"meta:date"} =~ s/\.$// if $extract{"meta:date"};
623 unless ($extract{"meta:writer"}) {
624 for ("$previous_line[-1] $_") {
626 } elsif (/CPANPLUS, version (\S+)/) {
627 $extract{"meta:writer"} = "CPANPLUS $1";
628 } elsif (/created (?:automatically )?by (\S+)/) {
629 $extract{"meta:writer"} = $1;
630 } elsif (/This report was machine-generated by (\S+) (\S+)/) {
631 $extract{"meta:writer"} = "$1 $2";
633 $extract{"meta:writer"} =~ s/[\.,]$// if $extract{"meta:writer"};
637 # we do that first three lines a bit too often
638 my $qr = $Opt{dumpvars
} || "";
639 $qr = qr/$qr/ if $qr;
642 @q = qw(meta:perl conf:archname conf:usethreads conf:optimize meta:writer meta:from) unless @q;
645 my %conf_vars = map {($_ => 1)} grep { /^conf:/ } @q;
647 if (/^\s+Platform:$/) {
648 $in_summary_seen_platform=1;
649 } elsif (/^\s*$/ || m
|</pre
>|) {
650 # if not html, we have reached the end now
651 if ($in_summary_seen_platform) {
652 # some perls have an empty line after the summary line
656 my(%kv) = /\G,?\s*([^=]+)=('[^']+?'|\S+)/gc;
657 while (my($k,$v) = each %kv) {
661 if ($v =~ /^'(.*)'$/) {
666 if ($qr && $ck =~ $qr) {
668 } elsif ($conf_vars{$ck}) {
674 if ($in_prg_output) {
675 unless ($extract{"meta:output_from"}) {
676 if (/Output from (.+):$/) {
677 $extract{"meta:output_from"} = $1
681 if ($in_env_context) {
682 if (/^\s{4}(\S+)\s*=\s*(.*)$/) {
683 $extract{"env:$1"} = $2;
686 push @previous_line, $_;
687 if ($expect_prereq || $expect_toolchain) {
688 if (/Perl module toolchain versions installed/) {
689 # first time discovered in CPANPLUS 0.89_06
690 $expecting_toolchain_soon = 1;
694 if (exists $moduleunpack->{type
}) {
695 my($module,$v,$needwant);
696 # type 1 and 2 are about prereqs, type three about toolchain
697 if ($moduleunpack->{type
} == 1) {
698 (my $leader,$module,$needwant,$v) = eval { unpack $moduleunpack->{tpl
}, $_; };
700 if ($leader =~ /^-/) {
704 } elsif ($leader =~ /^(
705 buil
# build_requires:
708 } elsif ($module =~ /^(
713 } elsif ($moduleunpack->{type
} == 2) {
714 (my $leader,$module,$v,$needwant) = eval { unpack $moduleunpack->{tpl
}, $_; };
716 if ($leader =~ /^\*/) {
720 } elsif ($v =~ /\s/) {
721 ($module,$v) = split " ", $_;
723 } elsif ($moduleunpack->{type
} == 3) {
724 (my $leader,$module,$v) = eval { unpack $moduleunpack->{tpl
}, $_; };
728 $expect_toolchain = 0;
730 } elsif ($module =~ /^-/) {
738 my($modulename,$versionlead) = split " ", $module;
739 if (defined $modulename and defined $versionlead) {
740 $module = $modulename;
741 $v = "$versionlead$v";
746 $extract{"mod:$module"} = $v;
748 $needwant =~ s/^\s+//;
749 $needwant =~ s/\s+$//;
750 $extract{"prereq:$module"} = $needwant;
754 if (/(\s+)(Module\s+)(Need\s+)Have/) {
757 tpl
=> 'a'.length($1).'a'.length($2).'a'.length($3).'a*',
760 } elsif (/(\s+)(Module Name\s+)(Have)(\s+)Want/) {
763 my $adjust_2 = -length($4);
764 my $adjust_3 = length($4);
765 # two pass would be required to see where the
766 # columns really are. Or could we get away with split?
768 tpl
=> 'a'.length($1).'a'.(length($2)+$adjust_2).'a'.(length($3)+$adjust_3).'a*',
773 if (/PREREQUISITES|Prerequisite modules loaded/) {
777 if ($expecting_toolchain_soon) {
778 if (/(\s+)(Module(?:\sName)?\s+) Have/) {
781 $expecting_toolchain_soon=0;
783 tpl
=> 'a'.length($1).'a'.length($2).'a*',
788 if (/toolchain versions installed/) {
790 $expecting_toolchain_soon=1;
795 if ($extract{"conf:osvers"} && $extract{"conf:archname"}) {
796 $extract{"conf:archame+osvers"} = join " ", @extract{"conf:archname","conf:osvers"};
798 my $data = $dumpvars->{"==DATA=="} ||= [];
799 push @
$data, \
%extract;
801 # ---- %extract finished ----
803 if (my $qr = $Opt{dumpvars
}) {
805 while (my($k,$v) = each %extract) {
807 $dumpvars->{$k}{$v}{$ok}++;
812 my $have = $extract{$want} || "";
813 $diag .= " $want\[$have]";
815 printf STDERR
" %-4s %8d%s\n", $ok, $id, $diag unless $Opt{quiet
};
817 $report =~ s/\s+\z//;
818 print STDERR
$report, "\n================\n" unless $Opt{quiet
};
820 if ($Opt{interactive
}) {
824 my $ans = IO
::Prompt
::prompt
826 -p
=> "View $id? [onechar: ynq] ",
831 print STDERR
"\n" unless $Opt{quiet
};
833 open my $ifh, "<", $target or die "Could not open $target: $!";
834 $Opt{pager
} ||= "less";
835 open my $lfh, "|-", $Opt{pager
} or die "Could not fork '$Opt{pager}': $!";
838 close $ifh or die "Could not close $target: $!";
839 close $lfh or die "Could not close pager: $!"
840 } elsif ($ans eq "q") {
850 Feeds a couple of potentially interesting data to
851 Statistics::Regression and sorts the result by R^2 descending. Do not
852 confuse this with a prove, rather take it as a useful hint. It can
853 save you minutes of staring at data and provide a quick overview where
854 one should look closer. Displays the N top candidates, where N
855 defaults to 3 and can be set with the C<$Opt{solvetop}> variable.
856 Regressions results with an R^2 of 1.00 are displayed in any case.
858 The function is called when the option C<-solve> is give on the
859 commandline. Several extra config variables are calculated, see source
864 my %never_solve_on = map {($_ => 1)}
878 'env:PERL5_CPANPLUS_IS_RUNNING',
879 'env:PERL5_CPAN_IS_RUNNING',
880 'env:PERL5_CPAN_IS_RUNNING_IN_RECURSION',
883 my %normalize_numeric =
885 id
=> sub { return shift },
888 my($Y,$M,$D,$h,$m,$s) = $v =~ /(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)/;
889 unless (defined $M) {
890 die "illegal value[$v] for a date";
892 Time
::Local
::timegm
($s,$m,$h,$D,$M-1,$Y);
895 my %normalize_value =
898 my($perlatpatchlevel) = shift;
899 my $perl = $perlatpatchlevel;
906 require Statistics
::Regression
;
909 if (my $ycbbody = $Opt{ycb
}) {
910 $ycb = eval('sub {'.$ycbbody.'}');
916 if ($rec->{"meta:ok"} eq "PASS") {
918 } elsif ($rec->{"meta:ok"} eq "FAIL") {
924 VAR
: for my $variable (sort keys %$V) {
925 next if $variable eq "==DATA==";
926 if ($never_solve_on{$variable}){
927 warn "Skipping '$variable'\n" unless $Opt{quiet
};
930 my $value_distribution = $V->{$variable};
931 my $keys = keys %$value_distribution;
933 if ($normalize_numeric{$variable}) {
934 push @X, "n_$variable";
937 for my $value (sort keys %$value_distribution) {
938 my $pf = $value_distribution->{$value};
941 if ($pf->{PASS
} || $pf->{FAIL
}) {
942 my $Xele = sprintf "eq_%s",
944 $normalize_value{$variable} ?
945 $normalize_value{$variable}->($value) :
948 push @X, $Xele unless $seen{$Xele}++;
952 $pf->{PASS
} xor $pf->{FAIL
}
955 substr($value,$vl) = "..." if length $value > 3+$vl;
956 my $poor_mans_freehand_estimation = 0;
957 if ($poor_mans_freehand_estimation) {
960 "%4d %4d %-23s | %s\n",
970 warn "variable[$variable]keys[$keys]X[@X]\n" unless $Opt{quiet
};
971 next VAR
unless @X > 1;
977 RECORD
: for my $rec (@
{$V->{"==DATA=="}}) {
978 my $y = $ycb->($rec);
979 next RECORD
unless defined $y;
985 if ($x =~ /^eq_(.+)/) {
987 if (exists $rec->{$variable}
988 && defined $rec->{$variable}
991 $normalize_value{$variable} ?
992 $normalize_value{$variable}->($rec->{$variable}) :
995 if ($use_v eq $read_v) {
999 # warn "DEBUG: y[$y]x[$x]obs[$obs{$x}]\n";
1000 } elsif ($x =~ /^n_(.+)/) {
1002 $obs{$x} = eval { $normalize_numeric{$v}->($rec->{$v}); };
1004 warn "Warning: error during parsing v[$v] in record[$rec->{id}]: $@; continuing with undef value";
1008 push @
{$regdata{data
}}, \
%obs;
1010 _run_regression
($variable, \
%regdata, \
@regression, \
%Opt);
1012 my $top = min
($Opt{solvetop
} || 3, scalar @regression);
1013 my $max_rsq = sum
map {1==$_->rsq ?
1 : 0} @regression;
1014 $top = $max_rsq if $max_rsq && $max_rsq > $top;
1018 "State after regression testing: %d results, showing top %d\n\n",
1027 printf "(%d)\n", ++$score;
1028 eval { $reg->print; };
1030 printf "\n\nOops, Statistics::Regression died during ->print() with error message[$@]\n\n";
1032 last if --$top <= 0;
1037 # $variable is the name we pass through to S:R constructor
1038 # $regdata is hash and has the arrays "X" and "data" (observations)
1039 # X goes to S:R constructor
1040 # each observation has a Y which we pass to S:R in an include() call
1041 # $regression is the collector array of results
1042 # $opt are the options from outside, used to see if we are "verbose"
1043 sub _run_regression
{
1044 my($variable,$regdata,$regression,$opt) = @_;
1045 my @X = @
{$regdata->{X
}};
1046 # my $splo = $regdata->{"spliced-out"} = []; # maybe can be used to
1047 # hold the reference
1050 my $reg = Statistics
::Regression
->new($variable,\
@X);
1051 for my $obs (@
{$regdata->{data
}}) {
1052 my $y = delete $obs->{Y
};
1053 $reg->include($y, $obs);
1057 my @e = $reg->standarderrors;
1058 die "found standarderrors == 0" if grep { 0 == $_ } @e;
1061 if ($opt->{verbose
} && $opt->{verbose
}>=2) {
1063 warn YAML
::Syck
::Dump
1064 ({error
=>"could not determine some regression parameters",
1065 variable
=>$variable,
1072 # reduce k in case that linear dependencies disturbed us;
1073 # often called reference group; I'm tempted to collect and
1078 push @
$regression, $reg;
1090 Please report any bugs or feature requests through the web
1092 L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=CPAN-Testers-ParseReport>.
1093 I will be notified, and then you'll automatically be notified of
1094 progress on your bug as I make changes.
1098 You can find documentation for this module with the perldoc command.
1100 perldoc CPAN::Testers::ParseReport
1103 You can also look for information at:
1107 =item * RT: CPAN's request tracker
1109 L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=CPAN-Testers-ParseReport>
1111 =item * AnnoCPAN: Annotated CPAN documentation
1113 L<http://annocpan.org/dist/CPAN-Testers-ParseReport>
1115 =item * CPAN Ratings
1117 L<http://cpanratings.perl.org/d/CPAN-Testers-ParseReport>
1121 L<http://search.cpan.org/dist/CPAN-Testers-ParseReport>
1126 =head1 ACKNOWLEDGEMENTS
1128 Thanks to RJBS for module-starter.
1130 =head1 COPYRIGHT & LICENSE
1132 Copyright 2008 Andreas König.
1134 This program is free software; you can redistribute it and/or modify it
1135 under the same terms as Perl itself.
1140 1; # End of CPAN::Testers::ParseReport