1 package CPAN
::Testers
::ParseReport
;
6 use Config
::Perl
::V
();
7 use DateTime
::Format
::Strptime
;
8 use File
::Basename
qw(basename);
9 use File
::Path
qw(mkpath);
10 use HTML
::Entities
qw(decode_entities);
12 use List
::Util
qw(max min sum);
13 use MIME
::QuotedPrint
();
16 use XML
::LibXML
::XPathContext
;
18 our $default_ctformat = "yaml";
19 our $default_transport = "http_cpantesters";
20 our $default_cturl = "http://www.cpantesters.org/show";
27 CPAN::Testers::ParseReport - parse reports to www.cpantesters.org from various sources
31 use version
; our $VERSION = qv
('0.1.15');
35 The documentation in here is normally not needed because the code is
36 meant to be run from the standalone program C<ctgetreports>.
38 ctgetreports --q mod:Moose Devel-Events
42 This is the core module for CPAN::Testers::ParseReport. If you're not
43 looking to extend or alter the behaviour of this module, you probably
44 want to look at L<ctgetreports> instead.
48 Options are described in the L<ctgetreports> manpage and are passed
49 through to the functions unaltered.
53 =head2 parse_distro($distro,%options)
55 reads the cpantesters HTML page or the YAML file or the local database
56 for the distro and loops through the reports for the specified or most
57 recent version of that distro found in these data.
59 parse_distro() intentionally has no meaningful return value, different
60 options would require different ones.
62 =head2 $extract = parse_single_report($report,$dumpvars,%options)
64 mirrors and reads this report. $report is of the form
68 $dumpvar is a hashreference that gets filled with data.
70 $extract is the result of parse_report() described below.
78 $ua = LWP
::UserAgent
->new
85 # I would love to support gzipped transfer but it doesn't seem
86 # to mix well with mirroring:
88 # $ua->default_header('Accept-Encoding' => scalar HTTP::Message::decodable());
98 $xp = XML
::LibXML
->new;
100 $xp->clean_namespaces(1);
101 my $catalog = __FILE__
;
102 $catalog =~ s
|ParseReport
.pm
$|ParseReport
/catalog
|;
103 $xp->load_catalog($catalog);
108 sub _download_overview
{
109 my($cts_dir, $distro, %Opt) = @_;
110 my $format = $Opt{ctformat
} ||= $default_ctformat;
111 my $cturl = $Opt{cturl
} ||= $default_cturl;
112 my $ctarget = "$cts_dir/$distro.$format";
113 my $cheaders = "$cts_dir/$distro.headers";
115 unless (-e
$ctarget) {
116 die "Alert: No local file '$ctarget' found, cannot continue\n";
119 if (! -e
$ctarget or -M
$ctarget > .25) {
120 if (-e
$ctarget && $Opt{verbose
}) {
122 my $timestamp = gmtime $stat[9];
123 print STDERR
"(timestamp $timestamp GMT)\n" unless $Opt{quiet
};
125 print STDERR
"Fetching $ctarget..." if $Opt{verbose
} && !$Opt{quiet
};
126 my $uri = "$cturl/$distro.$format";
127 my $resp = _ua
->mirror($uri,$ctarget);
128 if ($resp->is_success) {
129 print STDERR
"DONE\n" if $Opt{verbose
} && !$Opt{quiet
};
130 open my $fh, ">", $cheaders or die;
131 for ($resp->headers->as_string) {
133 if ($Opt{verbose
} && $Opt{verbose
}>1) {
134 print STDERR
$_ unless $Opt{quiet
};
137 } elsif (304 == $resp->code) {
138 print STDERR
"DONE (not modified)\n" if $Opt{verbose
} && !$Opt{quiet
};
139 my $atime = my $mtime = time;
140 utime $atime, $mtime, $cheaders;
144 "No success downloading %s: %s",
155 my($ctarget, %Opt) = @_;
156 my $content = do { open my $fh, $ctarget or die; local $/; <$fh> };
157 my $preprocesswithtreebuilder = 0; # not needed since barbie switched to XHTML
158 if ($preprocesswithtreebuilder) {
159 require HTML
::TreeBuilder
;
160 my $tree = HTML
::TreeBuilder
->new;
161 $tree->implicit_tags(1);
163 $tree->ignore_ignorable_whitespace(0);
164 $tree->parse_content($content);
166 $content = $tree->as_XML;
169 my $doc = eval { $parser->parse_string($content) };
172 my $distro = basename
$ctarget;
173 die sprintf "Error while parsing %s\: %s", $distro, $err;
175 my $xc = XML
::LibXML
::XPathContext
->new($doc);
176 my $nsu = $doc->documentElement->namespaceURI;
177 $xc->registerNs('x', $nsu) if $nsu;
178 my($selected_release_ul,$selected_release_distrov,$excuse_string);
181 $xc->findnodes("/x:html/x:body/x:div[\@id = 'doc']") :
182 $doc->findnodes("/html/body/div[\@id = 'doc']");
183 my(@releasedivs) = $nsu ?
184 $xc->findnodes("//x:div[x:h2 and x:ul]",$cparentdiv) :
185 $cparentdiv->findnodes("//div[h2 and ul]");
188 $excuse_string = "selected distro '$Opt{vdistro}'";
189 my($fallbacktoversion) = $Opt{vdistro
} =~ /(\d+\..*)/;
190 $fallbacktoversion = 0 unless defined $fallbacktoversion;
191 RELEASE
: for my $i (0..$#releasedivs) {
194 $xc->findvalue("x:h2/x:a[2]/\@name",$releasedivs[$i]) :
195 $releasedivs[$i]->findvalue("h2/a[2]/\@name");
197 if ($x eq $Opt{vdistro
}) {
199 $picked = " (picked)";
201 print STDERR
"FOUND DISTRO: $x$picked\n" unless $Opt{quiet
};
204 $xc->findvalue("x:h2/x:a[1]/\@name",$releasedivs[$i]) :
205 $releasedivs[$i]->findvalue("h2/a[1]/\@name");
206 if ($x eq $fallbacktoversion) {
208 $picked = " (picked)";
210 print STDERR
"FOUND VERSION: $x$picked\n" unless $Opt{quiet
};
214 $excuse_string = "any distro";
216 unless (defined $releasediv) {
219 # using a[1] because a[2] is missing on the first entry
220 ($selected_release_distrov) = $nsu ?
221 $xc->findvalue("x:h2/x:a[1]/\@name",$releasedivs[$releasediv]) :
222 $releasedivs[$releasediv]->findvalue("h2/a[1]/\@name");
223 ($selected_release_ul) = $nsu ?
224 $xc->findnodes("x:ul",$releasedivs[$releasediv]) :
225 $releasedivs[$releasediv]->findnodes("ul");
226 unless (defined $selected_release_distrov) {
227 warn "Warning: could not find $excuse_string in '$ctarget'";
230 print STDERR
"SELECTED: $selected_release_distrov\n" unless $Opt{quiet
};
234 $xc->findnodes("x:li",$selected_release_ul) :
235 $selected_release_ul->findnodes("li")) {
237 $xc->findvalue("x:a[1]/text()",$test) :
238 $test->findvalue("a[1]/text()");
239 push @all, {id
=>$id};
246 my($ctarget, %Opt) = @_;
248 my $arr = YAML
::Syck
::LoadFile
($ctarget);
249 my($selected_release_ul,$selected_release_distrov,$excuse_string);
251 $excuse_string = "selected distro '$Opt{vdistro}'";
252 $arr = [grep {$_->{distversion
} eq $Opt{vdistro
}} @
$arr];
253 ($selected_release_distrov) = $arr->[0]{distversion
};
255 $excuse_string = "any distro";
258 for my $report (sort { $a->{id
} <=> $b->{id
} } @
$arr) {
259 unless ($seen{$report->{distversion
}}++) {
260 $last_addition = $report->{distversion
};
263 $arr = [grep {$_->{distversion
} eq $last_addition} @
$arr];
264 ($selected_release_distrov) = $last_addition;
266 unless ($selected_release_distrov) {
267 warn "Warning: could not find $excuse_string in '$ctarget'";
270 print STDERR
"SELECTED: $selected_release_distrov\n" unless $Opt{quiet
};
272 for my $test (@
$arr) {
273 my $id = $test->{id
};
274 push @all, {id
=>$id};
277 @all = sort { $b->{id
} <=> $a->{id
} } @all;
281 sub parse_single_report
{
282 my($report, $dumpvars, %Opt) = @_;
283 my($id) = $report->{id
};
284 $Opt{cachedir
} ||= "$ENV{HOME}/var/cpantesters";
285 my $nnt_dir = "$Opt{cachedir}/nntp-testers";
287 my $target = "$nnt_dir/$id";
289 unless (-e
$target) {
290 die {severity
=>0,text
=>"Warning: No local file '$target' found, skipping\n"};
294 print STDERR
"Fetching $target..." if $Opt{verbose
} && !$Opt{quiet
};
295 $Opt{transport
} ||= $default_transport;
297 } elsif ($Opt{transport
} eq "http_cpantesters") {
298 my $resp = _ua
->mirror("http://www.cpantesters.org/cgi-bin/pages.cgi?act=cpan-report&raw=1&id=$id",$target);
299 if ($resp->is_success) {
301 my(@stat) = stat $target;
302 my $timestamp = gmtime $stat[9];
303 print STDERR
"(timestamp $timestamp GMT)\n" unless $Opt{quiet
};
304 if ($Opt{verbose
} > 1) {
305 print STDERR
$resp->headers->as_string unless $Opt{quiet
};
308 my $headers = "$target.headers";
309 open my $fh, ">", $headers or die {severity
=>1,text
=>"Could not open >$headers: $!"};
310 print $fh $resp->headers->as_string;
313 text
=>sprintf "HTTP Server Error[%s] for id[%s]", $resp->status_line, $id};
315 } elsif ($Opt{transport
} eq "http_cpantesters_gzip") {
316 if (-e
"$target.gz") {
317 0 == system gunzip
=> $target or die;
319 my $resp = _ua
->mirror("http://www.cpantesters.org/cgi-bin/pages.cgi?act=cpan-report&raw=1&id=$id",$target);
320 if ($resp->is_success) {
322 my(@stat) = stat $target;
323 my $timestamp = gmtime $stat[9];
324 print STDERR
"(timestamp $timestamp GMT)\n" unless $Opt{quiet
};
325 if ($Opt{verbose
} > 1) {
326 print STDERR
$resp->headers->as_string unless $Opt{quiet
};
329 my $headers = "$target.headers";
330 open my $fh, ">", $headers or die {severity
=>1,text
=>"Could not open >$headers: $!"};
331 print $fh $resp->headers->as_string;
332 0 == system gzip
=> $target or die;
335 text
=>sprintf "HTTP Server Error[%s] for id[%s]", $resp->status_line, $id};
338 die {severity
=>1,text
=>"Illegal value for --transport: '$Opt{transport}'"};
342 parse_report
($target, $dumpvars, %Opt);
346 my($distro,%Opt) = @_;
348 $Opt{cachedir
} ||= "$ENV{HOME}/var/cpantesters";
349 my $cts_dir = "$Opt{cachedir}/cpantesters-show";
352 require Statistics
::Regression
;
353 $Opt{dumpvars
} = "." unless defined $Opt{dumpvars
};
355 if (!$Opt{vdistro
} && $distro =~ /^(.+)-(?i:v?\d+)(?:\.\d+)*\w*$/) {
356 $Opt{vdistro
} = $distro;
360 if (my $ctdb = $Opt{ctdb
}) {
361 require CPAN
::WWW
::Testers
::Generator
::Database
;
362 require CPAN
::DistnameInfo
;
363 my $dbi = CPAN
::WWW
::Testers
::Generator
::Database
->new(database
=>$ctdb) or die "Alert: unknown error while opening database '$ctdb'";
364 unless ($Opt{vdistro
}) {
365 my $sql = "select version from cpanstats where dist=? order by id";
366 my @rows = $dbi->get_query($sql,$distro);
368 for my $row (@rows) {
369 $newest = $row->[0] unless $seen{$row->[0]}++;
371 $Opt{vdistro
} = "$distro-$newest";
373 my $d = CPAN
::DistnameInfo
->new("FOO/$Opt{vdistro}.tgz");
375 my $version = $d->version;
376 my $sql = "select id from cpanstats where dist=? and version=? order by id desc";
377 my @rows = $dbi->get_query($sql,$dist,$version);
379 for my $row (@rows) {
381 push @all, {id
=>$id};
385 my $ctarget = _download_overview
($cts_dir, $distro, %Opt);
386 $Opt{ctformat
} ||= $default_ctformat;
387 if ($Opt{ctformat
} eq "html") {
388 $reports = _parse_html
($ctarget,%Opt);
390 $reports = _parse_yaml
($ctarget,%Opt);
393 return unless $reports;
396 my $samplesize = $Opt{sample
} || 0;
397 $samplesize = 0 if $samplesize && $samplesize >= @
$reports;
398 REPORT
: for my $report (@
$reports) {
401 my $need = $samplesize - $sampled;
402 next REPORT
unless $need;
403 my $left = @
$reports - $i;
404 # warn sprintf "tot %d i %d sampled %d need %d left %d\n", scalar @$reports, $i, $sampled, $need, $left;
405 my $want_this = (rand(1) <= ($need/$left));
406 next REPORT
unless $want_this;
408 eval {parse_single_report
($report, \
%dumpvars, %Opt)};
411 if ($@
->{severity
}) {
423 if ($Opt{dumpvars
}) {
425 my $dumpfile = $Opt{dumpfile
} || "ctgetreports.out";
426 open my $fh, ">", $dumpfile or die "Could not open '$dumpfile' for writing: $!";
427 print $fh YAML
::Syck
::Dump
(\
%dumpvars);
428 close $fh or die "Could not close '$dumpfile': $!"
431 solve
(\
%dumpvars,%Opt);
435 =head2 $bool = _looks_like_qp($raw_report)
437 We had to acknowledge the fact that some MTAs swallow the MIME-Version
438 header while passing MIME through. So we introduce fallback heuristics
439 that try to determine if a report is written in quoted printable.
441 Note that this subroutine is internal, just documented to have the
442 internals documented.
444 The current implementation counts the number of QP escaped spaces and
451 my $count_space = () = $report =~ /=20/g;
452 return 1 if $count_space > 12;
453 my $count_equal = () = $report =~ /=3D/g;
454 return 1 if $count_equal > 12;
455 return 1 if $count_space+$count_equal > 24;
456 return 0; # waiting for a counter example
459 =head2 $extract = parse_report($target,$dumpvars,%Opt)
461 Reads one report. $target is the local filename to read. $dumpvars is
462 a hashref which gets filled with descriptive stats about
463 PASS/FAIL/etc. %Opt are the options as described in the
464 C<ctgetreports> manpage. $extract is a hashref containing the found
467 Note: this parsing is a bit dirty but as it seems good enough I'm not
468 inclined to change it. We parse HTML with regexps only, not an HTML
469 parser. Only the entities are decoded.
473 article => $some_full_article_as_scalar
475 to use this function to parse one full article as text. When this is
476 given, the argument $target is not read, but its basename is taken to
477 be the id of the article. (OMG, hackers!)
481 my($target,$dumpvars,%Opt) = @_;
483 my $id = basename
($target);
484 # warn "DEBUG: id[$id]";
489 my($report,$isHTML) = _get_cooked_report
($target, \
%Opt);
490 my @qr = map /^qr:(.+)/, @
{$Opt{q
}};
491 if ($Opt{raw
} || @qr) {
493 my $cqr = eval "qr{$qr}";
494 die "Could not compile regular expression '$qr': $@" if $@
;
495 my(@matches) = $report =~ $cqr;
501 $v = join "", map {"($_)"} @matches;
506 $extract{"qr:$qr"} = $v;
511 my $moduleunpack = {};
512 my $expect_prereq = 0;
513 my $expect_toolchain = 0;
514 my $expecting_toolchain_soon = 0;
515 my $fallback_p5 = "";
518 my $in_summary_seen_platform = 0;
519 my $in_prg_output = 0;
520 my $in_env_context = 0;
522 my $current_headline;
523 my @previous_line = ""; # so we can neutralize line breaks
524 my @rlines = split /\r?\n/, $report;
525 LINE
: for (@rlines) {
526 next LINE
unless ($isHTML ?
m/<title>((\S+)\s+(\S+))/ : m/^Subject:\s*((\S+)\s+(\S+))/);
528 $s = $1 if $s =~ m{<strong>(.+)};
529 if ($s =~ /(\S+)\s+(\S+)/) {
533 $extract{"meta:ok"} = $ok;
534 $extract{"meta:about"} = $about;
537 unless ($extract{"meta:about"}) {
538 $extract{"meta:about"} = $Opt{vdistro
};
539 unless ($extract{"meta:ok"}) {
541 warn "Warning: could not determine state of report";
544 LINE
: while (@rlines) {
546 while (/!$/ and @rlines) {
547 my $followupline = shift @rlines;
548 $followupline =~ s/^\s+//; # remo leading space
551 if (/^--------/ && $previous_line[-2] && $previous_line[-2] =~ /^--------/) {
552 $current_headline = $previous_line[-1];
553 if ($current_headline =~ /PROGRAM OUTPUT/) {
558 if ($current_headline =~ /ENVIRONMENT AND OTHER CONTEXT/) {
564 if ($extract{"meta:perl"}) {
566 and !$extract{"conf:git_commit_id"}
567 and /Commit id:\s*([[:xdigit:]]+)/) {
568 $extract{"conf:git_commit_id"} = $1;
573 } elsif (/Summary of my perl5 \((.+)\) configuration:/) {
580 if (($r,$v,$s,$p) = $p5 =~ /revision (\S+) version (\S+) subversion (\S+) patch (\S+)/) {
581 $r =~ s/\.0//; # 5.0 6 2!
582 $extract{"meta:perl"} = "$r.$v.$s\@$p";
583 } elsif (($r,$v,$s) = $p5 =~ /revision (\S+) version (\S+) subversion (\S+)/) {
585 $extract{"meta:perl"} = "$r.$v.$s";
586 } elsif (($r,$v,$s) = $p5 =~ /(\d+\S*) patchlevel (\S+) subversion (\S+)/) {
588 $extract{"meta:perl"} = "$r.$v.$s";
590 $extract{"meta:perl"} = $p5;
594 unless ($extract{"meta:from"}) {
597 m
|<div
class="h_name">From
:</div> <b>(.+?)</b
><br
/>| :
601 $f = $1 if $f =~ m{<strong>(.+)</strong>};
602 $extract{"meta:from"} = $f;
604 $extract{"meta:from"} =~ s/\.$// if $extract{"meta:from"};
606 unless ($extract{"meta:date"}) {
609 m
|<div
class="h_name">Date
:</div> (.+?)<br/>| :
613 $date = $1 if $date =~ m{<strong>(.+)</strong>};
615 DATEFMT
: for my $pat ("%Y-%m-%dT%TZ", # 2010-07-07T14:01:40Z
616 "%a, %d %b %Y %T %z", # Sun, 28 Sep 2008 12:23:12 +0100
617 "%b %d, %Y %R", # July 10,...
618 "%b %d, %Y %R", # July 4,...
621 my $p = DateTime
::Format
::Strptime
->new
627 $p->parse_datetime($date)
632 warn "Could not parse date[$date], setting to epoch 0";
633 $dt = DateTime
->from_epoch( epoch
=> 0 );
635 $extract{"meta:date"} = $dt->datetime;
637 $extract{"meta:date"} =~ s/\.$// if $extract{"meta:date"};
639 unless ($extract{"meta:writer"}) {
640 for ("$previous_line[-1] $_") {
642 } elsif (/CPANPLUS, version (\S+)/) {
643 $extract{"meta:writer"} = "CPANPLUS $1";
644 } elsif (/created (?:automatically )?by (\S+)/) {
645 $extract{"meta:writer"} = $1;
646 if (/\s+on\s+perl\s+([^,]+),/) {
649 } elsif (/This report was machine-generated by (\S+) (\S+)/) {
650 $extract{"meta:writer"} = "$1 $2";
652 $extract{"meta:writer"} =~ s/[\.,]$// if $extract{"meta:writer"};
656 # we do that first three lines a bit too often
657 my $qr = $Opt{dumpvars
} || "";
658 $qr = qr/$qr/ if $qr;
661 @q = qw(meta:perl conf:archname conf:usethreads conf:optimize meta:writer meta:from) unless @q;
664 my %conf_vars = map {($_ => 1)} grep { /^conf:/ } @q;
666 if (/^\s+Platform:$/) {
667 $in_summary_seen_platform=1;
668 } elsif (/^\s*$/ || m
|</pre
>|) {
669 # if not html, we have reached the end now
670 if ($in_summary_seen_platform) {
671 # some perls have an empty line after the summary line
675 my(%kv) = /\G,?\s*([^=]+)=('[^']+?'|\S+)/gc;
676 while (my($k,$v) = each %kv) {
680 if ($v =~ /^'(.*)'$/) {
685 if ($qr && $ck =~ $qr) {
687 } elsif ($conf_vars{$ck}) {
693 if ($in_prg_output) {
694 unless ($extract{"meta:output_from"}) {
695 if (/Output from (.+):$/) {
696 $extract{"meta:output_from"} = $1
700 if ($in_env_context) {
701 if (/^\s{4}(\S+)\s*=\s*(.*)$/) {
702 $extract{"env:$1"} = $2;
705 push @previous_line, $_;
706 if ($expect_prereq || $expect_toolchain) {
707 if (/Perl module toolchain versions installed/) {
708 # first time discovered in CPANPLUS 0.89_06
709 $expecting_toolchain_soon = 1;
713 if (exists $moduleunpack->{type
}) {
714 my($module,$v,$needwant);
715 # type 1 and 2 are about prereqs, type three about toolchain
716 if ($moduleunpack->{type
} == 1) {
717 (my $leader,$module,$needwant,$v) = eval { unpack $moduleunpack->{tpl
}, $_; };
719 if ($leader =~ /^-/) {
723 } elsif ($leader =~ /^(
724 buil
# build_requires:
725 |conf
# configure_requires:
728 } elsif ($module =~ /^(
733 } elsif ($moduleunpack->{type
} == 2) {
734 (my $leader,$module,$v,$needwant) = eval { unpack $moduleunpack->{tpl
}, $_; };
736 for ($module,$v,$needwant) {
740 if ($leader =~ /^\*/) {
745 or !defined $needwant
749 ($module,$v,$needwant) = split " ", $_;
751 } elsif ($moduleunpack->{type
} == 3) {
752 (my $leader,$module,$v) = eval { unpack $moduleunpack->{tpl
}, $_; };
756 $expect_toolchain = 0;
758 } elsif ($module =~ /^-/) {
766 my($modulename,$versionlead) = split " ", $module;
767 if (defined $modulename and defined $versionlead) {
768 $module = $modulename;
769 $v = "$versionlead$v";
774 $extract{"mod:$module"} = $v;
776 $needwant =~ s/^\s+//;
777 $needwant =~ s/\s+$//;
778 $extract{"prereq:$module"} = $needwant;
782 if (/(\s+)(Module\s+)(Need\s+)Have/) {
785 tpl
=> 'a'.length($1).'a'.length($2).'a'.length($3).'a*',
788 } elsif (/(\s+)(Module Name\s+)(Have)(\s+)Want/) {
791 my $adjust_2 = -length($4);
792 my $adjust_3 = length($4);
793 # I think they do not really try to align, usually we
794 # get away with split
796 tpl
=> 'a'.length($1).'a'.(length($2)+$adjust_2).'a'.(length($3)+$adjust_3).'a*',
801 if (/PREREQUISITES|Prerequisite modules loaded/) {
805 if ($expecting_toolchain_soon) {
806 if (/(\s+)(Module(?:\sName)?\s+) Have/) {
809 $expecting_toolchain_soon=0;
811 tpl
=> 'a'.length($1).'a'.length($2).'a*',
816 if (/toolchain versions installed/) {
818 $expecting_toolchain_soon=1;
821 if (! $extract{"meta:perl"} && $fallback_p5) {
822 my($p5,$patch) = split /\s+patch\s+/, $fallback_p5;
823 $extract{"meta:perl"} = $p5;
824 $extract{"conf:git_describe"} = $patch if defined $patch;
827 if (my $filtercbbody = $Opt{filtercb
}) {
828 my $filtercb = eval('sub {'.$filtercbbody.'}');
829 $filtercb->(\
%extract);
832 if ($extract{"conf:osvers"} && $extract{"conf:archname"}) {
833 $extract{"conf:archname+osvers"} = join " ", @extract{"conf:archname","conf:osvers"};
835 my $data = $dumpvars->{"==DATA=="} ||= [];
836 push @
$data, \
%extract;
838 # ---- %extract finished ----
840 if (my $qr = $Opt{dumpvars
}) {
842 while (my($k,$v) = each %extract) {
844 $dumpvars->{$k}{$v}{$extract{"meta:ok"}}++;
849 my $have = $extract{$want} || "";
850 $diag .= " $want\[$have]";
852 printf STDERR
" %-4s %8d%s\n", $extract{"meta:ok"}, $id, $diag unless $Opt{quiet
};
854 $report =~ s/\s+\z//;
855 print STDERR
$report, "\n================\n" unless $Opt{quiet
};
857 if ($Opt{interactive
}) {
861 my $ans = IO
::Prompt
::prompt
863 -p
=> "View $id? [onechar: ynq] ",
868 print STDERR
"\n" unless $Opt{quiet
};
870 my($report) = _get_cooked_report
($target, \
%Opt);
871 $Opt{pager
} ||= "less";
872 open my $lfh, "|-", $Opt{pager
} or die "Could not fork '$Opt{pager}': $!";
874 print {$lfh} $report;
875 close $lfh or die "Could not close pager: $!"
876 } elsif ($ans eq "q") {
884 sub _get_cooked_report
{
885 my($target, $Opt_ref) = @_;
886 my($report, $isHTML);
887 if ($report = $Opt_ref->{article
}) {
888 $isHTML = $report =~ /^</;
894 } elsif (-e
$target) {
895 open $fh, '<', $target or die "Could not open '$target': $!";
896 } elsif (-e
"$target.gz") {
897 open $fh, "-|", "zcat", $target or die "Could not open '$target.gz': $!";
899 die "Could not find '$target' or '$target.gz'";
902 my $raw_report = <$fh>;
903 $isHTML = $raw_report =~ /^</;
905 if ($raw_report =~ m{^<\?.+?<html.+?<head.+?<body.+?<pre[^>]*>(.+)</pre>.*</body>.*</html>}s) {
906 $raw_report = decode_entities
($1);
911 $report = decode_entities
($raw_report);
912 } elsif ($raw_report =~ /^MIME-Version: 1.0$/m
914 _looks_like_qp
($raw_report)
916 # minimizing MIME effort; don't know about reports in other formats
917 $report = MIME
::QuotedPrint
::decode_qp
($raw_report);
919 $report = $raw_report;
923 if ($report =~ /\r\n/) {
924 my @rlines = split /\r?\n/, $report;
925 $report = join "\n", @rlines;
932 Feeds a couple of potentially interesting data to
933 Statistics::Regression and sorts the result by R^2 descending. Do not
934 confuse this with a prove, rather take it as a useful hint. It can
935 save you minutes of staring at data and provide a quick overview where
936 one should look closer. Displays the N top candidates, where N
937 defaults to 3 and can be set with the C<$Opt{solvetop}> variable.
938 Regressions results with an R^2 of 1.00 are displayed in any case.
940 The function is called when the option C<-solve> is give on the
941 commandline. Several extra config variables are calculated, see source
946 my %never_solve_on = map {($_ => 1)}
960 'env:PERL5_CPANPLUS_IS_RUNNING',
961 'env:PERL5_CPAN_IS_RUNNING',
962 'env:PERL5_CPAN_IS_RUNNING_IN_RECURSION',
965 my %normalize_numeric =
967 id
=> sub { return shift },
970 my($Y,$M,$D,$h,$m,$s) = $v =~ /(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)/;
971 unless (defined $M) {
972 die "illegal value[$v] for a date";
974 Time
::Local
::timegm
($s,$m,$h,$D,$M-1,$Y);
977 my %normalize_value =
980 my($perlatpatchlevel) = shift;
981 my $perl = $perlatpatchlevel;
988 require Statistics
::Regression
;
991 if (my $ycbbody = $Opt{ycb
}) {
992 $ycb = eval('sub {'.$ycbbody.'}');
998 if ($rec->{"meta:ok"} eq "PASS") {
1000 } elsif ($rec->{"meta:ok"} eq "FAIL") {
1006 VAR
: for my $variable (sort keys %$V) {
1007 next if $variable eq "==DATA==";
1008 if ($never_solve_on{$variable}){
1009 warn "Skipping '$variable'\n" unless $Opt{quiet
};
1012 my $value_distribution = $V->{$variable};
1013 my $keys = keys %$value_distribution;
1015 if ($normalize_numeric{$variable}) {
1016 push @X, "n_$variable";
1019 for my $value (sort keys %$value_distribution) {
1020 my $pf = $value_distribution->{$value};
1023 if ($pf->{PASS
} || $pf->{FAIL
}) {
1024 my $Xele = sprintf "eq_%s",
1026 $normalize_value{$variable} ?
1027 $normalize_value{$variable}->($value) :
1030 push @X, $Xele unless $seen{$Xele}++;
1034 $pf->{PASS
} xor $pf->{FAIL
}
1037 substr($value,$vl) = "..." if length $value > 3+$vl;
1038 my $poor_mans_freehand_estimation = 0;
1039 if ($poor_mans_freehand_estimation) {
1042 "%4d %4d %-23s | %s\n",
1052 warn "variable[$variable]keys[$keys]X[@X]\n" unless $Opt{quiet
};
1053 next VAR
unless @X > 1;
1059 RECORD
: for my $rec (@
{$V->{"==DATA=="}}) {
1060 my $y = $ycb->($rec);
1061 next RECORD
unless defined $y;
1064 @obs{@X} = (0) x
@X;
1067 if ($x =~ /^eq_(.+)/) {
1069 if (exists $rec->{$variable}
1070 && defined $rec->{$variable}
1073 $normalize_value{$variable} ?
1074 $normalize_value{$variable}->($rec->{$variable}) :
1077 if ($use_v eq $read_v) {
1081 # warn "DEBUG: y[$y]x[$x]obs[$obs{$x}]\n";
1082 } elsif ($x =~ /^n_(.+)/) {
1084 $obs{$x} = eval { $normalize_numeric{$v}->($rec->{$v}); };
1086 warn "Warning: error during parsing v[$v] in record[$rec->{id}]: $@; continuing with undef value";
1090 push @
{$regdata{data
}}, \
%obs;
1092 _run_regression
($variable, \
%regdata, \
@regression, \
%Opt);
1094 my $top = min
($Opt{solvetop
} || 3, scalar @regression);
1095 my $max_rsq = sum
map {1==$_->rsq ?
1 : 0} @regression;
1096 $top = $max_rsq if $max_rsq && $max_rsq > $top;
1100 "State after regression testing: %d results, showing top %d\n\n",
1109 printf "(%d)\n", ++$score;
1110 eval { $reg->print; };
1112 printf "\n\nOops, Statistics::Regression died during ->print() with error message[$@]\n\n";
1114 last if --$top <= 0;
1119 # $variable is the name we pass through to S:R constructor
1120 # $regdata is hash and has the arrays "X" and "data" (observations)
1121 # X goes to S:R constructor
1122 # each observation has a Y which we pass to S:R in an include() call
1123 # $regression is the collector array of results
1124 # $opt are the options from outside, used to see if we are "verbose"
1125 sub _run_regression
{
1126 my($variable,$regdata,$regression,$opt) = @_;
1127 my @X = @
{$regdata->{X
}};
1128 # my $splo = $regdata->{"spliced-out"} = []; # maybe can be used to
1129 # hold the reference
1132 my $reg = Statistics
::Regression
->new($variable,\
@X);
1133 for my $obs (@
{$regdata->{data
}}) {
1134 my $y = delete $obs->{Y
};
1135 $reg->include($y, $obs);
1139 my @e = $reg->standarderrors;
1140 die "found standarderrors == 0" if grep { 0 == $_ } @e;
1143 if ($opt->{verbose
} && $opt->{verbose
}>=2) {
1145 warn YAML
::Syck
::Dump
1146 ({error
=>"could not determine some regression parameters",
1147 variable
=>$variable,
1154 # reduce k in case that linear dependencies disturbed us;
1155 # often called reference group; I'm tempted to collect and
1160 push @
$regression, $reg;
1172 Please report any bugs or feature requests through the web
1174 L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=CPAN-Testers-ParseReport>.
1175 I will be notified, and then you'll automatically be notified of
1176 progress on your bug as I make changes.
1180 You can find documentation for this module with the perldoc command.
1182 perldoc CPAN::Testers::ParseReport
1185 You can also look for information at:
1189 =item * RT: CPAN's request tracker
1191 L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=CPAN-Testers-ParseReport>
1193 =item * AnnoCPAN: Annotated CPAN documentation
1195 L<http://annocpan.org/dist/CPAN-Testers-ParseReport>
1197 =item * CPAN Ratings
1199 L<http://cpanratings.perl.org/d/CPAN-Testers-ParseReport>
1203 L<http://search.cpan.org/dist/CPAN-Testers-ParseReport>
1208 =head1 ACKNOWLEDGEMENTS
1210 Thanks to RJBS for module-starter.
1212 =head1 COPYRIGHT & LICENSE
1214 Copyright 2008 Andreas König.
1216 This program is free software; you can redistribute it and/or modify it
1217 under the same terms as Perl itself.
1222 1; # End of CPAN::Testers::ParseReport