1 package CPAN
::Testers
::ParseReport
;
7 use DateTime
::Format
::Strptime
;
8 use File
::Basename
qw(basename);
9 use File
::Path
qw(mkpath);
10 use HTML
::Entities
qw(decode_entities);
12 use List
::Util
qw(max min sum);
13 use List
::MoreUtils
qw(uniq);
14 use MIME
::QuotedPrint
();
18 our $default_transport = "http_cpantesters";
19 our $default_cturl = "http://static.cpantesters.org/distro";
26 CPAN::Testers::ParseReport - parse reports to www.cpantesters.org from various sources
30 use version
; our $VERSION = qv
('0.2.9');
34 The documentation in here is normally not needed because the code is
35 meant to be run from the standalone program C<ctgetreports>.
37 ctgetreports --q mod:Moose Devel-Events
41 This is the core module for CPAN::Testers::ParseReport. If you're not
42 looking to extend or alter the behaviour of this module, you probably
43 want to look at L<ctgetreports> instead.
47 Options are described in the L<ctgetreports> manpage and are passed
48 through to the functions unaltered.
52 =head2 parse_distro($distro,%options)
54 reads the cpantesters JSON file or the local database for the distro
55 and loops through the reports for the specified or most recent version
56 of that distro found in these data.
58 parse_distro() intentionally has no meaningful return value, different
59 options would require different ones.
61 =head2 $extract = parse_single_report($report,$dumpvars,%options)
63 mirrors and reads this report. $report is of the form
65 { id => <integer>, guid => <guid>, }
67 $dumpvar is a hashreference that gets filled with data.
69 $extract is the result of parse_report() described below.
77 $ua = LWP
::UserAgent
->new
90 $ua = LWP
::UserAgent
->new
96 $ua->default_header('Accept-Encoding' => scalar HTTP
::Message
::decodable
());
102 # we called it yaml because it was yaml; now it is json
104 my $j = JSON
::XS
->new->ascii->pretty;
108 open my $fh, $file or die "Could not open '$file': $!";
112 $j->decode(_slurp
shift);
119 sub _download_overview
{
120 my($cts_dir, $distro, %Opt) = @_;
121 my $cturl = $Opt{cturl
} ||= $default_cturl;
122 my $ctarget = "$cts_dir/$distro.json";
123 my $cheaders = "$cts_dir/$distro.headers";
125 unless (-e
$ctarget) {
126 die "Alert: No local file '$ctarget' found, cannot continue\n";
129 if (! -e
$ctarget or -M
$ctarget > .25) {
130 if (-e
$ctarget && $Opt{verbose
}) {
132 my $timestamp = gmtime $stat[9];
133 print STDERR
"(timestamp $timestamp GMT)\n" unless $Opt{quiet
};
135 print STDERR
"Fetching $ctarget..." if $Opt{verbose
} && !$Opt{quiet
};
136 my $firstletter = substr($distro,0,1);
137 my $uri = "$cturl/$firstletter/$distro.json";
138 my $resp = _ua
->mirror($uri,$ctarget);
139 if ($resp->is_success) {
140 print STDERR
"DONE\n" if $Opt{verbose
} && !$Opt{quiet
};
141 open my $fh, ">", $cheaders or die;
142 for ($resp->headers->as_string) {
144 if ($Opt{verbose
} && $Opt{verbose
}>1) {
145 print STDERR
$_ unless $Opt{quiet
};
148 } elsif (304 == $resp->code) {
149 print STDERR
"DONE (not modified)\n" if $Opt{verbose
} && !$Opt{quiet
};
150 my $atime = my $mtime = time;
151 utime $atime, $mtime, $cheaders;
155 "No success downloading %s: %s",
166 my($ctarget, %Opt) = @_;
167 my $arr = _yaml_loadfile
($ctarget);
168 my($selected_release_ul,$selected_release_distrov,$excuse_string);
170 $excuse_string = "selected distro '$Opt{vdistro}'";
171 $arr = [grep {$_->{distversion
} eq $Opt{vdistro
}} @
$arr];
172 ($selected_release_distrov) = $arr->[0]{distversion
};
174 $excuse_string = "any distro";
177 for my $report (sort { $a->{id
} <=> $b->{id
} } @
$arr) {
178 unless ($seen{$report->{distversion
}}++) {
179 $last_addition = $report->{distversion
};
182 $arr = [grep {$_->{distversion
} eq $last_addition} @
$arr];
183 ($selected_release_distrov) = $last_addition;
185 unless ($selected_release_distrov) {
186 warn "Warning: could not find $excuse_string in '$ctarget'";
189 print STDERR
"SELECTED: $selected_release_distrov\n" unless $Opt{quiet
};
191 for my $test (@
$arr) {
192 my $id = $test->{id
};
195 guid
=> $test->{guid
},
199 @all = sort { $b->{id
} <=> $a->{id
} } @all;
203 sub parse_single_report
{
204 my($report, $dumpvars, %Opt) = @_;
205 my($id) = $report->{id
};
206 my($guid) = $report->{guid
};
207 $Opt{cachedir
} ||= "$ENV{HOME}/var/cpantesters";
208 # the name nntp-testers was picked because originally the reports
209 # were available from an NNTP server
210 my $nnt_dir = "$Opt{cachedir}/nntp-testers";
212 my $target = "$nnt_dir/$id";
214 unless (-e
$target) {
215 die {severity
=>0,text
=>"Warning: No local file '$target' found, skipping\n"};
218 $Opt{transport
} ||= $default_transport;
222 } elsif (-e
"$target.gz") {
223 $ttarget = "$target.gz";
227 open my $fh, $ttarget or die "Could not open '$ttarget': $!";
229 } elsif ($Opt{transport
} eq "http_cpantesters") {
232 } elsif ($Opt{transport
} eq "http_cpantesters_gzip") {
233 my $gz = Compress
::Zlib
::gzopen
($fh, "rb");
236 while (my $bytesread = $gz->gzread($buffer)) {
237 $raw_report .= $buffer;
240 if ($raw_report =~ m{<title>.*(Report not found|Error).*</title>}) {
241 unlink $ttarget or die "Could not unlink '$ttarget': $!";
245 print STDERR
"Fetching $target..." if $Opt{verbose
} && !$Opt{quiet
};
247 } elsif ($Opt{transport
} eq "http_cpantesters") {
249 if ($Opt{"prefer-local-reports"}) {
250 unless (-e
$target) {
257 my $resp = _ua
->mirror("http://www.cpantesters.org/cpan/report/$guid?raw=1",$target);
258 if ($resp->is_success) {
260 my(@stat) = stat $target;
261 my $timestamp = gmtime $stat[9];
262 print STDERR
"(timestamp $timestamp GMT)\n" unless $Opt{quiet
};
263 if ($Opt{verbose
} > 1) {
264 print STDERR
$resp->headers->as_string unless $Opt{quiet
};
267 my $headers = "$target.headers";
268 open my $fh, ">", $headers or die {severity
=>1,text
=>"Could not open >$headers: $!"};
269 print $fh $resp->headers->as_string;
272 text
=>sprintf "HTTP Server Error[%s] for id[%s] guid[%s]", $resp->status_line, $id, $guid};
275 } elsif ($Opt{transport
} eq "http_cpantesters_gzip") {
277 if ($Opt{"prefer-local-reports"}) {
278 unless (-e
"$target.gz") {
285 my $resp = _ua_gzip
->mirror("http://www.cpantesters.org/cpan/report/$guid?raw=1","$target.gz");
286 if ($resp->is_success) {
288 my(@stat) = stat "$target.gz";
289 my $timestamp = gmtime $stat[9];
290 print STDERR
"(timestamp $timestamp GMT)\n" unless $Opt{quiet
};
291 if ($Opt{verbose
} > 1) {
292 print STDERR
$resp->headers->as_string unless $Opt{quiet
};
295 my $headers = "$target.headers";
296 open my $fh, ">", $headers or die {severity
=>1,text
=>"Could not open >$headers: $!"};
297 print $fh $resp->headers->as_string;
300 text
=>sprintf "HTTP Server Error[%s] for id[%s] guid[%s]", $resp->status_line, $id, $guid};
304 die {severity
=>1,text
=>"Illegal value for --transport: '$Opt{transport}'"};
308 parse_report
($target, $dumpvars, %Opt);
312 my($distro,%Opt) = @_;
314 $Opt{cachedir
} ||= "$ENV{HOME}/var/cpantesters";
315 # the name cpantesters-show was picked because originally
316 # http://www.cpantesters.org/show/ contained html file that we had
318 my $cts_dir = "$Opt{cachedir}/cpantesters-show";
321 require Statistics
::Regression
;
322 $Opt{dumpvars
} = "." unless defined $Opt{dumpvars
};
324 if (!$Opt{vdistro
} && $distro =~ /^(.+)-(?i:v?\d+)(?:\.\d+)*\w*$/) {
325 $Opt{vdistro
} = $distro;
329 if (my $ctdb = $Opt{ctdb
}) {
330 require CPAN
::WWW
::Testers
::Generator
::Database
;
331 require CPAN
::DistnameInfo
;
332 my $dbi = CPAN
::WWW
::Testers
::Generator
::Database
->new(database
=>$ctdb) or die "Alert: unknown error while opening database '$ctdb'";
333 unless ($Opt{vdistro
}) {
334 my $sql = "select version from cpanstats where dist=? order by id";
335 my @rows = $dbi->get_query($sql,$distro);
337 for my $row (@rows) {
338 $newest = $row->[0] unless $seen{$row->[0]}++;
340 $Opt{vdistro
} = "$distro-$newest";
342 my $d = CPAN
::DistnameInfo
->new("FOO/$Opt{vdistro}.tgz");
344 my $version = $d->version;
345 my $sql = "select id, guid from cpanstats where dist=? and version=? order by id desc";
346 my @rows = $dbi->get_query($sql,$dist,$version);
348 for my $row (@rows) {
356 my $ctarget = _download_overview
($cts_dir, $distro, %Opt);
357 $reports = _parse_yaml
($ctarget,%Opt);
359 return unless $reports;
361 my $samplesize = $Opt{sample
} || 0;
362 $samplesize = 0 if $samplesize && $samplesize >= @
$reports;
366 REPORT
: for my $report (@
$reports) {
369 my $need = $samplesize - $sampled;
370 next REPORT
unless $need;
371 my $left = @
$reports - $i;
372 # warn sprintf "tot %d i %d sampled %d need %d left %d\n", scalar @$reports, $i, $sampled, $need, $left;
373 my $want_this = (rand(1) <= ($need/$left));
374 next REPORT
unless $want_this;
376 eval {parse_single_report
($report, \
%dumpvars, %Opt)};
379 if ($@
->{severity
}) {
390 last REPEATER
if $Signal;
393 PASSFAIL
: for my $pf ("pass","fail") {
394 my $minx = $Opt{"min".$pf} or next PASSFAIL
;
395 my $x = $dumpvars{"meta:ok"}{uc $pf}{uc $pf};
397 # bump samplesize, remove already sampled reports from array, redo
398 my $bump = int($samplesize * 0.05)+1;
399 $samplesize += $bump;
400 for my $k (sort {$b <=> $a} keys %taken) {
401 splice @
$reports, $k, 1;
408 if ($Opt{dumpvars
}) {
409 my $dumpfile = $Opt{dumpfile
} || "ctgetreports.out";
410 open my $fh, ">", $dumpfile or die "Could not open '$dumpfile' for writing: $!";
411 print $fh _yaml_dump
(\
%dumpvars);
412 close $fh or die "Could not close '$dumpfile': $!"
415 solve
(\
%dumpvars,%Opt);
419 =head2 $bool = _looks_like_qp($raw_report)
421 We had to acknowledge the fact that some MTAs swallow the MIME-Version
422 header while passing MIME through. So we introduce fallback heuristics
423 that try to determine if a report is written in quoted printable.
425 Note that this subroutine is internal, just documented to have the
426 internals documented.
428 The current implementation counts the number of QP escaped spaces and
435 my $count_space = () = $report =~ /=20/g;
436 return 1 if $count_space > 12;
437 my $count_equal = () = $report =~ /=3D/g;
438 return 1 if $count_equal > 12;
439 return 1 if $count_space+$count_equal > 24;
440 return 0; # waiting for a counter example
443 =head2 $extract = parse_report($target,$dumpvars,%Opt)
445 Reads one report. $target is the local filename to read. $dumpvars is
446 a hashref which gets filled with descriptive stats about
447 PASS/FAIL/etc. %Opt are the options as described in the
448 C<ctgetreports> manpage. $extract is a hashref containing the found
451 Note: this parsing is a bit dirty but as it seems good enough I'm not
452 inclined to change it. We parse HTML with regexps only, not an HTML
453 parser. Only the entities are decoded.
457 article => $some_full_article_as_scalar
459 to use this function to parse one full article as text. When this is
460 given, the argument $target is not read, but its basename is taken to
461 be the id of the article. (OMG, hackers!)
465 my($target,$dumpvars,%Opt) = @_;
467 my $id = basename
($target);
468 # warn "DEBUG: id[$id]";
473 my($report,$isHTML) = _get_cooked_report
($target, \
%Opt);
474 my @qr = map /^qr:(.+)/, @
{$Opt{q
}};
475 if ($Opt{raw
} || @qr) {
477 my $cqr = eval "qr{$qr}";
478 die "Could not compile regular expression '$qr': $@" if $@
;
479 my(@matches) = $report =~ $cqr;
485 $v = join "", map {"($_)"} @matches;
490 $extract{"qr:$qr"} = $v;
495 my $moduleunpack = {};
496 my $expect_prereq = 0;
497 my $expect_toolchain = 0;
498 my $expecting_toolchain_soon = 0;
499 my $fallback_p5 = "";
502 my $in_summary_seen_platform = 0;
503 my $in_prg_output = 0;
504 my $in_env_context = 0;
505 my $in_test_summary = 0;
507 my $current_headline;
508 my @previous_line = ""; # so we can neutralize line breaks
509 my @rlines = split /\r?\n/, $report;
510 LINE
: for (@rlines) {
511 next LINE
unless ($isHTML ?
m/<title>((\S+)\s+(\S+))/ : m/^Subject:\s*((\S+)\s+(\S+))/);
513 $s = $1 if $s =~ m{<strong>(.+)};
514 if ($s =~ /(\S+)\s+(\S+)/) {
518 $extract{"meta:ok"} = $ok;
519 $extract{"meta:about"} = $about;
522 unless ($extract{"meta:about"}) {
523 $extract{"meta:about"} = $Opt{vdistro
};
524 unless ($extract{"meta:ok"}) {
525 warn "Warning: could not determine state of report";
528 LINE
: while (@rlines) {
530 while (/!$/ and @rlines) {
531 my $followupline = shift @rlines;
532 $followupline =~ s/^\s+//; # remo leading space
536 if ($previous_line[-2] && $previous_line[-2] =~ /^--------/) {
537 $current_headline = $previous_line[-1];
538 if ($current_headline =~ /PROGRAM OUTPUT/) {
543 if ($current_headline =~ /ENVIRONMENT AND OTHER CONTEXT/) {
548 } elsif ($previous_line[-1] && $previous_line[-1] =~ /Test Summary Report/) {
549 $in_test_summary = 1;
553 if ($extract{"meta:perl"}) {
555 and !$extract{"conf:git_commit_id"}
556 and /Commit id:\s*([[:xdigit:]]+)/) {
557 $extract{"conf:git_commit_id"} = $1;
562 } elsif (/Summary of my perl5 \((.+)\) configuration:/) {
569 if (($r,$v,$s,$p) = $p5 =~ /revision (\S+) version (\S+) subversion (\S+) patch (\S+)/) {
570 $r =~ s/\.0//; # 5.0 6 2!
571 $extract{"meta:perl"} = "$r.$v.$s\@$p";
572 } elsif (($r,$v,$s) = $p5 =~ /revision (\S+) version (\S+) subversion (\S+)/) {
574 $extract{"meta:perl"} = "$r.$v.$s";
575 } elsif (($r,$v,$s) = $p5 =~ /(\d+\S*) patchlevel (\S+) subversion (\S+)/) {
577 $extract{"meta:perl"} = "$r.$v.$s";
579 $extract{"meta:perl"} = $p5;
583 unless ($extract{"meta:from"}) {
586 m
|<div
class="h_name">From
:</div> <b>(.+?)</b
><br
/>| :
590 $f = $1 if $f =~ m{<strong>(.+)</strong>};
591 $extract{"meta:from"} = $f;
593 $extract{"meta:from"} =~ s/\.$// if $extract{"meta:from"};
595 unless ($extract{"meta:date"}) {
598 m
|<div
class="h_name">Date
:</div> (.+?)<br/>| :
602 $date = $1 if $date =~ m{<strong>(.+)</strong>};
604 DATEFMT
: for my $pat ("%Y-%m-%dT%TZ", # 2010-07-07T14:01:40Z
605 "%a, %d %b %Y %T %z", # Sun, 28 Sep 2008 12:23:12 +0100
606 "%b %d, %Y %R", # July 10,...
607 "%b %d, %Y %R", # July 4,...
610 my $p = DateTime
::Format
::Strptime
->new
616 $p->parse_datetime($date)
621 warn "Could not parse date[$date], setting to epoch 0";
622 $dt = DateTime
->from_epoch( epoch
=> 0 );
624 $extract{"meta:date"} = $dt->datetime;
626 $extract{"meta:date"} =~ s/\.$// if $extract{"meta:date"};
628 unless ($extract{"meta:writer"}) {
629 for ("$previous_line[-1] $_") {
631 } elsif (/CPANPLUS, version (\S+)/) {
632 $extract{"meta:writer"} = "CPANPLUS $1";
633 } elsif (/created (?:automatically )?by (\S+)/) {
634 $extract{"meta:writer"} = $1;
635 if (/\s+on\s+perl\s+([^,]+),/) {
638 } elsif (/This report was machine-generated by (\S+) (\S+)/) {
639 $extract{"meta:writer"} = "$1 $2";
641 $extract{"meta:writer"} =~ s/[\.,]$// if $extract{"meta:writer"};
645 # we do that first three lines a bit too often
646 my $qr = $Opt{dumpvars
} || "";
647 $qr = qr/$qr/ if $qr;
650 @q = qw(meta:perl conf:archname conf:usethreads conf:optimize meta:writer meta:from) unless @q;
653 my %conf_vars = map {($_ => 1)} grep { /^conf:/ } @q;
655 if (/^\s+Platform:$/) {
656 $in_summary_seen_platform=1;
657 } elsif (/^\s*$/ || m
|</pre
>|) {
658 # if not html, we have reached the end now
659 if ($in_summary_seen_platform) {
660 # some perls have an empty line after the summary line
664 my(%kv) = m
!\G
,?\s
*([^=]+)= # left hand side and equal sign
666 [^',\s]+(?=.+=) # use64bitint=define use64bitall=define uselongdouble=undef
667 # (lookahead needed for left-over equal sign)
669 [^',]+$ # libpth=/usr/lib /usr/local/lib
671 '[^']+?
' # cccdlflags='-DPIC
-fPIC
'
673 \S+ # useshrplib=false
675 while (my($k,$v) = each %kv) {
679 if ($v =~ /^'(.*)'$/) {
684 if ($qr && $ck =~ $qr) {
686 } elsif ($conf_vars{$ck}) {
692 if ($in_prg_output) {
693 unless ($extract{"meta:output_from"}) {
694 if (/Output from (.+):$/) {
695 $extract{"meta:output_from"} = $1
699 if ($in_env_context) {
700 if ($extract{"meta:writer"} =~ /^CPANPLUS\b/
702 exists $extract{"env:PERL5_CPANPLUS_IS_VERSION"}
705 s/Perl:\s+\$\^X/\$^X/
707 s/EUID:\s+\$>/\$EUID/
711 s/EGID:\s+\$\)/\$EGID/
716 if (my($left,$right) = /^\s{4}(\S+)\s*=\s*(.*)$/) {
717 if ($left eq '$UID/$EUID') {
718 my($uid,$euid) = split m{\s*/\s*}, $right;
719 $extract{'env
:$UID'} = $uid;
720 $extract{'env
:$EUID'} = $euid;
721 } elsif ($left =~ /GID/) {
722 for my $xgid (uniq split " ", $right) {
723 $extract{"env:$left∋$xgid"} = "true";
726 $extract{"env:$left"} = $right;
730 if ($in_test_summary) {
731 if (/^(?:Result:|Files=\d)/) {
732 $in_test_summary = 0;
733 } elsif (/^(\S+)\s+\(Wstat:.+?Tests:.+?Failed:\s*(\d+)\)$/) {
734 my $in_test_summary_current_test = $1; # t/globtest.t or t\globtest.t
735 my $in_test_summary_current_failed = $2;
736 $in_test_summary_current_test =~ s|\\|/|g; # only t/globtest.t
737 $extract{"fail:$in_test_summary_current_test"} = $in_test_summary_current_failed;
738 } elsif (/^\s+Failed tests?:/) {
739 # ignoring the exact combination of tests for now, seems like overkill
742 push @previous_line, $_;
743 if ($expect_prereq || $expect_toolchain) {
744 if (/Perl module toolchain versions installed/) {
745 # first time discovered in CPANPLUS 0.89_06
746 $expecting_toolchain_soon = 1;
750 if (exists $moduleunpack->{type}) {
751 my($module,$v,$needwant);
752 # type 1 and 2 are about prereqs, type three about toolchain
753 if ($moduleunpack->{type} == 1) {
754 (my $leader,$module,$needwant,$v) = eval { unpack $moduleunpack->{tpl}, $_; };
756 if ($leader =~ /^-/) {
760 } elsif ($leader =~ /^(
761 buil # build_requires:
762 |conf # configure_requires:
765 } elsif ($module =~ /^(
770 } elsif ($moduleunpack->{type} == 2) {
771 (my $leader,$module,$v,$needwant) = eval { unpack $moduleunpack->{tpl}, $_; };
773 for ($module,$v,$needwant) {
777 if ($leader =~ /^\*/) {
782 or !defined $needwant
786 ($module,$v,$needwant) = split " ", $_;
788 } elsif ($moduleunpack->{type} == 3) {
789 (my $leader,$module,$v) = eval { unpack $moduleunpack->{tpl}, $_; };
793 $expect_toolchain = 0;
795 } elsif ($module =~ /^-/) {
803 my($modulename,$versionlead) = split " ", $module;
804 if (defined $modulename and defined $versionlead) {
805 $module = $modulename;
806 $v = "$versionlead$v";
811 $extract{"mod:$module"} = $v;
812 if (defined $needwant) {
813 $needwant =~ s/^\s+//;
814 $needwant =~ s/\s+$//;
815 $extract{"prereq:$module"} = $needwant;
819 if (/(\s+)(Module\s+)(Need\s+)Have/) {
822 tpl => 'a
'.length($1).'a
'.length($2).'a
'.length($3).'a
*',
825 } elsif (/(\s+)(Module Name\s+)(Have)(\s+)Want/) {
828 my $adjust_2 = -length($4);
829 my $adjust_3 = length($4);
830 # I think they do not really try to align, usually we
831 # get away with split
833 tpl => 'a
'.length($1).'a
'.(length($2)+$adjust_2).'a
'.(length($3)+$adjust_3).'a
*',
838 if (/PREREQUISITES|Prerequisite modules loaded/) {
842 if ($expecting_toolchain_soon) {
843 if (/(\s+)(Module(?:\sName)?\s+) Have/) {
846 $expecting_toolchain_soon=0;
848 tpl => 'a
'.length($1).'a
'.length($2).'a
*',
853 if (/toolchain versions installed/) {
855 $expecting_toolchain_soon=1;
858 if (! $extract{"mod:CPANPLUS"} && $extract{"meta:writer"} =~ /^CPANPLUS\s(\d+(\.\d+))$/) {
859 $extract{"mod:CPANPLUS"} = $1;
861 if (! $extract{"meta:perl"} && $fallback_p5) {
862 my($p5,$patch) = split /\s+patch\s+/, $fallback_p5;
863 $extract{"meta:perl"} = $p5;
864 $extract{"conf:git_describe"} = $patch if defined $patch;
867 if (my $filtercbbody = $Opt{filtercb}) {
868 my $filtercb = eval('sub {'.$filtercbbody.'}');
869 $filtercb->(\%extract);
872 if ($extract{"conf:osvers"} && $extract{"conf:archname"}) {
873 $extract{"conf:archname+osvers"} = join " ", @extract{"conf:archname","conf:osvers"};
875 if ($extract{"meta:perl"} && $extract{"conf:osname"}) {
876 $extract{"meta:osname+perl"} = join " ", @extract{"conf:osname","meta:perl"};
878 my $data = $dumpvars->{"==DATA=="} ||= [];
879 push @$data, \%extract;
881 # ---- %extract finished ----
883 if (my $qr = $Opt{dumpvars}) {
885 while (my($k,$v) = each %extract) {
887 $dumpvars->{$k}{$v}{$extract{"meta:ok"}}++;
892 my $have = $extract{$want} || "";
893 $diag .= " $want\[$have]";
895 printf STDERR " %-4s %8d%s\n", $extract{"meta:ok"}, $id, $diag unless $Opt{quiet};
897 $report =~ s/\s+\z//;
898 print STDERR $report, "\n================\n" unless $Opt{quiet};
900 if ($Opt{interactive}) {
901 eval { require IO::Prompt; 1; } or
902 die "Option '--interactive
' requires IO::Prompt installed";
905 my $ans = IO::Prompt::prompt
907 -p => "View $id? [onechar: ynq] ",
912 print STDERR "\n" unless $Opt{quiet};
914 my($report) = _get_cooked_report($target, \%Opt);
915 $Opt{pager} ||= "less";
916 open my $lfh, "|-", $Opt{pager} or die "Could not fork '$Opt{pager
}': $!";
918 print {$lfh} $report;
919 close $lfh or die "Could not close pager: $!"
920 } elsif ($ans eq "q") {
928 sub _get_cooked_report {
929 my($target, $Opt_ref) = @_;
930 my($report, $isHTML);
931 if ($report = $Opt_ref->{article}) {
932 $isHTML = $report =~ /^</;
939 } elsif (-e $target) {
940 open my $fh, '<', $target or die "Could not open '$target': $!";
942 } elsif (-e "$target.gz") {
943 open my $fh, "<", "$target.gz" or die "Could not open '$target.gz
': $!";
945 # Opens a gzip (.gz) file for reading or writing. The mode parameter
946 # is as in fopen ("rb" or "wb") but can also include a compression level
947 # ("wb9") or a strategy: 'f
' for filtered data as in "wb6f", 'h
' for
948 # Huffman only compression as in "wb1h", or 'R
' for run-length encoding
949 # as in "wb1R". (See the description of deflateInit2 for more information
950 # about the strategy parameter.)
952 my $gz = Compress::Zlib::gzopen($fh, "rb");
955 while (my $bytesread = $gz->gzread($buffer)) {
956 $raw_report .= $buffer;
959 die "Could not find '$target' or '$target.gz
'";
961 $isHTML = $raw_report =~ /^</;
963 if ($raw_report =~ m{^<\?.+?<html.+?<head.+?<body.+?<pre[^>]*>(.+)</pre>.*</body>.*</html>}s) {
964 $raw_report = decode_entities($1);
969 $report = decode_entities($raw_report);
970 } elsif ($raw_report =~ /^MIME-Version: 1.0$/m
972 _looks_like_qp($raw_report)
974 # minimizing MIME effort; don't know about reports
in other formats
975 $report = MIME
::QuotedPrint
::decode_qp
($raw_report);
977 $report = $raw_report;
980 if ($report =~ /\r\n/) {
981 my @rlines = split /\r?\n/, $report;
982 $report = join "\n", @rlines;
989 Feeds a couple of potentially interesting data to
990 Statistics::Regression and sorts the result by R^2 descending. Do not
991 confuse this with a prove, rather take it as a useful hint. It can
992 save you minutes of staring at data and provide a quick overview where
993 one should look closer. Displays the N top candidates, where N
994 defaults to 3 and can be set with the C<$Opt{solvetop}> variable.
995 Regressions results with an R^2 of 1.00 are displayed in any case.
997 The function is called when the option C<-solve> is give on the
998 commandline. Several extra config variables are calculated, see source
1003 my %never_solve_on = map {($_ => 1)}
1014 'env:PERL5_CPANPLUS_IS_RUNNING',
1015 'env:PERL5_CPAN_IS_RUNNING',
1016 'env:PERL5_CPAN_IS_RUNNING_IN_RECURSION',
1019 my %normalize_numeric =
1021 id
=> sub { return shift },
1023 # here we were treating date as numeric; current
1024 # implementation requires to decide for one normalization, so
1025 # we decided 2012-02 for a sampling focussing on recentness;
1027 #'meta:date' => sub {
1029 # my($Y,$M,$D,$h,$m,$s) = $v =~ /(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)/;
1030 # unless (defined $M) {
1031 # die "illegal value[$v] for a date";
1033 # Time::Local::timegm($s,$m,$h,$D,$M-1,$Y);
1036 my %normalize_value =
1038 'meta:perl' => sub {
1039 my($perlatpatchlevel) = shift;
1040 my $perl = $perlatpatchlevel;
1044 'meta:date' => sub {
1046 my($Y,$M,$D,$h,$m,$s) = $v =~ /(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)/;
1047 unless (defined $M) {
1048 die "illegal value[$v] for a date";
1050 my $epoch = Time
::Local
::timegm
($s,$m,$h,$D,$M-1,$Y);
1051 my $Y_epoch = time - 2*365.25*86400;
1053 if ($epoch < $Y_epoch) {
1056 my @gmtime = gmtime $Y_epoch; $gmtime[5] += 1900;
1057 if ($Y == $gmtime[5]) {
1060 my $M_epoch = time - 9*7*86400;
1061 if ($epoch < $M_epoch) {
1064 my @gmtime = gmtime $M_epoch; $gmtime[5] += 1900; $gmtime[4]++;
1065 if ($Y == $gmtime[5] && $M == $gmtime[4]) {
1078 require Statistics
::Regression
;
1081 if (my $ycbbody = $Opt{ycb
}) {
1082 $ycb = eval('sub {'.$ycbbody.'}');
1088 if ($rec->{"meta:ok"} eq "PASS") {
1090 } elsif ($rec->{"meta:ok"} eq "FAIL") {
1096 VAR
: for my $variable (sort keys %$V) {
1097 next if $variable eq "==DATA==";
1098 if ($never_solve_on{$variable}){
1099 warn "Skipping '$variable'\n" unless $Opt{quiet
};
1102 my $value_distribution = $V->{$variable};
1103 my $keys = keys %$value_distribution;
1105 if ($normalize_numeric{$variable}) {
1106 push @X, "n_$variable";
1109 for my $value (sort keys %$value_distribution) {
1110 my $pf = $value_distribution->{$value};
1113 if ($pf->{PASS
} || $pf->{FAIL
}) {
1114 my $Xele = sprintf "eq_%s",
1116 $normalize_value{$variable} ?
1117 $normalize_value{$variable}->($value) :
1120 push @X, $Xele unless $seen{$Xele}++;
1124 $pf->{PASS
} xor $pf->{FAIL
}
1127 substr($value,$vl) = "..." if length $value > 3+$vl;
1128 my $poor_mans_freehand_estimation = 0;
1129 if ($poor_mans_freehand_estimation) {
1132 "%4d %4d %-23s | %s\n",
1142 warn "variable[$variable]keys[$keys]X[@X]\n" unless $Opt{quiet
};
1143 next VAR
unless @X > 1;
1149 RECORD
: for my $rec (@
{$V->{"==DATA=="}}) {
1150 my $y = $ycb->($rec);
1151 next RECORD
unless defined $y;
1154 @obs{@X} = (0) x
@X;
1157 if ($x =~ /^eq_(.+)/) {
1159 if (exists $rec->{$variable}
1160 && defined $rec->{$variable}
1163 $normalize_value{$variable} ?
1164 $normalize_value{$variable}->($rec->{$variable}) :
1167 if ($use_v eq $read_v) {
1171 # warn "DEBUG: y[$y]x[$x]obs[$obs{$x}]\n";
1172 } elsif ($x =~ /^n_(.+)/) {
1174 $obs{$x} = eval { $normalize_numeric{$v}->($rec->{$v}); };
1176 warn "Warning: error during parsing v[$v] in record[$rec->{id}]: $@; continuing with undef value";
1180 push @
{$regdata{data
}}, \
%obs;
1182 _run_regression
($variable, \
%regdata, \
@regression, \
%Opt);
1184 my $top = min
($Opt{solvetop
} || 3, scalar @regression);
1185 my $max_rsq = sum
map {1==$_->rsq ?
1 : 0} @regression;
1186 $top = $max_rsq if $max_rsq && $max_rsq > $top;
1190 "State after regression testing: %d results, showing top %d\n\n",
1199 printf "(%d)\n", ++$score;
1200 eval { $reg->print; };
1202 printf "\n\nOops, Statistics::Regression died during ->print() with error message[$@]\n\n";
1204 last if --$top <= 0;
1209 # $variable is the name we pass through to S:R constructor
1210 # $regdata is hash and has the arrays "X" and "data" (observations)
1211 # X goes to S:R constructor
1212 # each observation has a Y which we pass to S:R in an include() call
1213 # $regression is the collector array of results
1214 # $opt are the options from outside, used to see if we are "verbose"
1215 sub _run_regression
{
1216 my($variable,$regdata,$regression,$opt) = @_;
1217 my @X = @
{$regdata->{X
}};
1218 # my $splo = $regdata->{"spliced-out"} = []; # maybe can be used to
1219 # hold the reference
1222 my $reg = Statistics
::Regression
->new($variable,\
@X);
1223 for my $obs (@
{$regdata->{data
}}) {
1224 my $y = delete $obs->{Y
};
1225 $reg->include($y, $obs);
1229 my @e = $reg->standarderrors;
1230 die "found standarderrors == 0" if grep { 0 == $_ } @e;
1233 if ($opt->{verbose
} && $opt->{verbose
}>=2) {
1235 ({error
=>"could not determine some regression parameters",
1236 variable
=>$variable,
1243 # reduce k in case that linear dependencies disturbed us;
1244 # often called reference group; I'm tempted to collect and
1249 push @
$regression, $reg;
1261 Please report any bugs or feature requests through the web
1263 L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=CPAN-Testers-ParseReport>.
1264 I will be notified, and then you'll automatically be notified of
1265 progress on your bug as I make changes.
1269 You can find documentation for this module with the perldoc command.
1271 perldoc CPAN::Testers::ParseReport
1274 You can also look for information at:
1278 =item * RT: CPAN's request tracker
1280 L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=CPAN-Testers-ParseReport>
1282 =item * AnnoCPAN: Annotated CPAN documentation
1284 L<http://annocpan.org/dist/CPAN-Testers-ParseReport>
1286 =item * CPAN Ratings
1288 L<http://cpanratings.perl.org/d/CPAN-Testers-ParseReport>
1292 L<http://search.cpan.org/dist/CPAN-Testers-ParseReport>
1297 =head1 ACKNOWLEDGEMENTS
1299 Thanks to RJBS for module-starter.
1301 =head1 COPYRIGHT & LICENSE
1303 Copyright 2008,2009,2010,2011,2012,2013 Andreas König.
1305 This program is free software; you can redistribute it and/or modify it
1306 under the same terms as Perl itself.
1311 1; # End of CPAN::Testers::ParseReport