take perl version from first para if summary is lost
[cpan-testers-parsereport.git] / lib / CPAN / Testers / ParseReport.pm
blobfa17619dd7fbd11f5da1940032e9e5fca08335cc
1 package CPAN::Testers::ParseReport;
3 use warnings;
4 use strict;
6 use Config::Perl::V ();
7 use DateTime::Format::Strptime;
8 use File::Basename qw(basename);
9 use File::Path qw(mkpath);
10 use HTML::Entities qw(decode_entities);
11 use LWP::UserAgent;
12 use List::Util qw(max min sum);
13 use MIME::QuotedPrint ();
14 use Net::NNTP ();
15 use Time::Local ();
16 use XML::LibXML;
17 use XML::LibXML::XPathContext;
19 our $default_ctformat = "yaml";
20 our $default_transport = "nntp";
21 our $default_cturl = "http://www.cpantesters.org/show";
22 our $Signal = 0;
24 =encoding utf-8
26 =head1 NAME
28 CPAN::Testers::ParseReport - parse reports to www.cpantesters.org from various sources
30 =cut
32 use version; our $VERSION = qv('0.1.10');
34 =head1 SYNOPSIS
36 The documentation in here is normally not needed because the code is
37 meant to be run from a standalone program, L<ctgetreports>.
39 ctgetreports --q mod:Moose Devel-Events
41 =head1 DESCRIPTION
43 This is the core module for CPAN::Testers::ParseReport. If you're not
44 looking to extend or alter the behaviour of this module, you probably
45 want to look at L<ctgetreports> instead.
47 =head1 OPTIONS
49 Are described in the L<ctgetreports> manpage and are passed through to
50 the functions unaltered.
52 =head1 FUNCTIONS
54 =head2 parse_distro($distro,%options)
56 reads the cpantesters HTML page or the YAML file or the local database
57 for the distro and loops through the reports for the specified or most
58 recent version of that distro found in these data.
60 parse_distro() intentionally has no meaningful return value, different
61 options would require different ones.
63 =head2 $extract = parse_single_report($report,$dumpvars,%options)
65 mirrors and reads this report. $report is of the form
67 { id => number }
69 $dumpvar is a hashreference that gets filled with data.
71 $extract is the result of parse_report() described below.
73 =cut
76 my $ua;
77 sub _ua {
78 return $ua if $ua;
79 $ua = LWP::UserAgent->new
81 keep_alive => 1,
82 env_proxy => 1,
84 $ua->parse_head(0);
85 $ua;
90 my $nntp;
91 sub _nntp {
92 return $nntp if $nntp;
93 $nntp = Net::NNTP->new("nntp.perl.org");
94 $nntp->group("perl.cpan.testers");
95 return $nntp;
100 my $xp;
101 sub _xp {
102 return $xp if $xp;
103 $xp = XML::LibXML->new;
104 $xp->keep_blanks(0);
105 $xp->clean_namespaces(1);
106 my $catalog = __FILE__;
107 $catalog =~ s|ParseReport.pm$|ParseReport/catalog|;
108 $xp->load_catalog($catalog);
109 return $xp;
113 sub _download_overview {
114 my($cts_dir, $distro, %Opt) = @_;
115 my $format = $Opt{ctformat} ||= $default_ctformat;
116 my $cturl = $Opt{cturl} ||= $default_cturl;
117 my $ctarget = "$cts_dir/$distro.$format";
118 my $cheaders = "$cts_dir/$distro.headers";
119 if ($Opt{local}) {
120 unless (-e $ctarget) {
121 die "Alert: No local file '$ctarget' found, cannot continue\n";
123 } else {
124 if (! -e $ctarget or -M $ctarget > .25) {
125 if (-e $ctarget && $Opt{verbose}) {
126 my(@stat) = stat _;
127 my $timestamp = gmtime $stat[9];
128 print STDERR "(timestamp $timestamp GMT)\n" unless $Opt{quiet};
130 print STDERR "Fetching $ctarget..." if $Opt{verbose} && !$Opt{quiet};
131 my $uri = "$cturl/$distro.$format";
132 my $resp = _ua->mirror($uri,$ctarget);
133 if ($resp->is_success) {
134 print STDERR "DONE\n" if $Opt{verbose} && !$Opt{quiet};
135 open my $fh, ">", $cheaders or die;
136 for ($resp->headers->as_string) {
137 print $fh $_;
138 if ($Opt{verbose} && $Opt{verbose}>1) {
139 print STDERR $_ unless $Opt{quiet};
142 } elsif (304 == $resp->code) {
143 print STDERR "DONE (not modified)\n" if $Opt{verbose} && !$Opt{quiet};
144 my $atime = my $mtime = time;
145 utime $atime, $mtime, $cheaders;
146 } else {
147 die sprintf
149 "No success downloading %s: %s",
150 $uri,
151 $resp->status_line,
156 return $ctarget;
159 sub _parse_html {
160 my($ctarget, %Opt) = @_;
161 my $content = do { open my $fh, $ctarget or die; local $/; <$fh> };
162 my $preprocesswithtreebuilder = 0; # not needed since barbie switched to XHTML
163 if ($preprocesswithtreebuilder) {
164 require HTML::TreeBuilder;
165 my $tree = HTML::TreeBuilder->new;
166 $tree->implicit_tags(1);
167 $tree->p_strict(1);
168 $tree->ignore_ignorable_whitespace(0);
169 $tree->parse_content($content);
170 $tree->eof;
171 $content = $tree->as_XML;
173 my $parser = _xp();
174 my $doc = eval { $parser->parse_string($content) };
175 my $err = $@;
176 unless ($doc) {
177 my $distro = basename $ctarget;
178 die sprintf "Error while parsing %s\: %s", $distro, $err;
180 my $xc = XML::LibXML::XPathContext->new($doc);
181 my $nsu = $doc->documentElement->namespaceURI;
182 $xc->registerNs('x', $nsu) if $nsu;
183 my($selected_release_ul,$selected_release_distrov,$excuse_string);
184 my($cparentdiv)
185 = $nsu ?
186 $xc->findnodes("/x:html/x:body/x:div[\@id = 'doc']") :
187 $doc->findnodes("/html/body/div[\@id = 'doc']");
188 my(@releasedivs) = $nsu ?
189 $xc->findnodes("//x:div[x:h2 and x:ul]",$cparentdiv) :
190 $cparentdiv->findnodes("//div[h2 and ul]");
191 my $releasediv;
192 if ($Opt{vdistro}) {
193 $excuse_string = "selected distro '$Opt{vdistro}'";
194 my($fallbacktoversion) = $Opt{vdistro} =~ /(\d+\..*)/;
195 $fallbacktoversion = 0 unless defined $fallbacktoversion;
196 RELEASE: for my $i (0..$#releasedivs) {
197 my $picked = "";
198 my($x) = $nsu ?
199 $xc->findvalue("x:h2/x:a[2]/\@name",$releasedivs[$i]) :
200 $releasedivs[$i]->findvalue("h2/a[2]/\@name");
201 if ($x) {
202 if ($x eq $Opt{vdistro}) {
203 $releasediv = $i;
204 $picked = " (picked)";
206 print STDERR "FOUND DISTRO: $x$picked\n" unless $Opt{quiet};
207 } else {
208 ($x) = $nsu ?
209 $xc->findvalue("x:h2/x:a[1]/\@name",$releasedivs[$i]) :
210 $releasedivs[$i]->findvalue("h2/a[1]/\@name");
211 if ($x eq $fallbacktoversion) {
212 $releasediv = $i;
213 $picked = " (picked)";
215 print STDERR "FOUND VERSION: $x$picked\n" unless $Opt{quiet};
218 } else {
219 $excuse_string = "any distro";
221 unless (defined $releasediv) {
222 $releasediv = 0;
224 # using a[1] because a[2] is missing on the first entry
225 ($selected_release_distrov) = $nsu ?
226 $xc->findvalue("x:h2/x:a[1]/\@name",$releasedivs[$releasediv]) :
227 $releasedivs[$releasediv]->findvalue("h2/a[1]/\@name");
228 ($selected_release_ul) = $nsu ?
229 $xc->findnodes("x:ul",$releasedivs[$releasediv]) :
230 $releasedivs[$releasediv]->findnodes("ul");
231 unless (defined $selected_release_distrov) {
232 warn "Warning: could not find $excuse_string in '$ctarget'";
233 return;
235 print STDERR "SELECTED: $selected_release_distrov\n" unless $Opt{quiet};
236 my($id);
237 my @all;
238 for my $test ($nsu ?
239 $xc->findnodes("x:li",$selected_release_ul) :
240 $selected_release_ul->findnodes("li")) {
241 $id = $nsu ?
242 $xc->findvalue("x:a[1]/text()",$test) :
243 $test->findvalue("a[1]/text()");
244 push @all, {id=>$id};
245 return if $Signal;
247 return \@all;
250 sub _parse_yaml {
251 my($ctarget, %Opt) = @_;
252 require YAML::Syck;
253 my $arr = YAML::Syck::LoadFile($ctarget);
254 my($selected_release_ul,$selected_release_distrov,$excuse_string);
255 if ($Opt{vdistro}) {
256 $excuse_string = "selected distro '$Opt{vdistro}'";
257 $arr = [grep {$_->{distversion} eq $Opt{vdistro}} @$arr];
258 ($selected_release_distrov) = $arr->[0]{distversion};
259 } else {
260 $excuse_string = "any distro";
261 my $last_addition;
262 my %seen;
263 for my $report (sort { $a->{id} <=> $b->{id} } @$arr) {
264 unless ($seen{$report->{distversion}}++) {
265 $last_addition = $report->{distversion};
268 $arr = [grep {$_->{distversion} eq $last_addition} @$arr];
269 ($selected_release_distrov) = $last_addition;
271 unless ($selected_release_distrov) {
272 warn "Warning: could not find $excuse_string in '$ctarget'";
273 return;
275 print STDERR "SELECTED: $selected_release_distrov\n" unless $Opt{quiet};
276 my @all;
277 for my $test (@$arr) {
278 my $id = $test->{id};
279 push @all, {id=>$id};
280 return if $Signal;
282 @all = sort { $b->{id} <=> $a->{id} } @all;
283 return \@all;
286 sub parse_single_report {
287 my($report, $dumpvars, %Opt) = @_;
288 my($id) = $report->{id};
289 $Opt{cachedir} ||= "$ENV{HOME}/var/cpantesters";
290 my $nnt_dir = "$Opt{cachedir}/nntp-testers";
291 mkpath $nnt_dir;
292 my $target = "$nnt_dir/$id";
293 if ($Opt{local}) {
294 unless (-e $target) {
295 die {severity=>0,text=>"Warning: No local file '$target' found, skipping\n"};
297 } else {
298 if (! -e $target) {
299 print STDERR "Fetching $target..." if $Opt{verbose} && !$Opt{quiet};
300 $Opt{transport} ||= $default_transport;
301 if ($Opt{transport} eq "nntp") {
302 my $article = _nntp->article($id);
303 unless ($article) {
304 die {severity=>0,text=>"NNTP-Server did not return an article for id[$id]"};
306 open my $fh, ">", $target or die {severity=>1,text=>"Could not open >$target: $!"};
307 print $fh @$article;
308 } elsif ($Opt{transport} eq "http") {
309 my $resp = _ua->mirror("http://www.nntp.perl.org/group/perl.cpan.testers/$id",$target);
310 if ($resp->is_success) {
311 if ($Opt{verbose}) {
312 my(@stat) = stat $target;
313 my $timestamp = gmtime $stat[9];
314 print STDERR "(timestamp $timestamp GMT)\n" unless $Opt{quiet};
315 if ($Opt{verbose} > 1) {
316 print STDERR $resp->headers->as_string unless $Opt{quiet};
319 my $headers = "$target.headers";
320 open my $fh, ">", $headers or die {severity=>1,text=>"Could not open >$headers: $!"};
321 print $fh $resp->headers->as_string;
322 } else {
323 die {severity=>0,
324 text=>sprintf "HTTP Server Error[%s] for id[%s]", $resp->status_line, $id};
326 } else {
327 die {severity=>1,text=>"Illegal value for --transport: '$Opt{transport}'"};
331 parse_report($target, $dumpvars, %Opt);
334 sub parse_distro {
335 my($distro,%Opt) = @_;
336 my %dumpvars;
337 $Opt{cachedir} ||= "$ENV{HOME}/var/cpantesters";
338 my $cts_dir = "$Opt{cachedir}/cpantesters-show";
339 mkpath $cts_dir;
340 if ($Opt{solve}) {
341 require Statistics::Regression;
342 $Opt{dumpvars} = "." unless defined $Opt{dumpvars};
344 if (!$Opt{vdistro} && $distro =~ /^(.+)-(?i:v?\d+)(?:\.\d+)*\w*$/) {
345 $Opt{vdistro} = $distro;
346 $distro = $1;
348 my $reports;
349 if (my $ctdb = $Opt{ctdb}) {
350 require CPAN::WWW::Testers::Generator::Database;
351 require CPAN::DistnameInfo;
352 my $dbi = CPAN::WWW::Testers::Generator::Database->new(database=>$ctdb) or die "Alert: unknown error while opening database '$ctdb'";
353 unless ($Opt{vdistro}) {
354 my $sql = "select version from cpanstats where dist=? order by id";
355 my @rows = $dbi->get_query($sql,$distro);
356 my($newest,%seen);
357 for my $row (@rows) {
358 $newest = $row->[0] unless $seen{$row->[0]}++;
360 $Opt{vdistro} = "$distro-$newest";
362 my $d = CPAN::DistnameInfo->new("FOO/$Opt{vdistro}.tgz");
363 my $dist = $d->dist;
364 my $version = $d->version;
365 my $sql = "select id from cpanstats where dist=? and version=? order by id desc";
366 my @rows = $dbi->get_query($sql,$dist,$version);
367 my @all;
368 for my $row (@rows) {
369 my $id = $row->[0];
370 push @all, {id=>$id};
372 $reports = \@all;
373 } else {
374 my $ctarget = _download_overview($cts_dir, $distro, %Opt);
375 $Opt{ctformat} ||= $default_ctformat;
376 if ($Opt{ctformat} eq "html") {
377 $reports = _parse_html($ctarget,%Opt);
378 } else {
379 $reports = _parse_yaml($ctarget,%Opt);
382 return unless $reports;
383 for my $report (@$reports) {
384 eval {parse_single_report($report, \%dumpvars, %Opt)};
385 if ($@) {
386 if (ref $@) {
387 if ($@->{severity}) {
388 die $@->{text};
389 } else {
390 warn $@->{text};
392 } else {
393 die $@;
396 last if $Signal;
398 if ($Opt{dumpvars}) {
399 require YAML::Syck;
400 my $dumpfile = $Opt{dumpfile} || "ctgetreports.out";
401 open my $fh, ">", $dumpfile or die "Could not open '$dumpfile' for writing: $!";
402 print $fh YAML::Syck::Dump(\%dumpvars);
403 close $fh or die "Could not close '$dumpfile': $!"
405 if ($Opt{solve}) {
406 solve(\%dumpvars,%Opt);
410 =head2 $bool = _looks_like_qp($raw_report)
412 We had to acknowledge the fact that some MTAs swallow the MIME-Version
413 header while passing MIME through. So we introduce fallback heuristics
414 that try to determine if a report is written in quoted printable.
416 Note that this subroutine is internal, just documented to have the
417 internals documented.
419 The current implementation counts the number of QP escaped spaces and
420 equal signs.
422 =cut
424 sub _looks_like_qp {
425 my($report) = @_;
426 my $count_space = () = $report =~ /=20/g;
427 return 1 if $count_space > 12;
428 my $count_equal = () = $report =~ /=3D/g;
429 return 1 if $count_equal > 12;
430 return 1 if $count_space+$count_equal > 24;
431 return 0; # waiting for a counter example
434 =head2 $extract = parse_report($target,$dumpvars,%Opt)
436 Reads one report. $target is the local filename to read. $dumpvars is
437 a hashref which gets filled with descriptive stats about
438 PASS/FAIL/etc. %Opt are the options as described in the
439 C<ctgetreports> manpage. $extract is a hashref containing the found
440 variables.
442 Note: this parsing is a bit dirty but as it seems good enough I'm not
443 inclined to change it. We parse HTML with regexps only, not an HTML
444 parser. Only the entities are decoded.
446 Update around version 0.0.17: switching to nntp now but keeping the
447 parser for HTML around to read old local copies.
449 Update around 0.0.18: In %Options you can use
451 article => $some_full_article_as_scalar
453 to use this function to parse one full article as text. When this is
454 given, the argument $target is not read, but its basename is taken to
455 be the id of the article. (OMG, hackers!)
457 =cut
458 sub parse_report {
459 my($target,$dumpvars,%Opt) = @_;
460 our @q;
461 my $id = basename($target);
462 # warn "DEBUG: id[$id]";
463 my($ok,$about);
465 my(%extract);
467 my($report,$isHTML) = _get_cooked_report($target, \%Opt);
468 my @qr = map /^qr:(.+)/, @{$Opt{q}};
469 if ($Opt{raw} || @qr) {
470 for my $qr (@qr) {
471 my $cqr = eval "qr{$qr}";
472 die "Could not compile regular expression '$qr': $@" if $@;
473 my(@matches) = $report =~ $cqr;
474 my $v;
475 if (@matches) {
476 if (@matches==1) {
477 $v = $matches[0];
478 } else {
479 $v = join "", map {"($_)"} @matches;
481 } else {
482 $v = "";
484 $extract{"qr:$qr"} = $v;
488 my $report_writer;
489 my $moduleunpack = {};
490 my $expect_prereq = 0;
491 my $expect_toolchain = 0;
492 my $expecting_toolchain_soon = 0;
493 my $fallback_p5 = "";
495 my $in_summary = 0;
496 my $in_summary_seen_platform = 0;
497 my $in_prg_output = 0;
498 my $in_env_context = 0;
500 my $current_headline;
501 my @previous_line = ""; # so we can neutralize line breaks
502 my @rlines = split /\r?\n/, $report;
503 LINE: for (@rlines) {
504 next LINE unless ($isHTML ? m/<title>(\S+)\s+(\S+)/ : m/^Subject:\s*(\S+)\s+(\S+)/);
505 $ok = $1;
506 $about = $2;
507 $extract{"meta:ok"} = $ok;
508 $extract{"meta:about"} = $about;
509 last;
511 LINE: while (@rlines) {
512 $_ = shift @rlines;
513 while (/!$/ and @rlines) {
514 my $followupline = shift @rlines;
515 $followupline =~ s/^\s+//; # remo leading space
516 $_ .= $followupline;
518 if (/^--------/ && $previous_line[-2] && $previous_line[-2] =~ /^--------/) {
519 $current_headline = $previous_line[-1];
520 if ($current_headline =~ /PROGRAM OUTPUT/) {
521 $in_prg_output = 1;
522 } else {
523 $in_prg_output = 0;
525 if ($current_headline =~ /ENVIRONMENT AND OTHER CONTEXT/) {
526 $in_env_context = 1;
527 } else {
528 $in_env_context = 0;
531 if ($extract{"meta:perl"}) {
532 if ( $in_summary
533 and !$extract{"conf:git_commit_id"}
534 and /Commit id:\s*([[:xdigit:]]+)/) {
535 $extract{"conf:git_commit_id"} = $1;
537 } else {
538 my $p5;
539 if (0) {
540 } elsif (/Summary of my perl5 \((.+)\) configuration:/) {
541 $p5 = $1;
542 $in_summary = 1;
543 $in_env_context = 0;
545 if ($p5) {
546 my($r,$v,$s,$p);
547 if (($r,$v,$s,$p) = $p5 =~ /revision (\S+) version (\S+) subversion (\S+) patch (\S+)/) {
548 $r =~ s/\.0//; # 5.0 6 2!
549 $extract{"meta:perl"} = "$r.$v.$s\@$p";
550 } elsif (($r,$v,$s) = $p5 =~ /revision (\S+) version (\S+) subversion (\S+)/) {
551 $r =~ s/\.0//;
552 $extract{"meta:perl"} = "$r.$v.$s";
553 } elsif (($r,$v,$s) = $p5 =~ /(\d+\S*) patchlevel (\S+) subversion (\S+)/) {
554 $r =~ s/\.0//;
555 $extract{"meta:perl"} = "$r.$v.$s";
556 } else {
557 $extract{"meta:perl"} = $p5;
561 unless ($extract{"meta:from"}) {
562 if (0) {
563 } elsif ($isHTML ?
564 m|<div class="h_name">From:</div> <b>(.+?)</b><br/>| :
565 m|^From:\s*(.+)|
567 $extract{"meta:from"} = $1;
569 $extract{"meta:from"} =~ s/\.$// if $extract{"meta:from"};
571 unless ($extract{"meta:date"}) {
572 if (0) {
573 } elsif ($isHTML ?
574 m|<div class="h_name">Date:</div> (.+?)<br/>| :
575 m|^Date:\s*(.+)|
577 my $date = $1;
578 my($dt);
579 DATEFMT: for my $pat ("%a, %d %b %Y %T %z", # Sun, 28 Sep 2008 12:23:12 +0100
580 "%b %d, %Y %R", # July 10,...
581 "%b %d, %Y %R", # July 4,...
583 $dt = eval {
584 my $p = DateTime::Format::Strptime->new
586 locale => "en",
587 time_zone => "UTC",
588 pattern => $pat,
590 $p->parse_datetime($date)
592 last DATEFMT if $dt;
594 unless ($dt) {
595 warn "Could not parse date[$date], setting to epoch 0";
596 $dt = DateTime->from_epoch( epoch => 0 );
598 $extract{"meta:date"} = $dt->datetime;
600 $extract{"meta:date"} =~ s/\.$// if $extract{"meta:date"};
602 unless ($extract{"meta:writer"}) {
603 for ("$previous_line[-1] $_") {
604 if (0) {
605 } elsif (/CPANPLUS, version (\S+)/) {
606 $extract{"meta:writer"} = "CPANPLUS $1";
607 } elsif (/created (?:automatically )?by (\S+)/) {
608 $extract{"meta:writer"} = $1;
609 if (/\s+on\s+perl\s+(\S+),/) {
610 $fallback_p5 = $1;
612 } elsif (/This report was machine-generated by (\S+) (\S+)/) {
613 $extract{"meta:writer"} = "$1 $2";
615 $extract{"meta:writer"} =~ s/[\.,]$// if $extract{"meta:writer"};
618 if ($in_summary) {
619 # we do that first three lines a bit too often
620 my $qr = $Opt{dumpvars} || "";
621 $qr = qr/$qr/ if $qr;
622 unless (@q) {
623 @q = @{$Opt{q}||[]};
624 @q = qw(meta:perl conf:archname conf:usethreads conf:optimize meta:writer meta:from) unless @q;
627 my %conf_vars = map {($_ => 1)} grep { /^conf:/ } @q;
629 if (/^\s+Platform:$/) {
630 $in_summary_seen_platform=1;
631 } elsif (/^\s*$/ || m|</pre>|) {
632 # if not html, we have reached the end now
633 if ($in_summary_seen_platform) {
634 # some perls have an empty line after the summary line
635 $in_summary = 0;
637 } else {
638 my(%kv) = /\G,?\s*([^=]+)=('[^']+?'|\S+)/gc;
639 while (my($k,$v) = each %kv) {
640 my $ck = "conf:$k";
641 $ck =~ s/\s+$//;
642 $v =~ s/,$//;
643 if ($v =~ /^'(.*)'$/) {
644 $v = $1;
646 $v =~ s/^\s+//;
647 $v =~ s/\s+$//;
648 if ($qr && $ck =~ $qr) {
649 $extract{$ck} = $v;
650 } elsif ($conf_vars{$ck}) {
651 $extract{$ck} = $v;
656 if ($in_prg_output) {
657 unless ($extract{"meta:output_from"}) {
658 if (/Output from (.+):$/) {
659 $extract{"meta:output_from"} = $1
663 if ($in_env_context) {
664 if (/^\s{4}(\S+)\s*=\s*(.*)$/) {
665 $extract{"env:$1"} = $2;
668 push @previous_line, $_;
669 if ($expect_prereq || $expect_toolchain) {
670 if (/Perl module toolchain versions installed/) {
671 # first time discovered in CPANPLUS 0.89_06
672 $expecting_toolchain_soon = 1;
673 $expect_prereq=0;
674 next LINE;
676 if (exists $moduleunpack->{type}) {
677 my($module,$v,$needwant);
678 # type 1 and 2 are about prereqs, type three about toolchain
679 if ($moduleunpack->{type} == 1) {
680 (my $leader,$module,$needwant,$v) = eval { unpack $moduleunpack->{tpl}, $_; };
681 next LINE if $@;
682 if ($leader =~ /^-/) {
683 $moduleunpack = {};
684 $expect_prereq = 0;
685 next LINE;
686 } elsif ($leader =~ /^(
687 buil # build_requires:
688 )/x) {
689 next LINE;
690 } elsif ($module =~ /^(
691 - # line drawing
692 )/x) {
693 next LINE;
695 } elsif ($moduleunpack->{type} == 2) {
696 (my $leader,$module,$v,$needwant) = eval { unpack $moduleunpack->{tpl}, $_; };
697 next LINE if $@;
698 if ($leader =~ /^\*/) {
699 $moduleunpack = {};
700 $expect_prereq = 0;
701 next LINE;
702 } elsif (!defined $v
703 or !defined $needwant
704 or $v =~ /\s/
705 or $needwant =~ /\s/
707 ($module,$v,$needwant) = split " ", $_;
709 } elsif ($moduleunpack->{type} == 3) {
710 (my $leader,$module,$v) = eval { unpack $moduleunpack->{tpl}, $_; };
711 next LINE if $@;
712 if (!$module) {
713 $moduleunpack = {};
714 $expect_toolchain = 0;
715 next LINE;
716 } elsif ($module =~ /^-/) {
717 next LINE;
720 $module =~ s/\s+$//;
721 if ($module) {
722 $v =~ s/^\s+//;
723 $v =~ s/\s+$//;
724 my($modulename,$versionlead) = split " ", $module;
725 if (defined $modulename and defined $versionlead) {
726 $module = $modulename;
727 $v = "$versionlead$v";
729 if ($v eq "Have") {
730 next LINE;
732 $extract{"mod:$module"} = $v;
733 if ($needwant) {
734 $needwant =~ s/^\s+//;
735 $needwant =~ s/\s+$//;
736 $extract{"prereq:$module"} = $needwant;
740 if (/(\s+)(Module\s+)(Need\s+)Have/) {
741 $in_env_context = 0;
742 $moduleunpack = {
743 tpl => 'a'.length($1).'a'.length($2).'a'.length($3).'a*',
744 type => 1,
746 } elsif (/(\s+)(Module Name\s+)(Have)(\s+)Want/) {
747 $in_env_context = 0;
748 my $adjust_1 = 0;
749 my $adjust_2 = -length($4);
750 my $adjust_3 = length($4);
751 # I think they do not really try to align, usually we
752 # get away with split
753 $moduleunpack = {
754 tpl => 'a'.length($1).'a'.(length($2)+$adjust_2).'a'.(length($3)+$adjust_3).'a*',
755 type => 2,
759 if (/PREREQUISITES|Prerequisite modules loaded/) {
760 $in_env_context = 0;
761 $expect_prereq=1;
763 if ($expecting_toolchain_soon) {
764 if (/(\s+)(Module(?:\sName)?\s+) Have/) {
765 $in_env_context = 0;
766 $expect_toolchain=1;
767 $expecting_toolchain_soon=0;
768 $moduleunpack = {
769 tpl => 'a'.length($1).'a'.length($2).'a*',
770 type => 3,
774 if (/toolchain versions installed/) {
775 $in_env_context = 0;
776 $expecting_toolchain_soon=1;
778 } # LINE
779 if (! $extract{"meta:perl"} && $fallback_p5) {
780 $extract{"meta:perl"} = $fallback_p5;
782 if ($Opt{solve}) {
783 $extract{id} = $id;
784 if ($extract{"conf:osvers"} && $extract{"conf:archname"}) {
785 $extract{"conf:archname+osvers"} = join " ", @extract{"conf:archname","conf:osvers"};
787 my $data = $dumpvars->{"==DATA=="} ||= [];
788 push @$data, \%extract;
790 # ---- %extract finished ----
791 my $diag = "";
792 if (my $qr = $Opt{dumpvars}) {
793 $qr = qr/$qr/;
794 while (my($k,$v) = each %extract) {
795 if ($k =~ $qr) {
796 $dumpvars->{$k}{$v}{$ok}++;
800 for my $want (@q) {
801 my $have = $extract{$want} || "";
802 $diag .= " $want\[$have]";
804 printf STDERR " %-4s %8d%s\n", $ok, $id, $diag unless $Opt{quiet};
805 if ($Opt{raw}) {
806 $report =~ s/\s+\z//;
807 print STDERR $report, "\n================\n" unless $Opt{quiet};
809 if ($Opt{interactive}) {
810 require IO::Prompt;
811 local @ARGV;
812 local $ARGV;
813 my $ans = IO::Prompt::prompt
815 -p => "View $id? [onechar: ynq] ",
816 -d => "y",
817 -u => qr/[ynq]/,
818 -onechar,
820 print STDERR "\n" unless $Opt{quiet};
821 if ($ans eq "y") {
822 my($report) = _get_cooked_report($target, \%Opt);
823 $Opt{pager} ||= "less";
824 open my $lfh, "|-", $Opt{pager} or die "Could not fork '$Opt{pager}': $!";
825 local $/;
826 print {$lfh} $report;
827 close $lfh or die "Could not close pager: $!"
828 } elsif ($ans eq "q") {
829 $Signal++;
830 return;
833 return \%extract;
836 sub _get_cooked_report {
837 my($target, $Opt_ref) = @_;
838 my($report, $isHTML);
839 if ($report = $Opt_ref->{article}) {
840 $isHTML = $report =~ /^</;
841 undef $target;
843 if ($target) {
844 open my $fh, $target or die "Could not open '$target': $!";
845 local $/;
846 my $raw_report = <$fh>;
847 $isHTML = $raw_report =~ /^</;
848 if ($isHTML) {
849 $report = decode_entities($raw_report);
850 } elsif ($raw_report =~ /^MIME-Version: 1.0$/m
852 _looks_like_qp($raw_report)
854 # minimizing MIME effort; don't know about reports in other formats
855 $report = MIME::QuotedPrint::decode_qp($raw_report);
856 } else {
857 $report = $raw_report;
859 close $fh;
861 if ($report =~ /\r\n/) {
862 my @rlines = split /\r?\n/, $report;
863 $report = join "\n", @rlines;
865 ($report, $isHTML);
868 =head2 solve
870 Feeds a couple of potentially interesting data to
871 Statistics::Regression and sorts the result by R^2 descending. Do not
872 confuse this with a prove, rather take it as a useful hint. It can
873 save you minutes of staring at data and provide a quick overview where
874 one should look closer. Displays the N top candidates, where N
875 defaults to 3 and can be set with the C<$Opt{solvetop}> variable.
876 Regressions results with an R^2 of 1.00 are displayed in any case.
878 The function is called when the option C<-solve> is give on the
879 commandline. Several extra config variables are calculated, see source
880 code for details.
882 =cut
884 my %never_solve_on = map {($_ => 1)}
886 "conf:ccflags",
887 "conf:config_args",
888 "conf:cppflags",
889 "conf:lddlflags",
890 "conf:uname",
891 "env:PATH",
892 "env:PERL5LIB",
893 "env:PERL5OPT",
894 'env:$^X',
895 'env:$EGID',
896 'env:$GID',
897 'env:$UID/$EUID',
898 'env:PERL5_CPANPLUS_IS_RUNNING',
899 'env:PERL5_CPAN_IS_RUNNING',
900 'env:PERL5_CPAN_IS_RUNNING_IN_RECURSION',
901 'meta:ok',
903 my %normalize_numeric =
905 id => sub { return shift },
906 'meta:date' => sub {
907 my $v = shift;
908 my($Y,$M,$D,$h,$m,$s) = $v =~ /(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)/;
909 unless (defined $M) {
910 die "illegal value[$v] for a date";
912 Time::Local::timegm($s,$m,$h,$D,$M-1,$Y);
915 my %normalize_value =
917 'meta:perl' => sub {
918 my($perlatpatchlevel) = shift;
919 my $perl = $perlatpatchlevel;
920 $perl =~ s/\@.*//;
921 $perl;
924 sub solve {
925 my($V,%Opt) = @_;
926 require Statistics::Regression;
927 my @regression;
928 my $ycb;
929 if (my $ycbbody = $Opt{ycb}) {
930 $ycb = eval('sub {'.$ycbbody.'}');
931 die if $@;
932 } else {
933 $ycb = sub {
934 my $rec = shift;
935 my $y;
936 if ($rec->{"meta:ok"} eq "PASS") {
937 $y = 1;
938 } elsif ($rec->{"meta:ok"} eq "FAIL") {
939 $y = 0;
941 return $y
944 VAR: for my $variable (sort keys %$V) {
945 next if $variable eq "==DATA==";
946 if ($never_solve_on{$variable}){
947 warn "Skipping '$variable'\n" unless $Opt{quiet};
948 next VAR;
950 my $value_distribution = $V->{$variable};
951 my $keys = keys %$value_distribution;
952 my @X = qw(const);
953 if ($normalize_numeric{$variable}) {
954 push @X, "n_$variable";
955 } else {
956 my %seen = ();
957 for my $value (sort keys %$value_distribution) {
958 my $pf = $value_distribution->{$value};
959 $pf->{PASS} ||= 0;
960 $pf->{FAIL} ||= 0;
961 if ($pf->{PASS} || $pf->{FAIL}) {
962 my $Xele = sprintf "eq_%s",
964 $normalize_value{$variable} ?
965 $normalize_value{$variable}->($value) :
966 $value
968 push @X, $Xele unless $seen{$Xele}++;
971 if (
972 $pf->{PASS} xor $pf->{FAIL}
974 my $vl = 40;
975 substr($value,$vl) = "..." if length $value > 3+$vl;
976 my $poor_mans_freehand_estimation = 0;
977 if ($poor_mans_freehand_estimation) {
978 warn sprintf
980 "%4d %4d %-23s | %s\n",
981 $pf->{PASS},
982 $pf->{FAIL},
983 $variable,
984 $value,
990 warn "variable[$variable]keys[$keys]X[@X]\n" unless $Opt{quiet};
991 next VAR unless @X > 1;
992 my %regdata =
994 X => \@X,
995 data => [],
997 RECORD: for my $rec (@{$V->{"==DATA=="}}) {
998 my $y = $ycb->($rec);
999 next RECORD unless defined $y;
1000 my %obs;
1001 $obs{Y} = $y;
1002 @obs{@X} = (0) x @X;
1003 $obs{const} = 1;
1004 for my $x (@X) {
1005 if ($x =~ /^eq_(.+)/) {
1006 my $read_v = $1;
1007 if (exists $rec->{$variable}
1008 && defined $rec->{$variable}
1010 my $use_v = (
1011 $normalize_value{$variable} ?
1012 $normalize_value{$variable}->($rec->{$variable}) :
1013 $rec->{$variable}
1015 if ($use_v eq $read_v) {
1016 $obs{$x} = 1;
1019 # warn "DEBUG: y[$y]x[$x]obs[$obs{$x}]\n";
1020 } elsif ($x =~ /^n_(.+)/) {
1021 my $v = $1;
1022 $obs{$x} = eval { $normalize_numeric{$v}->($rec->{$v}); };
1023 if ($@) {
1024 warn "Warning: error during parsing v[$v] in record[$rec->{id}]: $@; continuing with undef value";
1028 push @{$regdata{data}}, \%obs;
1030 _run_regression ($variable, \%regdata, \@regression, \%Opt);
1032 my $top = min ($Opt{solvetop} || 3, scalar @regression);
1033 my $max_rsq = sum map {1==$_->rsq ? 1 : 0} @regression;
1034 $top = $max_rsq if $max_rsq && $max_rsq > $top;
1035 my $score = 0;
1036 printf
1038 "State after regression testing: %d results, showing top %d\n\n",
1039 scalar @regression,
1040 $top,
1042 for my $reg (sort {
1043 $b->rsq <=> $a->rsq
1045 $a->k <=> $b->k
1046 } @regression) {
1047 printf "(%d)\n", ++$score;
1048 eval { $reg->print; };
1049 if ($@) {
1050 printf "\n\nOops, Statistics::Regression died during ->print() with error message[$@]\n\n";
1052 last if --$top <= 0;
1057 # $variable is the name we pass through to S:R constructor
1058 # $regdata is hash and has the arrays "X" and "data" (observations)
1059 # X goes to S:R constructor
1060 # each observation has a Y which we pass to S:R in an include() call
1061 # $regression is the collector array of results
1062 # $opt are the options from outside, used to see if we are "verbose"
1063 sub _run_regression {
1064 my($variable,$regdata,$regression,$opt) = @_;
1065 my @X = @{$regdata->{X}};
1066 # my $splo = $regdata->{"spliced-out"} = []; # maybe can be used to
1067 # hold the reference
1068 # group
1069 while (@X > 1) {
1070 my $reg = Statistics::Regression->new($variable,\@X);
1071 for my $obs (@{$regdata->{data}}) {
1072 my $y = delete $obs->{Y};
1073 $reg->include($y, $obs);
1074 $obs->{Y} = $y;
1076 eval {$reg->theta;
1077 my @e = $reg->standarderrors;
1078 die "found standarderrors == 0" if grep { 0 == $_ } @e;
1079 $reg->rsq;};
1080 if ($@) {
1081 if ($opt->{verbose} && $opt->{verbose}>=2) {
1082 require YAML::Syck;
1083 warn YAML::Syck::Dump
1084 ({error=>"could not determine some regression parameters",
1085 variable=>$variable,
1086 k=>$reg->k,
1087 n=>$reg->n,
1088 X=>$regdata->{"X"},
1089 errorstr => $@,
1092 # reduce k in case that linear dependencies disturbed us;
1093 # often called reference group; I'm tempted to collect and
1094 # make visible
1095 splice @X, 1, 1;
1096 } else {
1097 # $reg->print;
1098 push @$regression, $reg;
1099 return;
1104 =head1 AUTHOR
1106 Andreas König
1108 =head1 BUGS
1110 Please report any bugs or feature requests through the web
1111 interface at
1112 L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=CPAN-Testers-ParseReport>.
1113 I will be notified, and then you'll automatically be notified of
1114 progress on your bug as I make changes.
1116 =head1 SUPPORT
1118 You can find documentation for this module with the perldoc command.
1120 perldoc CPAN::Testers::ParseReport
1123 You can also look for information at:
1125 =over 4
1127 =item * RT: CPAN's request tracker
1129 L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=CPAN-Testers-ParseReport>
1131 =item * AnnoCPAN: Annotated CPAN documentation
1133 L<http://annocpan.org/dist/CPAN-Testers-ParseReport>
1135 =item * CPAN Ratings
1137 L<http://cpanratings.perl.org/d/CPAN-Testers-ParseReport>
1139 =item * Search CPAN
1141 L<http://search.cpan.org/dist/CPAN-Testers-ParseReport>
1143 =back
1146 =head1 ACKNOWLEDGEMENTS
1148 Thanks to RJBS for module-starter.
1150 =head1 COPYRIGHT & LICENSE
1152 Copyright 2008 Andreas König.
1154 This program is free software; you can redistribute it and/or modify it
1155 under the same terms as Perl itself.
1158 =cut
1160 1; # End of CPAN::Testers::ParseReport