extend the version parsing for fallback_p5 to the git_describe that I found in 6155800
[cpan-testers-parsereport.git] / lib / CPAN / Testers / ParseReport.pm
blobe3757c280c867dfa404cfa4be9edb475529bc5f4
1 package CPAN::Testers::ParseReport;
3 use warnings;
4 use strict;
6 use Config::Perl::V ();
7 use DateTime::Format::Strptime;
8 use File::Basename qw(basename);
9 use File::Path qw(mkpath);
10 use HTML::Entities qw(decode_entities);
11 use LWP::UserAgent;
12 use List::Util qw(max min sum);
13 use MIME::QuotedPrint ();
14 use Net::NNTP ();
15 use Time::Local ();
16 use XML::LibXML;
17 use XML::LibXML::XPathContext;
19 our $default_ctformat = "yaml";
20 our $default_transport = "nntp";
21 our $default_cturl = "http://www.cpantesters.org/show";
22 our $Signal = 0;
24 =encoding utf-8
26 =head1 NAME
28 CPAN::Testers::ParseReport - parse reports to www.cpantesters.org from various sources
30 =cut
32 use version; our $VERSION = qv('0.1.10');
34 =head1 SYNOPSIS
36 The documentation in here is normally not needed because the code is
37 meant to be run from a standalone program, L<ctgetreports>.
39 ctgetreports --q mod:Moose Devel-Events
41 =head1 DESCRIPTION
43 This is the core module for CPAN::Testers::ParseReport. If you're not
44 looking to extend or alter the behaviour of this module, you probably
45 want to look at L<ctgetreports> instead.
47 =head1 OPTIONS
49 Are described in the L<ctgetreports> manpage and are passed through to
50 the functions unaltered.
52 =head1 FUNCTIONS
54 =head2 parse_distro($distro,%options)
56 reads the cpantesters HTML page or the YAML file or the local database
57 for the distro and loops through the reports for the specified or most
58 recent version of that distro found in these data.
60 parse_distro() intentionally has no meaningful return value, different
61 options would require different ones.
63 =head2 $extract = parse_single_report($report,$dumpvars,%options)
65 mirrors and reads this report. $report is of the form
67 { id => number }
69 $dumpvar is a hashreference that gets filled with data.
71 $extract is the result of parse_report() described below.
73 =cut
76 my $ua;
77 sub _ua {
78 return $ua if $ua;
79 $ua = LWP::UserAgent->new
81 keep_alive => 1,
82 env_proxy => 1,
84 $ua->parse_head(0);
85 $ua;
90 my $nntp;
91 sub _nntp {
92 return $nntp if $nntp;
93 $nntp = Net::NNTP->new("nntp.perl.org");
94 $nntp->group("perl.cpan.testers");
95 return $nntp;
100 my $xp;
101 sub _xp {
102 return $xp if $xp;
103 $xp = XML::LibXML->new;
104 $xp->keep_blanks(0);
105 $xp->clean_namespaces(1);
106 my $catalog = __FILE__;
107 $catalog =~ s|ParseReport.pm$|ParseReport/catalog|;
108 $xp->load_catalog($catalog);
109 return $xp;
113 sub _download_overview {
114 my($cts_dir, $distro, %Opt) = @_;
115 my $format = $Opt{ctformat} ||= $default_ctformat;
116 my $cturl = $Opt{cturl} ||= $default_cturl;
117 my $ctarget = "$cts_dir/$distro.$format";
118 my $cheaders = "$cts_dir/$distro.headers";
119 if ($Opt{local}) {
120 unless (-e $ctarget) {
121 die "Alert: No local file '$ctarget' found, cannot continue\n";
123 } else {
124 if (! -e $ctarget or -M $ctarget > .25) {
125 if (-e $ctarget && $Opt{verbose}) {
126 my(@stat) = stat _;
127 my $timestamp = gmtime $stat[9];
128 print STDERR "(timestamp $timestamp GMT)\n" unless $Opt{quiet};
130 print STDERR "Fetching $ctarget..." if $Opt{verbose} && !$Opt{quiet};
131 my $uri = "$cturl/$distro.$format";
132 my $resp = _ua->mirror($uri,$ctarget);
133 if ($resp->is_success) {
134 print STDERR "DONE\n" if $Opt{verbose} && !$Opt{quiet};
135 open my $fh, ">", $cheaders or die;
136 for ($resp->headers->as_string) {
137 print $fh $_;
138 if ($Opt{verbose} && $Opt{verbose}>1) {
139 print STDERR $_ unless $Opt{quiet};
142 } elsif (304 == $resp->code) {
143 print STDERR "DONE (not modified)\n" if $Opt{verbose} && !$Opt{quiet};
144 my $atime = my $mtime = time;
145 utime $atime, $mtime, $cheaders;
146 } else {
147 die sprintf
149 "No success downloading %s: %s",
150 $uri,
151 $resp->status_line,
156 return $ctarget;
159 sub _parse_html {
160 my($ctarget, %Opt) = @_;
161 my $content = do { open my $fh, $ctarget or die; local $/; <$fh> };
162 my $preprocesswithtreebuilder = 0; # not needed since barbie switched to XHTML
163 if ($preprocesswithtreebuilder) {
164 require HTML::TreeBuilder;
165 my $tree = HTML::TreeBuilder->new;
166 $tree->implicit_tags(1);
167 $tree->p_strict(1);
168 $tree->ignore_ignorable_whitespace(0);
169 $tree->parse_content($content);
170 $tree->eof;
171 $content = $tree->as_XML;
173 my $parser = _xp();
174 my $doc = eval { $parser->parse_string($content) };
175 my $err = $@;
176 unless ($doc) {
177 my $distro = basename $ctarget;
178 die sprintf "Error while parsing %s\: %s", $distro, $err;
180 my $xc = XML::LibXML::XPathContext->new($doc);
181 my $nsu = $doc->documentElement->namespaceURI;
182 $xc->registerNs('x', $nsu) if $nsu;
183 my($selected_release_ul,$selected_release_distrov,$excuse_string);
184 my($cparentdiv)
185 = $nsu ?
186 $xc->findnodes("/x:html/x:body/x:div[\@id = 'doc']") :
187 $doc->findnodes("/html/body/div[\@id = 'doc']");
188 my(@releasedivs) = $nsu ?
189 $xc->findnodes("//x:div[x:h2 and x:ul]",$cparentdiv) :
190 $cparentdiv->findnodes("//div[h2 and ul]");
191 my $releasediv;
192 if ($Opt{vdistro}) {
193 $excuse_string = "selected distro '$Opt{vdistro}'";
194 my($fallbacktoversion) = $Opt{vdistro} =~ /(\d+\..*)/;
195 $fallbacktoversion = 0 unless defined $fallbacktoversion;
196 RELEASE: for my $i (0..$#releasedivs) {
197 my $picked = "";
198 my($x) = $nsu ?
199 $xc->findvalue("x:h2/x:a[2]/\@name",$releasedivs[$i]) :
200 $releasedivs[$i]->findvalue("h2/a[2]/\@name");
201 if ($x) {
202 if ($x eq $Opt{vdistro}) {
203 $releasediv = $i;
204 $picked = " (picked)";
206 print STDERR "FOUND DISTRO: $x$picked\n" unless $Opt{quiet};
207 } else {
208 ($x) = $nsu ?
209 $xc->findvalue("x:h2/x:a[1]/\@name",$releasedivs[$i]) :
210 $releasedivs[$i]->findvalue("h2/a[1]/\@name");
211 if ($x eq $fallbacktoversion) {
212 $releasediv = $i;
213 $picked = " (picked)";
215 print STDERR "FOUND VERSION: $x$picked\n" unless $Opt{quiet};
218 } else {
219 $excuse_string = "any distro";
221 unless (defined $releasediv) {
222 $releasediv = 0;
224 # using a[1] because a[2] is missing on the first entry
225 ($selected_release_distrov) = $nsu ?
226 $xc->findvalue("x:h2/x:a[1]/\@name",$releasedivs[$releasediv]) :
227 $releasedivs[$releasediv]->findvalue("h2/a[1]/\@name");
228 ($selected_release_ul) = $nsu ?
229 $xc->findnodes("x:ul",$releasedivs[$releasediv]) :
230 $releasedivs[$releasediv]->findnodes("ul");
231 unless (defined $selected_release_distrov) {
232 warn "Warning: could not find $excuse_string in '$ctarget'";
233 return;
235 print STDERR "SELECTED: $selected_release_distrov\n" unless $Opt{quiet};
236 my($id);
237 my @all;
238 for my $test ($nsu ?
239 $xc->findnodes("x:li",$selected_release_ul) :
240 $selected_release_ul->findnodes("li")) {
241 $id = $nsu ?
242 $xc->findvalue("x:a[1]/text()",$test) :
243 $test->findvalue("a[1]/text()");
244 push @all, {id=>$id};
245 return if $Signal;
247 return \@all;
250 sub _parse_yaml {
251 my($ctarget, %Opt) = @_;
252 require YAML::Syck;
253 my $arr = YAML::Syck::LoadFile($ctarget);
254 my($selected_release_ul,$selected_release_distrov,$excuse_string);
255 if ($Opt{vdistro}) {
256 $excuse_string = "selected distro '$Opt{vdistro}'";
257 $arr = [grep {$_->{distversion} eq $Opt{vdistro}} @$arr];
258 ($selected_release_distrov) = $arr->[0]{distversion};
259 } else {
260 $excuse_string = "any distro";
261 my $last_addition;
262 my %seen;
263 for my $report (sort { $a->{id} <=> $b->{id} } @$arr) {
264 unless ($seen{$report->{distversion}}++) {
265 $last_addition = $report->{distversion};
268 $arr = [grep {$_->{distversion} eq $last_addition} @$arr];
269 ($selected_release_distrov) = $last_addition;
271 unless ($selected_release_distrov) {
272 warn "Warning: could not find $excuse_string in '$ctarget'";
273 return;
275 print STDERR "SELECTED: $selected_release_distrov\n" unless $Opt{quiet};
276 my @all;
277 for my $test (@$arr) {
278 my $id = $test->{id};
279 push @all, {id=>$id};
280 return if $Signal;
282 @all = sort { $b->{id} <=> $a->{id} } @all;
283 return \@all;
286 sub parse_single_report {
287 my($report, $dumpvars, %Opt) = @_;
288 my($id) = $report->{id};
289 $Opt{cachedir} ||= "$ENV{HOME}/var/cpantesters";
290 my $nnt_dir = "$Opt{cachedir}/nntp-testers";
291 mkpath $nnt_dir;
292 my $target = "$nnt_dir/$id";
293 if ($Opt{local}) {
294 unless (-e $target) {
295 die {severity=>0,text=>"Warning: No local file '$target' found, skipping\n"};
297 } else {
298 if (! -e $target) {
299 print STDERR "Fetching $target..." if $Opt{verbose} && !$Opt{quiet};
300 $Opt{transport} ||= $default_transport;
301 if ($Opt{transport} eq "nntp") {
302 my $article = _nntp->article($id);
303 unless ($article) {
304 die {severity=>0,text=>"NNTP-Server did not return an article for id[$id]"};
306 open my $fh, ">", $target or die {severity=>1,text=>"Could not open >$target: $!"};
307 print $fh @$article;
308 } elsif ($Opt{transport} eq "http") {
309 my $resp = _ua->mirror("http://www.nntp.perl.org/group/perl.cpan.testers/$id",$target);
310 if ($resp->is_success) {
311 if ($Opt{verbose}) {
312 my(@stat) = stat $target;
313 my $timestamp = gmtime $stat[9];
314 print STDERR "(timestamp $timestamp GMT)\n" unless $Opt{quiet};
315 if ($Opt{verbose} > 1) {
316 print STDERR $resp->headers->as_string unless $Opt{quiet};
319 my $headers = "$target.headers";
320 open my $fh, ">", $headers or die {severity=>1,text=>"Could not open >$headers: $!"};
321 print $fh $resp->headers->as_string;
322 } else {
323 die {severity=>0,
324 text=>sprintf "HTTP Server Error[%s] for id[%s]", $resp->status_line, $id};
326 } else {
327 die {severity=>1,text=>"Illegal value for --transport: '$Opt{transport}'"};
331 parse_report($target, $dumpvars, %Opt);
334 sub parse_distro {
335 my($distro,%Opt) = @_;
336 my %dumpvars;
337 $Opt{cachedir} ||= "$ENV{HOME}/var/cpantesters";
338 my $cts_dir = "$Opt{cachedir}/cpantesters-show";
339 mkpath $cts_dir;
340 if ($Opt{solve}) {
341 require Statistics::Regression;
342 $Opt{dumpvars} = "." unless defined $Opt{dumpvars};
344 if (!$Opt{vdistro} && $distro =~ /^(.+)-(?i:v?\d+)(?:\.\d+)*\w*$/) {
345 $Opt{vdistro} = $distro;
346 $distro = $1;
348 my $reports;
349 if (my $ctdb = $Opt{ctdb}) {
350 require CPAN::WWW::Testers::Generator::Database;
351 require CPAN::DistnameInfo;
352 my $dbi = CPAN::WWW::Testers::Generator::Database->new(database=>$ctdb) or die "Alert: unknown error while opening database '$ctdb'";
353 unless ($Opt{vdistro}) {
354 my $sql = "select version from cpanstats where dist=? order by id";
355 my @rows = $dbi->get_query($sql,$distro);
356 my($newest,%seen);
357 for my $row (@rows) {
358 $newest = $row->[0] unless $seen{$row->[0]}++;
360 $Opt{vdistro} = "$distro-$newest";
362 my $d = CPAN::DistnameInfo->new("FOO/$Opt{vdistro}.tgz");
363 my $dist = $d->dist;
364 my $version = $d->version;
365 my $sql = "select id from cpanstats where dist=? and version=? order by id desc";
366 my @rows = $dbi->get_query($sql,$dist,$version);
367 my @all;
368 for my $row (@rows) {
369 my $id = $row->[0];
370 push @all, {id=>$id};
372 $reports = \@all;
373 } else {
374 my $ctarget = _download_overview($cts_dir, $distro, %Opt);
375 $Opt{ctformat} ||= $default_ctformat;
376 if ($Opt{ctformat} eq "html") {
377 $reports = _parse_html($ctarget,%Opt);
378 } else {
379 $reports = _parse_yaml($ctarget,%Opt);
382 return unless $reports;
383 for my $report (@$reports) {
384 eval {parse_single_report($report, \%dumpvars, %Opt)};
385 if ($@) {
386 if (ref $@) {
387 if ($@->{severity}) {
388 die $@->{text};
389 } else {
390 warn $@->{text};
392 } else {
393 die $@;
396 last if $Signal;
398 if ($Opt{dumpvars}) {
399 require YAML::Syck;
400 my $dumpfile = $Opt{dumpfile} || "ctgetreports.out";
401 open my $fh, ">", $dumpfile or die "Could not open '$dumpfile' for writing: $!";
402 print $fh YAML::Syck::Dump(\%dumpvars);
403 close $fh or die "Could not close '$dumpfile': $!"
405 if ($Opt{solve}) {
406 solve(\%dumpvars,%Opt);
410 =head2 $bool = _looks_like_qp($raw_report)
412 We had to acknowledge the fact that some MTAs swallow the MIME-Version
413 header while passing MIME through. So we introduce fallback heuristics
414 that try to determine if a report is written in quoted printable.
416 Note that this subroutine is internal, just documented to have the
417 internals documented.
419 The current implementation counts the number of QP escaped spaces and
420 equal signs.
422 =cut
424 sub _looks_like_qp {
425 my($report) = @_;
426 my $count_space = () = $report =~ /=20/g;
427 return 1 if $count_space > 12;
428 my $count_equal = () = $report =~ /=3D/g;
429 return 1 if $count_equal > 12;
430 return 1 if $count_space+$count_equal > 24;
431 return 0; # waiting for a counter example
434 =head2 $extract = parse_report($target,$dumpvars,%Opt)
436 Reads one report. $target is the local filename to read. $dumpvars is
437 a hashref which gets filled with descriptive stats about
438 PASS/FAIL/etc. %Opt are the options as described in the
439 C<ctgetreports> manpage. $extract is a hashref containing the found
440 variables.
442 Note: this parsing is a bit dirty but as it seems good enough I'm not
443 inclined to change it. We parse HTML with regexps only, not an HTML
444 parser. Only the entities are decoded.
446 Update around version 0.0.17: switching to nntp now but keeping the
447 parser for HTML around to read old local copies.
449 Update around 0.0.18: In %Options you can use
451 article => $some_full_article_as_scalar
453 to use this function to parse one full article as text. When this is
454 given, the argument $target is not read, but its basename is taken to
455 be the id of the article. (OMG, hackers!)
457 =cut
458 sub parse_report {
459 my($target,$dumpvars,%Opt) = @_;
460 our @q;
461 my $id = basename($target);
462 # warn "DEBUG: id[$id]";
463 my($ok,$about);
465 my(%extract);
467 my($report,$isHTML) = _get_cooked_report($target, \%Opt);
468 my @qr = map /^qr:(.+)/, @{$Opt{q}};
469 if ($Opt{raw} || @qr) {
470 for my $qr (@qr) {
471 my $cqr = eval "qr{$qr}";
472 die "Could not compile regular expression '$qr': $@" if $@;
473 my(@matches) = $report =~ $cqr;
474 my $v;
475 if (@matches) {
476 if (@matches==1) {
477 $v = $matches[0];
478 } else {
479 $v = join "", map {"($_)"} @matches;
481 } else {
482 $v = "";
484 $extract{"qr:$qr"} = $v;
488 my $report_writer;
489 my $moduleunpack = {};
490 my $expect_prereq = 0;
491 my $expect_toolchain = 0;
492 my $expecting_toolchain_soon = 0;
493 my $fallback_p5 = "";
495 my $in_summary = 0;
496 my $in_summary_seen_platform = 0;
497 my $in_prg_output = 0;
498 my $in_env_context = 0;
500 my $current_headline;
501 my @previous_line = ""; # so we can neutralize line breaks
502 my @rlines = split /\r?\n/, $report;
503 LINE: for (@rlines) {
504 next LINE unless ($isHTML ? m/<title>(\S+)\s+(\S+)/ : m/^Subject:\s*(\S+)\s+(\S+)/);
505 $ok = $1;
506 $about = $2;
507 $extract{"meta:ok"} = $ok;
508 $extract{"meta:about"} = $about;
509 last;
511 LINE: while (@rlines) {
512 $_ = shift @rlines;
513 while (/!$/ and @rlines) {
514 my $followupline = shift @rlines;
515 $followupline =~ s/^\s+//; # remo leading space
516 $_ .= $followupline;
518 if (/^--------/ && $previous_line[-2] && $previous_line[-2] =~ /^--------/) {
519 $current_headline = $previous_line[-1];
520 if ($current_headline =~ /PROGRAM OUTPUT/) {
521 $in_prg_output = 1;
522 } else {
523 $in_prg_output = 0;
525 if ($current_headline =~ /ENVIRONMENT AND OTHER CONTEXT/) {
526 $in_env_context = 1;
527 } else {
528 $in_env_context = 0;
531 if ($extract{"meta:perl"}) {
532 if ( $in_summary
533 and !$extract{"conf:git_commit_id"}
534 and /Commit id:\s*([[:xdigit:]]+)/) {
535 $extract{"conf:git_commit_id"} = $1;
537 } else {
538 my $p5;
539 if (0) {
540 } elsif (/Summary of my perl5 \((.+)\) configuration:/) {
541 $p5 = $1;
542 $in_summary = 1;
543 $in_env_context = 0;
545 if ($p5) {
546 my($r,$v,$s,$p);
547 if (($r,$v,$s,$p) = $p5 =~ /revision (\S+) version (\S+) subversion (\S+) patch (\S+)/) {
548 $r =~ s/\.0//; # 5.0 6 2!
549 $extract{"meta:perl"} = "$r.$v.$s\@$p";
550 } elsif (($r,$v,$s) = $p5 =~ /revision (\S+) version (\S+) subversion (\S+)/) {
551 $r =~ s/\.0//;
552 $extract{"meta:perl"} = "$r.$v.$s";
553 } elsif (($r,$v,$s) = $p5 =~ /(\d+\S*) patchlevel (\S+) subversion (\S+)/) {
554 $r =~ s/\.0//;
555 $extract{"meta:perl"} = "$r.$v.$s";
556 } else {
557 $extract{"meta:perl"} = $p5;
561 unless ($extract{"meta:from"}) {
562 if (0) {
563 } elsif ($isHTML ?
564 m|<div class="h_name">From:</div> <b>(.+?)</b><br/>| :
565 m|^From:\s*(.+)|
567 $extract{"meta:from"} = $1;
569 $extract{"meta:from"} =~ s/\.$// if $extract{"meta:from"};
571 unless ($extract{"meta:date"}) {
572 if (0) {
573 } elsif ($isHTML ?
574 m|<div class="h_name">Date:</div> (.+?)<br/>| :
575 m|^Date:\s*(.+)|
577 my $date = $1;
578 my($dt);
579 DATEFMT: for my $pat ("%a, %d %b %Y %T %z", # Sun, 28 Sep 2008 12:23:12 +0100
580 "%b %d, %Y %R", # July 10,...
581 "%b %d, %Y %R", # July 4,...
583 $dt = eval {
584 my $p = DateTime::Format::Strptime->new
586 locale => "en",
587 time_zone => "UTC",
588 pattern => $pat,
590 $p->parse_datetime($date)
592 last DATEFMT if $dt;
594 unless ($dt) {
595 warn "Could not parse date[$date], setting to epoch 0";
596 $dt = DateTime->from_epoch( epoch => 0 );
598 $extract{"meta:date"} = $dt->datetime;
600 $extract{"meta:date"} =~ s/\.$// if $extract{"meta:date"};
602 unless ($extract{"meta:writer"}) {
603 for ("$previous_line[-1] $_") {
604 if (0) {
605 } elsif (/CPANPLUS, version (\S+)/) {
606 $extract{"meta:writer"} = "CPANPLUS $1";
607 } elsif (/created (?:automatically )?by (\S+)/) {
608 $extract{"meta:writer"} = $1;
609 if (/\s+on\s+perl\s+([^,]+),/) {
610 $fallback_p5 = $1;
612 } elsif (/This report was machine-generated by (\S+) (\S+)/) {
613 $extract{"meta:writer"} = "$1 $2";
615 $extract{"meta:writer"} =~ s/[\.,]$// if $extract{"meta:writer"};
618 if ($in_summary) {
619 # we do that first three lines a bit too often
620 my $qr = $Opt{dumpvars} || "";
621 $qr = qr/$qr/ if $qr;
622 unless (@q) {
623 @q = @{$Opt{q}||[]};
624 @q = qw(meta:perl conf:archname conf:usethreads conf:optimize meta:writer meta:from) unless @q;
627 my %conf_vars = map {($_ => 1)} grep { /^conf:/ } @q;
629 if (/^\s+Platform:$/) {
630 $in_summary_seen_platform=1;
631 } elsif (/^\s*$/ || m|</pre>|) {
632 # if not html, we have reached the end now
633 if ($in_summary_seen_platform) {
634 # some perls have an empty line after the summary line
635 $in_summary = 0;
637 } else {
638 my(%kv) = /\G,?\s*([^=]+)=('[^']+?'|\S+)/gc;
639 while (my($k,$v) = each %kv) {
640 my $ck = "conf:$k";
641 $ck =~ s/\s+$//;
642 $v =~ s/,$//;
643 if ($v =~ /^'(.*)'$/) {
644 $v = $1;
646 $v =~ s/^\s+//;
647 $v =~ s/\s+$//;
648 if ($qr && $ck =~ $qr) {
649 $extract{$ck} = $v;
650 } elsif ($conf_vars{$ck}) {
651 $extract{$ck} = $v;
656 if ($in_prg_output) {
657 unless ($extract{"meta:output_from"}) {
658 if (/Output from (.+):$/) {
659 $extract{"meta:output_from"} = $1
663 if ($in_env_context) {
664 if (/^\s{4}(\S+)\s*=\s*(.*)$/) {
665 $extract{"env:$1"} = $2;
668 push @previous_line, $_;
669 if ($expect_prereq || $expect_toolchain) {
670 if (/Perl module toolchain versions installed/) {
671 # first time discovered in CPANPLUS 0.89_06
672 $expecting_toolchain_soon = 1;
673 $expect_prereq=0;
674 next LINE;
676 if (exists $moduleunpack->{type}) {
677 my($module,$v,$needwant);
678 # type 1 and 2 are about prereqs, type three about toolchain
679 if ($moduleunpack->{type} == 1) {
680 (my $leader,$module,$needwant,$v) = eval { unpack $moduleunpack->{tpl}, $_; };
681 next LINE if $@;
682 if ($leader =~ /^-/) {
683 $moduleunpack = {};
684 $expect_prereq = 0;
685 next LINE;
686 } elsif ($leader =~ /^(
687 buil # build_requires:
688 )/x) {
689 next LINE;
690 } elsif ($module =~ /^(
691 - # line drawing
692 )/x) {
693 next LINE;
695 } elsif ($moduleunpack->{type} == 2) {
696 (my $leader,$module,$v,$needwant) = eval { unpack $moduleunpack->{tpl}, $_; };
697 next LINE if $@;
698 if ($leader =~ /^\*/) {
699 $moduleunpack = {};
700 $expect_prereq = 0;
701 next LINE;
702 } elsif (!defined $v
703 or !defined $needwant
704 or $v =~ /\s/
705 or $needwant =~ /\s/
707 ($module,$v,$needwant) = split " ", $_;
709 } elsif ($moduleunpack->{type} == 3) {
710 (my $leader,$module,$v) = eval { unpack $moduleunpack->{tpl}, $_; };
711 next LINE if $@;
712 if (!$module) {
713 $moduleunpack = {};
714 $expect_toolchain = 0;
715 next LINE;
716 } elsif ($module =~ /^-/) {
717 next LINE;
720 $module =~ s/\s+$//;
721 if ($module) {
722 $v =~ s/^\s+//;
723 $v =~ s/\s+$//;
724 my($modulename,$versionlead) = split " ", $module;
725 if (defined $modulename and defined $versionlead) {
726 $module = $modulename;
727 $v = "$versionlead$v";
729 if ($v eq "Have") {
730 next LINE;
732 $extract{"mod:$module"} = $v;
733 if ($needwant) {
734 $needwant =~ s/^\s+//;
735 $needwant =~ s/\s+$//;
736 $extract{"prereq:$module"} = $needwant;
740 if (/(\s+)(Module\s+)(Need\s+)Have/) {
741 $in_env_context = 0;
742 $moduleunpack = {
743 tpl => 'a'.length($1).'a'.length($2).'a'.length($3).'a*',
744 type => 1,
746 } elsif (/(\s+)(Module Name\s+)(Have)(\s+)Want/) {
747 $in_env_context = 0;
748 my $adjust_1 = 0;
749 my $adjust_2 = -length($4);
750 my $adjust_3 = length($4);
751 # I think they do not really try to align, usually we
752 # get away with split
753 $moduleunpack = {
754 tpl => 'a'.length($1).'a'.(length($2)+$adjust_2).'a'.(length($3)+$adjust_3).'a*',
755 type => 2,
759 if (/PREREQUISITES|Prerequisite modules loaded/) {
760 $in_env_context = 0;
761 $expect_prereq=1;
763 if ($expecting_toolchain_soon) {
764 if (/(\s+)(Module(?:\sName)?\s+) Have/) {
765 $in_env_context = 0;
766 $expect_toolchain=1;
767 $expecting_toolchain_soon=0;
768 $moduleunpack = {
769 tpl => 'a'.length($1).'a'.length($2).'a*',
770 type => 3,
774 if (/toolchain versions installed/) {
775 $in_env_context = 0;
776 $expecting_toolchain_soon=1;
778 } # LINE
779 if (! $extract{"meta:perl"} && $fallback_p5) {
780 my($p5,$patch) = split /\s+patch\s+/, $fallback_p5;
781 $extract{"meta:perl"} = $p5;
782 $extract{"conf:git_describe"} = $patch;
784 if ($Opt{solve}) {
785 $extract{id} = $id;
786 if ($extract{"conf:osvers"} && $extract{"conf:archname"}) {
787 $extract{"conf:archname+osvers"} = join " ", @extract{"conf:archname","conf:osvers"};
789 my $data = $dumpvars->{"==DATA=="} ||= [];
790 push @$data, \%extract;
792 # ---- %extract finished ----
793 my $diag = "";
794 if (my $qr = $Opt{dumpvars}) {
795 $qr = qr/$qr/;
796 while (my($k,$v) = each %extract) {
797 if ($k =~ $qr) {
798 $dumpvars->{$k}{$v}{$ok}++;
802 for my $want (@q) {
803 my $have = $extract{$want} || "";
804 $diag .= " $want\[$have]";
806 printf STDERR " %-4s %8d%s\n", $ok, $id, $diag unless $Opt{quiet};
807 if ($Opt{raw}) {
808 $report =~ s/\s+\z//;
809 print STDERR $report, "\n================\n" unless $Opt{quiet};
811 if ($Opt{interactive}) {
812 require IO::Prompt;
813 local @ARGV;
814 local $ARGV;
815 my $ans = IO::Prompt::prompt
817 -p => "View $id? [onechar: ynq] ",
818 -d => "y",
819 -u => qr/[ynq]/,
820 -onechar,
822 print STDERR "\n" unless $Opt{quiet};
823 if ($ans eq "y") {
824 my($report) = _get_cooked_report($target, \%Opt);
825 $Opt{pager} ||= "less";
826 open my $lfh, "|-", $Opt{pager} or die "Could not fork '$Opt{pager}': $!";
827 local $/;
828 print {$lfh} $report;
829 close $lfh or die "Could not close pager: $!"
830 } elsif ($ans eq "q") {
831 $Signal++;
832 return;
835 return \%extract;
838 sub _get_cooked_report {
839 my($target, $Opt_ref) = @_;
840 my($report, $isHTML);
841 if ($report = $Opt_ref->{article}) {
842 $isHTML = $report =~ /^</;
843 undef $target;
845 if ($target) {
846 open my $fh, $target or die "Could not open '$target': $!";
847 local $/;
848 my $raw_report = <$fh>;
849 $isHTML = $raw_report =~ /^</;
850 if ($isHTML) {
851 $report = decode_entities($raw_report);
852 } elsif ($raw_report =~ /^MIME-Version: 1.0$/m
854 _looks_like_qp($raw_report)
856 # minimizing MIME effort; don't know about reports in other formats
857 $report = MIME::QuotedPrint::decode_qp($raw_report);
858 } else {
859 $report = $raw_report;
861 close $fh;
863 if ($report =~ /\r\n/) {
864 my @rlines = split /\r?\n/, $report;
865 $report = join "\n", @rlines;
867 ($report, $isHTML);
870 =head2 solve
872 Feeds a couple of potentially interesting data to
873 Statistics::Regression and sorts the result by R^2 descending. Do not
874 confuse this with a prove, rather take it as a useful hint. It can
875 save you minutes of staring at data and provide a quick overview where
876 one should look closer. Displays the N top candidates, where N
877 defaults to 3 and can be set with the C<$Opt{solvetop}> variable.
878 Regressions results with an R^2 of 1.00 are displayed in any case.
880 The function is called when the option C<-solve> is give on the
881 commandline. Several extra config variables are calculated, see source
882 code for details.
884 =cut
886 my %never_solve_on = map {($_ => 1)}
888 "conf:ccflags",
889 "conf:config_args",
890 "conf:cppflags",
891 "conf:lddlflags",
892 "conf:uname",
893 "env:PATH",
894 "env:PERL5LIB",
895 "env:PERL5OPT",
896 'env:$^X',
897 'env:$EGID',
898 'env:$GID',
899 'env:$UID/$EUID',
900 'env:PERL5_CPANPLUS_IS_RUNNING',
901 'env:PERL5_CPAN_IS_RUNNING',
902 'env:PERL5_CPAN_IS_RUNNING_IN_RECURSION',
903 'meta:ok',
905 my %normalize_numeric =
907 id => sub { return shift },
908 'meta:date' => sub {
909 my $v = shift;
910 my($Y,$M,$D,$h,$m,$s) = $v =~ /(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)/;
911 unless (defined $M) {
912 die "illegal value[$v] for a date";
914 Time::Local::timegm($s,$m,$h,$D,$M-1,$Y);
917 my %normalize_value =
919 'meta:perl' => sub {
920 my($perlatpatchlevel) = shift;
921 my $perl = $perlatpatchlevel;
922 $perl =~ s/\@.*//;
923 $perl;
926 sub solve {
927 my($V,%Opt) = @_;
928 require Statistics::Regression;
929 my @regression;
930 my $ycb;
931 if (my $ycbbody = $Opt{ycb}) {
932 $ycb = eval('sub {'.$ycbbody.'}');
933 die if $@;
934 } else {
935 $ycb = sub {
936 my $rec = shift;
937 my $y;
938 if ($rec->{"meta:ok"} eq "PASS") {
939 $y = 1;
940 } elsif ($rec->{"meta:ok"} eq "FAIL") {
941 $y = 0;
943 return $y
946 VAR: for my $variable (sort keys %$V) {
947 next if $variable eq "==DATA==";
948 if ($never_solve_on{$variable}){
949 warn "Skipping '$variable'\n" unless $Opt{quiet};
950 next VAR;
952 my $value_distribution = $V->{$variable};
953 my $keys = keys %$value_distribution;
954 my @X = qw(const);
955 if ($normalize_numeric{$variable}) {
956 push @X, "n_$variable";
957 } else {
958 my %seen = ();
959 for my $value (sort keys %$value_distribution) {
960 my $pf = $value_distribution->{$value};
961 $pf->{PASS} ||= 0;
962 $pf->{FAIL} ||= 0;
963 if ($pf->{PASS} || $pf->{FAIL}) {
964 my $Xele = sprintf "eq_%s",
966 $normalize_value{$variable} ?
967 $normalize_value{$variable}->($value) :
968 $value
970 push @X, $Xele unless $seen{$Xele}++;
973 if (
974 $pf->{PASS} xor $pf->{FAIL}
976 my $vl = 40;
977 substr($value,$vl) = "..." if length $value > 3+$vl;
978 my $poor_mans_freehand_estimation = 0;
979 if ($poor_mans_freehand_estimation) {
980 warn sprintf
982 "%4d %4d %-23s | %s\n",
983 $pf->{PASS},
984 $pf->{FAIL},
985 $variable,
986 $value,
992 warn "variable[$variable]keys[$keys]X[@X]\n" unless $Opt{quiet};
993 next VAR unless @X > 1;
994 my %regdata =
996 X => \@X,
997 data => [],
999 RECORD: for my $rec (@{$V->{"==DATA=="}}) {
1000 my $y = $ycb->($rec);
1001 next RECORD unless defined $y;
1002 my %obs;
1003 $obs{Y} = $y;
1004 @obs{@X} = (0) x @X;
1005 $obs{const} = 1;
1006 for my $x (@X) {
1007 if ($x =~ /^eq_(.+)/) {
1008 my $read_v = $1;
1009 if (exists $rec->{$variable}
1010 && defined $rec->{$variable}
1012 my $use_v = (
1013 $normalize_value{$variable} ?
1014 $normalize_value{$variable}->($rec->{$variable}) :
1015 $rec->{$variable}
1017 if ($use_v eq $read_v) {
1018 $obs{$x} = 1;
1021 # warn "DEBUG: y[$y]x[$x]obs[$obs{$x}]\n";
1022 } elsif ($x =~ /^n_(.+)/) {
1023 my $v = $1;
1024 $obs{$x} = eval { $normalize_numeric{$v}->($rec->{$v}); };
1025 if ($@) {
1026 warn "Warning: error during parsing v[$v] in record[$rec->{id}]: $@; continuing with undef value";
1030 push @{$regdata{data}}, \%obs;
1032 _run_regression ($variable, \%regdata, \@regression, \%Opt);
1034 my $top = min ($Opt{solvetop} || 3, scalar @regression);
1035 my $max_rsq = sum map {1==$_->rsq ? 1 : 0} @regression;
1036 $top = $max_rsq if $max_rsq && $max_rsq > $top;
1037 my $score = 0;
1038 printf
1040 "State after regression testing: %d results, showing top %d\n\n",
1041 scalar @regression,
1042 $top,
1044 for my $reg (sort {
1045 $b->rsq <=> $a->rsq
1047 $a->k <=> $b->k
1048 } @regression) {
1049 printf "(%d)\n", ++$score;
1050 eval { $reg->print; };
1051 if ($@) {
1052 printf "\n\nOops, Statistics::Regression died during ->print() with error message[$@]\n\n";
1054 last if --$top <= 0;
1059 # $variable is the name we pass through to S:R constructor
1060 # $regdata is hash and has the arrays "X" and "data" (observations)
1061 # X goes to S:R constructor
1062 # each observation has a Y which we pass to S:R in an include() call
1063 # $regression is the collector array of results
1064 # $opt are the options from outside, used to see if we are "verbose"
1065 sub _run_regression {
1066 my($variable,$regdata,$regression,$opt) = @_;
1067 my @X = @{$regdata->{X}};
1068 # my $splo = $regdata->{"spliced-out"} = []; # maybe can be used to
1069 # hold the reference
1070 # group
1071 while (@X > 1) {
1072 my $reg = Statistics::Regression->new($variable,\@X);
1073 for my $obs (@{$regdata->{data}}) {
1074 my $y = delete $obs->{Y};
1075 $reg->include($y, $obs);
1076 $obs->{Y} = $y;
1078 eval {$reg->theta;
1079 my @e = $reg->standarderrors;
1080 die "found standarderrors == 0" if grep { 0 == $_ } @e;
1081 $reg->rsq;};
1082 if ($@) {
1083 if ($opt->{verbose} && $opt->{verbose}>=2) {
1084 require YAML::Syck;
1085 warn YAML::Syck::Dump
1086 ({error=>"could not determine some regression parameters",
1087 variable=>$variable,
1088 k=>$reg->k,
1089 n=>$reg->n,
1090 X=>$regdata->{"X"},
1091 errorstr => $@,
1094 # reduce k in case that linear dependencies disturbed us;
1095 # often called reference group; I'm tempted to collect and
1096 # make visible
1097 splice @X, 1, 1;
1098 } else {
1099 # $reg->print;
1100 push @$regression, $reg;
1101 return;
1106 =head1 AUTHOR
1108 Andreas König
1110 =head1 BUGS
1112 Please report any bugs or feature requests through the web
1113 interface at
1114 L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=CPAN-Testers-ParseReport>.
1115 I will be notified, and then you'll automatically be notified of
1116 progress on your bug as I make changes.
1118 =head1 SUPPORT
1120 You can find documentation for this module with the perldoc command.
1122 perldoc CPAN::Testers::ParseReport
1125 You can also look for information at:
1127 =over 4
1129 =item * RT: CPAN's request tracker
1131 L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=CPAN-Testers-ParseReport>
1133 =item * AnnoCPAN: Annotated CPAN documentation
1135 L<http://annocpan.org/dist/CPAN-Testers-ParseReport>
1137 =item * CPAN Ratings
1139 L<http://cpanratings.perl.org/d/CPAN-Testers-ParseReport>
1141 =item * Search CPAN
1143 L<http://search.cpan.org/dist/CPAN-Testers-ParseReport>
1145 =back
1148 =head1 ACKNOWLEDGEMENTS
1150 Thanks to RJBS for module-starter.
1152 =head1 COPYRIGHT & LICENSE
1154 Copyright 2008 Andreas König.
1156 This program is free software; you can redistribute it and/or modify it
1157 under the same terms as Perl itself.
1160 =cut
1162 1; # End of CPAN::Testers::ParseReport