lib/CPAN/Testers/ParseReport.pm

   1 package CPAN::Testers::ParseReport;
   2
   3 use warnings;
   4 use strict;
   5
   6 use DateTime::Format::Strptime;
   7 use DateTime::Format::DateParse;
   8 use File::Basename qw(basename);
   9 use File::Path qw(mkpath);
  10 use HTML::Entities qw(decode_entities);
  11 use LWP::UserAgent;
  12 use List::Util qw(max min sum);
  13 use Net::NNTP ();
  14 use Time::Local ();
  15 use XML::LibXML;
  16 use XML::LibXML::XPathContext;
  17
  18 our $default_ctformat = "yaml";
  19 our $default_transport = "nntp";
  20 our $default_cturl = "http://www.cpantesters.org/show";
  21 our $Signal = 0;
  22
  23 =encoding utf-8
  24
  25 =head1 NAME
  26
  27 CPAN::Testers::ParseReport - parse reports to www.cpantesters.org from various sources
  28
  29 =cut
  30
  31 use version; our $VERSION = qv('0.1.0');
  32
  33 =head1 SYNOPSIS
  34
  35 The documentation in here is normally not needed because the code is
  36 meant to be run from a standalone program, L<ctgetreports>.
  37
  38   ctgetreports --q mod:Moose Devel-Events
  39
  40 =head1 DESCRIPTION
  41
  42 This is the core module for CPAN::Testers::ParseReport. If you're not
  43 looking to extend or alter the behaviour of this module, you probably
  44 want to look at L<ctgetreports> instead.
  45
  46 =head1 OPTIONS
  47
  48 Are described in the L<ctgetreports> manpage and are passed through to
  49 the functions unaltered.
  50
  51 =head1 FUNCTIONS
  52
  53 =head2 parse_distro($distro,%options)
  54
  55 reads the cpantesters HTML page or the YAML file for the distro and
  56 loops through the reports for the specified or most recent version of
  57 that distro found in these data.
  58
  59 =head2 parse_single_report($report,$dumpvars,%options)
  60
  61 mirrors and reads this report. $report is of the form
  62
  63   { id => number }
  64
  65 $dumpvar is a hashreference that gets filled with data.
  66
  67 =cut
  68
  69 {
  70     my $ua;
  71     sub _ua {
  72         return $ua if $ua;
  73         $ua = LWP::UserAgent->new
  74             (
  75              keep_alive => 1,
  76              env_proxy => 1,
  77             );
  78         $ua->parse_head(0);
  79         $ua;
  80     }
  81 }
  82
  83 {
  84     my $nntp;
  85     sub _nntp {
  86         return $nntp if $nntp;
  87         $nntp = Net::NNTP->new("nntp.perl.org");
  88         $nntp->group("perl.cpan.testers");
  89         return $nntp;
  90     }
  91 }
  92
  93 {
  94     my $xp;
  95     sub _xp {
  96         return $xp if $xp;
  97         $xp = XML::LibXML->new;
  98         $xp->keep_blanks(0);
  99         $xp->clean_namespaces(1);
 100         my $catalog = __FILE__;
 101         $catalog =~ s|ParseReport.pm$|ParseReport/catalog|;
 102         $xp->load_catalog($catalog);
 103         return $xp;
 104     }
 105 }
 106
 107 sub _download_overview {
 108     my($cts_dir, $distro, %Opt) = @_;
 109     my $format = $Opt{ctformat} ||= $default_ctformat;
 110     my $cturl = $Opt{cturl} ||= $default_cturl;
 111     my $ctarget = "$cts_dir/$distro.$format";
 112     my $cheaders = "$cts_dir/$distro.headers";
 113     if ($Opt{local}) {
 114         unless (-e $ctarget) {
 115             die "Alert: No local file '$ctarget' found, cannot continue\n";
 116         }
 117     } else {
 118         if (! -e $ctarget or -M $ctarget > .25) {
 119             if (-e $ctarget && $Opt{verbose}) {
 120                 my(@stat) = stat _;
 121                 my $timestamp = gmtime $stat[9];
 122                 print STDERR "(timestamp $timestamp GMT)\n" unless $Opt{quiet};
 123             }
 124             print STDERR "Fetching $ctarget..." if $Opt{verbose} && !$Opt{quiet};
 125             my $uri = "$cturl/$distro.$format";
 126             my $resp = _ua->mirror($uri,$ctarget);
 127             if ($resp->is_success) {
 128                 print STDERR "DONE\n" if $Opt{verbose} && !$Opt{quiet};
 129                 open my $fh, ">", $cheaders or die;
 130                 for ($resp->headers->as_string) {
 131                     print $fh $_;
 132                     if ($Opt{verbose} && $Opt{verbose}>1) {
 133                         print STDERR $_ unless $Opt{quiet};
 134                     }
 135                 }
 136             } elsif (304 == $resp->code) {
 137                 print STDERR "DONE (not modified)\n" if $Opt{verbose} && !$Opt{quiet};
 138                 my $atime = my $mtime = time;
 139                 utime $atime, $mtime, $cheaders;
 140             } else {
 141                 die sprintf
 142                     (
 143                      "No success downloading %s: %s",
 144                      $uri,
 145                      $resp->status_line,
 146                     );
 147             }
 148         }
 149     }
 150     return $ctarget;
 151 }
 152
 153 sub _parse_html {
 154     my($ctarget, %Opt) = @_;
 155     my $content = do { open my $fh, $ctarget or die; local $/; <$fh> };
 156     my $preprocesswithtreebuilder = 0; # not needed since barbie switched to XHTML
 157     if ($preprocesswithtreebuilder) {
 158         require HTML::TreeBuilder;
 159         my $tree = HTML::TreeBuilder->new;
 160         $tree->implicit_tags(1);
 161         $tree->p_strict(1);
 162         $tree->ignore_ignorable_whitespace(0);
 163         $tree->parse_content($content);
 164         $tree->eof;
 165         $content = $tree->as_XML;
 166     }
 167     my $parser = _xp();
 168     my $doc = eval { $parser->parse_string($content) };
 169     my $err = $@;
 170     unless ($doc) {
 171         my $distro = basename $ctarget;
 172         die sprintf "Error while parsing %s\: %s", $distro, $err;
 173     }
 174     my $xc = XML::LibXML::XPathContext->new($doc);
 175     my $nsu = $doc->documentElement->namespaceURI;
 176     $xc->registerNs('x', $nsu) if $nsu;
 177     my($selected_release_ul,$selected_release_distrov,$excuse_string);
 178     my($cparentdiv)
 179         = $nsu ?
 180             $xc->findnodes("/x:html/x:body/x:div[\@id = 'doc']") :
 181                 $doc->findnodes("/html/body/div[\@id = 'doc']");
 182     my(@releasedivs) = $nsu ?
 183         $xc->findnodes("//x:div[x:h2 and x:ul]",$cparentdiv) :
 184             $cparentdiv->findnodes("//div[h2 and ul]");
 185     my $releasediv;
 186     if ($Opt{vdistro}) {
 187         $excuse_string = "selected distro '$Opt{vdistro}'";
 188         my($fallbacktoversion) = $Opt{vdistro} =~ /(\d+\..*)/;
 189         $fallbacktoversion = 0 unless defined $fallbacktoversion;
 190       RELEASE: for my $i (0..$#releasedivs) {
 191             my $picked = "";
 192             my($x) = $nsu ?
 193                 $xc->findvalue("x:h2/x:a[2]/\@name",$releasedivs[$i]) :
 194                     $releasedivs[$i]->findvalue("h2/a[2]/\@name");
 195             if ($x) {
 196                 if ($x eq $Opt{vdistro}) {
 197                     $releasediv = $i;
 198                     $picked = " (picked)";
 199                 }
 200                 print STDERR "FOUND DISTRO: $x$picked\n" unless $Opt{quiet};
 201             } else {
 202                 ($x) = $nsu ?
 203                     $xc->findvalue("x:h2/x:a[1]/\@name",$releasedivs[$i]) :
 204                         $releasedivs[$i]->findvalue("h2/a[1]/\@name");
 205                 if ($x eq $fallbacktoversion) {
 206                     $releasediv = $i;
 207                     $picked = " (picked)";
 208                 }
 209                 print STDERR "FOUND VERSION: $x$picked\n" unless $Opt{quiet};
 210             }
 211         }
 212     } else {
 213         $excuse_string = "any distro";
 214     }
 215     unless (defined $releasediv) {
 216         $releasediv = 0;
 217     }
 218     # using a[1] because a[2] is missing on the first entry
 219     ($selected_release_distrov) = $nsu ?
 220         $xc->findvalue("x:h2/x:a[1]/\@name",$releasedivs[$releasediv]) :
 221             $releasedivs[$releasediv]->findvalue("h2/a[1]/\@name");
 222     ($selected_release_ul) = $nsu ?
 223         $xc->findnodes("x:ul",$releasedivs[$releasediv]) :
 224             $releasedivs[$releasediv]->findnodes("ul");
 225     unless (defined $selected_release_distrov) {
 226         warn "Warning: could not find $excuse_string in '$ctarget'";
 227         return;
 228     }
 229     print STDERR "SELECTED: $selected_release_distrov\n" unless $Opt{quiet};
 230     my($id);
 231     my @all;
 232     for my $test ($nsu ?
 233                   $xc->findnodes("x:li",$selected_release_ul) :
 234                   $selected_release_ul->findnodes("li")) {
 235         $id = $nsu ?
 236             $xc->findvalue("x:a[1]/text()",$test)     :
 237                 $test->findvalue("a[1]/text()");
 238         push @all, {id=>$id};
 239         return if $Signal;
 240     }
 241     return \@all;
 242 }
 243
 244 sub _parse_yaml {
 245     my($ctarget, %Opt) = @_;
 246     require YAML::Syck;
 247     my $arr = YAML::Syck::LoadFile($ctarget);
 248     my($selected_release_ul,$selected_release_distrov,$excuse_string);
 249     if ($Opt{vdistro}) {
 250         $excuse_string = "selected distro '$Opt{vdistro}'";
 251         $arr = [grep {$_->{distversion} eq $Opt{vdistro}} @$arr];
 252         ($selected_release_distrov) = $arr->[0]{distversion};
 253     } else {
 254         $excuse_string = "any distro";
 255         my $last_addition;
 256         my %seen;
 257         for my $report (@$arr) {
 258             unless ($seen{$report->{distversion}}++) {
 259                 $last_addition = $report->{distversion};
 260             }
 261         }
 262         $arr = [grep {$_->{distversion} eq $last_addition} @$arr];
 263         ($selected_release_distrov) = $last_addition;
 264     }
 265     unless ($selected_release_distrov) {
 266         warn "Warning: could not find $excuse_string in '$ctarget'";
 267         return;
 268     }
 269     print STDERR "SELECTED: $selected_release_distrov\n" unless $Opt{quiet};
 270     my @all;
 271     for my $test (@$arr) {
 272         my $id = $test->{id};
 273         push @all, {id=>$id};
 274         return if $Signal;
 275     }
 276     @all = sort { $b->{id} <=> $a->{id} } @all;
 277     return \@all;
 278 }
 279
 280 sub parse_single_report {
 281     my($report, $dumpvars, %Opt) = @_;
 282     my($id) = $report->{id};
 283     $Opt{cachedir} ||= "$ENV{HOME}/var/cpantesters";
 284     my $nnt_dir = "$Opt{cachedir}/nntp-testers";
 285     mkpath $nnt_dir;
 286     my $target = "$nnt_dir/$id";
 287     if ($Opt{local}) {
 288         unless (-e $target) {
 289             die {severity=>0,text=>"Warning: No local file '$target' found, skipping\n"};
 290         }
 291     } else {
 292         if (! -e $target) {
 293             print STDERR "Fetching $target..." if $Opt{verbose} && !$Opt{quiet};
 294             $Opt{transport} ||= $default_transport;
 295             if ($Opt{transport} eq "nntp") {
 296                 my $article = _nntp->article($id);
 297                 unless ($article) {
 298                     die {severity=>0,text=>"NNTP-Server did not return an article for id[$id]"};
 299                 }
 300                 open my $fh, ">", $target or die {severity=>1,text=>"Could not open >$target: $!"};
 301                 print $fh @$article;
 302             } elsif ($Opt{transport} eq "http") {
 303                 my $resp = _ua->mirror("http://www.nntp.perl.org/group/perl.cpan.testers/$id",$target);
 304                 if ($resp->is_success) {
 305                     if ($Opt{verbose}) {
 306                         my(@stat) = stat $target;
 307                         my $timestamp = gmtime $stat[9];
 308                         print STDERR "(timestamp $timestamp GMT)\n" unless $Opt{quiet};
 309                         if ($Opt{verbose} > 1) {
 310                             print STDERR $resp->headers->as_string unless $Opt{quiet};
 311                         }
 312                     }
 313                     my $headers = "$target.headers";
 314                     open my $fh, ">", $headers or die {severity=>1,text=>"Could not open >$headers: $!"};
 315                     print $fh $resp->headers->as_string;
 316                 } else {
 317                     die {severity=>0,
 318                              text=>sprintf "HTTP Server Error[%s] for id[%s]", $resp->status_line, $id};
 319                 }
 320             } else {
 321                 die {severity=>1,text=>"Illegal value for --transport: '$Opt{transport}'"};
 322             }
 323         }
 324     }
 325     parse_report($target, $dumpvars, %Opt);
 326 }
 327
 328 sub parse_distro {
 329     my($distro,%Opt) = @_;
 330     my %dumpvars;
 331     $Opt{cachedir} ||= "$ENV{HOME}/var/cpantesters";
 332     my $cts_dir = "$Opt{cachedir}/cpantesters-show";
 333     mkpath $cts_dir;
 334     if ($Opt{solve}) {
 335         require Statistics::Regression;
 336         $Opt{dumpvars} = "." unless defined $Opt{dumpvars};
 337     }
 338     if (!$Opt{vdistro} && $distro =~ /^(.+)-(?i:v?\d+)(?:\.\d+)*\w*$/) {
 339         $Opt{vdistro} = $distro;
 340         $distro = $1;
 341     }
 342     my $ctarget = _download_overview($cts_dir, $distro, %Opt);
 343     my $reports;
 344     $Opt{ctformat} ||= $default_ctformat;
 345     if ($Opt{ctformat} eq "html") {
 346         $reports = _parse_html($ctarget,%Opt);
 347     } else {
 348         $reports = _parse_yaml($ctarget,%Opt);
 349     }
 350     return unless $reports;
 351     for my $report (@$reports) {
 352         eval {parse_single_report($report, \%dumpvars, %Opt)};
 353         if ($@) {
 354             if (ref $@) {
 355                 if ($@->{severity}) {
 356                     die $@->{text};
 357                 } else {
 358                     warn $@->{text};
 359                 }
 360             } else {
 361                 die $@;
 362             }
 363         }
 364         last if $Signal;
 365     }
 366     if ($Opt{dumpvars}) {
 367         require YAML::Syck;
 368         my $dumpfile = $Opt{dumpfile} || "ctgetreports.out";
 369         open my $fh, ">", $dumpfile or die "Could not open '$dumpfile' for writing: $!";
 370         print $fh YAML::Syck::Dump(\%dumpvars);
 371         close $fh or die "Could not close '$dumpfile': $!"
 372     }
 373     if ($Opt{solve}) {
 374         solve(\%dumpvars,%Opt);
 375     }
 376 }
 377
 378 =head2 $extract = parse_report($target,$dumpvars,%Opt)
 379
 380 Reads one report. $target is the local filename to read. $dumpvars is
 381 a hashref which gets filled with descriptive stats about
 382 PASS/FAIL/etc. %Opt are the options as described in the
 383 C<ctgetreports> manpage. $extract is a hashref containing the found
 384 variables.
 385
 386 Note: this parsing is a bit dirty but as it seems good enough I'm not
 387 inclined to change it. We parse HTML with regexps only, not an HTML
 388 parser. Only the entities are decoded.
 389
 390 Update around version 0.0.17: switching to nntp now but keeping the
 391 parser for HTML around to read old local copies.
 392
 393 Update around 0.0.18: In %Options you can use
 394
 395     article => $some_full_article_as_scalar
 396
 397 to use this function to parse one full article as text. When this is
 398 given, the argument $target is not read, but its basename is taken to
 399 be the id of the article. (OMG, hackers!)
 400
 401 =cut
 402 sub parse_report {
 403     my($target,$dumpvars,%Opt) = @_;
 404     our @q;
 405     my $id = basename($target);
 406     my($ok,$about);
 407
 408     my(%extract);
 409
 410     my($report,$isHTML);
 411     if ($report = $Opt{article}) {
 412         $isHTML = $report =~ /^</;
 413         undef $target;
 414     }
 415     if ($target) {
 416         open my $fh, $target or die "Could not open '$target': $!";
 417         local $/;
 418         my $raw_report = <$fh>;
 419         $isHTML = $raw_report =~ /^</;
 420         $report = $isHTML ? decode_entities($raw_report) : $raw_report;
 421         close $fh;
 422     }
 423     my @qr = map /^qr:(.+)/, @{$Opt{q}};
 424     if ($Opt{raw} || @qr) {
 425         for my $qr (@qr) {
 426             my $cqr = eval "qr{$qr}";
 427             die "Could not compile regular expression '$qr': $@" if $@;
 428             my(@matches) = $report =~ $cqr;
 429             my $v;
 430             if (@matches) {
 431                 if (@matches==1) {
 432                     $v = $matches[0];
 433                 } else {
 434                     $v = join "", map {"($_)"} @matches;
 435                 }
 436             } else {
 437                 $v = "";
 438             }
 439             $extract{"qr:$qr"} = $v;
 440         }
 441     }
 442
 443     my $report_writer;
 444     my $moduleunpack = {};
 445     my $expect_prereq = 0;
 446     my $expect_toolchain = 0;
 447     my $expecting_toolchain_soon = 0;
 448
 449     my $in_summary = 0;
 450     my $in_prg_output = 0;
 451     my $in_env_context = 0;
 452
 453     my $current_headline;
 454     my @previous_line = ""; # so we can neutralize line breaks
 455     my @rlines = split /\r?\n/, $report;
 456  LINE: for (@rlines) {
 457         next LINE unless ($isHTML ? m/<title>(\S+)\s+(\S+)/ : m/^Subject: (\S+)\s+(\S+)/);
 458         $ok = $1;
 459         $about = $2;
 460         $extract{"meta:ok"}    = $ok;
 461         $extract{"meta:about"} = $about;
 462         last;
 463     }
 464  LINE: while (@rlines) {
 465         $_ = shift @rlines;
 466         while (/!$/) {
 467             my $followupline = shift @rlines;
 468             $followupline =~ s/^\s+//; # remo leading space
 469             $_ .= $followupline;
 470         }
 471         if (/^--------/ && $previous_line[-2] && $previous_line[-2] =~ /^--------/) {
 472             $current_headline = $previous_line[-1];
 473             if ($current_headline =~ /PROGRAM OUTPUT/) {
 474                 $in_prg_output = 1;
 475             } else {
 476                 $in_prg_output = 0;
 477             }
 478             if ($current_headline =~ /ENVIRONMENT AND OTHER CONTEXT/) {
 479                 $in_env_context = 1;
 480             } else {
 481                 $in_env_context = 0;
 482             }
 483         }
 484         if ($extract{"meta:perl"}) {
 485             if (    $in_summary
 486                 and !$extract{"conf:git_commit_id"}
 487                 and /Commit id: ([[:xdigit:]]+)/) {
 488                 $extract{"conf:git_commit_id"} = $1;
 489             }
 490         } else {
 491             my $p5;
 492             if (0) {
 493             } elsif (/Summary of my perl5 \((.+)\) configuration:/) {
 494                 $p5 = $1;
 495                 $in_summary = 1;
 496                 $in_env_context = 0;
 497             }
 498             if ($p5) {
 499                 my($r,$v,$s,$p);
 500                 if (($r,$v,$s,$p) = $p5 =~ /revision (\S+) version (\S+) subversion (\S+) patch (\S+)/) {
 501                     $r =~ s/\.0//; # 5.0 6 2!
 502                     $extract{"meta:perl"} = "$r.$v.$s\@$p";
 503                 } elsif (($r,$v,$s) = $p5 =~ /revision (\S+) version (\S+) subversion (\S+)/) {
 504                     $r =~ s/\.0//;
 505                     $extract{"meta:perl"} = "$r.$v.$s";
 506                 } elsif (($r,$v,$s) = $p5 =~ /(\d+\S*) patchlevel (\S+) subversion (\S+)/) {
 507                     $r =~ s/\.0//;
 508                     $extract{"meta:perl"} = "$r.$v.$s";
 509                 } else {
 510                     $extract{"meta:perl"} = $p5;
 511                 }
 512             }
 513         }
 514         unless ($extract{"meta:from"}) {
 515             if (0) {
 516             } elsif ($isHTML ?
 517                      m|<div class="h_name">From:</div> <b>(.+?)</b><br/>| :
 518                      m|^From: (.+)|
 519                     ) {
 520                 $extract{"meta:from"} = $1;
 521             }
 522             $extract{"meta:from"} =~ s/\.$// if $extract{"meta:from"};
 523         }
 524         unless ($extract{"meta:date"}) {
 525             if (0) {
 526             } elsif ($isHTML ?
 527                      m|<div class="h_name">Date:</div> (.+?)<br/>| :
 528                      m|^Date: (.+)|
 529                     ) {
 530                 my $date = $1;
 531                 my($dt);
 532                 if ($isHTML) {
 533                     my $p;
 534                     $p = DateTime::Format::Strptime->new(
 535                                                          locale => "en",
 536                                                          time_zone => "UTC",
 537                                                          # April 13, 2005 23:50
 538                                                          pattern => "%b %d, %Y %R",
 539                                                         );
 540                     $dt = $p->parse_datetime($date);
 541                 } else {
 542                     # Sun, 28 Sep 2008 12:23:12 +0100 # but was not consistent
 543                     # pattern => "%a, %d %b %Y %T %z",
 544                     $dt = DateTime::Format::DateParse->parse_datetime($date);
 545                 }
 546                 unless ($dt) {
 547                     warn "Could not parse date[$date], setting to epoch 0";
 548                     $dt = DateTime->from_epoch( epoch => 0 );
 549                 }
 550                 $extract{"meta:date"} = $dt->datetime;
 551             }
 552             $extract{"meta:date"} =~ s/\.$// if $extract{"meta:date"};
 553         }
 554         unless ($extract{"meta:writer"}) {
 555             for ("$previous_line[-1] $_") {
 556                 if (0) {
 557                 } elsif (/CPANPLUS, version (\S+)/) {
 558                     $extract{"meta:writer"} = "CPANPLUS $1";
 559                 } elsif (/created (?:automatically )?by (\S+)/) {
 560                     $extract{"meta:writer"} = $1;
 561                 } elsif (/This report was machine-generated by (\S+) (\S+)/) {
 562                     $extract{"meta:writer"} = "$1 $2";
 563                 }
 564                 $extract{"meta:writer"} =~ s/[\.,]$// if $extract{"meta:writer"};
 565             }
 566         }
 567         if ($in_summary) {
 568             # we do that first three lines a bit too often
 569             my $qr = $Opt{dumpvars} || "";
 570             $qr = qr/$qr/ if $qr;
 571             unless (@q) {
 572                 @q = @{$Opt{q}||[]};
 573                 @q = qw(meta:perl conf:archname conf:usethreads conf:optimize meta:writer meta:from) unless @q;
 574             }
 575
 576             my %conf_vars = map {($_ => 1)} grep { /^conf:/ } @q;
 577
 578             if (/^\s*$/ || m|</pre>|) {
 579                 # if not html, we have reached the end now
 580                 $in_summary = 0;
 581             } else {
 582                 my(%kv) = /\G,?\s*([^=]+)=('[^']+?'|\S+)/gc;
 583                 while (my($k,$v) = each %kv) {
 584                     my $ck = "conf:$k";
 585                     $ck =~ s/\s+$//;
 586                     $v =~ s/,$//;
 587                     if ($v =~ /^'(.*)'$/) {
 588                         $v = $1;
 589                     }
 590                     $v =~ s/^\s+//;
 591                     $v =~ s/\s+$//;
 592                     if ($qr && $ck =~ $qr) {
 593                         $extract{$ck} = $v;
 594                     } elsif ($conf_vars{$ck}) {
 595                         $extract{$ck} = $v;
 596                     }
 597                 }
 598             }
 599         }
 600         if ($in_prg_output) {
 601             unless ($extract{"meta:output_from"}) {
 602                 if (/Output from (.+):$/) {
 603                     $extract{"meta:output_from"} = $1
 604                 }
 605             }
 606         }
 607         if ($in_env_context) {
 608             if (/^\s{4}(\S+)\s*=\s*(.*)$/) {
 609                 $extract{"env:$1"} = $2;
 610             }
 611         }
 612         push @previous_line, $_;
 613         if ($expect_prereq || $expect_toolchain) {
 614             if (exists $moduleunpack->{type}) {
 615                 my($module,$v);
 616                 if ($moduleunpack->{type} == 1) {
 617                     (my $leader,$module,undef,$v) = eval { unpack $moduleunpack->{tpl}, $_; };
 618                     next LINE if $@;
 619                     if ($leader =~ /^-/) {
 620                         $moduleunpack = {};
 621                         $expect_prereq = 0;
 622                         next LINE;
 623                     } elsif ($leader =~ /^(
 624                                          buil          # build_requires:
 625                                         )/x) {
 626                         next LINE;
 627                     } elsif ($module =~ /^(
 628                                          -             # line drawing
 629                                         )/x) {
 630                         next LINE;
 631                     }
 632                 } elsif ($moduleunpack->{type} == 2) {
 633                     (my $leader,$module,$v) = eval { unpack $moduleunpack->{tpl}, $_; };
 634                     next LINE if $@;
 635                     if ($leader =~ /^\*/) {
 636                         $moduleunpack = {};
 637                         $expect_prereq = 0;
 638                         next LINE;
 639                     } elsif ($v =~ /\s/) {
 640                         ($module,$v) = split " ", $_;
 641                     }
 642                 } elsif ($moduleunpack->{type} == 3) {
 643                     (my $leader,$module,$v) = eval { unpack $moduleunpack->{tpl}, $_; };
 644                     next LINE if $@;
 645                     if (!$module) {
 646                         $moduleunpack = {};
 647                         $expect_toolchain = 0;
 648                         next LINE;
 649                     } elsif ($module =~ /^-/) {
 650                         next LINE;
 651                     }
 652                 }
 653                 $module =~ s/\s+$//;
 654                 if ($module) {
 655                     $v =~ s/^\s+//;
 656                     $v =~ s/\s+$//;
 657                     if ($v eq "Have") {
 658                         next LINE;
 659                     }
 660                     $extract{"mod:$module"} = $v;
 661                 }
 662             }
 663             if (/(\s+)(Module\s+)(Need\s+)Have/) {
 664                 $in_env_context = 0;
 665                 $moduleunpack = {
 666                                  tpl => 'a'.length($1).'a'.length($2).'a'.length($3).'a*',
 667                                  type => 1,
 668                                 };
 669             } elsif (/(\s+)(Module Name\s+)(Have)(\s+)Want/) {
 670                 $in_env_context = 0;
 671                 my $adjust_1 = 0;
 672                 my $adjust_2 = -length($4);
 673                 my $adjust_3 = length($4);
 674                 # two pass would be required to see where the
 675                 # columns really are. Or could we get away with split?
 676                 $moduleunpack = {
 677                                  tpl => 'a'.length($1).'a'.(length($2)+$adjust_2).'a'.(length($3)+$adjust_3),
 678                                  type => 2,
 679                                 };
 680             }
 681         }
 682         if (/PREREQUISITES|Prerequisite modules loaded/) {
 683             $in_env_context = 0;
 684             $expect_prereq=1;
 685         }
 686         if ($expecting_toolchain_soon) {
 687             if (/(\s+)(Module\s+) Have/) {
 688                 $in_env_context = 0;
 689                 $expect_toolchain=1;
 690                 $expecting_toolchain_soon=0;
 691                 $moduleunpack = {
 692                                  tpl => 'a'.length($1).'a'.length($2).'a*',
 693                                  type => 3,
 694                                 };
 695             }
 696         }
 697         if (/toolchain versions installed/) {
 698             $in_env_context = 0;
 699             $expecting_toolchain_soon=1;
 700         }
 701     }                           # LINE
 702     if ($Opt{solve}) {
 703         $extract{id} = $id;
 704         if ($extract{"conf:osvers"} && $extract{"conf:archname"}) {
 705             $extract{"conf:archame+osvers"} = join " ", @extract{"conf:archname","conf:osvers"};
 706         }
 707         my $data = $dumpvars->{"==DATA=="} ||= [];
 708         push @$data, \%extract;
 709     }
 710     # ---- %extract finished ----
 711     my $diag = "";
 712     if (my $qr = $Opt{dumpvars}) {
 713         $qr = qr/$qr/;
 714         while (my($k,$v) = each %extract) {
 715             if ($k =~ $qr) {
 716                 $dumpvars->{$k}{$v}{$ok}++;
 717             }
 718         }
 719     }
 720     for my $want (@q) {
 721         my $have  = $extract{$want} || "";
 722         $diag .= " $want\[$have]";
 723     }
 724     printf STDERR " %-4s %8d%s\n", $ok, $id, $diag unless $Opt{quiet};
 725     if ($Opt{raw}) {
 726         $report =~ s/\s+\z//;
 727         print STDERR $report, "\n================\n" unless $Opt{quiet};
 728     }
 729     if ($Opt{interactive}) {
 730         require IO::Prompt;
 731         local @ARGV;
 732         local $ARGV;
 733         my $ans = IO::Prompt::prompt
 734             (
 735              -p => "View $id? [onechar: ynq] ",
 736              -d => "y",
 737              -u => qr/[ynq]/,
 738              -onechar,
 739             );
 740         print STDERR "\n" unless $Opt{quiet};
 741         if ($ans eq "y") {
 742             open my $ifh, "<", $target or die "Could not open $target: $!";
 743             $Opt{pager} ||= "less";
 744             open my $lfh, "|-", $Opt{pager} or die "Could not fork '$Opt{pager}': $!";
 745             local $/;
 746             print {$lfh} <$ifh>;
 747             close $ifh or die "Could not close $target: $!";
 748             close $lfh or die "Could not close pager: $!"
 749         } elsif ($ans eq "q") {
 750             $Signal++;
 751             return;
 752         }
 753     }
 754     return \%extract;
 755 }
 756
 757 =head2 solve
 758
 759 Feeds a couple of potentially interesting data to
 760 Statistics::Regression and sorts the result by R^2 descending. Do not
 761 confuse this with a prove, rather take it as a useful hint. It can
 762 save you minutes of staring at data and provide a quick overview where
 763 one should look closer. Displays the N top candidates, where N
 764 defaults to 3 and can be set with the C<$Opt{solvetop}> variable.
 765 Regressions results with an R^2 of 1.00 are displayed in any case.
 766
 767 The function is called when the option C<-solve> is give on the
 768 commandline. Several extra config variables are calculated, see source
 769 code for details.
 770
 771 =cut
 772 {
 773     my %never_solve_on = map {($_ => 1)}
 774         (
 775          "conf:ccflags",
 776          "conf:config_args",
 777          "conf:cppflags",
 778          "conf:lddlflags",
 779          "conf:uname",
 780          "env:PATH",
 781          "env:PERL5LIB",
 782          "env:PERL5OPT",
 783          'env:$^X',
 784          'env:$EGID',
 785          'env:$GID',
 786          'env:$UID/$EUID',
 787          'env:PERL5_CPANPLUS_IS_RUNNING',
 788          'env:PERL5_CPAN_IS_RUNNING',
 789          'env:PERL5_CPAN_IS_RUNNING_IN_RECURSION',
 790          'meta:ok',
 791         );
 792     my %normalize_numeric =
 793         (
 794          id => sub { return shift },
 795          'meta:date' => sub {
 796              my($Y,$M,$D,$h,$m,$s) = shift =~ /(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)/;
 797              Time::Local::timegm($s,$m,$h,$D,$M-1,$Y);
 798          },
 799         );
 800     my %normalize_value =
 801         (
 802          'meta:perl' => sub {
 803              my($perlatpatchlevel) = shift;
 804              my $perl = $perlatpatchlevel;
 805              $perl =~ s/\@.*//;
 806              $perl;
 807          },
 808         );
 809 sub solve {
 810     my($V,%Opt) = @_;
 811     require Statistics::Regression;
 812     my @regression;
 813     my $ycb;
 814     if (my $ycbbody = $Opt{ycb}) {
 815         $ycb = eval('sub {'.$ycbbody.'}');
 816         die if $@;
 817     } else {
 818         $ycb = sub {
 819             my $rec = shift;
 820             my $y;
 821             if ($rec->{"meta:ok"} eq "PASS") {
 822                 $y = 1;
 823             } elsif ($rec->{"meta:ok"} eq "FAIL") {
 824                 $y = 0;
 825             }
 826             return $y
 827         };
 828     }
 829   VAR: for my $variable (sort keys %$V) {
 830         next if $variable eq "==DATA==";
 831         if ($never_solve_on{$variable}){
 832             warn "Skipping '$variable'\n" unless $Opt{quiet};
 833             next VAR;
 834         }
 835         my $value_distribution = $V->{$variable};
 836         my $keys = keys %$value_distribution;
 837         my @X = qw(const);
 838         if ($normalize_numeric{$variable}) {
 839             push @X, "n_$variable";
 840         } else {
 841             my %seen = ();
 842             for my $value (sort keys %$value_distribution) {
 843                 my $pf = $value_distribution->{$value};
 844                 $pf->{PASS} ||= 0;
 845                 $pf->{FAIL} ||= 0;
 846                 if ($pf->{PASS} || $pf->{FAIL}) {
 847                     my $Xele = sprintf "eq_%s",
 848                         (
 849                          $normalize_value{$variable} ?
 850                          $normalize_value{$variable}->($value) :
 851                          $value
 852                         );
 853                     push @X, $Xele unless $seen{$Xele}++;
 854
 855                 }
 856                 if (
 857                     $pf->{PASS} xor $pf->{FAIL}
 858                    ) {
 859                     my $vl = 40;
 860                     substr($value,$vl) = "..." if length $value > 3+$vl;
 861                     my $poor_mans_freehand_estimation = 0;
 862                     if ($poor_mans_freehand_estimation) {
 863                         warn sprintf
 864                             (
 865                              "%4d %4d %-23s | %s\n",
 866                              $pf->{PASS},
 867                              $pf->{FAIL},
 868                              $variable,
 869                              $value,
 870                             );
 871                     }
 872                 }
 873             }
 874         }
 875         warn "variable[$variable]keys[$keys]X[@X]\n" unless $Opt{quiet};
 876         next VAR unless @X > 1;
 877         my %regdata =
 878             (
 879              X => \@X,
 880              data => [],
 881             );
 882       RECORD: for my $rec (@{$V->{"==DATA=="}}) {
 883             my $y = $ycb->($rec);
 884             next RECORD unless defined $y;
 885             my %obs;
 886             $obs{Y} = $y;
 887             @obs{@X} = (0) x @X;
 888             $obs{const} = 1;
 889             for my $x (@X) {
 890                 if ($x =~ /^eq_(.+)/) {
 891                     my $read_v = $1;
 892                     if (exists $rec->{$variable}
 893                         && defined $rec->{$variable}
 894                        ) {
 895                         my $use_v = (
 896                                      $normalize_value{$variable} ?
 897                                      $normalize_value{$variable}->($rec->{$variable}) :
 898                                      $rec->{$variable}
 899                                     );
 900                         if ($use_v eq $read_v) {
 901                             $obs{$x} = 1;
 902                         }
 903                     }
 904                     # warn "DEBUG: y[$y]x[$x]obs[$obs{$x}]\n";
 905                 } elsif ($x =~ /^n_(.+)/) {
 906                     my $v = $1;
 907                     $obs{$x} = $normalize_numeric{$v}->($rec->{$v});
 908                 }
 909             }
 910             push @{$regdata{data}}, \%obs;
 911         }
 912         _run_regression ($variable, \%regdata, \@regression, \%Opt);
 913     }
 914     my $top = min ($Opt{solvetop} || 3, scalar @regression);
 915     my $max_rsq = sum map {1==$_->rsq ? 1 : 0} @regression;
 916     $top = $max_rsq if $max_rsq && $max_rsq > $top;
 917     my $score = 0;
 918     printf
 919         (
 920          "State after regression testing: %d results, showing top %d\n\n",
 921          scalar @regression,
 922          $top,
 923         );
 924     for my $reg (sort {
 925                      $b->rsq <=> $a->rsq
 926                      ||
 927                      $a->k <=> $b->k
 928                  } @regression) {
 929         printf "(%d)\n", ++$score;
 930         eval { $reg->print; };
 931         if ($@) {
 932             printf "\n\nOops, Statistics::Regression died during ->print() with error message[$@]\n\n";
 933         }
 934         last if --$top <= 0;
 935     }
 936 }
 937 }
 938
 939 sub _run_regression {
 940     my($variable,$regdata,$regression,$opt) = @_;
 941     my @X = @{$regdata->{X}};
 942     while (@X > 1) {
 943         my $reg = Statistics::Regression->new($variable,\@X);
 944         for my $obs (@{$regdata->{data}}) {
 945             my $y = delete $obs->{Y};
 946             $reg->include($y, $obs);
 947             $obs->{Y} = $y;
 948         }
 949         eval {$reg->theta;
 950               my @e = $reg->standarderrors;
 951               die "found standarderrors == 0" if grep { 0 == $_ } @e;
 952               $reg->rsq;};
 953         if ($@) {
 954             if ($opt->{verbose} && $opt->{verbose}>=2) {
 955                 require YAML::Syck;
 956                 warn YAML::Syck::Dump
 957                     ({error=>"could not determine some regression parameters",
 958                       variable=>$variable,
 959                       k=>$reg->k,
 960                       n=>$reg->n,
 961                       X=>$regdata->{"X"},
 962                       errorstr => $@,
 963                      });
 964             }
 965             # reduce k in case that linear dependencies disturbed us
 966             splice @X, 1, 1;
 967         } else {
 968             # $reg->print;
 969             push @$regression, $reg;
 970             return;
 971         }
 972     }
 973 }
 974
 975 =head1 AUTHOR
 976
 977 Andreas König
 978
 979 =head1 BUGS
 980
 981 Please report any bugs or feature requests through the web
 982 interface at
 983 L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=CPAN-Testers-ParseReport>.
 984 I will be notified, and then you'll automatically be notified of
 985 progress on your bug as I make changes.
 986
 987 =head1 SUPPORT
 988
 989 You can find documentation for this module with the perldoc command.
 990
 991     perldoc CPAN::Testers::ParseReport
 992
 993
 994 You can also look for information at:
 995
 996 =over 4
 997
 998 =item * RT: CPAN's request tracker
 999
1000 L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=CPAN-Testers-ParseReport>
1001
1002 =item * AnnoCPAN: Annotated CPAN documentation
1003
1004 L<http://annocpan.org/dist/CPAN-Testers-ParseReport>
1005
1006 =item * CPAN Ratings
1007
1008 L<http://cpanratings.perl.org/d/CPAN-Testers-ParseReport>
1009
1010 =item * Search CPAN
1011
1012 L<http://search.cpan.org/dist/CPAN-Testers-ParseReport>
1013
1014 =back
1015
1016
1017 =head1 ACKNOWLEDGEMENTS
1018
1019 Thanks to RJBS for module-starter.
1020
1021 =head1 COPYRIGHT & LICENSE
1022
1023 Copyright 2008 Andreas König.
1024
1025 This program is free software; you can redistribute it and/or modify it
1026 under the same terms as Perl itself.
1027
1028
1029 =cut
1030
1031 1; # End of CPAN::Testers::ParseReport