lib/CPAN/Testers/ParseReport.pm

   1 package CPAN::Testers::ParseReport;
   2
   3 use warnings;
   4 use strict;
   5
   6 use DateTime::Format::Strptime;
   7 use DateTime::Format::DateParse;
   8 use File::Basename qw(basename);
   9 use File::Path qw(mkpath);
  10 use HTML::Entities qw(decode_entities);
  11 use LWP::UserAgent;
  12 use List::Util qw(max min sum);
  13 use Net::NNTP ();
  14 use Time::Local ();
  15 use XML::LibXML;
  16 use XML::LibXML::XPathContext;
  17
  18 our $default_ctformat = "yaml";
  19 our $default_transport = "nntp";
  20 our $default_cturl = "http://www.cpantesters.org/show";
  21 our $Signal = 0;
  22
  23 =encoding utf-8
  24
  25 =head1 NAME
  26
  27 CPAN::Testers::ParseReport - parse reports to www.cpantesters.org from various sources
  28
  29 =cut
  30
  31 use version; our $VERSION = qv('0.1.0');
  32
  33 =head1 SYNOPSIS
  34
  35 The documentation in here is normally not needed because the code is
  36 meant to be run from a standalone program, L<ctgetreports>.
  37
  38   ctgetreports --q mod:Moose Devel-Events
  39
  40 =head1 DESCRIPTION
  41
  42 This is the core module for CPAN::Testers::ParseReport. If you're not
  43 looking to extend or alter the behaviour of this module, you probably
  44 want to look at L<ctgetreports> instead.
  45
  46 =head1 OPTIONS
  47
  48 Are described in the L<ctgetreports> manpage and are passed through to
  49 the functions unaltered.
  50
  51 =head1 FUNCTIONS
  52
  53 =head2 parse_distro($distro,%options)
  54
  55 reads the cpantesters HTML page or the YAML file for the distro and
  56 loops through the reports for the specified or most recent version of
  57 that distro found in these data.
  58
  59 =head2 parse_single_report($report,$dumpvars,%options)
  60
  61 mirrors and reads this report. $report is of the form
  62
  63   { id => number }
  64
  65 $dumpvar is a hashreference that gets filled with data.
  66
  67 =cut
  68
  69 {
  70     my $ua;
  71     sub _ua {
  72         return $ua if $ua;
  73         $ua = LWP::UserAgent->new
  74             (
  75              keep_alive => 1,
  76              env_proxy => 1,
  77             );
  78         $ua->parse_head(0);
  79         $ua;
  80     }
  81 }
  82
  83 {
  84     my $nntp;
  85     sub _nntp {
  86         return $nntp if $nntp;
  87         $nntp = Net::NNTP->new("nntp.perl.org");
  88         $nntp->group("perl.cpan.testers");
  89         return $nntp;
  90     }
  91 }
  92
  93 {
  94     my $xp;
  95     sub _xp {
  96         return $xp if $xp;
  97         $xp = XML::LibXML->new;
  98         $xp->keep_blanks(0);
  99         $xp->clean_namespaces(1);
 100         my $catalog = __FILE__;
 101         $catalog =~ s|ParseReport.pm$|ParseReport/catalog|;
 102         $xp->load_catalog($catalog);
 103         return $xp;
 104     }
 105 }
 106
 107 sub _download_overview {
 108     my($cts_dir, $distro, %Opt) = @_;
 109     my $format = $Opt{ctformat} ||= $default_ctformat;
 110     my $cturl = $Opt{cturl} ||= $default_cturl;
 111     my $ctarget = "$cts_dir/$distro.$format";
 112     my $cheaders = "$cts_dir/$distro.headers";
 113     if ($Opt{local}) {
 114         unless (-e $ctarget) {
 115             die "Alert: No local file '$ctarget' found, cannot continue\n";
 116         }
 117     } else {
 118         if (! -e $ctarget or -M $ctarget > .25) {
 119             if (-e $ctarget && $Opt{verbose}) {
 120                 my(@stat) = stat _;
 121                 my $timestamp = gmtime $stat[9];
 122                 print STDERR "(timestamp $timestamp GMT)\n" unless $Opt{quiet};
 123             }
 124             print STDERR "Fetching $ctarget..." if $Opt{verbose} && !$Opt{quiet};
 125             my $uri = "$cturl/$distro.$format";
 126             my $resp = _ua->mirror($uri,$ctarget);
 127             if ($resp->is_success) {
 128                 print STDERR "DONE\n" if $Opt{verbose} && !$Opt{quiet};
 129                 open my $fh, ">", $cheaders or die;
 130                 for ($resp->headers->as_string) {
 131                     print $fh $_;
 132                     if ($Opt{verbose} && $Opt{verbose}>1) {
 133                         print STDERR $_ unless $Opt{quiet};
 134                     }
 135                 }
 136             } elsif (304 == $resp->code) {
 137                 print STDERR "DONE (not modified)\n" if $Opt{verbose} && !$Opt{quiet};
 138                 my $atime = my $mtime = time;
 139                 utime $atime, $mtime, $cheaders;
 140             } else {
 141                 die sprintf
 142                     (
 143                      "No success downloading %s: %s",
 144                      $uri,
 145                      $resp->status_line,
 146                     );
 147             }
 148         }
 149     }
 150     return $ctarget;
 151 }
 152
 153 sub _parse_html {
 154     my($ctarget, %Opt) = @_;
 155     my $content = do { open my $fh, $ctarget or die; local $/; <$fh> };
 156     my $preprocesswithtreebuilder = 0; # not needed since barbie switched to XHTML
 157     if ($preprocesswithtreebuilder) {
 158         require HTML::TreeBuilder;
 159         my $tree = HTML::TreeBuilder->new;
 160         $tree->implicit_tags(1);
 161         $tree->p_strict(1);
 162         $tree->ignore_ignorable_whitespace(0);
 163         $tree->parse_content($content);
 164         $tree->eof;
 165         $content = $tree->as_XML;
 166     }
 167     my $parser = _xp();
 168     my $doc = eval { $parser->parse_string($content) };
 169     my $err = $@;
 170     unless ($doc) {
 171         my $distro = basename $ctarget;
 172         die sprintf "Error while parsing %s\: %s", $distro, $err;
 173     }
 174     my $xc = XML::LibXML::XPathContext->new($doc);
 175     my $nsu = $doc->documentElement->namespaceURI;
 176     $xc->registerNs('x', $nsu) if $nsu;
 177     my($selected_release_ul,$selected_release_distrov,$excuse_string);
 178     my($cparentdiv)
 179         = $nsu ?
 180             $xc->findnodes("/x:html/x:body/x:div[\@id = 'doc']") :
 181                 $doc->findnodes("/html/body/div[\@id = 'doc']");
 182     my(@releasedivs) = $nsu ?
 183         $xc->findnodes("//x:div[x:h2 and x:ul]",$cparentdiv) :
 184             $cparentdiv->findnodes("//div[h2 and ul]");
 185     my $releasediv;
 186     if ($Opt{vdistro}) {
 187         $excuse_string = "selected distro '$Opt{vdistro}'";
 188         my($fallbacktoversion) = $Opt{vdistro} =~ /(\d+\..*)/;
 189         $fallbacktoversion = 0 unless defined $fallbacktoversion;
 190       RELEASE: for my $i (0..$#releasedivs) {
 191             my $picked = "";
 192             my($x) = $nsu ?
 193                 $xc->findvalue("x:h2/x:a[2]/\@name",$releasedivs[$i]) :
 194                     $releasedivs[$i]->findvalue("h2/a[2]/\@name");
 195             if ($x) {
 196                 if ($x eq $Opt{vdistro}) {
 197                     $releasediv = $i;
 198                     $picked = " (picked)";
 199                 }
 200                 print STDERR "FOUND DISTRO: $x$picked\n" unless $Opt{quiet};
 201             } else {
 202                 ($x) = $nsu ?
 203                     $xc->findvalue("x:h2/x:a[1]/\@name",$releasedivs[$i]) :
 204                         $releasedivs[$i]->findvalue("h2/a[1]/\@name");
 205                 if ($x eq $fallbacktoversion) {
 206                     $releasediv = $i;
 207                     $picked = " (picked)";
 208                 }
 209                 print STDERR "FOUND VERSION: $x$picked\n" unless $Opt{quiet};
 210             }
 211         }
 212     } else {
 213         $excuse_string = "any distro";
 214     }
 215     unless (defined $releasediv) {
 216         $releasediv = 0;
 217     }
 218     # using a[1] because a[2] is missing on the first entry
 219     ($selected_release_distrov) = $nsu ?
 220         $xc->findvalue("x:h2/x:a[1]/\@name",$releasedivs[$releasediv]) :
 221             $releasedivs[$releasediv]->findvalue("h2/a[1]/\@name");
 222     ($selected_release_ul) = $nsu ?
 223         $xc->findnodes("x:ul",$releasedivs[$releasediv]) :
 224             $releasedivs[$releasediv]->findnodes("ul");
 225     unless (defined $selected_release_distrov) {
 226         warn "Warning: could not find $excuse_string in '$ctarget'";
 227         return;
 228     }
 229     print STDERR "SELECTED: $selected_release_distrov\n" unless $Opt{quiet};
 230     my($id);
 231     my @all;
 232     for my $test ($nsu ?
 233                   $xc->findnodes("x:li",$selected_release_ul) :
 234                   $selected_release_ul->findnodes("li")) {
 235         $id = $nsu ?
 236             $xc->findvalue("x:a[1]/text()",$test)     :
 237                 $test->findvalue("a[1]/text()");
 238         push @all, {id=>$id};
 239         return if $Signal;
 240     }
 241     return \@all;
 242 }
 243
 244 sub _parse_yaml {
 245     my($ctarget, %Opt) = @_;
 246     require YAML::Syck;
 247     my $arr = YAML::Syck::LoadFile($ctarget);
 248     my($selected_release_ul,$selected_release_distrov,$excuse_string);
 249     if ($Opt{vdistro}) {
 250         $excuse_string = "selected distro '$Opt{vdistro}'";
 251         $arr = [grep {$_->{distversion} eq $Opt{vdistro}} @$arr];
 252         ($selected_release_distrov) = $arr->[0]{distversion};
 253     } else {
 254         $excuse_string = "any distro";
 255         my $last_addition;
 256         my %seen;
 257         for my $report (@$arr) {
 258             unless ($seen{$report->{distversion}}++) {
 259                 $last_addition = $report->{distversion};
 260             }
 261         }
 262         $arr = [grep {$_->{distversion} eq $last_addition} @$arr];
 263         ($selected_release_distrov) = $last_addition;
 264     }
 265     unless ($selected_release_distrov) {
 266         warn "Warning: could not find $excuse_string in '$ctarget'";
 267         return;
 268     }
 269     print STDERR "SELECTED: $selected_release_distrov\n" unless $Opt{quiet};
 270     my @all;
 271     for my $test (@$arr) {
 272         my $id = $test->{id};
 273         push @all, {id=>$id};
 274         return if $Signal;
 275     }
 276     @all = sort { $b->{id} <=> $a->{id} } @all;
 277     return \@all;
 278 }
 279
 280 sub parse_single_report {
 281     my($report, $dumpvars, %Opt) = @_;
 282     my($id) = $report->{id};
 283     $Opt{cachedir} ||= "$ENV{HOME}/var/cpantesters";
 284     my $nnt_dir = "$Opt{cachedir}/nntp-testers";
 285     mkpath $nnt_dir;
 286     my $target = "$nnt_dir/$id";
 287     if ($Opt{local}) {
 288         unless (-e $target) {
 289             die {severity=>0,text=>"Warning: No local file '$target' found, skipping\n"};
 290         }
 291     } else {
 292         if (! -e $target) {
 293             print STDERR "Fetching $target..." if $Opt{verbose} && !$Opt{quiet};
 294             $Opt{transport} ||= $default_transport;
 295             if ($Opt{transport} eq "nntp") {
 296                 my $article = _nntp->article($id);
 297                 unless ($article) {
 298                     die {severity=>0,text=>"NNTP-Server did not return an article for id[$id]"};
 299                 }
 300                 open my $fh, ">", $target or die {severity=>1,text=>"Could not open >$target: $!"};
 301                 print $fh @$article;
 302             } elsif ($Opt{transport} eq "http") {
 303                 my $resp = _ua->mirror("http://www.nntp.perl.org/group/perl.cpan.testers/$id",$target);
 304                 if ($resp->is_success) {
 305                     if ($Opt{verbose}) {
 306                         my(@stat) = stat $target;
 307                         my $timestamp = gmtime $stat[9];
 308                         print STDERR "(timestamp $timestamp GMT)\n" unless $Opt{quiet};
 309                         if ($Opt{verbose} > 1) {
 310                             print STDERR $resp->headers->as_string unless $Opt{quiet};
 311                         }
 312                     }
 313                     my $headers = "$target.headers";
 314                     open my $fh, ">", $headers or die {severity=>1,text=>"Could not open >$headers: $!"};
 315                     print $fh $resp->headers->as_string;
 316                 } else {
 317                     die {severity=>0,
 318                              text=>sprintf "HTTP Server Error[%s] for id[%s]", $resp->status_line, $id};
 319                 }
 320             } else {
 321                 die {severity=>1,text=>"Illegal value for --transport: '$Opt{transport}'"};
 322             }
 323         }
 324     }
 325     parse_report($target, $dumpvars, %Opt);
 326 }
 327
 328 sub parse_distro {
 329     my($distro,%Opt) = @_;
 330     my %dumpvars;
 331     $Opt{cachedir} ||= "$ENV{HOME}/var/cpantesters";
 332     my $cts_dir = "$Opt{cachedir}/cpantesters-show";
 333     mkpath $cts_dir;
 334     if ($Opt{solve}) {
 335         require Statistics::Regression;
 336         $Opt{dumpvars} = "." unless defined $Opt{dumpvars};
 337     }
 338     if (!$Opt{vdistro} && $distro =~ /^(.+)-(?i:v?\d+)(?:\.\d+)*\w*$/) {
 339         $Opt{vdistro} = $distro;
 340         $distro = $1;
 341     }
 342     my $ctarget = _download_overview($cts_dir, $distro, %Opt);
 343     my $reports;
 344     $Opt{ctformat} ||= $default_ctformat;
 345     if ($Opt{ctformat} eq "html") {
 346         $reports = _parse_html($ctarget,%Opt);
 347     } else {
 348         $reports = _parse_yaml($ctarget,%Opt);
 349     }
 350     return unless $reports;
 351     for my $report (@$reports) {
 352         eval {parse_single_report($report, \%dumpvars, %Opt)};
 353         if ($@) {
 354             if (ref $@) {
 355                 if ($@->{severity}) {
 356                     die $@->{text};
 357                 } else {
 358                     warn $@->{text};
 359                 }
 360             } else {
 361                 die $@;
 362             }
 363         }
 364         last if $Signal;
 365     }
 366     if ($Opt{dumpvars}) {
 367         require YAML::Syck;
 368         my $dumpfile = $Opt{dumpfile} || "ctgetreports.out";
 369         open my $fh, ">", $dumpfile or die "Could not open '$dumpfile' for writing: $!";
 370         print $fh YAML::Syck::Dump(\%dumpvars);
 371         close $fh or die "Could not close '$dumpfile': $!"
 372     }
 373     if ($Opt{solve}) {
 374         solve(\%dumpvars,%Opt);
 375     }
 376 }
 377
 378 =head2 $extract = parse_report($target,$dumpvars,%Opt)
 379
 380 Reads one report. $target is the local filename to read. $dumpvars is
 381 a hashref which gets filled with descriptive stats about
 382 PASS/FAIL/etc. %Opt are the options as described in the
 383 C<ctgetreports> manpage. $extract is a hashref containing the found
 384 variables.
 385
 386 Note: this parsing is a bit dirty but as it seems good enough I'm not
 387 inclined to change it. We parse HTML with regexps only, not an HTML
 388 parser. Only the entities are decoded.
 389
 390 Update around version 0.0.17: switching to nntp now but keeping the
 391 parser for HTML around to read old local copies.
 392
 393 Update around 0.0.18: In %Options you can use
 394
 395     article => $some_full_article_as_scalar
 396
 397 to use this function to parse one full article as text. When this is
 398 given, the argument $target is not read, but its basename is taken to
 399 be the id of the article. (OMG, hackers!)
 400
 401 =cut
 402 sub parse_report {
 403     my($target,$dumpvars,%Opt) = @_;
 404     our @q;
 405     my $id = basename($target);
 406     my($ok,$about);
 407
 408     my(%extract);
 409
 410     my($report,$isHTML);
 411     if ($report = $Opt{article}) {
 412         $isHTML = $report =~ /^</;
 413         undef $target;
 414     }
 415     if ($target) {
 416         open my $fh, $target or die "Could not open '$target': $!";
 417         local $/;
 418         my $raw_report = <$fh>;
 419         $isHTML = $raw_report =~ /^</;
 420         $report = $isHTML ? decode_entities($raw_report) : $raw_report;
 421         close $fh;
 422     }
 423     my @qr = map /^qr:(.+)/, @{$Opt{q}};
 424     if ($Opt{raw} || @qr) {
 425         for my $qr (@qr) {
 426             my $cqr = eval "qr{$qr}";
 427             die "Could not compile regular expression '$qr': $@" if $@;
 428             my(@matches) = $report =~ $cqr;
 429             my $v;
 430             if (@matches) {
 431                 if (@matches==1) {
 432                     $v = $matches[0];
 433                 } else {
 434                     $v = join "", map {"($_)"} @matches;
 435                 }
 436             } else {
 437                 $v = "";
 438             }
 439             $extract{"qr:$qr"} = $v;
 440         }
 441     }
 442
 443     my $report_writer;
 444     my $moduleunpack = {};
 445     my $expect_prereq = 0;
 446     my $expect_toolchain = 0;
 447     my $expecting_toolchain_soon = 0;
 448
 449     my $in_summary = 0;
 450     my $in_prg_output = 0;
 451     my $in_env_context = 0;
 452
 453     my $current_headline;
 454     my @previous_line = ""; # so we can neutralize line breaks
 455     my @rlines = split /\r?\n/, $report;
 456  LINE: for (@rlines) {
 457         next LINE unless ($isHTML ? m/<title>(\S+)\s+(\S+)/ : m/^Subject: (\S+)\s+(\S+)/);
 458         $ok = $1;
 459         $about = $2;
 460         $extract{"meta:ok"}    = $ok;
 461         $extract{"meta:about"} = $about;
 462         last;
 463     }
 464  LINE: while (@rlines) {
 465         $_ = shift @rlines;
 466         while (/!$/) {
 467             my $followupline = shift @rlines;
 468             $followupline =~ s/^\s+//; # remo leading space
 469             $_ .= $followupline;
 470         }
 471         if (/^--------/ && $previous_line[-2] && $previous_line[-2] =~ /^--------/) {
 472             $current_headline = $previous_line[-1];
 473             if ($current_headline =~ /PROGRAM OUTPUT/) {
 474                 $in_prg_output = 1;
 475             } else {
 476                 $in_prg_output = 0;
 477             }
 478             if ($current_headline =~ /ENVIRONMENT AND OTHER CONTEXT/) {
 479                 $in_env_context = 1;
 480             } else {
 481                 $in_env_context = 0;
 482             }
 483         }
 484         if ($extract{"meta:perl"}) {
 485             if (    $in_summary
 486                 and !$extract{"conf:git_commit_id"}
 487                 and /Commit id: ([[:xdigit:]]+)/) {
 488                 $extract{"conf:git_commit_id"} = $1;
 489             }
 490         } else {
 491             my $p5;
 492             if (0) {
 493             } elsif (/Summary of my perl5 \((.+)\) configuration:/) {
 494                 $p5 = $1;
 495                 $in_summary = 1;
 496                 $in_env_context = 0;
 497             }
 498             if ($p5) {
 499                 my($r,$v,$s,$p);
 500                 if (($r,$v,$s,$p) = $p5 =~ /revision (\S+) version (\S+) subversion (\S+) patch (\S+)/) {
 501                     $r =~ s/\.0//; # 5.0 6 2!
 502                     $extract{"meta:perl"} = "$r.$v.$s\@$p";
 503                 } elsif (($r,$v,$s) = $p5 =~ /revision (\S+) version (\S+) subversion (\S+)/) {
 504                     $r =~ s/\.0//;
 505                     $extract{"meta:perl"} = "$r.$v.$s";
 506                 } elsif (($r,$v,$s) = $p5 =~ /(\d+\S*) patchlevel (\S+) subversion (\S+)/) {
 507                     $r =~ s/\.0//;
 508                     $extract{"meta:perl"} = "$r.$v.$s";
 509                 } else {
 510                     $extract{"meta:perl"} = $p5;
 511                 }
 512             }
 513         }
 514         unless ($extract{"meta:from"}) {
 515             if (0) {
 516             } elsif ($isHTML ?
 517                      m|<div class="h_name">From:</div> <b>(.+?)</b><br/>| :
 518                      m|^From: (.+)|
 519                     ) {
 520                 $extract{"meta:from"} = $1;
 521             }
 522             $extract{"meta:from"} =~ s/\.$// if $extract{"meta:from"};
 523         }
 524         unless ($extract{"meta:date"}) {
 525             if (0) {
 526             } elsif ($isHTML ?
 527                      m|<div class="h_name">Date:</div> (.+?)<br/>| :
 528                      m|^Date: (.+)|
 529                     ) {
 530                 my $date = $1;
 531                 my($dt);
 532                 if ($isHTML) {
 533                     my $p;
 534                     $p = DateTime::Format::Strptime->new(
 535                                                          locale => "en",
 536                                                          time_zone => "UTC",
 537                                                          # April 13, 2005 23:50
 538                                                          pattern => "%b %d, %Y %R",
 539                                                         );
 540                     $dt = $p->parse_datetime($date);
 541                 } else {
 542                     # Sun, 28 Sep 2008 12:23:12 +0100 # but was not consistent
 543                     # pattern => "%a, %d %b %Y %T %z",
 544                     $dt = DateTime::Format::DateParse->parse_datetime($date);
 545                 }
 546                 unless ($dt) {
 547                     warn "Could not parse date[$date], setting to epoch 0";
 548                     $dt = DateTime->from_epoch( epoch => 0 );
 549                 }
 550                 $extract{"meta:date"} = $dt->datetime;
 551             }
 552             $extract{"meta:date"} =~ s/\.$// if $extract{"meta:date"};
 553         }
 554         unless ($extract{"meta:writer"}) {
 555             for ("$previous_line[-1] $_") {
 556                 if (0) {
 557                 } elsif (/CPANPLUS, version (\S+)/) {
 558                     $extract{"meta:writer"} = "CPANPLUS $1";
 559                 } elsif (/created (?:automatically )?by (\S+)/) {
 560                     $extract{"meta:writer"} = $1;
 561                 } elsif (/This report was machine-generated by (\S+) (\S+)/) {
 562                     $extract{"meta:writer"} = "$1 $2";
 563                 }
 564                 $extract{"meta:writer"} =~ s/[\.,]$// if $extract{"meta:writer"};
 565             }
 566         }
 567         if ($in_summary) {
 568             # we do that first three lines a bit too often
 569             my $qr = $Opt{dumpvars} || "";
 570             $qr = qr/$qr/ if $qr;
 571             unless (@q) {
 572                 @q = @{$Opt{q}||[]};
 573                 @q = qw(meta:perl conf:archname conf:usethreads conf:optimize meta:writer meta:from) unless @q;
 574             }
 575
 576             my %conf_vars = map {($_ => 1)} grep { /^conf:/ } @q;
 577
 578             if (/^\s*$/ || m|</pre>|) {
 579                 # if not html, we have reached the end now
 580                 $in_summary = 0;
 581             } else {
 582                 my(%kv) = /\G,?\s*([^=]+)=('[^']+?'|\S+)/gc;
 583                 while (my($k,$v) = each %kv) {
 584                     my $ck = "conf:$k";
 585                     $ck =~ s/\s+$//;
 586                     $v =~ s/,$//;
 587                     if ($v =~ /^'(.*)'$/) {
 588                         $v = $1;
 589                     }
 590                     $v =~ s/^\s+//;
 591                     $v =~ s/\s+$//;
 592                     if ($qr && $ck =~ $qr) {
 593                         $extract{$ck} = $v;
 594                     } elsif ($conf_vars{$ck}) {
 595                         $extract{$ck} = $v;
 596                     }
 597                 }
 598             }
 599         }
 600         if ($in_prg_output) {
 601             unless ($extract{"meta:output_from"}) {
 602                 if (/Output from (.+):$/) {
 603                     $extract{"meta:output_from"} = $1
 604                 }
 605             }
 606         }
 607         if ($in_env_context) {
 608             if (/^\s{4}(\S+)\s*=\s*(.*)$/) {
 609                 $extract{"env:$1"} = $2;
 610             }
 611         }
 612         push @previous_line, $_;
 613         if ($expect_prereq || $expect_toolchain) {
 614             if (exists $moduleunpack->{type}) {
 615                 my($module,$v,$needwant);
 616                 # type 1 and 2 are about prereqs, type three about toolchain
 617                 if ($moduleunpack->{type} == 1) {
 618                     (my $leader,$module,$needwant,$v) = eval { unpack $moduleunpack->{tpl}, $_; };
 619                     next LINE if $@;
 620                     if ($leader =~ /^-/) {
 621                         $moduleunpack = {};
 622                         $expect_prereq = 0;
 623                         next LINE;
 624                     } elsif ($leader =~ /^(
 625                                          buil          # build_requires:
 626                                         )/x) {
 627                         next LINE;
 628                     } elsif ($module =~ /^(
 629                                          -             # line drawing
 630                                         )/x) {
 631                         next LINE;
 632                     }
 633                 } elsif ($moduleunpack->{type} == 2) {
 634                     (my $leader,$module,$v,$needwant) = eval { unpack $moduleunpack->{tpl}, $_; };
 635                     next LINE if $@;
 636                     if ($leader =~ /^\*/) {
 637                         $moduleunpack = {};
 638                         $expect_prereq = 0;
 639                         next LINE;
 640                     } elsif ($v =~ /\s/) {
 641                         ($module,$v) = split " ", $_;
 642                     }
 643                 } elsif ($moduleunpack->{type} == 3) {
 644                     (my $leader,$module,$v) = eval { unpack $moduleunpack->{tpl}, $_; };
 645                     next LINE if $@;
 646                     if (!$module) {
 647                         $moduleunpack = {};
 648                         $expect_toolchain = 0;
 649                         next LINE;
 650                     } elsif ($module =~ /^-/) {
 651                         next LINE;
 652                     }
 653                 }
 654                 $module =~ s/\s+$//;
 655                 if ($module) {
 656                     $v =~ s/^\s+//;
 657                     $v =~ s/\s+$//;
 658                     if ($v eq "Have") {
 659                         next LINE;
 660                     }
 661                     $extract{"mod:$module"} = $v;
 662                     if ($needwant) {
 663                         $needwant =~ s/^\s+//;
 664                         $needwant =~ s/\s+$//;
 665                         $extract{"prereq:$module"} = $needwant;
 666                     }
 667                 }
 668             }
 669             if (/(\s+)(Module\s+)(Need\s+)Have/) {
 670                 $in_env_context = 0;
 671                 $moduleunpack = {
 672                                  tpl => 'a'.length($1).'a'.length($2).'a'.length($3).'a*',
 673                                  type => 1,
 674                                 };
 675             } elsif (/(\s+)(Module Name\s+)(Have)(\s+)Want/) {
 676                 $in_env_context = 0;
 677                 my $adjust_1 = 0;
 678                 my $adjust_2 = -length($4);
 679                 my $adjust_3 = length($4);
 680                 # two pass would be required to see where the
 681                 # columns really are. Or could we get away with split?
 682                 $moduleunpack = {
 683                                  tpl => 'a'.length($1).'a'.(length($2)+$adjust_2).'a'.(length($3)+$adjust_3).'a*',
 684                                  type => 2,
 685                                 };
 686             }
 687         }
 688         if (/PREREQUISITES|Prerequisite modules loaded/) {
 689             $in_env_context = 0;
 690             $expect_prereq=1;
 691         }
 692         if ($expecting_toolchain_soon) {
 693             if (/(\s+)(Module\s+) Have/) {
 694                 $in_env_context = 0;
 695                 $expect_toolchain=1;
 696                 $expecting_toolchain_soon=0;
 697                 $moduleunpack = {
 698                                  tpl => 'a'.length($1).'a'.length($2).'a*',
 699                                  type => 3,
 700                                 };
 701             }
 702         }
 703         if (/toolchain versions installed/) {
 704             $in_env_context = 0;
 705             $expecting_toolchain_soon=1;
 706         }
 707     }                           # LINE
 708     if ($Opt{solve}) {
 709         $extract{id} = $id;
 710         if ($extract{"conf:osvers"} && $extract{"conf:archname"}) {
 711             $extract{"conf:archame+osvers"} = join " ", @extract{"conf:archname","conf:osvers"};
 712         }
 713         my $data = $dumpvars->{"==DATA=="} ||= [];
 714         push @$data, \%extract;
 715     }
 716     # ---- %extract finished ----
 717     my $diag = "";
 718     if (my $qr = $Opt{dumpvars}) {
 719         $qr = qr/$qr/;
 720         while (my($k,$v) = each %extract) {
 721             if ($k =~ $qr) {
 722                 $dumpvars->{$k}{$v}{$ok}++;
 723             }
 724         }
 725     }
 726     for my $want (@q) {
 727         my $have  = $extract{$want} || "";
 728         $diag .= " $want\[$have]";
 729     }
 730     printf STDERR " %-4s %8d%s\n", $ok, $id, $diag unless $Opt{quiet};
 731     if ($Opt{raw}) {
 732         $report =~ s/\s+\z//;
 733         print STDERR $report, "\n================\n" unless $Opt{quiet};
 734     }
 735     if ($Opt{interactive}) {
 736         require IO::Prompt;
 737         local @ARGV;
 738         local $ARGV;
 739         my $ans = IO::Prompt::prompt
 740             (
 741              -p => "View $id? [onechar: ynq] ",
 742              -d => "y",
 743              -u => qr/[ynq]/,
 744              -onechar,
 745             );
 746         print STDERR "\n" unless $Opt{quiet};
 747         if ($ans eq "y") {
 748             open my $ifh, "<", $target or die "Could not open $target: $!";
 749             $Opt{pager} ||= "less";
 750             open my $lfh, "|-", $Opt{pager} or die "Could not fork '$Opt{pager}': $!";
 751             local $/;
 752             print {$lfh} <$ifh>;
 753             close $ifh or die "Could not close $target: $!";
 754             close $lfh or die "Could not close pager: $!"
 755         } elsif ($ans eq "q") {
 756             $Signal++;
 757             return;
 758         }
 759     }
 760     return \%extract;
 761 }
 762
 763 =head2 solve
 764
 765 Feeds a couple of potentially interesting data to
 766 Statistics::Regression and sorts the result by R^2 descending. Do not
 767 confuse this with a prove, rather take it as a useful hint. It can
 768 save you minutes of staring at data and provide a quick overview where
 769 one should look closer. Displays the N top candidates, where N
 770 defaults to 3 and can be set with the C<$Opt{solvetop}> variable.
 771 Regressions results with an R^2 of 1.00 are displayed in any case.
 772
 773 The function is called when the option C<-solve> is give on the
 774 commandline. Several extra config variables are calculated, see source
 775 code for details.
 776
 777 =cut
 778 {
 779     my %never_solve_on = map {($_ => 1)}
 780         (
 781          "conf:ccflags",
 782          "conf:config_args",
 783          "conf:cppflags",
 784          "conf:lddlflags",
 785          "conf:uname",
 786          "env:PATH",
 787          "env:PERL5LIB",
 788          "env:PERL5OPT",
 789          'env:$^X',
 790          'env:$EGID',
 791          'env:$GID',
 792          'env:$UID/$EUID',
 793          'env:PERL5_CPANPLUS_IS_RUNNING',
 794          'env:PERL5_CPAN_IS_RUNNING',
 795          'env:PERL5_CPAN_IS_RUNNING_IN_RECURSION',
 796          'meta:ok',
 797         );
 798     my %normalize_numeric =
 799         (
 800          id => sub { return shift },
 801          'meta:date' => sub {
 802              my($Y,$M,$D,$h,$m,$s) = shift =~ /(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)/;
 803              Time::Local::timegm($s,$m,$h,$D,$M-1,$Y);
 804          },
 805         );
 806     my %normalize_value =
 807         (
 808          'meta:perl' => sub {
 809              my($perlatpatchlevel) = shift;
 810              my $perl = $perlatpatchlevel;
 811              $perl =~ s/\@.*//;
 812              $perl;
 813          },
 814         );
 815 sub solve {
 816     my($V,%Opt) = @_;
 817     require Statistics::Regression;
 818     my @regression;
 819     my $ycb;
 820     if (my $ycbbody = $Opt{ycb}) {
 821         $ycb = eval('sub {'.$ycbbody.'}');
 822         die if $@;
 823     } else {
 824         $ycb = sub {
 825             my $rec = shift;
 826             my $y;
 827             if ($rec->{"meta:ok"} eq "PASS") {
 828                 $y = 1;
 829             } elsif ($rec->{"meta:ok"} eq "FAIL") {
 830                 $y = 0;
 831             }
 832             return $y
 833         };
 834     }
 835   VAR: for my $variable (sort keys %$V) {
 836         next if $variable eq "==DATA==";
 837         if ($never_solve_on{$variable}){
 838             warn "Skipping '$variable'\n" unless $Opt{quiet};
 839             next VAR;
 840         }
 841         my $value_distribution = $V->{$variable};
 842         my $keys = keys %$value_distribution;
 843         my @X = qw(const);
 844         if ($normalize_numeric{$variable}) {
 845             push @X, "n_$variable";
 846         } else {
 847             my %seen = ();
 848             for my $value (sort keys %$value_distribution) {
 849                 my $pf = $value_distribution->{$value};
 850                 $pf->{PASS} ||= 0;
 851                 $pf->{FAIL} ||= 0;
 852                 if ($pf->{PASS} || $pf->{FAIL}) {
 853                     my $Xele = sprintf "eq_%s",
 854                         (
 855                          $normalize_value{$variable} ?
 856                          $normalize_value{$variable}->($value) :
 857                          $value
 858                         );
 859                     push @X, $Xele unless $seen{$Xele}++;
 860
 861                 }
 862                 if (
 863                     $pf->{PASS} xor $pf->{FAIL}
 864                    ) {
 865                     my $vl = 40;
 866                     substr($value,$vl) = "..." if length $value > 3+$vl;
 867                     my $poor_mans_freehand_estimation = 0;
 868                     if ($poor_mans_freehand_estimation) {
 869                         warn sprintf
 870                             (
 871                              "%4d %4d %-23s | %s\n",
 872                              $pf->{PASS},
 873                              $pf->{FAIL},
 874                              $variable,
 875                              $value,
 876                             );
 877                     }
 878                 }
 879             }
 880         }
 881         warn "variable[$variable]keys[$keys]X[@X]\n" unless $Opt{quiet};
 882         next VAR unless @X > 1;
 883         my %regdata =
 884             (
 885              X => \@X,
 886              data => [],
 887             );
 888       RECORD: for my $rec (@{$V->{"==DATA=="}}) {
 889             my $y = $ycb->($rec);
 890             next RECORD unless defined $y;
 891             my %obs;
 892             $obs{Y} = $y;
 893             @obs{@X} = (0) x @X;
 894             $obs{const} = 1;
 895             for my $x (@X) {
 896                 if ($x =~ /^eq_(.+)/) {
 897                     my $read_v = $1;
 898                     if (exists $rec->{$variable}
 899                         && defined $rec->{$variable}
 900                        ) {
 901                         my $use_v = (
 902                                      $normalize_value{$variable} ?
 903                                      $normalize_value{$variable}->($rec->{$variable}) :
 904                                      $rec->{$variable}
 905                                     );
 906                         if ($use_v eq $read_v) {
 907                             $obs{$x} = 1;
 908                         }
 909                     }
 910                     # warn "DEBUG: y[$y]x[$x]obs[$obs{$x}]\n";
 911                 } elsif ($x =~ /^n_(.+)/) {
 912                     my $v = $1;
 913                     $obs{$x} = $normalize_numeric{$v}->($rec->{$v});
 914                 }
 915             }
 916             push @{$regdata{data}}, \%obs;
 917         }
 918         _run_regression ($variable, \%regdata, \@regression, \%Opt);
 919     }
 920     my $top = min ($Opt{solvetop} || 3, scalar @regression);
 921     my $max_rsq = sum map {1==$_->rsq ? 1 : 0} @regression;
 922     $top = $max_rsq if $max_rsq && $max_rsq > $top;
 923     my $score = 0;
 924     printf
 925         (
 926          "State after regression testing: %d results, showing top %d\n\n",
 927          scalar @regression,
 928          $top,
 929         );
 930     for my $reg (sort {
 931                      $b->rsq <=> $a->rsq
 932                      ||
 933                      $a->k <=> $b->k
 934                  } @regression) {
 935         printf "(%d)\n", ++$score;
 936         eval { $reg->print; };
 937         if ($@) {
 938             printf "\n\nOops, Statistics::Regression died during ->print() with error message[$@]\n\n";
 939         }
 940         last if --$top <= 0;
 941     }
 942 }
 943 }
 944
 945 sub _run_regression {
 946     my($variable,$regdata,$regression,$opt) = @_;
 947     my @X = @{$regdata->{X}};
 948     while (@X > 1) {
 949         my $reg = Statistics::Regression->new($variable,\@X);
 950         for my $obs (@{$regdata->{data}}) {
 951             my $y = delete $obs->{Y};
 952             $reg->include($y, $obs);
 953             $obs->{Y} = $y;
 954         }
 955         eval {$reg->theta;
 956               my @e = $reg->standarderrors;
 957               die "found standarderrors == 0" if grep { 0 == $_ } @e;
 958               $reg->rsq;};
 959         if ($@) {
 960             if ($opt->{verbose} && $opt->{verbose}>=2) {
 961                 require YAML::Syck;
 962                 warn YAML::Syck::Dump
 963                     ({error=>"could not determine some regression parameters",
 964                       variable=>$variable,
 965                       k=>$reg->k,
 966                       n=>$reg->n,
 967                       X=>$regdata->{"X"},
 968                       errorstr => $@,
 969                      });
 970             }
 971             # reduce k in case that linear dependencies disturbed us
 972             splice @X, 1, 1;
 973         } else {
 974             # $reg->print;
 975             push @$regression, $reg;
 976             return;
 977         }
 978     }
 979 }
 980
 981 =head1 AUTHOR
 982
 983 Andreas König
 984
 985 =head1 BUGS
 986
 987 Please report any bugs or feature requests through the web
 988 interface at
 989 L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=CPAN-Testers-ParseReport>.
 990 I will be notified, and then you'll automatically be notified of
 991 progress on your bug as I make changes.
 992
 993 =head1 SUPPORT
 994
 995 You can find documentation for this module with the perldoc command.
 996
 997     perldoc CPAN::Testers::ParseReport
 998
 999
1000 You can also look for information at:
1001
1002 =over 4
1003
1004 =item * RT: CPAN's request tracker
1005
1006 L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=CPAN-Testers-ParseReport>
1007
1008 =item * AnnoCPAN: Annotated CPAN documentation
1009
1010 L<http://annocpan.org/dist/CPAN-Testers-ParseReport>
1011
1012 =item * CPAN Ratings
1013
1014 L<http://cpanratings.perl.org/d/CPAN-Testers-ParseReport>
1015
1016 =item * Search CPAN
1017
1018 L<http://search.cpan.org/dist/CPAN-Testers-ParseReport>
1019
1020 =back
1021
1022
1023 =head1 ACKNOWLEDGEMENTS
1024
1025 Thanks to RJBS for module-starter.
1026
1027 =head1 COPYRIGHT & LICENSE
1028
1029 Copyright 2008 Andreas König.
1030
1031 This program is free software; you can redistribute it and/or modify it
1032 under the same terms as Perl itself.
1033
1034
1035 =cut
1036
1037 1; # End of CPAN::Testers::ParseReport